def getSequence(blastDb, accession, start=0, end=0, strand='+', padding=0, debug=False):
    """Load a sequence from a BLAST database.

    The sequence is fetched by running ``fastacmd`` through a shell pipe and
    parsing the FASTA text it prints.

    @param blastDb: BLAST database
    @param accession: Accession name
    @param start: Start coordinate (Default: 0, extract from start of sequence)
    @param end: End coordinate (Default: 0, extract to the end of sequence)
    @param strand: Strand (Default: '+'); '-' returns the reverse complement
    @param padding: Sequence padding (Default: 0); accepted but not used here
    @param debug: When true, print the command line before running it
    @returns: (header,seq)
    @raises Exception: no header line was produced ('BLAST failure')
    @raises NotFoundException: a header was read but no sequence lines followed
    """
    # Coordinates may arrive in either order; pass them low,high.
    if start > end:
        start, end = end, start

    # NOTE(review): the values are interpolated into a shell command line, so
    # blastDb and accession must not come from untrusted input.
    cmd = 'fastacmd -d %s -s "%s" -L %i,%i' % (blastDb, accession, start, end)
    if debug:
        print(cmd)

    p = os.popen(cmd)
    try:
        # First line is the FASTA header; drop its leading marker character.
        header = p.readline()[1:].strip()
        if not header:
            raise Exception('BLAST failure')
        seq = ''.join([line.strip() for line in p])
    finally:
        # Release the pipe on every path, including the failure above.
        p.close()

    if not seq:
        print('%s %s' % (blastDb, accession))
        raise NotFoundException()

    if strand == '-':
        seq = sequence.reverseComplement(seq)
    return header, seq
def projectOntoString(self, seq):
    """Cut this object's target blocks out of a sequence string.

    For each (tStart, blockSize) pair taken from ``self.tStarts`` and
    ``self.blockSizes`` the slice of length ``3*blockSize`` starting at
    ``tStart`` is extracted. When ``self.strand`` is not '+', the slices are
    taken from the reverse complement of ``seq`` instead of ``seq`` itself.

    @param seq: Sequence string to slice
    @returns: List of extracted substrings, one per block, in block order
    """
    # The strand is fixed for the whole call, so pick the template once
    # instead of reverse-complementing the full sequence for every block.
    if self.strand == '+':
        template = seq
    else:
        template = sequence.reverseComplement(seq)

    return [template[tStart:tStart + 3 * blockSize]
            for tStart, blockSize in zip(self.tStarts, self.blockSizes)]
def projectOntoString(self, seq):
    """Extract the target blocks described by this object from ``seq``.

    Blocks are read pairwise from ``self.tStarts`` and ``self.blockSizes``;
    each one covers ``3 * blockSize`` characters beginning at ``tStart``.
    For any strand other than '+', coordinates are applied to the reverse
    complement of ``seq``.

    @param seq: Sequence string to slice
    @returns: List with one extracted substring per block
    """
    # Reverse-complement at most once per call: the result does not depend
    # on the block, so recomputing it inside the loop was wasted work.
    source = seq
    if self.strand != '+':
        source = sequence.reverseComplement(seq)

    extracted = []
    for tStart, blockSize in zip(self.tStarts, self.blockSizes):
        # tEnd is the inclusive index of the block's last character.
        tEnd = tStart + 3 * blockSize - 1
        extracted.append(source[tStart:tEnd + 1])
    return extracted
def writeSeq(ref, seq, pos, type, strand):
    """Write the sequence window around a site to that site type's output.

    The window is ``seq[pos-preoffset:pos+postoffset]``. Both offsets come
    from the ``offsets`` table entry for ``type``, and one of them is widened
    by 2 depending on the strand. On the '-' strand the site type is swapped
    (ATG<->TGA, GT->AG, anything else->GT) and the window is
    reverse-complemented before it is written.

    NOTE(review): the indentation of this block was reconstructed from a
    flattened source. Confirm that ``postoffset += 2`` applies to every
    '-' strand site and not only to the final ``else`` of the type swap.

    @param ref: Reference name; first field of the record header
    @param seq: Sequence that is sliced
    @param pos: Position of the site within seq
    @param type: Site type; key into ``offsets`` and (after swapping) ``outfiles``
    @param strand: '-' selects reverse-strand handling; anything else is forward
    """
    preoffset, postoffset = offsets[type]
    # Keep the caller's type: the chain below tests it while rebinding `type`.
    ntype = type
    if strand == '-':
        if ntype == 'ATG':
            type = 'TGA'
        elif ntype == 'TGA':
            type = 'ATG'
        elif ntype == 'GT':
            type = 'AG'
        else:
            type = 'GT'
        postoffset += 2
    else:
        preoffset += 2
    retseq = seq[pos-preoffset:pos+postoffset]
    if strand == '-':
        retseq = reverseComplement(retseq)
    # Header is ref:start:end:strand, with end written as the last included index.
    head = ref + ':' + str(pos-preoffset) + ':' + str(pos+postoffset-1) + ':' + strand
    outfiles[type].write(head, retseq)
def getSequence(blastDb, accession, start=0, end=0, strand='+', padding=0, debug=False):
    """Load a sequence from a BLAST database.

    Runs ``fastacmd`` in a shell pipe, takes the first output line as the
    FASTA header and joins the remaining lines into the sequence.

    @param blastDb: BLAST database
    @param accession: Accession name
    @param start: Start coordinate (Default: 0, extract from start of sequence)
    @param end: End coordinate (Default: 0, extract to the end of sequence)
    @param strand: Strand (Default: '+'); '-' returns the reverse complement
    @param padding: Sequence padding (Default: 0); currently ignored
    @param debug: When true, print the command line before it is run
    @returns: (header,seq)
    @raises Exception: the command produced no header line ('BLAST failure')
    @raises NotFoundException: the header was not followed by any sequence
    """
    if start > end:
        # Accept coordinates in either order.
        start, end = end, start

    # NOTE(review): built as a shell string; only pass trusted blastDb and
    # accession values.
    cmd = 'fastacmd -d %s -s "%s" -L %i,%i' % (blastDb, accession, start, end)
    if debug:
        print(cmd)

    pipe = os.popen(cmd)
    try:
        # Strip the leading marker character and whitespace from the header.
        header = pipe.readline()[1:].strip()
        if not header:
            raise Exception('BLAST failure')
        chunks = []
        for line in pipe:
            chunks.append(line.strip())
    finally:
        # The pipe used to be left open on every path; always close it.
        pipe.close()

    seq = ''.join(chunks)
    if not seq:
        print('%s %s' % (blastDb, accession))
        raise NotFoundException()

    if strand == '-':
        seq = sequence.reverseComplement(seq)
    return header, seq
def _get_reverse_bases(self):
    """Return the reverse complement of this object's bases, computed lazily.

    The value is stored in ``self._reverse`` the first time it is needed
    (while that attribute is None) and returned from there afterwards.
    """
    if self._reverse is not None:
        return self._reverse
    # First request: derive it from the forward bases and remember it.
    self._reverse = sequence.reverseComplement(self.get_bases())
    return self._reverse
#!/usr/bin/env python

"""
stops.py

Author: Tony Papenfuss
Date: Mon Apr 17 19:04:42 EST 2006

Print each stop codon, in upper and lower case, beside its reverse complement.
"""

import os
import sys

import sequence


# Upper-case stop codons first, then the same three in lower case.
stops = [
    'TAA', 'TGA', 'TAG',
    'taa', 'tga', 'tag',
]

for stop in stops:
    # One output line per codon: "<codon> <reverse complement>".
    print('%s %s' % (stop, sequence.reverseComplement(stop)))
# Extract ORFs from the FASTA file named on the command line, write them to
# ORFs.fa and print a per-ORF consistency report.
# NOTE(review): indentation reconstructed from a flattened source; the bare
# `print` is assumed to sit at loop level (one blank line per ORF). Confirm.
# minLen and pattern are not defined in this fragment; they must be set
# earlier in the script.
i = 0  # rebound by the loop below; stays 0 only if no ORF is produced
writer = fasta.MfaWriter('ORFs.fa')
filename = sys.argv[1]
header,dna = fasta.load(filename)
header = header.strip()
orfIter = sequence.extractOrfsIter(dna, minLen=minLen, pattern=pattern)
for i,gStart,gEnd,orf in orfIter:
    # Record header: <source header>.<index>.<start>-<end> Length=<orf length>
    h = '%s.%i.%i-%i Length=%i' % (header,i,gStart,gEnd,len(orf))
    writer.write(h, orf)
    fasta.pretty(h, orf)
    if gStart<gEnd:
        # Coordinates ascend: slice the DNA as-is (coordinates appear to be
        # 1-based inclusive, hence the -1 on the start).
        s = dna[gStart-1:gEnd]
        print gStart, gEnd, len(s), len(s) % 3==0
        print sequence.codons(s, remainder=True)
        print sequence.translate(s)
    else:
        # Coordinates descend: swap them, slice, then reverse-complement.
        gStart,gEnd = gEnd,gStart
        s = dna[gStart-1:gEnd]
        s = sequence.reverseComplement(s)
        print gStart, gEnd, len(s), len(s) % 3==0
        print sequence.codons(s, remainder=True)
        print sequence.translate(s)
    print
writer.close()
import bacillussubtilis168 as bs
import sequence
import unittest

# Forward-strand bases from the genome module and their reverse complement,
# computed once at import time and shared by the tests below.
forward = bs.bases
reverse = sequence.reverseComplement(forward)


def find_once(sear, seq):
    """Return where ``sear`` occurs in ``seq`` if its first and last hits agree.

    When ``find`` and ``rfind`` give different indices (more than one
    occurrence) the string 'No!' is returned instead. An absent ``sear``
    yields -1 from both searches, so -1 is returned.
    """
    first = seq.find(sear)
    last = seq.rfind(sear)
    if first != last:
        return 'No!'
    return first


class PetersTest(unittest.TestCase):
    """Sanity checks on the forward and reverse-complement strands."""

    def setUp(self):
        # No per-test fixtures are needed.
        pass

    def test_length(self):
        # Reverse-complementing must not change the sequence length.
        self.assertEqual(len(forward), len(reverse))

    def test_find_once(self):
        # A single base is expected to occur more than once in the genome.
        self.assertEqual('No!', find_once('A', forward))
length += coord[1] - coord[0] + 1 if gene.strand == '-': frame = str(swap((length-1)%3)) if (coord[0] == gene.min) and (coord[1] == gene.max): split.addFeature(Feature(gene.ref+'_inter', gene.source, ['inter'], [[prev+1, coord[0]-1]], gene.score, '+', '.', gene.name)) split.addFeature(Feature(gene.ref+'_esing', gene.source, ['esing'], [coord], gene.score, gene.strand, frame, gene.name)) elif coord[0] == gene.min: split.addFeature(Feature(gene.ref+'_inter', gene.source, ['inter'], [[prev+1, coord[0]-1]], gene.score, '+', '.', gene.name)) split.addFeature(Feature(gene.ref+'_'+forward, gene.source, [forward], [coord], gene.score, gene.strand, frame, gene.name)) elif coord[1] == gene.max: split.addFeature(Feature(gene.ref+'_intrn', gene.source, ['intrn'], [[prev+1, coord[0]-1]], gene.score, gene.strand, '.', gene.name)) split.addFeature(Feature(gene.ref+'_'+backward, gene.source, [backward], [coord], gene.score, gene.strand, frame, gene.name)) else: split.addFeature(Feature(gene.ref+'_intrn', gene.source, ['intrn'], [[prev+1, coord[0]-1]], gene.score, gene.strand, '.', gene.name)) split.addFeature(Feature(gene.ref+'_eintn', gene.source, ['eintn'], [coord], gene.score, gene.strand, frame, gene.name)) prev = coord[1] split.addFeature(Feature(gene.ref+'_inter', gene.source, ['inter'], [[prev+1, endref]], gene.score, '+', '.', gene.name)) for typeref in split: type = typeref.split('_')[-1] if outfiles.has_key(type): split.writeGff(outfiles[type][0], typeref) for generef in split[typeref]: gene = split[typeref][generef] for a in range(len(gene.coords)): start = gene.coords[a][0] end = gene.coords[a][1] ref = gene.ref strand = gene.strand outhead = gene.ref + ':' + str(start) + ':' + str(end) + ':' + strand + ':' + gene.frame[a] outseq = seq[start-startref:end-startref+1] if strand == '-': outseq = reverseComplement(outseq) outfiles[gene.type[0]][1].write(outhead, outseq)
def selfSW(s):
    """Align a sequence against its own reverse complement.

    Returns whatever SW returns for the pair (s, reverse complement of s).
    The better the alignment the more secondary structure.
    """
    revComp = sequence.reverseComplement(s)
    return SW(s, revComp)
def _restoreMismatches(aligned, original):
    """Replace each '.' in an alignment row with the base it stands for.

    'x' marks a gap and consumes no base of ``original``, so the base for
    alignment position i is ``original[i - gaps]``, where ``gaps`` is the
    number of 'x' characters to the left of i.
    """
    restored = []
    gaps = 0
    for i, ch in enumerate(aligned):
        if ch == '.':
            ch = original[i - gaps]
        if ch == 'x':
            gaps += 1
        restored.append(ch)
    return ''.join(restored)


def blastEnergy(qrySeq, subjSeq, findGaps=True, debug=False):
    """Return the energy for the association of the query and subject sequences.

    With ``findGaps`` the gaps are found by SW alignment: spaces in the two
    alignment rows become 'x', trailing '.' characters are stripped, the
    shorter row is padded with 'x', and the remaining '.' positions are
    filled back in from the input sequences. Without ``findGaps`` the inputs
    are used directly with U replaced by T.

    Fixes relative to the previous version:
      * padding used ``str += ['x'] * n``, which raises TypeError; it now
        appends ``'x' * n``;
      * the ``findGaps=False`` branch read qrySw/subjSw before assigning
        them and therefore always raised; it now derives both from the
        input sequences;
      * the debug stream is always bound (stdout), so forcing debug on when
        a gap is found can no longer hit an unbound name, and the unused
        ``beDebug.log`` handle is no longer opened.

    NOTE(review): the nesting of this function was reconstructed from a
    flattened source. In particular confirm that the subject should also get
    the U->T replacement when findGaps is false (the old code ended that
    branch with ``subjSw=subjSeq``).

    @param qrySeq: Query sequence
    @param subjSeq: Subject sequence
    @param findGaps: Align with SW first to locate gaps (Default: True)
    @param debug: Print intermediate alignment rows (forced on if a gap is found)
    @returns: Result of energy(query row, reverse complement of subject row)
    """
    # Debug output goes to stdout.
    dbOut = sys.stdout

    if findGaps:
        swAlign, swScore = SW(qrySeq, subjSeq)
        qrySw, subjSw = [line.strip().replace(' ', 'x') for line in swAlign.split('\n')]
        # Any gap switches the debug report on for this call.
        if 'x' in qrySw or 'x' in subjSw:
            debug = True
        if debug:
            dbOut.write('\ninput:\n%s\n%s\nSW:\n%s\n%s\n%s\n'
                        % (qrySeq, subjSeq, qrySw, subjSw, swScore))

        # Add x to the right end if necessary (this should not happen).
        qrySw = qrySw.rstrip('.')
        subjSw = subjSw.rstrip('.')
        sizeDiff = len(qrySw) - len(subjSw)
        if sizeDiff > 0:
            subjSw += 'x' * sizeDiff
        elif sizeDiff < 0:
            qrySw += 'x' * (-sizeDiff)

        # Put mismatches back in.
        qrySw = _restoreMismatches(qrySw, qrySeq)
        subjSw = _restoreMismatches(subjSw, subjSeq)

        if debug:
            dbOut.write('e-mangle:\n%s\n%s\n' % (qrySw, subjSw))
    else:
        qrySw = qrySeq.replace('U', 'T')
        subjSw = subjSeq.replace('U', 'T')

    try:
        e = energy(qrySw, sequence.reverseComplement(subjSw))
    except:
        # Report the offending inputs, then let the original error propagate.
        print("""energy calculation failed:
Query:\t%s\t%s
Subj:\t%s\t%s
""" % (qrySeq, qrySw, subjSeq, subjSw))
        raise
    return e
def getSequenceFromString(self, seq):
    """Return the part of ``seq`` covered by this object's subject range.

    The slice runs from ``self.sStart - 1`` up to (not including) index
    ``self.sEnd``. When ``self.strand`` is '-', the slice is
    reverse-complemented before it is returned.
    """
    window = seq[self.sStart-1:self.sEnd]
    if self.strand != '-':
        return window
    return sequence.reverseComplement(window)
# Script body: pull ORFs out of the FASTA file given as the first command-line
# argument, save them to ORFs.fa, and print a short check for each one.
# NOTE(review): the nesting below was rebuilt from a flattened source; the
# bare `print` is assumed to belong to the loop body rather than to the
# else-branch. Confirm against the original file.
# minLen and pattern are used here but not defined in this fragment.
i = 0  # overwritten by the loop unpacking below
writer = fasta.MfaWriter('ORFs.fa')
filename = sys.argv[1]
header, dna = fasta.load(filename)
header = header.strip()
orfIter = sequence.extractOrfsIter(dna, minLen=minLen, pattern=pattern)
for i, gStart, gEnd, orf in orfIter:
    # The header carries the coordinates exactly as the iterator yields them.
    h = '%s.%i.%i-%i Length=%i' % (header, i, gStart, gEnd, len(orf))
    writer.write(h, orf)
    fasta.pretty(h, orf)
    if gStart < gEnd:
        # Ascending coordinates: take the slice directly.
        s = dna[gStart - 1:gEnd]
        print gStart, gEnd, len(s), len(s) % 3 == 0
        print sequence.codons(s, remainder=True)
        print sequence.translate(s)
    else:
        # Descending coordinates: reorder, slice, then reverse-complement.
        gStart, gEnd = gEnd, gStart
        s = dna[gStart - 1:gEnd]
        s = sequence.reverseComplement(s)
        print gStart, gEnd, len(s), len(s) % 3 == 0
        print sequence.codons(s, remainder=True)
        print sequence.translate(s)
    print
writer.close()
if False: domains = hmmer4.load_domains('hmmer/6frames.txt') for d in domains: p = hmmer4.parseSixFrameHeader(d.accession) print d print p.name, p.frame gStart,gEnd,strand = hmmer4.convert6FrameToGenomic(d.sStart,d.sEnd,p.frame,L) print gStart,gEnd,strand if strand=='+': dna = s[gStart-1:gEnd] print len(dna), len(dna) % 3==0 print sequence.codons(dna, remainder=True) print sequence.translate(dna) else: gStart,gEnd = gEnd,gStart dna = sequence.reverseComplement(s[gStart-1:gEnd]) print len(dna), len(dna) % 3==0 print sequence.codons(dna, remainder=True) print sequence.translate(dna) print else: domains = hmmer4.load_domains('hmmer/ORFs.txt') for d in domains: o = hmmer4.parseOrfHeader(d.accession) print d print o gStart,gEnd = hmmer4.convertOrfToGenomic(d.sStart,d.sEnd,o.strand,o.start) if o.strand=='+': dna = s[gStart-1:gEnd] else: gStart,gEnd = gEnd,gStart
def getSequenceFromString(self, seq):
    """Slice this object's subject range out of ``seq``.

    Uses ``self.sStart`` and ``self.sEnd`` as the bounds (start shifted down
    by one for indexing) and returns the reverse complement of the slice
    when ``self.strand`` is '-'.
    """
    first = self.sStart - 1
    last = self.sEnd
    fragment = seq[first:last]
    if self.strand == '-':
        return sequence.reverseComplement(fragment)
    return fragment
#!/usr/bin/env python

"""
stops.py

Author: Tony Papenfuss
Date: Mon Apr 17 19:04:42 EST 2006

List the stop codons, in both cases, together with their reverse complements.
"""

import os, sys
import sequence


# Three stop codons in upper case followed by the same three in lower case.
stops = ['TAA', 'TGA', 'TAG', 'taa', 'tga', 'tag']

for stop in stops:
    revComp = sequence.reverseComplement(stop)
    # Emit the codon and its reverse complement separated by one space.
    print('%s %s' % (stop, revComp))
Date: Tue Aug 22 20:14:57 EST 2006
"""

import os, sys
import fasta, sequence

# Re-derive every ORF in ORFs.fa from the genomic sequence in NKC.fa, report
# the headers whose stored ORF differs from the re-derived translation, and
# write the re-derived translations to ORFs2.fa.
# NOTE(review): nesting rebuilt from a flattened source; writer.write is
# assumed to run for every ORF, not only for mismatches. Confirm.
header,seq = fasta.load('NKC.fa')
orfIterator = fasta.load_iter('ORFs.fa')
writer = fasta.MfaWriter('ORFs2.fa')
for h,orf in orfIterator:
    # The first whitespace-delimited token of the header must split into
    # exactly four dot-separated fields; the last one is "<start>-<end>".
    chrom,block,orfId,limits = h.split()[0].split('.')
    start,end = limits.split('-')
    start = int(start)
    end = int(end)
    if start>end:
        # Descending coordinates: reorder them and translate the reverse
        # complement of the slice.
        strand = '-'
        start,end = end,start
        s = sequence.translate(sequence.reverseComplement(seq[start-1:end]))
    else:
        strand = '+'
        s = sequence.translate(seq[start-1:end])
    # Print the header of any ORF whose stored sequence does not match.
    if s!=orf:
        print h
    writer.write(h,s + '\n')
writer.close()