예제 #1
0
파일: blast.py 프로젝트: SiriusShiu/Mungo
def getSequence(blastDb, accession, start=0, end=0, strand='+', padding=0, debug=False):
    """Load a sequence from a BLAST database by running ``fastacmd``.
    
    @param blastDb: BLAST database
    @param accession: Accession name
    @param start: Start coordinate (Default: 0, extract from start of sequence)
    @param end: End coordinate (Default: 0, extract to the end of sequence)
    @param strand: Strand (Default: '+'); '-' returns the reverse complement
    @param padding: Sequence padding (Default: 0); accepted but not used here
    @param debug: If true, print the command line before running it
    @returns: (header,seq)
    @raises Exception: if the command produced no header line
    @raises NotFoundException: if a header was read but no sequence lines followed
    """
    # Coordinates may be supplied in either order.
    if start>end: start,end = end,start
    
    # NOTE(review): blastDb and accession are interpolated into a shell
    # command without escaping -- only pass trusted values.
    cmd = 'fastacmd -d %s -s "%s" -L %i,%i' % (blastDb,accession,start,end)
    if debug: print(cmd)
    p = os.popen(cmd)
    try:
        # The first output line is the header; its first character
        # (presumably the FASTA '>' marker) is dropped.
        header = p.readline()[1:].strip()
        if not header:
            raise Exception('BLAST failure')
        # Remaining lines are sequence text; join them without line breaks.
        seq = ''.join([line.strip() for line in p])
    finally:
        # Release the pipe on every path, including the failure above.
        p.close()
    
    if not seq:
        print('%s %s' % (blastDb, accession))
        raise NotFoundException()
    
    if strand=='-':
        seq = sequence.reverseComplement(seq)
    
    return header,seq
예제 #2
0
 def projectOntoString(self, seq):
     """Extract the piece of a string covered by each alignment block.

     Each block contributes the 3*blockSize characters starting at its
     tStart offset. When the strand is anything other than '+', the
     offsets index into the reverse complement of seq instead.

     @param seq: String to slice
     @returns: List of extracted sub-strings, one per (tStart, blockSize) pair
     """
     # The string being sliced does not depend on the block, so build it
     # once instead of reverse-complementing seq for every block.
     if self.strand=='+':
         target = seq
     else:
         target = sequence.reverseComplement(seq)
     return [target[tStart:tStart+3*blockSize]
             for tStart,blockSize in zip(self.tStarts, self.blockSizes)]
예제 #3
0
 def projectOntoString(self, seq):
     """Extract the piece of a string covered by each alignment block.

     Each block contributes the 3 * blockSize characters starting at its
     tStart offset. When the strand is anything other than '+', the
     offsets index into the reverse complement of seq instead.

     @param seq: String to slice
     @returns: List of extracted sub-strings, one per (tStart, blockSize) pair
     """
     # Reverse-complementing is independent of the block being processed,
     # so it is done at most once rather than inside the loop.
     if self.strand == '+':
         target = seq
     else:
         target = sequence.reverseComplement(seq)
     return [target[tStart:tStart + 3 * blockSize]
             for tStart, blockSize in zip(self.tStarts, self.blockSizes)]
예제 #4
0
파일: extractsensor.py 프로젝트: mrG7/tgene
def writeSeq(ref, seq, pos, type, strand):
    """Write the window of seq around pos to the output for a signal type.

    The window offsets come from the module-level offsets table, looked up
    with the type as passed in. On the '-' strand the type is swapped for
    its counterpart (ATG<->TGA, GT->AG, anything else->GT), the window is
    widened by 2 on the right and the sequence is reverse complemented; on
    any other strand the window is widened by 2 on the left.

    @param ref: Reference name used as the first header field
    @param seq: Sequence string to slice
    @param pos: Position the window is anchored on
    @param type: Signal type; key into offsets (and, after swapping, outfiles)
    @param strand: '-' for the reverse strand
    """
    preoffset, postoffset = offsets[type]
    onMinus = strand == '-'
    if onMinus:
        # The offsets were looked up above with the unswapped type.
        type = {'ATG': 'TGA', 'TGA': 'ATG', 'GT': 'AG'}.get(type, 'GT')
        postoffset += 2
    else:
        preoffset += 2

    first = pos - preoffset
    last = pos + postoffset
    retseq = seq[first:last]
    if onMinus:
        retseq = reverseComplement(retseq)

    # Header fields: ref, window start, inclusive window end, strand.
    head = ':'.join([ref, str(first), str(last - 1), strand])
    outfiles[type].write(head, retseq)
예제 #5
0
파일: blast.py 프로젝트: PapenfussLab/Mungo
def getSequence(blastDb,
                accession,
                start=0,
                end=0,
                strand='+',
                padding=0,
                debug=False):
    """Load a sequence from a BLAST database by running ``fastacmd``.
    
    @param blastDb: BLAST database
    @param accession: Accession name
    @param start: Start coordinate (Default: 0, extract from start of sequence)
    @param end: End coordinate (Default: 0, extract to the end of sequence)
    @param strand: Strand (Default: '+'); '-' returns the reverse complement
    @param padding: Sequence padding (Default: 0); accepted but not used here
    @param debug: If true, print the command line before running it
    @returns: (header,seq)
    @raises Exception: if the command produced no header line
    @raises NotFoundException: if a header was read but no sequence lines followed
    """
    # Coordinates may be supplied in either order.
    if start > end: start, end = end, start

    # NOTE(review): blastDb and accession are interpolated into a shell
    # command without escaping -- only pass trusted values.
    cmd = 'fastacmd -d %s -s "%s" -L %i,%i' % (blastDb, accession, start, end)
    if debug: print(cmd)
    p = os.popen(cmd)
    try:
        # The first output line is the header; its first character
        # (presumably the FASTA '>' marker) is dropped.
        header = p.readline()[1:].strip()
        if not header:
            raise Exception('BLAST failure')
        # Remaining lines are sequence text; join them without line breaks.
        seq = ''.join([line.strip() for line in p])
    finally:
        # Release the pipe on every path, including the failure above.
        p.close()

    if not seq:
        print('%s %s' % (blastDb, accession))
        raise NotFoundException()

    if strand == '-':
        seq = sequence.reverseComplement(seq)

    return header, seq
예제 #6
0
 def _get_reverse_bases(self):
     if self._reverse is None:
         self._reverse = sequence.reverseComplement(self.get_bases())
     return self._reverse
예제 #7
0
#!/usr/bin/env python
"""
stops.py

Author: Tony Papenfuss
Date: Mon Apr 17 19:04:42 EST 2006

"""

import os, sys
import sequence

stops = ['TAA', 'TGA', 'TAG', 'taa', 'tga', 'tag']
for stop in stops:
    print stop, sequence.reverseComplement(stop)
예제 #8
0
i = 0
writer = fasta.MfaWriter('ORFs.fa')

filename = sys.argv[1]
header,dna = fasta.load(filename)
header = header.strip()

orfIter = sequence.extractOrfsIter(dna, minLen=minLen, pattern=pattern)
for i,gStart,gEnd,orf in orfIter:
    h = '%s.%i.%i-%i  Length=%i' % (header,i,gStart,gEnd,len(orf))
    writer.write(h, orf)
    
    fasta.pretty(h, orf)
    
    if gStart<gEnd:
        s = dna[gStart-1:gEnd]
        print gStart, gEnd, len(s), len(s) % 3==0
        print sequence.codons(s, remainder=True)
        print sequence.translate(s)
    else:
        gStart,gEnd = gEnd,gStart
        s = dna[gStart-1:gEnd]
        s = sequence.reverseComplement(s)
        print gStart, gEnd, len(s), len(s) % 3==0
        print sequence.codons(s, remainder=True)
        print sequence.translate(s)
        
    print
writer.close()
예제 #9
0
import bacillussubtilis168 as bs
import sequence
import unittest



# Bases provided by the bacillussubtilis168 module (imported as bs).
forward = bs.bases
# Reverse complement of those bases, computed once when the module loads.
reverse = sequence.reverseComplement(forward)



def find_once(sear, seq):
    """Return where sear sits in seq if its first and last matches coincide.

    When seq.find and seq.rfind agree, that shared index is returned; this
    includes -1 when sear does not occur at all. When they differ, the
    string 'No!' is returned instead.
    """
    first = seq.find(sear)
    last = seq.rfind(sear)
    if first != last:
        return 'No!'
    return first
    
    
    
class PetersTest(unittest.TestCase):
    """Checks on the module-level forward and reverse sequences."""

    def setUp(self):
        """No per-test fixtures are needed."""
        pass

    def test_length(self):
        """The reverse complement has the same length as the forward bases."""
        forwardLength = len(forward)
        reverseLength = len(reverse)
        self.assertEqual(forwardLength, reverseLength)

    def test_find_once(self):
        """find_once reports 'No!' for the base 'A' in the forward bases."""
        outcome = find_once('A', forward)
        self.assertEqual('No!', outcome)
    
예제 #10
0
         length += coord[1] - coord[0] + 1
         if gene.strand == '-': frame = str(swap((length-1)%3))
         if (coord[0] == gene.min) and (coord[1] == gene.max):
             split.addFeature(Feature(gene.ref+'_inter', gene.source, ['inter'], [[prev+1, coord[0]-1]], gene.score, '+', '.', gene.name))
             split.addFeature(Feature(gene.ref+'_esing', gene.source, ['esing'], [coord], gene.score, gene.strand, frame, gene.name))
         elif coord[0] == gene.min:
             split.addFeature(Feature(gene.ref+'_inter', gene.source, ['inter'], [[prev+1, coord[0]-1]], gene.score, '+', '.', gene.name))
             split.addFeature(Feature(gene.ref+'_'+forward, gene.source, [forward], [coord], gene.score, gene.strand, frame, gene.name))
         elif coord[1] == gene.max:
             split.addFeature(Feature(gene.ref+'_intrn', gene.source, ['intrn'], [[prev+1, coord[0]-1]], gene.score, gene.strand, '.', gene.name))
             split.addFeature(Feature(gene.ref+'_'+backward, gene.source, [backward], [coord], gene.score, gene.strand, frame, gene.name))
         else:
             split.addFeature(Feature(gene.ref+'_intrn', gene.source, ['intrn'], [[prev+1, coord[0]-1]], gene.score, gene.strand, '.', gene.name))
             split.addFeature(Feature(gene.ref+'_eintn', gene.source, ['eintn'], [coord], gene.score, gene.strand, frame, gene.name))
         prev = coord[1]
 split.addFeature(Feature(gene.ref+'_inter', gene.source, ['inter'], [[prev+1, endref]], gene.score, '+', '.', gene.name))
 for typeref in split:
     type = typeref.split('_')[-1]
     if outfiles.has_key(type):
         split.writeGff(outfiles[type][0], typeref)
         for generef in split[typeref]:
             gene = split[typeref][generef]
             for a in range(len(gene.coords)):
                 start = gene.coords[a][0]
                 end = gene.coords[a][1]
                 ref = gene.ref
                 strand = gene.strand
                 outhead = gene.ref + ':' + str(start) + ':' + str(end) + ':' + strand + ':' + gene.frame[a]
                 outseq = seq[start-startref:end-startref+1]
                 if strand == '-': outseq = reverseComplement(outseq)
                 outfiles[gene.type[0]][1].write(outhead, outseq)
예제 #11
0
def selfSW(s):
    """Align a sequence against its own reverse complement.

    Per the original author's note, the better this alignment, the more
    secondary structure.

    @param s: Sequence to align with itself
    @returns: Whatever SW returns for (s, reverse complement of s)
    """
    revComp = sequence.reverseComplement(s)
    return SW(s, revComp)
예제 #12
0
def _restoreMismatches(aligned, original):
    """Replace each '.' in an alignment row with the base from original.

    'x' characters mark gaps and consume no position in original, so the
    base for column i is original[i - number of gaps seen before column i].
    Only gap markers already present in the row are counted.
    """
    gaps = 0
    restored = []
    for i, ch in enumerate(aligned):
        if ch == 'x':
            gaps += 1
        elif ch == '.':
            ch = original[i - gaps]
        restored.append(ch)
    return ''.join(restored)


def blastEnergy(qrySeq,subjSeq,findGaps=True,debug=False):
    """Return the energy for the association of the query and subject
    sequences.  The gaps are found by SW alignment.

    @param qrySeq: Query sequence
    @param subjSeq: Subject sequence
    @param findGaps: If true (default), align the two sequences with SW and
        mark gaps with 'x'; otherwise use the sequences directly with 'U'
        replaced by 'T'
    @param debug: If true, print the intermediate alignment rows; this is
        switched on automatically whenever the alignment contains a gap
    @returns: energy(query row, reverse complement of subject row)
    @raises: re-raises whatever energy() raised, after printing the inputs
    """
    # Debug output has always ended up on stdout: the original opened
    # 'beDebug.log' when debug was set and then immediately rebound dbOut,
    # leaking the handle. The dead open is dropped.
    dbOut=sys.stdout

    if findGaps:
        swAlign,swScore = SW(qrySeq,subjSeq)

        # One row per line; blanks inside a row are gaps, marked with 'x'.
        qrySw,subjSw = [line.strip().replace(' ','x')
                        for line in swAlign.split('\n')]

        # Alignments containing gaps are always reported.
        if 'x' in qrySw or 'x' in subjSw :
            debug=True

        if debug:
            print >> dbOut,  "\ninput:"
            print >> dbOut,  qrySeq
            print >> dbOut,  subjSeq

            print >> dbOut,  "SW:"
            print >> dbOut,  qrySw
            print >> dbOut,  subjSw
            print >> dbOut,  swScore

        # add x to right end if necessary
        # this should not happen
        qrySw=qrySw.rstrip('.')
        subjSw=subjSw.rstrip('.')
        sizeDiff = len(qrySw)-len(subjSw)
        # Both rows are still strings here, so pad with a string; the
        # original added a list, which raises TypeError.
        if sizeDiff > 0:
            subjSw+='x'*sizeDiff
        elif sizeDiff < 0:
            qrySw+='x'*(-sizeDiff)

        #put mismatches back in
        qrySw=_restoreMismatches(qrySw,qrySeq)
        subjSw=_restoreMismatches(subjSw,subjSeq)

        if debug:
            print >> dbOut,  "e-mangle:"
            print >> dbOut,  qrySw
            print >> dbOut,  subjSw

    else:
        # No alignment: use the inputs themselves with U -> T. The
        # original read qrySw/subjSw before assigning them here.
        qrySw=qrySeq.replace('U','T')
        subjSw=subjSeq.replace('U','T')

    
    try:
        e= energy(qrySw,sequence.reverseComplement(subjSw))
    except Exception:
        # Report what was being scored, then let the error propagate.
        print ("""energy calculation failed:

        Query:\t%s\t%s
        Subj:\t%s\t%s
        """ % (qrySeq,qrySw,subjSeq,subjSw))

        raise
        
    return e
예제 #13
0
파일: hmmer.py 프로젝트: SiriusShiu/Mungo
 def getSequenceFromString(self, seq):
     """Cut this hit's region out of seq.

     The region is seq[sStart-1:sEnd], i.e. sStart..sEnd read as 1-based
     inclusive positions. On the '-' strand the region is returned
     reverse complemented.

     @param seq: String to slice
     @returns: The extracted region
     """
     region = seq[self.sStart-1:self.sEnd]
     if self.strand!='-':
         return region
     return sequence.reverseComplement(region)
예제 #14
0
i = 0
writer = fasta.MfaWriter('ORFs.fa')

filename = sys.argv[1]
header, dna = fasta.load(filename)
header = header.strip()

orfIter = sequence.extractOrfsIter(dna, minLen=minLen, pattern=pattern)
for i, gStart, gEnd, orf in orfIter:
    h = '%s.%i.%i-%i  Length=%i' % (header, i, gStart, gEnd, len(orf))
    writer.write(h, orf)

    fasta.pretty(h, orf)

    if gStart < gEnd:
        s = dna[gStart - 1:gEnd]
        print gStart, gEnd, len(s), len(s) % 3 == 0
        print sequence.codons(s, remainder=True)
        print sequence.translate(s)
    else:
        gStart, gEnd = gEnd, gStart
        s = dna[gStart - 1:gEnd]
        s = sequence.reverseComplement(s)
        print gStart, gEnd, len(s), len(s) % 3 == 0
        print sequence.codons(s, remainder=True)
        print sequence.translate(s)

    print
writer.close()
예제 #15
0
# Print each loaded HMMER domain together with the DNA it maps to.
# The first arm is switched off by the literal False; only the ORF arm runs.
# NOTE(review): s and L are not defined in the visible lines -- presumably
# the genomic sequence and its length; confirm against the rest of the file.
if False:
    domains = hmmer4.load_domains('hmmer/6frames.txt')
    for d in domains:
        # The accession is parsed as a six-frame header giving name and frame.
        p = hmmer4.parseSixFrameHeader(d.accession)
        print d
        print p.name, p.frame
        gStart,gEnd,strand = hmmer4.convert6FrameToGenomic(d.sStart,d.sEnd,p.frame,L)
        print gStart,gEnd,strand
        if strand=='+':
            dna = s[gStart-1:gEnd]
            print len(dna), len(dna) % 3==0
            print sequence.codons(dna, remainder=True)
            print sequence.translate(dna)
        else:
            # Swap the coordinates before slicing, then reverse complement.
            gStart,gEnd = gEnd,gStart
            dna = sequence.reverseComplement(s[gStart-1:gEnd])
            print len(dna), len(dna) % 3==0
            print sequence.codons(dna, remainder=True)
            print sequence.translate(dna)
        print
else:
    domains = hmmer4.load_domains('hmmer/ORFs.txt')
    for d in domains:
        # The accession is parsed as an ORF header; strand and start are read from it.
        o = hmmer4.parseOrfHeader(d.accession)
        print d
        print o
        gStart,gEnd = hmmer4.convertOrfToGenomic(d.sStart,d.sEnd,o.strand,o.start)
        if o.strand=='+':
            dna = s[gStart-1:gEnd]
        else:
            # NOTE(review): this arm only swaps the coordinates and never
            # assigns dna in the visible lines -- the snippet looks cut off here.
            gStart,gEnd = gEnd,gStart
예제 #16
0
파일: hmmer.py 프로젝트: PapenfussLab/Mungo
 def getSequenceFromString(self, seq):
     """Cut this hit's region out of seq.

     The region is seq[sStart - 1:sEnd], i.e. sStart..sEnd read as 1-based
     inclusive positions. On the '-' strand the region is returned
     reverse complemented.

     @param seq: String to slice
     @returns: The extracted region
     """
     region = seq[self.sStart - 1:self.sEnd]
     if self.strand != '-':
         return region
     return sequence.reverseComplement(region)
예제 #17
0
파일: stops.py 프로젝트: SiriusShiu/Mungo
#!/usr/bin/env python

"""
stops.py

Author: Tony Papenfuss
Date: Mon Apr 17 19:04:42 EST 2006

"""

import os, sys
import sequence


stops = ['TAA','TGA','TAG','taa','tga','tag']
for stop in stops:
    print stop, sequence.reverseComplement(stop)
    
예제 #18
0
Date: Tue Aug 22 20:14:57 EST 2006

"""

import os, sys
import fasta, sequence


header,seq = fasta.load('NKC.fa')
orfIterator = fasta.load_iter('ORFs.fa')
writer = fasta.MfaWriter('ORFs2.fa')

for h,orf in orfIterator:
    chrom,block,orfId,limits = h.split()[0].split('.')
    start,end = limits.split('-')
    start = int(start)
    end = int(end)
    
    if start>end:
        strand = '-'
        start,end = end,start
        s = sequence.translate(sequence.reverseComplement(seq[start-1:end]))
    else:
        strand = '+'
        s = sequence.translate(seq[start-1:end])
    
    if s!=orf: print h
    
    writer.write(h,s + '\n')
writer.close()