Example #1
0
def _orfSpansIter(p, regex):
    """Yields the index spans of p that lie between consecutive breaks
    @param p: Translated sequence of a single frame
    @param regex: Compiled stop/break pattern
    @return: Iterator yielding half-open (begin, end) index pairs; the two
             ends of p are treated as if a break sat just outside them
    """
    begin = 0
    for match in regex.finditer(p):
        yield begin, match.start()
        # Resume after the *whole* match, so that a multi-character break
        # (e.g. a long run of X) is never carried over into the next ORF.
        begin = match.end()
    # Trailing segment: as though there is a stop to the right of len(p)-1
    yield begin, len(p)


def extractOrfsIter(seq, minLen=20, pattern=r'\*|X{200,}', verbose=False):
    """Returns an ORF extracting iterator
    @param seq: DNA sequence
    @param minLen: Minimum ORF length, counted in characters of the
                   translated frame (default=20)
    @param pattern: Stop and break regular expression (default: a literal
                    '*' or a run of 200 or more 'X')
    @param verbose: If true, print each frame identifier as it is processed
    @return: Iterator yielding (i,gStart,gEnd,ORF), where i is a running
             1-based counter over all frames and gStart,gEnd are whatever
             hmmer.convertSixFrameToGenomic returns for the ORF
    """
    L = len(seq)
    regex = re.compile(pattern)

    i = 0
    for frame, p in sixFrameTranslationIter(seq):
        if verbose:
            # Parenthesised single argument: prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print(frame)

        for begin, end in _orfSpansIter(p, regex):
            orf = p[begin:end]
            if len(orf) < minLen:
                continue
            i += 1
            # begin+1 and end are the 1-based positions, within p, of the
            # first and last character of the ORF.
            gStart, gEnd = hmmer.convertSixFrameToGenomic(begin + 1, end, frame, L)
            yield i, gStart, gEnd, orf
Example #2
0
    # Look up hmmerFrame in the hmmer.hmmer2frame table. The result is compared
    # against 0 below, so it is presumably a signed frame number -- TODO confirm.
    frame = hmmer.hmmer2frame[hmmerFrame]
    print >> sys.stderr, 'Frame:', frame
    # NOTE(review): within this fragment, strand is only referenced by the
    # commented-out debug code at the bottom.
    if frame>0:
        strand = '+'
    else:
        strand = '-'
    
    matchIter = pattern.finditer(p)
    # The first match is consumed up front and only supplies the left boundary,
    # so whatever precedes it in p is not written out by the loop below.
    # NOTE(review): .next() (Python 2 iterator protocol) raises StopIteration
    # when pattern does not match p at all -- confirm callers guarantee a match.
    match = matchIter.next()
    start = match.start()
    for match in matchIter:
        end = match.start()
        # Characters strictly between index start and the start of this match.
        # NOTE(review): start is not reassigned anywhere in the visible part of
        # this loop; presumably it is updated further down -- confirm.
        orf = p[start+1:end]
        length = len(orf)
        if length>=minLen:
            # start+2 and end are the 1-based positions, within p, of the first
            # and last character of orf.
            gStart,gEnd = hmmer.convertSixFrameToGenomic(start+2, end, frame, L)
            
            i += 1
            # Header layout: <hstart>.<i>.<gStart>-<gEnd>  length <length>
            h = '%s.%i.%i-%i  length %i' % (hstart, i, gStart, gEnd, length)
            writer.write(h, orf + '\n')
            
#             if strand=='+':
#                 print orf
#                 print [x for x in sequence.codonIterator(seq[gStart-1:gEnd], remainder=True)]
#                 print sequence.translate(seq[gStart-1:gEnd])
#             else:
#                 start,end = gEnd,gStart
#                 print orf
#                 print [x for x in sequence.codonIterator(sequence.reverseComplement(seq[start-1:end]), remainder=True)]
#                 print sequence.translate(sequence.reverseComplement(seq[start-1:end]))
#             print
Example #3
0
    # Look up hmmerFrame in the hmmer.hmmer2frame table. The result is compared
    # against 0 below, so it is presumably a signed frame number -- TODO confirm.
    frame = hmmer.hmmer2frame[hmmerFrame]
    print >> sys.stderr, 'Frame:', frame
    # NOTE(review): within this fragment, strand is only referenced by the
    # commented-out debug code at the bottom.
    if frame > 0:
        strand = '+'
    else:
        strand = '-'

    matchIter = pattern.finditer(p)
    # The first match is consumed up front and only supplies the left boundary,
    # so whatever precedes it in p is not written out by the loop below.
    # NOTE(review): .next() (Python 2 iterator protocol) raises StopIteration
    # when pattern does not match p at all -- confirm callers guarantee a match.
    match = matchIter.next()
    start = match.start()
    for match in matchIter:
        end = match.start()
        # Characters strictly between index start and the start of this match.
        # NOTE(review): start is not reassigned anywhere in the visible part of
        # this loop; presumably it is updated further down -- confirm.
        orf = p[start + 1:end]
        length = len(orf)
        if length >= minLen:
            # start + 2 and end are the 1-based positions, within p, of the
            # first and last character of orf.
            gStart, gEnd = hmmer.convertSixFrameToGenomic(
                start + 2, end, frame, L)

            i += 1
            # Header layout: <hstart>.<i>.<gStart>-<gEnd>  length <length>
            h = '%s.%i.%i-%i  length %i' % (hstart, i, gStart, gEnd, length)
            writer.write(h, orf + '\n')

#             if strand=='+':
#                 print orf
#                 print [x for x in sequence.codonIterator(seq[gStart-1:gEnd], remainder=True)]
#                 print sequence.translate(seq[gStart-1:gEnd])
#             else:
#                 start,end = gEnd,gStart
#                 print orf
#                 print [x for x in sequence.codonIterator(sequence.reverseComplement(seq[start-1:end]), remainder=True)]
#                 print sequence.translate(sequence.reverseComplement(seq[start-1:end]))
#             print