def _iterOrfBreaks(regex, p):
    """Yield (breakStart, breakEnd) for every match of regex in p.

    A final (len(p), len(p)) pair is appended, as though there is a stop
    to the right of len(p)-1, so the caller handles the trailing stretch
    with the same code as every other one.
    """
    for match in regex.finditer(p):
        yield match.start(), match.end()
    yield len(p), len(p)


def extractOrfsIter(seq, minLen=20, pattern=r'\*|X{200,}', verbose=False):
    """Returns an ORF extracting iterator

    Each (frame, p) pair produced by sixFrameTranslationIter(seq) is split
    at every match of pattern. The stretches between consecutive matches,
    plus the stretch before the first match and the one after the last
    match, are the candidate ORFs; those of at least minLen characters are
    yielded.

    A match longer than one character (e.g. the default run of 200 or more
    X) is excluded in full: the following ORF begins right after the end
    of the match, never inside it.

    @param seq: DNA sequence
    @param minLen: Minimum ORF length (default=20)
    @param pattern: Stop and break pattern (default="*|X{200,}")
    @param verbose: If true, print each frame before it is scanned
    @return: Iterator yielding (i,gStart,gEnd,ORF) where i is a running
             counter starting at 1 and shared by all frames, and
             gStart,gEnd are the values returned by
             hmmer.convertSixFrameToGenomic
    """
    L = len(seq)
    regex = re.compile(pattern)
    i = 0
    for frame, p in sixFrameTranslationIter(seq):
        if verbose:
            # Parenthesised single argument: same output under the
            # Python 2 print statement and the Python 3 print function.
            print(frame)

        # 0-based index of the first residue of the current candidate,
        # as though there is a stop to the left of 0.
        orfStart = 0
        for end, resume in _iterOrfBreaks(regex, p):
            orf = p[orfStart:end]
            if len(orf) >= minLen:
                i += 1
                # orfStart + 1 is presumably the 1-based position expected
                # by convertSixFrameToGenomic -- TODO confirm.
                gStart, gEnd = hmmer.convertSixFrameToGenomic(
                    orfStart + 1, end, frame, L)
                yield i, gStart, gEnd, orf
            # Resume after the whole break, not one character past its
            # start, so a long X run cannot leak into the next ORF.
            orfStart = resume
# Excerpt from a larger routine (its header and the end of the loop are not
# shown here). For one translated frame p it writes, through writer, every
# stretch of p lying between two matches of the compiled regex `pattern`
# whose length is at least minLen.
#
# Names used but defined outside this excerpt: hmmerFrame, p, pattern,
# minLen, L, i, hstart, writer.

# Map the HMMER frame identifier to the frame value used below.
frame = hmmer.hmmer2frame[hmmerFrame]
# Python 2 print-to-stream statement: progress message on stderr.
print >> sys.stderr, 'Frame:', frame
# Within this excerpt, strand is referenced only by the disabled debugging
# code at the bottom.
if frame>0:
    strand = '+'
else:
    strand = '-'
matchIter = pattern.finditer(p)
# The first match only provides the left boundary, so the stretch of p
# before it is never written.
# NOTE(review): .next() raises StopIteration when pattern does not match p
# at all -- confirm that callers guarantee at least one match.
match = matchIter.next()
start = match.start()
for match in matchIter:
    end = match.start()
    # Residues strictly between the boundary at `start` and this match.
    orf = p[start+1:end]
    length = len(orf)
    if length>=minLen:
        # start+2 is one more than the 0-based index of the ORF's first
        # residue (presumably a 1-based coordinate -- TODO confirm).
        gStart,gEnd = hmmer.convertSixFrameToGenomic(start+2, end, frame, L)
        i += 1
        # Header layout: <hstart>.<counter>.<gStart>-<gEnd> length <length>
        h = '%s.%i.%i-%i length %i' % (hstart, i, gStart, gEnd, length)
        writer.write(h, orf + '\n')
        # Disabled debugging aid: compares the ORF with a fresh translation
        # of the corresponding genomic slice.
        # if strand=='+':
        #     print orf
        #     print [x for x in sequence.codonIterator(seq[gStart-1:gEnd], remainder=True)]
        #     print sequence.translate(seq[gStart-1:gEnd])
        # else:
        #     start,end = gEnd,gStart
        #     print orf
        #     print [x for x in sequence.codonIterator(sequence.reverseComplement(seq[start-1:end]), remainder=True)]
        #     print sequence.translate(sequence.reverseComplement(seq[start-1:end]))
        # print
    # NOTE(review): `start` is not advanced anywhere in the visible part of
    # this loop; presumably it is updated after this excerpt -- confirm.
# Excerpt from a larger routine (its header and the end of the loop are not
# shown here). For one translated frame p it writes, through writer, every
# stretch of p lying between two matches of the compiled regex `pattern`
# whose length is at least minLen.
#
# Names used but defined outside this excerpt: hmmerFrame, p, pattern,
# minLen, L, i, hstart, writer.

# Map the HMMER frame identifier to the frame value used below.
frame = hmmer.hmmer2frame[hmmerFrame]
# Python 2 print-to-stream statement: progress message on stderr.
print >> sys.stderr, 'Frame:', frame
# Within this excerpt, strand is referenced only by the disabled debugging
# code at the bottom.
if frame > 0:
    strand = '+'
else:
    strand = '-'
matchIter = pattern.finditer(p)
# The first match only provides the left boundary, so the stretch of p
# before it is never written.
# NOTE(review): .next() raises StopIteration when pattern does not match p
# at all -- confirm that callers guarantee at least one match.
match = matchIter.next()
start = match.start()
for match in matchIter:
    end = match.start()
    # Residues strictly between the boundary at `start` and this match.
    orf = p[start + 1:end]
    length = len(orf)
    if length >= minLen:
        # start + 2 is one more than the 0-based index of the ORF's first
        # residue (presumably a 1-based coordinate -- TODO confirm).
        gStart, gEnd = hmmer.convertSixFrameToGenomic(
            start + 2, end, frame, L)
        i += 1
        # Header layout: <hstart>.<counter>.<gStart>-<gEnd> length <length>
        h = '%s.%i.%i-%i length %i' % (hstart, i, gStart, gEnd, length)
        writer.write(h, orf + '\n')
        # Disabled debugging aid: compares the ORF with a fresh translation
        # of the corresponding genomic slice.
        # if strand=='+':
        #     print orf
        #     print [x for x in sequence.codonIterator(seq[gStart-1:gEnd], remainder=True)]
        #     print sequence.translate(seq[gStart-1:gEnd])
        # else:
        #     start,end = gEnd,gStart
        #     print orf
        #     print [x for x in sequence.codonIterator(sequence.reverseComplement(seq[start-1:end]), remainder=True)]
        #     print sequence.translate(sequence.reverseComplement(seq[start-1:end]))
        # print
    # NOTE(review): `start` is not advanced anywhere in the visible part of
    # this loop; presumably it is updated after this excerpt -- confirm.