def ExtendNonMetORFs(predictions, DNAseq):
    """Extend predictions that do not start with M or V upstream, codon by codon.

    For each prediction whose first residue is neither 'M' nor 'V', the codon
    immediately upstream of the ORF is read from ``DNAseq`` (reverse
    complemented for the non-'+' strand), translated with
    ``CommonDNA.translate`` and prepended to ``p.sequence``.  Extension stops
    when an 'M' has been prepended, when the translation is "_" (presumably a
    stop codon -- confirm against CommonDNA), or when fewer than three bases
    remain before the edge of ``DNAseq``.  Residues prepended before a "_" or
    the edge is reached are kept.

    ``p.start`` ('+' strand) or ``p.stop`` (any other strand) is moved by
    exactly the number of bases that were prepended.

    Args:
        predictions: iterable of objects exposing ``sequence``, ``strand``,
            ``start`` and ``stop``; they are modified in place.
        DNAseq: the DNA string the coordinates index into (0-based slicing).

    Returns:
        The same ``predictions`` object that was passed in.
    """
    for p in predictions:
        # Nothing to do for M/V starts; an empty sequence has no first residue
        # to inspect, so it is left untouched instead of raising IndexError.
        if not p.sequence or p.sequence[0] in ('M', 'V'):
            continue

        on_plus = p.strand == '+'
        extended = 0  # bases prepended so far, always a multiple of 3
        nextAA = ""
        while nextAA != 'M':
            if on_plus:
                codon_start = p.start - extended - 3
                if codon_start < 0:
                    break  # ran off the 5' end of DNAseq
                nextcodon = DNAseq[codon_start:codon_start + 3]
            else:
                codon_end = p.stop + extended + 3
                if codon_end > len(DNAseq):
                    break  # ran off the 3' end of DNAseq
                nextcodon = CommonDNA.reverseComplement(
                    DNAseq[codon_end - 3:codon_end]
                )

            nextAA = CommonDNA.translate(nextcodon)
            if nextAA == "_":
                break

            p.sequence = nextAA + p.sequence
            extended += 3

        # Move the boundary by exactly what was prepended.  The counter is
        # already advanced for every accepted codon, so no extra codon is
        # added here; with no extension the coordinates stay unchanged.
        if on_plus:
            p.start -= extended
        else:
            p.stop += extended
    return predictions
def ExtendTruncatedORFs(predictions, DNAseq):
    """Look for upstream DNA that could complete a truncated prediction.

    For every prediction that has homologs, the first ten residues of
    ``p.sequence`` are searched for in each homolog's sequence.  When they are
    found at an offset greater than 1 (the homolog has at least two residues
    before the match), the stretch of ``DNAseq`` of ``offset * 3`` bases lying
    immediately upstream of the prediction is extracted: before ``p.start`` on
    the '+' strand, or after ``p.stop`` and reverse complemented on any other
    strand.

    NOTE(review): the extracted window is not compared with the homolog or
    used to modify the prediction, so predictions are currently returned
    unchanged -- the function looks unfinished.

    Args:
        predictions: iterable of objects exposing ``sequence``, ``strand``,
            ``start``, ``stop`` and ``homologs`` (objects with ``sequence``).
        DNAseq: the DNA string the coordinates index into.

    Returns:
        The same ``predictions`` object that was passed in.
    """
    for p in predictions:
        if not p.homologs:  # skip if no homologs
            continue

        # Slicing never raises, so a short sequence just gives a short probe.
        firstTenAA = p.sequence[:10]

        # find first ten AA in other homologs
        for h in p.homologs:
            offset = h.sequence.find(firstTenAA)
            if offset <= 1:
                continue

            # find extra sequence in our DNA input
            span = offset * 3
            if p.strand == "+":
                # Upstream means lower coordinates; clamp at 0 so a negative
                # index does not wrap around to the end of DNAseq.
                inputMatch = DNAseq[max(0, p.start - span):p.start]
            else:
                inputMatch = CommonDNA.reverseComplement(
                    DNAseq[p.stop:p.stop + span]
                )
            # NOTE(review): inputMatch is computed but not yet consumed.
    return predictions