コード例 #1
0
ファイル: GenePredictionReader.py プロジェクト: hsiaut/EDSSI
def ExtendNonMetORFs(predictions, DNAseq):
	"""Extend predictions that do not start with M (or V) upstream to the next M.

	For every prediction whose protein sequence starts with something other
	than 'M' or 'V', walk upstream through DNAseq one codon at a time,
	translate each codon with CommonDNA.translate and prepend the resulting
	amino acid to p.sequence.  The walk stops when:
	  * an 'M' has been prepended,
	  * the translation is "_" (presumably a stop codon - confirm against
	    CommonDNA.translate); that codon is not prepended, or
	  * fewer than three bases remain before the edge of DNAseq.

	Coordinates are used the way the slicing implies: on the '+' strand
	p.start is the 0-based index of the first base of the ORF; on any other
	strand p.stop is the exclusive end index and the upstream codons are read
	past it and reverse-complemented.

	Args:
		predictions: iterable of objects with `sequence`, `strand`, `start`
			and `stop` attributes; they are modified in place.
		DNAseq: the nucleotide sequence the coordinates refer to.

	Returns:
		The same `predictions` object that was passed in.
	"""
	for p in predictions:
		# Nothing to extend for an empty sequence or one that already begins
		# with M / V (V presumably accepted as an alternative start - confirm).
		if not p.sequence or p.sequence[0] in ('M', 'V'):
			continue

		extended = 0  # number of bases prepended so far (always a multiple of 3)
		nextAA = ""
		while nextAA != 'M':
			if p.strand == '+':
				codonStart = p.start - extended - 3
				if codonStart < 0:
					break  # ran off the 5' edge of DNAseq
				nextcodon = DNAseq[codonStart:codonStart + 3]
			else:
				codonEnd = p.stop + extended + 3
				if codonEnd > len(DNAseq):
					break  # ran off the 3' edge of DNAseq
				nextcodon = CommonDNA.reverseComplement(DNAseq[codonEnd - 3:codonEnd])
			nextAA = CommonDNA.translate(nextcodon)
			if nextAA == "_":
				break
			extended += 3
			p.sequence = nextAA + p.sequence

		# Move the boundary by exactly the number of bases that were prepended,
		# so the coordinates stay in sync with p.sequence and never leave DNAseq.
		if p.strand == '+':
			p.start -= extended
		else:
			p.stop += extended
	return predictions
コード例 #2
0
ファイル: GenePredictionReader.py プロジェクト: hsiaut/EDSSI
def ExtendTruncatedORFs(predictions, DNAseq):
	"""Find the upstream DNA that homologs suggest is missing from a prediction.

	For each prediction that has homologs, the first ten residues of
	p.sequence are searched for in every homolog's sequence.  When they occur
	at an offset greater than 1, the homolog has `offset` extra residues in
	front, and the corresponding `offset * 3` bases immediately upstream of
	the prediction are cut out of DNAseq (reverse-complemented for any strand
	other than "+").

	NOTE(review): the function is unfinished - the extracted upstream DNA is
	not compared with the homolog or used to modify the prediction, so the
	predictions are currently returned unchanged.

	Args:
		predictions: iterable of objects with `sequence`, `homologs`,
			`strand`, `start` and `stop` attributes; each homolog has a
			`sequence` attribute.
		DNAseq: the nucleotide sequence the coordinates refer to.

	Returns:
		The same `predictions` object that was passed in.
	"""
	for p in predictions:
		if not p.homologs:
			# skip if no homologs
			continue
		# Slicing never raises, so a short sequence simply yields a short probe.
		firstTenAA = p.sequence[0:10]
		for h in p.homologs:
			offset = h.sequence.find(firstTenAA)
			# NOTE(review): the threshold of 1 (rather than 0) is kept from the
			# original; it also skips homologs with a single extra residue.
			if offset <= 1:
				continue
			upstreamLength = offset * 3
			if p.strand == "+":
				# Upstream lies before p.start; clamp at 0 so a negative index
				# cannot wrap around to the end of DNAseq.
				inputMatch = DNAseq[max(p.start - upstreamLength, 0):p.start]
			else:
				inputMatch = CommonDNA.reverseComplement(DNAseq[p.stop:p.stop + upstreamLength])
			# TODO: compare inputMatch with the homolog's leading residues and
			# extend the prediction when they agree.
	return predictions