def getX4Codons(pssm, dna):
    """Translate ``dna`` codon by codon, using ``pssm`` to resolve ambiguity.

    Every codon is expanded with ``CodonTable.list_possible_proteins`` (standard
    DNA table plus the IUPAC ambiguous-DNA values).  If a codon has several
    possible amino acids, the one with the highest score in the PSSM column for
    that codon position is kept; on a tie the candidate listed last wins.  A
    trace of each choice is printed to standard output.

    Args:
        pssm: Indexed first by codon position, then by amino-acid letter
            (``pssm[pos][aa]``), yielding a score.
        dna: Sequence to translate.  ``dna.tostring()`` is used when the object
            provides it, otherwise ``str(dna)``.

    Returns:
        Tuple ``(prot, final_score)``: the selected protein string and the sum
        of the selected residues' scores.

    Raises:
        Whatever ``CodonTable.list_possible_proteins`` raises for a codon it
        cannot translate; nothing is caught here.
    """
    std_nt = CodonTable.unambiguous_dna_by_name["Standard"]  # standard codon table
    nonstd = IUPACData.ambiguous_dna_values  # ambiguity codes (plain bases included)

    # Convert the sequence to text once instead of once per codon.  Older
    # Biopython Seq objects expose tostring(); fall back to str() otherwise.
    sequence = dna.tostring() if hasattr(dna, "tostring") else str(dna)

    # Translate everything first so a failing codon raises before any output.
    aa_trans = [
        CodonTable.list_possible_proteins(sequence[i:i + 3], std_nt.forward_table, nonstd)
        for i in range(0, len(dna), 3)
    ]

    residues = []
    final_score = 0
    print("xxxxxxxxxxxxxxxxxx getX4Codons xxxxxxxx")
    for pos, candidates in enumerate(aa_trans):
        column = pssm[pos]
        if len(candidates) == 1:
            chosen = candidates[0]
            chosen_score = column[chosen]
            # Two spaces after "=" reproduce the historical trace format.
            print("%s =  %s" % (chosen, chosen_score))
        else:
            chosen = None
            chosen_score = None
            for candidate in candidates:
                score = column[candidate]
                # ">=" keeps the last candidate among equal scores.
                if chosen_score is None or score >= chosen_score:
                    chosen, chosen_score = candidate, score
            print(chosen + " = " + str(chosen_score))
        residues.append(chosen)
        final_score = final_score + chosen_score
    print("----------- end ---------")
    return ("".join(residues), final_score)
def main():
    """Compare a hand-built codon table with Biopython on ambiguous codons.

    Builds the 64-entry standard codon table, prints it in sorted codon order,
    then enumerates every triplet that contains at least one IUPAC ambiguity
    code.  For each triplet it records in ``stop_set`` the non-stop
    translations, but only for triplets that can also expand to a stop codon.
    Each triplet is then passed to ``CodonTable.list_possible_proteins`` and any
    ``KeyError`` or ``TranslationError`` it raises is reported on standard
    output.

    Returns:
        None.  ``stop_set`` is local and is not returned.
    """
    bases = ['T', 'C', 'A', 'G']
    ambig_bases = ['R', 'Y', 'S', 'W', 'K', 'M', 'D', 'H', 'B', 'V', 'N']

    # Codons are generated in T, C, A, G order for each position; the
    # amino-acid string below is listed in that same order.
    codons = [a + b + c for a in bases for b in bases for c in bases]
    amino_acids = "F F L L S S S S Y Y stop stop C C stop W L L L L P P P P H H Q Q R R R R I I I M T T T T N N K K S S R R V V V V A A A A D D E E G G G G".split(' ')
    codon_table = dict(zip(codons, amino_acids))

    nonstd = IUPACData.ambiguous_dna_values  # ambiguity codes (plain bases included)
    std_nt = CodonTable.unambiguous_dna_by_name["Standard"]  # standard codon table

    # Every way of placing at least one ambiguity code in a triplet, in the
    # order the triplets are processed.
    position_patterns = (
        (ambig_bases, ambig_bases, ambig_bases),
        (bases, ambig_bases, ambig_bases),
        (ambig_bases, bases, ambig_bases),
        (ambig_bases, ambig_bases, bases),
        (ambig_bases, bases, bases),
        (bases, ambig_bases, bases),
        (bases, bases, ambig_bases),
    )
    all_ambig_trip = [
        trip
        for pattern in position_patterns
        for trip in itertools.product(*pattern)
    ]

    for codon in sorted(codon_table):
        print("codon_table: %s  aa: %s" % (codon, codon_table[codon]))

    # Ambiguous codon -> its possible translations minus the stop; only codons
    # that can expand to a stop are kept.
    stop_set = {}
    for trip in all_ambig_trip:
        ambig = "".join(trip)
        # Expand the ambiguous triplet into every concrete codon it can be.
        translations = [
            codon_table["".join(concrete)]
            for concrete in itertools.product(*[nonstd[code] for code in trip])
        ]
        stop_seen = 'stop' in translations
        if stop_seen:
            stop_set[ambig] = [aa for aa in translations if aa != 'stop']

        # Sanity check against Biopython's own handling of the same triplet.
        # NOTE(review): Biopython presumably raises here for codons that can
        # encode a stop; confirm against its documentation.
        try:
            CodonTable.list_possible_proteins(ambig, std_nt.forward_table, nonstd)
        except KeyError as err:
            print("Sanity check failed for:  %s  with Key error. stop_seen is  %s   %s"
                  % (ambig, stop_seen, err))
        except CodonTable.TranslationError as trans_error:
            print("Sanity check failed for:  %s  with Translation error. stop_seen is  %s   %s"
                  % (ambig, stop_seen, trans_error))
def generateProtFromAmbiguousDNA(self, s):
    """Return the possible amino acids for each codon of a gapped sequence.

    The sequence is read three characters at a time and each codon yields one
    list:

    * ``['-']`` for an all-gap codon (``---``);
    * a single amino acid when only the third position is a gap and the first
      two bases already fix the residue (e.g. ``GC-`` gives ``['A']``); both
      ``T`` and ``U`` spellings are accepted;
    * ``['X']`` for any other codon containing a gap, and for any codon that
      ``CodonTable.list_possible_proteins`` cannot translate;
    * otherwise the list from ``CodonTable.list_possible_proteins``.

    Args:
        s: Sequence to translate.  ``s.tostring()`` is used when the object
            provides it, otherwise ``str(s)``.

    Returns:
        List with one list of amino-acid letters per codon.
    """
    standard_nucleotide = CodonTable.unambiguous_dna_by_name["Standard"]
    non_standard_nucleotide = IUPACData.ambiguous_dna_values

    # First-two-base prefixes whose amino acid does not depend on the third
    # base.  The U spellings are the ones originally handled; the T spellings
    # are added because the sequence is translated with DNA tables.
    third_base_free = {
        'UC': 'S', 'TC': 'S',  # Serine
        'CU': 'L', 'CT': 'L',  # Leucine
        'CC': 'P',             # Proline
        'CG': 'R',             # Arginine
        'AC': 'T',             # Threonine
        'GU': 'V', 'GT': 'V',  # Valine
        'GC': 'A',             # Alanine
        'GG': 'G',             # Glycine
    }

    # Convert once instead of once per codon.  Older Biopython Seq objects
    # expose tostring(); fall back to str() otherwise.
    sequence = s.tostring() if hasattr(s, "tostring") else str(s)

    aaTranslations = []
    for i in range(0, len(s), 3):
        codon = sequence[i:i + 3]
        gap_count = codon.count('-')
        if gap_count == 3:
            aa = ['-']
        elif gap_count == 1 and codon.index('-') == 2:
            # Unknown prefixes previously left ``aa`` unset, which raised on
            # the first codon or reused the previous codon's result.
            aa = [third_base_free.get(codon[:2], 'X')]
        elif gap_count > 0:
            aa = ['X']
        else:
            try:
                aa = CodonTable.list_possible_proteins(
                    codon, standard_nucleotide.forward_table, non_standard_nucleotide)
            except Exception:
                # Best effort: any codon that cannot be translated becomes X.
                # KeyboardInterrupt and SystemExit are no longer swallowed.
                aa = ['X']
        aaTranslations.append(aa)
    return aaTranslations
def generateProtFromAmbiguousDNA(s):
    """Return every protein string an ambiguous DNA sequence can encode.

    Each codon is expanded with ``CodonTable.list_possible_proteins`` (standard
    DNA table plus the IUPAC ambiguous-DNA values), giving a list such as
    ``[[a], [b, c], [d], [e, f, g]]``.  The result is the ordered Cartesian
    product of those per-codon lists, joined into strings:
    ``["abde", "abdf", "abdg", "acde", "acdf", "acdg"]``.

    Args:
        s: Sequence to translate.  ``s.tostring()`` is used when the object
            provides it, otherwise ``str(s)``.

    Returns:
        List of protein strings, one per combination of per-codon choices.
        The number of strings is the product of the per-codon list lengths.

    Raises:
        Whatever ``CodonTable.list_possible_proteins`` raises for a codon it
        cannot translate; nothing is caught here.
    """
    std_nt = CodonTable.unambiguous_dna_by_name["Standard"]  # standard codon table
    nonstd = IUPACData.ambiguous_dna_values  # ambiguity codes (plain bases included)

    # Convert once instead of once per codon.  Older Biopython Seq objects
    # expose tostring(); fall back to str() otherwise.
    sequence = s.tostring() if hasattr(s, "tostring") else str(s)

    aa_trans = [
        CodonTable.list_possible_proteins(sequence[i:i + 3], std_nt.forward_table, nonstd)
        for i in range(0, len(s), 3)
    ]
    return ["".join(combination) for combination in itertools.product(*aa_trans)]