Esempio n. 1
0
def getX4Codons(pssm, dna):
    """Translate ``dna`` codon by codon, using ``pssm`` to resolve ambiguity.

    Every 3-letter slice of ``dna`` is expanded into the list of amino acids
    it may encode (standard codon table plus the IUPAC ambiguous DNA values).
    When a codon has exactly one translation it is used as is; otherwise the
    candidate with the highest ``pssm[pos][aa]`` score is chosen.  If several
    candidates share that highest score, the one listed last wins, because
    later candidates overwrite earlier ones in the score -> residue map.

    One trace line per position is printed to stdout, framed by a header and
    a footer line.

    Args:
        pssm: Indexed as ``pssm[position][amino_acid]``; the scores must be
            hashable, orderable and addable to an int.
        dna: The nucleotide sequence.  It is converted once with ``str()``,
            so a plain string works too.
            NOTE(review): callers presumably pass a Biopython Seq - confirm.

    Returns:
        A ``(protein, final_score)`` tuple: the chosen residues joined into a
        string and the sum of their scores.
    """
    std_nt = CodonTable.unambiguous_dna_by_name["Standard"]  # create normal codon table object
    nonstd = IUPACData.ambiguous_dna_values                  # all ambiguous DNA values, includes normal bases

    # Convert the sequence once; the previous per-codon ``dna.tostring()``
    # rebuilt the whole string on every iteration and relies on a method
    # that newer Biopython releases no longer provide.
    bases = str(dna)

    # For each ambiguous (or not) codon, collect the list of all possible
    # translations.  All codons are translated before anything is printed.
    aa_trans = [
        CodonTable.list_possible_proteins(bases[i:i + 3], std_nt.forward_table, nonstd)
        for i in range(0, len(bases), 3)
    ]

    residues = []
    final_score = 0
    print("xxxxxxxxxxxxxxxxxx getX4Codons xxxxxxxx")
    for pos, candidates in enumerate(aa_trans):
        column = pssm[pos]
        if len(candidates) == 1:
            best_aa = candidates[0]
            best_score = column[best_aa]
            # Two spaces after '=' reproduce the historical trace format.
            print("%s =  %s" % (best_aa, best_score))
        else:
            # Map score -> residue; equal scores collapse onto one key and
            # the later candidate replaces the earlier one.
            score_to_aa = {}
            for aa in candidates:
                score_to_aa[column[aa]] = aa
            best_score = max(score_to_aa)
            best_aa = score_to_aa[best_score]
            print("%s = %s" % (best_aa, best_score))
        residues.append(best_aa)
        final_score = final_score + best_score
    print("----------- end ---------")
    return ("".join(residues), final_score)
Esempio n. 2
0
def main():
    """Enumerate ambiguous codons and cross-check the stop-free ones.

    Steps:
      1. Build a codon -> amino acid table for the 64 unambiguous codons
         (stop codons are labelled ``'stop'``) and print it in sorted order.
      2. Enumerate every triplet that has an ambiguous IUPAC letter in at
         least one position.
      3. Expand each triplet into the concrete codons it stands for.  A
         triplet that can encode a stop is recorded in ``stop_set`` together
         with its non-stop translations.  A triplet that cannot encode a stop
         is passed to ``CodonTable.list_possible_proteins`` as a sanity
         check, and a message is printed if that call raises ``KeyError`` or
         ``CodonTable.TranslationError``.

    ``stop_set`` is local to this function; nothing is returned.
    """
    bases = ['T', 'C', 'A', 'G']
    ambig_bases = ['R', 'Y', 'S', 'W', 'K', 'M', 'D', 'H', 'B', 'V', 'N']
    codons = [a + b + c for a in bases for b in bases for c in bases]
    amino_acids = "F F L L S S S S Y Y stop stop C C stop W L L L L P P P P H H Q Q R R R R I I I M T T T T N N K K S S R R V V V V A A A A D D E E G G G G".split(' ')
    codon_table = dict(zip(codons, amino_acids))
    nonstd = IUPACData.ambiguous_dna_values                  # all ambiguous DNA values, includes normal bases
    std_nt = CodonTable.unambiguous_dna_by_name["Standard"]  # create normal codon table object

    # Letter pools per codon position.  Each row has at least one ambiguous
    # position; the row order fixes the order in which triplets are visited.
    position_pools = [
        (ambig_bases, ambig_bases, ambig_bases),
        (bases, ambig_bases, ambig_bases),
        (ambig_bases, bases, ambig_bases),
        (ambig_bases, ambig_bases, bases),
        (ambig_bases, bases, bases),
        (bases, ambig_bases, bases),
        (bases, bases, ambig_bases),
    ]
    all_ambig_trip = []
    for pools in position_pools:
        all_ambig_trip.extend(itertools.product(*pools))

    # Joining with single spaces reproduces the spacing of the original
    # comma-separated print statements on both Python 2 and Python 3.
    for codon in sorted(codon_table):
        print(" ".join(["codon_table:", codon, " aa:", codon_table[codon]]))

    stop_set = {}  # ambiguous codon -> all possible translations minus the potential stop
    for trip in all_ambig_trip:
        ambig = "".join(trip)
        stop_seen = False
        translations = []
        # Translate the ambiguous triplet into all possible real combinations.
        for real in itertools.product(nonstd[trip[0]], nonstd[trip[1]], nonstd[trip[2]]):
            aa = codon_table["".join(real)]
            if aa == 'stop':
                stop_seen = True
            else:
                translations.append(aa)

        if stop_seen:
            stop_set[ambig] = translations
            continue

        # No stop among the expansions, so the library call should succeed.
        try:
            CodonTable.list_possible_proteins(ambig, std_nt.forward_table, nonstd)
        except KeyError as err:
            print(" ".join(["Sanity check failed for: ", ambig, " with Key error. stop_seen is ", str(stop_seen), " ", str(err)]))
        except CodonTable.TranslationError as TransError:
            print(" ".join(["Sanity check failed for: ", ambig, " with Translation error. stop_seen is ", str(stop_seen), " ", str(TransError)]))
Esempio n. 3
0
 def generateProtFromAmbiguousDNA(self, s):
     """Translate ``s`` codon by codon into lists of candidate amino acids.

     The sequence is cut into consecutive 3-letter codons and each codon is
     mapped as follows:

       * ``---`` (all gaps) gives ``['-']``.
       * A single gap in the third position gives the amino acid fixed by
         the first two bases when those two bases determine it on their own
         (see ``third_base_free`` below), otherwise ``['X']``.
       * Any other codon containing one or two gaps gives ``['X']``.
       * A gap-free codon gives the result of
         ``CodonTable.list_possible_proteins``, or ``['X']`` when that call
         raises ``KeyError``, ``ValueError`` or
         ``CodonTable.TranslationError``.

     Args:
         s: The sequence to translate.  It is converted once with ``str()``,
             so a plain string works too.
             NOTE(review): callers presumably pass a Biopython Seq - confirm.

     Returns:
         A list with one entry per codon; each entry is a list of one-letter
         amino acid codes (or ``'-'`` / ``'X'`` placeholders).
     """
     standard_nucleotide = CodonTable.unambiguous_dna_by_name["Standard"]
     non_standard_nucleotide = IUPACData.ambiguous_dna_values

     # First two bases that decide the amino acid regardless of the third.
     # Keys are spelled with T because the tables above are DNA tables; U is
     # normalised to T below so the RNA spelling checked by earlier versions
     # of this method keeps working.
     third_base_free = {
         'TC': 'S',  # Serine
         'CT': 'L',  # Leucine
         'CC': 'P',  # Proline
         'CG': 'R',  # Arginine
         'AC': 'T',  # Threonine
         'GT': 'V',  # Valine
         'GC': 'A',  # Alanine
         'GG': 'G',  # Glycine
     }

     # Convert once instead of rebuilding the whole string for every codon.
     bases = str(s)

     aaTranslations = []
     for i in range(0, len(bases), 3):
         codon = bases[i:i + 3]
         gaps = codon.count('-')
         if gaps == 3:
             aa = ['-']
         elif gaps == 1 and codon.index('-') == 2:
             prefix = codon[:2].replace('U', 'T')
             # Unknown prefixes used to leave ``aa`` unassigned, which either
             # raised UnboundLocalError or silently repeated the previous
             # codon's result; report them as unknown instead.
             aa = [third_base_free.get(prefix, 'X')]
         elif 0 < gaps < 3:
             aa = ['X']
         else:
             try:
                 # list_possible_proteins(codon, forward_table, ambiguous_nucleotide_values)
                 aa = CodonTable.list_possible_proteins(codon, standard_nucleotide.forward_table, non_standard_nucleotide)
             except (KeyError, ValueError, CodonTable.TranslationError):
                 # Best effort: untranslatable codons (unknown letters,
                 # stop-containing or truncated codons) become 'X'.
                 aa = ['X']
         aaTranslations.append(aa)
     return aaTranslations
Esempio n. 4
0
def generateProtFromAmbiguousDNA(s):
    """Return every protein string that the ambiguous DNA ``s`` may encode.

    ``s`` is cut into consecutive 3-letter codons.  Each codon is expanded
    into the list of amino acids it may stand for (standard codon table plus
    the IUPAC ambiguous DNA values), and the Cartesian product of those
    per-codon lists is joined into protein strings.

    Args:
        s: The nucleotide sequence.  It is converted once with ``str()``, so
            a plain string works too.
            NOTE(review): callers presumably pass a Biopython Seq - confirm.

    Returns:
        A list of protein strings in ``itertools.product`` order.  An empty
        sequence yields ``['']``, the single empty combination.
    """
    std_nt = CodonTable.unambiguous_dna_by_name["Standard"]  # create normal codon table object
    nonstd = IUPACData.ambiguous_dna_values                  # all ambiguous DNA values, includes normal bases

    # Convert once instead of rebuilding the whole string for every codon.
    bases = str(s)

    # For each ambiguous (or not) codon, collect the list of all possible
    # translations: [ [a], [b,c], [d], [e,f,g], ... ]
    aa_trans = [
        CodonTable.list_possible_proteins(bases[i:i + 3], std_nt.forward_table, nonstd)
        for i in range(0, len(bases), 3)
    ]

    # All ordered combinations, e.g. (a,b,d,e), (a,b,d,f), (a,b,d,g),
    # (a,c,d,e), ...  Each tuple is joined as it is produced, so the full
    # list of tuples is never held in memory alongside the result.
    return ["".join(combo) for combo in itertools.product(*aa_trans)]