예제 #1
0
def getX4Codons(pssm, dna):
    """Translate ``dna`` codon by codon, keeping the best-scoring residue.

    The sequence is cut into consecutive triplets. Each triplet is expanded
    with ``CodonTable.list_possible_proteins`` (standard table, IUPAC
    ambiguity codes) into the list of residues it may encode. When a codon
    has exactly one possible residue that residue is used; otherwise the
    residue with the highest ``pssm`` score at that codon position is used
    (when several residues share the highest score, the one listed last
    wins).

    A trace of the chosen residue and its score is printed for every codon.

    Args:
        pssm: position-indexed scores; ``pssm[pos][aa]`` must give the score
            of residue ``aa`` at codon position ``pos``.
        dna: the sequence to translate; it is converted with ``str()``.

    Returns:
        A ``(prot, final_score)`` tuple: the chosen protein string and the
        sum of the scores of the chosen residues.
    """
    std_nt = CodonTable.unambiguous_dna_by_name["Standard"]  # normal codon table object
    nonstd = IUPACData.ambiguous_dna_values                  # ambiguity code -> possible bases
    # Convert once, outside the loop. str() replaces the deprecated
    # Seq.tostring() and yields the same plain string.
    bases = str(dna)
    aa_trans = [
        CodonTable.list_possible_proteins(bases[i:i + 3], std_nt.forward_table, nonstd)
        for i in range(0, len(bases), 3)
    ]

    chosen = []
    final_score = 0
    print("xxxxxxxxxxxxxxxxxx getX4Codons xxxxxxxx")
    for pos, candidates in enumerate(aa_trans):
        best_aa = candidates[0]
        best_score = pssm[pos][best_aa]
        if len(candidates) == 1:
            # Two spaces after "=" match the historical trace format.
            print(best_aa + " =  " + str(best_score))
        else:
            for aa in candidates[1:]:
                score = pssm[pos][aa]
                # ">=" keeps the later residue on ties.
                if score >= best_score:
                    best_aa = aa
                    best_score = score
            print(best_aa + " = " + str(best_score))
        chosen.append(best_aa)
        final_score = final_score + best_score
    print("----------- end ---------")
    return ("".join(chosen), final_score)
예제 #2
0
def count_usage(input_file):
    """Translate every gene feature of a record and return the lookup counts.

    A ``LookupCountingForwardTable`` wrapping the standard DNA forward table
    is installed in a ``CodonTable`` and used to translate each feature of
    type ``"gene"``. The translations themselves are discarded; only the
    ``counts`` attribute of the counting table is returned.

    Args:
        input_file: path of the file handed to ``FeatureParser().parse``.

    Returns:
        ``counting_forward_table.counts`` after all gene features have been
        translated.
    """
    standard_forward_table = CodonTable.standard_dna_table.forward_table
    counting_forward_table = LookupCountingForwardTable(standard_forward_table)
    codon_table = CodonTable.CodonTable(forward_table=counting_forward_table)

    # Close the input file deterministically instead of leaking the handle.
    # NOTE(review): assumes parse() reads the whole record eagerly - confirm.
    with open(input_file) as handle:
        record = FeatureParser().parse(handle)

    complement = record.seq.complement()

    for feat in record.features:
        if feat.type != "gene":
            continue

        # NOTE(review): features not on strand 1 are read from the plain
        # complement, not the reverse complement - confirm this is intended.
        strand_seq = record.seq if feat.strand == 1 else complement

        start = feat.location.start.position
        end = feat.location.end.position

        # Called only for its side effect on the counting forward table.
        strand_seq[start:end].translate(codon_table)

    return counting_forward_table.counts
예제 #3
0
def makeTableX(table):
    """Return a new CodonTable derived from ``table``.

    The new table reuses the nucleotide alphabet, back table, start codons
    and stop codons of ``table``, uses ``proteinX`` as its protein alphabet
    and wraps the forward table in ``MissingTable``.

    ``table`` must use ``IUPAC.extended_protein`` as its protein alphabet;
    this is checked with an assertion.
    """
    assert table.protein_alphabet == IUPAC.extended_protein
    constructor_args = (
        table.nucleotide_alphabet,
        proteinX,
        MissingTable(table.forward_table),
        table.back_table,
        table.start_codons,
        table.stop_codons,
    )
    return CodonTable.CodonTable(*constructor_args)
예제 #4
0
def main():
    """Sanity-check translation of ambiguous codons that cannot be a stop.

    Steps:
      1. Build a plain codon -> amino acid dictionary (stops are the string
         ``'stop'``) and print it in sorted codon order.
      2. Enumerate every triplet that contains at least one ambiguity code.
      3. Expand each triplet into its concrete codons. Triplets that can
         encode a stop are recorded in ``stop_set`` together with their
         non-stop translations. For every other triplet,
         ``CodonTable.list_possible_proteins`` is called and a message is
         printed if it raises ``KeyError`` or ``TranslationError``.

    Nothing is returned; all results are printed.
    """
    bases = ['T', 'C', 'A', 'G']
    ambig_bases = ['R', 'Y', 'S', 'W', 'K', 'M', 'D', 'H', 'B', 'V', 'N']
    codons = [first + second + third
              for first in bases for second in bases for third in bases]
    amino_acids = "F F L L S S S S Y Y stop stop C C stop W L L L L P P P P H H Q Q R R R R I I I M T T T T N N K K S S R R V V V V A A A A D D E E G G G G".split(' ')
    codon_table = dict(zip(codons, amino_acids))
    nonstd = IUPACData.ambiguous_dna_values                  # ambiguity code -> possible bases
    std_nt = CodonTable.unambiguous_dna_by_name["Standard"]  # normal codon table object

    # Every way of placing at least one ambiguity code in a triplet; the
    # order of these patterns fixes the order of the checks below.
    position_pools = [
        (ambig_bases, ambig_bases, ambig_bases),
        (bases, ambig_bases, ambig_bases),
        (ambig_bases, bases, ambig_bases),
        (ambig_bases, ambig_bases, bases),
        (ambig_bases, bases, bases),
        (bases, ambig_bases, bases),
        (bases, bases, ambig_bases),
    ]
    all_ambig_trip = []
    for pools in position_pools:
        all_ambig_trip.extend(itertools.product(*pools))

    for codon in sorted(codon_table):
        print("codon_table: " + codon + "  aa: " + codon_table[codon])

    # ambiguous codon -> all possible translations minus the potential stop
    stop_set = {}
    for trip in all_ambig_trip:
        ambig = "".join(trip)
        # Translate the ambiguous triplet into all possible real codons.
        expansions = itertools.product(nonstd[trip[0]], nonstd[trip[1]], nonstd[trip[2]])
        translations = [codon_table["".join(parts)] for parts in expansions]
        stop_seen = 'stop' in translations
        if stop_seen:
            stop_set[ambig] = [aa for aa in translations if aa != 'stop']
            continue
        try:
            CodonTable.list_possible_proteins(ambig, std_nt.forward_table, nonstd)
        except KeyError as err:
            print(" ".join(["Sanity check failed for: ", ambig,
                            " with Key error. stop_seen is ", str(stop_seen),
                            " ", str(err)]))
        except CodonTable.TranslationError as TransError:
            print(" ".join(["Sanity check failed for: ", ambig,
                            " with Translation error. stop_seen is ", str(stop_seen),
                            " ", str(TransError)]))
예제 #5
0
 def generateProtFromAmbiguousDNA(self, s):
     """Return, for each codon of ``s``, the list of residues it may encode.

     Codons are consecutive triplets of ``str(s)``. Each one is handled as
     follows:

     * ``---`` gives ``['-']``.
     * A codon with a single gap in the third position gives the one amino
       acid shared by all four completions when the first two bases fix it
       (e.g. ``GC-`` gives ``['A']``), otherwise ``['X']``.
     * Any other codon containing gaps gives ``['X']``.
     * Gap-free codons are expanded with
       ``CodonTable.list_possible_proteins`` using the standard table and
       the IUPAC ambiguous DNA values; if that call fails the codon gives
       ``['X']``.

     Args:
         s: the sequence to translate; it is converted with ``str()``.

     Returns:
         A list with one list of residue letters per codon.
     """
     standard_nucleotide = CodonTable.unambiguous_dna_by_name["Standard"]
     non_standard_nucleotide = IUPACData.ambiguous_dna_values
     # First two bases (written with U) whose amino acid does not depend on
     # the third base.
     fourfold_prefixes = {
         'UC': 'S',  # Serine
         'CU': 'L',  # Leucine
         'CC': 'P',  # Proline
         'CG': 'R',  # Arginine
         'AC': 'T',  # Threonine
         'GU': 'V',  # Valine
         'GC': 'A',  # Alanine
         'GG': 'G',  # Glycine
     }
     # Convert once, outside the loop. str() replaces the deprecated
     # Seq.tostring() and yields the same plain string.
     bases = str(s)
     aaTranslations = []
     for i in range(0, len(bases), 3):
         codon = bases[i:i+3]
         gap_count = codon.count('-')
         if gap_count == 3:
             aa = ['-']
         elif gap_count == 1 and codon.index('-') == 2:
             # The input is DNA, so accept T as well as U in the prefix.
             prefix = codon[:2].replace('T', 'U')
             # Prefixes that do not fix the amino acid used to leave ``aa``
             # unset (or stale from the previous codon); report them as
             # unknown like every other partially gapped codon.
             aa = [fourfold_prefixes.get(prefix, 'X')]
         elif gap_count > 0:
             aa = ['X']
         else:
             try:
                 aa = CodonTable.list_possible_proteins(codon,standard_nucleotide.forward_table,non_standard_nucleotide)
             except Exception:
                 # Untranslatable codon (unknown letter, possible stop,
                 # incomplete trailing codon): report it as unknown.
                 aa = ['X']
         aaTranslations.append(aa)
     return aaTranslations
예제 #6
0
def generateProtFromAmbiguousDNA(s):
    """Return every protein string that the ambiguous DNA ``s`` may encode.

    The sequence is cut into consecutive triplets and each triplet is
    expanded with ``CodonTable.list_possible_proteins`` (standard table,
    IUPAC ambiguity codes) into the residues it may encode. The result is
    every ordered combination of one residue per codon.

    Args:
        s: the sequence to translate; it is converted with ``str()``.

    Returns:
        A list of protein strings, one per combination.
    """
    std_nt = CodonTable.unambiguous_dna_by_name["Standard"]  # normal codon table object
    nonstd = IUPACData.ambiguous_dna_values                  # ambiguity code -> possible bases
    # Convert once, outside the loop. str() replaces the deprecated
    # Seq.tostring() and yields the same plain string.
    bases = str(s)
    # One list of possible residues per (ambiguous or not) codon.
    aa_trans = [
        CodonTable.list_possible_proteins(bases[i:i + 3], std_nt.forward_table, nonstd)
        for i in range(0, len(bases), 3)
    ]

    # aa_trans looks like [[a], [b, c], [d], [e, f, g], ...]; the product
    # enumerates all ordered combinations such as (a, b, d, e), (a, b, d, f),
    # (a, b, d, g), (a, c, d, e), ...
    return ["".join(combination) for combination in itertools.product(*aa_trans)]
예제 #7
0
#make a codon table that can handle gaps
#start from a copy of the standard codon table's forward mapping; working on
#a copy keeps the gap codons out of the shared CodonTable.standard_dna_table
table = dict(CodonTable.standard_dna_table.forward_table)
#add gaps: a codon that mixes gap characters with bases (or N) becomes "X"
for c1 in ["A", "C", "G", "T", "N"]:
    table["%s--" % c1] = "X"
    table["-%s-" % c1] = "X"
    table["--%s" % c1] = "X"
    for c2 in ["A", "C", "G", "T", "N"]:
        table["%s%s-" % (c1, c2)] = "X"
        table["-%s%s" % (c1, c2)] = "X"
        table["%s-%s" % (c1, c2)] = "X"
#a codon made only of gaps stays a gap
table["---"] = "-"
#now register it and export
CodonTable.register_ncbi_table(name='gapped',
                               alt_name="CAS0",
                               id=99,
                               table=table,
                               stop_codons=[
                                   'TAA',
                                   'TAG',
                                   'TGA',
                               ],
                               start_codons=[
                                   'TTG',
                                   'CTG',
                                   'ATG',
                               ])
GAPPED_CODON_TABLE = CodonTable.ambiguous_dna_by_name["gapped"]
예제 #8
0
파일: commonVars.py 프로젝트: scharch/SONAR
\n\
%s\n"


# blastclust command template; the two %s slots are the -i and -o arguments.
# Options: -p F: nucleotide; -L .9: 90% [coverage]; -S: identities.
CMD_BLASTCLUST = (
    "/ifs/home/c2b2/bh_lab/shares/blast/current/ia32-linux/bin/blastclust"
    " -p F -L .9 -S 95 -i %s -o %s"
)


# Column names of a parsed BLAST hit.
PARSED_BLAST_HEADER = [
    "qid", "sid", "identity", "align_len", "mismatches", "gaps",
    "qstart", "qend", "sstart", "send", "evalue", "score",
    "strand", "other_sids",
]
# Column names of a parsed BLAST hit in the verbose layout.
PARSED_BLAST_HEADER_VERBOSE = [
    "query_id", "sbjct_id", "strand", "evalue", "score",
    "identities", "gaps", "aln_len",
    "query_start", "query_end", "query_len",
    "sbjct_start", "sbjct_end", "aln_query", "aln_sbjct",
]


#make a codon table that can handle gaps
#start from a copy of the standard codon table's forward mapping; working on
#a copy keeps the gap codons out of the shared CodonTable.standard_dna_table
table = dict(CodonTable.standard_dna_table.forward_table)
#add gaps: a codon that mixes gap characters with bases (or N) becomes "X"
for c1 in ["A", "C", "G", "T", "N"]:
	table["%s--"%c1] = "X"
	table["-%s-"%c1] = "X"
	table["--%s"%c1] = "X"
	for c2 in ["A", "C", "G", "T", "N"]:
		table["%s%s-"%(c1,c2)] = "X"
		table["-%s%s"%(c1,c2)] = "X"
		table["%s-%s"%(c1,c2)] = "X"
#a codon made only of gaps stays a gap
table["---"]="-"
#now register it and export
CodonTable.register_ncbi_table(name='gapped',alt_name="CAS0",id=99,table=table, stop_codons=['TAA', 'TAG', 'TGA', ], start_codons=['TTG', 'CTG', 'ATG', ] )
GAPPED_CODON_TABLE=CodonTable.ambiguous_dna_by_name["gapped"]

    
예제 #9
0
def _translate_str(sequence,
                   table,
                   stop_symbol="*",
                   to_stop=False,
                   pos_stop="X"):
    """Translate a nucleotide string codon by codon (PRIVATE).

    Arguments:
     - sequence    - the nucleotide string. It is upper-cased first, and
                     trailing letters that do not fill a whole codon are
                     ignored.
     - table       - a CodonTable object (NOT a table name or id number).
     - stop_symbol - single character string emitted for a terminator.
     - to_stop     - boolean; when true, translation ends at the first
                     in-frame stop codon, which is not included in the
                     result. Without an in-frame stop codon the whole
                     sequence is translated.
     - pos_stop    - single character string emitted for a codon that might
                     be a stop codon (e.g. TAN or NNN).

    Returns the translation as a string. Raises CodonTable.TranslationError
    for a codon that contains letters outside the valid alphabet.

    e.g.
    >>> from Bio.Data import CodonTable
    >>> table = CodonTable.ambiguous_dna_by_id[1]
    >>> _translate_str("AAA", table)
    'K'
    >>> _translate_str("TAR", table)
    '*'
    >>> _translate_str("TAN", table)
    'X'
    >>> _translate_str("TAN", table, pos_stop="@")
    '@'
    >>> _translate_str("TA?", table)
    Traceback (most recent call last):
       ...
    TranslationError: Codon 'TA?' is invalid
    """
    sequence = sequence.upper()
    forward_table = table.forward_table
    stop_codons = table.stop_codons
    alphabet_letters = table.nucleotide_alphabet.letters
    if alphabet_letters is None:
        # Assume the worst case, ambiguous DNA or RNA:
        valid_letters = (set(IUPAC.ambiguous_dna.letters.upper())
                         | set(IUPAC.ambiguous_rna.letters.upper()))
    else:
        valid_letters = set(alphabet_letters.upper())

    residues = []
    whole_codon_length = len(sequence) - len(sequence) % 3
    for start in xrange(0, whole_codon_length, 3):
        codon = sequence[start:start + 3]
        try:
            residue = forward_table[codon]
        except (KeyError, CodonTable.TranslationError):
            # Todo? Treat "---" as a special case (gapped translation)
            if codon in stop_codons:
                if to_stop:
                    break
                residue = stop_symbol
            elif set(codon) <= valid_letters:
                # Possible stop codon (e.g. NNN or TAN)
                residue = pos_stop
            else:
                raise CodonTable.TranslationError(
                    "Codon '%s' is invalid" % codon)
        residues.append(residue)
    return "".join(residues)
예제 #10
0
        # print trans
        # if str(trans) == str(TolC):
        #     print 'congrats, you reinvented the wheel'
        # else:
        #     print 'you still suck'

    def reverse_translate_amino_acid(self, amino_acid):
        """Return a codon for ``amino_acid``.

        The codon is taken from ``self.amino_acid_to_codon_map``. No real
        selection strategy is implemented yet: the first listed option is
        always returned.
        """
        return self.amino_acid_to_codon_map[amino_acid][0]


if __name__ == '__main__':
    # Manual smoke test: load the standard usage table and print the
    # translation of one codon and of one short sequence.
    my_codon_table = CodonTable('standard_usage.txt')
    single_codon_result = my_codon_table.translate_codon('AAA')
    print(single_codon_result)
    sequence_result = my_codon_table.translate_sequence('ATGAAGAAATTGCTCCCCATT')
    print(sequence_result)
    #print len(my_codon_table.codon_to_amino_acid_map)
    #print my_codon_table.amino_acid_to_codon_map
    #print len(my_codon_table.amino_acid_to_codon_map)
    #print my_codon_table.amino_acid_to_codon_map['A']
    #print my_codon_table.amino_acid_to_weight_map
    #print my_codon_table.translate_codon('AAA')
    #print my_codon_table.reverse_translate_amino_acid('F')

    #my_second_codon_table = CodonTable('weird_usage.txt')

예제 #11
0
def _iter_codons(line):
    """Yield the consecutive complete three-letter codons of ``line``."""
    for start in range(0, len(line) - 2, 3):
        yield line[start:start + 3]


def run_script(args):
    """Process a two-column CSV file according to ``args.mode[0]``.

    Every input row is read as ``[identifier, sequence, ...]``. Supported
    modes:

    * ``'translate'``    - replace T with U, translate codon by codon with
      the general codon table and write ``[identifier, translation]``.
    * ``'revtranslate'`` - reverse translate each character, using the
      "1st20" table for the first 20 characters and the general table for
      the rest, replace U with T and write ``[identifier, sequence]``.
    * ``'transcribe'``   - replace T with U and write
      ``[identifier, sequence]``.
    * ``'calCUT'``       - count every complete codon over all rows and
      write ``[codon, amino_acid, count]`` per distinct codon.

    Any other mode writes nothing (the output file is still created).

    Args:
        args: object with ``scriptinput``, ``output`` and ``mode``
            attributes; element 0 of each is used (input path, output path
            and mode name).
    """
    # Context managers close both files even when a row fails to process;
    # the input file used to be left open.
    with open(args.scriptinput[0]) as scriptinput, \
            open(args.output[0], 'w') as outputfh:
        output = csv.writer(outputfh)
        my_codon_table = CodonTable('rEcoli-codon-usage.txt')
        my_rare_codon_table = CodonTable('rEcoli-codon-usage-1st20.txt')
        scriptinputreader = csv.reader(scriptinput)
        mode = args.mode[0]

        if mode == 'translate':
            for row in scriptinputreader:
                rna = row[1].replace('T', 'U')
                translated = ''.join(
                    my_codon_table.translate_codon(codon)
                    for codon in _iter_codons(rna))
                output.writerow([row[0], translated])
        elif mode == 'revtranslate':
            for row in scriptinputreader:
                pieces = []
                for index, character in enumerate(row[1]):
                    # The first 20 residues use the dedicated "1st20" table.
                    table = my_rare_codon_table if index < 20 else my_codon_table
                    # NOTE(review): confirm CodonTable defines
                    # reverse_translate_codon.
                    pieces.append(table.reverse_translate_codon(character))
                output.writerow([row[0], ''.join(pieces).replace('U', 'T')])
        elif mode == 'transcribe':
            for row in scriptinputreader:
                output.writerow([row[0], row[1].replace('T', 'U')])
        elif mode == 'calCUT':
            calCUT = {}
            for row in scriptinputreader:
                for codon in _iter_codons(row[1]):
                    calCUT[codon] = calCUT.get(codon, 0) + 1
            for codon, count in calCUT.items():
                # The codon table is queried with U in place of T.
                AA = my_codon_table.translate_codon(codon.replace('T', 'U'))
                output.writerow([codon, AA, count])