def makeMutantFromSequence(target_protein_seq, base_dna_seq):
    """Return a DNA sequence encoding target_protein_seq, reusing base codons.

    The base DNA is split into codons and translated. At every position where
    the target residue equals the base residue, the base codon is copied
    unchanged; everywhere else a codon is obtained from
    translate.randomReverseTranslate (presumably a randomly chosen codon for
    that residue -- confirm against the translate module).

    Arguments:
        target_protein_seq -- protein sequence the result must encode.
        base_dna_seq -- DNA whose translation has the same length as
            target_protein_seq.

    Returns the assembled DNA string.

    Raises AssertionError if the translated base and the target differ in
    length, or if the assembled DNA does not translate back to the target.
    """
    base_codons = list(translate.codons(base_dna_seq))
    base_prot_seq = translate.translate(base_dna_seq)
    assert len(base_prot_seq) == len(target_protein_seq)

    # Collect one codon per residue and join once at the end.
    pieces = []
    for (pos, residue) in enumerate(target_protein_seq):
        if residue != base_prot_seq[pos]:
            # Mutated position: ask for a fresh codon encoding the new residue.
            pieces.append(translate.randomReverseTranslate(residue))
        else:
            # Unchanged position: keep the codon from the base sequence.
            pieces.append(base_codons[pos])
    mutant_dna_seq = ''.join(pieces)

    # Sanity check: the result must encode exactly the requested protein.
    assert translate.translate(mutant_dna_seq) == target_protein_seq
    return mutant_dna_seq
def test_run(self):
    """Check that codon log-odds scores are additive across reference codons.

    Tests whether ln odds X/Y + ln odds Y/Z = ln odds X/Z for the alanine
    codons: the GCA->GCC score plus the GCC->GCT score must equal the
    GCA->GCT score to within 1e-6.

    The input is a random 1000-residue protein ('x') and a mutated copy
    ('y'), each reverse-translated at random; random.seed(111) makes the
    run reproducible.
    """
    random.seed(111)
    gc = translate.geneticCode(rna=False)

    # Build two dictionaries mapping every codon to a reference codon for the
    # same amino acid: the first (dict1) or last (dict2) synonymous codon
    # after sorting by reversed spelling.
    reference_codon_dict1 = {}
    reference_codon_dict2 = {}
    for codon in translate.AADNACodons():
        aa = gc[codon]
        aa_codons = translate.getCodonsForAA(aa, rna=False)
        # Sort in alphabetical order by reverse.
        aa_codons.sort(key=lambda x: x[::-1])
        reference_codon_dict1[codon] = aa_codons[0]
        reference_codon_dict2[codon] = aa_codons[-1]
    # Focus on alanine: GCN. Check if GCA->GCC + GCC->GCT = GCA->GCT
    reference_codon_dict1['GCA'] = 'GCC'
    reference_codon_dict2['GCA'] = 'GCT'

    # Random protein pair and their randomly reverse-translated genes.
    # Species are processed in the order 'x', 'y' so the seeded random
    # stream is consumed deterministically.
    species = ['x', 'y']
    prot1 = randomProtein(1000)
    prot2 = mutProtein(prot1)
    prots = {"x": prot1, "y": prot2}
    algenes = {}
    for s in species:
        algenes[s] = translate.randomReverseTranslate(prots[s], bad_codon='---')

    # Same sequences, two different reference-codon assignments.
    gene_codon_tables1 = cai.getAkashi2x2TablesForORFRefCodon(
        cai.conservedAA, reference_codon_dict1,
        algenes['x'], prots['x'], [algenes['y']], [prots['y']],
        pseudocount=0, n_terminal_start=0)
    gene_codon_tables2 = cai.getAkashi2x2TablesForORFRefCodon(
        cai.conservedAA, reference_codon_dict2,
        algenes['x'], prots['x'], [algenes['y']], [prots['y']],
        pseudocount=0, n_terminal_start=0)

    eps = 1e-6
    for codon in ['GCA']:
        ref_codon1 = reference_codon_dict1[codon]
        ref_codon2 = reference_codon_dict2[codon]
        # Both references must encode the same amino acid as the codon.
        self.assertEqual(gc[codon], gc[ref_codon1])
        self.assertEqual(gc[codon], gc[ref_codon2])

        # codon to ref_codon1
        mh_res1 = stats.MantelHaenszelOddsRatioVariance(
            gene_codon_tables1[codon])
        sc_1_to_r1 = -mh_res1.ln_odds_ratio
        # ref_codon1 to ref_codon2
        mh_res2 = stats.MantelHaenszelOddsRatioVariance(
            gene_codon_tables2[ref_codon1])
        sc_r1_to_r2 = -mh_res2.ln_odds_ratio
        # codon to ref_codon2
        mh_res3 = stats.MantelHaenszelOddsRatioVariance(
            gene_codon_tables2[codon])
        sc_1_to_r2 = -mh_res3.ln_odds_ratio

        # prediction from additivity
        pred_sc_1_to_r2 = sc_1_to_r1 + sc_r1_to_r2
        self.assertTrue(
            abs(sc_1_to_r2 - pred_sc_1_to_r2) < eps,
            "{0}->{2} = {3}, but {0}->{1} + {1}->{2} = {4}".format(
                codon, ref_codon1, ref_codon2, sc_1_to_r2, pred_sc_1_to_r2))
def test_run(self):
    """Check that codon log-odds scores are additive across reference codons.

    Tests whether ln odds X/Y + ln odds Y/Z = ln odds X/Z for the alanine
    codons: the GCA->GCC score plus the GCC->GCT score must equal the
    GCA->GCT score to within 1e-6.

    The input is a random 1000-residue protein ('x') and a mutated copy
    ('y'), each reverse-translated at random; random.seed(111) makes the
    run reproducible.
    """
    random.seed(111)
    gc = translate.geneticCode(rna=False)

    # Build two dictionaries mapping every codon to a reference codon for the
    # same amino acid: the first (dict1) or last (dict2) synonymous codon
    # after sorting by reversed spelling.
    reference_codon_dict1 = {}
    reference_codon_dict2 = {}
    for codon in translate.AADNACodons():
        aa = gc[codon]
        aa_codons = translate.getCodonsForAA(aa, rna=False)
        # Sort in alphabetical order by reverse.
        aa_codons.sort(key=lambda x: x[::-1])
        reference_codon_dict1[codon] = aa_codons[0]
        reference_codon_dict2[codon] = aa_codons[-1]
    # Focus on alanine: GCN. Check if GCA->GCC + GCC->GCT = GCA->GCT
    reference_codon_dict1['GCA'] = 'GCC'
    reference_codon_dict2['GCA'] = 'GCT'

    # Random protein pair and their randomly reverse-translated genes.
    # Species are processed in the order 'x', 'y' so the seeded random
    # stream is consumed deterministically.
    species = ['x', 'y']
    prot1 = randomProtein(1000)
    prot2 = mutProtein(prot1)
    prots = {"x": prot1, "y": prot2}
    algenes = {}
    for s in species:
        algenes[s] = translate.randomReverseTranslate(prots[s], bad_codon='---')

    # Same sequences, two different reference-codon assignments.
    gene_codon_tables1 = cai.getAkashi2x2TablesForORFRefCodon(
        cai.conservedAA, reference_codon_dict1,
        algenes['x'], prots['x'], [algenes['y']], [prots['y']],
        pseudocount=0, n_terminal_start=0)
    gene_codon_tables2 = cai.getAkashi2x2TablesForORFRefCodon(
        cai.conservedAA, reference_codon_dict2,
        algenes['x'], prots['x'], [algenes['y']], [prots['y']],
        pseudocount=0, n_terminal_start=0)

    eps = 1e-6
    for codon in ['GCA']:
        ref_codon1 = reference_codon_dict1[codon]
        ref_codon2 = reference_codon_dict2[codon]
        # Both references must encode the same amino acid as the codon.
        self.assertEqual(gc[codon], gc[ref_codon1])
        self.assertEqual(gc[codon], gc[ref_codon2])

        # codon to ref_codon1
        mh_res1 = stats.MantelHaenszelOddsRatioVariance(
            gene_codon_tables1[codon])
        sc_1_to_r1 = -mh_res1.ln_odds_ratio
        # ref_codon1 to ref_codon2
        mh_res2 = stats.MantelHaenszelOddsRatioVariance(
            gene_codon_tables2[ref_codon1])
        sc_r1_to_r2 = -mh_res2.ln_odds_ratio
        # codon to ref_codon2
        mh_res3 = stats.MantelHaenszelOddsRatioVariance(
            gene_codon_tables2[codon])
        sc_1_to_r2 = -mh_res3.ln_odds_ratio

        # prediction from additivity
        pred_sc_1_to_r2 = sc_1_to_r1 + sc_r1_to_r2
        self.assertTrue(
            abs(sc_1_to_r2 - pred_sc_1_to_r2) < eps,
            "{0}->{2} = {3}, but {0}->{1} + {1}->{2} = {4}".format(
                codon, ref_codon1, ref_codon2, sc_1_to_r2, pred_sc_1_to_r2))
for (hdr,seq) in zip(headers,seqs): seq = seq.replace(' ','') seq = seq.replace('-','') (name, props) = parseHeader(hdr) mutantof = None try: mutantof = props['mutant.of'] baseseq = sug_dict[mutantof] dnaseq = makeMutantFromSequence(seq, baseseq) #print "Used suggestion" except KeyError: if not mutantof is None: raise Exception, "Asked to make mutant of {} but sequence not found in suggestions".format(mutantof) dnaseq = translate.randomReverseTranslate(seq) #dnaseq = translate.reverseTranslate(seq) assert(translate.translate(dnaseq)==seq) fullseq = options.prefix + dnaseq + options.suffix mutant_seqs[name] = (dnaseq, fullseq) #name = biofile.firstField(hdr) line = "{name:s}\t{dna:s}\tL={length:d}bp, {desc:s}\n".format(name=name, dna=fullseq, length=len(fullseq), desc=hdr) data_outs.write(line) n_written += 1 data_outs.write("\n\n# Confirmation details:\n") for (hdr,seq) in zip(headers,seqs): (name, props) = parseHeader(hdr) (mutant_seq, fullseq) = mutant_seqs[name] prot = translate.translate(mutant_seq) fullprots = [translate.translateRaw(fullseq[i:]) for i in range(3)]