Example #1
0
def makeMutantFromSequence(target_protein_seq, base_dna_seq):
	"""Return a DNA sequence encoding target_protein_seq, derived from base_dna_seq.

	At every position where the target residue equals the residue obtained by
	translating base_dna_seq, the corresponding base codon is reused; at every
	other position a codon is taken from translate.randomReverseTranslate.

	Parameters:
		target_protein_seq -- protein sequence the result must translate to.
		base_dna_seq -- DNA sequence whose codons are reused where possible.

	Returns:
		The assembled DNA string.

	Raises:
		AssertionError -- if the translation of base_dna_seq and
			target_protein_seq differ in length, or if the assembled DNA does
			not translate back to target_protein_seq.
	"""
	codons = list(translate.codons(base_dna_seq))
	base_prot_seq = translate.translate(base_dna_seq)
	assert len(base_prot_seq) == len(target_protein_seq), \
		"target protein length {0} != base protein length {1}".format(
			len(target_protein_seq), len(base_prot_seq))
	# Collect codons in a list and join once; repeated += on a string can be
	# quadratic in the sequence length.
	mutant_codons = []
	for (i, aa) in enumerate(target_protein_seq):
		if aa == base_prot_seq[i]:
			# Residue unchanged: keep the original codon.
			mutant_codons.append(codons[i])
		else:
			# Residue differs: pick a codon for the new residue.
			mutant_codons.append(translate.randomReverseTranslate(aa))
	mutant_dna_seq = ''.join(mutant_codons)
	# Sanity check: the result must encode exactly the requested protein.
	assert translate.translate(mutant_dna_seq) == target_protein_seq, \
		"assembled DNA does not translate to the target protein"
	return mutant_dna_seq
Example #2
0
File: cai_test.py Project: dad/base
	def test_run(self):
		"""Check that codon log-odds scores are additive across reference codons.

		Tests whether ln odds X/Y + ln odds Y/Z = ln odds X/Z, using the
		alanine codons GCA (X), GCC (Y) and GCT (Z).
		"""
		# Fixed seed so the randomly generated inputs below are reproducible.
		random.seed(111)

		# Build two dictionaries in which each codon gets its reference codon:
		# the first (dict1) or last (dict2) synonymous codon after sorting.
		gc = translate.geneticCode(rna=False)
		reference_codon_dict1 = {}
		reference_codon_dict2 = {}
		for codon in translate.AADNACodons():
			aa = gc[codon]
			aa_codons = translate.getCodonsForAA(aa, rna=False)
			# Sort in alphabetical order by reverse.
			aa_codons.sort(key=lambda x: x[::-1])
			reference_codon_dict1[codon] = aa_codons[0]
			reference_codon_dict2[codon] = aa_codons[-1]

		# Focus on alanine: GCN. Check if GCA->GCC + GCC->GCT = GCA->GCT
		reference_codon_dict1['GCA'] = 'GCC'
		reference_codon_dict2['GCA'] = 'GCT'

		# Inputs: prot1 from randomProtein(1000), prot2 from mutProtein(prot1),
		# each passed through translate.randomReverseTranslate. (Hard-coded
		# dmel/dyak sequences used by an earlier, disabled muscle-alignment
		# path were never read by this test and have been removed.)
		species = ['x', 'y']
		prot1 = randomProtein(1000)
		prot2 = mutProtein(prot1)
		prots = {"x": prot1, "y": prot2}
		algenes = dict(
			(s, translate.randomReverseTranslate(prots[s], bad_codon='---'))
			for s in species)

		# Same data, two different reference-codon assignments.
		gene_codon_tables1 = cai.getAkashi2x2TablesForORFRefCodon(cai.conservedAA, reference_codon_dict1, algenes['x'], prots['x'], [algenes['y']], [prots['y']], pseudocount=0, n_terminal_start=0)
		gene_codon_tables2 = cai.getAkashi2x2TablesForORFRefCodon(cai.conservedAA, reference_codon_dict2, algenes['x'], prots['x'], [algenes['y']], [prots['y']], pseudocount=0, n_terminal_start=0)

		# Absolute tolerance for the additivity check.
		eps = 1e-6

		for codon in ['GCA']:
			ref_codon1 = reference_codon_dict1[codon]
			ref_codon2 = reference_codon_dict2[codon]
			# Both reference codons must be synonymous with the codon.
			self.assertEqual(gc[codon], gc[ref_codon1])
			self.assertEqual(gc[codon], gc[ref_codon2])
			# codon to ref_codon1
			mh_res1 = stats.MantelHaenszelOddsRatioVariance(gene_codon_tables1[codon])
			sc_1_to_r1 = -mh_res1.ln_odds_ratio
			# ref_codon1 to ref_codon2
			mh_res2 = stats.MantelHaenszelOddsRatioVariance(gene_codon_tables2[ref_codon1])
			sc_r1_to_r2 = -mh_res2.ln_odds_ratio
			# codon to ref_codon2
			mh_res3 = stats.MantelHaenszelOddsRatioVariance(gene_codon_tables2[codon])
			sc_1_to_r2 = -mh_res3.ln_odds_ratio
			# prediction from additivity
			pred_sc_1_to_r2 = sc_1_to_r1 + sc_r1_to_r2
			self.assertTrue(
				abs(sc_1_to_r2 - pred_sc_1_to_r2) < eps,
				"{0}->{1}: observed {2}, predicted {3} via {4}".format(
					codon, ref_codon2, sc_1_to_r2, pred_sc_1_to_r2, ref_codon1))
Example #3
0
    def test_run(self):
        """Check that codon log-odds scores are additive across reference codons.

        Tests whether ln odds X/Y + ln odds Y/Z = ln odds X/Z, using the
        alanine codons GCA (X), GCC (Y) and GCT (Z).
        """
        # Fixed seed so the randomly generated inputs below are reproducible.
        random.seed(111)

        # Build two dictionaries in which each codon gets its reference codon:
        # the first (dict1) or last (dict2) synonymous codon after sorting.
        gc = translate.geneticCode(rna=False)
        reference_codon_dict1 = {}
        reference_codon_dict2 = {}
        for codon in translate.AADNACodons():
            aa = gc[codon]
            aa_codons = translate.getCodonsForAA(aa, rna=False)
            # Sort in alphabetical order by reverse.
            aa_codons.sort(key=lambda x: x[::-1])
            reference_codon_dict1[codon] = aa_codons[0]
            reference_codon_dict2[codon] = aa_codons[-1]

        # Focus on alanine: GCN. Check if GCA->GCC + GCC->GCT = GCA->GCT
        reference_codon_dict1['GCA'] = 'GCC'
        reference_codon_dict2['GCA'] = 'GCT'

        # Inputs: prot1 from randomProtein(1000), prot2 from mutProtein(prot1),
        # each passed through translate.randomReverseTranslate. (Hard-coded
        # dmel/dyak sequences used by an earlier, disabled muscle-alignment
        # path were never read by this test and have been removed.)
        species = ['x', 'y']
        prot1 = randomProtein(1000)
        prot2 = mutProtein(prot1)
        prots = {"x": prot1, "y": prot2}
        algenes = dict(
            (s, translate.randomReverseTranslate(prots[s], bad_codon='---'))
            for s in species)

        # Same data, two different reference-codon assignments.
        gene_codon_tables1 = cai.getAkashi2x2TablesForORFRefCodon(
            cai.conservedAA,
            reference_codon_dict1,
            algenes['x'],
            prots['x'], [algenes['y']], [prots['y']],
            pseudocount=0,
            n_terminal_start=0)
        gene_codon_tables2 = cai.getAkashi2x2TablesForORFRefCodon(
            cai.conservedAA,
            reference_codon_dict2,
            algenes['x'],
            prots['x'], [algenes['y']], [prots['y']],
            pseudocount=0,
            n_terminal_start=0)

        # Absolute tolerance for the additivity check.
        eps = 1e-6

        for codon in ['GCA']:
            ref_codon1 = reference_codon_dict1[codon]
            ref_codon2 = reference_codon_dict2[codon]
            # Both reference codons must be synonymous with the codon.
            self.assertEqual(gc[codon], gc[ref_codon1])
            self.assertEqual(gc[codon], gc[ref_codon2])
            # codon to ref_codon1
            mh_res1 = stats.MantelHaenszelOddsRatioVariance(
                gene_codon_tables1[codon])
            sc_1_to_r1 = -mh_res1.ln_odds_ratio
            # ref_codon1 to ref_codon2
            mh_res2 = stats.MantelHaenszelOddsRatioVariance(
                gene_codon_tables2[ref_codon1])
            sc_r1_to_r2 = -mh_res2.ln_odds_ratio
            # codon to ref_codon2
            mh_res3 = stats.MantelHaenszelOddsRatioVariance(
                gene_codon_tables2[codon])
            sc_1_to_r2 = -mh_res3.ln_odds_ratio
            # prediction from additivity
            pred_sc_1_to_r2 = sc_1_to_r1 + sc_r1_to_r2
            self.assertTrue(
                abs(sc_1_to_r2 - pred_sc_1_to_r2) < eps,
                "{0}->{1}: observed {2}, predicted {3} via {4}".format(
                    codon, ref_codon2, sc_1_to_r2, pred_sc_1_to_r2,
                    ref_codon1))
Example #4
0
	for (hdr,seq) in zip(headers,seqs):
		seq = seq.replace(' ','')
		seq = seq.replace('-','')
		
		(name, props) = parseHeader(hdr)
		mutantof = None
		try:
			mutantof = props['mutant.of']
			baseseq = sug_dict[mutantof]
			dnaseq = makeMutantFromSequence(seq, baseseq)
			#print "Used suggestion"
		except KeyError:
			if not mutantof is None:
				raise Exception, "Asked to make mutant of {} but sequence not found in suggestions".format(mutantof)
			dnaseq = translate.randomReverseTranslate(seq)
		#dnaseq = translate.reverseTranslate(seq)
		assert(translate.translate(dnaseq)==seq)
		fullseq = options.prefix + dnaseq + options.suffix
		mutant_seqs[name] = (dnaseq, fullseq)
		#name = biofile.firstField(hdr)
		line = "{name:s}\t{dna:s}\tL={length:d}bp, {desc:s}\n".format(name=name, dna=fullseq, length=len(fullseq), desc=hdr)
		data_outs.write(line)
		n_written += 1

	data_outs.write("\n\n# Confirmation details:\n")
	for (hdr,seq) in zip(headers,seqs):
		(name, props) = parseHeader(hdr)
		(mutant_seq, fullseq) = mutant_seqs[name]
		prot = translate.translate(mutant_seq)
		fullprots = [translate.translateRaw(fullseq[i:]) for i in range(3)]