예제 #1
0
파일: test-paml.py 프로젝트: dad/base
def test003():
	"""Check that labels mapped from a full species tree onto a subtree agree.

	Parses a five-species tree and a three-species subtree, labels the
	internal nodes of the full tree, runs CodeML (FMutSel-F options with
	RateAncestor set to "1") on the aligned sequences of the subtree's
	leaves, and attaches branch rates and ancestral sequences to the subtree.
	The full tree's labels are then mapped onto the subtree, and for every
	pair of subtree leaves the name of their most recent common ancestor
	must be the same in both trees.

	Raises:
		AssertionError: if a pair of leaves has differently named MRCAs.
	"""
	print("** Test 003 **")
	# Tree remapping
	whole_tree = newick.tree.parseTree("((((scer,spar),smik),sbay),scas);")
	sub_tree = newick.tree.parseTree("((scer,smik),scas);")

	# Name nodes on whole tree
	newick.tree.labelInternalNodes(whole_tree)

	if False:
		# Disabled on purpose: shows how the hard-coded alignment below was
		# produced.  Only needed if you want more/other data.
		# Load the genes
		load_fxn = biofile.getIDFunction("vanilla")
		cdna_dicts = {}
		geneutil.readGenomesFromFile(os.path.expanduser("~/research/data/scerevisiae/saccharomyces-files.txt"), os.path.expanduser("~/bio/genomes"), cdna_dicts, 1, load_fxn)

		# NOTE(review): pickle.load executes arbitrary code from the file;
		# only point this at a trusted, locally generated pickle.
		alignment_path = os.path.expanduser("~/research/data/scerevisiae/scer-ortholog-alignments.p")
		with open(alignment_path, 'rb') as alignment_file:
			align_dict = pickle.load(alignment_file)
		(nal, spec_orf_list, protal) = align_dict["YBR177C"]
		aligned_seqs = {}
		for xi, (spec, orf) in enumerate(spec_orf_list):
			gene = cdna_dicts[spec][orf]
			prot = protal[xi]
			aligned_gene = muscle.alignGeneFromProtein(gene, prot)
			aligned_seqs[spec] = aligned_gene
		print(aligned_seqs)

	# Pre-aligned sequences keyed by species name ('-' marks alignment gaps).
	seq_dict = {
		'spar': 'ATGTCAGAAGTTTCGAAATGGCCAGCTATCAACCCGTTCCATTGGGGATACAATGGTACTGTTTCACATGTCGTCGGTGAAAATGGTTCCATCAAACTAAATTTAAAAGACAACAAGGAACAGGTTGAATTTGACGAGTTCGTTAACAAATATGTCCCAACGTTGAAGAATGGTGCTCAATTTAAATTGAGTCCTTACTTGTTCACAGGTATTTTGCAAACCCTGTACTTAAATGCTGCTGATTTCTCTAAGAAATTTCCTGTATTCTACGGCAGAGAAATTGTCAAATTCTCGGATAATGGAGTTTGTACCGCTGACTGGCTCATGGATTCCTGGAAGAAGGATTACAAACTCGATCAAAGTACTATGGGTTTCGATAAGAAAAAATTTGGTGAAGACGAGAAGGAGACGCATCCAGAAGGGTGGCCTCGTTTACAACCACGTACAAGGTATCTGAAGGATAATGAATTGGAAAATGTAAGGGAGGTTGATCTGCCCTTAGTAGTTATCCTACATGGTCTTGCTGGTGGTAGTCATGAGCCTATCATAAGATCTCTTGCTGAAAACCTCTCTCGG------AGTGGGAGATTTCAAGTGGTGGTACTAAATACTAGAGGCTGTGCACGTTCTAAAATTACAACCAGAAATTTATTTACGGCTTACCACACAATGGATATTCGTGAATTTTTGCAAAGAGAAAAGGAGAGATATCCAAATAGAAAATTATACGCTGTGGGATGCTCCTTCGGTGCTACGATGTTGGGAAACTATCTGGGAGAAGAAGGCGATAAATCTCCTTTATCTGCAGCTGCTACCCTGTGCAACCCTTGGGATCTTCTCCTTTCGGCACTTAGAATGACCGAGGATTGGTGGTCAAAGACTTTATTTTCCAAAAATATTGCCCAATTCTTAACAAGAACTGTTCAAGTTAATATGGGTGAACTAGGAGTTCCAAATGGCTCCCGTCCTGACCATCCTCCCACAGTCAAGAATCCATCTTACTATATGTTCACACCTGAAAATCTAATAAAGGCAAAAAGCTTTAAATCGAGTCTGGAATTTGATGAATTGTACACTGCGCCTGCTTTAGGCTTCCCAAATGCTATGGAATATTATAAAGCGGCCAGCTCAATAAGCAGAGTTGATACAATTCGGGTTCCTACTCTAGTTATCAATTCAAGGGATGATCCTGTTGTCGGCCCGGATCAA---CCTTACTCAATCGTGGAAAAAAATCCTCGTGTTTTGTATTGTAGAACCGACTTAGGAGGTCATTTAGCTTACCTAGATAAAGACAATAATTCGTGGGCTACCAAGGCGATTGCAGAATTCTTTACTAAGTTTGATGAATTAGTTGTA',
		'smik': 'ATGTCAGAAGTTTCGAAATGGCCAGCTATCAACCCATTCCATTGGGGATACAATGGTACTGTTTCGCATGTTGTCGGTGAAAATGGTTCCATGAAACTAGGTTTAAAAGATAACAAGGAACAGATTGAATTTGATCAGTTCGTTAACAAATATGTTCCAAGTTTGAAGAATGGTGCTCACTTCAAATTGAGTCCTTACTTGTTCACAGGTATTTTACAAACGTTGTACTTAAACGCTGCAGACTTCTCGAAGAAATTTCCTGTATTTTATGGCAGAGAAATTATCAAATTCTCCGATAATGGAGTTTGCACCGCTGATTGGGTTATGAGCTCCTGGAAGAGGGATTACAAACTCAATCAAAGTACCATGAGCTTTGATAAAAGCAAATTCGACGGAGACGAAAAAGCGACGCATCCAGAAGGATGGCCTCGTTTACAACCACGTACAAGATATTTGAAGGATAATGAGTTAGAGGAGCTCAGAGAAATTGAGCTCCCCTTAGTAGTCATTTTGCATGGACTTGCCGGTGGCAGTCATGAACCGATCATAAGATCTCTTGCTGAAAACCTGTCTCGC------AGCGGGAAATTTCAAGTGGTGGTGCTAAATACCAGAGGTTGTGCACGCTCCAAAATTACAACCAGGAACTTATTCACGGCTTACCACACAATGGATATCCGTGAATTTTTGCAAAGAGAAAATCAAAGACATCCAAACAGAAAGTTATACGCTGTAGGATGTTCTTTTGGTGCCACCATGTTGGGGAATTATCTCGGTGAAGAAGGTGATAAATCACCTTTATCTGCAGCTGCTACCTTATGCAATCCTTGGGACCTTCTCCTTTCAGCGCTTAGAATGACCGAGGATTGGTGGTCAAAAACTTTATTTTCCAAAAATATTGCACAGTTTTTAACAAGAACCGTTCAAGTTAACATGGGTGAACTAGGAGTCCCAAACGGCTCTCATCCCGACCATCCTCCTACAGTAAAAAATCCATCCTACTATATGTTCACCCCTGAAAATTTAATAAAGGCAAAACACTTCAAATCGAGTCTGGAATTCGATGAATTGTATACTGCACCTGCTTTAGGCTTTCCAAATGCAATGGAGTATTATAAAGCAGCTAGCTCAATAAACAGAGTTGCTACAATTAAGGTTCCTACTTTAGTTATCAATTCTAGAGATGATCCTGTTGTCGGCCCAGATCAG---CCTTATTCAATTGTAGAAAAAAACCCTCGTATTTTGTATTGCGGAACCGATTTAGGGGGCCATTTAGCCTACCTAGATAATGACAATAACTCATGGGCAACTAAGGCGATTGCAGAATTCTTTACTAAATTTGATCAACTGGTTGTA',
		'scas': 'ATGGCTTCACAATCAACATATCCACTCATTAAACCATGGAATTGGGGGTATCACGGAACCGTGACCCAAATTACCAGTAAGGAAGGTACTGTACTCATTCCATTAAAGGACAACAAAGAGGGTATTCCATTAGCAGAATTAGTTTCAAAGAATGTCCCTAGTTTAAAGGATGGTGCTAAGTTTGAGTTGAAACCTTTTTTATTCACTGGTATTTTACAAACTCTGTACCTTGGCGCAGCTGACTTTTCTAAGAAATTCCAAGTCTTTTATGGTAGAGAAATTGTGGAATTCTCAGATACTGGTGTATGTACTGCCGATTGGGTAATGCCATCTTGGAAGCAAAAATATAACTTTAATGAGAAAACATCAACTTTTGACAAGAAAGCATTCGACCTGGACGAAAAAGAAACACATCCAGACAATTGGCCTCGTTTGCAACCTCGTACCAGATACTTAAATGAAAAAGAAATGACGACTATCCACGAGGATGACAGACCATTGGTTGTTTGTTGTCATGGGTTAGCTGGTGGCTCTCACGAACCAATTATCAGATCATTGACTGAAAATCTATCTAAGGTTGGTAATGGGAAATTCCAAGTGGTTGTCCTAAATACTCGTGGCTGTGCACGTTCTAAGATTACTACTCGTAACCTATTTACTGCTTTCCATACTATGGATCTACGTGAATTTGTCAACAGAGAACACCAAAAACATCCTAACAGAAAGATTTATGCCGTTGGATTTTCATTCGGGGGTACAATGTTAGCAAATTATTTAGGAGAAGAAGGTGATAAAACTCCAATTGCATCTGCTGCAGTGTTATGTAACCCGTGGGATATGGTATTATCCGGTATGAAAACGAGAGATGATTTTTGGACAAGAACGCTATTTGCTAAGAATATTACAGATTTCTTGACTAGAATGGTTAAAGTTAATATGGCAGAATTGGAATCTCCAGATGGTTCTAAGCCTGATCACATCCCAACAGTGAAAAATCCATCTTATTATACATTTACCCAAGAAAATTTGGCAAAAGCCAAGGATTTTAAATTAATATCTGACTTTGATGACGTATTTACTGCACCTGCATTGGGTTTCAAAAACGCATTGGAGTACTACGCTGCAGCTGGGTCCATTAACAGACTACCTAATATTAAGATTCCTTTATTAGTTATCAATTCCACTGATGATCCAGTTGTTGGGCCGGATCCAATCCCAAACCATATCATAGATTCAAACAACCACCTACTGCTATGTGAAACCGATATCGGTGGCCATTTGGCATATTTGGATAAAAATAATGATTCATGGTCAACGAACCAAATCGCCAATTATTTCGACAAATTTGATGAAGTGGCATTA',
		'sbay': 'ATGTCAGAAGTTTCAAAGTGGCCAGCTATTAACCCATTTCATTGGGGGTACAACGGTACAGTTTCACATGTCGTTGGTGGTAATGGTTCTGTGAAGTTAAGCTTGAAGAGCGATAAGGAGCAAGTCGAGTTTGATACGTTTGTTAATAAATATGTCCCGATTCTGAAAAACGGGGCCCATTATAAACTAAGTCCCTACTTGTTCACAGGTATTTTACAAACCCTATACTTGAACGCTGCTGATTTCTCAAAGAAATTTCCCGTATTTTATGGTAGAGAAATCGTCAAGTTCTCGGATGACGGTGTCTGTACTGCTGATTGGGTCATGAACTCTTGGGAAAAGGAATATGATTTCGACCAAAAGACTATGAAATTTGATACGAAGAAGTTTGGCGACGACGAAAAGGCGACGCACCCAGAAGGATGGCCTCGTTTACAACCACGTACGAGGTACCTCAGGGACGAAGAGTTGGAAGAACAGAGAAAAGTAGATCTTCCCCTAGTTATCATCCTCCATGGTCTTGCCGGAGGCAGTCATGAACCAATCATAAGATCCCTAACTGAGAACTTGTCTCGTATCGGCAATGGGAGATTCCAAGTCGTGGTGCTAAACACGAGAGGCTGTGCACGTTCTAAAATCACCACTAGAAACCTATTCACAGCTTACCACACAATGGATATCCGTGAGTTCTTGCAAAGGGAAAAAGAAAGATATCCAAACAGAAAATTATACACTGTAGGGTGCTCTTTCGGGGCTACCATGTTAGCAAACTATTTGGGTGAAGAAGGTGACAAATCACCTGTATCTGCTGCTGTTACGTTATGTAATCCTTGGGATCTTCTTCTTTCGGCACTTAGAATGACTGAAGACTGGTGGTCAAAAACTTTGTTTTCTAAAAATATTGCCCAATTTTTAACAAGAACCGTTCAAGTTAACATGGGCGAATTAGGTGTTCCAAATGGCTCTCGTCCTGACCATACACCTACAGTTAAAAATCCATCTTACTATAAGTTCACACCTGAGAATTTGATGAAGGCAAAGCGCTTTAAGTCGAGTCTCGAATTCGATGAGCTGTACACTGCACCAGCTTTGGGCTTCCCGAATGCTATGGAATATTATAAATCAGCTAGTTCAATCAACAGGGCTGATAAAATCAAGGTTCCTACTTTAGTAATCAATTCTAGAGATGATCCTGTTGTTGGCCCAGACCAA---CCTTATTCATTTGTGGAGAAGAACCCTAATATACTATTCTGTAGAACCGACCTAGGTGGCCATTTAGCCTACCTAGATAGCAACAATGATTCGTGGGTTACAAAGGCGATTTCCGAGTTCTTGAATAAGTTTGAGGAGTTAGTGTTA',
		'scer': 'ATGTCAGAAGTTTCCAAATGGCCAGCAATCAACCCATTCCATTGGGGATACAATGGTACAGTTTCGCATATTGTCGGTGAAAATGGTTCCATTAAACTCCATTTAAAAGACAACAAGGAGCAAGTTGATTTTGACGAGTTCGCTAACAAATATGTCCCAACGTTGAAGAATGGTGCCCAATTCAAATTGAGTCCTTACTTGTTCACAGGTATTTTGCAAACTTTGTACTTAGGTGCTGCTGATTTCTCTAAGAAATTTCCTGTATTCTACGGCAGGGAAATTGTCAAATTCTCGGATGGTGGAGTTTGCACCGCTGACTGGCTCATAGATTCATGGAAAAAGGATTATGAATTCGATCAAAGTACTACGAGCTTTGATAAAAAAAAATTTGATAAAGACGAGAAGGCGACACATCCAGAAGGATGGCCTCGTTTACAACCACGTACAAGGTACCTGAAAGATAATGAGTTGGAAGAACTACGGGAGGTTGATCTACCCCTAGTAGTTATTCTACATGGTCTTGCTGGTGGTAGTCATGAGCCGATTATAAGATCTCTTGCTGAAAACCTGTCTCGC------AGTGGGAGATTTCAAGTGGTCGTCCTAAATACCAGAGGTTGTGCACGTTCCAAAATTACCACCAGAAATTTATTTACAGCTTATCACACAATGGATATTCGCGAGTTTTTGCAAAGAGAAAAGCAAAGACATCCAGATAGAAAACTATACGCTGTGGGATGCTCTTTTGGTGCTACGATGCTGGCAAACTATCTGGGAGAAGAGGGCGATAAATCACCTTTATCCGCAGCTGCTACTTTGTGCAATCCTTGGGATCTTCTCCTTTCAGCAATTAGGATGAGCCAGGATTGGTGGTCAAGAACTTTATTTTCCAAAAATATTGCGCAATTCTTAACAAGAACCGTTCAGGTTAATATGGGTGAATTAGGAGTTCCAAATGGCTCTCTCCCCGATCATCCTCCCACAGTCAAGAATCCATCTTTCTATATGTTCACGCCTGAAAATCTAATAAAGGCAAAGAGCTTTAAATCGACCCGGGAATTTGATGAAGTGTACACTGCGCCTGCTTTAGGCTTCCCAAATGCTATGGAGTATTATAAAGCGGCCAGCTCAATAAACAGAGTTGATACAATTCGGGTTCCTACCCTTGTTATCAATTCCAGGGATGATCCTGTTGTCGGCCCAGATCAA---CCATACTCAATCGTGGAAAAGAATCCTCGTATTTTGTATTGTAGAACCGATTTAGGTGGTCATTTAGCTTACCTAGATAAAGACAACAACTCGTGGGCTACCAAGGCAATTGCAGAATTTTTCACTAAGTTTGATGAATTAGTCGTA',
	}
	seq_labels = [x.name for x in sub_tree.leaves]
	seqs = [seq_dict[k] for k in seq_labels]
	# Run PAML
	# Copy the options: FMutSel_F_options is a class-level attribute, so
	# setting a key on it directly would leak into every later CodeML user.
	opts = dict(paml.CodeML.FMutSel_F_options)
	opts["RateAncestor"] = "1"
	cm = paml.CodeML("codon", opts)
	try:
		cm.loadSequences(seqs, seq_labels, str(sub_tree))
		cm.run()
		cm.putBranchRatesOnTree(seq_labels, sub_tree, label="rate")
		cm.putAncestralSequencesOnTree(seq_labels, sub_tree, label="sequence")

		# Now remap the tree.
		# Node lookups are built before the labels are mapped; only leaf
		# names are used as keys below.
		whole_node_dict = dict((x.name, x) for x in whole_tree.nodes)
		sub_node_dict = dict((x.name, x) for x in sub_tree.nodes)
		sub_names = [x.name for x in sub_tree.leaves]
		#print whole_node_dict["scer"].getMostRecentCommonAncestor(whole_node_dict["smik"]).name

		newick.tree.mapLabelsOntoSubtree(whole_tree, sub_tree)
		# Every unordered pair of subtree leaves must have the same named
		# MRCA in the subtree as in the whole tree.
		for i, s1 in enumerate(sub_names):
			for s2 in sub_names[i+1:]:
				sub_mrca = sub_node_dict[s1].getMostRecentCommonAncestor(sub_node_dict[s2])
				mrca = whole_node_dict[s1].getMostRecentCommonAncestor(whole_node_dict[s2])
				assert sub_mrca.name == mrca.name, \
					"MRCA of %s and %s: subtree %r != whole tree %r" % (s1, s2, sub_mrca.name, mrca.name)
	finally:
		# Remove CodeML's working directory even when a step above fails.
		tmpdir = getattr(cm, "tmpdir", None)
		if tmpdir is not None and os.path.isdir(tmpdir):
			shutil.rmtree(tmpdir)
예제 #2
0
def test003():
    """Check that labels mapped from a full species tree onto a subtree agree.

    Parses a five-species tree and a three-species subtree, labels the
    internal nodes of the full tree, runs CodeML (FMutSel-F options with
    RateAncestor set to "1") on the aligned sequences of the subtree's
    leaves, and attaches branch rates and ancestral sequences to the subtree.
    The full tree's labels are then mapped onto the subtree, and for every
    pair of subtree leaves the name of their most recent common ancestor
    must be the same in both trees.

    Raises:
        AssertionError: if a pair of leaves has differently named MRCAs.
    """
    print("** Test 003 **")
    # Tree remapping
    whole_tree = newick.tree.parseTree("((((scer,spar),smik),sbay),scas);")
    sub_tree = newick.tree.parseTree("((scer,smik),scas);")

    # Name nodes on whole tree
    newick.tree.labelInternalNodes(whole_tree)

    if False:
        # Disabled on purpose: shows how the hard-coded alignment below was
        # produced.  Only needed if you want more/other data.
        # Load the genes
        load_fxn = biofile.getIDFunction("vanilla")
        cdna_dicts = {}
        geneutil.readGenomesFromFile(
            os.path.expanduser(
                "~/research/data/scerevisiae/saccharomyces-files.txt"),
            os.path.expanduser("~/bio/genomes"), cdna_dicts, 1, load_fxn)

        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only point this at a trusted, locally generated pickle.
        alignment_path = os.path.expanduser(
            "~/research/data/scerevisiae/scer-ortholog-alignments.p")
        with open(alignment_path, 'rb') as alignment_file:
            align_dict = pickle.load(alignment_file)
        (nal, spec_orf_list, protal) = align_dict["YBR177C"]
        aligned_seqs = {}
        for xi, (spec, orf) in enumerate(spec_orf_list):
            gene = cdna_dicts[spec][orf]
            prot = protal[xi]
            aligned_gene = muscle.alignGeneFromProtein(gene, prot)
            aligned_seqs[spec] = aligned_gene
        print(aligned_seqs)

    # Pre-aligned sequences keyed by species name ('-' marks alignment gaps).
    seq_dict = {
        'spar': 'ATGTCAGAAGTTTCGAAATGGCCAGCTATCAACCCGTTCCATTGGGGATACAATGGTACTGTTTCACATGTCGTCGGTGAAAATGGTTCCATCAAACTAAATTTAAAAGACAACAAGGAACAGGTTGAATTTGACGAGTTCGTTAACAAATATGTCCCAACGTTGAAGAATGGTGCTCAATTTAAATTGAGTCCTTACTTGTTCACAGGTATTTTGCAAACCCTGTACTTAAATGCTGCTGATTTCTCTAAGAAATTTCCTGTATTCTACGGCAGAGAAATTGTCAAATTCTCGGATAATGGAGTTTGTACCGCTGACTGGCTCATGGATTCCTGGAAGAAGGATTACAAACTCGATCAAAGTACTATGGGTTTCGATAAGAAAAAATTTGGTGAAGACGAGAAGGAGACGCATCCAGAAGGGTGGCCTCGTTTACAACCACGTACAAGGTATCTGAAGGATAATGAATTGGAAAATGTAAGGGAGGTTGATCTGCCCTTAGTAGTTATCCTACATGGTCTTGCTGGTGGTAGTCATGAGCCTATCATAAGATCTCTTGCTGAAAACCTCTCTCGG------AGTGGGAGATTTCAAGTGGTGGTACTAAATACTAGAGGCTGTGCACGTTCTAAAATTACAACCAGAAATTTATTTACGGCTTACCACACAATGGATATTCGTGAATTTTTGCAAAGAGAAAAGGAGAGATATCCAAATAGAAAATTATACGCTGTGGGATGCTCCTTCGGTGCTACGATGTTGGGAAACTATCTGGGAGAAGAAGGCGATAAATCTCCTTTATCTGCAGCTGCTACCCTGTGCAACCCTTGGGATCTTCTCCTTTCGGCACTTAGAATGACCGAGGATTGGTGGTCAAAGACTTTATTTTCCAAAAATATTGCCCAATTCTTAACAAGAACTGTTCAAGTTAATATGGGTGAACTAGGAGTTCCAAATGGCTCCCGTCCTGACCATCCTCCCACAGTCAAGAATCCATCTTACTATATGTTCACACCTGAAAATCTAATAAAGGCAAAAAGCTTTAAATCGAGTCTGGAATTTGATGAATTGTACACTGCGCCTGCTTTAGGCTTCCCAAATGCTATGGAATATTATAAAGCGGCCAGCTCAATAAGCAGAGTTGATACAATTCGGGTTCCTACTCTAGTTATCAATTCAAGGGATGATCCTGTTGTCGGCCCGGATCAA---CCTTACTCAATCGTGGAAAAAAATCCTCGTGTTTTGTATTGTAGAACCGACTTAGGAGGTCATTTAGCTTACCTAGATAAAGACAATAATTCGTGGGCTACCAAGGCGATTGCAGAATTCTTTACTAAGTTTGATGAATTAGTTGTA',
        'smik': 'ATGTCAGAAGTTTCGAAATGGCCAGCTATCAACCCATTCCATTGGGGATACAATGGTACTGTTTCGCATGTTGTCGGTGAAAATGGTTCCATGAAACTAGGTTTAAAAGATAACAAGGAACAGATTGAATTTGATCAGTTCGTTAACAAATATGTTCCAAGTTTGAAGAATGGTGCTCACTTCAAATTGAGTCCTTACTTGTTCACAGGTATTTTACAAACGTTGTACTTAAACGCTGCAGACTTCTCGAAGAAATTTCCTGTATTTTATGGCAGAGAAATTATCAAATTCTCCGATAATGGAGTTTGCACCGCTGATTGGGTTATGAGCTCCTGGAAGAGGGATTACAAACTCAATCAAAGTACCATGAGCTTTGATAAAAGCAAATTCGACGGAGACGAAAAAGCGACGCATCCAGAAGGATGGCCTCGTTTACAACCACGTACAAGATATTTGAAGGATAATGAGTTAGAGGAGCTCAGAGAAATTGAGCTCCCCTTAGTAGTCATTTTGCATGGACTTGCCGGTGGCAGTCATGAACCGATCATAAGATCTCTTGCTGAAAACCTGTCTCGC------AGCGGGAAATTTCAAGTGGTGGTGCTAAATACCAGAGGTTGTGCACGCTCCAAAATTACAACCAGGAACTTATTCACGGCTTACCACACAATGGATATCCGTGAATTTTTGCAAAGAGAAAATCAAAGACATCCAAACAGAAAGTTATACGCTGTAGGATGTTCTTTTGGTGCCACCATGTTGGGGAATTATCTCGGTGAAGAAGGTGATAAATCACCTTTATCTGCAGCTGCTACCTTATGCAATCCTTGGGACCTTCTCCTTTCAGCGCTTAGAATGACCGAGGATTGGTGGTCAAAAACTTTATTTTCCAAAAATATTGCACAGTTTTTAACAAGAACCGTTCAAGTTAACATGGGTGAACTAGGAGTCCCAAACGGCTCTCATCCCGACCATCCTCCTACAGTAAAAAATCCATCCTACTATATGTTCACCCCTGAAAATTTAATAAAGGCAAAACACTTCAAATCGAGTCTGGAATTCGATGAATTGTATACTGCACCTGCTTTAGGCTTTCCAAATGCAATGGAGTATTATAAAGCAGCTAGCTCAATAAACAGAGTTGCTACAATTAAGGTTCCTACTTTAGTTATCAATTCTAGAGATGATCCTGTTGTCGGCCCAGATCAG---CCTTATTCAATTGTAGAAAAAAACCCTCGTATTTTGTATTGCGGAACCGATTTAGGGGGCCATTTAGCCTACCTAGATAATGACAATAACTCATGGGCAACTAAGGCGATTGCAGAATTCTTTACTAAATTTGATCAACTGGTTGTA',
        'scas': 'ATGGCTTCACAATCAACATATCCACTCATTAAACCATGGAATTGGGGGTATCACGGAACCGTGACCCAAATTACCAGTAAGGAAGGTACTGTACTCATTCCATTAAAGGACAACAAAGAGGGTATTCCATTAGCAGAATTAGTTTCAAAGAATGTCCCTAGTTTAAAGGATGGTGCTAAGTTTGAGTTGAAACCTTTTTTATTCACTGGTATTTTACAAACTCTGTACCTTGGCGCAGCTGACTTTTCTAAGAAATTCCAAGTCTTTTATGGTAGAGAAATTGTGGAATTCTCAGATACTGGTGTATGTACTGCCGATTGGGTAATGCCATCTTGGAAGCAAAAATATAACTTTAATGAGAAAACATCAACTTTTGACAAGAAAGCATTCGACCTGGACGAAAAAGAAACACATCCAGACAATTGGCCTCGTTTGCAACCTCGTACCAGATACTTAAATGAAAAAGAAATGACGACTATCCACGAGGATGACAGACCATTGGTTGTTTGTTGTCATGGGTTAGCTGGTGGCTCTCACGAACCAATTATCAGATCATTGACTGAAAATCTATCTAAGGTTGGTAATGGGAAATTCCAAGTGGTTGTCCTAAATACTCGTGGCTGTGCACGTTCTAAGATTACTACTCGTAACCTATTTACTGCTTTCCATACTATGGATCTACGTGAATTTGTCAACAGAGAACACCAAAAACATCCTAACAGAAAGATTTATGCCGTTGGATTTTCATTCGGGGGTACAATGTTAGCAAATTATTTAGGAGAAGAAGGTGATAAAACTCCAATTGCATCTGCTGCAGTGTTATGTAACCCGTGGGATATGGTATTATCCGGTATGAAAACGAGAGATGATTTTTGGACAAGAACGCTATTTGCTAAGAATATTACAGATTTCTTGACTAGAATGGTTAAAGTTAATATGGCAGAATTGGAATCTCCAGATGGTTCTAAGCCTGATCACATCCCAACAGTGAAAAATCCATCTTATTATACATTTACCCAAGAAAATTTGGCAAAAGCCAAGGATTTTAAATTAATATCTGACTTTGATGACGTATTTACTGCACCTGCATTGGGTTTCAAAAACGCATTGGAGTACTACGCTGCAGCTGGGTCCATTAACAGACTACCTAATATTAAGATTCCTTTATTAGTTATCAATTCCACTGATGATCCAGTTGTTGGGCCGGATCCAATCCCAAACCATATCATAGATTCAAACAACCACCTACTGCTATGTGAAACCGATATCGGTGGCCATTTGGCATATTTGGATAAAAATAATGATTCATGGTCAACGAACCAAATCGCCAATTATTTCGACAAATTTGATGAAGTGGCATTA',
        'sbay': 'ATGTCAGAAGTTTCAAAGTGGCCAGCTATTAACCCATTTCATTGGGGGTACAACGGTACAGTTTCACATGTCGTTGGTGGTAATGGTTCTGTGAAGTTAAGCTTGAAGAGCGATAAGGAGCAAGTCGAGTTTGATACGTTTGTTAATAAATATGTCCCGATTCTGAAAAACGGGGCCCATTATAAACTAAGTCCCTACTTGTTCACAGGTATTTTACAAACCCTATACTTGAACGCTGCTGATTTCTCAAAGAAATTTCCCGTATTTTATGGTAGAGAAATCGTCAAGTTCTCGGATGACGGTGTCTGTACTGCTGATTGGGTCATGAACTCTTGGGAAAAGGAATATGATTTCGACCAAAAGACTATGAAATTTGATACGAAGAAGTTTGGCGACGACGAAAAGGCGACGCACCCAGAAGGATGGCCTCGTTTACAACCACGTACGAGGTACCTCAGGGACGAAGAGTTGGAAGAACAGAGAAAAGTAGATCTTCCCCTAGTTATCATCCTCCATGGTCTTGCCGGAGGCAGTCATGAACCAATCATAAGATCCCTAACTGAGAACTTGTCTCGTATCGGCAATGGGAGATTCCAAGTCGTGGTGCTAAACACGAGAGGCTGTGCACGTTCTAAAATCACCACTAGAAACCTATTCACAGCTTACCACACAATGGATATCCGTGAGTTCTTGCAAAGGGAAAAAGAAAGATATCCAAACAGAAAATTATACACTGTAGGGTGCTCTTTCGGGGCTACCATGTTAGCAAACTATTTGGGTGAAGAAGGTGACAAATCACCTGTATCTGCTGCTGTTACGTTATGTAATCCTTGGGATCTTCTTCTTTCGGCACTTAGAATGACTGAAGACTGGTGGTCAAAAACTTTGTTTTCTAAAAATATTGCCCAATTTTTAACAAGAACCGTTCAAGTTAACATGGGCGAATTAGGTGTTCCAAATGGCTCTCGTCCTGACCATACACCTACAGTTAAAAATCCATCTTACTATAAGTTCACACCTGAGAATTTGATGAAGGCAAAGCGCTTTAAGTCGAGTCTCGAATTCGATGAGCTGTACACTGCACCAGCTTTGGGCTTCCCGAATGCTATGGAATATTATAAATCAGCTAGTTCAATCAACAGGGCTGATAAAATCAAGGTTCCTACTTTAGTAATCAATTCTAGAGATGATCCTGTTGTTGGCCCAGACCAA---CCTTATTCATTTGTGGAGAAGAACCCTAATATACTATTCTGTAGAACCGACCTAGGTGGCCATTTAGCCTACCTAGATAGCAACAATGATTCGTGGGTTACAAAGGCGATTTCCGAGTTCTTGAATAAGTTTGAGGAGTTAGTGTTA',
        'scer': 'ATGTCAGAAGTTTCCAAATGGCCAGCAATCAACCCATTCCATTGGGGATACAATGGTACAGTTTCGCATATTGTCGGTGAAAATGGTTCCATTAAACTCCATTTAAAAGACAACAAGGAGCAAGTTGATTTTGACGAGTTCGCTAACAAATATGTCCCAACGTTGAAGAATGGTGCCCAATTCAAATTGAGTCCTTACTTGTTCACAGGTATTTTGCAAACTTTGTACTTAGGTGCTGCTGATTTCTCTAAGAAATTTCCTGTATTCTACGGCAGGGAAATTGTCAAATTCTCGGATGGTGGAGTTTGCACCGCTGACTGGCTCATAGATTCATGGAAAAAGGATTATGAATTCGATCAAAGTACTACGAGCTTTGATAAAAAAAAATTTGATAAAGACGAGAAGGCGACACATCCAGAAGGATGGCCTCGTTTACAACCACGTACAAGGTACCTGAAAGATAATGAGTTGGAAGAACTACGGGAGGTTGATCTACCCCTAGTAGTTATTCTACATGGTCTTGCTGGTGGTAGTCATGAGCCGATTATAAGATCTCTTGCTGAAAACCTGTCTCGC------AGTGGGAGATTTCAAGTGGTCGTCCTAAATACCAGAGGTTGTGCACGTTCCAAAATTACCACCAGAAATTTATTTACAGCTTATCACACAATGGATATTCGCGAGTTTTTGCAAAGAGAAAAGCAAAGACATCCAGATAGAAAACTATACGCTGTGGGATGCTCTTTTGGTGCTACGATGCTGGCAAACTATCTGGGAGAAGAGGGCGATAAATCACCTTTATCCGCAGCTGCTACTTTGTGCAATCCTTGGGATCTTCTCCTTTCAGCAATTAGGATGAGCCAGGATTGGTGGTCAAGAACTTTATTTTCCAAAAATATTGCGCAATTCTTAACAAGAACCGTTCAGGTTAATATGGGTGAATTAGGAGTTCCAAATGGCTCTCTCCCCGATCATCCTCCCACAGTCAAGAATCCATCTTTCTATATGTTCACGCCTGAAAATCTAATAAAGGCAAAGAGCTTTAAATCGACCCGGGAATTTGATGAAGTGTACACTGCGCCTGCTTTAGGCTTCCCAAATGCTATGGAGTATTATAAAGCGGCCAGCTCAATAAACAGAGTTGATACAATTCGGGTTCCTACCCTTGTTATCAATTCCAGGGATGATCCTGTTGTCGGCCCAGATCAA---CCATACTCAATCGTGGAAAAGAATCCTCGTATTTTGTATTGTAGAACCGATTTAGGTGGTCATTTAGCTTACCTAGATAAAGACAACAACTCGTGGGCTACCAAGGCAATTGCAGAATTTTTCACTAAGTTTGATGAATTAGTCGTA',
    }
    seq_labels = [x.name for x in sub_tree.leaves]
    seqs = [seq_dict[k] for k in seq_labels]
    # Run PAML
    # Copy the options: FMutSel_F_options is a class-level attribute, so
    # setting a key on it directly would leak into every later CodeML user.
    opts = dict(paml.CodeML.FMutSel_F_options)
    opts["RateAncestor"] = "1"
    cm = paml.CodeML("codon", opts)
    try:
        cm.loadSequences(seqs, seq_labels, str(sub_tree))
        cm.run()
        cm.putBranchRatesOnTree(seq_labels, sub_tree, label="rate")
        cm.putAncestralSequencesOnTree(seq_labels, sub_tree, label="sequence")

        # Now remap the tree.
        # Node lookups are built before the labels are mapped; only leaf
        # names are used as keys below.
        whole_node_dict = dict((x.name, x) for x in whole_tree.nodes)
        sub_node_dict = dict((x.name, x) for x in sub_tree.nodes)
        sub_names = [x.name for x in sub_tree.leaves]
        #print whole_node_dict["scer"].getMostRecentCommonAncestor(whole_node_dict["smik"]).name

        newick.tree.mapLabelsOntoSubtree(whole_tree, sub_tree)
        # Every unordered pair of subtree leaves must have the same named
        # MRCA in the subtree as in the whole tree.
        for i, s1 in enumerate(sub_names):
            for s2 in sub_names[i + 1:]:
                sub_mrca = sub_node_dict[s1].getMostRecentCommonAncestor(
                    sub_node_dict[s2])
                mrca = whole_node_dict[s1].getMostRecentCommonAncestor(
                    whole_node_dict[s2])
                assert sub_mrca.name == mrca.name, (
                    "MRCA of %s and %s: subtree %r != whole tree %r"
                    % (s1, s2, sub_mrca.name, mrca.name))
    finally:
        # Remove CodeML's working directory even when a step above fails.
        tmpdir = getattr(cm, "tmpdir", None)
        if tmpdir is not None and os.path.isdir(tmpdir):
            shutil.rmtree(tmpdir)
예제 #3
0
	parser.add_option("-d", "--dict-out", dest="score_dict_fname", type="string", default=None, help="score dictionary output filename")
	parser.add_option("-s", "--scores-out", dest="score_fname", type="string", default="vanilla", help="format of ID in FASTA entry")
	parser.add_option("-p", "--pseudocount", dest="pseudocount", type="float", default=0.0, help="pseudocount to be added to all frequencies")
	(options, args) = parser.parse_args()
	in_fname = args[0]

	info_outs = util.OutStreams(sys.stdout)
	data_outs = util.OutStreams()

	# Start up output
	if not options.out_fname is None:
		outf = file(options.out_fname, 'w')
		data_outs.addStream(outf)
	else:
		data_outs.addStream(sys.stdout)
	formatFxn = biofile.getIDFunction(options.format)
	cdna_dict = biofile.readFASTADict(in_fname, formatFxn)
	calc = Calculator()
	calc.initializeFromSequences(cdna_dict.values(), options.pseudocount)
	syn_dict = calc.getCodonSYNScores()
	syn_opt_codons = []
	for aa in translate.degenerateAAs():
		codons = translate.getCodonsForAA(aa, rna=False)
		best_syn_codon = sorted([(syn_dict[c],c) for c in codons])[-1][1]
		syn_opt_codons.append(best_syn_codon)
	data_outs.write("# Read {0}\n#{1:d} sequences, {2:d} codons, {3:d} nucleotides\n".format(in_fname, len(cdna_dict.keys()), int(sum(calc.codon_freq.values())), int(sum(calc.nucleotide_freq.values()))))
	data_outs.write("# syn_scores = {0!s}\n".format(syn_dict))
	data_outs.write("# SYN opt codons = {0!s}\n".format(sorted(syn_opt_codons)))
	data_outs.write("{0!s}".format(calc))

	if not options.score_dict_fname is None: