def test003():
    """Check that labels mapped from the full species tree onto a subtree agree.

    Parses a five-species tree and a three-species subtree, labels the
    internal nodes of the full tree, runs CodeML on the subtree's sequences
    (placing branch rates and ancestral sequences on the subtree), maps the
    full tree's labels onto the subtree, and finally asserts that for every
    pair of subtree leaves the most recent common ancestor carries the same
    name in both trees.

    The CodeML temporary directory is removed even if the run or an
    assertion fails.
    """
    import copy  # local import: only needed to protect the shared options

    print("** Test 003 **")
    # Tree remapping
    whole_tree = newick.tree.parseTree("((((scer,spar),smik),sbay),scas);")
    sub_tree = newick.tree.parseTree("((scer,smik),scas);")
    # Name nodes on whole tree
    newick.tree.labelInternalNodes(whole_tree)
    if False:
        # Deliberately disabled: only needed if you want more/other data.
        # It rebuilds aligned sequences from local genome files; the output
        # was presumably pasted into seq_dict below (TODO confirm).
        # Load the genes
        load_fxn = biofile.getIDFunction("vanilla")
        cdna_dicts = {}
        geneutil.readGenomesFromFile(os.path.expanduser("~/research/data/scerevisiae/saccharomyces-files.txt"), os.path.expanduser("~/bio/genomes"), cdna_dicts, 1, load_fxn)
        align_dict = pickle.load(file(os.path.expanduser("~/research/data/scerevisiae/scer-ortholog-alignments.p"), 'r'))
        (nal, spec_orf_list, protal) = align_dict["YBR177C"]
        aligned_seqs = {}
        for xi in range(len(spec_orf_list)):
            (spec, orf) = spec_orf_list[xi]
            gene = cdna_dicts[spec][orf]
            prot = protal[xi]
            aligned_gene = muscle.alignGeneFromProtein(gene, prot)
            aligned_seqs[spec] = aligned_gene
        print(aligned_seqs)
    # Gapped nucleotide sequences keyed by species name. Only the species that
    # appear as leaves of sub_tree are actually looked up below.
    seq_dict = {
        'spar': 'ATGTCAGAAGTTTCGAAATGGCCAGCTATCAACCCGTTCCATTGGGGATACAATGGTACTGTTTCACATGTCGTCGGTGAAAATGGTTCCATCAAACTAAATTTAAAAGACAACAAGGAACAGGTTGAATTTGACGAGTTCGTTAACAAATATGTCCCAACGTTGAAGAATGGTGCTCAATTTAAATTGAGTCCTTACTTGTTCACAGGTATTTTGCAAACCCTGTACTTAAATGCTGCTGATTTCTCTAAGAAATTTCCTGTATTCTACGGCAGAGAAATTGTCAAATTCTCGGATAATGGAGTTTGTACCGCTGACTGGCTCATGGATTCCTGGAAGAAGGATTACAAACTCGATCAAAGTACTATGGGTTTCGATAAGAAAAAATTTGGTGAAGACGAGAAGGAGACGCATCCAGAAGGGTGGCCTCGTTTACAACCACGTACAAGGTATCTGAAGGATAATGAATTGGAAAATGTAAGGGAGGTTGATCTGCCCTTAGTAGTTATCCTACATGGTCTTGCTGGTGGTAGTCATGAGCCTATCATAAGATCTCTTGCTGAAAACCTCTCTCGG------AGTGGGAGATTTCAAGTGGTGGTACTAAATACTAGAGGCTGTGCACGTTCTAAAATTACAACCAGAAATTTATTTACGGCTTACCACACAATGGATATTCGTGAATTTTTGCAAAGAGAAAAGGAGAGATATCCAAATAGAAAATTATACGCTGTGGGATGCTCCTTCGGTGCTACGATGTTGGGAAACTATCTGGGAGAAGAAGGCGATAAATCTCCTTTATCTGCAGCTGCTACCCTGTGCAACCCTTGGGATCTTCTCCTTTCGGCACTTAGAATGACCGAGGATTGGTGGTCAAAGACTTTATTTTCCAAAAATATTGCCCAATTCTTAACAAGAACTGTTCAAGTTAATATGGGTGAACTAGGAGTTCCAAATGGCTCCCGTCCTGACCATCCTCCCACAGTCAAGAATCCATCTTACTATATGTTCACACCTGAAAATCTAATAAAGGCAAAAAGCTTTAAATCGAGTCTGGAATTTGATGAATTGTACACTGCGCCTGCTTTAGGCTTCCCAAATGCTATGGAATATTATAAAGCGGCCAGCTCAATAAGCAGAGTTGATACAATTCGGGTTCCTACTCTAGTTATCAATTCAAGGGATGATCCTGTTGTCGGCCCGGATCAA---CCTTACTCAATCGTGGAAAAAAATCCTCGTGTTTTGTATTGTAGAACCGACTTAGGAGGTCATTTAGCTTACCTAGATAAAGACAATAATTCGTGGGCTACCAAGGCGATTGCAGAATTCTTTACTAAGTTTGATGAATTAGTTGTA',
        'smik': 'ATGTCAGAAGTTTCGAAATGGCCAGCTATCAACCCATTCCATTGGGGATACAATGGTACTGTTTCGCATGTTGTCGGTGAAAATGGTTCCATGAAACTAGGTTTAAAAGATAACAAGGAACAGATTGAATTTGATCAGTTCGTTAACAAATATGTTCCAAGTTTGAAGAATGGTGCTCACTTCAAATTGAGTCCTTACTTGTTCACAGGTATTTTACAAACGTTGTACTTAAACGCTGCAGACTTCTCGAAGAAATTTCCTGTATTTTATGGCAGAGAAATTATCAAATTCTCCGATAATGGAGTTTGCACCGCTGATTGGGTTATGAGCTCCTGGAAGAGGGATTACAAACTCAATCAAAGTACCATGAGCTTTGATAAAAGCAAATTCGACGGAGACGAAAAAGCGACGCATCCAGAAGGATGGCCTCGTTTACAACCACGTACAAGATATTTGAAGGATAATGAGTTAGAGGAGCTCAGAGAAATTGAGCTCCCCTTAGTAGTCATTTTGCATGGACTTGCCGGTGGCAGTCATGAACCGATCATAAGATCTCTTGCTGAAAACCTGTCTCGC------AGCGGGAAATTTCAAGTGGTGGTGCTAAATACCAGAGGTTGTGCACGCTCCAAAATTACAACCAGGAACTTATTCACGGCTTACCACACAATGGATATCCGTGAATTTTTGCAAAGAGAAAATCAAAGACATCCAAACAGAAAGTTATACGCTGTAGGATGTTCTTTTGGTGCCACCATGTTGGGGAATTATCTCGGTGAAGAAGGTGATAAATCACCTTTATCTGCAGCTGCTACCTTATGCAATCCTTGGGACCTTCTCCTTTCAGCGCTTAGAATGACCGAGGATTGGTGGTCAAAAACTTTATTTTCCAAAAATATTGCACAGTTTTTAACAAGAACCGTTCAAGTTAACATGGGTGAACTAGGAGTCCCAAACGGCTCTCATCCCGACCATCCTCCTACAGTAAAAAATCCATCCTACTATATGTTCACCCCTGAAAATTTAATAAAGGCAAAACACTTCAAATCGAGTCTGGAATTCGATGAATTGTATACTGCACCTGCTTTAGGCTTTCCAAATGCAATGGAGTATTATAAAGCAGCTAGCTCAATAAACAGAGTTGCTACAATTAAGGTTCCTACTTTAGTTATCAATTCTAGAGATGATCCTGTTGTCGGCCCAGATCAG---CCTTATTCAATTGTAGAAAAAAACCCTCGTATTTTGTATTGCGGAACCGATTTAGGGGGCCATTTAGCCTACCTAGATAATGACAATAACTCATGGGCAACTAAGGCGATTGCAGAATTCTTTACTAAATTTGATCAACTGGTTGTA',
        'scas': 'ATGGCTTCACAATCAACATATCCACTCATTAAACCATGGAATTGGGGGTATCACGGAACCGTGACCCAAATTACCAGTAAGGAAGGTACTGTACTCATTCCATTAAAGGACAACAAAGAGGGTATTCCATTAGCAGAATTAGTTTCAAAGAATGTCCCTAGTTTAAAGGATGGTGCTAAGTTTGAGTTGAAACCTTTTTTATTCACTGGTATTTTACAAACTCTGTACCTTGGCGCAGCTGACTTTTCTAAGAAATTCCAAGTCTTTTATGGTAGAGAAATTGTGGAATTCTCAGATACTGGTGTATGTACTGCCGATTGGGTAATGCCATCTTGGAAGCAAAAATATAACTTTAATGAGAAAACATCAACTTTTGACAAGAAAGCATTCGACCTGGACGAAAAAGAAACACATCCAGACAATTGGCCTCGTTTGCAACCTCGTACCAGATACTTAAATGAAAAAGAAATGACGACTATCCACGAGGATGACAGACCATTGGTTGTTTGTTGTCATGGGTTAGCTGGTGGCTCTCACGAACCAATTATCAGATCATTGACTGAAAATCTATCTAAGGTTGGTAATGGGAAATTCCAAGTGGTTGTCCTAAATACTCGTGGCTGTGCACGTTCTAAGATTACTACTCGTAACCTATTTACTGCTTTCCATACTATGGATCTACGTGAATTTGTCAACAGAGAACACCAAAAACATCCTAACAGAAAGATTTATGCCGTTGGATTTTCATTCGGGGGTACAATGTTAGCAAATTATTTAGGAGAAGAAGGTGATAAAACTCCAATTGCATCTGCTGCAGTGTTATGTAACCCGTGGGATATGGTATTATCCGGTATGAAAACGAGAGATGATTTTTGGACAAGAACGCTATTTGCTAAGAATATTACAGATTTCTTGACTAGAATGGTTAAAGTTAATATGGCAGAATTGGAATCTCCAGATGGTTCTAAGCCTGATCACATCCCAACAGTGAAAAATCCATCTTATTATACATTTACCCAAGAAAATTTGGCAAAAGCCAAGGATTTTAAATTAATATCTGACTTTGATGACGTATTTACTGCACCTGCATTGGGTTTCAAAAACGCATTGGAGTACTACGCTGCAGCTGGGTCCATTAACAGACTACCTAATATTAAGATTCCTTTATTAGTTATCAATTCCACTGATGATCCAGTTGTTGGGCCGGATCCAATCCCAAACCATATCATAGATTCAAACAACCACCTACTGCTATGTGAAACCGATATCGGTGGCCATTTGGCATATTTGGATAAAAATAATGATTCATGGTCAACGAACCAAATCGCCAATTATTTCGACAAATTTGATGAAGTGGCATTA',
        'sbay': 'ATGTCAGAAGTTTCAAAGTGGCCAGCTATTAACCCATTTCATTGGGGGTACAACGGTACAGTTTCACATGTCGTTGGTGGTAATGGTTCTGTGAAGTTAAGCTTGAAGAGCGATAAGGAGCAAGTCGAGTTTGATACGTTTGTTAATAAATATGTCCCGATTCTGAAAAACGGGGCCCATTATAAACTAAGTCCCTACTTGTTCACAGGTATTTTACAAACCCTATACTTGAACGCTGCTGATTTCTCAAAGAAATTTCCCGTATTTTATGGTAGAGAAATCGTCAAGTTCTCGGATGACGGTGTCTGTACTGCTGATTGGGTCATGAACTCTTGGGAAAAGGAATATGATTTCGACCAAAAGACTATGAAATTTGATACGAAGAAGTTTGGCGACGACGAAAAGGCGACGCACCCAGAAGGATGGCCTCGTTTACAACCACGTACGAGGTACCTCAGGGACGAAGAGTTGGAAGAACAGAGAAAAGTAGATCTTCCCCTAGTTATCATCCTCCATGGTCTTGCCGGAGGCAGTCATGAACCAATCATAAGATCCCTAACTGAGAACTTGTCTCGTATCGGCAATGGGAGATTCCAAGTCGTGGTGCTAAACACGAGAGGCTGTGCACGTTCTAAAATCACCACTAGAAACCTATTCACAGCTTACCACACAATGGATATCCGTGAGTTCTTGCAAAGGGAAAAAGAAAGATATCCAAACAGAAAATTATACACTGTAGGGTGCTCTTTCGGGGCTACCATGTTAGCAAACTATTTGGGTGAAGAAGGTGACAAATCACCTGTATCTGCTGCTGTTACGTTATGTAATCCTTGGGATCTTCTTCTTTCGGCACTTAGAATGACTGAAGACTGGTGGTCAAAAACTTTGTTTTCTAAAAATATTGCCCAATTTTTAACAAGAACCGTTCAAGTTAACATGGGCGAATTAGGTGTTCCAAATGGCTCTCGTCCTGACCATACACCTACAGTTAAAAATCCATCTTACTATAAGTTCACACCTGAGAATTTGATGAAGGCAAAGCGCTTTAAGTCGAGTCTCGAATTCGATGAGCTGTACACTGCACCAGCTTTGGGCTTCCCGAATGCTATGGAATATTATAAATCAGCTAGTTCAATCAACAGGGCTGATAAAATCAAGGTTCCTACTTTAGTAATCAATTCTAGAGATGATCCTGTTGTTGGCCCAGACCAA---CCTTATTCATTTGTGGAGAAGAACCCTAATATACTATTCTGTAGAACCGACCTAGGTGGCCATTTAGCCTACCTAGATAGCAACAATGATTCGTGGGTTACAAAGGCGATTTCCGAGTTCTTGAATAAGTTTGAGGAGTTAGTGTTA',
        'scer': 'ATGTCAGAAGTTTCCAAATGGCCAGCAATCAACCCATTCCATTGGGGATACAATGGTACAGTTTCGCATATTGTCGGTGAAAATGGTTCCATTAAACTCCATTTAAAAGACAACAAGGAGCAAGTTGATTTTGACGAGTTCGCTAACAAATATGTCCCAACGTTGAAGAATGGTGCCCAATTCAAATTGAGTCCTTACTTGTTCACAGGTATTTTGCAAACTTTGTACTTAGGTGCTGCTGATTTCTCTAAGAAATTTCCTGTATTCTACGGCAGGGAAATTGTCAAATTCTCGGATGGTGGAGTTTGCACCGCTGACTGGCTCATAGATTCATGGAAAAAGGATTATGAATTCGATCAAAGTACTACGAGCTTTGATAAAAAAAAATTTGATAAAGACGAGAAGGCGACACATCCAGAAGGATGGCCTCGTTTACAACCACGTACAAGGTACCTGAAAGATAATGAGTTGGAAGAACTACGGGAGGTTGATCTACCCCTAGTAGTTATTCTACATGGTCTTGCTGGTGGTAGTCATGAGCCGATTATAAGATCTCTTGCTGAAAACCTGTCTCGC------AGTGGGAGATTTCAAGTGGTCGTCCTAAATACCAGAGGTTGTGCACGTTCCAAAATTACCACCAGAAATTTATTTACAGCTTATCACACAATGGATATTCGCGAGTTTTTGCAAAGAGAAAAGCAAAGACATCCAGATAGAAAACTATACGCTGTGGGATGCTCTTTTGGTGCTACGATGCTGGCAAACTATCTGGGAGAAGAGGGCGATAAATCACCTTTATCCGCAGCTGCTACTTTGTGCAATCCTTGGGATCTTCTCCTTTCAGCAATTAGGATGAGCCAGGATTGGTGGTCAAGAACTTTATTTTCCAAAAATATTGCGCAATTCTTAACAAGAACCGTTCAGGTTAATATGGGTGAATTAGGAGTTCCAAATGGCTCTCTCCCCGATCATCCTCCCACAGTCAAGAATCCATCTTTCTATATGTTCACGCCTGAAAATCTAATAAAGGCAAAGAGCTTTAAATCGACCCGGGAATTTGATGAAGTGTACACTGCGCCTGCTTTAGGCTTCCCAAATGCTATGGAGTATTATAAAGCGGCCAGCTCAATAAACAGAGTTGATACAATTCGGGTTCCTACCCTTGTTATCAATTCCAGGGATGATCCTGTTGTCGGCCCAGATCAA---CCATACTCAATCGTGGAAAAGAATCCTCGTATTTTGTATTGTAGAACCGATTTAGGTGGTCATTTAGCTTACCTAGATAAAGACAACAACTCGTGGGCTACCAAGGCAATTGCAGAATTTTTCACTAAGTTTGATGAATTAGTCGTA',
    }
    seq_labels = [x.name for x in sub_tree.leaves]
    seqs = [seq_dict[k] for k in seq_labels]
    # Run PAML
    # Work on a copy so the class-level FMutSel_F_options is not modified for
    # every later user of paml.CodeML.
    opts = copy.copy(paml.CodeML.FMutSel_F_options)
    opts["RateAncestor"] = "1"
    cm = paml.CodeML("codon", opts)
    try:
        cm.loadSequences(seqs, seq_labels, str(sub_tree))
        cm.run()
        cm.putBranchRatesOnTree(seq_labels, sub_tree, label="rate")
        cm.putAncestralSequencesOnTree(seq_labels, sub_tree, label="sequence")
        # Now remap the tree.
        # Both lookup tables are built before the labels are mapped, exactly as
        # in the original ordering; only leaf names are used as keys below.
        whole_node_dict = dict((x.name, x) for x in whole_tree.nodes)
        sub_node_dict = dict((x.name, x) for x in sub_tree.nodes)
        sub_names = [x.name for x in sub_tree.leaves]
        #print whole_node_dict["scer"].getMostRecentCommonAncestor(whole_node_dict["smik"]).name
        newick.tree.mapLabelsOntoSubtree(whole_tree, sub_tree)
        # Every unordered pair of subtree leaves must share an identically
        # named most recent common ancestor in both trees.
        for i, s1 in enumerate(sub_names):
            for s2 in sub_names[i + 1:]:
                sub_mrca = sub_node_dict[s1].getMostRecentCommonAncestor(sub_node_dict[s2])
                mrca = whole_node_dict[s1].getMostRecentCommonAncestor(whole_node_dict[s2])
                assert sub_mrca.name == mrca.name
    finally:
        # Always clean up the CodeML working directory, even on failure. The
        # getattr guard avoids masking an earlier error if tmpdir was never
        # set (it is not visible here when CodeML creates it).
        tmpdir = getattr(cm, "tmpdir", None)
        if tmpdir is not None:
            shutil.rmtree(tmpdir)
def test003():
    """Check that labels mapped from the full species tree onto a subtree agree.

    Parses a five-species tree and a three-species subtree, labels the
    internal nodes of the full tree, runs CodeML on the subtree's sequences
    (placing branch rates and ancestral sequences on the subtree), maps the
    full tree's labels onto the subtree, and finally asserts that for every
    pair of subtree leaves the most recent common ancestor carries the same
    name in both trees.

    The CodeML temporary directory is removed even if the run or an
    assertion fails.
    """
    import copy  # local import: only needed to protect the shared options

    print("** Test 003 **")
    # Tree remapping
    whole_tree = newick.tree.parseTree("((((scer,spar),smik),sbay),scas);")
    sub_tree = newick.tree.parseTree("((scer,smik),scas);")
    # Name nodes on whole tree
    newick.tree.labelInternalNodes(whole_tree)
    if False:
        # Deliberately disabled: only needed if you want more/other data.
        # It rebuilds aligned sequences from local genome files; the output
        # was presumably pasted into seq_dict below (TODO confirm).
        # Load the genes
        load_fxn = biofile.getIDFunction("vanilla")
        cdna_dicts = {}
        geneutil.readGenomesFromFile(
            os.path.expanduser(
                "~/research/data/scerevisiae/saccharomyces-files.txt"),
            os.path.expanduser("~/bio/genomes"), cdna_dicts, 1, load_fxn)
        align_dict = pickle.load(
            file(
                os.path.expanduser(
                    "~/research/data/scerevisiae/scer-ortholog-alignments.p"),
                'r'))
        (nal, spec_orf_list, protal) = align_dict["YBR177C"]
        aligned_seqs = {}
        for xi in range(len(spec_orf_list)):
            (spec, orf) = spec_orf_list[xi]
            gene = cdna_dicts[spec][orf]
            prot = protal[xi]
            aligned_gene = muscle.alignGeneFromProtein(gene, prot)
            aligned_seqs[spec] = aligned_gene
        print(aligned_seqs)
    # Gapped nucleotide sequences keyed by species name. Only the species that
    # appear as leaves of sub_tree are actually looked up below.
    seq_dict = {
        'spar': 'ATGTCAGAAGTTTCGAAATGGCCAGCTATCAACCCGTTCCATTGGGGATACAATGGTACTGTTTCACATGTCGTCGGTGAAAATGGTTCCATCAAACTAAATTTAAAAGACAACAAGGAACAGGTTGAATTTGACGAGTTCGTTAACAAATATGTCCCAACGTTGAAGAATGGTGCTCAATTTAAATTGAGTCCTTACTTGTTCACAGGTATTTTGCAAACCCTGTACTTAAATGCTGCTGATTTCTCTAAGAAATTTCCTGTATTCTACGGCAGAGAAATTGTCAAATTCTCGGATAATGGAGTTTGTACCGCTGACTGGCTCATGGATTCCTGGAAGAAGGATTACAAACTCGATCAAAGTACTATGGGTTTCGATAAGAAAAAATTTGGTGAAGACGAGAAGGAGACGCATCCAGAAGGGTGGCCTCGTTTACAACCACGTACAAGGTATCTGAAGGATAATGAATTGGAAAATGTAAGGGAGGTTGATCTGCCCTTAGTAGTTATCCTACATGGTCTTGCTGGTGGTAGTCATGAGCCTATCATAAGATCTCTTGCTGAAAACCTCTCTCGG------AGTGGGAGATTTCAAGTGGTGGTACTAAATACTAGAGGCTGTGCACGTTCTAAAATTACAACCAGAAATTTATTTACGGCTTACCACACAATGGATATTCGTGAATTTTTGCAAAGAGAAAAGGAGAGATATCCAAATAGAAAATTATACGCTGTGGGATGCTCCTTCGGTGCTACGATGTTGGGAAACTATCTGGGAGAAGAAGGCGATAAATCTCCTTTATCTGCAGCTGCTACCCTGTGCAACCCTTGGGATCTTCTCCTTTCGGCACTTAGAATGACCGAGGATTGGTGGTCAAAGACTTTATTTTCCAAAAATATTGCCCAATTCTTAACAAGAACTGTTCAAGTTAATATGGGTGAACTAGGAGTTCCAAATGGCTCCCGTCCTGACCATCCTCCCACAGTCAAGAATCCATCTTACTATATGTTCACACCTGAAAATCTAATAAAGGCAAAAAGCTTTAAATCGAGTCTGGAATTTGATGAATTGTACACTGCGCCTGCTTTAGGCTTCCCAAATGCTATGGAATATTATAAAGCGGCCAGCTCAATAAGCAGAGTTGATACAATTCGGGTTCCTACTCTAGTTATCAATTCAAGGGATGATCCTGTTGTCGGCCCGGATCAA---CCTTACTCAATCGTGGAAAAAAATCCTCGTGTTTTGTATTGTAGAACCGACTTAGGAGGTCATTTAGCTTACCTAGATAAAGACAATAATTCGTGGGCTACCAAGGCGATTGCAGAATTCTTTACTAAGTTTGATGAATTAGTTGTA',
        'smik': 'ATGTCAGAAGTTTCGAAATGGCCAGCTATCAACCCATTCCATTGGGGATACAATGGTACTGTTTCGCATGTTGTCGGTGAAAATGGTTCCATGAAACTAGGTTTAAAAGATAACAAGGAACAGATTGAATTTGATCAGTTCGTTAACAAATATGTTCCAAGTTTGAAGAATGGTGCTCACTTCAAATTGAGTCCTTACTTGTTCACAGGTATTTTACAAACGTTGTACTTAAACGCTGCAGACTTCTCGAAGAAATTTCCTGTATTTTATGGCAGAGAAATTATCAAATTCTCCGATAATGGAGTTTGCACCGCTGATTGGGTTATGAGCTCCTGGAAGAGGGATTACAAACTCAATCAAAGTACCATGAGCTTTGATAAAAGCAAATTCGACGGAGACGAAAAAGCGACGCATCCAGAAGGATGGCCTCGTTTACAACCACGTACAAGATATTTGAAGGATAATGAGTTAGAGGAGCTCAGAGAAATTGAGCTCCCCTTAGTAGTCATTTTGCATGGACTTGCCGGTGGCAGTCATGAACCGATCATAAGATCTCTTGCTGAAAACCTGTCTCGC------AGCGGGAAATTTCAAGTGGTGGTGCTAAATACCAGAGGTTGTGCACGCTCCAAAATTACAACCAGGAACTTATTCACGGCTTACCACACAATGGATATCCGTGAATTTTTGCAAAGAGAAAATCAAAGACATCCAAACAGAAAGTTATACGCTGTAGGATGTTCTTTTGGTGCCACCATGTTGGGGAATTATCTCGGTGAAGAAGGTGATAAATCACCTTTATCTGCAGCTGCTACCTTATGCAATCCTTGGGACCTTCTCCTTTCAGCGCTTAGAATGACCGAGGATTGGTGGTCAAAAACTTTATTTTCCAAAAATATTGCACAGTTTTTAACAAGAACCGTTCAAGTTAACATGGGTGAACTAGGAGTCCCAAACGGCTCTCATCCCGACCATCCTCCTACAGTAAAAAATCCATCCTACTATATGTTCACCCCTGAAAATTTAATAAAGGCAAAACACTTCAAATCGAGTCTGGAATTCGATGAATTGTATACTGCACCTGCTTTAGGCTTTCCAAATGCAATGGAGTATTATAAAGCAGCTAGCTCAATAAACAGAGTTGCTACAATTAAGGTTCCTACTTTAGTTATCAATTCTAGAGATGATCCTGTTGTCGGCCCAGATCAG---CCTTATTCAATTGTAGAAAAAAACCCTCGTATTTTGTATTGCGGAACCGATTTAGGGGGCCATTTAGCCTACCTAGATAATGACAATAACTCATGGGCAACTAAGGCGATTGCAGAATTCTTTACTAAATTTGATCAACTGGTTGTA',
        'scas': 'ATGGCTTCACAATCAACATATCCACTCATTAAACCATGGAATTGGGGGTATCACGGAACCGTGACCCAAATTACCAGTAAGGAAGGTACTGTACTCATTCCATTAAAGGACAACAAAGAGGGTATTCCATTAGCAGAATTAGTTTCAAAGAATGTCCCTAGTTTAAAGGATGGTGCTAAGTTTGAGTTGAAACCTTTTTTATTCACTGGTATTTTACAAACTCTGTACCTTGGCGCAGCTGACTTTTCTAAGAAATTCCAAGTCTTTTATGGTAGAGAAATTGTGGAATTCTCAGATACTGGTGTATGTACTGCCGATTGGGTAATGCCATCTTGGAAGCAAAAATATAACTTTAATGAGAAAACATCAACTTTTGACAAGAAAGCATTCGACCTGGACGAAAAAGAAACACATCCAGACAATTGGCCTCGTTTGCAACCTCGTACCAGATACTTAAATGAAAAAGAAATGACGACTATCCACGAGGATGACAGACCATTGGTTGTTTGTTGTCATGGGTTAGCTGGTGGCTCTCACGAACCAATTATCAGATCATTGACTGAAAATCTATCTAAGGTTGGTAATGGGAAATTCCAAGTGGTTGTCCTAAATACTCGTGGCTGTGCACGTTCTAAGATTACTACTCGTAACCTATTTACTGCTTTCCATACTATGGATCTACGTGAATTTGTCAACAGAGAACACCAAAAACATCCTAACAGAAAGATTTATGCCGTTGGATTTTCATTCGGGGGTACAATGTTAGCAAATTATTTAGGAGAAGAAGGTGATAAAACTCCAATTGCATCTGCTGCAGTGTTATGTAACCCGTGGGATATGGTATTATCCGGTATGAAAACGAGAGATGATTTTTGGACAAGAACGCTATTTGCTAAGAATATTACAGATTTCTTGACTAGAATGGTTAAAGTTAATATGGCAGAATTGGAATCTCCAGATGGTTCTAAGCCTGATCACATCCCAACAGTGAAAAATCCATCTTATTATACATTTACCCAAGAAAATTTGGCAAAAGCCAAGGATTTTAAATTAATATCTGACTTTGATGACGTATTTACTGCACCTGCATTGGGTTTCAAAAACGCATTGGAGTACTACGCTGCAGCTGGGTCCATTAACAGACTACCTAATATTAAGATTCCTTTATTAGTTATCAATTCCACTGATGATCCAGTTGTTGGGCCGGATCCAATCCCAAACCATATCATAGATTCAAACAACCACCTACTGCTATGTGAAACCGATATCGGTGGCCATTTGGCATATTTGGATAAAAATAATGATTCATGGTCAACGAACCAAATCGCCAATTATTTCGACAAATTTGATGAAGTGGCATTA',
        'sbay': 'ATGTCAGAAGTTTCAAAGTGGCCAGCTATTAACCCATTTCATTGGGGGTACAACGGTACAGTTTCACATGTCGTTGGTGGTAATGGTTCTGTGAAGTTAAGCTTGAAGAGCGATAAGGAGCAAGTCGAGTTTGATACGTTTGTTAATAAATATGTCCCGATTCTGAAAAACGGGGCCCATTATAAACTAAGTCCCTACTTGTTCACAGGTATTTTACAAACCCTATACTTGAACGCTGCTGATTTCTCAAAGAAATTTCCCGTATTTTATGGTAGAGAAATCGTCAAGTTCTCGGATGACGGTGTCTGTACTGCTGATTGGGTCATGAACTCTTGGGAAAAGGAATATGATTTCGACCAAAAGACTATGAAATTTGATACGAAGAAGTTTGGCGACGACGAAAAGGCGACGCACCCAGAAGGATGGCCTCGTTTACAACCACGTACGAGGTACCTCAGGGACGAAGAGTTGGAAGAACAGAGAAAAGTAGATCTTCCCCTAGTTATCATCCTCCATGGTCTTGCCGGAGGCAGTCATGAACCAATCATAAGATCCCTAACTGAGAACTTGTCTCGTATCGGCAATGGGAGATTCCAAGTCGTGGTGCTAAACACGAGAGGCTGTGCACGTTCTAAAATCACCACTAGAAACCTATTCACAGCTTACCACACAATGGATATCCGTGAGTTCTTGCAAAGGGAAAAAGAAAGATATCCAAACAGAAAATTATACACTGTAGGGTGCTCTTTCGGGGCTACCATGTTAGCAAACTATTTGGGTGAAGAAGGTGACAAATCACCTGTATCTGCTGCTGTTACGTTATGTAATCCTTGGGATCTTCTTCTTTCGGCACTTAGAATGACTGAAGACTGGTGGTCAAAAACTTTGTTTTCTAAAAATATTGCCCAATTTTTAACAAGAACCGTTCAAGTTAACATGGGCGAATTAGGTGTTCCAAATGGCTCTCGTCCTGACCATACACCTACAGTTAAAAATCCATCTTACTATAAGTTCACACCTGAGAATTTGATGAAGGCAAAGCGCTTTAAGTCGAGTCTCGAATTCGATGAGCTGTACACTGCACCAGCTTTGGGCTTCCCGAATGCTATGGAATATTATAAATCAGCTAGTTCAATCAACAGGGCTGATAAAATCAAGGTTCCTACTTTAGTAATCAATTCTAGAGATGATCCTGTTGTTGGCCCAGACCAA---CCTTATTCATTTGTGGAGAAGAACCCTAATATACTATTCTGTAGAACCGACCTAGGTGGCCATTTAGCCTACCTAGATAGCAACAATGATTCGTGGGTTACAAAGGCGATTTCCGAGTTCTTGAATAAGTTTGAGGAGTTAGTGTTA',
        'scer': 'ATGTCAGAAGTTTCCAAATGGCCAGCAATCAACCCATTCCATTGGGGATACAATGGTACAGTTTCGCATATTGTCGGTGAAAATGGTTCCATTAAACTCCATTTAAAAGACAACAAGGAGCAAGTTGATTTTGACGAGTTCGCTAACAAATATGTCCCAACGTTGAAGAATGGTGCCCAATTCAAATTGAGTCCTTACTTGTTCACAGGTATTTTGCAAACTTTGTACTTAGGTGCTGCTGATTTCTCTAAGAAATTTCCTGTATTCTACGGCAGGGAAATTGTCAAATTCTCGGATGGTGGAGTTTGCACCGCTGACTGGCTCATAGATTCATGGAAAAAGGATTATGAATTCGATCAAAGTACTACGAGCTTTGATAAAAAAAAATTTGATAAAGACGAGAAGGCGACACATCCAGAAGGATGGCCTCGTTTACAACCACGTACAAGGTACCTGAAAGATAATGAGTTGGAAGAACTACGGGAGGTTGATCTACCCCTAGTAGTTATTCTACATGGTCTTGCTGGTGGTAGTCATGAGCCGATTATAAGATCTCTTGCTGAAAACCTGTCTCGC------AGTGGGAGATTTCAAGTGGTCGTCCTAAATACCAGAGGTTGTGCACGTTCCAAAATTACCACCAGAAATTTATTTACAGCTTATCACACAATGGATATTCGCGAGTTTTTGCAAAGAGAAAAGCAAAGACATCCAGATAGAAAACTATACGCTGTGGGATGCTCTTTTGGTGCTACGATGCTGGCAAACTATCTGGGAGAAGAGGGCGATAAATCACCTTTATCCGCAGCTGCTACTTTGTGCAATCCTTGGGATCTTCTCCTTTCAGCAATTAGGATGAGCCAGGATTGGTGGTCAAGAACTTTATTTTCCAAAAATATTGCGCAATTCTTAACAAGAACCGTTCAGGTTAATATGGGTGAATTAGGAGTTCCAAATGGCTCTCTCCCCGATCATCCTCCCACAGTCAAGAATCCATCTTTCTATATGTTCACGCCTGAAAATCTAATAAAGGCAAAGAGCTTTAAATCGACCCGGGAATTTGATGAAGTGTACACTGCGCCTGCTTTAGGCTTCCCAAATGCTATGGAGTATTATAAAGCGGCCAGCTCAATAAACAGAGTTGATACAATTCGGGTTCCTACCCTTGTTATCAATTCCAGGGATGATCCTGTTGTCGGCCCAGATCAA---CCATACTCAATCGTGGAAAAGAATCCTCGTATTTTGTATTGTAGAACCGATTTAGGTGGTCATTTAGCTTACCTAGATAAAGACAACAACTCGTGGGCTACCAAGGCAATTGCAGAATTTTTCACTAAGTTTGATGAATTAGTCGTA',
    }
    seq_labels = [x.name for x in sub_tree.leaves]
    seqs = [seq_dict[k] for k in seq_labels]
    # Run PAML
    # Work on a copy so the class-level FMutSel_F_options is not modified for
    # every later user of paml.CodeML.
    opts = copy.copy(paml.CodeML.FMutSel_F_options)
    opts["RateAncestor"] = "1"
    cm = paml.CodeML("codon", opts)
    try:
        cm.loadSequences(seqs, seq_labels, str(sub_tree))
        cm.run()
        cm.putBranchRatesOnTree(seq_labels, sub_tree, label="rate")
        cm.putAncestralSequencesOnTree(seq_labels, sub_tree, label="sequence")
        # Now remap the tree.
        # Both lookup tables are built before the labels are mapped, exactly as
        # in the original ordering; only leaf names are used as keys below.
        whole_node_dict = dict((x.name, x) for x in whole_tree.nodes)
        sub_node_dict = dict((x.name, x) for x in sub_tree.nodes)
        sub_names = [x.name for x in sub_tree.leaves]
        #print whole_node_dict["scer"].getMostRecentCommonAncestor(whole_node_dict["smik"]).name
        newick.tree.mapLabelsOntoSubtree(whole_tree, sub_tree)
        # Every unordered pair of subtree leaves must share an identically
        # named most recent common ancestor in both trees.
        for i, s1 in enumerate(sub_names):
            for s2 in sub_names[i + 1:]:
                sub_mrca = sub_node_dict[s1].getMostRecentCommonAncestor(
                    sub_node_dict[s2])
                mrca = whole_node_dict[s1].getMostRecentCommonAncestor(
                    whole_node_dict[s2])
                assert sub_mrca.name == mrca.name
    finally:
        # Always clean up the CodeML working directory, even on failure. The
        # getattr guard avoids masking an earlier error if tmpdir was never
        # set (it is not visible here when CodeML creates it).
        tmpdir = getattr(cm, "tmpdir", None)
        if tmpdir is not None:
            shutil.rmtree(tmpdir)
parser.add_option("-d", "--dict-out", dest="score_dict_fname", type="string", default=None, help="score dictionary output filename") parser.add_option("-s", "--scores-out", dest="score_fname", type="string", default="vanilla", help="format of ID in FASTA entry") parser.add_option("-p", "--pseudocount", dest="pseudocount", type="float", default=0.0, help="pseudocount to be added to all frequencies") (options, args) = parser.parse_args() in_fname = args[0] info_outs = util.OutStreams(sys.stdout) data_outs = util.OutStreams() # Start up output if not options.out_fname is None: outf = file(options.out_fname, 'w') data_outs.addStream(outf) else: data_outs.addStream(sys.stdout) formatFxn = biofile.getIDFunction(options.format) cdna_dict = biofile.readFASTADict(in_fname, formatFxn) calc = Calculator() calc.initializeFromSequences(cdna_dict.values(), options.pseudocount) syn_dict = calc.getCodonSYNScores() syn_opt_codons = [] for aa in translate.degenerateAAs(): codons = translate.getCodonsForAA(aa, rna=False) best_syn_codon = sorted([(syn_dict[c],c) for c in codons])[-1][1] syn_opt_codons.append(best_syn_codon) data_outs.write("# Read {0}\n#{1:d} sequences, {2:d} codons, {3:d} nucleotides\n".format(in_fname, len(cdna_dict.keys()), int(sum(calc.codon_freq.values())), int(sum(calc.nucleotide_freq.values())))) data_outs.write("# syn_scores = {0!s}\n".format(syn_dict)) data_outs.write("# SYN opt codons = {0!s}\n".format(sorted(syn_opt_codons))) data_outs.write("{0!s}".format(calc)) if not options.score_dict_fname is None: