def run_codeml(mark_id, aln_file, tree_file, sleep):
    """Run the branch-site test for a single foreground branch.

    Fits model M0 and the branch-site models bsA and bsA1 with ``mark_id``
    marked as foreground (``#1``), compares them with
    ``EvolTree.get_most_likely`` and labels the branch accordingly.

    Args:
        mark_id: Node id of the branch to mark. Its string form also names
            the run directory and suffixes the model names.
        aln_file: Alignment handed to ``EvolTree.link_to_alignment``.
        tree_file: Tree loaded with ``EvolTree(tree_file, format=0)``.
        sleep: Start-up delay budget; the call sleeps
            ``round(sleep / args.threads, 2)`` seconds before doing any work.

    Returns:
        list: ``[mark_id, ps, rx, p_bsA, label]`` where ``ps`` compares bsA
        against bsA1, ``rx`` compares bsA1 against M0, ``p_bsA`` is entry 2
        of the bsA ``'proportions'`` classes, and ``label`` is one of
        ``'positive selection'``, ``'relaxation'`` or ``'no signal'``.

    Raises:
        OSError: from ``os.makedirs`` if the run directory already exists.
    """
    branch = str(mark_id)
    alt_model = 'bsA.' + branch
    null_model = 'bsA1.' + branch

    logger.info('sub-process: {0}'.format(branch))
    time.sleep(round(sleep / args.threads, 2))

    branch_dir = os.path.join(output_dir, branch)
    os.makedirs(branch_dir)

    evol_tree = EvolTree(tree_file, format=0)
    evol_tree.link_to_alignment(aln_file)
    # NOTE(review): M0 is fitted before workdir is pointed at branch_dir, so
    # it runs in whatever working directory the tree starts with -- confirm
    # this is intended when several sub-processes run at once.
    evol_tree.run_model('M0')
    evol_tree.workdir = branch_dir
    evol_tree.mark_tree([mark_id], marks=['#1'])
    for model_name in (alt_model, null_model):
        evol_tree.run_model(model_name)

    ps = evol_tree.get_most_likely(alt_model, null_model)
    rx = evol_tree.get_most_likely(null_model, 'M0')

    site_classes = evol_tree.get_evol_model(alt_model).classes
    foreground_proportion = site_classes['proportions'][2]
    foreground_omega = site_classes['foreground w'][2]

    if ps < 0.05 and float(foreground_omega) > 1:
        label = 'positive selection'
    elif rx < 0.05 and ps >= 0.05:
        label = 'relaxation'
    else:
        label = 'no signal'
    return [mark_id, ps, rx, foreground_proportion, label]
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta') print tree raw_input('\n tree and alignment loaded\nHit some key, to start computation of branch site models A and A1 on each branch.\n') print 'running model M0, for comparison with branch-site models...' tree.run_model('M0') # each node/leaf has two kind of identifiers node_id and paml_id, to mark nodes we have to specify # the node_id of the nodes we want to mark, and the kind of mark in this way: for leaf in tree: leaf.node_id print '\n---------\nNow working with leaf ' + leaf.name tree.mark_tree([leaf.node_id], marks=['#1']) print tree.write() # to organize a bit, we name model with the name of the marked node # any character after the dot, in model name, is not taken into account # for computation. (have a look in /tmp/ete3.../bsA.. directory) print 'running model bsA and bsA1' tree.run_model('bsA.'+ leaf.name) tree.run_model('bsA1.' + leaf.name) print 'p-value of positive selection for sites on this branch is: ' ps = tree.get_most_likely('bsA.' + leaf.name, 'bsA1.'+ leaf.name) rx = tree.get_most_likely('bsA1.'+ leaf.name, 'M0') print str(ps) print 'p-value of relaxation for sites on this branch is: ' print str(rx) if ps < 0.05 and float(bsA.wfrg2a) > 1: print 'we have positive selection on sites on this branch'
evotree = EvolTree(subtree.write()) subfasta = make_clean_fasta(closest_seq_ids, seqdatadict) if not subfasta: omega_list.append("NA") continue else: evotree.link_to_alignment(subfasta) workdirname = './codeml_' + "__".join(closest_seq_ids) evotree.workdir = workdirname list_of_tempdirs.append(workdirname) # mark the foreground branch foreground_leafnode = evotree & seqid # print (seqid) # print(foreground_leafnode.node_id) # print (evotree.write()) evotree.mark_tree([foreground_leafnode.node_id], ['#1']) # print (evotree.write()) evotree.run_model('b_free.run') b_free_fit = evotree.get_evol_model('b_free.run') out_branches_dict = b_free_fit.branches for b in out_branches_dict: if out_branches_dict[b]["mark"] == " #1": # check if there are at least 1 synonymous substitutions expected on this branch... otherwise not very meaningful to estimate omega (it will be very high). if out_branches_dict[b]["S"] * out_branches_dict[b][ "dS"] >= 1.0: omega = out_branches_dict[b]["w"] else: omega = "NA" break omega_list.append(omega)
from ete3 import EvolTree from ete3 import NodeStyle tree = EvolTree ("data/S_example/measuring_S_tree.nw") tree.link_to_alignment ('data/S_example/alignment_S_measuring_evol.fasta') print (tree) print ('Tree and alignment loaded.') input ('Tree will be mark in order to contrast Gorilla and Chimpanzee as foreground \nspecies.') marks = ['1', '3', '7'] tree.mark_tree (marks, ['#1'] * 3) print (tree.write ()) print ('we can easily colorize marked branches') # display marked branches in orange for node in tree.traverse (): if not hasattr (node, 'mark'): continue if node.mark == '': continue node.img_style = NodeStyle () node.img_style ['bgcolor'] = '#ffaa00' tree.show() print ('''now running branch models free branch models, 2 groups of branches, one with Gorilla and
from ete3 import EvolTree from ete3 import NodeStyle tree = EvolTree("data/S_example/measuring_S_tree.nw") tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta') print(tree) print('Tree and alignment loaded.') input( 'Tree will be mark in order to contrast Gorilla and Chimpanzee as foreground \nspecies.' ) marks = ['1', 3, '7'] tree.mark_tree(marks, ['#1'] * 3) print(tree.write()) # display marked branches in orange for node in tree.traverse(): if not hasattr(node, 'mark'): continue if node.mark == '': continue node.img_style = NodeStyle() node.img_style['bgcolor'] = '#ffaa00' tree.show() print('''now running branch-site models C and D that represents the addition of one class of sites in on specific branch. These models must be compared to null models M1 and M3.
from ete3 import EvolTree from ete3 import NodeStyle tree = EvolTree("data/S_example/measuring_S_tree.nw") tree.link_to_alignment("data/S_example/alignment_S_measuring_evol.fasta") print(tree) print("Tree and alignment loaded.") input("Tree will be mark in order to contrast Gorilla and Chimpanzee as foreground \nspecies.") marks = ["1", 3, "7"] tree.mark_tree(marks, ["#1"] * 3) print(tree.write()) # display marked branches in orange for node in tree.traverse(): if not hasattr(node, "mark"): continue if node.mark == "": continue node.img_style = NodeStyle() node.img_style["bgcolor"] = "#ffaa00" tree.show() print( """now running branch-site models C and D that represents
for node in tree.traverse('postorder'): list_of_node_ids.append(node.node_id) test_taxa = [] with open(test_taxa_file, 'r') as test_taxa_list: for taxon in test_taxa_list: taxon = taxon.rstrip() test_taxa.append(taxon) marked_taxon_ids = [] # Mark test taxa for taxon in test_taxa: taxon_node = tree & taxon # ete3 notation for finding a node within a tree marked_taxon_id = taxon_node.node_id # mark_tree only takes node_ids, not # labels tree.mark_tree([marked_taxon_id]) marked_taxon_ids.append(marked_taxon_id) # Find internal nodes below the test taxa and mark them for i in range(len(test_taxa), 1, -1): taxa_groups = [x for x in combinations(test_taxa, i)] for group in taxa_groups: common_node = tree.get_common_ancestor(*group) marked_taxon_id = common_node.node_id tree.mark_tree([marked_taxon_id]) marked_taxon_ids.append(marked_taxon_id) best_model = { 'M0': None, 'b_free': None, 'bsA1': None,
# Script fragment: branch-site test (bsA vs bsA1) with the leaves under the
# common ancestor of two reference sequences marked as foreground.
tree = EvolTree(
    "tree.nw",
    binpath="/home/edu/miniconda3/envs/ete3/bin/ete3_apps/bin",
)
tree.link_to_alignment("infile.phy", alg_format="phylip")
tree.workdir = os.getcwd()
print(tree)

print('running model M0, for comparison with branch-site models...')
tree.run_model('M0', keep=True)
# Alternative kept from the original script:
#tree.link_to_evol_model("/home/edu/Desktop/Bioinformatica/Mitogenomics/Chondrichthyes/Phylogenetic_Tree","M0")

chimaeriformes = tree.get_common_ancestor("HM147138.1", "HM147135.1")
#chimaeriformes =tree.get_common_ancestor("Human_ECP","Goril_ECP")

# Mark each leaf under the ancestor as foreground.
# NOTE(review): the loop is taken to contain only the mark_tree call; the
# flattened source did not preserve indentation -- confirm.
for leaf in chimaeriformes:
    tree.mark_tree([leaf.node_id], marks=["#1"])
    #tree.run_model("bsA." + chimaeriformes)
    #tree.mark_tree([leaf.node_id], marks = ["#1"])

print("Running")
print(tree.write())
tree.run_model('bsA.Chimaeriformes')
tree.run_model("bsA1.Chimaeriformes")

print('p-value of positive selection for sites on this branch is: ')
ps = tree.get_most_likely('bsA.Chimaeriformes', 'bsA1.Chimaeriformes')
print(ps)
rx = tree.get_most_likely('bsA1.Chimaeriformes', 'M0')
print(rx)

model = tree.get_evol_model("bsA.Chimaeriformes")
# Report positive selection only when the test is significant and entry 2 of
# the 'foreground w' classes exceeds 1.
if ps < 0.05:
    if float(model.classes['foreground w'][2]) > 1:
        print('we have positive selection on sites on this branch')