def showTreeWithPictures(tree = None, alignment=None, branchLengths=True, bootstrapSupport=True, tolabel=None):
    """Render a tree to PDF and PNG, highlighting the requested nodes.

    Args:
        tree: tree input handed to ``EvolTree``.
        alignment: optional alignment; when truthy it is linked to the tree
            using the "paml" alignment format.
        branchLengths: shown on the plot when true; also adds "_Len" to the
            output file name.
        bootstrapSupport: shown on the plot when true; also adds "_Boot" to
            the output file name.
        tolabel: optional container of node ids (as strings); nodes whose
            ``str(node_id)`` is in it are drawn with the highlight style.

    Output files are named after the module-level ``FILE`` prefix.  When the
    module-level ``INTERACTIVE`` flag is truthy the tree is also shown.
    """
    print("ShowTreeWithPictures",tree, alignment, branchLengths,bootstrapSupport, tolabel)

    if not alignment:
        evol_tree = EvolTree(tree)
    else:
        evol_tree = EvolTree(tree, alignment, alg_format="paml")
        evol_tree.link_to_alignment(alignment, alg_format="paml")

    # Style applied to every node listed in ``tolabel``.
    highlight = NodeStyle()
    highlight["fgcolor"] = "darkgreen"
    highlight["size"] = 8

    for current in evol_tree.traverse():
        print(current.node_id)
        if tolabel and str(current.node_id) in tolabel:
            current.set_style(highlight)

    style = TreeStyle()
    style.show_leaf_name = True
    style.show_branch_length = branchLengths
    style.show_branch_support = bootstrapSupport

    # The file name records which annotations were switched on.
    prefix = FILE
    if branchLengths:
        prefix = prefix + "_Len"
    if bootstrapSupport:
        prefix = prefix + "_Boot"

    for extension in (".pdf", ".png"):
        evol_tree.render(prefix + "_tree" + extension, tree_style=style)

    if INTERACTIVE:
        evol_tree.show(tree_style=style)
def labelForPaml(unlabelledTreeAsString,listOfNodes, tree):
    """Mark the given nodes of a tree and write the labelled tree to a file.

    The i-th entry of ``listOfNodes`` (1-based) is paired with the mark
    ``"#i"`` and both lists are passed to ``EvolTree.mark_tree``.  The
    resulting tree text is printed and written to
    ``<tree>.<id1>_<id2>_...``.

    Args:
        unlabelledTreeAsString: tree input handed to ``EvolTree``.
        listOfNodes: node ids to mark.
            NOTE(review): ``mark_tree`` appears to compare these against
            ``node.node_id``; confirm whether callers pass ints or strings.
        tree: path prefix used to build the output file name.
    """
    t = EvolTree(unlabelledTreeAsString)

    # One distinct mark per requested node: "#1", "#2", ...
    marks = ["#" + str(position) for position, _ in enumerate(listOfNodes, 1)]
    t.mark_tree(listOfNodes, marks=marks)

    # Serialise once and reuse the text for both the console and the file.
    labelled = t.write()
    print(labelled)

    # str() keeps the file name buildable when the ids are integers; a plain
    # "_".join(listOfNodes) raises TypeError for non-string ids.
    outfile = tree + "." + "_".join(str(node_id) for node_id in listOfNodes)
    with open(outfile, 'w') as out:
        out.write(labelled)
def showTreeNodes(unlabelledTreeAsString):
    """Print every node of the tree together with the leaves below it.

    For each traversed node that reports at least one leaf name, the node
    itself is printed, followed by its leaf names, name and node id, and a
    blank separator.
    """
    evol_tree = EvolTree(unlabelledTreeAsString)
    for current in evol_tree.traverse():
        leaf_names = current.get_leaf_names()
        # (A filter on the GENES prefix of each leaf name used to live here.)
        if not leaf_names:
            continue
        print(current)
        print(leaf_names, current.name, current.node_id)
        print("\n")
def showAlignmentWithTree(unlabelledTreeAsString,alignment):
    """Link an alignment to a tree, dump its nodes and open the viewer.

    Prints each traversed node with its id and name, then the tree as
    returned by ``write()``, and finally calls ``show()``.
    """
    paml_options = {"alg_format": "paml"}
    evol_tree = EvolTree(unlabelledTreeAsString, alignment, **paml_options)
    evol_tree.link_to_alignment(alignment, **paml_options)

    for current in evol_tree.traverse():
        print(current)
        print(current.node_id, current.name)

    newick_text = evol_tree.write()
    print(newick_text)

    evol_tree.show()  # a custom layout (evol_clean_layout) is not applied
def showAlignmentWithTree(tree,alignment):
    """Show and render a tree with every node labelled by its node id.

    The alignment is linked in "paml" format, each traversed node gets a
    text face with its ``node_id`` in column 0, and the tree is passed
    through ``fakeUnroot`` before being shown and rendered to "tree.pdf".
    """
    print(tree)
    evol_tree = EvolTree(tree, alignment, alg_format="paml")

    # Foreground style kept from the original; it is not applied to any node
    # because the corresponding set_style call is disabled.
    nsFG = NodeStyle()
    nsFG["fgcolor"] = "darkgreen"
    nsFG["size"] = 15

    evol_tree.link_to_alignment(alignment, alg_format="paml")

    for current in evol_tree.traverse():
        print(current)
        id_face = TextFace(str(current.node_id))
        current.add_face(id_face, column=0)
        if current.get_leaf_names():
            print(current.name, current.node_id)

    style = TreeStyle()
    for flag in ("show_leaf_name", "show_branch_length", "show_branch_support"):
        setattr(style, flag, True)

    evol_tree = fakeUnroot(evol_tree)
    evol_tree.show(tree_style=style)
    evol_tree.render("tree.pdf", tree_style=style)
def showAlignmentWithTree(unlabelledTreeAsString,alignment):
    """Link an alignment to a tree, list the nodes and open the viewer.

    Prints each traversed node followed by its id and name, then prints the
    ``alignment`` argument itself, and finally calls ``show()``.
    """
    linked_tree = EvolTree(unlabelledTreeAsString, alignment, alg_format="paml")
    linked_tree.link_to_alignment(alignment, alg_format="paml")

    for visited in linked_tree.traverse():
        print(visited)
        identifier, label = visited.node_id, visited.name
        print(identifier, label)

    print(alignment)
    linked_tree.show()
example of computation and display of an ancestral sequence computed under free'ratio model. """ __author__ = "Francois-Jose Serra" __email__ = "*****@*****.**" __licence__ = "GPLv3" __version__ = "0.0" from ete2 import TreeStyle from ete2 import EvolTree from ete2 import faces tree = EvolTree("data/S_example/measuring_S_tree.nw") tree.link_to_alignment("data/S_example/alignment_S_measuring_evol.fasta") print tree print "\n Running free-ratio model with calculation of ancestral sequences..." tree.run_model("fb_anc") # tree.link_to_evol_model('/tmp/ete2-codeml/fb_anc/out', 'fb_anc') I = TreeStyle() I.force_topology = False I.draw_aligned_faces_as_table = True I.draw_guiding_lines = True I.guiding_lines_type = 2 I.guiding_lines_color = "#CCCCCC"
def showTreeNodes(unlabelledTreeAsString):
    """Print each node of the tree, followed by its node id and name."""
    parsed_tree = EvolTree(unlabelledTreeAsString)
    for visited in parsed_tree.traverse():
        print(visited)
        identifier = visited.node_id
        label = visited.name
        print(identifier, label)
print "Alignment missing: " + align_file no_results_file.write(cluster + "\n") #Check alignment length. If only two sequences, move to the next one fasta_count = 0 for line in open(align_file, 'r'): line = line.strip() if line.startswith(">"): fasta_count += 1 if not fasta_count > 2: continue node_id_2_names = defaultdict() for descend in EvolTree(tree_file).iter_descendants(): node_id_2_names[descend.node_id] = descend.get_leaf_names() #Results, the first element has: #The second is a dictionary with the positive selected sites #results_dict[cluster] = run_site_branch(cluster, tree_file, align_file, temp_folder, plot_folder) p = pool.apply_async(run_site_branch, args=( cluster, tree_file, align_file, temp_folder, plot_folder, ),
""" 15 Nov 2010 example to illustrate use of sites model, displaying and comparison """ __author__ = "Francois-Jose Serra" __email__ = "*****@*****.**" __licence__ = "GPLv3" __version__ = "0.0" from ete2 import EvolTree tree = EvolTree ("data/S_example/measuring_S_tree.nw") tree.link_to_alignment ('data/S_example/alignment_S_measuring_evol.fasta') print tree raw_input ('\n tree and alignment loaded\n Hit some key, to start computation of site models M1 and M2.\n') print 'running model M1' tree.run_model ('M1') print 'running model M2' tree.run_model ('M2') print '\n\n comparison of models M1 and M2, p-value: ' + str(tree.get_most_likely ('M2','M1')) #tree.show()
def run_site_tests(cluster_name, treefile, alignment, folder_temp, folder_plots):
    """Run the M1 and M2 site models for one cluster and summarise them.

    Args:
        cluster_name: name used in messages, the work folder and the plot.
        treefile: tree input handed to ``EvolTree``.
        alignment: nucleotide alignment linked in "fasta" format.
        folder_temp: parent folder of the per-cluster work directory.
        folder_plots: folder receiving ``<cluster_name>.svg``.

    Returns:
        ``[result_entry, ps_sites]`` where ``result_entry`` is
        ``[cluster_name, omega, proportion, pval, status, total_sites,
        sites_over_95]`` and ``ps_sites`` maps a site index to
        ``[aa, p2]`` for sites whose BEB ``p2`` value exceeds 0.50.
        ``status`` is "Positive" when ``pval < 0.05`` and omega > 1,
        otherwise ``None``.
    """
    from ete2 import EvolTree
    from ete2.treeview.layouts import evol_clean_layout
    import os
    from collections import defaultdict
    import math
    from scipy.stats import chi2

    print("Processing cluster: " + cluster_name)

    evol_tree = EvolTree(treefile)
    evol_tree.link_to_alignment(alignment, alg_format="fasta", nucleotides=True)

    # Per-cluster working directory for the model runs.
    workdir = folder_temp + "/" + cluster_name
    if not os.path.exists(workdir):
        os.makedirs(workdir)
    evol_tree.workdir = workdir

    # M1 is the null model, M2 the alternative.
    for model_name in ("M1", "M2"):
        evol_tree.run_model(model_name)
    null_model = evol_tree.get_evol_model("M1")
    alt_model = evol_tree.get_evol_model("M2")

    # Collect sites from the BEB table of the alternative model.
    beb = alt_model.sites["BEB"]
    ps_sites = defaultdict()
    total_sites = 0
    sites_over_95 = 0
    for index in range(len(beb["aa"])):
        posterior = float(beb["p2"][index])
        if posterior > 0.50:
            ps_sites[index] = [beb["aa"][index], beb["p2"][index]]
            total_sites += 1
        if posterior > 0.95:
            sites_over_95 += 1

    # Likelihood-ratio test, p-value from a chi-square with 2 d.o.f.
    lrt_value = 2 * math.fabs(null_model.lnL - alt_model.lnL)
    pval = 1 - chi2.cdf(lrt_value, 2)

    omega_value = float(alt_model.classes["w"][2])
    proportion_sites = float(alt_model.classes["proportions"][2])

    plot_file = folder_plots + "/" + cluster_name
    col2 = dict.fromkeys(("NS", "RX", "RX+", "CN", "CN+", "PS", "PS+"), "black")

    test_status = None
    if pval < 0.05 and omega_value > 1:
        # Only clusters with a signal get a plot (SVG only).
        test_status = "Positive"
        alt_model.set_histface(
            up=False,
            kind="curve",
            colors=col2,
            ylim=[0, 4],
            hlines=[2.5, 1.0, 4.0, 0.5],
            hlines_col=["orange", "yellow", "red", "cyan"],
            errors=True,
        )
        evol_tree.render(plot_file + ".svg", layout=evol_clean_layout, histfaces=["M2"])

    summary = [
        cluster_name,
        omega_value,
        proportion_sites,
        pval,
        test_status,
        total_sites,
        sites_over_95,
    ]
    return [summary, ps_sites]
def main():
    """Load the tree and precomputed PAML results, print LRT p-values, show it.

    Reads ``tree.nw``, the ``paml/<model>/<model>.out`` files for the models
    fb, M1, M2, M7 and M8, and ``alignments.fasta_ali``, all relative to the
    module-level ``WRKDIR`` prefix.  ``random_swap`` is applied before any
    model is linked and ``check_annotation`` right after the "fb" model.
    """
    tree = EvolTree(WRKDIR + 'tree.nw')
    tree.workdir = 'data/protamine/PRM1/paml/'

    random_swap(tree)
    tree.link_to_evol_model(WRKDIR + 'paml/fb/fb.out', 'fb')
    check_annotation(tree)

    for model in ('M1', 'M2', 'M7', 'M8'):
        tree.link_to_evol_model(WRKDIR + 'paml/%s/%s.out' % (model, model), model)
    tree.link_to_alignment(WRKDIR + 'alignments.fasta_ali')

    # Function-call form of print works on both Python 2 and 3.  The two
    # spaces after the colon reproduce what the old ``print x,`` soft-space
    # followed by a second print statement emitted.
    for alternative, null in (('M2', 'M1'), ('M8', 'M7')):
        p_value = tree.get_most_likely(alternative, null)
        print('pv of LRT %s vs %s:  %s' % (alternative, null, p_value))

    tree.show(histfaces=['M2'])
    print('The End.')
def run_site_tests(cluster_name, treefile, alignment, folder_temp, folder_plots):
    """Run the M1/M2 site-model comparison for one cluster.

    Args:
        cluster_name: name used in messages, the work folder and the plot.
        treefile: tree input handed to ``EvolTree``.
        alignment: nucleotide alignment linked in "fasta" format.
        folder_temp: parent folder of the per-cluster work directory.
        folder_plots: folder receiving ``<cluster_name>.svg`` when a signal
            is found.

    Returns:
        ``[result_entry, ps_sites]``.  ``result_entry`` is
        ``[cluster_name, omega_value, proportion_sites, pval, test_status,
        total_sites, sites_over_95]``; ``ps_sites`` maps site index to
        ``[aa, p2]`` for every site whose BEB ``p2`` is above 0.50.

    Raises:
        OSError: if the work directory cannot be created for a reason other
            than it already existing.
    """
    from ete2 import EvolTree
    from ete2.treeview.layouts import evol_clean_layout
    import errno
    import os
    from collections import defaultdict
    import math
    from scipy.stats import chi2

    print("Processing cluster: " + cluster_name)

    tree = EvolTree(treefile)
    tree.link_to_alignment(alignment, alg_format="fasta", nucleotides=True)

    # Create the temporary folder.  The function may run in several worker
    # processes at once, so a separate exists() check can race with another
    # worker; attempt the creation and tolerate "already exists" instead.
    temp_cluster_folder = folder_temp + "/" + cluster_name
    try:
        os.makedirs(temp_cluster_folder)
    except OSError as exc:
        if exc.errno != errno.EEXIST or not os.path.isdir(temp_cluster_folder):
            raise
    tree.workdir = temp_cluster_folder

    # M1 is the null model, M2 the alternative model.
    tree.run_model("M1")
    tree.run_model("M2")
    model1 = tree.get_evol_model("M1")
    model2 = tree.get_evol_model("M2")

    # Gather the sites reported in the BEB table of M2.
    beb_sites = model2.sites['BEB']
    ps_sites = defaultdict()
    total_sites = 0
    sites_over_95 = 0
    for s in range(len(beb_sites['aa'])):
        p_value_site = float(beb_sites['p2'][s])
        if p_value_site > 0.50:
            ps_sites[s] = [beb_sites['aa'][s], beb_sites['p2'][s]]
            total_sites += 1
        if p_value_site > 0.95:
            sites_over_95 += 1

    # Likelihood-ratio test; p-value from a chi-square with 2 d.o.f.
    lrt_value = 2 * math.fabs(model1.lnL - model2.lnL)
    pval = 1 - chi2.cdf(lrt_value, 2)

    omega_value = float(model2.classes['w'][2])
    proportion_sites = float(model2.classes['proportions'][2])

    plot_file = folder_plots + "/" + cluster_name
    col2 = {
        'NS': 'black',
        'RX': 'black',
        'RX+': 'black',
        'CN': 'black',
        'CN+': 'black',
        'PS': 'black',
        'PS+': 'black',
    }

    test_status = None
    if pval < 0.05 and omega_value > 1:
        # Save an SVG plot for clusters with evidence of positive selection.
        test_status = "Positive"
        model2.set_histface(up=False,
                            kind='curve',
                            colors=col2,
                            ylim=[0, 4],
                            hlines=[2.5, 1.0, 4.0, 0.5],
                            hlines_col=['orange', 'yellow', 'red', 'cyan'],
                            errors=True)
        tree.render(plot_file + ".svg",
                    layout=evol_clean_layout,
                    histfaces=['M2'])

    result_entry = [
        cluster_name, omega_value, proportion_sites, pval, test_status,
        total_sites, sites_over_95
    ]
    output_list = [result_entry, ps_sites]
    return output_list
""" 15 Nov 2010 first example, load a tree and compute free ratios model, to find omega value of each branch. """ __author__ = "Francois-Jose Serra" __email__ = "*****@*****.**" __licence__ = "GPLv3" __version__ = "0.0" from ete2 import EvolTree tree = EvolTree ("data/S_example/measuring_S_tree.nw") print tree raw_input ('\n tree loaded, hit some key.\n') print 'Now, it is necessary to link this tree to an alignment:' tree.link_to_alignment ('data/S_example/alignment_S_measuring_evol.fasta') raw_input ('\n alignment loaded, hit some key to see.\n') tree.show() print ''' we will run free-ratio model that is one of models available through
def labelForPamlRegex(unlabelledTreeAsString, regex, tree):
    """Style nodes whose name matches ``regex``, write marked trees, render a PNG.

    For every traversed node whose name matches the pattern, the node and up
    to ``MAX_PARENT`` of its ancestors are styled, ``mark_tree`` is called,
    and the tree text is written to ``<tree>.<node_id>`` (one file per
    ancestor and one for the matching node itself).  Finally the tree is
    rendered to ``<tree>.png``.

    Args:
        unlabelledTreeAsString: tree input handed to ``EvolTree``.
        regex: pattern applied with ``re.match`` to each node name.
        tree: path prefix for the written files and the PNG.

    Returns:
        Tuple ``(outfiles, tolabelreg)``: the list of written file paths and
        the list of ancestor node ids (as strings).
    """
    pattern = re.compile(regex)
    t = EvolTree(unlabelledTreeAsString)
    marks = []
    count = 1
    outfiles = []
    #nsFG=TreeStyle()
    # Style of a node whose name matches the pattern.
    nsMatch = NodeStyle() #match
    nsMatch["fgcolor"] = "blue"
    nsMatch["size"] = 10
    # Background style, initially applied to every node.
    nsBG = NodeStyle()
    nsBG["fgcolor"] = "black"
    nsBG["size"] = 0
    # One style per ancestor level, coloured from NODE_COLORS.
    nsFG = []
    tolabelreg = []
    for i in range(0,MAX_PARENT):
        nsFG.append( NodeStyle())
        nsFG[i]["size"] = 10
        nsFG[i]["fgcolor"] = NODE_COLORS[i]
    # NOTE(review): isroot is assigned but never read in this function.
    isroot=True
    for node in t.traverse():
        node.set_style(nsBG)
        if node.is_root():
            print("root")
            node.unroot()
            node._support = None
    # Show each descendant's node id next to it.
    for node in t.get_descendants():
        node.add_face(TextFace(node.node_id), column=0)
    #traverse and match
    for node in t.traverse():
        if re.match(pattern, node.name):
            print("MATCH", node.name, node.node_id)
            node.set_style(nsMatch)
            n = node
            try:
                # Walk up towards the root, one file per ancestor.
                for i in range(0,MAX_PARENT):
                    n = n.up
                    n.set_style(nsFG[i])
                    marks.append("#"+str(count))
                    print(count)
                    # NOTE(review): count is never incremented, so this always
                    # passes ['1'] while marks keeps growing; confirm that
                    # mark_tree matches string ids as intended.
                    t.mark_tree([str(count)], marks=marks) #just label everything with #1
                    print(t.write())
                    tolabelreg.append(str(n.node_id))
                    outfile = tree+"."+"_".join([str(n.node_id)])
                    with open(outfile, 'w') as out:
                        out.write(t.write())
                    outfiles.append(outfile)
            # Presumably raised when n.up is None above the root, which ends
            # the climb early - TODO confirm.
            except AttributeError:
                pass
            # Same marking and export once more, named after the matching node.
            marks.append("#"+str(count))
            print(count)
            t.mark_tree([str(count)], marks=marks) #just label everything with #1
            print(t.write())
            outfile = tree+"."+"_".join([str(node.node_id)])
            with open(outfile, 'w') as out:
                out.write(t.write())
            outfiles.append(outfile)
    #t.show()
    t.render(tree+".png")
    return(outfiles, tolabelreg)
def showTreeWithPictures(tree = None, alignment=None, branchLengths=True, bootstrapSupport=True, tolabel=None,showZScores=False,showLogs=False ):
    """Render a tree with per-node pictures, or show it with its alignment.

    Without an alignment, nodes listed in ``tolabel`` are highlighted, nodes
    with a matching PNG in ``PICS`` get that picture as an aligned face, and
    the tree is rendered to ``<prefix>_tree.pdf`` and ``<prefix>_tree.png``.
    With an alignment, the alignment is linked in "paml" format and the tree
    is only shown interactively.

    Args:
        tree: tree input handed to ``EvolTree``.
        alignment: optional alignment; selects the alignment-view branch.
        branchLengths: show branch lengths; adds "_Len" to the file prefix.
        bootstrapSupport: show branch support; adds "_Boot" to the prefix.
        tolabel: optional container of node ids (as strings) to highlight.
        showZScores: adds "_Z" to the file prefix when true.
        showLogs: adds "_L" to the file prefix when true.

    Relies on the module-level names ``PICS``, ``PICDIR``, ``FILE`` and
    ``INTERACTIVE``.
    """
    print(PICS)
    print("ShowTreeWithPictures",tree, alignment, branchLengths,bootstrapSupport, tolabel,showZScores,showLogs )

    if alignment:
        # Alignment view: no labels or custom tree style are applied yet.
        t = EvolTree(tree, alignment, alg_format="paml")
        t.link_to_alignment(alignment, alg_format="paml")
        t.show()
        return

    nsFG = NodeStyle()
    nsFG["fgcolor"] = "darkgreen"
    nsFG["size"] = 8

    t = EvolTree(tree)
    for node in t.traverse():
        print(node.node_id)
        if tolabel and str(node.node_id) in tolabel:
            node.set_style(nsFG)

        # Prefer a picture named after the part of the node name before the
        # first "_"; fall back to one named after the full node name.
        prefix_png = node.name.split("_")[0] + ".png"
        full_png = node.name + ".png"
        if prefix_png in PICS:
            picture = prefix_png
        elif full_png in PICS:
            picture = full_png
        else:
            picture = None
        if picture is not None:
            print(picture)
            node.add_face(ImgFace(PICDIR + os.sep + picture, height=50),
                          column=1, position="aligned")

    ts = TreeStyle()
    ts.show_leaf_name = True
    ts.show_branch_length = branchLengths
    ts.show_branch_support = bootstrapSupport

    # The output name records which options were switched on.
    out = FILE
    if branchLengths:
        out += "_Len"
    if bootstrapSupport:
        out += "_Boot"
    # These two tests previously read the names Z and L, which are not
    # defined in this function; the showZScores / showLogs parameters were
    # otherwise unused, so they are what the suffixes are keyed on.
    if showZScores:
        out += "_Z"
    if showLogs:
        out += "_L"

    t.render(out + "_tree.pdf", tree_style=ts)
    t.render(out + "_tree.png", tree_style=ts)
    if INTERACTIVE:
        t.show(tree_style=ts)
def label_regex(unlabeled_tree, regex, treefile, depth=4, model_list=None, paml_msa=None, outfile=None):
    """Mark ancestors of regex-matching nodes, write trees and control files.

    For each traversed node whose name matches ``regex``, up to ``depth``
    ancestors are styled, ``mark_tree`` is called, and the tree text is
    written to ``<treefile>.<ancestor node_id>``.  Afterwards ``generateCtl``
    is called for every collected ancestor id and every model in
    ``model_list``, and the tree (after ``fakeUnroot``) is rendered in
    circular mode to ``<treefile>.png``.

    Args:
        unlabeled_tree: tree input handed to ``EvolTree``.
        regex: pattern applied with ``re.match`` to each node name.
        treefile: path prefix for the written trees and the PNG.
        depth: number of ancestor levels to walk up from each match.
        model_list: models passed one by one to ``generateCtl``; ``None``
            (the default) means no control files are generated.
        paml_msa: forwarded to ``generateCtl`` as ``seqfile``.
        outfile: accepted for interface compatibility; its value is not read.

    Returns:
        Tuple ``(outfiles, tolabelreg)``: written file paths and the
        ancestor node ids (as strings).
    """
    pattern = re.compile(regex)
    t = EvolTree(unlabeled_tree)
    marks = []
    count = 1
    outfiles = []

    ts = TreeStyle()
    ts.mode = "c"

    # Style of a node whose name matches the pattern.
    nsMatch = NodeStyle()
    nsMatch["fgcolor"] = "blue"
    nsMatch["size"] = 10
    # Style of every non-matching node.
    nsBG = NodeStyle()
    nsBG["fgcolor"] = "black"
    nsBG["size"] = 0
    # One style object per ancestor level.
    nsFG = []
    tolabelreg = []
    for i in range(0, depth):
        nsFG.append(NodeStyle())
        nsFG[i]["size"] = 10
        nsFG[i]["fgcolor"] = "blue"

    # (A pass that unrooted the tree and reset the root support is disabled.)

    # Show each descendant's node id next to it.
    for node in t.get_descendants():
        node.add_face(TextFace(node.node_id), column=0)

    # Traverse and match.
    for node in t.traverse():
        if re.match(pattern, node.name):
            node.set_style(nsMatch)
            n = node
            try:
                for i in range(0, depth):
                    n = n.up
                    n.set_style(nsFG[i])
                    marks.append("#" + str(count))
                    # NOTE(review): count is never incremented, so this always
                    # passes ['1'] while marks keeps growing; confirm that
                    # mark_tree matches string ids as intended.
                    t.mark_tree([str(count)], marks=marks)
                    tolabelreg.append(str(n.node_id))
                    labelled_path = treefile + "." + "_".join([str(n.node_id)])
                    with open(labelled_path, 'w') as out:
                        out.write(t.write())
                    outfiles.append(labelled_path)
            # Presumably raised when n.up is None above the root, which ends
            # the climb early - TODO confirm.
            except AttributeError:
                pass
        else:
            node.set_style(nsBG)

    # model_list defaults to None; treat that as "no models" instead of
    # failing with a TypeError on the first collected node.
    models = model_list if model_list is not None else ()
    for f in tolabelreg:
        print(f,"FFF")
        for m in models:
            generateCtl(model=m, treefile=treefile+"."+f, seqfile=paml_msa, outfile=treefile+"."+f, generateOther=False)

    t = fakeUnroot(t)
    t.render(treefile+".png", tree_style=ts)
    return outfiles, tolabelreg
results_list.append(entry_results) site_file.close() #Create the pool of processors pool = multiprocessing.Pool(args.num_processors) run_results = [] for cluster in clusters_to_analyze: tree_file = args.tree_folder + "/" + cluster + ".tre" align_file = args.align_folder + "/" + cluster + ".fna" node_id_2_names = defaultdict() for entry in EvolTree(tree_file).iter_descendants(): node_id_2_names[entry.node_id] = entry.get_leaf_names() #Check that the files exists if not os.path.exists(tree_file): print "Tree file missing: " + tree_file no_results_file.write(cluster + "\n") continue if not os.path.exists(align_file): print "Alignment missing: " + align_file no_results_file.write(cluster + "\n") #Results, the first element has: #The second is a dictionary with the positive selected sites
15 Nov 2010 example of tests for different rates among sites in clades """ __author__ = "Francois-Jose Serra" __email__ = "*****@*****.**" __licence__ = "GPLv3" __version__ = "0.0" from ete2 import EvolTree from ete2 import NodeStyle tree = EvolTree ("data/S_example/measuring_S_tree.nw") tree.link_to_alignment ('data/S_example/alignment_S_measuring_evol.fasta') print tree print 'Tree and alignment loaded.' raw_input ('Tree will be mark in order to contrast Gorilla and Chimpanzee as foreground \nspecies.') marks = ['1', 3, '7'] tree.mark_tree (marks, ['#1'] * 3) print tree.write () # display marked branches in orange for node in tree.traverse (): if not hasattr (node, 'mark'):
def run_branch_test(cluster_name, treefile, alignment, folder_temp, folder_plots):
    """Run branch-site models bsA / bsA1 with each descendant node marked.

    M0 is run once; then, for every node from ``iter_descendants()``, the
    node is marked "#1", both branch-site models are run under names
    suffixed with the node id, and a result entry plus the BEB sites with
    ``p2`` above 0.50 are collected.

    Args:
        cluster_name: name used in messages and for the work folder.
        treefile: tree input handed to ``EvolTree``.
        alignment: nucleotide alignment linked in "fasta" format.
        folder_temp: parent folder of the per-cluster work directory.
        folder_plots: folder used to build ``plot_file`` (rendering is
            currently commented out).

    Returns:
        ``output_list``: ``[]`` if no node was processed, otherwise
        ``[result_entry, ps_sites]``.
        NOTE(review): ``output_list`` is reassigned on every iteration, so
        only one node's result seems to survive - confirm whether an append
        was intended.
    """
    from ete2 import EvolTree
    from ete2.treeview.layouts import evol_clean_layout
    import os
    from collections import defaultdict
    import math
    from scipy.stats import chi2
    print "Processing cluster: " + cluster_name
    tree = EvolTree(treefile)
    tree.link_to_alignment(alignment, alg_format="fasta", nucleotides=True)
    #Create temporal folder
    temp_cluster_folder = folder_temp + "/" + cluster_name
    if not os.path.exists(temp_cluster_folder):
        os.makedirs(temp_cluster_folder)
    tree.workdir = temp_cluster_folder
    #Run M0 as the null model
    tree.run_model("M0")
    #Look at the site selection on each branch
    printed_tree = 0
    # NOTE(review): i is assigned but never read in this function.
    i = 0
    #Output list with the results
    output_list = []
    for node in tree.iter_descendants():
        #Mark the tree for the leaf under analysis
        tree.mark_tree([node.node_id], marks=["#1"])
        #Use the node id as folder name
        temp_leaf_name = str(node.node_id)
        print "Processing: " + cluster_name + " " + temp_leaf_name + " " + ",".join(node.get_leaf_names())
        #Run computation of each model.
        #From the notes on ETE:
        # to organize a bit, we name model with the name of the marked node
        # any character after the dot, in model name, is not taken into account
        # for computation. (have a look in /tmp/ete2.../bsA.. directory)
        tree.run_model("bsA." + temp_leaf_name)
        tree.run_model("bsA1." + temp_leaf_name)
        bsA = tree.get_evol_model("bsA." + temp_leaf_name)
        bsA1 = tree.get_evol_model("bsA1." + temp_leaf_name)
        # Collect BEB sites of the bsA model: index -> [aa, p2] for p2 > 0.50.
        ps_sites = defaultdict()
        total_sites = 0
        sites_over_95 = 0
        for s in range(len(bsA.sites['BEB']['aa'])):
            p_value_site = float(bsA.sites['BEB']['p2'][s])
            if p_value_site > 0.50:
                ps_sites[s] = [bsA.sites['BEB']['aa'][s], bsA.sites['BEB']['p2'][s]]
                total_sites += 1
            if p_value_site > 0.95:
                sites_over_95 += 1
        #ps = float(tree.get_most_likely("bsA." + temp_leaf_name, "bsA1." + temp_leaf_name))
        # rx: comparison of bsA1 against M0, as returned by get_most_likely.
        rx = float(tree.get_most_likely("bsA1." + temp_leaf_name, "M0"))
        lrt_value = 2 * math.fabs(bsA1.lnL - bsA.lnL)  # LRT test value
        ps = 1 - chi2.cdf(lrt_value, 1)  # p-value based on chi-square
        test_status = None
        #Evidence of positive selection in the branch
        omega_value = float(bsA.classes['foreground w'][2])
        proportion_sites = float(bsA.classes['proportions'][2])
        #Plot file
        plot_file = folder_plots + "/" + cluster_name
        if ps < 0.05 and omega_value > 1:
            #Save plots, both in jpg and svg of the clusters with evidence of positive selection
            test_status = "Positive"
            if printed_tree == 0:
                #tree.render(plot_file + ".svg", layout=evol_clean_layout)
                #tree.render(plot_file + ".jpg", layout=evol_clean_layout)
                printed_tree = 1
            else:
                # NOTE(review): this skips the mark removal and the result
                # entry below for every positive node after the first one -
                # confirm this is intended.
                continue
        elif rx < 0.05 and ps >= 0.05:
            test_status = "Relaxed"
        else:
            #print "no signal"
            test_status = None
        #Remove marks on the tree
        tree.mark_tree(map(lambda x: x.node_id, tree.get_descendants()), marks=[''] * len(tree.get_descendants()), verbose=False)
        result_entry = [cluster_name, node.node_id, omega_value, proportion_sites, ps, test_status, total_sites, sites_over_95, ",".join(node.get_leaf_names())]
        # print result_entry
        #print ps_sites
        #node_results[node.node_id] = [result_entry, ps_sites]
        output_list = [result_entry, ps_sites]
    return output_list
""" 15 Nov 2010 simple example to mark a tree and compute branch-site test of positive selection """ __author__ = "Francois-Jose Serra" __email__ = "*****@*****.**" __licence__ = "GPLv3" __version__ = "0.0" from ete2 import EvolTree tree = EvolTree("data/S_example/measuring_S_tree.nw") tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta') print tree raw_input('\n tree and alignment loaded\nHit some key, to start computation of branch site models A and A1 on each branch.\n') print 'running model M0, for comparison with branch-site models...' tree.run_model('M0') # each node/leaf has two kind of identifiers node_id and paml_id, to mark nodes we have to specify # the node_id of the nodes we want to mark, and the kind of mark in this way: for leaf in tree: leaf.node_id print '\n---------\nNow working with leaf ' + leaf.name