def parse_tree(t):
    """Load a tree from disk and hand it back as an ``EvolTree``.

    ``render('')`` is called on the freshly built tree before returning so
    that its nodes carry the IDs later used to apply labels/marks.

    :param t: tree source accepted by ``EvolTree`` (e.g. a newick file path).
    :return: the constructed ``EvolTree``.
    """
    evol_tree = EvolTree(t)
    # Called only for its side effect (node ID assignment); nothing is drawn
    # to a meaningful target.
    evol_tree.render('')
    return evol_tree
def prepare_codeml(round, fasta_file_name, species_tree, marks, wd):
    """Create every file codeml needs for one alignment.

    Produces, inside ``<wd>/<name>/`` (which also becomes the current working
    directory): the pruned (and, for branch / branch-site modes, marked)
    tree, the alignment converted to phylip, and the control files.

    :param round: run number; zero-padded into the run name and used (as a
        string) as the key into *marks*.
    :param fasta_file_name: alignment file name, located directly in *wd*.
    :param species_tree: tree source passed to ``EvolTree``.
    :param marks: mapping of ``str(round)`` to the species to mark.
    :param wd: working directory holding the fasta files.
    """
    # os.chdir() below would silently re-anchor a relative ``wd``; pin it to
    # an absolute path first so every later join still points at the inputs.
    wd = os.path.abspath(wd)

    run_name = "r" + str(round).zfill(2)
    tree = EvolTree(species_tree)  # fresh tree for every fasta
    name = fasta_file_name.replace(args.suffix, "")
    fasta_path = os.path.join(wd, fasta_file_name)
    gene_dir = os.path.join(wd, name)

    # Create the per-gene directory and move into it.
    create_dir(wd, name)
    os.chdir(gene_dir)

    # Keep only the genomes that are actually present in the alignment.
    genomes = fasta_ids(fasta_path)
    prune_tree(tree, genomes)

    # Mark branches only when a branch or branch-site model was selected.
    if args.mode in ["BM", "BS"]:
        mark_spp = list(set(marks[str(round)]).intersection(genomes))
        mark_branches(tree, mark_spp)

        # The gene-tree monophyly check needs ``mark_spp``, which exists only
        # in these modes, so it is kept inside this branch to avoid reading
        # an unbound name.
        if args.single_trees:
            gene_tree_path = os.path.join(gene_dir, fasta_file_name + ".ftree")
            fast_tree(fasta_path, gene_tree_path)
            gene_tree = midpoint_root(tree_features(gene_tree_path))
            if not is_monophyletic(gene_tree, mark_spp):
                logging.warning("Check monophyly in the clade-of-interest: {}".format(name))

    # Written relative to the per-gene directory we just changed into.
    tree.write(outfile=name + ".tree")

    # File format converter: MSA fasta --> Phylip
    fasta2phy(fasta_path, name + ".phy")

    # Create alt and null ctl files
    control_files(wd, args.mode, name, run_name)
def run(self, pamlsrc, output_folder, model='M1'):
    """Run PAML through ETE.

    The model defaults to M1, which the original author found best suited to
    orthology inference here; `M2`, `M0` and `M3` may be used as well. Make
    sure *pamlsrc* really points at the codeml binary (normally found in the
    paml `/bin` directory).

    :param pamlsrc:  Path to the codeml binary.
    :param output_folder:  The name of the output folder.
    :param model:  The model to be used.  (Default value = 'M1')
    """
    # The newick tree is always read from this fixed file name.
    evol_tree = EvolTree('temptree.nw')
    evol_tree.link_to_alignment(self.alignmentfile)
    evol_tree.workdir = self.workdir
    evol_tree.execpath = pamlsrc

    # codeml is invoked under the combined name "<model>.<output_folder>".
    model_path = '.'.join((model, output_folder))
    evol_tree.run_model(model_path)
    self.ete3paml_log.info('Codeml is generating data in %s.' % model_path)
def main(self):
    """Run the instance's default codeml model on its tree and alignment.

    Reads ``self.tree``, ``self.alignment``, ``self.workdir``,
    ``self.pamlpath`` and ``self.defaultmodel``.
    """
    print("Running model %s paml on input." % str(self.defaultmodel))

    evol_tree = EvolTree(self.tree)                 # newick tree
    evol_tree.link_to_alignment(self.alignment)     # alignment
    evol_tree.workdir = self.workdir                # working directory
    evol_tree.execpath = self.pamlpath              # location of the codeml binary
    evol_tree.run_model(self.defaultmodel)          # run codeml
def count_omega(align_file, gene_name):
    """Run the 'fb' model for one gene and pass the result on.

    The tree comes from the module-level ``tree_file``. The model result is
    printed, written as text to the module-level ``temp`` path, and then
    ``write_in_table(gene_name)`` is called.

    :param align_file: alignment linked to the tree.
    :param gene_name: gene label, printed and forwarded to ``write_in_table``.
    """
    print(gene_name)

    evol_tree = EvolTree(tree_file)
    evol_tree.link_to_alignment(align_file)

    # free branch ratio count
    evol_tree.run_model('fb')
    fb_results = evol_tree.get_evol_model('fb')
    print(fb_results)

    with open(temp, 'w') as temp_file:
        # Same text as str(fb_results), without a trailing newline.
        print(fb_results, file=temp_file, end='')

    write_in_table(gene_name)
def ete3paml(gene, paml_path, workdir='data/paml-output/', model='M1'):
    """Run a codeml model for *gene* through ETE3's PAML integration.

    The species tree is pruned to the organisms that occur in the gene's
    alignment, the alignment is linked, and the model ``"<model>.<gene>"`` is
    run.

    :param gene: gene name; selects the alignment under
        ``data/clustal-output/<gene>_Aligned/``.
    :param paml_path: location of the codeml binary (assigned to
        ``execpath``).
    :param workdir: directory codeml output is written to.
    :param model: codeml model name. (Default value = 'M1')
    """
    alignment_path = ('data/clustal-output/' + gene + '_Aligned/' + gene
                      + '_aligned_cds_nucl.fasta')

    # Species tree and the list of organisms it may contain.
    species_tree = Tree('data/initial-data/species_tree.nw', format=1)
    orgsfile = pd.read_csv('data/initial-data/organisms.csv', header=None)
    organismslist = formatlist(list(orgsfile[0]))

    # Read the alignment as text; the context manager closes the file even if
    # reading fails.
    with open(alignment_path, 'r') as alignment_file:
        alignment_str = alignment_file.read()

    # Some species may be absent from the alignment; keep only those whose
    # name appears somewhere in it.
    branches2keep = [organism for organism in organismslist
                     if organism in alignment_str]

    # prune() modifies the tree in place and returns None, so its result must
    # not be handed to EvolTree. Serialise the pruned tree instead and let
    # EvolTree parse that newick string.
    species_tree.prune(branches2keep, preserve_branch_length=True)
    tree = EvolTree(species_tree.write(format=1), format=1)

    tree.link_to_alignment(alignment_path)
    tree.workdir = workdir
    # Set the binpath of the codeml binary
    tree.execpath = paml_path
    # Run the codeml model
    tree.run_model(model + '.' + gene)
def main(args):
    """Build an EvolTree from the parsed CLI arguments and run the requested steps.

    Attributes read from *args*: ``Tree``, ``BinPath``, ``MSA``,
    ``LoadedModels``, ``Models``, ``Compare`` and ``TreeStruct``.

    :param args: parsed command-line namespace.
    """
    if args.BinPath:
        tree = EvolTree(args.Tree, binpath=args.BinPath)
    else:
        tree = EvolTree(args.Tree)

    # Link the alignment only when one was given. The extension test uses
    # endswith(): the previous slice ``[:-3]`` dropped the last three
    # characters instead of selecting the suffix, so it never matched.
    if args.MSA:
        if args.MSA.endswith(".phy"):
            # ``alg_format`` is the keyword link_to_alignment documents.
            tree.link_to_alignment(args.MSA, alg_format="phylip")
        else:
            tree.link_to_alignment(args.MSA)

    print(tree)
    tree.workdir = os.getcwd()

    if args.LoadedModels:
        load_model(args.LoadedModels, tree)
        compare_models(models=args.LoadedModels, tree=tree, args=args)

    if args.Models:
        # Same attribute as the guard above (was the lower-case ``args.models``).
        run_models(args.Models, tree)

    if args.Compare:
        if args.TreeStruct:
            tree_structure = parse_structure_file(args.TreeStruct)
            compare_models(models=args.Compare, tree=tree,
                           tree_structure=tree_structure)
        else:
            compare_models(models=args.Compare, tree=tree, args=args)
def pamlSite(alnFile, treeFile, lModels, pamlParams, outDir, baseName, logger): tree = EvolTree(treeFile) os.mkdir(outDir + "paml_site/") tree.workdir = outDir + "paml_site/" tree.link_to_alignment(alnFile, "Fasta") logger.info("PAML codeml") dModelRun = {} for model in lModels: if model in ["M0", "M1", "M2", "M7", "M8"]: logger.info("Running {:s}".format(model)) dModelRun[model] = tree.run_model(model) if "M1" and "M2" in dModelRun: p12 = tree.get_most_likely("M2", "M1") logger.info("LRT of M1 vs M2 = {}".format(p12)) if "M7" and "M8" in dModelRun: p78 = tree.get_most_likely("M8", "M7") logger.info("LRT of M7 vs M8 = {}".format(p78)) """
def tree_layout(tree_file, ps_node_list):
    """Render the tree to a PNG, colouring the nodes listed in *ps_node_list*.

    Nodes whose ``node_id`` is in *ps_node_list* get a red foreground; all
    other descendants get the plain style. The picture is written to
    ``positive_selection_tree.png`` inside the module-level ``output_dir``,
    using the module-level ``layout`` function.

    :param tree_file: tree source for ``EvolTree`` (parsed with ``format=0``).
    :param ps_node_list: container of node ids to highlight.
    """
    t = EvolTree(tree_file, format=0)

    plain_style = NodeStyle()
    plain_style['size'] = 6

    highlighted_style = NodeStyle()
    highlighted_style['fgcolor'] = '#ff0000'
    highlighted_style['size'] = 6

    for node in t.iter_descendants():
        target = t.get_descendant_by_node_id(node.node_id)
        chosen = highlighted_style if node.node_id in ps_node_list else plain_style
        target.img_style = chosen

    ts = TreeStyle()
    ts.layout_fn = layout
    ts.show_branch_support = False
    ts.show_branch_length = False
    ts.show_leaf_name = False

    result_picture = os.path.join(output_dir, 'positive_selection_tree.png')
    t.render(result_picture, tree_style=ts)
example of computation and display of an ancestral sequence computed under free'ratio model. """ __author__ = "Francois-Jose Serra" __email__ = "*****@*****.**" __licence__ = "GPLv3" __version__ = "0.0" from ete3 import TreeStyle from ete3 import EvolTree from ete3 import faces tree = EvolTree ("data/S_example/measuring_S_tree.nw") tree.link_to_alignment ('data/S_example/alignment_S_measuring_evol.fasta') print tree print '\n Running free-ratio model with calculation of ancestral sequences...' tree.run_model ('fb_anc') #tree.link_to_evol_model('/tmp/ete3-codeml/fb_anc/out', 'fb_anc') I = TreeStyle() I.force_topology = False I.draw_aligned_faces_as_table = True I.draw_guiding_lines = True I.guiding_lines_type = 2 I.guiding_lines_color = "#CCCCCC"
def main():
    """
    main function

    Loads the tree under WRKDIR, attaches previously computed model outputs
    (fb, M1, M2, M7, M8) and the alignment, prints the two likelihood-ratio
    comparisons and opens the tree viewer.

    NOTE(review): this block uses Python 2 print statements.
    """
    tree = EvolTree(WRKDIR + 'tree.nw')
    tree.workdir = 'data/protamine/PRM1/paml/'
    # random_swap / check_annotation are defined elsewhere; presumably they
    # shuffle the tree and then verify the 'fb' annotation - confirm there.
    random_swap(tree)
    tree.link_to_evol_model(WRKDIR + 'paml/fb/fb.out', 'fb')
    check_annotation(tree)
    # Attach the existing output files of the site models.
    tree.link_to_evol_model(WRKDIR + 'paml/M1/M1.out', 'M1')
    tree.link_to_evol_model(WRKDIR + 'paml/M2/M2.out', 'M2')
    tree.link_to_evol_model(WRKDIR + 'paml/M7/M7.out', 'M7')
    tree.link_to_evol_model(WRKDIR + 'paml/M8/M8.out', 'M8')
    tree.link_to_alignment(WRKDIR + 'alignments.fasta_ali')
    # The trailing comma keeps the value on the same output line as its label.
    print 'pv of LRT M2 vs M1: ',
    print tree.get_most_likely('M2', 'M1')
    print 'pv of LRT M8 vs M7: ',
    print tree.get_most_likely('M8', 'M7')
    tree.show(histfaces=['M2'])
    print 'The End.'
'can be run from within a paml directory'
# Links an existing codeml output to a tree and its alignment, then opens the
# tree viewer. All paths are relative to the current directory.
from ete3 import EvolTree
import os

tree_file = "testTree.tre"
alignment_file = "testAlignment.fasta"
model = "./model/out"  # codeml output file to attach
model_name = "bsD.bl_0.2w"  # name under which that output is registered
# model_name = os.path.basename(os.getcwd())

testTree = EvolTree(tree_file)
testTree.link_to_alignment(alignment_file)
testTree.link_to_evol_model(model, model_name)
testTree.show()
def load_parameters():
    """Read and validate the run parameters listed in ``input_file``.

    Line 1 of the file is the cDNA alignment (fasta) path, line 2 the tree
    path, and any further non-empty lines are node names that restrict the
    analysis to their common ancestor. If the tree has more leaves than the
    alignment has sequences, the tree is trimmed via ``trim_tree``.

    Any validation failure is logged and terminates the process with
    ``sys.exit(1)``.

    :return: tuple ``(run_list, aln_file, tree_file, descendant_dict)`` where
        ``run_list`` holds the node ids to check and ``descendant_dict`` maps
        every descendant's node id to its string form.
    """
    descendant_dict = defaultdict()
    with open(input_file, 'r') as f:
        all_lines = f.readlines()

    # Both the alignment and the tree line are mandatory; fail with a clear
    # message instead of an IndexError on a short file.
    if len(all_lines) < 2:
        logger.error('Input file must list an alignment file and a tree file.')
        sys.exit(1)

    aln_file = all_lines[0].strip()
    if not os.path.exists(aln_file):
        logger.error('Invalid cDNA alignment file: {0}'.format(aln_file))
        sys.exit(1)
    logger.info('Input cDNA alignment file: {0}'.format(aln_file))
    seq_id_list = [str(seq_record.id)
                   for seq_record in AlignIO.read(aln_file, 'fasta')]
    seq_ids = set(seq_id_list)

    tree_file = all_lines[1].strip()
    if not os.path.exists(tree_file):
        logger.error('Invalid tree file: {0}'.format(tree_file))
        sys.exit(1)
    logger.info('Input tree file: {0}'.format(tree_file))

    # Iterating a tree yields its leaves; compare their names with the
    # sequence ids of the alignment.
    tmp_t = Tree(tree_file, format=0)
    leaf_names = {str(node.name) for node in tmp_t}
    if seq_ids != leaf_names:
        if len(seq_ids) < len(leaf_names):
            logger.warning('Sequences is less than tree nodes.')
            logger.info('Trim input tree file.')
            tree_file = trim_tree(tree_file, seq_id_list)
        else:
            logger.error('Sequences is falsely greater than tree nodes.')
            sys.exit(1)

    t = EvolTree(tree_file, format=1)
    for descendant in t.iter_descendants():
        descendant_dict[descendant.node_id] = str(descendant)
    root = t.get_tree_root()

    # Optional node names: every non-empty line after the first two.
    select_nodes = [s for s in (line.strip() for line in all_lines[2:]) if s]

    if select_nodes:
        nodes_line = ', '.join(select_nodes)
        logger.info('Input nodes: {0}'.format(nodes_line))
        for node in select_nodes:
            if node not in t:
                logger.error('Error node: {0}'.format(node))
                sys.exit(1)
        # check_monophyly() returns a tuple whose first item is the boolean
        # verdict; testing the tuple itself is always truthy, which made this
        # check a no-op.
        mono_result = t.check_monophyly(values=select_nodes,
                                        target_attr='name')
        is_mono = mono_result[0] if isinstance(mono_result, tuple) else mono_result
        if not is_mono:
            logger.error('Some nodes are not monophyletic.')
            sys.exit(1)
        common_ancestor = t.get_common_ancestor(select_nodes)
    else:
        common_ancestor = root
        logger.info('No specific node')

    run_list = [s.node_id for s in common_ancestor.iter_descendants()]
    logger.info('These node ids will be checked: {0}'.format(str(run_list)))
    return run_list, aln_file, tree_file, descendant_dict
def tree_features(tree_file):
    """Return the tree with a ``spptag`` feature on every leaf.

    The species tag is the part of the leaf name before the first ``_``
    (the whole name when it contains no underscore).
    """
    tree_handle = EvolTree(tree_file)
    for leaf in tree_handle.iter_leaves():
        species_tag, _, _ = leaf.name.partition("_")
        leaf.add_feature("spptag", species_tag)
    return tree_handle
"-")) == gapSeq: # if it's just gaps empty_seq_count += 1 # If there were empty sequences found in the alignment, record the names of the # taxa with sequences for pruning the tree if empty_seq_count >= 1: taxa_in_alignment = [] for record in SeqIO.parse(alignment_file, format=alignment_format): gapSeq = '-' * len(record.seq) if (str(record.seq).upper().replace( "N", "-")) == gapSeq: # if it's just gaps pass else: taxa_in_alignment.append(record.id) tree = EvolTree(tree_file) out_tree_name = os.path.basename(tree_file) out_tree_name = os.path.splitext(out_tree_name)[0] out_tree_name = out_tree_name + '_' + gene_name + '.tre' # If there is a new alignment, prune the tree down to the taxa that remain in # the new alignment and write a new tree because EvolTree is shit and can't # use the pruned tree saved in memory if empty_seq_count >= 1: if len(taxa_in_alignment) >= 1: tree.prune(taxa_in_alignment, preserve_branch_length=True) tree.unroot() tree.write(outfile=out_tree_name, format=0) tree = EvolTree(out_tree_name) tree.link_to_alignment(alignment_file)
__author__ = "Francois-Jose Serra" __email__ = "*****@*****.**" __licence__ = "GPLv3" __version__ = "0.0" try: input = raw_input except NameError: pass from ete3 import EvolTree from ete3 import NodeStyle tree = EvolTree("data/S_example/measuring_S_tree.nw") tree.link_to_alignment("data/S_example/alignment_S_measuring_evol.fasta") print(tree) print("Tree and alignment loaded.") input("Tree will be mark in order to contrast Gorilla and Chimpanzee as foreground \nspecies.") marks = ["1", 3, "7"] tree.mark_tree(marks, ["#1"] * 3) print(tree.write()) # display marked branches in orange for node in tree.traverse(): if not hasattr(node, "mark"):
# Branch-site test script: runs M0, marks every leaf below the common ancestor
# of two accessions as foreground (#1), then runs the bsA / bsA1 models and
# compares them.
# NOTE(review): the original indentation was lost; the loop below is assumed
# to contain only the mark_tree() call - confirm against the source file.
import sys, os, subprocess
import argparse
from ete3 import EvolTree

tree = EvolTree("tree.nw", binpath="/home/edu/miniconda3/envs/ete3/bin/ete3_apps/bin")
tree.link_to_alignment("infile.phy", alg_format="phylip")
tree.workdir = os.getcwd()  # model output goes to the current directory
print(tree)
print('running model M0, for comparison with branch-site models...')
tree.run_model('M0', keep=True)
#tree.link_to_evol_model("/home/edu/Desktop/Bioinformatica/Mitogenomics/Chondrichthyes/Phylogenetic_Tree","M0")
chimaeriformes = tree.get_common_ancestor("HM147138.1", "HM147135.1")
#chimaeriformes =tree.get_common_ancestor("Human_ECP","Goril_ECP")
# Iterating a node yields its leaves; each one is marked as foreground.
for leaf in chimaeriformes:
    tree.mark_tree([leaf.node_id], marks=["#1"])
#tree.run_model("bsA." + chimaeriformes)
#tree.mark_tree([leaf.node_id], marks = ["#1"])
print("Running")
print(tree.write())
tree.run_model('bsA.Chimaeriformes')
tree.run_model("bsA1.Chimaeriformes")
print('p-value of positive selection for sites on this branch is: ')
ps = tree.get_most_likely('bsA.Chimaeriformes', 'bsA1.Chimaeriformes')
print(str(ps))
# Second comparison (bsA1 against M0); its result is stored but not printed
# in the part of the script visible here.
rx = tree.get_most_likely('bsA1.Chimaeriformes', 'M0')
def main():
    """
    main function

    Loads the tree under WRKDIR, attaches previously computed model outputs
    (fb, M1, M2, M7, M8) and the alignment, prints the two likelihood-ratio
    comparisons and opens the tree viewer.

    NOTE(review): this block uses Python 2 print statements.
    """
    tree = EvolTree (WRKDIR + 'tree.nw')
    tree.workdir = 'data/protamine/PRM1/paml/'
    # random_swap / check_annotation are defined elsewhere; presumably they
    # shuffle the tree and then verify the 'fb' annotation - confirm there.
    random_swap(tree)
    tree.link_to_evol_model (WRKDIR + 'paml/fb/fb.out', 'fb')
    check_annotation (tree)
    # Attach the existing output files of the site models.
    tree.link_to_evol_model (WRKDIR + 'paml/M1/M1.out', 'M1')
    tree.link_to_evol_model (WRKDIR + 'paml/M2/M2.out', 'M2')
    tree.link_to_evol_model (WRKDIR + 'paml/M7/M7.out', 'M7')
    tree.link_to_evol_model (WRKDIR + 'paml/M8/M8.out', 'M8')
    tree.link_to_alignment (WRKDIR + 'alignments.fasta_ali')
    # The trailing comma keeps the value on the same output line as its label.
    print 'pv of LRT M2 vs M1: ',
    print tree.get_most_likely ('M2','M1')
    print 'pv of LRT M8 vs M7: ',
    print tree.get_most_likely ('M8','M7')
    tree.show (histfaces=['M2'])
    print 'The End.'
from ete3 import EvolTree
import sys

# Usage: <script> <input tree> <output tree>
treepath = sys.argv[1]
treeout = sys.argv[2]

t = EvolTree(treepath)

ratites = {'aptHaa', 'aptRow', 'aptOwe', 'strCam', 'droNov', 'casCas', 'rheAme', 'rhePen'}

# Annotate the tree: ratite leaves get one mark, internal nodes whose leaves
# are all ratites get another.
for node in t.traverse():
    if node.is_leaf():
        if node.name in ratites:
            node.add_features(mark="{RatiteLeaf}")
        continue
    # Internal node: mark it only when every leaf below it is a ratite.
    if set(node.get_leaf_names()) <= ratites:
        node.add_features(mark="{RatiteInternal}")

t.write(outfile=treeout)
def parse_tree(treestring):
    """Build and return an ``EvolTree`` from *treestring*."""
    return EvolTree(treestring)
resultsfile = sys.argv[3] test_to_use = sys.argv[4] if not os.path.isfile(resultsfile): print(hog, 'NA', 'NA', 'NA', 'NA', sep="\t") quit() if not os.path.isfile(treefile): print(hog, 'NA', 'NA', 'NA', 'NA', sep="\t") quit() with open(treefile, 'r') as treefile: treestring = treefile.read().replace('\n', '') treestring = re.sub(r"{\w+}", "", treestring) t = EvolTree(treestring, format=1) #ugly ratites = { 'droNov', 'casCas', 'strCam', 'aptHaa', 'aptOwe', 'aptRow', 'rheAme', 'rhePen' } vl = { 'calAnn', 'corBra', 'serCan', 'geoFor', 'melUnd', 'pseHum', 'taeGut', 'ficAlb' } rand1 = {'colLiv', 'chaVoc', 'halLeu', 'taeGut', 'nipNip'} rand2 = {'falPer', 'picPub', 'lepDis', 'melUnd', 'aquChr'} if test_to_use == "ratites": testclade = ratites elif test_to_use == "vl": testclade = vl
from ete3 import EvolTree
from string import ascii_letters

# CREATE TREE
# Read the alignment once; the context manager guarantees the file is closed.
with open("./whales.fasta", "r") as fasta_handle:
    fasta_lines = fasta_handle.readlines()

# Every header line (minus '>') is a taxon; each taxon gets a one-letter alias.
taxa = [l.replace('>', '').strip() for l in fasta_lines if l.startswith('>')]
taxa_map = {t: ascii_letters[i] for i, t in enumerate(taxa)}

# Build a fully nested ("caterpillar") newick string over the aliases:
# ((((a,b),c),d),...);
taxa_string = '(' * (len(taxa) - 1) + '%s,%s)' % (ascii_letters[0], ascii_letters[1])
for t in ascii_letters[2:len(taxa)]:
    taxa_string = taxa_string + ',%s)' % t
taxa_string = taxa_string + ';'

# Rename the sequences in the alignment text. Only header lines are rewritten:
# a global text replace could also hit sequence data or taxon names that are
# substrings of one another.
renamed_lines = []
for line in fasta_lines:
    if line.startswith('>'):
        renamed_lines.append('>%s\n' % taxa_map[line.replace('>', '').strip()])
    else:
        renamed_lines.append(line)
align = ''.join(renamed_lines)

tree = EvolTree(taxa_string)
tree.link_to_alignment(align)
#tree.link_to_evol_model("M2")
#tree.get_evol_model("M2")
print(tree.run_model.__doc__)
tree.run_model("fb")
""" __author__ = "Francois-Jose Serra" __email__ = "*****@*****.**" __licence__ = "GPLv3" __version__ = "0.0" try: input = raw_input except NameError: pass from ete3 import EvolTree from ete3 import NodeStyle tree = EvolTree("data/S_example/measuring_S_tree.nw") tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta') print(tree) print('Tree and alignment loaded.') input( 'Tree will be mark in order to contrast Gorilla and Chimpanzee as foreground \nspecies.' ) marks = ['1', 3, '7'] tree.mark_tree(marks, ['#1'] * 3) print(tree.write()) # display marked branches in orange
"""
15 Nov 2010

example to illustrate use of sites model, displaying and comparison
"""

__author__  = "Francois-Jose Serra"
__email__   = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"

from ete3 import EvolTree

# Load the example tree and attach its alignment.
tree = EvolTree ("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment ('data/S_example/alignment_S_measuring_evol.fasta')

print (tree)

# Python 2/3 compatibility: use raw_input where it exists, otherwise keep the
# built-in input.
try:
    input = raw_input
except NameError:
    pass

# Wait for the user before starting the computation.
input ('\n   tree and alignment loaded\n Hit some key, to start computation of site models M1 and M2.\n')

print ('running model M1')
tree.run_model ('M1')
print ('running model M2')
tree.run_model ('M2')
15 Nov 2010 example of computation and display of an ancestral sequence computed under free'ratio model. """ __author__ = "Francois-Jose Serra" __email__ = "*****@*****.**" __licence__ = "GPLv3" __version__ = "0.0" from ete3 import TreeStyle from ete3 import EvolTree from ete3 import faces tree = EvolTree("data/S_example/measuring_S_tree.nw") tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta') print(tree) print('\n Running free-ratio model with calculation of ancestral sequences...') tree.run_model('fb_anc') #tree.link_to_evol_model('/tmp/ete3-codeml/fb_anc/out', 'fb_anc') I = TreeStyle() I.force_topology = False I.draw_aligned_faces_as_table = True I.draw_guiding_lines = True I.guiding_lines_type = 2 I.guiding_lines_color = "#CCCCCC"
#get test class from file testclassfile = sys.argv[2] with open(testclassfile) as tf: target_tips = set(tf.read().splitlines()) for line in lines: if line == "": continue else: fields = line.split("\t") hog = fields[1] tree = fields[0] try: t = EvolTree(fields[2]) except: continue for node in t.traverse(): #UGLY! istarget = node_in_class(node, t, target_tips) brstat = node.dist nname = node.name if nname == "": nname = "-".join(node.get_leaf_names()) try: pname = node.up.name except AttributeError: pname = "root" if pname == "": pname = "-".join(node.up.get_leaf_names())
""" 15 Nov 2010 first example, load a tree and compute free ratios model, to find omega value of each branch. """ __author__ = "Francois-Jose Serra" __email__ = "*****@*****.**" __licence__ = "GPLv3" __version__ = "0.0" from ete3 import EvolTree tree = EvolTree ("data/S_example/measuring_S_tree.nw") print tree raw_input ('\n tree loaded, hit some key.\n') print 'Now, it is necessary to link this tree to an alignment:' tree.link_to_alignment ('data/S_example/alignment_S_measuring_evol.fasta') raw_input ('\n alignment loaded, hit some key to see.\n') tree.show() print ''' we will run free-ratio model that is one of models available through
all_dists.append(dist) # find indexes of the three shortest distances try: idxes_of_3_smallest = np.argpartition(np.array(all_dists), 3)[:3] except ValueError: idxes_of_3_smallest = np.argpartition( np.array(all_dists), 2) # for the case that list is only 3 items long closest_seq_ids = [seqid] for d in idxes_of_3_smallest: closest_seq_ids.append(seqids_of_other_species[d]) # ete3 has codeml handling implemented!! No need for own functions. subtree = t.copy() subtree.prune(closest_seq_ids, preserve_branch_length=True) subtree.unroot() evotree = EvolTree(subtree.write()) subfasta = make_clean_fasta(closest_seq_ids, seqdatadict) if not subfasta: omega_list.append("NA") continue else: evotree.link_to_alignment(subfasta) workdirname = './codeml_' + "__".join(closest_seq_ids) evotree.workdir = workdirname list_of_tempdirs.append(workdirname) # mark the foreground branch foreground_leafnode = evotree & seqid # print (seqid) # print(foreground_leafnode.node_id) # print (evotree.write()) evotree.mark_tree([foreground_leafnode.node_id], ['#1'])
treefile=sys.argv[2] resultsfile=sys.argv[3] if not os.path.isfile(resultsfile): print(hog, 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', sep="\t") quit() if not os.path.isfile(treefile): print(hog, 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', sep="\t") quit() with open(treefile, 'r') as treefile: treestring=treefile.read().replace('\n', '') treestring=re.sub(r"{\w+}", "", treestring) t=EvolTree(treestring, format=1) #now read results into list with open(resultsfile, 'r') as f: reader=csv.reader(f) res_list=list(reader) #make output table, one line per hog, rs={'hog' : hog, 'selected_nom' : 0, 'selected_holm' : 0, 'total_tests' : 0, 'nom_branches' : "", 'holm_branches' : "", 'tree' : treestring} for line in res_list: taxa=line[0] if t.search_nodes(name=taxa): pval=float(line[6]) pvalholm=float(line[7]) node_id=trans_node(taxa, t)
#get test class from file testclassfile=sys.argv[2] with open(testclassfile) as tf: target_tips=set(tf.read().splitlines()) for line in lines: if line=="": continue else: fields=line.split("\t") hog=fields[1] tree=fields[0] try: t=EvolTree(fields[2]) except: continue for node in t.traverse(): #UGLY! istarget=node_in_class(node,t,target_tips) brstat=node.dist nname=node.name if nname=="": nname="-".join(node.get_leaf_names()) try: pname=node.up.name except AttributeError: pname="root" if pname=="": pname="-".join(node.up.get_leaf_names())
__author__ = "Francois-Jose Serra" __email__ = "*****@*****.**" __licence__ = "GPLv3" __version__ = "0.0" try: input = raw_input except NameError: pass from ete3 import EvolTree from ete3 import NodeStyle tree = EvolTree ("data/S_example/measuring_S_tree.nw") tree.link_to_alignment ('data/S_example/alignment_S_measuring_evol.fasta') print (tree) print ('Tree and alignment loaded.') input ('Tree will be mark in order to contrast Gorilla and Chimpanzee as foreground \nspecies.') marks = ['1', '3', '7'] tree.mark_tree (marks, ['#1'] * 3) print (tree.write ()) print ('we can easily colorize marked branches') # display marked branches in orange for node in tree.traverse ():
def run_codeml(mark_id, aln_file, tree_file, sleep):
    """Run the branch-site test for one marked node in its own directory.

    Runs M0, marks *mark_id* as foreground (``#1``), runs the ``bsA`` and
    ``bsA1`` models and classifies the outcome.

    :param mark_id: node id to mark as foreground; also names the run
        directory under the module-level ``output_dir``.
    :param aln_file: alignment linked to the tree.
    :param tree_file: tree source (parsed with ``format=0``).
    :param sleep: start-up delay, divided by ``args.threads`` before sleeping.
    :return: list ``[mark_id, ps, rx, p_bsA, label]`` where ``label`` is
        ``'positive selection'``, ``'relaxation'`` or ``'no signal'``.
    """
    logger.info('sub-process: {0}'.format(str(mark_id)))
    time.sleep(round(sleep / args.threads, 2))

    run_dir = os.path.join(output_dir, str(mark_id))
    os.makedirs(run_dir)

    tree = EvolTree(tree_file, format=0)
    tree.link_to_alignment(aln_file)
    # Point the tree at this sub-process's private directory BEFORE any model
    # runs; otherwise M0 is executed in the default work directory, which
    # every parallel sub-process would share.
    tree.workdir = run_dir
    tree.run_model('M0')

    tree.mark_tree([mark_id], marks=['#1'])
    alt_model = 'bsA.' + str(mark_id)
    null_model = 'bsA1.' + str(mark_id)
    tree.run_model(alt_model)
    tree.run_model(null_model)

    ps = tree.get_most_likely(alt_model, null_model)
    rx = tree.get_most_likely(null_model, 'M0')

    bsA = tree.get_evol_model(alt_model)
    p_bsA = bsA.classes['proportions'][2]
    wfrg2a = bsA.classes['foreground w'][2]

    if ps < 0.05 and float(wfrg2a) > 1:
        label = 'positive selection'
    elif rx < 0.05 and ps >= 0.05:
        label = 'relaxation'
    else:
        label = 'no signal'
    return [mark_id, ps, rx, p_bsA, label]
for record in SeqIO.parse(alignment_file, format=alignment_format): gapSeq = '-' * len(record.seq) if (str(record.seq).upper().replace( "N", "-")) == gapSeq: # if it's just gaps pass else: trimmed_alignment.append(record) taxa_in_alignment.append(record.id) # Only write a new alignment if there is a new alignment if empty_seq_count >= 1: if len(trimmed_alignment) >= 1: SeqIO.write(trimmed_alignment, handle=alignment_file, \ format=alignment_format) tree = EvolTree(tree_file) out_tree_name = os.path.basename(tree_file) out_tree_name = os.path.splitext(out_tree_name)[0] out_tree_name = out_tree_name + '_' + gene_name + '.tre' # If there is a new alignment, prune the tree down to the taxa that remain in # the new alignment if empty_seq_count >= 1: if len(taxa_in_alignment) >= 1: tree.prune(taxa_in_alignment, preserve_branch_length=True) test_taxa = [] with open(test_taxa_file, 'r') as test_taxa_list: for taxon in test_taxa_list: taxon = taxon.rstrip() test_taxa.append(taxon)
def run_evol_py(tree, alg, branch_model, site_models, workir='data/evol_output', tool_dir="ete3_apps/bin"):
    """Run the requested evolutionary models and render the annotated tree.

    Side effects: sets the module-level ``evol_output_dir`` and
    ``final_evol_tree`` and writes ``tree_evol_result.png`` into *workir*.

    :param tree: tree source passed to ``EvolTree``.
    :param alg: alignment linked to the tree.
    :param branch_model: optional branch model; run (and printed) when truthy.
    :param site_models: iterable of site model names, each run in turn and
        passed to ``render`` as ``histfaces``.
    :param workir: working/output directory.
    :param tool_dir: location of the bundled binaries, relative to the parent
        directory of ``ete3_path``.
    :return: the ``EvolTree`` the models were run on.
    """
    global evol_output_dir, final_evol_tree

    print(tree, alg, branch_model, site_models)

    apps_dir = os.path.join(os.path.dirname(ete3_path), tool_dir)
    evol_tree = EvolTree(tree, binpath=apps_dir)
    evol_tree.link_to_alignment(alg)
    evol_tree.workdir = workir

    # branch model
    if branch_model:
        branch_name = str(branch_model)
        evol_tree.run_model(branch_name)
        print(evol_tree.get_evol_model(branch_name))

    # site models
    for site_model in site_models:
        evol_tree.run_model(site_model)

    # Published only after every model has run, as before.
    evol_output_dir = workir
    final_evol_tree = evol_output_dir + '/tree_evol_result.png'
    evol_tree.render(final_evol_tree, layout=evol_clean_layout,
                     histfaces=site_models)
    return evol_tree
""" 15 Nov 2010 simple example to mark a tree and compute branch-site test of positive selection """ __author__ = "Francois-Jose Serra" __email__ = "*****@*****.**" __licence__ = "GPLv3" __version__ = "0.0" from ete3 import EvolTree tree = EvolTree("data/S_example/measuring_S_tree.nw") tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta') print tree raw_input('\n tree and alignment loaded\nHit some key, to start computation of branch site models A and A1 on each branch.\n') print 'running model M0, for comparison with branch-site models...' tree.run_model('M0') # each node/leaf has two kind of identifiers node_id and paml_id, to mark nodes we have to specify # the node_id of the nodes we want to mark, and the kind of mark in this way: for leaf in tree: leaf.node_id print '\n---------\nNow working with leaf ' + leaf.name
#!/usr/bin/python
"""
06 Feb 2011

use slr to compute evolutionary rates
"""

__author__ = "Francois-Jose Serra"
__email__ = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"

from ete3 import EvolTree

# Load the example tree together with its alignment.
tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment("data/S_example/alignment_S_measuring_evol.fasta")

# Run SLR and fetch the resulting model object.
tree.run_model('SLR')
slr = tree.get_evol_model('SLR')

# Configure the histogram face of the SLR model, then show the tree with it.
slr.set_histface(
    up=False,
    kind='curve',
    errors=True,
    hlines=[1.0, 0.3],
    hlines_col=['black', 'grey'],
)

tree.show(histfaces=['SLR'])