def main():
    """Show the example tree, compare it with a second tree, and plot a common ancestor.

    Reads the first tree of ``PATH_EXAMPLE`` with Bio.Phylo for display, then
    loads ``PATH_EXAMPLE`` and ``PATH_BIF`` with DendroPy (in one shared taxon
    namespace) to print their Robinson-Foulds and Euclidean distances.
    """
    # Phylo.parse is lazy; only the first tree in the file is used.
    example_tree = next(Phylo.parse(PATH_EXAMPLE, 'newick'))

    print(tree_info(example_tree))
    Phylo.draw(example_tree)

    # Distance comparison: both trees are read into the same namespace.
    shared_taxa = dendropy.TaxonNamespace()
    first = Tree.get_from_path(PATH_EXAMPLE, 'newick', taxon_namespace=shared_taxa)
    second = Tree.get_from_path(PATH_BIF, 'newick', taxon_namespace=shared_taxa)
    euclidean = treecompare.euclidean_distance(first, second)
    robinson = treecompare.robinson_foulds_distance(first, second)
    print("Robinson Foulds distance: ", robinson)
    print("Euclidean distance: ", euclidean)

    # Common ancestor of the clades named "C" and "D", highlighted in blue.
    ancestor = example_tree.common_ancestor({"name": "C"}, {"name": "D"})
    ancestor.color = "blue"
    print("COMMON ANCESTOR: ", ancestor)
    Phylo.draw(ancestor)
def calculate_robinson_foulds(self, species_tree, gene_tree, weighted):
    """Compute Robinson-Foulds distance(s) between a species tree and a gene tree.

    Input:
    species_tree -- path to a newick file, or a newick string
    gene_tree -- path to a newick file, or a newick string
    weighted -- when truthy, the weighted distance is returned as well

    Returns:
    ``(weighted, unweighted)`` when ``weighted`` is truthy, otherwise only the
    unweighted distance.
    """
    namespace = dendropy.TaxonNamespace()

    def load(source):
        # An existing file is read from disk; anything else is parsed as newick text.
        if os.path.isfile(source):
            return Tree.get_from_path(source, 'newick', taxon_namespace=namespace)
        return Tree.get_from_string(source, 'newick', taxon_namespace=namespace)

    species = load(species_tree)
    gene = load(gene_tree)

    if not weighted:
        return treecompare.unweighted_robinson_foulds_distance(species, gene)

    return (treecompare.weighted_robinson_foulds_distance(species, gene),
            treecompare.unweighted_robinson_foulds_distance(species, gene))
def read_lsd_results(inputDir):
    """Build an initial parameter vector from LSD output found in ``inputDir``.

    Expects ``mytree.tre`` (input tree), ``mytree.tre.result`` (LSD log) and
    ``mytree.tre.result.newick`` (LSD result tree) inside ``inputDir``.

    Returns a list ``x0`` of length ``2n - 1`` (``n`` = number of leaves): one
    entry per non-root branch, in postorder of the input tree, holding
    ``result_length / input_length`` where both are positive and ``1e-10``
    otherwise, followed by the rate ``mu`` parsed from the log.

    Raises ValueError if the log has no "Tree 1 rate " entry or no number
    follows it.
    """
    log_file = normpath(join(inputDir, "mytree.tre.result"))
    input_tree_file = normpath(join(inputDir, "mytree.tre"))
    result_tree_file = normpath(join(inputDir, "mytree.tre.result.newick"))

    with open(log_file, 'r') as log:
        s = log.read()

    marker = "Tree 1 rate "
    start = s.find(marker)
    if start < 0:
        # Without this check the scan would silently start at a bogus offset.
        raise ValueError("'Tree 1 rate' entry not found in " + log_file)

    # Collect the unsigned decimal number that follows the marker.
    i = start + len(marker)
    digits = []
    found_dot = False
    while i < len(s) and ((s[i] == '.' and not found_dot) or s[i] in "0123456789"):
        if s[i] == '.':
            found_dot = True
        digits.append(s[i])
        i += 1
    mu = float("".join(digits))

    taxa = TaxonNamespace()
    tree = Tree.get_from_path(input_tree_file, schema="newick", taxon_namespace=taxa, rooting="force-rooted")
    tree.encode_bipartitions()

    n = len(list(tree.leaf_node_iter()))
    N = 2 * n - 2
    x0 = [10**-10] * N + [mu]

    # Map each non-root bipartition of the input tree to (slot index, branch length).
    idx = 0
    brlen_map = {}
    for node in tree.postorder_node_iter():
        if node is not tree.seed_node:
            brlen_map[node.bipartition] = (idx, node.edge_length)
            idx += 1

    tree2 = Tree.get_from_path(result_tree_file, schema="newick", taxon_namespace=taxa, rooting="force-rooted")
    tree2.encode_bipartitions()

    for node in tree2.postorder_node_iter():
        if node is not tree2.seed_node:
            idx, el = brlen_map[node.bipartition]
            new_el = node.edge_length
            # Missing (None) lengths are skipped rather than compared with 0.
            if el is not None and new_el is not None and el > 0 and new_el > 0:
                x0[idx] = new_el / float(el)

    return x0
def main(): from sys import argv tree = Tree.get_from_path(argv[1], "newick") ''' smpl_times = {} with open(argv[2],"r") as fin: fin.readline() for line in fin: name,time = line.split() smpl_times[name] = float(time) ''' f = deviation_from_clock(tree) tree.write_to_path(argv[2], "newick") ''' m=sum([1/x for x in f])/len(f) with open('f.txt','w') as fout: for x in f: fout.write(str(1/x/m) + "\n") ''' #f = calibrate_with_sampling_time(tree,smpl_times) #f = calibrate_tree(tree) #print(f) '''
def main():
    """Run phylobayes-mpi (mutsel, CAT) on an alignment/tree pair and parse its output.

    Command line: ``<cpu> <job_name> <alignment_file> <tree_file>``.

    Writes ``temp.tre`` in the working directory, then runs ``./pb_mpi`` and
    ``./readpb_mpi`` through ``mpirun``.

    Raises SystemExit when arguments are missing and RuntimeError when either
    program returns a non-zero exit status.
    """
    # NOTE(review): the previous version asserted on a name `restart` when the
    # alignment/tree argument was missing, but still needed both values below,
    # so a run without them could never succeed. Fail early instead.
    if len(sys.argv) < 5:
        raise SystemExit("Usage: <cpu> <job_name> <alignment_file> <tree_file>")
    cpu, job_name, alnfile, treefile = sys.argv[1:5]

    # Rewrite tree to create trifurcating root, as needed by phylobayes mpi
    tree = Tree.get_from_path(treefile, "newick", rooting="force-unrooted")
    tree.resolve_polytomies()  # in case of polytomies.
    tree.update_bipartitions()  # this will create a trifurcating root on an unrooted tree
    tstring = str(tree).replace('[&U] ', '')
    with open('temp.tre', 'w') as tf:
        tf.write(tstring + ';\n')

    # Phylobayes is run to chain length 5500, sampling every 5 to yield 1100.
    # Later, burnin of 100 is removed to get a final posterior n=1000
    # (same procedure as Rodrigue 2013 Genetics).
    # Argument lists (no shell) keep odd characters in paths from being
    # interpreted by a shell.
    pb_call = ["mpirun", "-np", str(cpu), "./pb_mpi", "-mutsel", "-cat",
               "-d", alnfile, "-T", "temp.tre", "-x", "5", "1100", job_name]
    if subprocess.call(pb_call) != 0:
        raise RuntimeError("pb_mpi didn't run!")

    # Parse output with readpb_mpi, using a burnin of 100 and saving
    # everything else (posterior size = 1000).
    readpb_call = ["mpirun", "-np", str(cpu), "./readpb_mpi",
                   "-x", "100", "1", "-1", job_name]
    if subprocess.call(readpb_call) != 0:
        raise RuntimeError("readpb_mpi didn't run!")
def readTreeFromFile(treePath):
    '''
    Load the newick tree stored at ``treePath`` and return it as a Tree.

    Comment metadata is extracted and annotations are kept.
    '''
    return Tree.get_from_path(
        treePath,
        'newick',
        annotations_as_nhx=True,
        extract_comment_metadata=True,
        suppress_annotations=False,
    )
def __init__(self,ddpTree=None,tree_file=None,schema="newick",Tree_records=None):
    """Wrap a DendroPy tree, read from ``tree_file`` or passed in as ``ddpTree``.

    ddpTree -- an already-built tree; stored by reference (not copied)
    tree_file -- path to a tree file; takes precedence over ``ddpTree`` when truthy
    schema -- file format passed to ``Tree.get_from_path``
    Tree_records -- list stored as ``self.Tree_records``; a fresh empty list
                    is created when omitted
    """
    if tree_file:
        self.ddpTree = Tree.get_from_path(tree_file,schema)
    else:
        self.ddpTree = ddpTree
    # A `[]` default would be one list shared by every instance.
    self.Tree_records = [] if Tree_records is None else Tree_records
def __init__(self, ddpTree=None, tree_file=None, schema="newick"):
    """Wrap a DendroPy tree, passed in as ``ddpTree`` or read from ``tree_file``.

    ddpTree -- an already-built tree; used as-is whenever it is not None
    tree_file -- path read with ``preserve_underscores=True`` when no tree is given
    schema -- file format passed to ``Tree.get_from_path``
    """
    # Compare against None explicitly: a truthiness test would ignore a
    # supplied tree object that happens to evaluate falsy (e.g. length 0).
    if ddpTree is not None:
        self.ddpTree = ddpTree
    else:
        self.ddpTree = Tree.get_from_path(tree_file, schema, preserve_underscores=True)
def get_tree_lines(Tname):
    """Return the newick string of every child of every internal node.

    The tree at path ``Tname`` is read as newick; internal nodes are visited
    in postorder and their children in the order ``child_nodes()`` gives them.
    """
    from dendropy import Tree

    tree = Tree.get_from_path(Tname, "newick")
    return [
        child.as_newick_string()
        for internal in tree.postorder_internal_node_iter()
        for child in internal.child_nodes()
    ]
def tree_compare(tempdir):
    """Compare the reference tree in ``tempdir`` with the normal and reduced trees.

    Reads ``ref.tree``, ``normal_tree`` and ``red_tree`` (newick) from
    ``tempdir`` into one taxon namespace and returns
    ``(distance_normal, distance_reduced)``: the symmetric differences of the
    reference tree to each of the other two.
    """
    tns = dendropy.TaxonNamespace()
    reference, normal, reduced = [
        Tree.get_from_path(tempdir + name, "newick", taxon_namespace=tns)
        for name in ("/ref.tree", "/normal_tree", "/red_tree")
    ]

    for loaded in (reference, normal, reduced):
        loaded.encode_bipartitions()

    return (
        treecompare.symmetric_difference(reference, normal),
        treecompare.symmetric_difference(reference, reduced),
    )
def _read_tree_from_path(path, taxon_namespace):
    """
    Wrapper reading a newick file into a dendropy tree.

    path -- location of the newick file
    taxon_namespace -- namespace the tree's taxa are registered in
    """
    # get_from_path builds and returns the tree itself, so it is called on the
    # class; no throwaway Tree() instance is needed.
    return Tree.get_from_path(path, "newick", taxon_namespace=taxon_namespace)
def __init__(self,ddpTree=None,tree_file=None,schema="newick",Tree_records=None):
    """Wrap a DendroPy tree and initialise the rooting search state.

    ddpTree -- an already-built tree; stored by reference (not copied)
    tree_file -- path to a tree file; takes precedence over ``ddpTree`` when truthy
    schema -- file format passed to ``Tree.get_from_path``
    Tree_records -- list stored as ``self.Tree_records``; a fresh empty list
                    is created when omitted

    Starts with ``min_MD`` unset, ``opt_root`` at the tree's current seed node
    and ``opt_x`` at 0.
    """
    if tree_file:
        self.ddpTree = Tree.get_from_path(tree_file,schema)
    else:
        self.ddpTree = ddpTree
    # A `[]` default would be one list shared by every instance.
    self.Tree_records = [] if Tree_records is None else Tree_records
    self.min_MD = None
    self.opt_root = self.ddpTree.seed_node
    self.opt_x = 0
def main():
    """Read the newick tree named by the first argument and print each item of ``resolve_tree``."""
    from sys import argv

    tree = Tree.get_from_path(argv[1], "newick")
    for resolved in resolve_tree(tree):
        print(resolved)
def recom_resultFig_dm(recom_prob, mixtureProb):
    """Threshold recombination posteriors into a 0/1 matrix and plot one row per node.

    recom_prob -- table indexed by row number with columns 'recom_nodes',
                  'target_node' and 'posterior'; 'posterior'[i][site][1] is
                  compared with the threshold (presumably a pandas DataFrame
                  -- TODO confirm against the caller)
    mixtureProb -- threshold a posterior must reach to be flagged

    Relies on module-level names: alignment_len, nodes_number, tips_num,
    tree_path, alignment, np, plt, set_index, give_descendents, give_taxon.

    Saves the figure to "PhyloHMM_Recombination_two.jpeg" and returns the
    (alignment_len x nodes_number) 0/1 matrix.
    """
    output = np.zeros((alignment_len, nodes_number))
    for i in range(len(recom_prob)):
        if (recom_prob['recom_nodes'][i] < tips_num):
            # Node index below tips_num: flag every site whose posterior
            # reaches the threshold, in that node's column.
            for j in range(alignment_len):
                if (recom_prob['posterior'][i][j][1] >= mixtureProb):
                    output[j, recom_prob['recom_nodes'][i]] = 1
        else:
            # for j in range(alignment_len):
            #     if (recom_prob['posterior'][i][j][1] >= mixtureProb):
            #         output[j, recom_prob['target_node'][i]] = 1
            # Otherwise look for a later row j that mirrors row i (recom and
            # target nodes swapped) and flag only sites where both rows reach
            # the threshold.
            for j in range(i + 1, len(recom_prob)):
                if (recom_prob['recom_nodes'][i] == recom_prob['target_node'][j]) and ( recom_prob['recom_nodes'][j] == recom_prob['target_node'][i]):
                    for k in range(alignment_len):
                        if ((recom_prob['posterior'][i][k][1] >= mixtureProb) and (recom_prob['posterior'][j][k][1] >= mixtureProb)):
                            output[k, recom_prob['target_node'][i]] = 1
                        # if (recom_prob['posterior'][i][k] < recom_prob['posterior'][j][k]):
                        #     recom_prob['posterior'][i][k] = recom_prob['posterior'][j][k]
                        # if (recom_prob['posterior'][i][k] >= mixtureProb):
                        #     output[k, recom_prob['target_node'][i]] = 1

    fig = plt.figure(figsize=(tips_num + 9, tips_num / 2))
    color = ['red', 'green', 'purple', 'blue', 'black']
    # The tree is reloaded and indexed so node numbers can be turned into labels.
    clonaltree = Tree.get_from_path(tree_path, 'newick')
    set_index(clonaltree, alignment)
    # One stacked subplot per node; colours cycle through the five entries.
    for i in range(nodes_number):
        ax = fig.add_subplot(nodes_number, 1, i + 1)
        if i >= tips_num:
            desc = set()
            d = give_descendents(clonaltree, i, desc)
            ax.plot(output[:, i], label=str(i) + ' is mrca:' + str(d), color=color[i % 5])
        else:
            ax.plot(output[:, i], label=give_taxon(clonaltree, i), color=color[i % 5])
        ax.legend(bbox_to_anchor=(0.045, 1.5), prop={'size': 10})
        ax.set_frame_on(False)
        # NOTE(review): axis('off') is immediately undone by axis('on') -- confirm intent.
        ax.axis('off')
        ax.axis('on')
        ax.set_yticklabels([])
    plt.savefig("PhyloHMM_Recombination_two.jpeg")
    # plt.show()
    return output
def main():
    """Load a tree and its sampling times, then run ``log_from_random_init``.

    argv[1] is a newick tree file; argv[2] is a whitespace-separated table
    with one header line followed by ``<taxon> <time>`` rows.
    """
    from sys import argv

    tree = Tree.get_from_path(argv[1], 'newick')

    with open(argv[2], 'r') as table:
        table.readline()  # skip the header line
        sampling_time = {
            taxon: float(time)
            for taxon, time in (row.split() for row in table)
        }

    x_best = log_from_random_init(tree, sampling_time)
def scale_tree_branch(tree, format="newick"):
    """Return the tree as a newick string, dividing branch lengths by 100 if any exceeds 1.

    tree -- a Tree instance, a path to an existing tree file, or a tree string
    format -- schema used when reading from a file or string

    Raises TypeError when ``tree`` is none of the supported kinds.
    """
    # Check for a Tree instance first: os.path.exists() is not meaningful for
    # a Tree object, and the instance itself (not the Tree class) must be used.
    if isinstance(tree, Tree):
        tree_obj = tree
    elif os.path.exists(tree):
        tree_obj = Tree.get_from_path(tree, format)
    elif isinstance(tree, str):
        tree_obj = Tree(stream=StringIO(tree), schema=format)
    else:
        raise TypeError("tree must be a Tree, a file path or a tree string")

    # Edges without a length (None) are ignored when looking for long branches.
    has_long_branch = any(
        e.length is not None and e.length > 1
        for e in tree_obj.postorder_edge_iter()
    )
    if has_long_branch:
        for e in tree_obj.postorder_edge_iter():
            if e.length is not None:
                e.length = e.length/100
    return tree_obj.as_newick_string()
def main():
    """Relabel input trees according to a reference tree and write them out.

    Options: ``-i`` whitespace-separated input files (one newick tree per
    line), ``-o`` either one output file or one per input file, ``-r`` the
    reference tree.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("-i","--input",required=True,help="Input trees")
    parser.add_argument("-o","--output",required=True,help="Output trees")
    parser.add_argument("-r","--ref",required=True,help="Reference tree")
    args = vars(parser.parse_args())

    inputfiles = args["input"].split()
    outputfiles = args["output"].split()
    refFile = args["ref"] if args["ref"] else None

    if not (len(outputfiles) == 1 or len(outputfiles) == len(inputfiles)):
        print("The number of output files must either be 1 or the same as the number of input files!")
        return

    multi_output = len(outputfiles) > 1

    taxa = TaxonNamespace()
    tree = Tree.get_from_path(refFile,"newick",taxon_namespace=taxa,rooting="force-rooted")
    label_mapping = read_label_from_reference_tree(tree)

    # Although using TreeList provided in Dendropy can be a more convenient
    # solution, it requires storing a large number of trees in memory at the
    # same time (a set of 7k trees of 10k leaves needed >60G). Trees are
    # therefore read, labelled and written one at a time, all sharing one
    # taxon namespace. The input is streamed line by line for the same reason.
    shared_out = None if multi_output else open(outputfiles[0],'w')
    try:
        for i,filein in enumerate(inputfiles):
            fout = open(outputfiles[i],'w') if multi_output else shared_out
            try:
                with open(filein,'r') as fin:
                    for s in fin:
                        tree = Tree.get(data=s,schema="newick",taxon_namespace=taxa,rooting="force-rooted")
                        label_tree(tree,label_mapping)
                        fout.write(tree.as_string("newick"))
            finally:
                # Per-input outputs are closed even if a tree fails to parse.
                if multi_output:
                    fout.close()
    finally:
        if shared_out is not None:
            shared_out.close()
def main(): tree_file = argv[1] sampling_time_file = argv[2] tree = Tree.get_from_path(tree_file, "newick") sampling_time = {} with open(sampling_time_file, 'r') as fin: fin.readline() for line in fin: taxon, time = line.split() sampling_time[taxon] = float(time) random_date_init(tree, sampling_time, 10, min_nleaf=8) '''
def returnRootOfTree(infile, filePrefix, ext):
    '''
    Return the node string of the level-0 internal node of a newick tree.

    The tree is read from ``<directory of infile>/<filePrefix>.<ext>``.
    An empty string is returned when no internal node is at level 0.
    '''
    directory = os.path.dirname(os.path.realpath(infile))
    treePath = directory+'/'+filePrefix+'.'+ ext
    myTree = Tree.get_from_path(
        treePath,
        'newick',
        annotations_as_nhx=True,
        extract_comment_metadata=True,
        suppress_annotations=False,
    )
    # Only the first level-0 node is converted; '' is the fallback.
    return next(
        (node.get_node_str() for node in myTree.internal_nodes() if node.level() == 0),
        '',
    )
def changeSpeciesTreeLabels(stree):
    '''
    Renumber the leaves of the species tree at path ``stree``.

    Leaves are relabelled "1", "2", ... in ``leaf_nodes()`` order. The
    old-to-new mapping is written to ``<stree>.labels`` (tab separated) and
    the relabelled tree to ``<stree>.newNewick``. The tree is plotted to
    the console before and after relabelling.
    '''
    myStree = Tree.get_from_path(
        stree,
        'newick',
        annotations_as_nhx=True,
        extract_comment_metadata=True,
        suppress_annotations=False,
    )
    myStree.print_plot()

    with open(stree+'.labels', 'w') as mapping_file:
        for number, leaf in enumerate(myStree.leaf_nodes(), 1):
            new_label = str(number)
            mapping_file.write(leaf.taxon.label +'\t'+ new_label +'\n')
            leaf.taxon.label = new_label

    myStree.print_plot()

    with open(stree+'.newNewick', 'w') as tree_file:
        tree_file.write(myStree.as_string('newick'))
def g(x):
    """Read the newick tree at path ``x`` and rescale it to a mean branch length of 1.

    The root's edge length is set to None; every other edge length is divided
    by the mean edge length. A tree without any non-root node is returned
    unchanged.
    """
    from dendropy import Tree

    t = Tree.get_from_path(x, 'newick')

    # normalize branch lengths
    # first make sure the root has an edge length of None
    num_edges = 0
    scale = 0.
    for n in t.nodes():
        if n.parent_node is None:
            n.edge_length = None
        else:
            num_edges += 1
            scale += n.edge_length

    if num_edges == 0:
        # Nothing to normalise; avoids dividing by zero below.
        return t

    scale /= num_edges
    for n in t.nodes():
        if n.edge_length is not None:
            n.edge_length /= scale

    # Sanity check on the mean; abs() so a mean far below 1 is caught as well.
    assert abs(t.length()/num_edges - 1.) < 0.01
    return t
def evaluate(ref, file_name):
    """Build a tree from ``file_name`` with FastPhylo and compare it with ``ref``.

    ref -- path to the reference newick tree
    file_name -- input passed to ``fastprot``

    Runs ``fastprot`` then ``fnj`` through two temporary files and returns the
    symmetric difference between the reference tree and the inferred tree.
    Both trees are read into the module-level taxon namespace ``tns``.
    """
    import subprocess

    # Two temporary files carry the intermediate results. Only their paths
    # are needed, so the descriptors are closed straight away.
    fd1, dist_path = tempfile.mkstemp()
    fd2, tree_path = tempfile.mkstemp()
    os.close(fd1)
    os.close(fd2)
    try:
        # Argument lists (no shell) so unusual characters in the paths are
        # passed through verbatim.
        subprocess.call(["fastprot", "-m", "-o", dist_path, file_name])
        subprocess.call(["fnj", "-O", "newick", "-m", "FNJ", "-o", tree_path, dist_path])

        # Use Dendropy to compare the trees.
        in_tree = Tree.get_from_path(tree_path, schema='newick', taxon_namespace=tns)
        ref_tree = Tree.get_from_path(ref, schema='newick', taxon_namespace=tns)
        return treecompare.symmetric_difference(ref_tree, in_tree)
    finally:
        # The temporary files are not needed once the trees are in memory.
        os.remove(dist_path)
        os.remove(tree_path)
def make_recombination_trees(tree_path, tree, dna, target_node, nu):
    """Return the rerooted tree plus one evolved tree per child of ``target_node``.

    tree_path -- newick file a fresh copy of the tree is read from per child
    tree -- tree rerooted in place at ``target_node``
    dna -- passed to ``set_index`` to index each fresh copy
    target_node -- node to reroot at; its children are the recombined nodes
    nu -- passed through to ``tree_evolver_rerooted``

    The first list element is the newick string of the rerooted ``tree``; the
    rest are the results of ``tree_evolver_rerooted``, one per child.
    """
    recombination_trees = []
    tree.reroot_at_node(target_node, update_bipartitions=False, suppress_unifurcations=True)
    recombination_trees.append(tree.as_string(schema="newick"))

    for child in target_node.child_node_iter():
        # Each child gets its own freshly loaded, freshly indexed tree.
        fresh_tree = Tree.get_from_path(tree_path, 'newick')
        set_index(fresh_tree, dna)
        # NOTE(review): target_node belongs to `tree`, not to fresh_tree --
        # confirm that rerooting a different tree at it is intended.
        fresh_tree.reroot_at_node(
            target_node, update_bipartitions=False, suppress_unifurcations=True)

        # Bind the wanted index now rather than relying on the loop variable.
        recombined_node = fresh_tree.find_node(
            filter_fn=lambda n, wanted=child.index: hasattr(n, 'index') and n.index == wanted)
        recombination_trees.append(
            tree_evolver_rerooted(fresh_tree, recombined_node, nu))

    return recombination_trees
def extract_tree_info(file):
    """Summarise the newick tree at path ``file``.

    Returns three strings: total tree length, mean root-to-tip distance and
    mean patristic distance over taxon pairs.
    """
    t = Tree.get_from_path(file, 'newick')

    tree_length = str(t.length())

    # Mean root-to-tip distance over all leaves.
    rtt = [tip.distance_from_root() for tip in t.leaf_nodes()]
    mean_rtt = str(np.mean(rtt))

    # Mean patristic distance.
    # NOTE(review): the slice starts at i, so each taxon is also paired with
    # itself -- confirm whether self-pairs should count towards the mean.
    dist = treemeasure.PatristicDistanceMatrix(tree=t)
    taxa = t.taxon_namespace
    pd = [
        float(dist(t1, t2))
        for i, t1 in enumerate(taxa)
        for t2 in taxa[i:]
    ]
    mean_pairwise = str(np.mean(pd))

    return tree_length, mean_rtt, mean_pairwise
# Command-line driver: read a newick tree, run filter_branch on it with the
# thresholds given as options, and write the result.
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--input', required=True, help="input file")
parser.add_argument('-o', '--outfile', required=True, help="output file")
parser.add_argument('-u', '--unit', required=False,
                    help="unit-length for unit-based filter")
parser.add_argument('-l', '--lowthres', required=False, help="low threshold")
parser.add_argument('-g', '--highthres', required=False, help="high threshold")
parser.add_argument('-f', '--factor', required=False, help="factor")
args = vars(parser.parse_args())

infile = args['input']
outfile = args['outfile']

a_tree = Tree.get_from_path(infile, "newick", preserve_underscores=True)

# Options left out (or empty) fall back to: unit None, low 0, high 1, factor 1.
unit = args['unit'] if args['unit'] else None
if args['lowthres']:
    low = float(args['lowthres'])
else:
    low = 0
if args['highthres']:
    high = float(args['highthres'])
else:
    high = 1
if args['factor']:
    factor = float(args['factor'])
else:
    factor = 1

# NOTE(review): the raw --unit value is passed on (not the `unit` variable,
# and not converted to float) -- confirm what filter_branch expects.
filter_branch(a_tree, unit_length=args['unit'], low_percentile=low,
              high_percentile=high, factor=factor)

a_tree.write_to_path(outfile, "newick")
#! /usr/bin/env python
from dendropy import Tree
from decompose_lib import decompose_by_diameter, compute_group_distance_matrix, place_group_onto_tree
import sys
import os
from pasta import get_logger

_LOG = get_logger(__name__)

# Arguments: input tree, grouping table, leaf-count file, output distance file.
intree_file, grouping_file, nleaf_file, distance_file = (
    sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])

t = Tree.get_from_path(intree_file, 'newick')

# Each grouping line is "<group name> <taxon>"; the lookup is taxon -> group.
with open(grouping_file, 'r') as f:
    grouping = {taxon: name for name, taxon in (line.split() for line in f)}

_LOG.info('computing treeMap ... ')
treeMap = place_group_onto_tree(t, grouping)
D = compute_group_distance_matrix(t, treeMap)

# One "<A> <B> <distance>" line per pair in D.
with open(distance_file, 'w') as f:
    f.writelines(A + " " + B + " " + str(D[(A, B)]) + "\n" for A, B in D)
def open_tree(self, treefile):
    """Load the newick tree at ``treefile`` into ``self.sim_tree`` and midpoint-root it."""
    loaded = self.sim_tree = Tree.get_from_path(treefile, schema="newick")  # , as_rooted=True)
    loaded.reroot_at_midpoint()
def get_taxa(tree_file, scheme='newick'):
    """Return the taxon labels of all leaves of the tree stored in ``tree_file``.

    Underscores in labels are preserved when reading.
    """
    a_tree = Tree.get_from_path(tree_file, scheme, preserve_underscores=True)
    labels = []
    for leaf in a_tree.leaf_nodes():
        labels.append(leaf.taxon.label)
    return labels
#! /usr/bin/env python
import logdate
from logdate.logD_lib import random_timetree
from dendropy import Tree
import dendropy
#import treeswift
from logdate.tree_lib import tree_as_newick
import argparse
from sys import argv,stdout

# Command-line driver: generate random time trees from an input tree and
# write them to a file or to standard output.
parser = argparse.ArgumentParser()

parser.add_argument("-i","--input",required=True,help="Input tree")
parser.add_argument("-t","--samplingTime",required=False,help="Sampling time at leaf nodes. Default: None")
parser.add_argument("-p","--rep",required=False,help="The number of random replicates. Default: 1")
parser.add_argument("-s","--rseed",required=False,help="Random seed. Default: randomly chosen and will be reported")
parser.add_argument("-o","--output",required=False,help="Output file. Default: None. The trees will be printed to screen")

args = vars(parser.parse_args())

tree = Tree.get_from_path(args["input"],'newick',preserve_underscores=True)
sampling_time = args["samplingTime"]
nrep = int(args["rep"]) if args["rep"] else 1
randseed = int(args["rseed"]) if args["rseed"] else None

fout = open(args["output"],'w') if args["output"] is not None else stdout
try:
    random_timetree(tree,sampling_time,nrep,seed=randseed,fout=fout)
finally:
    # Close (and thereby flush) a real output file; never close stdout.
    if fout is not stdout:
        fout.close()
from calibration_lib import calibrate_tree
from sys import argv
from dendropy import Tree

# argv[1]: newick tree to calibrate; argv[2]: where the tree is written afterwards.
treefile, outfile = argv[1], argv[2]

myTree = Tree.get_from_path(treefile, 'newick')
print("Read tree successfully")

# Print whatever calibrate_tree returns, then save the tree.
calibration = calibrate_tree(myTree, verbose=True)
print(calibration)

myTree.write_to_path(outfile, "newick")
# print(node.taxon) if not node.is_leaf(): node.index = s node.label = str(node.index) s += 1 else: for idx, name in enumerate(dna): # print(idx , str(name) , str(node.taxon)) if str(name) == str(node.taxon): node.index = idx node.label = str(node.index) break tree_path = '/home/nehleh/Documents/0_Research/PhD/Data/simulationdata/recombination/ShortDataset/RAxML_bestTree.tree' tree = Tree.get_from_path(tree_path, 'newick') alignment = dendropy.DnaCharacterMatrix.get(file=open("/home/nehleh/Documents/0_Research/PhD/Data/simulationdata/recombination/ShortDataset/wholegenome.fasta"), schema="fasta") print(tree.as_ascii_plot()) pi = [0.2184,0.2606,0.3265,0.1946] rates = [0.975070 ,4.088451 ,0.991465 ,0.640018 ,3.840919 ] GTR_sample = myPhylo.GTR_model(rates,pi) column = myPhylo.get_DNA_fromAlignment(alignment) dna = column[0] setup_indexes(tree,alignment) tips = len(dna)
#! /usr/bin/env python from sys import argv from dendropy import Tree annoFile = argv[1] # ~/10kBacGenome/repophlan_microbes_ranks.txt treefile = argv[2] myTree = Tree.get_from_path(treefile, "newick") nameHash = {} global_phylCount = {} with open(annoFile, 'r') as f: for line in f: fields = line.split() name = fields[0] phylum = fields[2] nameHash[name] = phylum #global_phylCount[phylum] = 1 + (global_phylCount[phylum] if phylum in global_phylCount else 0) # count the number of species in each phylum for node in myTree.leaf_node_iter(): phylum = nameHash[node.taxon.label] global_phylCount[phylum] = 1 + (global_phylCount[phylum] if phylum in global_phylCount else 0) # label internal nodes ID = 0 for node in myTree.preorder_node_iter(): if not node.is_leaf(): node.label = "I_" + str(ID)
#! /usr/bin/env python
import re
from dendropy import Tree
from sys import argv

# Print count, maximum, sum and average of the branch lengths of a newick
# tree. Edges without a length are ignored.
filename = argv[1]

a_tree = Tree.get_from_path(filename, 'newick')

br_sum = 0
br_count = 0
br_max = -1.0  # stays -1.0 when no edge has a length
for edge in a_tree.preorder_edge_iter():
    if edge.length is not None:
        br_count += 1
        br_sum += edge.length
        if edge.length > br_max:
            br_max = edge.length

# A tree without any branch length has no meaningful average; report NaN
# instead of failing with a division by zero.
br_avg = br_sum / br_count if br_count else float("nan")

print("branch #: " + str(br_count))
print("branch max: " + str(br_max))
print("branch sum: " + str(br_sum))
print("branch avg: " + str(br_avg))
def compare_trees(tree_filename1, tree_filename2):
    """Return the symmetric difference between two newick tree files.

    Both trees are placed in one TreeList before being compared.
    """
    from dendropy import Tree, TreeList
    from dendropy.treecalc import symmetric_difference

    loaded = [
        Tree.get_from_path(name, 'newick')
        for name in (tree_filename1, tree_filename2)
    ]
    pair = TreeList(loaded)
    first, second = pair[0], pair[1]
    return symmetric_difference(first, second)
# For every "<preproot>/*/*.nwk" tree: remove dots from leaf labels and
# rewrite the file in place.
argparser = argparse.ArgumentParser()
argparser.add_argument('--preproot', metavar='tree_root', type=str, required=True)
args = argparser.parse_args()

for infile in glob(path.join(args.preproot, "*", "*.nwk")):
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(infile)
    basename = path.basename(infile).partition('.')[0]
    prefix = basename.partition('_')[0][:2]

    tree = Tree.get_from_path(infile, 'newick', preserve_underscores=True)
    for node in tree:
        if node.is_leaf():
            if "." in node.taxon.label:
                node.taxon.label = node.taxon.label.replace(".", "")

    # Overwrite from the start and cut off any leftover tail. The context
    # manager actually closes the file; the former bare `tree_file.close`
    # (no parentheses) never did.
    with open(infile, "r+") as tree_file:
        tree_file.seek(0)
        tree_file.write(tree.as_string('newick'))
        tree_file.truncate()

    # remove quotes
    # NOTE(review): the rest of this step lies beyond the visible source; the
    # handle opened here must be closed by the code that follows.
    tree_file = open(infile, "r+")
def __get_tree(self,taxon_set):
    """Read the RAxML result tree for this parameter set.

    The file is ``<self.path>/RAxML_result.<self.param_names>``; splits are
    encoded on load and taxa are registered in ``taxon_set``.
    """
    result_path = self.path+"/RAxML_result."+self.param_names
    return Tree.get_from_path(
        result_path,
        'newick',
        encode_splits=True,
        taxon_set=taxon_set,
    )
except (AttributeError, KeyError): out.write(label) if sel is not None: s = "" try: s = float(sel) s = str(s) except ValueError: s = str(sel) if s: out.write(":%s" % s) if __name__ == "__main__": #test import sys from dendropy import Tree from collections import OrderedDict Tree.write_preorder_to_csv = write_preorder_to_csv Tree.set_node_ages = set_node_ages t = Tree.get_from_path(sys.argv[1], schema="newick", suppress_internal_node_taxa=True, suppress_leaf_node_taxa=True) for i, nd in enumerate(t.preorder_node_iter()): nd.data={'preorder_index':i} t.set_node_ages() #print(t.find_node_with_label("Primates").data) t.ladderize(ascending=True) with open('test_leaves.csv', 'w+') as l, open('test_nodes.csv', 'w+') as n: node_extras=OrderedDict() node_extras['preorder index']=['preorder_index'] t.write_preorder_to_csv(l,{},n,node_extras,-1)
def topology_counter(self, rooted=False, outgroup=None):
    """
    Count how often each distinct topology occurs among the best-tree files
    in the "Topologies" folder.

    Input:
    rooted -- when truthy, each tree is first rerooted on ``outgroup``
    outgroup -- taxon label of the outgroup (used only when ``rooted``)

    Output:
    topologies_to_counts --- maps a representative newick string of each
        distinct topology to the number of trees having it
    unique_topologies_to_newicks --- maps the same representative to the set
        of newick strings that matched it
    """
    unique_topologies_to_newicks = {}
    tns = dendropy.TaxonNamespace()
    unique_topologies = set([])
    input_directory = "Topologies"
    topologies_to_counts = defaultdict(int)

    for filename in os.listdir(input_directory):
        # Only the files holding a best-tree newick string are of interest.
        if os.path.splitext(filename)[0] != "Topology_bestTree":
            continue

        input_file = os.path.join(input_directory, filename)
        new_tree = Tree.get_from_path(input_file, 'newick', taxon_namespace=tns)

        if rooted:
            outgroup_node = new_tree.find_node_with_taxon_label(outgroup)
            new_tree.to_outgroup_position(outgroup_node, update_bipartitions=False)

        # Two trees have the same topology when their unweighted RF distance is 0.
        for unique_topology in unique_topologies:
            unique_tree = Tree.get_from_string(unique_topology, 'newick', taxon_namespace=tns)
            rf_distance = treecompare.unweighted_robinson_foulds_distance(
                unique_tree, new_tree)
            if rf_distance == 0:
                topologies_to_counts[unique_topology] += 1
                matched_newick = new_tree.as_string("newick").replace("\n", "")
                unique_topologies_to_newicks[unique_topology].add(matched_newick)
                break
        else:
            # No known topology matched: this tree starts a new one.
            fresh_newick = new_tree.as_string("newick").replace("\n", "")
            unique_topologies.add(fresh_newick)
            topologies_to_counts[fresh_newick] += 1
            unique_topologies_to_newicks[fresh_newick] = set([fresh_newick])

    return topologies_to_counts, unique_topologies_to_newicks
import numpy as np
import numpy.linalg as la
from dendropy import Tree, DnaCharacterMatrix
import myPhylo

# Example run: load a tree and its alignment, then compute likelihoods under
# a GTR model built from fixed rates and base frequencies.
tree_path = '/home/nehleh/Documents/0_Research/PhD/Data/simulationdata/recombination/exampledataset/exampledataset_RAxML_bestTree'
tree = Tree.get_from_path(tree_path, 'newick')

# Read the alignment through a context manager so the FASTA handle is closed.
with open(
    "/home/nehleh/Documents/0_Research/PhD/Data/simulationdata/recombination/exampledataset/wholegenome.fasta"
) as fasta_file:
    alignment = DnaCharacterMatrix.get(file=fasta_file, schema="fasta")

tree2 = Tree.get_from_path(
    '/home/nehleh/Documents/0_Research/PhD/Data/simulationdata/recombination/exampledataset/RerootTree_node12',
    'newick')

pi = [0.317, 0.183, 0.367, 0.133]
rates = [0.000100, 0.636612, 2.547706, 0.000100, 2.151395]
GTR_sample = myPhylo.GTR_model(rates, pi)

column = myPhylo.get_DNA_fromAlignment(alignment)
dna = column[0]
myPhylo.set_index(tree, dna)

print("Original tree:::::::::::::::")
print(tree.as_string(schema='newick'))
print(tree.as_ascii_plot())

# Likelihood for the single column `dna`, then for the whole alignment.
LL_normal = myPhylo.computelikelihood(tree, dna, GTR_sample)
W_LL_normal = myPhylo.wholeAlignmentLikelihood(tree, alignment, GTR_sample)
col = "" for t in range(tips): col += str(alignment[t][l]) # LL_vector.append(computelikelihood(tree, col, model)) LL_vector[:, l] = computelikelihood(tree, col, model) return LL_vector #======================================================================================================================= pi = [0.2184, 0.2606, 0.3265, 0.1946] rates = [2.0431, 0.0821, 0, 0.067, 0] f = 1 tree = Tree.get_from_path( '/home/nehleh/0_Research/PhD/Data/simulationdata/recombination/500000/RAxML_bestTree.wholegenometree', 'newick') alignment_GTR = dendropy.DnaCharacterMatrix.get(file=open( "/home/nehleh/0_Research/PhD/Data/simulationdata/recombination/500000/wholegenome.fasta" ), schema="fasta") # alignment_JC = dendropy.DnaCharacterMatrix.get(file=open("/home/nehleh/0_Research/PhD/Data/LL_vector/JC69_100.fasta"), schema="fasta") tips = len(alignment_GTR) alignment_len = alignment_GTR.sequence_size # GTRGTRvector = [] # JCJCvector = [] # GTRJCvector = [] # JCGTRvector = []
parser.add_argument("-N", "--population", required=True,
                    help="Population size")
parser.add_argument("-g", "--growth", required=False,
                    help="Growing rate (exponential) of the population. Default: 0")
parser.add_argument("-o", "--outputFile", required=False,
                    help="The name of the output tree. Default: stdout")

args = vars(parser.parse_args())

infile = args["inputFile"]
outfile = args["outputFile"] or None  # empty/missing name means "write to stdout"
N = float(args["population"])
# NOTE(review): the growth rate is parsed with int(); a fractional rate
# would be rejected -- confirm that only whole numbers are expected.
alpha = int(args["growth"]) if args["growth"] else 0

# Simulate on the topology read from the input file (the tree is modified in place).
myTree = Tree.get_from_path(infile, 'newick')
simulateTreeFromTopology(myTree, N, alpha)

if outfile is None:
    stdout.write(myTree.as_string('newick'))
else:
    myTree.write_to_path(outfile, 'newick')
import dendropy
from dendropy import TreeList,Tree,Taxon,Node
import sys
import argparse
import re
from collections import defaultdict

# Collapse every single-species subtree of a newick tree into one leaf named
# after the species.
# Raw string: the description contains regex-style backslashes.
parser = argparse.ArgumentParser(description=r"Parses a Newick tree file and writes another with subtrees formed by the same species collapsed. It assumes that all samples for each species form a monophyletic group. Leave names are expected to follow the scheme species_\d+_\d+")
parser.add_argument("-i",type=str,default="infile.tree",required=True,help="Input Newick tree file")
parser.add_argument("-o",type=str,default="outtree.tree",required=False,help="Output Newick tree file")
args = parser.parse_args()

tree=Tree.get_from_path(args.i,schema="newick",rooting="force-unrooted")
namespace=tree.taxon_namespace
labels=namespace.labels()

# Labels are matched as "<species> <x> <y>" (space separated). Group the
# labels by species in a single pass. This replaces the earlier per-species
# regex built from the raw species name, which broke on regex metacharacters
# and also matched labels of any species whose name merely started with
# "<species> ".
regex=re.compile("(.+) .+ .+")
leaves_by_species=defaultdict(list)
for label in labels:
    match=regex.match(label)
    if match:
        leaves_by_species[match.group(1)].append(match.group(0))

species_set=set(leaves_by_species)
species=list(species_set)

newNamespace=dendropy.datamodel.taxonmodel.TaxonNamespace()
for specie in species:
    leaves=leaves_by_species[specie]
    mrca_node=tree.mrca(taxon_labels=leaves)
    # Drop the whole subtree below the MRCA and turn the MRCA into a leaf
    # carrying the species name.
    del mrca_node._child_nodes[:]
    taxon=Taxon(specie)
    mrca_node.taxon=taxon
    newNamespace.add_taxon(taxon)

tree.taxon_namespace=newNamespace
tree.write(path=args.o,schema="newick",suppress_rooting=True)
def windows_to_newick(self, top_topologies_to_counts, unique_topologies_to_newicks, rooted=False, outgroup=None):
    """
    Map each window number to its topology if that topology is one of the
    top topologies, and to "Other" otherwise.

    Input:
    top_topologies_to_counts -- mapping whose keys are the top topologies
    unique_topologies_to_newicks -- a mapping outputted by topology_counter()
    rooted -- when truthy, each tree is rerooted on ``outgroup`` before matching
    outgroup -- taxon label of the outgroup (used only when ``rooted``)

    Returns:
    wins_to_tops --- a dictionary as described above
    tops_list --- a list of the top topologies (plus "Other" when it was used)
    """
    # Copy the keys into a real list: "Other" may be appended below, and a
    # Python 3 keys view has no append().
    tops_list = list(top_topologies_to_counts.keys())
    wins_to_tops = {}

    # Iterate over the files in natural sort order.
    for filename in natsorted(os.listdir("Topologies")):
        # Only the files holding a best-tree newick string are of interest.
        if os.path.splitext(filename)[0] == "Topology_bestTree":
            filename = os.path.join("Topologies", filename)

            # NOTE(review): readline() keeps a trailing newline, while the
            # rooted branch below strips newlines -- confirm the stored
            # newick strings match the unrooted form.
            with open(filename) as f:
                newick = f.readline()

            if rooted:
                tns = dendropy.TaxonNamespace()
                # Root the tree on the outgroup and use its newick string.
                new_tree = Tree.get_from_path(filename, 'newick', taxon_namespace=tns)
                outgroup_node = new_tree.find_node_with_taxon_label(outgroup)
                new_tree.to_outgroup_position(outgroup_node, update_bipartitions=False)
                newick = new_tree.as_string("newick").replace("\n", "")

            # The window number is the file extension, e.g. ".12" -> 12.
            window_number = int(
                (os.path.splitext(filename)[1]).replace(".", ""))

            for unique_topology in unique_topologies_to_newicks:
                # Is this newick one of the strings recorded for the topology?
                if newick in unique_topologies_to_newicks[unique_topology]:
                    if unique_topology in tops_list:
                        wins_to_tops[window_number] = unique_topology
                    else:
                        wins_to_tops[window_number] = "Other"
                        # Add "Other" once so all topologies are included
                        # with the top ones.
                        if "Other" not in tops_list:
                            tops_list.append("Other")

    return wins_to_tops, tops_list
#! /usr/bin/env python
from dendropy import Tree
from decompose_lib import decompose_by_diameter, compute_group_distance_matrix,place_group_onto_tree
import sys
import os
from pasta import get_logger

_LOG = get_logger(__name__)

# Arguments: input tree, grouping table, leaf-count file, output distance file.
intree_file, grouping_file, nleaf_file, distance_file = (
    sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])

t = Tree.get_from_path(intree_file,'newick')

# Each grouping line is "<group name> <taxon>"; the lookup is taxon -> group.
with open(grouping_file,'r') as f:
    grouping = {taxon: name for name, taxon in (line.split() for line in f)}

_LOG.info('computing treeMap ... ')
treeMap = place_group_onto_tree(t,grouping)
D = compute_group_distance_matrix(t,treeMap)

# One "<A> <B> <distance>" line per pair in D.
with open(distance_file,'w') as f:
    f.writelines(A + " " + B + " " + str(D[(A,B)]) + "\n" for A,B in D)
import dendropy
from dendropy.calculate import treecompare
from dendropy import Tree
import os

# Print the unweighted Robinson-Foulds distance for every pair of nexus tree
# files found in the "output" directory.

# A file qualifies when "nex" is one of the dot-separated parts of its name.
protein_dir_set = [str(entry) for entry in os.listdir('output')
                   if "nex" in entry.split(".")]

tns = dendropy.TaxonNamespace()

for position, first_name in enumerate(protein_dir_set):
    for second_name in protein_dir_set[position + 1:]:
        tree1 = Tree.get_from_path("output/" + first_name, "nexus",
                                   taxon_namespace=tns)
        tree2 = Tree.get_from_path("output/" + second_name, "nexus",
                                   taxon_namespace=tns)
        tree1.encode_bipartitions()
        tree2.encode_bipartitions()
        print(first_name, second_name,
              treecompare.unweighted_robinson_foulds_distance(tree1, tree2))
def reroot(treename):
    """Reroot ``work/<treename>.nex`` at the parent of "Vampyroteuthis infernalis".

    The rerooted, ladderized tree is written to ``work/<treename>.rooted.nex``.
    """
    source_path = "work/" + treename + ".nex"
    target_path = "work/" + treename + ".rooted.nex"

    tree = Tree.get_from_path(source_path, "nexus")
    outgroup_leaf = tree.find_node_with_taxon_label("Vampyroteuthis infernalis")
    tree.reroot_at_node(outgroup_leaf.parent_node)
    tree.ladderize()
    tree.write_to_path(target_path, "nexus")