def fromSimulator(self, filename, N=10000, ambienti=2, replicates=2, bonus=0.1, Xs=None, prob=None):
    """Fill this object with a simulated community generated on a tree.

    Args:
        filename: tree file handed to ``Tree`` and later to ``self.readTree``.
        N: passed to ``FakeCommunity`` as its second positional argument.
        ambienti: number of environments (groups) to simulate.
        replicates: number of replicate samples per environment.
        bonus: forwarded unchanged to ``FakeCommunity``.
        Xs: nodes forwarded to ``FakeCommunity``. When empty or omitted,
            ``ambienti - 1`` nodes are picked with ``SelectNode``. A
            caller-supplied empty list is filled in place.
        prob: forwarded unchanged to ``FakeCommunity``.

    Returns:
        The tuple ``(names, DB, Ns, table, t)`` returned by ``FakeCommunity``.
    """
    # Build fresh lists per call. The former ``Xs=[]`` default was appended to
    # below, so nodes selected for one tree leaked into every later call.
    if Xs is None:
        Xs = []
    if prob is None:
        prob = []

    t = Tree(filename)
    if not Xs:
        for _ in range(ambienti - 1):
            Xs.append(SelectNode(list(t.traverse())))

    # ``N`` used to be ignored in favour of a literal 10000 (its default).
    names, DB, Ns, table, t = FakeCommunity(
        t, N, ambienti=ambienti, replicates=replicates, bonus=bonus,
        Xs=Xs, prob=prob)

    self.countTable = DB
    self.SeqName = names
    self.samplesNames = range(DB.shape[1])
    self.nogroup = {0: self.samplesNames}

    # Consecutive runs of ``replicates`` columns form one group each.
    # NOTE(review): assumes self.groups already exists as a mapping.
    counter = 0
    for g in range(ambienti):
        self.groups[g] = range(counter, counter + replicates)
        counter += replicates

    self.expandTable()
    self.readTree(filename)
    return names, DB, Ns, table, t
def parseTreeWithDB(t, Ordername, method='PitTrap'):
    """Build a 2-row abundance table for the leaves of a tree from the reads DB.

    Args:
        t: newick string or file accepted by ``Tree``.
        Ordername: value matched against ``BestHit_Order.order_name``.
        method: value matched against ``454Reads.Run``.

    Returns:
        ``(names, table)`` where ``names`` is the list of leaf names and
        ``table`` is a ``numpy`` array with one column per leaf. Row 0 holds
        the ``Rich`` value when the read's region equals ``regions[0]``,
        row 1 holds it otherwise. Leaves without a DB hit get ``[0, 0]``.
    """
    tree = Tree(t)

    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration.
    con = MySQLdb.connect(host='cerbero.ba.itb.cnr.it', user='******',
                          passwd='PyroNoise', db='TAXONOMYdb')
    try:
        cur = con.cursor()
        # Parameterized query: the values used to be concatenated into the SQL
        # text, which broke on quotes and allowed SQL injection.
        sql = ("SELECT 454Reads.ReadAccno, 454Reads.Region, 454Reads.Rich "
               "FROM 454Reads INNER JOIN BestHit_Order "
               "ON BestHit_Order.QueryName = 454Reads.ReadAccno "
               "AND 454Reads.Run = %s "
               "AND BestHit_Order.order_name = %s")
        cur.execute(sql, (method, Ordername))
        results = cur.fetchall()
    finally:
        con.close()

    # Each row is (ReadAccno, Region, Rich). The old comprehension iterated
    # with a single name and then used undefined ``y``/``z`` (NameError).
    DB = {accno: [region, rich] for accno, region, rich in results}

    res = []
    names = []
    for n in tree.get_leaf_names():
        names.append(n)
        try:
            region, abb = DB[n]
        except KeyError:
            temp = [0, 0]
        else:
            # ``regions`` is a module-level sequence defined outside this block.
            if region == regions[0]:
                temp = [abb, 0]
            else:
                temp = [0, abb]
        res.append(temp)
    return names, numpy.array(res).T
def __init__(self, filename, ftype="nexus", reroot=False, method="H1", seed=1234, thinning=100, sampling=10000, burnin=0.1, firstktrees=0, taxa_order=[]):
    """Load a collection of trees and remember the sampling settings.

    Args:
        filename: tree collection file.
        ftype: ``"nexus"`` reads through ``NexusReader``; any other value
            goes through ``self.raxmlTreeParser``.
        reroot, method, seed, thinning, sampling, burnin: stored unchanged
            on the instance.
        firstktrees: when between 1 and the number of trees read, only that
            many leading trees are kept.
        taxa_order: leaf order to use; when empty, the leaf names of the
            first tree are used.
    """
    # Plain settings are stored as-is.
    self.method = method
    self.seed = seed
    self.thinning = thinning
    self.sampling = sampling
    self.burnin = burnin
    self.firstktrees = firstktrees

    if ftype != "nexus":
        self.trees = self.raxmlTreeParser(filename)
    else:
        self.nexus = NexusReader(filename)
        self.nexus.blocks['trees'].detranslate()
        self.trees = self.nexus.trees.trees

    # Optionally truncate to the first k trees.
    if 0 < self.firstktrees <= len(self.trees):
        self.trees = self.trees[:self.firstktrees]

    self.taxa_order = taxa_order
    if len(self.taxa_order) == 0:
        first_tree = Tree(self.trees[0])
        self.taxa_order = first_tree.get_leaf_names()

    self.numtaxa = len(self.taxa_order)
    self.numtrees = len(self.trees)
    self.reroot = reroot
def smart_reroot(treefile, outgroupfile, outfile, format=0):
    """Reroot a Newick tree on the first outgroup prefix that matches a leaf.

    Tree reading format options are described at
    http://packages.python.org/ete2/tutorial/tutorial_trees.html#reading-newick-trees

    Args:
        treefile: Newick tree to reroot.
        outgroupfile: file with one outgroup name prefix per line, in order
            of preference. Blank lines are ignored.
        outfile: path the rerooted tree is written to.
        format: ete Newick format used for both reading and writing.

    Returns:
        ``outfile`` on success, or ``treefile`` unchanged when no prefix
        matches any leaf.
    """
    tree = Tree(treefile, format=format)
    # Leaves are scanned in reverse order of tree.get_leaves().
    leaves = [t.name for t in tree.get_leaves()][::-1]

    outgroup = []
    for o in must_open(outgroupfile):
        o = o.strip()
        if not o:
            # An empty prefix is a prefix of every leaf name, so a blank line
            # used to turn the whole tree into the "outgroup".
            continue
        outgroup = [leaf for leaf in leaves if leaf.startswith(o)]
        if outgroup:
            break

    if not outgroup:
        sys.stderr.write(
            "Outgroup not found. Tree {0} cannot be rerooted.\n".format(treefile))
        return treefile

    try:
        tree.set_outgroup(tree.get_common_ancestor(*outgroup))
    except ValueError:
        # Fall back to rooting on the first matching leaf by name.
        tree.set_outgroup(outgroup[0])

    tree.write(outfile=outfile, format=format)
    logging.debug("Rerooted tree printed to {0}".format(outfile))
    return outfile
def timing(tree_size, num_trees, num_samples):
    """Benchmark EMDUnifrac (with and without flow) against FastUnifrac.

    For each of ``num_trees`` random trees with ``tree_size`` leaves,
    ``num_samples`` simulated environment tables are generated and each of
    the three algorithms is timed once per table.

    Returns:
        Mean wall-clock seconds as the tuple
        ``(EMDUnifrac, EMDUnifrac_with_flow, FastUnifrac)``.
    """
    fast_secs = []
    emd_secs = []
    emd_flow_secs = []
    clock = timeit.default_timer

    for _ in range(num_trees):
        t = Tree()
        t.populate(tree_size, random_branches=True)
        newick = t.write(format=1)
        cogent_tree = DndParser(newick, UniFracTreeNode)
        (T, l, nodes_in_order) = EMDU.parse_tree(newick)

        for _ in range(num_samples):
            # FastUnifrac can only take weight on leaf nodes
            envs = EMDU.simulate_data(t.get_leaf_names())
            (envs_prob_dict, samples) = EMDU.parse_envs(envs, nodes_in_order)
            P = envs_prob_dict[samples[0]]
            Q = envs_prob_dict[samples[1]]

            # EMDUnifrac with flow
            started = clock()
            (Z, Flow, diffab) = EMDU.EMDUnifrac_weighted_flow(T, l, nodes_in_order, P, Q)
            stopped = clock()
            emd_flow_secs.append(stopped - started)

            # EMDUnifrac no flow
            started = clock()
            (Z, diffab) = EMDU.EMDUnifrac_weighted(T, l, nodes_in_order, P, Q)
            stopped = clock()
            emd_secs.append(stopped - started)

            # FastUnifrac weighted
            started = clock()
            res = fast_unifrac(cogent_tree, envs, weighted=True, modes=set(['distance_matrix']))
            stopped = clock()
            fast_secs.append(stopped - started)

    return (np.array(emd_secs).mean(),
            np.array(emd_flow_secs).mean(),
            np.array(fast_secs).mean())
def sanitizeByType(container, sanitizeby='tsv', onlycolumns=False):
    """Run ``sanitizeString`` over an iterable of strings and print the result.

    Args:
        container: iterable of text lines.
        sanitizeby: one of ``'line'`` (whole line), ``'tsv'`` (each
            tab-separated field), ``'newick'`` (leaf names of the tree formed
            by joining all lines) or ``'fasta'`` (record headers).
        onlycolumns: for ``'tsv'`` only; an iterable of 1-based column
            numbers to sanitize. When falsy, every column is sanitized.
    """
    assert sanitizeby in set(['line', 'tsv', 'newick', 'fasta'])

    if sanitizeby == 'line':
        for raw in container:
            print(sanitizeString(raw.strip("\r\n"), False))

    elif sanitizeby == 'tsv':
        for raw in container:
            if onlycolumns:
                fields = raw.strip("\r\n").split("\t")
                for col in onlycolumns:
                    # column numbers are 1-based
                    fields[col - 1] = sanitizeString(fields[col - 1], False)
            else:
                fields = [sanitizeString(cell.strip("\r\n"), False)
                          for cell in raw.split("\t")]
            print("\t".join(fields))

    elif sanitizeby == 'newick':
        from ete2 import Tree
        t = Tree("".join(container))
        # iterating a tree yields its leaves
        for leaf in t:
            leaf.name = sanitizeString(leaf.name, False)
        print(t.write())

    elif sanitizeby == 'fasta':
        from Bio import SeqIO
        from StringIO import StringIO
        from sys import stdout
        fasta = StringIO("".join(container))
        for seq_record in SeqIO.parse(fasta, "fasta"):
            # the sanitized full description becomes the id
            seq_record.id = sanitizeString(seq_record.description, False)
            seq_record.description = ''
            SeqIO.write(seq_record, stdout, "fasta")
def partition_main(args):
    """Build an initial tree by partitioning, then refine it by NNI and rerooting.

    Reads ``args.vcf``, writes the initial tree to ``<args.output>.pt0.nwk``
    and the refined tree to ``<args.output>.pt.nwk``. Both trees and the
    per-site score are also printed.

    Args:
        args: namespace providing ``het``, ``mu``, ``vcf``, ``min_ev`` and
            ``output``.
    """
    print(args, file=sys.stderr)

    # base genotype prior
    base_prior = make_base_prior(args.het, GTYPE3)
    # substitution rate matrix, with non-diagonal set to 0, with diagonal set to 0
    mm, mm0, mm1 = make_mut_matrix(args.mu, GTYPE3)

    vcffile, variants, DPRs, PLs = read_vcf(args.vcf, args.min_ev)
    n_site, n_smpl = PLs.shape[0:2]

    tree = Tree()
    # Partition on the first two genotype columns only when column 1 has the
    # larger mean standard error across samples; otherwise use all columns.
    use_first_two = sem(PLs[..., 1], axis=1).mean() > sem(PLs[..., 2], axis=1).mean()
    partition_input = PLs[..., 0:2] if use_first_two else PLs
    partition(partition_input, tree, np.arange(n_smpl), args.min_ev)

    init_tree(tree)
    PLs = PLs.astype(np.longdouble)
    populate_tree_PL(tree, PLs, mm, 'PL')
    populate_tree_PL(tree, PLs, mm0, 'PL0')
    calc_mut_likelihoods(tree, mm0, mm1)

    print(tree)
    tree.write(outfile=args.output + '.pt0.nwk', format=5)

    best_tree, best_PL = recursive_NNI(tree, mm0, mm1, base_prior)
    best_tree, best_PL = recursive_reroot(best_tree, mm0, mm1, base_prior)

    print(best_tree)
    print('PL_per_site = %.4f' % (best_PL / n_site))
    best_tree.write(outfile=args.output + '.pt.nwk', format=5)
def resolve_polytomies(infileName, outfileName):
    """Resolve all polytomies in a Newick tree and write the result.

    Only the first line of ``infileName`` is read. A ``'[&R] '`` rooting
    marker is removed before parsing.

    Args:
        infileName: path of the file whose first line is the Newick tree.
        outfileName: path the resolved tree is written to (format 1).
    """
    # Text mode plus a context manager: the input handle used to be leaked,
    # and binary mode mixed with str operations fails on Python 3.
    with open(infileName, 'r') as infile:
        newickString = infile.readline().rstrip().replace('[&R] ', '')

    tree = Tree(newickString)
    tree.resolve_polytomy(recursive=True)

    with open(outfileName, 'w') as outfile:
        outfile.write(tree.write(format=1))
def buildFreqTree(data_seq, depth):
    """Build a prefix tree counting sub-sequences of up to ``depth`` items.

    Every window of ``data_seq`` starting at each position (length at most
    ``depth``) is inserted as a root-to-node path. A new node starts with
    ``dist`` 1 and ``dist`` is incremented each time the path is walked
    again. Insertion stops at the first window with one or zero items.

    Returns:
        The root ``Tree``.
    """
    t = Tree()  # Creates an empty tree
    for start in range(0, len(data_seq)):
        # Slicing clamps at the end of the sequence on its own.
        window = data_seq[start:start + depth]
        if len(window) <= 1:
            break

        cursor = t.get_tree_root()
        for item in window:
            kids = cursor.get_children()
            kid_names = [kid.name for kid in kids]
            try:
                position = kid_names.index(item)
            except ValueError:
                # unseen continuation: open a new branch with count 1
                cursor = cursor.add_child(name=item, dist=1)
            else:
                cursor = kids[position]
                cursor.dist = cursor.dist + 1
    return t
def date_tree(tree):
    """Date each internal node of a Newick tree given in format 1.

    The tree is traversed in postorder. ``inner_type()`` distinguishes three
    kinds of internal node:

    * type 0: both children are leaves; the node's age is the divergence
      time of the two leaves.
    * type 1: one child is a leaf and the other is internal; the age is the
      divergence time of the leaf and the first leaf descending from the
      internal child.
    * type 2: both children are internal; the age is the divergence time of
      the first leaf descending from each child.

    The age is stored in ``node.dist``. The root and the leaves are left
    untouched.

    Args:
        tree: Newick string or file in format 1.

    Returns:
        The dated ``Tree``.
    """
    tree = Tree(tree, format=1)
    print("Tree loaded!")
    for node in tree.traverse("postorder"):
        print("Dating %s" % node.name)
        if node.is_root() or node.is_leaf():
            continue

        children = node.get_children()
        left, right = children[0], children[1]
        # Classify once instead of re-evaluating for every branch.
        kind = inner_type(node)

        if kind == 0:
            node.dist = date_node(left.name, right.name)
        elif kind == 1:
            if left.is_leaf():
                node.dist = date_node(left.name, right.get_leaf_names()[0])
            elif right.is_leaf():
                node.dist = date_node(left.get_leaf_names()[0], right.name)
        elif kind == 2:
            # Use the FIRST leaf on both sides, as documented. The right side
            # used to take index 1, i.e. the second leaf.
            node.dist = date_node(left.get_leaf_names()[0],
                                  right.get_leaf_names()[0])
    return tree
def compare_trees(tree_1, tree_2):
    """Return the Robinson-Foulds (symmetric) distance between two trees.

    Both arguments are handed to ``Tree``. Only the first element of the
    ``robinson_foulds`` result is returned.
    """
    first = Tree(tree_1)
    second = Tree(tree_2)
    return first.robinson_foulds(second)[0]
def small_parsimony(tree, strings):
    """Label internal nodes by a two-pass (bottom-up / top-down) parsimony scan.

    Args:
        tree: Newick tree (format 1) whose leaf names are keys of ``strings``.
            Only the first two children of each internal node are consulted.
        strings: mapping of leaf name to character string; the length of the
            first value gives the number of sites.

    Returns:
        ``(score, labels)`` where ``score`` counts the positions at which the
        two children's candidate sets were disjoint and ``labels`` maps each
        non-leaf node name to its reconstructed string.
    """
    tree = Tree(tree, format=1)
    n_sites = len(list(strings.values())[0])
    candidates = defaultdict(dict)
    labels = defaultdict(str)
    score = 0

    # Bottom-up: candidate state sets per node and site.
    for site in range(n_sites):
        for node in tree.traverse('postorder'):
            if node.is_leaf():
                candidates[node.name][site] = {strings[node.name][site]}
                continue
            kids = node.get_children()
            shared = candidates[kids[0].name][site] & candidates[kids[1].name][site]
            if shared:
                candidates[node.name][site] = shared
            else:
                candidates[node.name][site] = (
                    candidates[kids[0].name][site] | candidates[kids[1].name][site])
                score += 1

    # Top-down: keep the parent's state when allowed, otherwise pick any.
    for site in range(n_sites):
        for node in tree.traverse('preorder'):
            options = candidates[node.name][site]
            if node.up and labels[node.up.name][site] in options:
                labels[node.name] += labels[node.up.name][site]
            else:
                labels[node.name] += options.pop()

    internal_labels = {}
    for name, text in labels.items():
        if name not in strings:
            internal_labels[name] = text
    return score, internal_labels
def visualizeTree(sTreePath, pathToSfamilies, bootValue, width, height):
    """Load a species tree, attach per-node pie data and build its TreeStyle.

    Args:
        sTreePath: path handed to ``readTreeFromFile``.
        pathToSfamilies: forwarded to ``getFamiliesStatisticsForEachNode``.
        bootValue: forwarded to ``getFamiliesStatisticsForEachNode``.
        width, height: accepted for interface compatibility; not used here.

    Returns:
        ``(stree, ts)``: the annotated tree and a rectangular ``TreeStyle``
        that uses the module-level ``layout`` function.
    """
    # The old code first built a throw-away Tree() and an empty dict that
    # were immediately overwritten.
    stree = readTreeFromFile(sTreePath)
    snodesStatDic = getFamiliesStatisticsForEachNode(pathToSfamilies, bootValue)

    # Attach the raw statistics of each known node as ``pie_data``.
    # An unused ``reduce`` total was dropped: it raised TypeError whenever a
    # node's statistics list was empty.
    for n in stree.traverse():
        if n.name in snodesStatDic:
            n.add_features(pie_data=list(snodesStatDic[n.name]))

    ts = TreeStyle()
    # Set our custom layout function
    ts.layout_fn = layout
    # Rectangular drawing mode
    ts.mode = "r"
    ts.complete_branch_lines_when_necessary = True
    # Node names are added manually by the layout function
    ts.show_leaf_name = False
    return stree, ts
def findCombination(word, lstFunc, alphabet, offset, reprs):
    """Breadth-wise search for a chain of functions that reaches a target.

    A tree of "spaces" is grown from a root whose space is ``offset``. In
    every round (a "mutation") each current leaf is expanded once per
    function in ``lstFunc`` using ``generateSpaceEx``. As soon as one of the
    representations in ``reprs`` is contained in a generated space, the
    solution is reported through ``getSolution`` and the process exits.

    If ``word`` is already contained in ``alphabet`` the function reports
    that and exits immediately.
    """
    debug("findCombination(%s,%s,%s)" % (word, lstFunc, alphabet))

    spaceTree = Tree()
    spaceTree.add_features(space=offset)

    if contains(word, alphabet):
        info("Alphabet contains Word")
        info("PUSH %s" % word)
        exit()

    found = False
    mutation = 1
    while not found:
        info("Mutation: %d !" % mutation)
        # Snapshot of the current frontier; children are added below.
        for leaf in spaceTree.get_leaves():
            for func in lstFunc:
                # generate space from the new alphabet
                history = generateSpaceEx(func, leaf.space, alphabet)
                reachable = list(set([entry[0] for entry in history]))
                debugListHex(reachable, "SPACE")

                # check whether any word representation exists in the space
                for candidate in reprs:
                    if not contains(candidate, reachable):
                        continue
                    found = True
                    info("FOUND : %s" % candidate)
                    hit = leaf.add_child(name=func)
                    hit.add_features(space=reachable, history=history)
                    # path from the hit back to the root
                    lineage = [hit]
                    lineage.extend(hit.get_ancestors())
                    getSolution(candidate, offset, lineage)
                    exit()

                expanded = leaf.add_child(name=func)
                expanded.add_features(space=reachable, history=history)
        mutation = mutation + 1
def WriteDotFile(newick):
    """Write a Newick tree to a Graphviz DOT file.

    Unnamed nodes are renamed ``F0``, ``F1``, ... in traversal order and
    single quotes are stripped from existing names. Each non-root node
    becomes an undirected edge to its parent with ``len`` set to the branch
    length.

    :param newick: a string with newick tree structure
    :return: DOT file name (timestamp formatted with ``FILE_FORMAT`` + ``.gv``)
    """
    tree = Tree(newick)
    dot_file_name = datetime.datetime.now().strftime(FILE_FORMAT) + ".gv"

    # Give every node a usable identifier.
    unnamed = 0
    for n in tree.traverse():
        if not n.name:
            n.name = "F" + str(unnamed)
            unnamed += 1
        else:
            n.name = n.name.replace("\'", "")

    filecontent = []
    for n in tree.traverse():
        if n.up:
            # trailing zeros of the fixed-point length are trimmed
            length = "{:f}".format(n.dist).rstrip("0")
            filecontent.append(n.name + "--" + n.up.name + "[len=" + length + "]")
        else:
            filecontent.append(n.name)

    # The handle was previously never closed, so buffered output could be
    # lost if the caller read the file straight away.
    with open(dot_file_name, "w") as fileobj:
        fileobj.write("graph G{\nnode [shape=circle, style=filled];" + "\n")
        fileobj.write("\n".join(filecontent) + "}")
    return dot_file_name
def delete(file, target_file, taxa):
    """Remove the given taxa from every tree in a ``name=newick`` file.

    Each non-blank input line is split at its first ``=``. Leaves named in
    ``taxa`` are deleted. Trees left with fewer than three leaves are
    dropped; the others are written to ``target_file`` in the same
    ``name=newick`` form. The number of trees read is printed at the end.

    Args:
        file: input path, one ``name=newick`` entry per line.
        target_file: output path.
        taxa: iterable of leaf names to delete.
    """
    count = 0
    # Context managers close both files even if a tree fails to parse.
    with open(file) as f, open(target_file, "w") as t_file:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # blank lines (e.g. a trailing newline) used to raise IndexError
                continue
            # Split at the first '=' only: annotations inside the Newick
            # string may contain '=' themselves.
            parts = stripped.split("=", 1)
            t = Tree(parts[1])
            for taxon in taxa:
                for leaf in t.get_leaves_by_name(name=taxon):
                    leaf.delete()
            if len(t) >= 3:
                # prevent falsy trees for RAxML: skip single-child roots
                while len(t.children) == 1:
                    t = t.children[0]
                t_file.write(parts[0] + "=" + t.write() + "\n")
            count += 1
    print(count)
def ETETree(seqs, ref, metric):
    """Show a circular tree highlighting alleles by distance.

    Nodes whose ``metric`` value is at most 0.25 are shaded yellow; the
    leaves named in the module-level ``refalleles`` are shaded light green.
    The tree is read from ``temp.dnd`` after ``Genome.clustalAlignment`` has
    been run on ``seqs``, then shown interactively.

    Args:
        seqs: sequences passed to ``Genome.clustalAlignment``.
        ref: not used by the current code.
        metric: mapping of node name to distance.
    """
    from ete2 import Tree, PhyloTree, TreeStyle, NodeStyle
    aln = Genome.clustalAlignment(seqs=seqs)
    t = Tree('temp.dnd')
    #t.set_outgroup(t&ref)

    ts = TreeStyle()
    ts.show_leaf_name = True
    ts.mode = "c"
    ts.arc_start = -180
    ts.arc_span = 180

    cutoff = 0.25

    def within_cutoff(node):
        if node.name == 'NoName' or not node.name in metric:
            return False
        if metric[node.name] <= cutoff:
            return True

    matches = [node for node in t.traverse() if within_cutoff(node)]
    print("%d nodes have distance <=%s" % (len(matches), cutoff))

    close_style = NodeStyle()
    close_style["bgcolor"] = "Yellow"
    for node in matches:
        node.set_style(close_style)

    ref_style = NodeStyle()
    ref_style["bgcolor"] = "LightGreen"
    hlanodes = [t.get_leaves_by_name(name=r)[0] for r in refalleles]
    for node in hlanodes:
        node.set_style(ref_style)

    t.show(tree_style=ts)
    return
def parse_weird_tree(tree_string):
    """Convert a tree with ``[...{a,b,c}]`` annotations to plain Newick.

    Strategy: strip the bracketed annotations first while remembering every
    node that stands for several taxa (the first taxon inside the braces is
    the node's name, the remaining ones are extras), build the tree, then
    attach each extra taxon as a child of the node carrying the first name.

    Returns:
        The Newick string in format 1, including the root node.
    """
    plain_parts = []
    extra_taxa = {}
    for chunk in tree_string.split("]"):
        if "[" not in chunk:
            plain_parts.append(chunk)
            continue
        members = chunk.split("{")[1].split(",")
        if len(members) > 1:
            extra_taxa[members[0]] = members[1:]
        # keep only the text in front of the annotation
        plain_parts.append(chunk.split("[")[0])

    tree = Tree("".join(plain_parts), format=1)
    #tree.unroot()
    for node in tree.traverse():
        if node.name in extra_taxa:
            for raw_name in extra_taxa[node.name]:
                # the last extra still carries the closing brace
                node.add_child(name=raw_name.rstrip("}"))

    return tree.write(format=1, format_root_node=True)
def compute_GUniFrac(abundance, treefile, alpha=0.5, unweighted=False):
    """Compute pairwise (generalized) UniFrac distances between samples.

    Args:
        abundance: table indexed by OTU name with one column per sample
            (accessed through ``.columns``, ``.index`` and ``.loc``).
        treefile: Newick tree (format 1) whose leaf names must equal the set
            of OTU names in ``abundance.index``.
        alpha: exponent applied to ``p_a + p_b`` in the weighted variant.
        unweighted: when true, a branch counts fully if exactly one of the
            two samples is present below it.

    Returns:
        Square distance matrix produced by ``squareform``.
    """
    n_samples = len(abundance.columns)
    # Integer division: ``/`` yields a float on Python 3, which np.zeros rejects.
    n_distance = n_samples * (n_samples - 1) // 2
    d_array = np.zeros((n_distance))

    t = Tree(treefile, format=1)
    # NOTE: this fires whenever the two name sets differ in either direction.
    if set(t.get_leaf_names()) != set(abundance.index):
        print('Error: OTU table contains unknown OTUs. All of OTU names in OTU table should be contained in tree file.')
        quit()

    # The leaves below each branch do not depend on the sample pair, so they
    # are collected once instead of once per pair.
    branches = []
    for node in t.traverse():
        if node.is_root():
            continue
        leaves = [leaf for leaf in node.get_leaf_names() if leaf in abundance.index]
        branches.append((node.dist, leaves))

    for i, (sample1, sample2) in enumerate(itertools.combinations(abundance.columns, 2)):
        print('calculating  %s  vs.  %s ...' % (sample1, sample2))
        denom = 0.0
        numer = 0.0
        for dist, leaves in branches:
            p_a = 0.0
            p_b = 0.0
            for leaf in leaves:
                p_a += abundance.loc[leaf, sample1]
                p_b += abundance.loc[leaf, sample2]
            if p_a == 0.0 and p_b == 0.0:
                # branch not represented in either sample
                continue
            if unweighted:
                if p_a == 0.0 or p_b == 0.0:
                    numer += dist
                denom += dist
            else:
                denom += dist * (p_a + p_b) ** alpha
                numer += dist * (p_a + p_b) ** alpha * abs(p_a - p_b) / (p_a + p_b)
        d_array[i] = numer / denom
    return squareform(d_array)
def main():
    """Render a tree annotated with "rug" results and a text legend.

    Command-line arguments come from the module-level ``parser``:
    ``beta_metrics`` and ``otu_widths`` (comma-separated), ``input_dir``,
    ``output_fp`` and ``tree_fp``. The figure is written to ``output_fp``
    and then shown interactively.
    """
    args = parser.parse_args()
    beta_metrics = args.beta_metrics.split(',')
    otu_widths = args.otu_widths.split(',')
    input_dir = args.input_dir
    output_fp = args.output_fp
    tree_fp = args.tree_fp

    results_dict, labels_list = load_rug_dict(input_dir, beta_metrics, otu_widths)

    try:
        tree = Tree(tree_fp, format=3)
    except Exception:
        # Formerly a bare ``except:``, which also swallowed KeyboardInterrupt
        # and SystemExit. Any parse failure falls back to add_tip_branches.
        tree = add_tip_branches(tree_fp)

    annotate_tree_with_rugs(tree, results_dict, labels_list)

    ts = TreeStyle()
    # One legend column per position within each label row.
    for label_row in labels_list:
        for col, label in enumerate(label_row):
            ts.legend.add_face(TextFace(label, fsize=20), column=col)

    tree.render(output_fp, tree_style=ts)
    tree.show(tree_style=ts)
def write_xml(fname, E, C, l):
    """Write the tree encoded by an adjacency matrix to a PhyloXML file.

    Args:
        fname: output path.
        E: square matrix; ``E[i, j] == 1`` marks ``j`` as a child of ``i``.
            The root is taken to be the last index (``n - 1``).
        C: per-node matrix; the branch length to a child is the L1 norm of
            the difference of the two rows from column ``l`` onwards.
        l: first column of ``C`` used for branch lengths.
    """
    n, _ = E.shape
    root = Tree()
    root.name = str(n - 1)

    # Depth-first construction; node names are the row indices of E.
    stack = [root]
    while stack:
        cur = stack.pop()
        i = int(cur.name)
        child_idxs = np.where(E[i, :] == 1)[0]
        for ci in child_idxs:
            child = cur.add_child(name=str(ci))
            child.dist = np.linalg.norm(np.subtract(C[i, l:], C[ci, l:]), ord=1)
            stack.append(child)

    # format=1 gives branch lengths and names for all nodes (leaves and
    # internal); format_root_node=True puts the root node name in the string.
    newick_str = root.write(features=['name'], format=1, format_root_node=True)
    newick_tree = Phylo.read(StringIO(newick_str), 'newick')

    for clade in newick_tree.find_clades():
        # Phylo.read() interprets names of internal nodes as confidences
        # for newick strings, so move them back.
        if clade.confidence is not None:
            clade.name = clade.confidence
            clade.confidence = None

    xmltree = newick_tree.as_phyloxml()  # convert to PhyloXML.Phylogeny type
    # Close the output handle deterministically (it used to be leaked).
    with open(fname, 'w') as handle:
        Phylo.write(xmltree, handle, 'phyloxml')
def writeSeqsAndTree():
    """Rename tree leaves and write the matching aligned sequences and tree.

    For every leaf of ``TREE_FILE`` whose processed name is known in
    ``PROCESSED_TO_ALIGNED_NAMES``, the leaf is renamed (prefixed with its
    1-based position when ``ENUMERATE`` is set) and its aligned sequence is
    written in FASTA form to ``OUTPUT_ALIGNED_FILENAME``. The renamed tree
    is then written to ``OUTPUT_TREE_NEWICK_FILENAME``.
    """
    prepareNameDict()
    tree = Tree(TREE_FILE)

    # Change protein names in the data structures and write protein
    # sequences with changed names to file.
    with open(OUTPUT_ALIGNED_FILENAME, "w") as outputFile:
        for position, leaf in enumerate(tree.get_leaves(), 1):
            proteinName = leaf.name.strip("'")
            processedName = prepareName(proteinName)
            if processedName not in PROCESSED_TO_ALIGNED_NAMES:
                continue

            if ENUMERATE:
                leaf.name = str(position) + "_" + proteinName
                alnProtName = leaf.name
                alignedName = PROCESSED_TO_ALIGNED_NAMES[processedName]
                # re-key the sequence under the enumerated name
                ALIGNED_PROTEIN_NAME_TO_SEQ[alnProtName] = ALIGNED_PROTEIN_NAME_TO_SEQ[alignedName]
                del ALIGNED_PROTEIN_NAME_TO_SEQ[alignedName]
            else:
                leaf.name = proteinName
                alnProtName = PROCESSED_TO_ALIGNED_NAMES[processedName]

            outputFile.write(">" + leaf.name + "\n")
            outputFile.write(str(ALIGNED_PROTEIN_NAME_TO_SEQ[alnProtName]) + "\n")

    tree.write(outfile=OUTPUT_TREE_NEWICK_FILENAME)
def ete_tree(aln):
    """Show the tree in ``temp.dnd`` as a half-circle with leaf names.

    Args:
        aln: not used by the current code.

    NOTE(review): the distance filter is currently disabled (its metric
    checks are commented out), so no node is ever highlighted.
    """
    from ete2 import Tree, PhyloTree, TreeStyle, NodeStyle
    t = Tree('temp.dnd')

    ts = TreeStyle()
    ts.show_leaf_name = True
    ts.mode = "c"
    ts.arc_start = -180
    ts.arc_span = 180

    cutoff = 0.25

    def func(node):
        if node.name == 'NoName':  #or not node.name in metric:
            return False
        #if metric[node.name]<=cutoff:
        #    return True

    # A list comprehension instead of filter(): on Python 3 filter() returns
    # an iterator and len() on it raises TypeError.
    matches = [node for node in t.traverse() if func(node)]
    # Single formatted string so the output is the same on Python 2 and 3.
    print("%d nodes have distance <=%s" % (len(matches), cutoff))

    nst1 = NodeStyle()
    nst1["bgcolor"] = "Yellow"
    for n in matches:
        n.set_style(nst1)

    #nst2 = NodeStyle()
    #nst2["bgcolor"] = "LightGreen"
    #hlanodes = [t.get_leaves_by_name(name=r)[0] for r in refalleles]
    #for n in hlanodes:
    #    n.set_style(nst2)

    t.show(tree_style=ts)
    return
def show(self, i=0):
    """Render this object as a tree image and open the interactive viewer.

    ``str(self)`` followed by ``;`` is parsed as Newick. The picture is
    saved as ``mytree-<i>.png`` (183 mm wide, rotated by 90 degrees, leaf
    names shown).
    """
    newick = str(self) + ";"
    t = Tree(newick)

    ts = TreeStyle()
    ts.show_leaf_name = True
    ts.rotation = 90

    image_name = "mytree-{0}.png".format(i)
    t.render(image_name, w=183, units="mm", tree_style=ts)
    t.show(tree_style=ts)
def map_cafe_to_tree(clusters, cafe, tree):
    """Map the counts reconstructed by CAFE onto each cluster's tree.

    For every cluster the CAFE tree string (first element of
    ``cafe.clusters[cluster.name]``) is parsed and numbered with
    ``add_num_to_nodes``. Leaf names are split on ``_`` into key and count;
    for internal nodes the name minus its first character is the count,
    keyed by ``node.num``. The counts are then stored as ``node.cafe`` on
    the nodes of ``cluster.tree``.

    Some postprocessing, like parsing the tree object, is done in here and
    should move into the parser at some point.

    Args:
        clusters: iterable of clusters with ``name`` and ``tree`` attributes.
        cafe: parser result exposing a ``clusters`` mapping.
        tree: not used by the current code.

    Returns:
        ``clusters`` with annotated trees.
    """
    for cluster in clusters:
        newick = cafe.clusters[cluster.name][0]
        cafe_tree = add_num_to_nodes(Tree(newick + ";", format=1))

        counts = {}
        for node in cafe_tree.traverse("postorder"):
            if node.is_leaf():
                pieces = node.name.split("_")
                counts[pieces[0]] = pieces[1]
            else:
                counts[node.num] = node.name[1:]

        for node in cluster.tree.traverse("postorder"):
            key = node.name if node.is_leaf() else node.num
            node.cafe = counts[key]
    return clusters
class K_Graph(object):
    """Small knowledge graph: topics under a root, points under topics.

    The graph is an ete ``Tree`` stored in ``self.theme``; ``self.topic`` is
    the most recently added topic name.
    """

    def __init__(self):
        self.theme = Tree()
        self.topic = ''

    def add_point(self, topic, point):
        """Add ``point`` as a child of every node whose name occurs in ``topic``.

        NOTE(review): the match is ``node.name in topic`` (a substring test
        when both are strings), so an empty node name matches every topic;
        confirm whether equality was intended.
        """
        # Snapshot the nodes first. Adding children while the traversal
        # generator is running makes it visit the new children too, which
        # never terminates when ``point`` itself matches ``topic``.
        for t in list(self.theme.traverse()):
            if t.name in topic:
                t.add_child(name=point)

    def add_topic(self, topic):
        """Add ``topic`` under the root and make it the current topic."""
        self.theme.add_child(name=topic)
        self.topic = topic

    def getCurrentGraph(self):
        """Return the first node whose name occurs in the current topic.

        Returns ``None`` when no node matches.
        """
        for t in self.theme.traverse():
            if t.name in self.topic:
                return t
        return None

    def get_topic(self):
        """Return the current topic name."""
        return self.topic

    def save(self):
        """Pickle this graph to ``data.pickle`` in the working directory."""
        with open('data.pickle', 'wb') as f:
            # Pickle with the highest protocol available.
            pickle.dump(self, f, pickle.HIGHEST_PROTOCOL)

    def load(self):
        """Return the graph stored in ``data.pickle``; ``self`` is unchanged.

        SECURITY: unpickling runs arbitrary code; only load files that this
        program wrote itself.
        """
        with open('data.pickle', 'rb') as f:
            # The protocol version is detected automatically.
            return pickle.load(f)
def neighbor_joining(D, tree, internals):
    #fsum will have better precision when adding distances across sites
    #based on PLs not mutation
    """
    Repeatedly join the closest pair of active nodes under a new internal node.

    Each round picks the pair with the smallest Q value, detaches both nodes
    from ``tree``, re-attaches them under a newly created node and records
    the new node's distances in ``D``. The loop stops when only two active
    nodes remain.

    Args:
        D (np.array): pairwise differences between samples based on PLs (passing copy).
            It is written at row/column ``len(D) + 2 - m``, so it must
            already have room for the internal nodes created here
            (presumably pre-sized by the caller -- TODO confirm).
        tree (Tree): tree of class Tree with num tips = num samples; nodes are
            looked up by name with ``tree & str(index)``.
        internals (np.array): array of sample numbers

    Returns:
        D (np.array): updated pairwise differences now there are internal nodes to compare
        tree (Tree): the modified tree
    """
    print('neighbor_joining() begin', end=' ', file=sys.stderr)
    m = len(internals)
    while m > 2:  #if m is 2 then only two connected to root
        d = D[internals[:, None], internals]  #initially D matrix w/o 0 distance btwn internal nodes; then add in nodes as they have distances
        u = d.sum(axis=1) / (m - 2)
        Q = np.zeros(shape=(m, m), dtype=np.longdouble)
        for i, j in itertools.combinations(xrange(m), 2):  #std Q matrix calc
            Q[i, j] = d[i, j] - u[i] - u[j]
            Q[j, i] = Q[i, j]
        #print(Q.astype(int))
        # the diagonal must never win the argmin below
        np.fill_diagonal(Q, np.inf)
        #print(np.unique(Q, return_counts=True))
        i, j = np.unravel_index(Q.argmin(), (m, m))  #location in matrix of smallest Q value (ie closest nodes/tips)
        # index (and name) of the new internal node
        l = len(D) + 2 - m
        # NOTE(review): the new-node distance is not halved here, unlike the
        # textbook neighbor-joining update -- confirm this is intentional.
        for k in xrange(m):
            D[l, internals[k]] = D[internals[k], l] = d[i, k] + d[j, k] - d[i, j]
        # branch lengths from the new node to the two joined nodes
        D[l, internals[i]] = D[internals[i], l] = vi = (d[i, j] + u[i] - u[j]) / 2
        D[l, internals[j]] = D[internals[j], l] = vj = (d[i, j] + u[j] - u[i]) / 2
        ci = tree & str(internals[i])
        cj = tree & str(internals[j])
        ci.detach()
        cj.detach()
        node = Tree(name=str(l))
        # branch lengths are truncated to integers when stored on the tree
        node.add_child(ci, dist=int(vi))
        node.add_child(cj, dist=int(vj))
        tree.add_child(node)
        #print(tree)
        # replace the joined pair by the new node in the active set
        internals = np.delete(internals, [i, j])
        internals = np.append(internals, l)
        m = len(internals)
        print('.', end='', file=sys.stderr)
    print(' done', file=sys.stderr)
    return D, tree
def date_tree(tree):
    """Date each internal node of a Newick tree given in format 1.

    The tree is traversed in postorder. ``inner_type()`` distinguishes three
    kinds of internal node:

    * type 0: both children are leaves; the node's age is the divergence
      time of the two leaves.
    * type 1: one child is a leaf and the other is internal; the age is the
      divergence time of the leaf and the first leaf descending from the
      internal child.
    * type 2: both children are internal; the age is the divergence time of
      the first leaf descending from each child.

    The age is stored in ``node.dist``. The root and the leaves are left
    untouched.

    Args:
        tree: Newick string or file in format 1.

    Returns:
        The dated ``Tree``.
    """
    tree = Tree(tree, format=1)
    print("Tree loaded!")
    for node in tree.traverse("postorder"):
        print("Dating %s" % node.name)
        if node.is_root() or node.is_leaf():
            continue

        children = node.get_children()
        left, right = children[0], children[1]
        # Classify once instead of re-evaluating for every branch.
        kind = inner_type(node)

        if kind == 0:
            node.dist = date_node(left.name, right.name)
        elif kind == 1:
            if left.is_leaf():
                node.dist = date_node(left.name, right.get_leaf_names()[0])
            elif right.is_leaf():
                node.dist = date_node(left.get_leaf_names()[0], right.name)
        elif kind == 2:
            # Use the FIRST leaf on both sides, as documented. The right side
            # used to take index 1, i.e. the second leaf.
            node.dist = date_node(left.get_leaf_names()[0],
                                  right.get_leaf_names()[0])
    return tree
def __init__(self, tree, start_config=None, reroot=False, startmethod="H0", min_br=0.0001, seed=1234, thinning=100, sampling=10000, burning=0.1, taxa_order=None):
    """Set up the sampler state from a tree and an optional start setting.

    Args:
        tree: tree input. It is passed to ``exponential_mixture`` when no
            start configuration is given, otherwise parsed with
            ``Tree(tree, format=1)``.
        start_config: initial delimitation setting. When ``None`` one is
            searched with ``exponential_mixture`` using ``startmethod`` and
            ``reroot``.
        reroot: forwarded to the initial search.
        startmethod: search strategy for the initial setting.
        min_br, thinning, sampling, burning: stored unchanged.
        seed: seed of the private random number generator.
        taxa_order: leaf order; when ``None`` or empty the tree's own leaf
            order is used.
    """
    # ``is None`` instead of ``== None``: equality could be hijacked by a
    # setting object that defines __eq__.
    if start_config is None:
        me = exponential_mixture(tree=tree)
        me.search(strategy=startmethod, reroot=reroot)
        me.count_species(print_log=False, pv=0.0)
        self.tree = me.tree
        self.current_setting = me.max_setting
    else:
        self.current_setting = start_config
        self.tree = Tree(tree, format=1)

    self.burning = burning
    self.last_setting = self.current_setting
    self.current_logl = self.current_setting.get_log_l()
    self.last_logl = self.last_setting.get_log_l()
    self.min_br = min_br
    self.rand_nr = random.Random()
    self.rand_nr.seed(seed)
    self.thinning = thinning
    self.sampling = sampling

    if taxa_order is None or len(taxa_order) == 0:
        self.taxaorder = self.tree.get_leaf_names()
    else:
        self.taxaorder = taxa_order
    self.numtaxa = len(self.taxaorder)

    self.partitions = []
    self.llhs = []
    self.nsplit = 0
    self.nmerge = 0

    # remember the ML partition
    self.maxllh = self.current_logl
    _, spe = self.current_setting.output_species(taxa_order=self.taxaorder)
    self.maxpar = spe
    self.max_setting = self.current_setting

    # record all delimitation settings for plotting, this could consume a
    # lot of MEM
    self.settings = []
def calDistanceMatrix(wordlist, treeList):
    """Print a matrix of minimum tree distances between word synsets.

    Each word is looked up in ``db.wordSynsetMap``; words without an entry
    are skipped. For every pair of found synsets, the smallest distance over
    all trees and all matching nodes is stored symmetrically. Unset cells
    keep the initial value 100. The first synset is special-cased: instead
    of its own name, a fixed list of ``travel`` synsets is used as source.

    NOTE(review): matrix indices follow the list of found synsets while the
    printed words are taken from ``wordlist`` at the same indices; these
    drift apart as soon as a word is missing from the database -- confirm.

    Args:
        wordlist: sequence of words; the matrix is ``len(wordlist)`` square.
        treeList: iterable of trees supporting ``search_nodes(name=...)``.
    """
    # Sized from the input. The shape used to be hard-coded to 10 x 10 and
    # failed for any other number of words.
    n_words = len(wordlist)
    distanceMatrix = np.zeros(n_words ** 2) + 100
    distanceMatrix = distanceMatrix.reshape(n_words, n_words)

    synsetList = []
    for word in wordlist:
        # One query instead of ``find(...).count()`` followed by ``find(...)[0]``.
        entry = db.wordSynsetMap.find_one({'word': word})
        if entry is not None:
            synsetList.append(entry['synset'])

    travel_synsets = ['travel.n.01', 'travel.v.03', 'travel.v.04',
                      'travel.v.05', 'travel.v.06']

    for i in range(len(synsetList)):
        # Both former branches ran the same loops; only the sources differ.
        sources = travel_synsets if i == 0 else [synsetList[i]]
        for tree in treeList:
            for synset in sources:
                for pos1 in tree.search_nodes(name=synset):
                    for k in range(i + 1, len(synsetList)):
                        for pos2 in tree.search_nodes(name=synsetList[k]):
                            distance = Tree.get_distance(pos1, pos2)
                            print("%s %s %s %s" % (synsetList[i], synsetList[k],
                                                   wordlist[i], wordlist[k]))
                            if distance < distanceMatrix[i][k]:
                                distanceMatrix[i][k] = distance
                                distanceMatrix[k][i] = distance
    print(distanceMatrix)
def make_tree(treefile, image_file, clone_info):
    """Render a clone tree with a colour legend per sampling week.

    Args:
        treefile: Newick tree (format 1).
        image_file: output image path handed to ``Tree.render``.
        clone_info: mapping of clone name to a pair whose first item is a
            1-based colour index and whose second item sets the node size.
    """
    colour_list = ['MidnightBlue', 'RoyalBlue', 'LightSkyBlue', 'Aquamarine',
                   'SpringGreen', 'GreenYellow', 'Gold', 'DarkOrange']
    # Only this list was ever in effect; an earlier 8-entry assignment was
    # immediately overwritten and has been removed.
    weeks = ['6', '14', '53', '92', '144']

    t = Tree(treefile, format=1)
    ts = TreeStyle()
    # One legend entry per week. The count used to be a literal 5.
    for colour, week in zip(colour_list, weeks):
        ts.legend.add_face(CircleFace(20, colour), column=0)
        ts.legend.add_face(TextFace('week' + week), column=1)
    ts.legend_position = 2
    ts.show_leaf_name = True
    ts.branch_vertical_margin = 15
    ts.rotation = 90

    for node in t.traverse():
        node.name = node.name.replace("'", "").replace(".", ",")
        # the clone name is the text before the first space
        name = node.name.split(' ')[0]
        print(name)
        if name in clone_info:
            style_node(node,
                       colour_list[int(clone_info[name][0]) - 1],
                       int(int(clone_info[name][1]) / 10) + 5)
        # label named internal nodes above their branch
        if not node.is_leaf() and node.name != 'NoName':
            f = TextFace(node.name)
            f.margin_top = 2.5
            f.margin_bottom = 2.5
            f.margin_right = 2.5
            f.margin_left = 2.5
            node.add_face(f, column=0, position="branch-top")

    t.render(image_file, tree_style=ts)
def tree_from_character_table(species, table):
    """Build a tree from a character table by joining pairs of columns.

    Every row is passed through ``invert`` and the rows are sorted by their
    number of ``'1'`` characters. While rows remain, the first row with
    exactly two ``'1'`` entries is consumed: the two corresponding nodes are
    moved under a new node, and the second column is dropped from the
    remaining rows.

    Args:
        species: leaf names, one per table column.
        table: iterable of character rows.

    Returns:
        The resulting ``Tree``, or ``None`` when rows remain but none has
        exactly two ``'1'`` entries.
    """
    tree = Tree()
    root = tree.get_tree_root()
    table = sorted([invert(row) for row in table], key=lambda r: r.count('1'))
    leaves = [root.add_child(name=specie) for specie in species]

    while table:
        for row in table:
            if row.count('1') != 2:
                continue
            first, second = [pos for pos, flag in enumerate(row) if flag == '1']
            node_a, node_b = leaves[first], leaves[second]
            # the merged clade takes the place of the first column
            merged = root.add_child()
            leaves[first] = merged
            merged.add_child(node_a.detach())
            merged.add_child(node_b.detach())
            table.remove(row)
            del leaves[second]
            table = [other[:second] + other[second + 1:] for other in table]
            break
        else:
            return None
    return tree
def get_example_tree():
    """Return a random 20-leaf tree and a circular TreeStyle for it.

    Every node gets a random integer ``weight`` feature between 0 and 50.
    The style uses the module-level ``layout`` function, hides the default
    leaf names and shows branch length and support.
    """
    # Random tree
    t = Tree()
    t.populate(20, random_branches=True)

    # A random weight on every node
    for node in t.traverse():
        node.add_features(weight=random.randint(0, 50))

    ts = TreeStyle()
    ts.layout_fn = layout             # custom layout function
    ts.mode = "c"                     # circular drawing
    ts.show_leaf_name = False         # node names are added manually
    ts.show_branch_length = True
    ts.show_branch_support = True
    return t, ts
def main(args):
    """Convert a GTR/CDT clustering pair into a Newick tree.

    Args:
        args: ``(gtr_file, cdt_file, nwk_file)``. The CDT file maps gene ids
            to names (two header rows are skipped, names are upper-cased);
            the GTR file lists one join per row, parsed with ``GTRLine``.

    Raises:
        ValueError: if the GTR file contains no rows.
    """
    gtr_file, cdt_file, nwk_file = args

    # ``file()`` and ``reader.next()`` exist only on Python 2; open()/next()
    # work on both, and the context managers close the handles.
    gid_to_name = {}
    with open(cdt_file) as cdt_handle:
        reader = csv.reader(cdt_handle, delimiter="\t")
        next(reader)  # header
        next(reader)  # EWEIGHT
        for row in reader:
            gid, name = row[:2]
            gid_to_name[gid] = name.upper()

    nodes = {}
    t = None
    with open(gtr_file) as gtr_handle:
        for gtr in map(GTRLine._make, csv.reader(gtr_handle, delimiter="\t")):
            node = Tree()
            parent_name, parent_dist = gtr.parent, float(gtr.dist)
            for child in (gtr.left_child, gtr.right_child):
                if child in gid_to_name:
                    # leaf: the branch length is relative to 1
                    node.add_child(name=gid_to_name[child], dist=1 - parent_dist)
                else:
                    assert child in nodes, child
                    child_node, child_dist = nodes[child]
                    node.add_child(child_node, dist=child_dist - parent_dist)
            nodes[parent_name] = (node, parent_dist)
            # the last join in the file is the root
            t = node

    if t is None:
        raise ValueError("no rows found in GTR file %s" % gtr_file)

    sys.stderr.write("writing newick tree to %s\n" % nwk_file)
    t.write(format=5, outfile=nwk_file)
class TrackedItem(object):
    """An item with accumulated statistics and a mirror node in an ete tree."""

    def __init__(self):
        self.name = ''
        self.parent = None
        self.data = DataStore(float)
        self.leaf = False
        self.node = Tree()

    @property
    def root(self):
        """Top-most ancestor of this item (the item itself if it has no parent)."""
        if self.parent:
            return self.parent.root
        return self

    def update_stats(self, name, parent, data, sf):
        """Merge ``data`` into this item and refresh its tree node.

        Args:
            name: new name for the item and its node.
            parent: parent item, or a falsy value for none. The node is
                attached to the parent's node unless it is already a child.
            data: statistics merged into ``self.data``.
            sf: key of ``self.data`` whose value becomes the node's ``weight``.
        """
        self.data.merge(data)
        self.name = self.node.name = name
        self.node.item = self

        already_attached = parent and self.node in parent.node.children
        if parent and not already_attached:
            self.parent = parent
            parent.node.add_child(self.node)

        self.node.add_feature("weight", self.data[sf])
        for key in self.data:
            self.node.add_feature(key, self.data[key])

    def __str__(self):
        parts = []
        for key in self.data:
            parts.append("%d %s" % (self.data[key], key))
        return "%s: %s" % (self.name, ','.join(parts))
def setUp(self):
    """Create the fixtures: a 4-leaf tree, a circular style and an alignment.

    The tree is ``root -> (Left -> (Alpha, Beta), Right -> (Gamma, Delta))``
    with every branch length set to 0.
    """
    tree = Tree()
    root = tree.get_tree_root()
    root.dist = 0
    root.name = "root"

    left = root.add_child(name="Left")
    left.add_child(name="Alpha")
    left.add_child(name="Beta")

    right = root.add_child(name="Right")
    right.add_child(name="Gamma")
    right.add_child(name="Delta")

    for descendant in tree.iter_descendants():
        descendant.dist = 0

    # full-circle drawing with leaf names and no branch lengths
    style = TreeStyle()
    style.show_leaf_name = True
    style.show_branch_length = False
    style.mode = "c"
    style.arc_start = 0
    style.arc_span = 360

    self.circular_style = style
    self.exampleTree = tree
    self.alignment = MultipleSeqAlignment([
        SeqRecord(Seq("AAG", generic_dna), id="Alpha"),
        SeqRecord(Seq("AGA", generic_dna), id="Beta"),
        SeqRecord(Seq("AAA", generic_dna), id="Gamma"),
        SeqRecord(Seq("GGA", generic_dna), id="Delta"),
    ])
def write_json_files(OTU_table, target_directory):
    """Write one JSON file of child abundances per internal tree node.

    The OTU table is first labelled with latin names (written next to it
    with a ``.taxonomies`` suffix). Then, for each internal node of the
    ``fill_mod.newick`` template tree (the root itself is not visited), the
    collapsed contents of its children are dumped to
    ``<target_directory>/<node name>.json``. The file is created even when
    there is nothing to dump.

    Args:
        OTU_table: path of the OTU table.
        target_directory: directory receiving the JSON files.
    """
    tree_file = '/home/ubuntu/templates/fill_mod.newick'
    OTU_table_labeled = OTU_table + '.taxonomies'
    Taxonomy.convert_GGIDs_to_latin_names(
        OTU_table, OTU_table_labeled,
        '/home/ubuntu/databases/gg_13_5_otus/taxonomy/97_otu_taxonomy.txt')

    # Load tree and keep only the internal nodes, in preorder
    tree = Tree(tree_file, format=1)
    internals = [node for node in tree.get_descendants("preorder")
                 if not node.is_leaf()]

    # Write each node's JSON
    for node in internals:
        node_taxonomies = taxonomy_parser(node.name)
        node_name = node_taxonomies[node_taxonomies['Level']]
        children_taxa = node_taxonomies['Children']
        json_dict = Taxonomy.collapse_taxonomic_contents_for_json(
            OTU_table_labeled, children_taxa, node_name)
        out_path = os.path.join(target_directory, node_name + '.json')
        with open(out_path, 'w') as outfile:
            if len(json_dict) > 0:
                json.dump(json_dict, outfile, sort_keys=True)
def read_tree(tree):
    """Load *tree*, clean up quoted node names and return it as newick.

    Names that begin with a single quote have all single quotes removed and
    spaces turned into underscores. The tree is serialised with ETE newick
    format 9.
    """
    parsed = Tree(tree)
    for node in parsed.traverse():
        label = node.name
        if not label.startswith("""'"""):
            continue
        node.name = label.replace("""'""", "").replace(" ", "_")
    return parsed.write(format=9)
def calculate_mislabels_distance(self, human_ranks, bestplace_bid):
    """Average distance from the best placement to each labelled rank.

    For every position ``i`` in *human_ranks*, collects the tree nodes whose
    entry in ``self.bid_taxonomy_map`` has the same rank at position ``i``
    and averages their distance to the node whose ``B`` attribute equals
    *bestplace_bid*.

    Returns ``(distance, node_distance)``: two lists with one float per rank,
    the first using branch lengths and the second counting nodes
    (``topology_only=True``). A rank with no matching node contributes 0.0.

    Raises IndexError when *bestplace_bid*, or a matching bid, is not found
    in the tree.
    """
    t = Tree(self.tree, format=1)
    bestnode = t.search_nodes(B=bestplace_bid)[0]

    # search_nodes() walks the tree, so resolve each bid at most once
    # instead of once per rank level.
    node_by_bid = {}

    def _node_for(bid):
        key = str(bid)
        if key not in node_by_bid:
            node_by_bid[key] = t.search_nodes(B=key)[0]
        return node_by_bid[key]

    distance = []
    node_distance = []
    for i, rank in enumerate(human_ranks):
        # The original special-cased i == 5 but both branches were
        # identical, so a single filter is equivalent.
        curr_rank_nodes = [_node_for(bid)
                           for bid, curr_ranks in self.bid_taxonomy_map.items()
                           if curr_ranks[i] == rank]
        if curr_rank_nodes:
            num_nodes = float(len(curr_rank_nodes))
            sumdis = sum((bestnode.get_distance(node)
                          for node in curr_rank_nodes), 0.0)
            sumnodedis = sum((bestnode.get_distance(node, topology_only=True)
                              for node in curr_rank_nodes), 0.0)
            distance.append(sumdis / num_nodes)
            node_distance.append(sumnodedis / num_nodes)
        else:
            distance.append(0.0)
            node_distance.append(0.0)

    print("Average distance from best EPA-placement to original labeled ranks: \n " + str(distance))
    print("Average node distance from best EPA-placement to original labeled ranks: \n " +str(node_distance))
    return distance, node_distance
def parse_bootrep_file(fname, root, bootrep_num):
    """Split a bootstrap-replicate file into one temporary tree file per replicate.

    Each input line is ``<replicate number><TAB><newick>`` (the newick may be
    wrapped in double quotes). Trees are rooted on *root* and grouped by
    replicate number; reading stops at the first line whose replicate number
    exceeds *bootrep_num*.

    Returns ``(trees_per_replicate, temp_file_paths)``. The temporary files
    are written in ascending replicate order, one newick (ETE format 5) per
    line, and are NOT deleted here.

    Raises AssertionError when replicates do not all hold the same number of
    trees (this also fires when no replicate was read).
    """
    bootreps = defaultdict(list)
    sys.stdout.write("Parsing bootrep file")
    sys.stdout.flush()
    printrep = 1
    # 'rU' (universal newlines) is kept for the Python 2 runtime this file
    # targets; the handle is now closed deterministically.
    with open(fname, 'rU') as handle:
        for line in handle:
            repnum, tree_string = line.strip().split('\t')
            repnum = int(repnum)
            # Progress dot each time a new replicate number is reached.
            if repnum > printrep:
                sys.stdout.write('.')
                sys.stdout.flush()
                printrep = repnum
            if repnum > bootrep_num:
                break
            tree = Tree(tree_string.strip('"'))
            tree.set_outgroup(root)
            bootreps[repnum].append(tree)

    genes = list(set(len(trees) for trees in bootreps.values()))
    assert len(genes) == 1, "replicates contain differing numbers of trees"

    bootrep_temp_files = []
    # Sorted so the returned paths follow replicate order on every run.
    for repnum, trees in sorted(bootreps.items()):
        temp_fd, temp_out = tempfile.mkstemp(prefix='{}-'.format(repnum),
                                             suffix='.mpest-bootrep')
        # fdopen takes ownership of the descriptor and closes it even if a
        # write fails.
        with os.fdopen(temp_fd, 'w') as temp_file:
            for tree in trees:
                temp_file.write(tree.write(format=5) + "\n")
        bootrep_temp_files.append(temp_out)
    return genes[0], bootrep_temp_files
def map_cafe_tree(self, cafe_file):
    """Replace ``self.tree`` with the id-annotated tree from a CAFE report.

    Looks for lines starting with ``# IDs`` in *cafe_file*, parses the newick
    that follows the first colon, and for every node:

    * adds the features ``ident``, ``branch_p``, ``position`` and ``count``;
    * leaves: ``ident`` becomes the first run of digits in the label and the
      label is cut at the first ``<``;
    * internal non-root nodes: ``ident`` is taken the same way and the node is
      renamed after the common ancestor of its first two children in the
      current ``self.tree``.

    Raises AttributeError when a leaf or internal non-root label contains no
    digits.
    """
    id_pattern = re.compile(r"\d+")
    with open(cafe_file, "r") as handle:
        lines = handle.readlines()

    for line in lines:
        if line[0:5] != "# IDs":
            continue
        # Split on the first colon only, so colons inside the newick
        # (e.g. branch lengths) do not truncate the tree.
        newick = line.split(":", 1)[1].strip()
        tree = Tree(newick + ";", format=1)
        # Postorder: children are already renamed when their parent is
        # processed, which the ancestor lookup below relies on.
        for node in tree.traverse("postorder"):
            node.add_features(ident=None, branch_p="na", position=None, count=0)
            if node.is_leaf():
                node.ident = id_pattern.search(node.name).group(0)
                pos = node.name.find("<")
                # find() returns -1 when "<" is absent; slicing with -1
                # would silently drop the last character of the name.
                if pos != -1:
                    node.name = node.name[:pos]
            elif node.up:
                child_1 = node.children[0].name
                child_2 = node.children[1].name
                ancestor = self.tree.get_common_ancestor(child_1, child_2)
                node.ident = id_pattern.search(node.name).group(0)
                node.name = ancestor.name
        self.tree = tree
def map_cafe_to_tree(clusters, cafe, tree):
    '''Attach CAFE-reconstructed counts to the nodes of every cluster tree.

    For each cluster the first entry of ``cafe.clusters[cluster.name]`` is
    parsed as a newick tree and numbered with ``add_num_to_nodes``. Leaf
    labels are split on "_" into key and count; internal labels give their
    count after dropping the first character and are keyed by ``node.num``.
    The counts are then stored as ``node.cafe`` on the matching nodes of
    ``cluster.tree``.

    The ``tree`` argument is not used by this function.
    Returns the same ``clusters`` iterable that was passed in.
    '''
    for cluster in clusters:
        c_counts = {}
        newick = cafe.clusters[cluster.name][0]
        cafe_tree = add_num_to_nodes(Tree(newick + ";", format=1))

        for cafe_node in cafe_tree.traverse("postorder"):
            if not cafe_node.is_leaf():
                c_counts[cafe_node.num] = cafe_node.name[1:]
                continue
            fields = cafe_node.name.split("_")
            c_counts[fields[0]] = fields[1]

        for target in cluster.tree.traverse("postorder"):
            key = target.name if target.is_leaf() else target.num
            target.cafe = c_counts[key]
    return clusters
def get_tree_object_in_newick(tree, id_to_sample_dict, normalize_branches=False):
    """Convert a binary cluster hierarchy into a newick string.

    tree               -- root of the hierarchy, i.e. ``hcluster.to_tree(c_res)``;
                          nodes expose ``left``, ``right``, ``dist``, ``id``
                          and ``is_leaf()``.
    id_to_sample_dict  -- maps a leaf's ``id`` to the name used in the output.
    normalize_branches -- when true the tree goes through
                          ``get_normalized_newick`` before serialising.

    Internal nodes are named ``'Int' + str(id)`` and every child receives
    half of its parent's ``dist`` as branch length.
    """
    ete_root = Tree()
    ete_root.dist = 0
    ete_root.name = "root"

    converted = {tree: ete_root}
    pending = [tree]
    while pending:
        current = pending.pop()
        branch_length = current.dist / 2.0
        for child in (current.left, current.right):
            if not child:
                continue
            ete_child = Tree()
            ete_child.dist = branch_length
            if child.is_leaf():
                ete_child.name = id_to_sample_dict[child.id]
            else:
                ete_child.name = 'Int' + str(child.id)
            converted[current].add_child(ete_child)
            converted[child] = ete_child
            pending.append(child)

    if normalize_branches:
        ete_root = get_normalized_newick(ete_root)
    return ete_root.write(format=1)
class bayesianptp:
    """Run MCMC on multiple trees"""

    def __init__(self, filename, ftype="nexus", reroot=False, method="H1",
                 seed=1234, thinning=100, sampling=10000, burnin=0.1,
                 firstktrees=0, taxa_order=None):
        """Load the trees from *filename* and record the run settings.

        filename    -- tree file; read with ``NexusReader`` when *ftype* is
                       "nexus", otherwise with ``self.raxmlTreeParser``.
        firstktrees -- when between 1 and the number of trees, only that many
                       leading trees are kept.
        taxa_order  -- optional list of taxon names; when empty or omitted
                       the leaf names of the first tree are used. The list
                       is copied, so the caller's object is never modified.
        The remaining arguments are stored unchanged on the instance.
        """
        self.method = method
        self.seed = seed
        self.thinning = thinning
        self.sampling = sampling
        self.burnin = burnin
        self.firstktrees = firstktrees
        if ftype == "nexus":
            self.nexus = NexusReader(filename)
            self.nexus.blocks['trees'].detranslate()
            self.trees = self.nexus.trees.trees
        else:
            self.trees = self.raxmlTreeParser(filename)

        if self.firstktrees > 0 and self.firstktrees <= len(self.trees):
            self.trees = self.trees[:self.firstktrees]

        # Copy instead of sharing: remove_outgroups() edits this list in
        # place, which used to leak into the caller's list (and a shared
        # default list is a classic mutable-default hazard).
        self.taxa_order = list(taxa_order) if taxa_order else []
        if len(self.taxa_order) == 0:
            self.taxa_order = Tree(self.trees[0]).get_leaf_names()
        self.numtaxa = len(self.taxa_order)
        self.numtrees = len(self.trees)
        self.reroot = reroot

    def remove_outgroups(self, ognames, remove=False, output=""):
        """reroot using outgroups and remove them

        ognames -- outgroup taxon names; a single name is used directly as
                   outgroup, several names are rooted on their common
                   ancestor (unless that ancestor is already the root).
        remove  -- when true the outgroups are dropped from ``taxa_order``
                   and pruned from every tree.
        output  -- when *remove* is true and this is non-empty, the
                   resulting newick strings are written to this path.

        A ValueError raised while processing prints a message and exits the
        process.
        """
        self.reroot = False
        try:
            if remove:
                for og in ognames:
                    self.taxa_order.remove(og)
                self.numtaxa = len(self.taxa_order)
            for i, newick in enumerate(self.trees):
                t = Tree(newick)
                if len(ognames) < 2:
                    t.set_outgroup(ognames[0])
                else:
                    ancestor = t.get_common_ancestor(ognames)
                    if not t == ancestor:
                        t.set_outgroup(ancestor)
                if remove:
                    t.prune(self.taxa_order, preserve_branch_length=True)
                self.trees[i] = t.write()
            if remove and output != "":
                with open(output, "w") as fout:
                    for t in self.trees:
                        fout.write(t + "\n")
        except ValueError as e:
            print(e)
            print("")
            print("")
            print("Somthing is wrong with the input outgroup names")
            print("")
            print("Quiting .....")
            sys.exit()
def run(args):
    """Generate and dump ``args.number`` random trees of ``args.size`` leaves.

    ``args.random_branches`` is forwarded to ``Tree.populate``; each tree is
    handed to ``dump``.
    """
    from ete2 import Tree

    # range() instead of the Python-2-only xrange(); the loop index is unused.
    for _ in range(args.number):
        t = Tree()
        t.populate(args.size, random_branches=args.random_branches)
        dump(t)
def convert_tree(infile, id_dict):
    """Rename tree nodes through *id_dict* and save the result next to *infile*.

    infile  -- newick file (ETE format 1).
    id_dict -- maps current node names to replacement names; nodes whose
               name is not a key are left untouched.

    The renamed tree is written to ``<infile without extension>.formal_id.tree``.
    """
    tree_file = '%s.formal_id.tree' % (os.path.splitext(infile)[0])
    tree_t = Tree(infile, format=1)
    for node in tree_t.traverse("postorder"):
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if node.name in id_dict:
            node.name = id_dict[node.name]
    tree_t.write(format=1, outfile=tree_file)
def convert_tree(infile, id_dict):
    """Rename tree nodes through *id_dict* and save the result next to *infile*.

    infile  -- newick file (ETE format 1).
    id_dict -- maps current node names to replacement names; nodes whose
               name is not a key are left untouched.

    The renamed tree is written to ``<infile without extension>.formal_id.tree``.
    """
    tree_file = '%s.formal_id.tree' % (os.path.splitext(infile)[0])
    tree_t = Tree(infile, format=1)
    for node in tree_t.traverse("postorder"):
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if node.name in id_dict:
            node.name = id_dict[node.name]
    tree_t.write(format=1, outfile=tree_file)
def get_example_tree():
    """Return a random 10-leaf tree with a rectangular style using ``layout``.

    The style hides the default leaf names; ``layout`` is installed as the
    layout function.
    """
    style = TreeStyle()
    style.layout_fn = layout
    style.mode = "r"
    style.show_leaf_name = False

    example = Tree()
    example.populate(10)
    return example, style
def printNodeNames(treeFilePath):
    """Write the name of every node of the tree to ``NodeName.txt``.

    treeFilePath -- newick input passed to ``Tree``.

    The output file is created in the current working directory, one name
    per line, in the order produced by ``tree.traverse()``.
    """
    tree = Tree(treeFilePath)
    # Context manager guarantees the file is closed even if a write fails,
    # and avoids shadowing the builtin name `file`.
    with open("NodeName.txt", "w") as out:
        for n in tree.traverse():
            out.write(n.name)
            out.write("\n")
def getEte2Tree(hypoTree):
    """Recursively convert a nested-list tree into an ETE ``Tree``.

    hypoTree -- iterable whose list entries are sub-trees (converted
                recursively and added as children) and whose other entries
                supply the node name through their ``name`` attribute.

    Returns the new ``Tree`` node for this level.
    """
    t = Tree()
    for entry in hypoTree:
        # isinstance() also accepts list subclasses, unlike `type(x) is list`.
        if isinstance(entry, list):
            t.add_child(getEte2Tree(entry))
        else:
            t.name = entry.name
    return t
def build_ete_tree(node):
    """Build an ETE tree rooted at a fresh node from ``node.closest``.

    For each of the three entries ``node.closest[0..2]``, every element's
    ``edge`` is handed to ``build_ete_edge`` together with *node*, the
    constant 1 and the new root. Returns the root (branch length 0.0).
    """
    from ete2 import Tree

    ete_root = Tree()
    ete_root.dist = 0.
    for direction in range(3):
        for neighbour in node.closest[direction]:
            build_ete_edge(node, neighbour.edge, 1, ete_root)
    return ete_root
def readTree(tree):
    """Re-serialise *tree* in ETE newick format 9, save it and return it.

    tree -- path of the input tree; the converted newick is written to
            ``tree + ".nl"`` and also returned as a string.
    """
    newick = Tree(tree).write(format=9)
    with open(tree + ".nl", "w") as tree_nolabel:
        tree_nolabel.write(newick)
    return newick
def parents(data):
    """List (genotype distribution, distance from root) for every named node.

    data -- newick input (ETE format 1).

    Nodes named 'NoName' are skipped. Each remaining node yields a dict with
    the keys 'AA', 'Aa' and 'aa' at 0.0 and the node's own name at 1.0,
    paired with ``t.get_distance(node)``. The list is returned in reverse
    level order.
    """
    t = Tree(data, format=1)
    collected = []
    for node in t.traverse('levelorder'):
        if node.name == 'NoName':
            continue
        genotype = {'AA': 0.0, 'Aa': 0.0, 'aa': 0.0}
        genotype[node.name] = 1.0
        collected.append((genotype, t.get_distance(node)))
    collected.reverse()
    return collected
def constructing_final_tree(distance_matrix, protein_labels):
    """Build the neighbour-joining tree and display it in circular mode.

    distance_matrix, protein_labels -- forwarded to ``neighbor_joining``,
    whose string form (plus a closing ";") is parsed as newick.

    The tree is shown with leaf names and the ``my_layout`` layout function;
    nothing is returned.
    """
    newick = str(neighbor_joining(distance_matrix, protein_labels)) + ";"

    style = TreeStyle()
    style.mode = "c"
    style.show_leaf_name = True
    style.layout_fn = my_layout

    final_tree = Tree(newick)
    final_tree.dist = 0
    final_tree.show(tree_style=style)
def treeorder(treefile):
    """Return the leaf names of *treefile* in preorder.

    Only descendants of the root are inspected, so a tree consisting of a
    single node yields an empty list.
    """
    # Only Tree is needed; the unused face/style imports were dropped.
    from ete2 import Tree

    root = Tree(treefile).get_tree_root()
    return [desc.name
            for desc in root.iter_descendants("preorder")
            if desc.is_leaf()]
def get_distances(input_dir, group, genomes):
    """Load ``<input_dir>/<group>.nwk`` and look up the ancestor of *genomes*.

    input_dir -- directory containing the newick files.
    group     -- file name without the ``.nwk`` extension.
    genomes   -- leaf names passed to ``get_common_ancestor``.

    If loading the tree or finding the ancestor fails, the problem is
    reported (file name on stderr, error text on stdout) and the process
    exits.

    NOTE(review): in the visible code ``results`` is never filled or returned
    and the ancestor ``a`` is not used afterwards -- the function looks
    unfinished; confirm the intended return value.
    """
    results = {}
    in_file = os.path.join(input_dir, group + ".nwk")
    try:
        t = Tree(in_file)
        a = t.get_common_ancestor(*genomes)
    except Exception as e:
        sys.stderr.write("Problem with newick " + in_file + "\n")
        print("Unexpected error: " + str(e))
        sys.exit()
def tree_generation(entities):
    """Build and show a chain-shaped tree of word suffixes for each entity.

    Every entity is split on whitespace and hyphens (``split`` is presumably
    ``re.split`` -- confirm against the file's imports). Its suffixes, from
    the last word alone up to the full phrase, are added as a chain of nested
    children, and ``show()`` is then called on the last node added.
    """
    for entity in entities:
        words = split(r'[\s-]+', entity)
        # Start indices run from the last word back to the first, giving
        # suffixes of increasing length.
        suffixes = []
        for start in range(len(words) - 1, -1, -1):
            suffixes.append(words[start:])

        t = Tree()
        for suffix in suffixes:
            t = t.add_child(name=' '.join(suffix))
        print(t.show())
def get_taxa_for_one_alignment(fname, raxml=False):
    """Return the leaf names of the first tree in *fname* as a tuple.

    fname -- tree file; only its first line is read.
    raxml -- when true the line is the newick itself; otherwise the line is
             ``<replicate number><TAB><newick>``.

    Surrounding double quotes around the newick are removed before parsing.
    """
    # 'rU' (universal newlines) is kept for the Python 2 runtime this file
    # targets; the with-block closes the handle that used to be leaked.
    with open(fname, 'rU') as handle:
        line = handle.readline()
    if raxml:
        tree_string = line.strip()
    else:
        repnum, tree_string = line.strip().split('\t')
    tree_string = tree_string.strip('"')
    tree = Tree(tree_string)
    return tuple(tree.get_leaf_names())