def parse_weird_tree(tree_string):
    """Turn a bracket-annotated tree string into a plain newick string.

    The input is cut at every ``]``.  For each piece that contains ``[``,
    only the text before the first ``[`` is kept as tree text; the text after
    the first ``{`` is read as a comma-separated taxa list.  When that list
    has more than one entry, the first entry is remembered as a node name and
    the remaining entries are attached as extra children of every node that
    carries this name (a trailing ``}`` is stripped from each).

    Returns the tree written with ``format=1`` and ``format_root_node=True``.
    """
    extra_taxa = {}
    kept_parts = []
    for chunk in tree_string.split("]"):
        if "[" not in chunk:
            kept_parts.append(chunk)
            continue
        names = chunk.split("{")[1].split(",")
        if len(names) > 1:
            extra_taxa[names[0]] = names[1:]
        kept_parts.append(chunk.split("[")[0])

    tree = Tree("".join(kept_parts), format=1)

    # Strategy: the bracketed annotations were removed above and every node
    # standing for several taxa was remembered; now the additional taxa are
    # added as children of those nodes (a sister leaf for a leaf, an extra
    # leaf child for an internal node).
    for node in tree.traverse():
        for raw_name in extra_taxa.get(node.name, ()):
            node.add_child(name=raw_name.rstrip("}"))

    return tree.write(format=1, format_root_node=True)
def remove_outgroups(self, ognames, remove = False, output = ""):
    """Reroot every tree in ``self.trees`` on the outgroup(s), optionally pruning them.

    Args:
        ognames: outgroup taxon names.  With fewer than two names the first
            one is used directly as outgroup; otherwise the common ancestor
            of all names is used, unless that ancestor is the tree itself.
        remove: when true, the names are removed from ``self.taxa_order``,
            ``self.numtaxa`` is updated and every tree is pruned to the
            remaining taxa (branch lengths preserved).
        output: when ``remove`` is true and this is a non-empty path, the
            rerooted trees are written there, one per line.

    Side effects:
        Sets ``self.reroot`` to False and replaces each entry of
        ``self.trees`` with the newick string of the rerooted tree.

    A ``ValueError`` raised anywhere in this process (for example by
    ``list.remove`` for a name that is not in ``taxa_order``) is reported on
    stdout and the program exits through ``sys.exit()``.
    """
    self.reroot = False
    try:
        if remove:
            for og in ognames:
                self.taxa_order.remove(og)
            self.numtaxa = len(self.taxa_order)

        for i, newick in enumerate(self.trees):
            t = Tree(newick)
            if len(ognames) < 2:
                t.set_outgroup(ognames[0])
            else:
                ancestor = t.get_common_ancestor(ognames)
                # Rooting on the tree itself is not possible, so skip it.
                if not t == ancestor:
                    t.set_outgroup(ancestor)
            if remove:
                t.prune(self.taxa_order, preserve_branch_length=True)
            self.trees[i] = t.write()

        if remove and output != "":
            with open(output, "w") as fout:
                for t in self.trees:
                    fout.write(t + "\n")
    except ValueError as e:
        print(e)
        print("")
        print("")
        print("Somthing is wrong with the input outgroup names")
        print("")
        print("Quiting .....")
        sys.exit()
def ete_tree(aln):
    """Tree showing alleles.

    Loads the tree from ``temp.dnd`` and opens it in a semicircular layout
    (arc from -180 spanning 180 degrees) with leaf names shown.  Nodes
    selected by ``func`` get a yellow background.  ``aln`` is not used.
    """
    from ete2 import Tree, TreeStyle, NodeStyle

    t = Tree('temp.dnd')
    ts = TreeStyle()
    ts.show_leaf_name = True
    ts.mode = "c"
    ts.arc_start = -180
    ts.arc_span = 180
    cutoff = 0.25

    def func(node):
        # The distance-metric test that used `cutoff` is disabled in the
        # source, so this predicate currently never selects a node.
        if node.name == 'NoName':
            return False
        return False

    # Build a real list: len() is taken below, which fails on the iterator
    # that filter() returns on Python 3.
    matches = [node for node in t.traverse() if func(node)]
    print(len(matches), "nodes have distance <=%s" % cutoff)

    nst1 = NodeStyle()
    nst1["bgcolor"] = "Yellow"
    for n in matches:
        n.set_style(nst1)

    t.show(tree_style=ts)
    return
def date_tree(tree):
    '''Date each internal (non-root) node of a newick tree given in format 1.

    The tree is traversed in postorder and the result of date_node() is
    stored in ``node.dist``.  Three internal node cases are distinguished by
    inner_type():

    Type 0: both children are leaves; the node is dated with the divergence
            time of the two leaves.
    Type 1: one child is a leaf and the other is an internal node; the node
            is dated with the leaf and the first leaf descending from the
            internal child.
    Type 2: both children are internal nodes; the node is dated with the
            first leaf descending from child A and the first leaf descending
            from child B.

    Returns the loaded tree.  The root and the leaves are left untouched.
    '''
    tree = Tree(tree, format=1)
    print("Tree loaded!")
    for node in tree.traverse("postorder"):
        print("Dating %s" % node.name)
        if node.is_root() or node.is_leaf():
            continue

        children = node.get_children()
        left, right = children[0], children[1]
        kind = inner_type(node)

        if kind == 0:
            node.dist = date_node(left.name, right.name)
        elif kind == 1:
            if left.is_leaf():
                node.dist = date_node(left.name, right.get_leaf_names()[0])
            elif right.is_leaf():
                node.dist = date_node(left.get_leaf_names()[0], right.name)
        elif kind == 2:
            # Use the FIRST leaf below each child, as documented above; the
            # previous code picked index 1 for the right child.
            node.dist = date_node(left.get_leaf_names()[0],
                                  right.get_leaf_names()[0])
    return tree
def resolve_polytomies(infileName, outfileName):
    """Resolve all polytomies of the tree in ``infileName`` and save the result.

    Only the first line of the input file is read; trailing whitespace and
    any ``'[&R] '`` marker are removed before parsing.  Polytomies are
    resolved recursively and the tree is written to ``outfileName`` in newick
    format 1.
    """
    # Text mode plus a context manager: the handle is closed deterministically
    # and the str operations below also work where 'rb' would yield bytes.
    with open(infileName) as infile:
        newickString = infile.readline().rstrip().replace('[&R] ', '')

    tree = Tree(newickString)
    tree.resolve_polytomy(recursive=True)

    with open(outfileName, 'w') as outfile:
        outfile.write(tree.write(format=1))
def __init__(self, tree, start_config = None, reroot = False, startmethod = "H0", min_br = 0.0001, seed = 1234, thinning = 100, sampling = 10000, burning = 0.1, taxa_order = None):
    """Set up the sampler state from a tree and an optional start setting.

    Args:
        tree: tree input; handed to ``exponential_mixture`` when no start
            configuration is given, otherwise parsed with ``Tree(tree, format=1)``.
        start_config: initial delimitation setting.  When None, one is
            searched with ``exponential_mixture`` using ``startmethod`` and
            ``reroot``.
        min_br, thinning, sampling, burning: stored unchanged on the instance.
        seed: seed for the private ``random.Random`` instance.
        taxa_order: order of taxa; None or an empty list means "use the leaf
            names of the tree".
    """
    if start_config is None:
        me = exponential_mixture(tree= tree)
        me.search(strategy = startmethod, reroot = reroot)
        me.count_species(print_log = False, pv = 0.0)
        self.tree = me.tree
        self.current_setting = me.max_setting
    else:
        self.current_setting = start_config
        self.tree = Tree(tree, format = 1)

    self.burning = burning
    self.last_setting = self.current_setting
    self.current_logl = self.current_setting.get_log_l()
    self.last_logl = self.last_setting.get_log_l()
    self.min_br = min_br
    self.rand_nr = random.Random()
    self.rand_nr.seed(seed)
    self.thinning = thinning
    self.sampling = sampling

    # None replaces the former mutable default; an explicit [] keeps its
    # old meaning.
    if taxa_order is None or taxa_order == []:
        self.taxaorder = self.tree.get_leaf_names()
    else:
        self.taxaorder = taxa_order
    self.numtaxa = len(self.taxaorder)

    self.partitions = []
    self.llhs = []
    self.nsplit = 0
    self.nmerge = 0

    # remember the ML partition
    self.maxllh = self.current_logl
    _, spe = self.current_setting.output_species(taxa_order = self.taxaorder)
    self.maxpar = spe
    self.max_setting = self.current_setting

    # record all delimitation settings for plotting, this could consume a lot of MEM
    self.settings = []
def findCombination(word,lstFunc,alphabet,offset):
    """Search for a chain of functions whose generated space contains ``word``.

    A tree of candidate spaces is grown from a root holding ``offset`` as its
    ``space`` feature.  In every round ("mutation") the representations of
    ``word`` are regenerated, and each current leaf is expanded with every
    function in ``lstFunc``: the new space is computed with
    ``generateSpaceEx`` and stored on a child node together with the raw
    result as ``history``.  As soon as one representation is contained in a
    new space, ``getSolution`` is called with that node followed by its
    ancestors and the program exits.

    If ``alphabet`` already contains ``word`` the word is reported and the
    program exits immediately.  This function does not return normally.
    """
    debug("findCombination(%s,%s,%s)" % (word,lstFunc,alphabet))
    found = False
    mutation = 1

    spaceTree = Tree()
    spaceTree.add_features(space=offset)

    if contains(word,alphabet):
        info("Alphabet contains Word")
        info("PUSH %s" % word)
        exit()

    while not found:
        info("Mutation: %d !" % mutation)
        # generate word representation in the current mutation
        reprs = generateRepresentation(word,mutation)

        for n in spaceTree.get_leaves():
            for f in lstFunc:
                # generate space from the alphabet stored on this leaf
                space = generateSpaceEx(f,n.space,alphabet)
                tmpSpace = list(set([c[0] for c in space]))
                debugListHex(tmpSpace,"SPACE")

                # check whether any word representation exists in the space
                for r in reprs:
                    if contains(r,tmpSpace):
                        found = True
                        info("FOUND : %s" % r)
                        nodeF = n.add_child(name=f)
                        nodeF.add_features(space=tmpSpace,history=space)
                        lstAncestors = [nodeF,]
                        lstAncestors.extend(nodeF.get_ancestors())
                        getSolution(r,offset,lstAncestors)
                        exit()

                nodeF = n.add_child(name=f)
                nodeF.add_features(space=tmpSpace,history=space)

        mutation = mutation + 1
def main():
    """Annotate a tree with rug data, render it to a file and display it.

    Reads ``beta_metrics`` and ``otu_widths`` (comma-separated), ``input_dir``,
    ``output_fp`` and ``tree_fp`` from the module-level ``parser``.  The tree
    is loaded with newick format 3; if that raises, ``add_tip_branches`` is
    used on the tree path instead.  Every label in ``labels_list`` is added
    to the legend in the column matching its position within its row.
    """
    args = parser.parse_args()

    beta_metrics = args.beta_metrics.split(',')
    otu_widths = args.otu_widths.split(',')
    input_dir = args.input_dir
    output_fp = args.output_fp
    tree_fp = args.tree_fp

    results_dict, labels_list = load_rug_dict(input_dir, beta_metrics, otu_widths)

    try:
        tree = Tree(tree_fp, format=3)
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit; the fallback itself is unchanged.
        tree = add_tip_branches(tree_fp)

    annotate_tree_with_rugs(tree, results_dict, labels_list)

    ts = TreeStyle()
    for row_labels in labels_list:
        for col, label in enumerate(row_labels):
            ts.legend.add_face(TextFace(label, fsize=20), column=col)

    tree.render(output_fp, tree_style=ts)
    tree.show(tree_style=ts)
def run(args):
    """Load each newick from ``args.src_tree_iterator``, modify it and dump it.

    Every tree is passed to ``mod_tree`` together with ``args`` and then to
    ``dump``.
    """
    from ete2 import Tree

    for newick in args.src_tree_iterator:
        tree = Tree(newick)
        mod_tree(tree, args)
        dump(tree)
def constructing_final_tree(distance_matrix, protein_labels):
    """Build the UPGMA tree for the given matrix and labels and display it.

    The string form of the ``upgma`` result, terminated with ``;``, is parsed
    as a tree, converted to ultrametric and shown with leaf names at a scale
    of 120.
    """
    newick = str(upgma(distance_matrix, protein_labels)) + ";"

    tree = Tree(newick)
    tree.convert_to_ultrametric()

    style = TreeStyle()
    style.show_leaf_name = True
    style.scale = 120

    tree.show(tree_style=style)
def run(args):
    """Generate ``args.number`` random trees and dump each of them.

    Each tree is populated with ``args.size`` as size argument and
    ``args.random_branches`` controlling random branch lengths.
    """
    import random
    from ete2 import Tree

    for _ in range(args.number):
        tree = Tree()
        tree.populate(args.size, random_branches=args.random_branches)
        dump(tree)
def convert_tree(infile, id_dict):
    """Rename tree nodes via ``id_dict`` and write the tree next to ``infile``.

    The tree is read from ``infile`` in newick format 1.  Every node whose
    name is a key of ``id_dict`` is renamed to the mapped value; other nodes
    keep their names.  The result is written, again in format 1, to
    ``<infile without extension>.formal_id.tree``.
    """
    tree_file = '%s.formal_id.tree' % (os.path.splitext(infile)[0])
    tree_t = Tree(infile, format=1)
    for node in tree_t.traverse("postorder"):
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if node.name in id_dict:
            node.name = id_dict[node.name]
    tree_t.write(format=1, outfile=tree_file)
def subdiv(PLs, tree):
    """Split every leaf of ``tree`` in two, once per entry along PLs' first axis.

    ``tree.sid`` is initialised to the indices ``0 .. m-1`` where ``m`` is the
    second dimension of ``PLs``.  For each index ``i`` of the first dimension
    and each current leaf, the values ``PLs[i, leaf.sid, 0]`` are inspected:
    the ids whose value equals 0 go to a first new child, all other ids to a
    second new child.  The tree is modified in place; nothing is returned.

    Assumes ``PLs`` is a 3-D numpy array -- TODO confirm against callers.
    """
    import numpy as np

    n, m, _ = PLs.shape
    # Must be an array rather than a plain list: leaf.sid is indexed with a
    # boolean mask below, which a list does not support.
    tree.sid = np.arange(m)

    for i in range(n):
        for leaf in tree.get_leaves():
            PL = PLs[i, leaf.sid, 0]
            is_zero = PL == 0

            c0 = Tree()
            c0.sid = leaf.sid[is_zero]
            leaf.add_child(c0)

            c1 = Tree()
            c1.sid = leaf.sid[~is_zero]
            leaf.add_child(c1)
def compare_main(args):
    """Compare tree topologies against a reference tree.

    Args:
        args.tree: iterable of input trees, in Newick format
        args.ref (str): reference tree, in Newick format

    ``args`` itself and all warnings are printed to stderr.  For every input
    tree whose adjacency matrix has the same shape as the reference matrix,
    one tab-separated line is printed to stdout:

        tree, norm_rf, ref_edges_in_source, source_edges_in_ref, dstat, rstat

    where the first three scores come from ``ref_tree.compare(tree,
    unrooted=True)`` and, with ``k`` the positive entries of the reference
    matrix:

        dstat: sum of absolute differences over ``k`` divided by the number
               of entries in ``k``
        rstat: product of the pairwise ratios over ``k`` (each ratio inverted
               when above 1) raised to ``1 / number of entries in k``
    """
    print(args, file=sys.stderr)

    ref_tree = Tree(args.ref)
    ref_names = [leaf.name for leaf in ref_tree.get_leaves()]
    # one integer per leaf name, shared by all trees so the matrices line up
    leaf_idx = {name: pos for pos, name in enumerate(ref_names)}
    ref_am = tree2adjacency(ref_tree, leaf_idx)

    for f in args.tree:
        tree = Tree(f)
        names = [leaf.name for leaf in tree.get_leaves()]
        if set(names) != set(ref_names):
            print('leaf names are not the same', file=sys.stderr)

        am = tree2adjacency(tree, leaf_idx)
        if ref_am.shape != am.shape:
            print('%s incompatible with %s' % (f, args.ref), file=sys.stderr)
            continue

        k = ref_am > 0
        dstat = np.abs(ref_am - am)[k].sum() / k.sum()

        ratio = am[k] / ref_am[k]
        too_big = ratio > 1
        ratio[too_big] = 1.0 / ratio[too_big]
        rstat = np.power(ratio.prod(), 1.0 / k.sum())

        # topology comparison calculated by the tree library
        result = ref_tree.compare(tree, unrooted=True)

        # <tree>,<norm_rf>,<ref_edge_in_tree>,<tree_edge_in_ref>,<diff_adj>,<ratio_adj>
        print('%s\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f' % (
            f,
            result['norm_rf'],
            result['ref_edges_in_source'],
            result['source_edges_in_ref'],
            dstat,
            rstat,
        ))
def constructing_final_tree(distance_matrix, protein_labels):
    """Build the neighbor-joining tree for the given input and display it.

    The string form of the ``neighbor_joining`` result, terminated with
    ``;``, is parsed as a tree whose root distance is set to 0.  The tree is
    shown in circular mode with leaf names, using ``my_layout`` as layout
    function.
    """
    newick = str(neighbor_joining(distance_matrix, protein_labels)) + ";"

    tree = Tree(newick)
    tree.dist = 0

    style = TreeStyle()
    style.layout_fn = my_layout
    style.show_leaf_name = True
    style.mode = "c"

    tree.show(tree_style=style)
def parents(data):
    """Return ``(genotype dict, distance from root)`` pairs for all named nodes.

    The tree is parsed from ``data`` in newick format 1 and walked in level
    order.  For every node whose name is not ``'NoName'`` a dict with the keys
    ``'AA'``, ``'Aa'`` and ``'aa'`` set to 0.0 is created and the entry for
    the node's own name is set to 1.0.  The pairs are returned in reverse
    level order.
    """
    t = Tree(data, format=1)
    pairs = []
    for node in t.traverse('levelorder'):
        if node.name == 'NoName':
            continue
        genotype = dict.fromkeys(('AA', 'Aa', 'aa'), 0.0)
        genotype[node.name] = 1.0
        pairs.append((genotype, t.get_distance(node)))
    pairs.reverse()
    return pairs
def getSolution(reprs,offset,his):
    """Reconstruct and report the operations that lead from ``offset`` to ``reprs``.

    For every position ``rg`` a path tree rooted at ``reprs[rg]`` is grown:
    for each entry of ``his`` except the last, every current leaf is expanded
    with the ``history`` lines whose result equals the leaf's value.  The
    first leaf of that tree plus its ancestors (without the root) is stored
    as the solution path for this position.

    The paths are then regrouped per step, logged through ``info`` and handed
    to ``testResult`` together with the expected 32-bit value.

    NOTE(review): the source had lost its indentation; the nesting below is
    reconstructed from the statement order -- confirm against the original.
    """
    def pack(values):
        # Combine four values into one integer, first value most significant.
        return (values[0] * 0x01000000 + values[1] * 0x00010000 +
                values[2] * 0x00000100 + values[3] * 0x00000001)

    debugListHex(reprs,"reprs:",2)
    debugListHex(offset,"offset:",2)
    sol = dict()
    sol2 = dict()

    for rg in range(len(reprs)):
        r = reprs[rg]
        tPath = Tree(name=r)
        tPath.add_features(value=r)
        for h in his[:-1]:
            for leaf in tPath.get_leaves():
                r = leaf.value
                for line in h.history:
                    res, alph, past = line[0], line[1], line[2]
                    # __name__ is the portable spelling of func_name
                    method = line[3].__name__
                    if res == r:
                        n = leaf.add_child(name=alph)
                        n.add_features(function=method,value=past)

        lf = tPath.get_leaves()[0]
        llf = [lf,]
        llf.extend(lf.get_ancestors()[:-1])
        sol[rg] = llf

    for i in sol:
        vls = [(c.name, c.function) for c in sol[i]]
        # reset on every pass, so only the last path defines the method list
        sol2["method"] = []
        for j in range(len(vls)):
            sol2["method"].append(sol[i][0].function)
            sol2.setdefault(j, []).append(vls[j][0])

    print(prettyText("Solution:","red"))
    info("PUSH\t\t0x%02x%02x%02x%02x" % (offset[0],offset[1],offset[2],offset[3]))

    test = [pack(offset)]
    for m in range(len(sol2["method"])):
        test.append(pack(sol2[m]))
        info("%s\t\t\t0x%02x%02x%02x%02x" % (sol2["method"][m],sol2[m][0],sol2[m][1],sol2[m][2],sol2[m][3]))

    expected = pack(reprs)
    info("RESULT\t\t0x%08x" % expected)
    testResult(test, expected)
def read_in_data(uid):
    """Build a taxonomy tree from the records of the user with id ``uid``.

    The module-level file ``f_in`` is read as UTF-8, one JSON document per
    line.  Lines whose user (looked up through ``read_user_IDs()``) is not
    ``uid`` are skipped; lines missing an expected key are skipped silently.
    For each taxonomy element the ``label`` is split on ``/`` (the first
    piece is dropped) and, for scores above 0.4, inserted level by level into
    a ``Tree7s`` structure.

    Returns the collected structure converted to newick and parsed with
    ``Tree(..., format=1)``.
    """
    # maps user names to user ids
    user_ids = read_user_IDs()
    t7s = Tree7s("thing")
    cnt = 0

    with codecs.open(f_in, 'r', encoding='utf8') as input_file:
        for line7s in input_file:
            try:
                line = json.loads(line7s)
                taxonomy_all = line["taxonomy"]
                user_id = user_ids[line["_id"]]
                # only the user we analyze
                if uid != user_id:
                    continue

                for el in taxonomy_all["taxonomy"]:
                    n = t7s.find_root()
                    taxonomy_tree = el["label"].split("/")
                    taxonomy_tree.pop(0)
                    levels = len(taxonomy_tree)
                    score = float(el["score"])
                    # NOTE(review): indentation was lost in the source; the
                    # insertion loop is assumed to sit inside this score
                    # threshold together with the print -- confirm.
                    if score > 0.4:
                        print("%s %s %s" % (levels, taxonomy_tree, score))
                        for i, label in enumerate(taxonomy_tree):
                            n.add_child(label, score, i + 1)
                            n = n.find_child(label)
                cnt += 1
            except KeyError:
                # Not printed on purpose: for some 10% of the users the
                # taxonomy was not successfully downloaded and they would all
                # be listed here.
                continue

    print("Taxonomy collected for %d users " % (cnt))

    S = t7s.find_root().create_newick() + ";"
    return Tree(S, format=1)
def get_nodesheight(self):
    """Map the ``B`` feature of each node to its height in the tree.

    The tree is parsed from ``self.tree`` and walked in preorder.  For every
    node that has a ``B`` attribute, the height is the second element of
    ``get_closest_leaf(topology_only=True)`` plus one.
    """
    tree = Tree(self.tree)
    heights = {}
    for node in tree.traverse(strategy="preorder"):
        if not hasattr(node, "B"):
            continue
        closest = node.get_closest_leaf(topology_only=True)
        heights[node.B] = closest[1] + 1
    return heights
def treeorder(treefile):
    """Return the leaf names of the tree in ``treefile`` in preorder."""
    from ete2 import Tree, faces, TreeStyle, NodeStyle, AttrFace

    root = Tree(treefile).get_tree_root()
    return [
        node.name
        for node in root.iter_descendants("preorder")
        if node.is_leaf()
    ]
def get_tree_object_in_newick(tree, id_to_sample_dict=None):
    """Take a tree object, and create a newick formatted representation of it.

    ``tree`` is walked through its ``left`` / ``right`` children.  Each child
    is copied into a new tree with half of its parent's ``dist`` as branch
    length.  Leaves are named by their ``id`` (translated through
    ``id_to_sample_dict`` when one is given); internal nodes are named
    ``'Int'`` followed by their ``id``.  The new root is called ``"root"``
    and has distance 0.

    Returns the new tree written in newick format 1.
    """
    new_root = Tree()
    new_root.dist = 0
    new_root.name = "root"

    # pairs of (node in the old tree, its counterpart in the new tree)
    pending = [(tree, new_root)]
    while pending:
        old_node, new_node = pending.pop()
        half_dist = old_node.dist / 2.0

        for old_child in (old_node.left, old_node.right):
            if not old_child:
                continue

            new_child = Tree()
            new_child.dist = half_dist

            if not old_child.is_leaf():
                new_child.name = 'Int' + str(old_child.id)
            elif id_to_sample_dict:
                new_child.name = id_to_sample_dict[old_child.id]
            else:
                new_child.name = old_child.id

            new_node.add_child(new_child)
            pending.append((old_child, new_child))

    return new_root.write(format=1)
def get_taxa_for_one_alignment(fname, raxml=False):
    """Return the leaf names of the tree on the first line of ``fname`` as a tuple.

    Args:
        fname: path of the file to read; only its first line is used.
        raxml: when true, the stripped line is the tree string itself.
            Otherwise the line must consist of exactly two tab-separated
            fields; the second one is the tree string, with surrounding
            double quotes removed.
    """
    # The context manager closes the handle.  Default text mode replaces
    # 'rU', which Python 3.11+ rejects; strip() below removes any line
    # ending that is left over.
    with open(fname) as handle:
        line = handle.readline()

    if raxml:
        tree_string = line.strip()
    else:
        _repnum, tree_string = line.strip().split('\t')
        tree_string = tree_string.strip('"')

    tree = Tree(tree_string)
    return tuple(tree.get_leaf_names())
def midpointRooting(infileName, outfileName):
    """Mid-point root the tree in ``infileName`` and write it to ``outfileName``.

    Only the first line of the input file is read; trailing whitespace and
    any ``'[&R] '`` marker are removed before parsing.  When a midpoint
    outgroup exists the tree is rooted on it.  The tree is then ladderized
    and written in newick format 1.

    NOTE(review): the source had lost its indentation; ``ladderize()`` is
    assumed to run regardless of whether a midpoint outgroup was found --
    confirm.
    """
    # Text mode plus a context manager: the handle is closed deterministically
    # and the str operations below also work where 'rb' would yield bytes.
    with open(infileName) as infile:
        newickString = infile.readline().rstrip().replace('[&R] ', '')

    tree = Tree(newickString)

    # Compute the outgroup once instead of once for the test and once for
    # the call.
    outgroup = tree.get_midpoint_outgroup()
    if outgroup is not None:
        tree.set_outgroup(outgroup)
    tree.ladderize()

    with open(outfileName, 'w') as outfile:
        outfile.write(tree.write(format=1))
def tree_generation(entities):
    """Build and display one suffix chain tree per entity.

    Each entity is split on runs of whitespace and hyphens.  Starting from
    the last word, ever longer word suffixes are joined with single spaces
    and nested below each other, so the deepest node carries the full
    entity.  The resulting tree is shown for every entity.
    """
    for entity in entities:
        words = split(r'[\s-]+', entity)

        # Keep a handle on the root: the loop variable ends up at the deepest
        # node, and showing that node would display a single leaf only.
        root = Tree()
        node = root
        for start in range(len(words), 0, -1):
            node = node.add_child(name=' '.join(words[start - 1:]))

        root.show()
def get_distances(input_dir, group, genomes):
    """Load ``<input_dir>/<group>.nwk`` and look up the ancestor of ``genomes``.

    Any exception raised while loading the tree or looking up the common
    ancestor is reported (file name on stderr, error text on stdout) and the
    program exits through ``sys.exit()``.

    NOTE(review): ``results`` and the ancestor are not used and nothing is
    returned in the code visible here -- the function looks unfinished;
    confirm before relying on a return value.
    """
    results = {}
    in_file = os.path.join(input_dir, group + ".nwk")
    try:
        t = Tree(in_file)
        a = t.get_common_ancestor(*genomes)
    except Exception as e:
        sys.stderr.write("Problem with newick " + in_file + "\n")
        print("Unexpected error: %s" % str(e))
        sys.exit()
def __init__(self, driver, target_url, depth=-1, delay=5, mitm=False):
    """Store the crawl settings and create the URL tree.

    The tree gets one child named after ``target_url``; that node is kept as
    ``self.root`` and carries the features ``path`` (the target URL) and
    ``advance`` (True).  A ``UrlCache`` is created from ``depth`` and the
    subscriber list starts empty.
    """
    # plain settings
    self.driver = driver
    self.target_url = target_url
    self.depth = depth
    self.delay = delay
    self.mitm = mitm

    # bookkeeping
    self.subscribers = []
    self.url_cache = UrlCache(self.depth)

    # URL tree with the target as its first node
    self.t = Tree()
    self.root = self.t.add_child(name=target_url)
    self.root.add_features(path=target_url, advance=True)
def compare(f1, f2):
    """Return the Robinson-Foulds distance of two tree files, scaled by 2*leaves-3.

    Both files are read completely and parsed as trees.  The distance is
    computed with ``unrooted_trees=True`` and divided by ``2 * n - 3``, where
    ``n`` is ``len()`` of the second tree.
    """
    loaded = []
    for path in (f1, f2):
        with open(path) as handle:
            text = handle.read()
        loaded.append(Tree(text))
    first, second = loaded

    (rf, _rf_max, _common_attrs, _names,
     _edges_first, _edges_second, _discarded) = first.robinson_foulds(
        second, unrooted_trees=True)

    # At most there are 2*leaves-3 elementary changes to transform one tree
    # into the other.
    return float(rf) / (2 * len(second) - 3)
def buildTree(filename, names, nodes, filter=None):
    """Build a taxonomy tree from the ids listed in ``filename``.

    Every line of the file must start with an integer id followed by a comma.
    For each id the list returned by ``buildTaxaLevelList2`` is walked from
    the root of the result tree: an existing child with the same name is
    reused, otherwise a new child is added.  A new child's branch length is
    looked up in a fixed table by the distance value of the previously kept
    entry (1 at the start of each line).

    Args:
        filename: path of the comma-separated input file.
        names: mapping from id to display name.
        nodes: mapping from id to a pair whose second element is the level.
        filter: optional container of levels to keep; None keeps all levels.

    Returns the root of the tree, named ``"root"``.
    """
    # branch length for a new node, keyed by the previous distance value
    convertDist = {
        1: 1,
        2: 2.22,
        3: 3.43,
        4: 4.63,
        5: 5.85,
        6: 7.05,
        7: 8.25,
        8: 9.46,
    }

    result = Tree()
    result.name = "root"

    # The context manager closes the input file, which was left open before.
    with open(filename, 'r') as inFile:
        for line in inFile:
            k = int(line.split(',')[0])
            reverseList = buildTaxaLevelList2(k, nodes, filter)
            currentNode = result
            prevDistance = 1
            for (k, distance) in reverseList:
                (junk, level) = nodes[k]
                txt = levelToText(level)
                if DEBUGtaxa:
                    name = txt + "_" + names[k]
                else:
                    # leading blank added for readability of the rendering
                    name = " " + names[k]

                if filter is None or level in filter:
                    # descend into the child with this name, or create it
                    for kid in currentNode.get_children():
                        if kid.name == name:
                            currentNode = kid
                            break
                    else:
                        currentNode = currentNode.add_child(
                            name=name, dist=convertDist[prevDistance])
                    # Because we move up and down the tree while adding
                    # leaves, the previous distance has to be remembered.
                    # NOTE(review): indentation was lost in the source; this
                    # update is assumed to happen only for kept levels --
                    # confirm.
                    prevDistance = distance
                # else: skip and go on to the next entry in the list
    return result
def test_init_tree():
    """Check the ``nid`` and ``sid`` values that ``init_tree`` puts on a small tree."""
    tree = init_tree(Tree("(1:1,(2:1,(5:1,4:1):0.5):0.5);"))

    nid = [node.nid for node in tree.traverse(strategy='postorder')]
    sid = [node.sid for node in tree.traverse(strategy='postorder')]

    expected_nid = [0, 1, 2, 3, 4, 5, 6]
    expected_sid = [[1], [2], [5], [4], [4, 5], [2, 4, 5], [1, 2, 4, 5]]
    assert nid == expected_nid
    assert sid == expected_sid
def treeFromQuartet(quartet):
    """Build the tree ``((q0, q1)Left, (q2, q3)Right)root`` from four names.

    The first two entries of ``quartet`` become the children of the node
    ``"Left"``, the last two the children of ``"Right"``.  All descendants of
    the root get distance 0.  Returns the root.
    """
    root = Tree()
    root.name = "root"

    for side, positions in (("Left", (0, 1)), ("Right", (2, 3))):
        branch = root.add_child(name=side)
        for pos in positions:
            branch.add_child(name=quartet[pos])

    for node in root.iter_descendants():
        node.dist = 0
    return root