def test():
    """Demo: match a TreePattern against several example trees and print results.

    NOTE(review): `length`, `TreePattern` and `Tree` are assumed to be defined
    or imported elsewhere in this module (ete3 / treematcher) — confirm.
    Python 2 print statements; this snippet is py2-only.
    """
    # Map of custom functions usable inside the pattern DSL.
    custom_functions = {"length":length}
    # Pattern constraints: >2 grandchildren, child-name overlap with {hello,bye},
    # root name short or "pasa", and branch distance >= 0.5.
    pattern = """ ( len{@.children} > 2 , len{set{{@.name|}}.intersection{set{{"hello"|"bye"}}}} > 0 ){length{@.name} < 3 or @.name == "pasa"} and @.dist >= 0.5 ; """
    pattern = TreePattern(pattern, format=8)
    print pattern
    # Case 1: should satisfy all constraints (dist 1 >= 0.5).
    tree = Tree("((hello,(1,2,3)kk)pasa:1, NODE);", format=1)
    print tree.get_ascii(attributes=["name", "dist"])
    print "Pattern matches tree?:", pattern.find_match(tree, custom_functions)
    # Case 2: same topology but dist 0.4 < 0.5.
    tree = Tree("((hello,(1,2,3)kk)pasa:0.4, NODE);", format=1)
    print tree.get_ascii(attributes=["name", "dist"])
    print "Pattern matches tree?:", pattern.find_match(tree, custom_functions)
    # Case 3: pattern node is the root itself.
    tree = Tree("(hello,(1,2,3)kk)pasa:1;", format=1)
    print tree.get_ascii(attributes=["name", "dist"])
    print "Pattern matches tree?:", pattern.find_match(tree, custom_functions)
    # Case 4: name "none" is 4 chars and != "pasa".
    tree = Tree("((bye,(1,2,3)kk)none:1, NODE);", format=1)
    print tree.get_ascii(attributes=["name", "dist"])
    print "Pattern matches tree?:", pattern.find_match(tree, custom_functions)
    # Case 5: short name "y" with child "bye".
    tree = Tree("((bye,(1,2,3)kk)y:1, NODE);", format=1)
    print tree.get_ascii(attributes=["name", "dist"])
    print "Pattern matches tree?:", pattern.find_match(tree, custom_functions)
def ete_print(self):
    """ Pretty print. TODO Debug and document better for case USE_ETE3 == False """
    # Without ete3 support, fall back to the plain string form.
    if not Cfg.USE_ETE3:
        return str(self)
    rendered = EteTree(self.ete_str(), format=1)
    print(rendered.get_ascii(show_internal=True))
class Syntax_tree:
    """Wraps an ete3 Tree built from a PTB-style constituency parse string.

    The parse is converted to newick text first (commas and colons are escaped
    because they are newick metacharacters).  Leaves appear in token order, so
    a token index maps directly onto a leaf position.
    NOTE(review): indentation of this collapsed chunk was reconstructed —
    verify nesting against the original file.
    """

    def __init__(self, parse_tree):
        # parse_tree = parse_tree.encode("utf-8", errors="replace")
        newick_text = self.to_newick_format(parse_tree)
        if newick_text == None:
            self.tree = None
        else:
            # newick_text = newick_text.encode("utf-8")
            # self.tree = Tree(newick_text, format=1)
            try:
                # newick_text = newick_text.encode("utf-8")
                self.tree = Tree(newick_text, format=1)
            except:
                # Malformed newick: leave tree unset rather than crash.
                self.tree = None
                pass

    def print_tree(self):
        """Print an ASCII rendering of the tree, internal nodes included."""
        print(self.tree.get_ascii(show_internal=True))

    def get_node_path_to_root(self, node):
        """Return the 'a-->b-->...-->root' name path from node up to the root."""
        path = ""
        while (not node.is_root()):
            path += node.name + "-->"
            node = node.up
        path += node.name
        return path

    def get_leaf_node_by_token_index(self, token_index):
        """Return the leaf at sentence position token_index, or None if out of range."""
        leaves = self.tree.get_leaves()
        if token_index >= len(leaves):
            return None
        return leaves[token_index]

    # Get the constituent (category) node covering the given token indices.
    def get_self_category_node_by_token_indices(self, token_indices):
        if len(token_indices) == 1:
            # Single token: its category is the POS-tag node (leaf's parent).
            return self.get_leaf_node_by_token_index(token_indices[0]).up
        nodes = []
        for token_index in token_indices:
            node = self.get_leaf_node_by_token_index(token_index)
            nodes.append(node)
        return self.tree.get_common_ancestor(nodes)

    def get_common_ancestor_by_token_indices(self, token_indices):
        """Return the lowest common ancestor of the leaves at token_indices."""
        nodes = []
        for token_index in token_indices:
            node = self.get_leaf_node_by_token_index(token_index)
            nodes.append(node)
        return self.tree.get_common_ancestor(nodes)

    def get_left_sibling_category_node_by_token_indices(self, token_indices):
        """Return the sibling immediately left of the covering node, or None."""
        self_category_node = self.get_self_category_node_by_token_indices(token_indices)
        # Compare by identity: distinct nodes may share equal names.
        node_id = id(self_category_node)
        if self_category_node.up == None:
            return None
        children = self_category_node.up.get_children()
        for i, child in enumerate(children):
            if node_id == id(child):
                if i == 0:
                    return None
                else:
                    return children[i - 1]

    def get_right_sibling_category_node_by_token_indices(self, token_indices):
        """Return the sibling immediately right of the covering node, or None."""
        self_category_node = self.get_self_category_node_by_token_indices(token_indices)
        node_id = id(self_category_node)
        if self_category_node.up == None:
            return None
        children = self_category_node.up.get_children()
        for i, child in enumerate(children):
            if node_id == id(child):
                if i == len(children) - 1:
                    return None
                else:
                    return children[i + 1]

    def get_parent_category_node_by_token_indices(self, token_indices):
        """Return the parent of the node covering token_indices."""
        self_category_node = self.get_self_category_node_by_token_indices(token_indices)
        return self_category_node.up

    def get_arg1_arg2_None_nodes_list(self, Arg1_token_indices, Arg2_token_indices):
        """Label Arg1/Arg2 covering nodes and return all nodes not marked 'X'.

        Leaves and POS-tag nodes are marked 'X' (excluded); everything else is
        'NONE' except the two argument ancestors.
        """
        for node in self.tree.traverse():
            node.add_feature("label","NONE")
        for node in self.tree.get_leaves():
            node.label = "X"
            node.up.label ="X"
        nodes = []
        for token_index in Arg1_token_indices:
            node = self.get_leaf_node_by_token_index(token_index)
            nodes.append(node)
        self.tree.get_common_ancestor(nodes).label = "Arg1_node"
        nodes = []
        for token_index in Arg2_token_indices:
            node = self.get_leaf_node_by_token_index(token_index)
            nodes.append(node)
        self.tree.get_common_ancestor(nodes).label = "Arg2_node"
        arg1_arg2_None_nodes_list = []
        for node in self.tree.traverse():
            if node.label != "X":
                arg1_arg2_None_nodes_list.append(node)
        return arg1_arg2_None_nodes_list

    def to_newick_format(self, parse_tree):
        """Convert a bracketed parse string to newick text, or None on failure."""
        # Escape ',' and ':' in the parse — both are newick metacharacters.
        parse_tree = parse_tree.replace(",", "*COMMA*")
        parse_tree = parse_tree.replace(":", "*COLON*")
        tree_list = self.load_syntax_tree(parse_tree)
        if tree_list == None:
            return None
        tree_list = tree_list[1]  # drop the synthetic ROOT wrapper
        s = self.syntax_tree_to_newick(tree_list)
        s = s.replace(",)",")")
        if s[-1] == ",":
            s = s[:-1] + ";"
        return s

    def load_syntax_tree(self, raw_text):
        """Parse bracketed text into nested lists via a stack; None if malformed."""
        stack = ["ROOT"]
        # Pad brackets with spaces so tokens split cleanly.
        text = re.sub(r"\(", " ( ", raw_text)
        text = re.sub(r"\)", " ) ", text)
        text = re.sub(r"\n", " ", text)
        text = re.sub(r"\s+", " ", text)
        # Strip the outermost double bracket pair "(( ... ))".
        text = re.sub(r"^\(\s*\(\s*", "", text)
        text = re.sub(r"\s*\)\s*\)$", "", text)
        for c in text.strip(" ").split(" "):
            if c == ")":
                # Close a constituent: pop back to its "(" marker.
                node = []
                while(1):
                    popped = stack.pop()
                    if popped == "(":
                        break
                    node.append(popped)
                node.reverse()
                if len(node) > 1:
                    stack.append(node)
                else:
                    if node == []:
                        return None
                    stack.append(node[0])
            else:
                stack.append(c)
        return stack

    def syntax_tree_to_newick(self, syntax_tree):
        """Recursively render a nested-list tree as '(children)label,' text."""
        s = "("
        for child in syntax_tree[1:]:
            if not isinstance(child,list):
                s += child
            else:
                s += self.syntax_tree_to_newick(child)
        s += ")" + str(syntax_tree[0]) + ","
        return s

    # Get an internal node's location (excluding POS-tag nodes) as the token
    # indices of the leftmost leaves under its first (and second) child.
    def get_internal_node_location(self, node):
        leaves = self.tree.get_leaves()
        if len(node.get_children()) > 1:
            child1 = node.get_children()[0]
            child2 = node.get_children()[1]
            # Descend to the leftmost leaf of each child.
            while not child1.is_leaf():
                child1 = child1.get_children()[0]
            while not child2.is_leaf():
                child2 = child2.get_children()[0]
            index1 = leaves.index(child1)
            index2 = leaves.index(child2)
            return [index1, index2]
        if len(node.get_children()) == 1:
            child1 = node.get_children()[0]
            # Descend to the leftmost leaf.
            while not child1.is_leaf():
                child1 = child1.get_children()[0]
            index1 = leaves.index(child1)
            return [index1]

    def get_node_by_internal_node_location(self, location):
        """Inverse of get_internal_node_location()."""
        if len(location) > 1:
            nodes = []
            for token_index in location:
                node = self.get_leaf_node_by_token_index(token_index)
                nodes.append(node)
            return self.tree.get_common_ancestor(nodes)
        if len(location) == 1:
            # Single-child case: grandparent skips the POS-tag level.
            return self.get_leaf_node_by_token_index(location[0]).up.up

    def get_right_siblings(self, node):
        """Return all siblings to the right of node (empty for root/last child)."""
        if node.is_root():
            return []
        children = node.up.get_children()
        for i, child in enumerate(children):
            if child == node:
                if i == len(children) - 1:
                    return []
                return children[i+1:]

    def get_left_siblings(self, node):
        """Return all siblings to the left of node (empty for root/first child)."""
        if node.is_root():
            return []
        children = node.up.get_children()
        for i, child in enumerate(children):
            if child == node:
                if i == 0:
                    return []
                return children[:i]

    def get_siblings(self, node):
        """Return all siblings of node (empty for the root)."""
        if node.is_root():
            return []
        siblings = []
        children = node.up.get_children()
        for i, child in enumerate(children):
            if child != node:
                siblings.append(child)
        return siblings

    def get_relative_position(self, node1, node2):
        """Return 'left'/'right'/'middle': where node2 lies relative to node1."""
        if node1 == node2 or node2.is_root():
            return "middle"
        curr = node1
        rsibs = []
        lsibs = []
        # Collect all left/right siblings on the path from node1 to the root.
        while not curr.is_root():
            rsibs.extend(self.get_right_siblings(curr))
            lsibs.extend(self.get_left_siblings(curr))
            curr = curr.up
            # node2 is an ancestor of node1.
            if curr == node2:
                return "middle"
        for node in rsibs:
            if node2 in node.get_descendants():
                return "right"
        for node in lsibs:
            if node2 in node.get_descendants():
                return "left"

    def get_node_to_node_path(self, node1, node2):
        """Return the 'a>b>LCA<c<d'-style name path between two nodes."""
        common_ancestor = self.tree.get_common_ancestor([node1, node2])
        path = ""
        # node1->common_ancestor
        temp = node1
        while temp != common_ancestor:
            path += temp.name +">"
            temp = temp.up
        path += common_ancestor.name
        ## common_ancestor -> node
        p = ""
        temp = node2
        while temp != common_ancestor:
            p = "<" + temp.name + p
            temp = temp.up
        path += p
        return path

    # Get the sentence token indices of the leaves under this node.
    def get_leaves_indices(self, node):
        leaves = self.tree.get_leaves()
        node_leaves = node.get_leaves()
        indices = sorted([leaves.index(leaf) for leaf in node_leaves])
        return indices
# Script: read a newick tree from argv[1], name each internal node with a
# running integer (preorder), then print it and write it back out.
# Python 2 print statement — this snippet is py2-only.
from ete3 import Tree
import sys

t = Tree(sys.argv[1])
i = 0
for node in t.traverse("preorder"):
    if not node.is_leaf():
        # NOTE(review): i += 1 assumed inside the if (numbering internal
        # nodes only) — indentation reconstructed from collapsed source.
        node.name = str(i)
        i += 1
print t.get_ascii(show_internal=True)
t.write(format=8, outfile="with_internal_nodes.tree")
def run(args):
    """Render (or ASCII-print) every input tree according to CLI options.

    Text mode short-circuits to an ASCII dump.  Otherwise a TreeStyle plus a
    list of face descriptors (FACES, via parse_faces) drives per-node face
    placement, optional heatmap columns, and final render/show.
    NOTE(review): nesting of this collapsed chunk was reconstructed — verify
    against the original (this mirrors ete3's ete_view command).
    """
    if args.text_mode:
        from ete3 import Tree
        for tindex, tfile in enumerate(args.src_tree_iterator):
            #print tfile
            if args.raxml:
                # Convert RAxML-style "[support]" comments into NHX support tags.
                nw = re.sub(":(\d+\.\d+)\[(\d+)\]", ":\\1[&&NHX:support=\\2]", open(tfile).read())
                t = Tree(nw)
            else:
                t = Tree(tfile)
            print(t.get_ascii(show_internal=args.show_internal_names,
                              attributes=args.show_attributes))
        return

    import random
    import re
    import colorsys
    from collections import defaultdict
    from ete3 import (Tree, PhyloTree, TextFace, RectFace, faces, TreeStyle,
                      add_face_to_node, random_color)

    global FACES
    if args.face:
        FACES = parse_faces(args.face)
    else:
        FACES = []

    # VISUALIZATION
    ts = TreeStyle()
    ts.mode = args.mode
    ts.show_leaf_name = True
    ts.tree_width = args.tree_width

    # A face bound to @name replaces the default leaf-name label.
    for f in FACES:
        if f["value"] == "@name":
            ts.show_leaf_name = False
            break

    if args.as_ncbi:
        ts.show_leaf_name = False
        FACES.extend(parse_faces(
            ['value:@sci_name, size:10, fstyle:italic',
             'value:@taxid, color:grey, size:6, format:" - %s"',
             'value:@sci_name, color:steelblue, size:7, pos:b-top, nodetype:internal',
             'value:@rank, color:indianred, size:6, pos:b-bottom, nodetype:internal',
             ]))

    if args.alg:
        FACES.extend(parse_faces(
            ['value:@sequence, size:10, pos:aligned, ftype:%s' %args.alg_type]
        ))

    if args.heatmap:
        FACES.extend(parse_faces(
            ['value:@name, size:10, pos:aligned, ftype:heatmap']
        ))

    if args.bubbles:
        for bubble in args.bubbles:
            FACES.extend(parse_faces(
                ['value:@%s, pos:float, ftype:bubble, opacity:0.4' %bubble,
                 ]))

    ts.branch_vertical_margin = args.branch_separation
    if args.show_support:
        ts.show_branch_support = True
    if args.show_branch_length:
        ts.show_branch_length = True
    if args.force_topology:
        ts.force_topology = True
    # Faces are added manually below; disable automatic layout.
    ts.layout_fn = lambda x: None

    for tindex, tfile in enumerate(args.src_tree_iterator):
        #print tfile
        if args.raxml:
            nw = re.sub(":(\d+\.\d+)\[(\d+)\]", ":\\1[&&NHX:support=\\2]", open(tfile).read())
            t = PhyloTree(nw)
        else:
            t = PhyloTree(tfile)

        if args.alg:
            t.link_to_alignment(args.alg, alg_format=args.alg_format)

        if args.heatmap:
            DEFAULT_COLOR_SATURATION = 0.3
            BASE_LIGHTNESS = 0.7
            def gradient_color(value, max_value, saturation=0.5, hue=0.1):
                # Map a magnitude onto a lightness gradient of a fixed hue.
                def rgb2hex(rgb):
                    return '#%02x%02x%02x' % rgb
                def hls2hex(h, l, s):
                    return rgb2hex( tuple([int(x*255) for x in colorsys.hls_to_rgb(h, l, s)]))
                lightness = 1 - (value * BASE_LIGHTNESS) / max_value
                return hls2hex(hue, lightness, DEFAULT_COLOR_SATURATION)

            # Heatmap file: tab-separated, first column is the node name;
            # '#'-prefixed and blank lines are skipped.
            heatmap_data = {}
            max_value, min_value = None, None
            for line in open(args.heatmap):
                if line.startswith('#COLNAMES'):
                    pass
                elif line.startswith('#') or not line.strip():
                    pass
                else:
                    fields = line.split('\t')
                    name = fields[0].strip()
                    values = [float(x) if x else None for x in fields[1:]]
                    maxv = max(values)
                    minv = min(values)
                    if max_value is None or maxv > max_value:
                        max_value = maxv
                    if min_value is None or minv < min_value:
                        min_value = minv
                    heatmap_data[name] = values

            heatmap_center_value = 0
            heatmap_color_center = "white"
            heatmap_color_up = 0.3
            heatmap_color_down = 0.7
            heatmap_color_missing = "black"

            heatmap_max_value = abs(heatmap_center_value - max_value)
            heatmap_min_value = abs(heatmap_center_value - min_value)

            if heatmap_center_value <= min_value:
                heatmap_max_value = heatmap_min_value + heatmap_max_value
            else:
                heatmap_max_value = max(heatmap_min_value, heatmap_max_value)

        # scale the tree
        if not args.height:
            args.height = None
        if not args.width:
            args.width = None

        f2color = {}
        f2last_seed = {}
        for node in t.traverse():
            node.img_style['size'] = 0
            # Mark single-child (unresolved) nodes with a small square.
            if len(node.children) == 1:
                node.img_style['size'] = 2
                node.img_style['shape'] = "square"
                node.img_style['fgcolor'] = "steelblue"

            ftype_pos = defaultdict(int)

            for findex, f in enumerate(FACES):
                if (f['nodetype'] == 'any' or
                    (f['nodetype'] == 'leaf' and node.is_leaf()) or
                    (f['nodetype'] == 'internal' and not node.is_leaf())):

                    # if node passes face filters
                    if node_matcher(node, f["filters"]):
                        if f["value"].startswith("@"):
                            fvalue = getattr(node, f["value"][1:], None)
                        else:
                            fvalue = f["value"]

                        # if node's attribute has content, generate face
                        if fvalue is not None:
                            fsize = f["size"]
                            fbgcolor = f["bgcolor"]
                            fcolor = f['color']

                            if fcolor:
                                # Parse color options
                                auto_m = re.search("auto\(([^)]*)\)", fcolor)
                                if auto_m:
                                    # auto(attr): one stable random color per
                                    # distinct value of attr.
                                    target_attr = auto_m.groups()[0].strip()
                                    if not target_attr:
                                        color_keyattr = f["value"]
                                    else:
                                        color_keyattr = target_attr
                                    color_keyattr = color_keyattr.lstrip('@')
                                    color_bin = getattr(node, color_keyattr, None)
                                    last_seed = f2last_seed.setdefault(color_keyattr, random.random())
                                    seed = last_seed + 0.10 + random.uniform(0.1, 0.2)
                                    f2last_seed[color_keyattr] = seed
                                    fcolor = f2color.setdefault(color_bin, random_color(h=seed))

                            if fbgcolor:
                                # Parse color options
                                auto_m = re.search("auto\(([^)]*)\)", fbgcolor)
                                if auto_m:
                                    target_attr = auto_m.groups()[0].strip()
                                    if not target_attr:
                                        color_keyattr = f["value"]
                                    else:
                                        color_keyattr = target_attr
                                    color_keyattr = color_keyattr.lstrip('@')
                                    color_bin = getattr(node, color_keyattr, None)
                                    last_seed = f2last_seed.setdefault(color_keyattr, random.random())
                                    seed = last_seed + 0.10 + random.uniform(0.1, 0.2)
                                    f2last_seed[color_keyattr] = seed
                                    fbgcolor = f2color.setdefault(color_bin, random_color(h=seed))

                            if f["ftype"] == "text":
                                if f.get("format", None):
                                    fvalue = f["format"] % fvalue
                                F = TextFace(fvalue,
                                             fsize = fsize,
                                             fgcolor = fcolor or "black",
                                             fstyle = f.get('fstyle', None))
                            elif f["ftype"] == "fullseq":
                                F = faces.SeqMotifFace(seq=fvalue, seq_format="seq",
                                                       seqtail_format="seq", height=fsize)
                            elif f["ftype"] == "compactseq":
                                F = faces.SeqMotifFace(seq=fvalue, seq_format="compactseq",
                                                       seqtail_format="compactseq", height=fsize)
                            elif f["ftype"] == "blockseq":
                                F = faces.SeqMotifFace(seq=fvalue, seq_format="blockseq",
                                                       seqtail_format="blockseq", height=fsize,
                                                       fgcolor=fcolor or "slategrey",
                                                       bgcolor=fbgcolor or "slategrey",
                                                       scale_factor = 1.0)
                                fbgcolor = None
                            elif f["ftype"] == "bubble":
                                # Bubble radius scales with the numeric value.
                                try:
                                    v = float(fvalue)
                                except ValueError:
                                    rad = fsize
                                else:
                                    rad = fsize * v
                                F = faces.CircleFace(radius=rad, style="sphere",
                                                     color=fcolor or "steelblue")
                            elif f["ftype"] == "heatmap":
                                if not f['column']:
                                    col = ftype_pos[f["pos"]]
                                else:
                                    col = f["column"]

                                # One colored cell per heatmap value of this node.
                                for i, value in enumerate(heatmap_data.get(node.name, [])):
                                    ftype_pos[f["pos"]] += 1

                                    if value is None:
                                        color = heatmap_color_missing
                                    elif value > heatmap_center_value:
                                        color = gradient_color(abs(heatmap_center_value - value),
                                                               heatmap_max_value, hue=heatmap_color_up)
                                    elif value < heatmap_center_value:
                                        color = gradient_color(abs(heatmap_center_value - value),
                                                               heatmap_max_value, hue=heatmap_color_down)
                                    else:
                                        color = heatmap_color_center
                                    node.add_face(RectFace(20, 20, color, color), position="aligned", column=col + i)
                                # Add header
                                # for i, name in enumerate(header):
                                #    nameF = TextFace(name, fsize=7)
                                #    nameF.rotation = -90
                                #    tree_style.aligned_header.add_face(nameF, column=i)
                                F = None
                            elif f["ftype"] == "profile":
                                # internal profiles?
                                F = None
                            elif f["ftype"] == "barchart":
                                F = None
                            elif f["ftype"] == "piechart":
                                F = None

                            # Add the Face
                            if F:
                                F.opacity = f['opacity'] or 1.0

                                # Set face general attributes
                                if fbgcolor:
                                    F.background.color = fbgcolor

                                if not f['column']:
                                    col = ftype_pos[f["pos"]]
                                    ftype_pos[f["pos"]] += 1
                                else:
                                    col = f["column"]
                                node.add_face(F, column=col, position=f["pos"])

        if args.image:
            t.render("t%d.%s" %(tindex, args.image),
                     tree_style=ts, w=args.width, h=args.height, units=args.size_units)
        else:
            t.show(None, tree_style=ts)
# Script tail: parse CLI args, read a newick tree, optionally re-root it on
# the taxa matched by --outgroupRegex, then print it.
# NOTE(review): `parser`, `Tree`, and `sys` are defined/imported earlier in
# the original file (outside this chunk).
parser.add_argument(
    '--verbose', action='store_true',
    help=('Print information about the outgroup (if any) taxa to standard '
          'error'))

args = parser.parse_args()

tree = Tree(args.treeFile.read())

if args.outgroupRegex:
    from re import compile
    regex = compile(args.outgroupRegex)
    # Leaves whose names match the regex form the outgroup.
    taxa = [leaf.name for leaf in tree.iter_leaves() if regex.match(leaf.name)]
    if taxa:
        ca = tree.get_common_ancestor(taxa)
        if args.verbose:
            print('Taxa for outgroup:', taxa, file=sys.stderr)
            print('Common ancestor:', ca.name, file=sys.stderr)
            print('Common ancestor is tree:', tree == ca, file=sys.stderr)
        if len(taxa) == 1:
            tree.set_outgroup(tree & taxa[0])
        else:
            if ca == tree:
                # Ancestor is the whole tree: fall back to midpoint rooting.
                tree.set_outgroup(tree.get_midpoint_outgroup())
            else:
                tree.set_outgroup(tree.get_common_ancestor(taxa))

print(tree.get_ascii())
# NOTE(review): this chunk begins mid-function (inside removeUnsupported's
# node loop) and setTaxonomy is cut off at the end — indentation was
# reconstructed and must be verified against the original file.
            continue
        if node.support < fNodeSupportCutoff:
            # Collapse the unsupported node: reattach its children to the
            # parent, adding the removed branch length to each child.
            ancestor = node.get_ancestors()[0]
            for child in node.get_children():
                child.dist += node.dist
                ancestor.add_child(child)
            ancestor.remove_child(node)
    return tree

removeUnsupported(tree, fSupportCutoff)
fileOut.write(
    'Input tree with unsupported nodes removed. In ascii format, having problems writing in newick:\n'
)
fileOut.write(tree.get_ascii() + '\n')
fileOut.write('=====\n')
fileOut.flush()

## Progressively assign a compound taxonomy going from the leaves to the root.
def setTaxonomy(node):
    if node.is_leaf():
        return node
    node.taxonomy = {}
    lChildren = node.get_children()
    fChildrenTaxFreqSum = 0  ## typically the number of children, but some taxons may be masked
    for child in lChildren:
        setTaxonomy(child)
        for (sTaxon, fFreq) in child.taxonomy.items():
            fChildrenTaxFreqSum += fFreq
# NOTE(review): the bare `return score` is the tail of a scoring function
# whose start lies outside this chunk.
    return score

def logLikelihood(msafile, treefile):
    #Takes in an msa and a tree topology and uses IQTree to generate a log likelihood
    # NOTE(review): shells out via os.system with unquoted paths — paths with
    # spaces or shell metacharacters will break; consider subprocess with a list.
    cmd = "iqtree -s " + msafile + " -z " + treefile
    os.system(cmd)

if __name__ == '__main__':
    #Test Pruning
    # Disabled smoke test for prune(); Python 2 print statements.
    if False:
        t = Tree()
        t.populate(10)
        print t.get_ascii()
        keep = [leaf.name for leaf in t]
        np.random.shuffle(keep)
        keep = keep[:7]
        #keep = ['A','B','C','D','G','H','J']
        i = 1
        for node in t.traverse():
            if node.name == '':
                node.name = str(i)
                i += 1
        print keep
        print prune(t, keep)
# Demo: a host (species) tree, a guest (gene) tree, and a map from each host
# node to the guest nodes reconciled onto it.  Python 2 bare `print`
# statements — this snippet is py2-only.
import string

host = Tree("(C,(A,B)D)E;", format=8)
guest = Tree(
    "(((a:0.2,r:0.2)p:0.3,b:0.5)e:0.5,((c:0.4,s:0.4)q:0.3,d:0.1)f:0.3)z;",
    format=1)
# host node -> guest nodes mapped there ("&" looks a node up by name).
nodemap = {
    host & "A": [guest & 'a', guest & 's'],
    host & 'B': [guest & 'c', guest & 'r'],
    host & 'C': [guest & 'b', guest & 'd'],
    host & 'D': [guest & 'p', guest & 'q'],
    host & 'E': [guest & 'e', guest & 'f', guest & 'z']
}
print("HOST TREE (W/ NAMES):")
print(host.get_ascii(attributes=['name']))
print
print("GUEST TREE (W/ NAMES):")
print(guest.get_ascii(attributes=['name']))
print
print("GUEST TREE (W/ DISTANCES):")
print(guest.get_ascii(attributes=['dist']))
print
# Tag every guest node with its reconciliation event type.
for node in guest.traverse():
    if node.name in ['z']:
        node.add_feature('event', "DUPLICATION")
    elif node.name in ['d']:
        node.add_feature('event', "LOSS")
    else:
        node.add_feature('event', "SPECIATION")
# NOTE(review): chunk begins mid-loop — `row`, `index`, `df`, and `t` come
# from an enclosing iteration over a DataFrame that is outside this view;
# indentation reconstructed.  Each haplogroup becomes a child of the node at
# the previous column depth (feature `x`).
    for i, haplogroup in enumerate(row):
        if not pd.isnull(haplogroup):
            if not pd.isnull(df.iloc[index, i + 1]):
                # Annotation (SNP list) sits in the next column.
                annotation = df.iloc[index, i + 1].strip()
                snp = annotation.split(' ')
                #print index, i, haplogroup, snp
                #print t.get_ascii(show_internal=True, attributes=["name", "x", "y"])
                uppers = t.search_nodes(x=i - 1)
                #print uppers
                node = uppers[-1].add_child(name=haplogroup)
                node.add_feature('snp', snp)
                node.add_feature('x', i)
                node.add_feature('y', index)
                break
            else:
                # No annotation column: treat this cell as the annotation and
                # attach an 'unknown' haplogroup one level shallower.
                haplogroup = 'unknown'
                annotation = df.iloc[index, i].strip()
                snp = annotation.split(' ')
                #print index, i-1, haplogroup, snp
                uppers = t.search_nodes(x=i - 2)
                #print uppers
                node = uppers[-1].add_child(name=haplogroup + str(index))
                node.add_feature('snp', snp)
                node.add_feature('x', i - 1)
                node.add_feature('y', index)
                break

# Python 2 print statement below — snippet is py2-only.
print t.get_ascii(show_internal=True, attributes=["name", "position"])
#t.show()
t.write(format=1, features=["snp"], outfile="mtDNA_EntireTree.nw")
# NOTE(review): chunk begins mid-script — `dend` (node table), `soma`, `ts`,
# and `showQt` are defined earlier, outside this view.  The add_child calls
# hand-wire a dendrite topology by node index.
dend[176].add_child(dend[180])
dend[180].add_child(dend[181])
dend[180].add_child(dend[182])
dend[175].add_child(dend[183])
dend[183].add_child(dend[184])
dend[184].add_child(dend[185])
dend[184].add_child(dend[186])
dend[186].add_child(dend[187])
dend[186].add_child(dend[188])
dend[183].add_child(dend[189])
dend[189].add_child(dend[190])
dend[189].add_child(dend[191])
dend[135].add_child(dend[192])
dend[192].add_child(dend[193])
dend[193].add_child(dend[194])
dend[194].add_child(dend[195])
dend[194].add_child(dend[196])
dend[193].add_child(dend[197])
dend[192].add_child(dend[198])

print(soma.get_ascii(show_internal=True))

def my_layout(node):
    # Show each node's name above its branch.
    F = TextFace(node.name, tight_text=True)
    add_face_to_node(F, node, column=0, position="branch-top")

ts.layout_fn = my_layout
if showQt:
    soma.show(tree_style=ts)
class ASRTree:
    """Ancestral state reconstruction over a RAxML tree via Fitch parsimony.

    Workflow: buildTree(newick_path) -> importLookUp(xlsx_path) ->
    runMaxParsimony() -> showTree()/toString().

    Fixes vs. the original:
    - string comparisons used ``is``/identity (``node.name is "Ancestor"``),
      which is implementation-dependent; replaced with ``==``.
    - ``importLookUp`` assigned ``__numOfTaxa`` to a local variable instead of
      ``self.__numOfTaxa``, so getNumOfTaxa() always returned 0.
    - ``buildTree`` leaked the file handle and its failure branch was
      unreachable (``mode == "r"`` is always true after a successful open).
    - the lookup dict was a shared class-level mutable; it is now per-instance.
    """

    # Column indices into the per-taxon value list stored in __anadromyLookUp.
    scientificIndex = 0
    commonIndex = 1
    stateIndex = 2

    def __init__(self):
        self.__tree = None              # ete3 Tree, set by buildTree()
        self.__charStateChanges = 0     # count of state changes, set by parsimony
        self.__numOfTaxa = 0            # number of taxa in the lookup table
        # FASTA file name -> [scientific name, common name, character state]
        self.__anadromyLookUp = dict()

    #-----------------------------buildTree-------------------------------------
    # Description: Builds newick tree from an aligned and filter FASTA file.
    #---------------------------------------------------------------------------
    def buildTree(self, path):
        """Read a RAxML newick file at `path` and build the internal tree."""
        try:
            with open(path, "r") as raxFile:  # FIX: close the file deterministically
                contents = raxFile.read()
            self.__tree = Tree(contents)
            print("\nRAxML tree imported successully.")
        except OSError:
            # FIX: the original failure branch was unreachable; report I/O errors.
            print(
                "\nRAxML tree failed to import successfully. Please check the file path and try again."
            )
    #end buildTree

    #-----------------------runMaxParsimony-------------------------------------
    # Description: Calls private functions for Fitch's algorithm of maximum
    #              parsimony.
    #---------------------------------------------------------------------------
    def runMaxParsimony(self):
        """Run Fitch parsimony (down-pass, up-pass, state-change count)."""
        if self.__tree is None:
            print(
                "\n****************Error****************\nTree has not been imported. Please run buildTree method first."
            )
        else:
            # Transform tree to bifurcating - does nothing if already bifurcating.
            self.__tree.resolve_polytomy()
            self.__downPass()
            self.__upPass()
            self.__findCharStateChanges()
    #end runMaxParsimony

    def getNumOfTaxa(self):
        """Return the number of taxa loaded by importLookUp()."""
        return self.__numOfTaxa
    #end getNumOfTaxa

    def getCharStateChanges(self):
        """Return the number of character state changes found by parsimony."""
        return self.__charStateChanges
    #end of getCharStateChanges

    #---------------------------importLookUp------------------------------------
    # Description: Imports the look-up file for assigning character state
    #              changes and taxa names.
    #---------------------------------------------------------------------------
    def importLookUp(self, path):
        """Load the per-taxon spreadsheet: name, scientific/common name, state."""
        importFile = xlrd.open_workbook(path)
        sheet = importFile.sheet_by_index(0)
        for row in range(1, sheet.nrows):  # row 0 is the header
            values = []
            for col in range(sheet.ncols):
                cell = sheet.cell_value(row, col)
                # Columns 0-2: file name, scientific name, common name (text);
                # remaining columns: integer character state(s).
                if col <= 2:
                    values.append(cell)
                else:
                    values.append(int(cell))
            self.__anadromyLookUp[values[0]] = values[1:]
        # FIX: original bound a local `__numOfTaxa`, leaving the attribute at 0.
        self.__numOfTaxa = len(self.__anadromyLookUp)
    #end importLookUp

    #-----------------------------showTree--------------------------------------
    # Description: Displays tree in console and opens an external window to
    #              interact with tree and see branch length.
    #---------------------------------------------------------------------------
    def showTree(self):
        print(
            self.__tree.get_ascii(attributes=["name", "anadromy"],
                                  show_internal=True))
        self.__tree.show()
    #end showTree

    #-----------------------------toString--------------------------------------
    # Description: Prints to console number of taxa and their names, as well as
    #              the number of character state changes.
    #---------------------------------------------------------------------------
    def toString(self):
        """Return a summary string, or an error message if not yet run."""
        if self.__tree is None or self.__charStateChanges == 0:
            return "\n****************Error****************\nTree not constructed, or maximum parsimony not yet run. Please run methods and try again."
        count = 0
        asrInfo = "\n\t\tTaxa\n"
        for key in self.__anadromyLookUp:
            count += 1
            asrInfo += str(count) + ": " + self.__anadromyLookUp[key][
                self.scientificIndex]
            asrInfo += " (" + self.__anadromyLookUp[key][
                self.commonIndex] + ")\n"
        asrInfo += "\nCharacter State Changes: " + str(self.__charStateChanges)
        return asrInfo
    #end toString

    #----------------------------__downPass-------------------------------------
    # Description: Private method to perform down-pass to assign character state
    #              to tips and internal nodes (Fitch intersection/union rule).
    #---------------------------------------------------------------------------
    def __downPass(self):
        for node in self.__tree.traverse("postorder"):
            # Visited internal nodes are tagged with the name "Ancestor".
            if node.name == "Ancestor":  # FIX: was `is "Ancestor"`
                if not node.is_root():
                    if node.up.name == "":  # FIX: was `is ""`
                        node.up.add_feature("anadromy", node.anadromy)
                        node.up.name = "Ancestor"
                    elif node.anadromy.issubset(
                            node.up.anadromy) or node.anadromy.issuperset(
                                node.up.anadromy):
                        node.up.add_feature(
                            "anadromy",
                            node.up.anadromy.intersection(node.anadromy))
                    else:
                        node.up.add_feature(
                            "anadromy",
                            node.up.anadromy.union(node.anadromy))
            else:
                if node.name in self.__anadromyLookUp:
                    # Leaf state comes from the lookup table.
                    isAnadromous = set(
                        [self.__anadromyLookUp[node.name][self.stateIndex]])
                    node.add_feature("anadromy", isAnadromous)
                    if node.up.name == "":  # unvisited parent: seed it
                        node.up.add_feature("anadromy", isAnadromous)
                        node.up.name = "Ancestor"
                    elif self.__anadromyLookUp[node.name][
                            self.stateIndex] in node.up.anadromy:
                        node.up.add_feature(
                            "anadromy",
                            node.anadromy.intersection(node.up.anadromy))
                    else:
                        node.up.add_feature(
                            "anadromy",
                            node.up.anadromy.union(node.anadromy))
                    # Display the common name from here on.
                    node.name = self.__anadromyLookUp[node.name][self.commonIndex]
    #end __downPass

    #-----------------------------__upPass--------------------------------------
    # Description: Private method to perform up-pass to clear any union in
    #              ancestor nodes by finding the intersection of the
    #              ancestor and its parent node.
    #---------------------------------------------------------------------------
    def __upPass(self):
        for node in self.__tree.traverse("preorder"):
            if node.name == "Ancestor" and not node.is_root():  # FIX: was `is`
                if len(node.anadromy) > 1:
                    node.add_feature(
                        "anadromy",
                        node.anadromy.intersection(node.up.anadromy))
    #end __upPass

    #----------------------__findCharStateChanges-------------------------------
    # Description: Private function to find the number of character state
    #              changes in the tree (nodes whose state set excludes the
    #              root's state).
    #---------------------------------------------------------------------------
    def __findCharStateChanges(self):
        characterState = 0
        for node in self.__tree.traverse("preorder"):
            if node.is_root():
                characterState = next(iter(node.anadromy))
            else:
                if characterState not in node.anadromy:
                    self.__charStateChanges += 1
# NOTE(review): chunk begins mid-script — `common_ancestor`, `species_in_tree`,
# `tree`, `calc_score1`, `calc_score2` are defined earlier, outside this view.
# Python 2 print statements — snippet is py2-only.
print "num species in common ancestor: ", len(common_ancestor)
print "score 1: ", len(species_in_tree), "\\", len(
    common_ancestor), "= ", calc_score1(common_ancestor, species_in_tree)
print "******************************************************"
(score, monophyly_nodes) = calc_score2(common_ancestor, species_in_tree)
# Keep only internal monophyletic nodes, then pick the one covering the most
# included species (feature `include`).
monos = [node for node in monophyly_nodes if not node.is_leaf()]
max_node = monos[0]
max_include = 0
for node in monos:
    if node.include > max_include:
        max_include = node.include
        max_node = node
print "******************************************************"
print "biggest monophyly group: ", len(max_node)
print "score 2: ", len(max_node), "\\", len(species_in_tree), "= ", score
print "******************************************************"
print max_node
a = [x.name for x in max_node.get_leaves()]
out = tree.get_ascii(show_internal=True)
with open("out", "w") as f:
    f.write(out)
print tree.write(format=1, outfile="new_tree.nw")
# NOTE(review): `a` is rebound here, discarding the leaf-name list above.
a = [x for x in common_ancestor.get_leaves() if x.name in species_in_tree]
print a
import os, uuid
from ete3 import Tree

# For every .tre file under SRC_DIR: delete internal nodes with support <= 0.9
# (collapsing them into polytomies) and write the result to Chunks_90/.
SRC_DIR = "/Users/David/Downloads/Chunks"
for file in os.listdir(SRC_DIR):
    if file.endswith(".tre"):
        outname = os.path.join(SRC_DIR, "Chunks_90", str(file))
        # FIX: the original did Tree(file, ...) with the bare listdir name,
        # which only resolves when the CWD happens to be SRC_DIR; use the
        # full path instead.
        t = Tree(os.path.join(SRC_DIR, file), format=0)
        print(t.get_ascii(attributes=['support', 'name']))
        # get_descendants() returns a list, so deleting while iterating is safe.
        for node in t.get_descendants():
            if not node.is_leaf() and node.support <= 0.9:
                node.delete()
        print(t.get_ascii(attributes=['support', 'name']))
        t.write(format=0, outfile=outname)
def main(): random.seed() #Open the files trainingFile = open("data/training.txt","r") testFile = open("data/test.txt", "r") trainingExamples = [] testExamples = [] #Read eaxmaples from files for line in trainingFile: trainingExamples.append(line.split()) for line in testFile: testExamples.append(line.split()) #Convert 2 to 0 in examples and make the values integers for i in xrange(0, len(trainingExamples)): for j in xrange(0, len(trainingExamples[0])): if trainingExamples[i][j] == '1': trainingExamples[i][j] = 1 else: trainingExamples[i][j] = 0 for i in xrange(0, len(testExamples)): for j in xrange(0, len(testExamples[0])): if testExamples[i][j] == '1': testExamples[i][j] = 1 else: testExamples[i][j] = 0 #Create the attrbutes 0 to 6 attributes = [x for x in range(0, len(trainingExamples[0])-1)] #Create deep copy in order for training with random Importance random_attributes = copy.deepcopy(attributes) #Close files trainingFile.close() testFile.close() #Train two trees, one with regular Importance and one with random Importance tree = train(trainingExamples, attributes) random_tree = train(trainingExamples, random_attributes, True) #Test the trees accuracy = test(tree, testExamples) random_accuracy = test(random_tree, testExamples) print accuracy print random_accuracy #Visualise the trees s = print_tree(tree) s = s[:-1] s += ';' print s try: t = Tree(s, format=1) print t.get_ascii(show_internal=True) except NameError as e: pass r = print_tree(random_tree) r = r[:-1] r += ';' print r try: rt = Tree(r, format=1) print rt.get_ascii(show_internal=True) except NameError as e: pass
# NOTE(review): chunk begins mid-script — `dis` (distance DataFrame),
# `convert_genome_ID_rev`, `intree`, `KMeans`, `defaultdict`, and `PhyloTree`
# are defined/imported earlier, outside this view.
# Two-cluster k-means over the distance matrix rows.
kmeans = KMeans(n_clusters=2,
                random_state=0,
                precompute_distances=True,
                tol=1e-10).fit(dis.values)
kmeans.labels_
id2info = defaultdict(list)
for idx, id in enumerate(dis.index):
    new_name = convert_genome_ID_rev(id.split('_')[0]) + '_' + id
    id2info[new_name] = [str(kmeans.labels_[idx])]
# Emit an iTOL binary-shape annotation file for the two clusters.
from api_tools.itol_func import *
text = to_binary_shape(id2info, {'1': {}, '0': {}})
with open('../itol_txt/separate_tmp.txt', 'w') as f1:
    f1.write(text)

t = PhyloTree(intree)
# t.set_outgroup(t.get_midpoint_outgroup())
# Species is encoded in the first '_'-separated field of each leaf name.
t.set_species_naming_function(
    lambda node: convert_genome_ID_rev(node.name.split('_')[0]))
print(t.get_ascii(attributes=["name", "species"], show_internal=False))
t2 = t.collapse_lineage_specific_expansions()
ntrees, ndups, sptrees = t2.get_speciation_trees()
sptrees = list(sptrees)
print("Found %d species trees and %d duplication nodes" % (ntrees, ndups))
for spt in sptrees:
    print(len(spt.get_leaf_names()))
# NOTE(review): chunk begins inside the docstring of a function (branchLen,
# called below) whose `def` line is outside this view.
    This function works with tree files in newick format
    '''
    t = Tree(treefile)
    # Collect every positive branch length in the tree.
    branchLenDist = []
    for n in t.traverse():
        if n.dist > 0:
            branchLenDist.append(n.dist)
    df = pd.DataFrame({'branchLen':branchLenDist})
    # Tag rows with the dataset = name of the tree file's parent directory.
    df['dataset'] = os.path.basename(os.path.dirname(treefile))
    return df

if __name__ == '__main__':
    print("In order to run this script all files must have the same name and extension and they should be saved in directories that have the datasets name. Please see an example below")
    # Purely illustrative "tree" showing the expected directory layout.
    diagram = Tree("((----->treeFileName.treefile)----->dataset1Dir, (----->treeFileName.treefile)----->dataset2Dir, (----->treeFileName.treefile)----->dataset3Dir)rootDir;", format=1)
    print(diagram.get_ascii(show_internal=True))
    rootDir = '/data/Suha/GTR_parameters_dist' #the rootDir name to the directories that contain the tree files
    treeFileName = 'branches.treefile' #the name of the tree file with .treefile extension (any newick format file can be used)
    branchLenFile = 'BranchLen.csv' #the name of the branch lengths output file with .csv extension
    proceed = input("do you want to proceed? Y/N\n")
    if proceed == 'Y':
        df = pd.DataFrame()
        # Walk the root dir and gather branch lengths from each dataset's tree.
        for DirName, subdirList, fileList in os.walk(rootDir):
            if treeFileName in fileList:
                treeFile = os.path.join(DirName,treeFileName)
                df = df.append(branchLen(treeFile))
        df.to_csv(os.path.join(rootDir, branchLenFile))
# NOTE(review): chunk begins mid-call — the `parser.add_argument(` opener is
# outside this view.  Same outgroup-rooting script shape as elsewhere in this
# file: parse args, read tree, optionally re-root on regex-matched taxa.
    '--verbose', action='store_true',
    help=('Print information about the outgroup (if any) taxa to standard '
          'error'))

args = parser.parse_args()

tree = Tree(args.treeFile.read())

if args.outgroupRegex:
    from re import compile
    regex = compile(args.outgroupRegex)
    # Leaves whose names match the regex form the outgroup.
    taxa = [leaf.name for leaf in tree.iter_leaves() if regex.match(leaf.name)]
    if taxa:
        ca = tree.get_common_ancestor(taxa)
        if args.verbose:
            print('Taxa for outgroup:', taxa, file=sys.stderr)
            print('Common ancestor:', ca.name, file=sys.stderr)
            print('Common ancestor is tree:', tree == ca, file=sys.stderr)
        if len(taxa) == 1:
            tree.set_outgroup(tree & taxa[0])
        else:
            if ca == tree:
                # Ancestor is the whole tree: fall back to midpoint rooting.
                tree.set_outgroup(tree.get_midpoint_outgroup())
            else:
                tree.set_outgroup(tree.get_common_ancestor(taxa))

print(tree.get_ascii())
def phylogenetic_tree_to_cluster_format(tree, pairwise_estimates):
    """
    Convert a phylogenetic tree to a 'cluster' data structure as in
    ``fastcluster``. The first two columns indicate the nodes that are joined
    by the relevant node, the third indicates the distance (calculated from
    branch lengths in the case of a phylogenetic tree) and the fourth the
    number of leaves underneath the node. Note that the trees are rooted using
    midpoint-rooting.

    :param tree: newick tree file
    :param pairwise_estimates: pairwise Ks estimates data frame (pandas)
        (only the index is used)
    :return: clustering data structure (numpy array of rows
        ``[child1, child2, distance, leaf_count]``), pairwise distances
        dictionary keyed by leaf id
    """
    # Map each leaf label in the estimates index to a dense integer id,
    # matching fastcluster's convention of numbering leaves 0..n-1.
    id_map = {
        pairwise_estimates.index[i]: i
        for i in range(len(pairwise_estimates))
    }
    t = Tree(tree)

    # midpoint rooting
    midpoint = t.get_midpoint_outgroup()
    if not midpoint:  # get_midpoint_outgroup() returns None for 2-leaf trees
        midpoint = list(t.get_leaves())[0]
    t.set_outgroup(midpoint)
    logging.debug('Tree after rooting:\n{}'.format(t.get_ascii()))

    # algorithm for getting cluster data structure:
    # internal nodes are numbered n, n+1, ... in postorder, so every child
    # (leaf or internal) already carries its numeric name when its parent
    # is visited.
    n = len(id_map)
    out = []
    pairwise_distances = {}
    for node in t.traverse('postorder'):
        if node.is_leaf():
            node.name = id_map[node.name]
            # add identity map for the renamed (now integer-named) leaf so the
            # dict comprehension below resolves both renamed and original names
            id_map[node.name] = node.name
            pairwise_distances[node.name] = {
                id_map[x.name]: node.get_distance(x)
                for x in t.get_leaves()
            }
        else:
            node.name = n
            n += 1
            # NOTE(review): assumes a bifurcating tree — only children[0] and
            # children[1] are recorded; a polytomy would be silently truncated.
            children = node.get_children()
            out.append([
                children[0].name, children[1].name,
                children[0].get_distance(children[1]),
                len(node.get_leaves())
            ])
    return np.array(out), pairwise_distances
node_cur.add_face(nameFace, column=1, position="branch-bottom") else: # for child #### search the parent node by parent_id node_cur = root.search_nodes(name=str(parent_id)) # there should be only one parent node if len(node_cur) == 1: #### set child with its id node_cur = node_cur[0].add_child(name=str(cell_id)) #### set duration node_cur.add_feature("dist", time_duration) # set node style node_cur.set_style(ns) # set node name to face nameFace = TextFace(node_cur.name) nameFace.fgcolor = "white" nameFace.fsize = 15 nameFace.background.color = "green" node_cur.add_face(nameFace, column=1, position="branch-bottom") else: raise RuntimeError("the cell id should be unique!") #node = root.search_nodes(name=str(5)) #node[0].add_feature("dist", 1.5) print root.get_ascii() root.show(tree_style=ts)
percstr[node.name] = str(sum_parent) + "%" for node in t2.traverse(): if not node.is_leaf() and node.level == 1: node.add_features(perc=percs.get(node.name, "none"), percstring=percstr.get(node.name, "none")) sum_title = 0 for child in t2.children: sum_title += child.perc sum_title = round(sum_title, 2) for node in t2.traverse(): if node.name == title: node.add_features(perc=sum_title, percstring=str(sum_title) + "%") print t2.get_ascii(attributes=["name", "percstring", "buff"]) ts = TreeStyle() # provide a list of layout functions, instead of a single one ts.layout_fn = [size_internal] ts.show_leaf_name = False ts.branch_vertical_margin = 10 ts.title.add_face(TextFace("Graphical Representation of " + title, fsize=20), column=0) ns = NodeStyle() ns["shape"] = "sphere" ns["size"] = 1 ns["fgcolor"] = "darkred"
def visualizeTree(postOrderStr):
    """Render *postOrderStr* (newick, ete3 format 8: all node names, no
    branch lengths) as ASCII art on stdout, internal node names included."""
    rendered = Tree(postOrderStr, format=8).get_ascii(show_internal=True)
    print(rendered)
class ASRTree:
    """Ancestral state reconstruction over a RAxML phylogeny.

    Reconstructs anadromy and AQP3 character states with Fitch maximum
    parsimony and tests their association via Monte Carlo simulation.
    """
    #Attributes (class-level defaults; instance values set in the methods)
    __tree = None  #Actual tree
    __sim_tree = None  #Simulation tree
    # NOTE(review): FOUR leading underscores (mangles to
    # _ASRTree____transition_prob_anad) — likely a typo for
    # __transition_prob_anad, but it is used consistently, so kept as-is.
    ____transition_prob_anad = None
    __transition_prob_aqp3 = None
    __sim_effect_sizes = []  #List containing simulation effect sizes
    __p_value_count = 0  #Number of times a simulated effect size >= actual
    __effect_size = 0  #Actual effect size of model
    __num_of_branches = __num_anad = __num_aqp3 = __num_anad_and_aqp3 = __num_taxa = __p_value = 0
    __anadromy_lookup = dict(
    )  #Dictionary matching FASTA file names (key) to a list of taxa names and character states
    # Indices into the per-taxon value list stored in __anadromy_lookup.
    SCIENTIFIC_INDEX = 0
    COMMON_INDEX = 1
    ANAD_INDEX = 2
    AQP3_INDEX = 3
    EPSILON = 0.00000000000000000001  #Number being added to anadromy/aqp3 variables to avoid division by 0 in effect size

    #Public Methods
    #--------------------------constructor--------------------------------------
    # Description: Constructs ASTree and sets default value for tree, and
    #              creates the 2x2 transition-probability matrices, setting
    #              initial values to 0.
    #---------------------------------------------------------------------------
    def __init__(self):
        self.__tree = None
        self.____transition_prob_anad = [[0.0 for x in range(2)]
                                         for y in range(2)]
        self.__transition_prob_aqp3 = [[0.0 for x in range(2)]
                                       for y in range(2)]
    #end constructor

    #-----------------------------build_tree------------------------------------
    # Description: Builds phylogenetic tree from newick tree file in RAxML
    #              result.
    #---------------------------------------------------------------------------
    def build_tree(self, path):
        # NOTE(review): file is never closed, and `rax_file.mode == "r"` is
        # always true after a successful open("r") — open() raises on failure,
        # so the else branch is effectively unreachable.
        rax_file = open(path, "r")
        if rax_file.mode == "r":
            contents = rax_file.read()
            self.__tree = Tree(contents)
            print("\nRAxML tree imported successully.")
        else:
            print(
                "\nRAxML tree failed to import successfully. Please check the file path and try again."
            )
    #end build_tree

    #-----------------------run_max_parsimony-----------------------------------
    # Description: Calls private functions for Fitch's algorithm of maximum
    #              parsimony.
#--------------------------------------------------------------------------- def run_max_parsimony( self ): #Calls private functions for Fitch's algorithm of maximum parsimony if self.__tree is None: print( "\n****************Error****************\nTree has not been imported. Please run build_tree method first." ) else: self.__tree.resolve_polytomy( ) #Transform tree to bifurcating - does nothing if already bifurcating self.__down_pass() self.__up_pass() self.__clean_tree() self.__find_char_states() self.__find_transition_prob() self.__effect_size = self.calc_effect_size(self.__num_anad + self.EPSILON,\ self.__num_aqp3 + self.EPSILON, self.__num_anad_and_aqp3 + self.EPSILON) #end run_max_parsimony #-----------------------------get_num_taxa---------------------------------- # Description: Returns number of taxa. #--------------------------------------------------------------------------- def get_num_taxa(self): return self.__num_taxa #end get_num_taxa #-----------------------------get_p_value----------------------------------- # Description: Returns the P-Value of the hypothesis test. #--------------------------------------------------------------------------- def get_p_value(self): return self.__p_value #end get_p_value #--------------------------import_lookup------------------------------------ # Description: Imports the look-up file for assigning character state # changes and taxa names. 
#--------------------------------------------------------------------------- def import_lookup( self, path ): #Imports the look-up file for assigning character state changes and taxa names import_file = xlrd.open_workbook(path) file = import_file.sheet_by_index(0) values = list() #Local list for holding cell row information for row in range( 1, file.nrows): #Nested loops to cover entire spreadsheet for col in range( file.ncols ): #Creates a list of the scientific names, common names and character states for each fish in file if col == 0: file_name = file.cell_value(row, col) values.append(file_name) elif col == 1: scientific_name = file.cell_value(row, col) values.append(scientific_name) elif col == 2: common_name = file.cell_value(row, col) values.append(common_name) elif col == 3: anadromous = int(file.cell_value(row, col)) values.append(anadromous) else: aqp3 = int(file.cell_value(row, col)) values.append(aqp3) self.__anadromy_lookup[values[0]] = values[1:] values.clear() __num_taxa = len(self.__anadromy_lookup) #end import_lookup #----------------------------show_tree-------------------------------------- # Description: Displays tree in console and opens an external window to # interact with tree and see branch length. #--------------------------------------------------------------------------- def show_tree(self): print( self.__tree.get_ascii(attributes=["name", "anadromy", "aqp3"], show_internal=True)) self.__tree.show() #end show_tree #----------------------------to_string-------------------------------------- # Description: Prints to console number of taxa and their names, as well as # the number of character state changes. #--------------------------------------------------------------------------- def to_string(self): if self.__tree == None or self.__effect_size == 0: return "\n****************Error****************\nTree not constructed,\ or maximum parsimony not yet run. Please run methods and try again." 
count = 0 asr_info = "\n\t\tTaxa\n" for key in self.__anadromy_lookup: count += 1 asr_info += str(count) + ": " + self.__anadromy_lookup[key][ self.SCIENTIFIC_INDEX] asr_info += " (" + self.__anadromy_lookup[key][ self.COMMON_INDEX] + ")\n" asr_info += "\nAnadromy Character State Changes: " + str( self.__num_anad) asr_info += "\nAQP3 Character State Changes: " + str(self.__num_aqp3) return asr_info #end to_string #------------------------calc_effect_size----------------------------------- # Description: Public method that calculates the effect size of the ASRTree. #--------------------------------------------------------------------------- def calc_effect_size(self, numOfAnad, numOfAqp3, numAnadAndAqp3): effect_size = ((numAnadAndAqp3 / self.__num_of_branches) / ((numOfAnad / self.__num_of_branches) * (numOfAqp3 / self.__num_of_branches))) return effect_size #end calc_effect_size #-------------------------monte_carlo_sim----------------------------------- # Description: Public method to run n number of Monte Carlo simulations # in order to test the hypothesis. Each simulation checks # the ancestral node in the tree, then refers to the transition # rate matrix for the probability of getting the same or a # different character state. 
    #---------------------------------------------------------------------------
    def monte_carlo_sim(self, num_sims):
        """Run *num_sims* Monte Carlo simulations of character evolution on a
        copy of the tree and compute the p-value as the fraction of simulated
        effect sizes >= the observed effect size."""
        #Checks if there already is a simulation tree to avoid unncessary copies
        self.__p_value_count = 0  #Initialize back to 0
        self.__sim_effect_sizes.clear()  #Initialize back to empty
        if self.__sim_tree is None:
            self.__sim_tree = self.__tree.copy()
        for sim in range(num_sims):
            #Set values of each count back to the EPSILON value to avoid
            #division by 0 in the effect size
            aqp3_count = self.EPSILON
            anad_count = self.EPSILON
            anad_aqp3_count = self.EPSILON
            for node in self.__sim_tree.traverse("preorder"):
                # NOTE(review): randint(0, 1001) is inclusive of 1001, so the
                # comparison against prob*1000 is slightly biased — confirm
                # intended range.
                rand_num_1 = random.randint(0, 1001)
                rand_num_2 = random.randint(0, 1001)
                if not node.is_root():
                    #Check each ancestor's character state, and roll a random
                    #number against the probability of going from that state to
                    #the same or a different state based on transition matrix
                    #and assign that character state. Tally all gains
                    if node.up.anadromy == 1:
                        if (self.____transition_prob_anad[1][0] *
                                1000) > rand_num_1:
                            node.add_feature("anadromy", 0)
                        else:
                            node.add_feature("anadromy", 1)
                            # counted only when state 1 is assigned
                            anad_count += 1
                    else:
                        if (self.____transition_prob_anad[0][1] *
                                1000) < rand_num_1:
                            node.add_feature("anadromy", 0)
                        else:
                            node.add_feature("anadromy", 1)
                            anad_count += 1
                    if node.up.aqp3 == 1:
                        if (self.__transition_prob_aqp3[1][0] *
                                1000) > rand_num_2:
                            node.add_feature("aqp3", 0)
                        else:
                            node.add_feature("aqp3", 1)
                            aqp3_count += 1
                    else:
                        if (self.__transition_prob_aqp3[0][1] *
                                1000) < rand_num_2:
                            node.add_feature("aqp3", 0)
                        else:
                            node.add_feature("aqp3", 1)
                            aqp3_count += 1
                    if node.anadromy == 1 and node.aqp3 == 1:
                        anad_aqp3_count += 1
            #Calculate the effect size and store the results.
            eff_size = self.calc_effect_size(anad_count, aqp3_count,
                                             anad_aqp3_count)
            self.__sim_effect_sizes.append(eff_size)
            if eff_size >= self.__effect_size:
                self.__p_value_count += 1
        self.__p_value = (self.__p_value_count / num_sims
                          )  #Calculate and store p-value
    #end monte_carlo_sim

    #--------------------------plot_histogram-----------------------------------
    # Description: Public method to plot the histogram for testing the null
    #              hypothesis.
    #---------------------------------------------------------------------------
    def plot_histogram(self):
        """Plot the distribution of simulated effect sizes with the observed
        effect size marked by a dashed vertical line (matplotlib)."""
        plt.style.use('seaborn')
        _ = plt.hist(self.__sim_effect_sizes, bins=100)
        plt.axvline(self.__effect_size,
                    color='k',
                    linestyle='dashed',
                    linewidth=1)
        plt.text(self.__effect_size + .05, 200,
                 ' Actual Effect Size:{:.3f}'.format(self.__effect_size))
        plt.xlabel('Effect Size')
        plt.ylabel('Effect Frequency')
        plt.title('Monte Carlo Simulation Distribution')
        plt.show()
    #end plot_histogram

    #--------------------__find_transition_prob---------------------------------
    # Description: Private method that determines the transition probability
    #              of each character trait change.
#--------------------------------------------------------------------------- def __find_transition_prob(self): #Establish counter variables and traverse tree zero_to_one_anad = zero_to_zero_anad = one_to_zero_anad = one_to_one_anad = 0.0 zero_to_one_aqp3 = zero_to_zero_aqp3 = one_to_zero_aqp3 = one_to_one_aqp3 = 0.0 for node in self.__tree.traverse("postorder"): if not node.is_root(): #Find Anadromy transitions if (node.up.anadromy is 0 and node.anadromy is 0): zero_to_zero_anad += 1 elif (node.up.anadromy is 0 and node.anadromy is 1): zero_to_one_anad += 1 elif (node.up.anadromy is 1 and node.anadromy is 0): one_to_zero_anad += 1 else: one_to_one_anad += 1 #Find AQP3 transitions if (node.up.aqp3 is 0 and node.aqp3 is 0): zero_to_zero_aqp3 += 1 elif (node.up.aqp3 is 0 and node.aqp3 is 1): zero_to_one_aqp3 += 1 elif (node.up.aqp3 is 1 and node.aqp3 is 0): one_to_zero_aqp3 += 1 else: one_to_one_aqp3 += 1 #Insert the probability into the appropriate matrix self.____transition_prob_anad[0][0] = (zero_to_zero_anad / self.__num_of_branches) self.____transition_prob_anad[0][1] = (zero_to_one_anad / self.__num_of_branches) self.____transition_prob_anad[1][1] = (one_to_one_anad / self.__num_of_branches) self.____transition_prob_anad[1][0] = (one_to_zero_anad / self.__num_of_branches) self.__transition_prob_aqp3[0][0] = (zero_to_zero_aqp3 / self.__num_of_branches) self.__transition_prob_aqp3[0][1] = (zero_to_one_aqp3 / self.__num_of_branches) self.__transition_prob_aqp3[1][1] = (one_to_one_aqp3 / self.__num_of_branches) self.__transition_prob_aqp3[1][0] = (one_to_zero_aqp3 / self.__num_of_branches) #end findTransitionProb #Private Methods #---------------------------__down_pass------------------------------------- # Description: Private method to perform down-pass to assign character state # to tips and internal nodes. 
#--------------------------------------------------------------------------- def __down_pass(self): for node in self.__tree.traverse("postorder"): #Check for internal nodes that have been visted - marked as "Ancestor" if node.name is "Ancestor": if not node.is_root(): #If the parent node of the current ancestor node is unvisited, #attach the character state of this node to its ancestor if node.up.name is "": node.up.add_feature("anadromy", node.anadromy) node.up.add_feature("aqp3", node.aqp3) node.up.name = "Ancestor" #If the node has an intersection with its ancestor, set it if node.aqp3.issubset( node.up.aqp3) or node.aqp3.issuperset( node.up.aqp3): node.up.add_feature( "aqp3", node.up.aqp3.intersection(node.aqp3)) else: #Otherwise, it's a union of two states node.up.add_feature("aqp3", node.up.aqp3.union(node.aqp3)) #If the node has an intersection with its ancestor, set it if node.anadromy.issubset( node.up.anadromy) or node.anadromy.issuperset( node.up.anadromy): node.up.add_feature( "anadromy", node.up.anadromy.intersection(node.anadromy)) else: #Otherwise, it's a union of two states node.up.add_feature( "anadromy", node.up.anadromy.union(node.anadromy)) else: #Otherwise, it could be an unnamed internal node, or a terminal node #If it's a terminal node, grab its states from the lookup if node.name in self.__anadromy_lookup: isAnadromous = set( [self.__anadromy_lookup[node.name][self.ANAD_INDEX]]) isAqp3 = set( [self.__anadromy_lookup[node.name][self.AQP3_INDEX]]) node.add_feature("anadromy", isAnadromous) node.add_feature("aqp3", isAqp3) if node.up.name is "": #If the internal node is not yet named, it is unvisited node.up.add_feature("anadromy", isAnadromous) node.up.add_feature("aqp3", isAqp3) node.up.name = "Ancestor" #Tag internal nodes as Ancestor to easily identify visited nodes if self.__anadromy_lookup[node.name][ self.AQP3_INDEX] in node.up.aqp3: node.up.add_feature( "aqp3", node.aqp3.intersection(node.up.aqp3)) else: node.up.add_feature("aqp3", 
node.up.aqp3.union(node.aqp3)) if self.__anadromy_lookup[node.name][ self.ANAD_INDEX] in node.up.anadromy: node.up.add_feature( "anadromy", node.anadromy.intersection(node.up.anadromy)) else: node.up.add_feature( "anadromy", node.up.anadromy.union(node.anadromy)) node.name = self.__anadromy_lookup[node.name][ self.COMMON_INDEX] #end __down_pass #----------------------------__up_pass-------------------------------------- # Description: Private method to perform up-pass to clear any union in # ancestor nodes by sinding the intersection of the # ancestor and its parent node. #--------------------------------------------------------------------------- def __up_pass(self): #Up-pass to clear any union in ancestor nodes for node in self.__tree.traverse("preorder"): if node.name is "Ancestor": if not node.is_root(): if len(node.anadromy) > 1: node.add_feature( "anadromy", node.anadromy.intersection(node.up.anadromy)) if len(node.aqp3) > 1: node.add_feature("aqp3", node.aqp3.intersection(node.up.aqp3)) #end __up_pass #--------------------------__clean_tree------------------------------------- # Description: Private function to clear the sets in the attributes for # anadromy and AQP3 in each node and turn them into integers. #--------------------------------------------------------------------------- def __clean_tree(self): for node in self.__tree.traverse("preorder"): character_state_anad = next(iter(node.anadromy)) character_state_aqp3 = next(iter(node.aqp3)) node.add_feature("anadromy", character_state_anad) node.add_feature("aqp3", character_state_aqp3) #end __clean_tree #-------------------------__find_char_states--------------------------------- # Description: Private function to find the number of branches, as well as # find the number of character states - both individual and # branches with both andromy and AQP3. 
#--------------------------------------------------------------------------- def __find_char_states(self): for node in self.__tree.traverse("preorder"): self.__num_of_branches += 1 if node.anadromy == 1 and node.aqp3 == 1: self.__num_anad_and_aqp3 += 1 if node.anadromy == 1: self.__num_anad += 1 if node.aqp3 == 1: self.__num_aqp3 += 1 self.__num_of_branches -= 1 #Not counting the root as a separate branch
import numpy as np
from setup_gloome_param_files import isIndDict, genotype_dict, geneDict, good_indices
import pandas as pd
from uniqify import uniqify
from unlistify import unlistify
from copy import copy
import pickle
import re

# Getting the tree with internal nodes from gainLoss' ancestral reconstruction.
# Internal nodes are labeled with 'Nx' where x is a number, the root being
# '[N1]' and then the numbers increase.
# NOTE(review): `Tree` is not imported in this visible chunk — presumably
# `from ete3 import Tree` appears earlier in the file; verify.
ancTree_njs16 = Tree('full_proks_gainLoss_results/TheTree.INodes.ph', format=1)

# To print the tree.
print(ancTree_njs16.get_ascii(show_internal=True))

# Getting the root of the tree ('&' looks a node up by name).
root = ancTree_njs16 & '[N1]'
nodes = list(root.traverse())

# Getting the original traits in a trait dict: annotate each named organism
# node with its 'isInd' flag from isIndDict.
for orgName in isIndDict:
    (ancTree_njs16 & orgName).add_feature('isInd', isIndDict[orgName])


# Writing the markNode recursive function.
# NOTE(review): the body of this function is truncated in this chunk — only
# the first statement is visible.
def markNode(tree, node):
    children = node.children
    # Checking if all children marked.
from ete3 import Tree

# Build a purely linear newick chain (A -> B -> C -> (D, F -> E)) and print
# it with internal node names shown.
chain = Tree("(((D:3,(E:3)F:2)C:2)B:4)A;", format=1)
rendered = chain.get_ascii(show_internal=True)
print(rendered)
def Tree_analysis(tree, tabla, out, analysis_type, out2):
    """Traverse a reference OTU tree, aggregate per-subject abundance vectors
    from an OTU table, and (depending on *analysis_type*) report 'core' nodes.

    analysis_type: 1 = 100%-prevalence core, 2 = binomial test,
    3 = percentage threshold, 4 = just dump the annotated tree.
    Results are written to *out* (report) and *out2* (per-core abundances).
    """
    ###All subsequent threshold variables can be modified
    binomial_value = float(0.05)  #Default value for option 2 (binomial) of the core evaluation method
    p_value = float(0.05)  #p-value threshold for the binomial method (method 2)
    percentage = float(0.9)  #Minimum percentage of subjects required to define a core
    taxo_p = float(0.9)  #Minimum percentage of the same taxonomic group within all OTUs of a node
    # NOTE(review): output files are opened but never explicitly closed here.
    output_file = open(out, 'w')
    output_file_2 = open(out2, 'w')
    tree = Tree(tree, quoted_node_names=True, format=1)  #Load the 97_otus reference tree
    table = {}
    cont = 1  # NOTE(review): unused
    # First pass over the OTU table: abundance vectors keyed by OTU id
    # (last column, the taxonomy string, excluded).
    for line in open(tabla):
        if (line.startswith('#')):
            output_file_2.write(str(line))
        else:
            fields = list(map(str.strip, line.split('\t')))
            table[fields[0]] = list(map(float, fields[1:-1]))
    # Second pass: taxonomy strings (last column only) keyed by OTU id.
    table2 = {}
    for line in open(tabla):
        if (line.startswith('#')):
            continue
        else:
            fields2 = list(map(str.strip, line.split('\t')))
            table2[fields2[0]] = list(
                map(str, fields2[(len(fields2) - 1):len(fields2)]))
    # `fields` still holds the last data row — used only for its column count.
    table_final_res = [0] * len(fields[1:-1])
    table_final_res = ([float(i) for i in table_final_res])
    sum_abun_rela = 0
    cores = 0
    #Attach abundance vectors to tree tips (None for tips absent from the table)
    for leaf in tree:
        if leaf.name not in table:
            leaf.vector = None
        else:
            leaf.vector = table[leaf.name]
    node2content = tree.get_cached_content()
    flag = 0
    #Sum the tip vectors into each internal node; the first internal node seen
    #(the root under default traversal) is remembered as the grand total.
    for node in tree.traverse():
        if not node.is_leaf():
            leaf_vectors = np.array([
                leaf.vector for leaf in node2content[node]
                if leaf.vector is not None
            ])
            node.vector = leaf_vectors.sum(axis=0)
            if (flag == 0):
                save_node1 = node.vector
                total_saved_leaves = np.array(
                    [leaf.name for leaf in node2content[node]])
                flag = 1
    if (analysis_type == 4):
        #This method only prints the information of the tree (informational only)
        print(tree.get_ascii(show_internal=True))
        output_file.write(tree.get_ascii(show_internal=True) + '\n' + '\n')
        for node in tree.traverse("preorder"):
            print(node.name, node.vector)
            output_file.write(node.name + '\t' + str(node.vector) + '\n')
    if (analysis_type != 4):
        output_file.write("Core" + '\t' + "Prevalence" + '\t' + "Abundance" +
                          '\t' + "Relative abundances" + '\t' + "Min" + '\t' +
                          "Max" + '\t' + "Average" + '\t' + "SD" + '\t' +
                          "Leaves" + '\t' + "Taxonomy" + '\t' +
                          "Leaves number" + '\n')
    if (analysis_type == 1 or analysis_type == 2 or analysis_type == 3):
        #Evaluate the tree with the chosen method: 100% core, binomial or percentage
        for node in tree.traverse("postorder"):
            tot_cont = np.count_nonzero(node.vector)  #Subjects with >= 1 occurrence in this node
            tot_cont2 = np.asarray(node.vector).size  #Total vector size
            # NOTE(review): stats.binom_test is deprecated/removed in recent
            # SciPy (use binomtest); kept as-is to preserve behavior.
            a = stats.binom_test(tot_cont,
                                 n=tot_cont2,
                                 p=binomial_value,
                                 alternative='greater')
            rela = (tot_cont / tot_cont2)
            if (analysis_type == 1 and np.all(node.vector)
                    or (analysis_type == 2 and a <= p_value)
                    or (analysis_type == 3 and rela >= percentage)):
                node.vector = ([float(i) for i in node.vector])
                abundance = node.vector / save_node1  #Relative abundance per subject vs the grand total
                abundance = ([float(i) for i in abundance])
                mean_abun = np.mean([float(i) for i in abundance])
                std_abun = np.std([float(i) for i in abundance])
                abundance_rela = sum(node.vector) / sum(save_node1)  #Global relative abundance of the node over the total
                table_final_res = list(
                    map(sum, zip(table_final_res, abundance)))  #Accumulate per-subject results
                sum_abun_rela = sum_abun_rela + abundance_rela
                cores = cores + 1  #Total number of cores
                node2content = tree.get_cached_content()
                output_file_2.write(str(node.name) + '\t')
                for x in range(len(abundance)):
                    output_file_2.write(str(abundance[x]) + '\t'),
                output_file_2.write('\n')
                output_file.write(node.name + '\t' + str(rela) + '\t' +
                                  str(node.vector) + '\t' + str(abundance) +
                                  '\t' + str(min(abundance)) + '\t' +
                                  str(max(abundance)) + '\t' +
                                  str(mean_abun) + '\t' + str(std_abun) +
                                  '\t')
                #Assign a taxonomy to the node from its OTUs' taxonomy strings
                conteo_hojas = nodes_eval(node, tree, output_file, table2,
                                          taxo_p, total_saved_leaves)
                output_file.write(str(conteo_hojas) + '\n')  #Leaf count of this node
                #Once evaluated, remove the node to simplify later calculations
                tree = erase_node(node, tree)
                G = tree.search_nodes(name=node.name)[0]
                removed_node = G.detach()
    # Final summary row. NOTE(review): indentation reconstructed — this is
    # assumed to run once, after the traversal; confirm against the original.
    output_file.write(
        str(cores) + '\t' + '\t' + '\t' + str(table_final_res) + '\t' +
        str(min(table_final_res)) + '\t' + str(max(table_final_res)) + '\t' +
        str(np.mean([float(i) for i in table_final_res])) + '\t' +
        str(np.std([float(i) for i in table_final_res])) + '\n')
a.add_features(active=True) dictTree[nameNode] = a print(a) #Exemple de merge nameNode = 'd' dictPos[nameNode] = 1 #nameList = ['a','d'] noeud = Tree() #a = t.add_child(name=nameNode) noeud.add_child(dictTree[nameList[1]]) noeud.add_child(dictTree[nameList[2]]) noeud.add_features(name=nameNode) dictTree[nameNode] = noeud test = dictTree[nameNode] print(test.get_ascii(show_internal=True)) print(noeud.get_ascii(show_internal=True)) print(dictPos) print(dictTree) for node in t1: if node.is_root(): print("hello") #if not node.is_leaf(): #innerbranch.append(node) #print (node) #for leaf in t1: #print (leaf.name) #print(t1.get_tree_root())
"time":time.strftime("%H:%M:%S")} data[service]["user_input"] = userinput data[service]["run_info"] = run_info data[service]["results"] = summary_cont result_file = os.path.join(outdir, "data.json") with open(result_file, "w") as outfile: json.dump(data, outfile) # Create tree if neighbor parameter was sat if args.nj_path: # Check that more than 2 + header samples are included in the analysis if len(allel_output) > 3: python_path = sys.executable tree_script = os.path.join(os.path.dirname(__file__), "make_nj_tree.py") cmd = "{} {} -i {} -o {} -n {}".format(python_path, tree_script, allele_matrix, outdir, args.nj_path) proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = proc.communicate() out = out.decode("utf-8") err = err.decode("utf-8") if proc.returncode != 0: eprint("No neighbor joining tree was created. The neighbor program responded with this: {}".format(err)) else: # print newick tr = Tree("{}/allele_tree.newick".format(outdir)) print(tr.get_ascii())
else: # for child #### search the parent node by parent_id node_cur = root.search_nodes(name=str(parent_id)) # there should be only one parent node if len(node_cur) == 1: #### set child with its id node_cur = node_cur[0].add_child(name=str(cell_id)) #### set duration node_cur.add_feature("dist", time_duration) # set node style node_cur.set_style(ns) # set node name to face nameFace = TextFace(node_cur.name) nameFace.fgcolor = "white" nameFace.fsize = 15 nameFace.background.color = "green" node_cur.add_face(nameFace, column=1, position="branch-bottom") else: raise RuntimeError("the cell id should be unique!") #node = root.search_nodes(name=str(5)) #node[0].add_feature("dist", 1.5) print root.get_ascii() root.show(tree_style=ts)
def run(args):
    """Entry point of an ete3 tree-viewing CLI: either print trees as ASCII
    (--text-mode) or build a TreeStyle/faces visualization and render/show it.

    NOTE(review): relies on module-level helpers (parse_faces, node_matcher)
    and a module-level FACES global defined elsewhere in the file.
    """
    # --- plain-text mode: print each tree and return ---
    if args.text_mode:
        from ete3 import Tree
        for tindex, tfile in enumerate(args.src_tree_iterator):
            #print tfile
            if args.raxml:
                # Convert RAxML-style bracketed支 support ":1.0[95]" into NHX
                nw = re.sub(":(\d+\.\d+)\[(\d+)\]",
                            ":\\1[&&NHX:support=\\2]",
                            open(tfile).read())
                t = Tree(nw)
            else:
                t = Tree(tfile)
            print(
                t.get_ascii(show_internal=args.show_internal_names,
                            attributes=args.show_attributes))
        return

    # --- graphical mode: imports deferred so text mode needs no GUI deps ---
    import random
    import re
    import colorsys
    from collections import defaultdict
    from ete3 import (Tree, PhyloTree, TextFace, RectFace, faces, TreeStyle,
                      add_face_to_node, random_color)

    global FACES

    if args.face:
        FACES = parse_faces(args.face)
    else:
        FACES = []

    # VISUALIZATION
    ts = TreeStyle()
    ts.mode = args.mode
    ts.show_leaf_name = True
    ts.tree_width = args.tree_width

    # A face bound to @name replaces the built-in leaf-name rendering
    for f in FACES:
        if f["value"] == "@name":
            ts.show_leaf_name = False
            break

    if args.as_ncbi:
        ts.show_leaf_name = False
        FACES.extend(
            parse_faces([
                'value:@sci_name, size:10, fstyle:italic',
                'value:@taxid, color:grey, size:6, format:" - %s"',
                'value:@sci_name, color:steelblue, size:7, pos:b-top, nodetype:internal',
                'value:@rank, color:indianred, size:6, pos:b-bottom, nodetype:internal',
            ]))

    if args.alg:
        FACES.extend(
            parse_faces([
                'value:@sequence, size:10, pos:aligned, ftype:%s' %
                args.alg_type
            ]))

    if args.heatmap:
        FACES.extend(
            parse_faces(['value:@name, size:10, pos:aligned, ftype:heatmap']))

    if args.bubbles:
        for bubble in args.bubbles:
            FACES.extend(
                parse_faces([
                    'value:@%s, pos:float, ftype:bubble, opacity:0.4' %
                    bubble,
                ]))

    ts.branch_vertical_margin = args.branch_separation
    if args.show_support:
        ts.show_branch_support = True
    if args.show_branch_length:
        ts.show_branch_length = True
    if args.force_topology:
        ts.force_topology = True
    ts.layout_fn = lambda x: None  # faces are added manually below

    for tindex, tfile in enumerate(args.src_tree_iterator):
        #print tfile
        if args.raxml:
            nw = re.sub(":(\d+\.\d+)\[(\d+)\]", ":\\1[&&NHX:support=\\2]",
                        open(tfile).read())
            t = PhyloTree(nw)
        else:
            t = PhyloTree(tfile)

        if args.alg:
            t.link_to_alignment(args.alg, alg_format=args.alg_format)

        if args.heatmap:
            DEFAULT_COLOR_SATURATION = 0.3
            BASE_LIGHTNESS = 0.7

            def gradient_color(value, max_value, saturation=0.5, hue=0.1):
                # Map a magnitude onto a lightness gradient of a fixed hue.
                def rgb2hex(rgb):
                    return '#%02x%02x%02x' % rgb

                def hls2hex(h, l, s):
                    return rgb2hex(
                        tuple([
                            int(x * 255) for x in colorsys.hls_to_rgb(h, l, s)
                        ]))

                lightness = 1 - (value * BASE_LIGHTNESS) / max_value
                return hls2hex(hue, lightness, DEFAULT_COLOR_SATURATION)

            # Parse the heatmap TSV: name in column 0, float values after;
            # track global min/max for color scaling.
            heatmap_data = {}
            max_value, min_value = None, None
            for line in open(args.heatmap):
                if line.startswith('#COLNAMES'):
                    pass
                elif line.startswith('#') or not line.strip():
                    pass
                else:
                    fields = line.split('\t')
                    name = fields[0].strip()
                    values = [float(x) if x else None for x in fields[1:]]
                    maxv = max(values)
                    minv = min(values)
                    if max_value is None or maxv > max_value:
                        max_value = maxv
                    if min_value is None or minv < min_value:
                        min_value = minv
                    heatmap_data[name] = values

            heatmap_center_value = 0
            heatmap_color_center = "white"
            heatmap_color_up = 0.3
            heatmap_color_down = 0.7
            heatmap_color_missing = "black"

            heatmap_max_value = abs(heatmap_center_value - max_value)
            heatmap_min_value = abs(heatmap_center_value - min_value)

            if heatmap_center_value <= min_value:
                heatmap_max_value = heatmap_min_value + heatmap_max_value
            else:
                heatmap_max_value = max(heatmap_min_value, heatmap_max_value)

        # scale the tree
        if not args.height:
            args.height = None
        if not args.width:
            args.width = None

        f2color = {}
        f2last_seed = {}
        for node in t.traverse():
            node.img_style['size'] = 0
            # Mark single-child (unresolved) nodes with a small square
            if len(node.children) == 1:
                node.img_style['size'] = 2
                node.img_style['shape'] = "square"
                node.img_style['fgcolor'] = "steelblue"

            ftype_pos = defaultdict(int)

            for findex, f in enumerate(FACES):
                if (f['nodetype'] == 'any'
                        or (f['nodetype'] == 'leaf' and node.is_leaf())
                        or (f['nodetype'] == 'internal'
                            and not node.is_leaf())):

                    # if node passes face filters
                    if node_matcher(node, f["filters"]):
                        if f["value"].startswith("@"):
                            # '@attr' means: take the value from the node
                            fvalue = getattr(node, f["value"][1:], None)
                        else:
                            fvalue = f["value"]

                        # if node's attribute has content, generate face
                        if fvalue is not None:
                            fsize = f["size"]
                            fbgcolor = f["bgcolor"]
                            fcolor = f['color']

                            if fcolor:
                                # Parse color options: 'auto(attr)' assigns a
                                # stable random color per distinct attr value.
                                auto_m = re.search("auto\(([^)]*)\)", fcolor)
                                if auto_m:
                                    target_attr = auto_m.groups()[0].strip()
                                    if not target_attr:
                                        color_keyattr = f["value"]
                                    else:
                                        color_keyattr = target_attr

                                    color_keyattr = color_keyattr.lstrip('@')
                                    color_bin = getattr(
                                        node, color_keyattr, None)

                                    last_seed = f2last_seed.setdefault(
                                        color_keyattr, random.random())

                                    seed = last_seed + 0.10 + random.uniform(
                                        0.1, 0.2)
                                    f2last_seed[color_keyattr] = seed

                                    fcolor = f2color.setdefault(
                                        color_bin, random_color(h=seed))

                            if fbgcolor:
                                # Parse color options (same 'auto(...)' scheme
                                # for the background color)
                                auto_m = re.search("auto\(([^)]*)\)",
                                                   fbgcolor)
                                if auto_m:
                                    target_attr = auto_m.groups()[0].strip()
                                    if not target_attr:
                                        color_keyattr = f["value"]
                                    else:
                                        color_keyattr = target_attr

                                    color_keyattr = color_keyattr.lstrip('@')
                                    color_bin = getattr(
                                        node, color_keyattr, None)

                                    last_seed = f2last_seed.setdefault(
                                        color_keyattr, random.random())

                                    seed = last_seed + 0.10 + random.uniform(
                                        0.1, 0.2)
                                    f2last_seed[color_keyattr] = seed

                                    fbgcolor = f2color.setdefault(
                                        color_bin, random_color(h=seed))

                            # Build the Face object for this face type
                            if f["ftype"] == "text":
                                if f.get("format", None):
                                    fvalue = f["format"] % fvalue
                                F = TextFace(fvalue,
                                             fsize=fsize,
                                             fgcolor=fcolor or "black",
                                             fstyle=f.get('fstyle', None))
                            elif f["ftype"] == "fullseq":
                                F = faces.SeqMotifFace(seq=fvalue,
                                                       seq_format="seq",
                                                       seqtail_format="seq",
                                                       height=fsize)
                            elif f["ftype"] == "compactseq":
                                F = faces.SeqMotifFace(
                                    seq=fvalue,
                                    seq_format="compactseq",
                                    seqtail_format="compactseq",
                                    height=fsize)
                            elif f["ftype"] == "blockseq":
                                F = faces.SeqMotifFace(
                                    seq=fvalue,
                                    seq_format="blockseq",
                                    seqtail_format="blockseq",
                                    height=fsize,
                                    fgcolor=fcolor or "slategrey",
                                    bgcolor=fbgcolor or "slategrey",
                                    scale_factor=1.0)
                                fbgcolor = None
                            elif f["ftype"] == "bubble":
                                # Bubble radius scales with the numeric value;
                                # non-numeric values fall back to fsize.
                                try:
                                    v = float(fvalue)
                                except ValueError:
                                    rad = fsize
                                else:
                                    rad = fsize * v
                                F = faces.CircleFace(radius=rad,
                                                     style="sphere",
                                                     color=fcolor
                                                     or "steelblue")
                            elif f["ftype"] == "heatmap":
                                if not f['column']:
                                    col = ftype_pos[f["pos"]]
                                else:
                                    col = f["column"]

                                # One RectFace per heatmap cell of this node
                                for i, value in enumerate(
                                        heatmap_data.get(node.name, [])):
                                    ftype_pos[f["pos"]] += 1

                                    if value is None:
                                        color = heatmap_color_missing
                                    elif value > heatmap_center_value:
                                        color = gradient_color(
                                            abs(heatmap_center_value - value),
                                            heatmap_max_value,
                                            hue=heatmap_color_up)
                                    elif value < heatmap_center_value:
                                        color = gradient_color(
                                            abs(heatmap_center_value - value),
                                            heatmap_max_value,
                                            hue=heatmap_color_down)
                                    else:
                                        color = heatmap_color_center

                                    node.add_face(RectFace(
                                        20, 20, color, color),
                                                  position="aligned",
                                                  column=col + i)
                                    # Add header
                                    # for i, name in enumerate(header):
                                    #    nameF = TextFace(name, fsize=7)
                                    #    nameF.rotation = -90
                                    #    tree_style.aligned_header.add_face(nameF, column=i)
                                F = None
                            elif f["ftype"] == "profile":
                                # internal profiles?
                                F = None
                            elif f["ftype"] == "barchart":
                                F = None
                            elif f["ftype"] == "piechart":
                                F = None

                            # Add the Face
                            if F:
                                F.opacity = f['opacity'] or 1.0

                                # Set face general attributes
                                if fbgcolor:
                                    F.background.color = fbgcolor

                                if not f['column']:
                                    col = ftype_pos[f["pos"]]
                                    ftype_pos[f["pos"]] += 1
                                else:
                                    col = f["column"]
                                node.add_face(F, column=col, position=f["pos"])

        # Render to an image file, or open the interactive viewer
        if args.image:
            t.render("t%d.%s" % (tindex, args.image),
                     tree_style=ts,
                     w=args.width,
                     h=args.height,
                     units=args.size_units)
        else:
            t.show(None, tree_style=ts)
from ete3 import Tree

# Build a two-clade labeled tree and print it as ASCII art.
# NOTE: this snippet is Python 2 (statement-form `print`).
t = Tree('((((H,K)D,(F,I)G)B,E)A,((L,(N,Q)O)J,(P,S)M)C)X;', format=1)
print t.get_ascii(show_internal=True)
#print rooted_tree
#%% # Loads a tree with internal node names t = Tree("(A:1,(B:1,(E:1,D:1)Internal_1:0.5)Internal_2:0.5)Root;", format=1) #%% #adddint features (e.g. percentages) from ete3 import Tree t = Tree("((((((4, e), i), o),h), u), ((3, 4), (i, june)));") # we annotate the tree using external data colors = {"a":"red", "e":"green", "i":"yellow", "o":"black", "u":"purple", "4":"green", "3":"yellow", "1":"white", "5":"red", "june":"yellow"} for leaf in t: leaf.add_features(color=colors.get(leaf.name, "none")) print(t.get_ascii(attributes=["name", "color"], show_internal=False)) print("Green-yellow clusters:") # And obtain clusters exclusively green and yellow for node in t.get_monophyletic(values=["green", "yellow"], target_attr="color"): print(node.get_ascii(attributes=["color", "name"], show_internal=False)) #%% #finding and saving nodes by their names C= t&"C" H= t&"H" I= t&"I"