def createImg(filename, thres=0, samples=1):
    """Render the lineage tree of *filename* with a per-leaf heatmap.

    The lineage counts parsed from *filename* are converted to a Newick
    string and a text matrix, loaded into a ``ClusterTree`` and rendered to
    ``phylo.png`` in the directory that contains *filename*.

    Args:
        filename: Path passed to ``parseLineage``; its directory receives
            the rendered image.
        thres: Forwarded unchanged to ``getSuffixandMatrixandNewick``.
        samples: Forwarded unchanged to ``getSuffixandMatrixandNewick``.
    """
    import os

    count = parseLineage(filename)
    suffix, matrix, taxo = getSuffixandMatrixandNewick(count, thres, samples)
    newick = convert(taxo, suffix) + ';'

    t = Tree(newick, format=1)
    ct = ClusterTree(t.write(), text_array=matrix)
    addColors(ct)

    # Nodes are linked to the array table.
    array = ct.arraytable

    # Stats on the finite matrix cells; they anchor the colour gradient.
    # Iterating rows directly avoids the Python-2-only ``xrange``.
    matrix_dist = [cell for row in array.matrix for cell in row
                   if np.isfinite(cell)]
    matrix_max = np.max(matrix_dist)
    matrix_min = np.min(matrix_dist)
    matrix_avg = (matrix_max + matrix_min) / 2

    # One shared profile face draws each leaf's profile as a heatmap row.
    profileFace = ProfileFace(matrix_max, matrix_min, matrix_avg,
                              200, 14, "heatmap", colorscheme=3)

    def mylayout(node):
        # Only leaves get the aligned heatmap and the small node marker.
        if node.is_leaf():
            add_face_to_node(profileFace, node, 0, aligned=True)
            node.img_style["size"] = 2

    ts = TreeStyle()
    ts.layout_fn = mylayout

    # os.path.join keeps a bare filename (no directory part) relative to the
    # working directory; the previous string concatenation produced
    # "/phylo.png", i.e. a write into the filesystem root.
    ct.render(os.path.join(os.path.dirname(filename), 'phylo.png'),
              tree_style=ts)
def createSampleData(self):
    """Store the pairwise leaf distances of a fixed sample tree in ``self.X``.

    The tree and the leaf-to-row mapping are hard-coded for now. The result
    is a symmetric square matrix with a zero diagonal.
    """
    tree = ClusterTree('(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);')
    leaves = tree.get_leaf_names()
    idx_dict = {'A': 0, 'B': 1, 'C': 2, 'D': 3}

    # Size the matrix from the index map so the two cannot drift apart.
    size = len(idx_dict)
    dmat = np.zeros((size, size))
    for l1, l2 in combinations(leaves, 2):
        d = tree.get_distance(l1, l2)
        # Mirror each distance across the diagonal.
        dmat[idx_dict[l1], idx_dict[l2]] = d
        dmat[idx_dict[l2], idx_dict[l1]] = d
    self.X = dmat
def TreeStructureProcessing(inputfile):
    """Traverse the hierarchical-clustering tree for *inputfile* and report.

    Loads ``./OUTPUT/<PREFIX>/NWK/HIER_CLUST_<stem>.nwk``, where ``<PREFIX>``
    is the upper-cased part of *inputfile* before the first underscore and
    ``<stem>`` is the part before the first dot. The tree is handed to
    ``Traverse_Cluster_Tree``, then cluster statistics and the elapsed time
    are printed.

    Args:
        inputfile: Input file name used to derive the Newick path.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start_time = time.perf_counter()

    prefix = inputfile.split("_")[0].upper()
    t = ClusterTree(
        "./OUTPUT/%s/NWK/HIER_CLUST_%s.nwk" % (prefix, inputfile.split(".")[0]))

    Traverse_Cluster_Tree(t, prefix)

    # clustid / singleton_clust / total_protein are module-level counters
    # (presumably updated by Traverse_Cluster_Tree -- confirm); they are only
    # read here, so no ``global`` declaration is needed.
    print("-------------------------------")
    print("Total clusters = %s" % (clustid))
    print("Singleton clusters = %s" % (singleton_clust))
    print("Non-Singleton clusters = %s" % (clustid - singleton_clust))
    print("Total Proteins = %s" % (total_protein))
    print("---------------completed----------------")

    # Report plain seconds. The old code wrapped the readings in
    # datetime.timedelta(x), whose first positional argument is *days*.
    elapsed = time.perf_counter() - start_time
    print("Time Required For Tree Partitionning:= %s seconds" % (elapsed))
def extract_leaf_labels_newick(self, user_input_file):
    """Return the leaf names of a Newick tree as a list.

    Args:
        user_input_file: Newick input passed to ``ClusterTree``.

    Returns:
        The leaf names in the order given by ``get_leaf_names()``.

    Raises:
        ValueError: If two leaves share the same name. The previous
            dict/index round-trip failed with the same exception type, but
            from inside ``list.index`` and without a useful message.
    """
    tree = ClusterTree(user_input_file)
    leaves = tree.get_leaf_names()

    # With unique names the old name -> index -> name round-trip returned the
    # leaf list unchanged, at quadratic cost. Check uniqueness directly.
    if len(set(leaves)) != len(leaves):
        raise ValueError("duplicate leaf names in tree: %r" % (user_input_file,))
    return list(leaves)
def newick(self, user_input_file):
    """Convert a Newick tree to a distance matrix and a SciPy linkage.

    Args:
        user_input_file: Newick input passed to ``ClusterTree``.

    Returns:
        A ``(dmat, schlink)`` tuple. ``dmat`` is the symmetric matrix of
        pairwise leaf distances, with rows and columns in
        ``get_leaf_names()`` order. ``schlink`` is the average-linkage matrix
        computed from it.

    Raises:
        ValueError: If two leaves share the same name, since the
            name-to-row mapping would be ambiguous.
    """
    tree = ClusterTree(user_input_file)
    leaves = tree.get_leaf_names()
    n_leaves = len(leaves)

    # Map each leaf name to its row/column in the distance matrix.
    leaf_dict = {name: idx for idx, name in enumerate(leaves)}
    if len(leaf_dict) != n_leaves:
        raise ValueError("duplicate leaf names in tree: %r" % (user_input_file,))

    dmat = np.zeros((n_leaves, n_leaves))

    print('Converting input tree:')

    # Fill both triangles with the pairwise tree distances (this is not yet
    # a linkage matrix). combinations() has no length, so pass the pair count
    # to tqdm to give it a real progress bar.
    n_pairs = n_leaves * (n_leaves - 1) // 2
    for l1, l2 in tqdm(combinations(leaves, 2), total=n_pairs):
        d = tree.get_distance(l1, l2)
        dmat[leaf_dict[l1], leaf_dict[l2]] = d
        dmat[leaf_dict[l2], leaf_dict[l1]] = d

    # squareform() condenses the square matrix into the form linkage() wants.
    # SciPy ignores ``metric`` for condensed input; it is kept so the call is
    # unchanged.
    schlink = sch.linkage(scipy.spatial.distance.squareform(dmat),
                          method='average', metric='euclidean')

    return dmat, schlink
def NodeFinder(phylogeny, NodeLen, HGTRate, Output):
    """Randomly pick tree nodes with enough leaves and dump them as JSON.

    Args:
        phylogeny: Newick input passed to ``ClusterTree``.
        NodeLen: Minimum number of leaves a node needs to be eligible
            (converted with ``int``).
        HGTRate: Number of nodes to draw (converted with ``int``).
        Output: Path of the JSON file to write.

    The result maps the traversal position of each selected node (as a
    string) to the list of its leaf names. It is printed and written to
    *Output*.

    Raises:
        ValueError: If nodes are requested but none is eligible.
    """
    import json

    import numpy as np
    from ete3 import ClusterTree

    t = ClusterTree(phylogeny)
    min_leaves = int(NodeLen)
    n_select = int(HGTRate)

    # Key eligible nodes by their position in the traversal, so the ids match
    # the numbering of the full node list.
    TreeSelect = {}
    for position, node in enumerate(t.traverse()):
        leaf_names = [leaf.name for leaf in node]
        if len(leaf_names) >= min_leaves:
            TreeSelect[position] = leaf_names
    NodeList = list(TreeSelect)

    if n_select > 0 and not NodeList:
        raise ValueError(
            "no node has at least %d leaves; cannot select %d node(s)"
            % (min_leaves, n_select))

    # Sample without replacement while enough nodes are eligible, otherwise
    # with replacement. The count comes from the HGTRate argument; the old
    # code read a global ``options`` object here.
    with_replacement = n_select > len(NodeList)
    NodeSelect = np.random.choice(NodeList, n_select, replace=with_replacement)

    # Repeated draws collapse onto one key.
    NodeFinal = {str(position): TreeSelect[position] for position in NodeSelect}

    print(NodeFinal)
    with open(str(Output), 'w') as f:
        json.dump(NodeFinal, f)
def newick_to_linkage(filePath):
    """Convert a Newick tree to a SciPy linkage matrix.

    Args:
        filePath: Newick input passed to ``ClusterTree``.

    Returns:
        The average-linkage matrix built from the pairwise leaf distances.
        The function previously computed this matrix but returned ``None``.

    Raises:
        ValueError: If two leaves share the same name, since the
            name-to-row mapping would be ambiguous.
    """
    tree = ClusterTree(filePath)
    leaves = tree.get_leaf_names()

    # Map each leaf name to its row/column in the distance matrix.
    idx_dict = {leaf: idx for idx, leaf in enumerate(leaves)}
    if len(idx_dict) != len(leaves):
        raise ValueError("duplicate leaf names in tree: %r" % (filePath,))

    # Symmetric matrix of pairwise tree distances with a zero diagonal.
    dmat = np.zeros((len(leaves), len(leaves)))
    for leaf1, leaf2 in combinations(leaves, 2):
        d = tree.get_distance(leaf1, leaf2)
        dmat[idx_dict[leaf1], idx_dict[leaf2]] = d
        dmat[idx_dict[leaf2], idx_dict[leaf1]] = d

    # squareform() condenses the square matrix into the form linkage() wants.
    # SciPy ignores ``metric`` for condensed input.
    schlink = sch.linkage(scipy.spatial.distance.squareform(dmat),
                          method='average', metric='euclidean')
    return schlink
def showTreeStructure(inputfile):
    """Open the hierarchical-clustering tree for *inputfile* in the viewer.

    The Newick file is ``./OUTPUT/<PREFIX>/NWK/HIER_CLUST_<stem>.nwk``, where
    ``<PREFIX>`` is the upper-cased text before the first underscore of
    *inputfile* and ``<stem>`` the text before its first dot. The tree is
    made ultrametric and shown in circular mode, with leaves drawn as purple
    spheres.
    """
    prefix = inputfile.split("_")[0].upper()
    stem = inputfile.split(".")[0]
    t = ClusterTree("./OUTPUT/%s/NWK/HIER_CLUST_%s.nwk" % (prefix, stem))

    def mylayout(node):
        # Internal nodes keep the default style.
        if not node.is_leaf():
            return
        style = node.img_style
        style["shape"] = "sphere"
        style["size"] = 10
        style["fgcolor"] = 'purple'

    # Zero the root branch, then give the tree a fixed ultrametric length.
    t.dist = 0
    t.convert_to_ultrametric(1, strategy="fixed")

    ts = TreeStyle()
    ts.scale = 230
    ts.root_opening_factor = 5.0
    ts.mode = "c"
    ts.layout_fn = mylayout

    t.show(tree_style=ts)
    print("--------------------------------")
    print("completed")
A\t-1.23\t-0.81\t1.79\t0.78\t-0.42\t-0.69\t0.58 B\t-1.76\t-0.94\t1.16\t0.36\t0.41\t-0.35\t1.12 C\t-2.19\t0.13\t0.65\t-0.51\t0.52\t1.04\t0.36 D\t-1.22\t-0.98\t0.79\t-0.76\t-0.29\t1.54\t0.93 E\t-1.47\t-0.83\t0.85\t0.07\t-0.81\t1.53\t0.65 F\t-1.04\t-1.11\t0.87\t-0.14\t-0.80\t1.74\t0.48 G\t-1.57\t-1.17\t1.29\t0.23\t-0.20\t1.17\t0.26 H\t-1.53\t-1.25\t0.59\t-0.30\t0.32\t1.41\t0.77 """ print "Example numerical matrix" print matrix # #Names col1 col2 col3 col4 col5 col6 col7 # A -1.23 -0.81 1.79 0.78 -0.42 -0.69 0.58 # B -1.76 -0.94 1.16 0.36 0.41 -0.35 1.12 # C -2.19 0.13 0.65 -0.51 0.52 1.04 0.36 # D -1.22 -0.98 0.79 -0.76 -0.29 1.54 0.93 # E -1.47 -0.83 0.85 0.07 -0.81 1.53 0.65 # F -1.04 -1.11 0.87 -0.14 -0.80 1.74 0.48 # G -1.57 -1.17 1.29 0.23 -0.20 1.17 0.26 # H -1.53 -1.25 0.59 -0.30 0.32 1.41 0.77 # # # We load a tree structure whose leaf nodes correspond to rows in the # numerical matrix. We use the text_array argument to link the tree # with numerical matrix. t = ClusterTree("(((A,B),(C,(D,E))),(F,(G,H)));", text_array=matrix) t.show("heatmap") t.show("cluster_cbars") t.show("cluster_bars") t.show("cluster_lines")
print matrix # #Names col1 col2 col3 col4 col5 col6 col7 # A -1.23 -0.81 1.79 0.78 -0.42 -0.69 0.58 # B -1.76 -0.94 1.16 0.36 0.41 -0.35 1.12 # C -2.19 0.13 0.65 -0.51 0.52 1.04 0.36 # D -1.22 -0.98 0.79 -0.76 -0.29 1.54 0.93 # E -1.47 -0.83 0.85 0.07 -0.81 1.53 0.65 # F -1.04 -1.11 0.87 -0.14 -0.80 1.74 0.48 # G -1.57 -1.17 1.29 0.23 -0.20 1.17 0.26 # H -1.53 -1.25 0.59 -0.30 0.32 1.41 0.77 # # # We load a tree structure whose leaf nodes correspond to rows in the # numerical matrix. We use the text_array argument to link the tree # with numerical matrix. t = ClusterTree("(((A,B),(C,(D,E))),(F,(G,H)));", text_array=matrix) print "Example tree", t # /-A # /--------| # | \-B # /--------| # | | /-C # | \--------| # | | /-D #---------| \--------| # | \-E # | # | /-F # \--------| # | /-G # \--------|
from ete3 import ClusterTree, TreeStyle, AttrFace, ProfileFace, TextFace from ete3.treeview.faces import add_face_to_node # To operate with numbers efficiently import numpy PATH = "./" # Loads tree and array t = ClusterTree(PATH+"diauxic.nw", PATH+"diauxic.array") # nodes are linked to the array table array = t.arraytable # Calculates some stats on the matrix. Needed to establish the color # gradients. matrix_dist = [i for r in xrange(len(array.matrix))\ for i in array.matrix[r] if numpy.isfinite(i)] matrix_max = numpy.max(matrix_dist) matrix_min = numpy.min(matrix_dist) matrix_avg = matrix_min+((matrix_max-matrix_min)/2) # Creates a profile face that will represent node's profile as a # heatmap profileFace = ProfileFace(matrix_max, matrix_min, matrix_avg, \ 200, 14, "heatmap") cbarsFace = ProfileFace(matrix_max,matrix_min,matrix_avg,200,70,"cbars") nameFace = AttrFace("name", fsize=8) # Creates my own layout function that uses previous faces def mylayout(node): # If node is a leaf if node.is_leaf():
def plot_heat_tree_V0(heatmap_file, tree_file, output_file=None): ''' Plot heatmap next to a tree. The order of the heatmap **MUST** be the same, as order of the leafs on the tree. The tree must be in the Newick format. If *output_file* is specified, then heat-tree will be rendered as a PNG, otherwise interactive browser will pop-up with your heat-tree. Parameters ---------- heatmap_file: str Path to the heatmap file. The first row must have '#Names' as first element of the header. e.g. #Names, A, B, C, D row1, 2, 4, 0, 4 row2, 4, 6, 2, -1 tree_file: str Path to the tree file in Newick format. The leaf node labels should be the same as as row names in the heatmap file. E.g. row1, row2. output_file: str, optional If specified the heat-tree will be rendered in that file as a PNG image, otherwise interactive browser will pop-up. **N.B.** program will wait for you to exit the browser before continuing. ''' import numpy from ete3.treeview.faces import add_face_to_node from ete3 import ClusterTree, TreeStyle, AttrFace, ProfileFace # To operate with numbers efficiently # Loads tree and array t = ClusterTree(tree_file, heatmap_file) t.ladderize() R = t.get_midpoint_outgroup() t.set_outgroup(R) # nodes are linked to the array table array = t.arraytable # Calculates some stats on the matrix. Needed to establish the color # gradients. 
matrix_dist = [i for r in xrange(len(array.matrix))\ for i in array.matrix[r] if numpy.isfinite(i)] matrix_max = numpy.max(matrix_dist) matrix_min = numpy.min(matrix_dist) matrix_avg = matrix_min + ((matrix_max - matrix_min) / 2) # Creates a profile face that will represent node's profile as a # heatmap profileFace = ProfileFace(1., 0., 0.5, 1000, 14, "heatmap", colorscheme=1) nameFace = AttrFace("name", fsize=8) # Creates my own layout function that uses previous faces def mylayout(node): # If node is a leaf if node.is_leaf(): # And a line profile add_face_to_node(profileFace, node, 0, aligned=True) node.img_style["size"] = 0 add_face_to_node(nameFace, node, 1, aligned=True) # Use my layout to visualize the tree ts = TreeStyle() ts.layout_fn = mylayout t.render("test.svg", tree_style=ts) '''
from ete3 import ClusterTree, faces # To operate with numbersd bub efficiently import numpy PATH = "./" # Loads tree and array t = ClusterTree(PATH + "diauxic.nw", PATH + "diauxic.array") # nodes are linked to the array table array = t.arraytable # Calculates some stats on the matrix matrix_dist = [i for r in xrange(len(array.matrix))\ for i in array.matrix[r] if numpy.isfinite(i)] matrix_max = numpy.max(matrix_dist) matrix_min = numpy.min(matrix_dist) matrix_avg = matrix_min + ((matrix_max - matrix_min) / 2) # Creates a profile face that will represent node's profile as a # heatmap profileFace = faces.ProfileFace(matrix_max, matrix_min, matrix_avg, \ 200, 14, "heatmap") cbarsFace = faces.ProfileFace(matrix_max, matrix_min, matrix_avg, 200, 70, "cbars") nameFace = faces.AttrFace("name", fsize=8) # Creates my own layout function that uses previous faces def mylayout(node): # If node is a leaf
from ete3 import ClusterTree, TreeStyle, AttrFace, ProfileFace, TextFace from ete3.treeview.faces import add_face_to_node # To operate with numbers efficiently import numpy PATH = "./" # Loads tree and array t = ClusterTree(PATH + "diauxic.nw", PATH + "diauxic.array") # nodes are linked to the array table array = t.arraytable # Calculates some stats on the matrix. Needed to establish the color # gradients. matrix_dist = [i for r in xrange(len(array.matrix))\ for i in array.matrix[r] if numpy.isfinite(i)] matrix_max = numpy.max(matrix_dist) matrix_min = numpy.min(matrix_dist) matrix_avg = matrix_min + ((matrix_max - matrix_min) / 2) # Creates a profile face that will represent node's profile as a # heatmap profileFace = ProfileFace(matrix_max, matrix_min, matrix_avg, \ 200, 14, "heatmap") cbarsFace = ProfileFace(matrix_max, matrix_min, matrix_avg, 200, 70, "cbars") nameFace = AttrFace("name", fsize=8) # Creates my own layout function that uses previous faces def mylayout(node):
def extract_taxa(input_tree):
    """Return the leaf names of the tree loaded from *input_tree*."""
    return ClusterTree(input_tree).get_leaf_names()
tree_style.aligned_header.add_face(nameF, column=i + cols_add_before_heat) data = pd.read_table(in_path + "/profiles.csv", header=0, index_col=0) data.index.name = "#Names" data_mat = data.to_csv(None, sep="\t", float_format="%d") header = list(data.columns.values) f = open(in_path + "/wgMLST_tree.newick") nkTree = f.readlines() f.close() t_str = nkTree[0] t = ClusterTree(t_str, data_mat) ts = TreeStyle() ts.margin_left = 20 ts.margin_right = 20 ts.margin_top = 20 ts.margin_bottom = 10 ts.scale = 2 ts.min_leaf_separation = 0 ts.branch_vertical_margin = 0 ts.show_leaf_name = True ts.show_branch_length = False ts.show_branch_support = True
print('step5 complete') # generate an Textarray out of the CausalLoci PresenceAbsence Matrix=str() for Profiles in ProfileList: for words in Profiles: Matrix+=str(words)+'\t' Matrix=Matrix[:-1]+'\n' print(Matrix) #print(Matrix) #Generate ClusterTree using ETE toolkit t=ClusterTree( '/home/masih/Projects/BacterialSimulator/RealTree.nwk' , text_array=Matrix) #Define a Tree visualizaiton layout def ColorCodedNode (node): if node.is_leaf(): ColorCode=PhenoDict[node.name] if ColorCode == '1': #Name=faces.AttrFace('name',fsize='20',fgcolor="Blue") #NameFace=TextFace(Name) faces.add_face_to_node(AttrFace('name',fsize=20,fgcolor='blue'), node, column=0,aligned=True) #faces.add_face_to_node(TextFace(text='marker1',fsize=10,fgcolor='black'), node, column=1,position='aligned') faces.add_face_to_node(ProfileFace(1, -1, 0, width=200, height=40, style='heatmap', colorscheme=2),node,column=1,position='aligned') elif ColorCode == '2': #Name=faces.AttrFace('name',fsize='20',fgcolor="Red") #NameFace=TextFace(Name)
def make_cluster_tree(tree_file: str, matrix: str, out_file: str, outgroup: Optional[List[str]] = None) -> None:
    """Render a tree annotated with an absence/presence matrix to an image.

    Arguments:
        tree_file: the name of the file containing the tree to annotate
        matrix: a comma- or tab-separated absence/presence matrix
        out_file: the name under which to save the resulting image
        outgroup: the organism(s) to use as an outgroup, if any
    """
    # ClusterTree only reads tab-separated text, so convert any commas first.
    tab_separated = matrix.replace(",", "\t")
    tree = ClusterTree(tree_file, text_array=tab_separated)

    # Reroot on the common ancestor of the outgroup when one is supplied.
    # NOTE(review): the collapsed source does not show whether ladderize()
    # sat inside this ``if``; it is kept inside here -- confirm.
    if outgroup:
        tree.set_outgroup(tree.get_common_ancestor(outgroup))
        tree.ladderize(direction=1)

    # Shared node style: thicker branch lines and a small node marker.
    branch_style = NodeStyle()
    branch_style["vt_line_width"] = 2
    branch_style["hz_line_width"] = 2
    branch_style["size"] = 5

    def sel_mylayout(node):
        node.set_style(branch_style)
        if not node.is_leaf():
            return

        # Leaf name in a larger italic font, right of the branch.
        add_face_to_node(AttrFace("name", fsize=12, fstyle="italic"),
                         node, column=0, position="branch-right")

        # One circle per matrix column: red when the value is positive,
        # light grey otherwise.
        for column, value in enumerate(getattr(node, "profile", [])):
            fill = "#FF0000" if value > 0 else "#EEEEEE"
            marker = CircleFace(8, fill, style="circle")
            marker.margin_right = 3
            marker.margin_bottom = 3
            add_face_to_node(marker, node, position="aligned", column=column)

    drawing = TreeStyle()

    # Rotated column titles above the aligned circles.
    for column, title in enumerate(tree.arraytable.colNames):
        header = TextFace(title, fsize=11)
        header.rotation = -90
        header.hz_align = 1
        header.vt_align = 1
        header.margin_bottom = 10
        drawing.aligned_header.add_face(header, column=column)

    drawing.scale_length = 0.1
    # Leaf names are drawn by the layout function, so disable the automatic ones.
    drawing.show_leaf_name = False
    drawing.layout_fn = sel_mylayout

    tree.render(out_file, dpi=600, tree_style=drawing)
from ete3 import ClusterTree, TreeStyle, AttrFace, ProfileFace, TextFace from ete3.treeview.faces import add_face_to_node # To operate with numbers efficiently import numpy PATH = "./" # Loads tree and array t = ClusterTree(PATH+"diauxic.nw", PATH+"diauxic.array") # nodes are linked to the array table array = t.arraytable # Calculates some stats on the matrix. Needed to establish the color # gradients. matrix_dist = [i for r in xrange(len(array.matrix))\ for i in array.matrix[r] if numpy.isfinite(i)] matrix_max = numpy.max(matrix_dist) matrix_min = numpy.min(matrix_dist) matrix_avg = matrix_min+((matrix_max-matrix_min)/2) # Creates a profile face that will represent node's profile as a # heatmap nameFace = AttrFace("name", fsize=8) # Creates my own layout function that uses previous faces def mylayout(node): profileFace = ProfileFace(matrix_max, matrix_min, matrix_avg, \ 200, 14, "heatmap") cbarsFace = ProfileFace(matrix_max,matrix_min,matrix_avg,200,70,"cbars") # If node is a leaf
A\t-1.23\t-0.81\t1.79\t0.78\t-0.42\t-0.69\t0.58 B\t-1.76\t-0.94\t1.16\t0.36\t0.41\t-0.35\t1.12 C\t-2.19\t0.13\t0.65\t-0.51\t0.52\t1.04\t0.36 D\t-1.22\t-0.98\t0.79\t-0.76\t-0.29\t1.54\t0.93 E\t-1.47\t-0.83\t0.85\t0.07\t-0.81\t1.53\t0.65 F\t-1.04\t-1.11\t0.87\t-0.14\t-0.80\t1.74\t0.48 G\t-1.57\t-1.17\t1.29\t0.23\t-0.20\t1.17\t0.26 H\t-1.53\t-1.25\t0.59\t-0.30\t0.32\t1.41\t0.77 """ print("Example numerical matrix") print(matrix) # #Names col1 col2 col3 col4 col5 col6 col7 # A -1.23 -0.81 1.79 0.78 -0.42 -0.69 0.58 # B -1.76 -0.94 1.16 0.36 0.41 -0.35 1.12 # C -2.19 0.13 0.65 -0.51 0.52 1.04 0.36 # D -1.22 -0.98 0.79 -0.76 -0.29 1.54 0.93 # E -1.47 -0.83 0.85 0.07 -0.81 1.53 0.65 # F -1.04 -1.11 0.87 -0.14 -0.80 1.74 0.48 # G -1.57 -1.17 1.29 0.23 -0.20 1.17 0.26 # H -1.53 -1.25 0.59 -0.30 0.32 1.41 0.77 # # # We load a tree structure whose leaf nodes correspond to rows in the # numerical matrix. We use the text_array argument to link the tree # with numerical matrix. t = ClusterTree("(((A,B),(C,(D,E))),(F,(G,H)));", text_array=matrix) t.show("heatmap") t.show("cluster_cbars") t.show("cluster_bars") t.show("cluster_lines")
newickStr = [] for node, children in taxo.iteritems(): nodename = str(node) if type(children)==defaultdict: newickStr.append(convert(children,suffix)+nodename) else: newickStr.append(nodename + suffix[nodename]) return '('+','.join(newickStr)+')' count = parseLineage(filename) suffix, matrix, taxo = getSuffixandMatrixandNewick(count) newick = convert(taxo,suffix) newick += ';' t = Tree(newick, format=1) ct = ClusterTree(t.write(), text_array=matrix) addColors(ct) # nodes are linked to the array table array = ct.arraytable # Calculates some stats on the matrix. Needed to establish the color gradients. matrix_dist = [i for r in xrange(len(array.matrix))for i in array.matrix[r] if np.isfinite(i)] matrix_max = np.max(matrix_dist) matrix_min = np.min(matrix_dist) matrix_avg = (matrix_max+matrix_min)/2 # Creates a profile face that will represent node's profile as a heatmap profileFace = ProfileFace(matrix_max, matrix_min, matrix_avg, 200, 14, "heatmap",colorscheme=3) # Creates my own layout function that uses previous faces def mylayout(node): # If node is a leaf if node.is_leaf():