def _get_motif_tree(tree, data, circle=True, vmin=None, vmax=None):
    """Build a styled ete3 tree whose branches reflect the values in ``data``.

    Every node is coloured (``RdBu_r`` colormap) and given a line width
    according to the mean of ``data`` over the leaves below that node.

    Parameters
    ----------
    tree :
        Passed unchanged to ``ete3.Tree``.
    data :
        Object supporting ``data.values`` and ``data[list_of_leaf_names]``
        (presumably a pandas DataFrame with one column per leaf name --
        confirm against the caller).
    circle : bool
        If True, draw a circular tree restricted to a 180 degree arc.
    vmin, vmax : float, optional
        Limits of the colour scale. A limit that is ``None`` is derived from
        the data; an explicit ``0`` is honoured.

    Returns
    -------
    tuple
        ``(t, ts)``: the styled ``Tree`` and the matching ``TreeStyle``.
    """
    try:
        from ete3 import Tree, NodeStyle, TreeStyle
    except ImportError:
        print("Please install ete3 to use this functionality")
        sys.exit(1)

    t = Tree(tree)

    # Determine cutoff for color scale. Only ``None`` means "not supplied";
    # testing truthiness would silently discard an explicit bound of 0.
    if vmin is None or vmax is None:
        # Use the first percentile in 90..100 that is strictly positive;
        # if none is, the 100th percentile (last value computed) is used.
        for pct in range(90, 101):
            minmax = np.percentile(data.values, pct)
            if minmax > 0:
                break
        if vmin is None:
            vmin = -minmax
        if vmax is None:
            vmax = minmax

    norm = Normalize(vmin=vmin, vmax=vmax, clip=True)
    mapper = cm.ScalarMappable(norm=norm, cmap="RdBu_r")

    # Scale factor: the largest data value corresponds to a line width of 25.
    m = 25 / data.values.max()

    for node in t.traverse("levelorder"):
        val = data[[l.name for l in node.get_leaves()]].values.mean()
        color = to_hex(mapper.to_rgba(val))
        # Never draw thinner than 5 so weak branches stay visible.
        width = max(np.abs(m * val), 5)

        style = NodeStyle()
        style["size"] = 0
        style["hz_line_color"] = color
        style["vt_line_color"] = color
        style["vt_line_width"] = width
        style["hz_line_width"] = width
        node.set_style(style)

    ts = TreeStyle()
    ts.layout_fn = _tree_layout
    ts.show_leaf_name = False
    ts.show_scale = False
    ts.branch_vertical_margin = 10

    if circle:
        ts.mode = "c"
        ts.arc_start = 180  # 0 degrees = 3 o'clock
        ts.arc_span = 180

    return t, ts
def _get_motif_tree(tree, data, circle=True, vmin=None, vmax=None):
    """Build a styled ete3 tree whose branches reflect the values in ``data``.

    Every node is coloured (``RdBu_r`` colormap) and given a line width
    according to the mean of ``data`` over the leaves below that node.

    Parameters
    ----------
    tree :
        Passed unchanged to ``ete3.Tree``.
    data :
        Object supporting ``data.values`` and ``data[list_of_leaf_names]``
        (presumably a pandas DataFrame with one column per leaf name --
        confirm against the caller).
    circle : bool
        If True, draw a circular tree restricted to a 180 degree arc.
    vmin, vmax : float, optional
        Limits of the colour scale. A limit that is ``None`` is derived from
        the data; an explicit ``0`` is honoured.

    Returns
    -------
    tuple
        ``(t, ts)``: the styled ``Tree`` and the matching ``TreeStyle``.
    """
    try:
        from ete3 import Tree, NodeStyle, TreeStyle
    except ImportError:
        print("Please install ete3 to use this functionality")
        sys.exit(1)

    t = Tree(tree)

    # Determine cutoff for color scale. Only ``None`` means "not supplied";
    # testing truthiness would silently discard an explicit bound of 0.
    if vmin is None or vmax is None:
        # Use the first percentile in 90..100 that is strictly positive;
        # if none is, the 100th percentile (last value computed) is used.
        for pct in range(90, 101):
            minmax = np.percentile(data.values, pct)
            if minmax > 0:
                break
        if vmin is None:
            vmin = -minmax
        if vmax is None:
            vmax = minmax

    norm = Normalize(vmin=vmin, vmax=vmax, clip=True)
    mapper = cm.ScalarMappable(norm=norm, cmap="RdBu_r")

    # Scale factor: the largest data value corresponds to a line width of 25.
    m = 25 / data.values.max()

    for node in t.traverse("levelorder"):
        val = data[[l.name for l in node.get_leaves()]].values.mean()
        color = to_hex(mapper.to_rgba(val))
        # Never draw thinner than 5 so weak branches stay visible.
        width = max(np.abs(m * val), 5)

        style = NodeStyle()
        style["size"] = 0
        style["hz_line_color"] = color
        style["vt_line_color"] = color
        style["vt_line_width"] = width
        style["hz_line_width"] = width
        node.set_style(style)

    ts = TreeStyle()
    ts.layout_fn = _tree_layout
    ts.show_leaf_name = False
    ts.show_scale = False
    ts.branch_vertical_margin = 10

    if circle:
        ts.mode = "c"
        ts.arc_start = 180  # 0 degrees = 3 o'clock
        ts.arc_span = 180

    return t, ts
def plot_tree(tree, save=False, path=''):
    """Show ``tree`` as a full circle with leaf names.

    When ``save`` is true the tree is first rendered to ``path`` using the
    same style; the interactive viewer is opened in either case.
    """
    # Circular layout covering the whole 360 degrees, starting at -180.
    circular = TreeStyle()
    circular.mode = "c"
    circular.arc_start = -180
    circular.arc_span = 360
    circular.show_leaf_name = True

    if save:
        tree.render(file_name=path, tree_style=circular)

    tree.show(tree_style=circular)
def set_default_TreeStyle(tree, draw_nodes):
    """Create the default half-circle ``TreeStyle`` and return it with ``tree``.

    Parameters
    ----------
    tree :
        Returned unchanged as the second element of the result.
    draw_nodes :
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    tuple
        ``(ts, tree)`` where ``ts`` is the newly configured ``TreeStyle``.
    """
    ts = TreeStyle()

    # Circular layout limited to a 180 degree arc starting at -180.
    ts.mode = "c"
    ts.arc_start = -180
    ts.arc_span = 180
    ts.root_opening_factor = 1

    # Branch annotations: support values only, no lengths, no leaf names.
    ts.show_branch_length = False
    ts.show_branch_support = True
    ts.show_leaf_name = False

    ts.force_topology = False
    ts.min_leaf_separation = 10
    ts.complete_branch_lines_when_necessary = True

    return ts, tree
def plot_newick(aug_cluster_list, mode='c', db=-1):
    """Convert a cluster list to Newick and show it in the ete3 viewer.

    Parameters
    ----------
    aug_cluster_list :
        Cluster list handed to ``convert_to_newick`` / ``convert_to_newick_db``
        together with the index of its last element.
    mode : str
        Value assigned to ``TreeStyle.mode`` (``'c'`` draws a circular tree).
    db : number
        When greater than 0, ``convert_to_newick_db`` is used and ``db`` is
        passed through to it; otherwise ``convert_to_newick`` is used.
    """
    circular_style = TreeStyle()
    circular_style.mode = mode  # draw tree in circular mode (for mode='c')
    circular_style.scale = 20
    circular_style.arc_span = 360

    root_index = len(aug_cluster_list) - 1
    if db > 0:
        newick = convert_to_newick_db(aug_cluster_list, root_index, db)
    else:
        newick = convert_to_newick(aug_cluster_list, root_index)

    # Terminate the Newick string with a zero-length root branch.
    newick = newick + ':0;'

    t = Tree(newick, format=1)
    t.show(tree_style=circular_style)
    # To save an image instead of (or in addition to) showing the tree:
    #   t.render('tree3.png', w=100, units='in', tree_style=circular_style)
def visualize(self, group1=None, group2=None):
    """Open the ete3 viewer on ``self.tree`` with cluster colouring.

    Parameters
    ----------
    group1, group2 : sized collections, optional
        When at least one is given (and non-empty), every node in
        ``self.cluster_roots`` gets a text face showing the two group sizes.
        A group that is omitted is reported with size 0.
    """
    import matplotlib
    import matplotlib.pyplot as plt

    # annotate the cluster roots with the group sizes
    if group1 or group2:
        # Either group may be left out; len(None) would raise TypeError.
        n_group1 = len(group1) if group1 is not None else 0
        n_group2 = len(group2) if group2 is not None else 0
        # NOTE(review): the label is the same for every cluster root; if
        # per-cluster counts were intended (e.g. via self.nodes2leaves),
        # that computation is missing -- confirm with the author.
        for cluster_root in self.cluster_roots:
            cluster_root.add_face(
                TextFace(f"Group1: {n_group1}// Group2:{n_group2}"),
                column=0,
                position="branch-right")

    # The palettes do not depend on the node, so build them once instead of
    # on every layout callback.
    cmap_cluster = plt.cm.tab10(np.linspace(0, 1, len(self.cluster_roots)))
    cmap_treated = plt.cm.viridis(np.linspace(0, 1, 2))

    def _custom_layout(node):
        if node.is_leaf():
            # Foreground encodes the `treated` flag, background the cluster.
            node.img_style["fgcolor"] = matplotlib.colors.rgb2hex(
                cmap_treated[node.treated, :])
            node.img_style["bgcolor"] = matplotlib.colors.rgb2hex(
                cmap_cluster[node.clustering, :])

        if 'is_cluster_root' in node.features:
            # `is_cluster_root` is used as an index into the cluster palette.
            node.img_style["bgcolor"] = matplotlib.colors.rgb2hex(
                cmap_cluster[node.is_cluster_root, :])
            # Collapse the subtree below a cluster root.
            node.img_style["draw_descendants"] = False
            node.add_face(TextFace(f"#data:{node.n_datapoints}"),
                          column=0,
                          position="branch-right")

    ts = TreeStyle()
    ts.mode = "r"
    ts.show_leaf_name = False
    # NOTE(review): arc settings are kept from the original; they presumably
    # only matter for circular mode ("c"), not for mode "r" -- confirm.
    ts.arc_start = -180  # 0 degrees = 3 o'clock
    ts.arc_span = 270
    ts.layout_fn = _custom_layout
    self.tree.show(tree_style=ts)
from ete3 import Tree, TreeStyle

# Read the whole Newick file; the context manager guarantees the handle is
# closed even if reading fails.
with open("gisaid_cov2020_sequences_filtered_8312_2.nw") as handle:
    tree = handle.read()

t = Tree(tree)
# NOTE(review): populate() adds 30 extra nodes to the tree that was just
# loaded from the file -- kept as in the original, confirm this is intended.
t.populate(30)

# Half-circle layout with leaf names.
ts = TreeStyle()
ts.show_leaf_name = True
ts.mode = "c"
ts.arc_start = -180
ts.arc_span = 180

t.show(tree_style=ts)
#define tree style ts = TreeStyle() ts.show_leaf_name = True ts.min_leaf_separation = 1 #ts.show_branch_length = True ts.show_branch_support = True ts.scale = 200 # 100 pixels per branch length unit ts.min_leaf_separation = 0.5 ts.branch_vertical_margin = 0 # 10 pixels between adjacent branches ts.title.add_face(TextFace(filename, fsize=20), column=0) circular_style = TreeStyle() circular_style.mode = "c" # draw tree in circular mode circular_style.scale = 100 circular_style.arc_start = -90 # 0 degrees = 3 o'clock circular_style.arc_span = 330 pruned = [] #now for something completely different, tree traverse for node in t.traverse(): if node.is_leaf(): pruned.append(node) local = locdata.get(node.name, "no pred") leafcolor = colors.get(local, "black") node.add_features(color=leafcolor) node.img_style["size"] = 0 node.img_style["fgcolor"] = leafcolor node.img_style["vt_line_color"] = leafcolor node.img_style["hz_line_color"] = leafcolor node.img_style["hz_line_width"] = 2 #node.name = tag_replace(node.name) #make s
def Surviving_Phylogenetic_Tree(self):
    """Show the clone phylogeny stored in ``Tumour_Evolution`` as a half circle.

    The keys of the module-level ``Tumour_Evolution`` mapping are clone IDs.
    The code expects the root ID ``"P-0:0"`` and IDs shaped like
    ``PC<n>[,<k>...]-<year>:<...>``, where each ``,<k>`` adds one level of
    branching depth. IDs are grouped by depth, linked to the clone of the
    previous depth that shares their prefix, and the resulting
    (parent, child, distance) table is displayed with ete3.

    Raises
    ------
    ValueError
        If ``"P-0:0"`` is missing, or if some ID matches none of the depth
        patterns (the original code looped forever in that case).
    """
    root_pattern = 'PC[0-9]+'
    branch = ',[0-9]+'

    def _year(clone_id):
        # Integer between the first '-' and the following ':'.
        return int(clone_id.split("-", 1)[1].split(":")[0])

    # list(): keys() is a view without remove() on Python 3, and working on
    # a copy leaves Tumour_Evolution itself untouched.
    C_IDs = list(Tumour_Evolution.keys())
    C_IDs.remove("P-0:0")

    # Group the IDs by branching depth: depth 0 matches 'PC<n>-...',
    # depth 1 matches 'PC<n>,<k>-...', and so on.
    Phylo_Struct = []
    regex_str = root_pattern
    depth = 0
    while C_IDs:
        searchRegex = re.compile(regex_str + '-.*')
        matches = [m.group(0) for m in map(searchRegex.search, C_IDs) if m]
        # An ID cannot match a pattern with more commas than it contains,
        # so past that depth the remaining IDs can never be consumed.
        if not matches and depth > max(cid.count(',') for cid in C_IDs):
            raise ValueError(
                "Clone IDs do not follow the expected pattern: %r" % (C_IDs,))
        for m in matches:
            C_IDs.remove(m)
        Phylo_Struct.append(matches)
        regex_str = regex_str + branch
        depth += 1

    # Translate the depth groups into a (parent, child, distance) table.
    initial_step = natsorted(Phylo_Struct.pop(0), key=lambda y: y.lower())

    ID_time = dict()
    Phylogeny = []
    for clone in initial_step:
        parent_str = re.search(root_pattern, clone)
        ID_time[parent_str.group(0)] = _year(clone)
        # Founders hang off the root "P" with length 100 - year.
        Phylogeny.append(("P", clone, 100 - _year(clone)))  ## Year length

    ## Generating Phylogenetic Tree
    regex_str = root_pattern
    for step in Phylo_Struct:
        step = natsorted(step, key=lambda y: y.lower())
        for clone in step:
            clone_str = re.search(regex_str, clone)
            for _parent in initial_step:
                parent_str = re.search(regex_str, _parent)
                if clone_str.group(0) == parent_str.group(0):
                    main_parent = re.search(root_pattern, clone)
                    # Distance is measured against the founder's year, not
                    # the direct parent's.
                    Phylogeny.append(
                        (_parent, clone,
                         abs(ID_time[main_parent.group(0)] - _year(clone))))  ## year length normalised
        # The next depth compares one more ',<k>' component and uses this
        # depth's clones as candidate parents.
        regex_str = regex_str + branch
        initial_step = step

    t = Tree.from_parent_child_table(Phylogeny)

    ts = TreeStyle()
    ts.show_leaf_name = True
    #ts.rotation = 90
    ts.mode = "c"
    ts.arc_start = -180  # 0 degrees = 3 o'clock
    ts.arc_span = 180
    t.show(tree_style=ts)
def plot_tree(self,
              output_filepath,
              time_series_info=None,
              tree_style='horizontal_right',
              leaf_size_map_to=None,
              show_leaf_names=False,
              color_branches_by=None,
              line_width=1,
              start_color='red',
              end_color='purple',
              ladderize=True):
    """
    This method plots the phylogenetic tree as a dendrogram. Uses the ete3
    package to do this.
    output_filepath - This is the path to the output image file.
    time_series_info - This tells where the information of time-point can be
    found (if at all) in each of the element IDs. Acceptable values are:
        None - (default) This means there is no time information in the tree
        'start_of_id' - This means the time info is at the very beginning of
        each element ID, and is separated by a '_'. For example:
        '45.3_blahblahblah' would have a time point of 45.3.
    tree_style - This gives the style of tree plotting, i.e. circular,
    horizontal, etc. Acceptable values are:
        'half_circle' - Tree is plotted as a half circle, where branches are
        radiating outward and upward.
        'horizontal_right' - Tree is plotted as a normal dendrogram where
        branching occurs from left to right.
        Any other value currently falls back to the default ete3 layout.
    leaf_size_map_to - If defined (default, None), this will inform what
    attribute of the node the leaf size maps to. Acceptable values are:
        None - No leaf size info. All leaves the same size
        'count' - leaf size proportional to the log of the count attribute
        'freq' - leaf size proportional to the freq attribute
        Any other truthy value raises ValueError.
    show_leaf_names - If True (default, False), then will plot the names of
    each of the leaves of the tree.
    color_branches_by - If defined (default, None) this will give how the
    leaf branches will be colored, if at all. Acceptable values are:
        None - Default. No branch coloring
        'time_point' - This means that the branches will be colored according
        to the 'time_point' attribute of each leaf node. There must be a
        'time_point' attribute in the leaf nodes for this to work. So, one
        should run the 'add_time_info' method before doing this.
        Alternatively, one can define the 'time_series_info' parameter for
        this method, and this will be taken care of.
        any other string - This will give the name of any other attribute of
        the leaf nodes for which to map the value to the leaf branch color.
    line_width - controls the line width. Default, 1
    start_color - This gives the starting color. This should be a string
    that spells out the name of a color. Most simple colors should be fine.
    Uses the 'Color' module from 'colour'.
    end_color - This gives the ending color. This should be a string that
    spells out the name of a color. Most simple colors should be fine. Uses
    the 'Color' module from 'colour'. The colors used for each unique
    attribute that gives the colors will span the spectrum from
    'start_color' to 'end_color'.
    ladderize - If True (default), this will ladderize the tree. That is it
    will sort the partitions of the internal nodes based upon number of
    descendant nodes in the child nodes.
    """
    # Fail early with a clear message; previously an unknown value surfaced
    # as a NameError on 'radius' while styling the first leaf.
    if leaf_size_map_to and leaf_size_map_to not in ('count', 'freq'):
        raise ValueError(
            "leaf_size_map_to must be None, 'count' or 'freq', got %r" %
            (leaf_size_map_to,))

    if not self.time_points and time_series_info:
        self.add_time_info(time_series_info=time_series_info)

    # Map every distinct value of the colouring attribute to a hex colour.
    # A single lookup table serves both the 'time_point' case and arbitrary
    # attributes, so the leaf loop below can never hit an undefined table.
    value_to_color = {}
    if color_branches_by:
        start_color = Color(start_color)
        end_color = Color(end_color)
        if color_branches_by == 'time_point' and time_series_info:
            color_keys = sorted(self.time_points)
        else:
            # check if attribute already exists in the tree data. if not,
            # add it
            if any(color_branches_by not in leaf.features
                   for leaf in self.tree):
                self.add_attribute_to_leaves(
                    attribute_name=color_branches_by)
            color_keys = sorted(self.extra_leaf_features[color_branches_by])
        colors = list(start_color.range_to(end_color, len(color_keys)))
        value_to_color = {
            key: shade.hex_l
            for key, shade in zip(color_keys, colors)
        }

    # set node styles
    # need to scale sizes by the length (divergence) of the tree
    _, size_to_tree_size_scaler = self.tree.get_farthest_leaf()
    scaled_line_width = size_to_tree_size_scaler * 10 * line_width
    for node in self.tree.traverse():
        node_style = NodeStyle()
        # do stuff to leaf nodes
        if node.is_leaf():
            if color_branches_by:
                color = value_to_color[getattr(node, color_branches_by)]
            else:
                color = Color('black').hex_l
            node_style['hz_line_color'] = color
            node_style['vt_line_color'] = color
            if leaf_size_map_to:
                if leaf_size_map_to == 'count':
                    radius = size_to_tree_size_scaler * 100 * math.log(
                        node.count)
                else:  # 'freq' (validated above)
                    radius = size_to_tree_size_scaler * 100 * node.freq
                c = CircleFace(radius=radius, color=color, style='circle')
                c.opacity = 0.3
                node.add_face(c, 0, position='branch-right')
        # The default node marker is hidden; only lines (and the optional
        # circle faces added above) are drawn.
        node_style['size'] = 0
        node_style['hz_line_width'] = scaled_line_width
        node_style['vt_line_width'] = scaled_line_width
        node.set_style(node_style)

    # set tree style
    tree_steeze = TreeStyle()
    if tree_style == 'half_circle':
        tree_steeze.mode = 'c'
        tree_steeze.arc_start = -180
        tree_steeze.arc_span = 180
    tree_steeze.show_leaf_name = bool(show_leaf_names)

    # Honour the documented flag (the tree used to be ladderized always).
    if ladderize:
        self.tree.ladderize()
    self.tree.render(output_filepath,
                     w=700,
                     h=700,
                     units='mm',
                     tree_style=tree_steeze)
    return
def main(args):
    """Render the phylogenetic tree described by the command-line ``args``.

    Attributes read from ``args``: ``tree``, ``alignment``, ``highlight_new``,
    ``hide_annotations``, ``positions``, ``legend``, ``circular`` and
    ``output``. The tree is rooted on ``'EM_079422'``, ladderized and written
    to ``args.output`` at a width of 1024.
    """
    if args.alignment:
        t = PhyloTree(args.tree, alignment=args.alignment, alg_format='fasta')
    else:
        t = PhyloTree(args.tree)

    if args.highlight_new:
        # NOTE(review): `runs` is not used anywhere in this function; the
        # call is kept in case read_runs has effects the layout relies on.
        runs = read_runs(args.highlight_new)

    t.set_outgroup('EM_079422')
    t.ladderize()

    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.show_branch_support = False
    ts.layout_fn = layout

    # One style carrying both widths: set_style() replaces the node's style,
    # so applying two separate NodeStyle objects kept only the last one and
    # the horizontal width of 8 was silently lost.
    thick_line = NodeStyle()
    thick_line["hz_line_width"] = 8
    thick_line["vt_line_width"] = 4
    t.set_style(thick_line)
    #t.children[0].set_style(thick_hz_line)
    #t.children[1].set_style(thick_hz_line)

    # header
    if not args.hide_annotations:
        header_titles = ('Sample identifier', 'Prefecture', 'Sous-prefecture',
                         'Village', 'Sample received')
        # Columns are numbered from 1, matching the original layout.
        for column, title in enumerate(header_titles, start=1):
            ts.aligned_header.add_face(
                MyTextFace(title, fstyle='Bold', fsize=8, tight_text=False),
                column=column)

    if args.positions:
        positions = read_positions(args.positions)
        alg_header = RulerFace(
            positions,
            col_width=11,
            height=0,  # set to 0 if dont want to use values
            kind="stick",
            hlines=[0],
            hlines_col=["white"],  # trick to hide hz line
        )
        ts.aligned_header.add_face(alg_header, 6)

    # legend
    if args.legend:
        # One entry per prefecture; `samples` is a module-level mapping.
        legend = {s['prefec']: s['prefec__colour'] for s in samples.values()}
        for p in sorted(legend):
            ts.legend.add_face(CircleFace(4, legend[p]), column=0)
            ts.legend.add_face(MyTextFace(p, fsize=6, tight_text=False),
                               column=1)
        ts.legend_position = 1

    if args.circular:
        ts.mode = "c"
        ts.arc_start = -180  # 0 degrees = 3 o'clock
        ts.arc_span = 180

    # t.show(tree_style=ts)
    t.render(args.output, tree_style=ts, w=1024)
def search(tree, topHit1, topHit2):
    """Colour and label the leaves of ``tree`` by species and top hit, then render.

    Leaves whose name contains ``"F"`` are treated as H. lupulus genes and
    looked up in ``topHit1``; all other leaves are treated as C. sativa genes
    and looked up in ``topHit2``. Both mappings go from leaf name to a
    4-tuple ``(uniprot id, uniprot description, e-value, bit score)``.

    A leaf whose description matches one of the known enzyme classes gets a
    text label and a background colour; a leaf with a hit that matches none
    of them is printed. The tree is written to ``hop_vs_can_tree_v5.svg`` and
    ``hop_vs_can_tree_v5.pdf``.
    """
    # (substring of the description, short label, background colour).
    # Order matters: the "Inactive ..." and "...-like" variants must be
    # tested before the more general patterns that follow them.
    annotation_rules = (
        ('Berberine', 'BBE-like', "#e0f3f8"),
        ('Inactive tetrahydrocannabinolic acid synthase', 'Inactive THCAS',
         "#4393c3"),
        ('Tetrahydrocannabinolic acid synthase ', 'THCAS', "#4393c3"),
        ('Cannabidiolic acid synthase-like', 'CBDAS-like', "#92c5de"),
        ('Cannabidiolic acid synthase ', 'CBDAS', "#92c5de"),
    )
    # (species label, line/text colour, top-hit table)
    hop = ('H. lupulus', "#a50026", topHit1)
    cannabis = ('C. sativa', "#313695", topHit2)

    for leaf in tree.traverse():
        leaf.img_style['size'] = 0
        if not leaf.is_leaf():
            continue

        species, color, top_hits = hop if "F" in leaf.name else cannabis

        style = NodeStyle()
        style["fgcolor"] = color
        style["size"] = 0
        style["vt_line_color"] = color
        style["hz_line_color"] = color
        style["vt_line_width"] = 2
        style["hz_line_width"] = 2
        style["vt_line_type"] = 0  # 0 solid, 1 dashed, 2 dotted
        style["hz_line_type"] = 0
        leaf.set_style(style)

        if leaf.name not in top_hits:
            continue

        uniprot_id, uniprot_desc, _e_value, _bit_score = top_hits[leaf.name]
        for needle, label, bgcolor in annotation_rules:
            if needle in uniprot_desc:
                new_leaf_name = species + ', ' + label + ', ' + leaf.name
                name_face = TextFace(new_leaf_name, fgcolor=color, fsize=12)
                leaf.add_face(name_face, column=0, position='branch-right')
                style["bgcolor"] = bgcolor
                leaf.set_style(style)
                break
        else:
            # Report this leaf's own hit. The C. sativa branch used to print
            # the H. lupulus variables, which could be stale or undefined.
            print(leaf.name, uniprot_id, uniprot_desc)

    ts = TreeStyle()
    #ts.rotation = 90
    ts.mode = "c"
    ts.arc_start = -180
    ts.arc_span = 180
    # ts.optimal_scale_level = 'mid'
    ts.branch_vertical_margin = 10
    # ts.scale = 180
    ts.scale = 225
    ts.show_leaf_name = False
    ts.show_scale = False
    tree.render("hop_vs_can_tree_v5.svg", tree_style=ts, w=600)
    tree.render("hop_vs_can_tree_v5.pdf", tree_style=ts, w=600)
def deepbiome_draw_phylogenetic_tree(
        log,
        network_info,
        path_info,
        num_classes,
        file_name="%%inline",
        img_w=500,
        branch_vertical_margin=20,
        arc_start=0,
        arc_span=360,
        node_name_on=True,
        name_fsize=10,
        tree_weight_on=True,
        tree_weight=None,
        tree_level_list=['Genus', 'Family', 'Order', 'Class', 'Phylum'],
        weight_opacity=0.4,
        weight_max_radios=10,
        phylum_background_color_on=True,
        phylum_color=[],
        phylum_color_legend=False,
        show_covariates=True,
        verbose=True):
    """
    Draw phylogenetic tree

    Parameters
    ----------
    log (logging instance) :
        python logging instance for logging
    network_info (dictionary) :
        python dictionary with network_information
    path_info (dictionary):
        python dictionary with path_information
    num_classes (int):
        number of classes for the network. 0 for regression, 1 for binary
        classification.
    file_name (str):
        name of the figure for save.
        - "*.png", "*.jpg"
        - "%%inline" for notebook inline output.
        default="%%inline"
    img_w (int):
        image width (pt)
        default=500
    branch_vertical_margin (int):
        vertical margin for branch
        default=20
    arc_start (int):
        angle that arc start
        default=0
    arc_span (int):
        total amount of angle for the arc span
        default=360
    node_name_on (boolean):
        show the name of the last leaf node if True
        default=True
    name_fsize (int):
        font size for the name of the last leaf node
        default=10
    tree_weight_on (boolean):
        show the amount and the direction of the weight for each edge in the
        tree by circle size and color.
        default=True
    tree_weight (ndarray):
        reference tree weights. Required: the tree is built from it even
        when `tree_weight_on` is False.
        default=None
    tree_level_list (list):
        name of each level of the given reference tree weights
        default=['Genus', 'Family', 'Order', 'Class', 'Phylum']
    weight_opacity (float):
        opacity for weight circle
        default= 0.4
    weight_max_radios (int):
        maximum radius for weight circle
        default= 10
    phylum_background_color_on (boolean):
        show the background color for each phylum based on `phylum_color`.
        default= True
    phylum_color (list):
        specify the list of background colors for phylum level. If
        `phylum_color` is empty, it will arbitrarily assign the color for
        each phylum.
        default= []
    phylum_color_legend (boolean):
        show the legend for the background colors for phylum level
        default= False
    show_covariates (boolean):
        show the effect of the covariates
        default= True
    verbose (boolean):
        show the log if True
        default=True

    Returns
    -------
    The value returned by `Tree.render`.

    Raises
    ------
    TypeError
        If `tree_weight` is None.

    Examples
    --------
    Draw phylogenetic tree

    deepbiome_draw_phylogenetic_tree(log, network_info, path_info, num_classes, file_name = "%%inline")
    """
    # NOTE: the list defaults in the signature are only read or rebound in
    # this function, never mutated in place.
    os.environ['QT_QPA_PLATFORM'] = 'offscreen'  # for tree figure (https://github.com/etetoolkit/ete/issues/381)

    # The tree structure is read from tree_weight unconditionally, so fail
    # with a clear message instead of "'NoneType' is not subscriptable".
    if tree_weight is None:
        raise TypeError("tree_weight must be provided to draw the tree")

    reader_class = getattr(readers,
                           network_info['model_info']['reader_class'].strip())
    reader = reader_class(log, path_info, verbose=verbose)
    data_path = path_info['data_info']['data_path']
    # Prefer the per-fold count files; fall back to the single x_path file
    # when the count configuration is missing or unreadable. Best-effort on
    # purpose, but no longer swallowing KeyboardInterrupt / SystemExit.
    try:
        count_path = path_info['data_info']['count_path']
        x_list = np.array(
            pd.read_csv(path_info['data_info']['count_list_path'],
                        header=None).iloc[:, 0])
        x_path = np.array([
            '%s/%s' % (count_path, x_list[fold])
            for fold in range(x_list.shape[0]) if '.csv' in x_list[fold]
        ])
    except Exception:
        x_path = np.array([
            '%s/%s' % (data_path, path_info['data_info']['x_path'])
            for fold in range(1)
        ])

    reader.read_dataset(x_path[0], None, 0)

    network_class = getattr(
        build_network, network_info['model_info']['network_class'].strip())
    network = network_class(network_info,
                            path_info,
                            log,
                            fold=0,
                            num_classes=num_classes,
                            tree_level_list=tree_level_list,
                            is_covariates=reader.is_covariates,
                            covariate_names=reader.covariate_names,
                            verbose=False)

    # len() rather than truthiness: phylum_color may be a numpy array.
    if len(phylum_color) == 0:
        colors = mcolors.CSS4_COLORS
        colors_name = list(colors.keys())
        if reader.is_covariates and show_covariates:
            phylum_color = np.random.choice(
                colors_name,
                network.phylogenetic_tree_info['Phylum_with_covariates'].
                unique().shape[0])
        else:
            phylum_color = np.random.choice(
                colors_name,
                network.phylogenetic_tree_info['Phylum'].unique().shape[0])

    basic_st = NodeStyle()
    basic_st['size'] = weight_max_radios * 0.5
    basic_st['shape'] = 'circle'
    basic_st['fgcolor'] = 'black'

    t = Tree()
    root_st = NodeStyle()
    root_st["size"] = 0
    t.set_style(root_st)

    tree_node_dict = {}
    tree_node_dict['root'] = t

    # First layer: children of the root, named after the columns of the
    # last weight table.
    upper_class = 'root'
    lower_class = tree_level_list[-1]
    lower_layer_names = tree_weight[-1].columns.to_list()

    layer_tree_node_dict = {}
    phylum_color_dict = {}
    for j, val in enumerate(lower_layer_names):
        t.add_child(name=val)
        leaf_t = t.get_leaves_by_name(name=val)[0]
        leaf_t.set_style(basic_st)
        layer_tree_node_dict[val] = leaf_t
        if lower_class == 'Phylum' and phylum_background_color_on:
            phylum_st = copy.deepcopy(basic_st)
            phylum_st["bgcolor"] = phylum_color[j]
            phylum_color_dict[val] = phylum_color[j]
            leaf_t.set_style(phylum_st)
    tree_node_dict[lower_class] = layer_tree_node_dict
    upper_class = lower_class
    upper_layer_names = lower_layer_names

    # Remaining layers, walking tree_level_list from the end to the front.
    for i in range(len(tree_level_list) - 1):
        lower_class = tree_level_list[-2 - i]
        if upper_class == 'Disease' and show_covariates == False:
            lower_layer_names = network.phylogenetic_tree_info[
                lower_class].unique()
        else:
            lower_layer_names = tree_weight[-i - 1].index.to_list()

        if tree_weight_on:
            # Scale this layer's weights so the largest equals
            # weight_max_radios. Computed once per layer; it used to be
            # recomputed, with the same result, for every child node.
            edge_weights = np.array(tree_weight[-1 - i])
            edge_weights *= (weight_max_radios / np.max(edge_weights))

        layer_tree_node_dict = {}
        for j, val in enumerate(upper_layer_names):
            parient_t = tree_node_dict[upper_class][val]
            if upper_class == 'Disease':
                child_class = lower_layer_names
            else:
                child_class = network.phylogenetic_tree_info[lower_class][
                    network.phylogenetic_tree_info[upper_class] ==
                    val].unique()

            for k, child_val in enumerate(child_class):
                parient_t.add_child(name=child_val)
                leaf_t = parient_t.get_leaves_by_name(name=child_val)[0]
                if lower_class == 'Phylum' and phylum_background_color_on:
                    phylum_st = copy.deepcopy(basic_st)
                    phylum_st["bgcolor"] = phylum_color[k]
                    phylum_color_dict[child_val] = phylum_color[k]
                    leaf_t.set_style(phylum_st)
                else:
                    leaf_t.set_style(basic_st)

                if tree_weight_on:
                    if upper_class == 'Disease':
                        upper_num = 0
                    else:
                        upper_num = network.phylogenetic_tree_dict[
                            upper_class][val]
                    if upper_class == 'Disease' and reader.is_covariates == True and show_covariates:
                        lower_num = network.phylogenetic_tree_dict[
                            '%s_with_covariates' % lower_class][child_val]
                    else:
                        lower_num = network.phylogenetic_tree_dict[
                            lower_class][child_val]
                    leaf_t.add_features(weight=edge_weights[lower_num,
                                                            upper_num])
                layer_tree_node_dict[child_val] = leaf_t
        tree_node_dict[lower_class] = layer_tree_node_dict
        upper_class = lower_class
        upper_layer_names = lower_layer_names

    def layout(node):
        if "weight" in node.features:
            # Creates a sphere face whose size is proportional to node's
            # feature "weight"
            color = {1: "RoyalBlue", 0: "Red"}[int(node.weight > 0)]
            C = CircleFace(radius=node.weight, color=color, style="circle")
            # Let's make the sphere transparent
            C.opacity = weight_opacity
            # And place as a float face over the tree
            faces.add_face_to_node(C, node, 0, position="float")

        # Logical `and`: the bitwise `&` gave wrong answers for truthy
        # non-bool values of node_name_on.
        if node_name_on and node.is_leaf():
            # Add node name to leaf nodes
            N = AttrFace("name", fsize=name_fsize, fgcolor="black")
            faces.add_face_to_node(N, node, 0)

    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.mode = "c"
    ts.arc_start = arc_start
    ts.arc_span = arc_span
    ts.layout_fn = layout
    ts.branch_vertical_margin = branch_vertical_margin
    ts.show_scale = False

    if phylum_color_legend:
        for phylum_name in np.sort(list(phylum_color_dict.keys())):
            color_name = phylum_color_dict[phylum_name]
            ts.legend.add_face(CircleFace(weight_max_radios * 1, color_name),
                               column=0)
            ts.legend.add_face(TextFace(" %s" % phylum_name,
                                        fsize=name_fsize),
                               column=1)

    return t.render(file_name=file_name, w=img_w, tree_style=ts)


# #########################################################################################################################
# if __name__ == "__main__":
#     argdict = argv_parse(sys.argv)
#     try: gpu_memory_fraction = float(argdict['gpu_memory_fraction'][0])
#     except: gpu_memory_fraction = None
#     try: max_queue_size=int(argdict['max_queue_size'][0])
#     except: max_queue_size=10
#     try: workers=int(argdict['workers'][0])
#     except: workers=1
#     try: use_multiprocessing=argdict['use_multiprocessing'][0]=='True'
#     except: use_multiprocessing=False

#     ### Logger ############################################################################################
#     logger = logging_daily.logging_daily(argdict['log_info'][0])
#     logger.reset_logging()
#     log = logger.get_logging()
#     log.setLevel(logging_daily.logging.INFO)

#     log.info('Argument input')
#     for argname, arg in argdict.items():
#         log.info('    {}:{}'.format(argname,arg))

#     ### Configuration #####################################################################################
#     config_data = configuration.Configurator(argdict['path_info'][0], log)
#     config_data.set_config_map(config_data.get_section_map())
#     config_data.print_config_map()

#     config_network = configuration.Configurator(argdict['network_info'][0], log)
#     config_network.set_config_map(config_network.get_section_map())
#     config_network.print_config_map()

#     path_info = config_data.get_config_map()
#     network_info = config_network.get_config_map()
#     test_evaluation, train_evaluation, network = deepbiome_train(log, network_info, path_info, number_of_fold=20)
def build_colorful_tree(newick, filename=""):
    """
    Render a circular, colour-coded tree from a Newick description.

    Nodes with exactly one leaf below them get a background colour chosen by
    a substring of that leaf's name; anything unrecognised is styled as CLL.
    Requires ete3 (and its rendering prerequisites). This is NOT general
    purpose: the sample-name substrings and legend texts are hard-coded.
    """
    from ete3 import Tree, TreeStyle, CircleFace, TextFace
    from ete3 import NodeStyle

    tree = Tree(newick)

    # Full-circle layout with leaf names, drawn as a pure topology.
    ts = TreeStyle()
    ts.mode = "c"
    ts.arc_start = -180  # 0 degrees = 3 o'clock
    ts.arc_span = 360
    ts.force_topology = True
    ts.show_leaf_name = True

    # Legend rows: (circle colour, caption).
    legend_rows = [
        ("MediumSeaGreen", "Normal B-cell"),
        ("SeaGreen", "Normal B CD19pcell"),
        ("ForestGreen", "Normal B CD19pCD27pcell"),
        ("Green", "Normal B CD19pCD27mcell"),
        ("RoyalBlue", "CLL all-batches"),
    ]
    for row, (circle_color, caption) in enumerate(legend_rows):
        marker = CircleFace(30, circle_color)
        text = TextFace(caption, fsize=64)
        text.margin_right = 100
        if row == 0:
            # Only the first row carries the large top margin.
            marker.margin_top = 1000
            text.margin_top = 1000
        ts.legend.add_face(marker, column=0)
        ts.legend.add_face(text, column=1)

    # Name substring -> background colour, tested in this order.
    sample_colors = [
        ("normal_B", "MediumSeaGreen"),
        ("NormalBCD19pcell", "SeaGreen"),
        ("NormalBCD19pCD27pcell", "ForestGreen"),
        ("NormalBCD19pCD27mcell", "Green"),
    ]
    styles = {
        key: NodeStyle(bgcolor=bg,
                       hz_line_color="Black",
                       vt_line_color="Black")
        for key, bg in sample_colors + [("CLL", "RoyalBlue")]
    }

    for node in tree.traverse("postorder"):
        leaf_names = node.get_leaf_names()
        if len(leaf_names) != 1:
            continue
        name = leaf_names[0]
        for key, _bg in sample_colors:
            if key in name:
                node.set_style(styles[key])
                break
        else:
            # No known normal-sample substring: treat as CLL.
            node.set_style(styles["CLL"])

    tree.render(filename, w=10, dpi=600, units='in', tree_style=ts)
def draw_tree(the_tree, colour, back_color, label, out_file, the_scale,
              extend, bootstrap, group_file, grid_options, the_table,
              pres_abs, circular):
    """Draw a midpoint-rooted tree with optional clade colours and grids.

    Parameters
    ----------
    the_tree : newick input passed to ``Tree(..., quoted_node_names=True)``.
    colour : if truthy (and no ``group_file``), leaves are clustered into
        clades by pairwise distance and each clade gets its own hue.
    back_color : with ``group_file``, also colour the ancestors joining the
        groups using the average of the two group colours.
    label : value assigned to ``ts.show_branch_length``.
    out_file : image path for ``render``; ``None`` opens the interactive
        viewer instead. Also used as prefix of the ``.new_desc`` file.
    the_scale : value assigned to ``ts.scale``.
    extend : if truthy, guiding lines are drawn to the aligned faces.
    bootstrap : value assigned to ``ts.show_branch_support``.
    group_file : whitespace separated file, first field is a substring of
        the leaf name, second field the colour for that group.
    grid_options : ``None`` (no grid), ``'auto'`` (every table column is a
        categorical colour column) or the path of a tab separated
        description file with ``H`` (header) and ``C`` (colour) lines.
    the_table : tab separated table, first column is the leaf identifier.
    pres_abs : ``None`` or a pair ``(protein_fasta, genome_folder)``; each
        leaf genome is searched with blastx against the proteins.
    circular : anything other than ``None`` switches to circular mode.

    Side effects: writes ``<out_file>.new_desc`` (or ``viridis.new_desc``)
    when a grid is drawn, and ``tempdb*`` / ``*.blast`` files in the current
    directory when ``pres_abs`` is given.
    """

    def line_style(line_colour, line_width):
        """Node style with hidden node marker and uniform branch lines."""
        new_style = NodeStyle()
        new_style['size'] = 0
        new_style["vt_line_color"] = line_colour
        new_style["hz_line_color"] = line_colour
        new_style["vt_line_width"] = line_width
        new_style["hz_line_width"] = line_width
        return new_style

    def padded_text(text):
        """Text face padded left and right with the standard gaps."""
        return TextFace(font_gap * ' ' + text + ' ' * font_buffer,
                        fsize=font_size, ftype=font_type, tight_text=True)

    def hue(offset, span, hue_range):
        """Map offset in [0, span] to a hue; a zero span maps to hue 0."""
        return offset / span * hue_range if span else 0

    t = Tree(the_tree, quoted_node_names=True)
    font_size = 8
    font_type = 'Helvetica'  # was misspelled 'Heveltica'
    font_gap = 3
    font_buffer = 10
    o = t.get_midpoint_outgroup()
    t.set_outgroup(o)
    the_leaves = list(t.iter_leaves())
    groups = {}
    num = 0
    last_node = None
    # (node, colour) pairs of the clade roots that still have to be joined
    ca_list = []

    if group_file is not None:
        # start from an all-black tree, then paint the groups on top
        black = line_style('#000000', 1)
        for n in t.traverse():
            n.set_style(black)
        group_dict = {}
        with open(group_file) as f:
            for line in f:
                fields = line.split()
                if len(fields) < 2:
                    continue  # blank or incomplete line
                group_dict[fields[0]] = fields[1]
        for node in the_leaves:
            for pattern in group_dict:
                if pattern in node.name:
                    groups.setdefault(group_dict[pattern], []).append(
                        node.name)
        coloured_nodes = []
        for the_col in groups:
            style = line_style(the_col, 2)
            members = groups[the_col]
            if len(members) == 1:
                ca = t.search_nodes(name=members[0])[0]
                ca.set_style(style)
                coloured_nodes.append(ca)
            else:
                ca = t.get_common_ancestor(members)
                ca.set_style(style)
                coloured_nodes.append(ca)
                # paint every descendant of the common ancestor
                tocolor = list(ca.children)
                while tocolor:
                    x = tocolor.pop(0)
                    coloured_nodes.append(x)
                    x.set_style(style)
                    tocolor.extend(x.children)
            ca_list.append((ca, the_col))
        if back_color:
            # repeatedly join the two closest group roots, colour their
            # common ancestor with the averaged colour and treat it as a
            # new group root, until a single root is left
            while len(ca_list) > 1:
                distance = float('inf')
                the_i = None
                for i, col1 in ca_list:
                    for j, col2 in ca_list:
                        if i is not j:
                            the_dist = t.get_distance(i, j)
                            if the_dist <= distance:
                                distance = the_dist
                                the_i = i
                                the_j = j
                                the_i_col = col1
                                the_j_col = col2
                if the_i is None:
                    break  # all remaining entries are the same node
                ca_list.remove((the_i, the_i_col))
                ca_list.remove((the_j, the_j_col))
                rgb1 = strtorgb(the_i_col)
                rgb2 = strtorgb(the_j_col)
                # floor division keeps the channels integral on Python 3
                # (assumes strtorgb yields integer channels -- TODO confirm)
                rgb3 = tuple((rgb1[k] + rgb2[k]) // 2 for k in range(3))
                new_col = colorstr(rgb3)
                new_node = t.get_common_ancestor(the_i, the_j)
                style = line_style(new_col, 2)
                new_node.set_style(style)
                coloured_nodes.append(new_node)
                ca_list.append((new_node, new_col))
                # descend only through nodes that have no colour yet
                tocolor = list(new_node.children)
                while tocolor:
                    x = tocolor.pop(0)
                    if x not in coloured_nodes:
                        coloured_nodes.append(x)
                        x.set_style(style)
                        tocolor.extend(x.children)
    elif colour:
        distances = sorted(
            t.get_distance(node1, node2)
            for node1 in the_leaves
            for node2 in the_leaves
            if node1 != node2)
        # clade cutoff is the first quartile of all pairwise leaf distances;
        # integer division so the index is valid on Python 3 as well
        clade_cutoff = distances[len(distances) // 4] if distances else 0
        # consecutive leaves closer than the cutoff share a group; each
        # group is stored as [group number, leaf name, leaf name, ...]
        for node in the_leaves:
            if (last_node is not None
                    and t.get_distance(node, last_node) <= clade_cutoff):
                groups[group_num].append(node.name)
            else:
                groups[num] = [num, node.name]
                group_num = num
                num += 1
            last_node = node
        for i in groups:
            num = groups[i][0]
            h = num * 360 / len(groups)
            style = line_style(hsl_to_str(h, 0.5, 0.5), 2)
            if len(groups[i]) == 2:
                ca = t.search_nodes(name=groups[i][1])[0]
                ca.set_style(style)
            else:
                ca = t.get_common_ancestor(groups[i][1:])
                ca.set_style(style)
                tocolor = list(ca.children)
                while tocolor:
                    x = tocolor.pop(0)
                    x.set_style(style)
                    tocolor.extend(x.children)
            ca_list.append((ca, h))
        # join sibling clade roots pairwise (closest first), colouring the
        # shared parent with the mean hue, until nothing can be joined
        while len(ca_list) > 1:
            distance = float('inf')
            got_one = False
            for i, col1 in ca_list:
                for j, col2 in ca_list:
                    if i is not j:
                        parent = t.get_common_ancestor(i, j)
                        # only join when the parent has no other children
                        getit = True
                        for children in parent.children:
                            if children != i and children != j:
                                getit = False
                                break
                        if getit:
                            the_dist = t.get_distance(i, j)
                            if the_dist <= distance:
                                distance = the_dist
                                the_i = i
                                the_j = j
                                the_i_col = col1
                                the_j_col = col2
                                got_one = True
            if not got_one:
                break
            ca_list.remove((the_i, the_i_col))
            ca_list.remove((the_j, the_j_col))
            new_col = (the_i_col + the_j_col) / 2
            new_node = t.get_common_ancestor(the_i, the_j)
            new_node.set_style(line_style(hsl_to_str(new_col, 0.5, 0.3), 2))
            ca_list.append((new_node, new_col))
    else:
        # plain black tree
        black = line_style('#000000', 1)
        for n in t.traverse():
            n.set_style(black)

    # palette used for categorical values without an explicit colour
    color_list = [(240, 163, 255), (0, 117, 220), (153, 63, 0), (76, 0, 92),
                  (25, 25, 25), (0, 92, 49), (43, 206, 72), (255, 204, 153),
                  (128, 128, 128), (148, 255, 181), (143, 124, 0),
                  (157, 204, 0), (194, 0, 136), (0, 51, 128), (255, 164, 5),
                  (255, 168, 187), (66, 102, 0), (255, 0, 16),
                  (94, 241, 242), (0, 153, 143), (224, 255, 102),
                  (116, 10, 255), (153, 0, 0), (255, 255, 128),
                  (255, 255, 0), (255, 80, 5), (0, 0, 0), (50, 50, 50)]
    up_to_colour = {}
    ts = TreeStyle()
    column_list = []
    width_dict = {}
    if grid_options is not None:
        colour_dict = {}
        type_dict = {}
        title_dict = {}  # internal column name -> header text to display
        min_val_dict = {}
        max_val_dict = {}
        leaf_name_dict = {}
        header_count = 0
        the_columns = {}
        if grid_options == 'auto':
            with open(the_table) as f:
                headers = f.readline().rstrip().split('\t')[1:]
            for i in headers:
                the_columns[i] = [i]
                type_dict[i] = 'colour'
                title_dict[i] = i
                colour_dict[i] = {'empty': '#FFFFFF'}
                width_dict[i] = 20
                up_to_colour[i] = 0
                column_list.append(i)
        else:
            with open(grid_options) as g:
                for line in g:
                    if line.startswith('H'):
                        header, col_type, width = \
                            line.rstrip().split('\t')[1:]
                        # the same table column may be displayed several
                        # times, so every display gets a unique name
                        name = header + '_' + str(header_count)
                        header_count += 1
                        the_columns.setdefault(header, []).append(name)
                        title_dict[name] = header
                        colour_dict[name] = {'empty': '#FFFFFF'}
                        type_dict[name] = col_type
                        width_dict[name] = int(width)
                        column_list.append(name)
                        up_to_colour[name] = 0
                        min_val_dict[name] = float('inf')
                        max_val_dict[name] = 0
                    elif line.startswith('C'):
                        # colour lines belong to the most recent H line
                        c_name, c_col = line.rstrip().split('\t')[1:]
                        if not c_col.startswith('#'):
                            # materialise the channels: map() is lazy on
                            # Python 3
                            c_col = colorstr(
                                tuple(map(int, c_col.split(','))))
                        colour_dict[name][c_name] = c_col
        val_dict = {}
        with open(the_table) as f:
            headers = f.readline().rstrip().split('\t')[1:]
            column_no = {}
            for num, i in enumerate(headers):
                if i in the_columns:
                    column_no[num] = i
            for line in f:
                name = line.split('\t')[0]
                # a row belongs to the (last) leaf whose name contains the
                # row identifier up to its first '.'
                leaf_name = None
                for n in t.traverse():
                    if n.is_leaf():
                        if name.split('.')[0] in n.name:
                            leaf_name = n.name
                if leaf_name is None:
                    continue
                leaf_name_dict[leaf_name] = name
                vals = line.rstrip().split('\t')[1:]
                if name in val_dict:
                    sys.exit('Duplicate entry found in table.')
                val_dict[name] = {}
                for num, val in enumerate(vals):
                    if num not in column_no or val == '':
                        continue
                    for column_name in the_columns[column_no[num]]:
                        col_type = type_dict[column_name]
                        if col_type == 'colour':
                            val_dict[name][column_name] = val
                            if val not in colour_dict[column_name]:
                                palette_pos = (up_to_colour[column_name]
                                               % len(color_list))
                                colour_dict[column_name][val] = colorstr(
                                    color_list[palette_pos])
                                up_to_colour[column_name] += 1
                        elif col_type == 'text':
                            val_dict[name][column_name] = val
                        elif col_type == 'colour_scale_date':
                            # dates are stored as seconds since 1970-01-01
                            year, month, day = val.split('-')
                            the_val = (datetime.datetime(
                                int(year), int(month), int(day), 0, 0, 0)
                                - datetime.datetime(1970, 1, 1, 0, 0, 0))
                            seconds = the_val.total_seconds()
                            val_dict[name][column_name] = seconds
                            if seconds < min_val_dict[column_name]:
                                min_val_dict[column_name] = seconds
                            if seconds > max_val_dict[column_name]:
                                max_val_dict[column_name] = seconds
                        elif col_type == 'colour_scale':
                            the_val = float(val)
                            val_dict[name][column_name] = the_val
                            if the_val < min_val_dict[column_name]:
                                min_val_dict[column_name] = the_val
                            if the_val > max_val_dict[column_name]:
                                max_val_dict[column_name] = the_val
                        else:
                            sys.exit('Unknown column type')
        if out_file is not None:
            desc_path = out_file + '.new_desc'
        else:
            desc_path = 'viridis.new_desc'
        ts.legend_position = 3
        leg_column = 0
        # the description of what was drawn is written back in the same
        # H/C format that grid_options accepts; the with-block guarantees
        # the file is flushed and closed
        with open(desc_path, 'w') as new_desc:
            for num, i in enumerate(column_list):
                title = title_dict[i]
                col_type = type_dict[i]
                col_width = width_dict[i]
                nameF = padded_text(title)
                nameF.rotation = -90
                ts.aligned_header.add_face(nameF, column=num + 1)
                new_desc.write('H\t' + title + '\t' + col_type + '\t'
                               + str(col_width) + '\n')
                if col_type == 'colour':
                    ts.legend.add_face(padded_text(title),
                                       column=leg_column + 1)
                    ts.legend.add_face(
                        RectFace(col_width, 20, '#FFFFFF', '#FFFFFF'),
                        column=leg_column)
                    for j in colour_dict[i]:
                        new_desc.write(
                            'C\t' + j + '\t' + colour_dict[i][j] + '\n')
                        ts.legend.add_face(padded_text(j),
                                           column=leg_column + 1)
                        ts.legend.add_face(
                            RectFace(col_width, 20, colour_dict[i][j],
                                     colour_dict[i][j]),
                            column=leg_column)
                    leg_column += 2
                elif col_type in ('colour_scale', 'colour_scale_date'):
                    ts.legend.add_face(padded_text(title),
                                       column=leg_column + 1)
                    ts.legend.add_face(
                        RectFace(col_width, 20, '#FFFFFF', '#FFFFFF'),
                        column=leg_column)
                    span = max_val_dict[i] - min_val_dict[i]
                    # NOTE(review): the numeric legend spreads hues over
                    # 270 degrees while the leaf cells below use 360, so
                    # legend and cells disagree for 'colour_scale' --
                    # confirm which range is intended.
                    if col_type == 'colour_scale':
                        hue_range = 270
                    else:
                        hue_range = 360
                    for num2 in range(11):
                        val = span * num2 / 10.0
                        shade = hsl_to_str(hue(val, span, hue_range),
                                           0.5, 0.5)
                        if col_type == 'colour_scale':
                            caption = str(val)
                        else:
                            caption = str(int(val / 60 / 60 / 24)) + ' days'
                        ts.legend.add_face(padded_text(caption),
                                           column=leg_column + 1)
                        ts.legend.add_face(
                            RectFace(col_width, 20, shade, shade),
                            column=leg_column)
                    leg_column += 2
                # one aligned cell per leaf for this column
                for n in t.traverse():
                    if not n.is_leaf():
                        continue
                    # leaves without a table row are drawn as 'empty'
                    row = val_dict.get(leaf_name_dict.get(n.name), {})
                    val = row.get(i, 'empty')
                    if col_type == 'colour':
                        n.add_face(
                            RectFace(col_width, 20, colour_dict[i][val],
                                     colour_dict[i][val]),
                            column=num + 1, position="aligned")
                    elif col_type in ('colour_scale', 'colour_scale_date'):
                        if val == 'empty':
                            shade = '#FFFFFF'
                        else:
                            span = max_val_dict[i] - min_val_dict[i]
                            shade = hsl_to_str(
                                hue(val - min_val_dict[i], span, 360),
                                0.5, 0.5)
                        n.add_face(RectFace(col_width, 20, shade, shade),
                                   column=num + 1, position="aligned")
                    elif col_type == 'text':
                        n.add_face(padded_text(val), column=num + 1,
                                   position="aligned")

    if pres_abs is not None:
        starting_col = len(column_list) + 1
        # argument lists instead of shell strings, so that file names with
        # spaces or shell metacharacters are passed through unchanged
        subprocess.call(['makeblastdb', '-out', 'tempdb', '-dbtype', 'prot',
                         '-in', pres_abs[0]])
        folder = pres_abs[1]
        len_dict = {}
        gene_list = []
        for caption, shade in (('Gene present/absent', '#FFFFFF'),
                               ('present', "#5ba965"),
                               ('absent', "#cb5b4c")):
            ts.legend.add_face(padded_text(caption),
                               column=starting_col + 1)
            ts.legend.add_face(RectFace(20, 20, shade, shade),
                               column=starting_col)
        # collect gene names and protein lengths from the FASTA file
        with open(pres_abs[0]) as f:
            for line in f:
                if line.startswith('>'):
                    name = line.split()[0][1:]
                    gene_list.append(name)
                    len_dict[name] = 0
                    nameF = padded_text(name)
                    nameF.rotation = -90
                    ts.aligned_header.add_face(
                        nameF, column=starting_col + len(gene_list) - 1)
                else:
                    len_dict[name] += len(line.rstrip())
        # a gene counts as present when a hit has at least 90 % identity
        # and an alignment length of at least 90 % of the protein length
        min_length = 0.9
        min_ident = 90
        for n in t.iter_leaves():
            the_name = n.name
            if the_name[0] == '"' and the_name[-1] == '"':
                the_name = the_name[1:-1]
            if the_name.endswith('.ref'):
                the_name = the_name[:-4]
            if not os.path.exists(folder + '/' + the_name):
                # fall back to a file in the folder starting with the name
                for q in os.listdir(folder):
                    if q.startswith(the_name):
                        the_name = q
            if not os.path.exists(the_name + '.blast'):
                subprocess.call(['blastx', '-query',
                                 folder + '/' + the_name, '-db', 'tempdb',
                                 '-outfmt', '6', '-num_threads', '24',
                                 '-out', the_name + '.blast'])
            gotit = set()
            with open(the_name + '.blast') as b:
                for line in b:
                    query, subject, ident, length = line.split()[:4]
                    if (float(ident) >= min_ident
                            and int(length) >= min_length
                            * len_dict[subject]):
                        gotit.add(subject)
            for num, i in enumerate(gene_list):
                shade = "#5ba965" if i in gotit else "#cb5b4c"
                n.add_face(RectFace(20, 20, shade, shade),
                           column=num + starting_col, position="aligned")

    # branch length / support labels are controlled by the caller
    ts.show_branch_length = label
    ts.show_branch_support = bootstrap
    # leaf names are drawn as aligned faces instead of the default labels
    ts.show_leaf_name = False
    for node in t.traverse():
        if node.is_leaf():
            node.add_face(
                AttrFace("name", fsize=font_size, ftype=font_type,
                         tight_text=True, fgcolor='black'),
                column=0, position="aligned")
    ts.margin_left = 20
    ts.margin_right = 100
    ts.margin_top = 20
    ts.margin_bottom = 20
    if extend:
        ts.draw_guiding_lines = True
    ts.scale = the_scale
    if circular is not None:
        ts.mode = "c"
        ts.arc_start = 0
        ts.arc_span = 360
    if out_file is None:
        t.show(tree_style=ts)
    else:
        t.render(out_file, w=210, units='mm', tree_style=ts)
else: #We're at the root node new_node.dist = 0 cur_node_id = str(current_bud_row.OrgID) for idx, new_row in saved_pop_hosts[saved_pop_hosts.ParentID.eq(cur_node_id)].iterrows(): build_tree_recursive(new_row, new_node) return new_node root_node_row = saved_pop_hosts[saved_pop_hosts.ParentID == "(none)"].squeeze() print("Building Tree") build_tree_recursive(root_node_row, host_phylo) print("Drawing Tree") #Some drawing code ts = TreeStyle() ts.show_leaf_name = True ts.mode = "c" ts.arc_start = -180 # 0 degrees = 3 o'clock ts.arc_span = 180 host_phylo.render("tree.png", tree_style=ts) print("Saving Tree") #Write the Newick Format Tree host_phylo.write(format=1, outfile="avida_tree.nw")
# Show the current tree `t` (built earlier in the script) with leaf names,
# branch lengths and branch support values.
ts = TreeStyle()
ts.show_leaf_name = True
ts.show_branch_length = True
ts.show_branch_support = True
t.show(tree_style=ts)

# %% Circular tree drawn on a 180 degree arc (random 30-leaf tree)
from ete3 import Tree, TreeStyle
t = Tree()
t.populate(30)
ts = TreeStyle()
ts.show_leaf_name = True
ts.mode = "c"
ts.arc_start = -180 # 0 degrees = 3 o'clock
ts.arc_span = 180
t.show(tree_style=ts)

# %% Circular tree, own version: fixed newick tree drawn on a 120 degree arc
from ete3 import Tree, TreeStyle
t = Tree( "(sweetpotato,(hotpepper,(eggplant,(potato,tomato))));" )
ts = TreeStyle()
ts.show_leaf_name = True
ts.mode = "c"
ts.arc_start = -60 # 0 degrees = 3 o'clock
ts.arc_span = 120
t.show(tree_style=ts)

# %% Circular tree in 180 degrees, own version
def main(args):
    """Load the tree named by ``args.tree`` and render it to ``args.output``.

    Attributes read from ``args``: tree, alignment, highlight_new,
    hide_annotations, positions, legend, circular and output. The node
    layout comes from the module-level ``layout`` function and the legend
    colours from the module-level ``samples`` mapping.
    """
    extra = {}
    if args.alignment:
        extra = {'alignment': args.alignment, 'alg_format': 'fasta'}
    t = PhyloTree(args.tree, **extra)

    if args.highlight_new:
        runs = read_runs(args.highlight_new)

    t.set_outgroup('EM_079422')
    t.ladderize()

    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.show_branch_support = False
    ts.layout_fn = layout

    # NOTE(review): set_style is called twice on the same node; presumably
    # the second style replaces the first one -- confirm this is intended.
    thick_hz_line = NodeStyle()
    thick_hz_line["hz_line_width"] = 8
    t.set_style(thick_hz_line)

    thick_vt_line = NodeStyle()
    thick_vt_line["vt_line_width"] = 4
    t.set_style(thick_vt_line)

    # column headers of the annotation block, columns 1..5
    if not args.hide_annotations:
        header_titles = (
            'Sample identifier',
            'Prefecture',
            'Sous-prefecture',
            'Village',
            'Sample received',
        )
        for column_no, title in enumerate(header_titles, 1):
            header_face = MyTextFace(title, fstyle='Bold', fsize=8,
                                     tight_text=False)
            ts.aligned_header.add_face(header_face, column=column_no)

    # ruler above the alignment, column 6
    if args.positions:
        positions = read_positions(args.positions)
        alg_header = RulerFace(
            positions,
            col_width=11,
            height=0,  # set to 0 if dont want to use values
            kind="stick",
            hlines=[0],
            hlines_col=["white"],  # trick to hide hz line
        )
        ts.aligned_header.add_face(alg_header, 6)

    # legend: one coloured dot per prefecture, sorted by name
    if args.legend:
        legend = {s['prefec']: s['prefec__colour']
                  for s in list(samples.values())}
        for prefecture in sorted(legend.keys()):
            ts.legend.add_face(CircleFace(4, legend[prefecture]), column=0)
            ts.legend.add_face(
                MyTextFace(prefecture, fsize=6, tight_text=False), column=1)
        ts.legend_position = 1

    if args.circular:
        ts.mode = "c"
        ts.arc_start = -180  # 0 degrees = 3 o'clock
        ts.arc_span = 180

    t.render(args.output, tree_style=ts, w=1024)
# Kernel (similarity) matrix of the graphs in GL['graph'], with the same
# labels on rows and columns.
GK = pd.DataFrame(kernel.fit_transform(GL['graph'].values))
GK.columns = GK.index = label

# Use 1-K as measure of distance
DM_GK = DistanceMatrix(1 - GK.values)

# Build the GK tree by neighbour joining; the result is requested as a
# string and handed to Tree (presumably newick -- TODO confirm).
sktree = nj(DM_GK, result_constructor=str)
GK_tree = Tree(sktree)
GK_tree.name = 'AGORA network similarity tree'

# Tree style: full-circle layout with leaf names shown
ts = TreeStyle()
ts.show_leaf_name = True
ts.mode = "c"
ts.arc_start = -180
ts.arc_span = 360

# Plot tree (both output variants are currently disabled)
#GK_tree.render(file_name='/home/acabbia/Documents/Muscle_Model/GSMM-distance/figures/GK_tree_AGORA.png', tree_style=ts)
#GK_tree.show(tree_style=ts)

#%%
####
# MAKE JACCARD TREE
###

# Make binary matrices (reaction, metabolite and gene matrices) for the
# model library, using the reference model read from ref_model_file.
ref_model = cobra.io.read_sbml_model(ref_model_file)
reactions_matrix, metabolite_matrix, gene_matrix = make_binary_mat(
    model_library_folder, ref_model)
def plot(self, placement, togjson, outdir, cfg):
    """
    Render one circular tree image per LCA/HPA pair of a placement.

    For every pair taken from placement["LCA"] and placement["HPA"] the
    tree self.t is styled and written to <outdir>/tree_<index>.png:
    nodes whose name starts with "PTHR" are shaded by their posterior
    probability (read from the info loaded from togjson), the LCA and HPA
    nodes are highlighted, all other nodes get a small gray marker.
    cfg["minPlacementLikelyhood"] is the lower end of the colour scale.
    """
    from ete3 import NodeStyle, TreeStyle
    from ete3 import CircleFace, TextFace, RectFace

    logging.debug("Plotting trees now")
    # without an X display Qt has to render offscreen
    os.environ["QT_QPA_PLATFORM"] = "offscreen"

    info = self.loadInfo(togjson)

    def styled(**settings):
        # build a NodeStyle and apply the given settings in order
        node_style = NodeStyle()
        for key, value in settings.items():
            node_style[key] = value
        return node_style

    # background styles are created lazily, one per gradient colour,
    # and reused across nodes and plots
    nodeStyles = defaultdict(NodeStyle)

    highlightsize = 80
    nodesize = 10

    pairs = zip(placement["LCA"], placement["HPA"])
    for no, (LCAp, HPAp) in enumerate(pairs):
        plotpath = os.path.join(outdir, f"tree_{no}.png")
        # same tree object for every plot (this is a reference, not a copy)
        t = self.t
        LCA = LCAp["node"]
        HPA = HPAp["node"]

        # basic tree style: circular, rotated, leaf names hidden
        ts = TreeStyle()
        ts.show_leaf_name = False
        ts.root_opening_factor = 1
        ts.mode = "c"
        ts.rotation = 210
        ts.arc_start = 0  # 0 degrees = 3 o'clock
        ts.arc_span = 350

        # styles of the special nodes; hard coded for now
        LCAstyle = styled(fgcolor="#33a02c", bgcolor="#b2df8a",
                          size=highlightsize)
        HPAstyle = styled(fgcolor="#1f78b4", bgcolor="#a6cee3",
                          size=highlightsize)
        defaultStyle = styled(fgcolor="gray", size=nodesize)

        # legend: LCA / HPA markers followed by the probability gradient
        ts.legend_position = 1
        for marker_style, caption in ((LCAstyle, "LCA"), (HPAstyle, "HPA")):
            ts.legend.add_face(CircleFace(40, marker_style["fgcolor"]),
                               column=1)
            ts.legend.add_face(TextFace(caption, fsize=50), column=2)

        i = 1
        ts.legend.add_face(TextFace(f"p = {i}", fsize=50), column=1)
        # gradient bar: thin overlapping rectangles from p = 1 downwards
        while i > 0:
            temp_face = RectFace(60, 10,
                                 fgcolor=p_to_color(i),
                                 bgcolor=p_to_color(i))
            temp_face.margin_top = -4
            ts.legend.add_face(temp_face, column=1)
            i -= 0.01
        threshold = cfg["minPlacementLikelyhood"]
        ts.legend.add_face(TextFace(f"p = {threshold}", fsize=50), column=1)

        # pick exactly one style per node
        for n in t.traverse():
            if n.name.startswith("PTHR"):
                # rescale the posterior probability from
                # [threshold, 1] to [0, 1] and map it onto the gradient
                x = (info[n.name]["post_prob"] - threshold) / (1 - threshold)
                he = p_to_color(x)
                nodeStyles[he]["bgcolor"] = he
                chosen = nodeStyles[he]
            elif n.name == LCA:
                chosen = LCAstyle
            elif n.name == HPA:
                chosen = HPAstyle
            else:
                chosen = defaultStyle
            n.set_style(chosen)

        # plot to disk
        _ = t.render(plotpath, w=320, units="mm", tree_style=ts)
def plot_phylo(nw_tree,
               out_name,
               parenthesis_classif=True,
               show_support=False,
               radial_mode=False,
               root=False):
    """Render a newick tree with italic, optionally colour-coded leaf names.

    nw_tree             -- newick input handed to ete3.Tree (format 0)
    out_name            -- output image path handed to Tree.render
    parenthesis_classif -- if true, a classification is extracted from each
                           leaf name (second-to-last '_'-separated field
                           without its last character) and every distinct
                           classification gets its own label colour;
                           leaves without one are drawn in black
    show_support        -- shown as branch support labels when true; when
                           false, nodes with support below 1 are marked
                           with a small red dot instead
    radial_mode         -- draw a full-circle tree instead of a rectangular
    root                -- re-root the tree before drawing
    """
    from ete3 import Tree, AttrFace, TreeStyle, NodeStyle
    import orthogroup2phylogeny_best_refseq_uniprot_hity

    ete2_tree = Tree(nw_tree, format=0)
    if root:
        R = ete2_tree.get_midpoint_outgroup()
        # and set it as tree outgroup
        ete2_tree.set_outgroup(R)
        # NOTE(review): the midpoint rooting is immediately replaced by a
        # hard-coded outgroup leaf -- confirm this is still wanted.
        ete2_tree.set_outgroup('Bacillus subtilis')
    ete2_tree.ladderize()

    name2classif = {}
    classif2col = {}
    if parenthesis_classif:
        print('parenthesis_classif!')
        for lf in ete2_tree.iter_leaves():
            print(lf)
            try:
                classif = lf.name.split('_')[-2][0:-1]
            except IndexError:
                # fewer than two '_'-separated fields: no classification
                continue
            print('classif', classif)
            name2classif[lf.name] = classif
        # sorted, so a classification gets the same colour on every run
        classif_list = sorted(set(name2classif.values()))
        classif2col = dict(
            zip(
                classif_list,
                orthogroup2phylogeny_best_refseq_uniprot_hity.
                get_spaced_colors(len(classif_list))))

    for lf in ete2_tree.iter_leaves():
        # unclassified leaves (or classification disabled) stay black
        col = classif2col.get(name2classif.get(lf.name), 'black')
        ff = AttrFace("name", fsize=12, fstyle='italic')
        ff.fgcolor = col
        lf.add_face(ff, column=0)

    if not show_support:
        print('support')
    for n in ete2_tree.traverse():
        nstyle = NodeStyle()
        nstyle["fgcolor"] = "red"
        nstyle["size"] = 0
        if not show_support:
            print(n.support)
            # low support is flagged with a visible red node marker
            if float(n.support) < 1:
                nstyle["size"] = 4
        n.set_style(nstyle)

    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.show_branch_support = show_support
    if radial_mode:
        ts.mode = "c"
        ts.arc_start = -90
        ts.arc_span = 360
    ts.tree_width = 370
    ts.complete_branch_lines_when_necessary = True
    ete2_tree.render(out_name, tree_style=ts, w=900)
style2["shape"] = "circle" style2["vt_line_color"] = "#0000aa" style2["hz_line_color"] = "#0000aa" style2["vt_line_width"] = 5 style2["hz_line_width"] = 5 style2["vt_line_type"] = 1 # 0 solid, 1 dashed, 2 dotted style2["hz_line_type"] = 1 for l in t.iter_leaves(): l.img_style = style2 ts = TreeStyle() ts.show_leaf_name = True #ts.rotation = 90 ts.mode = 'c' ts.arc_start = 180 ts.arc_span = 350 #t.show(tree_style=ts) #t.show() t = Tree() t.populate(8) style2 = NodeStyle() style2["fgcolor"] = "darkred" style2["shape"] = "circle" style2["vt_line_color"] = "green" style2["hz_line_color"] = "red" style2["vt_line_width"] = 5 style2["hz_line_width"] = 5 style2["vt_line_type"] = 1 # 0 solid, 1 dashed, 2 dotted style2["hz_line_type"] = 1 for l in t.iter_leaves():
# Rectangular tree: leaves listed in `strnr` get a large red node marker.
ts = TreeStyle()
ts.show_leaf_name = True
ts.scale = 800

for n in t.traverse():
    if not n.is_leaf():
        continue
    if n.name not in strnr:
        continue
    n.img_style["fgcolor"] = "red"
    n.img_style["size"] = 10

t.render("plots/tree_ete.pdf", w=300, tree_style=ts)

# Circular tree of the accessory genes: leaf background is chosen by the
# tags found in the leaf name ("DTU" -> red, "CEB" / "ERR" -> blue; blue is
# applied last when both match).
t2 = Tree("data/accessory_binary_genes.fa.newick")
ts2 = TreeStyle()
ts2.show_leaf_name = False
ts2.mode = "c"
ts2.arc_start = -180  # 0 degrees = 3 o'clock
ts2.arc_span = 359

for n in t2.traverse():
    if not n.is_leaf():
        continue
    if "DTU" in n.name:
        print(n.name)
        n.img_style["bgcolor"] = "red"
    if "CEB" in n.name or "ERR" in n.name:
        print(n.name)
        n.img_style["bgcolor"] = "blue"

t2.render("plots/tree_ete2.png", w=1200, tree_style=ts2)
def generateFigure(PF, sample, rank, input_file, output_base_name, file_type,
                   plot_l1, scaling, output_dpi):
    """Render the taxonomy tree of one sample and, optionally, an L1 plot.

    PF               -- profile object; supplies the tax ids of the sample,
                        the node layout function and the abundance dicts
    sample           -- sample identifier, also part of the tree file name
    rank             -- rank limit of the topology and rank used for the
                        true/predicted comparison
    input_file       -- path of the profile; its base name (up to the first
                        '.') is shown as the tool name
    output_base_name -- prefix of all output files
    file_type        -- output file extension
    plot_l1          -- if true, also save a true-vs-predicted scatter plot
    scaling          -- not used by this function
    output_dpi       -- resolution of the saved figures

    Logs a critical message and exits with status 1 when the topology
    cannot be built from the input.
    """
    # Make the ETE3 tree
    try:
        tree = ncbi.get_topology(PF.get_all_tax_ids(sample), rank_limit=rank)
    except Exception:
        # Exception (not a bare except) so that Ctrl-C and SystemExit are
        # not reported as a format problem
        logging.getLogger('Tampa').critical("Input format not compatible.")
        raise SystemExit(1)

    ts = TreeStyle()
    ts.layout_fn = PF.layout
    ts.mode = "c"
    ts.show_leaf_name = False
    ts.show_branch_length = False
    ts.show_branch_support = False
    ts.min_leaf_separation = 10
    ts.arc_span = 360

    # two rows of white circles act as spacers that move the legend closer
    for _ in range(2):
        for column in (2, 1, 0):
            ts.legend.add_face(CircleFace(65, "#FFFFFF"), column=column)

    def add_legend_entry(face, column):
        # every real legend entry is horizontally aligned
        face.hz_align = True
        ts.legend.add_face(face, column=column)

    # add the legend
    legend_fs = 50
    add_legend_entry(CircleFace(100, "#1b9e77"), 0)
    add_legend_entry(TextFace("Predicted", fsize=legend_fs), 0)
    if len(PF.ground_truth_dict) > 0:
        add_legend_entry(CircleFace(100, "#d95f02"), 1)
        add_legend_entry(TextFace("True", fsize=legend_fs), 1)
    tool_name = os.path.basename(input_file).split('.')[0]
    add_legend_entry(TextFace(f"Tool: {tool_name}", fsize=legend_fs), 0)

    # lets font size and face size be tuned independently of each other
    ts.allow_face_overlap = False

    tree_output_file = f"{output_base_name}_tree_{rank}_{sample}.{file_type}"
    tree.render(tree_output_file, h=5.2, w=5, tree_style=ts, units="in",
                dpi=output_dpi)

    if not plot_l1:
        return

    # L1 plot: true vs. predicted abundance (as fractions) of every leaf
    # at the requested rank; taxa missing from a profile count as 0
    true_abundance_at_rank = []
    predicted_abundance_at_rank = []
    for node in tree.get_leaves():
        if node.rank != rank:
            continue
        tax_id = str(node.taxid)
        if tax_id in PF.ground_truth_tax_id_to_percentage:
            true_abundance_at_rank.append(
                PF.ground_truth_tax_id_to_percentage[tax_id] / 100.)
        else:
            true_abundance_at_rank.append(0)
        if tax_id in PF.profile_tax_id_to_percentage:
            predicted_abundance_at_rank.append(
                PF.profile_tax_id_to_percentage[tax_id] / 100.)
        else:
            predicted_abundance_at_rank.append(0)

    data = np.zeros((len(true_abundance_at_rank), 2))
    data[:, 0] = np.array(true_abundance_at_rank)
    data[:, 1] = np.array(predicted_abundance_at_rank)
    df = pd.DataFrame(data, columns=['True', 'Predicted'])

    # NOTE(review): the plot is drawn on the current pyplot figure, which is
    # neither created nor closed here, and the L1 file name does not contain
    # the sample -- repeated calls may accumulate points / overwrite files.
    ax = seaborn.scatterplot(x='True', y='Predicted', data=df, color='b',
                             s=55)
    eps = 1
    ax.set_aspect('equal')
    max_val = np.max(data) + eps
    ax.set_xlim(-.5, max_val)
    ax.set_ylim(-.5, max_val)
    ax.set_xbound(-.5, max_val)
    ax.set_ybound(-.5, max_val)
    # diagonal = perfect prediction
    diagonal = np.linspace(0, max_val, 100)
    plt.plot(diagonal, diagonal, color='k')
    # red segment from each point to the diagonal shows its error
    for (x, y) in zip(true_abundance_at_rank, predicted_abundance_at_rank):
        if x > y:
            ax.vlines(x, y, x, colors='r')
        elif y > x:
            ax.vlines(x, x, y, colors='r')
    plt.title(f"Tool: {tool_name}")
    plt.tight_layout()
    l1_out_file = f"{output_base_name}_L1_{rank}.{file_type}"
    plt.savefig(l1_out_file, dpi=output_dpi)