def _get_motif_tree(tree, data, circle=True, vmin=None, vmax=None):
    """Build an ete3 tree whose branches are coloured and sized by score.

    Every node is styled from the mean of ``data`` over the leaves below it:
    the branch colour comes from the ``RdBu_r`` colormap and the branch width
    is proportional to the absolute mean (with a minimum width of 5).

    Parameters
    ----------
    tree
        Anything accepted by ``ete3.Tree(...)``.
    data
        Object indexed as ``data[[leaf_name, ...]].values`` -- presumably a
        pandas DataFrame whose columns are the leaf names (TODO confirm).
    circle : bool
        If true, configure the tree style as a 180-degree circular layout.
    vmin, vmax : float or None
        Limits of the colour scale. When either is ``None`` it is derived
        from the first percentile in 90..100 of ``data.values`` that is
        positive (``-p`` for ``vmin``, ``p`` for ``vmax``).

    Returns
    -------
    tuple
        ``(t, ts)``: the styled ``Tree`` and the matching ``TreeStyle``.
    """
    try:
        from ete3 import Tree, NodeStyle, TreeStyle
    except ImportError:
        print("Please install ete3 to use this functionality")
        sys.exit(1)

    t = Tree(tree)

    # Determine cutoff for color scale.
    # Compare against None explicitly: a caller-supplied limit of 0 is a
    # legitimate value and must not be replaced by the percentile default.
    if vmin is None or vmax is None:
        for pct in range(90, 101):
            minmax = np.percentile(data.values, pct)
            if minmax > 0:
                break
        if vmin is None:
            vmin = -minmax
        if vmax is None:
            vmax = minmax

    norm = Normalize(vmin=vmin, vmax=vmax, clip=True)
    mapper = cm.ScalarMappable(norm=norm, cmap="RdBu_r")

    # Scale factor so that the largest value in the data maps to a width of 25.
    m = 25 / data.values.max()

    for node in t.traverse("levelorder"):
        leaf_names = [leaf.name for leaf in node.get_leaves()]
        val = data[leaf_names].values.mean()

        color = to_hex(mapper.to_rgba(val))
        width = max(np.abs(m * val), 5)

        style = NodeStyle()
        style["size"] = 0
        style["hz_line_color"] = color
        style["vt_line_color"] = color
        style["vt_line_width"] = width
        style["hz_line_width"] = width
        node.set_style(style)

    ts = TreeStyle()
    ts.layout_fn = _tree_layout
    ts.show_leaf_name = False
    ts.show_scale = False
    ts.branch_vertical_margin = 10

    if circle:
        ts.mode = "c"
        ts.arc_start = 180  # 0 degrees = 3 o'clock
        ts.arc_span = 180

    return t, ts
def main(args):
    """Render the tree named by the parsed command-line arguments.

    Reads ``args.tree`` (optionally with ``args.alignment`` as FASTA), roots
    it on the leaf ``'EM_079422'``, ladderizes it, builds a ``TreeStyle`` and
    writes the image to ``args.output`` at a width of 1024.

    Attributes read from ``args``: ``tree``, ``alignment``, ``highlight_new``,
    ``hide_annotations``, ``positions``, ``legend``, ``circular``, ``output``.
    The function also relies on module-level names defined outside this block
    (``layout``, ``samples``, ``read_runs``, ``read_positions``,
    ``MyTextFace``).
    """
    if args.alignment:
        t = PhyloTree(args.tree, alignment=args.alignment, alg_format='fasta')
    else:
        t = PhyloTree(args.tree)

    if args.highlight_new:
        # NOTE(review): nothing in this function reads `runs`; confirm whether
        # `layout` was meant to see it (it is a local here, not a global).
        runs = read_runs(args.highlight_new)

    t.set_outgroup('EM_079422')
    t.ladderize()

    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.show_branch_support = False
    ts.layout_fn = layout

    # One style carrying both widths: set_style() replaces the node's style,
    # so applying two separate NodeStyle objects kept only the second one and
    # the horizontal width was lost.
    root_style = NodeStyle()
    root_style["hz_line_width"] = 8
    root_style["vt_line_width"] = 4
    t.set_style(root_style)

    # header
    if not args.hide_annotations:
        header_titles = (
            'Sample identifier',
            'Prefecture',
            'Sous-prefecture',
            'Village',
            'Sample received',
        )
        for column, title in enumerate(header_titles, start=1):
            ts.aligned_header.add_face(
                MyTextFace(title, fstyle='Bold', fsize=8, tight_text=False),
                column=column)

    if args.positions:
        positions = read_positions(args.positions)

        alg_header = RulerFace(
            positions,
            col_width=11,
            height=0,  # set to 0 if dont want to use values
            kind="stick",
            hlines=[0],
            hlines_col=["white"],  # trick to hide hz line
        )
        ts.aligned_header.add_face(alg_header, 6)

    # legend
    if args.legend:
        # One legend entry per distinct prefecture; later samples win on ties.
        legend = {s['prefec']: s['prefec__colour'] for s in samples.values()}
        for prefecture in sorted(legend):
            ts.legend.add_face(CircleFace(4, legend[prefecture]), column=0)
            ts.legend.add_face(
                MyTextFace(prefecture, fsize=6, tight_text=False), column=1)
        ts.legend_position = 1

    if args.circular:
        ts.mode = "c"
        ts.arc_start = -180  # 0 degrees = 3 o'clock
        ts.arc_span = 180

    t.render(args.output, tree_style=ts, w=1024)
# NOTE(review): this chunk begins inside a function whose `def` line is not
# visible here (presumably `build_tree_recursive`, judging by the recursive
# call below). The nesting of this tail is reconstructed -- confirm it.
    else:
        #We're at the root node
        new_node.dist = 0
    cur_node_id = str(current_bud_row.OrgID)
    # Recurse into every row whose ParentID equals this row's OrgID, attaching
    # each result under the node created for the current row.
    for idx, new_row in saved_pop_hosts[saved_pop_hosts.ParentID.eq(cur_node_id)].iterrows():
        build_tree_recursive(new_row, new_node)
    return new_node

# The starting row is selected by the literal ParentID "(none)".
# NOTE(review): .squeeze() presumably reduces a single matching row to a
# Series; if several rows match, the value passed on has a different shape.
root_node_row = saved_pop_hosts[saved_pop_hosts.ParentID == "(none)"].squeeze()
print("Building Tree")
build_tree_recursive(root_node_row, host_phylo)
print("Drawing Tree")
#Some drawing code
ts = TreeStyle()
ts.show_leaf_name = True
ts.mode = "c"
ts.arc_start = -180 # 0 degrees = 3 o'clock
ts.arc_span = 180
host_phylo.render("tree.png", tree_style=ts)
print("Saving Tree")
#Write the Newick Format Tree
host_phylo.write(format=1, outfile="avida_tree.nw")
# Compute a Weisfeiler-Lehman graph-kernel matrix over the graphs stored in
# GL['graph'] and label its rows and columns with `label`.
kernel = gk.WeisfeilerLehman(base_kernel = gk.VertexHistogram, normalize= True)
GK = pd.DataFrame(kernel.fit_transform(GL['graph'].values))
GK.columns = GK.index = label
# Use 1-K as measure of Distance
# NOTE(review): only the raw values are passed on, so the labels assigned to
# GK above are not forwarded to the distance matrix -- confirm the leaf names
# of the resulting tree are what is wanted.
DM_GK = DistanceMatrix(1-GK.values)
#make GK tree
# nj(..., result_constructor=str) hands back a string that Tree() then parses.
sktree = nj(DM_GK, result_constructor=str)
GK_tree = Tree(sktree)
GK_tree.name = 'AGORA network similarity tree'
# style
ts = TreeStyle()
ts.show_leaf_name = True
ts.mode = "c"
ts.arc_start = -180
ts.arc_span = 360
#plot tree
# NOTE(review): the output path is an absolute, user-specific location.
GK_tree.render(file_name='/home/acabbia/Documents/Muscle_Model/GSMM-distance/figures/GK_tree_AGORA.png', tree_style=ts)
GK_tree.show(tree_style=ts)
#%%
####
# MAKE JACCARD TREE
###
# make binary matrices (rxn, mets and gene matrices)
ref_model = cobra.io.read_sbml_model(ref_model_file)
reactions_matrix, metabolite_matrix, gene_matrix = make_binary_mat(model_library_folder, ref_model)
def plot_tree(self,
              output_filepath,
              time_series_info=None,
              tree_style='horizontal_right',
              leaf_size_map_to=None,
              show_leaf_names=False,
              color_branches_by=None,
              line_width=1,
              start_color='red',
              end_color='purple',
              ladderize=True):
    """
    This method plots the phylogenetic tree as a dendrogram. Uses the ete3
    package to do this.

    output_filepath - This is the path to the output image file.
    time_series_info - This tells where the information of time-point can
    be found (if at all) in each of the element IDs. Acceptable values are:
        None - (default) This means there is no time information in the
        tree
        'start_of_id' - This means the time info is at the very beginning
        of each element ID, and is separated by a '_'. For example:
        '45.3_blahblahblah' would have a time point of 45.3.
    tree_style - This gives the style of tree plotting. Acceptable values
    are:
        'half_circle' - Tree is plotted as a half circle, where branches
        are radiating outward and upward.
        'horizontal_right' - Tree is plotted as a normal dendrogram where
        branching occurs from left to right.
    Any other value leaves the default ete3 layout untouched.
    leaf_size_map_to - If defined (default, None), this will inform what
    attribute of the node the leaf size maps to. Acceptable values are:
        None - No leaf size info. All leaves the same size
        'count' - leaf size proportional to the log of the count attribute
        'freq' - leaf size proportional to the freq attribute
    Any other truthy value raises ValueError.
    show_leaf_names - If True (default, False), then will plot the names
    of each of the leaves of the tree.
    color_branches_by - If defined (default, None) this will give how the
    leaf branches will be colored, if at all. Acceptable values are:
        None - Default. No branch coloring
        'time_point' - The branches will be colored according to the
        'time_point' attribute of each leaf node. The time information
        must exist: either run the 'add_time_info' method beforehand, or
        define the 'time_series_info' parameter for this method.
        Otherwise ValueError is raised.
        any other string - The name of any other leaf attribute whose
        value is mapped to the leaf branch color.
    line_width - controls the line width. Default, 1
    start_color - This gives the starting color. This should be a string
    that spells out the name of a color. Uses 'Color' from 'colour'.
    end_color - This gives the ending color, same conventions as
    'start_color'. The colors used for each unique attribute value span
    the spectrum from 'start_color' to 'end_color'.
    ladderize - If True (default), this will ladderize the tree before
    rendering. If False the current branch order is kept.
    """
    if leaf_size_map_to and leaf_size_map_to not in ('count', 'freq'):
        # Previously surfaced as a NameError on 'radius' deep in the loop.
        raise ValueError(
            "leaf_size_map_to must be None, 'count' or 'freq', got %r" %
            (leaf_size_map_to, ))

    if not self.time_points and time_series_info:
        self.add_time_info(time_series_info=time_series_info)

    #map each distinct value of the coloring attribute to a hex color
    value_to_color = {}
    if color_branches_by:
        if color_branches_by == 'time_point':
            # Use the stored time points whether they came from this call
            # or from an earlier add_time_info(); the old code only built
            # this mapping when 'time_series_info' was also given and then
            # failed with a NameError while styling the leaves.
            if not self.time_points:
                raise ValueError(
                    "color_branches_by='time_point' needs time information: "
                    "run add_time_info() first or pass time_series_info")
            values = self.time_points
        else:
            #check if attribute already exists in the tree data. if not,
            #add it
            if any(color_branches_by not in leaf.features
                   for leaf in self.tree):
                self.add_attribute_to_leaves(
                    attribute_name=color_branches_by)
            values = self.extra_leaf_features[color_branches_by]
        gradient = Color(start_color).range_to(Color(end_color), len(values))
        value_to_color = {
            value: shade.hex_l
            for value, shade in zip(sorted(values), gradient)
        }

    #set node styles
    #need to scale sizes by the length (divergence) of the tree
    _, size_to_tree_size_scaler = self.tree.get_farthest_leaf()
    branch_width = size_to_tree_size_scaler * 10 * line_width
    default_color = Color('black').hex_l
    for node in self.tree.traverse():
        node_style = NodeStyle()
        #do stuff to leaf nodes
        if node.is_leaf():
            if color_branches_by:
                color = value_to_color[getattr(node, color_branches_by)]
            else:
                color = default_color
            node_style['hz_line_color'] = color
            node_style['vt_line_color'] = color
            if leaf_size_map_to:
                if leaf_size_map_to == 'count':
                    radius = size_to_tree_size_scaler * 100 * math.log(
                        node.count)
                else:  # 'freq', validated above
                    radius = size_to_tree_size_scaler * 100 * node.freq
                c = CircleFace(radius=radius, color=color, style='circle')
                c.opacity = 0.3
                node.add_face(c, 0, position='branch-right')
        node_style['size'] = 0
        node_style['hz_line_width'] = branch_width
        node_style['vt_line_width'] = branch_width
        node.set_style(node_style)

    #set tree style
    tree_steeze = TreeStyle()
    if tree_style == 'half_circle':
        tree_steeze.mode = 'c'
        tree_steeze.arc_start = -180
        tree_steeze.arc_span = 180
    tree_steeze.show_leaf_name = bool(show_leaf_names)

    # Honor the parameter: the tree used to be ladderized unconditionally.
    if ladderize:
        self.tree.ladderize()
    self.tree.render(output_filepath,
                     w=700,
                     h=700,
                     units='mm',
                     tree_style=tree_steeze)
def main(args):
    """Render the tree named by the parsed command-line arguments.

    Reads ``args.tree`` (optionally with ``args.alignment`` as FASTA), roots
    it on the leaf ``'EM_079422'``, ladderizes it, builds a ``TreeStyle`` and
    writes the image to ``args.output`` at a width of 1024.

    Attributes read from ``args``: ``tree``, ``alignment``, ``highlight_new``,
    ``hide_annotations``, ``positions``, ``legend``, ``circular``, ``output``.
    The function also relies on module-level names defined outside this block
    (``layout``, ``samples``, ``read_runs``, ``read_positions``,
    ``MyTextFace``).
    """
    if args.alignment:
        t = PhyloTree(args.tree, alignment=args.alignment, alg_format='fasta')
    else:
        t = PhyloTree(args.tree)

    if args.highlight_new:
        # NOTE(review): nothing in this function reads `runs`; confirm whether
        # `layout` was meant to see it (it is a local here, not a global).
        runs = read_runs(args.highlight_new)

    t.set_outgroup('EM_079422')
    t.ladderize()

    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.show_branch_support = False
    ts.layout_fn = layout

    # One style carrying both widths: set_style() replaces the node's style,
    # so applying two separate NodeStyle objects kept only the second one and
    # the horizontal width was lost.
    root_style = NodeStyle()
    root_style["hz_line_width"] = 8
    root_style["vt_line_width"] = 4
    t.set_style(root_style)

    # header
    if not args.hide_annotations:
        header_titles = (
            'Sample identifier',
            'Prefecture',
            'Sous-prefecture',
            'Village',
            'Sample received',
        )
        for column, title in enumerate(header_titles, start=1):
            ts.aligned_header.add_face(
                MyTextFace(title, fstyle='Bold', fsize=8, tight_text=False),
                column=column)

    if args.positions:
        positions = read_positions(args.positions)

        alg_header = RulerFace(
            positions,
            col_width=11,
            height=0,  # set to 0 if dont want to use values
            kind="stick",
            hlines=[0],
            hlines_col=["white"],  # trick to hide hz line
        )
        ts.aligned_header.add_face(alg_header, 6)

    # legend
    if args.legend:
        # One legend entry per distinct prefecture; later samples win on ties.
        legend = {s['prefec']: s['prefec__colour'] for s in samples.values()}
        for prefecture in sorted(legend):
            ts.legend.add_face(CircleFace(4, legend[prefecture]), column=0)
            ts.legend.add_face(
                MyTextFace(prefecture, fsize=6, tight_text=False), column=1)
        ts.legend_position = 1

    if args.circular:
        ts.mode = "c"
        ts.arc_start = -180  # 0 degrees = 3 o'clock
        ts.arc_span = 180

    t.render(args.output, tree_style=ts, w=1024)
def deepbiome_draw_phylogenetic_tree(
        log,
        network_info,
        path_info,
        num_classes,
        file_name="%%inline",
        img_w=500,
        branch_vertical_margin=20,
        arc_start=0,
        arc_span=360,
        node_name_on=True,
        name_fsize=10,
        tree_weight_on=True,
        tree_weight=None,
        tree_level_list=['Genus', 'Family', 'Order', 'Class', 'Phylum'],
        weight_opacity=0.4,
        weight_max_radios=10,
        phylum_background_color_on=True,
        phylum_color=[],
        phylum_color_legend=False,
        show_covariates=True,
        verbose=True):
    """
    Draw phylogenetic tree

    Parameters
    ----------
    log (logging instance) :
        python logging instance for logging
    network_info (dictionary) :
        python dictionary with network_information
    path_info (dictionary):
        python dictionary with path_information
    num_classes (int):
        number of classes for the network. 0 for regression, 1 for binary
        classification.
    file_name (str):
        name of the figure for save.
        - "*.png", "*.jpg"
        - "%%inline" for notebook inline output.
        default="%%inline"
    img_w (int):
        image width (pt)
        default=500
    branch_vertical_margin (int):
        vertical margin for branch
        default=20
    arc_start (int):
        angle at which the arc starts
        default=0
    arc_span (int):
        total amount of angle for the arc span
        default=360
    node_name_on (boolean):
        show the name of the last leaf node if True
        default=True
    name_fsize (int):
        font size for the name of the last leaf node
        default=10
    tree_weight_on (boolean):
        show the amount and the direction of the weight for each edge in the
        tree by circle size and color.
        default=True
    tree_weight (sequence):
        reference tree weights, one entry per level; each entry is read
        through `.columns` / `.index` and converted with `np.array`.
        Required: the default of None raises TypeError.
    tree_level_list (list):
        name of each level of the given reference tree weights
        default=['Genus', 'Family', 'Order', 'Class', 'Phylum']
    weight_opacity (float):
        opacity for weight circle
        default=0.4
    weight_max_radios (int):
        maximum radius for weight circle
        default=10
    phylum_background_color_on (boolean):
        show the background color for each phylum based on `phylum_color`.
        default=True
    phylum_color (list):
        specify the list of background colors for phylum level. If
        `phylum_color` is empty, colors are drawn at random from the CSS4
        color names, one per phylum.
        default=[]
    phylum_color_legend (boolean):
        show the legend for the background colors for phylum level
        default=False
    show_covariates (boolean):
        show the effect of the covariates
        default=True
    verbose (boolean):
        show the log if True
        default=True

    Returns
    -------
    The value returned by ``Tree.render(file_name=file_name, ...)``.

    Raises
    ------
    TypeError
        If `tree_weight` is None.

    Examples
    --------
    Draw phylogenetic tree

    deepbiome_draw_phylogenetic_tree(log, network_info, path_info, num_classes, file_name = "%%inline")
    """
    if tree_weight is None:
        # Same exception type the unguarded `tree_weight[-1]` used to raise,
        # but with a message that names the actual problem.
        raise TypeError(
            "tree_weight is required to draw the tree (got None)")

    os.environ[
        'QT_QPA_PLATFORM'] = 'offscreen'  # for tree figure (https://github.com/etetoolkit/ete/issues/381)

    reader_class = getattr(readers,
                           network_info['model_info']['reader_class'].strip())
    reader = reader_class(log, path_info, verbose=verbose)
    data_info = path_info['data_info']
    data_path = data_info['data_path']
    try:
        count_path = data_info['count_path']
        x_list = np.array(
            pd.read_csv(data_info['count_list_path'], header=None).iloc[:, 0])
        x_path = np.array([
            '%s/%s' % (count_path, entry) for entry in x_list
            if '.csv' in entry
        ])
    except Exception:
        # Best-effort fallback to the single `x_path` file when the count
        # list is not configured or cannot be read. `Exception` (not a bare
        # except) so KeyboardInterrupt/SystemExit still propagate.
        x_path = np.array(['%s/%s' % (data_path, data_info['x_path'])])

    # Only the first file is needed to discover covariates.
    reader.read_dataset(x_path[0], None, 0)

    network_class = getattr(
        build_network, network_info['model_info']['network_class'].strip())
    network = network_class(network_info,
                            path_info,
                            log,
                            fold=0,
                            num_classes=num_classes,
                            tree_level_list=tree_level_list,
                            is_covariates=reader.is_covariates,
                            covariate_names=reader.covariate_names,
                            verbose=False)
    tree_info = network.phylogenetic_tree_info
    tree_index = network.phylogenetic_tree_dict

    if len(phylum_color) == 0:
        colors_name = list(mcolors.CSS4_COLORS.keys())
        if reader.is_covariates and show_covariates:
            phylum_column = 'Phylum_with_covariates'
        else:
            phylum_column = 'Phylum'
        phylum_color = np.random.choice(
            colors_name, tree_info[phylum_column].unique().shape[0])

    basic_st = NodeStyle()
    basic_st['size'] = weight_max_radios * 0.5
    basic_st['shape'] = 'circle'
    basic_st['fgcolor'] = 'black'

    def _node_style(level, color_index):
        # Style for a node of `level`: phylum nodes get a background color
        # (recorded in phylum_color_dict by the caller), all others the
        # shared basic style.
        if level == 'Phylum' and phylum_background_color_on:
            style = copy.deepcopy(basic_st)
            style["bgcolor"] = phylum_color[color_index]
            return style
        return basic_st

    t = Tree()
    root_st = NodeStyle()
    root_st["size"] = 0
    t.set_style(root_st)

    tree_node_dict = {'root': t}
    phylum_color_dict = {}

    # First layer below the root: the last entry of tree_level_list, named
    # after the columns of the last weight matrix.
    lower_class = tree_level_list[-1]
    lower_layer_names = tree_weight[-1].columns.to_list()
    layer_tree_node_dict = {}
    for j, val in enumerate(lower_layer_names):
        # add_child returns the node it created; no need to search for it.
        leaf_t = t.add_child(name=val)
        leaf_t.set_style(_node_style(lower_class, j))
        if lower_class == 'Phylum' and phylum_background_color_on:
            phylum_color_dict[val] = phylum_color[j]
        layer_tree_node_dict[val] = leaf_t
    tree_node_dict[lower_class] = layer_tree_node_dict
    upper_class = lower_class
    upper_layer_names = lower_layer_names

    # Remaining layers, walking tree_level_list from the end to the start.
    for i in range(len(tree_level_list) - 1):
        lower_class = tree_level_list[-2 - i]
        if upper_class == 'Disease' and not show_covariates:
            lower_layer_names = tree_info[lower_class].unique()
        else:
            lower_layer_names = tree_weight[-i - 1].index.to_list()

        if tree_weight_on:
            # Depends only on the level, so scale once per level rather than
            # once per child. np.array() copies, so tree_weight is untouched.
            edge_weights = np.array(tree_weight[-1 - i])
            edge_weights *= (weight_max_radios / np.max(edge_weights))

        layer_tree_node_dict = {}
        for val in upper_layer_names:
            parent_t = tree_node_dict[upper_class][val]
            if upper_class == 'Disease':
                child_class = lower_layer_names
            else:
                child_class = tree_info[lower_class][tree_info[upper_class] ==
                                                     val].unique()

            for k, child_val in enumerate(child_class):
                leaf_t = parent_t.add_child(name=child_val)
                leaf_t.set_style(_node_style(lower_class, k))
                if lower_class == 'Phylum' and phylum_background_color_on:
                    phylum_color_dict[child_val] = phylum_color[k]

                if tree_weight_on:
                    if upper_class == 'Disease':
                        upper_num = 0
                    else:
                        upper_num = tree_index[upper_class][val]
                    if (upper_class == 'Disease' and reader.is_covariates
                            and show_covariates):
                        lower_num = tree_index['%s_with_covariates' %
                                               lower_class][child_val]
                    else:
                        lower_num = tree_index[lower_class][child_val]
                    leaf_t.add_features(
                        weight=edge_weights[lower_num, upper_num])
                layer_tree_node_dict[child_val] = leaf_t
        tree_node_dict[lower_class] = layer_tree_node_dict
        upper_class = lower_class
        upper_layer_names = lower_layer_names

    def layout(node):
        if "weight" in node.features:
            # Circle whose radius is the node's "weight" feature; blue for
            # positive weights, red otherwise.
            # NOTE(review): a negative weight is passed as a negative radius
            # -- confirm whether abs() was intended.
            color = "RoyalBlue" if node.weight > 0 else "Red"
            C = CircleFace(radius=node.weight, color=color, style="circle")
            # Let's make the circle transparent
            C.opacity = weight_opacity
            # And place as a float face over the tree
            faces.add_face_to_node(C, node, 0, position="float")

        if node_name_on and node.is_leaf():
            # Add node name to leaf nodes
            N = AttrFace("name", fsize=name_fsize, fgcolor="black")
            faces.add_face_to_node(N, node, 0)

    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.mode = "c"
    ts.arc_start = arc_start
    ts.arc_span = arc_span
    ts.layout_fn = layout
    ts.branch_vertical_margin = branch_vertical_margin
    ts.show_scale = False

    if phylum_color_legend:
        for phylum_name in sorted(phylum_color_dict):
            color_name = phylum_color_dict[phylum_name]
            ts.legend.add_face(CircleFace(weight_max_radios * 1, color_name),
                               column=0)
            ts.legend.add_face(TextFace(" %s" % phylum_name,
                                        fsize=name_fsize),
                               column=1)

    return t.render(file_name=file_name, w=img_w, tree_style=ts)


# #########################################################################################################################
# if __name__ == "__main__":
#     argdict = argv_parse(sys.argv)
#     try: gpu_memory_fraction = float(argdict['gpu_memory_fraction'][0])
#     except: gpu_memory_fraction = None
#     try: max_queue_size=int(argdict['max_queue_size'][0])
#     except: max_queue_size=10
#     try: workers=int(argdict['workers'][0])
#     except: workers=1
#     try: use_multiprocessing=argdict['use_multiprocessing'][0]=='True'
#     except: use_multiprocessing=False
#     ### Logger ############################################################################################
#     logger = logging_daily.logging_daily(argdict['log_info'][0])
#     logger.reset_logging()
#     log = logger.get_logging()
#     log.setLevel(logging_daily.logging.INFO)
#     log.info('Argument input')
#     for argname, arg in argdict.items():
#         log.info('    {}:{}'.format(argname,arg))
#     ### Configuration #####################################################################################
#     config_data = configuration.Configurator(argdict['path_info'][0], log)
#     config_data.set_config_map(config_data.get_section_map())
#     config_data.print_config_map()
#     config_network = configuration.Configurator(argdict['network_info'][0], log)
#     config_network.set_config_map(config_network.get_section_map())
#     config_network.print_config_map()
#     path_info = config_data.get_config_map()
#     network_info = config_network.get_config_map()
#     test_evaluation, train_evaluation, network = deepbiome_train(log, network_info, path_info, number_of_fold=20)
def plot_phylo(nw_tree,
               out_name,
               parenthesis_classif=True,
               show_support=False,
               radial_mode=False,
               root=False):
    """Render a Newick tree to ``out_name`` with italic, colored leaf names.

    nw_tree -- Newick input accepted by ``ete3.Tree(..., format=0)``.
    out_name -- output image path handed to ``Tree.render``.
    parenthesis_classif -- if true, each leaf is classified by the
        second-to-last ``'_'``-separated field of its name (minus that
        field's last character) and every distinct classification gets its
        own color from ``get_spaced_colors``. Leaves that cannot be
        classified or colored are drawn in black.
    show_support -- if true, ete3 prints branch support values; if false,
        nodes whose support is below 1 are marked with a red dot of size 4
        instead.
    radial_mode -- if true, draw a full-circle layout starting at -90
        degrees.
    root -- if true, re-root the tree on its midpoint outgroup.
    """
    from ete3 import Tree, AttrFace, TreeStyle, NodeStyle
    import orthogroup2phylogeny_best_refseq_uniprot_hity

    ete2_tree = Tree(nw_tree, format=0)
    print(root)
    if root:
        # Midpoint rooting: use the midpoint outgroup as the tree outgroup.
        ete2_tree.set_outgroup(ete2_tree.get_midpoint_outgroup())
    ete2_tree.ladderize()

    name2classif = {}
    classif2col = {}
    if parenthesis_classif:
        for lf in ete2_tree.iter_leaves():
            try:
                name2classif[lf.name] = lf.name.split('_')[-2][0:-1]
            except (AttributeError, IndexError):
                # Name has no '_' separated field to read: leave the leaf
                # unclassified (it is drawn in black below).
                continue
        # Sorted so that a given classification gets the same color on every
        # run; iterating a bare set made the assignment arbitrary.
        classif_list = sorted(set(name2classif.values()))
        classif2col = dict(
            zip(
                classif_list,
                orthogroup2phylogeny_best_refseq_uniprot_hity.
                get_spaced_colors(len(classif_list))))

    for lf in ete2_tree.iter_leaves():
        # Falls back to black for unclassified leaves and for
        # classifications that did not receive a color.
        col = classif2col.get(name2classif.get(lf.name), 'black')
        ff = AttrFace("name", fsize=12, fstyle='italic')
        ff.fgcolor = col
        lf.add_face(ff, column=0)

    # Style every node once. This pass used to sit inside the leaf loop,
    # re-styling the whole tree once per leaf with the same result.
    for n in ete2_tree.traverse():
        nstyle = NodeStyle()
        nstyle["fgcolor"] = "red"
        if not show_support and float(n.support) < 1:
            nstyle["size"] = 4
        else:
            nstyle["size"] = 0
        n.set_style(nstyle)

    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.show_branch_support = show_support
    if radial_mode:
        ts.mode = "c"
        ts.arc_start = -90
        ts.arc_span = 360
    # NOTE(review): nesting reconstructed -- confirm these two settings were
    # meant to apply to both layouts and not only to radial mode.
    ts.tree_width = 370
    ts.complete_branch_lines_when_necessary = True
    ete2_tree.render(out_name, tree_style=ts, w=900)