def plot_uncorrected_phylogeny(tree, species, latin_names, species_history):
    """
    Renders the input tree to a figure file, with same length for all branches.

    The figure is written to ``rate_adjustment/<species>/<file name>``, where
    the file name comes from the module-level ``_TREE`` template formatted
    with the species name.

    :param tree: input tree from configuration file
    :param species: the current focal species
    :param latin_names: a dictionary-like data structure that associates each
        informal species name to its latin name
    :param species_history: the list of ancestor nodes of the focal species,
        including the focal species and going up to the root (accepted for
        interface compatibility; not read by this function)
    """
    # Both helpers modify the tree in place before it is drawn.
    label_leaves_with_latin_names(tree, latin_names)
    node_and_branch_style(tree)

    style = TreeStyle()
    style.orientation = 1
    style.branch_vertical_margin = 14
    style.scale = 200
    style.show_scale = False
    style.show_branch_length = False
    # Leaf names are already drawn by a dedicated face, so the built-in
    # leaf label is switched off.
    style.show_leaf_name = False
    # Uniform 25-unit margin on every side of the picture.
    style.margin_left = 25
    style.margin_right = 25
    style.margin_top = 25
    style.margin_bottom = 25

    figure_path = os.path.join(
        "rate_adjustment", f"{species}", f"{_TREE.format(species)}"
    )
    tree.render(figure_path, w=4.5, units="in", tree_style=style)
def get_tree_style():
    """Return the TreeStyle used for the "Tax Assignment Tree" figure.

    The style hides leaf names, branch lengths, branch supports and the
    scale bar, uses a 10-unit margin on every side, and carries a title made
    of two blank rows followed by the heading text.
    """
    style = TreeStyle()

    # Everything that ETE would otherwise draw automatically is turned off.
    style.show_leaf_name = False
    style.show_branch_length = False
    style.show_branch_support = False
    style.show_scale = False

    style.margin_top = 10
    style.margin_bottom = 10
    style.margin_left = 10
    style.margin_right = 10

    heading = TextFace(" Tax Assignment Tree", fsize=10)
    heading.hz_align = 2
    heading.vt_align = 2

    # Two blank rows are stacked above the heading in title column 0.
    for _ in range(2):
        style.title.add_face(TextFace(" "), column=0)
    style.title.add_face(heading, column=0)

    return style
def get_default_tree_style(color_dict):
    """Build a circular TreeStyle whose legend lists one coloured circle per entry.

    NOTE: a second definition of this function with the same body appears
    later in the module; at import time the later definition is the one
    bound to the name.

    :param color_dict: mapping of legend label -> colour. Each entry adds one
        legend row made of a spacer, a circle filled with the colour, and
        the label text.
    :return: the configured ``TreeStyle``.
    """
    ts = TreeStyle()
    ts.mode = "c"
    # ts.layout_fn = layout
    ts.margin_top = 50
    ts.margin_bottom = 0
    ts.margin_left = 50
    ts.margin_right = 50
    ts.show_scale = False
    ts.show_leaf_name = False
    ts.show_branch_length = False
    ts.show_branch_support = False

    # dict.iteritems() only exists on Python 2 and raises AttributeError on
    # Python 3; items() is available on both.
    for label, colour in color_dict.items():
        ts.legend.add_face(TextFace(" ", fsize=30), column=0)
        ts.legend.add_face(CircleFace(10, colour), column=1)
        ts.legend.add_face(TextFace(" %s" % label, fsize=30), column=2)

    # Five blank rows across the three legend columns pad the legend.
    for _ in range(5):
        for column in range(3):
            ts.legend.add_face(TextFace(" "), column=column)

    ts.legend_position = 3
    return ts
def get_default_tree_style(color_dict):
    """Build a circular TreeStyle whose legend lists one coloured circle per entry.

    NOTE: an earlier definition of this function with the same body appears
    above in the module; this later definition is the one that stays bound
    to the name after import.

    :param color_dict: mapping of legend label -> colour. Each entry adds one
        legend row made of a spacer, a circle filled with the colour, and
        the label text.
    :return: the configured ``TreeStyle``.
    """
    ts = TreeStyle()
    ts.mode = 'c'
    # ts.layout_fn = layout
    ts.margin_top = 50
    ts.margin_bottom = 0
    ts.margin_left = 50
    ts.margin_right = 50
    ts.show_scale = False
    ts.show_leaf_name = False
    ts.show_branch_length = False
    ts.show_branch_support = False

    # dict.iteritems() only exists on Python 2 and raises AttributeError on
    # Python 3; items() is available on both.
    for label, colour in color_dict.items():
        ts.legend.add_face(TextFace(" ", fsize=30), column=0)
        ts.legend.add_face(CircleFace(10, colour), column=1)
        ts.legend.add_face(TextFace(" %s" % label, fsize=30), column=2)

    # Five blank rows across the three legend columns pad the legend.
    for _ in range(5):
        for column in range(3):
            ts.legend.add_face(TextFace(" "), column=column)

    ts.legend_position = 3
    return ts
def plot_tree(tree, tree_title, tree_output):
    """Render ``tree`` as a rectangular figure with a title and name labels.

    Every node gets a text face showing its name: leaves are labelled to the
    right of their branch, internal nodes above their branch in a smaller
    font. Node markers are hidden (size 0) on all nodes.

    :param tree: tree object providing ``traverse()`` and ``render()``.
    :param tree_title: text shown as the figure title.
    :param tree_output: output path handed to ``tree.render``.
    """
    # Node appearance, applied key by key to a fresh NodeStyle per node.
    leaf_settings = (
        ("shape", "circle"),     # marker shape: circle, square or sphere
        ("size", 0),             # marker size
        ('hz_line_width', 0.5),  # branch line width
        ('vt_line_width', 0.5),
        ('hz_line_type', 0),     # 0 solid, 1 dashed, 2 dotted
        ('vt_line_type', 0),
        ("fgcolor", "blue"),     # marker colour
    )
    internal_settings = (("size", 0),)

    # Overall picture layout.
    style = TreeStyle()
    style.mode = "r"  # 'r' for rectangular, 'c' for circular
    style.show_leaf_name = 0
    style.title.add_face(
        TextFace(tree_title, fsize=8, fgcolor='black', ftype='Arial',
                 tight_text=False),
        column=0)
    style.rotation = 0  # from 0 to 360
    style.show_scale = False
    style.show_border = False
    style.branch_vertical_margin = 3  # pixels between adjacent branches
    style.margin_top = 10
    style.margin_bottom = 10
    style.margin_left = 10
    style.margin_right = 10

    for current in tree.traverse():
        if current.is_leaf():
            settings, font_size = leaf_settings, 5
            face_column, face_position = 0, 'branch-right'
        else:
            settings, font_size = internal_settings, 3
            face_column, face_position = 5, 'branch-top'

        node_style = NodeStyle()
        for key, value in settings:
            node_style[key] = value

        name_face = TextFace(current.name, fsize=font_size, fgcolor='black',
                             tight_text=False, bold=False)
        current.add_face(name_face, column=face_column,
                         position=face_position)
        current.set_style(node_style)

    # Figure width is fixed at 900 px.
    tree.render(tree_output, w=900, units="px", tree_style=style)
def heatmap_view(tree, orthologous_groups, save_dir):
    """Generates a heatmap of regulation states in all species.

    Two SVG files are written into ``save_dir``: ``heatmap.svg``, whose cells
    carry the locus tag and operon id, and ``heatmap_light.svg``, a compact
    version with narrow unlabelled cells. Each leaf of the tree is a row and
    each orthologous group a column; the whole picture is rotated by 90
    degrees.

    NOTE: another ``heatmap_view`` is defined later in this module; the later
    definition is the one bound to the name after import.

    :param tree: tree whose leaf names are matched against
        ``gene.genome.strain_name``; faces are added to it in place.
    :param orthologous_groups: groups exposing ``genes``, ``description``,
        ``COGs``, ``NOGs`` and ``PFAMs``.
    :param save_dir: directory receiving the two SVG files.
    """

    def id_list_text(annotations):
        """Return ``str`` of the list of 'ID' values, or '' when there are none."""
        if len(annotations) > 0:
            return str([item['ID'] for item in annotations])
        return ''

    # The copy must be taken before any face is added to `tree`.
    light_tree = copy.deepcopy(tree)  # Tree copy for the light heatmap

    # Heat map settings
    rect_face_fgcolor = 'black'
    locus_tag_len = max(
        len(gene.locus_tag) + 5
        for ortho_grp in orthologous_groups
        for gene in ortho_grp.genes)
    rect_face_width = locus_tag_len * 8
    light_rect_face_width = 20
    rect_face_height = 20
    rotation = 90

    # Sort orthologous groups by the number of regulated genes in each group
    orthologous_groups = filter_and_sort_orthologous_grps(orthologous_groups)

    # For each species and its gene in each orthologous group, draw a rectangle
    for node, light_node in zip(tree.get_leaves(), light_tree.get_leaves()):
        for i, orthologous_grp in enumerate(orthologous_groups, start=1):
            # All orthologs of this group that belong to the current genome
            matching_genes = [g for g in orthologous_grp.genes
                              if g.genome.strain_name == node.name]
            if len(matching_genes) > 0:
                # The first ortholog is displayed for the group; the original
                # author notes it is the one with the highest probability of
                # regulation (presumably guaranteed by the sort above).
                gene = matching_genes[0]
                p_regulation = gene.operon.regulation_probability
                p_notregulation = 1.0 - p_regulation
                p_absence = 0
            else:
                # No ortholog from this genome
                gene = None
                p_regulation = 0
                p_notregulation = 0
                p_absence = 1

            # Color of the rectangle is based on probabilities
            rect_face_bgcolor = rgb2hex(p_notregulation, p_regulation,
                                        p_absence)
            rect_face_text = ('%s [%d]' % (gene.locus_tag,
                                           gene.operon.operon_id)
                              if gene else '')
            rect_face_label = {
                'text': rect_face_text,
                'font': 'Courier',
                'fontsize': 8,
                'color': 'black'
            }
            # Create the rectangles (labelled and compact variants)
            rect_face = RectFace(rect_face_width, rect_face_height,
                                 rect_face_fgcolor, rect_face_bgcolor,
                                 label=rect_face_label)
            light_rect_face = RectFace(light_rect_face_width,
                                       rect_face_height, rect_face_fgcolor,
                                       rect_face_bgcolor, label='')
            # Counter-rotate the cells so they stay upright once the whole
            # picture is rotated below.
            rect_face.rotation = -rotation
            light_rect_face.rotation = -rotation
            # Add the rectangle to the corresponding column
            node.add_face(rect_face, column=i, position='aligned')
            light_node.add_face(light_rect_face, column=i, position='aligned')

    ts = TreeStyle()

    # Add orthologous group descriptions.
    # Each annotation list is made optional on its own. Previously the PFAMs
    # condition sat outside the list literal, so a group without PFAMs lost
    # its whole description (including the text, COGs and NOGs).
    descriptions = [
        '-'.join([grp.description,
                  id_list_text(grp.COGs),
                  id_list_text(grp.NOGs),
                  id_list_text(grp.PFAMs)])
        for grp in orthologous_groups]
    max_description_len = max(map(len, descriptions))
    descriptions = [
        '[%d]' % i + description +
        ' ' * (max_description_len - len(description))
        for i, description in enumerate(descriptions, start=1)
    ]
    for i, description in enumerate(descriptions, start=1):
        text_face = TextFace(description, ftype='Courier')
        text_face.hz_align = 1
        text_face.vt_align = 1
        text_face.rotation = -rotation
        ts.aligned_header.add_face(text_face, column=i)

    # Rotate the generated heatmap.
    ts.margin_left = 10
    ts.margin_top = 20
    ts.rotation = rotation
    ts.show_scale = False

    # For some reason, it can't render to PDF in color
    tree.render(os.path.join(save_dir, 'heatmap.svg'), tree_style=ts)
    light_tree.render(os.path.join(save_dir, 'heatmap_light.svg'),
                      tree_style=ts)
def format_tree(tree, alignment, al_len_dict, edpos, codontable={}, colors=None, codon_col={}, text="C-to-U RNA editing", ic_contents=[]):
    """Format the rendering of tree data for alignment.

    A copy of ``tree`` is annotated so that every leaf carries the
    concatenation of its per-gene sequences (``sequence`` feature) and the
    matching list of edited positions shifted into concatenated coordinates
    (``edlist`` feature). A ``TreeStyle`` is built that draws the leaf name
    and the sequence as aligned faces and adds one footer block per gene.

    :param tree: input tree; it is copied, the original is left untouched.
    :param alignment: ordered mapping gene name -> {species name -> sequence}.
    :param al_len_dict: mapping gene name -> alignment length; used to fill
        species missing from a gene with gaps and to compute gene limits.
    :param edpos: mapping gene name -> {species name -> list of positions}.
    :param codontable: passed through to ``SequenceFace``.
    :param colors: container of leaf names (``name`` or ``fullname``) to draw
        in ``MARKED_NODE_COLOR``; ``None`` marks nothing.
    :param codon_col: passed to ``SequenceFace`` as ``bg_colors``.
    :param text: legend text.
    :param ic_contents: not used by this function.
    :return: tuple ``(annotated tree copy, TreeStyle)``.

    The mutable defaults are kept for signature compatibility; this function
    itself never mutates them.
    """
    t = tree.copy()
    # `colors=None` is the default; membership tests on None raise TypeError
    # inside the layout function, so fall back to an empty container.
    marked_names = colors if colors is not None else ()

    # alignment is ordered dict
    # flip alignment dict from gene ==> species ==> seq
    # to species ==> gene ==> seq
    specSeq = ddict(str)
    edposSeq = ddict(list)
    cur_len = 0  # length of the concatenation built so far
    limits = []  # (gene name, cumulative end position) per gene
    for gname, specdict in alignment.items():
        for node in t:
            # fill missing with gap
            specSeq[node.name] += specdict.get(node.name,
                                               al_len_dict[gname] * '-')
            # shift per-gene positions by the offset of this gene
            edposSeq[node.name] += [
                x + cur_len for x in edpos[gname].get(node.name, [])
            ]
        cur_len += al_len_dict.get(gname, 0)
        limits.append((gname, cur_len))

    for node in t:
        node.add_feature("sequence", specSeq[node.name])
        node.add_feature('edlist', edposSeq[node.name])

    ts = TreeStyle()
    ts.branch_vertical_margin = 15
    ts.scale = 15
    ts.allow_face_overlap = False
    ts.show_scale = False
    ts.show_leaf_name = False

    ns = NodeStyle()
    ns['shape'] = 'square'
    ns['fgcolor'] = 'black'
    ns['size'] = 0

    def layout(node):
        """Attach the name and sequence faces to leaves at draw time."""
        node.img_style = ns
        if node.is_leaf():
            is_marked = (node.name in marked_names
                         or node.fullname in marked_names)
            faces.add_face_to_node(
                AttrFace('fullname', fsize=14,
                         fgcolor=(MARKED_NODE_COLOR if is_marked
                                  else 'black')),
                node, 0, position="aligned")
            if hasattr(node, "sequence") and node.sequence:
                seqface = SequenceFace(node.sequence, "codon", fsize=13,
                                       codontable=codontable,
                                       col_w=RES_COL_WIDTH,
                                       bg_colors=codon_col,
                                       black_out=node.edlist)
                faces.add_face_to_node(seqface, node, 1, position="aligned")

    ts.layout_fn = layout

    ts.legend.add_face(TextFace(text, fsize=14), column=1)
    ts.legend_position = 1

    # One footer column per gene: position ruler, shaded bar, gene name and
    # a thin white spacer, all sized from the gene's own length.
    ind = 1
    prev_gend = 0
    for (gname, gend) in limits:
        gene_len = gend - prev_gend
        ts.aligned_foot.add_face(
            List90Face(list(range(0, gene_len, 3)), fsize=13,
                       ftype='Monospace', col_w=RES_COL_WIDTH * 3), ind)
        ts.aligned_foot.add_face(
            faces.RectFace(RES_COL_WIDTH * gene_len, 13, '#BBBBBB',
                           '#EEEEEE'), ind)
        ts.aligned_foot.add_face(TextFace(gname, fsize=13), ind)
        ts.aligned_foot.add_face(
            faces.RectFace(RES_COL_WIDTH * gene_len, 5, 'white', 'white'),
            ind)
        # `limits` already stores cumulative ends, so the previous end is
        # simply this gene's end. Accumulating with += produced wrong (even
        # negative) widths from the third gene onward.
        prev_gend = gend
        ind += 1

    ts.margin_left = 5
    ts.margin_right = 5
    ts.margin_bottom = 5
    return t, ts
def matriline_tree(id, db):
    """Build, render and return the matriline tree that contains individual ``id``.

    The maternal line is followed upwards with ``db.get_mother`` until no
    mother is recorded, then all descendants of that oldest female are
    collected generation by generation with ``db.get_offsprings``. The
    result is assembled as a Newick string, drawn with ETE and written to
    ``tree.png`` in the current working directory.

    :param id: database id of the central individual (the name shadows the
        builtin but is kept for interface compatibility).
    :param db: database accessor providing ``get_elephant(id=...)``,
        ``get_mother(id=...)`` and ``get_offsprings(id=...)``. Rows returned
        by ``get_elephant`` are indexed as used below: [1] number, [4] sex,
        [5] birth date.
    :return: tuple ``(newick string from t.write(format=1), list of node
        names in traversal order)``.
    """
    central_ind = db.get_elephant(id=id)[1]

    # Start upwards to the oldest existing maternal ancestor. If the central
    # individual has no recorded mother, it is itself the root of the line.
    oldest_mother = id
    while True:
        mother = db.get_mother(id=oldest_mother)
        if mother is None:
            break
        oldest_mother = mother

    # Go back down. The criterion to stop is that no female of generation 'n'
    # has any offspring.
    oldest_mother_num = db.get_elephant(id=oldest_mother)[1]
    newick = "('" + str(oldest_mother_num) + "_\u2640')"
    # branch_length[k] = [number, branch length], appended in the same order
    # in which the nodes are later visited by t.traverse().
    branch_length = [[oldest_mother_num, 2]]
    mothers = [oldest_mother]
    while mothers:
        generation_n = []
        for m in mothers:
            m_info = db.get_elephant(id=m)  # one lookup per mother
            m_num = m_info[1]
            m_birth = m_info[5]
            o = db.get_offsprings(id=m)
            if o is not None:
                taxon = []
                for i in o:
                    generation_n.append(i)
                    info = db.get_elephant(id=i)
                    num = info[1]
                    sex = info[4]
                    birth = info[5]
                    # Branch length = age of the mother (in years) at birth.
                    age_of_mother_at_birth = round(
                        (birth - m_birth).days / 365.25)
                    branch_length.append([num, age_of_mother_at_birth])
                    if sex == 'F':
                        u = '\u2640'
                    elif sex == 'M':
                        u = '\u2642'
                    else:
                        u = '?'
                    taxon.append(str(num) + '_' + u)
                # Replace the quoted mother label by "(children)mother". The
                # quotes mark labels that have not been expanded yet.
                newick = newick.replace(
                    ("'" + str(m_num) + "_\u2640'"),
                    (str(taxon).replace('[', '(').replace(']', ')')
                     .replace(' ', '') + str(m_num) + '_\u2640'))
        mothers = generation_n
    newick = newick.replace("'", "") + ';'

    # Now formatting for the actual plotting in ete3:
    t = Tree(newick, format=8)
    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.rotation = 90
    ts.show_scale = False
    ts.min_leaf_separation = 50

    def my_layout(node):
        """Label every node with its name, counter-rotated to stay upright."""
        F = TextFace(node.name, tight_text=True)
        F.fsize = 6
        F.margin_left = 5
        F.margin_right = 5
        F.margin_top = 0
        F.margin_bottom = 15
        F.rotation = -90
        add_face_to_node(F, node, column=0, position="branch-right")

    ts.layout_fn = my_layout
    ts.margin_left = 10
    ts.margin_right = 10
    ts.margin_top = 10
    ts.margin_bottom = 10

    for index, n in enumerate(t.traverse()):
        if index == 0:
            # First visited node: the unnamed wrapper created by the outer
            # parentheses of the Newick string.
            n.delete()
            n.img_style["size"] = 0.
            n.img_style["vt_line_width"] = 1
            n.img_style["hz_line_width"] = 1
            continue
        if str(n.name[:-2]) == str(central_ind):
            # Highlight the central individual ([:-2] strips "_<sex>").
            n.img_style["size"] = 10
            n.img_style["vt_line_width"] = 1
            n.img_style["hz_line_width"] = 1
            n.img_style["shape"] = "circle"
            n.img_style["fgcolor"] = "#A30B37"
        else:
            n.img_style["size"] = 0.
            n.img_style["vt_line_width"] = 1
            n.img_style["hz_line_width"] = 1
        n.dist = int(branch_length[index - 1][1])

    t.render('tree.png', w=600, units='px', tree_style=ts)
    taxa = [n.name for n in t.traverse()]
    return (t.write(format=1), taxa)
def draw_tree(the_tree, colour, back_color, label, out_file, the_scale, extend, bootstrap, group_file, grid_options, the_table, pres_abs, circular):
    """Draw a midpoint-rooted tree with optional clade colours and annotation columns.

    :param the_tree: tree passed to ``Tree(..., quoted_node_names=True)``.
    :param colour: when truthy (and ``group_file`` is None) leaves are
        grouped automatically by distance and each group gets its own hue.
    :param back_color: when truthy (with ``group_file``) the branches joining
        coloured groups are painted with the average of the joined colours.
    :param label: value assigned to ``TreeStyle.show_branch_length``.
    :param out_file: output image path; ``None`` opens the interactive viewer
        instead and the column description goes to ``viridis.new_desc``.
    :param the_scale: value assigned to ``TreeStyle.scale``.
    :param extend: when truthy, guiding lines are drawn to aligned faces.
    :param bootstrap: value assigned to ``TreeStyle.show_branch_support``.
    :param group_file: optional whitespace-separated file; first field is a
        substring of leaf names, second field the colour of that group.
    :param grid_options: ``None`` (no annotation columns), ``'auto'`` (one
        'colour' column per table header) or path to a tab-separated
        description file made of ``H`` (header: name, type, width) and ``C``
        (colour: value, colour) lines.
    :param the_table: tab-separated table; first row is the header, first
        column the entry name matched against leaf names.
    :param pres_abs: optional pair ``(protein fasta, folder of query
        sequences)`` for a BLAST-based gene presence/absence grid.
    :param circular: when not ``None`` the tree is drawn in circular mode.

    Side effects: writes ``<out_file>.new_desc`` (or ``viridis.new_desc``)
    when ``grid_options`` is given; runs ``makeblastdb``/``blastx`` and
    writes ``tempdb*`` and ``*.blast`` files when ``pres_abs`` is given;
    renders ``out_file`` or opens a viewer. May call ``sys.exit`` on
    malformed table input.
    """

    def line_style(line_colour, line_width):
        """Return a NodeStyle with hidden node marker and the given branch lines."""
        new_style = NodeStyle()
        new_style['size'] = 0
        new_style["vt_line_color"] = line_colour
        new_style["hz_line_color"] = line_colour
        new_style["vt_line_width"] = line_width
        new_style["hz_line_width"] = line_width
        return new_style

    def padded_text_face(face_text):
        """Return a TextFace for ``face_text`` padded with the standard gaps."""
        return TextFace(font_gap * ' ' + face_text + ' ' * font_buffer,
                        fsize=font_size, ftype=font_type, tight_text=True)

    t = Tree(the_tree, quoted_node_names=True)
    # t.ladderize()
    font_size = 8
    # NOTE(review): 'Heveltica' looks like a misspelling of 'Helvetica'; left
    # unchanged here because correcting it changes the rendered font.
    font_type = 'Heveltica'
    font_gap = 3
    font_buffer = 10
    o = t.get_midpoint_outgroup()
    t.set_outgroup(o)
    the_leaves = list(t.iter_leaves())
    groups = {}
    num = 0
    last_node = None
    ca_list = []  # (common-ancestor node, colour) of every coloured group

    if group_file is not None:
        # Start from an all-black tree, then paint the user-defined groups.
        style = line_style('#000000', 1)
        for n in t.traverse():
            n.set_style(style)
        with open(group_file) as f:
            group_dict = {}
            for line in f:
                fields = line.split()
                group_dict[fields[0]] = fields[1]
        # A leaf joins a group when the group key is a substring of its name.
        for node in the_leaves:
            i = node.name
            for j in group_dict:
                if j in i:
                    groups.setdefault(group_dict[j], []).append(i)
        coloured_nodes = []
        for i in groups:
            the_col = i
            style = line_style(the_col, 2)
            if len(groups[i]) == 1:
                ca = t.search_nodes(name=groups[i][0])[0]
                ca.set_style(style)
                coloured_nodes.append(ca)
            else:
                ca = t.get_common_ancestor(groups[i])
                ca.set_style(style)
                coloured_nodes.append(ca)
                # Paint the whole subtree below the common ancestor.
                tocolor = list(ca.children)
                while len(tocolor) > 0:
                    x = tocolor.pop(0)
                    coloured_nodes.append(x)
                    x.set_style(style)
                    tocolor.extend(x.children)
            ca_list.append((ca, the_col))
        if back_color:
            # Repeatedly merge the two closest group ancestors: their common
            # ancestor gets the average colour and replaces them in the
            # list, until a single node is left.
            while len(ca_list) > 1:
                distance = float('inf')
                for i, col1 in ca_list:
                    for j, col2 in ca_list:
                        if i is not j:
                            the_dist = t.get_distance(i, j)
                            if the_dist <= distance:
                                distance = the_dist
                                the_i = i
                                the_j = j
                                the_i_col = col1
                                the_j_col = col2
                ca_list.remove((the_i, the_i_col))
                ca_list.remove((the_j, the_j_col))
                rgb1 = strtorgb(the_i_col)
                rgb2 = strtorgb(the_j_col)
                rgb3 = ((rgb1[0] + rgb2[0]) / 2, (rgb1[1] + rgb2[1]) / 2,
                        (rgb1[2] + rgb2[2]) / 2)
                new_col = colorstr(rgb3)
                new_node = t.get_common_ancestor(the_i, the_j)
                style = line_style(new_col, 2)
                new_node.set_style(style)
                coloured_nodes.append(new_node)
                ca_list.append((new_node, new_col))
                # Work list is created here instead of reusing a variable
                # left over from the group loop, which was undefined when
                # every group had a single member.
                tocolor = list(new_node.children)
                while len(tocolor) > 0:
                    x = tocolor.pop(0)
                    # Only descend through branches not painted yet.
                    if x not in coloured_nodes:
                        coloured_nodes.append(x)
                        x.set_style(style)
                        tocolor.extend(x.children)
    elif colour:
        # Clade cutoff: first quartile of all pairwise leaf distances.
        distances = []
        for node1 in the_leaves:
            for node2 in the_leaves:
                if node1 != node2:
                    distances.append(t.get_distance(node1, node2))
        distances.sort()
        # Floor division: on Python 3 `/` yields a float, which is not a
        # valid list index.
        clade_cutoff = distances[len(distances) // 4]
        # Consecutive leaves closer than the cutoff share a group.
        # groups[k] = [k, leaf name, leaf name, ...]
        for node in the_leaves:
            i = node.name
            if last_node is not None and t.get_distance(
                    node, last_node) <= clade_cutoff:
                groups[group_num].append(i)
            else:
                groups[num] = [num, i]
                group_num = num
                num += 1
            last_node = node
        for i in groups:
            num = groups[i][0]
            h = num * 360 / len(groups)  # one hue per group
            the_col = hsl_to_str(h, 0.5, 0.5)
            style = line_style(the_col, 2)
            if len(groups[i]) == 2:
                # Single-leaf group (index + one name).
                ca = t.search_nodes(name=groups[i][1])[0]
                ca.set_style(style)
            else:
                ca = t.get_common_ancestor(groups[i][1:])
                ca.set_style(style)
                tocolor = list(ca.children)
                while len(tocolor) > 0:
                    x = tocolor.pop(0)
                    x.set_style(style)
                    tocolor.extend(x.children)
            ca_list.append((ca, h))
        # Merge sibling group ancestors: only pairs whose common ancestor has
        # no other child are eligible; the parent gets the mean hue, drawn
        # darker. Stops when no eligible pair remains.
        while len(ca_list) > 1:
            distance = float('inf')
            got_one = False
            for i, col1 in ca_list:
                for j, col2 in ca_list:
                    if i is not j:
                        parent = t.get_common_ancestor(i, j)
                        getit = True
                        for children in parent.children:
                            if children != i and children != j:
                                getit = False
                                break
                        if getit:
                            the_dist = t.get_distance(i, j)
                            if the_dist <= distance:
                                distance = the_dist
                                the_i = i
                                the_j = j
                                the_i_col = col1
                                the_j_col = col2
                                got_one = True
            if not got_one:
                break
            ca_list.remove((the_i, the_i_col))
            ca_list.remove((the_j, the_j_col))
            new_col = (the_i_col + the_j_col) / 2
            new_node = t.get_common_ancestor(the_i, the_j)
            new_node.set_style(line_style(hsl_to_str(new_col, 0.5, 0.3), 2))
            ca_list.append((new_node, new_col))
    else:
        # if you just want a black tree
        style = line_style('#000000', 1)
        for n in t.traverse():
            n.set_style(style)

    # Palette cycled through for 'colour' columns without explicit colours.
    color_list = [(240, 163, 255), (0, 117, 220), (153, 63, 0), (76, 0, 92),
                  (25, 25, 25), (0, 92, 49), (43, 206, 72), (255, 204, 153),
                  (128, 128, 128), (148, 255, 181), (143, 124, 0),
                  (157, 204, 0), (194, 0, 136), (0, 51, 128), (255, 164, 5),
                  (255, 168, 187), (66, 102, 0), (255, 0, 16),
                  (94, 241, 242), (0, 153, 143), (224, 255, 102),
                  (116, 10, 255), (153, 0, 0), (255, 255, 128),
                  (255, 255, 0), (255, 80, 5), (0, 0, 0), (50, 50, 50)]
    up_to_colour = {}
    ts = TreeStyle()
    column_list = []
    width_dict = {}

    if grid_options is not None:
        colour_dict = {}     # column -> {value -> colour}
        type_dict = {}       # column -> column type
        min_val_dict = {}    # column -> smallest value seen (scale types)
        max_val_dict = {}    # column -> largest value seen (scale types)
        leaf_name_dict = {}  # leaf name -> table entry name
        header_count = 0
        the_columns = {}     # table header -> internal column names
        if grid_options == 'auto':
            # NOTE(review): column names are used unsuffixed here, yet they
            # are later displayed via rsplit('_', 1)[0]; a header containing
            # '_' therefore appears truncated. Confirm intended behaviour.
            with open(the_table) as f:
                headers = f.readline().rstrip().split('\t')[1:]
                for i in headers:
                    the_columns[i] = [i]
                    type_dict[i] = 'colour'
                    colour_dict[i] = {'empty': '#FFFFFF'}
                    width_dict[i] = 20
                    up_to_colour[i] = 0
                    column_list.append(i)
        else:
            with open(grid_options) as g:
                for line in g:
                    if line.startswith('H'):
                        name, col_type, width = line.rstrip().split('\t')[1:]
                        # Internal name is "<header>_<n>" so one table column
                        # can be displayed several times.
                        internal_name = name + '_' + str(header_count)
                        the_columns.setdefault(name, []).append(internal_name)
                        width = int(width)
                        name = internal_name
                        header_count += 1
                        colour_dict[name] = {'empty': '#FFFFFF'}
                        type_dict[name] = col_type
                        width_dict[name] = width
                        column_list.append(name)
                        up_to_colour[name] = 0
                        min_val_dict[name] = float('inf')
                        max_val_dict[name] = 0
                    elif line.startswith('C'):
                        # Colour lines apply to the most recent header.
                        c_name, c_col = line.rstrip().split('\t')[1:]
                        if not c_col.startswith('#'):
                            # tuple(): on Python 3 map() returns a one-shot
                            # iterator; every other colorstr call in this
                            # function passes a tuple.
                            c_col = colorstr(
                                tuple(map(int, c_col.split(','))))
                        colour_dict[name][c_name] = c_col
        val_dict = {}  # entry name -> {column -> value}
        with open(the_table) as f:
            headers = f.readline().rstrip().split('\t')[1:]
            column_no = {}
            for num, i in enumerate(headers):
                if i in the_columns:
                    column_no[num] = i
            for line in f:
                name = line.split('\t')[0]
                # Match the entry (text before the first '.') against leaf
                # names; the last matching leaf wins.
                name_stem = name.split('.')[0]
                leaf_name = None
                for n in t.traverse():
                    if n.is_leaf():
                        if name_stem in n.name:
                            leaf_name = n.name
                if leaf_name is None:
                    continue
                leaf_name_dict[leaf_name] = name
                vals = line.rstrip().split('\t')[1:]
                if name in val_dict:
                    sys.exit('Duplicate entry found in table.')
                val_dict[name] = {}
                for num, val in enumerate(vals):
                    if num in column_no and val != '':
                        for column_name in the_columns[column_no[num]]:
                            if type_dict[column_name] == 'colour':
                                val_dict[name][column_name] = val
                                if val not in colour_dict[column_name]:
                                    # Next unused palette colour (cycled).
                                    colour_dict[column_name][val] = colorstr(
                                        color_list[up_to_colour[column_name]
                                                   % len(color_list)])
                                    up_to_colour[column_name] += 1
                            elif type_dict[column_name] == 'text':
                                val_dict[name][column_name] = val
                            elif type_dict[column_name] == 'colour_scale_date':
                                # YYYY-MM-DD -> seconds since 1970-01-01.
                                year, month, day = val.split('-')
                                year, month, day = int(year), int(month), int(day)
                                the_val = datetime.datetime(
                                    year, month, day, 0, 0, 0
                                ) - datetime.datetime(1970, 1, 1, 0, 0, 0)
                                seconds = the_val.total_seconds()
                                val_dict[name][column_name] = seconds
                                if seconds < min_val_dict[column_name]:
                                    min_val_dict[column_name] = seconds
                                if seconds > max_val_dict[column_name]:
                                    max_val_dict[column_name] = seconds
                            elif type_dict[column_name] == 'colour_scale':
                                the_val = float(val)
                                val_dict[name][column_name] = the_val
                                if the_val < min_val_dict[column_name]:
                                    min_val_dict[column_name] = the_val
                                if the_val > max_val_dict[column_name]:
                                    max_val_dict[column_name] = the_val
                            else:
                                sys.exit('Unknown column type')
        if out_file is not None:
            desc_path = out_file + '.new_desc'
        else:
            desc_path = 'viridis.new_desc'
        ts.legend_position = 3
        leg_column = 0
        # The description file is written inside a context manager so the
        # handle is flushed and closed (it was previously never closed).
        with open(desc_path, 'w') as new_desc:
            for num, i in enumerate(column_list):
                display_name = i.rsplit('_', 1)[0]
                nameF = padded_text_face(display_name)
                nameF.rotation = -90
                ts.aligned_header.add_face(nameF, column=num + 1)
                new_desc.write('H\t' + display_name + '\t' + type_dict[i] +
                               '\t' + str(width_dict[i]) + '\n')
                # Legend: a title row, then one row per value / scale step.
                # Each column uses two legend columns (swatch + text).
                if type_dict[i] == 'colour':
                    ts.legend.add_face(padded_text_face(display_name),
                                       column=leg_column + 1)
                    ts.legend.add_face(
                        RectFace(width_dict[i], 20, '#FFFFFF', '#FFFFFF'),
                        column=leg_column)
                    for j in colour_dict[i]:
                        new_desc.write('C\t' + j + '\t' + colour_dict[i][j] +
                                       '\n')
                        ts.legend.add_face(padded_text_face(j),
                                           column=leg_column + 1)
                        ts.legend.add_face(
                            RectFace(width_dict[i], 20, colour_dict[i][j],
                                     colour_dict[i][j]),
                            column=leg_column)
                    leg_column += 2
                elif type_dict[i] == 'colour_scale':
                    ts.legend.add_face(padded_text_face(display_name),
                                       column=leg_column + 1)
                    ts.legend.add_face(
                        RectFace(width_dict[i], 20, '#FFFFFF', '#FFFFFF'),
                        column=leg_column)
                    for num2 in range(11):
                        val = (max_val_dict[i] - min_val_dict[i]) * num2 / 10.0
                        # NOTE(review): the legend spans 270 degrees of hue
                        # while the cells below span 360 - confirm which one
                        # is intended.
                        h = val / (max_val_dict[i] - min_val_dict[i]) * 270
                        swatch = hsl_to_str(h, 0.5, 0.5)
                        ts.legend.add_face(padded_text_face(str(val)),
                                           column=leg_column + 1)
                        ts.legend.add_face(
                            RectFace(width_dict[i], 20, swatch, swatch),
                            column=leg_column)
                    leg_column += 2
                elif type_dict[i] == 'colour_scale_date':
                    ts.legend.add_face(padded_text_face(display_name),
                                       column=leg_column + 1)
                    ts.legend.add_face(
                        RectFace(width_dict[i], 20, '#FFFFFF', '#FFFFFF'),
                        column=leg_column)
                    for num2 in range(11):
                        val = (max_val_dict[i] - min_val_dict[i]) * num2 / 10.0
                        h = val / (max_val_dict[i] - min_val_dict[i]) * 360
                        swatch = hsl_to_str(h, 0.5, 0.5)
                        # Offset from the earliest date, in whole days.
                        days = str(int(val / 60 / 60 / 24)) + ' days'
                        ts.legend.add_face(padded_text_face(days),
                                           column=leg_column + 1)
                        ts.legend.add_face(
                            RectFace(width_dict[i], 20, swatch, swatch),
                            column=leg_column)
                    leg_column += 2
                # One aligned cell per leaf for this column.
                for n in t.traverse():
                    if n.is_leaf():
                        name = leaf_name_dict[n.name]
                        if i in val_dict[name]:
                            val = val_dict[name][i]
                        else:
                            val = 'empty'
                        if type_dict[i] == 'colour':
                            n.add_face(
                                RectFace(width_dict[i], 20,
                                         colour_dict[i][val],
                                         colour_dict[i][val]),
                                column=num + 1, position="aligned")
                        elif type_dict[i] in ('colour_scale',
                                              'colour_scale_date'):
                            if val == 'empty':
                                cell_colour = '#FFFFFF'
                            else:
                                h = (val - min_val_dict[i]) / (
                                    max_val_dict[i] - min_val_dict[i]) * 360
                                cell_colour = hsl_to_str(h, 0.5, 0.5)
                            n.add_face(
                                RectFace(width_dict[i], 20, cell_colour,
                                         cell_colour),
                                column=num + 1, position="aligned")
                        elif type_dict[i] == 'text':
                            n.add_face(padded_text_face(val),
                                       column=num + 1, position="aligned")

    if pres_abs is not None:
        starting_col = len(column_list) + 1
        # Build a protein BLAST database from the gene fasta.
        subprocess.Popen('makeblastdb -out tempdb -dbtype prot -in ' +
                         pres_abs[0], shell=True).wait()
        folder = pres_abs[1]
        len_dict = {}   # gene name -> protein length
        gene_list = []
        # NOTE(review): all three legend rows carry the same text; the green
        # and red rows presumably should read 'present' and 'absent'.
        ts.legend.add_face(padded_text_face('Gene present/absent'),
                           column=starting_col + 1)
        ts.legend.add_face(RectFace(20, 20, '#FFFFFF', '#FFFFFF'),
                           column=starting_col)
        ts.legend.add_face(padded_text_face('Gene present/absent'),
                           column=starting_col + 1)
        ts.legend.add_face(RectFace(20, 20, "#5ba965", "#5ba965"),
                           column=starting_col)
        ts.legend.add_face(padded_text_face('Gene present/absent'),
                           column=starting_col + 1)
        ts.legend.add_face(RectFace(20, 20, "#cb5b4c", "#cb5b4c"),
                           column=starting_col)
        with open(pres_abs[0]) as f:
            for line in f:
                if line.startswith('>'):
                    name = line.split()[0][1:]
                    gene_list.append(name)
                    len_dict[name] = 0
                    nameF = padded_text_face(name)
                    nameF.rotation = -90
                    ts.aligned_header.add_face(
                        nameF, column=starting_col + len(gene_list) - 1)
                else:
                    len_dict[name] += len(line.rstrip())
        # A gene counts as present with >= 90% identity over >= 90% of its
        # length.
        min_length = 0.9
        min_ident = 90
        for n in t.iter_leaves():
            the_name = n.name
            if the_name[0] == '"' and the_name[-1] == '"':
                the_name = the_name[1:-1]
            if the_name.endswith('.ref'):
                the_name = the_name[:-4]
            # Fall back to a file in `folder` starting with the leaf name.
            if not os.path.exists(folder + '/' + the_name):
                for q in os.listdir(folder):
                    if q.startswith(the_name):
                        the_name = q
            # BLAST results are cached in "<name>.blast" in the working dir.
            if not os.path.exists(the_name + '.blast'):
                subprocess.Popen(
                    'blastx -query ' + folder + '/' + the_name +
                    ' -db tempdb -outfmt 6 -num_threads 24 -out ' +
                    the_name + '.blast', shell=True).wait()
            gotit = set()
            with open(the_name + '.blast') as b:
                for line in b:
                    query, subject, ident, length = line.split()[:4]
                    ident = float(ident)
                    length = int(length)
                    if ident >= min_ident and length >= min_length * len_dict[
                            subject]:
                        gotit.add(subject)
            for num, i in enumerate(gene_list):
                cell_colour = "#5ba965" if i in gotit else "#cb5b4c"
                n.add_face(RectFace(20, 20, cell_colour, cell_colour),
                           column=num + starting_col, position="aligned")

    # Set these to False if you don't want bootstrap/distance values
    ts.show_branch_length = label
    ts.show_branch_support = bootstrap
    # Leaf names are drawn as aligned faces instead of the built-in label.
    ts.show_leaf_name = False
    for node in t.traverse():
        if node.is_leaf():
            node.add_face(AttrFace("name", fsize=font_size, ftype=font_type,
                                   tight_text=True, fgcolor='black'),
                          column=0, position="aligned")
    ts.margin_left = 20
    ts.margin_right = 100
    ts.margin_top = 20
    ts.margin_bottom = 20
    if extend:
        ts.draw_guiding_lines = True
    ts.scale = the_scale
    if circular is not None:
        ts.mode = "c"
        ts.arc_start = 0
        ts.arc_span = 360
    if out_file is None:
        t.show(tree_style=ts)
    else:
        t.render(out_file, w=210, units='mm', tree_style=ts)
def heatmap_view(tree, orthologous_groups, save_dir):
    """Generates a heatmap of regulation states in all species.

    Writes ``heatmap.svg`` (cells labelled with locus tag and operon id) and
    ``heatmap_light.svg`` (narrow unlabelled cells) into ``save_dir``. Leaves
    of the tree are the rows, orthologous groups the columns, and the whole
    picture is rotated by 90 degrees.

    :param tree: tree whose leaf names are compared with
        ``gene.genome.strain_name``; faces are attached to it in place.
    :param orthologous_groups: groups exposing ``genes``, ``description``
        and ``NOGs``.
    :param save_dir: directory that receives both SVG files.
    """
    quarter_turn = 90
    border_colour = 'black'
    cell_height = 20
    compact_cell_width = 20

    # The compact heatmap is drawn on its own copy, taken while the tree
    # still has no heatmap faces attached.
    compact_tree = copy.deepcopy(tree)

    # Labelled cells are sized from the longest locus tag (plus room for the
    # operon id), at 8 units per character.
    longest_tag = max(len(gene.locus_tag) + 5
                      for group in orthologous_groups
                      for gene in group.genes)
    labelled_cell_width = longest_tag * 8

    # Sort orthologous groups by the number of regulated genes in each group
    orthologous_groups = filter_and_sort_orthologous_grps(orthologous_groups)

    leaf_pairs = zip(tree.get_leaves(), compact_tree.get_leaves())
    for leaf, compact_leaf in leaf_pairs:
        for column, group in enumerate(orthologous_groups, start=1):
            own_genes = [candidate for candidate in group.genes
                         if candidate.genome.strain_name == leaf.name]
            if own_genes:
                # The first ortholog stands for the group; per the original
                # author it is the one with the highest regulation
                # probability.
                gene = own_genes[0]
                p_regulation = gene.operon.regulation_probability
                probabilities = (1.0 - p_regulation, p_regulation, 0)
            else:
                # No ortholog from this genome
                gene = None
                probabilities = (0, 0, 1)

            # (not regulated, regulated, absent) drive the three colour
            # arguments of rgb2hex.
            fill_colour = rgb2hex(*probabilities)

            if gene:
                cell_text = '%s [%d]' % (gene.locus_tag,
                                         gene.operon.operon_id)
            else:
                cell_text = ''
            cell_label = {'text': cell_text, 'font': 'Courier',
                          'fontsize': 8, 'color': 'black'}

            labelled_cell = RectFace(labelled_cell_width, cell_height,
                                     border_colour, fill_colour,
                                     label=cell_label)
            compact_cell = RectFace(compact_cell_width, cell_height,
                                    border_colour, fill_colour, label='')
            # Cells are turned back so they stay upright after the global
            # rotation applied through the tree style.
            labelled_cell.rotation = -quarter_turn
            compact_cell.rotation = -quarter_turn

            leaf.add_face(labelled_cell, column=column, position='aligned')
            compact_leaf.add_face(compact_cell, column=column,
                                  position='aligned')

    style = TreeStyle()

    # Column headers: "[n]<description>-<NOGs>", right-padded with spaces so
    # every description part has the same width.
    raw_headers = ['-'.join((group.description, str(group.NOGs)))
                   for group in orthologous_groups]
    header_width = max(len(header) for header in raw_headers)
    for column, header in enumerate(raw_headers, start=1):
        header_face = TextFace('[%d]' % column + header.ljust(header_width),
                               ftype='Courier')
        header_face.hz_align = 1
        header_face.vt_align = 1
        header_face.rotation = -quarter_turn
        style.aligned_header.add_face(header_face, column=column)

    # Rotate the generated heatmap.
    style.margin_left = 10
    style.margin_top = 20
    style.rotation = quarter_turn
    style.show_scale = False

    # For some reason, it can't render to PDF in color
    for drawn_tree, file_name in ((tree, 'heatmap.svg'),
                                  (compact_tree, 'heatmap_light.svg')):
        drawn_tree.render(os.path.join(save_dir, file_name),
                          tree_style=style)
# NOTE(review): `f`, `cmar` and `node` are defined above this excerpt; these
# statements presumably finish a per-node loop - confirm against the full file.
# Give the face the same margin on all four sides and attach it to the node.
f.margin_bottom = cmar
f.margin_top = cmar
f.margin_left = cmar
f.margin_right = cmar
node.add_face(f, column=0)

#%% plot tree
# Circular tree style with leaf names shown, no scale bar and a uniform margin.
ts = TreeStyle()
ts.show_leaf_name = True
ts.mode = "c"
ts.show_scale = False
margins = 10
ts.margin_left = margins
ts.margin_right = margins
ts.margin_top = margins
ts.margin_bottom = margins

# Pair each horizon with a colour by position.
# NOTE(review): `horizons` and `hexlist` come from outside this excerpt; zip()
# silently truncates to the shorter one - confirm they have equal length.
colors_dict = dict(zip(horizons, hexlist))


def create_colorstyle(h, colors_dict):
    """Return a NodeStyle whose background colour is the one mapped to *h*.

    ``colors_dict.get`` is used, so a horizon missing from the mapping
    passes ``bgcolor=None`` to NodeStyle.
    """
    c = colors_dict.get(h)
    ns = NodeStyle(bgcolor=c)
    return ns


# One ready-made NodeStyle per horizon.
styles_dict = {h: create_colorstyle(h, colors_dict) for h in horizons}
def matriline_tree(id, db, as_list=False):
    """Build the matriline (maternal family tree) containing one elephant.

    Starting from the elephant with database id *id*, the function climbs
    through ``db.get_mother`` to the oldest known maternal ancestor and then
    explores her descendants generation by generation.

    :param id: database id of the focal elephant
    :param db: database handle providing ``get_elephant``, ``get_mother``
        and ``get_offsprings``. Elephant records are indexed positionally:
        ``[1]`` is the elephant number, ``[3]`` the identifier used when the
        number is missing, ``[4]`` the sex ('F'/'M') and ``[5]`` the birth
        date.
    :param as_list: when exactly ``True``, return nested lists instead of
        drawing the tree
    :return: ``None`` when the elephant is unknown. With ``as_list=True`` a
        two-item list ``[structured, unstructured]``: *unstructured* holds
        the founder number followed by one flat list per generation, while
        *structured* keeps, for every individual of a generation, the list
        of its offspring (``[]`` when it has none). Otherwise a tuple
        ``(newick_string, node_names)``; in that mode the tree is also
        rendered to ``tree.png`` in the current working directory.
    """
    e = db.get_elephant(id=id)
    if not e:
        return None
    central_ind = e[1]

    # Climb to the oldest existing maternal ancestor; the focal elephant is
    # its own founder when it has no recorded mother.
    oldest_mother = id
    mother = db.get_mother(id=id)
    while mother is not None:
        oldest_mother = mother
        mother = db.get_mother(id=mother)

    oldest_mother_num = db.get_elephant(id=oldest_mother)[1]

    ############################
    # Exploration in list form
    if as_list is True:
        # First pass: one flat list of individuals per generation, stopping
        # at the first generation without any offspring.
        tree_list_unstructured = [oldest_mother_num]
        current_generation = [oldest_mother_num]
        while True:
            next_flat = []
            for num in current_generation:
                these_off = db.get_offsprings(num=num)
                if these_off:
                    next_flat.extend(these_off)
            if not next_flat:
                break
            tree_list_unstructured.append(next_flat)
            current_generation = next_flat

        # Second pass: same genealogy, but keeping track of who descends
        # from whom (one sub-list per parent).
        tree_list_structured = [oldest_mother_num]
        for generation in tree_list_unstructured:
            if not isinstance(generation, list):
                next_generation = db.get_offsprings(num=generation) or []
            else:
                next_generation = [db.get_offsprings(num=g) or []
                                   for g in generation]
            # A generation in which nobody reproduced is not recorded.
            if not all(x == [] for x in next_generation):
                tree_list_structured.append(next_generation)
        return [tree_list_structured, tree_list_unstructured]

    ############################
    # Exploration in Newick form
    # The string is grown by replacing the quoted label of each mother with
    # "(<her offspring>)<her label>"; quotes mark labels not yet expanded.
    newick = "('" + str(oldest_mother_num) + "_\u2640')"
    # branch_length[k] belongs to the k-th named node; the founder gets an
    # arbitrary length of 2.
    branch_length = [[oldest_mother_num, 2]]
    mothers = [oldest_mother]
    while mothers:
        generation_n = []
        for m in mothers:
            m_info = db.get_elephant(id=m)  # fetched once per mother
            m_num = m_info[1]
            m_birth = m_info[5]
            o = db.get_offsprings(id=m)
            if o is None:
                continue
            taxon = []
            for i in o:
                generation_n.append(i)
                info = db.get_elephant(id=i)
                num = info[1]
                if not num:
                    num = info[3]
                sex = info[4]
                birth = info[5]
                # Branch length = age of the mother at this birth, in years.
                age_of_mother_at_birth = round(
                    (birth - m_birth).days / 365.25)
                branch_length.append([num, age_of_mother_at_birth])
                if sex == 'F':
                    u = '\u2640'
                elif sex == 'M':
                    u = '\u2642'
                else:
                    u = '?'
                taxon.append(str(num) + '_' + u)
            children = (str(taxon).replace('[', '(').replace(']', ')')
                        .replace(' ', ''))
            newick = newick.replace("'" + str(m_num) + "_\u2640'",
                                    children + str(m_num) + '_\u2640')
        mothers = generation_n
    newick = newick.replace("'", "") + ';'

    # Now formatting for the actual plotting in ete3:
    t = Tree(newick, format=8)
    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.rotation = 90
    ts.show_scale = False
    ts.min_leaf_separation = 50

    def my_layout(node):
        # Names are drawn as small counter-rotated text faces because the
        # whole tree is rotated by 90 degrees.
        F = TextFace(node.name, tight_text=True)
        F.fsize = 6
        F.margin_left = 5
        F.margin_right = 5
        F.margin_top = 0
        F.margin_bottom = 15
        F.rotation = -90
        add_face_to_node(F, node, column=0, position="branch-right")

    ts.layout_fn = my_layout
    ts.margin_left = 10
    ts.margin_right = 10
    ts.margin_top = 10
    ts.margin_bottom = 10

    for i, n in enumerate(t.traverse()):
        if i == 0:
            # First traversed node: the unnamed wrapper root of the Newick
            # string, which has no entry in branch_length.
            n.delete()
            n.img_style["size"] = 0.
            n.img_style["vt_line_width"] = 1
            n.img_style["hz_line_width"] = 1
            continue
        # Node names end with "_<sex symbol>"; strip it to compare numbers.
        if str(n.name[:-2]) == str(central_ind):
            # Highlight the focal elephant with a coloured circle.
            n.img_style["size"] = 10
            n.img_style["shape"] = "circle"
            n.img_style["fgcolor"] = "#A30B37"
        else:
            n.img_style["size"] = 0.
        n.img_style["vt_line_width"] = 1
        n.img_style["hz_line_width"] = 1
        n.dist = int(branch_length[i - 1][1])

    t.render('tree.png', w=600, units='px', tree_style=ts)

    taxa = [n.name for n in t.traverse()]
    return (t.write(format=1), taxa)
def plotting_tree(species, latin_names, original_tree, correction_table,
                  consensus_strategy_for_multi_outgroups, ortholog_db,
                  peak_stats, nextflow_flag):
    """
    Generate a PDF figure of the input tree with branch lengths equal to Ks distances.
    If it is not possible to compute the length of a branch, its line is dashed.
    This happens when some ortholog data needed to compute the branch-specific
    Ks contribution are missing.

    :param species: the current focal species
    :param latin_names: a dictionary-like data structure that associates each informal species name to its latin name
    :param original_tree: Newick tree format of the phylogenetic tree among the involved species
    :param correction_table: adjustment results in DataFrame format (contains both possible types of consensus strategy for how to deal with multiple outgroups)
    :param consensus_strategy_for_multi_outgroups: user choice about which consensus strategy to use when dealing with multiple outgroups
    :param ortholog_db: ortholog peak database used to get ortholog data for the relative rate test; if not available (empty), it is ignored
    :param peak_stats: flag to specify whether the ortholog distribution peak is the mode or the median
    :param nextflow_flag: boolean flag to state whether the script is run in the Nextflow pipeline or not
    """
    unknown_length = 10  # impossible number used to flag an unknown length

    def set_known_length(branch, length):
        # Store the length and draw its label as a known distance.
        branch.dist = length
        draw_branch_length_label(branch, known_distance=True)

    def set_unknown_length(branch):
        # Store the flag value, label it as unknown and dash the branch.
        branch.dist = unknown_length
        draw_branch_length_label(branch, known_distance=False)
        unknown_branch_len_style(branch)

    # Get an equivalent tree where the focal species is the top leaf
    tree = reorder_tree_leaves(original_tree, species)
    node_and_branch_style(tree)

    species_node = get_species_node(species, tree)
    labeling_internal_nodes(species_node)
    species_history = get_species_history(species_node)

    rate_species_dict = {}
    rate_sister_dict = {}

    for ancestor_node in species_history[:-2]:
        # NOTE: the call below is needed here because it fills in the two
        # dictionaries of branch-specific Ks contributions
        avg_peak, _margin_error_box, error_text = get_branch_length_and_errorbox(
            species, ancestor_node, correction_table,
            consensus_strategy_for_multi_outgroups, latin_names,
            rate_species_dict, rate_sister_dict)

        # The focal species leaf would otherwise lack a branch length
        if ancestor_node.name == species:
            set_known_length(ancestor_node, rate_species_dict[species])

        # Annotate the parent node, where the current divergence takes place,
        # with the Ks contribution, the mean divergence Ks and its error range
        parent_of_ancestor = ancestor_node.up
        parent_of_ancestor.add_feature("rate_species",
                                       rate_species_dict[species])
        parent_of_ancestor.add_feature("avg_peak", round(avg_peak, 2))
        parent_of_ancestor.add_feature(
            "margins", f"({error_text[0]}, {error_text[1]})")

    # Branch lengths along the speciation history of the focal species
    for divergence_node in species_history[1:]:
        parent_node = divergence_node.up
        try:
            set_known_length(
                divergence_node,
                round(parent_node.rate_species - divergence_node.rate_species,
                      3))
        except Exception:
            # Any failure while computing or labelling the length turns the
            # branch into an "unknown length" one.
            set_unknown_length(divergence_node)

    # The adjustment tables are always a source of Ks contributions; the
    # ortholog peak database is an additional one when it is not empty.
    if ortholog_db.empty:
        logging.info(
            "Getting branch-specific Ks contributions from rate-adjustment table data"
        )
    else:
        logging.info(
            "Getting branch-specific Ks contributions from rate-adjustment table data"
        )
        logging.info(
            "Computing branch-specific Ks contributions from ortholog peak data in database by applying principles of the relative rate test"
        )

    rate_dict = {}
    get_rates_from_current_analysis(rate_dict, correction_table, species,
                                    species_history, latin_names)

    # Branch lengths of the remaining nodes (the sister lineages)
    missing_ortholog_data_from_database = False
    missing_ortholog_data_from_correction_table = False

    for history_node in species_history[:-1]:
        sisters = history_node.get_sisters()  # list holding the one sister node

        if not ortholog_db.empty:
            # The ortholog database can help with the missing branch lengths
            sister = sisters[0]
            if len(sister.get_leaves()) > 1:
                missing_ortholog_data_from_database = get_rates_from_ortholog_peak_db(
                    rate_dict, sisters, latin_names, ortholog_db, peak_stats,
                    missing_ortholog_data_from_database)
            elif sister.name in rate_sister_dict:
                set_known_length(sister, rate_sister_dict[sister.name])
            else:
                set_unknown_length(sister)
        else:
            # No ortholog database (it was previously set to an empty DataFrame)
            sister = sisters[0]
            if len(sister.get_leaves()) > 1:
                # The correction table alone cannot provide the lengths
                # inside a sister clade
                missing_ortholog_data_from_correction_table = True
                set_unknown_length(sister)
                for descendant in sister.get_descendants():
                    set_unknown_length(descendant)
            else:
                leaf = sister.get_leaves()[0]  # there is only one leaf
                if leaf.name in rate_sister_dict:
                    set_known_length(leaf, rate_sister_dict[leaf.name])
                else:
                    set_unknown_length(leaf)

    # Warn when the peak database lacks required data or when only the
    # correction table could be used for the branch contributions
    some_length_unknown = (missing_ortholog_data_from_database
                           or missing_ortholog_data_from_correction_table)
    if some_length_unknown:
        logging.warning("")
        logging.warning(
            "One or more branch lengths are unknown (dashed line) due to missing ortholog distribution peak data"
        )

    if nextflow_flag:
        # Nextflow mode: the lengths will be completed as the pipeline goes on
        if missing_ortholog_data_from_database:
            logging.info(
                "As soon as new ortholog data will become available, the tree branch lengths will be updated"
            )
    elif some_length_unknown:
        # Manual mode: tell the user how to obtain a complete tree
        logging.warning(
            "It's necessary to run a new Nextflow (or manual) pipeline to complete the tree branch length information"
        )

    label_leaves_with_latin_names(tree, latin_names)
    adapt_unknown_branch_length(tree)

    ts = TreeStyle()
    ts.orientation = 1
    ts.branch_vertical_margin = 14
    ts.show_leaf_name = False  # because there is a Face showing it
    ts.show_branch_length = False
    ts.margin_left = 25
    ts.margin_right = 25
    ts.margin_top = 25
    ts.scale = 200

    # The figure is rendered from the last node of the species history
    root_of_corrected_tree = species_history[-1]
    output_path = os.path.join("rate_adjustment", f"{species}",
                               f"{_TREE_BRANCH_DISTANCES.format(species)}")
    root_of_corrected_tree.render(output_path, w=4.5, units="in",
                                  tree_style=ts)
# NOTE(review): `mark2`, `ts` and `target_name` are created above this
# excerpt; only the end of the legend/title set-up is visible here.
# Finish styling the second legend mark (beige box with a 1-unit border)
# and put it in the first legend column.
mark2.margin_right = 1
mark2.margin_left = 1
mark2.margin_bottom = 0
mark2.opacity = 1  # from 0 to 1
mark2.border.width = 1
mark2.background.color = "#F5F5DC"
ts.legend.add_face(mark2, column=0)

# Caption shown next to the mark, in the second legend column.
mark3 = TextFace("Selected branches", fsize=10, fgcolor="black")
mark3.margin_top = 2
mark3.margin_right = 20
mark3.margin_left = 5
mark3.margin_bottom = 2
ts.legend.add_face(mark3, column=1)

# Margins around the whole tree image.
ts.margin_left = 20
ts.margin_right = 20
ts.margin_top = 10
ts.margin_bottom = 10

# A title is only added when at least two command-line arguments were given.
# NOTE(review): presumably `target_name` is derived from one of them - confirm.
if len(sys.argv) >= 3:
    title = TextFace(target_name, fsize=16, fgcolor="SteelBlue", fstyle="italic", bold=True)
    title.margin_top = 10
    title.margin_right = 10
    title.margin_left = 10
    title.margin_bottom = 10
    ts.title.add_face(title, column=0)
# T.dist = 0.0  # set germline distance to 0
# T.write(format=1, outfile=tree_file_name+".multifurc")

# Tree style: fixed scale, no leaf names / branch lengths / support values,
# no guiding lines, and a 50-unit margin on every side.
ts = TreeStyle()
# ts.mode = "c"
ts.scale = 500
ts.optimal_scale_level = "full"
# ts.arc_start = 180 # -180
# ts.arc_span = 180 # 359
ts.show_leaf_name = False
ts.show_branch_length = False
ts.show_branch_support = False
# ts.root_opening_factor = 0.75
ts.draw_guiding_lines = False
ts.margin_left = 50
ts.margin_right = 50
ts.margin_top = 50
ts.margin_bottom = 50
ts.rotation = 0

# Map sequence_string_uid -> isotype. Each line is split on whitespace; the
# second field is used as the key and the third as the value.
# NOTE(review): the path is an absolute, machine-specific location.
path_to_sequence_string_uid_to_isotype_map = "/Users/lime/Dropbox/quake/Bcell/selection/figures/treePlots/v2/Bcell_flu_high_res.sequences.isotypeDict.V6_Full.csv"
sequence_string_uid_to_isotype = {}
with open(path_to_sequence_string_uid_to_isotype_map) as f:
    for line in f:
        vals = line.rstrip().split()
        sequence_string_uid_to_isotype[vals[1]] = vals[2]

# Map isotype -> colour, stored as JSON.
# The file is opened with plain 'r': the former 'rU' mode was removed in
# Python 3.11 (ValueError) and newline handling is irrelevant to the JSON
# parser anyway.
path_to_isotype_to_color_map = "/Users/lime/Dropbox/quake/Bcell/selection/figures/treePlots/v2/isotype_to_color_dict.json"
with open(path_to_isotype_to_color_map, 'r') as f:
    isotype_to_color = json.load(f)
def plot_species_tree(tree_newick, tree_type, gene_name, tree_file_name,
                      name_list, tree_image_folder):
    """Render a Newick tree to ``<tree_image_folder>/<tree_file_name>.png``.

    The picture is titled ``"<tree_type> (<gene_name>)"``. Leaves whose name
    appears in *name_list* are labelled in red, all other leaves in black;
    internal nodes get a small label above their branch.

    :param tree_newick: Newick input accepted by ``Tree`` (parsed with
        ``format=2``)
    :param tree_type: first part of the title
    :param gene_name: second part of the title, shown in parentheses
    :param tree_file_name: output file name without extension
    :param name_list: container of leaf names to highlight
    :param tree_image_folder: output directory
    """
    tree = Tree(tree_newick, format=2)

    # Overall layout: rectangular tree, no scale, no border, 10-unit margins.
    ts = TreeStyle()
    ts.mode = "r"  # 'r' for rectangular, 'c' for circular
    ts.show_leaf_name = False  # names are drawn by the faces added below

    title_text = tree_type + ' (' + gene_name + ')'
    title_face = TextFace(title_text, fsize=8, fgcolor='black',
                          ftype='Arial', tight_text=False)
    ts.title.add_face(title_face, column=0)

    ts.rotation = 0  # from 0 to 360
    ts.show_scale = False
    ts.margin_top = 10
    ts.margin_bottom = 10
    ts.margin_left = 10
    ts.margin_right = 10
    ts.show_border = False
    ts.branch_vertical_margin = 3  # pixels between adjacent branches

    for current in tree.traverse():
        if not current.is_leaf():
            # Internal node: invisible dot, small label on top of the branch.
            inner_style = NodeStyle()
            inner_style['size'] = 0
            inner_label = TextFace(current.name, fsize=4, fgcolor='black',
                                   tight_text=False, bold=False)
            current.add_face(inner_label, column=5, position='branch-top')
            current.set_style(inner_style)
            continue

        # Leaf: invisible dot, thin solid branch lines.
        leaf_style = NodeStyle()
        leaf_style['shape'] = 'circle'  # circle, square or sphere
        leaf_style['size'] = 0
        leaf_style['hz_line_width'] = 0.5
        leaf_style['vt_line_width'] = 0.5
        leaf_style['hz_line_type'] = 0  # 0 solid, 1 dashed, 2 dotted
        leaf_style['vt_line_type'] = 0

        highlighted = current.name in name_list
        if highlighted:
            dot_colour, text_colour = 'red', 'red'
        else:
            dot_colour, text_colour = 'blue', 'black'

        leaf_style['fgcolor'] = dot_colour
        leaf_label = TextFace(current.name, fsize=8, fgcolor=text_colour,
                              tight_text=False, bold=False)
        current.add_face(leaf_label, column=0, position='branch-right')
        current.set_style(leaf_style)

    # 900 px wide picture
    image_path = '%s/%s.png' % (tree_image_folder, tree_file_name)
    tree.render(image_path, w=900, units='px', tree_style=ts)
def _append_fasta_record(fasta_path, header, sequence):
    """Append one ``>header`` / sequence record to the FASTA file *fasta_path*."""
    with open(fasta_path, 'a+') as handle:
        handle.write('>' + header + '\n')
        handle.write(sequence + '\n')


def treeMaker(path_to_prokka, path_to_hmm, pwd_hmmsearch_exe, pwd_mafft_exe,
              pwd_fasttree_exe, plot_tree):
    """Build a species tree from marker proteins found in Prokka outputs.

    Steps: run hmmsearch on every genome's ``.faa`` file, keep the best
    scoring hit per HMM, align each marker with mafft, concatenate the
    alignments (gaps for missing markers) and run FastTree.

    Files written in the current working directory: the scratch folder
    ``get_species_tree_wd`` (recreated on every call), ``species_tree.aln``,
    ``species_tree.newick`` and, when plotting, ``species_tree.png``.

    :param path_to_prokka: folder containing one sub-folder per genome, each
        holding ``<genome>/<genome>.faa``
    :param path_to_hmm: HMM profile file passed to hmmsearch
    :param pwd_hmmsearch_exe: hmmsearch executable (interpolated into a
        shell command line)
    :param pwd_mafft_exe: mafft executable (interpolated into a shell
        command line)
    :param pwd_fasttree_exe: FastTree executable (interpolated into a shell
        command line)
    :param plot_tree: ``1`` to also render the tree to PNG
    """
    import shutil  # local import: only needed to reset the scratch folder

    # Start from an empty scratch folder
    tmp_folder = 'get_species_tree_wd'
    if os.path.islink(tmp_folder) or os.path.isfile(tmp_folder):
        os.remove(tmp_folder)
    elif os.path.isdir(tmp_folder):
        shutil.rmtree(tmp_folder)
    os.mkdir(tmp_folder)

    # Every sub-folder of the target folder is treated as one genome
    prokka_files = [
        i for i in os.listdir(path_to_prokka)
        if os.path.isdir(os.path.join(path_to_prokka, i))
    ]
    print('Detected %i input genomes' % len(prokka_files))

    print('Running hmmsearch...')
    whitespace = re.compile(r'\s+')
    for genome in prokka_files:
        os.system(
            '%s -o /dev/null --domtblout %s/%s_hmmout.tbl %s %s/%s/%s.faa' %
            (pwd_hmmsearch_exe, tmp_folder, genome, path_to_hmm,
             path_to_prokka, genome, genome))

        # Protein id -> sequence for this genome
        proteinSequence = {}
        for seq_record in SeqIO.parse(
                '%s/%s/%s.faa' % (path_to_prokka, genome, genome), 'fasta'):
            proteinSequence[seq_record.id] = str(seq_record.seq)

        # Walk through the hit table and, for each run of lines sharing the
        # same HMM id (column 4), keep the highest-scoring hit (column 13).
        # The kept segment of the protein named in column 0, delimited by
        # columns 17/18, is appended to <tmp>/<hmm id>.fasta under the
        # genome name.
        # NOTE(review): column meanings presumably follow HMMER's
        # --domtblout layout (query accession, domain score, ali from/to);
        # confirm against the HMMER user guide.
        hmm_id = ''
        hmm_name = ''
        hmm_pos1 = 0
        hmm_pos2 = 0
        hmm_score = 0
        tbl_path = (tmp_folder + '/' + genome.replace('prokka/', '') +
                    '_hmmout.tbl')
        with open(tbl_path, 'r') as tbl:
            for line in tbl:
                if line.startswith('#'):
                    continue
                splitLine = whitespace.sub(' ', line).split(' ')
                same_marker = hmm_id != '' and hmm_id == splitLine[4]
                if same_marker:
                    if float(splitLine[13]) <= hmm_score:
                        continue  # not better than the hit already kept
                else:
                    # A new marker starts: store the best hit of the
                    # previous one (nothing to store before the first hit).
                    if hmm_id and hmm_name:
                        _append_fasta_record(
                            tmp_folder + '/' + hmm_id + '.fasta', genome,
                            proteinSequence[hmm_name][hmm_pos1:hmm_pos2])
                    hmm_id = splitLine[4]
                hmm_name = splitLine[0]
                hmm_pos1 = int(splitLine[17]) - 1
                hmm_pos2 = int(splitLine[18])
                hmm_score = float(splitLine[13])

        # Store the last marker. A genome without any hit writes nothing,
        # instead of a record with an empty marker name and a stale sequence.
        if hmm_id and hmm_name:
            _append_fasta_record(
                tmp_folder + '/' + hmm_id + '.fasta', genome,
                proteinSequence[hmm_name][hmm_pos1:hmm_pos2])

    # Align every per-marker FASTA file; the unaligned file is removed by
    # the same shell command.
    print('Running mafft...')
    for name in os.listdir(tmp_folder):
        if not name.endswith('.fasta'):
            continue
        fastaFile1 = '%s/%s' % (tmp_folder, name)
        fastaFile2 = fastaFile1.replace('.fasta', '_aligned.fasta')
        os.system(pwd_mafft_exe + ' --quiet --maxiterate 1000 --globalpair ' +
                  fastaFile1 + ' > ' + fastaFile2 + ' ; rm ' + fastaFile1)

    # Concatenate the single alignments, one growing row per genome
    print('Concatenating alignments...')
    concat_parts = {genome: [] for genome in prokka_files}
    for name in os.listdir(tmp_folder):
        if not name.endswith('.fasta'):
            continue
        aligned = {}
        alignmentLength = 0
        for seq_record_2 in SeqIO.parse(tmp_folder + '/' + name, 'fasta'):
            aligned[seq_record_2.id] = str(seq_record_2.seq)
            alignmentLength = len(aligned[seq_record_2.id])
        for genome in prokka_files:
            if genome in aligned:
                concat_parts[genome].append(aligned[genome])
            else:
                # Genome lacks this marker: pad with gaps
                concat_parts[genome].append('-' * alignmentLength)

    with open('./species_tree.aln', 'w') as file_out:
        for genome in prokka_files:
            file_out.write('>' + genome + '\n' +
                           ''.join(concat_parts[genome]) + '\n')

    print('Running fasttree...')
    os.system('%s -quiet species_tree.aln > species_tree.newick' %
              pwd_fasttree_exe)

    plotted = plot_tree == 1
    if plotted:
        print('Plot species tree')
        tree = Tree('species_tree.newick', format=1)

        # Rectangular tree, titled, no scale or border, 10-unit margins
        ts = TreeStyle()
        ts.mode = "r"  # 'r' for rectangular, 'c' for circular
        ts.show_leaf_name = False
        ts.title.add_face(TextFace('Species_Tree',
                                   fsize=8,
                                   fgcolor='black',
                                   ftype='Arial',
                                   tight_text=False),
                          column=0)
        ts.rotation = 0  # from 0 to 360
        ts.show_scale = False
        ts.margin_top = 10
        ts.margin_bottom = 10
        ts.margin_left = 10
        ts.margin_right = 10
        ts.show_border = False
        ts.branch_vertical_margin = 3  # pixels between adjacent branches

        for each_node in tree.traverse():
            if each_node.is_leaf():
                ns = NodeStyle()
                ns["shape"] = "circle"  # circle, square or sphere
                ns["size"] = 0
                ns['hz_line_width'] = 0.5
                ns['vt_line_width'] = 0.5
                ns['hz_line_type'] = 0  # 0 solid, 1 dashed, 2 dotted
                ns['vt_line_type'] = 0
                ns["fgcolor"] = "blue"
                each_node.add_face(TextFace(each_node.name,
                                            fsize=5,
                                            fgcolor='black',
                                            tight_text=False,
                                            bold=False),
                                   column=0,
                                   position='branch-right')
                each_node.set_style(ns)
            else:
                nlns = NodeStyle()
                nlns["size"] = 0
                each_node.add_face(TextFace(each_node.name,
                                            fsize=3,
                                            fgcolor='black',
                                            tight_text=False,
                                            bold=False),
                                   column=5,
                                   position='branch-top')
                each_node.set_style(nlns)

        # 900 px wide picture
        tree.render('species_tree' + '.png', w=900, units="px", tree_style=ts)

    # Only mention the PNG when it was actually rendered
    if plotted:
        print(
            'The built species tree was exported to species_tree.newick and species_tree.png'
        )
    else:
        print('The built species tree was exported to species_tree.newick')