def get_tree_style(**kwargs):
    """Build a TreeStyle with this module's defaults, then apply overrides.

    Every keyword argument is set as an attribute of the same name on the
    style. Values are coerced first:

    * the strings ``"True"`` / ``"False"`` become the matching booleans;
    * anything ``float()`` accepts becomes a float;
    * everything else (other strings, ``None``, objects) is stored as-is.

    :return: the configured TreeStyle instance
    """
    style = TreeStyle()
    style.layout_fn = layout
    style.allow_face_overlap = True
    style.branch_vertical_margin = 5
    style.complete_branch_lines_when_necessary = False
    style.draw_aligned_faces_as_table = False
    style.scale = 1500
    style.scale_length = 0.05
    style.show_branch_support = False
    style.show_leaf_name = False

    for key, value in kwargs.items():
        if value == "True":
            value = True
        elif value == "False":
            # Without this branch "False" stayed a non-empty (truthy) string.
            value = False
        else:
            try:
                value = float(value)
            except (TypeError, ValueError):
                # Not numeric (TypeError covers None and other objects):
                # keep the caller's value untouched.
                pass
        setattr(style, key, value)
    return style
def bub_tree(tree, fasta, outfile1, root, types, c_dict, show, size, colours, field1, field2, scale, multiplier, dna):
    """
    Style every node of an ete tree, optionally attach an alignment panel,
    add a colour legend and render the result to ``outfile1``.

    Leaf names are split on "_"; the split fields supply the time point
    (``field2``) and, when ``size`` is True, the frequency (``field1``).

    :param tree: tree object from ete
    :param fasta: object indexed by leaf name that returns the sequence to
                  draw next to the leaf, or None for no alignment panel
    :param outfile1: path of the rendered image
    :param root: sequence name to use as root (outgroup), or None
    :param types: tree type: circular (c) or rectangle (r)
    :param c_dict: dictionary mapping time point to colour
    :param show: if True, open the tree in the gui before rendering
    :param size: if True, scale the terminal nodes by frequency information
    :param colours: when equal to 3 the residue colour tables are disabled
    :param field1: index of the name field that contains the frequency value
    :param field2: index of the name field that contains the time point
    :param scale: how much to scale the x axis
    :param multiplier: factor applied to the frequency (500 when None)
    :param dna: true/false, is sequence a DNA sequence?
    :return: None, outputs an image of the tree
    """
    mult = 500 if multiplier is None else multiplier

    if dna:
        dna_prot = 'dna'
        bg_c = {
            'A': 'green', 'C': 'blue', 'G': 'black', 'T': 'red',
            '-': 'grey', 'X': 'white',
        }
        fg_c = {residue: 'black' for residue in bg_c}
        fg_c['X'] = 'white'
    else:
        dna_prot = 'aa'
        bg_c = {
            'K': '#145AFF', 'R': '#145AFF', 'H': '#8282D2', 'E': '#E60A0A',
            'D': '#E60A0A', 'N': '#00DCDC', 'Q': '#00DCDC', 'S': '#FA9600',
            'T': '#FA9600', 'L': '#0F820F', 'I': '#0F820F', 'V': '#0F820F',
            'Y': '#3232AA', 'F': '#3232AA', 'W': '#B45AB4', 'C': '#E6E600',
            'M': '#E6E600', 'A': '#C8C8C8', 'G': '#EBEBEB', 'P': '#DC9682',
            '-': 'grey', 'X': 'white',
        }
        fg_c = {residue: 'black' for residue in bg_c}
        fg_c['-'] = 'grey'
        fg_c['X'] = 'white'

    if colours == 3:
        bg_c = None
        fg_c = None

    tstyle = TreeStyle()
    tstyle.force_topology = False
    tstyle.mode = types
    tstyle.scale = scale
    tstyle.min_leaf_separation = 0
    tstyle.optimal_scale_level = 'full'
    if types == 'c':
        tstyle.root_opening_factor = 0.25
    tstyle.draw_guiding_lines = False
    tstyle.guiding_lines_color = 'slateblue'
    tstyle.show_leaf_name = False
    tstyle.allow_face_overlap = True
    tstyle.show_branch_length = False
    tstyle.show_branch_support = False

    if root is not None:
        tree.set_outgroup(root)

    time_col = []
    for node in tree.traverse():
        if not node.is_leaf():
            # Internal nodes: near-invisible marker, thick branch lines.
            inner_style = NodeStyle()
            inner_style["size"] = 0.1
            inner_style["hz_line_width"] = 10
            inner_style["vt_line_width"] = 10
            node.set_style(inner_style)
            continue

        try:
            fields = node.name.split("_")
            time = fields[field2]
            # Unused value; the lookup is kept only so names with fewer than
            # four "_" fields still take the fallback path, as before.
            _kind = fields[3]
        except (AttributeError, IndexError, TypeError):
            time = 'zero'
            # Empty list, not the raw name: indexing the raw string below
            # would pick a single character as the "frequency".
            fields = []
            print("Incorrect name format for ", node.name)

        s = 20
        if size is True:
            try:
                s = 20 + float(fields[field1]) * mult
            except (IndexError, TypeError, ValueError):
                print("No frequency information for ", node.name)

        # Raises KeyError when c_dict has no entry for the time point
        # (including the 'zero' fallback) -- unchanged from the original.
        colour = c_dict[time]
        time_col.append((time, colour))

        nstyle = NodeStyle()
        nstyle["fgcolor"] = colour
        nstyle["size"] = s
        nstyle["hz_line_width"] = 10
        nstyle["vt_line_width"] = 10
        nstyle["hz_line_color"] = colour
        nstyle["vt_line_color"] = 'black'
        nstyle["hz_line_type"] = 0
        nstyle["vt_line_type"] = 0

        if root is not None and node.name == root:
            # place holder in case you want to do something with the root leaf
            print('root is ', node.name)
        else:
            nstyle["shape"] = "circle"
        node.set_style(nstyle)

        # NOTE(review): the source had lost its indentation; the alignment
        # face is attached to every leaf here -- confirm the root leaf was
        # not meant to be excluded.
        if fasta is not None:
            seq = fasta[str(node.name)]
            seqFace = SequenceFace(seq, seqtype=dna_prot, fsize=10, fg_colors=fg_c, bg_colors=bg_c,
                                   codon=None, col_w=40, alt_col_w=3, special_col=None, interactive=True)
            (tree & node.name).add_face(seqFace, 0, "aligned")

    tree.ladderize()

    # One legend row per distinct (time point, colour) pair, plus a blank
    # white spacer row at the end.
    legendkey = sorted(set(time_col))
    legendkey.append(('', 'white'))
    for tm, clr in legendkey:
        tstyle.legend.add_face(faces.CircleFace(30, clr), column=0)
        tstyle.legend.add_face(
            faces.TextFace('\t' + tm, ftype='Arial', fsize=60, fgcolor='black', tight_text=True),
            column=1)

    if show is True:
        tree.show(tree_style=tstyle)
    tree.render(outfile1, dpi=600, tree_style=tstyle)
# Attach one bordered TreeFace per genus tree to its family leaf in `t`.
for idx, family in enumerate(families):
    for cidx, genus_tree in enumerate(genera_trees[idx]):
        # ts_genera is passed to the face here and configured further down.
        tf_genera = TreeFace(genus_tree, ts_genera)
        tf_genera.border.width = 2
        genus = genera[idx][cidx]
        color = colors[str(genus)]
        tf_genera.border.color = color
        (t & family).add_face(tf_genera, column=0, position='aligned')
        for n in genus_tree.iter_search_nodes():
            if n.dist == 1:
                n.img_style = ns_genera

# Style used for the embedded genus trees.
ts_genera.show_leaf_name = False
ts_genera.show_scale = False
ts_genera.layout_fn = my_layout

# Style of the main (family) tree. The original assigned
# branch_vertical_margin = 10 and then immediately 15; only the effective
# value is kept.
ts.show_leaf_name = False
ts.branch_vertical_margin = 15
ts.layout_fn = my_layout
ts.draw_guiding_lines = True
ts.guiding_lines_type = 1
ts.show_scale = False
ts.allow_face_overlap = False

t.show(tree_style=ts)
t.render("mytree.png", w=183, units="mm", tree_style=ts)
def format_tree(tree, alignment, al_len_dict, edpos, codontable=None, colors=None, codon_col=None, text="C-to-U RNA editing", ic_contents=None):
    """Format the rendering of tree data for alignment.

    A copy of ``tree`` is annotated with the concatenated per-gene sequences
    and editing positions, and a TreeStyle is built whose footer labels each
    gene segment.

    :param tree: tree to copy; the input is not modified
    :param alignment: mapping gene name -> {node name -> sequence}; iteration
        order defines the concatenation order
    :param al_len_dict: mapping gene name -> alignment length
    :param edpos: mapping gene name -> {node name -> list of positions}
    :param codontable: forwarded to SequenceFace (empty dict when omitted)
    :param colors: container of node names / full names to draw in
        MARKED_NODE_COLOR; None marks nothing
    :param codon_col: forwarded to SequenceFace as ``bg_colors``
    :param text: legend caption
    :param ic_contents: not used by this function
    :return: ``(annotated_tree_copy, tree_style)``
    """
    # Defaults are created per call instead of sharing one mutable object.
    codontable = {} if codontable is None else codontable
    codon_col = {} if codon_col is None else codon_col
    # `colors=None` used to raise TypeError on the membership test below.
    marked = () if colors is None else colors

    t = tree.copy()

    # Flip the alignment from gene ==> species ==> seq to one concatenated
    # sequence (and one offset-corrected position list) per node name.
    specSeq = ddict(str)
    edposSeq = ddict(list)
    cur_len = 0
    limits = []  # (gene name, cumulative end offset)
    for gname, specdict in alignment.items():
        for node in t:
            # fill missing with gap
            specSeq[node.name] += specdict.get(node.name, al_len_dict[gname] * '-')
            edposSeq[node.name] += [x + cur_len for x in edpos[gname].get(node.name, [])]
        cur_len += al_len_dict.get(gname, 0)
        limits.append((gname, cur_len))

    for node in t:
        node.add_feature("sequence", specSeq[node.name])
        node.add_feature('edlist', edposSeq[node.name])

    ts = TreeStyle()
    ts.branch_vertical_margin = 15
    ts.scale = 15
    ts.allow_face_overlap = False
    ts.show_scale = False
    ts.show_leaf_name = False

    ns = NodeStyle()
    ns['shape'] = 'square'
    ns['fgcolor'] = 'black'
    ns['size'] = 0

    def layout(node):
        node.img_style = ns
        if not node.is_leaf():
            return
        is_marked = node.name in marked or node.fullname in marked
        name_face = AttrFace('fullname', fsize=14,
                             fgcolor=(MARKED_NODE_COLOR if is_marked else 'black'))
        faces.add_face_to_node(name_face, node, 0, position="aligned")
        if hasattr(node, "sequence") and node.sequence:
            seqface = SequenceFace(node.sequence, "codon", fsize=13, codontable=codontable,
                                   col_w=RES_COL_WIDTH, bg_colors=codon_col, black_out=node.edlist)
            faces.add_face_to_node(seqface, node, 1, position="aligned")

    ts.layout_fn = layout

    ts.legend.add_face(TextFace(text, fsize=14), column=1)
    ts.legend_position = 1

    prev_gend = 0
    for ind, (gname, gend) in enumerate(limits, 1):
        gene_len = gend - prev_gend
        ts.aligned_foot.add_face(
            List90Face(list(range(0, gene_len, 3)), fsize=13, ftype='Monospace',
                       col_w=RES_COL_WIDTH * 3), ind)
        ts.aligned_foot.add_face(
            faces.RectFace(RES_COL_WIDTH * gene_len, 13, '#BBBBBB', '#EEEEEE'), ind)
        ts.aligned_foot.add_face(TextFace(gname, fsize=13), ind)
        ts.aligned_foot.add_face(
            faces.RectFace(RES_COL_WIDTH * gene_len, 5, 'white', 'white'), ind)
        # `limits` stores cumulative ends, so the previous end is simply this
        # one. The original `prev_gend += gend` summed the ends and produced
        # wrong segment widths from the third gene onwards.
        prev_gend = gend

    ts.margin_left = 5
    ts.margin_right = 5
    ts.margin_bottom = 5
    return t, ts
def generateFigure(PF, sample, rank, input_file, output_base_name, file_type, plot_l1, scaling, output_dpi):
    """Render the circular taxonomy tree for one sample and, optionally, an L1 scatter plot.

    :param PF: profile object; this function uses ``get_all_tax_ids``,
        ``layout``, ``ground_truth_dict``, ``ground_truth_tax_id_to_percentage``
        and ``profile_tax_id_to_percentage``
    :param sample: sample identifier passed to ``PF.get_all_tax_ids``
    :param rank: taxonomic rank used as ``rank_limit`` and to select leaves
    :param input_file: path whose base name (up to the first ".") is shown as the tool name
    :param output_base_name: prefix of both output file names
    :param file_type: output file extension
    :param plot_l1: when truthy, also save the true-vs-predicted scatter plot
    :param scaling: not used by this function
    :param output_dpi: resolution passed to both renderers
    :raises SystemExit: with code 1 when the topology cannot be built
    """
    tool_name = os.path.basename(input_file).split('.')[0]

    # Make the ETE3 tree
    try:
        tree = ncbi.get_topology(PF.get_all_tax_ids(sample), rank_limit=rank)
    except Exception:
        # `except Exception` (not a bare except) so Ctrl-C and SystemExit
        # are not reported as a bad input format.
        logging.getLogger('Tampa').critical("Input format not compatible.")
        raise SystemExit(1)

    ts = TreeStyle()
    ts.layout_fn = PF.layout
    ts.mode = "c"
    ts.show_leaf_name = False
    ts.show_branch_length = False
    ts.show_branch_support = False
    ts.min_leaf_separation = 10
    ts.arc_span = 360
    # this lets me mess a bit with font size and face size without the interaction of the two
    ts.allow_face_overlap = False

    # add white space to move the legend closer: two rows of white circles
    for _ in range(2):
        for column in (2, 1, 0):
            ts.legend.add_face(CircleFace(65, "#FFFFFF"), column=column)

    # add the legend
    legend_fs = 50
    C1 = CircleFace(100, "#1b9e77")
    C1.hz_align = True
    ts.legend.add_face(C1, column=0)
    T1 = TextFace("Predicted", fsize=legend_fs)
    T1.hz_align = True
    ts.legend.add_face(T1, column=0)
    if len(PF.ground_truth_dict) > 0:
        C2 = CircleFace(100, "#d95f02")
        C2.hz_align = True
        ts.legend.add_face(C2, column=1)
        T2 = TextFace("True", fsize=legend_fs)
        T2.hz_align = True
        ts.legend.add_face(T2, column=1)
    T3 = TextFace(f"Tool: {tool_name}", fsize=legend_fs)
    T3.hz_align = True
    ts.legend.add_face(T3, column=0)

    tree_output_file = f"{output_base_name}_tree_{rank}_{sample}.{file_type}"
    tree.render(tree_output_file, h=5.2, w=5, tree_style=ts, units="in", dpi=output_dpi)

    if not plot_l1:
        return

    # if you asked for L1 too, then plot that
    true_abundance_at_rank = []
    predicted_abundance_at_rank = []
    for node in tree.get_leaves():
        if node.rank != rank:
            continue
        tax_id = str(node.taxid)
        if tax_id in PF.ground_truth_tax_id_to_percentage:
            true_abundance_at_rank.append(PF.ground_truth_tax_id_to_percentage[tax_id] / 100.)
        else:
            true_abundance_at_rank.append(0)
        if tax_id in PF.profile_tax_id_to_percentage:
            predicted_abundance_at_rank.append(PF.profile_tax_id_to_percentage[tax_id] / 100.)
        else:
            predicted_abundance_at_rank.append(0)

    data = np.zeros((len(true_abundance_at_rank), 2))
    data[:, 0] = np.array(true_abundance_at_rank)
    data[:, 1] = np.array(predicted_abundance_at_rank)
    df = pd.DataFrame(data, columns=['True', 'Predicted'])

    ax = seaborn.scatterplot(x='True', y='Predicted', data=df, color='b', s=55)
    eps = 1
    ax.set_aspect('equal')
    max_val = np.max(data) + eps
    ax.set_xlim(-.5, max_val)
    ax.set_ylim(-.5, max_val)
    ax.set_xbound(-.5, max_val)
    ax.set_ybound(-.5, max_val)

    # Diagonal y = x, plus a red vertical segment from each point to it.
    diagonal = np.linspace(0, max_val, 100)
    plt.plot(diagonal, diagonal, color='k')
    for (x, y) in zip(true_abundance_at_rank, predicted_abundance_at_rank):
        if x != y:
            ax.vlines(x, min(x, y), max(x, y), colors='r')

    plt.title(f"Tool: {tool_name}")
    plt.tight_layout()
    l1_out_file = f"{output_base_name}_L1_{rank}.{file_type}"
    plt.savefig(l1_out_file, dpi=output_dpi)
def scan_internals_pearR(tree, size, threshold, sources="none", simpson_threhold=0.4):
    """Scan clades for a temporal signal and render the annotated tree as a PDF.

    For every node with at least ``size`` leaves, the leaf years (second
    "_"-separated field of the leaf name) are correlated with the distance
    from the node to each leaf. A clade whose Spearman R^2, or failing that
    Pearson R^2, reaches ``threshold`` is written to
    ``<filepath>/time_signal_trees/`` together with a list of its leaf names.

    When ``sources`` is not "none", the fifth name field of each leaf is
    collected per clade, passed through the module's ``Uniq`` and ``simpson``
    helpers, and the resulting index is drawn on the branch (blue when
    <= ``simpson_threhold``, green otherwise) and plotted as a distribution.

    Relies on module-level ``filepath``, ``layout`` and ``nstyle``; rebinds
    the module-level ``t`` to the loaded tree.

    :param tree: tree file name, relative to ``filepath``
    :param size: minimum number of leaves for a node to be analysed
    :param threshold: minimum R^2 for a clade to be exported
    :param sources: "none" disables the source-diversity part
    :param simpson_threhold: cut-off between the blue and green branch colour
    """
    global t
    import math
    import seaborn
    import numpy as np
    from scipy.stats import pearsonr, spearmanr

    use_sources = sources != "none"
    R2_list = []
    S_index_list = []
    internals_dict = {}
    avoid_sources = ["Unknown"]  # sources to be omitted

    t = Tree(os.path.join(filepath, tree), format=0)

    path_trees = 'time_signal_trees'
    out_dir = filepath + '/' + path_trees
    # The original created `time_signal_trees` in the current directory but
    # wrote into <filepath>/time_signal_trees; create the directory that is
    # actually written to.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    def _export_clade(node, r_val, p_val, feature_name, index):
        """Write the clade and its leaf list to disk and tag the node/leaves."""
        r2 = r_val * r_val
        nodetree = str(index) + '_R2_' + str(round(r2, 2)) + '.tree'
        tree_file = out_dir + '/' + nodetree
        node.write(outfile=tree_file, format=0)
        nt = Tree(tree_file)
        leaves = [leaf.name.replace("'", "") for leaf in nt]
        leave_first = leaves[0].split('_')[0]
        leaves_path = tree_file + '.' + str(len(leaves)) + '.' + leave_first + '.leaves.txt'
        # `with` closes the handle; the original left one file open per clade.
        with open(leaves_path, 'w') as leavesfile:
            leavesfile.write("\n".join(leaves))
        internals_dict[node] = r_val, p_val
        node.add_features(**{feature_name: int(r2 * 50)})
        R2_list.append(r2)
        for leaf in node:
            leaf.add_features(showname=True)

    i = 0
    for node in t.traverse():
        if len(node) < size:
            continue

        dist_list = []
        year_list = []
        source_list = []
        node.add_features(nodetype='internal')
        for leaf in node:  # may change with different label format
            year_list.append(leaf.name.split('_')[1])
            dist_list.append(node.get_distance(leaf))
            if use_sources:
                s = leaf.name.split("_")[4]
                if s not in avoid_sources:
                    source_list.append(s)

        # Builtin int: the `np.int` alias no longer exists in current NumPy.
        x_years = np.asarray(year_list).astype(int)
        y_dists = np.asarray(dist_list)
        R, P = spearmanr(x_years, y_dists)
        Rpear, Ppear = pearsonr(x_years, y_dists)

        if use_sources:
            source_names, source_fre = Uniq(source_list)
            s_index = simpson(source_fre)
            S_index_list.append(s_index)

        if not math.isnan(R):
            if R * R >= threshold:
                i += 1
                _export_clade(node, R, P, 'Rsize', i)
            elif Rpear * Rpear >= threshold:
                i += 1
                _export_clade(node, Rpear, Ppear, 'Rpearsize', i)
            else:
                internals_dict[node] = R, P
                R2_list.append(R * R)

        # NOTE(review): the source had lost its indentation; the source
        # annotation is applied to every analysed node here, whether or not
        # R is NaN -- confirm against the original layout.
        if use_sources:
            # more clonal, low diversity -> blue
            shade = "blue" if s_index <= simpson_threhold else "green"
            # `nstyle` is not defined in this function; it is the shared
            # module-level style object.
            nstyle["hz_line_color"] = shade
            node.set_style(nstyle)
            source_text = TextFace('S=' + str(round(s_index, 2)), fgcolor=shade, fsize=15)
            node.add_face(source_text, column=1, position='branch-bottom')

    if use_sources:
        seaborn.set(style="white", palette="muted", color_codes=True)
        sns_plot2 = seaborn.distplot(np.array(S_index_list), rug=True)
        fig2 = sns_plot2.get_figure()
        fig2.savefig(filepath + tree.rsplit('.')[0] + "_simpson_index_distribution.png")
        sns_plot2.clear()

    ns = NodeStyle()
    ns["vt_line_width"] = 2
    ns["hz_line_width"] = 2
    ns["size"] = 0
    for node in t.traverse():
        node.set_style(ns)

    ts = TreeStyle()
    ts.layout_fn = layout
    ts.mode = "c"
    ts.scale = 180
    ts.show_leaf_name = False
    ts.force_topology = True
    ts.allow_face_overlap = True
    outpath = os.path.join(filepath, tree.rsplit('.')[0] + "_time_signals.pdf")
    t.render(outpath, tree_style=ts)
def render(self, outfile, idlabel=False, isolabel=False, colormap=None, chain_split=None):
    '''Render to image file, filetype inferred from suffix, svg for color images.

    :param outfile: path of the image to write
    :param idlabel: if True, label nodes with their name and also write the
        node sequences to ``<outfile without suffix>.fasta``
    :param isolabel: if True, nodes that have an ``isotype`` attribute are
        labelled with their isotype codes instead of their frequency
    :param colormap: optional mapping node name -> colour string, or -> dict
        of colour -> count (the latter is drawn as a pie chart)
    :param chain_split: not used by this method
    '''

    def my_layout(node):
        if colormap is None or node.name not in colormap:
            circle_color = 'lightgray'
        else:
            circle_color = colormap[node.name]
        text_color = 'black'
        single_color = isinstance(circle_color, str)
        scaled = 10 * scipy.sqrt(node.frequency)

        if single_color:
            if isolabel and hasattr(node, 'isotype'):
                nl = ''.join(
                    sorted(set([ISO_SHORT[iss] for iss in node.isotype]),
                           key=lambda x: ISO_TYPE_charORDER[x]))
            else:
                nl = str(node.frequency)
            label = {'text': nl, 'color': text_color} if node.frequency > 0 else None
            C = CircleFace(radius=max(3, scaled), color=circle_color, label=label)
            C.rotation = -90
            C.hz_align = 1
            faces.add_face_to_node(C, node, 0)
        else:
            P = PieChartFace(
                [100 * x / node.frequency for x in circle_color.values()],
                2 * scaled,
                2 * scaled,
                colors=[(color if color != 'None' else 'lightgray')
                        for color in list(circle_color.keys())],
                line_color=None)
            T = TextFace(' '.join([str(x) for x in list(circle_color.values())]),
                         tight_text=True)
            T.hz_align = 1
            T.rotation = -90
            faces.add_face_to_node(P, node, 0, position='branch-right')
            faces.add_face_to_node(T, node, 1, position='branch-right')

        # The original followed this with an `elif ... and False:` branch
        # that could never run; it has been removed.
        if idlabel:
            T = TextFace(node.name, tight_text=True, fsize=6)
            T.rotation = -90
            T.hz_align = 1
            faces.add_face_to_node(T, node, 1 if single_color else 2,
                                   position='branch-right')

    for node in self.tree.traverse():
        nstyle = NodeStyle()
        nstyle['size'] = 0
        # Only non-root nodes whose sequence uses exactly the letters A, C,
        # G and T are compared with their parent.
        if node.up is not None and set(node.sequence.upper()) == set('ACGT'):
            aa = translate(node.sequence)
            aa_parent = translate(node.up.sequence)
            nonsyn = hamming_distance(aa, aa_parent)
            if '*' in aa:
                nstyle['bgcolor'] = 'red'
            if nonsyn > 0:
                nstyle['hz_line_color'] = 'black'
                nstyle['hz_line_width'] = nonsyn
            else:
                nstyle['hz_line_type'] = 1
        node.set_style(nstyle)

    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.rotation = 90
    ts.draw_aligned_faces_as_table = False
    ts.allow_face_overlap = True
    ts.layout_fn = my_layout
    ts.show_scale = False
    self.tree.render(outfile, tree_style=ts)

    # If we labelled seqs, let's also write the alignment out so we have the
    # sequences (including of internal nodes):
    if idlabel:
        aln = MultipleSeqAlignment([])
        for node in self.tree.traverse():
            aln.append(
                SeqRecord(Seq(str(node.sequence), generic_dna),
                          id=node.name,
                          description='abundance={}'.format(node.frequency)))
        # `with` guarantees the handle is flushed and closed.
        with open(os.path.splitext(outfile)[0] + '.fasta', 'w') as fasta_handle:
            AlignIO.write(aln, fasta_handle, 'fasta')
def renderingTreeImage(self):
    """Draw the rooted tree with intron and domain motif tracks and save it as a PDF.

    Reads ``Input/ProteinInput`` (FASTA) and ``execs/tmp/rooted_tree.nwk``,
    shows the tree in the GUI and renders it to
    ``CompletedTrees/<self.run_name>.pdf``. Appends to
    ``self.input_protein_accession_number`` and ``self.input_protein_sequence``
    and fills ``self.accession_dict_with_introns``; also reads
    ``self.exon_lengths`` and ``self.msa_aligned_protein``.

    Written so that it runs unchanged on Python 2 and Python 3 (single
    argument ``print(...)`` calls, ``items()``, ``list(d.keys())``).
    """
    path = os.path.join('Input', 'ProteinInput')
    for record in SeqIO.parse(path, 'fasta'):
        self.input_protein_accession_number.append(record.id)
        self.input_protein_sequence.append(record.seq)

    with open(os.path.join('execs', 'tmp', "rooted_tree.nwk")) as nwk_tree_handle:
        nwk_tree = nwk_tree_handle.read()

    t = Tree(nwk_tree)
    print(t)
    print('\n')

    ts = TreeStyle()
    ts.title.add_face(TextFace(
        'PhyloEpsilon - Protein Ortholog Finding Tool by Bryan Dighera',
        fsize=16,
    ), column=0)
    ts.allow_face_overlap = True
    ts.show_leaf_name = True
    ts.show_branch_support = True

    # Compiled once instead of once per leaf; raw strings keep the patterns
    # themselves unchanged.
    np_xp_pattern = re.compile(r'N[P]|X[P]')
    digits_pattern = re.compile(r'\d+.\d')
    location_pattern = re.compile(r'(?<!=\W)\d+')

    # The leaf order does not change below, so it is read once.
    tree_leaf_names = t.get_leaf_names()

    leaf_names = []
    for leaf in tree_leaf_names:
        # A leaf matching neither pattern raises AttributeError here, as in
        # the original.
        np_xp_name = np_xp_pattern.search(leaf).group()
        digits_name = digits_pattern.search(leaf).group()
        final_accession = str(np_xp_name + '_' + digits_name)
        print(final_accession)
        leaf_names.append(final_accession)

    P = Protein()
    protein_domains, domain_colors, unrepeated_domains = P.Domains()
    print(domain_colors)

    # Creates a dictionary that corresponds the protein accession number to
    # its corresponding introns
    for idx in range(len(leaf_names)):
        self.accession_dict_with_introns[
            self.input_protein_accession_number[idx]] = self.exon_lengths[idx]

    print('protein accession number: ' + '%s' % self.input_protein_accession_number)
    print('Accession dict: ' + '%s' % self.accession_dict_with_introns + '\n')

    # Colour that marks each intron phase on the diagram.
    phase_colors = {0: "Grey", 1: "Black", 2: "Blue"}

    # Iterates through the accession numbers that correspond to the order of
    # the leaves of the phylogenetic tree to retrieve introns and build fig
    for i, accession_number in enumerate(leaf_names):
        intron_motifs = [[0, 0, "[]", None, 12, "White", "White", None]]

        # Checks the accession number against the dictionary and retrieves
        # the corresponding introns, if no introns then doesn't append any
        if accession_number in self.accession_dict_with_introns:
            exon_list = self.accession_dict_with_introns[accession_number]
            print('%s %s' % (accession_number, exon_list))
            print(exon_list)
            for exon_length in exon_list:
                if str(exon_length) != 'NONE':
                    for location in exon_length:
                        split_exon_location = str(location).split('-')
                        exon_start = int(split_exon_location[0])
                        exon_end = int(split_exon_location[1])
                        # Floor division gives the same result on Python 2
                        # and 3 (the original relied on Python 2's `/`).
                        protein_seq_exon_location = exon_end // 3
                        # Calculates the intron phase; the phase selects the
                        # motif colour
                        intron_phase = (exon_end - exon_start) % 3
                        phase_color = phase_colors[intron_phase]
                        intron_motifs.append([
                            protein_seq_exon_location - 2,
                            protein_seq_exon_location + 2, "[]", None, 5,
                            phase_color, phase_color, None
                        ])
                else:
                    # NOTE(review): the source had lost its indentation; this
                    # message is paired with the 'NONE' check -- confirm.
                    print('NO INTRONS FOUND FOR RECORD')

        print(str(intron_motifs) + '\n')

        msa_protein_seq = self.msa_aligned_protein[i].strip('-')
        # ete3 module that adds the introns (motifs) to the phylogenetic tree
        seqFace = SeqMotifFace(str(msa_protein_seq), gapcolor="black", seq_format='line',
                               scale_factor=1, motifs=intron_motifs)
        (t & tree_leaf_names[i]).add_face(seqFace, 0, "aligned")

    # Iterates through the accession numbers that correspond to the order of
    # the leaves of the phylogenetic tree and compare to domain dict values
    # TODO: Add the legend and possibly give a number to each of the domains
    # so they can be easily identified in the legend
    for n, accession_number in enumerate(leaf_names):
        domain_motifs = [[0, 0, "[]", None, 12, "White", "White", None]]
        for domain in protein_domains:
            if accession_number not in domain:
                continue
            domain_entries = list(domain.values())[0]
            print('leaf accession #: ' + '%s' % accession_number)
            print('domains accession: ' + '%s' % list(domain.keys())[0])
            print(domain_entries)
            for each_domain in domain_entries:
                domain_motif_color = domain_colors[each_domain[0]]
                bounds = each_domain[1].split(':')
                try:
                    start_domain_loc = int(bounds[0])
                    end_domain_loc = int(bounds[1])
                except ValueError:
                    # A bound is not a plain integer: take the first run of
                    # digits from each side instead.
                    start_domain_loc = int(location_pattern.search(str(bounds[0])).group())
                    end_domain_loc = int(location_pattern.search(str(bounds[1])).group())
                domain_motifs.append([
                    start_domain_loc, end_domain_loc, "<>", 20, 20,
                    'Black', domain_motif_color, 'arial|8|black|'
                ])

        print(domain_motifs)
        msa_protein_seq = self.msa_aligned_protein[n].strip('-')
        print(msa_protein_seq)
        print(len(msa_protein_seq))
        print('*' * 100)
        domainFace = SeqMotifFace(str(msa_protein_seq), gapcolor="black", seq_format='line',
                                  scale_factor=1, motifs=domain_motifs)
        (t & tree_leaf_names[n]).add_face(domainFace, 0, "aligned")

    # Creating the legend
    print(protein_domains)
    for single_unrepeat, colors in domain_colors.items():
        ts.legend.add_face(TextFace(single_unrepeat), column=0)
        ts.legend.add_face(SeqMotifFace(
            "A" * 45, [[0, 80, "[]", None, 8, "Black", colors, None]]), column=1)
    ts.legend_position = 1

    file_name = self.run_name
    t.show(tree_style=ts)
    t.render(os.path.join('CompletedTrees', file_name + '.pdf'), tree_style=ts)
def render(self, outfile, idlabel=False, colormap=None, show_support=False, chain_split=None):
    '''render to image file, filetype inferred from suffix, svg for color images

    :param outfile: path of the image to write
    :param idlabel: if True, label nodes with their name and also write the
        node sequences to ``<outfile without suffix>.fasta``
    :param colormap: optional mapping node name -> colour string, or -> dict
        of colour -> count (the latter is drawn as a pie chart)
    :param show_support: forwarded to ``TreeStyle.show_branch_support``
    :param chain_split: sequence index separating the two chains; branches
        are coloured by which side differs from the parent
    :raises NotImplementedError: if ``chain_split`` and ``self.frame`` are
        both set
    '''

    def my_layout(node):
        if colormap is None or node.name not in colormap:
            circle_color = 'lightgray'
        else:
            circle_color = colormap[node.name]
        text_color = 'black'
        single_color = isinstance(circle_color, str)
        scaled = 10 * scipy.sqrt(node.frequency)

        if single_color:
            label = ({'text': str(node.frequency), 'color': text_color}
                     if node.frequency > 0 else None)
            C = CircleFace(radius=max(3, scaled), color=circle_color, label=label)
            C.rotation = -90
            C.hz_align = 1
            faces.add_face_to_node(C, node, 0)
        else:
            P = PieChartFace(
                [100 * x / node.frequency for x in circle_color.values()],
                2 * scaled,
                2 * scaled,
                colors=[(color if color != 'None' else 'lightgray')
                        for color in list(circle_color.keys())],
                line_color=None)
            T = TextFace(' '.join([str(x) for x in list(circle_color.values())]),
                         tight_text=True)
            T.hz_align = 1
            T.rotation = -90
            faces.add_face_to_node(P, node, 0, position='branch-right')
            faces.add_face_to_node(T, node, 1, position='branch-right')

        if idlabel:
            T = TextFace(node.name, tight_text=True, fsize=6)
            T.rotation = -90
            T.hz_align = 1
            faces.add_face_to_node(T, node, 1 if single_color else 2,
                                   position='branch-right')

    for node in self.tree.traverse():
        nstyle = NodeStyle()
        nstyle['size'] = 0
        # Only non-root nodes whose sequence uses exactly the letters A, C,
        # G and T are compared with their parent.
        if node.up is not None and set(node.sequence.upper()) == set('ACGT'):
            if chain_split is not None:
                if self.frame is not None:
                    raise NotImplementedError(
                        'frame not implemented with chain_split')
                leftseq_mutated = hamming_distance(
                    node.sequence[:chain_split],
                    node.up.sequence[:chain_split]) > 0
                rightseq_mutated = hamming_distance(
                    node.sequence[chain_split:],
                    node.up.sequence[chain_split:]) > 0
                if leftseq_mutated and rightseq_mutated:
                    nstyle['hz_line_color'] = 'purple'
                    nstyle['hz_line_width'] = 3
                elif leftseq_mutated:
                    nstyle['hz_line_color'] = 'red'
                    nstyle['hz_line_width'] = 2
                elif rightseq_mutated:
                    nstyle['hz_line_color'] = 'blue'
                    nstyle['hz_line_width'] = 2
            if self.frame is not None:
                # Whole codons only, starting at the (1-based) frame. The
                # same window, derived from this node's length, is applied to
                # the parent sequence, as in the original.
                start = self.frame - 1
                stop = start + 3 * ((len(node.sequence) - start) // 3)
                aa = Seq(node.sequence[start:stop], generic_dna).translate()
                aa_parent = Seq(node.up.sequence[start:stop], generic_dna).translate()
                nonsyn = hamming_distance(aa, aa_parent)
                if '*' in aa:
                    nstyle['bgcolor'] = 'red'
                if nonsyn > 0:
                    nstyle['hz_line_color'] = 'black'
                    nstyle['hz_line_width'] = nonsyn
                else:
                    nstyle['hz_line_type'] = 1
        node.set_style(nstyle)

    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.rotation = 90
    ts.draw_aligned_faces_as_table = False
    ts.allow_face_overlap = True
    ts.layout_fn = my_layout
    ts.show_scale = False
    ts.show_branch_support = show_support
    self.tree.render(outfile, tree_style=ts)

    # if we labelled seqs, let's also write the alignment out so we have the
    # sequences (including of internal nodes)
    if idlabel:
        aln = MultipleSeqAlignment([])
        for node in self.tree.traverse():
            aln.append(
                SeqRecord(Seq(str(node.sequence), generic_dna),
                          id=str(node.name),
                          description='abundance={}'.format(node.frequency)))
        # `with` guarantees the handle is flushed and closed.
        with open(os.path.splitext(outfile)[0] + '.fasta', 'w') as fasta_handle:
            AlignIO.write(aln, fasta_handle, 'fasta')