def _ungapped_block_motifs(sequence):
    """Return one rounded-box motif per run of non-gap characters in *sequence*."""
    motifs = []
    run_start = None
    last_index = len(sequence) - 1
    for pos, residue in enumerate(sequence):
        if residue != '-':
            if run_start is None:
                run_start = pos
            if pos == last_index:
                # The sequence ends inside a run: close it at the last column.
                motifs.append([run_start, pos, "()", 0, 12,
                               "slategrey", "slategrey", None])
                run_start = None
        elif run_start is not None:
            # A gap terminates the current run at the previous column.
            motifs.append([run_start, pos - 1, "()", 0, 12,
                           "grey", "slategrey", None])
            run_start = None
    return motifs


def ly_block_alg(node):
    """Layout function: draw a leaf's alignment row as condensed blocks.

    Every maximal run of non-gap ('-') characters in ``node.sequence``
    becomes one "()" motif; the regions between and after the motifs are
    drawn as lines.  Nodes that are not leaves, or that have no
    'sequence' feature, are left untouched.

    The face is scaled by ALG_SCALE and added at column ALG_START_COL of
    the aligned area.
    """
    if not node.is_leaf() or 'sequence' not in node.features:
        return

    # The original built a throw-away SeqMotifFace(node.sequence, []) here
    # before computing the motifs; only the final face is ever used.
    motifs = _ungapped_block_motifs(node.sequence)
    seqFace = SeqMotifFace(node.sequence, motifs,
                           intermotif_format="line",
                           seqtail_format="line",
                           scale_factor=ALG_SCALE)
    add_face_to_node(seqFace, node, ALG_START_COL, aligned=True)
def get_example_tree():
    """Build a nine-leaf demo tree showing different SeqMotifFace settings.

    Each leaf A..I receives one aligned face (column 0) built from the
    module-level ``seq`` and motif lists, each with a different
    combination of sequence/gap formats.

    Returns the decorated tree.
    """
    t = Tree("( (A, B, C, D, E, F, G), H, I);")

    # (leaf name, positional args, keyword args) for each SeqMotifFace.
    face_specs = [
        ("A", (seq,), {"gapcolor": "red"}),
        ("B", (seq,), {"seq_format": "line", "gap_format": "blank"}),
        ("C", (seq,), {"seq_format": "line"}),
        ("D", (seq,), {"seq_format": "()"}),
        ("E", (seq,), {"motifs": simple_motifs, "seq_format": "-"}),
        ("F", (), {"seq": None, "motifs": simple_motifs,
                   "gap_format": "blank"}),
        ("G", (seq,), {"motifs": mixed_motifs, "seq_format": "-"}),
        ("H", (), {"seq": None, "motifs": box_motifs,
                   "gap_format": "line"}),
        ("I", (seq[30:60],), {"seq_format": "seq"}),
    ]
    for leaf_name, face_args, face_kwargs in face_specs:
        seqFace = SeqMotifFace(*face_args, **face_kwargs)
        (t & leaf_name).add_face(seqFace, 0, "aligned")
    return t
def add_domains_to_tree(self, t):
    ''' Draw each leaf's domains as evenly spaced boxes (no sequence / MSA).

    The annotated start/stop of a domain is ignored: the n-th domain of a
    gene is drawn from n*55 to n*55+50, so only order and identity show.
    '''
    for leaf in t:
        gene_id = leaf.name
        motifs = []
        # Genes without annotated domains yield an empty list here, so no
        # motif is created and no face is added for them.
        annotated = self.domain_dict.get(gene_id, [])
        for slot, (domname, _start, _stop) in enumerate(annotated):
            left = slot * 55
            color_n = self.domains.index(domname)
            try:
                dom_color = COLORS[color_n]
            except IndexError:
                # More distinct domains than palette entries.
                dom_color = 'gray'
            motifs.append([
                left, left + 50, '()', None, 10, None,
                'rgradient:{}'.format(dom_color),
                'arial|6|black|{}'.format(domname)
            ])
        if motifs:
            domface = SeqMotifFace(None, motifs=motifs, gap_format='line')
            (t & gene_id).add_face(domface, column=0, position='aligned')
    return
def generate_neighbour_face(node_ns, ns_colors, block, offsets):
    """Build one SeqMotifFace laying out a block's neighbours per segment.

    ``offsets`` must support ``.tolist()`` and slicing; consecutive pairs
    of offsets delimit the segments, and ``node_ns`` supplies one entry
    per segment.  An empty entry produces a blank segment; otherwise the
    entry is unpacked as ``(nbr1, nbr2, copies, orientations)`` and drawn
    as: first neighbour, one block end ('h' for '+', 't' otherwise) per
    orientation at 50-unit steps, second neighbour, then blank padding up
    to the next offset.  A grey separator is placed 10 units before every
    interior offset.
    """
    def blank(start, end):
        return [start, end, "blank", None, 10, None, None, None]

    motifs = [blank(0, 0)]
    following_offsets = offsets.tolist()[1:]
    for cur_offset, next_offset, node_n in zip(offsets, following_offsets,
                                               node_ns):
        if len(node_n) == 0:
            motifs.append(blank(cur_offset, next_offset))
            continue

        nbr1, nbr2, copies, orientations = node_n
        motifs.extend(get_neighbour_motifs(nbr1, ns_colors, cur_offset))
        for copy_no, orientation in enumerate(orientations, start=1):
            block_end = 'h' if orientation == '+' else 't'
            motifs.extend(
                get_neighbour_motifs(f'{block}' + block_end, ns_colors,
                                     cur_offset + copy_no * 50))
        after_copies = cur_offset + (copies + 1) * 50
        motifs.extend(
            get_neighbour_motifs(nbr2, ns_colors, after_copies, True))
        motifs.append(blank(after_copies + 40, next_offset))

    for offset in offsets[1:-1]:
        separator_pos = offset - 10
        motifs.append([separator_pos, separator_pos, "[]", None, 10,
                       'grey', 'grey', None])
        motifs.append(blank(separator_pos, offset))
    return SeqMotifFace('', motifs=motifs)
def get_example_tree_2():
    """Load the module-level ``nwTree`` and attach a motif face per entry.

    The newick text is flattened into ``name:rest`` items by stripping
    parentheses and the semicolon and splitting on commas; the part of
    each item before the first ':' is used both as the key into
    ``motifDict_2`` and as the node name to decorate.

    Returns the decorated tree.
    """
    t = Tree(nwTree)
    flattened = (nwTree.replace('(', '')
                       .replace(')', '')
                       .replace(';', '')
                       .replace(',', '\t'))
    for item in flattened.split('\t'):
        node_name = item[:item.index(':')]
        seqFace2 = SeqMotifFace(seq, motifs=motifDict_2[node_name],
                                seq_format="-", gap_format="blank")
        (t & node_name).add_face(seqFace2, 0, "aligned")
    return t
def add_msa_with_domains_to_tree(self, t):
    '''
    For every leaf of the tree, add an aligned face showing its gapped
    MSA row together with domain boxes and, when ``cds_length_dict`` is
    available, thin black markers at the intron positions.
    '''
    for leaf in t:
        gene_id = leaf.name
        gapped_seq = self.msa_fasta_dict[gene_id]

        motifs = []
        # Genes without annotated domains simply contribute no motifs.
        for domname, start, stop in self.domain_dict.get(gene_id, []):
            color_n = self.domains.index(domname)
            try:
                dom_color = COLORS[color_n]
            except IndexError:
                dom_color = 'gray'
            # domain markers
            motifs.append([
                start, stop, '()', None, 10, None,
                'rgradient:{}'.format(dom_color),
                'arial|6|black|{}'.format(domname)
            ])

        if hasattr(self, 'cds_length_dict'):
            if gene_id in self.cds_length_dict:
                # Exon boundaries are the running sum of CDS lengths; the
                # last CDS is skipped because no intron follows it.
                current_pos = 1
                for cds_len in self.cds_length_dict[gene_id][:-1]:
                    current_pos += cds_len
                    gapped_seq = self.msa_fasta_dict[gene_id]
                    try:
                        gapped_pos, __ = self.correct_borders_for_gaps(
                            gapped_seq, int(round(current_pos)), 0)
                    except KeyError:
                        raise Exception(
                            'The protein sequence of {} is shorter '
                            'in the MSA than in the GFF!'.format(gene_id))
                    # black line that marks the intron positions
                    motifs.append([
                        (gapped_pos - 1), (gapped_pos + 1), '[]', None, 10,
                        None, 'black', None
                    ])
            else:
                print('Warning: No GFF entry found for {}'.format(gene_id))

        seqface = SeqMotifFace(gapped_seq,
                               gapcolor='gray',
                               motifs=motifs,
                               seq_format=self.seq_style,
                               gap_format=self.gap_style,
                               scale_factor=self.scale_factor)
        (t & gene_id).add_face(seqface, column=0, position='aligned')
    return
def _tree_visual(typeListFilter, motifData, hmmPath, finalPath, cachePath):
    """Render 'phylogeneticTree.pdf' with one motif face per type.

    The tree is read from ``<cachePath>/sequence.fasta.phy``.  For the
    sq-th type a poly-A placeholder sequence, sized from the number of
    motifs in ``motifData[sq]``, carries the motifs and is attached to
    the node named ``'Type <sq+1> [<...>]'``.  The PDF is written into
    ``finalPath``.  ``hmmPath`` is not used here.
    """
    tree_file = os.path.join(cachePath, 'sequence.fasta.phy')
    t = Tree(tree_file)
    ts = TreeStyle()
    for sq, _ in enumerate(typeListFilter):
        type_motifs = motifData[sq]
        placeholder_seq = (10 + 60 * len(type_motifs)) * 'A'
        seqFace = SeqMotifFace(placeholder_seq, motifs=type_motifs,
                               seq_format="-")
        # NOTE(review): `typeList` is not a parameter of this function; it
        # must exist at module level.  Possibly `typeListFilter` was
        # intended -- confirm against the caller.
        node_name = 'Type ' + str(sq + 1) + ' [' + str(typeList[sq][1]) + ']'
        (t & node_name).add_face(seqFace, 0, "aligned")
    out_file = os.path.join(finalPath, 'phylogeneticTree.pdf')
    t.render(out_file, tree_style=ts)
def _create_tree(tree, fasta, out, color):
    """Render *tree* to *out* with each leaf's sequence drawn next to it.

    Sequences come from the FASTA file *fasta* and are looked up by leaf
    name.  Leaves whose name appears in the mapping parsed from the
    colour file *color* also get that value as background colour.
    """
    seqs = SeqGroup(fasta, format="fasta")
    t = Tree(tree)
    colors = _parse_color_file(color)
    for name in t.get_leaf_names():
        seqFace = SeqMotifFace(seqs.get_seq(name), seq_format="()")
        # Several leaves may share a name; all of them are decorated.
        for leaf in t.get_leaves_by_name(name):
            if name in colors:
                ns = NodeStyle()
                ns['bgcolor'] = colors[name]
                leaf.set_style(ns)
            leaf.add_face(seqFace, 0, 'aligned')
    t.render(out)
def layout(node):
    """Layout function: support on internal nodes, names + alignment on leaves.

    Internal nodes get their support value above the branch.  Leaf names
    are expected to look like ``<taxid>.<sequence name>``; each leaf gets

    * column 1: the sequence name,
    * column 2: the predicted name from ``predict_name`` ('--' if the
      lookup fails),
    * aligned column 3: the alignment row (``node.sequence``),

    and ``node.name`` is replaced by the scientific name that ``ncbi``
    reports for the taxid.
    """
    node.img_style["size"] = 0

    # add bootstrap in not-leaf nodes
    if not node.is_leaf():
        boostFace = faces.TextFace(node.support, fgcolor="grey", fsize=34)
        add_face_to_node(boostFace, node, column=0, position="branch-top")
        return

    # get taxid and name
    name_parts = node.name.split('.')
    taxid = name_parts[0]
    seq_name = name_parts[1]

    # add predicted name (eggnog-mapper).  The fallback stays best-effort,
    # but only the lookup and face construction are guarded, and
    # KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
        predNameFace = faces.TextFace(predict_name[node.name],
                                      fgcolor="salmon", fsize=34)
    except Exception:
        predNameFace = faces.TextFace('--', fgcolor="salmon", fsize=34)
    else:
        predNameFace.margin_right = 20
        predNameFace.margin_left = 20
    add_face_to_node(predNameFace, node, column=2, position="branch-right")

    # add sequence name (seq_name)
    seqNameFace = faces.TextFace(seq_name, fgcolor="grey", fsize=34)
    add_face_to_node(seqNameFace, node, column=1, position="branch-right")

    # get scientific name from taxid
    # NOTE(review): node.name is overwritten here; if this layout runs
    # again on the same node the split above will see the new name --
    # confirm the tree is rendered only once.
    sp_name = ncbi.get_taxid_translator([taxid])
    node.name = sp_name[int(taxid)]

    # add alignment
    seqFace = SeqMotifFace(node.sequence, gap_format="blank")
    add_face_to_node(seqFace, node, column=3, position="aligned")
def custom_layout(node):
    """Layout function: name/alignment faces on leaves, labels on internals.

    Leaves get their name in grey to the right of the branch and, when
    ``node.sequence`` is non-empty, the alignment row in aligned
    column 1.  Internal nodes are drawn as 3-pixel squares and get their
    name above and their support value below the branch, each only when
    it is truthy.
    """
    if not node.is_leaf():
        # FACES IN INTERNAL NODES
        node.img_style['size'] = 3
        node.img_style['shape'] = 'square'

        if node.name:
            name_face = TextFace(node.name, fgcolor='brown', fsize=10)
            name_face.margin_bottom = 1
            add_face_to_node(name_face, node, column=0,
                             position='branch-top')

        # e.g. bootstrap
        if node.support:
            support_face = TextFace(node.support, fgcolor='indianred',
                                    fsize=8)
            support_face.margin_bottom = 1
            add_face_to_node(support_face, node, column=0,
                             position='branch-bottom')
        return

    # FACES IN LEAVES
    aligned_name_face = TextFace(node.name, fgcolor='gray', fsize=11)
    add_face_to_node(aligned_name_face, node, column=0,
                     position='branch-right')

    # Only trees linked to an alignment carry a sequence.
    if node.sequence:
        seqFace = SeqMotifFace(node.sequence, gap_format="blank")
        add_face_to_node(seqFace, node, column=1, position="aligned")
def design_tree(self, node_scores="CALCULATE", plot_threshold=0):
    """Decorate ``self.processed_tree`` with alignment columns and node scores.

    Parameters:
        node_scores: the string "CALCULATE" to have ``self.calculate_nodes()``
            prepare the tree, or a sequence of scores indexed by the
            position of each node in ``traverse()`` order; in that case
            the tree is built from ``self.tree_in`` / ``self.align_in``.
        plot_threshold: internal nodes are labelled with their score only
            when it is greater than this value.

    Leaves get one aligned face per entry of ``self.position_matrix``
    holding the residue at that position.  On any error a message is
    written to stderr and the process exits with status 1.
    """
    try:
        # The isinstance guard keeps array-like scores from being compared
        # element-wise against the sentinel string.
        if isinstance(node_scores, str) and node_scores == "CALCULATE":
            self.calculate_nodes()
        else:
            # NOTE(review): the original author marked this option as
            # unverified ("DUDAS EN ESTA OPCION").
            tree = PhyloTree(self.tree_in,
                             alignment=self.align_in,
                             alg_format="fasta")
            for node_number, node in enumerate(tree.traverse()):
                node.add_feature("node_number", node_number)
                node.add_feature("node_score", node_scores[node_number])
            self.processed_tree = tree

        for node in self.processed_tree.traverse():
            if node.is_leaf():
                for draw_position, position in enumerate(self.position_matrix):
                    seqFace = SeqMotifFace(node.sequence[position],
                                           seq_format="seq")
                    (self.processed_tree & node.name).add_face(
                        seqFace, draw_position, "aligned")
            elif node.node_score > plot_threshold:
                score_face = TextFace(node.node_score)
                node.add_face(score_face, 0, "branch-top")
    except Exception as err:
        # Previously a bare except: it hid the cause and also trapped
        # KeyboardInterrupt / SystemExit.
        sys.stderr.write("Error at designing tree.\n")
        sys.stderr.write("Cause: {!r}\n".format(err))
        sys.exit(1)
    return
def get_example_tree():
    """Build a demo gene tree annotated with its alignment.

    The tree is linked to the module-level alignment ``alg``.  Leaves
    show their full sequence in the aligned area.  Internal nodes show,
    below the branch, the sub-sequences returned by ``mutation_columns``
    for the leaves beneath them, and above the branch the corresponding
    column numbers.  A header numbering every alignment column is added
    to the tree style.

    Returns:
        (tree, tree_style)
    """
    gene_tree_nw = '((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)),(Ptr_002,(Hsa_002,Mmu_002))));'
    t = PhyloTree(gene_tree_nw)
    ts = TreeStyle()
    # disable default PhyloTree Layout
    ts.layout_fn = lambda x: True

    t.link_to_alignment(alg)
    node2content = t.get_cached_content()

    # Alignment width, taken from a leaf row.  The original read
    # `lf.sequence` after the loop, relying on a list-comprehension
    # variable leaking into the function scope (Python 2 only).
    alg_length = 0
    for node in t.traverse():
        node.img_style["size"] = 0

        if node.is_leaf():
            f = SeqMotifFace(node.sequence, seq_format="seq", width=6)
            node.add_face(f, column=0, position="aligned")
            alg_length = len(node.sequence)
            continue

        leaves = node2content[node]
        # get columns with different aa
        subseqs, relevant_columns = mutation_columns(
            [lf.sequence for lf in leaves])
        for seq in subseqs:
            f = SeqMotifFace(seq, seq_format="seq", width=10, height=8)
            f.margin_top = 2
            f.margin_right = 6
            node.add_face(f, column=0, position="branch-bottom")
        for j, col in enumerate(relevant_columns):
            col_f = RectFace(10, 10, fgcolor=None, bgcolor=None,
                             label={"text": str(col), "fonttype": "Courier",
                                    "color": "black", "fontsize": 6})
            node.add_face(col_f, column=j, position="branch-top")
            col_f.margin_bottom = 2

    ts.draw_aligned_faces_as_table = False
    # `range` instead of the Python-2-only `xrange`.
    for colnum in range(alg_length):
        col_f = RectFace(10, 10, fgcolor=None, bgcolor=None,
                         label={"text": str(colnum), "fonttype": "Courier",
                                "color": "black", "fontsize": 6})
        ts.aligned_header.add_face(col_f, column=colnum)
    return t, ts
def _env_gene_color(env_name):
    """Choose the display colour of a neighbouring gene from keywords in its name."""
    keyword_colors = (
        (('iscr', 'transpos', 'tnp', 'insertion', '-like'), 'violet'),
        (('inti', 'integrase', 'xerc', 'xerd'), 'yellow'),
        (('secretion', 'mobiliza', 'moba', 'mobb', 'mobc', 'mobl',
          'plasmid', 'relaxase', 'conjugation', 'type iv'), 'green'),
        (('lactam', 'aminoglyco', 'fluoroquinolo', 'tetracyclin',
          'macrolid', 'carbapenem'), 'red'),
    )
    lowered = env_name.lower()
    color = 'orange'
    # Categories are checked in order and a later match overrides an
    # earlier one, exactly like the original sequence of `if`s.
    for keywords, category_color in keyword_colors:
        if any(keyword in lowered for keyword in keywords):
            color = category_color
    # This last check is case-sensitive and applied to the raw name.
    if 'hypothetical' in env_name:
        color = 'grey'
    return color


def _overlap_length(start_a, end_a, start_b, end_b):
    """Count the integers shared by the half-open ranges [start_a, end_a) and [start_b, end_b)."""
    return max(0, min(end_a, end_b) - max(start_a, start_b))


def _gene_motifs(entry):
    """Build the motif list for one annotated gene and its neighbouring genes."""
    gene_start = entry['start']
    gene_end = entry['stop']
    # Sort such that the greater number is end and smaller is start
    if gene_start > gene_end:
        gene_start, gene_end = gene_end, gene_start

    gene_motif = [gene_start, gene_end, '()', 2, 10, 'red', 'red',
                  'arial|10|black|' + str(entry['name'])]
    if not str(entry['frame']).startswith('-'):
        ori_motif = [gene_end, gene_end + 10, '>', 2, 10, 'red', 'red', None]
    else:
        ori_motif = [gene_start - 10, gene_start, '<', 2, 10,
                     'red', 'red', None]
    motifs = [gene_motif, ori_motif]

    for env in entry['env_genes'].values():
        color = _env_gene_color(env['env_name'])
        env_start = env['env_start']
        env_stop = env['env_stop']

        # Overlap, as a percentage of the annotated gene's length, between
        # the annotated gene and this neighbouring gene.
        total_overlap = float(_overlap_length(int(gene_start), int(gene_end),
                                              int(env_start), int(env_stop)))
        overlap_perc = float(total_overlap / int(entry['length'])) * 100
        if overlap_perc <= 70.0:
            motifs.append([env_start, env_stop, '()', 2, 10, color, color,
                           'arial|10|black|' + str(env['env_name'])])
            # Additional motif to show gene orientation
            if env['env_strand'] == '+':
                motifs.append([env_stop, env_stop + 10, '>', 2, 10,
                               color, color, None])
            else:
                motifs.append([env_start - 10, env_start, '<', 2, 10,
                               color, color, None])
    return motifs


def visualize_phylogeny(gene_dict, context_file):
    """Load the gene tree and decorate every leaf with its genetic context.

    The tree is read from ``context_file[0]`` with '.fna' replaced by
    '.unique.tree'.  The second '__'-separated field of a leaf name is
    the key into ``gene_dict``.  Each leaf receives, in the aligned area:

    * column 1: the annotated gene (red) and its neighbouring genes,
      coloured by keyword category, each with an orientation arrow;
      neighbours overlapping the annotated gene by more than 70 % of its
      length are omitted;
    * column 2: the percent identity on a background chosen by value;
    * column 3: the cluster size, when ``args.compressed`` is set.

    Side effects: ``gene_dict[key]['motifs']`` is filled for every key.

    Returns the annotated tree.
    """
    # Read in tree and assign additional information to each leaf
    t = Tree(context_file[0].replace('.fna', '.unique.tree'))
    for node in t.traverse():
        if node.is_leaf():
            gene_id = node.name.split('__')[1]
            node.add_features(organism=gene_dict[gene_id]['organism'])
            node.add_features(assembly=gene_dict[gene_id]['assembly'])
            node.add_features(pident=gene_dict[gene_id]['perc_id'])
            if args.compressed == True:
                node.add_features(
                    cluster_size=gene_dict[gene_id]['cluster_size'])

    print('decorating the tree...')

    # The motifs depend only on gene_dict, so they are built once instead
    # of once per leaf as before (the result is identical).
    for entry in gene_dict.values():
        entry['motifs'] = _gene_motifs(entry)

    # Set node style
    nst_plasmid = NodeStyle()
    nst_plasmid['bgcolor'] = 'DarkSeaGreen'
    nst_other = NodeStyle()
    # Fixed: the original rebound nst_other to the string 'AntiqueWhite'
    # instead of setting the style's background colour.
    nst_other['bgcolor'] = 'AntiqueWhite'

    # Now annotate the tree with the motifs
    for node in t.traverse():
        if not node.is_leaf():
            continue

        if 'plasmid' in node.organism:
            node.set_style(nst_plasmid)
        else:
            node.set_style(nst_other)

        seqFace = SeqMotifFace(
            seq=None,
            motifs=gene_dict[node.name.split('__')[1]]['motifs'],
            seq_format='blank', gap_format='line')
        (t & node.name).add_face(seqFace, 1, 'aligned')

        # Create box showing gene percent id
        similarity = TextFace(node.pident, fsize=8)
        similarity.margin_top = 2
        similarity.margin_bottom = 2
        similarity.margin_left = 2
        similarity.margin_right = 2

        # Set box background color based on pident
        if node.pident <= 90.0:
            similarity.background.color = 'DarkGoldenrod'
        elif node.pident <= 95.0:
            similarity.background.color = 'ForestGreen'
        elif node.pident > 95.0:
            similarity.background.color = 'YellowGreen'
        node.add_face(similarity, column=2, position='aligned')

        # Create box showing cluster size
        if args.compressed == True:
            clust_box = TextFace(node.cluster_size, fsize=8)
            clust_box.margin_top = 2
            clust_box.margin_bottom = 2
            clust_box.margin_left = 2
            clust_box.margin_right = 2
            node.add_face(clust_box, column=3, position='aligned')

    # Return the annotated tree
    return t
def layout(node):
    """Layout function: give every leaf the same aligned motif face.

    The face is built from the module-level ``seq`` and ``motifs`` with a
    scale factor of 1 and placed in aligned column 0.  Internal nodes are
    left untouched.
    """
    if not node.is_leaf():
        return
    leaf_face = SeqMotifFace(seq, motifs, scale_factor=1)
    add_face_to_node(leaf_face, node, 0, position="aligned")
def render_alignment(t, seqs):
    """Attach each sequence in *seqs* to the tree node named after it.

    Keys of *seqs* carry a one-character prefix (the FASTA '>'), which is
    dropped to obtain the node name.  The sequence is drawn residue by
    residue in aligned column 0.
    """
    for header, residues in seqs.items():
        node_name = header[1:]  # get rid of the '>' with [1:]
        face = SeqMotifFace(residues, seq_format='seq')
        (t & node_name).add_face(face, 0, "aligned")
def my_layout(node):
    """Layout function: name, sequence face and background style per node.

    Every node gets its name in aligned column 0.  Leaves also get their
    amino-acid sequence in aligned column 1, boxed in black when the
    node's ``T`` or ``C`` feature equals the string "True".

    The node style then depends on the module-level ``det_tool`` flag:
    when it is set, only ``T``/``C`` decide; otherwise the estimated
    transition / convergent-model node sets of ``g_tree.conv_events`` and
    the ``Cz`` feature are consulted as well.  Finally the simulation
    root is marked when ``det_tool`` is not set.
    """
    ## Sequence name
    name_face = TextFace(node.name, tight_text=True)
    add_face_to_node(name_face, node, column=0, position="aligned")

    ## Sequence motif
    if node.is_leaf():
        box_color = "black"
        motifs_n = []
        if node.T == "True" or node.C == "True":
            motifs_n.append([
                0, len(node.sequence), "[]", 10, 12,
                box_color, box_color, None
            ])
        motifs_n.append(
            [0, len(node.sequence), "seq", 10, 10, None, None, None])
        seq_face = SeqMotifFace(seq=node.sequence, seqtype='aa',
                                seq_format='seq', fgcolor=box_color,
                                motifs=motifs_n)
        seq_face.overlaping_motif_opacity = 1
        add_face_to_node(seq_face, node, column=1, position='aligned')

    ## Nodes style
    if det_tool:
        if node.T == "True":
            node.set_style(nstyle_T_sim)
            add_t(node)
        elif node.C == "True":
            node.set_style(nstyle_C_sim)
        else:
            node.set_style(nstyle)
    else:
        # if not det_tool no background
        node_nd = int(node.ND)
        in_t_est = node_nd in g_tree.conv_events.nodesWithTransitions_est
        if node.T == "True":
            node.set_style(nstyle_T_sim_est if in_t_est else nstyle_T_sim)
            add_t(node)
        elif in_t_est:
            node.set_style(nstyle_T_est)
        else:
            in_c_est = (node_nd in
                        g_tree.conv_events.nodesWithConvergentModel_est)
            if node.C == "True":
                node.set_style(
                    nstyle_C_sim_est if in_c_est else nstyle_C_sim)
            elif in_c_est:
                node.set_style(nstyle_C_est)
            elif cz_nodes_s and node.Cz != "False":
                node.set_style(cz_nodes_s[int(node.Cz)])
                if int(node.ND) == int(cz_nodes[int(node.Cz)][0]):
                    add_t(node)
            else:
                node.set_style(nstyle)

    if int(node.ND) == sim_root_ND and not det_tool:
        add_sim_root(node)
def get_example_tree(File):
    """Build, colour and prune the tree for *File*, and render domain legends.

    *File* is a file name looked up under several directories below the
    current working directory (``for_pic/1_tree_nwk``, ``for_pic/2_MSA``,
    ``for_pic/3_domain``).  Besides returning the tree, the function
    writes ``out_spliment/<File>``, ``node/<File>``, ``123123123.txt``,
    ``out_legend_all.png`` and ``out_legend/<File minus 4 chars>.png``.

    It relies on module-level names that are not defined here:
    ``dic_domain_pic_pic``, ``ancestor_grop``, ``all_clad``,
    ``collor_list``, ``save_node``, ``dell_node``, ``seq_seq`` and
    ``layout``.

    NOTE(review): the source of this function had lost its line breaks
    and indentation; the nesting below is reconstructed from the
    statement order and should be confirmed against the original file.
    """
    adres=os.getcwd()
    file_out_supliment = open(adres+"/out_spliment/"+File, 'w')
    node_file = open(adres+"/node/"+File, 'w')
    #t = Tree("( (A, B, C, D, E, F, G), H, I);")
    # Read all domains: assign each distinct domain name (3rd tab-separated
    # column) found in any domain file the next entry of dic_domain_pic_pic.
    domain_all_legend={}
    file_all_domen=os.listdir(adres+"/for_pic/1_tree_nwk/")
    # NOTE(review): list.remove raises ValueError when ".DS_Store" is absent.
    file_all_domen.remove(".DS_Store")
    file_all_domen.sort()
    i=0
    for file_domain in file_all_domen:
        file_open_domain = open(adres+"/for_pic/3_domain/"+file_domain, 'r')
        for line in file_open_domain:
            line_=line.split("\t")
            try:
                if not (line_[2] in domain_all_legend):
                    domain_all_legend.setdefault(line_[2],dic_domain_pic_pic[i])
                    i+=1
                    # NOTE(review): `>` lets i reach len(...) itself, so the
                    # next lookup fails and is swallowed by the except below.
                    if i>len(dic_domain_pic_pic):
                        i=0
            except:
                a=0
    # Read the whole newick file into one string.
    mem=""
    file_open = open(adres+"/for_pic/1_tree_nwk/"+File, 'r')
    for line in file_open:
        mem=mem+line
    tt = Tree(mem, format=0)
    # One shared style object for all nodes: black, thick lines, no node dot.
    style = NodeStyle()
    style["fgcolor"] = "#000000"
    style["size"] = 0
    style["vt_line_color"] = "#000000"
    style["hz_line_color"] = "#000000"
    style["vt_line_width"] = 4
    style["hz_line_width"] = 4
    style["vt_line_type"] = 8 # 0 solid, 1 dashed, 2 dotted
    style["hz_line_type"] = 8
    for node in tt.traverse("levelorder"):
        node.img_style = style
        # Names longer than one character are listed in the node file.
        if (len(node.name))>1:
            node_file.write(node.name+"\n")
        children1=node.children
        for element in children1:
            element.img_style = style
    for node in tt.traverse("preorder"):
        node.img_style = style
        children1=node.children
        for element in children1:
            element.img_style = style
    # NOTE(review): attribute reference without parentheses -- close() is
    # not actually called here.
    node_file.close
    # print the tree with colours
    #print (tt.get_ascii(attributes=["name", "color"], show_internal=False))
    # search for the ancestor: the first node (postorder) whose name starts
    # with the first entry of ancestor_grop that matches, excluding names
    # starting with "PPE"; i doubles as a "found" flag.
    ancestor1=""
    i=0
    for element in ancestor_grop:
        if i==0:
            for node in tt.traverse("postorder"):
                if i==0:
                    node_name=str(node.name)
                    if (node_name.startswith(element)) and not((node_name.startswith("PPE"))):
                        ancestor1=str(node.name)
                        i=1
                        break
                else:
                    break
        else:
            break
    if not (ancestor1==""):
        tt.set_outgroup(ancestor1)
        #tt.render(adres+"/out/"+File[:-3]+"_2.png", tree_style=circular_style)
        print(str(ancestor1)+" - предок")
        file_out_supliment.write(str(ancestor1)+"\t"+" - предполагаемый корень"+"\n")
    else:
        print("Не нашел предка")
    file_out_supliment.write("\n\n\n Выявленные клады\n")
    # add colours to the clades: a leaf gets the colour of every clade
    # containing a prefix of its name (the last matching clade wins).
    for leaf in tt:
        i=0
        node_name=str(leaf.name)
        for clad in all_clad:
            collor=collor_list[i]
            i+=1
            for element in clad:
                if (node_name.startswith(element)):
                    leaf.add_features(color=collor)
    #print(leaf)
    #print(tt)
    # collect the monophyletic colours: give every monophyletic group of
    # more than one same-coloured leaf a background and list its members.
    #print (tt.get_ascii(attributes=["name", "color"], show_internal=False))
    ii=-1
    for clad in all_clad:
        ii+=1
        collor=collor_list[ii]
        for monophyletic_tree in tt.get_monophyletic(values=[collor], target_attr="color"):
            i=[]
            name_node_mono_color=[]
            for leaf in monophyletic_tree:
                i.append(leaf)
                name_node_mono_color.append(leaf.name)
            if len(i)>1:
                n1 = tt.get_common_ancestor(i)
                nst1 = NodeStyle()
                nst1["bgcolor"] = collor
                nst1["fgcolor"] = "#000000"
                nst1["size"] = 0
                nst1["vt_line_color"] = "#000000"
                nst1["hz_line_color"] = "#000000"
                nst1["vt_line_width"] = 4
                nst1["hz_line_width"] = 4
                nst1["vt_line_type"] = 8 # 0 solid, 1 dashed, 2 dotted
                nst1["hz_line_type"] = 8
                n1.set_style(nst1)
                for element in name_node_mono_color:
                    file_out_supliment.write(str(element)+"\t"+" - "+collor+"\n")
                file_out_supliment.write("\n")
    file_out_supliment.write("\n\n\n Легенда доменного состава\n")
    # add the domain annotation
    dic_seq={}
    dic_domain={}
    dic_domain_pic={}
    i=0
    list_legend_domain3=[]
    for node in tt.traverse("postorder"):
        # protein lengths: the MSA file is re-opened and re-parsed for
        # every node to find the record with this node's name.
        fasta_sequences=SeqIO.parse(open(adres+"/for_pic/2_MSA/"+File), "fasta")
        for element in fasta_sequences:
            if str(element.id)==str(node.name):
                dic_seq.setdefault(str(node.name),str(element.seq))
        # domain composition: the domain file is likewise re-read per node.
        a=[]
        file_domain = open(adres+"/for_pic/3_domain/"+File, 'r')
        for line in file_domain:
            line_=line.split("\t")
            if line_[0]==str(node.name):
                if not (line_[2] in list_legend_domain3):
                    # First occurrence of this domain in this file: record
                    # it for the legend as well.
                    list_legend_domain3.append(line_[2])
                    if not (line_[2] in dic_domain_pic):
                        dic_domain_pic.setdefault(line_[2],dic_domain_pic_pic[i])
                        i+=1
                        #print(dic_domain_pic[line_[2]])
                        #print(i)
                    a1=[int(line_[3]),int(line_[4]), "()", None, 15, "black", domain_all_legend[line_[2]], "arial|9|black|"+line_[2]]
                    a.append(a1)
                    dic_domain.setdefault(str(node.name),a)
                    file_out_supliment.write(line_[2]+"\t"+domain_all_legend[line_[2]]+"\n")
                else:
                    a1=[int(line_[3]),int(line_[4]), "()", None, 15, "black", domain_all_legend[line_[2]], "arial|9|black|"+line_[2]]
                    a.append(a1)
                    dic_domain.setdefault(str(node.name),a)
    for element in dic_domain:
        #print(str(element)+" "+ str(dic_domain[element]))
        try:
            seqFace = SeqMotifFace(seq=dic_seq[element], motifs=dic_domain[element], seq_format="line")
            (tt & element).add_face(seqFace, 0, "aligned")
        except:
            # Fallback: draw the sequence without motifs.
            seqFace = SeqMotifFace(seq=dic_seq[element], seq_format="line", gapcolor="red")
            (tt & element).add_face(seqFace, 0, "aligned")
            print("except")
    # Draw the legend
    circular_style = TreeStyle()
    circular_style.show_leaf_name = False
    circular_style.show_branch_length = True
    circular_style.show_branch_support = True
    circular_style.scale = 75
    circular_style.tree_width = 50
    # NOTE(review): attribute reference without parentheses -- not a call.
    file_domain.close
    file_domain = open(adres+"/for_pic/3_domain/"+File, 'r')
    list_legend_domain={}
    list_legend_domain2=[]
    # read the list of domains
    i=0
    for line in file_domain:
        line_=line.split("\t")
        try:
            if not(line_[2] in list_legend_domain2):
                #print(line_[2])
                list_legend_domain2.append(line_[2])
                list_legend_domain.setdefault("a"+str(i),line_[2])
                i+=1
        except:
            print("не понял что это за домен")
    i=0
    # read the domain legend: map column 0 to column 1 with characters
    # that would break a newick name replaced by underscores.
    file_domain_legend2={}
    file_domain_legend = open(adres+"/domain_legend.txt", 'r')
    for line in file_domain_legend:
        line_=line.split("\t")
        aaa=line_[1].replace(" ","_")
        aaa=aaa.replace("(","_")
        aaa=aaa.replace(")","_")
        aaa=aaa.replace(",","_")
        aaa=aaa.replace(":","_")
        aaa=aaa.replace(".","_")
        file_domain_legend2.setdefault(line_[0],aaa.replace("\n",""))
    #N = AttrFace("name", fsize=12)
    #faces.add_face_to_node(N, node, 1, position="branch-right")
    # draw the domains: a flat star tree with one leaf per legend entry.
    ww=""
    for element in file_domain_legend2:
        ww=ww+","+file_domain_legend2[element]
    ww="("+ww[1:]+");"
    tree_domen_all=Tree(ww)
    for element in file_domain_legend2:
        try:
            element2=domain_all_legend[element]
            a1=[10,90, "()", None, 15, "black", domain_all_legend[element], "arial|9|black|"+element]
            i+=1
            a=[]
            a.append(a1)
            seqFace = SeqMotifFace(seq=seq_seq, motifs=a, seq_format="line")
            #node_node="a"+str(i)
            node_node=file_domain_legend2[element]
            try:
                (tree_domen_all & node_node).add_face(seqFace, 0, "aligned")
            except:
                q=1
                print("не нашел узел")
        except:
            q=1
    circular_style.layout_fn = layout
    tree_domen_all.render(adres+"/out_legend_all.png", tree_style=circular_style)
    # Legend restricted to the domains that occur in this file.
    file_domain_out = open(adres+"/123123123.txt", 'w')
    w=""
    for element in list_legend_domain3:
        w=w+","+file_domain_legend2[element]
    w="("+w[1:]+");"
    tree_domen=Tree(w)
    for element in list_legend_domain3:
        file_domain_out.write(element+"\n")
        a1=[10,90, "()", None, 15, "black", domain_all_legend[element], "arial|9|black|"+element]
        i+=1
        a=[]
        a.append(a1)
        try:
            seqFace = SeqMotifFace(seq=seq_seq, motifs=a, seq_format="line")
            #node_node="a"+str(i)
            node_node=file_domain_legend2[element]
            (tree_domen & node_node).add_face(seqFace, 0, "aligned")
        except:
            #print("Ran out of legend nodes")
            k=0
    circular_style.layout_fn = layout
    tree_domen.render(adres+"/out_legend/"+File[:-4]+".png", tree_style=circular_style)
    # removal of some of the nodes: a node is kept when its name is
    # shorter than 2 characters or starts with a save_node prefix, unless
    # it also starts with a dell_node prefix.
    # NOTE(review): nodes are deleted while traversing the same tree.
    for node in tt.traverse("postorder"):
        try:
            seqFace = SeqMotifFace(seq=dic_seq[str(node.name)], motifs=dic_domain[str(node.name)], seq_format="line")
            (tt & node.name).add_face(seqFace, 0, "aligned")
            a=0
            if len(node.name)<2:
                a=1
            for element_save in save_node:
                if (node.name).startswith(element_save):
                    a=1
            for element_dell in dell_node:
                if (node.name).startswith(element_dell):
                    a=0
            if a==0:
                node.delete()
        except:
            if len(node.name)>0:
                seqFace = SeqMotifFace(seq=dic_seq[str(node.name)], seq_format="line", gapcolor="red")
                (tt & node.name).add_face(seqFace, 0, "aligned")
                node.delete()
            d0=0
    # removal of some of the nodes FINISHED
    # special points: nodes listed in out_list_gene2.txt get a red dot.
    node_color=[]
    file_node_color = open(adres+"/for_pic/4_color_node/out_list_gene2.txt", 'r')
    for line in file_node_color:
        node_color.append(line.replace("\n",""))
    for node in tt.traverse("postorder"):
        if node.name in node_color:
            style = NodeStyle()
            style["fgcolor"] = "Red"
            style["size"] = 9
            style["vt_line_color"] = "#000000"
            style["hz_line_color"] = "#000000"
            style["vt_line_width"] = 4
            style["hz_line_width"] = 4
            style["vt_line_type"] = 8 # 0 solid, 1 dashed, 2 dotted
            style["hz_line_type"] = 8
            node.set_style(style)
    # NOTE(review): attribute reference without parentheses -- the file is
    # not closed here, and it is written to again below.
    file_out_supliment.close
    # collect the monophyletic colours again, now on the pruned tree.
    #print (tt.get_ascii(attributes=["name", "color"], show_internal=False))
    ii=-1
    for clad in all_clad:
        ii+=1
        collor=collor_list[ii]
        for monophyletic_tree in tt.get_monophyletic(values=[collor], target_attr="color"):
            i=[]
            name_node_mono_color=[]
            for leaf in monophyletic_tree:
                i.append(leaf)
                name_node_mono_color.append(leaf.name)
            if len(i)>1:
                n1 = tt.get_common_ancestor(i)
                nst1 = NodeStyle()
                nst1["bgcolor"] = collor
                nst1["fgcolor"] = "#000000"
                nst1["size"] = 0
                nst1["vt_line_color"] = "#000000"
                nst1["hz_line_color"] = "#000000"
                nst1["vt_line_width"] = 4
                nst1["hz_line_width"] = 4
                nst1["vt_line_type"] = 8 # 0 solid, 1 dashed, 2 dotted
                nst1["hz_line_type"] = 8
                n1.set_style(nst1)
                for element in name_node_mono_color:
                    file_out_supliment.write(str(element)+"\t"+" - "+collor+"\n")
                file_out_supliment.write("\n")
    return tt
def layout(node):
    """Layout function: support on internals; names, lineage colour and alignment on leaves.

    Internal nodes get their support value above the branch.  Leaf names
    are expected to look like ``<taxid>.<sequence name>``; each leaf gets

    * column 0: the species name (``node.name`` is replaced by the
      scientific name that ``ncbi`` reports for the taxid), coloured by
      the first matching clade in its lineage: Cnidaria red, Ctenophora
      orange, Bilateria blue, Porifera green, otherwise black,
    * column 1: the sequence name,
    * column 2: the predicted name from ``predict_name`` ('--' if the
      lookup fails),
    * aligned column 3: the alignment row (``node.sequence``).
    """
    node.img_style["size"] = 0

    if not node.is_leaf():
        boostFace = faces.TextFace(node.support, fgcolor="grey", fsize=34)
        add_face_to_node(boostFace, node, column=0, position="branch-top")
        return

    name_parts = node.name.split('.')
    taxid = name_parts[0]
    seq_name = name_parts[1]

    # add predicted name (eggnog 4.5).  The fallback stays best-effort,
    # but only the lookup and face construction are guarded, and
    # KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
        predNameFace = faces.TextFace(predict_name[node.name],
                                      fgcolor="salmon", fsize=34)
    except Exception:
        predNameFace = faces.TextFace('--', fgcolor="salmon", fsize=34)
    else:
        predNameFace.margin_right = 20
        predNameFace.margin_left = 20
    add_face_to_node(predNameFace, node, column=2, position="branch-right")

    seqNameFace = faces.TextFace(seq_name, fgcolor="grey", fsize=34)
    add_face_to_node(seqNameFace, node, column=1, position="branch-right")

    # NOTE(review): node.name is overwritten here; if this layout runs
    # again on the same node the split above will see the new name --
    # confirm the tree is rendered only once.
    sp_name = ncbi.get_taxid_translator([taxid])
    node.name = sp_name[int(taxid)]

    seqFace = SeqMotifFace(node.sequence, gap_format="blank")
    add_face_to_node(seqFace, node, column=3, position="aligned")

    lin = ncbi.get_lineage(taxid)
    # Checked in this order; the first clade found in the lineage wins.
    # (A separate colour for Metazoa, taxid 33208, is disabled.)
    clade_colors = (
        (6073, "red"),      # cnidaria
        (10197, "orange"),  # ctenophora
        (33213, "blue"),    # bilateria
        (6040, "green"),    # porifera
    )
    name_color = "black"
    for clade_taxid, clade_color in clade_colors:
        if clade_taxid in lin:
            name_color = clade_color
            break

    N = AttrFace("name", fsize=34, fgcolor=name_color)
    N.margin_left = 20
    N.margin_right = 20
    faces.add_face_to_node(N, node, column=0)
seq = seq.translate(None, string.ascii_lowercase) # keep only CDS iesmotif = [[1, len(seq), "line", 2, 5, None, None, None]] for homIES in gfhomIES[geneFamily]: (begin, end, ies, iesId, beginMSA, endMSA) = charMat[(geneFamily, homIES, geneId)] if ies == '?': if beginMSA == 'NA': iesmotif.append([int(begin), int(end),"()", 10, 10, "red", "black", "arial|8|black|?"]) else: iesmotif.append([int(begin), int(end),"()", 10, 10, "red", "black", "arial|8|black|?"]) elif ies == '1': iesmotif.append([int(beginMSA), int(endMSA),"[]", 10, 10, "black", "red", "arial|8|black|" + iesId]) elif ies == '0': iesmotif.append([int(begin), int(end), "[]", 10, 10, "silver", "silver", None]) else: quit(1) seqFace = SeqMotifFace(seq = seq, motifs = iesmotif, gap_format = "blank", seq_format = "line") leaf.add_face(seqFace, 0, "aligned") drawTree(outputFile) """ t.show() # Draw trees. pp = pprint.PrettyPrinter(indent=8) # SPECIATION TREE # ################### wgd1 = Tree('((P_caudatum:1[&&NHX:Ev=S:S=3:ND=3],(((P_sexaurelia:1[&&NHX:Ev=S:S=7:ND=7],P_sonneborni:1[&&NHX:Ev=S:S=8:ND=8]):1[&&NHX:Ev=S:S=5:ND=5],(((P_pentaurelia:1[&&NHX:Ev=S:S=13:ND=13],P_primaurelia:1[&&NHX:Ev=S:S=14:ND=14]):1[&&NHX:Ev=S:S=11:ND=11],(P_biaurelia:1[&&NHX:Ev=S:S=15:ND=15],(P_octaurelia:1[&&NHX:Ev=S:S=17:ND=17],P_tetraurelia:1[&&NHX:Ev=S:S=18:ND=18]):1[&&NHX:Ev=S:S=16:ND=16]):1[&&NHX:Ev=S:S=12:ND=12]):1[&&NHX:Ev=S:S=9:ND=9],P_tredecaurelia:1[&&NHX:Ev=S:S=10:ND=10]):1[&&NHX:Ev=S:S=6:ND=6]):1[&&NHX:Ev=S:S=4:ND=4],((P_sexaurelia:1[&&NHX:Ev=S:S=7:ND=7],P_sonneborni:1[&&NHX:Ev=S:S=8:ND=8]):1[&&NHX:Ev=S:S=5:ND=5],(((P_pentaurelia:1[&&NHX:Ev=S:S=13:ND=13],P_primaurelia:1[&&NHX:Ev=S:S=14:ND=14]):1[&&NHX:Ev=S:S=11:ND=11],(P_biaurelia:1[&&NHX:Ev=S:S=15:ND=15],(P_octaurelia:1[&&NHX:Ev=S:S=17:ND=17],P_tetraurelia:1[&&NHX:Ev=S:S=18:ND=18]):1[&&NHX:Ev=S:S=16:ND=16]):1[&&NHX:Ev=S:S=12:ND=12]):1[&&NHX:Ev=S:S=9:ND=9],P_tredecaurelia:1[&&NHX:Ev=S:S=10:ND=10]):1[&&NHX:Ev=S:S=6:ND=6]):1[&&NHX:Ev=S:S=4:ND=4]):1[&&NHX:Ev=D:S=4:ND=4]):1[&&N
HX:Ev=S:S=1:ND=1],T_thermophila:1[&&NHX:Ev=S:S=2:ND=2])[&&NHX:Ev=S:S=0:ND=0];')
def combine_features(data, dsizes, tree, taxid2sp, prot2taxid, taxa_to_merge):
    """Attach per-protein domain tracks to the leaves of a taxonomic tree.

    The function works in four passes:

    1. Walk ``data`` (query domain -> protein -> hit record) and collect, per
       species, the proteins grouped by domain architecture (``dcnt_sp``) and
       the list of SeqMotifFace motifs per protein (``motifs``).
    2. For every ``"taxid,name"`` entry of ``taxa_to_merge``, pool the
       species found below that node under ``name`` and detach the node's
       children from ``tree``.
    3. Rename the tree nodes from taxid to species name using ``taxid2sp``.
    4. Under every leaf that has proteins, add a subtree with one leaf per
       domain architecture (represented by its largest protein) carrying a
       ``SeqMotifFace``.

    Note that ``data`` (its ``"QPos"`` entries are deleted) and ``taxid2sp``
    (names are sanitised, merged taxids are removed) are modified in place.

    Motif layout follows the ete3 convention:
    ``[start, end, shape, width, height, fgcolor, bgcolor, text]``
    (see http://etetoolkit.org/docs/latest/tutorial/tutorial_drawing.html#phylogenetic-trees-and-sequence-domains).

    Args:
        data: mapping query domain -> {"QPos": (start, stop), protein: record};
            each record is read through its "Tpos" (optional) and "Hit" keys.
        dsizes: mapping protein -> sequence length.
        tree: tree whose node names are taxids (searched with
            ``search_nodes``).
        taxid2sp: mapping taxid -> species name.
        prot2taxid: mapping protein -> taxid.
        taxa_to_merge: iterable of ``"taxid,name"`` strings.

    Returns:
        The same ``tree`` object, decorated.
    """
    motifs = {}
    dcnt_sp = {}

    # Pass 1: gather domain architectures and motifs for every protein.
    for tremolo_dom in data:
        per_protein = data[tremolo_dom]
        tremolo_dom_start, tremolo_dom_stop = per_protein["QPos"]
        # "QPos" is metadata, not a protein: drop it before iterating.
        del per_protein["QPos"]
        for prot in per_protein:
            taxid = prot2taxid[prot]
            species = taxid2sp[taxid]
            # Strip the characters that have a meaning in Newick.
            for newick_char in "(),;":
                species = species.replace(newick_char, "")
            taxid2sp[taxid] = species

            record = per_protein[prot]
            full_domains = record.get("Tpos", list())[:]
            ordered_domains = sorted(filter_domain_arch(full_domains))
            dom_arch = ";".join([dom for _start, _stop, dom in ordered_domains])
            dcnt_sp.setdefault(species, {}).setdefault(dom_arch, []).append(prot)

            # Annotated domains are drawn as boxes.
            for start, stop, dom in ordered_domains:
                color = get_domain_color(dom)
                box = [start + 1, stop, "[]", None, 10, "black", color,
                       "arial|1|black|{}".format(dom)]
                motifs.setdefault(species, {}).setdefault(prot, []).append(box)

            # Hits of the query domain are drawn as red ellipses.
            hits = record["Hit"]
            for hitnb in hits:
                start, stop = hits[hitnb]["Tali"]
                ellipse = [start, stop, "o", None, 10, "black", "red",
                           "arial|1|black|HCAdom {}-{}".format(tremolo_dom, hitnb)]
                motifs.setdefault(species, {}).setdefault(prot, []).append(ellipse)

    # Pass 2: merge taxonomic groups.
    for taxid_taxa in taxa_to_merge:
        taxid, taxa = taxid_taxa.split(",")
        found = tree.search_nodes(name=taxid)
        if not found:
            print("Unable to find taxid {} in tree".format(taxid))
            print(found)
            continue

        taxnode = found[0]
        child_names = [child.name for child in taxnode.children]
        leaves = [sub for sub in taxnode.traverse() if sub.is_leaf()]

        taxid2sp[taxid] = taxa
        dcnt_sp[taxa] = {}
        motifs[taxa] = {}
        for leafnode in leaves:
            leaf_id = leafnode.name
            leaf_sp = taxid2sp[leaf_id]
            # Pool the protein lists ...
            for dom_arch, members in dcnt_sp[leaf_sp].items():
                for prot in members:
                    dcnt_sp[taxa].setdefault(dom_arch, []).append(prot)
            # ... and the motifs under the merged name,
            for prot, prot_motifs in motifs[leaf_sp].items():
                for motif in prot_motifs:
                    motifs[taxa].setdefault(prot, []).append(motif)
            # then forget the species that has been absorbed.
            del dcnt_sp[leaf_sp]
            del motifs[leaf_sp]
            del taxid2sp[leaf_id]

        for child_name in child_names:
            tree.search_nodes(name=child_name)[0].detach()

    # Pass 3: replace taxids by species names in the tree.
    for taxid in taxid2sp:
        found = tree.search_nodes(name=taxid)
        if found:
            found[0].name = taxid2sp[taxid]
        else:
            print("Unable to find node for taxid {}".format(taxid))

    # Pass 4: expand each leaf with one child per domain architecture.
    for node in tree:
        if not node.is_leaf() or node.name not in dcnt_sp:
            continue

        node_sp = node.name
        proteins = []
        features = {}
        for dom_arch, members in dcnt_sp[node_sp].items():
            others = len(members) - 1
            candidates = []
            for prot in members:
                new_name = node_sp + " | " + prot.split("|")[1]
                if others > 0:
                    new_name += " [+{}]".format(others)
                candidates.append((dsizes[prot], new_name, dom_arch, prot))
            # The largest protein represents the architecture.
            _size, best_name, best_arch, best_prot = sorted(candidates, reverse=True)[0]
            leaf_label = best_name.replace(":", " ")
            proteins.append(leaf_label)
            features[leaf_label] = (best_prot, best_arch)

        subtree = ete3.PhyloTree("({});".format(", ".join(proteins)))
        node.add_child(subtree)
        for new_node in subtree:
            prot, _dom_arch = features[new_node.name]
            # A dummy poly-G sequence gives the face the protein's length.
            seqFace = SeqMotifFace("G" * dsizes[prot], seq_format="line",
                                   motifs=motifs[node_sp][prot])
            new_node.add_face(seqFace, 0, "aligned")

    return tree
#motif = [ LTRRTs[el][feat][0]//100, LTRRTs[el][feat][1]//100, "[]", None, 4, domainColor, domainColor, None ] Motifs.append(motif) #box_motifs = [ # # seq.start, seq.end, shape, width, height, fgcolor, bgcolor # [0, 5, "[]", None, 10, "black", "rgradient:blue", "arial|8|white|10"], # [10, 25, "[]", None, 10, "black", "rgradient:ref", "arial|8|white|10"], # [30, 45, "[]", None, 10, "black", "rgradient:orange", "arial|8|white|20"], # [50, 65, "[]", None, 10, "black", "rgradient:pink", "arial|8|white|20"], # [70, 85, "[]", None, 10, "black", "rgradient:green", "arial|8|white|20"], # [90, 105, "[]", None, 10, "black", "rgradient:brown", "arial|8|white|20"], # [110, 125, "[]", None, 10, "black", "rgradient:yellow", "arial|8|white|20"], #] #seqFace = SeqMotifFace(seq=None, motifs=box_motifs, gap_format="line") seqFace = SeqMotifFace(seq=None, motifs=Motifs, gap_format="line") (t & node_name).add_face(seqFace, 0, position='aligned') if REROOT: t.set_outgroup( t & reroot_at ) else: # Auto-reroot on taxon with highest divergence corrected if greatest_div['element'] == None: # This happens when divergences are not obtained for a given cluster/superfamily (e.g. DIRS) pass else: t.set_outgroup( t & greatest_div['element'].lstrip('LTR_retrotransposon') ) ts = TreeStyle()
def renderingTreeImage(self):
    """Draw the rooted tree with an intron track and a domain track per leaf.

    Steps performed:

    * read the FASTA file ``Input/ProteinInput`` and append every record id
      and sequence to ``self.input_protein_accession_number`` and
      ``self.input_protein_sequence``;
    * load the Newick tree from ``execs/tmp/rooted_tree.nwk``;
    * derive an accession (``NP``/``XP`` + ``_`` + digits) from each leaf name;
    * fill ``self.accession_dict_with_introns`` from ``self.exon_lengths``;
    * add to every leaf one ``SeqMotifFace`` marking intron positions
      (coloured by phase) and one marking the domains returned by
      ``Protein().Domains()``;
    * build a colour legend, open the interactive viewer and render
      ``CompletedTrees/<self.run_name>.pdf``.

    Motifs use the ete3 layout
    ``[start, end, shape, width, height, fgcolor, bgcolor, text]``.

    The method only uses single-argument ``print(...)`` calls and
    ``dict.items()`` so that it runs unchanged on Python 2 and Python 3.

    Raises:
        AttributeError: if a leaf name contains no ``NP``/``XP`` token or no
            digit run (the regex search returns ``None``).
    """
    fasta_path = os.path.join('Input', 'ProteinInput')
    for record in SeqIO.parse(fasta_path, 'fasta'):
        self.input_protein_accession_number.append(record.id)
        self.input_protein_sequence.append(record.seq)

    with open(os.path.join('execs', 'tmp', "rooted_tree.nwk")) as nwk_tree_handle:
        nwk_tree = nwk_tree_handle.read()

    t = Tree(nwk_tree)
    print(t)
    print('\n')

    ts = TreeStyle()
    ts.title.add_face(TextFace(
        'PhyloEpsilon - Protein Ortholog Finding Tool by Bryan Dighera',
        fsize=16,
    ), column=0)
    ts.allow_face_overlap = True
    ts.show_leaf_name = True
    ts.show_branch_support = True

    # Patterns are compiled once, outside the loops (raw strings keep the
    # exact same regex text as before).
    np_xp_pattern = re.compile(r'N[P]|X[P]')
    digits_pattern = re.compile(r'\d+.\d')
    first_int_pattern = re.compile(r'(?<!=\W)\d+')

    # Leaf order is reused below to pair faces with tree leaves.
    tree_leaf_names = t.get_leaf_names()

    leaf_names = []
    for leaf in tree_leaf_names:
        np_xp_name = np_xp_pattern.search(leaf).group()
        digits_name = digits_pattern.search(leaf).group()
        final_accession = str(np_xp_name + '_' + digits_name)
        print(final_accession)
        leaf_names.append(final_accession)

    P = Protein()
    protein_domains, domain_colors, unrepeated_domains = P.Domains()
    print(domain_colors)

    # Map each input accession number to its exon/intron description.
    for idx in range(len(leaf_names)):
        accession = self.input_protein_accession_number[idx]
        self.accession_dict_with_introns[accession] = self.exon_lengths[idx]

    print('protein accession number: ' + '%s' % self.input_protein_accession_number)
    print('Accession dict: ' + '%s' % self.accession_dict_with_introns + '\n')

    # Intron phase (0, 1, 2) -> colour of the marker on the diagram.
    phase_colors = {0: "Grey", 1: "Black", 2: "Blue"}

    # Intron track: one face per leaf, in leaf order.
    for i, accession_number in enumerate(leaf_names):
        intron_motifs = [[0, 0, "[]", None, 12, "White", "White", None]]

        # Accessions missing from the dictionary get no intron marker.
        if accession_number in self.accession_dict_with_introns:
            exon_list = self.accession_dict_with_introns[accession_number]
            print('%s %s' % (accession_number, exon_list))
            print(exon_list)

            for exon_length in exon_list:
                if str(exon_length) == 'NONE':
                    print('NO INTRONS FOUND FOR RECORD')
                    continue
                for location in exon_length:
                    bounds = str(location).split('-')
                    exon_start = int(bounds[0])
                    exon_end = int(bounds[1])
                    # Nucleotide coordinate -> residue coordinate; floor
                    # division gives the same value on Python 2 and 3.
                    protein_seq_exon_location = exon_end // 3
                    intron_phase = (exon_end - exon_start) % 3
                    color = phase_colors[intron_phase]
                    intron_motifs.append([
                        protein_seq_exon_location - 2,
                        protein_seq_exon_location + 2,
                        "[]", None, 5, color, color, None
                    ])

        print(str(intron_motifs) + '\n')
        msa_protein_seq = self.msa_aligned_protein[i].strip('-')

        seqFace = SeqMotifFace(str(msa_protein_seq),
                               gapcolor="black",
                               seq_format='line',
                               scale_factor=1,
                               motifs=intron_motifs)
        (t & tree_leaf_names[i]).add_face(seqFace, 0, "aligned")

    def parse_domain_span(location):
        """Return (start, end) parsed from a 'start:end' string.

        Plain integers are tried first; if either bound is not a plain
        integer, both bounds fall back to the first digit run found.
        """
        pieces = location.split(':')
        start_text, end_text = pieces[0], pieces[1]
        try:
            return int(start_text), int(end_text)
        except ValueError:
            start_value = int(first_int_pattern.search(str(start_text)).group())
            end_value = int(first_int_pattern.search(str(end_text)).group())
            return start_value, end_value

    # Domain track: one face per leaf, in leaf order.
    # TODO: Add the legend and possibly give a number to each of the domains
    # so they can be easily identified in the legend
    for n, accession_number in enumerate(leaf_names):
        domain_motifs = [[0, 0, "[]", None, 12, "White", "White", None]]

        for domain in protein_domains:
            if accession_number not in domain:
                continue
            first_key = list(domain.keys())[0]
            first_value = list(domain.values())[0]
            print('leaf accession #: ' + '%s' % accession_number)
            print('domains accession: ' + '%s' % first_key)
            print(first_value)

            for each_domain in first_value:
                domain_motif_color = domain_colors[each_domain[0]]
                start_domain_loc, end_domain_loc = parse_domain_span(each_domain[1])
                domain_motifs.append([
                    start_domain_loc, end_domain_loc, "<>", 20, 20,
                    'Black', domain_motif_color, 'arial|8|black|'
                ])

        print(domain_motifs)
        msa_protein_seq = self.msa_aligned_protein[n].strip('-')
        print(msa_protein_seq)
        print(len(msa_protein_seq))
        print('*' * 100)

        domainFace = SeqMotifFace(str(msa_protein_seq),
                                  gapcolor="black",
                                  seq_format='line',
                                  scale_factor=1,
                                  motifs=domain_motifs)
        (t & tree_leaf_names[n]).add_face(domainFace, 0, "aligned")

    # Legend: one text label plus one coloured box per domain.
    print(protein_domains)
    for single_unrepeat, colors in domain_colors.items():
        ts.legend.add_face(TextFace(single_unrepeat), column=0)
        ts.legend.add_face(SeqMotifFace(
            "A" * 45, [[0, 80, "[]", None, 8, "Black", colors, None]]),
            column=1)
    ts.legend_position = 1

    file_name = self.run_name
    t.show(tree_style=ts)
    t.render(os.path.join('CompletedTrees', file_name + '.pdf'), tree_style=ts)
def custom_layout(node):
    """Decorate one tree node with name, gene, sequence and clade faces.

    Presumably meant to be used as an ete3 layout function (it receives a
    single node and only adds faces / edits ``img_style``).

    On first use the function lazily fills two module-level caches:
    ``ncbi`` (``NCBITaxa(NCBIPATH)``) and ``orig_name`` (column 1 -> column 3
    of the tab-separated file ``TABLEPATH``, skipping blank lines and lines
    starting with ``#``).

    Leaf names are expected to look like ``species|gene|taxid.protein_id``:

    * column 0: species name, coloured by genus (black when unlisted);
    * column 1: protein id (text after the first ``.``, cut to 50 chars);
    * column 2: gene name from ``orig_name`` or, failing that, the second
      ``|`` field;
    * column 3 (aligned): clade label when the NCBI lineage contains the
      matching taxid;
    * column 4 (aligned): ``node.sequence`` drawn with blank gaps.

    Internal nodes are drawn as small squares with their name above and
    their support below the branch.

    Raises:
        KeyError: if the species name is unknown to the NCBI translator.
        IndexError: if the leaf name has fewer than three ``|`` fields or
            the third field contains no ``.``.
    """
    global ncbi, orig_name
    if not ncbi:
        ncbi = NCBITaxa(NCBIPATH)
    if not orig_name:
        orig_name = {}
        with open(TABLEPATH) as tablefn:
            for line in tablefn:
                if not line.strip() or line.startswith("#"):
                    continue
                line_data = line.strip().split("\t")
                name_in_tree = line_data[0]
                name_to_show = line_data[2]
                orig_name[name_in_tree] = name_to_show

    if not node.is_leaf():
        node.img_style['size'] = 3
        node.img_style['shape'] = 'square'
        if node.name:
            name_face = TextFace(node.name, fgcolor='grey', fsize=10)
            name_face.margin_bottom = 1
            add_face_to_node(name_face, node, column=0, position='branch-top')
        if node.support:
            support_face = TextFace(node.support, fgcolor='indianred', fsize=8)
            support_face.margin_bottom = 1
            add_face_to_node(support_face, node, column=0,
                             position='branch-bottom')
        return

    total_name = node.name
    name_fields = total_name.split('|')
    node_name = name_fields[0]
    name2taxid = ncbi.get_name_translator([node_name])
    taxid = name2taxid[node_name]
    lin = ncbi.get_lineage(int(taxid[0]))

    prot_info = name_fields[2]
    # Keep the label short: at most 50 characters of the protein id.
    prot_id = prot_info.split('.', 1)[1][:50]
    if prot_info in orig_name:
        gene_name = orig_name[prot_info]
    else:
        gene_name = name_fields[1]

    aligned_pname_face = TextFace(prot_id, fgcolor='grey', fsize=11)
    aligned_pname_face.margin_top = 0
    aligned_pname_face.margin_bottom = 0
    aligned_pname_face.margin_right = 20
    add_face_to_node(aligned_pname_face, node, column=1,
                     position='branch-right')

    aligned_gname_face = TextFace(gene_name, fgcolor='black', fsize=11)
    aligned_gname_face.margin_top = 0
    aligned_gname_face.margin_bottom = 0
    aligned_gname_face.margin_left = 5
    add_face_to_node(aligned_gname_face, node, column=2,
                     position='branch-right')

    seqFace = SeqMotifFace(node.sequence, gap_format="blank")
    seqFace.margin_left = 5
    add_face_to_node(seqFace, node, column=4, position="aligned")

    # Genus prefix -> colour of the species label. The first entry used to
    # be the unmatchable literal "H**o", so Homo leaves were never coloured.
    genus_colors = (
        ("Homo", "red"),
        ("Spongilla", "green"),
        ("Sycon", "green"),
        ("Amphimedon", "green"),
        ("Oscarella", "green"),
        ("Gallus", "red"),
        ("Branchiostoma", "red"),
        ("Trichoplax", "orange"),
        ("Nematostella", "orange"),
        ("Hydra", "orange"),
        ("Drosophila", "blue"),
        ("Crassostrea", "blue"),
    )
    name_color = "black"
    for genus, color in genus_colors:
        if node_name.startswith(genus):
            name_color = color
            break
    N = TextFace(node_name, fsize=11, fgcolor=name_color)
    N.margin_right = 20
    # 'branch-right' is also ete3's default position, which the former
    # fallback branch relied on implicitly.
    faces.add_face_to_node(N, node, column=0, position='branch-right')

    # NCBI taxid found in the lineage -> (label, colour) shown in column 3.
    clade_labels = (
        (7742, 'vertebrata', "red"),
        (6040, 'porifera', "green"),
        (6073, 'cnidario', "orange"),
    )
    for clade_taxid, clade_name, clade_color in clade_labels:
        if clade_taxid in lin:
            N = TextFace(clade_name, fsize=11, fgcolor=clade_color)
            N.background.color = "Linen"
            add_face_to_node(N, node, column=3, position='aligned')
def custom_layout(node):
    """Decorate one tree node with name, clade, annotation and sequence faces.

    Presumably meant to be used as an ete3 layout function (it receives a
    single node and only adds faces / edits ``img_style``).

    Leaf names are expected to look like ``species|seq_name|taxid.info``:

    * column 0: species name, coloured by genus (``#333333`` when unlisted);
    * column 2 (aligned): sequence name, in brown;
    * column 3 (aligned): one label per clade taxid present in the NCBI
      lineage of the species;
    * column 4 (aligned): the text after ``taxid.`` in grey when that taxid
      belongs to the lineage, otherwise the whole third field in red;
    * column 5 (aligned): ``node.sequence`` drawn with blank gaps.

    A leaf whose name, or whose species part, is empty is reported on
    stderr and left undecorated from that point on.

    Internal nodes are drawn as small squares with their name above and
    their support below the branch.

    Raises:
        KeyError: if the species name is unknown to the NCBI translator.
        IndexError: if the leaf name has fewer than three ``|`` fields.
        ValueError: if the third field contains no ``.``.
    """
    ncbi = connect_ncbitaxa()

    if not node.is_leaf():
        node.img_style['size'] = 3
        node.img_style['shape'] = 'square'
        if node.name:
            name_face = TextFace(node.name, fgcolor='grey', fsize=10)
            name_face.margin_bottom = 1
            add_face_to_node(name_face, node, column=0, position='branch-top')
        if node.support:
            support_face = TextFace(node.support, fgcolor='indianred', fsize=8)
            support_face.margin_bottom = 1
            add_face_to_node(support_face, node, column=0,
                             position='branch-bottom')
        return

    total_name = node.name
    if not total_name:
        sys.stderr.write("Name of node is null or empty when creating custom layout.\n")
        return

    name_fields = total_name.split('|')
    seq_name = name_fields[1]
    other_info = name_fields[2]

    aligned_name_face = TextFace(seq_name, fgcolor='brown', fsize=11)
    aligned_name_face.margin_top = 0
    aligned_name_face.margin_bottom = 0
    aligned_name_face.margin_left = 5
    add_face_to_node(aligned_name_face, node, column=2, position='aligned')

    node_name = name_fields[0]
    if not node_name or node_name.strip() == "":
        sys.stderr.write("Node name is null or empty when creating custom layout.\n")
        return

    name2taxid = ncbi.get_name_translator([node_name])
    taxid = name2taxid[node_name]
    lin = ncbi.get_lineage(int(taxid[0]))

    # NCBI taxid found in the lineage -> (label, colour) shown in column 3.
    # 10226 is Placozoa in NCBI Taxonomy; it used to be labelled
    # 'Ctenophora', duplicating the label of 10197.
    clade_labels = (
        (7742, 'vertebrata', "red"),
        (6040, 'porifera', "green"),
        (6073, 'cnidario', "orange"),
        (33317, 'protostomia', "blue"),
        (10197, 'Ctenophora', "indigo"),
        (10226, 'Placozoa', "sienna"),
        (6157, 'Platyhelminthes', "olive"),
        (7735, 'Cephalochordata', "skyblue"),
    )
    for clade_taxid, clade_name, clade_color in clade_labels:
        if clade_taxid in lin:
            N = TextFace(clade_name, fsize=11, fgcolor=clade_color)
            N.margin_left = 5
            N.background.color = "Linen"
            add_face_to_node(N, node, column=3, position='aligned')

    tax, seqs_info = other_info.split('.', 1)
    try:
        tax = int(tax)
    except ValueError:
        # Not a numeric taxid: keep the string, it simply will not be
        # found in the lineage below.
        pass

    if tax in lin:
        info_text, info_color = seqs_info, 'grey'
    else:
        # Red flags an annotation whose taxid disagrees with the lineage.
        info_text, info_color = other_info, 'red'
    aligned_name_face = TextFace(info_text, fgcolor=info_color, fsize=11)
    aligned_name_face.margin_top = 0
    aligned_name_face.margin_bottom = 0
    aligned_name_face.margin_left = 5
    add_face_to_node(aligned_name_face, node, column=4, position='aligned')

    seqFace = SeqMotifFace(node.sequence, gap_format="blank")
    add_face_to_node(seqFace, node, column=5, position="aligned")
    node.img_style['size'] = 0

    # Genus prefix -> colour of the species label. The first entry used to
    # be the unmatchable literal "H**o", so Homo leaves were never coloured.
    genus_colors = (
        ("Homo", "red"),
        ("Spongilla", "green"),
        ("Sycon", "green"),
        ("Amphimedon", "green"),
        ("Oscarella", "green"),
        ("Gallus", "red"),
        ("Branchiostoma", "red"),
        ("Trichoplax", "orange"),
        ("Nematostella", "orange"),
        ("Hydra", "orange"),
        ("Drosophila", "blue"),
        ("Crassostrea", "blue"),
    )
    name_color = None
    for genus, color in genus_colors:
        if node_name.startswith(genus):
            name_color = color
            break

    if name_color is None:
        name_face = TextFace(node_name, fgcolor='#333333', fsize=11)
        name_face.margin_top = 0
        name_face.margin_bottom = 0
    else:
        name_face = TextFace(node_name, fsize=11, fgcolor=name_color)
    add_face_to_node(name_face, node, column=0, position='branch-right')
    return