def add_faces(cur, field, leaf, label_info, colours, bg_colour, outfile):
    """Attach one coloured text label per entry of ``label_info`` to ``leaf``.

    The first label gets a trailing ``':'``, every following label except the
    last gets a trailing ``','``.  Labels are laid out in three columns
    (1, 2, 3, 1, 2, 3, ...) at the ``"branch-right"`` position, so labels
    sharing a column are added to the same column of the node.

    When ``outfile`` contains ``'.svg'`` each label is additionally padded
    with trailing spaces (one, plus one per five characters of text) and gets
    a wider left margin (20 instead of 5).

    :param cur: unused by this function.
    :param field: unused by this function.
    :param leaf: node object exposing ``add_face(face, column=, position=)``.
    :param label_info: sequence of label strings; it is NOT modified.
    :param colours: sequence of foreground colours, indexed like
        ``label_info``.
    :param bg_colour: background colour applied to every label.
    :param outfile: output file name; only tested for containing ``'.svg'``.
    """
    is_svg = '.svg' in outfile
    last_index = len(label_info) - 1

    for x, text in enumerate(label_info):
        # Work on a local copy of the text so that the caller's list is not
        # altered (calling this twice with the same list used to accumulate
        # punctuation and padding).
        if x == 0:
            text += ':'
        elif x < last_index:
            text += ','

        if is_svg:
            # Floor division keeps the padding an integer even when true
            # division is in effect; ``' ' * <float>`` raises TypeError.
            text += ' ' * (1 + len(text) // 5)

        label = TextFace(text)
        label.margin_left = 20 if is_svg else 5
        label.fgcolor = colours[x]
        label.background.color = bg_colour

        # Equivalent to the former running offset (y += 3 at x = 3, 6, ...
        # with column = x - y + 1): columns cycle through 1, 2, 3.
        leaf.add_face(label, column=x % 3 + 1, position="branch-right")
def render_tree_image(self, filename):
    """Draw the tree returned by ``self.newick_string()`` into ``filename``.

    Every descendant of the root gets a red text face on top of its branch,
    alternating between "0" and "1" in iteration order, and a node marker of
    size 15.  Node names are drawn in blue through a layout callback, the
    tree is rotated by 90 degrees and rendered 2000 units wide.

    :param filename: path handed to ``Tree.render``.
    """
    def name_layout(node):
        # Layout callback: put the node's "name" attribute to the right of
        # its branch.
        faces.add_face_to_node(
            AttrFace("name", fsize=10, fgcolor="#0000FF"),
            node,
            column=0,
            position="branch-right",
        )

    tree = Tree("%s;" % self.newick_string(), format=1)

    bit = "0"
    for descendant in tree.iter_descendants():
        branch_label = TextFace(bit)
        branch_label.fgcolor = "#FF0000"
        # Flip the label for the next descendant.
        bit = "1" if bit == "0" else "0"

        marker = NodeStyle()
        marker['size'] = 15
        marker['fgcolor'] = "#333"
        descendant.set_style(marker)
        descendant.add_face(branch_label, column=0, position="branch-top")

    style = TreeStyle()
    style.rotation = 90
    style.show_leaf_name = False
    style.layout_fn = name_layout
    style.margin_left = 0
    style.branch_vertical_margin = 50
    style.show_scale = False

    tree.render(filename, tree_style=style, w=2000)
def _read_best_hits(blast_result_file_list):
    """Parse tab-separated BLAST result files.

    Column 1 is used as the query name, column 2 as the subject (genome)
    identifier, column 3 as the identity and columns 9 and 10 as the hit
    coordinates (1-based column numbers).
    NOTE(review): this matches the standard 12-column BLAST tabular layout
    (pident, sstart, send) -- confirm against the files actually produced.

    :return: ``(blast2data, queries)`` where ``blast2data`` maps
        ``subject -> query -> [identity, coord_1, coord_2]`` and ``queries``
        lists the query names in order of first appearance.
    """
    blast2data = {}
    queries = []
    for one_blast_file in blast_result_file_list:
        with open(one_blast_file, 'r') as handle:
            for row in handle:
                if not row.strip():
                    # A blank (e.g. trailing) line used to raise IndexError.
                    continue
                fields = row.split('\t')
                query, subject = fields[0], fields[1]
                blast2data.setdefault(subject, {})[query] = [
                    float(fields[2]), int(fields[8]), int(fields[9])]
                if query not in queries:
                    queries.append(query)
    return blast2data, queries


def _drop_overlapping_hits(blast2data):
    """Keep only the best hit among hits that overlap on the same subject.

    For every pair of hits whose coordinate ranges intersect, the hit with
    the lower identity is deleted from ``blast2data`` (in place); on a tie
    the hit currently used as reference is deleted.
    """
    for one_blast, hits in blast2data.items():
        # Iterate over snapshots of the keys: entries are deleted on the way,
        # which is only tolerated by Python 2's list-returning ``keys()``.
        for ref_gene in list(hits):
            for query_gene in list(hits):
                if ref_gene == query_gene:
                    continue
                if ref_gene not in hits:
                    # The reference was removed while it was being compared
                    # as a query during an earlier pass.
                    print('colocation already resolved: %s %s'
                          % (query_gene, ref_gene))
                    continue

                ref_hit = hits[ref_gene]
                query_hit = hits[query_gene]
                ref_low, ref_high = sorted(ref_hit[1:3])
                query_low, query_high = sorted(query_hit[1:3])
                # Proper interval intersection.  Testing only the first
                # coordinate of each hit misses overlaps between hits whose
                # coordinates are given in opposite orientations.
                if max(ref_low, query_low) > min(ref_high, query_high):
                    continue

                print('Overlapping locations!')
                print('%s %s %s' % (one_blast, ref_gene, ref_hit))
                print('%s %s %s' % (one_blast, query_gene, query_hit))
                if ref_hit[0] > query_hit[0]:
                    del hits[query_gene]
                    print('removing %s' % (query_gene,))
                else:
                    del hits[ref_gene]
                    print('removing %s' % (ref_gene,))
                    # The reference is gone: move on to the next reference.
                    break


def _padded_text_face(text, margin, background="white", fgcolor=None):
    """Build an opaque ``TextFace`` showing ``' <text> '`` with equal margins."""
    face = TextFace(' %s ' % text)
    face.margin_top = margin
    face.margin_right = margin
    face.margin_left = margin
    face.margin_bottom = margin
    face.inner_background.color = background
    if fgcolor is not None:
        face.fgcolor = fgcolor
    face.opacity = 1.0
    return face


def plot_blast_result(tree_file, blast_result_file_list, id2description,
                      id2mlst, output_file="test.svg"):
    '''
    Staph aureus PVL project (with Laure Jaton).

    Draw a phylogeny next to a table showing the conservation of virulence
    factors.  Requires MLST results, the tblastn results (virulence factors
    vs chromosomes) and a correspondence between genome accessions and the
    names to display in the phylogeny.  Patient identifiers are expected to
    have been replaced by "CHUV n" in the descriptions.

    Processing steps:

    1. parse the BLAST tables and print the parsed data;
    2. remove overlapping hits, keeping the one with the highest identity;
    3. drop the queries that are left without any hit;
    4. midpoint-root and ladderize the tree, then add one aligned column per
       remaining query (sorted by name).  Identities above 70 are shown
       rounded to one decimal on a coloured background, anything else as
       ``-``.  Leaves whose description contains ``'nord'`` use the colour
       map ``m`` and larger cells, the others use ``m2``;
    5. relabel every leaf as ``' <description> (<mlst>)'`` and render.

    ``Tree``, ``TextFace``, ``rgb2hex``, ``m`` and ``m2`` are taken from
    module scope.
    NOTE(review): ``m``/``m2`` are presumably matplotlib ``ScalarMappable``
    objects -- they are defined outside this function; confirm.
    NOTE(review): a second function with the same name is defined later in
    this file and replaces this one when the module is imported.

    :param tree_file: newick phylogeny whose leaf names are the keys used in
        all the dictionaries and in the BLAST subject column
    :param blast_result_file_list: tblastn results, virulence factors vs
        chromosome (best hit only)
    :param id2description: tree identifier -> description to display
        (e.g. "S aureus Newman")
    :param id2mlst: tree identifier -> S. aureus sequence type
    :param output_file: path of the rendered figure (defaults to the
        previously hard-coded ``"test.svg"``)
    :return: None
    :raises KeyError: if a leaf is missing from ``id2mlst``
    '''
    blast2data, queries = _read_best_hits(blast_result_file_list)
    # Single pre-formatted arguments keep the output identical whether
    # ``print`` is the Python 2 statement or the Python 3 function.
    print(blast2data)
    print(queries)

    _drop_overlapping_hits(blast2data)

    # Only keep the queries that still have at least one hit; the columns are
    # displayed in alphabetical order.
    columns = sorted(
        query for query in queries
        if any(query in hits for hits in blast2data.values()))

    t1 = Tree(tree_file)
    # Root the tree on its midpoint.
    t1.set_outgroup(t1.get_midpoint_outgroup())
    t1.ladderize()

    head = True
    for lf in t1.iter_leaves():
        lf.branch_vertical_margin = 0
        for col, value in enumerate(columns):
            print('%s %s' % (lf.name, value))
            if head:
                # First leaf only: the gene name is added to the same column,
                # ahead of the value cell.
                lf.add_face(_padded_text_face(str(value), 4), col,
                            position="aligned")

            try:
                identity = blast2data[lf.name][value][0]
                northern = 'nord' in id2description[lf.name]
            except KeyError:
                # No hit, or no description for this leaf: empty cell.
                identity = None

            if identity is not None and float(identity) > 70:
                if float(identity) == 100.0:
                    shown, label = 100.0, '100'
                else:
                    shown = round(float(identity), 1)
                    label = str(shown)
                # Colour and text contrast are derived from the displayed
                # (rounded) value.
                colour_map = m if northern else m2
                cell = _padded_text_face(
                    label,
                    4 if northern else 2,
                    background=rgb2hex(colour_map.to_rgba(shown)),
                    fgcolor="white" if shown > 92 else None)
            else:
                cell = _padded_text_face('-', 2)
            lf.add_face(cell, col, position="aligned")

        try:
            lf.name = ' %s (%s)' % (id2description[lf.name], id2mlst[lf.name])
        except KeyError:
            # No description: fall back to the raw leaf name.  The label must
            # stay a string (it used to be replaced by a TextFace object).
            lf.name = ' %s (%s)' % (lf.name, id2mlst[lf.name])
        head = False

    t1.render(output_file, dpi=800, h=400)
def _set_face_margins(face, margin):
    """Give ``face`` the same margin on all four sides."""
    face.margin_top = margin
    face.margin_right = margin
    face.margin_left = margin
    face.margin_bottom = margin


def plot_blast_result(tree_file, blast_result_file_list, id2description,
                      id2mlst, check_overlap, ordered_queries,
                      fasta_file2accession, id_cutoff=80,
                      reference_accession='-', accession2hit_filter=False,
                      show_identity_values=True, output_file="profile.svg"):
    '''
    Staph aureus PVL project (with Laure Jaton).

    Draw a phylogeny next to a table showing the conservation of virulence
    factors.  Requires MLST results, the tblastn results (virulence factors
    vs chromosomes) and a correspondence between genome accessions and the
    names to display in the phylogeny.  Patient identifiers are expected to
    have been replaced by "CHUV n" in the descriptions.

    Hits whose identity is not strictly above ``id_cutoff`` are discarded,
    queries without any remaining hit on a leaf of the tree get no column,
    and the remaining columns follow the order of ``ordered_queries``.
    Cells are coloured with the module-level colour maps ``m_blue`` and
    ``m_green`` (green only for hits listed in ``accession2hit_filter`` on a
    non-reference leaf).  Internal nodes with support below 0.9 are marked
    with a blue dot.

    ``Tree``, ``TreeStyle``, ``NodeStyle``, ``TextFace``, ``rgb2hex``,
    ``m_blue`` and ``m_green`` are taken from module scope.
    NOTE(review): the colour maps are presumably matplotlib
    ``ScalarMappable`` objects defined outside this function -- confirm.

    :param tree_file: newick phylogeny; leaf names must be keys of
        ``fasta_file2accession`` and ``id2mlst``
    :param blast_result_file_list: tblastn results, virulence factors vs
        chromosome (best hit only), handed to
        ``blast_utils.remove_blast_redundancy``
    :param id2description: genome accession -> description to display
        (e.g. "S aureus Newman")
    :param id2mlst: leaf name -> S. aureus sequence type
    :param check_overlap: forwarded to
        ``blast_utils.remove_blast_redundancy``
    :param ordered_queries: query names in the desired column order
    :param fasta_file2accession: leaf name -> genome accession
    :param id_cutoff: identity threshold; a number or numeric string
    :param reference_accession: leaf name of the reference taxon, always
        drawn with the blue scale
    :param accession2hit_filter: optional mapping accession -> container of
        queries to highlight in green
    :param show_identity_values: write the identity in each cell when true,
        otherwise only draw small coloured cells for the retained hits
    :param output_file: path of the rendered figure (defaults to the
        previously hard-coded ``"profile.svg"``)
    :return: None
    :raises KeyError: if a leaf is missing from ``fasta_file2accession`` or
        ``id2mlst``
    '''
    import blast_utils

    # Errors that can come from incomplete data when looking a hit up; a
    # bare ``except`` would also hide programming errors such as NameError.
    lookup_errors = (KeyError, IndexError, TypeError, ValueError)

    # Normalise once: the cutoff used to be compared raw in the first filter
    # and through float() everywhere else, so a string cutoff silently
    # discarded every hit on Python 2.
    id_cutoff = float(id_cutoff)

    blast2data, queries = blast_utils.remove_blast_redundancy(
        blast_result_file_list, check_overlap)

    # Count the hits above the cutoff for each query and drop the others.
    queries_count = {}
    for query in queries:
        queries_count[query] = 0
        for hits in blast2data.values():
            if query in hits:
                if float(hits[query][0]) > id_cutoff:
                    queries_count[query] += 1
                else:
                    del hits[query]

    # Single pre-formatted arguments keep the output identical whether
    # ``print`` is the Python 2 statement or the Python 3 function.
    print(queries_count)
    for query in queries:
        print("Hit counts: %s\t%s" % (query, queries_count[query]))
    # Rebuild the list in place instead of popping while iterating, which
    # skipped the element following each removed query.
    queries[:] = [query for query in queries if queries_count[query] != 0]
    print('delete columns with no matches ok')

    t1 = Tree(tree_file)
    tss = TreeStyle()
    # Root the tree on its midpoint.
    t1.set_outgroup(t1.get_midpoint_outgroup())
    t1.ladderize()

    # Keep the queries having at least one retained hit on a leaf of the
    # tree, in the requested order.
    ordered_queries_filtered = []
    for query in ordered_queries:
        for lf2 in t1.iter_leaves():
            try:
                accession = fasta_file2accession[lf2.name]
                identity = float(blast2data[accession][query][0])
            except lookup_errors:
                continue
            if identity > id_cutoff:
                ordered_queries_filtered.append(query)
                break

    print('drawing tree')
    print('n initial queries: %s n kept: %s'
          % (len(ordered_queries), len(ordered_queries_filtered)))

    # Column headers: rotated gene names in the aligned header of the tree
    # style (smaller and without margins when the values are not displayed).
    for col, value in enumerate(ordered_queries_filtered):
        if show_identity_values:
            header = TextFace(' %s ' % str(value))
            _set_face_margins(header, 2)
        else:
            header = TextFace(' %s ' % str(value), fsize=6)
            _set_face_margins(header, 0)
        header.rotation = 270
        header.vt_align = 2
        header.hz_align = 2
        header.inner_background.color = "white"
        header.opacity = 1.0
        tss.aligned_header.add_face(header, col)

    for lf in t1.iter_leaves():
        lf.branch_vertical_margin = 0
        accession = fasta_file2accession[lf.name]

        for col, value in enumerate(ordered_queries_filtered):
            try:
                identity = float(blast2data[accession][value][0])
                # Green scale only for filtered hits of non-reference taxa.
                use_green = bool(
                    lf.name != reference_accession
                    and accession2hit_filter
                    and accession in accession2hit_filter
                    and value in accession2hit_filter[accession])
            except lookup_errors:
                # No usable hit for this genome/query pair.
                identity = 0.0
                color = "white"
            else:
                colour_map = m_green if use_green else m_blue
                color = rgb2hex(colour_map.to_rgba(identity))

            if show_identity_values:
                if identity >= id_cutoff:
                    if identity == 100.0:
                        # Compared numerically so that any spelling of a
                        # perfect identity ("100.0", "100.000", ...) is
                        # shortened.
                        cell = TextFace("%s " % '100')
                    else:
                        cell = TextFace("%.2f" % round(identity, 2))
                    if identity > 95:
                        # Dark background: switch to white text.
                        cell.fgcolor = "white"
                else:
                    cell = TextFace(' %s ' % '-')
                cell.opacity = 1.0
                _set_face_margins(cell, 2)
                cell.inner_background.color = color
                lf.add_face(cell, col, position="aligned")
            elif identity >= id_cutoff:
                # Values hidden: coloured cell only, nothing is added for
                # missing hits.
                # NOTE(review): the original layout of this branch was lost;
                # adding the face only for retained hits is assumed.
                cell = TextFace(' ')
                _set_face_margins(cell, 0)
                cell.inner_background.color = color
                lf.add_face(cell, col, position="aligned")

        try:
            description = id2description[accession]
        except KeyError:
            print('-------- %s' % (id2description,))
            # No description: fall back to the raw leaf name.
            description = lf.name
        lf.name = ' %s (%s)' % (description, id2mlst[lf.name])

    # Highlight weakly supported nodes, hide the marker of the others.
    for node in t1.traverse():
        nstyle = NodeStyle()
        if node.support < 0.9:
            nstyle["fgcolor"] = "blue"
            nstyle["size"] = 6
        else:
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
        node.set_style(nstyle)

    print('rendering tree')
    t1.render(output_file, dpi=1000, h=400, tree_style=tss)