def rotation_layout(node):
    """Layout function that decorates leaf nodes with three text faces.

    Non-leaf nodes are left untouched.  For a leaf, a face showing
    ``node.name`` is created with its ``rotation`` set to the value of
    ``randint(0, 360)``, and is attached in column 0 at the
    "branch-right" position.  Two fixed-label faces ("third" and
    "second") are attached in columns 8 and 2 respectively.

    :param node: tree node handed in by the rendering engine; must expose
        ``is_leaf()`` and ``name``.
    """
    if not node.is_leaf():
        return

    name_face = TextFace(node.name, tight_text=True)
    name_face.rotation = randint(0, 360)

    # The fixed labels are registered before the name face, even though
    # they live in higher-numbered columns.
    for label, column in (("third", 8), ("second", 2)):
        add_face_to_node(TextFace(label), node, column=column,
                         position="branch-right")
    add_face_to_node(name_face, node, column=0, position="branch-right")

    # Borders are configured after the face has been attached.
    name_face.border.width = 1
    name_face.inner_border.width = 1
def _blast_header_face(label, show_identity_values):
    """Build the rotated column-header face for one query name."""
    if show_identity_values:
        face = TextFace(' %s ' % str(label))
        margin = 2
    else:
        # Compact layout: smaller font and no margins.
        face = TextFace(' %s ' % str(label), fsize=6)
        margin = 0
    face.margin_top = margin
    face.margin_right = margin
    face.margin_left = margin
    face.margin_bottom = margin
    face.rotation = 270
    face.vt_align = 2
    face.hz_align = 2
    face.inner_background.color = "white"
    face.opacity = 1.
    return face


def _blast_identity_face(identity_value, color, id_cutoff,
                         show_identity_values):
    """Build the table cell for one hit; return None when no cell is drawn."""
    above_cutoff = float(identity_value) >= float(id_cutoff)
    if show_identity_values:
        if above_cutoff:
            if str(identity_value) in ('100.00', '100.0'):
                # Perfect matches are shortened to "100".
                identity_value = '100'
                face = TextFace("%s " % identity_value)
            else:
                face = TextFace("%.2f" % round(float(identity_value), 2))
            if float(identity_value) > 95:
                # High identities switch to white text.
                face.fgcolor = "white"
        else:
            # Below the cutoff (or no hit at all): placeholder dash.
            face = TextFace(' %s ' % '-')
        face.opacity = 1.
        margin = 2
    elif above_cutoff:
        # Identity values hidden: coloured cell without a number.
        face = TextFace(' ')
        margin = 0
    else:
        return None
    face.margin_top = margin
    face.margin_right = margin
    face.margin_left = margin
    face.margin_bottom = margin
    face.inner_background.color = color
    return face


def plot_blast_result(tree_file, blast_result_file_list, id2description,
                      id2mlst, check_overlap, ordered_queries,
                      fasta_file2accession, id_cutoff=80,
                      reference_accession='-', accession2hit_filter=False,
                      show_identity_values=True, output_file="profile.svg"):
    '''
    Staph aureus PVL project with Laure Jaton.

    Draw a phylogeny and the conservation of virulence factors side by side.
    Requires MLST results, the set of tblastn results (virulence factors vs
    chromosomes), and a correspondence between the genome accessions and the
    names to display in the phylogeny.  Patient "molis" identifiers are
    replaced by "CHUV n".

    The tree is rooted on its midpoint outgroup, ladderized and rendered to
    ``output_file``.

    :param tree_file: phylogeny in newick format, with identifiers matching
        all the dictionaries used
    :param blast_result_file_list: tblastn results, virulence factors vs
        chromosome (best blast only)
    :param id2description: genome identifier -> description (e.g. S. aureus
        Newman); looked up with the accession of each leaf
    :param id2mlst: tree identifier (leaf name) -> S. aureus ST type
    :param check_overlap: forwarded to
        ``blast_utils.remove_blast_redundancy``
    :param ordered_queries: query names in the column order to draw; queries
        without any hit above the cutoff among the tree leaves are dropped
    :param fasta_file2accession: leaf name -> accession used as key of the
        blast data
    :param id_cutoff: identity threshold; hits at or below it are discarded
    :param reference_accession: leaf name of the reference taxon, always
        drawn with the blue scale
    :param accession2hit_filter: False, or a mapping accession -> collection
        of query names to draw with the green scale instead of the blue one
    :param show_identity_values: write the identity value in each cell when
        True, otherwise draw compact coloured cells only
    :param output_file: path of the rendered image
    :return: None
    '''
    import blast_utils

    blast2data, queries = blast_utils.remove_blast_redundancy(
        blast_result_file_list, check_overlap)
    cutoff = float(id_cutoff)

    # Count hits above the cutoff per query and discard the others.
    queries_count = {}
    for query in queries:
        queries_count[query] = 0
        for one_blast in blast2data:
            hits = blast2data[one_blast]
            if query in hits:
                if float(hits[query][0]) > cutoff:
                    queries_count[query] += 1
                else:
                    del hits[query]

    print(queries_count)
    # Iterate over a copy: removing from the list being iterated would skip
    # the element that follows each removed query.
    for query in list(queries):
        print("Hit counts: %s\t%s" % (query, queries_count[query]))
        if queries_count[query] == 0:
            queries.remove(query)
    print('delete columns with no matches ok')

    t1 = Tree(tree_file)
    tss = TreeStyle()
    # Root the tree on its midpoint outgroup.
    R = t1.get_midpoint_outgroup()
    t1.set_outgroup(R)
    t1.ladderize()

    # Keep only the queries with at least one hit among the tree leaves.
    ordered_queries_filtered = []
    for query in ordered_queries:
        for leaf in t1.iter_leaves():
            try:
                accession = fasta_file2accession[leaf.name]
                identity = float(blast2data[accession][query][0])
            except (KeyError, IndexError, TypeError, ValueError):
                # Leaf without data for this query.
                continue
            if identity > cutoff:
                ordered_queries_filtered.append(query)
                break

    print('drawing tree')
    print('n initial queries: %s n kept: %s' % (
        len(ordered_queries), len(ordered_queries_filtered)))

    # Header row: one rotated gene name per column.
    for col, value in enumerate(ordered_queries_filtered):
        tss.aligned_header.add_face(
            _blast_header_face(value, show_identity_values), col)

    for lf in t1.iter_leaves():
        lf.branch_vertical_margin = 0
        accession = fasta_file2accession[lf.name]

        # Queries drawn in green for this leaf; never for the reference.
        highlighted = ()
        if (accession2hit_filter and lf.name != reference_accession
                and accession in accession2hit_filter):
            highlighted = accession2hit_filter[accession]

        for col, value in enumerate(ordered_queries_filtered):
            try:
                identity_value = blast2data[accession][value][0]
                identity = float(identity_value)
            except (KeyError, IndexError, TypeError, ValueError):
                # No usable hit: white cell.
                identity_value = 0
                color = "white"
            else:
                scale = m_green if value in highlighted else m_blue
                color = rgb2hex(scale.to_rgba(identity))

            face = _blast_identity_face(identity_value, color, id_cutoff,
                                        show_identity_values)
            if face is not None:
                lf.add_face(face, col, position="aligned")

        # Relabel the leaf only after its cells are drawn: the original name
        # is the lookup key used above.
        try:
            lf.name = ' %s (%s)' % (id2description[accession],
                                    id2mlst[lf.name])
        except KeyError:
            print('-------- %s' % (id2description,))
            lf.name = ' %s (%s)' % (lf.name, id2mlst[lf.name])

    # Blue dot on nodes with support below 0.9; other nodes get no dot.
    for node in t1.traverse():
        nstyle = NodeStyle()
        if node.support < 0.9:
            nstyle["fgcolor"] = "blue"
            nstyle["size"] = 6
        else:
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
        node.set_style(nstyle)

    print('rendering tree')
    t1.render(output_file, dpi=1000, h=400, tree_style=tss)
# Column name -> column index (the last index wins for repeated names).
header2column = dict((name, i) for i, name in enumerate(column_header))

ts = TreeStyle()
ts.mode = 'r'
ts.draw_guiding_lines = False
ts.show_leaf_name = False
ts.force_topology = False
ts.layout_fn = layout
ts.tree_width = 800
ts.draw_aligned_faces_as_table = True

# One header face per column; unnamed columns get a white placeholder
# rectangle instead of a label.
for i, name in enumerate(column_header):
    if name:
        headerF = TextFace(str(name), fgcolor=column_color[i], fsize=40)
        headerF.rotation = -85
    else:
        headerF = RectFace(300, 5, "white", "white")
    ts.aligned_header.add_face(headerF, i)

for treefile in args.tree_files:
    output = treefile + '.png'
    print('rendering %s' % output)
    try:
        # Close the file deterministically instead of leaking the handle.
        with open(treefile) as handle:
            # '|' is replaced by ',' before the text is parsed as a tree.
            newick = handle.read().replace('|', ',')
        t = Tree(newick)
    except Exception as e:
        # Best effort: report the failing file and move on to the next one.
        print('%s %s' % (e, treefile))
    else:
        t.set_outgroup(t.get_midpoint_outgroup())
        t.sort_descendants()