def add_faces(cur, field, leaf, label_info, colours, bg_colour, outfile):
    """Attach the entries of ``label_info`` to ``leaf`` as coloured text faces.

    The first label gets a trailing ':' and every following label except the
    last one gets a trailing ','.  ``label_info`` is modified in place with
    that punctuation (and, for SVG output, with the padding described below).

    :param cur: not used by this function; kept for interface compatibility
    :param field: not used by this function; kept for interface compatibility
    :param leaf: node receiving the faces through ``leaf.add_face``
    :param label_info: list of label strings (mutated in place)
    :param colours: foreground colours, indexed in step with ``label_info``
    :param bg_colour: background colour given to every face
    :param outfile: output file name; a name containing '.svg' selects a wider
        left margin and space-padded labels
    """
    is_svg = '.svg' in outfile
    last_index = len(label_info) - 1
    y = 0
    for x, text in enumerate(label_info):
        if x == 0:
            text += ':'
        elif x < last_index:
            text += ','
        if is_svg:
            # Floor division keeps the padding an integer; with true division
            # the string repetition below raises TypeError.
            padding = 1 + len(text) // 5
            text += ' ' * padding
        label_info[x] = text

        label = TextFace(text)
        label.margin_left = 20 if is_svg else 5
        label.fgcolor = colours[x]
        label.background.color = bg_colour

        # The column index cycles 1, 2, 3, 1, 2, 3, ... as x increases.
        if x > 1 and x % 3 == 0:
            y += 3
        leaf.add_face(label, column=x - y + 1, position="branch-right")
	def render_tree_image(self, filename):
		"""Render this object's Newick tree to ``filename``.

		The tree is parsed from ``self.newick_string()`` (with a closing ';'
		appended, format 1).  Every descendant node gets a red text face on
		top of its branch, alternating between "0" and "1" in iteration
		order, plus a node style of size 15 and colour "#333".  Node names
		are drawn in blue through a custom layout function, and the image is
		rendered rotated by 90 degrees at a width of 2000.
		"""
		def my_layout(node):
			# Show the node name to the right of the branch, in blue.
			blue_name = AttrFace("name", fsize=10, fgcolor = "#0000FF")
			faces.add_face_to_node(blue_name, node, column=0, position="branch-right")

		tree = Tree("%s;" % self.newick_string(), format = 1)

		for index, descendant in enumerate(tree.iter_descendants()):
			# Even positions are labelled "0", odd positions "1".
			bit_face = TextFace(str(index % 2))
			bit_face.fgcolor = "#FF0000"

			node_style = NodeStyle()
			node_style['size'] = 15
			node_style['fgcolor'] = "#333"
			descendant.set_style(node_style)
			descendant.add_face(bit_face, column = 0, position = "branch-top")

		style = TreeStyle()
		style.layout_fn = my_layout
		style.rotation = 90
		style.show_leaf_name = False
		style.show_scale = False
		style.margin_left = 0
		style.branch_vertical_margin = 50
		tree.render(filename, tree_style = style, w = 2000)
Beispiel #3
0
def _blast_text_cell(text, margin, background, fgcolor=None):
    """Return a ``TextFace`` cell for the aligned identity table.

    :param text: value shown in the cell (wrapped in single spaces)
    :param margin: margin applied to all four sides of the face
    :param background: inner background colour of the face
    :param fgcolor: optional text colour; left at the face default when None
    """
    cell = TextFace(' %s ' % str(text))
    cell.margin_top = margin
    cell.margin_right = margin
    cell.margin_left = margin
    cell.margin_bottom = margin
    cell.inner_background.color = background
    if fgcolor is not None:
        cell.fgcolor = fgcolor
    cell.opacity = 1.
    return cell


def _hit_ranges_overlap(coords_a, coords_b):
    """Return True when two coordinate pairs share at least one position.

    Each pair may be stored in either order (start > end is accepted).
    """
    return min(coords_a) <= max(coords_b) and min(coords_b) <= max(coords_a)


def plot_blast_result(tree_file, blast_result_file_list, id2description, id2mlst):
    '''
    Staph aureus PVL project (with Laure Jaton).

    Draw a phylogeny side by side with the conservation of virulence factors.
    Requires MLST results, the set of tblastn results (virulence factors vs
    chromosomes), and a mapping between the genome accessions and the names
    to display in the phylogeny (patient Molis identifiers are replaced by
    "CHUV n").

    Steps performed:

    1. Parse the tab-separated BLAST files into
       ``subject -> query -> [identity, coordinate, coordinate]``
       (columns 1, 2, 3, 9 and 10 of each line).
    2. For every subject, when two hits overlap, keep only the hit with the
       higher identity.
    3. Drop queries that have no remaining hit in any subject.
    4. Root the tree on its midpoint, ladderize it, add one aligned column
       per query (sorted by name) and render the result to "test.svg".

    The module-level colour mappers ``m`` and ``m2`` and ``rgb2hex`` are used
    to colour the cells; they must be defined by the surrounding module.

    :param tree_file: newick phylogeny whose leaf identifiers match the keys
        of all the dictionaries used here
    :param blast_result_file_list: tblastn result files, virulence factors vs
        chromosome (best hit only)
    :param id2description: genome identifier used in the tree -> description
        (e.g. S aureus Newman); descriptions containing 'nord' are drawn
        with the ``m`` colour scale and wider margins
    :param id2mlst: tree identifier -> S. aureus ST type
    :return: None
    '''
    # subject -> query -> [identity, coordinate, coordinate]
    # NOTE(review): columns 9/10 are presumably the subject start/end of the
    # BLAST tabular format -- confirm against the files actually produced.
    blast2data = {}
    queries = []
    for one_blast_file in blast_result_file_list:
        with open(one_blast_file, 'r') as f:
            for line in f:
                if not line.strip():
                    # Tolerate blank (e.g. trailing) lines.
                    continue
                fields = line.split('\t')
                query, subject = fields[0], fields[1]
                blast2data.setdefault(subject, {})[query] = [
                    float(fields[2]), int(fields[8]), int(fields[9])]
                if query not in queries:
                    queries.append(query)
    print(blast2data)
    print(queries)

    # Resolve overlapping hits: of two hits sharing positions on the same
    # subject, only the one with the higher identity is kept.
    for one_blast, hits in blast2data.items():
        for ref_gene in list(hits):
            for query_gene in list(hits):
                if ref_gene == query_gene:
                    continue
                if ref_gene not in hits or query_gene not in hits:
                    # One of the two was already discarded by an earlier
                    # comparison.
                    print('colocation already resolved: %s %s'
                          % (query_gene, ref_gene))
                    continue
                ref_hit = hits[ref_gene]
                query_hit = hits[query_gene]
                # Leftover debug trace for one specific accession/gene.
                if one_blast == 'NC_002745' and ref_gene == 'selm':
                    print('target: %s %s' % (ref_gene, ref_hit))
                    print('%s %s' % (query_gene, query_hit))
                # Symmetric interval test: catches every overlap whatever
                # the orientation in which the coordinates are stored.
                if not _hit_ranges_overlap(ref_hit[1:3], query_hit[1:3]):
                    continue
                print('Overlapping locations!')
                print('%s %s %s' % (one_blast, ref_gene, ref_hit))
                print('%s %s %s' % (one_blast, query_gene, query_hit))
                if ref_hit[0] > query_hit[0]:
                    del hits[query_gene]
                    print('removing %s' % query_gene)
                else:
                    del hits[ref_gene]
                    print('removing %s' % ref_gene)
                    # The reference hit is gone: move on to the next one.
                    break

    # Keep only the queries that still have at least one hit.
    queries = [query for query in queries
               if any(query in hits for hits in blast2data.values())]

    t1 = Tree(tree_file)
    # Root the tree on its midpoint node.
    R = t1.get_midpoint_outgroup()
    t1.set_outgroup(R)
    t1.ladderize()

    sorted_queries = sorted(queries)
    head = True
    for lf in t1.iter_leaves():
        lf.branch_vertical_margin = 0

        for col, value in enumerate(sorted_queries):
            print('%s %s' % (lf.name, value))
            if head:
                # First leaf only: put the gene name on top of the column.
                lf.add_face(_blast_text_cell(value, 4, "white"), col,
                            position="aligned")

            try:
                identity = float(blast2data[lf.name][value][0])
                is_nord = 'nord' in id2description[lf.name]
            except KeyError:
                # No hit for this leaf/gene, or leaf without description.
                cell = _blast_text_cell('-', 2, "white")
            else:
                if identity > 70:
                    if str(identity) in ('100.00', '100.0'):
                        text = '100'
                    else:
                        text = str(round(identity, 1))
                    # Colour and text contrast are derived from the
                    # displayed (rounded) value.
                    shown = float(text)
                    margin, mapper = (4, m) if is_nord else (2, m2)
                    cell = _blast_text_cell(
                        text, margin, rgb2hex(mapper.to_rgba(shown)),
                        fgcolor="white" if shown > 92 else None)
                else:
                    cell = _blast_text_cell('-', 2, "white")
            lf.add_face(cell, col, position="aligned")

        # Rename the leaf last: the lookups above need the original name.
        # The name stays a plain string; missing entries fall back to the
        # raw identifier and '-' respectively.
        lf.name = ' %s (%s)' % (id2description.get(lf.name, lf.name),
                                id2mlst.get(lf.name, '-'))
        head = False

    t1.render("test.svg", dpi=800, h=400)
Beispiel #4
0
def plot_blast_result(tree_file,
                      blast_result_file_list,
                      id2description,
                      id2mlst,
                      check_overlap,
                      ordered_queries,
                      fasta_file2accession,
                      id_cutoff=80,
                      reference_accession='-',
                      accession2hit_filter=False,
                      show_identity_values=True):
    '''
    Staph aureus PVL project (with Laure Jaton).

    Draw a phylogeny side by side with the conservation of virulence factors.
    Requires MLST results, the set of tblastn results (virulence factors vs
    chromosomes), and a mapping between the genome accessions and the names
    to display in the phylogeny (patient Molis identifiers are replaced by
    "CHUV n").

    The BLAST data is loaded through ``blast_utils.remove_blast_redundancy``;
    hits whose identity is not above ``id_cutoff`` are discarded.  The tree
    is rooted on its midpoint, ladderized, decorated with one aligned column
    per retained query and rendered to "profile.svg".  The module-level
    colour mappers ``m_blue`` and ``m_green`` and ``rgb2hex`` are used to
    colour the cells; they must be defined by the surrounding module.

    :param tree_file: newick phylogeny whose leaf identifiers match the keys
        of ``fasta_file2accession`` and ``id2mlst``
    :param blast_result_file_list: tblastn result files, virulence factors vs
        chromosome (best hit only)
    :param id2description: genome accession -> description
        (e.g. S aureus Newman)
    :param id2mlst: tree identifier -> S. aureus ST type
    :param check_overlap: forwarded to ``blast_utils.remove_blast_redundancy``
    :param ordered_queries: query names in the column order to draw; queries
        without any hit above the cutoff among the tree leaves are skipped
    :param fasta_file2accession: tree leaf name -> genome accession
    :param id_cutoff: identity threshold for a hit to be counted and shown
    :param reference_accession: leaf name that is always drawn with the
        ``m_blue`` scale
    :param accession2hit_filter: optional mapping accession -> queries; the
        listed hits are drawn with the ``m_green`` scale instead of ``m_blue``
    :param show_identity_values: write identity values in the cells when
        true, otherwise draw small colour-only cells
    :return: None
    '''
    import blast_utils
    blast2data, queries = blast_utils.remove_blast_redundancy(
        blast_result_file_list, check_overlap)

    # Errors that mean "no usable identity value for this leaf/query".
    lookup_errors = (KeyError, IndexError, TypeError, ValueError)
    cutoff = float(id_cutoff)

    def set_margins(face, margin):
        face.margin_top = margin
        face.margin_right = margin
        face.margin_left = margin
        face.margin_bottom = margin

    def passes_cutoff(accession, query):
        try:
            return float(blast2data[accession][query][0]) > cutoff
        except lookup_errors:
            return False

    # Count the hits above the cutoff per query and drop the other hits.
    queries_count = {}
    for query in queries:
        queries_count[query] = 0
        for hits in blast2data.values():
            if query in hits:
                if float(hits[query][0]) > id_cutoff:
                    queries_count[query] += 1
                else:
                    del hits[query]

    print(queries_count)
    # Iterate over a snapshot: removing from the list being iterated would
    # silently skip the element following each removal.
    for query in list(queries):
        print("Hit counts: %s\t%s" % (query, queries_count[query]))
        if queries_count[query] == 0:
            queries.remove(query)

    print('delete columns with no matches ok')

    t1 = Tree(tree_file)
    tss = TreeStyle()
    # Root the tree on its midpoint node.
    R = t1.get_midpoint_outgroup()
    t1.set_outgroup(R)
    t1.ladderize()

    # Keep the requested columns that have at least one hit above the cutoff
    # among the leaves of this tree.  Leaves without a known accession map
    # to None and therefore never match.
    leaf_accessions = [fasta_file2accession.get(leaf.name)
                       for leaf in t1.iter_leaves()]
    ordered_queries_filtered = [
        query for query in ordered_queries
        if any(passes_cutoff(accession, query)
               for accession in leaf_accessions)]

    print('drawing tree')
    print('n initial queries: %s n kept: %s' % (len(ordered_queries),
                                                len(ordered_queries_filtered)))

    # Column headers: rotated gene names above the aligned columns.
    for col, value in enumerate(ordered_queries_filtered):
        if show_identity_values:
            header = TextFace(' %s ' % str(value))
            set_margins(header, 2)
        else:
            header = TextFace(' %s ' % str(value), fsize=6)
            set_margins(header, 0)
        header.rotation = 270
        header.vt_align = 2
        header.hz_align = 2
        header.inner_background.color = "white"
        header.opacity = 1.
        tss.aligned_header.add_face(header, col)

    for lf in t1.iter_leaves():
        lf.branch_vertical_margin = 0
        # None for unmapped leaves: every lookup below then falls back to an
        # empty cell instead of aborting the whole plot.
        accession = fasta_file2accession.get(lf.name)
        for col, value in enumerate(ordered_queries_filtered):
            try:
                raw_identity = blast2data[accession][value][0]
                identity = float(raw_identity)
            except lookup_errors:
                raw_identity = 0
                identity = 0.0
                color = "white"
            else:
                # Green scale only for hits listed in the filter on a
                # non-reference leaf; blue scale for everything else.
                in_filter = (lf.name != reference_accession
                             and accession2hit_filter
                             and accession in accession2hit_filter
                             and value in accession2hit_filter[accession])
                scale = m_green if in_filter else m_blue
                color = rgb2hex(scale.to_rgba(identity))

            if show_identity_values:
                if identity >= cutoff:
                    if str(raw_identity) in ('100.00', '100.0'):
                        n = TextFace("100   ")
                    else:
                        n = TextFace("%.2f" % round(identity, 2))
                    if identity > 95:
                        # White text on the darkest backgrounds.
                        n.fgcolor = "white"
                else:
                    n = TextFace(' - ')
                n.opacity = 1.
                set_margins(n, 2)
                n.inner_background.color = color
                lf.add_face(n, col, position="aligned")
            elif identity >= cutoff:
                # Colour-only cell; nothing is drawn below the cutoff.
                n = TextFace('  ')
                set_margins(n, 0)
                n.inner_background.color = color
                lf.add_face(n, col, position="aligned")

        # Rename the leaf last: the lookups above need the original name.
        try:
            lf.name = ' %s (%s)' % (id2description[accession],
                                    id2mlst[lf.name])
        except KeyError:
            print('-------- %s' % (id2description,))
            lf.name = ' %s (%s)' % (lf.name, id2mlst.get(lf.name, '-'))

    # Mark nodes with support below 0.9 with a blue dot; hide the others.
    for n in t1.traverse():
        nstyle = NodeStyle()
        if n.support < 0.9:
            nstyle["fgcolor"] = "blue"
            nstyle["size"] = 6
        else:
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
        n.set_style(nstyle)

    print('rendering tree')
    t1.render("profile.svg", dpi=1000, h=400, tree_style=tss)