Beispiel #1
0
def plot_blast_result(tree_file,
                      blast_result_file_list,
                      id2description,
                      id2mlst,
                      check_overlap,
                      ordered_queries,
                      fasta_file2accession,
                      id_cutoff=80,
                      reference_accession='-',
                      accession2hit_filter=False,
                      show_identity_values=True):
    '''
    Projet Staph aureus PVL avec Laure Jaton
    Script pour afficher une phylogénie et la conservation de facteurs de virulence côte à côte
    Nécessite résultats MLST, ensemble des résultats tblastn (facteurs de virulence vs chromosomes),
    ainsi qu'une correspondance entre les accession des génomes et les noms qu'on veut afficher dans la phylogénie. Icemn
    pour les identifiants molis des patients, on les remplace par CHUV n.
    :param tree_file: phylogénie au format newick avec identifiants correspondants à tous les dico utilisés
    :param blast_result_file_list: résultats tblastn virulence factors vs chromosome (seulement best blast)
    :param id2description: identifiants génome utiisé dans l'arbre et description correspondante (i.e S aureus Newman)
    :param id2mlst: identitifiants arbre 2 S. aureus ST type
    :return:
    '''
    import blast_utils
    blast2data, queries = blast_utils.remove_blast_redundancy(
        blast_result_file_list, check_overlap)

    queries_count = {}

    for query in queries:
        queries_count[query] = 0
        for one_blast in blast2data:
            if query in blast2data[one_blast]:
                #print blast2data[one_blast][query]
                if float(blast2data[one_blast][query][0]) > id_cutoff:
                    queries_count[query] += 1
                else:
                    del blast2data[one_blast][query]

    print queries_count
    for query in queries:
        print "Hit counts: %s\t%s" % (query, queries_count[query])
        if queries_count[query] == 0:
            queries.pop(queries.index(query))

    print 'delete columns with no matches ok'
    '''             
    rm_genes = ['selv','spsmA1','psmB1','psmB2','ses','set','sel','selX','sek','sel2','LukF', 'LukM', 'hly', 'hld'
        , 'hlgA', 'hlgB', 'hlgC', 'sed', 'sej', 'ser', 'selq1', 'sec3', 'sek2', 'seq2', 'lukD', 'lukE']
    #rm_genes = ['icaR','icaA','icaB','icaC','icaD', 'sdrF', 'sdrH']

    for gene in rm_genes:
        queries.pop(queries.index(gene))
    '''
    #queries = ['selv']
    t1 = Tree(tree_file)
    tss = TreeStyle()
    #tss.show_branch_support = True
    # Calculate the midpoint node
    R = t1.get_midpoint_outgroup()
    t1.set_outgroup(R)
    t1.ladderize()

    ordered_queries_filtered = []
    for query in ordered_queries:
        hit_count = 0
        for lf2 in t1.iter_leaves():
            try:
                accession = fasta_file2accession[lf2.name]
                tmpidentity = blast2data[accession][query][0]
                if float(tmpidentity) > float(id_cutoff):
                    hit_count += 1
            except:
                continue
        if hit_count > 0:
            ordered_queries_filtered.append(query)
            #print 'skippink-----------'

    head = True
    print 'drawing tree'
    print 'n initial queries: %s n kept: %s' % (len(ordered_queries),
                                                len(ordered_queries_filtered))
    for lf in t1.iter_leaves():
        #lf.add_face(AttrFace("name", fsize=20), 0, position="branch-right")
        lf.branch_vertical_margin = 0
        #data = [random.randint(0,2) for x in xrange(3)]
        accession = fasta_file2accession[lf.name]
        for col, value in enumerate(ordered_queries_filtered):

            if head:
                if show_identity_values:
                    #'first row, print gene names'
                    #print 'ok!'
                    n = TextFace(' %s ' % str(value))
                    n.margin_top = 2
                    n.margin_right = 2
                    n.margin_left = 2
                    n.margin_bottom = 2
                    n.rotation = 270
                    n.vt_align = 2
                    n.hz_align = 2
                    n.inner_background.color = "white"
                    n.opacity = 1.
                    #lf.add_face(n, col, position="aligned")
                    tss.aligned_header.add_face(n, col)
                else:
                    n = TextFace(' %s ' % str(value), fsize=6)
                    n.margin_top = 0
                    n.margin_right = 0
                    n.margin_left = 0
                    n.margin_bottom = 0
                    n.rotation = 270
                    n.vt_align = 2
                    n.hz_align = 2
                    n.inner_background.color = "white"
                    n.opacity = 1.
                    #lf.add_face(n, col, position="aligned")
                    tss.aligned_header.add_face(n, col)
            try:
                identity_value = blast2data[accession][value][0]
                #print 'identity', lf.name, value, identity_value

                if lf.name != reference_accession:
                    if not accession2hit_filter:
                        # m_red
                        color = rgb2hex(m_blue.to_rgba(float(identity_value)))
                    else:
                        # if filter, color hits that are not in the filter in green

                        if accession in accession2hit_filter:
                            if value in accession2hit_filter[accession]:
                                # mred
                                color = rgb2hex(
                                    m_green.to_rgba(float(identity_value)))

                            else:
                                color = rgb2hex(
                                    m_blue.to_rgba(float(identity_value)))
                        else:
                            color = rgb2hex(
                                m_blue.to_rgba(float(identity_value)))
                else:
                    # reference taxon, blue scale
                    color = rgb2hex(m_blue.to_rgba(float(identity_value)))
                #if not show_identity_values:
                #    color = rgb2hex(m_blue.to_rgba(float(identity_value)))

            except:
                identity_value = 0
                color = "white"
            if show_identity_values:
                if float(identity_value) >= float(id_cutoff):

                    if str(identity_value) == '100.00' or str(
                            identity_value) == '100.0':
                        identity_value = '100'
                        n = TextFace("%s   " % identity_value)
                    else:
                        #    identity_value = str(round(float(identity_value), 1))

                        n = TextFace("%.2f" % round(float(identity_value), 2))
                    if float(identity_value) > 95:
                        n.fgcolor = "white"

                    n.opacity = 1.
                else:
                    identity_value = '-'
                    n = TextFace(' %s ' % str(identity_value))
                    n.opacity = 1.
                n.margin_top = 2
                n.margin_right = 2
                n.margin_left = 2
                n.margin_bottom = 2
                n.inner_background.color = color
                lf.add_face(n, col, position="aligned")
            else:

                if float(identity_value) >= float(id_cutoff):

                    # don't show identity values
                    n = TextFace('  ')

                    n.margin_top = 0
                    n.margin_right = 0
                    n.margin_left = 0
                    n.margin_bottom = 0
                    #n.color = color
                    n.inner_background.color = color
                    lf.add_face(n, col, position="aligned")

        try:
            accession = fasta_file2accession[lf.name]
            lf.name = ' %s (%s)' % (id2description[accession],
                                    id2mlst[lf.name])
        except KeyError:
            print '--------', id2description
            lf.name = ' %s (%s)' % (lf.name, id2mlst[lf.name])
        head = False

    for n in t1.traverse():
        nstyle = NodeStyle()
        if n.support < 0.9:
            #mundo = TextFace("%s" % str(n.support))
            #n.add_face(mundo, column=1, position="branch-bottom")
            nstyle["fgcolor"] = "blue"
            nstyle["size"] = 6
            n.set_style(nstyle)
        else:
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
            n.set_style(nstyle)

    print 'rendering tree'
    t1.render("profile.svg", dpi=1000, h=400, tree_style=tss)