예제 #1
0
def get_mysql_table(db_name, table_name):
    """Dump ``comparative_tables.<table_name>_<db_name>`` to a tab file.

    The file ``<table_name>_matrix.tab`` is written to the current working
    directory. Its first line holds ``"id"`` followed by one double-quoted
    genome description per taxon; every following line is one table row with
    each value converted with ``str`` and joined by tabs.

    :param db_name: biodatabase name, also used as the table-name suffix
    :param table_name: comparative-table prefix, also used for the file name
    """
    import numpy as np

    server, db = manipulate_biosqldb.load_db(db_name)

    all_taxons_id = manipulate_biosqldb.get_taxon_id_list(server, db_name)

    # Joining a column list (rather than special-casing the last element)
    # also works for an empty taxon list, which previously raised IndexError.
    columns = ["id"] + ["`%s`" % taxon_id for taxon_id in all_taxons_id]
    sql = "select %s from comparative_tables.%s_%s" % (", ".join(columns),
                                                       table_name, db_name)
    mat = np.array(server.adaptor.execute_and_fetchall(sql, ))

    taxonid2genome = manipulate_biosqldb.taxon_id2genome_description(
        server, db_name, True)
    genome_names = [taxonid2genome[int(i)] for i in all_taxons_id]

    quoted_header = ['"%s"' % name for name in ["id"] + genome_names]

    # The context manager guarantees the file is flushed and closed, even if
    # a write fails part-way through.
    with open("%s_matrix.tab" % table_name, "w") as out:
        out.write("\t".join(quoted_header) + "\n")
        for row in mat:
            out.write("\t".join([str(value) for value in row]) + "\n")
예제 #2
0
def locus_list2identity_in_other_genomes(locus_list, biodb):
    """Build a tab-separated table of best-hit identities for a list of loci.

    The first line is a header: ``orthogroup`` followed by one shortened
    genome description per taxon. Each following line starts with the
    orthogroup of one locus of ``locus_list`` followed by the values returned
    by ``locus_tag2identity_best_hit_all_genomes`` for that locus. Every
    cell, including the last one of a line, is followed by a tab.

    :param locus_list: iterable of locus tags
    :param biodb: biodatabase name
    :return: the whole table as a single string
    """
    import re

    server, db = manipulate_biosqldb.load_db(biodb)

    locus_tag2seqfeature_id = manipulate_biosqldb.locus_tag2seqfeature_id_dict(
        server, biodb)

    taxon_id2description = manipulate_biosqldb.taxon_id2genome_description(
        server, biodb)

    # Substrings stripped from the genome descriptions, applied in this
    # order. Raw strings keep "\." a valid regex escape without relying on
    # Python passing unknown string escapes through unchanged.
    noise_patterns = (
        r" subsp\. aureus",
        r", complete genome\.",
        r", complete sequence\.",
        r"strain ",
        r"str\. ",
        r" complete genome sequence\.",
        r" complete genome\.",
        r" chromosome",
        r"Staphylococcus aureus ",
    )
    for taxon_id in taxon_id2description.keys():
        description = taxon_id2description[taxon_id]
        for pattern in noise_patterns:
            description = re.sub(pattern, "", description)
        taxon_id2description[taxon_id] = description

    # NOTE(review): the header taxon order comes from a hard-coded reference
    # locus/orthogroup, while each data row iterates the keys of its own
    # result dict. Columns only line up if both yield the same key order --
    # confirm against locus_tag2identity_best_hit_all_genomes.
    reference_hits = locus_tag2identity_best_hit_all_genomes(
        biodb, 'wcw_1594', 'group_417')
    header_cells = [
        taxon_id2description[taxon_id] + '\t'
        for taxon_id in reference_hits.keys()
    ]
    lines = ['orthogroup\t' + ''.join(header_cells)]

    for locus in locus_list:
        seqfeature_id = locus_tag2seqfeature_id[locus]
        orthogroup = manipulate_biosqldb.seqfeature_id2orthogroup(
            server, seqfeature_id, biodb)
        taxon2identity = locus_tag2identity_best_hit_all_genomes(
            biodb, locus, orthogroup)
        cells = [
            '%s\t' % taxon2identity[taxon_id]
            for taxon_id in taxon2identity.keys()
        ]
        lines.append('%s\t' % orthogroup + ''.join(cells))

    return ''.join([line + '\n' for line in lines])
예제 #3
0
def identity_closest_homolog(db_name):
    """Create and fill ``comparative_tables.identity_closest_homolog2_<db_name>``.

    For every ordered pair of distinct taxons (taxon_1, taxon_2) and every
    locus returned for taxon_1 by
    ``biosql_own_sql_tables.circos_locus2taxon_highest_identity``, one row is
    inserted holding the seqfeature ids of the locus and of its hit in
    taxon_2 together with the identity value. Loci with no entry for taxon_2,
    or whose locus tags are missing from the locus -> seqfeature_id table,
    are skipped. The transaction is committed once per taxon_1.

    :param db_name: biodatabase name, used as suffix of the tables involved
    """
    from chlamdb.biosqldb import manipulate_biosqldb
    from chlamdb.biosqldb import biosql_own_sql_tables

    server, db = manipulate_biosqldb.load_db(db_name)

    sql1 = 'select locus_tag, seqfeature_id from custom_tables.locus2seqfeature_id_%s' % db_name
    locus2seqfeature_id = manipulate_biosqldb.to_dict(
        server.adaptor.execute_and_fetchall(sql1, ))

    sql2 = "CREATE TABLE comparative_tables.identity_closest_homolog2_%s(taxon_1 INT NOT NULL," \
          " taxon_2 INT NOT NULL," \
          " locus_1 INT NOT NULL," \
          " locus_2 INT NOT NULL," \
          " identity FLOAT, index locus_1(locus_1)," \
          " index locus_2(locus_2), index taxon_1(taxon_1), index taxon_2(taxon_2))" % (db_name)

    server.adaptor.execute(sql2)

    taxon2description = manipulate_biosqldb.taxon_id2genome_description(
        server, biodatabase_name=db_name)

    insert_template = 'insert into comparative_tables.identity_closest_homolog2_%s(taxon_1, taxon_2, locus_1, locus_2, identity) ' \
                      ' VALUES ("%s", "%s", "%s", "%s", %s)'

    all_taxons = taxon2description.keys()
    for taxon_1 in all_taxons:

        locus2identity = biosql_own_sql_tables.circos_locus2taxon_highest_identity(
            db_name, taxon_1)

        for taxon_2 in all_taxons:
            if taxon_1 == taxon_2:
                continue

            for locus in locus2identity:
                # Only the dictionary lookups are guarded: a KeyError here
                # means "no homolog / unknown locus". The INSERT itself is
                # kept outside the try so that an unrelated KeyError raised
                # by the database layer is no longer silently swallowed.
                try:
                    locus_1_id = locus2seqfeature_id[locus]
                    best_hit = locus2identity[locus][int(taxon_2)]
                    locus_2_id = locus2seqfeature_id[best_hit[1]]
                    identity = best_hit[0]
                except KeyError:
                    # no homologs
                    continue

                server.adaptor.execute(insert_template % (db_name,
                                                          taxon_1,
                                                          taxon_2,
                                                          locus_1_id,
                                                          locus_2_id,
                                                          identity))
        server.adaptor.commit()
예제 #4
0
def locus_list2presence_absence_all_genomes(locus_list, biodb_name):
    """Build a tab-separated orthogroup presence/absence table for some loci.

    The first line is a header: ``orthogroup`` followed by one shortened
    genome description per taxon returned by
    ``manipulate_biosqldb.get_genome_taxons_list``. Each following line
    starts with the orthogroup of one locus of ``locus_list`` followed, in
    the same taxon order, by the values of
    ``heatmap_presence_absence(biodb_name, orthogroup)``. Every cell,
    including the last one of a line, is followed by a tab.

    :param locus_list: iterable of locus tags
    :param biodb_name: biodatabase name
    :return: the whole table as a single string
    """
    import re

    server, db = manipulate_biosqldb.load_db(biodb_name)

    locus_tag2seqfeature_id = manipulate_biosqldb.locus_tag2seqfeature_id_dict(
        server, biodb_name)

    taxon_id2description = manipulate_biosqldb.taxon_id2genome_description(
        server, biodb_name)

    # Substrings stripped from the genome descriptions, applied in this
    # order. Raw strings keep "\." a valid regex escape without relying on
    # Python passing unknown string escapes through unchanged.
    noise_patterns = (
        r" subsp\. aureus",
        r", complete genome\.",
        r", complete sequence\.",
        r"strain ",
        r"str\. ",
        r" complete genome sequence\.",
        r" complete genome\.",
        r" chromosome",
        r"Staphylococcus aureus ",
    )
    for taxon_id in taxon_id2description.keys():
        description = taxon_id2description[taxon_id]
        for pattern in noise_patterns:
            description = re.sub(pattern, "", description)
        taxon_id2description[taxon_id] = description

    genomes = manipulate_biosqldb.get_genome_taxons_list(server, biodb_name)
    header_cells = [taxon_id2description[taxon] + '\t' for taxon in genomes]
    lines = ['orthogroup\t' + ''.join(header_cells)]

    # Distinct names for the locus and the taxon loops: the original reused
    # ``i`` for both, which worked but hid which value was being indexed.
    for locus in locus_list:
        seqfeature_id = locus_tag2seqfeature_id[locus]
        orthogroup = manipulate_biosqldb.seqfeature_id2orthogroup(
            server, seqfeature_id, biodb_name)
        taxon2presence = heatmap_presence_absence(biodb_name, orthogroup)

        cells = ['%s\t' % taxon2presence[taxon] for taxon in genomes]
        lines.append('%s\t' % orthogroup + ''.join(cells))

    return ''.join([line + '\n' for line in lines])
예제 #5
0
def biodb2randomized_matrix(bio_db_name):
    """Return a randomized orthology matrix and shortened genome labels.

    The orthology table of ``bio_db_name`` is converted to a float array and
    passed through ``heatmap.randomize_table``. The taxon ids of the database
    are mapped to their genome descriptions, which are then abbreviated with
    a fixed, ordered list of regular-expression substitutions.

    :param bio_db_name: biodatabase name
    :return: tuple ``(M, taxons_ids)`` -- the randomized matrix and the list
        of abbreviated descriptions
    """
    import re

    server, db = manipulate_biosqldb.load_db(bio_db_name)
    matrix = np.array(
        manipulate_biosqldb.get_orthology_table(server, bio_db_name))

    taxon_id2description = manipulate_biosqldb.taxon_id2genome_description(
        server, bio_db_name)

    taxons_ids = manipulate_biosqldb.get_taxon_id_list(server, bio_db_name)

    # Single-argument print call: valid on Python 3 and emits the same text
    # as the former Python 2 statement ``print 'Number of taxons:', n``.
    print('Number of taxons: %s' % len(taxons_ids))

    # (pattern, replacement) pairs, applied strictly in this order.
    # NOTE(review): the unescaped "." in ".contig.0_1, ..." and
    # "Rhabdochlamydiaceae sp." matches any character, and " DNA" is replaced
    # by "S." -- kept as in the original; confirm whether that is intended.
    substitutions = (
        (r", complete genome\.", ""),
        (r", complete genome", ""),
        (r", complete sequence\.", ""),
        (r"strain ", ""),
        (r"str\. ", ""),
        (r" complete genome sequence\.", ""),
        (r" complete genome\.", ""),
        (r" chromosome", ""),
        (r" DNA", "S."),
        (r"Merged record from ", ""),
        (r", wgs", ""),
        (r"Candidatus ", ""),
        (r".contig.0_1, whole genome shotgun sequence.", ""),
        (r"Protochlamydia", "P."),
        (r"Chlamydia", "C."),
        (r"Chlamydophila", "E."),
        (r"Estrella", "E."),
        (r"Rhodopirellula", "R."),
        (r"Methylacidiphilum", "M."),
        (r" phage", ""),
        (r"Parachlamydia", "P."),
        (r"Neochlamydia", "Neo."),
        (r"Simkania", "S."),
        (r"Waddlia", "W."),
        (r"Pirellula", "P."),
        (r"Rhabdochlamydiaceae sp.", "Rhabdo"),
    )

    labels = []
    for taxon_id in taxons_ids:
        description = taxon_id2description[str(taxon_id)]
        for pattern, replacement in substitutions:
            description = re.sub(pattern, replacement, description)
        labels.append(description)
    taxons_ids = labels

    M = matrix.astype(float)
    M = heatmap.randomize_table(M)
    return (M, taxons_ids)
예제 #6
0
def convert_tree_taxon2genome(biodb_name,
                              input_tree,
                              output_tree,
                              sqlite=False):
    """Write a copy of a tree with terminal node names replaced by genome labels.

    The taxon id -> genome description mapping of ``biodb_name`` is shortened
    with a fixed, ordered list of regular-expression substitutions, handed to
    ``parse_newick_tree.convert_terminal_node_names`` together with
    ``input_tree``, and the result is written to ``output_tree`` in newick
    format with ``Phylo.write``. Progress information is printed to stdout.

    :param biodb_name: biodatabase name
    :param input_tree: tree passed to ``convert_terminal_node_names``
    :param output_tree: destination passed to ``Phylo.write``
    :param sqlite: forwarded to ``manipulate_biosqldb.load_db``
    """
    import re

    server, db = manipulate_biosqldb.load_db(biodb_name, sqlite=sqlite)
    # Every print below takes exactly one argument, so it is valid on
    # Python 3 and prints the same text the Python 2 statements did.
    print(biodb_name)
    taxon_id2genome_description = manipulate_biosqldb.taxon_id2genome_description(
        server, biodb_name)

    print(taxon_id2genome_description)

    # (pattern, replacement) pairs, applied strictly in this order. Raw
    # strings keep "\." a valid regex escape.
    # NOTE(review): " DNA" is replaced by "S." -- kept as in the original;
    # confirm whether that is intended.
    substitutions = (
        (r" subsp\. aureus", ""),
        (r", complete genome\.", ""),
        (r", complete sequence\.", ""),
        (r"strain ", ""),
        (r"str\. ", ""),
        (r" complete genome sequence\.", ""),
        (r" complete genome\.", ""),
        (r" chromosome", ""),
        (r"Staphylococcus", "S."),
        (r" DNA", "S."),
    )
    for taxon_id in taxon_id2genome_description.keys():
        print(taxon_id)
        description = taxon_id2genome_description[taxon_id]
        for pattern, replacement in substitutions:
            description = re.sub(pattern, replacement, description)
        taxon_id2genome_description[taxon_id] = description

    print(taxon_id2genome_description)
    new_tree = parse_newick_tree.convert_terminal_node_names(
        input_tree, taxon_id2genome_description)
    print("writing converted tree...")
    print(output_tree)
    Phylo.write(new_tree, output_tree, 'newick')
예제 #7
0
def write_ortho_matrix(bio_db_name):
    """Write the orthology table of ``bio_db_name`` to ``ortho_matrix.tab``.

    The file is created in the current working directory. Its header line is
    ``orthogroup`` followed by one shortened genome description per taxon
    id; every following line is one row of the orthology table joined by
    tabs.

    :param bio_db_name: biodatabase name
    """
    import re

    server, db = manipulate_biosqldb.load_db(bio_db_name)
    matrix = np.array(
        manipulate_biosqldb.get_orthology_table(server, bio_db_name))

    taxon_id2description = manipulate_biosqldb.taxon_id2genome_description(
        server, bio_db_name)

    taxons_ids = manipulate_biosqldb.get_taxon_id_list(server, bio_db_name)

    # Substrings stripped from the genome descriptions, applied in this
    # order. Raw strings keep "\." a valid regex escape.
    noise_patterns = (
        r" subsp\. aureus",
        r", complete genome\.",
        r", complete sequence\.",
        r"strain ",
        r"str\. ",
        r" complete genome sequence\.",
        r" complete genome\.",
        r" chromosome",
        r" DNA",
    )
    for taxon_id in taxon_id2description.keys():
        description = taxon_id2description[taxon_id]
        for pattern in noise_patterns:
            description = re.sub(pattern, "", description)
        taxon_id2description[taxon_id] = description

    column_labels = [taxon_id2description[str(i)] for i in taxons_ids]

    # The context manager closes the file even when a write raises; the
    # original only closed it on the success path.
    with open("ortho_matrix.tab", "w") as out:
        out.write("orthogroup\t" + "\t".join(column_labels) + "\n")
        for row in matrix:
            out.write("\t".join(row) + "\n")
예제 #8
0
def shared_orthogroups_average_identity(db_name):
    """Create and fill ``comparative_tables.shared_og_av_id_<db_name>``.

    For every unordered pair of taxons (each pair visited once), the identity
    values stored in ``comparative_tables.identity_closest_homolog2_<db_name>``
    for that pair are read and one row with their average, median and count
    is inserted. Pairs without any identity value get NULL for average and
    median and 0 for ``n_pairs``. The fetched values and each INSERT
    statement are printed to stdout; the transaction is committed once per
    first taxon.

    :param db_name: biodatabase name, used as suffix of the tables involved
    """
    from chlamdb.biosqldb import manipulate_biosqldb
    from chlamdb.biosqldb import biosql_own_sql_tables
    import numpy

    server, db = manipulate_biosqldb.load_db(db_name)

    sql = "CREATE TABLE comparative_tables.shared_og_av_id_%s(taxon_1 INT NOT NULL," \
          " taxon_2 INT NOT NULL," \
          " average_identity FLOAT," \
          " median_identity FLOAT," \
          " n_pairs INT)" % (db_name)
    server.adaptor.execute(sql)

    taxon2description = manipulate_biosqldb.taxon_id2genome_description(
        server, biodatabase_name=db_name)

    all_taxons = list(taxon2description.keys())
    for position, taxon_1 in enumerate(all_taxons):
        for taxon_2 in all_taxons[position + 1:]:
            data_sql = 'select identity from comparative_tables.identity_closest_homolog2_%s where taxon_1=%s and taxon_2=%s' % (
                db_name, taxon_1, taxon_2)
            data = [
                row[0]
                for row in server.adaptor.execute_and_fetchall(data_sql, )
            ]
            print(data)

            # numpy.average / numpy.median of an empty list yield nan, which
            # would be formatted into the statement as the bare token "nan"
            # and rejected by the database. Store NULL instead.
            if data:
                average_identity = numpy.average(data)
                median_identity = numpy.median(data)
            else:
                average_identity = median_identity = 'NULL'

            sql = 'insert into comparative_tables.shared_og_av_id_%s(taxon_1, taxon_2, average_identity,' \
                  ' median_identity, n_pairs) values (%s, %s, %s, %s, %s)' % (db_name,
                                                                              taxon_1,
                                                                              taxon_2,
                                                                              average_identity,
                                                                              median_identity,
                                                                              len(data))
            print(sql)
            # An INSERT returns no rows, so use execute() (as for the CREATE
            # TABLE above) rather than execute_and_fetchall().
            server.adaptor.execute(sql)
        server.adaptor.commit()
예제 #9
0
def plot_tree_stacked_barplot(
        tree_file,
        taxon2value_list_barplot=False,
        header_list=False,  # header stackedbarplots
        taxon2set2value_heatmap=False,
        taxon2label=False,
        header_list2=False,  # header counts columns
        biodb=False,
        column_scale=True,
        general_max=False,
        header_list3=False,
        set2taxon2value_list_simple_barplot=False,
        set2taxon2value_list_simple_barplot_counts=True,
        rotate=False,
        taxon2description=False):
    '''
    Decorate a tree with aligned label, bar-plot and count columns.

    The tree is loaded with ``Tree(tree_file)`` and rooted on its midpoint
    outgroup. For every leaf, up to four groups of aligned faces are added,
    in this order: a text label, stacked bar plots, simple bar plots
    (optionally preceded by the value as text) and colored count cells.
    Column headers are added to the tree style while processing the first
    leaf. Finally the leaf description is added as a face in column 0.

    :param tree_file: argument passed to ``Tree``
    :param taxon2value_list_barplot: mapping leaf name (or ``int(name)``) ->
        list of lists ``[[bar1_part1, bar1_part2, ...], [bar2_part1, ...]]``;
        the values of each inner list are converted to percentages of its sum
    :param header_list: headers of the stacked bar-plot columns
    :param taxon2set2value_heatmap: indexed as ``[column][leaf name]`` to get
        the value shown in the count columns
    :param taxon2label: mapping leaf name (or ``int(name)``) -> label text;
        ``'-'`` is shown when the lookup fails
    :param header_list2: headers (and keys) of the count columns
    :param biodb: if set, ``taxon2description`` is replaced by the mapping
        loaded from this biodatabase
    :param column_scale: together with ``header_list2``, enables building one
        color scale per count column
    :param general_max: not used in this function
    :param header_list3: headers (and keys) of the simple bar-plot columns
    :param set2taxon2value_list_simple_barplot: indexed as
        ``[set][leaf name]`` to get the simple bar-plot value
    :param set2taxon2value_list_simple_barplot_counts: if true, the value is
        also written as text next to each simple bar plot
    :param rotate: if true, leaf faces get ``rotation = 270``
    :param taxon2description: mapping leaf name -> text of the leaf name face
    :return: tuple ``(t1, tss)`` -- the decorated tree and its TreeStyle
    '''

    if biodb:
        from chlamdb.biosqldb import manipulate_biosqldb
        server, db = manipulate_biosqldb.load_db(biodb)

        # Overrides whatever was passed as taxon2description.
        taxon2description = manipulate_biosqldb.taxon_id2genome_description(
            server, biodb, filter_names=True)

    t1 = Tree(tree_file)

    # Calculate the midpoint node
    R = t1.get_midpoint_outgroup()
    # and set it as tree outgroup
    t1.set_outgroup(R)

    # Two palettes for the stacked bar plots; which one is used depends on
    # the column index (see "col % 3" below).
    colors2 = [
        "red", "#FFFF00", "#58FA58", "#819FF7", "#F781F3", "#2E2E2E",
        "#F7F8E0", 'black'
    ]
    colors = [
        "#7fc97f", "#386cb0", "#fdc086", "#ffffb3", "#fdb462", "#f0027f",
        "#F7F8E0", 'black'
    ]  # fdc086ff 386cb0ff f0027fff

    tss = TreeStyle()
    tss.draw_guiding_lines = True
    tss.guiding_lines_color = "gray"
    tss.show_leaf_name = False
    # Build one color scale per count column: column2scale[column] holds
    # [ScalarMappable, max value as float].
    # NOTE(review): rgb2hex and column2scale are only bound inside this
    # branch but are used unconditionally in the count-column section below
    # for values > 0 -- with column_scale=False that section would raise
    # NameError. Confirm callers always enable both.
    if column_scale and header_list2:
        import matplotlib.cm as cm
        from matplotlib.colors import rgb2hex
        import matplotlib as mpl
        column2scale = {}
        col_n = 0
        for column in header_list2:
            values = taxon2set2value_heatmap[column].values()
            #print values
            # A constant column gets the range [0, 1.5 * value] so that the
            # normalization range is not empty.
            if min(values) == max(values):
                min_val = 0
                max_val = 1.5 * max(values)
            else:
                min_val = min(values)
                max_val = max(values)
            #print 'min-max', min_val, max_val
            norm = mpl.colors.Normalize(vmin=min_val, vmax=max_val)  # *1.1
            # The first four columns use the OrRd colormap, the others YlGnBu.
            if col_n < 4:
                cmap = cm.OrRd  #
            else:
                cmap = cm.YlGnBu  #PuBu#OrRd

            m = cm.ScalarMappable(norm=norm, cmap=cmap)

            column2scale[column] = [m, float(max_val)]  # *0.7
            col_n += 1

    for i, lf in enumerate(t1.iter_leaves()):

        #if taxon2description[lf.name] == 'Pirellula staleyi DSM 6068':
        #    lf.name = 'Pirellula staleyi DSM 6068'
        #    continue
        # Column headers are added once, while handling the first leaf.
        # col_add tracks the next free header column.
        if i == 0:

            if taxon2label:
                # Blank header cell above the label column.
                n = TextFace('  ')
                n.margin_top = 1
                n.margin_right = 1
                n.margin_left = 20
                n.margin_bottom = 1
                n.hz_align = 2
                n.vt_align = 2
                n.rotation = 270
                n.inner_background.color = "white"
                n.opacity = 1.

                tss.aligned_header.add_face(n, 0)
                col_add = 1
            else:
                col_add = 1
            if header_list:
                # Headers of the stacked bar-plot columns.
                for col, header in enumerate(header_list):

                    n = TextFace('%s' % (header))
                    n.margin_top = 0
                    n.margin_right = 1
                    n.margin_left = 20
                    n.margin_bottom = 1
                    n.rotation = 270
                    n.hz_align = 2
                    n.vt_align = 2
                    n.inner_background.color = "white"
                    n.opacity = 1.
                    tss.aligned_header.add_face(n, col + col_add)
                # col is the last index of the loop above.
                col_add += col + 1

            if header_list3:
                #print 'header_list 3!'
                # Headers of the simple bar-plot columns. When counts are
                # shown, each set takes two columns (header + spacer) and the
                # first pair is shifted by one column.
                col_tmp = 0
                for header in header_list3:
                    n = TextFace('%s' % (header))
                    n.margin_top = 1
                    n.margin_right = 1
                    n.margin_left = 20
                    n.margin_bottom = 1
                    n.rotation = 270
                    n.hz_align = 2
                    n.vt_align = 2
                    n.inner_background.color = "white"
                    n.opacity = 1.

                    if set2taxon2value_list_simple_barplot_counts:
                        if col_tmp == 0:
                            col_tmp += 1
                        tss.aligned_header.add_face(n, col_tmp + 1 + col_add)
                        n = TextFace('       ')
                        tss.aligned_header.add_face(n, col_tmp + col_add)
                        col_tmp += 2
                    else:
                        tss.aligned_header.add_face(n, col_tmp + col_add)
                        col_tmp += 1
                # Both branches are identical.
                if set2taxon2value_list_simple_barplot_counts:
                    col_add += col_tmp
                else:
                    col_add += col_tmp

            if header_list2:
                # Headers of the count columns.
                for col, header in enumerate(header_list2):
                    n = TextFace('%s' % (header))
                    n.margin_top = 1
                    n.margin_right = 1
                    n.margin_left = 20
                    n.margin_bottom = 1
                    n.rotation = 270
                    n.hz_align = 2
                    n.vt_align = 2
                    n.inner_background.color = "white"
                    n.opacity = 1.
                    tss.aligned_header.add_face(n, col + col_add)
                col_add += col + 1

        # --- label column (aligned column 1) ---
        # From here on col_add is the next free aligned column of this leaf;
        # it restarts at 2 for every leaf.
        if taxon2label:
            # Try the leaf name as key, then its integer form; any failure
            # falls back to '-'.
            try:
                n = TextFace('%s' % taxon2label[lf.name])
            except:
                try:
                    n = TextFace('%s' % taxon2label[int(lf.name)])
                except:
                    n = TextFace('-')
            n.margin_top = 1
            n.margin_right = 1
            n.margin_left = 20
            n.margin_bottom = 1
            n.inner_background.color = "white"
            n.opacity = 1.
            if rotate:
                n.rotation = 270
            lf.add_face(n, 1, position="aligned")
            col_add = 2
        else:
            col_add = 2

        # --- stacked bar-plot columns ---
        if taxon2value_list_barplot:

            # Keys may be leaf names or their integer form.
            try:
                val_list_of_lists = taxon2value_list_barplot[lf.name]
            except:
                val_list_of_lists = taxon2value_list_barplot[int(lf.name)]

            #col_count = 0
            for col, value_list in enumerate(val_list_of_lists):

                # NOTE(review): raises ZeroDivisionError when the values of
                # one bar sum to 0 -- confirm inputs never do.
                total = float(sum(value_list))
                percentages = [(i / total) * 100 for i in value_list]
                # Every third column uses the colors2 palette.
                if col % 3 == 0:
                    col_list = colors2
                else:
                    col_list = colors
                b = StackedBarFace(percentages,
                                   width=150,
                                   height=18,
                                   colors=col_list[0:len(percentages)])
                b.rotation = 0
                b.inner_border.color = "white"
                b.inner_border.width = 0
                b.margin_right = 5
                b.margin_left = 5
                if rotate:
                    b.rotation = 270
                lf.add_face(b, col + col_add, position="aligned")
                #col_count+=1

            # NOTE(review): relies on col from the loop above; with an empty
            # val_list_of_lists it would reuse a stale value or be unbound.
            col_add += col + 1

        # --- simple bar-plot columns (one per entry of header_list3) ---
        if set2taxon2value_list_simple_barplot:
            col_list = [
                '#fc8d59', '#91bfdb', '#99d594', '#c51b7d', '#f1a340',
                '#999999'
            ]
            color_i = 0
            col = 0
            for one_set in header_list3:
                # Cycle through the six colors.
                if color_i > 5:
                    color_i = 0
                color = col_list[color_i]
                color_i += 1
                # values for all taxons
                values_lists = [
                    float(i) for i in
                    set2taxon2value_list_simple_barplot[one_set].values()
                ]
                #print values_lists
                #print one_set
                value = set2taxon2value_list_simple_barplot[one_set][lf.name]

                if set2taxon2value_list_simple_barplot_counts:
                    # Value as text; floats are rounded to 2 decimals.
                    if isinstance(value, float):
                        a = TextFace(" %s " % str(round(value, 2)))
                    else:
                        a = TextFace(" %s " % str(value))
                    a.margin_top = 1
                    a.margin_right = 2
                    a.margin_left = 5
                    a.margin_bottom = 1
                    if rotate:
                        a.rotation = 270
                    lf.add_face(a, col + col_add, position="aligned")

                #print 'value and max', value, max(values_lists)
                # Bar length is the value as a percentage of the largest
                # value of this set.
                # NOTE(review): raises ZeroDivisionError when that maximum
                # is 0 -- confirm inputs.
                fraction_biggest = (float(value) / max(values_lists)) * 100
                fraction_rest = 100 - fraction_biggest

                #print 'fractions', fraction_biggest, fraction_rest
                b = StackedBarFace([fraction_biggest, fraction_rest],
                                   width=100,
                                   height=15,
                                   colors=[color, 'white'])
                b.rotation = 0
                b.inner_border.color = "grey"
                b.inner_border.width = 0
                b.margin_right = 15
                b.margin_left = 0
                if rotate:
                    b.rotation = 270
                # Same column arithmetic as for the headers above: two
                # columns per set when counts are shown, first pair shifted.
                if set2taxon2value_list_simple_barplot_counts:
                    if col == 0:
                        col += 1
                    lf.add_face(b, col + 1 + col_add, position="aligned")
                    col += 2
                else:
                    lf.add_face(b, col + col_add, position="aligned")
                    col += 1
            # Both branches are identical.
            if set2taxon2value_list_simple_barplot_counts:
                col_add += col

            else:
                col_add += col

        # --- count columns (one per entry of header_list2) ---
        if taxon2set2value_heatmap:
            # Rebinds the leaf index i as a column counter; harmless because
            # the "i == 0" test above has already run for this leaf and
            # enumerate rebinds i on the next iteration.
            i = 0
            #if not taxon2label:
            #    col_add-=1
            for col2, head in enumerate(header_list2):

                col_name = header_list2[i]
                # Look the value up by leaf name, then by the leaf name as a
                # rounded float; any failure yields 0.
                try:
                    value = taxon2set2value_heatmap[col_name][str(lf.name)]
                except:
                    try:
                        value = taxon2set2value_heatmap[col_name][round(
                            float(lf.name), 2)]
                    except:
                        value = 0
                if header_list2[i] == 'duplicates':
                    print('dupli', lf.name, value)
                #print 'val----------------', value
                if int(value) > 0:
                    # The field width shrinks as the number of digits grows.
                    if int(value) >= 10 and int(value) < 100:
                        n = TextFace('%4i' % value)
                    elif int(value) >= 100:
                        n = TextFace('%3i' % value)
                    else:

                        n = TextFace('%5i' % value)

                    n.margin_top = 1
                    n.margin_right = 2
                    n.margin_left = 5
                    n.margin_bottom = 1
                    n.hz_align = 1
                    n.vt_align = 1
                    if rotate:
                        n.rotation = 270
                    # Background color taken from the column's color scale.
                    n.inner_background.color = rgb2hex(
                        column2scale[col_name][0].to_rgba(
                            float(value)))  #"orange"
                    #print 'xaxaxaxaxa', value,
                    # White text for values above the stored column maximum.
                    if float(value) > column2scale[col_name][1]:

                        n.fgcolor = 'white'
                    n.opacity = 1.
                    n.hz_align = 1
                    n.vt_align = 1
                    lf.add_face(n, col2 + col_add, position="aligned")
                    i += 1
                else:
                    # Values <= 0 are shown as an empty white cell.
                    n = TextFace('')
                    n.margin_top = 1
                    n.margin_right = 1
                    n.margin_left = 5
                    n.margin_bottom = 1
                    n.inner_background.color = "white"
                    n.opacity = 1.
                    if rotate:
                        n.rotation = 270
                    lf.add_face(n, col2 + col_add, position="aligned")
                    i += 1

        #lf.name = taxon2description[lf.name]
        # Leaf name face: the description is looked up by the raw leaf name.
        n = TextFace(taxon2description[lf.name],
                     fgcolor="black",
                     fsize=12,
                     fstyle='italic')
        lf.add_face(n, 0)

    # Node styles: nodes with support < 1 get a black marker of size 6, all
    # other nodes a marker of size 0.
    for n in t1.traverse():
        nstyle = NodeStyle()

        if n.support < 1:
            nstyle["fgcolor"] = "black"
            nstyle["size"] = 6
            n.set_style(nstyle)
        else:
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
            n.set_style(nstyle)

    return t1, tss
예제 #10
0
def plot_heat_tree(tree_file,
                   biodb="chlamydia_04_16",
                   exclude_outgroup=False,
                   bw_scale=True):
    '''
    Decorate a tree with per-genome statistics shown as aligned columns.

    For every leaf a numeric label and a horizontal bar are drawn for the
    genome size, the GC content and the percentage of non coding sequence.
    If the ``custom_tables.checkm_<biodb>`` table can be queried and returns
    rows, two extra label/bar pairs (completeness and contamination) are
    added. Leaf names are expected to be the taxon ids used as keys of the
    dictionaries built from the database.

    :param tree_file: either a string handed to ``Tree()`` or an already
        built ``Tree`` instance. Only trees built here from a string are
        midpoint rooted (best effort); ``Tree`` instances are used as given.
    :param biodb: name of the biodatabase; also used as suffix of the
        ``genomes_info_<biodb>`` and ``checkm_<biodb>`` tables.
    :param exclude_outgroup: if True, the first leaf returned by
        ``iter_leaves()`` is left out of the scales and gets no columns
        (its ``name`` is replaced by its description instead).
    :param bw_scale: if True every bar of a column uses one fixed colour,
        otherwise bars are coloured along a YlGnBu gradient.

    :return: tuple ``(tree, tree_style)``
    :raises IOError: if ``tree_file`` is neither a string nor a ``Tree``.
    '''
    from chlamdb.biosqldb import manipulate_biosqldb
    import matplotlib.cm as cm
    from matplotlib.colors import rgb2hex
    import matplotlib as mpl

    # This genome keeps the fixed column colour even when bw_scale is False.
    highlighted_genome = 'Rhabdochlamydia helveticae T3358'

    def header_face(text):
        # Slanted column title placed in the aligned header.
        face = TextFace(text)
        face.rotation = -25
        face.margin_top = 1
        face.margin_right = 1
        face.margin_left = 20
        face.margin_bottom = 1
        face.inner_background.color = "white"
        face.opacity = 1.
        return face

    def value_face(text, margin_right=0):
        # Small numeric label displayed on the left of a bar.
        face = TextFace('  %s ' % text)
        face.margin_top = 1
        face.margin_right = margin_right
        face.margin_left = 0
        face.margin_bottom = 1
        face.fsize = 7
        face.inner_background.color = "white"
        face.opacity = 1.
        return face

    def bar_face(filled, empty, color, margin_left, margin_right):
        # Two segment bar: coloured part followed by a white remainder.
        bar = StackedBarFace([filled, empty],
                             width=100,
                             height=9,
                             colors=[color, 'white'])
        bar.rotation = 0
        bar.inner_border.color = "black"
        bar.inner_border.width = 0
        bar.margin_left = margin_left
        bar.margin_right = margin_right
        return bar

    def pick_color(description, fixed_color, scale, raw_value):
        # Fixed colour unless a gradient was requested for a regular genome.
        if bw_scale or description == highlighted_genome:
            return fixed_color
        return rgb2hex(scale.to_rgba(float(raw_value)))

    server, db = manipulate_biosqldb.load_db(biodb)

    sql_biodatabase_id = 'select biodatabase_id from biodatabase where name="%s"' % biodb
    db_id = server.adaptor.execute_and_fetchall(sql_biodatabase_id, )[0][0]

    if isinstance(tree_file, str):
        t1 = Tree(tree_file)
        try:
            # midpoint rooting is best effort: keep the tree as parsed if
            # ete cannot compute or apply a midpoint outgroup
            t1.set_outgroup(t1.get_midpoint_outgroup())
        except Exception:
            pass
    elif isinstance(tree_file, Tree):
        t1 = tree_file
    else:
        # the exception used to be instantiated without being raised, which
        # ended in a NameError on t1 a few lines below
        raise IOError('Unknown tree format')

    tss = TreeStyle()
    tss.draw_guiding_lines = True
    tss.guiding_lines_color = "gray"
    tss.show_leaf_name = False

    sql2 = 'select t2.taxon_id, t1.GC from genomes_info_%s as t1 inner join bioentry as t2 ' \
           ' on t1.accession=t2.accession where t2.biodatabase_id=%s and t1.description not like "%%%%plasmid%%%%";' % (biodb, db_id)
    sql3 = 'select t2.taxon_id, t1.genome_size from genomes_info_%s as t1 ' \
           ' inner join bioentry as t2 on t1.accession=t2.accession ' \
           ' where t2.biodatabase_id=%s and t1.description not like "%%%%plasmid%%%%";' % (biodb, db_id)
    sql4 = 'select t2.taxon_id,percent_non_coding from genomes_info_%s as t1 ' \
           ' inner join bioentry as t2 on t1.accession=t2.accession ' \
           ' where t2.biodatabase_id=%s and t1.description not like "%%%%plasmid%%%%";' % (biodb, db_id)

    sql_checkm_completeness = 'select taxon_id, completeness from custom_tables.checkm_%s;' % biodb
    sql_checkm_contamination = 'select taxon_id,contamination from custom_tables.checkm_%s;' % biodb

    # The checkm table is optional: any failure simply disables the two
    # corresponding columns.
    try:
        taxon_id2completeness = manipulate_biosqldb.to_dict(
            server.adaptor.execute_and_fetchall(sql_checkm_completeness))
        taxon_id2contamination = manipulate_biosqldb.to_dict(
            server.adaptor.execute_and_fetchall(sql_checkm_contamination))
    except Exception:
        taxon_id2completeness = False
        taxon_id2contamination = False

    taxon2description = manipulate_biosqldb.taxon_id2genome_description(
        server, biodb, filter_names=True)

    taxon2gc = manipulate_biosqldb.to_dict(
        server.adaptor.execute_and_fetchall(sql2, ))
    taxon2genome_size = manipulate_biosqldb.to_dict(
        server.adaptor.execute_and_fetchall(sql3, ))
    taxon2coding_density = manipulate_biosqldb.to_dict(
        server.adaptor.execute_and_fetchall(sql4, ))

    my_taxons = [lf.name for lf in t1.iter_leaves()]

    if exclude_outgroup:
        # the first leaf is considered to be the outgroup
        my_taxons.remove(str(my_taxons[0]))

    genome_sizes = [float(taxon2genome_size[i]) for i in my_taxons]
    gc_list = [float(taxon2gc[i]) for i in my_taxons]
    fraction_list = [float(taxon2coding_density[i]) for i in my_taxons]

    max_genome_size = max(genome_sizes)
    max_gc = max(gc_list)
    # Unlike the two maxima above, this one spans every genome returned by
    # the query, not only the leaves of the tree. It is computed once here
    # instead of three times per leaf.
    max_non_coding = max(float(v) for v in taxon2coding_density.values())

    cmap = cm.YlGnBu

    norm = mpl.colors.Normalize(vmin=min(genome_sizes) - 100000,
                                vmax=max(genome_sizes))
    m1 = cm.ScalarMappable(norm=norm, cmap=cmap)
    norm = mpl.colors.Normalize(vmin=min(gc_list), vmax=max(gc_list))
    m2 = cm.ScalarMappable(norm=norm, cmap=cmap)
    norm = mpl.colors.Normalize(vmin=min(fraction_list),
                                vmax=max(fraction_list))
    m3 = cm.ScalarMappable(norm=norm, cmap=cmap)

    for i, lf in enumerate(t1.iter_leaves()):
        if i == 0:
            # Column titles are attached once, while visiting the first
            # leaf. Even columns hold the numeric labels and only get an
            # empty title; odd columns hold the bars.
            tss.aligned_header.add_face(header_face('Size (Mbp)'), 3)
            tss.aligned_header.add_face(header_face('GC (%)'), 5)
            spacer = TextFace('')
            tss.aligned_header.add_face(spacer, 2)
            tss.aligned_header.add_face(spacer, 4)
            tss.aligned_header.add_face(header_face('Non coding (%)'), 7)
            tss.aligned_header.add_face(TextFace(''), 6)

            if taxon_id2completeness:
                tss.aligned_header.add_face(
                    header_face('Completeness (%)'), 9)
                tss.aligned_header.add_face(TextFace(''), 8)
                tss.aligned_header.add_face(
                    header_face('Contamination (%)'), 11)
                tss.aligned_header.add_face(TextFace(''), 10)

        if exclude_outgroup and i == 0:
            # the outgroup gets no column, only its description as name
            lf.name = taxon2description[lf.name]
            continue

        description = taxon2description[lf.name]

        # --- genome size: label in column 2, bar in column 3
        genome_size = float(taxon2genome_size[lf.name])
        lf.add_face(value_face(str(round(genome_size / 1000000, 2)),
                               margin_right=1),
                    2,
                    position="aligned")
        fraction_biggest = (genome_size / max_genome_size) * 100
        col = pick_color(description, '#fc8d59', m1, genome_size)
        lf.add_face(bar_face(fraction_biggest,
                             100 - fraction_biggest,
                             col,
                             margin_left=0,
                             margin_right=15),
                    3,
                    position="aligned")

        # --- GC content: bar in column 5, label in column 4
        gc_content = float(taxon2gc[lf.name])
        fraction_biggest = (gc_content / max_gc) * 100
        col = pick_color(description, '#91bfdb', m2, gc_content)
        lf.add_face(bar_face(fraction_biggest,
                             100 - fraction_biggest,
                             col,
                             margin_left=0,
                             margin_right=15),
                    5,
                    position="aligned")
        lf.add_face(value_face(str(round(gc_content, 2))),
                    4,
                    position="aligned")

        # --- non coding fraction: label in column 6, bar in column 7
        non_coding = float(taxon2coding_density[lf.name])
        col = pick_color(description, '#99d594', m3, non_coding)
        lf.add_face(value_face(str(non_coding)), 6, position="aligned")
        fraction = (non_coding / max_non_coding) * 100
        fraction_rest = ((max_non_coding - non_coding) /
                         max_non_coding) * 100
        lf.add_face(bar_face(fraction,
                             fraction_rest,
                             col,
                             margin_left=5,
                             margin_right=1),
                    7,
                    position="aligned")

        if taxon_id2completeness:
            # --- completeness: label in column 8, bar in column 9
            completeness = float(taxon_id2completeness[lf.name])
            lf.add_face(value_face(str(completeness)),
                        8,
                        position="aligned")
            lf.add_face(bar_face(completeness,
                                 100 - completeness,
                                 "#d7191c",
                                 margin_left=5,
                                 margin_right=1),
                        9,
                        position="aligned")

            # --- contamination: label in column 10, bar in column 11
            contamination = float(taxon_id2contamination[lf.name])
            lf.add_face(value_face(str(contamination)),
                        10,
                        position="aligned")
            lf.add_face(bar_face(contamination,
                                 100 - contamination,
                                 "black",
                                 margin_left=5,
                                 margin_right=1),
                        11,
                        position="aligned")

        # leaf names are hidden (show_leaf_name is False); the description
        # is displayed through a dedicated face instead
        n = TextFace(description,
                     fgcolor="black",
                     fsize=9,
                     fstyle='italic')
        n.margin_right = 30
        lf.add_face(n, 0)

    for node in t1.traverse():
        nstyle = NodeStyle()
        if node.support < 1:
            # nodes with a support below 1 are drawn as black dots
            nstyle["fgcolor"] = "black"
            nstyle["size"] = 6
        else:
            # other nodes are hidden (size 0)
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
        node.set_style(nstyle)

    return t1, tss
예제 #11
0
def plot_tree_text_metadata(tree_file, header2taxon2text, ordered_header_list,
                            biodb):
    '''
    Build a midpoint rooted tree with one aligned text column per header.

    :param tree_file: value handed to ``Tree()``
    :param header2taxon2text: nested mapping, header => taxon id => text;
        the inner mapping is queried with ``int(leaf.name)``
    :param ordered_header_list: headers, in the order of the columns
    :param biodb: biodatabase used to fetch the genome descriptions

    :return: tuple ``(tree, tree_style)``
    '''

    from chlamdb.biosqldb import manipulate_biosqldb

    def set_margins(face, top, right, left, bottom):
        # shorthand for the four margin attributes of a face
        face.margin_top = top
        face.margin_right = right
        face.margin_left = left
        face.margin_bottom = bottom

    server, _db = manipulate_biosqldb.load_db(biodb)

    t1 = Tree(tree_file)

    taxon2description = manipulate_biosqldb.taxon_id2genome_description(
        server, biodb, filter_names=True)

    # root the tree on its midpoint
    t1.set_outgroup(t1.get_midpoint_outgroup())

    tss = TreeStyle()
    tss.show_leaf_name = False
    tss.guiding_lines_color = "gray"
    tss.draw_guiding_lines = True

    headers_done = False
    for leaf in t1.iter_leaves():

        if not headers_done:
            # vertical column titles, added while visiting the first leaf
            headers_done = True
            for position, title in enumerate(ordered_header_list):
                title_face = TextFace('%s' % (title))
                set_margins(title_face, 0, 1, 20, 1)
                title_face.rotation = 270
                title_face.hz_align = 2
                title_face.vt_align = 2
                title_face.inner_background.color = "white"
                title_face.opacity = 1.
                tss.aligned_header.add_face(title_face, position)

        # one text cell per header; cells start at column 1
        for position, title in enumerate(ordered_header_list):
            cell = TextFace('%s' % header2taxon2text[title][int(leaf.name)])
            set_margins(cell, 1, 1, 5, 1)
            cell.inner_background.color = "white"
            cell.opacity = 1.
            leaf.add_face(cell, position + 1, position="aligned")

        # display the genome description in place of the taxon id
        label = TextFace(taxon2description[leaf.name],
                         fgcolor="black",
                         fsize=12,
                         fstyle='italic')
        leaf.add_face(label, 0)

    for node in t1.traverse():
        # black dot when support < 1, invisible node otherwise
        weak = node.support < 1
        nstyle = NodeStyle()
        nstyle["fgcolor"] = "black" if weak else "red"
        nstyle["size"] = 6 if weak else 0
        node.set_style(nstyle)

    return t1, tss
예제 #12
0
def plot_tree_barplot(tree_file,
                      taxon2value_list_barplot,
                      header_list,
                      taxon2set2value_heatmap=False,
                      header_list2=False,
                      presence_only=True,
                      biodb="chlamydia_04_16",
                      column_scale=True,
                      general_max=False,
                      barplot2percentage=False):
    '''
    Display one or more barplots (and optionally a heatmap) next to a tree.

    Each barplot takes two aligned columns: the value as text, then a bar
    whose filled part is proportional to the value. Heatmap columns, if
    any, are placed after the barplot columns. The tree is midpoint rooted
    (a ``Tree`` given as ``tree_file`` is therefore modified).

    :param tree_file: ``Tree`` instance or any value accepted by ``Tree()``
    :param taxon2value_list_barplot: taxon => list of values, one value per
        entry of ``header_list``. Taxons are looked up with the leaf name
        first, then with ``int(leaf name)``; leaves that are not found get
        a 0 in every barplot.
    :param header_list: titles of the barplots
    :param taxon2set2value_heatmap: heatmap column name => taxon => value
        (despite its name, the column is the outer key). Values <= 0 and
        missing taxons give an empty cell.
    :param header_list2: ordered heatmap column names. The heatmap is only
        drawn if both this and ``taxon2set2value_heatmap`` are given.
    :param presence_only: unused, kept for backward compatibility
    :param biodb: biodatabase used to fetch the genome descriptions
    :param column_scale: if True each heatmap column has its own colour
        scale, otherwise a single scale spanning all columns is used
    :param general_max: if set, used as maximum of every barplot instead of
        the maximum of each column
    :param barplot2percentage: list of bool to indicates if the number are
        percentages and the range should be set to 0-100

    :return: tuple ``(tree, tree_style)``
    '''

    from chlamdb.biosqldb import manipulate_biosqldb
    import matplotlib.cm as cm
    from matplotlib.colors import rgb2hex
    import matplotlib as mpl

    def lookup_taxon(mapping, taxon, default):
        # Leaf names are strings whereas the mappings may be keyed by int.
        try:
            return mapping[taxon]
        except KeyError:
            pass
        try:
            return mapping[int(taxon)]
        except (KeyError, ValueError, TypeError):
            return default

    def heatmap_scale(values):
        # OrRd colour scale spanning the range of *values*.
        norm = mpl.colors.Normalize(vmin=min(values), vmax=max(values))
        return cm.ScalarMappable(norm=norm, cmap=cm.OrRd)

    server, db = manipulate_biosqldb.load_db(biodb)

    taxon2description = manipulate_biosqldb.taxon_id2genome_description(
        server, biodb, filter_names=True)

    if isinstance(tree_file, Tree):
        t1 = tree_file
    else:
        t1 = Tree(tree_file)

    # Calculate the midpoint node
    R = t1.get_midpoint_outgroup()
    # and set it as tree outgroup
    t1.set_outgroup(R)

    tss = TreeStyle()
    tss.draw_guiding_lines = True
    tss.guiding_lines_color = "gray"
    tss.show_leaf_name = False

    draw_heatmap = bool(taxon2set2value_heatmap) and bool(header_list2)

    # A scale is always available for every heatmap column: the scales used
    # to be built only when column_scale was True although they were looked
    # up unconditionally (NameError).
    column2scale = {}
    if draw_heatmap:
        if column_scale:
            for column in header_list2:
                column2scale[column] = heatmap_scale(
                    taxon2set2value_heatmap[column].values())
        else:
            shared_scale = heatmap_scale([
                v for column in header_list2
                for v in taxon2set2value_heatmap[column].values()
            ])
            for column in header_list2:
                column2scale[column] = shared_scale

    cmap = cm.YlGnBu

    values_lists = taxon2value_list_barplot.values()

    # one colour scale and one maximum per barplot
    scale_list = []
    max_value_list = []

    for n, header in enumerate(header_list):
        data = [float(i[n]) for i in values_lists]

        as_percentage = (barplot2percentage is not False
                         and barplot2percentage[n] is True)
        if as_percentage:
            max_value = 100
            min_value = 0
        else:
            max_value = max(data)
            min_value = min(data)
        norm = mpl.colors.Normalize(vmin=min_value, vmax=max_value)
        scale_list.append(cm.ScalarMappable(norm=norm, cmap=cmap))
        if not general_max:
            max_value_list.append(float(max_value))
        else:
            max_value_list.append(general_max)

    # Leaves missing from taxon2value_list_barplot still need one value per
    # barplot, otherwise their heatmap cells end up in the barplot columns.
    missing_values = [0] * len(header_list)

    for i, lf in enumerate(t1.iter_leaves()):

        if i == 0:
            # column titles are added once, while visiting the first leaf

            col_add = 0
            for col, header in enumerate(header_list):

                # empty title above the column showing the value as text
                n = TextFace(' ')
                n.margin_top = 1
                n.margin_right = 2
                n.margin_left = 2
                n.margin_bottom = 1
                n.rotation = 90
                n.inner_background.color = "white"
                n.opacity = 1.
                n.hz_align = 2
                n.vt_align = 2

                tss.aligned_header.add_face(n, col_add)

                # title above the bar
                n = TextFace('%s' % header)
                n.margin_top = 1
                n.margin_right = 2
                n.margin_left = 2
                n.margin_bottom = 80
                n.rotation = 270
                n.inner_background.color = "white"
                n.opacity = 1.
                n.hz_align = 2
                n.vt_align = 2
                tss.aligned_header.add_face(n, col_add + 1)
                col_add += 2

            if header_list2:
                for col, header in enumerate(header_list2):
                    n = TextFace('%s' % header)
                    n.margin_top = 1
                    n.margin_right = 20
                    n.margin_left = 2
                    n.margin_bottom = 1
                    n.rotation = 270
                    n.hz_align = 2
                    n.vt_align = 2
                    n.inner_background.color = "white"
                    n.opacity = 1.
                    tss.aligned_header.add_face(n, col + col_add)

        val_list = lookup_taxon(taxon2value_list_barplot, lf.name,
                                missing_values)
        col_add = 0
        for col, value in enumerate(val_list):

            # show value itself
            n = TextFace('  %s  ' % str(value))
            n.margin_top = 1
            n.margin_right = 10
            n.margin_left = 2
            n.margin_bottom = 1
            n.inner_background.color = "white"
            n.opacity = 1.

            lf.add_face(n, col_add, position="aligned")

            # show bar
            numeric_value = float(value)
            max_value = float(max_value_list[col])
            color = rgb2hex(scale_list[col].to_rgba(numeric_value))
            # Both segments are derived from the same division: only the
            # first one used to be protected, so a column whose maximum is
            # 0 still crashed on the second one.
            try:
                percentage = (numeric_value / max_value) * 100
                maximum_bar = ((max_value - numeric_value) / max_value) * 100
            except ZeroDivisionError:
                percentage = 0
                maximum_bar = 100
            b = StackedBarFace([percentage, maximum_bar],
                               width=100,
                               height=10,
                               colors=[color, "white"])
            b.rotation = 0
            b.inner_border.color = "grey"
            b.inner_border.width = 0
            b.margin_right = 15
            b.margin_left = 0
            lf.add_face(b, col_add + 1, position="aligned")
            col_add += 2

        if draw_heatmap:
            # heatmap cells start right after the barplot columns
            for col, col_name in enumerate(header_list2):
                value = lookup_taxon(taxon2set2value_heatmap[col_name],
                                     lf.name, 0)

                if int(value) > 0:
                    # single digit numbers are padded to keep cells aligned
                    if int(value) > 9:
                        n = TextFace(' %i ' % int(value))
                    else:
                        n = TextFace(' %i   ' % int(value))
                    n.margin_top = 1
                    n.margin_right = 1
                    n.margin_left = 20
                    n.margin_bottom = 1
                    n.fgcolor = "white"
                    n.inner_background.color = rgb2hex(
                        column2scale[col_name].to_rgba(float(value)))
                    n.opacity = 1.
                else:
                    n = TextFace('  ')
                    n.margin_top = 1
                    n.margin_right = 1
                    n.margin_left = 20
                    n.margin_bottom = 1
                    n.inner_background.color = "white"
                    n.opacity = 1.
                lf.add_face(n, col + col_add, position="aligned")

        # leaf label: genome description, or the raw leaf name if unknown
        try:
            label = taxon2description[lf.name]
        except KeyError:
            label = lf.name
        n = TextFace(label, fgcolor="black", fsize=12, fstyle='italic')
        lf.add_face(n, 0)

    for node in t1.traverse():
        nstyle = NodeStyle()
        if node.support < 1:
            # nodes with a support below 1 are drawn as black dots
            nstyle["fgcolor"] = "black"
            nstyle["size"] = 6
        else:
            # other nodes are hidden (size 0)
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
        node.set_style(nstyle)

    return t1, tss
예제 #13
0
def plot_heatmap_tree_locus(biodb,
                            tree_file,
                            taxid2count,
                            taxid2identity=False,
                            taxid2locus=False,
                            reference_taxon=False,
                            n_paralogs_barplot=False):
    '''

    plot tree and associated heatmap with count of homologs
    optional:
        - add a barplot of the counts
        - add identity of closest homolog
        - add locus tag of closest homolog

    Aligned columns: 0 = count, then (if requested) the barplot, then the
    identity and the locus tag. Every mapping is keyed by ``str(leaf name)``.

    :param biodb: biodatabase used to fetch the genome descriptions
    :param tree_file: value handed to ``Tree()``; the tree is midpoint rooted
    :param taxid2count: taxon => number of homologs. NOTE: leaves missing
        from this dictionary are added to it, in place, with a count of 0.
    :param taxid2identity: taxon => identity, coloured along a 0-100 OrRd
        scale; missing taxons are shown as '-'
    :param taxid2locus: taxon => locus tag; missing taxons are shown as '-'
    :param reference_taxon: taxon highlighted in the identity and locus
        columns (its identity cell is left blank)
    :param n_paralogs_barplot: if True, show the counts as bars relative to
        the highest count

    :return: tuple ``(tree, number of leaves, tree_style)``; the leaves of
        the returned tree are renamed with the genome descriptions
    '''

    from chlamdb.biosqldb import manipulate_biosqldb

    def header_face(text):
        # Slanted column title placed in the aligned header.
        face = TextFace(text)
        face.margin_top = 1
        face.margin_right = 1
        face.margin_left = 20
        face.margin_bottom = 1
        face.inner_background.color = "white"
        face.opacity = 1.
        face.rotation = -25
        return face

    server, db = manipulate_biosqldb.load_db(biodb)

    taxid2organism = manipulate_biosqldb.taxon_id2genome_description(
        server, biodb, True)

    t1 = Tree(tree_file)
    ts = TreeStyle()
    ts.draw_guiding_lines = True
    ts.guiding_lines_color = "gray"
    # Calculate the midpoint node
    R = t1.get_midpoint_outgroup()
    # and set it as tree outgroup
    t1.set_outgroup(R)

    leaf_number = 0

    # leaves without any homolog get an explicit count of 0
    for lf in t1.iter_leaves():
        if str(lf.name) not in taxid2count:
            taxid2count[str(lf.name)] = 0

    max_count = max([taxid2count[str(lf.name)] for lf in t1.iter_leaves()])

    if taxid2identity:
        # the colour scale is the same for every leaf: build it only once
        import matplotlib.cm as cm
        from matplotlib.colors import rgb2hex
        import matplotlib as mpl

        identity_scale = cm.ScalarMappable(
            norm=mpl.colors.Normalize(vmin=0, vmax=100), cmap=cm.OrRd)

    # Column layout. The "Number of homologs" title sits above the bars
    # when they are drawn (column 1) and above the counts otherwise
    # (column 0); the identity and locus columns follow. Titles used to be
    # hard-coded to columns 1, 2 and 3, i.e. shifted by one column with
    # respect to the data whenever the barplot was disabled.
    count_column = 0
    barplot_column = 1
    last_count_column = barplot_column if n_paralogs_barplot else count_column
    identity_column = last_count_column + 1
    locus_column = last_count_column + 2

    for i, lf in enumerate(t1.iter_leaves()):

        # top leaf, add header
        if i == 0:
            ts.aligned_header.add_face(header_face('Number of homologs'),
                                       last_count_column)
            if taxid2identity:
                ts.aligned_header.add_face(header_face('Protein identity'),
                                           identity_column)
            if taxid2locus:
                ts.aligned_header.add_face(header_face('Locus tag'),
                                           locus_column)

        leaf_number += 1

        lf.branch_vertical_margin = 0

        taxon = str(lf.name)
        is_reference = taxon == str(reference_taxon)
        count = taxid2count[taxon]

        # number of homologs
        n = TextFace(' %s ' % str(count))
        n.margin_top = 2
        n.margin_right = 2
        n.margin_left = 20
        n.margin_bottom = 2
        n.inner_background.color = "white"
        n.opacity = 1.
        lf.add_face(n, count_column, position="aligned")

        # optionally indicate number of paralogs as a barplot
        if n_paralogs_barplot:
            # max_count is 0 when no leaf has any homolog: draw empty bars
            # instead of dividing by zero
            if max_count:
                percent = (float(count) / max_count) * 100
            else:
                percent = 0.0
            n = StackedBarFace([percent, 100 - percent],
                               width=150,
                               height=18,
                               colors=['#6699ff', 'white'],
                               line_color='white')
            n.rotation = 0
            n.inner_border.color = "white"
            n.inner_border.width = 0
            n.margin_right = 15
            n.margin_left = 0
            lf.add_face(n, barplot_column, position="aligned")

        # optionally add additional column with identity
        if taxid2identity:
            try:
                identity = round(taxid2identity[taxon], 2)
                # one decimal only for 100 (shown as "100.0")
                if identity != 100:
                    value = "%.2f" % identity
                else:
                    value = "%.1f" % identity
            except (KeyError, TypeError):
                value = '-'
            if is_reference:
                value = '         '
            n = TextFace(' %s ' % value)
            n.margin_top = 2
            n.margin_right = 2
            n.margin_left = 20
            n.margin_bottom = 2
            # compare by value: "is not '-'" tested object identity
            if not value.isspace() and value != '-':
                n.inner_background.color = rgb2hex(
                    identity_scale.to_rgba(float(value)))
                # white text on the darkest backgrounds
                if float(value) > 82:
                    n.fgcolor = 'white'
            n.opacity = 1.
            if is_reference:
                n.inner_background.color = '#800000'

            lf.add_face(n, identity_column, position="aligned")

        # optionally add column with locus name
        if taxid2locus:
            try:
                value = str(taxid2locus[taxon])
            except KeyError:
                value = '-'
            n = TextFace(' %s ' % value)
            n.margin_top = 2
            n.margin_right = 2
            n.margin_left = 2
            n.margin_bottom = 2
            if is_reference:
                n.fgcolor = '#ff0000'
            n.inner_background.color = "white"
            n.opacity = 1.
            lf.add_face(n, locus_column, position="aligned")

        # rename leaf (taxon_id => description)
        lf.name = taxid2organism[taxon]

    return t1, leaf_number, ts
예제 #14
0
def plot_heat_tree(biodb, taxid2n, tree_file):
    '''
    Annotate the leaves of a tree with one aligned column of per-taxon values.

    The tree is loaded, re-rooted on its midpoint outgroup, and every leaf
    receives a single aligned text face showing the value found for it in
    *taxid2n*. Leaves are then renamed from their taxon id to the genome
    description returned by the database. Nothing is rendered or shown here:
    the annotated tree is returned to the caller.

    Parameters
    ----------
    biodb: str
        Name of the biodatabase; passed to ``manipulate_biosqldb.load_db``
        and ``manipulate_biosqldb.taxon_id2genome_description``.

    taxid2n: dict
        Value to display for each leaf, looked up with ``str(leaf.name)``.
        Leaves without an entry are displayed as 0. Values are compared
        with ``> 0``, so they are presumably numeric counts.

    tree_file: str
        Tree handed to ``Tree()`` (the original documentation states a path
        to a Newick file). Leaf names must be convertible to ``int`` because
        the description lookup uses ``int(leaf.name)`` as key.

    Returns
    -------
    tuple
        ``(t1, leaf_number)``: the annotated tree and its number of leaves.
    '''

    from chlamdb.biosqldb import manipulate_biosqldb
    server, _ = manipulate_biosqldb.load_db(biodb)

    taxid2organism = manipulate_biosqldb.taxon_id2genome_description(
        server, biodb, True)

    t1 = Tree(tree_file)

    # Use the midpoint node as the tree outgroup
    t1.set_outgroup(t1.get_midpoint_outgroup())

    leaf_number = 0
    for lf in t1.iter_leaves():
        leaf_number += 1
        lf.branch_vertical_margin = 0

        # A taxon missing from taxid2n is shown as 0 rather than failing;
        # TypeError keeps the fallback for a non-subscriptable taxid2n
        # (e.g. None), as the previous catch-all handler did.
        try:
            value = taxid2n[str(lf.name)]
        except (KeyError, TypeError):
            value = 0

        # The lookup above needs the taxon id, so rename only afterwards
        lf.name = taxid2organism[int(lf.name)]

        # Positive values are highlighted, everything else stays white
        background = "#81BEF7" if value > 0 else "white"

        n = TextFace(' %s ' % str(value))
        n.margin_top = 2
        n.margin_right = 2
        n.margin_left = 2
        n.margin_bottom = 2
        n.inner_background.color = background
        n.opacity = 1.
        lf.add_face(n, 0, position="aligned")

    return t1, leaf_number