Example #1
0
def plot_module_and_pathway_combinaison_heatmap(biodb,
                                                ref_tree,
                                                pathway_list,
                                                module_list,
                                                taxon_id_list=[],
                                                group_by_KO=True,
                                                rotate=False):
    """Draw a single heatmap that combines pathway and module profiles.

    The pathway profile comes from ``pathway_list2profile_dico`` and the
    module profile from ``module_heatmap.module_list2profile_dico``. The
    module entries are merged into the pathway mapping (on a key collision
    the module entry replaces the pathway one), the two returned lists are
    concatenated and sorted, and the result is handed to
    ``ete_motifs.multiple_profiles_heatmap`` together with ``ref_tree``.

    Args:
        biodb: database identifier, forwarded unchanged to every helper.
        ref_tree: passed as ``tree=`` to the heatmap builder.
        pathway_list: pathways to profile.
        module_list: modules to profile.
        taxon_id_list: forwarded to both profile helpers.
        group_by_KO: forwarded to ``pathway_list2profile_dico``.
        rotate: forwarded to the heatmap builder.

    Returns:
        Whatever ``ete_motifs.multiple_profiles_heatmap`` returns.
    """
    import manipulate_biosqldb  # imported here but not referenced below
    import ete_motifs
    import module_heatmap

    pathway_codes, combined_profiles = pathway_list2profile_dico(
        biodb,
        pathway_list,
        taxon_id_list=taxon_id_list,
        group_by_KO=group_by_KO,
    )
    module_codes, module_profiles = module_heatmap.module_list2profile_dico(
        biodb,
        module_list,
        taxon_id_list=taxon_id_list,
    )

    # One sorted column list and one mapping covering both kinds of entries.
    all_codes = sorted(pathway_codes + module_codes)
    combined_profiles.update(module_profiles)

    return ete_motifs.multiple_profiles_heatmap(
        biodb,
        all_codes,
        combined_profiles,
        show_labels=True,
        column_scale=True,
        tree=ref_tree,
        as_float=False,
        rotate=rotate,
    )
Example #2
0
def plot_module_heatmap(biodb, ref_tree, module_list, taxon_id_list=[], rotate=False):
    """Render a heatmap of module profiles next to a reference tree.

    The profile is obtained from ``module_list2profile_dico`` and passed,
    with ``ref_tree``, to ``ete_motifs.multiple_profiles_heatmap``.

    Args:
        biodb: database identifier, forwarded unchanged to both helpers.
        ref_tree: passed as ``tree=`` to the heatmap builder.
        module_list: modules to profile; the list returned by the profile
            helper (not necessarily this one) is what gets drawn.
        taxon_id_list: forwarded to ``module_list2profile_dico``.
        rotate: forwarded to the heatmap builder.

    Returns:
        Whatever ``ete_motifs.multiple_profiles_heatmap`` returns.
    """
    import ete_motifs

    profiled_modules, module_profiles = module_list2profile_dico(
        biodb,
        module_list,
        taxon_id_list=taxon_id_list,
    )

    return ete_motifs.multiple_profiles_heatmap(
        biodb,
        profiled_modules,
        module_profiles,
        show_labels=True,
        column_scale=True,
        tree=ref_tree,
        as_float=False,
        rotate=rotate,
    )
Example #3
0
def plot_hmm_heatmap(biodb,
                     ref_tree,
                     taxon_id_list=[],
                     frequency=False,
                     six_frame_translation=False):
    """Render a heatmap of the profiles returned by ``get_set_data``.

    Args:
        biodb: database identifier, forwarded to ``get_set_data`` and to the
            heatmap builder.
        ref_tree: passed as ``tree=`` to the heatmap builder.
        taxon_id_list: accepted but not read by this function.
        frequency: passed as ``as_float=`` to the heatmap builder.
        six_frame_translation: accepted but not read by this function.

    Returns:
        Whatever ``ete_motifs.multiple_profiles_heatmap`` returns.
    """
    from chlamdb.biosqldb import manipulate_biosqldb  # not referenced below
    import ete_motifs

    # get_set_data yields the mapping first and the column list second.
    profiles, column_labels = get_set_data(biodb)

    return ete_motifs.multiple_profiles_heatmap(
        biodb,
        column_labels,
        profiles,
        show_labels=True,
        column_scale=True,
        tree=ref_tree,
        as_float=frequency,
    )
Example #4
0
def plot_pathway_heatmap(biodb,
                         ref_tree,
                         pathway_list,
                         taxon_id_list=[],
                         rotate=False,
                         group_by_KO=True):
    """Render a heatmap of pathway profiles next to a reference tree.

    The profile is obtained from ``pathway_list2profile_dico`` and passed,
    with ``ref_tree``, to ``ete_motifs.multiple_profiles_heatmap``.

    Args:
        biodb: database identifier, forwarded unchanged to both helpers.
        ref_tree: passed as ``tree=`` to the heatmap builder.
        pathway_list: pathways to profile; the list returned by the profile
            helper (not necessarily this one) is what gets drawn.
        taxon_id_list: forwarded to ``pathway_list2profile_dico``.
        rotate: forwarded to the heatmap builder.
        group_by_KO: forwarded to ``pathway_list2profile_dico``.

    Returns:
        Whatever ``ete_motifs.multiple_profiles_heatmap`` returns.
    """
    import manipulate_biosqldb  # imported here but not referenced below
    import ete_motifs

    profiled_pathways, pathway_profiles = pathway_list2profile_dico(
        biodb,
        pathway_list,
        taxon_id_list=taxon_id_list,
        group_by_KO=group_by_KO,
    )

    return ete_motifs.multiple_profiles_heatmap(
        biodb,
        profiled_pathways,
        pathway_profiles,
        show_labels=True,
        column_scale=True,
        tree=ref_tree,
        as_float=False,
        rotate=rotate,
    )
Example #5
0
def plot_cog_eatmap(biodb,
                    ref_tree,
                    taxon_id_list=[],
                    frequency=False,
                    group_by_cog_id=False):
    """Render a heatmap of per-taxon COG category counts (or percentages).

    One SQL query yields ``(taxon_id, category code, n)`` rows; each row
    becomes one cell of a ``{label: {taxon_id: value}}`` mapping, where the
    label is ``"<description> (<code>)"``. The mapping and the ordered label
    list are passed to ``ete_motifs.multiple_profiles_heatmap``.

    Args:
        biodb: database name. It is interpolated into table names and SQL
            text, so it must come from a trusted source.
        ref_tree: passed as ``tree=`` to the heatmap builder.
        taxon_id_list: taxon ids (strings, they are comma-joined into the
            query) to restrict the plot to. An empty list means no
            restriction.
        frequency: when true, each cell is ``n`` as a percentage (rounded to
            two decimals) of the taxon's total number of best COG hits, and
            the 'TOTAL' / '-' rows are not produced. When false, cells are
            raw integer counts and two extra rows are added: 'TOTAL'
            (proteome size) and '-' (entries without a COG hit).
        group_by_cog_id: only consulted when ``taxon_id_list`` is empty;
            selects the query that first groups by (taxon, COG id).

    Returns:
        Whatever ``ete_motifs.multiple_profiles_heatmap`` returns.
    """
    import manipulate_biosqldb
    import ete_motifs

    server, db = manipulate_biosqldb.load_db(biodb)

    # NOTE(review): every query below is assembled with %-formatting; the
    # interpolated values are assumed to be trusted -- confirm at call sites.
    sql = 'select biodatabase_id from biodatabase where name="%s"' % biodb
    db_id = server.adaptor.execute_and_fetchall(sql, )[0][0]

    # Main query: optionally restricted to a subset of the available taxa.
    if taxon_id_list:
        taxon_filter = ','.join(taxon_id_list)

        sql = 'select taxon_id, code, count(*) as n from COG.seqfeature_id2best_COG_hit_%s t1 ' \
              ' inner join biosqldb.bioentry t2 on t1.bioentry_id=t2.bioentry_id' \
              ' inner join COG.cog_id2cog_category t3 on t1.hit_cog_id=t3.COG_id ' \
              ' inner join COG.code2category t4 on t3.category_id=t4.category_id ' \
              ' where t2.biodatabase_id=%s and taxon_id in (%s)' \
              ' group by taxon_id, code;' % (biodb,
                db_id,
                taxon_filter)

        print(sql)
    elif not group_by_cog_id:
        sql = 'select taxon_id,functon,count(*) as n ' \
              ' from COG.locus_tag2gi_hit_%s t1 ' \
              ' inner join COG.cog_names_2014 t2 on t1.COG_id=t2.COG_id ' \
              ' inner join biosqldb.bioentry as t3 on t1.accession=t3.accession ' \
              ' where biodatabase_id=%s group by taxon_id,functon' % (biodb, db_id)
    else:
        sql = ' select A.taxon_id,B.functon,count(*) from (select t1.COG_id, t3.taxon_id from COG.locus_tag2gi_hit_%s t1 ' \
              ' inner join biosqldb.orthology_detail_%s t3 on t1.locus_tag=t3.locus_tag ' \
              ' group by taxon_id,t1.COG_id) A inner join COG.cog_names_2014 B on A.COG_id=B.COG_id ' \
              ' group by A.taxon_id,B.functon;' % (biodb, biodb)

    data = server.adaptor.execute_and_fetchall(sql, )

    code2taxon2count = {}
    cog_list = []

    if frequency:
        # ATTENTION: the denominator is the number of entries annotated with
        # a COG, not the genome size.
        sql = 'select taxon_id, count(*) as n from COG.seqfeature_id2best_COG_hit_%s t1' \
              ' inner join biosqldb.bioentry t2 on t1.bioentry_id=t2.bioentry_id' \
              ' where t2.biodatabase_id=%s group by taxon_id;' % (biodb, db_id)
        taxon_id2count = manipulate_biosqldb.to_dict(
            server.adaptor.execute_and_fetchall(sql, ))
    else:
        sql = 'select taxon_id, count(*) from biosqldb.orthology_detail_%s t1 left join COG.locus_tag2gi_hit_%s t2 ' \
              ' on t1.locus_tag=t2.locus_tag where COG_id is NULL group by t1.taxon_id;' % (biodb,  biodb)

        taxon2count_no_GOG = manipulate_biosqldb.to_dict(
            server.adaptor.execute_and_fetchall(sql, ))

        sql = 'select taxon_id, count(*) from orthology_detail_%s group by taxon_id' % biodb

        taxon2proteome_size = manipulate_biosqldb.to_dict(
            server.adaptor.execute_and_fetchall(sql, ))

        no_cog_row = {}
        total_row = {}
        # An empty taxon_id_list means "no restriction" (the main query above
        # treats it that way), so every taxon is kept in that case instead of
        # leaving the 'TOTAL' and '-' rows empty.
        selected_taxa = set(taxon_id_list)
        for taxon in taxon2count_no_GOG:
            if not selected_taxa or taxon in selected_taxa:
                no_cog_row[taxon] = int(taxon2count_no_GOG[taxon])
                total_row[taxon] = int(taxon2proteome_size[taxon])

        code2taxon2count['-'] = no_cog_row
        code2taxon2count['TOTAL'] = total_row
        cog_list = ['TOTAL', '-']

    sql = 'select code, description from COG.code2category;'
    code2description = manipulate_biosqldb.to_dict(
        server.adaptor.execute_and_fetchall(sql, ))

    for row in data:
        taxon_key = str(row[0])
        code = row[1]
        descr = "%s (%s)" % (code2description[code], code)

        # cog_list and code2taxon2count always hold the same labels, so one
        # dict lookup decides whether this label is new.
        if descr not in code2taxon2count:
            code2taxon2count[descr] = {}
            cog_list.append(descr)

        if frequency:
            value = round(
                (float(row[2]) / float(taxon_id2count[taxon_key])) * 100,
                2)
        else:
            value = int(row[2])
        code2taxon2count[descr][taxon_key] = value

    tree2 = ete_motifs.multiple_profiles_heatmap(biodb,
                                                 cog_list,
                                                 code2taxon2count,
                                                 show_labels=True,
                                                 column_scale=True,
                                                 tree=ref_tree,
                                                 as_float=frequency)
    return tree2