def plot_module_and_pathway_combinaison_heatmap(
        biodb,
        ref_tree,
        pathway_list,
        module_list,
        taxon_id_list=[],
        group_by_KO=True,
        rotate=False):
    """Draw a single heatmap that combines pathway and module profiles.

    The pathway profiles come from ``pathway_list2profile_dico`` and the
    module profiles from ``module_heatmap.module_list2profile_dico``; both
    receive the same ``biodb`` and ``taxon_id_list``.  The two returned
    code lists are concatenated and sorted to form the heatmap columns, and
    the module profile mapping is merged into the pathway one with
    ``update`` (so a module entry replaces a pathway entry on a shared key).

    Args:
        biodb: Database name, forwarded to the helpers and the renderer.
        ref_tree: Forwarded to the renderer as ``tree``.
        pathway_list: Pathways to profile.
        module_list: Modules to profile.
        taxon_id_list: Forwarded unchanged to both profile helpers; it is
            not modified in this function.
        group_by_KO: Forwarded to ``pathway_list2profile_dico`` only.
        rotate: Forwarded to the renderer.

    Returns:
        Whatever ``ete_motifs.multiple_profiles_heatmap`` returns.
    """
    # NOTE(review): manipulate_biosqldb is imported but never referenced in
    # this function; kept so import-time behaviour is unchanged.
    import manipulate_biosqldb
    import ete_motifs
    import module_heatmap

    pathway_codes, profiles = pathway_list2profile_dico(
        biodb,
        pathway_list,
        taxon_id_list=taxon_id_list,
        group_by_KO=group_by_KO,
    )
    module_codes, module_profiles = module_heatmap.module_list2profile_dico(
        biodb,
        module_list,
        taxon_id_list=taxon_id_list,
    )

    columns = sorted(pathway_codes + module_codes)
    profiles.update(module_profiles)

    return ete_motifs.multiple_profiles_heatmap(
        biodb,
        columns,
        profiles,
        show_labels=True,
        column_scale=True,
        tree=ref_tree,
        as_float=False,
        rotate=rotate,
    )
def plot_module_heatmap(biodb, ref_tree, module_list, taxon_id_list=[], rotate=False):
    """Draw a heatmap of module profiles along ``ref_tree``.

    Args:
        biodb: Database name, forwarded to the profile helper and renderer.
        ref_tree: Forwarded to the renderer as ``tree``.
        module_list: Modules to profile; the helper returns the list that
            is actually used as heatmap columns.
        taxon_id_list: Forwarded unchanged to ``module_list2profile_dico``;
            it is not modified in this function.
        rotate: Forwarded to the renderer.

    Returns:
        Whatever ``ete_motifs.multiple_profiles_heatmap`` returns.
    """
    import ete_motifs

    columns, profiles = module_list2profile_dico(
        biodb,
        module_list,
        taxon_id_list=taxon_id_list,
    )

    return ete_motifs.multiple_profiles_heatmap(
        biodb,
        columns,
        profiles,
        show_labels=True,
        column_scale=True,
        tree=ref_tree,
        as_float=False,
        rotate=rotate,
    )
def plot_hmm_heatmap(biodb, ref_tree, taxon_id_list=[], frequency=False, six_frame_translation=False):
    """Draw a heatmap of the set profiles returned by ``get_set_data``.

    Args:
        biodb: Database name, forwarded to ``get_set_data`` and the renderer.
        ref_tree: Forwarded to the renderer as ``tree``.
        taxon_id_list: Accepted but not used by this function.
        frequency: Forwarded to the renderer as ``as_float``.
        six_frame_translation: Accepted but not used by this function.

    Returns:
        Whatever ``ete_motifs.multiple_profiles_heatmap`` returns.
    """
    # NOTE(review): manipulate_biosqldb is imported but never referenced in
    # this function; kept so import-time behaviour is unchanged.
    from chlamdb.biosqldb import manipulate_biosqldb
    import ete_motifs

    # get_set_data returns the profile mapping first, then the column list.
    profiles, columns = get_set_data(biodb)

    return ete_motifs.multiple_profiles_heatmap(
        biodb,
        columns,
        profiles,
        show_labels=True,
        column_scale=True,
        tree=ref_tree,
        as_float=frequency,
    )
def plot_pathway_heatmap(biodb, ref_tree, pathway_list, taxon_id_list=[], rotate=False, group_by_KO=True):
    """Draw a heatmap of pathway profiles along ``ref_tree``.

    Args:
        biodb: Database name, forwarded to the profile helper and renderer.
        ref_tree: Forwarded to the renderer as ``tree``.
        pathway_list: Pathways to profile; the helper returns the list that
            is actually used as heatmap columns.
        taxon_id_list: Forwarded unchanged to ``pathway_list2profile_dico``;
            it is not modified in this function.
        rotate: Forwarded to the renderer.
        group_by_KO: Forwarded to ``pathway_list2profile_dico``.

    Returns:
        Whatever ``ete_motifs.multiple_profiles_heatmap`` returns.
    """
    # NOTE(review): manipulate_biosqldb is imported but never referenced in
    # this function; kept so import-time behaviour is unchanged.
    import manipulate_biosqldb
    import ete_motifs

    columns, profiles = pathway_list2profile_dico(
        biodb,
        pathway_list,
        taxon_id_list=taxon_id_list,
        group_by_KO=group_by_KO,
    )

    return ete_motifs.multiple_profiles_heatmap(
        biodb,
        columns,
        profiles,
        show_labels=True,
        column_scale=True,
        tree=ref_tree,
        as_float=False,
        rotate=rotate,
    )
def plot_cog_eatmap(biodb, ref_tree, taxon_id_list=[], frequency=False, group_by_cog_id=False):
    """Draw a heatmap of COG category counts (or percentages) per taxon.

    One SQL query yields ``(taxon_id, category code, count)`` rows.  Each
    row becomes one cell of the profile mapping, keyed first by a label of
    the form ``"<description> (<code>)"`` and then by ``str(taxon_id)``.

    Args:
        biodb: Database name; used to look up ``biodatabase_id`` and
            interpolated into the per-database table names.
        ref_tree: Forwarded to the renderer as ``tree``.
        taxon_id_list: When non-empty, the counts are restricted to these
            taxa (the items are comma-joined into an SQL ``IN`` clause, so
            they must be strings) and ``group_by_cog_id`` is ignored.  The
            list is only read, never modified.
        frequency: When true, each cell is the count divided by the taxon's
            number of rows in the best-COG-hit table, times 100, rounded to
            two decimals.  When false, cells are integer counts and two
            extra columns, ``'TOTAL'`` and ``'-'``, are added.
        group_by_cog_id: Only consulted when ``taxon_id_list`` is empty;
            selects the query that first groups hits by taxon and COG id.

    Returns:
        Whatever ``ete_motifs.multiple_profiles_heatmap`` returns.
    """
    import manipulate_biosqldb
    import ete_motifs

    server, _unused_db = manipulate_biosqldb.load_db(biodb)

    def fetch(query):
        # Single place for the adaptor call used by every query below.
        return server.adaptor.execute_and_fetchall(query, )

    db_id = fetch('select biodatabase_id from biodatabase where name="%s"' % biodb)[0][0]

    # Pick the query producing the (taxon_id, code, count) rows.
    if len(taxon_id_list) == 0:
        if group_by_cog_id:
            count_query = (
                ' select A.taxon_id,B.functon,count(*) from (select t1.COG_id, t3.taxon_id from COG.locus_tag2gi_hit_%s t1 '
                ' inner join biosqldb.orthology_detail_%s t3 on t1.locus_tag=t3.locus_tag '
                ' group by taxon_id,t1.COG_id) A inner join COG.cog_names_2014 B on A.COG_id=B.COG_id '
                ' group by A.taxon_id,B.functon;'
            ) % (biodb, biodb)
        else:
            count_query = (
                'select taxon_id,functon,count(*) as n '
                ' from COG.locus_tag2gi_hit_%s t1 '
                ' inner join COG.cog_names_2014 t2 on t1.COG_id=t2.COG_id '
                ' inner join biosqldb.bioentry as t3 on t1.accession=t3.accession '
                ' where biodatabase_id=%s group by taxon_id,functon'
            ) % (biodb, db_id)
    else:
        # Restrict to a subset of the available taxa.
        taxon_csv = ','.join(taxon_id_list)
        count_query = (
            'select taxon_id, code, count(*) as n from COG.seqfeature_id2best_COG_hit_%s t1 '
            ' inner join biosqldb.bioentry t2 on t1.bioentry_id=t2.bioentry_id'
            ' inner join COG.cog_id2cog_category t3 on t1.hit_cog_id=t3.COG_id '
            ' inner join COG.code2category t4 on t3.category_id=t4.category_id '
            ' where t2.biodatabase_id=%s and taxon_id in (%s)'
            ' group by taxon_id, code;'
        ) % (biodb, db_id, taxon_csv)
        print(count_query)

    rows = fetch(count_query)

    if frequency:
        # ATTENTION: based on total annotated with COG and not genome size.
        totals_query = (
            'select taxon_id, count(*) as n from COG.seqfeature_id2best_COG_hit_%s t1'
            ' inner join biosqldb.bioentry t2 on t1.bioentry_id=t2.bioentry_id'
            ' where t2.biodatabase_id=%s group by taxon_id;'
        ) % (biodb, db_id)
        taxon_id2count = manipulate_biosqldb.to_dict(fetch(totals_query))

        code2taxon2count = {}
        cog_list = []
    else:
        # Proteins without any COG hit, per taxon.
        no_cog_query = (
            'select taxon_id, count(*) from biosqldb.orthology_detail_%s t1 left join COG.locus_tag2gi_hit_%s t2 '
            ' on t1.locus_tag=t2.locus_tag where COG_id is NULL group by t1.taxon_id;'
        ) % (biodb, biodb)
        unannotated_by_taxon = manipulate_biosqldb.to_dict(fetch(no_cog_query))

        # Total number of orthology_detail rows per taxon.
        size_query = 'select taxon_id, count(*) from orthology_detail_%s group by taxon_id' % biodb
        proteome_size_by_taxon = manipulate_biosqldb.to_dict(fetch(size_query))

        code2taxon2count = {'-': {}, 'TOTAL': {}}
        # NOTE(review): with an empty taxon_id_list nothing passes this
        # filter, so 'TOTAL' and '-' stay empty -- confirm this is intended.
        for taxon in unannotated_by_taxon:
            if taxon not in taxon_id_list:
                continue
            code2taxon2count['-'][taxon] = int(unannotated_by_taxon[taxon])
            code2taxon2count['TOTAL'][taxon] = int(proteome_size_by_taxon[taxon])

        cog_list = ['TOTAL', '-']

    code2description = manipulate_biosqldb.to_dict(
        fetch('select code, description from COG.code2category;'))

    for row in rows:
        label = "%s (%s)" % (code2description[row[1]], row[1])
        if label not in cog_list:
            cog_list.append(label)

        # Create the per-taxon mapping on first sight of this category.
        per_taxon = code2taxon2count.setdefault(label, {})
        taxon_key = str(row[0])
        if frequency:
            per_taxon[taxon_key] = round(
                (float(row[2]) / float(taxon_id2count[str(row[0])])) * 100, 2)
        else:
            per_taxon[taxon_key] = int(row[2])

    return ete_motifs.multiple_profiles_heatmap(
        biodb,
        cog_list,
        code2taxon2count,
        show_labels=True,
        column_scale=True,
        tree=ref_tree,
        as_float=frequency,
    )