Beispiel #1
0
def kegg_gene_sets(org):
    """ Returns gene sets from KEGG pathways.
    """
    caching.clear_cache()
    kegg_org = kegg.KEGGOrganism(taxonomy.name(org))
    ncbi_id_mapper = kegg_org.kegg_to_ncbi_mapper()
    genesets = []

    for id in kegg_org.pathways():
        pway = kegg.KEGGPathway(id)
        hier = ('KEGG', 'pathways')

        if pway.pathway_attributes():
            kegg_names = kegg_org.get_genes_by_pathway(id)
            mapped_genes = []
            for gene in kegg_names:
                try:
                    mapped_genes.append(ncbi_id_mapper[gene.upper()])
                except KeyError:
                    # some kegg names can not be matched to ncbi ids
                    # they are included in geneset anyway
                    # remove prefix, that specifies kegg organism
                    # mapped_genes.append(gene.split(':')[-1])
                    pass

            gs = GeneSet(gs_id=id,
                         name=pway.title,
                         genes=mapped_genes,
                         hierarchy=hier,
                         organism=org,
                         link=pway.link)
            genesets.append(gs)

    return GeneSets(genesets)
Beispiel #2
0
def kegg_gene_sets(tax_id: str) -> None:
    """ Returns gene sets from KEGG pathways.
    """
    caching.clear_cache()
    kegg_org = kegg.KEGGOrganism(taxonomy.name(tax_id))
    ncbi_id_mapper = kegg_org.kegg_to_ncbi_mapper()
    genesets = []

    for id in kegg_org.pathways():
        pway = kegg.KEGGPathway(id)
        hier = ('KEGG', 'Pathways')

        if pway.pathway_attributes():
            kegg_names = kegg_org.get_genes_by_pathway(id)
            mapped_genes = set()
            for gene in kegg_names:
                try:
                    mapped_genes.add(ncbi_id_mapper[gene.upper()])
                except KeyError:
                    # some kegg names can not be matched to ncbi ids
                    # they are included in geneset anyway
                    # remove prefix, that specifies kegg organism
                    # mapped_genes.append(gene.split(':')[-1])
                    pass

            gs = GeneSet(gs_id=id,
                         name=pway.title,
                         genes=mapped_genes,
                         hierarchy=hier,
                         organism=tax_id,
                         link=pway.link)
            genesets.append(gs)

    for gs_group in GeneSets(genesets).split_by_hierarchy():
        hierarchy = gs_group.common_hierarchy()
        gs_group.to_gmt_file_format(
            f'{data_path}/gene_sets/{filename(hierarchy, tax_id)}')
 def get_kgml_and_image(pathway_id):
     """Return an initialized KEGGPathway with pre-cached data"""
     p = kegg.KEGGPathway(pathway_id)
     p._get_kgml()  # makes sure the kgml file is downloaded
     p._get_image_filename()  # makes sure the image is downloaded
     return (pathway_id, p)