def run_enrichment(org_code, genes, reference, progress=None):
            # We use the kegg pathway gene sets provided by 'geneset' for
            # the enrichment calculation.

            kegg_api = kegg.api.CachedKeggApi()
            link_map = kegg_api.link(org_code,
                                     "pathway")  # [(pathway_id, kegg_gene_id)]
            ncbi_gene_map = kegg_api.conv(
                org_code, 'ncbi-geneid')  # [(ncbi_gene_id, kegg_gene_id)]
            ncbi_gene_map = [(_1.split(":", 1)[1], _2)
                             for _1, _2 in ncbi_gene_map]
            link_map = relation_join(
                link_map,
                [(_2, _1)
                 for _1, _2 in ncbi_gene_map])  # [(pathway_id, ncbi_gene_id)]
            kegg_sets = relation_list_to_multimap(
                link_map)  #  {pathway_id -> [ncbi_gene_ids]}
            # map kegg gene ids to ncbi_gene_ids.
            kegg_sets = geneset.GeneSets(sets=[
                geneset.GeneSet(gs_id=ddi, genes=set(genes))
                for ddi, genes in kegg_sets.items()
            ])
            pathways = pathway_enrichment(kegg_sets,
                                          genes,
                                          reference,
                                          callback=progress)
            # Ensure that pathway entries are pre-cached for later use in the
            # list/tree view
            kegg_pathways = kegg.KEGGPathways()
            kegg_pathways.pre_cache(pathways.keys(),
                                    progress_callback=progress)

            return pathways, ncbi_gene_map
        def run_enrichment(genes, reference, progress=None):
            # We use the kegg pathway gene sets provided by 'geneset' for
            # the enrichment calculation.

            kegg_api = kegg.api.CachedKeggApi()
            linkmap = kegg_api.link(self.org.org_code, "pathway")
            converted_ids = kegg_api.conv(self.org.org_code, 'ncbi-geneid')
            kegg_sets = relation_list_to_multimap(linkmap, {
                gene.upper(): ncbi.split(':')[-1]
                for ncbi, gene in converted_ids
            })

            kegg_sets = geneset.GeneSets(sets=[
                geneset.GeneSet(gs_id=ddi, genes=set(genes))
                for ddi, genes in kegg_sets.items()
            ])

            pathways = pathway_enrichment(kegg_sets,
                                          genes,
                                          reference,
                                          callback=progress)
            # Ensure that pathway entries are pre-cached for later use in the
            # list/tree view
            kegg_pathways = kegg.KEGGPathways()
            kegg_pathways.pre_cache(pathways.keys(),
                                    progress_callback=progress)

            return pathways
Esempio n. 3
0
        def run_enrichment(org_code, genes, reference=None, progress=None):
            org = kegg.KEGGOrganism(org_code)
            if reference is None:
                reference = org.get_ncbi_ids()

            # This is here just to keep widget working without any major changes.
            # map not needed, geneMatcher will not work on widget level.
            unique_genes = genes
            unique_ref_genes = dict([(gene, gene) for gene in set(reference)])

            taxid = kegg.to_taxid(org.org_code)
            # Map the taxid back to standard 'common' taxids
            # (as used by 'geneset') if applicable
            r_tax_map = dict(
                (v, k) for k, v in kegg.KEGGGenome.TAXID_MAP.items())
            if taxid in r_tax_map:
                taxid = r_tax_map[taxid]

            # We use the kegg pathway gene sets provided by 'geneset' for
            # the enrichment calculation.

            kegg_api = kegg.api.CachedKeggApi()
            linkmap = kegg_api.link(org.org_code, "pathway")
            converted_ids = kegg_api.conv(org.org_code, 'ncbi-geneid')
            kegg_sets = relation_list_to_multimap(
                linkmap,
                dict((gene.upper(), ncbi.split(':')[-1])
                     for ncbi, gene in converted_ids))

            kegg_sets = geneset.GeneSets(sets=[
                geneset.GeneSet(gs_id=ddi, genes=set(genes))
                for ddi, genes in kegg_sets.items()
            ])

            pathways = pathway_enrichment(kegg_sets,
                                          unique_genes.values(),
                                          unique_ref_genes.keys(),
                                          callback=progress)
            # Ensure that pathway entries are pre-cached for later use in the
            # list/tree view
            kegg_pathways = kegg.KEGGPathways()
            kegg_pathways.pre_cache(pathways.keys(),
                                    progress_callback=progress)

            return pathways, org, unique_genes, unique_ref_genes
    def __init__(self):
        super().__init__()

        # commit
        self.commit_button = None

        # gene sets object
        self.gene_sets_obj = geneset.GeneSets()

        # progress bar
        self.progress_bar = None
        self.progress_bar_iterations = None

        # data
        self.input_data = None
        self.input_genes = []
        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None
        self.gene_id_column = None

        # custom gene sets
        self.custom_data = None
        self.feature_model = DomainModel(valid_types=(DiscreteVariable,
                                                      StringVariable))
        self.gene_set_label = None
        self.gs_label_combobox = None
        self.custom_tax_id = None
        self.custom_use_attr_names = None
        self.custom_gene_id_attribute = None
        self.custom_gene_id_column = None

        # reference genes
        self.reference_radio_box = None
        self.reference_data = None
        self.reference_genes = None

        self.reference_tax_id = None
        self.reference_attr_names = None
        self.reference_gene_id_attribute = None
        self.reference_gene_id_column = None

        # info box
        self.input_info = None
        self.num_of_sel_genes = 0

        # filter
        self.line_edit_filter = None
        self.search_pattern = ''
        self.organism_select_combobox = None

        # data model view
        self.data_view = None
        self.data_model = None

        # gene matcher NCBI
        self.gene_matcher = None

        # filter proxy model
        self.filter_proxy_model = None

        # hierarchy widget
        self.hierarchy_widget = None
        self.hierarchy_state = None

        # spinbox
        self.spin_widget = None

        # threads
        self.threadpool = QThreadPool(self)
        self.workers = None

        self._task = None  # type: Optional[Task]
        self._executor = ThreadExecutor()

        # gui
        self.setup_gui()