def run_enrichment(org_code, genes, reference, progress=None):
            # We use the kegg pathway gene sets provided by 'geneset' for
            # the enrichment calculation.

            kegg_api = kegg.api.CachedKeggApi()
            link_map = kegg_api.link(org_code,
                                     "pathway")  # [(pathway_id, kegg_gene_id)]
            ncbi_gene_map = kegg_api.conv(
                org_code, 'ncbi-geneid')  # [(ncbi_gene_id, kegg_gene_id)]
            ncbi_gene_map = [(_1.split(":", 1)[1], _2)
                             for _1, _2 in ncbi_gene_map]
            link_map = relation_join(
                link_map,
                [(_2, _1)
                 for _1, _2 in ncbi_gene_map])  # [(pathway_id, ncbi_gene_id)]
            kegg_sets = relation_list_to_multimap(
                link_map)  #  {pathway_id -> [ncbi_gene_ids]}
            # map kegg gene ids to ncbi_gene_ids.
            kegg_sets = geneset.GeneSets(sets=[
                geneset.GeneSet(gs_id=ddi, genes=set(genes))
                for ddi, genes in kegg_sets.items()
            ])
            pathways = pathway_enrichment(kegg_sets,
                                          genes,
                                          reference,
                                          callback=progress)
            # Ensure that pathway entries are pre-cached for later use in the
            # list/tree view
            kegg_pathways = kegg.KEGGPathways()
            kegg_pathways.pre_cache(pathways.keys(),
                                    progress_callback=progress)

            return pathways, ncbi_gene_map
        def run_enrichment(genes, reference, progress=None):
            # We use the kegg pathway gene sets provided by 'geneset' for
            # the enrichment calculation.

            kegg_api = kegg.api.CachedKeggApi()
            linkmap = kegg_api.link(self.org.org_code, "pathway")
            converted_ids = kegg_api.conv(self.org.org_code, 'ncbi-geneid')
            kegg_sets = relation_list_to_multimap(linkmap, {
                gene.upper(): ncbi.split(':')[-1]
                for ncbi, gene in converted_ids
            })

            kegg_sets = geneset.GeneSets(sets=[
                geneset.GeneSet(gs_id=ddi, genes=set(genes))
                for ddi, genes in kegg_sets.items()
            ])

            pathways = pathway_enrichment(kegg_sets,
                                          genes,
                                          reference,
                                          callback=progress)
            # Ensure that pathway entries are pre-cached for later use in the
            # list/tree view
            kegg_pathways = kegg.KEGGPathways()
            kegg_pathways.pre_cache(pathways.keys(),
                                    progress_callback=progress)

            return pathways
Esempio n. 3
0
        def run_enrichment(org_code, genes, reference=None, progress=None):
            org = kegg.KEGGOrganism(org_code)
            if reference is None:
                reference = org.get_ncbi_ids()

            # This is here just to keep widget working without any major changes.
            # map not needed, geneMatcher will not work on widget level.
            unique_genes = genes
            unique_ref_genes = dict([(gene, gene) for gene in set(reference)])

            taxid = kegg.to_taxid(org.org_code)
            # Map the taxid back to standard 'common' taxids
            # (as used by 'geneset') if applicable
            r_tax_map = dict(
                (v, k) for k, v in kegg.KEGGGenome.TAXID_MAP.items())
            if taxid in r_tax_map:
                taxid = r_tax_map[taxid]

            # We use the kegg pathway gene sets provided by 'geneset' for
            # the enrichment calculation.

            kegg_api = kegg.api.CachedKeggApi()
            linkmap = kegg_api.link(org.org_code, "pathway")
            converted_ids = kegg_api.conv(org.org_code, 'ncbi-geneid')
            kegg_sets = relation_list_to_multimap(
                linkmap,
                dict((gene.upper(), ncbi.split(':')[-1])
                     for ncbi, gene in converted_ids))

            kegg_sets = geneset.GeneSets(sets=[
                geneset.GeneSet(gs_id=ddi, genes=set(genes))
                for ddi, genes in kegg_sets.items()
            ])

            pathways = pathway_enrichment(kegg_sets,
                                          unique_genes.values(),
                                          unique_ref_genes.keys(),
                                          callback=progress)
            # Ensure that pathway entries are pre-cached for later use in the
            # list/tree view
            kegg_pathways = kegg.KEGGPathways()
            kegg_pathways.pre_cache(pathways.keys(),
                                    progress_callback=progress)

            return pathways, org, unique_genes, unique_ref_genes
    def UpdateListView(self):
        self.bestPValueItem = None
        self.listView.clear()
        if not self.data:
            return

        allPathways = self.org.pathways()
        allRefPathways = kegg.pathways("map")

        items = []
        kegg_pathways = kegg.KEGGPathways()

        org_code = self.org.org_code

        if self.showOrthology:
            self.koOrthology = kegg.KEGGBrite("ko00001")
            self.listView.setRootIsDecorated(True)
            path_ids = {s[-5:] for s in self.pathways.keys()}

            def _walkCollect(koEntry):
                num = koEntry.title[:5] if koEntry.title else None
                if num in path_ids:
                    return [koEntry] + reduce(
                        lambda li, c: li + _walkCollect(c),
                        [child for child in koEntry.entries], [])
                else:
                    c = reduce(lambda li, c: li + _walkCollect(c),
                               [child for child in koEntry.entries], [])
                    return c + (c and [koEntry] or [])

            allClasses = reduce(lambda li1, li2: li1 + li2,
                                [_walkCollect(c) for c in self.koOrthology],
                                [])

            def _walkCreate(koEntry, lvItem):
                item = QTreeWidgetItem(lvItem)
                id = "path:" + org_code + koEntry.title[:5]

                if koEntry.title[:5] in path_ids:
                    p = kegg_pathways.get_entry(id)
                    if p is None:
                        # In case the genesets still have obsolete entries
                        name = koEntry.title
                    else:
                        name = p.name
                    genes, p_value, ref = self.pathways[id]
                    item.setText(0, name)
                    item.setText(1, "%.5f" % p_value)
                    item.setText(
                        2, "%i of %i" % (len(genes), len(self.input_genes)))
                    item.setText(3, "%i of %i" % (ref, len(self.ref_genes)))
                    item.pathway_id = id if p is not None else None
                else:
                    if id in allPathways:
                        text = kegg_pathways.get_entry(id).name
                    else:
                        text = koEntry.title
                    item.setText(0, text)

                    if id in allPathways:
                        item.pathway_id = id
                    elif "path:map" + koEntry.title[:5] in allRefPathways:
                        item.pathway_id = "path:map" + koEntry.title[:5]
                    else:
                        item.pathway_id = None

                for child in koEntry.entries:
                    if child in allClasses:
                        _walkCreate(child, item)

            for koEntry in self.koOrthology:
                if koEntry in allClasses:
                    _walkCreate(koEntry, self.listView)

            self.listView.update()
        else:
            self.listView.setRootIsDecorated(False)
            pathways = self.pathways.items()
            pathways = sorted(pathways, key=lambda item: item[1][1])

            for id, (genes, p_value, ref) in pathways:
                item = QTreeWidgetItem(self.listView)
                item.setText(0, kegg_pathways.get_entry(id).name)
                item.setText(1, "%.5f" % p_value)
                item.setText(2,
                             "%i of %i" % (len(genes), len(self.input_genes)))
                item.setText(3, "%i of %i" % (ref, len(self.ref_genes)))
                item.pathway_id = id
                items.append(item)

        self.bestPValueItem = items and items[0] or None
        self.listView.expandAll()
        for i in range(4):
            self.listView.resizeColumnToContents(i)

        if self.bestPValueItem:
            index = self.listView.indexFromItem(self.bestPValueItem)
            self.listView.selectionModel().select(
                index, QItemSelectionModel.ClearAndSelect)