def _start_progerss_task(self):
    """Queue a background task that runs ``self._dataset_progress``.

    The worker is ``self._dataset_progress`` with one bound argument: the
    callable that ``concurrent.methodinvoke`` builds for this object's
    ``set_progress`` method (declared with a single ``float`` argument).
    The task's ``exceptionReady`` signal is connected to
    ``self._dataset_progress_exception`` before the task is submitted to
    ``self._executor``.
    """
    report_progress = concurrent.methodinvoke(
        self, "set_progress", (float, ))
    worker = partial(self._dataset_progress, report_progress)

    task = concurrent.Task(function=worker)
    task.exceptionReady.connect(self._dataset_progress_exception)
    self._executor.submit(task)
def UpdatePathwayView(self):
    """Load and display the pathway for the selected list entry.

    The first selected item of ``self.listView`` is used; when nothing is
    selected ``self.bestPValueItem`` is used instead.  ``self.commit()`` is
    always called first.  If there is no usable item (or it has no
    ``pathway_id``) the pathway view is cleared.  Otherwise the widget is
    disabled and a task that pre-fetches the pathway's KGML and image is
    submitted to ``self._executor``; ``self._onPathwayTaskFinshed`` is
    connected to the task's ``finished`` signal.
    """
    selected = self.listView.selectedItems()
    item = selected[0] if selected else None

    self.commit()

    # Nothing selected: fall back to the item stored as bestPValueItem.
    item = item or self.bestPValueItem
    if not (item and item.pathway_id):
        self.pathwayView.SetPathway(None)
        return

    def get_kgml_and_image(pathway_id):
        """Return an initialized KEGGPathway with pre-cached data"""
        pathway = kegg.KEGGPathway(pathway_id)
        pathway._get_kgml()  # makes sure the kgml file is downloaded
        pathway._get_image_filename()  # makes sure the image is downloaded
        return (pathway_id, pathway)

    self.setEnabled(False)
    task = concurrent.Task(
        function=lambda: get_kgml_and_image(item.pathway_id))
    self._pathwayTask = task
    task.finished.connect(self._onPathwayTaskFinshed)
    self._executor.submit(task)
def Update(self):
    """
    Recompute the enriched pathways for the current input.

    Does nothing without input data.  Otherwise the input gene set is
    refreshed (a ``ValueError`` while doing so is reported as error 0),
    ``self.org`` is rebuilt from ``self.tax_id`` and the reference gene set
    is taken either from the reference signal or from the organism.  The
    enrichment computation is then submitted to ``self._executor`` with
    ``self._onEnrichTaskFinished`` connected to the task's ``finished``
    signal.
    """
    if not self.data:
        return

    self.error(0)
    self.information(0)

    # XXX: Check data in setData, do not even allow this to be executed if
    # data has no genes
    try:
        self.__get_input_genes()
        self.input_genes = set(self.input_genes)
    except ValueError:
        self.error(0, "Cannot extract gene names from input.")

    self.information(1)

    self.org = kegg.KEGGOrganism(kegg.from_taxid(self.tax_id))

    if not (self.useReference and self.ref_data):
        # No reference signal in use: take the organism's NCBI ids.
        self.ref_genes = self.org.get_ncbi_ids()
    else:
        self.__get_ref_genes()
        self.ref_genes = set(self.ref_genes)

    def run_enrichment(genes, reference, progress=None):
        # We use the kegg pathway gene sets provided by 'geneset' for
        # the enrichment calculation.
        api = kegg.api.CachedKeggApi()
        pathway_links = api.link(self.org.org_code, "pathway")
        id_pairs = api.conv(self.org.org_code, 'ncbi-geneid')

        # Upper-cased gene id -> the part of its NCBI id after the last ':'.
        gene_to_ncbi = {}
        for ncbi, gene in id_pairs:
            gene_to_ncbi[gene.upper()] = ncbi.split(':')[-1]

        multimap = relation_list_to_multimap(pathway_links, gene_to_ncbi)
        gene_sets = geneset.GeneSets(
            sets=[geneset.GeneSet(gs_id=set_id, genes=set(members))
                  for set_id, members in multimap.items()]
        )

        pathways = pathway_enrichment(gene_sets, genes, reference,
                                      callback=progress)

        # Ensure that pathway entries are pre-cached for later use in the
        # list/tree view
        pathway_cache = kegg.KEGGPathways()
        pathway_cache.pre_cache(pathways.keys(), progress_callback=progress)

        return pathways

    self.progressBarInit()
    self.setEnabled(False)
    self.infoLabel.setText("Retrieving...\n")

    progress = concurrent.methodinvoke(self, "setProgress", (float,))

    # The gene sets are read from ``self`` when the task function runs.
    task = concurrent.Task(
        function=lambda: run_enrichment(self.input_genes, self.ref_genes,
                                        progress))
    self._enrichTask = task
    task.finished.connect(self._onEnrichTaskFinished)
    self._executor.submit(task)
def setSelectedMart(self):
    """Select the mart at ``self.selectedDatabase`` and fetch its datasets.

    Error 0 is cleared and the widget is disabled, then the mart's
    ``datasets`` callable is submitted to ``self._executor``.  The task's
    result goes to ``self.setBioMartDatasets`` and an exception to
    ``self._handleException``.
    """
    self.mart = self.marts[self.selectedDatabase]
    self.error(0)
    self.setEnabled(False)

    datasets_task = concurrent.Task(function=self.mart.datasets)
    self._task = datasets_task
    datasets_task.resultReady.connect(self.setBioMartDatasets)
    datasets_task.exceptionReady.connect(self._handleException)
    self._executor.submit(datasets_task)
def _setServiceUrl(self):
    """Switch to the service chosen in the combo box and load its registry.

    Reads the item data of the combo box's current index.  When that data
    is ``None`` nothing happens.  Otherwise it is stored as
    ``self.selectedService`` and ``self._get_registry(url=service)`` is
    submitted to ``self._executor``; the result goes to
    ``self.setBioMartRegistry`` and an exception to
    ``self._handleException``.
    """
    current_index = self.serviceCombo.currentIndex()
    service = self.serviceCombo.itemData(current_index)
    if service is None:
        return

    self.selectedService = service
    registry_task = concurrent.Task(
        function=partial(self._get_registry, url=service))
    self._task = registry_task
    registry_task.resultReady.connect(self.setBioMartRegistry)
    registry_task.exceptionReady.connect(self._handleException)
    self._executor.submit(registry_task)
def fetch_indicators(self):
    """Start a background job that fetches a new indicator list.

    The main widget is set to blocking and disabled, then
    ``self._fetch_indicators`` is submitted to ``self._executor`` with the
    callable built by ``concurrent.methodinvoke`` for the main widget's
    ``set_progress`` method as its bound argument.  The task's ``finished``
    signal is connected to ``self._fetch_indicators_finished`` and its
    ``exceptionReady`` signal to ``self._init_exception``.
    """
    main_widget = self._main_widget
    main_widget.setBlocking(True)
    main_widget.setEnabled(False)

    report_progress = concurrent.methodinvoke(
        self._main_widget, "set_progress", (float, ))
    worker = partial(self._fetch_indicators, report_progress)

    task = concurrent.Task(function=worker)
    self._fetch_task = task
    task.finished.connect(self._fetch_indicators_finished)
    task.exceptionReady.connect(self._init_exception)
    self._executor.submit(task)
def commit(self):
    """Build a BioMart query from the current selections and run it.

    The attributes (and, when ``SHOW_FILTERS`` is set, the filters) chosen
    on the currently visible configuration pages are collected and added to
    a query created by ``self.registry.query``.  The query's ``get_table``
    is then submitted to ``self._executor``; its result is delivered to
    ``self.dataReady`` and an exception to ``self._handleException``.

    Returns early, with error 100 set, when FASTA output is requested but
    ``self.hasBiopython`` is false.
    """
    pageconf = self.attributePagesTabWidget.currentWidget().widget()
    out_format = pageconf.outFormats

    self.error(100)
    if not self.hasBiopython and out_format.lower() == "fasta":
        self.error(100, "Cannot parse FASTA format")
        return

    # Every collected entry belongs to the currently selected dataset.
    # Bind it once, up front, so that it is defined for the virtual-schema
    # lookup below even when the pages yield no attributes or filters.
    dataset = self.dataset

    page_queries = [pageconf.query()]
    if self.SHOW_FILTERS:
        filter_page = self.filterPagesTabWidget.currentWidget().widget()
        page_queries.append(filter_page.query())

    # dataset -> ([attribute names], [(filter name, value), ...])
    bydatasets = defaultdict(lambda: ([], []))
    for page_query in page_queries:
        for conftype, tree, val in page_query:
            if conftype == "Attribute":
                bydatasets[dataset][0].append(tree.internalName)
            elif conftype == "Filter":
                bydatasets[dataset][1].append((tree.internalName, val))

    query = self.registry.query(
        format="TSV" if "tsv" in out_format.lower() else out_format.upper(),
        uniqueRows=self.uniqueRows,
        virtualSchema=dataset.virtualSchema,
        serverVirtualSchema=dataset.serverVirtualSchema
    )

    for target, (attributes, filters) in bydatasets.items():
        query.set_dataset(target if target else self.dataset)
        for attr in attributes:
            query.add_attribute(attr)
        for filter_name, value in filters:
            query.add_filter(filter_name, value)

    self.error(0)
    self.setEnabled(False)
    self._task = task = concurrent.Task(function=query.get_table)
    task.resultReady.connect(self.dataReady)
    task.exceptionReady.connect(self._handleException)
    self._executor.submit(task)
def commit(self):
    """Start a background fetch of the climate dataset.

    The widget is disabled and ``self._set_progress_flag`` is raised, then
    ``self._fetch_dataset`` is submitted to ``self._executor`` with the
    callable built by ``concurrent.methodinvoke`` for this object's
    ``set_progress`` method as its bound argument.  The task's ``finished``
    signal is connected to ``self._fetch_dataset_finished`` and its
    ``exceptionReady`` signal to ``self._fetch_dataset_exception``.
    """
    logger.debug("commit data")
    self.setEnabled(False)
    self._set_progress_flag = True

    report_progress = concurrent.methodinvoke(
        self, "set_progress", (float, ))
    worker = partial(self._fetch_dataset, report_progress)

    task = concurrent.Task(function=worker)
    self._fetch_task = task
    task.finished.connect(self._fetch_dataset_finished)
    task.exceptionReady.connect(self._fetch_dataset_exception)
    self._executor.submit(task)
def runNormalizationAsync(self):
    """
    Submit MA centering and z-score estimation as a background task.

    The centering method is looked up in ``CENTER_METHODS`` by
    ``self.selectedCenterMethod``; methods 1 and 2 are called with the
    lowess arguments and a progress callback.  Centering reports into the
    first half of the progress range and ``expression.MA_zscore`` into the
    second half.  The task's result goes to ``self.onResultsReady`` and an
    exception to ``self.onException``; the widget is disabled and set to
    blocking before the task is submitted.
    """
    self.error(0)
    self.progressBarInit(processEvents=None)
    self.progressBarSet(0.0, processEvents=None)

    G, R = self.getMerged()

    center_method = self.CENTER_METHODS[self.selectedCenterMethod][1]
    use_lowess = self.selectedCenterMethod in [1, 2]

    @withexcepthook
    def run(report=lambda value: None):
        if use_lowess:
            def centering_progress(val):
                # Centering covers the first half of the progress range.
                return report(val / 2)

            G_centered, R_centered = center_method(
                G, R, f=2. / 3., iter=1,
                progressCallback=centering_progress)
        else:
            G_centered, R_centered = center_method(G, R)
        report(50)

        def zscore_progress(val):
            # z-score estimation covers the second half.
            return report(50 + val / 2)

        z_scores = expression.MA_zscore(
            G_centered, R_centered, 1. / 3.,
            progressCallback=zscore_progress)

        return G_centered, R_centered, z_scores

    self.progressDiscard = ProgressBarDiscard(self, self)

    progress = concurrent.methodinvoke(
        self.progressDiscard, "progressBarSet", (float, ))

    task = concurrent.Task(function=partial(run, progress))
    self._task = task
    task.resultReady.connect(self.onResultsReady)
    task.exceptionReady.connect(self.onException)

    self.setEnabled(False)
    self.setBlocking(True)

    self._executor.submit(task)
def setSelectedDataset(self):
    """Select the dataset at ``self.selectedDataset`` and load its config.

    Error 0 is cleared and the widget is disabled, then a task that reads
    the dataset's configuration from its connection is submitted to
    ``self._executor``.  The result goes to
    ``self.setBioMartConfiguration`` and an exception to
    ``self._handleException``.
    """
    self.dataset = self.datasets[self.selectedDataset]
    self.error(0)
    self.setEnabled(False)

    def get_configuration(dataset):
        """Return everything read from the dataset's configuration stream."""
        stream = dataset.connection.configuration(
            dataset=dataset.internalName,
            virtualSchema=dataset.serverVirtualSchema)
        return stream.read()

    config_task = concurrent.Task(
        function=partial(get_configuration, self.dataset))
    self._task = config_task
    config_task.resultReady.connect(self.setBioMartConfiguration)
    config_task.exceptionReady.connect(self._handleException)
    self._executor.submit(config_task)
def __init__(self, parent=None):
    """Build the widget's GUI and start fetching organism definitions.

    The control area and the main area (a vertical splitter holding the
    pathway view and the pathway list) are created, the data-related
    attributes are reset, and a background task that pre-caches KEGG
    genome entries is submitted.  The widget is disabled and blocking
    while that task runs; ``__initialize_finish`` is connected to the
    task's ``finished`` signal.
    """
    super().__init__(parent)

    self._changedFlag = False
    self.__invalidated = False
    # The widget starts in the ``Initializing`` run state.
    self.__runstate = OWKEGGPathwayBrowser.Initializing
    self.__in_setProgress = False

    # --- Control area ---
    self.controlArea.setMaximumWidth(250)
    box = gui.widgetBox(self.controlArea, "Info")
    self.infoLabel = gui.widgetLabel(box, "No data on input\n")

    gui.separator(self.controlArea)

    gui.checkBox(self.controlArea, self, "useReference", "From signal", box="Reference", callback=self.Update)

    gui.separator(self.controlArea)

    gui.checkBox(
        self.controlArea,
        self,
        "showOrthology",
        "Show pathways in full orthology",
        box="Orthology",
        callback=self.UpdateListView,
    )

    gui.checkBox(
        self.controlArea, self, "autoResize", "Resize to fit", box="Image", callback=self.UpdatePathwayViewTransform,
    )

    box = gui.widgetBox(self.controlArea, "Cache Control")

    gui.button(
        box,
        self,
        "Clear cache",
        callback=self.ClearCache,
        tooltip="Clear all locally cached KEGG data.",
        default=False,
        autoDefault=False,
    )

    gui.separator(self.controlArea)

    gui.auto_commit(self.controlArea, self, "autoCommit", "Commit")

    gui.rubber(self.controlArea)

    # --- Main area: pathway view and pathway list in a vertical splitter ---
    spliter = QSplitter(Qt.Vertical, self.mainArea)
    self.pathwayView = PathwayView(self, spliter)
    self.pathwayView.scene().selectionChanged.connect(
        self._onSelectionChanged)
    self.mainArea.layout().addWidget(spliter)

    self.listView = QTreeWidget(allColumnsShowFocus=True,
                                selectionMode=QTreeWidget.SingleSelection,
                                sortingEnabled=True,
                                maximumHeight=200)

    spliter.addWidget(self.listView)

    self.listView.setColumnCount(4)
    self.listView.setHeaderLabels(
        ["Pathway", "P value", "Genes", "Reference"])

    # Changing the selected list entry refreshes the pathway view.
    self.listView.itemSelectionChanged.connect(self.UpdatePathwayView)

    select = QAction("Select All", self, shortcut=QKeySequence.SelectAll)
    select.triggered.connect(self.selectAll)
    self.addAction(select)

    # --- Input data state ---
    self.data = None
    self.input_genes = []
    self.tax_id = None
    self.use_attr_names = None
    self.gene_id_attribute = None
    self.gene_id_column = None

    # --- Reference data state ---
    self.ref_data = None
    self.ref_genes = []
    self.ref_tax_id = None
    self.ref_use_attr_names = None
    self.ref_gene_id_attribute = None
    self.ref_gene_id_column = None

    self.pathways = {}
    self.ncbi_gene_map = []

    self.org = None

    self._executor = concurrent.ThreadExecutor()
    # Keep the widget disabled and blocking until initialization is done.
    self.setEnabled(False)
    self.setBlocking(True)
    # Callable handed to the background task for reporting progress
    # through this widget's ``setProgress`` method.
    progress = concurrent.methodinvoke(self, "setProgress", (float, ))

    def get_genome():
        """Return a KEGGGenome with the common org entries precached."""
        genome = kegg.KEGGGenome()

        essential = genome.essential_organisms()
        common = genome.common_organisms()
        # Remove duplicates of essential from common.
        # (essential + common list as defined here will be used in the
        # GUI.)
        common = [c for c in common if c not in essential]

        # TODO: Add option to specify additional organisms not
        # in the common list.

        keys = list(map(genome.org_code_to_entry_key, essential + common))

        genome.pre_cache(keys, progress_callback=progress)
        return (keys, genome)

    self._genomeTask = task = concurrent.Task(function=get_genome)
    task.finished.connect(self.__initialize_finish)

    self.progressBarInit()
    self.infoLabel.setText("Fetching organism definitions\n")
    self._executor.submit(task)
def __init__(self, parent=None):
    """Build the widget's GUI and start loading the BioMart registry.

    The control area (cache button, service / database / dataset combo
    boxes, result options) and the main tab widget are created.  The
    widget is then disabled and one background task running
    ``self._get_registry(url=self.selectedService)`` is submitted; its
    result goes to ``self.setBioMartRegistry`` and an exception to
    ``self._handleException``.

    ``self.hasBiopython`` records whether ``Bio.SeqIO`` can be imported;
    warning 100 is shown when it cannot.
    """
    super().__init__(parent)

    self.selectedDatabase = 0
    self.uniqueRows = True

    gui.button(gui.widgetBox(self.controlArea, "Cache", addSpace=True),
               self, "Clear cache",
               tooltip="Clear saved query results",
               callback=self.clearCache)

    self.serviceindex = 0
    self.serviceCombo = gui.comboBox(
        self.controlArea, self, "serviceindex", "Mart Service",
        callback=self._setServiceUrl
    )
    for name, url in MartServices:
        self.serviceCombo.addItem(name, userData=url)
    # Show the stored service as the current combo entry.
    idx = self.serviceCombo.findData(self.selectedService, Qt.UserRole)
    self.serviceCombo.setCurrentIndex(idx)

    self.martsCombo = gui.comboBox(
        self.controlArea, self, "selectedDatabase", "Database",
        callback=self.setSelectedMart, addSpace=True)
    self.martsCombo.setMaximumWidth(250)

    self.datasetsCombo = gui.comboBox(
        self.controlArea, self, "selectedDataset", "Dataset",
        callback=self.setSelectedDataset, addSpace=True)
    self.datasetsCombo.setMaximumWidth(250)

    gui.rubber(self.controlArea)

    box = gui.widgetBox(self.controlArea, "Results")
    gui.checkBox(
        box, self, "uniqueRows", "Unique results only",
        tooltip="Return unique results only.",)

    self.commitButton = gui.button(
        box, self, "Get Results", callback=self.commit,
        tooltip="Query the BioMart server and output the results",
        autoDefault=True)
    self.commitButton.setEnabled(False)

    self.mainWidget = gui.widgetBox(
        self.mainArea, orientation=QStackedLayout())

    self.mainTab = QTabWidget()
    self.mainWidget.layout().addWidget(self.mainTab)

    self.attributesConfigurationBox = gui.createTabPage(self.mainTab, "Attributes")

    if self.SHOW_FILTERS:  # ??
        self.filtersConfigurationBox = gui.createTabPage(self.mainTab, "Filters")

    # Finish initialising plain state before any background work starts.
    self._afterInitQueue = []
    try:
        from Bio import SeqIO  # imported only to detect availability
        self.hasBiopython = True
    except ImportError:
        self.warning(100, "Biopython package not found.\nTo retrieve FASTA sequence data from BioMart install Biopython.")
        self.hasBiopython = False

    self.error(0)
    self.setEnabled(False)
    self._task = None
    self._executor = concurrent.ThreadExecutor(
        threadPool=QThreadPool(maxThreadCount=2)
    )

    # Fetch the registry exactly once.  Calling ``_setServiceUrl()`` here
    # as well would submit a second, identical registry task whenever the
    # stored service is one of the combo box entries.
    service = self.selectedService
    self._task = task = concurrent.Task(
        function=partial(self._get_registry, url=service))
    task.resultReady.connect(self.setBioMartRegistry)
    task.exceptionReady.connect(self._handleException)
    self._executor.submit(task)
def Update(self):
    """
    Update (recompute enriched pathways) the widget state.

    Does nothing without input data.  Gene names are extracted from
    ``self.data`` (and from ``self.refData`` when ``useReference`` is set);
    names containing "," are split into several genes unless
    ``useAttrNames`` is set.  The input genes are mapped to NCBI ids with
    ``GeneMatcher`` and the enrichment computation is submitted to
    ``self._executor`` with ``self._onEnrichTaskFinished`` connected to the
    task's ``finished`` signal.

    A ``ValueError`` while extracting the input genes is reported as
    error 0 and the update continues with an empty gene list.
    """
    if not self.data:
        return

    self.error(0)
    self.information(0)

    # XXX: Check data in setData, do not even allow this to be executed if
    # data has no genes
    try:
        genes = self.GeneNamesFromData(self.data)
    except ValueError:
        self.error(0, "Cannot extract gene names from input.")
        genes = []

    if not self.useAttrNames and any("," in gene for gene in genes):
        # Flatten in a single pass; concatenating the per-entry lists one
        # by one would be quadratic in the number of genes.
        genes = [name
                 for gene in genes
                 for name in split_and_strip(gene, ",")]
        self.information(0,
                         "Separators detected in input gene names. "
                         "Assuming multiple genes per instance.")

    self.queryGenes = genes

    self.information(1)
    reference = None
    if self.useReference and self.refData:
        reference = self.GeneNamesFromData(self.refData)
        if not self.useAttrNames \
                and any("," in gene for gene in reference):
            reference = [name
                         for gene in reference
                         for name in split_and_strip(gene, ",")]
            self.information(1,
                             "Separators detected in reference gene "
                             "names. Assuming multiple genes per "
                             "instance.")

    org_code = self.SelectedOrganismCode()

    from orangecontrib.bioinformatics.ncbi.gene import GeneMatcher
    gm = GeneMatcher(kegg.to_taxid(org_code))
    gm.genes = genes
    gm.run_matcher()
    # input gene name -> NCBI id (as a string)
    mapped_genes = {gene: str(ncbi_id)
                    for gene, ncbi_id in gm.map_input_to_ncbi().items()}

    def run_enrichment(org_code, genes, reference=None, progress=None):
        org = kegg.KEGGOrganism(org_code)
        if reference is None:
            reference = org.get_ncbi_ids()

        # This is here just to keep widget working without any major changes.
        # map not needed, geneMatcher will not work on widget level.
        unique_genes = genes
        unique_ref_genes = {gene: gene for gene in set(reference)}

        # We use the kegg pathway gene sets provided by 'geneset' for
        # the enrichment calculation.
        kegg_api = kegg.api.CachedKeggApi()
        linkmap = kegg_api.link(org.org_code, "pathway")
        converted_ids = kegg_api.conv(org.org_code, 'ncbi-geneid')
        kegg_sets = relation_list_to_multimap(
            linkmap,
            {gene.upper(): ncbi.split(':')[-1]
             for ncbi, gene in converted_ids}
        )

        kegg_sets = geneset.GeneSets(input=kegg_sets)

        pathways = pathway_enrichment(
            kegg_sets, unique_genes.values(),
            unique_ref_genes.keys(),
            callback=progress
        )
        # Ensure that pathway entries are pre-cached for later use in the
        # list/tree view
        kegg_pathways = kegg.KEGGPathways()
        kegg_pathways.pre_cache(
            pathways.keys(), progress_callback=progress
        )

        return pathways, org, unique_genes, unique_ref_genes

    self.progressBarInit()
    self.setEnabled(False)
    self.infoLabel.setText("Retrieving...\n")

    progress = concurrent.methodinvoke(self, "setProgress", (float,))

    self._enrichTask = concurrent.Task(
        function=lambda:
            run_enrichment(org_code, mapped_genes, reference, progress)
    )
    self._enrichTask.finished.connect(self._onEnrichTaskFinished)
    self._executor.submit(self._enrichTask)
def Update(self):
    """
    Update (recompute enriched pathways) the widget state.

    Does nothing without input data.  Gene names are extracted from
    ``self.data`` (and from ``self.refData`` when ``useReference`` is set);
    names containing "," are split into several genes unless
    ``useAttrNames`` is set.  The enrichment computation is then submitted
    to ``self._executor`` with ``self._onEnrichTaskFinished`` connected to
    the task's ``finished`` signal.

    A ``ValueError`` while extracting the input genes is reported as
    error 0 and the update continues with an empty gene list.
    """
    if not self.data:
        return

    self.error(0)
    self.information(0)

    # XXX: Check data in setData, do not even allow this to be executed if
    # data has no genes
    try:
        genes = self.GeneNamesFromData(self.data)
    except ValueError:
        self.error(0, "Cannot extract gene names from input.")
        genes = []

    if not self.useAttrNames and any("," in gene for gene in genes):
        # Flatten in a single pass; concatenating the per-entry lists one
        # by one would be quadratic in the number of genes.
        genes = [name
                 for gene in genes
                 for name in split_and_strip(gene, ",")]
        self.information(
            0, "Separators detected in input gene names. "
               "Assuming multiple genes per instance.")

    self.queryGenes = genes

    self.information(1)
    reference = None
    if self.useReference and self.refData:
        reference = self.GeneNamesFromData(self.refData)
        if not self.useAttrNames \
                and any("," in gene for gene in reference):
            reference = [name
                         for gene in reference
                         for name in split_and_strip(gene, ",")]
            self.information(
                1, "Separators detected in reference gene "
                   "names. Assuming multiple genes per "
                   "instance.")

    org_code = self.SelectedOrganismCode()

    def run_enrichment(org_code, genes, reference=None, progress=None):
        org = kegg.KEGGOrganism(org_code)
        if reference is None:
            reference = org.get_genes()

        # Map 'genes' and 'reference' sets to unique KEGG identifiers
        unique_genes, _, _ = org.get_unique_gene_ids(set(genes))
        unique_ref_genes, _, _ = org.get_unique_gene_ids(set(reference))

        taxid = kegg.to_taxid(org.org_code)
        # Map the taxid back to standard 'common' taxids
        # (as used by 'geneset') if applicable
        r_tax_map = {v: k for k, v in kegg.KEGGGenome.TAXID_MAP.items()}
        if taxid in r_tax_map:
            taxid = r_tax_map[taxid]

        # We use the kegg pathway gene sets provided by 'geneset' for
        # the enrichment calculation.

        # Ensure we are using the latest genesets
        # TODO: ?? Is updating the index enough?
        serverfiles.update(geneset.sfdomain, "index.pck")
        kegg_gs_collections = geneset.collections(
            (("KEGG", "pathways"), taxid))

        pathways = pathway_enrichment(kegg_gs_collections,
                                      unique_genes.keys(),
                                      unique_ref_genes.keys(),
                                      callback=progress)
        # Ensure that pathway entries are pre-cached for later use in the
        # list/tree view
        kegg_pathways = kegg.KEGGPathways()
        kegg_pathways.pre_cache(pathways.keys(),
                                progress_callback=progress)

        return pathways, org, unique_genes, unique_ref_genes

    self.progressBarInit()
    self.setEnabled(False)
    self.infoLabel.setText("Retrieving...\n")

    progress = concurrent.methodinvoke(self, "setProgress", (float, ))

    self._enrichTask = concurrent.Task(function=lambda: run_enrichment(
        org_code, genes, reference, progress))
    self._enrichTask.finished.connect(self._onEnrichTaskFinished)
    self._executor.submit(self._enrichTask)