def commit(self):
    """Start embedding the images referenced by the selected attribute.

    Any task that is still registered in ``self._task`` is cancelled
    first.  When there is no image attribute or no input data the outputs
    are cleared and nothing is started.  Otherwise the embedder returned by
    ``self.connect()`` is run on a background thread; progress is reported
    through the ``__progress_set`` slot and the finished future is handed
    to the ``__set_results`` slot.
    """
    if self._task is not None:
        self.cancel()

    if not self._image_attributes or self._input_data is None:
        self.clear_outputs()
        return

    embedder = self.connect()

    self.cancel_button.setDisabled(False)
    self.cb_image_attr.setDisabled(True)
    self.cb_embedder.setDisabled(True)

    file_paths_attr = self._image_attributes[self.cb_image_attr_current_id]
    file_paths = self._input_data[:, file_paths_attr].metas.flatten()
    file_paths_mask = file_paths == file_paths_attr.Unknown
    file_paths_valid = file_paths[~file_paths_mask]

    # One progress tick per valid path, spread evenly over 0..100.
    ticks = iter(np.linspace(0.0, 100.0, file_paths_valid.size))
    set_progress = qconcurrent.methodinvoke(
        self, "__progress_set", (float,))

    def advance(success=True):
        if success:
            set_progress(next(ticks))

    def cancel():
        # `task` is bound further down, before this closure can be called.
        task.future.cancel()
        task.cancelled = True
        task.embedder.set_canceled(True)

    def run_embedding():
        return embedder(
            self._input_data, col=file_paths_attr,
            image_processed_callback=advance)

    self.auto_commit_widget.setDisabled(True)
    self.progressBarInit()
    self.progressBarSet(0.0)
    self.setBlocking(True)

    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    f = executor.submit(run_embedding)
    # The executor is used for this single job only.  Without a shutdown
    # every call would leave an idle worker thread behind; wait=False lets
    # the submitted job finish and frees the thread afterwards.
    executor.shutdown(wait=False)
    f.add_done_callback(
        qconcurrent.methodinvoke(self, "__set_results", (object,)))

    task = self._task = namespace(
        file_paths_mask=file_paths_mask,
        file_paths_valid=file_paths_valid,
        file_paths=file_paths,
        embedder=embedder,
        cancelled=False,
        cancel=cancel,
        future=f,
    )
    self._log.debug("Starting embedding task for %i images",
                    file_paths.size)
    return
def _thread_func(self):
    """Loop forever: tick once per second and trigger every ``delay`` ticks.

    Each iteration invokes the ``_thread_tick`` slot, sleeps one second and
    increments ``self.current``.  Once ``self.current`` reaches
    ``self.delay`` the ``_thread_trigger`` slot is invoked,
    ``self._trigger_value`` is flipped and the counter restarts from zero.
    The loop has no exit condition.
    """
    # NOTE(review): both invokers are declared with an empty signature yet
    # are called with `self` as argument -- confirm this is intended.
    emit_tick = methodinvoke(self, "_thread_tick", ())
    emit_trigger = methodinvoke(self, "_thread_trigger", ())

    while True:
        emit_tick(self)
        time.sleep(1)
        self.current += 1
        if self.current >= self.delay:
            emit_trigger(self)
            self._trigger_value = not self._trigger_value
            self.current = 0
def updateInfoItems(self):
    """Collect gene names from the input and fetch their info records.

    Gene names come from the attribute names (``useAttr``) or from the
    values of the selected gene attribute.  The lookup itself
    (``ncbi_info``) runs as a ``Task`` on ``self.executor``;
    ``_onItemsCompleted`` is called when the task finishes.
    Does nothing when there is no input data.
    """
    self.warning(0)
    if self.data is None:
        return

    if self.useAttr:
        genes = [attr.name for attr in self.data.domain.attributes]
    elif self.attributes:
        attr = self.attributes[self.gene_attr]
        genes = [
            str(ex[attr]) for ex in self.data
            if not math.isnan(ex[attr])
        ]
    else:
        genes = []

    if not genes:
        self.warning(0, "Could not extract genes from input dataset.")
    self.warning(1)

    # Clamp the index so a stale setting cannot run past the list.
    org = self.organisms[min(self.organism_index, len(self.organisms) - 1)]
    info_getter = ncbi_info

    self.error(0)
    self.progressBarInit()
    self.setBlocking(True)
    self.setEnabled(False)
    self.infoLabel.setText("Retrieving info records.\n")
    self.genes = genes

    task = Task(function=partial(
        info_getter, org, genes,
        advance=methodinvoke(self, "advance", ())))
    # Connect before submitting: a task that completes very quickly could
    # otherwise emit `finished` before the slot is attached, and the
    # widget would stay blocked.
    task.finished.connect(self._onItemsCompleted)
    self.itemsfuture = self.executor.submit(task)
def score_genes(self, **kwargs):
    """Run gene enrichment scoring on the background executor.

    All keyword arguments are forwarded unchanged to
    ``self._score_genes``; a ``callback`` keyword is added that reports
    progress and aborts the computation once the task is cancelled.

    Note: neither filtering nor view notification happens here; that is
    done after the filters are applied.
    """
    # A still-pending task must be cancelled before a new one starts.
    if self._task is not None:
        self.cancel()
    assert self._task is None

    notify_progress = methodinvoke(self, "progress_advance", (bool,))

    def callback():
        # Reads self._task at call time, i.e. the task registered below.
        if self._task.cancelled:
            raise KeyboardInterrupt()
        notify_progress(self._task.cancelled)

    row_count = len(self.get_rows())
    self.parent.progress_bar = ProgressBar(self.parent, iterations=row_count)

    job = partial(self._score_genes, callback=callback, **kwargs)

    self._task = Task()
    self._task.future = self._executor.submit(job)
    self._task.watcher = FutureWatcher(self._task.future)
    self._task.watcher.done.connect(self._end_task)
def _start_progerss_task(self):
    """Submit ``self._dataset_progress`` as a background task.

    The worker receives an invoker for the ``set_progress`` slot as its
    only argument; exceptions raised by the task are routed to
    ``self._dataset_progress_exception``.
    """
    report = concurrent.methodinvoke(self, "set_progress", (float,))
    progress_task = concurrent.Task(
        function=partial(self._dataset_progress, report))
    progress_task.exceptionReady.connect(self._dataset_progress_exception)
    self._executor.submit(progress_task)
def commit(self):
    """Download the currently selected GDS data set in the background.

    Nothing happens when no data set is selected.  Otherwise the widget is
    disabled and blocked, and a task is submitted that makes sure the data
    set is downloaded, loads it and names it; ``_on_dataready`` is called
    when the task finishes.
    """
    if not self.currentGds:
        return

    self.error(0)
    self.progressBarInit(processEvents=None)

    # Restrict to one sample type only when exactly one group is selected
    # and samples are output as rows.
    _, groups = self.selectedSamples()
    if len(groups) == 1 and self.outputRows:
        sample_type = groups[0]
    else:
        sample_type = None

    self.setEnabled(False)
    self.setBlocking(True)

    progress = methodinvoke(self, "progressCompleted", (int, int))

    def get_data(gds_id, report_genes, transpose, sample_type, title):
        gds_ensure_downloaded(gds_id, progress)
        table = GDS(gds_id).get_data(
            report_genes=report_genes,
            transpose=transpose,
            sample_type=sample_type,
        )
        table.name = title
        return table

    loader = partial(
        get_data,
        self.currentGds["dataset_id"],
        report_genes=self.mergeSpots,
        transpose=self.outputRows,
        sample_type=sample_type,
        title=self.datasetName or self.currentGds["title"],
    )

    self._datatask = Task(function=loader)
    self._datatask.finished.connect(self._on_dataready)
    self._executor.submit(self._datatask)
def Update(self):
    """Recompute the enriched pathways and refresh the widget state.

    Returns immediately when there is no input data.  Input and reference
    gene sets are collected on the calling thread; the enrichment itself
    runs as a task on ``self._executor`` and ``_onEnrichTaskFinished`` is
    called when it is done.
    """
    if not self.data:
        return

    self.error(0)
    self.information(0)

    # XXX: Check data in setData, do not even allow this to be executed if
    # data has no genes
    try:
        self.__get_input_genes()
        self.input_genes = set(self.input_genes)
    except ValueError:
        self.error(0, "Cannot extract gene names from input.")

    self.information(1)

    self.org = kegg.KEGGOrganism(kegg.from_taxid(self.tax_id))

    if self.useReference and self.ref_data:
        self.__get_ref_genes()
        self.ref_genes = set(self.ref_genes)
    else:
        self.ref_genes = self.org.get_ncbi_ids()

    def run_enrichment(genes, reference, progress=None):
        # The KEGG pathway gene sets are rebuilt here from the pathway
        # links and the NCBI gene id conversion table.
        api = kegg.api.CachedKeggApi()
        pathway_links = api.link(self.org.org_code, "pathway")
        id_pairs = api.conv(self.org.org_code, 'ncbi-geneid')
        kegg_to_ncbi = {
            gene.upper(): ncbi.split(':')[-1] for ncbi, gene in id_pairs
        }
        pathway_members = relation_list_to_multimap(
            pathway_links, kegg_to_ncbi)
        gene_sets = geneset.GeneSets(
            sets=[
                geneset.GeneSet(gs_id=set_id, genes=set(members))
                for set_id, members in pathway_members.items()
            ]
        )

        pathways = pathway_enrichment(
            gene_sets, genes, reference, callback=progress)

        # Pre-cache the pathway entries for later use in the list/tree
        # view.
        kegg.KEGGPathways().pre_cache(
            pathways.keys(), progress_callback=progress)
        return pathways

    self.progressBarInit()
    self.setEnabled(False)
    self.infoLabel.setText("Retrieving...\n")

    progress = concurrent.methodinvoke(self, "setProgress", (float,))

    def enrich():
        # The gene sets are looked up on `self` when the task runs.
        return run_enrichment(self.input_genes, self.ref_genes, progress)

    self._enrichTask = concurrent.Task(function=enrich)
    self._enrichTask.finished.connect(self._onEnrichTaskFinished)
    self._executor.submit(self._enrichTask)
def test_methodinvoke(self):
    """A ``methodinvoke`` callable used in a worker runs the slot here.

    The worker passes its own thread as the slot argument, so the test can
    check that the call originated elsewhere while the slot body executed
    in the current (test) thread.
    """
    executor = ThreadExecutor()
    # state[0]: value received by the slot, state[1]: thread it ran in.
    state = [None, None]

    class StateSetter(QObject):
        @pyqtSlot(object)
        def set_state(self, value):
            state[0] = value
            state[1] = QThread.currentThread()

    def func(callback):
        callback(QThread.currentThread())

    obj = StateSetter()
    try:
        f1 = executor.submit(func, methodinvoke(obj, "set_state", (object,)))
        f1.result()

        # So invoked method can be called from the event loop
        self.app.processEvents()

        self.assertIs(state[1], QThread.currentThread(),
                      "set_state was called from the wrong thread")

        self.assertIsNot(state[0], QThread.currentThread(),
                         "set_state was invoked in the main thread")
    finally:
        # Shut the executor down even when an assertion fails so its
        # worker thread does not outlive the test.
        executor.shutdown(wait=True)
def fetch_network():
    """Build the gene network once gene info and the PPI database are ready.

    Waits for the ``geneinfo_f`` future, fetches the interaction database
    for ``source``/``db_taxid`` and returns the result of
    ``get_gene_network``; progress is reported to the ``set_progress``
    slot.  All inputs come from the enclosing scope.
    """
    gene_info = geneinfo_f.result()
    ppi_database = fetch_ppidb(source, db_taxid, progress)
    report_progress = methodinvoke(self, "set_progress", (float,))
    return get_gene_network(
        ppi_database,
        gene_info,
        db_taxid,
        query_genes,
        include_neighborhood=include_neighborhood,
        min_score=min_score,
        progress=report_progress,
    )
def test_methodinvoke(self):
    """Check which thread a ``methodinvoke`` slot call ends up running in.

    A worker calls the invoker with its own ``QThread``; after the event
    loop is processed the slot must have run in the current thread and
    must have received a different thread object as its argument.
    """
    executor = ThreadExecutor()
    # [value passed to the slot, thread the slot executed in]
    state = [None, None]

    class StateSetter(QObject):
        @pyqtSlot(object)
        def set_state(self, value):
            state[0] = value
            state[1] = QThread.currentThread()

    def func(callback):
        callback(QThread.currentThread())

    obj = StateSetter()
    try:
        f1 = executor.submit(
            func, methodinvoke(obj, "set_state", (object, )))
        f1.result()

        # So invoked method can be called from the event loop
        self.app.processEvents()

        self.assertIs(
            state[1],
            QThread.currentThread(),
            "set_state was called from the wrong thread",
        )
        self.assertIsNot(
            state[0],
            QThread.currentThread(),
            "set_state was invoked in the main thread",
        )
    finally:
        # Always release the worker thread, including on assertion
        # failure.
        executor.shutdown(wait=True)
def updateInfoItems(self):
    """Collect gene names from the input and fetch their info records.

    Gene names come from the attribute names (``useAttr``) or from the
    values of the selected gene attribute.  The info source is chosen by
    ``self.infoSource()``; the lookup runs as a ``Task`` on
    ``self.executor`` and ``_onItemsCompleted`` is called when it
    finishes.  Does nothing when there is no input data.
    """
    self.warning(0)
    if self.data is None:
        return

    # NOTE(review): the value returned here was overwritten on every
    # branch below; the call is kept only in case it has side effects --
    # confirm and remove.
    self.inputGenes()

    if self.useAttr:
        genes = [attr.name for attr in self.data.domain.attributes]
    elif self.attributes:
        attr = self.attributes[self.gene_attr]
        genes = [str(ex[attr]) for ex in self.data
                 if not math.isnan(ex[attr])]
    else:
        genes = []

    if not genes:
        self.warning(0, "Could not extract genes from input dataset.")
    self.warning(1)

    # Clamp the index so a stale setting cannot run past the list.
    org = self.organisms[min(self.organism_index, len(self.organisms) - 1)]
    _, info_getter = self.infoSource()

    self.error(0)

    # The dictyExpress link and the alternative source checkbox are shown
    # only for the DICTY_TAXID organism.
    self.updateDictyExpressLink(genes, show=org == DICTY_TAXID)
    self.altSourceCheck.setVisible(org == DICTY_TAXID)

    self.progressBarInit()
    self.setBlocking(True)
    self.setEnabled(False)
    self.infoLabel.setText("Retrieving info records.\n")
    self.genes = genes

    task = Task(function=partial(
        info_getter, org, genes,
        advance=methodinvoke(self, "advance", ())))
    # Connect before submitting: a task that completes very quickly could
    # otherwise emit `finished` before the slot is attached, and the
    # widget would stay blocked.
    task.finished.connect(self._onItemsCompleted)
    self.itemsfuture = self.executor.submit(task)
def _start_progerss_task(self):
    """Run ``self._dataset_progress`` on the executor.

    The function is given a thread-safe invoker for the ``set_progress``
    slot; a failure of the task is reported to
    ``self._dataset_progress_exception``.
    """
    set_progress = concurrent.methodinvoke(self, "set_progress", (float,))
    worker = partial(self._dataset_progress, set_progress)

    progress_task = concurrent.Task(function=worker)
    progress_task.exceptionReady.connect(self._dataset_progress_exception)
    self._executor.submit(progress_task)
def compute(self):
    """Run the selected significance test on the chosen variables.

    Shows an error and returns when no explanatory variables or no target
    is chosen.  Otherwise the chosen columns are decoded to their value
    labels, the matching test function is picked (permutation test, or a
    test depending on the target type and the selected statistic) and
    submitted to ``self._executor``; ``on_computed`` is called when the
    future is done.
    """
    if not self.chosen_X:
        self.Error.no_vars_selected()
        return
    if not self.chosen_y:
        self.Error.no_class_selected()
        return
    self.btn_compute.setEnabled(False)
    yvar = self.data.domain[self.chosen_y]

    def get_col(var, col):
        # Map value indices to their labels; missing entries become -1 and
        # therefore pick the NaN appended at the end of `values`.
        # (The original also built a pd.Categorical here and threw it
        # away; that dead computation is removed.)
        values = np.array(list(var.values) + [np.nan], dtype=object)
        col = pd.Series(col).fillna(-1).astype(int)
        return values[col]

    X = np.column_stack([
        get_col(var, self.data.get_column_view(var)[0])
        for var in (self.data.domain[i] for i in self.chosen_X)
    ])
    X = pd.DataFrame(X, columns=self.chosen_X)
    y = pd.Series(self.data.get_column_view(yvar)[0])

    test, args, kwargs = None, (X, y), dict(min_count=self.min_count)
    if self.is_permutation:
        statistic = ('chi2' if yvar.is_discrete
                     else self.TEST_STATISTICS[self.test_statistic])
        test = perm_test
        kwargs.update(
            statistic=statistic,
            n_jobs=-2,
            callback=methodinvoke(self, "setProgressValue", (int, int)))
    else:
        if yvar.is_discrete:
            if len(yvar.values) > 2:
                test = chi2_test
            else:
                test = hyper_test
                # The two-valued target is passed as booleans.
                args = (X, y.astype(bool))
        else:
            test = {
                'mean': t_test,
                'variance': fligner_killeen_test,
                'median': mannwhitneyu_test,
                'minimum': gumbel_min_test,
                'maximum': gumbel_max_test,
            }[self.test_statistic]

    self._task = task = self.Task()
    self.progressBarInit()
    task.future = self._executor.submit(test, *args, **kwargs)
    task.watcher = FutureWatcher(task.future)
    task.watcher.done.connect(self.on_computed)
def fetch_network():
    """Fetch the PPI database and derive the gene network from it.

    Blocks on the ``geneinfo_f`` future first.  Every input is taken from
    the enclosing scope; the network construction reports its progress to
    the ``set_progress`` slot.
    """
    info = geneinfo_f.result()
    database = fetch_ppidb(source, db_taxid, progress)
    network_options = dict(
        include_neighborhood=include_neighborhood,
        min_score=min_score,
        progress=methodinvoke(self, "set_progress", (float, )),
    )
    return get_gene_network(
        database, info, db_taxid, query_genes, **network_options)
def download(self):
    """Start downloading the selected tables, or cancel a running download.

    When a task is already registered the call cancels it and returns.
    Otherwise the checked tables and selected rows are remembered, and
    ``getdf`` is run on ``self._executor``; ``download_complete`` is
    called when the future is done.
    """
    if self._task is not None:
        self.cancel()
        return

    self.btn_connect.setEnabled(False)
    self.setCursor(Qt.BusyCursor)
    self.btn_download.setText(self.LABEL_CANCEL)

    # Remember selected tables
    layout = self.box_include_data.layout()
    tables = self.included_data = {
        w.text()
        for w in (layout.itemAt(i).widget() for i in range(layout.count()))
        if w.isChecked()
    }

    # Remember selected rows
    rows = [mi.row() for mi in self.view.selectionModel().selectedRows(0)]
    self.selection = rows
    nodes = [
        self.model[row][0] for row in self.model.mapToSourceRows(rows)
    ]

    self._task = task = self.Task()
    self.progressBarInit()
    set_progress = methodinvoke(self, "setProgressValue", (float, ))

    # Read widget state here, on the calling thread, instead of from the
    # worker: Qt widgets must not be touched from other threads.
    start_time = self.date_from.textFromDateTime(self.date_from.dateTime())
    end_time = self.date_to.textFromDateTime(self.date_to.dateTime())
    freq = self.RESOLUTION[self.sample_resolution]
    interpolate = self.INTERPOLATION[self.sample_interpolation]
    total_steps = len(tables) + 1

    def func():
        if not nodes:
            raise self.Cancelled

        # The counter must live outside the callback.  It used to be a
        # default argument, which starts from 0 again on every call, so
        # the reported progress never moved past 1 / total_steps.
        completed = 0

        def progress_advance(*_):
            nonlocal completed
            if task.cancelled:
                raise self.Cancelled
            completed += 1
            set_progress(completed / total_steps)

        return getdf(
            tables,
            nodeid=nodes,
            start_time=start_time,
            end_time=end_time,
            freq=freq,
            interpolate=interpolate,
            callback=progress_advance)

    task.future = self._executor.submit(func)
    task.watcher = FutureWatcher(task.future)
    task.watcher.done.connect(self.download_complete)
def _update(self):
    """Start training the model on the background executor.

    A pending task is cancelled first.  Nothing is started when both data
    sets are missing or when there is no model.  ``train_model`` is run
    with a callback that reports progress, records the loss and aborts
    once the task is cancelled; ``_task_finished`` is called from the GUI
    thread when the future completes.
    """
    # Cancel whatever is still running before starting over.
    if self._task is not None:
        self.cancel()
    assert self._task is None

    no_data = self.train_data is None and self.test_data is None
    if no_data or self.model is None:
        return

    # Task state shared with the callback below.
    job = Task()
    self._task = job

    # Progress is delivered to the widget's slot in a thread safe manner.
    set_progress = methodinvoke(self, "setProgressValue", (float, ))

    def callback(finished, loss):
        # Cooperative cancellation: raise from inside the computation.
        # Only usable with code that cleans up after itself when an
        # exception passes through it.
        if job.cancelled:
            raise KeyboardInterrupt()
        set_progress(finished * 100)
        self.output_results.append([self.index, loss])
        self.index += 1

    self.progressBarInit()

    training_args = (
        self.model, self.epochs, self.train_data, self.test_data,
        self.device, self.criterion, self.optimizer, self.train_button,
    )
    fit_model = partial(train_model, *training_args, callback=callback)
    job.future = self._executor.submit(fit_model)

    # FutureWatcher makes sure `_task_finished` is called from the main
    # GUI thread by Qt's event loop.
    job.watcher = FutureWatcher(job.future)
    job.watcher.done.connect(self._task_finished)
def fetch_indicators(self):
    """Fetch a new indicator list in a background task.

    The main widget is blocked and disabled while the task runs.  On
    completion ``_fetch_indicators_finished`` is called; an exception is
    passed to ``_init_exception``.
    """
    widget = self._main_widget
    widget.setBlocking(True)
    widget.setEnabled(False)

    report = concurrent.methodinvoke(
        self._main_widget, "set_progress", (float, ))
    self._fetch_task = concurrent.Task(
        function=partial(self._fetch_indicators, report))
    self._fetch_task.finished.connect(self._fetch_indicators_finished)
    self._fetch_task.exceptionReady.connect(self._init_exception)
    self._executor.submit(self._fetch_task)
def commit(self):
    """Fetch the climate data in the background and send a new table.

    The widget is disabled and ``_set_progress_flag`` is raised before the
    task is submitted.  ``_fetch_dataset_finished`` handles completion and
    ``_fetch_dataset_exception`` handles failures.
    """
    logger.debug("commit data")
    self.setEnabled(False)
    self._set_progress_flag = True

    report = concurrent.methodinvoke(self, "set_progress", (float, ))
    fetch = partial(self._fetch_dataset, report)

    self._fetch_task = concurrent.Task(function=fetch)
    self._fetch_task.finished.connect(self._fetch_dataset_finished)
    self._fetch_task.exceptionReady.connect(self._fetch_dataset_exception)
    self._executor.submit(self._fetch_task)
def fetch_indicators(self):
    """Start the background job that downloads a new indicator list.

    Blocks and disables the main widget, then submits
    ``self._fetch_indicators`` with a progress invoker bound to the main
    widget's ``set_progress`` slot.
    """
    self._main_widget.setBlocking(True)
    self._main_widget.setEnabled(False)

    set_progress = concurrent.methodinvoke(
        self._main_widget, "set_progress", (float,))
    job = partial(self._fetch_indicators, set_progress)

    self._fetch_task = concurrent.Task(function=job)
    # Completion and failure are handled by separate slots.
    self._fetch_task.finished.connect(self._fetch_indicators_finished)
    self._fetch_task.exceptionReady.connect(self._init_exception)
    self._executor.submit(self._fetch_task)
def RetrieveFilesList(self):
    """Fetch the list of files in a background task.

    Hides the retry button, clears warning 0 and resets the progress range
    to 0..3.  The result is delivered to ``SetFilesList`` and an exception
    to ``HandleError``; the widget is disabled once the task is submitted.
    """
    self.retryButton.hide()
    self.warning(0)
    self.progress.setRange(0, 3)

    advance = methodinvoke(self.progress, "advance")
    task = Task(function=partial(retrieveFilesList, advance))
    task.resultReady.connect(self.SetFilesList)
    task.exceptionReady.connect(self.HandleError)

    self.executor.submit(task)
    self.setEnabled(False)
def set_agent(self, agent):
    """Take a private copy of ``agent`` and start training it.

    Nothing happens when ``agent`` is ``None``.  Otherwise the agent is
    prepared for pickling and deep-copied; the copy remembers the
    original's episode count, training results and memory as its initial
    state, and training is started with ``on_finish`` as the completion
    slot.
    """
    if agent is None:
        return

    agent.prepare_to_pickle()

    own_agent = deepcopy(agent)
    self.agent = own_agent
    own_agent.initial_trained_episodes = agent.trained_episodes
    own_agent.initial_train_results = deepcopy(agent.train_results)
    own_agent.initial_memory = deepcopy(agent.memory)

    self.environment_id = self.agent.environment_id

    self.agent.train(
        self.episodes(),
        self.seconds(),
        self,
        methodinvoke(self, "on_finish"),
    )
def commit(self):
    """Start the background task that fetches the climate data.

    Disables the widget, raises ``_set_progress_flag`` and submits
    ``self._fetch_dataset`` together with an invoker for the
    ``set_progress`` slot.
    """
    logger.debug("commit data")

    self.setEnabled(False)
    self._set_progress_flag = True

    self._fetch_task = concurrent.Task(
        function=partial(
            self._fetch_dataset,
            concurrent.methodinvoke(self, "set_progress", (float,)),
        )
    )
    # Success and failure are reported through different signals.
    self._fetch_task.finished.connect(self._fetch_dataset_finished)
    self._fetch_task.exceptionReady.connect(self._fetch_dataset_exception)

    self._executor.submit(self._fetch_task)
def init_gene_sets(self, reference_genes=None):
    """Populate the gene set view on the background executor.

    :param reference_genes: reference genes to score against; when
        ``None`` the genes of ``self.gene_sets_obj`` are used.

    A pending task is cancelled first.  The selected hierarchies are
    stored, the settings context is closed before the task is submitted
    and reopened for ``self.tax_id`` afterwards.
    ``_init_gene_sets_finished`` is called when the future is done.
    """
    if self._task is not None:
        self.cancel()
    assert self._task is None

    self._task = Task()
    advance = methodinvoke(self, "progress_advance")

    def callback():
        if self._task.cancelled:
            raise KeyboardInterrupt()
        if self.progress_bar:
            advance()

    if reference_genes is None:
        reference_genes = self.gene_sets_obj.genes()

    self.init_item_model()

    sets_to_display = self.get_hierarchies(only_selected=True)
    # save setting on selected hierarchies
    self.stored_selections = sets_to_display
    # save context
    self.closeContext()

    min_count = self.min_count if self.use_min_count else 1
    populate = partial(
        self.set_items,
        self.gene_sets_obj,
        sets_to_display,
        set(self.input_genes),
        reference_genes,
        min_count,
        callback=callback,
    )

    # One progress step per gene set in the displayed hierarchies.
    hierarchy_map = self.gene_sets_obj.map_hierarchy_to_sets()
    progress_iterations = sum(
        len(g_set)
        for hier, g_set in hierarchy_map.items()
        if hier in sets_to_display
    )
    self.progress_bar = ProgressBar(self, iterations=progress_iterations)

    self._task.future = self._executor.submit(populate)
    self._task.watcher = FutureWatcher(self._task.future)
    self._task.watcher.done.connect(self._init_gene_sets_finished)

    self.openContext(self.tax_id)
def commit(self):
    """Fetch the gene network for the query genes in the background.

    Gene info is fetched once and kept as a future in ``self.geneinfo``.
    The network task waits for it, downloads the interaction database for
    the selected source and builds the network; ``_on_result_ready`` is
    called when the task finishes.

    Raises ``ValueError`` when the source maps the taxonomy id to
    ``None``.
    """
    include_neighborhood = self.include_neighborhood
    query_genes = self.query_genes()
    source = SOURCES[self.network_source]

    min_score = None
    if source.score_filter:
        min_score = self.min_score
        assert source.name == "STRING"
        min_score = min_score * 1000

    taxid = self.taxid
    progress = methodinvoke(self, "advance")

    if self.geneinfo is None:
        self.geneinfo = self.executor.submit(
            fetch_ncbi_geneinfo, taxid, progress)
    geneinfo_f = self.geneinfo

    # Sources may use a different taxonomy id than the one selected.
    db_taxid = source.tax_mapping.get(taxid, taxid)
    if db_taxid is None:
        raise ValueError("invalid taxid for this network")

    def fetch_network():
        gene_info = geneinfo_f.result()
        ppi_database = fetch_ppidb(source, db_taxid, progress)
        report = methodinvoke(self, "set_progress", (float, ))
        return get_gene_network(
            ppi_database, gene_info, db_taxid, query_genes,
            include_neighborhood=include_neighborhood,
            min_score=min_score,
            progress=report)

    self.nettask = Task(function=fetch_network)
    self.nettask.finished.connect(self._on_result_ready)
    self.executor.submit(self.nettask)

    self.setBlocking(True)
    self.setEnabled(False)
    self.progressBarInit()
    self._invalidated = False
    self._update_info()
def runNormalizationAsync(self):
    """Run MA centering and z-score estimation in a separate thread.

    Centering accounts for the first half of the progress range and the
    z-score estimation for the second half.  The result is delivered to
    ``onResultsReady`` and an exception to ``onException``.
    """
    self.error(0)
    self.progressBarInit(processEvents=None)
    self.progressBarSet(0.0, processEvents=None)

    G, R = self.getMerged()
    method_index = self.selectedCenterMethod
    center_method = self.CENTER_METHODS[method_index][1]
    use_lowess = self.selectedCenterMethod in [1, 2]

    @withexcepthook
    def run(progressCallback=lambda value: None):
        def first_half(val):
            return progressCallback(val / 2)

        def second_half(val):
            return progressCallback(50 + val / 2)

        if use_lowess:
            Gc, Rc = center_method(
                G, R, f=2. / 3., iter=1, progressCallback=first_half)
        else:
            Gc, Rc = center_method(G, R)
        progressCallback(50)
        z_scores = expression.MA_zscore(
            Gc, Rc, 1. / 3., progressCallback=second_half)
        return Gc, Rc, z_scores

    self.progressDiscard = ProgressBarDiscard(self, self)
    progress = concurrent.methodinvoke(
        self.progressDiscard, "progressBarSet", (float, ))

    self._task = concurrent.Task(function=partial(run, progress))
    self._task.resultReady.connect(self.onResultsReady)
    self._task.exceptionReady.connect(self.onException)

    self.setEnabled(False)
    self.setBlocking(True)

    self._executor.submit(self._task)
def commit(self):
    """Build the gene network for the current query in a background task.

    The NCBI gene info future is created on first use and reused
    afterwards.  When the source has a score filter the widget's minimum
    score is scaled by 1000.  ``_on_result_ready`` is called when the
    network task finishes.

    Raises ``ValueError`` when the source's taxonomy mapping yields
    ``None`` for the selected taxonomy id.
    """
    include_neighborhood = self.include_neighborhood
    query_genes = self.query_genes()
    source = SOURCES[self.network_source]

    if not source.score_filter:
        min_score = None
    else:
        min_score = self.min_score
        assert source.name == "STRING"
        min_score = min_score * 1000

    taxid = self.taxid
    progress = methodinvoke(self, "advance")

    if self.geneinfo is None:
        self.geneinfo = self.executor.submit(
            fetch_ncbi_geneinfo, taxid, progress
        )
    geneinfo_f = self.geneinfo

    taxmap = source.tax_mapping
    db_taxid = taxmap.get(taxid, taxid)
    if db_taxid is None:
        raise ValueError("invalid taxid for this network")

    def fetch_network():
        info = geneinfo_f.result()
        database = fetch_ppidb(source, db_taxid, progress)
        return get_gene_network(
            database,
            info,
            db_taxid,
            query_genes,
            include_neighborhood=include_neighborhood,
            min_score=min_score,
            progress=methodinvoke(self, "set_progress", (float,)),
        )

    network_task = Task(function=fetch_network)
    self.nettask = network_task
    network_task.finished.connect(self._on_result_ready)
    self.executor.submit(network_task)

    self.setBlocking(True)
    self.setEnabled(False)
    self.progressBarInit()
    self._invalidated = False
    self._update_info()
def runNormalizationAsync(self):
    """Center the merged channels and estimate z-scores off the GUI thread.

    Progress from the centering step is mapped onto 0..50 and progress
    from the z-score step onto 50..100.  ``onResultsReady`` receives the
    result and ``onException`` a failure; the widget stays disabled and
    blocked while the task runs.
    """
    self.error(0)
    self.progressBarInit(processEvents=None)
    self.progressBarSet(0.0, processEvents=None)

    G, R = self.getMerged()
    center_method = self.CENTER_METHODS[self.selectedCenterMethod][1]
    use_lowess = self.selectedCenterMethod in [1, 2]

    @withexcepthook
    def run(progressCallback=lambda value: None):
        if not use_lowess:
            centered = center_method(G, R)
        else:
            centered = center_method(
                G, R, f=2. / 3., iter=1,
                progressCallback=lambda val: progressCallback(val / 2))
        Gc, Rc = centered
        progressCallback(50)
        z_scores = expression.MA_zscore(
            Gc, Rc, 1. / 3.,
            progressCallback=lambda val: progressCallback(50 + val / 2))
        return Gc, Rc, z_scores

    self.progressDiscard = ProgressBarDiscard(self, self)
    report = concurrent.methodinvoke(
        self.progressDiscard, "progressBarSet", (float,))

    normalization = concurrent.Task(function=partial(run, report))
    self._task = normalization
    normalization.resultReady.connect(self.onResultsReady)
    normalization.exceptionReady.connect(self.onException)

    self.setEnabled(False)
    self.setBlocking(True)

    self._executor.submit(self._task)
def __init__(self, parent=None):
    """Build the widget GUI and start the taxonomy download.

    :param parent: parent object passed on to the base class.

    The widget starts out blocked; ``taxonomy.ensure_downloaded`` runs as
    a background task whose result is passed to ``initialize`` and whose
    exception is passed to ``_onInitializeError``.
    """
    # `super()` already binds the instance; passing `self` again shifted
    # it into the `parent` position.
    super().__init__(parent)

    self.selectionChangedFlag = False
    self.__initialized = False
    self.initfuture = None
    self.itemsfuture = None

    self.infoLabel = gui.widgetLabel(
        gui.widgetBox(self.controlArea, "Info", addSpace=True),
        "Initializing\n")

    self.organisms = None
    self.organismBox = gui.widgetBox(
        self.controlArea, "Organism", addSpace=True)
    self.organismComboBox = gui.comboBox(
        self.organismBox, self, "organism_index",
        callback=self._onSelectedOrganismChanged)

    # For now only support one alt source, with a checkbox
    # In the future this can be extended to multiple selections
    self.altSourceCheck = gui.checkBox(
        self.organismBox, self, "useAltSource",
        "Show information from dictyBase",
        callback=self.onAltSourceChange)
    self.altSourceCheck.hide()

    box = gui.widgetBox(self.controlArea, "Gene names", addSpace=True)
    self.geneAttrComboBox = gui.comboBox(
        box, self, "gene_attr", "Gene attribute",
        callback=self.updateInfoItems)
    self.geneAttrComboBox.setEnabled(not self.useAttr)
    cb = gui.checkBox(box, self, "useAttr", "Use attribute names",
                      callback=self.updateInfoItems)
    # The attribute selector is only usable while "useAttr" is unchecked.
    cb.toggled[bool].connect(self.geneAttrComboBox.setDisabled)

    gui.auto_commit(self.controlArea, self, "auto_commit", "Commit")

    # A label for dictyExpress link (Why oh god why???)
    self.dictyExpressBox = gui.widgetBox(
        self.controlArea, "Dicty Express")
    self.linkLabel = gui.widgetLabel(self.dictyExpressBox, "")
    self.linkLabel.setOpenExternalLinks(False)
    self.linkLabel.linkActivated.connect(self.onDictyExpressLink)
    self.dictyExpressBox.hide()

    gui.rubber(self.controlArea)

    gui.lineEdit(self.mainArea, self, "search_string", "Filter",
                 callbackOnType=True, callback=self.searchUpdate)

    self.treeWidget = QTreeView(
        self.mainArea,
        selectionMode=QTreeView.ExtendedSelection,
        rootIsDecorated=False,
        uniformRowHeights=True,
        sortingEnabled=True)
    self.treeWidget.setItemDelegate(
        gui.LinkStyledItemDelegate(self.treeWidget))
    self.treeWidget.viewport().setMouseTracking(True)
    self.mainArea.layout().addWidget(self.treeWidget)

    box = gui.widgetBox(self.mainArea, "", orientation="horizontal")
    gui.button(box, self, "Select Filtered", callback=self.selectFiltered)
    gui.button(box, self, "Clear Selection",
               callback=self.treeWidget.clearSelection)

    self.geneinfo = []
    self.cells = []
    self.row2geneinfo = {}
    self.data = None

    # : (# input genes, # matches genes)
    self.matchedInfo = 0, 0

    self.setBlocking(True)
    self.executor = ThreadExecutor(self)

    self.progressBarInit()

    task = Task(
        function=partial(
            taxonomy.ensure_downloaded,
            callback=methodinvoke(self, "advance", ())))
    task.resultReady.connect(self.initialize)
    task.exceptionReady.connect(self._onInitializeError)

    self.initfuture = self.executor.submit(task)
def updateAnnotations(self):
    """Recompute the gene set enrichment for the current input.

    Pending work is cancelled and the view is cleared first.  Gene set
    collections, the reference set and the name matcher are produced as
    futures on ``self._executor``; the enrichment loop runs as a ``Task``
    whose result goes to ``__on_enrichment_finished`` and whose exception
    goes to ``__on_enrichment_failed``.  The run's bookkeeping is stored
    in ``self.state``.
    """
    if self.data is None:
        return

    assert not self.__state & OWSetEnrichment.Initializing
    self._cancelPending()
    self._clearView()

    self.information(0)
    self.warning(0)
    self.error(0)

    if not self.genesinrows and len(self.geneAttrs) == 0:
        self.error(0, "Input data contains no attributes with gene names")
        return

    self.__state = OWSetEnrichment.RunningEnrichment

    taxid = self.taxid_list[self.speciesIndex]
    self.taxid = taxid

    categories = self.selectedCategories()

    clusterGenes = self.genesFromTable(self.data)

    if self.referenceData is not None and self.useReferenceData:
        referenceGenes = self.genesFromTable(self.referenceData)
    else:
        referenceGenes = None

    self.currentAnnotatedCategories = categories

    genematcher = self._genematcher()

    self.progressBarInit()

    ## Load collections in a worker thread
    # TODO: Use cached collections if already loaded and
    # use ensure_genesetsdownloaded with progress report (OWSelectGenes)
    collections = self._executor.submit(geneset.collections, *categories)

    def refset_null():
        """Return the default background reference set"""
        col = collections.result()
        return reduce(operator.ior, (set(g.genes) for g in col), set())

    def refset_ncbi():
        """Return all NCBI gene names"""
        geneinfo = gene.NCBIGeneInfo(taxid)
        return set(geneinfo.keys())

    def namematcher():
        matcher = genematcher.result()
        match = matcher.set_targets(ref_set.result())
        match.umatch = memoize(match.umatch)
        return match

    def map_unames():
        # `namematcher` is rebound to a future below, before this runs.
        matcher = namematcher.result()
        query = list(filter(None, map(matcher.umatch, querynames)))
        reference = list(
            filter(None, map(matcher.umatch, ref_set.result())))
        return query, reference

    if self._nogenematching():
        if referenceGenes is None:
            ref_set = self._executor.submit(refset_null)
        else:
            ref_set = fulfill(referenceGenes)
    else:
        # Identity test instead of `== None`: equality would be evaluated
        # element-wise by array-like gene containers.
        if referenceGenes is None:
            ref_set = self._executor.submit(refset_ncbi)
        else:
            ref_set = fulfill(referenceGenes)

    # From here on `namematcher` names the future, not the function;
    # run() and map_unames() call .result() on it in every case.
    namematcher = self._executor.submit(namematcher)
    querynames = clusterGenes

    state = types.SimpleNamespace()
    state.query_set = clusterGenes
    state.reference_set = referenceGenes
    state.namematcher = namematcher
    state.query_count = len(set(clusterGenes))
    state.reference_count = (len(set(referenceGenes))
                             if referenceGenes is not None else None)

    state.cancelled = False

    progress = methodinvoke(self, "_setProgress", (float,))
    info = methodinvoke(self, "_setRunInfo", (str,))

    @withtraceback
    def run():
        info("Loading data")
        match = namematcher.result()
        query, reference = map_unames()
        gscollections = collections.result()

        results = []
        info("Running enrichment")
        p = 0
        for i, gset in enumerate(gscollections):
            genes = set(filter(None, map(match.umatch, gset.genes)))
            enr = set_enrichment(genes, reference, query)
            results.append((gset, enr))

            if state.cancelled:
                raise UserInteruptException

            # Report only when the integer percentage changes.
            pnew = int(100 * i / len(gscollections))
            if pnew != p:
                progress(pnew)
                p = pnew
        progress(100)
        info("")
        return query, reference, results

    task = Task(function=run)
    task.resultReady.connect(self.__on_enrichment_finished)
    task.exceptionReady.connect(self.__on_enrichment_failed)
    result = self._executor.submit(task)
    state.results = result

    self.state = state
    self._updatesummary()
def commit(self):
    """Embed the images referenced by the selected attribute.

    A registered task is cancelled first.  Without an embedder the server
    is reported as disconnected; without image attributes or input data
    ``None`` is sent on both outputs.  Otherwise paths without a
    http/https/ftp/data scheme are resolved against the attribute's
    ``origin`` and the embedder is run on ``self._executor``; the finished
    future is handed to the ``__set_results`` slot.
    """
    if self._task is not None:
        self.cancel()

    if self._image_embedder is None:
        self._set_server_info(connected=False)
        return

    if not self._image_attributes or self._input_data is None:
        self.Outputs.embeddings.send(None)
        self.Outputs.skipped_images.send(None)
        return

    self._set_server_info(connected=True)
    self.cancel_button.setDisabled(False)
    self.cb_image_attr.setDisabled(True)
    self.cb_embedder.setDisabled(True)

    remote_schemes = ("http", "https", "ftp", "data")

    file_paths_attr = self._image_attributes[self.cb_image_attr_current_id]
    file_paths = self._input_data[:, file_paths_attr].metas.flatten()
    origin = file_paths_attr.attributes.get("origin", "")
    if urlparse(origin).scheme in remote_schemes and origin[-1] != "/":
        origin += "/"

    assert file_paths_attr.is_string
    assert file_paths.dtype == np.dtype('O')

    file_paths_mask = file_paths == file_paths_attr.Unknown
    file_paths_valid = file_paths[~file_paths_mask]

    # The origin does not change inside the loop, so decide once how
    # relative paths are joined to it.
    origin_is_remote = urlparse(origin).scheme in remote_schemes
    for index, path in enumerate(file_paths_valid):
        if urlparse(path).scheme in remote_schemes:
            continue
        if origin_is_remote:
            file_paths_valid[index] = urljoin(origin, path)
        else:
            file_paths_valid[index] = os.path.join(origin, path)

    # One progress tick per valid path, spread evenly over 0..100.
    ticks = iter(np.linspace(0.0, 100.0, file_paths_valid.size))
    set_progress = qconcurrent.methodinvoke(
        self, "__progress_set", (float,))

    def advance(success=True):
        if success:
            set_progress(next(ticks))

    def cancel():
        # `task` is bound further down, before this closure can be called.
        task.future.cancel()
        task.cancelled = True
        task.embedder.cancelled = True

    embedder = self._image_embedder

    def run_embedding(paths):
        return embedder(
            file_paths=paths, image_processed_callback=advance)

    self.auto_commit_widget.setDisabled(True)
    self.progressBarInit(processEvents=None)
    self.progressBarSet(0.0, processEvents=None)
    self.setBlocking(True)

    f = self._executor.submit(run_embedding, file_paths_valid)
    f.add_done_callback(
        qconcurrent.methodinvoke(self, "__set_results", (object,)))

    task = self._task = namespace(
        file_paths_mask=file_paths_mask,
        file_paths_valid=file_paths_valid,
        file_paths=file_paths,
        embedder=embedder,
        cancelled=False,
        cancel=cancel,
        future=f,
    )
    self._log.debug("Starting embedding task for %i images",
                    file_paths.size)
    return
def Enrichment(self):
    """Collect input/reference genes and start the term enrichment.

    Requires input data and the ``State.Ready`` state (both asserted).
    Returns ``{}`` when no valid reference set could be determined; on
    every other path nothing is returned and the result is delivered to
    ``__on_enrichment_done``, either through a ``FutureWatcher`` or
    directly with an already completed future when there are no input
    genes.
    """
    assert self.input_data is not None
    assert self.__state == State.Ready

    if not self.annotations.ontology:
        self.annotations.ontology = self.ontology

    self.error(1)
    self.warning([0, 1])

    self.__get_input_genes()
    self.input_genes = set(self.input_genes)
    self.known_input_genes = self.annotations.get_genes_with_known_annotation(self.input_genes)

    # self.clusterGenes = clusterGenes = self.annotations.map_to_ncbi_id(self.input_genes).values()

    # The percentage falls back to 0.0 when there are no input genes.
    self.infoLabel.setText("%i unique genes on input\n%i (%.1f%%) genes with known annotations" %
                           (len(self.input_genes), len(self.known_input_genes),
                            100.0*len(self.known_input_genes)/len(self.input_genes)
                            if len(self.input_genes) else 0.0))

    if not self.useReferenceDataset or self.ref_data is None:
        # No reference data set in use: all annotated genes are the
        # reference.
        self.information(2)
        self.information(1)
        self.ref_genes = self.annotations.genes()
        self.ref_genes = set(self.ref_genes)
    elif self.ref_data is not None:
        self.__get_ref_genes()
        self.ref_genes = set(self.ref_genes)

        ref_count = len(self.ref_genes)
        if ref_count == 0:
            # Nothing usable in the reference data: fall back to all
            # annotated genes and switch the reference option off.
            self.ref_genes = self.annotations.genes()
            self.referenceRadioBox.buttons[1].setText("Reference set")
            self.referenceRadioBox.buttons[1].setDisabled(True)
            self.information(2, "Unable to extract gene names from reference dataset. "
                                "Using entire genome for reference")
            self.useReferenceDataset = 0
        else:
            self.referenceRadioBox.buttons[1].setText("Reference set ({} genes)".format(ref_count))
            self.referenceRadioBox.buttons[1].setDisabled(False)
            self.information(2)
    else:
        # NOTE(review): the two conditions above cover every case, so this
        # branch looks unreachable -- confirm before removing.
        self.useReferenceDataset = 0
        self.ref_genes = []

    if not self.ref_genes:
        self.error(1, "No valid reference set")
        return {}

    # Evidence types enabled in the settings, in their canonical order.
    evidences = []
    for etype in go.evidenceTypesOrdered:
        if self.useEvidenceType[etype]:
            evidences.append(etype)
    aspect = ['Process', 'Component', 'Function'][self.aspectIndex]

    self.progressBarInit(processEvents=False)
    self.setBlocking(True)
    self.__state = State.Running

    if self.input_genes:
        f = self._executor.submit(
            self.annotations.get_enriched_terms,
            self.input_genes, self.ref_genes, evidences, aspect=aspect,
            prob=self.probFunctions[self.probFunc], use_fdr=False,

            progress_callback=methodinvoke(
                self, "_progressBarSet", (float,))
        )
        fw = FutureWatcher(f, parent=self)
        fw.done.connect(self.__on_enrichment_done)
        # The watcher is scheduled for deletion once it has fired.
        fw.done.connect(fw.deleteLater)
        return
    else:
        # No input genes: hand over an already completed empty result.
        f = Future()
        f.set_result({})
        self.__on_enrichment_done(f)
def start(self):
    """
    Kick off the image import for the currently selected directory.

    Returns immediately when no path is selected. If a task is still being
    processed it is cancelled first. Bookkeeping for the new task is kept in
    ``self.__pendingTask``; ``self.__onRunFinished`` is connected to the
    watcher's ``finished`` signal.
    """
    self.error()
    self.__invalidated = False

    if self.currentPath is None:
        return

    if self.__state == State.Processing:
        assert self.__pendingTask is not None
        message = ("Starting a new task while one is in progress. "
                   "Cancel the existing task (dir:'{}')").format(
                       self.__pendingTask.startdir)
        log.info(message)
        self.cancel()

    startdir = self.currentPath
    self.__setRuntimeState(State.Processing)

    notify_progress = methodinvoke(self, "__onReportProgress", (object, ))
    importer = ImportImages(report_progress=notify_progress)

    # one record holding everything known about the running task
    pending = namespace(
        task=importer,
        startdir=startdir,
        future=None,
        watcher=None,
        cancelled=False,
        cancel=None,
    )
    self.__pendingTask = pending

    def cancel_pending():
        # Cancel the task and disconnect
        if not pending.future.cancel():
            # The future could not be cancelled outright: flag the task and
            # give it a short grace period to stop on its own.
            pending.task.cancelled = True
            pending.cancelled = True
            try:
                pending.future.result(timeout=3)
            except UserInterruptError:
                pass
            except TimeoutError:
                log.info("The task did not stop in in a timely manner")
        pending.watcher.finished.disconnect(self.__onRunFinished)

    pending.cancel = cancel_pending

    def scan_suppressing_interrupt():
        try:
            return importer(startdir)
        except UserInterruptError:
            # Suppress interrupt errors, so they are not logged
            return None

    pending.future = self.__executor.submit(scan_suppressing_interrupt)
    pending.watcher = FutureWatcher(pending.future)
    pending.watcher.finished.connect(self.__onRunFinished)
def update_scores(self):
    """Compute the scores and update the histogram.

    Cancels any pending computation, clears the plot and the stored
    scores, then (when data and a group split are available) submits the
    score computation, optionally followed by ``permutations_count``
    permutation rounds, to ``self._executor``. The finished future is
    handed to ``__set_score_results`` via ``concurrent.methodinvoke``.

    An error is shown, and nothing is submitted, when any of the groups
    being compared has no members.
    """
    self.__cancel_pending()
    self.clear_plot()
    self.scores = None
    self.nulldist = None
    self.error(0)

    grp, split_selection = self.selected_split()

    if not self.data or grp is None:
        return

    _, _, test_type, score_func = self.Scores[self.score_index]

    def compute_scores(X, group_indices):
        arrays = [X[ind] for ind in group_indices]
        return score_func(*arrays, axis=0)

    def permute_indices(group_indices, random_state=None):
        assert all(ind.dtype.kind == "i" for ind in group_indices)
        assert all(ind.ndim == 1 for ind in group_indices)
        if random_state is None:
            random_state = np.random
        joined = np.hstack(group_indices)
        random_state.shuffle(joined)
        split_ind = np.cumsum([len(ind) for ind in group_indices])
        return np.split(joined, split_ind[:-1])

    axis = 0 if isinstance(grp, grouputils.RowGroup) else 1

    if test_type == OWFeatureSelection.TwoSampleTest:
        G1 = grouputils.group_selection_mask(
            self.data, grp, split_selection)
        G2 = ~G1
        indices = [np.flatnonzero(G1), np.flatnonzero(G2)]
    elif test_type == self.VarSampleTest:
        masks = [grouputils.group_selection_mask(self.data, grp, [i])
                 for i in range(len(grp.values))]
        indices = [np.flatnonzero(mask) for mask in masks]
    else:
        # Raised explicitly so the check survives `python -O`.
        raise AssertionError("Unknown test type: %r" % (test_type,))

    # `indices` hold positions, not masks: emptiness must be judged by size.
    # Counting non-zero entries would treat a group whose only member is
    # position 0 as empty.
    if not all(ind.size > 0 for ind in indices):
        self.error(0, "Target labels must exclude/include at least one "
                      "value.")
        self.scores = None
        self.nulldist = None
        self.update_data_info_label()
        return

    X = self.data.X
    if axis == 1:
        X = X.T

    # TODO: Check that each label has more than one measurement,
    # raise warning otherwise.

    def compute_scores_with_perm(X, indices, nperm=0, rstate=None,
                                 progress_advance=None):
        scores = compute_scores(X, indices)
        if progress_advance is not None:
            progress_advance()
        null_scores = []
        if nperm > 0:
            if rstate is None:
                # fixed seed: the null distribution is reproducible
                rstate = np.random.RandomState(0)

            for _ in range(nperm):
                p_indices = permute_indices(indices, rstate)
                assert all(pind.shape == ind.shape
                           for ind, pind in zip(indices, p_indices))
                pscore = compute_scores(X, p_indices)
                assert pscore.shape == scores.shape
                null_scores.append(pscore)
                if progress_advance is not None:
                    progress_advance()

        return scores, null_scores

    # Bound before `progress` is defined so the closure below never
    # depends on a later assignment.
    nperm = self.permutations_count if self.compute_null else 0

    p_advance = concurrent.methodinvoke(
        self, "progressBarAdvance", (float,))
    state = namespace(cancelled=False, advance=p_advance)

    def progress():
        # Called from the worker; doubles as the cancellation point.
        if state.cancelled:
            raise concurrent.CancelledError
        else:
            state.advance(100 / (nperm + 1))

    self.progressBarInit()
    set_scores = concurrent.methodinvoke(
        self, "__set_score_results", (concurrent.Future,))

    self.__scores_state = state
    self.__scores_future = self._executor.submit(
        compute_scores_with_perm, X, indices, nperm,
        progress_advance=progress)
    self.__scores_future.add_done_callback(set_scores)
def Enrichment(self):
    """Run GO term enrichment for the cluster genes against a reference set.

    Gene names come either from the attribute names of the dataset or from
    the selected gene attribute column; comma-separated entries are split
    into individual names. Names are then mapped through
    ``self.annotations.get_gene_names_translator``. The reference set is
    ``self.annotations.gene_names`` unless a reference dataset is selected
    and gene names can be extracted from it.

    The computation (``self.annotations.get_enriched_terms``) is submitted
    to ``self._executor`` and ``self.__on_enrichment_done`` is connected to
    its completion; with no cluster genes the handler is called directly
    with an already-resolved future holding ``{}``.

    Returns ``{}`` when gene names cannot be extracted or no valid
    reference set exists, ``None`` otherwise.
    """
    assert self.clusterDataset is not None
    assert self.__state == State.Ready

    if not self.annotations.ontology:
        self.annotations.ontology = self.ontology

    if isinstance(self.annotations.genematcher, gene.GMDirect):
        self.SetGeneMatcher()

    self.error(1)
    self.warning([0, 1])

    if self.useAttrNames:
        clusterGenes = [v.name for v in self.clusterDataset.domain.attributes]
        self.information(0)
    elif 0 <= self.geneAttrIndex < len(self.candidateGeneAttrs):
        geneAttr = self.candidateGeneAttrs[self.geneAttrIndex]
        clusterGenes = [str(ex[geneAttr]) for ex in self.clusterDataset
                        if not numpy.isnan(ex[geneAttr])]
        if any("," in name for name in clusterGenes):
            self.information(0, "Separators detected in cluster gene names. Assuming multiple genes per example.")
            clusterGenes = reduce(operator.iadd, (names.split(",") for names in clusterGenes), [])
        else:
            self.information(0)
    else:
        self.error(1, "Failed to extract gene names from input dataset!")
        return {}

    genesSetCount = len(set(clusterGenes))

    self.clusterGenes = clusterGenes = self.annotations.get_gene_names_translator(clusterGenes).values()

    self.infoLabel.setText(
        "%i unique genes on input\n%i (%.1f%%) genes with known annotations"
        % (genesSetCount, len(clusterGenes),
           100.0 * len(clusterGenes) / genesSetCount if genesSetCount else 0.0))

    referenceGenes = None
    if not self.useReferenceDataset or self.referenceDataset is None:
        self.information(2)
        self.information(1)
        referenceGenes = self.annotations.gene_names
    else:
        # Getting here already implies a reference dataset is present, so
        # the former `elif ... else` pair is collapsed into one branch.
        if self.useAttrNames:
            referenceGenes = [v.name for v in self.referenceDataset.domain.attributes]
            self.information(1)
        elif geneAttr in (self.referenceDataset.domain.variables +
                          self.referenceDataset.domain.metas):
            referenceGenes = [str(ex[geneAttr]) for ex in self.referenceDataset
                              if not numpy.isnan(ex[geneAttr])]
            # Inspect the *reference* names here. The cluster names have
            # already been translated above and say nothing about
            # separators in the reference column.
            if any("," in name for name in referenceGenes):
                self.information(1, "Separators detected in reference gene names. Assuming multiple genes per example.")
                referenceGenes = reduce(operator.iadd, (names.split(",") for names in referenceGenes), [])
            else:
                self.information(1)
        else:
            self.information(1)
            referenceGenes = None

        reference_button = self.referenceRadioBox.buttons[1]
        if referenceGenes is None:
            referenceGenes = list(self.annotations.gene_names)
            reference_button.setText("Reference set")
            reference_button.setDisabled(True)
            self.information(2, "Unable to extract gene names from reference dataset. Using entire genome for reference")
            self.useReferenceDataset = 0
        else:
            refc = len(referenceGenes)
            referenceGenes = self.annotations.get_gene_names_translator(referenceGenes).values()
            reference_button.setText("Reference set (%i genes, %i matched)" % (refc, len(referenceGenes)))
            reference_button.setDisabled(False)
            self.information(2)

    if not referenceGenes:
        self.error(1, "No valid reference set")
        return {}

    self.referenceGenes = referenceGenes
    evidences = [etype for etype in go.evidenceTypesOrdered
                 if self.useEvidenceType[etype]]
    aspect = ["P", "C", "F"][self.aspectIndex]

    self.progressBarInit(processEvents=False)
    self.setBlocking(True)
    self.__state = State.Running

    if clusterGenes:
        f = self._executor.submit(
            self.annotations.get_enriched_terms,
            clusterGenes, referenceGenes, evidences, aspect=aspect,
            prob=self.probFunctions[self.probFunc], use_fdr=False,
            progress_callback=methodinvoke(
                self, "_progressBarSet", (float,))
        )
        fw = FutureWatcher(f, parent=self)
        fw.done.connect(self.__on_enrichment_done)
        fw.done.connect(fw.deleteLater)
        return
    else:
        # Nothing to compute: report an empty result through the same path.
        f = Future()
        f.set_result({})
        self.__on_enrichment_done(f)
def update_scores(self):
    """Compute the scores and update the histogram.

    Cancels any pending computation, clears the plot and the stored
    scores, then (when data and a group split are available) submits the
    score computation, optionally followed by ``permutations_count``
    permutation rounds, to ``self._executor``. The finished future is
    handed to ``__set_score_results`` via ``concurrent.methodinvoke``.

    The worker returns ``(scores, null_scores, warning)``. ``warning`` is
    the second element of the score function's result when that result is
    a tuple, otherwise ``None``.

    An error is shown, and nothing is submitted, when any of the groups
    being compared has no members.
    """
    self.__cancel_pending()
    self.clear_plot()
    self.scores = None
    self.nulldist = None
    self.error(0)

    grp, split_selection = self.selected_split()

    if not self.data or grp is None:
        return

    _, _, test_type, score_func = self.Scores[self.score_index]

    def compute_scores(X, group_indices, warn=False):
        arrays = [X[ind] for ind in group_indices]
        ss = score_func(*arrays, axis=0)
        # Tuple results are passed through only when the caller asked for
        # the extra element (warn=True); otherwise just the first item.
        return ss[0] if isinstance(ss, tuple) and not warn else ss

    def permute_indices(group_indices, random_state=None):
        assert all(ind.dtype.kind == "i" for ind in group_indices)
        assert all(ind.ndim == 1 for ind in group_indices)
        if random_state is None:
            random_state = np.random
        joined = np.hstack(group_indices)
        random_state.shuffle(joined)
        split_ind = np.cumsum([len(ind) for ind in group_indices])
        return np.split(joined, split_ind[:-1])

    axis = 0 if isinstance(grp, grouputils.RowGroup) else 1

    if test_type == OWFeatureSelection.TwoSampleTest:
        G1 = grouputils.group_selection_mask(self.data, grp,
                                             split_selection)
        G2 = ~G1
        indices = [np.flatnonzero(G1), np.flatnonzero(G2)]
    elif test_type == self.VarSampleTest:
        masks = [
            grouputils.group_selection_mask(self.data, grp, [i])
            for i in range(len(grp.values))
        ]
        indices = [np.flatnonzero(mask) for mask in masks]
    else:
        # Raised explicitly so the check survives `python -O`.
        raise AssertionError("Unknown test type: %r" % (test_type,))

    # `indices` hold positions, not masks: emptiness must be judged by size.
    # Counting non-zero entries would treat a group whose only member is
    # position 0 as empty.
    if not all(ind.size > 0 for ind in indices):
        self.error(
            0, "Target labels must exclude/include at least one "
            "value.")
        self.scores = None
        self.nulldist = None
        self.update_data_info_label()
        return

    X = self.data.X
    if axis == 1:
        X = X.T

    # TODO: Check that each label has more than one measurement,
    # raise warning otherwise.

    def compute_scores_with_perm(X, indices, nperm=0, rstate=None,
                                 progress_advance=None):
        warning = None
        scores = compute_scores(X, indices, warn=True)
        if isinstance(scores, tuple):
            scores, warning = scores

        if progress_advance is not None:
            progress_advance()
        null_scores = []
        if nperm > 0:
            if rstate is None:
                # fixed seed: the null distribution is reproducible
                rstate = np.random.RandomState(0)

            for _ in range(nperm):
                p_indices = permute_indices(indices, rstate)
                assert all(pind.shape == ind.shape
                           for ind, pind in zip(indices, p_indices))
                pscore = compute_scores(X, p_indices)
                assert pscore.shape == scores.shape
                null_scores.append(pscore)
                if progress_advance is not None:
                    progress_advance()

        return scores, null_scores, warning

    # Bound before `progress` is defined so the closure below never
    # depends on a later assignment.
    nperm = self.permutations_count if self.compute_null else 0

    p_advance = concurrent.methodinvoke(self, "progressBarAdvance",
                                        (float, ))
    state = namespace(cancelled=False, advance=p_advance)

    def progress():
        # Called from the worker; doubles as the cancellation point.
        if state.cancelled:
            raise concurrent.CancelledError
        else:
            state.advance(100 / (nperm + 1))

    self.progressBarInit()
    set_scores = concurrent.methodinvoke(self, "__set_score_results",
                                         (concurrent.Future, ))

    self.__scores_state = state
    self.__scores_future = self._executor.submit(compute_scores_with_perm,
                                                 X,
                                                 indices,
                                                 nperm,
                                                 progress_advance=progress)
    self.__scores_future.add_done_callback(set_scores)
def Update(self):
    """
    Update (recompute enriched pathways) the widget state.

    Extracts the query gene names (and, when enabled and present, the
    reference gene names) from the input data, splitting comma-separated
    entries unless attribute names are used. The enrichment itself runs in
    a ``concurrent.Task`` submitted to ``self._executor``;
    ``self._onEnrichTaskFinished`` is connected to the task's ``finished``
    signal. Does nothing when there is no input data.
    """
    if not self.data:
        return

    self.error(0)
    self.information(0)

    # XXX: Check data in setData, do not even alow this to be executed if
    # data has no genes
    try:
        genes = self.GeneNamesFromData(self.data)
    except ValueError:
        self.error(0, "Cannot extract gene names from input.")
        genes = []

    if not self.useAttrNames and any("," in gene for gene in genes):
        # Flatten in a single pass; repeated list `+` is quadratic.
        genes = [name for gene in genes
                 for name in split_and_strip(gene, ",")]
        self.information(0,
                         "Separators detected in input gene names. "
                         "Assuming multiple genes per instance.")

    self.queryGenes = genes

    self.information(1)
    reference = None
    if self.useReference and self.refData:
        reference = self.GeneNamesFromData(self.refData)
        if not self.useAttrNames \
                and any("," in gene for gene in reference):
            reference = [name for gene in reference
                         for name in split_and_strip(gene, ",")]
            self.information(1,
                             "Separators detected in reference gene "
                             "names. Assuming multiple genes per "
                             "instance.")

    org_code = self.SelectedOrganismCode()

    def run_enrichment(org_code, genes, reference=None, progress=None):
        # Runs in the worker: everything it needs is passed in explicitly.
        org = kegg.KEGGOrganism(org_code)
        if reference is None:
            reference = org.get_genes()

        # Map 'genes' and 'reference' sets to unique KEGG identifiers
        unique_genes, _, _ = org.get_unique_gene_ids(set(genes))
        unique_ref_genes, _, _ = org.get_unique_gene_ids(set(reference))

        taxid = kegg.to_taxid(org.org_code)
        # Map the taxid back to standard 'common' taxids
        # (as used by 'geneset') if applicable
        r_tax_map = {v: k for k, v in kegg.KEGGGenome.TAXID_MAP.items()}
        taxid = r_tax_map.get(taxid, taxid)

        # We use the kegg pathway gene sets provided by 'geneset' for
        # the enrichment calculation.

        # Ensure we are using the latest genesets
        # TODO: ?? Is updating the index enough?
        serverfiles.update(geneset.sfdomain, "index.pck")
        kegg_gs_collections = geneset.collections(
            (("KEGG", "pathways"), taxid)
        )

        pathways = pathway_enrichment(
            kegg_gs_collections, unique_genes.keys(),
            unique_ref_genes.keys(),
            callback=progress
        )
        # Ensure that pathway entries are pre-cached for later use in the
        # list/tree view
        kegg_pathways = kegg.KEGGPathways()
        kegg_pathways.pre_cache(
            pathways.keys(), progress_callback=progress
        )

        return pathways, org, unique_genes, unique_ref_genes

    self.progressBarInit()
    self.setEnabled(False)
    self.infoLabel.setText("Retrieving...\n")

    progress = concurrent.methodinvoke(self, "setProgress", (float,))

    self._enrichTask = concurrent.Task(
        function=lambda:
            run_enrichment(org_code, genes, reference, progress)
    )
    self._enrichTask.finished.connect(self._onEnrichTaskFinished)
    self._executor.submit(self._enrichTask)
def __init__(self, parent=None):
    """Build the widget's controls and start fetching organism definitions.

    The control area receives the info label, organism and gene-attribute
    selectors, the reference / orthology / image check boxes, the cache
    button and the auto-commit control. The main area receives a vertical
    splitter holding the pathway view and the pathway list.

    The widget is disabled and set blocking while a ``concurrent.Task``
    running ``get_genome`` is executed on ``self._executor``;
    ``self.__initialize_finish`` is connected to its ``finished`` signal.
    """
    super().__init__(parent)
    self.organismCodes = []
    self._changedFlag = False
    self.__invalidated = False
    self.__runstate = OWKEGGPathwayBrowser.Initializing
    self.__in_setProgress = False

    self.controlArea.setMaximumWidth(250)
    box = gui.widgetBox(self.controlArea, "Info")
    self.infoLabel = gui.widgetLabel(box, "No data on input\n")

    # Organism selection.
    box = gui.widgetBox(self.controlArea, "Organism")
    self.organismComboBox = gui.comboBox(
        box, self, "organismIndex",
        items=[],
        callback=self.Update,
        addSpace=True,
        tooltip="Select the organism of the input genes")

    # Selection of genes attribute
    box = gui.widgetBox(self.controlArea, "Gene attribute")
    self.geneAttrCandidates = itemmodels.VariableListModel(parent=self)
    self.geneAttrCombo = gui.comboBox(
        box, self, "geneAttrIndex", callback=self.Update)
    self.geneAttrCombo.setModel(self.geneAttrCandidates)

    gui.checkBox(box, self, "useAttrNames",
                 "Use variable names",
                 disables=[(-1, self.geneAttrCombo)],
                 callback=self.Update)

    # Keep the combo's initial enabled state in line with the check box.
    self.geneAttrCombo.setDisabled(bool(self.useAttrNames))

    gui.separator(self.controlArea)

    gui.checkBox(self.controlArea, self, "useReference",
                 "From signal",
                 box="Reference",
                 callback=self.Update)

    gui.separator(self.controlArea)

    gui.checkBox(self.controlArea, self, "showOrthology",
                 "Show pathways in full orthology",
                 box="Orthology",
                 callback=self.UpdateListView)

    gui.checkBox(self.controlArea, self, "autoResize",
                 "Resize to fit",
                 box="Image",
                 callback=self.UpdatePathwayViewTransform)

    box = gui.widgetBox(self.controlArea, "Cache Control")

    gui.button(box, self, "Clear cache",
               callback=self.ClearCache,
               tooltip="Clear all locally cached KEGG data.")

    gui.separator(self.controlArea)

    gui.auto_commit(self.controlArea, self, "autoCommit", "Commit")

    gui.rubber(self.controlArea)

    # Main area: pathway view on top, pathway list below.
    spliter = QSplitter(Qt.Vertical, self.mainArea)
    self.pathwayView = PathwayView(self, spliter)
    self.pathwayView.scene().selectionChanged.connect(
        self._onSelectionChanged
    )
    self.mainArea.layout().addWidget(spliter)

    self.listView = QTreeWidget(
        allColumnsShowFocus=True,
        selectionMode=QTreeWidget.SingleSelection,
        sortingEnabled=True,
        maximumHeight=200)

    spliter.addWidget(self.listView)

    self.listView.setColumnCount(4)
    self.listView.setHeaderLabels(
        ["Pathway", "P value", "Genes", "Reference"])

    self.listView.itemSelectionChanged.connect(self.UpdatePathwayView)

    select = QAction(
        "Select All", self,
        shortcut=QKeySequence.SelectAll
    )
    select.triggered.connect(self.selectAll)
    self.addAction(select)

    self.data = None
    self.refData = None

    self._executor = concurrent.ThreadExecutor()
    # The widget stays disabled/blocking while the genome task runs.
    self.setEnabled(False)
    self.setBlocking(True)
    progress = concurrent.methodinvoke(self, "setProgress", (float,))

    def get_genome():
        """Return a KEGGGenome with the common org entries precached."""
        genome = kegg.KEGGGenome()

        essential = genome.essential_organisms()
        common = genome.common_organisms()
        # Remove duplicates of essential from common.
        # (essential + common list as defined here will be used in the
        # GUI.)
        common = [c for c in common if c not in essential]

        # TODO: Add option to specify additional organisms not
        # in the common list.

        keys = list(map(genome.org_code_to_entry_key, essential + common))

        genome.pre_cache(keys, progress_callback=progress)
        return (keys, genome)

    self._genomeTask = task = concurrent.Task(function=get_genome)
    task.finished.connect(self.__initialize_finish)

    self.progressBarInit()
    self.infoLabel.setText("Fetching organism definitions\n")
    self._executor.submit(task)
def Enrichment(self):
    """Run GO term enrichment for the cluster genes against a reference set.

    Gene names come either from the attribute names of the dataset or from
    the selected gene attribute column; comma-separated entries are split
    into individual names. Names are then mapped through
    ``self.annotations.get_gene_names_translator``. The reference set is
    ``self.annotations.gene_names`` unless a reference dataset is selected
    and gene names can be extracted from it.

    The computation (``self.annotations.get_enriched_terms``) is submitted
    to ``self._executor`` and ``self.__on_enrichment_done`` is connected to
    its completion; with no cluster genes the handler is called directly
    with an already-resolved future holding ``{}``.

    Returns ``{}`` when gene names cannot be extracted or no valid
    reference set exists, ``None`` otherwise.
    """
    assert self.clusterDataset is not None
    assert self.__state == State.Ready

    if not self.annotations.ontology:
        self.annotations.ontology = self.ontology

    if isinstance(self.annotations.genematcher, gene.GMDirect):
        self.SetGeneMatcher()

    self.error(1)
    self.warning([0, 1])

    if self.useAttrNames:
        clusterGenes = [
            v.name for v in self.clusterDataset.domain.attributes
        ]
        self.information(0)
    elif 0 <= self.geneAttrIndex < len(self.candidateGeneAttrs):
        geneAttr = self.candidateGeneAttrs[self.geneAttrIndex]
        clusterGenes = [
            str(ex[geneAttr]) for ex in self.clusterDataset
            if not numpy.isnan(ex[geneAttr])
        ]
        if any("," in name for name in clusterGenes):
            self.information(
                0,
                "Separators detected in cluster gene names. Assuming multiple genes per example."
            )
            clusterGenes = reduce(operator.iadd,
                                  (names.split(",")
                                   for names in clusterGenes), [])
        else:
            self.information(0)
    else:
        self.error(1, "Failed to extract gene names from input dataset!")
        return {}

    genesSetCount = len(set(clusterGenes))

    self.clusterGenes = clusterGenes = self.annotations.get_gene_names_translator(
        clusterGenes).values()

    self.infoLabel.setText(
        "%i unique genes on input\n%i (%.1f%%) genes with known annotations"
        % (genesSetCount, len(clusterGenes), 100.0 * len(clusterGenes) /
           genesSetCount if genesSetCount else 0.0))

    referenceGenes = None
    if not self.useReferenceDataset or self.referenceDataset is None:
        self.information(2)
        self.information(1)
        referenceGenes = self.annotations.gene_names
    else:
        # Getting here already implies a reference dataset is present, so
        # the former `elif ... else` pair is collapsed into one branch.
        if self.useAttrNames:
            referenceGenes = [
                v.name for v in self.referenceDataset.domain.attributes
            ]
            self.information(1)
        elif geneAttr in (self.referenceDataset.domain.variables +
                          self.referenceDataset.domain.metas):
            referenceGenes = [
                str(ex[geneAttr]) for ex in self.referenceDataset
                if not numpy.isnan(ex[geneAttr])
            ]
            # Inspect the *reference* names here. The cluster names have
            # already been translated above and say nothing about
            # separators in the reference column.
            if any("," in name for name in referenceGenes):
                self.information(
                    1,
                    "Separators detected in reference gene names. Assuming multiple genes per example."
                )
                referenceGenes = reduce(operator.iadd,
                                        (names.split(",")
                                         for names in referenceGenes), [])
            else:
                self.information(1)
        else:
            self.information(1)
            referenceGenes = None

        reference_button = self.referenceRadioBox.buttons[1]
        if referenceGenes is None:
            referenceGenes = list(self.annotations.gene_names)
            reference_button.setText("Reference set")
            reference_button.setDisabled(True)
            self.information(
                2,
                "Unable to extract gene names from reference dataset. Using entire genome for reference"
            )
            self.useReferenceDataset = 0
        else:
            refc = len(referenceGenes)
            referenceGenes = self.annotations.get_gene_names_translator(
                referenceGenes).values()
            reference_button.setText(
                "Reference set (%i genes, %i matched)" %
                (refc, len(referenceGenes)))
            reference_button.setDisabled(False)
            self.information(2)

    if not referenceGenes:
        self.error(1, "No valid reference set")
        return {}

    self.referenceGenes = referenceGenes
    evidences = [
        etype for etype in go.evidenceTypesOrdered
        if self.useEvidenceType[etype]
    ]
    aspect = ["P", "C", "F"][self.aspectIndex]

    self.progressBarInit(processEvents=False)
    self.setBlocking(True)
    self.__state = State.Running

    if clusterGenes:
        f = self._executor.submit(self.annotations.get_enriched_terms,
                                  clusterGenes,
                                  referenceGenes,
                                  evidences,
                                  aspect=aspect,
                                  prob=self.probFunctions[self.probFunc],
                                  use_fdr=False,
                                  progress_callback=methodinvoke(
                                      self, "_progressBarSet", (float, )))
        fw = FutureWatcher(f, parent=self)
        fw.done.connect(self.__on_enrichment_done)
        fw.done.connect(fw.deleteLater)
        return
    else:
        # Nothing to compute: report an empty result through the same path.
        f = Future()
        f.set_result({})
        self.__on_enrichment_done(f)
def callback(self):
    """Progress hook: abort on cancellation, otherwise advance the bar.

    Raises ``KeyboardInterrupt`` when the current task is flagged as
    cancelled. When a progress bar is set, ``progress_advance`` is invoked
    through ``methodinvoke``.
    """
    task_cancelled = self._task.cancelled
    if task_cancelled:
        raise KeyboardInterrupt()

    if not self.progress_bar:
        return

    advance = methodinvoke(self, "progress_advance")
    advance()
def _update(self):
    """Start evaluating the learners that do not have results yet.

    Any task already in flight is cancelled first. Returns early when there
    is no data or every learner already has a result. Otherwise the
    evaluation function is submitted to ``self._executor`` and
    ``self._task_finished`` is connected to the watcher's ``done`` signal.
    """
    if self._task is not None:
        # First make sure any pending tasks are cancelled.
        self.cancel()
    assert self._task is None

    if self.data is None:
        return
    # collect all learners for which results have not yet been computed
    need_update = [(id, learner) for id, learner in self.learners.items()
                   if self.results[id] is None]
    if not need_update:
        return
    # [end-snippet-5]
    # [start-snippet-6]
    learners = [learner for _, learner in need_update]
    # setup the learner evaluations as partial function capturing
    # the necessary arguments.
    if self.testdata is None:
        learning_curve_func = partial(
            learning_curve,
            learners, self.data, folds=self.folds,
            proportions=self.curvePoints,
        )
    else:
        learning_curve_func = partial(
            learning_curve_with_test_data,
            learners, self.data, self.testdata, times=self.folds,
            proportions=self.curvePoints,
        )
    # [end-snippet-6]
    # [start-snippet-7]
    # setup the task state
    self._task = task = Task()
    # The learning_curve[_with_test_data] also takes a callback function
    # to report the progress. We instrument this callback to both invoke
    # the appropriate slots on this widget for reporting the progress
    # (in a thread safe manner) and to implement cooperative cancellation.
    set_progress = methodinvoke(self, "setProgressValue", (float,))

    def callback(finished):
        # check if the task has been cancelled and raise an exception
        # from within. This 'strategy' can only be used with code that
        # properly cleans up after itself in the case of an exception
        # (does not leave any global locks, opened file descriptors, ...)
        if task.cancelled:
            raise KeyboardInterrupt()
        # `finished` is scaled by 100 before being reported
        # (presumably a 0..1 fraction; confirm against learning_curve).
        set_progress(finished * 100)

    # capture the callback in the partial function
    learning_curve_func = partial(learning_curve_func, callback=callback)
    # [end-snippet-7]
    # [start-snippet-8]
    self.progressBarInit()
    # Submit the evaluation function to the executor and fill in the
    # task with the resultant Future.
    task.future = self._executor.submit(learning_curve_func)
    # Setup the FutureWatcher to notify us of completion
    task.watcher = FutureWatcher(task.future)
    # by using FutureWatcher we ensure `_task_finished` slot will be
    # called from the main GUI thread by the Qt's event loop
    task.watcher.done.connect(self._task_finished)
def apply(self):
    """Start training of ``self.agent``.

    ``agent.train`` receives the values returned by ``self.episodes()`` and
    ``self.seconds()``, this object itself, and a ``methodinvoke`` wrapper
    around this object's ``on_finish`` method.
    """
    train = self.agent.train
    episode_count = self.episodes()
    second_count = self.seconds()
    notify_finished = methodinvoke(self, "on_finish")
    train(episode_count, second_count, self, notify_finished)
def commit(self):
    """Start embedding the images referenced by the selected attribute.

    Any running task is cancelled first. Without an embedder the server
    info is set to disconnected; without image attributes or input data
    ``None`` is sent on both outputs. Otherwise the valid (non-missing)
    paths are resolved against the attribute's ``"origin"`` and the
    embedder is run on ``self._executor``; the finished future is handed to
    ``__set_results`` via ``qconcurrent.methodinvoke``. State for the
    running task is stored in ``self._task``.
    """
    if self._task is not None:
        self.cancel()

    if self._image_embedder is None:
        self._set_server_info(connected=False)
        return

    if not self._image_attributes or self._input_data is None:
        self.Outputs.embeddings.send(None)
        self.Outputs.skipped_images.send(None)
        return

    self._set_server_info(connected=True)
    self.cancel_button.setDisabled(False)
    self.cb_image_attr.setDisabled(True)
    self.cb_embedder.setDisabled(True)

    file_paths_attr = self._image_attributes[self.cb_image_attr_current_id]
    file_paths = self._input_data[:, file_paths_attr].metas.flatten()

    url_schemes = ("http", "https", "ftp", "data")
    origin = file_paths_attr.attributes.get("origin", "")
    # Parsed once: appending "/" below does not change the scheme, so the
    # answer stays valid for the whole path loop.
    origin_is_url = urlparse(origin).scheme in url_schemes
    if origin_is_url and not origin.endswith("/"):
        origin += "/"
    join_with_origin = urljoin if origin_is_url else os.path.join

    assert file_paths_attr.is_string
    assert file_paths.dtype == np.dtype('O')

    file_paths_mask = file_paths == file_paths_attr.Unknown
    file_paths_valid = file_paths[~file_paths_mask]
    for i, path in enumerate(file_paths_valid):
        # Paths that already carry a URL scheme are used verbatim.
        if urlparse(path).scheme not in url_schemes:
            file_paths_valid[i] = join_with_origin(origin, path)

    ticks = iter(np.linspace(0.0, 100.0, file_paths_valid.size))
    set_progress = qconcurrent.methodinvoke(
        self, "__progress_set", (float,))

    def advance(success=True):
        if success:
            set_progress(next(ticks))

    def cancel():
        # `task` is bound further down, before this can be called.
        task.future.cancel()
        task.cancelled = True
        task.embedder.set_canceled(True)

    embedder = self._image_embedder

    def run_embedding(paths):
        return embedder(
            file_paths=paths, image_processed_callback=advance)

    self.auto_commit_widget.setDisabled(True)
    self.progressBarInit(processEvents=None)
    self.progressBarSet(0.0, processEvents=None)
    self.setBlocking(True)

    f = self._executor.submit(run_embedding, file_paths_valid)
    f.add_done_callback(
        qconcurrent.methodinvoke(self, "__set_results", (object,)))

    task = self._task = namespace(
        file_paths_mask=file_paths_mask,
        file_paths_valid=file_paths_valid,
        file_paths=file_paths,
        embedder=embedder,
        cancelled=False,
        cancel=cancel,
        future=f,
    )
    self._log.debug("Starting embedding task for %i images",
                    file_paths.size)
    return
def start(self):
    """
    Launch the image scan for the currently selected directory.

    Returns immediately when no path is selected. If a task is still being
    processed it is cancelled first. Bookkeeping for the new scan is kept
    in ``self.__pendingTask``; ``self.__onRunFinished`` is connected to the
    watcher's ``finished`` signal.
    """
    self.error()
    self.__invalidated = False

    if self.currentPath is None:
        return

    if self.__state == State.Processing:
        assert self.__pendingTask is not None
        notice = ("Starting a new task while one is in progress. "
                  "Cancel the existing task (dir:'{}')"
                  ).format(self.__pendingTask.startdir)
        log.info(notice)
        self.cancel()

    startdir = self.currentPath
    self.__setRuntimeState(State.Processing)

    on_progress = methodinvoke(self, "__onReportProgress", (object,))
    scan = ImageScan(startdir, report_progress=on_progress)

    # everything known about the running scan lives in this one record
    record = namespace(
        task=scan,
        startdir=startdir,
        future=None,
        watcher=None,
        cancelled=False,
        cancel=None,
    )
    self.__pendingTask = record

    def stop_scan():
        # Cancel the task and disconnect
        cancelled_outright = record.future.cancel()
        if not cancelled_outright:
            # Flag the scan and give it a short grace period to stop.
            record.task.cancelled = True
            record.cancelled = True
            try:
                record.future.result(timeout=3)
            except UserInterruptError:
                pass
            except TimeoutError:
                log.info("The task did not stop in in a timely manner")
        record.watcher.finished.disconnect(self.__onRunFinished)

    record.cancel = stop_scan

    def run_scan_quietly():
        try:
            return scan.run()
        except UserInterruptError:
            # Suppress interrupt errors, so they are not logged
            return None

    record.future = self.__executor.submit(run_scan_quietly)
    record.watcher = FutureWatcher(record.future)
    record.watcher.finished.connect(self.__onRunFinished)
def updateAnnotations(self):
    """Recompute the gene set enrichment for the current input.

    Cancels pending work and clears the view, then schedules on
    ``self._executor``: loading of the selected gene set collections, the
    reference set (unless one comes from the reference data), the name
    matcher, and finally the enrichment run itself. The run is wrapped in a
    ``Task`` whose ``resultReady`` / ``exceptionReady`` signals are
    connected to ``self.__on_enrichment_finished`` /
    ``self.__on_enrichment_failed``. Run state is stored in ``self.state``.

    Returns early when there is no data or no gene name source.
    """
    if self.data is None:
        return

    assert not self.__state & OWSetEnrichment.Initializing
    self._cancelPending()
    self._clearView()

    self.information(0)
    self.warning(0)
    self.error(0)

    if not self.genesinrows and len(self.geneAttrs) == 0:
        self.error(0, "Input data contains no columns with gene names")
        return

    self.__state = OWSetEnrichment.RunningEnrichment

    taxid = self.taxid_list[self.speciesIndex]
    self.taxid = taxid

    categories = self.selectedCategories()

    clusterGenes = self.genesFromTable(self.data)

    if self.referenceData is not None and self.useReferenceData:
        referenceGenes = self.genesFromTable(self.referenceData)
    else:
        referenceGenes = None

    self.currentAnnotatedCategories = categories

    genematcher = self._genematcher()

    self.progressBarInit()

    ## Load collections in a worker thread
    # TODO: Use cached collections if already loaded and
    # use ensure_genesetsdownloaded with progress report (OWSelectGenes)
    collections = self._executor.submit(geneset.collections, *categories)

    def refset_null():
        """Return the default background reference set"""
        col = collections.result()
        return reduce(operator.ior,
                      (set(g.genes) for g in col),
                      set())

    def refset_ncbi():
        """Return all NCBI gene names"""
        geneinfo = gene.NCBIGeneInfo(taxid)
        return set(geneinfo.keys())

    def build_namematcher():
        # Named differently from the `namematcher` future below so the
        # function is not shadowed by its own result.
        matcher = genematcher.result()
        match = matcher.set_targets(ref_set.result())
        match.umatch = memoize(match.umatch)
        return match

    def map_unames():
        matcher = namematcher.result()
        query = list(filter(None, map(matcher.umatch, querynames)))
        reference = list(
            filter(None, map(matcher.umatch, ref_set.result())))
        return query, reference

    if self._nogenematching():
        if referenceGenes is None:
            ref_set = self._executor.submit(refset_null)
        else:
            ref_set = fulfill(referenceGenes)
    else:
        if referenceGenes is None:
            ref_set = self._executor.submit(refset_ncbi)
        else:
            ref_set = fulfill(referenceGenes)

    namematcher = self._executor.submit(build_namematcher)
    querynames = clusterGenes

    state = types.SimpleNamespace()
    state.query_set = clusterGenes
    state.reference_set = referenceGenes
    state.namematcher = namematcher
    state.query_count = len(set(clusterGenes))
    state.reference_count = (len(set(referenceGenes))
                             if referenceGenes is not None else None)

    state.cancelled = False

    progress = methodinvoke(self, "_setProgress", (float, ))
    info = methodinvoke(self, "_setRunInfo", (str, ))

    @withtraceback
    def run():
        info("Loading data")
        match = namematcher.result()
        query, reference = map_unames()
        gscollections = collections.result()

        results = []
        info("Running enrichment")
        p = 0
        for i, gset in enumerate(gscollections):
            genes = set(filter(None, map(match.umatch, gset.genes)))
            enr = set_enrichment(genes, reference, query)
            results.append((gset, enr))

            # cooperative cancellation point
            if state.cancelled:
                raise UserInteruptException

            # only report when the integer percentage actually changes
            pnew = int(100 * i / len(gscollections))
            if pnew != p:
                progress(pnew)
                p = pnew
        progress(100)
        info("")
        return query, reference, results

    task = Task(function=run)
    task.resultReady.connect(self.__on_enrichment_finished)
    task.exceptionReady.connect(self.__on_enrichment_failed)
    result = self._executor.submit(task)
    state.results = result

    self.state = state
    self._updatesummary()
def __init__(self, parent=None):
    """Build the widget's controls and start fetching organism definitions.

    The control area receives the info label, the reference / orthology /
    image check boxes, the cache button and the auto-commit control. The
    main area receives a vertical splitter holding the pathway view and
    the pathway list. Input, reference and result attributes are
    initialised to empty values.

    The widget is disabled and set blocking while a ``concurrent.Task``
    running ``get_genome`` is executed on ``self._executor``;
    ``self.__initialize_finish`` is connected to its ``finished`` signal.
    """
    super().__init__(parent)

    self._changedFlag = False
    self.__invalidated = False
    self.__runstate = OWKEGGPathwayBrowser.Initializing
    self.__in_setProgress = False

    self.controlArea.setMaximumWidth(250)
    box = gui.widgetBox(self.controlArea, "Info")
    self.infoLabel = gui.widgetLabel(box, "No data on input\n")

    gui.separator(self.controlArea)

    gui.checkBox(self.controlArea, self, "useReference", "From signal",
                 box="Reference", callback=self.Update)

    gui.separator(self.controlArea)

    gui.checkBox(
        self.controlArea,
        self,
        "showOrthology",
        "Show pathways in full orthology",
        box="Orthology",
        callback=self.UpdateListView,
    )

    gui.checkBox(
        self.controlArea,
        self,
        "autoResize",
        "Resize to fit",
        box="Image",
        callback=self.UpdatePathwayViewTransform,
    )

    box = gui.widgetBox(self.controlArea, "Cache Control")

    gui.button(
        box,
        self,
        "Clear cache",
        callback=self.ClearCache,
        tooltip="Clear all locally cached KEGG data.",
        default=False,
        autoDefault=False,
    )

    gui.separator(self.controlArea)

    gui.auto_commit(self.controlArea, self, "autoCommit", "Commit")

    gui.rubber(self.controlArea)

    # Main area: pathway view on top, pathway list below.
    spliter = QSplitter(Qt.Vertical, self.mainArea)
    self.pathwayView = PathwayView(self, spliter)
    self.pathwayView.scene().selectionChanged.connect(
        self._onSelectionChanged)
    self.mainArea.layout().addWidget(spliter)

    self.listView = QTreeWidget(allColumnsShowFocus=True,
                                selectionMode=QTreeWidget.SingleSelection,
                                sortingEnabled=True,
                                maximumHeight=200)

    spliter.addWidget(self.listView)

    self.listView.setColumnCount(4)
    self.listView.setHeaderLabels(
        ["Pathway", "P value", "Genes", "Reference"])

    self.listView.itemSelectionChanged.connect(self.UpdatePathwayView)

    select = QAction("Select All", self, shortcut=QKeySequence.SelectAll)
    select.triggered.connect(self.selectAll)
    self.addAction(select)

    # Input data attributes, empty until data arrives.
    self.data = None
    self.input_genes = []
    self.tax_id = None
    self.use_attr_names = None
    self.gene_id_attribute = None
    self.gene_id_column = None

    # Reference data attributes, empty until reference data arrives.
    self.ref_data = None
    self.ref_genes = []
    self.ref_tax_id = None
    self.ref_use_attr_names = None
    self.ref_gene_id_attribute = None
    self.ref_gene_id_column = None

    self.pathways = {}
    self.ncbi_gene_map = []
    self.org = None

    self._executor = concurrent.ThreadExecutor()
    # The widget stays disabled/blocking while the genome task runs.
    self.setEnabled(False)
    self.setBlocking(True)
    progress = concurrent.methodinvoke(self, "setProgress", (float, ))

    def get_genome():
        """Return a KEGGGenome with the common org entries precached."""
        genome = kegg.KEGGGenome()

        essential = genome.essential_organisms()
        common = genome.common_organisms()
        # Remove duplicates of essential from common.
        # (essential + common list as defined here will be used in the
        # GUI.)
        common = [c for c in common if c not in essential]

        # TODO: Add option to specify additional organisms not
        # in the common list.

        keys = list(map(genome.org_code_to_entry_key, essential + common))

        genome.pre_cache(keys, progress_callback=progress)
        return (keys, genome)

    self._genomeTask = task = concurrent.Task(function=get_genome)
    task.finished.connect(self.__initialize_finish)

    self.progressBarInit()
    self.infoLabel.setText("Fetching organism definitions\n")
    self._executor.submit(task)
def __init__(self, parent=None):
    """Build the widget GUI and start the background initialization task.

    The control area receives the info label, the organism selector (with
    a hidden alternative-source check box), the gene-name controls, the
    commit controls and a hidden "Dicty Express" box. The main area
    receives the filter line edit, the results tree view and the two
    selection buttons. The widget is then blocked and a task wrapping
    ``taxonomy.ensure_downloaded`` is submitted to the executor; its
    result is delivered to ``initialize`` and its exception to
    ``_onInitializeError``.

    :param parent: passed through to the base class initializer.
    """
    # ``super()`` already binds ``self``; passing it again would shift
    # ``parent`` by one positional slot in the base initializer.
    super().__init__(parent)

    self.selectionChangedFlag = False
    self.__initialized = False

    # Futures of the initialization task and of the info retrieval task.
    self.initfuture = None
    self.itemsfuture = None

    self.infoLabel = gui.widgetLabel(
        gui.widgetBox(self.controlArea, "Info", addSpace=True),
        "Initializing\n"
    )

    self.organisms = None
    self.organismBox = gui.widgetBox(
        self.controlArea, "Organism", addSpace=True)

    self.organismComboBox = gui.comboBox(
        self.organismBox, self, "organism_index",
        callback=self._onSelectedOrganismChanged)

    # For now only support one alt source, with a checkbox
    # In the future this can be extended to multiple selections
    self.altSourceCheck = gui.checkBox(
        self.organismBox, self, "useAltSource",
        "Show information from dictyBase",
        callback=self.onAltSourceChange)
    self.altSourceCheck.hide()

    box = gui.widgetBox(self.controlArea, "Gene names", addSpace=True)
    # NOTE(review): the label below contains a typo ("atttibute"); it is
    # left unchanged here because it is a runtime string.
    self.geneAttrComboBox = gui.comboBox(
        box, self, "gene_attr",
        "Gene atttibute",
        callback=self.updateInfoItems
    )
    # The attribute selector is only usable when gene names are not
    # taken from the attribute names.
    self.geneAttrComboBox.setEnabled(not self.useAttr)
    use_attr_check = gui.checkBox(
        box, self, "useAttr", "Use attribute names",
        callback=self.updateInfoItems)
    use_attr_check.toggled[bool].connect(self.geneAttrComboBox.setDisabled)

    gui.auto_commit(self.controlArea, self, "auto_commit", "Commit")

    # A label for the dictyExpress link; the box starts hidden.
    self.dictyExpressBox = gui.widgetBox(
        self.controlArea, "Dicty Express")
    self.linkLabel = gui.widgetLabel(self.dictyExpressBox, "")
    # Link activation is routed to onDictyExpressLink instead of being
    # opened by the label itself.
    self.linkLabel.setOpenExternalLinks(False)
    self.linkLabel.linkActivated.connect(self.onDictyExpressLink)
    self.dictyExpressBox.hide()

    gui.rubber(self.controlArea)

    gui.lineEdit(self.mainArea, self, "search_string", "Filter",
                 callbackOnType=True, callback=self.searchUpdate)

    self.treeWidget = QTreeView(
        self.mainArea,
        selectionMode=QTreeView.ExtendedSelection,
        rootIsDecorated=False,
        uniformRowHeights=True,
        sortingEnabled=True)

    self.treeWidget.setItemDelegate(
        gui.LinkStyledItemDelegate(self.treeWidget))
    self.treeWidget.viewport().setMouseTracking(True)
    self.mainArea.layout().addWidget(self.treeWidget)

    box = gui.widgetBox(self.mainArea, "", orientation="horizontal")
    gui.button(box, self, "Select Filtered",
               callback=self.selectFiltered)
    gui.button(box, self, "Clear Selection",
               callback=self.treeWidget.clearSelection)

    self.geneinfo = []
    self.cells = []
    self.row2geneinfo = {}
    self.data = None

    # : (# input genes, # matches genes)
    self.matchedInfo = 0, 0

    # Block the widget while the initialization task runs.
    self.setBlocking(True)
    self.executor = ThreadExecutor(self)

    self.progressBarInit()

    task = Task(
        function=partial(
            taxonomy.ensure_downloaded,
            callback=methodinvoke(self, "advance", ())
        )
    )

    # Connect the handlers before submitting the task.
    task.resultReady.connect(self.initialize)
    task.exceptionReady.connect(self._onInitializeError)

    self.initfuture = self.executor.submit(task)
def _update(self):
    """Start evaluating every learner that has no result yet.

    Any task that is still pending is cancelled first. Nothing is
    started when there is no input data or when every learner already
    has a result. Otherwise the evaluation function is submitted to the
    executor and ``_task_finished`` is connected to the completion
    signal of a ``FutureWatcher`` wrapping the resulting future.
    """
    # A still-pending task must be cancelled before a new one starts.
    if self._task is not None:
        self.cancel()
    assert self._task is None

    if self.data is None:
        return
    # Learners whose result slot is still empty.
    pending = [
        (key, learner)
        for key, learner in self.learners.items()
        if self.results[key] is None
    ]
    if not pending:
        return
    # [end-snippet-5]

    # [start-snippet-6]
    learners = [entry[1] for entry in pending]
    # Bind every argument except the progress callback up front.
    if self.testdata is not None:
        learning_curve_func = partial(
            learning_curve_with_test_data,
            learners, self.data, self.testdata,
            times=self.folds, proportions=self.curvePoints,
        )
    else:
        learning_curve_func = partial(
            learning_curve,
            learners, self.data,
            folds=self.folds, proportions=self.curvePoints,
        )
    # [end-snippet-6]

    # [start-snippet-7]
    # Fresh task state for this run.
    task = Task()
    self._task = task

    # The evaluation functions accept a progress callback. It is used
    # both to forward progress to this widget's slot (in a thread safe
    # manner) and to implement cooperative cancellation.
    set_progress = methodinvoke(self, "setProgressValue", (float, ))

    def report(finished):
        # Raising from inside the callback aborts the computation once
        # the task has been cancelled. This strategy is only suitable
        # for code that cleans up after itself when an exception occurs
        # (no global locks or open file descriptors left behind, ...).
        if task.cancelled:
            raise KeyboardInterrupt()
        set_progress(finished * 100)

    # Bind the instrumented callback as well.
    learning_curve_func = partial(learning_curve_func, callback=report)
    # [end-snippet-7]

    # [start-snippet-8]
    self.progressBarInit()
    # Hand the evaluation function to the executor and store the
    # resulting Future on the task.
    task.future = self._executor.submit(learning_curve_func)
    # The FutureWatcher notifies us of completion; using it ensures the
    # `_task_finished` slot is called from the main GUI thread by Qt's
    # event loop.
    task.watcher = FutureWatcher(task.future)
    task.watcher.done.connect(self._task_finished)