def test_methodinvoke(self):
    """Check the thread affinity of a call made through ``methodinvoke``.

    A worker task calls the ``methodinvoke`` wrapper with its own
    ``QThread``.  The test then asserts that

    * the slot itself ran in the thread executing this test, and
    * the value it received (the caller's thread) is a different thread.

    The executor is shut down in a ``finally`` clause so a failing
    assertion does not leave worker threads running.
    """
    executor = ThreadExecutor()
    try:
        # state[0]: thread the callback was issued from (value passed in)
        # state[1]: thread in which the slot body actually executed
        state = [None, None]

        class StateSetter(QObject):
            @pyqtSlot(object)
            def set_state(self, value):
                state[0] = value
                state[1] = QThread.currentThread()

        def func(callback):
            callback(QThread.currentThread())

        obj = StateSetter()
        f1 = executor.submit(func, methodinvoke(obj, "set_state", (object,)))
        f1.result()

        # So invoked method can be called from the event loop
        self.app.processEvents()

        self.assertIs(
            state[1],
            QThread.currentThread(),
            "set_state was called from the wrong thread",
        )
        self.assertIsNot(
            state[0],
            QThread.currentThread(),
            "set_state was invoked in the main thread",
        )
    finally:
        executor.shutdown(wait=True)
def test_methodinvoke(self):
    """Check the thread affinity of a call made through ``methodinvoke``.

    ``func`` is submitted to a ``ThreadExecutor`` and calls the
    ``methodinvoke`` wrapper, passing the ``QThread`` it runs in.  After
    the event loop has processed pending events the test asserts that the
    slot ran in the thread executing this test, while the value it
    received (the submitting task's thread) is a different thread.

    ``executor.shutdown`` lives in a ``finally`` clause so that a failed
    assertion or a raising future does not leak the executor's threads.
    """
    executor = ThreadExecutor()
    try:
        # Slot 0 holds the argument received by the slot, slot 1 the
        # thread in which the slot was executed.
        state = [None, None]

        class StateSetter(QObject):
            @pyqtSlot(object)
            def set_state(self, value):
                state[0] = value
                state[1] = QThread.currentThread()

        def func(callback):
            callback(QThread.currentThread())

        obj = StateSetter()
        invoke = methodinvoke(obj, "set_state", (object,))
        f1 = executor.submit(func, invoke)
        f1.result()

        # So invoked method can be called from the event loop
        self.app.processEvents()

        self.assertIs(
            state[1],
            QThread.currentThread(),
            "set_state was called from the wrong thread",
        )
        self.assertIsNot(
            state[0],
            QThread.currentThread(),
            "set_state was invoked in the main thread",
        )
    finally:
        executor.shutdown(wait=True)
def test_executor(self):
    """Exercise ``ThreadExecutor.submit`` and ``submit_task``.

    Asserts that

    * a submitted callable runs in a thread other than the current one,
    * an exception raised by the callable is re-raised by
      ``Future.result()``,
    * a ``Task`` submitted through ``submit_task`` also runs off the
      current thread.

    The executor is shut down in ``finally`` so a failing assertion does
    not leave it running.
    """
    executor = ThreadExecutor()
    try:
        f = executor.submit(QThread.currentThread)
        self.assertIsNot(f.result(3), QThread.currentThread())

        f = executor.submit(lambda: 1 / 0)
        with self.assertRaises(ZeroDivisionError):
            f.result()

        # The list only serves as a sink for the directly connected
        # ``resultReady`` signal; its contents are not asserted on.
        results = []
        task = Task(function=QThread.currentThread)
        task.resultReady.connect(results.append, Qt.DirectConnection)

        f = executor.submit_task(task)
        self.assertIsNot(f.result(3), QThread.currentThread())
    finally:
        executor.shutdown()
def test_executor(self):
    """Exercise ``ThreadExecutor.submit`` and ``submit_task``.

    Three things are verified: a plain callable is executed in a thread
    different from the current one, an exception raised inside the
    callable surfaces from ``Future.result()``, and a ``Task`` handed to
    ``submit_task`` is likewise executed off the current thread.

    Shutdown happens in a ``finally`` block so the executor is released
    even when one of the assertions fails.
    """
    executor = ThreadExecutor()
    try:
        future = executor.submit(QThread.currentThread)
        self.assertIsNot(future.result(3), QThread.currentThread())

        future = executor.submit(lambda: 1 / 0)
        with self.assertRaises(ZeroDivisionError):
            future.result()

        # Receiver for the directly connected ``resultReady`` signal; the
        # collected values are not checked by this test.
        results = []
        task = Task(function=QThread.currentThread)
        task.resultReady.connect(results.append, Qt.DirectConnection)

        future = executor.submit_task(task)
        self.assertIsNot(future.result(3), QThread.currentThread())
    finally:
        executor.shutdown()
class OWImportSamples(OWWidget): name = "Import Samples" icon = "icons/import.svg" want_main_area = False resizing_enabled = False priority = 1 outputs = [("Data", Table)] username = settings.Setting('') password = settings.Setting('') selected_server = settings.Setting(0) combo_items = settings.Setting([]) def __init__(self): super().__init__() self.res = None self.data = None self._datatask = None self._executor = ThreadExecutor() """Choose server""" box = gui.widgetBox(self.controlArea, 'Server') box.setSizePolicy(Policy.Minimum, Policy.Fixed) self.servers = gui.comboBox(box, self, "selected_server", editable=True, items=self.combo_items, callback=self.on_server_changed) """set credentials""" box = gui.widgetBox(self.controlArea, 'Credentials') box.setSizePolicy(Policy.Minimum, Policy.Fixed) self.name_field = gui.lineEdit(box, self, "username", "Username:"******"password", "Password:"******"""display info""" box = gui.vBox(self.controlArea, "Info") box.setSizePolicy(Policy.Minimum, Policy.Fixed) self.info = gui.widgetLabel(box, 'No data loaded.') gui.rubber(self.controlArea) self.auth_set() if self.username and self.password: self.connect() def _on_exception(self): self._update_info(error_msg='Error while downloading data...\n' 'Please check your connection.') def _update_info(self, error_msg=None): if not error_msg: if self._datatask is not None: if not self._datatask.future().done(): self.info.setText('Retrieving data...') self._handle_inputs(False) if self.data: self._handle_inputs(True) self.info.setText('Data ready: {} samples loaded.'.format( len(self.data))) else: self.info.setText(error_msg) def _handle_inputs(self, enable): self.name_field.setEnabled(enable) self.pass_field.setEnabled(enable) self.servers.setEnabled(enable) def _handle_styles(self, login=False, server=False, user=False, passwd=False): if login: self.name_field.setFocus() self.name_field.setStyleSheet(error_red) self.pass_field.setStyleSheet(error_red) elif server: 
self.servers.setFocus() self.servers.setStyleSheet(error_red) elif user: self.name_field.setFocus() self.name_field.setStyleSheet(error_red) elif passwd: self.pass_field.setFocus() self.pass_field.setStyleSheet(error_red) def _reset_styles(self): self.pass_field.setStyleSheet('') self.name_field.setStyleSheet('') self.servers.setStyleSheet('') def on_server_changed(self): if self.servers.itemText(self.selected_server) != '': if self.username and self.password: self.connect() elif not self.username: self._handle_styles(user=True) elif not self.password: self._handle_styles(passwd=True) else: self._handle_styles(server=True) def auth_set(self): self.pass_field.setDisabled(not self.username) self.pass_field.setFocus() def auth_changed(self): self.auth_set() if self.servers.itemText(self.selected_server) == '': self._handle_styles(server=True) else: self._reset_styles() self.connect() def commit(self): self.data = self._datatask.result() self._datatask = None self._update_info() if self.data: self.send("Data", to_orange_table(self.data)) def connect(self): self.res = None self.data = None if self.username and self.password: self._reset_styles() """Store widget settings (Login)""" self.combo_items = [ self.servers.itemText(i) for i in range(self.servers.count()) ] self.selected_server = self.servers.currentIndex() try: self.res = ResolweAPI( self.username, self.password, self.servers.itemText(self.selected_server)) except (ResolweCredentialsException, ResolweServerException, Exception) as e: error_name = type(e).__name__ if error_name == 'ResolweCredentialsException': self._update_info(error_msg=str(e)) self._handle_styles(login=True) elif error_name == 'ResolweServerException' or error_name == 'MissingSchema': self._update_info(error_msg=str(e)) self._handle_styles(server=True) else: self._update_info(error_msg=str(e)) if self.res: self._datatask = DownloadTask(self.res) self._datatask.finished.connect(self.commit) self._datatask.exception.connect(self._on_exception) 
self._executor.submit(self._datatask) self._update_info() def onDeleteWidget(self): super().onDeleteWidget() self._executor.shutdown(wait=False)
class OWGeneInfo(widget.OWWidget):
    """Widget showing gene information records for genes in the input data.

    Gene names are taken either from the input table's attribute names
    (``useAttr``) or from the values of a selected string/discrete column.
    The records are fetched in a worker thread through ``self.executor``
    and shown in a sortable, filterable tree view; the rows selected in
    the view determine the "Data Subset" output.
    """

    name = "Gene Info"
    description = "Displays gene information from NCBI and other sources."
    icon = "../widgets/icons/GeneInfo.svg"
    priority = 2010

    inputs = [("Data", Orange.data.Table, "setData")]
    outputs = [("Data Subset", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    organism_index = settings.ContextSetting(0)
    taxid = settings.ContextSetting("9606")

    gene_attr = settings.ContextSetting(0)

    auto_commit = settings.Setting(False)
    search_string = settings.Setting("")

    useAttr = settings.ContextSetting(False)
    useAltSource = settings.ContextSetting(False)

    def __init__(self, parent=None):
        """Build the GUI and start the background initialization task."""
        # The original passed ``self`` a second time as the first
        # positional argument; only ``parent`` belongs here.
        super().__init__(parent)

        self.selectionChangedFlag = False
        self.__initialized = False
        self.initfuture = None
        self.itemsfuture = None

        self.infoLabel = gui.widgetLabel(
            gui.widgetBox(self.controlArea, "Info", addSpace=True),
            "Initializing\n")

        self.organisms = None
        self.organismBox = gui.widgetBox(
            self.controlArea, "Organism", addSpace=True)

        self.organismComboBox = gui.comboBox(
            self.organismBox, self, "organism_index",
            callback=self._onSelectedOrganismChanged)

        # For now only support one alt source, with a checkbox
        # In the future this can be extended to multiple selections
        self.altSourceCheck = gui.checkBox(
            self.organismBox, self, "useAltSource",
            "Show information from dictyBase",
            callback=self.onAltSourceChange)
        self.altSourceCheck.hide()

        box = gui.widgetBox(self.controlArea, "Gene names", addSpace=True)
        self.geneAttrComboBox = gui.comboBox(
            box, self, "gene_attr", "Gene attribute",
            callback=self.updateInfoItems)
        self.geneAttrComboBox.setEnabled(not self.useAttr)
        cb = gui.checkBox(box, self, "useAttr", "Use attribute names",
                          callback=self.updateInfoItems)
        cb.toggled[bool].connect(self.geneAttrComboBox.setDisabled)

        gui.auto_commit(self.controlArea, self, "auto_commit", "Commit")

        # A label for the dictyExpress link
        self.dictyExpressBox = gui.widgetBox(
            self.controlArea, "Dicty Express")
        self.linkLabel = gui.widgetLabel(self.dictyExpressBox, "")
        self.linkLabel.setOpenExternalLinks(False)
        self.linkLabel.linkActivated.connect(self.onDictyExpressLink)
        self.dictyExpressBox.hide()

        gui.rubber(self.controlArea)

        gui.lineEdit(self.mainArea, self, "search_string", "Filter",
                     callbackOnType=True, callback=self.searchUpdate)

        self.treeWidget = QTreeView(
            self.mainArea,
            selectionMode=QTreeView.ExtendedSelection,
            rootIsDecorated=False,
            uniformRowHeights=True,
            sortingEnabled=True)

        self.treeWidget.setItemDelegate(
            gui.LinkStyledItemDelegate(self.treeWidget))
        self.treeWidget.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.treeWidget)

        box = gui.widgetBox(self.mainArea, "", orientation="horizontal")
        gui.button(box, self, "Select Filtered",
                   callback=self.selectFiltered)
        gui.button(box, self, "Clear Selection",
                   callback=self.treeWidget.clearSelection)

        self.geneinfo = []
        self.cells = []
        self.row2geneinfo = {}
        self.data = None
        # Read by inputGenes() / _onItemsCompleted(); give them a defined
        # value before any input arrives.
        self.attributes = []
        self.genes = []

        #: (number of input genes, number of matched genes)
        self.matchedInfo = 0, 0

        self.setBlocking(True)
        self.executor = ThreadExecutor(self)

        self.progressBarInit()

        task = Task(
            function=partial(
                taxonomy.ensure_downloaded,
                callback=methodinvoke(self, "advance", ())))

        task.resultReady.connect(self.initialize)
        task.exceptionReady.connect(self._onInitializeError)

        self.initfuture = self.executor.submit(task)

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(1024, 720)

    @Slot()
    def advance(self):
        """Advance the progress bar by one step (must run in the GUI thread)."""
        assert self.thread() is QThread.currentThread()
        self.progressBarSet(self.progressBarValue + 1, processEvents=None)

    def initialize(self):
        """Populate the organism list once the initialization task is done.

        Safe to call more than once; later calls return immediately.
        """
        if self.__initialized:
            # Already initialized
            return
        self.__initialized = True

        self.organisms = sorted(
            set([name.split(".")[-2]
                 for name in serverfiles.listfiles("NCBI_geneinfo")]
                + gene.NCBIGeneInfo.common_taxids()))

        self.organismComboBox.addItems(
            [taxonomy.name(tax_id) for tax_id in self.organisms])

        if self.taxid in self.organisms:
            self.organism_index = self.organisms.index(self.taxid)
        else:
            self.organism_index = 0
            self.taxid = self.organisms[self.organism_index]

        self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID)
        self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID)

        self.infoLabel.setText("No data on input\n")
        self.initfuture = None

        self.setBlocking(False)
        self.progressBarFinished(processEvents=None)

    def _onInitializeError(self, exc):
        """Report a failure of the initialization task."""
        sys.excepthook(type(exc), exc, None)
        self.error(0, "Could not download the necessary files.")

    def _onSelectedOrganismChanged(self):
        """React to a new selection in the organism combo box."""
        # Valid indices are 0 .. len - 1; the original ``<=`` upper bound
        # let an out-of-range index through to the lookup below.
        assert 0 <= self.organism_index < len(self.organisms)
        self.taxid = self.organisms[self.organism_index]
        self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID)
        self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID)

        if self.data is not None:
            self.updateInfoItems()

    def setData(self, data=None):
        """Set the input data table (``None`` clears the widget).

        Raises:
            Exception: if a previous info retrieval is still in progress.
        """
        if not self.__initialized:
            # Block until the initialization task has finished.
            self.initfuture.result()
            self.initialize()

        if self.itemsfuture is not None:
            raise Exception("Already processing")

        self.closeContext()
        self.data = data

        if data is not None:
            self.geneAttrComboBox.clear()
            self.attributes = [
                attr
                for attr in data.domain.variables + data.domain.metas
                if isinstance(attr, (Orange.data.StringVariable,
                                     Orange.data.DiscreteVariable))
            ]

            for var in self.attributes:
                self.geneAttrComboBox.addItem(*gui.attributeItem(var))

            self.taxid = data_hints.get_hint(self.data, "taxid", self.taxid)
            self.useAttr = data_hints.get_hint(
                self.data, "genesinrows", self.useAttr)

            self.openContext(data)
            self.gene_attr = min(self.gene_attr, len(self.attributes) - 1)

            if self.taxid in self.organisms:
                self.organism_index = self.organisms.index(self.taxid)
            else:
                self.organism_index = 0
                self.taxid = self.organisms[self.organism_index]

            self.updateInfoItems()
        else:
            self.clear()

    def infoSource(self):
        """
        Return the current selected info source getter function from
        INFO_SOURCES
        """
        org = self.organisms[min(self.organism_index,
                                 len(self.organisms) - 1)]
        if org not in INFO_SOURCES:
            org = "default"
        sources = INFO_SOURCES[org]
        name, func = sources[min(self.useAltSource, len(sources) - 1)]
        return name, func

    def inputGenes(self):
        """Return the list of gene names extracted from the input data."""
        if self.useAttr:
            genes = [attr.name for attr in self.data.domain.attributes]
        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            genes = [str(ex[attr]) for ex in self.data
                     if not math.isnan(ex[attr])]
        else:
            genes = []
        return genes

    def updateInfoItems(self):
        """Start a background task retrieving info records for the input genes."""
        self.warning(0)
        if self.data is None:
            return

        # The original repeated the body of inputGenes() right after
        # calling it; the single call yields the same list.
        genes = self.inputGenes()
        if not genes:
            self.warning(0, "Could not extract genes from input dataset.")

        self.warning(1)
        org = self.organisms[min(self.organism_index,
                                 len(self.organisms) - 1)]
        _, info_getter = self.infoSource()

        self.error(0)

        self.updateDictyExpressLink(genes, show=org == DICTY_TAXID)
        self.altSourceCheck.setVisible(org == DICTY_TAXID)

        self.progressBarInit()
        self.setBlocking(True)
        self.setEnabled(False)
        self.infoLabel.setText("Retrieving info records.\n")

        self.genes = genes

        task = Task(
            function=partial(
                info_getter, org, genes,
                advance=methodinvoke(self, "advance", ())))
        self.itemsfuture = self.executor.submit(task)
        task.finished.connect(self._onItemsCompleted)

    def _onItemsCompleted(self):
        """Display the retrieved records once the retrieval task finished."""
        self.setBlocking(False)
        self.progressBarFinished()
        self.setEnabled(True)

        try:
            schema, geneinfo = self.itemsfuture.result()
        finally:
            # Always drop the future so that setData() accepts new input.
            self.itemsfuture = None

        self.geneinfo = geneinfo = list(zip(self.genes, geneinfo))
        self.cells = cells = []
        self.row2geneinfo = {}
        links = []
        for i, (_, gi) in enumerate(geneinfo):
            if gi:
                row = []
                for _, item in zip(schema, gi):
                    if isinstance(item, Link):
                        # TODO: This should be handled by delegates
                        row.append(item.text)
                        links.append(item.link)
                    else:
                        row.append(item)
                cells.append(row)
                # Map the view row back to its index in self.geneinfo.
                self.row2geneinfo[len(cells) - 1] = i

        model = TreeModel(cells, [str(col) for col in schema], None)
        model.setColumnLinks(0, links)

        proxyModel = QSortFilterProxyModel(self)
        proxyModel.setSourceModel(model)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(
            self.commit)

        for i in range(7):
            self.treeWidget.resizeColumnToContents(i)
            self.treeWidget.setColumnWidth(
                i, min(self.treeWidget.columnWidth(i), 200))

        self.infoLabel.setText("%i genes\n%i matched NCBI's IDs" %
                               (len(self.genes), len(cells)))
        self.matchedInfo = len(self.genes), len(cells)

    def clear(self):
        """Reset the view to an empty table and send ``None`` on the output."""
        self.infoLabel.setText("No data on input\n")
        self.treeWidget.setModel(
            TreeModel([], ["NCBI ID", "Symbol", "Locus Tag", "Chromosome",
                           "Description", "Synonyms", "Nomenclature"],
                      self.treeWidget))

        self.geneAttrComboBox.clear()
        self.send("Data Subset", None)

    def commit(self):
        """Send the subset of the input data matching the selected rows."""
        if self.data is None:
            self.send("Data Subset", None)
            return

        model = self.treeWidget.model()
        selection = self.treeWidget.selectionModel().selection()
        selection = model.mapSelectionToSource(selection)
        selectedRows = list(
            chain(*(range(r.top(), r.bottom() + 1) for r in selection)))

        model = model.sourceModel()

        selectedGeneids = [self.row2geneinfo[row] for row in selectedRows]
        selectedIds = {self.geneinfo[i][0] for i in selectedGeneids}
        gene2row = {self.geneinfo[self.row2geneinfo[row]][0]: row
                    for row in selectedRows}

        isselected = selectedIds.__contains__

        if self.useAttr:
            attrs = [attr for attr in self.data.domain.attributes
                     if isselected(attr.name)]
            domain = Orange.data.Domain(
                attrs, self.data.domain.class_vars, self.data.domain.metas)
            newdata = self.data.from_table(domain, self.data)
            self.send("Data Subset", newdata)

        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            gene_col = [attr.str_val(v)
                        for v in self.data.get_column_view(attr)[0]]
            gene_col = [(i, name) for i, name in enumerate(gene_col)
                        if isselected(name)]
            indices = [i for i, _ in gene_col]

            # Add a gene info columns to the output
            headers = [str(model.headerData(i, Qt.Horizontal, Qt.DisplayRole))
                       for i in range(model.columnCount())]
            metas = [Orange.data.StringVariable(name) for name in headers]
            domain = Orange.data.Domain(
                self.data.domain.attributes, self.data.domain.class_vars,
                self.data.domain.metas + tuple(metas))

            newdata = self.data.from_table(domain, self.data)[indices]

            model_rows = [gene2row[gene] for _, gene in gene_col]
            for col, meta in zip(range(model.columnCount()), metas):
                col_data = [str(model.index(row, col).data(Qt.DisplayRole))
                            for row in model_rows]
                col_data = np.array(col_data, dtype=object, ndmin=2).T
                newdata[:, meta] = col_data

            if not len(newdata):
                newdata = None

            self.send("Data Subset", newdata)
        else:
            self.send("Data Subset", None)

    def rowFiltered(self, row):
        """Return True if ``row`` does not contain every search term."""
        searchStrings = self.search_string.lower().split()
        row = " ".join(self.cells[row]).lower()
        return not all(s in row for s in searchStrings)

    def searchUpdate(self):
        """Hide the rows that do not match the current filter string."""
        if not self.data:
            return
        searchStrings = self.search_string.lower().split()
        index = self.treeWidget.model().sourceModel().index
        mapFromSource = self.treeWidget.model().mapFromSource
        for i, row in enumerate(self.cells):
            row = " ".join(row).lower()
            self.treeWidget.setRowHidden(
                mapFromSource(index(i, 0)).row(),
                QModelIndex(),
                not all(s in row for s in searchStrings))

    def selectFiltered(self):
        """Select every row that passes the current filter string."""
        if not self.data:
            return
        itemSelection = QItemSelection()

        index = self.treeWidget.model().sourceModel().index
        mapFromSource = self.treeWidget.model().mapFromSource
        for i, _ in enumerate(self.cells):
            if not self.rowFiltered(i):
                itemSelection.select(mapFromSource(index(i, 0)),
                                     mapFromSource(index(i, 0)))
        self.treeWidget.selectionModel().select(
            itemSelection,
            QItemSelectionModel.Select | QItemSelectionModel.Rows)

    @staticmethod
    def _fixDictyGeneId(ddb):
        """Return ``ddb`` with "DDB" rewritten to "DDB_G", or None.

        Names not starting with "DDB" yield None; names that already start
        with "DDB_G" are returned unchanged.
        """
        if not ddb.startswith("DDB"):
            return None
        if not ddb.startswith("DDB_G"):
            ddb = ddb.replace("DDB", "DDB_G")
        return ddb

    def updateDictyExpressLink(self, genes, show=False):
        """Update the dictyExpress link label and the visibility of its box."""
        if show:
            fixed = (self._fixDictyGeneId(gene) for gene in genes)
            genes = [gene for gene in fixed if gene]
            link1 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s">Microarray profile</a>'
            link2 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s&db=rnaseq">RNA-Seq profile</a>'
            self.linkLabel.setText(link1 + "<br/>" + link2)

            # Only show the box when at least one gene id is usable.
            show = any(genes)

        if show:
            self.dictyExpressBox.show()
        else:
            self.dictyExpressBox.hide()

    def onDictyExpressLink(self, link):
        """Open the activated dictyExpress ``link`` for the selected genes."""
        if not self.data:
            return

        selectedIndexes = self.treeWidget.selectedIndexes()
        if not len(selectedIndexes):
            QMessageBox.information(
                self, "No gene ids selected",
                "Please select some genes and try again.")
            return
        mapToSource = self.treeWidget.model().mapToSource
        selectedRows = [mapToSource(index).row()
                        for index in selectedIndexes]

        selectedGeneids = [self.row2geneinfo[row] for row in selectedRows]
        selectedIds = {self.geneinfo[i][0] for i in selectedGeneids}

        fixed = (self._fixDictyGeneId(gene) for gene in selectedIds)
        genes = [gene for gene in fixed if gene]
        # ``link`` carries a single "%s" placeholder for the gene list.
        url = str(link) % " ".join(genes)
        QDesktopServices.openUrl(QUrl(url))

    def onAltSourceChange(self):
        """Re-fetch the info records after the alternative source is toggled."""
        self.updateInfoItems()

    def onDeleteWidget(self):
        """Cancel pending tasks and shut the executor down without waiting."""
        # try to cancel pending tasks
        if self.initfuture:
            self.initfuture.cancel()
        if self.itemsfuture:
            self.itemsfuture.cancel()

        self.executor.shutdown(wait=False)
        super().onDeleteWidget()
class OWSetEnrichment(widget.OWWidget): name = "Set Enrichment" description = "" icon = "../widgets/icons/GeneSetEnrichment.svg" priority = 5000 inputs = [("Data", Orange.data.Table, "setData", widget.Default), ("Reference", Orange.data.Table, "setReference")] outputs = [("Data subset", Orange.data.Table)] settingsHandler = settings.DomainContextHandler() taxid = settings.ContextSetting(None) speciesIndex = settings.ContextSetting(0) genesinrows = settings.ContextSetting(False) geneattr = settings.ContextSetting(0) categoriesCheckState = settings.ContextSetting({}) useReferenceData = settings.Setting(False) useMinCountFilter = settings.Setting(True) useMaxPValFilter = settings.Setting(True) useMaxFDRFilter = settings.Setting(True) minClusterCount = settings.Setting(3) maxPValue = settings.Setting(0.01) maxFDR = settings.Setting(0.01) autocommit = settings.Setting(False) Ready, Initializing, Loading, RunningEnrichment = 0, 1, 2, 4 def __init__(self, parent=None): super().__init__(parent) self.geneMatcherSettings = [False, False, True, False] self.data = None self.referenceData = None self.taxid_list = [] self.__genematcher = (None, fulfill(gene.matcher([]))) self.__invalidated = False self.currentAnnotatedCategories = [] self.state = None self.__state = OWSetEnrichment.Initializing box = gui.widgetBox(self.controlArea, "Info") self.infoBox = gui.widgetLabel(box, "Info") self.infoBox.setText("No data on input.\n") self.speciesComboBox = gui.comboBox( self.controlArea, self, "speciesIndex", "Species", callback=self.__on_speciesIndexChanged) box = gui.widgetBox(self.controlArea, "Entity names") self.geneAttrComboBox = gui.comboBox( box, self, "geneattr", "Entity feature", sendSelectedValue=0, callback=self.updateAnnotations) cb = gui.checkBox( box, self, "genesinrows", "Use feature names", callback=self.updateAnnotations, disables=[(-1, self.geneAttrComboBox)]) cb.makeConsistent() # gui.button(box, self, "Gene matcher settings", # 
callback=self.updateGeneMatcherSettings, # tooltip="Open gene matching settings dialog") self.referenceRadioBox = gui.radioButtonsInBox( self.controlArea, self, "useReferenceData", ["All entities", "Reference set (input)"], tooltips=["Use entire genome (for gene set enrichment) or all " + "available entities for reference", "Use entities from Reference Examples input signal " + "as reference"], box="Reference", callback=self.updateAnnotations) box = gui.widgetBox(self.controlArea, "Entity Sets") self.groupsWidget = QtGui.QTreeWidget(self) self.groupsWidget.setHeaderLabels(["Category"]) box.layout().addWidget(self.groupsWidget) hLayout = QtGui.QHBoxLayout() hLayout.setSpacing(10) hWidget = gui.widgetBox(self.mainArea, orientation=hLayout) gui.spin(hWidget, self, "minClusterCount", 0, 100, label="Entities", tooltip="Minimum entity count", callback=self.filterAnnotationsChartView, callbackOnReturn=True, checked="useMinCountFilter", checkCallback=self.filterAnnotationsChartView) pvalfilterbox = gui.widgetBox(hWidget, orientation="horizontal") cb = gui.checkBox( pvalfilterbox, self, "useMaxPValFilter", "p-value", callback=self.filterAnnotationsChartView) sp = gui.doubleSpin( pvalfilterbox, self, "maxPValue", 0.0, 1.0, 0.0001, tooltip="Maximum p-value", callback=self.filterAnnotationsChartView, callbackOnReturn=True, ) sp.setEnabled(self.useMaxFDRFilter) cb.toggled[bool].connect(sp.setEnabled) pvalfilterbox.layout().setAlignment(cb, Qt.AlignRight) pvalfilterbox.layout().setAlignment(sp, Qt.AlignLeft) fdrfilterbox = gui.widgetBox(hWidget, orientation="horizontal") cb = gui.checkBox( fdrfilterbox, self, "useMaxFDRFilter", "FDR", callback=self.filterAnnotationsChartView) sp = gui.doubleSpin( fdrfilterbox, self, "maxFDR", 0.0, 1.0, 0.0001, tooltip="Maximum False discovery rate", callback=self.filterAnnotationsChartView, callbackOnReturn=True, ) sp.setEnabled(self.useMaxFDRFilter) cb.toggled[bool].connect(sp.setEnabled) fdrfilterbox.layout().setAlignment(cb, Qt.AlignRight) 
fdrfilterbox.layout().setAlignment(sp, Qt.AlignLeft) self.filterLineEdit = QtGui.QLineEdit( self, placeholderText="Filter ...") self.filterCompleter = QtGui.QCompleter(self.filterLineEdit) self.filterCompleter.setCaseSensitivity(Qt.CaseInsensitive) self.filterLineEdit.setCompleter(self.filterCompleter) hLayout.addWidget(self.filterLineEdit) self.mainArea.layout().addWidget(hWidget) self.filterLineEdit.textChanged.connect( self.filterAnnotationsChartView) self.annotationsChartView = QtGui.QTreeView( alternatingRowColors=True, sortingEnabled=True, selectionMode=QtGui.QTreeView.ExtendedSelection, rootIsDecorated=False, editTriggers=QtGui.QTreeView.NoEditTriggers, ) self.annotationsChartView.viewport().setMouseTracking(True) self.mainArea.layout().addWidget(self.annotationsChartView) contextEventFilter = gui.VisibleHeaderSectionContextEventFilter( self.annotationsChartView) self.annotationsChartView.header().installEventFilter(contextEventFilter) self.groupsWidget.itemClicked.connect(self.subsetSelectionChanged) gui.auto_commit(self.controlArea, self, "autocommit", "Commit") self.setBlocking(True) task = EnsureDownloaded( [("Taxonomy", "ncbi_taxonomy.tar.gz"), (geneset.sfdomain, "index.pck")] ) task.finished.connect(self.__initialize_finish) self.setStatusMessage("Initializing") self._executor = ThreadExecutor( parent=self, threadPool=QtCore.QThreadPool(self)) self._executor.submit(task) def sizeHint(self): return QtCore.QSize(1024, 600) def __initialize_finish(self): # Finalize the the widget's initialization (preferably after # ensuring all required databases have been downloaded. 
sets = geneset.list_all() taxids = set(taxonomy.common_taxids() + list(filter(None, [tid for _, tid, _ in sets]))) organisms = [(tid, name_or_none(tid)) for tid in taxids] organisms = [(tid, name) for tid, name in organisms if name is not None] organisms = [(None, "None")] + sorted(organisms) taxids = [tid for tid, _ in organisms] names = [name for _, name in organisms] self.taxid_list = taxids self.speciesComboBox.clear() self.speciesComboBox.addItems(names) self.genesets = sets if self.taxid in self.taxid_list: taxid = self.taxid else: taxid = self.taxid_list[0] self.taxid = None self.setCurrentOrganism(taxid) self.setBlocking(False) self.__state = OWSetEnrichment.Ready self.setStatusMessage("") def setCurrentOrganism(self, taxid): """Set the current organism `taxid`.""" if taxid not in self.taxid_list: taxid = self.taxid_list[min(self.speciesIndex, len(self.taxid_list) - 1)] if self.taxid != taxid: self.taxid = taxid self.speciesIndex = self.taxid_list.index(taxid) self.refreshHierarchy() self._invalidateGeneMatcher() self._invalidate() def currentOrganism(self): """Return the current organism taxid""" return self.taxid def __on_speciesIndexChanged(self): taxid = self.taxid_list[self.speciesIndex] self.taxid = "< Do not look >" self.setCurrentOrganism(taxid) if self.__invalidated and self.data is not None: self.updateAnnotations() def clear(self): """Clear/reset the widget state.""" self._cancelPending() self.state = None self.__state = self.__state & ~OWSetEnrichment.RunningEnrichment self._clearView() if self.annotationsChartView.model() is not None: self.annotationsChartView.model().clear() self.geneAttrComboBox.clear() self.geneAttrs = [] self._updatesummary() def _cancelPending(self): """Cancel pending tasks.""" if self.state is not None: self.state.results.cancel() self.state.namematcher.cancel() self.state.cancelled = True def _clearView(self): """Clear the enrichment report view (main area).""" if self.annotationsChartView.model() is not None: 
self.annotationsChartView.model().clear() def setData(self, data=None): """Set the input dataset with query gene names""" if self.__state & OWSetEnrichment.Initializing: self.__initialize_finish() self.error(0) self.closeContext() self.clear() self.groupsWidget.clear() self.data = data if data is not None: varlist = [var for var in data.domain.variables + data.domain.metas if isinstance(var, Orange.data.StringVariable)] self.geneAttrs = varlist for var in varlist: self.geneAttrComboBox.addItem(*gui.attributeItem(var)) oldtaxid = self.taxid self.geneattr = min(self.geneattr, len(self.geneAttrs) - 1) taxid = data_hints.get_hint(data, "taxid", "") if taxid in self.taxid_list: self.speciesIndex = self.taxid_list.index(taxid) self.taxid = taxid self.genesinrows = data_hints.get_hint( data, "genesinrows", self.genesinrows) self.openContext(data) if oldtaxid != self.taxid: self.taxid = "< Do not look >" self.setCurrentOrganism(taxid) self.refreshHierarchy() self._invalidate() def setReference(self, data=None): """Set the (optional) input dataset with reference gene names.""" self.referenceData = data self.referenceRadioBox.setEnabled(bool(data)) if self.useReferenceData: self._invalidate() def handleNewSignals(self): if self.__invalidated: self.updateAnnotations() def _invalidateGeneMatcher(self): _, f = self.__genematcher f.cancel() self.__genematcher = (None, fulfill(gene.matcher([]))) def _invalidate(self): self.__invalidated = True def genesFromTable(self, table): if self.genesinrows: genes = [attr.name for attr in table.domain.attributes] else: geneattr = self.geneAttrs[self.geneattr] genes = [str(ex[geneattr]) for ex in table] return genes def getHierarchy(self, taxid): def recursive_dict(): return defaultdict(recursive_dict) collection = recursive_dict() def collect(col, hier): if hier: collect(col[hier[0]], hier[1:]) for hierarchy, t_id, _ in self.genesets: collect(collection[t_id], hierarchy) return (taxid, collection[taxid]), (None, collection[None]) def 
setHierarchy(self, hierarchy, hierarchy_noorg): self.groupsWidgetItems = {} def fill(col, parent, full=(), org=""): for key, value in sorted(col.items()): full_cat = full + (key,) item = QtGui.QTreeWidgetItem(parent, [key]) item.setFlags(item.flags() | Qt.ItemIsUserCheckable | Qt.ItemIsSelectable | Qt.ItemIsEnabled) if value: item.setFlags(item.flags() | Qt.ItemIsTristate) checked = self.categoriesCheckState.get( (full_cat, org), Qt.Checked) item.setData(0, Qt.CheckStateRole, checked) item.setExpanded(True) item.category = full_cat item.organism = org self.groupsWidgetItems[full_cat] = item fill(value, item, full_cat, org=org) self.groupsWidget.clear() fill(hierarchy[1], self.groupsWidget, org=hierarchy[0]) fill(hierarchy_noorg[1], self.groupsWidget, org=hierarchy_noorg[0]) def refreshHierarchy(self): self.setHierarchy(*self.getHierarchy(taxid=self.taxid_list[self.speciesIndex])) def selectedCategories(self): """ Return a list of currently selected hierarchy keys. A key is a tuple of identifiers from the root to the leaf of the hierarchy tree. """ return [key for key, check in self.getHierarchyCheckState().items() if check == Qt.Checked] def getHierarchyCheckState(self): def collect(item, full=()): checked = item.checkState(0) name = str(item.data(0, Qt.DisplayRole)) full_cat = full + (name,) result = [((full_cat, item.organism), checked)] for i in range(item.childCount()): result.extend(collect(item.child(i), full_cat)) return result items = [self.groupsWidget.topLevelItem(i) for i in range(self.groupsWidget.topLevelItemCount())] states = itertools.chain(*(collect(item) for item in items)) return dict(states) def subsetSelectionChanged(self, item, column): # The selected geneset (hierarchy) subset has been changed by the # user. Update the displayed results. 
# Update the stored state (persistent settings) self.categoriesCheckState = self.getHierarchyCheckState() categories = self.selectedCategories() if self.data is not None: if self._nogenematching() or \ not set(categories) <= set(self.currentAnnotatedCategories): self.updateAnnotations() else: self.filterAnnotationsChartView() def updateGeneMatcherSettings(self): raise NotImplementedError from .OWGOEnrichmentAnalysis import GeneMatcherDialog dialog = GeneMatcherDialog(self, defaults=self.geneMatcherSettings, enabled=[True] * 4, modal=True) if dialog.exec_(): self.geneMatcherSettings = [getattr(dialog, item[0]) for item in dialog.items] self._invalidateGeneMatcher() if self.data is not None: self.updateAnnotations() def _genematcher(self): """ Return a Future[gene.SequenceMatcher] """ taxid = self.taxid_list[self.speciesIndex] current, matcher_f = self.__genematcher if taxid == current and \ not matcher_f.cancelled(): return matcher_f self._invalidateGeneMatcher() if taxid is None: self.__genematcher = (None, fulfill(gene.matcher([]))) return self.__genematcher[1] matchers = [gene.GMGO, gene.GMKEGG, gene.GMNCBI, gene.GMAffy] matchers = [m for m, use in zip(matchers, self.geneMatcherSettings) if use] def create(): return gene.matcher([m(taxid) for m in matchers]) matcher_f = self._executor.submit(create) self.__genematcher = (taxid, matcher_f) return self.__genematcher[1] def _nogenematching(self): return self.taxid is None or not any(self.geneMatcherSettings) def updateAnnotations(self): if self.data is None: return assert not self.__state & OWSetEnrichment.Initializing self._cancelPending() self._clearView() self.information(0) self.warning(0) self.error(0) if not self.genesinrows and len(self.geneAttrs) == 0: self.error(0, "Input data contains no attributes with gene names") return self.__state = OWSetEnrichment.RunningEnrichment taxid = self.taxid_list[self.speciesIndex] self.taxid = taxid categories = self.selectedCategories() clusterGenes = 
self.genesFromTable(self.data) if self.referenceData is not None and self.useReferenceData: referenceGenes = self.genesFromTable(self.referenceData) else: referenceGenes = None self.currentAnnotatedCategories = categories genematcher = self._genematcher() self.progressBarInit() ## Load collections in a worker thread # TODO: Use cached collections if already loaded and # use ensure_genesetsdownloaded with progress report (OWSelectGenes) collections = self._executor.submit(geneset.collections, *categories) def refset_null(): """Return the default background reference set""" col = collections.result() return reduce(operator.ior, (set(g.genes) for g in col), set()) def refset_ncbi(): """Return all NCBI gene names""" geneinfo = gene.NCBIGeneInfo(taxid) return set(geneinfo.keys()) def namematcher(): matcher = genematcher.result() match = matcher.set_targets(ref_set.result()) match.umatch = memoize(match.umatch) return match def map_unames(): matcher = namematcher.result() query = list(filter(None, map(matcher.umatch, querynames))) reference = list(filter(None, map(matcher.umatch, ref_set.result()))) return query, reference if self._nogenematching(): if referenceGenes is None: ref_set = self._executor.submit(refset_null) else: ref_set = fulfill(referenceGenes) else: if referenceGenes == None: ref_set = self._executor.submit(refset_ncbi) else: ref_set = fulfill(referenceGenes) namematcher = self._executor.submit(namematcher) querynames = clusterGenes state = types.SimpleNamespace() state.query_set = clusterGenes state.reference_set = referenceGenes state.namematcher = namematcher state.query_count = len(set(clusterGenes)) state.reference_count = (len(set(referenceGenes)) if referenceGenes is not None else None) state.cancelled = False progress = methodinvoke(self, "_setProgress", (float,)) info = methodinvoke(self, "_setRunInfo", (str,)) @withtraceback def run(): info("Loading data") match = namematcher.result() query, reference = map_unames() gscollections = 
collections.result() results = [] info("Running enrichment") p = 0 for i, gset in enumerate(gscollections): genes = set(filter(None, map(match.umatch, gset.genes))) enr = set_enrichment(genes, reference, query) results.append((gset, enr)) if state.cancelled: raise UserInteruptException pnew = int(100 * i / len(gscollections)) if pnew != p: progress(pnew) p = pnew progress(100) info("") return query, reference, results task = Task(function=run) task.resultReady.connect(self.__on_enrichment_finished) task.exceptionReady.connect(self.__on_enrichment_failed) result = self._executor.submit(task) state.results = result self.state = state self._updatesummary() def __on_enrichment_failed(self, exception): if not isinstance(exception, UserInteruptException): print("ERROR:", exception, file=sys.stderr) print(exception._traceback, file=sys.stderr) self.progressBarFinished() self.setStatusMessage("") self.__state &= ~OWSetEnrichment.RunningEnrichment def __on_enrichment_finished(self, results): assert QThread.currentThread() is self.thread() self.__state &= ~OWSetEnrichment.RunningEnrichment query, reference, results = results if self.annotationsChartView.model(): self.annotationsChartView.model().clear() nquery = len(query) nref = len(reference) maxcount = max((len(e.query_mapped) for _, e in results), default=1) maxrefcount = max((len(e.reference_mapped) for _, e in results), default=1) nspaces = int(math.ceil(math.log10(maxcount or 1))) refspaces = int(math.ceil(math.log(maxrefcount or 1))) query_fmt = "%" + str(nspaces) + "s (%.2f%%)" ref_fmt = "%" + str(refspaces) + "s (%.2f%%)" def fmt_count(fmt, count, total): return fmt % (count, 100.0 * count / (total or 1)) fmt_query_count = partial(fmt_count, query_fmt) fmt_ref_count = partial(fmt_count, ref_fmt) linkFont = QtGui.QFont(self.annotationsChartView.viewOptions().font) linkFont.setUnderline(True) def item(value=None, tooltip=None, user=None): si = QtGui.QStandardItem() if value is not None: si.setData(value, 
Qt.DisplayRole) if tooltip is not None: si.setData(tooltip, Qt.ToolTipRole) if user is not None: si.setData(user, Qt.UserRole) else: si.setData(value, Qt.UserRole) return si model = QtGui.QStandardItemModel() model.setSortRole(Qt.UserRole) model.setHorizontalHeaderLabels( ["Category", "Term", "Count", "Reference count", "p-value", "FDR", "Enrichment"]) for i, (gset, enrich) in enumerate(results): if len(enrich.query_mapped) == 0: continue nquery_mapped = len(enrich.query_mapped) nref_mapped = len(enrich.reference_mapped) row = [ item(", ".join(gset.hierarchy)), item(gsname(gset), tooltip=gset.link), item(fmt_query_count(nquery_mapped, nquery), tooltip=nquery_mapped, user=nquery_mapped), item(fmt_ref_count(nref_mapped, nref), tooltip=nref_mapped, user=nref_mapped), item(fmtp(enrich.p_value), user=enrich.p_value), item(), # column 5, FDR, is computed in filterAnnotationsChartView item(enrich.enrichment_score, tooltip="%.3f" % enrich.enrichment_score, user=enrich.enrichment_score) ] row[0].geneset = gset row[0].enrichment = enrich row[1].setData(gset.link, gui.LinkRole) row[1].setFont(linkFont) row[1].setForeground(QtGui.QColor(Qt.blue)) model.appendRow(row) self.annotationsChartView.setModel(model) self.annotationsChartView.selectionModel().selectionChanged.connect( self.commit ) if not model.rowCount(): self.warning(0, "No enriched sets found.") else: self.warning(0) allnames = set(gsname(geneset) for geneset, (count, _, _, _) in results if count) allnames |= reduce(operator.ior, (set(word_split(name)) for name in allnames), set()) self.filterCompleter.setModel(None) self.completerModel = QtGui.QStringListModel(sorted(allnames)) self.filterCompleter.setModel(self.completerModel) if results: max_score = max((e.enrichment_score for _, e in results if np.isfinite(e.enrichment_score)), default=1) self.annotationsChartView.setItemDelegateForColumn( 6, BarItemDelegate(self, scale=(0.0, max_score)) ) self.annotationsChartView.setItemDelegateForColumn( 1, 
gui.LinkStyledItemDelegate(self.annotationsChartView) ) header = self.annotationsChartView.header() for i in range(model.columnCount()): sh = self.annotationsChartView.sizeHintForColumn(i) sh = max(sh, header.sectionSizeHint(i)) self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30)) # self.annotationsChartView.resizeColumnToContents(i) self.filterAnnotationsChartView() self.progressBarFinished() self.setStatusMessage("") def _updatesummary(self): state = self.state if state is None: self.error(0,) self.warning(0) self.infoBox.setText("No data on input.\n") return text = "{.query_count} unique names on input\n".format(state) if state.results.done() and not state.results.exception(): mapped, _, _ = state.results.result() ratio_mapped = (len(mapped) / state.query_count if state.query_count else 0) text += ("%i (%.1f%%) gene names matched" % (len(mapped), 100.0 * ratio_mapped)) elif not state.results.done(): text += "..." else: text += "<Error {}>".format(str(state.results.exception())) self.infoBox.setText(text) # TODO: warn on no enriched sets found (i.e no query genes # mapped to any set) def filterAnnotationsChartView(self, filterString=""): if self.__state & OWSetEnrichment.RunningEnrichment: return # TODO: Move filtering to a filter proxy model # TODO: Re-enable string search categories = set(", ".join(cat) for cat, _ in self.selectedCategories()) # filterString = str(self.filterLineEdit.text()).lower() model = self.annotationsChartView.model() def ishidden(index): # Is item at index (row) hidden item = model.item(index) item_cat = item.data(Qt.DisplayRole) return item_cat not in categories hidemask = [ishidden(i) for i in range(model.rowCount())] # compute FDR according the selected categories pvals = [model.item(i, 4).data(Qt.UserRole) for i, hidden in enumerate(hidemask) if not hidden] fdrs = utils.stats.FDR(pvals) # update FDR for the selected collections and apply filtering rules itemsHidden = [] fdriter = iter(fdrs) for index, hidden in 
enumerate(hidemask): if not hidden: fdr = next(fdriter) pval = model.index(index, 4).data(Qt.UserRole) count = model.index(index, 2).data(Qt.ToolTipRole) hidden = (self.useMinCountFilter and count < self.minClusterCount) or \ (self.useMaxPValFilter and pval > self.maxPValue) or \ (self.useMaxFDRFilter and fdr > self.maxFDR) if not hidden: fdr_item = model.item(index, 5) fdr_item.setData(fmtpdet(fdr), Qt.ToolTipRole) fdr_item.setData(fmtp(fdr), Qt.DisplayRole) fdr_item.setData(fdr, Qt.UserRole) self.annotationsChartView.setRowHidden( index, QModelIndex(), hidden) itemsHidden.append(hidden) if model.rowCount() and all(itemsHidden): self.information(0, "All sets were filtered out.") else: self.information(0) self._updatesummary() @Slot(float) def _setProgress(self, value): assert QThread.currentThread() is self.thread() self.progressBarSet(value, processEvents=None) @Slot(str) def _setRunInfo(self, text): self.setStatusMessage(text) def commit(self): if self.data is None or \ self.__state & OWSetEnrichment.RunningEnrichment: return model = self.annotationsChartView.model() rows = self.annotationsChartView.selectionModel().selectedRows(0) selected = [model.item(index.row(), 0) for index in rows] mapped = reduce(operator.ior, (item.enrichment.query_mapped for item in selected), set()) assert self.state.namematcher.done() matcher = self.state.namematcher.result() axis = 1 if self.genesinrows else 0 if axis == 1: mapped = [attr for attr in self.data.domain.attributes if matcher.umatch(attr.name) in mapped] newdomain = Orange.data.Domain( mapped, self.data.domain.class_vars, self.data.domain.metas) data = self.data.from_table(newdomain, self.data) else: geneattr = self.geneAttrs[self.geneattr] selected = [i for i, ex in enumerate(self.data) if matcher.umatch(str(ex[geneattr])) in mapped] data = self.data[selected] self.send("Data subset", data) def onDeleteWidget(self): if self.state is not None: self._cancelPending() self.state = None self._executor.shutdown(wait=False)
class OWImportImages(widget.OWWidget):
    """
    Widget that scans a directory for images and outputs a data table.

    The scan (an `ImageScan` task) runs on a `ThreadExecutor`; its result
    is collected in `__onRunFinished` and turned into a table by `commit`.
    """
    name = "Import Images"
    description = "Import images from a directory(s)"
    icon = "icons/ImportImages.svg"
    priority = 110

    outputs = [("Data", Orange.data.Table)]

    #: list of recent paths
    recent_paths = settings.Setting([])  # type: List[RecentPath]
    currentPath = settings.Setting(None)

    want_main_area = False
    resizing_enabled = False

    Modality = Qt.ApplicationModal
    # Modality = Qt.WindowModal

    MaxRecentItems = 20

    def __init__(self):
        super().__init__()
        #: widget's runtime state
        self.__state = State.NoState
        self._imageMeta = []
        self._imageCategories = {}
        self.__invalidated = False
        self.__pendingTask = None

        vbox = gui.vBox(self.controlArea)
        hbox = gui.hBox(vbox)
        self.recent_cb = QComboBox(
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon,
            minimumContentsLength=16,
        )
        self.recent_cb.activated[int].connect(self.__onRecentActivated)
        icons = standard_icons(self)

        browseaction = QAction(
            "Open/Load Images", self,
            iconText="\N{HORIZONTAL ELLIPSIS}",
            icon=icons.dir_open_icon,
            toolTip="Select a directory from which to load the images"
        )
        browseaction.triggered.connect(self.__runOpenDialog)
        reloadaction = QAction(
            "Reload", self,
            icon=icons.reload_icon,
            toolTip="Reload current image set"
        )
        reloadaction.triggered.connect(self.reload)
        self.__actions = namespace(
            browse=browseaction,
            reload=reloadaction,
        )

        browsebutton = QPushButton(
            browseaction.iconText(),
            icon=browseaction.icon(),
            toolTip=browseaction.toolTip(),
            clicked=browseaction.trigger
        )
        reloadbutton = QPushButton(
            reloadaction.iconText(),
            icon=reloadaction.icon(),
            clicked=reloadaction.trigger,
            default=True,
        )

        hbox.layout().addWidget(self.recent_cb)
        hbox.layout().addWidget(browsebutton)
        hbox.layout().addWidget(reloadbutton)

        self.addActions([browseaction, reloadaction])

        # Keep the button's enabled state in sync with its action.
        reloadaction.changed.connect(
            lambda: reloadbutton.setEnabled(reloadaction.isEnabled())
        )
        box = gui.vBox(vbox, "Info")
        # Page 0 shows the textual summary, page 1 the progress/cancel row.
        self.infostack = QStackedWidget()

        self.info_area = QLabel(
            text="No image set selected",
            wordWrap=True
        )
        self.progress_widget = QProgressBar(
            minimum=0, maximum=0
        )
        self.cancel_button = QPushButton(
            "Cancel", icon=icons.cancel_icon,
        )
        self.cancel_button.clicked.connect(self.cancel)

        w = QWidget()
        vlayout = QVBoxLayout()
        vlayout.setContentsMargins(0, 0, 0, 0)
        hlayout = QHBoxLayout()
        hlayout.setContentsMargins(0, 0, 0, 0)

        hlayout.addWidget(self.progress_widget)
        hlayout.addWidget(self.cancel_button)
        vlayout.addLayout(hlayout)

        self.pathlabel = TextLabel()
        self.pathlabel.setTextElideMode(Qt.ElideMiddle)
        self.pathlabel.setAttribute(Qt.WA_MacSmallSize)

        vlayout.addWidget(self.pathlabel)
        w.setLayout(vlayout)

        self.infostack.addWidget(self.info_area)
        self.infostack.addWidget(w)

        box.layout().addWidget(self.infostack)

        self.__initRecentItemsModel()
        self.__invalidated = True
        self.__executor = ThreadExecutor(self)

        # The initial scan is started from `customEvent`, once this event
        # has been delivered.
        QApplication.postEvent(self, QEvent(RuntimeEvent.Init))

    def __initRecentItemsModel(self):
        """
        Populate the recent-paths combo box from the stored settings.

        Entries whose directory no longer exists are dropped and the list
        is capped at `MaxRecentItems`. `currentPath` is kept only when it
        refers to the same directory as the first recent entry.
        """
        if self.currentPath is not None and \
                not os.path.isdir(self.currentPath):
            self.currentPath = None

        recent_paths = [item for item in self.recent_paths
                        if os.path.isdir(item.abspath)]
        recent_paths = recent_paths[:OWImportImages.MaxRecentItems]
        recent_model = self.recent_cb.model()
        for pathitem in recent_paths:
            item = RecentPath_asqstandarditem(pathitem)
            recent_model.appendRow(item)

        self.recent_paths = recent_paths

        if self.currentPath is not None and \
                os.path.isdir(self.currentPath) and self.recent_paths and \
                os.path.samefile(self.currentPath,
                                 self.recent_paths[0].abspath):
            self.recent_cb.setCurrentIndex(0)
        else:
            self.currentPath = None
            self.recent_cb.setCurrentIndex(-1)
        self.__actions.reload.setEnabled(self.currentPath is not None)

    def customEvent(self, event):
        """Reimplemented."""
        if event.type() == RuntimeEvent.Init:
            if self.__invalidated:
                try:
                    self.start()
                finally:
                    self.__invalidated = False

        super().customEvent(event)

    def __runOpenDialog(self):
        """
        Let the user pick a directory and start scanning it.

        The dialog opens at the most recent path, or the home directory
        when there is none.
        """
        startdir = os.path.expanduser("~/")
        if self.recent_paths:
            startdir = self.recent_paths[0].abspath

        if OWImportImages.Modality == Qt.WindowModal:
            dlg = QFileDialog(
                self, "Select Top Level Directory", startdir,
                acceptMode=QFileDialog.AcceptOpen,
                modal=True,
            )
            dlg.setFileMode(QFileDialog.Directory)
            dlg.setOption(QFileDialog.ShowDirsOnly)
            dlg.setDirectory(startdir)
            dlg.setAttribute(Qt.WA_DeleteOnClose)

            @dlg.accepted.connect
            def on_accepted():
                dirpath = dlg.selectedFiles()
                if dirpath:
                    self.setCurrentPath(dirpath[0])
                    self.start()
            dlg.open()
        else:
            dirpath = QFileDialog.getExistingDirectory(
                self, "Select Top Level Directory", startdir
            )
            if dirpath:
                self.setCurrentPath(dirpath)
                self.start()

    def __onRecentActivated(self, index):
        """Switch to the recent path at combo box `index` and rescan."""
        item = self.recent_cb.itemData(index)
        if item is None:
            return
        assert isinstance(item, RecentPath)
        self.setCurrentPath(item.abspath)
        self.start()

    def __updateInfo(self):
        """Refresh the info label and stacked page from the runtime state."""
        if self.__state == State.NoState:
            text = "No image set selected"
        elif self.__state == State.Processing:
            text = "Processing"
        elif self.__state == State.Done:
            nvalid = sum(imeta.isvalid for imeta in self._imageMeta)
            ncategories = len(self._imageCategories)
            if ncategories < 2:
                text = "{} images".format(nvalid)
            else:
                text = "{} images / {} categories".format(nvalid, ncategories)
        elif self.__state == State.Cancelled:
            text = "Cancelled"
        elif self.__state == State.Error:
            text = "Error state"
        else:
            assert False

        self.info_area.setText(text)

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

    def setCurrentPath(self, path):
        """
        Set the current root image path to path

        If the path does not exist or is not a directory the current path
        is left unchanged

        Parameters
        ----------
        path : str
            New root import path.

        Returns
        -------
        status : bool
            True if the current root import path was successfully
            changed to path.
        """
        if path is None:
            # `os.path.exists(None)` raises TypeError; treat a missing path
            # like any other invalid path and leave the state untouched.
            return False

        if self.currentPath is not None and \
                os.path.isdir(self.currentPath) and os.path.isdir(path) and \
                os.path.samefile(self.currentPath, path):
            return True

        if not os.path.exists(path):
            warnings.warn("'{}' does not exist".format(path), UserWarning)
            return False
        elif not os.path.isdir(path):
            warnings.warn("'{}' is not a directory".format(path), UserWarning)
            return False

        newindex = self.addRecentPath(path)
        self.recent_cb.setCurrentIndex(newindex)
        if newindex >= 0:
            self.currentPath = path
        else:
            self.currentPath = None
        self.__actions.reload.setEnabled(self.currentPath is not None)

        # A scan of the previous path is no longer of interest.
        if self.__state == State.Processing:
            self.cancel()

        return True

    def addRecentPath(self, path):
        """
        Prepend a path entry to the list of recent paths

        If an entry with the same path already exists in the recent path
        list it is moved to the first place

        Parameters
        ----------
        path : str

        Returns
        -------
        index : int
            Index of the entry in the recent paths list (always 0).
        """
        existing = None
        for pathitem in self.recent_paths:
            try:
                if os.path.samefile(pathitem.abspath, path):
                    existing = pathitem
                    break
            except OSError:
                # `samefile` stats both paths; a remembered directory that
                # has since been removed must not abort the lookup.
                continue

        model = self.recent_cb.model()

        if existing is not None:
            selected_index = self.recent_paths.index(existing)
            assert model.item(selected_index).data(Qt.UserRole) is existing
            self.recent_paths.remove(existing)
            row = model.takeRow(selected_index)
            self.recent_paths.insert(0, existing)
            model.insertRow(0, row)
        else:
            item = RecentPath(path, None, None)
            self.recent_paths.insert(0, item)
            model.insertRow(0, RecentPath_asqstandarditem(item))
        return 0

    def __setRuntimeState(self, state):
        """
        Move the widget to runtime `state` and update the UI accordingly.

        The asserts encode the allowed transitions: `Processing` may only
        be entered from a non-processing state, and `Done`/`Cancelled`
        only from `Processing`.
        """
        assert state in State
        self.setBlocking(state == State.Processing)
        message = ""
        if state == State.Processing:
            assert self.__state in [State.Done,
                                    State.NoState,
                                    State.Error,
                                    State.Cancelled]
            message = "Processing"
        elif state == State.Done:
            assert self.__state == State.Processing
        elif state == State.Cancelled:
            assert self.__state == State.Processing
            message = "Cancelled"
        elif state == State.Error:
            message = "Error during processing"
        elif state == State.NoState:
            message = ""
        else:
            assert False

        self.__state = state

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

        self.setStatusMessage(message)
        self.__updateInfo()

    def reload(self):
        """
        Restart the image scan task
        """
        if self.__state == State.Processing:
            self.cancel()

        self._imageMeta = []
        self._imageCategories = {}
        self.start()

    def start(self):
        """
        Start/execute the image indexing operation
        """
        self.error()

        self.__invalidated = False
        if self.currentPath is None:
            return

        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            log.info("Starting a new task while one is in progress. "
                     "Cancel the existing task (dir:'{}')"
                     .format(self.__pendingTask.startdir))
            self.cancel()

        startdir = self.currentPath

        self.__setRuntimeState(State.Processing)

        report_progress = methodinvoke(
            self, "__onReportProgress", (object,))

        task = ImageScan(startdir, report_progress=report_progress)

        # collect the task state in one convenient place
        self.__pendingTask = taskstate = namespace(
            task=task,
            startdir=startdir,
            future=None,
            watcher=None,
            cancelled=False,
            cancel=None,
        )

        def cancel():
            # Cancel the task and disconnect
            if taskstate.future.cancel():
                # Not started yet; nothing to wait for.
                pass
            else:
                # Already running: ask the task to stop and wait briefly.
                taskstate.task.cancelled = True
                taskstate.cancelled = True
                try:
                    taskstate.future.result(timeout=3)
                except UserInterruptError:
                    pass
                # NOTE(review): `Future.result` raises
                # concurrent.futures.TimeoutError, which is the builtin
                # TimeoutError only on Python >= 3.11 - confirm which name
                # the file-level imports bind here.
                except TimeoutError:
                    log.info("The task did not stop in in a timely manner")
            taskstate.watcher.finished.disconnect(self.__onRunFinished)

        taskstate.cancel = cancel

        def run_image_scan_task_interupt():
            try:
                return task.run()
            except UserInterruptError:
                # Suppress interrupt errors, so they are not logged
                return

        taskstate.future = self.__executor.submit(run_image_scan_task_interupt)
        taskstate.watcher = FutureWatcher(taskstate.future)
        taskstate.watcher.finished.connect(self.__onRunFinished)

    @Slot()
    def __onRunFinished(self):
        """
        Collect the finished scan's result, update state and commit.

        On failure the exception is reported through `sys.excepthook`, the
        widget enters `State.Error` and an empty result is committed.
        """
        assert QThread.currentThread() is self.thread()
        assert self.__state == State.Processing
        assert self.__pendingTask is not None
        assert self.sender() is self.__pendingTask.watcher
        assert self.__pendingTask.future.done()
        task = self.__pendingTask
        self.__pendingTask = None

        try:
            image_meta = task.future.result()
        except Exception:
            sys.excepthook(*sys.exc_info())
            state = State.Error
            image_meta = []
            self.error(traceback.format_exc())
        else:
            state = State.Done
            self.error()

        categories = {}

        for imeta in image_meta:
            # derive categories from the path relative to the starting dir
            dirname = os.path.dirname(imeta.path)
            relpath = os.path.relpath(dirname, task.startdir)
            categories[dirname] = relpath

        self._imageMeta = image_meta
        self._imageCategories = categories

        self.__setRuntimeState(state)
        self.commit()

    def cancel(self):
        """
        Cancel current pending task (if any).
        """
        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            self.__pendingTask.cancel()
            self.__pendingTask = None
            self.__setRuntimeState(State.Cancelled)

    @Slot(object)
    def __onReportProgress(self, arg):
        # report on scan progress from a worker thread
        # arg must be a namespace(count: int, lastpath: str)
        assert QThread.currentThread() is self.thread()
        if self.__state == State.Processing:
            self.pathlabel.setText(prettyfypath(arg.lastpath))

    def commit(self):
        """
        Create and commit a Table from the collected image meta data.

        Only valid image records are included. A discrete "category" class
        variable is added when more than one category directory was found.
        `None` is sent when there is no image meta data.
        """
        if self._imageMeta:
            categories = self._imageCategories
            if len(categories) > 1:
                cat_var = Orange.data.DiscreteVariable(
                    "category", values=list(sorted(categories.values()))
                )
            else:
                cat_var = None
            # Image name (file basename without the extension)
            imagename_var = Orange.data.StringVariable("image name")
            # Full fs path
            image_var = Orange.data.StringVariable("image")
            image_var.attributes["type"] = "image"
            # file size/width/height
            size_var = Orange.data.ContinuousVariable(
                "size", number_of_decimals=0)
            width_var = Orange.data.ContinuousVariable(
                "width", number_of_decimals=0)
            height_var = Orange.data.ContinuousVariable(
                "height", number_of_decimals=0)
            domain = Orange.data.Domain(
                [], [cat_var] if cat_var is not None else [],
                [imagename_var, image_var, size_var, width_var, height_var]
            )
            cat_data = []
            meta_data = []

            for imgmeta in self._imageMeta:
                if imgmeta.isvalid:
                    if cat_var is not None:
                        category = categories.get(
                            os.path.dirname(imgmeta.path))
                        cat_data.append([cat_var.to_val(category)])
                    else:
                        cat_data.append([])
                    basename = os.path.basename(imgmeta.path)
                    imgname, _ = os.path.splitext(basename)

                    meta_data.append(
                        [imgname, imgmeta.path, imgmeta.size,
                         imgmeta.width, imgmeta.height]
                    )

            cat_data = numpy.array(cat_data, dtype=float)
            meta_data = numpy.array(meta_data, dtype=object)
            # The table has no attribute columns, only class and metas.
            table = Orange.data.Table.from_numpy(
                domain, numpy.empty((len(cat_data), 0), dtype=float),
                cat_data, meta_data
            )
        else:
            table = None

        self.send("Data", table)

    def onDeleteWidget(self):
        """Cancel any running scan and shut the executor down."""
        self.cancel()
        self.__executor.shutdown(wait=True)
        super().onDeleteWidget()
class OWGeneInfo(widget.OWWidget):
    """
    Widget that looks up gene information for the genes in the input data.

    Gene names are read either from a selected column or from the column
    names. The info records are fetched on a `ThreadExecutor` and shown in
    a filterable tree view. Selecting rows sends the matching subset of
    the input on the "Selected Genes" output.
    """
    name = "Gene Info"
    description = "Displays gene information from NCBI and other sources."
    icon = "../widgets/icons/OWGeneInfo.svg"
    priority = 5

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        selected_genes = Output("Selected Genes", Orange.data.Table)
        data = Output("Data", Orange.data.Table)

    settingsHandler = settings.DomainContextHandler()

    organism_index = settings.ContextSetting(0)
    taxid = settings.ContextSetting("9606")

    gene_attr = settings.ContextSetting(0)

    auto_commit = settings.Setting(False)
    search_string = settings.Setting("")

    useAttr = settings.ContextSetting(False)
    useAltSource = settings.ContextSetting(False)

    def __init__(self, parent=None):
        # Was `super().__init__(self, parent)`, which passed `self` twice.
        super().__init__(parent)

        self.selectionChangedFlag = False
        self.__initialized = False
        self.initfuture = None
        self.itemsfuture = None
        self.map_input_to_ensembl = None

        self.infoLabel = gui.widgetLabel(
            gui.widgetBox(self.controlArea, "Info", addSpace=True),
            "Initializing\n")

        self.organisms = None
        self.organismBox = gui.widgetBox(self.controlArea, "Organism",
                                         addSpace=True)

        self.organismComboBox = gui.comboBox(
            self.organismBox, self, "organism_index",
            callback=self._onSelectedOrganismChanged)

        box = gui.widgetBox(self.controlArea, "Gene names", addSpace=True)
        self.geneAttrComboBox = gui.comboBox(box, self, "gene_attr",
                                             "Gene attribute",
                                             callback=self.updateInfoItems)
        self.geneAttrComboBox.setEnabled(not self.useAttr)

        self.geneAttrCheckbox = gui.checkBox(box, self, "useAttr",
                                             "Use column names",
                                             callback=self.updateInfoItems)
        # The column selector is irrelevant when column names are used.
        self.geneAttrCheckbox.toggled[bool].connect(
            self.geneAttrComboBox.setDisabled)

        gui.auto_commit(self.controlArea, self, "auto_commit", "Commit")

        gui.rubber(self.controlArea)

        gui.lineEdit(self.mainArea, self, "search_string", "Filter",
                     callbackOnType=True, callback=self.searchUpdate)

        self.treeWidget = QTreeView(self.mainArea)
        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setSelectionMode(QTreeView.ExtendedSelection)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setRootIsDecorated(False)

        self.treeWidget.setItemDelegateForColumn(
            HEADER_SCHEMA['NCBI ID'],
            gui.LinkStyledItemDelegate(self.treeWidget))
        self.treeWidget.setItemDelegateForColumn(
            HEADER_SCHEMA['Ensembl ID'],
            gui.LinkStyledItemDelegate(self.treeWidget))

        self.treeWidget.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.treeWidget)

        box = gui.widgetBox(self.mainArea, "", orientation="horizontal")
        gui.button(box, self, "Select Filtered", callback=self.selectFiltered)
        gui.button(box, self, "Clear Selection",
                   callback=self.treeWidget.clearSelection)

        self.geneinfo = []
        self.cells = []
        self.row2geneinfo = {}
        self.data = None

        # : (# input genes, # matches genes)
        self.matchedInfo = 0, 0

        # The widget stays blocked until `initialize` has run.
        self.setBlocking(True)
        self.executor = ThreadExecutor(self)

        self.progressBarInit()

        task = Task(
            function=partial(taxonomy.ensure_downloaded,
                             callback=methodinvoke(self, "advance", ())))

        task.resultReady.connect(self.initialize)
        task.exceptionReady.connect(self._onInitializeError)

        self.initfuture = self.executor.submit(task)

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(1024, 720)

    @Slot()
    def advance(self):
        """Advance the progress bar by one step (GUI thread only)."""
        assert self.thread() is QThread.currentThread()
        self.progressBarSet(self.progressBarValue + 1, processEvents=None)

    def _get_available_organisms(self):
        """Fill `self.organisms` and the organism combo box, sorted by name."""
        available_organism = sorted(
            [(tax_id, taxonomy.name(tax_id))
             for tax_id in taxonomy.common_taxids()],
            key=lambda x: x[1])

        self.organisms = [tax_id[0] for tax_id in available_organism]
        self.organismComboBox.addItems(
            [tax_id[1] for tax_id in available_organism])

    def initialize(self):
        """
        Finish set-up once the taxonomy files are available.

        Safe to call more than once; only the first call has an effect.
        """
        if self.__initialized:
            # Already initialized
            return
        self.__initialized = True

        self._get_available_organisms()
        self.organism_index = self.organisms.index(taxonomy.DEFAULT_ORGANISM)
        self.taxid = self.organisms[self.organism_index]

        self.infoLabel.setText("No data on input\n")
        self.initfuture = None

        self.setBlocking(False)
        self.progressBarFinished(processEvents=None)

    def _onInitializeError(self, exc):
        """Report a failed initialization download."""
        # Pass the exception's own traceback; the original passed None and
        # so discarded it.
        sys.excepthook(type(exc), exc, exc.__traceback__)
        self.error(0, "Could not download the necessary files.")

    def _onSelectedOrganismChanged(self):
        """Update `taxid` from the combo box and refresh the info items."""
        # Strict upper bound: `len(self.organisms)` is not a valid index.
        assert 0 <= self.organism_index < len(self.organisms)
        self.taxid = self.organisms[self.organism_index]

        if self.data is not None:
            self.updateInfoItems()

    @Inputs.data
    def setData(self, data=None):
        """
        Set the input data table and start retrieving gene info for it.

        Raises
        ------
        Exception
            If a previous retrieval is still in progress.
        """
        if not self.__initialized:
            # Block until the initialization task has finished.
            self.initfuture.result()
            self.initialize()

        if self.itemsfuture is not None:
            raise Exception("Already processing")

        self.data = data

        if data is not None:
            self.geneAttrComboBox.clear()
            # Only string and discrete columns can hold gene names.
            self.attributes = [
                attr for attr in data.domain.variables + data.domain.metas
                if isinstance(attr, (Orange.data.StringVariable,
                                     Orange.data.DiscreteVariable))
            ]

            for var in self.attributes:
                self.geneAttrComboBox.addItem(*gui.attributeItem(var))

            self.taxid = str(self.data.attributes.get(TAX_ID, ''))
            self.useAttr = self.data.attributes.get(GENE_AS_ATTRIBUTE_NAME,
                                                    self.useAttr)

            self.gene_attr = min(self.gene_attr, len(self.attributes) - 1)

            if self.taxid in self.organisms:
                self.organism_index = self.organisms.index(self.taxid)

            self.updateInfoItems()
        else:
            self.clear()

    def updateInfoItems(self):
        """
        Extract gene names from the input and fetch their info records.

        The records are fetched on the executor; `_onItemsCompleted` runs
        when the task has finished.
        """
        self.warning(0)
        if self.data is None:
            return

        if self.useAttr:
            genes = [attr.name for attr in self.data.domain.attributes]
        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            genes = [
                str(ex[attr]) for ex in self.data
                if not math.isnan(ex[attr])
            ]
        else:
            genes = []
        if not genes:
            self.warning(0, "Could not extract genes from input dataset.")

        self.warning(1)
        org = self.organisms[min(self.organism_index,
                                 len(self.organisms) - 1)]
        source_name, info_getter = ("NCBI Info", ncbi_info)

        self.error(0)

        self.progressBarInit()
        self.setBlocking(True)
        self.setEnabled(False)
        self.infoLabel.setText("Retrieving info records.\n")

        self.genes = genes

        task = Task(function=partial(
            info_getter, org, genes,
            advance=methodinvoke(self, "advance", ())))
        self.itemsfuture = self.executor.submit(task)
        task.finished.connect(self._onItemsCompleted)

    def _onItemsCompleted(self):
        """
        Show the retrieved records and send the annotated input data.

        Any exception raised by the retrieval task propagates from here.
        """
        self.setBlocking(False)
        self.progressBarFinished()
        self.setEnabled(True)

        try:
            self.map_input_to_ensembl, geneinfo = self.itemsfuture.result()
        finally:
            self.itemsfuture = None

        self.geneinfo = geneinfo
        self.cells = cells = []
        self.row2geneinfo = {}

        for i, (input_name, gi) in enumerate(geneinfo):
            if gi:
                row = list(gi)
                # parse synonyms
                row[HEADER_SCHEMA['Synonyms']] = ','.join(
                    row[HEADER_SCHEMA['Synonyms']])
                cells.append(row)
                # Map the view row back to its position in `geneinfo`.
                self.row2geneinfo[len(cells) - 1] = i

        model = TreeModel(cells, list(HEADER_SCHEMA.keys()), None)
        proxyModel = QSortFilterProxyModel(self)
        proxyModel.setSourceModel(model)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(self.commit)

        for i in range(len(HEADER_SCHEMA)):
            self.treeWidget.resizeColumnToContents(i)
            self.treeWidget.setColumnWidth(
                i, min(self.treeWidget.columnWidth(i), 200))

        self.infoLabel.setText("%i genes\n%i matched NCBI's IDs" %
                               (len(self.genes), len(cells)))
        self.matchedInfo = len(self.genes), len(cells)

        id_map = self.map_input_to_ensembl
        if self.useAttr:
            new_data = self.data.from_table(self.data.domain, self.data)
            for gene_var in new_data.domain.attributes:
                # Unmatched names get an empty id, as in the column-based
                # branch below, instead of raising KeyError.
                gene_var.attributes['Ensembl ID'] = (
                    str(id_map[gene_var.name])
                    if gene_var.name in id_map else '')
            self.Outputs.data.send(new_data)
        elif self.attributes:
            ensembl_ids = []
            for gene_name in self.data.get_column_view(
                    self.attributes[self.gene_attr])[0]:
                if gene_name and gene_name in id_map:
                    ensembl_ids.append(id_map[gene_name])
                else:
                    ensembl_ids.append('')

            data_with_ensembl = append_columns(
                self.data,
                metas=[(Orange.data.StringVariable('Ensembl ID'),
                        ensembl_ids)])
            self.Outputs.data.send(data_with_ensembl)

    def clear(self):
        """Reset the view to the empty "no data" state."""
        self.infoLabel.setText("No data on input\n")
        self.treeWidget.setModel(
            TreeModel([], [
                "NCBI ID", "Symbol", "Locus Tag", "Chromosome", "Description",
                "Synonyms", "Nomenclature"
            ], self.treeWidget))

        self.geneAttrComboBox.clear()
        self.Outputs.selected_genes.send(None)

    def commit(self):
        """
        Send the part of the input that matches the selected view rows.

        With "use column names" the selected genes pick columns; otherwise
        they pick rows, which are extended with meta columns holding the
        displayed info.
        """
        if self.data is None:
            self.Outputs.selected_genes.send(None)
            self.Outputs.data.send(None)
            return

        model = self.treeWidget.model()
        selection = self.treeWidget.selectionModel().selection()
        selection = model.mapSelectionToSource(selection)
        selectedRows = list(
            chain(*(range(r.top(), r.bottom() + 1) for r in selection)))

        model = model.sourceModel()

        selectedGeneids = [self.row2geneinfo[row] for row in selectedRows]
        selectedIds = {self.geneinfo[i][0] for i in selectedGeneids}
        gene2row = {self.geneinfo[self.row2geneinfo[row]][0]: row
                    for row in selectedRows}

        isselected = selectedIds.__contains__

        if selectedIds:
            if self.useAttr:
                attrs = [
                    attr for attr in self.data.domain.attributes
                    if isselected(attr.name)
                ]
                domain = Orange.data.Domain(attrs,
                                            self.data.domain.class_vars,
                                            self.data.domain.metas)
                newdata = self.data.from_table(domain, self.data)
                self.Outputs.selected_genes.send(newdata)

            elif self.attributes:
                attr = self.attributes[self.gene_attr]
                gene_col = [
                    attr.str_val(v)
                    for v in self.data.get_column_view(attr)[0]
                ]
                gene_col = [(i, name) for i, name in enumerate(gene_col)
                            if isselected(name)]
                indices = [i for i, _ in gene_col]

                # SELECTED GENES OUTPUT
                selected_genes_metas = [
                    Orange.data.StringVariable(name)
                    for name in gene.GENE_INFO_HEADER_LABELS
                ]
                selected_genes_domain = Orange.data.Domain(
                    self.data.domain.attributes,
                    self.data.domain.class_vars,
                    self.data.domain.metas + tuple(selected_genes_metas))

                selected_genes_data = self.data.from_table(
                    selected_genes_domain, self.data)[indices]

                model_rows = [gene2row[gene_name] for _, gene_name in gene_col]
                for col, meta in zip(range(model.columnCount()),
                                     selected_genes_metas):
                    col_data = [
                        str(model.index(row, col).data(Qt.DisplayRole))
                        for row in model_rows
                    ]
                    # Column vector, one entry per selected data row.
                    col_data = np.array(col_data, dtype=object, ndmin=2).T
                    selected_genes_data[:, meta] = col_data

                if not len(selected_genes_data):
                    selected_genes_data = None

                self.Outputs.selected_genes.send(selected_genes_data)
        else:
            self.Outputs.selected_genes.send(None)

    def rowFiltered(self, row):
        """
        Return True if `self.cells[row]` is hidden by the search string.

        A row stays visible only when every whitespace-separated search
        term occurs (case-insensitively) in the joined row text.
        """
        searchStrings = self.search_string.lower().split()
        # `str` keeps the join working for non-string cells and leaves
        # string cells unchanged.
        text = " ".join(map(str, self.cells[row])).lower()
        return not all(s in text for s in searchStrings)

    def searchUpdate(self):
        """Hide the view rows that do not match the search string."""
        if not self.data:
            return
        index = self.treeWidget.model().sourceModel().index
        mapFromSource = self.treeWidget.model().mapFromSource
        for i in range(len(self.cells)):
            self.treeWidget.setRowHidden(
                mapFromSource(index(i, 0)).row(), QModelIndex(),
                self.rowFiltered(i))

    def selectFiltered(self):
        """Add every row that passes the current filter to the selection."""
        if not self.data:
            return
        itemSelection = QItemSelection()

        index = self.treeWidget.model().sourceModel().index
        mapFromSource = self.treeWidget.model().mapFromSource
        for i in range(len(self.cells)):
            if not self.rowFiltered(i):
                itemSelection.select(mapFromSource(index(i, 0)),
                                     mapFromSource(index(i, 0)))
        self.treeWidget.selectionModel().select(
            itemSelection,
            QItemSelectionModel.Select | QItemSelectionModel.Rows)

    def onAltSourceChange(self):
        """Refresh the info items."""
        self.updateInfoItems()

    def onDeleteWidget(self):
        """Cancel pending tasks (best effort) and shut the executor down."""
        # try to cancel pending tasks
        if self.initfuture:
            self.initfuture.cancel()
        if self.itemsfuture:
            self.itemsfuture.cancel()

        self.executor.shutdown(wait=False)
        super().onDeleteWidget()
class OWImportImages(widget.OWWidget):
    """
    Widget that scans a directory for images and outputs them as a table.

    The scan is run on a background executor; the widget tracks its
    progress through a small runtime state machine (see `State`) and sends
    the resulting table on the "Data" output once the scan is done.
    """
    name = "Import Images"
    description = "Import images from a directory(s)"
    icon = "icons/ImportImages.svg"
    priority = 110

    outputs = [("Data", Orange.data.Table)]

    #: list of recent paths
    recent_paths = settings.Setting([])  # type: List[RecentPath]
    currentPath = settings.Setting(None)

    want_main_area = False
    resizing_enabled = False

    Modality = Qt.ApplicationModal
    # Modality = Qt.WindowModal

    MaxRecentItems = 20

    def __init__(self):
        super().__init__()
        #: widget's runtime state
        self.__state = State.NoState
        self.data = None
        self._n_image_categories = 0
        self._n_image_data = 0
        self._n_skipped = 0

        self.__invalidated = False
        self.__pendingTask = None

        vbox = gui.vBox(self.controlArea)
        hbox = gui.hBox(vbox)
        self.recent_cb = QComboBox(
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon,
            minimumContentsLength=16,
            acceptDrops=True,
        )
        # Drag/drop events on the combo box are handled in `eventFilter`.
        self.recent_cb.installEventFilter(self)
        self.recent_cb.activated[int].connect(self.__onRecentActivated)
        icons = standard_icons(self)

        browseaction = QAction(
            "Open/Load Images", self,
            iconText="\N{HORIZONTAL ELLIPSIS}",
            icon=icons.dir_open_icon,
            toolTip="Select a directory from which to load the images",
        )
        browseaction.triggered.connect(self.__runOpenDialog)
        reloadaction = QAction(
            "Reload", self,
            icon=icons.reload_icon,
            toolTip="Reload current image set",
        )
        reloadaction.triggered.connect(self.reload)
        self.__actions = namespace(
            browse=browseaction,
            reload=reloadaction,
        )

        browsebutton = QPushButton(
            browseaction.iconText(),
            icon=browseaction.icon(),
            toolTip=browseaction.toolTip(),
            clicked=browseaction.trigger,
        )
        reloadbutton = QPushButton(
            reloadaction.iconText(),
            icon=reloadaction.icon(),
            clicked=reloadaction.trigger,
            default=True,
        )

        hbox.layout().addWidget(self.recent_cb)
        hbox.layout().addWidget(browsebutton)
        hbox.layout().addWidget(reloadbutton)

        self.addActions([browseaction, reloadaction])

        # Keep the button's enabled state in sync with its action.
        reloadaction.changed.connect(
            lambda: reloadbutton.setEnabled(reloadaction.isEnabled())
        )
        box = gui.vBox(vbox, "Info")
        self.infostack = QStackedWidget()

        self.info_area = QLabel(text="No image set selected", wordWrap=True)
        self.progress_widget = QProgressBar(minimum=0, maximum=0)
        self.cancel_button = QPushButton(
            "Cancel", icon=icons.cancel_icon,
        )
        self.cancel_button.clicked.connect(self.cancel)

        # Page 1 of the info stack: progress bar + cancel button + path label.
        w = QWidget()
        vlayout = QVBoxLayout()
        vlayout.setContentsMargins(0, 0, 0, 0)
        hlayout = QHBoxLayout()
        hlayout.setContentsMargins(0, 0, 0, 0)

        hlayout.addWidget(self.progress_widget)
        hlayout.addWidget(self.cancel_button)
        vlayout.addLayout(hlayout)

        self.pathlabel = TextLabel()
        self.pathlabel.setTextElideMode(Qt.ElideMiddle)
        self.pathlabel.setAttribute(Qt.WA_MacSmallSize)

        vlayout.addWidget(self.pathlabel)
        w.setLayout(vlayout)

        self.infostack.addWidget(self.info_area)
        self.infostack.addWidget(w)

        box.layout().addWidget(self.infostack)

        self.__initRecentItemsModel()
        self.__invalidated = True
        self.__executor = ThreadExecutor(self)

        # Defer the initial scan until the event loop delivers the Init event
        # (handled in `customEvent`).
        QApplication.postEvent(self, QEvent(RuntimeEvent.Init))

    def __initRecentItemsModel(self):
        """
        Populate the recent-paths combo box from the stored settings.

        Entries that are no longer directories are dropped and the list is
        capped at `MaxRecentItems`. `currentPath` is kept only if it is
        the same directory as the first remaining recent entry.
        """
        if self.currentPath is not None and \
                not os.path.isdir(self.currentPath):
            self.currentPath = None

        recent_paths = [
            item for item in self.recent_paths if os.path.isdir(item.abspath)
        ]
        recent_paths = recent_paths[:OWImportImages.MaxRecentItems]
        recent_model = self.recent_cb.model()
        for pathitem in recent_paths:
            recent_model.appendRow(RecentPath_asqstandarditem(pathitem))

        self.recent_paths = recent_paths

        if self.currentPath is not None and \
                os.path.isdir(self.currentPath) and self.recent_paths and \
                os.path.samefile(self.currentPath,
                                 self.recent_paths[0].abspath):
            self.recent_cb.setCurrentIndex(0)
        else:
            self.currentPath = None
            self.recent_cb.setCurrentIndex(-1)
        self.__actions.reload.setEnabled(self.currentPath is not None)

    def customEvent(self, event):
        """Reimplemented."""
        if event.type() == RuntimeEvent.Init:
            if self.__invalidated:
                try:
                    self.start()
                finally:
                    self.__invalidated = False

        super().customEvent(event)

    def __runOpenDialog(self):
        """
        Let the user pick a directory, then make it current and start a scan.

        The dialog opens in the parent directory of the most recent path, or
        in the user's home directory when there is no recent path.
        """
        startdir = os.path.expanduser("~/")
        if self.recent_paths:
            startdir = os.path.dirname(self.recent_paths[0].abspath)

        if OWImportImages.Modality == Qt.WindowModal:
            dlg = QFileDialog(
                self, "Select Top Level Directory", startdir,
                acceptMode=QFileDialog.AcceptOpen,
                modal=True,
            )
            dlg.setFileMode(QFileDialog.Directory)
            dlg.setOption(QFileDialog.ShowDirsOnly)
            dlg.setDirectory(startdir)
            dlg.setAttribute(Qt.WA_DeleteOnClose)

            @dlg.accepted.connect
            def on_accepted():
                dirpath = dlg.selectedFiles()
                if dirpath:
                    self.setCurrentPath(dirpath[0])
                    self.start()
            dlg.open()
        else:
            dirpath = QFileDialog.getExistingDirectory(
                self, "Select Top Level Directory", startdir
            )
            if dirpath:
                self.setCurrentPath(dirpath)
                self.start()

    def __onRecentActivated(self, index):
        """Switch to the recent path at combo box `index` and start a scan."""
        item = self.recent_cb.itemData(index)
        if item is None:
            return
        assert isinstance(item, RecentPath)
        self.setCurrentPath(item.abspath)
        self.start()

    def __updateInfo(self):
        """Refresh the info label text and info stack page from the state."""
        if self.__state == State.NoState:
            text = "No image set selected"
        elif self.__state == State.Processing:
            text = "Processing"
        elif self.__state == State.Done:
            nvalid = self._n_image_data
            ncategories = self._n_image_categories
            n_skipped = self._n_skipped
            if ncategories < 2:
                text = "{} image{}".format(nvalid, "s" if nvalid != 1 else "")
            else:
                text = "{} images / {} categories".format(nvalid, ncategories)
            if n_skipped > 0:
                text = text + ", {} skipped".format(n_skipped)
        elif self.__state == State.Cancelled:
            text = "Cancelled"
        elif self.__state == State.Error:
            text = "Error state"
        else:
            assert False

        self.info_area.setText(text)

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

    def setCurrentPath(self, path):
        """
        Set the current root image path to path

        If the path does not exists or is not a directory the current path
        is left unchanged

        Parameters
        ----------
        path : str
            New root import path.

        Returns
        -------
        status : bool
            True if the current root import path was successfully
            changed to path.
        """
        # Nothing to do when `path` already is the current directory.
        if self.currentPath is not None and path is not None and \
                os.path.isdir(self.currentPath) and os.path.isdir(path) and \
                os.path.samefile(self.currentPath, path):
            return True

        success = True
        error = None
        if path is not None:
            if not os.path.exists(path):
                error = "'{}' does not exist".format(path)
                path = None
                success = False
            elif not os.path.isdir(path):
                error = "'{}' is not a directory".format(path)
                path = None
                success = False

        if error is not None:
            self.error(error)
            warnings.warn(error, UserWarning, stacklevel=3)
        else:
            self.error()

        if path is not None:
            newindex = self.addRecentPath(path)
            self.recent_cb.setCurrentIndex(newindex)
            if newindex >= 0:
                self.currentPath = path
            else:
                self.currentPath = None
        else:
            self.currentPath = None
        self.__actions.reload.setEnabled(self.currentPath is not None)

        # A scan in progress refers to the previous path; stop it.
        if self.__state == State.Processing:
            self.cancel()

        return success

    def addRecentPath(self, path):
        """
        Prepend a path entry to the list of recent paths

        If an entry with the same path already exists in the recent path
        list it is moved to the first place

        Parameters
        ----------
        path : str

        Returns
        -------
        index : int
            Index of the entry in the recent list (always 0).
        """
        existing = None
        for pathitem in self.recent_paths:
            try:
                if os.path.samefile(pathitem.abspath, path):
                    existing = pathitem
                    break
            except FileNotFoundError:
                # file not found if the `pathitem.abspath` no longer exists
                pass

        model = self.recent_cb.model()

        if existing is not None:
            selected_index = self.recent_paths.index(existing)
            assert model.item(selected_index).data(Qt.UserRole) is existing
            self.recent_paths.remove(existing)
            row = model.takeRow(selected_index)
            self.recent_paths.insert(0, existing)
            model.insertRow(0, row)
        else:
            item = RecentPath(path, None, None)
            self.recent_paths.insert(0, item)
            model.insertRow(0, RecentPath_asqstandarditem(item))
        return 0

    def __setRuntimeState(self, state):
        """
        Move the widget to runtime `state` and update the GUI to match.

        The asserts document the allowed transitions: Processing may only
        be entered from a resting state, and Done/Cancelled only from
        Processing.
        """
        assert state in State
        self.setBlocking(state == State.Processing)
        message = ""
        if state == State.Processing:
            assert self.__state in [
                State.Done, State.NoState, State.Error, State.Cancelled
            ]
            message = "Processing"
        elif state == State.Done:
            assert self.__state == State.Processing
        elif state == State.Cancelled:
            assert self.__state == State.Processing
            message = "Cancelled"
        elif state == State.Error:
            message = "Error during processing"
        elif state == State.NoState:
            message = ""
        else:
            assert False

        self.__state = state

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

        self.setStatusMessage(message)
        self.__updateInfo()

    def reload(self):
        """
        Restart the image scan task
        """
        if self.__state == State.Processing:
            self.cancel()

        self.data = None
        self.start()

    def start(self):
        """
        Start/execute the image indexing operation
        """
        self.error()

        self.__invalidated = False
        if self.currentPath is None:
            return

        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            log.info("Starting a new task while one is in progress. "
                     "Cancel the existing task (dir:'{}')".format(
                         self.__pendingTask.startdir))
            self.cancel()

        startdir = self.currentPath

        self.__setRuntimeState(State.Processing)

        report_progress = methodinvoke(
            self, "__onReportProgress", (object, ))

        task = ImportImages(report_progress=report_progress)

        # collect the task state in one convenient place
        self.__pendingTask = taskstate = namespace(
            task=task,
            startdir=startdir,
            future=None,
            watcher=None,
            cancelled=False,
            cancel=None,
        )

        def cancel():
            # Cancel the task and disconnect
            if not taskstate.future.cancel():
                # Already running: ask the task to stop and wait briefly.
                taskstate.task.cancelled = True
                taskstate.cancelled = True
                try:
                    taskstate.future.result(timeout=3)
                except UserInterruptError:
                    pass
                except TimeoutError:
                    log.info("The task did not stop in in a timely manner")
            taskstate.watcher.finished.disconnect(self.__onRunFinished)

        taskstate.cancel = cancel

        def run_image_scan_task_interupt():
            try:
                return task(startdir)
            except UserInterruptError:
                # Suppress interrupt errors, so they are not logged
                return

        taskstate.future = self.__executor.submit(run_image_scan_task_interupt)
        taskstate.watcher = FutureWatcher(taskstate.future)
        taskstate.watcher.finished.connect(self.__onRunFinished)

    @Slot()
    def __onRunFinished(self):
        """
        Collect the result of the finished scan and commit it.

        On failure the exception is passed to `sys.excepthook`, its
        traceback is shown as the widget error and the state becomes
        `State.Error`.
        """
        assert QThread.currentThread() is self.thread()
        assert self.__state == State.Processing
        assert self.__pendingTask is not None
        assert self.sender() is self.__pendingTask.watcher
        assert self.__pendingTask.future.done()
        task = self.__pendingTask
        self.__pendingTask = None

        try:
            data, n_skipped = task.future.result()
        except Exception:
            sys.excepthook(*sys.exc_info())
            state = State.Error
            data = None
            n_skipped = 0
            self.error(traceback.format_exc())
        else:
            state = State.Done
            self.error()

        if data:
            self._n_image_data = len(data)
            self._n_image_categories = len(data.domain.class_var.values)\
                if data.domain.class_var else 0
        else:
            # Do not report the counts of a previous run for an empty or
            # failed scan.
            self._n_image_data = 0
            self._n_image_categories = 0

        self.data = data
        self._n_skipped = n_skipped

        self.__setRuntimeState(state)
        self.commit()

    def cancel(self):
        """
        Cancel current pending task (if any).
        """
        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            self.__pendingTask.cancel()
            self.__pendingTask = None
            self.__setRuntimeState(State.Cancelled)

    @Slot(object)
    def __onReportProgress(self, arg):
        # report on scan progress from a worker thread
        # arg must be a namespace(count: int, lastpath: str)
        assert QThread.currentThread() is self.thread()
        if self.__state == State.Processing:
            self.pathlabel.setText(prettyfypath(arg.lastpath))

    def commit(self):
        """
        Commit a Table from the collected image meta data.
        """
        self.send("Data", self.data)

    def onDeleteWidget(self):
        """Cancel any running scan and shut the executor down."""
        self.cancel()
        self.__executor.shutdown(wait=True)
        self.__invalidated = False
        super().onDeleteWidget()

    def eventFilter(self, receiver, event):
        # re-implemented from QWidget
        # intercept and process drag drop events on the recent directory
        # selection combo box
        def dirpath(event):
            # type: (QDropEvent) -> Optional[str]
            """Return the directory from a QDropEvent."""
            data = event.mimeData()
            urls = data.urls()
            if len(urls) == 1:
                url = urls[0]
                path = url.toLocalFile()
                if os.path.isdir(path):
                    return path
            return None

        if receiver is self.recent_cb and \
                event.type() in {QEvent.DragEnter, QEvent.DragMove,
                                 QEvent.Drop}:
            assert isinstance(event, QDropEvent)
            path = dirpath(event)
            if path is not None and event.possibleActions() & Qt.LinkAction:
                event.setDropAction(Qt.LinkAction)
                event.accept()
                if event.type() == QEvent.Drop:
                    self.setCurrentPath(path)
                    self.start()
            else:
                event.ignore()
            # The event was fully handled here; stop further processing.
            return True

        return super().eventFilter(receiver, event)
class OWResolwetSNE(OWWidget):
    """
    t-SNE projection widget whose computation is run as Resolwe processes.

    The embedding and the selection are both computed by submitting
    `ResolweHelper.run_process` calls to a background executor; results
    are picked up in `task_finished`.
    """
    name = "t-SNE"
    description = "Two-dimensional data projection with t-SNE."
    icon = "icons/OWResolwetSNE.svg"
    priority = 50

    class Inputs:
        data = Input("Data", resolwe.Data, default=True)

    class Outputs:
        selected_data = Output("Selected Data", resolwe.Data, default=True)

    settings_version = 2

    #: Runtime state
    Running, Finished, Waiting = 1, 2, 3

    settingsHandler = settings.DomainContextHandler()

    max_iter = settings.Setting(300)
    perplexity = settings.Setting(30)
    pca_components = settings.Setting(20)

    # output embedding role.
    NoRole, AttrRole, AddAttrRole, MetaRole = 0, 1, 2, 3

    auto_commit = settings.Setting(True)

    selection_indices = settings.Setting(None, schema_only=True)

    legend_anchor = settings.Setting(((1, 0), (1, 0)))

    graph = SettingProvider(OWMDSGraph)

    jitter_sizes = [0, 0.1, 0.5, 1, 2, 3, 4, 5, 7, 10]

    graph_name = "graph.plot_widget.plotItem"

    class Error(OWWidget.Error):
        not_enough_rows = Msg("Input data needs at least 2 rows")
        constant_data = Msg("Input data is constant")
        no_attributes = Msg("Data has no attributes")
        out_of_memory = Msg("Out of memory")
        optimization_error = Msg("Error during optimization\n{}")

    def __init__(self):
        super().__init__()
        #: Effective data used for plot styling/annotations.
        self.data = None  # type: Optional[Orange.data.Table]
        #: Input subset data table
        self.subset_data = None  # type: Optional[Orange.data.Table]
        #: Input data table
        self.signal_data = None

        # resolwe variables
        self.data_table_object = None  # type: Optional[resolwe.Data]
        self._tsne_slug = 't-sne'
        self._tsne_selection_slug = 't-sne-selection'
        self._embedding_data_object = None
        self._embedding = None
        self._embedding_clas_var = None

        # Plot axes variables (initialised once).
        self.variable_x = ContinuousVariable("tsne-x")
        self.variable_y = ContinuousVariable("tsne-y")

        # threading
        self._task = None  # type: Optional[ResolweTask]
        self._executor = ThreadExecutor()
        self.res = ResolweHelper()

        self._subset_mask = None  # type: Optional[np.ndarray]
        self._invalidated = False
        self.pca_data = None
        self._curve = None
        self._data_metas = None

        self.__update_loop = None
        self.__in_next_step = False
        self.__draw_similar_pairs = False

        box = gui.vBox(self.controlArea, "t-SNE")
        form = QFormLayout(
            labelAlignment=Qt.AlignLeft,
            formAlignment=Qt.AlignLeft,
            fieldGrowthPolicy=QFormLayout.AllNonFixedFieldsGrow,
            verticalSpacing=10,
        )

        form.addRow("Max iterations:",
                    gui.spin(box, self, "max_iter", 250, 2000, step=50))
        form.addRow("Perplexity:",
                    gui.spin(box, self, "perplexity", 1, 100, step=1))
        box.layout().addLayout(form)

        gui.separator(box, 10)
        self.runbutton = gui.button(box, self, "Run",
                                    callback=self._run_embeding)

        box = gui.vBox(self.controlArea, "PCA Preprocessing")
        gui.hSlider(box, self, 'pca_components', label="Components: ",
                    minValue=2, maxValue=50, step=1)

        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWMDSGraph(self, box, "MDSGraph",
                                view_box=MDSInteractiveViewBox)
        box.layout().addWidget(self.graph.plot_widget)
        self.plot = self.graph.plot_widget

        g = self.graph.gui
        box = g.point_properties_box(self.controlArea)
        self.models = g.points_models
        # Because sc data frequently has many genes,
        # showing all attributes in combo boxes can cause problems
        # QUICKFIX: Remove a separator and attributes from order
        # (leaving just the class and metas)
        for model in self.models:
            model.order = model.order[:-2]

        g.add_widgets(ids=[g.JitterSizeSlider], widget=box)

        box = gui.vBox(self.controlArea, "Plot Properties")
        g.add_widgets([
            g.ShowLegend, g.ToolTipShowsAll, g.ClassDensity,
            g.LabelOnlySelected
        ], box)

        self.controlArea.layout().addStretch(100)
        self.icons = gui.attributeIconDict

        palette = self.graph.plot_widget.palette()
        self.graph.set_palette(palette)

        gui.rubber(self.controlArea)

        self.graph.box_zoom_select(self.controlArea)

        gui.auto_commit(self.controlArea, self, "auto_commit",
                        "Send Selection", "Send Automatically")

        self.plot.getPlotItem().hideButtons()
        self.plot.setRenderHint(QPainter.Antialiasing)
        self.graph.jitter_continuous = True

    def update_colors(self):
        """No-op in this widget."""
        pass

    def update_density(self):
        """Redraw the plot without resetting the view."""
        self.update_graph(reset_view=False)

    def update_regression_line(self):
        """Redraw the plot without resetting the view."""
        self.update_graph(reset_view=False)

    def prepare_data(self):
        """No-op in this widget."""
        pass

    def update_graph(self, reset_view=True, **_):
        """
        Redraw the graph for the current embedding variables.

        Parameters
        ----------
        reset_view : bool
            Passed on to the graph's `update_data`; callers such as
            `update_density` pass False.
        """
        self.graph.zoomStack = []
        if self.graph.data is None:
            return
        # Honour the caller's request instead of always resetting the view.
        self.graph.update_data(self.variable_x, self.variable_y,
                               reset_view=reset_view)

    def reset_graph_data(self, *_):
        """Rescale the graph data and redraw, if there is data."""
        if self.data is not None:
            self.graph.rescale_data()
            self.update_graph()

    def selection_changed(self):
        """Abandon any running task (keeping the embedding) and commit."""
        if self._task:
            self.cancel(clear_state=False)
            self._task = None
            self._executor = ThreadExecutor()
        self.commit()

    def _clear_plot(self):
        """Remove all items from the plot widget."""
        self.graph.plot_widget.clear()

    def _clear_state(self):
        """Drop the plot, the stored embedding and the task bookkeeping."""
        self._clear_plot()
        self.graph.new_data(None)
        self._embedding_data_object = None
        self._embedding = None
        self._embedding_clas_var = None
        self._task = None
        self._executor = ThreadExecutor()

    def cancel(self, clear_state=True):
        """
        Cancel the current task (if any).

        Parameters
        ----------
        clear_state : bool
            If True (default) also clear the plot and stored embedding via
            `_clear_state`.
        """
        if self._task is not None:
            task = self._task
            # Detach the watcher first: the abandoned future may still
            # complete later and must not re-enter `task_finished`, whose
            # asserts expect to be called for the current task only.
            watcher = getattr(task, "watcher", None)
            if watcher is not None:
                watcher.finished.disconnect(self.task_finished)
            future = getattr(task, "future", None)
            if future is not None:
                # Only has an effect if the call has not started running.
                future.cancel()

            self._executor.shutdown(wait=False)
            self.runbutton.setText('Run')
            self.progressBarFinished()
            if clear_state:
                self._clear_state()

    def run_task(self, slug, func):
        """
        Submit `func` to the executor as the current task.

        Parameters
        ----------
        slug : str
            Process slug stored on the task; `task_finished` uses it to
            decide how to handle the result.
        func : callable
            Zero-argument callable to run in the background.
        """
        if self._task is not None:
            try:
                self.cancel()
            except CancelledError as e:
                print(e)
        assert self._task is None

        self.progressBarInit()
        self._task = ResolweTask(slug)
        self._task.future = self._executor.submit(func)
        self._task.watcher = FutureWatcher(self._task.future)
        self._task.watcher.finished.connect(self.task_finished)

    @Slot(Future, name='Finished')
    def task_finished(self, future):
        """
        Handle completion of the current task.

        A t-SNE result is stored as the new embedding and plotted; a
        selection result is sent on the "Selected Data" output. Exceptions
        raised by the task are re-raised.
        """
        assert threading.current_thread() == threading.main_thread()
        assert self._task is not None
        assert self._task.future is future
        assert future.done()

        try:
            future_result = future.result()
        except Exception:
            # TODO: raise exceptions
            raise
        else:
            if self._task.slug == self._tsne_slug:
                self._embedding_data_object = future_result
                self._embedding_clas_var = self.res.get_json(
                    self._embedding_data_object, 'class_var')
                self._embedding = np.array(
                    self.res.get_json(self._embedding_data_object,
                                      'embedding_json', 'embedding'))
                self._setup_plot()

            if self._task.slug == self._tsne_selection_slug:
                print(future_result)
                self.Outputs.selected_data.send(future_result)
        finally:
            self.progressBarFinished()
            # Same idle label as at construction and after `cancel`.
            self.runbutton.setText('Run')
            self._task = None

    @Inputs.data
    def set_data(self, data):
        # type: (Optional[resolwe.Data]) -> None
        """Store the input data object and start computing the embedding."""
        if data:
            self.data_table_object = data
            self._run_embeding()

    def _run_embeding(self):
        """
        Toggle the embedding computation.

        If a task is running it is cancelled; otherwise a new t-SNE process
        is submitted, seeded with the previous embedding when there is one.
        """
        if self._task:
            self.cancel()
            return

        if self._task is None:
            inputs = {
                'data_table': self.data_table_object,
                'pca_components': self.pca_components,
                'perplexity': self.perplexity,
                'iterations': self.max_iter
            }

            if self._embedding is not None and \
                    self._embedding_data_object is not None:
                inputs['init'] = self._embedding_data_object

            func = partial(self.res.run_process, self._tsne_slug, **inputs)

            # move filter process in thread
            self.run_task(self._tsne_slug, func)
            self.runbutton.setText('Stop')

    def _setup_plot(self):
        """Build the plot table from the stored embedding and show it."""
        class_var = DiscreteVariable(self._embedding_clas_var['name'],
                                     values=self._embedding_clas_var['values'])
        y_data = self._embedding_clas_var['y_data']
        data = np.c_[self._embedding, y_data]

        plot_data = Table(
            Domain([self.variable_x, self.variable_y], class_vars=class_var),
            data)

        domain = plot_data.domain if plot_data and len(plot_data) else None
        for model in self.models:
            model.set_domain(domain)

        self.graph.attr_color = plot_data.domain.class_var if domain else None
        self.graph.attr_shape = None
        self.graph.attr_size = None
        self.graph.attr_label = None

        self.graph.new_data(plot_data)
        self.graph.update_data(self.variable_x, self.variable_y, True)

    def commit(self):
        """
        Start the selection process for the current graph selection.

        The output is cleared immediately; the selected data is sent from
        `task_finished` once the process completes.
        """
        selection = self.graph.get_selection()
        if self._embedding_data_object is not None and selection is not None:
            inputs = {
                'data_table': self.data_table_object,
                'embedding': self._embedding_data_object,
                'selection': selection.tolist(),
                'x_tsne_var': self.variable_x.name,
                'y_tsne_var': self.variable_y.name
            }

            func = partial(self.res.run_process, self._tsne_selection_slug,
                           **inputs)
            self.run_task(self._tsne_selection_slug, func)

        self.Outputs.selected_data.send(None)

    def onDeleteWidget(self):
        super().onDeleteWidget()
        self._clear_plot()
        self._clear_state()

    def send_report(self):
        """Add the plot and a caption of its properties to the report."""
        if self.data is None:
            return

        def name(var):
            return var and var.name

        caption = report.render_items_vert((
            ("Color", name(self.graph.attr_color)),
            ("Label", name(self.graph.attr_label)),
            ("Shape", name(self.graph.attr_shape)),
            ("Size", name(self.graph.attr_size)),
            ("Jittering", self.graph.jitter_size != 0 and
             "{} %".format(self.graph.jitter_size))))
        self.report_plot()
        if caption:
            self.report_caption(caption)
class OWGEODatasets(OWWidget):
    """
    Widget for browsing GEO data sets and outputting the selected one.

    The data set index is loaded on a background executor at construction;
    the selected data set is fetched on `commit` and sent on the
    "Expression Data" output, filtered by the checked sample annotations.
    """
    name = "GEO Data Sets"
    description = "Access to Gene Expression Omnibus data sets."
    icon = "icons/OWGEODatasets.svg"
    priority = 2

    inputs = []
    outputs = [("Expression Data", Table)]

    settingsList = [
        "outputRows", "mergeSpots", "gdsSelectionStates", "splitterSettings",
        "currentGds", "autoCommit", "datasetNames"
    ]

    outputRows = Setting(True)
    mergeSpots = Setting(True)
    gdsSelectionStates = Setting({})
    currentGds = Setting(None)
    datasetNames = Setting({})
    splitterSettings = Setting((
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01'
    ))

    autoCommit = Setting(False)

    def __init__(self, parent=None, signalManager=None, name=" GEO Data Sets"):
        OWWidget.__init__(self, parent, signalManager, name)

        self.selectionChanged = False
        self.filterString = ""
        self.datasetName = ""

        ## GUI
        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoBox = gui.widgetLabel(box, "Initializing\n\n")

        box = gui.widgetBox(self.controlArea, "Output", addSpace=True)
        gui.radioButtonsInBox(box, self, "outputRows",
                              ["Genes in rows", "Samples in rows"], "Rows",
                              callback=self.commitIf)

        gui.checkBox(box, self, "mergeSpots", "Merge spots of same gene",
                     callback=self.commitIf)

        gui.separator(box)
        self.nameEdit = gui.lineEdit(
            box, self, "datasetName", "Data set name",
            tooltip="Override the default output data set name",
            callback=self.onNameEdited
        )
        self.nameEdit.setPlaceholderText("")

        if sys.version_info < (3, ):
            box = gui.widgetBox(self.controlArea, "Commit", addSpace=True)
            self.commitButton = gui.button(box, self, "Commit",
                                           callback=self.commit)
            cb = gui.checkBox(box, self, "autoCommit", "Commit on any change")
            gui.setStopper(self, self.commitButton, cb, "selectionChanged",
                           self.commit)
        else:
            gui.auto_commit(self.controlArea, self, "autoCommit", "Commit",
                            box="Commit")
            # From here on `commitIf` on this instance is `commit`
            # (callbacks bound above still refer to the method).
            self.commitIf = self.commit

        gui.rubber(self.controlArea)

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(textChanged=self.filter)
        self.completer = TokenListCompleter(
            self, caseSensitivity=Qt.CaseInsensitive)
        self.filterLineEdit.setCompleter(self.completer)

        self.mainArea.layout().addWidget(self.filterLineEdit)

        splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(splitter)
        self.treeWidget = QTreeView(splitter)

        self.treeWidget.setSelectionMode(QTreeView.SingleSelection)
        self.treeWidget.setRootIsDecorated(False)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers)

        linkdelegate = gui.LinkStyledItemDelegate(self.treeWidget)
        self.treeWidget.setItemDelegateForColumn(1, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(8, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self.treeWidget,
                                         role=Qt.DisplayRole))

        proxyModel = MySortFilterProxyModel(self.treeWidget)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(
            self.updateSelection
        )
        self.treeWidget.viewport().setMouseTracking(True)

        splitterH = QSplitter(Qt.Horizontal, splitter)

        box = gui.widgetBox(splitterH, "Description")
        self.infoGDS = gui.widgetLabel(box, "")
        self.infoGDS.setWordWrap(True)
        gui.rubber(box)

        box = gui.widgetBox(splitterH, "Sample Annotations")
        self.annotationsTree = QTreeWidget(box)
        self.annotationsTree.setHeaderLabels(
            ["Type (Sample annotations)", "Sample count"]
        )
        self.annotationsTree.setRootIsDecorated(True)
        box.layout().addWidget(self.annotationsTree)
        self.annotationsTree.itemChanged.connect(
            self.annotationSelectionChanged
        )
        self._annotationsUpdating = False
        self.splitters = splitter, splitterH

        for sp, setting in zip(self.splitters, self.splitterSettings):
            sp.splitterMoved.connect(self.splitterMoved)
            sp.restoreState(setting)

        self.searchKeys = [
            "dataset_id", "title", "platform_organism", "description"
        ]

        self.gds = []
        self.gds_info = None

        self.resize(1000, 600)

        # Block the widget until the data set index has been loaded
        # (re-enabled in `_initializemodel`).
        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()

        self._executor = ThreadExecutor()

        func = partial(get_gds_model,
                       methodinvoke(self, "_setProgress", (float, )))
        self._inittask = Task(function=func)
        self._inittask.finished.connect(self._initializemodel)
        self._executor.submit(self._inittask)

        self._datatask = None

    @Slot(float)
    def _setProgress(self, value):
        """Set the progress bar value (invoked from the worker via Qt)."""
        self.progressBarValue = value

    def _initializemodel(self):
        """Install the loaded data set model and restore the selection."""
        assert self.thread() is QThread.currentThread()
        model, self.gds_info, self.gds = self._inittask.result()
        model.setParent(self)

        proxy = self.treeWidget.model()
        proxy.setFilterKeyColumn(0)
        proxy.setFilterRole(TextFilterRole)
        proxy.setFilterCaseSensitivity(False)
        proxy.setFilterFixedString(self.filterString)

        proxy.setSourceModel(model)
        proxy.sort(0, Qt.DescendingOrder)

        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)

        # Build the completer token list from the searchable fields:
        # punctuation is turned into spaces and short tokens are dropped.
        filter_items = " ".join(
            gds[key] for gds in self.gds for key in self.searchKeys
        )
        tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`"
        tr_table = str.maketrans(tr_chars, " " * len(tr_chars))
        filter_items = filter_items.translate(tr_table)

        filter_items = sorted(set(filter_items.split(" ")))
        filter_items = [item for item in filter_items if len(item) > 3]

        self.completer.setTokenList(filter_items)

        if self.currentGds:
            current_id = self.currentGds["dataset_id"]
            gdss = [(i, proxy.data(proxy.index(i, 1), Qt.DisplayRole))
                    for i in range(proxy.rowCount())]
            current = [i for i, data in gdss if data and data == current_id]
            if current:
                current_index = proxy.index(current[0], 0)
                self.treeWidget.selectionModel().select(
                    current_index,
                    QItemSelectionModel.Select | QItemSelectionModel.Rows
                )
                self.treeWidget.scrollTo(
                    current_index, QTreeView.PositionAtCenter)

        for i in range(8):
            self.treeWidget.resizeColumnToContents(i)

        self.treeWidget.setColumnWidth(
            1, min(self.treeWidget.columnWidth(1), 300))
        self.treeWidget.setColumnWidth(
            2, min(self.treeWidget.columnWidth(2), 200))

        self.updateInfo()

    def updateInfo(self):
        """Refresh the info label with data set / cache / filter counts."""
        gds_info = self.gds_info
        text = ("%i datasets\n%i datasets cached\n" %
                (len(gds_info),
                 len(glob.glob(serverfiles.localpath("GEO") + "/GDS*"))))
        filtered = self.treeWidget.model().rowCount()
        if len(self.gds) != filtered:
            text += ("%i after filtering") % filtered
        self.infoBox.setText(text)

    def updateSelection(self, *args):
        """Track the selected data set and update the dependent GUI."""
        current = self.treeWidget.selectedIndexes()
        mapToSource = self.treeWidget.model().mapToSource
        current = [mapToSource(index).row() for index in current]
        if current:
            self.currentGds = self.gds[current[0]]
            self.setAnnotations(self.currentGds)
            self.infoGDS.setText(self.currentGds.get("description", ""))
            self.nameEdit.setPlaceholderText(self.currentGds["title"])
            self.datasetName = \
                self.datasetNames.get(self.currentGds["dataset_id"], "")
        else:
            self.currentGds = None
            self.nameEdit.setPlaceholderText("")
            self.datasetName = ""

        self.commitIf()

    def setAnnotations(self, gds):
        """
        Rebuild the sample annotations tree for data set `gds`.

        Check states are restored from `gdsSelectionStates` (default:
        checked). `itemChanged` handling is suppressed while the tree is
        being rebuilt.
        """
        self._annotationsUpdating = True
        try:
            self.annotationsTree.clear()

            annotations = defaultdict(set)
            subsetscount = {}
            for desc in gds["subsets"]:
                annotations[desc["type"]].add(desc["description"])
                subsetscount[desc["description"]] = \
                    str(len(desc["sample_id"]))

            for subset_type, subsets in annotations.items():
                key = (gds["dataset_id"], subset_type)
                subsetItem = QTreeWidgetItem(self.annotationsTree,
                                             [subset_type])
                subsetItem.setFlags(subsetItem.flags() |
                                    Qt.ItemIsUserCheckable |
                                    Qt.ItemIsTristate)
                subsetItem.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked)
                )
                subsetItem.key = key
                for subset in subsets:
                    key = (gds["dataset_id"], subset_type, subset)
                    item = QTreeWidgetItem(
                        subsetItem, [subset, subsetscount.get(subset, "")]
                    )
                    item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                    item.setCheckState(
                        0, self.gdsSelectionStates.get(key, Qt.Checked)
                    )
                    item.key = key
        finally:
            # Always re-enable change handling, even if building failed.
            self._annotationsUpdating = False

        self.annotationsTree.expandAll()
        for i in range(self.annotationsTree.columnCount()):
            self.annotationsTree.resizeColumnToContents(i)

    def annotationSelectionChanged(self, item, column):
        """Store the check state of every annotation item in the settings."""
        if self._annotationsUpdating:
            return
        for i in range(self.annotationsTree.topLevelItemCount()):
            item = self.annotationsTree.topLevelItem(i)
            self.gdsSelectionStates[item.key] = item.checkState(0)
            for j in range(item.childCount()):
                child = item.child(j)
                self.gdsSelectionStates[child.key] = child.checkState(0)

    def filter(self):
        """Apply the filter line edit's words to the proxy model."""
        filter_string = self.filterLineEdit.text()
        proxyModel = self.treeWidget.model()
        if proxyModel:
            strings = filter_string.lower().strip().split()
            proxyModel.setFilterFixedStrings(strings)
            self.updateInfo()

    def selectedSamples(self):
        """
        Return the currently selected sample annotations.

        Returns
        -------
        samples : list
            Selected ``(sample type, sample value)`` tuples.
        used_types : list
            Sample types that have at least one selected value.

        .. note:: if some Sample annotation type has no selected values,
                  all of its values are included in `samples` (and the
                  type is not listed in `used_types`).

        """
        samples = []
        used_types = []

        for stype in childiter(self.annotationsTree.invisibleRootItem()):
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (str(stype.text(0)), str(sval.text(0)))
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
                used_types.append(str(stype.text(0)))
            else:
                # If no sample of sample type is selected we don't filter
                # on it.
                samples.extend(all_values)

        return samples, used_types

    def commitIf(self):
        """Commit now if auto-commit is on, else mark a pending change."""
        if self.autoCommit:
            self.commit()
        else:
            self.selectionChanged = True

    @Slot(int, int)
    def progressCompleted(self, value, total):
        """Report download progress as a percentage of `total`."""
        if total > 0:
            self.progressBarSet(100. * value / total, processEvents=False)
        else:
            pass  # TODO: report 'indeterminate progress'

    def commit(self):
        """Fetch the selected data set in the background (if any)."""
        if self.currentGds:
            self.error(0)
            sample_type = None
            self.progressBarInit(processEvents=None)

            _, groups = self.selectedSamples()
            if len(groups) == 1 and self.outputRows:
                sample_type = groups[0]

            self.setEnabled(False)
            self.setBlocking(True)

            progress = methodinvoke(self, "progressCompleted", (int, int))

            def get_data(gds_id, report_genes, transpose, sample_type, title):
                gds_ensure_downloaded(gds_id, progress)
                gds = GDS(gds_id)
                data = gds.get_data(report_genes=report_genes,
                                    transpose=transpose,
                                    sample_type=sample_type)
                data.name = title
                return data

            get_data = partial(
                get_data, self.currentGds["dataset_id"],
                report_genes=self.mergeSpots,
                transpose=self.outputRows,
                sample_type=sample_type,
                title=self.datasetName or self.currentGds["title"]
            )
            self._datatask = Task(function=get_data)
            self._datatask.finished.connect(self._on_dataready)
            self._executor.submit(self._datatask)

    def _on_dataready(self):
        """
        Filter the fetched data by the selected annotations and send it.

        A `URLError` from the fetch is shown as a widget error and nothing
        is sent.
        """
        self.setEnabled(True)
        self.setBlocking(False)
        self.progressBarFinished(processEvents=False)

        try:
            data = self._datatask.result()
        except urlrequest.URLError as error:
            self.error(0, ("Error while connecting to the NCBI ftp server! "
                           "'%s'" % error))
            sys.excepthook(type(error), error, getattr(error, "__traceback__"))
            return
        finally:
            self._datatask = None

        data_name = data.name
        samples, _ = self.selectedSamples()

        self.warning(0)
        message = None
        if self.outputRows:
            class_var = data.domain.class_var

            def samplesinst(ex):
                out = []
                for meta in data.domain.metas:
                    out.append((meta.name, ex[meta].value))

                # The data may have no class variable at all.
                if class_var is not None and class_var.name != 'class':
                    out.append((class_var.name, ex[class_var].value))

                return out
            samples = set(samples)
            mask = [samples.issuperset(samplesinst(ex)) for ex in data]
            data = data[numpy.array(mask, dtype=bool)]
            if len(data) == 0:
                message = "No samples with selected sample annotations."
        else:
            samples = set(samples)
            domain = Domain(
                [attr for attr in data.domain.attributes
                 if samples.issuperset(attr.attributes.items())],
                data.domain.class_var,
                data.domain.metas
            )
            # domain.addmetas(data.domain.getmetas())

            if len(domain.attributes) == 0:
                message = "No samples with selected sample annotations."
            stypes = set(s[0] for s in samples)
            for attr in domain.attributes:
                attr.attributes = dict(
                    (key, value) for key, value in attr.attributes.items()
                    if key in stypes
                )
            data = Table(domain, data)

        if message is not None:
            self.warning(0, message)

        data_hints.set_hint(data, TAX_ID, self.currentGds.get("taxid", ""))
        data_hints.set_hint(data, GENE_NAME, bool(self.outputRows))

        data.name = data_name
        self.send("Expression Data", data)

        model = self.treeWidget.model().sourceModel()
        row = self.gds.index(self.currentGds)

        model.setData(model.index(row, 0), " ", Qt.DisplayRole)

        self.updateInfo()
        self.selectionChanged = False

    def splitterMoved(self, *args):
        """Save the state of both splitters to the settings."""
        self.splitterSettings = [
            bytes(sp.saveState()) for sp in self.splitters
        ]

    def send_report(self):
        """Report the selected data set; nothing is reported without one."""
        if not self.currentGds:
            # No data set selected: there is nothing to describe.
            return

        self.report_items("GEO Dataset",
                          [("ID", self.currentGds['dataset_id']),
                           ("Title", self.currentGds['title']),
                           ("Organism", self.currentGds['sample_organism'])])
        self.report_items("Data",
                          [("Samples", self.currentGds['sample_count']),
                           ("Features", self.currentGds['feature_count']),
                           ("Genes", self.currentGds['gene_count'])])
        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in self.currentGds['subsets']:
            subsets[subset['type']].append(
                (subset['description'], len(subset['sample_id'])))
        self.report_html += "<ul>"
        for subset_type in subsets:
            self.report_html += "<b>" + subset_type + ":</b></br>"
            for desc, count in subsets[subset_type]:
                self.report_html += 9 * " " + "<b>{}:</b> {}</br>".format(
                    desc, count)
        self.report_html += "</ul>"

    def onDeleteWidget(self):
        """Cancel pending tasks, detach their callbacks and shut down."""
        if self._inittask:
            self._inittask.future().cancel()
            self._inittask.finished.disconnect(self._initializemodel)
        if self._datatask:
            self._datatask.future().cancel()
            self._datatask.finished.disconnect(self._on_dataready)
        self._executor.shutdown(wait=False)

        super(OWGEODatasets, self).onDeleteWidget()

    def onNameEdited(self):
        """Remember the custom output name for the selected data set."""
        if self.currentGds:
            gds_id = self.currentGds["dataset_id"]
            self.datasetNames[gds_id] = self.nameEdit.text()
            self.commitIf()
class OWImportImages(widget.OWWidget):
    """
    Widget that scans a directory for images and sends the collected image
    meta data on its "Data" output as a table.

    The scan runs on a `ThreadExecutor`; the widget tracks its progress
    through the `State` values stored in the private runtime state.
    """
    name = "Import Images"
    description = "Import images from a directory(s)"
    icon = "icons/ImportImages.svg"
    priority = 110

    outputs = [("Data", Orange.data.Table)]

    #: list of recent paths
    recent_paths = settings.Setting([])  # type: List[RecentPath]
    #: the currently selected root directory (or None)
    currentPath = settings.Setting(None)

    want_main_area = False
    resizing_enabled = False

    Modality = Qt.ApplicationModal
    # Modality = Qt.WindowModal

    #: upper bound on the number of recent paths kept at initialization
    MaxRecentItems = 20

    def __init__(self):
        super().__init__()
        #: widget's runtime state
        self.__state = State.NoState
        self._imageMeta = []
        self._imageCategories = {}
        self.__invalidated = False
        self.__pendingTask = None

        vbox = gui.vBox(self.controlArea)
        hbox = gui.hBox(vbox)
        self.recent_cb = QComboBox(
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon,
            minimumContentsLength=16,
        )
        self.recent_cb.activated[int].connect(self.__onRecentActivated)
        icons = standard_icons(self)

        browseaction = QAction(
            "Open/Load Images", self,
            iconText="\N{HORIZONTAL ELLIPSIS}",
            icon=icons.dir_open_icon,
            toolTip="Select a directory from which to load the images")
        browseaction.triggered.connect(self.__runOpenDialog)
        reloadaction = QAction("Reload", self, icon=icons.reload_icon,
                               toolTip="Reload current image set")
        reloadaction.triggered.connect(self.reload)
        self.__actions = namespace(
            browse=browseaction,
            reload=reloadaction,
        )

        browsebutton = QPushButton(browseaction.iconText(),
                                   icon=browseaction.icon(),
                                   toolTip=browseaction.toolTip(),
                                   clicked=browseaction.trigger)
        reloadbutton = QPushButton(
            reloadaction.iconText(),
            icon=reloadaction.icon(),
            clicked=reloadaction.trigger,
            default=True,
        )

        hbox.layout().addWidget(self.recent_cb)
        hbox.layout().addWidget(browsebutton)
        hbox.layout().addWidget(reloadbutton)

        self.addActions([browseaction, reloadaction])

        # keep the button's enabled state in sync with its action
        reloadaction.changed.connect(
            lambda: reloadbutton.setEnabled(reloadaction.isEnabled()))
        box = gui.vBox(vbox, "Info")
        self.infostack = QStackedWidget()

        self.info_area = QLabel(text="No image set selected", wordWrap=True)
        self.progress_widget = QProgressBar(minimum=0, maximum=0)
        self.cancel_button = QPushButton(
            "Cancel", icon=icons.cancel_icon,
        )
        self.cancel_button.clicked.connect(self.cancel)

        w = QWidget()
        vlayout = QVBoxLayout()
        vlayout.setContentsMargins(0, 0, 0, 0)
        hlayout = QHBoxLayout()
        hlayout.setContentsMargins(0, 0, 0, 0)

        hlayout.addWidget(self.progress_widget)
        hlayout.addWidget(self.cancel_button)
        vlayout.addLayout(hlayout)

        self.pathlabel = TextLabel()
        self.pathlabel.setTextElideMode(Qt.ElideMiddle)
        self.pathlabel.setAttribute(Qt.WA_MacSmallSize)

        vlayout.addWidget(self.pathlabel)
        w.setLayout(vlayout)

        # page 0: static info label, page 1: progress bar + cancel + path
        self.infostack.addWidget(self.info_area)
        self.infostack.addWidget(w)

        box.layout().addWidget(self.infostack)

        self.__initRecentItemsModel()
        self.__invalidated = True
        self.__executor = ThreadExecutor(self)

        # start() is run from customEvent once this event is delivered
        QApplication.postEvent(self, QEvent(RuntimeEvent.Init))

    def __initRecentItemsModel(self):
        """
        Populate the recent paths combo box from the stored settings.

        Stored entries that are no longer directories are dropped and the
        list is truncated to `MaxRecentItems`. `currentPath` is kept only
        if it is the same directory as the first recent entry.
        """
        if self.currentPath is not None and \
                not os.path.isdir(self.currentPath):
            self.currentPath = None

        recent_paths = [
            item for item in self.recent_paths
            if os.path.isdir(item.abspath)
        ]
        recent_paths = recent_paths[:OWImportImages.MaxRecentItems]
        recent_model = self.recent_cb.model()
        for pathitem in recent_paths:
            item = RecentPath_asqstandarditem(pathitem)
            recent_model.appendRow(item)

        self.recent_paths = recent_paths

        if self.currentPath is not None and \
                os.path.isdir(self.currentPath) and self.recent_paths and \
                os.path.samefile(self.currentPath,
                                 self.recent_paths[0].abspath):
            self.recent_cb.setCurrentIndex(0)
        else:
            self.currentPath = None
            self.recent_cb.setCurrentIndex(-1)
        self.__actions.reload.setEnabled(self.currentPath is not None)

    def customEvent(self, event):
        """Reimplemented."""
        if event.type() == RuntimeEvent.Init:
            if self.__invalidated:
                try:
                    self.start()
                finally:
                    self.__invalidated = False

        super().customEvent(event)

    def __runOpenDialog(self):
        """
        Let the user pick a directory, then select it and start a scan.

        The dialog opens at the most recent path, or at the user's home
        directory when there is none. `Modality` selects between a window
        modal dialog and the blocking `getExistingDirectory` call.
        """
        startdir = os.path.expanduser("~/")
        if self.recent_paths:
            startdir = self.recent_paths[0].abspath

        if OWImportImages.Modality == Qt.WindowModal:
            dlg = QFileDialog(
                self, "Select Top Level Directory", startdir,
                acceptMode=QFileDialog.AcceptOpen,
                modal=True,
            )
            dlg.setFileMode(QFileDialog.Directory)
            dlg.setOption(QFileDialog.ShowDirsOnly)
            dlg.setDirectory(startdir)
            dlg.setAttribute(Qt.WA_DeleteOnClose)

            @dlg.accepted.connect
            def on_accepted():
                dirpath = dlg.selectedFiles()
                if dirpath:
                    self.setCurrentPath(dirpath[0])
                    self.start()
            dlg.open()
        else:
            dirpath = QFileDialog.getExistingDirectory(
                self, "Select Top Level Directory", startdir)
            if dirpath:
                self.setCurrentPath(dirpath)
                self.start()

    def __onRecentActivated(self, index):
        """Select the recent path at `index` and start a scan."""
        item = self.recent_cb.itemData(index)
        if item is None:
            return
        assert isinstance(item, RecentPath)
        self.setCurrentPath(item.abspath)
        self.start()

    def __updateInfo(self):
        """Update the info label and stack page from the runtime state."""
        if self.__state == State.NoState:
            text = "No image set selected"
        elif self.__state == State.Processing:
            text = "Processing"
        elif self.__state == State.Done:
            nvalid = sum(imeta.isvalid for imeta in self._imageMeta)
            ncategories = len(self._imageCategories)
            if ncategories < 2:
                text = "{} images".format(nvalid)
            else:
                text = "{} images / {} categories".format(nvalid, ncategories)
        elif self.__state == State.Cancelled:
            text = "Cancelled"
        elif self.__state == State.Error:
            text = "Error state"
        else:
            assert False

        self.info_area.setText(text)

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

    def setCurrentPath(self, path):
        """
        Set the current root image path to path

        If the path does not exist or is not a directory the current path
        is left unchanged

        Parameters
        ----------
        path : str
            New root import path.

        Returns
        -------
        status : bool
            True if the current root import path was successfully
            changed to path.
        """
        # nothing to do if `path` already is the current directory
        if self.currentPath is not None and path is not None and \
                os.path.isdir(self.currentPath) and os.path.isdir(path) and \
                os.path.samefile(self.currentPath, path):
            return True

        if not os.path.exists(path):
            warnings.warn("'{}' does not exist".format(path),
                          UserWarning)
            return False
        elif not os.path.isdir(path):
            warnings.warn("'{}' is not a directory".format(path),
                          UserWarning)
            return False

        newindex = self.addRecentPath(path)
        self.recent_cb.setCurrentIndex(newindex)
        if newindex >= 0:
            self.currentPath = path
        else:
            self.currentPath = None
        self.__actions.reload.setEnabled(self.currentPath is not None)

        # a scan of the previous directory is no longer relevant
        if self.__state == State.Processing:
            self.cancel()

        return True

    def addRecentPath(self, path):
        """
        Prepend a path entry to the list of recent paths

        If an entry with the same path already exists in the recent path
        list it is moved to the first place

        Parameters
        ----------
        path : str

        Returns
        -------
        index : int
            Index of the entry in the recent paths list (always 0).
        """
        existing = None
        for pathitem in self.recent_paths:
            try:
                if os.path.samefile(pathitem.abspath, path):
                    existing = pathitem
                    break
            except FileNotFoundError:
                # `pathitem.abspath` no longer exists on disk; it can not
                # be the same directory as `path`, so keep looking
                pass

        model = self.recent_cb.model()

        if existing is not None:
            selected_index = self.recent_paths.index(existing)
            assert model.item(selected_index).data(Qt.UserRole) is existing
            self.recent_paths.remove(existing)
            row = model.takeRow(selected_index)
            self.recent_paths.insert(0, existing)
            model.insertRow(0, row)
        else:
            item = RecentPath(path, None, None)
            self.recent_paths.insert(0, item)
            model.insertRow(0, RecentPath_asqstandarditem(item))
        return 0

    def __setRuntimeState(self, state):
        """
        Switch the widget to `state` and refresh the dependent GUI parts.

        The asserts spell out the allowed transitions: Processing can only
        be entered from a non-processing state, while Done and Cancelled
        can only be entered from Processing.
        """
        assert state in State
        self.setBlocking(state == State.Processing)
        message = ""
        if state == State.Processing:
            assert self.__state in [State.Done,
                                    State.NoState,
                                    State.Error,
                                    State.Cancelled]
            message = "Processing"
        elif state == State.Done:
            assert self.__state == State.Processing
        elif state == State.Cancelled:
            assert self.__state == State.Processing
            message = "Cancelled"
        elif state == State.Error:
            message = "Error during processing"
        elif state == State.NoState:
            message = ""
        else:
            assert False

        self.__state = state

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

        self.setStatusMessage(message)
        self.__updateInfo()

    def reload(self):
        """
        Restart the image scan task
        """
        if self.__state == State.Processing:
            self.cancel()

        self._imageMeta = []
        self._imageCategories = {}
        self.start()

    def start(self):
        """
        Start/execute the image indexing operation
        """
        self.error()

        self.__invalidated = False
        if self.currentPath is None:
            return

        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            log.info("Starting a new task while one is in progress. "
                     "Cancel the existing task (dir:'{}')"
                     .format(self.__pendingTask.startdir))
            self.cancel()

        startdir = self.currentPath

        self.__setRuntimeState(State.Processing)

        report_progress = methodinvoke(
            self, "__onReportProgress", (object,))

        task = ImageScan(startdir, report_progress=report_progress)

        # collect the task state in one convenient place
        self.__pendingTask = taskstate = namespace(
            task=task,
            startdir=startdir,
            future=None,
            watcher=None,
            cancelled=False,
            cancel=None,
        )

        def cancel():
            # Cancel the task and disconnect
            if not taskstate.future.cancel():
                # the future could not be cancelled outright; flag the
                # task and wait (briefly) for it to finish
                taskstate.task.cancelled = True
                taskstate.cancelled = True
                try:
                    taskstate.future.result(timeout=3)
                except UserInterruptError:
                    pass
                except TimeoutError:
                    log.info("The task did not stop in in a timely manner")
            taskstate.watcher.finished.disconnect(self.__onRunFinished)

        taskstate.cancel = cancel

        def run_image_scan_task_interupt():
            try:
                return task.run()
            except UserInterruptError:
                # Suppress interrupt errors, so they are not logged
                return

        taskstate.future = self.__executor.submit(run_image_scan_task_interupt)
        taskstate.watcher = FutureWatcher(taskstate.future)
        taskstate.watcher.finished.connect(self.__onRunFinished)

    @Slot()
    def __onRunFinished(self):
        """
        Collect the result of the finished scan task and commit it.

        A failed task is reported through the excepthook and the widget's
        error message, and results in an empty image list.
        """
        assert QThread.currentThread() is self.thread()
        assert self.__state == State.Processing
        assert self.__pendingTask is not None
        assert self.sender() is self.__pendingTask.watcher
        assert self.__pendingTask.future.done()
        task = self.__pendingTask
        self.__pendingTask = None

        try:
            image_meta = task.future.result()
        except Exception:
            sys.excepthook(*sys.exc_info())
            state = State.Error
            image_meta = []
            self.error(traceback.format_exc())
        else:
            state = State.Done
            self.error()

        categories = {}

        for imeta in image_meta:
            # derive categories from the path relative to the starting dir
            dirname = os.path.dirname(imeta.path)
            relpath = os.path.relpath(dirname, task.startdir)
            categories[dirname] = relpath

        self._imageMeta = image_meta
        self._imageCategories = categories

        self.__setRuntimeState(state)
        self.commit()

    def cancel(self):
        """
        Cancel current pending task (if any).
        """
        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            self.__pendingTask.cancel()
            self.__pendingTask = None
            self.__setRuntimeState(State.Cancelled)

    @Slot(object)
    def __onReportProgress(self, arg):
        # report on scan progress from a worker thread
        # arg must be a namespace(count: int, lastpath: str)
        assert QThread.currentThread() is self.thread()
        if self.__state == State.Processing:
            self.pathlabel.setText(prettyfypath(arg.lastpath))

    def commit(self):
        """
        Create and commit a Table from the collected image meta data.

        Only valid image entries are included. A discrete "category" class
        variable is added when more than one category was collected.
        `None` is sent when there is no image meta data.
        """
        if self._imageMeta:
            categories = self._imageCategories
            if len(categories) > 1:
                cat_var = Orange.data.DiscreteVariable(
                    "category", values=sorted(categories.values())
                )
            else:
                cat_var = None
            # Image name (file basename without the extension)
            imagename_var = Orange.data.StringVariable("image name")
            # Full fs path
            image_var = Orange.data.StringVariable("image")
            image_var.attributes["type"] = "image"
            # file size/width/height
            size_var = Orange.data.ContinuousVariable(
                "size", number_of_decimals=0)
            width_var = Orange.data.ContinuousVariable(
                "width", number_of_decimals=0)
            height_var = Orange.data.ContinuousVariable(
                "height", number_of_decimals=0)
            domain = Orange.data.Domain(
                [], [cat_var] if cat_var is not None else [],
                [imagename_var, image_var, size_var, width_var, height_var]
            )
            cat_data = []
            meta_data = []

            for imgmeta in self._imageMeta:
                if imgmeta.isvalid:
                    if cat_var is not None:
                        category = categories.get(
                            os.path.dirname(imgmeta.path))
                        cat_data.append([cat_var.to_val(category)])
                    else:
                        cat_data.append([])
                    basename = os.path.basename(imgmeta.path)
                    imgname, _ = os.path.splitext(basename)

                    meta_data.append(
                        [imgname, imgmeta.path, imgmeta.size,
                         imgmeta.width, imgmeta.height]
                    )

            cat_data = numpy.array(cat_data, dtype=float)
            meta_data = numpy.array(meta_data, dtype=object)
            # the table has no attribute columns, only class and metas
            table = Orange.data.Table.from_numpy(
                domain, numpy.empty((len(cat_data), 0), dtype=float),
                cat_data, meta_data
            )
        else:
            table = None

        self.send("Data", table)

    def onDeleteWidget(self):
        """Cancel any pending scan and shut down the executor."""
        self.cancel()
        self.__executor.shutdown(wait=True)
class OWImportImages(widget.OWWidget):
    """
    Widget that imports images from a directory and sends the resulting
    table on its "Data" output.

    The import runs on a `ThreadExecutor`; the first entry of
    `recent_paths` is the directory that is imported.
    """
    name = "Import Images"
    description = "Import images from a directory(s)"
    icon = "icons/ImportImages.svg"
    priority = 110

    outputs = [("Data", Orange.data.Table)]

    #: list of recent paths
    recent_paths = settings.Setting([])  # type: List[RecentPath]

    want_main_area = False
    resizing_enabled = False

    Modality = Qt.ApplicationModal
    # Modality = Qt.WindowModal

    #: upper bound on the number of recent paths kept at initialization
    MaxRecentItems = 20

    def __init__(self):
        super().__init__()
        #: widget's runtime state
        self.__state = State.NoState
        self.data = None
        self._n_image_categories = 0
        self._n_image_data = 0
        self._n_skipped = 0

        self.__invalidated = False
        self.__pendingTask = None

        vbox = gui.vBox(self.controlArea)
        hbox = gui.hBox(vbox)
        self.recent_cb = QComboBox(
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon,
            minimumContentsLength=16,
            acceptDrops=True
        )
        # drag/drop events on the combo box are handled in eventFilter
        self.recent_cb.installEventFilter(self)
        self.recent_cb.activated[int].connect(self.__onRecentActivated)
        icons = standard_icons(self)

        browseaction = QAction(
            "Open/Load Images", self,
            iconText="\N{HORIZONTAL ELLIPSIS}",
            icon=icons.dir_open_icon,
            toolTip="Select a directory from which to load the images"
        )
        browseaction.triggered.connect(self.__runOpenDialog)
        reloadaction = QAction(
            "Reload", self,
            icon=icons.reload_icon,
            toolTip="Reload current image set"
        )
        reloadaction.triggered.connect(self.reload)
        self.__actions = namespace(
            browse=browseaction,
            reload=reloadaction,
        )

        browsebutton = QPushButton(
            browseaction.iconText(),
            icon=browseaction.icon(),
            toolTip=browseaction.toolTip(),
            clicked=browseaction.trigger
        )
        reloadbutton = QPushButton(
            reloadaction.iconText(),
            icon=reloadaction.icon(),
            clicked=reloadaction.trigger,
            default=True,
        )

        hbox.layout().addWidget(self.recent_cb)
        hbox.layout().addWidget(browsebutton)
        hbox.layout().addWidget(reloadbutton)

        self.addActions([browseaction, reloadaction])

        # keep the button's enabled state in sync with its action
        reloadaction.changed.connect(
            lambda: reloadbutton.setEnabled(reloadaction.isEnabled())
        )
        box = gui.vBox(vbox, "Info")
        self.infostack = QStackedWidget()

        self.info_area = QLabel(
            text="No image set selected",
            wordWrap=True
        )
        self.progress_widget = QProgressBar(
            minimum=0, maximum=0
        )
        self.cancel_button = QPushButton(
            "Cancel", icon=icons.cancel_icon,
        )
        self.cancel_button.clicked.connect(self.cancel)

        w = QWidget()
        vlayout = QVBoxLayout()
        vlayout.setContentsMargins(0, 0, 0, 0)
        hlayout = QHBoxLayout()
        hlayout.setContentsMargins(0, 0, 0, 0)

        hlayout.addWidget(self.progress_widget)
        hlayout.addWidget(self.cancel_button)
        vlayout.addLayout(hlayout)

        self.pathlabel = TextLabel()
        self.pathlabel.setTextElideMode(Qt.ElideMiddle)
        self.pathlabel.setAttribute(Qt.WA_MacSmallSize)

        vlayout.addWidget(self.pathlabel)
        w.setLayout(vlayout)

        # page 0: static info label, page 1: progress bar + cancel + path
        self.infostack.addWidget(self.info_area)
        self.infostack.addWidget(w)

        box.layout().addWidget(self.infostack)

        self.__initRecentItemsModel()
        self.__invalidated = True
        self.__executor = ThreadExecutor(self)

        # start() is run from customEvent once this event is delivered
        QApplication.postEvent(self, QEvent(RuntimeEvent.Init))

    def __initRecentItemsModel(self):
        """
        (Re)populate the recent paths combo box from the stored settings.

        The stored paths are first relocated against the current search
        paths, then truncated to `MaxRecentItems`. The first entry is
        selected only if it is an existing directory.
        """
        self._relocate_recent_files()
        recent_paths = list(self.recent_paths)
        recent_paths = recent_paths[:OWImportImages.MaxRecentItems]
        recent_model = self.recent_cb.model()
        recent_model.clear()

        for pathitem in recent_paths:
            item = RecentPath_asqstandarditem(pathitem)
            recent_model.appendRow(item)

        self.recent_paths = recent_paths

        if self.recent_paths and os.path.isdir(self.recent_paths[0].abspath):
            self.recent_cb.setCurrentIndex(0)
            self.__actions.reload.setEnabled(True)
        else:
            self.recent_cb.setCurrentIndex(-1)
            self.__actions.reload.setEnabled(False)

    def customEvent(self, event):
        """Reimplemented."""
        if event.type() == RuntimeEvent.Init:
            if self.__invalidated:
                try:
                    self.start()
                finally:
                    self.__invalidated = False

        super().customEvent(event)

    def __runOpenDialog(self):
        """
        Let the user pick a directory, then select it and start an import.

        The dialog opens at the parent of the most recent path, or at the
        user's home directory when there is none. `Modality` selects
        between a window modal dialog and the blocking
        `getExistingDirectory` call.
        """
        startdir = os.path.expanduser("~/")
        if self.recent_paths:
            startdir = os.path.dirname(self.recent_paths[0].abspath)

        if OWImportImages.Modality == Qt.WindowModal:
            dlg = QFileDialog(
                self, "Select Top Level Directory", startdir,
                acceptMode=QFileDialog.AcceptOpen,
                modal=True,
            )
            dlg.setFileMode(QFileDialog.Directory)
            dlg.setOption(QFileDialog.ShowDirsOnly)
            dlg.setDirectory(startdir)
            dlg.setAttribute(Qt.WA_DeleteOnClose)

            @dlg.accepted.connect
            def on_accepted():
                dirpath = dlg.selectedFiles()
                if dirpath:
                    self.setCurrentPath(dirpath[0])
                    self.start()
            dlg.open()
        else:
            dirpath = QFileDialog.getExistingDirectory(
                self, "Select Top Level Directory", startdir
            )
            if dirpath:
                self.setCurrentPath(dirpath)
                self.start()

    def __onRecentActivated(self, index):
        """Select the recent path at `index` and start an import."""
        item = self.recent_cb.itemData(index)
        if item is None:
            return
        assert isinstance(item, RecentPath)
        self.setCurrentPath(item.abspath)
        self.start()

    def __updateInfo(self):
        """Update the info label and stack page from the runtime state."""
        if self.__state == State.NoState:
            text = "No image set selected"
        elif self.__state == State.Processing:
            text = "Processing"
        elif self.__state == State.Done:
            nvalid = self._n_image_data
            ncategories = self._n_image_categories
            n_skipped = self._n_skipped
            if ncategories < 2:
                text = "{} image{}".format(nvalid, "s" if nvalid != 1 else "")
            else:
                text = "{} images / {} categories".format(nvalid, ncategories)
            if n_skipped > 0:
                text = text + ", {} skipped".format(n_skipped)
        elif self.__state == State.Cancelled:
            text = "Cancelled"
        elif self.__state == State.Error:
            text = "Error state"
        else:
            assert False

        self.info_area.setText(text)

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

    def setCurrentPath(self, path):
        """
        Set the current root image path to path

        If the path does not exist or is not a directory the current path
        is left unchanged and the problem is reported as a widget error
        and a `UserWarning`.

        Parameters
        ----------
        path : str
            New root import path.

        Returns
        -------
        status : bool
            True if the current root import path was successfully
            changed to path.
        """
        # Nothing to do if `path` already is the current (first recent)
        # directory. NOTE: samefile must be given the path itself, not the
        # result of `isdir` (a bool is interpreted as a file descriptor).
        if self.recent_paths and path is not None and \
                os.path.isdir(self.recent_paths[0].abspath) and \
                os.path.isdir(path) and \
                os.path.samefile(self.recent_paths[0].abspath, path):
            return True

        success = True
        error = None
        if path is not None:
            if not os.path.exists(path):
                error = "'{}' does not exist".format(path)
                path = None
                success = False
            elif not os.path.isdir(path):
                error = "'{}' is not a directory".format(path)
                path = None
                success = False

        if error is not None:
            self.error(error)
            warnings.warn(error, UserWarning, stacklevel=3)
        else:
            self.error()

        if path is not None:
            newindex = self.addRecentPath(path)
            self.recent_cb.setCurrentIndex(newindex)

        self.__actions.reload.setEnabled(len(self.recent_paths) > 0)

        # an import of the previous directory is no longer relevant
        if self.__state == State.Processing:
            self.cancel()

        return success

    def _search_paths(self):
        """
        Return the (name, path) search paths used to resolve recent paths.

        Only the workflow environment's "basedir" is used; the list is
        empty when it is not set.
        """
        basedir = self.workflowEnv().get("basedir", None)
        if basedir is None:
            return []
        return [("basedir", basedir)]

    def addRecentPath(self, path):
        """
        Prepend a path entry to the list of recent paths

        If an entry with the same path already exists in the recent path
        list it is moved to the first place

        Parameters
        ----------
        path : str

        Returns
        -------
        index : int
            Index of the entry in the recent paths list (always 0).
        """
        existing = None
        for pathitem in self.recent_paths:
            try:
                if os.path.samefile(pathitem.abspath, path):
                    existing = pathitem
                    break
            except FileNotFoundError:
                # file not found if the `pathitem.abspath` no longer exists
                pass

        model = self.recent_cb.model()

        if existing is not None:
            selected_index = self.recent_paths.index(existing)
            assert model.item(selected_index).data(Qt.UserRole) is existing
            self.recent_paths.remove(existing)
            row = model.takeRow(selected_index)
            self.recent_paths.insert(0, existing)
            model.insertRow(0, row)
        else:
            item = RecentPath.create(path, self._search_paths())
            self.recent_paths.insert(0, item)
            model.insertRow(0, RecentPath_asqstandarditem(item))
        return 0

    def __setRuntimeState(self, state):
        """
        Switch the widget to `state` and refresh the dependent GUI parts.

        The asserts spell out the allowed transitions: Processing can only
        be entered from a non-processing state, while Done and Cancelled
        can only be entered from Processing.
        """
        assert state in State
        self.setBlocking(state == State.Processing)
        message = ""
        if state == State.Processing:
            assert self.__state in [State.Done,
                                    State.NoState,
                                    State.Error,
                                    State.Cancelled]
            message = "Processing"
        elif state == State.Done:
            assert self.__state == State.Processing
        elif state == State.Cancelled:
            assert self.__state == State.Processing
            message = "Cancelled"
        elif state == State.Error:
            message = "Error during processing"
        elif state == State.NoState:
            message = ""
        else:
            assert False

        self.__state = state

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

        self.setStatusMessage(message)
        self.__updateInfo()

    def reload(self):
        """
        Restart the image scan task
        """
        if self.__state == State.Processing:
            self.cancel()

        self.data = None
        self.start()

    def start(self):
        """
        Start/execute the image indexing operation
        """
        self.error()

        self.__invalidated = False
        if not self.recent_paths:
            return

        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            log.info("Starting a new task while one is in progress. "
                     "Cancel the existing task (dir:'{}')"
                     .format(self.__pendingTask.startdir))
            self.cancel()

        startdir = self.recent_paths[0].abspath

        self.__setRuntimeState(State.Processing)

        report_progress = methodinvoke(
            self, "__onReportProgress", (object,))

        task = ImportImages(report_progress=report_progress)

        # collect the task state in one convenient place
        self.__pendingTask = taskstate = namespace(
            task=task,
            startdir=startdir,
            future=None,
            watcher=None,
            cancelled=False,
            cancel=None,
        )

        def cancel():
            # Cancel the task and disconnect
            if not taskstate.future.cancel():
                # the future could not be cancelled outright; flag the
                # task and wait (briefly) for it to finish
                taskstate.task.cancelled = True
                taskstate.cancelled = True
                try:
                    taskstate.future.result(timeout=3)
                except UserInterruptError:
                    pass
                except TimeoutError:
                    log.info("The task did not stop in in a timely manner")
            taskstate.watcher.finished.disconnect(self.__onRunFinished)

        taskstate.cancel = cancel

        def run_image_scan_task_interupt():
            try:
                return task(startdir)
            except UserInterruptError:
                # Suppress interrupt errors, so they are not logged
                return

        taskstate.future = self.__executor.submit(run_image_scan_task_interupt)
        taskstate.watcher = FutureWatcher(taskstate.future)
        taskstate.watcher.finished.connect(self.__onRunFinished)

    @Slot()
    def __onRunFinished(self):
        """
        Collect the result of the finished import task and commit it.

        A failed task is reported through the excepthook and the widget's
        error message, and results in no data.
        """
        assert QThread.currentThread() is self.thread()
        assert self.__state == State.Processing
        assert self.__pendingTask is not None
        assert self.sender() is self.__pendingTask.watcher
        assert self.__pendingTask.future.done()
        task = self.__pendingTask
        self.__pendingTask = None

        try:
            data, n_skipped = task.future.result()
        except Exception:
            sys.excepthook(*sys.exc_info())
            state = State.Error
            data = None
            n_skipped = 0
            self.error(traceback.format_exc())
        else:
            state = State.Done
            self.error()

        if data:
            self._n_image_data = len(data)
            self._n_image_categories = len(data.domain.class_var.values)\
                if data.domain.class_var else 0
        else:
            self._n_image_data, self._n_image_categories = 0, 0

        self.data = data
        self._n_skipped = n_skipped

        self.__setRuntimeState(state)
        self.commit()

    def cancel(self):
        """
        Cancel current pending task (if any).
        """
        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            self.__pendingTask.cancel()
            self.__pendingTask = None
            self.__setRuntimeState(State.Cancelled)

    @Slot(object)
    def __onReportProgress(self, arg):
        # report on scan progress from a worker thread
        # arg must be a namespace(count: int, lastpath: str)
        assert QThread.currentThread() is self.thread()
        if self.__state == State.Processing:
            self.pathlabel.setText(prettyfypath(arg.lastpath))

    def commit(self):
        """
        Commit a Table from the collected image meta data.
        """
        self.send("Data", self.data)

    def onDeleteWidget(self):
        """Cancel any pending import and shut down the executor."""
        self.cancel()
        self.__executor.shutdown(wait=True)
        self.__invalidated = False

    def eventFilter(self, receiver, event):
        # re-implemented from QWidget
        # intercept and process drag drop events on the recent directory
        # selection combo box
        def dirpath(event):
            # type: (QDropEvent) -> Optional[str]
            """Return the directory from a QDropEvent."""
            data = event.mimeData()
            urls = data.urls()
            if len(urls) == 1:
                url = urls[0]
                path = url.toLocalFile()
                if path.endswith("/"):
                    path = path[:-1]  # remove last /
                if os.path.isdir(path):
                    return path
            return None

        if receiver is self.recent_cb and \
                event.type() in {QEvent.DragEnter, QEvent.DragMove,
                                 QEvent.Drop}:
            assert isinstance(event, QDropEvent)
            path = dirpath(event)
            if path is not None and event.possibleActions() & Qt.LinkAction:
                event.setDropAction(Qt.LinkAction)
                event.accept()
                # only an actual drop changes the path and starts the import
                if event.type() == QEvent.Drop:
                    self.setCurrentPath(path)
                    self.start()
            else:
                event.ignore()
            return True

        return super().eventFilter(receiver, event)

    def _relocate_recent_files(self):
        """
        Re-resolve the stored recent paths against the search paths.

        Entries that resolve are re-created from the resolved absolute
        path (keeping title, sheet and file format); entries that do not
        resolve are kept as they are.
        """
        search_paths = self._search_paths()
        rec = []
        for recent in self.recent_paths:
            kwargs = dict(
                title=recent.title, sheet=recent.sheet,
                file_format=recent.file_format)
            resolved = recent.resolve(search_paths)
            if resolved is not None:
                rec.append(
                    RecentPath.create(resolved.abspath, search_paths,
                                      **kwargs))
            else:
                rec.append(recent)
        # change the list in-place for the case the widgets wraps this list
        self.recent_paths[:] = rec

    def workflowEnvChanged(self, key, value, oldvalue):
        """
        Function called when environment changes (e.g. while saving the
        scheme). It makes sure that all environment connected values are
        modified (e.g. relative file paths are changed)
        """
        self.__initRecentItemsModel()
class OWSetEnrichment(widget.OWWidget): name = "Set Enrichment" description = "" icon = "../widgets/icons/GeneSetEnrichment.svg" priority = 5000 inputs = [("Data", Orange.data.Table, "setData", widget.Default), ("Reference", Orange.data.Table, "setReference")] outputs = [("Data subset", Orange.data.Table)] settingsHandler = settings.DomainContextHandler() taxid = settings.ContextSetting(None) speciesIndex = settings.ContextSetting(0) genesinrows = settings.ContextSetting(False) geneattr = settings.ContextSetting(0) categoriesCheckState = settings.ContextSetting({}) useReferenceData = settings.Setting(False) useMinCountFilter = settings.Setting(True) useMaxPValFilter = settings.Setting(True) useMaxFDRFilter = settings.Setting(True) minClusterCount = settings.Setting(3) maxPValue = settings.Setting(0.01) maxFDR = settings.Setting(0.01) autocommit = settings.Setting(False) Ready, Initializing, Loading, RunningEnrichment = 0, 1, 2, 4 def __init__(self, parent=None): super().__init__(parent) self.geneMatcherSettings = [False, False, True, False] self.data = None self.referenceData = None self.taxid_list = [] self.__genematcher = (None, fulfill(gene.matcher([]))) self.__invalidated = False self.currentAnnotatedCategories = [] self.state = None self.__state = OWSetEnrichment.Initializing box = gui.widgetBox(self.controlArea, "Info") self.infoBox = gui.widgetLabel(box, "Info") self.infoBox.setText("No data on input.\n") self.speciesComboBox = gui.comboBox( self.controlArea, self, "speciesIndex", "Species", callback=self.__on_speciesIndexChanged) box = gui.widgetBox(self.controlArea, "Entity names") self.geneAttrComboBox = gui.comboBox(box, self, "geneattr", "Entity feature", sendSelectedValue=0, callback=self.updateAnnotations) cb = gui.checkBox(box, self, "genesinrows", "Use feature names", callback=self.updateAnnotations, disables=[(-1, self.geneAttrComboBox)]) cb.makeConsistent() # gui.button(box, self, "Gene matcher settings", # callback=self.updateGeneMatcherSettings, 
# tooltip="Open gene matching settings dialog") self.referenceRadioBox = gui.radioButtonsInBox( self.controlArea, self, "useReferenceData", ["All entities", "Reference set (input)"], tooltips=[ "Use entire genome (for gene set enrichment) or all " + "available entities for reference", "Use entities from Reference Examples input signal " + "as reference" ], box="Reference", callback=self.updateAnnotations) box = gui.widgetBox(self.controlArea, "Entity Sets") self.groupsWidget = QTreeWidget(self) self.groupsWidget.setHeaderLabels(["Category"]) box.layout().addWidget(self.groupsWidget) hLayout = QHBoxLayout() hLayout.setSpacing(10) hWidget = gui.widgetBox(self.mainArea, orientation=hLayout) gui.spin(hWidget, self, "minClusterCount", 0, 100, label="Entities", tooltip="Minimum entity count", callback=self.filterAnnotationsChartView, callbackOnReturn=True, checked="useMinCountFilter", checkCallback=self.filterAnnotationsChartView) pvalfilterbox = gui.widgetBox(hWidget, orientation="horizontal") cb = gui.checkBox(pvalfilterbox, self, "useMaxPValFilter", "p-value", callback=self.filterAnnotationsChartView) sp = gui.doubleSpin( pvalfilterbox, self, "maxPValue", 0.0, 1.0, 0.0001, tooltip="Maximum p-value", callback=self.filterAnnotationsChartView, callbackOnReturn=True, ) sp.setEnabled(self.useMaxFDRFilter) cb.toggled[bool].connect(sp.setEnabled) pvalfilterbox.layout().setAlignment(cb, Qt.AlignRight) pvalfilterbox.layout().setAlignment(sp, Qt.AlignLeft) fdrfilterbox = gui.widgetBox(hWidget, orientation="horizontal") cb = gui.checkBox(fdrfilterbox, self, "useMaxFDRFilter", "FDR", callback=self.filterAnnotationsChartView) sp = gui.doubleSpin( fdrfilterbox, self, "maxFDR", 0.0, 1.0, 0.0001, tooltip="Maximum False discovery rate", callback=self.filterAnnotationsChartView, callbackOnReturn=True, ) sp.setEnabled(self.useMaxFDRFilter) cb.toggled[bool].connect(sp.setEnabled) fdrfilterbox.layout().setAlignment(cb, Qt.AlignRight) fdrfilterbox.layout().setAlignment(sp, Qt.AlignLeft) 
self.filterLineEdit = QLineEdit(self, placeholderText="Filter ...") self.filterCompleter = QCompleter(self.filterLineEdit) self.filterCompleter.setCaseSensitivity(Qt.CaseInsensitive) self.filterLineEdit.setCompleter(self.filterCompleter) hLayout.addWidget(self.filterLineEdit) self.mainArea.layout().addWidget(hWidget) self.filterLineEdit.textChanged.connect( self.filterAnnotationsChartView) self.annotationsChartView = QTreeView( alternatingRowColors=True, sortingEnabled=True, selectionMode=QTreeView.ExtendedSelection, rootIsDecorated=False, editTriggers=QTreeView.NoEditTriggers, ) self.annotationsChartView.viewport().setMouseTracking(True) self.mainArea.layout().addWidget(self.annotationsChartView) contextEventFilter = gui.VisibleHeaderSectionContextEventFilter( self.annotationsChartView) self.annotationsChartView.header().installEventFilter( contextEventFilter) self.groupsWidget.itemClicked.connect(self.subsetSelectionChanged) gui.auto_commit(self.controlArea, self, "autocommit", "Commit") self.setBlocking(True) task = EnsureDownloaded([(taxonomy.Taxonomy.DOMAIN, taxonomy.Taxonomy.FILENAME), (geneset.sfdomain, "index.pck")]) task.finished.connect(self.__initialize_finish) self.setStatusMessage("Initializing") self._executor = ThreadExecutor(parent=self, threadPool=QThreadPool(self)) self._executor.submit(task) def sizeHint(self): return QSize(1024, 600) def __initialize_finish(self): # Finalize the the widget's initialization (preferably after # ensuring all required databases have been downloaded. 
sets = geneset.list_all() taxids = set(taxonomy.common_taxids() + list(filter(None, [tid for _, tid, _ in sets]))) organisms = [(tid, name_or_none(tid)) for tid in taxids] organisms = [(tid, name) for tid, name in organisms if name is not None] organisms = [(None, "None")] + sorted(organisms) taxids = [tid for tid, _ in organisms] names = [name for _, name in organisms] self.taxid_list = taxids self.speciesComboBox.clear() self.speciesComboBox.addItems(names) self.genesets = sets if self.taxid in self.taxid_list: taxid = self.taxid else: taxid = self.taxid_list[0] self.taxid = None self.setCurrentOrganism(taxid) self.setBlocking(False) self.__state = OWSetEnrichment.Ready self.setStatusMessage("") def setCurrentOrganism(self, taxid): """Set the current organism `taxid`.""" if taxid not in self.taxid_list: taxid = self.taxid_list[min(self.speciesIndex, len(self.taxid_list) - 1)] if self.taxid != taxid: self.taxid = taxid self.speciesIndex = self.taxid_list.index(taxid) self.refreshHierarchy() self._invalidateGeneMatcher() self._invalidate() def currentOrganism(self): """Return the current organism taxid""" return self.taxid def __on_speciesIndexChanged(self): taxid = self.taxid_list[self.speciesIndex] self.taxid = "< Do not look >" self.setCurrentOrganism(taxid) if self.__invalidated and self.data is not None: self.updateAnnotations() def clear(self): """Clear/reset the widget state.""" self._cancelPending() self.state = None self.__state = self.__state & ~OWSetEnrichment.RunningEnrichment self._clearView() if self.annotationsChartView.model() is not None: self.annotationsChartView.model().clear() self.geneAttrComboBox.clear() self.geneAttrs = [] self._updatesummary() def _cancelPending(self): """Cancel pending tasks.""" if self.state is not None: self.state.results.cancel() self.state.namematcher.cancel() self.state.cancelled = True def _clearView(self): """Clear the enrichment report view (main area).""" if self.annotationsChartView.model() is not None: 
self.annotationsChartView.model().clear() def setData(self, data=None): """Set the input dataset with query gene names""" if self.__state & OWSetEnrichment.Initializing: self.__initialize_finish() self.error(0) self.closeContext() self.clear() self.groupsWidget.clear() self.data = data if data is not None: varlist = [ var for var in data.domain.variables + data.domain.metas if isinstance(var, Orange.data.StringVariable) ] self.geneAttrs = varlist for var in varlist: self.geneAttrComboBox.addItem(*gui.attributeItem(var)) oldtaxid = self.taxid self.geneattr = min(self.geneattr, len(self.geneAttrs) - 1) taxid = data_hints.get_hint(data, "taxid", "") if taxid in self.taxid_list: self.speciesIndex = self.taxid_list.index(taxid) self.taxid = taxid self.genesinrows = data_hints.get_hint(data, "genesinrows", self.genesinrows) self.openContext(data) if oldtaxid != self.taxid: self.taxid = "< Do not look >" self.setCurrentOrganism(taxid) self.refreshHierarchy() self._invalidate() def setReference(self, data=None): """Set the (optional) input dataset with reference gene names.""" self.referenceData = data self.referenceRadioBox.setEnabled(bool(data)) if self.useReferenceData: self._invalidate() def handleNewSignals(self): if self.__invalidated: self.updateAnnotations() def _invalidateGeneMatcher(self): _, f = self.__genematcher f.cancel() self.__genematcher = (None, fulfill(gene.matcher([]))) def _invalidate(self): self.__invalidated = True def genesFromTable(self, table): if self.genesinrows: genes = [attr.name for attr in table.domain.attributes] else: geneattr = self.geneAttrs[self.geneattr] genes = [str(ex[geneattr]) for ex in table] return genes def getHierarchy(self, taxid): def recursive_dict(): return defaultdict(recursive_dict) collection = recursive_dict() def collect(col, hier): if hier: collect(col[hier[0]], hier[1:]) for hierarchy, t_id, _ in self.genesets: collect(collection[t_id], hierarchy) return (taxid, collection[taxid]), (None, collection[None]) def 
setHierarchy(self, hierarchy, hierarchy_noorg): self.groupsWidgetItems = {} def fill(col, parent, full=(), org=""): for key, value in sorted(col.items()): full_cat = full + (key, ) item = QTreeWidgetItem(parent, [key]) item.setFlags(item.flags() | Qt.ItemIsUserCheckable | Qt.ItemIsSelectable | Qt.ItemIsEnabled) if value: item.setFlags(item.flags() | Qt.ItemIsTristate) checked = self.categoriesCheckState.get((full_cat, org), Qt.Checked) item.setData(0, Qt.CheckStateRole, checked) item.setExpanded(True) item.category = full_cat item.organism = org self.groupsWidgetItems[full_cat] = item fill(value, item, full_cat, org=org) self.groupsWidget.clear() fill(hierarchy[1], self.groupsWidget, org=hierarchy[0]) fill(hierarchy_noorg[1], self.groupsWidget, org=hierarchy_noorg[0]) def refreshHierarchy(self): self.setHierarchy(*self.getHierarchy( taxid=self.taxid_list[self.speciesIndex])) def selectedCategories(self): """ Return a list of currently selected hierarchy keys. A key is a tuple of identifiers from the root to the leaf of the hierarchy tree. """ return [ key for key, check in self.getHierarchyCheckState().items() if check == Qt.Checked ] def getHierarchyCheckState(self): def collect(item, full=()): checked = item.checkState(0) name = str(item.data(0, Qt.DisplayRole)) full_cat = full + (name, ) result = [((full_cat, item.organism), checked)] for i in range(item.childCount()): result.extend(collect(item.child(i), full_cat)) return result items = [ self.groupsWidget.topLevelItem(i) for i in range(self.groupsWidget.topLevelItemCount()) ] states = itertools.chain(*(collect(item) for item in items)) return dict(states) def subsetSelectionChanged(self, item, column): # The selected geneset (hierarchy) subset has been changed by the # user. Update the displayed results. 
# Update the stored state (persistent settings) self.categoriesCheckState = self.getHierarchyCheckState() categories = self.selectedCategories() if self.data is not None: if self._nogenematching() or \ not set(categories) <= set(self.currentAnnotatedCategories): self.updateAnnotations() else: self.filterAnnotationsChartView() def updateGeneMatcherSettings(self): raise NotImplementedError from .OWGOEnrichmentAnalysis import GeneMatcherDialog dialog = GeneMatcherDialog(self, defaults=self.geneMatcherSettings, enabled=[True] * 4, modal=True) if dialog.exec_(): self.geneMatcherSettings = [ getattr(dialog, item[0]) for item in dialog.items ] self._invalidateGeneMatcher() if self.data is not None: self.updateAnnotations() def _genematcher(self): """ Return a Future[gene.SequenceMatcher] """ taxid = self.taxid_list[self.speciesIndex] current, matcher_f = self.__genematcher if taxid == current and \ not matcher_f.cancelled(): return matcher_f self._invalidateGeneMatcher() if taxid is None: self.__genematcher = (None, fulfill(gene.matcher([]))) return self.__genematcher[1] matchers = [gene.GMGO, gene.GMKEGG, gene.GMNCBI, gene.GMAffy] matchers = [ m for m, use in zip(matchers, self.geneMatcherSettings) if use ] def create(): return gene.matcher([m(taxid) for m in matchers]) matcher_f = self._executor.submit(create) self.__genematcher = (taxid, matcher_f) return self.__genematcher[1] def _nogenematching(self): return self.taxid is None or not any(self.geneMatcherSettings) def updateAnnotations(self): if self.data is None: return assert not self.__state & OWSetEnrichment.Initializing self._cancelPending() self._clearView() self.information(0) self.warning(0) self.error(0) if not self.genesinrows and len(self.geneAttrs) == 0: self.error(0, "Input data contains no columns with gene names") return self.__state = OWSetEnrichment.RunningEnrichment taxid = self.taxid_list[self.speciesIndex] self.taxid = taxid categories = self.selectedCategories() clusterGenes = 
self.genesFromTable(self.data) if self.referenceData is not None and self.useReferenceData: referenceGenes = self.genesFromTable(self.referenceData) else: referenceGenes = None self.currentAnnotatedCategories = categories genematcher = self._genematcher() self.progressBarInit() ## Load collections in a worker thread # TODO: Use cached collections if already loaded and # use ensure_genesetsdownloaded with progress report (OWSelectGenes) collections = self._executor.submit(geneset.collections, *categories) def refset_null(): """Return the default background reference set""" col = collections.result() return reduce(operator.ior, (set(g.genes) for g in col), set()) def refset_ncbi(): """Return all NCBI gene names""" geneinfo = gene.NCBIGeneInfo(taxid) return set(geneinfo.keys()) def namematcher(): matcher = genematcher.result() match = matcher.set_targets(ref_set.result()) match.umatch = memoize(match.umatch) return match def map_unames(): matcher = namematcher.result() query = list(filter(None, map(matcher.umatch, querynames))) reference = list( filter(None, map(matcher.umatch, ref_set.result()))) return query, reference if self._nogenematching(): if referenceGenes is None: ref_set = self._executor.submit(refset_null) else: ref_set = fulfill(referenceGenes) else: if referenceGenes == None: ref_set = self._executor.submit(refset_ncbi) else: ref_set = fulfill(referenceGenes) namematcher = self._executor.submit(namematcher) querynames = clusterGenes state = types.SimpleNamespace() state.query_set = clusterGenes state.reference_set = referenceGenes state.namematcher = namematcher state.query_count = len(set(clusterGenes)) state.reference_count = (len(set(referenceGenes)) if referenceGenes is not None else None) state.cancelled = False progress = methodinvoke(self, "_setProgress", (float, )) info = methodinvoke(self, "_setRunInfo", (str, )) @withtraceback def run(): info("Loading data") match = namematcher.result() query, reference = map_unames() gscollections = 
collections.result() results = [] info("Running enrichment") p = 0 for i, gset in enumerate(gscollections): genes = set(filter(None, map(match.umatch, gset.genes))) enr = set_enrichment(genes, reference, query) results.append((gset, enr)) if state.cancelled: raise UserInteruptException pnew = int(100 * i / len(gscollections)) if pnew != p: progress(pnew) p = pnew progress(100) info("") return query, reference, results task = Task(function=run) task.resultReady.connect(self.__on_enrichment_finished) task.exceptionReady.connect(self.__on_enrichment_failed) result = self._executor.submit(task) state.results = result self.state = state self._updatesummary() def __on_enrichment_failed(self, exception): if not isinstance(exception, UserInteruptException): print("ERROR:", exception, file=sys.stderr) print(exception._traceback, file=sys.stderr) self.progressBarFinished() self.setStatusMessage("") self.__state &= ~OWSetEnrichment.RunningEnrichment def __on_enrichment_finished(self, results): assert QThread.currentThread() is self.thread() self.__state &= ~OWSetEnrichment.RunningEnrichment query, reference, results = results if self.annotationsChartView.model(): self.annotationsChartView.model().clear() nquery = len(query) nref = len(reference) maxcount = max((len(e.query_mapped) for _, e in results), default=1) maxrefcount = max((len(e.reference_mapped) for _, e in results), default=1) nspaces = int(math.ceil(math.log10(maxcount or 1))) refspaces = int(math.ceil(math.log(maxrefcount or 1))) query_fmt = "%" + str(nspaces) + "s (%.2f%%)" ref_fmt = "%" + str(refspaces) + "s (%.2f%%)" def fmt_count(fmt, count, total): return fmt % (count, 100.0 * count / (total or 1)) fmt_query_count = partial(fmt_count, query_fmt) fmt_ref_count = partial(fmt_count, ref_fmt) linkFont = QFont(self.annotationsChartView.viewOptions().font) linkFont.setUnderline(True) def item(value=None, tooltip=None, user=None): si = QStandardItem() if value is not None: si.setData(value, Qt.DisplayRole) if 
tooltip is not None: si.setData(tooltip, Qt.ToolTipRole) if user is not None: si.setData(user, Qt.UserRole) else: si.setData(value, Qt.UserRole) return si model = QStandardItemModel() model.setSortRole(Qt.UserRole) model.setHorizontalHeaderLabels([ "Category", "Term", "Count", "Reference count", "p-value", "FDR", "Enrichment" ]) for i, (gset, enrich) in enumerate(results): if len(enrich.query_mapped) == 0: continue nquery_mapped = len(enrich.query_mapped) nref_mapped = len(enrich.reference_mapped) row = [ item(", ".join(gset.hierarchy)), item(gsname(gset), tooltip=gset.link), item(fmt_query_count(nquery_mapped, nquery), tooltip=nquery_mapped, user=nquery_mapped), item(fmt_ref_count(nref_mapped, nref), tooltip=nref_mapped, user=nref_mapped), item(fmtp(enrich.p_value), user=enrich.p_value), item( ), # column 5, FDR, is computed in filterAnnotationsChartView item(enrich.enrichment_score, tooltip="%.3f" % enrich.enrichment_score, user=enrich.enrichment_score) ] row[0].geneset = gset row[0].enrichment = enrich row[1].setData(gset.link, gui.LinkRole) row[1].setFont(linkFont) row[1].setForeground(QColor(Qt.blue)) model.appendRow(row) self.annotationsChartView.setModel(model) self.annotationsChartView.selectionModel().selectionChanged.connect( self.commit) if not model.rowCount(): self.warning(0, "No enriched sets found.") else: self.warning(0) allnames = set( gsname(geneset) for geneset, (count, _, _, _) in results if count) allnames |= reduce(operator.ior, (set(word_split(name)) for name in allnames), set()) self.filterCompleter.setModel(None) self.completerModel = QStringListModel(sorted(allnames)) self.filterCompleter.setModel(self.completerModel) if results: max_score = max( (e.enrichment_score for _, e in results if np.isfinite(e.enrichment_score)), default=1) self.annotationsChartView.setItemDelegateForColumn( 6, BarItemDelegate(self, scale=(0.0, max_score))) self.annotationsChartView.setItemDelegateForColumn( 1, 
gui.LinkStyledItemDelegate(self.annotationsChartView)) header = self.annotationsChartView.header() for i in range(model.columnCount()): sh = self.annotationsChartView.sizeHintForColumn(i) sh = max(sh, header.sectionSizeHint(i)) self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30)) # self.annotationsChartView.resizeColumnToContents(i) self.filterAnnotationsChartView() self.progressBarFinished() self.setStatusMessage("") def _updatesummary(self): state = self.state if state is None: self.error(0, ) self.warning(0) self.infoBox.setText("No data on input.\n") return text = "{.query_count} unique names on input\n".format(state) if state.results.done() and not state.results.exception(): mapped, _, _ = state.results.result() ratio_mapped = (len(mapped) / state.query_count if state.query_count else 0) text += ("%i (%.1f%%) gene names matched" % (len(mapped), 100.0 * ratio_mapped)) elif not state.results.done(): text += "..." else: text += "<Error {}>".format(str(state.results.exception())) self.infoBox.setText(text) # TODO: warn on no enriched sets found (i.e no query genes # mapped to any set) def filterAnnotationsChartView(self, filterString=""): if self.__state & OWSetEnrichment.RunningEnrichment: return # TODO: Move filtering to a filter proxy model # TODO: Re-enable string search categories = set(", ".join(cat) for cat, _ in self.selectedCategories()) # filterString = str(self.filterLineEdit.text()).lower() model = self.annotationsChartView.model() def ishidden(index): # Is item at index (row) hidden item = model.item(index) item_cat = item.data(Qt.DisplayRole) return item_cat not in categories hidemask = [ishidden(i) for i in range(model.rowCount())] # compute FDR according the selected categories pvals = [ model.item(i, 4).data(Qt.UserRole) for i, hidden in enumerate(hidemask) if not hidden ] fdrs = utils.stats.FDR(pvals) # update FDR for the selected collections and apply filtering rules itemsHidden = [] fdriter = iter(fdrs) for index, hidden in 
enumerate(hidemask): if not hidden: fdr = next(fdriter) pval = model.index(index, 4).data(Qt.UserRole) count = model.index(index, 2).data(Qt.ToolTipRole) hidden = (self.useMinCountFilter and count < self.minClusterCount) or \ (self.useMaxPValFilter and pval > self.maxPValue) or \ (self.useMaxFDRFilter and fdr > self.maxFDR) if not hidden: fdr_item = model.item(index, 5) fdr_item.setData(fmtpdet(fdr), Qt.ToolTipRole) fdr_item.setData(fmtp(fdr), Qt.DisplayRole) fdr_item.setData(fdr, Qt.UserRole) self.annotationsChartView.setRowHidden(index, QModelIndex(), hidden) itemsHidden.append(hidden) if model.rowCount() and all(itemsHidden): self.information(0, "All sets were filtered out.") else: self.information(0) self._updatesummary() @Slot(float) def _setProgress(self, value): assert QThread.currentThread() is self.thread() self.progressBarSet(value, processEvents=None) @Slot(str) def _setRunInfo(self, text): self.setStatusMessage(text) def commit(self): if self.data is None or \ self.__state & OWSetEnrichment.RunningEnrichment: return model = self.annotationsChartView.model() rows = self.annotationsChartView.selectionModel().selectedRows(0) selected = [model.item(index.row(), 0) for index in rows] mapped = reduce(operator.ior, (set(item.enrichment.query_mapped) for item in selected), set()) assert self.state.namematcher.done() matcher = self.state.namematcher.result() axis = 1 if self.genesinrows else 0 if axis == 1: mapped = [ attr for attr in self.data.domain.attributes if matcher.umatch(attr.name) in mapped ] newdomain = Orange.data.Domain(mapped, self.data.domain.class_vars, self.data.domain.metas) data = self.data.from_table(newdomain, self.data) else: geneattr = self.geneAttrs[self.geneattr] selected = [ i for i, ex in enumerate(self.data) if matcher.umatch(str(ex[geneattr])) in mapped ] data = self.data[selected] self.send("Data subset", data) def onDeleteWidget(self): if self.state is not None: self._cancelPending() self.state = None 
self._executor.shutdown(wait=False)
class OWTestAndScore(OWWidget): name = "Test and Score" description = "Cross-validation accuracy estimation." icon = "icons/TestLearners1.svg" priority = 100 keywords = ['Cross Validation', 'CV'] replaces = ["Orange.widgets.evaluate.owtestlearners.OWTestLearners"] class Inputs: train_data = Input("Data", Table, default=True) test_data = Input("Test Data", Table) learner = MultiInput("Learner", Learner, filter_none=True) preprocessor = Input("Preprocessor", Preprocess) class Outputs: predictions = Output("Predictions", Table) evaluations_results = Output("Evaluation Results", Results) settings_version = 3 buttons_area_orientation = None UserAdviceMessages = [ widget.Message("Click on the table header to select shown columns", "click_header") ] settingsHandler = settings.PerfectDomainContextHandler() score_table = settings.SettingProvider(ScoreTable) #: Resampling/testing types KFold, FeatureFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest \ = 0, 1, 2, 3, 4, 5 #: Numbers of folds NFolds = [2, 3, 5, 10, 20] #: Number of repetitions NRepeats = [2, 3, 5, 10, 20, 50, 100] #: Sample sizes SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95] #: Selected resampling type resampling = settings.Setting(0) #: Number of folds for K-fold cross validation n_folds = settings.Setting(2) #: Stratified sampling for K-fold cv_stratified = settings.Setting(True) #: Number of repeats for ShuffleSplit sampling n_repeats = settings.Setting(3) #: ShuffleSplit sample size sample_size = settings.Setting(9) #: Stratified sampling for Random Sampling shuffle_stratified = settings.Setting(True) # CV where nr. of feature values determines nr. 
of folds fold_feature = settings.ContextSetting(None) fold_feature_selected = settings.ContextSetting(False) use_rope = settings.Setting(False) rope = settings.Setting(0.1) comparison_criterion = settings.Setting(0, schema_only=True) TARGET_AVERAGE = "(Average over classes)" class_selection = settings.ContextSetting(TARGET_AVERAGE) class Error(OWWidget.Error): test_data_empty = Msg("Test dataset is empty.") class_required_test = Msg( "Test data input requires a target variable.") too_many_folds = Msg("Number of folds exceeds the data size") class_inconsistent = Msg("Test and train datasets " "have different target variables.") memory_error = Msg("Not enough memory.") test_data_incompatible = Msg( "Test data may be incompatible with train data.") train_data_error = Msg("{}") class Warning(OWWidget.Warning): missing_data = \ Msg("Instances with unknown target values were removed from{}data.") test_data_missing = Msg("Missing separate test data input.") scores_not_computed = Msg("Some scores could not be computed.") test_data_unused = Msg("Test data is present but unused. 
" "Select 'Test on test data' to use it.") cant_stratify = \ Msg("Can't run stratified {}-fold cross validation; " "the least common class has only {} instances.") class Information(OWWidget.Information): data_sampled = Msg("Train data has been sampled") test_data_sampled = Msg("Test data has been sampled") test_data_transformed = Msg( "Test data has been transformed to match the train data.") cant_stratify_numeric = Msg("Stratification is ignored for regression") def __init__(self): super().__init__() self.data = None self.test_data = None self.preprocessor = None self.train_data_missing_vals = False self.test_data_missing_vals = False self.scorers = [] self.__pending_comparison_criterion = self.comparison_criterion self.__id_gen = count() self._learner_inputs = [] # type: List[Tuple[Any, Learner]] #: An Ordered dictionary with current inputs and their testing results #: (keyed by ids generated by __id_gen). self.learners = OrderedDict() # type: Dict[Any, Input] self.__state = State.Waiting # Do we need to [re]test any learners, set by _invalidate and # cleared by __update self.__needupdate = False self.__task = None # type: Optional[TaskState] self.__executor = ThreadExecutor() sbox = gui.vBox(self.controlArea, "Sampling") rbox = gui.radioButtons(sbox, self, "resampling", callback=self._param_changed) gui.appendRadioButton(rbox, "Cross validation") ibox = gui.indentedBox(rbox) gui.comboBox(ibox, self, "n_folds", label="Number of folds: ", items=[str(x) for x in self.NFolds], orientation=Qt.Horizontal, callback=self.kfold_changed) gui.checkBox(ibox, self, "cv_stratified", "Stratified", callback=self.kfold_changed) gui.appendRadioButton(rbox, "Cross validation by feature") ibox = gui.indentedBox(rbox) self.feature_model = DomainModel(order=DomainModel.METAS, valid_types=DiscreteVariable) self.features_combo = gui.comboBox(ibox, self, "fold_feature", model=self.feature_model, orientation=Qt.Horizontal, searchable=True, callback=self.fold_feature_changed) 
gui.appendRadioButton(rbox, "Random sampling") ibox = gui.indentedBox(rbox) gui.comboBox(ibox, self, "n_repeats", label="Repeat train/test: ", items=[str(x) for x in self.NRepeats], orientation=Qt.Horizontal, callback=self.shuffle_split_changed) gui.comboBox(ibox, self, "sample_size", label="Training set size: ", items=["{} %".format(x) for x in self.SampleSizes], orientation=Qt.Horizontal, callback=self.shuffle_split_changed) gui.checkBox(ibox, self, "shuffle_stratified", "Stratified", callback=self.shuffle_split_changed) gui.appendRadioButton(rbox, "Leave one out") gui.appendRadioButton(rbox, "Test on train data") gui.appendRadioButton(rbox, "Test on test data") self.cbox = gui.vBox(self.controlArea, "Target Class") self.class_selection_combo = gui.comboBox( self.cbox, self, "class_selection", items=[], sendSelectedValue=True, contentsLength=8, searchable=True, callback=self._on_target_class_changed) self.modcompbox = box = gui.vBox(self.controlArea, "Model Comparison") gui.comboBox(box, self, "comparison_criterion", callback=self.update_comparison_table) hbox = gui.hBox(box) gui.checkBox(hbox, self, "use_rope", "Negligible difference: ", callback=self._on_use_rope_changed) gui.lineEdit(hbox, self, "rope", validator=QDoubleValidator(), controlWidth=70, callback=self.update_comparison_table, alignment=Qt.AlignRight) self.controls.rope.setEnabled(self.use_rope) gui.rubber(self.controlArea) self.score_table = ScoreTable(self) self.score_table.shownScoresChanged.connect(self.update_stats_model) view = self.score_table.view view.setSizeAdjustPolicy(view.AdjustToContents) box = gui.vBox(self.mainArea, "Evaluation Results") box.layout().addWidget(self.score_table.view) self.compbox = box = gui.vBox(self.mainArea, box="Model comparison") table = self.comparison_table = QTableWidget( wordWrap=False, editTriggers=QTableWidget.NoEditTriggers, selectionMode=QTableWidget.NoSelection) table.setSizeAdjustPolicy(table.AdjustToContents) header = table.verticalHeader() 
header.setSectionResizeMode(QHeaderView.Fixed) header.setSectionsClickable(False) header = table.horizontalHeader() header.setTextElideMode(Qt.ElideRight) header.setDefaultAlignment(Qt.AlignCenter) header.setSectionsClickable(False) header.setStretchLastSection(False) header.setSectionResizeMode(QHeaderView.ResizeToContents) avg_width = self.fontMetrics().averageCharWidth() header.setMinimumSectionSize(8 * avg_width) header.setMaximumSectionSize(15 * avg_width) header.setDefaultSectionSize(15 * avg_width) box.layout().addWidget(table) box.layout().addWidget( QLabel( "<small>Table shows probabilities that the score for the model in " "the row is higher than that of the model in the column. " "Small numbers show the probability that the difference is " "negligible.</small>", wordWrap=True)) def sizeHint(self): sh = super().sizeHint() return QSize(780, sh.height()) def _update_controls(self): self.fold_feature = None self.feature_model.set_domain(None) if self.data: self.feature_model.set_domain(self.data.domain) if self.fold_feature is None and self.feature_model: self.fold_feature = self.feature_model[0] enabled = bool(self.feature_model) self.controls.resampling.buttons[ OWTestAndScore.FeatureFold].setEnabled(enabled) self.features_combo.setEnabled(enabled) if self.resampling == OWTestAndScore.FeatureFold and not enabled: self.resampling = OWTestAndScore.KFold @Inputs.learner def set_learner(self, index: int, learner: Learner): """ Set the input `learner` at `index`. 
Parameters ---------- index: int learner: Orange.base.Learner """ key, _ = self._learner_inputs[index] slot = self.learners[key] self.learners[key] = slot._replace(learner=learner, results=None) self._invalidate([key]) @Inputs.learner.insert def insert_learner(self, index: int, learner: Learner): key = next(self.__id_gen) self._learner_inputs.insert(index, (key, learner)) self.learners[key] = InputLearner(learner, None, None, key) self.learners = { key: self.learners[key] for key, _ in self._learner_inputs } self._invalidate([key]) @Inputs.learner.remove def remove_learner(self, index: int): key, _ = self._learner_inputs[index] self._invalidate([key]) self._learner_inputs.pop(index) self.learners.pop(key) @Inputs.train_data def set_train_data(self, data): """ Set the input training dataset. Parameters ---------- data : Optional[Orange.data.Table] """ self.cancel() self.Information.data_sampled.clear() self.Error.train_data_error.clear() if data is not None: data_errors = [ ("Train dataset is empty.", len(data) == 0), ("Train data input requires a target variable.", not data.domain.class_vars), ("Too many target variables.", len(data.domain.class_vars) > 1), ("Target variable has no values.", np.isnan(data.Y).all()), ("Target variable has only one value.", data.domain.has_discrete_class and len(unique(data.Y)) < 2), ("Data has no features to learn from.", data.X.shape[1] == 0), ] for error_msg, cond in data_errors: if cond: self.Error.train_data_error(error_msg) data = None break if isinstance(data, SqlTable): if data.approx_len() < AUTO_DL_LIMIT: data = Table(data) else: self.Information.data_sampled() data_sample = data.sample_time(1, no_cache=True) data_sample.download_data(AUTO_DL_LIMIT, partial=True) data = Table(data_sample) self.train_data_missing_vals = \ data is not None and np.isnan(data.Y).any() if self.train_data_missing_vals or self.test_data_missing_vals: self.Warning.missing_data(self._which_missing_data()) if data: data = HasClass()(data) else: 
self.Warning.missing_data.clear() self.data = data self.closeContext() self._update_scorers() self._update_controls() if data is not None: self._update_class_selection() self.openContext(data.domain) if self.fold_feature_selected and bool(self.feature_model): self.resampling = OWTestAndScore.FeatureFold self._invalidate() @Inputs.test_data def set_test_data(self, data): # type: (Orange.data.Table) -> None """ Set the input separate testing dataset. Parameters ---------- data : Optional[Orange.data.Table] """ self.Information.test_data_sampled.clear() self.Error.test_data_empty.clear() if data is not None and not data: self.Error.test_data_empty() data = None if data and not data.domain.class_var: self.Error.class_required_test() data = None else: self.Error.class_required_test.clear() if isinstance(data, SqlTable): if data.approx_len() < AUTO_DL_LIMIT: data = Table(data) else: self.Information.test_data_sampled() data_sample = data.sample_time(1, no_cache=True) data_sample.download_data(AUTO_DL_LIMIT, partial=True) data = Table(data_sample) self.test_data_missing_vals = \ data is not None and np.isnan(data.Y).any() if self.train_data_missing_vals or self.test_data_missing_vals: self.Warning.missing_data(self._which_missing_data()) if data: data = HasClass()(data) else: self.Warning.missing_data.clear() self.test_data = data if self.resampling == OWTestAndScore.TestOnTest: self._invalidate() def _which_missing_data(self): return { (True, True): " ", # both, don't specify (True, False): " train ", (False, True): " test " }[(self.train_data_missing_vals, self.test_data_missing_vals)] # List of scorers shouldn't be retrieved globally, when the module is # loading since add-ons could have registered additional scorers. 
# It could have been cached but # - we don't gain much with it # - it complicates the unit tests def _update_scorers(self): if self.data and self.data.domain.class_var: new_scorers = usable_scorers(self.data.domain.class_var) else: new_scorers = [] # Don't unnecessarily reset the combo because this would always reset # comparison_criterion; we also set it explicitly, though, for clarity if new_scorers != self.scorers: self.scorers = new_scorers combo = self.controls.comparison_criterion combo.clear() combo.addItems( [scorer.long_name or scorer.name for scorer in self.scorers]) if self.scorers: self.comparison_criterion = 0 if self.__pending_comparison_criterion is not None: # Check for the unlikely case that some scorers have been removed # from modules if self.__pending_comparison_criterion < len(self.scorers): self.comparison_criterion = self.__pending_comparison_criterion self.__pending_comparison_criterion = None self._update_compbox_title() def _update_compbox_title(self): criterion = self.comparison_criterion if criterion < len(self.scorers): scorer = self.scorers[criterion]() self.compbox.setTitle(f"Model Comparison by {scorer.name}") else: self.compbox.setTitle(f"Model Comparison") @Inputs.preprocessor def set_preprocessor(self, preproc): """ Set the input preprocessor to apply on the training data. 
""" self.preprocessor = preproc self._invalidate() def handleNewSignals(self): """Reimplemented from OWWidget.handleNewSignals.""" self._update_class_selection() self.score_table.update_header(self.scorers) self._update_view_enabled() self.update_stats_model() if self.__needupdate: self.__update() def kfold_changed(self): self.resampling = OWTestAndScore.KFold self._param_changed() def fold_feature_changed(self): self.resampling = OWTestAndScore.FeatureFold self._param_changed() def shuffle_split_changed(self): self.resampling = OWTestAndScore.ShuffleSplit self._param_changed() def _param_changed(self): self.modcompbox.setEnabled(self.resampling == OWTestAndScore.KFold) self._update_view_enabled() self._invalidate() self.__update() def _update_view_enabled(self): self.comparison_table.setEnabled( self.resampling == OWTestAndScore.KFold and len(self.learners) > 1 and self.data is not None) self.score_table.view.setEnabled(self.data is not None) def update_stats_model(self): # Update the results_model with up to date scores. # Note: The target class specific scores (if requested) are # computed as needed in this method. 
model = self.score_table.model # clear the table model, but preserving the header labels for r in reversed(range(model.rowCount())): model.takeRow(r) target_index = None if self.data is not None: class_var = self.data.domain.class_var if self.data.domain.has_discrete_class and \ self.class_selection != self.TARGET_AVERAGE: target_index = class_var.values.index(self.class_selection) else: class_var = None errors = [] has_missing_scores = False names = [] for key, slot in self.learners.items(): name = learner_name(slot.learner) names.append(name) head = QStandardItem(name) head.setData(key, Qt.UserRole) results = slot.results if results is not None and results.success: train = QStandardItem("{:.3f}".format( results.value.train_time)) train.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter) train.setData(key, Qt.UserRole) test = QStandardItem("{:.3f}".format(results.value.test_time)) test.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter) test.setData(key, Qt.UserRole) row = [head, train, test] else: row = [head] if isinstance(results, Try.Fail): head.setToolTip(str(results.exception)) head.setText("{} (error)".format(name)) head.setForeground(QtGui.QBrush(Qt.red)) if isinstance(results.exception, DomainTransformationError) \ and self.resampling == self.TestOnTest: self.Error.test_data_incompatible() self.Information.test_data_transformed.clear() else: errors.append("{name} failed with error:\n" "{exc.__class__.__name__}: {exc!s}".format( name=name, exc=slot.results.exception)) if class_var is not None and class_var.is_discrete and \ target_index is not None: if slot.results is not None and slot.results.success: ovr_results = results_one_vs_rest(slot.results.value, target_index) # Cell variable is used immediatelly, it's not stored # pylint: disable=cell-var-from-loop stats = [ Try(scorer_caller(scorer, ovr_results, target=1)) for scorer in self.scorers ] else: stats = None else: stats = slot.stats if stats is not None: for stat, scorer in zip(stats, self.scorers): item 
= QStandardItem() item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter) if stat.success: item.setData(float(stat.value[0]), Qt.DisplayRole) else: item.setToolTip(str(stat.exception)) if scorer.name in self.score_table.shown_scores: has_missing_scores = True row.append(item) model.appendRow(row) # Resort rows based on current sorting header = self.score_table.view.horizontalHeader() model.sort(header.sortIndicatorSection(), header.sortIndicatorOrder()) self._set_comparison_headers(names) self.error("\n".join(errors), shown=bool(errors)) self.Warning.scores_not_computed(shown=has_missing_scores) def _on_use_rope_changed(self): self.controls.rope.setEnabled(self.use_rope) self.update_comparison_table() def update_comparison_table(self): self.comparison_table.clearContents() slots = self._successful_slots() if not (slots and self.scorers): return names = [learner_name(slot.learner) for slot in slots] self._set_comparison_headers(names) if self.resampling == OWTestAndScore.KFold: scores = self._scores_by_folds(slots) self._fill_table(names, scores) def _successful_slots(self): model = self.score_table.model proxy = self.score_table.sorted_model keys = (model.data(proxy.mapToSource(proxy.index(row, 0)), Qt.UserRole) for row in range(proxy.rowCount())) slots = [ slot for slot in (self.learners[key] for key in keys) if slot.results is not None and slot.results.success ] return slots def _set_comparison_headers(self, names): table = self.comparison_table try: # Prevent glitching during update table.setUpdatesEnabled(False) header = table.horizontalHeader() if len(names) > 2: header.setSectionResizeMode(QHeaderView.Stretch) else: header.setSectionResizeMode(QHeaderView.Fixed) table.setRowCount(len(names)) table.setColumnCount(len(names)) table.setVerticalHeaderLabels(names) table.setHorizontalHeaderLabels(names) finally: table.setUpdatesEnabled(True) def _scores_by_folds(self, slots): scorer = self.scorers[self.comparison_criterion]() self._update_compbox_title() if 
scorer.is_binary: if self.class_selection != self.TARGET_AVERAGE: class_var = self.data.domain.class_var target_index = class_var.values.index(self.class_selection) kw = dict(target=target_index) else: kw = dict(average='weighted') else: kw = {} def call_scorer(results): def thunked(): return scorer.scores_by_folds(results.value, **kw).flatten() return thunked scores = [Try(call_scorer(slot.results)) for slot in slots] scores = [score.value if score.success else None for score in scores] # `None in scores doesn't work -- these are np.arrays) if any(score is None for score in scores): self.Warning.scores_not_computed() return scores def _fill_table(self, names, scores): table = self.comparison_table for row, row_name, row_scores in zip(count(), names, scores): for col, col_name, col_scores in zip(range(row), names, scores): if row_scores is None or col_scores is None: continue if self.use_rope and self.rope: p0, rope, p1 = baycomp.two_on_single( row_scores, col_scores, self.rope) if np.isnan(p0) or np.isnan(rope) or np.isnan(p1): self._set_cells_na(table, row, col) continue self._set_cell( table, row, col, f"{p0:.3f}<br/><small>{rope:.3f}</small>", f"p({row_name} > {col_name}) = {p0:.3f}\n" f"p({row_name} = {col_name}) = {rope:.3f}") self._set_cell( table, col, row, f"{p1:.3f}<br/><small>{rope:.3f}</small>", f"p({col_name} > {row_name}) = {p1:.3f}\n" f"p({col_name} = {row_name}) = {rope:.3f}") else: p0, p1 = baycomp.two_on_single(row_scores, col_scores) if np.isnan(p0) or np.isnan(p1): self._set_cells_na(table, row, col) continue self._set_cell(table, row, col, f"{p0:.3f}", f"p({row_name} > {col_name}) = {p0:.3f}") self._set_cell(table, col, row, f"{p1:.3f}", f"p({col_name} > {row_name}) = {p1:.3f}") @classmethod def _set_cells_na(cls, table, row, col): cls._set_cell(table, row, col, "NA", "comparison cannot be computed") cls._set_cell(table, col, row, "NA", "comparison cannot be computed") @staticmethod def _set_cell(table, row, col, label, tooltip): item = 
QLabel(label) item.setToolTip(tooltip) item.setAlignment(Qt.AlignCenter) table.setCellWidget(row, col, item) def _update_class_selection(self): self.class_selection_combo.setCurrentIndex(-1) self.class_selection_combo.clear() if not self.data: return if self.data.domain.has_discrete_class: self.cbox.setVisible(True) class_var = self.data.domain.class_var items = (self.TARGET_AVERAGE, ) + class_var.values self.class_selection_combo.addItems(items) class_index = 0 if self.class_selection in class_var.values: class_index = class_var.values.index(self.class_selection) + 1 self.class_selection_combo.setCurrentIndex(class_index) self.class_selection = items[class_index] else: self.cbox.setVisible(False) def _on_target_class_changed(self): self.update_stats_model() self.update_comparison_table() def _invalidate(self, which=None): self.cancel() self.fold_feature_selected = \ self.resampling == OWTestAndScore.FeatureFold # Invalidate learner results for `which` input keys # (if None then all learner results are invalidated) if which is None: which = self.learners.keys() model = self.score_table.model statmodelkeys = [ model.item(row, 0).data(Qt.UserRole) for row in range(model.rowCount()) ] for key in which: self.learners[key] = \ self.learners[key]._replace(results=None, stats=None) if key in statmodelkeys: row = statmodelkeys.index(key) for c in range(1, model.columnCount()): item = model.item(row, c) if item is not None: item.setData(None, Qt.DisplayRole) item.setData(None, Qt.ToolTipRole) self.comparison_table.clearContents() self.__needupdate = True def commit(self): """ Commit the results to output. 
""" self.Error.memory_error.clear() valid = [ slot for slot in self.learners.values() if slot.results is not None and slot.results.success ] combined = None predictions = None if valid: # Evaluation results combined = results_merge([slot.results.value for slot in valid]) combined.learner_names = [ learner_name(slot.learner) for slot in valid ] # Predictions & Probabilities try: predictions = combined.get_augmented_data( combined.learner_names) except MemoryError: self.Error.memory_error() self.Outputs.evaluations_results.send(combined) self.Outputs.predictions.send(predictions) def send_report(self): """Report on the testing schema and results""" if not self.data or not self.learners: return if self.resampling == self.KFold: stratified = 'Stratified ' if self.cv_stratified else '' items = [("Sampling type", "{}{}-fold Cross validation".format( stratified, self.NFolds[self.n_folds]))] elif self.resampling == self.LeaveOneOut: items = [("Sampling type", "Leave one out")] elif self.resampling == self.ShuffleSplit: stratified = 'Stratified ' if self.shuffle_stratified else '' items = [ ("Sampling type", "{}Shuffle split, {} random samples with {}% data ".format( stratified, self.NRepeats[self.n_repeats], self.SampleSizes[self.sample_size])) ] elif self.resampling == self.TestOnTrain: items = [("Sampling type", "No sampling, test on training data")] elif self.resampling == self.TestOnTest: items = [("Sampling type", "No sampling, test on testing data")] else: items = [] if self.data.domain.has_discrete_class: items += [("Target class", self.class_selection.strip("()"))] if items: self.report_items("Settings", items) self.report_table("Scores", self.score_table.view) @classmethod def migrate_settings(cls, settings_, version): if version < 2: if settings_["resampling"] > 0: settings_["resampling"] += 1 if version < 3: # Older version used an incompatible context handler settings_["context_settings"] = [ c for c in settings_.get("context_settings", ()) if not hasattr(c, 
'classes') ] @Slot(float) def setProgressValue(self, value): self.progressBarSet(value) def __update(self): self.__needupdate = False assert self.__task is None or self.__state == State.Running if self.__state == State.Running: self.cancel() self.Warning.test_data_unused.clear() self.Error.test_data_incompatible.clear() self.Warning.test_data_missing.clear() self.Warning.cant_stratify.clear() self.Information.cant_stratify_numeric.clear() self.Information.test_data_transformed( shown=self.resampling == self.TestOnTest and self.data is not None and self.test_data is not None and self.data.domain.attributes != self.test_data.domain.attributes) self.warning() self.Error.class_inconsistent.clear() self.Error.too_many_folds.clear() self.error() # check preconditions and return early or show warnings if self.data is None: self.__state = State.Waiting self.commit() return if not self.learners: self.__state = State.Waiting self.commit() return if self.resampling == OWTestAndScore.KFold: k = self.NFolds[self.n_folds] if len(self.data) < k: self.Error.too_many_folds() self.__state = State.Waiting self.commit() return do_stratify = self.cv_stratified if do_stratify: if self.data.domain.class_var.is_discrete: least = min( filter(None, np.bincount(self.data.Y.astype(int)))) if least < k: self.Warning.cant_stratify(k, least) do_stratify = False else: self.Information.cant_stratify_numeric() do_stratify = False elif self.resampling == OWTestAndScore.TestOnTest: if self.test_data is None: if not self.Error.test_data_empty.is_shown(): self.Warning.test_data_missing() self.__state = State.Waiting self.commit() return elif self.test_data.domain.class_var != self.data.domain.class_var: self.Error.class_inconsistent() self.__state = State.Waiting self.commit() return elif self.test_data is not None: self.Warning.test_data_unused() rstate = 42 # items in need of an update items = [(key, slot) for key, slot in self.learners.items() if slot.results is None] learners = [slot.learner for _, 
slot in items] # deepcopy all learners as they are not thread safe (by virtue of # the base API). These will be the effective learner objects tested # but will be replaced with the originals on return (see restore # learners bellow) learners_c = [copy.deepcopy(learner) for learner in learners] if self.resampling == OWTestAndScore.TestOnTest: test_f = partial( Orange.evaluation.TestOnTestData(store_data=True, store_models=True), self.data, self.test_data, learners_c, self.preprocessor) else: if self.resampling == OWTestAndScore.KFold: sampler = Orange.evaluation.CrossValidation( k=self.NFolds[self.n_folds], random_state=rstate, stratified=do_stratify) elif self.resampling == OWTestAndScore.FeatureFold: sampler = Orange.evaluation.CrossValidationFeature( feature=self.fold_feature) elif self.resampling == OWTestAndScore.LeaveOneOut: sampler = Orange.evaluation.LeaveOneOut() elif self.resampling == OWTestAndScore.ShuffleSplit: sampler = Orange.evaluation.ShuffleSplit( n_resamples=self.NRepeats[self.n_repeats], train_size=self.SampleSizes[self.sample_size] / 100, test_size=None, stratified=self.shuffle_stratified, random_state=rstate) elif self.resampling == OWTestAndScore.TestOnTrain: sampler = Orange.evaluation.TestOnTrainingData( store_models=True) else: assert False, "self.resampling %s" % self.resampling sampler.store_data = True test_f = partial(sampler, self.data, learners_c, self.preprocessor) def replace_learners(evalfunc, *args, **kwargs): res = evalfunc(*args, **kwargs) assert all(lc is lo for lc, lo in zip(learners_c, res.learners)) res.learners[:] = learners return res test_f = partial(replace_learners, test_f) self.__submit(test_f) def __submit(self, testfunc): # type: (Callable[[Callable[[float], None]], Results]) -> None """ Submit a testing function for evaluation MUST not be called if an evaluation is already pending/running. Cancel the existing task first. 
Parameters ---------- testfunc : Callable[[Callable[float]], Results]) Must be a callable taking a single `callback` argument and returning a Results instance """ assert self.__state != State.Running # Setup the task task = TaskState() def progress_callback(finished): if task.is_interruption_requested(): raise UserInterrupt() task.set_progress_value(100 * finished) testfunc = partial(testfunc, callback=progress_callback) task.start(self.__executor, testfunc) task.progress_changed.connect(self.setProgressValue) task.watcher.finished.connect(self.__task_complete) self.Outputs.evaluations_results.invalidate() self.Outputs.predictions.invalidate() self.progressBarInit() self.setStatusMessage("Running") self.__state = State.Running self.__task = task @Slot(object) def __task_complete(self, f: 'Future[Results]'): # handle a completed task assert self.thread() is QThread.currentThread() assert self.__task is not None and self.__task.future is f self.progressBarFinished() self.setStatusMessage("") assert f.done() self.__task = None self.__state = State.Done try: results = f.result() # type: Results learners = results.learners # type: List[Learner] except Exception as er: # pylint: disable=broad-except log.exception("testing error (in __task_complete):", exc_info=True) self.error("\n".join(traceback.format_exception_only(type(er), er))) return learner_key = { slot.learner: key for key, slot in self.learners.items() } assert all(learner in learner_key for learner in learners) # Update the results for individual learners class_var = results.domain.class_var for learner, result in zip(learners, results.split_by_model()): stats = None if class_var.is_primitive(): ex = result.failed[0] if ex: stats = [Try.Fail(ex)] * len(self.scorers) result = Try.Fail(ex) else: stats = [ Try(scorer_caller(scorer, result)) for scorer in self.scorers ] result = Try.Success(result) key = learner_key.get(learner) self.learners[key] = \ self.learners[key]._replace(results=result, stats=stats) 
self.score_table.update_header(self.scorers) self.update_stats_model() self.update_comparison_table() self.commit() def cancel(self): """ Cancel the current/pending evaluation (if any). """ if self.__task is not None: assert self.__state == State.Running self.__state = State.Cancelled task, self.__task = self.__task, None task.cancel() task.progress_changed.disconnect(self.setProgressValue) task.watcher.finished.disconnect(self.__task_complete) self.progressBarFinished() self.setStatusMessage("") def onDeleteWidget(self): self.cancel() self.__executor.shutdown(wait=False) super().onDeleteWidget() def copy_to_clipboard(self): self.score_table.copy_selection_to_clipboard()
class OWDatabasesUpdate(OWWidget):
    """
    Widget listing local/server database files with download, update and
    remove actions.

    The file listing is fetched on a background executor; individual
    downloads are submitted to the same executor as `DownloadTask`
    instances and tracked in ``self._tasks``.
    """
    # Widget registration metadata.
    name = "Databases Update"
    description = "Update local systems biology databases."
    icon = "../widgets/icons/OWDatabasesUpdate.svg"
    priority = 1

    inputs = []
    outputs = []

    want_main_area = False

    def __init__(self, parent=None, signalManager=None,
                 name="Databases update"):
        """
        Build the GUI and start retrieving the list of files.

        Parameters
        ----------
        parent, signalManager, name
            Forwarded unchanged to ``OWWidget.__init__``.
        """
        OWWidget.__init__(self, parent, signalManager, name,
                          wantMainArea=False)

        self.searchString = ""

        # Bookkeeping read by the progress/download callbacks. It is set up
        # before any signal is connected or task is started, so a callback
        # fired during construction (e.g. `HandleError` -> `SetFilesList`
        # -> `_updateProgress`) never sees a half-initialised widget.
        self._tasks = []
        self._haveProgress = False

        fbox = gui.widgetBox(self.controlArea, "Filter")
        self.completer = TokenListCompleter(
            self, caseSensitivity=Qt.CaseInsensitive)
        self.lineEditFilter = QLineEdit(textChanged=self.SearchUpdate)
        self.lineEditFilter.setCompleter(self.completer)

        fbox.layout().addWidget(self.lineEditFilter)

        box = gui.widgetBox(self.controlArea, "Files")

        self.filesView = QTreeWidget(self)
        self.filesView.setHeaderLabels(
            ["", "Data Source", "Update", "Last Updated", "Size"])

        self.filesView.setRootIsDecorated(False)
        self.filesView.setUniformRowHeights(True)
        self.filesView.setSelectionMode(QAbstractItemView.NoSelection)
        self.filesView.setSortingEnabled(True)
        self.filesView.sortItems(1, Qt.AscendingOrder)
        self.filesView.setItemDelegateForColumn(
            0, UpdateOptionsItemDelegate(self.filesView))

        # Re-apply the filter whenever the view's model is re-laid out.
        self.filesView.model().layoutChanged.connect(self.SearchUpdate)

        box.layout().addWidget(self.filesView)

        box = gui.widgetBox(self.controlArea, orientation="horizontal")
        self.updateButton = gui.button(
            box, self, "Update all",
            callback=self.UpdateAll,
            tooltip="Update all updatable files",
        )

        self.downloadButton = gui.button(
            box, self, "Download all",
            callback=self.DownloadFiltered,
            tooltip="Download all filtered files shown"
        )

        self.cancelButton = gui.button(
            box, self, "Cancel", callback=self.Cancel,
            tooltip="Cancel scheduled downloads/updates."
        )

        # Only shown after a connection failure (see `HandleError`).
        self.retryButton = gui.button(
            box, self, "Reconnect", callback=self.RetrieveFilesList
        )
        self.retryButton.hide()

        gui.rubber(box)
        self.warning(0)

        box = gui.widgetBox(self.controlArea, orientation="horizontal")
        gui.rubber(box)

        self.infoLabel = QLabel()
        self.infoLabel.setAlignment(Qt.AlignCenter)

        self.controlArea.layout().addWidget(self.infoLabel)
        self.infoLabel.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)

        # List of (update item, tree widget item, options widget) triples.
        self.updateItems = []

        self.resize(800, 600)

        self.progress = ProgressState(self, maximum=3)
        self.progress.valueChanged.connect(self._updateProgress)
        self.progress.rangeChanged.connect(self._updateProgress)
        self.executor = ThreadExecutor(
            threadPool=QThreadPool(maxThreadCount=2)
        )

        task = Task(self, function=self.RetrieveFilesList)
        task.exceptionReady.connect(self.HandleError)
        task.start()

    def RetrieveFilesList(self):
        """
        Submit a background task that retrieves the files list.

        The widget is disabled until `SetFilesList` or `HandleError`
        re-enables it.
        """
        self.retryButton.hide()
        self.warning(0)
        self.progress.setRange(0, 3)

        task = Task(function=partial(retrieveFilesList,
                                     methodinvoke(self.progress, "advance")))

        task.resultReady.connect(self.SetFilesList)
        task.exceptionReady.connect(self.HandleError)

        self.executor.submit(task)

        self.setEnabled(False)

    def SetFilesList(self, serverInfo):
        """
        Set the files to show.

        Parameters
        ----------
        serverInfo
            Server-side file information; it is joined with
            ``serverfiles.allinfo()`` via ``join_info_dict``. `HandleError`
            passes an empty dict when the server could not be reached.
        """
        self.setEnabled(True)

        localInfo = serverfiles.allinfo()
        all_tags = set()

        self.filesView.clear()
        self.updateItems = []

        for item in join_info_dict(localInfo, serverInfo):
            tree_item = UpdateTreeWidgetItem(item)
            options_widget = UpdateOptionsWidget(item.state)
            options_widget.item = item

            options_widget.installClicked.connect(
                partial(self.SubmitDownloadTask, item.domain, item.filename)
            )
            options_widget.removeClicked.connect(
                partial(self.SubmitRemoveTask, item.domain, item.filename)
            )

            self.updateItems.append((item, tree_item, options_widget))
            all_tags.update(item.tags)

        self.filesView.addTopLevelItems(
            [tree_item for _, tree_item, _ in self.updateItems]
        )

        for item, tree_item, options_widget in self.updateItems:
            self.filesView.setItemWidget(tree_item, 0, options_widget)

            # Add an update button if the file is updateable
            if item.state == OUTDATED:
                self.filesView.setItemWidget(
                    tree_item, 2, self._makeDownloadButton("Update", item))

        self.progress.advance()

        self.filesView.setColumnWidth(0, self.filesView.sizeHintForColumn(0))

        for column in range(1, 4):
            contents_hint = self.filesView.sizeHintForColumn(column)
            header_hint = self.filesView.header().sectionSizeHint(column)
            # Cap content width at 400, but never go below the header's hint.
            width = max(min(contents_hint, 400), header_hint)
            self.filesView.setColumnWidth(column, width)

        # Tags starting with "#" are not offered as completion hints.
        hints = [hint for hint in sorted(all_tags) if not hint.startswith("#")]
        self.completer.setTokenList(hints)
        self.SearchUpdate()
        self.UpdateInfoLabel()
        self.toggleButtons()
        self.cancelButton.setEnabled(False)

        self.progress.setRange(0, 0)

    def _makeDownloadButton(self, text, item):
        """
        Return a small tool button labelled `text` that submits a download
        task for `item` (``item.domain``, ``item.filename``) when clicked.
        """
        button = QToolButton(
            None, text=text,
            maximumWidth=120,
            minimumHeight=20,
            maximumHeight=20
        )

        if sys.platform == "darwin":
            button.setAttribute(Qt.WA_MacSmallSize)

        button.clicked.connect(
            partial(self.SubmitDownloadTask, item.domain, item.filename)
        )
        return button

    def buttonCheck(self, selected_items, state, button):
        """
        Enable `button` iff at least one of `selected_items` is in `state`.

        An empty `selected_items` disables the button (previously the button
        was left in whatever state it had before).
        """
        button.setEnabled(any(item.state == state for item in selected_items))

    def toggleButtons(self):
        """
        Enable/disable the 'Update all' and 'Download all' buttons based on
        the items currently not hidden in the view.
        """
        selected_items = [item for item, tree_item, _ in self.updateItems
                          if not tree_item.isHidden()]
        self.buttonCheck(selected_items, OUTDATED, self.updateButton)
        self.buttonCheck(selected_items, AVAILABLE, self.downloadButton)

    def HandleError(self, exception):
        """
        Handle an exception raised by a files-list task.

        A `ConnectionError` shows a warning, displays an empty server
        listing and reveals the 'Reconnect' button; any other exception is
        forwarded to ``sys.excepthook``.
        """
        if isinstance(exception, ConnectionError):
            self.warning(0,
                         "Could not connect to server! Check your connection "
                         "and try to reconnect.")
            self.SetFilesList({})
            self.retryButton.show()
        else:
            # Pass the real traceback (when there is one) so the hook can
            # report where the error originated.
            sys.excepthook(type(exception), exception,
                           exception.__traceback__)

        # Harmless repeat after the ConnectionError branch, where
        # `SetFilesList` has already made these two calls.
        self.progress.setRange(0, 0)
        self.setEnabled(True)

    def UpdateInfoLabel(self):
        """
        Refresh the summary label with item counts and total sizes for the
        items currently not hidden in the view.
        """
        onServer = [item for item, tree_item, _ in self.updateItems
                    if not tree_item.isHidden()]
        # "Local" items are the visible ones that are not merely AVAILABLE.
        local = [item for item in onServer if item.state != AVAILABLE]

        size = sum(float(item.size) for item in local)
        sizeOnServer = sum(float(item.size) for item in onServer)

        text = ("%i items, %s (on server: %i items, %s)" %
                (len(local),
                 serverfiles.sizeformat(size),
                 len(onServer),
                 serverfiles.sizeformat(sizeOnServer)))

        self.infoLabel.setText(text)

    def UpdateAll(self):
        """Submit a download task for every visible OUTDATED item."""
        self.warning(0)
        for item, tree_item, _ in self.updateItems:
            if item.state == OUTDATED and not tree_item.isHidden():
                self.SubmitDownloadTask(item.domain, item.filename)

    def DownloadFiltered(self):
        """Submit a download task for every visible AVAILABLE/OUTDATED item."""
        # TODO: submit items in the order shown.
        for item, tree_item, _ in self.updateItems:
            if not tree_item.isHidden() and item.state in \
                    (AVAILABLE, OUTDATED):
                self.SubmitDownloadTask(item.domain, item.filename)

    def SearchUpdate(self, searchString=None):
        """
        Hide every item that does not match all whitespace-separated tokens
        of the filter line edit, then refresh the label and buttons.

        `searchString` is ignored (it only absorbs the argument supplied by
        the connected signals); the text is read from the line edit.
        """
        strings = str(self.lineEditFilter.text()).split()
        for item, tree_item, _ in self.updateItems:
            hide = not all(UpdateItem_match(item, string)
                           for string in strings)
            tree_item.setHidden(hide)
        self.UpdateInfoLabel()
        self.toggleButtons()

    def SubmitDownloadTask(self, domain, filename):
        """
        Submit the (domain, filename) to be downloaded/updated.

        Raises
        ------
        ValueError
            If (domain, filename) is not in the update list
            (from `updateItemIndex`).
        """
        self.cancelButton.setEnabled(True)

        index = self.updateItemIndex(domain, filename)
        _, tree_item, opt_widget = self.updateItems[index]

        task = DownloadTask(domain, filename, serverfiles.LOCALFILES)

        self.progress.adjustRange(0, 100)

        pb = ItemProgressBar(self.filesView)
        pb.setRange(0, 100)
        pb.setTextVisible(False)

        task.advanced.connect(pb.advance)
        task.advanced.connect(self.progress.advance)
        task.finished.connect(pb.hide)
        task.finished.connect(self.onDownloadFinished, Qt.QueuedConnection)
        task.exception.connect(self.onDownloadError, Qt.QueuedConnection)

        self.filesView.setItemWidget(tree_item, 2, pb)

        # Clear the text so it does not show behind the progress bar.
        tree_item.setData(2, Qt.DisplayRole, "")
        pb.show()

        # Disable the options widget
        opt_widget.setEnabled(False)
        self._tasks.append(task)

        self.executor.submit(task)

    def EndDownloadTask(self, task):
        """
        Update the row belonging to a finished download `task`.

        A cancelled or failed task restores the previous item state (a
        failure also adds a 'Retry' button and a warning); a successful one
        replaces the item with freshly read local info.
        """
        future = task.future()
        index = self.updateItemIndex(task.domain, task.filename)
        item, tree_item, opt_widget = self.updateItems[index]

        self.filesView.removeItemWidget(tree_item, 2)
        opt_widget.setEnabled(True)

        if future.cancelled():
            # Restore the previous state
            tree_item.setUpdateItem(item)
            opt_widget.setState(item.state)

        elif future.exception():
            tree_item.setUpdateItem(item)
            opt_widget.setState(item.state)

            self.warning(0, "Error while downloading. Check your connection "
                            "and retry.")

            # recreate button for download
            self.filesView.setItemWidget(
                tree_item, 2, self._makeDownloadButton("Retry", item))

        else:
            # get the new updated info dict and replace the old item
            self.warning(0)
            info = serverfiles.info(item.domain, item.filename)
            new_item = update_item_from_info(item.domain, item.filename,
                                             info, info)

            self.updateItems[index] = (new_item, tree_item, opt_widget)

            tree_item.setUpdateItem(new_item)
            opt_widget.setState(new_item.state)

        self.UpdateInfoLabel()

    def SubmitRemoveTask(self, domain, filename):
        """
        Remove the local copy of (domain, filename) and update its row.

        Raises
        ------
        ValueError
            If (domain, filename) is not in the update list
            (from `updateItemIndex`).
        """
        serverfiles.LOCALFILES.remove(domain, filename)
        index = self.updateItemIndex(domain, filename)
        item, tree_item, opt_widget = self.updateItems[index]

        if item.info_server:
            new_item = item._replace(state=AVAILABLE, local=None,
                                     info_local=None)
        else:
            new_item = item._replace(local=None, info_local=None)
            # Disable the options widget. No more actions can be performed
            # for the item.
            opt_widget.setEnabled(False)

        tree_item.setUpdateItem(new_item)
        opt_widget.setState(new_item.state)
        self.updateItems[index] = (new_item, tree_item, opt_widget)

        self.UpdateInfoLabel()

    def Cancel(self):
        """
        Cancel all pending update/download tasks (that have not yet started).
        """
        for task in self._tasks:
            task.future().cancel()

    def onDeleteWidget(self):
        """Cancel pending tasks and shut the executor down without waiting."""
        self.Cancel()
        self.executor.shutdown(wait=False)
        OWWidget.onDeleteWidget(self)

    def onDownloadFinished(self):
        """
        Finalize every tracked task whose future is done, and reset the
        overall progress once no tasks remain.
        """
        # on download completed/canceled/error
        assert QThread.currentThread() is self.thread()
        # Iterate over a copy: finished tasks are removed while looping.
        for task in list(self._tasks):
            future = task.future()
            if future.done():
                self.EndDownloadTask(task)
                self._tasks.remove(task)

        if not self._tasks:
            # Clear/reset the overall progress
            self.progress.setRange(0, 0)
            self.cancelButton.setEnabled(False)

    def onDownloadError(self, exc_info):
        """
        Report a download error.

        `exc_info` is unpacked into ``sys.excepthook``, i.e. it is expected
        to be a ``(type, value, traceback)`` triple.
        """
        sys.excepthook(*exc_info)
        self.warning(0, "Error while downloading. Check your connection and "
                        "retry.")

    def updateItemIndex(self, domain, filename):
        """
        Return the index in ``self.updateItems`` of the entry matching
        `domain` and `filename`.

        Raises
        ------
        ValueError
            If no entry matches.
        """
        for i, (item, _, _) in enumerate(self.updateItems):
            if item.domain == domain and item.filename == filename:
                return i
        raise ValueError("%r, %r not in update list" % (domain, filename))

    def _updateProgress(self, *args):
        """
        Mirror ``self.progress`` onto the widget's progress bar.

        A non-empty range initialises the bar on first use and sets its
        value; an empty range (min == max) finishes it.
        """
        rmin, rmax = self.progress.range()
        if rmin != rmax:
            if not self._haveProgress:
                self._haveProgress = True
                self.progressBarInit()

            self.progressBarSet(self.progress.ratioCompleted() * 100,
                                processEvents=None)
        else:
            self._haveProgress = False
            self.progressBarFinished()
class OWGeneInfo(widget.OWWidget): name = "Gene Info" description = "Displays gene information from NCBI and other sources." icon = "../widgets/icons/GeneInfo.svg" priority = 2010 inputs = [("Data", Orange.data.Table, "setData")] outputs = [("Data Subset", Orange.data.Table)] settingsHandler = settings.DomainContextHandler() organism_index = settings.ContextSetting(0) taxid = settings.ContextSetting("9606") gene_attr = settings.ContextSetting(0) auto_commit = settings.Setting(False) search_string = settings.Setting("") useAttr = settings.ContextSetting(False) useAltSource = settings.ContextSetting(False) def __init__(self, parent=None, ): super().__init__(self, parent) self.selectionChangedFlag = False self.__initialized = False self.initfuture = None self.itemsfuture = None self.infoLabel = gui.widgetLabel( gui.widgetBox(self.controlArea, "Info", addSpace=True), "Initializing\n" ) self.organisms = None self.organismBox = gui.widgetBox( self.controlArea, "Organism", addSpace=True) self.organismComboBox = gui.comboBox( self.organismBox, self, "organism_index", callback=self._onSelectedOrganismChanged) # For now only support one alt source, with a checkbox # In the future this can be extended to multiple selections self.altSourceCheck = gui.checkBox( self.organismBox, self, "useAltSource", "Show information from dictyBase", callback=self.onAltSourceChange) self.altSourceCheck.hide() box = gui.widgetBox(self.controlArea, "Gene names", addSpace=True) self.geneAttrComboBox = gui.comboBox( box, self, "gene_attr", "Gene atttibute", callback=self.updateInfoItems ) self.geneAttrComboBox.setEnabled(not self.useAttr) cb = gui.checkBox(box, self, "useAttr", "Use attribute names", callback=self.updateInfoItems) cb.toggled[bool].connect(self.geneAttrComboBox.setDisabled) gui.auto_commit(self.controlArea, self, "auto_commit", "Commit") # A label for dictyExpress link (Why oh god why???) 
self.dictyExpressBox = gui.widgetBox( self.controlArea, "Dicty Express") self.linkLabel = gui.widgetLabel(self.dictyExpressBox, "") self.linkLabel.setOpenExternalLinks(False) self.linkLabel.linkActivated.connect(self.onDictyExpressLink) self.dictyExpressBox.hide() gui.rubber(self.controlArea) gui.lineEdit(self.mainArea, self, "search_string", "Filter", callbackOnType=True, callback=self.searchUpdate) self.treeWidget = QTreeView( self.mainArea, selectionMode=QTreeView.ExtendedSelection, rootIsDecorated=False, uniformRowHeights=True, sortingEnabled=True) self.treeWidget.setItemDelegate( gui.LinkStyledItemDelegate(self.treeWidget)) self.treeWidget.viewport().setMouseTracking(True) self.mainArea.layout().addWidget(self.treeWidget) box = gui.widgetBox(self.mainArea, "", orientation="horizontal") gui.button(box, self, "Select Filtered", callback=self.selectFiltered) gui.button(box, self, "Clear Selection", callback=self.treeWidget.clearSelection) self.geneinfo = [] self.cells = [] self.row2geneinfo = {} self.data = None # : (# input genes, # matches genes) self.matchedInfo = 0, 0 self.setBlocking(True) self.executor = ThreadExecutor(self) self.progressBarInit() task = Task( function=partial( taxonomy.ensure_downloaded, callback=methodinvoke(self, "advance", ()) ) ) task.resultReady.connect(self.initialize) task.exceptionReady.connect(self._onInitializeError) self.initfuture = self.executor.submit(task) def sizeHint(self): return QSize(1024, 720) @Slot() def advance(self): assert self.thread() is QThread.currentThread() self.progressBarSet(self.progressBarValue + 1, processEvents=None) def initialize(self): if self.__initialized: # Already initialized return self.__initialized = True self.organisms = sorted( set([name.split(".")[-2] for name in serverfiles.listfiles("NCBI_geneinfo")] + gene.NCBIGeneInfo.common_taxids()) ) self.organismComboBox.addItems( [taxonomy.name(tax_id) for tax_id in self.organisms] ) if self.taxid in self.organisms: self.organism_index = 
self.organisms.index(self.taxid) else: self.organism_index = 0 self.taxid = self.organisms[self.organism_index] self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID) self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID) self.infoLabel.setText("No data on input\n") self.initfuture = None self.setBlocking(False) self.progressBarFinished(processEvents=None) def _onInitializeError(self, exc): sys.excepthook(type(exc), exc.args, None) self.error(0, "Could not download the necessary files.") def _onSelectedOrganismChanged(self): assert 0 <= self.organism_index <= len(self.organisms) self.taxid = self.organisms[self.organism_index] self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID) self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID) if self.data is not None: self.updateInfoItems() def setData(self, data=None): if not self.__initialized: self.initfuture.result() self.initialize() if self.itemsfuture is not None: raise Exception("Already processing") self.closeContext() self.data = data if data is not None: self.geneAttrComboBox.clear() self.attributes = \ [attr for attr in data.domain.variables + data.domain.metas if isinstance(attr, (Orange.data.StringVariable, Orange.data.DiscreteVariable))] for var in self.attributes: self.geneAttrComboBox.addItem(*gui.attributeItem(var)) self.taxid = data_hints.get_hint(self.data, "taxid", self.taxid) self.useAttr = data_hints.get_hint( self.data, "genesinrows", self.useAttr) self.openContext(data) self.gene_attr = min(self.gene_attr, len(self.attributes) - 1) if self.taxid in self.organisms: self.organism_index = self.organisms.index(self.taxid) else: self.organism_index = 0 self.taxid = self.organisms[self.organism_index] self.updateInfoItems() else: self.clear() def infoSource(self): """ Return the current selected info source getter function from INFO_SOURCES """ org = self.organisms[min(self.organism_index, len(self.organisms) - 1)] if org not in INFO_SOURCES: org = "default" sources = INFO_SOURCES[org] 
name, func = sources[min(self.useAltSource, len(sources) - 1)] return name, func def inputGenes(self): if self.useAttr: genes = [attr.name for attr in self.data.domain.attributes] elif self.attributes: attr = self.attributes[self.gene_attr] genes = [str(ex[attr]) for ex in self.data if not math.isnan(ex[attr])] else: genes = [] return genes def updateInfoItems(self): self.warning(0) if self.data is None: return genes = self.inputGenes() if self.useAttr: genes = [attr.name for attr in self.data.domain.attributes] elif self.attributes: attr = self.attributes[self.gene_attr] genes = [str(ex[attr]) for ex in self.data if not math.isnan(ex[attr])] else: genes = [] if not genes: self.warning(0, "Could not extract genes from input dataset.") self.warning(1) org = self.organisms[min(self.organism_index, len(self.organisms) - 1)] source_name, info_getter = self.infoSource() self.error(0) self.updateDictyExpressLink(genes, show=org == DICTY_TAXID) self.altSourceCheck.setVisible(org == DICTY_TAXID) self.progressBarInit() self.setBlocking(True) self.setEnabled(False) self.infoLabel.setText("Retrieving info records.\n") self.genes = genes task = Task( function=partial( info_getter, org, genes, advance=methodinvoke(self, "advance", ())) ) self.itemsfuture = self.executor.submit(task) task.finished.connect(self._onItemsCompleted) def _onItemsCompleted(self): self.setBlocking(False) self.progressBarFinished() self.setEnabled(True) try: schema, geneinfo = self.itemsfuture.result() finally: self.itemsfuture = None self.geneinfo = geneinfo = list(zip(self.genes, geneinfo)) self.cells = cells = [] self.row2geneinfo = {} links = [] for i, (_, gi) in enumerate(geneinfo): if gi: row = [] for _, item in zip(schema, gi): if isinstance(item, Link): # TODO: This should be handled by delegates row.append(item.text) links.append(item.link) else: row.append(item) cells.append(row) self.row2geneinfo[len(cells) - 1] = i model = TreeModel(cells, [str(col) for col in schema], None) 
model.setColumnLinks(0, links) proxyModel = QSortFilterProxyModel(self) proxyModel.setSourceModel(model) self.treeWidget.setModel(proxyModel) self.treeWidget.selectionModel().selectionChanged.connect(self.commit) for i in range(7): self.treeWidget.resizeColumnToContents(i) self.treeWidget.setColumnWidth( i, min(self.treeWidget.columnWidth(i), 200) ) self.infoLabel.setText("%i genes\n%i matched NCBI's IDs" % (len(self.genes), len(cells))) self.matchedInfo = len(self.genes), len(cells) def clear(self): self.infoLabel.setText("No data on input\n") self.treeWidget.setModel( TreeModel([], ["NCBI ID", "Symbol", "Locus Tag", "Chromosome", "Description", "Synonyms", "Nomenclature"], self.treeWidget)) self.geneAttrComboBox.clear() self.send("Data Subset", None) def commit(self): if self.data is None: self.send("Data Subset", None) return model = self.treeWidget.model() selection = self.treeWidget.selectionModel().selection() selection = model.mapSelectionToSource(selection) selectedRows = list( chain(*(range(r.top(), r.bottom() + 1) for r in selection)) ) model = model.sourceModel() selectedGeneids = [self.row2geneinfo[row] for row in selectedRows] selectedIds = [self.geneinfo[i][0] for i in selectedGeneids] selectedIds = set(selectedIds) gene2row = dict((self.geneinfo[self.row2geneinfo[row]][0], row) for row in selectedRows) isselected = selectedIds.__contains__ if self.useAttr: def is_selected(attr): return attr.name in selectedIds attrs = [attr for attr in self.data.domain.attributes if isselected(attr.name)] domain = Orange.data.Domain( attrs, self.data.domain.class_vars, self.data.domain.metas) newdata = self.data.from_table(domain, self.data) self.send("Data Subset", newdata) elif self.attributes: attr = self.attributes[self.gene_attr] gene_col = [attr.str_val(v) for v in self.data.get_column_view(attr)[0]] gene_col = [(i, name) for i, name in enumerate(gene_col) if isselected(name)] indices = [i for i, _ in gene_col] # Add a gene info columns to the output headers = 
[str(model.headerData(i, Qt.Horizontal, Qt.DisplayRole)) for i in range(model.columnCount())] metas = [Orange.data.StringVariable(name) for name in headers] domain = Orange.data.Domain( self.data.domain.attributes, self.data.domain.class_vars, self.data.domain.metas + tuple(metas)) newdata = self.data.from_table(domain, self.data)[indices] model_rows = [gene2row[gene] for _, gene in gene_col] for col, meta in zip(range(model.columnCount()), metas): col_data = [str(model.index(row, col).data(Qt.DisplayRole)) for row in model_rows] newdata[:, meta] = col_data if not len(newdata): newdata = None self.send("Data Subset", newdata) else: self.send("Data Subset", None) def rowFiltered(self, row): searchStrings = self.search_string.lower().split() row = " ".join(self.cells[row]).lower() return not all([s in row for s in searchStrings]) def searchUpdate(self): if not self.data: return searchStrings = self.search_string.lower().split() index = self.treeWidget.model().sourceModel().index mapFromSource = self.treeWidget.model().mapFromSource for i, row in enumerate(self.cells): row = " ".join(row).lower() self.treeWidget.setRowHidden( mapFromSource(index(i, 0)).row(), QModelIndex(), not all([s in row for s in searchStrings])) def selectFiltered(self): if not self.data: return itemSelection = QItemSelection() index = self.treeWidget.model().sourceModel().index mapFromSource = self.treeWidget.model().mapFromSource for i, row in enumerate(self.cells): if not self.rowFiltered(i): itemSelection.select(mapFromSource(index(i, 0)), mapFromSource(index(i, 0))) self.treeWidget.selectionModel().select( itemSelection, QItemSelectionModel.Select | QItemSelectionModel.Rows) def updateDictyExpressLink(self, genes, show=False): def fix(ddb): if ddb.startswith("DDB"): if not ddb.startswith("DDB_G"): ddb = ddb.replace("DDB", "DDB_G") return ddb return None if show: genes = [fix(gene) for gene in genes if fix(gene)] link1 = '<a 
href="http://dictyexpress.biolab.si/run/index.php?gene=%s">Microarray profile</a>' link2 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s&db=rnaseq">RNA-Seq profile</a>' self.linkLabel.setText(link1 + "<br/>" + link2) show = any(genes) if show: self.dictyExpressBox.show() else: self.dictyExpressBox.hide() def onDictyExpressLink(self, link): if not self.data: return selectedIndexes = self.treeWidget.selectedIndexes() if not len(selectedIndexes): QMessageBox.information( self, "No gene ids selected", "Please select some genes and try again." ) return model = self.treeWidget.model() mapToSource = model.mapToSource selectedRows = self.treeWidget.selectedIndexes() selectedRows = [mapToSource(index).row() for index in selectedRows] model = model.sourceModel() selectedGeneids = [self.row2geneinfo[row] for row in selectedRows] selectedIds = [self.geneinfo[i][0] for i in selectedGeneids] selectedIds = set(selectedIds) def fix(ddb): if ddb.startswith("DDB"): if not ddb.startswith("DDB_G"): ddb = ddb.replace("DDB", "DDB_G") return ddb return None genes = [fix(gene) for gene in selectedIds if fix(gene)] url = str(link) % " ".join(genes) QDesktopServices.openUrl(QUrl(url)) def onAltSourceChange(self): self.updateInfoItems() def onDeleteWidget(self): # try to cancel pending tasks if self.initfuture: self.initfuture.cancel() if self.itemsfuture: self.itemsfuture.cancel() self.executor.shutdown(wait=False) super().onDeleteWidget()
class OWImportDocuments(widget.OWWidget):
    """Widget importing text documents from a local folder or a URL.

    The import runs as a background task on a thread executor; progress is
    reported back to the GUI thread and the resulting corpus (and a table
    of skipped documents, if any) is sent to the outputs.
    """
    name = "Import Documents"
    description = "Import text documents from folders."
    icon = "icons/ImportDocuments.svg"
    priority = 110

    class Outputs:
        data = Output("Corpus", Corpus)
        skipped_documents = Output("Skipped documents", Table)

    LOCAL_FILE, URL = range(2)
    source = settings.Setting(LOCAL_FILE)
    #: list of recent paths
    recent_paths: List[RecentPath] = settings.Setting([])
    currentPath: Optional[str] = settings.Setting(None)
    recent_urls: List[str] = settings.Setting([])

    want_main_area = False
    resizing_enabled = False

    Modality = Qt.ApplicationModal

    MaxRecentItems = 20

    class Warning(widget.OWWidget.Warning):
        read_error = widget.Msg("{} couldn't be read.")

    def __init__(self):
        super().__init__()
        #: widget's runtime state
        self.__state = State.NoState
        self.corpus = None
        self.n_text_categories = 0
        self.n_text_data = 0
        self.skipped_documents = []

        self.__invalidated = False
        self.__pendingTask = None

        layout = QGridLayout()
        layout.setSpacing(4)
        gui.widgetBox(self.controlArea, orientation=layout, box='Source')
        source_box = gui.radioButtons(None, self, "source", box=True,
                                      callback=self.start, addToLayout=False)
        rb_button = gui.appendRadioButton(source_box, "Folder:",
                                          addToLayout=False)
        layout.addWidget(rb_button, 0, 0, Qt.AlignVCenter)

        box = gui.hBox(None, addToLayout=False, margin=0)
        box.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed)
        self.recent_cb = QComboBox(
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon,
            minimumContentsLength=16,
            acceptDrops=True
        )
        # Drag/drop events on the combo box are handled in `eventFilter`.
        self.recent_cb.installEventFilter(self)
        self.recent_cb.activated[int].connect(self.__onRecentActivated)

        browseaction = QAction(
            "Open/Load Documents", self,
            iconText="\N{HORIZONTAL ELLIPSIS}",
            icon=self.style().standardIcon(QStyle.SP_DirOpenIcon),
            toolTip="Select a folder from which to load the documents"
        )
        browseaction.triggered.connect(self.__runOpenDialog)
        reloadaction = QAction(
            "Reload", self,
            icon=self.style().standardIcon(QStyle.SP_BrowserReload),
            toolTip="Reload current document set"
        )
        reloadaction.triggered.connect(self.reload)
        self.__actions = namespace(
            browse=browseaction,
            reload=reloadaction,
        )

        browsebutton = QPushButton(
            browseaction.iconText(),
            icon=browseaction.icon(),
            toolTip=browseaction.toolTip(),
            clicked=browseaction.trigger,
            default=False,
            autoDefault=False,
        )
        reloadbutton = QPushButton(
            reloadaction.iconText(),
            icon=reloadaction.icon(),
            clicked=reloadaction.trigger,
            default=False,
            autoDefault=False,
        )
        box.layout().addWidget(self.recent_cb)
        layout.addWidget(box, 0, 1)
        layout.addWidget(browsebutton, 0, 2)
        layout.addWidget(reloadbutton, 0, 3)

        rb_button = gui.appendRadioButton(source_box, "URL:",
                                          addToLayout=False)
        layout.addWidget(rb_button, 3, 0, Qt.AlignVCenter)

        self.url_combo = url_combo = QComboBox()
        url_model = PyListModel()
        # The model wraps the `recent_urls` setting list.
        url_model.wrap(self.recent_urls)
        url_combo.setLineEdit(LineEditSelectOnFocus())
        url_combo.setModel(url_model)
        url_combo.setSizePolicy(QSizePolicy.Ignored, QSizePolicy.Fixed)
        url_combo.setEditable(True)
        url_combo.setInsertPolicy(url_combo.InsertAtTop)
        url_edit = url_combo.lineEdit()
        left, top, right, bottom = url_edit.getTextMargins()
        url_edit.setTextMargins(left + 5, top, right, bottom)
        layout.addWidget(url_combo, 3, 1, 1, 3)
        url_combo.activated.connect(self._url_set)
        # with the completer we make the combo box case sensitive when
        # matching the history
        completer = QCompleter()
        completer.setCaseSensitivity(Qt.CaseSensitive)
        url_combo.setCompleter(completer)

        self.addActions([browseaction, reloadaction])

        # Keep the reload button's enabled state in sync with its action.
        reloadaction.changed.connect(
            lambda: reloadbutton.setEnabled(reloadaction.isEnabled())
        )
        box = gui.vBox(self.controlArea, "Info")
        # Page 0 shows the info label, page 1 the progress bar + cancel.
        self.infostack = QStackedWidget()

        self.info_area = QLabel(
            text="No document set selected",
            wordWrap=True
        )
        self.progress_widget = QProgressBar(
            minimum=0, maximum=100
        )
        self.cancel_button = QPushButton(
            "Cancel",
            icon=self.style().standardIcon(QStyle.SP_DialogCancelButton),
            default=False,
            autoDefault=False,
        )
        self.cancel_button.clicked.connect(self.cancel)

        w = QWidget()
        vlayout = QVBoxLayout()
        vlayout.setContentsMargins(0, 0, 0, 0)
        hlayout = QHBoxLayout()
        hlayout.setContentsMargins(0, 0, 0, 0)

        hlayout.addWidget(self.progress_widget)
        hlayout.addWidget(self.cancel_button)
        vlayout.addLayout(hlayout)

        self.pathlabel = TextLabel()
        self.pathlabel.setTextElideMode(Qt.ElideMiddle)
        self.pathlabel.setAttribute(Qt.WA_MacSmallSize)

        vlayout.addWidget(self.pathlabel)
        w.setLayout(vlayout)

        self.infostack.addWidget(self.info_area)
        self.infostack.addWidget(w)

        box.layout().addWidget(self.infostack)

        self.__initRecentItemsModel()
        self.__invalidated = True
        self.__executor = ThreadExecutor(self)

        # The posted Init event triggers `start` from `customEvent`.
        QApplication.postEvent(self, QEvent(RuntimeEvent.Init))

    def _url_set(self):
        """Normalize the activated URL entry and start loading from it."""
        url = self.url_combo.currentText()
        # Look up the position using the raw (unstripped) text.
        pos = self.recent_urls.index(url)
        url = url.strip()
        if not urlparse(url).scheme:
            url = "http://" + url
            self.url_combo.setItemText(pos, url)
            self.recent_urls[pos] = url
        self.source = self.URL
        self.start()

    def __initRecentItemsModel(self):
        """Populate the recent paths combo box from the stored settings."""
        if self.currentPath is not None and \
                not os.path.isdir(self.currentPath):
            self.currentPath = None

        # Drop stored entries whose folder no longer exists.
        recent_paths = [item for item in self.recent_paths
                        if os.path.isdir(item.abspath)]
        recent_paths = recent_paths[:OWImportDocuments.MaxRecentItems]
        recent_model = self.recent_cb.model()
        for pathitem in recent_paths:
            item = RecentPath_asqstandarditem(pathitem)
            recent_model.appendRow(item)

        self.recent_paths = recent_paths

        if self.currentPath is not None and \
                os.path.isdir(self.currentPath) and self.recent_paths and \
                os.path.samefile(self.currentPath,
                                 self.recent_paths[0].abspath):
            self.recent_cb.setCurrentIndex(0)
        else:
            self.currentPath = None
            self.recent_cb.setCurrentIndex(-1)
        self.__actions.reload.setEnabled(self.currentPath is not None)

    def customEvent(self, event):
        """Reimplemented."""
        if event.type() == RuntimeEvent.Init:
            if self.__invalidated:
                try:
                    self.start()
                finally:
                    self.__invalidated = False

        super().customEvent(event)

    def __runOpenDialog(self):
        """Let the user pick a folder and start importing from it."""
        startdir = os.path.expanduser("~/")
        if self.recent_paths:
            startdir = os.path.dirname(self.recent_paths[0].abspath)

        caption = "Select Top Level Folder"
        if OWImportDocuments.Modality == Qt.WindowModal:
            dlg = QFileDialog(
                self, caption, startdir,
                acceptMode=QFileDialog.AcceptOpen,
                modal=True,
            )
            dlg.setFileMode(QFileDialog.Directory)
            dlg.setOption(QFileDialog.ShowDirsOnly)
            dlg.setDirectory(startdir)
            dlg.setAttribute(Qt.WA_DeleteOnClose)

            @dlg.accepted.connect
            def on_accepted():
                dirpath = dlg.selectedFiles()
                if dirpath:
                    self.setCurrentPath(dirpath[0])
                    self.start()
            dlg.open()
        else:
            dirpath = QFileDialog.getExistingDirectory(
                self, caption, startdir
            )
            if dirpath:
                self.setCurrentPath(dirpath)
                self.start()

    def __onRecentActivated(self, index):
        """Switch to the recent path at `index` and start importing."""
        item = self.recent_cb.itemData(index)
        if item is None:
            return
        assert isinstance(item, RecentPath)
        self.setCurrentPath(item.abspath)
        self.start()

    def __updateInfo(self):
        """Refresh the info label and info stack page for the current state."""
        if self.__state == State.NoState:
            text = "No document set selected"
        elif self.__state == State.Processing:
            text = "Processing"
        elif self.__state == State.Done:
            nvalid = self.n_text_data
            ncategories = self.n_text_categories
            n_skipped = len(self.skipped_documents)
            if ncategories < 2:
                text = "{} document{}".format(
                    nvalid, "s" if nvalid != 1 else "")
            else:
                text = "{} documents / {} categories".format(
                    nvalid, ncategories)
            if n_skipped > 0:
                text = text + ", {} skipped".format(n_skipped)
        elif self.__state == State.Cancelled:
            text = "Cancelled"
        elif self.__state == State.Error:
            text = "Error state"
        else:
            assert False

        self.info_area.setText(text)

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

    def setCurrentPath(self, path):
        """
        Set the current root text path to path

        If the path does not exist or is not a directory an error is
        shown (and a `UserWarning` issued) and the current path is reset
        to None. Any task in progress is cancelled and the source is
        switched to `LOCAL_FILE`.

        Parameters
        ----------
        path : str
            New root import path.

        Returns
        -------
        status : bool
            True if the current root import path was successfully
            changed to path.
        """
        # Nothing to do if `path` already is the current local folder.
        if self.currentPath is not None and path is not None and \
                os.path.isdir(self.currentPath) and os.path.isdir(path) and \
                os.path.samefile(self.currentPath, path) and \
                self.source == self.LOCAL_FILE:
            return True

        success = True
        error = None
        if path is not None:
            if not os.path.exists(path):
                error = "'{}' does not exist".format(path)
                path = None
                success = False
            elif not os.path.isdir(path):
                error = "'{}' is not a folder".format(path)
                path = None
                success = False

        if error is not None:
            self.error(error)
            warnings.warn(error, UserWarning, stacklevel=3)
        else:
            self.error()

        if path is not None:
            newindex = self.addRecentPath(path)
            self.recent_cb.setCurrentIndex(newindex)
            if newindex >= 0:
                self.currentPath = path
            else:
                self.currentPath = None
        else:
            self.currentPath = None
        self.__actions.reload.setEnabled(self.currentPath is not None)

        if self.__state == State.Processing:
            self.cancel()

        self.source = self.LOCAL_FILE
        return success

    def addRecentPath(self, path):
        """
        Prepend a path entry to the list of recent paths

        If an entry with the same path already exists in the recent path
        list it is moved to the first place

        Parameters
        ----------
        path : str

        Returns
        -------
        index : int
            Index of the entry in the recent paths list (always 0).
        """
        existing = None
        for pathitem in self.recent_paths:
            try:
                if os.path.samefile(pathitem.abspath, path):
                    existing = pathitem
                    break
            except FileNotFoundError:
                # file not found if the `pathitem.abspath` no longer exists
                pass

        model = self.recent_cb.model()

        if existing is not None:
            selected_index = self.recent_paths.index(existing)
            assert model.item(selected_index).data(Qt.UserRole) is existing
            self.recent_paths.remove(existing)
            row = model.takeRow(selected_index)
            self.recent_paths.insert(0, existing)
            model.insertRow(0, row)
        else:
            item = RecentPath(path, None, None)
            self.recent_paths.insert(0, item)
            model.insertRow(0, RecentPath_asqstandarditem(item))
        return 0

    def __setRuntimeState(self, state):
        """Switch the runtime state, updating blocking, status and info."""
        assert state in State
        self.setBlocking(state == State.Processing)
        message = ""
        if state == State.Processing:
            assert self.__state in [State.Done,
                                    State.NoState,
                                    State.Error,
                                    State.Cancelled]
            message = "Processing"
        elif state == State.Done:
            assert self.__state == State.Processing
        elif state == State.Cancelled:
            assert self.__state == State.Processing
            message = "Cancelled"
        elif state == State.Error:
            message = "Error during processing"
        elif state == State.NoState:
            message = ""
        else:
            assert False

        self.__state = state

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

        self.setStatusMessage(message)
        self.__updateInfo()

    def reload(self):
        """
        Restart the text scan task
        """
        if self.__state == State.Processing:
            self.cancel()

        self.source = self.LOCAL_FILE
        self.corpus = None
        self.start()

    def start(self):
        """
        Start/execute the text indexing operation
        """
        self.error()
        self.Warning.clear()
        self.progress_widget.setValue(0)

        self.__invalidated = False
        startdir = self.currentPath if self.source == self.LOCAL_FILE \
            else self.url_combo.currentText().strip()
        if not startdir:
            return

        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            log.info("Starting a new task while one is in progress. "
                     "Cancel the existing task (dir:'{}')"
                     .format(self.__pendingTask.startdir))
            self.cancel()

        self.__setRuntimeState(State.Processing)

        report_progress = methodinvoke(
            self, "__onReportProgress", (object,))

        task = ImportDocuments(startdir, self.source == self.URL,
                               report_progress=report_progress)

        # collect the task state in one convenient place
        self.__pendingTask = taskstate = namespace(
            task=task,
            startdir=startdir,
            future=None,
            watcher=None,
            cancelled=False,
            cancel=None,
        )

        def cancel():
            # Cancel the task and disconnect
            if taskstate.future.cancel():
                pass
            else:
                # The future could not be cancelled; flag the task as
                # cancelled instead.
                taskstate.task.cancelled = True
                taskstate.cancelled = True
                try:
                    taskstate.future.result(timeout=0)
                except UserInterruptError:
                    pass
                except TimeoutError:
                    log.info("The task did not stop in in a timely manner")
            taskstate.watcher.finished.disconnect(self.__onRunFinished)

        taskstate.cancel = cancel

        def run_text_scan_task_interupt():
            try:
                return task.run()
            except UserInterruptError:
                # Suppress interrupt errors, so they are not logged
                return

        taskstate.future = self.__executor.submit(run_text_scan_task_interupt)
        taskstate.watcher = FutureWatcher(taskstate.future)
        taskstate.watcher.finished.connect(self.__onRunFinished)

    @Slot()
    def __onRunFinished(self):
        assert QThread.currentThread() is self.thread()
        assert self.__state == State.Processing
        assert self.__pendingTask is not None
        assert self.sender() is self.__pendingTask.watcher
        assert self.__pendingTask.future.done()
        task = self.__pendingTask
        self.__pendingTask = None

        corpus, errors = None, []
        try:
            corpus, errors = task.future.result()
        except NoDocumentsException:
            state = State.Error
            self.error("Folder contains no readable files.")
        except Exception:
            sys.excepthook(*sys.exc_info())
            state = State.Error
            self.error(traceback.format_exc())
        else:
            state = State.Done
            self.error()

        if corpus:
            self.n_text_data = len(corpus)
            self.n_text_categories = len(corpus.domain.class_var.values)\
                if corpus.domain.class_var else 0

        self.corpus = corpus
        if self.corpus:
            self.corpus.name = "Documents"
        self.skipped_documents = errors

        if len(errors):
            self.Warning.read_error(
                "Some files" if len(errors) > 1 else "One file"
            )

        self.__setRuntimeState(state)
        self.commit()

    def cancel(self):
        """
        Cancel current pending task (if any).
        """
        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            self.__pendingTask.cancel()
            self.__pendingTask = None
            self.__setRuntimeState(State.Cancelled)

    @Slot(object)
    def __onReportProgress(self, arg):
        # report on scan progress from a worker thread
        # arg must be a namespace(count: int, lastpath: str)
        # NOTE(review): the code below also reads `arg.progress` - confirm
        # the expected fields against the task implementation.
        assert QThread.currentThread() is self.thread()
        if self.__state == State.Processing:
            self.pathlabel.setText(prettifypath(arg.lastpath))
            self.progress_widget.setValue(int(100 * arg.progress))

    def commit(self):
        """
        Create and commit a Corpus from the collected text meta data.
        """
        self.Outputs.data.send(self.corpus)
        if self.skipped_documents:
            # `currentPath` is None when no local folder was ever selected
            # (e.g. documents were loaded from a URL); os.path.join would
            # raise a TypeError on None, so fall back to an empty root.
            root = self.currentPath or ""
            skipped_table = (
                Table.from_list(
                    SKIPPED_DOMAIN,
                    [[x, os.path.join(root, x)]
                     for x in self.skipped_documents]
                )
            )
            skipped_table.name = "Skipped documents"
        else:
            skipped_table = None
        self.Outputs.skipped_documents.send(skipped_table)

    def onDeleteWidget(self):
        """Cancel any running task and shut down the executor."""
        self.cancel()
        self.__executor.shutdown(wait=True)
        self.__invalidated = False
        super().onDeleteWidget()

    def eventFilter(self, receiver, event):
        # re-implemented from QWidget
        # intercept and process drag drop events on the recent directory
        # selection combo box
        def dirpath(event):
            # type: (QDropEvent) -> Optional[str]
            """Return the directory from a QDropEvent."""
            data = event.mimeData()
            urls = data.urls()
            if len(urls) == 1:
                url = urls[0]
                path = url.toLocalFile()
                if os.path.isdir(path):
                    return path
            return None

        if receiver is self.recent_cb and \
                event.type() in {QEvent.DragEnter, QEvent.DragMove,
                                 QEvent.Drop}:
            assert isinstance(event, QDropEvent)
            path = dirpath(event)
            if path is not None and event.possibleActions() & Qt.LinkAction:
                event.setDropAction(Qt.LinkAction)
                event.accept()
                if event.type() == QEvent.Drop:
                    self.setCurrentPath(path)
                    self.start()
            else:
                event.ignore()
            return True

        return super().eventFilter(receiver, event)

    def send_report(self):
        """Report the import path and document statistics."""
        if not self.currentPath:
            return
        items = [('Path', self.currentPath),
                 ('Number of documents', self.n_text_data)]
        if self.n_text_categories:
            items += [('Categories', self.n_text_categories)]
        if self.skipped_documents:
            items += [('Number of skipped', len(self.skipped_documents))]
        self.report_items(items, )