def main(argv):
    """Show a demo main window hosting a ``CollapsibleDockWidget``.

    The dock is placed in the left dock area, with a header-less tree view
    as its expanded widget and a fixed-size tool button as its collapsed
    widget. A checkable "Expand" action (Ctrl+D) is wired to
    ``dock.setExpanded``.

    Parameters
    ----------
    argv : list
        Command line arguments handed to ``QApplication``.

    Returns
    -------
    The value returned by ``QApplication.exec()``.
    """
    application = QApplication(argv)
    window = QMainWindow()
    dock = CollapsibleDockWidget()

    # Widget shown while the dock is expanded.
    expanded_view = QTreeView()
    expanded_view.header().hide()

    # Widget shown while the dock is collapsed.
    collapsed_button = QToolButton()
    collapsed_button.setFixedSize(38, 200)

    dock.setExpandedWidget(expanded_view)
    dock.setCollapsedWidget(collapsed_button)

    window.addDockWidget(Qt.LeftDockWidgetArea, dock)
    window.setCentralWidget(QTextEdit())
    window.show()

    toggle_shortcut = QKeySequence(Qt.ControlModifier | Qt.Key_D)
    expand_action = QAction(
        "Expand", window, checkable=True, shortcut=toggle_shortcut
    )
    expand_action.triggered[bool].connect(dock.setExpanded)
    window.addAction(expand_action)

    return application.exec()
class OWGeneSetEnrichment(_OWGeneSets.OWGeneSets):
    """Gene set enrichment widget extending ``OWGeneSets``.

    Adds an optional "Reference Genes" input, p-value / FDR filters and an
    FDR column that is recomputed once all enrichment results are available.
    """

    name = "Gene Set Enrichment"
    description = ""
    icon = "icons/OWGeneSets.svg"
    priority = 9

    max_p_value = Setting(0.0001)
    use_p_value = Setting(False)
    max_fdr = Setting(0.01)
    use_max_fdr = Setting(True)

    use_reference_data = Setting(True, schema_only=True)

    # Column indices of the result model; order matches DATA_HEADER_LABELS.
    COUNT, REFERENCE, P_VAL, FDR, ENRICHMENT, GENES, CATEGORY, TERM = range(8)
    DATA_HEADER_LABELS = [
        "Count", 'Reference', 'p-Value', 'FDR', 'Enrichment',
        'Genes In Set', 'Category', 'Term'
    ]

    class Inputs(_OWGeneSets.OWGeneSets.Inputs):
        reference = Input("Reference Genes", Table)

    class Outputs:
        matched_genes = Output("Matched Genes", Table)

    def __init__(self):
        # reference data attributes
        # (set before super().__init__() so they exist by the time the base
        # class initialiser runs)
        self.reference_data = None
        self.reference_genes = None
        self.reference_tax_id = None
        self.reference_attr_names = None
        self.reference_gene_id_attribute = None
        self.reference_gene_id_column = None
        self.reference_radio_box = None

        super().__init__()

    @Inputs.reference
    def handle_reference_genes(self, data):
        """
        Set the (optional) input dataset with reference gene names.

        A truthy ``data`` is validated through its table attributes (tax id,
        genes-as-attribute-names flag, gene id attribute/column). When an
        annotation is missing the matching ``self.Error`` message is raised
        and the method returns without invalidating the widget.

        A falsy ``data`` (e.g. the input was removed) resets all reference
        attributes so that stale reference genes are no longer used.
        """
        if data:
            attributes = data.attributes
            tax_id = attributes.get(TAX_ID, None)

            self.reference_data = data
            # Keep a missing tax id as None: str(None) would produce the
            # string 'None' and make the `is None` checks below unreachable.
            self.reference_tax_id = None if tax_id is None else str(tax_id)
            self.reference_attr_names = attributes.get(
                GENE_AS_ATTRIBUTE_NAME, None)
            self.reference_gene_id_attribute = attributes.get(
                GENE_ID_ATTRIBUTE, None)
            self.reference_gene_id_column = attributes.get(
                GENE_ID_COLUMN, None)

            # Exactly one of gene-id attribute / gene-id column must be set.
            has_gene_annotation = (
                self.reference_attr_names is not None
                and ((self.reference_gene_id_attribute is None)
                     ^ (self.reference_gene_id_column is None)))

            if not has_gene_annotation:
                if self.reference_tax_id is None:
                    self.Error.missing_annotation()
                    return
                self.Error.missing_gene_id()
                return
            if self.reference_tax_id is None:
                self.Error.missing_tax_id()
                return

            self.__get_reference_genes()
        else:
            # No usable reference input: forget whatever was set before.
            self.reference_data = None
            self.reference_genes = None
            self.reference_tax_id = None
            self.reference_attr_names = None
            self.reference_gene_id_attribute = None
            self.reference_gene_id_column = None

        self.reference_radio_box.setEnabled(bool(self.reference_data))
        self.invalidate()

    def __get_source_data(self, proxy_row_index, column):
        """Return the display-role value of the source cell behind a proxy cell."""
        proxy_index = self.filter_proxy_model.index(proxy_row_index, column)
        source_index = self.filter_proxy_model.mapToSource(proxy_index)
        return source_index.data(role=Qt.DisplayRole)

    def _update_fdr(self):
        """Recompute the FDR column for the rows currently in the proxy model."""
        # Update the FDR in place due to a changed selected categories set and
        # results for all of these categories are already available.
        proxy = self.filter_proxy_model
        model = proxy.sourceModel()
        if model is None:
            return
        assert isinstance(model, QStandardItemModel)

        rows = range(proxy.rowCount())
        p_values = [self.__get_source_data(row, self.P_VAL) for row in rows]
        # Resolve every source row *before* writing anything: the proxy may
        # filter on the FDR column, so writing values while still addressing
        # rows by proxy index could hit the wrong rows if it re-filters.
        source_rows = [
            proxy.mapToSource(proxy.index(row, self.FDR)).row()
            for row in rows
        ]

        # NOTE: the bare name `FDR` is the module-level function, not the
        # class-level column index (that one is `self.FDR`).
        for source_row, fdr_val in zip(source_rows, FDR(p_values)):
            source_item = model.item(source_row, self.FDR)
            source_item.setData(fdr_val, role=Qt.DisplayRole)
            source_item.setData(fdr_val, role=Qt.ToolTipRole)

    def __get_reference_genes(self):
        """Fill ``self.reference_genes`` with gene ids (as ``str``) from the reference data."""
        if self.reference_attr_names:
            # Genes are stored as columns; the id is a variable attribute.
            self.reference_genes = [
                str(variable.attributes.get(
                    self.reference_gene_id_attribute, '?'))
                for variable in self.reference_data.domain.attributes
            ]
        else:
            # Genes are stored as rows; the ids are in a single column.
            genes, _ = self.reference_data.get_column_view(
                self.reference_gene_id_column)
            self.reference_genes = [str(g) for g in genes]

    def create_filters(self):
        """Return the list of ``FilterProxyModel.Filter`` rules for the current settings."""
        search_term = self.search_pattern.lower().strip().split()

        # apply filtering rules
        filters = [
            FilterProxyModel.Filter(
                self.TERM, Qt.DisplayRole,
                lambda value: all(fs in value.lower() for fs in search_term))
        ]

        if self.use_min_count:
            filters.append(
                FilterProxyModel.Filter(
                    self.COUNT, Qt.DisplayRole,
                    lambda value: value >= self.min_count))

        if self.use_p_value:
            filters.append(
                FilterProxyModel.Filter(
                    self.P_VAL, Qt.DisplayRole,
                    lambda value: value < self.max_p_value))

        if self.use_max_fdr:
            filters.append(
                FilterProxyModel.Filter(
                    self.FDR, Qt.DisplayRole,
                    lambda value: value < self.max_fdr))

        return filters

    def create_partial(self):
        """Return a ``partial`` of :meth:`set_items` bound to the current widget state."""
        if self.use_reference_data and self.reference_data:
            reference_genes = self.reference_genes
        else:
            reference_genes = self.gene_info.keys()

        return partial(
            self.set_items,
            self.gs_widget.gs_object,
            self.stored_gene_sets_selection,
            set(self.input_genes),
            self.callback,
            reference_genes=reference_genes,
        )

    @staticmethod
    def set_items(gene_sets, sets_to_display, genes, callback,
                  reference_genes=None):
        """Build the model rows for every displayed gene set with a non-empty match.

        Parameters
        ----------
        gene_sets : iterable
            Gene sets; iterated in sorted order.
        sets_to_display : container
            Hierarchies to include; other gene sets are skipped.
        genes : set
            Query genes.
        callback : callable
            Called with no arguments once per processed gene set.
        reference_genes : iterable, optional
            Reference genes; ``None`` is treated as an empty list.

        Returns
        -------
        list
            One list of eight ``QStandardItem`` per row, ordered like the
            column constants. Empty when ``genes`` is empty, so callers can
            always iterate the result.
        """
        model_items = []
        if not genes:
            return model_items

        # Both values are loop invariant: compute them once.
        reference_genes = [] if reference_genes is None else reference_genes
        query_genes = genes.intersection(reference_genes)

        for gene_set in sorted(gene_sets):
            if gene_set.hierarchy not in sets_to_display:
                continue

            enrichment_result = gene_set.set_enrichment(
                reference_genes, query_genes)
            callback()

            if len(enrichment_result.query) == 0:
                continue

            category_column = QStandardItem()
            name_column = QStandardItem()
            count_column = QStandardItem()
            genes_column = QStandardItem()
            ref_column = QStandardItem()
            pval_column = QStandardItem()
            fdr_column = QStandardItem()  # filled later by _update_fdr
            enrichment_column = QStandardItem()

            category_column.setData(", ".join(gene_set.hierarchy),
                                    Qt.DisplayRole)
            name_column.setData(gene_set.name, Qt.DisplayRole)
            name_column.setData(gene_set.name, Qt.ToolTipRole)
            name_column.setData(gene_set.link, LinkRole)
            name_column.setForeground(QColor(Qt.blue))

            count_column.setData(len(enrichment_result.query),
                                 Qt.DisplayRole)
            count_column.setData(set(enrichment_result.query), Qt.UserRole)

            genes_column.setData(len(gene_set.genes), Qt.DisplayRole)
            # store genes to get them on output on selection
            genes_column.setData(set(gene_set.genes), Qt.UserRole)

            ref_column.setData(len(enrichment_result.reference),
                               Qt.DisplayRole)

            pval_column.setData(enrichment_result.p_value, Qt.DisplayRole)
            pval_column.setData(enrichment_result.p_value, Qt.ToolTipRole)

            enrichment_column.setData(enrichment_result.enrichment_score,
                                      Qt.DisplayRole)
            enrichment_column.setData(enrichment_result.enrichment_score,
                                      Qt.ToolTipRole)

            model_items.append([
                count_column,
                ref_column,
                pval_column,
                fdr_column,
                enrichment_column,
                genes_column,
                category_column,
                name_column,
            ])

        return model_items

    # We must extend this, because we need to update FDR values after workers
    # finish enrichment
    @Slot(concurrent.futures.Future)
    def _init_gene_sets_finished(self, f):
        """Populate the view from the finished enrichment future ``f``.

        Must run in the main (GUI) thread. Failures while collecting or
        displaying the results are logged with their traceback instead of
        being raised.
        """
        import logging

        assert self.thread() is QThread.currentThread()
        assert threading.current_thread() == threading.main_thread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()

        self._task = None
        self.progress_bar.finish()
        self.setStatusMessage('')

        try:
            results = f.result()  # type: list
            for model_item in results:
                self.data_model.appendRow(model_item)
            self.filter_proxy_model.setSourceModel(self.data_model)
            self.data_view.selectionModel().selectionChanged.connect(
                self.commit)
            self._update_fdr()
            self.filter_data_view()
            self.set_selection()
            self.update_info_box()
        except Exception:
            # Best effort as before, but keep the traceback.
            logging.getLogger(__name__).exception(
                "Failed to display gene set enrichment results")

    def assign_delegates(self):
        """Install numerical item delegates on the numeric result columns."""
        # Plain counts: default formatting.
        for column in (self.GENES, self.COUNT, self.REFERENCE):
            self.data_view.setItemDelegateForColumn(
                column, NumericalColumnDelegate(self))

        # p-value and FDR: two digits, notation 'e'.
        for column in (self.P_VAL, self.FDR):
            self.data_view.setItemDelegateForColumn(
                column,
                NumericalColumnDelegate(self, precision=2, notation='e'))

        self.data_view.setItemDelegateForColumn(
            self.ENRICHMENT, NumericalColumnDelegate(self, precision=1))

    def setup_control_area(self):
        """Create the widgets of the control area."""
        # Control area
        self.input_info = widgetLabel(
            widgetBox(self.controlArea, "Info", addSpace=True),
            'No data on input.\n')

        self.custom_gs_col_box = box = vBox(self.controlArea,
                                            'Custom Gene Set Term Column')
        box.hide()

        self.reference_radio_box = radioButtonsInBox(
            self.controlArea,
            self,
            "use_reference_data",
            ["Entire genome", "Reference gene set (input)"],
            tooltips=[
                "Use entire genome (for gene set enrichment)",
                "Use reference set of genes"
            ],
            box="Reference",
            callback=self.invalidate,
        )
        # Disabled until a reference input arrives
        # (see handle_reference_genes).
        self.reference_radio_box.setEnabled(False)

        gene_sets_box = widgetBox(self.controlArea, "Gene Sets")
        self.gs_widget = GeneSetsSelection(gene_sets_box, self,
                                           'stored_gene_sets_selection')
        self.gs_widget.hierarchy_tree_widget.itemClicked.connect(
            self.update_tree_view)

        self.commit_button = auto_commit(self.controlArea, self,
                                         "auto_commit", "&Commit", box=False)

    def setup_filter_area(self):
        """Create the row of filter controls (count, p-value, FDR, text)."""
        h_layout = QHBoxLayout()
        h_layout.setSpacing(100)
        h_widget = widgetBox(self.mainArea, orientation=h_layout)

        spin(
            h_widget,
            self,
            'min_count',
            0,
            100,
            label='Count',
            tooltip='Minimum genes count',
            checked='use_min_count',
            callback=self.filter_data_view,
            callbackOnReturn=True,
            checkCallback=self.filter_data_view,
        )

        doubleSpin(
            h_widget,
            self,
            'max_p_value',
            0.0,
            1.0,
            0.0001,
            label='p-value',
            tooltip='Maximum p-value of the enrichment score',
            checked='use_p_value',
            callback=self.filter_data_view,
            callbackOnReturn=True,
            checkCallback=self.filter_data_view,
        )

        doubleSpin(
            h_widget,
            self,
            'max_fdr',
            0.0,
            1.0,
            0.0001,
            label='FDR',
            tooltip='Maximum false discovery rate',
            checked='use_max_fdr',
            callback=self.filter_data_view,
            callbackOnReturn=True,
            checkCallback=self.filter_data_view,
        )

        self.line_edit_filter = lineEdit(h_widget, self, 'search_pattern')
        self.line_edit_filter.setPlaceholderText('Filter gene sets ...')
        self.line_edit_filter.textChanged.connect(self.filter_data_view)

    def setup_gui(self):
        """Build the control area, the filter row and the results tree view."""
        # control area
        self.setup_control_area()

        # main area
        self.data_view = QTreeView()
        self.setup_filter_model()
        self.setup_filter_area()
        self.data_view.setAlternatingRowColors(True)
        self.data_view.sortByColumn(self.COUNT, Qt.DescendingOrder)
        self.data_view.setSortingEnabled(True)
        self.data_view.setSelectionMode(QTreeView.ExtendedSelection)
        self.data_view.setEditTriggers(QTreeView.NoEditTriggers)
        self.data_view.viewport().setMouseTracking(False)
        self.data_view.setItemDelegateForColumn(
            self.TERM, LinkStyledItemDelegate(self.data_view))

        self.mainArea.layout().addWidget(self.data_view)

        self.data_view.header().setSectionResizeMode(
            QHeaderView.ResizeToContents)
        self.assign_delegates()
class OWDataSets(widget.OWWidget):
    """Widget that lists data sets from a remote index and a local cache.

    The selected data set is sent to the "Data" output; data sets that are
    not available locally (or are outdated) are downloaded in a background
    thread first.
    """

    name = "Data Sets"
    description = "Load a data set from an online repository"
    icon = "icons/DataSets.svg"
    priority = 20
    replaces = ["orangecontrib.prototypes.widgets.owdatasets.OWDataSets"]

    # The following constants can be overridden in a subclass
    # to reuse this widget for a different repository
    # Take care when refactoring! (used in e.g. single-cell)
    INDEX_URL = "http://datasets.orange.biolab.si/"
    DATASET_DIR = "datasets"

    class Error(widget.OWWidget.Error):
        no_remote_datasets = Msg("Could not fetch data set list")

    class Warning(widget.OWWidget.Warning):
        only_local_datasets = Msg("Could not fetch data sets list, only local "
                                  "cached data sets are shown")

    class Outputs:
        data = Output("Data", Orange.data.Table)

    #: Selected data set id
    selected_id = settings.Setting(None)  # type: Optional[str]

    auto_commit = settings.Setting(False)  # type: bool

    #: main area splitter state
    splitter_state = settings.Setting(b'')  # type: bytes
    header_state = settings.Setting(b'')  # type: bytes

    def __init__(self):
        super().__init__()
        self.local_cache_path = os.path.join(data_dir(), self.DATASET_DIR)

        # State of the download the widget is currently waiting for, if any.
        self.__awaiting_state = None  # type: Optional[_FetchState]

        box = gui.widgetBox(self.controlArea, "Info")

        self.infolabel = QLabel(text="Initializing...\n\n")
        box.layout().addWidget(self.infolabel)

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(
            textChanged=self.filter
        )
        self.mainArea.layout().addWidget(self.filterLineEdit)

        self.splitter = QSplitter(orientation=Qt.Vertical)

        self.view = QTreeView(
            sortingEnabled=True,
            selectionMode=QTreeView.SingleSelection,
            alternatingRowColors=True,
            rootIsDecorated=False,
            editTriggers=QTreeView.NoEditTriggers,
        )

        box = gui.widgetBox(self.splitter, "Description", addToLayout=False)
        self.descriptionlabel = QTextBrowser(
            openExternalLinks=True,
            textInteractionFlags=(Qt.TextSelectableByMouse |
                                  Qt.LinksAccessibleByMouse)
        )
        self.descriptionlabel.setFrameStyle(QTextBrowser.NoFrame)
        # no (white) text background
        self.descriptionlabel.viewport().setAutoFillBackground(False)

        box.layout().addWidget(self.descriptionlabel)
        self.splitter.addWidget(self.view)
        self.splitter.addWidget(box)

        self.splitter.setSizes([300, 200])
        self.splitter.splitterMoved.connect(
            lambda:
            setattr(self, "splitter_state", bytes(self.splitter.saveState()))
        )
        self.mainArea.layout().addWidget(self.splitter)
        self.controlArea.layout().addStretch(10)
        gui.auto_commit(self.controlArea, self, "auto_commit", "Send Data")

        # Empty placeholder model; replaced in __set_index once the index
        # listing has finished.
        model = QStandardItemModel(self)
        model.setHorizontalHeaderLabels(HEADER)
        proxy = QSortFilterProxyModel()
        proxy.setSourceModel(model)
        proxy.setFilterKeyColumn(-1)
        proxy.setFilterCaseSensitivity(Qt.CaseInsensitive)
        self.view.setModel(proxy)

        if self.splitter_state:
            self.splitter.restoreState(self.splitter_state)

        self.view.setItemDelegateForColumn(
            Header.Size, SizeDelegate(self))
        self.view.setItemDelegateForColumn(
            Header.Local, gui.IndicatorItemDelegate(self, role=Qt.DisplayRole))
        self.view.setItemDelegateForColumn(
            Header.Instances, NumericalDelegate(self))
        self.view.setItemDelegateForColumn(
            Header.Variables, NumericalDelegate(self))

        self.view.resizeColumnToContents(Header.Local)

        if self.header_state:
            self.view.header().restoreState(self.header_state)

        self.setBlocking(True)
        self.setStatusMessage("Initializing")

        # Fetch the remote index in a worker thread; __set_index is invoked
        # when the future is done.
        self._executor = ThreadPoolExecutor(max_workers=1)
        f = self._executor.submit(self.list_remote)
        w = FutureWatcher(f, parent=self)
        w.done.connect(self.__set_index)

    @Slot(object)
    def __set_index(self, f):
        # type: (Future) -> None
        """Fill the view from the finished `list_remote` future ``f``.

        When the remote query failed, only the locally cached data sets are
        shown and a warning (or an error, if there are none) is raised.
        """
        # set results from `list_remote` query.
        assert QThread.currentThread() is self.thread()
        assert f.done()
        self.setBlocking(False)
        self.setStatusMessage("")
        allinfolocal = self.list_local()
        try:
            res = f.result()
        except Exception:
            log.exception("Error while fetching updated index")
            if not allinfolocal:
                self.Error.no_remote_datasets()
            else:
                self.Warning.only_local_datasets()
            res = {}

        # Normalise a `None` result so the membership tests below are safe.
        allinforemote = res if res is not None else {}  # type: Dict[Tuple[str, str], dict]
        allkeys = sorted(set(allinfolocal) | set(allinforemote))

        def info(file_path):
            # Remote info takes precedence over the local one.
            if file_path in allinforemote:
                info = allinforemote[file_path]
            else:
                info = allinfolocal[file_path]

            islocal = file_path in allinfolocal
            isremote = file_path in allinforemote
            outdated = islocal and isremote and (
                allinforemote[file_path].get('version', '') !=
                allinfolocal[file_path].get('version', ''))
            # An outdated local copy is treated as not local.
            islocal &= not outdated

            prefix = os.path.join('', *file_path[:-1])
            filename = file_path[-1]

            return namespace(
                prefix=prefix, filename=filename,
                title=info.get("title", filename),
                datetime=info.get("datetime", None),
                description=info.get("description", None),
                references=info.get("references", []),
                seealso=info.get("seealso", []),
                source=info.get("source", None),
                year=info.get("year", None),
                instances=info.get("instances", None),
                variables=info.get("variables", None),
                target=info.get("target", None),
                missing=info.get("missing", None),
                tags=info.get("tags", []),
                size=info.get("size", None),
                islocal=islocal,
                outdated=outdated
            )

        model = QStandardItemModel(self)
        model.setHorizontalHeaderLabels(HEADER)

        current_index = -1
        for i, file_path in enumerate(allkeys):
            datainfo = info(file_path)
            item1 = QStandardItem()
            item1.setData(" " if datainfo.islocal else "", Qt.DisplayRole)
            item1.setData(datainfo, Qt.UserRole)
            item2 = QStandardItem(datainfo.title)
            item3 = QStandardItem()
            item3.setData(datainfo.size, Qt.DisplayRole)
            item4 = QStandardItem()
            item4.setData(datainfo.instances, Qt.DisplayRole)
            item5 = QStandardItem()
            item5.setData(datainfo.variables, Qt.DisplayRole)
            item6 = QStandardItem()
            item6.setData(datainfo.target, Qt.DisplayRole)
            if datainfo.target:
                item6.setIcon(variable_icon(datainfo.target))
            item7 = QStandardItem()
            item7.setData(", ".join(datainfo.tags) if datainfo.tags else "",
                          Qt.DisplayRole)
            row = [item1, item2, item3, item4, item5, item6, item7]
            model.appendRow(row)

            # Remember the row of the stored selection (if still present).
            if os.path.join(*file_path) == self.selected_id:
                current_index = i

        # Swap the source model while preserving the header state.
        hs = self.view.header().saveState()
        model_ = self.view.model().sourceModel()
        self.view.model().setSourceModel(model)
        self.view.header().restoreState(hs)
        model_.deleteLater()
        model_.setParent(None)
        self.view.selectionModel().selectionChanged.connect(
            self.__on_selection
        )
        # Update the info text
        self.infolabel.setText(format_info(model.rowCount(), len(allinfolocal)))

        if current_index != -1:
            selmodel = self.view.selectionModel()
            selmodel.select(
                self.view.model().mapFromSource(model.index(current_index, 0)),
                QItemSelectionModel.ClearAndSelect | QItemSelectionModel.Rows)

    def __update_cached_state(self):
        """Refresh the 'local' indicator of every row and the info label."""
        model = self.view.model().sourceModel()
        localinfo = self.list_local()
        assert isinstance(model, QStandardItemModel)
        allinfo = []
        for i in range(model.rowCount()):
            item = model.item(i, 0)
            info = item.data(Qt.UserRole)
            info.islocal = (info.prefix, info.filename) in localinfo
            item.setData(" " if info.islocal else "", Qt.DisplayRole)
            allinfo.append(info)

        self.infolabel.setText(format_info(
            model.rowCount(), sum(info.islocal for info in allinfo)))

    def selected_dataset(self):
        """
        Return the current selected data set info or None if not selected

        Returns
        -------
        info : Optional[namespace]
        """
        rows = self.view.selectionModel().selectedRows(0)
        assert 0 <= len(rows) <= 1
        current = rows[0] if rows else None  # type: Optional[QModelIndex]
        if current is not None:
            info = current.data(Qt.UserRole)
            assert isinstance(info, namespace)
        else:
            info = None
        return info

    def filter(self):
        """Apply the filter line edit's text as a fixed-string row filter."""
        filter_string = self.filterLineEdit.text().strip()
        proxyModel = self.view.model()
        if proxyModel:
            proxyModel.setFilterFixedString(filter_string)

    def __on_selection(self):
        # Main data sets view selection has changed
        rows = self.view.selectionModel().selectedRows(0)
        assert 0 <= len(rows) <= 1
        current = rows[0] if rows else None  # type: Optional[QModelIndex]
        if current is not None:
            current = self.view.model().mapToSource(current)
            di = current.data(Qt.UserRole)
            text = description_html(di)
            self.descriptionlabel.setText(text)
            self.selected_id = os.path.join(di.prefix, di.filename)
        else:
            self.descriptionlabel.setText("")
            self.selected_id = None

        self.commit()

    def commit(self):
        """
        Commit a dataset to the output immediately (if available locally) or
        schedule download background and an eventual send.

        During the download the widget is in blocking state
        (OWWidget.isBlocking)
        """
        di = self.selected_dataset()
        if di is not None:
            self.Error.clear()

            if self.__awaiting_state is not None:
                # disconnect from the __commit_complete
                self.__awaiting_state.watcher.done.disconnect(
                    self.__commit_complete)
                # .. and connect to update_cached_state
                # self.__awaiting_state.watcher.done.connect(
                #    self.__update_cached_state)
                # TODO: There are possible pending __progress_advance queued
                self.__awaiting_state.pb.advance.disconnect(
                    self.__progress_advance)
                self.progressBarFinished(processEvents=None)
                self.__awaiting_state = None

            if not di.islocal:
                pr = progress()
                # Called from the worker thread; the queued connection below
                # delivers the progress update in the widget's thread.
                callback = lambda pr=pr: pr.advance.emit()
                pr.advance.connect(self.__progress_advance, Qt.QueuedConnection)

                self.progressBarInit(processEvents=None)
                self.setStatusMessage("Fetching...")
                self.setBlocking(True)

                f = self._executor.submit(
                    ensure_local, self.INDEX_URL, di.prefix, di.filename,
                    self.local_cache_path, force=di.outdated,
                    progress_advance=callback)
                w = FutureWatcher(f, parent=self)
                w.done.connect(self.__commit_complete)
                self.__awaiting_state = _FetchState(f, w, pr)
            else:
                self.setStatusMessage("")
                self.setBlocking(False)
                self.commit_cached(di.prefix, di.filename)
        else:
            self.Outputs.data.send(None)

    @Slot(object)
    def __commit_complete(self, f):
        # complete the commit operation after the required file has been
        # downloaded
        assert QThread.currentThread() is self.thread()
        assert self.__awaiting_state is not None
        assert self.__awaiting_state.future is f

        if self.isBlocking():
            self.progressBarFinished(processEvents=None)
            self.setBlocking(False)
            self.setStatusMessage("")

        self.__awaiting_state = None

        try:
            path = f.result()
        except Exception as ex:
            log.exception("Error:")
            self.error(format_exception(ex))
            path = None

        self.__update_cached_state()

        if path is not None:
            data = Orange.data.Table(path)
        else:
            data = None
        self.Outputs.data.send(data)

    def commit_cached(self, prefix, filename):
        """Load the locally cached data set and send it to the output."""
        path = LocalFiles(self.local_cache_path).localpath(prefix, filename)
        self.Outputs.data.send(Orange.data.Table(path))

    @Slot()
    def __progress_advance(self):
        assert QThread.currentThread() is self.thread()
        self.progressBarAdvance(1, processEvents=None)

    def onDeleteWidget(self):
        """Disconnect from a pending download and release the worker thread."""
        super().onDeleteWidget()
        if self.__awaiting_state is not None:
            self.__awaiting_state.watcher.done.disconnect(self.__commit_complete)
            self.__awaiting_state.pb.advance.disconnect(self.__progress_advance)
            self.__awaiting_state = None
        # Do not block the GUI thread: an already running task is left to
        # finish on its own, but the idle worker thread is released.
        self._executor.shutdown(wait=False)

    def sizeHint(self):
        return QSize(900, 600)

    def closeEvent(self, event):
        """Store the splitter and header state in the settings before closing."""
        self.splitter_state = bytes(self.splitter.saveState())
        self.header_state = bytes(self.view.header().saveState())
        super().closeEvent(event)

    def list_remote(self):
        # type: () -> Dict[Tuple[str, str], dict]
        """Return ``allinfo()`` of the server at ``INDEX_URL``."""
        client = ServerFiles(server=self.INDEX_URL)
        return client.allinfo()

    def list_local(self):
        # type: () -> Dict[Tuple[str, str], dict]
        """Return ``allinfo()`` of the local cache directory."""
        return LocalFiles(self.local_cache_path).allinfo()
class OWSetEnrichment(widget.OWWidget): name = "Set Enrichment" description = "" icon = "../widgets/icons/GeneSetEnrichment.svg" priority = 5000 inputs = [("Data", Orange.data.Table, "setData", widget.Default), ("Reference", Orange.data.Table, "setReference")] outputs = [("Data subset", Orange.data.Table)] settingsHandler = settings.DomainContextHandler() taxid = settings.ContextSetting(None) speciesIndex = settings.ContextSetting(0) genesinrows = settings.ContextSetting(False) geneattr = settings.ContextSetting(0) categoriesCheckState = settings.ContextSetting({}) useReferenceData = settings.Setting(False) useMinCountFilter = settings.Setting(True) useMaxPValFilter = settings.Setting(True) useMaxFDRFilter = settings.Setting(True) minClusterCount = settings.Setting(3) maxPValue = settings.Setting(0.01) maxFDR = settings.Setting(0.01) autocommit = settings.Setting(False) Ready, Initializing, Loading, RunningEnrichment = 0, 1, 2, 4 def __init__(self, parent=None): super().__init__(parent) self.geneMatcherSettings = [False, False, True, False] self.data = None self.referenceData = None self.taxid_list = [] self.__genematcher = (None, fulfill(gene.matcher([]))) self.__invalidated = False self.currentAnnotatedCategories = [] self.state = None self.__state = OWSetEnrichment.Initializing box = gui.widgetBox(self.controlArea, "Info") self.infoBox = gui.widgetLabel(box, "Info") self.infoBox.setText("No data on input.\n") self.speciesComboBox = gui.comboBox( self.controlArea, self, "speciesIndex", "Species", callback=self.__on_speciesIndexChanged) box = gui.widgetBox(self.controlArea, "Entity names") self.geneAttrComboBox = gui.comboBox(box, self, "geneattr", "Entity feature", sendSelectedValue=0, callback=self.updateAnnotations) cb = gui.checkBox(box, self, "genesinrows", "Use feature names", callback=self.updateAnnotations, disables=[(-1, self.geneAttrComboBox)]) cb.makeConsistent() # gui.button(box, self, "Gene matcher settings", # callback=self.updateGeneMatcherSettings, 
# tooltip="Open gene matching settings dialog") self.referenceRadioBox = gui.radioButtonsInBox( self.controlArea, self, "useReferenceData", ["All entities", "Reference set (input)"], tooltips=[ "Use entire genome (for gene set enrichment) or all " + "available entities for reference", "Use entities from Reference Examples input signal " + "as reference" ], box="Reference", callback=self.updateAnnotations) box = gui.widgetBox(self.controlArea, "Entity Sets") self.groupsWidget = QTreeWidget(self) self.groupsWidget.setHeaderLabels(["Category"]) box.layout().addWidget(self.groupsWidget) hLayout = QHBoxLayout() hLayout.setSpacing(10) hWidget = gui.widgetBox(self.mainArea, orientation=hLayout) gui.spin(hWidget, self, "minClusterCount", 0, 100, label="Entities", tooltip="Minimum entity count", callback=self.filterAnnotationsChartView, callbackOnReturn=True, checked="useMinCountFilter", checkCallback=self.filterAnnotationsChartView) pvalfilterbox = gui.widgetBox(hWidget, orientation="horizontal") cb = gui.checkBox(pvalfilterbox, self, "useMaxPValFilter", "p-value", callback=self.filterAnnotationsChartView) sp = gui.doubleSpin( pvalfilterbox, self, "maxPValue", 0.0, 1.0, 0.0001, tooltip="Maximum p-value", callback=self.filterAnnotationsChartView, callbackOnReturn=True, ) sp.setEnabled(self.useMaxFDRFilter) cb.toggled[bool].connect(sp.setEnabled) pvalfilterbox.layout().setAlignment(cb, Qt.AlignRight) pvalfilterbox.layout().setAlignment(sp, Qt.AlignLeft) fdrfilterbox = gui.widgetBox(hWidget, orientation="horizontal") cb = gui.checkBox(fdrfilterbox, self, "useMaxFDRFilter", "FDR", callback=self.filterAnnotationsChartView) sp = gui.doubleSpin( fdrfilterbox, self, "maxFDR", 0.0, 1.0, 0.0001, tooltip="Maximum False discovery rate", callback=self.filterAnnotationsChartView, callbackOnReturn=True, ) sp.setEnabled(self.useMaxFDRFilter) cb.toggled[bool].connect(sp.setEnabled) fdrfilterbox.layout().setAlignment(cb, Qt.AlignRight) fdrfilterbox.layout().setAlignment(sp, Qt.AlignLeft) 
self.filterLineEdit = QLineEdit(self, placeholderText="Filter ...") self.filterCompleter = QCompleter(self.filterLineEdit) self.filterCompleter.setCaseSensitivity(Qt.CaseInsensitive) self.filterLineEdit.setCompleter(self.filterCompleter) hLayout.addWidget(self.filterLineEdit) self.mainArea.layout().addWidget(hWidget) self.filterLineEdit.textChanged.connect( self.filterAnnotationsChartView) self.annotationsChartView = QTreeView( alternatingRowColors=True, sortingEnabled=True, selectionMode=QTreeView.ExtendedSelection, rootIsDecorated=False, editTriggers=QTreeView.NoEditTriggers, ) self.annotationsChartView.viewport().setMouseTracking(True) self.mainArea.layout().addWidget(self.annotationsChartView) contextEventFilter = gui.VisibleHeaderSectionContextEventFilter( self.annotationsChartView) self.annotationsChartView.header().installEventFilter( contextEventFilter) self.groupsWidget.itemClicked.connect(self.subsetSelectionChanged) gui.auto_commit(self.controlArea, self, "autocommit", "Commit") self.setBlocking(True) task = EnsureDownloaded([(taxonomy.Taxonomy.DOMAIN, taxonomy.Taxonomy.FILENAME), (geneset.sfdomain, "index.pck")]) task.finished.connect(self.__initialize_finish) self.setStatusMessage("Initializing") self._executor = ThreadExecutor(parent=self, threadPool=QThreadPool(self)) self._executor.submit(task) def sizeHint(self): return QSize(1024, 600) def __initialize_finish(self): # Finalize the the widget's initialization (preferably after # ensuring all required databases have been downloaded. 
sets = geneset.list_all() taxids = set(taxonomy.common_taxids() + list(filter(None, [tid for _, tid, _ in sets]))) organisms = [(tid, name_or_none(tid)) for tid in taxids] organisms = [(tid, name) for tid, name in organisms if name is not None] organisms = [(None, "None")] + sorted(organisms) taxids = [tid for tid, _ in organisms] names = [name for _, name in organisms] self.taxid_list = taxids self.speciesComboBox.clear() self.speciesComboBox.addItems(names) self.genesets = sets if self.taxid in self.taxid_list: taxid = self.taxid else: taxid = self.taxid_list[0] self.taxid = None self.setCurrentOrganism(taxid) self.setBlocking(False) self.__state = OWSetEnrichment.Ready self.setStatusMessage("") def setCurrentOrganism(self, taxid): """Set the current organism `taxid`.""" if taxid not in self.taxid_list: taxid = self.taxid_list[min(self.speciesIndex, len(self.taxid_list) - 1)] if self.taxid != taxid: self.taxid = taxid self.speciesIndex = self.taxid_list.index(taxid) self.refreshHierarchy() self._invalidateGeneMatcher() self._invalidate() def currentOrganism(self): """Return the current organism taxid""" return self.taxid def __on_speciesIndexChanged(self): taxid = self.taxid_list[self.speciesIndex] self.taxid = "< Do not look >" self.setCurrentOrganism(taxid) if self.__invalidated and self.data is not None: self.updateAnnotations() def clear(self): """Clear/reset the widget state.""" self._cancelPending() self.state = None self.__state = self.__state & ~OWSetEnrichment.RunningEnrichment self._clearView() if self.annotationsChartView.model() is not None: self.annotationsChartView.model().clear() self.geneAttrComboBox.clear() self.geneAttrs = [] self._updatesummary() def _cancelPending(self): """Cancel pending tasks.""" if self.state is not None: self.state.results.cancel() self.state.namematcher.cancel() self.state.cancelled = True def _clearView(self): """Clear the enrichment report view (main area).""" if self.annotationsChartView.model() is not None: 
self.annotationsChartView.model().clear() def setData(self, data=None): """Set the input dataset with query gene names""" if self.__state & OWSetEnrichment.Initializing: self.__initialize_finish() self.error(0) self.closeContext() self.clear() self.groupsWidget.clear() self.data = data if data is not None: varlist = [ var for var in data.domain.variables + data.domain.metas if isinstance(var, Orange.data.StringVariable) ] self.geneAttrs = varlist for var in varlist: self.geneAttrComboBox.addItem(*gui.attributeItem(var)) oldtaxid = self.taxid self.geneattr = min(self.geneattr, len(self.geneAttrs) - 1) taxid = data_hints.get_hint(data, "taxid", "") if taxid in self.taxid_list: self.speciesIndex = self.taxid_list.index(taxid) self.taxid = taxid self.genesinrows = data_hints.get_hint(data, "genesinrows", self.genesinrows) self.openContext(data) if oldtaxid != self.taxid: self.taxid = "< Do not look >" self.setCurrentOrganism(taxid) self.refreshHierarchy() self._invalidate() def setReference(self, data=None): """Set the (optional) input dataset with reference gene names.""" self.referenceData = data self.referenceRadioBox.setEnabled(bool(data)) if self.useReferenceData: self._invalidate() def handleNewSignals(self): if self.__invalidated: self.updateAnnotations() def _invalidateGeneMatcher(self): _, f = self.__genematcher f.cancel() self.__genematcher = (None, fulfill(gene.matcher([]))) def _invalidate(self): self.__invalidated = True def genesFromTable(self, table): if self.genesinrows: genes = [attr.name for attr in table.domain.attributes] else: geneattr = self.geneAttrs[self.geneattr] genes = [str(ex[geneattr]) for ex in table] return genes def getHierarchy(self, taxid): def recursive_dict(): return defaultdict(recursive_dict) collection = recursive_dict() def collect(col, hier): if hier: collect(col[hier[0]], hier[1:]) for hierarchy, t_id, _ in self.genesets: collect(collection[t_id], hierarchy) return (taxid, collection[taxid]), (None, collection[None]) def 
setHierarchy(self, hierarchy, hierarchy_noorg): self.groupsWidgetItems = {} def fill(col, parent, full=(), org=""): for key, value in sorted(col.items()): full_cat = full + (key, ) item = QTreeWidgetItem(parent, [key]) item.setFlags(item.flags() | Qt.ItemIsUserCheckable | Qt.ItemIsSelectable | Qt.ItemIsEnabled) if value: item.setFlags(item.flags() | Qt.ItemIsTristate) checked = self.categoriesCheckState.get((full_cat, org), Qt.Checked) item.setData(0, Qt.CheckStateRole, checked) item.setExpanded(True) item.category = full_cat item.organism = org self.groupsWidgetItems[full_cat] = item fill(value, item, full_cat, org=org) self.groupsWidget.clear() fill(hierarchy[1], self.groupsWidget, org=hierarchy[0]) fill(hierarchy_noorg[1], self.groupsWidget, org=hierarchy_noorg[0]) def refreshHierarchy(self): self.setHierarchy(*self.getHierarchy( taxid=self.taxid_list[self.speciesIndex])) def selectedCategories(self): """ Return a list of currently selected hierarchy keys. A key is a tuple of identifiers from the root to the leaf of the hierarchy tree. """ return [ key for key, check in self.getHierarchyCheckState().items() if check == Qt.Checked ] def getHierarchyCheckState(self): def collect(item, full=()): checked = item.checkState(0) name = str(item.data(0, Qt.DisplayRole)) full_cat = full + (name, ) result = [((full_cat, item.organism), checked)] for i in range(item.childCount()): result.extend(collect(item.child(i), full_cat)) return result items = [ self.groupsWidget.topLevelItem(i) for i in range(self.groupsWidget.topLevelItemCount()) ] states = itertools.chain(*(collect(item) for item in items)) return dict(states) def subsetSelectionChanged(self, item, column): # The selected geneset (hierarchy) subset has been changed by the # user. Update the displayed results. 
# Update the stored state (persistent settings) self.categoriesCheckState = self.getHierarchyCheckState() categories = self.selectedCategories() if self.data is not None: if self._nogenematching() or \ not set(categories) <= set(self.currentAnnotatedCategories): self.updateAnnotations() else: self.filterAnnotationsChartView() def updateGeneMatcherSettings(self): raise NotImplementedError from .OWGOEnrichmentAnalysis import GeneMatcherDialog dialog = GeneMatcherDialog(self, defaults=self.geneMatcherSettings, enabled=[True] * 4, modal=True) if dialog.exec_(): self.geneMatcherSettings = [ getattr(dialog, item[0]) for item in dialog.items ] self._invalidateGeneMatcher() if self.data is not None: self.updateAnnotations() def _genematcher(self): """ Return a Future[gene.SequenceMatcher] """ taxid = self.taxid_list[self.speciesIndex] current, matcher_f = self.__genematcher if taxid == current and \ not matcher_f.cancelled(): return matcher_f self._invalidateGeneMatcher() if taxid is None: self.__genematcher = (None, fulfill(gene.matcher([]))) return self.__genematcher[1] matchers = [gene.GMGO, gene.GMKEGG, gene.GMNCBI, gene.GMAffy] matchers = [ m for m, use in zip(matchers, self.geneMatcherSettings) if use ] def create(): return gene.matcher([m(taxid) for m in matchers]) matcher_f = self._executor.submit(create) self.__genematcher = (taxid, matcher_f) return self.__genematcher[1] def _nogenematching(self): return self.taxid is None or not any(self.geneMatcherSettings) def updateAnnotations(self): if self.data is None: return assert not self.__state & OWSetEnrichment.Initializing self._cancelPending() self._clearView() self.information(0) self.warning(0) self.error(0) if not self.genesinrows and len(self.geneAttrs) == 0: self.error(0, "Input data contains no columns with gene names") return self.__state = OWSetEnrichment.RunningEnrichment taxid = self.taxid_list[self.speciesIndex] self.taxid = taxid categories = self.selectedCategories() clusterGenes = 
self.genesFromTable(self.data) if self.referenceData is not None and self.useReferenceData: referenceGenes = self.genesFromTable(self.referenceData) else: referenceGenes = None self.currentAnnotatedCategories = categories genematcher = self._genematcher() self.progressBarInit() ## Load collections in a worker thread # TODO: Use cached collections if already loaded and # use ensure_genesetsdownloaded with progress report (OWSelectGenes) collections = self._executor.submit(geneset.collections, *categories) def refset_null(): """Return the default background reference set""" col = collections.result() return reduce(operator.ior, (set(g.genes) for g in col), set()) def refset_ncbi(): """Return all NCBI gene names""" geneinfo = gene.NCBIGeneInfo(taxid) return set(geneinfo.keys()) def namematcher(): matcher = genematcher.result() match = matcher.set_targets(ref_set.result()) match.umatch = memoize(match.umatch) return match def map_unames(): matcher = namematcher.result() query = list(filter(None, map(matcher.umatch, querynames))) reference = list( filter(None, map(matcher.umatch, ref_set.result()))) return query, reference if self._nogenematching(): if referenceGenes is None: ref_set = self._executor.submit(refset_null) else: ref_set = fulfill(referenceGenes) else: if referenceGenes == None: ref_set = self._executor.submit(refset_ncbi) else: ref_set = fulfill(referenceGenes) namematcher = self._executor.submit(namematcher) querynames = clusterGenes state = types.SimpleNamespace() state.query_set = clusterGenes state.reference_set = referenceGenes state.namematcher = namematcher state.query_count = len(set(clusterGenes)) state.reference_count = (len(set(referenceGenes)) if referenceGenes is not None else None) state.cancelled = False progress = methodinvoke(self, "_setProgress", (float, )) info = methodinvoke(self, "_setRunInfo", (str, )) @withtraceback def run(): info("Loading data") match = namematcher.result() query, reference = map_unames() gscollections = 
collections.result() results = [] info("Running enrichment") p = 0 for i, gset in enumerate(gscollections): genes = set(filter(None, map(match.umatch, gset.genes))) enr = set_enrichment(genes, reference, query) results.append((gset, enr)) if state.cancelled: raise UserInteruptException pnew = int(100 * i / len(gscollections)) if pnew != p: progress(pnew) p = pnew progress(100) info("") return query, reference, results task = Task(function=run) task.resultReady.connect(self.__on_enrichment_finished) task.exceptionReady.connect(self.__on_enrichment_failed) result = self._executor.submit(task) state.results = result self.state = state self._updatesummary() def __on_enrichment_failed(self, exception): if not isinstance(exception, UserInteruptException): print("ERROR:", exception, file=sys.stderr) print(exception._traceback, file=sys.stderr) self.progressBarFinished() self.setStatusMessage("") self.__state &= ~OWSetEnrichment.RunningEnrichment def __on_enrichment_finished(self, results): assert QThread.currentThread() is self.thread() self.__state &= ~OWSetEnrichment.RunningEnrichment query, reference, results = results if self.annotationsChartView.model(): self.annotationsChartView.model().clear() nquery = len(query) nref = len(reference) maxcount = max((len(e.query_mapped) for _, e in results), default=1) maxrefcount = max((len(e.reference_mapped) for _, e in results), default=1) nspaces = int(math.ceil(math.log10(maxcount or 1))) refspaces = int(math.ceil(math.log(maxrefcount or 1))) query_fmt = "%" + str(nspaces) + "s (%.2f%%)" ref_fmt = "%" + str(refspaces) + "s (%.2f%%)" def fmt_count(fmt, count, total): return fmt % (count, 100.0 * count / (total or 1)) fmt_query_count = partial(fmt_count, query_fmt) fmt_ref_count = partial(fmt_count, ref_fmt) linkFont = QFont(self.annotationsChartView.viewOptions().font) linkFont.setUnderline(True) def item(value=None, tooltip=None, user=None): si = QStandardItem() if value is not None: si.setData(value, Qt.DisplayRole) if 
tooltip is not None: si.setData(tooltip, Qt.ToolTipRole) if user is not None: si.setData(user, Qt.UserRole) else: si.setData(value, Qt.UserRole) return si model = QStandardItemModel() model.setSortRole(Qt.UserRole) model.setHorizontalHeaderLabels([ "Category", "Term", "Count", "Reference count", "p-value", "FDR", "Enrichment" ]) for i, (gset, enrich) in enumerate(results): if len(enrich.query_mapped) == 0: continue nquery_mapped = len(enrich.query_mapped) nref_mapped = len(enrich.reference_mapped) row = [ item(", ".join(gset.hierarchy)), item(gsname(gset), tooltip=gset.link), item(fmt_query_count(nquery_mapped, nquery), tooltip=nquery_mapped, user=nquery_mapped), item(fmt_ref_count(nref_mapped, nref), tooltip=nref_mapped, user=nref_mapped), item(fmtp(enrich.p_value), user=enrich.p_value), item( ), # column 5, FDR, is computed in filterAnnotationsChartView item(enrich.enrichment_score, tooltip="%.3f" % enrich.enrichment_score, user=enrich.enrichment_score) ] row[0].geneset = gset row[0].enrichment = enrich row[1].setData(gset.link, gui.LinkRole) row[1].setFont(linkFont) row[1].setForeground(QColor(Qt.blue)) model.appendRow(row) self.annotationsChartView.setModel(model) self.annotationsChartView.selectionModel().selectionChanged.connect( self.commit) if not model.rowCount(): self.warning(0, "No enriched sets found.") else: self.warning(0) allnames = set( gsname(geneset) for geneset, (count, _, _, _) in results if count) allnames |= reduce(operator.ior, (set(word_split(name)) for name in allnames), set()) self.filterCompleter.setModel(None) self.completerModel = QStringListModel(sorted(allnames)) self.filterCompleter.setModel(self.completerModel) if results: max_score = max( (e.enrichment_score for _, e in results if np.isfinite(e.enrichment_score)), default=1) self.annotationsChartView.setItemDelegateForColumn( 6, BarItemDelegate(self, scale=(0.0, max_score))) self.annotationsChartView.setItemDelegateForColumn( 1, 
gui.LinkStyledItemDelegate(self.annotationsChartView)) header = self.annotationsChartView.header() for i in range(model.columnCount()): sh = self.annotationsChartView.sizeHintForColumn(i) sh = max(sh, header.sectionSizeHint(i)) self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30)) # self.annotationsChartView.resizeColumnToContents(i) self.filterAnnotationsChartView() self.progressBarFinished() self.setStatusMessage("") def _updatesummary(self): state = self.state if state is None: self.error(0, ) self.warning(0) self.infoBox.setText("No data on input.\n") return text = "{.query_count} unique names on input\n".format(state) if state.results.done() and not state.results.exception(): mapped, _, _ = state.results.result() ratio_mapped = (len(mapped) / state.query_count if state.query_count else 0) text += ("%i (%.1f%%) gene names matched" % (len(mapped), 100.0 * ratio_mapped)) elif not state.results.done(): text += "..." else: text += "<Error {}>".format(str(state.results.exception())) self.infoBox.setText(text) # TODO: warn on no enriched sets found (i.e no query genes # mapped to any set) def filterAnnotationsChartView(self, filterString=""): if self.__state & OWSetEnrichment.RunningEnrichment: return # TODO: Move filtering to a filter proxy model # TODO: Re-enable string search categories = set(", ".join(cat) for cat, _ in self.selectedCategories()) # filterString = str(self.filterLineEdit.text()).lower() model = self.annotationsChartView.model() def ishidden(index): # Is item at index (row) hidden item = model.item(index) item_cat = item.data(Qt.DisplayRole) return item_cat not in categories hidemask = [ishidden(i) for i in range(model.rowCount())] # compute FDR according the selected categories pvals = [ model.item(i, 4).data(Qt.UserRole) for i, hidden in enumerate(hidemask) if not hidden ] fdrs = utils.stats.FDR(pvals) # update FDR for the selected collections and apply filtering rules itemsHidden = [] fdriter = iter(fdrs) for index, hidden in 
enumerate(hidemask): if not hidden: fdr = next(fdriter) pval = model.index(index, 4).data(Qt.UserRole) count = model.index(index, 2).data(Qt.ToolTipRole) hidden = (self.useMinCountFilter and count < self.minClusterCount) or \ (self.useMaxPValFilter and pval > self.maxPValue) or \ (self.useMaxFDRFilter and fdr > self.maxFDR) if not hidden: fdr_item = model.item(index, 5) fdr_item.setData(fmtpdet(fdr), Qt.ToolTipRole) fdr_item.setData(fmtp(fdr), Qt.DisplayRole) fdr_item.setData(fdr, Qt.UserRole) self.annotationsChartView.setRowHidden(index, QModelIndex(), hidden) itemsHidden.append(hidden) if model.rowCount() and all(itemsHidden): self.information(0, "All sets were filtered out.") else: self.information(0) self._updatesummary() @Slot(float) def _setProgress(self, value): assert QThread.currentThread() is self.thread() self.progressBarSet(value, processEvents=None) @Slot(str) def _setRunInfo(self, text): self.setStatusMessage(text) def commit(self): if self.data is None or \ self.__state & OWSetEnrichment.RunningEnrichment: return model = self.annotationsChartView.model() rows = self.annotationsChartView.selectionModel().selectedRows(0) selected = [model.item(index.row(), 0) for index in rows] mapped = reduce(operator.ior, (set(item.enrichment.query_mapped) for item in selected), set()) assert self.state.namematcher.done() matcher = self.state.namematcher.result() axis = 1 if self.genesinrows else 0 if axis == 1: mapped = [ attr for attr in self.data.domain.attributes if matcher.umatch(attr.name) in mapped ] newdomain = Orange.data.Domain(mapped, self.data.domain.class_vars, self.data.domain.metas) data = self.data.from_table(newdomain, self.data) else: geneattr = self.geneAttrs[self.geneattr] selected = [ i for i, ex in enumerate(self.data) if matcher.umatch(str(ex[geneattr])) in mapped ] data = self.data[selected] self.send("Data subset", data) def onDeleteWidget(self): if self.state is not None: self._cancelPending() self.state = None 
self._executor.shutdown(wait=False)
class OWDataSets(OWWidget):
    """Widget that lists datasets from an online index and outputs one.

    The dataset index is fetched on a single-worker executor; the selected
    dataset is loaded from the local cache, or downloaded first when it is
    missing or outdated.
    """
    name = "Datasets"
    description = "Load a dataset from an online repository"
    icon = "icons/DataSets.svg"
    priority = 20
    replaces = ["orangecontrib.prototypes.widgets.owdatasets.OWDataSets"]
    keywords = ["online"]

    # The following constants can be overridden in a subclass
    # to reuse this widget for a different repository
    # Take care when refactoring! (used in e.g. single-cell)
    INDEX_URL = "https://datasets.biolab.si/"
    DATASET_DIR = "datasets"

    # override HEADER_SCHEMA to define new columns
    # if schema is changed override methods: self.assign_delegates and
    # self.create_model
    # Each entry is an [info_tag, {'label': column header text}] pair.
    HEADER_SCHEMA = [
        ['islocal', {'label': ''}],
        ['title', {'label': 'Title'}],
        ['size', {'label': 'Size'}],
        ['instances', {'label': 'Instances'}],
        ['variables', {'label': 'Variables'}],
        ['target', {'label': 'Target'}],
        ['tags', {'label': 'Tags'}]
    ]  # type: List[list]

    class Error(OWWidget.Error):
        no_remote_datasets = Msg("Could not fetch dataset list")

    class Warning(OWWidget.Warning):
        only_local_datasets = Msg("Could not fetch datasets list, only local "
                                  "cached datasets are shown")

    class Outputs:
        data = Output("Data", Orange.data.Table)

    #: Selected dataset id
    selected_id = settings.Setting(None)  # type: Optional[str]

    auto_commit = settings.Setting(False)  # type: bool

    #: main area splitter state
    splitter_state = settings.Setting(b'')  # type: bytes
    header_state = settings.Setting(b'')  # type: bytes

    def __init__(self):
        super().__init__()
        self.allinfo_local = {}
        self.allinfo_remote = {}

        self.local_cache_path = os.path.join(data_dir(), self.DATASET_DIR)

        self._header_labels = [
            header['label'] for _, header in self.HEADER_SCHEMA]
        self._header_index = namedtuple(
            '_header_index', [info_tag for info_tag, _ in self.HEADER_SCHEMA])
        # Maps every schema tag to its column index (e.g. Header.size).
        self.Header = self._header_index(
            *[index for index, _ in enumerate(self._header_labels)])

        self.__awaiting_state = None  # type: Optional[_FetchState]

        box = gui.widgetBox(self.controlArea, "Info")

        self.infolabel = QLabel(text="Initializing...\n\n")
        box.layout().addWidget(self.infolabel)

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(
            textChanged=self.filter
        )
        self.mainArea.layout().addWidget(self.filterLineEdit)

        self.splitter = QSplitter(orientation=Qt.Vertical)

        self.view = QTreeView(
            sortingEnabled=True,
            selectionMode=QTreeView.SingleSelection,
            alternatingRowColors=True,
            rootIsDecorated=False,
            editTriggers=QTreeView.NoEditTriggers,
            uniformRowHeights=True,
        )
        # the method doesn't exist yet, pylint: disable=unnecessary-lambda
        self.view.doubleClicked.connect(lambda: self.unconditional_commit())
        box = gui.widgetBox(self.splitter, "Description", addToLayout=False)
        self.descriptionlabel = QTextBrowser(
            openExternalLinks=True,
            textInteractionFlags=(Qt.TextSelectableByMouse |
                                  Qt.LinksAccessibleByMouse)
        )
        self.descriptionlabel.setFrameStyle(QTextBrowser.NoFrame)
        # no (white) text background
        self.descriptionlabel.viewport().setAutoFillBackground(False)

        box.layout().addWidget(self.descriptionlabel)
        self.splitter.addWidget(self.view)
        self.splitter.addWidget(box)

        self.splitter.setSizes([300, 200])
        # Persist the splitter geometry in the settings on every move.
        self.splitter.splitterMoved.connect(
            lambda:
            setattr(self, "splitter_state", bytes(self.splitter.saveState()))
        )
        self.mainArea.layout().addWidget(self.splitter)
        self.controlArea.layout().addStretch(10)
        gui.auto_commit(self.controlArea, self, "auto_commit", "Send Data")

        proxy = QSortFilterProxyModel()
        # -1: match the filter against all columns.
        proxy.setFilterKeyColumn(-1)
        proxy.setFilterCaseSensitivity(Qt.CaseInsensitive)
        self.view.setModel(proxy)

        if self.splitter_state:
            self.splitter.restoreState(self.splitter_state)

        self.assign_delegates()

        # Stay blocked until the remote index query finishes (__set_index).
        self.setBlocking(True)
        self.setStatusMessage("Initializing")

        self._executor = ThreadPoolExecutor(max_workers=1)
        f = self._executor.submit(self.list_remote)
        w = FutureWatcher(f, parent=self)
        w.done.connect(self.__set_index)

    def assign_delegates(self):
        """Install the item delegates for the view columns."""
        # NOTE: All columns must have size hinting delegates.
        # QTreeView queries only the columns displayed in the viewport so
        # the layout would be different depending in the horizontal scroll
        # position
        self.view.setItemDelegate(UniformHeightDelegate(self))
        self.view.setItemDelegateForColumn(
            self.Header.islocal,
            UniformHeightIndicatorDelegate(self, role=Qt.DisplayRole)
        )
        self.view.setItemDelegateForColumn(
            self.Header.size,
            SizeDelegate(self)
        )
        self.view.setItemDelegateForColumn(
            self.Header.instances,
            NumericalDelegate(self)
        )
        self.view.setItemDelegateForColumn(
            self.Header.variables,
            NumericalDelegate(self)
        )
        self.view.resizeColumnToContents(self.Header.islocal)

    def _parse_info(self, file_path):
        """Return a Namespace describing the dataset at ``file_path``.

        The remote info record is preferred over the local one. A dataset is
        ``outdated`` when it exists both locally and remotely with different
        ``'version'`` entries; an outdated dataset is not reported as local.
        """
        if file_path in self.allinfo_remote:
            info = self.allinfo_remote[file_path]
        else:
            info = self.allinfo_local[file_path]

        islocal = file_path in self.allinfo_local
        isremote = file_path in self.allinfo_remote

        outdated = islocal and isremote and (
            self.allinfo_remote[file_path].get('version', '') !=
            self.allinfo_local[file_path].get('version', '')
        )
        islocal &= not outdated

        prefix = os.path.join('', *file_path[:-1])
        filename = file_path[-1]

        return Namespace(file_path=file_path, prefix=prefix, filename=filename,
                         islocal=islocal, outdated=outdated, **info)

    def create_model(self):
        """Build the item model of all known (local and remote) datasets.

        Returns
        -------
        model : QStandardItemModel
        current_index : int
            Row of the dataset matching ``self.selected_id`` or -1.
        """
        allkeys = set(self.allinfo_local) | set(self.allinfo_remote)
        allkeys = sorted(allkeys)

        model = QStandardItemModel(self)
        model.setHorizontalHeaderLabels(self._header_labels)

        current_index = -1
        for i, file_path in enumerate(allkeys):
            datainfo = self._parse_info(file_path)
            item1 = QStandardItem()
            item1.setData(" " if datainfo.islocal else "", Qt.DisplayRole)
            # The full info record travels with the first column's item.
            item1.setData(datainfo, Qt.UserRole)
            item2 = QStandardItem(datainfo.title)
            item3 = QStandardItem()
            item3.setData(datainfo.size, Qt.DisplayRole)
            item4 = QStandardItem()
            item4.setData(datainfo.instances, Qt.DisplayRole)
            item5 = QStandardItem()
            item5.setData(datainfo.variables, Qt.DisplayRole)
            item6 = QStandardItem()
            item6.setData(datainfo.target, Qt.DisplayRole)
            if datainfo.target:
                item6.setIcon(variable_icon(datainfo.target))
            item7 = QStandardItem()
            item7.setData(", ".join(datainfo.tags) if datainfo.tags else "",
                          Qt.DisplayRole)
            row = [item1, item2, item3, item4, item5, item6, item7]
            model.appendRow(row)

            if os.path.join(*file_path) == self.selected_id:
                current_index = i

        return model, current_index

    @Slot(object)
    def __set_index(self, f):
        # type: (Future) -> None
        # set results from `list_remote` query.
        assert QThread.currentThread() is self.thread()
        assert f.done()
        self.setBlocking(False)
        self.setStatusMessage("")
        self.allinfo_local = self.list_local()

        try:
            self.allinfo_remote = f.result()
        except Exception:  # anything can happen, pylint: disable=broad-except
            log.exception("Error while fetching updated index")
            if not self.allinfo_local:
                self.Error.no_remote_datasets()
            else:
                self.Warning.only_local_datasets()
            self.allinfo_remote = {}

        model, current_index = self.create_model()

        self.view.model().setSourceModel(model)
        self.view.selectionModel().selectionChanged.connect(
            self.__on_selection
        )

        self.view.resizeColumnToContents(0)
        self.view.setColumnWidth(
            1, min(self.view.sizeHintForColumn(1),
                   self.view.fontMetrics().width("X" * 24)))

        header = self.view.header()
        header.restoreState(self.header_state)

        # Update the info text
        self.infolabel.setText(
            format_info(model.rowCount(), len(self.allinfo_local)))

        if current_index != -1:
            # Restore the selection stored in the settings.
            selmodel = self.view.selectionModel()
            selmodel.select(
                self.view.model().mapFromSource(model.index(current_index, 0)),
                QItemSelectionModel.ClearAndSelect | QItemSelectionModel.Rows)

    def __update_cached_state(self):
        """Refresh the 'is local' indicator of every row and the info text."""
        model = self.view.model().sourceModel()
        localinfo = self.list_local()
        assert isinstance(model, QStandardItemModel)
        allinfo = []
        for i in range(model.rowCount()):
            item = model.item(i, 0)
            info = item.data(Qt.UserRole)
            info.islocal = info.file_path in localinfo
            item.setData(" " if info.islocal else "", Qt.DisplayRole)
            allinfo.append(info)

        self.infolabel.setText(format_info(
            model.rowCount(), sum(info.islocal for info in allinfo)))

    def selected_dataset(self):
        """
        Return the current selected dataset info or None if not selected

        Returns
        -------
        info : Optional[Namespace]
        """
        rows = self.view.selectionModel().selectedRows(0)
        assert 0 <= len(rows) <= 1
        current = rows[0] if rows else None  # type: Optional[QModelIndex]
        if current is not None:
            info = current.data(Qt.UserRole)
            assert isinstance(info, Namespace)
        else:
            info = None
        return info

    def filter(self):
        """Apply the filter line edit text as a fixed-string row filter."""
        filter_string = self.filterLineEdit.text().strip()
        proxyModel = self.view.model()
        if proxyModel:
            proxyModel.setFilterFixedString(filter_string)

    def __on_selection(self):
        # Main datasets view selection has changed
        rows = self.view.selectionModel().selectedRows(0)
        assert 0 <= len(rows) <= 1
        current = rows[0] if rows else None  # type: Optional[QModelIndex]
        if current is not None:
            current = self.view.model().mapToSource(current)
            di = current.data(Qt.UserRole)
            text = description_html(di)
            self.descriptionlabel.setText(text)
            self.selected_id = os.path.join(di.prefix, di.filename)
        else:
            self.descriptionlabel.setText("")
            self.selected_id = None

        self.commit()

    def commit(self):
        """
        Commit a dataset to the output immediately (if available locally) or
        schedule download background and an eventual send.

        During the download the widget is in blocking state
        (OWWidget.isBlocking)
        """
        di = self.selected_dataset()
        if di is not None:
            self.Error.clear()

            if self.__awaiting_state is not None:
                # disconnect from the __commit_complete
                self.__awaiting_state.watcher.done.disconnect(
                    self.__commit_complete)
                # .. and connect to update_cached_state
                # self.__awaiting_state.watcher.done.connect(
                #    self.__update_cached_state)
                # TODO: There are possible pending __progress_advance queued
                self.__awaiting_state.pb.advance.disconnect(
                    self.__progress_advance)
                self.progressBarFinished(processEvents=None)
                self.__awaiting_state = None

            if not di.islocal:
                pr = progress()
                callback = lambda pr=pr: pr.advance.emit()
                pr.advance.connect(self.__progress_advance, Qt.QueuedConnection)

                self.progressBarInit(processEvents=None)
                self.setStatusMessage("Fetching...")
                self.setBlocking(True)

                f = self._executor.submit(
                    ensure_local, self.INDEX_URL, di.file_path,
                    self.local_cache_path, force=di.outdated,
                    progress_advance=callback)
                w = FutureWatcher(f, parent=self)
                w.done.connect(self.__commit_complete)
                self.__awaiting_state = _FetchState(f, w, pr)
            else:
                self.setStatusMessage("")
                self.setBlocking(False)
                self.commit_cached(di.file_path)
        else:
            self.Outputs.data.send(None)

    @Slot(object)
    def __commit_complete(self, f):
        # complete the commit operation after the required file has been
        # downloaded
        assert QThread.currentThread() is self.thread()
        assert self.__awaiting_state is not None
        assert self.__awaiting_state.future is f

        if self.isBlocking():
            self.progressBarFinished(processEvents=None)
            self.setBlocking(False)
            self.setStatusMessage("")

        self.__awaiting_state = None

        try:
            path = f.result()
        # anything can happen here, pylint: disable=broad-except
        except Exception as ex:
            log.exception("Error:")
            self.error(format_exception(ex))
            path = None

        self.__update_cached_state()

        if path is not None:
            data = self.load_data(path)
        else:
            data = None
        self.Outputs.data.send(data)

    def commit_cached(self, file_path):
        """Load the locally cached dataset at ``file_path`` and send it."""
        path = LocalFiles(self.local_cache_path).localpath(*file_path)
        self.Outputs.data.send(self.load_data(path))

    @Slot()
    def __progress_advance(self):
        assert QThread.currentThread() is self.thread()
        self.progressBarAdvance(1, processEvents=None)

    def onDeleteWidget(self):
        """Detach from a pending download so its callbacks are not invoked."""
        super().onDeleteWidget()
        if self.__awaiting_state is not None:
            self.__awaiting_state.watcher.done.disconnect(self.__commit_complete)
            self.__awaiting_state.pb.advance.disconnect(self.__progress_advance)
            self.__awaiting_state = None

    @staticmethod
    def sizeHint():
        return QSize(900, 600)

    def closeEvent(self, event):
        """Store the splitter and header state in the settings on close."""
        self.splitter_state = bytes(self.splitter.saveState())
        self.header_state = bytes(self.view.header().saveState())
        super().closeEvent(event)

    def load_data(self, path):
        """Load and return the data table stored at ``path``."""
        # pylint: disable=no-self-use
        return Orange.data.Table(path)

    def list_remote(self):
        # type: () -> Dict[Tuple[str, ...], dict]
        client = ServerFiles(server=self.INDEX_URL)
        return client.allinfo()

    def list_local(self):
        # type: () -> Dict[Tuple[str, ...], dict]
        return LocalFiles(self.local_cache_path).allinfo()
class OWDataSets(OWWidget): name = "Datasets" description = "Load a dataset from an online repository" icon = "icons/DataSets.svg" priority = 20 replaces = ["orangecontrib.prototypes.widgets.owdatasets.OWDataSets"] keywords = ["online"] # The following constants can be overridden in a subclass # to reuse this widget for a different repository # Take care when refactoring! (used in e.g. single-cell) INDEX_URL = "https://datasets.biolab.si/" DATASET_DIR = "datasets" # override HEADER_SCHEMA to define new columns # if schema is changed override methods: self.assign_delegates and # self.create_model HEADER_SCHEMA = [['islocal', { 'label': '' }], ['title', { 'label': 'Title' }], ['size', { 'label': 'Size' }], ['instances', { 'label': 'Instances' }], ['variables', { 'label': 'Variables' }], ['target', { 'label': 'Target' }], ['tags', { 'label': 'Tags' }]] # type: List[str, dict] class Error(OWWidget.Error): no_remote_datasets = Msg("Could not fetch dataset list") class Warning(OWWidget.Warning): only_local_datasets = Msg("Could not fetch datasets list, only local " "cached datasets are shown") class Outputs: data = Output("Data", Orange.data.Table) #: Selected dataset id selected_id = settings.Setting(None) # type: Optional[str] auto_commit = settings.Setting(False) # type: bool #: main area splitter state splitter_state = settings.Setting(b'') # type: bytes header_state = settings.Setting(b'') # type: bytes def __init__(self): super().__init__() self.allinfo_local = {} self.allinfo_remote = {} self.local_cache_path = os.path.join(data_dir(), self.DATASET_DIR) self._header_labels = [ header['label'] for _, header in self.HEADER_SCHEMA ] self._header_index = namedtuple( '_header_index', [info_tag for info_tag, _ in self.HEADER_SCHEMA]) self.Header = self._header_index( *[index for index, _ in enumerate(self._header_labels)]) self.__awaiting_state = None # type: Optional[_FetchState] box = gui.widgetBox(self.controlArea, "Info") self.infolabel = 
QLabel(text="Initializing...\n\n") box.layout().addWidget(self.infolabel) gui.widgetLabel(self.mainArea, "Filter") self.filterLineEdit = QLineEdit(textChanged=self.filter) self.mainArea.layout().addWidget(self.filterLineEdit) self.splitter = QSplitter(orientation=Qt.Vertical) self.view = QTreeView( sortingEnabled=True, selectionMode=QTreeView.SingleSelection, alternatingRowColors=True, rootIsDecorated=False, editTriggers=QTreeView.NoEditTriggers, uniformRowHeights=True, ) # the method doesn't exists yet, pylint: disable=unnecessary-lambda self.view.doubleClicked.connect(lambda: self.unconditional_commit()) box = gui.widgetBox(self.splitter, "Description", addToLayout=False) self.descriptionlabel = QLabel( wordWrap=True, textFormat=Qt.RichText, ) self.descriptionlabel = QTextBrowser( openExternalLinks=True, textInteractionFlags=(Qt.TextSelectableByMouse | Qt.LinksAccessibleByMouse)) self.descriptionlabel.setFrameStyle(QTextBrowser.NoFrame) # no (white) text background self.descriptionlabel.viewport().setAutoFillBackground(False) box.layout().addWidget(self.descriptionlabel) self.splitter.addWidget(self.view) self.splitter.addWidget(box) self.splitter.setSizes([300, 200]) self.splitter.splitterMoved.connect(lambda: setattr( self, "splitter_state", bytes(self.splitter.saveState()))) self.mainArea.layout().addWidget(self.splitter) self.controlArea.layout().addStretch(10) gui.auto_commit(self.controlArea, self, "auto_commit", "Send Data") proxy = QSortFilterProxyModel() proxy.setFilterKeyColumn(-1) proxy.setFilterCaseSensitivity(False) self.view.setModel(proxy) if self.splitter_state: self.splitter.restoreState(self.splitter_state) self.assign_delegates() self.setBlocking(True) self.setStatusMessage("Initializing") self._executor = ThreadPoolExecutor(max_workers=1) f = self._executor.submit(self.list_remote) w = FutureWatcher(f, parent=self) w.done.connect(self.__set_index) def assign_delegates(self): # NOTE: All columns must have size hinting delegates. 
# QTreeView queries only the columns displayed in the viewport so
# the layout would be different depending in the horizontal scroll
# position
        self.view.setItemDelegate(UniformHeightDelegate(self))
        self.view.setItemDelegateForColumn(
            self.Header.islocal,
            UniformHeightIndicatorDelegate(self, role=Qt.DisplayRole))
        self.view.setItemDelegateForColumn(self.Header.size,
                                           SizeDelegate(self))
        self.view.setItemDelegateForColumn(self.Header.instances,
                                           NumericalDelegate(self))
        self.view.setItemDelegateForColumn(self.Header.variables,
                                           NumericalDelegate(self))
        self.view.resizeColumnToContents(self.Header.islocal)

    def _parse_info(self, file_path):
        """
        Build a `Namespace` describing the dataset stored under `file_path`.

        The info dict is taken from the remote index when the path is listed
        there, otherwise from the local one. A dataset is `outdated` when it
        is present both locally and remotely and the two 'version' entries
        differ; an outdated dataset is reported as not local.
        """
        if file_path in self.allinfo_remote:
            info = self.allinfo_remote[file_path]
        else:
            info = self.allinfo_local[file_path]

        islocal = file_path in self.allinfo_local
        isremote = file_path in self.allinfo_remote

        outdated = islocal and isremote and (
            self.allinfo_remote[file_path].get('version', '') !=
            self.allinfo_local[file_path].get('version', ''))
        # an outdated local copy is treated as if it were not local
        islocal &= not outdated

        # file_path is a sequence of path components; the last one is the
        # file name, everything before it forms the prefix
        prefix = os.path.join('', *file_path[:-1])
        filename = file_path[-1]

        return Namespace(file_path=file_path, prefix=prefix,
                         filename=filename, islocal=islocal,
                         outdated=outdated, **info)

    def create_model(self):
        """
        Create the item model listing all known (local and remote) datasets.

        Returns
        -------
        (model, current_index) : Tuple[QStandardItemModel, int]
            `current_index` is the row whose joined path equals
            `self.selected_id`, or -1 when there is no such row.
        """
        allkeys = set(self.allinfo_local) | set(self.allinfo_remote)
        allkeys = sorted(allkeys)

        model = QStandardItemModel(self)
        model.setHorizontalHeaderLabels(self._header_labels)

        current_index = -1
        for i, file_path in enumerate(allkeys):
            datainfo = self._parse_info(file_path)
            # first column: local indicator; also carries the full info
            # namespace under Qt.UserRole
            item1 = QStandardItem()
            item1.setData(" " if datainfo.islocal else "", Qt.DisplayRole)
            item1.setData(datainfo, Qt.UserRole)
            item2 = QStandardItem(datainfo.title)
            item3 = QStandardItem()
            item3.setData(datainfo.size, Qt.DisplayRole)
            item4 = QStandardItem()
            item4.setData(datainfo.instances, Qt.DisplayRole)
            item5 = QStandardItem()
            item5.setData(datainfo.variables, Qt.DisplayRole)
            item6 = QStandardItem()
            item6.setData(datainfo.target, Qt.DisplayRole)
            if datainfo.target:
                item6.setIcon(variable_icon(datainfo.target))
            item7 = QStandardItem()
            item7.setData(", ".join(datainfo.tags) if datainfo.tags else "",
                          Qt.DisplayRole)
            row = [item1, item2, item3, item4, item5, item6, item7]
            model.appendRow(row)

            # remember the row of the previously selected dataset
            if os.path.join(*file_path) == self.selected_id:
                current_index = i

        return model, current_index

    @Slot(object)
    def __set_index(self, f):
        # type: (Future) -> None
        """
        Populate the view from the finished `list_remote` future `f`.

        When fetching the remote index fails, the error is logged, an
        error (no local datasets) or a warning (local datasets only) is
        shown and the remote index is treated as empty.
        """
        # set results from `list_remote` query.
        assert QThread.currentThread() is self.thread()
        assert f.done()
        self.setBlocking(False)
        self.setStatusMessage("")
        self.allinfo_local = self.list_local()

        try:
            self.allinfo_remote = f.result()
        except Exception:  # anything can happen, pylint: disable=broad-except
            log.exception("Error while fetching updated index")
            if not self.allinfo_local:
                self.Error.no_remote_datasets()
            else:
                self.Warning.only_local_datasets()
            self.allinfo_remote = {}

        model, current_index = self.create_model()

        # self.view.model() is a proxy model; the new item model becomes its
        # source
        self.view.model().setSourceModel(model)
        self.view.selectionModel().selectionChanged.connect(
            self.__on_selection)

        self.view.resizeColumnToContents(0)
        # cap the width of column 1 at the width of 37 "X" characters
        self.view.setColumnWidth(
            1, min(self.view.sizeHintForColumn(1),
                   self.view.fontMetrics().width("X" * 37)))

        header = self.view.header()
        header.restoreState(self.header_state)

        # Update the info text
        self.infolabel.setText(
            format_info(model.rowCount(), len(self.allinfo_local)))

        # restore the previous selection, if its row was found
        if current_index != -1:
            selmodel = self.view.selectionModel()
            selmodel.select(
                self.view.model().mapFromSource(model.index(current_index, 0)),
                QItemSelectionModel.ClearAndSelect | QItemSelectionModel.Rows)

    def __update_cached_state(self):
        """
        Refresh the 'is local' indicator of every row from `list_local()`
        and update the info label accordingly.
        """
        model = self.view.model().sourceModel()
        localinfo = self.list_local()
        assert isinstance(model, QStandardItemModel)
        allinfo = []
        for i in range(model.rowCount()):
            item = model.item(i, 0)
            info = item.data(Qt.UserRole)
            info.islocal = info.file_path in localinfo
            item.setData(" " if info.islocal else "", Qt.DisplayRole)
            allinfo.append(info)

        self.infolabel.setText(
            format_info(model.rowCount(),
                        sum(info.islocal for info in allinfo)))

    def selected_dataset(self):
        """
        Return the current selected dataset info or None if not selected

        Returns
        -------
        info : Optional[Namespace]
        """
        rows = self.view.selectionModel().selectedRows(0)
        assert 0 <= len(rows) <= 1
        current = rows[0] if rows else None  # type: Optional[QModelIndex]
        if current is not None:
            info = current.data(Qt.UserRole)
            assert isinstance(info, Namespace)
        else:
            info = None
        return info

    def filter(self):
        """Apply the stripped text of the filter line edit to the view's
        model as a fixed-string filter."""
        filter_string = self.filterLineEdit.text().strip()
        proxyModel = self.view.model()
        if proxyModel:
            proxyModel.setFilterFixedString(filter_string)

    def __on_selection(self):
        """
        React to a selection change in the datasets view: update the
        description label and `selected_id`, then commit.
        """
        # Main datasets view selection has changed
        rows = self.view.selectionModel().selectedRows(0)
        assert 0 <= len(rows) <= 1
        current = rows[0] if rows else None  # type: Optional[QModelIndex]
        if current is not None:
            # the selection lives in the proxy model; map to the source row
            current = self.view.model().mapToSource(current)
            di = current.data(Qt.UserRole)
            text = description_html(di)
            self.descriptionlabel.setText(text)
            self.selected_id = os.path.join(di.prefix, di.filename)
        else:
            self.descriptionlabel.setText("")
            self.selected_id = None

        self.commit()

    def commit(self):
        """
        Commit a dataset to the output immediately (if available locally) or
        schedule download background and an eventual send.

        During the download the widget is in blocking state
        (OWWidget.isBlocking)
        """
        di = self.selected_dataset()
        if di is not None:
            self.Error.clear()

            if self.__awaiting_state is not None:
                # A previous fetch is still being tracked; detach from it
                # before handling the new selection.
                # disconnect from the __commit_complete
                self.__awaiting_state.watcher.done.disconnect(
                    self.__commit_complete)
                # .. and connect to update_cached_state
                # self.__awaiting_state.watcher.done.connect(
                #    self.__update_cached_state)
                # TODO: There are possible pending __progress_advance queued
                self.__awaiting_state.pb.advance.disconnect(
                    self.__progress_advance)
                self.progressBarFinished(processEvents=None)
                self.__awaiting_state = None

            if not di.islocal:
                pr = progress()
                # `pr` is bound as a default argument so the callback keeps
                # referring to this particular progress object
                callback = lambda pr=pr: pr.advance.emit()
                pr.advance.connect(self.__progress_advance,
                                   Qt.QueuedConnection)

                self.progressBarInit(processEvents=None)
                self.setStatusMessage("Fetching...")
                self.setBlocking(True)

                f = self._executor.submit(
                    ensure_local, self.INDEX_URL, di.file_path,
                    self.local_cache_path, force=di.outdated,
                    progress_advance=callback)
                w = FutureWatcher(f, parent=self)
                w.done.connect(self.__commit_complete)
                self.__awaiting_state = _FetchState(f, w, pr)
            else:
                self.setStatusMessage("")
                self.setBlocking(False)
                self.commit_cached(di.file_path)
        else:
            self.Outputs.data.send(None)

    @Slot(object)
    def __commit_complete(self, f):
        """
        Finish a commit once the fetch future `f` is done: leave the
        blocking state, refresh the cached-state indicators and send the
        loaded data (or None if the fetch raised).
        """
        # complete the commit operation after the required file has been
        # downloaded
        assert QThread.currentThread() is self.thread()
        assert self.__awaiting_state is not None
        assert self.__awaiting_state.future is f

        if self.isBlocking():
            self.progressBarFinished(processEvents=None)
            self.setBlocking(False)
            self.setStatusMessage("")

        self.__awaiting_state = None

        try:
            path = f.result()
        # anything can happen here, pylint: disable=broad-except
        except Exception as ex:
            log.exception("Error:")
            self.error(format_exception(ex))
            path = None

        self.__update_cached_state()

        if path is not None:
            data = self.load_data(path)
        else:
            data = None
        self.Outputs.data.send(data)

    def commit_cached(self, file_path):
        """Load the dataset `file_path` from the local cache and send it."""
        path = LocalFiles(self.local_cache_path).localpath(*file_path)
        self.Outputs.data.send(self.load_data(path))

    @Slot()
    def __progress_advance(self):
        """Advance the widget's progress bar by one step."""
        assert QThread.currentThread() is self.thread()
        self.progressBarAdvance(1, processEvents=None)

    def onDeleteWidget(self):
        """Detach from a fetch that is still being tracked, if any."""
        super().onDeleteWidget()
        if self.__awaiting_state is not None:
            self.__awaiting_state.watcher.done.disconnect(
                self.__commit_complete)
            self.__awaiting_state.pb.advance.disconnect(
                self.__progress_advance)
            self.__awaiting_state = None

    @staticmethod
    def sizeHint():
        """Return the preferred widget size (1100 x 500)."""
        return QSize(1100, 500)

    def closeEvent(self, event):
        """Store the splitter and header state before closing."""
        self.splitter_state = bytes(self.splitter.saveState())
        self.header_state = bytes(self.view.header().saveState())
        super().closeEvent(event)

    def load_data(self, path):
        """Load and return the table stored at `path`."""
        # pylint: disable=no-self-use
        return Orange.data.Table(path)

    def list_remote(self):
        # type: () -> Dict[Tuple[str, ...], dict]
        """Return the info of all files listed by the server at INDEX_URL."""
        client = ServerFiles(server=self.INDEX_URL)
        return client.allinfo()

    def list_local(self):
        # type: () -> Dict[Tuple[str, ...], dict]
        """Return the info of all files in the local cache."""
        return LocalFiles(self.local_cache_path).allinfo()
class OWSetEnrichment(widget.OWWidget):
    """
    Widget computing set (gene set) enrichment of the input entities.

    The names found in the "Data" input are matched against the selected
    set collections; the enrichment of each set is computed in a worker
    thread and shown in a tree view that can be filtered by count, p-value,
    FDR and category. The rows of the input that map to the selected sets
    are sent to the "Data subset" output.
    """
    name = "Set Enrichment"
    description = ""
    icon = "../widgets/icons/GeneSetEnrichment.svg"
    priority = 5000

    inputs = [("Data", Orange.data.Table, "setData", widget.Default),
              ("Reference", Orange.data.Table, "setReference")]
    outputs = [("Data subset", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    taxid = settings.ContextSetting(None)
    speciesIndex = settings.ContextSetting(0)
    genesinrows = settings.ContextSetting(False)
    geneattr = settings.ContextSetting(0)
    categoriesCheckState = settings.ContextSetting({})

    useReferenceData = settings.Setting(False)
    useMinCountFilter = settings.Setting(True)
    useMaxPValFilter = settings.Setting(True)
    useMaxFDRFilter = settings.Setting(True)
    minClusterCount = settings.Setting(3)
    maxPValue = settings.Setting(0.01)
    maxFDR = settings.Setting(0.01)
    autocommit = settings.Setting(False)

    # Widget state flags (combined/tested with bitwise operators)
    Ready, Initializing, Loading, RunningEnrichment = 0, 1, 2, 4

    def __init__(self, parent=None):
        super().__init__(parent)

        # One flag per matcher in [GMGO, GMKEGG, GMNCBI, GMAffy]
        # (see `_genematcher`)
        self.geneMatcherSettings = [False, False, True, False]

        self.data = None
        self.referenceData = None
        self.taxid_list = []
        # String variables of the current input and the known gene sets;
        # initialised here so that they exist before the first input arrives.
        self.geneAttrs = []
        self.genesets = []

        # (taxid, Future[matcher]) of the currently cached gene matcher
        self.__genematcher = (None, fulfill(gene.matcher([])))
        self.__invalidated = False

        self.currentAnnotatedCategories = []
        self.state = None
        self.__state = OWSetEnrichment.Initializing

        box = gui.widgetBox(self.controlArea, "Info")
        self.infoBox = gui.widgetLabel(box, "Info")
        self.infoBox.setText("No data on input.\n")

        self.speciesComboBox = gui.comboBox(
            self.controlArea, self, "speciesIndex", "Species",
            callback=self.__on_speciesIndexChanged)

        box = gui.widgetBox(self.controlArea, "Entity names")
        self.geneAttrComboBox = gui.comboBox(
            box, self, "geneattr", "Entity feature", sendSelectedValue=0,
            callback=self.updateAnnotations)

        cb = gui.checkBox(
            box, self, "genesinrows", "Use feature names",
            callback=self.updateAnnotations,
            disables=[(-1, self.geneAttrComboBox)])
        cb.makeConsistent()

        # NOTE: there is no "Gene matcher settings" button because
        # `updateGeneMatcherSettings` is not implemented.

        self.referenceRadioBox = gui.radioButtonsInBox(
            self.controlArea, self, "useReferenceData",
            ["All entities", "Reference set (input)"],
            tooltips=["Use entire genome (for gene set enrichment) or all " +
                      "available entities for reference",
                      "Use entities from Reference Examples input signal " +
                      "as reference"],
            box="Reference", callback=self.updateAnnotations)

        box = gui.widgetBox(self.controlArea, "Entity Sets")
        self.groupsWidget = QTreeWidget(self)
        self.groupsWidget.setHeaderLabels(["Category"])
        box.layout().addWidget(self.groupsWidget)

        hLayout = QHBoxLayout()
        hLayout.setSpacing(10)
        hWidget = gui.widgetBox(self.mainArea, orientation=hLayout)
        gui.spin(hWidget, self, "minClusterCount",
                 0, 100, label="Entities",
                 tooltip="Minimum entity count",
                 callback=self.filterAnnotationsChartView,
                 callbackOnReturn=True,
                 checked="useMinCountFilter",
                 checkCallback=self.filterAnnotationsChartView)

        pvalfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
        cb = gui.checkBox(
            pvalfilterbox, self, "useMaxPValFilter", "p-value",
            callback=self.filterAnnotationsChartView)
        sp = gui.doubleSpin(
            pvalfilterbox, self, "maxPValue", 0.0, 1.0, 0.0001,
            tooltip="Maximum p-value",
            callback=self.filterAnnotationsChartView,
            callbackOnReturn=True,
        )
        # The spin box follows its own check box (previously it was
        # initialised from the FDR setting by mistake).
        sp.setEnabled(self.useMaxPValFilter)
        cb.toggled[bool].connect(sp.setEnabled)

        pvalfilterbox.layout().setAlignment(cb, Qt.AlignRight)
        pvalfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

        fdrfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
        cb = gui.checkBox(
            fdrfilterbox, self, "useMaxFDRFilter", "FDR",
            callback=self.filterAnnotationsChartView)
        sp = gui.doubleSpin(
            fdrfilterbox, self, "maxFDR", 0.0, 1.0, 0.0001,
            tooltip="Maximum False discovery rate",
            callback=self.filterAnnotationsChartView,
            callbackOnReturn=True,
        )
        sp.setEnabled(self.useMaxFDRFilter)
        cb.toggled[bool].connect(sp.setEnabled)

        fdrfilterbox.layout().setAlignment(cb, Qt.AlignRight)
        fdrfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

        self.filterLineEdit = QLineEdit(
            self, placeholderText="Filter ...")

        self.filterCompleter = QCompleter(self.filterLineEdit)
        self.filterCompleter.setCaseSensitivity(Qt.CaseInsensitive)
        self.filterLineEdit.setCompleter(self.filterCompleter)

        hLayout.addWidget(self.filterLineEdit)
        self.mainArea.layout().addWidget(hWidget)

        self.filterLineEdit.textChanged.connect(
            self.filterAnnotationsChartView)

        self.annotationsChartView = QTreeView(
            alternatingRowColors=True,
            sortingEnabled=True,
            selectionMode=QTreeView.ExtendedSelection,
            rootIsDecorated=False,
            editTriggers=QTreeView.NoEditTriggers,
        )
        self.annotationsChartView.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.annotationsChartView)

        contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
            self.annotationsChartView)
        self.annotationsChartView.header().installEventFilter(
            contextEventFilter)

        self.groupsWidget.itemClicked.connect(self.subsetSelectionChanged)
        gui.auto_commit(self.controlArea, self, "autocommit", "Commit")

        # Block the widget until the required databases are available.
        self.setBlocking(True)

        task = EnsureDownloaded(
            [(taxonomy.Taxonomy.DOMAIN, taxonomy.Taxonomy.FILENAME),
             (geneset.sfdomain, "index.pck")]
        )

        task.finished.connect(self.__initialize_finish)
        self.setStatusMessage("Initializing")
        self._executor = ThreadExecutor(
            parent=self, threadPool=QThreadPool(self))
        self._executor.submit(task)

    def sizeHint(self):
        """Return the preferred widget size."""
        return QSize(1024, 600)

    def __initialize_finish(self):
        # Finalize the widget's initialization (preferably after
        # ensuring all required databases have been downloaded).
        sets = geneset.list_all()
        taxids = set(taxonomy.common_taxids() +
                     list(filter(None, [tid for _, tid, _ in sets])))
        organisms = [(tid, name_or_none(tid)) for tid in taxids]
        organisms = [(tid, name) for tid, name in organisms
                     if name is not None]

        # The first entry stands for "no organism".
        organisms = [(None, "None")] + sorted(organisms)
        taxids = [tid for tid, _ in organisms]
        names = [name for _, name in organisms]
        self.taxid_list = taxids

        self.speciesComboBox.clear()
        self.speciesComboBox.addItems(names)
        self.genesets = sets

        if self.taxid in self.taxid_list:
            taxid = self.taxid
        else:
            taxid = self.taxid_list[0]

        # Reset so that setCurrentOrganism always sees a change.
        self.taxid = None
        self.setCurrentOrganism(taxid)
        self.setBlocking(False)
        self.__state = OWSetEnrichment.Ready
        self.setStatusMessage("")

    def setCurrentOrganism(self, taxid):
        """Set the current organism `taxid`."""
        if taxid not in self.taxid_list:
            taxid = self.taxid_list[min(self.speciesIndex,
                                        len(self.taxid_list) - 1)]
        if self.taxid != taxid:
            self.taxid = taxid
            self.speciesIndex = self.taxid_list.index(taxid)
            self.refreshHierarchy()
            self._invalidateGeneMatcher()
            self._invalidate()

    def currentOrganism(self):
        """Return the current organism taxid"""
        return self.taxid

    def __on_speciesIndexChanged(self):
        taxid = self.taxid_list[self.speciesIndex]
        # Placeholder value that forces setCurrentOrganism to register
        # the change.
        self.taxid = "< Do not look >"
        self.setCurrentOrganism(taxid)
        if self.__invalidated and self.data is not None:
            self.updateAnnotations()

    def clear(self):
        """Clear/reset the widget state."""
        self._cancelPending()
        self.state = None

        self.__state = self.__state & ~OWSetEnrichment.RunningEnrichment

        # _clearView already clears the results model.
        self._clearView()

        self.geneAttrComboBox.clear()
        self.geneAttrs = []
        self._updatesummary()

    def _cancelPending(self):
        """Cancel pending tasks."""
        if self.state is not None:
            self.state.results.cancel()
            self.state.namematcher.cancel()
            self.state.cancelled = True

    def _clearView(self):
        """Clear the enrichment report view (main area)."""
        if self.annotationsChartView.model() is not None:
            self.annotationsChartView.model().clear()

    def setData(self, data=None):
        """Set the input dataset with query gene names"""
        if self.__state & OWSetEnrichment.Initializing:
            self.__initialize_finish()

        self.error(0)
        self.closeContext()
        self.clear()

        self.groupsWidget.clear()
        self.data = data

        if data is not None:
            varlist = [var for var in data.domain.variables + data.domain.metas
                       if isinstance(var, Orange.data.StringVariable)]

            self.geneAttrs = varlist
            for var in varlist:
                self.geneAttrComboBox.addItem(*gui.attributeItem(var))

            oldtaxid = self.taxid
            self.geneattr = min(self.geneattr, len(self.geneAttrs) - 1)

            taxid = data_hints.get_hint(data, "taxid", "")
            if taxid in self.taxid_list:
                self.speciesIndex = self.taxid_list.index(taxid)
                self.taxid = taxid

            self.genesinrows = data_hints.get_hint(
                data, "genesinrows", self.genesinrows)

            self.openContext(data)
            if oldtaxid != self.taxid:
                self.taxid = "< Do not look >"
                self.setCurrentOrganism(taxid)

            self.refreshHierarchy()
            self._invalidate()

    def setReference(self, data=None):
        """Set the (optional) input dataset with reference gene names."""
        self.referenceData = data
        self.referenceRadioBox.setEnabled(bool(data))
        if self.useReferenceData:
            self._invalidate()

    def handleNewSignals(self):
        """Recompute the enrichment if any input invalidated the results."""
        if self.__invalidated:
            self.updateAnnotations()

    def _invalidateGeneMatcher(self):
        """Cancel and drop the cached gene matcher future."""
        _, f = self.__genematcher
        f.cancel()
        self.__genematcher = (None, fulfill(gene.matcher([])))

    def _invalidate(self):
        """Mark the current results as out of date."""
        self.__invalidated = True

    def genesFromTable(self, table):
        """
        Return the list of names contained in `table`.

        The names are the attribute names when `genesinrows` is set,
        otherwise the values of the selected string variable.
        """
        if self.genesinrows:
            genes = [attr.name for attr in table.domain.attributes]
        else:
            geneattr = self.geneAttrs[self.geneattr]
            genes = [str(ex[geneattr]) for ex in table]
        return genes

    def getHierarchy(self, taxid):
        """
        Return the set hierarchies for `taxid` and for sets without an
        organism as ``((taxid, tree), (None, tree))`` where every tree is a
        nested dict keyed by hierarchy component.
        """
        def recursive_dict():
            return defaultdict(recursive_dict)
        collection = recursive_dict()

        def collect(col, hier):
            if hier:
                collect(col[hier[0]], hier[1:])

        for hierarchy, t_id, _ in self.genesets:
            collect(collection[t_id], hierarchy)

        return (taxid, collection[taxid]), (None, collection[None])

    def setHierarchy(self, hierarchy, hierarchy_noorg):
        """Populate the categories tree from the two `(organism, tree)`
        pairs returned by `getHierarchy`."""
        self.groupsWidgetItems = {}

        def fill(col, parent, full=(), org=""):
            for key, value in sorted(col.items()):
                full_cat = full + (key,)
                item = QTreeWidgetItem(parent, [key])
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable |
                              Qt.ItemIsSelectable | Qt.ItemIsEnabled)
                if value:
                    item.setFlags(item.flags() | Qt.ItemIsTristate)

                # Restore the stored check state (checked by default).
                checked = self.categoriesCheckState.get(
                    (full_cat, org), Qt.Checked)
                item.setData(0, Qt.CheckStateRole, checked)
                item.setExpanded(True)
                item.category = full_cat
                item.organism = org
                self.groupsWidgetItems[full_cat] = item
                fill(value, item, full_cat, org=org)

        self.groupsWidget.clear()
        fill(hierarchy[1], self.groupsWidget, org=hierarchy[0])
        fill(hierarchy_noorg[1], self.groupsWidget, org=hierarchy_noorg[0])

    def refreshHierarchy(self):
        """Rebuild the categories tree for the currently selected species."""
        self.setHierarchy(
            *self.getHierarchy(taxid=self.taxid_list[self.speciesIndex]))

    def selectedCategories(self):
        """
        Return a list of currently selected hierarchy keys.

        A key is a tuple of identifiers from the root to the leaf of
        the hierarchy tree.
        """
        return [key for key, check in self.getHierarchyCheckState().items()
                if check == Qt.Checked]

    def getHierarchyCheckState(self):
        """Return a dict mapping `(category path, organism)` to the check
        state of the corresponding tree item."""
        def collect(item, full=()):
            checked = item.checkState(0)
            name = str(item.data(0, Qt.DisplayRole))
            full_cat = full + (name,)
            result = [((full_cat, item.organism), checked)]
            for i in range(item.childCount()):
                result.extend(collect(item.child(i), full_cat))
            return result

        items = [self.groupsWidget.topLevelItem(i)
                 for i in range(self.groupsWidget.topLevelItemCount())]
        states = itertools.chain(*(collect(item) for item in items))
        return dict(states)

    def subsetSelectionChanged(self, item, column):
        # The selected geneset (hierarchy) subset has been changed by the
        # user. Update the displayed results.
        # Update the stored state (persistent settings)
        self.categoriesCheckState = self.getHierarchyCheckState()
        categories = self.selectedCategories()

        if self.data is not None:
            # Recompute unless the new selection is a subset of what has
            # already been annotated; then re-filtering suffices.
            if self._nogenematching() or \
                    not set(categories) <= set(self.currentAnnotatedCategories):
                self.updateAnnotations()
            else:
                self.filterAnnotationsChartView()

    def updateGeneMatcherSettings(self):
        """Open the gene matcher settings dialog (not implemented).

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError

    def _genematcher(self):
        """
        Return a Future[gene.SequenceMatcher]
        """
        taxid = self.taxid_list[self.speciesIndex]

        current, matcher_f = self.__genematcher

        # Reuse the cached matcher when it is for the same organism.
        if taxid == current and \
                not matcher_f.cancelled():
            return matcher_f

        self._invalidateGeneMatcher()

        if taxid is None:
            self.__genematcher = (None, fulfill(gene.matcher([])))
            return self.__genematcher[1]

        matchers = [gene.GMGO, gene.GMKEGG, gene.GMNCBI, gene.GMAffy]
        matchers = [m for m, use in zip(matchers, self.geneMatcherSettings)
                    if use]

        def create():
            return gene.matcher([m(taxid) for m in matchers])

        matcher_f = self._executor.submit(create)
        self.__genematcher = (taxid, matcher_f)
        return self.__genematcher[1]

    def _nogenematching(self):
        """Return True when no organism or no gene matcher is selected."""
        return self.taxid is None or not any(self.geneMatcherSettings)

    def updateAnnotations(self):
        """
        Start (re)computing the enrichment for the current inputs.

        Pending computations are cancelled, the view is cleared and the
        work is submitted to the executor; the results are delivered to
        `__on_enrichment_finished` / `__on_enrichment_failed`.
        """
        if self.data is None:
            return

        assert not self.__state & OWSetEnrichment.Initializing
        self._cancelPending()
        self._clearView()

        self.information(0)
        self.warning(0)
        self.error(0)

        if not self.genesinrows and len(self.geneAttrs) == 0:
            self.error(0, "Input data contains no columns with gene names")
            return

        self.__state = OWSetEnrichment.RunningEnrichment

        taxid = self.taxid_list[self.speciesIndex]
        self.taxid = taxid

        categories = self.selectedCategories()

        clusterGenes = self.genesFromTable(self.data)

        if self.referenceData is not None and self.useReferenceData:
            referenceGenes = self.genesFromTable(self.referenceData)
        else:
            referenceGenes = None

        self.currentAnnotatedCategories = categories

        genematcher = self._genematcher()

        self.progressBarInit()

        ## Load collections in a worker thread
        # TODO: Use cached collections if already loaded and
        # use ensure_genesetsdownloaded with progress report (OWSelectGenes)
        collections = self._executor.submit(geneset.collections, *categories)

        def refset_null():
            """Return the default background reference set"""
            col = collections.result()
            return reduce(operator.ior, (set(g.genes) for g in col), set())

        def refset_ncbi():
            """Return all NCBI gene names"""
            geneinfo = gene.NCBIGeneInfo(taxid)
            return set(geneinfo.keys())

        # NOTE: `ref_set`, `querynames` and the rebound `namematcher`
        # (a future) are resolved by the closures below only when they run.
        def namematcher():
            matcher = genematcher.result()
            match = matcher.set_targets(ref_set.result())
            match.umatch = memoize(match.umatch)
            return match

        def map_unames():
            matcher = namematcher.result()
            query = list(filter(None, map(matcher.umatch, querynames)))
            reference = list(
                filter(None, map(matcher.umatch, ref_set.result())))
            return query, reference

        if referenceGenes is not None:
            ref_set = fulfill(referenceGenes)
        elif self._nogenematching():
            ref_set = self._executor.submit(refset_null)
        else:
            ref_set = self._executor.submit(refset_ncbi)

        namematcher = self._executor.submit(namematcher)
        querynames = clusterGenes

        state = types.SimpleNamespace()
        state.query_set = clusterGenes
        state.reference_set = referenceGenes
        state.namematcher = namematcher
        state.query_count = len(set(clusterGenes))
        state.reference_count = (len(set(referenceGenes))
                                 if referenceGenes is not None else None)

        state.cancelled = False

        # Thread-safe proxies for reporting back to the widget.
        progress = methodinvoke(self, "_setProgress", (float,))
        info = methodinvoke(self, "_setRunInfo", (str,))

        @withtraceback
        def run():
            info("Loading data")
            match = namematcher.result()
            query, reference = map_unames()
            gscollections = collections.result()

            results = []
            info("Running enrichment")
            p = 0
            for i, gset in enumerate(gscollections):
                genes = set(filter(None, map(match.umatch, gset.genes)))
                enr = set_enrichment(genes, reference, query)
                results.append((gset, enr))

                if state.cancelled:
                    raise UserInteruptException

                # Only report whole-percent changes.
                pnew = int(100 * i / len(gscollections))
                if pnew != p:
                    progress(pnew)
                    p = pnew
            progress(100)
            info("")
            return query, reference, results

        task = Task(function=run)
        task.resultReady.connect(self.__on_enrichment_finished)
        task.exceptionReady.connect(self.__on_enrichment_failed)
        result = self._executor.submit(task)
        state.results = result

        self.state = state
        self._updatesummary()

    def __on_enrichment_failed(self, exception):
        if not isinstance(exception, UserInteruptException):
            print("ERROR:", exception, file=sys.stderr)
            print(exception._traceback, file=sys.stderr)

        self.progressBarFinished()
        self.setStatusMessage("")
        self.__state &= ~OWSetEnrichment.RunningEnrichment

    def __on_enrichment_finished(self, results):
        assert QThread.currentThread() is self.thread()
        self.__state &= ~OWSetEnrichment.RunningEnrichment

        query, reference, results = results

        if self.annotationsChartView.model():
            self.annotationsChartView.model().clear()

        nquery = len(query)
        nref = len(reference)
        maxcount = max((len(e.query_mapped) for _, e in results),
                       default=1)
        maxrefcount = max((len(e.reference_mapped) for _, e in results),
                          default=1)
        # Width (number of decimal digits) used to pad the counts; both
        # columns use base-10 logarithm.
        nspaces = int(math.ceil(math.log10(maxcount or 1)))
        refspaces = int(math.ceil(math.log10(maxrefcount or 1)))
        query_fmt = "%" + str(nspaces) + "s (%.2f%%)"
        ref_fmt = "%" + str(refspaces) + "s (%.2f%%)"

        def fmt_count(fmt, count, total):
            return fmt % (count, 100.0 * count / (total or 1))

        fmt_query_count = partial(fmt_count, query_fmt)
        fmt_ref_count = partial(fmt_count, ref_fmt)

        linkFont = QFont(self.annotationsChartView.viewOptions().font)
        linkFont.setUnderline(True)

        def item(value=None, tooltip=None, user=None):
            si = QStandardItem()
            if value is not None:
                si.setData(value, Qt.DisplayRole)
            if tooltip is not None:
                si.setData(tooltip, Qt.ToolTipRole)
            # The user role (used for sorting) falls back to the value.
            if user is not None:
                si.setData(user, Qt.UserRole)
            else:
                si.setData(value, Qt.UserRole)
            return si

        model = QStandardItemModel()
        model.setSortRole(Qt.UserRole)
        model.setHorizontalHeaderLabels(
            ["Category", "Term", "Count", "Reference count", "p-value",
             "FDR", "Enrichment"])
        for gset, enrich in results:
            if len(enrich.query_mapped) == 0:
                continue
            nquery_mapped = len(enrich.query_mapped)
            nref_mapped = len(enrich.reference_mapped)

            row = [
                item(", ".join(gset.hierarchy)),
                item(gsname(gset), tooltip=gset.link),
                item(fmt_query_count(nquery_mapped, nquery),
                     tooltip=nquery_mapped, user=nquery_mapped),
                item(fmt_ref_count(nref_mapped, nref),
                     tooltip=nref_mapped, user=nref_mapped),
                item(fmtp(enrich.p_value), user=enrich.p_value),
                item(),  # column 5, FDR, is computed in filterAnnotationsChartView
                item(enrich.enrichment_score,
                     tooltip="%.3f" % enrich.enrichment_score,
                     user=enrich.enrichment_score)
            ]
            row[0].geneset = gset
            row[0].enrichment = enrich
            row[1].setData(gset.link, gui.LinkRole)
            row[1].setFont(linkFont)
            row[1].setForeground(QColor(Qt.blue))

            model.appendRow(row)

        self.annotationsChartView.setModel(model)
        self.annotationsChartView.selectionModel().selectionChanged.connect(
            self.commit
        )

        if not model.rowCount():
            self.warning(0, "No enriched sets found.")
        else:
            self.warning(0)

        # Completion candidates: set names and the words they consist of.
        allnames = set(gsname(gs)
                       for gs, (count, _, _, _) in results if count)

        allnames |= reduce(operator.ior,
                           (set(word_split(name)) for name in allnames),
                           set())

        self.filterCompleter.setModel(None)
        self.completerModel = QStringListModel(sorted(allnames))
        self.filterCompleter.setModel(self.completerModel)

        if results:
            max_score = max((e.enrichment_score for _, e in results
                             if np.isfinite(e.enrichment_score)),
                            default=1)

            self.annotationsChartView.setItemDelegateForColumn(
                6, BarItemDelegate(self, scale=(0.0, max_score))
            )

        self.annotationsChartView.setItemDelegateForColumn(
            1, gui.LinkStyledItemDelegate(self.annotationsChartView)
        )

        header = self.annotationsChartView.header()
        for i in range(model.columnCount()):
            sh = self.annotationsChartView.sizeHintForColumn(i)
            sh = max(sh, header.sectionSizeHint(i))
            # Keep every column between 30 and 300 pixels wide.
            self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30))

        self.filterAnnotationsChartView()

        self.progressBarFinished()
        self.setStatusMessage("")

    def _updatesummary(self):
        """Update the text of the info box from the current run state."""
        state = self.state
        if state is None:
            self.error(0)
            self.warning(0)
            self.infoBox.setText("No data on input.\n")
            return

        text = "{.query_count} unique names on input\n".format(state)

        if state.results.done() and not state.results.exception():
            mapped, _, _ = state.results.result()
            ratio_mapped = (len(mapped) / state.query_count
                            if state.query_count else 0)
            text += ("%i (%.1f%%) gene names matched" %
                     (len(mapped), 100.0 * ratio_mapped))
        elif not state.results.done():
            text += "..."
        else:
            text += "<Error {}>".format(str(state.results.exception()))
        self.infoBox.setText(text)

        # TODO: warn on no enriched sets found (i.e no query genes
        # mapped to any set)

    def filterAnnotationsChartView(self, filterString=""):
        """
        Recompute the FDR over the rows of the selected categories and hide
        the rows that fail the enabled count / p-value / FDR filters.

        `filterString` is accepted for signal compatibility and is
        currently unused.
        """
        if self.__state & OWSetEnrichment.RunningEnrichment:
            return

        model = self.annotationsChartView.model()
        if model is None:
            # No results have been computed yet (e.g. the user typed in
            # the filter box before any data arrived).
            return

        # TODO: Move filtering to a filter proxy model
        # TODO: Re-enable string search

        categories = set(", ".join(cat) for cat, _ in
                         self.selectedCategories())

        def ishidden(index):
            # Is item at index (row) hidden
            item = model.item(index)
            item_cat = item.data(Qt.DisplayRole)
            return item_cat not in categories

        hidemask = [ishidden(i) for i in range(model.rowCount())]

        # compute FDR according the selected categories
        pvals = [model.item(i, 4).data(Qt.UserRole)
                 for i, hidden in enumerate(hidemask) if not hidden]
        fdrs = utils.stats.FDR(pvals)

        # update FDR for the selected collections and apply filtering rules
        itemsHidden = []
        fdriter = iter(fdrs)
        for index, hidden in enumerate(hidemask):
            if not hidden:
                fdr = next(fdriter)
                pval = model.index(index, 4).data(Qt.UserRole)
                count = model.index(index, 2).data(Qt.ToolTipRole)

                hidden = (self.useMinCountFilter and
                          count < self.minClusterCount) or \
                         (self.useMaxPValFilter and pval > self.maxPValue) or \
                         (self.useMaxFDRFilter and fdr > self.maxFDR)

                if not hidden:
                    fdr_item = model.item(index, 5)
                    fdr_item.setData(fmtpdet(fdr), Qt.ToolTipRole)
                    fdr_item.setData(fmtp(fdr), Qt.DisplayRole)
                    fdr_item.setData(fdr, Qt.UserRole)

            self.annotationsChartView.setRowHidden(
                index, QModelIndex(), hidden)

            itemsHidden.append(hidden)

        if model.rowCount() and all(itemsHidden):
            self.information(0, "All sets were filtered out.")
        else:
            self.information(0)

        self._updatesummary()

    @Slot(float)
    def _setProgress(self, value):
        assert QThread.currentThread() is self.thread()
        self.progressBarSet(value, processEvents=None)

    @Slot(str)
    def _setRunInfo(self, text):
        self.setStatusMessage(text)

    def commit(self):
        """Send the subset of the input data that maps to the selected
        sets to the "Data subset" output."""
        if self.data is None or \
                self.__state & OWSetEnrichment.RunningEnrichment:
            return

        model = self.annotationsChartView.model()
        selection_model = self.annotationsChartView.selectionModel()
        if model is None or selection_model is None or self.state is None:
            # Nothing has been computed yet, hence nothing to select from.
            return

        rows = selection_model.selectedRows(0)
        selected = [model.item(index.row(), 0) for index in rows]
        mapped = reduce(operator.ior,
                        (set(item.enrichment.query_mapped)
                         for item in selected),
                        set())
        assert self.state.namematcher.done()

        matcher = self.state.namematcher.result()

        axis = 1 if self.genesinrows else 0
        if axis == 1:
            mapped = [attr for attr in self.data.domain.attributes
                      if matcher.umatch(attr.name) in mapped]

            newdomain = Orange.data.Domain(
                mapped, self.data.domain.class_vars, self.data.domain.metas)
            data = self.data.from_table(newdomain, self.data)
        else:
            geneattr = self.geneAttrs[self.geneattr]
            selected = [i for i, ex in enumerate(self.data)
                        if matcher.umatch(str(ex[geneattr])) in mapped]
            data = self.data[selected]
        self.send("Data subset", data)

    def onDeleteWidget(self):
        """Cancel pending work and shut the executor down."""
        if self.state is not None:
            self._cancelPending()
            self.state = None
        self._executor.shutdown(wait=False)
        super().onDeleteWidget()
class OWGeneSets(OWWidget, ConcurrentWidgetMixin):
    """
    Widget listing gene sets together with the input genes they contain.

    The gene sets are selected through a `GeneSetSelection` component; the
    table is filled by a background task and the input data restricted to
    the genes of the selected rows is sent to the "Matched Genes" output.
    """
    name = 'Gene Sets'
    description = ""
    icon = 'icons/OWGeneSets.svg'
    priority = 80
    want_main_area = True

    organism = Setting(None, schema_only=True)
    stored_gene_sets_selection = Setting([], schema_only=True)
    selected_rows = Setting([], schema_only=True)

    min_count: int
    min_count = Setting(5)

    use_min_count: bool
    use_min_count = Setting(True)

    auto_commit: bool
    auto_commit = Setting(False)

    search_pattern: str
    search_pattern = Setting('')

    # component settings
    gs_selection_component: SettingProvider = SettingProvider(GeneSetSelection)

    class Inputs:
        data = Input('Data', Table)
        custom_gene_sets = Input('Custom Gene Sets', Table)

    class Outputs:
        matched_genes = Output('Matched Genes', Table)

    class Warning(OWWidget.Warning):
        all_sets_filtered = Msg('All sets were filtered out.')

    class Error(OWWidget.Error):
        organism_mismatch = Msg(
            'Organism in input data and custom gene sets does not match')
        cant_reach_host = Msg('Host orange.biolab.si is unreachable.')
        cant_load_organisms = Msg(
            'No available organisms, please check your connection.')
        custom_gene_sets_table_format = Msg(
            'Custom gene sets data must have genes represented as rows.')

    def __init__(self):
        super().__init__()
        # OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        # Control area
        box = vBox(self.controlArea, True, margin=0)
        self.gs_selection_component: GeneSetSelection = GeneSetSelection(
            self, box)
        self.gs_selection_component.selection_changed.connect(
            self._on_selection_changed)

        # Main area
        self.filter_proxy_model = FilterProxyModel()
        self.filter_proxy_model.setFilterKeyColumn(Header.term)

        self.tree_view = QTreeView()
        self.tree_view.setAlternatingRowColors(True)
        self.tree_view.setSortingEnabled(True)
        self.tree_view.sortByColumn(Header.count, Qt.DescendingOrder)

        self.tree_view.setSelectionMode(QTreeView.ExtendedSelection)
        self.tree_view.setEditTriggers(QTreeView.NoEditTriggers)
        self.tree_view.viewport().setMouseTracking(True)
        self.tree_view.setItemDelegateForColumn(
            Header.term, LinkStyledItemDelegate(self.tree_view))
        self.tree_view.setItemDelegateForColumn(Header.genes,
                                                NumericalColumnDelegate(self))
        self.tree_view.setItemDelegateForColumn(Header.count,
                                                NumericalColumnDelegate(self))
        self.tree_view.setModel(self.filter_proxy_model)
        # The view keeps the same proxy model (and selection model) for its
        # whole lifetime, so the selection signal is connected exactly once
        # here. Connecting on every finished task would accumulate
        # duplicate connections and call `commit` repeatedly.
        self.tree_view.selectionModel().selectionChanged.connect(self.commit)

        h_layout = QHBoxLayout()
        h_layout.setSpacing(100)
        h_widget = widgetBox(self.mainArea, orientation=h_layout)

        spin(
            h_widget,
            self,
            'min_count',
            0,
            1000,
            label='Count',
            tooltip='Minimum genes count',
            checked='use_min_count',
            callback=self.filter_view,
            callbackOnReturn=True,
            checkCallback=self.filter_view,
        )

        self.line_edit_filter = lineEdit(h_widget, self, 'search_pattern')
        self.line_edit_filter.setPlaceholderText('Filter gene sets ...')
        self.line_edit_filter.textChanged.connect(self.filter_view)

        self.mainArea.layout().addWidget(self.tree_view)
        self.tree_view.header().setSectionResizeMode(
            QHeaderView.ResizeToContents)

        self.commit_button = auto_commit(self.controlArea, self,
                                         'auto_commit', '&Commit', box=False)

        self.input_data: Optional[Table] = None
        self.num_of_selected_genes: int = 0

    @property
    def tax_id(self) -> Optional[str]:
        """Taxonomy id annotation of the input data (None without data)."""
        if self.input_data:
            return self.input_data.attributes[TableAnnotation.tax_id]
        return None

    @property
    def gene_as_attr_name(self) -> Optional[bool]:
        """The `gene_as_attr_name` annotation of the input data
        (None without data)."""
        if self.input_data:
            return self.input_data.attributes[
                TableAnnotation.gene_as_attr_name]
        return None

    @property
    def gene_location(self) -> Optional[str]:
        """
        Name of the place where gene ids are stored: the attribute-level
        annotation key when genes are in columns, the id column otherwise
        (None without data).
        """
        if not self.input_data:
            return None

        if self.gene_as_attr_name:
            return self.input_data.attributes[
                TableAnnotation.gene_id_attribute]
        return self.input_data.attributes[TableAnnotation.gene_id_column]

    @property
    def input_genes(self) -> Set[str]:
        """The set of gene ids (as strings) found in the input data.

        The set is recomputed from the input data on every access.
        """
        if not self.input_data:
            return set()

        if self.gene_as_attr_name:
            # Variables without the annotation contribute '?'.
            return {
                str(variable.attributes.get(self.gene_location, '?'))
                for variable in self.input_data.domain.attributes
            }
        return {
            str(g) for g in
            self.input_data.get_column_view(self.gene_location)[0]
        }

    def on_partial_result(self, _):
        pass

    def on_done(self, result: Results):
        """Show the rows produced by the finished background task."""
        model = QStandardItemModel()
        for item in result.items:
            model.appendRow(item)

        model.setSortRole(Qt.UserRole)
        model.setHorizontalHeaderLabels(Header.labels())

        self.filter_proxy_model.setSourceModel(model)
        self.filter_view()
        self.update_info_box()

    def on_exception(self, ex):
        # TODO: handle possible exceptions
        raise ex

    def onDeleteWidget(self):
        """Stop the background task before the widget is deleted."""
        self.shutdown()
        super().onDeleteWidget()

    def _on_selection_changed(self):
        """Start the background task for the current gene set selection."""
        self.start(run, self.gs_selection_component.gene_sets,
                   self.gs_selection_component.selection, self.input_genes)

    @Inputs.data
    @check_table_annotation
    def set_data(self, input_data: Table):
        """Set the input data and (re)initialise the selection component."""
        self.Outputs.matched_genes.send(None)
        self.input_data = None
        self.num_of_selected_genes = 0

        if input_data:
            self.input_data = input_data
            self.gs_selection_component.initialize(self.tax_id)

        self.update_info_box()

    @Inputs.custom_gene_sets
    def handle_custom_gene_sets_input(self, custom_data):
        """Pass the custom gene sets table (or None) to the selection
        component."""
        self.Outputs.matched_genes.send(None)

        if custom_data:
            self.gs_selection_component.initialize_custom_gene_sets(
                custom_data)
        else:
            self.gs_selection_component.initialize_custom_gene_sets(None)

        self.update_info_box()

    def commit(self):
        """
        Send the input data restricted to the genes of the selected rows.

        Nothing is sent when there is no selection or no input genes.
        """
        selection_model = self.tree_view.selectionModel()
        self.num_of_selected_genes = 0

        if selection_model:
            selection = selection_model.selectedRows(Header.count)
            self.selected_rows = [
                self.filter_proxy_model.mapToSource(sel).row()
                for sel in selection
            ]

            if selection and self.input_genes:
                genes = [
                    model_index.data(Qt.UserRole) for model_index in selection
                ]
                # Kept as a set so that the membership tests below are
                # constant time.
                output_genes = set.union(*genes)
                self.num_of_selected_genes = len(output_genes)

                if self.gene_as_attr_name:
                    gene_location = self.gene_location
                    selected = [
                        column
                        for column in self.input_data.domain.attributes
                        if gene_location in column.attributes
                        and str(column.attributes[gene_location])
                        in output_genes
                    ]

                    domain = Domain(selected,
                                    self.input_data.domain.class_vars,
                                    self.input_data.domain.metas)
                    new_data = self.input_data.from_table(
                        domain, self.input_data)
                    self.Outputs.matched_genes.send(new_data)
                else:
                    # create filter from selected column for genes
                    only_known = table_filter.FilterStringList(
                        self.gene_location, list(output_genes))
                    # apply filter to the data
                    data_table = table_filter.Values([only_known])(
                        self.input_data)
                    self.Outputs.matched_genes.send(data_table)

        self.update_info_box()

    def update_info_box(self):
        """Refresh the input and output summaries shown by the widget."""
        input_string = ''
        input_number = ''

        # Evaluate the property once; every access rebuilds the set.
        input_genes = self.input_genes
        if input_genes:
            input_string += '{} unique gene names on input.\n'.format(
                len(input_genes))
            input_number += str(len(input_genes))
            self.info.set_output_summary(
                str(self.num_of_selected_genes),
                '{} genes on output.\n'.format(self.num_of_selected_genes))
        else:
            self.info.set_output_summary(self.info.NoOutput)

        if self.gs_selection_component.data:
            num_of_genes = self.gs_selection_component.num_of_genes
            num_of_sets = self.gs_selection_component.num_of_custom_sets
            input_number += f"{'' if input_number else '0'}|{num_of_genes}"
            input_string += '{} marker genes in {} sets\n'.format(
                num_of_genes, num_of_sets)

        if not input_number:
            self.info.set_input_summary(self.info.NoInput)
        else:
            self.info.set_input_summary(input_number, input_string)

    def create_filters(self):
        """
        Return the list of row filters for the proxy model: every word of
        the search pattern must occur in the term and, when enabled, the
        count must be at least `min_count`.
        """
        search_term: List[str] = self.search_pattern.lower().strip().split()

        filters = [
            FilterProxyModel.Filter(
                Header.term, Qt.DisplayRole,
                lambda value: all(fs in value.lower() for fs in search_term))
        ]

        if self.use_min_count:
            filters.append(
                FilterProxyModel.Filter(Header.count, Qt.DisplayRole,
                                        lambda value: value >= self.min_count))

        return filters

    def filter_view(self):
        """Apply the current filters and warn when they hide every row."""
        filter_proxy: FilterProxyModel = self.filter_proxy_model
        model: QStandardItemModel = filter_proxy.sourceModel()

        # Nothing to filter until the first results have been shown.
        if isinstance(model, QStandardItemModel):
            # apply filtering rules
            filter_proxy.set_filters(self.create_filters())

            if model.rowCount() and not filter_proxy.rowCount():
                self.Warning.all_sets_filtered()
            else:
                self.Warning.clear()

    def sizeHint(self):
        """Return the preferred widget size."""
        return QSize(800, 600)
class OWGeneSets(OWWidget):
    """Widget that lists gene sets matched by the genes of the input table.

    The input table is expected to carry gene annotations in its
    ``attributes`` dictionary (``TAX_ID``, ``GENE_AS_ATTRIBUTE_NAME`` and one
    of ``GENE_ID_ATTRIBUTE`` / ``GENE_ID_COLUMN``).  Matching gene sets are
    computed on a worker thread and shown in a filterable tree view; the
    genes of the selected sets are sent to the ``Matched Genes`` output.
    """

    name = "Gene Sets"
    description = ""
    icon = "icons/OWGeneSets.svg"
    priority = 9
    want_main_area = True

    # column indices of the result model, in the order of DATA_HEADER_LABELS
    COUNT, GENES, CATEGORY, TERM = range(4)
    DATA_HEADER_LABELS = ["Count", 'Genes In Set', 'Category', 'Term']

    organism = Setting(None, schema_only=True)
    stored_gene_sets_selection = Setting([], schema_only=True)
    selected_rows = Setting([], schema_only=True)
    custom_gene_set_indicator = Setting(None, schema_only=True)

    min_count = Setting(5)
    use_min_count = Setting(True)
    auto_commit = Setting(True)

    class Inputs:
        genes = Input("Data", Table)
        custom_sets = Input('Custom Gene Sets', Table)

    class Outputs:
        matched_genes = Output("Matched Genes", Table)

    class Information(OWWidget.Information):
        pass

    class Warning(OWWidget.Warning):
        all_sets_filtered = Msg('All sets were filtered out.')

    class Error(OWWidget.Error):
        organism_mismatch = Msg('Organism in input data and custom gene sets does not match')
        missing_annotation = Msg(ERROR_ON_MISSING_ANNOTATION)
        missing_gene_id = Msg(ERROR_ON_MISSING_GENE_ID)
        missing_tax_id = Msg(ERROR_ON_MISSING_TAX_ID)
        cant_reach_host = Msg("Host orange.biolab.si is unreachable.")
        cant_load_organisms = Msg("No available organisms, please check your connection.")

    def __init__(self):
        super().__init__()

        # commit
        self.commit_button = None

        # progress bar
        self.progress_bar = None
        self.progress_bar_iterations = None

        # data
        self.input_data = None
        self.input_genes = []
        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None
        self.gene_id_column = None

        # custom gene sets
        self.custom_data = None
        self.feature_model = DomainModel(valid_types=(DiscreteVariable, StringVariable))
        self.custom_gs_col_box = None
        self.gs_label_combobox = None
        self.custom_tax_id = None
        self.custom_use_attr_names = None
        self.custom_gene_id_attribute = None
        self.custom_gene_id_column = None
        self.num_of_custom_sets = None

        # Gene Sets widget
        self.gs_widget = None

        # info box
        self.input_info = None
        self.num_of_sel_genes = 0

        # filter
        self.line_edit_filter = None
        self.search_pattern = ''
        self.organism_select_combobox = None

        # data model view
        self.data_view = None
        self.data_model = None

        # gene matcher NCBI
        self.gene_matcher = None

        # filter proxy model
        self.filter_proxy_model = None

        # hierarchy widget
        self.hierarchy_widget = None
        self.hierarchy_state = None

        # spinbox
        self.spin_widget = None

        # threads
        self.threadpool = QThreadPool(self)
        self.workers = None
        self._task = None  # type: Optional[Task]
        self._executor = ThreadExecutor()

        # gui
        self.setup_gui()

    @staticmethod
    def _read_tax_id(table):
        """Return the table's ``TAX_ID`` annotation as a string, or None when absent.

        Converting unconditionally with ``str()`` would turn a missing
        annotation into the string ``'None'`` and defeat every ``is None``
        check made on the taxonomy id.
        """
        tax_id = table.attributes.get(TAX_ID, None)
        return None if tax_id is None else str(tax_id)

    def __reset_widget_state(self):
        """Refresh the info box and start from an empty result model and filter proxy."""
        self.update_info_box()
        # clear data view
        self.init_item_model()
        # reset filters
        self.setup_filter_model()

    def cancel(self):
        """
        Cancel the current task (if any).
        """
        if self._task is not None:
            self._task.cancel()
            assert self._task.future.done()
            # disconnect the `_task_finished` slot
            self._task.watcher.done.disconnect(self._init_gene_sets_finished)
            self._task = None

    @Slot()
    def progress_advance(self):
        # GUI should be updated in main thread. That's why we are calling advance method here
        if self.progress_bar:
            self.progress_bar.advance()

    def __get_input_genes(self):
        """Fill ``self.input_genes`` with the gene ids (as strings) of the input table."""
        if self.use_attr_names:
            # genes are columns: the id is stored in each variable's attributes,
            # '?' marks a variable without one
            self.input_genes = [
                str(variable.attributes.get(self.gene_id_attribute, '?'))
                for variable in self.input_data.domain.attributes
            ]
        else:
            # genes are rows: the ids are read from a single column
            genes, _ = self.input_data.get_column_view(self.gene_id_column)
            self.input_genes = [str(g) for g in genes]

    def handle_custom_gene_sets(self, select_customs_flag=False):
        """Register the gene sets described by the custom table with the gene sets widget.

        :param select_customs_flag: forwarded to ``gs_widget.add_custom_sets``.

        The hierarchy is refreshed on every path that does not add custom
        sets (no indicator column, no custom table, no gene id column, or an
        organism mismatch).
        """
        can_build_sets = (
            self.custom_gene_set_indicator
            and self.custom_data is not None
            and self.custom_gene_id_column is not None
        )
        if not can_build_sets:
            self.gs_widget.update_gs_hierarchy()
            self.update_info_box()
            return

        if self.__check_organism_mismatch():
            # self.gs_label_combobox.setDisabled(True)
            self.Error.organism_mismatch()
            self.gs_widget.update_gs_hierarchy()
            return

        indicator = self.custom_gene_set_indicator
        if isinstance(indicator, DiscreteVariable):
            # discrete columns hold value indices; translate them to labels
            labels = indicator.values
            gene_sets_names = [labels[int(idx)] for idx in self.custom_data.get_column_view(indicator)[0]]
        else:
            gene_sets_names, _ = self.custom_data.get_column_view(indicator)

        self.num_of_custom_sets = len(set(gene_sets_names))
        gene_names, _ = self.custom_data.get_column_view(self.custom_gene_id_column)
        hierarchy_title = (self.custom_data.name or 'Custom sets',)

        try:
            self.gs_widget.add_custom_sets(
                gene_sets_names,
                gene_names,
                hierarchy_title=hierarchy_title,
                select_customs_flag=select_customs_flag,
            )
        except geneset.GeneSetException:
            # NOTE(review): failures to add custom sets are ignored on purpose
            # here (best effort); confirm whether the user should be told.
            pass
        # self.gs_label_combobox.setDisabled(False)

        self.update_info_box()

    def update_tree_view(self):
        """Recompute the matched gene sets for the currently selected hierarchies."""
        self.init_gene_sets()

    def invalidate(self):
        """Clear the view and, when input data is present, recompute its content."""
        # clear
        self.__reset_widget_state()
        self.update_info_box()

        if self.input_data is not None:
            # setup
            self.__get_input_genes()
            self.update_tree_view()

    def __check_organism_mismatch(self):
        """
        Check if organisms from different inputs match.

        :return: True if there is a mismatch
        """
        if self.tax_id is not None and self.custom_tax_id is not None:
            return self.tax_id != self.custom_tax_id
        return False

    def __get_reference_genes(self):
        """Fill ``self.reference_genes`` with the gene ids (as strings) of the reference table.

        NOTE(review): the ``reference_*`` attributes read here are not
        initialised by this class's ``__init__`` and nothing in this class
        calls the method - confirm whether it is still needed.
        """
        if self.reference_attr_names:
            self.reference_genes = [
                str(variable.attributes.get(self.reference_gene_id_attribute, '?'))
                for variable in self.reference_data.domain.attributes
            ]
        else:
            genes, _ = self.reference_data.get_column_view(self.reference_gene_id_column)
            self.reference_genes = [str(g) for g in genes]

    @Inputs.custom_sets
    def handle_custom_input(self, data):
        """Handle a new (or removed) table on the ``Custom Gene Sets`` input."""
        self.Error.clear()
        self.__reset_widget_state()

        # forget everything derived from the previous custom table
        self.custom_data = None
        self.custom_tax_id = None
        self.custom_use_attr_names = None
        self.custom_gene_id_attribute = None
        self.custom_gene_id_column = None
        self.feature_model.set_domain(None)

        if data:
            self.custom_data = data
            self.feature_model.set_domain(self.custom_data.domain)
            # None (not the string 'None') when the table has no taxonomy annotation
            self.custom_tax_id = self._read_tax_id(self.custom_data)
            self.custom_use_attr_names = self.custom_data.attributes.get(GENE_AS_ATTRIBUTE_NAME, None)
            self.custom_gene_id_attribute = self.custom_data.attributes.get(GENE_ID_ATTRIBUTE, None)
            self.custom_gene_id_column = self.custom_data.attributes.get(GENE_ID_COLUMN, None)

            # the combo box is created lazily, the first time custom data arrives
            if self.gs_label_combobox is None:
                self.gs_label_combobox = comboBox(
                    self.custom_gs_col_box,
                    self,
                    "custom_gene_set_indicator",
                    sendSelectedValue=True,
                    model=self.feature_model,
                    callback=self.on_gene_set_indicator_changed,
                )
            self.custom_gs_col_box.show()

            # keep the stored indicator when the new domain still has it,
            # otherwise fall back to the first candidate column
            if self.custom_gene_set_indicator in self.feature_model:
                index = self.feature_model.indexOf(self.custom_gene_set_indicator)
                self.custom_gene_set_indicator = self.feature_model[index]
            else:
                self.custom_gene_set_indicator = self.feature_model[0]
        else:
            self.custom_gs_col_box.hide()

        self.gs_widget.clear_custom_sets()
        self.handle_custom_gene_sets(select_customs_flag=self.custom_gene_set_indicator is not None)
        self.invalidate()

    @Inputs.genes
    def handle_genes_input(self, data):
        """Handle a new (or removed) table on the ``Data`` input.

        Shows an error and stops when the table lacks the required gene
        annotations or its organism differs from the custom gene sets.
        """
        self.Error.clear()
        self.__reset_widget_state()
        # clear output
        self.Outputs.matched_genes.send(None)
        # nothing is sent any more, so nothing is selected either
        self.num_of_sel_genes = 0

        # clear input values
        self.input_genes = []
        self.input_data = None
        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None
        # also drop the column annotation so it cannot leak in from the previous table
        self.gene_id_column = None
        self.gs_widget.clear()
        self.gs_widget.clear_gene_sets()
        self.update_info_box()

        if data:
            self.input_data = data
            # None (not the string 'None') when the table has no taxonomy annotation
            self.tax_id = self._read_tax_id(self.input_data)
            self.use_attr_names = self.input_data.attributes.get(GENE_AS_ATTRIBUTE_NAME, None)
            self.gene_id_attribute = self.input_data.attributes.get(GENE_ID_ATTRIBUTE, None)
            self.gene_id_column = self.input_data.attributes.get(GENE_ID_COLUMN, None)
            self.update_info_box()

            # a valid table states where genes are stored and names exactly
            # one of the two gene id locations
            has_gene_annotation = self.use_attr_names is not None and (
                (self.gene_id_attribute is None) ^ (self.gene_id_column is None)
            )
            if not has_gene_annotation:
                if self.tax_id is None:
                    self.Error.missing_annotation()
                    return
                self.Error.missing_gene_id()
                return
            elif self.tax_id is None:
                self.Error.missing_tax_id()
                return

            if self.__check_organism_mismatch():
                self.Error.organism_mismatch()
                return

            self.gs_widget.load_gene_sets(self.tax_id)

            # if input data change, we need to refresh custom sets
            if self.custom_data:
                self.gs_widget.clear_custom_sets()
                self.handle_custom_gene_sets()

        self.invalidate()

    def update_info_box(self):
        """Rebuild the text of the info label from the current inputs and selection."""
        info_string = ''
        if self.input_genes:
            info_string += '{} unique gene names on input.\n'.format(len(self.input_genes))
            info_string += '{} genes on output.\n'.format(self.num_of_sel_genes)
        elif self.input_data:
            # data is present but no genes were extracted from it
            if not any([self.gene_id_column, self.gene_id_attribute]):
                info_string += 'Input data with incorrect meta data.\nUse Gene Name Matcher widget.'
        else:
            info_string += 'No data on input.\n'

        if self.custom_data:
            info_string += '{} marker genes in {} sets\n'.format(self.custom_data.X.shape[0], self.num_of_custom_sets)

        self.input_info.setText(info_string)

    def create_partial(self):
        """Bind the arguments of ``set_items`` for execution on the worker thread."""
        return partial(
            self.set_items,
            self.gs_widget.gs_object,
            self.stored_gene_sets_selection,
            set(self.input_genes),
            self.callback,
        )

    def callback(self):
        """Called once per processed gene set; aborts a cancelled task and advances progress."""
        if self._task.cancelled:
            raise KeyboardInterrupt()
        if self.progress_bar:
            # progress_advance has to run in the GUI thread
            methodinvoke(self, "progress_advance")()

    def init_gene_sets(self):
        """Start (or restart) the background task that matches input genes to gene sets."""
        if self._task is not None:
            self.cancel()
        assert self._task is None

        self._task = Task()
        self.init_item_model()

        # save setting on selected hierarchies
        self.stored_gene_sets_selection = self.gs_widget.get_hierarchies(only_selected=True)

        f = self.create_partial()

        # one progress step per gene set of a selected hierarchy
        progress_iterations = sum(
            len(g_set)
            for hier, g_set in self.gs_widget.gs_object.map_hierarchy_to_sets().items()
            if hier in self.stored_gene_sets_selection
        )
        self.progress_bar = ProgressBar(self, iterations=progress_iterations)

        self._task.future = self._executor.submit(f)
        self._task.watcher = FutureWatcher(self._task.future)
        self._task.watcher.done.connect(self._init_gene_sets_finished)

    @Slot(concurrent.futures.Future)
    def _init_gene_sets_finished(self, f):
        """Populate the view with the rows produced by the finished background task."""
        import logging

        assert self.thread() is QThread.currentThread()
        assert threading.current_thread() == threading.main_thread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()

        self._task = None
        self.progress_bar.finish()
        self.setStatusMessage('')

        try:
            # tolerate a task that produced nothing
            results = f.result() or []  # type: list
            for model_item in results:
                self.data_model.appendRow(model_item)
            self.filter_proxy_model.setSourceModel(self.data_model)
            self.data_view.selectionModel().selectionChanged.connect(self.commit)
            self.filter_data_view()
            self.set_selection()
            self.update_info_box()
        except Exception:
            # keep the widget alive, but leave a traceback instead of a bare message
            logging.getLogger(__name__).exception("Failed to display the matched gene sets")

    def create_filters(self):
        """Build the filter list for the proxy model from the current GUI settings."""
        search_term = self.search_pattern.lower().strip().split()

        # every whitespace separated word has to occur in the term
        filters = [
            FilterProxyModel.Filter(
                self.TERM, Qt.DisplayRole, lambda value: all(fs in value.lower() for fs in search_term)
            )
        ]

        if self.use_min_count:
            filters.append(FilterProxyModel.Filter(self.COUNT, Qt.DisplayRole, lambda value: value >= self.min_count))

        return filters

    def filter_data_view(self):
        """Apply the current filters and warn when they hide every gene set."""
        filter_proxy = self.filter_proxy_model  # type: FilterProxyModel
        model = filter_proxy.sourceModel()  # type: QStandardItemModel

        if isinstance(model, QStandardItemModel):
            # apply filtering rules
            filter_proxy.set_filters(self.create_filters())

            if model.rowCount() and not filter_proxy.rowCount():
                self.Warning.all_sets_filtered()
            else:
                self.Warning.clear()

    def set_selection(self):
        """Re-select the rows stored in ``selected_rows`` (source model row numbers)."""
        if not self.selected_rows:
            return

        view = self.data_view
        model = self.data_model

        # the stored rows may refer to a larger, previous model
        if model.rowCount() <= max(self.selected_rows):
            return

        proxy = self.filter_proxy_model
        last_column = view.header().count() - 1
        selection = QItemSelection()

        for source_row in self.selected_rows:
            source_index = model.indexFromItem(model.item(source_row))
            proxy_row = proxy.mapFromSource(source_index).row()
            if proxy_row < 0:
                # the row is hidden by the current filters
                continue
            selection.append(
                QItemSelectionRange(
                    proxy.index(proxy_row, 0),
                    proxy.index(proxy_row, last_column),
                )
            )

        view.selectionModel().select(selection, QItemSelectionModel.ClearAndSelect)

    def commit(self):
        """Send the part of the input data that belongs to genes of the selected sets.

        NOTE(review): when nothing is selected no signal is sent, so the
        previous output stays in place - confirm this is intended.
        """
        selection_model = self.data_view.selectionModel()
        if not selection_model:
            return

        selection = selection_model.selectedRows(self.COUNT)
        # remember the selection as source model rows (stored in settings)
        self.selected_rows = [self.filter_proxy_model.mapToSource(sel).row() for sel in selection]

        if not (selection and self.input_genes):
            return

        # the COUNT column keeps the matched genes of each set under UserRole
        genes = [model_index.data(Qt.UserRole) for model_index in selection]
        output_genes = set().union(*genes)
        self.num_of_sel_genes = len(output_genes)
        self.update_info_box()

        if self.use_attr_names:
            # genes are columns: keep only the attributes of matched genes
            selected = [
                column
                for column in self.input_data.domain.attributes
                if self.gene_id_attribute in column.attributes
                and str(column.attributes[self.gene_id_attribute]) in output_genes
            ]
            domain = Domain(selected, self.input_data.domain.class_vars, self.input_data.domain.metas)
            new_data = self.input_data.from_table(domain, self.input_data)
            self.Outputs.matched_genes.send(new_data)
        else:
            # create filter from selected column for genes
            only_known = table_filter.FilterStringList(self.gene_id_column, list(output_genes))
            # apply filter to the data
            data_table = table_filter.Values([only_known])(self.input_data)
            self.Outputs.matched_genes.send(data_table)

    def assign_delegates(self):
        """Install numerical delegates on the two count columns."""
        self.data_view.setItemDelegateForColumn(self.GENES, NumericalColumnDelegate(self))
        self.data_view.setItemDelegateForColumn(self.COUNT, NumericalColumnDelegate(self))

    def setup_filter_model(self):
        """Create a fresh filter proxy model and attach it to the view."""
        self.filter_proxy_model = FilterProxyModel()
        self.filter_proxy_model.setFilterKeyColumn(self.TERM)
        self.data_view.setModel(self.filter_proxy_model)

    def setup_filter_area(self):
        """Build the filter controls (minimum count and search pattern) of the main area."""
        h_layout = QHBoxLayout()
        h_layout.setSpacing(100)
        h_widget = widgetBox(self.mainArea, orientation=h_layout)

        spin(
            h_widget,
            self,
            'min_count',
            0,
            1000,
            label='Count',
            tooltip='Minimum genes count',
            checked='use_min_count',
            callback=self.filter_data_view,
            callbackOnReturn=True,
            checkCallback=self.filter_data_view,
        )

        self.line_edit_filter = lineEdit(h_widget, self, 'search_pattern')
        self.line_edit_filter.setPlaceholderText('Filter gene sets ...')
        self.line_edit_filter.textChanged.connect(self.filter_data_view)

    def on_gene_set_indicator_changed(self):
        """Rebuild the custom gene sets after a different indicator column was chosen."""
        # self._handle_future_model()
        self.gs_widget.clear_custom_sets()
        self.handle_custom_gene_sets()
        self.invalidate()

    def setup_control_area(self):
        """Build the control area: info label, custom set column box, gene set selection, commit."""
        # Control area
        self.input_info = widgetLabel(widgetBox(self.controlArea, "Info", addSpace=True), 'No data on input.\n')

        # hidden until custom gene sets arrive on the input
        self.custom_gs_col_box = box = vBox(self.controlArea, 'Custom Gene Set Term Column')
        box.hide()

        gene_sets_box = widgetBox(self.controlArea, "Gene Sets")
        self.gs_widget = GeneSetsSelection(gene_sets_box, self, 'stored_gene_sets_selection')
        self.gs_widget.hierarchy_tree_widget.itemClicked.connect(self.update_tree_view)

        self.commit_button = auto_commit(self.controlArea, self, "auto_commit", "&Commit", box=False)

    def setup_gui(self):
        """Build the whole GUI: control area first, then the filterable result view."""
        # control area
        self.setup_control_area()

        # main area
        self.data_view = QTreeView()
        self.setup_filter_model()
        self.setup_filter_area()
        self.data_view.setAlternatingRowColors(True)
        self.data_view.sortByColumn(self.COUNT, Qt.DescendingOrder)
        self.data_view.setSortingEnabled(True)
        self.data_view.setSelectionMode(QTreeView.ExtendedSelection)
        self.data_view.setEditTriggers(QTreeView.NoEditTriggers)
        self.data_view.viewport().setMouseTracking(False)
        self.data_view.setItemDelegateForColumn(self.TERM, LinkStyledItemDelegate(self.data_view))

        self.mainArea.layout().addWidget(self.data_view)

        self.data_view.header().setSectionResizeMode(QHeaderView.ResizeToContents)
        self.assign_delegates()

    @staticmethod
    def set_items(gene_sets, sets_to_display, genes, callback):
        """Build one row of model items for every displayed gene set that matches ``genes``.

        :param gene_sets: iterable of gene sets (sorted before processing)
        :param sets_to_display: hierarchies whose gene sets are considered
        :param genes: set of input gene ids
        :param callback: called once for every considered gene set
        :return: list of rows ``[count, genes, category, term]``; empty when
            there are no input genes or nothing matches
        """
        model_items = []
        if not genes:
            # always hand back a list so the caller can iterate the result
            return model_items

        for gene_set in sorted(gene_sets):
            if gene_set.hierarchy not in sets_to_display:
                continue

            callback()

            matched_set = gene_set.genes & genes
            if not matched_set:
                continue

            category_column = QStandardItem()
            term_column = QStandardItem()
            count_column = QStandardItem()
            genes_column = QStandardItem()

            category_column.setData(", ".join(gene_set.hierarchy), Qt.DisplayRole)

            term_column.setData(gene_set.name, Qt.DisplayRole)
            term_column.setData(gene_set.name, Qt.ToolTipRole)
            term_column.setData(gene_set.link, LinkRole)
            term_column.setForeground(QColor(Qt.blue))

            # matched genes are kept under UserRole; commit() reads them from there
            count_column.setData(matched_set, Qt.UserRole)
            count_column.setData(len(matched_set), Qt.DisplayRole)

            genes_column.setData(len(gene_set.genes), Qt.DisplayRole)
            # store genes to get then on output on selection
            genes_column.setData(set(gene_set.genes), Qt.UserRole)

            model_items.append([count_column, genes_column, category_column, term_column])

        return model_items

    def init_item_model(self):
        """Empty the result model (creating it on first use) and restore its header."""
        if self.data_model:
            self.data_model.clear()
            self.setup_filter_model()
        else:
            self.data_model = QStandardItemModel()

        # clear() also drops the header labels, so they are set every time
        self.data_model.setSortRole(Qt.UserRole)
        self.data_model.setHorizontalHeaderLabels(self.DATA_HEADER_LABELS)

    def sizeHint(self):
        return QSize(1280, 960)
class OWGeneSets(OWWidget): name = "Gene Set Enrichment" description = "" icon = "icons/OWGeneSets.svg" priority = 9 want_main_area = True settingsHandler = OrganismContextHandler() # settings auto_commit = Setting(True) stored_selections = ContextSetting([]) organism = ContextSetting(None) min_count = Setting(5) use_min_count = Setting(True) max_p_value = Setting(0.0001) use_p_value = Setting(False) max_fdr = Setting(0.01) use_max_fdr = Setting(True) use_reference_data = Setting(True) COUNT, REFERENCE, P_VAL, FDR, ENRICHMENT, GENES, CATEGORY, TERM = range(8) DATA_HEADER_LABELS = [ "Count", 'Reference', 'p-Value', 'FDR', 'Enrichment', 'Genes In Set', 'Category', 'Term' ] class Inputs: genes = Input("Genes", Table) custom_sets = Input('Custom Gene Sets', Table) reference = Input("Reference Genes", Table) class Outputs: matched_genes = Output("Matched Genes", Table) class Information(OWWidget.Information): pass class Warning(OWWidget.Warning): all_sets_filtered = Msg('All sets were filtered out.') class Error(OWWidget.Error): organism_mismatch = Msg( 'Organism in input data and custom gene sets does not match') missing_annotation = Msg(ERROR_ON_MISSING_ANNOTATION) missing_gene_id = Msg(ERROR_ON_MISSING_GENE_ID) missing_tax_id = Msg(ERROR_ON_MISSING_TAX_ID) cant_reach_host = Msg("Host orange.biolab.si is unreachable.") cant_load_organisms = Msg( "No available organisms, please check your connection.") def __init__(self): super().__init__() # commit self.commit_button = None # gene sets object self.gene_sets_obj = geneset.GeneSets() # progress bar self.progress_bar = None self.progress_bar_iterations = None # data self.input_data = None self.input_genes = [] self.tax_id = None self.use_attr_names = None self.gene_id_attribute = None self.gene_id_column = None # custom gene sets self.custom_data = None self.feature_model = DomainModel(valid_types=(DiscreteVariable, StringVariable)) self.gene_set_label = None self.gs_label_combobox = None self.custom_tax_id = None 
self.custom_use_attr_names = None self.custom_gene_id_attribute = None self.custom_gene_id_column = None # reference genes self.reference_radio_box = None self.reference_data = None self.reference_genes = None self.reference_tax_id = None self.reference_attr_names = None self.reference_gene_id_attribute = None self.reference_gene_id_column = None # info box self.input_info = None self.num_of_sel_genes = 0 # filter self.line_edit_filter = None self.search_pattern = '' self.organism_select_combobox = None # data model view self.data_view = None self.data_model = None # gene matcher NCBI self.gene_matcher = None # filter proxy model self.filter_proxy_model = None # hierarchy widget self.hierarchy_widget = None self.hierarchy_state = None # spinbox self.spin_widget = None # threads self.threadpool = QThreadPool(self) self.workers = None self._task = None # type: Optional[Task] self._executor = ThreadExecutor() # gui self.setup_gui() def __reset_widget_state(self): # reset hierarchy widget state self.hierarchy_widget.clear() # clear data view self.init_item_model() # reset filters self.setup_filter_model() def cancel(self): """ Cancel the current task (if any). """ if self._task is not None: self._task.cancel() assert self._task.future.done() # disconnect the `_task_finished` slot self._task.watcher.done.disconnect(self._init_gene_sets_finished) self._task = None @Slot() def progress_advance(self): # GUI should be updated in main thread. 
That's why we are calling advance method here if self.progress_bar: self.progress_bar.advance() def __get_input_genes(self): self.input_genes = [] if self.use_attr_names: for variable in self.input_data.domain.attributes: self.input_genes.append( str(variable.attributes.get(self.gene_id_attribute, '?'))) else: genes, _ = self.input_data.get_column_view(self.gene_id_column) self.input_genes = [str(g) for g in genes] def __construct_custom_gene_sets(self): custom_set_hier = ('Custom sets', ) # delete any custom sets if they exists self.gene_sets_obj.delete_sets_by_hierarchy(custom_set_hier) if self.custom_data and self.custom_gene_id_column: gene_sets_names, _ = self.custom_data.get_column_view( self.gene_set_label) gene_names, _ = self.custom_data.get_column_view( self.custom_gene_id_column) temp_dict = defaultdict(list) for set_name, gene_name in zip(gene_sets_names, gene_names): temp_dict[set_name].append(gene_name) g_sets = [] for key, value in temp_dict.items(): g_sets.append( geneset.GeneSet(gs_id=key, hierarchy=custom_set_hier, organism=self.custom_tax_id, name=key, genes=set(value))) self.gene_sets_obj.update(g_sets) def __update_hierarchy(self): self.set_hierarchy_model( self.hierarchy_widget, hierarchy_tree(self.gene_sets_obj.hierarchies())) self.set_selected_hierarchies() def update_tree_view(self): if self.use_reference_data and self.reference_data: self.init_gene_sets(reference_genes=self.reference_genes) else: self.init_gene_sets() def invalidate(self): # clear self.__reset_widget_state() self.update_info_box() if self.input_data is not None: # setup self.__construct_custom_gene_sets() self.__get_input_genes() self.__update_hierarchy() self.update_tree_view() def __check_organism_mismatch(self): """ Check if organisms from different inputs match. 
:return: True if there is a mismatch """ if self.tax_id is not None and self.custom_tax_id is not None: return self.tax_id != self.custom_tax_id return False def __get_reference_genes(self): self.reference_genes = [] if self.reference_attr_names: for variable in self.reference_data.domain.attributes: self.reference_genes.append( str( variable.attributes.get( self.reference_gene_id_attribute, '?'))) else: genes, _ = self.reference_data.get_column_view( self.reference_gene_id_column) self.reference_genes = [str(g) for g in genes] @Inputs.reference def handle_reference_genes(self, data): """ Set the (optional) input dataset with reference gene names. """ if data: self.reference_data = data self.reference_tax_id = str( self.reference_data.attributes.get(TAX_ID, None)) self.reference_attr_names = self.reference_data.attributes.get( GENE_AS_ATTRIBUTE_NAME, None) self.reference_gene_id_attribute = self.reference_data.attributes.get( GENE_ID_ATTRIBUTE, None) self.reference_gene_id_column = self.reference_data.attributes.get( GENE_ID_COLUMN, None) if not (self.reference_attr_names is not None and ((self.reference_gene_id_attribute is None) ^ (self.reference_gene_id_column is None))): if self.reference_tax_id is None: self.Error.missing_annotation() return self.Error.missing_gene_id() return elif self.reference_tax_id is None: self.Error.missing_tax_id() return self.__get_reference_genes() self.reference_radio_box.setEnabled(bool(self.reference_data)) self.invalidate() @Inputs.custom_sets def handle_custom_input(self, data): self.Error.clear() self.__reset_widget_state() self.custom_data = None self.custom_tax_id = None self.custom_use_attr_names = None self.custom_gene_id_attribute = None self.custom_gene_id_column = None self.gs_label_combobox.setDisabled(True) self.feature_model.set_domain(None) if data: self.custom_data = data self.custom_tax_id = str( self.custom_data.attributes.get(TAX_ID, None)) self.custom_use_attr_names = self.custom_data.attributes.get( 
GENE_AS_ATTRIBUTE_NAME, None) self.custom_gene_id_attribute = self.custom_data.attributes.get( GENE_ID_ATTRIBUTE, None) self.custom_gene_id_column = self.custom_data.attributes.get( GENE_ID_COLUMN, None) if not (self.custom_use_attr_names is not None and ((self.custom_gene_id_attribute is None) ^ (self.custom_gene_id_column is None))): if self.custom_tax_id is None: self.Error.missing_annotation() return self.Error.missing_gene_id() return elif self.custom_tax_id is None: self.Error.missing_tax_id() return if self.__check_organism_mismatch(): self.Error.organism_mismatch() return self.gs_label_combobox.setDisabled(False) self.feature_model.set_domain(self.custom_data.domain) if self.feature_model: self.gene_set_label = self.feature_model[0] self.invalidate() @Inputs.genes def handle_genes_input(self, data): self.closeContext() self.Error.clear() self.__reset_widget_state() # clear output self.Outputs.matched_genes.send(None) # clear input genes self.input_genes = [] self.gs_label_combobox.setDisabled(True) self.update_info_box() if data: self.input_data = data self.tax_id = str(self.input_data.attributes.get(TAX_ID, None)) self.use_attr_names = self.input_data.attributes.get( GENE_AS_ATTRIBUTE_NAME, None) self.gene_id_attribute = self.input_data.attributes.get( GENE_ID_ATTRIBUTE, None) self.gene_id_column = self.input_data.attributes.get( GENE_ID_COLUMN, None) if not (self.use_attr_names is not None and ((self.gene_id_attribute is None) ^ (self.gene_id_column is None))): if self.tax_id is None: self.Error.missing_annotation() return self.Error.missing_gene_id() return elif self.tax_id is None: self.Error.missing_tax_id() return if self.__check_organism_mismatch(): self.Error.organism_mismatch() return self.openContext(self.tax_id) # if input data change, we need to set feature model again if self.custom_data: self.gs_label_combobox.setDisabled(False) self.feature_model.set_domain(self.custom_data.domain) if self.feature_model: self.gene_set_label = 
self.feature_model[0] self.download_gene_sets() def update_info_box(self): info_string = '' if self.input_genes: info_string += '{} unique gene names on input.\n'.format( len(self.input_genes)) info_string += '{} genes on output.\n'.format( self.num_of_sel_genes) else: info_string += 'No genes on input.\n' self.input_info.setText(info_string) def on_gene_sets_download(self, result): # make sure this happens in the main thread. # Qt insists that widgets be created within the GUI(main) thread. assert threading.current_thread() == threading.main_thread() self.setStatusMessage('') if result: for res in result: g_sets = geneset.load_gene_sets(res, self.tax_id) self.gene_sets_obj.update([g_set for g_set in g_sets]) # add custom sets if there are any self.invalidate() self.update_info_box() def download_gene_sets(self): self.Error.clear() # reset hierarchy widget state self.hierarchy_widget.clear() # clear data view self.init_item_model() # get all gene sets for selected organism gene_sets = geneset.list_all(organism=self.tax_id) # status message self.setStatusMessage('downloading sets') worker = Worker(download_gene_sets, self.tax_id, gene_sets) worker.signals.result.connect(self.on_gene_sets_download) # move download process to worker thread self.threadpool.start(worker) def set_hierarchy_model(self, tree_widget, sets): def beautify_displayed_text(text): if '_' in text: return text.replace('_', ' ').title() else: return text # TODO: maybe optimize this code? 
for key, value in sets.items(): item = QTreeWidgetItem(tree_widget, [beautify_displayed_text(key)]) item.setFlags(item.flags() & (Qt.ItemIsUserCheckable | ~Qt.ItemIsSelectable | Qt.ItemIsEnabled)) item.setExpanded(True) item.hierarchy = key if value: item.setFlags(item.flags() | Qt.ItemIsTristate) self.set_hierarchy_model(item, value) else: if item.parent(): item.hierarchy = (item.parent().hierarchy, key) if not item.childCount() and not item.parent(): item.hierarchy = (key, ) def init_gene_sets(self, reference_genes=None): if self._task is not None: self.cancel() assert self._task is None self._task = Task() progress_advance = methodinvoke(self, "progress_advance") def callback(): if self._task.cancelled: raise KeyboardInterrupt() if self.progress_bar: progress_advance() if reference_genes is None: reference_genes = self.gene_sets_obj.genes() self.init_item_model() sets_to_display = self.get_hierarchies(only_selected=True) # save setting on selected hierarchies self.stored_selections = sets_to_display # save context self.closeContext() f = partial(self.set_items, self.gene_sets_obj, sets_to_display, set(self.input_genes), reference_genes, self.min_count if self.use_min_count else 1, callback=callback) progress_iterations = sum([ len(g_set) for hier, g_set in self.gene_sets_obj.map_hierarchy_to_sets().items() if hier in sets_to_display ]) self.progress_bar = ProgressBar(self, iterations=progress_iterations) self._task.future = self._executor.submit(f) self._task.watcher = FutureWatcher(self._task.future) self._task.watcher.done.connect(self._init_gene_sets_finished) self.openContext(self.tax_id) @Slot(concurrent.futures.Future) def _init_gene_sets_finished(self, f): assert self.thread() is QThread.currentThread() assert threading.current_thread() == threading.main_thread() assert self._task is not None assert self._task.future is f assert f.done() self._task = None self.progress_bar.finish() self.setStatusMessage('') try: results = f.result() # type: list 
[self.data_model.appendRow(model_item) for model_item in results] self.filter_proxy_model.setSourceModel(self.data_model) self._update_fdr() self.filter_data_view() except Exception as ex: print(ex) def set_selected_hierarchies(self): iterator = QTreeWidgetItemIterator(self.hierarchy_widget, QTreeWidgetItemIterator.All) while iterator.value(): # note: if hierarchy value is not a tuple, then this is just top level qTreeWidgetItem that # holds subcategories. We don't want to display all sets from category if type(iterator.value().hierarchy) is not str: if iterator.value().hierarchy in self.stored_selections: iterator.value().setCheckState(0, Qt.Checked) else: iterator.value().setCheckState(0, Qt.Unchecked) iterator += 1 # if no items are checked, we check first one at random if len(self.get_hierarchies(only_selected=True)) == 0: iterator = QTreeWidgetItemIterator( self.hierarchy_widget, QTreeWidgetItemIterator.NotChecked) while iterator.value(): if type(iterator.value().hierarchy) is not str: iterator.value().setCheckState(0, Qt.Checked) return iterator += 1 def get_hierarchies(self, **kwargs): """ return selected hierarchy """ only_selected = kwargs.get('only_selected', None) sets_to_display = list() if only_selected: iterator = QTreeWidgetItemIterator(self.hierarchy_widget, QTreeWidgetItemIterator.Checked) else: iterator = QTreeWidgetItemIterator(self.hierarchy_widget) while iterator.value(): # note: if hierarchy value is not a tuple, then this is just top level qTreeWidgetItem that # holds subcategories. 
We don't want to display all sets from category if type(iterator.value().hierarchy) is not str: if not only_selected: sets_to_display.append(iterator.value().hierarchy) else: if not iterator.value().isDisabled(): sets_to_display.append(iterator.value().hierarchy) iterator += 1 return sets_to_display def filter_data_view(self): filter_proxy = self.filter_proxy_model # type: FilterProxyModel model = filter_proxy.sourceModel() # type: QStandardItemModel assert isinstance(model, QStandardItemModel) search_term = self.search_pattern.lower().strip().split() # apply filtering rules filters = [ FilterProxyModel.Filter( self.TERM, Qt.DisplayRole, lambda value: all(fs in value.lower() for fs in search_term)) ] # if self.use_min_count: # filters.append( # FilterProxyModel.Filter( # self.COUNT, Qt.DisplayRole, # lambda value: value >= self.min_count, # ) # ) if self.use_p_value: filters.append( FilterProxyModel.Filter( self.P_VAL, Qt.DisplayRole, lambda value: value < self.max_p_value)) if self.use_max_fdr: filters.append( FilterProxyModel.Filter(self.FDR, Qt.DisplayRole, lambda value: value < self.max_fdr)) filter_proxy.set_filters(filters) if model.rowCount() and not filter_proxy.rowCount(): self.Warning.all_sets_filtered() else: self.Warning.clear() def __get_source_data(self, proxy_row_index, column): proxy_index = self.filter_proxy_model.index(proxy_row_index, column) source_index = self.filter_proxy_model.mapToSource(proxy_index) return source_index.data(role=Qt.DisplayRole) def _update_fdr(self): # Update the FDR in place due to a changed selected categories set and # results for all of these categories are already available. 
proxy = self.filter_proxy_model model = self.filter_proxy_model.sourceModel() if model is not None: assert isinstance(model, QStandardItemModel) p_values = [(i, self.__get_source_data(i, self.P_VAL)) for i in range(proxy.rowCount())] fdr_values = FDR([p_val for _, p_val in p_values]) for i, fdr_val in zip([i for i, _ in p_values], fdr_values): proxy_index = proxy.index(i, self.FDR) source_index = self.filter_proxy_model.mapToSource(proxy_index) source_item = model.item(source_index.row(), self.FDR) source_item.setData(fdr_val, role=Qt.DisplayRole) source_item.setData(fdr_val, role=Qt.ToolTipRole) def commit(self): selection_model = self.data_view.selectionModel() if selection_model: # genes_from_set = selection_model.selectedRows(GENES) matched_genes = selection_model.selectedRows(self.COUNT) if matched_genes and self.input_genes: genes = [ model_index.data(Qt.UserRole) for model_index in matched_genes ] output_genes = [ gene_name for gene_name in list(set.union(*genes)) ] self.num_of_sel_genes = len(output_genes) self.update_info_box() if self.use_attr_names: selected = [ column for column in self.input_data.domain.attributes if self.gene_id_attribute in column.attributes and str(column.attributes[ self.gene_id_attribute]) in output_genes ] domain = Domain(selected, self.input_data.domain.class_vars, self.input_data.domain.metas) new_data = self.input_data.from_table( domain, self.input_data) self.Outputs.matched_genes.send(new_data) else: selected_rows = [] for row_index, row in enumerate(self.input_data): gene_in_row = str(row[self.gene_id_column]) if gene_in_row in self.input_genes and gene_in_row in output_genes: selected_rows.append(row_index) if selected_rows: selected = self.input_data[selected_rows] else: selected = None self.Outputs.matched_genes.send(selected) def assign_delegates(self): self.data_view.setItemDelegateForColumn(self.GENES, NumericalColumnDelegate(self)) self.data_view.setItemDelegateForColumn(self.COUNT, NumericalColumnDelegate(self)) 
self.data_view.setItemDelegateForColumn(self.REFERENCE, NumericalColumnDelegate(self)) self.data_view.setItemDelegateForColumn( self.P_VAL, NumericalColumnDelegate(self, precision=2, notation='e')) self.data_view.setItemDelegateForColumn( self.FDR, NumericalColumnDelegate(self, precision=2, notation='e')) self.data_view.setItemDelegateForColumn( self.ENRICHMENT, NumericalColumnDelegate(self, precision=1)) def setup_filter_model(self): self.filter_proxy_model = FilterProxyModel() self.filter_proxy_model.setFilterKeyColumn(self.TERM) self.data_view.setModel(self.filter_proxy_model) def setup_filter_area(self): h_layout = QHBoxLayout() h_layout.setSpacing(100) h_widget = widgetBox(self.mainArea, orientation=h_layout) spin(h_widget, self, 'min_count', 0, 100, label='Count', tooltip='Minimum genes count', checked='use_min_count', callback=self.invalidate, callbackOnReturn=True, checkCallback=self.invalidate) doubleSpin(h_widget, self, 'max_p_value', 0.0, 1.0, 0.0001, label='p-value', tooltip='Maximum p-value of the enrichment score', checked='use_p_value', callback=self.filter_data_view, callbackOnReturn=True, checkCallback=self.filter_data_view) doubleSpin(h_widget, self, 'max_fdr', 0.0, 1.0, 0.0001, label='FDR', tooltip='Maximum false discovery rate', checked='use_max_fdr', callback=self.filter_data_view, callbackOnReturn=True, checkCallback=self.filter_data_view) self.line_edit_filter = lineEdit(h_widget, self, 'search_pattern') self.line_edit_filter.setPlaceholderText('Filter gene sets ...') self.line_edit_filter.textChanged.connect(self.filter_data_view) def setup_control_area(self): info_box = vBox(self.controlArea, 'Info') self.input_info = widgetLabel(info_box) box = vBox(self.controlArea, "Custom Gene Sets") self.gs_label_combobox = comboBox(box, self, "gene_set_label", sendSelectedValue=True, model=self.feature_model, callback=self.invalidate) self.gs_label_combobox.setDisabled(True) self.reference_radio_box = radioButtonsInBox( self.controlArea, self, 
"use_reference_data", ["Entire genome", "Reference gene set (input)"], tooltips=[ "Use entire genome (for gene set enrichment)", "Use reference set of genes" ], box="Reference", callback=self.invalidate) self.reference_radio_box.setEnabled(False) hierarchy_box = widgetBox(self.controlArea, "Gene Set Categories") self.hierarchy_widget = QTreeWidget(self) self.hierarchy_widget.setEditTriggers(QTreeView.NoEditTriggers) self.hierarchy_widget.setHeaderLabels([' ']) self.hierarchy_widget.itemClicked.connect(self.update_tree_view) hierarchy_box.layout().addWidget(self.hierarchy_widget) self.commit_button = auto_commit(self.controlArea, self, "auto_commit", "&Commit", box=False) def setup_gui(self): # control area self.setup_control_area() # main area self.data_view = QTreeView() self.setup_filter_model() self.setup_filter_area() self.data_view.setAlternatingRowColors(True) self.data_view.sortByColumn(self.COUNT, Qt.DescendingOrder) self.data_view.setSortingEnabled(True) self.data_view.setSelectionMode(QTreeView.ExtendedSelection) self.data_view.setEditTriggers(QTreeView.NoEditTriggers) self.data_view.viewport().setMouseTracking(False) self.data_view.setItemDelegateForColumn( self.TERM, LinkStyledItemDelegate(self.data_view)) self.data_view.selectionModel().selectionChanged.connect(self.commit) self.mainArea.layout().addWidget(self.data_view) self.data_view.header().setSectionResizeMode( QHeaderView.ResizeToContents) self.assign_delegates() @staticmethod def set_items(gene_sets, sets_to_display, genes, ref, count_treshold, callback): model_items = [] if not genes: return for gene_set in gene_sets: if gene_set.hierarchy not in sets_to_display: continue enrichemnt_result = gene_set.set_enrichment( ref, genes.intersection(ref)) callback() if len(enrichemnt_result.query) >= count_treshold: category_column = QStandardItem() name_column = QStandardItem() count_column = QStandardItem() genes_column = QStandardItem() ref_column = QStandardItem() pval_column = QStandardItem() 
fdr_column = QStandardItem() enrichemnt_column = QStandardItem() category_column.setData(", ".join(gene_set.hierarchy), Qt.DisplayRole) name_column.setData(gene_set.name, Qt.DisplayRole) name_column.setData(gene_set.name, Qt.ToolTipRole) name_column.setData(gene_set.link, LinkRole) name_column.setForeground(QColor(Qt.blue)) count_column.setData(len(enrichemnt_result.query), Qt.DisplayRole) count_column.setData(set(enrichemnt_result.query), Qt.UserRole) genes_column.setData(len(gene_set.genes), Qt.DisplayRole) genes_column.setData( set(gene_set.genes), Qt.UserRole ) # store genes to get then on output on selection ref_column.setData(len(enrichemnt_result.reference), Qt.DisplayRole) pval_column.setData(enrichemnt_result.p_value, Qt.DisplayRole) pval_column.setData(enrichemnt_result.p_value, Qt.ToolTipRole) enrichemnt_column.setData(enrichemnt_result.enrichment_score, Qt.DisplayRole) enrichemnt_column.setData(enrichemnt_result.enrichment_score, Qt.ToolTipRole) model_items.append([ count_column, ref_column, pval_column, fdr_column, enrichemnt_column, genes_column, category_column, name_column ]) return model_items def init_item_model(self): if self.data_model: self.data_model.clear() self.setup_filter_model() else: self.data_model = QStandardItemModel() self.data_model.setSortRole(Qt.UserRole) self.data_model.setHorizontalHeaderLabels(self.DATA_HEADER_LABELS) def sizeHint(self): return QSize(1280, 960)
class OWBatchNorm(OWWidget):
    """Widget that removes batch effects from the input data table.

    The user picks a link method and ticks the batch variables (primitive
    class variables and metas of the input); the data is then passed through
    ``SCBatchNormalizer`` and sent to the output.
    """

    name = "Batch Effect Removal"
    description = "Batch effect normalization on Single Cell data set."
    icon = "icons/BatchEffectRemoval.svg"
    priority = 230

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        data = Output("Data", Table)

    class Error(OWWidget.Error):
        # The whole message text is supplied by the caller: commit() passes
        # str(exception) as the single format argument.
        general_error = Msg("{}")
        discrete_attributes = Msg("Data with discrete attributes "
                                  "can not be processed.")

    class Warning(OWWidget.Warning):
        missing_values = Msg("Missing values have been replaced with 0.")
        negative_values = Msg("Unable to use current settings due "
                              "to negative values in data.")

    resizing_enabled = False
    want_main_area = False

    settingsHandler = PerfectDomainContextHandler()
    batch_vars = ContextSetting([])
    link_method = Setting(LinkMethod.IDENTITY_LINK)
    skip_zeros = Setting(False)
    auto_commit = Setting(True)

    def __init__(self, parent=None):
        """Build the control area: info label, method box and variable list."""
        super().__init__(parent)
        self.data = None

        # Info
        infobox = gui.widgetBox(self.controlArea, "Info")
        self.info_label = gui.widgetLabel(infobox, "No data on input.")

        # Link method
        method_box = gui.widgetBox(self.controlArea, "Method")
        gui.comboBox(method_box, self, "link_method",
                     items=LinkMethod.items(),
                     callback=self.__link_method_changed)
        gui.separator(method_box)
        # The lambda looks up self.commit at call time rather than binding it
        # now (presumably because gui.auto_commit below may rebind it --
        # TODO confirm).
        self.skip_zeros_check = gui.checkBox(
            method_box, self, "skip_zeros", "Skip zero expressions",
            enabled=self.link_method != LinkMethod.LOG_LINK,
            callback=lambda: self.commit())

        # Batch Variable Selection
        header_schema = (("selected", ""),
                         ("variable", "Variable"),
                         ("count", "#"),
                         ("score", "Score"))
        header_labels = [label for _, label in header_schema]
        header = namedtuple("header", [tag for tag, _ in header_schema])
        # self.Header.<tag> is the column index of that tag.
        self.Header = header(*range(len(header_labels)))

        batch_box = gui.widgetBox(self.controlArea, "Batch Variable Selection")
        self.view = QTreeView()
        self.model = QStandardItemModel()
        self.model.itemChanged.connect(self.__selected_batch_vars_changed)
        self.model.setHorizontalHeaderLabels(header_labels)
        batch_box.layout().addWidget(self.view)
        self._setup_view()

        gui.auto_commit(self.controlArea, self, "auto_commit",
                        "Apply", "Apply Automatically")

    def __link_method_changed(self):
        """React to a new link method.

        The "skip zeros" check box is disabled and forced to checked when the
        method is ``LinkMethod.LOG_LINK``; the result is then committed.
        """
        enable = self.link_method != LinkMethod.LOG_LINK
        self.skip_zeros_check.setEnabled(enable)
        if not enable:
            self.skip_zeros_check.setChecked(True)
        self.commit()

    def __selected_batch_vars_changed(self, item):
        """Keep ``batch_vars`` in sync with a changed model item and commit.

        The variable stored on ``item`` under ``VariableRole`` is added when
        the item is not unchecked and removed otherwise.  Adding an already
        present variable or removing an absent one is a no-op, so a repeated
        or unrelated ``itemChanged`` signal cannot duplicate an entry or
        raise ``ValueError``.
        """
        var = item.data(VariableRole)
        # Explicit comparison instead of relying on the enum's truthiness.
        if item.checkState() != Qt.Unchecked:
            if var not in self.batch_vars:
                self.batch_vars.append(var)
        elif var in self.batch_vars:
            self.batch_vars.remove(var)
        self.commit()

    def _setup_view(self):
        """Attach the model to the tree view and configure columns/delegates."""
        view = self.view
        view.setModel(self.model)
        view.setSelectionMode(QTreeView.NoSelection)
        view.setSortingEnabled(True)
        view.setRootIsDecorated(False)
        view.setItemDelegateForColumn(self.Header.count,
                                      IntegralDelegate(self))
        view.setItemDelegateForColumn(self.Header.score,
                                      RealDelegate(self))

        header_view = view.header()
        # All columns fit their contents, except the variable column, which
        # stretches.
        header_view.setSectionResizeMode(QHeaderView.ResizeToContents)
        header_view.setStretchLastSection(False)
        header_view.setSectionResizeMode(self.Header.variable,
                                         QHeaderView.Stretch)
        view.setFocus()

    @Inputs.data
    def set_data(self, data):
        """Handle a new input table.

        Closes the current settings context, resets the widget, validates the
        data, reopens the context for ``data`` and, when the data is usable,
        rebuilds the variable list before committing.
        """
        self.closeContext()
        self.clear()
        self.data = data
        self._setup_info_label()
        self._check_data()
        self.openContext(data)
        if self.data is not None:
            # Re-bind the batch variables (by name) to the variables of the
            # incoming domain.
            self.batch_vars = [data.domain[v.name] for v in self.batch_vars]
            self._setup_model()
        self.commit()

    def clear(self):
        """Forget the selected batch variables and empty the variable model."""
        self.batch_vars = []
        if self.model:
            n_rows = self.model.rowCount()
            self.model.removeRows(0, n_rows)

    def _setup_info_label(self):
        """Show the number of cells, genes and meta features of the input."""
        text = "No data on input."
        if self.data is not None:
            domain = self.data.domain
            text = "{} cells, {} genes\n".format(len(self.data),
                                                 len(domain.attributes))
            if len(domain.metas):
                text += "{} meta features".format(len(domain.metas))
            else:
                text += "(no meta features)"
        self.info_label.setText(text)

    def _check_data(self):
        """Validate ``self.data`` and set the matching error/warning.

        Data with discrete attributes is rejected (``self.data`` becomes
        ``None``).  NaNs in ``X`` are replaced via ``np.nan_to_num`` on a copy
        of the table, so the table object received on the input is left
        untouched.
        """
        self.clear_messages()
        if self.data and self.data.domain.has_discrete_attributes():
            self.data = None
            self.Error.discrete_attributes()
        if self.data and np.isnan(self.data.X).any():
            # Work on a private copy: the input table is shared with whoever
            # sent it and must not be modified in place.
            self.data = self.data.copy()
            self.data.X = np.nan_to_num(self.data.X)
            self.Warning.missing_values()

    def _setup_model(self):
        """Add one row per primitive class/meta variable to the model.

        Each row holds a check box item, the variable name, the number of
        values (discrete variables only) and the ``ScBatchScorer`` score.
        """
        estimator = ScBatchScorer()
        for var in self.data.domain.class_vars + self.data.domain.metas:
            if not var.is_primitive():
                continue
            try:
                score = float(estimator.score_data(self.data, var))
            except Exception:
                # Best effort: a variable that cannot be scored is still
                # listed, with NaN as its score.
                score = np.nan
            self.model.appendRow([
                self.__selected_item(var),
                self.__variable_item(var),
                self.__count_item(var),
                self.__score_item(score)
            ])

    def __selected_item(self, var):
        """Return the checkable item for ``var``, checked if it is selected."""
        item = QStandardItem()
        item.setData(var, VariableRole)
        item.setCheckable(True)
        select = var in self.batch_vars
        item.setCheckState(Qt.Checked if select else Qt.Unchecked)
        item.setEditable(False)
        return item

    def __variable_item(self, var):
        """Return the read-only item showing the name and icon of ``var``."""
        item = QStandardItem()
        item.setData(var.name, Qt.DisplayRole)
        item.setData(gui.attributeIconDict[var], Qt.DecorationRole)
        item.setEditable(False)
        return item

    def __count_item(self, var):
        """Return the read-only count item; empty for non-discrete ``var``."""
        item = QStandardItem()
        if var.is_discrete:
            item.setData(len(var.values), Qt.DisplayRole)
        item.setEditable(False)
        return item

    def __score_item(self, score):
        """Return the read-only item displaying ``score``."""
        item = QStandardItem()
        item.setData(score, Qt.DisplayRole)
        item.setEditable(False)
        return item

    def commit(self):
        """Normalize the data with the current settings and send the result.

        With ``skip_zeros`` set and negative values in ``X`` a warning is
        shown and the data is passed through unchanged.  Any exception raised
        by the normalizer is reported through ``Error.general_error`` and
        ``None`` is sent.
        """
        data = None
        self.Error.general_error.clear()
        self.Warning.negative_values.clear()
        if self.data is not None:
            # Test the cheap flag first so X is only scanned when needed.
            if self.skip_zeros and (self.data.X < 0).any():
                self.Warning.negative_values()
                data = self.data
            else:
                try:
                    data = SCBatchNormalizer(
                        LinkMethod.items()[self.link_method],
                        self.skip_zeros, self.batch_vars)(self.data)
                except Exception as e:
                    self.Error.general_error(str(e))
                    data = None
        self.Outputs.data.send(data)

    def send_report(self):
        """Report the chosen method and the selected batch variables."""
        method = LinkMethod.items()[self.link_method]
        if self.skip_zeros:
            method += " (Skip zero expressions)"
        if self.batch_vars:
            variables = ", ".join([v.name for v in self.batch_vars])
        else:
            variables = "None"
        self.report_items("", [("Method", method),
                               ("Batch variable selection", variables)])