def on_done(self, result: Results): model = QStandardItemModel() for item in result.items: model.appendRow(item) model.setSortRole(Qt.UserRole) model.setHorizontalHeaderLabels(Header.labels()) self.filter_proxy_model.setSourceModel(model) self.tree_view.selectionModel().selectionChanged.connect(self.commit) self.filter_view() self.update_info_box()
def __on_enrichment_finished(self, results): assert QThread.currentThread() is self.thread() self.__state &= ~OWSetEnrichment.RunningEnrichment query, reference, results = results if self.annotationsChartView.model(): self.annotationsChartView.model().clear() nquery = len(query) nref = len(reference) maxcount = max((len(e.query_mapped) for _, e in results), default=1) maxrefcount = max((len(e.reference_mapped) for _, e in results), default=1) nspaces = int(math.ceil(math.log10(maxcount or 1))) refspaces = int(math.ceil(math.log(maxrefcount or 1))) query_fmt = "%" + str(nspaces) + "s (%.2f%%)" ref_fmt = "%" + str(refspaces) + "s (%.2f%%)" def fmt_count(fmt, count, total): return fmt % (count, 100.0 * count / (total or 1)) fmt_query_count = partial(fmt_count, query_fmt) fmt_ref_count = partial(fmt_count, ref_fmt) linkFont = QFont(self.annotationsChartView.viewOptions().font) linkFont.setUnderline(True) def item(value=None, tooltip=None, user=None): si = QStandardItem() if value is not None: si.setData(value, Qt.DisplayRole) if tooltip is not None: si.setData(tooltip, Qt.ToolTipRole) if user is not None: si.setData(user, Qt.UserRole) else: si.setData(value, Qt.UserRole) return si model = QStandardItemModel() model.setSortRole(Qt.UserRole) model.setHorizontalHeaderLabels([ "Category", "Term", "Count", "Reference count", "p-value", "FDR", "Enrichment" ]) for i, (gset, enrich) in enumerate(results): if len(enrich.query_mapped) == 0: continue nquery_mapped = len(enrich.query_mapped) nref_mapped = len(enrich.reference_mapped) row = [ item(", ".join(gset.hierarchy)), item(gsname(gset), tooltip=gset.link), item(fmt_query_count(nquery_mapped, nquery), tooltip=nquery_mapped, user=nquery_mapped), item(fmt_ref_count(nref_mapped, nref), tooltip=nref_mapped, user=nref_mapped), item(fmtp(enrich.p_value), user=enrich.p_value), item( ), # column 5, FDR, is computed in filterAnnotationsChartView item(enrich.enrichment_score, tooltip="%.3f" % enrich.enrichment_score, user=enrich.enrichment_score) ] row[0].geneset = gset row[0].enrichment = enrich row[1].setData(gset.link, gui.LinkRole) row[1].setFont(linkFont) row[1].setForeground(QColor(Qt.blue)) model.appendRow(row) self.annotationsChartView.setModel(model) self.annotationsChartView.selectionModel().selectionChanged.connect( self.commit) if not model.rowCount(): self.warning(0, "No enriched sets found.") else: self.warning(0) allnames = set( gsname(geneset) for geneset, (count, _, _, _) in results if count) allnames |= reduce(operator.ior, (set(word_split(name)) for name in allnames), set()) self.filterCompleter.setModel(None) self.completerModel = QStringListModel(sorted(allnames)) self.filterCompleter.setModel(self.completerModel) if results: max_score = max( (e.enrichment_score for _, e in results if np.isfinite(e.enrichment_score)), default=1) self.annotationsChartView.setItemDelegateForColumn( 6, BarItemDelegate(self, scale=(0.0, max_score))) self.annotationsChartView.setItemDelegateForColumn( 1, gui.LinkStyledItemDelegate(self.annotationsChartView)) header = self.annotationsChartView.header() for i in range(model.columnCount()): sh = self.annotationsChartView.sizeHintForColumn(i) sh = max(sh, header.sectionSizeHint(i)) self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30)) # self.annotationsChartView.resizeColumnToContents(i) self.filterAnnotationsChartView() self.progressBarFinished() self.setStatusMessage("")
def __on_enrichment_finished(self, results): assert QThread.currentThread() is self.thread() self.__state &= ~OWSetEnrichment.RunningEnrichment query, reference, results = results if self.annotationsChartView.model(): self.annotationsChartView.model().clear() nquery = len(query) nref = len(reference) maxcount = max((len(e.query_mapped) for _, e in results), default=1) maxrefcount = max((len(e.reference_mapped) for _, e in results), default=1) nspaces = int(math.ceil(math.log10(maxcount or 1))) refspaces = int(math.ceil(math.log(maxrefcount or 1))) query_fmt = "%" + str(nspaces) + "s (%.2f%%)" ref_fmt = "%" + str(refspaces) + "s (%.2f%%)" def fmt_count(fmt, count, total): return fmt % (count, 100.0 * count / (total or 1)) fmt_query_count = partial(fmt_count, query_fmt) fmt_ref_count = partial(fmt_count, ref_fmt) linkFont = QFont(self.annotationsChartView.viewOptions().font) linkFont.setUnderline(True) def item(value=None, tooltip=None, user=None): si = QStandardItem() if value is not None: si.setData(value, Qt.DisplayRole) if tooltip is not None: si.setData(tooltip, Qt.ToolTipRole) if user is not None: si.setData(user, Qt.UserRole) else: si.setData(value, Qt.UserRole) return si model = QStandardItemModel() model.setSortRole(Qt.UserRole) model.setHorizontalHeaderLabels( ["Category", "Term", "Count", "Reference count", "p-value", "FDR", "Enrichment"]) for i, (gset, enrich) in enumerate(results): if len(enrich.query_mapped) == 0: continue nquery_mapped = len(enrich.query_mapped) nref_mapped = len(enrich.reference_mapped) row = [ item(", ".join(gset.hierarchy)), item(gsname(gset), tooltip=gset.link), item(fmt_query_count(nquery_mapped, nquery), tooltip=nquery_mapped, user=nquery_mapped), item(fmt_ref_count(nref_mapped, nref), tooltip=nref_mapped, user=nref_mapped), item(fmtp(enrich.p_value), user=enrich.p_value), item(), # column 5, FDR, is computed in filterAnnotationsChartView item(enrich.enrichment_score, tooltip="%.3f" % enrich.enrichment_score, user=enrich.enrichment_score) ] row[0].geneset = gset row[0].enrichment = enrich row[1].setData(gset.link, gui.LinkRole) row[1].setFont(linkFont) row[1].setForeground(QColor(Qt.blue)) model.appendRow(row) self.annotationsChartView.setModel(model) self.annotationsChartView.selectionModel().selectionChanged.connect( self.commit ) if not model.rowCount(): self.warning(0, "No enriched sets found.") else: self.warning(0) allnames = set(gsname(geneset) for geneset, (count, _, _, _) in results if count) allnames |= reduce(operator.ior, (set(word_split(name)) for name in allnames), set()) self.filterCompleter.setModel(None) self.completerModel = QStringListModel(sorted(allnames)) self.filterCompleter.setModel(self.completerModel) if results: max_score = max((e.enrichment_score for _, e in results if np.isfinite(e.enrichment_score)), default=1) self.annotationsChartView.setItemDelegateForColumn( 6, BarItemDelegate(self, scale=(0.0, max_score)) ) self.annotationsChartView.setItemDelegateForColumn( 1, gui.LinkStyledItemDelegate(self.annotationsChartView) ) header = self.annotationsChartView.header() for i in range(model.columnCount()): sh = self.annotationsChartView.sizeHintForColumn(i) sh = max(sh, header.sectionSizeHint(i)) self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30)) # self.annotationsChartView.resizeColumnToContents(i) self.filterAnnotationsChartView() self.progressBarFinished() self.setStatusMessage("")
class OWGeneSets(OWWidget): name = "Gene Sets" description = "" icon = "icons/OWGeneSets.svg" priority = 9 want_main_area = True COUNT, GENES, CATEGORY, TERM = range(4) DATA_HEADER_LABELS = ["Count", 'Genes In Set', 'Category', 'Term'] organism = Setting(None, schema_only=True) stored_gene_sets_selection = Setting([], schema_only=True) selected_rows = Setting([], schema_only=True) custom_gene_set_indicator = Setting(None, schema_only=True) min_count = Setting(5) use_min_count = Setting(True) auto_commit = Setting(True) class Inputs: genes = Input("Data", Table) custom_sets = Input('Custom Gene Sets', Table) class Outputs: matched_genes = Output("Matched Genes", Table) class Information(OWWidget.Information): pass class Warning(OWWidget.Warning): all_sets_filtered = Msg('All sets were filtered out.') class Error(OWWidget.Error): organism_mismatch = Msg('Organism in input data and custom gene sets does not match') missing_annotation = Msg(ERROR_ON_MISSING_ANNOTATION) missing_gene_id = Msg(ERROR_ON_MISSING_GENE_ID) missing_tax_id = Msg(ERROR_ON_MISSING_TAX_ID) cant_reach_host = Msg("Host orange.biolab.si is unreachable.") cant_load_organisms = Msg("No available organisms, please check your connection.") def __init__(self): super().__init__() # commit self.commit_button = None # progress bar self.progress_bar = None self.progress_bar_iterations = None # data self.input_data = None self.input_genes = [] self.tax_id = None self.use_attr_names = None self.gene_id_attribute = None self.gene_id_column = None # custom gene sets self.custom_data = None self.feature_model = DomainModel(valid_types=(DiscreteVariable, StringVariable)) self.custom_gs_col_box = None self.gs_label_combobox = None self.custom_tax_id = None self.custom_use_attr_names = None self.custom_gene_id_attribute = None self.custom_gene_id_column = None self.num_of_custom_sets = None # Gene Sets widget self.gs_widget = None # info box self.input_info = None self.num_of_sel_genes = 0 # filter self.line_edit_filter = None self.search_pattern = '' self.organism_select_combobox = None # data model view self.data_view = None self.data_model = None # gene matcher NCBI self.gene_matcher = None # filter proxy model self.filter_proxy_model = None # hierarchy widget self.hierarchy_widget = None self.hierarchy_state = None # spinbox self.spin_widget = None # threads self.threadpool = QThreadPool(self) self.workers = None self._task = None # type: Optional[Task] self._executor = ThreadExecutor() # gui self.setup_gui() def __reset_widget_state(self): self.update_info_box() # clear data view self.init_item_model() # reset filters self.setup_filter_model() def cancel(self): """ Cancel the current task (if any). """ if self._task is not None: self._task.cancel() assert self._task.future.done() # disconnect the `_task_finished` slot self._task.watcher.done.disconnect(self._init_gene_sets_finished) self._task = None @Slot() def progress_advance(self): # GUI should be updated in main thread. That's why we are calling advance method here if self.progress_bar: self.progress_bar.advance() def __get_input_genes(self): self.input_genes = [] if self.use_attr_names: for variable in self.input_data.domain.attributes: self.input_genes.append(str(variable.attributes.get(self.gene_id_attribute, '?'))) else: genes, _ = self.input_data.get_column_view(self.gene_id_column) self.input_genes = [str(g) for g in genes] def handle_custom_gene_sets(self, select_customs_flag=False): if self.custom_gene_set_indicator: if self.custom_data is not None and self.custom_gene_id_column is not None: if self.__check_organism_mismatch(): # self.gs_label_combobox.setDisabled(True) self.Error.organism_mismatch() self.gs_widget.update_gs_hierarchy() return if isinstance(self.custom_gene_set_indicator, DiscreteVariable): labels = self.custom_gene_set_indicator.values gene_sets_names = [ labels[int(idx)] for idx in self.custom_data.get_column_view(self.custom_gene_set_indicator)[0] ] else: gene_sets_names, _ = self.custom_data.get_column_view(self.custom_gene_set_indicator) self.num_of_custom_sets = len(set(gene_sets_names)) gene_names, _ = self.custom_data.get_column_view(self.custom_gene_id_column) hierarchy_title = (self.custom_data.name if self.custom_data.name else 'Custom sets',) try: self.gs_widget.add_custom_sets( gene_sets_names, gene_names, hierarchy_title=hierarchy_title, select_customs_flag=select_customs_flag, ) except geneset.GeneSetException: pass # self.gs_label_combobox.setDisabled(False) else: self.gs_widget.update_gs_hierarchy() self.update_info_box() def update_tree_view(self): self.init_gene_sets() def invalidate(self): # clear self.__reset_widget_state() self.update_info_box() if self.input_data is not None: # setup self.__get_input_genes() self.update_tree_view() def __check_organism_mismatch(self): """ Check if organisms from different inputs match. :return: True if there is a mismatch """ if self.tax_id is not None and self.custom_tax_id is not None: return self.tax_id != self.custom_tax_id return False def __get_reference_genes(self): self.reference_genes = [] if self.reference_attr_names: for variable in self.reference_data.domain.attributes: self.reference_genes.append(str(variable.attributes.get(self.reference_gene_id_attribute, '?'))) else: genes, _ = self.reference_data.get_column_view(self.reference_gene_id_column) self.reference_genes = [str(g) for g in genes] @Inputs.custom_sets def handle_custom_input(self, data): self.Error.clear() self.__reset_widget_state() self.custom_data = None self.custom_tax_id = None self.custom_use_attr_names = None self.custom_gene_id_attribute = None self.custom_gene_id_column = None self.feature_model.set_domain(None) if data: self.custom_data = data self.feature_model.set_domain(self.custom_data.domain) self.custom_tax_id = str(self.custom_data.attributes.get(TAX_ID, None)) self.custom_use_attr_names = self.custom_data.attributes.get(GENE_AS_ATTRIBUTE_NAME, None) self.custom_gene_id_attribute = self.custom_data.attributes.get(GENE_ID_ATTRIBUTE, None) self.custom_gene_id_column = self.custom_data.attributes.get(GENE_ID_COLUMN, None) if self.gs_label_combobox is None: self.gs_label_combobox = comboBox( self.custom_gs_col_box, self, "custom_gene_set_indicator", sendSelectedValue=True, model=self.feature_model, callback=self.on_gene_set_indicator_changed, ) self.custom_gs_col_box.show() if self.custom_gene_set_indicator in self.feature_model: index = self.feature_model.indexOf(self.custom_gene_set_indicator) self.custom_gene_set_indicator = self.feature_model[index] else: self.custom_gene_set_indicator = self.feature_model[0] else: self.custom_gs_col_box.hide() self.gs_widget.clear_custom_sets() self.handle_custom_gene_sets(select_customs_flag=self.custom_gene_set_indicator is not None) self.invalidate() @Inputs.genes def handle_genes_input(self, data): self.Error.clear() self.__reset_widget_state() # clear output self.Outputs.matched_genes.send(None) # clear input values self.input_genes = [] self.input_data = None self.tax_id = None self.use_attr_names = None self.gene_id_attribute = None self.gs_widget.clear() self.gs_widget.clear_gene_sets() self.update_info_box() if data: self.input_data = data self.tax_id = str(self.input_data.attributes.get(TAX_ID, None)) self.use_attr_names = self.input_data.attributes.get(GENE_AS_ATTRIBUTE_NAME, None) self.gene_id_attribute = self.input_data.attributes.get(GENE_ID_ATTRIBUTE, None) self.gene_id_column = self.input_data.attributes.get(GENE_ID_COLUMN, None) self.update_info_box() if not ( self.use_attr_names is not None and ((self.gene_id_attribute is None) ^ (self.gene_id_column is None)) ): if self.tax_id is None: self.Error.missing_annotation() return self.Error.missing_gene_id() return elif self.tax_id is None: self.Error.missing_tax_id() return if self.__check_organism_mismatch(): self.Error.organism_mismatch() return self.gs_widget.load_gene_sets(self.tax_id) # if input data change, we need to refresh custom sets if self.custom_data: self.gs_widget.clear_custom_sets() self.handle_custom_gene_sets() self.invalidate() def update_info_box(self): info_string = '' if self.input_genes: info_string += '{} unique gene names on input.\n'.format(len(self.input_genes)) info_string += '{} genes on output.\n'.format(self.num_of_sel_genes) else: if self.input_data: if not any([self.gene_id_column, self.gene_id_attribute]): info_string += 'Input data with incorrect meta data.\nUse Gene Name Matcher widget.' else: info_string += 'No data on input.\n' if self.custom_data: info_string += '{} marker genes in {} sets\n'.format(self.custom_data.X.shape[0], self.num_of_custom_sets) self.input_info.setText(info_string) def create_partial(self): return partial( self.set_items, self.gs_widget.gs_object, self.stored_gene_sets_selection, set(self.input_genes), self.callback, ) def callback(self): if self._task.cancelled: raise KeyboardInterrupt() if self.progress_bar: methodinvoke(self, "progress_advance")() def init_gene_sets(self): if self._task is not None: self.cancel() assert self._task is None self._task = Task() self.init_item_model() # save setting on selected hierarchies self.stored_gene_sets_selection = self.gs_widget.get_hierarchies(only_selected=True) f = self.create_partial() progress_iterations = sum( ( len(g_set) for hier, g_set in self.gs_widget.gs_object.map_hierarchy_to_sets().items() if hier in self.stored_gene_sets_selection ) ) self.progress_bar = ProgressBar(self, iterations=progress_iterations) self._task.future = self._executor.submit(f) self._task.watcher = FutureWatcher(self._task.future) self._task.watcher.done.connect(self._init_gene_sets_finished) @Slot(concurrent.futures.Future) def _init_gene_sets_finished(self, f): assert self.thread() is QThread.currentThread() assert threading.current_thread() == threading.main_thread() assert self._task is not None assert self._task.future is f assert f.done() self._task = None self.progress_bar.finish() self.setStatusMessage('') try: results = f.result() # type: list [self.data_model.appendRow(model_item) for model_item in results] self.filter_proxy_model.setSourceModel(self.data_model) self.data_view.selectionModel().selectionChanged.connect(self.commit) self.filter_data_view() self.set_selection() self.update_info_box() except Exception as ex: print(ex) def create_filters(self): search_term = self.search_pattern.lower().strip().split() filters = [ FilterProxyModel.Filter( self.TERM, Qt.DisplayRole, lambda value: all(fs in value.lower() for fs in search_term) ) ] if self.use_min_count: filters.append(FilterProxyModel.Filter(self.COUNT, Qt.DisplayRole, lambda value: value >= self.min_count)) return filters def filter_data_view(self): filter_proxy = self.filter_proxy_model # type: FilterProxyModel model = filter_proxy.sourceModel() # type: QStandardItemModel if isinstance(model, QStandardItemModel): # apply filtering rules filter_proxy.set_filters(self.create_filters()) if model.rowCount() and not filter_proxy.rowCount(): self.Warning.all_sets_filtered() else: self.Warning.clear() def set_selection(self): if len(self.selected_rows): view = self.data_view model = self.data_model row_model_indexes = [model.indexFromItem(model.item(i)) for i in self.selected_rows] proxy_rows = [self.filter_proxy_model.mapFromSource(i).row() for i in row_model_indexes] if model.rowCount() <= self.selected_rows[-1]: return header_count = view.header().count() - 1 selection = QItemSelection() for row_index in proxy_rows: selection.append( QItemSelectionRange( self.filter_proxy_model.index(row_index, 0), self.filter_proxy_model.index(row_index, header_count), ) ) view.selectionModel().select(selection, QItemSelectionModel.ClearAndSelect) def commit(self): selection_model = self.data_view.selectionModel() if selection_model: selection = selection_model.selectedRows(self.COUNT) self.selected_rows = [self.filter_proxy_model.mapToSource(sel).row() for sel in selection] if selection and self.input_genes: genes = [model_index.data(Qt.UserRole) for model_index in selection] output_genes = [gene_name for gene_name in list(set.union(*genes))] self.num_of_sel_genes = len(output_genes) self.update_info_box() if self.use_attr_names: selected = [ column for column in self.input_data.domain.attributes if self.gene_id_attribute in column.attributes and str(column.attributes[self.gene_id_attribute]) in output_genes ] domain = Domain(selected, self.input_data.domain.class_vars, self.input_data.domain.metas) new_data = self.input_data.from_table(domain, self.input_data) self.Outputs.matched_genes.send(new_data) else: # create filter from selected column for genes only_known = table_filter.FilterStringList(self.gene_id_column, output_genes) # apply filter to the data data_table = table_filter.Values([only_known])(self.input_data) self.Outputs.matched_genes.send(data_table) def assign_delegates(self): self.data_view.setItemDelegateForColumn(self.GENES, NumericalColumnDelegate(self)) self.data_view.setItemDelegateForColumn(self.COUNT, NumericalColumnDelegate(self)) def setup_filter_model(self): self.filter_proxy_model = FilterProxyModel() self.filter_proxy_model.setFilterKeyColumn(self.TERM) self.data_view.setModel(self.filter_proxy_model) def setup_filter_area(self): h_layout = QHBoxLayout() h_layout.setSpacing(100) h_widget = widgetBox(self.mainArea, orientation=h_layout) spin( h_widget, self, 'min_count', 0, 1000, label='Count', tooltip='Minimum genes count', checked='use_min_count', callback=self.filter_data_view, callbackOnReturn=True, checkCallback=self.filter_data_view, ) self.line_edit_filter = lineEdit(h_widget, self, 'search_pattern') self.line_edit_filter.setPlaceholderText('Filter gene sets ...') self.line_edit_filter.textChanged.connect(self.filter_data_view) def on_gene_set_indicator_changed(self): # self._handle_future_model() self.gs_widget.clear_custom_sets() self.handle_custom_gene_sets() self.invalidate() def setup_control_area(self): # Control area self.input_info = widgetLabel(widgetBox(self.controlArea, "Info", addSpace=True), 'No data on input.\n') self.custom_gs_col_box = box = vBox(self.controlArea, 'Custom Gene Set Term Column') box.hide() gene_sets_box = widgetBox(self.controlArea, "Gene Sets") self.gs_widget = GeneSetsSelection(gene_sets_box, self, 'stored_gene_sets_selection') self.gs_widget.hierarchy_tree_widget.itemClicked.connect(self.update_tree_view) self.commit_button = auto_commit(self.controlArea, self, "auto_commit", "&Commit", box=False) def setup_gui(self): # control area self.setup_control_area() # main area self.data_view = QTreeView() self.setup_filter_model() self.setup_filter_area() self.data_view.setAlternatingRowColors(True) self.data_view.sortByColumn(self.COUNT, Qt.DescendingOrder) self.data_view.setSortingEnabled(True) self.data_view.setSelectionMode(QTreeView.ExtendedSelection) self.data_view.setEditTriggers(QTreeView.NoEditTriggers) self.data_view.viewport().setMouseTracking(False) self.data_view.setItemDelegateForColumn(self.TERM, LinkStyledItemDelegate(self.data_view)) self.mainArea.layout().addWidget(self.data_view) self.data_view.header().setSectionResizeMode(QHeaderView.ResizeToContents) self.assign_delegates() @staticmethod def set_items(gene_sets, sets_to_display, genes, callback): model_items = [] if not genes: return for gene_set in sorted(gene_sets): if gene_set.hierarchy not in sets_to_display: continue callback() matched_set = gene_set.genes & genes if len(matched_set) > 0: category_column = QStandardItem() term_column = QStandardItem() count_column = QStandardItem() genes_column = QStandardItem() category_column.setData(", ".join(gene_set.hierarchy), Qt.DisplayRole) term_column.setData(gene_set.name, Qt.DisplayRole) term_column.setData(gene_set.name, Qt.ToolTipRole) term_column.setData(gene_set.link, LinkRole) term_column.setForeground(QColor(Qt.blue)) count_column.setData(matched_set, Qt.UserRole) count_column.setData(len(matched_set), Qt.DisplayRole) genes_column.setData(len(gene_set.genes), Qt.DisplayRole) genes_column.setData( set(gene_set.genes), Qt.UserRole ) # store genes to get then on output on selection model_items.append([count_column, genes_column, category_column, term_column]) return model_items def init_item_model(self): if self.data_model: self.data_model.clear() self.setup_filter_model() else: self.data_model = QStandardItemModel() self.data_model.setSortRole(Qt.UserRole) self.data_model.setHorizontalHeaderLabels(self.DATA_HEADER_LABELS) def sizeHint(self): return QSize(1280, 960)
class OWSetEnrichment(widget.OWWidget): name = "Set Enrichment" description = "" icon = "../widgets/icons/GeneSetEnrichment.svg" priority = 5000 inputs = [("Data", Orange.data.Table, "setData", widget.Default), ("Reference", Orange.data.Table, "setReference")] outputs = [("Data subset", Orange.data.Table)] settingsHandler = settings.DomainContextHandler() taxid = settings.ContextSetting(None) speciesIndex = settings.ContextSetting(0) genesinrows = settings.ContextSetting(False) geneattr = settings.ContextSetting(0) categoriesCheckState = settings.ContextSetting({}) useReferenceData = settings.Setting(False) useMinCountFilter = settings.Setting(True) useMaxPValFilter = settings.Setting(True) useMaxFDRFilter = settings.Setting(True) minClusterCount = settings.Setting(3) maxPValue = settings.Setting(0.01) maxFDR = settings.Setting(0.01) autocommit = settings.Setting(False) Ready, Initializing, Loading, RunningEnrichment = 0, 1, 2, 4 class Error(widget.OWWidget.Error): no_gene_names = Msg("Input data contains no columns with gene names") no_data_onInput = Msg("No data on input") class Warning(widget.OWWidget.Warning): no_sets_found = Msg("No enriched sets found") def __init__(self, parent=None): super().__init__(parent) self.geneMatcherSettings = [False, False, True, False] self.data = None self.referenceData = None self.taxid_list = [] self.__genematcher = (None, fulfill(gene.matcher([]))) self.__invalidated = False self.currentAnnotatedCategories = [] self.state = None self.__state = OWSetEnrichment.Initializing box = gui.widgetBox(self.controlArea, "Info") self.infoBox = gui.widgetLabel(box, "Info") self.infoBox.setText("No data on input.\n") self.speciesComboBox = gui.comboBox( self.controlArea, self, "speciesIndex", "Species", callback=self.__on_speciesIndexChanged) box = gui.widgetBox(self.controlArea, "Entity names") self.geneAttrComboBox = gui.comboBox( box, self, "geneattr", "Entity feature", sendSelectedValue=0, callback=self.updateAnnotations) cb = gui.checkBox( box, self, "genesinrows", "Use feature names", callback=self.updateAnnotations, disables=[(-1, self.geneAttrComboBox)]) cb.makeConsistent() # gui.button(box, self, "Gene matcher settings", # callback=self.updateGeneMatcherSettings, # tooltip="Open gene matching settings dialog") self.referenceRadioBox = gui.radioButtonsInBox( self.controlArea, self, "useReferenceData", ["All entities", "Reference set (input)"], tooltips=["Use entire genome (for gene set enrichment) or all " + "available entities for reference", "Use entities from Reference Examples input signal " + "as reference"], box="Reference", callback=self.updateAnnotations) box = gui.widgetBox(self.controlArea, "Entity Sets") self.groupsWidget = QTreeWidget(self) self.groupsWidget.setHeaderLabels(["Category"]) box.layout().addWidget(self.groupsWidget) hLayout = QHBoxLayout() hLayout.setSpacing(10) hWidget = gui.widgetBox(self.mainArea, orientation=hLayout) gui.spin(hWidget, self, "minClusterCount", 0, 100, label="Entities", tooltip="Minimum entity count", callback=self.filterAnnotationsChartView, callbackOnReturn=True, checked="useMinCountFilter", checkCallback=self.filterAnnotationsChartView) pvalfilterbox = gui.widgetBox(hWidget, orientation="horizontal") cb = gui.checkBox( pvalfilterbox, self, "useMaxPValFilter", "p-value", callback=self.filterAnnotationsChartView) sp = gui.doubleSpin( pvalfilterbox, self, "maxPValue", 0.0, 1.0, 0.0001, tooltip="Maximum p-value", callback=self.filterAnnotationsChartView, callbackOnReturn=True, ) sp.setEnabled(self.useMaxFDRFilter) cb.toggled[bool].connect(sp.setEnabled) pvalfilterbox.layout().setAlignment(cb, Qt.AlignRight) pvalfilterbox.layout().setAlignment(sp, Qt.AlignLeft) fdrfilterbox = gui.widgetBox(hWidget, orientation="horizontal") cb = gui.checkBox( fdrfilterbox, self, "useMaxFDRFilter", "FDR", callback=self.filterAnnotationsChartView) sp = gui.doubleSpin( fdrfilterbox, self, "maxFDR", 0.0, 1.0, 0.0001, tooltip="Maximum False discovery rate", callback=self.filterAnnotationsChartView, callbackOnReturn=True, ) sp.setEnabled(self.useMaxFDRFilter) cb.toggled[bool].connect(sp.setEnabled) fdrfilterbox.layout().setAlignment(cb, Qt.AlignRight) fdrfilterbox.layout().setAlignment(sp, Qt.AlignLeft) self.filterLineEdit = QLineEdit(self, placeholderText="Search ...") self.filterCompleter = QCompleter(self.filterLineEdit) self.filterLineEdit.setCompleter(self.filterCompleter) hLayout.addWidget(self.filterLineEdit) self.mainArea.layout().addWidget(hWidget) self.filterLineEdit.textChanged.connect( self.filterAnnotationsChartView) self.annotationsChartView = QTreeView( alternatingRowColors=True, sortingEnabled=True, selectionMode=QTreeView.ExtendedSelection, rootIsDecorated=False, editTriggers=QTreeView.NoEditTriggers, ) self.source_model = QStandardItemModel() self.source_model.setSortRole(Qt.UserRole) self.source_model.setHorizontalHeaderLabels( ["Category", "Term", "Count", "Reference count", "p-value", "FDR", "Enrichment"]) self.proxy_model = CustomFilterModel(self.annotationsChartView) self.proxy_model.setFilterKeyColumn(1) # filter only by name (second column). self.annotationsChartView.setModel(self.proxy_model) self.annotationsChartView.viewport().setMouseTracking(True) self.mainArea.layout().addWidget(self.annotationsChartView) contextEventFilter = gui.VisibleHeaderSectionContextEventFilter( self.annotationsChartView) self.annotationsChartView.header().installEventFilter(contextEventFilter) self.groupsWidget.itemClicked.connect(self.subsetSelectionChanged) gui.auto_commit(self.controlArea, self, "autocommit", "Commit") self.setBlocking(True) task = EnsureDownloaded( [(taxonomy.Taxonomy.DOMAIN, taxonomy.Taxonomy.FILENAME), (geneset.sfdomain, "index.pck")] ) task.finished.connect(self.__initialize_finish) self.setStatusMessage("Initializing") self._executor = ThreadExecutor( parent=self, threadPool=QThreadPool(self)) self._executor.submit(task) def string_search(self): self.annotationsChartView.model().setFilterFixedString(self.filterLineEdit.text()) def sizeHint(self): return QSize(1024, 600) def __initialize_finish(self): # Finalize the the widget's initialization (preferably after # ensuring all required databases have been downloaded. sets = geneset.list_all() taxids = set(taxonomy.common_taxids() + list(filter(None, [tid for _, tid, _ in sets]))) organisms = [(tid, name_or_none(tid)) for tid in taxids] organisms = [(tid, name) for tid, name in organisms if name is not None] organisms = [(None, "None")] + sorted(organisms) taxids = [tid for tid, _ in organisms] names = [name for _, name in organisms] self.taxid_list = taxids self.speciesComboBox.clear() self.speciesComboBox.addItems(names) self.genesets = sets if self.taxid in self.taxid_list: taxid = self.taxid else: taxid = self.taxid_list[0] self.taxid = None self.setCurrentOrganism(taxid) self.setBlocking(False) self.__state = OWSetEnrichment.Ready self.setStatusMessage("") def setCurrentOrganism(self, taxid): """Set the current organism `taxid`.""" if taxid not in self.taxid_list: taxid = self.taxid_list[min(self.speciesIndex, len(self.taxid_list) - 1)] if self.taxid != taxid: self.taxid = taxid self.speciesIndex = self.taxid_list.index(taxid) self.refreshHierarchy() self._invalidateGeneMatcher() self._invalidate() def currentOrganism(self): """Return the current organism taxid""" return self.taxid def __on_speciesIndexChanged(self): taxid = self.taxid_list[self.speciesIndex] self.taxid = "< Do not look >" self.setCurrentOrganism(taxid) if self.__invalidated and self.data is not None: self.updateAnnotations() def clear(self): """Clear/reset the widget state.""" self._cancelPending() self.state = None self.__state = self.__state & ~OWSetEnrichment.RunningEnrichment self._clearView() if self.annotationsChartView.model() is not None: self.annotationsChartView.model().clear() self.geneAttrComboBox.clear() self.geneAttrs = [] self._updatesummary() def _cancelPending(self): """Cancel pending tasks.""" if self.state is not None: self.state.results.cancel() self.state.namematcher.cancel() self.state.cancelled = True def _clearView(self): """Clear the enrichment report view (main area).""" if self.annotationsChartView.model() is not None: self.annotationsChartView.model().clear() def setData(self, data=None): """Set the input dataset with query gene names""" if self.__state & OWSetEnrichment.Initializing: self.__initialize_finish() self.Error.clear() self.closeContext() self.clear() self.groupsWidget.clear() self.data = data if data is not None: varlist = [var for var in data.domain.variables + data.domain.metas if isinstance(var, Orange.data.StringVariable)] self.geneAttrs = varlist for var in varlist: self.geneAttrComboBox.addItem(*gui.attributeItem(var)) oldtaxid = self.taxid self.geneattr = min(self.geneattr, len(self.geneAttrs) - 1) taxid = data_hints.get_hint(data, "taxid", "") if taxid in self.taxid_list: self.speciesIndex = self.taxid_list.index(taxid) self.taxid = taxid self.genesinrows = data_hints.get_hint( data, "genesinrows", self.genesinrows) self.openContext(data) if oldtaxid != self.taxid: self.taxid = "< Do not look >" self.setCurrentOrganism(taxid) self.refreshHierarchy() self._invalidate() def setReference(self, data=None): """Set the (optional) input dataset with reference gene names.""" self.referenceData = data self.referenceRadioBox.setEnabled(bool(data)) if self.useReferenceData: self._invalidate() def handleNewSignals(self): if self.__invalidated: self.updateAnnotations() def _invalidateGeneMatcher(self): _, f = self.__genematcher f.cancel() self.__genematcher = (None, fulfill(gene.matcher([]))) def _invalidate(self): self.__invalidated = True def genesFromTable(self, table): if self.genesinrows: genes = [attr.name for attr in table.domain.attributes] else: geneattr = self.geneAttrs[self.geneattr] genes = [str(ex[geneattr]) for ex in table] return genes def getHierarchy(self, taxid): def recursive_dict(): return defaultdict(recursive_dict) collection = recursive_dict() def collect(col, hier): if hier: collect(col[hier[0]], hier[1:]) for hierarchy, t_id, _ in self.genesets: collect(collection[t_id], hierarchy) return (taxid, collection[taxid]), (None, collection[None]) def setHierarchy(self, hierarchy, hierarchy_noorg): self.groupsWidgetItems = {} def fill(col, parent, full=(), org=""): for key, value in sorted(col.items()): full_cat = full + (key,) item = QTreeWidgetItem(parent, [key]) item.setFlags(item.flags() | Qt.ItemIsUserCheckable | Qt.ItemIsSelectable | Qt.ItemIsEnabled) if value: item.setFlags(item.flags() | Qt.ItemIsTristate) checked = self.categoriesCheckState.get( (full_cat, org), Qt.Checked) item.setData(0, Qt.CheckStateRole, checked) item.setExpanded(True) item.category = full_cat item.organism = org self.groupsWidgetItems[full_cat] = item fill(value, item, full_cat, org=org) self.groupsWidget.clear() fill(hierarchy[1], self.groupsWidget, org=hierarchy[0]) fill(hierarchy_noorg[1], self.groupsWidget, org=hierarchy_noorg[0]) def refreshHierarchy(self): self.setHierarchy(*self.getHierarchy(taxid=self.taxid_list[self.speciesIndex])) def selectedCategories(self): """ Return a list of currently selected hierarchy keys. A key is a tuple of identifiers from the root to the leaf of the hierarchy tree. """ return [key for key, check in self.getHierarchyCheckState().items() if check == Qt.Checked] def getHierarchyCheckState(self): def collect(item, full=()): checked = item.checkState(0) name = str(item.data(0, Qt.DisplayRole)) full_cat = full + (name,) result = [((full_cat, item.organism), checked)] for i in range(item.childCount()): result.extend(collect(item.child(i), full_cat)) return result items = [self.groupsWidget.topLevelItem(i) for i in range(self.groupsWidget.topLevelItemCount())] states = itertools.chain(*(collect(item) for item in items)) return dict(states) def subsetSelectionChanged(self, item, column): # The selected geneset (hierarchy) subset has been changed by the # user. Update the displayed results. # Update the stored state (persistent settings) self.categoriesCheckState = self.getHierarchyCheckState() categories = self.selectedCategories() if self.data is not None: if self._nogenematching() or \ not set(categories) <= set(self.currentAnnotatedCategories): self.updateAnnotations() else: # compute FDR according the selected categories self.compute_fdr() self.filterAnnotationsChartView() def updateGeneMatcherSettings(self): raise NotImplementedError from .OWGOEnrichmentAnalysis import GeneMatcherDialog dialog = GeneMatcherDialog(self, defaults=self.geneMatcherSettings, enabled=[True] * 4, modal=True) if dialog.exec_(): self.geneMatcherSettings = [getattr(dialog, item[0]) for item in dialog.items] self._invalidateGeneMatcher() if self.data is not None: self.updateAnnotations() def _genematcher(self): """ Return a Future[gene.SequenceMatcher] """ taxid = self.taxid_list[self.speciesIndex] current, matcher_f = self.__genematcher if taxid == current and \ not matcher_f.cancelled(): return matcher_f self._invalidateGeneMatcher() if taxid is None: self.__genematcher = (None, fulfill(gene.matcher([]))) return self.__genematcher[1] matchers = [gene.GMGO, gene.GMKEGG, gene.GMNCBI, gene.GMAffy] matchers = [m for m, use in zip(matchers, self.geneMatcherSettings) if use] def create(): return gene.matcher([m(taxid) for m in matchers]) matcher_f = self._executor.submit(create) self.__genematcher = (taxid, matcher_f) return self.__genematcher[1] def _nogenematching(self): return self.taxid is None or not any(self.geneMatcherSettings) def updateAnnotations(self): if self.data is None: return assert not self.__state & OWSetEnrichment.Initializing self._cancelPending() self._clearView() self.Warning.clear() self.Error.clear() if not self.genesinrows and len(self.geneAttrs) == 0: self.Error.no_gene_names() return self.__state = OWSetEnrichment.RunningEnrichment taxid = self.taxid_list[self.speciesIndex] self.taxid = taxid categories = self.selectedCategories() clusterGenes = self.genesFromTable(self.data) if self.referenceData is not None and self.useReferenceData: referenceGenes = self.genesFromTable(self.referenceData) else: referenceGenes = None self.currentAnnotatedCategories = categories genematcher = self._genematcher() self.progressBarInit() ## Load collections in a worker thread # TODO: Use cached collections if already loaded and # use ensure_genesetsdownloaded with progress report (OWSelectGenes) collections = self._executor.submit(geneset.collections, *categories) def refset_null(): """Return the default background reference set""" col = collections.result() return reduce(operator.ior, (set(g.genes) for g in col), set()) def refset_ncbi(): """Return all NCBI gene names""" geneinfo = gene.NCBIGeneInfo(taxid) return set(geneinfo.keys()) def namematcher(): matcher = genematcher.result() match = matcher.set_targets(ref_set.result()) match.umatch = memoize(match.umatch) return match def map_unames(): matcher = namematcher.result() query = list(filter(None, map(matcher.umatch, querynames))) reference = list(filter(None, map(matcher.umatch, ref_set.result()))) return query, reference if self._nogenematching(): if referenceGenes is None: ref_set = self._executor.submit(refset_null) else: ref_set = fulfill(referenceGenes) else: if referenceGenes == None: ref_set = self._executor.submit(refset_ncbi) else: ref_set = fulfill(referenceGenes) namematcher = self._executor.submit(namematcher) querynames = clusterGenes state = types.SimpleNamespace() state.query_set = clusterGenes state.reference_set = referenceGenes state.namematcher = namematcher state.query_count = len(set(clusterGenes)) state.reference_count = (len(set(referenceGenes)) if referenceGenes is not None else None) state.cancelled = False progress = methodinvoke(self, "_setProgress", (float,)) info = methodinvoke(self, "_setRunInfo", (str,)) @withtraceback def run(): info("Loading data") match = namematcher.result() query, reference = map_unames() gscollections = collections.result() results = [] info("Running enrichment") p = 0 for i, gset in enumerate(gscollections): genes = set(filter(None, map(match.umatch, gset.genes))) enr = set_enrichment(genes, reference, query) results.append((gset, enr)) if state.cancelled: raise UserInteruptException pnew = int(100 * i / len(gscollections)) if pnew != p: progress(pnew) p = pnew progress(100) info("") return query, reference, results task = Task(function=run) task.resultReady.connect(self.__on_enrichment_finished) task.exceptionReady.connect(self.__on_enrichment_failed) result = self._executor.submit(task) state.results = result self.state = state self._updatesummary() def __on_enrichment_failed(self, exception): if not isinstance(exception, UserInteruptException): print("ERROR:", exception, file=sys.stderr) print(exception._traceback, file=sys.stderr) self.progressBarFinished() self.setStatusMessage("") self.__state &= ~OWSetEnrichment.RunningEnrichment def __on_enrichment_finished(self, results): assert QThread.currentThread() is self.thread() self.__state &= ~OWSetEnrichment.RunningEnrichment query, reference, results = results if self.annotationsChartView.model(): self.annotationsChartView.model().clear() nquery = len(query) nref = len(reference) maxcount = max((len(e.query_mapped) for _, e in results), default=1) maxrefcount = max((len(e.reference_mapped) for _, e in results), default=1) nspaces = int(math.ceil(math.log10(maxcount or 1))) refspaces = int(math.ceil(math.log(maxrefcount or 1))) query_fmt = "%" + str(nspaces) + "s (%.2f%%)" ref_fmt = "%" + str(refspaces) + "s (%.2f%%)" def fmt_count(fmt, count, total): return fmt % (count, 100.0 * count / (total or 1)) fmt_query_count = partial(fmt_count, query_fmt) fmt_ref_count = partial(fmt_count, ref_fmt) linkFont = QFont(self.annotationsChartView.viewOptions().font) linkFont.setUnderline(True) def item(value=None, tooltip=None, user=None): si = QStandardItem() if value is not None: si.setData(value, Qt.DisplayRole) if tooltip is not None: si.setData(tooltip, Qt.ToolTipRole) if user is not None: si.setData(user, Qt.UserRole) else: si.setData(value, Qt.UserRole) return si model = self.source_model for i, (gset, enrich) in enumerate(results): if len(enrich.query_mapped) == 0: continue nquery_mapped = len(enrich.query_mapped) nref_mapped = len(enrich.reference_mapped) row = [ item(", ".join(gset.hierarchy)), item(gsname(gset), tooltip=gset.link), item(fmt_query_count(nquery_mapped, nquery), tooltip=nquery_mapped, user=nquery_mapped), item(fmt_ref_count(nref_mapped, nref), tooltip=nref_mapped, user=nref_mapped), item(fmtp(enrich.p_value), user=enrich.p_value), item(), # column 5, FDR, is computed in filterAnnotationsChartView item(enrich.enrichment_score, tooltip="%.3f" % enrich.enrichment_score, user=enrich.enrichment_score) ] row[0].geneset = gset row[0].enrichment = enrich row[1].setData(gset.link, gui.LinkRole) row[1].setFont(linkFont) row[1].setForeground(QColor(Qt.blue)) model.appendRow(row) self.annotationsChartView.selectionModel().selectionChanged.connect( self.commit ) if not model.rowCount(): self.Warning.no_sets_found() else: # compute FDR according the selected categories self.compute_fdr() # set source model if there are sets found self.filterAnnotationsChartView() self.proxy_model.setSourceModel(model) self.Warning.clear() self._updatesummary() allnames = set(gsname(geneset) for geneset, (count, _, _, _) in results if count) allnames |= reduce(operator.ior, (set(word_split(name)) for name in allnames), set()) self.filterCompleter.setModel(None) self.completerModel = QStringListModel(sorted(allnames)) self.filterCompleter.setModel(self.completerModel) if results: max_score = max((e.enrichment_score for _, e in results if np.isfinite(e.enrichment_score)), default=1) self.annotationsChartView.setItemDelegateForColumn( 6, BarItemDelegate(self, scale=(0.0, max_score)) ) self.annotationsChartView.setItemDelegateForColumn( 1, gui.LinkStyledItemDelegate(self.annotationsChartView) ) header = self.annotationsChartView.header() for i in range(model.columnCount()): sh = self.annotationsChartView.sizeHintForColumn(i) sh = max(sh, header.sectionSizeHint(i)) self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30)) # self.annotationsChartView.resizeColumnToContents(i) self.progressBarFinished() self.setStatusMessage("") def _updatesummary(self): state = self.state if state is None: self.Error.no_data_onInput() self.Warning.clear() self.infoBox.setText("No data on input.\n") return text = "{.query_count} unique names on input\n".format(state) if state.results.done() and not state.results.exception(): mapped, _, _ = state.results.result() ratio_mapped = (len(mapped) / state.query_count if state.query_count else 0) text += ("%i (%.1f%%) gene names matched" % (len(mapped), 100.0 * ratio_mapped)) elif not state.results.done(): text += "..." else: text += "<Error {}>".format(str(state.results.exception())) self.infoBox.setText(text) # TODO: warn on no enriched sets found (i.e no query genes # mapped to any set) def return_filter_values(self): categories = set(", ".join(category) for category, _ in self.selectedCategories()) return { "useMinCountFilter": self.useMinCountFilter, "useMaxPValFilter": self.useMaxPValFilter, "minClusterCount": self.minClusterCount, "useMaxFDRFilter": self.useMaxFDRFilter, "maxPValue": self.maxPValue, "categories": categories, "maxFDR": self.maxFDR } def compute_fdr(self): selected_categories = self.return_filter_values().get('categories') data_model = self.source_model # get pvalues of rows that match selected category pvals = [(row_index, data_model.index(row_index, 4).data(role=Qt.UserRole)) for row_index in range(data_model.rowCount()) if data_model.index(row_index, 0).data(role=Qt.DisplayRole) in selected_categories] fdrs = utils.stats.FDR([pval for _, pval in pvals]) # set calculated values to the data model for (row_index, pval), fdr in zip(pvals, fdrs): fdr_item = data_model.item(row_index, 5) fdr_item.setData(fmtpdet(fdr), Qt.ToolTipRole) fdr_item.setData(fmtp(fdr), Qt.DisplayRole) fdr_item.setData(fdr, Qt.UserRole) def filterAnnotationsChartView(self): if self.__state & OWSetEnrichment.RunningEnrichment: return # set string pattern self.proxy_model._pattern = str(self.filterLineEdit.text()) # set filter values to proxy model self.proxy_model.set_filter_values(self.return_filter_values()) # filter model self.proxy_model.invalidateFilter() self._updatesummary() @Slot(float) def _setProgress(self, value): assert QThread.currentThread() is self.thread() self.progressBarSet(value, processEvents=None) @Slot(str) def _setRunInfo(self, text): self.setStatusMessage(text) def commit(self): if self.data is None or \ self.__state & OWSetEnrichment.RunningEnrichment: return model = self.source_model rows = self.annotationsChartView.selectionModel().selectedRows(0) selected = [model.item(index.row(), 0) for index in rows] mapped = reduce(operator.ior, (set(item.enrichment.query_mapped) for item in selected), set()) assert self.state.namematcher.done() matcher = self.state.namematcher.result() axis = 1 if self.genesinrows else 0 if axis == 1: mapped = [attr for attr in self.data.domain.attributes if matcher.umatch(attr.name) in mapped] newdomain = Orange.data.Domain( mapped, self.data.domain.class_vars, self.data.domain.metas) data = self.data.from_table(newdomain, self.data) else: geneattr = self.geneAttrs[self.geneattr] selected = [i for i, ex in enumerate(self.data) if matcher.umatch(str(ex[geneattr])) in mapped] data = self.data[selected] self.send("Data subset", data) def onDeleteWidget(self): if self.state is not None: self._cancelPending() self.state = None self._executor.shutdown(wait=False)
class OWGeneSets(OWWidget): name = "Gene Set Enrichment" description = "" icon = "icons/OWGeneSets.svg" priority = 9 want_main_area = True settingsHandler = OrganismContextHandler() # settings auto_commit = Setting(True) stored_selections = ContextSetting([]) organism = ContextSetting(None) min_count = Setting(5) use_min_count = Setting(True) max_p_value = Setting(0.0001) use_p_value = Setting(False) max_fdr = Setting(0.01) use_max_fdr = Setting(True) use_reference_data = Setting(True) COUNT, REFERENCE, P_VAL, FDR, ENRICHMENT, GENES, CATEGORY, TERM = range(8) DATA_HEADER_LABELS = [ "Count", 'Reference', 'p-Value', 'FDR', 'Enrichment', 'Genes In Set', 'Category', 'Term' ] class Inputs: genes = Input("Genes", Table) custom_sets = Input('Custom Gene Sets', Table) reference = Input("Reference Genes", Table) class Outputs: matched_genes = Output("Matched Genes", Table) class Information(OWWidget.Information): pass class Warning(OWWidget.Warning): all_sets_filtered = Msg('All sets were filtered out.') class Error(OWWidget.Error): organism_mismatch = Msg( 'Organism in input data and custom gene sets does not match') missing_annotation = Msg(ERROR_ON_MISSING_ANNOTATION) missing_gene_id = Msg(ERROR_ON_MISSING_GENE_ID) missing_tax_id = Msg(ERROR_ON_MISSING_TAX_ID) cant_reach_host = Msg("Host orange.biolab.si is unreachable.") cant_load_organisms = Msg( "No available organisms, please check your connection.") def __init__(self): super().__init__() # commit self.commit_button = None # gene sets object self.gene_sets_obj = geneset.GeneSets() # progress bar self.progress_bar = None self.progress_bar_iterations = None # data self.input_data = None self.input_genes = [] self.tax_id = None self.use_attr_names = None self.gene_id_attribute = None self.gene_id_column = None # custom gene sets self.custom_data = None self.feature_model = DomainModel(valid_types=(DiscreteVariable, StringVariable)) self.gene_set_label = None self.gs_label_combobox = None self.custom_tax_id = None self.custom_use_attr_names = None self.custom_gene_id_attribute = None self.custom_gene_id_column = None # reference genes self.reference_radio_box = None self.reference_data = None self.reference_genes = None self.reference_tax_id = None self.reference_attr_names = None self.reference_gene_id_attribute = None self.reference_gene_id_column = None # info box self.input_info = None self.num_of_sel_genes = 0 # filter self.line_edit_filter = None self.search_pattern = '' self.organism_select_combobox = None # data model view self.data_view = None self.data_model = None # gene matcher NCBI self.gene_matcher = None # filter proxy model self.filter_proxy_model = None # hierarchy widget self.hierarchy_widget = None self.hierarchy_state = None # spinbox self.spin_widget = None # threads self.threadpool = QThreadPool(self) self.workers = None self._task = None # type: Optional[Task] self._executor = ThreadExecutor() # gui self.setup_gui() def __reset_widget_state(self): # reset hierarchy widget state self.hierarchy_widget.clear() # clear data view self.init_item_model() # reset filters self.setup_filter_model() def cancel(self): """ Cancel the current task (if any). """ if self._task is not None: self._task.cancel() assert self._task.future.done() # disconnect the `_task_finished` slot self._task.watcher.done.disconnect(self._init_gene_sets_finished) self._task = None @Slot() def progress_advance(self): # GUI should be updated in main thread. That's why we are calling advance method here if self.progress_bar: self.progress_bar.advance() def __get_input_genes(self): self.input_genes = [] if self.use_attr_names: for variable in self.input_data.domain.attributes: self.input_genes.append( str(variable.attributes.get(self.gene_id_attribute, '?'))) else: genes, _ = self.input_data.get_column_view(self.gene_id_column) self.input_genes = [str(g) for g in genes] def __construct_custom_gene_sets(self): custom_set_hier = ('Custom sets', ) # delete any custom sets if they exists self.gene_sets_obj.delete_sets_by_hierarchy(custom_set_hier) if self.custom_data and self.custom_gene_id_column: gene_sets_names, _ = self.custom_data.get_column_view( self.gene_set_label) gene_names, _ = self.custom_data.get_column_view( self.custom_gene_id_column) temp_dict = defaultdict(list) for set_name, gene_name in zip(gene_sets_names, gene_names): temp_dict[set_name].append(gene_name) g_sets = [] for key, value in temp_dict.items(): g_sets.append( geneset.GeneSet(gs_id=key, hierarchy=custom_set_hier, organism=self.custom_tax_id, name=key, genes=set(value))) self.gene_sets_obj.update(g_sets) def __update_hierarchy(self): self.set_hierarchy_model( self.hierarchy_widget, hierarchy_tree(self.gene_sets_obj.hierarchies())) self.set_selected_hierarchies() def update_tree_view(self): if self.use_reference_data and self.reference_data: self.init_gene_sets(reference_genes=self.reference_genes) else: self.init_gene_sets() def invalidate(self): # clear self.__reset_widget_state() self.update_info_box() if self.input_data is not None: # setup self.__construct_custom_gene_sets() self.__get_input_genes() self.__update_hierarchy() self.update_tree_view() def __check_organism_mismatch(self): """ Check if organisms from different inputs match. :return: True if there is a mismatch """ if self.tax_id is not None and self.custom_tax_id is not None: return self.tax_id != self.custom_tax_id return False def __get_reference_genes(self): self.reference_genes = [] if self.reference_attr_names: for variable in self.reference_data.domain.attributes: self.reference_genes.append( str( variable.attributes.get( self.reference_gene_id_attribute, '?'))) else: genes, _ = self.reference_data.get_column_view( self.reference_gene_id_column) self.reference_genes = [str(g) for g in genes] @Inputs.reference def handle_reference_genes(self, data): """ Set the (optional) input dataset with reference gene names. """ if data: self.reference_data = data self.reference_tax_id = str( self.reference_data.attributes.get(TAX_ID, None)) self.reference_attr_names = self.reference_data.attributes.get( GENE_AS_ATTRIBUTE_NAME, None) self.reference_gene_id_attribute = self.reference_data.attributes.get( GENE_ID_ATTRIBUTE, None) self.reference_gene_id_column = self.reference_data.attributes.get( GENE_ID_COLUMN, None) if not (self.reference_attr_names is not None and ((self.reference_gene_id_attribute is None) ^ (self.reference_gene_id_column is None))): if self.reference_tax_id is None: self.Error.missing_annotation() return self.Error.missing_gene_id() return elif self.reference_tax_id is None: self.Error.missing_tax_id() return self.__get_reference_genes() self.reference_radio_box.setEnabled(bool(self.reference_data)) self.invalidate() @Inputs.custom_sets def handle_custom_input(self, data): self.Error.clear() self.__reset_widget_state() self.custom_data = None self.custom_tax_id = None self.custom_use_attr_names = None self.custom_gene_id_attribute = None self.custom_gene_id_column = None self.gs_label_combobox.setDisabled(True) self.feature_model.set_domain(None) if data: self.custom_data = data self.custom_tax_id = str( self.custom_data.attributes.get(TAX_ID, None)) self.custom_use_attr_names = self.custom_data.attributes.get( GENE_AS_ATTRIBUTE_NAME, None) self.custom_gene_id_attribute = self.custom_data.attributes.get( GENE_ID_ATTRIBUTE, None) self.custom_gene_id_column = self.custom_data.attributes.get( GENE_ID_COLUMN, None) if not (self.custom_use_attr_names is not None and ((self.custom_gene_id_attribute is None) ^ (self.custom_gene_id_column is None))): if self.custom_tax_id is None: self.Error.missing_annotation() return self.Error.missing_gene_id() return elif self.custom_tax_id is None: self.Error.missing_tax_id() return if self.__check_organism_mismatch(): self.Error.organism_mismatch() return self.gs_label_combobox.setDisabled(False) self.feature_model.set_domain(self.custom_data.domain) if self.feature_model: self.gene_set_label = self.feature_model[0] self.invalidate() @Inputs.genes def handle_genes_input(self, data): self.closeContext() self.Error.clear() self.__reset_widget_state() # clear output self.Outputs.matched_genes.send(None) # clear input genes self.input_genes = [] self.gs_label_combobox.setDisabled(True) self.update_info_box() if data: self.input_data = data self.tax_id = str(self.input_data.attributes.get(TAX_ID, None)) self.use_attr_names = self.input_data.attributes.get( GENE_AS_ATTRIBUTE_NAME, None) self.gene_id_attribute = self.input_data.attributes.get( GENE_ID_ATTRIBUTE, None) self.gene_id_column = self.input_data.attributes.get( GENE_ID_COLUMN, None) if not (self.use_attr_names is not None and ((self.gene_id_attribute is None) ^ (self.gene_id_column is None))): if self.tax_id is None: self.Error.missing_annotation() return self.Error.missing_gene_id() return elif self.tax_id is None: self.Error.missing_tax_id() return if self.__check_organism_mismatch(): self.Error.organism_mismatch() return self.openContext(self.tax_id) # if input data change, we need to set feature model again if self.custom_data: self.gs_label_combobox.setDisabled(False) self.feature_model.set_domain(self.custom_data.domain) if self.feature_model: self.gene_set_label = self.feature_model[0] self.download_gene_sets() def update_info_box(self): info_string = '' if self.input_genes: info_string += '{} unique gene names on input.\n'.format( len(self.input_genes)) info_string += '{} genes on output.\n'.format( self.num_of_sel_genes) else: info_string += 'No genes on input.\n' self.input_info.setText(info_string) def on_gene_sets_download(self, result): # make sure this happens in the main thread. # Qt insists that widgets be created within the GUI(main) thread. assert threading.current_thread() == threading.main_thread() self.setStatusMessage('') if result: for res in result: g_sets = geneset.load_gene_sets(res, self.tax_id) self.gene_sets_obj.update([g_set for g_set in g_sets]) # add custom sets if there are any self.invalidate() self.update_info_box() def download_gene_sets(self): self.Error.clear() # reset hierarchy widget state self.hierarchy_widget.clear() # clear data view self.init_item_model() # get all gene sets for selected organism gene_sets = geneset.list_all(organism=self.tax_id) # status message self.setStatusMessage('downloading sets') worker = Worker(download_gene_sets, self.tax_id, gene_sets) worker.signals.result.connect(self.on_gene_sets_download) # move download process to worker thread self.threadpool.start(worker) def set_hierarchy_model(self, tree_widget, sets): def beautify_displayed_text(text): if '_' in text: return text.replace('_', ' ').title() else: return text # TODO: maybe optimize this code? for key, value in sets.items(): item = QTreeWidgetItem(tree_widget, [beautify_displayed_text(key)]) item.setFlags(item.flags() & (Qt.ItemIsUserCheckable | ~Qt.ItemIsSelectable | Qt.ItemIsEnabled)) item.setExpanded(True) item.hierarchy = key if value: item.setFlags(item.flags() | Qt.ItemIsTristate) self.set_hierarchy_model(item, value) else: if item.parent(): item.hierarchy = (item.parent().hierarchy, key) if not item.childCount() and not item.parent(): item.hierarchy = (key, ) def init_gene_sets(self, reference_genes=None): if self._task is not None: self.cancel() assert self._task is None self._task = Task() progress_advance = methodinvoke(self, "progress_advance") def callback(): if self._task.cancelled: raise KeyboardInterrupt() if self.progress_bar: progress_advance() if reference_genes is None: reference_genes = self.gene_sets_obj.genes() self.init_item_model() sets_to_display = self.get_hierarchies(only_selected=True) # save setting on selected hierarchies self.stored_selections = sets_to_display # save context self.closeContext() f = partial(self.set_items, self.gene_sets_obj, sets_to_display, set(self.input_genes), reference_genes, self.min_count if self.use_min_count else 1, callback=callback) progress_iterations = sum([ len(g_set) for hier, g_set in self.gene_sets_obj.map_hierarchy_to_sets().items() if hier in sets_to_display ]) self.progress_bar = ProgressBar(self, iterations=progress_iterations) self._task.future = self._executor.submit(f) self._task.watcher = FutureWatcher(self._task.future) self._task.watcher.done.connect(self._init_gene_sets_finished) self.openContext(self.tax_id) @Slot(concurrent.futures.Future) def _init_gene_sets_finished(self, f): assert self.thread() is QThread.currentThread() assert threading.current_thread() == threading.main_thread() assert self._task is not None assert self._task.future is f assert f.done() self._task = None self.progress_bar.finish() self.setStatusMessage('') try: results = f.result() # type: list [self.data_model.appendRow(model_item) for model_item in results] self.filter_proxy_model.setSourceModel(self.data_model) self._update_fdr() self.filter_data_view() except Exception as ex: print(ex) def set_selected_hierarchies(self): iterator = QTreeWidgetItemIterator(self.hierarchy_widget, QTreeWidgetItemIterator.All) while iterator.value(): # note: if hierarchy value is not a tuple, then this is just top level qTreeWidgetItem that # holds subcategories. We don't want to display all sets from category if type(iterator.value().hierarchy) is not str: if iterator.value().hierarchy in self.stored_selections: iterator.value().setCheckState(0, Qt.Checked) else: iterator.value().setCheckState(0, Qt.Unchecked) iterator += 1 # if no items are checked, we check first one at random if len(self.get_hierarchies(only_selected=True)) == 0: iterator = QTreeWidgetItemIterator( self.hierarchy_widget, QTreeWidgetItemIterator.NotChecked) while iterator.value(): if type(iterator.value().hierarchy) is not str: iterator.value().setCheckState(0, Qt.Checked) return iterator += 1 def get_hierarchies(self, **kwargs): """ return selected hierarchy """ only_selected = kwargs.get('only_selected', None) sets_to_display = list() if only_selected: iterator = QTreeWidgetItemIterator(self.hierarchy_widget, QTreeWidgetItemIterator.Checked) else: iterator = QTreeWidgetItemIterator(self.hierarchy_widget) while iterator.value(): # note: if hierarchy value is not a tuple, then this is just top level qTreeWidgetItem that # holds subcategories. We don't want to display all sets from category if type(iterator.value().hierarchy) is not str: if not only_selected: sets_to_display.append(iterator.value().hierarchy) else: if not iterator.value().isDisabled(): sets_to_display.append(iterator.value().hierarchy) iterator += 1 return sets_to_display def filter_data_view(self): filter_proxy = self.filter_proxy_model # type: FilterProxyModel model = filter_proxy.sourceModel() # type: QStandardItemModel assert isinstance(model, QStandardItemModel) search_term = self.search_pattern.lower().strip().split() # apply filtering rules filters = [ FilterProxyModel.Filter( self.TERM, Qt.DisplayRole, lambda value: all(fs in value.lower() for fs in search_term)) ] # if self.use_min_count: # filters.append( # FilterProxyModel.Filter( # self.COUNT, Qt.DisplayRole, # lambda value: value >= self.min_count, # ) # ) if self.use_p_value: filters.append( FilterProxyModel.Filter( self.P_VAL, Qt.DisplayRole, lambda value: value < self.max_p_value)) if self.use_max_fdr: filters.append( FilterProxyModel.Filter(self.FDR, Qt.DisplayRole, lambda value: value < self.max_fdr)) filter_proxy.set_filters(filters) if model.rowCount() and not filter_proxy.rowCount(): self.Warning.all_sets_filtered() else: self.Warning.clear() def __get_source_data(self, proxy_row_index, column): proxy_index = self.filter_proxy_model.index(proxy_row_index, column) source_index = self.filter_proxy_model.mapToSource(proxy_index) return source_index.data(role=Qt.DisplayRole) def _update_fdr(self): # Update the FDR in place due to a changed selected categories set and # results for all of these categories are already available. proxy = self.filter_proxy_model model = self.filter_proxy_model.sourceModel() if model is not None: assert isinstance(model, QStandardItemModel) p_values = [(i, self.__get_source_data(i, self.P_VAL)) for i in range(proxy.rowCount())] fdr_values = FDR([p_val for _, p_val in p_values]) for i, fdr_val in zip([i for i, _ in p_values], fdr_values): proxy_index = proxy.index(i, self.FDR) source_index = self.filter_proxy_model.mapToSource(proxy_index) source_item = model.item(source_index.row(), self.FDR) source_item.setData(fdr_val, role=Qt.DisplayRole) source_item.setData(fdr_val, role=Qt.ToolTipRole) def commit(self): selection_model = self.data_view.selectionModel() if selection_model: # genes_from_set = selection_model.selectedRows(GENES) matched_genes = selection_model.selectedRows(self.COUNT) if matched_genes and self.input_genes: genes = [ model_index.data(Qt.UserRole) for model_index in matched_genes ] output_genes = [ gene_name for gene_name in list(set.union(*genes)) ] self.num_of_sel_genes = len(output_genes) self.update_info_box() if self.use_attr_names: selected = [ column for column in self.input_data.domain.attributes if self.gene_id_attribute in column.attributes and str(column.attributes[ self.gene_id_attribute]) in output_genes ] domain = Domain(selected, self.input_data.domain.class_vars, self.input_data.domain.metas) new_data = self.input_data.from_table( domain, self.input_data) self.Outputs.matched_genes.send(new_data) else: selected_rows = [] for row_index, row in enumerate(self.input_data): gene_in_row = str(row[self.gene_id_column]) if gene_in_row in self.input_genes and gene_in_row in output_genes: selected_rows.append(row_index) if selected_rows: selected = self.input_data[selected_rows] else: selected = None self.Outputs.matched_genes.send(selected) def assign_delegates(self): self.data_view.setItemDelegateForColumn(self.GENES, NumericalColumnDelegate(self)) self.data_view.setItemDelegateForColumn(self.COUNT, NumericalColumnDelegate(self)) self.data_view.setItemDelegateForColumn(self.REFERENCE, NumericalColumnDelegate(self)) self.data_view.setItemDelegateForColumn( self.P_VAL, NumericalColumnDelegate(self, precision=2, notation='e')) self.data_view.setItemDelegateForColumn( self.FDR, NumericalColumnDelegate(self, precision=2, notation='e')) self.data_view.setItemDelegateForColumn( self.ENRICHMENT, NumericalColumnDelegate(self, precision=1)) def setup_filter_model(self): self.filter_proxy_model = FilterProxyModel() self.filter_proxy_model.setFilterKeyColumn(self.TERM) self.data_view.setModel(self.filter_proxy_model) def setup_filter_area(self): h_layout = QHBoxLayout() h_layout.setSpacing(100) h_widget = widgetBox(self.mainArea, orientation=h_layout) spin(h_widget, self, 'min_count', 0, 100, label='Count', tooltip='Minimum genes count', checked='use_min_count', callback=self.invalidate, callbackOnReturn=True, checkCallback=self.invalidate) doubleSpin(h_widget, self, 'max_p_value', 0.0, 1.0, 0.0001, label='p-value', tooltip='Maximum p-value of the enrichment score', checked='use_p_value', callback=self.filter_data_view, callbackOnReturn=True, checkCallback=self.filter_data_view) doubleSpin(h_widget, self, 'max_fdr', 0.0, 1.0, 0.0001, label='FDR', tooltip='Maximum false discovery rate', checked='use_max_fdr', callback=self.filter_data_view, callbackOnReturn=True, checkCallback=self.filter_data_view) self.line_edit_filter = lineEdit(h_widget, self, 'search_pattern') self.line_edit_filter.setPlaceholderText('Filter gene sets ...') self.line_edit_filter.textChanged.connect(self.filter_data_view) def setup_control_area(self): info_box = vBox(self.controlArea, 'Info') self.input_info = widgetLabel(info_box) box = vBox(self.controlArea, "Custom Gene Sets") self.gs_label_combobox = comboBox(box, self, "gene_set_label", sendSelectedValue=True, model=self.feature_model, callback=self.invalidate) self.gs_label_combobox.setDisabled(True) self.reference_radio_box = radioButtonsInBox( self.controlArea, self, "use_reference_data", ["Entire genome", "Reference gene set (input)"], tooltips=[ "Use entire genome (for gene set enrichment)", "Use reference set of genes" ], box="Reference", callback=self.invalidate) self.reference_radio_box.setEnabled(False) hierarchy_box = widgetBox(self.controlArea, "Gene Set Categories") self.hierarchy_widget = QTreeWidget(self) self.hierarchy_widget.setEditTriggers(QTreeView.NoEditTriggers) self.hierarchy_widget.setHeaderLabels([' ']) self.hierarchy_widget.itemClicked.connect(self.update_tree_view) hierarchy_box.layout().addWidget(self.hierarchy_widget) self.commit_button = auto_commit(self.controlArea, self, "auto_commit", "&Commit", box=False) def setup_gui(self): # control area self.setup_control_area() # main area self.data_view = QTreeView() self.setup_filter_model() self.setup_filter_area() self.data_view.setAlternatingRowColors(True) self.data_view.sortByColumn(self.COUNT, Qt.DescendingOrder) self.data_view.setSortingEnabled(True) self.data_view.setSelectionMode(QTreeView.ExtendedSelection) self.data_view.setEditTriggers(QTreeView.NoEditTriggers) self.data_view.viewport().setMouseTracking(False) self.data_view.setItemDelegateForColumn( self.TERM, LinkStyledItemDelegate(self.data_view)) self.data_view.selectionModel().selectionChanged.connect(self.commit) self.mainArea.layout().addWidget(self.data_view) self.data_view.header().setSectionResizeMode( QHeaderView.ResizeToContents) self.assign_delegates() @staticmethod def set_items(gene_sets, sets_to_display, genes, ref, count_treshold, callback): model_items = [] if not genes: return for gene_set in gene_sets: if gene_set.hierarchy not in sets_to_display: continue enrichemnt_result = gene_set.set_enrichment( ref, genes.intersection(ref)) callback() if len(enrichemnt_result.query) >= count_treshold: category_column = QStandardItem() name_column = QStandardItem() count_column = QStandardItem() genes_column = QStandardItem() ref_column = QStandardItem() pval_column = QStandardItem() fdr_column = QStandardItem() enrichemnt_column = QStandardItem() category_column.setData(", ".join(gene_set.hierarchy), Qt.DisplayRole) name_column.setData(gene_set.name, Qt.DisplayRole) name_column.setData(gene_set.name, Qt.ToolTipRole) name_column.setData(gene_set.link, LinkRole) name_column.setForeground(QColor(Qt.blue)) count_column.setData(len(enrichemnt_result.query), Qt.DisplayRole) count_column.setData(set(enrichemnt_result.query), Qt.UserRole) genes_column.setData(len(gene_set.genes), Qt.DisplayRole) genes_column.setData( set(gene_set.genes), Qt.UserRole ) # store genes to get then on output on selection ref_column.setData(len(enrichemnt_result.reference), Qt.DisplayRole) pval_column.setData(enrichemnt_result.p_value, Qt.DisplayRole) pval_column.setData(enrichemnt_result.p_value, Qt.ToolTipRole) enrichemnt_column.setData(enrichemnt_result.enrichment_score, Qt.DisplayRole) enrichemnt_column.setData(enrichemnt_result.enrichment_score, Qt.ToolTipRole) model_items.append([ count_column, ref_column, pval_column, fdr_column, enrichemnt_column, genes_column, category_column, name_column ]) return model_items def init_item_model(self): if self.data_model: self.data_model.clear() self.setup_filter_model() else: self.data_model = QStandardItemModel() self.data_model.setSortRole(Qt.UserRole) self.data_model.setHorizontalHeaderLabels(self.DATA_HEADER_LABELS) def sizeHint(self): return QSize(1280, 960)
class OWGeneSets(OWWidget): name = "Gene Sets" description = "" icon = "icons/OWGeneSets.svg" priority = 9 want_main_area = True settingsHandler = OrganismContextHandler() # settings auto_commit = Setting(True) stored_selections = ContextSetting([]) organism = ContextSetting(None) class Inputs: genes = Input("Genes", Table) class Outputs: matched_genes = Output("Matched Genes", Table) class Information(OWWidget.Information): pass class Error(OWWidget.Error): missing_annotation = Msg(ERROR_ON_MISSING_ANNOTATION) missing_gene_id = Msg(ERROR_ON_MISSING_GENE_ID) missing_tax_id = Msg(ERROR_ON_MISSING_TAX_ID) cant_reach_host = Msg("Host orange.biolab.si is unreachable.") cant_load_organisms = Msg( "No available organisms, please check your connection.") def __init__(self): super().__init__() # commit self.commit_button = None # progress bar self.progress_bar = None self.progress_bar_iterations = None # data self.input_data = None self.input_genes = None self.tax_id = None self.use_attr_names = None self.gene_id_attribute = None self.gene_id_column = None self.input_info = None self.num_of_sel_genes = 0 # filter self.lineEdit_filter = None self.search_pattern = '' self.organism_select_combobox = None # data model view self.data_view = None self.data_model = None # gene matcher NCBI self.gene_matcher = None # filter proxy model self.filter_proxy_model = None # hierarchy widget self.hierarchy_widget = None self.hierarchy_state = None # threads self.threadpool = QThreadPool(self) self.workers = None # gui self.setup_gui() def _progress_advance(self): # GUI should be updated in main thread. That's why we are calling advance method here if self.progress_bar: self.progress_bar.advance() def __get_genes(self): self.input_genes = [] if self.use_attr_names: for variable in self.input_data.domain.attributes: self.input_genes.append( str(variable.attributes.get(self.gene_id_attribute, '?'))) else: genes, _ = self.input_data.get_column_view(self.gene_id_column) self.input_genes = [str(g) for g in genes] @Inputs.genes def handle_input(self, data): self.closeContext() self.Error.clear() if data: self.input_data = data self.tax_id = str(self.input_data.attributes.get(TAX_ID, None)) self.use_attr_names = self.input_data.attributes.get( GENE_AS_ATTRIBUTE_NAME, None) self.gene_id_attribute = self.input_data.attributes.get( GENE_ID_ATTRIBUTE, None) self.gene_id_column = self.input_data.attributes.get( GENE_ID_COLUMN, None) if not (self.use_attr_names is not None and ((self.gene_id_attribute is None) ^ (self.gene_id_column is None))): if self.tax_id is None: self.Error.missing_annotation() return self.Error.missing_gene_id() return elif self.tax_id is None: self.Error.missing_tax_id() return self.openContext(self.tax_id) self.__get_genes() self.download_gene_sets() def update_info_box(self): info_string = '' if self.input_genes: info_string += '{} unique gene names on input.\n'.format( len(self.input_genes)) info_string += '{} genes on output.\n'.format( self.num_of_sel_genes) else: info_string += 'No genes on input.\n' self.input_info.setText(info_string) def on_gene_sets_download(self, result): # make sure this happens in the main thread. # Qt insists that widgets be created within the GUI(main) thread. assert threading.current_thread() == threading.main_thread() self.progress_bar.finish() self.setStatusMessage('') tax_id, sets = result self.set_hierarchy_model(self.hierarchy_widget, *hierarchy_tree(tax_id, sets)) self.set_selected_hierarchies() self.update_info_box() self.workers = defaultdict(list) self.progress_bar_iterations = dict() for selected_hierarchy in self.get_hierarchies(): gene_sets = geneset.load_gene_sets(selected_hierarchy) worker = Worker(get_collections, gene_sets, set(self.input_genes), progress_callback=True, partial_result=True) worker.signals.error.connect(self.handle_error) worker.signals.finished.connect(self.handle_worker_finished) worker.signals.progress.connect(self._progress_advance) worker.signals.partial_result.connect(self.populate_data_model) worker.setAutoDelete(False) self.workers[selected_hierarchy] = worker self.progress_bar_iterations[selected_hierarchy] = len(gene_sets) self.display_gene_sets() def handle_worker_finished(self): # We check if all workers have completed. If not, continue # dirty hax, is this ok? if self.progress_bar and self.progress_bar.widget.progressBarValue == 100: self.progress_bar.finish() self.setStatusMessage('') self.hierarchy_widget.setDisabled(False) # adjust column width for i in range(len(DATA_HEADER_LABELS) - 1): self.data_view.resizeColumnToContents(i) self.filter_proxy_model.setSourceModel(self.data_model) def populate_data_model(self, partial_result): assert threading.current_thread() == threading.main_thread() if partial_result: self.data_model.appendRow(partial_result) def set_hierarchy_model(self, model, tax_id, sets): def beautify_displayed_text(text): if '_' in text: return text.replace('_', ' ').title() else: return text # TODO: maybe optimize this code? for key, value in sets.items(): item = QTreeWidgetItem(model, [beautify_displayed_text(key)]) item.setFlags(item.flags() & (Qt.ItemIsUserCheckable | ~Qt.ItemIsSelectable | Qt.ItemIsEnabled)) item.setExpanded(True) item.tax_id = tax_id item.hierarchy = key if value: item.setFlags(item.flags() | Qt.ItemIsTristate) self.set_hierarchy_model(item, tax_id, value) else: if item.parent(): item.hierarchy = ((item.parent().hierarchy, key), tax_id) if not item.childCount() and not item.parent(): item.hierarchy = ((key, ), tax_id) def download_gene_sets(self): self.Error.clear() # reset hierarchy widget state self.hierarchy_widget.clear() # clear data view self.init_item_model() # get all gene sets for selected organism gene_sets = geneset.list_all(organism=self.tax_id) # init progress bar self.progress_bar = ProgressBar(self, iterations=len(gene_sets) * 100) # status message self.setStatusMessage('downloading sets') worker = Worker(download_gene_sets, gene_sets, progress_callback=True) worker.signals.progress.connect(self._progress_advance) worker.signals.result.connect(self.on_gene_sets_download) worker.signals.error.connect(self.handle_error) # move download process to worker thread self.threadpool.start(worker) def display_gene_sets(self): self.init_item_model() self.hierarchy_widget.setDisabled(True) only_selected_hier = self.get_hierarchies(only_selected=True) # init progress bar iterations = sum([ self.progress_bar_iterations[hier] for hier in only_selected_hier ]) self.progress_bar = ProgressBar(self, iterations=iterations) self.setStatusMessage('displaying gene sets') if not only_selected_hier: self.progress_bar.finish() self.setStatusMessage('') self.hierarchy_widget.setDisabled(False) return # save setting on selected hierarchies self.stored_selections = only_selected_hier # save context self.closeContext() for selected_hierarchy in only_selected_hier: self.threadpool.start(self.workers[selected_hierarchy]) self.openContext(self.tax_id) def handle_error(self, ex): self.progress_bar.finish() self.setStatusMessage('') if isinstance(ex, ConnectionError): self.Error.cant_reach_host() print(ex) def set_selected_hierarchies(self): iterator = QTreeWidgetItemIterator(self.hierarchy_widget, QTreeWidgetItemIterator.All) while iterator.value(): # note: if hierarchy value is not a tuple, then this is just top level qTreeWidgetItem that # holds subcategories. We don't want to display all sets from category if type(iterator.value().hierarchy) is not str: if iterator.value().hierarchy in self.stored_selections: iterator.value().setCheckState(0, Qt.Checked) else: iterator.value().setCheckState(0, Qt.Unchecked) iterator += 1 # if no items are checked, we check first one at random if len(self.get_hierarchies(only_selected=True)) == 0: iterator = QTreeWidgetItemIterator( self.hierarchy_widget, QTreeWidgetItemIterator.NotChecked) while iterator.value(): if type(iterator.value().hierarchy) is not str: iterator.value().setCheckState(0, Qt.Checked) return iterator += 1 def get_hierarchies(self, **kwargs): """ return selected hierarchy """ only_selected = kwargs.get('only_selected', None) sets_to_display = list() if only_selected: iterator = QTreeWidgetItemIterator(self.hierarchy_widget, QTreeWidgetItemIterator.Checked) else: iterator = QTreeWidgetItemIterator(self.hierarchy_widget) while iterator.value(): # note: if hierarchy value is not a tuple, then this is just top level qTreeWidgetItem that # holds subcategories. We don't want to display all sets from category if type(iterator.value().hierarchy) is not str: if not only_selected: sets_to_display.append(iterator.value().hierarchy) else: if not iterator.value().isDisabled(): sets_to_display.append(iterator.value().hierarchy) iterator += 1 return sets_to_display def commit(self): selection_model = self.data_view.selectionModel() if selection_model: # genes_from_set = selection_model.selectedRows(GENES) matched_genes = selection_model.selectedRows(MATCHED) if matched_genes and self.input_genes: genes = [ model_index.data(Qt.UserRole) for model_index in matched_genes ] output_genes = [ gene_name for gene_name in list(set.union(*genes)) ] self.num_of_sel_genes = len(output_genes) self.update_info_box() if self.use_attr_names: selected = [ column for column in self.input_data.domain.attributes if self.gene_id_attribute in column.attributes and str(column.attributes[ self.gene_id_attribute]) in output_genes ] domain = Domain(selected, self.input_data.domain.class_vars, self.input_data.domain.metas) new_data = self.input_data.from_table( domain, self.input_data) self.Outputs.matched_genes.send(new_data) else: selected_rows = [] for row_index, row in enumerate(self.input_data): gene_in_row = str(row[self.gene_id_column]) if gene_in_row in self.input_genes and gene_in_row in output_genes: selected_rows.append(row_index) if selected_rows: selected = self.input_data[selected_rows] else: selected = None self.Outputs.matched_genes.send(selected) def setup_gui(self): # control area info_box = vBox(self.controlArea, 'Input info') self.input_info = widgetLabel(info_box) hierarchy_box = widgetBox(self.controlArea, "Entity Sets") self.hierarchy_widget = QTreeWidget(self) self.hierarchy_widget.setEditTriggers(QTreeView.NoEditTriggers) self.hierarchy_widget.setHeaderLabels(HIERARCHY_HEADER_LABELS) self.hierarchy_widget.itemClicked.connect(self.display_gene_sets) hierarchy_box.layout().addWidget(self.hierarchy_widget) self.commit_button = auto_commit(self.controlArea, self, "auto_commit", "&Commit", box=False) # rubber(self.controlArea) # main area self.filter_proxy_model = QSortFilterProxyModel(self.data_view) self.filter_proxy_model.setFilterKeyColumn(3) self.data_view = QTreeView() self.data_view.setModel(self.filter_proxy_model) self.data_view.setAlternatingRowColors(True) self.data_view.sortByColumn(2, Qt.DescendingOrder) self.data_view.setSortingEnabled(True) self.data_view.setSelectionMode(QTreeView.ExtendedSelection) self.data_view.setEditTriggers(QTreeView.NoEditTriggers) self.data_view.viewport().setMouseTracking(True) self.data_view.setItemDelegateForColumn( TERM, LinkStyledItemDelegate(self.data_view)) self.data_view.selectionModel().selectionChanged.connect(self.commit) self.lineEdit_filter = lineEdit(self.mainArea, self, 'search_pattern', 'Filter gene sets:') self.lineEdit_filter.setPlaceholderText('search pattern ...') self.lineEdit_filter.textChanged.connect( self.filter_proxy_model.setFilterRegExp) self.mainArea.layout().addWidget(self.data_view) def init_item_model(self): if self.data_model: self.data_model.clear() self.filter_proxy_model.setSourceModel(None) else: self.data_model = QStandardItemModel() self.data_model.setSortRole(Qt.UserRole) self.data_model.setHorizontalHeaderLabels(DATA_HEADER_LABELS) def sizeHint(self): return QSize(1280, 960)