class OWGenes(OWWidget, ConcurrentWidgetMixin):
    """Widget that matches gene identifiers found in the input table.

    Gene identifiers are read either from the feature (column) names or from
    one string/discrete column of the input table.  They are handed to a
    ``GeneMatcher`` that runs in the background; the matched genes are shown
    in the upper table, the remaining ones in the lower table.  The widget
    outputs the (optionally filtered) input data annotated with the target
    gene ID and a table describing the matched genes.
    """

    name = "Genes"
    description = "Tool for working with genes"
    icon = "../widgets/icons/OWGeneInfo.svg"
    priority = 40
    want_main_area = True

    # index into ``self.organisms`` (not a taxonomy id)
    selected_organism: int = Setting(11)
    search_pattern: str = Setting('')
    exclude_unmatched = Setting(True)
    replace_id_with_symbol = Setting(True)
    auto_commit = Setting(True)

    settingsHandler = DomainContextHandler()
    selected_gene_col = ContextSetting(None)
    use_attr_names = ContextSetting(True)

    replaces = [
        'orangecontrib.bioinformatics.widgets.OWGeneNameMatcher.OWGeneNameMatcher'
    ]

    class Inputs:
        data_table = Input("Data", Table)

    class Outputs:
        data_table = Output("Data", Table)
        gene_matcher_results = Output("Genes", Table)

    class Information(OWWidget.Information):
        pass

    def sizeHint(self):
        """Return the preferred initial size of the widget window."""
        return QSize(1280, 960)

    def __init__(self):
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        # ATTRIBUTES #
        self.target_database = ENTREZ_ID

        # input data
        self.input_data = None
        self.input_genes = None
        self.tax_id = None
        self.column_candidates = []

        # input options
        self.organisms = []

        # gene matcher
        self.gene_matcher = None

        # progress bar
        self.progress_bar = None

        # single-shot timer used to debounce typing in the filter line edit
        self._timer = QTimer()
        self._timer.timeout.connect(self._apply_filter)
        self._timer.setSingleShot(True)

        # GUI SECTION #

        # Control area
        self.info_box = widgetLabel(
            widgetBox(self.controlArea, "Info", addSpace=True),
            'No data on input.\n')

        organism_box = vBox(self.controlArea, 'Organism')
        self.organism_select_combobox = comboBox(
            organism_box,
            self,
            'selected_organism',
            callback=self.on_input_option_change)

        self.get_available_organisms()
        self.organism_select_combobox.setCurrentIndex(self.selected_organism)

        box = widgetBox(self.controlArea, 'Gene IDs in the input data')
        self.gene_columns_model = itemmodels.DomainModel(
            valid_types=(StringVariable, DiscreteVariable))
        self.gene_column_combobox = comboBox(
            box,
            self,
            'selected_gene_col',
            label='Stored in data column',
            model=self.gene_columns_model,
            sendSelectedValue=True,
            callback=self.on_input_option_change,
        )

        self.attr_names_checkbox = checkBox(
            box,
            self,
            'use_attr_names',
            'Stored as feature (column) names',
            disables=[(-1, self.gene_column_combobox)],
            callback=self.on_input_option_change,
        )

        self.gene_column_combobox.setDisabled(bool(self.use_attr_names))

        output_box = vBox(self.controlArea, 'Output')

        self.exclude_radio = checkBox(output_box,
                                      self,
                                      'exclude_unmatched',
                                      'Exclude unmatched genes',
                                      callback=self.commit)

        self.replace_radio = checkBox(output_box,
                                      self,
                                      'replace_id_with_symbol',
                                      'Replace feature IDs with gene names',
                                      callback=self.commit)

        auto_commit(self.controlArea,
                    self,
                    "auto_commit",
                    "&Commit",
                    box=False)

        rubber(self.controlArea)

        # Main area
        self.filter = lineEdit(self.mainArea,
                               self,
                               'search_pattern',
                               'Filter:',
                               callbackOnType=True,
                               callback=self.handle_filter_callback)
        self.mainArea.layout().addWidget(self.filter)

        # set splitter
        self.splitter = QSplitter()
        self.splitter.setOrientation(Qt.Vertical)

        self.table_model = GeneInfoModel()
        self.table_view = QTableView()
        self.table_view.setAlternatingRowColors(True)
        self.table_view.viewport().setMouseTracking(True)
        self.table_view.setSortingEnabled(True)
        self.table_view.setShowGrid(False)
        self.table_view.verticalHeader().hide()

        self.unknown_model = UnknownGeneInfoModel()

        self.unknown_view = QTableView()
        self.unknown_view.setModel(self.unknown_model)
        self.unknown_view.verticalHeader().hide()
        self.unknown_view.setShowGrid(False)
        self.unknown_view.setSelectionMode(QAbstractItemView.NoSelection)
        self.unknown_view.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)

        self.splitter.addWidget(self.table_view)
        self.splitter.addWidget(self.unknown_view)

        self.splitter.setStretchFactor(0, 90)
        self.splitter.setStretchFactor(1, 10)

        self.mainArea.layout().addWidget(self.splitter)

    def onDeleteWidget(self):
        """Stop the background matcher task before the widget goes away."""
        self.shutdown()
        super().onDeleteWidget()

    def handle_filter_callback(self):
        """Restart the debounce timer; the filter is applied 500 ms later."""
        self._timer.stop()
        self._timer.start(500)

    def _apply_filter(self):
        """Apply ``search_pattern`` to the gene table and re-send outputs."""
        # filter only if input data is present and model is populated
        if self.table_model.table is not None:
            self.table_model.update_model(
                filter_pattern=str(self.search_pattern))
            self.commit()

    def __reset_widget_state(self):
        """Detach and clear both table models and refresh the info label."""
        self.table_view.clearSpans()
        self.table_view.setModel(None)
        self.table_model.clear()
        self.unknown_model.clear()
        self._update_info_box()

    def _update_info_box(self):
        """Show gene counts in the info label, or a 'no data' message."""
        if self.input_genes and self.gene_matcher:
            num_genes = len(self.gene_matcher.genes)
            known_genes = len(self.gene_matcher.get_known_genes())

            info_text = ('{} genes in input data\n'
                         '{} genes match Entrez database\n'
                         '{} genes with match conflicts\n'.format(
                             num_genes, known_genes, num_genes - known_genes))
        else:
            info_text = 'No data on input.'

        self.info_box.setText(info_text)

    def on_done(self, _):
        """Populate both tables once the background matcher task finished.

        The task result itself is ignored; the matched genes are read from
        ``self.gene_matcher``.
        """
        # update info box
        self._update_info_box()

        # set output options
        self.toggle_radio_options()

        # set known genes
        self.table_model.initialize(self.gene_matcher.genes)
        self.table_view.setModel(self.table_model)
        self.table_view.selectionModel().selectionChanged.connect(self.commit)
        self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)

        self.table_view.setItemDelegateForColumn(
            self.table_model.entrez_column_index,
            LinkStyledItemDelegate(self.table_view))

        # derive the row height from the style's size for an item view item
        v_header = self.table_view.verticalHeader()
        option = self.table_view.viewOptions()
        size = self.table_view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, option, QSize(20, 20), self.table_view)

        v_header.setDefaultSectionSize(size.height() + 2)
        v_header.setMinimumSectionSize(5)
        self.table_view.horizontalHeader().setStretchLastSection(True)

        # set unknown genes
        self.unknown_model.initialize(self.gene_matcher.genes)
        self.unknown_view.verticalHeader().setStretchLastSection(True)

        self._apply_filter()

    def get_available_organisms(self):
        """Fill ``self.organisms`` and the organism combo box.

        Organisms come from ``taxonomy.common_taxids()`` and are ordered by
        their name; ``self.organisms`` holds the tax ids in the same order
        as the names shown in the combo box.
        """
        available_organism = sorted(
            ((tax_id, taxonomy.name(tax_id))
             for tax_id in taxonomy.common_taxids()),
            key=lambda x: x[1])

        self.organisms = [tax_id[0] for tax_id in available_organism]
        self.organism_select_combobox.addItems(
            [tax_id[1] for tax_id in available_organism])

    def gene_names_from_table(self):
        """Extract gene names from the input `Orange.data.Table`.

        The result is stored in ``self.input_genes``; it stays an empty list
        when there is no input data or when no gene column can be found.
        """
        self.input_genes = []
        if not self.input_data:
            return

        if self.use_attr_names:
            self.input_genes = [
                str(attr.name).strip()
                for attr in self.input_data.domain.attributes
            ]
            return

        if self.selected_gene_col is None:
            self.selected_gene_col = self.gene_column_identifier()

        gene_col = self.selected_gene_col
        if gene_col is None:
            # no string/discrete column is available to read genes from
            return

        self.input_genes = [
            str(e[gene_col]) for e in self.input_data
            if not np.isnan(e[gene_col])
        ]

    def _update_gene_matcher(self):
        """Create a fresh ``GeneMatcher`` for the current input and organism."""
        self.gene_names_from_table()

        self.gene_matcher = GeneMatcher(self.get_selected_organism(),
                                        auto_start=False)
        self.gene_matcher.genes = self.input_genes

    def get_selected_organism(self):
        """Return the tax id at the currently selected organism index."""
        return self.organisms[self.selected_organism]

    def _run(self):
        """Start the background matching task if a matcher exists."""
        if self.gene_matcher is not None:
            self.start(run_gene_matcher, self.gene_matcher)

    def on_input_option_change(self):
        """Reset the views and re-run matching after an option changed."""
        self.__reset_widget_state()
        self._update_gene_matcher()
        self._run()

    def gene_column_identifier(self):
        """Return the most suitable column that stores genes.

        If there are several suitable columns, the one with the most unique
        values is selected.  Returns ``None`` when the column model offers
        no string or discrete variable.
        """
        # candidates -> (variable, num of unique values)
        candidates = [
            (col, np.unique(self.input_data.get_column_view(col)[0]).size)
            for col in self.gene_columns_model
            if isinstance(col, (DiscreteVariable, StringVariable))
        ]
        if not candidates:
            return None

        # stable sort + last element: on ties the later column wins
        best_candidate, _ = sorted(candidates, key=lambda x: x[1])[-1]
        return best_candidate

    def find_genes_location(self):
        """Try to locate the genes when the data is first loaded.

        Proposed rules:
            - when no suitable feature names are present, check the columns.
            - find the most suitable column, that is, the one with most
              unique values.
        """
        domain = self.input_data.domain
        if not domain.attributes:
            if self.selected_gene_col is None:
                self.selected_gene_col = self.gene_column_identifier()
                self.use_attr_names = False

    @Inputs.data_table
    def handle_input(self, data):
        """Handle a new (or removed) input table.

        Clears the previous state, then for non-empty ``data`` reads the
        table attributes, restores context settings and starts matching.
        Without data both outputs are cleared.
        """
        self.closeContext()
        # a task started for the previous input must not report into the
        # freshly reset widget
        self.cancel()

        self.input_data = None
        self.input_genes = None
        self.__reset_widget_state()
        self.gene_columns_model.set_domain(None)
        self.selected_gene_col = None

        if not data:
            # nothing to match: drop whatever was sent for the previous input
            self.Outputs.data_table.send(None)
            self.Outputs.gene_matcher_results.send(None)
            return

        self.input_data = data
        self.gene_columns_model.set_domain(self.input_data.domain)

        # check if input table has tax_id, human is used if tax_id is not found
        self.tax_id = str(self.input_data.attributes.get(TAX_ID, '9606'))
        # check for gene location. Default is that genes are attributes in
        # the input table.
        self.use_attr_names = self.input_data.attributes.get(
            GENE_AS_ATTRIBUTE_NAME, self.use_attr_names)

        # NOTE(review): `not self.selected_organism` is only true for index 0,
        # so the table's tax id is applied only while the first organism is
        # selected -- confirm this is intended.
        if self.tax_id in self.organisms and not self.selected_organism:
            self.selected_organism = self.organisms.index(self.tax_id)

        self.openContext(self.input_data.domain)
        self.find_genes_location()
        self.on_input_option_change()

    def commit(self):
        """Build and send both outputs from the current selection/filter.

        When nothing is selected, the genes that pass the filter are used.
        Both outputs are ``None`` when there is no input or no gene has a
        known ID.
        """
        if self.input_data is None or self.gene_matcher is None:
            # may be triggered from the output check boxes before any data
            self.Outputs.data_table.send(None)
            self.Outputs.gene_matcher_results.send(None)
            return

        selection_model = self.table_view.selectionModel()
        if selection_model is None:
            # the view has never been given a model
            selected_genes = []
        else:
            selection = selection_model.selectedRows(
                self.table_model.entrez_column_index)
            selected_genes = [row.data() for row in selection]

        if not selected_genes:
            selected_genes = self.table_model.get_filtered_genes()

        gene_ids = self.get_target_ids()
        known_genes = [gid for gid in gene_ids if gid != '?']

        table = None
        gm_table = None
        if known_genes:
            if not self.use_attr_names:
                # Genes are in rows (we have a column with genes).
                table = self._commit_genes_in_rows(gene_ids, known_genes,
                                                   selected_genes)
            else:
                # Genes are in columns (genes are features).
                table = self._commit_genes_in_columns(known_genes,
                                                      selected_genes)

            gm_table = self.gene_matcher.to_data_table(
                selected_genes=selected_genes if selected_genes else None)

        self.Outputs.data_table.send(table)
        self.Outputs.gene_matcher_results.send(gm_table)

    def _commit_genes_in_rows(self, gene_ids, known_genes, selected_genes):
        """Return the output table when genes are stored in a data column."""
        input_domain = self.input_data.domain
        if self.target_database in input_domain:
            gene_var = input_domain[self.target_database]
            metas = input_domain.metas
        else:
            gene_var = StringVariable(self.target_database)
            metas = input_domain.metas + (gene_var, )

        domain = Domain(input_domain.attributes, input_domain.class_vars,
                        metas)

        table = self.input_data.transform(domain)
        col, _ = table.get_column_view(gene_var)
        col[:] = gene_ids

        # filter selected rows
        selected_genes_set = set(selected_genes)
        selected_rows = [
            row_index for row_index, row in enumerate(table)
            if str(row[gene_var]) in selected_genes_set
        ]

        # handle table attributes
        table.attributes[TAX_ID] = self.get_selected_organism()
        table.attributes[GENE_AS_ATTRIBUTE_NAME] = False
        table.attributes[GENE_ID_COLUMN] = self.target_database
        table = table[selected_rows] if selected_rows else table

        if self.exclude_unmatched:
            # create filter from selected column for genes
            only_known = table_filter.FilterStringList(gene_var, known_genes)
            # apply filter to the data
            table = table_filter.Values([only_known])(table)

        return table

    def _commit_genes_in_columns(self, known_genes, selected_genes):
        """Return the output table when genes are the feature names."""
        domain = self.input_data.domain.copy()
        table = self.input_data.transform(domain)

        for gene in self.gene_matcher.genes:
            if gene.input_identifier in table.domain:
                variable = table.domain[gene.input_identifier]
                variable.attributes[self.target_database] = (
                    str(gene.gene_id) if gene.gene_id else '?')

                if self.replace_id_with_symbol:
                    # NOTE(review): the copied domain may share variable
                    # objects with the input table, in which case this rename
                    # would also be visible there -- confirm.
                    try:
                        variable.name = str(gene.symbol)
                    except AttributeError:
                        # TODO: missing gene symbol, need to handle this?
                        pass

        # filter selected columns
        selected_genes_set = set(selected_genes)
        selected = [
            column for column in table.domain.attributes
            if self.target_database in column.attributes
            and str(column.attributes[self.target_database])
            in selected_genes_set
        ]

        output_attrs = selected if selected else table.domain.attributes

        if self.exclude_unmatched:
            known_genes_set = set(known_genes)
            output_attrs = [
                col for col in output_attrs
                if col.attributes[self.target_database] in known_genes_set
            ]

        domain = Domain(output_attrs, table.domain.class_vars,
                        table.domain.metas)
        table = table.from_table(domain, table)

        # handle table attributes
        table.attributes[TAX_ID] = self.get_selected_organism()
        table.attributes[GENE_AS_ATTRIBUTE_NAME] = True
        table.attributes[GENE_ID_ATTRIBUTE] = self.target_database

        return table

    def toggle_radio_options(self):
        """Enable/disable the output check boxes for the current result."""
        self.replace_radio.setEnabled(bool(self.use_attr_names))

        genes = self.gene_matcher.genes
        if genes:
            # enable checkbox if unknown genes are detected
            has_unmatched = len(genes) != len(
                self.gene_matcher.get_known_genes())
            self.exclude_radio.setEnabled(has_unmatched)
            self.exclude_unmatched = has_unmatched

    def get_target_ids(self):
        """Return one ID string per matcher gene; ``'?'`` if it has none."""
        return [
            str(gene.gene_id) if gene.gene_id else '?'
            for gene in self.gene_matcher.genes
        ]
class OWMarkerGenes(widget.OWWidget):
    """Widget that lists marker genes from a serverfiles database.

    The marker table is downloaded (or read from the local cache), split by
    its first meta column (the group shown in the 'Organism' box) and shown
    in a filterable tree view.  The selected rows, or all shown rows when
    nothing is selected, are sent on the "Genes" output.
    """

    name = "Marker Genes"
    icon = 'icons/OWMarkerGenes.svg'
    priority = 170

    replaces = [
        'orangecontrib.single_cell.widgets.owmarkergenes.OWMarkerGenes'
    ]

    class Warning(widget.OWWidget.Warning):
        using_local_files = widget.Msg(
            "Can't connect to serverfiles. Using cached files.")

    class Outputs:
        genes = widget.Output("Genes", Table)

    want_main_area = True

    selected_group: str = settings.Setting('')
    selected_db_source: str = settings.Setting('')
    filter_text: str = settings.Setting('')
    header_state: bytes = settings.Setting(b'')
    auto_commit = settings.Setting(True)

    settingsHandler = MarkerGroupContextHandler()
    # each entry is a (gene, cell type, reference) tuple of displayed values
    selected_genes: Set[tuple] = settings.ContextSetting(set())

    def __init__(self):
        super().__init__()
        self._data = None
        self._available_db_sources = None
        self.output = None

        # single-shot timer used to debounce typing in the filter line edit
        self._timer = QTimer()
        self._timer.timeout.connect(self._filter_table)
        self._timer.setSingleShot(True)
        self.info.set_input_summary("0")
        self.info.set_output_summary("0")

        box = gui.widgetBox(self.controlArea, 'Database', margin=0)
        self.db_source_index = -1
        self.db_source_cb = gui.comboBox(box, self, 'db_source_index')
        self.db_source_cb.activated[int].connect(self.handle_source_changed)

        box = gui.widgetBox(self.controlArea, 'Organism', margin=0)
        self.group_index = -1
        self.group_cb = gui.comboBox(box, self, 'group_index')
        self.group_cb.activated[int].connect(self.set_group_index)

        gui.rubber(self.controlArea)
        gui.auto_commit(self.controlArea, self, "auto_commit", "Commit",
                        "Commit Automatically")

        # TODO: to avoid this, marker genes table should have 'tax_id' column
        self.map_group_to_taxid = {'Human': '9606', 'Mouse': '10090'}

        filter_line_edit = gui.lineEdit(self.mainArea, self,
                                        "filter_text")  # type: QLineEdit
        filter_line_edit.setPlaceholderText("Filter...")
        filter_line_edit.textEdited.connect(self.call_filter_timer)

        self.view = view = QTreeView(
            rootIsDecorated=False,
            uniformRowHeights=True,
            selectionMode=QTreeView.ExtendedSelection,
            sortingEnabled=True,
        )

        view.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(view)

        self._load_data()
        if self.header_state:
            view.header().restoreState(self.header_state)

    @property
    def available_db_sources(self) -> dict:
        """Mapping of database title to its serverfiles info entry."""
        return self._available_db_sources

    @available_db_sources.setter
    def available_db_sources(self, value: dict):
        """Set the available sources and refresh the 'Database' combo box.

        The previously selected source is kept when it is still available;
        otherwise the current index is clamped into the valid range.
        """
        self._available_db_sources = value

        items = list(value.keys())
        try:
            idx = items.index(self.selected_db_source)
        except ValueError:
            idx = -1

        self.db_source_cb.clear()
        self.db_source_cb.addItems(items)

        if idx != -1:
            self.db_source_index = idx
            self.selected_db_source = items[idx]
        elif items:
            self.db_source_index = min(max(self.db_source_index, 0),
                                       len(items) - 1)

        self.set_db_source_index(self.db_source_index)

    @property
    def data(self) -> Table:
        """The full marker genes table of the selected database."""
        return self._data

    @data.setter
    def data(self, value: Table):
        """Set the source data.

        The data is then filtered on the first meta column (group).

        Raises:
            TypeError: if the first meta column is neither string nor
                discrete.
        """
        self._data = value
        domain = value.domain

        if domain.metas:
            group = domain.metas[0]
            groupcol, _ = value.get_column_view(group)

            if group.is_string:
                # sorted: plain set order of strings differs between runs,
                # which would shuffle the combo box and the default group
                group_values = sorted(set(groupcol))
            elif group.is_discrete:
                group_values = group.values
            else:
                raise TypeError("Invalid column type")

            try:
                idx = group_values.index(self.selected_group)
            except ValueError:
                idx = -1

            self.group_cb.clear()
            self.group_cb.addItems(group_values)

            if idx != -1:
                self.group_index = idx
                self.selected_group = group_values[idx]
            elif group_values:
                self.group_index = min(max(self.group_index, 0),
                                       len(group_values) - 1)

            self.set_group_index(self.group_index)

    def _load_data(self):
        """Discover the available databases and load the selected one.

        Falls back to the locally cached file listing (and shows a warning)
        when the server cannot be reached.
        """
        self.Warning.using_local_files.clear()

        found_sources = {}
        try:
            found_sources.update(
                serverfiles.ServerFiles().allinfo(serverfiles_domain))
        except requests.exceptions.ConnectionError:
            found_sources.update(serverfiles.allinfo(serverfiles_domain))
            self.Warning.using_local_files()

        self.available_db_sources = {
            item.get('title').split(': ')[-1]: item
            for item in found_sources.values()
        }

        if not self.available_db_sources:
            return

        file_name = self.available_db_sources[
            self.selected_db_source]['filename']

        try:
            serverfiles.update(serverfiles_domain, file_name)
        except requests.exceptions.ConnectionError:
            # try to update file. Ignore network errors.
            pass

        # a connection error here is unexpected and is left to propagate
        file_path = serverfiles.localpath_download(serverfiles_domain,
                                                   file_name)

        data = Table(file_path)

        # enforce order
        old_domain = data.domain
        new_domain = Domain(
            [],
            metas=[
                old_domain['Organism'],
                old_domain['Name'],
                old_domain['Entrez ID'],
                old_domain['Cell Type'],
                old_domain['Function'],
                old_domain['Reference'],
                old_domain['URL'],
            ],
        )
        data = data.transform(new_domain)
        self.data = data

    def set_selection(self):
        """Re-select the rows that correspond to ``self.selected_genes``."""
        selected = self.selected_rows()
        if not selected:
            return

        model = self.view.model()
        last_column = self.view.header().count() - 1

        if model.rowCount() <= selected[-1]:
            return

        selection = QItemSelection()
        for row_index in selected:
            selection.append(
                QItemSelectionRange(model.index(row_index, 0),
                                    model.index(row_index, last_column)))

        self.view.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def handle_source_changed(self, source_index):
        """Switch to the database at ``source_index`` and reload the data."""
        self.set_db_source_index(source_index)
        self._load_data()

    def set_db_source_index(self, source_index):
        """Store the selected database index and its combo box text."""
        self.closeContext()
        self.db_source_index = source_index
        self.selected_db_source = self.db_source_cb.itemText(source_index)

    def set_group_index(self, group_index):
        """Store the selected group and rebuild the view for it."""
        self.closeContext()
        self.group_index = group_index
        self.selected_group = self.group_cb.itemText(group_index)
        self._setup()

    def call_filter_timer(self, search_string):
        """Remember ``search_string`` and (re)start the 700 ms filter timer."""
        self._timer.stop()
        if search_string != self.filter_text:
            self.filter_text = search_string
        self._timer.start(700)

    def _filter_table(self):
        """Apply ``filter_text`` to the view, then refresh and send output."""
        model = self.view.model()
        if not isinstance(model, SearchableTableModel):
            # nothing is loaded into the view yet, so there is nothing to
            # filter (e.g. text typed while no database is available)
            return

        model.update_model(str(self.filter_text))
        self.set_selection()
        self.update_data_info()
        self.update_model()
        self.commit()

    def update_data_info(self):
        """Update the input/output summaries with shown/selected counts."""
        model = self.view.model()
        self.info.set_input_summary(
            f"Shown : {len(model.source)}/{model.data_length()}")
        self.info.set_output_summary(
            f"Selected: {len(self.selected_genes)}")

    def _setup(self):
        """Build the view model from the rows of the selected group."""
        self.closeContext()
        data = self.data
        group = data.domain.metas[0]
        gvec = data.get_column_view(group)[0]

        if group.is_string:
            # string groups are compared by the text shown in the combo box
            mask = gvec == self.group_cb.itemData(self.group_index,
                                                  Qt.DisplayRole)
        else:
            mask = gvec == self.group_index

        data = data[mask]
        # the group column itself is not shown
        rest = data[:, data.domain.metas[1:]]
        model = SearchableTableModel(rest, parent=self)
        ref_col = rest.domain.metas.index(
            rest.domain[HeaderLabels[HeaderIndex.REFERENCE]])
        self.view.setItemDelegateForColumn(
            ref_col, gui.LinkStyledItemDelegate(self.view))

        self.view.setModel(model)
        self.view.selectionModel().selectionChanged.connect(
            self._on_selection_changed)

        self.openContext(self.selected_group)
        self.call_filter_timer(self.filter_text)
        self.view.hideColumn(HeaderIndex.URL)

    def _on_selection_changed(self, *args):
        """Rebuild and send the output after the view selection changed."""
        self.update_model()
        self.commit()
        self.update_data_info()

    def selected_rows(self):
        """Return row indices of the rows listed in ``selected_genes``."""
        if not self.selected_genes:
            return []

        model = self.view.model()
        return [
            row_index for row_index in range(model.rowCount())
            if (
                model.index(row_index, HeaderIndex.GENE).data(),
                model.index(row_index, HeaderIndex.CELL_TYPE).data(),
                model.index(row_index, HeaderIndex.REFERENCE).data(),
            ) in self.selected_genes
        ]

    def commit(self):
        """Send the prepared output table."""
        self.Outputs.genes.send(self.output)

    def update_model(self):
        """Recompute ``self.output`` and ``self.selected_genes``.

        The output holds the selected rows, or every row of the model's
        source table when nothing (or everything) is selected.
        """
        model = self.view.model()
        if not isinstance(model, SearchableTableModel):
            # no data has been loaded into the view yet
            return

        selection_model = self.view.selectionModel()
        rows = [mi.row() for mi in selection_model.selectedRows(0)]

        if rows and len(rows) != len(model.source):
            rows = model.mapToSourceRows(rows)
            output = model.source[rows]
        else:
            output = model.source

        gene_cells = selection_model.selectedRows(HeaderIndex.GENE)
        cell_type_cells = selection_model.selectedRows(HeaderIndex.CELL_TYPE)
        ref_cells = selection_model.selectedRows(HeaderIndex.REFERENCE)

        self.selected_genes = {
            (entrez.data(), cell.data(), ref.data())
            for entrez, cell, ref in zip(gene_cells, cell_type_cells,
                                         ref_cells)
        }

        # always false for marker genes data tables in single cell
        output.attributes[GENE_AS_ATTRIBUTE_NAME] = False
        # set taxonomy id in data.attributes
        output.attributes[TAX_ID] = self.map_group_to_taxid.get(
            self.selected_group, '')
        # set column id flag
        output.attributes[GENE_ID_COLUMN] = HeaderLabels[HeaderIndex.GENE]
        output.name = 'Marker Genes'

        self.output = output

    def closeEvent(self, event):
        """Remember the header layout before the window closes."""
        self.header_state = bytes(self.view.header().saveState())
        super().closeEvent(event)

    def sizeHint(self):
        """Return the default size hint, expanded to at least 750x500."""
        return super().sizeHint().expandedTo(QSize(750, 500))