class OWCreateInstance(OWWidget):
    """Widget that lets the user compose one data instance by hand.

    The widget shows one row per variable of the input data (name and an
    editable value).  The composed instance is sent to the output, optionally
    appended to the input data.  An optional "Reference" input can be used to
    initialize the values from an existing instance.
    """

    name = "Create Instance"
    description = "Interactively create a data instance from sample dataset."
    icon = "icons/CreateInstance.svg"
    category = "Data"
    keywords = ["simulator"]
    priority = 4000

    class Inputs:
        data = Input("Data", Table)
        reference = Input("Reference", Table)

    class Outputs:
        data = Output("Data", Table)

    class Information(OWWidget.Information):
        nans_removed = Msg("Variables with only missing values were "
                           "removed from the list.")

    want_main_area = False
    # Names of the initialization actions; also used (capitalized) as labels.
    ACTIONS = ["median", "mean", "random", "input"]
    # (tag, header label) for each column of the view.
    HEADER = [["name", "Variable"],
              ["variable", "Value"]]
    # Column indices addressable by tag, e.g. ``Header.variable``.
    Header = namedtuple(
        "header", [tag for tag, _ in HEADER]
    )(*range(len(HEADER)))

    values: Dict[str, Union[float, str]] = Setting({}, schema_only=True)
    append_to_data = Setting(True)
    auto_commit = Setting(True)

    def __init__(self):
        super().__init__()
        self.data: Optional[Table] = None
        self.reference: Optional[Table] = None

        self.filter_edit = QLineEdit(textChanged=self.__filter_edit_changed,
                                     placeholderText="Filter...")
        self.view = QTableView(sortingEnabled=True,
                               contextMenuPolicy=Qt.CustomContextMenu,
                               selectionMode=QTableView.NoSelection)
        self.view.customContextMenuRequested.connect(self.__menu_requested)
        self.view.setItemDelegateForColumn(self.Header.variable,
                                           VariableDelegate(self))
        self.view.verticalHeader().hide()
        self.view.horizontalHeader().setStretchLastSection(True)
        self.view.horizontalHeader().setMaximumSectionSize(350)

        self.model = VariableItemModel(self)
        self.model.setHorizontalHeaderLabels([x for _, x in self.HEADER])
        self.model.dataChanged.connect(self.__table_data_changed)
        self.model.dataHasNanColumn.connect(self.Information.nans_removed)
        # The view shows the model through a proxy, so view/proxy rows are
        # not the same as source-model rows once sorting or filtering is on.
        self.proxy_model = QSortFilterProxyModel()
        self.proxy_model.setFilterKeyColumn(-1)
        self.proxy_model.setFilterCaseSensitivity(False)
        self.proxy_model.setSourceModel(self.model)
        self.view.setModel(self.proxy_model)

        vbox = gui.vBox(self.controlArea, box=True)
        vbox.layout().addWidget(self.filter_edit)
        vbox.layout().addWidget(self.view)

        box = gui.hBox(vbox)
        gui.rubber(box)
        for name in self.ACTIONS:
            # ``fun=name`` binds the current name; a plain closure would see
            # only the last value of the loop variable.
            gui.button(
                box, self, name.capitalize(),
                lambda *args, fun=name: self._initialize_values(fun),
                autoDefault=False
            )
        gui.rubber(box)

        box = gui.auto_apply(self.controlArea, self, "auto_commit")
        box.button.setFixedWidth(180)
        box.layout().insertStretch(0)
        # pylint: disable=unnecessary-lambda
        append = gui.checkBox(None, self, "append_to_data",
                              "Append this instance to input data",
                              callback=lambda: self.commit())
        box.layout().insertWidget(0, append)

        self._set_input_summary()
        self._set_output_summary()
        self.settingsAboutToBePacked.connect(self.pack_settings)

    def __filter_edit_changed(self):
        """Apply the (stripped) filter text to the proxy model."""
        self.proxy_model.setFilterFixedString(self.filter_edit.text().strip())

    def __table_data_changed(self):
        """Commit whenever a value in the model changes."""
        self.commit()

    def __menu_requested(self, point: QPoint):
        """Show the context menu with initialization actions for one row.

        Nothing is shown when the click does not hit a row: the index is
        then invalid and has no model to map through.
        """
        index = self.view.indexAt(point)
        if not index.isValid():
            return
        source_index = self.proxy_model.mapToSource(index)
        menu = QMenu(self)
        for action in self._create_actions(source_index):
            menu.addAction(action)
        menu.popup(self.view.viewport().mapToGlobal(point))

    def _create_actions(self, index: QModelIndex) -> List[QAction]:
        """Return one action per entry of ``ACTIONS`` acting on `index`.

        `index` must be an index of the source model (not of the proxy).
        """
        actions = []
        for name in self.ACTIONS:
            action = QAction(name.capitalize(), self)
            action.triggered.connect(
                lambda *args, fun=name: self._initialize_values(fun, [index])
            )
            actions.append(action)
        return actions

    def _initialize_values(self, fun: str,
                           indices: Optional[List[QModelIndex]] = None):
        """Set the values of variables using the initialization `fun`.

        Parameters
        ----------
        fun : str
            One of ``ACTIONS``.
        indices : list of QModelIndex, optional
            Source-model indices of the rows to initialize.  When omitted,
            the rows currently present in the proxy model (i.e. those that
            pass the filter) are initialized.

        Raises
        ------
        NotImplementedError
            If a variable is neither continuous, discrete nor string.
        """
        cont_fun = {"median": np.nanmedian,
                    "mean": np.nanmean,
                    "random": cont_random,
                    "input": np.nanmean}.get(fun, NotImplemented)
        disc_fun = {"median": majority,
                    "mean": majority,
                    "random": disc_random,
                    "input": majority}.get(fun, NotImplemented)

        if not self.data or fun == "input" and not self.reference:
            return

        if indices is None:
            # Proxy rows must be translated to source rows; using proxy row
            # numbers directly on the source model addresses the wrong
            # variables as soon as a filter hides some rows.
            proxy = self.proxy_model
            rows = [proxy.mapToSource(proxy.index(row, 0)).row()
                    for row in range(proxy.rowCount())]
        else:
            rows = [index.row() for index in indices]

        # Silence per-cell commits while values are set in bulk; a single
        # commit follows.  ``finally`` guarantees the signal is reconnected
        # even if setting a value raises.
        self.model.dataChanged.disconnect(self.__table_data_changed)
        try:
            for row in rows:
                index = self.model.index(row, self.Header.variable)
                variable = self.model.data(index, VariableRole)

                if fun == "input":
                    if variable not in self.reference.domain:
                        continue
                    values = self.reference.get_column_view(variable)[0]
                    if variable.is_primitive():
                        values = values.astype(float)
                        if np.isnan(values).all():
                            continue
                else:
                    values = self.model.data(index, ValuesRole)

                if variable.is_continuous:
                    value = cont_fun(values)
                    value = round(value, variable.number_of_decimals)
                elif variable.is_discrete:
                    value = disc_fun(values)
                elif variable.is_string:
                    value = ""
                else:
                    raise NotImplementedError

                self.model.setData(index, value, ValueRole)
        finally:
            self.model.dataChanged.connect(self.__table_data_changed)
        self.commit()

    @Inputs.data
    def set_data(self, data: Table):
        """Handle the "Data" input: rebuild the model and commit."""
        self.data = data
        self._set_input_summary()
        self._set_model_data()
        self.unconditional_commit()

    def _set_model_data(self):
        """Repopulate the model from ``self.data`` and the stored values."""
        self.Information.nans_removed.clear()
        self.model.removeRows(0, self.model.rowCount())
        if not self.data:
            return

        self.model.set_data(self.data, self.values)
        # Stored values are consumed once; they are re-collected from the
        # model in ``pack_settings``.
        self.values = {}
        # Stretching is switched off while resizing to contents and then
        # switched back on.
        self.view.horizontalHeader().setStretchLastSection(False)
        self.view.resizeColumnsToContents()
        self.view.resizeRowsToContents()
        self.view.horizontalHeader().setStretchLastSection(True)

    @Inputs.reference
    def set_reference(self, data: Table):
        """Handle the "Reference" input."""
        self.reference = data
        self._set_input_summary()

    def _set_input_summary(self):
        """Update the input summary with sizes of data and reference."""
        n_data = len(self.data) if self.data else 0
        n_refs = len(self.reference) if self.reference else 0
        summary, details, kwargs = self.info.NoInput, "", {}
        if self.data or self.reference:
            summary = f"{self.info.format_number(n_data)}, " \
                      f"{self.info.format_number(n_refs)}"
            data_list = [("Data", self.data), ("Reference", self.reference)]
            details = format_multiple_summaries(data_list)
            kwargs = {"format": Qt.RichText}
        self.info.set_input_summary(summary, details, **kwargs)

    def _set_output_summary(self, data: Optional[Table] = None):
        """Update the output summary for `data` (or report no output)."""
        if data:
            summary, details = len(data), format_summary_details(data)
        else:
            summary, details = self.info.NoOutput, ""
        self.info.set_output_summary(summary, details)

    def commit(self):
        """Build the output table from the current values and send it."""
        output_data = None
        if self.data:
            output_data = self._create_data_from_values()
            if self.append_to_data:
                output_data = self._append_to_data(output_data)
        self._set_output_summary(output_data)
        self.Outputs.data.send(output_data)

    def _create_data_from_values(self) -> Table:
        """Return a one-row table in the input domain holding the values.

        The row starts out entirely missing (NaN, or "" for string metas)
        and is then filled from the values shown in the model.
        """
        data = Table.from_domain(self.data.domain, 1)
        data.name = "created"
        data.X[:] = np.nan
        data.Y[:] = np.nan
        for i, m in enumerate(self.data.domain.metas):
            data.metas[:, i] = "" if m.is_string else np.nan

        values = self._get_values()
        for var_name, value in values.items():
            data[:, var_name] = value
        return data

    def _append_to_data(self, data: Table) -> Table:
        """Return the input data with the one-row `data` appended.

        A discrete meta "Source ID" is added whose value tells the original
        rows (0) from the created row (1).
        """
        assert self.data
        assert len(data) == 1

        var = DiscreteVariable("Source ID",
                               values=(self.data.name, data.name))
        data = Table.concatenate([self.data, data], axis=0)
        domain = Domain(data.domain.attributes, data.domain.class_vars,
                        data.domain.metas + (var, ))
        data = data.transform(domain)
        data.metas[:len(self.data), -1] = 0
        data.metas[len(self.data):, -1] = 1
        return data

    def _get_values(self) -> Dict[str, Union[str, float]]:
        """Return the current values of all model rows, keyed by variable name."""
        values = {}
        for row in range(self.model.rowCount()):
            index = self.model.index(row, self.Header.variable)
            values[self.model.data(index, VariableRole).name] = \
                self.model.data(index, ValueRole)
        return values

    def send_report(self):
        """Report the input/output domain and the current values."""
        if not self.data:
            return
        self.report_domain("Input", self.data.domain)
        self.report_domain("Output", self.data.domain)
        items = []
        values: Dict = self._get_values()
        for var in self.data.domain.variables + self.data.domain.metas:
            val = values.get(var.name, np.nan)
            if var.is_primitive():
                val = var.repr_val(val)
            items.append([f"{var.name}:", val])
        self.report_table("Values", items)

    @staticmethod
    def sizeHint():
        return QSize(600, 500)

    def pack_settings(self):
        """Store the current values into the ``values`` setting."""
        self.values: Dict[str, Union[str, float]] = self._get_values()
class OWGEODatasets(OWWidget, ConcurrentWidgetMixin):
    """Widget for browsing GEO data sets and downloading the selected one.

    The main area lists the data sets known to ``GDSInfo``; selecting one
    shows its description and sample annotations and starts a download task
    whose result is sent to the "Expression Data" output.
    """

    name = "GEO Data Sets"
    description = "Access to Gene Expression Omnibus data sets."
    icon = "icons/OWGEODatasets.svg"
    priority = 2

    class Outputs:
        gds_data = Output("Expression Data", Table)

    search_pattern = Setting('')
    auto_commit = Setting(True)
    genes_as_rows = Setting(False)
    mergeSpots = Setting(True)
    selected_gds = Setting(None)
    gdsSelectionStates = Setting({})
    # Saved states of the (vertical, horizontal) splitters.
    splitter_settings = Setting((
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01',
    ))

    def __init__(self):
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        self.gds_info: Optional[GDSInfo] = GDSInfo(
        )  # TODO: handle possible exceptions
        self.gds_data: Optional[Table] = None

        # Control area
        box = widgetBox(self.controlArea, 'Info', addSpace=True)
        self.infoBox = widgetLabel(box, 'Initializing\n\n')

        box = widgetBox(self.controlArea, 'Output', addSpace=True)
        radioButtonsInBox(box,
                          self,
                          'genes_as_rows',
                          ['Samples in rows', 'Genes in rows'],
                          callback=self._run)
        separator(box)

        rubber(self.controlArea)
        auto_commit(self.controlArea,
                    self,
                    'auto_commit',
                    '&Commit',
                    box=False)

        # Main Area

        # Filter widget
        self.filter = lineEdit(self.mainArea,
                               self,
                               'search_pattern',
                               'Filter:',
                               callbackOnType=True,
                               callback=self._apply_filter)
        self.mainArea.layout().addWidget(self.filter)

        splitter_vertical = QSplitter(Qt.Vertical, self.mainArea)

        self.mainArea.layout().addWidget(splitter_vertical)

        # set table view
        self.table_view = QTableView(splitter_vertical)
        self.table_view.setShowGrid(False)
        self.table_view.setSortingEnabled(True)
        self.table_view.sortByColumn(1, Qt.AscendingOrder)
        self.table_view.setAlternatingRowColors(True)
        self.table_view.verticalHeader().setVisible(False)
        self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)
        self.table_view.setSelectionMode(QAbstractItemView.SingleSelection)
        self.table_view.viewport().setMouseTracking(True)
        self.table_view.setSizeAdjustPolicy(
            QAbstractScrollArea.AdjustToContents)

        self.table_model = GEODatasetsModel()
        self.table_model.initialize(self.gds_info)
        self.table_view.setModel(self.table_model)
        self.table_model.show_table()

        self.table_view.horizontalHeader().setStretchLastSection(True)
        self.table_view.resizeColumnsToContents()

        # Row height is derived from the style's size for a 20x20 item.
        v_header = self.table_view.verticalHeader()
        option = self.table_view.viewOptions()
        size = self.table_view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, option, QSize(20, 20), self.table_view)

        v_header.setDefaultSectionSize(size.height() + 2)
        v_header.setMinimumSectionSize(5)

        # set item delegates
        self.table_view.setItemDelegateForColumn(
            self.table_model.pubmedid_col,
            LinkStyledItemDelegate(self.table_view))
        self.table_view.setItemDelegateForColumn(
            self.table_model.gds_id_col,
            LinkStyledItemDelegate(self.table_view))
        self.table_view.setItemDelegateForColumn(
            self.table_model.indicator_col,
            IndicatorItemDelegate(self.table_view, role=Qt.DisplayRole),
        )

        splitter_horizontal = QSplitter(Qt.Horizontal, splitter_vertical)

        # Description Widget
        box = widgetBox(splitter_horizontal, 'Description')
        self.description_widget = widgetLabel(box, '')
        self.description_widget.setWordWrap(True)
        rubber(box)

        # Sample Annotations Widget
        box = widgetBox(splitter_horizontal, 'Sample Annotations')
        self.annotations_widget = QTreeWidget(box)
        self.annotations_widget.setHeaderLabels(
            ['Type (Sample annotations)', 'Sample count'])
        self.annotations_widget.setRootIsDecorated(True)
        box.layout().addWidget(self.annotations_widget)
        # True while the tree is being (re)built, so that programmatic
        # check-state changes are not treated as user edits.
        self._annotations_updating = False
        self.annotations_widget.itemChanged.connect(
            self.on_annotation_selection_changed)
        self.splitters = splitter_vertical, splitter_horizontal

        for sp, setting in zip(self.splitters, self.splitter_settings):
            sp.splitterMoved.connect(self._splitter_moved)
            sp.restoreState(setting)

        self.table_view.selectionModel().selectionChanged.connect(
            self.on_gds_selection_changed)
        self._apply_filter()

        self._run()

    def _splitter_moved(self, *args):
        """Store the current state of both splitters in the settings."""
        self.splitter_settings = [
            bytes(sp.saveState()) for sp in self.splitters
        ]

    def _set_description_widget(self):
        """Show the description of the selected data set."""
        self.description_widget.setText(
            self.selected_gds.get('description', 'Description not available.'))

    def _set_annotations_widget(self, gds):
        """Rebuild the sample annotations tree for the data set `gds`.

        `gds` is read through the keys ``'name'`` and ``'subsets'``; each
        subset through ``'type'``, ``'description'`` and ``'sample_id'``.
        """
        self._annotations_updating = True
        try:
            self.annotations_widget.clear()

            annotations = defaultdict(set)
            subsets_count = {}

            for desc in gds['subsets']:
                annotations[desc['type']].add(desc['description'])
                subsets_count[desc['description']] = str(
                    len(desc['sample_id']))

            for _type, subsets in annotations.items():
                key = (gds["name"], _type)
                parent = QTreeWidgetItem(self.annotations_widget, [_type])
                parent.key = key
                for subset in subsets:
                    key = (gds['name'], _type, subset)
                    item = QTreeWidgetItem(
                        parent, [subset, subsets_count.get(subset, '')])
                    item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                    item.setCheckState(
                        0, self.gdsSelectionStates.get(key, Qt.Checked))
                    item.key = key
        finally:
            # Always drop the guard, otherwise user edits of the check boxes
            # would be ignored from then on.
            self._annotations_updating = False

        self.annotations_widget.expandAll()
        for i in range(self.annotations_widget.columnCount()):
            self.annotations_widget.resizeColumnToContents(i)

    def _set_selection(self):
        """Re-select the row of the stored data set, if it is in the table."""
        if self.selected_gds is None:
            return
        index = self.table_model.get_row_index(self.selected_gds.get('name'))
        if index is None:
            return

        # Signals are blocked so that restoring the selection does not go
        # through ``on_gds_selection_changed`` (which would start a download).
        selection_model = self.table_view.selectionModel()
        selection_model.blockSignals(True)
        try:
            self.table_view.selectRow(index)
            self._handle_selection_changed()
        finally:
            selection_model.blockSignals(False)

    def _handle_selection_changed(self):
        """Sync description and annotations with the selected table row."""
        if self.table_model.table is not None:
            selection = self.table_view.selectionModel().selectedRows(
                self.table_model.gds_id_col)
            selected_gds_name = selection[0].data(
            ) if len(selection) > 0 else None

            if selected_gds_name:
                self.selected_gds = self.table_model.info.get(
                    selected_gds_name)
                self._set_annotations_widget(self.selected_gds)
                self._set_description_widget()
            else:
                self.annotations_widget.clear()
                self.description_widget.clear()

            self.update_info()

    def _apply_filter(self):
        """Show the rows matching the search pattern; restore the selection."""
        if self.table_model.table is not None:
            self.table_model.show_table(
                filter_pattern=str(self.search_pattern))
            self._set_selection()
            self.update_info()

    def _run(self):
        """Start the download task for the selected data set, if any."""
        if self.selected_gds is not None:
            self.gds_data = None
            self.start(run_download_task, self.selected_gds.get('name'),
                       self.get_selected_samples(), self.genes_as_rows)

    def on_gds_selection_changed(self):
        """React to a user change of the selected row."""
        self._handle_selection_changed()
        self._run()

    def on_annotation_selection_changed(self):
        """Store the check states of all annotation items and re-run."""
        if self._annotations_updating:
            return
        for i in range(self.annotations_widget.topLevelItemCount()):
            item = self.annotations_widget.topLevelItem(i)
            if 'key' in item.__dict__:
                self.gdsSelectionStates[item.key] = item.checkState(0)
            for j in range(item.childCount()):
                child = item.child(j)
                if 'key' in child.__dict__:
                    self.gdsSelectionStates[child.key] = child.checkState(0)

        self._run()

    def update_info(self):
        """Update the info label with total, cached and filtered counts."""
        all_gds = len(self.table_model.info)
        text = "{} datasets\n{} datasets cached\n".format(
            all_gds, len(local_files.listfiles()))
        filtered = self.table_view.model().rowCount()
        if all_gds != filtered:
            text += "{} after filtering".format(filtered)
        self.infoBox.setText(text)

    def get_selected_samples(self):
        """Return the currently selected sample annotations.

        The return value is a ``defaultdict(list)`` mapping each sample type
        to the list of its selected sample values.

        .. note:: if some sample type has no selected values, all of its
                  values are returned for it (i.e. it is not filtered on).
        """
        def childiter(item):
            """Iterate over the children of a QTreeWidgetItem instance."""
            for i in range(item.childCount()):
                yield item.child(i)

        selected = defaultdict(list)
        for stype in childiter(self.annotations_widget.invisibleRootItem()):
            type_name = str(stype.text(0))
            all_values = []
            checked_values = []
            for sval in childiter(stype):
                value = str(sval.text(0))
                all_values.append(value)
                if self.gdsSelectionStates.get(sval.key, True):
                    checked_values.append(value)

            # If no sample of a sample type is selected we don't filter on it.
            values = checked_values or all_values
            if values:
                selected[type_name].extend(values)

        return selected

    def commit(self):
        """Send the downloaded data to the output."""
        self.Outputs.gds_data.send(self.gds_data)

    def on_done(self, result: Result):
        """Store and send the finished download; refresh the table."""
        assert isinstance(result.gds_dataset, Table)
        self.gds_data = result.gds_dataset
        self.commit()

        if self.gds_info:
            self.table_model.initialize(self.gds_info)
            self._apply_filter()

    def on_partial_result(self, result: Any) -> None:
        pass

    def onDeleteWidget(self):
        self.shutdown()
        super().onDeleteWidget()

    def send_report(self):
        """Report the selected data set and its sample annotations.

        Nothing is reported when no data set is selected.
        """
        if self.selected_gds is None:
            return

        from html import escape

        gds = self.selected_gds
        self.report_items(
            "GEO Dataset",
            [
                ("ID", gds['name']),
                ("Title", gds['title']),
                ("Organism", gds['sample_organism']),
            ],
        )
        self.report_items(
            "Data",
            [
                ("Samples", gds['sample_count']),
                ("Features", gds['variables']),
                ("Genes", gds['genes']),
            ],
        )

        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in gds['subsets']:
            subsets[subset['type']].append(
                (subset['description'], len(subset['sample_id'])))

        # Plain spaces collapse in rendered HTML, so the indentation uses
        # non-breaking space entities; annotation text comes from the data
        # set and is escaped before it is placed into the markup.
        indent = 9 * "&nbsp;"
        parts = ["<ul>"]
        for _type, described in subsets.items():
            parts.append("<b>" + escape(str(_type)) + ":</b><br>")
            for desc, count in described:
                parts.append(indent + "<b>{}:</b> {}<br>".format(
                    escape(str(desc)), count))
        parts.append("</ul>")
        self.report_html += "".join(parts)