class OWCreateInstance(OWWidget):
    """Widget for interactively composing a single data instance.

    The widget shows one row per variable of the input data in a
    filterable, sortable table.  The edited values are emitted as a
    one-row table, optionally appended to the input data.
    """

    name = "Create Instance"
    description = "Interactively create a data instance from sample dataset."
    icon = "icons/CreateInstance.svg"
    category = "Data"
    keywords = ["simulator"]
    priority = 4000

    class Inputs:
        data = Input("Data", Table)
        reference = Input("Reference", Table)

    class Outputs:
        data = Output("Data", Table)

    class Information(OWWidget.Information):
        nans_removed = Msg("Variables with only missing values were "
                           "removed from the list.")

    want_main_area = False
    # Names of the initialization strategies; each one gets a button and a
    # context-menu entry (label is the capitalized name).
    ACTIONS = ["median", "mean", "random", "input"]
    # Pairs of (tag, column title) for the table columns.
    HEADER = [["name", "Variable"],
              ["variable", "Value"]]
    # Maps each header tag to its column index (name -> 0, variable -> 1).
    Header = namedtuple(
        "header", [tag for tag, _ in HEADER]
    )(*range(len(HEADER)))

    values: Dict[str, Union[float, str]] = Setting({}, schema_only=True)
    append_to_data = Setting(True)
    auto_commit = Setting(True)

    def __init__(self):
        super().__init__()
        self.data: Optional[Table] = None
        self.reference: Optional[Table] = None

        self.filter_edit = QLineEdit(textChanged=self.__filter_edit_changed,
                                     placeholderText="Filter...")
        self.view = QTableView(sortingEnabled=True,
                               contextMenuPolicy=Qt.CustomContextMenu,
                               selectionMode=QTableView.NoSelection)
        self.view.customContextMenuRequested.connect(self.__menu_requested)
        self.view.setItemDelegateForColumn(
            self.Header.variable, VariableDelegate(self)
        )
        self.view.verticalHeader().hide()
        self.view.horizontalHeader().setStretchLastSection(True)
        self.view.horizontalHeader().setMaximumSectionSize(350)

        self.model = VariableItemModel(self)
        self.model.setHorizontalHeaderLabels([x for _, x in self.HEADER])
        self.model.dataChanged.connect(self.__table_data_changed)
        self.model.dataHasNanColumn.connect(self.Information.nans_removed)
        # The view displays the proxy; the proxy filters over all columns
        # (key column -1), ignoring letter case.
        self.proxy_model = QSortFilterProxyModel()
        self.proxy_model.setFilterKeyColumn(-1)
        self.proxy_model.setFilterCaseSensitivity(Qt.CaseInsensitive)
        self.proxy_model.setSourceModel(self.model)
        self.view.setModel(self.proxy_model)

        vbox = gui.vBox(self.controlArea, box=True)
        vbox.layout().addWidget(self.filter_edit)
        vbox.layout().addWidget(self.view)

        box = gui.hBox(vbox)
        gui.rubber(box)
        for name in self.ACTIONS:
            # `fun=name` binds the current name; a plain closure would see
            # only the last value of the loop variable.
            gui.button(
                box, self, name.capitalize(),
                lambda *args, fun=name: self._initialize_values(fun),
                autoDefault=False
            )
        gui.rubber(box)

        box = gui.auto_apply(self.controlArea, self, "auto_commit")
        box.button.setFixedWidth(180)
        box.layout().insertStretch(0)
        # pylint: disable=unnecessary-lambda
        append = gui.checkBox(None, self, "append_to_data",
                              "Append this instance to input data",
                              callback=lambda: self.commit())
        box.layout().insertWidget(0, append)

        self._set_input_summary()
        self._set_output_summary()
        self.settingsAboutToBePacked.connect(self.pack_settings)

    def __filter_edit_changed(self):
        """Apply the stripped filter text to the proxy model."""
        self.proxy_model.setFilterFixedString(self.filter_edit.text().strip())

    def __table_data_changed(self):
        """Commit whenever a value in the table changes."""
        self.commit()

    def __menu_requested(self, point: QPoint):
        """Show the initialization menu for the row under ``point``."""
        index = self.view.indexAt(point)
        # `indexAt` yields an invalid index when the click lands outside of
        # any row; there is nothing to initialize in that case.
        if not index.isValid():
            return
        source_index = self.proxy_model.mapToSource(index)
        menu = QMenu(self)
        for action in self._create_actions(source_index):
            menu.addAction(action)
        menu.popup(self.view.viewport().mapToGlobal(point))

    def _create_actions(self, index: QModelIndex) -> List[QAction]:
        """Return one action per entry in ``ACTIONS`` for the given row.

        ``index`` must be an index of the source model (not of the proxy).
        """
        actions = []
        for name in self.ACTIONS:
            action = QAction(name.capitalize(), self)
            action.triggered.connect(
                lambda *args, fun=name: self._initialize_values(fun, [index])
            )
            actions.append(action)
        return actions

    def _initialize_values(self, fun: str, indices: List[QModelIndex] = None):
        """Set values in the table according to the strategy ``fun``.

        ``fun`` is one of ``ACTIONS``.  ``indices`` are source-model indices
        of the rows to initialize; when omitted, every row currently shown
        by the proxy model (i.e. passing the filter) is initialized.
        Does nothing without data, or for "input" without reference data.
        """
        cont_fun = {
            "median": np.nanmedian,
            "mean": np.nanmean,
            "random": cont_random,
            "input": np.nanmean,
        }.get(fun, NotImplemented)
        disc_fun = {
            "median": majority,
            "mean": majority,
            "random": disc_random,
            "input": majority,
        }.get(fun, NotImplemented)

        if not self.data or (fun == "input" and not self.reference):
            return

        if indices is None:
            # Proxy rows differ from source rows when the view is filtered
            # or sorted, so translate them before addressing the source model.
            proxy = self.proxy_model
            rows = [proxy.mapToSource(proxy.index(row, 0)).row()
                    for row in range(proxy.rowCount())]
        else:
            rows = [index.row() for index in indices]

        # Suspend per-cell commits; a single commit follows the whole update.
        self.model.dataChanged.disconnect(self.__table_data_changed)
        try:
            for row in rows:
                index = self.model.index(row, self.Header.variable)
                variable = self.model.data(index, VariableRole)

                if fun == "input":
                    if variable not in self.reference.domain:
                        continue
                    values = self.reference.get_column_view(variable)[0]
                    if variable.is_primitive():
                        values = values.astype(float)
                        if np.isnan(values).all():
                            continue
                else:
                    values = self.model.data(index, ValuesRole)

                if variable.is_continuous:
                    value = cont_fun(values)
                    value = round(value, variable.number_of_decimals)
                elif variable.is_discrete:
                    value = disc_fun(values)
                elif variable.is_string:
                    value = ""
                else:
                    raise NotImplementedError

                self.model.setData(index, value, ValueRole)
        finally:
            # Reconnect even on failure, otherwise later edits would no
            # longer trigger a commit.
            self.model.dataChanged.connect(self.__table_data_changed)
        self.commit()

    @Inputs.data
    def set_data(self, data: Table):
        """Handle the "Data" input: rebuild the table and commit."""
        self.data = data
        self._set_input_summary()
        self._set_model_data()
        self.unconditional_commit()

    def _set_model_data(self):
        """Repopulate the model from ``self.data`` and stored ``values``."""
        self.Information.nans_removed.clear()
        self.model.removeRows(0, self.model.rowCount())
        if not self.data:
            return

        self.model.set_data(self.data, self.values)
        # Stored values are handed to the model once and then dropped.
        self.values = {}
        # Turn stretching off while sizing to contents, then restore it.
        header = self.view.horizontalHeader()
        header.setStretchLastSection(False)
        self.view.resizeColumnsToContents()
        self.view.resizeRowsToContents()
        header.setStretchLastSection(True)

    @Inputs.reference
    def set_reference(self, data: Table):
        """Handle the "Reference" input."""
        self.reference = data
        self._set_input_summary()

    def _set_input_summary(self):
        """Update the input summary with sizes of data and reference."""
        n_data = len(self.data) if self.data else 0
        n_refs = len(self.reference) if self.reference else 0
        summary, details, kwargs = self.info.NoInput, "", {}
        if self.data or self.reference:
            summary = f"{self.info.format_number(n_data)}, " \
                      f"{self.info.format_number(n_refs)}"
            data_list = [("Data", self.data), ("Reference", self.reference)]
            details = format_multiple_summaries(data_list)
            kwargs = {"format": Qt.RichText}
        self.info.set_input_summary(summary, details, **kwargs)

    def _set_output_summary(self, data: Optional[Table] = None):
        """Update the output summary for ``data`` (or mark no output)."""
        if data:
            summary, details = len(data), format_summary_details(data)
        else:
            summary, details = self.info.NoOutput, ""
        self.info.set_output_summary(summary, details)

    def commit(self):
        """Build the output table from the current values and send it."""
        output_data = None
        if self.data:
            output_data = self._create_data_from_values()
            if self.append_to_data:
                output_data = self._append_to_data(output_data)
        self._set_output_summary(output_data)
        self.Outputs.data.send(output_data)

    def _create_data_from_values(self) -> Table:
        """Return a one-row table in the input domain holding the values.

        Everything starts out as missing (empty string for string metas)
        and is then overwritten by the values read from the model.
        """
        data = Table.from_domain(self.data.domain, 1)
        data.name = "created"
        data.X[:] = np.nan
        data.Y[:] = np.nan
        for i, m in enumerate(self.data.domain.metas):
            data.metas[:, i] = "" if m.is_string else np.nan

        for var_name, value in self._get_values().items():
            data[:, var_name] = value
        return data

    def _append_to_data(self, data: Table) -> Table:
        """Return the input data with the created row ``data`` appended.

        A discrete meta "Source ID" is added; it is 0 for the rows of the
        input data and 1 for the appended row.
        """
        assert self.data
        assert len(data) == 1

        var = DiscreteVariable("Source ID", values=(self.data.name, data.name))
        data = Table.concatenate([self.data, data], axis=0)
        domain = Domain(data.domain.attributes, data.domain.class_vars,
                        data.domain.metas + (var, ))
        data = data.transform(domain)
        n_original = len(self.data)
        data.metas[:n_original, -1] = 0
        data.metas[n_original:, -1] = 1
        return data

    def _get_values(self) -> Dict[str, Union[str, float]]:
        """Return a mapping of variable name to its value in the model."""
        values = {}
        for row in range(self.model.rowCount()):
            index = self.model.index(row, self.Header.variable)
            values[self.model.data(index, VariableRole).name] = \
                self.model.data(index, ValueRole)
        return values

    def send_report(self):
        """Report the input/output domain and the table of values."""
        if not self.data:
            return
        self.report_domain("Input", self.data.domain)
        self.report_domain("Output", self.data.domain)
        items = []
        values: Dict = self._get_values()
        for var in self.data.domain.variables + self.data.domain.metas:
            val = values.get(var.name, np.nan)
            if var.is_primitive():
                val = var.repr_val(val)
            items.append([f"{var.name}:", val])
        self.report_table("Values", items)

    @staticmethod
    def sizeHint():
        return QSize(600, 500)

    def pack_settings(self):
        """Store the current values into the ``values`` setting."""
        self.values: Dict[str, Union[str, float]] = self._get_values()
class OWFeatureStatistics(widget.OWWidget):
    """Widget showing a table of basic statistics for the input features."""

    # Width:height ratio kept for the histogram cells, and the bounds
    # (in pixels) applied to the resulting row height.
    HISTOGRAM_ASPECT_RATIO = (7, 3)
    MINIMUM_HISTOGRAM_HEIGHT = 50
    MAXIMUM_HISTOGRAM_HEIGHT = 80

    name = 'Feature Statistics'
    description = 'Show basic statistics for data features.'
    icon = 'icons/FeatureStatistics.svg'

    class Inputs:
        data = Input('Data', Table, default=True)

    want_main_area = True
    buttons_area_orientation = Qt.Vertical

    settingsHandler = DomainContextHandler()

    auto_commit = Setting(True)
    color_var = ContextSetting(None)  # type: Optional[Variable]
    filter_string = ContextSetting('')

    def __init__(self):
        super().__init__()

        self.data = None  # type: Optional[Table]
        self.model = None  # type: Optional[FeatureStatisticsTableModel]

        # Information panel
        info_box = gui.vBox(self.controlArea, 'Info')
        info_box.setMinimumWidth(200)
        self.info_summary = gui.widgetLabel(info_box, wordWrap=True)
        self.info_attr = gui.widgetLabel(info_box, wordWrap=True)
        self.info_class = gui.widgetLabel(info_box, wordWrap=True)
        self.info_meta = gui.widgetLabel(info_box, wordWrap=True)
        self.set_info()

        # TODO: Implement filtering on the model
        # filter_box = gui.vBox(self.controlArea, 'Filter')
        # self.filter_text = gui.lineEdit(
        #     filter_box, self, value='filter_string',
        #     placeholderText='Filter variables by name',
        #     callback=self._filter_table_variables, callbackOnType=True,
        # )
        # shortcut = QShortcut(QKeySequence('Ctrl+f'), self, self.filter_text.setFocus)
        # shortcut.setWhatsThis('Filter variables by name')

        self.color_var_model = DomainModel(
            valid_types=(ContinuousVariable, DiscreteVariable),
            placeholder='None',
        )
        box = gui.vBox(self.controlArea, 'Histogram')
        self.cb_color_var = gui.comboBox(
            box, master=self, value='color_var', model=self.color_var_model,
            label='Color:', orientation=Qt.Horizontal,
        )
        self.cb_color_var.currentIndexChanged.connect(self.__color_var_changed)

        gui.rubber(self.controlArea)
        gui.auto_commit(
            self.buttonsArea, self, 'auto_commit', 'Send Selected Rows',
            'Send Automatically'
        )

        # Main area
        self.view = QTableView(
            showGrid=False,
            cornerButtonEnabled=False,
            sortingEnabled=True,
            selectionBehavior=QTableView.SelectRows,
            selectionMode=QTableView.MultiSelection,
            horizontalScrollMode=QTableView.ScrollPerPixel,
            verticalScrollMode=QTableView.ScrollPerPixel,
        )

        hheader = self.view.horizontalHeader()
        hheader.setStretchLastSection(False)
        # Contents precision specifies how many rows should be taken into
        # account when computing the sizes, 0 being the visible rows. This is
        # crucial, since otherwise the `ResizeToContents` section resize mode
        # would call `sizeHint` on every single row in the data before first
        # render. However, this cannot be used here, since it only appears
        # to work properly when the widget is actually shown. When the
        # widget is not shown, `sizeHint` is called on every row.
        hheader.setResizeContentsPrecision(5)
        # Set a nice default size so that headers have some space around titles
        hheader.setDefaultSectionSize(120)
        # Set individual column behaviour in `set_data` since the logical
        # indices must be valid in the model, which requires data.
        hheader.setSectionResizeMode(QHeaderView.Interactive)

        vheader = self.view.verticalHeader()
        vheader.setVisible(False)
        vheader.setSectionResizeMode(QHeaderView.Fixed)

        def bind_histogram_aspect_ratio(logical_index, _, new_size):
            """Force the horizontal and vertical header to maintain the defined
            aspect ratio specified for the histogram."""
            # Prevent function being executed more than once per resize.
            # Compare by value: the indices are plain integers.
            if self.model is None or \
                    logical_index != self.model.Columns.DISTRIBUTION.index:
                return
            ratio_width, ratio_height = self.HISTOGRAM_ASPECT_RATIO
            unit_width = new_size / ratio_width
            new_height = unit_width * ratio_height
            effective_height = max(new_height, self.MINIMUM_HISTOGRAM_HEIGHT)
            effective_height = min(effective_height, self.MAXIMUM_HISTOGRAM_HEIGHT)
            # The division above yields a float; the section size is an
            # integer number of pixels.
            vheader.setDefaultSectionSize(int(effective_height))

        def keep_row_centered(logical_index, old_size, new_size):
            """When resizing the widget when scrolled further down, the
            positions of rows changes. Obviously, the user resized in order to
            better see the row of interest. This keeps that row centered."""
            # TODO: This does not work properly
            # Prevent function being executed more than once per resize
            if self.model is None or \
                    logical_index != self.model.Columns.DISTRIBUTION.index:
                return
            top_row = self.view.indexAt(self.view.rect().topLeft()).row()
            bottom_row = self.view.indexAt(self.view.rect().bottomLeft()).row()
            middle_row = top_row + (bottom_row - top_row) // 2
            self.view.scrollTo(self.model.index(middle_row, 0), QTableView.PositionAtCenter)

        hheader.sectionResized.connect(bind_histogram_aspect_ratio)
        hheader.sectionResized.connect(keep_row_centered)

        self.distribution_delegate = DistributionDelegate()
        self.view.setItemDelegate(self.distribution_delegate)

        self.mainArea.layout().addWidget(self.view)

    def sizeHint(self):
        return QSize(900, 500)

    def _filter_table_variables(self):
        """Build the filter regex from ``filter_string``.

        NOTE: the regex is only constructed here and not yet applied to the
        model; filtering itself is still a TODO (see ``__init__``).
        """
        regex = QRegExp(self.filter_string)
        # If the user explicitly types different cases, we assume they know
        # what they are searching for and account for letter case in filter
        different_case = (
            any(c.islower() for c in self.filter_string) and
            any(c.isupper() for c in self.filter_string)
        )
        if not different_case:
            regex.setCaseSensitivity(Qt.CaseInsensitive)

    @Inputs.data
    def set_data(self, data):
        """Handle the "Data" input: rebuild the model and refresh the view."""
        self.closeContext()
        self.data = data

        if data is not None:
            self.model = FeatureStatisticsTableModel(data, parent=self)
            self.color_var_model.set_domain(data.domain)
            # Color by the first class variable by default, if there is one
            if data.domain.class_vars:
                self.color_var = data.domain.class_vars[0]
            self.openContext(self.data)
        else:
            self.model = None
            self.color_var_model.set_domain(None)

        self.view.setModel(self.model)
        self._filter_table_variables()

        self.distribution_delegate.clear()
        self.set_info()

        # The resize modes for individual columns must be set here, because
        # the logical index must be valid in `setSectionResizeMode`. It is not
        # valid when there is no data in the model.
        if self.model:
            columns, hheader = self.model.Columns, self.view.horizontalHeader()
            hheader.setSectionResizeMode(columns.ICON.index, QHeaderView.ResizeToContents)
            hheader.setSectionResizeMode(columns.DISTRIBUTION.index, QHeaderView.Stretch)

    @pyqtSlot(int)
    def __color_var_changed(self, new_index):
        """Pass the newly selected color variable to the delegate and
        request a repaint of every cell in the distribution column."""
        # Indices below 1 (no selection / the placeholder) mean no coloring
        attribute = None if new_index < 1 else self.cb_color_var.model()[new_index]
        self.distribution_delegate.set_color_attribute(attribute)

        if self.model:
            for row_idx in range(self.model.rowCount()):
                index = self.model.index(
                    row_idx, self.model.Columns.DISTRIBUTION.index)
                self.view.update(index)

    @staticmethod
    def _format_variables_string(variables):
        """Return a textual summary of ``variables`` grouped by type,
        e.g. counts of categorical, numeric, time and string variables,
        or 'No variables' when none of these types is present."""
        agg = []
        for var_type_name, var_type in [
                ('categorical', DiscreteVariable),
                ('numeric', ContinuousVariable),
                ('time', TimeVariable),
                ('string', StringVariable)
        ]:
            # `TimeVariable` derives from `ContinuousVariable` in Orange, so
            # `isinstance` would count time variables as numeric as well.
            # An exact type check keeps the groups disjoint.
            var_type_list = [
                v for v in variables
                if type(v) is var_type  # pylint: disable=unidiomatic-typecheck
            ]
            if var_type_list:
                agg.append((
                    '%d %s' % (len(var_type_list), var_type_name),
                    len(var_type_list)
                ))

        if not agg:
            return 'No variables'

        attrs, counts = list(zip(*agg))
        if len(attrs) > 1:
            var_string = ', '.join(attrs[:-1]) + ' and ' + attrs[-1]
        else:
            var_string = attrs[0]
        return plural('%s variable{s}' % var_string, sum(counts))

    def set_info(self):
        """Fill the info labels from the current data, or show a
        'no data' message when there is none."""
        if self.data is not None:
            self.info_summary.setText('<b>%s</b> contains %s with %s' % (
                self.data.name,
                plural('{number} instance{s}', self.model.n_instances),
                plural('{number} feature{s}', self.model.n_attributes)
            ))

            self.info_attr.setText(
                '<b>Attributes:</b><br>%s' %
                self._format_variables_string(self.data.domain.attributes)
            )
            self.info_class.setText(
                '<b>Class variables:</b><br>%s' %
                self._format_variables_string(self.data.domain.class_vars)
            )
            self.info_meta.setText(
                '<b>Metas:</b><br>%s' %
                self._format_variables_string(self.data.domain.metas)
            )
        else:
            self.info_summary.setText('No data on input.')
            self.info_attr.setText('')
            self.info_class.setText('')
            self.info_meta.setText('')

    def commit(self):
        pass

    def send_report(self):
        pass