def test_deprecated_str_as_var(self):
    """Encoding a variable given as a plain string must emit a warning.

    The test fails on purpose once Orange reaches version 3.26, as a
    reminder to drop the deprecated code path together with this test.
    """
    removal_version = LooseVersion("3.26")
    if LooseVersion(Orange.__version__) >= removal_version:
        # pragma: no cover
        self.fail("Remove support for variables stored as string settings "
                  "and this test.")

    # A minimal stand-in for a context: one known attribute, no metas.
    fake_context = Mock()
    fake_context.attributes = {"foo": 2}
    fake_context.metas = {}

    str_setting = ContextSetting("")
    str_setting.name = "setting_name"

    with warnings.catch_warnings(record=True) as caught:
        # Make sure the warning is recorded even if it was already shown.
        warnings.simplefilter("always")
        DomainContextHandler.encode_setting(fake_context, str_setting, "foo")
        first_message = caught[0].message.args[0]
        self.assertIn("setting_name", first_message)
def test_filter_value(self):
    """Check how the handler's ``filter_value`` treats list settings."""
    setting = ContextSetting([])
    setting.name = "value"

    def check(before, expected):
        # Run the filter on a one-entry settings dict and compare what is
        # left under "value" with the expected result.
        stored = {"value": before}
        self.handler.filter_value(setting, stored, *self.args)
        self.assertEqual(stored.get("value", None), expected)

    # An empty list stays an empty list.
    check([], [])

    # When the list contains attributes as tuples of (name, type),
    # attributes not present in the domain should be filtered out.
    mixed = [("d1", Discrete), ("d1", Continuous),
             ("c1", Continuous), ("c1", Discrete)]
    check(mixed, [("d1", Discrete), ("c1", Continuous)])

    # All other values in the list should remain untouched.
    check([0, [1, 2, 3], "abcd", 5.4], [0, [1, 2, 3], "abcd", 5.4])
class OWLinearProjection(OWAnchorProjectionWidget):
    """Widget showing a linear projection of the selected variables.

    The placement of the anchors is chosen with a radio button (circular,
    LDA or PCA); the variables to project are kept in two list models
    (selected / other).
    """
    name = "Linear Projection"
    description = ("A multi-axis projection of data onto "
                   "a two-dimensional plane.")
    icon = "icons/LinearProjection.svg"
    priority = 240
    keywords = []

    Placement = Enum("Placement", {"Circular": 0, "LDA": 1, "PCA": 2},
                     type=int, qualname="OWLinearProjection.Placement")

    # Human readable labels for the placement radio buttons and the report.
    Projection_name = {
        Placement.Circular: "Circular Placement",
        Placement.LDA: "Linear Discriminant Analysis",
        Placement.PCA: "Principal Component Analysis",
    }

    settings_version = 5

    placement = Setting(Placement.Circular)
    selected_vars = ContextSetting([])
    vizrank = SettingProvider(LinearProjectionVizRank)
    GRAPH_CLASS = OWLinProjGraph
    graph = SettingProvider(OWLinProjGraph)

    class Error(OWAnchorProjectionWidget.Error):
        no_cont_features = Msg("Plotting requires numeric features")

    def __init__(self):
        """Create the variable models and the VizRank button, then build
        the rest of the widget through the base class."""
        # The models must exist before the base __init__ builds the controls.
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_selected.removed.connect(self.__model_selected_changed)
        self.model_other = VariableListModel(enable_dnd=True)

        rank, button = LinearProjectionVizRank.add_vizrank(
            None, self, "Suggest Features", self.__vizrank_set_attrs)
        self.vizrank, self.btn_vizrank = rank, button
        super().__init__()

    def _add_controls(self):
        """Add variable selection, placement box, base controls and the
        "Hide radius" slider to the control area."""
        self._add_controls_variables()
        self._add_controls_placement()
        super()._add_controls()
        self.gui.add_control(
            self._effects_box, gui.hSlider, "Hide radius:",
            master=self.graph, value="hide_radius",
            minValue=0, maxValue=100, step=10, createLabel=False,
            callback=self.__radius_slider_changed,
        )
        # Detach the stretch item from the control area.
        stretch = self.control_area_stretch
        self.controlArea.layout().removeWidget(stretch)
        stretch.setParent(None)

    def _add_controls_variables(self):
        """Create the selected/other variable lists and attach VizRank."""
        selection_box = VariablesSelection(
            self, self.model_selected, self.model_other, self.controlArea)
        self.variables_selection = selection_box
        selection_box.added.connect(self.__model_selected_changed)
        selection_box.removed.connect(self.__model_selected_changed)
        selection_box.add_remove.layout().addWidget(self.btn_vizrank)

    def _add_controls_placement(self):
        """Create the radio buttons used to choose the placement."""
        policy = (QSizePolicy.Minimum, QSizePolicy.Maximum)
        box = gui.widgetBox(self.controlArea, True, sizePolicy=policy)
        labels = [self.Projection_name[x] for x in self.Placement]
        self.radio_placement = gui.radioButtonsInBox(
            box, self, "placement", btnLabels=labels,
            callback=self.__placement_radio_changed)

    @property
    def continuous_variables(self):
        """Continuous variables and metas of the data ([] without data)."""
        if self.data is None or self.data.domain is None:
            return []
        domain = self.data.domain
        return [var for var in chain(domain.variables, domain.metas)
                if var.is_continuous]

    @property
    def effective_variables(self):
        """The currently selected variables (a slice of the model)."""
        return self.model_selected[:]

    def __vizrank_set_attrs(self, attrs):
        """Use the attributes suggested by VizRank as the selection."""
        if not attrs:
            return
        self.model_selected[:] = attrs[:]
        remaining = [var for var in self.continuous_variables
                     if var not in attrs]
        self.model_other[:] = remaining
        self.__model_selected_changed()

    def __model_selected_changed(self):
        """Store the new selection and recompute projection, plot, output."""
        self.selected_vars = [(var.name, vartype(var))
                              for var in self.model_selected]
        self.projection = None
        self._check_options()
        self.init_projection()
        self.setup_plot()
        self.commit()

    def __placement_radio_changed(self):
        """React to a change of the placement radio button."""
        not_circular = self.placement != self.Placement.Circular
        self.controls.graph.hide_radius.setEnabled(not_circular)
        self.projection = None
        self.projector = None
        self._init_vizrank()
        self.init_projection()
        self.setup_plot()
        self.commit()

    def __radius_slider_changed(self):
        """Forward a change of the "Hide radius" slider to the graph."""
        self.graph.update_radius()

    def colors_changed(self):
        """Re-initialize VizRank after the base class handled the change."""
        super().colors_changed()
        self._init_vizrank()

    def set_data(self, data):
        """Set the data, then refresh options, VizRank and the projector."""
        super().set_data(data)
        self._check_options()
        self._init_vizrank()
        self.init_projection()

    def use_context(self):
        """Fill the variable models from ``selected_vars`` or from defaults.

        Without a stored selection, the first three continuous variables
        are selected and the rest go to the other list.
        """
        self.model_selected.clear()
        self.model_other.clear()
        if self.data is None:
            return

        if len(self.selected_vars):
            domain = self.data.domain
            names = [entry[0] for entry in self.selected_vars]
            self.model_selected[:] = [domain[attr] for attr in names]
            self.model_other[:] = [domain[attr.name]
                                   for attr in self.continuous_variables
                                   if attr.name not in names]
        else:
            self.model_selected[:] = self.continuous_variables[:3]
            self.model_other[:] = self.continuous_variables[3:]

    def _check_options(self):
        """Enable the placement buttons that are applicable to the data.

        LDA is disabled (and the placement reset to circular) when the data
        has no discrete class or fewer than two distinct values in Y.
        """
        buttons = self.radio_placement.buttons
        for button in buttons:
            button.setEnabled(True)

        if self.data is not None:
            discrete_class = self.data.domain.has_discrete_class
            if not discrete_class or len(np.unique(self.data.Y)) < 2:
                buttons[self.Placement.LDA].setEnabled(False)
                if self.placement == self.Placement.LDA:
                    self.placement = self.Placement.Circular

        not_circular = self.placement != self.Placement.Circular
        self.controls.graph.hide_radius.setEnabled(not_circular)

    def _init_vizrank(self):
        """Enable or disable the VizRank button and set its tooltip."""
        is_enabled, msg = False, ""
        if self.data is None:
            msg = "There is no data."
        elif self.attr_color is None:
            msg = "Color variable has to be selected"
        elif self.attr_color.is_continuous and \
                self.placement == self.Placement.LDA:
            msg = ("Suggest Features does not work for Linear "
                   "Discriminant Analysis Projection when "
                   "continuous color variable is selected.")
        elif sum(1 for var in self.continuous_variables
                 if var is not self.attr_color) < 3:
            msg = "Not enough available continuous variables"
        elif len(self.data[self.valid_data]) < 2:
            msg = "Not enough valid data instances"
        else:
            # Enabled unless every value of the color column is NaN.
            color_column = self.data.get_column_view(self.attr_color)[0]
            all_missing = np.isnan(color_column.astype(float)).all()
            is_enabled = not all_missing

        self.btn_vizrank.setToolTip(msg)
        self.btn_vizrank.setEnabled(is_enabled)
        if is_enabled:
            self.vizrank.initialize()

    def check_data(self):
        """Run the base checks; reject data without continuous variables."""
        super().check_data()
        if self.data is not None and not len(self.continuous_variables):
            self.Error.no_cont_features()
            self.data = None

    def init_attr_values(self):
        """Reset the stored selection after the base class reset."""
        super().init_attr_values()
        self.selected_vars = []

    def init_projection(self):
        """Create the projector matching the current placement."""
        kind = self.placement
        if kind == self.Placement.Circular:
            self.projector = CircularPlacement()
        elif kind == self.Placement.LDA:
            self.projector = LDA(solver="eigen", n_components=2)
        elif kind == self.Placement.PCA:
            pca = PCA(n_components=2)
            self.projector = pca
            pca.component = 2
            pca.preprocessors = PCA.preprocessors + [Normalize()]

        super().init_projection()

    def get_coordinates_data(self):
        """Return x and y coordinates of the valid points.

        The embedding is centred on its column means and divided by the
        column spans (a zero span is replaced by 1). ``(None, None)`` is
        returned when there is no embedding.
        """
        embedding = self.get_embedding()
        if embedding is None:
            return None, None

        points = embedding[self.valid_data]
        span = np.max(points, axis=0) - np.min(points, axis=0)
        span[span == 0] = 1
        norm_emb = (points - np.mean(points, axis=0)) / span

        if embedding.shape[1] == 1:
            # One-dimensional embedding: put all points at y = 0.
            return norm_emb.ravel(), np.zeros(len(norm_emb), dtype=float)
        return norm_emb.T

    def _get_send_report_caption(self):
        """Render the caption describing the projection for the report."""
        jittering = self.graph.jitter_size != 0 and \
            f"{self.graph.jitter_size} %"
        items = (
            ("Projection", self.Projection_name[self.placement]),
            ("Color", self._get_caption_var_name(self.attr_color)),
            ("Label", self._get_caption_var_name(self.attr_label)),
            ("Shape", self._get_caption_var_name(self.attr_shape)),
            ("Size", self._get_caption_var_name(self.attr_size)),
            ("Jittering", jittering),
        )
        return report.render_items_vert(items)

    @classmethod
    def migrate_settings(cls, settings_, version):
        """Upgrade stored settings, in place, to the current version."""
        if version < 2:
            settings_["point_width"] = settings_["point_size"]

        if version < 3:
            # Graph-related settings moved into a nested "graph" dict.
            settings_["graph"] = {
                "jitter_size": settings_["jitter_value"],
                "point_width": settings_["point_width"],
                "alpha_value": settings_["alpha_value"],
                "class_density": settings_["class_density"],
            }

        if version < 4:
            if "radius" in settings_:
                settings_["graph"]["hide_radius"] = settings_["radius"]
            if "selection_indices" in settings_ and \
                    settings_["selection_indices"] is not None:
                flags = settings_["selection_indices"]
                settings_["selection"] = [
                    (idx, 1) for idx, chosen in enumerate(flags) if chosen]

        if version < 5:
            if "placement" in settings_ and \
                    settings_["placement"] not in cls.Placement:
                settings_["placement"] = cls.Placement.Circular

    @classmethod
    def migrate_context(cls, context, version):
        """Upgrade a stored context, in place, to the current version."""
        if version < 2:
            ordered = context.ordered_domain
            continuous = [t for t in context.ordered_domain if t[1] == 2]
            discrete = [t for t in context.ordered_domain if t[1] == 1]
            conversions = (
                (ordered, "color_index", "attr_color"),
                (discrete, "shape_index", "attr_shape"),
                (continuous, "size_index", "attr_size"),
            )
            for candidates, old_key, new_key in conversions:
                # Stored indices are shifted by one relative to the list.
                index = context.values[old_key][0] - 1
                if 0 <= index < len(candidates):
                    chosen = candidates[index]
                    context.values[new_key] = (chosen[0], chosen[1] + 100)
                else:
                    context.values[new_key] = None

        if version < 3:
            context.values["graph"] = {
                "attr_color": context.values["attr_color"],
                "attr_shape": context.values["attr_shape"],
                "attr_size": context.values["attr_size"],
            }

        if version == 3:
            # Copy the attributes from the nested "graph" dict to top level.
            values = context.values
            graph_values = values["graph"]
            values["attr_color"] = graph_values["attr_color"]
            values["attr_size"] = graph_values["attr_size"]
            values["attr_shape"] = graph_values["attr_shape"]
            values["attr_label"] = graph_values["attr_label"]
class OWConfusionMatrix(widget.OWWidget):
    """Confusion matrix widget"""

    name = "Confusion Matrix"
    description = "Display a confusion matrix constructed from " \
                  "the results of classifier evaluations."
    icon = "icons/ConfusionMatrix.svg"
    priority = 1001
    keywords = []

    class Inputs:
        evaluation_results = Input("Evaluation Results",
                                   Orange.evaluation.Results)

    class Outputs:
        selected_data = Output("Selected Data", Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    # Labels of the "Show:" combo box; `selected_quantity` indexes this list.
    quantities = [
        "Number of instances",
        "Proportion of predicted",
        "Proportion of actual"
    ]

    settings_version = 1
    settingsHandler = ClassValuesContextHandler()

    selected_learner = Setting([0], schema_only=True)
    selection = ContextSetting(set())
    selected_quantity = Setting(0)
    append_predictions = Setting(True)
    append_probabilities = Setting(False)
    autocommit = Setting(True)

    UserAdviceMessages = [
        widget.Message(
            "Clicking on cells or in headers outputs the corresponding "
            "data instances",
            "click_cell")
    ]

    class Error(widget.OWWidget.Error):
        no_regression = Msg("Confusion Matrix cannot show regression results.")
        invalid_values = Msg(
            "Evaluation Results input contains invalid values")

    def __init__(self):
        """Build the learner list, output options and the matrix view."""
        super().__init__()

        self.data = None
        self.results = None
        self.learners = []
        self.headers = []

        self.learners_box = gui.listBox(
            self.controlArea, self, "selected_learner", "learners", box=True,
            callback=self._learner_changed)

        self.outputbox = gui.vBox(self.controlArea, "Output")
        box = gui.hBox(self.outputbox)
        gui.checkBox(box, self, "append_predictions",
                     "Predictions", callback=self._invalidate)
        gui.checkBox(box, self, "append_probabilities",
                     "Probabilities", callback=self._invalidate)

        gui.auto_apply(self.outputbox, self, "autocommit", box=False)

        self.mainArea.layout().setContentsMargins(0, 0, 0, 0)

        box = gui.vBox(self.mainArea, box=True)

        sbox = gui.hBox(box)
        gui.rubber(sbox)
        gui.comboBox(sbox, self, "selected_quantity",
                     items=self.quantities, label="Show: ",
                     orientation=Qt.Horizontal, callback=self._update)

        self.tablemodel = QStandardItemModel(self)
        view = self.tableview = QTableView(
            editTriggers=QTableView.NoEditTriggers)
        view.setModel(self.tablemodel)
        view.horizontalHeader().hide()
        view.verticalHeader().hide()
        view.horizontalHeader().setMinimumSectionSize(60)
        view.selectionModel().selectionChanged.connect(self._invalidate)
        view.setShowGrid(False)
        view.setItemDelegate(BorderedItemDelegate(Qt.white))
        view.setSizePolicy(QSizePolicy.MinimumExpanding,
                           QSizePolicy.MinimumExpanding)
        view.clicked.connect(self.cell_clicked)
        box.layout().addWidget(view)

        selbox = gui.hBox(box)
        gui.button(selbox, self, "Select Correct",
                   callback=self.select_correct, autoDefault=False)
        gui.button(selbox, self, "Select Misclassified",
                   callback=self.select_wrong, autoDefault=False)
        gui.button(selbox, self, "Clear Selection",
                   callback=self.select_none, autoDefault=False)

    @staticmethod
    def sizeHint():
        """Initial size"""
        return QSize(750, 340)

    def _item(self, i, j):
        """Return the model item at (i, j), or a new item if there is none."""
        return self.tablemodel.item(i, j) or QStandardItem()

    def _set_item(self, i, j, item):
        """Store `item` at row `i`, column `j` of the table model."""
        self.tablemodel.setItem(i, j, item)

    def _init_table(self, nclasses):
        """Set up the static cells of the table for `nclasses` classes.

        Rows/columns 0 and 1 hold the "Predicted"/"Actual" captions and the
        class labels taken from `self.headers`; matrix cells start at (2, 2).
        """
        item = self._item(0, 2)
        item.setData("Predicted", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignCenter)
        item.setFlags(Qt.NoItemFlags)

        self._set_item(0, 2, item)
        item = self._item(2, 0)
        item.setData("Actual", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignHCenter | Qt.AlignBottom)
        item.setFlags(Qt.NoItemFlags)
        self.tableview.setItemDelegateForColumn(0, gui.VerticalItemDelegate())
        self._set_item(2, 0, item)
        self.tableview.setSpan(0, 2, 1, nclasses)
        self.tableview.setSpan(2, 0, nclasses, 1)

        font = self.tablemodel.invisibleRootItem().font()
        bold_font = QFont(font)
        bold_font.setBold(True)

        # The four cells in the top-left corner are inert.
        for i in (0, 1):
            for j in (0, 1):
                item = self._item(i, j)
                item.setFlags(Qt.NoItemFlags)
                self._set_item(i, j, item)

        # Each header label goes into the header row and the header column.
        for p, label in enumerate(self.headers):
            for i, j in ((1, p + 2), (p + 2, 1)):
                item = self._item(i, j)
                item.setData(label, Qt.DisplayRole)
                item.setFont(bold_font)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                if p < len(self.headers) - 1:
                    # "r" for the header column (j == 1), "b" for the row.
                    item.setData("br"[j == 1], BorderRole)
                    item.setData(QColor(192, 192, 192), BorderColorRole)
                self._set_item(i, j, item)

        hor_header = self.tableview.horizontalHeader()
        if len(' '.join(self.headers)) < 120:
            hor_header.setSectionResizeMode(QHeaderView.ResizeToContents)
        else:
            hor_header.setDefaultSectionSize(60)
        self.tablemodel.setRowCount(nclasses + 3)
        self.tablemodel.setColumnCount(nclasses + 3)

    @Inputs.evaluation_results
    def set_results(self, results):
        """Set the input results."""
        # false positive, pylint: disable=no-member
        prev_sel_learner = self.selected_learner.copy()
        self.clear()
        self.warning()
        self.closeContext()

        data = None
        if results is not None and results.data is not None:
            data = results.data[results.row_indices]

        if data is not None and not data.domain.has_discrete_class:
            self.Error.no_regression()
            data = results = None
        else:
            self.Error.no_regression.clear()

        nan_values = False
        if results is not None:
            assert isinstance(results, Orange.evaluation.Results)
            if np.any(np.isnan(results.actual)) or \
                    np.any(np.isnan(results.predicted)):
                # Error out here (could filter them out with a warning
                # instead).
                nan_values = True
                results = data = None

        self.Error.invalid_values(shown=nan_values)

        self.results = results
        self.data = data

        if data is not None:
            class_values = data.domain.class_var.values
        elif results is not None:
            raise NotImplementedError

        if results is None:
            self.report_button.setDisabled(True)
            return

        self.report_button.setDisabled(False)

        nmodels = results.predicted.shape[0]
        # Copy into a list so the concatenation also works if the class
        # values are stored as a tuple.
        self.headers = list(class_values) + \
            [unicodedata.lookup("N-ARY SUMMATION")]

        # NOTE: The 'learner_names' is set in 'Test Learners' widget.
        self.learners = getattr(
            results, "learner_names",
            [f"Learner #{i + 1}" for i in range(nmodels)])

        self._init_table(len(class_values))
        self.openContext(data.domain.class_var)
        if not prev_sel_learner or prev_sel_learner[0] >= len(self.learners):
            if self.learners:
                self.selected_learner[:] = [0]
        else:
            self.selected_learner[:] = prev_sel_learner
        self._update()
        self._set_selection()
        self.unconditional_commit()

    def clear(self):
        """Reset the widget, clear controls"""
        self.results = None
        self.data = None
        self.tablemodel.clear()
        self.headers = []
        # Clear learners last. This action will invoke `_learner_changed`
        self.learners = []

    def select_correct(self):
        """Select the diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            index = self.tablemodel.index(i, i)
            selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_wrong(self):
        """Select the off-diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            for j in range(i + 1, n):
                index = self.tablemodel.index(i, j)
                selection.select(index, index)
                index = self.tablemodel.index(j, i)
                selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_none(self):
        """Reset selection"""
        self.tableview.selectionModel().clear()

    def cell_clicked(self, model_index):
        """Handle cell click event"""
        i, j = model_index.row(), model_index.column()
        if not i or not j:
            return
        n = self.tablemodel.rowCount()
        index = self.tablemodel.index
        selection = None
        if i == j == 1 or i == j == n - 1:
            # Header corner or grand total: the whole area from (2, 2) on.
            selection = QItemSelection(index(2, 2), index(n - 1, n - 1))
        elif i in (1, n - 1):
            # Header row or sum row: column j.
            selection = QItemSelection(index(2, j), index(n - 1, j))
        elif j in (1, n - 1):
            # Header column or sum column: row i.
            selection = QItemSelection(index(i, 2), index(i, n - 1))

        if selection is not None:
            self.tableview.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def _prepare_data(self):
        """Build the output tables for the current selection.

        Returns a pair ``(data, annotated_data)``; ``data`` is None when no
        instance falls into the selected cells.
        """
        indices = self.tableview.selectedIndexes()
        # Translate table coordinates to (actual, predicted) pairs.
        indices = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        actual = self.results.actual
        learner_name = self.learners[self.selected_learner[0]]
        predicted = self.results.predicted[self.selected_learner[0]]
        selected = [
            i for i, t in enumerate(zip(actual, predicted))
            if t in indices
        ]

        extra = []
        class_var = self.data.domain.class_var
        metas = self.data.domain.metas

        if self.append_predictions:
            extra.append(predicted.reshape(-1, 1))
            var = Orange.data.DiscreteVariable(
                "{}({})".format(class_var.name, learner_name),
                class_var.values)
            metas = metas + (var, )

        if self.append_probabilities and \
                self.results.probabilities is not None:
            probs = self.results.probabilities[self.selected_learner[0]]
            extra.append(np.array(probs, dtype=object))
            pvars = [
                Orange.data.ContinuousVariable("p({})".format(value))
                for value in class_var.values
            ]
            metas = metas + tuple(pvars)

        domain = Orange.data.Domain(self.data.domain.attributes,
                                    self.data.domain.class_vars,
                                    metas)
        data = self.data.transform(domain)
        if extra:
            # Fill the newly added meta columns.
            data.metas[:, len(self.data.domain.metas):] = \
                np.hstack(tuple(extra))
        data.name = learner_name

        if selected:
            annotated_data = create_annotated_table(data, selected)
            data = data[selected]
        else:
            annotated_data = create_annotated_table(data, [])
            data = None

        return data, annotated_data

    def commit(self):
        """Output data instances corresponding to selected cells"""
        if self.results is not None and self.data is not None \
                and self.selected_learner:
            data, annotated_data = self._prepare_data()
        else:
            data = None
            annotated_data = None

        self.Outputs.selected_data.send(data)
        self.Outputs.annotated_data.send(annotated_data)

    def _invalidate(self):
        """Store the selected cells in `self.selection` and commit."""
        indices = self.tableview.selectedIndexes()
        self.selection = {(ind.row() - 2, ind.column() - 2)
                          for ind in indices}
        self.commit()

    def _set_selection(self):
        """Select the cells stored in `self.selection` in the view."""
        selection = QItemSelection()
        index = self.tableview.model().index
        for row, col in self.selection:
            sel = index(row + 2, col + 2)
            selection.select(sel, sel)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def _learner_changed(self):
        """Refresh the table, selection and output for the chosen learner."""
        self._update()
        self._set_selection()
        self.commit()

    def _update(self):
        """Fill the table with the confusion matrix of the chosen learner.

        Depending on `selected_quantity` the cells show counts, percentages
        of the column sums or percentages of the row sums.
        """
        def _isinvalid(x):
            return isnan(x) or isinf(x)

        # Update the displayed confusion matrix
        if self.results is not None and self.selected_learner:
            cmatrix = confusion_matrix(self.results, self.selected_learner[0])
            colsum = cmatrix.sum(axis=0)
            rowsum = cmatrix.sum(axis=1)
            n = len(cmatrix)
            diag = np.diag_indices(n)

            colors = cmatrix.astype(np.double)
            colors[diag] = 0
            if self.selected_quantity == 0:
                # The `np.int` alias was removed from NumPy; the builtin
                # `int` is what it referred to.
                normalized = cmatrix.astype(int)
                formatstr = "{}"
                div = np.array([colors.max()])
            else:
                if self.selected_quantity == 1:
                    normalized = 100 * cmatrix / colsum
                    div = colors.max(axis=0)
                else:
                    normalized = 100 * cmatrix / rowsum[:, np.newaxis]
                    div = colors.max(axis=1)[:, np.newaxis]
                formatstr = "{:2.1f} %"
            # Avoid division by zero when there are no off-diagonal counts.
            div[div == 0] = 1
            colors /= div
            maxval = normalized[diag].max()
            if maxval > 0:
                colors[diag] = normalized[diag] / maxval

            for i in range(n):
                for j in range(n):
                    val = normalized[i, j]
                    col_val = colors[i, j]
                    item = self._item(i + 2, j + 2)
                    item.setData(
                        "NA" if _isinvalid(val) else formatstr.format(val),
                        Qt.DisplayRole)
                    # Hue 240 on the diagonal, hue 0 elsewhere.
                    bkcolor = QColor.fromHsl(
                        [0, 240][i == j], 160,
                        255 if _isinvalid(col_val)
                        else int(255 - 30 * col_val))
                    item.setData(QBrush(bkcolor), Qt.BackgroundRole)
                    item.setData("trbl", BorderRole)
                    item.setToolTip("actual: {}\npredicted: {}".format(
                        self.headers[i], self.headers[j]))
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable)
                    self._set_item(i + 2, j + 2, item)

            bold_font = self.tablemodel.invisibleRootItem().font()
            bold_font.setBold(True)

            def _sum_item(value, border=""):
                item = QStandardItem()
                item.setData(value, Qt.DisplayRole)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                item.setFont(bold_font)
                item.setData(border, BorderRole)
                item.setData(QColor(192, 192, 192), BorderColorRole)
                return item

            for i in range(n):
                self._set_item(n + 2, i + 2, _sum_item(int(colsum[i]), "t"))
                self._set_item(i + 2, n + 2, _sum_item(int(rowsum[i]), "l"))
            self._set_item(n + 2, n + 2, _sum_item(int(rowsum.sum())))

    def send_report(self):
        """Send report"""
        if self.results is not None and self.selected_learner:
            self.report_table(
                "Confusion matrix for {} (showing {})".format(
                    self.learners[self.selected_learner[0]],
                    self.quantities[self.selected_quantity].lower()),
                self.tableview)

    @classmethod
    def migrate_settings(cls, settings, version):
        """Wrap an integer `selected_learner` from unversioned settings
        into a list."""
        if not version:
            # For some period of time the 'selected_learner' property was
            # changed from List[int] -> int
            # (commit 4e49bb3fd0e11262f3ebf4b1116a91a4b49cc982) and then back
            # again (commit 8a492d79a2e17154a0881e24a05843406c8892c0)
            if "selected_learner" in settings and \
                    isinstance(settings["selected_learner"], int):
                settings["selected_learner"] = [settings["selected_learner"]]
class OWFile(widget.OWWidget, RecentPathsWComboMixin):
    """Widget that reads a data table from a local file or a URL and
    sends it to the "Data" output."""
    name = "File"
    id = "orange.widgets.data.file"
    description = "Read data from an input file or network " \
                  "and send a data table to the output."
    icon = "icons/File.svg"
    priority = 10
    category = "Data"
    keywords = ["data", "file", "load", "read"]
    outputs = [
        widget.OutputSignal(
            "Data", Table,
            doc="Attribute-valued data set read from the input file.")
    ]

    want_main_area = False

    SEARCH_PATHS = [("sample-datasets", get_sample_datasets_dir())]
    # Local files larger than this (in bytes) are not loaded automatically
    # when the widget is created.
    SIZE_LIMIT = 1e7
    LOCAL_FILE, URL = range(2)

    settingsHandler = PerfectDomainContextHandler()

    # Overload RecentPathsWidgetMixin.recent_paths to set defaults
    recent_paths = Setting([
        RecentPath("", "sample-datasets", "iris.tab"),
        RecentPath("", "sample-datasets", "titanic.tab"),
        RecentPath("", "sample-datasets", "housing.tab"),
        RecentPath("", "sample-datasets", "heart_disease.tab"),
    ])
    recent_urls = Setting([])
    source = Setting(LOCAL_FILE)
    xls_sheet = ContextSetting("")
    sheet_names = Setting({})
    url = Setting("")

    variables = ContextSetting([])

    # File dialog filter: one entry for all readable files, followed by one
    # entry per reader.
    dlg_formats = (
        "All readable files ({});;".format(
            '*' + ' *'.join(FileFormat.readers.keys())) +
        ";;".join(
            "{} (*{})".format(f.DESCRIPTION, ' *'.join(f.EXTENSIONS))
            for f in sorted(set(FileFormat.readers.values()),
                            key=list(FileFormat.readers.values()).index)))

    class Warning(widget.OWWidget.Warning):
        file_too_big = widget.Msg(
            "The file is too large to load automatically."
            " Press Reload to load.")

    def __init__(self):
        """Build the GUI and schedule loading of the last used source."""
        super().__init__()
        RecentPathsWComboMixin.__init__(self)
        self.domain = None
        self.data = None
        self.loaded_file = ""
        self.reader = None

        layout = QGridLayout()
        gui.widgetBox(self.controlArea, margin=0, orientation=layout)
        vbox = gui.radioButtons(None, self, "source", box=True, addSpace=True,
                                callback=self.load_data, addToLayout=False)

        rb_button = gui.appendRadioButton(vbox, "File:", addToLayout=False)
        layout.addWidget(rb_button, 0, 0, Qt.AlignVCenter)

        box = gui.hBox(None, addToLayout=False, margin=0)
        box.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.file_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.file_combo.activated[int].connect(self.select_file)
        box.layout().addWidget(self.file_combo)
        layout.addWidget(box, 0, 1)

        file_button = gui.button(
            None, self, '...', callback=self.browse_file, autoDefault=False)
        file_button.setIcon(self.style().standardIcon(QStyle.SP_DirOpenIcon))
        file_button.setSizePolicy(Policy.Maximum, Policy.Fixed)
        layout.addWidget(file_button, 0, 2)

        reload_button = gui.button(
            None, self, "Reload", callback=self.load_data, autoDefault=False)
        reload_button.setIcon(self.style().standardIcon(
            QStyle.SP_BrowserReload))
        reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed)
        layout.addWidget(reload_button, 0, 3)

        self.sheet_box = gui.hBox(None, addToLayout=False, margin=0)
        self.sheet_combo = gui.comboBox(None, self, "xls_sheet",
                                        callback=self.select_sheet,
                                        sendSelectedValue=True)
        self.sheet_combo.setSizePolicy(
            Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_label = QLabel()
        self.sheet_label.setText('Sheet')
        self.sheet_label.setSizePolicy(
            Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_box.layout().addWidget(
            self.sheet_label, Qt.AlignLeft)
        self.sheet_box.layout().addWidget(
            self.sheet_combo, Qt.AlignVCenter)
        layout.addWidget(self.sheet_box, 2, 1)
        self.sheet_box.hide()

        rb_button = gui.appendRadioButton(vbox, "URL:", addToLayout=False)
        layout.addWidget(rb_button, 3, 0, Qt.AlignVCenter)

        self.url_combo = url_combo = QComboBox()
        url_model = NamedURLModel(self.sheet_names)
        url_model.wrap(self.recent_urls)
        url_combo.setLineEdit(LineEditSelectOnFocus())
        url_combo.setModel(url_model)
        url_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        url_combo.setEditable(True)
        url_combo.setInsertPolicy(url_combo.InsertAtTop)
        url_edit = url_combo.lineEdit()
        l, t, r, b = url_edit.getTextMargins()
        url_edit.setTextMargins(l + 5, t, r, b)
        layout.addWidget(url_combo, 3, 1, 3, 3)
        url_combo.activated.connect(self._url_set)

        box = gui.vBox(self.controlArea, "Info")
        self.info = gui.widgetLabel(box, 'No data loaded.')
        self.warnings = gui.widgetLabel(box, '')

        box = gui.widgetBox(self.controlArea, "Columns (Double click to edit)")
        domain_editor = DomainEditor(self.variables)
        self.editor_model = domain_editor.model()
        box.layout().addWidget(domain_editor)

        box = gui.hBox(self.controlArea)
        gui.button(
            box, self, "Browse documentation data sets",
            callback=lambda: self.browse_file(True), autoDefault=False)
        gui.rubber(box)
        box.layout().addWidget(self.report_button)
        self.report_button.setFixedWidth(170)

        self.apply_button = gui.button(
            box, self, "Apply", callback=self.apply_domain_edit)
        self.apply_button.setEnabled(False)
        self.apply_button.setFixedWidth(170)
        self.editor_model.dataChanged.connect(
            lambda: self.apply_button.setEnabled(True))

        self.set_file_list()
        # Must not call open_file from within __init__. open_file
        # explicitly re-enters the event loop (by a progress bar)

        self.setAcceptDrops(True)

        if self.source == self.LOCAL_FILE:
            last_path = self.last_path()
            # Only ask for the size of a file that is actually there;
            # os.path.getsize raises for a missing path and for None.
            if last_path and os.path.exists(last_path) and \
                    os.path.getsize(last_path) > self.SIZE_LIMIT:
                self.Warning.file_too_big()
                return

        QTimer.singleShot(0, self.load_data)

    def sizeHint(self):
        """Initial size of the widget."""
        return QSize(600, 550)

    def select_file(self, n):
        """Select the n-th recent path and load it as a local file."""
        assert n < len(self.recent_paths)
        super().select_file(n)
        if self.recent_paths:
            self.source = self.LOCAL_FILE
            self.load_data()
            self.set_file_list()

    def select_sheet(self):
        """Store the chosen sheet with the current path and reload."""
        self.recent_paths[0].sheet = self.sheet_combo.currentText()
        self.load_data()

    def _url_set(self):
        """Switch the source to URL and load the data."""
        self.source = self.URL
        self.load_data()

    def browse_file(self, in_demos=False):
        """Let the user pick a file in a dialog and load it.

        With `in_demos` set, the dialog starts in the sample data sets
        directory; otherwise in the last used path or the home directory.
        """
        if in_demos:
            start_file = get_sample_datasets_dir()
            if not os.path.exists(start_file):
                QMessageBox.information(
                    None, "File",
                    "Cannot find the directory with documentation data sets")
                return
        else:
            start_file = self.last_path() or os.path.expanduser("~/")

        filename, _ = QFileDialog.getOpenFileName(
            self, 'Open Orange Data File', start_file, self.dlg_formats)
        if not filename:
            return
        self.loaded_file = filename
        self.add_path(filename)
        self.source = self.LOCAL_FILE
        self.load_data()

    # Open a file, create data from it and send it over the data channel
    def load_data(self):
        """Read the data from the current source and send it.

        On any error the output is cleared and the error is shown in the
        info label.
        """
        # We need to catch any exception type since anything can happen in
        # file readers
        # pylint: disable=broad-except
        self.editor_model.set_domain(None)
        self.apply_button.setEnabled(False)
        self.Warning.file_too_big.clear()

        error = None
        try:
            self.reader = self._get_reader()
            if self.reader is None:
                self.data = None
                self.send("Data", None)
                self.info.setText("No data.")
                self.sheet_box.hide()
                return
        except Exception as ex:
            error = ex

        if not error:
            self._update_sheet_combo()
            with catch_warnings(record=True) as warnings:
                try:
                    data = self.reader.read()
                except Exception as ex:
                    error = ex
                # Show the last recorded warning, or clear the warning.
                self.warning(warnings[-1].message.args[0] if warnings else '')

        if error:
            self.data = None
            self.send("Data", None)
            self.info.setText("An error occurred:\n{}".format(error))
            self.editor_model.reset()
            self.sheet_box.hide()
            return

        self.info.setText(self._describe(data))

        add_origin(data, self.loaded_file or self.last_path())
        self.send("Data", data)
        self.editor_model.set_domain(data.domain)
        self.data = data

    def _get_reader(self):
        """
        Returns
        -------
        FileFormat
        """
        if self.source == self.LOCAL_FILE:
            reader = FileFormat.get_reader(self.last_path())
            if self.recent_paths and self.recent_paths[0].sheet:
                reader.select_sheet(self.recent_paths[0].sheet)
            return reader
        elif self.source == self.URL:
            url = self.url_combo.currentText().strip()
            if url:
                return UrlReader(url)

    def _update_sheet_combo(self):
        """Show the sheet selector only if the reader has several sheets."""
        if len(self.reader.sheets) < 2:
            self.sheet_box.hide()
            self.reader.select_sheet(None)
            return

        self.sheet_combo.clear()
        self.sheet_combo.addItems(self.reader.sheets)
        self._select_active_sheet()
        self.sheet_box.show()

    def _select_active_sheet(self):
        """Make the combo box show the sheet selected in the reader."""
        if self.reader.sheet:
            try:
                idx = self.reader.sheets.index(self.reader.sheet)
                self.sheet_combo.setCurrentIndex(idx)
            except ValueError:
                # Requested sheet does not exist in this file
                self.reader.select_sheet(None)
        else:
            self.sheet_combo.setCurrentIndex(0)

    def _describe(self, table):
        """Return an HTML description of `table` for the info label."""
        domain = table.domain
        text = ""

        attrs = getattr(table, "attributes", {})
        descs = [
            attrs[desc]
            for desc in ("Name", "Description") if desc in attrs
        ]
        if len(descs) == 2:
            descs[0] = "<b>{}</b>".format(descs[0])
        if descs:
            text += "<p>{}</p>".format("<br/>".join(descs))

        text += "<p>{} instance(s), {} feature(s), {} meta attribute(s)".\
            format(len(table), len(domain.attributes), len(domain.metas))
        if domain.has_continuous_class:
            text += "<br/>Regression; numerical class."
        elif domain.has_discrete_class:
            text += "<br/>Classification; discrete class with {} values.".\
                format(len(domain.class_var.values))
        elif table.domain.class_vars:
            text += "<br/>Multi-target; {} target variables.".format(
                len(table.domain.class_vars))
        else:
            text += "<br/>Data has no target variable."
        text += "</p>"

        if 'Timestamp' in table.domain:
            # Google Forms uses this header to timestamp responses
            text += '<p>First entry: {}<br/>Last entry: {}</p>'.format(
                table[0, 'Timestamp'], table[-1, 'Timestamp'])
        return text

    def storeSpecificSettings(self):
        """Save a copy of the edited variables into the current context."""
        self.current_context.modified_variables = self.variables[:]

    def retrieveSpecificSettings(self):
        """Restore the edited variables from the current context, if any."""
        if hasattr(self.current_context, "modified_variables"):
            self.variables[:] = self.current_context.modified_variables

    def apply_domain_edit(self):
        """Build a table according to the domain editor and send it."""
        attributes = []
        class_vars = []
        metas = []
        places = [attributes, class_vars, metas]
        X, y, m = [], [], []
        cols = [X, y, m]  # Xcols, Ycols, Mcols

        def is_missing(x):
            return str(x) in ("nan", "")

        for column, (name, tpe, place, vals, is_con), (orig_var, orig_plc) in \
            zip(count(), self.editor_model.variables,
                chain([(at, 0) for at in self.data.domain.attributes],
                      [(cl, 1) for cl in self.data.domain.class_vars],
                      [(mt, 2) for mt in self.data.domain.metas])):
            # Place 3 is not one of attributes/class/metas: drop the column.
            if place == 3:
                continue
            if orig_plc == 2:
                col_data = list(chain(*self.data[:, orig_var].metas))
            else:
                col_data = list(chain(*self.data[:, orig_var]))
            if name == orig_var.name and tpe == type(orig_var):
                var = orig_var
            elif tpe == DiscreteVariable:
                values = list(
                    str(i) for i in set(col_data) if not is_missing(i))
                var = tpe(name, values)
                col_data = [
                    np.nan if is_missing(x) else values.index(str(x))
                    for x in col_data
                ]
            elif tpe == StringVariable and type(orig_var) == DiscreteVariable:
                var = tpe(name)
                col_data = [
                    orig_var.repr_val(x) if not np.isnan(x) else ""
                    for x in col_data
                ]
            else:
                var = tpe(name)
            places[place].append(var)
            cols[place].append(col_data)
        domain = Domain(attributes, class_vars, metas)
        X = np.array(X).T if len(X) else np.empty((len(self.data), 0))
        y = np.array(y).T if len(y) else None
        # Metas need dtype object if any of them is a string variable.
        dtpe = object if any(
            isinstance(m, StringVariable) for m in domain.metas) else float
        m = np.array(m, dtype=dtpe).T if len(m) else None
        table = Table.from_numpy(domain, X, y, m, self.data.W)
        self.send("Data", table)
        self.apply_button.setEnabled(False)

    def get_widget_name_extension(self):
        """Return the loaded file's base name without its extension."""
        _, name = os.path.split(self.loaded_file)
        return os.path.splitext(name)[0]

    def send_report(self):
        """Report the data source, its format and the loaded data."""
        def get_ext_name(filename):
            try:
                return FileFormat.names[os.path.splitext(filename)[1]]
            except KeyError:
                return "unknown"

        if self.data is None:
            self.report_paragraph("File", "No file.")
            return

        if self.source == self.LOCAL_FILE:
            home = os.path.expanduser("~")
            if self.loaded_file.startswith(home):
                # os.path.join does not like ~
                name = "~/" + \
                       self.loaded_file[len(home):].lstrip("/").lstrip("\\")
            else:
                name = self.loaded_file
            if self.sheet_combo.isVisible():
                name += " ({})".format(self.sheet_combo.currentText())
            self.report_items("File", [("File name", name),
                                       ("Format", get_ext_name(name))])
        else:
            self.report_items("Data", [("Resource", self.url),
                                       ("Format", get_ext_name(self.url))])

        self.report_data("Data", self.data)

    def dragEnterEvent(self, event):
        """Accept drops of valid file urls"""
        urls = event.mimeData().urls()
        if urls:
            try:
                FileFormat.get_reader(
                    OSX_NSURL_toLocalFile(urls[0]) or urls[0].toLocalFile())
                event.acceptProposedAction()
            except IOError:
                pass

    def dropEvent(self, event):
        """Handle file drops"""
        urls = event.mimeData().urls()
        if urls:
            self.add_path(
                OSX_NSURL_toLocalFile(urls[0]) or
                urls[0].toLocalFile())  # add first file
            self.source = self.LOCAL_FILE
            self.load_data()
class OWFeatureStatistics(widget.OWWidget):
    """Widget that lists per-feature statistics in a sortable table.

    The rows selected in the table determine two outputs: the input data
    reduced to the selected columns, and a table with the statistics of
    those columns.
    """

    name = 'Feature Statistics'
    description = 'Show basic statistics for data features.'
    icon = 'icons/FeatureStatistics.svg'

    class Inputs:
        data = Input('Data', Table, default=True)

    class Outputs:
        reduced_data = Output('Reduced Data', Table, default=True)
        statistics = Output('Statistics', Table)

    want_main_area = False

    settingsHandler = DomainContextHandler()
    settings_version = 2

    auto_commit = Setting(True)
    color_var = ContextSetting(None)  # type: Optional[Variable]
    # filter_string = ContextSetting('')

    sorting = Setting((0, Qt.AscendingOrder))
    selected_vars = ContextSetting([], schema_only=True)

    def __init__(self):
        super().__init__()

        self.data = None  # type: Optional[Table]

        # TODO: Implement filtering on the model
        # filter_box = gui.vBox(self.controlArea, 'Filter')
        # self.filter_text = gui.lineEdit(
        #     filter_box, self, value='filter_string',
        #     placeholderText='Filter variables by name',
        #     callback=self._filter_table_variables, callbackOnType=True,
        # )
        # shortcut = QShortcut(QKeySequence('Ctrl+f'), self, self.filter_text.setFocus)
        # shortcut.setWhatsThis('Filter variables by name')

        # Main area
        self.model = FeatureStatisticsTableModel(parent=self)
        self.table_view = FeatureStatisticsTableView(self.model, parent=self)
        self.table_view.selectionModel().selectionChanged.connect(self.on_select)
        self.table_view.horizontalHeader().sectionClicked.connect(self.on_header_click)

        self.controlArea.layout().addWidget(self.table_view)

        self.color_var_model = DomainModel(
            valid_types=(ContinuousVariable, DiscreteVariable),
            placeholder='None',
        )
        self.cb_color_var = gui.comboBox(
            self.buttonsArea, master=self, value='color_var',
            model=self.color_var_model, label='Color:',
            orientation=Qt.Horizontal, contentsLength=13, searchable=True
        )
        self.cb_color_var.activated.connect(self.__color_var_changed)

        gui.rubber(self.buttonsArea)
        gui.auto_send(self.buttonsArea, self, "auto_commit")

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

    @staticmethod
    def sizeHint():
        return QSize(1050, 500)

    def _filter_table_variables(self):
        """Unfinished filtering hook; builds a regex but does not apply it.

        NOTE: the ``filter_string`` setting this reads is commented out in
        the class body and nothing in this class calls the method, so it
        would raise ``AttributeError`` if invoked as is.
        """
        regex = QRegExp(self.filter_string)
        # If the user explicitly types different cases, we assume they know
        # what they are searching for and account for letter case in filter
        different_case = (
            any(c.islower() for c in self.filter_string) and
            any(c.isupper() for c in self.filter_string)
        )
        if not different_case:
            regex.setCaseSensitivity(Qt.CaseInsensitive)

    @Inputs.data
    def set_data(self, data):
        """Handle new input data: reset state, restore settings, commit."""
        # Clear outputs and reset widget state
        self.closeContext()
        self.selected_vars = []
        self.model.resetSorting()
        self.Outputs.reduced_data.send(None)
        self.Outputs.statistics.send(None)

        # Setup widget state for new data and restore settings
        self.data = data

        if data is not None:
            self.info.set_input_summary(len(data), format_summary_details(data))
            self.color_var_model.set_domain(data.domain)
            # Default to the first class variable, if there is one
            self.color_var = None
            if self.data.domain.class_vars:
                self.color_var = self.data.domain.class_vars[0]
        else:
            self.info.set_input_summary(self.info.NoInput)
            self.color_var_model.set_domain(None)
            self.color_var = None
        self.model.set_data(data)

        self.openContext(self.data)
        self.__restore_selection()
        self.__restore_sorting()
        # self._filter_table_variables()
        self.__color_var_changed()

        self.commit()

    def __restore_selection(self):
        """Restore the selection on the table view from saved settings."""
        selection_model = self.table_view.selectionModel()
        selection = QItemSelection()
        if self.selected_vars:
            var_indices = {var: i for i, var in enumerate(self.model.variables)}
            selected_indices = [var_indices[var] for var in self.selected_vars]
            # Select whole rows: from the first to the last column
            for row in self.model.mapFromSourceRows(selected_indices):
                selection.append(QItemSelectionRange(
                    self.model.index(row, 0),
                    self.model.index(row, self.model.columnCount() - 1)
                ))
        selection_model.select(selection, QItemSelectionModel.ClearAndSelect)

    def __restore_sorting(self):
        """Restore the sort column and order from saved settings."""
        sort_column, sort_order = self.sorting
        if self.model.n_attributes and sort_column < self.model.columnCount():
            self.model.sort(sort_column, sort_order)
            self.table_view.horizontalHeader().setSortIndicator(sort_column, sort_order)

    @pyqtSlot(int)
    def on_header_click(self, *_):
        """Remember the model's current sort column and order."""
        # Store the header states
        sort_order = self.model.sortOrder()
        sort_column = self.model.sortColumn()
        self.sorting = sort_column, sort_order

    @pyqtSlot(int)
    def __color_var_changed(self, *_):
        """Pass the chosen color variable on to the model."""
        if self.model is not None:
            self.model.set_target_var(self.color_var)

    def on_select(self):
        """Store the variables of the selected rows and commit."""
        selection_indices = list(self.model.mapToSourceRows([
            i.row() for i in self.table_view.selectionModel().selectedRows()
        ]))
        self.selected_vars = list(self.model.variables[selection_indices])
        self.commit()

    def commit(self):
        """Send the reduced data and the statistics of the selected variables.

        With nothing selected, ``None`` is sent on both outputs.
        """
        if not self.selected_vars:
            self.info.set_output_summary(self.info.NoOutput)
            self.Outputs.reduced_data.send(None)
            self.Outputs.statistics.send(None)
            return

        # Send a table with only selected columns to output
        variables = self.selected_vars
        # Slice once and reuse it for both the summary and the output
        reduced_data = self.data[:, variables]
        self.info.set_output_summary(len(reduced_data),
                                     format_summary_details(reduced_data))
        self.Outputs.reduced_data.send(reduced_data)

        # Send the statistics of the selected variables to output
        labels, data = self.model.get_statistics_matrix(variables, return_labels=True)
        var_names = np.atleast_2d([var.name for var in variables]).T
        domain = Domain(
            attributes=[ContinuousVariable(name) for name in labels],
            metas=[StringVariable('Feature')]
        )
        statistics = Table(domain, data, metas=var_names)
        statistics.name = '%s (Feature Statistics)' % self.data.name
        self.Outputs.statistics.send(statistics)

    def send_report(self):
        view = self.table_view
        self.report_table(view)

    @classmethod
    def migrate_context(cls, context, version):
        """Convert pre-version-2 ``selected_rows`` into ``selected_vars``."""
        if not version or version < 2:
            selected_rows = context.values.pop("selected_rows", None)
            if not selected_rows:
                selected_vars = []
            else:
                # This assumes that dict was saved by Python >= 3.6 so dict is
                # ordered; if not, the context would not have worked anyway.
                all_vars = [
                    (var, tpe)
                    for (var, tpe) in chain(context.attributes.items(),
                                            context.metas.items())
                    # it would be nicer to use cls.HIDDEN_VAR_TYPES, but there
                    # is no suitable conversion function, and StringVariable (3)
                    # was the only hidden var when settings_version < 2, so:
                    if tpe != 3]
                selected_vars = [all_vars[i] for i in selected_rows]
            context.values["selected_vars"] = selected_vars, -3
class LineScanPlot(QWidget, OWComponent, SelectionGroupMixin,
                   ImageColorSettingMixin, ImageZoomMixin):
    """Plot component that shows the data as an image.

    Each data row becomes one image column, positioned by ``attr_x`` (or by
    row index when ``attr_x`` is None); the vertical axis follows the values
    returned by ``getx(data)``.
    """

    attr_x = ContextSetting(None)
    gamma = Setting(0)

    selection_changed = Signal()

    def __init__(self, parent):
        QWidget.__init__(self)
        OWComponent.__init__(self, parent)
        SelectionGroupMixin.__init__(self)
        ImageColorSettingMixin.__init__(self)

        self.parent = parent

        self.selection_type = SELECTMANY
        self.saving_enabled = True
        self.selection_enabled = True
        self.viewtype = INDIVIDUAL  # required by InteractiveViewBox
        self.highlighted = None
        self.data_points = None
        self.data_imagepixels = None

        self.plotview = pg.GraphicsLayoutWidget()
        self.plot = pg.PlotItem(background="w",
                                viewBox=InteractiveViewBox(self))
        self.plotview.addItem(self.plot)

        self.legend = ImageColorLegend()
        self.plotview.addItem(self.legend)

        self.plot.scene().installEventFilter(
            HelpEventDelegate(self.help_event, self))

        layout = QVBoxLayout()
        self.setLayout(layout)
        self.layout().setContentsMargins(0, 0, 0, 0)
        self.layout().addWidget(self.plotview)

        self.img = ImageItemNan()
        self.img.setOpts(axisOrder='row-major')
        self.plot.addItem(self.img)
        self.plot.scene().sigMouseMoved.connect(self.plot.vb.mouseMovedEvent)

        layout = QGridLayout()
        self.plotview.setLayout(layout)
        self.button = QPushButton("Menu", self.plotview)
        self.button.setAutoDefault(False)

        layout.setRowStretch(1, 1)
        layout.setColumnStretch(1, 1)
        layout.addWidget(self.button, 0, 0)
        view_menu = MenuFocus(self)
        self.button.setMenu(view_menu)

        # prepare interface according to the new context
        self.parent.contextAboutToBeOpened.connect(
            lambda x: self.init_interface_data(x[0]))

        self.add_zoom_actions(view_menu)

        common_options = dict(labelWidth=50, orientation=Qt.Horizontal,
                              sendSelectedValue=True, valueType=str)

        choose_xy = QWidgetAction(self)
        box = gui.vBox(self)
        box.setFocusPolicy(Qt.TabFocus)
        self.xy_model = DomainModel(DomainModel.METAS | DomainModel.CLASSES,
                                    valid_types=DomainModel.PRIMITIVE,
                                    placeholder="Position (index)")
        self.cb_attr_x = gui.comboBox(
            box, self, "attr_x", label="Axis x:", callback=self.update_attr,
            model=self.xy_model, **common_options)

        box.setFocusProxy(self.cb_attr_x)

        box.layout().addWidget(self.color_settings_box())

        choose_xy.setDefaultWidget(box)
        view_menu.addAction(choose_xy)

        self.lsx = None  # info about the X axis
        self.lsy = None  # info about the Y axis
        # Filled in by update_view(); defined here so that they always exist
        self.wavenumbers = None
        self.data_xs = None

        self.data = None
        self.data_ids = {}

    def init_interface_data(self, data):
        """Reinitialize attribute choices unless the domain is unchanged."""
        same_domain = (self.data and data and
                       data.domain == self.data.domain)
        if not same_domain:
            self.init_attr_values(data)

    def help_event(self, ev):
        """Show a tooltip for the data under the cursor.

        Returns True if a tooltip was shown and False otherwise.
        """
        pos = self.plot.vb.mapSceneToView(ev.scenePos())
        sel, wavenumber_ind = self._points_at_pos(pos)
        prepared = []
        if sel is not None:
            prepared.append(str(self.wavenumbers[wavenumber_ind]))
            # The listed variables are the same for every row: build once
            domain = self.data.domain
            variables = [v for v in domain.metas + domain.class_vars
                         if v not in [self.attr_x]]
            for d in self.data[sel]:
                features = ['{} = {}'.format(attr.name, d[attr])
                            for attr in variables]
                features.append('value = {}'.format(d[wavenumber_ind]))
                prepared.append("\n".join(features))
        text = "\n\n".join(prepared)
        if text:
            text = ('<span style="white-space:pre">{}</span>'.format(
                escape(text)))
            QToolTip.showText(ev.screenPos(), text, widget=self.plotview)
            return True
        else:
            return False

    def update_attr(self):
        self.update_view()

    def init_attr_values(self, data):
        """Set the x-axis model's domain and select its first entry."""
        domain = data.domain if data is not None else None
        self.xy_model.set_domain(domain)
        self.attr_x = self.xy_model[0] if self.xy_model else None

    def set_data(self, data):
        """Store the data (or clear it) together with an id -> row index map."""
        if data:
            self.data = data
            self.data_ids = {e: i for i, e in enumerate(data.ids)}
            self.restore_selection_settings()
        else:
            self.data = None
            self.data_ids = {}

    def update_view(self):
        """Clear the image and redraw it from the current data."""
        self.img.clear()
        self.img.setSelection(None)
        self.legend.set_colors(None)
        self.lsx = None
        self.lsy = None
        self.wavenumbers = None
        self.data_xs = None
        self.data_imagepixels = None
        if self.data and len(self.data.domain.attributes):
            if self.attr_x is not None:
                xat = self.data.domain[self.attr_x]
                ndom = Domain([xat])
                datam = Table(ndom, self.data)
                coorx = datam.X[:, 0]
            else:
                coorx = np.arange(len(self.data))
            self.lsx = lsx = values_to_linspace(coorx)
            self.data_xs = coorx

            self.wavenumbers = wavenumbers = getx(self.data)
            self.lsy = lsy = values_to_linspace(wavenumbers)

            # set data
            imdata = np.full((lsy[2], lsx[2]), np.nan)
            xindex = index_values(coorx, lsx)
            yindex = index_values(wavenumbers, lsy)
            # One image column per data row
            for xind, d in zip(xindex, self.data.X):
                imdata[yindex, xind] = d

            self.data_imagepixels = xindex

            self.img.setImage(imdata, autoLevels=False)
            self.update_levels()
            self.update_color_schema()

            # shift centres of the pixels so that the axes are useful
            shiftx = _shift(lsx)
            shifty = _shift(lsy)
            left = lsx[0] - shiftx
            bottom = lsy[0] - shifty
            width = (lsx[1] - lsx[0]) + 2 * shiftx
            height = (lsy[1] - lsy[0]) + 2 * shifty
            self.img.setRect(QRectF(left, bottom, width, height))

            self.refresh_img_selection()

    def refresh_img_selection(self):
        """Mark the image columns of the selected rows with their group."""
        selected_px = np.zeros((self.lsy[2], self.lsx[2]), dtype=np.uint8)
        selected_px[:, self.data_imagepixels] = self.selection_group
        self.img.setSelection(selected_px)

    def make_selection(self, selected, add):
        """Add selected indices to the selection."""
        add_to_group, add_group, remove = selection_modifiers()
        if self.data and self.lsx and self.lsy:
            if add_to_group:  # both keys - need to test it before add_group
                selnum = np.max(self.selection_group)
            elif add_group:
                selnum = np.max(self.selection_group) + 1
            elif remove:
                selnum = 0
            else:
                self.selection_group *= 0
                selnum = 1
            if selected is not None:
                self.selection_group[selected] = selnum
            self.refresh_img_selection()
        self.prepare_settings_for_saving()
        self.selection_changed.emit()

    def _points_at_pos(self, pos):
        """Return a row mask and the nearest y index for a plot position.

        Returns ``(None, None)`` when there is nothing plotted.
        """
        if self.data and self.lsx and self.lsy:
            x, y = pos.x(), pos.y()
            x_distance = np.abs(self.data_xs - x)
            sel = (x_distance < _shift(self.lsx))
            wavenumber_distance = np.abs(self.wavenumbers - y)
            wavenumber_ind = np.argmin(wavenumber_distance)
            return sel, wavenumber_ind
        return None, None

    def select_by_click(self, pos, add):
        sel, _ = self._points_at_pos(pos)
        self.make_selection(sel, add)
class OWScatterPlot(OWDataProjectionWidget):
    """Scatter plot widget drawing two chosen variables against each other.

    Adds axis selection, a "Features" input/output, projection ranking and
    progressive sampling of large SQL tables to the base projection widget.
    """

    name = 'Scatter Plot'
    description = ("Interactive scatter plot visualization with "
                   "intelligent data visualization enhancements.")
    icon = "icons/ScatterPlot.svg"
    priority = 140
    keywords = []

    class Inputs(OWDataProjectionWidget.Inputs):
        features = Input("Features", AttributeList)

    class Outputs(OWDataProjectionWidget.Outputs):
        features = Output("Features", AttributeList, dynamic=False)

    settings_version = 4
    auto_sample = Setting(True)
    attr_x = ContextSetting(None)
    attr_y = ContextSetting(None)
    tooltip_shows_all = Setting(True)

    GRAPH_CLASS = OWScatterPlotGraph
    graph = SettingProvider(OWScatterPlotGraph)
    embedding_variables_names = None

    xy_changed_manually = Signal(Variable, Variable)

    class Warning(OWDataProjectionWidget.Warning):
        missing_coords = Msg(
            "Plot cannot be displayed because '{}' or '{}' "
            "is missing for all data points")
        no_continuous_vars = Msg("Data has no continuous variables")

    class Information(OWDataProjectionWidget.Information):
        sampled_sql = Msg("Large SQL table; showing a sample.")
        missing_coords = Msg(
            "Points with missing '{}' or '{}' are not displayed")

    def __init__(self):
        self.sql_data = None  # Orange.data.sql.table.SqlTable
        self.attribute_selection_list = None  # list of Orange.data.Variable
        self.__timer = QTimer(self, interval=1200)
        self.__timer.timeout.connect(self.add_data)
        super().__init__()

        # Give this instance its own writer list, extended with the
        # Matplotlib formats
        writers = self.graph_writers.copy()
        writers.append(MatplotlibFormat)
        writers.append(MatplotlibPDFFormat)
        self.graph_writers = writers

    def _add_controls(self):
        """Create the axis and sampling controls, then the inherited ones."""
        self._add_controls_axis()
        self._add_controls_sampling()
        super()._add_controls()
        plot_widgets = [
            self.gui.ShowGridLines,
            self.gui.ToolTipShowsAll,
            self.gui.RegressionLine,
        ]
        self.gui.add_widgets(plot_widgets, self._plot_box)
        gui.checkBox(
            gui.indentedBox(self._plot_box), self,
            value="graph.orthonormal_regression",
            label="Treat variables as independent",
            callback=self.graph.update_regression_line,
            tooltip=(
                "If checked, fit line to group (minimize distance from points);\n"
                "otherwise fit y as a function of x (minimize vertical distances)"
            ))

    def _add_controls_axis(self):
        """Create the two axis combo boxes and the vizrank button."""
        combo_kwargs = {
            "labelWidth": 50,
            "orientation": Qt.Horizontal,
            "sendSelectedValue": True,
            "valueType": str,
            "contentsLength": 14,
        }
        box = gui.vBox(self.controlArea, True)
        self.xy_model = DomainModel(DomainModel.MIXED,
                                    valid_types=ContinuousVariable)
        self.cb_attr_x = gui.comboBox(
            box, self, "attr_x", label="Axis x:",
            callback=self.set_attr_from_combo,
            model=self.xy_model, **combo_kwargs)
        self.cb_attr_y = gui.comboBox(
            box, self, "attr_y", label="Axis y:",
            callback=self.set_attr_from_combo,
            model=self.xy_model, **combo_kwargs)
        vizrank_box = gui.hBox(box)
        self.vizrank, self.vizrank_button = ScatterPlotVizRank.add_vizrank(
            vizrank_box, self, "Find Informative Projections", self.set_attr)

    def _add_controls_sampling(self):
        """Create the (initially hidden) sampling box."""
        self.sampling = gui.auto_commit(
            self.controlArea, self, "auto_sample", "Sample", box="Sampling",
            callback=self.switch_sampling, commit=lambda: self.add_data(1))
        self.sampling.setVisible(False)

    @property
    def effective_variables(self):
        return [self.attr_x, self.attr_y]

    def _vizrank_color_change(self):
        """Reinitialize vizrank and update its button state and tooltip."""
        self.vizrank.initialize()
        is_enabled = (
            self.data is not None and not self.data.is_sparse()
            and len(self.xy_model) > 2
            and len(self.data[self.valid_data]) > 1
            and np.all(np.nan_to_num(np.nanstd(self.data.X, 0)) != 0)
        )
        self.vizrank_button.setEnabled(
            is_enabled and self.attr_color is not None
            and not np.isnan(
                self.data.get_column_view(
                    self.attr_color)[0].astype(float)).all())
        if is_enabled and self.attr_color is None:
            tooltip = "Color variable has to be selected."
        else:
            tooltip = ""
        self.vizrank_button.setToolTip(tooltip)

    def set_data(self, data):
        """Set the data and resolve attribute settings stored by name."""
        super().set_data(data)

        def findvar(name, iterable):
            """Find a Orange.data.Variable in `iterable` by name"""
            return next(
                (el for el in iterable
                 if isinstance(el, Variable) and el.name == name),
                None)

        # handle restored settings from < 3.3.9 when attr_* were stored
        # by name
        stored_by_name = (
            ("attr_x", self.xy_model),
            ("attr_y", self.xy_model),
            ("attr_label", self.gui.label_model),
            ("attr_color", self.gui.color_model),
            ("attr_shape", self.gui.shape_model),
            ("attr_size", self.gui.size_model),
        )
        for setting_name, model in stored_by_name:
            current = getattr(self, setting_name)
            if isinstance(current, str):
                setattr(self, setting_name, findvar(current, model))

    def check_data(self):
        """Validate the data; download or sample it if it is an SQL table."""
        super().check_data()
        self.__timer.stop()
        self.sampling.setVisible(False)
        self.sql_data = None
        if isinstance(self.data, SqlTable):
            if self.data.approx_len() < 4000:
                self.data = Table(self.data)
            else:
                # Too large to download: show a sample and, if enabled,
                # keep adding to it on a timer
                self.Information.sampled_sql()
                self.sql_data = self.data
                sample = self.data.sample_time(0.8, no_cache=True)
                sample.download_data(2000, partial=True)
                self.data = Table(sample)
                self.sampling.setVisible(True)
                if self.auto_sample:
                    self.__timer.start()

        if self.data is not None and \
                not self.data.domain.has_continuous_attributes(True, True):
            self.Warning.no_continuous_vars()
            self.data = None

        if self.data is not None and (len(self.data) == 0 or
                                      len(self.data.domain) == 0):
            self.data = None

    def get_embedding(self):
        """Return the x and y columns as an (n, 2) array, or None.

        Also sets ``self.valid_data`` to the mask of rows where both
        coordinates are finite, and reports rows that are not.
        """
        self.valid_data = None
        if self.data is None:
            return None

        xs = self.get_column(self.attr_x, filter_valid=False)
        ys = self.get_column(self.attr_y, filter_valid=False)
        if xs is None or ys is None:
            return None

        self.Warning.missing_coords.clear()
        self.Information.missing_coords.clear()
        self.valid_data = np.isfinite(xs) & np.isfinite(ys)
        if self.valid_data is not None and not np.all(self.valid_data):
            # Some points shown -> information; none at all -> warning
            if np.any(self.valid_data):
                messages = self.Information
            else:
                messages = self.Warning
            messages.missing_coords(self.attr_x.name, self.attr_y.name)
        return np.vstack((xs, ys)).T

    # Tooltip
    def _point_tooltip(self, point_id, skip_attrs=()):
        """Return tooltip text for a point, with the x and y values first."""
        instance = self.data[point_id]
        xy_attrs = (self.attr_x, self.attr_y)
        lines = [escape('{} = {}'.format(var.name, instance[var]))
                 for var in xy_attrs]
        text = "<br/>".join(lines)
        if not self.tooltip_shows_all:
            return text
        others = super()._point_tooltip(point_id, skip_attrs=xy_attrs)
        if others:
            text = "<b>{}</b><br/><br/>{}".format(text, others)
        return text

    def add_data(self, time=0.4):
        """Append another sample of the SQL table to the shown data."""
        if self.data and len(self.data) > 2000:
            self.__timer.stop()
            return
        sample = self.sql_data.sample_time(time, no_cache=True)
        if not sample:
            return
        sample.download_data(2000, partial=True)
        new_rows = Table(sample)
        self.data = Table.concatenate((self.data, new_rows), axis=0)
        self.handleNewSignals()

    def init_attr_values(self):
        """Set up the axis model and pick default x and y attributes."""
        super().init_attr_values()
        data = self.data
        domain = data.domain if data and len(data) else None
        self.xy_model.set_domain(domain)
        self.attr_x = self.xy_model[0] if self.xy_model else None
        if len(self.xy_model) >= 2:
            self.attr_y = self.xy_model[1]
        else:
            self.attr_y = self.attr_x

    def switch_sampling(self):
        """Restart periodic sampling if it is enabled and applicable."""
        self.__timer.stop()
        if self.auto_sample and self.sql_data:
            self.add_data()
            self.__timer.start()

    def set_subset_data(self, subset_data):
        """Set the data subset; large SQL tables are rejected with a warning."""
        self.warning()
        if isinstance(subset_data, SqlTable):
            if subset_data.approx_len() < AUTO_DL_LIMIT:
                subset_data = Table(subset_data)
            else:
                self.warning("Data subset does not support large Sql tables")
                subset_data = None
        super().set_subset_data(subset_data)

    # called when all signals are received, so the graph is updated only once
    def handleNewSignals(self):
        requested = self.attribute_selection_list
        if requested and self.data is not None and \
                self.data.domain is not None and \
                all(attr in self.data.domain for attr in requested):
            self.attr_x, self.attr_y = requested[:2]
            self.attribute_selection_list = None
        super().handleNewSignals()
        self._vizrank_color_change()

    @Inputs.features
    def set_shown_attributes(self, attributes):
        """Remember the first two input features as the requested axes."""
        if not (attributes and len(attributes) >= 2):
            self.attribute_selection_list = None
            return
        self.attribute_selection_list = attributes[:2]
        self._invalidated = (
            self._invalidated
            or self.attr_x != attributes[0]
            or self.attr_y != attributes[1]
        )

    def set_attr(self, attr_x, attr_y):
        """Set both axes; replot and commit only if either one changed."""
        if not (attr_x != self.attr_x or attr_y != self.attr_y):
            return
        self.attr_x, self.attr_y = attr_x, attr_y
        self.attr_changed()

    def set_attr_from_combo(self):
        self.attr_changed()
        self.xy_changed_manually.emit(self.attr_x, self.attr_y)

    def attr_changed(self):
        self.setup_plot()
        self.commit()

    def get_axes(self):
        return {"bottom": self.attr_x, "left": self.attr_y}

    def colors_changed(self):
        super().colors_changed()
        self._vizrank_color_change()

    def commit(self):
        super().commit()
        self.send_features()

    def send_features(self):
        """Send the set axis attributes, or None if neither is set."""
        chosen = [attr for attr in [self.attr_x, self.attr_y] if attr]
        self.Outputs.features.send(chosen or None)

    def get_widget_name_extension(self):
        if self.data is None:
            return None
        return "{} vs {}".format(self.attr_x.name, self.attr_y.name)

    @classmethod
    def migrate_settings(cls, settings, version):
        """Upgrade settings saved by older versions of the widget."""
        if version < 2 and "selection" in settings and settings["selection"]:
            settings["selection_group"] = [
                (a, 1) for a in settings["selection"]]
        if version < 3:
            # Copy values stored under old keys to the current keys
            for old_key, new_key in (("auto_send_selection", "auto_commit"),
                                     ("selection_group", "selection")):
                if old_key in settings:
                    settings[new_key] = settings[old_key]

    @classmethod
    def migrate_context(cls, context, version):
        """Upgrade a saved context, or reject it as incompatible."""
        values = context.values
        if version < 3:
            # These used to be stored under the "graph" sub-dictionary
            for key in ("attr_color", "attr_size", "attr_shape", "attr_label"):
                values[key] = values["graph"][key]
        if version < 4 and (values["attr_x"][1] % 100 == 1
                            or values["attr_y"][1] % 100 == 1):
            raise IncompatibleContext()
class OWSelectAttributes(widget.OWWidget): # pylint: disable=too-many-instance-attributes name = "Select Columns" description = "Select columns from the data table and assign them to " \ "data features, classes or meta variables." icon = "icons/SelectColumns.svg" priority = 100 keywords = ["filter", "attributes", "target", "variable"] class Inputs: data = Input("Data", Table, default=True) features = Input("Features", AttributeList) class Outputs: data = Output("Data", Table) features = Output("Features", AttributeList, dynamic=False) want_main_area = False want_control_area = True settingsHandler = SelectAttributesDomainContextHandler(first_match=False) domain_role_hints = ContextSetting({}) use_input_features = Setting(False) ignore_new_features = Setting(False) auto_commit = Setting(True) class Warning(widget.OWWidget.Warning): mismatching_domain = Msg("Features and data domain do not match") multiple_targets = Msg("Most widgets do not support multiple targets") def __init__(self): super().__init__() self.data = None self.features = None # Schedule interface updates (enabled buttons) using a coalescing # single shot timer (complex interactions on selection and filtering # updates in the 'available_attrs_view') self.__interface_update_timer = QTimer(self, interval=0, singleShot=True) self.__interface_update_timer.timeout.connect( self.__update_interface_state) # The last view that has the selection for move operation's source self.__last_active_view = None # type: Optional[QListView] def update_on_change(view): # Schedule interface state update on selection change in `view` self.__last_active_view = view self.__interface_update_timer.start() self.controlArea = QWidget(self.controlArea) self.layout().addWidget(self.controlArea) layout = QGridLayout() self.controlArea.setLayout(layout) layout.setContentsMargins(4, 4, 4, 4) box = gui.vBox(self.controlArea, "Available Variables", addToLayout=False) self.available_attrs = VariablesListItemModel() filter_edit, 
self.available_attrs_view = variables_filter( parent=self, model=self.available_attrs) box.layout().addWidget(filter_edit) def dropcompleted(action): if action == Qt.MoveAction: self.commit() self.available_attrs_view.selectionModel().selectionChanged.connect( partial(update_on_change, self.available_attrs_view)) self.available_attrs_view.dragDropActionDidComplete.connect( dropcompleted) box.layout().addWidget(self.available_attrs_view) layout.addWidget(box, 0, 0, 3, 1) box = gui.vBox(self.controlArea, "Features", addToLayout=False) self.used_attrs = VariablesListItemModel() filter_edit, self.used_attrs_view = variables_filter( parent=self, model=self.used_attrs, accepted_type=(Orange.data.DiscreteVariable, Orange.data.ContinuousVariable)) self.used_attrs.rowsInserted.connect(self.__used_attrs_changed) self.used_attrs.rowsRemoved.connect(self.__used_attrs_changed) self.used_attrs_view.selectionModel().selectionChanged.connect( partial(update_on_change, self.used_attrs_view)) self.used_attrs_view.dragDropActionDidComplete.connect(dropcompleted) self.use_features_box = gui.auto_commit( self.controlArea, self, "use_input_features", "Use input features", "Always use input features", box=False, commit=self.__use_features_clicked, callback=self.__use_features_changed, addToLayout=False) self.enable_use_features_box() box.layout().addWidget(self.use_features_box) box.layout().addWidget(filter_edit) box.layout().addWidget(self.used_attrs_view) layout.addWidget(box, 0, 2, 1, 1) box = gui.vBox(self.controlArea, "Target Variable", addToLayout=False) self.class_attrs = VariablesListItemModel() self.class_attrs_view = VariablesListItemView( acceptedType=(Orange.data.DiscreteVariable, Orange.data.ContinuousVariable)) self.class_attrs_view.setModel(self.class_attrs) self.class_attrs_view.selectionModel().selectionChanged.connect( partial(update_on_change, self.class_attrs_view)) self.class_attrs_view.dragDropActionDidComplete.connect(dropcompleted) 
self.class_attrs_view.setMaximumHeight(72) box.layout().addWidget(self.class_attrs_view) layout.addWidget(box, 1, 2, 1, 1) box = gui.vBox(self.controlArea, "Meta Attributes", addToLayout=False) self.meta_attrs = VariablesListItemModel() self.meta_attrs_view = VariablesListItemView( acceptedType=Orange.data.Variable) self.meta_attrs_view.setModel(self.meta_attrs) self.meta_attrs_view.selectionModel().selectionChanged.connect( partial(update_on_change, self.meta_attrs_view)) self.meta_attrs_view.dragDropActionDidComplete.connect(dropcompleted) box.layout().addWidget(self.meta_attrs_view) layout.addWidget(box, 2, 2, 1, 1) bbox = gui.vBox(self.controlArea, addToLayout=False, margin=0) layout.addWidget(bbox, 0, 1, 1, 1) self.up_attr_button = gui.button(bbox, self, "Up", callback=partial( self.move_up, self.used_attrs_view)) self.move_attr_button = gui.button(bbox, self, ">", callback=partial( self.move_selected, self.used_attrs_view)) self.down_attr_button = gui.button(bbox, self, "Down", callback=partial( self.move_down, self.used_attrs_view)) bbox = gui.vBox(self.controlArea, addToLayout=False, margin=0) layout.addWidget(bbox, 1, 1, 1, 1) self.up_class_button = gui.button(bbox, self, "Up", callback=partial( self.move_up, self.class_attrs_view)) self.move_class_button = gui.button(bbox, self, ">", callback=partial( self.move_selected, self.class_attrs_view)) self.down_class_button = gui.button(bbox, self, "Down", callback=partial( self.move_down, self.class_attrs_view)) bbox = gui.vBox(self.controlArea, addToLayout=False) layout.addWidget(bbox, 2, 1, 1, 1) self.up_meta_button = gui.button(bbox, self, "Up", callback=partial( self.move_up, self.meta_attrs_view)) self.move_meta_button = gui.button(bbox, self, ">", callback=partial( self.move_selected, self.meta_attrs_view)) self.down_meta_button = gui.button(bbox, self, "Down", callback=partial( self.move_down, self.meta_attrs_view)) bbox = gui.vBox(self.controlArea, "Additional settings", addToLayout=False) gui.checkBox( 
widget=bbox, master=self, value="ignore_new_features", label="Ignore new variables by default", tooltip="When the widget receives data with additional columns " "they are added to the available attributes column if " "<i>Ignore new variables by default</i> is checked.") layout.addWidget(bbox, 3, 0, 1, 3) autobox = gui.auto_send(None, self, "auto_commit") layout.addWidget(autobox, 4, 0, 1, 3) reset = gui.button(None, self, "Reset", callback=self.reset, width=120) autobox.layout().insertWidget(0, reset) autobox.layout().insertStretch(1, 20) layout.setRowStretch(0, 4) layout.setRowStretch(1, 0) layout.setRowStretch(2, 2) layout.setHorizontalSpacing(0) self.controlArea.setLayout(layout) self.output_data = None self.original_completer_items = [] self.info.set_input_summary(self.info.NoInput) self.info.set_output_summary(self.info.NoOutput) self.resize(600, 600) @property def features_from_data_attributes(self): if self.data is None or self.features is None: return [] domain = self.data.domain return [ domain[feature.name] for feature in self.features if feature.name in domain and domain[feature.name] in domain.attributes ] def can_use_features(self): return bool(self.features_from_data_attributes) and \ self.features_from_data_attributes != self.used_attrs[:] def __use_features_changed(self): # Use input features check box # Needs a check since callback is invoked before object is created if not hasattr(self, "use_features_box"): return self.enable_used_attrs(not self.use_input_features) if self.use_input_features and self.can_use_features(): self.use_features() if not self.use_input_features: self.enable_use_features_box() def __use_features_clicked(self): # Use input features button self.use_features() def __used_attrs_changed(self): self.enable_use_features_box() @Inputs.data def set_data(self, data=None): self.update_domain_role_hints() self.closeContext() self.domain_role_hints = {} self.data = data if data is None: self.used_attrs[:] = [] self.class_attrs[:] = [] 
self.meta_attrs[:] = [] self.available_attrs[:] = [] self.info.set_input_summary(self.info.NoInput) return self.openContext(data) all_vars = data.domain.variables + data.domain.metas def attrs_for_role(role): selected_attrs = [ attr for attr in all_vars if domain_hints[attr][0] == role ] return sorted(selected_attrs, key=lambda attr: domain_hints[attr][1]) domain_hints = self.restore_hints(data.domain) self.used_attrs[:] = attrs_for_role("attribute") self.class_attrs[:] = attrs_for_role("class") self.meta_attrs[:] = attrs_for_role("meta") self.available_attrs[:] = attrs_for_role("available") self.info.set_input_summary(len(data), format_summary_details(data)) def restore_hints(self, domain: Domain) -> Dict[Variable, Tuple[str, int]]: """ Define hints for selected/unselected features. Rules: - if context available, restore new features based on checked/unchecked ignore_new_features, context hint should be took into account - in no context, restore features based on the domain (as selected) Parameters ---------- domain Data domain Returns ------- Dictionary with hints about order and model in which each feature should appear """ domain_hints = {} if not self.ignore_new_features or len(self.domain_role_hints) == 0: # select_new_features selected or no context - restore based on domain domain_hints.update( self._hints_from_seq("attribute", domain.attributes)) domain_hints.update(self._hints_from_seq("meta", domain.metas)) domain_hints.update( self._hints_from_seq("class", domain.class_vars)) else: # if context restored and ignore_new_features selected - restore # new features as available d = domain.attributes + domain.metas + domain.class_vars domain_hints.update(self._hints_from_seq("available", d)) domain_hints.update(self.domain_role_hints) return domain_hints def update_domain_role_hints(self): """ Update the domain hints to be stored in the widgets settings. 
""" hints = {} hints.update(self._hints_from_seq("available", self.available_attrs)) hints.update(self._hints_from_seq("attribute", self.used_attrs)) hints.update(self._hints_from_seq("class", self.class_attrs)) hints.update(self._hints_from_seq("meta", self.meta_attrs)) self.domain_role_hints = hints @staticmethod def _hints_from_seq(role, model): return [(attr, (role, i)) for i, attr in enumerate(model)] @Inputs.features def set_features(self, features): self.features = features def handleNewSignals(self): self.check_data() self.enable_used_attrs() self.enable_use_features_box() if self.use_input_features and self.features_from_data_attributes: self.enable_used_attrs(False) self.use_features() self.unconditional_commit() def check_data(self): self.Warning.mismatching_domain.clear() if self.data is not None and self.features is not None and \ not self.features_from_data_attributes: self.Warning.mismatching_domain() def enable_used_attrs(self, enable=True): self.up_attr_button.setEnabled(enable) self.move_attr_button.setEnabled(enable) self.down_attr_button.setEnabled(enable) self.used_attrs_view.setEnabled(enable) self.used_attrs_view.repaint() def enable_use_features_box(self): self.use_features_box.button.setEnabled(self.can_use_features()) enable_checkbox = bool(self.features_from_data_attributes) self.use_features_box.setHidden(not enable_checkbox) self.use_features_box.repaint() def use_features(self): attributes = self.features_from_data_attributes available, used = self.available_attrs[:], self.used_attrs[:] self.available_attrs[:] = [ attr for attr in used + available if attr not in attributes ] self.used_attrs[:] = attributes self.commit() @staticmethod def selected_rows(view): """ Return the selected rows in the view. 
""" rows = view.selectionModel().selectedRows() model = view.model() if isinstance(model, QSortFilterProxyModel): rows = [model.mapToSource(r) for r in rows] return [r.row() for r in rows] def move_rows(self, view: QListView, offset: int, roles=(Qt.EditRole, )): rows = [idx.row() for idx in view.selectionModel().selectedRows()] model = view.model() # type: QAbstractItemModel rowcount = model.rowCount() newrows = [min(max(0, row + offset), rowcount - 1) for row in rows] def itemData(index): return {role: model.data(index, role) for role in roles} for row, newrow in sorted(zip(rows, newrows), reverse=offset > 0): d1 = itemData(model.index(row, 0)) d2 = itemData(model.index(newrow, 0)) model.setItemData(model.index(row, 0), d2) model.setItemData(model.index(newrow, 0), d1) selection = QItemSelection() for nrow in newrows: index = model.index(nrow, 0) selection.select(index, index) view.selectionModel().select(selection, QItemSelectionModel.ClearAndSelect) self.commit() def move_up(self, view: QListView): self.move_rows(view, -1) def move_down(self, view: QListView): self.move_rows(view, 1) def move_selected(self, view): if self.selected_rows(view): self.move_selected_from_to(view, self.available_attrs_view) elif self.selected_rows(self.available_attrs_view): self.move_selected_from_to(self.available_attrs_view, view) def move_selected_from_to(self, src, dst): self.move_from_to(src, dst, self.selected_rows(src)) def move_from_to(self, src, dst, rows): src_model = source_model(src) attrs = [src_model[r] for r in rows] for s1, s2 in reversed(list(slices(rows))): del src_model[s1:s2] dst_model = source_model(dst) dst_model.extend(attrs) self.commit() def __update_interface_state(self): last_view = self.__last_active_view if last_view is not None: self.update_interface_state(last_view) def update_interface_state(self, focus=None): for view in [ self.available_attrs_view, self.used_attrs_view, self.class_attrs_view, self.meta_attrs_view ]: if view is not focus and not 
view.hasFocus() \ and view.selectionModel().hasSelection(): view.selectionModel().clear() def selected_vars(view): model = source_model(view) return [model[i] for i in self.selected_rows(view)] available_selected = selected_vars(self.available_attrs_view) attrs_selected = selected_vars(self.used_attrs_view) class_selected = selected_vars(self.class_attrs_view) meta_selected = selected_vars(self.meta_attrs_view) available_types = set(map(type, available_selected)) all_primitive = all(var.is_primitive() for var in available_types) move_attr_enabled = \ ((available_selected and all_primitive) or attrs_selected) and \ self.used_attrs_view.isEnabled() self.move_attr_button.setEnabled(bool(move_attr_enabled)) if move_attr_enabled: self.move_attr_button.setText(">" if available_selected else "<") move_class_enabled = bool(all_primitive and available_selected) or class_selected self.move_class_button.setEnabled(bool(move_class_enabled)) if move_class_enabled: self.move_class_button.setText(">" if available_selected else "<") move_meta_enabled = available_selected or meta_selected self.move_meta_button.setEnabled(bool(move_meta_enabled)) if move_meta_enabled: self.move_meta_button.setText(">" if available_selected else "<") self.__last_active_view = None self.__interface_update_timer.stop() def commit(self): self.update_domain_role_hints() self.Warning.multiple_targets.clear() if self.data is not None: attributes = list(self.used_attrs) class_var = list(self.class_attrs) metas = list(self.meta_attrs) domain = Orange.data.Domain(attributes, class_var, metas) newdata = self.data.transform(domain) self.output_data = newdata self.Outputs.data.send(newdata) self.Outputs.features.send(AttributeList(attributes)) self.info.set_output_summary(len(newdata), format_summary_details(newdata)) self.Warning.multiple_targets(shown=len(class_var) > 1) else: self.output_data = None self.Outputs.data.send(None) self.Outputs.features.send(None) self.info.set_output_summary(self.info.NoOutput) 
def reset(self):
    """Restore the variable lists to the roles given by the input domain.

    Re-enables the "used attributes" controls and unchecks the
    "use input features" checkbox.  When data is present, every variable is
    put back into the list matching its role in ``self.data.domain``
    (nothing is left in the available list), the role hints are refreshed
    and the output is committed.
    """
    self.enable_used_attrs()
    self.use_features_box.checkbox.setChecked(False)
    if self.data is not None:
        domain = self.data.domain
        self.available_attrs[:] = []
        self.used_attrs[:] = domain.attributes
        self.class_attrs[:] = domain.class_vars
        self.meta_attrs[:] = domain.metas
        self.update_domain_role_hints()
        self.commit()

def send_report(self):
    """Report the input domain, the output domain and the removed variables.

    Nothing is reported when either the input or the output data is missing
    (or falsy).  If attributes, class variables and metas of both domains
    are equal, the output is reported as "No changes."; otherwise the output
    domain is reported together with the variables that are present in the
    input but absent from the output.
    """
    if not self.data or not self.output_data:
        return
    in_domain, out_domain = self.data.domain, self.output_data.domain
    self.report_domain("Input data", in_domain)
    in_roles = (in_domain.attributes, in_domain.class_vars, in_domain.metas)
    out_roles = (
        out_domain.attributes, out_domain.class_vars, out_domain.metas)
    if in_roles == out_roles:
        self.report_paragraph("Output data", "No changes.")
        return

    self.report_domain("Output data", out_domain)
    kept = set(out_domain.variables + out_domain.metas)
    # Walk the input domain instead of taking a set difference, so that the
    # removed variables are listed in a stable order (that of the input).
    removed = [var for var in in_domain.variables + in_domain.metas
               if var not in kept]
    if removed:
        text = "%i (%s)" % (
            len(removed), ", ".join(var.name for var in removed))
        self.report_items((("Removed", text), ))
class OWSOM(OWWidget):
    """Widget that fits a self-organizing map and draws it on a grid.

    Only the continuous attributes of the input data are used for fitting.
    The map is shown on a hexagonal or square grid of ``size_x`` columns and
    ``size_y`` rows; cells can be selected, and the instances assigned to
    the selected cells are sent to the output.

    Array layout used throughout the class:

    - ``grid_cells`` holds graphics items and is indexed ``[y, x]``;
    - ``selection``, ``cells`` and the ``sizes`` arrays are indexed
      ``[x, y]``.
    """
    name = "Self-Organizing Map"
    description = "Computation of self-organizing map."
    icon = "icons/SOM.svg"
    keywords = ["SOM"]

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    settingsHandler = DomainContextHandler()
    auto_dimension = Setting(True)
    size_x = Setting(10)
    size_y = Setting(10)
    hexagonal = Setting(1)
    initialization = Setting(0)

    attr_color = ContextSetting(None)
    size_by_instances = Setting(True)
    pie_charts = Setting(False)
    selection = Setting(None, schema_only=True)

    graph_name = "view"

    # Pen used for the outline of unselected grid cells
    _grid_pen = QPen(QBrush(QColor(224, 224, 224)), 2)
    _grid_pen.setCosmetic(True)

    # Controls whose values are read only when the optimization (re)starts
    OptControls = namedtuple(
        "OptControls",
        ("shape", "auto_dim", "spin_x", "spin_y", "initialization", "start")
    )

    class Warning(OWWidget.Warning):
        ignoring_disc_variables = Msg("SOM ignores discrete variables.")
        missing_colors = \
            Msg("Some data instances have undefined value of '{}'.")
        missing_values = \
            Msg("{} data instance{} with undefined value(s) {} not shown.")

    class Error(OWWidget.Error):
        no_numeric_variables = Msg("Data contains no numeric columns.")
        no_defined_rows = Msg("All rows contain at least one undefined value.")

    def __init__(self):
        """Build the control area, the graphics scene and the view."""
        super().__init__()
        # The stored selection can be applied only after the map is computed
        self.__pending_selection = self.selection
        self._optimizer = None
        self._optimizer_thread = None
        self.stop_optimization = False

        self.data = self.cont_x = None
        self.cells = self.member_data = None
        self.selection = None
        self.colors = self.thresholds = None

        box = gui.vBox(self.controlArea, box="SOM")
        # The combo is not bound to a setting (empty value name); its state
        # is copied to `self.hexagonal` in `read_controls`
        shape = gui.comboBox(
            box, self, "", items=("Hexagonal grid", "Square grid"))
        shape.setCurrentIndex(1 - self.hexagonal)

        box2 = gui.indentedBox(box, 10)
        auto_dim = gui.checkBox(
            box2, self, "auto_dimension", "Set dimensions automatically",
            callback=self.on_auto_dimension_changed)
        self.manual_box = box3 = gui.hBox(box2)
        # Spins are likewise unbound; see `read_controls`
        spinargs = dict(
            value="", widget=box3, master=self, minv=5, maxv=100, step=5,
            alignment=Qt.AlignRight)
        spin_x = gui.spin(**spinargs)
        spin_x.setValue(self.size_x)
        gui.widgetLabel(box3, "×")
        spin_y = gui.spin(**spinargs)
        spin_y.setValue(self.size_y)
        gui.rubber(box3)
        self.manual_box.setEnabled(not self.auto_dimension)

        initialization = gui.comboBox(
            box, self, "initialization",
            items=("Initialize with PCA", "Random initialization",
                   "Replicable random"))

        start = gui.button(
            box, self, "Restart", callback=self.restart_som_pressed,
            sizePolicy=(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed))

        self.opt_controls = self.OptControls(
            shape, auto_dim, spin_x, spin_y, initialization, start)

        box = gui.vBox(self.controlArea, "Color")
        gui.comboBox(
            box, self, "attr_color", maximumContentsLength=15,
            callback=self.on_attr_color_change,
            model=DomainModel(placeholder="(Same color)",
                              valid_types=DomainModel.PRIMITIVE))
        gui.checkBox(
            box, self, "pie_charts", label="Show pie charts",
            callback=self.on_pie_chart_change)
        gui.checkBox(
            box, self, "size_by_instances", label="Size by number of instances",
            callback=self.on_attr_size_change)

        gui.rubber(self.controlArea)

        self.scene = QGraphicsScene(self)

        self.view = SomView(self.scene)
        self.view.setMinimumWidth(400)
        self.view.setMinimumHeight(400)
        self.view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.view.setRenderHint(QPainter.Antialiasing)
        self.view.selection_changed.connect(self.on_selection_change)
        self.view.selection_moved.connect(self.on_selection_move)
        self.view.selection_mark_changed.connect(self.on_selection_mark_change)
        self.mainArea.layout().addWidget(self.view)

        self.elements = None
        self.grid = None
        self.grid_cells = None
        self.legend = None

    @Inputs.data
    def set_data(self, data):
        """Handle new input data and (re)start the optimization.

        Stops a running optimization, clears the widget and, if the data
        has continuous attributes, stores the usable rows in ``self.data``
        and their scaled continuous columns in ``self.cont_x``.
        """
        def prepare_data():
            if len(cont_attrs) < len(attrs):
                self.Warning.ignoring_disc_variables()
            x = Table.from_table(Domain(cont_attrs), data).X
            if sp.issparse(x):
                self.data = data
                self.cont_x = x.tocsr()
            else:
                # Keep only rows in which all values are finite
                mask = np.all(np.isfinite(x), axis=1)
                if not np.any(mask):
                    self.Error.no_defined_rows()
                else:
                    if np.all(mask):
                        self.data = data
                        self.cont_x = x.copy()
                    else:
                        self.data = data[mask]
                        self.cont_x = x[mask]
                    # Shift each column to start at 0 and divide it by its
                    # sum; all-zero columns are left as they are
                    self.cont_x -= np.min(self.cont_x, axis=0)[None, :]
                    sums = np.sum(self.cont_x, axis=0)[None, :]
                    sums[sums == 0] = 1
                    self.cont_x /= sums

        def set_warnings():
            missing = len(data) - len(self.data)
            if missing == 1:
                self.Warning.missing_values(1, "", "is")
            elif missing > 1:
                self.Warning.missing_values(missing, "s", "are")

        self.stop_optimization_and_wait()

        self.closeContext()
        self.clear()
        self.Error.clear()
        self.Warning.clear()

        if data is not None:
            attrs = data.domain.attributes
            cont_attrs = [var for var in attrs if var.is_continuous]
            if not cont_attrs:
                self.Error.no_numeric_variables()
            else:
                prepare_data()

        if self.data is not None:
            self.controls.attr_color.model().set_domain(data.domain)
            self.attr_color = data.domain.class_var
            set_warnings()

        self.openContext(self.data)
        self.set_color_bins()
        self.create_legend()
        self.recompute_dimensions()
        self._set_input_summary(data and len(data))
        self.start_som()

    def _set_input_summary(self, n_tot):
        """Show the number of used instances; `n_tot` is the input size."""
        if self.data is None:
            self.info.set_input_summary(self.info.NoInput)
            return
        n = len(self.data)
        inst = str(n)
        nvars = f"{self.cont_x.shape[1]} numeric variables"
        if n < n_tot:
            inst += f" ({n_tot})"
            details = f"{n_tot - n} out of {n_tot} instances ignored " \
                      f"because of missing values;\n{nvars}"
        else:
            details = f"{n} instances; {nvars}"
        self.info.set_input_summary(inst, details)

    def clear(self):
        """Forget the data, the computed map, the colors and the selection."""
        self.data = self.cont_x = None
        self.cells = self.member_data = None
        self.attr_color = None
        self.colors = self.thresholds = None
        if self.elements is not None:
            self.scene.removeItem(self.elements)
            self.elements = None
        self.clear_selection()
        self.controls.attr_color.model().set_domain(None)
        self.Warning.clear()
        self.Error.clear()

    def recompute_dimensions(self):
        """Set both spins from the number of rows if dimensions are automatic.

        The dimension is ``ceil(sqrt(5 * sqrt(n_rows)))``, but at least 5.
        """
        if not self.auto_dimension or self.cont_x is None:
            return
        dim = max(5, int(np.ceil(np.sqrt(5 * np.sqrt(self.cont_x.shape[0])))))
        self.opt_controls.spin_x.setValue(dim)
        self.opt_controls.spin_y.setValue(dim)

    def on_auto_dimension_changed(self):
        """Recompute dimensions, or round the manual ones to multiples of 5."""
        self.manual_box.setEnabled(not self.auto_dimension)
        if self.auto_dimension:
            self.recompute_dimensions()
        else:
            spin_x = self.opt_controls.spin_x
            spin_y = self.opt_controls.spin_y
            dimx = int(5 * np.round(spin_x.value() / 5))
            dimy = int(5 * np.round(spin_y.value() / 5))
            spin_x.setValue(dimx)
            spin_y.setValue(dimy)

    def on_attr_color_change(self):
        """Update bins, legend and drawing after the color variable changes."""
        self.controls.pie_charts.setEnabled(self.attr_color is not None)
        self.set_color_bins()
        self.create_legend()
        self.rescale()
        self._redraw()

    def on_attr_size_change(self):
        self._redraw()

    def on_pie_chart_change(self):
        self._redraw()

    def clear_selection(self):
        """Drop the selection and repaint the grid cells."""
        self.selection = None
        self.redraw_selection()

    def on_selection_change(self, selection, action=SomView.SelectionSet):
        """Apply `selection` to `self.selection` according to `action`.

        `selection` is used as an index into `self.selection`, an int16
        array of group numbers with the shape ``(size_x, size_y)``
        (0 means that the cell is not selected).
        """
        if self.selection is None:
            self.selection = np.zeros(self.grid_cells.T.shape, dtype=np.int16)
        if action == SomView.SelectionSet:
            self.selection[:] = 0
            self.selection[selection] = 1
        elif action == SomView.SelectionAddToGroup:
            self.selection[selection] = max(1, np.max(self.selection))
        elif action == SomView.SelectionNewGroup:
            self.selection[selection] = 1 + np.max(self.selection)
        elif action & SomView.SelectionRemove:
            self.selection[selection] = 0
        self.redraw_selection()
        self.update_output()

    def on_selection_move(self, event: QKeyEvent):
        """Move a single-cell selection with the arrow keys.

        Without a selection, Right/Down select the top-left cell and the
        other keys the bottom-right one.  If more than one cell is selected,
        the event is ignored.
        """
        if self.selection is None or not np.any(self.selection):
            if event.key() in (Qt.Key_Right, Qt.Key_Down):
                x = y = 0
            else:
                x = self.size_x - 1
                y = self.size_y - 1
        else:
            x, y = np.nonzero(self.selection)
            if len(x) > 1:
                return
            if event.key() == Qt.Key_Up and y > 0:
                y -= 1
            if event.key() == Qt.Key_Down and y < self.size_y - 1:
                y += 1
            if event.key() == Qt.Key_Left and x:
                x -= 1
            if event.key() == Qt.Key_Right and x < self.size_x - 1:
                x += 1
        # Odd rows of a hexagonal grid have one cell less; step back from
        # the non-existing last column, whichever branch chose the cell
        x -= self.hexagonal and x == self.size_x - 1 and y % 2

        if self.selection is not None and self.selection[x, y]:
            return
        # The mask is indexed [x, y] and must match `self.selection`, whose
        # shape is that of the transposed `grid_cells` (which is [y, x]);
        # using the untransposed shape fails on non-square grids
        selection = np.zeros(self.grid_cells.T.shape, dtype=bool)
        selection[x, y] = True
        self.on_selection_change(selection)

    def on_selection_mark_change(self, marks):
        self.redraw_selection(marks=marks)

    def redraw_selection(self, marks=None):
        """Set pens, brushes and z-values of grid cells.

        Marked cells (truthy ``marks[x, y]``) get the mark pen and brush;
        other cells are painted according to their selection group.
        """
        if self.grid_cells is None:
            return

        sel_pen = QPen(QBrush(QColor(128, 128, 128)), 2)
        sel_pen.setCosmetic(True)
        mark_pen = QPen(QBrush(QColor(128, 128, 128)), 4)
        mark_pen.setCosmetic(True)
        pens = [self._grid_pen, sel_pen]

        mark_brush = QBrush(QColor(224, 255, 255))
        # Number of selection groups (False, i.e. 0, without a selection)
        sels = self.selection is not None and np.max(self.selection)
        palette = ColorPaletteGenerator(number_of_colors=sels + 1)
        # brushes[0] is for unselected cells, brushes[g] for group g
        brushes = [QBrush(Qt.NoBrush)] + \
                  [QBrush(palette[i].lighter(165)) for i in range(sels)]

        for y in range(self.size_y):
            for x in range(self.size_x - (y % 2) * self.hexagonal):
                cell = self.grid_cells[y, x]
                marked = marks is not None and marks[x, y]
                sel_group = self.selection is not None and self.selection[x, y]
                if marked:
                    cell.setBrush(mark_brush)
                    cell.setPen(mark_pen)
                else:
                    cell.setBrush(brushes[sel_group])
                    cell.setPen(pens[bool(sel_group)])
                cell.setZValue(marked or sel_group)

    def restart_som_pressed(self):
        """Request a stop if the optimization is running, else start it."""
        if self._optimizer_thread is not None:
            self.stop_optimization = True
        else:
            self.start_som()

    def start_som(self):
        """Read the controls, redraw the grid and start the optimization."""
        self.read_controls()
        self.update_layout()
        self.clear_selection()
        if self.cont_x is not None:
            self.enable_controls(False)
            self._recompute_som()
        else:
            self.update_output()

    def read_controls(self):
        """Copy the grid shape and dimensions from controls to settings."""
        c = self.opt_controls
        self.hexagonal = c.shape.currentIndex() == 0
        self.size_x = c.spin_x.value()
        self.size_y = c.spin_y.value()

    def enable_controls(self, enable):
        """Enable or disable shape controls and relabel the start button."""
        c = self.opt_controls
        c.shape.setEnabled(enable)
        c.auto_dim.setEnabled(enable)
        c.start.setText("Start" if enable else "Stop")

    def update_layout(self):
        """Reposition the legend, redraw the empty grid and rescale."""
        self.set_legend_pos()
        if self.elements:  # Prevent having redrawn grid but with old elements
            self.scene.removeItem(self.elements)
            self.elements = None
        self.redraw_grid()
        self.rescale()

    def _redraw(self):
        """Redraw the elements (circles or pie charts) shown in the cells."""
        self.Warning.missing_colors.clear()
        if self.elements:
            self.scene.removeItem(self.elements)
            self.elements = None
        self.view.set_dimensions(self.size_x, self.size_y, self.hexagonal)

        if self.cells is None:
            return
        # Number of instances in each cell, indexed [x, y]
        sizes = self.cells[:, :, 1] - self.cells[:, :, 0]
        sizes = sizes.astype(float)
        if not self.size_by_instances:
            sizes[sizes != 0] = 0.8
        else:
            sizes *= 0.8 / np.max(sizes)

        self.elements = QGraphicsItemGroup()
        self.scene.addItem(self.elements)
        if self.attr_color is None:
            self._draw_same_color(sizes)
        elif self.pie_charts:
            self._draw_pie_charts(sizes)
        else:
            self._draw_colored_circles(sizes)

    @property
    def _grid_factors(self):
        """Pair of (x offset for odd rows, distance between rows)."""
        return (0.5, sqrt3_2) if self.hexagonal else (0, 1)

    def _draw_same_color(self, sizes):
        """Draw a gray circle into every non-empty cell."""
        fx, fy = self._grid_factors
        color = QColor(64, 64, 64)
        for y in range(self.size_y):
            for x in range(self.size_x - self.hexagonal * (y % 2)):
                r = sizes[x, y]
                n = len(self.get_member_indices(x, y))
                if not r:
                    continue
                ellipse = ColoredCircle(r / 2, color, 0)
                ellipse.setPos(x + (y % 2) * fx, y * fy)
                ellipse.setToolTip(f"{n} instances")
                self.elements.addToGroup(ellipse)

    def _get_color_column(self):
        """Return an int array with the index of the color of each instance.

        For a discrete variable this is the value index, with missing values
        mapped to ``len(self.colors)``; for a numeric variable it is the
        index of the bin defined by ``self.thresholds``.
        """
        color_column = \
            self.data.get_column_view(self.attr_color)[0].astype(float,
                                                                 copy=False)
        if self.attr_color.is_discrete:
            with np.errstate(invalid="ignore"):
                int_col = color_column.astype(int)
            int_col[np.isnan(color_column)] = len(self.colors)
        else:
            int_col = np.zeros(len(color_column), dtype=int)
            # The following line is not necessary because rows with missing
            # numeric data are excluded. Uncomment it if you change SOM to
            # tolerate missing values.
            # int_col[np.isnan(color_column)] = len(self.colors)
            for i, thresh in enumerate(self.thresholds, start=1):
                int_col[color_column >= thresh] = i
        return int_col

    def _tooltip(self, colors, distribution):
        """Return an HTML table with counts and percentages of values.

        Values (or bins) with a count of zero are omitted.
        """
        if self.attr_color.is_discrete:
            values = self.attr_color.values
        else:
            values = self._bin_names()
        tot = np.sum(distribution)
        nbhp = "\N{NON-BREAKING HYPHEN}"
        return '<table style="white-space: nowrap">' + "".join(f"""
            <tr>
                <td>
                    <font color={color.name()}>■</font>
                    <b>{escape(val).replace("-", nbhp)}</b>:
                </td>
                <td>
                    {n} ({n / tot * 100:.1f} %)
                </td>
            </tr>
            """ for color, val, n in zip(colors, values, distribution) if n) \
            + "</table>"

    def _draw_pie_charts(self, sizes):
        """Draw a pie chart of the color distribution into non-empty cells."""
        fx, fy = self._grid_factors
        color_column = self._get_color_column()
        # The additional gray slice is for instances with a missing value
        colors = self.colors + [Qt.gray]
        for y in range(self.size_y):
            for x in range(self.size_x - self.hexagonal * (y % 2)):
                r = sizes[x, y]
                if not r:
                    self.grid_cells[y, x].setToolTip("")
                    continue
                members = self.get_member_indices(x, y)
                color_dist = np.bincount(color_column[members],
                                         minlength=len(colors))
                rel_color_dist = color_dist.astype(float) / len(members)
                pie = PieChart(rel_color_dist, r / 2, colors)
                pie.setToolTip(self._tooltip(colors, color_dist))
                self.elements.addToGroup(pie)
                pie.setPos(x + (y % 2) * fx, y * fy)

    def _draw_colored_circles(self, sizes):
        """Draw a circle in the most frequent color into non-empty cells.

        The third argument of `ColoredCircle` is the proportion of cell
        members that have the most frequent color.
        """
        fx, fy = self._grid_factors
        color_column = self._get_color_column()
        for y in range(self.size_y):
            for x in range(self.size_x - self.hexagonal * (y % 2)):
                r = sizes[x, y]
                if not r:
                    continue
                members = self.get_member_indices(x, y)
                color_dist = color_column[members]
                # Indices equal to len(self.colors) denote missing values
                color_dist = color_dist[color_dist < len(self.colors)]
                if len(color_dist) != len(members):
                    self.Warning.missing_colors(self.attr_color.name)
                bc = np.bincount(color_dist, minlength=len(self.colors))
                color = self.colors[np.argmax(bc)]
                ellipse = ColoredCircle(
                    r / 2, color, np.max(bc) / len(members))
                ellipse.setPos(x + (y % 2) * fx, y * fy)
                ellipse.setToolTip(self._tooltip(self.colors, bc))
                self.elements.addToGroup(ellipse)

    def redraw_grid(self):
        """Recreate the grid cells; `grid_cells` is indexed ``[y, x]``.

        In hexagonal grids, the last column of odd rows stays `None`.
        """
        if self.grid is not None:
            self.scene.removeItem(self.grid)
        self.grid = QGraphicsItemGroup()
        self.grid.setZValue(-200)
        self.grid_cells = np.full((self.size_y, self.size_x), None)
        for y in range(self.size_y):
            for x in range(self.size_x - (y % 2) * self.hexagonal):
                if self.hexagonal:
                    cell = QGraphicsPathItem(_hexagon_path)
                    cell.setPos(x + (y % 2) / 2, y * sqrt3_2)
                else:
                    cell = QGraphicsRectItem(x - 0.5, y - 0.5, 1, 1)
                self.grid_cells[y, x] = cell
                cell.setPen(self._grid_pen)
                self.grid.addToGroup(cell)
        self.scene.addItem(self.grid)

    def get_member_indices(self, x, y):
        """Return indices of data rows assigned to the cell (x, y)."""
        i, j = self.cells[x, y]
        return self.member_data[i:j]

    def _recompute_som(self):
        """Fit the map in a separate thread and redraw it as it progresses."""
        if self.cont_x is None:
            return

        class Optimizer(QObject):
            update = Signal(float, np.ndarray, np.ndarray)
            done = Signal(SOM)
            stopped = Signal()

            def __init__(self, data, widget):
                super().__init__()
                self.som = SOM(
                    widget.size_x, widget.size_y,
                    hexagonal=widget.hexagonal,
                    pca_init=widget.initialization == 0,
                    random_seed=0 if widget.initialization == 2 else None)
                self.data = data
                self.widget = widget

            def callback(self, progress):
                self.update.emit(
                    progress,
                    self.som.weights.copy(), self.som.ssum_weights.copy())
                # The returned value is False when a stop was requested
                return not self.widget.stop_optimization

            def run(self):
                try:
                    self.som.fit(self.data, N_ITERATIONS,
                                 callback=self.callback)
                # Report an exception, but still remove the thread
                finally:
                    self.done.emit(self.som)
                    self.stopped.emit()

        def update(_progress, weights, ssum_weights):
            progressbar.advance()
            self._assign_instances(weights, ssum_weights)
            self._redraw()

        def done(som):
            self.enable_controls(True)
            progressbar.finish()
            self._assign_instances(som.weights, som.ssum_weights)
            self._redraw()
            # This is the first time we know what was selected (assuming that
            # initialization is not set to random)
            if self.__pending_selection is not None:
                self.on_selection_change(self.__pending_selection)
                self.__pending_selection = None
            self.update_output()

        def thread_finished():
            self._optimizer = None
            self._optimizer_thread = None

        progressbar = gui.ProgressBar(self, N_ITERATIONS)

        self._optimizer = Optimizer(self.cont_x, self)
        self._optimizer_thread = QThread()
        self._optimizer_thread.setStackSize(5 * 2 ** 20)
        self._optimizer.update.connect(update)
        self._optimizer.done.connect(done)
        self._optimizer.stopped.connect(self._optimizer_thread.quit)
        self._optimizer.moveToThread(self._optimizer_thread)
        self._optimizer_thread.started.connect(self._optimizer.run)
        self._optimizer_thread.finished.connect(thread_finished)
        self.stop_optimization = False
        self._optimizer_thread.start()

    def stop_optimization_and_wait(self):
        """Ask the optimizer to stop and wait for its thread to finish."""
        if self._optimizer_thread is not None:
            self.stop_optimization = True
            self._optimizer_thread.quit()
            self._optimizer_thread.wait()
            self._optimizer_thread = None

    def onDeleteWidget(self):
        self.stop_optimization_and_wait()
        self.clear()
        super().onDeleteWidget()

    def _assign_instances(self, weights, ssum_weights):
        """Assign data rows to cells.

        Sets `self.member_data`, an array of row indices grouped by cells,
        and `self.cells`, an array of shape ``(size_x, size_y, 2)`` whose
        element ``[x, y]`` holds the start and the end of the slice of
        `member_data` that belongs to the cell.
        """
        if self.cont_x is None:
            return  # the widget is shutting down while signals still processed
        assignments = SOM.winner_from_weights(
            self.cont_x, weights, ssum_weights, self.hexagonal)
        members = defaultdict(list)
        for i, (x, y) in enumerate(assignments):
            members[(x, y)].append(i)
        members.pop(None, None)
        self.cells = np.empty((self.size_x, self.size_y, 2), dtype=int)
        self.member_data = np.empty(self.cont_x.shape[0], dtype=int)
        index = 0
        for x in range(self.size_x):
            for y in range(self.size_y):
                nmembers = len(members[(x, y)])
                self.member_data[index:index + nmembers] = members[(x, y)]
                self.cells[x, y] = [index, index + nmembers]
                index += nmembers

    def resizeEvent(self, event):
        super().resizeEvent(event)
        self.create_legend()  # re-wrap lines if necessary
        self.rescale()

    def rescale(self):
        """Set the view transform and scene rect to fit grid and legend."""
        if self.legend:
            leg_height = self.legend.boundingRect().height()
            leg_extra = 1.5
        else:
            leg_height = 0
            leg_extra = 1

        vw, vh = self.view.width(), self.view.height() - leg_height
        scale = min(vw / (self.size_x + 1),
                    vh / ((self.size_y + leg_extra) * self._grid_factors[1]))
        self.view.setTransform(QTransform.fromScale(scale, scale))
        if self.hexagonal:
            self.view.setSceneRect(
                0, -1, self.size_x - 1,
                (self.size_y + leg_extra) * sqrt3_2 + leg_height / scale)
        else:
            self.view.setSceneRect(
                -0.25, -0.25, self.size_x - 0.5,
                self.size_y - 0.5 + leg_height / scale)

    def update_output(self):
        """Send the selected and the annotated data to the outputs."""
        if self.data is None:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(None)
            self.info.set_output_summary(self.info.NoOutput)
            return

        # Selection group of each data row; 0 for rows that are not selected
        indices = np.zeros(len(self.data), dtype=int)
        if self.selection is not None and np.any(self.selection):
            for y in range(self.size_y):
                for x in range(self.size_x):
                    rows = self.get_member_indices(x, y)
                    indices[rows] = self.selection[x, y]

        if np.any(indices):
            sel_data = create_groups_table(self.data, indices, False, "Group")
            self.Outputs.selected_data.send(sel_data)
            self.info.set_output_summary(str(len(sel_data)))
        else:
            self.Outputs.selected_data.send(None)
            self.info.set_output_summary(self.info.NoOutput)

        if np.max(indices) > 1:
            annotated = create_groups_table(self.data, indices)
        else:
            annotated = create_annotated_table(
                self.data, np.flatnonzero(indices))
        self.Outputs.annotated_data.send(annotated)

    def set_color_bins(self):
        """Set `self.colors` and `self.thresholds` for the color variable.

        Thresholds are used only for numeric variables; they are `None` for
        discrete variables and when no variable is chosen.
        """
        if self.attr_color is None:
            self.thresholds = self.colors = None
        elif self.attr_color.is_discrete:
            self.thresholds = None
            self.colors = [QColor(*color) for color in self.attr_color.colors]
        else:
            col = self.data.get_column_view(self.attr_color)[0].astype(float)
            self.thresholds = decimal_binnings(col, min_bins=4)[0][1:-1]
            palette = ContinuousPaletteGenerator(*self.attr_color.colors)
            nbins = len(self.thresholds) + 1
            self.colors = [palette[i / (nbins - 1)] for i in range(nbins)]

    def create_legend(self):
        """Replace the legend with one for the current color variable."""
        if self.legend is not None:
            self.scene.removeItem(self.legend)
            self.legend = None
        if self.attr_color is None:
            return

        if self.attr_color.is_discrete:
            names = self.attr_color.values
        else:
            names = self._bin_names()

        items = []
        size = 8
        for name, color in zip(names, self.colors):
            item = QGraphicsItemGroup()
            item.addToGroup(
                CanvasRectangle(None, -size / 2, -size / 2, size, size,
                                Qt.gray, color))
            item.addToGroup(CanvasText(None, name, size, 0, Qt.AlignVCenter))
            items.append(item)

        self.legend = wrap_legend_items(
            items, hspacing=20, vspacing=16 + size,
            max_width=self.view.width() - 25)
        self.legend.setFlags(self.legend.ItemIgnoresTransformations)
        self.legend.setTransform(
            QTransform.fromTranslate(-self.legend.boundingRect().width() / 2,
                                     0))
        self.scene.addItem(self.legend)
        self.set_legend_pos()

    def _bin_names(self):
        """Return labels of bins defined by `self.thresholds`."""
        sval = self.attr_color.repr_val
        return \
            [f"< {sval(self.thresholds[0])}"] \
            + [f"{sval(x)} - {sval(y)}"
               for x, y in zip(self.thresholds, self.thresholds[1:])] \
            + [f"≥ {sval(self.thresholds[-1])}"]

    def set_legend_pos(self):
        """Place the legend below the grid, at the middle of its width."""
        if self.legend is None:
            return
        self.legend.setPos(
            self.size_x / 2,
            (self.size_y + 0.2 + 0.3 * self.hexagonal) * self._grid_factors[1])

    def send_report(self):
        self.report_plot()
        if self.attr_color:
            self.report_caption(
                f"Self-organizing map colored by '{self.attr_color.name}'")
class OWDataTable(widget.OWWidget):
    """Widget showing each input table in its own tab of a tab widget.

    Every input (identified by its signal id ``tid``) gets a `QTableView`.
    The selection made in the current tab is sent on the outputs.
    """
    name = "Data Table"
    description = "View the dataset in a spreadsheet."
    icon = "icons/Table.svg"
    priority = 50

    buttons_area_orientation = Qt.Vertical

    class Inputs:
        data = Input("Data", Table, multiple=True)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    show_distributions = Setting(False)
    dist_color_RGB = Setting((220, 220, 220, 255))
    show_attribute_labels = Setting(True)
    select_rows = Setting(True)
    auto_commit = Setting(True)

    color_by_class = Setting(True)
    settingsHandler = DomainContextHandler(
        match_values=DomainContextHandler.MATCH_VALUES_ALL)
    selected_rows = ContextSetting([])
    selected_cols = ContextSetting([])

    def __init__(self):
        """Build the control area, the buttons area and the tab widget."""
        super().__init__()

        # Maps input id (tid) -> TableSlot, in insertion order.
        self._inputs = OrderedDict()

        self.dist_color = QColor(*self.dist_color_RGB)

        info_box = gui.vBox(self.controlArea, "Info")
        self.info_ex = gui.widgetLabel(info_box, 'No data on input.', )
        self.info_ex.setWordWrap(True)
        self.info_attr = gui.widgetLabel(info_box, ' ')
        self.info_attr.setWordWrap(True)
        self.info_class = gui.widgetLabel(info_box, ' ')
        self.info_class.setWordWrap(True)
        self.info_meta = gui.widgetLabel(info_box, ' ')
        self.info_meta.setWordWrap(True)
        info_box.setMinimumWidth(200)
        gui.separator(self.controlArea)

        box = gui.vBox(self.controlArea, "Variables")
        self.c_show_attribute_labels = gui.checkBox(
            box, self, "show_attribute_labels",
            "Show variable labels (if present)",
            callback=self._on_show_variable_labels_changed)

        gui.checkBox(box, self, "show_distributions",
                     'Visualize numeric values',
                     callback=self._on_distribution_color_changed)
        gui.checkBox(box, self, "color_by_class", 'Color by instance classes',
                     callback=self._on_distribution_color_changed)

        box = gui.vBox(self.controlArea, "Selection")

        gui.checkBox(box, self, "select_rows", "Select full rows",
                     callback=self._on_select_rows_changed)

        gui.rubber(self.controlArea)

        reset = gui.button(
            None, self, "Restore Original Order", callback=self.restore_order,
            tooltip="Show rows in the original order", autoDefault=False)
        self.buttonsArea.layout().insertWidget(0, reset)
        gui.auto_commit(self.buttonsArea, self, "auto_commit",
                        "Send Selected Rows", "Send Automatically")

        # GUI with tabs
        self.tabs = gui.tabWidget(self.mainArea)
        self.tabs.currentChanged.connect(self._on_current_tab_changed)

    def copy_to_clipboard(self):
        """Copy the current selection to the clipboard (see `copy`)."""
        self.copy()

    def sizeHint(self):
        """Return the preferred widget size."""
        return QSize(800, 500)

    @Inputs.data
    def set_dataset(self, data, tid=None):
        """Set the input dataset.

        A non-None `data` creates (or reuses) the view stored under `tid`;
        `data` of None removes the view stored under `tid`, if any.
        Afterwards the stored selection is reset, the context is reopened
        and the selection is committed.
        """
        self.closeContext()
        if data is not None:
            if tid in self._inputs:
                # update existing input slot
                slot = self._inputs[tid]
                view = slot.view
                # reset the (header) view state.
                view.setModel(None)
                view.horizontalHeader().setSortIndicator(-1, Qt.AscendingOrder)
            else:
                view = QTableView()
                view.setSortingEnabled(True)
                view.setHorizontalScrollMode(QTableView.ScrollPerPixel)

                if self.select_rows:
                    view.setSelectionBehavior(QTableView.SelectRows)

                header = view.horizontalHeader()
                header.setSectionsMovable(True)
                header.setSectionsClickable(True)
                header.setSortIndicatorShown(True)
                header.setSortIndicator(-1, Qt.AscendingOrder)

                # QHeaderView does not 'reset' the model sort column,
                # because there is no guaranty (requirement) that the
                # models understand the -1 sort column.
                def sort_reset(index, order):
                    if view.model() is not None and index == -1:
                        view.model().sort(index, order)

                header.sortIndicatorChanged.connect(sort_reset)

            view.dataset = data
            self.tabs.addTab(view, getattr(data, "name", "Data"))

            self._setup_table_view(view, data)
            slot = TableSlot(tid, data, table_summary(data), view)
            view._input_slot = slot
            self._inputs[tid] = slot

            self.tabs.setCurrentIndex(self.tabs.indexOf(view))

            self.set_info(slot.summary)

            if isinstance(slot.summary.len, concurrent.futures.Future):
                # The length is still being computed; refresh the info box
                # (via a queued call to `_update_info`) once it is done.
                def update(f):
                    QMetaObject.invokeMethod(
                        self, "_update_info", Qt.QueuedConnection)

                slot.summary.len.add_done_callback(update)

        elif tid in self._inputs:
            slot = self._inputs.pop(tid)
            view = slot.view
            view.hide()
            view.deleteLater()
            self.tabs.removeTab(self.tabs.indexOf(view))

            current = self.tabs.currentWidget()
            if current is not None:
                self.set_info(current._input_slot.summary)

        self.tabs.tabBar().setVisible(self.tabs.count() > 1)
        self.selected_rows = []
        self.selected_cols = []
        self.openContext(data)
        self.set_selection()
        self.commit()

    def _setup_table_view(self, view, data):
        """Setup the `view` (QTableView) with `data` (Orange.data.Table)
        """
        if data is None:
            view.setModel(None)
            return

        datamodel = TableModel(data)
        datamodel = RichTableDecorator(datamodel)

        rowcount = data.approx_len()

        if self.color_by_class and data.domain.has_discrete_class:
            color_schema = [
                QColor(*c) for c in data.domain.class_var.colors]
        else:
            color_schema = None
        if self.show_distributions:
            view.setItemDelegate(
                gui.TableBarItem(
                    self, color=self.dist_color, color_schema=color_schema)
            )
        else:
            view.setItemDelegate(QStyledItemDelegate(self))

        # Enable/disable view sorting based on data's type
        view.setSortingEnabled(is_sortable(data))
        header = view.horizontalHeader()
        header.setSectionsClickable(is_sortable(data))
        header.setSortIndicatorShown(is_sortable(data))

        view.setModel(datamodel)

        vheader = view.verticalHeader()
        option = view.viewOptions()
        size = view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, option,
            QSize(20, 20), view)

        vheader.setDefaultSectionSize(size.height() + 2)
        vheader.setMinimumSectionSize(5)
        vheader.setSectionResizeMode(QHeaderView.Fixed)

        # Limit the number of rows displayed in the QTableView
        # (workaround for QTBUG-18490 / QTBUG-28631)
        maxrows = (2 ** 31 - 1) // (vheader.defaultSectionSize() + 2)
        if rowcount > maxrows:
            sliceproxy = TableSliceProxy(
                parent=view, rowSlice=slice(0, maxrows))
            sliceproxy.setSourceModel(datamodel)
            # First reset the view (without this the header view retains
            # it's state - at this point invalid/broken)
            view.setModel(None)
            view.setModel(sliceproxy)

        assert view.model().rowCount() <= maxrows
        assert vheader.sectionSize(0) > 1 or datamodel.rowCount() == 0

        # update the header (attribute names)
        self._update_variable_labels(view)

        selmodel = BlockSelectionModel(
            view.model(), parent=view, selectBlocks=not self.select_rows)
        view.setSelectionModel(selmodel)
        view.selectionModel().selectionChanged.connect(self.update_selection)

    #noinspection PyBroadException
    def set_corner_text(self, table, text):
        """Set table corner text."""
        # As this is an ugly hack, do everything in
        # try - except blocks, as it may stop working in newer Qt.

        if not hasattr(table, "btn") and not hasattr(table, "btnfailed"):
            try:
                btn = table.findChild(QAbstractButton)

                class efc(QObject):
                    def eventFilter(self, o, e):
                        if (isinstance(o, QAbstractButton) and
                                e.type() == QEvent.Paint):
                            # paint by hand (borrowed from QTableCornerButton)
                            btn = o
                            opt = QStyleOptionHeader()
                            opt.initFrom(btn)
                            state = QStyle.State_None
                            if btn.isEnabled():
                                state |= QStyle.State_Enabled
                            if btn.isActiveWindow():
                                state |= QStyle.State_Active
                            if btn.isDown():
                                state |= QStyle.State_Sunken
                            opt.state = state
                            opt.rect = btn.rect()
                            opt.text = btn.text()
                            opt.position = QStyleOptionHeader.OnlyOneSection
                            painter = QStylePainter(btn)
                            painter.drawControl(QStyle.CE_Header, opt)
                            return True     # eat event
                        return False
                # Keep a reference on the table so the filter stays alive.
                table.efc = efc()
                btn.installEventFilter(table.efc)
                table.btn = btn

                if sys.platform == "darwin":
                    btn.setAttribute(Qt.WA_MacSmallSize)

            except Exception:
                # Remember the failure so the hack is not retried.
                table.btnfailed = True

        if hasattr(table, "btn"):
            try:
                btn = table.btn
                btn.setText(text)
                opt = QStyleOptionHeader()
                opt.text = btn.text()
                s = btn.style().sizeFromContents(
                    QStyle.CT_HeaderSection,
                    opt, QSize(),
                    btn).expandedTo(QApplication.globalStrut())
                if s.isValid():
                    table.verticalHeader().setMinimumWidth(s.width())
            except Exception:
                # Deliberately best-effort: the corner text is cosmetic.
                pass

    def _on_current_tab_changed(self, index):
        """Update the info box on current tab change"""
        view = self.tabs.widget(index)
        if view is not None and view.model() is not None:
            self.set_info(view._input_slot.summary)
        else:
            self.set_info(None)

    def _update_variable_labels(self, view):
        "Update the variable labels visibility for `view`"
        model = view.model()
        if isinstance(model, TableSliceProxy):
            model = model.sourceModel()

        if self.show_attribute_labels:
            model.setRichHeaderFlags(
                RichTableDecorator.Labels | RichTableDecorator.Name)

            labelnames = set()
            for a in model.source.domain.variables:
                labelnames.update(a.attributes.keys())
            # Labels starting with an underscore are not listed.
            labelnames = sorted(
                [label for label in labelnames if not label.startswith("_")])
            self.set_corner_text(view, "\n".join([""] + labelnames))
        else:
            model.setRichHeaderFlags(RichTableDecorator.Name)
            self.set_corner_text(view, "")

    def _on_show_variable_labels_changed(self):
        """The variable labels (var.attribues) visibility was changed."""
        for slot in self._inputs.values():
            self._update_variable_labels(slot.view)

    def _on_distribution_color_changed(self):
        """Install a fresh item delegate on every tab's view.

        Called when either "show_distributions" or "color_by_class" changes.
        """
        for ti in range(self.tabs.count()):
            view = self.tabs.widget(ti)
            data = self._get_model(view).source
            class_var = data.domain.class_var
            if self.color_by_class and class_var and class_var.is_discrete:
                color_schema = [QColor(*c) for c in class_var.colors]
            else:
                color_schema = None
            if self.show_distributions:
                delegate = gui.TableBarItem(self, color=self.dist_color,
                                            color_schema=color_schema)
            else:
                delegate = QStyledItemDelegate(self)
            view.setItemDelegate(delegate)
        tab = self.tabs.currentWidget()
        if tab:
            tab.reset()

    def _on_select_rows_changed(self):
        """Apply the "select_rows" setting to every view."""
        for slot in self._inputs.values():
            selection_model = slot.view.selectionModel()
            selection_model.setSelectBlocks(not self.select_rows)
            if self.select_rows:
                slot.view.setSelectionBehavior(QTableView.SelectRows)
                # Expand the current selection to full row selection.
                selection_model.select(
                    selection_model.selection(),
                    QItemSelectionModel.Select | QItemSelectionModel.Rows
                )
            else:
                slot.view.setSelectionBehavior(QTableView.SelectItems)

    def restore_order(self):
        """Restore the original data order of the current view."""
        table = self.tabs.currentWidget()
        if table is not None:
            table.horizontalHeader().setSortIndicator(-1, Qt.AscendingOrder)

    def set_info(self, summary):
        """Fill the info labels from `summary`; None shows the no-data text."""
        if summary is None:
            self.info_ex.setText("No data on input.")
            self.info_attr.setText("")
            self.info_class.setText("")
            self.info_meta.setText("")
        else:
            info_len, info_attr, info_class, info_meta = \
                format_summary(summary)

            self.info_ex.setText(info_len)
            self.info_attr.setText(info_attr)
            self.info_class.setText(info_class)
            self.info_meta.setText(info_meta)

    @Slot()
    def _update_info(self):
        """Refresh the info box for the current tab."""
        current = self.tabs.currentWidget()
        if current is not None and current.model() is not None:
            self.set_info(current._input_slot.summary)

    def update_selection(self, *_):
        """Slot for the views' selectionChanged signal; commits the output."""
        self.commit()

    def set_selection(self):
        """Apply `selected_rows` / `selected_cols` to the current view.

        Nothing is done when either list is empty, when there is no current
        view (or it has no model), or when the last stored row or column
        index lies outside the model.
        """
        if len(self.selected_rows) and len(self.selected_cols):
            view = self.tabs.currentWidget()
            if view is None or view.model() is None:
                # No tab to select in; avoid an AttributeError on None.
                return
            model = view.model()
            if model.rowCount() <= self.selected_rows[-1] or \
                    model.columnCount() <= self.selected_cols[-1]:
                return

            selection = QItemSelection()
            rowranges = list(ranges(self.selected_rows))
            colranges = list(ranges(self.selected_cols))

            for rowstart, rowend in rowranges:
                for colstart, colend in colranges:
                    selection.append(
                        QItemSelectionRange(
                            model.index(rowstart, colstart),
                            model.index(rowend - 1, colend - 1)
                        )
                    )
            view.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def get_selection(self, view):
        """
        Return the selected row and column indices of the selection in view.
        """
        selection = view.selectionModel().selection()
        model = view.model()
        # map through the proxies into input table.
        while isinstance(model, QAbstractProxyModel):
            selection = model.mapSelectionToSource(selection)
            model = model.sourceModel()

        assert isinstance(model, TableModel)

        indexes = selection.indexes()

        rows = numpy.unique([ind.row() for ind in indexes])
        # map the rows through the applied sorting (if any)
        rows = model.mapToSourceRows(rows)
        rows.sort()
        rows = rows.tolist()

        cols = sorted(set(ind.column() for ind in indexes))
        return rows, cols

    @staticmethod
    def _get_model(view):
        """Return the model of `view` with all proxy models unwrapped."""
        model = view.model()
        while isinstance(model, QAbstractProxyModel):
            model = model.sourceModel()
        return model

    def commit(self):
        """
        Commit/send the current selected row/column selection.
        """
        selected_data = table = rowsel = None
        view = self.tabs.currentWidget()
        if view and view.model() is not None:
            model = self._get_model(view)
            table = model.source  # The input data table

            # Selections of individual instances are not implemented
            # for SqlTables
            if isinstance(table, SqlTable):
                self.Outputs.selected_data.send(selected_data)
                self.Outputs.annotated_data.send(None)
                return

            rowsel, colsel = self.get_selection(view)
            self.selected_rows, self.selected_cols = rowsel, colsel

            def select(data, rows, domain):
                """
                Select the data subset with specified rows and domain subsets.

                If either rows or domain is None they mean select all.
                """
                if rows is not None and domain is not None:
                    return data.from_table(domain, data, rows)
                elif rows is not None:
                    # `data` is the source table; `rows` are the row indices.
                    return data.from_table(data.domain, data, rows)
                elif domain is not None:
                    return data.from_table(domain, data)
                else:
                    return data

            domain = table.domain

            if len(colsel) < len(domain) + len(domain.metas):
                # only a subset of the columns is selected
                allvars = domain.class_vars + domain.metas + domain.attributes
                columns = [(c, model.headerData(c, Qt.Horizontal,
                                                TableModel.DomainRole))
                           for c in colsel]
                assert all(role is not None for _, role in columns)

                def select_vars(role):
                    """select variables for role (TableModel.DomainRole)"""
                    return [allvars[c] for c, r in columns if r == role]

                attrs = select_vars(TableModel.Attribute)
                if attrs and issparse(table.X):
                    # for sparse data you can only select all attributes
                    attrs = table.domain.attributes
                class_vars = select_vars(TableModel.ClassVar)
                metas = select_vars(TableModel.Meta)
                domain = Orange.data.Domain(attrs, class_vars, metas)

            # Avoid a copy if all/none rows are selected.
            if not rowsel:
                selected_data = None
            elif len(rowsel) == len(table):
                selected_data = select(table, None, domain)
            else:
                selected_data = select(table, rowsel, domain)

        self.Outputs.selected_data.send(selected_data)
        self.Outputs.annotated_data.send(create_annotated_table(table, rowsel))

    def copy(self):
        """
        Copy current table selection to the clipboard.
        """
        view = self.tabs.currentWidget()
        if view is not None:
            mime = table_selection_to_mime_data(view)
            QApplication.clipboard().setMimeData(
                mime, QClipboard.Clipboard
            )

    def send_report(self):
        """Report a brief data summary and the current view's table."""
        view = self.tabs.currentWidget()
        if not view or not view.model():
            return
        model = self._get_model(view)
        self.report_data_brief(model.source)
        self.report_table(view)
class OWVisWidget(OWWidget):
    """Widget base that keeps a list of shown and a list of hidden attributes.

    Both lists hold ``(name, vartype)`` pairs; the public properties expose
    only the first element of each pair.
    """
    _shown_attributes = ContextSetting(default=[],
                                       required=ContextSetting.REQUIRED,
                                       selected='selected_shown',
                                       reservoir="_hidden_attributes")
    # Setting above will override these fields
    _hidden_attributes = ()
    selected_shown = ()
    selected_hidden = ()

    @property
    def shown_attributes(self):
        """First elements of the entries in `_shown_attributes`."""
        return [entry[0] for entry in self._shown_attributes]

    @shown_attributes.setter
    def shown_attributes(self, value):
        """Recompute the shown/hidden lists from `value` and the data domain.

        Both selections are cleared and `trigger_attributes_changed` is
        called afterwards.
        """
        def describe(var):
            return (var.name, vartype(var))

        visible = []
        concealed = []
        domain = self.get_data_domain()

        if domain:
            if value:
                if isinstance(value[0], tuple):
                    # Already given as (name, vartype) pairs.
                    visible = value
                else:
                    visible = [describe(domain[key]) for key in value]
                every_attr = [describe(domain[a]) for a in domain.attributes]
                concealed = [info for info in every_attr
                             if info not in visible]
            else:
                visible = [describe(a) for a in domain.attributes]
                if not self.show_all_attributes:
                    visible, concealed = visible[:10], visible[10:]

            if domain.class_var:
                class_info = describe(domain.class_var)
                if class_info not in visible:
                    concealed.append(class_info)

        self._shown_attributes = visible
        self._hidden_attributes = concealed
        self.selected_hidden = []
        self.selected_shown = []

        self.trigger_attributes_changed()

    @property
    def hidden_attributes(self):
        """First elements of the entries in `_hidden_attributes`."""
        return [entry[0] for entry in self._hidden_attributes]

    __attribute_selection_area_initialized = False

    #noinspection PyAttributeOutsideInit
    def add_attribute_selection_area(self, parent):
        """Create both list boxes under `parent` and fire the first change."""
        self.add_shown_attributes(parent)
        self.add_hidden_attributes(parent)
        self.__attribute_selection_area_initialized = True

        self.trigger_attributes_changed()

    #noinspection PyAttributeOutsideInit
    def add_shown_attributes(self, parent):
        """Create the box and list box for the shown attributes."""
        area = gui.widgetBox(parent, " Shown attributes ")
        self.shown_attributes_area = area
        self.shown_attributes_listbox = gui.listBox(
            area, self, "selected_shown", "_shown_attributes",
            dragDropCallback=self.trigger_attributes_changed,
            enableDragDrop=True,
            selectionMode=QListWidget.ExtendedSelection)

    #noinspection PyAttributeOutsideInit
    def add_hidden_attributes(self, parent):
        """Create the box and list box for the hidden attributes."""
        area = gui.widgetBox(parent, " Hidden attributes ")
        self.hidden_attributes_area = area
        self.hidden_attributes_listbox = gui.listBox(
            area, self, "selected_hidden", "_hidden_attributes",
            dragDropCallback=self.trigger_attributes_changed,
            enableDragDrop=True,
            selectionMode=QListWidget.ExtendedSelection)

    def get_data_domain(self):
        """Return ``self.data.domain``, or None without (truthy) data."""
        data = getattr(self, "data", None)
        return data.domain if data else None

    def trigger_attributes_changed(self):
        """Call `attributes_changed` once the selection area exists."""
        if self.__attribute_selection_area_initialized:
            self.attributes_changed()
        # Otherwise: some components trigger this event during the
        # initialization. We ignore those requests, a separate event will
        # be triggered manually when everything is initialized.

    def closeContext(self):
        """Close the context, then drop the data and reset the lists."""
        super().closeContext()

        self.data = None
        self.shown_attributes = None

    # "Events"
    def attributes_changed(self):
        """Hook invoked when the attribute lists change; no-op here."""
class OWDifference(widget.OWWidget):
    """Widget appending differenced/quotient/percent-change columns.

    For every selected variable a new continuous attribute is computed and
    appended to the attributes of the input series.
    """
    name = 'Difference'
    description = 'Make the time series stationary by replacing it with ' \
                  '1st or 2nd order discrete difference along its values. '
    icon = 'icons/Difference.svg'
    priority = 570
    keywords = ['difference', 'derivative', 'quotient', 'percent change']

    class Inputs:
        time_series = Input("Time series", Table)

    class Outputs:
        time_series = Output("Time series", Timeseries)

    settingsHandler = DomainContextHandler()
    selected = ContextSetting([], schema_only=True)

    class Operation(str, Enum):
        DIFF = 'Difference'
        QUOT = 'Quotient'
        PERC = 'Percentage change'

    want_main_area = False
    resizing_enabled = False

    chosen_operation = settings.Setting(Operation.DIFF)
    diff_order = settings.Setting(1)
    shift_period = settings.Setting(1)
    invert_direction = settings.Setting(False)
    autocommit = settings.Setting(True)

    UserAdviceMessages = [
        widget.Message(
            'Series can be differentiated up to the 2nd order. '
            'However, if the series is shifted by other than 1 '
            'step, a differencing order of 1 is always assumed.',
            'diff-shift')
    ]

    def __init__(self):
        """Build the controls and the variable list view."""
        self.data = None
        box = gui.vBox(self.controlArea, 'Differencing')

        gui.comboBox(box, self, 'chosen_operation',
                     orientation=Qt.Horizontal,
                     items=[el.value for el in self.Operation],
                     label='Compute:',
                     callback=self.on_changed,
                     sendSelectedValue=True)

        self.order_spin = gui.spin(
            box, self, 'diff_order', 1, 2,
            label='Differencing order:',
            callback=self.on_changed,
            tooltip='The value corresponds to n-th order numerical '
                    'derivative of the series. \nThe order is fixed to 1 '
                    'if the shift period is other than 1.')
        gui.spin(box, self, 'shift_period', 1, 100,
                 label='Shift:',
                 callback=self.on_changed,
                 tooltip='Set this to other than 1 if you don\'t want to '
                         'compute differences for subsequent values but for '
                         'values shifted number of spaces apart. \n'
                         'If this value is different from 1, differencing '
                         'order is fixed to 1.')
        gui.checkBox(box, self, 'invert_direction',
                     label='Invert differencing direction',
                     callback=self.on_changed,
                     tooltip='Influences where the series is padded with nan '
                             'values — at the beginning or at the end.')
        self.view = view = QListView(self,
                                     selectionMode=QListView.ExtendedSelection)
        self.model = model = VariableListModel(parent=self)
        view.setModel(model)
        view.selectionModel().selectionChanged.connect(self.on_changed)
        box.layout().addWidget(view)
        gui.auto_commit(box, self, 'autocommit', '&Apply')

    @Inputs.time_series
    def set_data(self, data):
        """Input handler: convert `data` to a Timeseries and refill the list.

        The list shows the continuous variables other than the time variable.
        """
        self.closeContext()
        self.data = data = None if data is None else \
            Timeseries.from_data_table(data)
        if data is not None:
            self.model[:] = [var for var in data.domain.variables
                             if var.is_continuous and
                             var is not data.time_variable]
            self.select_default_variable()
            self.openContext(self.data)
            self._restore_selection()
        else:
            self.reset_model()
        self.on_changed()

    def _restore_selection(self):
        """Select in the view the rows of the variables in `self.selected`.

        Signals of the selection model are blocked meanwhile. Entries of
        `self.selected` that are not among the listed variables (e.g. the
        placeholder index stored by `select_default_variable` when the list
        is empty) are skipped instead of raising ValueError.
        """
        # gymnastics for transforming variable names back to indices
        var_list = [var for var in self.data.domain.variables
                    if var.is_continuous and
                    var is not self.data.time_variable]
        with signal_blocking(self.view.selectionModel()):
            indices = [var_list.index(var) for var in self.selected
                       if var in var_list]
            select_rows(self.view, indices)

    def select_default_variable(self):
        """Select the first row of the view."""
        self.selected = [0]
        select_rows(self.view, self.selected)

    def reset_model(self):
        """Empty the variable list."""
        self.model.wrap([])

    def on_changed(self):
        """Sync `self.selected` with the view and commit.

        The order spin box is enabled only for the difference operation
        with a shift of 1.
        """
        rows = [index.row()
                for index in self.view.selectionModel().selectedRows()]
        self.order_spin.setEnabled(
            self.shift_period == 1 and
            self.chosen_operation == self.Operation.DIFF)
        self.selected = [self.model[row] for row in rows]
        self.commit()

    def commit(self):
        """Compute the new columns and send the extended time series.

        Sends None when there is no data or nothing is selected.
        """
        data = self.data
        if not data or not len(self.selected):
            self.Outputs.time_series.send(None)
            return

        X = []
        attrs = []
        invert = self.invert_direction
        shift = self.shift_period
        order = self.diff_order
        op = self.chosen_operation

        for var in self.selected:
            col = np.ravel(data[:, var])

            if invert:
                col = col[::-1]

            out = np.empty(len(col))
            if op == self.Operation.DIFF and shift == 1:
                out[order:] = np.diff(col, order)
                out[:order] = np.nan
            else:
                if op == self.Operation.DIFF:
                    out[shift:] = col[shift:] - col[:-shift]
                else:
                    out[shift:] = np.divide(col[shift:], col[:-shift])
                    if op == self.Operation.PERC:
                        out = (out - 1) * 100
                # The leading values have no predecessor to compare with.
                out[:shift] = np.nan

            if invert:
                out = out[::-1]

            X.append(out)

            if op == self.Operation.DIFF and shift == 1:
                details = f'order={order}'
            else:
                details = f'shift={shift}'

            template = f'{var} ({op[:4].lower()}; {details})'
            name = available_name(data.domain, template)
            attrs.append(ContinuousVariable(name))

        ts = Timeseries(
            Domain(data.domain.attributes + tuple(attrs),
                   data.domain.class_vars,
                   data.domain.metas),
            np.column_stack((data.X, np.column_stack(X))),
            data.Y, data.metas)
        ts.time_variable = data.time_variable
        self.Outputs.time_series.send(ts)
class OWCreateClass(widget.OWWidget):
    """Widget building a discrete class variable from substring rules.

    Each rule is a ``[label, pattern]`` pair applied to the values of the
    chosen string or discrete attribute; rules are stored per attribute
    name in the `rules` dictionary.
    """
    name = "Create Class"
    description = "Create class attribute from a string attribute"
    icon = "icons/CreateClass.svg"
    category = "Data"
    keywords = []

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        data = Output("Data", Table)

    want_main_area = False
    buttons_area_orientation = Qt.Vertical

    settingsHandler = DomainContextHandler()
    attribute = ContextSetting(None)
    class_name = ContextSetting("class")
    rules = ContextSetting({})
    match_beginning = ContextSetting(False)
    case_sensitive = ContextSetting(False)

    TRANSFORMERS = {StringVariable: ValueFromStringSubstring,
                    DiscreteVariable: ValueFromDiscreteSubstring}

    # Cached variables are used so that two instances of the widget with the
    # same settings will create the same variable. The usual `make` wouldn't
    # work here because variables with `compute_value` are not reused.
    cached_variables = {}

    class Warning(widget.OWWidget.Warning):
        no_nonnumeric_vars = Msg("Data contains only numeric variables.")

    class Error(widget.OWWidget.Error):
        class_name_duplicated = Msg("Class name duplicated.")
        class_name_empty = Msg("Class name should not be empty.")

    def __init__(self):
        """Build the controls: class name, attribute combo, rules grid."""
        super().__init__()
        self.data = None

        # The following lists are of the same length as self.active_rules

        #: list of pairs with counts of matches for each patter when the
        #     patterns are applied in order and when applied on the entire set,
        #     disregarding the preceding patterns
        self.match_counts = []

        #: list of list of QLineEdit: line edit pairs for each pattern
        self.line_edits = []
        #: list of QPushButton: list of remove buttons
        self.remove_buttons = []
        #: list of list of QLabel: pairs of labels with counts
        self.counts = []

        gui.lineEdit(
            self.controlArea, self, "class_name",
            orientation=Qt.Horizontal, box="New Class Name")

        variable_select_box = gui.vBox(self.controlArea, "Match by Substring")

        combo = gui.comboBox(
            variable_select_box, self, "attribute", label="From column:",
            orientation=Qt.Horizontal, searchable=True,
            callback=self.update_rules,
            model=DomainModel(valid_types=(StringVariable, DiscreteVariable)))
        # Don't use setSizePolicy keyword argument here: it applies to box,
        # not the combo
        combo.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Preferred)

        patternbox = gui.vBox(variable_select_box)
        #: QWidget: the box that contains the remove buttons, line edits and
        #    count labels. The lines are added and removed dynamically.
        self.rules_box = rules_box = QGridLayout()
        rules_box.setSpacing(4)
        rules_box.setContentsMargins(4, 4, 4, 4)
        self.rules_box.setColumnMinimumWidth(1, 70)
        self.rules_box.setColumnMinimumWidth(0, 10)
        self.rules_box.setColumnStretch(0, 1)
        self.rules_box.setColumnStretch(1, 1)
        self.rules_box.setColumnStretch(2, 100)
        rules_box.addWidget(QLabel("Name"), 0, 1)
        rules_box.addWidget(QLabel("Substring"), 0, 2)
        rules_box.addWidget(QLabel("Count"), 0, 3, 1, 2)
        self.update_rules()

        widget = QWidget(patternbox)
        widget.setLayout(rules_box)
        patternbox.layout().addWidget(widget)

        box = gui.hBox(patternbox)
        gui.rubber(box)
        gui.button(box, self, "+", callback=self.add_row,
                   autoDefault=False, width=34,
                   sizePolicy=(QSizePolicy.Maximum,
                               QSizePolicy.Maximum))

        optionsbox = gui.vBox(self.controlArea, "Options")
        gui.checkBox(
            optionsbox, self, "match_beginning", "Match only at the beginning",
            callback=self.options_changed)
        gui.checkBox(
            optionsbox, self, "case_sensitive", "Case sensitive",
            callback=self.options_changed)

        gui.rubber(self.controlArea)

        gui.button(self.buttonsArea, self, "Apply", callback=self.apply)

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

        # TODO: Resizing upon changing the number of rules does not work
        self.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Maximum)

    @property
    def active_rules(self):
        """
        Returns the class names and patterns corresponding to the currently
        selected attribute. If the attribute is not yet in the dictionary,
        set the default.
        """
        return self.rules.setdefault(self.attribute and self.attribute.name,
                                     [["", ""], ["", ""]])

    def rules_to_edits(self):
        """Fill the line edites with the rules from the current settings."""
        for editr, textr in zip(self.line_edits, self.active_rules):
            for edit, text in zip(editr, textr):
                edit.setText(text)

    @Inputs.data
    def set_data(self, data):
        """Input data signal handler."""
        self.closeContext()
        self.rules = {}
        self.data = data
        summary = len(data) if data else self.info.NoInput
        details = format_summary_details(data) if data else ""
        self.info.set_input_summary(summary, details)
        model = self.controls.attribute.model()
        model.set_domain(data.domain if data is not None else None)
        self.Warning.no_nonnumeric_vars(shown=data is not None and not model)
        if not model:
            # Nothing to choose from: clear the attribute and the output.
            self.attribute = None
            self.Outputs.data.send(None)
            self.info.set_output_summary(self.info.NoOutput)
            return
        self.attribute = model[0]
        self.openContext(data)
        self.update_rules()
        self.apply()

    def update_rules(self):
        """Called when the rules are changed: adjust the number of lines in
        the form and fill them, update the counts. The widget does not have
        auto-apply."""
        self.adjust_n_rule_rows()
        self.rules_to_edits()
        self.update_counts()
        # TODO: Indicator that changes need to be applied

    def options_changed(self):
        """Recompute the counts after a matching option was toggled."""
        self.update_counts()

    def adjust_n_rule_rows(self):
        """Add or remove lines if needed and fix the tab order."""
        def _add_line():
            self.line_edits.append([])
            n_lines = len(self.line_edits)
            for coli in range(1, 3):
                edit = QLineEdit()
                self.line_edits[-1].append(edit)
                self.rules_box.addWidget(edit, n_lines, coli)
                edit.textChanged.connect(self.sync_edit)
            button = gui.button(
                None, self, label='×', width=33,
                autoDefault=False, callback=self.remove_row,
                sizePolicy=(QSizePolicy.Maximum,
                            QSizePolicy.Maximum)
            )
            self.remove_buttons.append(button)
            self.rules_box.addWidget(button, n_lines, 0)
            self.counts.append([])
            for coli, kwargs in enumerate(
                    (dict(),
                     dict(styleSheet="color: gray"))):
                label = QLabel(alignment=Qt.AlignCenter, **kwargs)
                self.counts[-1].append(label)
                self.rules_box.addWidget(label, n_lines, 3 + coli)

        def _remove_line():
            for edit in self.line_edits.pop():
                edit.deleteLater()
            self.remove_buttons.pop().deleteLater()
            for label in self.counts.pop():
                label.deleteLater()

        def _fix_tab_order():
            prev = None
            for row, rule in zip(self.line_edits, self.active_rules):
                for col_idx, edit in enumerate(row):
                    # `sync_edit` uses these to write back into the rule.
                    edit.row, edit.col_idx = rule, col_idx
                    if prev is not None:
                        self.setTabOrder(prev, edit)
                    prev = edit

        n = len(self.active_rules)
        while n > len(self.line_edits):
            _add_line()
        while len(self.line_edits) > n:
            _remove_line()
        _fix_tab_order()

    def add_row(self):
        """Append a new row at the end."""
        self.active_rules.append(["", ""])
        self.adjust_n_rule_rows()
        self.update_counts()

    def remove_row(self):
        """Remove a row."""
        remove_idx = self.remove_buttons.index(self.sender())
        del self.active_rules[remove_idx]
        self.update_rules()
        self.update_counts()

    def sync_edit(self, text):
        """Handle changes in line edits: update the active rules and counts"""
        edit = self.sender()
        edit.row[edit.col_idx] = text
        self.update_counts()

    def class_labels(self):
        """Construct a list of class labels. Empty labels are replaced with
        C1, C2, C3. If C<n> already appears in the list of values given by
        the user, the labels start at C<n+1> instead.

        Only labels that consist entirely of "C" followed by digits are
        considered; a label such as "C1x" is ignored here (it used to make
        the integer conversion fail).
        """
        largest_c = max((int(label[1:]) for label, _ in self.active_rules
                         if re.fullmatch(r"C\d+", label)),
                        default=0)
        class_count = count(largest_c + 1)
        return [label_edit.text() or "C{}".format(next(class_count))
                for label_edit, _ in self.line_edits]

    def update_counts(self):
        """Recompute and update the counts of matches."""
        def _matcher(strings, pattern):
            """Return indices of strings into patterns; consider case
            sensitivity and matching at the beginning. The given strings are
            assumed to be in lower case if match is case insensitive. Patterns
            are fixed on the fly."""
            if not self.case_sensitive:
                pattern = pattern.lower()
            indices = np.char.find(strings, pattern.strip())
            return indices == 0 if self.match_beginning else indices != -1

        def _lower_if_needed(strings):
            return strings if self.case_sensitive else np.char.lower(strings)

        def _string_counts():
            """
            Generate pairs of arrays for each rule until running out of data
            instances. np.sum over the two arrays in each pair gives the
            number of matches of the remaining instances (considering the
            order of patterns) and of the original data.

            For _string_counts, the arrays contain bool masks referring to the
            original data
            """
            nonlocal data
            data = data.astype(str)
            data = data[~np.char.equal(data, "")]
            data = _lower_if_needed(data)
            remaining = np.array(data)
            for _, pattern in self.active_rules:
                matching = _matcher(remaining, pattern)
                total_matching = _matcher(data, pattern)
                yield matching, total_matching
                remaining = remaining[~matching]
                if not remaining.size:
                    break

        def _discrete_counts():
            """
            Generate pairs similar to _string_counts, except that the arrays
            contain bin counts for the attribute's values matching the pattern.
            """
            attr_vals = np.array(attr.values)
            attr_vals = _lower_if_needed(attr_vals)
            bins = bincount(data, max_val=len(attr.values) - 1)[0]
            remaining = np.array(bins)
            for _, pattern in self.active_rules:
                matching = _matcher(attr_vals, pattern)
                yield remaining[matching], bins[matching]
                remaining[matching] = 0
                if not np.any(remaining):
                    break

        def _clear_labels():
            """Clear all labels"""
            for lab_matched, lab_total in self.counts:
                lab_matched.setText("")
                lab_total.setText("")

        def _set_labels():
            """Set the labels to show the counts"""
            for (n_matched, n_total), (lab_matched, lab_total), (lab, patt) in \
                    zip(self.match_counts, self.counts, self.active_rules):
                n_before = n_total - n_matched
                lab_matched.setText("{}".format(n_matched))
                if n_before and (lab or patt):
                    lab_total.setText("+ {}".format(n_before))
                    if n_matched:
                        tip = "{} of the {} matching instances are already " \
                              "covered above".format(n_before, n_total)
                    else:
                        tip = "All matching instances are already covered above"
                    lab_total.setToolTip(tip)
                    lab_matched.setToolTip(tip)

        def _set_placeholders():
            """Set placeholders for empty edit lines"""
            # Pad with zeros so rows without computed counts read "(unused)".
            matches = [n for n, _ in self.match_counts] + \
                      [0] * len(self.line_edits)
            for n_matched, (_, patt) in zip(matches, self.line_edits):
                if not patt.text():
                    patt.setPlaceholderText(
                        "(remaining instances)" if n_matched else "(unused)")

            labels = self.class_labels()
            for label, (lab_edit, _) in zip(labels, self.line_edits):
                if not lab_edit.text():
                    lab_edit.setPlaceholderText(label)

        _clear_labels()
        attr = self.attribute
        if attr is None:
            return
        counters = {StringVariable: _string_counts,
                    DiscreteVariable: _discrete_counts}
        data = self.data.get_column_view(attr)[0]
        self.match_counts = [[int(np.sum(x)) for x in matches]
                             for matches in counters[type(attr)]()]
        _set_labels()
        _set_placeholders()

    def apply(self):
        """Output the transformed data."""
        self.Error.clear()
        self.class_name = self.class_name.strip()
        if not self.attribute:
            self.Outputs.data.send(None)
            self.info.set_output_summary(self.info.NoOutput)
            return
        domain = self.data.domain
        if not self.class_name:
            self.Error.class_name_empty()
        if self.class_name in domain:
            self.Error.class_name_duplicated()
        if not self.class_name or self.class_name in domain:
            self.Outputs.data.send(None)
            self.info.set_output_summary(self.info.NoOutput)
            return
        new_class = self._create_variable()
        # The previous class variables are moved to the metas.
        new_domain = Domain(
            domain.attributes, new_class, domain.metas + domain.class_vars)
        new_data = self.data.transform(new_domain)
        summary = len(new_data) if new_data is not None else self.info.NoOutput
        details = format_summary_details(new_data) \
            if new_data is not None else ""
        self.info.set_output_summary(summary, details)
        self.Outputs.data.send(new_data)

    def _create_variable(self):
        """Return the class variable for the current rules and options.

        Variables are looked up in (and stored to) `cached_variables`.
        """
        rules = self.active_rules
        # Transposition + stripping
        valid_rules = [label or pattern or n_matches
                       for (label, pattern), n_matches in
                       zip(rules, self.match_counts)]
        patterns = tuple(
            pattern for (_, pattern), valid in zip(rules, valid_rules) if valid)
        names = tuple(
            name for name, valid in zip(self.class_labels(), valid_rules)
            if valid)
        transformer = self.TRANSFORMERS[type(self.attribute)]

        # join patters with the same names
        names, map_values = unique_in_order_mapping(names)
        names = tuple(str(a) for a in names)
        map_values = tuple(map_values)

        var_key = (self.attribute, self.class_name, names,
                   patterns, self.case_sensitive, self.match_beginning,
                   map_values)
        if var_key in self.cached_variables:
            return self.cached_variables[var_key]

        compute_value = transformer(
            self.attribute, patterns, self.case_sensitive,
            self.match_beginning, map_values)
        new_var = DiscreteVariable(
            self.class_name, names, compute_value=compute_value)
        self.cached_variables[var_key] = new_var
        return new_var

    def send_report(self):
        """Report the source attribute and one list item per used rule."""
        # Pylint gives false positives: these functions are always called from
        # within the loop
        # pylint: disable=undefined-loop-variable
        def _cond_part():
            rule = "<b>{}</b> ".format(class_name)
            if patt:
                rule += "if <b>{}</b> contains <b>{}</b>".format(
                    self.attribute.name, patt)
            else:
                rule += "otherwise"
            return rule

        def _count_part():
            if not n_matched:
                return "all {} matching instances are already covered " \
                       "above".format(n_total)
            elif n_matched < n_total and patt:
                return "{} matching instances (+ {} that are already " \
                       "covered above)".format(n_matched, n_total - n_matched)
            else:
                return "{} matching instances".format(n_matched)

        if not self.attribute:
            return
        self.report_items("Input", [("Source attribute", self.attribute.name)])
        output = ""
        names = self.class_labels()
        for (n_matched, n_total), class_name, (lab, patt) in \
                zip(self.match_counts, names, self.active_rules):
            if lab or patt or n_total:
                output += "<li>{}; {}</li>".format(_cond_part(), _count_part())
        if output:
            self.report_items("Output", [("Class name", self.class_name)])
            self.report_raw("<ol>{}</ol>".format(output))
class OWPivot(OWWidget):
    """Widget that reshapes the input table into a pivot table.

    The user picks a row feature, an optional column feature, an optional
    value feature and a set of aggregation functions. The widget sends the
    pivot table, the grouped table and the rows of the input data matching
    the cells selected in the view.
    """

    name = "Pivot Table"
    description = "Reshape data table based on column values."
    icon = "icons/Pivot.svg"
    priority = 1000
    keywords = ["pivot", "group", "aggregate"]

    class Inputs:
        data = Input("Data", Table, default=True)

    class Outputs:
        pivot_table = Output("Pivot Table", Table, default=True)
        filtered_data = Output("Filtered Data", Table)
        grouped_data = Output("Grouped Data", Table)

    class Warning(OWWidget.Warning):
        # TODO - inconsistent for different variable types
        no_col_feature = Msg("Column feature should be selected.")
        cannot_aggregate = Msg("Some aggregations ({}) cannot be computed.")
        renamed_vars = Msg("Some variables have been renamed in some tables "
                           "to avoid duplicates.\n{}")
        too_many_values = Msg("Selected variable has too many values.")

    settingsHandler = DomainContextHandler()
    row_feature = ContextSetting(None)
    col_feature = ContextSetting(None)
    val_feature = ContextSetting(None)
    sel_agg_functions = Setting({Pivot.Count})
    selection = Setting(set(), schema_only=True)
    auto_commit = Setting(True)

    # Layout description for the aggregation check boxes: ``None`` inserts a
    # horizontal separator, ``2`` starts a new column in the current grid.
    AGGREGATIONS = (Pivot.Count,
                    Pivot.Count_defined,
                    None,  # separator
                    Pivot.Sum,
                    Pivot.Mean,
                    Pivot.Var,
                    Pivot.Median,
                    2,  # column break
                    Pivot.Mode,
                    Pivot.Min,
                    Pivot.Max,
                    None,
                    Pivot.Majority)

    # The pivot is not computed when the column variable has this many (or
    # more) distinct values.
    MAX_VALUES = 100

    def __init__(self):
        super().__init__()
        self.data = None  # type: Table
        self.pivot = None  # type: Pivot
        # Selection restored from settings; applied on the first table update.
        self.__pending_selection = self.selection  # type: Set
        self._add_control_area_controls()
        self._add_main_area_controls()

    def _add_control_area_controls(self):
        """Build the feature combos, aggregation boxes and the apply button."""
        gui.comboBox(gui.vBox(self.controlArea, box="Rows"),
                     self, "row_feature", contentsLength=14,
                     searchable=True,
                     model=DomainModel(valid_types=DomainModel.PRIMITIVE),
                     callback=self.__feature_changed,
                     orientation=Qt.Horizontal)
        gui.comboBox(gui.vBox(self.controlArea, box="Columns"),
                     self, "col_feature", contentsLength=14,
                     searchable=True,
                     model=DomainModel(placeholder="(Same as rows)",
                                       valid_types=DiscreteVariable),
                     callback=self.__feature_changed,
                     orientation=Qt.Horizontal)
        gui.comboBox(gui.vBox(self.controlArea, box="Values"),
                     self, "val_feature", contentsLength=14,
                     searchable=True,
                     model=DomainModel(placeholder="(None)"),
                     callback=self.__val_feature_changed,
                     orientation=Qt.Horizontal)
        self.__add_aggregation_controls()
        gui.rubber(self.controlArea)
        gui.auto_apply(self.buttonsArea, self, "auto_commit")

        self.set_input_summary()
        self.set_output_summary(None, None, None)

    def __add_aggregation_controls(self):
        """Create one check box per aggregation, laid out per AGGREGATIONS."""
        def new_inbox():
            # Start a fresh grid below whatever is already in the box.
            nonlocal row, col, inbox
            inbox = QWidget()
            layout = QGridLayout()
            inbox.setLayout(layout)
            layout.setContentsMargins(0, 0, 0, 0)
            box.layout().addWidget(inbox)
            row = col = 0

        box = gui.vBox(self.controlArea, "Aggregations")
        row = col = 0
        inbox = None
        new_inbox()
        self.aggregation_checkboxes = []  # for test purposes
        for agg in self.AGGREGATIONS:
            if agg is None:
                line = QFrame()
                line.setFrameShape(QFrame.HLine)
                line.setLineWidth(1)
                line.setFrameShadow(QFrame.Sunken)
                box.layout().addWidget(line)
                new_inbox()
                continue
            elif agg == 2:
                col += 1
                row = 0
                continue
            check_box = QCheckBox(str(agg), inbox)
            check_box.setChecked(agg in self.sel_agg_functions)
            # ``a=agg`` binds the current aggregation; ``args[0]`` is the
            # checked state passed by the ``clicked`` signal.
            check_box.clicked.connect(
                lambda *args, a=agg:
                self.__aggregation_cb_clicked(a, args[0]))
            inbox.layout().addWidget(check_box, row, col)
            self.aggregation_checkboxes.append(check_box)
            row += 1

    def _add_main_area_controls(self):
        """Create the pivot table view and hook up its selection signal."""
        self.table_view = PivotTableView()
        self.table_view.selection_changed.connect(self.__invalidate_filtered)
        self.mainArea.layout().addWidget(self.table_view)

    @property
    def no_col_feature(self):
        """True when no column feature is set and the row one is continuous."""
        return self.col_feature is None and self.row_feature is not None \
            and self.row_feature.is_continuous

    @property
    def skipped_aggs(self):
        """Comma-separated, sorted names of selected aggregations that do not
        apply to the current value feature (empty string if none)."""
        def add(fun):
            data, var = self.data, self.val_feature
            return data and not var and fun not in Pivot.AutonomousFunctions \
                or var and var.is_discrete and fun in Pivot.ContVarFunctions \
                or var and var.is_continuous and fun in Pivot.DiscVarFunctions
        skipped = [str(fun) for fun in self.sel_agg_functions if add(fun)]
        return ", ".join(sorted(skipped))

    def __feature_changed(self):
        """Row/column feature changed: drop the pivot and recompute."""
        self.selection = set()
        self.pivot = None
        self.commit()

    def __val_feature_changed(self):
        """Value feature changed: refresh the pivot table of the existing
        pivot, if there is one."""
        self.selection = set()
        # There is no pivot to update when a column feature is required or
        # when the last commit returned early (e.g. too many values).
        if self.no_col_feature or self.pivot is None:
            return
        self.pivot.update_pivot_table(self.val_feature)
        self.commit()

    def __aggregation_cb_clicked(self, agg_fun: Pivot.Functions,
                                 checked: bool):
        """Add/remove ``agg_fun`` from the selected aggregations and update."""
        self.selection = set()
        if checked:
            self.sel_agg_functions.add(agg_fun)
        else:
            self.sel_agg_functions.remove(agg_fun)
        if self.no_col_feature or not self.pivot or not self.data:
            return
        self.pivot.update_group_table(self.sel_agg_functions,
                                      self.val_feature)
        self.commit()

    def __invalidate_filtered(self):
        """Store the selection made in the view and resend outputs."""
        self.selection = self.table_view.get_selection()
        self.commit()

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Handle new input data: reset state, reopen context and commit."""
        self.closeContext()
        self.selection = set()
        self.data = data
        self.pivot = None
        self.check_data()
        self.init_attr_values()
        self.openContext(self.data)
        self.unconditional_commit()

    def check_data(self):
        """Clear all messages and refresh the input summary."""
        self.clear_messages()
        self.set_input_summary()

    def init_attr_values(self):
        """Reset the three feature combos to defaults for the current data."""
        domain = self.data.domain if self.data and len(self.data) else None
        for attr in ("row_feature", "col_feature", "val_feature"):
            getattr(self.controls, attr).model().set_domain(domain)
            setattr(self, attr, None)
        model = self.controls.row_feature.model()
        if model:
            self.row_feature = model[0]
        model = self.controls.val_feature.model()
        if model and len(model) > 2:
            # Prefer the first domain variable; otherwise take model[2].
            self.val_feature = domain.variables[0] \
                if domain.variables[0] in model else model[2]

    def commit(self):
        """(Re)compute the pivot if needed, update the view, send outputs."""
        def send_outputs(pivot_table, filtered_data, grouped_data):
            self.Outputs.grouped_data.send(grouped_data)
            self.Outputs.pivot_table.send(pivot_table)
            self.Outputs.filtered_data.send(filtered_data)
            self.set_output_summary(pivot_table, filtered_data, grouped_data)

        self.Warning.renamed_vars.clear()
        self.Warning.too_many_values.clear()
        self.Warning.cannot_aggregate.clear()
        self.Warning.no_col_feature.clear()
        if self.pivot is None:
            if self.no_col_feature:
                self.table_view.clear()
                self.Warning.no_col_feature()
                send_outputs(None, None, None)
                return

            if self.data:
                col_var = self.col_feature or self.row_feature
                # ``np.float`` was only an alias of the builtin and has been
                # removed from NumPy; the builtin works on every version.
                col = self.data.get_column_view(col_var)[0].astype(float)
                if len(nanunique(col)) >= self.MAX_VALUES:
                    self.table_view.clear()
                    self.Warning.too_many_values()
                    send_outputs(None, None, None)
                    return

            self.pivot = Pivot(self.data, self.sel_agg_functions,
                               self.row_feature,
                               self.col_feature, self.val_feature)

        if self.skipped_aggs:
            self.Warning.cannot_aggregate(self.skipped_aggs)
        self._update_graph()

        send_outputs(self.pivot.pivot_table,
                     self.get_filtered_data(),
                     self.pivot.group_table)

        if self.pivot.renamed:
            self.Warning.renamed_vars(self.pivot.renamed)

    def set_input_summary(self):
        """Show the number of input instances (or "no input") in the status."""
        summary = len(self.data) if self.data else self.info.NoInput
        details = format_summary_details(self.data) if self.data else ""
        self.info.set_input_summary(summary, details)

    def set_output_summary(self, pivot: Table, filtered: Table,
                           grouped: Table):
        """Show the sizes of the three outputs in the status bar."""
        summary, detail, kwargs = self.info.NoOutput, "", {}
        if pivot or filtered or grouped:
            n_pivot = len(pivot) if pivot else 0
            n_filtered = len(filtered) if filtered else 0
            n_grouped = len(grouped) if grouped else 0
            summary = f"{self.info.format_number(n_pivot)}, " \
                      f"{self.info.format_number(n_filtered)}, " \
                      f"{self.info.format_number(n_grouped)}"
            detail = format_multiple_summaries([
                ("Pivot table", pivot),
                ("Filtered data", filtered),
                ("Grouped data", grouped)
            ])
            kwargs = {"format": Qt.RichText}
        self.info.set_output_summary(summary, detail, **kwargs)

    def _update_graph(self):
        """Refill the table view from the pivot and restore the selection."""
        self.table_view.clear()
        if self.pivot.pivot_table:
            col_feature = self.col_feature or self.row_feature
            self.table_view.update_table(col_feature.name,
                                         self.row_feature.name,
                                         *self.pivot.pivot_tables)
            # A selection loaded from settings takes precedence once.
            selection = self.__pending_selection or self.selection
            self.table_view.set_selection(selection)
            self.selection = self.table_view.get_selection()
            self.__pending_selection = set()

    def get_filtered_data(self):
        """Return input rows matching any selected cell, or None."""
        if not self.data or not self.selection or not self.pivot.pivot_table:
            return None

        cond = []
        for i, j in self.selection:
            f = []
            for at, val in [(self.row_feature, self.pivot.pivot_table.X[i, 0]),
                            (self.col_feature, j)]:
                if isinstance(at, DiscreteVariable):
                    f.append(FilterDiscrete(at, [val]))
                elif isinstance(at, ContinuousVariable):
                    f.append(FilterContinuous(at, FilterContinuous.Equal, val))
            cond.append(Values(f))
        # Rows are kept when they match at least one selected cell.
        return Values(cond, conjunction=False)(self.data)

    @staticmethod
    def sizeHint():
        return QSize(640, 525)

    def send_report(self):
        """Report the chosen features and the table shown in the view."""
        self.report_items((
            ("Row feature", self.row_feature),
            ("Column feature", self.col_feature),
            ("Value feature", self.val_feature)))
        if self.data and self.val_feature is not None:
            self.report_table("", self.table_view)
        if not self.data:
            self.report_items((("Group by", self.row_feature),))
            self.report_table(self.table_view)
class OWHyper(OWWidget):
    """Widget showing an image plot together with a curve plot.

    The image values come either from an integral of the spectra (method and
    limits chosen by the user, limits shown as movable vertical lines in the
    curve plot) or from a selected feature. Points selected in the image are
    sent to the outputs and shown in the curve plot.
    """

    name = "HyperSpectra"

    class Inputs:
        data = Input("Data", Orange.data.Table, default=True)

    class Outputs:
        selected_data = Output("Selection", Orange.data.Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    icon = "icons/hyper.svg"
    priority = 20
    replaces = ["orangecontrib.infrared.widgets.owhyper.OWHyper"]

    settings_version = 3
    settingsHandler = DomainContextHandler()

    imageplot = SettingProvider(ImagePlot)
    curveplot = SettingProvider(CurvePlotHyper)

    integration_method = Setting(0)
    integration_methods = Integrate.INTEGRALS
    value_type = Setting(0)
    attr_value = ContextSetting(None)

    lowlim = Setting(None)
    highlim = Setting(None)
    choose = Setting(None)

    class Warning(OWWidget.Warning):
        threshold_error = Msg("Low slider should be less than High")

    # Errors derive from the widget's Error group (not Warning) so that they
    # are reported with error severity.
    class Error(OWWidget.Error):
        image_too_big = Msg("Image for chosen features is too big ({} x {}).")

    @classmethod
    def migrate_settings(cls, settings_, version):
        """Upgrade stored settings from older versions, in place.

        Both migrations are best-effort: settings that do not have the
        expected structure are left untouched.
        """
        if version < 2:
            # delete the saved attr_value to prevent crashes
            try:
                del settings_["context_settings"][0].values["attr_value"]
            except Exception:  # missing or malformed context: nothing to do
                pass

        # migrate selection
        if version <= 2:
            try:
                current_context = settings_["context_settings"][0]
                selection = getattr(current_context, "selection", None)
                if selection is not None:
                    # Convert the old mask into (index, group) pairs.
                    selection = [(i, 1) for i in
                                 np.flatnonzero(np.array(selection))]
                    settings_.setdefault(
                        "imageplot", {})["selection_group_saved"] = selection
            except Exception:  # missing or malformed context: skip migration
                pass

    def __init__(self):
        super().__init__()

        dbox = gui.widgetBox(self.controlArea, "Image values")

        rbox = gui.radioButtons(
            dbox, self, "value_type", callback=self._change_integration)

        gui.appendRadioButton(rbox, "From spectra")

        self.box_values_spectra = gui.indentedBox(rbox)

        gui.comboBox(
            self.box_values_spectra, self, "integration_method",
            valueType=int,
            items=(a.name for a in self.integration_methods),
            callback=self._change_integral_type)
        gui.rubber(self.controlArea)

        gui.appendRadioButton(rbox, "Use feature")

        self.box_values_feature = gui.indentedBox(rbox)

        self.feature_value_model = DomainModel(
            DomainModel.METAS | DomainModel.CLASSES,
            valid_types=DomainModel.PRIMITIVE)
        self.feature_value = gui.comboBox(
            self.box_values_feature, self, "attr_value",
            callback=self.update_feature_value,
            model=self.feature_value_model,
            sendSelectedValue=True, valueType=str)

        splitter = QSplitter(self)
        splitter.setOrientation(Qt.Vertical)
        self.imageplot = ImagePlot(self)
        self.imageplot.selection_changed.connect(self.image_selection_changed)

        self.curveplot = CurvePlotHyper(self, select=SELECTONE)
        # pad view so that lines are not hidden
        self.curveplot.plot.vb.x_padding = 0.005
        splitter.addWidget(self.imageplot)
        splitter.addWidget(self.curveplot)
        self.mainArea.layout().addWidget(splitter)

        # line1/line2 mark the integration limits, line3 a single position;
        # moving a line writes the new value into the matching setting.
        self.line1 = MovableVline(position=self.lowlim, label="",
                                  report=self.curveplot)
        self.line1.sigMoved.connect(lambda v: setattr(self, "lowlim", v))
        self.line2 = MovableVline(position=self.highlim, label="",
                                  report=self.curveplot)
        self.line2.sigMoved.connect(lambda v: setattr(self, "highlim", v))
        self.line3 = MovableVline(position=self.choose, label="",
                                  report=self.curveplot)
        self.line3.sigMoved.connect(lambda v: setattr(self, "choose", v))
        for line in [self.line1, self.line2, self.line3]:
            line.sigMoveFinished.connect(self.changed_integral_range)
            self.curveplot.add_marking(line)
            line.hide()

        self.data = None
        self.disable_integral_range = False

        self.resize(900, 700)
        self.graph_name = "imageplot.plotview"
        self._update_integration_type()

        # prepare interface according to the new context
        self.contextAboutToBeOpened.connect(
            lambda x: self.init_interface_data(x[0]))

    def init_interface_data(self, data):
        """Reset the feature combo unless ``data`` has the current domain."""
        same_domain = (self.data and data and
                       data.domain == self.data.domain)
        if not same_domain:
            self.init_attr_values(data)

    def image_selection_changed(self):
        """Send selected/annotated data and update the curve plot."""
        if not self.data:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(None)
            self.curveplot.set_data(None)
            return

        indices = np.flatnonzero(self.imageplot.selection_group)

        annotated_data = create_groups_table(self.data,
                                             self.imageplot.selection_group)
        if annotated_data is not None:
            # workaround for Orange's copying on domain conversion
            annotated_data.X = self.data.X
        self.Outputs.annotated_data.send(annotated_data)

        selected = self.data[indices]
        self.Outputs.selected_data.send(selected if selected else None)
        # With nothing selected the curve plot shows all the data.
        if selected:
            self.curveplot.set_data(selected)
        else:
            self.curveplot.set_data(self.data)

    def selection_changed(self):
        self.redraw_data()

    def init_attr_values(self, data):
        """Point the feature model at ``data``'s domain and pick the first
        available feature (or None)."""
        domain = data.domain if data is not None else None
        self.feature_value_model.set_domain(domain)
        self.attr_value = self.feature_value_model[0] \
            if self.feature_value_model else None

    def redraw_data(self):
        self.imageplot.update_view()

    def update_feature_value(self):
        self.redraw_data()

    def _update_integration_type(self):
        """Enable the controls and show the lines for the chosen value type."""
        self.line1.hide()
        self.line2.hide()
        self.line3.hide()
        if self.value_type == 0:
            self.box_values_spectra.setDisabled(False)
            self.box_values_feature.setDisabled(True)
            if self.integration_methods[self.integration_method] \
                    != Integrate.PeakAt:
                self.line1.show()
                self.line2.show()
            else:
                self.line3.show()
        elif self.value_type == 1:
            self.box_values_spectra.setDisabled(True)
            self.box_values_feature.setDisabled(False)
        QTest.qWait(1)  # first update the interface

    def _change_integration(self):
        # change what to show on the image
        self._update_integration_type()
        self.redraw_data()

    def changed_integral_range(self):
        """Redraw after a line was moved, unless updates are suppressed."""
        if self.disable_integral_range:
            return
        self.redraw_data()

    def _change_integral_type(self):
        self._change_integration()

    @Inputs.data
    def set_data(self, data):
        """Handle new input data.

        The context is opened before ``self.data`` is replaced, so that
        ``init_interface_data`` can compare the new domain with the old one.
        """
        self.closeContext()
        self.openContext(data)
        self.data = data
        self.imageplot.set_data(data)
        self.curveplot.set_data(data)
        self._init_integral_boundaries()
        self.imageplot.update_view()

    def _init_integral_boundaries(self):
        """Clamp stored limits to the x range of the data and move the lines.

        Redraws triggered by the lines are suppressed while this runs.
        """
        # requires data in curveplot
        self.disable_integral_range = True
        if self.curveplot.data_x is not None and len(self.curveplot.data_x):
            minx = self.curveplot.data_x[0]
            maxx = self.curveplot.data_x[-1]

            if self.lowlim is None or not minx <= self.lowlim <= maxx:
                self.lowlim = minx
            self.line1.setValue(self.lowlim)

            if self.highlim is None or not minx <= self.highlim <= maxx:
                self.highlim = maxx
            self.line2.setValue(self.highlim)

            if self.choose is None:
                self.choose = (minx + maxx) / 2
            elif self.choose < minx:
                self.choose = minx
            elif self.choose > maxx:
                self.choose = maxx
            self.line3.setValue(self.choose)
        self.disable_integral_range = False
class OWMergeData(widget.OWWidget):
    """Widget that merges two tables on the values of selected columns.

    Three modes are offered (see ``OptionNames``): appending columns from the
    extra table (left join), keeping only matched rows (inner join) and
    keeping all rows of both tables (outer join). Rows are matched on one or
    more pairs of columns, on the row index or on the instance id.
    """

    name = "Merge Data"
    description = "Merge datasets based on the values of selected features."
    icon = "icons/MergeData.svg"
    priority = 1110
    keywords = ["join"]

    class Inputs:
        data = Input("Data", Orange.data.Table, default=True,
                     replaces=["Data A"])
        extra_data = Input("Extra Data", Orange.data.Table,
                           replaces=["Data B"])

    class Outputs:
        data = Output(
            "Data", Orange.data.Table,
            replaces=["Merged Data A+B", "Merged Data B+A", "Merged Data"])

    LeftJoin, InnerJoin, OuterJoin = range(3)
    OptionNames = ("Append columns from Extra data",
                   "Find matching pairs of rows",
                   "Concatenate tables")
    OptionDescriptions = (
        "The first table may contain, for instance, city names,\n"
        "and the second would be a list of cities and their coordinates.\n"
        "Columns with coordinates would then be appended to the output.",

        "Input tables contain different features describing the same data "
        "instances.\n"
        "Output contains matched instances. Rows without matches are removed.",

        "Input tables contain different features describing the same data "
        "instances.\n"
        "Output contains all instances. Data from merged instances is "
        "merged into single rows.")

    UserAdviceMessages = [
        widget.Message("Confused about merging options?\nSee the tooltips!",
                       "merging_types")
    ]

    settingsHandler = MergeDataContextHandler()
    attr_pairs = ContextSetting(None, schema_only=True)
    merging = Setting(LeftJoin)
    auto_apply = Setting(True)
    settings_version = 2

    want_main_area = False
    resizing_enabled = False

    class Warning(widget.OWWidget.Warning):
        renamed_vars = Msg("Some variables have been renamed "
                           "to avoid duplicates.\n{}")

    class Error(widget.OWWidget.Error):
        matching_numeric_with_nonnum = Msg(
            "Numeric and non-numeric columns ({} and {}) cannot be matched.")
        matching_index_with_sth = Msg("Row index cannot be matched with {}.")
        matching_id_with_sth = Msg("Instance cannot be matched with {}.")
        nonunique_left = Msg(
            "Some combinations of values on the left appear in multiple rows.\n"
            "For this type of merging, every possible combination of values "
            "on the left should appear at most once.")
        nonunique_right = Msg(
            "Some combinations of values on the right appear in multiple rows."
            "\n"
            "Every possible combination of values on the right should appear "
            "at most once.")

    def __init__(self):
        super().__init__()
        self.data = None
        self.extra_data = None

        content = [
            INDEX, INSTANCEID,
            DomainModel.ATTRIBUTES, DomainModel.CLASSES, DomainModel.METAS
        ]
        self.model = DomainModelWithTooltips(content)
        self.extra_model = DomainModelWithTooltips(content)

        box = gui.hBox(self.controlArea, box=None)
        no_info = self.data_info(None)
        self.info_box_data = gui.label(box, self, no_info, box="Data")
        self.info_box_extra_data = gui.label(box, self, no_info,
                                             box="Extra Data")

        grp = gui.radioButtons(
            self.controlArea, self, "merging", box="Merging",
            btnLabels=self.OptionNames, tooltips=self.OptionDescriptions,
            callback=self.change_merging)
        grp.layout().setSpacing(8)

        self.attr_boxes = ConditionBox(
            self, self.model, self.extra_model, "", "matches")
        self.attr_boxes.add_row()
        box = gui.vBox(self.controlArea, box="Row matching")
        box.layout().addWidget(self.attr_boxes)

        gui.auto_apply(self.controlArea, self, box=False)
        # connect after wrapping self.commit with gui.auto_commit!
        self.attr_boxes.vars_changed.connect(self.commit)
        self.attr_boxes.vars_changed.connect(self.store_combo_state)
        self.settingsAboutToBePacked.connect(self.store_combo_state)

    def change_merging(self):
        self.commit()

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Store the primary table and update its model and info label."""
        self.data = data
        self.model.set_domain(data and data.domain)
        self.info_box_data.setText(self.data_info(data))

    @Inputs.extra_data
    @check_sql_input
    def set_extra_data(self, data):
        """Store the extra table and update its model and info label."""
        self.extra_data = data
        self.extra_model.set_domain(data and data.domain)
        self.info_box_extra_data.setText(self.data_info(data))

    def store_combo_state(self):
        """Copy the current matching pairs into the ``attr_pairs`` setting."""
        self.attr_pairs = self.attr_boxes.current_state()

    def handleNewSignals(self):
        """Reopen the context with a guessed default pair, then commit."""
        self.closeContext()
        self.attr_pairs = [self._find_best_match()]
        self.openContext(self.data and self.data.domain,
                         self.extra_data and self.extra_data.domain)
        self.attr_boxes.set_state(self.attr_pairs)
        self.unconditional_commit()

    def _find_best_match(self):
        """Return the pair of string variables (one per table) whose columns
        share the most values; ``(INDEX, INDEX)`` if there is no overlap."""
        def get_unique_str_metas_names(model_):
            return [m for m in model_ if isinstance(m, StringVariable)]

        attr, extra_attr, n_max_intersect = INDEX, INDEX, 0
        str_metas = get_unique_str_metas_names(self.model)
        extra_str_metas = get_unique_str_metas_names(self.extra_model)
        for m_a, m_b in product(str_metas, extra_str_metas):
            col = self.data[:, m_a].metas
            extra_col = self.extra_data[:, m_b].metas
            if col.size and extra_col.size \
                    and isinstance(col[0][0], str) \
                    and isinstance(extra_col[0][0], str):
                n_inter = len(np.intersect1d(col, extra_col))
                if n_inter > n_max_intersect:
                    n_max_intersect, attr, extra_attr = n_inter, m_a, m_b
        return attr, extra_attr

    @staticmethod
    def data_info(data):
        """Return the text for an info label: name, instances, variables."""
        if data is None:
            return "No data."
        else:
            return \
                f"{data.name}\n" \
                f"{len(data)} instances\n" \
                f"{len(data.domain) + len(data.domain.metas)} variables"

    def commit(self):
        """Merge the inputs (when both are present) and send the result."""
        self.clear_messages()
        merged = self.merge() if self.data and self.extra_data else None
        self.Outputs.data.send(merged)

    def send_report(self):
        # pylint: disable=invalid-sequence-index
        self.report_items((
            ("Merging", self.OptionNames[self.merging]),
            ("Match",
             ", ".join(
                 f"{self._get_col_name(left)} "
                 f"with {self._get_col_name(right)}"
                 for left, right in self.attr_boxes.current_state()))))

    def merge(self):
        """Return the merged table, or None when the pairs are invalid, keys
        are not unique or the result would be empty."""
        # pylint: disable=invalid-sequence-index
        pairs = self.attr_boxes.current_state()
        if not self._check_pair_types(pairs):
            return None
        left_vars, right_vars = zip(*pairs)
        # The masks are updated in place by _values: False marks rows with an
        # undefined key, which must not be matched.
        left_mask = np.full(len(self.data), True)
        left = np.vstack(tuple(self._values(self.data, var, left_mask)
                               for var in left_vars)).T
        right_mask = np.full(len(self.extra_data), True)
        right = np.vstack(tuple(self._values(self.extra_data, var, right_mask)
                                for var in right_vars)).T
        if not self._check_uniqueness(left, left_mask, right, right_mask):
            return None
        method = self._merge_methods[self.merging]
        lefti, righti, rightu = method(self, left, left_mask,
                                       right, right_mask)
        reduced_extra_data = \
            self._compute_reduced_extra_data(right_vars,
                                             lefti, righti, rightu)
        return self._join_table_by_indices(reduced_extra_data,
                                           lefti, righti, rightu)

    def _check_pair_types(self, pairs):
        """Return True if every pair matches compatible columns; otherwise
        show the corresponding error and return False."""
        for left, right in pairs:
            if isinstance(left, ContinuousVariable) \
                    != isinstance(right, ContinuousVariable):
                self.Error.matching_numeric_with_nonnum(left, right)
                return False
            if INDEX in (left, right) and left != right:
                self.Error.matching_index_with_sth(
                    self._get_col_name(({left, right} - {INDEX}).pop()))
                return False
            if INSTANCEID in (left, right) and left != right:
                self.Error.matching_id_with_sth(
                    self._get_col_name(({left, right} - {INSTANCEID}).pop()))
                return False
        return True

    @staticmethod
    def _get_col_name(obj):
        """Quoted variable name, or the lower-cased label for non-variables."""
        return f"'{obj.name}'" if isinstance(obj, Variable) else obj.lower()

    def _check_uniqueness(self, left, left_mask, right, right_mask):
        """Check that defined keys are unique on the right and, except for
        the left join, on the left; show errors and return False if not."""
        ok = True
        masked_right = right[right_mask]
        if len(set(map(tuple, masked_right))) != len(masked_right):
            self.Error.nonunique_right()
            ok = False
        if self.merging != self.LeftJoin:
            masked_left = left[left_mask]
            if len(set(map(tuple, masked_left))) != len(masked_left):
                self.Error.nonunique_left()
                ok = False
        return ok

    def _compute_reduced_extra_data(self, right_match_vars,
                                    lefti, righti, rightu):
        """Prepare a table with extra columns that will appear in the merged
        table"""
        domain = self.data.domain
        extra_domain = self.extra_data.domain

        def var_needed(var):
            # With unmatched right rows to append, every column is kept.
            if rightu is not None and rightu.size:
                return True
            if var in right_match_vars and self.merging != self.OuterJoin:
                return False
            if var not in domain:
                return True
            # The variable is in both tables: keep it only if the values in
            # matched rows differ.
            both_defined = (lefti != -1) * (righti != -1)
            left_col = \
                self.data.get_column_view(var)[0][lefti[both_defined]]
            right_col = \
                self.extra_data.get_column_view(var)[0][righti[both_defined]]
            if var.is_primitive():
                left_col = left_col.astype(float)
                right_col = right_col.astype(float)
                mask_left = np.isfinite(left_col)
                mask_right = np.isfinite(right_col)
                return not (
                    np.all(mask_left == mask_right)
                    and np.all(left_col[mask_left] == right_col[mask_right]))
            else:
                return not np.all(left_col == right_col)

        extra_vars = [
            var for var in chain(extra_domain.variables, extra_domain.metas)
            if var_needed(var)]
        return self.extra_data[:, extra_vars]

    @staticmethod
    def _values(data, var, mask):
        """Return an array of keys for rows of the table.

        ``mask`` is modified in place: entries for rows whose value of
        ``var`` is undefined are set to False.
        """
        if var == INDEX:
            return np.arange(len(data))
        if var == INSTANCEID:
            # ``np.int`` was only an alias of the builtin and has been
            # removed from NumPy; the builtin works on every version.
            return np.fromiter(
                (inst.id for inst in data), count=len(data), dtype=int)
        col = data.get_column_view(var)[0]
        if var.is_primitive():
            col = col.astype(float, copy=False)
            nans = np.isnan(col)
            mask *= ~nans
            if var.is_discrete:
                col = col.astype(int)
                # Undefined values index the extra nan entry appended below.
                col[nans] = len(var.values)
                # list(...) so that this works for list and tuple `values`.
                col = np.array(list(var.values) + [np.nan])[col]
        else:
            col = col.copy()
            defined = col.astype(bool)
            mask *= defined
            col[~mask] = np.nan
        return col

    def _left_join_indices(self, left, left_mask, right, right_mask):
        """Compute indices for the left join.

        Returns a tuple ``(lefti, righti, None)``:
        - lefti contains indices of all rows of the primary table,
        - righti contains the matching rows in the extra table or -1.
        """
        data = self.data
        # Don't match nans. This is needed since numpy may change nan to string
        # nan, so nan's will match each other
        indices = np.arange(len(right))
        indices[~right_mask] = -1
        if right.shape[1] == 1:
            # The more common case can be handled faster
            right_map = dict(zip(right.flatten(), indices))
            righti = (right_map.get(val, -1) for val in left.flatten())
        else:
            right_map = dict(zip(map(tuple, right), indices))
            righti = (right_map.get(tuple(val), -1) for val in left)
        righti = np.fromiter(righti, dtype=np.int64, count=len(data))
        lefti = np.arange(len(data), dtype=np.int64)
        righti[lefti[~left_mask]] = -1
        return lefti, righti, None

    def _inner_join_indices(self, left, left_mask, right, right_mask):
        """Use _left_join_indices to compute the arrays of indices, then
        remove those with no match in the second table"""
        lefti, righti, _ = \
            self._left_join_indices(left, left_mask, right, right_mask)
        mask = righti != -1
        return lefti[mask], righti[mask], None

    def _outer_join_indices(self, left, left_mask, right, right_mask):
        """Use _left_join_indices to compute the arrays of indices, then add
        rows in the second table without a match in the first"""
        lefti, righti, _ = \
            self._left_join_indices(left, left_mask, right, right_mask)
        unused = np.full(len(right), True)
        unused[righti] = False
        if len(right) - 1 not in righti:
            # righti can include -1, which sets the last element as used
            unused[-1] = True
        return lefti, righti, np.nonzero(unused)[0]

    # Indexed by ``self.merging``; entries are plain functions, so ``merge``
    # passes ``self`` explicitly.
    _merge_methods = [
        _left_join_indices, _inner_join_indices, _outer_join_indices]

    def _join_table_by_indices(self, reduced_extra, lefti, righti, rightu):
        """Join (horizontally) self.data and reduced_extra, taking the pairs
        of rows given in indices"""
        if not lefti.size:
            return None
        lt_dom = self.data.domain
        xt_dom = reduced_extra.domain
        domain = self._domain_rename_duplicates(
            lt_dom.attributes + xt_dom.attributes,
            lt_dom.class_vars + xt_dom.class_vars,
            lt_dom.metas + xt_dom.metas)
        X = self._join_array_by_indices(
            self.data.X, reduced_extra.X, lefti, righti)
        Y = self._join_array_by_indices(
            np.c_[self.data.Y], np.c_[reduced_extra.Y], lefti, righti)
        string_cols = [i for i, var in enumerate(domain.metas)
                       if var.is_string]
        metas = self._join_array_by_indices(
            self.data.metas, reduced_extra.metas, lefti, righti, string_cols)
        if rightu is not None:
            # This domain is used for transforming the extra rows for outer join
            # It must use the original - not renamed - variables from right, so
            # values are copied,
            # but new domain for the left, so renamed values are *not* copied
            right_domain = Orange.data.Domain(
                domain.attributes[:len(lt_dom.attributes)] + xt_dom.attributes,
                domain.class_vars[:len(lt_dom.class_vars)] + xt_dom.class_vars,
                domain.metas[:len(lt_dom.metas)] + xt_dom.metas)
            extras = self.extra_data[rightu].transform(right_domain)
            X = np.vstack((X, extras.X))
            extras_Y = extras.Y
            if extras_Y.ndim == 1:
                extras_Y = extras_Y.reshape(-1, 1)
            Y = np.vstack((Y, extras_Y))
            metas = np.vstack((metas, extras.metas))
        table = Orange.data.Table.from_numpy(domain, X, Y, metas)
        table.name = getattr(self.data, 'name', '')
        table.attributes = getattr(self.data, 'attributes', {})
        if rightu is not None:
            table.ids = np.hstack(
                (self.data.ids, self.extra_data.ids[rightu]))
        else:
            table.ids = self.data.ids[lefti]

        return table

    def _domain_rename_duplicates(self, attributes, class_vars, metas):
        """Check for duplicate variable names in domain. If any, rename
        the variables, by replacing them with new ones (names are
        appended a number). A warning lists the names that were renamed."""
        attrs, cvars, mets = [], [], []
        n_attrs, n_cvars, n_metas = \
            len(attributes), len(class_vars), len(metas)
        # One target list per variable, in the order of all_vars.
        lists = [attrs] * n_attrs + [cvars] * n_cvars + [mets] * n_metas

        all_vars = attributes + class_vars + metas
        proposed_names = [m.name for m in all_vars]
        unique_names = get_unique_names_duplicates(proposed_names)
        duplicates = set()
        for p_name, u_name, var, c in zip(proposed_names, unique_names,
                                          all_vars, lists):
            if p_name != u_name:
                duplicates.add(p_name)
                var = var.copy(name=u_name)
            c.append(var)
        if duplicates:
            # Sorted, so the message does not depend on set ordering.
            self.Warning.renamed_vars(", ".join(sorted(duplicates)))
        return Orange.data.Domain(attrs, cvars, mets)

    @staticmethod
    def _join_array_by_indices(left, right, lefti, righti, string_cols=None):
        """Join (horizontally) two arrays, taking pairs of rows given in
        indices. Rows with index -1 are filled with nan, or with '' in the
        columns listed in ``string_cols``.
        """
        def prepare(arr, inds, str_cols):
            try:
                newarr = arr[inds]
            except IndexError:
                newarr = np.full_like(arr, np.nan)
            else:
                empty = np.full(arr.shape[1], np.nan)
                if str_cols:
                    assert arr.dtype == object
                    empty = empty.astype(object)
                    empty[str_cols] = ''
                newarr[inds == -1] = empty
            return newarr

        left_width = left.shape[1]
        # string_cols index the joined array; split them between the sides.
        str_left = [i for i in string_cols or () if i < left_width]
        str_right = [i - left_width for i in string_cols or ()
                     if i >= left_width]
        res = hstack((prepare(left, lefti, str_left),
                      prepare(right, righti, str_right)))
        return res

    @staticmethod
    def migrate_settings(settings, version=None):
        """Convert settings from older versions, in place.

        Unversioned settings stored one pair of columns per merging type;
        these are converted to ``attr_pairs``. Before version 2,
        ``attr_pairs`` was a plain setting; it is moved into a context.
        """
        def mig_value(x):
            if x == "Position (index)":
                return INDEX
            if x == "Source position (index)":
                return INSTANCEID
            return x

        if not version:
            operations = ("augment", "merge", "combine")
            oper = operations[settings["merging"]]
            settings["attr_pairs"] = ([
                (mig_value(settings[f"attr_{oper}_data"]),
                 mig_value(settings[f"attr_{oper}_extra"]))
            ])
            for oper in operations:
                del settings[f"attr_{oper}_data"]
                del settings[f"attr_{oper}_extra"]

        if not version or (version < 2 and "attr_pairs" in settings):
            attr_pairs = settings.pop("attr_pairs")
            attr_pairs = [
                tuple((var, 100) if isinstance(var, str) else var
                      for var in pair)
                for pair in attr_pairs]
            context = ContextHandler().new_context()
            context.variables1 = \
                dict(var for var, _ in attr_pairs if var[1] > 100)
            context.variables2 = \
                dict(var for _, var in attr_pairs if var[1] > 100)
            context.values["attr_pairs"] = attr_pairs
            settings["context_settings"] = [context]
class OWAggregateColumns(widget.OWWidget):
    """Widget that appends a column aggregating the selected columns.

    The aggregate (one of `Operations`) is computed row-wise over the
    selected variables and added to the input table as a new variable
    named after `var_name`.
    """

    name = "Aggregate Columns"
    description = "Compute a sum, max, min ... of selected columns."
    icon = "icons/AggregateColumns.svg"
    priority = 100
    keywords = ["aggregate", "sum", "product", "max", "min",
                "mean", "median", "variance"]

    class Inputs:
        data = Input("Data", Table, default=True)

    class Outputs:
        data = Output("Data", Table)

    want_main_area = False

    settingsHandler = DomainContextHandler()
    variables: List[Variable] = ContextSetting([])
    operation = Setting("Sum")
    var_name = Setting("agg")
    auto_apply = Setting(True)

    # Operation label -> NaN-ignoring numpy reduction, applied with axis=1.
    Operations = {"Sum": np.nansum, "Product": np.nanprod,
                  "Min": np.nanmin, "Max": np.nanmax,
                  "Mean": np.nanmean, "Variance": np.nanvar,
                  "Median": np.nanmedian}
    # Operations for which the result is a TimeVariable when all the
    # selected variables are TimeVariables (see `_new_var`).
    TimePreserving = ("Min", "Max", "Mean", "Median")

    def __init__(self):
        """Build the controls: variable list, operator combo, name edit."""
        super().__init__()
        self.data = None

        box = gui.vBox(self.controlArea, box=True)

        self.variable_model = DomainModel(
            order=DomainModel.MIXED, valid_types=(ContinuousVariable, ))
        var_list = gui.listView(
            box, self, "variables", model=self.variable_model,
            callback=self.commit.deferred
        )
        var_list.setSelectionMode(var_list.ExtendedSelection)

        combo = gui.comboBox(
            box, self, "operation",
            label="Operator: ", orientation=Qt.Horizontal,
            items=list(self.Operations), sendSelectedValue=True,
            callback=self.commit.deferred
        )
        combo.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed)

        gui.lineEdit(
            box, self, "var_name",
            label="Variable name: ", orientation=Qt.Horizontal,
            callback=self.commit.deferred
        )

        gui.auto_apply(self.controlArea, self)

    @Inputs.data
    def set_data(self, data: Table = None):
        """Handle new input data: reset the selection and reopen the context."""
        self.closeContext()
        self.variables.clear()
        self.data = data
        if self.data:
            self.variable_model.set_domain(data.domain)
            self.openContext(data)
        else:
            self.variable_model.set_domain(None)
        self.commit.now()

    @gui.deferred
    def commit(self):
        """Send the (possibly augmented) table to the output."""
        augmented = self._compute_data()
        self.Outputs.data.send(augmented)

    def _compute_data(self):
        """Return the input table with the aggregate column added.

        The input is returned unchanged when there is no data or no
        variable is selected.
        """
        if not self.data or not self.variables:
            return self.data

        new_col = self._compute_column()
        new_var = self._new_var()
        return self.data.add_column(new_var, new_col)

    def _compute_column(self):
        """Return the row-wise aggregate of the selected variables."""
        # One float column per selected variable.
        arr = np.empty((len(self.data), len(self.variables)))
        for i, var in enumerate(self.variables):
            arr[:, i] = self.data.get_column_view(var)[0].astype(float)
        func = self.Operations[self.operation]
        return func(arr, axis=1)

    def _new_var_name(self):
        """Return `var_name` made unique with respect to the data domain."""
        return get_unique_names(self.data.domain, self.var_name)

    def _new_var(self):
        """Create the variable describing the aggregate column."""
        name = self._new_var_name()
        if self.operation in self.TimePreserving \
                and all(isinstance(var, TimeVariable) for var in self.variables):
            return TimeVariable(name)
        return ContinuousVariable(name)

    def send_report(self):
        """Report the output variable, the operation and its inputs."""
        # fp for self.variables, pylint: disable=unsubscriptable-object
        if not self.data or not self.variables:
            return
        n_vars = len(self.variables)
        if n_vars == 1:
            # A single variable needs no enumeration; joining an empty
            # prefix with " and " used to yield "of  and 'x'".
            var_list = f"'{self.variables[0].name}'"
        else:
            # List at most 30 names; the last listed item is introduced
            # with "and" (either the last name or the count of the rest).
            var_list = ", ".join(f"'{var.name}'"
                                 for var in self.variables[:31][:-1])
            if n_vars > 30:
                var_list += f" and {n_vars - 30} others"
            else:
                var_list += f" and '{self.variables[-1].name}'"
        self.report_items((
            ("Output:",
             f"'{self._new_var_name()}' as {self.operation.lower()} of {var_list}"
             ),
        ))
class OWSieveDiagram(OWWidget):
    """Widget drawing a sieve diagram for a pair of variables.

    Each combination of values of the two chosen variables is drawn as a
    rectangle shaded by its standardized Pearson residual. Rectangles can
    be clicked to select the matching data instances.
    """

    name = "Sieve Diagram"
    description = "Visualize the observed and expected frequencies " \
                  "for a combination of values."
    icon = "icons/SieveDiagram.svg"
    priority = 200
    keywords = []

    class Inputs:
        data = Input("Data", Table, default=True)
        features = Input("Features", AttributeList)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    graph_name = "canvas"

    want_control_area = False

    settings_version = 1
    settingsHandler = DomainContextHandler()
    attr_x = ContextSetting(None)
    attr_y = ContextSetting(None)
    # Indices into `self.areas` of the selected rectangles.
    selection = ContextSetting(set())

    xy_changed_manually = Signal(Variable, Variable)

    def __init__(self):
        """Build the attribute combos, the vizrank button and the canvas."""
        # pylint: disable=missing-docstring
        super().__init__()

        self.data = self.discrete_data = None
        self.attrs = []
        self.input_features = None
        self.areas = []
        self.selection = set()

        self.attr_box = gui.hBox(self.mainArea)
        self.domain_model = DomainModel(valid_types=DomainModel.PRIMITIVE)
        combo_args = dict(
            widget=self.attr_box, master=self, contentsLength=12,
            searchable=True, sendSelectedValue=True,
            callback=self.attr_changed, model=self.domain_model)
        fixed_size = (QSizePolicy.Fixed, QSizePolicy.Fixed)
        gui.comboBox(value="attr_x", **combo_args)
        gui.widgetLabel(self.attr_box, "\u2715", sizePolicy=fixed_size)
        gui.comboBox(value="attr_y", **combo_args)
        self.vizrank, self.vizrank_button = SieveRank.add_vizrank(
            self.attr_box, self, "Score Combinations", self.set_attr)
        self.vizrank_button.setSizePolicy(*fixed_size)

        self.canvas = QGraphicsScene(self)
        self.canvasView = ViewWithPress(
            self.canvas, self.mainArea, handler=self.reset_selection)
        self.mainArea.layout().addWidget(self.canvasView)
        self.canvasView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvasView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)

    def sizeHint(self):
        return QSize(450, 550)

    def resizeEvent(self, event):
        """Redraw the diagram; its layout depends on the view size."""
        super().resizeEvent(event)
        self.update_graph()

    def showEvent(self, event):
        """Redraw the diagram when the widget is shown."""
        super().showEvent(event)
        self.update_graph()

    @classmethod
    def migrate_context(cls, context, version):
        """Rename `attrX`/`attrY` and convert stored names to variables
        for contexts saved without a version."""
        if not version:
            settings.rename_setting(context, "attrX", "attr_x")
            settings.rename_setting(context, "attrY", "attr_y")
            settings.migrate_str_to_variable(context)

    @Inputs.data
    def set_data(self, data):
        """
        Discretize continuous attributes, and put all attributes and discrete
        metas into self.attrs.

        Select the first two attributes unless context overrides this.
        Method `resolve_shown_attributes` is called to use the attributes from
        the input, if it exists and matches the attributes in the data.

        Remove selection; again let the context override this.
        Initialize the vizrank dialog, but don't show it.

        Args:
            data (Table): input data
        """
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.areas = []
        self.selection = set()
        if self.data is None:
            self.attrs[:] = []
            self.domain_model.set_domain(None)
            self.discrete_data = None
        else:
            self.domain_model.set_domain(data.domain)
        self.attrs = [x for x in self.domain_model if isinstance(x, Variable)]
        if self.attrs:
            self.attr_x = self.attrs[0]
            # Second variable if there is one, otherwise the first again.
            self.attr_y = self.attrs[len(self.attrs) > 1]
        else:
            self.attr_x = self.attr_y = None
            self.areas = []
            self.selection = set()
        self.openContext(self.data)
        if self.data:
            self.discrete_data = self.sparse_to_dense(data, True)
        self.resolve_shown_attributes()
        self.update_graph()
        self.update_selection()

        self.vizrank.initialize()
        self.vizrank_button.setEnabled(
            self.data is not None and len(self.data) > 1 and
            len(self.data.domain.attributes) > 1 and not self.data.is_sparse())

    def set_attr(self, attr_x, attr_y):
        """Set both shown attributes and refresh the diagram."""
        self.attr_x, self.attr_y = attr_x, attr_y
        self.update_attr()

    def attr_changed(self):
        """Combo callback: refresh and announce the manual change."""
        self.update_attr()
        self.xy_changed_manually.emit(self.attr_x, self.attr_y)

    def update_attr(self):
        """Update the graph and selection."""
        self.selection = set()
        self.discrete_data = self.sparse_to_dense(self.data)
        self.update_graph()
        self.update_selection()

    def sparse_to_dense(self, data, init=False):
        """
        Extracts two selected columns from sparse matrix.
        GH-2260

        For dense data the previously computed `self.discrete_data` is
        returned unless `init` is true. Otherwise the data (for sparse
        input, only the two shown columns) is discretized if it contains
        any continuous variable.
        """
        def discretizer(data):
            if any(attr.is_continuous for attr in
                   chain(data.domain.variables, data.domain.metas)):
                discretize = Discretize(
                    method=EqualFreq(n=4), remove_const=False,
                    discretize_classes=True, discretize_metas=True)
                return discretize(data).to_dense()
            return data

        if not data.is_sparse() and not init:
            return self.discrete_data
        if data.is_sparse():
            attrs = {self.attr_x, self.attr_y}
            new_domain = data.domain.select_columns(attrs)
            data = Table.from_table(new_domain, data)
        return discretizer(data)

    @Inputs.features
    def set_input_features(self, attr_list):
        """
        Handler for the Features signal.

        The method stores the attributes and calls `resolve_shown_attributes`

        Args:
            attr_list (AttributeList): data from the signal
        """
        self.input_features = attr_list
        self.resolve_shown_attributes()
        self.update_selection()

    def resolve_shown_attributes(self):
        """
        Use the attributes from the input signal if the signal is present
        and at least two attributes appear in the domain. If there are
        multiple, use the first two. Combos are disabled if inputs are used.
        """
        self.warning()
        self.attr_box.setEnabled(True)
        self.vizrank.setEnabled(True)
        if not self.input_features:  # None or empty
            return
        features = [f for f in self.input_features if f in self.domain_model]
        if not features:
            self.warning(
                "Features from the input signal are not present in the data")
            return
        old_attrs = self.attr_x, self.attr_y
        # Doubling the list makes a single feature serve as both x and y.
        self.attr_x, self.attr_y = (features * 2)[:2]
        self.attr_box.setEnabled(False)
        self.vizrank.setEnabled(False)
        if (self.attr_x, self.attr_y) != old_attrs:
            self.selection = set()
            self.update_graph()

    def reset_selection(self):
        """Clear the selection and update the outputs."""
        self.selection = set()
        self.update_selection()

    def select_area(self, area, event):
        """
        Add or remove the clicked area from the selection

        Args:
            area (QRect): the area that is clicked
            event (QEvent): event description
        """
        if event.button() != Qt.LeftButton:
            return
        index = self.areas.index(area)
        if event.modifiers() & Qt.ControlModifier:
            # Ctrl-click toggles the area.
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection()

    def update_selection(self):
        """
        Update the graph (pen width) to show the current selection.
        Filter and output the data.
        """
        if self.areas is None or not self.selection:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(create_annotated_table(self.data, []))
            return

        filts = []
        for i, area in enumerate(self.areas):
            if i in self.selection:
                width = 4
                val_x, val_y = area.value_pair
                filts.append(
                    filter.Values([
                        filter.FilterDiscrete(self.attr_x.name, [val_x]),
                        filter.FilterDiscrete(self.attr_y.name, [val_y])
                    ]))
            else:
                width = 1
            pen = area.pen()
            pen.setWidth(width)
            area.setPen(pen)
        if len(filts) == 1:
            filts = filts[0]
        else:
            # Any of the selected value pairs may match.
            filts = filter.Values(filts, conjunction=False)
        selection = filts(self.discrete_data)
        idset = set(selection.ids)
        sel_idx = [i for i, id_ in enumerate(self.data.ids) if id_ in idset]
        if self.discrete_data is not self.data:
            # Output rows of the original table, not of the discretized one.
            selection = self.data[sel_idx]
        self.Outputs.selected_data.send(selection)
        self.Outputs.annotated_data.send(create_annotated_table(self.data, sel_idx))

    def update_graph(self):
        # Function uses weird names like r, g, b, but it does it with utmost
        # caution, hence
        # pylint: disable=invalid-name
        """Update the graph."""

        def text(txt, *args, **kwargs):
            # Plain text is used when a maximal width is given, HTML otherwise.
            text = html_text = None
            if "max_width" in kwargs:
                text = txt
            else:
                html_text = to_html(txt)
            return CanvasText(self.canvas, text, html_text=html_text,
                              *args, **kwargs)

        # NOTE: `width` and `height` are rebound to numbers in the drawing
        # loop below; the helpers are only called before that loop.
        def width(txt):
            return text(txt, 0, 0, show=False).boundingRect().width()

        def height(txt):
            return text(txt, 0, 0, show=False).boundingRect().height()

        def fmt(val):
            return str(int(val)) if val % 1 == 0 else "{:.2f}".format(val)

        def show_pearson(rect, pearson, pen_width):
            """
            Color the given rectangle according to its corresponding
            standardized Pearson residual.

            Args:
                rect (QRect): the rectangle being drawn
                pearson (float): signed standardized pearson residual
                pen_width (int): pen width (bolder pen is used for selection)
            """
            r = rect.rect()
            x, y, w, h = r.x(), r.y(), r.width(), r.height()
            if w == 0 or h == 0:
                return

            r = b = 255
            if pearson > 0:
                r = g = max(255 - 20 * pearson, 55)
            elif pearson < 0:
                b = g = max(255 + 20 * pearson, 55)
            else:
                r = g = b = 224
            rect.setBrush(QBrush(QColor(r, g, b)))
            pen_color = QColor(255 * (r == 255), 255 * (g == 255),
                               255 * (b == 255))
            pen = QPen(pen_color, pen_width)
            rect.setPen(pen)
            # Spacing of the grid lines drawn inside the rectangle.
            if pearson > 0:
                pearson = min(pearson, 10)
                dist = 20 - 1.6 * pearson
            else:
                pearson = max(pearson, -10)
                dist = 20 - 8 * pearson
            pen.setWidth(1)

            def _offseted_line(ax, ay):
                # A zero offset along one axis means "span the whole
                # rectangle" in that direction.
                r = QGraphicsLineItem(x + ax, y + ay, x + (ax or w),
                                      y + (ay or h))
                self.canvas.addItem(r)
                r.setPen(pen)

            ax = dist
            while ax < w:
                _offseted_line(ax, 0)
                ax += dist

            ay = dist
            while ay < h:
                _offseted_line(0, ay)
                ay += dist

        def make_tooltip():
            """Create the tooltip. The function uses local variables from the enclosing scope."""
            # pylint: disable=undefined-loop-variable
            def _oper(attr, txt):
                if self.data.domain[attr.name] == ddomain[attr.name]:
                    return " = "
                return " " if txt[0] in "<≥" else " in "

            xt, yt = ["<b>{attr}{eq}{val_name}</b>: {obs}/{n} ({p:.0f} %)".format(
                attr=to_html(attr.name),
                eq=_oper(attr, val_name),
                val_name=to_html(val_name),
                obs=fmt(prob * n),
                n=int(n),
                p=100 * prob)
                      for attr, val_name, prob in [(attr_x, xval_name, chi.probs_x[x]),
                                                   (attr_y, yval_name, chi.probs_y[y])]]

            ct = """<b>combination of values: </b><br/> expected {exp} ({p_exp:.0f} %)<br/> observed {obs} ({p_obs:.0f} %)""".format(
                exp=fmt(chi.expected[y, x]),
                p_exp=100 * chi.expected[y, x] / n,
                obs=fmt(chi.observed[y, x]),
                p_obs=100 * chi.observed[y, x] / n)

            return f"{xt}<br/>{yt}<hr/>{ct}"

        for item in self.canvas.items():
            self.canvas.removeItem(item)
        if self.data is None or len(self.data) == 0 or \
                self.attr_x is None or self.attr_y is None:
            return

        ddomain = self.discrete_data.domain
        attr_x, attr_y = self.attr_x, self.attr_y
        disc_x, disc_y = ddomain[attr_x.name], ddomain[attr_y.name]
        view = self.canvasView

        chi = ChiSqStats(self.discrete_data, disc_x, disc_y)
        max_ylabel_w = max((width(val) for val in disc_y.values), default=0)
        max_ylabel_w = min(max_ylabel_w, 200)
        x_off = height(attr_y.name) + max_ylabel_w
        y_off = 15
        square_size = min(view.width() - x_off - 35, view.height() - y_off - 80)
        square_size = max(square_size, 10)
        self.canvasView.setSceneRect(0, 0, view.width(), view.height())
        if not disc_x.values or not disc_y.values:
            text_ = "Features {} and {} have no values".format(disc_x, disc_y) \
                if not disc_x.values and \
                   not disc_y.values and \
                   disc_x != disc_y \
                else \
                    "Feature {} has no values".format(
                        disc_x if not disc_x.values else disc_y)
            text(text_, view.width() / 2 + 70, view.height() / 2,
                 Qt.AlignRight | Qt.AlignVCenter)
            return
        n = chi.n
        curr_x = x_off
        max_xlabel_h = 0
        self.areas = []
        for x, (px, xval_name) in enumerate(zip(chi.probs_x, disc_x.values)):
            if px == 0:
                continue
            width = square_size * px

            curr_y = y_off
            for y in range(len(chi.probs_y) - 1, -1, -1):  # bottom-up order
                py = chi.probs_y[y]
                yval_name = disc_y.values[y]
                if py == 0:
                    continue
                height = square_size * py

                # The index this rectangle is about to get in `self.areas`.
                selected = len(self.areas) in self.selection
                rect = CanvasRectangle(
                    self.canvas, curr_x + 2, curr_y + 2, width - 4, height - 4,
                    z=-10, onclick=self.select_area)
                rect.value_pair = x, y
                self.areas.append(rect)
                show_pearson(rect, chi.residuals[y, x], 3 * selected)
                rect.setToolTip(make_tooltip())

                if x == 0:
                    text(yval_name, x_off, curr_y + height / 2,
                         Qt.AlignRight | Qt.AlignVCenter)
                curr_y += height

            xl = text(xval_name, curr_x + width / 2, y_off + square_size,
                      Qt.AlignHCenter | Qt.AlignTop, max_width=width)
            max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h)
            curr_x += width

        bottom = y_off + square_size + max_xlabel_h
        text(attr_y.name, 0, y_off + square_size / 2,
             Qt.AlignLeft | Qt.AlignVCenter, bold=True, vertical=True)
        text(attr_x.name, x_off + square_size / 2, bottom,
             Qt.AlignHCenter | Qt.AlignTop, bold=True)
        bottom += 30
        xl = text("χ²={:.2f}, p={:.3f}".format(chi.chisq, chi.p),
                  0, bottom)
        # Assume similar height for both lines
        text("N = " + fmt(chi.n), 0, bottom - xl.boundingRect().height())

    def get_widget_name_extension(self):
        """Return "<x> vs <y>" for the shown attributes, or None.

        None is returned when there is no data or when no attributes are
        chosen (`set_data` sets both to None if the data has no suitable
        variables).
        """
        if self.data is not None \
                and self.attr_x is not None and self.attr_y is not None:
            return "{} vs {}".format(self.attr_x.name, self.attr_y.name)
        return None

    def send_report(self):
        self.report_plot()
class OWTreeGraph(OWTreeViewer2D):
    """Graphical visualization of tree models"""

    name = "Tree Viewer"
    icon = "icons/TreeViewer.svg"
    priority = 35
    keywords = []

    class Inputs:
        # Had different input names before merging from
        # Classification/Regression tree variants
        tree = Input("Tree", TreeModel,
                     replaces=["Classification Tree", "Regression Tree"])

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True,
                               id="selected-data")
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table,
                                id="annotated-data")

    settingsHandler = ClassValuesContextHandler()
    # 0 means "None" (no target class); i > 0 refers to class value i - 1.
    target_class_index = ContextSetting(0)
    # Index into COL_OPTIONS.
    regression_colors = Setting(0)

    replaces = [
        "Orange.widgets.classify.owclassificationtreegraph.OWClassificationTreeGraph",
        "Orange.widgets.classify.owregressiontreegraph.OWRegressionTreeGraph"
    ]

    COL_OPTIONS = ["Default", "Number of instances", "Mean value", "Variance"]
    COL_DEFAULT, COL_INSTANCE, COL_MEAN, COL_VARIANCE = range(4)

    def __init__(self):
        """Add the color combo box to the display box of the base viewer."""
        super().__init__()
        self.domain = None
        self.dataset = None
        self.clf_dataset = None
        self.tree_adapter = None

        self.color_label = QLabel("Target class: ")
        combo = self.color_combo = gui.OrangeComboBox()
        combo.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed)
        combo.setSizeAdjustPolicy(
            QComboBox.AdjustToMinimumContentsLengthWithIcon)
        combo.setMinimumContentsLength(8)
        combo.activated[int].connect(self.color_changed)
        self.display_box.layout().addRow(self.color_label, combo)

    def set_node_info(self):
        """Set the content of the node"""
        for node in self.scene.nodes():
            node.set_rect(QRectF())
            self.update_node_info(node)
        # All nodes get the width of the widest one, capped at
        # `max_node_width`; the extra 0 covers a scene without nodes.
        w = max([n.rect().width() for n in self.scene.nodes()] + [0])
        if w > self.max_node_width:
            w = self.max_node_width
        for node in self.scene.nodes():
            rect = node.rect()
            node.set_rect(QRectF(rect.x(), rect.y(), w, rect.height()))
        self.scene.fix_pos(self.root_node, 10, 10)

    def _update_node_info_attr_name(self, node, text):
        """Append the node's attribute name (if any) to `text`."""
        attr = self.tree_adapter.attribute(node.node_inst)
        if attr is not None:
            text += "<hr/>{}".format(attr.name)
        return text

    def activate_loaded_settings(self):
        """Apply the loaded color settings to the combo and the nodes."""
        if not self.model:
            return
        super().activate_loaded_settings()
        if self.domain.class_var.is_discrete:
            self.color_combo.setCurrentIndex(self.target_class_index)
            self.toggle_node_color_cls()
        else:
            self.color_combo.setCurrentIndex(self.regression_colors)
            self.toggle_node_color_reg()
        self.set_node_info()

    def color_changed(self, i):
        """Combo callback: store the chosen index and recolor the nodes."""
        if self.domain.class_var.is_discrete:
            self.target_class_index = i
            self.toggle_node_color_cls()
            self.set_node_info()
        else:
            self.regression_colors = i
            self.toggle_node_color_reg()

    def toggle_node_size(self):
        self.set_node_info()
        self.scene.update()
        self.scene_view.repaint()

    def toggle_color_cls(self):
        self.toggle_node_color_cls()
        self.set_node_info()
        self.scene.update()

    def toggle_color_reg(self):
        self.toggle_node_color_reg()
        self.set_node_info()
        self.scene.update()

    @Inputs.tree
    def ctree(self, model=None):
        """Input signal handler"""
        self.clear_scene()
        self.color_combo.clear()
        self.closeContext()
        self.model = model
        self.target_class_index = 0
        if model is None:
            self.infolabel.setText('No tree.')
            self.root_node = None
            self.dataset = None
            self.tree_adapter = None
        else:
            self.tree_adapter = self._get_tree_adapter(model)
            self.domain = model.domain
            self.dataset = model.instances
            if self.dataset is not None and self.dataset.domain != self.domain:
                self.clf_dataset = self.dataset.transform(model.domain)
            else:
                self.clf_dataset = self.dataset
            class_var = self.domain.class_var
            if class_var.is_discrete:
                self.scene.colors = [QColor(*col) for col in class_var.colors]
                self.color_label.setText("Target class: ")
                self.color_combo.addItem("None")
                self.color_combo.addItems(self.domain.class_vars[0].values)
                self.color_combo.setCurrentIndex(self.target_class_index)
            else:
                self.scene.colors = \
                    ContinuousPaletteGenerator(*model.domain.class_var.colors)
                self.color_label.setText("Color by: ")
                self.color_combo.addItems(self.COL_OPTIONS)
                self.color_combo.setCurrentIndex(self.regression_colors)
            self.openContext(self.domain.class_var)
            self.root_node = self.walkcreate(self.tree_adapter.root)
            self.infolabel.setText('{} nodes, {} leaves'.format(
                self.tree_adapter.num_nodes,
                len(self.tree_adapter.leaves(self.tree_adapter.root))))
        self.setup_scene()
        self.Outputs.selected_data.send(None)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.dataset, []))

    def walkcreate(self, node, parent=None):
        """Create a structure of tree nodes from the given model"""
        node_obj = TreeNode(self.tree_adapter, node, parent)
        self.scene.addItem(node_obj)
        if parent:
            edge = GraphicsEdge(node1=parent, node2=node_obj)
            self.scene.addItem(edge)
            parent.graph_add_edge(edge)
        for child_inst in self.tree_adapter.children(node):
            if child_inst is not None:
                self.walkcreate(child_inst, node_obj)
        return node_obj

    def node_tooltip(self, node):
        """Return the rules leading to `node`, one per line, as HTML."""
        return "<br>".join(
            to_html(str(rule))
            for rule in self.tree_adapter.rules(node.node_inst))

    def update_selection(self):
        """Output the instances belonging to the selected nodes."""
        if self.model is None:
            return
        nodes = [
            item.node_inst for item in self.scene.selectedItems()
            if isinstance(item, TreeNode)
        ]
        data = self.tree_adapter.get_instances_in_nodes(nodes)
        self.Outputs.selected_data.send(data)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.dataset,
                                   self.tree_adapter.get_indices(nodes)))

    def send_report(self):
        """Report tree size, edge width mode, coloring and the plot."""
        if not self.model:
            return
        items = [
            ("Tree size", self.infolabel.text()),
            ("Edge widths",
             ("Fixed", "Relative to root", "Relative to parent")[
                 # pylint: disable=invalid-sequence-index
                 self.line_width_method])
        ]
        if self.domain.class_var.is_discrete:
            items.append(("Target class", self.color_combo.currentText()))
        elif self.regression_colors != self.COL_DEFAULT:
            items.append(
                ("Color by", self.COL_OPTIONS[self.regression_colors]))
        self.report_items(items)
        self.report_plot(self.scene)

    def update_node_info(self, node):
        """Dispatch to the classification or regression node text."""
        if self.domain.class_var.is_discrete:
            self.update_node_info_cls(node)
        else:
            self.update_node_info_reg(node)

    def update_node_info_cls(self, node):
        """Update the printed contents of the node for classification trees"""
        node_inst = node.node_inst
        distr = self.tree_adapter.get_distribution(node_inst)[0]
        total = self.tree_adapter.num_samples(node_inst)
        # Guard against an all-zero distribution, as done in
        # `toggle_node_color_cls`; proportions are then reported as 0.
        distr = distr / (np.sum(distr) or 1)
        if self.target_class_index:
            tabs = distr[self.target_class_index - 1]
            text = ""
        else:
            modus = np.argmax(distr)
            tabs = distr[modus]
            text = f"{self.domain.class_vars[0].values[int(modus)]}<br/>"
        if tabs > 0.999:
            text += f"100%, {total}/{total}"
        else:
            text += f"{100 * tabs:2.1f}%, {int(total * tabs)}/{total}"

        text = self._update_node_info_attr_name(node, text)
        node.setHtml(
            f'<p style="line-height: 120%; margin-bottom: 0">{text}</p>')

    def update_node_info_reg(self, node):
        """Update the printed contents of the node for regression trees"""
        node_inst = node.node_inst
        mean, var = self.tree_adapter.get_distribution(node_inst)[0]
        insts = self.tree_adapter.num_samples(node_inst)
        text = f"{mean:.1f} ± {var:.1f}<br/>"
        text += f"{insts} instances"
        text = self._update_node_info_attr_name(node, text)
        node.setHtml(
            f'<p style="line-height: 120%; margin-bottom: 0">{text}</p>')

    def toggle_node_color_cls(self):
        """Update the node color for classification trees"""
        colors = self.scene.colors
        for node in self.scene.nodes():
            distr = node.tree_adapter.get_distribution(node.node_inst)[0]
            # Avoid dividing by zero for an empty distribution; previously
            # only the "no target class" branch was guarded.
            total = sum(distr) or 1
            if self.target_class_index:
                p = distr[self.target_class_index - 1] / total
                color = colors[self.target_class_index - 1].lighter(
                    200 - 100 * p)
            else:
                modus = np.argmax(distr)
                p = distr[modus] / total
                color = colors[int(modus)].lighter(300 - 200 * p)
            node.backgroundBrush = QBrush(color)
        self.scene.update()

    def toggle_node_color_reg(self):
        """Update the node color for regression trees"""
        def_color = QColor(192, 192, 255)
        if self.regression_colors == self.COL_DEFAULT:
            brush = QBrush(def_color.lighter(100))
            for node in self.scene.nodes():
                node.backgroundBrush = brush
        elif self.regression_colors == self.COL_INSTANCE:
            max_insts = len(
                self.tree_adapter.get_instances_in_nodes(
                    [self.tree_adapter.root]))
            for node in self.scene.nodes():
                node_insts = len(
                    self.tree_adapter.get_instances_in_nodes([node.node_inst]))
                node.backgroundBrush = QBrush(
                    def_color.lighter(120 - 20 * node_insts / max_insts))
        elif self.regression_colors == self.COL_MEAN:
            minv = np.nanmin(self.dataset.Y)
            maxv = np.nanmax(self.dataset.Y)
            fact = 1 / (maxv - minv) if minv != maxv else 1
            colors = self.scene.colors
            for node in self.scene.nodes():
                node_mean = self.tree_adapter.get_distribution(
                    node.node_inst)[0][0]
                node.backgroundBrush = QBrush(colors[fact * (node_mean - minv)])
        else:
            nodes = list(self.scene.nodes())
            variances = [
                self.tree_adapter.get_distribution(node.node_inst)[0][1]
                for node in nodes
            ]
            # `default` covers a scene without nodes; `or 1` avoids dividing
            # by zero when every node has zero variance.
            max_var = max(variances, default=0) or 1
            for node, var in zip(nodes, variances):
                node.backgroundBrush = QBrush(
                    def_color.lighter(120 - 20 * var / max_var))
        self.scene.update()

    def _get_tree_adapter(self, model):
        """Return the adapter matching the kind of tree model."""
        if isinstance(model, SklModel):
            return SklTreeAdapter(model)
        return TreeAdapter(model)
class OWRank(OWWidget): name = "Rank" description = "Rank and filter data features by their relevance." icon = "icons/Rank.svg" priority = 1102 keywords = [] buttons_area_orientation = Qt.Vertical class Inputs: data = Input("Data", Table) scorer = Input("Scorer", score.Scorer, multiple=True) class Outputs: reduced_data = Output("Reduced Data", Table, default=True) scores = Output("Scores", Table) features = Output("Features", AttributeList, dynamic=False) SelectNone, SelectAll, SelectManual, SelectNBest = range(4) nSelected = ContextSetting(5) auto_apply = Setting(True) sorting = Setting((0, Qt.DescendingOrder)) selected_methods = Setting(set()) settings_version = 2 settingsHandler = DomainContextHandler() selected_rows = ContextSetting([]) selectionMethod = ContextSetting(SelectNBest) class Information(OWWidget.Information): no_target_var = Msg("Data does not have a single target variable. " "You can still connect in unsupervised scorers " "such as PCA.") missings_imputed = Msg('Missing values will be imputed as needed.') class Error(OWWidget.Error): invalid_type = Msg("Cannot handle target variable type {}") inadequate_learner = Msg("Scorer {} inadequate: {}") no_attributes = Msg("Data does not have a single attribute.") def __init__(self): super().__init__() self.scorers = OrderedDict() self.out_domain_desc = None self.data = None self.problem_type_mode = ProblemType.CLASSIFICATION if not self.selected_methods: self.selected_methods = { method.name for method in SCORES if method.is_default } # GUI self.ranksModel = model = TableModel(parent=self) # type: TableModel self.ranksView = view = TableView(self) # type: TableView self.mainArea.layout().addWidget(view) view.setModel(model) view.setColumnWidth(0, 30) view.selectionModel().selectionChanged.connect(self.on_select) def _set_select_manual(): self.setSelectionMethod(OWRank.SelectManual) view.pressed.connect(_set_select_manual) view.verticalHeader().sectionClicked.connect(_set_select_manual) 
view.horizontalHeader().sectionClicked.connect(self.headerClick) self.measuresStack = stacked = QStackedWidget(self) self.controlArea.layout().addWidget(stacked) for scoring_methods in (CLS_SCORES, REG_SCORES, []): box = gui.vBox(None, "Scoring Methods" if scoring_methods else None) stacked.addWidget(box) for method in scoring_methods: box.layout().addWidget( QCheckBox( method.name, self, objectName=method. shortname, # To be easily found in tests checked=method.name in self.selected_methods, stateChanged=partial(self.methodSelectionChanged, method_name=method.name))) gui.rubber(box) gui.rubber(self.controlArea) self.switchProblemType(ProblemType.CLASSIFICATION) selMethBox = gui.vBox(self.controlArea, "Select Attributes", addSpace=True) grid = QGridLayout() grid.setContentsMargins(6, 0, 6, 0) self.selectButtons = QButtonGroup() self.selectButtons.buttonClicked[int].connect(self.setSelectionMethod) def button(text, buttonid, toolTip=None): b = QRadioButton(text) self.selectButtons.addButton(b, buttonid) if toolTip is not None: b.setToolTip(toolTip) return b b1 = button(self.tr("None"), OWRank.SelectNone) b2 = button(self.tr("All"), OWRank.SelectAll) b3 = button(self.tr("Manual"), OWRank.SelectManual) b4 = button(self.tr("Best ranked:"), OWRank.SelectNBest) s = gui.spin( selMethBox, self, "nSelected", 1, 999, callback=lambda: self.setSelectionMethod(OWRank.SelectNBest)) grid.addWidget(b1, 0, 0) grid.addWidget(b2, 1, 0) grid.addWidget(b3, 2, 0) grid.addWidget(b4, 3, 0) grid.addWidget(s, 3, 1) self.selectButtons.button(self.selectionMethod).setChecked(True) selMethBox.layout().addLayout(grid) gui.auto_send(selMethBox, self, "auto_apply", box=False) self.info.set_input_summary(self.info.NoInput) self.info.set_output_summary(self.info.NoOutput) self.resize(690, 500) def switchProblemType(self, index): """ Switch between discrete/continuous/no_class mode """ self.measuresStack.setCurrentIndex(index) self.problem_type_mode = index @Inputs.data @check_sql_input def 
set_data(self, data): self.closeContext() self.selected_rows = [] self.ranksModel.clear() self.ranksModel.resetSorting(True) self.get_method_scores.cache_clear() # pylint: disable=no-member self.get_scorer_scores.cache_clear() # pylint: disable=no-member self.Error.clear() self.Information.clear() self.Information.missings_imputed( shown=data is not None and data.has_missing()) if data is not None and not data.domain.attributes: data = None self.Error.no_attributes() self.data = data self.switchProblemType(ProblemType.CLASSIFICATION) if self.data is not None: domain = self.data.domain self.info.set_input_summary(len(self.data), format_summary_details(self.data)) if domain.has_discrete_class: problem_type = ProblemType.CLASSIFICATION elif domain.has_continuous_class: problem_type = ProblemType.REGRESSION elif not domain.class_var: self.Information.no_target_var() problem_type = ProblemType.UNSUPERVISED else: # This can happen? self.Error.invalid_type(type(domain.class_var).__name__) problem_type = None if problem_type is not None: self.switchProblemType(problem_type) self.ranksModel.setVerticalHeaderLabels(domain.attributes) self.ranksView.setVHeaderFixedWidthFromLabel( max((a.name for a in domain.attributes), key=len)) self.selectionMethod = OWRank.SelectNBest else: self.info.set_input_summary(self.info.NoInput) self.openContext(data) self.selectButtons.button(self.selectionMethod).setChecked(True) def handleNewSignals(self): self.setStatusMessage('Running') self.updateScores() self.setStatusMessage('') self.on_select() @Inputs.scorer def set_learner(self, scorer, id): # pylint: disable=redefined-builtin if scorer is None: self.scorers.pop(id, None) else: # Avoid caching a (possibly stale) previous instance of the same # Scorer passed via the same signal if id in self.scorers: # pylint: disable=no-member self.get_scorer_scores.cache_clear() self.scorers[id] = ScoreMeta( scorer.name, scorer.name, scorer, ProblemType.from_variable(scorer.class_type), False) 
@memoize_method()
def get_method_scores(self, method):
    """Return the scores of a built-in scoring ``method`` on ``self.data``.

    ``method.scorer`` is instantiated and first applied to the whole table.
    If that raises ``ValueError`` the scorer is applied to each attribute
    separately (a warning is logged); if that also raises ``ValueError`` an
    error is logged and an all-NaN array with one entry per attribute is
    returned.

    The result is memoized by ``memoize_method``.
    """
    # These errors often happen, but they result in nans, which
    # are handled correctly by the widget
    estimator = method.scorer()
    data = self.data
    try:
        scores = np.asarray(estimator(data))
    except ValueError:
        try:
            scores = np.array(
                [estimator(data, attr) for attr in data.domain.attributes])
        except ValueError:
            log.error("%s doesn't work on this data", method.name)
            scores = np.full(len(data.domain.attributes), np.nan)
        else:
            log.warning(
                "%s had to be computed separately for each "
                "variable", method.name)
    return scores

@memoize_method()
def get_scorer_scores(self, scorer):
    """Return ``(scores, labels)`` for an externally supplied ``scorer``.

    ``scores`` is the transposed result of ``scorer.scorer.score_data`` on
    ``self.data``; on ``ValueError``/``TypeError`` an error is logged and a
    single all-NaN column (one row per attribute) is used instead.
    ``labels`` has one entry per score column: the scorer's short name for a
    single column, otherwise the short name suffixed with ``_1``, ``_2``, ...

    The result is memoized by ``memoize_method``.
    """
    try:
        scores = scorer.scorer.score_data(self.data).T
    except (ValueError, TypeError):
        log.error("%s doesn't work on this data", scorer.name)
        scores = np.full((len(self.data.domain.attributes), 1), np.nan)

    labels = ((scorer.shortname, ) if scores.shape[1] == 1 else tuple(
        scorer.shortname + '_' + str(i)
        for i in range(1, 1 + scores.shape[1])))
    return scores, labels

def updateScores(self):
    """Recompute all score columns, refill the ranks model and send scores.

    With no data the model is cleared and ``None`` is sent on the scores
    output. Otherwise the model gets a leading '#' column (number of values
    for discrete attributes, NaN for others) followed by one column per
    selected built-in method and per column of each compatible input scorer.
    The stored sort order is re-applied, the automatic selection refreshed
    and the scores table sent.
    """
    if self.data is None:
        self.ranksModel.clear()
        self.Outputs.scores.send(None)
        return

    # Built-in methods: selected by the user, matching the current problem
    # type and, for sparse data, declaring sparse support.
    methods = [
        method for method in SCORES
        if (method.name in self.selected_methods
            and method.problem_type == self.problem_type_mode and
            (not issparse(self.data.X)
             or method.scorer.supports_sparse_data))
    ]

    # Input scorers: keep those matching the problem type (or unsupervised);
    # report the others through the inadequate_learner error.
    scorers = []
    self.Error.inadequate_learner.clear()
    for scorer in self.scorers.values():
        if scorer.problem_type in (self.problem_type_mode,
                                   ProblemType.UNSUPERVISED):
            scorers.append(scorer)
        else:
            self.Error.inadequate_learner(scorer.name,
                                          scorer.learner_adequacy_err_msg)

    method_scores = tuple(
        self.get_method_scores(method) for method in methods)

    scorer_scores, scorer_labels = (), ()
    if scorers:
        scorer_scores, scorer_labels = zip(*(self.get_scorer_scores(scorer)
                                             for scorer in scorers))
        scorer_labels = tuple(chain.from_iterable(scorer_labels))

    labels = tuple(method.shortname for method in methods) + scorer_labels
    model_array = np.column_stack(([
        len(a.values) if a.is_discrete else np.nan
        for a in self.data.domain.attributes
    ], ) + (method_scores if method_scores else ()) +
                                  (scorer_scores if scorer_scores else ()))
    for column, values in enumerate(model_array.T):
        self.ranksModel.setExtremesFrom(column, values)

    self.ranksModel.wrap(model_array.tolist())
    self.ranksModel.setHorizontalHeaderLabels(('#', ) + labels)
    self.ranksView.setColumnWidth(0, 40)

    # Re-apply sort
    try:
        sort_column, sort_order = self.sorting
        if sort_column < len(labels):
            # adds 1 for '#' (discrete count) column
            self.ranksModel.sort(sort_column + 1, sort_order)
            self.ranksView.horizontalHeader().setSortIndicator(
                sort_column + 1, sort_order)
    except ValueError:
        # self.sorting could not be unpacked into (column, order); keep the
        # model unsorted.
        pass

    self.autoSelection()
    self.Outputs.scores.send(self.create_scores_table(labels))

def on_select(self):
    """Store the view's selected rows as source-row indices and commit."""
    # Save indices of attributes in the original, unsorted domain
    self.selected_rows = list(
        self.ranksModel.mapToSourceRows([
            i.row() for i in self.ranksView.selectionModel().selectedRows(0)
        ]))
    self.commit()

def setSelectionMethod(self, method):
    """Switch to selection ``method``, check its button and reselect."""
    self.selectionMethod = method
    self.selectButtons.button(method).setChecked(True)
    self.autoSelection()

def autoSelection(self):
    """Apply the current selection method to the ranks view.

    ``SelectNone`` clears the selection, ``SelectAll`` selects every row,
    ``SelectNBest`` selects the first ``min(nSelected, rowCount)`` rows in
    the model's current order, and any other method (manual) restores the
    rows stored in ``self.selected_rows``.
    """
    selModel = self.ranksView.selectionModel()
    model = self.ranksModel
    rowCount = model.rowCount()
    columnCount = model.columnCount()

    if self.selectionMethod == OWRank.SelectNone:
        selection = QItemSelection()
    elif self.selectionMethod == OWRank.SelectAll:
        selection = QItemSelection(
            model.index(0, 0), model.index(rowCount - 1, columnCount - 1))
    elif self.selectionMethod == OWRank.SelectNBest:
        nSelected = min(self.nSelected, rowCount)
        selection = QItemSelection(
            model.index(0, 0), model.index(nSelected - 1, columnCount - 1))
    else:
        selection = QItemSelection()
        if self.selected_rows is not None:
            # selected_rows holds source rows; map them to view rows
            for row in model.mapFromSourceRows(self.selected_rows):
                selection.append(
                    QItemSelectionRange(model.index(row, 0),
                                        model.index(row, columnCount - 1)))

    selModel.select(selection, QItemSelectionModel.ClearAndSelect)

def headerClick(self, index):
    """Handle a click on header section ``index`` and remember the sorting."""
    if index >= 1 and self.selectionMethod == OWRank.SelectNBest:
        # Reselect the top ranked attributes
        self.autoSelection()

    # Store the header states
    sort_order = self.ranksModel.sortOrder()
    sort_column = self.ranksModel.sortColumn(
    ) - 1  # -1 for '#' (discrete count) column
    self.sorting = (sort_column, sort_order)

def methodSelectionChanged(self, state, method_name):
    """Add or remove ``method_name`` from the selected methods and rescore.

    ``state`` is the check-box state; ``Qt.Checked`` adds the method, any
    other state removes it if present.
    """
    if state == Qt.Checked:
        self.selected_methods.add(method_name)
    elif method_name in self.selected_methods:
        self.selected_methods.remove(method_name)

    self.updateScores()

def send_report(self):
    """Report the input domain, the ranks table and the output domain."""
    if not self.data:
        return
    self.report_domain("Input", self.data.domain)
    self.report_table("Ranks", self.ranksView, num_format="{:.3f}")
    if self.out_domain_desc is not None:
        self.report_items("Output", self.out_domain_desc)

def commit(self):
    """Send the data reduced to the selected attributes and the attribute list.

    When there is no data or nothing is selected, ``None`` is sent on both
    outputs and the output summary is reset.
    """
    selected_attrs = []
    if self.data is not None:
        selected_attrs = [
            self.data.domain.attributes[i] for i in self.selected_rows
        ]
    if not selected_attrs:
        self.Outputs.reduced_data.send(None)
        self.Outputs.features.send(None)
        self.out_domain_desc = None
        self.info.set_output_summary(self.info.NoOutput)
    else:
        # Keep the class variable and metas; only the attributes are reduced
        reduced_domain = Domain(selected_attrs, self.data.domain.class_var,
                                self.data.domain.metas)
        data = self.data.transform(reduced_domain)
        self.Outputs.reduced_data.send(data)
        self.Outputs.features.send(AttributeList(selected_attrs))
        self.out_domain_desc = report.describe_domain(data.domain)
        self.info.set_output_summary(len(data),
                                     format_summary_details(data))

def create_scores_table(self, labels):
    """Build the output table of scores from the ranks model.

    Returns ``None`` when the model is empty or holds only the '#' column.
    Otherwise returns a table named "Feature Scores" with one continuous
    column per entry of ``labels`` and the feature name as a string meta.
    """
    model_list = self.ranksModel.tolist()
    if not model_list or len(
            model_list[0]) == 1:  # Empty or just n_values column
        return None

    domain = Domain([ContinuousVariable(label) for label in labels],
                    metas=[StringVariable("Feature")])

    # Prevent np.inf scores
    finfo = np.finfo(np.float64)
    # Column 0 is the '#' column and is dropped from the output
    scores = np.clip(np.array(model_list)[:, 1:], finfo.min, finfo.max)

    feature_names = np.array([a.name for a in self.data.domain.attributes])
    # Reshape to 2d array as Table does not like 1d arrays
    feature_names = feature_names[:, None]

    new_table = Table(domain, scores, metas=feature_names)
    new_table.name = "Feature Scores"
    return new_table

@classmethod
def migrate_settings(cls, settings, version):
    """Replace a pre-version-2 ``headerState`` setting with ``sorting``.

    The sort column and order are recovered from the saved header state when
    it is available as bytes; otherwise the default (column 0, descending)
    is stored.
    """
    # If older settings, restore sort header to default
    # Saved selected_rows will likely be incorrect
    if version is None or version < 2:
        column, order = 0, Qt.DescendingOrder
        headerState = settings.pop("headerState", None)

        # Lacking knowledge of last problemType, use discrete ranks view's ordering
        if isinstance(headerState, (tuple, list)):
            headerState = headerState[0]

        if isinstance(headerState, bytes):
            hview = QHeaderView(Qt.Horizontal)
            hview.restoreState(headerState)
            # -1 for the '#' column, as in headerClick
            column, order = hview.sortIndicatorSection(
            ) - 1, hview.sortIndicatorOrder()
        settings["sorting"] = (column, order)

@classmethod
def migrate_context(cls, context, version):
    """Reset the saved row selection of pre-version-2 contexts."""
    if version is None or version < 2:
        # Old selection was saved as sorted indices. New selection is original indices.
        # Since we can't devise the latter without first computing the ranks,
        # just reset the selection to avoid confusion.
        context.values['selected_rows'] = []
class ImagePlot(QWidget, OWComponent, SelectionGroupMixin,
                ImageColorSettingMixin, ImageZoomMixin, ConcurrentMixin):
    """Image view that plots one value per data instance on an x/y grid.

    The x and y positions come from the ``attr_x``/``attr_y`` variables; the
    plotted values are produced by the callable returned from the parent's
    ``integrate_fn()``. The image is computed through ``self.start`` /
    ``on_done`` / ``on_exception`` and supports selecting pixels by click,
    square or polygon.
    """

    attr_x = ContextSetting(None)
    attr_y = ContextSetting(None)
    gamma = Setting(0)

    selection_changed = Signal()
    image_updated = Signal()

    def __init__(self, parent):
        """Build the plot, the view menu and the axis selection controls."""
        QWidget.__init__(self)
        OWComponent.__init__(self, parent)
        SelectionGroupMixin.__init__(self)
        ImageColorSettingMixin.__init__(self)
        ImageZoomMixin.__init__(self)
        ConcurrentMixin.__init__(self)
        self.parent = parent

        self.selection_type = SELECTMANY
        self.saving_enabled = True
        self.selection_enabled = True
        self.viewtype = INDIVIDUAL  # required by InteractiveViewBox
        self.highlighted = None
        # Set by on_done(), reset to None by update_view()
        self.data_points = None
        self.data_values = None
        self.data_imagepixels = None
        self.data_valid_positions = None

        self.plotview = pg.GraphicsLayoutWidget()
        self.plot = pg.PlotItem(background="w",
                                viewBox=InteractiveViewBox(self))
        self.plotview.addItem(self.plot)

        self.legend = ImageColorLegend()
        self.plotview.addItem(self.legend)

        self.plot.scene().installEventFilter(
            HelpEventDelegate(self.help_event, self))

        layout = QVBoxLayout()
        self.setLayout(layout)
        self.layout().setContentsMargins(0, 0, 0, 0)
        self.layout().addWidget(self.plotview)

        self.img = ImageItemNan()
        self.img.setOpts(axisOrder='row-major')
        self.plot.addItem(self.img)
        self.plot.vb.setAspectLocked()
        self.plot.scene().sigMouseMoved.connect(self.plot.vb.mouseMovedEvent)

        layout = QGridLayout()
        self.plotview.setLayout(layout)
        self.button = QPushButton("Menu", self.plotview)
        self.button.setAutoDefault(False)

        layout.setRowStretch(1, 1)
        layout.setColumnStretch(1, 1)
        layout.addWidget(self.button, 0, 0)
        view_menu = MenuFocus(self)
        self.button.setMenu(view_menu)

        # prepare interface according to the new context
        self.parent.contextAboutToBeOpened.connect(
            lambda x: self.init_interface_data(x[0]))

        actions = []

        self.add_zoom_actions(view_menu)

        select_square = QAction(
            "Select (square)",
            self,
            triggered=self.plot.vb.set_mode_select_square,
        )
        select_square.setShortcuts([Qt.Key_S])
        select_square.setShortcutContext(Qt.WidgetWithChildrenShortcut)
        actions.append(select_square)

        select_polygon = QAction(
            "Select (polygon)",
            self,
            triggered=self.plot.vb.set_mode_select_polygon,
        )
        select_polygon.setShortcuts([Qt.Key_P])
        select_polygon.setShortcutContext(Qt.WidgetWithChildrenShortcut)
        actions.append(select_polygon)

        if self.saving_enabled:
            save_graph = QAction(
                "Save graph",
                self,
                triggered=self.save_graph,
            )
            save_graph.setShortcuts(
                [QKeySequence(Qt.ControlModifier | Qt.Key_I)])
            actions.append(save_graph)

        view_menu.addActions(actions)
        self.addActions(actions)

        common_options = dict(labelWidth=50,
                              orientation=Qt.Horizontal,
                              sendSelectedValue=True)

        choose_xy = QWidgetAction(self)
        box = gui.vBox(self)
        box.setFocusPolicy(Qt.TabFocus)
        self.xy_model = DomainModel(DomainModel.METAS | DomainModel.CLASSES,
                                    valid_types=DomainModel.PRIMITIVE)
        self.cb_attr_x = gui.comboBox(box,
                                      self,
                                      "attr_x",
                                      label="Axis x:",
                                      callback=self.update_attr,
                                      model=self.xy_model,
                                      **common_options)
        self.cb_attr_y = gui.comboBox(box,
                                      self,
                                      "attr_y",
                                      label="Axis y:",
                                      callback=self.update_attr,
                                      model=self.xy_model,
                                      **common_options)
        box.setFocusProxy(self.cb_attr_x)

        box.layout().addWidget(self.color_settings_box())

        choose_xy.setDefaultWidget(box)
        view_menu.addAction(choose_xy)

        self.lsx = None  # info about the X axis
        self.lsy = None  # info about the Y axis

        self.data = None
        self.data_ids = {}

    def init_interface_data(self, data):
        """Reinitialise the axis choices unless ``data`` keeps the domain."""
        same_domain = (self.data and data
                       and data.domain == self.data.domain)
        if not same_domain:
            self.init_attr_values(data)

    def help_event(self, ev):
        """Show a tooltip describing the data points under the cursor.

        Returns True when a tooltip was shown and False otherwise.
        """
        pos = self.plot.vb.mapSceneToView(ev.scenePos())
        sel = self._points_at_pos(pos)
        prepared = []
        if sel is not None:
            data, vals, points = self.data[sel], self.data_values[
                sel], self.data_points[sel]
            # The extra variables listed for each point do not depend on the
            # point, so build the list once: metas and class variables that
            # are not already used as an axis.
            variables = [
                var for var in self.data.domain.metas +
                self.data.domain.class_vars
                if var not in [self.attr_x, self.attr_y]
            ]
            for d, v, p in zip(data, vals, points):
                basic = "({}, {}): {}".format(p[0], p[1], v)
                features = [
                    '{} = {}'.format(attr.name, d[attr])
                    for attr in variables
                ]
                prepared.append("\n".join([basic] + features))
        text = "\n\n".join(prepared)
        if text:
            text = ('<span style="white-space:pre">{}</span>'.format(
                escape(text)))
            QToolTip.showText(ev.screenPos(), text, widget=self.plotview)
            return True
        else:
            return False

    def update_attr(self):
        """Callback of the axis combo boxes: recompute the image."""
        self.update_view()

    def init_attr_values(self, data):
        """Point the axis model at ``data``'s domain and pick default axes.

        x defaults to the first model entry and y to the second; with fewer
        than two entries y falls back to x (both ``None`` for an empty model).
        """
        domain = data.domain if data is not None else None
        self.xy_model.set_domain(domain)
        self.attr_x = self.xy_model[0] if self.xy_model else None
        self.attr_y = self.xy_model[1] if len(self.xy_model) >= 2 \
            else self.attr_x

    def save_graph(self):
        """Save the plot view using the parent widget's graph writers."""
        saveplot.save_plot(self.plotview, self.parent.graph_writers)

    def set_data(self, data):
        """Store ``data`` and an id -> row index map; restore the selection.

        Falsy ``data`` clears both the data and the map.
        """
        if data:
            self.data = data
            self.data_ids = {e: i for i, e in enumerate(data.ids)}
            self.restore_selection_settings()
        else:
            self.data = None
            self.data_ids = {}

    def refresh_img_selection(self):
        """Paint the selection groups of validly placed points onto the image."""
        selected_px = np.zeros((self.lsy[2], self.lsx[2]), dtype=np.uint8)
        selected_px[self.data_imagepixels[self.data_valid_positions, 0],
                    self.data_imagepixels[self.data_valid_positions, 1]] = \
            self.selection_group[self.data_valid_positions]
        self.img.setSelection(selected_px)

    def make_selection(self, selected):
        """Add selected indices to the selection.

        The group assigned to ``selected`` depends on the active selection
        modifiers: extend the highest existing group, start a new group,
        remove from the selection (group 0), or replace the whole selection
        with group 1. Settings are prepared for saving and
        ``selection_changed`` is emitted even when no image is shown.
        """
        add_to_group, add_group, remove = selection_modifiers()
        if self.data and self.lsx and self.lsy:
            if add_to_group:  # both keys - need to test it before add_group
                selnum = np.max(self.selection_group)
            elif add_group:
                selnum = np.max(self.selection_group) + 1
            elif remove:
                selnum = 0
            else:
                self.selection_group *= 0
                selnum = 1
            if selected is not None:
                self.selection_group[selected] = selnum
            self.refresh_img_selection()
        self.prepare_settings_for_saving()
        self.selection_changed.emit()

    def select_square(self, p1, p2):
        """ Select elements within a square drawn by the user.
        A selection needs to contain whole pixels """
        x1, y1 = p1.x(), p1.y()
        x2, y2 = p2.x(), p2.y()
        polygon = [
            QPointF(x1, y1),
            QPointF(x2, y1),
            QPointF(x2, y2),
            QPointF(x1, y2),
            QPointF(x1, y1)
        ]
        self.select_polygon(polygon)

    def select_polygon(self, polygon):
        """ Select by a polygon which has to contain whole pixels. """
        if self.data and self.lsx and self.lsy:
            polygon = [(p.x(), p.y()) for p in polygon]
            # a polygon should contain all pixel
            shiftx = _shift(self.lsx)
            shifty = _shift(self.lsy)
            # the four corners of every pixel
            points_edges = [
                self.data_points + [[shiftx, shifty]],
                self.data_points + [[-shiftx, shifty]],
                self.data_points + [[shiftx, -shifty]],
                self.data_points + [[-shiftx, -shifty]]
            ]
            # a pixel is selected only if all of its corners are inside
            inp = in_polygon(points_edges[0], polygon)
            for p in points_edges[1:]:
                inp *= in_polygon(p, polygon)
            self.make_selection(inp)

    def _points_at_pos(self, pos):
        """Return a boolean mask of data points whose pixel contains ``pos``.

        Returns ``None`` when there is no data or no computed image.
        """
        if self.data and self.lsx and self.lsy:
            x, y = pos.x(), pos.y()
            distance = np.abs(self.data_points - [[x, y]])
            sel = (distance[:, 0] < _shift(self.lsx)) * (distance[:, 1] <
                                                         _shift(self.lsy))
            return sel

    def select_by_click(self, pos):
        """Select the data points under ``pos``."""
        sel = self._points_at_pos(pos)
        self.make_selection(sel)

    def update_view(self):
        """Cancel any running computation, clear the image and recompute it.

        When data or either axis variable is missing, no computation is
        started and ``image_updated`` is emitted immediately.
        """
        self.cancel()
        self.parent.Error.image_too_big.clear()
        self.parent.Information.not_shown.clear()
        self.img.clear()
        self.img.setSelection(None)
        self.legend.set_colors(None)
        self.lsx = None
        self.lsy = None
        self.data_points = None
        self.data_values = None
        self.data_imagepixels = None
        self.data_valid_positions = None

        if self.data and self.attr_x and self.attr_y:
            self.start(self.compute_image, self.data, self.attr_x,
                       self.attr_y, self.parent.integrate_fn())
        else:
            self.image_updated.emit()

    @staticmethod
    def compute_image(data: Orange.data.Table, attr_x, attr_y, integrate_fn,
                      state: TaskState):
        """Compute coordinates, axis linspaces and values for the image.

        Returns an object with attributes ``coorx``, ``coory``,
        ``data_points``, ``lsx``, ``lsy`` and ``d`` (first column of
        ``integrate_fn`` applied to ``data``).

        Raises ``InterruptException`` when ``state`` requests interruption
        and ``ImageTooBigException`` when the grid exceeds ``IMAGE_TOO_BIG``.
        """

        def progress_interrupt(i: float):
            if state.is_interruption_requested():
                raise InterruptException

        class Result():
            pass

        res = Result()

        xat = data.domain[attr_x]
        yat = data.domain[attr_y]

        ndom = Domain([xat, yat])
        datam = data.transform(ndom)
        progress_interrupt(0)

        res.coorx = datam.X[:, 0]
        res.coory = datam.X[:, 1]
        res.data_points = datam.X
        res.lsx = lsx = values_to_linspace(res.coorx)
        res.lsy = lsy = values_to_linspace(res.coory)
        progress_interrupt(0)

        if lsx[-1] * lsy[-1] > IMAGE_TOO_BIG:
            raise ImageTooBigException((lsx[-1], lsy[-1]))

        # the code below does this, but part-wise:
        # d = integrate_fn(data).X[:, 0]
        # Working in parts allows an interruption check between parts.
        parts = []
        for part_slice in split_to_size(len(data), 10000):
            part = integrate_fn(data[part_slice]).X[:, 0]
            parts.append(part)
            progress_interrupt(0)
        d = np.concatenate(parts)

        res.d = d
        progress_interrupt(0)

        return res

    def on_done(self, res):
        """Show the image computed by ``compute_image``.

        Points with an undefined x or y index are left out of the image and
        reported through the parent's ``not_shown`` information message.
        """
        self.lsx, self.lsy = res.lsx, res.lsy
        lsx, lsy = self.lsx, self.lsy

        d = res.d

        self.data_points = res.data_points

        xindex, xnan = index_values_nan(res.coorx, self.lsx)
        yindex, ynan = index_values_nan(res.coory, self.lsy)
        self.data_valid_positions = valid = np.logical_not(
            np.logical_or(xnan, ynan))
        invalid_positions = len(d) - np.sum(valid)
        if invalid_positions:
            self.parent.Information.not_shown(invalid_positions)

        # pixels without a data point stay NaN
        imdata = np.full((lsy[2], lsx[2]), np.nan)
        imdata[yindex[valid], xindex[valid]] = d[valid]
        self.data_values = d
        self.data_imagepixels = np.vstack((yindex, xindex)).T

        self.img.setImage(imdata, autoLevels=False)
        self.update_levels()
        self.update_color_schema()

        # shift centres of the pixels so that the axes are useful
        shiftx = _shift(lsx)
        shifty = _shift(lsy)
        left = lsx[0] - shiftx
        bottom = lsy[0] - shifty
        width = (lsx[1] - lsx[0]) + 2 * shiftx
        height = (lsy[1] - lsy[0]) + 2 * shifty
        self.img.setRect(QRectF(left, bottom, width, height))

        self.refresh_img_selection()
        self.image_updated.emit()

    def on_partial_result(self, result):
        """Partial results are not used."""
        pass

    def on_exception(self, ex: Exception):
        """Handle an exception raised by ``compute_image``.

        Interruptions are ignored, an oversized image is reported on the
        parent widget (and ``image_updated`` is emitted); anything else is
        re-raised.
        """
        if isinstance(ex, InterruptException):
            return

        if isinstance(ex, ImageTooBigException):
            self.parent.Error.image_too_big(ex.args[0][0], ex.args[0][1])
            self.image_updated.emit()
        else:
            raise ex
class OWFeatureStatistics(widget.OWWidget):
    """Widget showing a table of per-variable statistics for the input data.

    The rows selected in the table determine the outputs: the input data
    restricted to the selected variables, and a table with the statistics of
    those variables.
    """

    name = 'Feature Statistics'
    description = 'Show basic statistics for data features.'
    icon = 'icons/FeatureStatistics.svg'

    class Inputs:
        data = Input('Data', Table, default=True)

    class Outputs:
        reduced_data = Output('Reduced Data', Table, default=True)
        statistics = Output('Statistics', Table)

    want_main_area = True
    buttons_area_orientation = Qt.Vertical

    settingsHandler = DomainContextHandler()

    auto_commit = ContextSetting(True)
    color_var = ContextSetting(None)  # type: Optional[Variable]
    # filter_string = ContextSetting('')

    # (column, order) last used to sort the table
    sorting = ContextSetting((0, Qt.DescendingOrder))
    # source-row indices of the selected variables
    selected_rows = ContextSetting([])

    def __init__(self):
        """Build the info panel, the histogram colour control and the table."""
        super().__init__()

        self.data = None  # type: Optional[Table]

        # Information panel
        info_box = gui.vBox(self.controlArea, 'Info')
        info_box.setMinimumWidth(200)
        self.info_summary = gui.widgetLabel(info_box, wordWrap=True)
        self.info_attr = gui.widgetLabel(info_box, wordWrap=True)
        self.info_class = gui.widgetLabel(info_box, wordWrap=True)
        self.info_meta = gui.widgetLabel(info_box, wordWrap=True)
        self.set_info()

        # TODO: Implement filtering on the model
        # filter_box = gui.vBox(self.controlArea, 'Filter')
        # self.filter_text = gui.lineEdit(
        #     filter_box, self, value='filter_string',
        #     placeholderText='Filter variables by name',
        #     callback=self._filter_table_variables, callbackOnType=True,
        # )
        # shortcut = QShortcut(QKeySequence('Ctrl+f'), self, self.filter_text.setFocus)
        # shortcut.setWhatsThis('Filter variables by name')

        self.color_var_model = DomainModel(
            valid_types=(ContinuousVariable, DiscreteVariable),
            placeholder='None',
        )
        box = gui.vBox(self.controlArea, 'Histogram')
        self.cb_color_var = gui.comboBox(
            box,
            master=self,
            value='color_var',
            model=self.color_var_model,
            label='Color:',
            orientation=Qt.Horizontal,
        )
        self.cb_color_var.activated.connect(self.__color_var_changed)

        gui.rubber(self.controlArea)
        gui.auto_commit(
            self.buttonsArea,
            self,
            'auto_commit',
            'Send Selected Rows',
            'Send Automatically',
        )

        # Main area
        self.model = FeatureStatisticsTableModel(parent=self)
        self.table_view = FeatureStatisticsTableView(self.model, parent=self)
        self.table_view.selectionModel().selectionChanged.connect(
            self.on_select)
        self.table_view.horizontalHeader().sectionClicked.connect(
            self.on_header_click)

        self.mainArea.layout().addWidget(self.table_view)

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(1050, 500)

    def _filter_table_variables(self):
        """Build a name filter regex from ``self.filter_string``.

        NOTE(review): the ``filter_string`` setting and the only visible call
        of this method are commented out in this class, and the regex built
        here is not applied to anything — this looks like the unfinished
        filtering feature referred to by the TODO in ``__init__``.
        """
        regex = QRegExp(self.filter_string)
        # If the user explicitly types different cases, we assume they know
        # what they are searching for and account for letter case in filter
        different_case = (any(c.islower() for c in self.filter_string)
                          and any(c.isupper() for c in self.filter_string))
        if not different_case:
            regex.setCaseSensitivity(Qt.CaseInsensitive)

    @Inputs.data
    def set_data(self, data):
        """Handle new input data: reset state, load it and restore settings."""
        # Clear outputs and reset widget state
        self.closeContext()
        self.selected_rows = []
        self.model.resetSorting()
        self.Outputs.reduced_data.send(None)
        self.Outputs.statistics.send(None)

        # Setup widget state for new data and restore settings
        self.data = data

        if data is not None:
            self.color_var_model.set_domain(data.domain)
            # Default colour: the first class variable, if there is one
            self.color_var = None
            if self.data.domain.class_vars:
                self.color_var = self.data.domain.class_vars[0]
        else:
            self.color_var_model.set_domain(None)
            self.color_var = None
        self.model.set_data(data)

        self.openContext(self.data)
        self.__restore_selection()
        self.__restore_sorting()
        # self._filter_table_variables()
        self.__color_var_changed()

        self.set_info()
        self.commit()

    def __restore_selection(self):
        """Restore the selection on the table view from saved settings."""
        selection_model = self.table_view.selectionModel()
        selection = QItemSelection()
        if len(self.selected_rows):
            # selected_rows holds source rows; map them to view rows
            for row in self.model.mapFromSourceRows(self.selected_rows):
                selection.append(
                    QItemSelectionRange(
                        self.model.index(row, 0),
                        self.model.index(row,
                                         self.model.columnCount() - 1)))
        selection_model.select(selection, QItemSelectionModel.ClearAndSelect)

    def __restore_sorting(self):
        """Restore the sort column and order from saved settings."""
        sort_column, sort_order = self.sorting
        if self.data is not None and sort_column < self.model.columnCount():
            self.model.sort(sort_column, sort_order)
            self.table_view.horizontalHeader().setSortIndicator(
                sort_column, sort_order)

    @pyqtSlot(int)
    def on_header_click(self, *_):
        """Remember the model's sort column and order after a header click."""
        # Store the header states
        sort_order = self.model.sortOrder()
        sort_column = self.model.sortColumn()
        self.sorting = sort_column, sort_order

    @pyqtSlot(int)
    def __color_var_changed(self, *_):
        """Pass the chosen colour variable to the model as its target."""
        if self.model is not None:
            self.model.set_target_var(self.color_var)

    def _format_variables_string(self, variables):
        """Return a summary of ``variables`` counted by exact variable type.

        Types listed in the model's ``HIDDEN_VAR_TYPES`` are marked with
        "(not shown)". Returns 'No variables' when none of the variables is
        of a listed type.
        """
        agg = []
        for var_type_name, var_type in [('categorical', DiscreteVariable),
                                        ('numeric', ContinuousVariable),
                                        ('time', TimeVariable),
                                        ('string', StringVariable)]:
            # Disable pylint here because a `TimeVariable` is also a
            # `ContinuousVariable`, and should be labelled as such. That is why
            # it is necessary to check the type this way instead of using
            # `isinstance`, which would fail in the above case
            var_type_list = [v for v in variables if type(v) is var_type]  # pylint: disable=unidiomatic-typecheck
            if var_type_list:
                shown = var_type in self.model.HIDDEN_VAR_TYPES
                # (description, count) pair; the bool indexes the suffix list
                agg.append(('%d %s%s' % (len(var_type_list), var_type_name,
                                         ['', ' (not shown)'][shown]),
                            len(var_type_list)))

        if not agg:
            return 'No variables'

        attrs, counts = list(zip(*agg))
        if len(attrs) > 1:
            var_string = ', '.join(attrs[:-1]) + ' and ' + attrs[-1]
        else:
            var_string = attrs[0]
        return plural('%s variable{s}' % var_string, sum(counts))

    def set_info(self):
        """Update the info labels to describe the current data (or its absence)."""
        if self.data is not None:
            self.info_summary.setText(
                '<b>%s</b> contains %s with %s' %
                (self.data.name,
                 plural('{number} instance{s}', self.model.n_instances),
                 plural('{number} feature{s}', self.model.n_attributes)))

            self.info_attr.setText(
                '<b>Attributes:</b><br>%s' %
                self._format_variables_string(self.data.domain.attributes))
            self.info_class.setText(
                '<b>Class variables:</b><br>%s' %
                self._format_variables_string(self.data.domain.class_vars))
            self.info_meta.setText(
                '<b>Metas:</b><br>%s' %
                self._format_variables_string(self.data.domain.metas))
        else:
            self.info_summary.setText('No data on input.')
            self.info_attr.setText('')
            self.info_class.setText('')
            self.info_meta.setText('')

    def on_select(self):
        """Store the view's selected rows as source-row indices and commit."""
        self.selected_rows = self.model.mapToSourceRows(
            [i.row() for i in self.table_view.selectionModel().selectedRows()])
        self.commit()

    def commit(self):
        """Send the selected variables' data and their statistics.

        With an empty selection ``None`` is sent on both outputs.
        """
        if not len(self.selected_rows):
            self.Outputs.reduced_data.send(None)
            self.Outputs.statistics.send(None)
            return

        # Send a table with only selected columns to output
        variables = self.model.variables[self.selected_rows]
        self.Outputs.reduced_data.send(self.data[:, variables])

        # Send the statistics of the selected variables to output
        labels, data = self.model.get_statistics_matrix(variables,
                                                        return_labels=True)
        # Column vector of names, used as the single meta column
        var_names = np.atleast_2d([var.name for var in variables]).T
        domain = Domain(
            attributes=[ContinuousVariable(name) for name in labels],
            metas=[StringVariable('Feature')])
        statistics = Table(domain, data, metas=var_names)
        statistics.name = '%s (Feature Statistics)' % self.data.name
        self.Outputs.statistics.send(statistics)

    def send_report(self):
        """Reporting is not implemented for this widget."""
        pass
class OWHyper(OWWidget):
    """Widget combining an image plot with a spectra (curve) plot.

    The image values are either an integral computed from the spectra
    (``value_type == 0``) or the values of a chosen feature
    (``value_type == 1``). Selecting pixels in the image determines the
    "Selection" and annotated-data outputs and the curves that are shown.
    """

    name = "HyperSpectra"

    class Inputs:
        data = Input("Data", Orange.data.Table, default=True)

    class Outputs:
        selected_data = Output("Selection", Orange.data.Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    icon = "icons/hyper.svg"
    priority = 20
    replaces = ["orangecontrib.infrared.widgets.owhyper.OWHyper"]

    settings_version = 4
    settingsHandler = DomainContextHandler()

    imageplot = SettingProvider(ImagePlot)
    curveplot = SettingProvider(CurvePlotHyper)

    integration_method = Setting(0)  # index into integration_methods
    integration_methods = Integrate.INTEGRALS
    value_type = Setting(0)  # 0: integral of spectra, 1: feature value
    attr_value = ContextSetting(None)

    # positions of the movable lines: integration limits and peak position
    lowlim = Setting(None)
    highlim = Setting(None)
    choose = Setting(None)

    graph_name = "imageplot.plotview"  # defined so that the save button is shown

    class Warning(OWWidget.Warning):
        threshold_error = Msg("Low slider should be less than High")

    class Error(OWWidget.Error):
        image_too_big = Msg("Image for chosen features is too big ({} x {}).")

    class Information(OWWidget.Information):
        not_shown = Msg("Undefined positions: {} data point(s) are not shown.")

    @classmethod
    def migrate_settings(cls, settings_, version):
        """Migrate stored settings from older versions, best-effort.

        Both steps swallow any exception on purpose, so settings that do not
        have the expected shape are left unchanged.
        """
        if version < 2:
            # delete the saved attr_value to prevent crashes
            try:
                del settings_["context_settings"][0].values["attr_value"]
            except:  # pylint: disable=bare-except
                pass

        # migrate selection
        if version <= 2:
            try:
                current_context = settings_["context_settings"][0]
                selection = getattr(current_context, "selection", None)
                if selection is not None:
                    # convert the old selection into (index, group 1) pairs
                    # for its nonzero entries
                    selection = [(i, 1)
                                 for i in np.flatnonzero(np.array(selection))]
                    settings_.setdefault(
                        "imageplot", {})["selection_group_saved"] = selection
            except:  # pylint: disable=bare-except
                pass

    @classmethod
    def migrate_context(cls, context, version):
        """Delegate migration of the curve plot's context values (version <= 3)."""
        if version <= 3 and "curveplot" in context.values:
            CurvePlot.migrate_context_sub_feature_color(
                context.values["curveplot"], version)

    def __init__(self):
        """Build the controls, the two plots and the movable marking lines."""
        super().__init__()

        dbox = gui.widgetBox(self.controlArea, "Image values")

        rbox = gui.radioButtons(dbox,
                                self,
                                "value_type",
                                callback=self._change_integration)

        gui.appendRadioButton(rbox, "From spectra")

        self.box_values_spectra = gui.indentedBox(rbox)

        gui.comboBox(self.box_values_spectra,
                     self,
                     "integration_method",
                     items=(a.name for a in self.integration_methods),
                     callback=self._change_integral_type)
        gui.rubber(self.controlArea)

        gui.appendRadioButton(rbox, "Use feature")

        self.box_values_feature = gui.indentedBox(rbox)

        self.feature_value_model = DomainModel(
            DomainModel.METAS | DomainModel.CLASSES,
            valid_types=DomainModel.PRIMITIVE)
        self.feature_value = gui.comboBox(self.box_values_feature,
                                          self,
                                          "attr_value",
                                          callback=self.update_feature_value,
                                          model=self.feature_value_model)

        splitter = QSplitter(self)
        splitter.setOrientation(Qt.Vertical)
        self.imageplot = ImagePlot(self)
        self.imageplot.selection_changed.connect(self.output_image_selection)

        self.curveplot = CurvePlotHyper(self, select=SELECTONE)
        self.curveplot.selection_changed.connect(self.redraw_integral_info)
        self.curveplot.plot.vb.x_padding = 0.005  # pad view so that lines are not hidden
        splitter.addWidget(self.imageplot)
        splitter.addWidget(self.curveplot)
        self.mainArea.layout().addWidget(splitter)

        # line1/line2: integration limits; line3: peak position. Moving a
        # line writes its position back into the corresponding setting.
        self.line1 = MovableVline(position=self.lowlim,
                                  label="",
                                  report=self.curveplot)
        self.line1.sigMoved.connect(lambda v: setattr(self, "lowlim", v))
        self.line2 = MovableVline(position=self.highlim,
                                  label="",
                                  report=self.curveplot)
        self.line2.sigMoved.connect(lambda v: setattr(self, "highlim", v))
        self.line3 = MovableVline(position=self.choose,
                                  label="",
                                  report=self.curveplot)
        self.line3.sigMoved.connect(lambda v: setattr(self, "choose", v))
        for line in [self.line1, self.line2, self.line3]:
            line.sigMoveFinished.connect(self.changed_integral_range)
            self.curveplot.add_marking(line)
            line.hide()

        self.markings_integral = []

        self.data = None
        # While True, changed_integral_range() does nothing
        self.disable_integral_range = False

        self.resize(900, 700)
        self._update_integration_type()

        # prepare interface according to the new context
        self.contextAboutToBeOpened.connect(
            lambda x: self.init_interface_data(x[0]))

    def init_interface_data(self, data):
        """Reinitialise the feature choice unless ``data`` keeps the domain."""
        same_domain = (self.data and data
                       and data.domain == self.data.domain)
        if not same_domain:
            self.init_attr_values(data)

    def output_image_selection(self):
        """Send the image selection and show the matching curves.

        With no data, ``None`` is sent on both outputs and the curve plot is
        cleared. With an empty selection the curve plot shows all data.
        """
        if not self.data:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(None)
            self.curveplot.set_data(None)
            return

        indices = np.flatnonzero(self.imageplot.selection_group)

        annotated_data = groups_or_annotated_table(
            self.data, self.imageplot.selection_group)
        self.Outputs.annotated_data.send(annotated_data)

        selected = self.data[indices]
        self.Outputs.selected_data.send(selected if selected else None)
        if selected:
            self.curveplot.set_data(selected)
        else:
            self.curveplot.set_data(self.data)

    def init_attr_values(self, data):
        """Point the feature model at ``data``'s domain; default to its first entry."""
        domain = data.domain if data is not None else None
        self.feature_value_model.set_domain(domain)
        self.attr_value = self.feature_value_model[
            0] if self.feature_value_model else None

    def redraw_integral_info(self):
        """Redraw the integral markings for the first selected curve.

        The markings are cleared (drawn from an empty dict) when the image
        values are not an ``Integrate`` result or no curve is selected.
        """
        di = {}
        integrate = self.integrate_fn()
        if isinstance(integrate, Integrate) and np.any(
                self.curveplot.selection_group):
            # curveplot can have a subset of curves on the input> match IDs
            ind = np.flatnonzero(self.curveplot.selection_group)[0]
            dind = self.imageplot.data_ids[self.curveplot.data[ind].id]
            dshow = self.data[dind:dind + 1]
            datai = integrate(dshow)
            draw_info = datai.domain.attributes[0].compute_value.draw_info
            di = draw_info(dshow)
        self.refresh_markings(di)

    def refresh_markings(self, di):
        """Refresh the integral markings on the curve plot from ``di``."""
        refresh_integral_markings([{
            "draw": di
        }], self.markings_integral, self.curveplot)

    def integrate_fn(self):
        """Return the callable that maps a data table to the image values.

        For ``value_type == 0`` this is an ``Integrate`` instance using the
        chosen method, limited to ``[lowlim, highlim]`` or, for
        ``Integrate.PeakAt``, to ``[choose, choose]``. Otherwise it is a
        function transforming the data to a domain holding only
        ``attr_value``.
        """
        if self.value_type == 0:  # integrals
            imethod = self.integration_methods[self.integration_method]

            if imethod != Integrate.PeakAt:
                return Integrate(methods=imethod,
                                 limits=[[self.lowlim, self.highlim]])
            else:
                return Integrate(methods=imethod,
                                 limits=[[self.choose, self.choose]])
        else:
            # attr is bound as a default so that the returned function keeps
            # the feature chosen at the time of this call
            return lambda data, attr=self.attr_value: \
                data.transform(Domain([data.domain[attr]]))

    def redraw_data(self):
        """Redraw the integral markings and recompute the image."""
        self.redraw_integral_info()
        self.imageplot.update_view()

    def update_feature_value(self):
        """Callback of the feature combo box: redraw everything."""
        self.redraw_data()

    def _update_integration_type(self):
        """Enable the controls and show the lines matching the value type."""
        self.line1.hide()
        self.line2.hide()
        self.line3.hide()
        if self.value_type == 0:
            self.box_values_spectra.setDisabled(False)
            self.box_values_feature.setDisabled(True)
            if self.integration_methods[
                    self.integration_method] != Integrate.PeakAt:
                self.line1.show()
                self.line2.show()
            else:
                self.line3.show()
        elif self.value_type == 1:
            self.box_values_spectra.setDisabled(True)
            self.box_values_feature.setDisabled(False)
        QTest.qWait(1)  # first update the interface

    def _change_integration(self):
        """Callback of the value-type radio buttons."""
        # change what to show on the image
        self._update_integration_type()
        self.redraw_data()

    def changed_integral_range(self):
        """Redraw after a line was moved, unless updates are disabled."""
        if self.disable_integral_range:
            return
        self.redraw_data()

    def _change_integral_type(self):
        """Callback of the integration-method combo box."""
        self._change_integration()

    @Inputs.data
    def set_data(self, data):
        """Handle new input data.

        A context is opened only when the data has at least one discrete or
        continuous variable among its metas and class variables.
        """
        self.closeContext()

        def valid_context(data):
            if data is None:
                return False
            annotation_features = [
                v for v in data.domain.metas + data.domain.class_vars
                if isinstance(v, (DiscreteVariable, ContinuousVariable))
            ]
            return len(annotation_features) >= 1

        if valid_context(data):
            self.openContext(data)
        else:
            # to generate valid interface even if context was not loaded
            self.contextAboutToBeOpened.emit([data])
        self.data = data
        self.imageplot.set_data(data)
        self.curveplot.set_data(data)
        self._init_integral_boundaries()
        self.imageplot.update_view()
        self.output_image_selection()

    def _init_integral_boundaries(self):
        """Fit the stored line positions to the curve plot's x range.

        ``lowlim``/``highlim`` are reset to the range ends when unset or
        outside the range; ``choose`` defaults to the middle of the range and
        is otherwise clamped into it. The range is [0, 1] when the curve plot
        has no x values.
        """
        # requires data in curveplot
        # Suppress changed_integral_range() while the lines are repositioned
        self.disable_integral_range = True
        if self.curveplot.data_x is not None and len(self.curveplot.data_x):
            minx = self.curveplot.data_x[0]
            maxx = self.curveplot.data_x[-1]
        else:
            minx = 0.
            maxx = 1.

        if self.lowlim is None or not minx <= self.lowlim <= maxx:
            self.lowlim = minx
        self.line1.setValue(self.lowlim)

        if self.highlim is None or not minx <= self.highlim <= maxx:
            self.highlim = maxx
        self.line2.setValue(self.highlim)

        if self.choose is None:
            self.choose = (minx + maxx) / 2
        elif self.choose < minx:
            self.choose = minx
        elif self.choose > maxx:
            self.choose = maxx
        self.line3.setValue(self.choose)
        self.disable_integral_range = False

    def save_graph(self):
        """Save the image plot."""
        self.imageplot.save_graph()

    def onDeleteWidget(self):
        """Shut down both plots before the widget is deleted."""
        self.curveplot.shutdown()
        self.imageplot.shutdown()
        super().onDeleteWidget()
class OWNomogram(OWWidget):
    """Widget that draws a nomogram for a Naive Bayes or Logistic Regression
    classifier.

    The scene consists of a header ruler ("Points"), one row per displayed
    feature and a footer ruler ("Probabilities (%)"). The same scene is shown
    through three views (top, middle, bottom), each clipped to its own part.
    """
    name = "Nomogram"
    description = " Nomograms for Visualization of Naive Bayesian" \
                  " and Logistic Regression Classifiers."
    icon = "icons/Nomogram.svg"
    priority = 2000
    keywords = []

    class Inputs:
        classifier = Input("Classifier", Model)
        data = Input("Data", Table)

    class Outputs:
        features = Output("Features", AttributeList)

    MAX_N_ATTRS = 1000
    POINT_SCALE = 0
    ALIGN_LEFT = 0
    ALIGN_ZERO = 1
    ACCEPTABLE = (NaiveBayesModel, LogisticRegressionClassifier)
    settingsHandler = ClassValuesContextHandler()
    target_class_index = ContextSetting(0)
    normalize_probabilities = Setting(False)
    scale = Setting(1)
    display_index = Setting(1)
    n_attributes = Setting(10)
    sort_index = Setting(SortBy.ABSOLUTE)
    cont_feature_dim_index = Setting(0)

    graph_name = "scene"

    class Error(OWWidget.Error):
        invalid_classifier = Msg("Nomogram accepts only Naive Bayes and "
                                 "Logistic Regression classifiers.")

    def __init__(self):
        """Initialise the widget state and build the controls and views."""
        super().__init__()
        self.instances = None
        self.domain = None
        self.data = None
        self.classifier = None
        self.align = OWNomogram.ALIGN_ZERO
        self.log_odds_ratios = []
        self.log_reg_coeffs = []
        self.log_reg_coeffs_orig = []
        self.log_reg_cont_data_extremes = []
        self.p = None
        self.b0 = None
        self.points = []
        self.feature_items = {}
        self.feature_marker_values = []
        self.scale_marker_values = lambda x: x
        # Assigned in update_scene/clear_scene; defined here so that the
        # attribute exists on a freshly constructed widget as well.
        self.nomogram = None
        self.nomogram_main = None
        self.vertical_line = None
        self.hidden_vertical_line = None
        self.old_target_class_index = self.target_class_index
        self.repaint = False

        # GUI
        box = gui.vBox(self.controlArea, "Target class")
        self.class_combo = gui.comboBox(
            box, self, "target_class_index",
            callback=self._class_combo_changed, contentsLength=12)
        self.norm_check = gui.checkBox(
            box, self, "normalize_probabilities", "Normalize probabilities",
            hidden=True, callback=self.update_scene,
            tooltip="For multiclass data 1 vs. all probabilities do not"
                    " sum to 1 and therefore could be normalized.")

        self.scale_radio = gui.radioButtons(
            self.controlArea, self, "scale",
            ["Point scale", "Log odds ratios"],
            box="Scale", callback=self.update_scene)

        box = gui.vBox(self.controlArea, "Display features")
        grid = QGridLayout()
        radio_group = gui.radioButtonsInBox(
            box, self, "display_index", [], orientation=grid,
            callback=self.update_scene)
        radio_all = gui.appendRadioButton(
            radio_group, "All", addToLayout=False)
        radio_best = gui.appendRadioButton(
            radio_group, "Best ranked:", addToLayout=False)
        spin_box = gui.hBox(None, margin=0)
        self.n_spin = gui.spin(
            spin_box, self, "n_attributes", 1, self.MAX_N_ATTRS, label=" ",
            controlWidth=60, callback=self._n_spin_changed)
        grid.addWidget(radio_all, 1, 1)
        grid.addWidget(radio_best, 2, 1)
        grid.addWidget(spin_box, 2, 2)

        self.sort_combo = gui.comboBox(
            box, self, "sort_index", label="Rank by:", items=SortBy.items(),
            orientation=Qt.Horizontal, callback=self.update_scene)

        self.cont_feature_dim_combo = gui.comboBox(
            box, self, "cont_feature_dim_index", label="Numeric features: ",
            items=["1D projection", "2D curve"], orientation=Qt.Horizontal,
            callback=self.update_scene)

        gui.rubber(self.controlArea)

        class _GraphicsView(QGraphicsView):
            # Base view: fills in default view properties unless the caller
            # passed its own values for them.
            def __init__(self, scene, parent, **kwargs):
                for k, v in dict(
                        verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
                        horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
                        viewportUpdateMode=QGraphicsView.
                        BoundingRectViewportUpdate,
                        renderHints=(QPainter.Antialiasing |
                                     QPainter.TextAntialiasing |
                                     QPainter.SmoothPixmapTransform),
                        alignment=(Qt.AlignTop | Qt.AlignLeft),
                        sizePolicy=QSizePolicy(
                            QSizePolicy.MinimumExpanding,
                            QSizePolicy.MinimumExpanding)).items():
                    kwargs.setdefault(k, v)

                super().__init__(scene, parent, **kwargs)

        class GraphicsView(_GraphicsView):
            # Middle view: redraws the scene whenever its width changes.
            def __init__(self, scene, parent):
                super().__init__(
                    scene, parent,
                    verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                    styleSheet='QGraphicsView {background: white}')
                self.viewport().setMinimumWidth(
                    300)  # XXX: This prevents some tests failing
                self._is_resizing = False

            # the owning widget, captured from the enclosing __init__
            w = self

            def resizeEvent(self, resizeEvent):
                # Recompute main scene on window width change
                if resizeEvent.size().width() != \
                        resizeEvent.oldSize().width():
                    self._is_resizing = True
                    self.w.update_scene()
                    self._is_resizing = False
                return super().resizeEvent(resizeEvent)

            def is_resizing(self):
                return self._is_resizing

            def sizeHint(self):
                return QSize(400, 200)

        class FixedSizeGraphicsView(_GraphicsView):
            # Top/bottom views: vertical size policy is Minimum.
            def __init__(self, scene, parent):
                super().__init__(scene, parent,
                                 sizePolicy=QSizePolicy(
                                     QSizePolicy.MinimumExpanding,
                                     QSizePolicy.Minimum))

            def sizeHint(self):
                return QSize(400, 85)

        scene = self.scene = QGraphicsScene(self)

        top_view = self.top_view = FixedSizeGraphicsView(scene, self)
        mid_view = self.view = GraphicsView(scene, self)
        bottom_view = self.bottom_view = FixedSizeGraphicsView(scene, self)

        for view in (top_view, mid_view, bottom_view):
            self.mainArea.layout().addWidget(view)

    def _class_combo_changed(self):
        """Convert marker values from the previous target class to the newly
        selected one, then redraw."""
        with np.errstate(invalid='ignore'):
            coeffs = [
                np.nan_to_num(p[self.target_class_index] /
                              p[self.old_target_class_index])
                for p in self.points
            ]
        points = [p[self.old_target_class_index] for p in self.points]
        self.feature_marker_values = [
            self.get_points_from_coeffs(v, c, p)
            for (v, c, p) in zip(self.feature_marker_values, coeffs, points)
        ]
        self.feature_marker_values = np.asarray(self.feature_marker_values)
        self.update_scene()
        self.old_target_class_index = self.target_class_index

    def _n_spin_changed(self):
        """Switch to "Best ranked" display and redraw."""
        self.display_index = 1
        self.update_scene()

    def update_controls(self):
        """Synchronise the controls and the alignment mode with the current
        domain and classifier."""
        self.class_combo.clear()
        self.norm_check.setHidden(True)
        self.cont_feature_dim_combo.setEnabled(True)
        if self.domain is not None:
            self.class_combo.addItems(self.domain.class_vars[0].values)
            if len(self.domain.attributes) > self.MAX_N_ATTRS:
                self.display_index = 1
            if len(self.domain.class_vars[0].values) > 2:
                self.norm_check.setHidden(False)
            if not self.domain.has_continuous_attributes():
                self.cont_feature_dim_combo.setEnabled(False)
                self.cont_feature_dim_index = 0
        model = self.sort_combo.model()
        item = model.item(SortBy.POSITIVE)
        item.setFlags(item.flags() | Qt.ItemIsEnabled)
        item = model.item(SortBy.NEGATIVE)
        item.setFlags(item.flags() | Qt.ItemIsEnabled)
        self.align = OWNomogram.ALIGN_ZERO
        if self.classifier and isinstance(self.classifier,
                                          LogisticRegressionClassifier):
            self.align = OWNomogram.ALIGN_LEFT

    @Inputs.data
    def set_data(self, data):
        """Store the data input, reset marker values and redraw."""
        self.instances = data
        self.feature_marker_values = []
        self.set_feature_marker_values()
        self.update_scene()

    @Inputs.classifier
    def set_classifier(self, classifier):
        """Accept a new classifier, recompute the points and redraw.

        Classifiers that are not instances of ``ACCEPTABLE`` raise the
        ``invalid_classifier`` error message and are treated as no input.
        """
        self.closeContext()
        self.classifier = classifier
        self.Error.clear()
        if self.classifier and not isinstance(self.classifier,
                                              self.ACCEPTABLE):
            self.Error.invalid_classifier()
            self.classifier = None
        self.domain = self.classifier.domain if self.classifier else None
        self.data = None
        self.calculate_log_odds_ratios()
        self.calculate_log_reg_coefficients()
        self.update_controls()
        self.target_class_index = 0
        self.openContext(
            self.domain.class_var if self.domain is not None else None)
        self.points = self.log_odds_ratios or self.log_reg_coeffs
        self.feature_marker_values = []
        self.old_target_class_index = self.target_class_index
        self.update_scene()

    def calculate_log_odds_ratios(self):
        """Fill ``log_odds_ratios`` and ``p`` for a Naive Bayes classifier.

        Both are reset first; nothing else happens for other classifiers.
        """
        self.log_odds_ratios = []
        self.p = None
        if self.classifier is None or self.domain is None:
            return
        if not isinstance(self.classifier, NaiveBayesModel):
            return

        log_cont_prob = self.classifier.log_cont_prob
        class_prob = self.classifier.class_prob
        for i in range(len(self.domain.attributes)):
            ca = np.exp(log_cont_prob[i]) * class_prob[:, None]
            _or = (ca / (1 - ca)) / (class_prob / (1 - class_prob))[:, None]
            self.log_odds_ratios.append(np.log(_or))
        self.p = class_prob

    def calculate_log_reg_coefficients(self):
        """Fill the logistic-regression coefficients, ``b0`` and the
        per-feature data extremes.

        All are reset first; nothing else happens for other classifiers.
        Also replaces ``self.domain`` with the reconstructed domain and sets
        ``self.data`` to the classifier's original data in that domain.
        """
        self.log_reg_coeffs = []
        self.log_reg_cont_data_extremes = []
        self.b0 = None
        if self.classifier is None or self.domain is None:
            return
        if not isinstance(self.classifier, LogisticRegressionClassifier):
            return

        self.domain = self.reconstruct_domain(
            self.classifier.original_domain, self.domain)
        self.data = self.classifier.original_data.transform(self.domain)
        # One coefficient column per value of a discrete attribute, a single
        # column for any other attribute.
        attrs, ranges, start = self.domain.attributes, [], 0
        for attr in attrs:
            stop = start + len(attr.values) if attr.is_discrete else start + 1
            ranges.append(slice(start, stop))
            start = stop

        self.b0 = self.classifier.intercept
        coeffs = self.classifier.coefficients
        if len(self.domain.class_var.values) == 2:
            # binary class: add a negated row so both classes have one
            self.b0 = np.hstack((self.b0 * (-1), self.b0))
            coeffs = np.vstack((coeffs * (-1), coeffs))
        self.log_reg_coeffs = [coeffs[:, ranges[i]] for i in range(len(attrs))]
        self.log_reg_coeffs_orig = self.log_reg_coeffs.copy()

        min_values = nanmin(self.data.X, axis=0)
        max_values = nanmax(self.data.X, axis=0)

        for i, min_t, max_t in zip(range(len(self.log_reg_coeffs)),
                                   min_values, max_values):
            if self.log_reg_coeffs[i].shape[1] == 1:
                coef = self.log_reg_coeffs[i]
                self.log_reg_coeffs[i] = np.hstack(
                    (coef * min_t, coef * max_t))
                self.log_reg_cont_data_extremes.append(
                    [sorted([min_t, max_t], reverse=(c < 0)) for c in coef])
            else:
                self.log_reg_cont_data_extremes.append([None])

    def update_scene(self):
        """Rebuild the whole scene and send the displayed features.

        Sends ``None`` and leaves the scene empty when there is no domain or
        there are no points to show.
        """
        self.clear_scene()
        # `self.points` is an empty list when the classifier's domain has no
        # attributes; check that before indexing its first element.
        if self.domain is None or not len(self.points) \
                or not len(self.points[0]):
            self.Outputs.features.send(None)
            return

        n_attrs = self.n_attributes if self.display_index else int(1e10)
        attr_inds, attributes = zip(*self.get_ordered_attributes()[:n_attrs])
        self.Outputs.features.send(AttributeList(attributes))

        name_items = [QGraphicsTextItem(attr.name) for attr in attributes]
        point_text = QGraphicsTextItem("Points")
        probs_text = QGraphicsTextItem("Probabilities (%)")
        all_items = name_items + [point_text, probs_text]
        name_offset = -max(t.boundingRect().width() for t in all_items) - 10
        w = self.view.viewport().rect().width()
        max_width = w + name_offset - 30

        points = [self.points[i][self.target_class_index] for i in attr_inds]
        if self.align == OWNomogram.ALIGN_LEFT:
            points = [p - p.min() for p in points]
        max_ = np.nan_to_num(max(max(abs(p)) for p in points))
        d = 100 / max_ if max_ else 1
        minimums = [p[self.target_class_index].min() for p in self.points]
        if self.scale == OWNomogram.POINT_SCALE:
            points = [p * d for p in points]

            if self.align == OWNomogram.ALIGN_LEFT:
                self.scale_marker_values = lambda x: (x - minimums) * d
            else:
                self.scale_marker_values = lambda x: x * d
        else:
            if self.align == OWNomogram.ALIGN_LEFT:
                self.scale_marker_values = lambda x: x - minimums
            else:
                self.scale_marker_values = lambda x: x

        point_item, nomogram_head = self.create_main_nomogram(
            attributes, attr_inds, name_items, points, max_width, point_text,
            name_offset)
        probs_item, nomogram_foot = self.create_footer_nomogram(
            probs_text, d, minimums, max_width, name_offset)
        for item in self.feature_items.values():
            item.dot.point_dot = point_item.dot
            item.dot.probs_dot = probs_item.dot
            item.dot.vertical_line = self.hidden_vertical_line

        self.nomogram = nomogram = NomogramItem()
        nomogram.add_items([nomogram_head, self.nomogram_main, nomogram_foot])
        self.scene.addItem(nomogram)
        self.set_feature_marker_values()

        rect = QRectF(self.scene.itemsBoundingRect().x(),
                      self.scene.itemsBoundingRect().y(),
                      self.scene.itemsBoundingRect().width(),
                      self.nomogram.preferredSize().height()).adjusted(
                          10, 0, 20, 0)
        self.scene.setSceneRect(rect)

        # Clip top and bottom (60 and 150) parts from the main view
        self.view.setSceneRect(rect.x(), rect.y() + 80, rect.width() - 10,
                               rect.height() - 160)
        self.view.viewport().setMaximumHeight(rect.height() - 160)
        # Clip main part from top/bottom views
        # below point values are imprecise (less/more than required) but this
        # is not a problem due to clipped scene content still being drawn
        self.top_view.setSceneRect(rect.x(), rect.y() + 3,
                                   rect.width() - 10, 20)
        self.bottom_view.setSceneRect(rect.x(), rect.height() - 110,
                                      rect.width() - 10, 30)

    def create_main_nomogram(self, attributes, attr_inds, name_items, points,
                             max_width, point_text, name_offset):
        """Build the "Points" ruler and one item per displayed feature.

        Stores the feature rows in ``self.nomogram_main`` and
        ``self.feature_items`` and creates the two vertical lines.

        Returns:
            tuple: ``(point_item, nomogram_header)``
        """
        cls_index = self.target_class_index
        min_p = min(p.min() for p in points)
        max_p = max(p.max() for p in points)
        values = self.get_ruler_values(min_p, max_p, max_width)
        min_p, max_p = min(values), max(values)
        diff_ = np.nan_to_num(max_p - min_p)
        scale_x = max_width / diff_ if diff_ else max_width

        nomogram_header = NomogramItem()
        point_item = RulerItem(point_text, values, scale_x, name_offset,
                               -scale_x * min_p)
        point_item.setPreferredSize(point_item.preferredWidth(), 35)
        nomogram_header.add_items([point_item])

        self.nomogram_main = NomogramItem()
        cont_feature_item_class = ContinuousFeature2DItem if \
            self.cont_feature_dim_index else ContinuousFeatureItem

        feature_items = [
            DiscreteFeatureItem(
                name_item, attr.values, point, scale_x, name_offset,
                -scale_x * min_p)
            if attr.is_discrete else
            cont_feature_item_class(
                name_item, self.log_reg_cont_data_extremes[i][cls_index],
                self.get_ruler_values(
                    point.min(), point.max(),
                    scale_x * point.ptp(), False),
                scale_x, name_offset, -scale_x * min_p)
            for i, attr, name_item, point in zip(
                attr_inds, attributes, name_items, points)
        ]

        self.nomogram_main.add_items(feature_items)
        self.feature_items = OrderedDict(sorted(zip(attr_inds, feature_items)))

        x = -scale_x * min_p
        y = self.nomogram_main.layout().preferredHeight() + 10
        self.vertical_line = QGraphicsLineItem(x, -6, x, y)
        self.vertical_line.setPen(QPen(Qt.DotLine))
        self.vertical_line.setParentItem(point_item)
        self.hidden_vertical_line = QGraphicsLineItem(x, -6, x, y)
        pen = QPen(Qt.DashLine)
        pen.setBrush(QColor(Qt.red))
        self.hidden_vertical_line.setPen(pen)
        self.hidden_vertical_line.setParentItem(point_item)

        return point_item, nomogram_header

    def get_ordered_attributes(self):
        """Return (in_domain_index, attr) pairs, ordered by method in SortBy combo"""
        if self.domain is None or not self.domain.attributes:
            return []

        attrs = self.domain.attributes
        sort_by = self.sort_index
        class_value = self.target_class_index

        if sort_by == SortBy.NO_SORTING:
            return list(enumerate(attrs))

        elif sort_by == SortBy.NAME:
            def key(x):
                _, attr = x
                return attr.name.lower()

        elif sort_by == SortBy.ABSOLUTE:
            def key(x):
                i, attr = x
                if attr.is_discrete:
                    ptp = self.points[i][class_value].ptp()
                else:
                    coef = np.abs(
                        self.log_reg_coeffs_orig[i][class_value]).mean()
                    ptp = coef * np.ptp(
                        self.log_reg_cont_data_extremes[i][class_value])
                return -ptp

        elif sort_by == SortBy.POSITIVE:
            def key(x):
                i, attr = x
                max_value = (
                    self.points[i][class_value].max()
                    if attr.is_discrete else
                    np.mean(self.log_reg_cont_data_extremes[i][class_value]))
                return -max_value

        elif sort_by == SortBy.NEGATIVE:
            def key(x):
                i, attr = x
                min_value = (
                    self.points[i][class_value].min()
                    if attr.is_discrete else
                    np.mean(self.log_reg_cont_data_extremes[i][class_value]))
                return min_value

        return sorted(enumerate(attrs), key=key)

    def create_footer_nomogram(self, probs_text, d, minimums,
                               max_width, name_offset):
        """Build the "Probabilities (%)" ruler.

        Returns:
            tuple: ``(probs_item, nomogram_footer)``
        """
        # pylint: disable=invalid-unary-operand-type
        eps, d_ = 0.05, 1
        k = -np.log(self.p / (1 - self.p)) if self.p is not None else -self.b0
        min_sum = k[self.target_class_index] - np.log((1 - eps) / eps)
        max_sum = k[self.target_class_index] - np.log(eps / (1 - eps))
        if self.align == OWNomogram.ALIGN_LEFT:
            max_sum = max_sum - sum(minimums)
            min_sum = min_sum - sum(minimums)
            for i in range(len(k)):  # pylint: disable=consider-using-enumerate
                k[i] = k[i] - sum(
                    [min(q) for q in [p[i] for p in self.points]])
        if self.scale == OWNomogram.POINT_SCALE:
            min_sum *= d
            max_sum *= d
            d_ = d

        values = self.get_ruler_values(min_sum, max_sum, max_width)
        min_sum, max_sum = min(values), max(values)
        diff_ = np.nan_to_num(max_sum - min_sum)
        scale_x = max_width / diff_ if diff_ else max_width
        cls_var, cls_index = self.domain.class_var, self.target_class_index
        nomogram_footer = NomogramItem()

        def get_normalized_probabilities(val):
            if not self.normalize_probabilities:
                return 1 / (1 + np.exp(k[cls_index] - val / d_))
            totals = self.__get_totals_for_class_values(minimums)
            p_sum = np.sum(1 / (1 + np.exp(k - totals / d_)))
            return 1 / (1 + np.exp(k[cls_index] - val / d_)) / p_sum

        def get_points(prob):
            if not self.normalize_probabilities:
                return (k[cls_index] - np.log(1 / prob - 1)) * d_
            totals = self.__get_totals_for_class_values(minimums)
            p_sum = np.sum(1 / (1 + np.exp(k - totals / d_)))
            return (k[cls_index] - np.log(1 / (prob * p_sum) - 1)) * d_

        probs_item = ProbabilitiesRulerItem(
            probs_text, values, scale_x, name_offset, -scale_x * min_sum,
            get_points=get_points,
            title="{}='{}'".format(cls_var.name, cls_var.values[cls_index]),
            get_probabilities=get_normalized_probabilities)
        nomogram_footer.add_items([probs_item])
        return probs_item, nomogram_footer

    def __get_totals_for_class_values(self, minimums):
        """Return an array with the total marker value for every class value.

        The entry for the target class is the sum of the scaled marker
        values; the others are derived with ``get_points_from_coeffs``.
        """
        cls_index = self.target_class_index
        marker_values = self.scale_marker_values(self.feature_marker_values)
        totals = np.full(len(self.domain.class_var.values), np.nan)
        totals[cls_index] = marker_values.sum()
        for i in range(len(self.domain.class_var.values)):
            if i == cls_index:
                continue
            coeffs = [np.nan_to_num(p[i] / p[cls_index]) for p in self.points]
            points = [p[cls_index] for p in self.points]
            total = sum([
                self.get_points_from_coeffs(v, c, p)
                for (v, c, p) in zip(self.feature_marker_values, coeffs,
                                     points)
            ])
            if self.align == OWNomogram.ALIGN_LEFT:
                points = [p - m for m, p in zip(minimums, points)]
                total -= sum([min(p) for p in [p[i] for p in self.points]])
            # Same guard as in update_scene: avoid dividing by zero (or NaN)
            # when all points are zero.
            max_ = np.nan_to_num(max(max(abs(p)) for p in points))
            d = 100 / max_ if max_ else 1
            if self.scale == OWNomogram.POINT_SCALE:
                total *= d
            totals[i] = total
        assert not np.any(np.isnan(totals))
        return totals

    def set_feature_marker_values(self):
        """Move the feature dots to the current marker values.

        Markers of features that have no item in ``feature_items`` are
        summed and passed to the probability dot.
        """
        if not (len(self.points) and len(self.feature_items)):
            return
        if not len(self.feature_marker_values):
            self._init_feature_marker_values()
        marker_values = self.scale_marker_values(self.feature_marker_values)

        invisible_sum = 0
        for i, marker in enumerate(marker_values):
            try:
                item = self.feature_items[i]
            except KeyError:
                invisible_sum += marker
            else:
                item.dot.move_to_val(marker)

        item.dot.probs_dot.move_to_sum(invisible_sum)

    def _init_feature_marker_values(self):
        """Compute initial marker values, one per domain attribute."""
        self.feature_marker_values = []
        cls_index = self.target_class_index
        instances = Table(self.domain, self.instances) \
            if self.instances else None
        values = []
        for i, attr in enumerate(self.domain.attributes):
            value, feature_val = 0, None
            if len(self.log_reg_coeffs):
                if attr.is_discrete:
                    # most frequent value
                    ind, n = unique(self.data.X[:, i], return_counts=True)
                    feature_val = np.nan_to_num(ind[np.argmax(n)])
                else:
                    feature_val = nanmean(self.data.X[:, i])

            # If data is provided on a separate signal, use the first data
            # instance to position the points instead of the mean
            inst_in_dom = instances and attr in instances.domain
            if inst_in_dom and not np.isnan(instances[0][attr]):
                feature_val = instances[0][attr]

            if feature_val is not None:
                value = (self.points[i][cls_index][int(feature_val)]
                         if attr.is_discrete else
                         self.log_reg_coeffs_orig[i][cls_index][0] *
                         feature_val)
            values.append(value)
        self.feature_marker_values = np.asarray(values)

    def clear_scene(self):
        """Drop all scene items and the references to them."""
        self.feature_items = {}
        self.scale_marker_values = lambda x: x
        self.nomogram = None
        self.nomogram_main = None
        self.vertical_line = None
        self.hidden_vertical_line = None
        self.scene.clear()

    def send_report(self):
        self.report_plot()

    @staticmethod
    def reconstruct_domain(original, preprocessed):
        """Return a domain with the variables from which the attributes of
        `preprocessed` were computed, without duplicates and in order of
        first appearance; class variable and metas come from `original`."""
        # abuse dict to make "in" comparisons faster
        attrs = OrderedDict()
        for attr in preprocessed.attributes:
            cv = attr._compute_value.variable._compute_value
            var = cv.variable if cv else original[attr.name]
            if var in attrs:  # the reason for OrderedDict
                continue
            attrs[var] = None  # we only need keys
        attrs = list(attrs.keys())
        return Domain(attrs, original.class_var, original.metas)

    @staticmethod
    def get_ruler_values(start, stop, max_width, round_to_nearest=True):
        """Return tick values for a ruler covering ``start`` to ``stop``.

        Returns ``[0]`` when `max_width` is zero or the range is not
        positive. With `round_to_nearest` false, the (rounded) `start` and
        `stop` themselves are kept as the first and last values.
        """
        if max_width == 0:
            return [0]
        diff = np.nan_to_num((stop - start) / max_width)
        if diff <= 0:
            return [0]
        decimals = int(np.floor(np.log10(diff)))
        if diff > 4 * pow(10, decimals):
            step = 5 * pow(10, decimals + 2)
        elif diff > 2 * pow(10, decimals):
            step = 2 * pow(10, decimals + 2)
        elif diff > 1 * pow(10, decimals):
            step = 1 * pow(10, decimals + 2)
        else:
            step = 5 * pow(10, decimals + 1)
        round_by = int(-np.floor(np.log10(step)))
        r = start % step
        if not round_to_nearest:
            _range = np.arange(start + step, stop + r, step) - r
            start, stop = np.floor(start * 100) / 100, np.ceil(
                stop * 100) / 100
            return np.round(np.hstack((start, _range, stop)), 2)
        return np.round(np.arange(start, stop + r + step, step) - r, round_by)

    @staticmethod
    def get_points_from_coeffs(current_value, coefficients, possible_values):
        """Interpolate ``coefficient * value`` between the two sorted
        `possible_values` around `current_value`.

        Returns 0 if `possible_values` contains NaN.
        """
        if np.isnan(possible_values).any():
            return 0
        # pylint: disable=undefined-loop-variable
        indices = np.argsort(possible_values)
        sorted_values = possible_values[indices]
        sorted_coefficients = coefficients[indices]
        # stops at the first value above current_value; if there is none,
        # `i` stays at the last index
        for i, val in enumerate(sorted_values):
            if current_value < val:
                break
        diff = sorted_values[i] - sorted_values[i - 1]
        k = 0 if diff < 1e-6 else (sorted_values[i] - current_value) / \
            (sorted_values[i] - sorted_values[i - 1])
        return sorted_coefficients[i - 1] * sorted_values[i - 1] * k + \
            sorted_coefficients[i] * sorted_values[i] * (1 - k)

    def reset_settings(self):
        self._reset_settings()
        self.update_scene()
class OWDistanceMatrix(widget.OWWidget):
    """Widget that shows a distance matrix in a table and outputs the
    selected sub-matrix (and, for row distances over a Table, the selected
    rows)."""
    name = "Distance Matrix"
    description = "View distance matrix."
    icon = "icons/DistanceMatrix.svg"
    priority = 200

    class Inputs:
        distances = Input("Distances", DistMatrix)

    class Outputs:
        distances = Output("Distances", DistMatrix, dynamic=False)
        table = Output("Table", Table)

    settingsHandler = DistanceMatrixContextHandler()
    auto_commit = Setting(True)
    annotation_idx = ContextSetting(1)
    selection = ContextSetting([])

    want_control_area = False

    def __init__(self):
        """Build the table view, the label selector and the commit button."""
        super().__init__()
        self.distances = None
        self.items = None

        self.tablemodel = DistanceMatrixModel()
        view = self.tableview = QTableView()
        view.setEditTriggers(QTableView.NoEditTriggers)
        view.setItemDelegate(TableBorderItem())
        view.setModel(self.tablemodel)
        view.setShowGrid(False)
        for header in (view.horizontalHeader(), view.verticalHeader()):
            header.setSectionResizeMode(QHeaderView.ResizeToContents)
            header.setHighlightSections(True)
            header.setSectionsClickable(False)
        view.verticalHeader().setDefaultAlignment(
            Qt.AlignRight | Qt.AlignVCenter)
        selmodel = SymmetricSelectionModel(view.model(), view)
        view.setSelectionModel(selmodel)
        view.setSelectionBehavior(QTableView.SelectItems)
        self.mainArea.layout().addWidget(view)

        settings_box = gui.hBox(self.mainArea)

        self.annot_combo = gui.comboBox(
            settings_box, self, "annotation_idx", label="Labels: ",
            orientation=Qt.Horizontal,
            callback=self._invalidate_annotations, contentsLength=12)
        self.annot_combo.setModel(VariableListModel())
        self.annot_combo.model()[:] = ["None", "Enumeration"]
        gui.rubber(settings_box)
        settings_box.layout().addWidget(self.report_button)
        gui.separator(settings_box, 40)
        acb = gui.auto_commit(settings_box, self, "auto_commit",
                              "Send Selected", "Send Automatically", box=None)
        acb.setFixedWidth(200)
        # Signal must be connected after self.commit is redirected
        selmodel.selectionChanged.connect(self.commit)

    def sizeHint(self):
        return QSize(800, 500)

    @Inputs.distances
    def set_distances(self, distances):
        """Accept a new distance matrix, rebuild the list of label options
        and commit the (cleared) selection."""
        self.closeContext()
        self.distances = distances
        self.tablemodel.set_data(self.distances)
        self.selection = []
        self.tableview.selectionModel().set_selected_items([])

        # False when there is no matrix, otherwise whatever the matrix
        # carries as row_items
        self.items = items = distances is not None and distances.row_items
        annotations = ["None", "Enumerate"]
        self.annotation_idx = 1
        if items and not distances.axis:
            annotations.append("Attribute names")
            self.annotation_idx = 2
        elif isinstance(items, list) and \
                all(isinstance(item, Variable) for item in items):
            annotations.append("Name")
            self.annotation_idx = 2
        elif isinstance(items, Table):
            annotations.extend(
                itertools.chain(items.domain, items.domain.metas))
            if items.domain.class_var:
                self.annotation_idx = 2 + len(items.domain.attributes)
        self.annot_combo.model()[:] = annotations

        if items:
            self.openContext(distances, annotations)
            self._update_labels()
            self.tableview.resizeColumnsToContents()
        self.commit()

    def _invalidate_annotations(self):
        if self.distances is not None:
            self._update_labels()

    def _update_labels(self):
        """Compute row/column labels for the chosen annotation and pass
        them to the model, preserving the current selection."""
        var = column = None
        # Default for the case where no branch below applies; without it the
        # name would be unbound further down.
        labels = None
        if self.annotation_idx == 0:
            labels = None
        elif self.annotation_idx == 1:
            labels = [str(i + 1) for i in range(self.distances.shape[0])]
        elif self.annot_combo.model()[self.annotation_idx] == \
                "Attribute names":
            attr = self.distances.row_items.domain.attributes
            labels = [str(attr[i]) for i in range(self.distances.shape[0])]
        elif self.annotation_idx == 2 and isinstance(self.items, list):
            # set_distances offers "Name" for any list of Variables, not
            # only for AttributeList (which is itself a list), so accept
            # the same here.
            labels = [v.name for v in self.items]
        elif isinstance(self.items, Table):
            var = self.annot_combo.model()[self.annotation_idx]
            column, _ = self.items.get_column_view(var)
            labels = [var.str_val(value) for value in column]
        saved_selection = self.tableview.selectionModel().selected_items()
        self.tablemodel.set_labels(labels, var, column)
        if labels:
            self.tableview.horizontalHeader().show()
            self.tableview.verticalHeader().show()
        else:
            self.tableview.horizontalHeader().hide()
            self.tableview.verticalHeader().hide()
        self.tableview.resizeColumnsToContents()
        self.tableview.selectionModel().set_selected_items(saved_selection)

    def commit(self):
        """Send the selected sub-matrix and, when distances are between rows
        of a Table, the selected rows; send None when nothing is selected."""
        sub_table = sub_distances = None
        if self.distances is not None:
            inds = self.tableview.selectionModel().selected_items()
            if inds:
                sub_distances = self.distances.submatrix(inds)
                if self.distances.axis and isinstance(self.items, Table):
                    sub_table = self.items[inds]
        self.Outputs.distances.send(sub_distances)
        self.Outputs.table.send(sub_table)

    def send_report(self):
        """Write the matrix into the report as a colored HTML table."""
        # Function-scope import: labels originate from data, so they are
        # escaped before being written into raw HTML.
        from html import escape

        if self.distances is None:
            return
        model = self.tablemodel
        dim = self.distances.shape[0]
        col_cell = model.color_for_cell

        def _rgb(brush):
            return "rgb({}, {}, {})".format(*brush.color().getRgb())

        if model.labels:
            col_label = model.color_for_label
            label_colors = [_rgb(col_label(i)) for i in range(dim)]
            labels = [escape(str(label)) for label in model.labels]
            self.report_raw('<table style="border-collapse:collapse">')
            self.report_raw("<tr><td></td>")
            self.report_raw("".join(
                '<td style="background-color: {}">{}</td>'.format(*cv)
                for cv in zip(label_colors, labels)))
            self.report_raw("</tr>")
            for i in range(dim):
                self.report_raw("<tr>")
                self.report_raw(
                    '<td style="background-color: {}">{}</td>'.
                    format(label_colors[i], labels[i]))
                self.report_raw(
                    "".join(
                        '<td style="background-color: {};'
                        'border-top:1px solid {}; border-left:1px solid {};">'
                        '{:.3f}</td>'.format(
                            _rgb(col_cell(i, j)),
                            label_colors[i], label_colors[j],
                            self.distances[i, j])
                        for j in range(dim)))
                self.report_raw("</tr>")
            self.report_raw("</table>")
        else:
            self.report_raw('<table>')
            for i in range(dim):
                self.report_raw(
                    "<tr>" +
                    "".join('<td style="background-color: {}">{:.3f}</td>'.
                            format(_rgb(col_cell(i, j)), self.distances[i, j])
                            for j in range(dim)) +
                    "</tr>")
            self.report_raw("</table>")
class OWFile(widget.OWWidget, RecentPathsWComboMixin): name = "File" id = "orange.widgets.data.file" description = "Read data from an input file or network " \ "and send a data table to the output." icon = "icons/File.svg" priority = 10 category = "Data" keywords = ["file", "load", "read", "open"] class Outputs: data = Output("Data", Table, doc="Attribute-valued dataset read from the input file.") want_main_area = False SEARCH_PATHS = [("sample-datasets", get_sample_datasets_dir())] SIZE_LIMIT = 1e7 LOCAL_FILE, URL = range(2) settingsHandler = PerfectDomainContextHandler( match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL) # Overload RecentPathsWidgetMixin.recent_paths to set defaults recent_paths = Setting([ RecentPath("", "sample-datasets", "iris.tab"), RecentPath("", "sample-datasets", "titanic.tab"), RecentPath("", "sample-datasets", "housing.tab"), RecentPath("", "sample-datasets", "heart_disease.tab"), ]) recent_urls = Setting([]) source = Setting(LOCAL_FILE) xls_sheet = ContextSetting("") sheet_names = Setting({}) url = Setting("") variables = ContextSetting([]) domain_editor = SettingProvider(DomainEditor) class Warning(widget.OWWidget.Warning): file_too_big = widget.Msg( "The file is too large to load automatically." 
" Press Reload to load.") load_warning = widget.Msg("Read warning:\n{}") class Error(widget.OWWidget.Error): file_not_found = widget.Msg("File not found.") missing_reader = widget.Msg("Missing reader.") sheet_error = widget.Msg("Error listing available sheets.") unknown = widget.Msg("Read error:\n{}") def __init__(self): super().__init__() RecentPathsWComboMixin.__init__(self) self.domain = None self.data = None self.loaded_file = "" self.reader = None layout = QGridLayout() gui.widgetBox(self.controlArea, margin=0, orientation=layout) vbox = gui.radioButtons(None, self, "source", box=True, addSpace=True, callback=self.load_data, addToLayout=False) rb_button = gui.appendRadioButton(vbox, "File:", addToLayout=False) layout.addWidget(rb_button, 0, 0, Qt.AlignVCenter) box = gui.hBox(None, addToLayout=False, margin=0) box.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.file_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.file_combo.activated[int].connect(self.select_file) box.layout().addWidget(self.file_combo) layout.addWidget(box, 0, 1) file_button = gui.button(None, self, '...', callback=self.browse_file, autoDefault=False) file_button.setIcon(self.style().standardIcon(QStyle.SP_DirOpenIcon)) file_button.setSizePolicy(Policy.Maximum, Policy.Fixed) layout.addWidget(file_button, 0, 2) reload_button = gui.button(None, self, "Reload", callback=self.load_data, autoDefault=False) reload_button.setIcon(self.style().standardIcon( QStyle.SP_BrowserReload)) reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed) layout.addWidget(reload_button, 0, 3) self.sheet_box = gui.hBox(None, addToLayout=False, margin=0) self.sheet_combo = gui.comboBox( None, self, "xls_sheet", callback=self.select_sheet, sendSelectedValue=True, ) self.sheet_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.sheet_label = QLabel() self.sheet_label.setText('Sheet') self.sheet_label.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) 
self.sheet_box.layout().addWidget(self.sheet_label, Qt.AlignLeft) self.sheet_box.layout().addWidget(self.sheet_combo, Qt.AlignVCenter) layout.addWidget(self.sheet_box, 2, 1) self.sheet_box.hide() rb_button = gui.appendRadioButton(vbox, "URL:", addToLayout=False) layout.addWidget(rb_button, 3, 0, Qt.AlignVCenter) self.url_combo = url_combo = QComboBox() url_model = NamedURLModel(self.sheet_names) url_model.wrap(self.recent_urls) url_combo.setLineEdit(LineEditSelectOnFocus()) url_combo.setModel(url_model) url_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) url_combo.setEditable(True) url_combo.setInsertPolicy(url_combo.InsertAtTop) url_edit = url_combo.lineEdit() l, t, r, b = url_edit.getTextMargins() url_edit.setTextMargins(l + 5, t, r, b) layout.addWidget(url_combo, 3, 1, 3, 3) url_combo.activated.connect(self._url_set) box = gui.vBox(self.controlArea, "Info") self.info = gui.widgetLabel(box, 'No data loaded.') self.warnings = gui.widgetLabel(box, '') box = gui.widgetBox(self.controlArea, "Columns (Double click to edit)") self.domain_editor = DomainEditor(self) self.editor_model = self.domain_editor.model() box.layout().addWidget(self.domain_editor) box = gui.hBox(self.controlArea) gui.button(box, self, "Browse documentation datasets", callback=lambda: self.browse_file(True), autoDefault=False) gui.rubber(box) self.apply_button = gui.button(box, self, "Apply", callback=self.apply_domain_edit) self.apply_button.setEnabled(False) self.apply_button.setFixedWidth(170) self.editor_model.dataChanged.connect( lambda: self.apply_button.setEnabled(True)) self.set_file_list() # Must not call open_file from within __init__. 
open_file # explicitly re-enters the event loop (by a progress bar) self.setAcceptDrops(True) if self.source == self.LOCAL_FILE: last_path = self.last_path() if last_path and os.path.exists(last_path) and \ os.path.getsize(last_path) > self.SIZE_LIMIT: self.Warning.file_too_big() return QTimer.singleShot(0, self.load_data) def sizeHint(self): return QSize(600, 550) def select_file(self, n): assert n < len(self.recent_paths) super().select_file(n) if self.recent_paths: self.source = self.LOCAL_FILE self.load_data() self.set_file_list() def select_sheet(self): self.recent_paths[0].sheet = self.sheet_combo.currentText() self.load_data() def _url_set(self): url = self.url_combo.currentText() pos = self.recent_urls.index(url) url = url.strip() if not urlparse(url).scheme: url = 'http://' + url self.url_combo.setItemText(pos, url) self.recent_urls[pos] = url self.source = self.URL self.load_data() def browse_file(self, in_demos=False): if in_demos: start_file = get_sample_datasets_dir() if not os.path.exists(start_file): QMessageBox.information( None, "File", "Cannot find the directory with documentation datasets") return else: start_file = self.last_path() or os.path.expanduser("~/") readers = [ f for f in FileFormat.formats if getattr(f, 'read', None) and getattr(f, "EXTENSIONS", None) ] filename, reader, _ = open_filename_dialog(start_file, None, readers) if not filename: return self.add_path(filename) if reader is not None: self.recent_paths[0].file_format = reader.qualified_name() self.source = self.LOCAL_FILE self.load_data() # Open a file, create data from it and send it over the data channel def load_data(self): # We need to catch any exception type since anything can happen in # file readers self.closeContext() self.domain_editor.set_domain(None) self.apply_button.setEnabled(False) self.clear_messages() self.set_file_list() error = self._try_load() if error: error() self.data = None self.sheet_box.hide() self.Outputs.data.send(None) self.info.setText("No data.") 
def _try_load(self):
    """Read data from the current source and publish it.

    Returns
    -------
    callable or None
        ``None`` when the data was loaded and sent. Otherwise a
        zero-argument callable; ``load_data`` invokes it to report the
        failure and then resets the widget to the "No data." state.
    """
    # pylint: disable=broad-except
    if self.last_path() and not os.path.exists(self.last_path()):
        return self.Error.file_not_found

    try:
        self.reader = self._get_reader()
    except Exception:
        return self.Error.missing_reader
    # Explicit check instead of `assert`: asserts are stripped under -O,
    # which would let a missing reader surface later as a sheet error.
    if self.reader is None:
        return self.Error.missing_reader

    try:
        self._update_sheet_combo()
    except Exception:
        return self.Error.sheet_error

    with catch_warnings(record=True) as caught:
        try:
            data = self.reader.read()
        except Exception as ex:
            log.exception(ex)
            # Bind the exception as a default: `ex` is unbound once the
            # except clause is left.
            return lambda x=ex: self.Error.unknown(str(x))
        if caught:
            # Only the most recent warning is shown.
            self.Warning.load_warning(caught[-1].message.args[0])

    self.info.setText(self._describe(data))

    self.loaded_file = self.last_path()
    add_origin(data, self.loaded_file)
    self.data = data
    self.openContext(data.domain)
    self.apply_domain_edit()  # sends data
    return None


def _get_reader(self):
    """Return a reader for the currently selected source.

    For a local file the reader class stored in
    ``recent_paths[0].file_format`` is used when set; otherwise
    ``FileFormat.get_reader`` picks one for the path. The sheet stored in
    ``recent_paths[0].sheet``, if any, is selected on the reader.

    Returns
    -------
    FileFormat or None
        ``None`` when the source is a URL whose text is empty, or when
        the source is neither ``LOCAL_FILE`` nor ``URL``.
    """
    if self.source == self.LOCAL_FILE:
        path = self.last_path()
        if self.recent_paths and self.recent_paths[0].file_format:
            qname = self.recent_paths[0].file_format
            reader_class = class_from_qualified_name(qname)
            reader = reader_class(path)
        else:
            reader = FileFormat.get_reader(path)
        if self.recent_paths and self.recent_paths[0].sheet:
            reader.select_sheet(self.recent_paths[0].sheet)
        return reader
    if self.source == self.URL:
        url = self.url_combo.currentText().strip()
        if url:
            return UrlReader(url)
    return None


def _update_sheet_combo(self):
    """Show the sheet selector only when the reader has 2+ sheets."""
    if len(self.reader.sheets) < 2:
        self.sheet_box.hide()
        self.reader.select_sheet(None)
        return

    self.sheet_combo.clear()
    self.sheet_combo.addItems(self.reader.sheets)
    self._select_active_sheet()
    self.sheet_box.show()


def _select_active_sheet(self):
    """Point the sheet combo at the reader's sheet, or at the first one."""
    if self.reader.sheet:
        try:
            idx = self.reader.sheets.index(self.reader.sheet)
            self.sheet_combo.setCurrentIndex(idx)
        except ValueError:
            # Requested sheet does not exist in this file
            self.reader.select_sheet(None)
    else:
        self.sheet_combo.setCurrentIndex(0)


def _describe(self, table):
    """Return an HTML summary of `table` for the info label.

    The summary contains the optional "Name"/"Description" entries of
    ``table.attributes``, the instance/feature/meta counts, the kind of
    target, and, when the domain has a 'Timestamp' column and the table
    is not empty, the first and the last timestamp.
    """
    domain = table.domain
    text = ""

    attrs = getattr(table, "attributes", {})
    descs = [attrs[desc]
             for desc in ("Name", "Description") if desc in attrs]
    if len(descs) == 2:
        # With both present, the name is set in bold as a title.
        descs[0] = "<b>{}</b>".format(descs[0])
    if descs:
        text += "<p>{}</p>".format("<br/>".join(descs))

    text += "<p>{} instance(s), {} feature(s), {} meta attribute(s)".\
        format(len(table), len(domain.attributes), len(domain.metas))
    if domain.has_continuous_class:
        text += "<br/>Regression; numerical class."
    elif domain.has_discrete_class:
        text += "<br/>Classification; categorical class with {} values.".\
            format(len(domain.class_var.values))
    elif domain.class_vars:
        text += "<br/>Multi-target; {} target variables.".format(
            len(domain.class_vars))
    else:
        text += "<br/>Data has no target variable."
    text += "</p>"

    # An empty table has no first/last row to index.
    if len(table) and 'Timestamp' in domain:
        # Google Forms uses this header to timestamp responses
        text += '<p>First entry: {}<br/>Last entry: {}</p>'.format(
            table[0, 'Timestamp'], table[-1, 'Timestamp'])
    return text


def storeSpecificSettings(self):
    """Save a copy of the edited variables into the current context."""
    self.current_context.modified_variables = self.variables[:]


def retrieveSpecificSettings(self):
    """Restore the edited variables from the current context, if stored."""
    if hasattr(self.current_context, "modified_variables"):
        # Slice assignment keeps the identity of `self.variables`.
        self.variables[:] = self.current_context.modified_variables


def apply_domain_edit(self):
    """Apply the domain editor's changes and send the resulting table.

    Sends ``None`` when there is no data or when the edited domain has
    neither variables nor metas.
    """
    if self.data is None:
        table = None
    else:
        domain, cols = self.domain_editor.get_domain(
            self.data.domain, self.data)
        if not (domain.variables or domain.metas):
            table = None
        else:
            X, y, m = cols
            table = Table.from_numpy(domain, X, y, m, self.data.W)
            table.name = self.data.name
            table.ids = np.array(self.data.ids)
            table.attributes = getattr(self.data, 'attributes', {})

    self.Outputs.data.send(table)
    self.apply_button.setEnabled(False)


def get_widget_name_extension(self):
    """Return the loaded file's base name without its extension."""
    _, name = os.path.split(self.loaded_file)
    return os.path.splitext(name)[0]


def send_report(self):
    """Add the data source (file or URL), its format and the data."""
    def get_ext_name(filename):
        try:
            return FileFormat.names[os.path.splitext(filename)[1]]
        except KeyError:
            return "unknown"

    if self.data is None:
        self.report_paragraph("File", "No file.")
        return

    if self.source == self.LOCAL_FILE:
        home = os.path.expanduser("~")
        if self.loaded_file.startswith(home):
            # os.path.join does not like ~
            name = "~" + os.path.sep + \
                   self.loaded_file[len(home):].lstrip("/").lstrip("\\")
        else:
            name = self.loaded_file
        # Resolve the format from the file itself: once " (sheet)" is
        # appended below, the display name no longer ends in the
        # extension and the lookup would always give "unknown".
        file_format = get_ext_name(self.loaded_file)
        if self.sheet_combo.isVisible():
            name += " ({})".format(self.sheet_combo.currentText())
        self.report_items("File", [("File name", name),
                                   ("Format", file_format)])
    else:
        self.report_items("Data", [("Resource", self.url),
                                   ("Format", get_ext_name(self.url))])

    self.report_data("Data", self.data)


def dragEnterEvent(self, event):
    """Accept drops of valid file urls"""
    urls = event.mimeData().urls()
    if urls:
        try:
            # Only the first URL is examined; an IOError from the reader
            # lookup means the drop is left unaccepted.
            FileFormat.get_reader(
                OSX_NSURL_toLocalFile(urls[0]) or urls[0].toLocalFile())
            event.acceptProposedAction()
        except IOError:
            pass


def dropEvent(self, event):
    """Handle file drops"""
    urls = event.mimeData().urls()
    if urls:
        self.add_path(
            OSX_NSURL_toLocalFile(urls[0])
            or urls[0].toLocalFile())  # add first file
        self.source = self.LOCAL_FILE
        self.load_data()
class OWAggregate(widget.OWWidget):
    """Widget that aggregates a time series into time bins.

    Rows are grouped by their time value truncated to the selected
    interval (see ``AGG_TIME``); every other variable is reduced with the
    aggregation function chosen for it in the table view.
    """
    name = 'Aggregate'
    description = "Aggregate data in bins by second, minute, hour, day, " \
                  "week, month, or year."
    icon = 'icons/Aggregate.svg'
    priority = 560

    class Inputs:
        time_series = Input("Time series", Table)

    class Outputs:
        time_series = Output("Time series", Timeseries)

    settingsHandler = DomainContextHandler()
    # Parallel lists: variables[i] is aggregated with agg_funcs[i].
    variables = ContextSetting([])
    agg_funcs = ContextSetting([])

    want_main_area = False

    agg_interval = settings.Setting('day')
    autocommit = settings.Setting(False)

    # Maps an interval name to a function that truncates a datetime to the
    # start of that interval.
    AGG_TIME = OrderedDict((
        ('second', lambda date: date.replace(microsecond=0)),
        ('minute', lambda date: date.replace(second=0, microsecond=0)),
        ('hour', lambda date: date.replace(minute=0, second=0,
                                           microsecond=0)),
        ('day', lambda date: date.replace(hour=0, minute=0, second=0,
                                          microsecond=0)),
        # Doesn't work for years before 1000
        # NOTE(review): the strptime round-trip has no %z, so the result
        # is presumably a naive datetime even for tz-aware input - confirm.
        ('week', lambda date: date.strptime(date.strftime('%Y-W%W-0'),
                                            '%Y-W%W-%w')),
        ('month', lambda date: date.replace(day=1, hour=0, minute=0,
                                            second=0, microsecond=0)),
        ('year', lambda date: date.replace(month=1, day=1, hour=0,
                                           minute=0, second=0,
                                           microsecond=0)),
    ))

    class Error(widget.OWWidget.Error):
        no_time_variable = widget.Msg(
            'Aggregation currently requires a time series with a time '
            'variable.')

    def __init__(self):
        self.data = None
        # The callback must *call* commit. It stays a lambda so that
        # `self.commit` is looked up at call time - presumably
        # gui.auto_commit (below) replaces it with a wrapper; verify.
        gui.comboBox(self.controlArea, self, 'agg_interval',
                     label='Aggregate by:',
                     items=tuple(self.AGG_TIME.keys()),
                     sendSelectedValue=True,
                     orientation=Qt.Horizontal,
                     callback=lambda: self.commit())
        # Two-column model: [variable, aggregation function]; only the
        # second column is editable.
        self.model = model = PyTableModel(parent=self,
                                          editable=[False, True])
        model.setHorizontalHeaderLabels(['Attribute',
                                         'Aggregation function'])

        class TableView(gui.TableView):
            """Table view whose second column is edited with a combo."""

            def __init__(self, parent):
                super().__init__(
                    parent,
                    editTriggers=(self.SelectedClicked |
                                  self.CurrentChanged |
                                  self.DoubleClicked |
                                  self.EditKeyPressed),
                )
                self.horizontalHeader().setStretchLastSection(False)
                self.setItemDelegateForColumn(1, self.ComboDelegate(self))

            class _ItemDelegate(QStyledItemDelegate):
                def updateEditorGeometry(self, widget, option, _index):
                    # Let the editor fill the whole cell.
                    widget.setGeometry(option.rect)

            class ComboDelegate(_ItemDelegate):
                """Offers the aggregation functions valid for a row."""

                def __init__(self, parent):
                    super().__init__(parent)
                    self._parent = parent
                    self._combo_continuous_model = ListModel(
                        AGG_FUNCTIONS, parent=self)
                    self._combo_discrete_model = ListModel(
                        [Mode], parent=self)
                    self._combo_string_model = ListModel(
                        [Concatenate], parent=self)

                def createEditor(self, parent, _QStyleOptionViewItem,
                                 index):
                    combo = QComboBox(parent)
                    # Column 0 of the row holds the variable itself.
                    attr = index.model()[index.row()][0]
                    combo.setModel(
                        self._combo_continuous_model
                        if attr.is_continuous else
                        self._combo_discrete_model
                        if attr.is_discrete else
                        self._combo_string_model)
                    return combo

                def setEditorData(self, combo, index):
                    var = index.model().data(index, Qt.EditRole)
                    combo.setCurrentIndex(combo.model().indexOf(var))

                def setModelData(self, combo, model, index):
                    func = combo.model()[combo.currentIndex()]
                    model.setData(index, func, Qt.EditRole)

        view = TableView(self)
        view.setModel(model)
        self.settingsAboutToBePacked.connect(self.pack_settings)
        self.controlArea.layout().addWidget(view)
        gui.auto_commit(self.controlArea, self, 'autocommit', '&Apply')

    @Inputs.time_series
    def set_data(self, data):
        """Accept new input, restore per-domain settings and commit.

        Input that is not ``None`` is converted with
        ``Timeseries.from_data_table``; data whose time variable is not a
        ``TimeVariable`` raises the ``no_time_variable`` error and is
        treated as missing.
        """
        # Save the current table contents before the context is closed.
        self.pack_settings()
        self.closeContext()
        self.Error.clear()
        data = None if data is None else Timeseries.from_data_table(data)
        if data is not None and \
                not isinstance(data.time_variable, TimeVariable):
            self.Error.no_time_variable()
            data = None
        self.data = data
        if data is None:
            self.model.clear()
            self.commit()
            return
        self.set_default(self.data)
        self.openContext(self.data)
        self.unpack_settings()
        self.commit()

    def set_default(self, data):
        """Select all non-time variables with a default function each.

        The default is the first of ``AGG_FUNCTIONS`` for continuous,
        ``Mode`` for discrete and ``Concatenate`` for string variables;
        any other kind of variable gets ``None``.
        """
        self.variables = [attr
                          for attr in chain(data.domain.variables,
                                            data.domain.metas)
                          if attr != data.time_variable]
        self.agg_funcs = [AGG_FUNCTIONS[0] if attr.is_continuous else
                          Mode if attr.is_discrete else
                          Concatenate if attr.is_string else
                          None
                          for attr in self.variables]

    def pack_settings(self):
        """Copy the table model into the `variables`/`agg_funcs` settings."""
        rows = self.model.tolist()
        self.variables = [row[0] for row in rows]
        self.agg_funcs = [row[1] for row in rows]

    def unpack_settings(self):
        """Fill the table model from the `variables`/`agg_funcs` settings."""
        self.model[:] = [[var, func]
                         for var, func in zip(self.variables,
                                              self.agg_funcs)]

    def commit(self):
        """Aggregate the data by the selected interval and send it.

        Sends ``None`` when there is no (or empty) data.
        """
        data = self.data
        if not data:
            self.Outputs.time_series.send(None)
            return

        # Group-by expects data sorted
        sorted_indices = np.argsort(data.time_values)
        if not np.all(sorted_indices == np.arange(len(data))):
            data = Timeseries.from_data_table(
                Table.from_table_rows(data, sorted_indices))

        # Keep every variable in the role it had in the input domain.
        attrs, cvars, metas = [], [], []
        for attr, _ in self.model:
            if attr in data.domain.attributes:
                attrs.append(attr)
            elif attr in data.domain.class_vars:
                cvars.append(attr)
            else:
                metas.append(attr)

        aggregate_time = self.AGG_TIME[self.agg_interval]

        def time_key(i):
            # Timestamp of the start of the bin that row `i` falls into.
            return timestamp(
                aggregate_time(
                    fromtimestamp(data.time_values[i],
                                  tz=data.time_variable.timezone)))

        times = []
        X, Y, M = [], [], []
        # itertools.groupby only merges consecutive equal keys, hence the
        # sorting above.
        for key_time, indices in groupby(np.arange(len(data)),
                                         key=time_key):
            times.append(key_time)
            subset = data[list(indices)]

            xs, ys, ms = [], [], []
            for attr, func in self.model:
                # Extract the column as a meta so that any variable type
                # can be read the same way.
                values = Table.from_table(
                    Domain([], [], [attr], source=data.domain),
                    subset).metas
                out = (xs if attr in data.domain.attributes else
                       ys if attr in data.domain.class_vars else
                       ms)
                out.append(func(values))

            X.append(xs)
            Y.append(ys)
            M.append(ms)

        # np.vstack is the function np.row_stack was an alias of; the
        # alias is deprecated in recent NumPy releases.
        ts = Timeseries(
            Domain([data.time_variable] + attrs, cvars, metas),
            np.column_stack((times, np.vstack(X))),
            np.array(Y),
            np.array(np.vstack(M), dtype=object))
        self.Outputs.time_series.send(ts)
class OWFeatureConstructor(OWWidget):
    """Widget for defining new variables as expressions over the input.

    The user edits a list of descriptors (numeric, categorical, text or
    date/time); on "Send" the descriptors are validated, turned into
    variables and appended to the input domain.
    """
    name = "Feature Constructor"
    description = "Construct new features (data columns) from a set of " \
                  "existing features in the input dataset."
    icon = "icons/FeatureConstructor.svg"
    keywords = ['function', 'lambda']

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        data = Output("Data", Orange.data.Table)

    want_main_area = False

    settingsHandler = FeatureConstructorHandler()
    descriptors = ContextSetting([])
    currentIndex = ContextSetting(-1)
    # Compatibility flag set by `migrate_context` for old contexts whose
    # expressions use categorical variables.
    expressions_with_values = ContextSetting(False)
    settings_version = 2

    # Pairs a descriptor type with the editor widget used for it.
    EDITORS = [
        (ContinuousDescriptor, ContinuousFeatureEditor),
        (DateTimeDescriptor, DateTimeFeatureEditor),
        (DiscreteDescriptor, DiscreteFeatureEditor),
        (StringDescriptor, StringFeatureEditor)
    ]

    class Error(OWWidget.Error):
        more_values_needed = Msg("Categorical feature {} needs more values.")
        invalid_expressions = Msg("Invalid expressions: {}.")

    class Warning(OWWidget.Warning):
        renamed_var = Msg("Recently added variable has been renamed, "
                          "to avoid duplicates.\n")

    def __init__(self):
        super().__init__()
        self.data = None
        self.editors = {}

        box = gui.vBox(self.controlArea, "Variable Definitions")

        toplayout = QHBoxLayout()
        toplayout.setContentsMargins(0, 0, 0, 0)
        box.layout().addLayout(toplayout)

        self.editorstack = QStackedWidget(
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.MinimumExpanding)
        )

        # One editor instance per descriptor type, all in one stack.
        for descclass, editorclass in self.EDITORS:
            editor = editorclass()
            editor.featureChanged.connect(self._on_modified)
            self.editors[descclass] = editor
            self.editorstack.addWidget(editor)

        self.editorstack.setEnabled(False)

        buttonlayout = QVBoxLayout(spacing=10)
        buttonlayout.setContentsMargins(0, 0, 0, 0)

        self.addbutton = QPushButton(
            "New", toolTip="Create a new variable",
            minimumWidth=120,
            shortcut=QKeySequence.New
        )

        def unique_name(fmt, reserved):
            # First of fmt.format(1), fmt.format(2), ... not yet taken.
            candidates = (fmt.format(i) for i in count(1))
            return next(c for c in candidates if c not in reserved)

        def generate_newname(fmt):
            return unique_name(fmt, self.reserved_names())

        menu = QMenu(self.addbutton)
        cont = menu.addAction("Numeric")
        cont.triggered.connect(
            lambda: self.addFeature(
                ContinuousDescriptor(generate_newname("X{}"), "", 3))
        )
        disc = menu.addAction("Categorical")
        disc.triggered.connect(
            lambda: self.addFeature(
                DiscreteDescriptor(generate_newname("D{}"), "", (), False))
        )
        string = menu.addAction("Text")
        string.triggered.connect(
            lambda: self.addFeature(
                StringDescriptor(generate_newname("S{}"), ""))
        )
        datetime_action = menu.addAction("Date/Time")
        datetime_action.triggered.connect(
            lambda: self.addFeature(
                DateTimeDescriptor(generate_newname("T{}"), ""))
        )

        menu.addSeparator()
        self.duplicateaction = menu.addAction("Duplicate Selected Variable")
        self.duplicateaction.triggered.connect(self.duplicateFeature)
        self.duplicateaction.setEnabled(False)
        self.addbutton.setMenu(menu)

        self.removebutton = QPushButton(
            "Remove", toolTip="Remove selected variable",
            minimumWidth=120,
            shortcut=QKeySequence.Delete
        )
        self.removebutton.clicked.connect(self.removeSelectedFeature)

        buttonlayout.addWidget(self.addbutton)
        buttonlayout.addWidget(self.removebutton)
        buttonlayout.addStretch(10)

        toplayout.addLayout(buttonlayout, 0)
        toplayout.addWidget(self.editorstack, 10)

        # Layout for the list view
        layout = QVBoxLayout(spacing=1, margin=0)
        self.featuremodel = DescriptorModel(parent=self)

        self.featureview = QListView(
            minimumWidth=200, minimumHeight=50,
            sizePolicy=QSizePolicy(QSizePolicy.Minimum,
                                   QSizePolicy.MinimumExpanding)
        )

        self.featureview.setItemDelegate(FeatureItemDelegate(self))
        self.featureview.setModel(self.featuremodel)
        self.featureview.selectionModel().selectionChanged.connect(
            self._on_selectedVariableChanged
        )

        layout.addWidget(self.featureview)

        box.layout().addLayout(layout, 1)

        # Hidden unless a loaded context requires upgrading expressions.
        self.fix_button = gui.button(
            self.buttonsArea, self, "Upgrade Expressions",
            callback=self.fix_expressions)
        self.fix_button.setHidden(True)
        gui.button(self.buttonsArea, self, "Send",
                   callback=self.apply, default=True)

    def setCurrentIndex(self, index):
        """Select descriptor `index` and show it in the matching editor.

        `index` is clamped to the last row; a negative result disables
        the editor and the duplicate/remove controls.
        """
        index = min(index, len(self.featuremodel) - 1)
        self.currentIndex = index
        if index >= 0:
            itemmodels.select_row(self.featureview, index)
            desc = self.featuremodel[index]
            editor = self.editors[type(desc)]
            self.editorstack.setCurrentWidget(editor)
            # Test for None explicitly so that the domain is passed on
            # without relying on the truthiness of the table.
            editor.setEditorData(
                desc,
                self.data.domain if self.data is not None else None)
        self.editorstack.setEnabled(index >= 0)
        self.duplicateaction.setEnabled(index >= 0)
        self.removebutton.setEnabled(index >= 0)

    def _on_selectedVariableChanged(self, selected, *_):
        """Follow the list view's selection (-1 when nothing is selected)."""
        index = selected_row(self.featureview)
        self.setCurrentIndex(index if index is not None else -1)

    def _on_modified(self):
        """Store the edited descriptor, renaming it if its name is taken."""
        if self.currentIndex >= 0:
            self.Warning.clear()
            editor = self.editorstack.currentWidget()
            feature = editor.editorData()
            uniq = get_unique_names(self.reserved_names(self.currentIndex),
                                    feature.name)

            if feature.name != uniq:
                self.Warning.renamed_var()
                # Rebuild the descriptor with only the name replaced.
                feature = feature.__class__(uniq, *feature[1:])

            self.featuremodel[self.currentIndex] = feature
            self.descriptors = list(self.featuremodel)

    def setDescriptors(self, descriptors):
        """
        Set a list of variable descriptors to edit.
        """
        self.descriptors = descriptors
        self.featuremodel[:] = list(self.descriptors)

    def reserved_names(self, idx_=None):
        """Return the set of names that a new variable must not use.

        These are the names of the input variables and metas plus the
        names of all descriptors except the one at index `idx_`.
        """
        varnames = []
        if self.data is not None:
            varnames = [var.name for var in
                        self.data.domain.variables + self.data.domain.metas]
        varnames += [desc.name for idx, desc in enumerate(self.featuremodel)
                     if idx != idx_]
        return set(varnames)

    @Inputs.data
    @check_sql_input
    def setData(self, data=None):
        """Set the input dataset."""
        self.closeContext()

        self.data = data
        self.expressions_with_values = False

        if self.data is not None:
            descriptors = list(self.descriptors)
            currindex = self.currentIndex
            self.descriptors = []
            self.currentIndex = -1
            self.openContext(data)
            self.fix_button.setHidden(not self.expressions_with_values)

            if descriptors != self.descriptors or \
                    self.currentIndex != currindex:
                # disconnect from the selection model while resetting the
                # model
                selmodel = self.featureview.selectionModel()
                selmodel.selectionChanged.disconnect(
                    self._on_selectedVariableChanged)

                self.featuremodel[:] = list(self.descriptors)
                self.setCurrentIndex(self.currentIndex)

                selmodel.selectionChanged.connect(
                    self._on_selectedVariableChanged)

        self.editorstack.setEnabled(self.currentIndex >= 0)

    def handleNewSignals(self):
        """Recompute the output, or clear it when there is no input."""
        if self.data is not None:
            self.apply()
        else:
            self.Outputs.data.send(None)
            self.fix_button.setHidden(True)

    def addFeature(self, descriptor):
        """Append `descriptor`, select it and focus its name field."""
        self.featuremodel.append(descriptor)
        self.setCurrentIndex(len(self.featuremodel) - 1)
        editor = self.editorstack.currentWidget()
        editor.nameedit.setFocus()
        editor.nameedit.selectAll()

    def removeFeature(self, index):
        """Remove the descriptor at `index` and fix up the selection."""
        del self.featuremodel[index]
        index = selected_row(self.featureview)
        if index is not None:
            self.setCurrentIndex(index)
        elif self.featuremodel.rowCount():
            # Deleting the last item clears selection
            self.setCurrentIndex(self.featuremodel.rowCount() - 1)

    def removeSelectedFeature(self):
        """Remove the currently selected descriptor, if any."""
        if self.currentIndex >= 0:
            self.removeFeature(self.currentIndex)

    def duplicateFeature(self):
        """Add a deep copy of the currently selected descriptor."""
        desc = self.featuremodel[self.currentIndex]
        self.addFeature(copy.deepcopy(desc))

    @staticmethod
    def check_attrs_values(attr, data):
        """Find a variable with a value index outside its value list.

        Rows are scanned in order and, within a row, the variables in
        `attr` in order; missing (NaN) values are ignored.

        Returns
        -------
        str or None
            Name of the first offending variable, ``None`` if all fit.
        """
        for i in range(len(data)):
            for var in attr:
                value = data[i, var]
                if not math.isnan(value) \
                        and int(value) >= len(var.values):
                    return var.name
        return None

    def _validate_descriptors(self, desc):
        """Blank out invalid expressions and report the affected names.

        Returns a list parallel to `desc` in which descriptors whose
        expression does not parse or does not pass ``validate_exp`` have
        the expression replaced by an empty string.
        """

        def validate(source):
            try:
                return validate_exp(ast.parse(source, mode="eval"))
            # ast.parse can return arbitrary errors, not only SyntaxError
            # pylint: disable=broad-except
            except Exception:
                return False

        final = []
        invalid = []
        for d in desc:
            if validate(d.expression):
                final.append(d)
            else:
                final.append(d._replace(expression=""))
                invalid.append(d)

        if invalid:
            self.Error.invalid_expressions(", ".join(s.name for s in invalid))

        return final

    def apply(self):
        """Construct the new variables and send the extended data.

        Nothing is sent when construction or evaluation of an expression
        fails, or when a categorical result does not fit its value list;
        the problem is reported as a widget error instead.
        """
        def report_error(err):
            log = logging.getLogger(__name__)
            log.error("", exc_info=True)
            self.error("".join(format_exception_only(type(err), err)).rstrip())

        self.Error.clear()

        if self.data is None:
            return

        desc = list(self.featuremodel)
        desc = self._validate_descriptors(desc)
        try:
            new_variables = construct_variables(
                desc, self.data, self.expressions_with_values)
        # user's expression can contain arbitrary errors
        except Exception as err:  # pylint: disable=broad-except
            report_error(err)
            return

        # Primitive variables become attributes, the others metas.
        attrs = [var for var in new_variables if var.is_primitive()]
        metas = [var for var in new_variables if not var.is_primitive()]
        new_domain = Orange.data.Domain(
            self.data.domain.attributes + tuple(attrs),
            self.data.domain.class_vars,
            metas=self.data.domain.metas + tuple(metas)
        )

        try:
            # Let exceptions through during this transform so they can be
            # reported; masking is switched back on in `finally`.
            for variable in new_variables:
                variable.compute_value.mask_exceptions = False
            data = self.data.transform(new_domain)
        # user's expression can contain arbitrary errors
        # pylint: disable=broad-except
        except Exception as err:
            report_error(err)
            return
        finally:
            for variable in new_variables:
                variable.compute_value.mask_exceptions = True

        disc_attrs_not_ok = self.check_attrs_values(
            [var for var in attrs if var.is_discrete], data)
        if disc_attrs_not_ok:
            self.Error.more_values_needed(disc_attrs_not_ok)
            return

        self.Outputs.data.send(data)

    def send_report(self):
        """Report every constructed feature as "expression (type)"."""
        items = OrderedDict()
        for feature in self.featuremodel:
            if isinstance(feature, DiscreteDescriptor):
                desc = "categorical"
                if feature.values:
                    desc += " with values " \
                            + ", ".join(f"'{val}'" for val in feature.values)
                if feature.ordered:
                    desc += "; ordered"
            elif isinstance(feature, ContinuousDescriptor):
                desc = "numeric"
            elif isinstance(feature, DateTimeDescriptor):
                desc = "date/time"
            else:
                desc = "text"
            items[feature.name] = f"{feature.expression} ({desc})"
        self.report_items(
            report.plural("Constructed feature{s}", len(items)), items)

    def fix_expressions(self):
        """Rewrite old-style expressions that use categorical variables.

        After confirmation, every use of a categorical variable `v` is
        rewritten: `v.value` becomes `v`, and a bare `v` becomes a lookup
        of `v` in a dict mapping the variable's values to their indices.
        """
        dlg = QMessageBox(
            QMessageBox.Question,
            "Fix Expressions",
            "This widget's behaviour has changed. Values of categorical "
            "variables are now inserted as their textual representations "
            "(strings); previously they appeared as integer numbers, with an "
            "attribute '.value' that contained the text.\n\n"
            "The widget currently runs in compatibility mode. After "
            "expressions are updated, manually check for their correctness.")
        dlg.addButton("Update", QMessageBox.ApplyRole)
        dlg.addButton("Cancel", QMessageBox.RejectRole)
        # NOTE(review): compares exec()'s return value with a button role;
        # presumably this relies on how Qt numbers custom buttons - confirm.
        if dlg.exec() == QMessageBox.RejectRole:
            return

        def fixer(mo):
            var = domain[mo.group(2)]
            if mo.group(3) == ".value":  # uses string; remove `.value`
                return "".join(mo.group(1, 2))
            # Uses ints: get them by indexing
            return mo.group(1) + "{" + \
                ", ".join(f"'{val}': {i}"
                          for i, val in enumerate(var.values)) + \
                f"}}[{var.name}]"

        domain = self.data.domain
        # Escape the names so that regex metacharacters in them are
        # matched literally.
        disc_names = [re.escape(var.name)
                      for var in chain(domain.variables, domain.metas)
                      if var.is_discrete]
        # Without categorical variables there is nothing to rewrite (and
        # an empty alternation would match the empty name).
        if disc_names:
            # The trailing delimiter is a lookahead, not a consumed group:
            # it must remain available as the leading delimiter of the
            # next variable, e.g. the `+` in `a+b`.
            expr = re.compile(
                r"(^|\W)(" + "|".join(disc_names) + r")(\.value)?(?=\W|$)")
            self.descriptors[:] = [
                descriptor._replace(
                    expression=expr.sub(fixer, descriptor.expression))
                for descriptor in self.descriptors]

        self.expressions_with_values = False
        self.fix_button.hide()
        index = self.currentIndex
        self.featuremodel[:] = list(self.descriptors)
        self.setCurrentIndex(index)
        self.apply()

    @classmethod
    def migrate_context(cls, context, version):
        """Flag pre-version-2 contexts that use categorical variables.

        Sets ``expressions_with_values`` in the context when any stored
        expression refers to a variable whose stored type code is 1.
        """
        if version is None or version < 2:
            used_vars = set(chain.from_iterable(
                freevars(ast.parse(descriptor.expression, mode="eval"), [])
                for descriptor in context.values["descriptors"]
                if descriptor.expression))
            disc_vars = {name
                         for (name, vtype) in chain(context.attributes.items(),
                                                    context.metas.items())
                         if vtype == 1}
            if used_vars & disc_vars:
                context.values["expressions_with_values"] = True
class OWSelectRows(widget.OWWidget): name = "Select Rows" id = "Orange.widgets.data.file" description = "Select rows from the data based on values of variables." icon = "icons/SelectRows.svg" priority = 100 category = "Data" keywords = ["filter"] class Inputs: data = Input("Data", Table) class Outputs: matching_data = Output("Matching Data", Table, default=True) unmatched_data = Output("Unmatched Data", Table) annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table) want_main_area = False settingsHandler = SelectRowsContextHandler() conditions = ContextSetting([]) update_on_change = Setting(True) purge_attributes = Setting(False, schema_only=True) purge_classes = Setting(False, schema_only=True) auto_commit = Setting(True) settings_version = 2 Operators = { ContinuousVariable: [ (FilterContinuous.Equal, "equals"), (FilterContinuous.NotEqual, "is not"), (FilterContinuous.Less, "is below"), (FilterContinuous.LessEqual, "is at most"), (FilterContinuous.Greater, "is greater than"), (FilterContinuous.GreaterEqual, "is at least"), (FilterContinuous.Between, "is between"), (FilterContinuous.Outside, "is outside"), (FilterContinuous.IsDefined, "is defined"), ], DiscreteVariable: [ (FilterDiscreteType.Equal, "is"), (FilterDiscreteType.NotEqual, "is not"), (FilterDiscreteType.In, "is one of"), (FilterDiscreteType.IsDefined, "is defined") ], StringVariable: [ (FilterString.Equal, "equals"), (FilterString.NotEqual, "is not"), (FilterString.Less, "is before"), (FilterString.LessEqual, "is equal or before"), (FilterString.Greater, "is after"), (FilterString.GreaterEqual, "is equal or after"), (FilterString.Between, "is between"), (FilterString.Outside, "is outside"), (FilterString.Contains, "contains"), (FilterString.StartsWith, "begins with"), (FilterString.EndsWith, "ends with"), (FilterString.IsDefined, "is defined"), ] } Operators[TimeVariable] = Operators[ContinuousVariable] AllTypes = {} for _all_name, _all_type, _all_ops in ( ("All variables", 0, [(None, "are 
defined")]), ("All numeric variables", 2, [(v, _plural(t)) for v, t in Operators[ContinuousVariable]]), ("All string variables", 3, [(v, _plural(t)) for v, t in Operators[StringVariable]])): Operators[_all_name] = _all_ops AllTypes[_all_name] = _all_type operator_names = {vtype: [name for _, name in filters] for vtype, filters in Operators.items()} class Error(widget.OWWidget.Error): parsing_error = Msg("{}") def __init__(self): super().__init__() self.old_purge_classes = True self.conditions = [] self.last_output_conditions = None self.data = None self.data_desc = self.match_desc = self.nonmatch_desc = None box = gui.vBox(self.controlArea, 'Conditions', stretch=100) self.cond_list = QTableWidget( box, showGrid=False, selectionMode=QTableWidget.NoSelection) box.layout().addWidget(self.cond_list) self.cond_list.setColumnCount(4) self.cond_list.setRowCount(0) self.cond_list.verticalHeader().hide() self.cond_list.horizontalHeader().hide() for i in range(3): self.cond_list.horizontalHeader().setSectionResizeMode(i, QHeaderView.Stretch) self.cond_list.horizontalHeader().resizeSection(3, 30) self.cond_list.viewport().setBackgroundRole(QPalette.Window) box2 = gui.hBox(box) gui.rubber(box2) self.add_button = gui.button( box2, self, "Add Condition", callback=self.add_row) self.add_all_button = gui.button( box2, self, "Add All Variables", callback=self.add_all) self.remove_all_button = gui.button( box2, self, "Remove All", callback=self.remove_all) gui.rubber(box2) boxes = gui.widgetBox(self.controlArea, orientation=QHBoxLayout()) layout = boxes.layout() box_setting = gui.vBox(boxes, addToLayout=False, box=True) self.cb_pa = gui.checkBox( box_setting, self, "purge_attributes", "Remove unused features", callback=self.conditions_changed) gui.separator(box_setting, height=1) self.cb_pc = gui.checkBox( box_setting, self, "purge_classes", "Remove unused classes", callback=self.conditions_changed) layout.addWidget(box_setting, 1) self.report_button.setFixedWidth(120) 
gui.rubber(self.buttonsArea.layout()) layout.addWidget(self.buttonsArea) acbox = gui.auto_send(None, self, "auto_commit") layout.addWidget(acbox, 1) layout.setAlignment(acbox, Qt.AlignBottom) self.info.set_input_summary(self.info.NoInput) self.info.set_output_summary(self.info.NoOutput) self.set_data(None) self.resize(600, 400) def add_row(self, attr=None, condition_type=None, condition_value=None): model = self.cond_list.model() row = model.rowCount() model.insertRow(row) attr_combo = gui.OrangeComboBox( minimumContentsLength=12, sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon) attr_combo.row = row for var in self._visible_variables(self.data.domain): if isinstance(var, Variable): attr_combo.addItem(*gui.attributeItem(var)) else: attr_combo.addItem(var) if isinstance(attr, str): attr_combo.setCurrentText(attr) else: attr_combo.setCurrentIndex( attr or len(self.AllTypes) - (attr_combo.count() == len(self.AllTypes))) self.cond_list.setCellWidget(row, 0, attr_combo) index = QPersistentModelIndex(model.index(row, 3)) temp_button = QPushButton('×', self, flat=True, styleSheet='* {font-size: 16pt; color: silver}' '*:hover {color: black}') temp_button.clicked.connect(lambda: self.remove_one(index.row())) self.cond_list.setCellWidget(row, 3, temp_button) self.remove_all_button.setDisabled(False) self.set_new_operators(attr_combo, attr is not None, condition_type, condition_value) attr_combo.currentIndexChanged.connect( lambda _: self.set_new_operators(attr_combo, False)) self.cond_list.resizeRowToContents(row) @classmethod def _visible_variables(cls, domain): """Generate variables in order they should be presented in in combos.""" return chain( cls.AllTypes, filter_visible(chain(domain.class_vars, domain.metas, domain.attributes))) def add_all(self): if self.cond_list.rowCount(): Mb = QMessageBox if Mb.question( self, "Remove existing filters", "This will replace the existing filters with " "filters for all variables.", Mb.Ok | Mb.Cancel) != Mb.Ok: return 
self.remove_all() domain = self.data.domain for i in range(len(domain.variables) + len(domain.metas)): self.add_row(i) def remove_one(self, rownum): self.remove_one_row(rownum) self.conditions_changed() def remove_all(self): self.remove_all_rows() self.conditions_changed() def remove_one_row(self, rownum): self.cond_list.removeRow(rownum) if self.cond_list.model().rowCount() == 0: self.remove_all_button.setDisabled(True) def remove_all_rows(self): self.cond_list.clear() self.cond_list.setRowCount(0) self.remove_all_button.setDisabled(True) def set_new_operators(self, attr_combo, adding_all, selected_index=None, selected_values=None): oper_combo = QComboBox() oper_combo.row = attr_combo.row oper_combo.attr_combo = attr_combo attr_name = attr_combo.currentText() if attr_name in self.AllTypes: oper_combo.addItems(self.operator_names[attr_name]) else: var = self.data.domain[attr_name] oper_combo.addItems(self.operator_names[type(var)]) oper_combo.setCurrentIndex(selected_index or 0) self.cond_list.setCellWidget(oper_combo.row, 1, oper_combo) self.set_new_values(oper_combo, adding_all, selected_values) oper_combo.currentIndexChanged.connect( lambda _: self.set_new_values(oper_combo, False)) @staticmethod def _get_lineedit_contents(box): return [child.text() for child in getattr(box, "controls", [box]) if isinstance(child, QLineEdit)] @staticmethod def _get_value_contents(box): cont = [] names = [] for child in getattr(box, "controls", [box]): if isinstance(child, QLineEdit): cont.append(child.text()) elif isinstance(child, QComboBox): cont.append(child.currentIndex()) elif isinstance(child, QToolButton): if child.popup is not None: model = child.popup.list_view.model() for row in range(model.rowCount()): item = model.item(row) if item.checkState(): cont.append(row + 1) names.append(item.text()) child.desc_text = ', '.join(names) child.set_text() elif isinstance(child, QLabel) or child is None: pass else: raise TypeError('Type %s not supported.' 
% type(child)) return tuple(cont) class QDoubleValidatorEmpty(QDoubleValidator): def validate(self, input_, pos): if not input_: return QDoubleValidator.Acceptable, input_, pos if self.locale().groupSeparator() in input_: return QDoubleValidator.Invalid, input_, pos return super().validate(input_, pos) def set_new_values(self, oper_combo, adding_all, selected_values=None): # def remove_children(): # for child in box.children()[1:]: # box.layout().removeWidget(child) # child.setParent(None) def add_textual(contents): le = gui.lineEdit(box, self, None, sizePolicy=QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding)) if contents: le.setText(contents) le.setAlignment(Qt.AlignRight) le.editingFinished.connect(self.conditions_changed) return le def add_numeric(contents): le = add_textual(contents) le.setValidator(OWSelectRows.QDoubleValidatorEmpty()) return le def add_datetime(contents): le = add_textual(contents) le.setValidator(QRegExpValidator(QRegExp(TimeVariable.REGEX))) return le box = self.cond_list.cellWidget(oper_combo.row, 2) lc = ["", ""] oper = oper_combo.currentIndex() attr_name = oper_combo.attr_combo.currentText() if attr_name in self.AllTypes: vtype = self.AllTypes[attr_name] var = None else: var = self.data.domain[attr_name] vtype = vartype(var) if selected_values is not None: lc = list(selected_values) + ["", ""] lc = [str(x) for x in lc[:2]] if box and vtype == box.var_type: lc = self._get_lineedit_contents(box) + lc if oper_combo.currentText().endswith(" defined"): label = QLabel() label.var_type = vtype self.cond_list.setCellWidget(oper_combo.row, 2, label) elif var is not None and var.is_discrete: if oper_combo.currentText().endswith(" one of"): if selected_values: lc = [x for x in list(selected_values)] button = DropDownToolButton(self, var, lc) button.var_type = vtype self.cond_list.setCellWidget(oper_combo.row, 2, button) else: combo = QComboBox() combo.addItems(("", ) + var.values) if lc[0]: combo.setCurrentIndex(int(lc[0])) else: 
combo.setCurrentIndex(0) combo.var_type = vartype(var) self.cond_list.setCellWidget(oper_combo.row, 2, combo) combo.currentIndexChanged.connect(self.conditions_changed) else: box = gui.hBox(self, addToLayout=False) box.var_type = vtype self.cond_list.setCellWidget(oper_combo.row, 2, box) if vtype in (2, 4): # continuous, time: validator = add_datetime if isinstance(var, TimeVariable) else add_numeric box.controls = [validator(lc[0])] if oper > 5: gui.widgetLabel(box, " and ") box.controls.append(validator(lc[1])) elif vtype == 3: # string: box.controls = [add_textual(lc[0])] if oper in [6, 7]: gui.widgetLabel(box, " and ") box.controls.append(add_textual(lc[1])) else: box.controls = [] if not adding_all: self.conditions_changed() @Inputs.data def set_data(self, data): self.closeContext() self.data = data self.cb_pa.setEnabled(not isinstance(data, SqlTable)) self.cb_pc.setEnabled(not isinstance(data, SqlTable)) self.remove_all_rows() self.add_button.setDisabled(data is None) self.add_all_button.setDisabled( data is None or len(data.domain.variables) + len(data.domain.metas) > 100) if not data: self.info.set_input_summary(self.info.NoInput) self.data_desc = None self.commit() return self.data_desc = report.describe_data_brief(data) self.conditions = [] try: self.openContext(data) except Exception: pass variables = list(self._visible_variables(self.data.domain)) varnames = [v.name if isinstance(v, Variable) else v for v in variables] if self.conditions: for attr, cond_type, cond_value in self.conditions: if attr in varnames: self.add_row(varnames.index(attr), cond_type, cond_value) elif attr in self.AllTypes: self.add_row(attr, cond_type, cond_value) else: self.add_row() self.info.set_input_summary(len(data), format_summary_details(data)) self.unconditional_commit() def conditions_changed(self): try: self.conditions = [] self.conditions = [ (self.cond_list.cellWidget(row, 0).currentText(), self.cond_list.cellWidget(row, 1).currentIndex(), 
self._get_value_contents(self.cond_list.cellWidget(row, 2))) for row in range(self.cond_list.rowCount())] if self.update_on_change and ( self.last_output_conditions is None or self.last_output_conditions != self.conditions): self.commit() except AttributeError: # Attribute error appears if the signal is triggered when the # controls are being constructed pass def _values_to_floats(self, attr, values): if not len(values): return values if not all(values): return None if isinstance(attr, TimeVariable): parse = lambda x: (attr.parse(x), True) else: parse = QLocale().toDouble try: floats, ok = zip(*[parse(v) for v in values]) if not all(ok): raise ValueError('Some values could not be parsed as floats' 'in the current locale: {}'.format(values)) except TypeError: floats = values # values already floats assert all(isinstance(v, float) for v in floats) return floats def commit(self): matching_output = self.data non_matching_output = None annotated_output = None self.Error.clear() if self.data: domain = self.data.domain conditions = [] for attr_name, oper_idx, values in self.conditions: if attr_name in self.AllTypes: attr_index = attr = None attr_type = self.AllTypes[attr_name] operators = self.Operators[attr_name] else: attr_index = domain.index(attr_name) attr = domain[attr_index] attr_type = vartype(attr) operators = self.Operators[type(attr)] opertype, _ = operators[oper_idx] if attr_type == 0: filter = data_filter.IsDefined() elif attr_type in (2, 4): # continuous, time try: floats = self._values_to_floats(attr, values) except ValueError as e: self.Error.parsing_error(e.args[0]) return if floats is None: continue filter = data_filter.FilterContinuous( attr_index, opertype, *floats) elif attr_type == 3: # string filter = data_filter.FilterString( attr_index, opertype, *[str(v) for v in values]) else: if opertype == FilterDiscreteType.IsDefined: f_values = None else: if not values or not values[0]: continue values = [attr.values[i-1] for i in values] if opertype == 
FilterDiscreteType.Equal: f_values = {values[0]} elif opertype == FilterDiscreteType.NotEqual: f_values = set(attr.values) f_values.remove(values[0]) elif opertype == FilterDiscreteType.In: f_values = set(values) else: raise ValueError("invalid operand") filter = data_filter.FilterDiscrete(attr_index, f_values) conditions.append(filter) if conditions: self.filters = data_filter.Values(conditions) matching_output = self.filters(self.data) self.filters.negate = True non_matching_output = self.filters(self.data) row_sel = np.in1d(self.data.ids, matching_output.ids) annotated_output = create_annotated_table(self.data, row_sel) # if hasattr(self.data, "name"): # matching_output.name = self.data.name # non_matching_output.name = self.data.name purge_attrs = self.purge_attributes purge_classes = self.purge_classes if (purge_attrs or purge_classes) and \ not isinstance(self.data, SqlTable): attr_flags = sum([Remove.RemoveConstant * purge_attrs, Remove.RemoveUnusedValues * purge_attrs]) class_flags = sum([Remove.RemoveConstant * purge_classes, Remove.RemoveUnusedValues * purge_classes]) # same settings used for attributes and meta features remover = Remove(attr_flags, class_flags, attr_flags) matching_output = remover(matching_output) non_matching_output = remover(non_matching_output) annotated_output = remover(annotated_output) if matching_output is not None and not len(matching_output): matching_output = None if non_matching_output is not None and not len(non_matching_output): non_matching_output = None if annotated_output is not None and not len(annotated_output): annotated_output = None self.Outputs.matching_data.send(matching_output) self.Outputs.unmatched_data.send(non_matching_output) self.Outputs.annotated_data.send(annotated_output) self.match_desc = report.describe_data_brief(matching_output) self.nonmatch_desc = report.describe_data_brief(non_matching_output) summary = len(matching_output) if matching_output else self.info.NoOutput details = 
format_summary_details(matching_output) if matching_output else "" self.info.set_output_summary(summary, details) def send_report(self): if not self.data: self.report_paragraph("No data.") return pdesc = None describe_domain = False for d in (self.data_desc, self.match_desc, self.nonmatch_desc): if not d or not d["Data instances"]: continue ndesc = d.copy() del ndesc["Data instances"] if pdesc is not None and pdesc != ndesc: describe_domain = True pdesc = ndesc conditions = [] domain = self.data.domain for attr_name, oper, values in self.conditions: if attr_name in self.AllTypes: attr = attr_name names = self.operator_names[attr_name] var_type = self.AllTypes[attr_name] else: attr = domain[attr_name] var_type = vartype(attr) names = self.operator_names[type(attr)] name = names[oper] if oper == len(names) - 1: conditions.append("{} {}".format(attr, name)) elif var_type == 1: # discrete if name == "is one of": valnames = [attr.values[v - 1] for v in values] if not valnames: continue if len(valnames) == 1: valstr = valnames[0] else: valstr = f"{', '.join(valnames[:-1])} or {valnames[-1]}" conditions.append(f"{attr} is {valstr}") elif values and values[0]: value = values[0] - 1 conditions.append(f"{attr} {name} {attr.values[value]}") elif var_type == 3: # string variable conditions.append( f"{attr} {name} {' and '.join(map(repr, values))}") elif all(x for x in values): # numeric variable conditions.append(f"{attr} {name} {' and '.join(values)}") items = OrderedDict() if describe_domain: items.update(self.data_desc) else: items["Instances"] = self.data_desc["Data instances"] items["Condition"] = " AND ".join(conditions) or "no conditions" self.report_items("Data", items) if describe_domain: self.report_items("Matching data", self.match_desc) self.report_items("Non-matching data", self.nonmatch_desc) else: match_inst = \ bool(self.match_desc) and \ self.match_desc["Data instances"] nonmatch_inst = \ bool(self.nonmatch_desc) and \ self.nonmatch_desc["Data instances"] 
self.report_items( "Output", (("Matching data", "{} instances".format(match_inst) if match_inst else "None"), ("Non-matching data", nonmatch_inst > 0 and "{} instances".format(nonmatch_inst))))
class OWExplainPredictions(OWWidget, ConcurrentWidgetMixin):
    """Widget that shows a force plot explaining a model's predictions.

    The explanation is computed in a background task (``run``) from the
    data, background data and model inputs. Instances selected in the plot
    are sent as selected/annotated data, and the explanation values for the
    chosen target are sent as a score table.
    """
    name = "Explain Predictions"
    description = "Predictions explanation widget."
    keywords = ["explain", "explain prediction", "explain model"]
    icon = "icons/ExplainPredictions.svg"
    priority = 120

    class Inputs:
        model = Input("Model", Model)
        background_data = Input("Background Data", Table)
        data = Input("Data", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)
        scores = Output("Scores", Table)

    class Error(OWWidget.Error):
        domain_transform_err = Msg("{}")
        unknown_err = Msg("{}")
        not_enough_data = Msg("At least two instances are needed.")

    class Information(OWWidget.Information):
        data_sampled = Msg("Data has been sampled.")

    buttons_area_orientation = Qt.Vertical

    settingsHandler = PerfectDomainContextHandler()
    target_index = ContextSetting(0)
    order_index = ContextSetting(0)
    annot_index = ContextSetting(0)
    show_tooltip = Setting(True)
    highlight_feature = Setting(True)
    selection_ranges = Setting([], schema_only=True)
    auto_send = Setting(True)
    visual_settings = Setting({}, schema_only=True)

    graph_name = "graph.plotItem"
    ANNOTATIONS = ["None", "Enumeration"]

    def __init__(self):
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)
        self.__results: Optional[RunnerResults] = None
        self.model: Optional[Model] = None
        self.background_data: Optional[Table] = None
        self.data: Optional[Table] = None
        # cached instance indices after instance ordering
        self.__data_idxs: Optional[np.ndarray] = None
        # selection stored in the settings; applied once results arrive
        self.__pending_selection: List[Tuple[float, float]] = \
            self.selection_ranges

        self.graph: ForcePlot = None
        self._target_combo: QComboBox = None
        self._order_combo: QComboBox = None
        self._annot_combo: QComboBox = None

        self.setup_gui()

        initial_settings = self.graph.parameter_setter.initial_settings
        VisualSettingsDialog(self, initial_settings)

    def setup_gui(self):
        """Create the plot, the control area and the buttons."""
        self._add_plot()
        self._add_controls()
        self._add_buttons()

    def _add_plot(self):
        box = gui.vBox(self.mainArea)
        self.graph = ForcePlot(self)
        self.graph.set_show_tooltip(self.show_tooltip)
        self.graph.set_highlight_feature(self.highlight_feature)
        self.graph.selectionChanged.connect(self.__on_selection_changed)
        box.layout().addWidget(self.graph)

    def __on_selection_changed(self, selection: List[Tuple[float, float]]):
        self.selection_ranges = selection
        self.commit()

    def _add_controls(self):
        box = gui.vBox(self.controlArea, "Target class")
        self._target_combo = gui.comboBox(box, self, "target_index",
                                          callback=self.__on_target_changed,
                                          contentsLength=12)

        box = gui.vBox(self.controlArea, "Instance order")
        self._order_combo = gui.comboBox(box, self, "order_index",
                                         callback=self.__on_order_changed,
                                         searchable=True, contentsLength=12)
        model = VariableListModel()
        model[:] = INSTANCE_ORDERINGS
        self._order_combo.setModel(model)

        box = gui.vBox(self.controlArea, "Annotation")
        self._annot_combo = gui.comboBox(box, self, "annot_index",
                                         callback=self.__on_annot_changed,
                                         searchable=True, contentsLength=12)
        model = VariableListModel()
        model[:] = self.ANNOTATIONS
        self._annot_combo.setModel(model)

        box = gui.vBox(self.controlArea, "", margin=True,
                       contentsMargins=(8, 4, 8, 4))
        gui.checkBox(box, self, "show_tooltip", "Show tooltips",
                     callback=self.__on_show_tooltip_changed)
        gui.checkBox(box, self, "highlight_feature",
                     "Highlight feature on hover",
                     callback=self.__on_highlight_feature_changed)

        gui.rubber(self.controlArea)

    def __on_target_changed(self):
        # A different target redraws the plot; the old selection is dropped.
        self.selection_ranges = []
        self.setup_plot()
        self.commit()

    def __on_order_changed(self):
        # Reordering changes instance positions; the old selection is dropped.
        self.selection_ranges = []
        self.setup_plot()
        self.commit()

    def __on_annot_changed(self):
        if not self.__results or not self.data:
            return
        self._set_plot_annotations()

    def __on_show_tooltip_changed(self):
        self.graph.set_show_tooltip(self.show_tooltip)

    def __on_highlight_feature_changed(self):
        self.graph.set_highlight_feature(self.highlight_feature)

    def _add_buttons(self):
        plot_gui = OWPlotGUI(self)
        plot_gui.box_zoom_select(self.buttonsArea)
        gui.auto_send(self.buttonsArea, self, "auto_send")

    @Inputs.data
    @check_sql_input
    def set_data(self, data: Optional[Table]):
        self.closeContext()
        self.data = data
        self._check_data()
        self._setup_controls()
        self.openContext(self.data.domain if self.data else None)

    @Inputs.background_data
    @check_sql_input
    def set_background_data(self, data: Optional[Table]):
        self.background_data = data

    @Inputs.model
    def set_model(self, model: Optional[Model]):
        self.model = model

    def _check_data(self):
        """Drop data with fewer than two instances and show an error."""
        self.Error.not_enough_data.clear()
        if self.data and len(self.data) < 2:
            self.data = None
            self.Error.not_enough_data()

    def _setup_controls(self):
        """Reset the combo boxes and refill them from the data's domain."""
        self._target_combo.clear()
        self._target_combo.setEnabled(True)

        self.order_index = 0
        self.annot_index = 0
        self._order_combo.clear()
        self._annot_combo.clear()
        orderings = INSTANCE_ORDERINGS
        annotations = self.ANNOTATIONS

        if self.data:
            domain = self.data.domain
            if domain.has_discrete_class:
                self._target_combo.addItems(domain.class_var.values)
                self.target_index = 0
            elif domain.has_continuous_class:
                self.target_index = -1
                self._target_combo.setEnabled(False)

            # Built-in choices followed by metas, class variables and
            # attributes, each group preceded by a separator if non-empty.
            orderings = chain(
                INSTANCE_ORDERINGS,
                [VariableListModel.Separator] if domain.metas else [],
                domain.metas,
                [VariableListModel.Separator] if domain.class_vars else [],
                domain.class_vars,
                [VariableListModel.Separator] if domain.attributes else [],
                domain.attributes,
            )
            annotations = chain(
                self.ANNOTATIONS,
                [VariableListModel.Separator] if domain.metas else [],
                domain.metas,
                [VariableListModel.Separator] if domain.class_vars else [],
                domain.class_vars,
                [VariableListModel.Separator] if domain.attributes else [],
                domain.attributes,
            )

        self._order_combo.model()[:] = orderings
        self._annot_combo.model()[:] = annotations

    def handleNewSignals(self):
        self.clear()
        self.start(run, self.data, self.background_data, self.model)
        self.commit()

    def clear(self):
        """Cancel the running task and reset results, messages, selection
        and the plot."""
        self.__results = None
        self.cancel()
        self.Error.domain_transform_err.clear()
        self.Error.unknown_err.clear()
        self.Information.data_sampled.clear()
        self.selection_ranges = []
        self.graph.clear_all()
        self.graph.set_axis(None)
        self.__data_idxs = None

    def setup_plot(self):
        """Redraw the force plot for the current target and ordering."""
        self.graph.clear_all()
        self.__data_idxs = None
        if not self.__results or not self.data:
            return

        order = self._order_combo.model()[self.order_index]
        values_idxs = get_instance_ordering(
            self.__results.values[self.target_index],
            self.__results.predictions[self.__results.mask, self.target_index],
            self.data[self.__results.mask],
            order
        )

        # Map positions in the (masked, ordered) results back to data rows.
        data_idxs = np.arange(len(self.data))
        self.__data_idxs = data_idxs[self.__results.mask][values_idxs]

        x_data, pos_y_data, neg_y_data, pos_labels, neg_labels = \
            prepare_force_plot_data_multi_inst(
                self.__results.values[self.target_index][values_idxs],
                self.__results.base_value[self.target_index],
                self.model.domain
            )

        # The first three orderings get a descriptive label; any other
        # entry of the combo model is used as is.
        order_labels = {
            0: "hierarchical clustering",
            1: "output value",
            2: "original ordering",
        }
        order = order_labels.get(self.order_index, order)
        x_label = f"Instances ordered by {order}"

        target = self.model.domain.class_var
        if self.model.domain.has_discrete_class:
            target = f"{target} = {target.values[self.target_index]}"
        y_label = f"Output value ({target})"

        self.graph.set_data(x_data, pos_y_data, neg_y_data,
                            pos_labels, neg_labels, x_label, y_label,
                            self.__results.transformed_data[self.__data_idxs])
        self._set_plot_annotations()

    def _set_plot_annotations(self):
        """Set bottom axis ticks according to the chosen annotation."""
        annotator = self._annot_combo.model()[self.annot_index]
        if isinstance(annotator, Variable):
            ticks = [[(i, str(row[annotator].value)) for i, row in
                      enumerate(self.data[self.__data_idxs])]]
            self.graph.set_axis(ticks)
        elif annotator == "None":
            self.graph.set_axis([])
        elif annotator == "Enumeration":
            # 1-based index of the instance in the input data
            ticks = [[(i, str(idx + 1)) for i, idx in
                      enumerate(self.__data_idxs)]]
            self.graph.set_axis(ticks)
        else:
            raise NotImplementedError(annotator)

    def on_partial_result(self, _):
        pass

    def on_done(self, results: Optional[RunnerResults]):
        self.__results = results
        if results is not None and not all(results.mask):
            self.Information.data_sampled()
        self.setup_plot()
        self.apply_selection()
        self.output_scores()

    def on_exception(self, ex: Exception):
        if isinstance(ex, DomainTransformationError):
            self.Error.domain_transform_err(ex)
        else:
            self.Error.unknown_err(ex)

    def onDeleteWidget(self):
        self.shutdown()
        super().onDeleteWidget()

    def apply_selection(self):
        """Apply the current selection, or the one pending from settings."""
        selection_ranges = self.selection_ranges or self.__pending_selection
        if selection_ranges:
            self.graph.apply_selection(selection_ranges)
            self.__on_selection_changed(selection_ranges)
        self.__pending_selection = []

    def commit(self):
        """Send the selected instances and the annotated data."""
        selected = None
        selected_indices = []

        # __data_idxs is None until a plot has been set up; without it the
        # selection ranges cannot be mapped to data rows.
        if self.__results and self.__data_idxs is not None:
            # Expand each (start, stop) range to the whole positions it
            # covers; a set removes positions shared by several ranges.
            selection = list(set(
                chain.from_iterable(
                    range(int(np.ceil(start)), int(np.floor(stop) + 1))
                    for start, stop in self.selection_ranges)
            ))
            selected_indices = sorted(self.__data_idxs[selection])

        if self.data and selected_indices:
            selected = self.data[selected_indices]
        annotated = create_annotated_table(self.data, selected_indices)
        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)

    def output_scores(self):
        """Send a table of explanation values for the current target."""
        scores = None
        if self.__results is not None:
            mask = self.__results.mask
            data = self.__results.transformed_data[mask]
            domain = data.domain
            attrs = [ContinuousVariable(f"S({a.name})")
                     for a in domain.attributes]
            domain = Domain(attrs, domain.class_vars, domain.metas)
            scores = self.__results.values[self.target_index]
            scores = Table(domain, scores, data.Y, data.metas)
            scores.name = "Feature Scores"
        self.Outputs.scores.send(scores)

    def send_report(self):
        if not self.data or not self.background_data or not self.model:
            return
        items = {"Target class": "None"}
        if self.model.domain.has_discrete_class:
            class_var = self.model.domain.class_var
            items["Target class"] = class_var.values[self.target_index]
        self.report_items(items)
        self.report_plot()

    def set_visual_settings(self, key: Tuple[str, str, str], value: Any):
        self.visual_settings[key] = value
        self.graph.parameter_setter.set_parameter(key, value)
class ImagePlot(QWidget, OWComponent, SelectionGroupMixin):
    """Image view of the data with zooming, selection and a "View" menu
    for choosing axes, colour palette and thresholds."""

    attr_x = ContextSetting(None)
    attr_y = ContextSetting(None)
    gamma = Setting(0)
    threshold_low = Setting(0.0)
    threshold_high = Setting(1.0)
    palette_index = Setting(0)

    selection_changed = Signal()

    def __init__(self, parent):
        QWidget.__init__(self)
        OWComponent.__init__(self, parent)
        SelectionGroupMixin.__init__(self)

        self.parent = parent

        self.selection_type = SELECTMANY
        self.saving_enabled = hasattr(self.parent, "save_graph")
        self.selection_enabled = True
        self.viewtype = INDIVIDUAL  # required by InteractiveViewBox
        self.highlighted = None
        self.data_points = None
        self.data_values = None
        self.data_imagepixels = None

        self.plotview = pg.PlotWidget(background="w",
                                      viewBox=InteractiveViewBox(self))
        self.plot = self.plotview.getPlotItem()

        self.plot.scene().installEventFilter(
            HelpEventDelegate(self.help_event, self))

        layout = QVBoxLayout()
        self.setLayout(layout)
        self.layout().setContentsMargins(0, 0, 0, 0)
        self.layout().addWidget(self.plotview)

        self.img = ImageItemNan()
        self.img.setOpts(axisOrder='row-major')
        self.plot.addItem(self.img)
        self.plot.vb.setAspectLocked()
        self.plot.scene().sigMouseMoved.connect(self.plot.vb.mouseMovedEvent)

        layout = QGridLayout()
        self.plotview.setLayout(layout)
        self.button = QPushButton("View", self.plotview)
        self.button.setAutoDefault(False)

        layout.setRowStretch(1, 1)
        layout.setColumnStretch(1, 1)
        layout.addWidget(self.button, 0, 0)
        view_menu = MenuFocus(self)
        self.button.setMenu(view_menu)

        # prepare interface according to the new context
        self.parent.contextAboutToBeOpened.connect(
            lambda x: self.init_interface_data(x[0]))

        actions = []

        zoom_in = QAction("Zoom in", self,
                          triggered=self.plot.vb.set_mode_zooming)
        zoom_in.setShortcuts([Qt.Key_Z, QKeySequence(QKeySequence.ZoomIn)])
        zoom_in.setShortcutContext(Qt.WidgetWithChildrenShortcut)
        actions.append(zoom_in)
        zoom_fit = QAction(
            "Zoom to fit", self,
            triggered=lambda x: (self.plot.vb.autoRange(),
                                 self.plot.vb.set_mode_panning()))
        zoom_fit.setShortcuts(
            [Qt.Key_Backspace, QKeySequence(Qt.ControlModifier | Qt.Key_0)])
        zoom_fit.setShortcutContext(Qt.WidgetWithChildrenShortcut)
        actions.append(zoom_fit)
        select_square = QAction(
            "Select (square)", self,
            triggered=self.plot.vb.set_mode_select_square,
        )
        select_square.setShortcuts([Qt.Key_S])
        select_square.setShortcutContext(Qt.WidgetWithChildrenShortcut)
        actions.append(select_square)

        select_polygon = QAction(
            "Select (polygon)", self,
            triggered=self.plot.vb.set_mode_select_polygon,
        )
        select_polygon.setShortcuts([Qt.Key_P])
        select_polygon.setShortcutContext(Qt.WidgetWithChildrenShortcut)
        actions.append(select_polygon)

        if self.saving_enabled:
            save_graph = QAction(
                "Save graph", self,
                triggered=self.save_graph,
            )
            save_graph.setShortcuts(
                [QKeySequence(Qt.ControlModifier | Qt.Key_I)])
            actions.append(save_graph)

        view_menu.addActions(actions)
        self.addActions(actions)

        common_options = dict(labelWidth=50, orientation=Qt.Horizontal,
                              sendSelectedValue=True, valueType=str)

        # The axis/colour controls live in a widget embedded in the menu.
        choose_xy = QWidgetAction(self)
        box = gui.vBox(self)
        box.setFocusPolicy(Qt.TabFocus)
        self.xy_model = DomainModel(DomainModel.METAS | DomainModel.CLASSES,
                                    valid_types=DomainModel.PRIMITIVE)
        self.cb_attr_x = gui.comboBox(box, self, "attr_x", label="Axis x:",
                                      callback=self.update_attr,
                                      model=self.xy_model, **common_options)
        self.cb_attr_y = gui.comboBox(box, self, "attr_y", label="Axis y:",
                                      callback=self.update_attr,
                                      model=self.xy_model, **common_options)
        box.setFocusProxy(self.cb_attr_x)

        self.color_cb = gui.comboBox(box, self, "palette_index",
                                     label="Color:", labelWidth=50,
                                     orientation=Qt.Horizontal)
        self.color_cb.setIconSize(QSize(64, 16))
        palettes = _color_palettes

        # A stored index may exceed the number of available palettes.
        self.palette_index = min(self.palette_index, len(palettes) - 1)

        model = color_palette_model(palettes, self.color_cb.iconSize())
        model.setParent(self)
        self.color_cb.setModel(model)
        self.color_cb.activated.connect(self.update_color_schema)

        self.color_cb.setCurrentIndex(self.palette_index)

        form = QFormLayout(formAlignment=Qt.AlignLeft,
                           labelAlignment=Qt.AlignLeft,
                           fieldGrowthPolicy=QFormLayout.AllNonFixedFieldsGrow)

        lowslider = gui.hSlider(box, self, "threshold_low",
                                minValue=0.0, maxValue=1.0, step=0.05,
                                ticks=True, intOnly=False, createLabel=False,
                                callback=self.update_color_schema)
        highslider = gui.hSlider(box, self, "threshold_high",
                                 minValue=0.0, maxValue=1.0, step=0.05,
                                 ticks=True, intOnly=False, createLabel=False,
                                 callback=self.update_color_schema)

        form.addRow("Low:", lowslider)
        form.addRow("High:", highslider)

        box.layout().addLayout(form)

        choose_xy.setDefaultWidget(box)
        view_menu.addAction(choose_xy)

        self.markings_integral = []

        self.lsx = None  # info about the X axis
        self.lsy = None  # info about the Y axis

        self.data = None
        self.data_ids = {}

    def init_interface_data(self, data):
        """Reinitialise the axis choices unless the domain is unchanged."""
        same_domain = (self.data and data and
                       data.domain == self.data.domain)
        if not same_domain:
            self.init_attr_values(data)

    def help_event(self, ev):
        """Show a tooltip describing the data points under the cursor.

        Returns True if a tooltip was shown and False otherwise.
        """
        pos = self.plot.vb.mapSceneToView(ev.scenePos())
        sel = self._points_at_pos(pos)
        prepared = []
        # data_values stays None when update_view stopped early (image too
        # big) even though points and axes are already set.
        if sel is not None and self.data_values is not None:
            data, vals, points = \
                self.data[sel], self.data_values[sel], self.data_points[sel]
            # The listed variables do not depend on the point.
            variables = [
                var for var in
                self.data.domain.metas + self.data.domain.class_vars
                if var not in [self.attr_x, self.attr_y]
            ]
            for d, v, p in zip(data, vals, points):
                basic = "({}, {}): {}".format(p[0], p[1], v)
                features = [
                    '{} = {}'.format(attr.name, d[attr])
                    for attr in variables
                ]
                prepared.append("\n".join([basic] + features))
        text = "\n\n".join(prepared)
        if text:
            text = ('<span style="white-space:pre">{}</span>'.format(
                escape(text)))
            QToolTip.showText(ev.screenPos(), text, widget=self.plotview)
            return True
        else:
            return False

    def update_color_schema(self):
        """Apply the chosen palette and thresholds to the image."""
        if not self.threshold_low < self.threshold_high:
            # TODO this belongs here, not in the parent
            self.parent.Warning.threshold_error()
            return
        else:
            self.parent.Warning.threshold_error.clear()
        data = self.color_cb.itemData(self.palette_index, role=Qt.UserRole)
        _, colors = max(data.items())
        cols = color_palette_table(colors,
                                   threshold_low=self.threshold_low,
                                   threshold_high=self.threshold_high)

        self.img.setLookupTable(cols)

        # use defined discrete palette
        # This method is also a palette/slider callback, so there may be no
        # data to look the variable up in.
        if self.parent.value_type == 1 and self.data is not None:
            dat = self.data.domain[self.parent.attr_value]
            if isinstance(dat, DiscreteVariable):
                self.img.setLookupTable(dat.colors)

    def update_attr(self):
        self.update_view()

    def init_attr_values(self, data):
        """Set the axis model's domain and pick default x and y."""
        domain = data.domain if data is not None else None
        self.xy_model.set_domain(domain)
        self.attr_x = self.xy_model[0] if self.xy_model else None
        # With a single candidate, the same variable serves both axes.
        self.attr_y = self.xy_model[1] if len(self.xy_model) >= 2 \
            else self.attr_x

    def save_graph(self):
        self.parent.save_graph()

    def set_data(self, data):
        """Store the data and an id -> row index map; restore selection."""
        if data:
            self.data = data
            self.data_ids = {e: i for i, e in enumerate(data.ids)}
            self.restore_selection_settings()
        else:
            self.data = None
            self.data_ids = {}

    def refresh_markings(self, di):
        refresh_integral_markings([{"draw": di}], self.markings_integral,
                                  self.parent.curveplot)

    def update_view(self):
        """Recompute the image from the data and the chosen axes."""
        self.img.clear()
        self.img.setSelection(None)
        self.lsx = None
        self.lsy = None
        self.data_points = None
        self.data_values = None
        self.data_imagepixels = None
        if self.data and self.attr_x and self.attr_y:
            xat = self.data.domain[self.attr_x]
            yat = self.data.domain[self.attr_y]

            ndom = Orange.data.Domain([xat, yat])
            datam = Orange.data.Table(ndom, self.data)
            coorx = datam.X[:, 0]
            coory = datam.X[:, 1]
            self.data_points = datam.X
            self.lsx = lsx = values_to_linspace(coorx)
            self.lsy = lsy = values_to_linspace(coory)
            if lsx[-1] * lsy[-1] > IMAGE_TOO_BIG:
                self.parent.Error.image_too_big(lsx[-1], lsy[-1])
                return
            else:
                self.parent.Error.image_too_big.clear()

            di = {}
            if self.parent.value_type == 0:  # integrals
                imethod = self.parent.integration_methods[
                    self.parent.integration_method]

                l1, l2, l3 = \
                    self.parent.lowlim, self.parent.highlim, self.parent.choose

                gx = getx(self.data)

                # Missing limits default to just outside the x range.
                if l1 is None:
                    l1 = min(gx) - 1
                if l2 is None:
                    l2 = max(gx) + 1

                l1, l2 = min(l1, l2), max(l1, l2)

                if l3 is None:
                    l3 = (l1 + l2) / 2

                if imethod != Integrate.PeakAt:
                    datai = Integrate(methods=imethod,
                                      limits=[[l1, l2]])(self.data)
                else:
                    datai = Integrate(methods=imethod,
                                      limits=[[l3, l3]])(self.data)

                if np.any(self.parent.curveplot.selection_group):
                    # curveplot can have a subset of curves on the input> match IDs
                    ind = np.flatnonzero(
                        self.parent.curveplot.selection_group)[0]
                    dind = self.data_ids[self.parent.curveplot.data[ind].id]
                    di = datai.domain.attributes[0].compute_value.draw_info(
                        self.data[dind:dind + 1])
                d = datai.X[:, 0]
            else:
                dat = self.data.domain[self.parent.attr_value]
                ndom = Orange.data.Domain([dat])
                d = Orange.data.Table(ndom, self.data).X[:, 0]
            self.refresh_markings(di)

            # set data
            imdata = np.ones((lsy[2], lsx[2])) * float("nan")

            xindex = index_values(coorx, lsx)
            yindex = index_values(coory, lsy)
            imdata[yindex, xindex] = d
            self.data_values = d
            self.data_imagepixels = np.vstack((yindex, xindex)).T

            levels = get_levels(imdata)
            self.update_color_schema()

            self.img.setImage(imdata, levels=levels)

            # shift centres of the pixels so that the axes are useful
            shiftx = _shift(lsx)
            shifty = _shift(lsy)
            left = lsx[0] - shiftx
            bottom = lsy[0] - shifty
            width = (lsx[1] - lsx[0]) + 2 * shiftx
            height = (lsy[1] - lsy[0]) + 2 * shifty
            self.img.setRect(QRectF(left, bottom, width, height))

            self.selection_changed.emit()
            self.refresh_img_selection()

    def refresh_img_selection(self):
        """Pass the per-pixel selection groups to the image item."""
        # Nothing to mark when no image was built, e.g. with no data or
        # after update_view stopped early because the image was too big.
        if self.lsx is None or self.lsy is None \
                or self.data_imagepixels is None:
            return
        selected_px = np.zeros((self.lsy[2], self.lsx[2]), dtype=np.uint8)
        selected_px[self.data_imagepixels[:, 0],
                    self.data_imagepixels[:, 1]] = self.selection_group
        self.img.setSelection(selected_px)

    def make_selection(self, selected, add):
        """Add selected indices to the selection."""
        add_to_group, add_group, remove = selection_modifiers()
        if self.data and self.lsx and self.lsy:
            if add_to_group:  # both keys - need to test it before add_group
                selnum = np.max(self.selection_group)
            elif add_group:
                selnum = np.max(self.selection_group) + 1
            elif remove:
                selnum = 0
            else:
                # Plain selection replaces whatever was selected before.
                self.selection_group *= 0
                selnum = 1
            if selected is not None:
                self.selection_group[selected] = selnum
            self.refresh_img_selection()
        self.prepare_settings_for_saving()
        self.selection_changed.emit()

    def select_square(self, p1, p2, add):
        """ Select elements within a square drawn by the user.
        A selection needs to contain whole pixels """
        x1, y1 = p1.x(), p1.y()
        x2, y2 = p2.x(), p2.y()
        polygon = [
            QPointF(x1, y1),
            QPointF(x2, y1),
            QPointF(x2, y2),
            QPointF(x1, y2),
            QPointF(x1, y1)
        ]
        self.select_polygon(polygon, add)

    def select_polygon(self, polygon, add):
        """ Select by a polygon which has to contain whole pixels. """
        if self.data and self.lsx and self.lsy:
            polygon = [(p.x(), p.y()) for p in polygon]
            # a polygon should contain all pixel
            shiftx = _shift(self.lsx)
            shifty = _shift(self.lsy)
            # Four corners of each pixel; all must lie inside the polygon.
            points_edges = [
                self.data_points + [[shiftx, shifty]],
                self.data_points + [[-shiftx, shifty]],
                self.data_points + [[shiftx, -shifty]],
                self.data_points + [[-shiftx, -shifty]]
            ]
            inp = in_polygon(points_edges[0], polygon)
            for p in points_edges[1:]:
                inp *= in_polygon(p, polygon)
            self.make_selection(inp, add)

    def _points_at_pos(self, pos):
        """Return a boolean mask of points whose pixel contains ``pos``,
        or None when there is no data or no axis information."""
        if self.data and self.lsx and self.lsy:
            x, y = pos.x(), pos.y()
            distance = np.abs(self.data_points - [[x, y]])
            sel = (distance[:, 0] < _shift(self.lsx)) * \
                  (distance[:, 1] < _shift(self.lsy))
            return sel
        return None

    def select_by_click(self, pos, add):
        sel = self._points_at_pos(pos)
        self.make_selection(sel, add)