class OWTestLearners(OWWidget):
    """
    Widget that evaluates the input learners on the input data with the
    selected resampling scheme and shows one row of scores per learner.
    """
    name = "Test & Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Learner, "set_learner", widget.Multiple),
              ("Data", Table, "set_train_data", widget.Default),
              ("Test Data", Table, "set_test_data"),
              ("Preprocessor", Preprocess, "set_preprocessor")]

    outputs = [("Predictions", Table),
               ("Evaluation Results", Results)]

    settingsHandler = settings.ClassValuesContextHandler()

    #: Resampling/testing types
    KFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4
    #: Numbers of folds
    NFolds = [2, 3, 5, 10, 20]
    #: Number of repetitions
    NRepeats = [2, 3, 5, 10, 20, 50, 100]
    #: Sample sizes
    SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95]

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation (an index into `NFolds`)
    n_folds = settings.Setting(3)
    #: Stratified sampling for K-fold
    cv_stratified = settings.Setting(True)
    #: Number of repeats for ShuffleSplit sampling (an index into `NRepeats`)
    n_repeats = settings.Setting(3)
    #: ShuffleSplit sample size (an index into `SampleSizes`)
    sample_size = settings.Setting(9)
    #: Stratified sampling for Random Sampling
    shuffle_stratified = settings.Setting(True)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    class Error(OWWidget.Error):
        train_data_empty = Msg("Train data set is empty.")
        test_data_empty = Msg("Test data set is empty.")
        class_required = Msg("Train data input requires a target variable.")
        too_many_classes = Msg("Too many target variables.")
        class_required_test = Msg("Test data input requires a target variable.")
        too_many_folds = Msg("Number of folds exceeds the data size")
        class_inconsistent = Msg("Test and train data sets "
                                 "have different target variables.")

    class Warning(OWWidget.Warning):
        missing_data = Msg(
            "Instances with unknown target values were removed from{}data.")
        test_data_missing = Msg("Missing separate test data input.")
        scores_not_computed = Msg("Some scores could not be computed.")
        test_data_unused = Msg("Test data is present but unused. "
                               "Select 'Test on test data' to use it.")

    class Information(OWWidget.Information):
        data_sampled = Msg("Train data has been sampled")
        test_data_sampled = Msg("Test data has been sampled")

    def __init__(self):
        """Initialise the widget state and build the control/main areas."""
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False

        #: An Ordered dictionary with current inputs and their testing results.
        self.learners = OrderedDict()

        sbox = gui.vBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed)

        # NOTE: the order in which radio buttons are appended must match the
        # resampling constants (KFold, ShuffleSplit, LeaveOneOut, ...).
        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox, self, "n_folds", label="Number of folds: ",
            items=[str(x) for x in self.NFolds], maximumContentsLength=3,
            orientation=Qt.Horizontal, callback=self.kfold_changed)
        gui.checkBox(
            ibox, self, "cv_stratified", "Stratified",
            callback=self.kfold_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox, self, "n_repeats", label="Repeat train/test: ",
            items=[str(x) for x in self.NRepeats], maximumContentsLength=3,
            orientation=Qt.Horizontal, callback=self.shuffle_split_changed)
        gui.comboBox(
            ibox, self, "sample_size", label="Training set size: ",
            items=["{} %".format(x) for x in self.SampleSizes],
            maximumContentsLength=5, orientation=Qt.Horizontal,
            callback=self.shuffle_split_changed)
        gui.checkBox(
            ibox, self, "shuffle_stratified", "Stratified",
            callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")
        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        self.cbox = gui.vBox(self.controlArea, "Target Class")
        self.class_selection_combo = gui.comboBox(
            self.cbox, self, "class_selection", items=[],
            sendSelectedValue=True, valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        self.view = gui.TableView(
            wordWrap=True,
        )
        header = self.view.horizontalHeader()
        header.setSectionResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.vBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(780, 1)

    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.

        A `None` learner for a known `key` removes that input; otherwise the
        slot is (re)created without results and re-evaluated.
        """
        if key in self.learners and learner is None:
            # Removed
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, None)
            self._invalidate([key])

    def set_train_data(self, data):
        """
        Set the input training dataset.

        Empty data, data without a target and data with more than one target
        are rejected with the corresponding error message.
        """
        self.Information.data_sampled.clear()
        self.Error.train_data_empty.clear()
        if data is not None and not len(data):
            self.Error.train_data_empty()
            data = None

        # Clear both target-related errors before re-evaluating them, so a
        # message raised for a previous input cannot linger next to the
        # one raised for the current input.
        self.Error.class_required.clear()
        self.Error.too_many_classes.clear()
        if data and not data.domain.class_vars:
            self.Error.class_required()
            data = None
        elif data and len(data.domain.class_vars) > 1:
            self.Error.too_many_classes()
            data = None

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.train_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = RemoveNaNClasses(data)
        else:
            self.Warning.missing_data.clear()

        self.data = data
        self.closeContext()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain.class_var)
        self._invalidate()

    def set_test_data(self, data):
        """
        Set the input separate testing dataset.

        Empty data and data without a target variable are rejected with the
        corresponding *test data* error message.
        """
        self.Information.test_data_sampled.clear()
        self.Error.test_data_empty.clear()
        if data is not None and not len(data):
            self.Error.test_data_empty()
            data = None

        # Report the problem with the test-data specific message; the
        # train-data message (`class_required`) is owned by `set_train_data`
        # and would never be cleared from here.
        self.Error.class_required_test.clear()
        if data and not data.domain.class_var:
            self.Error.class_required_test()
            data = None

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.test_data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.test_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = RemoveNaNClasses(data)
        else:
            self.Warning.missing_data.clear()

        self.test_data = data
        # Test data only affects the results in 'Test on test data' mode.
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _which_missing_data(self):
        """
        Return the text inserted into the `missing_data` warning, naming the
        input(s) from which instances with unknown targets were removed.
        """
        return {(True, True): " ",  # both, don't specify
                (True, False): " train ",
                (False, True): " test "}[(self.train_data_missing_vals,
                                          self.test_data_missing_vals)]

    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self.commit()

    def kfold_changed(self):
        """Switch to cross validation after one of its options changed."""
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def shuffle_split_changed(self):
        """Switch to random sampling after one of its options changed."""
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        """Invalidate all results after a sampling parameter changed."""
        self._invalidate()

    def _update_results(self):
        """
        Run/evaluate the learners.

        Only learners whose slot has no results yet are evaluated; the
        results and the computed scores are stored back into `self.learners`.
        """
        self.Warning.test_data_unused.clear()
        self.Warning.test_data_missing.clear()
        self.warning()
        self.Error.class_inconsistent.clear()
        self.Error.too_many_folds.clear()
        self.error()
        if self.data is None:
            return

        class_var = self.data.domain.class_var

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                if not self.Error.test_data_empty.is_shown():
                    self.Warning.test_data_missing()
                return
            elif self.test_data.domain.class_var != class_var:
                self.Error.class_inconsistent()
                return

        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]
        if len(items) == 0:
            return

        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.Warning.test_data_unused()

        rstate = 42

        def update_progress(finished):
            self.progressBarSet(100 * finished)

        common_args = dict(
            store_data=True,
            preprocessor=self.preprocessor,
            callback=update_progress,
            n_jobs=-1,
        )
        self.setStatusMessage("Running")

        with self.progressBar():
            try:
                folds = self.NFolds[self.n_folds]
                if self.resampling == OWTestLearners.KFold:
                    if len(self.data) < folds:
                        self.Error.too_many_folds()
                        # Nothing is running any more; do not leave the
                        # "Running" status message behind.
                        self.setStatusMessage("")
                        return
                    warnings = []
                    results = Orange.evaluation.CrossValidation(
                        self.data, learners, k=folds,
                        random_state=rstate, warnings=warnings, **common_args)
                    if warnings:
                        self.warning(warnings[0])
                elif self.resampling == OWTestLearners.LeaveOneOut:
                    results = Orange.evaluation.LeaveOneOut(
                        self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.ShuffleSplit:
                    train_size = self.SampleSizes[self.sample_size] / 100
                    results = Orange.evaluation.ShuffleSplit(
                        self.data, learners,
                        n_resamples=self.NRepeats[self.n_repeats],
                        train_size=train_size, test_size=None,
                        stratified=self.shuffle_stratified,
                        random_state=rstate, **common_args)
                elif self.resampling == OWTestLearners.TestOnTrain:
                    results = Orange.evaluation.TestOnTrainingData(
                        self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.TestOnTest:
                    results = Orange.evaluation.TestOnTestData(
                        self.data, self.test_data, learners, **common_args)
                else:
                    assert False
            except (RuntimeError, ValueError) as e:
                self.error(str(e))
                self.setStatusMessage("")
                return
            else:
                self.error()

        learner_key = {slot.learner: key for key, slot in
                       self.learners.items()}
        for learner, result in zip(learners, results.split_by_model()):
            stats = None
            if class_var.is_discrete:
                scorers = classification_stats.scores
            elif class_var.is_continuous:
                scorers = regression_stats.scores
            else:
                scorers = None
            if scorers:
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [Try(lambda: score(result)) for score in scorers]
                    result = Try.Success(result)
            key = learner_key[learner]
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self.setStatusMessage("")

    def _update_header(self):
        """Set the horizontal header labels for the current target type."""
        # Set the correct horizontal header labels on the results_model.
        headers = ["Method"]
        if self.data is not None:
            if self.data.domain.has_discrete_class:
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)

        # remove possible extra columns from the model.
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        """Rebuild the rows of the results model from `self.learners`."""
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.view.model()
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                    self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            head.setData(key, Qt.UserRole)
            if isinstance(slot.results, Try.Fail):
                head.setToolTip(str(slot.results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                errors.append("{name} failed with error:\n"
                              "{exc.__class__.__name__}: {exc!s}"
                              .format(name=name, exc=slot.results.exception))

            row = [head]

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                # A specific target class is selected: score it one-vs-rest.
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(
                        slot.results.value, target_index)

                    stats = [Try(lambda: score(ovr_results))
                             for score in classification_stats.scores]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat in stats:
                    item = QStandardItem()
                    if stat.success:
                        item.setText("{:.3f}".format(stat.value[0]))
                    else:
                        item.setToolTip(str(stat.exception))
                        has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        self.error("\n".join(errors), shown=bool(errors))
        self.Warning.scores_not_computed(shown=has_missing_scores)

    def _update_class_selection(self):
        """Repopulate the target class combo box for the current data."""
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = [self.TARGET_AVERAGE] + class_var.values
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                # +1 accounts for the leading TARGET_AVERAGE entry.
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        """Refresh the displayed scores for the newly selected target."""
        self._update_stats_model()

    def _invalidate(self, which=None):
        """
        Drop the stored results for the `which` input keys, blank their
        score cells in the view and recompute.
        """
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.view.model()
        statmodelkeys = [model.item(row, 0).data(Qt.UserRole)
                         for row in range(model.rowCount())]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.commit()

    def commit(self):
        """Recompute and output the results"""
        self._update_header()
        # Update the view to display the model names
        self._update_stats_model()
        self._update_results()
        self._update_stats_model()
        valid = [slot for slot in self.learners.values()
                 if slot.results is not None and slot.results.success]
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [learner_name(slot.learner)
                                      for slot in valid]

            # Predictions & Probabilities
            predictions = combined.get_augmented_data(combined.learner_names)
        else:
            combined = None
            predictions = None
        self.send("Evaluation Results", combined)
        self.send("Predictions", predictions)

    def send_report(self):
        """Report on the testing schema and results"""
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            stratified = 'Stratified ' if self.cv_stratified else ''
            items = [("Sampling type", "{}{}-fold Cross validation".
                      format(stratified, self.NFolds[self.n_folds]))]
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.ShuffleSplit:
            stratified = 'Stratified ' if self.shuffle_stratified else ''
            items = [("Sampling type",
                      "{}Shuffle split, {} random samples with {}% data "
                      .format(stratified, self.NRepeats[self.n_repeats],
                              self.SampleSizes[self.sample_size]))]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.view)
class OWConfusionMatrix(widget.OWWidget):
    """Confusion matrix widget"""

    name = "Confusion Matrix"
    description = "Display a confusion matrix constructed from " \
                  "the results of classifier evaluations."
    icon = "icons/ConfusionMatrix.svg"
    priority = 1001

    class Inputs:
        evaluation_results = Input("Evaluation Results",
                                   Orange.evaluation.Results)

    class Outputs:
        selected_data = Output("Selected Data", Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    # Labels of the "Show" combo box; `selected_quantity` indexes this list.
    quantities = [
        "Number of instances",
        "Proportion of predicted",
        "Proportion of actual",
    ]

    settings_version = 1
    settingsHandler = settings.ClassValuesContextHandler()

    selected_learner = settings.Setting([0], schema_only=True)
    selection = settings.ContextSetting(set())
    selected_quantity = settings.Setting(0)
    append_predictions = settings.Setting(True)
    append_probabilities = settings.Setting(False)
    autocommit = settings.Setting(True)

    UserAdviceMessages = [
        widget.Message(
            "Clicking on cells or in headers outputs the corresponding "
            "data instances",
            "click_cell")
    ]

    class Error(widget.OWWidget.Error):
        no_regression = Msg("Confusion Matrix cannot show regression results.")
        invalid_values = Msg(
            "Evaluation Results input contains invalid values")

    def __init__(self):
        """Initialise the widget state and build the control/main areas."""
        super().__init__()

        self.data = None
        self.results = None
        self.learners = []
        self.headers = []

        self.learners_box = gui.listBox(
            self.controlArea, self, "selected_learner", "learners", box=True,
            callback=self._learner_changed)

        self.outputbox = gui.vBox(self.controlArea, "Output")
        box = gui.hBox(self.outputbox)
        gui.checkBox(box, self, "append_predictions",
                     "Predictions", callback=self._invalidate)
        gui.checkBox(box, self, "append_probabilities",
                     "Probabilities", callback=self._invalidate)

        gui.auto_commit(self.outputbox, self, "autocommit",
                        "Send Selected", "Send Automatically", box=False)

        self.mainArea.layout().setContentsMargins(0, 0, 0, 0)

        box = gui.vBox(self.mainArea, box=True)

        sbox = gui.hBox(box)
        gui.rubber(sbox)
        gui.comboBox(sbox, self, "selected_quantity",
                     items=self.quantities, label="Show: ",
                     orientation=Qt.Horizontal, callback=self._update)

        self.tablemodel = QStandardItemModel(self)
        view = self.tableview = QTableView(
            editTriggers=QTableView.NoEditTriggers)
        view.setModel(self.tablemodel)
        view.horizontalHeader().hide()
        view.verticalHeader().hide()
        view.horizontalHeader().setMinimumSectionSize(60)
        view.selectionModel().selectionChanged.connect(self._invalidate)
        view.setShowGrid(False)
        view.setItemDelegate(BorderedItemDelegate(Qt.white))
        view.setSizePolicy(QSizePolicy.MinimumExpanding,
                           QSizePolicy.MinimumExpanding)
        view.clicked.connect(self.cell_clicked)
        box.layout().addWidget(view)

        selbox = gui.hBox(box)
        gui.button(selbox, self, "Select Correct",
                   callback=self.select_correct, autoDefault=False)
        gui.button(selbox, self, "Select Misclassified",
                   callback=self.select_wrong, autoDefault=False)
        gui.button(selbox, self, "Clear Selection",
                   callback=self.select_none, autoDefault=False)

    def sizeHint(self):
        """Initial size"""
        return QSize(750, 340)

    def _item(self, i, j):
        """Return the model item at (i, j), or a new item if there is none."""
        return self.tablemodel.item(i, j) or QStandardItem()

    def _set_item(self, i, j, item):
        """Store `item` at (i, j) in the table model."""
        self.tablemodel.setItem(i, j, item)

    def _init_table(self, nclasses):
        """
        Lay out the static parts of the table for `nclasses` classes: the
        "Predicted"/"Actual" captions, the header cells taken from
        `self.headers`, and the model dimensions.
        """
        item = self._item(0, 2)
        item.setData("Predicted", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignCenter)
        item.setFlags(Qt.NoItemFlags)

        self._set_item(0, 2, item)
        item = self._item(2, 0)
        item.setData("Actual", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignHCenter | Qt.AlignBottom)
        item.setFlags(Qt.NoItemFlags)
        self.tableview.setItemDelegateForColumn(0, gui.VerticalItemDelegate())
        self._set_item(2, 0, item)
        self.tableview.setSpan(0, 2, 1, nclasses)
        self.tableview.setSpan(2, 0, nclasses, 1)

        font = self.tablemodel.invisibleRootItem().font()
        bold_font = QFont(font)
        bold_font.setBold(True)

        # The 2x2 top-left corner holds inert filler cells.
        for i in (0, 1):
            for j in (0, 1):
                item = self._item(i, j)
                item.setFlags(Qt.NoItemFlags)
                self._set_item(i, j, item)

        last_header = len(self.headers) - 1
        for p, label in enumerate(self.headers):
            # Each label is placed twice: in header row 1 and header column 1.
            for i, j in ((1, p + 2), (p + 2, 1)):
                item = self._item(i, j)
                item.setData(label, Qt.DisplayRole)
                item.setFont(bold_font)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                if p < last_header:
                    item.setData("r" if j == 1 else "b", BorderRole)
                    item.setData(QColor(192, 192, 192), BorderColorRole)
                self._set_item(i, j, item)

        hor_header = self.tableview.horizontalHeader()
        if len(' '.join(self.headers)) < 120:
            hor_header.setSectionResizeMode(QHeaderView.ResizeToContents)
        else:
            hor_header.setDefaultSectionSize(60)
        self.tablemodel.setRowCount(nclasses + 3)
        self.tablemodel.setColumnCount(nclasses + 3)

    @Inputs.evaluation_results
    def set_results(self, results):
        """Set the input results."""
        prev_sel_learner = self.selected_learner.copy()
        self.clear()
        self.warning()
        self.closeContext()

        data = None
        if results is not None and results.data is not None:
            data = results.data[results.row_indices]

        if data is not None and not data.domain.has_discrete_class:
            self.Error.no_regression()
            data = results = None
        else:
            self.Error.no_regression.clear()

        nan_values = False
        if results is not None:
            assert isinstance(results, Orange.evaluation.Results)
            if np.any(np.isnan(results.actual)) or \
                    np.any(np.isnan(results.predicted)):
                # Error out here (could filter them out with a warning
                # instead).
                nan_values = True
                results = data = None

        if nan_values:
            self.Error.invalid_values()
        else:
            self.Error.invalid_values.clear()

        self.results = results
        self.data = data

        if data is not None:
            class_values = data.domain.class_var.values
        elif results is not None:
            # Results without the stored data are not supported.
            raise NotImplementedError

        if results is None:
            self.report_button.setDisabled(True)
        else:
            self.report_button.setDisabled(False)

            nmodels = results.predicted.shape[0]
            self.headers = class_values + \
                [unicodedata.lookup("N-ARY SUMMATION")]

            # NOTE: The 'learner_names' is set in 'Test Learners' widget.
            if hasattr(results, "learner_names"):
                self.learners = results.learner_names
            else:
                self.learners = [
                    "Learner #{}".format(i + 1) for i in range(nmodels)
                ]

            self._init_table(len(class_values))
            self.openContext(data.domain.class_var)
            # Restore the previous learner selection when it is still valid.
            if not prev_sel_learner or \
                    prev_sel_learner[0] >= len(self.learners):
                if self.learners:
                    self.selected_learner[:] = [0]
            else:
                self.selected_learner[:] = prev_sel_learner
            self._update()
            self._set_selection()
            self.unconditional_commit()

    def clear(self):
        """Reset the widget, clear controls"""
        self.results = None
        self.data = None
        self.tablemodel.clear()
        self.headers = []
        # Clear learners last. This action will invoke `_learner_changed`
        self.learners = []

    def select_correct(self):
        """Select the diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            index = self.tablemodel.index(i, i)
            selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_wrong(self):
        """Select the off-diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            for j in range(i + 1, n):
                index = self.tablemodel.index(i, j)
                selection.select(index, index)
                index = self.tablemodel.index(j, i)
                selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_none(self):
        """Reset selection"""
        self.tableview.selectionModel().clear()

    def cell_clicked(self, model_index):
        """Handle cell click event"""
        i, j = model_index.row(), model_index.column()
        if not i or not j:
            return
        n = self.tablemodel.rowCount()
        index = self.tablemodel.index
        selection = None
        if i == j == 1 or i == j == n - 1:
            # Header corner or grand total: the whole matrix.
            selection = QItemSelection(index(2, 2), index(n - 1, n - 1))
        elif i in (1, n - 1):
            # Column header or column sum: the whole column.
            selection = QItemSelection(index(2, j), index(n - 1, j))
        elif j in (1, n - 1):
            # Row header or row sum: the whole row.
            selection = QItemSelection(index(i, 2), index(i, n - 1))

        if selection is not None:
            self.tableview.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def _prepare_data(self):
        """
        Build the outputs for the current cell selection.

        Returns a `(data, annotated_data)` pair; `data` is `None` when no
        instance falls into the selected cells.
        """
        indices = self.tableview.selectedIndexes()
        indices = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        actual = self.results.actual
        learner_name = self.learners[self.selected_learner[0]]
        predicted = self.results.predicted[self.selected_learner[0]]
        selected = [
            i for i, t in enumerate(zip(actual, predicted)) if t in indices
        ]

        extra = []
        class_var = self.data.domain.class_var
        metas = self.data.domain.metas

        if self.append_predictions:
            extra.append(predicted.reshape(-1, 1))
            var = Orange.data.DiscreteVariable(
                "{}({})".format(class_var.name, learner_name),
                class_var.values)
            metas = metas + (var, )

        if self.append_probabilities and \
                self.results.probabilities is not None:
            probs = self.results.probabilities[self.selected_learner[0]]
            extra.append(np.array(probs, dtype=object))
            pvars = [
                Orange.data.ContinuousVariable("p({})".format(value))
                for value in class_var.values
            ]
            metas = metas + tuple(pvars)

        domain = Orange.data.Domain(self.data.domain.attributes,
                                    self.data.domain.class_vars, metas)
        data = self.data.transform(domain)
        if len(extra):
            # The appended columns follow the original meta columns.
            data.metas[:, len(self.data.domain.metas):] = \
                np.hstack(tuple(extra))
        data.name = learner_name

        if selected:
            annotated_data = create_annotated_table(data, selected)
            data = data[selected]
        else:
            annotated_data = create_annotated_table(data, [])
            data = None

        return data, annotated_data

    def commit(self):
        """Output data instances corresponding to selected cells"""
        if self.results is not None and self.data is not None \
                and self.selected_learner:
            data, annotated_data = self._prepare_data()
        else:
            data = None
            annotated_data = None

        self.Outputs.selected_data.send(data)
        self.Outputs.annotated_data.send(annotated_data)

    def _invalidate(self):
        """Store the current cell selection in the settings and commit."""
        indices = self.tableview.selectedIndexes()
        self.selection = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        self.commit()

    def _set_selection(self):
        """Apply the stored `self.selection` to the table view."""
        selection = QItemSelection()
        index = self.tableview.model().index
        for row, col in self.selection:
            sel = index(row + 2, col + 2)
            selection.select(sel, sel)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def _learner_changed(self):
        """Redraw the matrix for the newly selected learner and commit."""
        self._update()
        self._set_selection()
        self.commit()

    def _update(self):
        """Fill the table with the confusion matrix of the selected learner."""
        def _isinvalid(x):
            return isnan(x) or isinf(x)

        # Update the displayed confusion matrix
        if self.results is not None and self.selected_learner:
            cmatrix = confusion_matrix(self.results, self.selected_learner[0])
            colsum = cmatrix.sum(axis=0)
            rowsum = cmatrix.sum(axis=1)
            n = len(cmatrix)
            diag = np.diag_indices(n)

            # `colors` holds the shading intensity; off-diagonal cells are
            # scaled separately from the diagonal, which is filled in below.
            colors = cmatrix.astype(np.double)
            colors[diag] = 0
            if self.selected_quantity == 0:
                # Builtin `int` instead of the `np.int` alias, which is
                # deprecated and no longer exists in current NumPy.
                normalized = cmatrix.astype(int)
                formatstr = "{}"
                div = np.array([colors.max()])
            else:
                if self.selected_quantity == 1:
                    normalized = 100 * cmatrix / colsum
                    div = colors.max(axis=0)
                else:
                    normalized = 100 * cmatrix / rowsum[:, np.newaxis]
                    div = colors.max(axis=1)[:, np.newaxis]
                formatstr = "{:2.1f} %"
            div[div == 0] = 1
            colors /= div
            colors[diag] = normalized[diag] / normalized[diag].max()

            for i in range(n):
                for j in range(n):
                    val = normalized[i, j]
                    col_val = colors[i, j]
                    item = self._item(i + 2, j + 2)
                    item.setData(
                        "NA" if _isinvalid(val) else formatstr.format(val),
                        Qt.DisplayRole)
                    # Hue 240 for the diagonal cells, hue 0 for the rest.
                    bkcolor = QColor.fromHsl(
                        240 if i == j else 0, 160,
                        255 if _isinvalid(col_val)
                        else int(255 - 30 * col_val))
                    item.setData(QBrush(bkcolor), Qt.BackgroundRole)
                    item.setData("trbl", BorderRole)
                    item.setToolTip("actual: {}\npredicted: {}".format(
                        self.headers[i], self.headers[j]))
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable)
                    self._set_item(i + 2, j + 2, item)

            bold_font = self.tablemodel.invisibleRootItem().font()
            bold_font.setBold(True)

            def _sum_item(value, border=""):
                item = QStandardItem()
                item.setData(value, Qt.DisplayRole)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                item.setFont(bold_font)
                item.setData(border, BorderRole)
                item.setData(QColor(192, 192, 192), BorderColorRole)
                return item

            for i in range(n):
                self._set_item(n + 2, i + 2, _sum_item(int(colsum[i]), "t"))
                self._set_item(i + 2, n + 2, _sum_item(int(rowsum[i]), "l"))
            self._set_item(n + 2, n + 2, _sum_item(int(rowsum.sum())))

    def send_report(self):
        """Send report"""
        if self.results is not None and self.selected_learner:
            self.report_table(
                "Confusion matrix for {} (showing {})".format(
                    self.learners[self.selected_learner[0]],
                    self.quantities[self.selected_quantity].lower()),
                self.tableview)

    @classmethod
    def migrate_settings(cls, settings, version):
        """Upgrade settings stored without a version to the current schema."""
        if not version:
            # For some period of time the 'selected_learner' property was
            # changed from List[int] -> int
            # (commit 4e49bb3fd0e11262f3ebf4b1116a91a4b49cc982) and then back
            # again (commit 8a492d79a2e17154a0881e24a05843406c8892c0)
            if "selected_learner" in settings and \
                    isinstance(settings["selected_learner"], int):
                settings["selected_learner"] = [settings["selected_learner"]]
class OWTestLearners(widget.OWWidget):
    """Widget that evaluates the input learners on the input data.

    Each connected learner is kept in ``self.learners`` as an ``Input``
    record (``learner``, ``results``, ``stats``).  Records whose ``results``
    is ``None`` are (re)evaluated by :meth:`update_results` using the
    resampling scheme chosen in the "Sampling" box.
    """
    name = "Test Learners"
    description = ""
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Orange.classification.Learner,
               "set_learner", widget.Multiple),
              ("Data", Orange.data.Table, "set_train_data", widget.Default),
              ("Test Data", Orange.data.Table, "set_test_data")]

    outputs = [("Evaluation Results", Orange.evaluation.Results)]

    settingsHandler = settings.ClassValuesContextHandler()

    #: Resampling/testing types
    KFold, LeaveOneOut, Bootstrap, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    k_folds = settings.Setting(10)
    #: Number of repeats for bootstrap sampling
    n_repeat = settings.Setting(10)
    #: Bootstrap sampling p
    sample_p = settings.Setting(75)

    class_selection = settings.ContextSetting("(None)")

    def __init__(self, parent=None):
        """Create the sampling controls, the target-class combo and the view."""
        super().__init__(parent)

        self.train_data = None
        self.test_data = None
        #: Results of the most recent evaluation run (None before any run).
        self.results = None
        #: Class selection for which the displayed statistics were computed.
        self.previous_class_selection = "(None)"

        #: An Ordered dictionary with current inputs and their testing
        #: results.
        self.learners = OrderedDict()

        sbox = gui.widgetBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(sbox, self, "resampling",
                                callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "k_folds", 2, 50, label="Number of folds:",
                 callback=self.kfold_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "n_repeat", 2, 50, label="Repeat train/test",
                 callback=self.bootstrap_changed)
        gui.widgetLabel(ibox, "Relative training set size:")
        gui.hSlider(ibox, self, "sample_p", minValue=1, maxValue=100,
                    ticks=20, vertical=False, labelFormat="%d %%",
                    callback=self.bootstrap_changed)

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        rbox.layout().addSpacing(5)
        gui.button(rbox, self, "Apply", callback=self.apply)

        self.cbox = gui.widgetBox(self.controlArea, "Target class")
        self.class_selection_combo = gui.comboBox(
            self.cbox, self, "class_selection", items=[],
            callback=self._select_class,
            sendSelectedValue=True, valueType=str)

        gui.rubber(self.controlArea)

        self.view = QTreeView(
            rootIsDecorated=False,
            uniformRowHeights=True,
            wordWrap=True,
            editTriggers=QTreeView.NoEditTriggers)
        header = self.view.header()
        header.setResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel()
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())
        self._update_header()
        box = gui.widgetBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def set_learner(self, learner, key):
        """Set or remove (``learner is None``) the learner for input `key`."""
        if learner is None:
            # A removal for an unknown key must not register a None learner.
            self.learners.pop(key, None)
        else:
            self.learners[key] = Input(learner, None, ())
        self._update_stats_model()

    def set_train_data(self, data):
        """Set the train data; data without a class variable is rejected."""
        self.error(0)
        if data is not None:
            if data.domain.class_var is None:
                self.error(0, "Train data input requires a class variable")
                data = None

        self.train_data = data
        self.closeContext()
        self.class_selection = "(None)"
        # `data` may be None here (input removed or rejected above).
        if data is not None:
            self.openContext(data.domain.class_var)
        self._update_class_selection()
        self._update_header()
        self._invalidate()

    def set_test_data(self, data):
        """Set the test data; data without a class variable is rejected."""
        self.error(1)
        if data is not None:
            if data.domain.class_var is None:
                self.error(1, "Test data input requires a class variable")
                data = None

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def handleNewSignals(self):
        """Re-evaluate and send the results once all inputs were delivered."""
        self.update_results()
        self.commit()

    def kfold_changed(self):
        """Switch to cross validation when the fold count is edited."""
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def bootstrap_changed(self):
        """Switch to random sampling when its parameters are edited."""
        self.resampling = OWTestLearners.Bootstrap
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def update_results(self):
        """Evaluate every learner whose stored results are missing.

        Returns early (leaving the stored results untouched) when there is
        no train data, when "Test on test data" is selected without usable
        test data, or when no learner needs to be evaluated.
        """
        self.warning([1, 2])
        self.error(2)

        if self.train_data is None:
            return

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                self.warning(2, "Missing separate test data input")
                return
            elif self.test_data.domain.class_var != \
                    self.train_data.domain.class_var:
                self.error(2, ("Inconsistent class variable between test " +
                               "and train data sets"))
                return

        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.warning(
                1, "Test data is present but unused. "
                   "Select 'Test on test data' to use it.")

        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        if not items:
            # Nothing to evaluate; do not run the evaluation on an empty
            # learner list.
            return
        learners = [slot.learner for _, slot in items]

        self.setStatusMessage("Running")

        # TODO: Test each learner individually

        if self.resampling == OWTestLearners.KFold:
            results = Orange.evaluation.CrossValidation(
                self.train_data, learners, k=self.k_folds, store_data=True)
        elif self.resampling == OWTestLearners.LeaveOneOut:
            results = Orange.evaluation.LeaveOneOut(
                self.train_data, learners, store_data=True)
        elif self.resampling == OWTestLearners.Bootstrap:
            p = self.sample_p / 100.0
            results = Orange.evaluation.Bootstrap(
                self.train_data, learners, n_resamples=self.n_repeat, p=p,
                store_data=True)
        elif self.resampling == OWTestLearners.TestOnTrain:
            results = Orange.evaluation.TestOnTrainingData(
                self.train_data, learners, store_data=True)
        elif self.resampling == OWTestLearners.TestOnTest:
            results = Orange.evaluation.TestOnTestData(
                self.train_data, self.test_data, learners, store_data=True)
        else:
            assert False, "unknown resampling type"

        self.results = results
        results = list(split_by_model(results))
        class_var = self.train_data.domain.class_var

        if is_discrete(class_var):
            stats = [classification_stats(self.one_vs_rest(res))
                     for res in results]
        else:
            stats = [regression_stats(res) for res in results]

        self._update_header()

        for (key, slot), res, stat in zip(items, results, stats):
            self.learners[key] = slot._replace(results=res, stats=stat)

        self.setStatusMessage("")

        self._update_stats_model()

    def _update_header(self):
        """Set the column headers to match the type of the class variable."""
        headers = ["Method"]
        if self.train_data is not None:
            if is_discrete(self.train_data.domain.class_var):
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)
        # Drop surplus columns left over from a previous, longer header.
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        """Rebuild the rows of the result view from ``self.learners``."""
        model = self.view.model()

        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        for slot in self.learners.values():
            name = learner_name(slot.learner)
            row = []
            head = QStandardItem()
            head.setData(name, Qt.DisplayRole)
            row.append(head)
            # `stats` is None for invalidated entries; show the name only.
            if slot.stats is not None:
                for stat in slot.stats:
                    item = QStandardItem()
                    item.setData(" {:.3f} ".format(stat[0]), Qt.DisplayRole)
                    row.append(item)
            model.appendRow(row)

    def _update_class_selection(self):
        """Repopulate the target-class combo for the current train data."""
        if self.train_data is None:
            # No data: nothing to choose from.
            self.class_selection_combo.clear()
            self.previous_class_selection = "(None)"
            return

        if is_discrete(self.train_data.domain.class_var):
            self.cbox.setVisible(True)
            values = self.train_data.domain.class_var.values
            self.class_selection_combo.clear()
            self.class_selection_combo.addItem("(None)")
            self.class_selection_combo.addItems(values)

            class_index = 0
            if self.class_selection != '(None)' and \
                    self.class_selection != 0:
                # +1 skips the leading "(None)" entry of the combo.
                class_index = self.train_data.domain.class_var.values.index(
                    self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.previous_class_selection = "(None)"
        else:
            self.cbox.setVisible(False)

    def one_vs_rest(self, res):
        """Return `res` reduced to "selected class vs. the rest".

        When no target class is selected `res` is returned unchanged.
        """
        if self.class_selection != '(None)' and self.class_selection != 0:
            class_ = self.train_data.domain.class_var.values.index(
                self.class_selection)
            actual = res.actual == class_
            predicted = res.predicted == class_
            return Results(
                nmethods=1, domain=self.train_data.domain,
                actual=actual, predicted=predicted)
        else:
            return res

    def _select_class(self):
        """Recompute the statistics after the target class was changed."""
        if self.previous_class_selection == self.class_selection:
            return

        if self.train_data is not None:
            discrete = is_discrete(self.train_data.domain.class_var)
            # Use the results stored with each learner: `self.results` only
            # covers the learners evaluated in the most recent run.
            for key, slot in list(self.learners.items()):
                if slot.results is None:
                    continue
                if discrete:
                    stat = classification_stats(
                        self.one_vs_rest(slot.results))
                else:
                    stat = regression_stats(slot.results)
                self.learners[key] = slot._replace(stats=stat)

            self.setStatusMessage("")
            self._update_stats_model()

        self.previous_class_selection = self.class_selection

    def _invalidate(self, which=None):
        """Discard stored results of the learners with keys in `which`.

        All learners are invalidated when `which` is None.  The score cells
        of the affected rows are blanked.
        """
        if which is None:
            which = self.learners.keys()

        all_keys = list(self.learners.keys())
        model = self.view.model()

        for key in list(which):
            if key not in self.learners:
                continue
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            row = all_keys.index(key)
            for c in range(1, model.columnCount()):
                item = model.item(row, c)
                if item is not None:
                    item.setData(None, Qt.DisplayRole)

    def apply(self):
        """Evaluate pending learners and send the results."""
        self.update_results()
        self.commit()

    def commit(self):
        """Send the merged results of all evaluated learners (or None)."""
        evaluated = [slot for slot in self.learners.values()
                     if slot.results is not None]
        if evaluated:
            combined = results_merge([slot.results for slot in evaluated])
            # Names must line up with the merged results, so learners
            # without results are left out here as well.
            combined.learner_names = [learner_name(slot.learner)
                                      for slot in evaluated]
        else:
            combined = None
        self.send("Evaluation Results", combined)
class OWPredictions(OWWidget):
    """Widget showing the predictions of input models next to the data.

    Connected models are kept in ``self.predictors`` as ``PredictorSlot``
    records.  A slot's ``results`` is ``None`` (not computed yet), a
    ``Results`` instance, or an error message string when the model failed.
    """
    name = "Predictions"
    icon = "icons/Predictions.svg"
    priority = 200
    description = "Display predictions of models for an input dataset."
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)
        predictors = Input("Predictors", Model, multiple=True)

    class Outputs:
        predictions = Output("Predictions", Orange.data.Table)
        evaluation_results = Output("Evaluation Results", Results)

    class Warning(OWWidget.Warning):
        empty_data = Msg("Empty dataset")
        wrong_targets = Msg(
            "Some model(s) predict a different target (see more ...)\n{}")

    class Error(OWWidget.Error):
        predictor_failed = Msg("Some predictor(s) failed (see more ...)\n{}")
        scorer_failed = Msg("Some scorer(s) failed (see more ...)\n{}")

    settingsHandler = settings.ClassValuesContextHandler()
    score_table = settings.SettingProvider(ScoreTable)

    #: List of selected class value indices in the `class_values` list
    selected_classes = settings.ContextSetting([])
    selection = settings.Setting([], schema_only=True)

    def __init__(self):
        """Build the class list box, the two linked tables and score table."""
        super().__init__()

        self.data = None  # type: Optional[Orange.data.Table]
        self.predictors = {}  # type: Dict[object, PredictorSlot]
        self.class_values = []  # type: List[str]
        self._delegates = []
        self.left_width = 10
        self.selection_store = None
        self.__pending_selection = self.selection

        self._set_input_summary()
        self._set_output_summary(None)
        gui.listBox(self.controlArea, self, "selected_classes", "class_values",
                    box="Show probabilities for",
                    callback=self._update_prediction_delegate,
                    selectionMode=QListWidget.ExtendedSelection,
                    addSpace=False,
                    sizePolicy=(QSizePolicy.Preferred, QSizePolicy.Preferred))
        gui.rubber(self.controlArea)
        self.reset_button = gui.button(
            self.controlArea, self, "Restore Original Order",
            callback=self._reset_order,
            tooltip="Show rows in the original order")

        table_opts = dict(horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                          horizontalScrollMode=QTableView.ScrollPerPixel,
                          selectionMode=QTableView.ExtendedSelection,
                          focusPolicy=Qt.StrongFocus)
        self.dataview = TableView(
            sortingEnabled=True,
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            **table_opts)
        self.predictionsview = TableView(
            sortingEnabled=True,
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
            **table_opts)
        self.dataview.verticalHeader().hide()
        # Keep the vertical scroll position of the two views in sync.
        dsbar = self.dataview.verticalScrollBar()
        psbar = self.predictionsview.verticalScrollBar()
        psbar.valueChanged.connect(dsbar.setValue)
        dsbar.valueChanged.connect(psbar.setValue)

        self.dataview.verticalHeader().setDefaultSectionSize(22)
        self.predictionsview.verticalHeader().setDefaultSectionSize(22)
        self.dataview.verticalHeader().sectionResized.connect(
            lambda index, _, size:
            self.predictionsview.verticalHeader().resizeSection(index, size))

        self.dataview.setItemDelegate(DataItemDelegate(self.dataview))

        self.splitter = QSplitter(
            orientation=Qt.Horizontal, childrenCollapsible=False,
            handleWidth=2)
        self.splitter.splitterMoved.connect(self.splitter_resized)
        self.splitter.addWidget(self.predictionsview)
        self.splitter.addWidget(self.dataview)

        self.score_table = ScoreTable(self)
        self.vsplitter = gui.vBox(self.mainArea)
        self.vsplitter.layout().addWidget(self.splitter)
        self.vsplitter.layout().addWidget(self.score_table.view)

    def get_selection_store(self, proxy):
        """Return the shared selection store, creating it on first use."""
        # Both proxies map the same, so it doesn't matter which one is used
        # to initialize SharedSelectionStore
        if self.selection_store is None:
            self.selection_store = SharedSelectionStore(proxy)
        return self.selection_store

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Set the input data and rebuild the data view for it.

        All stored prediction results are invalidated.
        """
        self.Warning.empty_data(shown=data is not None and not data)
        self.data = data
        self.selection_store = None
        if not data:
            self.dataview.setModel(None)
            self.predictionsview.setModel(None)
        else:
            # force full reset of the view's HeaderView state
            self.dataview.setModel(None)
            model = TableModel(data, parent=None)
            modelproxy = SortProxyModel()
            modelproxy.setSourceModel(model)
            self.dataview.setModel(modelproxy)
            sel_model = SharedSelectionModel(
                self.get_selection_store(modelproxy), modelproxy,
                self.dataview)
            self.dataview.setSelectionModel(sel_model)
            # Restore the selection stored in the settings (once).
            if self.__pending_selection is not None:
                self.selection = self.__pending_selection
                self.__pending_selection = None
                self.selection_store.select_rows(
                    set(self.selection), QItemSelectionModel.ClearAndSelect)
            sel_model.selectionChanged.connect(self.commit)
            sel_model.selectionChanged.connect(self._store_selection)

            self.dataview.model().list_sorted.connect(
                partial(
                    self._update_data_sort_order, self.dataview,
                    self.predictionsview))

        self._invalidate_predictions()

    def _store_selection(self):
        """Copy the selected rows from the selection store to the setting."""
        self.selection = list(self.selection_store.rows)

    @property
    def class_var(self):
        """Class variable of the input data (falsy when there is no data)."""
        return self.data and self.data.domain.class_var

    # pylint: disable=redefined-builtin
    @Inputs.predictors
    def set_predictor(self, predictor=None, id=None):
        """Add, replace or remove (``predictor is None``) the model for `id`."""
        if id in self.predictors:
            if predictor is not None:
                self.predictors[id] = self.predictors[id]._replace(
                    predictor=predictor, name=predictor.name, results=None)
            else:
                del self.predictors[id]
        elif predictor is not None:
            self.predictors[id] = PredictorSlot(predictor, predictor.name, None)

    def _set_class_values(self):
        """Collect the class values of all discrete-target models.

        With a discrete data class, values of the data class are listed
        first and are the ones initially selected.
        """
        class_values = []
        for slot in self.predictors.values():
            class_var = slot.predictor.domain.class_var
            if class_var and class_var.is_discrete:
                for value in class_var.values:
                    if value not in class_values:
                        class_values.append(value)

        if self.class_var and self.class_var.is_discrete:
            values = self.class_var.values
            self.class_values = sorted(
                class_values, key=lambda val: val not in values)
            # Indices must refer to the (reordered) `self.class_values`,
            # which is the list `selected_classes` indexes into.
            self.selected_classes = [
                i for i, name in enumerate(self.class_values)
                if name in values]
        else:
            self.class_values = class_values  # This assignment updates listview
            self.selected_classes = []

    def handleNewSignals(self):
        """Recompute predictions, scores and views after input changes."""
        self._set_class_values()
        self._call_predictors()
        self._update_scores()
        self._update_predictions_model()
        self._update_prediction_delegate()
        self._set_errors()
        self._set_input_summary()
        self.commit()

    def _call_predictors(self):
        """Run every model that has no ``Results`` yet on the input data.

        A failing model gets an error string stored in place of results.
        """
        if not self.data:
            return
        if self.class_var:
            domain = self.data.domain
            classless_data = self.data.transform(
                Domain(domain.attributes, None, domain.metas))
        else:
            classless_data = self.data

        for inputid, slot in self.predictors.items():
            if isinstance(slot.results, Results):
                continue

            predictor = slot.predictor
            try:
                if predictor.domain.class_var.is_discrete:
                    pred, prob = predictor(classless_data, Model.ValueProbs)
                else:
                    pred = predictor(classless_data, Model.Value)
                    prob = numpy.zeros((len(pred), 0))
            except (ValueError, DomainTransformationError) as err:
                self.predictors[inputid] = \
                    slot._replace(results=f"{predictor.name}: {err}")
                continue

            results = Results()
            results.data = self.data
            results.domain = self.data.domain
            results.row_indices = numpy.arange(len(self.data))
            results.folds = (Ellipsis, )
            results.actual = self.data.Y
            results.unmapped_probabilities = prob
            results.unmapped_predicted = pred
            results.probabilities = results.predicted = None
            self.predictors[inputid] = slot._replace(results=results)

            target = predictor.domain.class_var
            # A model predicting another target keeps only the unmapped
            # values; `predicted`/`probabilities` stay None for it.
            if target != self.class_var:
                continue

            # Equal but not identical discrete target: map the model's
            # values back onto the data's class variable.
            if target is not self.class_var and target.is_discrete:
                backmappers, n_values = predictor.get_backmappers(self.data)
                prob = predictor.backmap_probs(prob, n_values, backmappers)
                pred = predictor.backmap_value(
                    pred, prob, n_values, backmappers)
            results.predicted = pred.reshape((1, len(self.data)))
            results.probabilities = prob.reshape((1,) + prob.shape)

    def _update_scores(self):
        """Refill the score table for models with mapped predictions."""
        model = self.score_table.model
        model.clear()
        scorers = usable_scorers(self.class_var) if self.class_var else []
        self.score_table.update_header(scorers)
        errors = []
        for inputid, pred in self.predictors.items():
            results = self.predictors[inputid].results
            if not isinstance(results, Results) or results.predicted is None:
                continue
            row = [QStandardItem(learner_name(pred.predictor)),
                   QStandardItem("N/A"), QStandardItem("N/A")]
            for scorer in scorers:
                item = QStandardItem()
                try:
                    score = scorer_caller(scorer, results)()[0]
                    item.setText(f"{score:.3f}")
                except Exception as exc:  # pylint: disable=broad-except
                    item.setToolTip(str(exc))
                    if scorer.name in self.score_table.shown_scores:
                        errors.append(str(exc))
                row.append(item)
            self.score_table.model.appendRow(row)

        view = self.score_table.view
        if model.rowCount():
            view.setVisible(True)
            view.ensurePolished()
            view.setFixedHeight(
                5 + view.horizontalHeader().height() +
                view.verticalHeader().sectionSize(0) * model.rowCount())
        else:
            view.setVisible(False)

        self.Error.scorer_failed("\n".join(errors), shown=bool(errors))

    def _set_errors(self):
        """Show or clear the failed-predictor error and target warning."""
        # Not all predictors are run every time, so errors can't be collected
        # in _call_predictors
        errors = "\n".join(
            f"- {p.predictor.name}: {p.results}"
            for p in self.predictors.values()
            if isinstance(p.results, str) and p.results)
        self.Error.predictor_failed(errors, shown=bool(errors))

        if self.class_var:
            inv_targets = "\n".join(
                f"- {pred.name} predicts '{pred.domain.class_var.name}'"
                for pred in (p.predictor for p in self.predictors.values()
                             if isinstance(p.results, Results)
                             and p.results.probabilities is None))
            self.Warning.wrong_targets(inv_targets, shown=bool(inv_targets))
        else:
            self.Warning.wrong_targets.clear()

    def _set_input_summary(self):
        """Update the input summary shown in the status bar."""
        if not self.data and not self.predictors:
            self.info.set_input_summary(self.info.NoInput)
            return

        summary = len(self.data) if self.data else 0
        details = self._get_details()
        self.info.set_input_summary(summary, details, format=Qt.RichText)

    def _get_details(self):
        """Return an HTML description of the input data and models."""
        details = "Data:<br>"
        details += format_summary_details(self.data).replace('\n', '<br>') if \
            self.data else "No data on input."
        details += "<hr>"
        pred_names = [v.name for v in self.predictors.values()]
        n_predictors = len(self.predictors)
        if n_predictors:
            n_valid = len(self._non_errored_predictors())
            details += plural("Model: {number} model{s}", n_predictors)
            if n_valid != n_predictors:
                details += f" ({n_predictors - n_valid} failed)"
            details += "<ul>"
            for name in pred_names:
                details += f"<li>{name}</li>"
            details += "</ul>"
        else:
            details += "Model:<br>No model on input."
        return details

    def _set_output_summary(self, output):
        """Update the output summary shown in the status bar."""
        summary = len(output) if output else self.info.NoOutput
        details = format_summary_details(output) if output else ""
        self.info.set_output_summary(summary, details)

    def _invalidate_predictions(self):
        """Drop the stored results of all models."""
        for inputid, pred in list(self.predictors.items()):
            self.predictors[inputid] = pred._replace(results=None)

    def _non_errored_predictors(self):
        """Return the slots whose results are a ``Results`` instance."""
        return [p for p in self.predictors.values()
                if isinstance(p.results, Results)]

    def _reordered_probabilities(self, prediction):
        """Return the slot's probabilities in ``self.class_values`` order.

        Columns for class values the model does not predict are NaN.
        """
        cur_values = prediction.predictor.domain.class_var.values
        new_ind = [self.class_values.index(x) for x in cur_values]
        probs = prediction.results.unmapped_probabilities
        new_probs = numpy.full(
            (probs.shape[0], len(self.class_values)), numpy.nan)
        new_probs[:, new_ind] = probs
        return new_probs

    def _update_predictions_model(self):
        """Rebuild the predictions view from the non-errored models."""
        results = []
        headers = []
        for p in self._non_errored_predictors():
            values = p.results.unmapped_predicted
            target = p.predictor.domain.class_var
            if target.is_discrete:
                # order probabilities in order from Show prob. for
                prob = self._reordered_probabilities(p)
                values = [Value(target, v) for v in values]
            else:
                prob = numpy.zeros((len(values), 0))
            results.append((values, prob))
            headers.append(p.predictor.name)

        if results:
            results = list(zip(*(zip(*res) for res in results)))
            model = PredictionsModel(results, headers)
        else:
            model = None

        if self.selection_store is not None:
            self.selection_store.unregister(
                self.predictionsview.selectionModel())

        predmodel = PredictionsSortProxyModel()
        predmodel.setSourceModel(model)
        predmodel.setDynamicSortFilter(True)
        self.predictionsview.setModel(predmodel)
        self.predictionsview.setSelectionModel(
            SharedSelectionModel(self.get_selection_store(predmodel),
                                 predmodel, self.predictionsview))

        hheader = self.predictionsview.horizontalHeader()
        hheader.setSortIndicatorShown(False)
        # SortFilterProxyModel is slow due to large abstraction overhead
        # (every comparison triggers multiple `model.index(...)`,
        # model.rowCount(...), `model.parent`, ... calls)
        hheader.setSectionsClickable(predmodel.rowCount() < 20000)

        self.predictionsview.model().list_sorted.connect(
            partial(
                self._update_data_sort_order, self.predictionsview,
                self.dataview))

        self.predictionsview.resizeColumnsToContents()

    def _update_data_sort_order(self, sort_source_view, sort_dest_view):
        """Make `sort_dest_view` follow the row order of `sort_source_view`."""
        sort_dest = sort_dest_view.model()
        sort_source = sort_source_view.model()
        sortindicatorshown = False
        if sort_dest is not None:
            assert isinstance(sort_dest, QSortFilterProxyModel)
            n = sort_dest.rowCount()
            if sort_source is not None and sort_source.sortColumn() >= 0:
                sortind = numpy.argsort(
                    [sort_source.mapToSource(sort_source.index(i, 0)).row()
                     for i in range(n)])
                # `numpy.int` was removed in NumPy 1.24; use the builtin.
                sortind = numpy.array(sortind, int)
                sortindicatorshown = True
            else:
                sortind = None

            sort_dest.setSortIndices(sortind)

        sort_dest_view.horizontalHeader().setSortIndicatorShown(False)
        sort_source_view.horizontalHeader().setSortIndicatorShown(
            sortindicatorshown)
        self.commit()

    def _reset_order(self):
        """Remove sorting from both views."""
        datamodel = self.dataview.model()
        predmodel = self.predictionsview.model()
        if datamodel is not None:
            datamodel.setSortIndices(None)
            datamodel.sort(-1)
        if predmodel is not None:
            predmodel.setSortIndices(None)
            predmodel.sort(-1)
        self.predictionsview.horizontalHeader().setSortIndicatorShown(False)
        self.dataview.horizontalHeader().setSortIndicatorShown(False)

    def _all_color_values(self):
        """
        Return list of colors together with their values from all predictors
        classes. Colors and values are sorted according to the values order
        for simpler comparison.
        """
        predictors = self._non_errored_predictors()
        color_values = [
            list(zip(*sorted(zip(
                p.predictor.domain.class_var.colors,
                p.predictor.domain.class_var.values
            ), key=itemgetter(1))))
            for p in predictors
            if p.predictor.domain.class_var.is_discrete
        ]
        return color_values if color_values else [([], [])]

    @staticmethod
    def _colors_match(colors1, values1, color2, values2):
        """
        Test whether colors for values match. Colors matches when all
        values match for shorter list and colors match for shorter list.
        It is assumed that values will be sorted together with their colors.
        """
        shorter_length = min(len(colors1), len(color2))
        return (values1[:shorter_length] == values2[:shorter_length]
                and (numpy.array(colors1[:shorter_length]) ==
                     numpy.array(color2[:shorter_length])).all())

    def _get_colors(self):
        """
        Defines colors for values. If colors match in all models use the union
        otherwise use standard colors.
        """
        all_colors_values = self._all_color_values()
        base_color, base_values = all_colors_values[0]
        for c, v in all_colors_values[1:]:
            if not self._colors_match(base_color, base_values, c, v):
                base_color = []
                break
            # replace base_color if longer
            if len(v) > len(base_color):
                base_color = c
                base_values = v

        if len(base_color) != len(self.class_values):
            return LimitedDiscretePalette(len(self.class_values)).palette
        # reorder colors to widgets order
        colors = [None] * len(self.class_values)
        for c, v in zip(base_color, base_values):
            colors[self.class_values.index(v)] = c
        return colors

    def _update_prediction_delegate(self):
        """Install an item delegate for every column of the predictions."""
        self._delegates.clear()
        colors = self._get_colors()
        # The model has one column per non-errored predictor, so the
        # delegates must be enumerated over the same sequence.
        for col, slot in enumerate(self._non_errored_predictors()):
            target = slot.predictor.domain.class_var
            shown_probs = (
                () if target.is_continuous else
                [val if self.class_values[val] in target.values else None
                 for val in self.selected_classes]
            )
            delegate = PredictionsItemDelegate(
                None if target.is_continuous else self.class_values,
                colors,
                shown_probs,
                target.format_str if target.is_continuous else None,
                parent=self.predictionsview
            )
            # QAbstractItemView does not take ownership of delegates, so we
            # must keep a reference to them ourselves
            self._delegates.append(delegate)
            self.predictionsview.setItemDelegateForColumn(col, delegate)
            self.predictionsview.setColumnHidden(col, False)

        self.predictionsview.resizeColumnsToContents()
        self._recompute_splitter_sizes()
        if self.predictionsview.model() is not None:
            self.predictionsview.model().setProbInd(self.selected_classes)

    def _recompute_splitter_sizes(self):
        """Fit the left splitter pane to the width of the predictions."""
        if not self.data:
            return
        view = self.predictionsview
        self.left_width = \
            view.horizontalHeader().length() + view.verticalHeader().width()
        self._update_splitter()

    def _update_splitter(self):
        """Apply ``self.left_width`` to the horizontal splitter."""
        w1, w2 = self.splitter.sizes()
        self.splitter.setSizes([self.left_width, w1 + w2 - self.left_width])

    def splitter_resized(self):
        """Remember the left pane width after the user moved the splitter."""
        self.left_width = self.splitter.sizes()[0]

    def commit(self):
        """Send both outputs."""
        self._commit_predictions()
        self._commit_evaluation_results()

    def _commit_evaluation_results(self):
        """Send ``Results`` built from models predicting the data's class.

        Rows with a missing class value are left out.
        """
        slots = [p for p in self._non_errored_predictors()
                 if p.results.predicted is not None]
        if not slots:
            self.Outputs.evaluation_results.send(None)
            return

        nanmask = numpy.isnan(self.data.get_column_view(self.class_var)[0])
        data = self.data[~nanmask]
        results = Results(data, store_data=True)
        results.folds = None
        results.row_indices = numpy.arange(len(data))
        results.actual = data.Y.ravel()
        results.predicted = numpy.vstack(
            tuple(p.results.predicted[0][~nanmask] for p in slots))
        if self.class_var and self.class_var.is_discrete:
            results.probabilities = numpy.array(
                [p.results.probabilities[0][~nanmask] for p in slots])
        results.learner_names = [p.name for p in slots]
        self.Outputs.evaluation_results.send(results)

    def _commit_predictions(self):
        """Send the data extended with prediction columns as metas.

        Only the selected rows are sent when there is a selection; otherwise
        all rows are sent, in view order when either view is sorted.
        """
        if not self.data:
            self._set_output_summary(None)
            self.Outputs.predictions.send(None)
            return

        newmetas = []
        newcolumns = []
        for slot in self._non_errored_predictors():
            if slot.predictor.domain.class_var.is_discrete:
                self._add_classification_out_columns(
                    slot, newmetas, newcolumns)
            else:
                self._add_regression_out_columns(slot, newmetas, newcolumns)

        attrs = list(self.data.domain.attributes)
        metas = list(self.data.domain.metas)
        names = [var.name for var in chain(attrs, self.data.domain.class_vars,
                                           metas) if var]
        # Rename new columns that clash with existing variable names.
        uniq_newmetas = []
        for new_ in newmetas:
            uniq = get_unique_names(names, new_.name)
            if uniq != new_.name:
                new_ = new_.copy(name=uniq)
            uniq_newmetas.append(new_)
            names.append(uniq)

        metas += uniq_newmetas
        domain = Orange.data.Domain(attrs, self.class_var, metas=metas)
        predictions = self.data.transform(domain)
        if newcolumns:
            newcolumns = numpy.hstack(
                [numpy.atleast_2d(cols) for cols in newcolumns])
            predictions.metas[:, -newcolumns.shape[1]:] = newcolumns

        index = self.dataview.model().index
        map_to = self.dataview.model().mapToSource
        assert self.selection_store is not None
        rows = None
        if self.selection_store.rows:
            rows = [ind.row()
                    for ind in self.dataview.selectionModel().selectedRows(0)]
            rows.sort()
        elif self.dataview.model().isSorted() \
                or self.predictionsview.model().isSorted():
            rows = list(range(len(self.data)))
        if rows:
            source_rows = [map_to(index(row, 0)).row() for row in rows]
            predictions = predictions[source_rows]
        self.Outputs.predictions.send(predictions)
        self._set_output_summary(predictions)

    @staticmethod
    def _add_classification_out_columns(slot, newmetas, newcolumns):
        """Append the predicted-class and per-value probability columns."""
        # Mapped or unmapped predictions?!
        # Or provide a checkbox so the user decides?
        pred = slot.predictor
        name = pred.name
        values = pred.domain.class_var.values
        newmetas.append(DiscreteVariable(name=name, values=values))
        newcolumns.append(slot.results.unmapped_predicted.reshape(-1, 1))
        newmetas += [ContinuousVariable(name=f"{name} ({value})")
                     for value in values]
        newcolumns.append(slot.results.unmapped_probabilities)

    @staticmethod
    def _add_regression_out_columns(slot, newmetas, newcolumns):
        """Append the predicted-value column of a regression model."""
        newmetas.append(ContinuousVariable(name=slot.predictor.name))
        newcolumns.append(slot.results.unmapped_predicted.reshape((-1, 1)))

    def send_report(self):
        """Report the input info, the data with predictions and the scores."""
        def merge_data_with_predictions():
            data_model = self.dataview.model()
            predictions_view = self.predictionsview
            predictions_model = predictions_view.model()

            # use ItemDelegate to style prediction values
            delegates = [predictions_view.itemDelegateForColumn(i)
                         for i in range(predictions_model.columnCount())]

            # iterate only over visible columns of data's QTableView
            iter_data_cols = list(filter(
                lambda x: not self.dataview.isColumnHidden(x),
                range(data_model.columnCount())))

            # print header
            yield [''] + \
                  [predictions_model.headerData(col, Qt.Horizontal,
                                                Qt.DisplayRole)
                   for col in range(predictions_model.columnCount())] + \
                  [data_model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
                   for col in iter_data_cols]

            # print data & predictions
            for i in range(data_model.rowCount()):
                yield [data_model.headerData(i, Qt.Vertical,
                                             Qt.DisplayRole)] + \
                      [delegate.displayText(
                          predictions_model.data(predictions_model.index(i, j)),
                          QLocale())
                       for j, delegate in enumerate(delegates)] + \
                      [data_model.data(data_model.index(i, j))
                       for j in iter_data_cols]

        if self.data:
            text = self._get_details().replace('\n', '<br>')
            if self.selected_classes:
                text += '<br>Showing probabilities for: '
                text += ', '.join([self.class_values[i]
                                   for i in self.selected_classes])
            self.report_paragraph('Info', text)
            self.report_table("Data & Predictions",
                              merge_data_with_predictions(),
                              header_rows=1, header_columns=1)

            self.report_table("Scores", self.score_table.view)

    def resizeEvent(self, event):
        """Keep the splitter layout after the widget is resized."""
        super().resizeEvent(event)
        self._update_splitter()

    def showEvent(self, event):
        """Apply the splitter layout once the widget is shown."""
        super().showEvent(event)
        QTimer.singleShot(0, self._update_splitter)
class OWTestLearners(widget.OWWidget):
    """Widget that evaluates input learners with a chosen resampling scheme.

    Learners, train data, optional test data and an optional preprocessor
    arrive through the input handlers (``set_learner``, ``set_train_data``,
    ``set_test_data``, ``set_preprocessor``).  ``apply`` runs the evaluation
    for every learner whose results are missing, fills the score table and
    sends the merged results and predictions to the outputs.
    """

    name = "Test & Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Learner, "set_learner", widget.Multiple),
              ("Data", Table, "set_train_data", widget.Default),
              ("Test Data", Table, "set_test_data"),
              ("Preprocessor", Preprocess, "set_preprocessor")]

    outputs = [("Predictions", Orange.data.Table),
               ("Evaluation Results", Orange.evaluation.Results)]

    settingsHandler = settings.ClassValuesContextHandler()

    #: Resampling/testing types
    KFold, LeaveOneOut, ShuffleSplit, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    k_folds = settings.Setting(10)
    #: Number of repeats for ShuffleSplit sampling
    n_repeat = settings.Setting(10)
    #: ShuffleSplit sampling p
    sample_p = settings.Setting(75)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    def __init__(self):
        """Initialise the widget state and build the control/main areas."""
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False

        #: An Ordered dictionary with current inputs and their testing
        #: results.
        self.learners = OrderedDict()

        sbox = gui.widgetBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed
        )
        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "k_folds", 2, 50, label="Number of folds:",
                 callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Leave one out")
        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "n_repeat", 2, 50, label="Repeat train/test",
                 callback=self.shuffle_split_changed)
        gui.widgetLabel(ibox, "Relative training set size:")
        gui.hSlider(ibox, self, "sample_p", minValue=1, maxValue=99,
                    ticks=20, vertical=False, labelFormat="%d %%",
                    callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        rbox.layout().addSpacing(5)
        self.apply_button = gui.button(
            rbox, self, "Apply", callback=self.apply, default=True)

        self.cbox = gui.widgetBox(self.controlArea, "Target class")
        self.class_selection_combo = gui.comboBox(
            self.cbox, self, "class_selection", items=[],
            sendSelectedValue=True, valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        self.view = QTreeView(
            rootIsDecorated=False,
            uniformRowHeights=True,
            wordWrap=True,
            editTriggers=QTreeView.NoEditTriggers
        )
        header = self.view.header()
        header.setResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.widgetBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(780, 1)

    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.

        A ``None`` learner for a known `key` removes that input; anything
        else (re)registers the learner with empty results.
        """
        if key in self.learners and learner is None:
            # Removed
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, None)
            self._invalidate([key])

    def set_train_data(self, data):
        """
        Set the input training dataset.

        Data without a class variable is rejected with an error, SqlTable
        input is downloaded (or sampled when it is too large), and rows
        with unknown class values are removed.
        """
        self.error(0)
        self.information(0)
        if data and not data.domain.class_var:
            self.error(0, "Train data input requires a class variable")
            data = None

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.information(0, "Train data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.warning(4)
        self.train_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.warning(4, self._get_missing_data_warning(
                self.train_data_missing_vals, self.test_data_missing_vals
            ))
            if data:
                data = RemoveNaNClasses(data)

        self.data = data
        self.closeContext()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain.class_var)
        self._invalidate()

    def set_test_data(self, data):
        """
        Set the input separate testing dataset.

        Mirrors `set_train_data`; results are only invalidated when the
        'Test on test data' mode is currently selected.
        """
        self.error(1)
        self.information(1)
        if data and not data.domain.class_var:
            self.error(1, "Test data input requires a class variable")
            data = None

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.information(1, "Test data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.warning(4)
        self.test_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.warning(4, self._get_missing_data_warning(
                self.train_data_missing_vals, self.test_data_missing_vals
            ))
            if data:
                data = RemoveNaNClasses(data)

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _get_missing_data_warning(self, train_missing, test_missing):
        """Return the 'unknown target values removed' warning text.

        The text names the train set, the test set, or neither (when both
        are affected), depending on the two flags.
        """
        if train_missing and test_missing:
            which = " "
        elif train_missing:
            which = " train "
        elif test_missing:
            which = " test "
        else:
            which = ""
        return "Instances with unknown target values were removed from{}data"\
            .format(which)

    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self.apply()

    def kfold_changed(self):
        """Select K-fold resampling after its fold count was edited."""
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def shuffle_split_changed(self):
        """Select random sampling after one of its parameters was edited."""
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        """Invalidate all results after a sampling parameter change."""
        self._invalidate()

    def _update_results(self):
        """
        Run/evaluate the learners.

        Only learners whose slot has no results are evaluated.  The
        outcome (or the failure) of each learner is stored back in
        `self.learners`.
        """
        self.warning([1, 2])
        self.error([2, 4])

        if self.data is None:
            return

        class_var = self.data.domain.class_var

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                self.warning(2, "Missing separate test data input")
                return
            elif self.test_data.domain.class_var != class_var:
                self.error(2, ("Inconsistent class variable between test " +
                               "and train data sets"))
                return

        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]
        if not items:
            return

        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.warning(1, "Test data is present but unused. "
                            "Select 'Test on test data' to use it.")

        rstate = 42

        def update_progress(finished):
            self.progressBarSet(100 * finished)

        common_args = dict(
            store_data=True,
            preprocessor=self.preprocessor,
            callback=update_progress)
        self.setStatusMessage("Running")

        with self.progressBar():
            try:
                if self.resampling == OWTestLearners.KFold:
                    if len(self.data) < self.k_folds:
                        self.error(4, "Number of folds exceeds the data size")
                        # Do not leave the "Running" status behind when
                        # bailing out before any evaluation started.
                        self.setStatusMessage("")
                        return
                    warnings = []
                    results = Orange.evaluation.CrossValidation(
                        self.data, learners, k=self.k_folds,
                        random_state=rstate, warnings=warnings, **common_args)
                    if warnings:
                        self.warning(2, warnings[0])
                elif self.resampling == OWTestLearners.LeaveOneOut:
                    results = Orange.evaluation.LeaveOneOut(
                        self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.ShuffleSplit:
                    train_size = self.sample_p / 100
                    results = Orange.evaluation.ShuffleSplit(
                        self.data, learners, n_resamples=self.n_repeat,
                        train_size=train_size, test_size=None,
                        random_state=rstate, **common_args)
                elif self.resampling == OWTestLearners.TestOnTrain:
                    results = Orange.evaluation.TestOnTrainingData(
                        self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.TestOnTest:
                    results = Orange.evaluation.TestOnTestData(
                        self.data, self.test_data, learners, **common_args)
                else:
                    assert False
            except RuntimeError as e:
                self.error(2, str(e))
                self.setStatusMessage("")
                return

        learner_key = {slot.learner: key
                       for key, slot in self.learners.items()}
        for learner, result in zip(learners, split_by_model(results)):
            stats = None
            if class_var.is_discrete:
                scorers = classification_stats.scores
            elif class_var.is_continuous:
                scorers = regression_stats.scores
            else:
                scorers = None
            if scorers:
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [Try(lambda: score(result)) for score in scorers]
                    result = Try.Success(result)
            key = learner_key[learner]
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self.setStatusMessage("")

    def _update_header(self):
        """Set the correct horizontal header labels on the results_model."""
        headers = ["Method"]
        if self.data is not None:
            if self.data.domain.has_discrete_class:
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)

        # remove possible extra columns from the model.
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        """Update the results_model with up to date scores.

        Note: The target class specific scores (if requested) are
        computed as needed in this method.
        """
        model = self.view.model()
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                    self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            head.setData(key, Qt.UserRole)
            if isinstance(slot.results, Try.Fail):
                head.setToolTip(str(slot.results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                errors.append("{name} failed with error:\n"
                              "{exc.__class__.__name__}: {exc!s}"
                              .format(name=name, exc=slot.results.exception))

            row = [head]

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                # Scores for a single target class are recomputed from
                # the one-vs-rest view of the stored results.
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(
                        slot.results.value, target_index)
                    stats = [Try(lambda: score(ovr_results))
                             for score in classification_stats.scores]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat in stats:
                    item = QStandardItem()
                    if stat.success:
                        item.setText("{:.3f}".format(stat.value[0]))
                    else:
                        item.setToolTip(str(stat.exception))
                        has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        if errors:
            self.error(3, "\n".join(errors))
        else:
            self.error(3)

        if has_missing_scores:
            self.warning(3, "Some scores could not be computed")
        else:
            self.warning(3)

    def _update_class_selection(self):
        """Repopulate the target class combo box from the current data."""
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = [self.TARGET_AVERAGE] + class_var.values
            self.class_selection_combo.addItems(items)

            # Index 0 is the "average" entry; class values follow it.
            class_index = 0
            if self.class_selection in class_var.values:
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        """Refresh the score table after the target class selection changed."""
        self._update_stats_model()

    def _invalidate(self, which=None):
        """Invalidate learner results for `which` input keys.

        If `which` is None then all learner results are invalidated.  The
        score cells already displayed for those keys are blanked and the
        Apply button is enabled.
        """
        if which is None:
            which = self.learners.keys()

        model = self.view.model()
        statmodelkeys = [model.item(row, 0).data(Qt.UserRole)
                         for row in range(model.rowCount())]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.apply_button.setEnabled(True)

    def apply(self):
        """Evaluate pending learners, refresh the table and send outputs."""
        self.apply_button.setEnabled(False)
        self._update_header()
        # Update the view to display the model names
        self._update_stats_model()
        self._update_results()
        self._update_stats_model()
        self.commit()

    def commit(self):
        """Send the merged results and predictions of successful learners."""
        valid = [slot for slot in self.learners.values()
                 if slot.results is not None and slot.results.success]
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [learner_name(slot.learner)
                                      for slot in valid]

            # Predictions & Probabilities
            predictions = combined.get_augmented_data(combined.learner_names)
        else:
            combined = None
            predictions = None
        self.send("Evaluation Results", combined)
        self.send("Predictions", predictions)

    def send_report(self):
        """Report the sampling settings and the score table."""
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            items = [("Sampling type", "{}-fold Cross validation".
                      format(self.k_folds))]
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.ShuffleSplit:
            # The class defines no `Bootstrap` constant; random sampling
            # is identified by `ShuffleSplit`.
            items = [("Sampling type",
                      "{} random samples with {} % data ".format(
                          self.n_repeat, self.sample_p))]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.view)
class OWPredictions(OWWidget):
    """Widget that shows predictions of input models next to the data.

    Predictors are kept in `self.predictors` (input id -> PredictorSlot).
    When signals change, every predictor without usable results is applied
    to the data, the two side-by-side table views are refreshed and the
    augmented data and evaluation results are sent to the outputs.
    """

    name = "Predictions"
    icon = "icons/Predictions.svg"
    priority = 200
    description = "Display the predictions of models for an input dataset."
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)
        predictors = Input("Predictors", Model, multiple=True)

    class Outputs:
        predictions = Output("Predictions", Orange.data.Table)
        evaluation_results = Output("Evaluation Results",
                                    Orange.evaluation.Results,
                                    dynamic=False)

    class Warning(OWWidget.Warning):
        empty_data = Msg("Empty dataset")

    class Error(OWWidget.Error):
        predictor_failed = \
            Msg("One or more predictors failed (see more...)\n{}")
        predictors_target_mismatch = \
            Msg("Predictors do not have the same target.")
        data_target_mismatch = \
            Msg("Data does not have the same target as predictors.")

    settingsHandler = settings.ClassValuesContextHandler()
    #: Display the full input dataset or only the target variable columns (if
    #: available)
    show_attrs = settings.Setting(True)
    #: Show predicted values (for discrete target variable)
    show_predictions = settings.Setting(True)
    #: Show predictions probabilities (for discrete target variable)
    show_probabilities = settings.Setting(True)
    #: List of selected class value indices in the "Show probabilities" list
    selected_classes = settings.ContextSetting([])
    #: Draw colored distribution bars
    draw_dist = settings.Setting(True)

    output_attrs = settings.Setting(True)
    output_predictions = settings.Setting(True)
    output_probabilities = settings.Setting(True)

    def __init__(self):
        """Initialise the widget state and build the GUI."""
        super().__init__()

        #: Input data table
        self.data = None  # type: Optional[Orange.data.Table]
        #: A dict mapping input ids to PredictorSlot
        self.predictors = OrderedDict()  # type: Dict[object, PredictorSlot]
        #: A class variable (prediction target)
        self.class_var = None  # type: Optional[Orange.data.Variable]
        #: List of (discrete) class variable's values
        self.class_values = []  # type: List[str]

        box = gui.vBox(self.controlArea, "Info")
        self.infolabel = gui.widgetLabel(
            box, "No data on input.\nPredictors: 0\nTask: N/A")
        self.infolabel.setMinimumWidth(150)
        gui.button(box, self, "Restore Original Order",
                   callback=self._reset_order,
                   tooltip="Show rows in the original order")

        self.classification_options = box = gui.vBox(
            self.controlArea, "Show", spacing=-1, addSpace=False)

        gui.checkBox(box, self, "show_predictions", "Predicted class",
                     callback=self._update_prediction_delegate)
        b = gui.checkBox(box, self, "show_probabilities",
                         "Predicted probabilities for:",
                         callback=self._update_prediction_delegate)
        ibox = gui.indentedBox(box, sep=gui.checkButtonOffsetHint(b),
                               addSpace=False)
        gui.listBox(ibox, self, "selected_classes", "class_values",
                    callback=self._update_prediction_delegate,
                    selectionMode=QListWidget.MultiSelection,
                    addSpace=False)
        gui.checkBox(box, self, "draw_dist", "Draw distribution bars",
                     callback=self._update_prediction_delegate)

        box = gui.vBox(self.controlArea, "Data View")
        gui.checkBox(box, self, "show_attrs", "Show full dataset",
                     callback=self._update_column_visibility)

        box = gui.vBox(self.controlArea, "Output", spacing=-1)
        # The "Original data" check box is not kept on `self`: the name
        # `checkbox_class` is bound to the "Predictions" check box below.
        gui.checkBox(box, self, "output_attrs", "Original data",
                     callback=self.commit)
        self.checkbox_class = gui.checkBox(
            box, self, "output_predictions", "Predictions",
            callback=self.commit)
        self.checkbox_prob = gui.checkBox(
            box, self, "output_probabilities", "Probabilities",
            callback=self.commit)

        gui.rubber(self.controlArea)

        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
            handleWidth=2,
        )
        self.dataview = TableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollMode=QTableView.ScrollPerPixel,
            selectionMode=QTableView.NoSelection,
            focusPolicy=Qt.StrongFocus)
        self.predictionsview = TableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollMode=QTableView.ScrollPerPixel,
            selectionMode=QTableView.NoSelection,
            focusPolicy=Qt.StrongFocus,
            sortingEnabled=True,
        )

        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.dataview.verticalHeader().hide()

        # Keep the vertical scroll position of the two views in sync.
        dsbar = self.dataview.verticalScrollBar()
        psbar = self.predictionsview.verticalScrollBar()

        psbar.valueChanged.connect(dsbar.setValue)
        dsbar.valueChanged.connect(psbar.setValue)

        self.dataview.verticalHeader().setDefaultSectionSize(22)
        self.predictionsview.verticalHeader().setDefaultSectionSize(22)
        self.dataview.verticalHeader().sectionResized.connect(
            lambda index, _, size:
            self.predictionsview.verticalHeader().resizeSection(index, size))

        self.splitter.addWidget(self.predictionsview)
        self.splitter.addWidget(self.dataview)

        self.mainArea.layout().addWidget(self.splitter)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Set the input dataset"""
        if data is not None and not len(data):
            data = None
            self.Warning.empty_data()
        else:
            self.Warning.empty_data.clear()

        self.data = data
        if data is None:
            self.dataview.setModel(None)
            self.predictionsview.setModel(None)
            self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        else:
            # force full reset of the view's HeaderView state
            self.dataview.setModel(None)
            model = TableModel(data, parent=None)
            modelproxy = TableSortProxyModel()
            modelproxy.setSourceModel(model)
            self.dataview.setModel(modelproxy)
            self._update_column_visibility()

        self._invalidate_predictions()

    @Inputs.predictors
    def set_predictor(self, predictor=None, id=None):
        """Add, replace or remove (``predictor is None``) the input `id`."""
        if id in self.predictors:
            if predictor is not None:
                self.predictors[id] = self.predictors[id]._replace(
                    predictor=predictor, name=predictor.name, results=None)
            else:
                del self.predictors[id]
        elif predictor is not None:
            self.predictors[id] = \
                PredictorSlot(predictor, predictor.name, None)

    def set_class_var(self):
        """Determine the common target variable of predictors and data.

        `self.class_var` stays None (with an error shown) when predictors
        disagree on the target or the data has a different target.
        """
        pred_classes = set(pred.predictor.domain.class_var
                           for pred in self.predictors.values())
        self.Error.predictors_target_mismatch.clear()
        self.Error.data_target_mismatch.clear()
        self.class_var = None
        if len(pred_classes) > 1:
            self.Error.predictors_target_mismatch()
        if len(pred_classes) == 1:
            self.class_var = pred_classes.pop()
            if self.data is not None and \
                    self.data.domain.class_var is not None and \
                    self.class_var != self.data.domain.class_var:
                self.Error.data_target_mismatch()
                self.class_var = None

        discrete_class = self.class_var is not None \
            and self.class_var.is_discrete
        self.classification_options.setVisible(discrete_class)
        self.closeContext()
        if discrete_class:
            self.class_values = list(self.class_var.values)
            self.selected_classes = list(range(len(self.class_values)))
            self.openContext(self.class_var)
        else:
            self.class_values = []
            self.selected_classes = []

    def handleNewSignals(self):
        """Recompute predictions and refresh views, info and outputs."""
        self.set_class_var()
        if self.data is not None:
            self._call_predictors()
        self._update_predictions_model()
        self._update_prediction_delegate()
        self._set_errors()
        self._update_info()
        self.commit()

    def _call_predictors(self):
        """Apply every predictor that has no usable results to the data.

        A predictor is (re)run when its results are missing, are an error
        message from an earlier failed run, or contain only NaN values.
        A ``ValueError`` from the predictor is stored as an error string.
        """
        for inputid, pred in self.predictors.items():
            # The `str` check must precede `isnan`: a stored error message
            # cannot be passed to `numpy.isnan`.
            if pred.results is None \
                    or isinstance(pred.results, str) \
                    or numpy.isnan(pred.results[0]).all():
                try:
                    results = self.predict(pred.predictor, self.data)
                except ValueError as err:
                    results = "{}: {}".format(pred.predictor.name, err)
                self.predictors[inputid] = pred._replace(results=results)

    def _set_errors(self):
        """Show the collected predictor error messages, or clear the error."""
        errors = "\n".join(p.results for p in self.predictors.values()
                           if isinstance(p.results, str))
        if errors:
            self.Error.predictor_failed(errors)
        else:
            self.Error.predictor_failed.clear()

    def _update_info(self):
        """Refresh the info label and the output check boxes' enabled state."""
        info = []
        if self.data is not None:
            info.append("Data: {} instances.".format(len(self.data)))
        else:
            info.append("Data: N/A")

        n_predictors = len(self.predictors)
        n_valid = len(self._valid_predictors())
        if n_valid != n_predictors:
            info.append("Predictors: {} (+ {} failed)".format(
                n_valid, n_predictors - n_valid))
        else:
            info.append("Predictors: {}".format(n_predictors or "N/A"))

        if self.class_var is None:
            info.append("Task: N/A")
        elif self.class_var.is_discrete:
            info.append("Task: Classification")
            self.checkbox_class.setEnabled(True)
            self.checkbox_prob.setEnabled(True)
        else:
            info.append("Task: Regression")
            self.checkbox_class.setEnabled(False)
            self.checkbox_prob.setEnabled(False)

        self.infolabel.setText("\n".join(info))

    def _invalidate_predictions(self):
        """Drop the stored results of all predictors."""
        for inputid, pred in list(self.predictors.items()):
            self.predictors[inputid] = pred._replace(results=None)

    def _valid_predictors(self):
        """Return slots with successful results (empty without data/target)."""
        if self.class_var is not None and self.data is not None:
            return [p for p in self.predictors.values()
                    if p.results is not None
                    and not isinstance(p.results, str)]
        else:
            return []

    def _update_predictions_model(self):
        """Update the prediction view model."""
        if self.data is not None and self.class_var is not None:
            slots = self._valid_predictors()
            results = []
            class_var = self.class_var
            for p in slots:
                values, prob = p.results
                if self.class_var.is_discrete:
                    # if values were added to class_var between building the
                    # model and predicting, add zeros for new class values,
                    # which are always at the end
                    prob = numpy.c_[
                        prob,
                        numpy.zeros((prob.shape[0],
                                     len(class_var.values) - prob.shape[1]))]
                    values = [Value(class_var, v) for v in values]
                results.append((values, prob))
            results = list(zip(*(zip(*res) for res in results)))
            headers = [p.name for p in slots]
            model = PredictionsModel(results, headers)
        else:
            model = None

        predmodel = PredictionsSortProxyModel()
        predmodel.setSourceModel(model)
        predmodel.setDynamicSortFilter(True)
        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.predictionsview.setModel(predmodel)
        hheader = self.predictionsview.horizontalHeader()
        hheader.setSortIndicatorShown(False)
        # SortFilterProxyModel is slow due to large abstraction overhead
        # (every comparison triggers multiple `model.index(...)`,
        # model.rowCount(...), `model.parent`, ... calls)
        hheader.setSectionsClickable(predmodel.rowCount() < 20000)

        predmodel.layoutChanged.connect(self._update_data_sort_order)
        self._update_data_sort_order()
        self.predictionsview.resizeColumnsToContents()

    def _update_column_visibility(self):
        """Update data column visibility."""
        if self.data is not None and self.class_var is not None:
            domain = self.data.domain
            first_attr = len(domain.class_vars) + len(domain.metas)

            for i in range(first_attr, first_attr + len(domain.attributes)):
                self.dataview.setColumnHidden(i, not self.show_attrs)
            if domain.class_var:
                self.dataview.setColumnHidden(0, False)

    def _update_data_sort_order(self):
        """Update data row order to match the current predictions view order"""
        datamodel = self.dataview.model()  # data model proxy
        predmodel = self.predictionsview.model()  # predictions model proxy
        sortindicatorshown = False
        if datamodel is not None:
            assert isinstance(datamodel, TableSortProxyModel)
            n = datamodel.rowCount()
            if predmodel is not None and predmodel.sortColumn() >= 0:
                sortind = numpy.argsort(
                    [predmodel.mapToSource(predmodel.index(i, 0)).row()
                     for i in range(n)])
                # Builtin `int` replaces the `numpy.int` alias, which is
                # no longer available in recent NumPy releases.
                sortind = numpy.array(sortind, int)
                sortindicatorshown = True
            else:
                sortind = None

            datamodel.setSortIndices(sortind)

        self.predictionsview.horizontalHeader() \
            .setSortIndicatorShown(sortindicatorshown)

    def _reset_order(self):
        """Reset the row sorting to original input order."""
        datamodel = self.dataview.model()
        predmodel = self.predictionsview.model()
        if datamodel is not None:
            datamodel.sort(-1)
        if predmodel is not None:
            predmodel.sort(-1)
        self.predictionsview.horizontalHeader().setSortIndicatorShown(False)

    def _update_prediction_delegate(self):
        """Update the predicted probability visibility state"""
        if self.class_var is not None:
            delegate = PredictionsItemDelegate()
            if self.class_var.is_continuous:
                self._setup_delegate_continuous(delegate)
            else:
                self._setup_delegate_discrete(delegate)
                proxy = self.predictionsview.model()
                if proxy is not None:
                    proxy.setProbInd(
                        numpy.array(self.selected_classes, dtype=int))
            self.predictionsview.setItemDelegate(delegate)
            self.predictionsview.resizeColumnsToContents()
        self._update_spliter()

    def _setup_delegate_discrete(self, delegate):
        """Configure `delegate` format and colors for a discrete target."""
        colors = [QtGui.QColor(*rgb) for rgb in self.class_var.colors]
        fmt = []
        if self.show_probabilities:
            fmt.append(" : ".join("{{dist[{}]:.2f}}".format(i)
                                  for i in sorted(self.selected_classes)))
        if self.show_predictions:
            fmt.append("{value!s}")
        delegate.setFormat(" \N{RIGHTWARDS ARROW} ".join(fmt))
        if self.draw_dist and colors is not None:
            delegate.setColors(colors)
        return delegate

    def _setup_delegate_continuous(self, delegate):
        """Configure `delegate` number format for a continuous target."""
        delegate.setFormat("{{value:.{}f}}".format(
            self.class_var.number_of_decimals))

    def _update_spliter(self):
        """Resize the splitter so the predictions view fits its columns."""
        if self.data is None:
            return

        def width(view):
            h_header = view.horizontalHeader()
            v_header = view.verticalHeader()
            return h_header.length() + v_header.width()

        w = width(self.predictionsview) + 4
        w1, w2 = self.splitter.sizes()
        self.splitter.setSizes([w, w1 + w2 - w])

    def commit(self):
        """Send both outputs."""
        self._commit_predictions()
        self._commit_evaluation_results()

    def _commit_evaluation_results(self):
        """Send evaluation results built from rows with a known class value."""
        slots = self._valid_predictors()
        if not slots or self.data.domain.class_var is None:
            self.Outputs.evaluation_results.send(None)
            return

        class_var = self.class_var
        # Rows with an unknown actual class are left out of the results.
        nanmask = numpy.isnan(self.data.get_column_view(class_var)[0])
        data = self.data[~nanmask]
        N = len(data)
        results = Orange.evaluation.Results(data, store_data=True)
        results.folds = None
        results.row_indices = numpy.arange(N)
        results.actual = data.Y.ravel()
        results.predicted = numpy.vstack(
            tuple(p.results[0][~nanmask] for p in slots))
        if class_var and class_var.is_discrete:
            results.probabilities = numpy.array(
                [p.results[1][~nanmask] for p in slots])
        results.learner_names = [p.name for p in slots]
        self.Outputs.evaluation_results.send(results)

    def _commit_predictions(self):
        """Send the data augmented with prediction meta columns."""
        slots = self._valid_predictors()
        if not slots:
            self.Outputs.predictions.send(None)
            return

        if self.class_var and self.class_var.is_discrete:
            newmetas, newcolumns = self._classification_output_columns()
        else:
            newmetas, newcolumns = self._regression_output_columns()

        attrs = list(self.data.domain.attributes) if self.output_attrs else []
        metas = list(self.data.domain.metas) + newmetas
        domain = \
            Orange.data.Domain(attrs, self.data.domain.class_var, metas=metas)
        predictions = self.data.transform(domain)
        if newcolumns:
            newcolumns = numpy.hstack(
                [numpy.atleast_2d(cols) for cols in newcolumns])
            # The new variables were appended last, so they occupy the
            # trailing meta columns.
            predictions.metas[:, -newcolumns.shape[1]:] = newcolumns
        self.Outputs.predictions.send(predictions)

    def _classification_output_columns(self):
        """Return (variables, columns) to output for a discrete target."""
        newmetas = []
        newcolumns = []
        slots = self._valid_predictors()
        if self.output_predictions:
            newmetas += [DiscreteVariable(name=p.name,
                                          values=self.class_values)
                         for p in slots]
            newcolumns += [p.results[0].reshape((-1, 1)) for p in slots]

        if self.output_probabilities:
            newmetas += [ContinuousVariable(name="%s (%s)" % (p.name, value))
                         for p in slots for value in self.class_values]
            newcolumns += [p.results[1] for p in slots]
        return newmetas, newcolumns

    def _regression_output_columns(self):
        """Return (variables, columns) to output for a continuous target."""
        slots = self._valid_predictors()
        newmetas = [ContinuousVariable(name=p.name) for p in slots]
        newcolumns = [p.results[0].reshape((-1, 1)) for p in slots]
        return newmetas, newcolumns

    def send_report(self):
        """Report the info text and the combined data/predictions table."""
        def merge_data_with_predictions():
            data_model = self.dataview.model()
            predictions_model = self.predictionsview.model()

            # use ItemDelegate to style prediction values
            def style(x):
                return self.predictionsview.itemDelegate().displayText(
                    x, QLocale())

            # iterate only over visible columns of data's QTableView
            iter_data_cols = list(
                filter(lambda x: not self.dataview.isColumnHidden(x),
                       range(data_model.columnCount())))

            # print header
            yield [''] + \
                  [predictions_model.headerData(col, Qt.Horizontal,
                                                Qt.DisplayRole)
                   for col in range(predictions_model.columnCount())] + \
                  [data_model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
                   for col in iter_data_cols]

            # print data & predictions
            for i in range(data_model.rowCount()):
                yield [data_model.headerData(i, Qt.Vertical,
                                             Qt.DisplayRole)] + \
                      [style(predictions_model.data(
                          predictions_model.index(i, j)))
                       for j in range(predictions_model.columnCount())] + \
                      [data_model.data(data_model.index(i, j))
                       for j in iter_data_cols]

        if self.data is not None and self.class_var is not None:
            text = self.infolabel.text().replace('\n', '<br>')
            if self.show_probabilities and self.selected_classes:
                text += '<br>Showing probabilities for: '
                text += ', '.join([self.class_values[i]
                                   for i in self.selected_classes])
            self.report_paragraph('Info', text)
            self.report_table("Data & Predictions",
                              merge_data_with_predictions(),
                              header_rows=1, header_columns=1)

    @classmethod
    def predict(cls, predictor, data):
        """Apply `predictor` to `data` according to its target type.

        Returns whatever `predict_discrete` / `predict_continuous` return,
        or None when the predictor has no discrete or continuous target.
        """
        class_var = predictor.domain.class_var
        if class_var:
            if class_var.is_discrete:
                return cls.predict_discrete(predictor, data)
            elif class_var.is_continuous:
                return cls.predict_continuous(predictor, data)
        return None

    @staticmethod
    def predict_discrete(predictor, data):
        """Call `predictor` on `data` in ``Model.ValueProbs`` mode."""
        return predictor(data, Model.ValueProbs)

    @staticmethod
    def predict_continuous(predictor, data):
        """Return predicted values paired with a list of None placeholders."""
        values = predictor(data, Model.Value)
        return values, [None] * len(data)
class OWPredictions(widget.OWWidget):
    """Show predictions of the input models side by side with the input data.

    The widget keeps one ``PredictorSlot`` per connected model, evaluates
    every model on the input data, displays the predictions in a sortable
    table that is kept in row-sync with the data table, and sends the
    predictions (as extra meta columns) and, when the data's class variable
    matches the first predictor's target, evaluation results.
    """
    name = "Predictions"
    icon = "icons/Predictions.svg"
    priority = 200
    description = "Display predictions of models for an input data set."
    inputs = [("Data", Orange.data.Table, "set_data"),
              ("Predictors", Model, "set_predictor", widget.Multiple)]
    outputs = [("Predictions", Orange.data.Table),
               ("Evaluation Results", Orange.evaluation.Results)]

    settingsHandler = settings.ClassValuesContextHandler()
    #: Display the full input dataset or only the target variable columns (if
    #: available)
    show_attrs = settings.Setting(True)
    #: Show predicted values (for discrete target variable)
    show_predictions = settings.Setting(True)
    #: Show predictions probabilities (for discrete target variable)
    show_probabilities = settings.Setting(True)
    #: List of selected class value indices in the "Show probabilities" list
    selected_classes = settings.ContextSetting([])
    #: Draw colored distribution bars
    draw_dist = settings.Setting(True)

    output_attrs = settings.Setting(True)
    output_predictions = settings.Setting(True)
    output_probabilities = settings.Setting(True)

    def __init__(self):
        """Build the control area, the two synchronized table views and
        initialize the (empty) widget state."""
        super().__init__()

        #: Input data table
        self.data = None  # type: Optional[Orange.data.Table]
        #: A dict mapping input ids to PredictorSlot
        self.predictors = OrderedDict()  # type: Dict[object, PredictorSlot]
        #: A class variable (prediction target)
        self.class_var = None  # type: Optional[Orange.data.Variable]
        #: List of (discrete) class variable's values
        self.class_values = []  # type: List[str]

        box = gui.widgetBox(self.controlArea, "Info")
        self.infolabel = gui.widgetLabel(
            box, "No data on input\nPredictors: 0\nTask: N/A")
        self.infolabel.setMinimumWidth(150)
        gui.button(box, self, "Restore Original Order",
                   callback=self._reset_order,
                   tooltip="Show rows in the original order")

        self.classification_options = box = gui.widgetBox(
            self.controlArea, "Options (classification)", spacing=-1,
            addSpace=False)

        gui.checkBox(box, self, "show_predictions", "Show predicted class",
                     callback=self._update_prediction_delegate)
        b = gui.checkBox(box, self, "show_probabilities",
                         "Show predicted probabilities",
                         callback=self._update_prediction_delegate)
        ibox = gui.indentedBox(box, sep=gui.checkButtonOffsetHint(b),
                               addSpace=False)
        gui.listBox(ibox, self, "selected_classes", "class_values",
                    callback=self._update_prediction_delegate,
                    selectionMode=QtGui.QListWidget.MultiSelection,
                    addSpace=False)
        gui.checkBox(box, self, "draw_dist", "Draw distribution bars",
                     callback=self._update_prediction_delegate)

        box = gui.widgetBox(self.controlArea, "Data view")
        gui.checkBox(box, self, "show_attrs", "Show full data set",
                     callback=self._update_column_visibility)

        box = gui.widgetBox(self.controlArea, "Output", spacing=-1)
        # The "Original data" checkbox is never enabled/disabled later, so it
        # is not bound to an attribute (it used to be bound to
        # `checkbox_class` and immediately overwritten by the next line).
        gui.checkBox(box, self, "output_attrs", "Original data",
                     callback=self.commit)
        self.checkbox_class = gui.checkBox(box, self, "output_predictions",
                                           "Predictions",
                                           callback=self.commit)
        self.checkbox_prob = gui.checkBox(box, self, "output_probabilities",
                                          "Probabilities",
                                          callback=self.commit)

        gui.rubber(self.controlArea)

        self.splitter = QtGui.QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
            handleWidth=2,
        )
        self.dataview = QtGui.QTableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollMode=QtGui.QTableView.ScrollPerPixel,
            selectionMode=QtGui.QTableView.NoSelection,
            focusPolicy=Qt.StrongFocus)
        self.predictionsview = QtGui.QTableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollMode=QtGui.QTableView.ScrollPerPixel,
            selectionMode=QtGui.QTableView.NoSelection,
            focusPolicy=Qt.StrongFocus,
            sortingEnabled=True,
        )
        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.predictionsview.verticalHeader().hide()

        # Keep the vertical scroll position of both views in sync.
        dsbar = self.dataview.verticalScrollBar()
        psbar = self.predictionsview.verticalScrollBar()
        psbar.valueChanged.connect(dsbar.setValue)
        dsbar.valueChanged.connect(psbar.setValue)

        self.dataview.verticalHeader().setDefaultSectionSize(22)
        self.predictionsview.verticalHeader().setDefaultSectionSize(22)
        # Mirror row height changes from the data view to the predictions.
        self.dataview.verticalHeader().sectionResized.connect(
            lambda index, _, size:
            self.predictionsview.verticalHeader().resizeSection(index, size))

        self.splitter.addWidget(self.dataview)
        self.splitter.addWidget(self.predictionsview)

        self.mainArea.layout().addWidget(self.splitter)
        # (state, width) pair consumed and updated by `_update_spliter`.
        self.spliter_restore_state = int(self.show_attrs), 300

    @check_sql_input
    def set_data(self, data):
        """Set the input data set"""
        self.data = data
        if data is None:
            self.dataview.setModel(None)
            self.predictionsview.setModel(None)
            self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        else:
            # force full reset of the view's HeaderView state
            self.dataview.setModel(None)
            model = TableModel(data, parent=None)
            modelproxy = TableSortProxyModel()
            modelproxy.setSourceModel(model)
            self.dataview.setModel(modelproxy)
            self._update_column_visibility()
            self.dataview.scrollTo(
                modelproxy.index(0, len(data.domain.attributes)))

        self.invalidate_predictions()

    def set_predictor(self, predictor=None, id=None):
        """Add, replace or remove the predictor connected under input `id`.

        A `None` predictor removes an existing slot. Any non-`None` predictor
        also becomes the source of the widget's current `class_var`.
        """
        if id in self.predictors:
            if predictor is not None:
                self.predictors[id] = self.predictors[id]._replace(
                    predictor=predictor, name=predictor.name, results=None)
            else:
                del self.predictors[id]
        elif predictor is not None:
            self.predictors[id] = \
                PredictorSlot(predictor, predictor.name, None)

        if predictor is not None:
            self.class_var = predictor.domain.class_var

    def handleNewSignals(self):
        """Recompute missing predictions and refresh views, info and outputs.

        A predictor raising `ValueError` is reported through `self.error` and
        gets all-NaN placeholder results so the remaining code can proceed.
        """
        self.error(0)
        if self.data is not None:
            for inputid, pred in list(self.predictors.items()):
                if pred.results is None or numpy.isnan(pred.results[0]).all():
                    try:
                        results = self.predict(pred.predictor, self.data)
                    except ValueError as err:
                        err_msg = '{}:\n'.format(pred.predictor.name) + \
                                  str(err)
                        self.error(0, err_msg)
                        n, m = len(self.data), 1
                        if self.data.domain.has_discrete_class:
                            m = len(self.data.domain.class_var.values)
                        probabilities = numpy.full((n, m), numpy.nan)
                        results = (numpy.full(n, numpy.nan), probabilities)
                    self.predictors[inputid] = pred._replace(results=results)

        if not self.predictors:
            self.class_var = None

        self.classification_options.setVisible(
            self.class_var is not None and self.class_var.is_discrete)

        self.closeContext()
        if self.class_var is not None and self.class_var.is_discrete:
            self.class_values = list(self.class_var.values)
            self.selected_classes = list(range(len(self.class_values)))
            self.openContext(self.class_var)
        else:
            self.class_values = []
            self.selected_classes = []

        self._update_predictions_model()
        self._update_prediction_delegate()

        # Check for prediction target consistency
        target_vars = {p.predictor.domain.class_var
                       for p in self.predictors.values()}
        if len(target_vars) > 1:
            self.warning(0, "Inconsistent class variables")
        else:
            self.warning(0)

        # Update the Info box text.
        info = []
        if self.data is not None:
            info.append("Data: {} instances.".format(len(self.data)))
        else:
            info.append("Data: N/A")

        if self.predictors:
            info.append("Predictors: {}".format(len(self.predictors)))
        else:
            info.append("Predictors: N/A")

        if self.class_var is not None:
            if self.class_var.is_discrete:
                info.append("Task: Classification")
                self.checkbox_class.setEnabled(True)
                self.checkbox_prob.setEnabled(True)
            else:
                info.append("Task: Regression")
                self.checkbox_class.setEnabled(False)
                self.checkbox_prob.setEnabled(False)
        else:
            info.append("Task: N/A")

        self.infolabel.setText("\n".join(info))
        self.commit()

    def invalidate_predictions(self):
        """Drop the cached results of every predictor slot."""
        for inputid, pred in list(self.predictors.items()):
            self.predictors[inputid] = pred._replace(results=None)

    def _update_predictions_model(self):
        """Update the prediction view model."""
        if self.data is not None:
            slots = self.predictors.values()
            results = []
            for p in slots:
                values, prob = p.results
                if p.predictor.domain.class_var.is_discrete:
                    values = [
                        Orange.data.Value(p.predictor.domain.class_var, v)
                        for v in values
                    ]
                results.append((values, prob))
            # Transpose from per-predictor (values, probs) columns into
            # per-row tuples of (value, prob) pairs.
            results = list(zip(*(zip(*res) for res in results)))
            headers = [p.name for p in slots]
            model = PredictionsModel(results, headers)
        else:
            model = None

        predmodel = PredictionsSortProxyModel()
        predmodel.setSourceModel(model)
        predmodel.setDynamicSortFilter(True)
        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.predictionsview.setModel(predmodel)
        self.predictionsview.horizontalHeader().setSortIndicatorShown(False)

        predmodel.layoutChanged.connect(self._update_data_sort_order)
        self._update_data_sort_order()
        self.predictionsview.resizeColumnsToContents()

    def _update_column_visibility(self):
        """Update data column visibility."""
        if self.data is not None:
            for i in range(len(self.data.domain.attributes)):
                self.dataview.setColumnHidden(i, not self.show_attrs)
            if self.data.domain.class_var:
                # The class column (right after the attributes) stays visible.
                self.dataview.setColumnHidden(
                    len(self.data.domain.attributes), False)
        self._update_spliter()

    def _update_data_sort_order(self):
        """Update data row order to match the current predictions view order"""
        datamodel = self.dataview.model()  # data model proxy
        predmodel = self.predictionsview.model()  # predictions model proxy
        sortindicatorshown = False
        if datamodel is not None:
            assert isinstance(datamodel, TableSortProxyModel)
            n = datamodel.rowCount()
            if predmodel is not None and predmodel.sortColumn() >= 0:
                sortind = numpy.argsort(
                    [predmodel.mapToSource(predmodel.index(i, 0)).row()
                     for i in range(n)])
                # `numpy.int` was a deprecated alias of the builtin `int`
                # and no longer exists in current NumPy releases.
                sortind = numpy.array(sortind, dtype=int)
                sortindicatorshown = True
            else:
                sortind = None

            datamodel.setSortIndices(sortind)

        self.predictionsview.horizontalHeader() \
            .setSortIndicatorShown(sortindicatorshown)

    def _reset_order(self):
        """Reset the row sorting to original input order."""
        datamodel = self.dataview.model()
        predmodel = self.predictionsview.model()
        if datamodel is not None:
            datamodel.sort(-1)
        if predmodel is not None:
            predmodel.sort(-1)
        self.predictionsview.horizontalHeader().setSortIndicatorShown(False)

    def _update_prediction_delegate(self):
        """Update the predicted probability visibility state"""
        delegate = PredictionsItemDelegate()
        colors = None
        if self.class_var is not None:
            if self.class_var.is_discrete:
                colors = [QtGui.QColor(*rgb) for rgb in self.class_var.colors]
                dist_fmt = ""
                pred_fmt = ""
                if self.show_probabilities:
                    decimals = 2
                    float_fmt = "{{dist[{}]:.{}f}}"
                    dist_fmt = " : ".join(
                        float_fmt.format(i, decimals)
                        for i in range(len(self.class_var.values))
                        if i in self.selected_classes)
                if self.show_predictions:
                    pred_fmt = "{value!s}"
                if pred_fmt and dist_fmt:
                    fmt = dist_fmt + " \N{RIGHTWARDS ARROW} " + pred_fmt
                else:
                    fmt = dist_fmt or pred_fmt
            else:
                assert isinstance(self.class_var, ContinuousVariable)
                fmt = "{{value:.{}f}}".format(
                    self.class_var.number_of_decimals)

            delegate.setFormat(fmt)
            if self.draw_dist and colors is not None:
                delegate.setColors(colors)
            self.predictionsview.setItemDelegate(delegate)
            self.predictionsview.resizeColumnsToContents()

        if self.class_var is not None and self.class_var.is_discrete:
            proxy = self.predictionsview.model()
            if proxy is not None:
                proxy.setProbInd(
                    numpy.array(self.selected_classes, dtype=int))

    def _update_spliter(self):
        """Resize the splitter so the data view fits the visible columns.

        `self.spliter_restore_state` is a `(state, width)` pair: state 1
        means the attributes were hidden and `width` is the data view width
        to restore once they are shown again.
        """
        if self.data is None:
            return

        def width(view):
            h_header = view.horizontalHeader()
            v_header = view.verticalHeader()
            return h_header.length() + v_header.width()

        def widthForColumns(view, start=0, end=None):
            h_header = view.horizontalHeader()
            v_header = view.verticalHeader()
            width = sum(h_header.sectionSize(i)
                        for i in range(h_header.count())[start:end])
            return v_header.width() + width

        if not self.show_attrs:
            w1, w2 = self.splitter.sizes()
            # w = widthHint(self.dataview)
            w = width(self.dataview) + 4
            self.splitter.setSizes([w, w1 + w2 - w])
            self.dataview.setMaximumWidth(w)

            state, w = self.spliter_restore_state
            if state == 0:
                # save dataview width on change from 'show all' to 'hide all'
                self.spliter_restore_state = 1, w1
        else:
            w1, w2 = self.splitter.sizes()
            state, w = self.spliter_restore_state
            if state == 1:
                # restore dataview on change from 'hide all' to 'show all'
                # extend the dataview to the saved width but no further
                # then 2/3 of the available space
                w = min(w, (w1 + w2) * 2 // 3)
            else:
                # shrink the dataview width if its contents are smaller then
                # its width
                w1, w2 = self.splitter.sizes()
                w = widthForColumns(self.dataview, -2) + 4
                w = min(w, (w1 + w2) // 2)
                predw = widthForColumns(self.predictionsview)
                w = max(w, min(w1 + w2 - predw - 20, w1 + w2 - w))

            self.splitter.setSizes([w, w1 + w2 - w])
            self.dataview.setMaximumWidth(QWIDGETSIZE_MAX)
            self.spliter_restore_state = 0, w

        self.dataview.horizontalScrollBar().triggerAction(
            QtGui.QScrollBar.SliderToMaximum)

    def commit(self):
        """Send the "Predictions" table and the "Evaluation Results".

        Predictions (and probabilities, for classification) are appended to
        the data as meta columns. Evaluation results are only built when the
        data's class variable equals the first predictor's target; otherwise
        `None` is sent on that output.
        """
        if self.data is None or not self.predictors:
            self.send("Predictions", None)
            self.send("Evaluation Results", None)
            return

        predictor = next(iter(self.predictors.values())).predictor
        class_var = predictor.domain.class_var
        classification = class_var and class_var.is_discrete

        newmetas = []
        newcolumns = []
        slots = list(self.predictors.values())

        if classification:
            if self.output_predictions:
                mc = [DiscreteVariable(name=p.name, values=class_var.values)
                      for p in slots]
                newmetas.extend(mc)
                newcolumns.extend(p.results[0].reshape((-1, 1))
                                  for p in slots)

            if self.output_probabilities:
                for p in slots:
                    m = [ContinuousVariable(name="%s(%s)" % (p.name, value))
                         for value in class_var.values]
                    newmetas.extend(m)
                newcolumns.extend(p.results[1] for p in slots)
        else:
            # regression
            mc = [ContinuousVariable(name=p.name) for p in slots]
            newmetas.extend(mc)
            newcolumns.extend(p.results[0].reshape((-1, 1)) for p in slots)

        if self.output_attrs:
            attrs = list(self.data.domain.attributes)
        else:
            attrs = []
        metas = list(self.data.domain.metas) + newmetas
        domain = Orange.data.Domain(attrs, self.data.domain.class_var,
                                    metas=metas)
        predictions = self.data.from_table(domain, self.data)

        if newcolumns:
            newcolumns = numpy.hstack(
                [numpy.atleast_2d(cols) for cols in newcolumns])
            # The new meta columns were appended last, so fill from the end.
            predictions.metas[:, -newcolumns.shape[1]:] = newcolumns

        results = None
        if self.data.domain.class_var == class_var:
            N = len(self.data)
            results = Orange.evaluation.Results(self.data, store_data=True)
            results.folds = None
            results.row_indices = numpy.arange(N)
            results.actual = self.data.Y.ravel()
            results.predicted = numpy.vstack(
                tuple(p.results[0] for p in slots))
            if classification:
                results.probabilities = numpy.array(
                    [p.results[1] for p in slots])
            results.learner_names = [p.name for p in slots]

        self.send("Predictions", predictions)
        self.send("Evaluation Results", results)

    @classmethod
    def predict(cls, predictor, data):
        """Run `predictor` on `data`, dispatching on its target type.

        Returns the `(values, probabilities)` pair of the matching helper;
        implicitly returns `None` when the predictor has no class variable
        or it is neither discrete nor continuous.
        """
        class_var = predictor.domain.class_var
        if class_var:
            if class_var.is_discrete:
                return cls.predict_discrete(predictor, data)
            elif class_var.is_continuous:
                return cls.predict_continuous(predictor, data)

    @staticmethod
    def predict_discrete(predictor, data):
        """Return the predictor's output requested as `Model.ValueProbs`."""
        return predictor(data, Model.ValueProbs)

    @staticmethod
    def predict_continuous(predictor, data):
        """Return predicted values and a `None` placeholder per data row
        in place of probabilities."""
        values = predictor(data, Model.Value)
        return values, [None] * len(data)
class OWPredictions(OWWidget):
    """Show predictions of the input models side by side with the input data.

    Every connected model is stored in a ``PredictorSlot`` whose `results`
    is `None` (not yet computed), a `Results` instance, or an error string
    (the model failed). Predictions are shown in a sortable table kept in
    row-sync with the data table; scores are shown in a `ScoreTable`.
    """
    name = "Predictions"
    icon = "icons/Predictions.svg"
    priority = 200
    description = "Display predictions of models for an input dataset."
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)
        predictors = Input("Predictors", Model, multiple=True)

    class Outputs:
        predictions = Output("Predictions", Orange.data.Table)
        evaluation_results = Output("Evaluation Results", Results)

    class Warning(OWWidget.Warning):
        empty_data = Msg("Empty dataset")
        wrong_targets = Msg(
            "Some model(s) predict a different target (see more ...)\n{}")

    class Error(OWWidget.Error):
        predictor_failed = Msg("Some predictor(s) failed (see more ...)\n{}")
        scorer_failed = Msg("Some scorer(s) failed (see more ...)\n{}")

    settingsHandler = settings.ClassValuesContextHandler()
    score_table = settings.SettingProvider(ScoreTable)

    #: List of selected class value indices in the `class_values` list
    selected_classes = settings.ContextSetting([])

    def __init__(self):
        """Build the controls, the two synchronized table views and the
        score table, and initialize the (empty) widget state."""
        super().__init__()

        self.data = None  # type: Optional[Orange.data.Table]
        self.predictors = {}  # type: Dict[object, PredictorSlot]
        self.class_values = []  # type: List[str]
        # Python-side references to the per-column delegates (the view does
        # not take ownership of them).
        self._delegates = []

        gui.listBox(self.controlArea, self, "selected_classes",
                    "class_values",
                    box="Show probabilities for",
                    callback=self._update_prediction_delegate,
                    selectionMode=QListWidget.MultiSelection,
                    addSpace=False,
                    sizePolicy=(QSizePolicy.Preferred,
                                QSizePolicy.Preferred))
        gui.rubber(self.controlArea)
        gui.button(self.controlArea, self, "Restore Original Order",
                   callback=self._reset_order,
                   tooltip="Show rows in the original order")

        table_opts = dict(horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                          horizontalScrollMode=QTableView.ScrollPerPixel,
                          selectionMode=QTableView.NoSelection,
                          focusPolicy=Qt.StrongFocus)
        self.dataview = TableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn, **table_opts)
        self.predictionsview = TableView(
            sortingEnabled=True,
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
            **table_opts)
        self.dataview.verticalHeader().hide()

        # Keep the vertical scroll position of both views in sync.
        dsbar = self.dataview.verticalScrollBar()
        psbar = self.predictionsview.verticalScrollBar()
        psbar.valueChanged.connect(dsbar.setValue)
        dsbar.valueChanged.connect(psbar.setValue)

        self.dataview.verticalHeader().setDefaultSectionSize(22)
        self.predictionsview.verticalHeader().setDefaultSectionSize(22)
        # Mirror row height changes from the data view to the predictions.
        self.dataview.verticalHeader().sectionResized.connect(
            lambda index, _, size:
            self.predictionsview.verticalHeader().resizeSection(index, size))

        self.splitter = QSplitter(orientation=Qt.Horizontal,
                                  childrenCollapsible=False, handleWidth=2)
        self.splitter.addWidget(self.predictionsview)
        self.splitter.addWidget(self.dataview)

        self.score_table = ScoreTable(self)
        self.vsplitter = gui.vBox(self.mainArea)
        self.vsplitter.layout().addWidget(self.splitter)
        self.vsplitter.layout().addWidget(self.score_table.view)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Set the input data, reset the table models and drop all cached
        predictions. Empty (but not `None`) data triggers a warning."""
        self.Warning.empty_data(shown=data is not None and not data)
        self.data = data
        if not data:
            self.dataview.setModel(None)
            self.predictionsview.setModel(None)
        else:
            # force full reset of the view's HeaderView state
            self.dataview.setModel(None)
            model = TableModel(data, parent=None)
            modelproxy = TableSortProxyModel()
            modelproxy.setSourceModel(model)
            self.dataview.setModel(modelproxy)

        self._invalidate_predictions()

    @property
    def class_var(self):
        """Class variable of the input data; falsy when there is no
        (non-empty) data or the data has no class."""
        return self.data and self.data.domain.class_var

    # pylint: disable=redefined-builtin
    @Inputs.predictors
    def set_predictor(self, predictor=None, id=None):
        """Add, replace or remove the predictor connected under input `id`.

        A `None` predictor removes an existing slot.
        """
        if id in self.predictors:
            if predictor is not None:
                self.predictors[id] = self.predictors[id]._replace(
                    predictor=predictor, name=predictor.name, results=None)
            else:
                del self.predictors[id]
        elif predictor is not None:
            self.predictors[id] = PredictorSlot(predictor, predictor.name,
                                                None)

    def _set_class_values(self):
        """Collect class values of all discrete-target predictors.

        When the data has a discrete class, its values are listed first and
        pre-selected; `selected_classes` holds indices into
        `self.class_values`.
        """
        class_values = []
        for slot in self.predictors.values():
            class_var = slot.predictor.domain.class_var
            if class_var and class_var.is_discrete:
                for value in class_var.values:
                    if value not in class_values:
                        class_values.append(value)

        if self.class_var and self.class_var.is_discrete:
            values = self.class_var.values
            self.class_values = sorted(
                class_values, key=lambda val: val not in values)
            # Indices must refer to the *sorted* list stored on the widget,
            # because that is the list they are later used to index.
            self.selected_classes = [
                i for i, name in enumerate(self.class_values)
                if name in values]
        else:
            self.class_values = class_values  # This assignment updates listview
            self.selected_classes = []

    def handleNewSignals(self):
        """Recompute everything that depends on the data or the models and
        send the outputs."""
        self._set_class_values()
        self._call_predictors()
        self._update_scores()
        self._update_predictions_model()
        self._update_prediction_delegate()
        self._set_errors()
        self._update_info()
        self.commit()

    def _call_predictors(self):
        """Run every predictor that has no `Results` yet on the input data.

        Failures (`ValueError`, `DomainTransformationError`) are stored as
        an error string in the slot. `predicted`/`probabilities` stay `None`
        for predictors whose target differs from the data's class variable.
        """
        if not self.data:
            return
        if self.class_var:
            domain = self.data.domain
            classless_data = self.data.transform(
                Domain(domain.attributes, None, domain.metas))
        else:
            classless_data = self.data

        for inputid, slot in self.predictors.items():
            if isinstance(slot.results, Results):
                continue

            predictor = slot.predictor
            try:
                if predictor.domain.class_var.is_discrete:
                    pred, prob = predictor(classless_data, Model.ValueProbs)
                else:
                    pred = predictor(classless_data, Model.Value)
                    prob = numpy.zeros((len(pred), 0))
            except (ValueError, DomainTransformationError) as err:
                self.predictors[inputid] = \
                    slot._replace(results=f"{predictor.name}: {err}")
                continue

            results = Results()
            results.data = self.data
            results.domain = self.data.domain
            results.row_indices = numpy.arange(len(self.data))
            results.folds = (Ellipsis, )
            results.actual = self.data.Y
            results.unmapped_probabilities = prob
            results.unmapped_predicted = pred
            results.probabilities = results.predicted = None
            self.predictors[inputid] = slot._replace(results=results)

            target = predictor.domain.class_var
            if target != self.class_var:
                continue

            # Equal but not identical discrete targets may differ in value
            # order; map predictions back to the data's class variable.
            if target is not self.class_var and target.is_discrete:
                backmappers, n_values = predictor.get_backmappers(self.data)
                prob = predictor.backmap_probs(prob, n_values, backmappers)
                pred = predictor.backmap_value(
                    pred, prob, n_values, backmappers)
            results.predicted = pred.reshape((1, len(self.data)))
            results.probabilities = prob.reshape((1,) + prob.shape)

    def _update_scores(self):
        """Rebuild the score table for predictors with mapped predictions.

        A failing scorer leaves its cell empty with the error as tooltip;
        errors of currently shown scores are also reported via
        `Error.scorer_failed`.
        """
        model = self.score_table.model
        model.clear()
        scorers = usable_scorers(self.class_var) if self.class_var else []
        self.score_table.update_header(scorers)
        errors = []
        for pred in self.predictors.values():
            results = pred.results
            if not isinstance(results, Results) or results.predicted is None:
                continue
            row = [QStandardItem(learner_name(pred.predictor)),
                   QStandardItem("N/A"), QStandardItem("N/A")]
            for scorer in scorers:
                item = QStandardItem()
                try:
                    score = scorer_caller(scorer, results)()[0]
                    item.setText(f"{score:.3f}")
                except Exception as exc:  # pylint: disable=broad-except
                    item.setToolTip(str(exc))
                    if scorer.name in self.score_table.shown_scores:
                        errors.append(str(exc))
                row.append(item)
            self.score_table.model.appendRow(row)

        view = self.score_table.view
        if model.rowCount():
            view.setVisible(True)
            view.ensurePolished()
            view.setFixedHeight(
                5 + view.horizontalHeader().height() +
                view.verticalHeader().sectionSize(0) * model.rowCount())
        else:
            view.setVisible(False)

        self.Error.scorer_failed("\n".join(errors), shown=bool(errors))

    def _set_errors(self):
        """Show/clear the predictor-failure error and wrong-target warning."""
        # Not all predictors are run every time, so errors can't be collected
        # in _call_predictors
        errors = "\n".join(
            f"- {p.predictor.name}: {p.results}"
            for p in self.predictors.values()
            if isinstance(p.results, str) and p.results)
        self.Error.predictor_failed(errors, shown=bool(errors))

        if self.class_var:
            inv_targets = "\n".join(
                f"- {pred.name} predicts '{pred.domain.class_var.name}'"
                for pred in (p.predictor for p in self.predictors.values()
                             if isinstance(p.results, Results)
                             and p.results.probabilities is None))
            self.Warning.wrong_targets(inv_targets, shown=bool(inv_targets))
        else:
            self.Warning.wrong_targets.clear()

    def _get_details(self):
        """Return the multi-line text describing the data and the models."""
        n_predictors = len(self.predictors)
        n_valid = len(self._non_errored_predictors())
        details = f"{len(self.data)} instances" if self.data else "No data"
        # Both alternatives start on a new line; previously "No models" was
        # glued directly to the data description.
        details += f"\n{n_predictors} models" if n_predictors \
            else "\nNo models"
        if n_valid != n_predictors:
            details += f" ({n_predictors - n_valid} failed)"
        return details

    def _update_info(self):
        """Update the input summary shown in the widget's status bar."""
        if not self.data and not self.predictors:
            self.info.set_input_summary(self.info.NoInput)
            return

        summary = str(len(self.data)) if self.data else "0"
        self.info.set_input_summary(summary, self._get_details())

    def _invalidate_predictions(self):
        """Drop the cached results of every predictor slot."""
        for inputid, pred in list(self.predictors.items()):
            self.predictors[inputid] = pred._replace(results=None)

    def _non_errored_predictors(self):
        """Return the slots whose results are a `Results` instance."""
        return [p for p in self.predictors.values()
                if isinstance(p.results, Results)]

    def _update_predictions_model(self):
        """Rebuild the predictions model (one column per non-errored
        predictor) and re-attach it to the predictions view."""
        results = []
        headers = []
        for p in self._non_errored_predictors():
            values = p.results.unmapped_predicted
            target = p.predictor.domain.class_var
            if target.is_discrete:
                prob = p.results.unmapped_probabilities
                values = [Value(target, v) for v in values]
            else:
                prob = numpy.zeros((len(values), 0))
            results.append((values, prob))
            headers.append(p.predictor.name)

        if results:
            # Transpose from per-predictor (values, probs) columns into
            # per-row tuples of (value, prob) pairs.
            results = list(zip(*(zip(*res) for res in results)))
            model = PredictionsModel(results, headers)
        else:
            model = None

        predmodel = PredictionsSortProxyModel()
        predmodel.setSourceModel(model)
        predmodel.setDynamicSortFilter(True)
        self.predictionsview.setModel(predmodel)
        hheader = self.predictionsview.horizontalHeader()
        hheader.setSortIndicatorShown(False)
        # SortFilterProxyModel is slow due to large abstraction overhead
        # (every comparison triggers multiple `model.index(...)`,
        # model.rowCount(...), `model.parent`, ... calls)
        hheader.setSectionsClickable(predmodel.rowCount() < 20000)

        predmodel.layoutChanged.connect(self._update_data_sort_order)
        self._update_data_sort_order()
        self.predictionsview.resizeColumnsToContents()

    def _update_data_sort_order(self):
        """Reorder the data view rows to follow the predictions view."""
        datamodel = self.dataview.model()  # data model proxy
        predmodel = self.predictionsview.model()  # predictions model proxy
        sortindicatorshown = False
        if datamodel is not None:
            assert isinstance(datamodel, TableSortProxyModel)
            n = datamodel.rowCount()
            if predmodel is not None and predmodel.sortColumn() >= 0:
                sortind = numpy.argsort(
                    [predmodel.mapToSource(predmodel.index(i, 0)).row()
                     for i in range(n)])
                # `numpy.int` was a deprecated alias of the builtin `int`
                # and no longer exists in current NumPy releases.
                sortind = numpy.array(sortind, dtype=int)
                sortindicatorshown = True
            else:
                sortind = None

            datamodel.setSortIndices(sortind)

        self.predictionsview.horizontalHeader() \
            .setSortIndicatorShown(sortindicatorshown)

    def _reset_order(self):
        """Reset the row sorting of both views to the input order."""
        datamodel = self.dataview.model()
        predmodel = self.predictionsview.model()
        if datamodel is not None:
            datamodel.sort(-1)
        if predmodel is not None:
            predmodel.sort(-1)
        self.predictionsview.horizontalHeader().setSortIndicatorShown(False)

    def _update_prediction_delegate(self):
        """Install one item delegate per predictions column.

        The predictions model only has columns for non-errored predictors,
        so only those are enumerated to keep column indices aligned.
        """
        selected = {self.class_values[i] for i in self.selected_classes}
        self._delegates.clear()
        for col, slot in enumerate(self._non_errored_predictors()):
            target = slot.predictor.domain.class_var
            shown_probs = () if target.is_continuous else \
                [i for i, name in enumerate(target.values)
                 if name in selected]
            delegate = PredictionsItemDelegate(target, shown_probs)
            # QAbstractItemView does not take ownership of delegates, so we must
            self._delegates.append(delegate)
            self.predictionsview.setItemDelegateForColumn(col, delegate)
            self.predictionsview.setColumnHidden(col, False)

        self.predictionsview.resizeColumnsToContents()
        self._update_spliter()

    def _update_spliter(self):
        """Give the predictions view just enough width for its columns."""
        if not self.data:
            return

        def width(view):
            h_header = view.horizontalHeader()
            v_header = view.verticalHeader()
            return h_header.length() + v_header.width()

        w = width(self.predictionsview) + 4
        w1, w2 = self.splitter.sizes()
        self.splitter.setSizes([w, w1 + w2 - w])

    def commit(self):
        """Send both outputs."""
        self._commit_predictions()
        self._commit_evaluation_results()

    def _commit_evaluation_results(self):
        """Send `Results` built from predictors with mapped predictions.

        Rows with an unknown class value are excluded. `None` is sent when
        no predictor has mapped predictions.
        """
        slots = [p for p in self._non_errored_predictors()
                 if p.results.predicted is not None]
        if not slots:
            self.Outputs.evaluation_results.send(None)
            return

        nanmask = numpy.isnan(self.data.get_column_view(self.class_var)[0])
        data = self.data[~nanmask]
        results = Results(data, store_data=True)
        results.folds = None
        results.row_indices = numpy.arange(len(data))
        results.actual = data.Y.ravel()
        results.predicted = numpy.vstack(
            tuple(p.results.predicted[0][~nanmask] for p in slots))
        if self.class_var and self.class_var.is_discrete:
            results.probabilities = numpy.array(
                [p.results.probabilities[0][~nanmask] for p in slots])
        results.learner_names = [p.name for p in slots]
        self.Outputs.evaluation_results.send(results)

    def _commit_predictions(self):
        """Send the data extended with prediction meta columns of every
        non-errored predictor (`None` when there is no data)."""
        if not self.data:
            self.Outputs.predictions.send(None)
            return

        newmetas = []
        newcolumns = []
        for slot in self._non_errored_predictors():
            if slot.predictor.domain.class_var.is_discrete:
                self._add_classification_out_columns(
                    slot, newmetas, newcolumns)
            else:
                self._add_regression_out_columns(slot, newmetas, newcolumns)

        attrs = list(self.data.domain.attributes)
        metas = list(self.data.domain.metas) + newmetas
        domain = Orange.data.Domain(attrs, self.class_var, metas=metas)
        predictions = self.data.transform(domain)
        if newcolumns:
            newcolumns = numpy.hstack(
                [numpy.atleast_2d(cols) for cols in newcolumns])
            # The new meta columns were appended last, so fill from the end.
            predictions.metas[:, -newcolumns.shape[1]:] = newcolumns
        self.Outputs.predictions.send(predictions)

    @staticmethod
    def _add_classification_out_columns(slot, newmetas, newcolumns):
        """Append the predicted-class variable/column and one probability
        variable per class value (plus the probability matrix) in place."""
        # Mapped or unmapped predictions?!
        # Or provide a checkbox so the user decides?
        pred = slot.predictor
        name = pred.name
        values = pred.domain.class_var.values
        newmetas.append(DiscreteVariable(name=name, values=values))
        newcolumns.append(slot.results.unmapped_predicted.reshape(-1, 1))
        newmetas += [ContinuousVariable(name=f"{name} ({value})")
                     for value in values]
        newcolumns.append(slot.results.unmapped_probabilities)

    @staticmethod
    def _add_regression_out_columns(slot, newmetas, newcolumns):
        """Append the predicted-value variable and column in place."""
        newmetas.append(ContinuousVariable(name=slot.predictor.name))
        newcolumns.append(slot.results.unmapped_predicted.reshape((-1, 1)))

    def send_report(self):
        """Add an info paragraph and the merged data & predictions table to
        the report (only when there is non-empty data)."""
        def merge_data_with_predictions():
            data_model = self.dataview.model()
            predictions_view = self.predictionsview
            predictions_model = predictions_view.model()

            # Delegates are installed per column, so each column must be
            # styled with its own delegate; fall back to the view-wide
            # delegate for a column that has none.
            default_delegate = predictions_view.itemDelegate()
            delegates = []
            for col in range(predictions_model.columnCount()):
                delegate = predictions_view.itemDelegateForColumn(col)
                delegates.append(
                    default_delegate if delegate is None else delegate)

            # iterate only over visible columns of data's QTableView
            iter_data_cols = [
                col for col in range(data_model.columnCount())
                if not self.dataview.isColumnHidden(col)]

            # print header
            yield [''] + \
                  [predictions_model.headerData(col, Qt.Horizontal,
                                                Qt.DisplayRole)
                   for col in range(predictions_model.columnCount())] + \
                  [data_model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
                   for col in iter_data_cols]

            # print data & predictions
            for i in range(data_model.rowCount()):
                yield [data_model.headerData(i, Qt.Vertical,
                                             Qt.DisplayRole)] + \
                      [delegate.displayText(
                          predictions_model.data(
                              predictions_model.index(i, j)),
                          QLocale())
                       for j, delegate in enumerate(delegates)] + \
                      [data_model.data(data_model.index(i, j))
                       for j in iter_data_cols]

        if self.data:
            # This widget has no info label; report the same text that is
            # shown as the input summary details.
            text = self._get_details().replace('\n', '<br>')
            if self.selected_classes:
                text += '<br>Showing probabilities for: '
                text += ', '.join(
                    [self.class_values[i] for i in self.selected_classes])
            self.report_paragraph('Info', text)
            self.report_table("Data & Predictions",
                              merge_data_with_predictions(),
                              header_rows=1, header_columns=1)
class OWPythagorasTree(OWWidget):
    """Visualize a tree model as a Pythagorean tree.

    The widget receives a ``TreeModel`` on its input, draws it in a graphics
    scene and outputs the instances that belong to the nodes selected in the
    scene, together with an annotated copy of the model's instances.
    """

    name = 'Pythagorean Tree'
    description = 'Pythagorean Tree visualization for tree like-structures.'
    icon = 'icons/PythagoreanTree.svg'
    keywords = ["fractal"]

    priority = 1000

    class Inputs:
        tree = Input("Tree", TreeModel)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    # Enable the save as feature
    graph_name = 'scene'

    # Settings
    settingsHandler = settings.ClassValuesContextHandler()

    depth_limit = settings.ContextSetting(10)
    target_class_index = settings.ContextSetting(0)
    size_calc_idx = settings.Setting(0)
    size_log_scale = settings.Setting(2)
    tooltips_enabled = settings.Setting(True)
    show_legend = settings.Setting(False)

    LEGEND_OPTIONS = {
        'corner': Anchorable.BOTTOM_RIGHT,
        'offset': (10, 10),
    }

    def __init__(self):
        super().__init__()

        # State that is replaced whenever a new tree arrives on the input
        self.model = None
        self.data = None
        # Adapter wrapping the input model; built in ``set_tree``
        self.tree_adapter = None
        self.legend = None

        self.color_palette = None

        def _identity(x):
            return x

        def _square_root(x):
            return sqrt(x)

        def _logarithmic(x):
            # ``size_log_scale`` is looked up on every call, so moving the
            # slider changes the result without rebuilding this table.
            return log(x * self.size_log_scale + 1)

        # (label, weight adjustment) pairs for the available square sizings
        self.SIZE_CALCULATION = [
            ('Normal', _identity),
            ('Square root', _square_root),
            ('Logarithmic', _logarithmic),
        ]
        size_labels = tuple(label for label, _ in self.SIZE_CALCULATION)

        # CONTROL AREA
        # Tree info area
        box_info = gui.widgetBox(self.controlArea, 'Tree Info')
        self.infolabel = gui.widgetLabel(box_info)
        self.info.set_output_summary(self.info.NoOutput)

        # Display settings area
        box_display = gui.widgetBox(self.controlArea, 'Display Settings')
        self.depth_slider = gui.hSlider(
            box_display, self, 'depth_limit',
            label='Depth', ticks=False,
            callback=self.update_depth,
        )
        self.target_class_combo = gui.comboBox(
            box_display, self, 'target_class_index',
            label='Target class', orientation=Qt.Horizontal,
            items=[], contentsLength=8, searchable=True,
            callback=self.update_colors,
        )
        self.size_calc_combo = gui.comboBox(
            box_display, self, 'size_calc_idx',
            label='Size', orientation=Qt.Horizontal,
            items=size_labels, contentsLength=8,
            callback=self.update_size_calc,
        )
        self.log_scale_box = gui.hSlider(
            box_display, self, 'size_log_scale',
            label='Log scale factor', minValue=1, maxValue=100, ticks=False,
            callback=self.invalidate_tree,
        )

        # Plot properties area
        box_plot = gui.widgetBox(self.controlArea, 'Plot Properties')
        self.cb_show_tooltips = gui.checkBox(
            box_plot, self, 'tooltips_enabled',
            label='Enable tooltips',
            callback=self.update_tooltip_enabled,
        )
        self.cb_show_legend = gui.checkBox(
            box_plot, self, 'show_legend',
            label='Show legend',
            callback=self.update_show_legend,
        )

        gui.rubber(self.controlArea)
        gui.button(self.buttonsArea, self, label="Redraw",
                   callback=self.redraw)

        self.controlArea.setSizePolicy(QSizePolicy.Preferred,
                                       QSizePolicy.Expanding)

        # MAIN AREA
        self.scene = TreeGraphicsScene(self)
        self.scene.selectionChanged.connect(self.commit)
        self.view = TreeGraphicsView(self.scene, padding=(150, 150))
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.mainArea.layout().addWidget(self.view)

        self.ptree = PythagorasTreeViewer(self)
        self.scene.addItem(self.ptree)
        self.view.set_central_widget(self.ptree)

        self.resize(800, 500)
        # Clear the widget to correctly set the initial values
        self.clear()

    @Inputs.tree
    def set_tree(self, model=None):
        """Replace the displayed tree with ``model`` (or clear on ``None``)."""
        self.closeContext()
        self.clear()
        self.model = model

        if model is not None:
            self.data = model.instances

            self._update_target_class_combo()
            self.tree_adapter = self._get_tree_adapter(self.model)
            self.ptree.clear()

            size_function = self.SIZE_CALCULATION[self.size_calc_idx][1]
            self.ptree.set_tree(
                self.tree_adapter,
                weight_adjustment=size_function,
                target_class_index=self.target_class_index,
            )

            self._update_depth_slider()
            self.color_palette = self.ptree.root.color_palette
            self._update_legend_colors()
            self._update_legend_visibility()

            self._update_info_box()
            self._update_main_area()

            context_key = None
            if model.domain is not None:
                context_key = model.domain.class_var
            self.openContext(context_key)

            self.update_depth()

            # The forest widget sets the following attributes on the tree,
            # describing the settings on the forest widget. To keep the tree
            # looking the same as on the forest widget, we prefer these
            # settings to context settings, if set.
            forest_overrides = (
                ("meta_target_class_index", "target_class_index",
                 self.update_colors),
                ("meta_size_calc_idx", "size_calc_idx",
                 self.update_size_calc),
                ("meta_depth_limit", "depth_limit",
                 self.update_depth),
            )
            for source_attr, setting_name, refresh in forest_overrides:
                if hasattr(model, source_attr):
                    setattr(self, setting_name, getattr(model, source_attr))
                    refresh()

        annotated = create_annotated_table(self.data, None)
        self.Outputs.annotated_data.send(annotated)

    def clear(self):
        """Drop the current tree and reset every control to its empty state."""
        self.model = None
        self.data = None
        self.tree_adapter = None

        if self.legend is not None:
            self.scene.removeItem(self.legend)
        self.legend = None

        self.ptree.clear()
        self._clear_info_box()
        self._clear_target_class_combo()
        self._clear_depth_slider()
        self._update_log_scale_slider()

    def update_depth(self):
        """Push the current depth limit to the tree viewer."""
        self.ptree.set_depth_limit(self.depth_limit)

    def update_colors(self):
        """Recolor the nodes for the chosen target and rebuild the legend."""
        self.ptree.target_class_changed(self.target_class_index)
        self._update_legend_colors()

    def update_size_calc(self):
        """React to a change of the square size calculation method."""
        self._update_log_scale_slider()
        self.invalidate_tree()

    def redraw(self):
        """Shuffle the children in the adapter and redraw the tree."""
        if self.data is not None:
            self.tree_adapter.shuffle_children()
            self.invalidate_tree()

    def invalidate_tree(self):
        """Recompute the whole tree; does nothing when no model is set."""
        if self.model is None:
            return

        size_function = self.SIZE_CALCULATION[self.size_calc_idx][1]
        self.ptree.set_tree(
            self.tree_adapter,
            weight_adjustment=size_function,
            target_class_index=self.target_class_index,
        )
        self.ptree.set_depth_limit(self.depth_limit)
        self._update_main_area()

    def update_tooltip_enabled(self):
        """Forward the tooltip checkbox state to the tree viewer."""
        self.ptree.tooltip_changed(self.tooltips_enabled)

    def update_show_legend(self):
        """Apply the legend checkbox state."""
        self._update_legend_visibility()

    def _update_info_box(self):
        adapter = self.tree_adapter
        self.infolabel.setText(
            f'Nodes: {adapter.num_nodes}\nDepth: {adapter.max_depth}')

    def _update_depth_slider(self):
        slider = self.depth_slider
        slider.parent().setEnabled(True)
        slider.setMaximum(self.tree_adapter.max_depth)
        self._set_max_depth()

    def _update_legend_visibility(self):
        if self.legend is None:
            return
        self.legend.setVisible(self.show_legend)

    def _update_log_scale_slider(self):
        """Enable the log scale slider only for the logarithmic method."""
        method_name = self.SIZE_CALCULATION[self.size_calc_idx][0]
        self.log_scale_box.parent().setEnabled(method_name == 'Logarithmic')

    def _clear_info_box(self):
        self.infolabel.setText('No tree on input')

    def _clear_depth_slider(self):
        slider = self.depth_slider
        slider.parent().setEnabled(False)
        slider.setMaximum(0)

    def _clear_target_class_combo(self):
        self.target_class_combo.clear()
        self.target_class_index = -1

    def _set_max_depth(self):
        """Set the depth limit and its slider to the adapter's max depth."""
        deepest = self.tree_adapter.max_depth
        self.depth_limit = deepest
        self.depth_slider.setValue(self.depth_limit)

    def _update_main_area(self):
        # refresh the scene rect, cuts away the excess whitespace, and adds
        # padding for panning.
        scene_rect = self.view.central_widget_rect()
        self.scene.setSceneRect(scene_rect)
        # reset the zoom level
        self.view.recalculate_and_fit()
        self.view.update_anchored_items()

    def _get_tree_adapter(self, model):
        """Return the adapter matching the kind of ``model``."""
        adapter_class = TreeAdapter
        if isinstance(model, SklModel):
            adapter_class = SklTreeAdapter
        return adapter_class(model)

    def onDeleteWidget(self):
        """Clear the widget state when the widget is deleted."""
        super().onDeleteWidget()
        self.clear()

    def commit(self):
        """Send the instances of the selected nodes to the outputs."""
        if self.data is None:
            self.info.set_output_summary(self.info.NoOutput)
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(None)
            return

        selected_labels = []
        for item in self.scene.selectedItems():
            if isinstance(item, SquareGraphicsItem):
                selected_labels.append(item.tree_node.label)

        data = self.tree_adapter.get_instances_in_nodes(selected_labels)
        if data:
            summary = len(data)
            details = format_summary_details(data)
        else:
            summary = self.info.NoOutput
            details = ""
        self.info.set_output_summary(summary, details)
        self.Outputs.selected_data.send(data)

        selected_indices = self.tree_adapter.get_indices(selected_labels)
        annotated = create_annotated_table(self.data, selected_indices)
        self.Outputs.annotated_data.send(annotated)

    def send_report(self):
        """Add the plot to the report."""
        self.report_plot()

    def _update_target_class_combo(self):
        """Refill the target combo and relabel it for the class type."""
        self._clear_target_class_combo()
        siblings = self.target_class_combo.parent().children()
        labels = [child for child in siblings if isinstance(child, QLabel)]
        label = labels[0]

        domain = self.data.domain
        if domain.has_discrete_class:
            label_text = 'Target class'
            values = ['None']
            values.extend(c.title() for c in domain.class_vars[0].values)
        else:
            label_text = 'Node color'
            values = list(ContinuousTreeNode.COLOR_METHODS.keys())

        label.setText(label_text)
        self.target_class_combo.addItems(values)
        # set it to 0, context will change if required
        self.target_class_index = 0

    def _update_legend_colors(self):
        """Replace the legend with one matching the current coloring."""
        if self.legend is not None:
            self.scene.removeItem(self.legend)

        if self.data.domain.has_discrete_class:
            rebuild = self._classification_update_legend_colors
        else:
            rebuild = self._regression_update_legend_colors
        rebuild()

    def _classification_update_legend_colors(self):
        chosen = self.target_class_index
        if chosen == 0:
            # No single target class: legend is built from the whole domain
            self.legend = OWDiscreteLegend(domain=self.model.domain,
                                           **self.LEGEND_OPTIONS)
        else:
            class_label = self.target_class_combo.itemText(chosen)
            # Combo index 0 is 'None', so palette indices are shifted by one
            class_color = self.color_palette[chosen - 1]
            items = (
                (class_label, class_color),
                ('other', QColor('#ffffff')),
            )
            self.legend = OWDiscreteLegend(items=items, **self.LEGEND_OPTIONS)

        self.legend.setVisible(self.show_legend)
        self.scene.addItem(self.legend)

    def _regression_update_legend_colors(self):
        palette = self.model.domain.class_var.palette
        chosen = self.target_class_index
        if chosen == 1:
            # The colors are the class mean
            target = self.data.Y
            items = ((np.min(target), np.max(target)), palette)
        elif chosen == 2:
            # Colors are the stddev
            items = ((0, np.std(self.data.Y)), palette)
        else:
            items = None

        self.legend = OWContinuousLegend(items=items, **self.LEGEND_OPTIONS)
        self.legend.setVisible(self.show_legend)
        self.scene.addItem(self.legend)
class OWPythagoreanForest(OWWidget):
    """Show every tree of a random forest as a small Pythagorean tree.

    The widget receives a ``RandomForestModel``, lists its trees in a wrapping
    list view and outputs the single tree selected in that view.  The display
    settings of this widget are attached to the emitted tree as ``meta_*``
    attributes.
    """

    name = "毕达哥拉斯森林(Pythagorean Forest)"
    description = "毕达哥拉斯森林,用于将随机森林可视化。"
    icon = "icons/PythagoreanForest.svg"
    settings_version = 2
    keywords = ["fractal", "bidagelasisenlin", "gougusenlin"]
    category = "可视化(Visualize)"

    priority = 1001

    class Inputs:
        random_forest = Input("随机森林(Random forest)", RandomForestModel,
                              replaces=["Data"])

    class Outputs:
        tree = Output("树(Tree)", TreeModel, replaces=["Tree"])

    # Enable the save as feature
    graph_name = "scene"

    # Settings
    settingsHandler = settings.ClassValuesContextHandler()

    depth_limit = settings.Setting(10)
    target_class_index = settings.ContextSetting(0)
    size_calc_idx = settings.Setting(0)
    zoom = settings.Setting(200)

    selected_index = settings.ContextSetting(None)

    # (label, weight adjustment) pairs for the available square sizings
    SIZE_CALCULATION = [
        ("正常", lambda x: x),
        ("平方根", lambda x: sqrt(x)),
        ("对数的", lambda x: log(x + 1)),
    ]

    @classmethod
    def migrate_settings(cls, settings, version):
        """Upgrade stored settings written by an older widget version.

        Settings older than version 2 lose ``selected_tree_index`` and have
        ``zoom`` rescaled linearly from the old 20-150 range to the current
        100-400 range.

        Parameters
        ----------
        settings : dict
            Stored settings; modified in place.
        version : int or None
            Version the settings were stored with.  A missing version
            (``None`` or ``0``) is treated as older than 2.
        """
        if not version or version < 2:
            settings.pop("selected_tree_index", None)
            # Only rescale a zoom that was actually stored; otherwise the
            # current default applies and there is nothing to convert.
            if "zoom" in settings:
                v1_min, v1_max = 20, 150
                v2_min, v2_max = 100, 400
                ratio = (v2_max - v2_min) / (v1_max - v1_min)
                settings["zoom"] = int(
                    ratio * (settings["zoom"] - v1_min) + v2_min)

    def __init__(self):
        super().__init__()
        self.rf_model = None
        self.forest = None
        self.instances = None

        self.color_palette = None

        # CONTROL AREA
        # Tree info area
        box_info = gui.widgetBox(self.controlArea, "森林")
        self.ui_info = gui.widgetLabel(box_info)

        # Display controls area
        box_display = gui.widgetBox(self.controlArea, "显示")
        # maxValue is set to a wide three-digit number to probably ensure the
        # proper label width. The maximum is later set to match the tree depth
        self.ui_depth_slider = gui.hSlider(
            box_display, self, "depth_limit",
            label="深度", ticks=False, maxValue=900,
        )  # type: QSlider
        self.ui_target_class_combo = gui.comboBox(
            box_display, self, "target_class_index",
            label="目标类别", orientation=Qt.Horizontal,
            items=[], contentsLength=8, searchable=True,
        )
        self.ui_size_calc_combo = gui.comboBox(
            box_display, self, "size_calc_idx",
            label="大小", orientation=Qt.Horizontal,
            items=list(zip(*self.SIZE_CALCULATION))[0], contentsLength=8,
        )
        self.ui_zoom_slider = gui.hSlider(
            box_display, self, "zoom",
            label="缩放", ticks=False, minValue=100, maxValue=400,
            createLabel=False, intOnly=False,
        )  # type: QSlider

        # Stretch to fit the rest of the unused area
        gui.rubber(self.controlArea)

        self.controlArea.setSizePolicy(QSizePolicy.Preferred,
                                       QSizePolicy.Expanding)

        # MAIN AREA
        self.forest_model = PythagoreanForestModel(parent=self)
        self.forest_model.update_item_size(self.zoom)
        # The controls drive the list model directly through Qt signals
        self.ui_depth_slider.valueChanged.connect(
            self.forest_model.update_depth)
        self.ui_target_class_combo.currentIndexChanged.connect(
            self.forest_model.update_target_class)
        self.ui_zoom_slider.valueChanged.connect(
            self.forest_model.update_item_size)
        self.ui_size_calc_combo.currentIndexChanged.connect(
            self.forest_model.update_size_calc)

        self.list_delegate = PythagorasTreeDelegate(parent=self)
        self.list_view = ClickToClearSelectionListView(parent=self)
        self.list_view.setWrapping(True)
        self.list_view.setFlow(QListView.LeftToRight)
        self.list_view.setResizeMode(QListView.Adjust)
        self.list_view.setModel(self.forest_model)
        self.list_view.setItemDelegate(self.list_delegate)
        self.list_view.setSpacing(2)
        self.list_view.setSelectionMode(QListView.SingleSelection)
        self.list_view.selectionModel().selectionChanged.connect(self.commit)
        self.list_view.setUniformItemSizes(True)
        self.mainArea.layout().addWidget(self.list_view)

        self.resize(800, 500)

        # Clear to set sensible default values
        self.clear()

    @Inputs.random_forest
    def set_rf(self, model=None):
        """Replace the displayed forest with ``model`` (or clear on ``None``)."""
        self.closeContext()
        self.clear()
        self.rf_model = model

        if model is not None:
            self.instances = model.instances
            self._update_target_class_combo()

            self.forest = self._get_forest_adapter(self.rf_model)
            self.forest_model[:] = self.forest.trees

            self._update_info_box()
            self._update_depth_slider()

            self.openContext(
                model.domain.class_var if model.domain is not None else None)

        # Restore item selection (``selected_index`` is a context setting, so
        # ``openContext`` above may have set it again after ``clear``)
        if self.selected_index is not None:
            index = self.list_view.model().index(self.selected_index)
            selection = QItemSelection(index, index)
            self.list_view.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def clear(self):
        """Clear all relevant data from the widget."""
        self.rf_model = None
        self.forest = None
        # Also release the instances of the previous forest so the widget
        # does not keep the table alive after the input is removed.
        self.instances = None
        self.forest_model.clear()
        self.selected_index = None

        self._clear_info_box()
        self._clear_target_class_combo()
        self._clear_depth_slider()

    def _update_info_box(self):
        self.ui_info.setText("Trees: {}".format(len(self.forest.trees)))

    def _update_depth_slider(self):
        """Enable the depth slider and set it to the deepest tree's depth."""
        self.depth_limit = self._get_max_depth()

        self.ui_depth_slider.parent().setEnabled(True)
        self.ui_depth_slider.setMaximum(self.depth_limit)
        self.ui_depth_slider.setValue(self.depth_limit)

    def _update_target_class_combo(self):
        """Refill the target combo and relabel it for the class type."""
        self._clear_target_class_combo()
        label = [
            x for x in self.ui_target_class_combo.parent().children()
            if isinstance(x, QLabel)
        ][0]

        if self.instances.domain.has_discrete_class:
            label_text = "目标类别"
            values = [
                c.title() for c in self.instances.domain.class_vars[0].values
            ]
            values.insert(0, "无")
        else:
            label_text = "Node color"
            values = list(ContinuousTreeNode.COLOR_METHODS.keys())
        label.setText(label_text)
        self.ui_target_class_combo.addItems(values)
        # set it to 0, context will change if required
        self.target_class_index = 0

    def _clear_info_box(self):
        self.ui_info.setText("No forest on input.")

    def _clear_target_class_combo(self):
        self.ui_target_class_combo.clear()
        self.target_class_index = -1

    def _clear_depth_slider(self):
        self.ui_depth_slider.parent().setEnabled(False)
        self.ui_depth_slider.setMaximum(0)

    def _get_max_depth(self):
        """Return the largest ``max_depth`` among the trees, or 0 if none."""
        # ``default`` keeps a forest without trees from raising ValueError
        return max((tree.max_depth for tree in self.forest.trees), default=0)

    def _get_forest_adapter(self, model):
        return SklRandomForestAdapter(model)

    def onDeleteWidget(self):
        """Clear the widget state when the widget is deleted."""
        super().onDeleteWidget()
        self.clear()

    def commit(self, selection: QItemSelection) -> None:
        """Send the tree selected in the list view to the output.

        ``selection`` is the new selection reported by the view's selection
        model; an empty selection sends ``None``.
        """
        selected_indices = selection.indexes()

        if not selected_indices:
            self.selected_index = None
            self.Outputs.tree.send(None)
            return

        # We only allow selecting a single tree so there will always be one
        # index
        self.selected_index = selected_indices[0].row()

        tree = self.rf_model.trees[self.selected_index]
        tree.instances = self.instances
        # Attach this widget's display settings to the tree before sending it
        tree.meta_target_class_index = self.target_class_index
        tree.meta_size_calc_idx = self.size_calc_idx
        tree.meta_depth_limit = self.depth_limit

        self.Outputs.tree.send(tree)

    def send_report(self):
        """Add the plot to the report."""
        self.report_plot()
class OWConfusionMatrix(widget.OWWidget):
    """Confusion matrix widget

    Displays the confusion matrix of one learner from the evaluation results
    on the input and outputs the data instances that fall into the selected
    cells of the matrix.
    """

    name = "Confusion Matrix"
    description = "Display a confusion matrix constructed from " \
                  "the results of classifier evaluations."
    icon = "icons/ConfusionMatrix.svg"
    priority = 1001

    class Inputs:
        evaluation_results = Input("Evaluation Results",
                                   Orange.evaluation.Results)

    class Outputs:
        selected_data = Output("Selected Data", Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    # Labels of the "Show" combo; indexed by ``selected_quantity``
    quantities = ["Number of instances",
                  "Proportion of predicted",
                  "Proportion of actual"]

    settings_version = 1
    settingsHandler = settings.ClassValuesContextHandler()

    selected_learner = settings.Setting([0], schema_only=True)
    selection = settings.ContextSetting(set())
    selected_quantity = settings.Setting(0)
    append_predictions = settings.Setting(True)
    append_probabilities = settings.Setting(False)
    autocommit = settings.Setting(True)

    UserAdviceMessages = [
        widget.Message(
            "Clicking on cells or in headers outputs the corresponding "
            "data instances",
            "click_cell")]

    class Error(widget.OWWidget.Error):
        no_regression = Msg("Confusion Matrix cannot show regression results.")
        invalid_values = Msg(
            "Evaluation Results input contains invalid values")

    def __init__(self):
        super().__init__()

        self.data = None
        self.results = None
        self.learners = []

        self.learners_box = gui.listBox(
            self.controlArea, self, "selected_learner", "learners", box=True,
            callback=self._learner_changed
        )

        self.outputbox = gui.vBox(self.controlArea, "Output")
        box = gui.hBox(self.outputbox)
        gui.checkBox(box, self, "append_predictions",
                     "Predictions", callback=self._invalidate)
        gui.checkBox(box, self, "append_probabilities",
                     "Probabilities",
                     callback=self._invalidate)

        gui.auto_commit(self.outputbox, self, "autocommit",
                        "Send Selected", "Send Automatically", box=False)

        self.mainArea.layout().setContentsMargins(0, 0, 0, 0)

        box = gui.vBox(self.mainArea, box=True)

        sbox = gui.hBox(box)
        gui.rubber(sbox)
        gui.comboBox(sbox, self, "selected_quantity",
                     items=self.quantities, label="Show: ",
                     orientation=Qt.Horizontal, callback=self._update)

        self.tableview = ContingencyTable(self)
        box.layout().addWidget(self.tableview)

        selbox = gui.hBox(box)
        gui.button(selbox, self, "Select Correct",
                   callback=self.select_correct, autoDefault=False)
        gui.button(selbox, self, "Select Misclassified",
                   callback=self.select_wrong, autoDefault=False)
        gui.button(selbox, self, "Clear Selection",
                   callback=self.select_none, autoDefault=False)

    def sizeHint(self):
        """Initial size"""
        return QSize(750, 340)

    @Inputs.evaluation_results
    def set_results(self, results):
        """Set the input results.

        Results for a non-discrete class, or results whose actual/predicted
        values contain NaN, are rejected: the matching error is shown and the
        widget is left empty.
        """
        # Remember the selection; ``clear`` below resets the learner list
        prev_sel_learner = self.selected_learner.copy()
        self.clear()
        self.warning()
        self.closeContext()

        data = None
        if results is not None and results.data is not None:
            data = results.data[results.row_indices]

        if data is not None and not data.domain.has_discrete_class:
            self.Error.no_regression()
            data = results = None
        else:
            self.Error.no_regression.clear()

        nan_values = False
        if results is not None:
            assert isinstance(results, Orange.evaluation.Results)
            if np.any(np.isnan(results.actual)) or \
                    np.any(np.isnan(results.predicted)):
                # Error out here (could filter them out with a warning
                # instead).
                nan_values = True
                results = data = None

        if nan_values:
            self.Error.invalid_values()
        else:
            self.Error.invalid_values.clear()

        self.results = results
        self.data = data

        if data is not None:
            class_values = data.domain.class_var.values
        elif results is not None:
            # Results that carry no data table are not supported
            raise NotImplementedError

        if results is None:
            self.report_button.setDisabled(True)
        else:
            self.report_button.setDisabled(False)

            nmodels = results.predicted.shape[0]

            # NOTE: The 'learner_names' is set in 'Test Learners' widget.
            if hasattr(results, "learner_names"):
                self.learners = results.learner_names
            else:
                self.learners = ["Learner #{}".format(i + 1)
                                 for i in range(nmodels)]

            self.tableview.set_headers(class_values, class_values,
                                       "Actual", "Predicted")
            self.openContext(data.domain.class_var)
            # Keep the previously selected learner when it still exists;
            # otherwise fall back to the first one.
            if not prev_sel_learner or \
                    prev_sel_learner[0] >= len(self.learners):
                if self.learners:
                    self.selected_learner[:] = [0]
            else:
                self.selected_learner[:] = prev_sel_learner
            self._update()
            self.tableview.set_selection(self.selection)
            self.unconditional_commit()

    def clear(self):
        """Reset the widget, clear controls"""
        self.results = None
        self.data = None
        self.tableview.clear()
        # Clear learners last. This action will invoke `_learner_changed`
        self.learners = []

    def select_correct(self):
        """Select the diagonal elements of the matrix"""
        self.tableview.set_selection(
            {(i, i) for i in range(len(self.tableview.classesv))})

    def select_wrong(self):
        """Select the off-diagonal elements of the matrix"""
        self.tableview.set_selection(
            {(i, j)
             for i in range(len(self.tableview.classesv))
             for j in range(len(self.tableview.classesv))
             if i != j})

    def select_none(self):
        """Reset selection"""
        self.tableview.selectionModel().clear()

    def _prepare_data(self):
        """Build the output tables for the currently selected cells.

        Returns a ``(data, annotated_data)`` pair.  ``data`` holds only the
        instances whose (actual, predicted) pair is selected in the table, or
        is ``None`` when nothing matches; ``annotated_data`` is produced by
        ``create_annotated_table`` from all instances and the matching rows.
        Predictions and probabilities of the selected learner are appended as
        meta columns when the corresponding options are checked.
        """
        indices = self.tableview.selectedIndexes()
        # Table cells are offset by two from the class indices
        indices = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        actual = self.results.actual
        learner_name = self.learners[self.selected_learner[0]]
        predicted = self.results.predicted[self.selected_learner[0]]
        selected = [i for i, t in enumerate(zip(actual, predicted))
                    if t in indices]

        extra = []
        class_var = self.data.domain.class_var
        metas = self.data.domain.metas

        if self.append_predictions:
            extra.append(predicted.reshape(-1, 1))
            var = Orange.data.DiscreteVariable(
                "{}({})".format(class_var.name, learner_name),
                class_var.values
            )
            metas = metas + (var,)

        if self.append_probabilities and \
                self.results.probabilities is not None:
            probs = self.results.probabilities[self.selected_learner[0]]
            extra.append(np.array(probs, dtype=object))
            pvars = [Orange.data.ContinuousVariable("p({})".format(value))
                     for value in class_var.values]
            metas = metas + tuple(pvars)

        domain = Orange.data.Domain(self.data.domain.attributes,
                                    self.data.domain.class_vars,
                                    metas)
        data = self.data.transform(domain)
        if extra:
            # The new columns follow the original metas
            data.metas[:, len(self.data.domain.metas):] = \
                np.hstack(tuple(extra))
        data.name = learner_name

        if selected:
            annotated_data = create_annotated_table(data, selected)
            data = data[selected]
        else:
            annotated_data = create_annotated_table(data, [])
            data = None

        return data, annotated_data

    def commit(self):
        """Output data instances corresponding to selected cells"""
        if self.results is not None and self.data is not None \
                and self.selected_learner:
            data, annotated_data = self._prepare_data()
        else:
            data = None
            annotated_data = None

        self.Outputs.selected_data.send(data)
        self.Outputs.annotated_data.send(annotated_data)

    def _invalidate(self):
        """Store the current table selection in the settings and commit."""
        indices = self.tableview.selectedIndexes()
        self.selection = {(ind.row() - 2, ind.column() - 2)
                          for ind in indices}
        self.commit()

    def _learner_changed(self):
        self._update()
        self.commit()

    def _update(self):
        """Recompute the confusion matrix and refresh the table.

        Does nothing when there are no results or no learner is selected.
        """
        # Update the displayed confusion matrix
        if self.results is not None and self.selected_learner:
            cmatrix = confusion_matrix(self.results, self.selected_learner[0])
            colsum = cmatrix.sum(axis=0)
            rowsum = cmatrix.sum(axis=1)
            n = len(cmatrix)
            diag = np.diag_indices(n)

            # Off-diagonal cells are scaled separately from the diagonal
            colors = cmatrix.astype(np.double)
            colors[diag] = 0
            if self.selected_quantity == 0:
                # The ``np.int`` alias was removed in NumPy 1.24; the builtin
                # ``int`` is the same dtype.
                normalized = cmatrix.astype(int)
                formatstr = "{}"
                div = np.array([colors.max()])
            else:
                if self.selected_quantity == 1:
                    normalized = 100 * cmatrix / colsum
                    div = colors.max(axis=0)
                else:
                    normalized = 100 * cmatrix / rowsum[:, np.newaxis]
                    div = colors.max(axis=1)[:, np.newaxis]
                formatstr = "{:2.1f} %"
            div[div == 0] = 1
            colors /= div
            # With no correctly classified instance the diagonal maximum is
            # zero; leave the diagonal colors at 0 instead of dividing by it.
            maxval = normalized[diag].max()
            if maxval > 0:
                colors[diag] = normalized[diag] / maxval

            def tooltip(i, j):
                return "actual: {}\npredicted: {}".format(
                    self.tableview.classesv[i], self.tableview.classesh[j])

            self.tableview.update_table(normalized,
                                        colsum=colsum, rowsum=rowsum,
                                        colors=colors, formatstr=formatstr,
                                        tooltip=tooltip)

    def send_report(self):
        """Send report"""
        if self.results is not None and self.selected_learner:
            self.report_table(
                "Confusion matrix for {} (showing {})".
                format(self.learners[self.selected_learner[0]],
                       self.quantities[self.selected_quantity].lower()),
                self.tableview)

    @classmethod
    def migrate_settings(cls, settings, version):
        """Wrap an integer ``selected_learner`` from unversioned settings."""
        if not version:
            # For some period of time the 'selected_learner' property was
            # changed from List[int] -> int
            # (commit 4e49bb3fd0e11262f3ebf4b1116a91a4b49cc982) and then back
            # again (commit 8a492d79a2e17154a0881e24a05843406c8892c0)
            if "selected_learner" in settings and \
                    isinstance(settings["selected_learner"], int):
                settings["selected_learner"] = [settings["selected_learner"]]
class OWPythagorasTree(OWWidget):
    """Visualize a tree model as a Pythagorean tree (localized widget).

    The widget receives a ``TreeModel`` on its input, draws it in a graphics
    scene and outputs the instances that belong to the nodes selected in the
    scene, together with an annotated copy of the model's instances.
    """

    name = "毕达哥拉斯树(Pythagorean Tree)"
    description = "类似树结构的毕达哥拉斯树可视化。"
    icon = "icons/PythagoreanTree.svg"
    keywords = ["fractal", "bidagelasishu", "gougushu"]
    category = "可视化(Visualize)"

    priority = 1000

    class Inputs:
        tree = Input("树(Tree)", TreeModel, replaces=["Tree"])

    class Outputs:
        selected_data = Output("选定的数据(Selected Data)", Table,
                               default=True, replaces=["Selected Data"])
        annotated_data = Output("数据(Data)", Table, replaces=["Data"])

    # Enable the save as feature
    graph_name = "scene"

    # Settings
    settingsHandler = settings.ClassValuesContextHandler()

    depth_limit = settings.ContextSetting(10)
    target_class_index = settings.ContextSetting(0)
    size_calc_idx = settings.Setting(0)
    size_log_scale = settings.Setting(2)
    tooltips_enabled = settings.Setting(True)
    show_legend = settings.Setting(False)

    LEGEND_OPTIONS = {
        "corner": Anchorable.BOTTOM_RIGHT,
        "offset": (10, 10),
    }

    def __init__(self):
        super().__init__()

        # State that is replaced whenever a new tree arrives on the input
        self.model = None
        self.data = None
        # Adapter wrapping the input model; built in ``set_tree``
        self.tree_adapter = None
        self.legend = None

        self.color_palette = None

        def _unchanged(x):
            return x

        def _root(x):
            return sqrt(x)

        def _log_scaled(x):
            # ``size_log_scale`` is looked up on every call, so moving the
            # slider changes the result without rebuilding this table.
            return log(x * self.size_log_scale + 1)

        # (internal key, weight adjustment, displayed label) triples; the
        # key is what ``_update_log_scale_slider`` compares against.
        self.SIZE_CALCULATION = [
            ("Normal", _unchanged, "正常"),
            ("Square root", _root, "平方根"),
            ("Logarithmic", _log_scaled, "对数"),
        ]
        displayed_sizes = tuple(entry[2] for entry in self.SIZE_CALCULATION)

        # CONTROL AREA
        # Tree info area
        info_box = gui.widgetBox(self.controlArea, "树信息")
        self.infolabel = gui.widgetLabel(info_box)

        # Display settings area
        display_box = gui.widgetBox(self.controlArea, "显示设置")
        # maxValue is set to a wide three-digit number to probably ensure the
        # proper label width. The maximum is later set to match the tree depth
        self.depth_slider = gui.hSlider(
            display_box, self, "depth_limit",
            label="深度", ticks=False, maxValue=900,
            callback=self.update_depth,
        )
        self.target_class_combo = gui.comboBox(
            display_box, self, "target_class_index",
            label="目标类别", orientation=Qt.Horizontal,
            items=[], contentsLength=8, searchable=True,
            callback=self.update_colors,
        )
        self.size_calc_combo = gui.comboBox(
            display_box, self, "size_calc_idx",
            label="大小", orientation=Qt.Horizontal,
            items=displayed_sizes, contentsLength=8,
            callback=self.update_size_calc,
        )
        self.log_scale_box = gui.hSlider(
            display_box, self, "size_log_scale",
            label="对数比例因子", minValue=1, maxValue=100, ticks=False,
            callback=self.invalidate_tree,
        )

        # Plot properties area
        plot_box = gui.widgetBox(self.controlArea, "绘图属性")
        self.cb_show_tooltips = gui.checkBox(
            plot_box, self, "tooltips_enabled",
            label="启动工具提示",
            callback=self.update_tooltip_enabled,
        )
        self.cb_show_legend = gui.checkBox(
            plot_box, self, "show_legend",
            label="显示图例",
            callback=self.update_show_legend,
        )

        gui.rubber(self.controlArea)
        gui.button(self.buttonsArea, self, label="重新绘制",
                   callback=self.redraw)

        self.controlArea.setSizePolicy(QSizePolicy.Preferred,
                                       QSizePolicy.Expanding)

        # MAIN AREA
        self.scene = TreeGraphicsScene(self)
        self.scene.selectionChanged.connect(self.commit)
        self.view = TreeGraphicsView(self.scene, padding=(150, 150))
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.mainArea.layout().addWidget(self.view)

        self.ptree = PythagorasTreeViewer(self)
        self.scene.addItem(self.ptree)
        self.view.set_central_widget(self.ptree)

        self.resize(800, 500)
        # Clear the widget to correctly set the initial values
        self.clear()

    @Inputs.tree
    def set_tree(self, model=None):
        """Replace the displayed tree with ``model`` (or clear on ``None``)."""
        self.closeContext()
        self.clear()
        self.model = model

        if model is not None:
            self.data = model.instances

            self._update_target_class_combo()
            self.tree_adapter = self._get_tree_adapter(self.model)
            self.ptree.clear()

            adjust_weight = self.SIZE_CALCULATION[self.size_calc_idx][1]
            self.ptree.set_tree(
                self.tree_adapter,
                weight_adjustment=adjust_weight,
                target_class_index=self.target_class_index,
            )

            self._update_depth_slider()
            self.color_palette = self.ptree.root.color_palette
            self._update_legend_colors()
            self._update_legend_visibility()

            self._update_info_box()
            self._update_main_area()

            if model.domain is not None:
                class_var = model.domain.class_var
            else:
                class_var = None
            self.openContext(class_var)

            self.update_depth()

            # The forest widget sets the following attributes on the tree,
            # describing the settings on the forest widget. To keep the tree
            # looking the same as on the forest widget, we prefer these
            # settings to context settings, if set.
            overrides = (
                ("meta_target_class_index", "target_class_index",
                 self.update_colors),
                ("meta_size_calc_idx", "size_calc_idx",
                 self.update_size_calc),
                ("meta_depth_limit", "depth_limit",
                 self.update_depth),
            )
            for meta_name, own_name, apply_change in overrides:
                if not hasattr(model, meta_name):
                    continue
                setattr(self, own_name, getattr(model, meta_name))
                apply_change()

        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, None))

    def clear(self):
        """Drop the current tree and reset every control to its empty state."""
        self.model = None
        self.data = None
        self.tree_adapter = None

        if self.legend is not None:
            self.scene.removeItem(self.legend)
        self.legend = None

        self.ptree.clear()
        self._clear_info_box()
        self._clear_target_class_combo()
        self._clear_depth_slider()
        self._update_log_scale_slider()

    def update_depth(self):
        """Push the current depth limit to the tree viewer."""
        self.ptree.set_depth_limit(self.depth_limit)

    def update_colors(self):
        """Recolor the nodes for the chosen target and rebuild the legend."""
        self.ptree.target_class_changed(self.target_class_index)
        self._update_legend_colors()

    def update_size_calc(self):
        """React to a change of the square size calculation method."""
        self._update_log_scale_slider()
        self.invalidate_tree()

    def redraw(self):
        """Shuffle the children in the adapter and redraw the tree."""
        if self.data is not None:
            self.tree_adapter.shuffle_children()
            self.invalidate_tree()

    def invalidate_tree(self):
        """Recompute the whole tree; does nothing when no model is set."""
        if self.model is None:
            return

        adjust_weight = self.SIZE_CALCULATION[self.size_calc_idx][1]
        self.ptree.set_tree(
            self.tree_adapter,
            weight_adjustment=adjust_weight,
            target_class_index=self.target_class_index,
        )
        self.ptree.set_depth_limit(self.depth_limit)
        self._update_main_area()

    def update_tooltip_enabled(self):
        """Forward the tooltip checkbox state to the tree viewer."""
        self.ptree.tooltip_changed(self.tooltips_enabled)

    def update_show_legend(self):
        """Apply the legend checkbox state."""
        self._update_legend_visibility()

    def _update_info_box(self):
        node_count = self.tree_adapter.num_nodes
        depth = self.tree_adapter.max_depth
        self.infolabel.setText(f"节点: {node_count}\n深度: {depth}")

    def _update_depth_slider(self):
        slider = self.depth_slider
        slider.parent().setEnabled(True)
        slider.setMaximum(self.tree_adapter.max_depth)
        self._set_max_depth()

    def _update_legend_visibility(self):
        if self.legend is None:
            return
        self.legend.setVisible(self.show_legend)

    def _update_log_scale_slider(self):
        """Enable the log scale slider only for the logarithmic method."""
        method_key = self.SIZE_CALCULATION[self.size_calc_idx][0]
        self.log_scale_box.parent().setEnabled(method_key == "Logarithmic")

    def _clear_info_box(self):
        self.infolabel.setText("没有树输入")

    def _clear_depth_slider(self):
        slider = self.depth_slider
        slider.parent().setEnabled(False)
        slider.setMaximum(0)

    def _clear_target_class_combo(self):
        self.target_class_combo.clear()
        self.target_class_index = -1

    def _set_max_depth(self):
        """Set the depth limit and its slider to the adapter's max depth."""
        deepest = self.tree_adapter.max_depth
        self.depth_limit = deepest
        self.depth_slider.setValue(self.depth_limit)

    def _update_main_area(self):
        # refresh the scene rect, cuts away the excess whitespace, and adds
        # padding for panning.
        visible_rect = self.view.central_widget_rect()
        self.scene.setSceneRect(visible_rect)
        # reset the zoom level
        self.view.recalculate_and_fit()
        self.view.update_anchored_items()

    def _get_tree_adapter(self, model):
        """Return the adapter matching the kind of ``model``."""
        if not isinstance(model, SklModel):
            return TreeAdapter(model)
        return SklTreeAdapter(model)

    def onDeleteWidget(self):
        """Clear the widget state when the widget is deleted."""
        super().onDeleteWidget()
        self.clear()

    def commit(self):
        """Send the instances of the selected nodes to the outputs."""
        if self.data is None:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(None)
            return

        node_labels = []
        for item in self.scene.selectedItems():
            if isinstance(item, SquareGraphicsItem):
                node_labels.append(item.tree_node.label)

        selected = self.tree_adapter.get_instances_in_nodes(node_labels)
        self.Outputs.selected_data.send(selected)

        row_indices = self.tree_adapter.get_indices(node_labels)
        annotated = create_annotated_table(self.data, row_indices)
        self.Outputs.annotated_data.send(annotated)

    def send_report(self):
        """Add the plot to the report."""
        self.report_plot()

    def _update_target_class_combo(self):
        """Refill the target combo and relabel it for the class type."""
        self._clear_target_class_combo()
        siblings = self.target_class_combo.parent().children()
        label = [child for child in siblings if isinstance(child, QLabel)][0]

        domain = self.data.domain
        if domain.has_discrete_class:
            label_text = "目标类别"
            values = ["None"]
            values.extend(c.title() for c in domain.class_vars[0].values)
        else:
            label_text = "Node color"
            values = list(ContinuousTreeNode.COLOR_METHODS.keys())

        label.setText(label_text)
        self.target_class_combo.addItems(values)
        # set it to 0, context will change if required
        self.target_class_index = 0

    def _update_legend_colors(self):
        """Replace the legend with one matching the current coloring."""
        if self.legend is not None:
            self.scene.removeItem(self.legend)

        if not self.data.domain.has_discrete_class:
            self._regression_update_legend_colors()
        else:
            self._classification_update_legend_colors()

    def _classification_update_legend_colors(self):
        target = self.target_class_index
        if target != 0:
            shown_class = (
                self.target_class_combo.itemText(target),
                # Combo index 0 is "None", so palette indices are shifted
                self.color_palette[target - 1],
            )
            items = (shown_class, ("other", QColor("#ffffff")))
            self.legend = OWDiscreteLegend(items=items, **self.LEGEND_OPTIONS)
        else:
            # No single target class: legend is built from the whole domain
            self.legend = OWDiscreteLegend(domain=self.model.domain,
                                           **self.LEGEND_OPTIONS)

        self.legend.setVisible(self.show_legend)
        self.scene.addItem(self.legend)

    def _regression_update_legend_colors(self):
        palette = self.model.domain.class_var.palette
        target = self.target_class_index
        if target == 1:
            # The colors are the class mean
            y_values = self.data.Y
            items = ((np.min(y_values), np.max(y_values)), palette)
        elif target == 2:
            # Colors are the stddev
            items = ((0, np.std(self.data.Y)), palette)
        else:
            items = None

        self.legend = OWContinuousLegend(items=items, **self.LEGEND_OPTIONS)
        self.legend.setVisible(self.show_legend)
        self.scene.addItem(self.legend)
class OWConfusionMatrix(widget.OWWidget):
    """Confusion matrix widget.

    Shows the confusion matrix of one learner from the input evaluation
    results and outputs the data instances that fall into the selected
    cells.
    """

    name = "Confusion Matrix"
    description = "Display a confusion matrix constructed from " \
                  "the results of classifier evaluations."
    icon = "icons/ConfusionMatrix.svg"
    priority = 1001

    inputs = [("Evaluation Results", Orange.evaluation.Results, "set_results")]
    outputs = [("Selected Data", Orange.data.Table)]

    # Entries of the "Show" combo box; `selected_quantity` indexes this list.
    quantities = ["Number of instances",
                  "Proportion of predicted",
                  "Proportion of actual"]

    settingsHandler = settings.ClassValuesContextHandler()

    # Index into `self.learners` of the learner whose matrix is shown.
    selected_learner = settings.Setting(0)
    # Set of (actual, predicted) pairs, i.e. table (row - 2, column - 2).
    selection = settings.ContextSetting(set())
    selected_quantity = settings.Setting(0)
    append_predictions = settings.Setting(True)
    append_probabilities = settings.Setting(False)
    autocommit = settings.Setting(True)

    UserAdviceMessages = [
        widget.Message(
            "Clicking on cells or in headers outputs the corresponding "
            "data instances",
            "click_cell")]

    def __init__(self):
        super().__init__()
        # `selected_learner` may arrive as a list (presumably from settings
        # stored by an older version - confirm); reduce it to its first
        # element, or to 0 when the list is empty.
        if isinstance(self.selected_learner, list):
            self.selected_learner = (self.selected_learner + [0])[0]

        self.data = None
        self.results = None
        self.learners = []
        self.headers = []

        box = gui.vBox(self.controlArea, "Learners")

        self.learners_box = gui.listBox(
            box, self, "selected_learner", "learners",
            callback=self._learner_changed
        )
        box = gui.vBox(self.controlArea, "Show")

        gui.comboBox(box, self, "selected_quantity", items=self.quantities,
                     callback=self._update)

        box = gui.vBox(self.controlArea, "Select")

        gui.button(box, self, "Select Correct",
                   callback=self.select_correct, autoDefault=False)
        gui.button(box, self, "Select Misclassified",
                   callback=self.select_wrong, autoDefault=False)
        gui.button(box, self, "Clear Selection",
                   callback=self.select_none, autoDefault=False)

        self.outputbox = box = gui.vBox(self.controlArea, "Output")
        gui.checkBox(box, self, "append_predictions",
                     "Predictions", callback=self._invalidate)
        gui.checkBox(box, self, "append_probabilities",
                     "Probabilities",
                     callback=self._invalidate)

        gui.auto_commit(self.controlArea, self, "autocommit",
                        "Send Selected", "Send Automatically")

        grid = QGridLayout()

        self.tablemodel = QStandardItemModel(self)
        view = self.tableview = QTableView(
            editTriggers=QTableView.NoEditTriggers)
        view.setModel(self.tablemodel)
        view.horizontalHeader().hide()
        view.verticalHeader().hide()
        view.horizontalHeader().setMinimumSectionSize(60)
        view.selectionModel().selectionChanged.connect(self._invalidate)
        view.setShowGrid(False)
        view.setItemDelegate(BorderedItemDelegate(Qt.white))
        view.clicked.connect(self.cell_clicked)
        grid.addWidget(view, 0, 0)
        self.mainArea.layout().addLayout(grid)

    def sizeHint(self):
        """Initial size"""
        return QSize(750, 490)

    def _item(self, i, j):
        """Return the model item at (i, j), or a new detached item."""
        return self.tablemodel.item(i, j) or QStandardItem()

    def _set_item(self, i, j, item):
        """Store `item` at (i, j) in the table model."""
        self.tablemodel.setItem(i, j, item)

    def _init_table(self, nclasses):
        """Lay out the static part of the table for `nclasses` classes.

        Rows/columns 0 and 1 hold the "Predicted"/"Actual" captions and the
        class-name headers; the matrix itself starts at (2, 2). The model is
        sized to `nclasses + 3` so that one extra row and column remain for
        the sums.
        """
        item = self._item(0, 2)
        item.setData("Predicted", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignCenter)
        item.setFlags(Qt.NoItemFlags)

        self._set_item(0, 2, item)
        item = self._item(2, 0)
        item.setData("Actual", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignHCenter | Qt.AlignBottom)
        item.setFlags(Qt.NoItemFlags)
        self.tableview.setItemDelegateForColumn(0, gui.VerticalItemDelegate())
        self._set_item(2, 0, item)
        self.tableview.setSpan(0, 2, 1, nclasses)
        self.tableview.setSpan(2, 0, nclasses, 1)

        font = self.tablemodel.invisibleRootItem().font()
        bold_font = QFont(font)
        bold_font.setBold(True)

        # The 2x2 top-left corner is inert.
        for i in (0, 1):
            for j in (0, 1):
                item = self._item(i, j)
                item.setFlags(Qt.NoItemFlags)
                self._set_item(i, j, item)

        # Each header label appears twice: in header row 1 and in header
        # column 1.
        for p, label in enumerate(self.headers):
            for i, j in ((1, p + 2), (p + 2, 1)):
                item = self._item(i, j)
                item.setData(label, Qt.DisplayRole)
                item.setFont(bold_font)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                # Every header except the last (the sum symbol) gets a
                # border: "r" for cells in column 1, "b" for cells in row 1.
                if p < len(self.headers) - 1:
                    item.setData("br"[j == 1], BorderRole)
                    item.setData(QColor(192, 192, 192), BorderColorRole)
                self._set_item(i, j, item)

        hor_header = self.tableview.horizontalHeader()
        # NOTE(review): `setResizeMode` is the Qt4 spelling; Qt5 names it
        # `setSectionResizeMode` - confirm which binding this file targets.
        if len(' '.join(self.headers)) < 120:
            hor_header.setResizeMode(QHeaderView.ResizeToContents)
        else:
            hor_header.setDefaultSectionSize(60)
        self.tablemodel.setRowCount(nclasses + 3)
        self.tablemodel.setColumnCount(nclasses + 3)

    def set_results(self, results):
        """Set the input results.

        Results whose data has no discrete class only raise a warning and
        are otherwise treated like a missing input. Results that carry no
        data raise NotImplementedError.
        """
        prev_sel_learner = self.selected_learner
        self.clear()
        self.warning([0, 1])
        self.closeContext()

        data = None
        if results is not None and results.data is not None:
            data = results.data

        if data is not None and not data.domain.has_discrete_class:
            self.warning(
                0, "Confusion Matrix cannot be used for regression results.")
            # Everything below reads the class variable's `values`, which
            # assumes a discrete class, so drop the input after warning.
            results = None
            data = None

        self.results = results
        self.data = data

        if data is not None:
            class_values = data.domain.class_var.values
        elif results is not None:
            raise NotImplementedError

        if results is None:
            self.report_button.setDisabled(True)
        else:
            self.report_button.setDisabled(False)

            nmodels = results.predicted.shape[0]
            # list(...) keeps the concatenation valid when `values` is a
            # tuple rather than a list.
            self.headers = list(class_values) + \
                           [unicodedata.lookup("N-ARY SUMMATION")]

            # NOTE: The 'learner_names' is set in 'Test Learners' widget.
            if hasattr(results, "learner_names"):
                self.learners = results.learner_names
            else:
                self.learners = ["Learner #{}".format(i + 1)
                                 for i in range(nmodels)]

            self._init_table(len(class_values))
            self.openContext(data.domain.class_var)
            # Keep the previously selected learner when it still exists.
            if prev_sel_learner is None or \
                    prev_sel_learner >= len(self.learners):
                self.selected_learner = 0
            else:
                self.selected_learner = prev_sel_learner
            self._update()
            self._set_selection()
            self.unconditional_commit()

    def clear(self):
        """Reset the widget, clear controls"""
        self.results = None
        self.data = None
        self.tablemodel.clear()
        self.headers = []
        # Clear learners last. This action will invoke `_learner_changed`
        self.learners = []

    def select_correct(self):
        """Select the diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            index = self.tablemodel.index(i, i)
            selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_wrong(self):
        """Select the off-diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            for j in range(i + 1, n):
                index = self.tablemodel.index(i, j)
                selection.select(index, index)
                index = self.tablemodel.index(j, i)
                selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_none(self):
        """Reset selection"""
        self.tableview.selectionModel().clear()

    def cell_clicked(self, model_index):
        """Handle cell click event.

        A click in header row/column 1 or in the last row/column selects the
        whole corresponding column/row; a click on cell (1, 1) or on the
        bottom-right cell selects everything from (2, 2) onwards. Clicks in
        row 0 or column 0 are ignored.
        """
        i, j = model_index.row(), model_index.column()
        if not i or not j:
            return
        n = self.tablemodel.rowCount()
        index = self.tablemodel.index
        selection = None
        if i == j == 1 or i == j == n - 1:
            selection = QItemSelection(index(2, 2), index(n - 1, n - 1))
        elif i in (1, n - 1):
            selection = QItemSelection(index(2, j), index(n - 1, j))
        elif j in (1, n - 1):
            selection = QItemSelection(index(i, 2), index(i, n - 1))

        if selection is not None:
            self.tableview.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def commit(self):
        """Output data instances corresponding to selected cells.

        Sends ``None`` when there are no results, no data or no selected
        learner. Otherwise the output holds the selected rows of the data,
        with the prediction and/or the probabilities appended as meta
        columns according to the two "Output" check boxes.
        """
        if self.results is not None and self.data is not None \
                and self.selected_learner is not None:
            indices = self.tableview.selectedIndexes()
            # Table coordinates -> (actual, predicted) pairs.
            indices = {(ind.row() - 2, ind.column() - 2) for ind in indices}
            actual = self.results.actual
            learner_name = self.learners[self.selected_learner]
            predicted = self.results.predicted[self.selected_learner]
            selected = [i for i, t in enumerate(zip(actual, predicted))
                        if t in indices]
            row_indices = self.results.row_indices[selected]

            extra = []
            class_var = self.data.domain.class_var
            metas = self.data.domain.metas

            if self.append_predictions:
                predicted = numpy.array(predicted[selected], dtype=object)
                extra.append(predicted.reshape(-1, 1))
                var = Orange.data.DiscreteVariable(
                    "{}({})".format(class_var.name, learner_name),
                    class_var.values
                )
                metas = metas + (var,)

            if self.append_probabilities and \
                    self.results.probabilities is not None:
                probs = self.results.probabilities[self.selected_learner,
                                                   selected]
                extra.append(numpy.array(probs, dtype=object))
                pvars = [Orange.data.ContinuousVariable("p({})".format(value))
                         for value in class_var.values]
                metas = metas + tuple(pvars)

            X = self.data.X[row_indices]
            Y = self.data.Y[row_indices]
            M = self.data.metas[row_indices]
            row_ids = self.data.ids[row_indices]

            M = numpy.hstack((M,) + tuple(extra))
            domain = Orange.data.Domain(
                self.data.domain.attributes,
                self.data.domain.class_vars,
                metas
            )
            data = Orange.data.Table.from_numpy(domain, X, Y, M)
            data.ids = row_ids
            data.name = learner_name

        else:
            data = None

        self.send("Selected Data", data)

    def _invalidate(self):
        """Store the current table selection in `selection` and commit."""
        indices = self.tableview.selectedIndexes()
        self.selection = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        self.commit()

    def _set_selection(self):
        """Apply the stored `selection` to the table view."""
        selection = QItemSelection()
        index = self.tableview.model().index
        for row, col in self.selection:
            sel = index(row + 2, col + 2)
            selection.select(sel, sel)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def _learner_changed(self):
        """Redraw the matrix, restore the selection and commit."""
        self._update()
        self._set_selection()
        self.commit()

    def _update(self):
        """Fill the matrix cells and the sums for the selected learner.

        Depending on `selected_quantity` the cells show counts, percentages
        of the column sum or percentages of the row sum. Values that are NaN
        or infinite are displayed as "NA". Does nothing without results or
        without a selected learner.
        """
        def _isinvalid(x):
            return isnan(x) or isinf(x)

        # Update the displayed confusion matrix
        if self.results is not None and self.selected_learner is not None:
            cmatrix = confusion_matrix(self.results, self.selected_learner)
            colsum = cmatrix.sum(axis=0)
            rowsum = cmatrix.sum(axis=1)
            n = len(cmatrix)
            diag = numpy.diag_indices(n)

            # Off-diagonal shading is scaled separately from the diagonal,
            # so the diagonal is zeroed while the divisor is computed.
            colors = cmatrix.astype(numpy.double)
            colors[diag] = 0
            # Empty rows/columns divide by zero on purpose; the resulting
            # NaN/inf cells are rendered as "NA" below, so silence the
            # floating-point warnings here.
            with numpy.errstate(divide="ignore", invalid="ignore"):
                if self.selected_quantity == 0:
                    # Builtin `int`: the `numpy.int` alias no longer exists
                    # in current NumPy releases.
                    normalized = cmatrix.astype(int)
                    formatstr = "{}"
                    div = numpy.array([colors.max()])
                else:
                    if self.selected_quantity == 1:
                        normalized = 100 * cmatrix / colsum
                        div = colors.max(axis=0)
                    else:
                        normalized = 100 * cmatrix / rowsum[:, numpy.newaxis]
                        div = colors.max(axis=1)[:, numpy.newaxis]
                    formatstr = "{:2.1f} %"
                div[div == 0] = 1
                colors /= div
                colors[diag] = normalized[diag] / normalized[diag].max()

            for i in range(n):
                for j in range(n):
                    val = normalized[i, j]
                    col_val = colors[i, j]
                    item = self._item(i + 2, j + 2)
                    item.setData(
                        "NA" if _isinvalid(val) else formatstr.format(val),
                        Qt.DisplayRole)
                    # Hue 240 on the diagonal, hue 0 elsewhere; a larger
                    # `col_val` lowers the lightness.
                    bkcolor = QColor.fromHsl(
                        [0, 240][i == j], 160,
                        255 if _isinvalid(col_val) else int(255 - 30 * col_val))
                    item.setData(QBrush(bkcolor), Qt.BackgroundRole)
                    item.setData("trbl", BorderRole)
                    item.setToolTip("actual: {}\npredicted: {}".format(
                        self.headers[i], self.headers[j]))
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable)
                    self._set_item(i + 2, j + 2, item)

            bold_font = self.tablemodel.invisibleRootItem().font()
            bold_font.setBold(True)

            def _sum_item(value, border=""):
                item = QStandardItem()
                item.setData(value, Qt.DisplayRole)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                item.setFont(bold_font)
                item.setData(border, BorderRole)
                item.setData(QColor(192, 192, 192), BorderColorRole)
                return item

            # Column sums go into the last row, row sums into the last
            # column, the grand total into the bottom-right corner.
            for i in range(n):
                self._set_item(n + 2, i + 2, _sum_item(int(colsum[i]), "t"))
                self._set_item(i + 2, n + 2, _sum_item(int(rowsum[i]), "l"))
            self._set_item(n + 2, n + 2, _sum_item(int(rowsum.sum())))

    def send_report(self):
        """Send report"""
        if self.results is not None and self.selected_learner is not None:
            self.report_table(
                "Confusion matrix for {} (showing {})".
                format(self.learners[self.selected_learner],
                       self.quantities[self.selected_quantity].lower()),
                self.tableview)