Ejemplo n.º 1
0
class OWTestLearners(OWWidget):
    name = "Test & Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Learner, "set_learner", widget.Multiple),
              ("Data", Table, "set_train_data", widget.Default),
              ("Test Data", Table, "set_test_data"),
              ("Preprocessor", Preprocess, "set_preprocessor")]

    outputs = [("Predictions", Table),
               ("Evaluation Results", Results)]

    settingsHandler = settings.ClassValuesContextHandler()

    #: Resampling/testing types
    KFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4
    #: Numbers of folds
    NFolds = [2, 3, 5, 10, 20]
    #: Number of repetitions
    NRepeats = [2, 3, 5, 10, 20, 50, 100]
    #: Sample sizes
    SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95]

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    n_folds = settings.Setting(3)
    #: Stratified sampling for K-fold
    cv_stratified = settings.Setting(True)
    #: Number of repeats for ShuffleSplit sampling
    n_repeats = settings.Setting(3)
    #: ShuffleSplit sample size
    sample_size = settings.Setting(9)
    #: Stratified sampling for Random Sampling
    shuffle_stratified = settings.Setting(True)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    class Error(OWWidget.Error):
        train_data_empty = Msg("Train data set is empty.")
        test_data_empty = Msg("Test data set is empty.")
        class_required = Msg("Train data input requires a target variable.")
        too_many_classes = Msg("Too many target variables.")
        class_required_test = Msg("Test data input requires a target variable.")
        too_many_folds = Msg("Number of folds exceeds the data size")
        class_inconsistent = Msg("Test and train data sets "
                                 "have different target variables.")

    class Warning(OWWidget.Warning):
        missing_data = \
            Msg("Instances with unknown target values were removed from{}data.")
        test_data_missing = Msg("Missing separate test data input.")
        scores_not_computed = Msg("Some scores could not be computed.")
        test_data_unused = Msg("Test data is present but unused. "
                               "Select 'Test on test data' to use it.")

    class Information(OWWidget.Information):
        data_sampled = Msg("Train data has been sampled")
        test_data_sampled = Msg("Test data has been sampled")

    def __init__(self):
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False

        #: An Ordered dictionary with current inputs and their testing results.
        self.learners = OrderedDict()

        sbox = gui.vBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox, self, "n_folds", label="Number of folds: ",
            items=[str(x) for x in self.NFolds], maximumContentsLength=3,
            orientation=Qt.Horizontal, callback=self.kfold_changed)
        gui.checkBox(
            ibox, self, "cv_stratified", "Stratified",
            callback=self.kfold_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox, self, "n_repeats", label="Repeat train/test: ",
            items=[str(x) for x in self.NRepeats], maximumContentsLength=3,
            orientation=Qt.Horizontal, callback=self.shuffle_split_changed)
        gui.comboBox(
            ibox, self, "sample_size", label="Training set size: ",
            items=["{} %".format(x) for x in self.SampleSizes],
            maximumContentsLength=5, orientation=Qt.Horizontal,
            callback=self.shuffle_split_changed)
        gui.checkBox(
            ibox, self, "shuffle_stratified", "Stratified",
            callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        self.cbox = gui.vBox(self.controlArea, "Target Class")
        self.class_selection_combo = gui.comboBox(
            self.cbox, self, "class_selection", items=[],
            sendSelectedValue=True, valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        self.view = gui.TableView(
            wordWrap=True,
        )
        header = self.view.horizontalHeader()
        header.setSectionResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.vBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def sizeHint(self):
        return QSize(780, 1)

    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.
        """
        if key in self.learners and learner is None:
            # Removed
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, None)
            self._invalidate([key])

    def set_train_data(self, data):
        """
        Set the input training dataset.
        """
        self.Information.data_sampled.clear()
        self.Error.train_data_empty.clear()
        if data is not None and not len(data):
            self.Error.train_data_empty()
            data = None
        if data and not data.domain.class_vars:
            self.Error.class_required()
            data = None
        elif data and len(data.domain.class_vars) > 1:
            self.Error.too_many_classes()
            data = None
        else:
            self.Error.class_required.clear()
            self.Error.too_many_classes.clear()

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.train_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = RemoveNaNClasses(data)
        else:
            self.Warning.missing_data.clear()

        self.data = data
        self.closeContext()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain.class_var)
        self._invalidate()

    def set_test_data(self, data):
        """
        Set the input separate testing dataset.
        """
        self.Information.test_data_sampled.clear()
        self.Error.test_data_empty.clear()
        if data is not None and not len(data):
            self.Error.test_data_empty()
            data = None
        if data and not data.domain.class_var:
            self.Error.class_required()
            data = None
        else:
            self.Error.class_required_test.clear()

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.test_data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.test_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = RemoveNaNClasses(data)
        else:
            self.Warning.missing_data.clear()

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _which_missing_data(self):
        return {(True, True): " ",  # both, don't specify
                (True, False): " train ",
                (False, True): " test "}[(self.train_data_missing_vals,
                                          self.test_data_missing_vals)]

    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self.commit()

    def kfold_changed(self):
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def shuffle_split_changed(self):
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def _update_results(self):
        """
        Run/evaluate the learners.
        """
        self.Warning.test_data_unused.clear()
        self.Warning.test_data_missing.clear()
        self.warning()
        self.Error.class_inconsistent.clear()
        self.Error.too_many_folds.clear()
        self.error()
        if self.data is None:
            return

        class_var = self.data.domain.class_var

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                if not self.Error.test_data_empty.is_shown():
                    self.Warning.test_data_missing()
                return
            elif self.test_data.domain.class_var != class_var:
                self.Error.class_inconsistent()
                return

        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]
        if len(items) == 0:
            return

        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.Warning.test_data_unused()

        rstate = 42
        def update_progress(finished):
            self.progressBarSet(100 * finished)
        common_args = dict(
            store_data=True,
            preprocessor=self.preprocessor,
            callback=update_progress,
            n_jobs=-1,
        )
        self.setStatusMessage("Running")

        with self.progressBar():
            try:
                folds = self.NFolds[self.n_folds]
                if self.resampling == OWTestLearners.KFold:
                    if len(self.data) < folds:
                        self.Error.too_many_folds()
                        return
                    warnings = []
                    results = Orange.evaluation.CrossValidation(
                        self.data, learners, k=folds,
                        random_state=rstate, warnings=warnings, **common_args)
                    if warnings:
                        self.warning(warnings[0])
                elif self.resampling == OWTestLearners.LeaveOneOut:
                    results = Orange.evaluation.LeaveOneOut(
                        self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.ShuffleSplit:
                    train_size = self.SampleSizes[self.sample_size] / 100
                    results = Orange.evaluation.ShuffleSplit(
                        self.data, learners,
                        n_resamples=self.NRepeats[self.n_repeats],
                        train_size=train_size, test_size=None,
                        stratified=self.shuffle_stratified,
                        random_state=rstate, **common_args)
                elif self.resampling == OWTestLearners.TestOnTrain:
                    results = Orange.evaluation.TestOnTrainingData(
                        self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.TestOnTest:
                    results = Orange.evaluation.TestOnTestData(
                        self.data, self.test_data, learners, **common_args)
                else:
                    assert False
            except (RuntimeError, ValueError) as e:
                self.error(str(e))
                self.setStatusMessage("")
                return
            else:
                self.error()

        learner_key = {slot.learner: key for key, slot in self.learners.items()}
        for learner, result in zip(learners, results.split_by_model()):
            stats = None
            if class_var.is_discrete:
                scorers = classification_stats.scores
            elif class_var.is_continuous:
                scorers = regression_stats.scores
            else:
                scorers = None
            if scorers:
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [Try(lambda: score(result)) for score in scorers]
                    result = Try.Success(result)
            key = learner_key[learner]
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self.setStatusMessage("")

    def _update_header(self):
        # Set the correct horizontal header labels on the results_model.
        headers = ["Method"]
        if self.data is not None:
            if self.data.domain.has_discrete_class:
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)

        # remove possible extra columns from the model.
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.view.model()
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                            self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            head.setData(key, Qt.UserRole)
            if isinstance(slot.results, Try.Fail):
                head.setToolTip(str(slot.results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                errors.append("{name} failed with error:\n"
                              "{exc.__class__.__name__}: {exc!s}"
                              .format(name=name, exc=slot.results.exception))

            row = [head]

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(
                        slot.results.value, target_index)

                    stats = [Try(lambda: score(ovr_results))
                             for score in classification_stats.scores]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat in stats:
                    item = QStandardItem()
                    if stat.success:
                        item.setText("{:.3f}".format(stat.value[0]))
                    else:
                        item.setToolTip(str(stat.exception))
                        has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        self.error("\n".join(errors), shown=bool(errors))
        self.Warning.scores_not_computed(shown=has_missing_scores)

    def _update_class_selection(self):
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = [self.TARGET_AVERAGE] + class_var.values
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        self._update_stats_model()

    def _invalidate(self, which=None):
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.view.model()
        statmodelkeys = [model.item(row, 0).data(Qt.UserRole)
                         for row in range(model.rowCount())]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.commit()

    def commit(self):
        """Recompute and output the results"""
        self._update_header()
        # Update the view to display the model names
        self._update_stats_model()
        self._update_results()
        self._update_stats_model()
        valid = [slot for slot in self.learners.values()
                 if slot.results is not None and slot.results.success]
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [learner_name(slot.learner)
                                      for slot in valid]

            # Predictions & Probabilities
            predictions = combined.get_augmented_data(combined.learner_names)
        else:
            combined = None
            predictions = None
        self.send("Evaluation Results", combined)
        self.send("Predictions", predictions)

    def send_report(self):
        """Report on the testing schema and results"""
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            stratified = 'Stratified ' if self.cv_stratified else ''
            items = [("Sampling type", "{}{}-fold Cross validation".
                      format(stratified, self.NFolds[self.n_folds]))]
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.ShuffleSplit:
            stratified = 'Stratified ' if self.shuffle_stratified else ''
            items = [("Sampling type",
                      "{}Shuffle split, {} random samples with {}% data "
                      .format(stratified, self.NRepeats[self.n_repeats],
                              self.SampleSizes[self.sample_size]))]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.view)
Ejemplo n.º 2
0
class OWConfusionMatrix(widget.OWWidget):
    """Confusion matrix widget"""

    name = "Confusion Matrix"
    description = "Display a confusion matrix constructed from " \
                  "the results of classifier evaluations."
    icon = "icons/ConfusionMatrix.svg"
    priority = 1001

    class Inputs:
        evaluation_results = Input("Evaluation Results",
                                   Orange.evaluation.Results)

    class Outputs:
        selected_data = Output("Selected Data",
                               Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    quantities = [
        "Number of instances", "Proportion of predicted",
        "Proportion of actual"
    ]

    settings_version = 1
    settingsHandler = settings.ClassValuesContextHandler()

    selected_learner = settings.Setting([0], schema_only=True)
    selection = settings.ContextSetting(set())
    selected_quantity = settings.Setting(0)
    append_predictions = settings.Setting(True)
    append_probabilities = settings.Setting(False)
    autocommit = settings.Setting(True)

    UserAdviceMessages = [
        widget.Message(
            "Clicking on cells or in headers outputs the corresponding "
            "data instances", "click_cell")
    ]

    class Error(widget.OWWidget.Error):
        no_regression = Msg("Confusion Matrix cannot show regression results.")
        invalid_values = Msg(
            "Evaluation Results input contains invalid values")

    def __init__(self):
        super().__init__()

        self.data = None
        self.results = None
        self.learners = []
        self.headers = []

        self.learners_box = gui.listBox(self.controlArea,
                                        self,
                                        "selected_learner",
                                        "learners",
                                        box=True,
                                        callback=self._learner_changed)

        self.outputbox = gui.vBox(self.controlArea, "Output")
        box = gui.hBox(self.outputbox)
        gui.checkBox(box,
                     self,
                     "append_predictions",
                     "Predictions",
                     callback=self._invalidate)
        gui.checkBox(box,
                     self,
                     "append_probabilities",
                     "Probabilities",
                     callback=self._invalidate)

        gui.auto_commit(self.outputbox,
                        self,
                        "autocommit",
                        "Send Selected",
                        "Send Automatically",
                        box=False)

        self.mainArea.layout().setContentsMargins(0, 0, 0, 0)

        box = gui.vBox(self.mainArea, box=True)

        sbox = gui.hBox(box)
        gui.rubber(sbox)
        gui.comboBox(sbox,
                     self,
                     "selected_quantity",
                     items=self.quantities,
                     label="Show: ",
                     orientation=Qt.Horizontal,
                     callback=self._update)

        self.tablemodel = QStandardItemModel(self)
        view = self.tableview = QTableView(
            editTriggers=QTableView.NoEditTriggers)
        view.setModel(self.tablemodel)
        view.horizontalHeader().hide()
        view.verticalHeader().hide()
        view.horizontalHeader().setMinimumSectionSize(60)
        view.selectionModel().selectionChanged.connect(self._invalidate)
        view.setShowGrid(False)
        view.setItemDelegate(BorderedItemDelegate(Qt.white))
        view.setSizePolicy(QSizePolicy.MinimumExpanding,
                           QSizePolicy.MinimumExpanding)
        view.clicked.connect(self.cell_clicked)
        box.layout().addWidget(view)

        selbox = gui.hBox(box)
        gui.button(selbox,
                   self,
                   "Select Correct",
                   callback=self.select_correct,
                   autoDefault=False)
        gui.button(selbox,
                   self,
                   "Select Misclassified",
                   callback=self.select_wrong,
                   autoDefault=False)
        gui.button(selbox,
                   self,
                   "Clear Selection",
                   callback=self.select_none,
                   autoDefault=False)

    def sizeHint(self):
        """Initial size"""
        return QSize(750, 340)

    def _item(self, i, j):
        return self.tablemodel.item(i, j) or QStandardItem()

    def _set_item(self, i, j, item):
        self.tablemodel.setItem(i, j, item)

    def _init_table(self, nclasses):
        item = self._item(0, 2)
        item.setData("Predicted", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignCenter)
        item.setFlags(Qt.NoItemFlags)

        self._set_item(0, 2, item)
        item = self._item(2, 0)
        item.setData("Actual", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignHCenter | Qt.AlignBottom)
        item.setFlags(Qt.NoItemFlags)
        self.tableview.setItemDelegateForColumn(0, gui.VerticalItemDelegate())
        self._set_item(2, 0, item)
        self.tableview.setSpan(0, 2, 1, nclasses)
        self.tableview.setSpan(2, 0, nclasses, 1)

        font = self.tablemodel.invisibleRootItem().font()
        bold_font = QFont(font)
        bold_font.setBold(True)

        for i in (0, 1):
            for j in (0, 1):
                item = self._item(i, j)
                item.setFlags(Qt.NoItemFlags)
                self._set_item(i, j, item)

        for p, label in enumerate(self.headers):
            for i, j in ((1, p + 2), (p + 2, 1)):
                item = self._item(i, j)
                item.setData(label, Qt.DisplayRole)
                item.setFont(bold_font)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                if p < len(self.headers) - 1:
                    item.setData("br"[j == 1], BorderRole)
                    item.setData(QColor(192, 192, 192), BorderColorRole)
                self._set_item(i, j, item)

        hor_header = self.tableview.horizontalHeader()
        if len(' '.join(self.headers)) < 120:
            hor_header.setSectionResizeMode(QHeaderView.ResizeToContents)
        else:
            hor_header.setDefaultSectionSize(60)
        self.tablemodel.setRowCount(nclasses + 3)
        self.tablemodel.setColumnCount(nclasses + 3)

    @Inputs.evaluation_results
    def set_results(self, results):
        """Set the input results."""

        prev_sel_learner = self.selected_learner.copy()
        self.clear()
        self.warning()
        self.closeContext()

        data = None
        if results is not None and results.data is not None:
            data = results.data[results.row_indices]

        if data is not None and not data.domain.has_discrete_class:
            self.Error.no_regression()
            data = results = None
        else:
            self.Error.no_regression.clear()

        nan_values = False
        if results is not None:
            assert isinstance(results, Orange.evaluation.Results)
            if np.any(np.isnan(results.actual)) or \
                    np.any(np.isnan(results.predicted)):
                # Error out here (could filter them out with a warning
                # instead).
                nan_values = True
                results = data = None

        if nan_values:
            self.Error.invalid_values()
        else:
            self.Error.invalid_values.clear()

        self.results = results
        self.data = data

        if data is not None:
            class_values = data.domain.class_var.values
        elif results is not None:
            raise NotImplementedError

        if results is None:
            self.report_button.setDisabled(True)
        else:
            self.report_button.setDisabled(False)

            nmodels = results.predicted.shape[0]
            self.headers = class_values + \
                           [unicodedata.lookup("N-ARY SUMMATION")]

            # NOTE: The 'learner_names' is set in 'Test Learners' widget.
            if hasattr(results, "learner_names"):
                self.learners = results.learner_names
            else:
                self.learners = [
                    "Learner #{}".format(i + 1) for i in range(nmodels)
                ]

            self._init_table(len(class_values))
            self.openContext(data.domain.class_var)
            if not prev_sel_learner or prev_sel_learner[0] >= len(
                    self.learners):
                if self.learners:
                    self.selected_learner[:] = [0]
            else:
                self.selected_learner[:] = prev_sel_learner
            self._update()
            self._set_selection()
            self.unconditional_commit()

    def clear(self):
        """Reset the widget, clear controls"""
        self.results = None
        self.data = None
        self.tablemodel.clear()
        self.headers = []
        # Clear learners last. This action will invoke `_learner_changed`
        self.learners = []

    def select_correct(self):
        """Select the diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            index = self.tablemodel.index(i, i)
            selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_wrong(self):
        """Select the off-diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            for j in range(i + 1, n):
                index = self.tablemodel.index(i, j)
                selection.select(index, index)
                index = self.tablemodel.index(j, i)
                selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_none(self):
        """Reset selection"""
        self.tableview.selectionModel().clear()

    def cell_clicked(self, model_index):
        """Handle cell click event"""
        i, j = model_index.row(), model_index.column()
        if not i or not j:
            return
        n = self.tablemodel.rowCount()
        index = self.tablemodel.index
        selection = None
        if i == j == 1 or i == j == n - 1:
            selection = QItemSelection(index(2, 2), index(n - 1, n - 1))
        elif i in (1, n - 1):
            selection = QItemSelection(index(2, j), index(n - 1, j))
        elif j in (1, n - 1):
            selection = QItemSelection(index(i, 2), index(i, n - 1))

        if selection is not None:
            self.tableview.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def _prepare_data(self):
        indices = self.tableview.selectedIndexes()
        indices = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        actual = self.results.actual
        learner_name = self.learners[self.selected_learner[0]]
        predicted = self.results.predicted[self.selected_learner[0]]
        selected = [
            i for i, t in enumerate(zip(actual, predicted)) if t in indices
        ]

        extra = []
        class_var = self.data.domain.class_var
        metas = self.data.domain.metas

        if self.append_predictions:
            extra.append(predicted.reshape(-1, 1))
            var = Orange.data.DiscreteVariable(
                "{}({})".format(class_var.name, learner_name),
                class_var.values)
            metas = metas + (var, )

        if self.append_probabilities and \
                        self.results.probabilities is not None:
            probs = self.results.probabilities[self.selected_learner[0]]
            extra.append(np.array(probs, dtype=object))
            pvars = [
                Orange.data.ContinuousVariable("p({})".format(value))
                for value in class_var.values
            ]
            metas = metas + tuple(pvars)

        domain = Orange.data.Domain(self.data.domain.attributes,
                                    self.data.domain.class_vars, metas)
        data = self.data.transform(domain)
        if len(extra):
            data.metas[:, len(self.data.domain.metas):] = \
                np.hstack(tuple(extra))
        data.name = learner_name

        if selected:
            annotated_data = create_annotated_table(data, selected)
            data = data[selected]
        else:
            annotated_data = create_annotated_table(data, [])
            data = None

        return data, annotated_data

    def commit(self):
        """Output data instances corresponding to selected cells"""
        if self.results is not None and self.data is not None \
                and self.selected_learner:
            data, annotated_data = self._prepare_data()
        else:
            data = None
            annotated_data = None

        self.Outputs.selected_data.send(data)
        self.Outputs.annotated_data.send(annotated_data)

    def _invalidate(self):
        indices = self.tableview.selectedIndexes()
        self.selection = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        self.commit()

    def _set_selection(self):
        selection = QItemSelection()
        index = self.tableview.model().index
        for row, col in self.selection:
            sel = index(row + 2, col + 2)
            selection.select(sel, sel)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def _learner_changed(self):
        self._update()
        self._set_selection()
        self.commit()

    def _update(self):
        def _isinvalid(x):
            return isnan(x) or isinf(x)

        # Update the displayed confusion matrix
        if self.results is not None and self.selected_learner:
            cmatrix = confusion_matrix(self.results, self.selected_learner[0])
            colsum = cmatrix.sum(axis=0)
            rowsum = cmatrix.sum(axis=1)
            n = len(cmatrix)
            diag = np.diag_indices(n)

            colors = cmatrix.astype(np.double)
            colors[diag] = 0
            if self.selected_quantity == 0:
                normalized = cmatrix.astype(np.int)
                formatstr = "{}"
                div = np.array([colors.max()])
            else:
                if self.selected_quantity == 1:
                    normalized = 100 * cmatrix / colsum
                    div = colors.max(axis=0)
                else:
                    normalized = 100 * cmatrix / rowsum[:, np.newaxis]
                    div = colors.max(axis=1)[:, np.newaxis]
                formatstr = "{:2.1f} %"
            div[div == 0] = 1
            colors /= div
            colors[diag] = normalized[diag] / normalized[diag].max()

            for i in range(n):
                for j in range(n):
                    val = normalized[i, j]
                    col_val = colors[i, j]
                    item = self._item(i + 2, j + 2)
                    item.setData(
                        "NA" if _isinvalid(val) else formatstr.format(val),
                        Qt.DisplayRole)
                    bkcolor = QColor.fromHsl(
                        [0, 240][i == j], 160,
                        255 if _isinvalid(col_val) else int(255 -
                                                            30 * col_val))
                    item.setData(QBrush(bkcolor), Qt.BackgroundRole)
                    item.setData("trbl", BorderRole)
                    item.setToolTip("actual: {}\npredicted: {}".format(
                        self.headers[i], self.headers[j]))
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable)
                    self._set_item(i + 2, j + 2, item)

            bold_font = self.tablemodel.invisibleRootItem().font()
            bold_font.setBold(True)

            def _sum_item(value, border=""):
                item = QStandardItem()
                item.setData(value, Qt.DisplayRole)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                item.setFont(bold_font)
                item.setData(border, BorderRole)
                item.setData(QColor(192, 192, 192), BorderColorRole)
                return item

            for i in range(n):
                self._set_item(n + 2, i + 2, _sum_item(int(colsum[i]), "t"))
                self._set_item(i + 2, n + 2, _sum_item(int(rowsum[i]), "l"))
            self._set_item(n + 2, n + 2, _sum_item(int(rowsum.sum())))

    def send_report(self):
        """Send report"""
        if self.results is not None and self.selected_learner:
            self.report_table(
                "Confusion matrix for {} (showing {})".format(
                    self.learners[self.selected_learner[0]],
                    self.quantities[self.selected_quantity].lower()),
                self.tableview)

    @classmethod
    def migrate_settings(cls, settings, version):
        if not version:
            # For some period of time the 'selected_learner' property was
            # changed from List[int] -> int
            # (commit 4e49bb3fd0e11262f3ebf4b1116a91a4b49cc982) and then back
            # again (commit 8a492d79a2e17154a0881e24a05843406c8892c0)
            if "selected_learner" in settings and \
                    isinstance(settings["selected_learner"], int):
                settings["selected_learner"] = [settings["selected_learner"]]
Ejemplo n.º 3
0
class OWTestLearners(widget.OWWidget):
    name = "Test Learners"
    description = ""
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Orange.classification.Learner, "set_learner",
               widget.Multiple),
              ("Data", Orange.data.Table, "set_train_data", widget.Default),
              ("Test Data", Orange.data.Table, "set_test_data")]

    outputs = [("Evaluation Results", Orange.evaluation.Results)]

    settingsHandler = settings.ClassValuesContextHandler()

    #: Resampling/testing types
    KFold, LeaveOneOut, Bootstrap, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    k_folds = settings.Setting(10)
    #: Number of repeats for bootstrap sampling
    n_repeat = settings.Setting(10)
    #: Bootstrap sampling p
    sample_p = settings.Setting(75)

    class_selection = settings.ContextSetting("(None)")

    def __init__(self, parent=None):
        super().__init__(parent)

        self.train_data = None
        self.test_data = None

        #: An Ordered dictionary with current inputs and their testing
        #: results.
        self.learners = OrderedDict()

        sbox = gui.widgetBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(sbox,
                                self,
                                "resampling",
                                callback=self._param_changed)
        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox,
                 self,
                 "k_folds",
                 2,
                 50,
                 label="Number of folds:",
                 callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Leave one out")
        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox,
                 self,
                 "n_repeat",
                 2,
                 50,
                 label="Repeat train/test",
                 callback=self.bootstrap_changed)
        gui.widgetLabel(ibox, "Relative training set size:")
        gui.hSlider(ibox,
                    self,
                    "sample_p",
                    minValue=1,
                    maxValue=100,
                    ticks=20,
                    vertical=False,
                    labelFormat="%d %%",
                    callback=self.bootstrap_changed)

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        rbox.layout().addSpacing(5)
        gui.button(rbox, self, "Apply", callback=self.apply)

        self.cbox = gui.widgetBox(self.controlArea, "Target class")
        self.class_selection_combo = gui.comboBox(self.cbox,
                                                  self,
                                                  "class_selection",
                                                  items=[],
                                                  callback=self._select_class,
                                                  sendSelectedValue=True,
                                                  valueType=str)

        gui.rubber(self.controlArea)

        self.view = QTreeView(rootIsDecorated=False,
                              uniformRowHeights=True,
                              wordWrap=True,
                              editTriggers=QTreeView.NoEditTriggers)
        header = self.view.header()
        header.setResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel()
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())
        self._update_header()
        box = gui.widgetBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def set_learner(self, learner, key):
        if key in self.learners and learner is None:
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, ())
        self._update_stats_model()

    def set_train_data(self, data):
        self.error(0)
        if data is not None:
            if data.domain.class_var is None:
                self.error(0, "Train data input requires a class variable")
                data = None

        self.train_data = data
        self.closeContext()
        self.class_selection = "(None)"
        self.openContext(data.domain.class_var)
        self._update_class_selection()
        self._update_header()
        self._invalidate()

    def set_test_data(self, data):
        self.error(1)
        if data is not None:
            if data.domain.class_var is None:
                self.error(1, "Test data input requires a class variable")
                data = None

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def handleNewSignals(self):
        self.update_results()
        self.commit()

    def kfold_changed(self):
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def bootstrap_changed(self):
        self.resampling = OWTestLearners.Bootstrap
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def update_results(self):
        self.warning([1, 2])
        self.error(2)

        if self.train_data is None:
            return

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                self.warning(2, "Missing separate test data input")
                return

            elif self.test_data.domain.class_var != \
                    self.train_data.domain.class_var:
                self.error(2, ("Inconsistent class variable between test " +
                               "and train data sets"))
                return

        # items in need of an update
        items = [(key, input) for key, input in self.learners.items()
                 if input.results is None]
        learners = [input.learner for _, input in items]

        self.setStatusMessage("Running")
        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.warning(
                1, "Test data is present but unused. "
                "Select 'Test on test data' to use it.")

        # TODO: Test each learner individually
        if self.resampling == OWTestLearners.KFold:
            results = Orange.evaluation.CrossValidation(self.train_data,
                                                        learners,
                                                        k=self.k_folds,
                                                        store_data=True)
        elif self.resampling == OWTestLearners.LeaveOneOut:
            results = Orange.evaluation.LeaveOneOut(self.train_data,
                                                    learners,
                                                    store_data=True)
        elif self.resampling == OWTestLearners.Bootstrap:
            p = self.sample_p / 100.0
            results = Orange.evaluation.Bootstrap(self.train_data,
                                                  learners,
                                                  n_resamples=self.n_repeat,
                                                  p=p,
                                                  store_data=True)
        elif self.resampling == OWTestLearners.TestOnTrain:
            results = Orange.evaluation.TestOnTrainingData(self.train_data,
                                                           learners,
                                                           store_data=True)
        elif self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                return
            results = Orange.evaluation.TestOnTestData(self.train_data,
                                                       self.test_data,
                                                       learners,
                                                       store_data=True)
        else:
            assert False

        self.results = results
        results = list(split_by_model(results))

        class_var = self.train_data.domain.class_var

        if is_discrete(class_var):
            stats = [
                classification_stats(self.one_vs_rest(res)) for res in results
            ]
        else:
            stats = [regression_stats(res) for res in results]

        self._update_header()

        for (key, input), res, stat in zip(items, results, stats):
            self.learners[key] = input._replace(results=res, stats=stat)

        self.setStatusMessage("")

        self._update_stats_model()

    def _update_header(self):
        headers = ["Method"]
        if self.train_data is not None:
            if is_discrete(self.train_data.domain.class_var):
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        model = self.view.model()

        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        for input in self.learners.values():
            name = learner_name(input.learner)
            row = []
            head = QStandardItem()
            head.setData(name, Qt.DisplayRole)
            row.append(head)
            for stat in input.stats:
                item = QStandardItem()
                item.setData(" {:.3f} ".format(stat[0]), Qt.DisplayRole)
                row.append(item)
            model.appendRow(row)

    def _update_class_selection(self):
        if is_discrete(self.train_data.domain.class_var):
            self.cbox.setVisible(True)
            values = self.train_data.domain.class_var.values
            self.class_selection_combo.clear()
            self.class_selection_combo.addItem("(None)")
            self.class_selection_combo.addItems(values)

            class_index = 0
            if self.class_selection != '(None)' and self.class_selection != 0:
                class_index = self.train_data.domain.class_var.values.index(
                    self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.previous_class_selection = "(None)"
        else:
            self.cbox.setVisible(False)

    def one_vs_rest(self, res):
        if self.class_selection != '(None)' and self.class_selection != 0:
            class_ = self.train_data.domain.class_var.values.index(
                self.class_selection)
            actual = res.actual == class_
            predicted = res.predicted == class_
            return Results(nmethods=1,
                           domain=self.train_data.domain,
                           actual=actual,
                           predicted=predicted)
        else:
            return res

    def _select_class(self):
        if self.previous_class_selection == self.class_selection:
            return

        results = list(split_by_model(self.results))

        items = [(key, input) for key, input in self.learners.items()]
        learners = [input.learner for _, input in items]

        class_var = self.train_data.domain.class_var
        if is_discrete(class_var):
            stats = [
                classification_stats(self.one_vs_rest(res)) for res in results
            ]
        else:
            stats = [regression_stats(res) for res in results]

        for (key, input), res, stat in zip(items, results, stats):
            self.learners[key] = input._replace(results=res, stats=stat)

        self.setStatusMessage("")

        self._update_stats_model()
        self.previous_class_selection = self.class_selection

    def _invalidate(self, which=None):
        if which is None:
            which = self.learners.keys()

        all_keys = list(self.learners.keys())
        model = self.view.model()

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in self.learners:
                row = all_keys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)

    def apply(self):
        self.update_results()
        self.commit()

    def commit(self):
        results = [
            val.results for val in self.learners.values()
            if val.results is not None
        ]
        if results:
            combined = results_merge(results)
            combined.learner_names = [
                learner_name(val.learner) for val in self.learners.values()
            ]
        else:
            combined = None
        self.send("Evaluation Results", combined)
Ejemplo n.º 4
0
class OWPredictions(OWWidget):
    name = "Predictions"
    icon = "icons/Predictions.svg"
    priority = 200
    description = "Display predictions of models for an input dataset."
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)
        predictors = Input("Predictors", Model, multiple=True)

    class Outputs:
        predictions = Output("Predictions", Orange.data.Table)
        evaluation_results = Output("Evaluation Results", Results)

    class Warning(OWWidget.Warning):
        empty_data = Msg("Empty dataset")
        wrong_targets = Msg(
            "Some model(s) predict a different target (see more ...)\n{}")

    class Error(OWWidget.Error):
        predictor_failed = Msg("Some predictor(s) failed (see more ...)\n{}")
        scorer_failed = Msg("Some scorer(s) failed (see more ...)\n{}")

    settingsHandler = settings.ClassValuesContextHandler()
    score_table = settings.SettingProvider(ScoreTable)

    #: List of selected class value indices in the `class_values` list
    selected_classes = settings.ContextSetting([])
    selection = settings.Setting([], schema_only=True)

    def __init__(self):
        super().__init__()

        self.data = None  # type: Optional[Orange.data.Table]
        self.predictors = {}  # type: Dict[object, PredictorSlot]
        self.class_values = []  # type: List[str]
        self._delegates = []
        self.left_width = 10
        self.selection_store = None
        self.__pending_selection = self.selection

        self._set_input_summary()
        self._set_output_summary(None)

        gui.listBox(self.controlArea,
                    self,
                    "selected_classes",
                    "class_values",
                    box="Show probabibilities for",
                    callback=self._update_prediction_delegate,
                    selectionMode=QListWidget.ExtendedSelection,
                    addSpace=False,
                    sizePolicy=(QSizePolicy.Preferred, QSizePolicy.Preferred))
        gui.rubber(self.controlArea)
        self.reset_button = gui.button(
            self.controlArea,
            self,
            "Restore Original Order",
            callback=self._reset_order,
            tooltip="Show rows in the original order")

        table_opts = dict(horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                          horizontalScrollMode=QTableView.ScrollPerPixel,
                          selectionMode=QTableView.ExtendedSelection,
                          focusPolicy=Qt.StrongFocus)
        self.dataview = TableView(sortingEnabled=True,
                                  verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                                  **table_opts)
        self.predictionsview = TableView(
            sortingEnabled=True,
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
            **table_opts)
        self.dataview.verticalHeader().hide()
        dsbar = self.dataview.verticalScrollBar()
        psbar = self.predictionsview.verticalScrollBar()
        psbar.valueChanged.connect(dsbar.setValue)
        dsbar.valueChanged.connect(psbar.setValue)

        self.dataview.verticalHeader().setDefaultSectionSize(22)
        self.predictionsview.verticalHeader().setDefaultSectionSize(22)
        self.dataview.verticalHeader().sectionResized.connect(
            lambda index, _, size: self.predictionsview.verticalHeader(
            ).resizeSection(index, size))

        self.dataview.setItemDelegate(DataItemDelegate(self.dataview))

        self.splitter = QSplitter(orientation=Qt.Horizontal,
                                  childrenCollapsible=False,
                                  handleWidth=2)
        self.splitter.splitterMoved.connect(self.splitter_resized)
        self.splitter.addWidget(self.predictionsview)
        self.splitter.addWidget(self.dataview)

        self.score_table = ScoreTable(self)
        self.vsplitter = gui.vBox(self.mainArea)
        self.vsplitter.layout().addWidget(self.splitter)
        self.vsplitter.layout().addWidget(self.score_table.view)

    def get_selection_store(self, proxy):
        # Both proxies map the same, so it doesn't matter which one is used
        # to initialize SharedSelectionStore
        if self.selection_store is None:
            self.selection_store = SharedSelectionStore(proxy)
        return self.selection_store

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        self.Warning.empty_data(shown=data is not None and not data)
        self.data = data
        self.selection_store = None
        if not data:
            self.dataview.setModel(None)
            self.predictionsview.setModel(None)
        else:
            # force full reset of the view's HeaderView state
            self.dataview.setModel(None)
            model = TableModel(data, parent=None)
            modelproxy = SortProxyModel()
            modelproxy.setSourceModel(model)
            self.dataview.setModel(modelproxy)
            sel_model = SharedSelectionModel(
                self.get_selection_store(modelproxy), modelproxy,
                self.dataview)
            self.dataview.setSelectionModel(sel_model)
            if self.__pending_selection is not None:
                self.selection = self.__pending_selection
                self.__pending_selection = None
                self.selection_store.select_rows(
                    set(self.selection), QItemSelectionModel.ClearAndSelect)
            sel_model.selectionChanged.connect(self.commit)
            sel_model.selectionChanged.connect(self._store_selection)

            self.dataview.model().list_sorted.connect(
                partial(self._update_data_sort_order, self.dataview,
                        self.predictionsview))

        self._invalidate_predictions()

    def _store_selection(self):
        self.selection = list(self.selection_store.rows)

    @property
    def class_var(self):
        return self.data and self.data.domain.class_var

    # pylint: disable=redefined-builtin
    @Inputs.predictors
    def set_predictor(self, predictor=None, id=None):
        if id in self.predictors:
            if predictor is not None:
                self.predictors[id] = self.predictors[id]._replace(
                    predictor=predictor, name=predictor.name, results=None)
            else:
                del self.predictors[id]
        elif predictor is not None:
            self.predictors[id] = PredictorSlot(predictor, predictor.name,
                                                None)

    def _set_class_values(self):
        class_values = []
        for slot in self.predictors.values():
            class_var = slot.predictor.domain.class_var
            if class_var and class_var.is_discrete:
                for value in class_var.values:
                    if value not in class_values:
                        class_values.append(value)

        if self.class_var and self.class_var.is_discrete:
            values = self.class_var.values
            self.class_values = sorted(class_values,
                                       key=lambda val: val not in values)
            self.selected_classes = [
                i for i, name in enumerate(class_values) if name in values
            ]
        else:
            self.class_values = class_values  # This assignment updates listview
            self.selected_classes = []

    def handleNewSignals(self):
        self._set_class_values()
        self._call_predictors()
        self._update_scores()
        self._update_predictions_model()
        self._update_prediction_delegate()
        self._set_errors()
        self._set_input_summary()
        self.commit()

    def _call_predictors(self):
        if not self.data:
            return
        if self.class_var:
            domain = self.data.domain
            classless_data = self.data.transform(
                Domain(domain.attributes, None, domain.metas))
        else:
            classless_data = self.data

        for inputid, slot in self.predictors.items():
            if isinstance(slot.results, Results):
                continue

            predictor = slot.predictor
            try:
                if predictor.domain.class_var.is_discrete:
                    pred, prob = predictor(classless_data, Model.ValueProbs)
                else:
                    pred = predictor(classless_data, Model.Value)
                    prob = numpy.zeros((len(pred), 0))
            except (ValueError, DomainTransformationError) as err:
                self.predictors[inputid] = \
                    slot._replace(results=f"{predictor.name}: {err}")
                continue

            results = Results()
            results.data = self.data
            results.domain = self.data.domain
            results.row_indices = numpy.arange(len(self.data))
            results.folds = (Ellipsis, )
            results.actual = self.data.Y
            results.unmapped_probabilities = prob
            results.unmapped_predicted = pred
            results.probabilities = results.predicted = None
            self.predictors[inputid] = slot._replace(results=results)

            target = predictor.domain.class_var
            if target != self.class_var:
                continue

            if target is not self.class_var and target.is_discrete:
                backmappers, n_values = predictor.get_backmappers(self.data)
                prob = predictor.backmap_probs(prob, n_values, backmappers)
                pred = predictor.backmap_value(pred, prob, n_values,
                                               backmappers)
            results.predicted = pred.reshape((1, len(self.data)))
            results.probabilities = prob.reshape((1, ) + prob.shape)

    def _update_scores(self):
        model = self.score_table.model
        model.clear()
        scorers = usable_scorers(self.class_var) if self.class_var else []
        self.score_table.update_header(scorers)
        errors = []
        for inputid, pred in self.predictors.items():
            results = self.predictors[inputid].results
            if not isinstance(results, Results) or results.predicted is None:
                continue
            row = [
                QStandardItem(learner_name(pred.predictor)),
                QStandardItem("N/A"),
                QStandardItem("N/A")
            ]
            for scorer in scorers:
                item = QStandardItem()
                try:
                    score = scorer_caller(scorer, results)()[0]
                    item.setText(f"{score:.3f}")
                except Exception as exc:  # pylint: disable=broad-except
                    item.setToolTip(str(exc))
                    if scorer.name in self.score_table.shown_scores:
                        errors.append(str(exc))
                row.append(item)
            self.score_table.model.appendRow(row)

        view = self.score_table.view
        if model.rowCount():
            view.setVisible(True)
            view.ensurePolished()
            view.setFixedHeight(5 + view.horizontalHeader().height() +
                                view.verticalHeader().sectionSize(0) *
                                model.rowCount())
        else:
            view.setVisible(False)

        self.Error.scorer_failed("\n".join(errors), shown=bool(errors))

    def _set_errors(self):
        # Not all predictors are run every time, so errors can't be collected
        # in _call_predictors
        errors = "\n".join(f"- {p.predictor.name}: {p.results}"
                           for p in self.predictors.values()
                           if isinstance(p.results, str) and p.results)
        self.Error.predictor_failed(errors, shown=bool(errors))

        if self.class_var:
            inv_targets = "\n".join(
                f"- {pred.name} predicts '{pred.domain.class_var.name}'"
                for pred in (p.predictor for p in self.predictors.values()
                             if isinstance(p.results, Results)
                             and p.results.probabilities is None))
            self.Warning.wrong_targets(inv_targets, shown=bool(inv_targets))
        else:
            self.Warning.wrong_targets.clear()

    def _set_input_summary(self):
        if not self.data and not self.predictors:
            self.info.set_input_summary(self.info.NoInput)
            return

        summary = len(self.data) if self.data else 0
        details = self._get_details()
        self.info.set_input_summary(summary, details, format=Qt.RichText)

    def _get_details(self):
        details = "Data:<br>"
        details += format_summary_details(self.data).replace('\n', '<br>') if \
            self.data else "No data on input."
        details += "<hr>"
        pred_names = [v.name for v in self.predictors.values()]
        n_predictors = len(self.predictors)
        if n_predictors:
            n_valid = len(self._non_errored_predictors())
            details += plural("Model: {number} model{s}", n_predictors)
            if n_valid != n_predictors:
                details += f" ({n_predictors - n_valid} failed)"
            details += "<ul>"
            for name in pred_names:
                details += f"<li>{name}</li>"
            details += "</ul>"
        else:
            details += "Model:<br>No model on input."
        return details

    def _set_output_summary(self, output):
        summary = len(output) if output else self.info.NoOutput
        details = format_summary_details(output) if output else ""
        self.info.set_output_summary(summary, details)

    def _invalidate_predictions(self):
        for inputid, pred in list(self.predictors.items()):
            self.predictors[inputid] = pred._replace(results=None)

    def _non_errored_predictors(self):
        return [
            p for p in self.predictors.values()
            if isinstance(p.results, Results)
        ]

    def _reordered_probabilities(self, prediction):
        cur_values = prediction.predictor.domain.class_var.values
        new_ind = [self.class_values.index(x) for x in cur_values]
        probs = prediction.results.unmapped_probabilities
        new_probs = numpy.full((probs.shape[0], len(self.class_values)),
                               numpy.nan)
        new_probs[:, new_ind] = probs
        return new_probs

    def _update_predictions_model(self):
        results = []
        headers = []
        for p in self._non_errored_predictors():
            values = p.results.unmapped_predicted
            target = p.predictor.domain.class_var
            if target.is_discrete:
                # order probabilities in order from Show prob. for
                prob = self._reordered_probabilities(p)
                values = [Value(target, v) for v in values]
            else:
                prob = numpy.zeros((len(values), 0))
            results.append((values, prob))
            headers.append(p.predictor.name)

        if results:
            results = list(zip(*(zip(*res) for res in results)))
            model = PredictionsModel(results, headers)
        else:
            model = None

        if self.selection_store is not None:
            self.selection_store.unregister(
                self.predictionsview.selectionModel())

        predmodel = PredictionsSortProxyModel()
        predmodel.setSourceModel(model)
        predmodel.setDynamicSortFilter(True)
        self.predictionsview.setModel(predmodel)

        self.predictionsview.setSelectionModel(
            SharedSelectionModel(self.get_selection_store(predmodel),
                                 predmodel, self.predictionsview))

        hheader = self.predictionsview.horizontalHeader()
        hheader.setSortIndicatorShown(False)
        # SortFilterProxyModel is slow due to large abstraction overhead
        # (every comparison triggers multiple `model.index(...)`,
        # model.rowCount(...), `model.parent`, ... calls)
        hheader.setSectionsClickable(predmodel.rowCount() < 20000)

        self.predictionsview.model().list_sorted.connect(
            partial(self._update_data_sort_order, self.predictionsview,
                    self.dataview))

        self.predictionsview.resizeColumnsToContents()

    def _update_data_sort_order(self, sort_source_view, sort_dest_view):
        sort_dest = sort_dest_view.model()
        sort_source = sort_source_view.model()
        sortindicatorshown = False
        if sort_dest is not None:
            assert isinstance(sort_dest, QSortFilterProxyModel)
            n = sort_dest.rowCount()
            if sort_source is not None and sort_source.sortColumn() >= 0:
                sortind = numpy.argsort([
                    sort_source.mapToSource(sort_source.index(i, 0)).row()
                    for i in range(n)
                ])
                sortind = numpy.array(sortind, numpy.int)
                sortindicatorshown = True
            else:
                sortind = None

            sort_dest.setSortIndices(sortind)

        sort_dest_view.horizontalHeader().setSortIndicatorShown(False)
        sort_source_view.horizontalHeader().setSortIndicatorShown(
            sortindicatorshown)
        self.commit()

    def _reset_order(self):
        datamodel = self.dataview.model()
        predmodel = self.predictionsview.model()
        if datamodel is not None:
            datamodel.setSortIndices(None)
            datamodel.sort(-1)
        if predmodel is not None:
            predmodel.setSortIndices(None)
            predmodel.sort(-1)
        self.predictionsview.horizontalHeader().setSortIndicatorShown(False)
        self.dataview.horizontalHeader().setSortIndicatorShown(False)

    def _all_color_values(self):
        """
        Return list of colors together with their values from all predictors
        classes. Colors and values are sorted according to the values order
        for simpler comparison.
        """
        predictors = self._non_errored_predictors()
        color_values = [
            list(
                zip(*sorted(zip(p.predictor.domain.class_var.colors,
                                p.predictor.domain.class_var.values),
                            key=itemgetter(1)))) for p in predictors
            if p.predictor.domain.class_var.is_discrete
        ]
        return color_values if color_values else [([], [])]

    @staticmethod
    def _colors_match(colors1, values1, color2, values2):
        """
        Test whether colors for values match. Colors matches when all
        values match for shorter list and colors match for shorter list.
        It is assumed that values will be sorted together with their colors.
        """
        shorter_length = min(len(colors1), len(color2))
        return (values1[:shorter_length] == values2[:shorter_length]
                and (numpy.array(colors1[:shorter_length]) == numpy.array(
                    color2[:shorter_length])).all())

    def _get_colors(self):
        """
        Defines colors for values. If colors match in all models use the union
        otherwise use standard colors.
        """
        all_colors_values = self._all_color_values()
        base_color, base_values = all_colors_values[0]
        for c, v in all_colors_values[1:]:
            if not self._colors_match(base_color, base_values, c, v):
                base_color = []
                break
            # replace base_color if longer
            if len(v) > len(base_color):
                base_color = c
                base_values = v

        if len(base_color) != len(self.class_values):
            return LimitedDiscretePalette(len(self.class_values)).palette
        # reorder colors to widgets order
        colors = [None] * len(self.class_values)
        for c, v in zip(base_color, base_values):
            colors[self.class_values.index(v)] = c
        return colors

    def _update_prediction_delegate(self):
        self._delegates.clear()
        colors = self._get_colors()
        for col, slot in enumerate(self.predictors.values()):
            target = slot.predictor.domain.class_var
            shown_probs = (() if target.is_continuous else [
                val if self.class_values[val] in target.values else None
                for val in self.selected_classes
            ])
            delegate = PredictionsItemDelegate(
                None if target.is_continuous else self.class_values,
                colors,
                shown_probs,
                target.format_str if target.is_continuous else None,
                parent=self.predictionsview)
            # QAbstractItemView does not take ownership of delegates, so we must
            self._delegates.append(delegate)
            self.predictionsview.setItemDelegateForColumn(col, delegate)
            self.predictionsview.setColumnHidden(col, False)

        self.predictionsview.resizeColumnsToContents()
        self._recompute_splitter_sizes()
        if self.predictionsview.model() is not None:
            self.predictionsview.model().setProbInd(self.selected_classes)

    def _recompute_splitter_sizes(self):
        if not self.data:
            return
        view = self.predictionsview
        self.left_width = \
            view.horizontalHeader().length() + view.verticalHeader().width()
        self._update_splitter()

    def _update_splitter(self):
        w1, w2 = self.splitter.sizes()
        self.splitter.setSizes([self.left_width, w1 + w2 - self.left_width])

    def splitter_resized(self):
        self.left_width = self.splitter.sizes()[0]

    def commit(self):
        self._commit_predictions()
        self._commit_evaluation_results()

    def _commit_evaluation_results(self):
        slots = [
            p for p in self._non_errored_predictors()
            if p.results.predicted is not None
        ]
        if not slots:
            self.Outputs.evaluation_results.send(None)
            return

        nanmask = numpy.isnan(self.data.get_column_view(self.class_var)[0])
        data = self.data[~nanmask]
        results = Results(data, store_data=True)
        results.folds = None
        results.row_indices = numpy.arange(len(data))
        results.actual = data.Y.ravel()
        results.predicted = numpy.vstack(
            tuple(p.results.predicted[0][~nanmask] for p in slots))
        if self.class_var and self.class_var.is_discrete:
            results.probabilities = numpy.array(
                [p.results.probabilities[0][~nanmask] for p in slots])
        results.learner_names = [p.name for p in slots]
        self.Outputs.evaluation_results.send(results)

    def _commit_predictions(self):
        if not self.data:
            self._set_output_summary(None)
            self.Outputs.predictions.send(None)
            return

        newmetas = []
        newcolumns = []
        for slot in self._non_errored_predictors():
            if slot.predictor.domain.class_var.is_discrete:
                self._add_classification_out_columns(slot, newmetas,
                                                     newcolumns)
            else:
                self._add_regression_out_columns(slot, newmetas, newcolumns)

        attrs = list(self.data.domain.attributes)
        metas = list(self.data.domain.metas)
        names = [
            var.name
            for var in chain(attrs, self.data.domain.class_vars, metas) if var
        ]
        uniq_newmetas = []
        for new_ in newmetas:
            uniq = get_unique_names(names, new_.name)
            if uniq != new_.name:
                new_ = new_.copy(name=uniq)
            uniq_newmetas.append(new_)
            names.append(uniq)

        metas += uniq_newmetas
        domain = Orange.data.Domain(attrs, self.class_var, metas=metas)
        predictions = self.data.transform(domain)
        if newcolumns:
            newcolumns = numpy.hstack(
                [numpy.atleast_2d(cols) for cols in newcolumns])
            predictions.metas[:, -newcolumns.shape[1]:] = newcolumns

        index = self.dataview.model().index
        map_to = self.dataview.model().mapToSource
        assert self.selection_store is not None
        rows = None
        if self.selection_store.rows:
            rows = [
                ind.row()
                for ind in self.dataview.selectionModel().selectedRows(0)
            ]
            rows.sort()
        elif self.dataview.model().isSorted() \
                or self.predictionsview.model().isSorted():
            rows = list(range(len(self.data)))
        if rows:
            source_rows = [map_to(index(row, 0)).row() for row in rows]
            predictions = predictions[source_rows]
        self.Outputs.predictions.send(predictions)
        self._set_output_summary(predictions)

    @staticmethod
    def _add_classification_out_columns(slot, newmetas, newcolumns):
        # Mapped or unmapped predictions?!
        # Or provide a checkbox so the user decides?
        pred = slot.predictor
        name = pred.name
        values = pred.domain.class_var.values
        newmetas.append(DiscreteVariable(name=name, values=values))
        newcolumns.append(slot.results.unmapped_predicted.reshape(-1, 1))
        newmetas += [
            ContinuousVariable(name=f"{name} ({value})") for value in values
        ]
        newcolumns.append(slot.results.unmapped_probabilities)

    @staticmethod
    def _add_regression_out_columns(slot, newmetas, newcolumns):
        newmetas.append(ContinuousVariable(name=slot.predictor.name))
        newcolumns.append(slot.results.unmapped_predicted.reshape((-1, 1)))

    def send_report(self):
        def merge_data_with_predictions():
            data_model = self.dataview.model()
            predictions_view = self.predictionsview
            predictions_model = predictions_view.model()

            # use ItemDelegate to style prediction values
            delegates = [
                predictions_view.itemDelegateForColumn(i)
                for i in range(predictions_model.columnCount())
            ]

            # iterate only over visible columns of data's QTableView
            iter_data_cols = list(
                filter(lambda x: not self.dataview.isColumnHidden(x),
                       range(data_model.columnCount())))

            # print header
            yield [''] + \
                  [predictions_model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
                   for col in range(predictions_model.columnCount())] + \
                  [data_model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
                   for col in iter_data_cols]

            # print data & predictions
            for i in range(data_model.rowCount()):
                yield [data_model.headerData(i, Qt.Vertical, Qt.DisplayRole)] + \
                      [delegate.displayText(
                          predictions_model.data(predictions_model.index(i, j)),
                          QLocale())
                       for j, delegate in enumerate(delegates)] + \
                      [data_model.data(data_model.index(i, j))
                       for j in iter_data_cols]

        if self.data:
            text = self._get_details().replace('\n', '<br>')
            if self.selected_classes:
                text += '<br>Showing probabilities for: '
                text += ', '.join(
                    [self.class_values[i] for i in self.selected_classes])
            self.report_paragraph('Info', text)
            self.report_table("Data & Predictions",
                              merge_data_with_predictions(),
                              header_rows=1,
                              header_columns=1)

            self.report_table("Scores", self.score_table.view)

    def resizeEvent(self, event):
        super().resizeEvent(event)
        self._update_splitter()

    def showEvent(self, event):
        super().showEvent(event)
        QTimer.singleShot(0, self._update_splitter)
Ejemplo n.º 5
0
class OWTestLearners(widget.OWWidget):
    name = "Test & Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Learner,
               "set_learner", widget.Multiple),
              ("Data", Table, "set_train_data", widget.Default),
              ("Test Data", Table, "set_test_data"),
              ("Preprocessor", Preprocess, "set_preprocessor")]

    outputs = [("Predictions", Orange.data.Table),
               ("Evaluation Results", Orange.evaluation.Results)]

    settingsHandler = settings.ClassValuesContextHandler()

    #: Resampling/testing types
    KFold, LeaveOneOut, ShuffleSplit, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    k_folds = settings.Setting(10)
    #: Number of repeats for ShuffleSplit sampling
    n_repeat = settings.Setting(10)
    #: ShuffleSplit sampling p
    sample_p = settings.Setting(75)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    def __init__(self):
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False

        #: An Ordered dictionary with current inputs and their testing
        #: results.
        self.learners = OrderedDict()

        sbox = gui.widgetBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed
        )
        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "k_folds", 2, 50, label="Number of folds:",
                 callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Leave one out")
        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "n_repeat", 2, 50, label="Repeat train/test",
                 callback=self.shuffle_split_changed)
        gui.widgetLabel(ibox, "Relative training set size:")
        gui.hSlider(ibox, self, "sample_p", minValue=1, maxValue=99,
                    ticks=20, vertical=False, labelFormat="%d %%",
                    callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        rbox.layout().addSpacing(5)
        self.apply_button = gui.button(
            rbox, self, "Apply", callback=self.apply, default=True)

        self.cbox = gui.widgetBox(self.controlArea, "Target class")
        self.class_selection_combo = gui.comboBox(
            self.cbox, self, "class_selection", items=[],
            sendSelectedValue=True, valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        self.view = QTreeView(
            rootIsDecorated=False,
            uniformRowHeights=True,
            wordWrap=True,
            editTriggers=QTreeView.NoEditTriggers
        )
        header = self.view.header()
        header.setResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.widgetBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def sizeHint(self):
        return QSize(780, 1)

    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.
        """
        if key in self.learners and learner is None:
            # Removed
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, None)
            self._invalidate([key])

    def set_train_data(self, data):
        """
        Set the input training dataset.
        """
        self.error(0)
        self.information(0)
        if data and not data.domain.class_var:
            self.error(0, "Train data input requires a class variable")
            data = None

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.information(0, "Train data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.warning(4)
        self.train_data_missing_vals = data is not None and \
                                       np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.warning(4, self._get_missing_data_warning(
                self.train_data_missing_vals, self.test_data_missing_vals
            ))
            if data:
                data = RemoveNaNClasses(data)

        self.data = data
        self.closeContext()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain.class_var)
        self._invalidate()

    def set_test_data(self, data):
        """
        Set the input separate testing dataset.
        """
        self.error(1)
        self.information(1)
        if data and not data.domain.class_var:
            self.error(1, "Test data input requires a class variable")
            data = None

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.information(1, "Test data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.warning(4)
        self.test_data_missing_vals = data is not None and \
                                      np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.warning(4, self._get_missing_data_warning(
                self.train_data_missing_vals, self.test_data_missing_vals
            ))
            if data:
                data = RemoveNaNClasses(data)

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _get_missing_data_warning(self, train_missing, test_missing):
        return "Instances with unknown target values were removed from{}data"\
            .format(train_missing * test_missing * " "
                    or train_missing * " train " or test_missing * " test ")

    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self.apply()

    def kfold_changed(self):
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def shuffle_split_changed(self):
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def _update_results(self):
        """
        Run/evaluate the learners.
        """
        self.warning([1, 2])
        self.error([2, 4])
        if self.data is None:
            return

        class_var = self.data.domain.class_var

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                self.warning(2, "Missing separate test data input")
                return
            elif self.test_data.domain.class_var != class_var:
                self.error(2, ("Inconsistent class variable between test " +
                               "and train data sets"))
                return

        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]
        if len(items) == 0:
            return

        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.warning(1, "Test data is present but unused. "
                            "Select 'Test on test data' to use it.")

        rstate = 42
        def update_progress(finished):
            self.progressBarSet(100 * finished)
        common_args = dict(
            store_data=True,
            preprocessor=self.preprocessor,
            callback=update_progress)
        self.setStatusMessage("Running")

        with self.progressBar():
            try:
                if self.resampling == OWTestLearners.KFold:
                    if len(self.data) < self.k_folds:
                        self.error(4, "Number of folds exceeds the data size")
                        return

                    warnings = []
                    results = Orange.evaluation.CrossValidation(
                        self.data, learners, k=self.k_folds,
                        random_state=rstate, warnings=warnings, **common_args)
                    if warnings:
                        self.warning(2, warnings[0])
                elif self.resampling == OWTestLearners.LeaveOneOut:
                    results = Orange.evaluation.LeaveOneOut(
                        self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.ShuffleSplit:
                    train_size = self.sample_p / 100
                    results = Orange.evaluation.ShuffleSplit(
                        self.data, learners, n_resamples=self.n_repeat,
                        train_size=train_size, test_size=None,
                        random_state=rstate, **common_args)
                elif self.resampling == OWTestLearners.TestOnTrain:
                    results = Orange.evaluation.TestOnTrainingData(
                        self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.TestOnTest:
                    results = Orange.evaluation.TestOnTestData(
                        self.data, self.test_data, learners, **common_args)
                else:
                    assert False
            except RuntimeError as e:
                self.error(2, str(e))
                self.setStatusMessage("")
                return

        learner_key = {slot.learner: key for key, slot in self.learners.items()}
        for learner, result in zip(learners, split_by_model(results)):
            stats = None
            if class_var.is_discrete:
                scorers = classification_stats.scores
            elif class_var.is_continuous:
                scorers = regression_stats.scores
            else:
                scorers = None
            if scorers:
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [Try(lambda: score(result)) for score in scorers]
                    result = Try.Success(result)
            key = learner_key[learner]
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self.setStatusMessage("")

    def _update_header(self):
        # Set the correct horizontal header labels on the results_model.
        headers = ["Method"]
        if self.data is not None:
            if self.data.domain.has_discrete_class:
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)

        # remove possible extra columns from the model.
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.view.model()
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                            self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            head.setData(key, Qt.UserRole)
            if isinstance(slot.results, Try.Fail):
                head.setToolTip(str(slot.results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                errors.append("{name} failed with error:\n"
                              "{exc.__class__.__name__}: {exc!s}"
                              .format(name=name, exc=slot.results.exception))

            row = [head]

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(
                        slot.results.value, target_index)

                    stats = [Try(lambda: score(ovr_results))
                             for score in classification_stats.scores]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat in stats:
                    item = QStandardItem()
                    if stat.success:
                        item.setText("{:.3f}".format(stat.value[0]))
                    else:
                        item.setToolTip(str(stat.exception))
                        has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        if errors:
            self.error(3, "\n".join(errors))
        else:
            self.error(3)

        if has_missing_scores:
            self.warning(3, "Some scores could not be computed")
        else:
            self.warning(3)

    def _update_class_selection(self):
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = [self.TARGET_AVERAGE] + class_var.values
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        self._update_stats_model()

    def _invalidate(self, which=None):
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.view.model()
        statmodelkeys = [model.item(row, 0).data(Qt.UserRole)
                         for row in range(model.rowCount())]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.apply_button.setEnabled(True)

    def apply(self):
        self.apply_button.setEnabled(False)
        self._update_header()
        # Update the view to display the model names
        self._update_stats_model()
        self._update_results()
        self._update_stats_model()
        self.commit()

    def commit(self):
        valid = [slot for slot in self.learners.values()
                 if slot.results is not None and slot.results.success]
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [learner_name(slot.learner)
                                      for slot in valid]

            # Predictions & Probabilities
            predictions = combined.get_augmented_data(combined.learner_names)
        else:
            combined = None
            predictions = None
        self.send("Evaluation Results", combined)
        self.send("Predictions", predictions)

    def send_report(self):
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            items = [("Sampling type", "{}-fold Cross validation".
                      format(self.k_folds))]
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.Bootstrap:
            items = [("Sampling type",
                      "{} random samples with {} % data ".format(
                          self.n_repeat, self.sample_p))]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.view)
Ejemplo n.º 6
0
class OWPredictions(OWWidget):
    name = "Predictions"
    icon = "icons/Predictions.svg"
    priority = 200
    description = "Display the predictions of models for an input dataset."
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)
        predictors = Input("Predictors", Model, multiple=True)

    class Outputs:
        predictions = Output("Predictions", Orange.data.Table)
        evaluation_results = Output("Evaluation Results",
                                    Orange.evaluation.Results,
                                    dynamic=False)

    class Warning(OWWidget.Warning):
        empty_data = Msg("Empty dataset")

    class Error(OWWidget.Error):
        predictor_failed = \
            Msg("One or more predictors failed (see more...)\n{}")
        predictors_target_mismatch = \
            Msg("Predictors do not have the same target.")
        data_target_mismatch = \
            Msg("Data does not have the same target as predictors.")

    settingsHandler = settings.ClassValuesContextHandler()
    #: Display the full input dataset or only the target variable columns (if
    #: available)
    show_attrs = settings.Setting(True)
    #: Show predicted values (for discrete target variable)
    show_predictions = settings.Setting(True)
    #: Show predictions probabilities (for discrete target variable)
    show_probabilities = settings.Setting(True)
    #: List of selected class value indices in the "Show probabilities" list
    selected_classes = settings.ContextSetting([])
    #: Draw colored distribution bars
    draw_dist = settings.Setting(True)

    output_attrs = settings.Setting(True)
    output_predictions = settings.Setting(True)
    output_probabilities = settings.Setting(True)

    def __init__(self):
        super().__init__()

        #: Input data table
        self.data = None  # type: Optional[Orange.data.Table]
        #: A dict mapping input ids to PredictorSlot
        self.predictors = OrderedDict()  # type: Dict[object, PredictorSlot]
        #: A class variable (prediction target)
        self.class_var = None  # type: Optional[Orange.data.Variable]
        #: List of (discrete) class variable's values
        self.class_values = []  # type: List[str]

        box = gui.vBox(self.controlArea, "Info")
        self.infolabel = gui.widgetLabel(
            box, "No data on input.\nPredictors: 0\nTask: N/A")
        self.infolabel.setMinimumWidth(150)
        gui.button(box,
                   self,
                   "Restore Original Order",
                   callback=self._reset_order,
                   tooltip="Show rows in the original order")

        self.classification_options = box = gui.vBox(self.controlArea,
                                                     "Show",
                                                     spacing=-1,
                                                     addSpace=False)

        gui.checkBox(box,
                     self,
                     "show_predictions",
                     "Predicted class",
                     callback=self._update_prediction_delegate)
        b = gui.checkBox(box,
                         self,
                         "show_probabilities",
                         "Predicted probabilities for:",
                         callback=self._update_prediction_delegate)
        ibox = gui.indentedBox(box,
                               sep=gui.checkButtonOffsetHint(b),
                               addSpace=False)
        gui.listBox(ibox,
                    self,
                    "selected_classes",
                    "class_values",
                    callback=self._update_prediction_delegate,
                    selectionMode=QListWidget.MultiSelection,
                    addSpace=False)
        gui.checkBox(box,
                     self,
                     "draw_dist",
                     "Draw distribution bars",
                     callback=self._update_prediction_delegate)

        box = gui.vBox(self.controlArea, "Data View")
        gui.checkBox(box,
                     self,
                     "show_attrs",
                     "Show full dataset",
                     callback=self._update_column_visibility)

        box = gui.vBox(self.controlArea, "Output", spacing=-1)
        self.checkbox_class = gui.checkBox(box,
                                           self,
                                           "output_attrs",
                                           "Original data",
                                           callback=self.commit)
        self.checkbox_class = gui.checkBox(box,
                                           self,
                                           "output_predictions",
                                           "Predictions",
                                           callback=self.commit)
        self.checkbox_prob = gui.checkBox(box,
                                          self,
                                          "output_probabilities",
                                          "Probabilities",
                                          callback=self.commit)

        gui.rubber(self.controlArea)

        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
            handleWidth=2,
        )
        self.dataview = TableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollMode=QTableView.ScrollPerPixel,
            selectionMode=QTableView.NoSelection,
            focusPolicy=Qt.StrongFocus)
        self.predictionsview = TableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollMode=QTableView.ScrollPerPixel,
            selectionMode=QTableView.NoSelection,
            focusPolicy=Qt.StrongFocus,
            sortingEnabled=True,
        )

        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.dataview.verticalHeader().hide()

        dsbar = self.dataview.verticalScrollBar()
        psbar = self.predictionsview.verticalScrollBar()

        psbar.valueChanged.connect(dsbar.setValue)
        dsbar.valueChanged.connect(psbar.setValue)

        self.dataview.verticalHeader().setDefaultSectionSize(22)
        self.predictionsview.verticalHeader().setDefaultSectionSize(22)
        self.dataview.verticalHeader().sectionResized.connect(
            lambda index, _, size: self.predictionsview.verticalHeader(
            ).resizeSection(index, size))

        self.splitter.addWidget(self.predictionsview)
        self.splitter.addWidget(self.dataview)

        self.mainArea.layout().addWidget(self.splitter)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Set the input dataset"""
        if data is not None and not len(data):
            data = None
            self.Warning.empty_data()
        else:
            self.Warning.empty_data.clear()

        self.data = data
        if data is None:
            self.dataview.setModel(None)
            self.predictionsview.setModel(None)
            self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        else:
            # force full reset of the view's HeaderView state
            self.dataview.setModel(None)
            model = TableModel(data, parent=None)
            modelproxy = TableSortProxyModel()
            modelproxy.setSourceModel(model)
            self.dataview.setModel(modelproxy)
            self._update_column_visibility()

        self._invalidate_predictions()

    @Inputs.predictors
    def set_predictor(self, predictor=None, id=None):
        if id in self.predictors:
            if predictor is not None:
                self.predictors[id] = self.predictors[id]._replace(
                    predictor=predictor, name=predictor.name, results=None)
            else:
                del self.predictors[id]
        elif predictor is not None:
            self.predictors[id] = \
                PredictorSlot(predictor, predictor.name, None)

    def set_class_var(self):
        pred_classes = set(pred.predictor.domain.class_var
                           for pred in self.predictors.values())
        self.Error.predictors_target_mismatch.clear()
        self.Error.data_target_mismatch.clear()
        self.class_var = None
        if len(pred_classes) > 1:
            self.Error.predictors_target_mismatch()
        if len(pred_classes) == 1:
            self.class_var = pred_classes.pop()
            if self.data is not None and \
                    self.data.domain.class_var is not None and \
                    self.class_var != self.data.domain.class_var:
                self.Error.data_target_mismatch()
                self.class_var = None

        discrete_class = self.class_var is not None \
                         and self.class_var.is_discrete
        self.classification_options.setVisible(discrete_class)
        self.closeContext()
        if discrete_class:
            self.class_values = list(self.class_var.values)
            self.selected_classes = list(range(len(self.class_values)))
            self.openContext(self.class_var)
        else:
            self.class_values = []
            self.selected_classes = []

    def handleNewSignals(self):
        self.set_class_var()
        if self.data is not None:
            self._call_predictors()
        self._update_predictions_model()
        self._update_prediction_delegate()
        self._set_errors()
        self._update_info()
        self.commit()

    def _call_predictors(self):
        for inputid, pred in self.predictors.items():
            if pred.results is None or numpy.isnan(pred.results[0]).all():
                try:
                    results = self.predict(pred.predictor, self.data)
                except ValueError as err:
                    results = "{}: {}".format(pred.predictor.name, err)
                self.predictors[inputid] = pred._replace(results=results)

    def _set_errors(self):
        errors = "\n".join(p.results for p in self.predictors.values()
                           if isinstance(p.results, str))
        if errors:
            self.Error.predictor_failed(errors)
        else:
            self.Error.predictor_failed.clear()

    def _update_info(self):
        info = []
        if self.data is not None:
            info.append("Data: {} instances.".format(len(self.data)))
        else:
            info.append("Data: N/A")

        n_predictors = len(self.predictors)
        n_valid = len(self._valid_predictors())
        if n_valid != n_predictors:
            info.append("Predictors: {} (+ {} failed)".format(
                n_valid, n_predictors - n_valid))
        else:
            info.append("Predictors: {}".format(n_predictors or "N/A"))

        if self.class_var is None:
            info.append("Task: N/A")
        elif self.class_var.is_discrete:
            info.append("Task: Classification")
            self.checkbox_class.setEnabled(True)
            self.checkbox_prob.setEnabled(True)
        else:
            info.append("Task: Regression")
            self.checkbox_class.setEnabled(False)
            self.checkbox_prob.setEnabled(False)

        self.infolabel.setText("\n".join(info))

    def _invalidate_predictions(self):
        for inputid, pred in list(self.predictors.items()):
            self.predictors[inputid] = pred._replace(results=None)

    def _valid_predictors(self):
        if self.class_var is not None and \
                self.data is not None:
            return [
                p for p in self.predictors.values()
                if p.results is not None and not isinstance(p.results, str)
            ]
        else:
            return []

    def _update_predictions_model(self):
        """Update the prediction view model."""
        if self.data is not None and self.class_var is not None:
            slots = self._valid_predictors()
            results = []
            class_var = self.class_var
            for p in slots:
                values, prob = p.results
                if self.class_var.is_discrete:
                    # if values were added to class_var between building the
                    # model and predicting, add zeros for new class values,
                    # which are always at the end
                    prob = numpy.c_[prob,
                                    numpy.zeros(
                                        (prob.shape[0], len(class_var.values) -
                                         prob.shape[1]))]
                    values = [Value(class_var, v) for v in values]
                results.append((values, prob))
            results = list(zip(*(zip(*res) for res in results)))
            headers = [p.name for p in slots]
            model = PredictionsModel(results, headers)
        else:
            model = None

        predmodel = PredictionsSortProxyModel()
        predmodel.setSourceModel(model)
        predmodel.setDynamicSortFilter(True)
        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.predictionsview.setModel(predmodel)
        hheader = self.predictionsview.horizontalHeader()
        hheader.setSortIndicatorShown(False)
        # SortFilterProxyModel is slow due to large abstraction overhead
        # (every comparison triggers multiple `model.index(...)`,
        # model.rowCount(...), `model.parent`, ... calls)
        hheader.setSectionsClickable(predmodel.rowCount() < 20000)

        predmodel.layoutChanged.connect(self._update_data_sort_order)
        self._update_data_sort_order()
        self.predictionsview.resizeColumnsToContents()

    def _update_column_visibility(self):
        """Update data column visibility."""
        if self.data is not None and self.class_var is not None:
            domain = self.data.domain
            first_attr = len(domain.class_vars) + len(domain.metas)

            for i in range(first_attr, first_attr + len(domain.attributes)):
                self.dataview.setColumnHidden(i, not self.show_attrs)
            if domain.class_var:
                self.dataview.setColumnHidden(0, False)

    def _update_data_sort_order(self):
        """Update data row order to match the current predictions view order"""
        datamodel = self.dataview.model()  # data model proxy
        predmodel = self.predictionsview.model()  # predictions model proxy
        sortindicatorshown = False
        if datamodel is not None:
            assert isinstance(datamodel, TableSortProxyModel)
            n = datamodel.rowCount()
            if predmodel is not None and predmodel.sortColumn() >= 0:
                sortind = numpy.argsort([
                    predmodel.mapToSource(predmodel.index(i, 0)).row()
                    for i in range(n)
                ])
                sortind = numpy.array(sortind, numpy.int)
                sortindicatorshown = True
            else:
                sortind = None

            datamodel.setSortIndices(sortind)

        self.predictionsview.horizontalHeader() \
            .setSortIndicatorShown(sortindicatorshown)

    def _reset_order(self):
        """Reset the row sorting to original input order."""
        datamodel = self.dataview.model()
        predmodel = self.predictionsview.model()
        if datamodel is not None:
            datamodel.sort(-1)
        if predmodel is not None:
            predmodel.sort(-1)
        self.predictionsview.horizontalHeader().setSortIndicatorShown(False)

    def _update_prediction_delegate(self):
        """Update the predicted probability visibility state"""
        if self.class_var is not None:
            delegate = PredictionsItemDelegate()
            if self.class_var.is_continuous:
                self._setup_delegate_continuous(delegate)
            else:
                self._setup_delegate_discrete(delegate)
                proxy = self.predictionsview.model()
                if proxy is not None:
                    proxy.setProbInd(
                        numpy.array(self.selected_classes, dtype=int))
            self.predictionsview.setItemDelegate(delegate)
            self.predictionsview.resizeColumnsToContents()
        self._update_spliter()

    def _setup_delegate_discrete(self, delegate):
        colors = [QtGui.QColor(*rgb) for rgb in self.class_var.colors]
        fmt = []
        if self.show_probabilities:
            fmt.append(" : ".join("{{dist[{}]:.2f}}".format(i)
                                  for i in sorted(self.selected_classes)))
        if self.show_predictions:
            fmt.append("{value!s}")
        delegate.setFormat(" \N{RIGHTWARDS ARROW} ".join(fmt))
        if self.draw_dist and colors is not None:
            delegate.setColors(colors)
        return delegate

    def _setup_delegate_continuous(self, delegate):
        delegate.setFormat("{{value:.{}f}}".format(
            self.class_var.number_of_decimals))

    def _update_spliter(self):
        if self.data is None:
            return

        def width(view):
            h_header = view.horizontalHeader()
            v_header = view.verticalHeader()
            return h_header.length() + v_header.width()

        w = width(self.predictionsview) + 4
        w1, w2 = self.splitter.sizes()
        self.splitter.setSizes([w, w1 + w2 - w])

    def commit(self):
        self._commit_predictions()
        self._commit_evaluation_results()

    def _commit_evaluation_results(self):
        slots = self._valid_predictors()
        if not slots or self.data.domain.class_var is None:
            self.Outputs.evaluation_results.send(None)
            return

        class_var = self.class_var
        nanmask = numpy.isnan(self.data.get_column_view(class_var)[0])
        data = self.data[~nanmask]
        N = len(data)
        results = Orange.evaluation.Results(data, store_data=True)
        results.folds = None
        results.row_indices = numpy.arange(N)
        results.actual = data.Y.ravel()
        results.predicted = numpy.vstack(
            tuple(p.results[0][~nanmask] for p in slots))
        if class_var and class_var.is_discrete:
            results.probabilities = numpy.array(
                [p.results[1][~nanmask] for p in slots])
        results.learner_names = [p.name for p in slots]
        self.Outputs.evaluation_results.send(results)

    def _commit_predictions(self):
        slots = self._valid_predictors()
        if not slots:
            self.Outputs.predictions.send(None)
            return

        if self.class_var and self.class_var.is_discrete:
            newmetas, newcolumns = self._classification_output_columns()
        else:
            newmetas, newcolumns = self._regression_output_columns()

        attrs = list(self.data.domain.attributes) if self.output_attrs else []
        metas = list(self.data.domain.metas) + newmetas
        domain = \
            Orange.data.Domain(attrs, self.data.domain.class_var, metas=metas)
        predictions = self.data.transform(domain)
        if newcolumns:
            newcolumns = numpy.hstack(
                [numpy.atleast_2d(cols) for cols in newcolumns])
            predictions.metas[:, -newcolumns.shape[1]:] = newcolumns
        self.Outputs.predictions.send(predictions)

    def _classification_output_columns(self):
        newmetas = []
        newcolumns = []
        slots = self._valid_predictors()
        if self.output_predictions:
            newmetas += [
                DiscreteVariable(name=p.name, values=self.class_values)
                for p in slots
            ]
            newcolumns += [p.results[0].reshape((-1, 1)) for p in slots]

        if self.output_probabilities:
            newmetas += [
                ContinuousVariable(name="%s (%s)" % (p.name, value))
                for p in slots for value in self.class_values
            ]
            newcolumns += [p.results[1] for p in slots]
        return newmetas, newcolumns

    def _regression_output_columns(self):
        slots = self._valid_predictors()
        newmetas = [ContinuousVariable(name=p.name) for p in slots]
        newcolumns = [p.results[0].reshape((-1, 1)) for p in slots]
        return newmetas, newcolumns

    def send_report(self):
        def merge_data_with_predictions():
            data_model = self.dataview.model()
            predictions_model = self.predictionsview.model()

            # use ItemDelegate to style prediction values
            style = lambda x: self.predictionsview.itemDelegate().displayText(
                x, QLocale())

            # iterate only over visible columns of data's QTableView
            iter_data_cols = list(
                filter(lambda x: not self.dataview.isColumnHidden(x),
                       range(data_model.columnCount())))

            # print header
            yield [''] + \
                  [predictions_model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
                   for col in range(predictions_model.columnCount())] + \
                  [data_model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
                   for col in iter_data_cols]

            # print data & predictions
            for i in range(data_model.rowCount()):
                yield [data_model.headerData(i, Qt.Vertical, Qt.DisplayRole)] + \
                      [style(predictions_model.data(predictions_model.index(i, j)))
                       for j in range(predictions_model.columnCount())] + \
                      [data_model.data(data_model.index(i, j))
                       for j in iter_data_cols]

        if self.data is not None and self.class_var is not None:
            text = self.infolabel.text().replace('\n', '<br>')
            if self.show_probabilities and self.selected_classes:
                text += '<br>Showing probabilities for: '
                text += ', '.join(
                    [self.class_values[i] for i in self.selected_classes])
            self.report_paragraph('Info', text)
            self.report_table("Data & Predictions",
                              merge_data_with_predictions(),
                              header_rows=1,
                              header_columns=1)

    @classmethod
    def predict(cls, predictor, data):
        class_var = predictor.domain.class_var
        if class_var:
            if class_var.is_discrete:
                return cls.predict_discrete(predictor, data)
            elif class_var.is_continuous:
                return cls.predict_continuous(predictor, data)

    @staticmethod
    def predict_discrete(predictor, data):
        return predictor(data, Model.ValueProbs)

    @staticmethod
    def predict_continuous(predictor, data):
        values = predictor(data, Model.Value)
        return values, [None] * len(data)
Ejemplo n.º 7
0
class OWPredictions(widget.OWWidget):
    name = "Predictions"
    icon = "icons/Predictions.svg"
    priority = 200
    description = "Display predictions of models for an input data set."
    inputs = [("Data", Orange.data.Table, "set_data"),
              ("Predictors", Model, "set_predictor", widget.Multiple)]
    outputs = [("Predictions", Orange.data.Table),
               ("Evaluation Results", Orange.evaluation.Results)]

    settingsHandler = settings.ClassValuesContextHandler()
    #: Display the full input dataset or only the target variable columns (if
    #: available)
    show_attrs = settings.Setting(True)
    #: Show predicted values (for discrete target variable)
    show_predictions = settings.Setting(True)
    #: Show predictions probabilities (for discrete target variable)
    show_probabilities = settings.Setting(True)
    #: List of selected class value indices in the "Show probabilities" list
    selected_classes = settings.ContextSetting([])
    #: Draw colored distribution bars
    draw_dist = settings.Setting(True)

    output_attrs = settings.Setting(True)
    output_predictions = settings.Setting(True)
    output_probabilities = settings.Setting(True)

    def __init__(self):
        super().__init__()

        #: Input data table
        self.data = None  # type: Optional[Orange.data.Table]
        #: A dict mapping input ids to PredictorSlot
        self.predictors = OrderedDict()  # type: Dict[object, PredictorSlot]
        #: A class variable (prediction target)
        self.class_var = None  # type: Optional[Orange.data.Variable]
        #: List of (discrete) class variable's values
        self.class_values = []  # type: List[str]

        box = gui.widgetBox(self.controlArea, "Info")
        self.infolabel = gui.widgetLabel(
            box, "No data on input\nPredictors: 0\nTask: N/A")
        self.infolabel.setMinimumWidth(150)
        gui.button(box,
                   self,
                   "Restore Original Order",
                   callback=self._reset_order,
                   tooltip="Show rows in the original order")

        self.classification_options = box = gui.widgetBox(
            self.controlArea,
            "Options (classification)",
            spacing=-1,
            addSpace=False)

        gui.checkBox(box,
                     self,
                     "show_predictions",
                     "Show predicted class",
                     callback=self._update_prediction_delegate)
        b = gui.checkBox(box,
                         self,
                         "show_probabilities",
                         "Show predicted probabilities",
                         callback=self._update_prediction_delegate)
        ibox = gui.indentedBox(box,
                               sep=gui.checkButtonOffsetHint(b),
                               addSpace=False)
        gui.listBox(ibox,
                    self,
                    "selected_classes",
                    "class_values",
                    callback=self._update_prediction_delegate,
                    selectionMode=QtGui.QListWidget.MultiSelection,
                    addSpace=False)
        gui.checkBox(box,
                     self,
                     "draw_dist",
                     "Draw distribution bars",
                     callback=self._update_prediction_delegate)

        box = gui.widgetBox(self.controlArea, "Data view")
        gui.checkBox(box,
                     self,
                     "show_attrs",
                     "Show full data set",
                     callback=self._update_column_visibility)

        box = gui.widgetBox(self.controlArea, "Output", spacing=-1)
        self.checkbox_class = gui.checkBox(box,
                                           self,
                                           "output_attrs",
                                           "Original data",
                                           callback=self.commit)
        self.checkbox_class = gui.checkBox(box,
                                           self,
                                           "output_predictions",
                                           "Predictions",
                                           callback=self.commit)
        self.checkbox_prob = gui.checkBox(box,
                                          self,
                                          "output_probabilities",
                                          "Probabilities",
                                          callback=self.commit)

        gui.rubber(self.controlArea)

        self.splitter = QtGui.QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
            handleWidth=2,
        )
        self.dataview = QtGui.QTableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollMode=QtGui.QTableView.ScrollPerPixel,
            selectionMode=QtGui.QTableView.NoSelection,
            focusPolicy=Qt.StrongFocus)
        self.predictionsview = QtGui.QTableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollMode=QtGui.QTableView.ScrollPerPixel,
            selectionMode=QtGui.QTableView.NoSelection,
            focusPolicy=Qt.StrongFocus,
            sortingEnabled=True,
        )

        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.predictionsview.verticalHeader().hide()

        dsbar = self.dataview.verticalScrollBar()
        psbar = self.predictionsview.verticalScrollBar()

        psbar.valueChanged.connect(dsbar.setValue)
        dsbar.valueChanged.connect(psbar.setValue)

        self.dataview.verticalHeader().setDefaultSectionSize(22)
        self.predictionsview.verticalHeader().setDefaultSectionSize(22)
        self.dataview.verticalHeader().sectionResized.connect(
            lambda index, _, size: self.predictionsview.verticalHeader(
            ).resizeSection(index, size))

        self.splitter.addWidget(self.dataview)
        self.splitter.addWidget(self.predictionsview)

        self.mainArea.layout().addWidget(self.splitter)
        self.spliter_restore_state = int(self.show_attrs), 300

    @check_sql_input
    def set_data(self, data):
        """Set the input data set"""
        self.data = data
        if data is None:
            self.dataview.setModel(None)
            self.predictionsview.setModel(None)
            self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        else:
            # force full reset of the view's HeaderView state
            self.dataview.setModel(None)
            model = TableModel(data, parent=None)
            modelproxy = TableSortProxyModel()
            modelproxy.setSourceModel(model)
            self.dataview.setModel(modelproxy)
            self._update_column_visibility()
            self.dataview.scrollTo(
                modelproxy.index(0, len(data.domain.attributes)))

        self.invalidate_predictions()

    def set_predictor(self, predictor=None, id=None):
        if id in self.predictors:
            if predictor is not None:
                self.predictors[id] = self.predictors[id]._replace(
                    predictor=predictor, name=predictor.name, results=None)
            else:
                del self.predictors[id]
        elif predictor is not None:
            self.predictors[id] = \
                PredictorSlot(predictor, predictor.name, None)

        if predictor is not None:
            self.class_var = predictor.domain.class_var

    def handleNewSignals(self):
        self.error(0)
        if self.data is not None:
            for inputid, pred in list(self.predictors.items()):
                if pred.results is None or numpy.isnan(pred.results[0]).all():
                    try:
                        results = self.predict(pred.predictor, self.data)
                    except ValueError as err:
                        err_msg = '{}:\n'.format(pred.predictor.name) + \
                                  str(err)
                        self.error(0, err_msg)
                        n, m = len(self.data), 1
                        if self.data.domain.has_discrete_class:
                            m = len(self.data.domain.class_var.values)
                        probabilities = numpy.full((n, m), numpy.nan)
                        results = (numpy.full(n, numpy.nan), probabilities)
                    self.predictors[inputid] = pred._replace(results=results)

        if not self.predictors:
            self.class_var = None

        self.classification_options.setVisible(self.class_var is not None
                                               and self.class_var.is_discrete)

        self.closeContext()
        if self.class_var is not None and self.class_var.is_discrete:
            self.class_values = list(self.class_var.values)
            self.selected_classes = list(range(len(self.class_values)))
            self.openContext(self.class_var)
        else:
            self.class_values = []
            self.selected_classes = []

        self._update_predictions_model()
        self._update_prediction_delegate()
        # Check for prediction target consistency
        target_vars = set(
            [p.predictor.domain.class_var for p in self.predictors.values()])

        if len(target_vars) > 1:
            self.warning(0, "Inconsistent class variables")
        else:
            self.warning(0)

        # Update the Info box text.
        info = []
        if self.data is not None:
            info.append("Data: {} instances.".format(len(self.data)))
        else:
            info.append("Data: N/A")

        if self.predictors:
            info.append("Predictors: {}".format(len(self.predictors)))
        else:
            info.append("Predictors: N/A")

        if self.class_var is not None:
            if self.class_var.is_discrete:
                info.append("Task: Classification")
                self.checkbox_class.setEnabled(True)
                self.checkbox_prob.setEnabled(True)
            else:
                info.append("Task: Regression")
                self.checkbox_class.setEnabled(False)
                self.checkbox_prob.setEnabled(False)
        else:
            info.append("Task: N/A")

        self.infolabel.setText("\n".join(info))
        self.commit()

    def invalidate_predictions(self):
        for inputid, pred in list(self.predictors.items()):
            self.predictors[inputid] = pred._replace(results=None)

    def _update_predictions_model(self):
        """Update the prediction view model."""
        if self.data is not None:
            slots = self.predictors.values()
            results = []
            for p in slots:
                values, prob = p.results
                if p.predictor.domain.class_var.is_discrete:
                    values = [
                        Orange.data.Value(p.predictor.domain.class_var, v)
                        for v in values
                    ]
                results.append((values, prob))
            results = list(zip(*(zip(*res) for res in results)))

            headers = [p.name for p in slots]
            model = PredictionsModel(results, headers)
        else:
            model = None

        predmodel = PredictionsSortProxyModel()
        predmodel.setSourceModel(model)
        predmodel.setDynamicSortFilter(True)
        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.predictionsview.setModel(predmodel)
        self.predictionsview.horizontalHeader().setSortIndicatorShown(False)
        predmodel.layoutChanged.connect(self._update_data_sort_order)
        self._update_data_sort_order()
        self.predictionsview.resizeColumnsToContents()

    def _update_column_visibility(self):
        """Update data column visibility."""
        if self.data is not None:
            for i in range(len(self.data.domain.attributes)):
                self.dataview.setColumnHidden(i, not self.show_attrs)
            if self.data.domain.class_var:
                self.dataview.setColumnHidden(len(self.data.domain.attributes),
                                              False)
            self._update_spliter()

    def _update_data_sort_order(self):
        """Update data row order to match the current predictions view order"""
        datamodel = self.dataview.model()  # data model proxy
        predmodel = self.predictionsview.model()  # predictions model proxy
        sortindicatorshown = False
        if datamodel is not None:
            assert isinstance(datamodel, TableSortProxyModel)
            n = datamodel.rowCount()
            if predmodel is not None and predmodel.sortColumn() >= 0:
                sortind = numpy.argsort([
                    predmodel.mapToSource(predmodel.index(i, 0)).row()
                    for i in range(n)
                ])
                sortind = numpy.array(sortind, numpy.int)
                sortindicatorshown = True
            else:
                sortind = None

            datamodel.setSortIndices(sortind)

        self.predictionsview.horizontalHeader() \
            .setSortIndicatorShown(sortindicatorshown)

    def _reset_order(self):
        """Reset the row sorting to original input order."""
        datamodel = self.dataview.model()
        predmodel = self.predictionsview.model()
        if datamodel is not None:
            datamodel.sort(-1)
        if predmodel is not None:
            predmodel.sort(-1)
        self.predictionsview.horizontalHeader().setSortIndicatorShown(False)

    def _update_prediction_delegate(self):
        """Update the predicted probability visibility state"""
        delegate = PredictionsItemDelegate()
        colors = None
        if self.class_var is not None:
            if self.class_var.is_discrete:
                colors = [QtGui.QColor(*rgb) for rgb in self.class_var.colors]
                dist_fmt = ""
                pred_fmt = ""
                if self.show_probabilities:
                    decimals = 2
                    float_fmt = "{{dist[{}]:.{}f}}"
                    dist_fmt = " : ".join(
                        float_fmt.format(i, decimals)
                        for i in range(len(self.class_var.values))
                        if i in self.selected_classes)
                if self.show_predictions:
                    pred_fmt = "{value!s}"
                if pred_fmt and dist_fmt:
                    fmt = dist_fmt + " \N{RIGHTWARDS ARROW} " + pred_fmt
                else:
                    fmt = dist_fmt or pred_fmt
            else:
                assert isinstance(self.class_var, ContinuousVariable)
                fmt = "{{value:.{}f}}".format(
                    self.class_var.number_of_decimals)

            delegate.setFormat(fmt)
            if self.draw_dist and colors is not None:
                delegate.setColors(colors)
            self.predictionsview.setItemDelegate(delegate)
            self.predictionsview.resizeColumnsToContents()

        if self.class_var is not None and self.class_var.is_discrete:
            proxy = self.predictionsview.model()
            if proxy is not None:
                proxy.setProbInd(numpy.array(self.selected_classes, dtype=int))

    def _update_spliter(self):
        if self.data is None:
            return

        def width(view):
            h_header = view.horizontalHeader()
            v_header = view.verticalHeader()
            return h_header.length() + v_header.width()

        def widthForColumns(view, start=0, end=None):
            h_header = view.horizontalHeader()
            v_header = view.verticalHeader()
            width = sum([
                h_header.sectionSize(i)
                for i in range(h_header.count())[start:end]
            ])
            return v_header.width() + width

        if not self.show_attrs:
            w1, w2 = self.splitter.sizes()
            # w = widthHint(self.dataview)
            w = width(self.dataview) + 4
            self.splitter.setSizes([w, w1 + w2 - w])
            self.dataview.setMaximumWidth(w)

            state, w = self.spliter_restore_state
            if state == 0:
                # save dataview width on change from 'show all' to 'hide all'
                self.spliter_restore_state = 1, w1
        else:
            w1, w2 = self.splitter.sizes()
            state, w = self.spliter_restore_state
            if state == 1:
                # restore dataview on change from 'hide all' to 'show all'
                # extend the dataview to the saved width but no further
                # then 2/3 of the available space
                w = min(w, (w1 + w2) * 2 // 3)
            else:
                # shrink the dataview width if its contents are smaller then
                # its width
                w1, w2 = self.splitter.sizes()
                w = widthForColumns(self.dataview, -2) + 4
                w = min(w, (w1 + w2) // 2)
                predw = widthForColumns(self.predictionsview)
                w = max(w, min(w1 + w2 - predw - 20, w1 + w2 - w))
            self.splitter.setSizes([w, w1 + w2 - w])
            self.dataview.setMaximumWidth(QWIDGETSIZE_MAX)

            self.spliter_restore_state = 0, w
            self.dataview.horizontalScrollBar().triggerAction(
                QtGui.QScrollBar.SliderToMaximum)

    def commit(self):
        if self.data is None or not self.predictors:
            self.send("Predictions", None)
            self.send("Evaluation Results", None)
            return

        predictor = next(iter(self.predictors.values())).predictor
        class_var = predictor.domain.class_var
        classification = class_var and class_var.is_discrete

        newmetas = []
        newcolumns = []
        slots = list(self.predictors.values())

        if classification:
            if self.output_predictions:
                mc = [
                    DiscreteVariable(name=p.name, values=class_var.values)
                    for p in slots
                ]
                newmetas.extend(mc)
                newcolumns.extend(p.results[0].reshape((-1, 1)) for p in slots)

            if self.output_probabilities:
                for p in slots:
                    m = [
                        ContinuousVariable(name="%s(%s)" % (p.name, value))
                        for value in class_var.values
                    ]
                    newmetas.extend(m)
                newcolumns.extend(p.results[1] for p in slots)

        else:
            # regression
            mc = [
                ContinuousVariable(name=p.name)
                for p in self.predictors.values()
            ]
            newmetas.extend(mc)
            newcolumns.extend(p.results[0].reshape((-1, 1)) for p in slots)

        if self.output_attrs:
            attrs = list(self.data.domain.attributes)
        else:
            attrs = []
        metas = list(self.data.domain.metas) + newmetas

        domain = Orange.data.Domain(attrs,
                                    self.data.domain.class_var,
                                    metas=metas)
        predictions = self.data.from_table(domain, self.data)

        if newcolumns:
            newcolumns = numpy.hstack(
                [numpy.atleast_2d(cols) for cols in newcolumns])
            predictions.metas[:, -newcolumns.shape[1]:] = newcolumns

        results = None
        if self.data.domain.class_var == class_var:
            N = len(self.data)
            results = Orange.evaluation.Results(self.data, store_data=True)
            results.folds = None
            results.row_indices = numpy.arange(N)
            results.actual = self.data.Y.ravel()
            results.predicted = numpy.vstack(tuple(p.results[0]
                                                   for p in slots))
            if classification:
                results.probabilities = numpy.array(
                    [p.results[1] for p in slots])
            results.learner_names = [p.name for p in slots]

        self.send("Predictions", predictions)
        self.send("Evaluation Results", results)

    @classmethod
    def predict(cls, predictor, data):
        class_var = predictor.domain.class_var
        if class_var:
            if class_var.is_discrete:
                return cls.predict_discrete(predictor, data)
            elif class_var.is_continuous:
                return cls.predict_continuous(predictor, data)

    @staticmethod
    def predict_discrete(predictor, data):
        return predictor(data, Model.ValueProbs)

    @staticmethod
    def predict_continuous(predictor, data):
        values = predictor(data, Model.Value)
        return values, [None] * len(data)
Ejemplo n.º 8
0
class OWPredictions(OWWidget):
    name = "Predictions"
    icon = "icons/Predictions.svg"
    priority = 200
    description = "Display predictions of models for an input dataset."
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)
        predictors = Input("Predictors", Model, multiple=True)

    class Outputs:
        predictions = Output("Predictions", Orange.data.Table)
        evaluation_results = Output("Evaluation Results", Results)

    class Warning(OWWidget.Warning):
        empty_data = Msg("Empty dataset")
        wrong_targets = Msg(
            "Some model(s) predict a different target (see more ...)\n{}")

    class Error(OWWidget.Error):
        predictor_failed = Msg("Some predictor(s) failed (see more ...)\n{}")
        scorer_failed = Msg("Some scorer(s) failed (see more ...)\n{}")

    settingsHandler = settings.ClassValuesContextHandler()
    score_table = settings.SettingProvider(ScoreTable)

    #: List of selected class value indices in the `class_values` list
    selected_classes = settings.ContextSetting([])

    def __init__(self):
        super().__init__()

        self.data = None  # type: Optional[Orange.data.Table]
        self.predictors = {}  # type: Dict[object, PredictorSlot]
        self.class_values = []  # type: List[str]
        self._delegates = []

        gui.listBox(self.controlArea,
                    self,
                    "selected_classes",
                    "class_values",
                    box="Show probabibilities for",
                    callback=self._update_prediction_delegate,
                    selectionMode=QListWidget.MultiSelection,
                    addSpace=False,
                    sizePolicy=(QSizePolicy.Preferred, QSizePolicy.Preferred))
        gui.rubber(self.controlArea)
        gui.button(self.controlArea,
                   self,
                   "Restore Original Order",
                   callback=self._reset_order,
                   tooltip="Show rows in the original order")

        table_opts = dict(horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                          horizontalScrollMode=QTableView.ScrollPerPixel,
                          selectionMode=QTableView.NoSelection,
                          focusPolicy=Qt.StrongFocus)
        self.dataview = TableView(verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                                  **table_opts)
        self.predictionsview = TableView(
            sortingEnabled=True,
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
            **table_opts)
        self.dataview.verticalHeader().hide()
        dsbar = self.dataview.verticalScrollBar()
        psbar = self.predictionsview.verticalScrollBar()
        psbar.valueChanged.connect(dsbar.setValue)
        dsbar.valueChanged.connect(psbar.setValue)

        self.dataview.verticalHeader().setDefaultSectionSize(22)
        self.predictionsview.verticalHeader().setDefaultSectionSize(22)
        self.dataview.verticalHeader().sectionResized.connect(
            lambda index, _, size: self.predictionsview.verticalHeader(
            ).resizeSection(index, size))

        self.splitter = QSplitter(orientation=Qt.Horizontal,
                                  childrenCollapsible=False,
                                  handleWidth=2)
        self.splitter.addWidget(self.predictionsview)
        self.splitter.addWidget(self.dataview)

        self.score_table = ScoreTable(self)
        self.vsplitter = gui.vBox(self.mainArea)
        self.vsplitter.layout().addWidget(self.splitter)
        self.vsplitter.layout().addWidget(self.score_table.view)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        self.Warning.empty_data(shown=data is not None and not data)
        self.data = data
        if not data:
            self.dataview.setModel(None)
            self.predictionsview.setModel(None)
        else:
            # force full reset of the view's HeaderView state
            self.dataview.setModel(None)
            model = TableModel(data, parent=None)
            modelproxy = TableSortProxyModel()
            modelproxy.setSourceModel(model)
            self.dataview.setModel(modelproxy)

        self._invalidate_predictions()

    @property
    def class_var(self):
        return self.data and self.data.domain.class_var

    # pylint: disable=redefined-builtin
    @Inputs.predictors
    def set_predictor(self, predictor=None, id=None):
        if id in self.predictors:
            if predictor is not None:
                self.predictors[id] = self.predictors[id]._replace(
                    predictor=predictor, name=predictor.name, results=None)
            else:
                del self.predictors[id]
        elif predictor is not None:
            self.predictors[id] = PredictorSlot(predictor, predictor.name,
                                                None)

    def _set_class_values(self):
        class_values = []
        for slot in self.predictors.values():
            class_var = slot.predictor.domain.class_var
            if class_var and class_var.is_discrete:
                for value in class_var.values:
                    if value not in class_values:
                        class_values.append(value)

        if self.class_var and self.class_var.is_discrete:
            values = self.class_var.values
            self.class_values = sorted(class_values,
                                       key=lambda val: val not in values)
            self.selected_classes = [
                i for i, name in enumerate(class_values) if name in values
            ]
        else:
            self.class_values = class_values  # This assignment updates listview
            self.selected_classes = []

    def handleNewSignals(self):
        self._set_class_values()
        self._call_predictors()
        self._update_scores()
        self._update_predictions_model()
        self._update_prediction_delegate()
        self._set_errors()
        self._update_info()
        self.commit()

    def _call_predictors(self):
        if not self.data:
            return
        if self.class_var:
            domain = self.data.domain
            classless_data = self.data.transform(
                Domain(domain.attributes, None, domain.metas))
        else:
            classless_data = self.data

        for inputid, slot in self.predictors.items():
            if isinstance(slot.results, Results):
                continue

            predictor = slot.predictor
            try:
                if predictor.domain.class_var.is_discrete:
                    pred, prob = predictor(classless_data, Model.ValueProbs)
                else:
                    pred = predictor(classless_data, Model.Value)
                    prob = numpy.zeros((len(pred), 0))
            except (ValueError, DomainTransformationError) as err:
                self.predictors[inputid] = \
                    slot._replace(results=f"{predictor.name}: {err}")
                continue

            results = Results()
            results.data = self.data
            results.domain = self.data.domain
            results.row_indices = numpy.arange(len(self.data))
            results.folds = (Ellipsis, )
            results.actual = self.data.Y
            results.unmapped_probabilities = prob
            results.unmapped_predicted = pred
            results.probabilities = results.predicted = None
            self.predictors[inputid] = slot._replace(results=results)

            target = predictor.domain.class_var
            if target != self.class_var:
                continue

            if target is not self.class_var and target.is_discrete:
                backmappers, n_values = predictor.get_backmappers(self.data)
                prob = predictor.backmap_probs(prob, n_values, backmappers)
                pred = predictor.backmap_value(pred, prob, n_values,
                                               backmappers)
            results.predicted = pred.reshape((1, len(self.data)))
            results.probabilities = prob.reshape((1, ) + prob.shape)

    def _update_scores(self):
        model = self.score_table.model
        model.clear()
        scorers = usable_scorers(self.class_var) if self.class_var else []
        self.score_table.update_header(scorers)
        errors = []
        for inputid, pred in self.predictors.items():
            results = self.predictors[inputid].results
            if not isinstance(results, Results) or results.predicted is None:
                continue
            row = [
                QStandardItem(learner_name(pred.predictor)),
                QStandardItem("N/A"),
                QStandardItem("N/A")
            ]
            for scorer in scorers:
                item = QStandardItem()
                try:
                    score = scorer_caller(scorer, results)()[0]
                    item.setText(f"{score:.3f}")
                except Exception as exc:  # pylint: disable=broad-except
                    item.setToolTip(str(exc))
                    if scorer.name in self.score_table.shown_scores:
                        errors.append(str(exc))
                row.append(item)
            self.score_table.model.appendRow(row)

        view = self.score_table.view
        if model.rowCount():
            view.setVisible(True)
            view.ensurePolished()
            view.setFixedHeight(5 + view.horizontalHeader().height() +
                                view.verticalHeader().sectionSize(0) *
                                model.rowCount())
        else:
            view.setVisible(False)

        self.Error.scorer_failed("\n".join(errors), shown=bool(errors))

    def _set_errors(self):
        # Not all predictors are run every time, so errors can't be collected
        # in _call_predictors
        errors = "\n".join(f"- {p.predictor.name}: {p.results}"
                           for p in self.predictors.values()
                           if isinstance(p.results, str) and p.results)
        self.Error.predictor_failed(errors, shown=bool(errors))

        if self.class_var:
            inv_targets = "\n".join(
                f"- {pred.name} predicts '{pred.domain.class_var.name}'"
                for pred in (p.predictor for p in self.predictors.values()
                             if isinstance(p.results, Results)
                             and p.results.probabilities is None))
            self.Warning.wrong_targets(inv_targets, shown=bool(inv_targets))
        else:
            self.Warning.wrong_targets.clear()

    def _update_info(self):
        n_predictors = len(self.predictors)
        if not self.data and not n_predictors:
            self.info.set_input_summary(self.info.NoInput)
            return

        n_valid = len(self._non_errored_predictors())
        summary = str(len(self.data)) if self.data else "0"
        details = f"{len(self.data)} instances" if self.data else "No data"
        details += f"\n{n_predictors} models" if n_predictors else "No models"
        if n_valid != n_predictors:
            details += f" ({n_predictors - n_valid} failed)"
        self.info.set_input_summary(summary, details)

    def _invalidate_predictions(self):
        for inputid, pred in list(self.predictors.items()):
            self.predictors[inputid] = pred._replace(results=None)

    def _non_errored_predictors(self):
        return [
            p for p in self.predictors.values()
            if isinstance(p.results, Results)
        ]

    def _update_predictions_model(self):
        results = []
        headers = []
        for p in self._non_errored_predictors():
            values = p.results.unmapped_predicted
            target = p.predictor.domain.class_var
            if target.is_discrete:
                prob = p.results.unmapped_probabilities
                values = [Value(target, v) for v in values]
            else:
                prob = numpy.zeros((len(values), 0))
            results.append((values, prob))
            headers.append(p.predictor.name)

        if results:
            results = list(zip(*(zip(*res) for res in results)))
            model = PredictionsModel(results, headers)
        else:
            model = None

        predmodel = PredictionsSortProxyModel()
        predmodel.setSourceModel(model)
        predmodel.setDynamicSortFilter(True)
        self.predictionsview.setModel(predmodel)
        hheader = self.predictionsview.horizontalHeader()
        hheader.setSortIndicatorShown(False)
        # SortFilterProxyModel is slow due to large abstraction overhead
        # (every comparison triggers multiple `model.index(...)`,
        # model.rowCount(...), `model.parent`, ... calls)
        hheader.setSectionsClickable(predmodel.rowCount() < 20000)

        predmodel.layoutChanged.connect(self._update_data_sort_order)
        self._update_data_sort_order()
        self.predictionsview.resizeColumnsToContents()

    def _update_data_sort_order(self):
        datamodel = self.dataview.model()  # data model proxy
        predmodel = self.predictionsview.model()  # predictions model proxy
        sortindicatorshown = False
        if datamodel is not None:
            assert isinstance(datamodel, TableSortProxyModel)
            n = datamodel.rowCount()
            if predmodel is not None and predmodel.sortColumn() >= 0:
                sortind = numpy.argsort([
                    predmodel.mapToSource(predmodel.index(i, 0)).row()
                    for i in range(n)
                ])
                sortind = numpy.array(sortind, numpy.int)
                sortindicatorshown = True
            else:
                sortind = None

            datamodel.setSortIndices(sortind)

        self.predictionsview.horizontalHeader() \
            .setSortIndicatorShown(sortindicatorshown)

    def _reset_order(self):
        datamodel = self.dataview.model()
        predmodel = self.predictionsview.model()
        if datamodel is not None:
            datamodel.sort(-1)
        if predmodel is not None:
            predmodel.sort(-1)
        self.predictionsview.horizontalHeader().setSortIndicatorShown(False)

    def _update_prediction_delegate(self):
        selected = {self.class_values[i] for i in self.selected_classes}
        self._delegates.clear()
        for col, slot in enumerate(self.predictors.values()):
            target = slot.predictor.domain.class_var
            shown_probs = () if target.is_continuous else \
                [i for i, name in enumerate(target.values) if name in selected]
            delegate = PredictionsItemDelegate(target, shown_probs)
            # QAbstractItemView does not take ownership of delegates, so we must
            self._delegates.append(delegate)
            self.predictionsview.setItemDelegateForColumn(col, delegate)
            self.predictionsview.setColumnHidden(col, False)

        self.predictionsview.resizeColumnsToContents()
        self._update_spliter()

    def _update_spliter(self):
        if not self.data:
            return

        def width(view):
            h_header = view.horizontalHeader()
            v_header = view.verticalHeader()
            return h_header.length() + v_header.width()

        w = width(self.predictionsview) + 4
        w1, w2 = self.splitter.sizes()
        self.splitter.setSizes([w, w1 + w2 - w])

    def commit(self):
        self._commit_predictions()
        self._commit_evaluation_results()

    def _commit_evaluation_results(self):
        slots = [
            p for p in self._non_errored_predictors()
            if p.results.predicted is not None
        ]
        if not slots:
            self.Outputs.evaluation_results.send(None)
            return

        nanmask = numpy.isnan(self.data.get_column_view(self.class_var)[0])
        data = self.data[~nanmask]
        results = Results(data, store_data=True)
        results.folds = None
        results.row_indices = numpy.arange(len(data))
        results.actual = data.Y.ravel()
        results.predicted = numpy.vstack(
            tuple(p.results.predicted[0][~nanmask] for p in slots))
        if self.class_var and self.class_var.is_discrete:
            results.probabilities = numpy.array(
                [p.results.probabilities[0][~nanmask] for p in slots])
        results.learner_names = [p.name for p in slots]
        self.Outputs.evaluation_results.send(results)

    def _commit_predictions(self):
        if not self.data:
            self.Outputs.predictions.send(None)
            return

        newmetas = []
        newcolumns = []
        for slot in self._non_errored_predictors():
            if slot.predictor.domain.class_var.is_discrete:
                self._add_classification_out_columns(slot, newmetas,
                                                     newcolumns)
            else:
                self._add_regression_out_columns(slot, newmetas, newcolumns)

        attrs = list(self.data.domain.attributes)
        metas = list(self.data.domain.metas) + newmetas
        domain = Orange.data.Domain(attrs, self.class_var, metas=metas)
        predictions = self.data.transform(domain)
        if newcolumns:
            newcolumns = numpy.hstack(
                [numpy.atleast_2d(cols) for cols in newcolumns])
            predictions.metas[:, -newcolumns.shape[1]:] = newcolumns
        self.Outputs.predictions.send(predictions)

    @staticmethod
    def _add_classification_out_columns(slot, newmetas, newcolumns):
        # Mapped or unmapped predictions?!
        # Or provide a checkbox so the user decides?
        pred = slot.predictor
        name = pred.name
        values = pred.domain.class_var.values
        newmetas.append(DiscreteVariable(name=name, values=values))
        newcolumns.append(slot.results.unmapped_predicted.reshape(-1, 1))
        newmetas += [
            ContinuousVariable(name=f"{name} ({value})") for value in values
        ]
        newcolumns.append(slot.results.unmapped_probabilities)

    @staticmethod
    def _add_regression_out_columns(slot, newmetas, newcolumns):
        newmetas.append(ContinuousVariable(name=slot.predictor.name))
        newcolumns.append(slot.results.unmapped_predicted.reshape((-1, 1)))

    def send_report(self):
        def merge_data_with_predictions():
            data_model = self.dataview.model()
            predictions_model = self.predictionsview.model()

            # use ItemDelegate to style prediction values
            style = lambda x: self.predictionsview.itemDelegate().displayText(
                x, QLocale())

            # iterate only over visible columns of data's QTableView
            iter_data_cols = list(
                filter(lambda x: not self.dataview.isColumnHidden(x),
                       range(data_model.columnCount())))

            # print header
            yield [''] + \
                  [predictions_model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
                   for col in range(predictions_model.columnCount())] + \
                  [data_model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
                   for col in iter_data_cols]

            # print data & predictions
            for i in range(data_model.rowCount()):
                yield [data_model.headerData(i, Qt.Vertical, Qt.DisplayRole)] + \
                      [style(predictions_model.data(predictions_model.index(i, j)))
                       for j in range(predictions_model.columnCount())] + \
                      [data_model.data(data_model.index(i, j))
                       for j in iter_data_cols]

        if self.data:
            text = self.infolabel.text().replace('\n', '<br>')
            if self.selected_classes:
                text += '<br>Showing probabilities for: '
                text += ', '.join(
                    [self.class_values[i] for i in self.selected_classes])
            self.report_paragraph('Info', text)
            self.report_table("Data & Predictions",
                              merge_data_with_predictions(),
                              header_rows=1,
                              header_columns=1)
Ejemplo n.º 9
0
class OWPythagorasTree(OWWidget):
    name = 'Pythagorean Tree'
    description = 'Pythagorean Tree visualization for tree like-structures.'
    icon = 'icons/PythagoreanTree.svg'
    keywords = ["fractal"]

    priority = 1000

    class Inputs:
        tree = Input("Tree", TreeModel)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    # Enable the save as feature
    graph_name = 'scene'

    # Settings
    settingsHandler = settings.ClassValuesContextHandler()

    depth_limit = settings.ContextSetting(10)
    target_class_index = settings.ContextSetting(0)
    size_calc_idx = settings.Setting(0)
    size_log_scale = settings.Setting(2)
    tooltips_enabled = settings.Setting(True)
    show_legend = settings.Setting(False)

    LEGEND_OPTIONS = {
        'corner': Anchorable.BOTTOM_RIGHT,
        'offset': (10, 10),
    }

    def __init__(self):
        super().__init__()
        # Instance variables
        self.model = None
        self.data = None
        # The tree adapter instance which is passed from the outside
        self.tree_adapter = None
        self.legend = None

        self.color_palette = None

        # Different methods to calculate the size of squares
        self.SIZE_CALCULATION = [
            ('Normal', lambda x: x),
            ('Square root', lambda x: sqrt(x)),
            ('Logarithmic', lambda x: log(x * self.size_log_scale + 1)),
        ]

        # CONTROL AREA
        # Tree info area
        box_info = gui.widgetBox(self.controlArea, 'Tree Info')
        self.infolabel = gui.widgetLabel(box_info)
        self.info.set_output_summary(self.info.NoOutput)

        # Display settings area
        box_display = gui.widgetBox(self.controlArea, 'Display Settings')
        self.depth_slider = gui.hSlider(box_display,
                                        self,
                                        'depth_limit',
                                        label='Depth',
                                        ticks=False,
                                        callback=self.update_depth)
        self.target_class_combo = gui.comboBox(box_display,
                                               self,
                                               'target_class_index',
                                               label='Target class',
                                               orientation=Qt.Horizontal,
                                               items=[],
                                               contentsLength=8,
                                               searchable=True,
                                               callback=self.update_colors)
        self.size_calc_combo = gui.comboBox(
            box_display,
            self,
            'size_calc_idx',
            label='Size',
            orientation=Qt.Horizontal,
            items=list(zip(*self.SIZE_CALCULATION))[0],
            contentsLength=8,
            callback=self.update_size_calc)
        self.log_scale_box = gui.hSlider(box_display,
                                         self,
                                         'size_log_scale',
                                         label='Log scale factor',
                                         minValue=1,
                                         maxValue=100,
                                         ticks=False,
                                         callback=self.invalidate_tree)

        # Plot properties area
        box_plot = gui.widgetBox(self.controlArea, 'Plot Properties')
        self.cb_show_tooltips = gui.checkBox(
            box_plot,
            self,
            'tooltips_enabled',
            label='Enable tooltips',
            callback=self.update_tooltip_enabled)
        self.cb_show_legend = gui.checkBox(box_plot,
                                           self,
                                           'show_legend',
                                           label='Show legend',
                                           callback=self.update_show_legend)

        gui.rubber(self.controlArea)

        gui.button(self.buttonsArea,
                   self,
                   label="Redraw",
                   callback=self.redraw)

        self.controlArea.setSizePolicy(QSizePolicy.Preferred,
                                       QSizePolicy.Expanding)

        # MAIN AREA
        self.scene = TreeGraphicsScene(self)
        self.scene.selectionChanged.connect(self.commit)
        self.view = TreeGraphicsView(self.scene, padding=(150, 150))
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.mainArea.layout().addWidget(self.view)

        self.ptree = PythagorasTreeViewer(self)
        self.scene.addItem(self.ptree)
        self.view.set_central_widget(self.ptree)

        self.resize(800, 500)
        # Clear the widget to correctly set the intial values
        self.clear()

    @Inputs.tree
    def set_tree(self, model=None):
        """When a different tree is given."""
        self.closeContext()
        self.clear()
        self.model = model

        if model is not None:
            self.data = model.instances

            self._update_target_class_combo()
            self.tree_adapter = self._get_tree_adapter(self.model)
            self.ptree.clear()

            self.ptree.set_tree(
                self.tree_adapter,
                weight_adjustment=self.SIZE_CALCULATION[self.size_calc_idx][1],
                target_class_index=self.target_class_index,
            )

            self._update_depth_slider()
            self.color_palette = self.ptree.root.color_palette
            self._update_legend_colors()
            self._update_legend_visibility()
            self._update_info_box()

            self._update_main_area()

            self.openContext(
                model.domain.class_var if model.domain is not None else None)

        self.update_depth()

        # The forest widget sets the following attributes on the tree,
        # describing the settings on the forest widget. To keep the tree
        # looking the same as on the forest widget, we prefer these settings to
        # context settings, if set.
        if hasattr(model, "meta_target_class_index"):
            self.target_class_index = model.meta_target_class_index
            self.update_colors()
        if hasattr(model, "meta_size_calc_idx"):
            self.size_calc_idx = model.meta_size_calc_idx
            self.update_size_calc()
        if hasattr(model, "meta_depth_limit"):
            self.depth_limit = model.meta_depth_limit
            self.update_depth()

        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, None))

    def clear(self):
        """Clear all relevant data from the widget."""
        self.model = None
        self.data = None
        self.tree_adapter = None

        if self.legend is not None:
            self.scene.removeItem(self.legend)
        self.legend = None

        self.ptree.clear()
        self._clear_info_box()
        self._clear_target_class_combo()
        self._clear_depth_slider()
        self._update_log_scale_slider()

    def update_depth(self):
        """This method should be called when the depth changes"""
        self.ptree.set_depth_limit(self.depth_limit)

    def update_colors(self):
        """When the target class / node coloring needs to be updated."""
        self.ptree.target_class_changed(self.target_class_index)
        self._update_legend_colors()

    def update_size_calc(self):
        """When the tree size calculation is updated."""
        self._update_log_scale_slider()
        self.invalidate_tree()

    def redraw(self):
        if self.data is None:
            return
        self.tree_adapter.shuffle_children()
        self.invalidate_tree()

    def invalidate_tree(self):
        """When the tree needs to be completely recalculated."""
        if self.model is not None:
            self.ptree.set_tree(
                self.tree_adapter,
                weight_adjustment=self.SIZE_CALCULATION[self.size_calc_idx][1],
                target_class_index=self.target_class_index,
            )
            self.ptree.set_depth_limit(self.depth_limit)
            self._update_main_area()

    def update_tooltip_enabled(self):
        """When the tooltip visibility is changed and need to be updated."""
        self.ptree.tooltip_changed(self.tooltips_enabled)

    def update_show_legend(self):
        """When the legend visibility needs to be updated."""
        self._update_legend_visibility()

    def _update_info_box(self):
        self.infolabel.setText('Nodes: {}\nDepth: {}'.format(
            self.tree_adapter.num_nodes, self.tree_adapter.max_depth))

    def _update_depth_slider(self):
        self.depth_slider.parent().setEnabled(True)
        self.depth_slider.setMaximum(self.tree_adapter.max_depth)
        self._set_max_depth()

    def _update_legend_visibility(self):
        if self.legend is not None:
            self.legend.setVisible(self.show_legend)

    def _update_log_scale_slider(self):
        """On calc method combo box changed."""
        self.log_scale_box.parent().setEnabled(
            self.SIZE_CALCULATION[self.size_calc_idx][0] == 'Logarithmic')

    def _clear_info_box(self):
        self.infolabel.setText('No tree on input')

    def _clear_depth_slider(self):
        self.depth_slider.parent().setEnabled(False)
        self.depth_slider.setMaximum(0)

    def _clear_target_class_combo(self):
        self.target_class_combo.clear()
        self.target_class_index = -1

    def _set_max_depth(self):
        """Set the depth to the max depth and update appropriate actors."""
        self.depth_limit = self.tree_adapter.max_depth
        self.depth_slider.setValue(self.depth_limit)

    def _update_main_area(self):
        # refresh the scene rect, cuts away the excess whitespace, and adds
        # padding for panning.
        self.scene.setSceneRect(self.view.central_widget_rect())
        # reset the zoom level
        self.view.recalculate_and_fit()
        self.view.update_anchored_items()

    def _get_tree_adapter(self, model):
        if isinstance(model, SklModel):
            return SklTreeAdapter(model)
        return TreeAdapter(model)

    def onDeleteWidget(self):
        """When deleting the widget."""
        super().onDeleteWidget()
        self.clear()

    def commit(self):
        """Commit the selected data to output."""
        if self.data is None:
            self.info.set_output_summary(self.info.NoOutput)
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(None)
            return

        nodes = [
            i.tree_node.label for i in self.scene.selectedItems()
            if isinstance(i, SquareGraphicsItem)
        ]
        data = self.tree_adapter.get_instances_in_nodes(nodes)

        summary = len(data) if data else self.info.NoOutput
        details = format_summary_details(data) if data else ""
        self.info.set_output_summary(summary, details)
        self.Outputs.selected_data.send(data)
        selected_indices = self.tree_adapter.get_indices(nodes)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, selected_indices))

    def send_report(self):
        """Send report."""
        self.report_plot()

    def _update_target_class_combo(self):
        self._clear_target_class_combo()
        label = [
            x for x in self.target_class_combo.parent().children()
            if isinstance(x, QLabel)
        ][0]

        if self.data.domain.has_discrete_class:
            label_text = 'Target class'
            values = [c.title() for c in self.data.domain.class_vars[0].values]
            values.insert(0, 'None')
        else:
            label_text = 'Node color'
            values = list(ContinuousTreeNode.COLOR_METHODS.keys())
        label.setText(label_text)
        self.target_class_combo.addItems(values)
        # set it to 0, context will change if required
        self.target_class_index = 0

    def _update_legend_colors(self):
        if self.legend is not None:
            self.scene.removeItem(self.legend)

        if self.data.domain.has_discrete_class:
            self._classification_update_legend_colors()
        else:
            self._regression_update_legend_colors()

    def _classification_update_legend_colors(self):
        if self.target_class_index == 0:
            self.legend = OWDiscreteLegend(domain=self.model.domain,
                                           **self.LEGEND_OPTIONS)
        else:
            items = ((self.target_class_combo.itemText(
                self.target_class_index),
                      self.color_palette[self.target_class_index - 1]),
                     ('other', QColor('#ffffff')))
            self.legend = OWDiscreteLegend(items=items, **self.LEGEND_OPTIONS)

        self.legend.setVisible(self.show_legend)
        self.scene.addItem(self.legend)

    def _regression_update_legend_colors(self):
        # The colors are the class mean
        palette = self.model.domain.class_var.palette
        if self.target_class_index == 1:
            items = ((np.min(self.data.Y), np.max(self.data.Y)), palette)
        # Colors are the stddev
        elif self.target_class_index == 2:
            items = ((0, np.std(self.data.Y)), palette)
        else:
            items = None

        self.legend = OWContinuousLegend(items=items, **self.LEGEND_OPTIONS)
        self.legend.setVisible(self.show_legend)
        self.scene.addItem(self.legend)
Ejemplo n.º 10
0
class OWPythagoreanForest(OWWidget):
    name = "毕达哥拉斯森林(Pythagorean Forest)"
    description = "毕达哥拉斯森林,用于将随机森林可视化。"
    icon = "icons/PythagoreanForest.svg"
    settings_version = 2
    keywords = ["fractal", "bidagelasisenlin", "gougusenlin"]
    category = "可视化(Visualize)"

    priority = 1001

    class Inputs:
        random_forest = Input("随机森林(Random forest)",
                              RandomForestModel,
                              replaces=["Data"])

    class Outputs:
        tree = Output("树(Tree)", TreeModel, replaces=["Tree"])

    # Enable the save as feature
    graph_name = "scene"

    # Settings
    settingsHandler = settings.ClassValuesContextHandler()

    depth_limit = settings.Setting(10)
    target_class_index = settings.ContextSetting(0)
    size_calc_idx = settings.Setting(0)
    zoom = settings.Setting(200)

    selected_index = settings.ContextSetting(None)

    SIZE_CALCULATION = [
        ("正常", lambda x: x),
        ("平方根", lambda x: sqrt(x)),
        ("对数的", lambda x: log(x + 1)),
    ]

    @classmethod
    def migrate_settings(cls, settings, version):
        if version < 2:
            settings.pop("selected_tree_index", None)
            v1_min, v1_max = 20, 150
            v2_min, v2_max = 100, 400
            ratio = (v2_max - v2_min) / (v1_max - v1_min)
            settings["zoom"] = int(ratio * (settings["zoom"] - v1_min) +
                                   v2_min)

    def __init__(self):
        super().__init__()
        self.rf_model = None
        self.forest = None
        self.instances = None

        self.color_palette = None

        # CONTROL AREA
        # Tree info area
        box_info = gui.widgetBox(self.controlArea, "森林")
        self.ui_info = gui.widgetLabel(box_info)

        # Display controls area
        box_display = gui.widgetBox(self.controlArea, "显示")
        # maxValue is set to a wide three-digit number to probably ensure the
        # proper label width. The maximum is later set to match the tree depth
        self.ui_depth_slider = gui.hSlider(box_display,
                                           self,
                                           "depth_limit",
                                           label="深度",
                                           ticks=False,
                                           maxValue=900)  # type: QSlider
        self.ui_target_class_combo = gui.comboBox(
            box_display,
            self,
            "target_class_index",
            label="目标类别",
            orientation=Qt.Horizontal,
            items=[],
            contentsLength=8,
            searchable=True,
        )
        self.ui_size_calc_combo = gui.comboBox(
            box_display,
            self,
            "size_calc_idx",
            label="大小",
            orientation=Qt.Horizontal,
            items=list(zip(*self.SIZE_CALCULATION))[0],
            contentsLength=8,
        )
        self.ui_zoom_slider = gui.hSlider(
            box_display,
            self,
            "zoom",
            label="缩放",
            ticks=False,
            minValue=100,
            maxValue=400,
            createLabel=False,
            intOnly=False,
        )  # type: QSlider

        # Stretch to fit the rest of the unsused area
        gui.rubber(self.controlArea)

        self.controlArea.setSizePolicy(QSizePolicy.Preferred,
                                       QSizePolicy.Expanding)

        # MAIN AREA
        self.forest_model = PythagoreanForestModel(parent=self)
        self.forest_model.update_item_size(self.zoom)
        self.ui_depth_slider.valueChanged.connect(
            self.forest_model.update_depth)
        self.ui_target_class_combo.currentIndexChanged.connect(
            self.forest_model.update_target_class)
        self.ui_zoom_slider.valueChanged.connect(
            self.forest_model.update_item_size)
        self.ui_size_calc_combo.currentIndexChanged.connect(
            self.forest_model.update_size_calc)

        self.list_delegate = PythagorasTreeDelegate(parent=self)
        self.list_view = ClickToClearSelectionListView(parent=self)
        self.list_view.setWrapping(True)
        self.list_view.setFlow(QListView.LeftToRight)
        self.list_view.setResizeMode(QListView.Adjust)
        self.list_view.setModel(self.forest_model)
        self.list_view.setItemDelegate(self.list_delegate)
        self.list_view.setSpacing(2)
        self.list_view.setSelectionMode(QListView.SingleSelection)
        self.list_view.selectionModel().selectionChanged.connect(self.commit)
        self.list_view.setUniformItemSizes(True)
        self.mainArea.layout().addWidget(self.list_view)

        self.resize(800, 500)

        # Clear to set sensible default values
        self.clear()

    @Inputs.random_forest
    def set_rf(self, model=None):
        """When a different forest is given."""
        self.closeContext()
        self.clear()
        self.rf_model = model

        if model is not None:
            self.instances = model.instances
            self._update_target_class_combo()

            self.forest = self._get_forest_adapter(self.rf_model)
            self.forest_model[:] = self.forest.trees

            self._update_info_box()
            self._update_depth_slider()

            self.openContext(
                model.domain.class_var if model.domain is not None else None)
        # Restore item selection
        if self.selected_index is not None:
            index = self.list_view.model().index(self.selected_index)
            selection = QItemSelection(index, index)
            self.list_view.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def clear(self):
        """Clear all relevant data from the widget."""
        self.rf_model = None
        self.forest = None
        self.forest_model.clear()
        self.selected_index = None

        self._clear_info_box()
        self._clear_target_class_combo()
        self._clear_depth_slider()

    def _update_info_box(self):
        self.ui_info.setText("Trees: {}".format(len(self.forest.trees)))

    def _update_depth_slider(self):
        self.depth_limit = self._get_max_depth()

        self.ui_depth_slider.parent().setEnabled(True)
        self.ui_depth_slider.setMaximum(self.depth_limit)
        self.ui_depth_slider.setValue(self.depth_limit)

    def _update_target_class_combo(self):
        self._clear_target_class_combo()
        label = [
            x for x in self.ui_target_class_combo.parent().children()
            if isinstance(x, QLabel)
        ][0]

        if self.instances.domain.has_discrete_class:
            label_text = "目标类别"
            values = [
                c.title() for c in self.instances.domain.class_vars[0].values
            ]
            values.insert(0, "无")
        else:
            label_text = "Node color"
            values = list(ContinuousTreeNode.COLOR_METHODS.keys())
        label.setText(label_text)
        self.ui_target_class_combo.addItems(values)
        # set it to 0, context will change if required
        self.target_class_index = 0

    def _clear_info_box(self):
        self.ui_info.setText("No forest on input.")

    def _clear_target_class_combo(self):
        self.ui_target_class_combo.clear()
        self.target_class_index = -1

    def _clear_depth_slider(self):
        self.ui_depth_slider.parent().setEnabled(False)
        self.ui_depth_slider.setMaximum(0)

    def _get_max_depth(self):
        return max(tree.max_depth for tree in self.forest.trees)

    def _get_forest_adapter(self, model):
        return SklRandomForestAdapter(model)

    def onDeleteWidget(self):
        """When deleting the widget."""
        super().onDeleteWidget()
        self.clear()

    def commit(self, selection: QItemSelection) -> None:
        """Commit the selected tree to output."""
        selected_indices = selection.indexes()

        if not len(selected_indices):
            self.selected_index = None
            self.Outputs.tree.send(None)
            return

        # We only allow selecting a single tree so there will always be one index
        self.selected_index = selected_indices[0].row()

        tree = self.rf_model.trees[self.selected_index]
        tree.instances = self.instances
        tree.meta_target_class_index = self.target_class_index
        tree.meta_size_calc_idx = self.size_calc_idx
        tree.meta_depth_limit = self.depth_limit

        self.Outputs.tree.send(tree)

    def send_report(self):
        """Send report."""
        self.report_plot()
class OWConfusionMatrix(widget.OWWidget):
    """Confusion matrix widget"""

    name = "Confusion Matrix"
    description = "Display a confusion matrix constructed from " \
                  "the results of classifier evaluations."
    icon = "icons/ConfusionMatrix.svg"
    priority = 1001

    class Inputs:
        evaluation_results = Input("Evaluation Results", Orange.evaluation.Results)

    class Outputs:
        selected_data = Output("Selected Data", Orange.data.Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    quantities = ["Number of instances",
                  "Proportion of predicted",
                  "Proportion of actual"]

    settings_version = 1
    settingsHandler = settings.ClassValuesContextHandler()

    selected_learner = settings.Setting([0], schema_only=True)
    selection = settings.ContextSetting(set())
    selected_quantity = settings.Setting(0)
    append_predictions = settings.Setting(True)
    append_probabilities = settings.Setting(False)
    autocommit = settings.Setting(True)

    UserAdviceMessages = [
        widget.Message(
            "Clicking on cells or in headers outputs the corresponding "
            "data instances",
            "click_cell")]

    class Error(widget.OWWidget.Error):
        no_regression = Msg("Confusion Matrix cannot show regression results.")
        invalid_values = Msg("Evaluation Results input contains invalid values")

    def __init__(self):
        super().__init__()

        self.data = None
        self.results = None
        self.learners = []

        self.learners_box = gui.listBox(
            self.controlArea, self, "selected_learner", "learners", box=True,
            callback=self._learner_changed
        )

        self.outputbox = gui.vBox(self.controlArea, "Output")
        box = gui.hBox(self.outputbox)
        gui.checkBox(box, self, "append_predictions",
                     "Predictions", callback=self._invalidate)
        gui.checkBox(box, self, "append_probabilities",
                     "Probabilities",
                     callback=self._invalidate)

        gui.auto_commit(self.outputbox, self, "autocommit",
                        "Send Selected", "Send Automatically", box=False)

        self.mainArea.layout().setContentsMargins(0, 0, 0, 0)

        box = gui.vBox(self.mainArea, box=True)

        sbox = gui.hBox(box)
        gui.rubber(sbox)
        gui.comboBox(sbox, self, "selected_quantity",
                     items=self.quantities, label="Show: ",
                     orientation=Qt.Horizontal, callback=self._update)

        self.tableview = ContingencyTable(self)
        box.layout().addWidget(self.tableview)

        selbox = gui.hBox(box)
        gui.button(selbox, self, "Select Correct",
                   callback=self.select_correct, autoDefault=False)
        gui.button(selbox, self, "Select Misclassified",
                   callback=self.select_wrong, autoDefault=False)
        gui.button(selbox, self, "Clear Selection",
                   callback=self.select_none, autoDefault=False)

    def sizeHint(self):
        """Initial size"""
        return QSize(750, 340)

    @Inputs.evaluation_results
    def set_results(self, results):
        """Set the input results."""

        prev_sel_learner = self.selected_learner.copy()
        self.clear()
        self.warning()
        self.closeContext()

        data = None
        if results is not None and results.data is not None:
            data = results.data[results.row_indices]

        if data is not None and not data.domain.has_discrete_class:
            self.Error.no_regression()
            data = results = None
        else:
            self.Error.no_regression.clear()

        nan_values = False
        if results is not None:
            assert isinstance(results, Orange.evaluation.Results)
            if np.any(np.isnan(results.actual)) or \
                    np.any(np.isnan(results.predicted)):
                # Error out here (could filter them out with a warning
                # instead).
                nan_values = True
                results = data = None

        if nan_values:
            self.Error.invalid_values()
        else:
            self.Error.invalid_values.clear()

        self.results = results
        self.data = data

        if data is not None:
            class_values = data.domain.class_var.values
        elif results is not None:
            raise NotImplementedError

        if results is None:
            self.report_button.setDisabled(True)
        else:
            self.report_button.setDisabled(False)

            nmodels = results.predicted.shape[0]

            # NOTE: The 'learner_names' is set in 'Test Learners' widget.
            if hasattr(results, "learner_names"):
                self.learners = results.learner_names
            else:
                self.learners = ["Learner #{}".format(i + 1)
                                 for i in range(nmodels)]

            self.tableview.set_headers(class_values, class_values, "Actual", "Predicted")
            self.openContext(data.domain.class_var)
            if not prev_sel_learner or prev_sel_learner[0] >= len(self.learners):
                if self.learners:
                    self.selected_learner[:] = [0]
            else:
                self.selected_learner[:] = prev_sel_learner
            self._update()
            self.tableview.set_selection(self.selection)
            self.unconditional_commit()

    def clear(self):
        """Reset the widget, clear controls"""
        self.results = None
        self.data = None
        self.tableview.clear()
        # Clear learners last. This action will invoke `_learner_changed`
        self.learners = []

    def select_correct(self):
        """Select the diagonal elements of the matrix"""
        self.tableview.set_selection({(i, i) for i in range(len(self.tableview.classesv))})

    def select_wrong(self):
        """Select the off-diagonal elements of the matrix"""
        self.tableview.set_selection({(i, j)
                                      for i in range(len(self.tableview.classesv))
                                      for j in range(len(self.tableview.classesv)) if i != j})

    def select_none(self):
        """Reset selection"""
        self.tableview.selectionModel().clear()

    def _prepare_data(self):
        indices = self.tableview.selectedIndexes()
        indices = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        actual = self.results.actual
        learner_name = self.learners[self.selected_learner[0]]
        predicted = self.results.predicted[self.selected_learner[0]]
        selected = [i for i, t in enumerate(zip(actual, predicted))
                    if t in indices]

        extra = []
        class_var = self.data.domain.class_var
        metas = self.data.domain.metas

        if self.append_predictions:
            extra.append(predicted.reshape(-1, 1))
            var = Orange.data.DiscreteVariable(
                "{}({})".format(class_var.name, learner_name),
                class_var.values
            )
            metas = metas + (var,)

        if self.append_probabilities and \
                        self.results.probabilities is not None:
            probs = self.results.probabilities[self.selected_learner[0]]
            extra.append(np.array(probs, dtype=object))
            pvars = [Orange.data.ContinuousVariable("p({})".format(value))
                     for value in class_var.values]
            metas = metas + tuple(pvars)

        domain = Orange.data.Domain(self.data.domain.attributes,
                                    self.data.domain.class_vars,
                                    metas)
        data = self.data.transform(domain)
        if len(extra):
            data.metas[:, len(self.data.domain.metas):] = \
                np.hstack(tuple(extra))
        data.name = learner_name

        if selected:
            annotated_data = create_annotated_table(data, selected)
            data = data[selected]
        else:
            annotated_data = create_annotated_table(data, [])
            data = None

        return data, annotated_data

    def commit(self):
        """Output data instances corresponding to selected cells"""
        if self.results is not None and self.data is not None \
                and self.selected_learner:
            data, annotated_data = self._prepare_data()
        else:
            data = None
            annotated_data = None

        self.Outputs.selected_data.send(data)
        self.Outputs.annotated_data.send(annotated_data)

    def _invalidate(self):
        indices = self.tableview.selectedIndexes()
        self.selection = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        self.commit()

    def _learner_changed(self):
        self._update()
        self.commit()

    def _update(self):
        # Update the displayed confusion matrix
        if self.results is not None and self.selected_learner:
            cmatrix = confusion_matrix(self.results, self.selected_learner[0])
            colsum = cmatrix.sum(axis=0)
            rowsum = cmatrix.sum(axis=1)
            n = len(cmatrix)
            diag = np.diag_indices(n)

            colors = cmatrix.astype(np.double)
            colors[diag] = 0
            if self.selected_quantity == 0:
                normalized = cmatrix.astype(np.int)
                formatstr = "{}"
                div = np.array([colors.max()])
            else:
                if self.selected_quantity == 1:
                    normalized = 100 * cmatrix / colsum
                    div = colors.max(axis=0)
                else:
                    normalized = 100 * cmatrix / rowsum[:, np.newaxis]
                    div = colors.max(axis=1)[:, np.newaxis]
                formatstr = "{:2.1f} %"
            div[div == 0] = 1
            colors /= div
            colors[diag] = normalized[diag] / normalized[diag].max()

            def tooltip(i, j):
                return "actual: {}\npredicted: {}".format(self.tableview.classesv[i],
                                                          self.tableview.classesh[j])
            self.tableview.update_table(normalized, colsum=colsum, rowsum=rowsum, colors=colors, formatstr=formatstr,
                                        tooltip=tooltip)

    def send_report(self):
        """Send report"""
        if self.results is not None and self.selected_learner:
            self.report_table(
                "Confusion matrix for {} (showing {})".
                format(self.learners[self.selected_learner[0]],
                       self.quantities[self.selected_quantity].lower()),
                self.tableview)

    @classmethod
    def migrate_settings(cls, settings, version):
        if not version:
            # For some period of time the 'selected_learner' property was
            # changed from List[int] -> int
            # (commit 4e49bb3fd0e11262f3ebf4b1116a91a4b49cc982) and then back
            # again (commit 8a492d79a2e17154a0881e24a05843406c8892c0)
            if "selected_learner" in settings and \
                    isinstance(settings["selected_learner"], int):
                settings["selected_learner"] = [settings["selected_learner"]]
Ejemplo n.º 12
0
class OWPythagorasTree(OWWidget):
    name = "毕达哥拉斯树(Pythagorean Tree)"
    description = "类似树结构的毕达哥拉斯树可视化。"
    icon = "icons/PythagoreanTree.svg"
    keywords = ["fractal", "bidagelasishu", "gougushu"]
    category = "可视化(Visualize)"

    priority = 1000

    class Inputs:
        tree = Input("树(Tree)", TreeModel, replaces=["Tree"])

    class Outputs:
        selected_data = Output("选定的数据(Selected Data)",
                               Table,
                               default=True,
                               replaces=["Selected Data"])
        annotated_data = Output("数据(Data)", Table, replaces=["Data"])

    # Enable the save as feature
    graph_name = "scene"

    # Settings
    settingsHandler = settings.ClassValuesContextHandler()

    depth_limit = settings.ContextSetting(10)
    target_class_index = settings.ContextSetting(0)
    size_calc_idx = settings.Setting(0)
    size_log_scale = settings.Setting(2)
    tooltips_enabled = settings.Setting(True)
    show_legend = settings.Setting(False)

    LEGEND_OPTIONS = {
        "corner": Anchorable.BOTTOM_RIGHT,
        "offset": (10, 10),
    }

    def __init__(self):
        super().__init__()
        # Instance variables
        self.model = None
        self.data = None
        # The tree adapter instance which is passed from the outside
        self.tree_adapter = None
        self.legend = None

        self.color_palette = None

        # Different methods to calculate the size of squares
        self.SIZE_CALCULATION = [
            ("Normal", lambda x: x, "正常"),
            ("Square root", lambda x: sqrt(x), "平方根"),
            ("Logarithmic", lambda x: log(x * self.size_log_scale + 1), "对数"),
        ]

        # CONTROL AREA
        # Tree info area
        box_info = gui.widgetBox(self.controlArea, "树信息")
        self.infolabel = gui.widgetLabel(box_info)

        # Display settings area
        box_display = gui.widgetBox(self.controlArea, "显示设置")
        # maxValue is set to a wide three-digit number to probably ensure the
        # proper label width. The maximum is later set to match the tree depth
        self.depth_slider = gui.hSlider(
            box_display,
            self,
            "depth_limit",
            label="深度",
            ticks=False,
            maxValue=900,
            callback=self.update_depth,
        )
        self.target_class_combo = gui.comboBox(
            box_display,
            self,
            "target_class_index",
            label="目标类别",
            orientation=Qt.Horizontal,
            items=[],
            contentsLength=8,
            searchable=True,
            callback=self.update_colors,
        )
        self.size_calc_combo = gui.comboBox(
            box_display,
            self,
            "size_calc_idx",
            label="大小",
            orientation=Qt.Horizontal,
            items=list(zip(*self.SIZE_CALCULATION))[2],
            contentsLength=8,
            callback=self.update_size_calc,
        )
        self.log_scale_box = gui.hSlider(
            box_display,
            self,
            "size_log_scale",
            label="对数比例因子",
            minValue=1,
            maxValue=100,
            ticks=False,
            callback=self.invalidate_tree,
        )

        # Plot properties area
        box_plot = gui.widgetBox(self.controlArea, "绘图属性")
        self.cb_show_tooltips = gui.checkBox(
            box_plot,
            self,
            "tooltips_enabled",
            label="启动工具提示",
            callback=self.update_tooltip_enabled,
        )
        self.cb_show_legend = gui.checkBox(
            box_plot,
            self,
            "show_legend",
            label="显示图例",
            callback=self.update_show_legend,
        )

        gui.rubber(self.controlArea)

        gui.button(self.buttonsArea, self, label="重新绘制", callback=self.redraw)

        self.controlArea.setSizePolicy(QSizePolicy.Preferred,
                                       QSizePolicy.Expanding)

        # MAIN AREA
        self.scene = TreeGraphicsScene(self)
        self.scene.selectionChanged.connect(self.commit)
        self.view = TreeGraphicsView(self.scene, padding=(150, 150))
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.mainArea.layout().addWidget(self.view)

        self.ptree = PythagorasTreeViewer(self)
        self.scene.addItem(self.ptree)
        self.view.set_central_widget(self.ptree)

        self.resize(800, 500)
        # Clear the widget to correctly set the intial values
        self.clear()

    @Inputs.tree
    def set_tree(self, model=None):
        """When a different tree is given."""
        self.closeContext()
        self.clear()
        self.model = model

        if model is not None:
            self.data = model.instances

            self._update_target_class_combo()
            self.tree_adapter = self._get_tree_adapter(self.model)
            self.ptree.clear()

            self.ptree.set_tree(
                self.tree_adapter,
                weight_adjustment=self.SIZE_CALCULATION[self.size_calc_idx][1],
                target_class_index=self.target_class_index,
            )

            self._update_depth_slider()
            self.color_palette = self.ptree.root.color_palette
            self._update_legend_colors()
            self._update_legend_visibility()
            self._update_info_box()

            self._update_main_area()

            self.openContext(
                model.domain.class_var if model.domain is not None else None)

        self.update_depth()

        # The forest widget sets the following attributes on the tree,
        # describing the settings on the forest widget. To keep the tree
        # looking the same as on the forest widget, we prefer these settings to
        # context settings, if set.
        if hasattr(model, "meta_target_class_index"):
            self.target_class_index = model.meta_target_class_index
            self.update_colors()
        if hasattr(model, "meta_size_calc_idx"):
            self.size_calc_idx = model.meta_size_calc_idx
            self.update_size_calc()
        if hasattr(model, "meta_depth_limit"):
            self.depth_limit = model.meta_depth_limit
            self.update_depth()

        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, None))

    def clear(self):
        """Clear all relevant data from the widget."""
        self.model = None
        self.data = None
        self.tree_adapter = None

        if self.legend is not None:
            self.scene.removeItem(self.legend)
        self.legend = None

        self.ptree.clear()
        self._clear_info_box()
        self._clear_target_class_combo()
        self._clear_depth_slider()
        self._update_log_scale_slider()

    def update_depth(self):
        """This method should be called when the depth changes"""
        self.ptree.set_depth_limit(self.depth_limit)

    def update_colors(self):
        """When the target class / node coloring needs to be updated."""
        self.ptree.target_class_changed(self.target_class_index)
        self._update_legend_colors()

    def update_size_calc(self):
        """When the tree size calculation is updated."""
        self._update_log_scale_slider()
        self.invalidate_tree()

    def redraw(self):
        if self.data is None:
            return
        self.tree_adapter.shuffle_children()
        self.invalidate_tree()

    def invalidate_tree(self):
        """When the tree needs to be completely recalculated."""
        if self.model is not None:
            self.ptree.set_tree(
                self.tree_adapter,
                weight_adjustment=self.SIZE_CALCULATION[self.size_calc_idx][1],
                target_class_index=self.target_class_index,
            )
            self.ptree.set_depth_limit(self.depth_limit)
            self._update_main_area()

    def update_tooltip_enabled(self):
        """When the tooltip visibility is changed and need to be updated."""
        self.ptree.tooltip_changed(self.tooltips_enabled)

    def update_show_legend(self):
        """When the legend visibility needs to be updated."""
        self._update_legend_visibility()

    def _update_info_box(self):
        self.infolabel.setText("节点: {}\n深度: {}".format(
            self.tree_adapter.num_nodes, self.tree_adapter.max_depth))

    def _update_depth_slider(self):
        self.depth_slider.parent().setEnabled(True)
        self.depth_slider.setMaximum(self.tree_adapter.max_depth)
        self._set_max_depth()

    def _update_legend_visibility(self):
        if self.legend is not None:
            self.legend.setVisible(self.show_legend)

    def _update_log_scale_slider(self):
        """On calc method combo box changed."""
        self.log_scale_box.parent().setEnabled(
            self.SIZE_CALCULATION[self.size_calc_idx][0] == "Logarithmic")

    def _clear_info_box(self):
        self.infolabel.setText("没有树输入")

    def _clear_depth_slider(self):
        self.depth_slider.parent().setEnabled(False)
        self.depth_slider.setMaximum(0)

    def _clear_target_class_combo(self):
        self.target_class_combo.clear()
        self.target_class_index = -1

    def _set_max_depth(self):
        """Set the depth to the max depth and update appropriate actors."""
        self.depth_limit = self.tree_adapter.max_depth
        self.depth_slider.setValue(self.depth_limit)

    def _update_main_area(self):
        # refresh the scene rect, cuts away the excess whitespace, and adds
        # padding for panning.
        self.scene.setSceneRect(self.view.central_widget_rect())
        # reset the zoom level
        self.view.recalculate_and_fit()
        self.view.update_anchored_items()

    def _get_tree_adapter(self, model):
        if isinstance(model, SklModel):
            return SklTreeAdapter(model)
        return TreeAdapter(model)

    def onDeleteWidget(self):
        """When deleting the widget."""
        super().onDeleteWidget()
        self.clear()

    def commit(self):
        """Commit the selected data to output."""
        if self.data is None:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(None)
            return

        nodes = [
            i.tree_node.label for i in self.scene.selectedItems()
            if isinstance(i, SquareGraphicsItem)
        ]
        data = self.tree_adapter.get_instances_in_nodes(nodes)

        self.Outputs.selected_data.send(data)
        selected_indices = self.tree_adapter.get_indices(nodes)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, selected_indices))

    def send_report(self):
        """Send report."""
        self.report_plot()

    def _update_target_class_combo(self):
        self._clear_target_class_combo()
        label = [
            x for x in self.target_class_combo.parent().children()
            if isinstance(x, QLabel)
        ][0]

        if self.data.domain.has_discrete_class:
            label_text = "目标类别"
            values = [c.title() for c in self.data.domain.class_vars[0].values]
            values.insert(0, "None")
        else:
            label_text = "Node color"
            values = list(ContinuousTreeNode.COLOR_METHODS.keys())
        label.setText(label_text)
        self.target_class_combo.addItems(values)
        # set it to 0, context will change if required
        self.target_class_index = 0

    def _update_legend_colors(self):
        if self.legend is not None:
            self.scene.removeItem(self.legend)

        if self.data.domain.has_discrete_class:
            self._classification_update_legend_colors()
        else:
            self._regression_update_legend_colors()

    def _classification_update_legend_colors(self):
        if self.target_class_index == 0:
            self.legend = OWDiscreteLegend(domain=self.model.domain,
                                           **self.LEGEND_OPTIONS)
        else:
            items = (
                (
                    self.target_class_combo.itemText(self.target_class_index),
                    self.color_palette[self.target_class_index - 1],
                ),
                ("other", QColor("#ffffff")),
            )
            self.legend = OWDiscreteLegend(items=items, **self.LEGEND_OPTIONS)

        self.legend.setVisible(self.show_legend)
        self.scene.addItem(self.legend)

    def _regression_update_legend_colors(self):
        # The colors are the class mean
        palette = self.model.domain.class_var.palette
        if self.target_class_index == 1:
            items = ((np.min(self.data.Y), np.max(self.data.Y)), palette)
        # Colors are the stddev
        elif self.target_class_index == 2:
            items = ((0, np.std(self.data.Y)), palette)
        else:
            items = None

        self.legend = OWContinuousLegend(items=items, **self.LEGEND_OPTIONS)
        self.legend.setVisible(self.show_legend)
        self.scene.addItem(self.legend)
Ejemplo n.º 13
0
class OWConfusionMatrix(widget.OWWidget):
    """Confusion matrix widget"""

    name = "Confusion Matrix"
    description = "Display a confusion matrix constructed from " \
                  "the results of classifier evaluations."
    icon = "icons/ConfusionMatrix.svg"
    priority = 1001

    inputs = [("Evaluation Results", Orange.evaluation.Results, "set_results")]
    outputs = [("Selected Data", Orange.data.Table)]

    quantities = ["Number of instances",
                  "Proportion of predicted",
                  "Proportion of actual"]

    settingsHandler = settings.ClassValuesContextHandler()

    selected_learner = settings.Setting(0)
    selection = settings.ContextSetting(set())
    selected_quantity = settings.Setting(0)
    append_predictions = settings.Setting(True)
    append_probabilities = settings.Setting(False)
    autocommit = settings.Setting(True)

    UserAdviceMessages = [
        widget.Message(
            "Clicking on cells or in headers outputs the corresponding "
            "data instances",
            "click_cell")]

    def __init__(self):
        super().__init__()
        if isinstance(self.selected_learner, list):
            self.selected_learner = (self.selected_learner + [0])[0]

        self.data = None
        self.results = None
        self.learners = []
        self.headers = []

        box = gui.vBox(self.controlArea, "Learners")

        self.learners_box = gui.listBox(
            box, self, "selected_learner", "learners",
            callback=self._learner_changed
        )
        box = gui.vBox(self.controlArea, "Show")

        gui.comboBox(box, self, "selected_quantity", items=self.quantities,
                     callback=self._update)

        box = gui.vBox(self.controlArea, "Select")

        gui.button(box, self, "Select Correct",
                   callback=self.select_correct, autoDefault=False)
        gui.button(box, self, "Select Misclassified",
                   callback=self.select_wrong, autoDefault=False)
        gui.button(box, self, "Clear Selection",
                   callback=self.select_none, autoDefault=False)

        self.outputbox = box = gui.vBox(self.controlArea, "Output")
        gui.checkBox(box, self, "append_predictions",
                     "Predictions", callback=self._invalidate)
        gui.checkBox(box, self, "append_probabilities",
                     "Probabilities",
                     callback=self._invalidate)

        gui.auto_commit(self.controlArea, self, "autocommit",
                        "Send Selected", "Send Automatically")

        grid = QGridLayout()

        self.tablemodel = QStandardItemModel(self)
        view = self.tableview = QTableView(
            editTriggers=QTableView.NoEditTriggers)
        view.setModel(self.tablemodel)
        view.horizontalHeader().hide()
        view.verticalHeader().hide()
        view.horizontalHeader().setMinimumSectionSize(60)
        view.selectionModel().selectionChanged.connect(self._invalidate)
        view.setShowGrid(False)
        view.setItemDelegate(BorderedItemDelegate(Qt.white))
        view.clicked.connect(self.cell_clicked)
        grid.addWidget(view, 0, 0)
        self.mainArea.layout().addLayout(grid)

    def sizeHint(self):
        """Initial size"""
        return QSize(750, 490)

    def _item(self, i, j):
        return self.tablemodel.item(i, j) or QStandardItem()

    def _set_item(self, i, j, item):
        self.tablemodel.setItem(i, j, item)

    def _init_table(self, nclasses):
        item = self._item(0, 2)
        item.setData("Predicted", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignCenter)
        item.setFlags(Qt.NoItemFlags)

        self._set_item(0, 2, item)
        item = self._item(2, 0)
        item.setData("Actual", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignHCenter | Qt.AlignBottom)
        item.setFlags(Qt.NoItemFlags)
        self.tableview.setItemDelegateForColumn(0, gui.VerticalItemDelegate())
        self._set_item(2, 0, item)
        self.tableview.setSpan(0, 2, 1, nclasses)
        self.tableview.setSpan(2, 0, nclasses, 1)

        font = self.tablemodel.invisibleRootItem().font()
        bold_font = QFont(font)
        bold_font.setBold(True)

        for i in (0, 1):
            for j in (0, 1):
                item = self._item(i, j)
                item.setFlags(Qt.NoItemFlags)
                self._set_item(i, j, item)

        for p, label in enumerate(self.headers):
            for i, j in ((1, p + 2), (p + 2, 1)):
                item = self._item(i, j)
                item.setData(label, Qt.DisplayRole)
                item.setFont(bold_font)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                if p < len(self.headers) - 1:
                    item.setData("br"[j == 1], BorderRole)
                    item.setData(QColor(192, 192, 192), BorderColorRole)
                self._set_item(i, j, item)

        hor_header = self.tableview.horizontalHeader()
        if len(' '.join(self.headers)) < 120:
            hor_header.setResizeMode(QHeaderView.ResizeToContents)
        else:
            hor_header.setDefaultSectionSize(60)
        self.tablemodel.setRowCount(nclasses + 3)
        self.tablemodel.setColumnCount(nclasses + 3)

    def set_results(self, results):
        """Set the input results."""

        prev_sel_learner = self.selected_learner
        self.clear()
        self.warning([0, 1])
        self.closeContext()

        data = None
        if results is not None and results.data is not None:
            data = results.data

        if data is not None and not data.domain.has_discrete_class:
            self.warning(
                0, "Confusion Matrix cannot be used for regression results.")

        self.results = results
        self.data = data

        if data is not None:
            class_values = data.domain.class_var.values
        elif results is not None:
            raise NotImplementedError

        if results is None:
            self.report_button.setDisabled(True)
        else:
            self.report_button.setDisabled(False)

            nmodels = results.predicted.shape[0]
            self.headers = class_values + \
                           [unicodedata.lookup("N-ARY SUMMATION")]

            # NOTE: The 'learner_names' is set in 'Test Learners' widget.
            if hasattr(results, "learner_names"):
                self.learners = results.learner_names
            else:
                self.learners = ["Learner #{}".format(i + 1)
                                 for i in range(nmodels)]

            self._init_table(len(class_values))
            self.openContext(data.domain.class_var)
            if prev_sel_learner is None or \
                    prev_sel_learner >= len(self.learners):
                self.selected_learner = 0
            else:
                self.selected_learner = prev_sel_learner
            self._update()
            self._set_selection()
            self.unconditional_commit()

    def clear(self):
        """Reset the widget, clear controls"""
        self.results = None
        self.data = None
        self.tablemodel.clear()
        self.headers = []
        # Clear learners last. This action will invoke `_learner_changed`
        self.learners = []

    def select_correct(self):
        """Select the diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            index = self.tablemodel.index(i, i)
            selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_wrong(self):
        """Select the off-diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            for j in range(i + 1, n):
                index = self.tablemodel.index(i, j)
                selection.select(index, index)
                index = self.tablemodel.index(j, i)
                selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_none(self):
        """Reset selection"""
        self.tableview.selectionModel().clear()

    def cell_clicked(self, model_index):
        """Handle cell click event"""
        i, j = model_index.row(), model_index.column()
        if not i or not j:
            return
        n = self.tablemodel.rowCount()
        index = self.tablemodel.index
        selection = None
        if i == j == 1 or i == j == n - 1:
            selection = QItemSelection(index(2, 2), index(n - 1, n - 1))
        elif i in (1, n - 1):
            selection = QItemSelection(index(2, j), index(n - 1, j))
        elif j in (1, n - 1):
            selection = QItemSelection(index(i, 2), index(i, n - 1))

        if selection is not None:
            self.tableview.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def commit(self):
        """Output data instances corresponding to selected cells"""
        if self.results is not None and self.data is not None \
                and self.selected_learner is not None:
            indices = self.tableview.selectedIndexes()
            indices = {(ind.row() - 2, ind.column() - 2) for ind in indices}
            actual = self.results.actual
            learner_name = self.learners[self.selected_learner]
            predicted = self.results.predicted[self.selected_learner]
            selected = [i for i, t in enumerate(zip(actual, predicted))
                        if t in indices]
            row_indices = self.results.row_indices[selected]

            extra = []
            class_var = self.data.domain.class_var
            metas = self.data.domain.metas

            if self.append_predictions:
                predicted = numpy.array(predicted[selected], dtype=object)
                extra.append(predicted.reshape(-1, 1))
                var = Orange.data.DiscreteVariable(
                    "{}({})".format(class_var.name, learner_name),
                    class_var.values
                )
                metas = metas + (var,)

            if self.append_probabilities and \
                    self.results.probabilities is not None:
                probs = self.results.probabilities[self.selected_learner,
                                                   selected]
                extra.append(numpy.array(probs, dtype=object))
                pvars = [Orange.data.ContinuousVariable("p({})".format(value))
                         for value in class_var.values]
                metas = metas + tuple(pvars)

            X = self.data.X[row_indices]
            Y = self.data.Y[row_indices]
            M = self.data.metas[row_indices]
            row_ids = self.data.ids[row_indices]

            M = numpy.hstack((M,) + tuple(extra))
            domain = Orange.data.Domain(
                self.data.domain.attributes,
                self.data.domain.class_vars,
                metas
            )
            data = Orange.data.Table.from_numpy(domain, X, Y, M)
            data.ids = row_ids
            data.name = learner_name

        else:
            data = None

        self.send("Selected Data", data)

    def _invalidate(self):
        indices = self.tableview.selectedIndexes()
        self.selection = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        self.commit()

    def _set_selection(self):
        selection = QItemSelection()
        index = self.tableview.model().index
        for row, col in self.selection:
            sel = index(row + 2, col + 2)
            selection.select(sel, sel)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def _learner_changed(self):
        self._update()
        self._set_selection()
        self.commit()

    def _update(self):
        def _isinvalid(x):
            return isnan(x) or isinf(x)

        # Update the displayed confusion matrix
        if self.results is not None and self.selected_learner is not None:
            cmatrix = confusion_matrix(self.results, self.selected_learner)
            colsum = cmatrix.sum(axis=0)
            rowsum = cmatrix.sum(axis=1)
            n = len(cmatrix)
            diag = numpy.diag_indices(n)

            colors = cmatrix.astype(numpy.double)
            colors[diag] = 0
            if self.selected_quantity == 0:
                normalized = cmatrix.astype(numpy.int)
                formatstr = "{}"
                div = numpy.array([colors.max()])
            else:
                if self.selected_quantity == 1:
                    normalized = 100 * cmatrix / colsum
                    div = colors.max(axis=0)
                else:
                    normalized = 100 * cmatrix / rowsum[:, numpy.newaxis]
                    div = colors.max(axis=1)[:, numpy.newaxis]
                formatstr = "{:2.1f} %"
            div[div == 0] = 1
            colors /= div
            colors[diag] = normalized[diag] / normalized[diag].max()

            for i in range(n):
                for j in range(n):
                    val = normalized[i, j]
                    col_val = colors[i, j]
                    item = self._item(i + 2, j + 2)
                    item.setData(
                        "NA" if _isinvalid(val) else formatstr.format(val),
                        Qt.DisplayRole)
                    bkcolor = QColor.fromHsl(
                        [0, 240][i == j], 160,
                        255 if _isinvalid(col_val) else int(255 - 30 * col_val))
                    item.setData(QBrush(bkcolor), Qt.BackgroundRole)
                    item.setData("trbl", BorderRole)
                    item.setToolTip("actual: {}\npredicted: {}".format(
                        self.headers[i], self.headers[j]))
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable)
                    self._set_item(i + 2, j + 2, item)

            bold_font = self.tablemodel.invisibleRootItem().font()
            bold_font.setBold(True)

            def _sum_item(value, border=""):
                item = QStandardItem()
                item.setData(value, Qt.DisplayRole)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                item.setFont(bold_font)
                item.setData(border, BorderRole)
                item.setData(QColor(192, 192, 192), BorderColorRole)
                return item

            for i in range(n):
                self._set_item(n + 2, i + 2, _sum_item(int(colsum[i]), "t"))
                self._set_item(i + 2, n + 2, _sum_item(int(rowsum[i]), "l"))
            self._set_item(n + 2, n + 2, _sum_item(int(rowsum.sum())))

    def send_report(self):
        """Send report"""
        if self.results is not None and self.selected_learner is not None:
            self.report_table(
                "Confusion matrix for {} (showing {})".
                format(self.learners[self.selected_learner],
                       self.quantities[self.selected_quantity].lower()),
                self.tableview)