Beispiel #1
0
 def test_curves_from_results_nans(self, init):
     res = Results()
     ytrue, probs = self.data.T
     ytrue[0] = np.nan
     probs[-1] = np.nan
     res.actual = ytrue.astype(float)
     res.probabilities = np.vstack((1 - probs, probs)).T.reshape(1, -1, 2)
     Curves.from_results(res)
     cytrue, cprobs = init.call_args[0]
     np.testing.assert_equal(cytrue, ytrue[1:-1])
     np.testing.assert_equal(cprobs, probs[1:-1])
Beispiel #2
0
    def fit_storage(self, data):
        """
        Induce a model using the provided `base_learner`, compute probabilities
        on training data and the find the optimal decision thresholds. In case
        of ties, select the threshold that is closest to 0.5.
        """
        if not data.domain.class_var.is_discrete \
                or len(data.domain.class_var.values) != 2:
            raise ValueError("ThresholdLearner requires a binary class")

        res = TestOnTrainingData(store_models=True)(data, [self.base_learner])
        model = res.models[0, 0]
        curves = Curves.from_results(res)
        curve = [curves.ca, curves.f1][self.threshold_criterion]()
        # In case of ties, we want the optimal threshold that is closest to 0.5
        best_threshs = curves.probs[curve == np.max(curve)]
        threshold = best_threshs[min(np.searchsorted(best_threshs, 0.5),
                                     len(best_threshs) - 1)]
        return ThresholdClassifier(model, threshold)
    def _setup_plot(self):
        target = self.target_index
        results = self.results
        metrics = Metrics[self.score].functions
        plot_folds = self.fold_curves and results.folds is not None
        self.scores = []

        if not self._check_class_presence(results.actual == target):
            return

        self.Warning.omitted_folds.clear()
        self.Warning.omitted_nan_prob_points.clear()
        no_valid_models = []
        shadow_width = 4 + 4 * plot_folds
        for clsf in self.selected_classifiers:
            data = Curves.from_results(results, target, clsf)
            if data.tot == 0:  # all probabilities are nan
                no_valid_models.append(clsf)
                continue
            if data.tot != results.probabilities.shape[1]:  # some are nan
                self.Warning.omitted_nan_prob_points()

            color = self.colors[clsf]
            pen_args = dict(
                pen=pg.mkPen(color, width=1), antiAlias=True,
                shadowPen=pg.mkPen(color.lighter(160), width=shadow_width))
            self.scores.append(
                (self.classifier_names[clsf],
                 self.plot_metrics(data, metrics, pen_args)))

            if self.display_rug:
                self._rug(data, pen_args)

            if plot_folds:
                pen_args = dict(
                    pen=pg.mkPen(color, width=1, style=Qt.DashLine),
                    antiAlias=True)
                for fold in range(len(results.folds)):
                    fold_results = results.get_fold(fold)
                    fold_curve = Curves.from_results(fold_results, target, clsf)
                    # Can't check this before: p and n can be 0 because of
                    # nan probabilities
                    if fold_curve.p * fold_curve.n == 0:
                        self.Warning.omitted_folds()
                    self.plot_metrics(fold_curve, metrics, pen_args)

        if no_valid_models:
            self.Warning.no_valid_data(
                ", ".join(self.classifier_names[i] for i in no_valid_models))

        if self.score == 0:
            self.plot.plot([0, 1], [0, 1], antialias=True)
        else:
            self.line = pg.InfiniteLine(
                pos=self.threshold, movable=True,
                pen=pg.mkPen(color="k", style=Qt.DashLine, width=2),
                hoverPen=pg.mkPen(color="k", style=Qt.DashLine, width=3),
                bounds=(0, 1),
            )
            self.line.sigPositionChanged.connect(self.threshold_change)
            self.line.sigPositionChangeFinished.connect(
                self.threshold_change_done)
            self.plot.addItem(self.line)
Beispiel #4
0
    def test_curves_from_results(self, init):
        res = Results()
        ytrue, probs = self.data.T
        res.actual = ytrue.astype(float)
        res.probabilities = np.vstack((1 - probs, probs)).T.reshape(1, -1, 2)
        Curves.from_results(res)
        cytrue, cprobs = init.call_args[0]
        np.testing.assert_equal(cytrue, ytrue)
        np.testing.assert_equal(cprobs, probs)

        Curves.from_results(res, target_class=0)
        cytrue, cprobs = init.call_args[0]
        np.testing.assert_equal(cytrue, 1 - ytrue)
        np.testing.assert_equal(cprobs, 1 - probs)

        res.actual = ytrue.astype(float)
        res.probabilities = np.random.random((2, 19, 2))
        res.probabilities[1] = np.vstack((1 - probs, probs)).T

        Curves.from_results(res, model_index=1)
        cytrue, cprobs = init.call_args[0]
        np.testing.assert_equal(cytrue, ytrue)
        np.testing.assert_equal(cprobs, probs)

        self.assertRaises(ValueError, Curves.from_results, res)

        ytrue[ytrue == 0] = 2 * (np.arange(10) % 2)
        res.actual = ytrue.astype(float)
        res.probabilities = np.random.random((2, 19, 3))
        res.probabilities[1] = np.vstack(
            ((1 - probs) / 3, probs, (1 - probs) * 2 / 3)).T

        Curves.from_results(res, model_index=1, target_class=1)
        cytrue, cprobs = init.call_args[0]
        np.testing.assert_equal(cytrue, ytrue == 1)
        np.testing.assert_equal(cprobs, probs)

        Curves.from_results(res, model_index=1, target_class=0)
        cytrue, cprobs = init.call_args[0]
        np.testing.assert_equal(cytrue, ytrue == 0)
        np.testing.assert_equal(cprobs, (1 - probs) / 3)

        Curves.from_results(res, model_index=1, target_class=2)
        cytrue, cprobs = init.call_args[0]
        np.testing.assert_equal(cytrue, ytrue == 2)
        np.testing.assert_equal(cprobs, (1 - probs) * 2 / 3)

        self.assertRaises(ValueError, Curves.from_results, res, model_index=1)