def test_curves_from_results_nans(self, init):
    """
    Check that `Curves.from_results` skips instances with missing values.

    The first actual value and the last probability are set to nan; the
    arguments captured by `init` must then contain only the instances in
    between.

    `init` is presumably a mock patched over the `Curves` constructor
    (TODO confirm against the decorator); only its `call_args` is used.
    """
    actual, positive = self.data.T
    # Invalidate one instance at each end: a missing class value at the
    # start and a missing probability at the end.
    actual[0] = np.nan
    positive[-1] = np.nan

    results = Results()
    results.actual = actual.astype(float)
    # A single model, two classes: (1, n_instances, 2)
    results.probabilities = \
        np.column_stack((1 - positive, positive))[np.newaxis]

    Curves.from_results(results)

    passed_actual, passed_probs = init.call_args[0]
    inner = slice(1, -1)
    np.testing.assert_equal(passed_actual, actual[inner])
    np.testing.assert_equal(passed_probs, positive[inner])
def fit_storage(self, data):
    """
    Induce a model using the provided `base_learner`, compute probabilities
    on training data and then find the optimal decision threshold. In case
    of ties, select the threshold that is closest to 0.5.

    Args:
        data: training data; `data.domain.class_var` must be discrete
            and have exactly two values.

    Returns:
        ThresholdClassifier: the induced model paired with the chosen
        threshold.

    Raises:
        ValueError: if the class variable is not discrete or does not
            have exactly two values.
    """
    class_var = data.domain.class_var
    if not class_var.is_discrete or len(class_var.values) != 2:
        raise ValueError("ThresholdLearner requires a binary class")

    res = TestOnTrainingData(store_models=True)(data, [self.base_learner])
    model = res.models[0, 0]
    curves = Curves.from_results(res)
    # `threshold_criterion` indexes the scoring method: 0 -> ca, 1 -> f1
    curve = [curves.ca, curves.f1][self.threshold_criterion]()

    # In case of ties, we want the optimal threshold that is closest to 0.5
    best_threshs = curves.probs[curve == np.max(curve)]
    # searchsorted (which assumes ascending order) gives the position of
    # the first candidate >= 0.5, clamped to the last candidate when all
    # of them are below 0.5 ...
    index = min(np.searchsorted(best_threshs, 0.5), len(best_threshs) - 1)
    # ... but the candidate just below 0.5 may be nearer, so compare the
    # two neighbours. On an exact tie the upper candidate is kept.
    if index > 0 \
            and 0.5 - best_threshs[index - 1] < best_threshs[index] - 0.5:
        index -= 1
    threshold = best_threshs[index]
    return ThresholdClassifier(model, threshold)
def _setup_plot(self):
    """
    Plot the curves of the selected score for every selected classifier.

    Resets `self.scores` and returns early if `_check_class_presence`
    rejects the target class. Otherwise it clears the `omitted_folds` and
    `omitted_nan_prob_points` warnings and, for each selected classifier,
    builds `Curves` from the results, plots the metrics and stores
    `(classifier name, result of plot_metrics)` in `self.scores`.
    Optionally it also draws the rug and dashed per-fold curves.

    Classifiers for which `Curves.tot` is 0 are skipped and reported
    through `Warning.no_valid_data`.

    Finally, a diagonal from (0, 0) to (1, 1) is drawn when `self.score`
    is 0; for any other score a movable vertical threshold line is added
    and connected to the threshold-change handlers.
    """
    target = self.target_index
    results = self.results
    metrics = Metrics[self.score].functions
    # Per-fold curves are drawn only if requested and folds are available
    plot_folds = self.fold_curves and results.folds is not None
    self.scores = []

    if not self._check_class_presence(results.actual == target):
        return

    self.Warning.omitted_folds.clear()
    self.Warning.omitted_nan_prob_points.clear()
    no_valid_models = []
    # Shadow pen is 4 wide, or 8 when fold curves are drawn as well
    shadow_width = 4 + 4 * plot_folds
    for clsf in self.selected_classifiers:
        data = Curves.from_results(results, target, clsf)
        if data.tot == 0:  # all probabilities are nan
            no_valid_models.append(clsf)
            continue
        if data.tot != results.probabilities.shape[1]:  # some are nan
            self.Warning.omitted_nan_prob_points()

        color = self.colors[clsf]
        pen_args = dict(
            pen=pg.mkPen(color, width=1), antiAlias=True,
            shadowPen=pg.mkPen(color.lighter(160), width=shadow_width))
        self.scores.append(
            (self.classifier_names[clsf],
             self.plot_metrics(data, metrics, pen_args)))

        if self.display_rug:
            self._rug(data, pen_args)

        if plot_folds:
            # Fold curves use a dashed pen in the classifier's color and
            # no shadow pen
            pen_args = dict(
                pen=pg.mkPen(color, width=1, style=Qt.DashLine),
                antiAlias=True)
            for fold in range(len(results.folds)):
                fold_results = results.get_fold(fold)
                fold_curve = Curves.from_results(fold_results, target, clsf)
                # Can't check this before: p and n can be 0 because of
                # nan probabilities
                if fold_curve.p * fold_curve.n == 0:
                    self.Warning.omitted_folds()
                self.plot_metrics(fold_curve, metrics, pen_args)

    if no_valid_models:
        # Report the names of all classifiers that had no usable data
        self.Warning.no_valid_data(
            ", ".join(self.classifier_names[i] for i in no_valid_models))

    if self.score == 0:
        # Score 0 gets a diagonal reference line instead of a threshold
        self.plot.plot([0, 1], [0, 1], antialias=True)
    else:
        # Movable threshold marker, restricted to the interval [0, 1]
        self.line = pg.InfiniteLine(
            pos=self.threshold, movable=True,
            pen=pg.mkPen(color="k", style=Qt.DashLine, width=2),
            hoverPen=pg.mkPen(color="k", style=Qt.DashLine, width=3),
            bounds=(0, 1),
        )
        self.line.sigPositionChanged.connect(self.threshold_change)
        self.line.sigPositionChangeFinished.connect(
            self.threshold_change_done)
        self.plot.addItem(self.line)
def test_curves_from_results(self, init):
    """
    Check which actual values and probabilities `Curves.from_results`
    hands on for different combinations of model and target class.

    Covered cases: a single binary model (default and explicit target
    class 0), two binary models (a model index is required), and two
    three-class models (a target class is required as well).

    `init` is presumably a mock patched over the `Curves` constructor
    (TODO confirm against the decorator); only its `call_args` is used.
    """
    def check(expected_actual, expected_probs, **kwargs):
        # Call from_results and compare what was captured by the mock
        Curves.from_results(res, **kwargs)
        got_actual, got_probs = init.call_args[0]
        np.testing.assert_equal(got_actual, expected_actual)
        np.testing.assert_equal(got_probs, expected_probs)

    ytrue, probs = self.data.T
    binary = np.column_stack((1 - probs, probs))

    # One model, two classes
    res = Results()
    res.actual = ytrue.astype(float)
    res.probabilities = binary[np.newaxis]
    check(ytrue, probs)
    check(1 - ytrue, 1 - probs, target_class=0)

    # Two models, two classes; only the second model holds known values
    res.actual = ytrue.astype(float)
    res.probabilities = np.random.random((2, 19, 2))
    res.probabilities[1] = binary
    check(ytrue, probs, model_index=1)
    # Without a model index the call must fail
    self.assertRaises(ValueError, Curves.from_results, res)

    # Two models, three classes: spread the former class 0 over 0 and 2
    ytrue[ytrue == 0] = 2 * (np.arange(10) % 2)
    res.actual = ytrue.astype(float)
    res.probabilities = np.random.random((2, 19, 3))
    third = (1 - probs) / 3
    two_thirds = (1 - probs) * 2 / 3
    res.probabilities[1] = np.column_stack((third, probs, two_thirds))
    for target, expected_probs in ((1, probs), (0, third), (2, two_thirds)):
        check(ytrue == target, expected_probs,
              model_index=1, target_class=target)
    # Without a target class the call must fail
    self.assertRaises(ValueError, Curves.from_results, res, model_index=1)