def test_callback(self):
    """Testing two learners must report progress 0, 0.5 and 1 to the callback."""
    reported = []

    def remember(value):
        reported.append(value)

    table = random_data(50, 4)
    learners = [MajorityLearner(), MajorityLearner()]
    TestOnTestData(table, table, learners, callback=remember)
    self.assertEqual(reported, [0, 0.5, 1])
def test_miss_majority(self):
    """Cross-validated majority predictions for the first 49 rows must be 0.

    The check runs twice: once with the last four rows set to ones and
    once after they are reset to zeros.
    """
    features = np.zeros((50, 3))
    # Basic slicing: the target is a view of the last feature column, so
    # later writes to ``features`` are visible through ``target`` as well.
    target = features[:, -1]
    features[-4:] = np.ones((4, 3))
    table = Table(features, target)

    def check_leading_predictions():
        outcome = CrossValidation(table, [MajorityLearner()], k=3)
        np.testing.assert_equal(outcome.predicted[0][:49], 0)

    check_leading_predictions()
    features[-4:] = np.zeros((4, 3))
    check_leading_predictions()
def test_callback(self):
    """Progress reported for two learners must match ``self._callback_values(2)``."""
    reported = []

    def remember(value):
        reported.append(value)

    table = random_data(50, 4)
    learners = [MajorityLearner(), MajorityLearner()]
    TestOnTestData(table, table, learners, callback=remember)
    np.testing.assert_almost_equal(reported, self._callback_values(2))
def test_probs(self):
    """Check predictions and probability ordering on rows 30..129 of iris.

    Both majority learners must predict class 1 for every row, and the
    class probabilities must satisfy p(class 0) < p(class 2) < p(class 1).
    """
    subset = self.iris[30:130]
    pair = [MajorityLearner(), MajorityLearner()]
    results = TestOnTrainingData(subset, pair)

    self.assertEqual(results.predicted.shape, (2, len(subset)))
    np.testing.assert_equal(results.predicted, np.ones((2, 100)))

    probs = results.probabilities
    class0 = probs[:, :, 0]
    class2 = probs[:, :, 2]
    self.assertTrue((class0 < class2).all())
    class1 = probs[:, :, 1]
    self.assertTrue((class2 < class1).all())
def test_probs(self):
    """Check leave-one-out predictions and probabilities on rows 30..129 of iris.

    Both majority learners must predict class 1 for every row, and the
    class probabilities must satisfy p(class 0) < p(class 2) < p(class 1).
    """
    subset = Table('iris')[30:130]
    pair = [MajorityLearner(), MajorityLearner()]
    results = LeaveOneOut()(subset, pair)

    self.assertEqual(results.predicted.shape, (2, len(subset)))
    np.testing.assert_equal(results.predicted, np.ones((2, 100)))

    probs = results.probabilities
    class0 = probs[:, :, 0]
    class2 = probs[:, :, 2]
    self.assertTrue((class0 < class2).all())
    class1 = probs[:, :, 1]
    self.assertTrue((class2 < class1).all())
def run_test_preprocessor(self, method, expected_sizes):
    """Run ``method`` on iris and check the sizes of data given to the preprocessor.

    ``method`` is called with the iris table, two majority learners and a
    pass-through preprocessor that records the length of every table it
    receives; the recorded lengths must equal ``expected_sizes``.
    """
    seen_sizes = []

    def recording_preprocessor(table):
        seen_sizes.append(len(table))
        return table

    learners = [MajorityLearner(), MajorityLearner()]
    method(Table("iris"), learners, preprocessor=recording_preprocessor)
    self.assertEqual(seen_sizes, expected_sizes)
def test_miss_majority():
    """3-fold cross-validated majority predictions for the first 49 rows must be 0.

    The check runs twice: once with the last four rows set to ones and
    once after they are reset to zeros.
    """
    features = np.zeros((50, 3))
    # Basic slicing: the target is a view of the last feature column.
    target = features[:, -1]
    features[-4:] = np.ones((4, 3))
    table = Table.from_numpy(None, features, target)
    validate = CrossValidation(k=3)

    def check_leading_predictions():
        outcome = validate(table, [MajorityLearner()])
        np.testing.assert_equal(outcome.predicted[0][:49], 0)

    check_leading_predictions()
    features[-4:] = np.zeros((4, 3))
    check_leading_predictions()
def test_preprocessor(self):
    """The preprocessor must be called exactly once, with the 30-row training data."""
    seen_sizes = []

    def recording_preprocessor(table):
        seen_sizes.append(len(table))
        return table

    table = random_data(50, 5)
    train_part, test_part = table[:30], table[-20:]
    learners = [MajorityLearner(), MajorityLearner()]
    TestOnTestData()(
        train_part, test_part, learners,
        preprocessor=recording_preprocessor)
    self.assertEqual(seen_sizes, [30])
def test_missing(self):
    """Check majority predictions after class values are set to "?".

    First every second row of the first half of iris gets class "?" and
    all predictions must be 2; then every row gets class "?" and all
    predictions must be 1.
    """
    iris = Table('iris')

    half = len(iris) // 2
    for row in iris[:half:2]:
        row.set_class("?")
    predictions = MajorityLearner()(iris)(iris)
    self.assertTrue((predictions == 2).all())

    for row in iris:
        row.set_class("?")
    predictions = MajorityLearner()(iris)(iris)
    self.assertTrue((predictions == 1).all())
def test_miss_majority():
    """Check majority predictions when testing on the training data.

    With a single deviating row, and again with none, the first 49
    predictions must be 0. With rows 25.. set to one, all predictions
    must equal the first prediction.
    """
    features = np.zeros((50, 3))
    # Basic slicing: the target is a view of the last feature column.
    target = features[:, -1]

    def predict(table):
        return TestOnTrainingData()(table, [MajorityLearner()]).predicted[0]

    features[49] = 1
    table = Table(features, target)
    np.testing.assert_equal(predict(table)[:49], 0)

    features[49] = 0
    np.testing.assert_equal(predict(table)[:49], 0)

    features[25:] = 1
    table = Table(features, target)
    predicted = predict(table)
    np.testing.assert_equal(predicted, predicted[0])
def test_results_one_vs_rest(self):
    """Check one-vs-rest results derived for each of the three lenses classes.

    Each derived result must have probabilities that sum to one, the
    positive-class probabilities of the three results must sum to one,
    and folds and row indices must match the original results.
    """
    lenses = Table(test_filename("datasets/lenses.tab"))
    original = TestOnTestData()(
        lenses[1::2], lenses[::2], learners=[MajorityLearner()])
    first, second, third = (
        results_one_vs_rest(original, pos_index=index) for index in range(3))
    derived = (first, second, third)

    for one_vs_rest in derived:
        np.testing.assert_almost_equal(
            np.sum(one_vs_rest.probabilities, axis=2), 1.0)

    positive_total = (first.probabilities[:, :, 1]
                      + second.probabilities[:, :, 1]
                      + third.probabilities[:, :, 1])
    np.testing.assert_almost_equal(positive_total, 1.0)

    for one_vs_rest in derived:
        self.assertEqual(one_vs_rest.folds, original.folds)
    for one_vs_rest in derived:
        np.testing.assert_equal(one_vs_rest.row_indices, original.row_indices)
def test_store_models(self):
    """Models are kept only when ``store_models=True`` is requested.

    Without the flag ``res.models`` must be None. With it there must be a
    single group of models holding one NaiveBayesModel and one
    ConstantModel, in the order of the learners.
    """
    nrows, ncols = 50, 10
    data = random_data(nrows, ncols)
    # Derive the split from nrows. The previous fixed index of 80 was past
    # the end of the 50-row table, which left the test set empty.
    split = nrows * 4 // 5
    train = data[:split]
    test = data[split:]
    learners = [NaiveBayesLearner(), MajorityLearner()]

    res = TestOnTestData(train, test, learners)
    self.assertIsNone(res.models)

    res = TestOnTestData(train, test, learners, store_models=True)
    self.assertEqual(len(res.models), 1)
    for models in res.models:
        self.assertEqual(len(models), 2)
        self.assertIsInstance(models[0], NaiveBayesModel)
        self.assertIsInstance(models[1], ConstantModel)
def test_basic(self):
    """Check the widget's evaluation output for iris, no learner, and housing.

    With iris and a majority learner the output must be a Results object
    with domain, data and probabilities. Removing the learner must clear
    the output. With housing and a mean learner the output must again be
    a Results object with domain and data.
    """
    widget = self.widget

    iris_sample = Table("iris")[::3]
    self.send_signal(widget.Inputs.train_data, iris_sample)
    self.send_signal(widget.Inputs.learner, MajorityLearner(), 0, wait=5000)
    output = self.get_output(widget.Outputs.evaluations_results)
    self.assertIsInstance(output, Results)
    self.assertIsNotNone(output.domain)
    self.assertIsNotNone(output.data)
    self.assertIsNotNone(output.probabilities)

    self.send_signal(widget.Inputs.learner, None, 0, wait=5000)
    output = self.get_output(widget.Outputs.evaluations_results)
    self.assertIsNone(output)

    housing_sample = Table("housing")[::10]
    self.send_signal(widget.Inputs.train_data, housing_sample)
    self.send_signal(widget.Inputs.learner, MeanLearner(), 0, wait=5000)
    output = self.get_output(widget.Outputs.evaluations_results)
    self.assertIsInstance(output, Results)
    self.assertIsNotNone(output.domain)
    self.assertIsNotNone(output.data)
def test_multi_target_input(self):
    """Check scorers and successful slots for data with two class variables.

    A scorer class declaring itself compatible with every domain is
    defined locally. The widget is fed a table with one continuous and
    one discrete class variable, a majority learner and a mocked learner.
    Afterwards the widget must list exactly one scorer, the local one,
    and report exactly one successful slot.
    """
    class NewScorer(Score):
        class_types = (
            ContinuousVariable,
            DiscreteVariable,
        )

        @staticmethod
        def is_compatible(domain: Domain) -> bool:
            return True

        def compute_score(self, results):
            return [0.75]

    domain = Domain([ContinuousVariable('var1')],
                    class_vars=[
                        ContinuousVariable('c1'),
                        DiscreteVariable('c2', values=('no', 'yes'))
                    ])
    data = Table.from_list(domain, [[1, 5, 0], [2, 10, 1], [2, 10, 1]])

    mock_model = Mock(spec=Model, return_value=np.asarray([[0.2, 0.1, 0.2]]))
    mock_model.name = 'Mockery'
    mock_model.domain = domain
    mock_learner = Mock(spec=Learner, return_value=mock_model)
    mock_learner.name = 'Mockery'

    self.widget.resampling = OWTestAndScore.TestOnTrain
    self.send_signal(self.widget.Inputs.train_data, data)
    self.send_signal(self.widget.Inputs.learner, MajorityLearner(), 0)
    self.send_signal(self.widget.Inputs.learner, mock_learner, 1)
    # The output value is not inspected; the call is made with a timeout
    # before the widget state is checked.
    _ = self.get_output(self.widget.Outputs.evaluations_results, wait=5000)

    # assertEqual/assertIn report the actual values when they fail,
    # unlike assertTrue on a precomputed boolean.
    self.assertEqual(len(self.widget.scorers), 1)
    self.assertIn(NewScorer, self.widget.scorers)
    self.assertEqual(len(self.widget._successful_slots()), 1)
def test_no_pregressbar_warning(self):
    """Sending data and a learner to the widget must not record any warning."""
    data = Table("iris")[::15]
    with warnings.catch_warnings(record=True) as w:
        self.send_signal(self.widget.Inputs.train_data, data)
        self.send_signal(self.widget.Inputs.learner, MajorityLearner(), 0)
    # A unittest assertion is used instead of a bare ``assert``: it is not
    # stripped under ``python -O`` and it lists the offending warnings.
    self.assertFalse(
        w,
        "unexpected warnings: " + "; ".join(str(item.message) for item in w))
def test_too_many_folds(self):
    """3-fold cross validation on the first 8 zoo rows must show ``too_many_folds``."""
    widget = self.widget
    kfold_button = widget.controls.resampling.buttons[OWTestAndScore.KFold]
    kfold_button.click()
    widget.n_folds = 3

    tiny_sample = Table("zoo")[:8]
    self.send_signal(widget.Inputs.train_data, tiny_sample)
    self.send_signal(widget.Inputs.learner, MajorityLearner(), 0, wait=5000)

    error_shown = widget.Error.too_many_folds.is_shown()
    self.assertTrue(error_shown)
def test_input_invalid_cls(self):
    """A majority model on "Classifier" shows ``invalid_classifier``; None clears it."""
    invalid_error = self.widget.Error.invalid_classifier

    fitted_majority = MajorityLearner()(self.data)
    self.send_signal("Classifier", fitted_majority)
    self.assertTrue(invalid_error.is_shown())

    self.send_signal("Classifier", None)
    self.assertFalse(invalid_error.is_shown())
def test_bad_domain(self):
    """A majority-based impute model on the first iris variable must raise ValueError."""
    table = data.Table.from_file('iris')
    imputer = impute.Model(MajorityLearner())
    # Look the variable up outside the assertion context so that only the
    # imputer call itself is expected to raise.
    first_variable = table.domain[0]
    with self.assertRaises(ValueError):
        imputer(data=table, variable=first_variable)
def test_nan_target_input(self):
    """Check the widget's outputs when target values are NaN.

    With one NaN target, "Predictions" must keep the class column as
    given, while "Evaluation Results" must contain no NaN in its data's
    class column or in ``actual``. With all targets NaN the evaluation
    data must be empty.
    """
    data = self.iris[::10].copy()
    data.Y[1] = np.nan
    yvec, _ = data.get_column_view(data.domain.class_var)
    self.send_signal("Data", data)
    self.send_signal("Predictors", MajorityLearner()(data), 1)

    pred = self.get_output("Predictions")
    self.assertIsInstance(pred, Table)
    np.testing.assert_array_equal(
        yvec, pred.get_column_view(data.domain.class_var)[0])

    evres = self.get_output("Evaluation Results")
    self.assertIsInstance(evres, Results)
    self.assertIsInstance(evres.data, Table)
    ev_yvec, _ = evres.data.get_column_view(data.domain.class_var)
    self.assertTrue(np.all(~np.isnan(ev_yvec)))
    self.assertTrue(np.all(~np.isnan(evres.actual)))

    data.Y[:] = np.nan
    self.send_signal("Data", data)
    evres = self.get_output("Evaluation Results")
    self.assertEqual(len(evres.data), 0)
def test_augmented_data_classification(self):
    """Check the shape of augmented data for one and for two classifiers.

    The augmented table must keep the number of rows, attributes and
    class variables of iris, and add meta columns for predictions,
    probabilities and the fold.
    """
    data = Table("iris")
    class_values = data.domain.class_var.values
    n_classes = len(class_values)

    def check_augmented(augmented, n_learners):
        n_metas = len(data.domain.metas)
        self.assertEqual(len(augmented), len(data))
        self.assertEqual(len(augmented.domain.attributes),
                         len(data.domain.attributes))
        self.assertEqual(len(augmented.domain.class_vars),
                         len(data.domain.class_vars))
        # per learner: +1 for class, +n_classes for probabilities;
        # overall: +1 for fold
        self.assertEqual(len(augmented.domain.metas),
                         n_metas + n_learners * (n_classes + 1) + 1)
        for offset in range(n_learners):
            self.assertEqual(augmented.domain.metas[n_metas + offset].values,
                             data.domain.class_var.values)

    single = CrossValidation(store_data=True)(data, [NaiveBayesLearner()])
    check_augmented(single.get_augmented_data(['Naive Bayes']), 1)

    both = CrossValidation(store_data=True)(
        data, [NaiveBayesLearner(), MajorityLearner()])
    check_augmented(both.get_augmented_data(['Naive Bayes', 'Majority']), 2)
def test_multiclass_auc_multi_learners(self):
    """Check the AUC of two learners under 10-fold cross validation on iris.

    The first score (logistic regression) must exceed 0.6; the second
    (majority) must lie strictly between 0.4 and 0.6.
    """
    learners = [LogisticRegressionLearner(), MajorityLearner()]
    res = CrossValidation(self.iris, learners, k=10)
    # Score once and reuse the values instead of recomputing the AUC for
    # every assertion.
    auc = AUC(res)
    self.assertGreater(auc[0], 0.6)
    self.assertLess(auc[1], 0.6)
    self.assertGreater(auc[1], 0.4)
def test_comparison_requires_multiple_models(self):
    """The comparison table is enabled only for k-fold with at least two learners.

    Starting with three majority learners under k-fold, learners are
    removed and re-added and the resampling method is switched; after each
    step the enabled state of the comparison table is checked.
    """
    w = self.widget
    buttons = w.controls.resampling.buttons

    def table_enabled_after_run():
        # Fetch the evaluation output (with a timeout) before reading the state.
        self.get_output(self.widget.Outputs.evaluations_results, wait=5000)
        return w.comparison_table.isEnabled()

    self._set_three_majorities()
    w.comparison_criterion = 1
    buttons[OWTestAndScore.KFold].click()
    self.assertTrue(table_enabled_after_run())

    # Two learners left: still enabled.
    self.send_signal(w.Inputs.learner, None, 1)
    self.assertTrue(table_enabled_after_run())

    # One learner left: disabled.
    self.send_signal(w.Inputs.learner, None, 2)
    self.assertFalse(table_enabled_after_run())

    buttons[OWTestAndScore.LeaveOneOut].click()
    self.assertFalse(table_enabled_after_run())

    # Two learners again, but still leave-one-out: disabled.
    extra = MajorityLearner()
    extra.name = "majd"
    self.send_signal(w.Inputs.learner, extra, 1)
    self.assertFalse(table_enabled_after_run())

    buttons[OWTestAndScore.KFold].click()
    self.assertTrue(table_enabled_after_run())
def test_returns_random_class(self):
    """The majority prediction must not always be the same value.

    Row 0 is left out first; then one more row per group of 50 is left
    out in each round. Every prediction for row 0 must be 1 or 2, and at
    least one round must give a prediction different from the first.
    """
    iris = self.iris
    keep = np.ones((150, ), dtype='bool')
    keep[0] = False
    first_prediction = MajorityLearner()(iris[keep])(iris[0])
    self.assertIn(first_prediction, [1, 2])

    saw_other_value = False
    for i in range(1, 50):
        keep[[i, 50 + i, 100 + i]] = False
        prediction = MajorityLearner()(iris[keep])(iris[0])
        self.assertIn(prediction, [1, 2])
        if first_prediction != prediction:
            saw_other_value = True
            break
    if not saw_other_value:
        self.fail("Majority always returns the same value.")
def test_miss_majority():
    """Check leave-one-out majority predictions.

    With a single deviating row, and again with none, the first 49
    predictions must be 0. With rows 25.. set to one, each prediction must
    equal one minus the actual class of the corresponding row.
    """
    features = np.zeros((50, 3))
    # Basic slicing: the target is a view of the last feature column.
    target = features[:, -1]

    def run(table):
        return LeaveOneOut()(table, [MajorityLearner()])

    features[49] = 1
    table = Table(features, target)
    np.testing.assert_equal(run(table).predicted[0][:49], 0)

    features[49] = 0
    np.testing.assert_equal(run(table).predicted[0][:49], 0)

    features[25:] = 1
    table = Table(features, target)
    outcome = run(table)
    expected = 1 - table.Y[outcome.row_indices].flatten()
    np.testing.assert_equal(outcome.predicted[0], expected)
def test_store_models(self):
    """5-fold cross validation keeps models only when ``store_models`` is True."""
    table = self.random_table
    learners = [NaiveBayesLearner(), MajorityLearner()]

    without_models = CrossValidation(
        table, learners, k=5, store_models=False)
    self.assertIsNone(without_models.models)

    with_models = CrossValidation(
        table, learners, k=5, store_models=True)
    self.assertEqual(len(with_models.models), 5)
    self.check_models(with_models, learners, 5)
def test_probs(self):
    """Check predictions and probabilities when testing on separate data.

    Training and testing on rows 30..129 of iris must predict class 1
    with p(class 0) < p(class 2) < p(class 1). Training on rows 50..119
    and testing on rows 0..49 must predict class 1 with p(class 0) == 0.
    """
    pair = [MajorityLearner(), MajorityLearner()]

    subset = self.iris[30:130]
    results = TestOnTestData()(subset, subset, pair)
    self.assertEqual(results.predicted.shape, (2, len(subset)))
    np.testing.assert_equal(results.predicted, np.ones((2, 100)))
    probs = results.probabilities
    class0 = probs[:, :, 0]
    class2 = probs[:, :, 2]
    self.assertTrue((class0 < class2).all())
    class1 = probs[:, :, 1]
    self.assertTrue((class2 < class1).all())

    train_part = self.iris[50:120]
    test_part = self.iris[:50]
    results = TestOnTestData()(train_part, test_part, pair)
    self.assertEqual(results.predicted.shape, (2, len(test_part)))
    np.testing.assert_equal(results.predicted, np.ones((2, 50)))
    no_class0 = results.probabilities[:, :, 0] == 0
    self.assertTrue(no_class0.all())
def test_miss_majority():
    """Check majority predictions when testing on the training data.

    With a single deviating row, and again with none, the first 49
    predictions must be 0. With rows 25.. set to one, all predictions
    must equal the first prediction.
    """
    x = np.zeros((50, 3))
    # Basic slicing: y is a view of the last column of x, so every later
    # write to x is visible through y as well.
    y = x[:, -1]
    x[49] = 1
    data = Table.from_numpy(None, x, y)
    res = TestOnTrainingData()(data, [MajorityLearner()])
    np.testing.assert_equal(res.predicted[0][:49], 0)

    # NOTE(review): the original indentation was lost; this assumes each
    # `with data.unlocked(...)` block wraps only the array write - confirm.
    # NOTE(review): whether `data` shares memory with `x` is not visible
    # here - verify against Table.from_numpy.
    with data.unlocked(data.X):
        x[49] = 0
    res = TestOnTrainingData()(data, [MajorityLearner()])
    np.testing.assert_equal(res.predicted[0][:49], 0)

    with data.unlocked(data.X):
        x[25:] = 1
    # The table is rebuilt from the modified arrays before the last check.
    data = Table.from_numpy(None, x, y)
    res = TestOnTrainingData()(data, [MajorityLearner()])
    # Every prediction must equal the first one.
    np.testing.assert_equal(res.predicted[0], res.predicted[0][0])
def test_store_models(self):
    """Testing on training data keeps models only when ``store_models`` is True."""
    table = self.random_table
    learners = [NaiveBayesLearner(), MajorityLearner()]

    without_models = TestOnTrainingData()(table, learners)
    self.assertIsNone(without_models.models)

    with_models = TestOnTrainingData(store_models=True)(table, learners)
    self.check_models(with_models, learners, 1)
def test_store_models(self):
    """Leave-one-out keeps models only when ``store_models`` is True.

    With the flag set, ``check_models`` is called with ``self.nrows`` as
    the expected count.
    """
    table = self.random_table
    learners = [NaiveBayesLearner(), MajorityLearner()]

    without_models = LeaveOneOut()(table, learners)
    self.assertIsNone(without_models.models)

    with_models = LeaveOneOut(store_models=True)(table, learners)
    self.check_models(with_models, learners, self.nrows)
def _set_three_majorities(self):
    """Feed the widget an iris sample and three named majority learners.

    The learners are named "maja", "majb" and "majc" and sent to inputs
    0, 1 and 2; the evaluation output is then fetched with a timeout.
    """
    widget = self.widget
    sample = Table("iris")[::15]
    self.send_signal(widget.Inputs.train_data, sample)

    names = ["maja", "majb", "majc"]
    for slot in range(len(names)):
        majority = MajorityLearner()
        majority.name = names[slot]
        self.send_signal(widget.Inputs.learner, majority, slot)

    self.get_output(self.widget.Outputs.evaluations_results, wait=5000)