def test_store_models(self):
    """Fitted models must be retained on the results only when store_models is set."""
    nrows, ncols = 50, 10
    data = random_data(nrows, ncols)
    fitters = [naive_bayes.BayesLearner(), majority.MajorityFitter()]

    def check_stored(res):
        # Testing on training data yields a single "fold" with one
        # model per supplied fitter, in order.
        self.assertEqual(len(res.models), 1)
        for fold_models in res.models:
            self.assertEqual(len(fold_models), 2)
            self.assertIsInstance(fold_models[0], naive_bayes.BayesClassifier)
            self.assertIsInstance(fold_models[1], majority.ConstantClassifier)

    # Default flags: models are not kept.
    self.assertIsNone(testing.TestOnTrainingData()(data, fitters).models)
    # Two-step invocation with store_models=True.
    check_stored(testing.TestOnTrainingData(store_models=True)(data, fitters))
    # Default again: still nothing stored.
    self.assertIsNone(testing.TestOnTrainingData()(data, fitters).models)
    # One-step invocation with store_models=True.
    check_stored(testing.TestOnTrainingData(data, fitters, store_models=True))
def test_bayes(self):
    """Naive Bayes tested on its own training data.

    The class column is an exact copy of one attribute, so accuracy must
    be perfect; after corrupting 20% of the labels it must drop below 1
    while staying reasonably high.
    """
    # np.random.random_integers is deprecated/removed in modern NumPy;
    # randint's upper bound is exclusive, so randint(1, 4) reproduces the
    # original inclusive range {1, 2, 3}.
    x = np.random.randint(1, 4, (100, 5))
    col = np.random.randint(5)
    y = x[:, col].copy().reshape(100, 1)
    t = Table(x, y)
    t = discretization.DiscretizeTable(t, method=EqualWidth(n=3))
    # Class equals an attribute -> the learner can be exactly right.
    res = testing.TestOnTrainingData(t, [naive_bayes.BayesLearner()])
    np.testing.assert_almost_equal(scoring.CA(res), [1])
    # Flip the last 20 labels (maps 1..3 onto 3..1): 80% remain intact.
    t.Y[-20:] = 4 - t.Y[-20:]
    res = testing.TestOnTrainingData(t, [naive_bayes.BayesLearner()])
    self.assertGreaterEqual(scoring.CA(res)[0], 0.75)
    self.assertLess(scoring.CA(res)[0], 1)
def test_miss_majority(self):
    # Majority classifier on (nearly) constant data must always predict
    # the majority class.  NOTE(review): `y` is a *view* into `x`, and the
    # later in-place edits of `x` appear intended to be visible through
    # `data` — assumes Table shares the underlying buffers; TODO confirm.
    x = np.zeros((50, 3))
    y = x[:, -1]
    x[49] = 1  # a single deviant row; majority class is still 0
    data = Table(x, y)
    res = testing.TestOnTrainingData(data, [majority.MajorityFitter()])
    np.testing.assert_equal(res.predicted[0][:49], 0)
    x[49] = 0  # back to entirely zero data
    res = testing.TestOnTrainingData(data, [majority.MajorityFitter()])
    np.testing.assert_equal(res.predicted[0][:49], 0)
    x[25:] = 1  # exactly half the rows are ones: a tie between classes
    y = x[:, -1]
    data = Table(x, y)
    res = testing.TestOnTrainingData(data, [majority.MajorityFitter()])
    # On a tie either class may win, but the prediction must be constant.
    np.testing.assert_equal(res.predicted[0], res.predicted[0][0])
def test_store_data(self):
    """The input table is kept on the results only when store_data is set."""
    nrows, ncols = 50, 10
    table = random_data(nrows, ncols)
    fitters = [naive_bayes.BayesLearner()]

    # Two-step call, default flags: nothing stored.
    self.assertIsNone(testing.TestOnTrainingData()(table, fitters).data)
    # Two-step call with store_data: the identical table object is kept.
    res = testing.TestOnTrainingData(store_data=True)(table, fitters)
    self.assertIs(res.data, table)
    # One-step call, default flags: nothing stored.
    self.assertIsNone(testing.TestOnTrainingData(table, fitters).data)
    # One-step call with store_data: the identical table object is kept.
    res = testing.TestOnTrainingData(table, fitters, store_data=True)
    self.assertIs(res.data, table)
def test_probs(self):
    """Majority classifiers on iris[30:130] must rank class probabilities 1 > 2 > 0."""
    data = Table('iris')[30:130]
    fitters = [majority.MajorityFitter(), majority.MajorityFitter()]
    results = testing.TestOnTrainingData(k=10)(data, fitters)

    # Two learners, one prediction per instance.
    self.assertEqual(results.predicted.shape, (2, len(data)))
    # Both always predict the majority class (index 1).
    np.testing.assert_equal(results.predicted, np.ones((2, 100)))

    probs = results.probabilities
    # Probability ordering: class 0 < class 2 < class 1, everywhere.
    self.assertTrue((probs[:, :, 0] < probs[:, :, 2]).all())
    self.assertTrue((probs[:, :, 2] < probs[:, :, 1]).all())
def test_results(self):
    """Testing on training data reproduces the training labels exactly."""
    nrows, ncols = 50, 10
    table = random_data(nrows, ncols)
    res = testing.TestOnTrainingData(table, [naive_bayes.BayesLearner()])
    # The labels in row order, flattened to one dimension.
    expected = table.Y[res.row_indices].reshape(nrows)
    np.testing.assert_equal(res.actual, expected)
    np.testing.assert_equal(res.predicted[0], expected)
    np.testing.assert_equal(np.argmax(res.probabilities[0], axis=1), expected)
    # Every row is used, in its original order.
    np.testing.assert_equal(res.row_indices, np.arange(nrows))
def update_results(self):
    """Re-run evaluation for every learner whose cached results are stale."""
    # items in need of an update
    items = [(key, input) for key, input in self.learners.items()
             if input.results is None]
    learners = [input.learner for _, input in items]
    self.setStatusMessage("Running")
    # TODO: Test each learner individually
    # Dispatch on the user-selected resampling strategy.
    if self.resampling == OWTestLearners.KFold:
        results = testing.CrossValidation(self.train_data, learners,
                                          k=self.k_folds, store_data=True)
    elif self.resampling == OWTestLearners.LeaveOneOut:
        results = testing.LeaveOneOut(self.train_data, learners,
                                      store_data=True)
    elif self.resampling == OWTestLearners.Bootstrap:
        # sample_p is stored as a percentage; convert to a fraction.
        p = self.sample_p / 100.0
        results = testing.Bootstrap(self.train_data, learners,
                                    n_resamples=self.n_repeat, p=p,
                                    store_data=True)
    elif self.resampling == OWTestLearners.TestOnTrain:
        results = testing.TestOnTrainingData(self.train_data, learners,
                                             store_data=True)
    elif self.resampling == OWTestLearners.TestOnTest:
        assert self.test_data is not None
        results = testing.TestOnTestData(self.train_data, self.test_data,
                                         learners, store_data=True)
    else:
        assert False
    # Split the joint Results into one Results object per learner.
    results = list(split_by_model(results))
    class_var = self.train_data.domain.class_var
    # Pick the statistics appropriate for the problem type.
    if is_discrete(class_var):
        test_stats = classification_stats
    else:
        test_stats = regression_stats
    stats = [test_stats(res) for res in results]
    # Cache the fresh results/stats back onto each learner entry.
    for (key, input), res, stat in zip(items, results, stats):
        self.learners[key] = input._replace(results=res, stats=stat)
    self.setStatusMessage("")
    self._update_stats_model()
def update_results(self):
    """Validate inputs, re-run evaluation for stale learners, refresh the view."""
    # Clear any warnings/errors raised by a previous run of this method.
    self.warning([1, 2])
    self.error(2)

    if self.train_data is None:
        return

    # Validate the separate test set before doing any work.
    if self.resampling == OWTestLearners.TestOnTest:
        if self.test_data is None:
            self.warning(2, "Missing separate test data input")
            return
        elif self.test_data.domain.class_var != \
                self.train_data.domain.class_var:
            self.error(2, ("Inconsistent class variable between test " +
                           "and train data sets"))
            return

    # items in need of an update
    items = [(key, input) for key, input in self.learners.items()
             if input.results is None]
    learners = [input.learner for _, input in items]
    self.setStatusMessage("Running")

    if self.test_data is not None and \
            self.resampling != OWTestLearners.TestOnTest:
        self.warning(1, "Test data is present but unused. "
                        "Select 'Test on test data' to use it.")

    # TODO: Test each learner individually
    # Dispatch on the selected resampling strategy.
    if self.resampling == OWTestLearners.KFold:
        results = testing.CrossValidation(
            self.train_data, learners, k=self.k_folds, store_data=True
        )
    elif self.resampling == OWTestLearners.LeaveOneOut:
        results = testing.LeaveOneOut(
            self.train_data, learners, store_data=True
        )
    elif self.resampling == OWTestLearners.Bootstrap:
        # sample_p is stored as a percentage; convert to a fraction.
        p = self.sample_p / 100.0
        results = testing.Bootstrap(
            self.train_data, learners, n_resamples=self.n_repeat, p=p,
            store_data=True
        )
    elif self.resampling == OWTestLearners.TestOnTrain:
        results = testing.TestOnTrainingData(
            self.train_data, learners, store_data=True
        )
    elif self.resampling == OWTestLearners.TestOnTest:
        # test_data was validated above (missing data already returned),
        # so a None here would be a logic error, not a user-input state.
        assert self.test_data is not None
        results = testing.TestOnTestData(
            self.train_data, self.test_data, learners, store_data=True
        )
    else:
        assert False

    # Split the joint Results into one Results object per learner.
    results = list(split_by_model(results))
    class_var = self.train_data.domain.class_var
    # Pick the statistics appropriate for the problem type.
    if is_discrete(class_var):
        test_stats = classification_stats
    else:
        test_stats = regression_stats
    self._update_header()
    stats = [test_stats(res) for res in results]
    # Cache the fresh results/stats back onto each learner entry.
    for (key, input), res, stat in zip(items, results, stats):
        self.learners[key] = input._replace(results=res, stats=stat)

    self.setStatusMessage("")
    self._update_stats_model()