def test_store_models(self):
    """Models are retained by TestOnTrainingData only when requested."""
    table = self.random_table
    learners = [NaiveBayesLearner(), MajorityLearner()]
    # Default: models are discarded after evaluation.
    res = TestOnTrainingData(table, learners)
    self.assertIsNone(res.models)
    # With store_models=True a single "fold" of models is kept.
    res = TestOnTrainingData(table, learners, store_models=True)
    self.check_models(res, learners, 1)
def test_results(self):
    """ShuffleSplit yields n_resamples * test-fraction predictions."""
    data = self.random_table
    train_part, repeats = 0.6, 10
    res = ShuffleSplit(data, [NaiveBayesLearner()],
                       train_size=train_part, test_size=1 - train_part,
                       n_resamples=repeats)
    expected = repeats * self.nrows * (1 - train_part)
    self.assertEqual(len(res.predicted[0]), expected)
def test_store_models(self):
    """CrossValidation keeps one set of models per fold only on request."""
    learners = [NaiveBayesLearner(), MajorityLearner()]
    res = CrossValidation(self.random_table, learners, k=5,
                          store_models=False)
    self.assertIsNone(res.models)
    res = CrossValidation(self.random_table, learners, k=5,
                          store_models=True)
    # Five folds -> five model groups.
    self.assertEqual(len(res.models), 5)
    self.check_models(res, learners, 5)
def test_store_models(self):
    """LeaveOneOut keeps a model per left-out row only on request."""
    table = self.random_table
    learners = [NaiveBayesLearner(), MajorityLearner()]
    res = LeaveOneOut(table, learners)
    self.assertIsNone(res.models)
    # One fold per row -> nrows model groups.
    res = LeaveOneOut(table, learners, store_models=True)
    self.check_models(res, learners, self.nrows)
def test_bad_feature(self):
    """A fold feature consisting solely of missing values must raise."""
    fold_var = DiscreteVariable(name="fold", values="abc")
    domain = Domain([DiscreteVariable("x", values="ab")],
                    DiscreteVariable("y", values="cd"),
                    metas=[fold_var])
    # All meta (fold) values are NaN, so no fold assignment is possible.
    data = Table.from_numpy(domain,
                            np.zeros((10, 1)),
                            np.ones((10, 1)),
                            np.full((10, 1), np.nan))
    self.assertRaises(ValueError,
                      CrossValidationFeature(feature=fold_var),
                      data, [NaiveBayesLearner()])
def test_store_data(self):
    """LeaveOneOut retains the input table only when asked to."""
    table = self.random_table
    learners = [NaiveBayesLearner()]
    res = LeaveOneOut(table, learners, store_data=False)
    self.assertIsNone(res.data)
    res = LeaveOneOut(table, learners, store_data=True)
    # The very same table object must be referenced, not a copy.
    self.assertIs(res.data, table)
def test_degenerate(self):
    """Naive Bayes on constant attributes drops them and predicts the
    majority class for every instance."""
    domain = Domain((ContinuousVariable(name="A"),
                     ContinuousVariable(name="B"),
                     ContinuousVariable(name="C")),
                    DiscreteVariable(name="CLASS", values=["M", "F"]))
    data = Table(domain, [[0, 1, 0, 0], [0, 1, 0, 1], [0, 1, 0, 1]])
    model = NaiveBayesLearner()(data)
    # Every attribute column is constant, so none survive discretization.
    self.assertEqual(model.domain.attributes, ())
    # Class value 1 ("F") is the majority and is always predicted.
    self.assertEqual(model(data[0]), 1)
    self.assertTrue(all(model(data) == 1))
def test_wrap_score_predict_cls(self):
    """A _wrap_score-d CA scorer reproduces the known titanic baseline."""
    data = self.titanic
    model = NaiveBayesLearner()(data)
    scorer = _wrap_score(CA(), _check_model(model, data))
    mocked_model = Mock(wraps=model)
    baseline_score = scorer(mocked_model, data)
    # NOTE(review): the mock call-count assertions were already disabled
    # in the original test; confirm intent before re-enabling them:
    # mocked_model.assert_not_called()
    # mocked_model.predict.assert_called_once()
    self.assertAlmostEqual(baseline_score, 0.778, 3)
def test_store_data(self):
    """TestOnTrainingData retains the input table only when asked to."""
    table = random_data(50, 10)
    learners = [NaiveBayesLearner()]
    res = TestOnTrainingData(table, learners)
    self.assertIsNone(res.data)
    res = TestOnTrainingData(table, learners, store_data=True)
    # The identical table object must be stored, not a copy.
    self.assertIs(res.data, table)
def test_call(self):
    """On fully predictable random data LOO reproduces the targets."""
    nrows, ncols = 100, 10
    table = random_data(nrows, ncols)
    res = LeaveOneOut(table, [NaiveBayesLearner()])
    truth = table.Y[res.row_indices].reshape(nrows)
    np.testing.assert_equal(res.actual, truth)
    np.testing.assert_equal(res.predicted[0], truth)
    # The predicted class must also carry the highest probability.
    np.testing.assert_equal(np.argmax(res.probabilities[0], axis=1), truth)
def test_results(self):
    """ShuffleSplit yields n_resamples * test-fraction predictions."""
    nrows, ncols = 100, 10
    data = random_data(nrows, ncols)
    train_part, repeats = 0.6, 10
    res = ShuffleSplit(data, [NaiveBayesLearner()],
                       train_size=train_part, test_size=1 - train_part,
                       n_resamples=repeats)
    expected = repeats * nrows * (1 - train_part)
    self.assertEqual(len(res.predicted[0]), expected)
def test_store_data(self):
    """CrossValidation retains the input table only when asked to."""
    table = random_data(100, 10)
    learners = [NaiveBayesLearner()]
    res = CrossValidation(table, learners)
    self.assertIsNone(res.data)
    res = CrossValidation(table, learners, store_data=True)
    # Must reference the same object, not a copy.
    self.assertIs(res.data, table)
def test_call(self):
    """Calling a LeaveOneOut instance reproduces the targets exactly."""
    nrows = self.nrows
    table = self.random_table
    res = LeaveOneOut()(table, [NaiveBayesLearner()])
    truth = table.Y[res.row_indices].reshape(nrows)
    np.testing.assert_equal(res.actual, truth)
    np.testing.assert_equal(res.predicted[0], truth)
    # Argmax over probabilities must agree with the predicted class.
    np.testing.assert_equal(np.argmax(res.probabilities[0], axis=1), truth)
def test_call(self):
    """Splitting by a 3-valued meta fold feature reproduces the targets."""
    table = self.add_meta_fold(self.random_table, 3)
    validator = CrossValidationFeature(feature=table.domain.metas[0])
    res = validator(table, [NaiveBayesLearner()])
    truth = table.Y[res.row_indices].reshape(len(table))
    np.testing.assert_equal(res.actual, truth)
    np.testing.assert_equal(res.predicted[0], truth)
    np.testing.assert_equal(np.argmax(res.probabilities[0], axis=1), truth)
def test_stratified(self):
    """Stratified shuffle split keeps class proportions in every fold."""
    n = 50  # size of each class stratum in iris
    validator = ShuffleSplit(train_size=.5, test_size=.5, n_resamples=3,
                             stratified=True, random_state=0)
    res = validator(self.iris, [NaiveBayesLearner()])
    for fold in res.folds:
        # Exactly half of the first stratum, and exactly one stratum's
        # worth of the first two strata, must land in each test fold.
        self.assertEqual(
            np.count_nonzero(res.row_indices[fold] < n), n // 2)
        self.assertEqual(
            np.count_nonzero(res.row_indices[fold] < 2 * n), n)
def test_results(self):
    """Testing on the training table keeps order and reproduces targets."""
    nrows, ncols = 50, 10
    table = random_data(nrows, ncols)
    res = TestOnTestData(table, table, [NaiveBayesLearner()])
    truth = table.Y[res.row_indices].reshape(nrows)
    np.testing.assert_equal(res.actual, truth)
    np.testing.assert_equal(res.predicted[0], truth)
    np.testing.assert_equal(np.argmax(res.probabilities[0], axis=1), truth)
    # Row order is preserved when test data equals train data.
    np.testing.assert_equal(res.row_indices, np.arange(nrows))
def test_row_indices(self):
    """Map data instances when using random shuffling"""
    results = ShuffleSplit(self.iris, [NaiveBayesLearner()],
                           store_data=True)
    self.send_signal(self.widget.Inputs.evaluation_results, results)
    self.widget.select_correct()
    selected = self.get_output(self.widget.Outputs.selected_data)
    # Mask of rows where the (single) learner predicted the true class.
    hits = np.equal(results.actual, results.predicted)[0]
    hit_rows = results.row_indices[hits]
    self.assertSetEqual(set(self.iris[hit_rows].ids),
                        set(selected.ids))
def test_nomogram_with_instance_nb(self):
    """Check initialized marker values and feature sorting for naive
    bayes classifier and data on input"""
    classifier = NaiveBayesLearner()(self.titanic)
    instance = self.titanic[10:11]
    self.send_signal("Classifier", classifier)
    self.send_signal("Data", instance)
    self._check_values(instance.domain.attributes, instance)
    expected_orders = [["status", "age", "sex"],
                       ["age", "sex", "status"],
                       ["sex", "status", "age"],
                       ["sex", "status", "age"],
                       ["sex", "status", "age"]]
    self._test_sort(expected_orders)
def test_store_models(self):
    """TestOnTestData keeps models only when store_models is set."""
    data = self.random_table
    split = int(self.nrows * .75)
    train, test = data[:split], data[split:]
    learners = [NaiveBayesLearner(), MajorityLearner()]
    res = TestOnTestData()(train, test, learners)
    self.assertIsNone(res.models)
    # A single train/test pass produces one fold of models.
    res = TestOnTestData(store_models=True)(train, test, learners)
    self.check_models(res, learners, 1)
def test_constant_feature_cont(self):
    """Check nomogram for data with constant continuous feature"""
    domain = Domain(
        [DiscreteVariable("d", ("a", "b")), ContinuousVariable("c")],
        DiscreteVariable("cls", ("c", "d")))
    X = np.array([[0, 0], [1, 0], [0, 0], [1, 0]])
    data = Table(domain, X, np.array([0, 1, 1, 0]))
    # Both classifiers are expected to show 50:50 on this balanced,
    # uninformative data set.
    for learner in (NaiveBayesLearner(), LogisticRegressionLearner()):
        self._test_helper(learner(data), [50, 50])
def setUpClass(cls):
    """Precompute the cross-validation results shared by all tests."""
    super().setUpClass()
    bayes, tree = NaiveBayesLearner(), TreeLearner()
    iris, titanic = Table("iris"), Table("titanic")
    common = dict(k=3, store_data=True)
    cls.results_1_iris = CrossValidation(iris, [bayes], **common)
    cls.results_2_iris = CrossValidation(iris, [bayes, tree], **common)
    cls.results_2_titanic = CrossValidation(
        titanic, [bayes, tree], **common)
def test_results(self):
    """Testing on the training table keeps order and reproduces targets."""
    table = self.random_table
    nrows = table.X.shape[0]
    res = TestOnTestData()(table, table, [NaiveBayesLearner()])
    truth = table.Y[res.row_indices].reshape(nrows)
    np.testing.assert_equal(res.actual, truth)
    np.testing.assert_equal(res.predicted[0], truth)
    np.testing.assert_equal(np.argmax(res.probabilities[0], axis=1), truth)
    # Rows stay in their original order.
    np.testing.assert_equal(res.row_indices, np.arange(nrows))
def test_call_5(self):
    """Unstratified 5-fold CV reproduces the targets on separable data."""
    nrows = self.random_table.X.shape[0]
    res = CrossValidation(k=5, stratified=False)(
        self.random_table, [NaiveBayesLearner()])
    truth = self.random_table.Y[res.row_indices].reshape(nrows)
    np.testing.assert_equal(res.actual, truth)
    np.testing.assert_equal(res.predicted[0], truth)
    np.testing.assert_equal(np.argmax(res.probabilities[0], axis=1), truth)
    self.check_folds(res, 5, nrows)
def test_LOOClassNC(self):
    """LOOClassNC predicts the expected singleton for every combination
    of its options and performs well under cross sampling.

    Fix: removed the leftover debug ``print(pred)`` — tests should
    assert, not write to stdout.
    """
    for incl in [False, True]:
        for rel in [False, True]:
            for neigh in ['fixed', 'variable']:
                nc = LOOClassNC(NaiveBayesLearner(), Euclidean, 20,
                                relative=rel, include=incl,
                                neighbourhood=neigh)
                icp = InductiveClassifier(nc, self.train, self.calibrate)
                pred = icp(self.test.x, 0.1)
                self.assertEqual(pred, ['Iris-setosa'])
    # Default-option classifier evaluated with 4-fold cross sampling.
    icp = InductiveClassifier(
        LOOClassNC(NaiveBayesLearner(), Euclidean, 20))
    r = run(icp, 0.1, CrossSampler(Table('iris'), 4))
    self.assertGreater(r.accuracy(), 0.85)
    self.assertGreater(r.singleton_criterion(), 0.8)
def test_split_by_model(self):
    """split_by_model yields one Results per learner with matching data."""
    learners = [NaiveBayesLearner(), MajorityLearner()]
    res = CrossValidation(self.random_table, learners, k=5,
                          store_models=True)
    for idx, single in enumerate(res.split_by_model()):
        self.assertIsInstance(single, Results)
        self.assertTrue((single.predicted == res.predicted[idx]).all())
        self.assertTrue(
            (single.probabilities == res.probabilities[idx]).all())
        # One model per fold, each of the class the learner produces.
        self.assertEqual(len(single.models), 5)
        for model in single.models:
            self.assertIsInstance(model, learners[idx].__returns__)
def setUp(self):
    """Create the widget and the fixtures the shared learner tests use."""
    self.widget = self.create_widget(
        OWCalibratedLearner, stored_settings={"auto_apply": False})
    self.send_signal(self.widget.Inputs.base_learner, NaiveBayesLearner())
    # Data sets the shared tests feed to the widget.
    self.data = Table("heart_disease")
    self.valid_datasets = (self.data,)
    self.inadequate_dataset = (
        Table(datasets.path("testing_dataset_reg")),)
    # Expected output classes and naming used by the shared tests.
    self.learner_class = LearnerClassification
    self.model_class = ModelClassification
    self.model_name = 'Calibrated classifier'
    self.parameters = []
def test_pickle(self):
    """An InductiveClassifier survives a pickle round trip and predicts
    identically afterwards.

    Fixes: the original wrote ``temp.cp`` into the working directory and
    never removed it, and only printed the predictions instead of
    asserting anything.
    """
    import os
    import pickle
    import tempfile
    train, test = next(RandomSampler(Table('iris'), 2, 1))
    train, cal = next(RandomSampler(train, 2, 1))
    ic = InductiveClassifier(InverseProbability(NaiveBayesLearner()))
    ic.fit(train, cal)
    before = ic(test[0].x, 0.1)
    # Round-trip through a real file; clean the temp path up afterwards.
    fd, path = tempfile.mkstemp(suffix='.cp')
    try:
        with os.fdopen(fd, 'wb') as f:
            pickle.dump(ic, f)
        with open(path, 'rb') as f:
            ic2 = pickle.load(f)
    finally:
        os.remove(path)
    # The unpickled classifier must predict exactly as the original.
    self.assertEqual(before, ic2(test[0].x, 0.1))
def test_validate_cross_classification(self):
    """Empirical coverage of CrossClassifier must track 1 - eps.

    Fix: the local variable ``all`` shadowed the builtin ``all``;
    renamed it to ``total``.
    """
    tab = shuffle_data(Table('iris'))
    eps = 0.1
    correct, num, total = 0, 0, len(tab)
    for i in range(total):
        train, test = get_instance(tab, i)
        ccp = CrossClassifier(InverseProbability(NaiveBayesLearner()), 5,
                              train)
        pred = ccp(test.x, eps)
        if test.get_class() in pred:
            correct += 1
        num += len(pred)
    # Coverage should be close to the nominal 1 - eps guarantee.
    self.assertAlmostEqual(correct / total, 1.0 - eps, delta=0.02)
def test_not_stratified(self):
    """Unstratified shuffle split does not keep perfect class balance."""
    n = 50  # size of each class stratum in iris
    res = ShuffleSplit(self.iris, [NaiveBayesLearner()],
                       train_size=.5, test_size=.5, n_resamples=3,
                       stratified=False, random_state=0)
    balanced = []
    for train, _ in res.indices:
        balanced.append(np.count_nonzero(train < n) == n / 2)
        balanced.append(np.count_nonzero(train < 2 * n) == n)
    # At least one resample must deviate from perfect stratification.
    self.assertFalse(all(balanced))
def test_augmented_data_classification(self):
    """get_augmented_data appends prediction, probability and fold metas
    while leaving the original columns untouched."""
    data = Table("iris")
    class_values = data.domain.class_var.values
    n_classes = len(class_values)
    n_orig_metas = len(data.domain.metas)

    def check(table, extra_metas, n_learners):
        # Shape of the augmented table: rows, attributes and class
        # columns unchanged; only metas grow.
        self.assertEqual(len(table), len(data))
        self.assertEqual(len(table.domain.attributes),
                         len(data.domain.attributes))
        self.assertEqual(len(table.domain.class_vars),
                         len(data.domain.class_vars))
        self.assertEqual(len(table.domain.metas),
                         n_orig_metas + extra_metas)
        # The first appended meta(s) are the per-learner predictions,
        # sharing the class variable's values.
        for k in range(n_learners):
            self.assertEqual(table.domain.metas[n_orig_metas + k].values,
                             class_values)

    res = CrossValidation(data, [NaiveBayesLearner()], store_data=True)
    table = res.get_augmented_data(['Naive Bayes'])
    # +1 for class, +n_classes for probabilities, +1 for fold
    check(table, 1 + n_classes + 1, 1)

    res = CrossValidation(data, [NaiveBayesLearner(), MajorityLearner()],
                          store_data=True)
    table = res.get_augmented_data(['Naive Bayes', 'Majority'])
    check(table, 2 * (n_classes + 1) + 1, 2)