Example #1
0
    def test_store_models(self):
        """TestOnTrainingData keeps fitted models only when asked to."""
        data = self.random_table
        fitters = [NaiveBayesLearner(), MajorityLearner()]

        # Default: models are discarded after evaluation.
        result = TestOnTrainingData(data, fitters)
        self.assertIsNone(result.models)

        # With store_models=True a single model per learner is kept.
        result = TestOnTrainingData(data, fitters, store_models=True)
        self.check_models(result, fitters, 1)
Example #2
0
 def test_results(self):
     """ShuffleSplit yields one prediction per test row per resample."""
     table = self.random_table
     train_part, resamples = 0.6, 10
     result = ShuffleSplit(table, [NaiveBayesLearner()],
                           train_size=train_part,
                           test_size=1 - train_part,
                           n_resamples=resamples)
     expected = resamples * self.nrows * (1 - train_part)
     self.assertEqual(len(result.predicted[0]), expected)
Example #3
0
    def test_store_models(self):
        """CrossValidation stores one model per fold when requested."""
        fitters = [NaiveBayesLearner(), MajorityLearner()]

        # store_models=False (explicit): nothing is retained.
        result = CrossValidation(self.random_table, fitters, k=5,
                                 store_models=False)
        self.assertIsNone(result.models)

        # store_models=True: one set of models per fold (k == 5).
        result = CrossValidation(self.random_table, fitters, k=5,
                                 store_models=True)
        self.assertEqual(len(result.models), 5)
        self.check_models(result, fitters, 5)
    def test_store_models(self):
        """LeaveOneOut keeps one model set per left-out row when asked."""
        data = self.random_table
        fitters = [NaiveBayesLearner(), MajorityLearner()]

        # Models are not kept by default.
        self.assertIsNone(LeaveOneOut(data, fitters).models)

        # One fold — and hence one model set — per data row.
        result = LeaveOneOut(data, fitters, store_models=True)
        self.check_models(result, fitters, self.nrows)
 def test_bad_feature(self):
     """A fold feature containing only missing values raises ValueError."""
     fold_var = DiscreteVariable(name="fold", values="abc")
     domain = Domain([DiscreteVariable("x", values="ab")],
                     DiscreteVariable("y", values="cd"),
                     metas=[fold_var])
     # The meta column that should define the folds is all-NaN.
     table = Table.from_numpy(domain, np.zeros((10, 1)), np.ones((10, 1)),
                              np.full((10, 1), np.nan))
     self.assertRaises(ValueError, CrossValidationFeature(feature=fold_var),
                       table, [NaiveBayesLearner()])
    def test_store_data(self):
        """LeaveOneOut keeps a reference to the data only when requested."""
        data = self.random_table
        fitters = [NaiveBayesLearner()]

        # store_data=False: the table is not retained.
        self.assertIsNone(LeaveOneOut(data, fitters, store_data=False).data)

        # store_data=True: the very same table object is retained.
        result = LeaveOneOut(data, fitters, store_data=True)
        self.assertIs(result.data, data)
 def test_degenerate(self):
     """Naive Bayes drops constant attributes and still predicts."""
     domain = Domain((ContinuousVariable(name="A"),
                      ContinuousVariable(name="B"),
                      ContinuousVariable(name="C")),
                     DiscreteVariable(name="CLASS", values=["M", "F"]))
     table = Table(domain, [[0, 1, 0, 0], [0, 1, 0, 1], [0, 1, 0, 1]])
     model = NaiveBayesLearner()(table)
     # Every attribute is constant, so the model keeps none of them.
     self.assertEqual(model.domain.attributes, ())
     # The majority class (index 1) is predicted for every instance.
     self.assertEqual(model(table[0]), 1)
     self.assertTrue(all(model(table) == 1))
Example #8
0
    def test_wrap_score_predict_cls(self):
        """A wrapped CA scorer reproduces the expected classification score."""
        data = self.titanic
        model = NaiveBayesLearner()(data)
        scorer = _wrap_score(CA(), _check_model(model, data))

        wrapped = Mock(wraps=model)
        score = scorer(wrapped, data)
        # NOTE(review): these mock-call assertions were already disabled in
        # the original test; kept as-is rather than re-enabled.
        # wrapped.assert_not_called()
        # wrapped.predict.assert_called_once()
        self.assertAlmostEqual(score, 0.778, 3)
    def test_store_data(self):
        """TestOnTrainingData retains the data only with store_data=True."""
        n_rows, n_cols = 50, 10
        data = random_data(n_rows, n_cols)
        fitters = [NaiveBayesLearner()]

        # By default no reference to the input table is kept.
        self.assertIsNone(TestOnTrainingData(data, fitters).data)

        result = TestOnTrainingData(data, fitters, store_data=True)
        self.assertIs(result.data, data)
 def test_call(self):
     """Leave-one-out on easy random data predicts every row correctly."""
     n_rows, n_cols = 100, 10
     table = random_data(n_rows, n_cols)
     result = LeaveOneOut(table, [NaiveBayesLearner()])
     truth = table.Y[result.row_indices].reshape(n_rows)
     np.testing.assert_equal(result.actual, truth)
     np.testing.assert_equal(result.predicted[0], truth)
     np.testing.assert_equal(np.argmax(result.probabilities[0], axis=1),
                             truth)
 def test_results(self):
     """ShuffleSplit produces n_resamples * test-fraction predictions."""
     n_rows, n_cols = 100, 10
     table = random_data(n_rows, n_cols)
     train_part, resamples = 0.6, 10
     result = ShuffleSplit(table, [NaiveBayesLearner()],
                           train_size=train_part,
                           test_size=1 - train_part,
                           n_resamples=resamples)
     self.assertEqual(len(result.predicted[0]),
                      resamples * n_rows * (1 - train_part))
    def test_store_data(self):
        """CrossValidation keeps the input table only with store_data=True."""
        n_rows, n_cols = 100, 10
        data = random_data(n_rows, n_cols)
        fitters = [NaiveBayesLearner()]

        # Default: the results object holds no reference to the table.
        self.assertIsNone(CrossValidation(data, fitters).data)

        result = CrossValidation(data, fitters, store_data=True)
        self.assertIs(result.data, data)
Example #13
0
 def test_call(self):
     """LeaveOneOut (callable form) classifies easy random data exactly."""
     n_rows = self.nrows
     table = self.random_table
     result = LeaveOneOut()(table, [NaiveBayesLearner()])
     truth = table.Y[result.row_indices].reshape(n_rows)
     np.testing.assert_equal(result.actual, truth)
     np.testing.assert_equal(result.predicted[0], truth)
     np.testing.assert_equal(np.argmax(result.probabilities[0], axis=1),
                             truth)
 def test_call(self):
     """Cross-validation over a meta fold feature classifies exactly."""
     table = self.add_meta_fold(self.random_table, 3)
     fold_var = table.domain.metas[0]
     result = CrossValidationFeature(feature=fold_var)(
         table, [NaiveBayesLearner()])
     truth = table.Y[result.row_indices].reshape(len(table))
     np.testing.assert_equal(result.actual, truth)
     np.testing.assert_equal(result.predicted[0], truth)
     np.testing.assert_equal(np.argmax(result.probabilities[0], axis=1),
                             truth)
    def test_stratified(self):
        """Stratified shuffle split samples each iris class evenly."""
        strata = 50  # rows per class in the iris data set
        result = ShuffleSplit(
            train_size=.5, test_size=.5, n_resamples=3, stratified=True,
            random_state=0)(self.iris, [NaiveBayesLearner()])

        for fold in result.folds:
            indices = result.row_indices[fold]
            # Exactly half of the first class is in the test part, and the
            # first two classes together contribute exactly one stratum.
            self.assertEqual(np.count_nonzero(indices < strata), strata // 2)
            self.assertEqual(np.count_nonzero(indices < 2 * strata), strata)
 def test_results(self):
     """Testing on the training data itself yields perfect predictions."""
     n_rows, n_cols = 50, 10
     table = random_data(n_rows, n_cols)
     result = TestOnTestData(table, table, [NaiveBayesLearner()])
     truth = table.Y[result.row_indices].reshape(n_rows)
     np.testing.assert_equal(result.actual, truth)
     np.testing.assert_equal(result.predicted[0], truth)
     np.testing.assert_equal(np.argmax(result.probabilities[0], axis=1),
                             truth)
     # All rows are used, in order.
     np.testing.assert_equal(result.row_indices, np.arange(n_rows))
 def test_row_indices(self):
     """Map data instances when using random shuffling."""
     results = ShuffleSplit(self.iris, [NaiveBayesLearner()],
                            store_data=True)
     self.send_signal(self.widget.Inputs.evaluation_results, results)
     self.widget.select_correct()
     selected = self.get_output(self.widget.Outputs.selected_data)
     # Rows where the (single) learner predicted the true class.
     hits = np.equal(results.actual, results.predicted)[0]
     hit_rows = results.row_indices[hits]
     self.assertSetEqual(set(self.iris[hit_rows].ids), set(selected.ids))
Example #18
0
 def test_nomogram_with_instance_nb(self):
     """Check initialized marker values and feature sorting for a naive
     Bayes classifier with a data instance on the input."""
     classifier = NaiveBayesLearner()(self.titanic)
     instance = self.titanic[10:11]
     self.send_signal("Classifier", classifier)
     self.send_signal("Data", instance)
     self._check_values(instance.domain.attributes, instance)
     # Expected attribute order for each of the sorting criteria.
     self._test_sort([["status", "age", "sex"], ["age", "sex", "status"],
                      ["sex", "status", "age"], ["sex", "status", "age"],
                      ["sex", "status", "age"]])
Example #19
0
    def test_store_models(self):
        """TestOnTestData keeps one model per learner when requested."""
        data = self.random_table
        split = int(self.nrows * .75)
        train, test = data[:split], data[split:]
        fitters = [NaiveBayesLearner(), MajorityLearner()]

        # Default: no models are retained.
        self.assertIsNone(TestOnTestData()(train, test, fitters).models)

        result = TestOnTestData(store_models=True)(train, test, fitters)
        self.check_models(result, fitters, 1)
Example #20
0
 def test_constant_feature_cont(self):
     """Check nomogram for data with a constant continuous feature."""
     domain = Domain(
         [DiscreteVariable("d", ("a", "b")),
          ContinuousVariable("c")], DiscreteVariable("cls", ("c", "d")))
     features = np.array([[0, 0], [1, 0], [0, 0], [1, 0]])
     table = Table(domain, features, np.array([0, 1, 1, 0]))
     # Both learners should produce 50/50 markers despite the constant "c".
     for learner in (NaiveBayesLearner(), LogisticRegressionLearner()):
         self._test_helper(learner(table), [50, 50])
 @classmethod
 def setUpClass(cls):
     """Precompute shared cross-validation results used by all tests.

     Fixed: the first parameter ``cls`` shows this is intended as a
     classmethod, but the ``@classmethod`` decorator was missing, so
     unittest would have passed an instance as ``cls``.
     """
     super().setUpClass()
     bayes = NaiveBayesLearner()
     tree = TreeLearner()
     iris = Table("iris")
     titanic = Table("titanic")
     # Common evaluation settings shared by all three result fixtures.
     common = dict(k=3, store_data=True)
     cls.results_1_iris = CrossValidation(iris, [bayes], **common)
     cls.results_2_iris = CrossValidation(iris, [bayes, tree], **common)
     cls.results_2_titanic = CrossValidation(titanic, [bayes, tree],
                                             **common)
Example #22
0
 def test_results(self):
     """TestOnTestData with identical train/test tables predicts exactly."""
     n_rows = self.random_table.X.shape[0]
     table = self.random_table
     result = TestOnTestData()(table, table, [NaiveBayesLearner()])
     truth = table.Y[result.row_indices].reshape(n_rows)
     np.testing.assert_equal(result.actual, truth)
     np.testing.assert_equal(result.predicted[0], truth)
     np.testing.assert_equal(np.argmax(result.probabilities[0], axis=1),
                             truth)
     # All rows are tested, in their original order.
     np.testing.assert_equal(result.row_indices, np.arange(n_rows))
Example #23
0
 def test_call_5(self):
     """5-fold unstratified CV classifies easy random data exactly."""
     n_rows = self.random_table.X.shape[0]
     result = CrossValidation(k=5, stratified=False)(
         self.random_table, [NaiveBayesLearner()])
     truth = self.random_table.Y[result.row_indices].reshape(n_rows)
     np.testing.assert_equal(result.actual, truth)
     np.testing.assert_equal(result.predicted[0], truth)
     np.testing.assert_equal(np.argmax(result.probabilities[0], axis=1),
                             truth)
     self.check_folds(result, 5, n_rows)
Example #24
0
    def test_LOOClassNC(self):
        """LOOClassNC predicts correctly for every option combination.

        Fixed: removed the stray debug ``print(pred)`` call that cluttered
        the test output; the assertion alone verifies the behaviour.
        """
        # Exercise every combination of the include/relative/neighbourhood
        # options of the leave-one-out class nonconformity measure.
        for incl in [False, True]:
            for rel in [False, True]:
                for neigh in ['fixed', 'variable']:
                    nc = LOOClassNC(NaiveBayesLearner(),
                                    Euclidean,
                                    20,
                                    relative=rel,
                                    include=incl,
                                    neighbourhood=neigh)
                    icp = InductiveClassifier(nc, self.train, self.calibrate)
                    pred = icp(self.test.x, 0.1)
                    self.assertEqual(pred, ['Iris-setosa'])

        # Sanity check on full iris: accurate and mostly singleton regions.
        icp = InductiveClassifier(
            LOOClassNC(NaiveBayesLearner(), Euclidean, 20))
        r = run(icp, 0.1, CrossSampler(Table('iris'), 4))
        self.assertGreater(r.accuracy(), 0.85)
        self.assertGreater(r.singleton_criterion(), 0.8)
    def test_split_by_model(self):
        """split_by_model yields per-learner Results with stored models."""
        fitters = [NaiveBayesLearner(), MajorityLearner()]
        combined = CrossValidation(self.random_table, fitters, k=5,
                                   store_models=True)

        for idx, single in enumerate(combined.split_by_model()):
            self.assertIsInstance(single, Results)
            # Predictions/probabilities match the corresponding learner's
            # slice of the combined results.
            self.assertTrue(
                (single.predicted == combined.predicted[idx]).all())
            self.assertTrue(
                (single.probabilities == combined.probabilities[idx]).all())
            # One model per fold, each of the learner's model class.
            self.assertEqual(len(single.models), 5)
            for model in single.models:
                self.assertIsInstance(model, fitters[idx].__returns__)
Example #26
0
    def setUp(self):
        """Set up the calibrated-learner widget and shared test fixtures."""
        # Widget under test, with auto-apply off so tests control commits.
        self.widget = self.create_widget(
            OWCalibratedLearner, stored_settings={"auto_apply": False})
        self.send_signal(self.widget.Inputs.base_learner, NaiveBayesLearner())

        # Fixtures consumed by the shared learner-widget test machinery.
        self.data = Table("heart_disease")
        self.valid_datasets = (self.data,)
        self.inadequate_dataset = (
            Table(datasets.path("testing_dataset_reg")),)
        self.learner_class = LearnerClassification
        self.model_class = ModelClassification
        self.model_name = 'Calibrated classifier'
        self.parameters = []
Example #27
0
 def test_pickle(self):
     """An inductive conformal classifier survives a pickle round trip.

     Fixed: the pickle file is now created with ``tempfile`` and removed
     afterwards (the original leaked ``temp.cp`` into the working
     directory), and the debug ``print`` calls were replaced with an
     assertion that the unpickled classifier reproduces the original's
     prediction.
     """
     import os
     import pickle
     import tempfile

     train, test = next(RandomSampler(Table('iris'), 2, 1))
     train, cal = next(RandomSampler(train, 2, 1))
     ic = InductiveClassifier(InverseProbability(NaiveBayesLearner()))
     ic.fit(train, cal)
     before = ic(test[0].x, 0.1)

     fd, path = tempfile.mkstemp(suffix='.cp')
     try:
         with os.fdopen(fd, 'wb') as f:
             pickle.dump(ic, f)
         with open(path, 'rb') as f:
             ic2 = pickle.load(f)
     finally:
         os.remove(path)
     # The round-tripped classifier must predict identically.
     self.assertEqual(before, ic2(test[0].x, 0.1))
Example #28
0
 def test_validate_cross_classification(self):
     """Cross-conformal prediction is valid: ~(1 - eps) of regions hit.

     Fixed: the counter previously named ``all`` shadowed the builtin of
     the same name (renamed to ``n_total``), and the unused ``num``
     accumulator was removed.
     """
     tab = shuffle_data(Table('iris'))
     eps = 0.1
     correct, n_total = 0, len(tab)
     for i in range(n_total):
         train, test = get_instance(tab, i)
         ccp = CrossClassifier(InverseProbability(NaiveBayesLearner()), 5,
                               train)
         pred = ccp(test.x, eps)
         if test.get_class() in pred:
             correct += 1
     # Empirical hit rate should match the nominal coverage 1 - eps.
     self.assertAlmostEqual(correct / n_total, 1.0 - eps, delta=0.02)
    def test_not_stratified(self):
        """Unstratified shuffle split does not balance the iris classes."""
        strata = 50  # rows per class in the iris data set
        result = ShuffleSplit(self.iris, [NaiveBayesLearner()],
                              train_size=.5, test_size=.5,
                              n_resamples=3, stratified=False,
                              random_state=0)

        balanced = []
        for train, _ in result.indices:
            balanced.append(np.count_nonzero(train < strata) == strata / 2)
            balanced.append(np.count_nonzero(train < 2 * strata) == strata)

        # At least one resample must deviate from perfect stratification.
        self.assertFalse(all(balanced))
    def test_augmented_data_classification(self):
        """get_augmented_data appends predictions, probabilities and folds."""
        data = Table("iris")
        n_classes = len(data.domain.class_var.values)
        n_metas = len(data.domain.metas)

        def check(table, n_learners):
            # Original attributes and class variables carry over unchanged.
            self.assertEqual(len(table), len(data))
            self.assertEqual(len(table.domain.attributes),
                             len(data.domain.attributes))
            self.assertEqual(len(table.domain.class_vars),
                             len(data.domain.class_vars))
            # Per learner: one prediction column plus one probability column
            # per class; plus a single shared fold column.
            self.assertEqual(len(table.domain.metas),
                             n_metas + n_learners * (n_classes + 1) + 1)
            # The prediction columns share the class variable's values.
            for j in range(n_learners):
                self.assertEqual(table.domain.metas[n_metas + j].values,
                                 data.domain.class_var.values)

        res = CrossValidation(data, [NaiveBayesLearner()], store_data=True)
        check(res.get_augmented_data(['Naive Bayes']), 1)

        res = CrossValidation(data, [NaiveBayesLearner(), MajorityLearner()],
                              store_data=True)
        check(res.get_augmented_data(['Naive Bayes', 'Majority']), 2)