Example #1
0
    def test_neg_epsilon(self):
        """A negative epsilon must be rejected with ValueError at fit time."""
        features = np.random.random((10, 2))
        labels = np.random.randint(2, size=10)
        model = GaussianNB(epsilon=-1, bounds=[(0, 1)])

        with self.assertRaises(ValueError):
            model.fit(features, labels)
    def test_sigma(self):
        """After fitting, the sigma_ attribute is populated as a numpy array."""
        features = np.random.random((10, 2))
        labels = np.random.randint(2, size=10)

        model = GaussianNB(epsilon=1, bounds=([0, 0], [1, 1]))
        model.fit(features, labels)
        self.assertIsInstance(model.sigma_, np.ndarray)
Example #3
0
    def test_mis_ordered_bounds(self):
        """Bounds with lower > upper must raise ValueError when fitting."""
        features = np.random.random((10, 2))
        labels = np.random.randint(2, size=10)

        # Second feature's bounds are reversed: (1, 0).
        model = GaussianNB(epsilon=1, bounds=[(0, 1), (1, 0)])

        with self.assertRaises(ValueError):
            model.fit(features, labels)
    def test_missing_bounds(self):
        """Bounds covering fewer features than X must raise ValueError."""
        features = np.random.random((10, 3))
        labels = np.random.randint(2, size=10)

        # Only two features bounded, but X has three columns.
        model = GaussianNB(epsilon=1, bounds=([0, 0], [1, 1]))

        with self.assertRaises(ValueError):
            model.fit(features, labels)
Example #5
0
    def test_sample_weight_warning(self):
        """Passing sample_weight triggers DiffprivlibCompatibilityWarning."""
        features = np.random.random((10, 2))
        labels = np.random.randint(2, size=10)
        model = GaussianNB(epsilon=1, bounds=[(0, 1), (0, 1)])
        weights = abs(np.random.randn(10))

        with self.assertWarns(DiffprivlibCompatibilityWarning):
            model.fit(features, labels, sample_weight=weights)
Example #6
0
    def test_no_bounds(self):
        """Fitting without bounds warns of a privacy leak but still fits."""
        features = np.random.random((10, 2))
        labels = np.random.randint(2, size=10)
        model = GaussianNB()

        with self.assertWarns(PrivacyLeakWarning):
            model.fit(features, labels)

        self.assertIsNotNone(model)
Example #7
0
    def test_with_iris(self):
        """Fitting on iris with per-feature bounds should beat chance accuracy."""
        global_seed(12345)
        from sklearn import datasets
        dataset = datasets.load_iris()

        x_train, x_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, test_size=.2)

        feature_bounds = [(4.3, 7.9), (2.0, 4.4), (1.0, 6.9), (0.1, 2.5)]

        model = GaussianNB(epsilon=1.0, bounds=feature_bounds)
        model.fit(x_train, y_train)

        # Fraction of correctly predicted test samples.
        accuracy = (model.predict(x_test) == y_test).sum() / y_test.shape[0]
        self.assertGreater(accuracy, 0.5)
    def test_different_results(self):
        """DP-fitted theta_/var_ must differ from sklearn's non-private fit."""
        from sklearn.naive_bayes import GaussianNB as sk_nb
        from sklearn import datasets

        global_seed(12345)
        dataset = datasets.load_iris()

        x_train, x_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, test_size=.2)

        feature_bounds = ([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5])

        private_clf = GaussianNB(epsilon=1.0, bounds=feature_bounds)
        baseline_clf = sk_nb()

        private_clf.fit(x_train, y_train)
        baseline_clf.fit(x_train, y_train)

        # Todo: remove try...except when sklearn v1.0 is required
        # (sigma_ was renamed to var_ in sklearn 1.0).
        try:
            baseline_var = baseline_clf.var_
        except AttributeError:
            baseline_var = baseline_clf.sigma_

        squared_theta_gap = (private_clf.theta_ - baseline_clf.theta_) ** 2
        self.assertGreater(squared_theta_gap.sum(), 0)

        squared_var_gap = (private_clf.var_ - baseline_var) ** 2
        self.assertGreater(squared_var_gap.sum(), 0)
    def test_update_mean_variance(self):
        """Exercise _update_mean_variance's n_noisy handling and its warnings."""
        features = np.random.random((10, 2))
        labels = np.random.randint(2, size=10)

        model = GaussianNB(epsilon=1, bounds=([0, 0], [1, 1]))

        # Both a positive and a zero noisy count should produce a result.
        self.assertIsNotNone(
            model._update_mean_variance(0, 0, 0, features, n_noisy=5))
        self.assertIsNotNone(
            model._update_mean_variance(0, 0, 0, features, n_noisy=0))

        # Omitting n_noisy warns of a privacy leak.
        self.assertWarns(PrivacyLeakWarning,
                         model._update_mean_variance, 0, 0, 0, features)

        # Supplying sample_weight warns of incompatibility.
        self.assertWarns(DiffprivlibCompatibilityWarning,
                         model._update_mean_variance, 0, 0, 0, features,
                         n_noisy=1, sample_weight=1)
    def test_accountant(self):
        """Budget accountants record epsilon spend and enforce their cap."""
        from diffprivlib.accountant import BudgetAccountant
        accountant = BudgetAccountant()

        x_train = np.random.random((10, 2))
        y_train = np.random.randint(2, size=10)

        model = GaussianNB(epsilon=1.0, bounds=(0, 1), accountant=accountant)
        model.fit(x_train, y_train)
        self.assertEqual((1, 0), accountant.total())

        # Inside the context, fits charge the default (capped) accountant.
        with BudgetAccountant(1.5, 0) as capped_accountant:
            model = GaussianNB(epsilon=1.0, bounds=(0, 1))
            model.fit(x_train, y_train)
            self.assertEqual((1, 0), capped_accountant.total())

            # A second epsilon-1 fit would exceed the 1.5 budget.
            with self.assertRaises(BudgetError):
                model.fit(x_train, y_train)
Example #11
0
    def test_different_results(self):
        """Private and non-private classifiers should disagree on some samples."""
        from sklearn.naive_bayes import GaussianNB as sk_nb
        from sklearn import datasets

        global_seed(12345)
        dataset = datasets.load_iris()

        x_train, x_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, test_size=.2)

        feature_bounds = [(4.3, 7.9), (2.0, 4.4), (1.0, 6.9), (0.1, 2.5)]

        private_clf = GaussianNB(epsilon=1.0, bounds=feature_bounds)
        baseline_clf = sk_nb()

        private_clf.fit(x_train, y_train)
        baseline_clf.fit(x_train, y_train)

        # At least one test prediction should differ between the two models.
        agreement = private_clf.predict(x_test) == baseline_clf.predict(x_test)
        self.assertFalse(np.all(agreement))
    def test_bad_refit_shape(self):
        """partial_fit with a different feature count must raise ValueError."""
        features = np.random.random((10, 2))
        labels = np.random.randint(2, size=10)

        model = GaussianNB(epsilon=1, bounds=([0, 0], [1, 1]))
        model.fit(features, labels)

        # New data has three features; the model was fitted on two.
        wider_features = np.random.random((10, 3))
        model.bounds = ([0, 0, 0], [1, 1, 1])

        with self.assertRaises(ValueError):
            model.partial_fit(wider_features, labels)
    def test_noisy_count(self):
        """Noisy class counts keep the total but perturb individual counts."""
        labels = np.random.randint(20, size=10000)
        true_counts = np.array([(labels == cls).sum()
                                for cls in np.unique(labels)])

        # Finite epsilon: the total is preserved, the per-class counts are not.
        model = GaussianNB(epsilon=3)
        noisy_counts = model._noisy_class_counts(labels)
        self.assertEqual(labels.shape[0], noisy_counts.sum())
        self.assertFalse(np.all(noisy_counts == true_counts))

        # Infinite epsilon: no noise is added at all.
        model = GaussianNB(epsilon=float("inf"))
        noisy_counts = model._noisy_class_counts(labels)
        self.assertEqual(labels.shape[0], noisy_counts.sum())
        self.assertTrue(np.all(noisy_counts == true_counts))
Example #14
0
    def test_with_iris(self):
        """A repeated partial_fit on the same data doubles the class counts."""
        global_seed(12345)
        from sklearn import datasets
        dataset = datasets.load_iris()

        x_train, x_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, test_size=.2)

        feature_bounds = ([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5])

        model = GaussianNB(epsilon=5.0, bounds=feature_bounds)
        model.fit(x_train, y_train)

        accuracy = model.score(x_test, y_test)
        first_counts = model.class_count_.copy()
        self.assertGreater(accuracy, 0.45)

        # Feeding the training set again should exactly double the totals.
        model.partial_fit(x_train, y_train)
        self.assertEqual(np.sum(model.class_count_), np.sum(first_counts) * 2)
Example #15
0
    def test_different_results(self):
        """DP theta_/sigma_ estimates should differ from sklearn's."""
        from sklearn.naive_bayes import GaussianNB as sk_nb
        from sklearn import datasets

        global_seed(12345)
        dataset = datasets.load_iris()

        x_train, x_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, test_size=.2)

        feature_bounds = ([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5])

        private_clf = GaussianNB(epsilon=1.0, bounds=feature_bounds)
        baseline_clf = sk_nb()

        private_clf.fit(x_train, y_train)
        baseline_clf.fit(x_train, y_train)

        squared_theta_gap = (private_clf.theta_ - baseline_clf.theta_) ** 2
        self.assertGreater(squared_theta_gap.sum(), 0)

        squared_sigma_gap = (private_clf.sigma_ - baseline_clf.sigma_) ** 2
        self.assertGreater(squared_sigma_gap.sum(), 0)
    def test_priors(self):
        """Well-formed priors fit fine; malformed priors raise ValueError."""
        features = np.random.random((10, 2))
        labels = np.random.randint(2, size=10)

        # Valid priors summing to one: fit succeeds.
        model = GaussianNB(epsilon=1, bounds=([0, 0], [1, 1]),
                           priors=(0.75, 0.25))
        self.assertIsNotNone(model.fit(features, labels))

        # Wrong number of priors for two classes.
        model = GaussianNB(epsilon=1, bounds=([0, 0], [1, 1]), priors=(1, ))
        with self.assertRaises(ValueError):
            model.fit(features, labels)

        # Priors do not sum to one.
        model = GaussianNB(epsilon=1, bounds=([0, 0], [1, 1]),
                           priors=(0.5, 0.7))
        with self.assertRaises(ValueError):
            model.fit(features, labels)

        # Negative prior probability.
        model = GaussianNB(epsilon=1, bounds=([0, 0], [1, 1]),
                           priors=(-0.5, 1.5))
        with self.assertRaises(ValueError):
            model.fit(features, labels)
Example #17
0
 def test_not_none(self):
     """Constructing a GaussianNB yields a non-None estimator object."""
     model = GaussianNB(epsilon=1, bounds=[(0, 1)])
     self.assertIsNotNone(model)