def setup_method(self, method):
        """Prepare the mechanism class and a random matrix before a test runs.

        The global seed is fixed only when the name of ``method`` ends with
        "prob"; every other test keeps whatever random state is current.
        """
        wants_fixed_seed = method.__name__.endswith("prob")
        if wants_fixed_seed:
            global_seed(314159)

        self.mech = Bingham

        # 3x3 symmetric matrix formed as sample^T . sample from a 5x3 draw.
        sample = np.random.random((5, 3))
        self.random_array = np.dot(sample.T, sample)
    def test_different_results(self):
        """Fitted means and variances must differ from scikit-learn's.

        Both models are fitted on the same iris training split; the summed
        squared difference of ``theta_`` and of the variance attribute has to
        be strictly positive.
        """
        from sklearn import datasets
        from sklearn.naive_bayes import GaussianNB as sk_nb

        global_seed(12345)
        iris = datasets.load_iris()

        # Only the training part of the split is used in this test.
        x_train, _, y_train, _ = train_test_split(iris.data, iris.target, test_size=.2)

        bounds = ([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5])

        private_model = GaussianNB(epsilon=1.0, bounds=bounds)
        baseline_model = sk_nb()

        # Fit the private model first, then the baseline.
        private_model.fit(x_train, y_train)
        baseline_model.fit(x_train, y_train)

        # Todo: remove the fallback when sklearn v1.0 is required
        if hasattr(baseline_model, "var_"):
            baseline_var = baseline_model.var_
        else:
            baseline_var = baseline_model.sigma_

        theta_gap = private_model.theta_ - baseline_model.theta_
        self.assertGreater((theta_gap ** 2).sum(), 0)

        var_gap = private_model.var_ - baseline_var
        self.assertGreater((var_gap ** 2).sum(), 0)
    def test_large_epsilon(self):
        """The private ``std`` should be within 0.01 of ``np.std`` on 1000 samples."""
        global_seed(12345)
        samples = np.random.random(1000)

        expected = float(np.std(samples))
        private = std(samples, epsilon=1, range=1)

        self.assertAlmostEqual(expected, private, delta=0.01)
Beispiel #4
0
    def test_large_epsilon(self):
        """The private ``nanstd`` should be within 0.01 of ``np.std`` on 1000 samples."""
        global_seed(12345)
        samples = np.random.random(1000)

        expected = float(np.std(samples))
        private = nanstd(samples, epsilon=5, bounds=(0, 1))

        self.assertAlmostEqual(expected, private, delta=0.01)
Beispiel #5
0
    def test_density_2d(self):
        """A 2-D density histogram with 3 bins per axis over (0, 10) sums to (3/10)**2."""
        global_seed(3141592653)

        rows = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
        points = rows.T
        dp_hist, _ = histogramdd(points, epsilon=1, bins=3, range=[(0, 10), (0, 10)], density=True)

        # Each bin has area (10 / 3) ** 2, so a density that integrates to one
        # has values summing to the reciprocal of that area.
        expected_total = 1.0 * (3 / 10) ** 2
        self.assertAlmostEqual(dp_hist.sum(), expected_total)
Beispiel #6
0
    def test_large_epsilon_axis(self):
        """Column-wise private ``nanstd`` should be within 0.01 of ``np.std``."""
        global_seed(12345)
        samples = np.random.random((1000, 5))

        expected = np.std(samples, axis=0)
        private = nanstd(samples, epsilon=15, bounds=(0, 1), axis=0)

        # Compare column by column so a failure points at the offending entry.
        for column, expected_value in enumerate(expected):
            self.assertAlmostEqual(expected_value, private[column], delta=0.01)
Beispiel #7
0
    def test_different_result(self):
        """At epsilon=0.1 the private histogram must differ from NumPy's in some bin."""
        global_seed(3141592653)
        data = np.array([1, 2, 3, 4, 5])

        # Same binning for both histograms so the counts are comparable.
        binning = dict(bins=3, range=(0, 10))
        plain_hist, _ = np.histogram(data, **binning)
        private_hist, _ = histogram(data, epsilon=0.1, **binning)

        mismatches = plain_hist != private_hist
        self.assertTrue(mismatches.any())
    def test_density(self):
        """A 1-D density histogram with 3 bins over (0, 10) sums to 3/10."""
        global_seed(3141592653)
        data = np.array([1, 2, 3, 4, 5])

        dp_hist, _ = histogram(data, epsilon=10, bins=3, range=(0, 10), density=True)

        # Bin width is 10 / 3, so the density values add up to its reciprocal.
        expected_total = 3 / 10
        self.assertAlmostEqual(dp_hist.sum(), expected_total)
    def test_inf_epsilon(self):
        """With infinite epsilon the cluster centres contain 0.1, 0.5 and 0.9."""
        global_seed(3141592653)
        clf = KMeans(float("inf"), (0, 1), 3)

        # Four points at 0.1, four at 0.5 and three at 0.9, as a single column.
        X = np.repeat([0.1, 0.5, 0.9], [4, 4, 3]).reshape(-1, 1)
        clf.fit(X)
        centers = clf.cluster_centers_

        for expected_center in (0.1, 0.5, 0.9):
            self.assertIn(expected_center, centers)
    def setup_method(self, method):
        """Store the ``Wishart`` class and a random 5x5 matrix on the test instance.

        The global seed is fixed only for tests whose name ends with "prob".
        """
        is_probabilistic = method.__name__.endswith("prob")
        if is_probabilistic:
            global_seed(314159)

        # The class itself is stored here, not an instance.
        self.mech = Wishart

        # with warnings.catch_warnings(record=True) as w:
        #     warnings.filterwarnings('always', '', DeprecationWarning)
        #     self.mech = Wishart()
        #     self.assertIs(w[0].category, DeprecationWarning)

        self.random_array = np.random.randn(5, 5)
    def test_density_1d(self):
        """``histogramdd`` on 1-D data with 3 bins over (0, 10) sums to 3/10 as a density."""
        global_seed(3141592653)
        data = np.array([1, 2, 3, 4, 5])

        dp_hist, _ = histogramdd(data, epsilon=10, bins=3, range=[(0, 10)], density=True)

        # Bin width is 10 / 3, so the density values add up to its reciprocal.
        expected_total = 1.0 * 3 / 10
        self.assertAlmostEqual(dp_hist.sum(), expected_total)
    def test_different_result(self):
        """At epsilon=0.1 the private 2-D histogram must differ from NumPy's in some bin."""
        global_seed(3141592653)
        x = np.array([1, 2, 3, 4, 5])
        y = np.array([5, 7, 1, 5, 9])

        # Same binning for both histograms so the counts are comparable.
        binning = dict(bins=3, range=[(0, 10), (0, 10)])
        plain_hist, _, _ = np.histogram2d(x, y, **binning)
        private_hist, _, _ = histogram2d(x, y, epsilon=0.1, **binning)

        mismatches = plain_hist != private_hist
        self.assertTrue(mismatches.any())
    def test_density(self):
        """A 2-D density histogram with 3 bins per axis over (0, 10) sums to (3/10)**2."""
        global_seed(3141592653)
        x = np.array([1, 2, 3, 4, 5])
        y = np.array([5, 7, 1, 5, 9])

        dp_hist, _, _ = histogram2d(x, y, epsilon=1, bins=3, range=[(0, 10), (0, 10)], density=True)

        # Each bin has area (10 / 3) ** 2, so the density values add up to
        # the reciprocal of that area.
        expected_total = 1.0 * (3 / 10)**2
        self.assertAlmostEqual(dp_hist.sum(), expected_total)
    def test_simple(self):
        """Three equally sized groups at 0.1, 0.5 and 0.9 are recovered within 0.1."""
        global_seed(3141592653)
        clf = KMeans(epsilon=10, bounds=(0, 1), n_clusters=3)

        # 6000 points: the first 2000 end up at 0.9, the next 2000 at 0.5 and
        # the remaining 2000 stay at 0.1 (the later slice overwrites the first).
        X = np.full(6000, 0.1)
        X[:4000] = 0.5
        X[:2000] = 0.9
        X = X.reshape(-1, 1)

        clf.fit(X)
        centers = clf.cluster_centers_

        lowest, middle, highest = np.min(centers), np.median(centers), np.max(centers)
        self.assertAlmostEqual(lowest, 0.1, delta=0.1)
        self.assertAlmostEqual(middle, 0.5, delta=0.1)
        self.assertAlmostEqual(highest, 0.9, delta=0.1)
    def test_simple(self):
        """Each of 0.1, 0.5 and 0.9 has some cluster centre within 0.05 of it."""
        global_seed(3141592653)
        clf = KMeans(5, (0, 1), 3)

        # 1000 points: the first 333 end up at 0.9, the next 333 at 0.5 and
        # the remaining 334 stay at 0.1 (the later slice overwrites the first).
        X = np.full(1000, 0.1)
        X[:666] = 0.5
        X[:333] = 0.9
        X = X.reshape(-1, 1)

        clf.fit(X)
        centers = clf.cluster_centers_

        for target in (0.1, 0.5, 0.9):
            near_target = np.isclose(centers, target, atol=0.05)
            self.assertTrue(near_target.any())
Beispiel #16
0
    def test_with_iris(self):
        """The private classifier should beat 50% accuracy on a held-out iris split."""
        global_seed(12345)
        from sklearn import datasets
        iris = datasets.load_iris()

        x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=.2)

        bounds = [(4.3, 7.9), (2.0, 4.4), (1.0, 6.9), (0.1, 2.5)]

        clf = GaussianNB(epsilon=1.0, bounds=bounds)
        clf.fit(x_train, y_train)

        # Fraction of test samples whose predicted label matches the true one.
        hits = sum(clf.predict(x_test) == y_test)
        total = y_test.shape[0]
        accuracy = hits / total
        self.assertGreater(accuracy, 0.5)
Beispiel #17
0
    def test_similar_results(self):
        """With infinite epsilon the private scaler should match scikit-learn's.

        Compares ``mean_``, ``var_`` and ``n_samples_seen_`` after fitting both
        scalers on the same 100000x5 random matrix.
        """
        global_seed(314159)

        X = np.random.rand(100000, 5)

        dp_ss = StandardScaler(bounds=(0, 1), epsilon=float("inf"))
        dp_ss.fit(X)

        sk_ss = sk_pp.StandardScaler()
        sk_ss.fit(X)

        # NOTE(review): rtol=1 accepts up to 100% relative deviation, which
        # makes these two checks very permissive - confirm this is intended.
        for attribute in ("mean_", "var_"):
            private_value = getattr(dp_ss, attribute)
            reference_value = getattr(sk_ss, attribute)
            self.assertTrue(
                np.allclose(private_value, reference_value, rtol=1, atol=1e-4),
                "Arrays %s and %s should be close" % (private_value, reference_value))

        samples_match = dp_ss.n_samples_seen_ == sk_ss.n_samples_seen_
        self.assertTrue(np.all(samples_match))
Beispiel #18
0
    def test_with_iris(self):
        """Check held-out accuracy, then that ``partial_fit`` doubles the class counts."""
        global_seed(12345)
        from sklearn import datasets
        iris = datasets.load_iris()

        x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=.2)

        bounds = ([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5])

        clf = GaussianNB(epsilon=5.0, bounds=bounds)
        clf.fit(x_train, y_train)

        accuracy = clf.score(x_test, y_test)
        # Copy the counts so the later partial_fit cannot change this snapshot.
        counts_after_fit = clf.class_count_.copy()
        self.assertGreater(accuracy, 0.45)

        # Feeding the same training data again should double the total count.
        clf.partial_fit(x_train, y_train)
        counts_after_update = clf.class_count_
        self.assertEqual(np.sum(counts_after_update), np.sum(counts_after_fit) * 2)
Beispiel #19
0
    def test_different_results(self):
        """Private and non-private predictions on the iris test split must not all agree."""
        from sklearn import datasets
        from sklearn.naive_bayes import GaussianNB as sk_nb

        global_seed(12345)
        iris = datasets.load_iris()

        # The test labels are not needed; only predictions are compared.
        x_train, x_test, y_train, _ = train_test_split(iris.data, iris.target, test_size=.2)

        bounds = [(4.3, 7.9), (2.0, 4.4), (1.0, 6.9), (0.1, 2.5)]

        private_model = GaussianNB(epsilon=1.0, bounds=bounds)
        baseline_model = sk_nb()

        # Fit the private model first, then the baseline.
        private_model.fit(x_train, y_train)
        baseline_model.fit(x_train, y_train)

        private_labels = private_model.predict(x_test)
        baseline_labels = baseline_model.predict(x_test)
        agreement = private_labels == baseline_labels

        self.assertFalse(np.all(agreement))
Beispiel #20
0
    def test_different_results(self):
        """Check that private and non-private Gaussian naive Bayes fits differ.

        Both models are fitted on the same iris training split; the summed
        squared difference of ``theta_`` and of the fitted variance attribute
        must be strictly positive.
        """
        from sklearn.naive_bayes import GaussianNB as sk_nb
        from sklearn import datasets

        def fitted_variance(model):
            # scikit-learn renamed ``sigma_`` to ``var_`` in v1.0 and removed
            # ``sigma_`` in v1.2, so prefer the new name and fall back to the
            # old one for models that only expose ``sigma_``.
            # Todo: remove the fallback when sklearn v1.0 is required
            try:
                return model.var_
            except AttributeError:
                return model.sigma_

        global_seed(12345)
        dataset = datasets.load_iris()

        # Only the training part of the split is used in this test.
        x_train, _, y_train, _ = train_test_split(dataset.data, dataset.target, test_size=.2)

        bounds = ([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5])

        clf_dp = GaussianNB(epsilon=1.0, bounds=bounds)
        clf_non_private = sk_nb()

        for clf in [clf_dp, clf_non_private]:
            clf.fit(x_train, y_train)

        theta_diff = (clf_dp.theta_ - clf_non_private.theta_) ** 2
        self.assertGreater(theta_diff.sum(), 0)

        var_diff = (fitted_variance(clf_dp) - fitted_variance(clf_non_private)) ** 2
        self.assertGreater(var_diff.sum(), 0)
Beispiel #21
0
 def setUp(self):
     """Set the global seed to a fixed value."""
     fixed_seed = 2718281828
     global_seed(fixed_seed)
    def setup_method(self, method):
        """Store the ``GaussianAnalytic`` class on the test instance.

        The global seed is fixed only for tests whose name ends with "prob".
        """
        is_probabilistic = method.__name__.endswith("prob")
        if is_probabilistic:
            global_seed(314159)

        # The class itself is stored here, not an instance.
        self.mech = GaussianAnalytic
Beispiel #23
0
    def setup_method(self, method):
        """Create a ``Wishart`` instance and a random 5x5 matrix for the test.

        The global seed is fixed only for tests whose name ends with "prob".
        """
        test_name = method.__name__
        if test_name.endswith("prob"):
            global_seed(314159)

        self.mech = Wishart()
        self.random_array = np.random.randn(5, 5)
 def setup_method(self, method):
     """Set the global seed to a fixed value; ``method`` is not used."""
     fixed_seed = 3141592653
     global_seed(fixed_seed)
Beispiel #25
0
    def setup_method(self, method):
        """Create a fresh ``Exponential`` instance for the test.

        The global seed is fixed only for tests whose name ends with "prob".
        """
        test_name = method.__name__
        if test_name.endswith("prob"):
            global_seed(314159)

        self.mech = Exponential()
    def setup_method(self, method):
        """Create a fresh ``Laplace`` instance for the test.

        The global seed is fixed only for tests whose name ends with "prob".
        """
        seed_required = method.__name__.endswith("prob")
        if seed_required:
            global_seed(314159)

        self.mech = Laplace()
    def setup_method(self, method):
        """Create a fresh ``GeometricFolded`` instance for the test.

        The global seed is fixed only for tests whose name ends with "prob".
        """
        seed_required = method.__name__.endswith("prob")
        if seed_required:
            global_seed(314159)

        self.mech = GeometricFolded()
Beispiel #28
0
import numpy as np
from unittest import TestCase

from diffprivlib.mechanisms import Geometric
from diffprivlib.mechanisms.transforms import StringToInt
from diffprivlib.utils import global_seed

# Call diffprivlib's global_seed once at import time with a fixed value -
# presumably so the randomised tests in this module are reproducible.
global_seed(3141592653)


class TestStringToInt(TestCase):
    """Tests for the ``StringToInt`` transformer wrapped around ``Geometric``."""

    def test_not_none(self):
        """Both the transformer and its copy are not ``None``."""
        transformer = StringToInt(Geometric())
        self.assertIsNotNone(transformer)

        duplicate = transformer.copy()
        self.assertIsNotNone(duplicate)

    def test_class(self):
        """``StringToInt`` is a subclass of ``DPMachine`` and ``DPTransformer``."""
        from diffprivlib.mechanisms import DPMachine
        from diffprivlib.mechanisms.transforms import DPTransformer

        for base in (DPMachine, DPTransformer):
            self.assertTrue(issubclass(StringToInt, base))

    def test_no_parent(self):
        """Constructing the transformer without a parent raises ``TypeError``."""
        self.assertRaises(TypeError, StringToInt)

    def test_randomise(self):
        """Randomising the string "1" returns a string."""
        parent = Geometric().set_sensitivity(1).set_epsilon(1)
        transformer = StringToInt(parent)

        output = transformer.randomise("1")
        self.assertIsInstance(output, str)
    def setup_method(self, method):
        """Create a fresh ``GeneralRandomizedBinaryResponse`` instance for the test.

        The global seed is fixed only for tests whose name ends with "prob".
        """
        test_name = method.__name__
        if test_name.endswith("prob"):
            global_seed(314159)

        self.mech = GeneralRandomizedBinaryResponse()