def setup_method(self, method):
    """Prepare the Bingham mechanism class and a random input matrix.

    ``global_seed(314159)`` is called only for test methods whose name ends
    in ``"prob"`` (presumably so probabilistic checks are repeatable --
    confirm against ``global_seed``).
    """
    test_name = method.__name__
    if test_name.endswith("prob"):
        global_seed(314159)

    # The class itself is stored; tests are expected to instantiate it.
    self.mech = Bingham

    # (5, 3) factor -> factor^T . factor is a symmetric 3x3 matrix.
    factor = np.random.random((5, 3))
    self.random_array = factor.T.dot(factor)
def test_different_results(self):
    """Fitted parameters of the epsilon=1 model must differ from scikit-learn's.

    Both classifiers are fitted on the same iris training split; the summed
    squared differences of ``theta_`` and of the variances must be strictly
    positive.
    """
    from sklearn.naive_bayes import GaussianNB as sk_nb
    from sklearn import datasets

    global_seed(12345)
    iris = datasets.load_iris()

    # Only the training part of the split is needed here.
    x_train, _, y_train, _ = train_test_split(iris.data, iris.target, test_size=.2)

    lower = [4.3, 2.0, 1.0, 0.1]
    upper = [7.9, 4.4, 6.9, 2.5]

    private_clf = GaussianNB(epsilon=1.0, bounds=(lower, upper))
    baseline_clf = sk_nb()

    private_clf.fit(x_train, y_train)
    baseline_clf.fit(x_train, y_train)

    # Todo: remove try...except when sklearn v1.0 is required
    try:
        baseline_var = baseline_clf.var_
    except AttributeError:
        baseline_var = baseline_clf.sigma_

    theta_gap = (private_clf.theta_ - baseline_clf.theta_) ** 2
    self.assertGreater(theta_gap.sum(), 0)

    var_gap = (private_clf.var_ - baseline_var) ** 2
    self.assertGreater(var_gap.sum(), 0)
def test_large_epsilon(self):
    """``std`` with ``epsilon=1`` should land within 0.01 of ``np.std``.

    The input is 1000 values from ``np.random.random`` drawn after a fixed
    ``global_seed`` call.
    """
    global_seed(12345)
    samples = np.random.random(1000)

    exact_std = float(np.std(samples))
    private_std = std(samples, epsilon=1, range=1)

    self.assertAlmostEqual(exact_std, private_std, delta=0.01)
def test_large_epsilon(self):
    """``nanstd`` with ``epsilon=5`` should land within 0.01 of ``np.std``.

    The input is 1000 values from ``np.random.random`` drawn after a fixed
    ``global_seed`` call; ``bounds=(0, 1)`` is passed to ``nanstd``.
    """
    global_seed(12345)
    samples = np.random.random(1000)

    exact_std = float(np.std(samples))
    private_std = nanstd(samples, epsilon=5, bounds=(0, 1))

    self.assertAlmostEqual(exact_std, private_std, delta=0.01)
def test_density_2d(self):
    """With ``density=True`` the 2-D histogram must sum to ``(3 / 10) ** 2``.

    Three bins over a range of width 10 are requested on each of the two
    axes, and the expected total is the value asserted below.
    """
    global_seed(3141592653)

    columns = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]
    points = np.array(columns).T  # five 2-D samples, one per row

    dp_hist, _ = histogramdd(
        points,
        epsilon=1,
        bins=3,
        range=[(0, 10), (0, 10)],
        density=True,
    )

    expected_total = 1.0 * (3 / 10) ** 2
    self.assertAlmostEqual(dp_hist.sum(), expected_total)
def test_large_epsilon_axis(self):
    """Per-column ``nanstd`` (``epsilon=15``) should be within 0.01 of ``np.std``.

    The input is a (1000, 5) array from ``np.random.random``; both functions
    are evaluated along ``axis=0`` and compared column by column.
    """
    global_seed(12345)
    samples = np.random.random((1000, 5))

    exact = np.std(samples, axis=0)
    private = nanstd(samples, epsilon=15, bounds=(0, 1), axis=0)

    # Index into ``private`` explicitly so a too-short result still raises.
    for column, exact_value in enumerate(exact):
        self.assertAlmostEqual(exact_value, private[column], delta=0.01)
def test_different_result(self):
    """``histogram`` with ``epsilon=0.1`` must differ from ``np.histogram``.

    Both are computed on the same five values with identical binning; at
    least one bin count has to be different.
    """
    global_seed(3141592653)

    values = np.array([1, 2, 3, 4, 5])
    binning = {"bins": 3, "range": (0, 10)}

    exact_counts, _ = np.histogram(values, **binning)
    private_counts, _ = histogram(values, epsilon=0.1, **binning)

    bins_differ = exact_counts != private_counts
    self.assertTrue(bins_differ.any())
def test_density(self):
    """With ``density=True`` the 1-D histogram must sum to ``3 / 10``.

    Three bins over ``(0, 10)`` are requested with ``epsilon=10``.
    """
    global_seed(3141592653)

    values = np.array([1, 2, 3, 4, 5])
    dp_hist, _ = histogram(
        values,
        epsilon=10,
        bins=3,
        range=(0, 10),
        density=True,
    )

    expected_total = 3 / 10
    self.assertAlmostEqual(dp_hist.sum(), expected_total)
def test_inf_epsilon(self):
    """With infinite epsilon the three distinct input values must be centres.

    The data holds 0.1 and 0.5 four times each and 0.9 three times; each of
    those values has to appear in ``cluster_centers_`` after fitting.
    """
    global_seed(3141592653)

    # Positional arguments are kept positional, exactly as in the original call.
    clf = KMeans(float("inf"), (0, 1), 3)

    flat = [0.1, 0.1, 0.1, 0.1, 0.5, 0.5, 0.5, 0.5, 0.9, 0.9, 0.9]
    X = np.array(flat).reshape(-1, 1)

    clf.fit(X)
    centers = clf.cluster_centers_

    for expected_centre in (0.1, 0.5, 0.9):
        self.assertIn(expected_centre, centers)
def setup_method(self, method):
    """Prepare the Wishart mechanism class and a random 5x5 input array.

    ``global_seed(314159)`` is called only for test methods whose name ends
    in ``"prob"``.
    """
    test_name = method.__name__
    if test_name.endswith("prob"):
        global_seed(314159)

    # The class itself is stored, not an instance.
    # NOTE(review): a disabled earlier variant of this fixture built
    # ``Wishart()`` inside ``warnings.catch_warnings(record=True)`` and
    # asserted that a DeprecationWarning was recorded.
    self.mech = Wishart

    self.random_array = np.random.randn(5, 5)
def test_density_1d(self):
    """With ``density=True`` the 1-D ``histogramdd`` must sum to ``3 / 10``.

    Three bins over ``(0, 10)`` are requested with ``epsilon=10``.
    """
    global_seed(3141592653)

    values = np.array([1, 2, 3, 4, 5])
    dp_hist, _ = histogramdd(
        values,
        epsilon=10,
        bins=3,
        range=[(0, 10)],
        density=True,
    )

    expected_total = 1.0 * 3 / 10
    self.assertAlmostEqual(dp_hist.sum(), expected_total)
def test_different_result(self):
    """``histogram2d`` with ``epsilon=0.1`` must differ from ``np.histogram2d``.

    Both are computed on the same five (x, y) pairs with identical binning;
    at least one cell has to be different.
    """
    global_seed(3141592653)

    xs = np.array([1, 2, 3, 4, 5])
    ys = np.array([5, 7, 1, 5, 9])
    binning = {"bins": 3, "range": [(0, 10), (0, 10)]}

    exact_counts, _, _ = np.histogram2d(xs, ys, **binning)
    private_counts, _, _ = histogram2d(xs, ys, epsilon=0.1, **binning)

    cells_differ = exact_counts != private_counts
    self.assertTrue(cells_differ.any())
def test_density(self):
    """With ``density=True`` the 2-D histogram must sum to ``(3 / 10) ** 2``.

    Three bins over ``(0, 10)`` are requested on each axis with ``epsilon=1``.
    """
    global_seed(3141592653)

    xs = np.array([1, 2, 3, 4, 5])
    ys = np.array([5, 7, 1, 5, 9])

    dp_hist, _, _ = histogram2d(
        xs,
        ys,
        epsilon=1,
        bins=3,
        range=[(0, 10), (0, 10)],
        density=True,
    )

    expected_total = 1.0 * (3 / 10)**2
    self.assertAlmostEqual(dp_hist.sum(), expected_total)
def test_simple(self):
    """Three well-separated 1-D groups should be recovered as cluster centres.

    The data holds 2000 copies each of 0.9, 0.5 and 0.1 (in that order); the
    smallest, median and largest fitted centres must each be within 0.1 of
    0.1, 0.5 and 0.9 respectively.
    """
    global_seed(3141592653)
    clf = KMeans(epsilon=10, bounds=(0, 1), n_clusters=3)

    # Same layout the original built by overwriting slices of a 0.1-filled array.
    X = np.repeat([0.9, 0.5, 0.1], 2000).reshape(-1, 1)

    clf.fit(X)
    centers = clf.cluster_centers_

    checks = (
        (np.min(centers), 0.1),
        (np.median(centers), 0.5),
        (np.max(centers), 0.9),
    )
    for observed, target in checks:
        self.assertAlmostEqual(observed, target, delta=0.1)
def test_simple(self):
    """Three well-separated 1-D groups should be recovered as cluster centres.

    The data holds 333 copies of 0.9, 333 of 0.5 and 334 of 0.1 (in that
    order); for each of those values some fitted centre must lie within 0.05.
    """
    global_seed(3141592653)

    # Positional arguments are kept positional, exactly as in the original call.
    clf = KMeans(5, (0, 1), 3)

    # Same layout the original built by overwriting slices of a 0.1-filled array.
    X = np.repeat([0.9, 0.5, 0.1], [333, 333, 334]).reshape(-1, 1)

    clf.fit(X)
    centers = clf.cluster_centers_

    for target in (0.1, 0.5, 0.9):
        near_target = np.isclose(centers, target, atol=0.05)
        self.assertTrue(near_target.any())
def test_with_iris(self):
    """The epsilon=1 classifier should exceed 50% accuracy on held-out iris data.

    The iris dataset is split 80/20, the model is fitted on the training part
    and accuracy is the fraction of matching predictions on the test part.
    """
    global_seed(12345)
    from sklearn import datasets

    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=.2)

    # One (lower, upper) pair per feature.
    feature_bounds = [(4.3, 7.9), (2.0, 4.4), (1.0, 6.9), (0.1, 2.5)]

    clf = GaussianNB(epsilon=1.0, bounds=feature_bounds)
    clf.fit(x_train, y_train)

    hits = clf.predict(x_test) == y_test
    accuracy = sum(hits) / y_test.shape[0]

    self.assertGreater(accuracy, 0.5)
def test_similar_results(self):
    """With infinite epsilon the scaler should agree with scikit-learn's.

    Both scalers are fitted on the same (100000, 5) uniform sample; ``mean_``
    and ``var_`` are compared with ``np.allclose`` and ``n_samples_seen_``
    must match exactly.
    """
    global_seed(314159)
    X = np.random.rand(100000, 5)

    dp_ss = StandardScaler(bounds=(0, 1), epsilon=float("inf"))
    dp_ss.fit(X)

    sk_ss = sk_pp.StandardScaler()
    sk_ss.fit(X)

    # NOTE(review): ``rtol=1`` lets ``np.allclose`` accept a difference as
    # large as the reference value itself (plus ``atol``), so these checks are
    # very loose -- confirm whether ``rtol=0`` was intended.
    for attribute in ("mean_", "var_"):
        private_value = getattr(dp_ss, attribute)
        reference_value = getattr(sk_ss, attribute)
        self.assertTrue(
            np.allclose(private_value, reference_value, rtol=1, atol=1e-4),
            "Arrays %s and %s should be close" % (private_value, reference_value),
        )

    same_sample_count = dp_ss.n_samples_seen_ == sk_ss.n_samples_seen_
    self.assertTrue(np.all(same_sample_count))
def test_with_iris(self):
    """Check accuracy on iris, then that ``partial_fit`` doubles the class counts.

    After fitting with ``epsilon=5.0`` the score on the held-out split must
    exceed 0.45. Calling ``partial_fit`` with the same training data must
    make the total of ``class_count_`` exactly twice what it was.
    """
    global_seed(12345)
    from sklearn import datasets

    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=.2)

    lower = [4.3, 2.0, 1.0, 0.1]
    upper = [7.9, 4.4, 6.9, 2.5]

    clf = GaussianNB(epsilon=5.0, bounds=(lower, upper))
    clf.fit(x_train, y_train)

    accuracy = clf.score(x_test, y_test)
    # Snapshot the counts before they are updated by partial_fit.
    counts_before = clf.class_count_.copy()
    self.assertGreater(accuracy, 0.45)

    clf.partial_fit(x_train, y_train)
    counts_after = clf.class_count_

    self.assertEqual(np.sum(counts_after), np.sum(counts_before) * 2)
def test_different_results(self):
    """Predictions of the epsilon=1 model must not all match scikit-learn's.

    Both classifiers are fitted on the same iris training split and then
    predict the held-out split; at least one prediction has to differ.
    """
    from sklearn.naive_bayes import GaussianNB as sk_nb
    from sklearn import datasets

    global_seed(12345)
    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=.2)

    # One (lower, upper) pair per feature.
    feature_bounds = [(4.3, 7.9), (2.0, 4.4), (1.0, 6.9), (0.1, 2.5)]

    private_clf = GaussianNB(epsilon=1.0, bounds=feature_bounds)
    baseline_clf = sk_nb()

    private_clf.fit(x_train, y_train)
    baseline_clf.fit(x_train, y_train)

    private_labels = private_clf.predict(x_test)
    baseline_labels = baseline_clf.predict(x_test)

    same_prediction = private_labels == baseline_labels
    self.assertFalse(np.all(same_prediction))
def test_different_results(self):
    """Fitted parameters of the epsilon=1 model must differ from scikit-learn's.

    Both classifiers are fitted on the same iris training split; the summed
    squared differences of ``theta_`` and of the per-class variances must be
    strictly positive.

    The variance is read through a small fallback helper because scikit-learn
    renamed ``sigma_`` to ``var_`` in 1.0 and removed ``sigma_`` in 1.2;
    reading ``sigma_`` unconditionally raises ``AttributeError`` there.
    """
    from sklearn.naive_bayes import GaussianNB as sk_nb
    from sklearn import datasets

    def fitted_variance(model):
        # Prefer the current attribute name, fall back to the legacy one.
        try:
            return model.var_
        except AttributeError:
            return model.sigma_

    global_seed(12345)
    dataset = datasets.load_iris()

    # Only the training part of the split is used in this test.
    x_train, _, y_train, _ = train_test_split(dataset.data, dataset.target, test_size=.2)

    bounds = ([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5])

    clf_dp = GaussianNB(epsilon=1.0, bounds=bounds)
    clf_non_private = sk_nb()

    for clf in (clf_dp, clf_non_private):
        clf.fit(x_train, y_train)

    theta_diff = (clf_dp.theta_ - clf_non_private.theta_) ** 2
    self.assertGreater(theta_diff.sum(), 0)

    var_diff = (fitted_variance(clf_dp) - fitted_variance(clf_non_private)) ** 2
    self.assertGreater(var_diff.sum(), 0)
def setUp(self):
    """Call ``global_seed`` with a fixed value before each test.

    NOTE(review): presumably this makes randomised results repeatable from
    test to test -- confirm against the ``global_seed`` implementation.
    """
    global_seed(2718281828)
def setup_method(self, method):
    """Store the GaussianAnalytic mechanism class for the test to use.

    ``global_seed(314159)`` is called only for test methods whose name ends
    in ``"prob"``.
    """
    test_name = method.__name__
    if test_name.endswith("prob"):
        global_seed(314159)

    # The class itself is stored, not an instance.
    self.mech = GaussianAnalytic
def setup_method(self, method):
    """Create a fresh Wishart mechanism and a random 5x5 input array.

    ``global_seed(314159)`` is called only for test methods whose name ends
    in ``"prob"``.
    """
    test_name = method.__name__
    if test_name.endswith("prob"):
        global_seed(314159)

    # A new instance per test, built with no constructor arguments.
    self.mech = Wishart()

    self.random_array = np.random.randn(5, 5)
def setup_method(self, method):
    """Call ``global_seed`` with a fixed value before each test.

    ``method`` (the test about to run) is accepted but not used here.

    NOTE(review): presumably this makes randomised results repeatable from
    test to test -- confirm against the ``global_seed`` implementation.
    """
    global_seed(3141592653)
def setup_method(self, method):
    """Create a fresh Exponential mechanism for the test to use.

    ``global_seed(314159)`` is called only for test methods whose name ends
    in ``"prob"``.
    """
    test_name = method.__name__
    if test_name.endswith("prob"):
        global_seed(314159)

    # A new instance per test, built with no constructor arguments.
    self.mech = Exponential()
def setup_method(self, method):
    """Create a fresh Laplace mechanism for the test to use.

    ``global_seed(314159)`` is called only for test methods whose name ends
    in ``"prob"``.
    """
    test_name = method.__name__
    if test_name.endswith("prob"):
        global_seed(314159)

    # A new instance per test, built with no constructor arguments.
    self.mech = Laplace()
def setup_method(self, method):
    """Create a fresh GeometricFolded mechanism for the test to use.

    ``global_seed(314159)`` is called only for test methods whose name ends
    in ``"prob"``.
    """
    test_name = method.__name__
    if test_name.endswith("prob"):
        global_seed(314159)

    # A new instance per test, built with no constructor arguments.
    self.mech = GeometricFolded()
import numpy as np
from unittest import TestCase

from diffprivlib.mechanisms import Geometric
from diffprivlib.mechanisms.transforms import StringToInt
from diffprivlib.utils import global_seed

# Module-level seeding: runs once, when this test module is imported.
global_seed(3141592653)


class TestStringToInt(TestCase):
    """Tests for the ``StringToInt`` transformer wrapped around ``Geometric``."""

    def test_not_none(self):
        """Both a new transformer and its ``copy()`` must be non-None."""
        transformer = StringToInt(Geometric())
        self.assertIsNotNone(transformer)

        duplicate = transformer.copy()
        self.assertIsNotNone(duplicate)

    def test_class(self):
        """``StringToInt`` must subclass ``DPMachine`` and ``DPTransformer``."""
        from diffprivlib.mechanisms import DPMachine
        from diffprivlib.mechanisms.transforms import DPTransformer

        for expected_base in (DPMachine, DPTransformer):
            self.assertTrue(issubclass(StringToInt, expected_base))

    def test_no_parent(self):
        """Constructing ``StringToInt`` with no arguments must raise ``TypeError``."""
        with self.assertRaises(TypeError):
            StringToInt()

    def test_randomise(self):
        """``randomise`` on the string ``"1"`` must return a ``str``."""
        # The setter chain is kept as one expression, as in the original.
        configured = Geometric().set_sensitivity(1).set_epsilon(1)
        transformer = StringToInt(configured)

        output = transformer.randomise("1")
        self.assertIsInstance(output, str)
def setup_method(self, method):
    """Create a fresh GeneralRandomizedBinaryResponse mechanism for the test.

    ``global_seed(314159)`` is called only for test methods whose name ends
    in ``"prob"``.
    """
    test_name = method.__name__
    if test_name.endswith("prob"):
        global_seed(314159)

    # A new instance per test, built with no constructor arguments.
    self.mech = GeneralRandomizedBinaryResponse()