def test_accountant(self):
    """Check that ``histogram`` charges the accountant it is given and rejects a list in its place."""
    budget = BudgetAccountant(1.5, 0)
    data = np.array([1, 2, 3, 4, 5])
    # Every call below uses the same query settings; only the accountant argument varies.
    query = dict(epsilon=1, bins=3, range=(0, 10), density=True)

    histogram(data, accountant=budget, **query)
    self.assertEqual((1, 0), budget.total())

    # A second epsilon=1 query on the 1.5 budget is expected to fail.
    with self.assertRaises(BudgetError):
        histogram(data, accountant=budget, **query)

    # Wrapping the accountant in a list is expected to be refused with TypeError.
    with self.assertRaises(TypeError):
        histogram(data, accountant=[budget], **query)
def test_accountant(self):
    """Check that fitting ``LinearRegression`` is charged to an explicit and to a scoped accountant."""
    from diffprivlib.accountant import BudgetAccountant

    explicit = BudgetAccountant()
    features = np.linspace(-1, 1, 100)
    targets = features.copy()
    features = features[:, np.newaxis]
    # Constructor settings shared by both models in this test.
    settings = dict(epsilon=2, fit_intercept=False, bounds_X=(-1, 1), bounds_y=(-1, 1))

    reg = LinearRegression(accountant=explicit, **settings)
    reg.fit(features, targets)
    self.assertEqual((2, 0), explicit.total())

    with BudgetAccountant(3, 0) as scoped:
        # No accountant is passed here; the spend is expected to land on ``scoped``.
        reg = LinearRegression(**settings)
        reg.fit(features, targets)
        self.assertEqual((2, 0), scoped.total())

        # A second epsilon=2 fit on the budget of 3 is expected to fail.
        with self.assertRaises(BudgetError):
            reg.fit(features, targets)
def test_accountant(self):
    """Check that fitting ``RandomForestClassifier`` is charged to an explicit and to a scoped accountant."""
    from diffprivlib.accountant import BudgetAccountant

    def fresh_domains():
        # A new mapping per model, so the two classifiers never share a mutable object.
        return {
            '0': [2.0, 12.0],
            '1': [3.0, 13.0],
            '2': [3.0, 16.0]
        }

    explicit = BudgetAccountant()
    X = np.array([[12, 3, 14], [12, 3, 4], [12, 3, 4], [2, 13, 4], [2, 13, 14], [2, 3, 14], [3, 5, 15]])
    y = np.array([1, 1, 1, 0, 0, 0, 1])

    forest = RandomForestClassifier(epsilon=2, n_estimators=5, accountant=explicit,
                                    feature_domains=fresh_domains())
    forest.fit(X, y)
    self.assertEqual((2, 0), explicit.total())

    with BudgetAccountant(3, 0) as scoped:
        # No accountant is passed here; the spend is expected to land on ``scoped``.
        forest = RandomForestClassifier(epsilon=2, n_estimators=5, feature_domains=fresh_domains())
        forest.fit(X, y)
        self.assertEqual((2, 0), scoped.total())

        # A second epsilon=2 fit on the budget of 3 is expected to fail.
        with self.assertRaises(BudgetError):
            forest.fit(X, y)
def test_change_large_slack(self):
    """Check that assigning ``slack = 0.2`` after two (0.1, 0.1) spends raises ``BudgetError``."""
    acc = BudgetAccountant(1, 0.2, 0)

    # Two identical spends.
    for _ in range(2):
        acc.spend(0.1, 0.1)

    with self.assertRaises(BudgetError):
        acc.slack = 0.2
def test_accountant(self):
    """Check that fitting ``LogisticRegression`` is charged to an explicit and to a scoped accountant."""
    from diffprivlib.accountant import BudgetAccountant

    explicit = BudgetAccountant()

    raw = np.array([
        0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50, 2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75,
        5.00, 5.50
    ])
    y = np.array(
        [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1])
    # Column vector, shifted by 3.0 and then divided by 2.5.
    X = (raw[:, np.newaxis] - 3.0) / 2.5

    clf = LogisticRegression(epsilon=2, data_norm=1.0, accountant=explicit)
    clf.fit(X, y)
    self.assertEqual((2, 0), explicit.total())

    with BudgetAccountant(3, 0) as scoped:
        # No accountant is passed here; the spend is expected to land on ``scoped``.
        clf = LogisticRegression(epsilon=2, data_norm=1.0)
        clf.fit(X, y)
        self.assertEqual((2, 0), scoped.total())

        # A second epsilon=2 fit on the budget of 3 is expected to fail.
        with self.assertRaises(BudgetError):
            clf.fit(X, y)
def test_repr(self):
    """Check which fields show up in ``repr(BudgetAccountant)`` for several constructions."""
    # Default construction: nothing to report.
    text = repr(BudgetAccountant())
    self.assertIn("BudgetAccountant(", text)
    self.assertEqual("BudgetAccountant()", text)

    # Explicit budget parameters appear, the spend history does not.
    text = repr(BudgetAccountant(epsilon=1, delta=0.01, slack=0.01))
    self.assertIn("BudgetAccountant(", text)
    for field in ("epsilon", "delta", "slack"):
        self.assertIn(field, text)
    self.assertNotIn("spent_budget", text)

    # Only a spend history given: it is the only field reported.
    text = repr(BudgetAccountant(spent_budget=[(1, 0), (0, 1)]))
    self.assertIn("BudgetAccountant(", text)
    for field in ("epsilon", "delta", "slack"):
        self.assertNotIn(field, text)
    self.assertIn("spent_budget", text)

    # Eleven entries with the only "5" in the last one: it is elided unless 11 entries are requested.
    acc = BudgetAccountant(spent_budget=[(1., 0.)] * 10 + [(5., 0.5)])
    text = repr(acc)
    self.assertIn("BudgetAccountant(", text)
    self.assertIn("...", text)
    self.assertNotIn("5", text)
    self.assertNotIn("5", acc.__repr__(10))
    self.assertIn("5", acc.__repr__(11))
def test_accountant_with_axes(self):
    """Check that a column-wise ``median`` records one spend per column, totalling the requested epsilon."""
    from diffprivlib.accountant import BudgetAccountant

    n_columns = 4
    acc = BudgetAccountant()
    samples = np.random.random((1000, n_columns))

    median(samples, epsilon=1, bounds=(0, 1), axis=0, accountant=acc)

    # Expecting a separate spend for each of the 4 outputs (one per column), summing to epsilon=1
    self.assertEqual((1, 0), acc.total())
    self.assertEqual(n_columns, len(acc))
def test_default(self):
    """Walk through explicit, implicit and user-set default accountants, checking where each spend lands.

    Statement order matters: the test manipulates the class-level default accountant.
    """
    infinity = float("inf")

    # Specify accountant as arg
    explicit = BudgetAccountant(1.5, 0.0)
    self.sample_model(accountant=explicit)
    self.assertEqual((1.0, 0.0), explicit.total())

    # Use default accountant without one being set
    self.sample_model()
    implicit = BudgetAccountant.pop_default()
    self.assertIsNot(explicit, implicit)
    self.assertEqual(infinity, implicit.epsilon)
    self.assertEqual((1.0, 0.0), implicit.total())

    # Set accountant as default
    installed = BudgetAccountant(2.0, 0.0).set_default()
    self.sample_model(epsilon=1.5)
    self.assertEqual((1.5, 0), installed.total())
    self.assertEqual(2.0, installed.epsilon)
    self.assertIsNot(installed, implicit)
    self.assertIsNot(installed, explicit)

    # Check default is same as what we set it
    popped = BudgetAccountant.pop_default()
    self.assertIs(installed, popped)
    self.assertEqual((1.0, 0.0), implicit.total())

    # Run again in 2 different functions without setting a default
    self.sample_model()
    self.sample_model2()
    shared = BudgetAccountant.pop_default()
    self.assertIsNot(shared, implicit)
    self.assertIsNot(shared, installed)
    self.assertEqual((2.0, 0), shared.total())
def test_accountant(self):
    """Check that ``count_nonzero`` charges the accountant and fails once the budget would be exceeded."""
    from diffprivlib.accountant import BudgetAccountant

    budget = BudgetAccountant(1.5, 0)
    bits = np.random.random((1000, 5)).round()

    count_nonzero(bits, epsilon=1, accountant=budget)
    self.assertEqual((1.0, 0), budget.total())

    # With ``budget`` entered as a context manager, no accountant is passed to the call; the second
    # epsilon=1 query is expected to fail.
    with budget, self.assertRaises(BudgetError):
        count_nonzero(bits, epsilon=1)
def test_accountant(self):
    """Check that ``var`` charges the accountant and fails once the budget would be exceeded."""
    from diffprivlib.accountant import BudgetAccountant

    budget = BudgetAccountant(1.5, 0)
    samples = np.random.random((1000, 5))

    var(samples, epsilon=1, bounds=(0, 1), accountant=budget)
    self.assertEqual((1.0, 0), budget.total())

    # With ``budget`` entered as a context manager, no accountant is passed to the call; the second
    # epsilon=1 query is expected to fail.
    with budget, self.assertRaises(BudgetError):
        var(samples, epsilon=1, bounds=(0, 1))
def test_with_statement_errors(self):
    """Check that budget overruns inside a ``with BudgetAccountant(...)`` block raise ``BudgetError``."""
    with BudgetAccountant(1.5) as scoped:
        self.assertIsInstance(scoped, BudgetAccountant)

        # First call with argument 1 is expected to succeed, the second to overrun the 1.5 budget.
        self.sample_model(1)
        with self.assertRaises(BudgetError):
            self.sample_model(1)

    # The error raised inside the accountant's ``with`` block is expected to propagate out of it.
    with self.assertRaises(BudgetError):
        with BudgetAccountant(1):
            self.sample_model(2)
def test_default_accountant(self):
    """Check that ``histogram`` spends on the default accountant and that popping it yields a new one."""
    # Pop whatever default accountant is currently set before starting.
    BudgetAccountant.pop_default()

    data = np.array([1, 2, 3, 4, 5])

    histogram(data, epsilon=1, bins=3, range=(0, 10), density=True)
    first = BudgetAccountant.pop_default()
    self.assertEqual((1, 0), first.total())

    histogram(data, epsilon=1, bins=3, range=(0, 10))
    second = BudgetAccountant.pop_default()

    # The popped accountant must not have been charged by the second query.
    self.assertEqual((1, 0), first.total())
    self.assertIsNot(first, second)
def test_default_accountant(self):
    """Check that ``histogramdd`` spends on the default accountant and leaves a popped one untouched."""
    # Pop whatever default accountant is currently set before starting.
    BudgetAccountant.pop_default()

    points = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]).T
    ranges = [(0, 10), (0, 10)]

    histogramdd(points, epsilon=1, bins=3, range=ranges, density=True)
    popped = BudgetAccountant.pop_default()
    self.assertEqual((1, 0), popped.total())
    self.assertEqual(popped.epsilon, float("inf"))
    self.assertEqual(popped.delta, 1.0)

    # A later query must not be charged to the accountant that was already popped.
    histogramdd(points, epsilon=1, bins=3, range=ranges)
    self.assertEqual((1, 0), popped.total())
def test_change_spent_budget(self):
    """Check that ``spent_budget`` cannot be assigned, deleted, or mutated through the returned list."""
    acc = BudgetAccountant(1, 0, spent_budget=[(0.5, 0), (0.5, 0)])

    with self.assertRaises(AttributeError):
        acc.spent_budget = [(1, 0)]

    with self.assertRaises(AttributeError):
        del acc.spent_budget

    # Appending to the returned list must leave the accountant's own record at two entries.
    acc.spent_budget.append((1, 0))
    self.assertEqual(2, len(acc))

    exposed = acc.spent_budget
    internal = getattr(acc, "_BudgetAccountant__spent_budget")
    # Equal contents, but a distinct list object.
    self.assertEqual(exposed, internal)
    self.assertIsNot(exposed, internal)
def test_correct_composition(self):
    """Check the composed total of 700 spends of epsilon=2**-9 with slack 2**-25 against a reference value."""
    n_spends = 700
    per_spend = 2**-9
    slack = 2**-25

    acc = BudgetAccountant(slack=slack)
    for _ in range(n_spends):
        acc.spend(per_spend, 0)

    spent_epsilon, spent_delta = acc.total()

    # Reference epsilon is hard-coded; compared to 14 decimal places.
    self.assertAlmostEqual(spent_epsilon, 0.27832280615743646366122002955588987576913442137093, places=14)
    self.assertEqual(spent_delta, slack)
def test_change_budget(self):
    """Check that ``epsilon`` and ``delta`` can be neither reassigned nor deleted."""
    acc = BudgetAccountant(1, 0, spent_budget=[(0.5, 0), (0.5, 0)])

    # For each attribute: first try to overwrite it, then try to delete it.
    for attribute, replacement in (("epsilon", 2), ("delta", 0.1)):
        with self.assertRaises(AttributeError):
            setattr(acc, attribute, replacement)

        with self.assertRaises(AttributeError):
            delattr(acc, attribute)
def test_spend_errors(self):
    """Check that ``spend`` raises ``ValueError`` for each invalid (epsilon, delta) pair below."""
    acc = BudgetAccountant()

    invalid_spends = (
        (0, 0),
        (-1, 0),
        (1, -1),
        (1, 2),
    )

    for epsilon, delta in invalid_spends:
        with self.assertRaises(ValueError):
            acc.spend(epsilon, delta)
def test_inf_spend(self):
    """Check total/remaining/check on a default accountant after spending (inf, 1)."""
    unbounded = (float("inf"), 1)

    acc = BudgetAccountant()
    acc.spend(*unbounded)

    self.assertEqual(unbounded, acc.total())
    self.assertEqual(unbounded, acc.remaining())
    self.assertEqual(unbounded, acc.remaining(100))
    self.assertTrue(acc.check(*unbounded))
def test_remaining_budget_inf(self):
    """Check that a default accountant reports (inf, 1.0) remaining before and after an (inf, 1) spend."""
    infinity = float("inf")
    acc = BudgetAccountant()

    # Before any spend.
    self.assertEqual((infinity, 1.0), acc.remaining())
    self.assertEqual((infinity, 1.0), acc.remaining(100))

    acc.spend(infinity, 1)

    # After the spend the reported remainder is unchanged.
    self.assertEqual((infinity, 1.0), acc.remaining())
    self.assertEqual((infinity, 1.0), acc.remaining(100))
def __init__(self, n_estimators=10, *, epsilon=1.0, cat_feature_threshold=10, n_jobs=1, verbose=0, accountant=None,
             max_depth=15, random_state=None, feature_domains=None, **unused_args):
    """Configure the ensemble and store the privacy-related settings on the instance.

    ``n_estimators``, ``n_jobs``, ``random_state`` and ``verbose`` are forwarded to the parent constructor,
    together with a fresh ``DecisionTreeClassifier`` as base estimator. ``accountant`` is resolved through
    ``BudgetAccountant.load_default``. Any extra keyword arguments are handed to ``self._warn_unused_args``.
    """
    # Names of the attributes the parent is told to treat as per-estimator parameters.
    per_tree_params = ("cat_feature_threshold", "max_depth", "epsilon", "random_state")

    super().__init__(
        base_estimator=DecisionTreeClassifier(),
        n_estimators=n_estimators,
        estimator_params=per_tree_params,
        n_jobs=n_jobs,
        random_state=random_state,
        verbose=verbose)

    self.epsilon = epsilon
    self.cat_feature_threshold = cat_feature_threshold
    self.max_depth = max_depth
    self.accountant = BudgetAccountant.load_default(accountant)
    self.feature_domains = feature_domains

    # NOTE: this seeds NumPy's *global* random state whenever a random_state is supplied.
    if random_state is not None:
        np.random.seed(random_state)

    self._warn_unused_args(unused_args)
def __init__(self, n_components=None, centered=False, epsilon=1.0, data_norm=None, bounds=None, copy=True,
             whiten=False, random_state=None, accountant=None, **unused_args):
    """Configure the parent estimator with fixed solver settings and store the privacy parameters.

    ``n_components``, ``copy``, ``whiten`` and ``random_state`` go to the parent constructor; the solver is
    pinned to ``'full'`` with ``tol=0.0`` and ``iterated_power='auto'``. ``accountant`` is resolved through
    ``BudgetAccountant.load_default``. Any extra keyword arguments are passed to ``warn_unused_args``.
    """
    super().__init__(
        n_components=n_components,
        copy=copy,
        whiten=whiten,
        svd_solver='full',
        tol=0.0,
        iterated_power='auto',
        random_state=random_state)

    self.centered = centered
    self.epsilon = epsilon
    self.data_norm = data_norm
    self.bounds = bounds
    self.accountant = BudgetAccountant.load_default(accountant)

    warn_unused_args(unused_args)
def _sum(array, epsilon=1.0, bounds=None, accountant=None, axis=None, dtype=None, keepdims=False, nan=False):
    """Clip ``array`` to ``bounds``, sum it, randomise the sum with a truncated mechanism and record the spend.

    When ``bounds`` is None they are taken from the data and a ``PrivacyLeakWarning`` is issued. When ``axis``
    is given or ``keepdims`` is truthy the work is delegated to ``_wrap_axis`` with ``_sum`` as the callable.
    ``nan=True`` selects ``np.nansum`` instead of ``np.sum``. The accountant is checked before the computation
    and charged ``(epsilon, 0)`` after the output has been randomised.
    """
    if bounds is None:
        warnings.warn("Bounds have not been specified and will be calculated on the data provided. This will "
                      "result in additional privacy leakage. To ensure differential privacy and no additional "
                      "privacy leakage, specify bounds for each dimension.", PrivacyLeakWarning)
        bounds = (np.min(array), np.max(array))

    # Anything but the plain "reduce everything to a scalar" case goes through the axis wrapper.
    if not (axis is None and not keepdims):
        return _wrap_axis(_sum, array, epsilon=epsilon, bounds=bounds, accountant=accountant, axis=axis,
                          dtype=dtype, keepdims=keepdims, nan=nan)

    lower, upper = check_bounds(bounds, shape=0, dtype=dtype)

    accountant = BudgetAccountant.load_default(accountant)
    accountant.check(epsilon, 0)

    # Let's ravel array to be single-dimensional
    flat = clip_to_bounds(np.ravel(array), bounds)

    if nan:
        reducer = np.nansum
    else:
        reducer = np.sum
    true_total = reducer(flat, axis=axis, dtype=dtype, keepdims=keepdims)

    # Integral dtypes get the geometric mechanism, everything else the Laplace one.
    if dtype is not None and issubclass(dtype, Integral):
        mechanism_cls = GeometricTruncated
    else:
        mechanism_cls = LaplaceTruncated

    n_items = flat.size
    mechanism = mechanism_cls(epsilon=epsilon, sensitivity=upper - lower, lower=lower * n_items,
                              upper=upper * n_items)
    noisy_total = mechanism.randomise(true_total)

    accountant.spend(epsilon, 0)

    return noisy_total
def _var(array, epsilon=1.0, bounds=None, axis=None, dtype=None, keepdims=False, accountant=None, nan=False):
    """Clip ``array`` to ``bounds``, take its variance, randomise it with ``LaplaceBoundedDomain`` and record it.

    When ``bounds`` is None they are taken from the data and a ``PrivacyLeakWarning`` is issued. When ``axis``
    is given or ``keepdims`` is truthy the work is delegated to ``_wrap_axis`` with ``_var`` as the callable.
    ``nan=True`` selects ``np.nanvar`` instead of ``np.var``. The accountant is checked before the computation
    and charged ``(epsilon, 0)`` after the output has been randomised.
    """
    if bounds is None:
        warnings.warn("Bounds have not been specified and will be calculated on the data provided. This will "
                      "result in additional privacy leakage. To ensure differential privacy and no additional "
                      "privacy leakage, specify bounds for each dimension.", PrivacyLeakWarning)
        bounds = (np.min(array), np.max(array))

    # Anything but the plain "reduce everything to a scalar" case goes through the axis wrapper.
    if not (axis is None and not keepdims):
        return _wrap_axis(_var, array, epsilon=epsilon, bounds=bounds, axis=axis, dtype=dtype, keepdims=keepdims,
                          accountant=accountant, nan=nan)

    lower, upper = check_bounds(bounds, shape=0, dtype=dtype)

    accountant = BudgetAccountant.load_default(accountant)
    accountant.check(epsilon, 0)

    # Let's ravel array to be single-dimensional
    flat = clip_to_bounds(np.ravel(array), bounds)

    if nan:
        reducer = np.nanvar
    else:
        reducer = np.var
    true_var = reducer(flat, axis=axis, dtype=dtype, keepdims=keepdims)

    n_items = flat.size
    width = upper - lower
    # Output domain is [0, width**2 / 4]; the sensitivity expression is kept in its original form.
    mechanism = LaplaceBoundedDomain(epsilon=epsilon, delta=0,
                                     sensitivity=(width / n_items) ** 2 * (n_items - 1),
                                     lower=0, upper=(width ** 2) / 4)
    noisy_var = mechanism.randomise(true_var)

    accountant.spend(epsilon, 0)

    return noisy_var
def __init__(self, epsilon=1.0, data_norm=None, tol=1e-4, C=1.0, fit_intercept=True, max_iter=100, verbose=0,
             warm_start=False, n_jobs=None, accountant=None, **unused_args):
    """Configure the parent estimator with fixed solver settings and store the privacy parameters.

    The parent is always constructed with an ``'l2'`` penalty, the ``'lbfgs'`` solver and ``'ovr'``
    multi-class handling; ``tol``, ``C``, ``fit_intercept``, ``max_iter``, ``verbose``, ``warm_start`` and
    ``n_jobs`` are forwarded unchanged. ``accountant`` is resolved through ``BudgetAccountant.load_default``.
    Any extra keyword arguments are passed to ``warn_unused_args``.
    """
    super().__init__(
        penalty='l2',
        dual=False,
        tol=tol,
        C=C,
        fit_intercept=fit_intercept,
        intercept_scaling=1.0,
        class_weight=None,
        random_state=None,
        solver='lbfgs',
        max_iter=max_iter,
        multi_class='ovr',
        verbose=verbose,
        warm_start=warm_start,
        n_jobs=n_jobs)

    self.epsilon = epsilon
    self.data_norm = data_norm
    self.classes_ = None
    self.accountant = BudgetAccountant.load_default(accountant)

    warn_unused_args(unused_args)
def test_get_spent_budget(self):
    """Check that ``spent_budget`` is returned as a list holding the two entries given at construction."""
    initial_spends = [(0.5, 0), (0.5, 0)]
    acc = BudgetAccountant(1, 0, spent_budget=initial_spends)

    recorded = acc.spent_budget

    self.assertIsInstance(recorded, list)
    self.assertEqual(2, len(recorded))
def test_spend_exceed(self):
    """Check that spends beyond an (epsilon=5, delta=0) budget raise ``BudgetError``."""
    acc = BudgetAccountant(5, 0)
    acc.spend(3, 0)

    # First an epsilon overrun (3 + 3 > 5), then a delta spend on a zero-delta budget.
    for epsilon, delta in ((3, 0), (0, 1e-5)):
        with self.assertRaises(BudgetError):
            acc.spend(epsilon, delta)
def test_accountant(self):
    """Check that a second ``histogramdd`` query on a 1.5 budget raises ``BudgetError``."""
    budget = BudgetAccountant(1.5, 0)
    points = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]).T
    # Identical settings for both queries.
    query = dict(epsilon=1, bins=3, range=[(0, 10), (0, 10)], density=True, accountant=budget)

    histogramdd(points, **query)

    with self.assertRaises(BudgetError):
        histogramdd(points, **query)
def _preprocess_data(X, y, fit_intercept, epsilon=1.0, bounds_X=None, bounds_y=None, copy=True, check_input=True,
                     **unused_args):
    """Validate/copy the inputs and, when fitting an intercept, clip and centre ``X`` and ``y``.

    Returns the tuple ``(X, y, X_offset, y_offset, X_scale)``. ``X_scale`` is always a vector of ones with
    one entry per column of ``X``. Without an intercept the offsets are zeros; with one, they are obtained
    from ``mean`` along axis 0 (each call is given its own new ``BudgetAccountant()``) and subtracted from
    the clipped data.
    """
    warn_unused_args(unused_args)

    if check_input:
        X = check_array(X, copy=copy, accept_sparse=False, dtype=FLOAT_DTYPES)
    elif copy:
        X = X.copy(order='K')

    y = np.asarray(y, dtype=X.dtype)
    n_features = X.shape[1]
    X_scale = np.ones(n_features, dtype=X.dtype)

    if not fit_intercept:
        # No centring requested: report zero offsets matching the shapes of X and y.
        X_offset = np.zeros(n_features, dtype=X.dtype)
        y_offset = X.dtype.type(0) if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
        return X, y, X_offset, y_offset, X_scale

    n_targets = y.shape[1] if y.ndim > 1 else 1
    bounds_X = check_bounds(bounds_X, n_features)
    bounds_y = check_bounds(bounds_y, n_targets)

    X = clip_to_bounds(X, bounds_X)
    y = clip_to_bounds(y, bounds_y)

    # Each mean is computed against a throwaway accountant rather than a caller-supplied one.
    X_offset = mean(X, axis=0, bounds=bounds_X, epsilon=epsilon, accountant=BudgetAccountant())
    X -= X_offset

    y_offset = mean(y, axis=0, bounds=bounds_y, epsilon=epsilon, accountant=BudgetAccountant())
    y = y - y_offset

    return X, y, X_offset, y_offset, X_scale
def test_accountant(self):
    """Check that fitting ``StandardScaler`` is charged to an explicit and to a scoped accountant."""
    from diffprivlib.accountant import BudgetAccountant

    explicit = BudgetAccountant()
    X = np.random.rand(10, 5)

    scaler = StandardScaler(epsilon=1, bounds=(0, 1), accountant=explicit)
    scaler.fit(X)
    self.assertEqual((1, 0), explicit.total())

    with BudgetAccountant(1.5, 0) as scoped:
        # No accountant is passed here; the spend is expected to land on ``scoped``.
        scaler = StandardScaler(epsilon=1, bounds=(0, 1))
        scaler.fit(X)
        self.assertEqual((1, 0), scoped.total())

        # A second epsilon=1 fit on the budget of 1.5 is expected to fail.
        with self.assertRaises(BudgetError):
            scaler.fit(X)

    # The explicit accountant must not have been charged by the scoped fits.
    self.assertEqual((1, 0), explicit.total())
def test_accountant(self):
    """Check that fitting ``GaussianNB`` is charged to an explicit and to a scoped accountant."""
    from diffprivlib.accountant import BudgetAccountant

    explicit = BudgetAccountant()
    features = np.random.random((10, 2))
    labels = np.random.randint(2, size=10)

    nb = GaussianNB(epsilon=1.0, bounds=(0, 1), accountant=explicit)
    nb.fit(features, labels)
    self.assertEqual((1, 0), explicit.total())

    with BudgetAccountant(1.5, 0) as scoped:
        # No accountant is passed here; the spend is expected to land on ``scoped``.
        nb = GaussianNB(epsilon=1.0, bounds=(0, 1))
        nb.fit(features, labels)
        self.assertEqual((1, 0), scoped.total())

        # A second epsilon=1 fit on the budget of 1.5 is expected to fail.
        with self.assertRaises(BudgetError):
            nb.fit(features, labels)