def analysis_privacy(lot_size, data_size, sgd_sigma, gmm_sigma, gmm_iter, gmm_n_comp, sgd_epoch, pca_eps, delta=1e-5):
    """Account for the combined privacy cost of PCA + GMM + DP-SGD via RDP.

    Composes the RDP curves of the three mechanisms, converts to
    (eps, delta)-DP, and reports each mechanism's share of the budget at
    the optimal Renyi order.

    Returns:
        Tuple (eps, shares) where shares is the [pca, gmm, sgd] fraction
        of the total RDP at the optimal order.
    """
    sample_ratio = lot_size / data_size
    n_sgd_steps = int(math.ceil(sgd_epoch * data_size / lot_size))
    # Each GMM iteration performs 2*n_comp + 1 noisy queries.
    n_gmm_steps = gmm_iter * (2 * gmm_n_comp + 1)
    orders = ([1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5]
              + list(range(5, 64)) + [128, 256, 512])
    # PCA's RDP curve: 2 * order * pca_eps^2 (presumably a Gaussian-mechanism
    # bound -- verify against the PCA release used upstream).
    rdp_pca = np.array(orders) * 2 * (pca_eps**2)
    rdp_sgd = compute_rdp(sample_ratio, sgd_sigma, n_sgd_steps, orders)
    rdp_gmm = compute_rdp(1, gmm_sigma, n_gmm_steps, orders)
    rdp_total = rdp_pca + rdp_gmm + rdp_sgd
    eps, _, opt_order = get_privacy_spent(orders, rdp_total, target_delta=delta)
    i = orders.index(opt_order)
    shares = [rdp_pca[i] / rdp_total[i],
              rdp_gmm[i] / rdp_total[i],
              rdp_sgd[i] / rdp_total[i]]
    print(f"ratio(pca:gmm:sgd):{shares[0]}:{shares[1]}:{shares[2]}")
    print(f"GMM + SGD + PCA (MA): {eps}, {delta}-DP")
    return eps, shares
def test_check_composition(self):
    alphas = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12.,
              14., 16., 20., 24., 28., 32., 64., 256.)
    # First mechanism: heavily subsampled Gaussian over many steps.
    total_rdp = rdp_accountant.compute_rdp(q=1e-4, noise_multiplier=.4,
                                           steps=40000, orders=alphas)
    epsilon, _, _ = rdp_accountant.get_privacy_spent(alphas, total_rdp,
                                                     target_delta=1e-6)
    # Compose with a second, more strongly sampled mechanism.
    total_rdp += rdp_accountant.compute_rdp(q=0.1, noise_multiplier=2,
                                            steps=100, orders=alphas)
    epsilon, _, _ = rdp_accountant.get_privacy_spent(alphas, total_rdp,
                                                     target_delta=1e-5)
    # These tests use the old RDP -> approx DP conversion
    # self.assertAlmostEqual(eps, 8.509656, places=5)
    # self.assertEqual(opt_order, 2.5)
    # But these still provide an upper bound
    self.assertLessEqual(epsilon, 8.509656)
def search_optimal_noise_multiplier(self, target_epsilon):
    """Binary-search the smallest noise multiplier (sigma) meeting the budget.

    Phase 1 doubles an upper bracket until its epsilon drops below
    `target_epsilon`; phase 2 bisects the bracket until the achieved
    epsilon is within a relative EPS_TOLERANCE of the target. Supports
    both RDP and GDP accounting. Functionality adapted from Opacus
    (https://github.com/pytorch/opacus).

    Args:
        target_epsilon: privacy budget the returned sigma must satisfy.

    Returns:
        The upper end of the final bracket: a sigma whose spent epsilon
        is <= target_epsilon (within tolerance).

    Raises:
        ValueError: if no sigma up to MAX_SIGMA satisfies the budget, or
            (GDP branch only) if the achievable delta exceeds target_delta.
    """
    eps_high = float("inf")
    sigma_low, sigma_high = 0, 10
    # Fine grid of Renyi orders near 1, plus coarser integer orders.
    orders = [1 + x / 100.0 for x in range(1, 1000)] + list(range(12, 1200))
    # Phase 1: grow sigma_high exponentially until it meets the budget.
    while eps_high > target_epsilon:
        sigma_high = 2 * sigma_high
        if self.dp_type == 'rdp':
            rdp = compute_rdp(self.sampling_rate, sigma_high, self.steps,
                              orders)
            eps_high, _, _ = get_privacy_spent(
                orders, rdp, target_delta=self.target_delta)
        else:  # if self.dp_type == 'gdp'
            mu = compute_gdp_mu(self.sampling_rate, sigma_high, self.steps)
            eps_high, delta = get_gdp_privacy_spent(
                mu, target_delta=self.target_delta)
            if delta > self.target_delta:
                raise ValueError(
                    "Could not find suitable privacy parameters.")
        if sigma_high > MAX_SIGMA:
            raise ValueError("The privacy budget is too low.")
    # Phase 2: bisect [sigma_low, sigma_high] until eps_high is within
    # the relative tolerance of target_epsilon.
    while target_epsilon - eps_high > EPS_TOLERANCE * target_epsilon:
        sigma = (sigma_low + sigma_high) / 2
        if self.dp_type == 'rdp':
            rdp = compute_rdp(self.sampling_rate, sigma, self.steps,
                              orders)
            eps, _, _ = get_privacy_spent(orders, rdp,
                                          target_delta=self.target_delta)
        else:  # if self.dp_type == 'gdp'
            mu = compute_gdp_mu(self.sampling_rate, sigma, self.steps)
            eps, delta = get_gdp_privacy_spent(
                mu, target_delta=self.target_delta)
        # Shrink the bracket, always keeping sigma_high on the feasible side.
        if eps < target_epsilon:
            sigma_high = sigma
            eps_high = eps
        else:
            sigma_low = sigma
    return sigma_high
def test_get_privacy_spent_gaussian(self):
    # Compare the RDP-derived delta for the Gaussian mechanism against the
    # exact optimal bound (must lower-bound it) and against the "standard"
    # upper bound (must dominate it).
    orders = [0.1 * x for x in range(10, 505)]
    rdp = rdp_accountant.compute_rdp(1, 1, 1, orders)
    for eps in (0.1 * x for x in range(500)):
        _, delta, _ = rdp_accountant.get_privacy_spent(orders, rdp,
                                                       target_eps=eps)
        # Optimal Gaussian guarantee via https://arxiv.org/abs/1805.06530
        # Theorem 8 (in v2).
        delta0 = (math.erfc((eps - .5) / math.sqrt(2)) / 2
                  - math.exp(eps) * math.erfc((eps + .5) / math.sqrt(2)) / 2)
        self.assertLessEqual(delta0, delta + 1e-300)  # need tolerance 10^-300
        # The "standard" upper bound. Note, if orders is too sparse, this
        # will NOT be an upper bound.
        delta1 = math.exp(-0.5 * (eps - 0.5)**2) if eps >= 0.5 else 1
        self.assertLessEqual(delta, delta1 + 1e-300)
def test_compute_rdp_sequence(self):
    # Spot-check compute_rdp against known reference values, including
    # the degenerate order alpha = inf.
    test_orders = [1.5, 2.5, 5, 50, 100, np.inf]
    expected = [6.5007e-04, 1.0854e-03, 2.1808e-03, 2.3846e-02,
                1.6742e+02, np.inf]
    computed = rdp_accountant.compute_rdp(0.01, 2.5, 50, test_orders)
    self.assertAllClose(computed, expected, rtol=1e-4)
def test_no_tree_no_sampling(self, total_steps, noise_multiplier):
    # With single-leaf "trees" and no subsampling (q = 1), tree-aggregation
    # accounting must agree with the plain RDP accountant.
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    leaf_counts = [1] * total_steps
    tree_rdp = tree_aggregation_accountant.compute_rdp_tree_restart(
        noise_multiplier, leaf_counts, orders)
    plain_rdp = rdp_accountant.compute_rdp(1., noise_multiplier,
                                           total_steps, orders)
    self.assertAllClose(tree_rdp, plain_rdp, rtol=1e-12)
def test_get_privacy_spent_check_target_eps(self):
    # Known reference values for this (q, sigma, steps) configuration.
    alphas = range(2, 33)
    target = 1.258575
    rdp_values = rdp_accountant.compute_rdp(0.01, 4, 10000, alphas)
    _, delta, opt_order = rdp_accountant.get_privacy_spent(
        alphas, rdp_values, target_eps=target)
    self.assertAlmostEqual(delta, 1e-5)
    self.assertEqual(opt_order, 20)
def test_get_privacy_spent_check_target_delta(self):
    # Known reference values for this (q, sigma, steps) configuration.
    alphas = range(2, 33)
    rdp_values = rdp_accountant.compute_rdp(0.01, 4, 10000, alphas)
    epsilon, _, opt_order = rdp_accountant.get_privacy_spent(
        alphas, rdp_values, target_delta=1e-5)
    self.assertAlmostEqual(epsilon, 1.258575, places=5)
    self.assertEqual(opt_order, 20)
def test_compute_rdp_sequence(self):
    # Spot-check compute_rdp against reference values at several orders.
    test_orders = [1.5, 2.5, 5, 50, 100, np.inf]
    reference = [0.00065, 0.001085, 0.00218075, 0.023846, 167.416307, np.inf]
    computed = rdp_accountant.compute_rdp(0.01, 2.5, 50, test_orders)
    self.assertSequenceAlmostEqual(computed, reference, delta=1e-5)
def __init__(
    self,
    num_epochs,
    client_fraction,
    batch_size,
    sensitivity,
    noise_scale,
    user_weight_cap,
    delta,
    model,
    training_data,
    coef_init,
    intercept_init,
    test_data=None,
    label_col="label",
    user_id_col="user_id",
):
    """Initializes the DP federated trainer.

    Precomputes the per-round RDP cost, initializes the base trainer,
    caps each client's contribution weight, and configures the server's
    averaging denominator and noise standard deviation.

    Args:
        num_epochs: number of training epochs.
        client_fraction: fraction of clients sampled per round.
        batch_size: local batch size.
        sensitivity: l2-sensitivity of the clipped client updates.
        noise_scale: noise multiplier relative to the sensitivity.
        user_weight_cap: cap applied to each user's contribution weight.
        delta: target delta of the (eps, delta)-DP guarantee.
        model, training_data, coef_init, intercept_init, test_data,
        label_col, user_id_col: forwarded to the base-class initializer.
    """
    self._num_epochs = num_epochs
    self._client_fraction = client_fraction
    self._batch_size = batch_size
    self._sensitivity = sensitivity
    self._noise_scale = noise_scale
    self._user_weight_cap = user_weight_cap
    self._delta = delta
    # Privacy cost (RDP) can be precomputed: every round uses the same
    # sampling rate and noise multiplier, so one step's RDP suffices.
    self._rdp = rdp_accountant.compute_rdp(
        q=self._client_fraction,
        noise_multiplier=self._noise_scale,
        steps=1,
        orders=self.RDP_ORDERS,
    )
    # Store the progressive epsilon values (privacy budget used).
    # Start from an initial value of 0 to align with the coefficient arrays.
    self._eps = [0]
    super().__init__(
        model,
        training_data,
        coef_init,
        intercept_init,
        test_data,
        label_col,
        user_id_col,
    )
    user_contrib_weight_sum = 0.0
    for client in self._clients:
        user_contrib_weight_sum += client.update_contrib_weight(
            self._user_weight_cap)
    self._user_contrib_weight_sum = user_contrib_weight_sum
    self._standard_dev = (self._noise_scale * self._sensitivity) / (
        self._client_fraction * self._user_contrib_weight_sum)
    self._avg_denom = self._client_fraction * self._user_contrib_weight_sum
    # Fixed a NameError: this call previously read `slef._standard_dev`.
    self._server.reset_dp_params(avg_denom=self._avg_denom,
                                 standard_dev=self._standard_dev)
def compute_epsilon(steps, target_delta=1e-5):
    """Return the epsilon of the (eps, target_delta)-DP guarantee."""
    if NUM_EXAMPLES * target_delta > 1.:
        warnings.warn('Your delta might be too high.')
    sampling_prob = FLAGS.batch_size / float(NUM_EXAMPLES)
    alphas = list(jnp.linspace(1.1, 10.9, 99)) + list(range(11, 64))
    rdp = compute_rdp(sampling_prob, FLAGS.noise_multiplier, steps, alphas)
    epsilon, _, _ = get_privacy_spent(alphas, rdp, target_delta=target_delta)
    return epsilon
def compute_heterogenous_rdp(sampling_probabilities, noise_multipliers, steps_list, orders):
    """Computes total RDP of a heterogeneous composition of mechanisms.

    Args:
        sampling_probabilities: per-mechanism subsampling probabilities.
        noise_multipliers: per-mechanism noise multipliers.
        steps_list: per-mechanism step counts.
        orders: Renyi orders at which to accumulate the RDP.

    Returns:
        The element-wise sum of each mechanism's RDP at `orders`.
    """
    # Validate all three lists, not just the first two: zip() would
    # otherwise silently truncate to the shortest list and under-count
    # the total privacy cost.
    assert (len(sampling_probabilities) == len(noise_multipliers)
            == len(steps_list))
    rdp = 0
    for q, noise_multiplier, steps in zip(sampling_probabilities,
                                          noise_multipliers, steps_list):
        rdp += rdp_accountant.compute_rdp(q, noise_multiplier, steps, orders)
    return rdp
def compute_epsilon(steps, num_examples=60000, target_delta=1e-5):
    """Computes the DP epsilon after `steps` steps of noisy SGD.

    Args:
        steps: number of SGD steps taken.
        num_examples: training-set size (default 60000, i.e. MNIST-sized).
        target_delta: target delta of the (eps, delta)-DP guarantee.

    Returns:
        The epsilon such that training is (eps, target_delta)-DP.
    """
    if num_examples * target_delta > 1.:
        warnings.warn('Your delta might be too high.')
    q = FLAGS.batch_size / float(num_examples)
    # Bug fix: a bare range cannot be concatenated to a list in Python 3
    # (TypeError); wrap it in list() as done elsewhere in this codebase.
    orders = list(np.linspace(1.1, 10.9, 99)) + list(range(11, 64))
    rdp_const = compute_rdp(q, FLAGS.noise_multiplier, steps, orders)
    eps, _, _ = get_privacy_spent(orders, rdp_const, target_delta=target_delta)
    return eps
def compute_epsilon(epoch, noise_multi, N, batch_size, delta):
    """Computes epsilon value for given hyperparameters."""
    alphas = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    q = batch_size / N
    total_steps = epoch * N / batch_size
    rdp = compute_rdp(q=q,
                      noise_multiplier=noise_multi,
                      steps=total_steps,
                      orders=alphas)
    epsilon, _, _ = get_privacy_spent(alphas, rdp, target_delta=delta)
    return epsilon
def compute_epsilon(steps):
    """Return epsilon at delta = 1/(2N) for the given number of steps."""
    alphas = [1 + x / 10.0 for x in range(1, 1200)]
    rdp_curve = rdp_accountant.compute_rdp(q=mb_size / N,
                                           noise_multiplier=noise_multiplier,
                                           steps=steps,
                                           orders=alphas)
    epsilon, _, _ = rdp_accountant.get_privacy_spent(orders=alphas,
                                                     rdp=rdp_curve,
                                                     target_delta=1 / (2 * N))
    return epsilon
def compute_epsilon(self, steps):
    """Return the DP epsilon spent after `steps` steps; inf when noiseless."""
    if self.noise_multiplier == 0.0:
        return float('inf')
    alphas = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    q = self.batch_size / self.total_data_size
    rdp = compute_rdp(q=q,
                      noise_multiplier=self.noise_multiplier,
                      steps=steps,
                      orders=alphas)
    epsilon, _, _ = get_privacy_spent(alphas, rdp, target_delta=self.delta)
    return epsilon
def test_check_composition(self):
    alphas = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12.,
              14., 16., 20., 24., 28., 32., 64., 256.)
    # First mechanism: heavily subsampled Gaussian over many steps.
    total_rdp = rdp_accountant.compute_rdp(q=1e-4, noise_multiplier=.4,
                                           steps=40000, orders=alphas)
    epsilon, _, opt_order = rdp_accountant.get_privacy_spent(
        alphas, total_rdp, target_delta=1e-6)
    # Compose with a second, more strongly sampled mechanism.
    total_rdp += rdp_accountant.compute_rdp(q=0.1, noise_multiplier=2,
                                            steps=100, orders=alphas)
    epsilon, _, opt_order = rdp_accountant.get_privacy_spent(
        alphas, total_rdp, target_delta=1e-5)
    self.assertAlmostEqual(epsilon, 8.509656, places=5)
    self.assertEqual(opt_order, 2.5)
def compute_epsilon(steps, noise_multiplier, batch_size, input_size, delta):
    """Computes epsilon value for given hyperparameters."""
    if noise_multiplier == 0.0:
        return float('inf')
    alphas = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    q = batch_size / input_size
    rdp = compute_rdp(q=q,
                      noise_multiplier=noise_multiplier,
                      steps=steps,
                      orders=alphas)
    epsilon, _, _ = get_privacy_spent(alphas, rdp, target_delta=delta)
    return epsilon
def compute_renyi_privacy(num_examples, batch_size, steps, sigma, delta):
    """Estimate the privacy loss via Renyi Differential Privacy accounting."""
    q = batch_size / num_examples
    alphas = ([1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5]
              + list(range(5, 64)) + [128, 256, 512])
    rdp = compute_rdp(q, sigma, steps, alphas)
    epsilon, _, _ = get_privacy_spent(alphas, rdp, target_delta=delta)
    return SpentDP(epsilon, delta)
def compute_epsilon(steps):
    """Computes epsilon value for given hyperparameters."""
    if FLAGS.noise_multiplier == 0.0:
        return float('inf')
    alphas = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    q = FLAGS.batch_size / 60000
    rdp = compute_rdp(q=q,
                      noise_multiplier=FLAGS.noise_multiplier,
                      steps=steps,
                      orders=alphas)
    # Delta is set to 1e-5 because MNIST has 60000 training points.
    epsilon, _, _ = get_privacy_spent(alphas, rdp, target_delta=1e-5)
    return epsilon
def apply_dp_sgd_analysis(q, sigma, steps, orders, delta): """Compute and print results of DP-SGD analysis.""" # compute_rdp requires that sigma be the ratio of the standard deviation of # the Gaussian noise to the l2-sensitivity of the function to which it is # added. Hence, sigma here corresponds to the `noise_multiplier` parameter # in the DP-SGD implementation found in privacy.optimizers.dp_optimizer rdp = compute_rdp(q, sigma, steps, orders) eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta) return eps, opt_order
def compute_epsilon(steps):
    """Computes epsilon value for given hyperparameters."""
    if FLAGS.noise_multiplier == 0.0:
        return float('inf')
    alphas = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    q = FLAGS.batch_size / NUM_TRAIN_EXAMPLES
    rdp = compute_rdp(q=q,
                      noise_multiplier=FLAGS.noise_multiplier,
                      steps=steps,
                      orders=alphas)
    # Delta is set to approximate 1 / (number of training points).
    epsilon, _, _ = get_privacy_spent(alphas, rdp, target_delta=1e-5)
    return epsilon
def compute_epsilon(epoch, num_train_eg, args):
    """Computes epsilon value for given hyperparameters."""
    total_steps = epoch * num_train_eg // args.batch_size
    if args.noise_multiplier == 0.0:
        return float('inf')
    alphas = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    q = args.batch_size / num_train_eg
    rdp = compute_rdp(q=q,
                      noise_multiplier=args.noise_multiplier,
                      steps=total_steps,
                      orders=alphas)
    # Delta is set to approximate 1 / (number of training points).
    epsilon, _, _ = get_privacy_spent(alphas, rdp, target_delta=1e-5)
    return epsilon
def compute_epsilon(epochs=epochs, mb_size=mb_size, N=N, noise_multiplier=noise_multiplier):
    """Return epsilon at delta = 1/(2N) for the configured training run."""
    alphas = [1 + x / 10.0 for x in range(1, 800)]
    n_steps = (N / mb_size) * epochs
    rdp_curve = rdp_accountant.compute_rdp(q=mb_size / N,
                                           noise_multiplier=noise_multiplier,
                                           steps=n_steps,
                                           orders=alphas)
    epsilon, _, _ = rdp_accountant.get_privacy_spent(orders=alphas,
                                                     rdp=rdp_curve,
                                                     target_delta=1 / (2 * N))
    return epsilon
def get_epsilon_for_delta(n, target_delta=1e-5):
    """Computes the epsilon spent by DP training at the given delta.

    Args:
        n: number of training examples.
        target_delta: target delta of the (eps, delta)-DP guarantee.

    Returns:
        Tuple (epsilon, target_delta); both are None when FLAGS.dp is off.
    """
    if FLAGS.dp:
        noise_multiplier = FLAGS.dp_noise_multiplier
        sampling_probability = FLAGS.batch_size / float(n)
        steps = FLAGS.epochs * float(n) // FLAGS.batch_size
        orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
        # Use the local alias (it was previously assigned but unused; the
        # call re-read FLAGS.dp_noise_multiplier directly).
        rdp = compute_rdp(q=sampling_probability,
                          noise_multiplier=noise_multiplier,
                          steps=steps,
                          orders=orders)
        epsilon = get_privacy_spent(orders, rdp, target_delta=target_delta)[0]
        print("[INFO] epsilon={} for delta={}".format(epsilon, target_delta))
    else:
        epsilon = None
        target_delta = None
    return epsilon, target_delta
def test_compute_rdp_from_ledger(self):
    # RDP computed from a privacy ledger must match the direct computation
    # with the equivalent effective noise multiplier.
    orders = range(2, 33)
    q = 0.1
    n = 1000
    steps = 3
    l2_norm_clip = 3.14159
    noise_stddev = 2.71828
    query_entry = privacy_ledger.GaussianSumQueryEntry(
        l2_norm_clip, noise_stddev)
    ledger = [privacy_ledger.SampleEntry(n, q, [query_entry])] * steps
    effective_z = noise_stddev / l2_norm_clip
    direct_rdp = rdp_accountant.compute_rdp(q, effective_z, steps, orders)
    ledger_rdp = rdp_accountant.compute_rdp_from_ledger(ledger, orders)
    self.assertSequenceAlmostEqual(direct_rdp, ledger_rdp)
def _get_rdp_and_orders(self):
    """Return (rdp, orders), computed analytically or from the ledger."""
    orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))
    if self.ledger is not None:
        # calculate with ledger
        print("Formatted Ledger")
        formatted_ledger = self.ledger.get_formatted_ledger(get_session())
        rdp = compute_rdp_from_ledger(formatted_ledger, orders)
    else:
        total_steps = self.epochs * self.train_size // self.batch_size
        rdp = compute_rdp(
            q=self.batch_size / self.train_size,
            noise_multiplier=self.noise_multiplier,
            steps=total_steps,
            orders=orders,
        )
    return rdp, orders
def compute_epsilon(steps, noise_multi, user_ratio, delta):
    """Computes epsilon value for given hyperparameters.

    Args:
        steps: number of noisy aggregation steps taken.
        noise_multi: noise multiplier; 0.0 yields no privacy (inf epsilon).
        user_ratio: per-step user sampling probability.
        delta: target delta of the (eps, delta)-DP guarantee.

    Returns:
        The epsilon such that training is (eps, delta)-DP.
    """
    # NOTE(review): imports kept function-local as originally written.
    from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
    from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
    if noise_multi == 0.0:
        return float('inf')
    # Renyi orders (alphas) at which the RDP is tracked.
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    rdp = compute_rdp(
        q=user_ratio, noise_multiplier=noise_multi, steps=steps, orders=orders)
    # Convert RDP to (eps, delta)-DP at the caller-supplied delta.
    # (A stale comment here previously claimed delta was fixed at 1e-5.)
    return get_privacy_spent(orders, rdp, target_delta=delta)[0]
def test_get_privacy_spent_check_target_eps(self):
    # First case: constant RDP across orders, i.e. pure DP.
    alphas = range(2, 33)
    flat_rdp = [1.1 for o in alphas]  # Constant corresponds to pure DP.
    _, delta, opt_order = rdp_accountant.get_privacy_spent(
        alphas, flat_rdp, target_eps=1.32783806176)
    # Since rdp is constant, it should always pick the largest order.
    self.assertEqual(opt_order, 32)
    self.assertAlmostEqual(delta, 1e-5)
    # Second test for Gaussian noise (with no subsampling):
    fine_alphas = [0.001 * i for i in range(1000, 100000)]  # Pick fine set of order.
    gauss_rdp = rdp_accountant.compute_rdp(1, 4.530877117, 1, fine_alphas)
    # Scale is chosen to obtain exactly (1,1e-6)-DP.
    _, delta, _ = rdp_accountant.get_privacy_spent(fine_alphas, gauss_rdp,
                                                   target_eps=1)
    self.assertAlmostEqual(delta, 1e-6)
def test_get_privacy_spent_consistency(self): orders = range(2, 50) # Large range of orders (helps test for overflows). for q in [0.01, 0.1, 0.8, 1.]: # Different subsampling rates. for multiplier in [0.1, 1., 3., 10., 100.]: # Different noise scales. rdp = rdp_accountant.compute_rdp(q, multiplier, 1, orders) for delta in [ .9, .5, .1, .01, 1e-3, 1e-4, 1e-5, 1e-6, 1e-9, 1e-12 ]: eps1, delta1, ord1 = rdp_accountant.get_privacy_spent( orders, rdp, target_delta=delta) eps2, delta2, ord2 = rdp_accountant.get_privacy_spent( orders, rdp, target_eps=eps1) self.assertEqual(delta1, delta) self.assertEqual(eps2, eps1) if eps1 != 0: self.assertEqual(ord1, ord2) self.assertAlmostEqual(delta, delta2) else: # This is a degenerate case; we won't have consistency. self.assertLessEqual(delta2, delta)