def testNoiseMultiplier(self, cls):
    """Checks the spread of the gradients computed by optimizer `cls`.

    A `GaussianAverageQuery(4.0, 8.0, 1)` is wrapped in a `QueryWithLedger`
    and handed to `cls`. Gradients are then computed 1000 times and the
    standard deviation of the collected values must be within 0.5 of 8.0.

    Args:
      cls: Optimizer class under test. It is constructed with the wrapped
        query, `num_microbatches=1` and `learning_rate=2.0`.
    """
    with tf.GradientTape(persistent=True) as gradient_tape:
        var0 = tf.Variable([0.0])
        data0 = tf.Variable([[0.0]])

        query_ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 5000, 5000)
        wrapped_query = privacy_ledger.QueryWithLedger(
            gaussian_query.GaussianAverageQuery(4.0, 8.0, 1), query_ledger)

        opt = cls(wrapped_query, num_microbatches=1, learning_rate=2.0)

        self.evaluate(tf.global_variables_initializer())
        # The variable must still hold its initial value at this point.
        self.assertAllClose([0.0], self.evaluate(var0))

        # Keep only the gradient part of the first (gradient, variable) pair
        # returned by each of the 1000 calls.
        sampled_grads = [
            opt.compute_gradients(
                lambda: self._loss_fn(var0, data0), [var0],
                gradient_tape=gradient_tape)[0][0]
            for _ in range(1000)
        ]

        # Standard deviation should be close to
        # l2_norm_clip * noise_multiplier.
        self.assertNear(np.std(sampled_grads), 2.0 * 4.0, 0.5)
def testBaseline(self, cls, num_microbatches, expected_answer):
    """Compares the gradient computed by `cls` against `expected_answer`.

    The query is `GaussianAverageQuery(1.0e9, 0.0, num_microbatches, ledger)`
    wrapped in a `QueryWithLedger`.

    Args:
      cls: Optimizer class under test.
      num_microbatches: Passed to the query, to the optimizer, and used to
        derive the ledger's second argument (`num_microbatches / 1e6`).
      expected_answer: Value the first returned gradient must match.
    """
    with self.cached_session() as sess:
        var0 = tf.Variable([1.0, 2.0])
        data0 = tf.Variable(
            [[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])

        query_ledger = privacy_ledger.PrivacyLedger(
            1e6, num_microbatches / 1e6, 50, 50)
        average_query = gaussian_query.GaussianAverageQuery(
            1.0e9, 0.0, num_microbatches, query_ledger)
        wrapped_query = privacy_ledger.QueryWithLedger(
            average_query, query_ledger)

        opt = cls(
            wrapped_query,
            num_microbatches=num_microbatches,
            learning_rate=2.0)

        self.evaluate(tf.global_variables_initializer())
        # The variable must still hold its initial value at this point.
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))

        # Expected gradient is sum of differences divided by number of
        # microbatches.
        gradient_op = opt.compute_gradients(loss(data0, var0), [var0])
        computed = sess.run(gradient_op)
        self.assertAllCloseAccordingToType(expected_answer, computed[0][0])
def __init__(
        self,
        l2_norm_clip,
        noise_multiplier,
        num_microbatches,
        unroll_microbatches=False,
        *args,  # pylint: disable=keyword-arg-before-vararg
        **kwargs):
    """Builds a Gaussian average query and forwards it to the base optimizer.

    Args:
      l2_norm_clip: First argument of `GaussianAverageQuery`; also scaled by
        `noise_multiplier` to form its second argument.
      noise_multiplier: Factor applied to `l2_norm_clip` for the query's
        second argument.
      num_microbatches: Third argument of the query; also forwarded to the
        base class and used to derive the selection probability.
      unroll_microbatches: Forwarded unchanged to the base class.
      *args: Extra positional arguments forwarded to the base class.
      **kwargs: Extra keyword arguments forwarded to the base class. When
        `population_size` is present it is removed (together with the
        optional `ledger_max_queries` and `ledger_max_samples`, each
        defaulting to 1e6) and used to attach a `PrivacyLedger` to the query.
    """
    stddev = l2_norm_clip * noise_multiplier
    query = gaussian_query.GaussianAverageQuery(
        l2_norm_clip, stddev, num_microbatches)

    if 'population_size' in kwargs:
        # Strip the ledger-only options so they never reach the base class.
        population_size = kwargs.pop('population_size')
        max_queries = kwargs.pop('ledger_max_queries', 1e6)
        max_samples = kwargs.pop('ledger_max_samples', 1e6)

        ledger = privacy_ledger.PrivacyLedger(
            population_size,
            num_microbatches / population_size,
            max_samples,
            max_queries)
        query = privacy_ledger.QueryWithLedger(query, ledger)

    super(DPGaussianOptimizerClass, self).__init__(
        query, num_microbatches, unroll_microbatches, *args, **kwargs)
def testUnrollMicrobatches(self, cls):
    """Checks the gradient from `cls` when `unroll_microbatches=True`.

    Uses `GaussianSumQuery(1.0e9, 0.0)` wrapped in a `QueryWithLedger` with
    four microbatches and expects the first gradient to be `[-2.5, -2.5]`.

    Args:
      cls: Optimizer class under test.
    """
    with self.cached_session() as sess:
        var0 = tf.Variable([1.0, 2.0])
        data0 = tf.Variable(
            [[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])

        num_microbatches = 4

        wrapped_query = privacy_ledger.QueryWithLedger(
            gaussian_query.GaussianSumQuery(1.0e9, 0.0),
            1e6,
            num_microbatches / 1e6)

        opt = cls(
            wrapped_query,
            num_microbatches=num_microbatches,
            learning_rate=2.0,
            unroll_microbatches=True)

        self.evaluate(tf.global_variables_initializer())
        # The variable must still hold its initial value at this point.
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))

        # Expected gradient is sum of differences divided by number of
        # microbatches.
        gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0])
        computed = sess.run(gradient_op)
        self.assertAllCloseAccordingToType([-2.5, -2.5], computed[0][0])
def test_ledger(self):
    """Checks ledger entries recorded across two runs of an adaptive-clip query.

    The query is run twice on the same two records, with the ledger's
    population size and selection probability reassigned before each run.
    The second run is given the global state returned by the first. After
    each run the formatted ledger is compared against the expected
    population size, selection probability and query list.
    """
    def check_sample(sample, population, probability, queries):
        # One ledger entry: population size, selection probability, queries.
        self.assertAllClose(sample.population_size, population)
        self.assertAllClose(sample.selection_probability, probability)
        self.assertAllClose(sample.queries, queries)

    record1 = tf.constant([8.5])
    record2 = tf.constant([-7.25])
    records = [record1, record2]

    population_size = tf.Variable(0)
    selection_probability = tf.Variable(0.0)
    ledger = privacy_ledger.PrivacyLedger(
        population_size, selection_probability, 50, 50)

    query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
        initial_l2_norm_clip=10.0,
        noise_multiplier=1.0,
        target_unclipped_quantile=0.0,
        learning_rate=1.0,
        clipped_count_stddev=0.0,
        expected_num_records=2.0,
        ledger=ledger)
    query = privacy_ledger.QueryWithLedger(query, ledger)

    # First sample.
    tf.assign(population_size, 10)
    tf.assign(selection_probability, 0.1)
    _, global_state = test_utils.run_query(query, records)

    expected_queries = [[10.0, 10.0], [0.5, 0.0]]
    first = ledger.get_formatted_ledger_eager()[0]
    check_sample(first, 10.0, 0.1, expected_queries)

    # Second sample, continuing from the state returned by the first run.
    tf.assign(population_size, 20)
    tf.assign(selection_probability, 0.2)
    test_utils.run_query(query, records, global_state)

    # Unpacking also requires that the ledger holds exactly two entries.
    first, second = ledger.get_formatted_ledger_eager()
    check_sample(first, 10.0, 0.1, expected_queries)

    expected_queries_2 = [[9.0, 9.0], [0.5, 0.0]]
    check_sample(second, 20.0, 0.2, expected_queries_2)
def test_nested_query(self):
    """Checks ledger entries recorded for a `NestedQuery` of two average queries.

    The nested query is run twice on the same records, with population size
    and selection probability reassigned before each run. Recorded queries
    are compared after sorting, so their relative order is not asserted.
    """
    def check_sample(sample, population, probability, queries):
        # Compare queries in sorted order on both sides.
        self.assertAllClose(sample.population_size, population)
        self.assertAllClose(sample.selection_probability, probability)
        self.assertAllClose(sorted(sample.queries), sorted(queries))

    population_size = tf.Variable(0)
    selection_probability = tf.Variable(0.0)
    ledger = privacy_ledger.PrivacyLedger(
        population_size, selection_probability, 50, 50)

    query1 = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0, ledger=ledger)
    query2 = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=5.0, sum_stddev=1.0, denominator=5.0, ledger=ledger)

    query = nested_query.NestedQuery([query1, query2])
    query = privacy_ledger.QueryWithLedger(query, ledger)

    record1 = [1.0, [12.0, 9.0]]
    record2 = [5.0, [1.0, 2.0]]
    records = [record1, record2]

    # First sample.
    tf.assign(population_size, 10)
    tf.assign(selection_probability, 0.1)
    test_utils.run_query(query, records)

    expected_queries = [[4.0, 2.0], [5.0, 1.0]]
    first = ledger.get_formatted_ledger_eager()[0]
    check_sample(first, 10.0, 0.1, expected_queries)

    # Second sample.
    tf.assign(population_size, 20)
    tf.assign(selection_probability, 0.2)
    test_utils.run_query(query, records)

    # Unpacking also requires that the ledger holds exactly two entries.
    first, second = ledger.get_formatted_ledger_eager()
    check_sample(first, 10.0, 0.1, expected_queries)
    check_sample(second, 20.0, 0.2, expected_queries)
def linear_model_fn(features, labels, mode):
    """Model function building a one-unit linear model trained with a DP optimizer.

    Args:
      features: Mapping whose `'x'` entry is fed to the dense layer.
      labels: Targets compared against the predictions.
      mode: Forwarded unchanged to the returned `EstimatorSpec`.

    Returns:
      A `tf.estimator.EstimatorSpec` whose loss is the mean squared
      difference and whose train op minimizes the unreduced loss.
    """
    dense_layer = tf.keras.layers.Dense(
        1, activation='linear', name='dense')
    preds = dense_layer.apply(features['x'])

    # The optimizer receives the unreduced loss; the spec reports its mean.
    vector_loss = tf.squared_difference(labels, preds)
    scalar_loss = tf.reduce_mean(vector_loss)

    wrapped_query = privacy_ledger.QueryWithLedger(
        gaussian_query.GaussianSumQuery(1.0, 0.0), 1e6, 1 / 1e6)
    optimizer = dp_optimizer.DPGradientDescentOptimizer(
        wrapped_query, num_microbatches=1, learning_rate=1.0)

    step = tf.train.get_global_step()
    train_op = optimizer.minimize(loss=vector_loss, global_step=step)

    return tf.estimator.EstimatorSpec(
        mode=mode, loss=scalar_loss, train_op=train_op)
def __init__(self,
             l2_norm_clip,
             noise_multiplier,
             num_microbatches=None,
             ledger=None,
             unroll_microbatches=False,
             *args,
             **kwargs):
    """Builds a Gaussian sum query and forwards it to the base optimizer.

    Args:
      l2_norm_clip: First argument of `GaussianSumQuery`; also scaled by
        `noise_multiplier` to form its second argument.
      noise_multiplier: Factor applied to `l2_norm_clip` for the query's
        second argument.
      num_microbatches: Forwarded unchanged to the base class.
      ledger: When truthy, the query is wrapped in a `QueryWithLedger`
        using this ledger.
      unroll_microbatches: Forwarded unchanged to the base class.
      *args: Extra positional arguments forwarded to the base class.
      **kwargs: Extra keyword arguments forwarded to the base class.
    """
    stddev = l2_norm_clip * noise_multiplier
    query = gaussian_query.GaussianSumQuery(l2_norm_clip, stddev)

    if ledger:
        query = privacy_ledger.QueryWithLedger(query, ledger=ledger)

    super(DPGaussianOptimizerClass, self).__init__(
        query, num_microbatches, unroll_microbatches, *args, **kwargs)
def __init__(
        self,
        l2_norm_clip,
        noise_multiplier,
        num_microbatches,
        ledger,
        unroll_microbatches=False,
        *args,  # pylint: disable=keyword-arg-before-vararg
        **kwargs):
    """Builds a Gaussian average query and forwards it to the base optimizer.

    Args:
      l2_norm_clip: First argument of `GaussianAverageQuery`; also scaled
        by `noise_multiplier` to form its second argument.
      noise_multiplier: Factor applied to `l2_norm_clip` for the query's
        second argument.
      num_microbatches: Third argument of the query; also forwarded to the
        base class.
      ledger: Fourth argument of the query. When truthy, the query is
        additionally wrapped in a `QueryWithLedger` using this ledger.
      unroll_microbatches: Forwarded unchanged to the base class.
      *args: Extra positional arguments forwarded to the base class.
      **kwargs: Extra keyword arguments forwarded to the base class.
    """
    stddev = l2_norm_clip * noise_multiplier
    query = gaussian_query.GaussianAverageQuery(
        l2_norm_clip, stddev, num_microbatches, ledger)

    if ledger:
        query = privacy_ledger.QueryWithLedger(query, ledger)

    super(DPGaussianOptimizerClass, self).__init__(
        query, num_microbatches, unroll_microbatches, *args, **kwargs)
def testClippingNorm(self, cls):
    """Checks the gradient from `cls` when the query uses a clip value of 1.0.

    Uses `GaussianSumQuery(1.0, 0.0)` wrapped in a `QueryWithLedger` and
    expects the first returned gradient to be `[-0.6, -0.8]`.

    Args:
      cls: Optimizer class under test.
    """
    with tf.GradientTape(persistent=True) as gradient_tape:
        var0 = tf.Variable([0.0, 0.0])
        data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])

        wrapped_query = privacy_ledger.QueryWithLedger(
            gaussian_query.GaussianSumQuery(1.0, 0.0), 1e6, 1 / 1e6)

        opt = cls(wrapped_query, num_microbatches=1, learning_rate=2.0)

        self.evaluate(tf.global_variables_initializer())
        # The variable must still hold its initial value at this point.
        self.assertAllClose([0.0, 0.0], self.evaluate(var0))

        # Expected gradient is sum of differences.
        computed = opt.compute_gradients(
            lambda: self._loss_fn(var0, data0), [var0],
            gradient_tape=gradient_tape)
        self.assertAllCloseAccordingToType([-0.6, -0.8], computed[0][0])
def __init__(
        self,
        l2_norm_clip,
        noise_multiplier,
        num_microbatches=None,
        ledger=None,
        unroll_microbatches=False,
        *args,  # pylint: disable=keyword-arg-before-vararg
        **kwargs):
    """Builds a sparse Gaussian sum query and forwards it to the base optimizer.

    Args:
      l2_norm_clip: First argument of `SparseGaussianSumQuery`; also scaled
        by `noise_multiplier` to form its second argument.
      noise_multiplier: Factor applied to `l2_norm_clip` for the query's
        second argument.
      num_microbatches: Forwarded unchanged to the base class.
      ledger: When truthy, the query is wrapped in a `QueryWithLedger`
        using this ledger.
      unroll_microbatches: Forwarded unchanged to the base class.
      *args: Extra positional arguments forwarded to the base class.
      **kwargs: Extra keyword arguments forwarded to the base class.
    """
    stddev = l2_norm_clip * noise_multiplier
    query = SparseGaussianSumQuery(l2_norm_clip, stddev)

    if ledger:
        query = privacy_ledger.QueryWithLedger(query, ledger=ledger)

    super(SparseDPGaussianOptimizerClass, self).__init__(
        query, num_microbatches, unroll_microbatches, *args, **kwargs)
def testClippingNorm(self, cls):
    """Checks the gradient from `cls` when the query uses a clip value of 1.0.

    Uses `GaussianAverageQuery(1.0, 0.0, 1)` wrapped in a `QueryWithLedger`
    and expects the first returned gradient to be `[-0.6, -0.8]`.

    Args:
      cls: Optimizer class under test.
    """
    with self.cached_session() as sess:
        var0 = tf.Variable([0.0, 0.0])
        data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])

        query_ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 50, 50)
        wrapped_query = privacy_ledger.QueryWithLedger(
            gaussian_query.GaussianAverageQuery(1.0, 0.0, 1), query_ledger)

        opt = cls(wrapped_query, num_microbatches=1, learning_rate=2.0)

        self.evaluate(tf.global_variables_initializer())
        # The variable must still hold its initial value at this point.
        self.assertAllClose([0.0, 0.0], self.evaluate(var0))

        # Expected gradient is sum of differences.
        gradient_op = opt.compute_gradients(loss(data0, var0), [var0])
        computed = sess.run(gradient_op)
        self.assertAllCloseAccordingToType([-0.6, -0.8], computed[0][0])
def test_sum_query(self):
    """Checks ledger entries recorded across two runs of a Gaussian sum query.

    The query is run twice on the same two records, with population size and
    selection probability reassigned before each run. After each run the
    formatted ledger is compared against the expected population size,
    selection probability and query list.
    """
    def check_sample(sample, population, probability, queries):
        # One ledger entry: population size, selection probability, queries.
        self.assertAllClose(sample.population_size, population)
        self.assertAllClose(sample.selection_probability, probability)
        self.assertAllClose(sample.queries, queries)

    record1 = tf.constant([2.0, 0.0])
    record2 = tf.constant([-1.0, 1.0])
    records = [record1, record2]

    population_size = tf.Variable(0)
    selection_probability = tf.Variable(0.0)
    ledger = privacy_ledger.PrivacyLedger(
        population_size, selection_probability, 50, 50)

    query = gaussian_query.GaussianSumQuery(
        l2_norm_clip=10.0, stddev=0.0, ledger=ledger)
    query = privacy_ledger.QueryWithLedger(query, ledger)

    # First sample.
    tf.assign(population_size, 10)
    tf.assign(selection_probability, 0.1)
    test_utils.run_query(query, records)

    expected_queries = [[10.0, 0.0]]
    first = ledger.get_formatted_ledger_eager()[0]
    check_sample(first, 10.0, 0.1, expected_queries)

    # Second sample.
    tf.assign(population_size, 20)
    tf.assign(selection_probability, 0.2)
    test_utils.run_query(query, records)

    # Unpacking also requires that the ledger holds exactly two entries.
    first, second = ledger.get_formatted_ledger_eager()
    check_sample(first, 10.0, 0.1, expected_queries)
    check_sample(second, 20.0, 0.2, expected_queries)
def testBaseline(self, cls, num_microbatches, expected_answer):
    """Compares the gradient computed by `cls` against `expected_answer`.

    The query is `GaussianSumQuery(1.0e9, 0.0)` wrapped in a
    `QueryWithLedger`; gradients are computed through a persistent
    `tf.GradientTape`.

    Args:
      cls: Optimizer class under test.
      num_microbatches: Passed to the optimizer and used to derive the
        wrapper's third argument (`num_microbatches / 1e6`).
      expected_answer: Value the first returned gradient must match.
    """
    with tf.GradientTape(persistent=True) as gradient_tape:
        var0 = tf.Variable([1.0, 2.0])
        data0 = tf.Variable(
            [[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])

        wrapped_query = privacy_ledger.QueryWithLedger(
            gaussian_query.GaussianSumQuery(1.0e9, 0.0),
            1e6,
            num_microbatches / 1e6)

        opt = cls(
            wrapped_query,
            num_microbatches=num_microbatches,
            learning_rate=2.0)

        self.evaluate(tf.global_variables_initializer())
        # The variable must still hold its initial value at this point.
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))

        # Expected gradient is sum of differences divided by number of
        # microbatches.
        computed = opt.compute_gradients(
            lambda: self._loss_fn(var0, data0), [var0],
            gradient_tape=gradient_tape)
        self.assertAllCloseAccordingToType(expected_answer, computed[0][0])
def testNoiseMultiplier(self, cls):
    """Checks the spread of the gradients computed by optimizer `cls`.

    A `GaussianSumQuery(4.0, 8.0)` is wrapped in a `QueryWithLedger` and
    handed to `cls`. The gradient op is built once and run 1000 times; the
    standard deviation of the collected values must be within 0.5 of 8.0.

    Args:
      cls: Optimizer class under test. It is constructed with the wrapped
        query, `num_microbatches=1` and `learning_rate=2.0`.
    """
    with self.cached_session() as sess:
        var0 = tf.Variable([0.0])
        data0 = tf.Variable([[0.0]])

        wrapped_query = privacy_ledger.QueryWithLedger(
            gaussian_query.GaussianSumQuery(4.0, 8.0), 1e6, 1 / 1e6)

        opt = cls(wrapped_query, num_microbatches=1, learning_rate=2.0)

        self.evaluate(tf.global_variables_initializer())
        # The variable must still hold its initial value at this point.
        self.assertAllClose([0.0], self.evaluate(var0))

        gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0])

        # Keep only the gradient part of the first (gradient, variable) pair
        # returned by each of the 1000 runs.
        sampled_grads = [sess.run(gradient_op)[0][0] for _ in range(1000)]

        # Standard deviation should be close to
        # l2_norm_clip * noise_multiplier.
        self.assertNear(np.std(sampled_grads), 2.0 * 4.0, 0.5)