def test_complex_nested_query(self):
  """Runs a noise-free nested query over a mixed list/dict structure.

  The nested query combines a Gaussian sum query (applied to a dict record)
  with a dict holding a Gaussian average query and a list-wrapped Gaussian
  sum query. All noise stddevs are zero, so the result is deterministic.
  """
  with self.cached_session() as sess:
    sum_query_ab = gaussian_query.GaussianSumQuery(
        l2_norm_clip=1.0, stddev=0.0)
    avg_query_c = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=10.0, sum_stddev=0.0, denominator=2.0)
    sum_query_d = gaussian_query.GaussianSumQuery(
        l2_norm_clip=10.0, stddev=0.0)

    query_structure = [sum_query_ab, {'c': avg_query_c, 'd': [sum_query_d]}]
    query = nested_query.NestedQuery(query_structure)

    # Each record mirrors the structure of the nested query.
    first_record = [{'a': 0.0, 'b': 2.71828},
                    {'c': (-4.0, 6.0), 'd': [-4.0]}]
    second_record = [{'a': 3.14159, 'b': 0.0},
                     {'c': (6.0, -4.0), 'd': [5.0]}]

    query_result = _run_query(query, [first_record, second_record])
    actual = sess.run(query_result)

    expected = [{'a': 1.0, 'b': 1.0}, {'c': (1.0, 1.0), 'd': [1.0]}]
    self.assertAllClose(actual, expected)
def testBaseline(self, cls, num_microbatches, expected_answer):
  """Compares the gradient produced by `cls` with `expected_answer`.

  The averaging query is built with a 1e9 clip value and zero noise stddev,
  then wrapped together with a privacy ledger before being handed to the
  optimizer.

  Args:
    cls: optimizer class (or factory) under test.
    num_microbatches: microbatch count passed to the query and the optimizer.
    expected_answer: gradient expected for `var0`.
  """
  with self.cached_session() as sess:
    var0 = tf.Variable([1.0, 2.0])
    data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])

    selection_probability = num_microbatches / 1e6
    ledger = privacy_ledger.PrivacyLedger(1e6, selection_probability, 50, 50)
    base_query = gaussian_query.GaussianAverageQuery(
        1.0e9, 0.0, num_microbatches, ledger)
    ledger_query = privacy_ledger.QueryWithLedger(base_query, ledger)

    opt = cls(
        ledger_query,
        num_microbatches=num_microbatches,
        learning_rate=2.0)

    self.evaluate(tf.global_variables_initializer())
    # Sanity-check the initial parameter values.
    self.assertAllClose([1.0, 2.0], self.evaluate(var0))

    # The expected gradient is the sum of differences divided by the number
    # of microbatches.
    gradient_op = opt.compute_gradients(loss(data0, var0), [var0])
    computed = sess.run(gradient_op)
    self.assertAllCloseAccordingToType(expected_answer, computed[0][0])
def test_nested_query_with_noise(self):
  """Checks the empirical noise stddev of a noisy nested query.

  A Gaussian sum query and a Gaussian average query are nested; the query
  result is evaluated 1000 times and the per-component standard deviation is
  compared with `sum_stddev` (sum query) and `sum_stddev / denominator`
  (average query), within a tolerance of 0.1.
  """
  with self.cached_session() as sess:
    sum_stddev = 2.71828
    denominator = 3.14159

    query1 = gaussian_query.GaussianSumQuery(
        l2_norm_clip=1.5, stddev=sum_stddev)
    query2 = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=0.5, sum_stddev=sum_stddev, denominator=denominator)
    query = nested_query.NestedQuery((query1, query2))

    record1 = (3.0, [2.0, 1.5])
    record2 = (0.0, [-1.0, -3.5])

    query_result = _run_query(query, [record1, record2])

    # Use the builtin `range` rather than the Python-2-only `xrange`, so the
    # test does not depend on a compatibility shim and matches the sibling
    # noise tests.
    noised_averages = [
        nest.flatten(sess.run(query_result)) for _ in range(1000)
    ]

    # Standard deviation across runs, computed per flattened component.
    result_stddev = np.std(noised_averages, 0)
    avg_stddev = sum_stddev / denominator
    expected_stddev = [sum_stddev, avg_stddev, avg_stddev]
    self.assertArrayNear(result_stddev, expected_stddev, 0.1)
def __init__(
    self,
    l2_norm_clip,
    noise_multiplier,
    num_microbatches,
    unroll_microbatches=False,
    *args,  # pylint: disable=keyword-arg-before-vararg
    **kwargs):
  """Builds a Gaussian average query and initializes the parent optimizer.

  Args:
    l2_norm_clip: clip value passed to the Gaussian average query.
    noise_multiplier: multiplied by `l2_norm_clip` to obtain the noise stddev
      passed to the query.
    num_microbatches: passed to the query as its denominator and forwarded to
      the parent constructor.
    unroll_microbatches: forwarded to the parent constructor.
    *args: forwarded to the parent constructor.
    **kwargs: forwarded to the parent constructor. When `population_size` is
      present, it is removed together with the optional `ledger_max_queries`
      and `ledger_max_samples` (both default to 1e6) and used to create a
      privacy ledger that wraps the query.
  """
  noise_stddev = l2_norm_clip * noise_multiplier
  query = gaussian_query.GaussianAverageQuery(
      l2_norm_clip, noise_stddev, num_microbatches)

  if 'population_size' in kwargs:
    # Ledger-specific options must not reach the parent constructor.
    population_size = kwargs.pop('population_size')
    max_queries = kwargs.pop('ledger_max_queries', 1e6)
    max_samples = kwargs.pop('ledger_max_samples', 1e6)
    ledger = privacy_ledger.PrivacyLedger(
        population_size,
        num_microbatches / population_size,
        max_samples,
        max_queries)
    query = privacy_ledger.QueryWithLedger(query, ledger)

  super(DPGaussianOptimizerClass, self).__init__(
      query,
      num_microbatches,
      unroll_microbatches,
      *args,
      **kwargs)
def testNoiseMultiplier(self, cls):
  """Checks the stddev of noisy gradients computed with a gradient tape.

  Gradients are computed 1000 times through `cls.compute_gradients` and their
  standard deviation is expected to be within 0.5 of 2.0 * 4.0.

  Args:
    cls: optimizer class (or factory) under test.
  """
  with tf.GradientTape(persistent=True) as gradient_tape:
    var0 = tf.Variable([0.0])
    data0 = tf.Variable([[0.0]])

    ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 5000, 5000)
    base_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1)
    ledger_query = privacy_ledger.QueryWithLedger(base_query, ledger)

    opt = cls(ledger_query, num_microbatches=1, learning_rate=2.0)

    self.evaluate(tf.global_variables_initializer())
    # Sanity-check the initial parameter value.
    self.assertAllClose([0.0], self.evaluate(var0))

    def noisy_gradient():
      # One gradient evaluation; only the gradient of var0 is kept.
      grads_and_vars = opt.compute_gradients(
          lambda: self._loss_fn(var0, data0), [var0],
          gradient_tape=gradient_tape)
      return grads_and_vars[0][0]

    grads = [noisy_gradient() for _ in range(1000)]

    # The standard deviation should be close to
    # l2_norm_clip * noise_multiplier.
    self.assertNear(np.std(grads), 2.0 * 4.0, 0.5)
def test_nested_query(self):
  """Checks ledger entries recorded for a nested query over two samples.

  Two Gaussian average queries sharing one ledger are nested and wrapped with
  `QueryWithLedger`. The query is run twice, with different population size
  and selection probability each time, and the formatted ledger is inspected
  after each run.
  """
  population_size = tf.Variable(0)
  selection_probability = tf.Variable(0.0)
  ledger = privacy_ledger.PrivacyLedger(
      population_size, selection_probability, 50, 50)

  sub_queries = [
      gaussian_query.GaussianAverageQuery(
          l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0, ledger=ledger),
      gaussian_query.GaussianAverageQuery(
          l2_norm_clip=5.0, sum_stddev=1.0, denominator=5.0, ledger=ledger),
  ]
  query = privacy_ledger.QueryWithLedger(
      nested_query.NestedQuery(sub_queries), ledger)

  records = [[1.0, [12.0, 9.0]], [5.0, [1.0, 2.0]]]
  expected_queries = [[4.0, 2.0], [5.0, 1.0]]

  def check_sample(sample, expected_population, expected_probability):
    # Verifies one formatted ledger entry against the expected values.
    self.assertAllClose(sample.population_size, expected_population)
    self.assertAllClose(sample.selection_probability, expected_probability)
    self.assertAllClose(sorted(sample.queries), sorted(expected_queries))

  # First sample.
  tf.assign(population_size, 10)
  tf.assign(selection_probability, 0.1)
  test_utils.run_query(query, records)

  formatted = ledger.get_formatted_ledger_eager()
  check_sample(formatted[0], 10.0, 0.1)

  # Second sample.
  tf.assign(population_size, 20)
  tf.assign(selection_probability, 0.2)
  test_utils.run_query(query, records)

  # Unpacking requires the ledger to hold exactly two entries.
  first_sample, second_sample = ledger.get_formatted_ledger_eager()
  check_sample(first_sample, 10.0, 0.1)
  check_sample(second_sample, 20.0, 0.2)
def __init__(self, l2_norm_clip, noise_multiplier, num_microbatches, *args,
             **kwargs):
  """Initializes the parent optimizer and sets up the private query.

  Args:
    l2_norm_clip: clip value passed to the Gaussian average query.
    noise_multiplier: multiplied by `l2_norm_clip` to obtain the noise stddev
      passed to the query.
    num_microbatches: stored on the instance and passed to the query as its
      denominator.
    *args: forwarded to the parent constructor.
    **kwargs: forwarded to the parent constructor.
  """
  super(DPOptimizerClass, self).__init__(*args, **kwargs)

  self._num_microbatches = num_microbatches
  self._private_query = gaussian_query.GaussianAverageQuery(
      l2_norm_clip,
      l2_norm_clip * noise_multiplier,
      num_microbatches)
  # Capture the query's initial global state once at construction time.
  self._global_state = self._private_query.initial_global_state()
def test_nested_gaussian_average_with_clip_no_noise(self):
  """Checks clipping and averaging of a noise-free nested average query.

  Two Gaussian average queries (clip values 4.0 and 5.0, denominator 5.0,
  zero noise) are nested and applied to two records.
  """
  with self.cached_session() as sess:
    scalar_query = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=4.0, sum_stddev=0.0, denominator=5.0)
    vector_query = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=5.0, sum_stddev=0.0, denominator=5.0)
    query = nested_query.NestedQuery([scalar_query, vector_query])

    records = [
        [1.0, [12.0, 9.0]],  # Clipped to [1.0, [4.0, 3.0]]
        [5.0, [1.0, 2.0]],  # Clipped to [4.0, [1.0, 2.0]]
    ]

    query_result = _run_query(query, records)
    actual = sess.run(query_result)

    self.assertAllClose(actual, [1.0, [1.0, 1.0]])
def test_gaussian_average_no_noise(self):
  """Checks a noise-free Gaussian average over two records.

  With `l2_norm_clip=3.0`, zero noise and `denominator=2.0`, the expected
  average of the two (clipped) records is [1.0, 1.0].
  """
  with self.cached_session() as sess:
    records = [
        tf.constant([5.0, 0.0]),  # Clipped to [3.0, 0.0].
        tf.constant([-1.0, 2.0]),  # Not clipped.
    ]

    query = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=3.0, sum_stddev=0.0, denominator=2.0)

    # run_query returns a pair; only the first element is the query result.
    query_result, _ = test_utils.run_query(query, records)
    actual = sess.run(query_result)

    self.assertAllClose(actual, [1.0, 1.0])
def linear_model_fn(features, labels, mode):
  """Model function for a single-unit linear model trained with DP-SGD.

  Args:
    features: mapping whose 'x' entry is fed to the dense layer.
    labels: targets compared with the predictions via squared difference.
    mode: forwarded unchanged to the returned `EstimatorSpec`.

  Returns:
    A `tf.estimator.EstimatorSpec` whose loss is the mean squared difference
    and whose train op minimizes the un-reduced (vector) loss.
  """
  dense_layer = tf.keras.layers.Dense(1, activation='linear', name='dense')
  preds = dense_layer.apply(features['x'])

  per_example_loss = tf.squared_difference(labels, preds)
  mean_loss = tf.reduce_mean(per_example_loss)

  dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
  optimizer = dp_optimizer.DPGradientDescentOptimizer(
      dp_average_query,
      num_microbatches=1,
      learning_rate=1.0)

  # The optimizer is given the vector loss, while the scalar mean is what the
  # EstimatorSpec reports.
  train_op = optimizer.minimize(
      loss=per_example_loss,
      global_step=tf.train.get_global_step())

  return tf.estimator.EstimatorSpec(
      mode=mode, loss=mean_loss, train_op=train_op)
def test_gaussian_average_with_noise(self):
  """Checks the empirical stddev of a noisy Gaussian average query.

  The query result is evaluated 1000 times; the standard deviation of the
  outcomes is expected to be within 0.1 of `sum_stddev / denominator`.
  """
  with self.cached_session() as sess:
    records = [2.71828, 3.14159]
    sum_stddev = 1.0
    denominator = 2.0

    query = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=5.0, sum_stddev=sum_stddev, denominator=denominator)
    # run_query returns a pair; only the first element is the query result.
    query_result, _ = test_utils.run_query(query, records)

    noised_averages = [sess.run(query_result) for _ in range(1000)]

    expected_stddev = sum_stddev / denominator
    self.assertNear(np.std(noised_averages), expected_stddev, 0.1)
def testClippingNorm(self, cls):
  """Checks the gradient from `cls` when the query uses a clip value of 1.0.

  With zero noise and a single microbatch, the gradient for `var0` is
  expected to be [-0.6, -0.8].

  Args:
    cls: optimizer class (or factory) under test.
  """
  with self.cached_session() as sess:
    var0 = tf.Variable([0.0, 0.0])
    data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])

    clip_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
    opt = cls(clip_query, num_microbatches=1, learning_rate=2.0)

    self.evaluate(tf.global_variables_initializer())
    # Sanity-check the initial parameter values.
    self.assertAllClose([0.0, 0.0], self.evaluate(var0))

    gradient_op = opt.compute_gradients(loss(data0, var0), [var0])
    computed = sess.run(gradient_op)
    self.assertAllCloseAccordingToType([-0.6, -0.8], computed[0][0])
def testNoiseMultiplier(self, cls):
  """Checks the stddev of noisy gradients evaluated in a session.

  The gradient op is run 1000 times and the standard deviation of the
  resulting gradients is expected to be within 0.5 of 2.0 * 4.0.

  Args:
    cls: optimizer class (or factory) under test.
  """
  with self.cached_session() as sess:
    var0 = tf.Variable([0.0])
    data0 = tf.Variable([[0.0]])

    noisy_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1)
    opt = cls(noisy_query, num_microbatches=1, learning_rate=2.0)

    self.evaluate(tf.global_variables_initializer())
    # Sanity-check the initial parameter value.
    self.assertAllClose([0.0], self.evaluate(var0))

    gradient_op = opt.compute_gradients(loss(data0, var0), [var0])
    # Each run of the op yields a fresh gradient; keep the one for var0.
    grads = [sess.run(gradient_op)[0][0] for _ in range(1000)]

    # The standard deviation should be close to
    # l2_norm_clip * noise_multiplier.
    self.assertNear(np.std(grads), 2.0 * 4.0, 0.5)
def testClippingNorm(self, cls):
  """Checks the tape-based gradient when the query uses a clip value of 1.0.

  The query is wrapped with a privacy ledger; with zero noise and a single
  microbatch, the gradient for `var0` is expected to be [-0.6, -0.8].

  Args:
    cls: optimizer class (or factory) under test.
  """
  with tf.GradientTape(persistent=True) as gradient_tape:
    var0 = tf.Variable([0.0, 0.0])
    data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])

    ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 50, 50)
    base_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
    ledger_query = privacy_ledger.QueryWithLedger(base_query, ledger)

    opt = cls(ledger_query, num_microbatches=1, learning_rate=2.0)

    self.evaluate(tf.global_variables_initializer())
    # Sanity-check the initial parameter values.
    self.assertAllClose([0.0, 0.0], self.evaluate(var0))

    def loss_fn():
      # Loss is supplied as a callable and evaluated by the optimizer.
      return self._loss_fn(var0, data0)

    computed = opt.compute_gradients(
        loss_fn, [var0], gradient_tape=gradient_tape)
    self.assertAllCloseAccordingToType([-0.6, -0.8], computed[0][0])
def testUnrollMicrobatches(self, cls):
  """Checks gradients from `cls` when `unroll_microbatches=True`.

  Uses four microbatches, a 1e9 clip value and zero noise; the gradient for
  `var0` is expected to be [-2.5, -2.5].

  Args:
    cls: optimizer class (or factory) under test.
  """
  with self.cached_session() as sess:
    var0 = tf.Variable([1.0, 2.0])
    data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])

    average_query = gaussian_query.GaussianAverageQuery(1.0e9, 0.0, 4)
    opt = cls(
        average_query,
        num_microbatches=4,
        learning_rate=2.0,
        unroll_microbatches=True)

    self.evaluate(tf.global_variables_initializer())
    # Sanity-check the initial parameter values.
    self.assertAllClose([1.0, 2.0], self.evaluate(var0))

    # The expected gradient is the sum of differences divided by the number
    # of microbatches.
    gradient_op = opt.compute_gradients(loss(data0, var0), [var0])
    computed = sess.run(gradient_op)
    self.assertAllCloseAccordingToType([-2.5, -2.5], computed[0][0])