def testBaseline(self, cls, num_microbatches, expected_answer):
  with self.cached_session() as sess:
    var0 = tf.Variable([1.0, 2.0])
    data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])

    dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
    dp_sum_query = privacy_ledger.QueryWithLedger(
        dp_sum_query, 1e6, num_microbatches / 1e6)

    opt = cls(dp_sum_query, num_microbatches=num_microbatches,
              learning_rate=2.0)

    self.evaluate(tf.global_variables_initializer())
    # Fetch params to validate initial values.
    self.assertAllClose([1.0, 2.0], self.evaluate(var0))

    # Expected gradient is sum of differences divided by number of
    # microbatches.
    gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0])
    grads_and_vars = sess.run(gradient_op)
    self.assertAllCloseAccordingToType(expected_answer, grads_and_vars[0][0])
def test_gaussian_sum_merge(self):
  records1 = [tf.constant([2.0, 0.0]), tf.constant([-1.0, 1.0])]
  records2 = [tf.constant([3.0, 5.0]), tf.constant([-1.0, 4.0])]

  def get_sample_state(records):
    query = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=1.0)
    global_state = query.initial_global_state()
    params = query.derive_sample_params(global_state)
    sample_state = query.initial_sample_state(records[0])
    for record in records:
      sample_state = query.accumulate_record(params, sample_state, record)
    return sample_state

  sample_state_1 = get_sample_state(records1)
  sample_state_2 = get_sample_state(records2)

  merged = gaussian_query.GaussianSumQuery(10.0, 1.0).merge_sample_states(
      sample_state_1, sample_state_2)

  with self.cached_session() as sess:
    result = sess.run(merged)

  expected = [3.0, 10.0]
  self.assertAllClose(result, expected)
def get_sample_state(records):
  query = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=1.0)
  global_state = query.initial_global_state()
  params = query.derive_sample_params(global_state)
  sample_state = query.initial_sample_state(global_state, records[0])
  for record in records:
    sample_state = query.accumulate_record(params, sample_state, record)
  return sample_state
def test_complex_nested_query(self):
  with self.cached_session() as sess:
    query_ab = gaussian_query.GaussianSumQuery(l2_norm_clip=1.0, stddev=0.0)
    query_c = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=10.0, sum_stddev=0.0, denominator=2.0)
    query_d = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=0.0)

    query = nested_query.NestedQuery(
        [query_ab, {'c': query_c, 'd': [query_d]}])

    record1 = [{'a': 0.0, 'b': 2.71828}, {'c': (-4.0, 6.0), 'd': [-4.0]}]
    record2 = [{'a': 3.14159, 'b': 0.0}, {'c': (6.0, -4.0), 'd': [5.0]}]

    query_result, _ = test_utils.run_query(query, [record1, record2])
    result = sess.run(query_result)
    expected = [{'a': 1.0, 'b': 1.0}, {'c': (1.0, 1.0), 'd': [1.0]}]
    self.assertAllClose(result, expected)
def test_gaussian_sum_no_clip_no_noise(self):
  with self.cached_session() as sess:
    record1 = tf.constant([2.0, 0.0])
    record2 = tf.constant([-1.0, 1.0])

    query = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=0.0)
    query_result, _ = test_utils.run_query(query, [record1, record2])
    result = sess.run(query_result)
    expected = [1.0, 1.0]
    self.assertAllClose(result, expected)
def __init__(self,
             initial_l2_norm_clip,
             noise_multiplier,
             target_unclipped_quantile,
             learning_rate,
             clipped_count_stddev,
             expected_num_records,
             ledger=None):
  """Initializes the QuantileAdaptiveClipSumQuery.

  Args:
    initial_l2_norm_clip: The initial value of the clipping norm.
    noise_multiplier: The multiplier of l2_norm_clip that determines the
      stddev of the noise added to the output of the sum query.
    target_unclipped_quantile: The desired quantile of updates that should
      be unclipped. I.e., a value of 0.8 means a value of l2_norm_clip
      should be found for which approximately 20% of updates are clipped
      each round.
    learning_rate: The learning rate for the clipping norm adaptation. A
      rate of r means that the clipping norm will change by a maximum of r
      at each step. This maximum is attained when the fraction of unclipped
      updates differs from the target quantile by 1.0. Can be a tf.Variable,
      for example to implement a learning rate schedule.
    clipped_count_stddev: The stddev of the noise added to the
      clipped_count. Since the sensitivity of the clipped count is 0.5, as
      a rule of thumb it should be about 0.5 for reasonable privacy.
    expected_num_records: The expected number of records per round, used to
      estimate the clipped count quantile.
    ledger: The privacy ledger to which queries should be recorded.
  """
  self._initial_l2_norm_clip = tf.cast(initial_l2_norm_clip, tf.float32)
  self._noise_multiplier = tf.cast(noise_multiplier, tf.float32)
  self._target_unclipped_quantile = tf.cast(target_unclipped_quantile,
                                            tf.float32)
  self._learning_rate = tf.cast(learning_rate, tf.float32)

  self._l2_norm_clip = tf.Variable(self._initial_l2_norm_clip)
  self._sum_stddev = tf.Variable(self._initial_l2_norm_clip *
                                 self._noise_multiplier)
  self._sum_query = gaussian_query.GaussianSumQuery(
      self._l2_norm_clip, self._sum_stddev, ledger)

  # self._clipped_fraction_query is a DPQuery used to estimate the fraction
  # of records that are clipped. It accumulates an indicator 0/1 of whether
  # each record is clipped, and normalizes by the expected number of
  # records. In practice, we accumulate clipped counts shifted by -0.5 so
  # they are centered at zero. This makes the sensitivity of the clipped
  # count query 0.5 instead of 1.0, since the maximum that a single record
  # could affect the count is 0.5. Note that although the l2_norm_clip of
  # the clipped fraction query is 0.5, no clipping will ever actually occur
  # because the value of each record is always +/-0.5.
  self._clipped_fraction_query = gaussian_query.GaussianAverageQuery(
      l2_norm_clip=0.5,
      sum_stddev=clipped_count_stddev,
      denominator=expected_num_records,
      ledger=ledger)
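# A minimal usage sketch (not part of the original file), assuming the
# QuantileAdaptiveClipSumQuery class defined above. The parameter values
# below are illustrative, not recommendations.
query = QuantileAdaptiveClipSumQuery(
    initial_l2_norm_clip=1.0,       # starting clipping norm
    noise_multiplier=1.1,           # sum noise stddev = 1.1 * current clip
    target_unclipped_quantile=0.5,  # adapt until ~50% of updates are unclipped
    learning_rate=0.2,              # clip moves by at most 0.2 per step
    clipped_count_stddev=0.5,       # rule-of-thumb noise for the clipped count
    expected_num_records=100)       # assumed number of records per round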
def test_gaussian_sum_with_clip_no_noise(self):
  with self.cached_session() as sess:
    record1 = tf.constant([-6.0, 8.0])  # Clipped to [-3.0, 4.0].
    record2 = tf.constant([4.0, -3.0])  # Not clipped.

    query = gaussian_query.GaussianSumQuery(l2_norm_clip=5.0, stddev=0.0)
    query_result, _ = test_utils.run_query(query, [record1, record2])
    result = sess.run(query_result)
    expected = [1.0, 1.0]
    self.assertAllClose(result, expected)
def test_normalization(self):
  with self.cached_session() as sess:
    record1 = tf.constant([-6.0, 8.0])  # Clipped to [-3.0, 4.0].
    record2 = tf.constant([4.0, -3.0])  # Not clipped.

    sum_query = gaussian_query.GaussianSumQuery(l2_norm_clip=5.0, stddev=0.0)
    query = normalized_query.NormalizedQuery(
        numerator_query=sum_query, denominator=2.0)

    query_result, _ = test_utils.run_query(query, [record1, record2])
    result = sess.run(query_result)
    expected = [0.5, 0.5]
    self.assertAllClose(result, expected)
def test_gaussian_sum_with_noise(self):
  with self.cached_session() as sess:
    record1, record2 = 2.71828, 3.14159
    stddev = 1.0

    query = gaussian_query.GaussianSumQuery(l2_norm_clip=5.0, stddev=stddev)
    query_result, _ = test_utils.run_query(query, [record1, record2])

    noised_sums = []
    for _ in range(1000):
      noised_sums.append(sess.run(query_result))

    result_stddev = np.std(noised_sums)
    self.assertNear(result_stddev, stddev, 0.1)
def linear_model_fn(features, labels, mode):
  preds = tf.keras.layers.Dense(
      1, activation='linear', name='dense').apply(features['x'])

  vector_loss = tf.squared_difference(labels, preds)
  scalar_loss = tf.reduce_mean(vector_loss)

  dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
  dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)
  optimizer = dp_optimizer.DPGradientDescentOptimizer(
      dp_sum_query,
      num_microbatches=1,
      learning_rate=1.0)
  global_step = tf.train.get_global_step()
  train_op = optimizer.minimize(loss=vector_loss, global_step=global_step)
  return tf.estimator.EstimatorSpec(
      mode=mode, loss=scalar_loss, train_op=train_op)
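# A minimal sketch (not part of the original file) of driving linear_model_fn
# through tf.estimator, assuming TF 1.x APIs and features keyed by 'x'. The
# synthetic regression data below is illustrative only.
estimator = tf.estimator.Estimator(model_fn=linear_model_fn)
train_data = np.random.normal(size=(1000, 1)).astype(np.float32)
train_labels = 3.0 * train_data + np.random.normal(
    scale=0.1, size=(1000, 1)).astype(np.float32)
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': train_data}, y=train_labels,
    batch_size=20, num_epochs=1, shuffle=True)
estimator.train(input_fn=train_input_fn)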
def __init__(self,
             l2_norm_clip,
             noise_multiplier,
             num_microbatches=None,
             ledger=None,
             unroll_microbatches=False,
             *args,
             **kwargs):
  dp_sum_query = gaussian_query.GaussianSumQuery(
      l2_norm_clip, l2_norm_clip * noise_multiplier)

  if ledger:
    dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, ledger=ledger)

  super(DPGaussianOptimizerClass, self).__init__(
      dp_sum_query,
      num_microbatches,
      unroll_microbatches,
      *args,
      **kwargs)
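# A minimal usage sketch (not part of the original file), assuming a concrete
# subclass built from DPGaussianOptimizerClass, such as the library's
# DPGradientDescentGaussianOptimizer. Parameter values are illustrative.
optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer(
    l2_norm_clip=1.0,      # per-microbatch gradients clipped to this L2 norm
    noise_multiplier=1.1,  # noise stddev = 1.1 * l2_norm_clip
    num_microbatches=1,
    learning_rate=0.1)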
def testClippingNorm(self, cls):
  with tf.GradientTape(persistent=True) as gradient_tape:
    var0 = tf.Variable([0.0, 0.0])
    data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])

    dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
    dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)

    opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)

    self.evaluate(tf.global_variables_initializer())
    # Fetch params to validate initial values.
    self.assertAllClose([0.0, 0.0], self.evaluate(var0))

    # Expected gradient is sum of differences.
    grads_and_vars = opt.compute_gradients(
        lambda: self._loss_fn(var0, data0), [var0],
        gradient_tape=gradient_tape)
    self.assertAllCloseAccordingToType([-0.6, -0.8], grads_and_vars[0][0])
def test_gaussian_sum_with_changing_clip_no_noise(self):
  with self.cached_session() as sess:
    record1 = tf.constant([-6.0, 8.0])  # Clipped to [-3.0, 4.0].
    record2 = tf.constant([4.0, -3.0])  # Not clipped.

    l2_norm_clip = tf.Variable(5.0)
    l2_norm_clip_placeholder = tf.placeholder(tf.float32)
    assign_l2_norm_clip = tf.assign(l2_norm_clip, l2_norm_clip_placeholder)
    query = gaussian_query.GaussianSumQuery(
        l2_norm_clip=l2_norm_clip, stddev=0.0)
    query_result, _ = test_utils.run_query(query, [record1, record2])

    self.evaluate(tf.global_variables_initializer())
    result = sess.run(query_result)
    expected = [1.0, 1.0]
    self.assertAllClose(result, expected)

    sess.run(assign_l2_norm_clip, {l2_norm_clip_placeholder: 0.0})
    result = sess.run(query_result)
    expected = [0.0, 0.0]
    self.assertAllClose(result, expected)
def testClippingNorm(self, cls):
  with self.cached_session() as sess:
    var0 = tf.Variable([0.0, 0.0])
    data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])

    dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
    dp_sum_query = privacy_ledger.QueryWithLedger(
        dp_sum_query, 1e6, 1 / 1e6)

    opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)

    self.evaluate(tf.global_variables_initializer())
    # Fetch params to validate initial values.
    self.assertAllClose([0.0, 0.0], self.evaluate(var0))

    # Expected gradient is sum of differences.
    gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0])
    grads_and_vars = sess.run(gradient_op)
    self.assertAllCloseAccordingToType([-0.6, -0.8], grads_and_vars[0][0])
def test_sum_query(self):
  record1 = tf.constant([2.0, 0.0])
  record2 = tf.constant([-1.0, 1.0])

  population_size = tf.Variable(0)
  selection_probability = tf.Variable(0.0)
  ledger = privacy_ledger.PrivacyLedger(
      population_size, selection_probability, 50, 50)

  query = gaussian_query.GaussianSumQuery(
      l2_norm_clip=10.0, stddev=0.0, ledger=ledger)
  query = privacy_ledger.QueryWithLedger(query, ledger)

  # First sample.
  tf.assign(population_size, 10)
  tf.assign(selection_probability, 0.1)
  test_utils.run_query(query, [record1, record2])

  # Each ledger entry records the sum query's [l2_norm_clip, noise stddev].
  expected_queries = [[10.0, 0.0]]
  formatted = ledger.get_formatted_ledger_eager()
  sample_1 = formatted[0]
  self.assertAllClose(sample_1.population_size, 10.0)
  self.assertAllClose(sample_1.selection_probability, 0.1)
  self.assertAllClose(sample_1.queries, expected_queries)

  # Second sample.
  tf.assign(population_size, 20)
  tf.assign(selection_probability, 0.2)
  test_utils.run_query(query, [record1, record2])

  formatted = ledger.get_formatted_ledger_eager()
  sample_1, sample_2 = formatted
  self.assertAllClose(sample_1.population_size, 10.0)
  self.assertAllClose(sample_1.selection_probability, 0.1)
  self.assertAllClose(sample_1.queries, expected_queries)
  self.assertAllClose(sample_2.population_size, 20.0)
  self.assertAllClose(sample_2.selection_probability, 0.2)
  self.assertAllClose(sample_2.queries, expected_queries)
def testNoiseMultiplier(self, cls):
  with tf.GradientTape(persistent=True) as gradient_tape:
    var0 = tf.Variable([0.0])
    data0 = tf.Variable([[0.0]])

    dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0)
    dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)

    opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)

    self.evaluate(tf.global_variables_initializer())
    # Fetch params to validate initial values.
    self.assertAllClose([0.0], self.evaluate(var0))

    grads = []
    for _ in range(1000):
      grads_and_vars = opt.compute_gradients(
          lambda: self._loss_fn(var0, data0), [var0],
          gradient_tape=gradient_tape)
      grads.append(grads_and_vars[0][0])

    # Test standard deviation is close to l2_norm_clip * noise_multiplier.
    self.assertNear(np.std(grads), 2.0 * 4.0, 0.5)
def test_nested_query_with_noise(self):
  with self.cached_session() as sess:
    sum_stddev = 2.71828
    denominator = 3.14159

    query1 = gaussian_query.GaussianSumQuery(
        l2_norm_clip=1.5, stddev=sum_stddev)
    query2 = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=0.5, sum_stddev=sum_stddev, denominator=denominator)
    query = nested_query.NestedQuery((query1, query2))

    record1 = (3.0, [2.0, 1.5])
    record2 = (0.0, [-1.0, -3.5])
    query_result, _ = test_utils.run_query(query, [record1, record2])

    noised_averages = []
    for _ in range(1000):
      noised_averages.append(nest.flatten(sess.run(query_result)))

    result_stddev = np.std(noised_averages, 0)
    avg_stddev = sum_stddev / denominator
    expected_stddev = [sum_stddev, avg_stddev, avg_stddev]
    self.assertArrayNear(result_stddev, expected_stddev, 0.1)
from absl.testing import parameterized
from distutils.version import LooseVersion
import numpy as np
import tensorflow as tf

from privacy.dp_query import gaussian_query
from privacy.dp_query import nested_query
from privacy.dp_query import test_utils

if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
  nest = tf.contrib.framework.nest
else:
  nest = tf.nest

_basic_query = gaussian_query.GaussianSumQuery(1.0, 0.0)


class NestedQueryTest(tf.test.TestCase, parameterized.TestCase):

  def test_nested_gaussian_sum_no_clip_no_noise(self):
    with self.cached_session() as sess:
      query1 = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=0.0)
      query2 = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=0.0)

      query = nested_query.NestedQuery([query1, query2])

      record1 = [1.0, [2.0, 3.0]]
      record2 = [4.0, [3.0, 2.0]]

      query_result, _ = test_utils.run_query(query, [record1, record2])
      result = sess.run(query_result)
      expected = [5.0, [5.0, 5.0]]
      self.assertAllClose(result, expected)
def test_incompatible_records(self, record1, record2, error_type):
  query = gaussian_query.GaussianSumQuery(1.0, 0.0)
  with self.assertRaises(error_type):
    test_utils.run_query(query, [record1, record2])