def test_nested_query_with_noise(self):
  """Checks empirical noise stddev of each leaf of a nested sum query."""
  with self.cached_session() as sess:
    sum_stddev = 2.71828
    denominator = 3.14159
    sum_query = gaussian_query.GaussianSumQuery(
        l2_norm_clip=1.5, stddev=sum_stddev)
    avg_query = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=0.5, sum_stddev=sum_stddev, denominator=denominator)
    nested = nested_query.NestedSumQuery((sum_query, avg_query))

    record1 = (3.0, [2.0, 1.5])
    record2 = (0.0, [-1.0, -3.5])
    query_result, _ = test_utils.run_query(nested, [record1, record2])

    # Draw many samples and compare the empirical stddev of each leaf
    # against the analytic value.
    samples = [tf.nest.flatten(sess.run(query_result)) for _ in range(1000)]
    result_stddev = np.std(samples, 0)

    # The average query divides the noised sum by the denominator, so its
    # effective stddev shrinks by the same factor.
    avg_stddev = sum_stddev / denominator
    expected_stddev = [sum_stddev, avg_stddev, avg_stddev]
    self.assertArrayNear(result_stddev, expected_stddev, 0.1)
def test_complex_nested_query(self):
  """Runs a noiseless nested query over mixed dict/list/tuple structures."""
  with self.cached_session() as sess:
    query_ab = gaussian_query.GaussianSumQuery(l2_norm_clip=1.0, stddev=0.0)
    query_c = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=10.0, sum_stddev=0.0, denominator=2.0)
    query_d = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=0.0)
    outer = nested_query.NestedSumQuery(
        [query_ab, {'c': query_c, 'd': [query_d]}])

    # Records mirror the query structure: a dict for query_ab's leaves and
    # a dict holding a tuple (for 'c') and a list (for 'd').
    record1 = [{'a': 0.0, 'b': 2.71828}, {'c': (-4.0, 6.0), 'd': [-4.0]}]
    record2 = [{'a': 3.14159, 'b': 0.0}, {'c': (6.0, -4.0), 'd': [5.0]}]

    query_result, _ = test_utils.run_query(outer, [record1, record2])
    result = sess.run(query_result)

    expected = [{'a': 1.0, 'b': 1.0}, {'c': (1.0, 1.0), 'd': [1.0]}]
    self.assertAllClose(result, expected)
def test_nested_gaussian_average_with_clip_no_noise(self):
  """Verifies per-sub-query clipping before noiseless averaging."""
  with self.cached_session() as sess:
    scalar_query = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=4.0, sum_stddev=0.0, denominator=5.0)
    vector_query = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=5.0, sum_stddev=0.0, denominator=5.0)
    query = nested_query.NestedQuery([scalar_query, vector_query])

    record1 = [1.0, [12.0, 9.0]]  # Clipped to [1.0, [4.0, 3.0]]
    record2 = [5.0, [1.0, 2.0]]  # Clipped to [4.0, [1.0, 2.0]]

    query_result, _ = test_utils.run_query(query, [record1, record2])
    result = sess.run(query_result)

    # Clipped sums are [5.0, [5.0, 5.0]]; dividing by denominator 5 gives 1.
    expected = [1.0, [1.0, 1.0]]
    self.assertAllClose(result, expected)
def test_nested_query(self):
  """Checks the privacy ledger records nested-query events per sample."""
  population_size = tf.Variable(0)
  selection_probability = tf.Variable(1.0)

  query1 = gaussian_query.GaussianAverageQuery(
      l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0)
  query2 = gaussian_query.GaussianAverageQuery(
      l2_norm_clip=5.0, sum_stddev=1.0, denominator=5.0)
  query = nested_query.NestedQuery([query1, query2])
  query = privacy_ledger.QueryWithLedger(
      query, population_size, selection_probability)

  record1 = [1.0, [12.0, 9.0]]
  record2 = [5.0, [1.0, 2.0]]

  # Each sub-query should be logged as its (l2_norm_clip, sum_stddev) pair.
  expected_queries = [[4.0, 2.0], [5.0, 1.0]]

  # First sample.
  tf.compat.v1.assign(population_size, 10)
  tf.compat.v1.assign(selection_probability, 0.1)
  test_utils.run_query(query, [record1, record2])

  formatted = query.ledger.get_formatted_ledger_eager()
  sample_1 = formatted[0]
  self.assertAllClose(sample_1.population_size, 10.0)
  self.assertAllClose(sample_1.selection_probability, 0.1)
  self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries))

  # Second sample; the first sample's entry must be preserved unchanged.
  tf.compat.v1.assign(population_size, 20)
  tf.compat.v1.assign(selection_probability, 0.2)
  test_utils.run_query(query, [record1, record2])

  formatted = query.ledger.get_formatted_ledger_eager()
  sample_1, sample_2 = formatted

  self.assertAllClose(sample_1.population_size, 10.0)
  self.assertAllClose(sample_1.selection_probability, 0.1)
  self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries))

  self.assertAllClose(sample_2.population_size, 20.0)
  self.assertAllClose(sample_2.selection_probability, 0.2)
  self.assertAllClose(sorted(sample_2.queries), sorted(expected_queries))
def test_gaussian_average_no_noise(self):
  """A noiseless Gaussian average query clips records and then averages."""
  with self.cached_session() as sess:
    record1 = tf.constant([5.0, 0.0])  # Clipped to [3.0, 0.0].
    record2 = tf.constant([-1.0, 2.0])  # Not clipped.

    query = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=3.0, sum_stddev=0.0, denominator=2.0)
    query_result, _ = test_utils.run_query(query, [record1, record2])
    result = sess.run(query_result)

    # Clipped sum is [2.0, 2.0]; dividing by denominator 2 gives [1.0, 1.0].
    self.assertAllClose(result, [1.0, 1.0])
def __init__(self,
             initial_l2_norm_clip,
             noise_multiplier,
             target_unclipped_quantile,
             learning_rate,
             clipped_count_stddev,
             expected_num_records,
             geometric_update=False):
  """Initializes the QuantileAdaptiveClipSumQuery.

  Args:
    initial_l2_norm_clip: The initial value of the clipping norm.
    noise_multiplier: Multiplier of l2_norm_clip giving the stddev of the
      noise added to the output of the sum query.
    target_unclipped_quantile: The desired quantile of updates which should
      be unclipped. I.e., a value of 0.8 means a value of l2_norm_clip
      should be found for which approximately 20% of updates are clipped
      each round.
    learning_rate: The learning rate for the clipping norm adaptation. A
      rate of r means the clipping norm changes by at most r per step; the
      maximum is attained when |clip - target| is 1.0.
    clipped_count_stddev: The stddev of the noise added to the
      clipped_count. Since the sensitivity of the clipped count is 0.5, as
      a rule of thumb it should be about 0.5 for reasonable privacy.
    expected_num_records: The expected number of records per round, used to
      estimate the clipped count quantile.
    geometric_update: If True, use geometric updating of the clip.
  """
  self._initial_l2_norm_clip = initial_l2_norm_clip
  self._noise_multiplier = noise_multiplier
  self._target_unclipped_quantile = target_unclipped_quantile
  self._learning_rate = learning_rate

  # The sum query's global state is created with None placeholders here and
  # set later.
  self._sum_query = gaussian_query.GaussianSumQuery(None, None)

  # self._clipped_fraction_query is a DPQuery used to estimate the fraction
  # of records that are clipped. It accumulates a 0/1 indicator of whether
  # each record is clipped and normalizes by the expected number of records.
  # In practice, clipped counts are accumulated shifted by -0.5 so they are
  # centered at zero; this makes the sensitivity of the clipped count query
  # 0.5 instead of 1.0, since a single record can affect the count by at
  # most 0.5. Although the l2_norm_clip of this query is 0.5, no clipping
  # ever actually occurs because each record's value is always +/-0.5.
  self._clipped_fraction_query = gaussian_query.GaussianAverageQuery(
      l2_norm_clip=0.5,
      sum_stddev=clipped_count_stddev,
      denominator=expected_num_records)

  self._geometric_update = geometric_update
def _construct_below_estimate_query(self, below_estimate_stddev,
                                    expected_num_records):
  """Builds the DPQuery that estimates the fraction of records below the
  current quantile estimate.

  The query accumulates a 0/1 indicator of whether each record is below the
  estimate and normalizes by the expected number of records. In practice,
  counts are accumulated shifted by -0.5 so they are centered at zero; this
  makes the sensitivity of the below_estimate count query 0.5 instead of
  1.0, since a single record can affect the count by at most 0.5. Although
  the l2_norm_clip of the query is 0.5, no clipping ever actually occurs
  because each record's value is always +/-0.5.
  """
  return gaussian_query.GaussianAverageQuery(
      l2_norm_clip=0.5,
      sum_stddev=below_estimate_stddev,
      denominator=expected_num_records)
def __init__(self,
             initial_estimate,
             target_quantile,
             learning_rate,
             below_estimate_stddev,
             expected_num_records,
             geometric_update=False):
  """Initializes the QuantileEstimatorQuery.

  (Fixes a copy-pasted docstring that incorrectly named this the
  QuantileAdaptiveClipSumQuery; the parameters here are those of a quantile
  estimator.)

  Args:
    initial_estimate: The initial estimate of the quantile.
    target_quantile: The target quantile. I.e., a value of 0.8 means a
      value should be found for which approximately 80% of updates are less
      than the estimate each round.
    learning_rate: The learning rate. A rate of r means that the estimate
      will change by a maximum of r at each step (for arithmetic updating)
      or by a maximum factor of exp(r) (for geometric updating).
    below_estimate_stddev: The stddev of the noise added to the count of
      records currently below the estimate. Since the sensitivity of the
      count query is 0.5, as a rule of thumb it should be about 0.5 for
      reasonable privacy.
    expected_num_records: The expected number of records per round.
    geometric_update: If True, use geometric updating of the estimate.
      Geometric updating is preferred for non-negative records like vector
      norms that could potentially be very large or very close to zero.
  """
  self._initial_estimate = initial_estimate
  self._target_quantile = target_quantile
  self._learning_rate = learning_rate

  # A DPQuery used to estimate the fraction of records that are less than
  # the current quantile estimate. It accumulates a 0/1 indicator of
  # whether each record is below the estimate and normalizes by the
  # expected number of records. In practice, counts are accumulated shifted
  # by -0.5 so they are centered at zero; this makes the sensitivity of the
  # below_estimate count query 0.5 instead of 1.0, since a single record
  # can affect the count by at most 0.5. Although the l2_norm_clip of the
  # below_estimate query is 0.5, no clipping ever actually occurs because
  # each record's value is always +/-0.5.
  self._below_estimate_query = gaussian_query.GaussianAverageQuery(
      l2_norm_clip=0.5,
      sum_stddev=below_estimate_stddev,
      denominator=expected_num_records)

  self._geometric_update = geometric_update

  # Internal invariant check: the estimator relies on sum-aggregation
  # semantics of the below-estimate query.
  assert isinstance(self._below_estimate_query,
                    dp_query.SumAggregationDPQuery)
def test_gaussian_average_with_noise(self):
  """Empirical stddev of a noised average matches sum_stddev/denominator."""
  with self.cached_session() as sess:
    record1, record2 = 2.71828, 3.14159
    sum_stddev = 1.0
    denominator = 2.0

    query = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=5.0, sum_stddev=sum_stddev, denominator=denominator)
    query_result, _ = test_utils.run_query(query, [record1, record2])

    # Re-running the result op resamples the noise; collect many draws.
    samples = [sess.run(query_result) for _ in range(1000)]
    result_stddev = np.std(samples)

    # Dividing the noised sum by the denominator scales the stddev too.
    avg_stddev = sum_stddev / denominator
    self.assertNear(result_stddev, avg_stddev, 0.1)
def __init__(
    self,
    l2_norm_clip,
    noise_multiplier,
    scalars,
    num_microbatches=None,
    ledger=None,
    unroll_microbatches=False,
    *args,  # pylint: disable=keyword-arg-before-vararg
    **kwargs):
  """Initializes the multi-Gaussian DP optimizer.

  Builds one GaussianAverageQuery per parameter group and nests them into a
  single NestedQuery. The original implementation hand-wrote twelve nearly
  identical constructions; this version builds the same queries with a
  comprehension and generalizes to any number of groups (the sequences are
  zipped, so behavior is identical for the original 12-group inputs).

  Args:
    l2_norm_clip: Sequence of per-group clipping norms.
    noise_multiplier: Sequence of per-group noise multipliers; the noise
      stddev for group i is l2_norm_clip[i] * noise_multiplier[i].
    scalars: Sequence of per-group averaging denominators.
    num_microbatches: Number of microbatches, forwarded to the parent.
    ledger: If set, the nested query is wrapped in a QueryWithLedger.
    unroll_microbatches: Forwarded to the parent class.
    *args: Additional positional arguments for the parent class.
    **kwargs: Additional keyword arguments for the parent class.
  """
  group_queries = [
      gaussian_query.GaussianAverageQuery(clip, clip * multiplier, scalar)
      for clip, multiplier, scalar in zip(l2_norm_clip, noise_multiplier,
                                          scalars)
  ]
  dp_nested_query = nested_query.NestedQuery(group_queries)
  if ledger:
    dp_nested_query = privacy_ledger.QueryWithLedger(
        dp_nested_query, ledger=ledger)
  super(DPMultiGaussianOptimizerClass, self).__init__(
      dp_nested_query, num_microbatches, unroll_microbatches, *args,
      **kwargs)