def test_target_zero_geometric(self):
  record1 = tf.constant(5.0)
  record2 = tf.constant(2.5)

  query = quantile_estimator_query.QuantileEstimatorQuery(
      initial_estimate=16.0,
      target_quantile=0.0,
      learning_rate=np.log(2.0),  # Geometric steps in powers of 2.
      below_estimate_stddev=0.0,
      expected_num_records=2.0,
      geometric_update=True)

  global_state = query.initial_global_state()

  initial_estimate = global_state.current_estimate
  self.assertAllClose(initial_estimate, 16.0)

  # For the first two iterations, both records are below, so the estimate is
  # halved. Then only one record is below, so the estimate is divided by only
  # sqrt(2.0), to 4 / sqrt(2.0). Still only one record is below, so it
  # reduces to 2.0. Now no records are below, and the estimate stays there
  # (at 2.0).
  four_div_root_two = 4 / np.sqrt(2.0)  # approx 2.828
  expected_estimates = [8.0, 4.0, four_div_root_two, 2.0, 2.0]

  for expected_estimate in expected_estimates:
    actual_estimate, global_state = test_utils.run_query(
        query, [record1, record2], global_state)

    self.assertAllClose(actual_estimate.numpy(), expected_estimate)
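# A minimal numpy sketch of the geometric update rule these expectations
# assume: each round the estimate is multiplied by
# exp(-learning_rate * (fraction_below - target_quantile)). This illustrates
# the arithmetic only, not the library's implementation; tie-handling
# (<= vs <) never matters for the records used in these tests.
import numpy as np

def geometric_updates(records, estimate, target, lr, steps):
  estimates = []
  for _ in range(steps):
    frac_below = np.mean([r <= estimate for r in records])
    estimate *= np.exp(-lr * (frac_below - target))
    estimates.append(estimate)
  return estimates

# [8.0, 4.0, 2.828..., 2.0, 2.0], matching test_target_zero_geometric.
print(geometric_updates([5.0, 2.5], 16.0, 0.0, np.log(2.0), 5))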
def test_target_one(self):
  record1 = tf.constant(1.5)
  record2 = tf.constant(2.75)

  query = quantile_estimator_query.QuantileEstimatorQuery(
      initial_estimate=0.0,
      target_quantile=1.0,
      learning_rate=1.0,
      below_estimate_stddev=0.0,
      expected_num_records=2.0,
      geometric_update=False)

  global_state = query.initial_global_state()

  initial_estimate = global_state.current_estimate
  self.assertAllClose(initial_estimate, 0.0)

  # On the first two iterations, both records are above, so the estimate goes
  # up by 1.0 (the learning rate). When it reaches 2.0, only one record is
  # above, so the estimate goes up by only 0.5. After two more iterations,
  # both records are below, and the estimate stays there (at 3.0).
  expected_estimates = [1.0, 2.0, 2.5, 3.0, 3.0]

  for expected_estimate in expected_estimates:
    actual_estimate, global_state = test_utils.run_query(
        query, [record1, record2], global_state)

    self.assertAllClose(actual_estimate.numpy(), expected_estimate)
def test_target_zero(self):
  record1 = tf.constant(8.5)
  record2 = tf.constant(7.25)

  query = quantile_estimator_query.QuantileEstimatorQuery(
      initial_estimate=10.0,
      target_quantile=0.0,
      learning_rate=1.0,
      below_estimate_stddev=0.0,
      expected_num_records=2.0,
      geometric_update=False)

  global_state = query.initial_global_state()

  initial_estimate = global_state.current_estimate
  self.assertAllClose(initial_estimate, 10.0)

  # On the first two iterations, both records are below, so the estimate goes
  # down by 1.0 (the learning rate). When the estimate reaches 8.0, only one
  # record is below, so the estimate goes down by only 0.5. After two more
  # iterations, no records are below, and the estimate stays there (at 7.0).
  expected_estimates = [9.0, 8.0, 7.5, 7.0, 7.0]

  for expected_estimate in expected_estimates:
    actual_estimate, global_state = test_utils.run_query(
        query, [record1, record2], global_state)

    self.assertAllClose(actual_estimate.numpy(), expected_estimate)
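# The linear (non-geometric) rule the two tests above assume: each round the
# estimate moves by -learning_rate * (fraction_below - target_quantile).
# A minimal numpy sketch of that arithmetic, not the library code:
import numpy as np

def linear_updates(records, estimate, target, lr, steps):
  estimates = []
  for _ in range(steps):
    frac_below = np.mean([r <= estimate for r in records])
    estimate -= lr * (frac_below - target)
    estimates.append(estimate)
  return estimates

# [9.0, 8.0, 7.5, 7.0, 7.0], matching test_target_zero.
print(linear_updates([8.5, 7.25], 10.0, 0.0, 1.0, 5))
# [1.0, 2.0, 2.5, 3.0, 3.0], matching test_target_one.
print(linear_updates([1.5, 2.75], 0.0, 1.0, 1.0, 5))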
def test_all_equal(self, start_low, geometric):
  # 20 equal records. Test that we converge to that record and bounce around
  # it. Unlike the linspace test, the quantile-matching objective is very
  # sharp at the optimum so a decaying learning rate is necessary.
  num_records = 20
  records = [tf.constant(5.0)] * num_records

  learning_rate = tf.Variable(1.0)

  query = quantile_estimator_query.QuantileEstimatorQuery(
      initial_estimate=(1.0 if start_low else 10.0),
      target_quantile=0.5,
      learning_rate=learning_rate,
      below_estimate_stddev=0.0,
      expected_num_records=num_records,
      geometric_update=geometric)

  global_state = query.initial_global_state()

  for t in range(50):
    # Decay the learning rate as 1/sqrt(t+1) so the estimate settles down
    # instead of oscillating around the optimum.
    learning_rate.assign(1.0 / np.sqrt(t + 1))
    _, global_state = test_utils.run_query(query, records, global_state)

    actual_estimate = global_state.current_estimate

    if t > 40:
      self.assertNear(actual_estimate, 5.0, 0.5)
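# Why the decay matters, in the same sketch arithmetic as above (an
# illustration, not the library code): with identical records the fraction
# below jumps from 0 to 1 as the estimate crosses 5.0, so a constant step of
# learning_rate * 0.5 would oscillate forever, while a 1/sqrt(t+1) step
# shrinks toward the optimum.
import numpy as np

estimate = 1.0
for t in range(50):
  frac_below = 1.0 if 5.0 <= estimate else 0.0  # all records equal 5.0
  estimate -= (1.0 / np.sqrt(t + 1)) * (frac_below - 0.5)
print(estimate)  # ends within ~0.1 of 5.0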
def test_target_one_geometric(self):
  record1 = tf.constant(1.5)
  record2 = tf.constant(3.0)

  query = quantile_estimator_query.QuantileEstimatorQuery(
      initial_estimate=0.5,
      target_quantile=1.0,
      learning_rate=np.log(2.0),  # Geometric steps in powers of 2.
      below_estimate_stddev=0.0,
      expected_num_records=2.0,
      geometric_update=True)

  global_state = query.initial_global_state()

  initial_estimate = global_state.current_estimate
  self.assertAllClose(initial_estimate, 0.5)

  # On the first two iterations, both records are above, so the estimate is
  # doubled. When the estimate reaches 2.0, only one record is above, so the
  # estimate is multiplied by sqrt(2.0). Still only one is above, so it
  # increases to 4.0. Now both records are below, and the estimate stays
  # there (at 4.0).
  two_times_root_two = 2 * np.sqrt(2.0)  # approx 2.828
  expected_estimates = [1.0, 2.0, two_times_root_two, 4.0, 4.0]

  for expected_estimate in expected_estimates:
    actual_estimate, global_state = test_utils.run_query(
        query, [record1, record2], global_state)

    self.assertAllClose(actual_estimate.numpy(), expected_estimate)
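# The same assumed geometric rule reproduces this test's expectations; a
# one-off check of the arithmetic (illustration only):
import numpy as np

est = 0.5
for _ in range(5):
  frac_below = np.mean([r <= est for r in [1.5, 3.0]])
  est *= np.exp(-np.log(2.0) * (frac_below - 1.0))
  print(est)  # 1.0, 2.0, 2.828..., 4.0, 4.0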
def _make_quantile_estimator_query(initial_estimate,
                                   target_quantile,
                                   learning_rate,
                                   below_estimate_stddev,
                                   expected_num_records,
                                   geometric_update):
  if expected_num_records is not None:
    return quantile_estimator_query.QuantileEstimatorQuery(
        initial_estimate, target_quantile, learning_rate,
        below_estimate_stddev, expected_num_records, geometric_update)
  else:
    return quantile_estimator_query.NoPrivacyQuantileEstimatorQuery(
        initial_estimate, target_quantile, learning_rate, geometric_update)
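# Hypothetical usage of the helper above: a concrete expected_num_records
# selects the private estimator, while None selects the no-privacy variant
# (which ignores below_estimate_stddev). All argument values here are
# illustrative.
private_query = _make_quantile_estimator_query(
    initial_estimate=1.0, target_quantile=0.5, learning_rate=0.2,
    below_estimate_stddev=2.0, expected_num_records=100,
    geometric_update=True)

exact_query = _make_quantile_estimator_query(
    initial_estimate=1.0, target_quantile=0.5, learning_rate=0.2,
    below_estimate_stddev=None, expected_num_records=None,
    geometric_update=True)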
def __init__(self,
             initial_l2_norm_clip,
             noise_multiplier,
             target_unclipped_quantile,
             learning_rate,
             clipped_count_stddev,
             expected_num_records,
             geometric_update=True):
  """Initializes the QuantileAdaptiveClipSumQuery.

  Args:
    initial_l2_norm_clip: The initial value of the clipping norm.
    noise_multiplier: The stddev of the noise added to the output of the
      sum query is this multiple of the l2_norm_clip.
    target_unclipped_quantile: The desired quantile of updates which should
      be unclipped. I.e., a value of 0.8 means a value of l2_norm_clip
      should be found for which approximately 20% of updates are clipped
      each round.
    learning_rate: The learning rate for the clipping norm adaptation. A
      rate of r means that the clipping norm will change by a maximum of r
      at each step. This maximum is attained when
      |unclipped_fraction - target_unclipped_quantile| is 1.0.
    clipped_count_stddev: The stddev of the noise added to the clipped_count.
      Since the sensitivity of the clipped count is 0.5, as a rule of thumb
      it should be about 0.5 for reasonable privacy.
    expected_num_records: The expected number of records per round, used to
      estimate the clipped count quantile.
    geometric_update: If True, use geometric updating of the clip norm.
  """
  self._noise_multiplier = noise_multiplier

  self._quantile_estimator_query = (
      quantile_estimator_query.QuantileEstimatorQuery(
          initial_l2_norm_clip, target_unclipped_quantile, learning_rate,
          clipped_count_stddev, expected_num_records, geometric_update))

  self._sum_query = gaussian_query.GaussianSumQuery(
      initial_l2_norm_clip, noise_multiplier * initial_l2_norm_clip)

  assert isinstance(self._sum_query, dp_query.SumAggregationDPQuery)
  assert isinstance(self._quantile_estimator_query,
                    dp_query.SumAggregationDPQuery)
def __init__(self,
             initial_l2_norm_clip,
             noise_multiplier,
             target_unclipped_quantile,
             learning_rate,
             clipped_count_stddev,
             expected_num_records,
             geometric_update=True):
  """Initializes the QuantileAdaptiveClipSumQuery.

  Args:
    initial_l2_norm_clip: The initial value of the clipping norm.
    noise_multiplier: The stddev of the noise added to the output will be
      this times the current value of the clipping norm.
    target_unclipped_quantile: The desired quantile of updates which should
      be unclipped. I.e., a value of 0.8 means a value of l2_norm_clip
      should be found for which approximately 20% of updates are clipped
      each round. Andrew et al. recommend that this be set to 0.5 to clip
      to the median.
    learning_rate: The learning rate for the clipping norm adaptation. With
      geometric updating, a rate of r means that the clipping norm will
      change by a maximum factor of exp(r) at each round. This maximum is
      attained when |actual_unclipped_fraction - target_unclipped_quantile|
      is 1.0. Andrew et al. recommend that this be set to 0.2 for geometric
      updating.
    clipped_count_stddev: The stddev of the noise added to the clipped_count.
      Andrew et al. recommend that this be set to `expected_num_records / 20`
      for reasonably fast adaptation and high privacy.
    expected_num_records: The expected number of records per round, used to
      estimate the clipped count quantile.
    geometric_update: If `True`, use geometric updating of the clip norm
      (recommended).
  """
  self._noise_multiplier = noise_multiplier

  self._quantile_estimator_query = (
      quantile_estimator_query.QuantileEstimatorQuery(
          initial_l2_norm_clip, target_unclipped_quantile, learning_rate,
          clipped_count_stddev, expected_num_records, geometric_update))

  self._sum_query = gaussian_query.GaussianSumQuery(
      initial_l2_norm_clip, noise_multiplier * initial_l2_norm_clip)

  assert isinstance(self._sum_query, dp_query.SumAggregationDPQuery)
  assert isinstance(self._quantile_estimator_query,
                    dp_query.SumAggregationDPQuery)
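# A usage sketch with the settings the docstring above recommends (target
# quantile 0.5, learning rate 0.2, clipped_count_stddev of
# expected_num_records / 20). The noise_multiplier and expected_num_records
# values are illustrative.
expected_num_records = 100
query = QuantileAdaptiveClipSumQuery(
    initial_l2_norm_clip=1.0,
    noise_multiplier=1.1,
    target_unclipped_quantile=0.5,
    learning_rate=0.2,
    clipped_count_stddev=expected_num_records / 20,
    expected_num_records=expected_num_records,
    geometric_update=True)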
def _make_quantile_estimator_query(initial_estimate,
                                   target_quantile,
                                   learning_rate,
                                   below_estimate_stddev,
                                   expected_num_records,
                                   geometric_update,
                                   tree_aggregation=False):
  if expected_num_records is not None:
    if tree_aggregation:
      return quantile_estimator_query.TreeQuantileEstimatorQuery(
          initial_estimate, target_quantile, learning_rate,
          below_estimate_stddev, expected_num_records, geometric_update)
    else:
      return quantile_estimator_query.QuantileEstimatorQuery(
          initial_estimate, target_quantile, learning_rate,
          below_estimate_stddev, expected_num_records, geometric_update)
  else:
    if tree_aggregation:
      raise ValueError(
          'Cannot set expected_num_records to None for tree aggregation.')
    return quantile_estimator_query.NoPrivacyQuantileEstimatorQuery(
        initial_estimate, target_quantile, learning_rate, geometric_update)
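# Hypothetical usage of the extended helper: tree_aggregation=True selects
# TreeQuantileEstimatorQuery and requires a concrete expected_num_records;
# combining it with expected_num_records=None raises the ValueError above.
# Argument values are illustrative.
tree_query = _make_quantile_estimator_query(
    initial_estimate=1.0, target_quantile=0.5, learning_rate=0.2,
    below_estimate_stddev=2.0, expected_num_records=100,
    geometric_update=True, tree_aggregation=True)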
def test_linspace(self, start_low, geometric):
  # 21 records equally spaced from 0 to 10 in 0.5 increments.
  # Test that we converge to the correct median value and bounce around it.
  num_records = 21
  records = [tf.constant(x) for x in np.linspace(
      0.0, 10.0, num=num_records, dtype=np.float32)]

  query = quantile_estimator_query.QuantileEstimatorQuery(
      initial_estimate=(1.0 if start_low else 10.0),
      target_quantile=0.5,
      learning_rate=1.0,
      below_estimate_stddev=0.0,
      expected_num_records=num_records,
      geometric_update=geometric)

  global_state = query.initial_global_state()

  for t in range(50):
    _, global_state = test_utils.run_query(query, records, global_state)

    actual_estimate = global_state.current_estimate

    if t > 40:
      self.assertNear(actual_estimate, 5.0, 0.25)
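# Sanity check of the comment above (plain numpy, illustration only): the
# records are spaced 0.5 apart and their median is the 5.0 the test targets.
import numpy as np

xs = np.linspace(0.0, 10.0, num=21)
print(xs[1] - xs[0])  # 0.5
print(np.median(xs))  # 5.0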