Beispiel #1
0
  def test_target_zero_geometric(self):
    """Checks geometric descent of the estimate for a target quantile of 0."""
    records = [tf.constant(5.0), tf.constant(2.5)]

    query = quantile_estimator_query.QuantileEstimatorQuery(
        initial_estimate=16.0,
        target_quantile=0.0,
        learning_rate=np.log(2.0),  # A full geometric step is a factor of 2.
        below_estimate_stddev=0.0,
        expected_num_records=2.0,
        geometric_update=True)

    state = query.initial_global_state()
    self.assertAllClose(state.current_estimate, 16.0)

    # Expected trajectory, starting at 16.0 with records 5.0 and 2.5:
    #   16 -> 8 -> 4:     both records are below, so the estimate is halved.
    #   4 -> 4 / sqrt(2): only 2.5 is below, so it shrinks by sqrt(2) only.
    #   4 / sqrt(2) -> 2: still only 2.5 is below.
    #   2 -> 2:           no record is below, so the estimate stays at 2.0.
    shrunk_by_root_two = 4 / np.sqrt(2.0)  # approx 2.828

    trajectory = (8.0, 4.0, shrunk_by_root_two, 2.0, 2.0)
    for want in trajectory:
      got, state = test_utils.run_query(query, records, state)
      self.assertAllClose(got.numpy(), want)
Beispiel #2
0
  def test_target_one(self):
    """Checks linear ascent of the estimate for a target quantile of 1."""
    records = [tf.constant(1.5), tf.constant(2.75)]

    query = quantile_estimator_query.QuantileEstimatorQuery(
        initial_estimate=0.0,
        target_quantile=1.0,
        learning_rate=1.0,
        below_estimate_stddev=0.0,
        expected_num_records=2.0,
        geometric_update=False)

    state = query.initial_global_state()
    self.assertAllClose(state.current_estimate, 0.0)

    # Expected trajectory, starting at 0.0 with records 1.5 and 2.75:
    #   0 -> 1 -> 2:   both records are above, so the estimate rises by the
    #                  full learning rate (1.0).
    #   2 -> 2.5 -> 3: only 2.75 is above, so each step is only 0.5.
    #   3 -> 3:        both records are below, so the estimate stays at 3.0.
    trajectory = (1.0, 2.0, 2.5, 3.0, 3.0)
    for want in trajectory:
      got, state = test_utils.run_query(query, records, state)
      self.assertAllClose(got.numpy(), want)
Beispiel #3
0
  def test_target_zero(self):
    """Checks linear descent of the estimate for a target quantile of 0."""
    records = [tf.constant(8.5), tf.constant(7.25)]

    query = quantile_estimator_query.QuantileEstimatorQuery(
        initial_estimate=10.0,
        target_quantile=0.0,
        learning_rate=1.0,
        below_estimate_stddev=0.0,
        expected_num_records=2.0,
        geometric_update=False)

    state = query.initial_global_state()
    self.assertAllClose(state.current_estimate, 10.0)

    # Expected trajectory, starting at 10.0 with records 8.5 and 7.25:
    #   10 -> 9 -> 8:  both records are below, so the estimate drops by the
    #                  full learning rate (1.0).
    #   8 -> 7.5 -> 7: only 7.25 is below, so each step is only 0.5.
    #   7 -> 7:        no record is below any more (both are above), so the
    #                  estimate stays at 7.0.
    trajectory = (9.0, 8.0, 7.5, 7.0, 7.0)
    for want in trajectory:
      got, state = test_utils.run_query(query, records, state)
      self.assertAllClose(got.numpy(), want)
Beispiel #4
0
  def test_all_equal(self, start_low, geometric):
    """Checks convergence to the common value when all records are equal.

    Args:
      start_low: If true, the initial estimate is 1.0 (below the records);
        otherwise it is 10.0 (above them).
      geometric: Forwarded to the query as `geometric_update`.
    """
    # 20 equal records. Test that we converge to that record and bounce around
    # it. Unlike the linspace test, the quantile-matching objective is very
    # sharp at the optimum so a decaying learning rate is necessary.
    num_records = 20
    records = [tf.constant(5.0)] * num_records

    # Held in a variable so the rate seen by the query can be decayed between
    # rounds without rebuilding the query.
    learning_rate = tf.Variable(1.0)

    query = quantile_estimator_query.QuantileEstimatorQuery(
        initial_estimate=(1.0 if start_low else 10.0),
        target_quantile=0.5,
        learning_rate=learning_rate,
        below_estimate_stddev=0.0,
        expected_num_records=num_records,
        geometric_update=geometric)

    global_state = query.initial_global_state()

    for t in range(50):
      # Use the variable's own `assign` method: the module-level `tf.assign`
      # only exists in the TF1 namespace, whereas `Variable.assign` is
      # available under both TF1-compat and TF2.
      learning_rate.assign(1.0 / np.sqrt(t + 1))
      _, global_state = test_utils.run_query(query, records, global_state)

      actual_estimate = global_state.current_estimate

      # Only the last nine rounds are checked, once the rate has decayed.
      if t > 40:
        self.assertNear(actual_estimate, 5.0, 0.5)
Beispiel #5
0
  def test_target_one_geometric(self):
    """Checks geometric ascent of the estimate for a target quantile of 1."""
    records = [tf.constant(1.5), tf.constant(3.0)]

    query = quantile_estimator_query.QuantileEstimatorQuery(
        initial_estimate=0.5,
        target_quantile=1.0,
        learning_rate=np.log(2.0),  # A full geometric step is a factor of 2.
        below_estimate_stddev=0.0,
        expected_num_records=2.0,
        geometric_update=True)

    state = query.initial_global_state()
    self.assertAllClose(state.current_estimate, 0.5)

    # Expected trajectory, starting at 0.5 with records 1.5 and 3.0:
    #   0.5 -> 1 -> 2:    both records are above, so the estimate is doubled.
    #   2 -> 2 * sqrt(2): only 3.0 is above, so it grows by sqrt(2) only.
    #   2 * sqrt(2) -> 4: still only 3.0 is above.
    #   4 -> 4:           no record is above any more (both are below), so the
    #                     estimate stays at 4.0.
    grown_by_root_two = 2 * np.sqrt(2.0)  # approx 2.828

    trajectory = (1.0, 2.0, grown_by_root_two, 4.0, 4.0)
    for want in trajectory:
      got, state = test_utils.run_query(query, records, state)
      self.assertAllClose(got.numpy(), want)
def _make_quantile_estimator_query(initial_estimate, target_quantile,
                                   learning_rate, below_estimate_stddev,
                                   expected_num_records, geometric_update):
    """Builds the quantile estimator query selected by `expected_num_records`.

    When `expected_num_records` is None a `NoPrivacyQuantileEstimatorQuery` is
    returned; `below_estimate_stddev` is not forwarded in that case. Otherwise
    a `QuantileEstimatorQuery` is built from all six arguments.
    """
    if expected_num_records is None:
        return quantile_estimator_query.NoPrivacyQuantileEstimatorQuery(
            initial_estimate, target_quantile, learning_rate, geometric_update)

    return quantile_estimator_query.QuantileEstimatorQuery(
        initial_estimate, target_quantile, learning_rate,
        below_estimate_stddev, expected_num_records, geometric_update)
Beispiel #7
0
  def __init__(
      self,
      initial_l2_norm_clip,
      noise_multiplier,
      target_unclipped_quantile,
      learning_rate,
      clipped_count_stddev,
      expected_num_records,
      geometric_update=True):
    """Initializes the QuantileAdaptiveClipSumQuery.

    Two sub-queries are created: a `QuantileEstimatorQuery` that adapts the
    clipping norm, and a `GaussianSumQuery` configured from the initial
    clipping norm.

    Args:
      initial_l2_norm_clip: Starting value of the clipping norm.
      noise_multiplier: Factor applied to the clipping norm to obtain the
        stddev of the noise added to the output of the sum query.
      target_unclipped_quantile: Desired quantile of updates that should be
        left unclipped. E.g. 0.8 means an l2_norm_clip should be found for
        which approximately 20% of updates are clipped each round.
      learning_rate: Learning rate for the clipping norm adaptation. A rate of
        r means that the clipping norm will change by a maximum of r at each
        step. This maximum is attained when |clip - target| is 1.0.
      clipped_count_stddev: Stddev of the noise added to the clipped_count.
        Since the sensitivity of the clipped count is 0.5, as a rule of thumb
        it should be about 0.5 for reasonable privacy.
      expected_num_records: Expected number of records per round, used to
        estimate the clipped count quantile.
      geometric_update: If True, use geometric updating of clip.
    """
    self._noise_multiplier = noise_multiplier

    # Estimator that tracks the target quantile to adapt the clipping norm.
    estimator = quantile_estimator_query.QuantileEstimatorQuery(
        initial_l2_norm_clip, target_unclipped_quantile, learning_rate,
        clipped_count_stddev, expected_num_records, geometric_update)
    self._quantile_estimator_query = estimator

    # Sum query whose initial noise stddev is proportional to the initial clip.
    summer = gaussian_query.GaussianSumQuery(
        initial_l2_norm_clip, noise_multiplier * initial_l2_norm_clip)
    self._sum_query = summer

    # Both sub-queries are expected to be sum-aggregation queries.
    for sub_query in (self._sum_query, self._quantile_estimator_query):
      assert isinstance(sub_query, dp_query.SumAggregationDPQuery)
Beispiel #8
0
    def __init__(self,
                 initial_l2_norm_clip,
                 noise_multiplier,
                 target_unclipped_quantile,
                 learning_rate,
                 clipped_count_stddev,
                 expected_num_records,
                 geometric_update=True):
        """Initializes the QuantileAdaptiveClipSumQuery.

        Two sub-queries are created: a `QuantileEstimatorQuery` that adapts
        the clipping norm, and a `GaussianSumQuery` configured from the
        initial clipping norm.

        Args:
          initial_l2_norm_clip: Starting value of the clipping norm.
          noise_multiplier: The stddev of the noise added to the output will
            be this times the current value of the clipping norm.
          target_unclipped_quantile: Desired quantile of updates that should
            be left unclipped. E.g. 0.8 means an l2_norm_clip should be found
            for which approximately 20% of updates are clipped each round.
            Andrew et al. recommends 0.5, i.e. clipping to the median.
          learning_rate: Learning rate for the clipping norm adaptation. With
            geometric updating, a rate of r means that the clipping norm will
            change by a maximum factor of exp(r) at each round. This maximum
            is attained when
            |actual_unclipped_fraction - target_unclipped_quantile| is 1.0.
            Andrew et al. recommends 0.2 for geometric updating.
          clipped_count_stddev: Stddev of the noise added to the
            clipped_count. Andrew et al. recommends
            `expected_num_records / 20` for reasonably fast adaptation and
            high privacy.
          expected_num_records: Expected number of records per round, used to
            estimate the clipped count quantile.
          geometric_update: If `True`, use geometric updating of clip
            (recommended).
        """
        self._noise_multiplier = noise_multiplier

        # Estimator that tracks the target quantile to adapt the clip.
        estimator = quantile_estimator_query.QuantileEstimatorQuery(
            initial_l2_norm_clip,
            target_unclipped_quantile,
            learning_rate,
            clipped_count_stddev,
            expected_num_records,
            geometric_update)
        self._quantile_estimator_query = estimator

        # Sum query whose initial noise stddev is proportional to the clip.
        summer = gaussian_query.GaussianSumQuery(
            initial_l2_norm_clip,
            noise_multiplier * initial_l2_norm_clip)
        self._sum_query = summer

        # Both sub-queries are expected to be sum-aggregation queries.
        for sub_query in (self._sum_query, self._quantile_estimator_query):
            assert isinstance(sub_query, dp_query.SumAggregationDPQuery)
def _make_quantile_estimator_query(initial_estimate,
                                   target_quantile,
                                   learning_rate,
                                   below_estimate_stddev,
                                   expected_num_records,
                                   geometric_update,
                                   tree_aggregation=False):
    """Builds the quantile estimator query selected by the arguments.

    * `expected_num_records` is None: returns a
      `NoPrivacyQuantileEstimatorQuery`; `below_estimate_stddev` is not
      forwarded. Combining this with `tree_aggregation=True` is rejected.
    * otherwise: returns a `TreeQuantileEstimatorQuery` when
      `tree_aggregation` is true, else a `QuantileEstimatorQuery`, each built
      from the first six arguments.

    Raises:
      ValueError: If `expected_num_records` is None and `tree_aggregation` is
        true.
    """
    if expected_num_records is None:
        if tree_aggregation:
            raise ValueError(
                'Cannot set expected_num_records to None for tree aggregation.'
            )
        return quantile_estimator_query.NoPrivacyQuantileEstimatorQuery(
            initial_estimate, target_quantile, learning_rate, geometric_update)

    # Private variants share one positional signature; only the class differs.
    if tree_aggregation:
        query_cls = quantile_estimator_query.TreeQuantileEstimatorQuery
    else:
        query_cls = quantile_estimator_query.QuantileEstimatorQuery
    return query_cls(initial_estimate, target_quantile, learning_rate,
                     below_estimate_stddev, expected_num_records,
                     geometric_update)
Beispiel #10
0
  def test_linspace(self, start_low, geometric):
    """Checks convergence to the median of evenly spaced records.

    Args:
      start_low: If true, the initial estimate is 1.0; otherwise it is 10.0.
      geometric: Forwarded to the query as `geometric_update`.
    """
    # 21 records equally spaced from 0 to 10 in 0.5 increments.
    # Test that we converge to the correct median value and bounce around it.
    num_records = 21
    values = np.linspace(0.0, 10.0, num=num_records, dtype=np.float32)
    records = [tf.constant(value) for value in values]

    query = quantile_estimator_query.QuantileEstimatorQuery(
        initial_estimate=(1.0 if start_low else 10.0),
        target_quantile=0.5,
        learning_rate=1.0,
        below_estimate_stddev=0.0,
        expected_num_records=num_records,
        geometric_update=geometric)

    state = query.initial_global_state()

    for step in range(50):
      _, state = test_utils.run_query(query, records, state)

      # Only the last nine rounds are checked, after the estimate has had
      # time to reach the median.
      if step > 40:
        self.assertNear(state.current_estimate, 5.0, 0.25)