Example #1
0
    def testBaseline(self, cls, num_microbatches, expected_answer):
        """Checks the DP optimizer reproduces the expected average gradient."""
        with self.cached_session() as session:
            params = tf.Variable([1.0, 2.0])
            examples = tf.Variable(
                [[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])

            # Huge clip norm and zero noise, so the privacy machinery is
            # effectively a no-op here.
            query = privacy_ledger.QueryWithLedger(
                gaussian_query.GaussianSumQuery(1.0e9, 0.0),
                1e6, num_microbatches / 1e6)

            optimizer = cls(query,
                            num_microbatches=num_microbatches,
                            learning_rate=2.0)

            self.evaluate(tf.global_variables_initializer())
            # Sanity-check the initial parameter values.
            self.assertAllClose([1.0, 2.0], self.evaluate(params))

            # The expected gradient is the sum of per-example differences
            # divided by the number of microbatches.
            grad_op = optimizer.compute_gradients(
                self._loss(examples, params), [params])
            grads_and_vars = session.run(grad_op)
            self.assertAllCloseAccordingToType(
                expected_answer, grads_and_vars[0][0])
Example #2
0
    def test_gaussian_sum_merge(self):
        """Merging two sample states equals summing all records together."""
        records1 = [tf.constant([2.0, 0.0]), tf.constant([-1.0, 1.0])]
        records2 = [tf.constant([3.0, 5.0]), tf.constant([-1.0, 4.0])]

        def accumulate(records):
            # Builds a fresh query per call; the clip norm is large enough
            # that no record is actually clipped.
            query = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0,
                                                    stddev=1.0)
            global_state = query.initial_global_state()
            params = query.derive_sample_params(global_state)
            state = query.initial_sample_state(records[0])
            for record in records:
                state = query.accumulate_record(params, state, record)
            return state

        state_a = accumulate(records1)
        state_b = accumulate(records2)

        merged = gaussian_query.GaussianSumQuery(
            10.0, 1.0).merge_sample_states(state_a, state_b)

        with self.cached_session() as sess:
            result = sess.run(merged)

        # [2-1+3-1, 0+1+5+4] = [3, 10].
        self.assertAllClose(result, [3.0, 10.0])
Example #3
0
 def get_sample_state(records):
   """Accumulates all of `records` into a fresh GaussianSumQuery state."""
   query = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=1.0)
   global_state = query.initial_global_state()
   sample_params = query.derive_sample_params(global_state)
   # The first record serves only as a structure/shape template.
   state = query.initial_sample_state(global_state, records[0])
   for record in records:
     state = query.accumulate_record(sample_params, state, record)
   return state
Example #4
0
  def test_complex_nested_query(self):
    """Clipping of a nested record structure is applied per leaf query."""
    with self.cached_session() as sess:
      sum_ab = gaussian_query.GaussianSumQuery(
          l2_norm_clip=1.0, stddev=0.0)
      avg_c = gaussian_query.GaussianAverageQuery(
          l2_norm_clip=10.0, sum_stddev=0.0, denominator=2.0)
      sum_d = gaussian_query.GaussianSumQuery(
          l2_norm_clip=10.0, stddev=0.0)

      query = nested_query.NestedQuery(
          [sum_ab, {'c': avg_c, 'd': [sum_d]}])

      record1 = [{'a': 0.0, 'b': 2.71828}, {'c': (-4.0, 6.0), 'd': [-4.0]}]
      record2 = [{'a': 3.14159, 'b': 0.0}, {'c': (6.0, -4.0), 'd': [5.0]}]

      query_result, _ = test_utils.run_query(query, [record1, record2])
      result = sess.run(query_result)
      self.assertAllClose(
          result,
          [{'a': 1.0, 'b': 1.0}, {'c': (1.0, 1.0), 'd': [1.0]}])
    def test_gaussian_sum_no_clip_no_noise(self):
        """Without clipping or noise the query is a plain elementwise sum."""
        with self.cached_session() as sess:
            records = [tf.constant([2.0, 0.0]), tf.constant([-1.0, 1.0])]

            query = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0,
                                                    stddev=0.0)
            query_result, _ = test_utils.run_query(query, records)
            # [2-1, 0+1] = [1, 1].
            self.assertAllClose(sess.run(query_result), [1.0, 1.0])
    def __init__(self,
                 initial_l2_norm_clip,
                 noise_multiplier,
                 target_unclipped_quantile,
                 learning_rate,
                 clipped_count_stddev,
                 expected_num_records,
                 ledger=None):
        """Initializes the QuantileAdaptiveClipSumQuery.

        Args:
          initial_l2_norm_clip: The initial value of the clipping norm.
          noise_multiplier: The multiplier of the l2_norm_clip used to set the
            stddev of the noise added to the output of the sum query.
          target_unclipped_quantile: The desired quantile of updates which
            should be unclipped. I.e., a value of 0.8 means a value of
            l2_norm_clip should be found for which approximately 20% of
            updates are clipped each round.
          learning_rate: The learning rate for the clipping norm adaptation.
            A rate of r means that the clipping norm will change by a maximum
            of r at each step. This maximum is attained when |clip - target|
            is 1.0. Can be a tf.Variable, for example to implement a learning
            rate schedule.
          clipped_count_stddev: The stddev of the noise added to the
            clipped_count. Since the sensitivity of the clipped count is 0.5,
            as a rule of thumb it should be about 0.5 for reasonable privacy.
          expected_num_records: The expected number of records per round,
            used to estimate the clipped count quantile.
          ledger: The privacy ledger to which queries should be recorded.
        """
        # Cast all scalar hyperparameters to float32 tensors up front so the
        # arithmetic below works regardless of the Python types passed in.
        self._initial_l2_norm_clip = tf.cast(initial_l2_norm_clip, tf.float32)
        self._noise_multiplier = tf.cast(noise_multiplier, tf.float32)
        self._target_unclipped_quantile = tf.cast(target_unclipped_quantile,
                                                  tf.float32)
        self._learning_rate = tf.cast(learning_rate, tf.float32)

        # The clip norm and sum stddev are Variables so that the adaptive
        # algorithm can update them between rounds.
        self._l2_norm_clip = tf.Variable(self._initial_l2_norm_clip)
        self._sum_stddev = tf.Variable(self._initial_l2_norm_clip *
                                       self._noise_multiplier)
        self._sum_query = gaussian_query.GaussianSumQuery(
            self._l2_norm_clip, self._sum_stddev, ledger)

        # self._clipped_fraction_query is a DPQuery used to estimate the fraction of
        # records that are clipped. It accumulates an indicator 0/1 of whether each
        # record is clipped, and normalizes by the expected number of records. In
        # practice, we accumulate clipped counts shifted by -0.5 so they are
        # centered at zero. This makes the sensitivity of the clipped count query
        # 0.5 instead of 1.0, since the maximum that a single record could affect
        # the count is 0.5. Note that although the l2_norm_clip of the clipped
        # fraction query is 0.5, no clipping will ever actually occur because the
        # value of each record is always +/-0.5.
        self._clipped_fraction_query = gaussian_query.GaussianAverageQuery(
            l2_norm_clip=0.5,
            sum_stddev=clipped_count_stddev,
            denominator=expected_num_records,
            ledger=ledger)
    def test_gaussian_sum_with_clip_no_noise(self):
        """Records whose l2 norm exceeds the clip bound are rescaled to it."""
        with self.cached_session() as sess:
            clipped = tf.constant([-6.0, 8.0])    # Norm 10, rescaled to [-3.0, 4.0].
            unclipped = tf.constant([4.0, -3.0])  # Norm 5, kept as is.

            query = gaussian_query.GaussianSumQuery(l2_norm_clip=5.0,
                                                    stddev=0.0)
            query_result, _ = test_utils.run_query(query, [clipped, unclipped])
            # [-3+4, 4-3] = [1, 1].
            self.assertAllClose(sess.run(query_result), [1.0, 1.0])
Example #8
0
    def test_normalization(self):
        """NormalizedQuery divides the clipped sum by the denominator."""
        with self.cached_session() as sess:
            record1 = tf.constant([-6.0, 8.0])  # Norm 10, clipped to [-3.0, 4.0].
            record2 = tf.constant([4.0, -3.0])  # Norm 5, not clipped.

            sum_query = gaussian_query.GaussianSumQuery(l2_norm_clip=5.0,
                                                        stddev=0.0)
            query = normalized_query.NormalizedQuery(numerator_query=sum_query,
                                                     denominator=2.0)

            query_result, _ = test_utils.run_query(query, [record1, record2])
            # The clipped sum is [1.0, 1.0]; dividing by 2 gives [0.5, 0.5].
            self.assertAllClose(sess.run(query_result), [0.5, 0.5])
    def test_gaussian_sum_with_noise(self):
        """Checks the added Gaussian noise has the configured stddev.

        Draws 1000 noised sums and verifies their empirical standard
        deviation is close to the query's stddev parameter.
        """
        with self.cached_session() as sess:
            record1, record2 = 2.71828, 3.14159
            stddev = 1.0

            query = gaussian_query.GaussianSumQuery(l2_norm_clip=5.0,
                                                    stddev=stddev)
            query_result, _ = test_utils.run_query(query, [record1, record2])

            noised_sums = []
            # `xrange` is Python 2 only and raises NameError on Python 3;
            # the builtin `range` (used elsewhere in this file) is correct.
            for _ in range(1000):
                noised_sums.append(sess.run(query_result))

            result_stddev = np.std(noised_sums)
            self.assertNear(result_stddev, stddev, 0.1)
Example #10
0
        def linear_model_fn(features, labels, mode):
            """Estimator model_fn: a single dense unit trained with DP-SGD."""
            preds = tf.keras.layers.Dense(
                1, activation='linear', name='dense').apply(features['x'])

            vector_loss = tf.squared_difference(labels, preds)
            scalar_loss = tf.reduce_mean(vector_loss)

            # Unit clip norm, zero noise, a single microbatch.
            query = privacy_ledger.QueryWithLedger(
                gaussian_query.GaussianSumQuery(1.0, 0.0), 1e6, 1 / 1e6)
            optimizer = dp_optimizer.DPGradientDescentOptimizer(
                query, num_microbatches=1, learning_rate=1.0)
            # The DP optimizer needs the per-example (vector) loss; the
            # scalar loss is only reported through the EstimatorSpec.
            train_op = optimizer.minimize(
                loss=vector_loss, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(
                mode=mode, loss=scalar_loss, train_op=train_op)
Example #11
0
        def __init__(self,
                     l2_norm_clip,
                     noise_multiplier,
                     num_microbatches=None,
                     ledger=None,
                     unroll_microbatches=False,
                     *args,
                     **kwargs):
            """Builds the Gaussian sum query and delegates to the base class."""
            # The noise stddev is the clip norm scaled by the multiplier.
            query = gaussian_query.GaussianSumQuery(
                l2_norm_clip, l2_norm_clip * noise_multiplier)

            if ledger:
                # Optionally record every query in the supplied ledger.
                query = privacy_ledger.QueryWithLedger(query, ledger=ledger)

            super(DPGaussianOptimizerClass, self).__init__(
                query, num_microbatches, unroll_microbatches, *args, **kwargs)
  def testClippingNorm(self, cls):
    """Gradients are clipped down to the query's unit l2 norm."""
    with tf.GradientTape(persistent=True) as tape:
      weights = tf.Variable([0.0, 0.0])
      examples = tf.Variable([[3.0, 4.0], [6.0, 8.0]])

      query = privacy_ledger.QueryWithLedger(
          gaussian_query.GaussianSumQuery(1.0, 0.0), 1e6, 1 / 1e6)
      optimizer = cls(query, num_microbatches=1, learning_rate=2.0)

      self.evaluate(tf.global_variables_initializer())
      # Sanity-check the initial weights.
      self.assertAllClose([0.0, 0.0], self.evaluate(weights))

      # The summed gradient exceeds the unit clip norm, so it is rescaled.
      grads_and_vars = optimizer.compute_gradients(
          lambda: self._loss_fn(weights, examples), [weights],
          gradient_tape=tape)
      self.assertAllCloseAccordingToType([-0.6, -0.8], grads_and_vars[0][0])
Example #13
0
  def test_gaussian_sum_with_changing_clip_no_noise(self):
    """Reassigning the clip-norm variable must affect later query runs."""
    with self.cached_session() as sess:
      record1 = tf.constant([-6.0, 8.0])  # Norm 10, clipped to [-3.0, 4.0].
      record2 = tf.constant([4.0, -3.0])  # Norm 5, not clipped.

      l2_norm_clip = tf.Variable(5.0)
      clip_value = tf.placeholder(tf.float32)
      set_clip = tf.assign(l2_norm_clip, clip_value)
      query = gaussian_query.GaussianSumQuery(
          l2_norm_clip=l2_norm_clip, stddev=0.0)
      query_result, _ = test_utils.run_query(query, [record1, record2])

      self.evaluate(tf.global_variables_initializer())
      # With clip norm 5.0 the clipped sum is [1.0, 1.0].
      self.assertAllClose(sess.run(query_result), [1.0, 1.0])

      # With a zero clip norm every record collapses to the zero vector.
      sess.run(set_clip, {clip_value: 0.0})
      self.assertAllClose(sess.run(query_result), [0.0, 0.0])
Example #14
0
    def testClippingNorm(self, cls):
        """The DP optimizer clips each gradient to the query's l2 norm."""
        with self.cached_session() as session:
            weights = tf.Variable([0.0, 0.0])
            examples = tf.Variable([[3.0, 4.0], [6.0, 8.0]])

            query = privacy_ledger.QueryWithLedger(
                gaussian_query.GaussianSumQuery(1.0, 0.0), 1e6, 1 / 1e6)
            optimizer = cls(query, num_microbatches=1, learning_rate=2.0)

            self.evaluate(tf.global_variables_initializer())
            # Sanity-check the initial weights.
            self.assertAllClose([0.0, 0.0], self.evaluate(weights))

            # The summed per-example gradient is clipped down to unit norm.
            grad_op = optimizer.compute_gradients(
                self._loss(examples, weights), [weights])
            grads_and_vars = session.run(grad_op)
            self.assertAllCloseAccordingToType(
                [-0.6, -0.8], grads_and_vars[0][0])
  def test_sum_query(self):
    """The ledger records clip norm and stddev for every executed sample."""
    record1 = tf.constant([2.0, 0.0])
    record2 = tf.constant([-1.0, 1.0])

    population_size = tf.Variable(0)
    selection_probability = tf.Variable(0.0)
    ledger = privacy_ledger.PrivacyLedger(
        population_size, selection_probability, 50, 50)

    query = privacy_ledger.QueryWithLedger(
        gaussian_query.GaussianSumQuery(
            l2_norm_clip=10.0, stddev=0.0, ledger=ledger),
        ledger)

    # Each sample should log one (l2_norm_clip, stddev) entry.
    expected_queries = [[10.0, 0.0]]

    # First sample.
    tf.assign(population_size, 10)
    tf.assign(selection_probability, 0.1)
    test_utils.run_query(query, [record1, record2])

    sample_1 = ledger.get_formatted_ledger_eager()[0]
    self.assertAllClose(sample_1.population_size, 10.0)
    self.assertAllClose(sample_1.selection_probability, 0.1)
    self.assertAllClose(sample_1.queries, expected_queries)

    # Second sample; the first entry must remain unchanged.
    tf.assign(population_size, 20)
    tf.assign(selection_probability, 0.2)
    test_utils.run_query(query, [record1, record2])

    sample_1, sample_2 = ledger.get_formatted_ledger_eager()
    self.assertAllClose(sample_1.population_size, 10.0)
    self.assertAllClose(sample_1.selection_probability, 0.1)
    self.assertAllClose(sample_1.queries, expected_queries)

    self.assertAllClose(sample_2.population_size, 20.0)
    self.assertAllClose(sample_2.selection_probability, 0.2)
    self.assertAllClose(sample_2.queries, expected_queries)
  def testNoiseMultiplier(self, cls):
    """Gradient noise stddev equals l2_norm_clip * noise_multiplier."""
    with tf.GradientTape(persistent=True) as tape:
      weights = tf.Variable([0.0])
      examples = tf.Variable([[0.0]])

      # Clip norm 4.0 with stddev 8.0, i.e. a noise multiplier of 2.0.
      query = privacy_ledger.QueryWithLedger(
          gaussian_query.GaussianSumQuery(4.0, 8.0), 1e6, 1 / 1e6)
      optimizer = cls(query, num_microbatches=1, learning_rate=2.0)

      self.evaluate(tf.global_variables_initializer())
      # Sanity-check the initial weights.
      self.assertAllClose([0.0], self.evaluate(weights))

      samples = []
      for _ in range(1000):
        grads_and_vars = optimizer.compute_gradients(
            lambda: self._loss_fn(weights, examples), [weights],
            gradient_tape=tape)
        samples.append(grads_and_vars[0][0])

      # The empirical stddev should be close to 2.0 * 4.0 = 8.0.
      self.assertNear(np.std(samples), 2.0 * 4.0, 0.5)
Example #17
0
  def test_nested_query_with_noise(self):
    """Each leaf of a nested query carries its own noise scale."""
    with self.cached_session() as sess:
      sum_stddev = 2.71828
      denominator = 3.14159

      sum_query = gaussian_query.GaussianSumQuery(
          l2_norm_clip=1.5, stddev=sum_stddev)
      avg_query = gaussian_query.GaussianAverageQuery(
          l2_norm_clip=0.5, sum_stddev=sum_stddev, denominator=denominator)
      query = nested_query.NestedQuery((sum_query, avg_query))

      record1 = (3.0, [2.0, 1.5])
      record2 = (0.0, [-1.0, -3.5])

      query_result, _ = test_utils.run_query(query, [record1, record2])

      samples = []
      for _ in range(1000):
        samples.append(nest.flatten(sess.run(query_result)))

      # Averaging divides the sum noise stddev by the denominator.
      result_stddev = np.std(samples, 0)
      avg_stddev = sum_stddev / denominator
      expected_stddev = [sum_stddev, avg_stddev, avg_stddev]
      self.assertArrayNear(result_stddev, expected_stddev, 0.1)
Example #18
0
from absl.testing import parameterized
from distutils.version import LooseVersion
import numpy as np
import tensorflow as tf

from privacy.dp_query import gaussian_query
from privacy.dp_query import nested_query
from privacy.dp_query import test_utils

if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
  nest = tf.contrib.framework.nest
else:
  nest = tf.nest

# Shared unit-clip, zero-noise sum query used as a building block by the
# tests below.
_basic_query = gaussian_query.GaussianSumQuery(1.0, 0.0)


class NestedQueryTest(tf.test.TestCase, parameterized.TestCase):

  def test_nested_gaussian_sum_no_clip_no_noise(self):
    with self.cached_session() as sess:
      query1 = gaussian_query.GaussianSumQuery(
          l2_norm_clip=10.0, stddev=0.0)
      query2 = gaussian_query.GaussianSumQuery(
          l2_norm_clip=10.0, stddev=0.0)

      query = nested_query.NestedQuery([query1, query2])

      record1 = [1.0, [2.0, 3.0]]
      record2 = [4.0, [3.0, 2.0]]
 def test_incompatible_records(self, record1, record2, error_type):
     """Accumulating structurally mismatched records raises `error_type`."""
     sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
     with self.assertRaises(error_type):
         test_utils.run_query(sum_query, [record1, record2])