Example 1
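These snippets are drawn from the TensorFlow Privacy (tf_privacy) test suite and optimizer wrappers, and they all assume the same preamble. A minimal sketch of the imports they rely on, assuming the tf_privacy module layout of the TF 1.x era (verify the paths against your installed version):

import numpy as np
import tensorflow as tf

# Assumed tf_privacy modules used across the examples below.
from tensorflow_privacy.privacy.analysis import privacy_ledger
from tensorflow_privacy.privacy.dp_query import gaussian_query
from tensorflow_privacy.privacy.dp_query import nested_query
from tensorflow_privacy.privacy.dp_query import quantile_adaptive_clip_sum_query
from tensorflow_privacy.privacy.dp_query import test_utils
from tensorflow_privacy.privacy.optimizers import dp_optimizer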
    def testBaseline(self, cls, num_microbatches, expected_answer):
        with tf.GradientTape(persistent=True) as gradient_tape:
            var0 = tf.Variable([1.0, 2.0])
            data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0],
                                 [-1.0, 0.0]])

            dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
            dp_sum_query = privacy_ledger.QueryWithLedger(
                dp_sum_query, 1e6, num_microbatches / 1e6)

            opt = cls(dp_sum_query,
                      num_microbatches=num_microbatches,
                      learning_rate=2.0)

            self.evaluate(tf.compat.v1.global_variables_initializer())
            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))

            # Expected gradient is sum of differences divided by number of
            # microbatches.
            grads_and_vars = opt.compute_gradients(
                lambda: self._loss_fn(var0, data0), [var0],
                gradient_tape=gradient_tape)
            self.assertAllCloseAccordingToType(expected_answer,
                                               grads_and_vars[0][0])
Example 2
    def __init__(
        self,
        l2_norm_clip,
        noise_multiplier,
        num_microbatches=None,
        ledger=None,
        unroll_microbatches=False,
        *args,  # pylint: disable=keyword-arg-before-vararg
        **kwargs):
      self._l2_norm_clip = l2_norm_clip
      self._noise_multiplier = noise_multiplier
      self._num_microbatches = num_microbatches
      self._base_optimizer_class = cls

      dp_sum_query = gaussian_query.GaussianSumQuery(
          l2_norm_clip, l2_norm_clip * noise_multiplier)

      if ledger:
        dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query,
                                                      ledger=ledger)

      super(DPGaussianOptimizerClass, self).__init__(
          dp_sum_query,
          num_microbatches,
          unroll_microbatches,
          *args,
          **kwargs)
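This `__init__` closes over a free variable `cls`: in tf_privacy it lives inside the `make_gaussian_optimizer_class(cls)` factory, which wraps a vanilla optimizer class with DP-SGD behavior. A hedged sketch of how the generated class is typically obtained and used (hyperparameter values are illustrative):

# Derive a DP optimizer class from a standard TF1 optimizer; `cls` in the
# __init__ above is the base optimizer class captured by the factory.
DPGradientDescentGaussianOptimizer = dp_optimizer.make_gaussian_optimizer_class(
    tf.compat.v1.train.GradientDescentOptimizer)

opt = DPGradientDescentGaussianOptimizer(
    l2_norm_clip=1.0,        # max L2 norm of each microbatch gradient
    noise_multiplier=1.1,    # noise stddev = 1.1 * l2_norm_clip
    num_microbatches=4,
    learning_rate=0.1)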
Example 3
  def testUnrollMicrobatches(self, cls):
    with self.cached_session() as sess:
      var0 = tf.Variable([1.0, 2.0])
      data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])

      num_microbatches = 4

      dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
      dp_sum_query = privacy_ledger.QueryWithLedger(
          dp_sum_query, 1e6, num_microbatches / 1e6)

      opt = cls(
          dp_sum_query,
          num_microbatches=num_microbatches,
          learning_rate=2.0,
          unroll_microbatches=True)

      self.evaluate(tf.compat.v1.global_variables_initializer())
      # Fetch params to validate initial values
      self.assertAllClose([1.0, 2.0], self.evaluate(var0))

      # Expected gradient is sum of differences divided by number of
      # microbatches.
      gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0])
      grads_and_vars = sess.run(gradient_op)
      self.assertAllCloseAccordingToType([-2.5, -2.5], grads_and_vars[0][0])
Example 4
    def linear_model_fn(features, labels, mode):
      preds = tf.keras.layers.Dense(
          1, activation='linear', name='dense')(features['x'])

      vector_loss = tf.math.squared_difference(labels, preds)
      scalar_loss = tf.reduce_mean(input_tensor=vector_loss)
      dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
      dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)
      optimizer = dp_optimizer.DPGradientDescentOptimizer(
          dp_sum_query,
          num_microbatches=1,
          learning_rate=1.0)
      global_step = tf.compat.v1.train.get_global_step()
      train_op = optimizer.minimize(loss=vector_loss, global_step=global_step)
      return tf.estimator.EstimatorSpec(
          mode=mode, loss=scalar_loss, train_op=train_op)
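Note that `minimize` receives the per-example `vector_loss` (the DP optimizer needs it to clip and noise per microbatch), while the `EstimatorSpec` reports the reduced `scalar_loss`. For context, a `model_fn` like this would be driven by a `tf.estimator.Estimator`; a minimal sketch with made-up training data:

# Hypothetical wiring; the feature/label arrays are illustrative only.
estimator = tf.estimator.Estimator(model_fn=linear_model_fn)
train_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
    x={'x': np.array([[1.0], [2.0]], dtype=np.float32)},
    y=np.array([[2.0], [4.0]], dtype=np.float32),
    batch_size=1, num_epochs=None, shuffle=True)
estimator.train(input_fn=train_input_fn, steps=10)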
Example 5
    def test_ledger(self):
        record1 = tf.constant([8.5])
        record2 = tf.constant([-7.25])

        population_size = tf.Variable(0)
        selection_probability = tf.Variable(1.0)

        query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
            initial_l2_norm_clip=10.0,
            noise_multiplier=1.0,
            target_unclipped_quantile=0.0,
            learning_rate=1.0,
            clipped_count_stddev=0.0,
            expected_num_records=2.0,
            geometric_update=False)

        query = privacy_ledger.QueryWithLedger(query, population_size,
                                               selection_probability)

        # First sample.
        tf.compat.v1.assign(population_size, 10)
        tf.compat.v1.assign(selection_probability, 0.1)
        _, global_state = test_utils.run_query(query, [record1, record2])

        expected_queries = [[10.0, 10.0], [0.5, 0.0]]
        formatted = query.ledger.get_formatted_ledger_eager()
        sample_1 = formatted[0]
        self.assertAllClose(sample_1.population_size, 10.0)
        self.assertAllClose(sample_1.selection_probability, 0.1)
        self.assertAllClose(sample_1.queries, expected_queries)

        # Second sample.
        tf.compat.v1.assign(population_size, 20)
        tf.compat.v1.assign(selection_probability, 0.2)
        test_utils.run_query(query, [record1, record2], global_state)

        formatted = query.ledger.get_formatted_ledger_eager()
        sample_1, sample_2 = formatted
        self.assertAllClose(sample_1.population_size, 10.0)
        self.assertAllClose(sample_1.selection_probability, 0.1)
        self.assertAllClose(sample_1.queries, expected_queries)

        expected_queries_2 = [[9.0, 9.0], [0.5, 0.0]]
        self.assertAllClose(sample_2.population_size, 20.0)
        self.assertAllClose(sample_2.selection_probability, 0.2)
        self.assertAllClose(sample_2.queries, expected_queries_2)
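Each entry in a ledger sample's `queries` list is an `(l2_norm_clip, noise_stddev)` pair, one per recorded sub-query. The clip drops from 10.0 to 9.0 between the two samples because of the quantile update: both records (8.5 and -7.25) fall inside the clip of 10.0, so the unclipped fraction is 1.0, and with `target_unclipped_quantile=0.0`, `learning_rate=1.0`, and `geometric_update=False` the new clip follows directly. A sanity check of the arithmetic (plain Python, not library code):

# new_clip = clip - learning_rate * (unclipped_fraction - target_quantile)
unclipped_fraction = 1.0  # both 8.5 and -7.25 have norm below the clip of 10.0
new_clip = 10.0 - 1.0 * (unclipped_fraction - 0.0)
assert new_clip == 9.0    # matches expected_queries_2 above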
Example 6
  def testClippingNorm(self, cls):
    with self.cached_session() as sess:
      var0 = tf.Variable([0.0, 0.0])
      data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])

      dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
      dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)

      opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)

      self.evaluate(tf.compat.v1.global_variables_initializer())
      # Fetch params to validate initial values
      self.assertAllClose([0.0, 0.0], self.evaluate(var0))

      # Expected gradient is sum of differences.
      gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0])
      grads_and_vars = sess.run(gradient_op)
      self.assertAllCloseAccordingToType([-0.6, -0.8], grads_and_vars[0][0])
Example 7
    def test_nested_query(self):
        population_size = tf.Variable(0)
        selection_probability = tf.Variable(1.0)

        query1 = gaussian_query.GaussianAverageQuery(l2_norm_clip=4.0,
                                                     sum_stddev=2.0,
                                                     denominator=5.0)
        query2 = gaussian_query.GaussianAverageQuery(l2_norm_clip=5.0,
                                                     sum_stddev=1.0,
                                                     denominator=5.0)

        query = nested_query.NestedQuery([query1, query2])
        query = privacy_ledger.QueryWithLedger(query, population_size,
                                               selection_probability)

        record1 = [1.0, [12.0, 9.0]]
        record2 = [5.0, [1.0, 2.0]]

        # First sample.
        tf.compat.v1.assign(population_size, 10)
        tf.compat.v1.assign(selection_probability, 0.1)
        test_utils.run_query(query, [record1, record2])

        expected_queries = [[4.0, 2.0], [5.0, 1.0]]
        formatted = query.ledger.get_formatted_ledger_eager()
        sample_1 = formatted[0]
        self.assertAllClose(sample_1.population_size, 10.0)
        self.assertAllClose(sample_1.selection_probability, 0.1)
        self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries))

        # Second sample.
        tf.compat.v1.assign(population_size, 20)
        tf.compat.v1.assign(selection_probability, 0.2)
        test_utils.run_query(query, [record1, record2])

        formatted = query.ledger.get_formatted_ledger_eager()
        sample_1, sample_2 = formatted
        self.assertAllClose(sample_1.population_size, 10.0)
        self.assertAllClose(sample_1.selection_probability, 0.1)
        self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries))

        self.assertAllClose(sample_2.population_size, 20.0)
        self.assertAllClose(sample_2.selection_probability, 0.2)
        self.assertAllClose(sorted(sample_2.queries), sorted(expected_queries))
Example 8
        def __init__(
                self,
                l2_norm_clip,
                exponents,
                noise_variance,
                num_microbatches=None,
                ledger=None,
                unroll_microbatches=False,
                *args,  # pylint: disable=keyword-arg-before-vararg
                **kwargs):
            dp_sum_query = dp_fixedvariance_query.GeneralSumQuery(
                l2_norm_clip, exponents, noise_variance)

            if ledger:
                dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query,
                                                              ledger=ledger)
            super(DP_fixedvarianceoptimizerClass,
                  self).__init__(dp_sum_query, num_microbatches,
                                 unroll_microbatches, *args, **kwargs)
Example 9
    def testClippingNorm(self, cls):
        with tf.GradientTape(persistent=True) as gradient_tape:
            var0 = tf.Variable([0.0, 0.0])
            data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])

            dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
            dp_sum_query = privacy_ledger.QueryWithLedger(
                dp_sum_query, 1e6, 1 / 1e6)

            opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)

            self.evaluate(tf.compat.v1.global_variables_initializer())
            # Fetch params to validate initial values
            self.assertAllClose([0.0, 0.0], self.evaluate(var0))

            # Expected gradient is sum of differences.
            grads_and_vars = opt.compute_gradients(
                lambda: self._loss_fn(var0, data0), [var0],
                gradient_tape=gradient_tape)
            self.assertAllCloseAccordingToType([-0.6, -0.8],
                                               grads_and_vars[0][0])
Example 10
        def __init__(
                self,
                l2_norm_clip,
                noise_multiplier,
                num_microbatches=None,
                ledger=None,
                unroll_microbatches=False,
                *args,  # pylint: disable=keyword-arg-before-vararg
                **kwargs):
            """Initializes the `DPGaussianOptimizerClass`.

            Args:
              l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients).
              noise_multiplier: Ratio of the standard deviation to the clipping norm.
              num_microbatches: Number of microbatches into which each minibatch is
                split. If `None`, will default to the size of the minibatch, and
                per-example gradients will be computed.
              ledger: Defaults to `None`. An instance of `tf_privacy.PrivacyLedger`.
              unroll_microbatches: If true, processes microbatches within a Python
                loop instead of a `tf.while_loop`. Can be used if using a
                `tf.while_loop` raises an exception.
              *args: These will be passed on to the base class `__init__` method.
              **kwargs: These will be passed on to the base class `__init__` method.
            """
            self._l2_norm_clip = l2_norm_clip
            self._noise_multiplier = noise_multiplier
            self._num_microbatches = num_microbatches
            self._base_optimizer_class = cls

            dp_sum_query = gaussian_query.GaussianSumQuery(
                l2_norm_clip, l2_norm_clip * noise_multiplier)

            if ledger:
                dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query,
                                                              ledger=ledger)

            super(DPGaussianOptimizerClass,
                  self).__init__(dp_sum_query, num_microbatches,
                                 unroll_microbatches, *args, **kwargs)
Example 11
  def testNoiseMultiplier(self, cls):
    with self.cached_session() as sess:
      var0 = tf.Variable([0.0])
      data0 = tf.Variable([[0.0]])

      dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0)
      dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)

      opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)

      self.evaluate(tf.compat.v1.global_variables_initializer())
      # Fetch params to validate initial values
      self.assertAllClose([0.0], self.evaluate(var0))

      gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0])
      grads = []
      for _ in range(1000):
        grads_and_vars = sess.run(gradient_op)
        grads.append(grads_and_vars[0][0])

      # Test standard deviation is close to l2_norm_clip * noise_multiplier.
      self.assertNear(np.std(grads), 2.0 * 4.0, 0.5)
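The asserted value unpacks as follows: `GaussianSumQuery(4.0, 8.0)` adds Gaussian noise with stddev 8.0 to the clipped sum, and with `num_microbatches=1` the reported gradient is that noised sum divided by 1, so the empirical stddev over 1000 draws should land near 8.0 = 2.0 * 4.0 (noise_multiplier times l2_norm_clip). A standalone check that the 0.5 tolerance is reasonable (hypothetical, not part of the test):

# For N = 1000 draws, the sample stddev itself fluctuates by roughly
# sigma / sqrt(2N) ~= 8.0 / 44.7 ~= 0.18, well inside the 0.5 tolerance.
samples = np.random.normal(loc=0.0, scale=8.0, size=1000)
print(np.std(samples))  # ~8.0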
Example 12
  def test_sum_query(self):
    record1 = tf.constant([2.0, 0.0])
    record2 = tf.constant([-1.0, 1.0])

    population_size = tf.Variable(0)
    selection_probability = tf.Variable(1.0)

    query = gaussian_query.GaussianSumQuery(
        l2_norm_clip=10.0, stddev=0.0)
    query = privacy_ledger.QueryWithLedger(
        query, population_size, selection_probability)

    # First sample.
    tf.compat.v1.assign(population_size, 10)
    tf.compat.v1.assign(selection_probability, 0.1)
    test_utils.run_query(query, [record1, record2])

    expected_queries = [[10.0, 0.0]]
    formatted = query.ledger.get_formatted_ledger_eager()
    sample_1 = formatted[0]
    self.assertAllClose(sample_1.population_size, 10.0)
    self.assertAllClose(sample_1.selection_probability, 0.1)
    self.assertAllClose(sample_1.queries, expected_queries)

    # Second sample.
    tf.compat.v1.assign(population_size, 20)
    tf.compat.v1.assign(selection_probability, 0.2)
    test_utils.run_query(query, [record1, record2])

    formatted = query.ledger.get_formatted_ledger_eager()
    sample_1, sample_2 = formatted
    self.assertAllClose(sample_1.population_size, 10.0)
    self.assertAllClose(sample_1.selection_probability, 0.1)
    self.assertAllClose(sample_1.queries, expected_queries)

    self.assertAllClose(sample_2.population_size, 20.0)
    self.assertAllClose(sample_2.selection_probability, 0.2)
    self.assertAllClose(sample_2.queries, expected_queries)
Example 13
    def testNoiseMultiplier(self, cls):
        with tf.GradientTape(persistent=True) as gradient_tape:
            var0 = tf.Variable([0.0])
            data0 = tf.Variable([[0.0]])

            dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0)
            dp_sum_query = privacy_ledger.QueryWithLedger(
                dp_sum_query, 1e6, 1 / 1e6)

            opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)

            self.evaluate(tf.compat.v1.global_variables_initializer())
            # Fetch params to validate initial values
            self.assertAllClose([0.0], self.evaluate(var0))

            grads = []
            for _ in range(1000):
                grads_and_vars = opt.compute_gradients(
                    lambda: self._loss_fn(var0, data0), [var0],
                    gradient_tape=gradient_tape)
                grads.append(grads_and_vars[0][0])

            # Test standard deviation is close to l2_norm_clip * noise_multiplier.
            self.assertNear(np.std(grads), 2.0 * 4.0, 0.5)
Example 14
        def __init__(
                self,
                l2_norm_clip,
                noise_multiplier,
                scalars,
                num_microbatches=None,
                ledger=None,
                unroll_microbatches=False,
                *args,  # pylint: disable=keyword-arg-before-vararg
                **kwargs):

            # Build one Gaussian average query per parameter group; the clip,
            # noise stddev, and averaging denominator come from the parallel
            # lists passed to the constructor.
            queries = [
                gaussian_query.GaussianAverageQuery(
                    l2_norm_clip[i], l2_norm_clip[i] * noise_multiplier[i],
                    scalars[i])
                for i in range(12)
            ]

            dp_nested_query = nested_query.NestedQuery(queries)

            if ledger:
                dp_nested_query = privacy_ledger.QueryWithLedger(
                    dp_nested_query, ledger=ledger)

            super(DPMultiGaussianOptimizerClass,
                  self).__init__(dp_nested_query, num_microbatches,
                                 unroll_microbatches, *args, **kwargs)