Example #1
    def test_check_composition(self):
        orders = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12.,
                  14., 16., 20., 24., 28., 32., 64., 256.)

        rdp = rdp_accountant.compute_rdp(q=1e-4,
                                         noise_multiplier=.4,
                                         steps=40000,
                                         orders=orders)

        eps, _, _ = rdp_accountant.get_privacy_spent(orders,
                                                     rdp,
                                                     target_delta=1e-6)

        rdp += rdp_accountant.compute_rdp(q=0.1,
                                          noise_multiplier=2,
                                          steps=100,
                                          orders=orders)
        eps, _, _ = rdp_accountant.get_privacy_spent(orders,
                                                     rdp,
                                                     target_delta=1e-5)
        # These tests use the old RDP -> approx DP conversion
        # self.assertAlmostEqual(eps, 8.509656, places=5)
        # self.assertEqual(opt_order, 2.5)
        # But these still provide an upper bound
        self.assertLessEqual(eps, 8.509656)
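Example #1 relies on the additive composition property of RDP: the curves of the two sampled Gaussian mechanisms are added order by order, and only the combined curve is converted to an (epsilon, delta) guarantee. A minimal standalone sketch of the same pattern, assuming the legacy module path tensorflow_privacy.privacy.analysis.rdp_accountant used implicitly throughout these snippets:

# Standalone sketch of Example #1's composition (legacy rdp_accountant API assumed).
from tensorflow_privacy.privacy.analysis import rdp_accountant

orders = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12.,
          14., 16., 20., 24., 28., 32., 64., 256.)

# RDP curve of the first sampled Gaussian mechanism.
rdp = rdp_accountant.compute_rdp(q=1e-4, noise_multiplier=0.4,
                                 steps=40000, orders=orders)
# RDP composes additively, so the second mechanism's curve is simply added.
rdp += rdp_accountant.compute_rdp(q=0.1, noise_multiplier=2,
                                  steps=100, orders=orders)
# A single conversion to (eps, delta)-DP at the end.
eps, _, opt_order = rdp_accountant.get_privacy_spent(orders, rdp,
                                                     target_delta=1e-5)
print(f"composed guarantee: ({eps:.3f}, 1e-5)-DP at order {opt_order}")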
Example #2
    def search_optimal_noise_multiplier(self, target_epsilon):
        """
        Performs binary search to get the optimal value for noise multiplier (sigma) for RDP and GDP accounting mechanisms. Functionality adapted from Opacus (https://github.com/pytorch/opacus).
        """
        eps_high = float("inf")
        sigma_low, sigma_high = 0, 10
        orders = [1 + x / 100.0
                  for x in range(1, 1000)] + list(range(12, 1200))

        while eps_high > target_epsilon:
            sigma_high = 2 * sigma_high

            if self.dp_type == 'rdp':
                rdp = compute_rdp(self.sampling_rate, sigma_high, self.steps,
                                  orders)
                eps_high, _, _ = get_privacy_spent(
                    orders, rdp, target_delta=self.target_delta)
            else:  # if self.dp_type == 'gdp'
                mu = compute_gdp_mu(self.sampling_rate, sigma_high, self.steps)
                eps_high, delta = get_gdp_privacy_spent(
                    mu, target_delta=self.target_delta)
                if delta > self.target_delta:
                    raise ValueError(
                        "Could not find suitable privacy parameters.")

            if sigma_high > MAX_SIGMA:
                raise ValueError("The privacy budget is too low.")

        while target_epsilon - eps_high > EPS_TOLERANCE * target_epsilon:
            sigma = (sigma_low + sigma_high) / 2

            if self.dp_type == 'rdp':
                rdp = compute_rdp(self.sampling_rate, sigma, self.steps,
                                  orders)
                eps, _, _ = get_privacy_spent(orders,
                                              rdp,
                                              target_delta=self.target_delta)
            else:  # if self.dp_type == 'gdp'
                mu = compute_gdp_mu(self.sampling_rate, sigma, self.steps)
                eps, delta = get_gdp_privacy_spent(
                    mu, target_delta=self.target_delta)

            if eps < target_epsilon:
                sigma_high = sigma
                eps_high = eps
            else:
                sigma_low = sigma

        return sigma_high
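The two loops above first bracket sigma by doubling sigma_high until the resulting epsilon drops below target_epsilon, then bisect the interval until epsilon is within EPS_TOLERANCE of the target, returning the conservative endpoint sigma_high. A condensed sketch of the same search for the RDP path only, assuming the legacy compute_rdp / get_privacy_spent API used throughout these examples (the tolerance and cap values are illustrative, not taken from the source):

# Condensed sketch of the search above; tolerance and sigma cap are illustrative.
from tensorflow_privacy.privacy.analysis.rdp_accountant import (
    compute_rdp, get_privacy_spent)

def find_noise_multiplier(target_eps, q, steps, delta,
                          tol=0.01, max_sigma=2000.0):
    orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))

    def eps_for(sigma):
        rdp = compute_rdp(q, sigma, steps, orders)
        return get_privacy_spent(orders, rdp, target_delta=delta)[0]

    sigma_low, sigma_high = 0.0, 10.0
    while eps_for(sigma_high) > target_eps:  # bracket: double until eps fits
        sigma_high *= 2
        if sigma_high > max_sigma:
            raise ValueError("The privacy budget is too low.")
    while target_eps - eps_for(sigma_high) > tol * target_eps:  # bisect
        sigma = (sigma_low + sigma_high) / 2
        if eps_for(sigma) < target_eps:
            sigma_high = sigma  # sigma already satisfies the budget
        else:
            sigma_low = sigma
    return sigma_high  # conservative: eps(sigma_high) <= target_eps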
Example #3
 def test_get_privacy_spent_check_target_eps(self):
     orders = range(2, 33)
     rdp = rdp_accountant.compute_rdp(0.01, 4, 10000, orders)
     _, delta, opt_order = rdp_accountant.get_privacy_spent(
         orders, rdp, target_eps=1.258575)
     self.assertAlmostEqual(delta, 1e-5)
     self.assertEqual(opt_order, 20)
Example #4
 def test_get_privacy_spent_check_target_delta(self):
   orders = range(2, 33)
   rdp = rdp_accountant.compute_rdp(0.01, 4, 10000, orders)
   eps, _, opt_order = rdp_accountant.get_privacy_spent(
       orders, rdp, target_delta=1e-5)
   self.assertAlmostEqual(eps, 1.258575, places=5)
   self.assertEqual(opt_order, 20)
Example #5
 def _compute_privacy_budget_spent(self):
     """Compute the epsilon value representing the privacy budget spent up to now."""
     current_rdp = self._rdp * self._num_rounds_completed
     eps, _, _ = rdp_accountant.get_privacy_spent(orders=self.RDP_ORDERS,
                                                  rdp=current_rdp,
                                                  target_delta=self._delta)
     return eps
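The multiplication by self._num_rounds_completed again uses additive RDP composition: k identical rounds cost k times the per-round RDP at every order, which (for the legacy accountant, which scales per-step RDP linearly in steps) is also what compute_rdp's steps argument does. A small standalone illustration of that equivalence; the order grid, q, noise multiplier, and round count below are illustrative, not from the source:

# Illustrative parameters; only the composition pattern matters here.
import numpy as np
from tensorflow_privacy.privacy.analysis import rdp_accountant

RDP_ORDERS = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))

per_round_rdp = rdp_accountant.compute_rdp(q=0.01, noise_multiplier=1.1,
                                           steps=1, orders=RDP_ORDERS)
rdp_100_rounds = rdp_accountant.compute_rdp(q=0.01, noise_multiplier=1.1,
                                            steps=100, orders=RDP_ORDERS)
# Should coincide for the legacy accountant, which scales per-step RDP by steps.
assert np.allclose(per_round_rdp * 100, rdp_100_rounds)

eps, _, _ = rdp_accountant.get_privacy_spent(orders=RDP_ORDERS,
                                             rdp=per_round_rdp * 100,
                                             target_delta=1e-5)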
Example #6
def analysis_privacy(lot_size,
                     data_size,
                     sgd_sigma,
                     gmm_sigma,
                     gmm_iter,
                     gmm_n_comp,
                     sgd_epoch,
                     pca_eps,
                     delta=1e-5):
    q = lot_size / data_size
    sgd_steps = int(math.ceil(sgd_epoch * data_size / lot_size))
    gmm_steps = gmm_iter * (2 * gmm_n_comp + 1)
    orders = ([1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5] +
              list(range(5, 64)) + [128, 256, 512])
    pca_rdp = np.array(orders) * 2 * (pca_eps**2)
    sgd_rdp = compute_rdp(q, sgd_sigma, sgd_steps, orders)
    gmm_rdp = compute_rdp(1, gmm_sigma, gmm_steps, orders)

    rdp = pca_rdp + gmm_rdp + sgd_rdp

    eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta)

    index = orders.index(opt_order)
    print(
        f"ratio(pca:gmm:sgd):{pca_rdp[index]/rdp[index]}:{gmm_rdp[index]/rdp[index]}:{sgd_rdp[index]/rdp[index]}"
    )
    print(f"GMM + SGD + PCA (MA): {eps}, {delta}-DP")

    return eps, [
        pca_rdp[index] / rdp[index], gmm_rdp[index] / rdp[index],
        sgd_rdp[index] / rdp[index]
    ]
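The pca_rdp line above assigns the PCA release a linear RDP curve; how pca_eps is calibrated to the PCA noise happens outside this snippet, so the constant is taken at face value here:

\[
% reference only: this restates the pca_rdp line, not its derivation
\varepsilon_{\mathrm{PCA}}(\alpha) \;=\; \rho\,\alpha,
\qquad \rho \;=\; 2\,\texttt{pca\_eps}^{2},
\]

i.e. the linear RDP curve of a \(\rho\)-zCDP mechanism; the SGD and GMM curves are then added order by order before the single get_privacy_spent call.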
Example #7
    def test_get_privacy_spent_gaussian(self):
        # Compare the optimal bound for Gaussian with the one derived from RDP.
        # Also compare the RDP upper bound with the "standard" upper bound.
        orders = [0.1 * x for x in range(10, 505)]
        eps_vec = [0.1 * x for x in range(500)]
        rdp = rdp_accountant.compute_rdp(1, 1, 1, orders)
        for eps in eps_vec:
            _, delta, _ = rdp_accountant.get_privacy_spent(orders,
                                                           rdp,
                                                           target_eps=eps)
            # For comparison, we compute the optimal guarantee for Gaussian
            # using https://arxiv.org/abs/1805.06530 Theorem 8 (in v2).
            delta0 = math.erfc((eps - .5) / math.sqrt(2)) / 2
            delta0 = delta0 - math.exp(eps) * math.erfc(
                (eps + .5) / math.sqrt(2)) / 2
            self.assertLessEqual(delta0,
                                 delta + 1e-300)  # need tolerance 10^-300

            # Compute the "standard" upper bound, which should be an upper bound.
            # Note, if orders is too sparse, this will NOT be an upper bound.
            if eps >= 0.5:
                delta1 = math.exp(-0.5 * (eps - 0.5)**2)
            else:
                delta1 = 1
            self.assertLessEqual(delta, delta1 + 1e-300)
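For reference, the closed form evaluated by the two erfc terms above is the optimal Gaussian trade-off of Balle and Wang (https://arxiv.org/abs/1805.06530, Theorem 8), stated here up to notation for sensitivity 1:

\[
% reference only; notation adapted to sensitivity 1
\delta(\varepsilon)
  \;=\; \Phi\!\Big(\tfrac{1}{2\sigma} - \varepsilon\sigma\Big)
  \;-\; e^{\varepsilon}\,\Phi\!\Big(-\tfrac{1}{2\sigma} - \varepsilon\sigma\Big),
\qquad
\Phi(x) \;=\; \tfrac{1}{2}\operatorname{erfc}\!\big(-x/\sqrt{2}\big),
\]

so with the noise scale \(\sigma = 1\) used in compute_rdp(1, 1, 1, orders) the two terms reduce to \(\Phi(1/2 - \varepsilon)\) and \(e^{\varepsilon}\,\Phi(-1/2 - \varepsilon)\), which is what delta0 computes via erfc.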
Example #8
def compute_epsilon(steps, target_delta=1e-5):
    if NUM_EXAMPLES * target_delta > 1.:
        warnings.warn('Your delta might be too high.')
    q = FLAGS.batch_size / float(NUM_EXAMPLES)
    orders = list(jnp.linspace(1.1, 10.9, 99)) + list(range(11, 64))
    rdp_const = compute_rdp(q, FLAGS.noise_multiplier, steps, orders)
    eps, _, _ = get_privacy_spent(orders, rdp_const, target_delta=target_delta)
    return eps
Example #9
def TF_MA(q, sigmas, nc, target_delta=None, target_epsilon=None, max_order=32):
    sp = q * np.ones(len(sigmas))
    steps_list = nc * np.ones(len(sigmas))
    orders = range(2, max_order + 1)
    rdp = np.zeros_like(orders, dtype=float)
    rdp += compute_heterogenous_rdp(sp, sigmas, steps_list, orders)
    eps, delta, opt_order = rdp_accountant.get_privacy_spent(
        orders, rdp, target_delta=target_delta, target_eps=target_epsilon)
    return (eps, delta)
Example #10
 def end(self, session):
     orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))
     samples = session.run(self._samples)
     queries = session.run(self._queries)
     formatted_ledger = privacy_ledger.format_ledger(samples, queries)
     rdp = compute_rdp_from_ledger(formatted_ledger, orders)
     eps = get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
     print('For delta=1e-5, the current epsilon is: %.2f' % eps)
Example #11
def compute_epsilon(steps, num_examples=60000, target_delta=1e-5):
    if num_examples * target_delta > 1.:
        warnings.warn('Your delta might be too high.')
    q = FLAGS.batch_size / float(num_examples)
    orders = list(np.linspace(1.1, 10.9, 99)) + list(range(11, 64))
    rdp_const = compute_rdp(q, FLAGS.noise_multiplier, steps, orders)
    eps, _, _ = get_privacy_spent(orders, rdp_const, target_delta=target_delta)
    return eps
Example #12
    def test_get_privacy_spent_check_target_eps(self):
        orders = range(2, 33)
        rdp = [1.1 for o in orders]  # Constant corresponds to pure DP.
        _, delta, opt_order = rdp_accountant.get_privacy_spent(
            orders, rdp, target_eps=1.32783806176)
        # Since rdp is constant, it should always pick the largest order.
        self.assertEqual(opt_order, 32)
        self.assertAlmostEqual(delta, 1e-5)

        # Second test for Gaussian noise (with no subsampling):
        orders = [0.001 * i
                  for i in range(1000, 100000)]  # Pick a fine set of orders.
        rdp = rdp_accountant.compute_rdp(1, 4.530877117, 1, orders)
        # Scale is chosen to obtain exactly (1,1e-6)-DP.
        _, delta, _ = rdp_accountant.get_privacy_spent(orders,
                                                       rdp,
                                                       target_eps=1)
        self.assertAlmostEqual(delta, 1e-6)
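The "constant corresponds to pure DP" comment uses the fact that the Renyi divergence is non-decreasing in the order and is dominated by the max divergence, so for a pure epsilon_0-DP mechanism (a sketch of the reasoning; see Mironov's RDP paper for the precise statement):

\[
% monotonicity of the Renyi divergence in the order, plus the pure-DP bound
D_{\alpha}(P \,\|\, Q) \;\le\; D_{\infty}(P \,\|\, Q) \;\le\; \varepsilon_{0}
\qquad \text{for all } \alpha > 1,
\]

so a constant RDP vector is a valid model of pure DP and, as the test notes, the conversion then favours the largest available order (32 here), since at least under the classical conversion \(\varepsilon = \mathrm{rdp} + \log(1/\delta)/(\alpha - 1)\) the delta penalty shrinks as the order grows while the RDP term stays flat.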
Example #13
def compute_epsilon(epoch, noise_multi, N, batch_size, delta):
  """Computes epsilon value for given hyperparameters."""
  orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
  sampling_probability = batch_size / N
  rdp = compute_rdp(q=sampling_probability,
                    noise_multiplier=noise_multi,
                    steps=epoch * N / batch_size,
                    orders=orders)
  return get_privacy_spent(orders, rdp, target_delta=delta)[0]
Example #14
 def end(self, session):
     orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))
     samples = session.run(self._samples)
     queries = session.run(self._queries)
     formatted_ledger = privacy_ledger.format_ledger(samples, queries)
     rdp = compute_rdp_from_ledger(formatted_ledger, orders)
     eps = get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
     sys.stdout.write(',%s' % eps)
     sys.stdout.flush()
Example #15
def compute_epsilon(steps):
    orders = [1 + x / 10.0 for x in range(1, 1200)]
    rdp = rdp_accountant.compute_rdp(q=mb_size / N,
                                     noise_multiplier=noise_multiplier,
                                     steps=steps,
                                     orders=orders)
    eps, _, _ = rdp_accountant.get_privacy_spent(orders=orders,
                                                 rdp=rdp,
                                                 target_delta=1 / (2 * N))
    return eps
Example #16
 def compute_epsilon(self, steps):
     if self.noise_multiplier == 0.0:
         return float('inf')
     orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
     sampling_probability = self.batch_size / self.total_data_size
     rdp = compute_rdp(q=sampling_probability,
                       noise_multiplier=self.noise_multiplier,
                       steps=steps,
                       orders=orders)
     return get_privacy_spent(orders, rdp, target_delta=self.delta)[0]
Example #17
def compute_epsilon(steps, noise_multiplier, batch_size, input_size, delta):
    """Computes epsilon value for given hyperparameters."""
    if noise_multiplier == 0.0:
        return float('inf')
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    sampling_probability = batch_size / input_size
    rdp = compute_rdp(q=sampling_probability,
                      noise_multiplier=noise_multiplier,
                      steps=steps,
                      orders=orders)
    return get_privacy_spent(orders, rdp, target_delta=delta)[0]
Example #18
  def test_check_composition(self):
    orders = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12., 14.,
              16., 20., 24., 28., 32., 64., 256.)

    rdp = rdp_accountant.compute_rdp(q=1e-4,
                                     noise_multiplier=.4,
                                     steps=40000,
                                     orders=orders)

    eps, _, opt_order = rdp_accountant.get_privacy_spent(orders, rdp,
                                                         target_delta=1e-6)

    rdp += rdp_accountant.compute_rdp(q=0.1,
                                      noise_multiplier=2,
                                      steps=100,
                                      orders=orders)
    eps, _, opt_order = rdp_accountant.get_privacy_spent(orders, rdp,
                                                         target_delta=1e-5)
    self.assertAlmostEqual(eps, 8.509656, places=5)
    self.assertEqual(opt_order, 2.5)
Example #19
def compute_renyi_privacy(num_examples, batch_size, steps, sigma, delta):
    """compute privacy loss using Renyi Differential-Privacy estimate"""

    sampling_ratio = batch_size / num_examples
    orders = [1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5] \
        + list(range(5, 64)) + [128, 256, 512]

    rdp = compute_rdp(sampling_ratio, sigma, steps, orders)
    epsilon, _, alpha = get_privacy_spent(orders, rdp, target_delta=delta)

    return SpentDP(epsilon, delta)
Example #20
 def test_compute_eps_tree(self, noise_multiplier, eps):
     orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
     # This test is based on the StackOverflow setting in "Practical and
     # Private (Deep) Learning without Sampling or Shuffling". The calculated
     # epsilon could be better as the method in this package keeps improving.
     steps_list, target_delta = 1600, 1e-6
     rdp = tree_aggregation_accountant.compute_rdp_tree_restart(
         noise_multiplier, steps_list, orders)
     new_eps = rdp_accountant.get_privacy_spent(
         orders, rdp, target_delta=target_delta)[0]
     self.assertLess(new_eps, eps)
Example #21
def compute_epsilon(steps):
    """Computes epsilon value for given hyperparameters."""
    if FLAGS.noise_multiplier == 0.0:
        return float('inf')
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    sampling_probability = FLAGS.batch_size / 60000
    rdp = compute_rdp(q=sampling_probability,
                      noise_multiplier=FLAGS.noise_multiplier,
                      steps=steps,
                      orders=orders)
    # Delta is set to 1e-5 because MNIST has 60000 training points.
    return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
Example #22
def apply_dp_sgd_analysis(q, sigma, steps, orders, delta):
    """Compute and print results of DP-SGD analysis."""

    # compute_rdp requires that sigma be the ratio of the standard deviation of
    # the Gaussian noise to the l2-sensitivity of the function to which it is
    # added. Hence, sigma here corresponds to the `noise_multiplier` parameter
    # in the DP-SGD implementation found in privacy.optimizers.dp_optimizer
    rdp = compute_rdp(q, sigma, steps, orders)

    eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta)

    return eps, opt_order
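A hypothetical call to the helper above, with illustrative MNIST-like values (60,000 examples, batch size 256, noise multiplier 1.1, one epoch, delta = 1e-5) that are not taken from the source; it assumes the same compute_rdp / get_privacy_spent imports as the snippet itself:

# Hypothetical call; the values are illustrative, not from the source.
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
eps, opt_order = apply_dp_sgd_analysis(q=256 / 60000,
                                       sigma=1.1,
                                       steps=60000 // 256,
                                       orders=orders,
                                       delta=1e-5)
print('DP-SGD: ({:.2f}, 1e-5)-DP at optimal order {}'.format(eps, opt_order))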
Example #23
def compute_epsilon(steps):
    """Computes epsilon value for given hyperparameters."""
    if FLAGS.noise_multiplier == 0.0:
        return float('inf')
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    sampling_probability = FLAGS.batch_size / NUM_TRAIN_EXAMPLES
    rdp = compute_rdp(q=sampling_probability,
                      noise_multiplier=FLAGS.noise_multiplier,
                      steps=steps,
                      orders=orders)
    # Delta is set to approximate 1 / (number of training points).
    return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
Example #24
 def test_get_privacy_spent_consistency(self):
     orders = range(2, 50)  # Large range of orders (helps test for overflows).
     for q in [0.01, 0.1, 0.8, 1.]:  # Different subsampling rates.
         for multiplier in [0.1, 1., 3., 10.,
                            100.]:  # Different noise scales.
             rdp = rdp_accountant.compute_rdp(q, multiplier, 1, orders)
             for delta in [
                     .9, .5, .1, .01, 1e-3, 1e-4, 1e-5, 1e-6, 1e-9, 1e-12
             ]:
                 eps1, delta1, ord1 = rdp_accountant.get_privacy_spent(
                     orders, rdp, target_delta=delta)
                 eps2, delta2, ord2 = rdp_accountant.get_privacy_spent(
                     orders, rdp, target_eps=eps1)
                 self.assertEqual(delta1, delta)
                 self.assertEqual(eps2, eps1)
                 if eps1 != 0:
                     self.assertEqual(ord1, ord2)
                     self.assertAlmostEqual(delta, delta2)
                 else:  # This is a degenerate case; we won't have consistency.
                     self.assertLessEqual(delta2, delta)
Example #25
def compute_epsilon(epoch, num_train_eg, args):
    """Computes epsilon value for given hyperparameters."""
    steps = epoch * num_train_eg // args.batch_size
    if args.noise_multiplier == 0.0:
        return float('inf')
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    sampling_probability = args.batch_size / num_train_eg
    rdp = compute_rdp(q=sampling_probability,
                      noise_multiplier=args.noise_multiplier,
                      steps=steps,
                      orders=orders)
    # Delta is set to approximate 1 / (number of training points).
    return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
Example #26
 def compute_epsilon(epochs=epochs,
                     mb_size=mb_size,
                     N=N,
                     noise_multiplier=noise_multiplier):
     orders = [1 + x / 10.0 for x in range(1, 800)]
     steps = (N / mb_size) * epochs
     rdp = rdp_accountant.compute_rdp(q=mb_size / N,
                                      noise_multiplier=noise_multiplier,
                                      steps=steps,
                                      orders=orders)
     eps, _, _ = rdp_accountant.get_privacy_spent(orders=orders,
                                                  rdp=rdp,
                                                  target_delta=1 / (2 * N))
     return eps
Example #27
 def test_compute_eps_tree_decreasing(self, steps_list):
     # Test that the privacy epsilon decreases as the noise multiplier
     # increases, with all other parameters held fixed.
     orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
     target_delta = 1e-6
     prev_eps = tree_aggregation_accountant.compute_rdp_tree_restart(
         0, steps_list, orders)
     for noise_multiplier in [0.1 * x for x in range(1, 100, 5)]:
         rdp = tree_aggregation_accountant.compute_rdp_tree_restart(
             noise_multiplier, steps_list, orders)
         eps = rdp_accountant.get_privacy_spent(
             orders, rdp, target_delta=target_delta)[0]
         self.assertLess(eps, prev_eps)
         prev_eps = eps
Example #28
def print_privacy_guarantees(epochs, batch_size, samples, noise_multiplier):
    """Tabulating position-dependent privacy guarantees."""
    if noise_multiplier == 0:
        print('No differential privacy (additive noise is 0).')
        return

    print(
        'In the conditions of Theorem 34 (https://arxiv.org/abs/1808.06651) '
        'the training procedure results in the following privacy guarantees.')

    print('Out of the total of {} samples:'.format(samples))

    steps_per_epoch = samples // batch_size
    orders = np.concatenate(
        [np.linspace(2, 20, num=181),
         np.linspace(20, 100, num=81)])
    delta = 1e-5
    for p in (.5, .9, .99):
        steps = math.ceil(steps_per_epoch * p)  # Steps in the last epoch.
        coef = 2 * (noise_multiplier * batch_size)**-2 * (
            # Accounting for privacy loss
            (epochs - 1) / steps_per_epoch +  # ... from all-but-last epochs
            1 / (steps_per_epoch - steps + 1))  # ... due to the last epoch
        # Using RDP accountant to compute eps. Doing computation analytically is
        # an option.
        rdp = [order * coef for order in orders]
        eps, _, _ = get_privacy_spent(orders, rdp, target_delta=delta)
        print('\t{:g}% enjoy at least ({:.2f}, {})-DP'.format(
            p * 100, eps, delta))

    # Compute privacy guarantees for the Sampled Gaussian Mechanism.
    rdp_sgm = compute_rdp(batch_size / samples, noise_multiplier,
                          epochs * steps_per_epoch, orders)
    eps_sgm, _, _ = get_privacy_spent(orders, rdp_sgm, target_delta=delta)
    print('By comparison, DP-SGD analysis for training done with the same '
          'parameters and random shuffling in each epoch guarantees '
          '({:.2f}, {})-DP for all samples.'.format(eps_sgm, delta))
Example #29
    def get_delta_spent(self, target_epsilon):
        """
        Computes the epsilon budget spent by a DP optimizer.
        :param target_epsilon: fixed epsilon of an (\eps, \delta)-DP guarantee
        :return: delta
        """

        rdp, orders = self._get_rdp_and_orders()
        _, delta, opt_order = get_privacy_spent(orders, rdp, target_eps=target_epsilon)
        if opt_order == max(orders) or opt_order == min(orders):
            print(
                "The privacy estimate is likely to be improved by expanding "
                "the set of orders."
            )
        return delta
Example #30
def TF_MA(sigma, T, target_delta=None, target_epsilon=None, max_order=32):
    # p (the randomised-response flip probability) is assumed to be defined in
    # the enclosing scope.
    orders = range(2, max_order + 1)
    rdp = np.zeros_like(orders, dtype=float)
    for i in orders:
        # RDP for the Gaussian mechanism
        rdp[i - 2] += (T / 2) * i / (2 * sigma**2)
        # RDP for the randomised response
        rdp[i - 2] += (T / 2) * (1 /
                                 (i - 1)) * np.log((p**i) * (1 - p)**(1 - i) +
                                                   (1 - p)**i * p**(1 - i))
    eps, delta, opt_order = rdp_accountant.get_privacy_spent(
        orders, rdp, target_delta=target_delta, target_eps=target_epsilon)
    return (eps, delta)
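The two per-round terms accumulated in the loop above are the standard RDP expressions at integer order alpha (here i): the Gaussian mechanism with sensitivity 1 and noise scale sigma, and randomised response with flip probability p (both as in Mironov's RDP paper), each applied for T/2 rounds in the code:

\[
% reference forms; the code scales each by T/2 rounds
\varepsilon_{\mathrm{Gauss}}(\alpha) \;=\; \frac{\alpha}{2\sigma^{2}},
\qquad
\varepsilon_{\mathrm{RR}}(\alpha) \;=\; \frac{1}{\alpha-1}
  \log\!\Big(p^{\alpha}(1-p)^{1-\alpha} + (1-p)^{\alpha}p^{1-\alpha}\Big).
\]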