Beispiel #1
0
def analysis_privacy(lot_size,
                     data_size,
                     sgd_sigma,
                     gmm_sigma,
                     gmm_iter,
                     gmm_n_comp,
                     sgd_epoch,
                     pca_eps,
                     delta=1e-5):
    """Compose the RDP costs of the PCA, GMM and SGD stages into one epsilon.

    The three RDP curves are evaluated on a shared grid of orders, summed,
    and converted to an epsilon at ``delta`` with ``get_privacy_spent``.
    Two summary lines are printed as a side effect.

    Returns:
        A pair ``(eps, shares)``. ``shares`` is a three-element list holding
        the fraction of the total RDP, at the order selected by
        ``get_privacy_spent``, contributed by PCA, GMM and SGD (in that
        order).
    """
    sampling_rate = lot_size / data_size
    sgd_steps = int(math.ceil(sgd_epoch * data_size / lot_size))
    gmm_steps = gmm_iter * (2 * gmm_n_comp + 1)

    # Shared grid of Renyi orders used by every stage.
    orders = [1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5]
    orders.extend(range(5, 64))
    orders.extend([128, 256, 512])

    pca_rdp = np.array(orders) * 2 * (pca_eps**2)
    sgd_rdp = compute_rdp(sampling_rate, sgd_sigma, sgd_steps, orders)
    # The GMM stage is accounted with sampling rate 1.
    gmm_rdp = compute_rdp(1, gmm_sigma, gmm_steps, orders)
    rdp = pca_rdp + gmm_rdp + sgd_rdp

    eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta)

    # Per-stage share of the total RDP at the selected order.
    best = orders.index(opt_order)
    pca_share = pca_rdp[best] / rdp[best]
    gmm_share = gmm_rdp[best] / rdp[best]
    sgd_share = sgd_rdp[best] / rdp[best]

    print(f"ratio(pca:gmm:sgd):{pca_share}:{gmm_share}:{sgd_share}")
    print(f"GMM + SGD + PCA (MA): {eps}, {delta}-DP")

    return eps, [pca_share, gmm_share, sgd_share]
    def test_check_composition(self):
        """Check that composing two RDP curves keeps epsilon under a bound."""
        orders = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12.,
                  14., 16., 20., 24., 28., 32., 64., 256.)

        first_rdp = rdp_accountant.compute_rdp(
            q=1e-4, noise_multiplier=.4, steps=40000, orders=orders)

        # Conversion of the first mechanism alone; its result is not asserted.
        _unused_eps, _, _ = rdp_accountant.get_privacy_spent(
            orders, first_rdp, target_delta=1e-6)

        second_rdp = rdp_accountant.compute_rdp(
            q=0.1, noise_multiplier=2, steps=100, orders=orders)
        composed_rdp = first_rdp + second_rdp

        eps, _, _ = rdp_accountant.get_privacy_spent(
            orders, composed_rdp, target_delta=1e-5)

        # The exact value 8.509656 (optimal order 2.5) came from the old
        # RDP -> approx DP conversion; it is still a valid upper bound.
        self.assertLessEqual(eps, 8.509656)
Beispiel #3
0
    def search_optimal_noise_multiplier(self, target_epsilon):
        """
        Search for a noise multiplier (sigma) whose epsilon is just under the target.

        The upper bracket is doubled until its epsilon is no larger than
        ``target_epsilon``. The bracket is then bisected until the upper
        end's epsilon is within ``EPS_TOLERANCE * target_epsilon`` of the
        target. RDP accounting is used when ``self.dp_type == 'rdp'``, GDP
        accounting otherwise. Functionality adapted from Opacus
        (https://github.com/pytorch/opacus).

        Returns:
            The upper end of the final bracket.

        Raises:
            ValueError: while growing the bracket, if GDP accounting reports
                a delta above ``self.target_delta`` or if the bracket
                exceeds ``MAX_SIGMA``.
        """
        orders = [1 + x / 100.0 for x in range(1, 1000)]
        orders += list(range(12, 1200))

        def privacy_at(sigma):
            # Returns (epsilon, delta); delta is None under RDP accounting.
            if self.dp_type == 'rdp':
                rdp = compute_rdp(self.sampling_rate, sigma, self.steps,
                                  orders)
                rdp_eps, _, _ = get_privacy_spent(
                    orders, rdp, target_delta=self.target_delta)
                return rdp_eps, None
            mu = compute_gdp_mu(self.sampling_rate, sigma, self.steps)
            gdp_eps, gdp_delta = get_gdp_privacy_spent(
                mu, target_delta=self.target_delta)
            return gdp_eps, gdp_delta

        lower, upper = 0, 10
        upper_eps = float("inf")

        # Phase 1: grow the upper bracket until it satisfies the target.
        while upper_eps > target_epsilon:
            upper = 2 * upper
            upper_eps, reported_delta = privacy_at(upper)

            if self.dp_type != 'rdp' and reported_delta > self.target_delta:
                raise ValueError(
                    "Could not find suitable privacy parameters.")

            if upper > MAX_SIGMA:
                raise ValueError("The privacy budget is too low.")

        # Phase 2: bisect until the upper end is close enough to the target.
        while target_epsilon - upper_eps > EPS_TOLERANCE * target_epsilon:
            midpoint = (lower + upper) / 2
            mid_eps, _ = privacy_at(midpoint)

            if mid_eps < target_epsilon:
                upper, upper_eps = midpoint, mid_eps
            else:
                lower = midpoint

        return upper
    def test_get_privacy_spent_gaussian(self):
        """Sandwich the RDP-derived delta between two reference bounds.

        For each epsilon, the delta from the accountant must be at least
        the optimal Gaussian guarantee and at most the "standard" bound.
        """
        orders = [0.1 * x for x in range(10, 505)]
        rdp = rdp_accountant.compute_rdp(1, 1, 1, orders)
        root_two = math.sqrt(2)

        for tenth in range(500):
            eps = 0.1 * tenth
            _, delta, _ = rdp_accountant.get_privacy_spent(
                orders, rdp, target_eps=eps)

            # Optimal guarantee for Gaussian noise, from
            # https://arxiv.org/abs/1805.06530 Theorem 8 (in v2).
            first_term = math.erfc((eps - .5) / root_two) / 2
            second_term = math.exp(eps) * math.erfc(
                (eps + .5) / root_two) / 2
            optimal_delta = first_term - second_term
            # A tolerance of 10^-300 is needed here.
            self.assertLessEqual(optimal_delta, delta + 1e-300)

            # The "standard" upper bound. Note that if orders is too sparse
            # this will NOT be an upper bound.
            standard_delta = (math.exp(-0.5 * (eps - 0.5)**2)
                              if eps >= 0.5 else 1)
            self.assertLessEqual(delta, standard_delta + 1e-300)
 def test_compute_rdp_sequence(self):
     """Compare compute_rdp over several orders with reference values."""
     orders = [1.5, 2.5, 5, 50, 100, np.inf]
     expected = [
         6.5007e-04, 1.0854e-03, 2.1808e-03, 2.3846e-02, 1.6742e+02, np.inf
     ]
     actual = rdp_accountant.compute_rdp(0.01, 2.5, 50, orders)
     self.assertAllClose(actual, expected, rtol=1e-4)
 def test_no_tree_no_sampling(self, total_steps, noise_multiplier):
     """Tree-restart RDP with one step per restart must equal plain RDP."""
     fine_orders = [1 + x / 10. for x in range(1, 100)]
     orders = fine_orders + list(range(12, 64))

     steps_per_restart = [1] * total_steps
     from_tree = tree_aggregation_accountant.compute_rdp_tree_restart(
         noise_multiplier, steps_per_restart, orders)
     # Reference: the same number of steps with sampling rate 1.
     reference = rdp_accountant.compute_rdp(1., noise_multiplier,
                                            total_steps, orders)

     self.assertAllClose(from_tree, reference, rtol=1e-12)
 def test_get_privacy_spent_check_target_eps(self):
     """Given a target epsilon, check the reported delta and order."""
     orders = range(2, 33)
     rdp = rdp_accountant.compute_rdp(0.01, 4, 10000, orders)

     spent = rdp_accountant.get_privacy_spent(orders,
                                              rdp,
                                              target_eps=1.258575)
     _, delta, opt_order = spent

     self.assertAlmostEqual(delta, 1e-5)
     self.assertEqual(opt_order, 20)
Beispiel #8
0
 def test_get_privacy_spent_check_target_delta(self):
     """Given a target delta, check the reported epsilon and order."""
     orders = range(2, 33)
     rdp = rdp_accountant.compute_rdp(0.01, 4, 10000, orders)

     spent = rdp_accountant.get_privacy_spent(orders,
                                              rdp,
                                              target_delta=1e-5)
     eps, _, opt_order = spent

     self.assertAlmostEqual(eps, 1.258575, places=5)
     self.assertEqual(opt_order, 20)
 def test_compute_rdp_sequence(self):
     """Compare compute_rdp over several orders with reference values."""
     orders = [1.5, 2.5, 5, 50, 100, np.inf]
     expected = [0.00065, 0.001085, 0.00218075, 0.023846, 167.416307, np.inf]
     actual = rdp_accountant.compute_rdp(0.01, 2.5, 50, orders)
     self.assertSequenceAlmostEqual(actual, expected, delta=1e-5)
Beispiel #10
0
    def __init__(
        self,
        num_epochs,
        client_fraction,
        batch_size,
        sensitivity,
        noise_scale,
        user_weight_cap,
        delta,
        model,
        training_data,
        coef_init,
        intercept_init,
        test_data=None,
        label_col="label",
        user_id_col="user_id",
    ):
        """Store the DP settings, set up the base class, and configure the server.

        The DP hyperparameters are stored on the instance and the
        single-step RDP cost is precomputed. The base class is then
        initialised with ``model``, ``training_data``, ``coef_init``,
        ``intercept_init``, ``test_data``, ``label_col`` and
        ``user_id_col``. Finally the clients' contribution weights are
        capped and summed, and the resulting averaging denominator and
        noise standard deviation are pushed to ``self._server``.

        Assumes the base-class initialiser creates ``self._clients`` and
        ``self._server`` -- confirm against the parent class.
        """
        self._num_epochs = num_epochs
        self._client_fraction = client_fraction
        self._batch_size = batch_size
        self._sensitivity = sensitivity
        self._noise_scale = noise_scale
        self._user_weight_cap = user_weight_cap
        self._delta = delta

        # Privacy cost (RDP) can be precomputed.
        self._rdp = rdp_accountant.compute_rdp(
            q=self._client_fraction,
            noise_multiplier=self._noise_scale,
            steps=1,
            orders=self.RDP_ORDERS,
        )

        # Store the progressive epsilon values (privacy budget used).
        # Start from an initial value of 0 to align with the coefficient arrays.
        self._eps = [0]

        super().__init__(
            model,
            training_data,
            coef_init,
            intercept_init,
            test_data,
            label_col,
            user_id_col,
        )

        # Each client applies the cap and reports its resulting weight.
        user_contrib_weight_sum = 0.0
        for client in self._clients:
            user_contrib_weight_sum += client.update_contrib_weight(
                self._user_weight_cap)
        self._user_contrib_weight_sum = user_contrib_weight_sum

        # The averaging denominator is also the divisor of the noise scale,
        # so compute it once and reuse it.
        self._avg_denom = self._client_fraction * self._user_contrib_weight_sum
        self._standard_dev = (self._noise_scale *
                              self._sensitivity) / self._avg_denom

        # Fixed: the original passed `slef._standard_dev`, a NameError.
        self._server.reset_dp_params(avg_denom=self._avg_denom,
                                     standard_dev=self._standard_dev)
Beispiel #11
0
def compute_epsilon(steps, target_delta=1e-5):
    """Return the epsilon spent after ``steps`` steps at ``target_delta``.

    The sampling rate is ``FLAGS.batch_size / NUM_EXAMPLES`` and the noise
    multiplier is ``FLAGS.noise_multiplier``. A warning is emitted when
    ``NUM_EXAMPLES * target_delta`` exceeds 1.
    """
    if NUM_EXAMPLES * target_delta > 1.:
        warnings.warn('Your delta might be too high.')

    sampling_rate = FLAGS.batch_size / float(NUM_EXAMPLES)

    # Fractional orders from 1.1 to 10.9, then integer orders 11..63.
    fractional_orders = list(jnp.linspace(1.1, 10.9, 99))
    integer_orders = list(range(11, 64))
    orders = fractional_orders + integer_orders

    rdp = compute_rdp(sampling_rate, FLAGS.noise_multiplier, steps, orders)
    epsilon, _, _ = get_privacy_spent(orders, rdp, target_delta=target_delta)
    return epsilon
Beispiel #12
0
def compute_heterogenous_rdp(sampling_probabilities, noise_multipliers,
                             steps_list, orders):
  """Sum the RDP of several mechanisms evaluated at the same orders.

  Mechanism ``i`` is described by ``sampling_probabilities[i]``,
  ``noise_multipliers[i]`` and ``steps_list[i]``; its RDP is obtained from
  ``rdp_accountant.compute_rdp`` and added to the running total.

  Args:
    sampling_probabilities: Sized sequence of per-mechanism sampling rates.
    noise_multipliers: Sized sequence of per-mechanism noise multipliers.
    steps_list: Sized sequence of per-mechanism step counts.
    orders: Orders forwarded unchanged to ``compute_rdp``.

  Returns:
    The accumulated RDP, or the integer 0 when no mechanisms are given.

  Raises:
    AssertionError: If the three per-mechanism sequences differ in length.
  """
  # All three lengths must agree. zip() stops at the shortest input, so a
  # short steps_list would silently drop mechanisms and under-report the
  # privacy cost.
  assert (len(sampling_probabilities) == len(noise_multipliers) ==
          len(steps_list)), (
              'sampling_probabilities, noise_multipliers and steps_list '
              'must have the same length')
  rdp = 0
  for q, noise_multiplier, steps in zip(sampling_probabilities,
                                        noise_multipliers, steps_list):
    rdp += rdp_accountant.compute_rdp(q, noise_multiplier, steps, orders)
  return rdp
Beispiel #13
0
def compute_epsilon(steps, num_examples=60000, target_delta=1e-5):
    """Return the epsilon spent after ``steps`` steps at ``target_delta``.

    The sampling rate is ``FLAGS.batch_size / num_examples`` and the noise
    multiplier is ``FLAGS.noise_multiplier``. A warning is emitted when
    ``num_examples * target_delta`` exceeds 1.

    Args:
        steps: Number of steps passed to ``compute_rdp``.
        num_examples: Dataset size used for the sampling rate.
        target_delta: Delta at which epsilon is reported.

    Returns:
        The first element of the ``get_privacy_spent`` result (epsilon).
    """
    if num_examples * target_delta > 1.:
        warnings.warn('Your delta might be too high.')
    q = FLAGS.batch_size / float(num_examples)
    # range() must be wrapped in list(): on Python 3 a range object cannot
    # be concatenated to a list (`list + range` raises TypeError).
    orders = list(np.linspace(1.1, 10.9, 99)) + list(range(11, 64))
    rdp_const = compute_rdp(q, FLAGS.noise_multiplier, steps, orders)
    eps, _, _ = get_privacy_spent(orders, rdp_const, target_delta=target_delta)
    return eps
Beispiel #14
0
def compute_epsilon(epoch, noise_multi, N, batch_size, delta):
  """Return the epsilon reached after ``epoch`` epochs at the given ``delta``.

  The sampling probability is ``batch_size / N`` and the step count passed
  to ``compute_rdp`` is ``epoch * N / batch_size`` (true division).
  """
  fine_orders = [1 + x / 10. for x in range(1, 100)]
  coarse_orders = list(range(12, 64))
  orders = fine_orders + coarse_orders

  sampling_probability = batch_size / N
  total_steps = epoch * N / batch_size
  rdp = compute_rdp(q=sampling_probability,
                    noise_multiplier=noise_multi,
                    steps=total_steps,
                    orders=orders)

  spent = get_privacy_spent(orders, rdp, target_delta=delta)
  return spent[0]
def compute_epsilon(steps):
    """Return the epsilon spent after ``steps`` steps.

    Reads ``mb_size``, ``N`` and ``noise_multiplier`` from the enclosing
    scope. The sampling rate is ``mb_size / N`` and the target delta is
    ``1 / (2 * N)``.
    """
    orders = [1 + x / 10.0 for x in range(1, 1200)]
    sampling_rate = mb_size / N
    accumulated_rdp = rdp_accountant.compute_rdp(
        q=sampling_rate,
        noise_multiplier=noise_multiplier,
        steps=steps,
        orders=orders)
    target_delta = 1 / (2 * N)
    epsilon, _, _ = rdp_accountant.get_privacy_spent(
        orders=orders, rdp=accumulated_rdp, target_delta=target_delta)
    return epsilon
 def compute_epsilon(self, steps):
     """Return the epsilon spent after ``steps`` steps at ``self.delta``.

     Returns infinity when ``self.noise_multiplier`` is zero. Otherwise the
     sampling probability is ``self.batch_size / self.total_data_size``.
     """
     if self.noise_multiplier == 0.0:
         return float('inf')

     fine_orders = [1 + x / 10. for x in range(1, 100)]
     coarse_orders = list(range(12, 64))
     orders = fine_orders + coarse_orders

     rdp = compute_rdp(q=self.batch_size / self.total_data_size,
                       noise_multiplier=self.noise_multiplier,
                       steps=steps,
                       orders=orders)
     spent = get_privacy_spent(orders, rdp, target_delta=self.delta)
     return spent[0]
Beispiel #17
0
  def test_check_composition(self):
    """Compose two RDP curves and check the epsilon and optimal order."""
    orders = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12., 14.,
              16., 20., 24., 28., 32., 64., 256.)

    first_rdp = rdp_accountant.compute_rdp(
        q=1e-4, noise_multiplier=.4, steps=40000, orders=orders)

    # Conversion of the first mechanism alone; its result is not asserted.
    _first_eps, _, _first_order = rdp_accountant.get_privacy_spent(
        orders, first_rdp, target_delta=1e-6)

    second_rdp = rdp_accountant.compute_rdp(
        q=0.1, noise_multiplier=2, steps=100, orders=orders)
    composed_rdp = first_rdp + second_rdp

    eps, _, opt_order = rdp_accountant.get_privacy_spent(
        orders, composed_rdp, target_delta=1e-5)

    self.assertAlmostEqual(eps, 8.509656, places=5)
    self.assertEqual(opt_order, 2.5)
Beispiel #18
0
def compute_epsilon(steps, noise_multiplier, batch_size, input_size, delta):
    """Return the epsilon spent after ``steps`` steps at the given ``delta``.

    Returns infinity when ``noise_multiplier`` is zero. Otherwise the
    sampling probability is ``batch_size / input_size``.
    """
    if noise_multiplier == 0.0:
        return float('inf')

    fine_orders = [1 + x / 10. for x in range(1, 100)]
    coarse_orders = list(range(12, 64))
    orders = fine_orders + coarse_orders

    rdp = compute_rdp(q=batch_size / input_size,
                      noise_multiplier=noise_multiplier,
                      steps=steps,
                      orders=orders)
    spent = get_privacy_spent(orders, rdp, target_delta=delta)
    return spent[0]
Beispiel #19
0
def compute_renyi_privacy(num_examples, batch_size, steps, sigma, delta):
    """Return a ``SpentDP(epsilon, delta)`` computed through RDP accounting.

    The sampling ratio is ``batch_size / num_examples``. The order selected
    by ``get_privacy_spent`` is not used.
    """
    orders = [1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5]
    orders.extend(range(5, 64))
    orders.extend([128, 256, 512])

    rdp = compute_rdp(batch_size / num_examples, sigma, steps, orders)
    epsilon, _, _ = get_privacy_spent(orders, rdp, target_delta=delta)

    return SpentDP(epsilon, delta)
def compute_epsilon(steps):
    """Return the epsilon spent after ``steps`` steps, using ``FLAGS``.

    Returns infinity when ``FLAGS.noise_multiplier`` is zero. The sampling
    probability is ``FLAGS.batch_size / 60000`` and delta is fixed at 1e-5.
    """
    if FLAGS.noise_multiplier == 0.0:
        return float('inf')

    fine_orders = [1 + x / 10. for x in range(1, 100)]
    coarse_orders = list(range(12, 64))
    orders = fine_orders + coarse_orders

    rdp = compute_rdp(q=FLAGS.batch_size / 60000,
                      noise_multiplier=FLAGS.noise_multiplier,
                      steps=steps,
                      orders=orders)
    # Delta is 1e-5 because MNIST has 60000 training points.
    spent = get_privacy_spent(orders, rdp, target_delta=1e-5)
    return spent[0]
def apply_dp_sgd_analysis(q, sigma, steps, orders, delta):
    """Return ``(eps, opt_order)`` for a DP-SGD run with the given parameters.

    ``sigma`` is passed to ``compute_rdp`` as the noise multiplier, i.e. the
    ratio of the Gaussian noise standard deviation to the l2-sensitivity of
    the function the noise is added to. It corresponds to the
    `noise_multiplier` parameter of the DP-SGD implementation in
    privacy.optimizers.dp_optimizer.
    """
    accumulated_rdp = compute_rdp(q, sigma, steps, orders)
    spent = get_privacy_spent(orders, accumulated_rdp, target_delta=delta)
    eps, _, opt_order = spent
    return eps, opt_order
Beispiel #22
0
def compute_epsilon(steps):
    """Return the epsilon spent after ``steps`` steps, using ``FLAGS``.

    Returns infinity when ``FLAGS.noise_multiplier`` is zero. The sampling
    probability is ``FLAGS.batch_size / NUM_TRAIN_EXAMPLES`` and delta is
    fixed at 1e-5.
    """
    if FLAGS.noise_multiplier == 0.0:
        return float('inf')

    fine_orders = [1 + x / 10. for x in range(1, 100)]
    coarse_orders = list(range(12, 64))
    orders = fine_orders + coarse_orders

    rdp = compute_rdp(q=FLAGS.batch_size / NUM_TRAIN_EXAMPLES,
                      noise_multiplier=FLAGS.noise_multiplier,
                      steps=steps,
                      orders=orders)
    # Delta is meant to approximate 1 / (number of training points).
    spent = get_privacy_spent(orders, rdp, target_delta=1e-5)
    return spent[0]
def compute_epsilon(epoch, num_train_eg, args):
    """Return the epsilon spent after ``epoch`` epochs with delta 1e-5.

    The step count is ``epoch * num_train_eg // args.batch_size``. Returns
    infinity when ``args.noise_multiplier`` is zero. The sampling
    probability is ``args.batch_size / num_train_eg``.
    """
    total_steps = epoch * num_train_eg // args.batch_size
    if args.noise_multiplier == 0.0:
        return float('inf')

    fine_orders = [1 + x / 10. for x in range(1, 100)]
    coarse_orders = list(range(12, 64))
    orders = fine_orders + coarse_orders

    rdp = compute_rdp(q=args.batch_size / num_train_eg,
                      noise_multiplier=args.noise_multiplier,
                      steps=total_steps,
                      orders=orders)
    # Delta is meant to approximate 1 / (number of training points).
    spent = get_privacy_spent(orders, rdp, target_delta=1e-5)
    return spent[0]
Beispiel #24
0
 def compute_epsilon(epochs=epochs,
                     mb_size=mb_size,
                     N=N,
                     noise_multiplier=noise_multiplier):
     """Return the epsilon spent after ``epochs`` epochs of training.

     Defaults are captured from the enclosing scope when the function is
     defined. The step count is ``(N / mb_size) * epochs``, the sampling
     rate is ``mb_size / N`` and the target delta is ``1 / (2 * N)``.
     """
     orders = [1 + x / 10.0 for x in range(1, 800)]
     total_steps = (N / mb_size) * epochs
     sampling_rate = mb_size / N
     accumulated_rdp = rdp_accountant.compute_rdp(
         q=sampling_rate,
         noise_multiplier=noise_multiplier,
         steps=total_steps,
         orders=orders)
     target_delta = 1 / (2 * N)
     epsilon, _, _ = rdp_accountant.get_privacy_spent(
         orders=orders, rdp=accumulated_rdp, target_delta=target_delta)
     return epsilon
Beispiel #25
0
def get_epsilon_for_delta(n, target_delta=1e-5):
    """Return ``(epsilon, target_delta)`` for the configured training run.

    When ``FLAGS.dp`` is falsy, ``(None, None)`` is returned. Otherwise
    epsilon is computed from ``FLAGS.batch_size``, ``FLAGS.epochs`` and
    ``FLAGS.dp_noise_multiplier`` for a dataset of ``n`` examples, and is
    printed.
    """
    if not FLAGS.dp:
        return None, None

    noise_multiplier = FLAGS.dp_noise_multiplier
    sampling_probability = FLAGS.batch_size / float(n)
    total_steps = FLAGS.epochs * float(n) // FLAGS.batch_size

    fine_orders = [1 + x / 10. for x in range(1, 100)]
    orders = fine_orders + list(range(12, 64))

    rdp = compute_rdp(q=sampling_probability,
                      noise_multiplier=noise_multiplier,
                      steps=total_steps,
                      orders=orders)
    spent = get_privacy_spent(orders, rdp, target_delta=target_delta)
    epsilon = spent[0]
    print("[INFO] epsilon={} for delta={}".format(epsilon, target_delta))
    return epsilon, target_delta
Beispiel #26
0
  def test_compute_rdp_from_ledger(self):
    """RDP computed from a ledger must match the direct computation."""
    orders = range(2, 33)
    sampling_rate = 0.1
    population = 1000
    clip_norm = 3.14159
    stddev = 2.71828
    num_steps = 3

    # Ledger made of `num_steps` identical sample entries, each holding a
    # single Gaussian sum query.
    gaussian_query = privacy_ledger.GaussianSumQueryEntry(clip_norm, stddev)
    sample = privacy_ledger.SampleEntry(population, sampling_rate,
                                        [gaussian_query])
    ledger = [sample] * num_steps

    # Direct computation uses the noise multiplier stddev / clip_norm.
    noise_multiplier = stddev / clip_norm
    direct = rdp_accountant.compute_rdp(sampling_rate, noise_multiplier,
                                        num_steps, orders)
    from_ledger = rdp_accountant.compute_rdp_from_ledger(ledger, orders)

    self.assertSequenceAlmostEqual(direct, from_ledger)
Beispiel #27
0
    def _get_rdp_and_orders(self):
        """Return ``(rdp, orders)`` from the ledger if set, else from settings.

        Without a ledger, the RDP is computed from ``self.epochs``,
        ``self.train_size``, ``self.batch_size`` and
        ``self.noise_multiplier``. With a ledger, it is computed from the
        formatted ledger obtained through the current session.
        """
        fine_orders = [1 + x / 10.0 for x in range(1, 100)]
        orders = fine_orders + list(range(12, 64))

        if self.ledger is not None:
            # calculate with ledger
            print("Formatted Ledger")
            formatted = self.ledger.get_formatted_ledger(get_session())
            return compute_rdp_from_ledger(formatted, orders), orders

        total_steps = self.epochs * self.train_size // self.batch_size
        sampling_probability = self.batch_size / self.train_size
        rdp = compute_rdp(
            q=sampling_probability,
            noise_multiplier=self.noise_multiplier,
            steps=total_steps,
            orders=orders,
        )
        return rdp, orders
Beispiel #28
0
def compute_epsilon(steps, noise_multi, user_ratio, delta):
    """Computes epsilon value for given hyperparameters.

    Args:
        steps: Number of steps passed to ``compute_rdp``.
        noise_multi: Noise multiplier; a value of zero yields infinity.
        user_ratio: Value passed to ``compute_rdp`` as the sampling rate ``q``.
        delta: Target delta at which epsilon is reported.

    Returns:
        ``float('inf')`` when ``noise_multi`` is zero, otherwise the first
        element of the ``get_privacy_spent`` result (epsilon).
    """
    # Zero noise needs no accountant, so answer before the heavy import.
    if noise_multi == 0.0:
        return float('inf')

    from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
    from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent

    # Renyi orders (alphas) at which the RDP curve is evaluated.
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))

    rdp = compute_rdp(
      q=user_ratio,
      noise_multiplier=noise_multi,
      steps=steps,
      orders=orders)
    # Delta is supplied by the caller rather than fixed here.
    return get_privacy_spent(orders, rdp, target_delta=delta)[0]
    def test_get_privacy_spent_check_target_eps(self):
        """Check delta for a constant RDP curve and for Gaussian noise."""
        # First case: a constant RDP curve, which corresponds to pure DP.
        integer_orders = range(2, 33)
        constant_rdp = [1.1] * len(integer_orders)
        _, delta, opt_order = rdp_accountant.get_privacy_spent(
            integer_orders, constant_rdp, target_eps=1.32783806176)
        # With a constant curve the largest order should always be picked.
        self.assertEqual(opt_order, 32)
        self.assertAlmostEqual(delta, 1e-5)

        # Second case: Gaussian noise with no subsampling, on a fine grid
        # of orders.
        fine_orders = [0.001 * i for i in range(1000, 100000)]
        # Scale is chosen to obtain exactly (1,1e-6)-DP.
        gaussian_rdp = rdp_accountant.compute_rdp(1, 4.530877117, 1,
                                                  fine_orders)
        _, delta, _ = rdp_accountant.get_privacy_spent(fine_orders,
                                                       gaussian_rdp,
                                                       target_eps=1)
        self.assertAlmostEqual(delta, 1e-6)
 def test_get_privacy_spent_consistency(self):
     """Converting delta -> eps -> delta must round-trip consistently."""
     # Large range of orders (helps test for overflows).
     orders = range(2, 50)
     sampling_rates = [0.01, 0.1, 0.8, 1.]
     noise_scales = [0.1, 1., 3., 10., 100.]
     target_deltas = [
         .9, .5, .1, .01, 1e-3, 1e-4, 1e-5, 1e-6, 1e-9, 1e-12
     ]

     for q in sampling_rates:
         for multiplier in noise_scales:
             rdp = rdp_accountant.compute_rdp(q, multiplier, 1, orders)
             for delta in target_deltas:
                 forward = rdp_accountant.get_privacy_spent(
                     orders, rdp, target_delta=delta)
                 eps1, delta1, ord1 = forward
                 backward = rdp_accountant.get_privacy_spent(
                     orders, rdp, target_eps=eps1)
                 eps2, delta2, ord2 = backward

                 self.assertEqual(delta1, delta)
                 self.assertEqual(eps2, eps1)
                 if eps1 == 0:
                     # Degenerate case; we won't have consistency.
                     self.assertLessEqual(delta2, delta)
                 else:
                     self.assertEqual(ord1, ord2)
                     self.assertAlmostEqual(delta, delta2)