Ejemplo n.º 1
0
def _compute_rdp(votes, baseline, threshold, sigma1, sigma2, delta, orders,
                 data_ind):
    """Computes the (data-dependent) RDP curve for Confident GNMax.

    Walks over the queries in `votes`, accumulating the expected RDP cost of
    the thresholding step (step 1) and of the noisy-max step (step 2, weighted
    by the probability that step 1 lets the query through). Progress is
    printed after every 1000 queries and after the last one.

    Args:
        votes: Per-query votes; iterated row by row and must expose
            `.shape[0]`.
        baseline: Indexed as `baseline[i, ]` and subtracted from the i-th
            votes before the threshold check. Only read when `threshold` is
            not None.
        threshold: Threshold of step 1, or None to skip thresholding, in
            which case every query proceeds to step 2 at no step-1 cost.
        sigma1: Noise parameter passed to the step-1 `pate` helpers.
        sigma2: Noise parameter passed to the step-2 `pate` helpers.
        delta: Delta handed to `pate.compute_eps_from_delta`.
        orders: RDP orders at which the curve is evaluated.
        data_ind: If true, use the data-independent bounds for both steps.

    Returns:
        The optimal order reported by `pate.compute_eps_from_delta` for the
        RDP accumulated over all queries.
    """
    rdp_cum = np.zeros(len(orders))
    rdp_sqrd_cum = np.zeros(len(orders))
    answered = 0

    for i, v in enumerate(votes):
        if threshold is None:
            logq_step1 = 0  # No thresholding, always proceed to step 2.
            rdp_step1 = np.zeros(len(orders))
        else:
            logq_step1 = pate.compute_logpr_answered(threshold, sigma1,
                                                     v - baseline[i, ])
            if data_ind:
                rdp_step1 = pate.compute_rdp_data_independent_threshold(
                    sigma1, orders)
            else:
                rdp_step1 = pate.compute_rdp_threshold(logq_step1, sigma1,
                                                       orders)

        if data_ind:
            rdp_step2 = pate.rdp_data_independent_gaussian(sigma2, orders)
        else:
            logq_step2 = pate.compute_logq_gaussian(v, sigma2)
            rdp_step2 = pate.rdp_gaussian(logq_step2, sigma2, orders)

        q_step1 = np.exp(logq_step1)
        rdp = rdp_step1 + rdp_step2 * q_step1
        # The expression below evaluates
        #     E[(cost_of_step_1 + Bernoulli(pr_of_step_2) * cost_of_step_2)^2]
        rdp_sqrd = (rdp_step1**2 + 2 * rdp_step1 * q_step1 * rdp_step2 +
                    q_step1 * rdp_step2**2)
        rdp_sqrd_cum += rdp_sqrd

        rdp_cum += rdp
        answered += q_step1

        num_queries = i + 1  # Queries accumulated so far.
        if num_queries % 1000 == 0 or num_queries == votes.shape[0]:
            # Variance of the per-query cost without Bessel's correction.
            # Both moments are averaged over the number of queries seen so
            # far (i + 1); dividing by the index i would skew the estimate
            # and divide by zero when there is only a single query.
            rdp_var = (rdp_sqrd_cum / num_queries -
                       (rdp_cum / num_queries)**2)
            eps_total, order_opt = pate.compute_eps_from_delta(
                orders, rdp_cum, delta)
            # Assumes `orders` is sorted ascending, as searchsorted requires.
            order_opt_idx = np.searchsorted(orders, order_opt)
            # Std of the sum.
            eps_std = (num_queries * rdp_var[order_opt_idx])**.5
            print(
                'queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} (std = {:.5f}) '
                'at order = {:.2f} (contribution from delta = {:.3f})'.format(
                    num_queries, answered, eps_total, eps_std, order_opt,
                    -math.log(delta) / (order_opt - 1)))
            sys.stdout.flush()

    # Only the order for the fully accumulated curve is returned, so it is
    # computed once here rather than on every iteration. This also keeps
    # `order_opt` defined when `votes` is empty.
    _, order_opt = pate.compute_eps_from_delta(orders, rdp_cum, delta)

    return order_opt
Ejemplo n.º 2
0
def _compute_rdp(votes, baseline, threshold, sigma1, sigma2, delta, orders,
                 data_ind):
  """Computes the (data-dependent) RDP curve for Confident GNMax.

  Walks over the queries in `votes`, accumulating the expected RDP cost of the
  thresholding step (step 1) and of the noisy-max step (step 2, weighted by
  the probability that step 1 lets the query through). Progress is printed
  after every 1000 queries and after the last one.

  Args:
    votes: Per-query votes; iterated row by row and must expose `.shape[0]`.
    baseline: Indexed as `baseline[i,]` and subtracted from the i-th votes
      before the threshold check. Only read when `threshold` is not None.
    threshold: Threshold of step 1, or None to skip thresholding, in which
      case every query proceeds to step 2 at no step-1 cost.
    sigma1: Noise parameter passed to the step-1 `pate` helpers.
    sigma2: Noise parameter passed to the step-2 `pate` helpers.
    delta: Delta handed to `pate.compute_eps_from_delta`.
    orders: RDP orders at which the curve is evaluated.
    data_ind: If true, use the data-independent bounds for both steps.

  Returns:
    The optimal order reported by `pate.compute_eps_from_delta` for the RDP
    accumulated over all queries.
  """
  rdp_cum = np.zeros(len(orders))
  rdp_sqrd_cum = np.zeros(len(orders))
  answered = 0

  for i, v in enumerate(votes):
    if threshold is None:
      logq_step1 = 0  # No thresholding, always proceed to step 2.
      rdp_step1 = np.zeros(len(orders))
    else:
      logq_step1 = pate.compute_logpr_answered(threshold, sigma1,
                                               v - baseline[i,])
      if data_ind:
        rdp_step1 = pate.compute_rdp_data_independent_threshold(sigma1, orders)
      else:
        rdp_step1 = pate.compute_rdp_threshold(logq_step1, sigma1, orders)

    if data_ind:
      rdp_step2 = pate.rdp_data_independent_gaussian(sigma2, orders)
    else:
      logq_step2 = pate.compute_logq_gaussian(v, sigma2)
      rdp_step2 = pate.rdp_gaussian(logq_step2, sigma2, orders)

    q_step1 = np.exp(logq_step1)
    rdp = rdp_step1 + rdp_step2 * q_step1
    # The expression below evaluates
    #     E[(cost_of_step_1 + Bernoulli(pr_of_step_2) * cost_of_step_2)^2]
    rdp_sqrd = (
        rdp_step1**2 + 2 * rdp_step1 * q_step1 * rdp_step2 +
        q_step1 * rdp_step2**2)
    rdp_sqrd_cum += rdp_sqrd

    rdp_cum += rdp
    answered += q_step1

    num_queries = i + 1  # Queries accumulated so far.
    if num_queries % 1000 == 0 or num_queries == votes.shape[0]:
      # Variance of the per-query cost without Bessel's correction. Both
      # moments are averaged over the number of queries seen so far (i + 1);
      # dividing by the index i would skew the estimate and divide by zero
      # when there is only a single query.
      rdp_var = rdp_sqrd_cum / num_queries - (rdp_cum / num_queries)**2
      eps_total, order_opt = pate.compute_eps_from_delta(orders, rdp_cum, delta)
      # Assumes `orders` is sorted ascending, as searchsorted requires.
      order_opt_idx = np.searchsorted(orders, order_opt)
      eps_std = (num_queries * rdp_var[order_opt_idx])**.5  # Std of the sum.
      print(
          'queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} (std = {:.5f}) '
          'at order = {:.2f} (contribution from delta = {:.3f})'.format(
              num_queries, answered, eps_total, eps_std, order_opt,
              -math.log(delta) / (order_opt - 1)))
      sys.stdout.flush()

  # Only the order for the fully accumulated curve is returned, so it is
  # computed once here rather than on every iteration. This also keeps
  # `order_opt` defined when `votes` is empty.
  _, order_opt = pate.compute_eps_from_delta(orders, rdp_cum, delta)

  return order_opt
Ejemplo n.º 3
0
def analyze_gnmax_conf_data_ind(votes, threshold, sigma1, sigma2, delta):
    """Tracks the data-independent privacy cost of Confident GNMax per query.

    For every row of `votes` the data-independent RDP bounds of the optional
    thresholding step and of the noisy-max step (weighted by the probability
    of answering) are added to a running total, which is then converted to
    an epsilon via `pate.compute_eps_from_delta`. Progress is printed every
    1000 queries.

    Args:
        votes: 2-D array of votes, indexed as `votes[i, ]` per query.
        threshold: Threshold of the first step. Thresholding is applied only
            when both `threshold` and `sigma1` are not None.
        sigma1: Noise parameter of the thresholding step.
        sigma2: Noise parameter of the noisy-max step.
        delta: Delta handed to `pate.compute_eps_from_delta`.

    Returns:
        A pair `(eps_cum, answered)` of float arrays with one entry per
        query: the epsilon after that query and the expected number of
        queries answered so far.
    """
    orders = np.logspace(np.log10(1.5), np.log10(500), num=100)
    n = votes.shape[0]

    use_threshold = threshold is not None and sigma1 is not None

    # The data-independent bounds depend only on the noise parameter and the
    # orders, so they are evaluated once instead of once per query.
    rdp_step1 = (pate.rdp_data_independent_gaussian(sigma1, orders)
                 if use_threshold else None)
    rdp_step2 = pate.rdp_data_independent_gaussian(sigma2, orders)

    rdp_total = np.zeros(len(orders))
    answered_total = 0
    answered = np.zeros(n)
    # Explicit NaN placeholder for "not computed yet".
    eps_cum = np.full(n, np.nan, dtype=float)

    for i in range(n):
        if use_threshold:
            q_step1 = np.exp(
                pate.compute_logpr_answered(threshold, sigma1, votes[i, ]))
            rdp_total += rdp_step1
        else:
            q_step1 = 1.  # always answer

        answered_total += q_step1
        answered[i] = answered_total

        # Step 2 is only paid for with the probability of answering.
        rdp_total += q_step1 * rdp_step2

        eps_cum[i], order_opt = pate.compute_eps_from_delta(
            orders, rdp_total, delta)

        if (i + 1) % 1000 == 0:
            print('queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} '
                  'at order = {:.2f}.'.format(i + 1, answered[i], eps_cum[i],
                                              order_opt))
            sys.stdout.flush()

    return eps_cum, answered
Ejemplo n.º 4
0
def analyze_gnmax_conf_data_ind(votes, threshold, sigma1, sigma2, delta):
  """Tracks the data-independent privacy cost of Confident GNMax per query.

  For every row of `votes` the data-independent RDP bounds of the optional
  thresholding step and of the noisy-max step (weighted by the probability of
  answering) are added to a running total, which is then converted to an
  epsilon via `pate.compute_eps_from_delta`. Progress is printed every 1000
  queries.

  Args:
    votes: 2-D array of votes, indexed as `votes[i,]` per query.
    threshold: Threshold of the first step. Thresholding is applied only when
      both `threshold` and `sigma1` are not None.
    sigma1: Noise parameter of the thresholding step.
    sigma2: Noise parameter of the noisy-max step.
    delta: Delta handed to `pate.compute_eps_from_delta`.

  Returns:
    A pair `(eps_cum, answered)` of float arrays with one entry per query:
    the epsilon after that query and the expected number of queries answered
    so far.
  """
  orders = np.logspace(np.log10(1.5), np.log10(500), num=100)
  n = votes.shape[0]

  use_threshold = threshold is not None and sigma1 is not None

  # The data-independent bounds depend only on the noise parameter and the
  # orders, so they are evaluated once instead of once per query.
  rdp_step1 = (pate.rdp_data_independent_gaussian(sigma1, orders)
               if use_threshold else None)
  rdp_step2 = pate.rdp_data_independent_gaussian(sigma2, orders)

  rdp_total = np.zeros(len(orders))
  answered_total = 0
  answered = np.zeros(n)
  # Explicit NaN placeholder for "not computed yet".
  eps_cum = np.full(n, np.nan, dtype=float)

  for i in range(n):
    if use_threshold:
      q_step1 = np.exp(
          pate.compute_logpr_answered(threshold, sigma1, votes[i,]))
      rdp_total += rdp_step1
    else:
      q_step1 = 1.  # always answer

    answered_total += q_step1
    answered[i] = answered_total

    # Step 2 is only paid for with the probability of answering.
    rdp_total += q_step1 * rdp_step2

    eps_cum[i], order_opt = pate.compute_eps_from_delta(orders, rdp_total,
                                                        delta)

    if (i + 1) % 1000 == 0:
      print('queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} '
            'at order = {:.2f}.'.format(
                i + 1, answered[i], eps_cum[i], order_opt))
      sys.stdout.flush()

  return eps_cum, answered
Ejemplo n.º 5
0
  def _test_compute_eps_from_delta_monotonicity(self):
    """Checks that epsilon never increases as delta grows."""
    orders = [1.1, 2.5, 250.0]
    sigmas = [1e-3, 1.0, 1e5]
    deltas = [1e-60, 1e-6, 0.1, 0.999]  # Listed in increasing order.
    for sigma in sigmas:
      rdps_for_gaussian = np.array(orders) / (2 * sigma**2)
      eps_per_delta = [
          pate.compute_eps_from_delta(orders, rdps_for_gaussian, delta)[0]
          for delta in deltas
      ]

      # With deltas ascending, the epsilons must already be in descending
      # order, i.e. sorting them in reverse must leave the list unchanged.
      self.assertEqual(eps_per_delta, sorted(eps_per_delta, reverse=True))
Ejemplo n.º 6
0
def run_analysis(votes, mechanism, noise_scale, params):
  """Computes data-dependent privacy.

  Args:
    votes: A matrix of votes, where each row contains votes in one instance.
    mechanism: A name of the mechanism ('lnmax', 'gnmax', or 'gnmax_conf')
    noise_scale: A mechanism privacy parameter.
    params: Other privacy parameters. Only read for 'gnmax_conf', where the
      keys 't' (threshold) and 'sigma1' (noise of the thresholding step) are
      looked up.

  Returns:
    A tuple (eps_total, partition, answered, order_opt) with one entry per
    query: cumulative privacy cost epsilon, how privacy budget is split (a
    list of fractions), expected number of queries answered so far, and the
    optimal order.

  Raises:
    ValueError: If `mechanism` is not one of the supported names (detected
      when the first query is processed).
  """

  def compute_partition(order_opt, eps):
    # Splits eps into the share of each RDP component at the optimal order
    # plus the share contributed by delta. Reads the running totals of the
    # enclosing function at call time.
    order_opt_idx = np.searchsorted(orders, order_opt)
    if mechanism == 'gnmax_conf':
      p = (rdp_select_cum[order_opt_idx],
           rdp_cum[order_opt_idx] - rdp_select_cum[order_opt_idx],
           -math.log(delta) / (order_opt - 1))
    else:
      p = (rdp_cum[order_opt_idx], -math.log(delta) / (order_opt - 1))
    return [x / eps for x in p]  # Ensures that sum(x) == 1

  # Short list of orders.
  # orders = np.round(np.concatenate((np.arange(2, 50 + 1, 1),
  #                   np.logspace(np.log10(50), np.log10(1000), num=20))))

  # Long list of orders.
  orders = np.concatenate((np.arange(2, 100 + 1, .5),
                           np.logspace(np.log10(100), np.log10(500), num=100)))
  delta = 1e-8

  n = votes.shape[0]
  eps_total = np.zeros(n)
  partition = [None] * n
  order_opt = np.full(n, np.nan, dtype=float)
  answered = np.zeros(n, dtype=float)

  rdp_cum = np.zeros(len(orders))
  rdp_sqrd_cum = np.zeros(len(orders))
  rdp_select_cum = np.zeros(len(orders))
  answered_sum = 0

  for i in range(n):
    v = votes[i,]
    if mechanism == 'lnmax':
      logq_lnmax = pate.compute_logq_laplace(v, noise_scale)
      rdp_query = pate.rdp_pure_eps(logq_lnmax, 2. / noise_scale, orders)
      rdp_sqrd = rdp_query ** 2
      pr_answered = 1
    elif mechanism == 'gnmax':
      logq_gmax = pate.compute_logq_gaussian(v, noise_scale)
      rdp_query = pate.rdp_gaussian(logq_gmax, noise_scale, orders)
      rdp_sqrd = rdp_query ** 2
      pr_answered = 1
    elif mechanism == 'gnmax_conf':
      logq_step1 = pate.compute_logpr_answered(params['t'], params['sigma1'], v)
      logq_step2 = pate.compute_logq_gaussian(v, noise_scale)
      q_step1 = np.exp(logq_step1)
      # Use the log-probability of the less likely outcome of step 1
      # (answered vs. not answered).
      logq_step1_min = min(logq_step1, math.log1p(-q_step1))
      rdp_gnmax_step1 = pate.rdp_gaussian(logq_step1_min,
                                          2 ** .5 * params['sigma1'], orders)
      rdp_gnmax_step2 = pate.rdp_gaussian(logq_step2, noise_scale, orders)
      rdp_query = rdp_gnmax_step1 + q_step1 * rdp_gnmax_step2
      # The expression below evaluates
      #     E[(cost_of_step_1 + Bernoulli(pr_of_step_2) * cost_of_step_2)^2]
      rdp_sqrd = (
          rdp_gnmax_step1 ** 2 + 2 * rdp_gnmax_step1 * q_step1 * rdp_gnmax_step2
          + q_step1 * rdp_gnmax_step2 ** 2)
      rdp_select_cum += rdp_gnmax_step1
      pr_answered = q_step1
    else:
      raise ValueError(
          'Mechanism must be one of ["lnmax", "gnmax", "gnmax_conf"]')

    rdp_cum += rdp_query
    rdp_sqrd_cum += rdp_sqrd
    answered_sum += pr_answered

    answered[i] = answered_sum
    eps_total[i], order_opt[i] = pate.compute_eps_from_delta(
        orders, rdp_cum, delta)
    partition[i] = compute_partition(order_opt[i], eps_total[i])

    num_queries = i + 1  # Queries accumulated so far.
    if num_queries % 1000 == 0:
      # Variance of the per-query cost without Bessel's correction. Both
      # moments are averaged over the number of queries seen so far (i + 1)
      # rather than over the loop index i.
      rdp_var = rdp_sqrd_cum / num_queries - (rdp_cum / num_queries) ** 2
      order_opt_idx = np.searchsorted(orders, order_opt[i])
      eps_std = (num_queries * rdp_var[order_opt_idx]) ** .5  # Std of the sum.
      print(
          'queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} (std = {:.5f}) '
          'at order = {:.2f} (contribution from delta = {:.3f})'.format(
              num_queries, answered_sum, eps_total[i], eps_std, order_opt[i],
              -math.log(delta) / (order_opt[i] - 1)))
      sys.stdout.flush()

  return eps_total, partition, answered, order_opt
Ejemplo n.º 7
0
def analyze_gnmax_conf_data_dep(votes, threshold, sigma1, sigma2, delta):
    """Tracks the data-dependent privacy cost of Confident GNMax per query.

    For each row of `votes` this accumulates the RDP of the optional
    thresholding step (step 1), the RDP of the noisy-max step (step 2,
    weighted by the probability of answering), and local-sensitivity bounds
    from which an additional smooth-sensitivity RDP term is derived. After
    every query the optimal order is obtained from
    `pate.compute_eps_from_delta` and the contributions at that order are
    recorded. A progress line is printed for every query after the first.

    Args:
        votes: 2-D array of votes indexed as `votes[i, ]`; its shape gives
            the number of queries and classes, and the sum of the first row
            is taken as the number of teachers.
        threshold: Threshold of step 1. Thresholding is applied only when
            both `threshold` and `sigma1` are not None.
        sigma1: Noise parameter of the thresholding step.
        sigma2: Noise parameter of the noisy-max step.
        delta: Delta handed to `pate.compute_eps_from_delta`.

    Returns:
        A tuple `(eps_partitioned, answered, ss_std_opt, order_opt)` of
        arrays with one entry per query: the `Partition` of the cost at the
        optimal order (step1, step2, ss and delta contributions), the
        expected number of queries answered so far, `ss * sigma_ss` at the
        optimal order, and the optimal order itself.
    """
    # Short list of orders.
    # orders = np.round(np.logspace(np.log10(20), np.log10(200), num=20))

    # Long list of orders.
    orders = np.concatenate((np.arange(20, 40, .2), np.arange(40, 75, .5),
                             np.logspace(np.log10(75), np.log10(200), num=20)))

    n = votes.shape[0]
    num_classes = votes.shape[1]
    # Presumably every row sums to the same teacher count; only row 0 is
    # inspected here -- verify against the caller.
    num_teachers = int(sum(votes[0, ]))

    # Per-order flags; where a flag is True the corresponding
    # local-sensitivity bound is set to zero in the loop below.
    if threshold is not None and sigma1 is not None:
        is_data_ind_step1 = pate.is_data_independent_always_opt_gaussian(
            num_teachers, num_classes, sigma1, orders)
    else:
        is_data_ind_step1 = [True] * len(orders)

    is_data_ind_step2 = pate.is_data_independent_always_opt_gaussian(
        num_teachers, num_classes, sigma2, orders)

    # `Partition` is defined outside this block; from its use below it is
    # presumably a namedtuple with fields step1, step2, ss, delta -- confirm.
    eps_partitioned = np.full(n, None, dtype=Partition)
    # NOTE(review): filling a float array with None relies on NumPy
    # converting None to NaN; np.nan would state the intent explicitly.
    order_opt = np.full(n, None, dtype=float)
    ss_std_opt = np.full(n, None, dtype=float)
    answered = np.zeros(n)

    # Running RDP totals of the two steps, one entry per order.
    rdp_step1_total = np.zeros(len(orders))
    rdp_step2_total = np.zeros(len(orders))

    # Running local-sensitivity bounds: one row per order, one column per
    # teacher.
    ls_total = np.zeros((len(orders), num_teachers))
    answered_total = 0

    for i in range(n):
        v = votes[i, ]

        if threshold is not None and sigma1 is not None:
            logq_step1 = pate.compute_logpr_answered(threshold, sigma1, v)
            rdp_step1_total += pate.compute_rdp_threshold(
                logq_step1, sigma1, orders)
        else:
            logq_step1 = 0.  # always answer

        pr_answered = np.exp(logq_step1)
        logq_step2 = pate.compute_logq_gaussian(v, sigma2)
        # Step 2 is only paid for with the probability of answering.
        rdp_step2_total += pr_answered * pate.rdp_gaussian(
            logq_step2, sigma2, orders)

        answered_total += pr_answered

        # Recomputed from scratch for every query from the cumulative
        # local-sensitivity bounds.
        rdp_ss = np.zeros(len(orders))
        ss_std = np.zeros(len(orders))

        for j, order in enumerate(orders):
            if not is_data_ind_step1[j]:
                ls_step1 = pate_ss.compute_local_sensitivity_bounds_threshold(
                    v, num_teachers, threshold, sigma1, order)
            else:
                ls_step1 = np.full(num_teachers, 0, dtype=float)

            if not is_data_ind_step2[j]:
                ls_step2 = pate_ss.compute_local_sensitivity_bounds_gnmax(
                    v, num_teachers, sigma2, order)
            else:
                ls_step2 = np.full(num_teachers, 0, dtype=float)

            ls_total[j, ] += ls_step1 + pr_answered * ls_step2

            beta_ss = .49 / order

            ss = pate_ss.compute_discounted_max(beta_ss, ls_total[j, ])
            # NOTE(review): divides by ss; if the discounted max can be 0
            # (e.g. all bounds zero) this yields inf or an error -- confirm.
            sigma_ss = ((order * math.exp(2 * beta_ss)) / ss)**(1 / 3)
            rdp_ss[j] = pate_ss.compute_rdp_of_smooth_sensitivity_gaussian(
                beta_ss, sigma_ss, order)
            ss_std[j] = ss * sigma_ss

        rdp_total = rdp_step1_total + rdp_step2_total + rdp_ss

        answered[i] = answered_total
        _, order_opt[i] = pate.compute_eps_from_delta(orders, rdp_total, delta)
        order_idx = np.searchsorted(orders, order_opt[i])

        # Since optimal orders are always non-increasing, shrink orders array
        # and all cumulative arrays to speed up computation.
        # Only prefixes are kept, so index j still refers to the same order;
        # `ls_total` and the `is_data_ind_*` flags keep their full length and
        # their trailing entries are simply no longer visited.
        # NOTE(review): if order_idx ever equalled len(orders), the indexing
        # below would be out of range; presumably order_opt[i] is always an
        # element of `orders` -- confirm.
        if order_idx < len(orders):
            orders = orders[:order_idx + 1]
            rdp_step1_total = rdp_step1_total[:order_idx + 1]
            rdp_step2_total = rdp_step2_total[:order_idx + 1]

        eps_partitioned[i] = Partition(step1=rdp_step1_total[order_idx],
                                       step2=rdp_step2_total[order_idx],
                                       ss=rdp_ss[order_idx],
                                       delta=-math.log(delta) /
                                       (order_opt[i] - 1))
        ss_std_opt[i] = ss_std[order_idx]
        # NOTE(review): `(i + 1) % 1 == 0` holds for every integer i, so this
        # prints after every query except the first; a larger reporting
        # interval may have been intended -- confirm.
        if i > 0 and (i + 1) % 1 == 0:
            print(
                'queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} +/- {:.3f} '
                'at order = {:.2f}. Contributions: delta = {:.3f}, step1 = {:.3f}, '
                'step2 = {:.3f}, ss = {:.3f}'.format(
                    i + 1, answered[i], sum(eps_partitioned[i]), ss_std_opt[i],
                    order_opt[i], eps_partitioned[i].delta,
                    eps_partitioned[i].step1, eps_partitioned[i].step2,
                    eps_partitioned[i].ss))
            sys.stdout.flush()

    return eps_partitioned, answered, ss_std_opt, order_opt
Ejemplo n.º 8
0
def analyze_gnmax_conf_data_dep(votes, threshold, sigma1, sigma2, delta):
  """Tracks the data-dependent privacy cost of Confident GNMax per query.

  For each row of `votes` this accumulates the RDP of the optional
  thresholding step (step 1), the RDP of the noisy-max step (step 2, weighted
  by the probability of answering), and local-sensitivity bounds from which
  an additional smooth-sensitivity RDP term is derived. After every query the
  optimal order is obtained from `pate.compute_eps_from_delta` and the
  contributions at that order are recorded. A progress line is printed for
  every query after the first.

  Args:
    votes: 2-D array of votes indexed as `votes[i,]`; its shape gives the
      number of queries and classes, and the sum of the first row is taken as
      the number of teachers.
    threshold: Threshold of step 1. Thresholding is applied only when both
      `threshold` and `sigma1` are not None.
    sigma1: Noise parameter of the thresholding step.
    sigma2: Noise parameter of the noisy-max step.
    delta: Delta handed to `pate.compute_eps_from_delta`.

  Returns:
    A tuple `(eps_partitioned, answered, ss_std_opt, order_opt)` of arrays
    with one entry per query: the `Partition` of the cost at the optimal
    order (step1, step2, ss and delta contributions), the expected number of
    queries answered so far, `ss * sigma_ss` at the optimal order, and the
    optimal order itself.
  """
  # Short list of orders.
  # orders = np.round(np.logspace(np.log10(20), np.log10(200), num=20))

  # Long list of orders.
  orders = np.concatenate((np.arange(20, 40, .2),
                           np.arange(40, 75, .5),
                            np.logspace(np.log10(75), np.log10(200), num=20)))

  n = votes.shape[0]
  num_classes = votes.shape[1]
  # Presumably every row sums to the same teacher count; only row 0 is
  # inspected here -- verify against the caller.
  num_teachers = int(sum(votes[0,]))

  # Per-order flags; where a flag is True the corresponding local-sensitivity
  # bound is set to zero in the loop below.
  if threshold is not None and sigma1 is not None:
    is_data_ind_step1 = pate.is_data_independent_always_opt_gaussian(
        num_teachers, num_classes, sigma1, orders)
  else:
    is_data_ind_step1 = [True] * len(orders)

  is_data_ind_step2 = pate.is_data_independent_always_opt_gaussian(
      num_teachers, num_classes, sigma2, orders)

  # `Partition` is defined outside this block; from its use below it is
  # presumably a namedtuple with fields step1, step2, ss, delta -- confirm.
  eps_partitioned = np.full(n, None, dtype=Partition)
  # NOTE(review): filling a float array with None relies on NumPy converting
  # None to NaN; np.nan would state the intent explicitly.
  order_opt = np.full(n, None, dtype=float)
  ss_std_opt = np.full(n, None, dtype=float)
  answered = np.zeros(n)

  # Running RDP totals of the two steps, one entry per order.
  rdp_step1_total = np.zeros(len(orders))
  rdp_step2_total = np.zeros(len(orders))

  # Running local-sensitivity bounds: one row per order, one column per
  # teacher.
  ls_total = np.zeros((len(orders), num_teachers))
  answered_total = 0

  for i in range(n):
    v = votes[i,]

    if threshold is not None and sigma1 is not None:
      logq_step1 = pate.compute_logpr_answered(threshold, sigma1, v)
      rdp_step1_total += pate.compute_rdp_threshold(logq_step1, sigma1, orders)
    else:
      logq_step1 = 0.  # always answer

    pr_answered = np.exp(logq_step1)
    logq_step2 = pate.compute_logq_gaussian(v, sigma2)
    # Step 2 is only paid for with the probability of answering.
    rdp_step2_total += pr_answered * pate.rdp_gaussian(logq_step2, sigma2,
                                                       orders)

    answered_total += pr_answered

    # Recomputed from scratch for every query from the cumulative
    # local-sensitivity bounds.
    rdp_ss = np.zeros(len(orders))
    ss_std = np.zeros(len(orders))

    for j, order in enumerate(orders):
      if not is_data_ind_step1[j]:
        ls_step1 = pate_ss.compute_local_sensitivity_bounds_threshold(v,
            num_teachers, threshold, sigma1, order)
      else:
        ls_step1 = np.full(num_teachers, 0, dtype=float)

      if not is_data_ind_step2[j]:
        ls_step2 = pate_ss.compute_local_sensitivity_bounds_gnmax(
            v, num_teachers, sigma2, order)
      else:
        ls_step2 = np.full(num_teachers, 0, dtype=float)

      ls_total[j,] += ls_step1 + pr_answered * ls_step2

      beta_ss = .49 / order

      ss = pate_ss.compute_discounted_max(beta_ss, ls_total[j,])
      # NOTE(review): divides by ss; if the discounted max can be 0 (e.g. all
      # bounds zero) this yields inf or an error -- confirm.
      sigma_ss = ((order * math.exp(2 * beta_ss)) / ss) ** (1 / 3)
      rdp_ss[j] = pate_ss.compute_rdp_of_smooth_sensitivity_gaussian(
          beta_ss, sigma_ss, order)
      ss_std[j] = ss * sigma_ss

    rdp_total = rdp_step1_total + rdp_step2_total + rdp_ss

    answered[i] = answered_total
    _, order_opt[i] = pate.compute_eps_from_delta(orders, rdp_total, delta)
    order_idx = np.searchsorted(orders, order_opt[i])

    # Since optimal orders are always non-increasing, shrink orders array
    # and all cumulative arrays to speed up computation.
    # Only prefixes are kept, so index j still refers to the same order;
    # `ls_total` and the `is_data_ind_*` flags keep their full length and
    # their trailing entries are simply no longer visited.
    # NOTE(review): if order_idx ever equalled len(orders), the indexing below
    # would be out of range; presumably order_opt[i] is always an element of
    # `orders` -- confirm.
    if order_idx < len(orders):
      orders = orders[:order_idx + 1]
      rdp_step1_total = rdp_step1_total[:order_idx + 1]
      rdp_step2_total = rdp_step2_total[:order_idx + 1]

    eps_partitioned[i] = Partition(step1=rdp_step1_total[order_idx],
                                   step2=rdp_step2_total[order_idx],
                                   ss=rdp_ss[order_idx],
                                   delta=-math.log(delta) / (order_opt[i] - 1))
    ss_std_opt[i] = ss_std[order_idx]
    # NOTE(review): `(i + 1) % 1 == 0` holds for every integer i, so this
    # prints after every query except the first; a larger reporting interval
    # may have been intended -- confirm.
    if i > 0 and (i + 1) % 1 == 0:
      print('queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} +/- {:.3f} '
            'at order = {:.2f}. Contributions: delta = {:.3f}, step1 = {:.3f}, '
            'step2 = {:.3f}, ss = {:.3f}'.format(
          i + 1,
          answered[i],
          sum(eps_partitioned[i]),
          ss_std_opt[i],
          order_opt[i],
          eps_partitioned[i].delta,
          eps_partitioned[i].step1,
          eps_partitioned[i].step2,
          eps_partitioned[i].ss))
      sys.stdout.flush()

  return eps_partitioned, answered, ss_std_opt, order_opt
Ejemplo n.º 9
0
 def _test_compute_eps_from_delta_value_error(self):
   """Expects ValueError for four orders paired with three RDP values."""
   orders = [1.1, 2, 3, 4]
   rdp_values = [1, 2, 3]  # One entry fewer than `orders`.
   delta = 0.001
   with self.assertRaises(ValueError):
     pate.compute_eps_from_delta(orders, rdp_values, delta)