# Shared scaffolding for the snippets below, reconstructed from the call sites:
# `pate` is assumed to provide the RDP accounting helpers (compute_logq_gaussian,
# rdp_gaussian, compute_rdp_threshold, ...) and `pate_ss` the local/smooth
# sensitivity bounds. The module names are assumptions; adjust the import paths
# to your layout.
import math
import sys
import unittest
from collections import namedtuple

import numpy as np

import core as pate                    # assumed module name
import smooth_sensitivity as pate_ss   # assumed module name

# Per-query breakdown of the RDP budget; field names inferred from usage below.
Partition = namedtuple('Partition', ['step1', 'step2', 'ss', 'delta'])


class SmoothSensitivityTest(unittest.TestCase):
  """Tests for local sensitivity bounds (class wrapper reconstructed)."""

  def _assert_all_close(self, x, y):
    # Assumed helper: compares two arrays elementwise with a relative tolerance.
    self.assertEqual(len(x), len(y))
    self.assertTrue(np.allclose(x, y, rtol=1e-7, atol=0))

  def test_compute_local_sensitivity_bounds_gnmax(self):
    counts1 = np.array([10, 0, 0])
    sigma1 = .5
    order1 = 1.5

    answer1 = np.array(
        [3.13503646e-17, 1.60178280e-08, 5.90681786e-03] + [5.99981308e+00] * 7)

    # Test for "going right" in the smooth sensitivity computation.
    out1 = pate_ss.compute_local_sensitivity_bounds_gnmax(
        counts1, 10, sigma1, order1)

    self._assert_all_close(out1, answer1)

    counts2 = np.array([1000, 500, 300, 200, 0])
    sigma2 = 250.
    order2 = 10.

    # Test for "going left" in the smooth sensitivity computation.
    out2 = pate_ss.compute_local_sensitivity_bounds_gnmax(
        counts2, 2000, sigma2, order2)

    answer2 = np.array([0.] * 298 + [2.77693450548e-7, 2.10853979548e-6] +
                       [2.73113623988e-6] * 1700)
    self._assert_all_close(out2, answer2)
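
How these per-distance bounds are consumed downstream: a smooth sensitivity bound discounts the local sensitivity at distance d by exp(-beta * d) and takes the maximum over d. A minimal sketch of that step (the exact behavior of pate_ss.compute_discounted_max is assumed here, not shown in these snippets):

def discounted_max_sketch(beta, local_sensitivities):
  # SS_beta = max over d of exp(-beta * d) * LS(d); distances are array indices.
  d = np.arange(len(local_sensitivities))
  return float(np.max(np.exp(-beta * d) * local_sensitivities))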
def local_sensitivity(votes, num_teachers, sigma, order, thresh=None):
  """Computes local sensitivity bounds for a single vote histogram.

  Dispatches to the thresholded variant when a threshold is supplied, and to
  the plain GNMax variant otherwise.

  Args:
    votes: A numpy array of per-class vote counts for one query.
    num_teachers: Total number of teachers.
    sigma: Standard deviation of the Gaussian noise.
    order: The Renyi order at which the local sensitivity is bounded.
    thresh: Optional threshold of the answer/skip step.

  Returns:
    A numpy array of length num_teachers with local sensitivity bounds, one
    per distance from the input histogram.
  """
  if thresh is not None:
    return pate_ss.compute_local_sensitivity_bounds_threshold(
        votes, num_teachers, thresh, sigma, order)
  return pate_ss.compute_local_sensitivity_bounds_gnmax(
      votes, num_teachers, sigma, order)
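
A minimal usage sketch of the dispatcher above; the vote counts and noise levels are illustrative only:

votes = np.array([48, 1, 1])  # one query: 50 teachers voting over 3 classes.
ls_gnmax = local_sensitivity(votes, num_teachers=50, sigma=40., order=20.)
ls_thresh = local_sensitivity(
    votes, num_teachers=50, sigma=40., order=20., thresh=45.)
# Each call returns a length-50 array of local sensitivity bounds, one entry
# per distance to a neighboring vote histogram (cf. the test above).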
def analyze_gnmax_conf_data_dep(votes, threshold, sigma1, sigma2, delta):
  # Short list of orders.
  # orders = np.round(np.logspace(np.log10(20), np.log10(200), num=20))

  # Long list of orders.
  orders = np.concatenate((np.arange(20, 40, .2),
                           np.arange(40, 75, .5),
                           np.logspace(np.log10(75), np.log10(200), num=20)))

  n = votes.shape[0]
  num_classes = votes.shape[1]
  num_teachers = int(sum(votes[0,]))

  if threshold is not None and sigma1 is not None:
    is_data_ind_step1 = pate.is_data_independent_always_opt_gaussian(
        num_teachers, num_classes, sigma1, orders)
  else:
    is_data_ind_step1 = [True] * len(orders)

  is_data_ind_step2 = pate.is_data_independent_always_opt_gaussian(
      num_teachers, num_classes, sigma2, orders)

  eps_partitioned = np.full(n, None, dtype=object)  # holds Partition tuples.
  order_opt = np.full(n, None, dtype=float)
  ss_std_opt = np.full(n, None, dtype=float)
  answered = np.zeros(n)

  rdp_step1_total = np.zeros(len(orders))
  rdp_step2_total = np.zeros(len(orders))

  ls_total = np.zeros((len(orders), num_teachers))
  answered_total = 0

  for i in range(n):
    v = votes[i,]

    if threshold is not None and sigma1 is not None:
      logq_step1 = pate.compute_logpr_answered(threshold, sigma1, v)
      rdp_step1_total += pate.compute_rdp_threshold(logq_step1, sigma1, orders)
    else:
      logq_step1 = 0.  # always answer

    pr_answered = np.exp(logq_step1)
    logq_step2 = pate.compute_logq_gaussian(v, sigma2)
    rdp_step2_total += pr_answered * pate.rdp_gaussian(logq_step2, sigma2,
                                                       orders)

    answered_total += pr_answered

    rdp_ss = np.zeros(len(orders))
    ss_std = np.zeros(len(orders))

    for j, order in enumerate(orders):
      if not is_data_ind_step1[j]:
        ls_step1 = pate_ss.compute_local_sensitivity_bounds_threshold(
            v, num_teachers, threshold, sigma1, order)
      else:
        ls_step1 = np.full(num_teachers, 0, dtype=float)

      if not is_data_ind_step2[j]:
        ls_step2 = pate_ss.compute_local_sensitivity_bounds_gnmax(
            v, num_teachers, sigma2, order)
      else:
        ls_step2 = np.full(num_teachers, 0, dtype=float)

      ls_total[j,] += ls_step1 + pr_answered * ls_step2

      # Smoothness parameter for the GNSS step; kept strictly below .5 / order.
      beta_ss = .49 / order

      ss = pate_ss.compute_discounted_max(beta_ss, ls_total[j,])
      sigma_ss = ((order * math.exp(2 * beta_ss)) / ss) ** (1 / 3)
      rdp_ss[j] = pate_ss.compute_rdp_of_smooth_sensitivity_gaussian(
          beta_ss, sigma_ss, order)
      ss_std[j] = ss * sigma_ss

    rdp_total = rdp_step1_total + rdp_step2_total + rdp_ss

    answered[i] = answered_total
    _, order_opt[i] = pate.compute_eps_from_delta(orders, rdp_total, delta)
    order_idx = np.searchsorted(orders, order_opt[i])

    # Since optimal orders are always non-increasing, shrink orders array
    # and all cumulative arrays to speed up computation.
    if order_idx < len(orders):
      orders = orders[:order_idx + 1]
      rdp_step1_total = rdp_step1_total[:order_idx + 1]
      rdp_step2_total = rdp_step2_total[:order_idx + 1]

    eps_partitioned[i] = Partition(step1=rdp_step1_total[order_idx],
                                   step2=rdp_step2_total[order_idx],
                                   ss=rdp_ss[order_idx],
                                   delta=-math.log(delta) / (order_opt[i] - 1))
    ss_std_opt[i] = ss_std[order_idx]
    if i > 0:  # Print progress after every query; the SS accounting is slow.
      print('queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} +/- {:.3f} '
            'at order = {:.2f}. Contributions: delta = {:.3f}, step1 = {:.3f}, '
            'step2 = {:.3f}, ss = {:.3f}'.format(
                i + 1, answered[i], sum(eps_partitioned[i]), ss_std_opt[i],
                order_opt[i], eps_partitioned[i].delta,
                eps_partitioned[i].step1, eps_partitioned[i].step2,
                eps_partitioned[i].ss))
      sys.stdout.flush()

  return eps_partitioned, answered, ss_std_opt, order_opt
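
A hypothetical end-to-end driver for the accountant above. The votes are synthetic and every parameter value is illustrative; summing a Partition relies on it being a namedtuple, as in the scaffolding at the top:

rng = np.random.default_rng(0)
# 20 queries answered by 250 teachers over 4 classes, skewed toward class 0.
synthetic_votes = rng.multinomial(250, [.7, .1, .1, .1], size=20)
parts, answered, ss_std, order_opt = analyze_gnmax_conf_data_dep(
    synthetic_votes, threshold=200., sigma1=150., sigma2=40., delta=1e-8)
print('final E[eps] = {:.3f} +/- {:.3f} (one std of the SS noise)'.format(
    sum(parts[-1]), ss_std[-1]))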
def _find_optimal_smooth_sensitivity_parameters(
    votes, baseline, num_teachers, threshold, sigma1, sigma2, delta, ind_step1,
    ind_step2, order):
  """Optimizes smooth sensitivity parameters by minimizing a cost function.

  The cost function is
        exact_eps + cost of GNSS + two stds of noise,
  which captures the upper bound of the confidence interval of the sanitized
  privacy budget.

  Since optimization is done with full view of sensitive data, the results
  cannot be released.
  """
  rdp_cum = 0
  answered_cum = 0
  ls_cum = 0

  # Define a plausible range for the beta values.
  betas = np.arange(.3 / order, .495 / order, .01 / order)
  # RDP-to-DP conversion term: eps(delta) = rdp + log(1/delta) / (order - 1).
  cost_delta = math.log(1 / delta) / (order - 1)

  for i, v in enumerate(votes):
    if threshold is None:
      log_pr_answered = 0
      rdp1 = 0
      ls_step1 = np.zeros(num_teachers)
    else:
      log_pr_answered = pate.compute_logpr_answered(threshold, sigma1,
                                                    v - baseline[i,])
      if ind_step1:  # apply data-independent bound for step 1 (thresholding).
        rdp1 = pate.compute_rdp_data_independent_threshold(sigma1, order)
        ls_step1 = np.zeros(num_teachers)
      else:
        rdp1 = pate.compute_rdp_threshold(log_pr_answered, sigma1, order)
        ls_step1 = pate_ss.compute_local_sensitivity_bounds_threshold(
            v - baseline[i,], num_teachers, threshold, sigma1, order)

    pr_answered = math.exp(log_pr_answered)
    answered_cum += pr_answered

    if ind_step2:  # apply data-independent bound for step 2 (GNMax).
      rdp2 = pate.rdp_data_independent_gaussian(sigma2, order)
      ls_step2 = np.zeros(num_teachers)
    else:
      logq_step2 = pate.compute_logq_gaussian(v, sigma2)
      rdp2 = pate.rdp_gaussian(logq_step2, sigma2, order)
      # Compute smooth sensitivity.
      ls_step2 = pate_ss.compute_local_sensitivity_bounds_gnmax(
          v, num_teachers, sigma2, order)

    rdp_cum += rdp1 + pr_answered * rdp2
    ls_cum += ls_step1 + pr_answered * ls_step2  # Expected local sensitivity.

    if ind_step1 and ind_step2:
      # Data-independent bounds.
      cost_opt, beta_opt, ss_opt, sigma_ss_opt = None, 0., 0., np.inf
    else:
      # Data-dependent bounds.
      cost_opt, beta_opt, ss_opt, sigma_ss_opt = np.inf, None, None, None

      for beta in betas:
        ss = pate_ss.compute_discounted_max(beta, ls_cum)

        # Solution to the minimization problem:
        #   min_sigma {order * exp(2 * beta)/ sigma^2 + 2 * ss * sigma}
        sigma_ss = ((order * math.exp(2 * beta)) / ss)**(1 / 3)
        cost_ss = pate_ss.compute_rdp_of_smooth_sensitivity_gaussian(
            beta, sigma_ss, order)

        # Cost captures exact_eps + cost of releasing SS + two stds of noise.
        cost = rdp_cum + cost_ss + 2 * ss * sigma_ss
        if cost < cost_opt:
          cost_opt, beta_opt, ss_opt, sigma_ss_opt = cost, beta, ss, sigma_ss

    if ((i + 1) % 100 == 0) or (i == votes.shape[0] - 1):
      eps_before_ss = rdp_cum + cost_delta
      eps_with_ss = (
          eps_before_ss + pate_ss.compute_rdp_of_smooth_sensitivity_gaussian(
              beta_opt, sigma_ss_opt, order))
      print('{}: E[answered queries] = {:.1f}, RDP at {} goes from {:.3f} to '
            '{:.3f} +/- {:.3f} (ss = {:.4}, beta = {:.4f}, sigma_ss = {:.3f})'
            .format(i + 1, answered_cum, order, eps_before_ss, eps_with_ss,
                    ss_opt * sigma_ss_opt, ss_opt, beta_opt, sigma_ss_opt))
      sys.stdout.flush()

  # Return optimal parameters for the last iteration.
  return beta_opt, ss_opt, sigma_ss_opt
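
The closed-form sigma_ss above solves min over sigma of order * exp(2 * beta) / sigma^2 + 2 * ss * sigma: setting the derivative to zero gives sigma^3 = order * exp(2 * beta) / ss. A standalone numeric spot-check of that minimizer (illustrative values, no pate dependencies):

order, beta, ss = 20., .49 / 20., .05
objective = lambda s: order * math.exp(2 * beta) / s**2 + 2 * ss * s
grid = np.linspace(1., 50., 100001)
sigma_star = (order * math.exp(2 * beta) / ss)**(1 / 3)
assert abs(grid[np.argmin(objective(grid))] - sigma_star) < 1e-2 * sigma_star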