Python compute_logq_gaussian Exemples, core.compute_logq_gaussian Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : plots_for_slides.py Projet : 812864539/models

def plot_rdp_curve_per_example(votes, sigmas):
  """Plots RDP as a function of the order, per example and per noise level.

  Draws one data-dependent curve for every (row of `votes`, sigma) pair and one
  data-independent curve for every sigma, then shows the figure.

  Args:
    votes: A 2-D array of votes; row `i` is read as `votes[i,]`.
    sigmas: An iterable of noise scales forwarded to the `pate` helpers.
  """
  orders = np.linspace(1., 100., endpoint=True, num=1000)
  # Nudge the first order off exactly 1.
  # NOTE(review): presumably the RDP formulas are undefined at order 1 -
  # confirm against pate.rdp_gaussian.
  orders[0] = 1.001
  _, ax = setup_plot()

  for i in range(votes.shape[0]):
    for sigma in sigmas:
      logq = pate.compute_logq_gaussian(votes[i,], sigma)
      rdp = pate.rdp_gaussian(logq, sigma, orders)
      ax.plot(
          orders,
          rdp,
          alpha=1.,
          label=r'Data-dependent bound, $\sigma$={}'.format(int(sigma)),
          linewidth=5)

  for sigma in sigmas:
    ax.plot(
        orders,
        pate.rdp_data_independent_gaussian(sigma, orders),
        alpha=.3,
        label=r'Data-independent bound, $\sigma$={}'.format(int(sigma)),
        linewidth=10)

  # Limits are passed positionally / as `bottom`: the `xmin`, `xmax` and `ymin`
  # keyword aliases are rejected by current Matplotlib releases.
  plt.xlim(1, 100)
  plt.ylim(bottom=0)
  plt.xticks([1, 20, 40, 60, 80, 100])
  plt.yticks([0, .0025, .005, .0075, .01])
  plt.xlabel(r'Order $\alpha$', fontsize=16)
  plt.ylabel(r'RDP value $\varepsilon$ at $\alpha$', fontsize=16)
  ax.tick_params(labelsize=14)

  plt.legend(loc=0, fontsize=13)
  plt.show()

Exemple #2

0

Afficher le fichier

def plot_rdp_curve_per_example(votes, sigmas):
  """Plots RDP as a function of the order, per example and per noise level.

  Draws one data-dependent curve for every (row of `votes`, sigma) pair and one
  data-independent curve for every sigma, then shows the figure.

  Args:
    votes: A 2-D array of votes; row `i` is read as `votes[i,]`.
    sigmas: An iterable of noise scales forwarded to the `pate` helpers.
  """
  orders = np.linspace(1., 100., endpoint=True, num=1000)
  # Nudge the first order off exactly 1.
  # NOTE(review): presumably the RDP formulas are undefined at order 1 -
  # confirm against pate.rdp_gaussian.
  orders[0] = 1.001
  _, ax = setup_plot()

  for i in range(votes.shape[0]):
    for sigma in sigmas:
      logq = pate.compute_logq_gaussian(votes[i,], sigma)
      rdp = pate.rdp_gaussian(logq, sigma, orders)
      ax.plot(
          orders,
          rdp,
          alpha=1.,
          label=r'Data-dependent bound, $\sigma$={}'.format(int(sigma)),
          linewidth=5)

  for sigma in sigmas:
    ax.plot(
        orders,
        pate.rdp_data_independent_gaussian(sigma, orders),
        alpha=.3,
        label=r'Data-independent bound, $\sigma$={}'.format(int(sigma)),
        linewidth=10)

  # Limits are passed positionally / as `bottom`: the `xmin`, `xmax` and `ymin`
  # keyword aliases are rejected by current Matplotlib releases.
  plt.xlim(1, 100)
  plt.ylim(bottom=0)
  plt.xticks([1, 20, 40, 60, 80, 100])
  plt.yticks([0, .0025, .005, .0075, .01])
  plt.xlabel(r'Order $\alpha$', fontsize=16)
  plt.ylabel(r'RDP value $\varepsilon$ at $\alpha$', fontsize=16)
  ax.tick_params(labelsize=14)

  plt.legend(loc=0, fontsize=13)
  plt.show()

Exemple #3

0

Afficher le fichier

def compute_privacy_cost_per_bins(bin_num, votes, sigma2, order):
  """Outputs average privacy cost per bin.

  Each example is assigned to a bin according to the fraction of its votes that
  went to the top-voted class, `max(v) / sum(v)`; the RDP costs of the examples
  falling into a bin are then averaged.

  Args:
    bin_num: Number of bins.
    votes: A matrix of votes, where each row contains votes in one instance.
    sigma2: The scale (std) of the Gaussian noise. (Same as sigma_2 in
            Algorithms 1 and 2.)
    order: The Renyi order for which privacy cost is computed.

  Returns:
    Expected eps of RDP (ignoring delta) per example in each bin. A bin that
    received no example is computed as 0 / 0.
  """
  n = votes.shape[0]

  bin_counts = np.zeros(bin_num)
  bin_rdp = np.zeros(bin_num)  # RDP at order=order

  # `range` instead of the Python-2-only `xrange`.
  for i in range(n):
    v = votes[i,]
    logq = pate.compute_logq_gaussian(v, sigma2)
    rdp_at_order = pate.rdp_gaussian(logq, sigma2, order)

    # A unanimous vote has max(v) == sum(v), which would yield index bin_num;
    # fold that boundary value into the last bin instead of failing.
    bin_idx = min(int(math.floor(max(v) * bin_num / sum(v))), bin_num - 1)
    assert 0 <= bin_idx < bin_num
    bin_counts[bin_idx] += 1
    bin_rdp[bin_idx] += rdp_at_order
    # Progress report every 1000 examples.
    if (i + 1) % 1000 == 0:
      print('example {}'.format(i + 1))
      sys.stdout.flush()

  return bin_rdp / bin_counts

Exemple #4

0

Afficher le fichier

Fichier : plots_for_slides.py Projet : 812864539/models

def scatter_plot(votes, threshold, sigma1, sigma2, order):
  """Scatter-plots the step-2 RDP cost of sampled queries against max count.

  Every vote vector is kept with probability `q_step1`: the exponential of
  `pate.compute_logpr_answered(threshold, sigma1, v)`, or 1 when `threshold` or
  `sigma1` is None. For each kept vector the largest vote count (x) and the
  RDP value at `order` (y, log scale) are plotted.

  Args:
    votes: An iterable of vote vectors.
    threshold: Threshold of the first step, or None to keep every query.
    sigma1: Noise scale of the first step, or None to keep every query.
    sigma2: Noise scale passed to the Gaussian RDP helpers.
    order: The Renyi order at which RDP is evaluated.
  """
  _, ax = setup_plot()
  x = []
  y = []
  for v in votes:
    if threshold is not None and sigma1 is not None:
      q_step1 = math.exp(pate.compute_logpr_answered(threshold, sigma1, v))
    else:
      q_step1 = 1.
    # Randomly sample the query with the probability computed above.
    if random.random() < q_step1:
      logq_step2 = pate.compute_logq_gaussian(v, sigma2)
      x.append(max(v))
      y.append(pate.rdp_gaussian(logq_step2, sigma2, order))

  print('Selected {} queries.'.format(len(x)))
  # Plot the data-independent curve:
  # data_ind = pate.rdp_data_independent_gaussian(sigma, order)
  # plt.plot([0, 5000], [data_ind, data_ind], color='tab:blue', linestyle='-', linewidth=2)
  ax.set_yscale('log')
  # Limits are passed positionally: the `xmin`/`xmax`/`ymin`/`ymax` keyword
  # aliases are rejected by current Matplotlib releases.
  plt.xlim(0, 5000)
  plt.ylim(1e-300, 1)
  plt.yticks([1, 1e-100, 1e-200, 1e-300])
  plt.scatter(x, y, s=1, alpha=0.5)
  plt.ylabel(r'RDP at $\alpha={}$'.format(order), fontsize=16)
  plt.xlabel(r'max count', fontsize=16)
  ax.tick_params(labelsize=14)
  plt.show()

Exemple #5

0

Afficher le fichier

def scatter_plot(votes, threshold, sigma1, sigma2, order):
  """Scatter-plots the step-2 RDP cost of sampled queries against max count.

  Every vote vector is kept with probability `q_step1`: the exponential of
  `pate.compute_logpr_answered(threshold, sigma1, v)`, or 1 when `threshold` or
  `sigma1` is None. For each kept vector the largest vote count (x) and the
  RDP value at `order` (y, log scale) are plotted.

  Args:
    votes: An iterable of vote vectors.
    threshold: Threshold of the first step, or None to keep every query.
    sigma1: Noise scale of the first step, or None to keep every query.
    sigma2: Noise scale passed to the Gaussian RDP helpers.
    order: The Renyi order at which RDP is evaluated.
  """
  _, ax = setup_plot()
  x = []
  y = []
  for v in votes:
    if threshold is not None and sigma1 is not None:
      q_step1 = math.exp(pate.compute_logpr_answered(threshold, sigma1, v))
    else:
      q_step1 = 1.
    # Randomly sample the query with the probability computed above.
    if random.random() < q_step1:
      logq_step2 = pate.compute_logq_gaussian(v, sigma2)
      x.append(max(v))
      y.append(pate.rdp_gaussian(logq_step2, sigma2, order))

  print('Selected {} queries.'.format(len(x)))
  # Plot the data-independent curve:
  # data_ind = pate.rdp_data_independent_gaussian(sigma, order)
  # plt.plot([0, 5000], [data_ind, data_ind], color='tab:blue', linestyle='-', linewidth=2)
  ax.set_yscale('log')
  # Limits are passed positionally: the `xmin`/`xmax`/`ymin`/`ymax` keyword
  # aliases are rejected by current Matplotlib releases.
  plt.xlim(0, 5000)
  plt.ylim(1e-300, 1)
  plt.yticks([1, 1e-100, 1e-200, 1e-300])
  plt.scatter(x, y, s=1, alpha=0.5)
  plt.ylabel(r'RDP at $\alpha={}$'.format(order), fontsize=16)
  plt.xlabel(r'max count', fontsize=16)
  ax.tick_params(labelsize=14)
  plt.show()

Exemple #6

0

Afficher le fichier

def _compute_rdp(votes, baseline, threshold, sigma1, sigma2, delta, orders,
                 data_ind):
    """Computes the (data-dependent) RDP curve for Confident GNMax.

    Accumulates, query by query, the expected RDP cost of the two steps
    (thresholding, then the Gaussian noisy max) and periodically prints the
    running privacy estimate.

    Args:
        votes: A matrix of votes, iterated row by row (`votes.shape[0]` rows).
        baseline: Per-query values subtracted from the votes before the
            threshold check; only read when `threshold` is not None.
        threshold: Threshold of step 1, or None to skip thresholding.
        sigma1: Noise scale of step 1.
        sigma2: Noise scale of step 2.
        delta: Target delta used when converting RDP to eps.
        orders: Sequence of Renyi orders at which RDP is tracked.
        data_ind: If true, use the data-independent bounds for both steps.

    Returns:
        The optimal order reported by `pate.compute_eps_from_delta` for the
        RDP accumulated over all queries.
    """
    rdp_cum = np.zeros(len(orders))
    rdp_sqrd_cum = np.zeros(len(orders))
    answered = 0

    for i, v in enumerate(votes):
        if threshold is None:
            logq_step1 = 0  # No thresholding, always proceed to step 2.
            rdp_step1 = np.zeros(len(orders))
        else:
            logq_step1 = pate.compute_logpr_answered(threshold, sigma1,
                                                     v - baseline[i, ])
            if data_ind:
                rdp_step1 = pate.compute_rdp_data_independent_threshold(
                    sigma1, orders)
            else:
                rdp_step1 = pate.compute_rdp_threshold(logq_step1, sigma1,
                                                       orders)

        if data_ind:
            rdp_step2 = pate.rdp_data_independent_gaussian(sigma2, orders)
        else:
            logq_step2 = pate.compute_logq_gaussian(v, sigma2)
            rdp_step2 = pate.rdp_gaussian(logq_step2, sigma2, orders)

        q_step1 = np.exp(logq_step1)
        rdp = rdp_step1 + rdp_step2 * q_step1
        # The expression below evaluates
        #     E[(cost_of_step_1 + Bernoulli(pr_of_step_2) * cost_of_step_2)^2]
        rdp_sqrd = (rdp_step1**2 + 2 * rdp_step1 * q_step1 * rdp_step2 +
                    q_step1 * rdp_step2**2)
        rdp_sqrd_cum += rdp_sqrd

        rdp_cum += rdp
        answered += q_step1
        if ((i + 1) % 1000 == 0) or (i == votes.shape[0] - 1):
            # i is zero-based: i + 1 queries have been accumulated so far, so
            # the averages divide by i + 1 (dividing by i is off by one and
            # divides by zero when there is a single query).
            num_queries = i + 1
            rdp_var = rdp_sqrd_cum / num_queries - (
                rdp_cum / num_queries)**2  # Ignore Bessel's correction.
            eps_total, order_opt = pate.compute_eps_from_delta(
                orders, rdp_cum, delta)
            order_opt_idx = np.searchsorted(orders, order_opt)
            eps_std = (num_queries *
                       rdp_var[order_opt_idx])**.5  # Std of the sum.
            print(
                'queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} (std = {:.5f}) '
                'at order = {:.2f} (contribution from delta = {:.3f})'.format(
                    num_queries, answered, eps_total, eps_std, order_opt,
                    -math.log(delta) / (order_opt - 1)))
            sys.stdout.flush()

    # Only the final cumulative RDP determines the returned order, so the
    # conversion is done once after the loop rather than on every query.
    _, order_opt = pate.compute_eps_from_delta(orders, rdp_cum, delta)

    return order_opt

Exemple #7

0

Afficher le fichier

Fichier : smooth_sensitivity_table.py Projet : 812864539/models

def _compute_rdp(votes, baseline, threshold, sigma1, sigma2, delta, orders,
                 data_ind):
  """Computes the (data-dependent) RDP curve for Confident GNMax.

  Accumulates, query by query, the expected RDP cost of the two steps
  (thresholding, then the Gaussian noisy max) and periodically prints the
  running privacy estimate.

  Args:
    votes: A matrix of votes, iterated row by row (`votes.shape[0]` rows).
    baseline: Per-query values subtracted from the votes before the threshold
      check; only read when `threshold` is not None.
    threshold: Threshold of step 1, or None to skip thresholding.
    sigma1: Noise scale of step 1.
    sigma2: Noise scale of step 2.
    delta: Target delta used when converting RDP to eps.
    orders: Sequence of Renyi orders at which RDP is tracked.
    data_ind: If true, use the data-independent bounds for both steps.

  Returns:
    The optimal order reported by `pate.compute_eps_from_delta` for the RDP
    accumulated over all queries.
  """
  rdp_cum = np.zeros(len(orders))
  rdp_sqrd_cum = np.zeros(len(orders))
  answered = 0

  for i, v in enumerate(votes):
    if threshold is None:
      logq_step1 = 0  # No thresholding, always proceed to step 2.
      rdp_step1 = np.zeros(len(orders))
    else:
      logq_step1 = pate.compute_logpr_answered(threshold, sigma1,
                                               v - baseline[i,])
      if data_ind:
        rdp_step1 = pate.compute_rdp_data_independent_threshold(sigma1, orders)
      else:
        rdp_step1 = pate.compute_rdp_threshold(logq_step1, sigma1, orders)

    if data_ind:
      rdp_step2 = pate.rdp_data_independent_gaussian(sigma2, orders)
    else:
      logq_step2 = pate.compute_logq_gaussian(v, sigma2)
      rdp_step2 = pate.rdp_gaussian(logq_step2, sigma2, orders)

    q_step1 = np.exp(logq_step1)
    rdp = rdp_step1 + rdp_step2 * q_step1
    # The expression below evaluates
    #     E[(cost_of_step_1 + Bernoulli(pr_of_step_2) * cost_of_step_2)^2]
    rdp_sqrd = (
        rdp_step1**2 + 2 * rdp_step1 * q_step1 * rdp_step2 +
        q_step1 * rdp_step2**2)
    rdp_sqrd_cum += rdp_sqrd

    rdp_cum += rdp
    answered += q_step1
    if ((i + 1) % 1000 == 0) or (i == votes.shape[0] - 1):
      # i is zero-based: i + 1 queries have been accumulated so far, so the
      # averages divide by i + 1 (dividing by i is off by one and divides by
      # zero when there is a single query).
      num_queries = i + 1
      rdp_var = rdp_sqrd_cum / num_queries - (
          rdp_cum / num_queries)**2  # Ignore Bessel's correction.
      eps_total, order_opt = pate.compute_eps_from_delta(orders, rdp_cum, delta)
      order_opt_idx = np.searchsorted(orders, order_opt)
      eps_std = (num_queries * rdp_var[order_opt_idx])**.5  # Std of the sum.
      print(
          'queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} (std = {:.5f}) '
          'at order = {:.2f} (contribution from delta = {:.3f})'.format(
              num_queries, answered, eps_total, eps_std, order_opt,
              -math.log(delta) / (order_opt - 1)))
      sys.stdout.flush()

  # Only the final cumulative RDP determines the returned order, so the
  # conversion is done once after the loop rather than on every query.
  _, order_opt = pate.compute_eps_from_delta(orders, rdp_cum, delta)

  return order_opt

Exemple #8

0

Afficher le fichier

def compute_rdp_curve(votes, threshold, sigma1, sigma2, orders,
    target_answered):
  """Accumulates RDP until the expected number of answers reaches a target.

  For every vote vector (sorted in non-increasing order) the step-2 Gaussian
  RDP is weighted by the probability that the query is answered and added to
  the running total.

  Args:
    votes: An iterable of vote vectors.
    threshold: Threshold passed to `pate.compute_logpr_answered`.
    sigma1: Noise scale of the thresholding step.
    sigma2: Noise scale of the Gaussian step.
    orders: Sequence of Renyi orders at which RDP is computed.
    target_answered: Expected number of answered queries to reach.

  Returns:
    The cumulative RDP (one value per order) at the first query where the
    expected number of answers is at least `target_answered`.

  Raises:
    AssertionError: If `votes` is exhausted before the target is reached.
  """
  rdp_cum = np.zeros(len(orders))
  answered = 0
  # Count from 1 so that the report states how many queries were consumed
  # rather than the zero-based index of the last one.
  for num_processed, v in enumerate(votes, start=1):
    v = sorted(v, reverse=True)
    q_step1 = math.exp(pate.compute_logpr_answered(threshold, sigma1, v))
    logq_step2 = pate.compute_logq_gaussian(v, sigma2)
    rdp = pate.rdp_gaussian(logq_step2, sigma2, orders)
    rdp_cum += q_step1 * rdp

    answered += q_step1
    if answered >= target_answered:
      print('Processed {} queries to answer {}.'.format(num_processed,
                                                        target_answered))
      return rdp_cum

  # Raised explicitly instead of `assert False` so the failure is not stripped
  # (and None silently returned) when Python runs with -O.
  raise AssertionError(
      'Never reached {} answered queries.'.format(target_answered))

Exemple #9

0

Afficher le fichier

Fichier : plots_for_slides.py Projet : 812864539/models

def compute_rdp_curve(votes, threshold, sigma1, sigma2, orders,
    target_answered):
  """Accumulates RDP until the expected number of answers reaches a target.

  For every vote vector (sorted in non-increasing order) the step-2 Gaussian
  RDP is weighted by the probability that the query is answered and added to
  the running total.

  Args:
    votes: An iterable of vote vectors.
    threshold: Threshold passed to `pate.compute_logpr_answered`.
    sigma1: Noise scale of the thresholding step.
    sigma2: Noise scale of the Gaussian step.
    orders: Sequence of Renyi orders at which RDP is computed.
    target_answered: Expected number of answered queries to reach.

  Returns:
    The cumulative RDP (one value per order) at the first query where the
    expected number of answers is at least `target_answered`.

  Raises:
    AssertionError: If `votes` is exhausted before the target is reached.
  """
  rdp_cum = np.zeros(len(orders))
  answered = 0
  # Count from 1 so that the report states how many queries were consumed
  # rather than the zero-based index of the last one.
  for num_processed, v in enumerate(votes, start=1):
    v = sorted(v, reverse=True)
    q_step1 = math.exp(pate.compute_logpr_answered(threshold, sigma1, v))
    logq_step2 = pate.compute_logq_gaussian(v, sigma2)
    rdp = pate.rdp_gaussian(logq_step2, sigma2, orders)
    rdp_cum += q_step1 * rdp

    answered += q_step1
    if answered >= target_answered:
      print('Processed {} queries to answer {}.'.format(num_processed,
                                                        target_answered))
      return rdp_cum

  # Raised explicitly instead of `assert False` so the failure is not stripped
  # (and None silently returned) when Python runs with -O.
  raise AssertionError(
      'Never reached {} answered queries.'.format(target_answered))

Exemple #10

0

Afficher le fichier

Fichier : plots_for_slides.py Projet : 812864539/models

def plot_rdp_of_sigma(v, order):
  """Plots the data-dependent RDP of one vote vector as a function of sigma.

  Args:
    v: A vote vector passed to `pate.compute_logq_gaussian`.
    order: The Renyi order at which RDP is evaluated.
  """
  sigmas = np.linspace(1., 1000., endpoint=True, num=1000)
  _, ax = setup_plot()

  y = np.zeros(len(sigmas))

  for i, sigma in enumerate(sigmas):
    logq = pate.compute_logq_gaussian(v, sigma)
    y[i] = pate.rdp_gaussian(logq, sigma, order)

  ax.plot(sigmas, y, alpha=.8, linewidth=5)

  # Limits are passed positionally / as `bottom`: the `xmin`, `xmax` and `ymin`
  # keyword aliases are rejected by current Matplotlib releases.
  plt.xlim(1, 1000)
  plt.ylim(bottom=0)
  # plt.yticks([0, .0004, .0008, .0012])
  # Hide the y tick labels. A boolean is required: the string 'off' is truthy
  # and would leave the labels visible.
  ax.tick_params(labelleft=False)
  plt.xlabel(r'Noise $\sigma$', fontsize=16)
  plt.ylabel(r'RDP at order $\alpha={}$'.format(order), fontsize=16)
  ax.tick_params(labelsize=14)

  # plt.legend(loc=0, fontsize=13)
  plt.show()

Exemple #11

0

Afficher le fichier

Fichier : rdp_flow.py Projet : fangliang11/tensorflow

def plot_rdp_curve_per_example(votes, sigmas):
    """Plots per-example RDP curves and saves the figure as 'rdp_flow1.pdf'.

    One data-dependent curve is drawn for every (row of `votes`, sigma) pair
    and one data-independent curve for every sigma. The figure is written to
    `FLAGS.figures_dir` and then displayed.

    Args:
        votes: A 2-D array of votes; row `i` is read as `votes[i, ]`. Rows are
            labelled and styled through two-element lists, so at most two rows
            can be plotted.
        sigmas: An iterable of noise scales forwarded to the `pate` helpers.
    """
    orders = np.linspace(1., 100., endpoint=True, num=1000)
    # Replace the first order (exactly 1) with 1.5.
    orders[0] = 1.5

    fig, ax = plt.subplots()
    fig.set_figheight(4.5)
    fig.set_figwidth(4.7)

    styles = [':', '-']
    labels = ['ex1', 'ex2']

    # `range` instead of the Python-2-only `xrange`.
    for i in range(votes.shape[0]):
        # Show the ten largest vote counts of this example.
        print(sorted(votes[i, ], reverse=True)[:10])
        for sigma in sigmas:
            logq = pate.compute_logq_gaussian(votes[i, ], sigma)
            rdp = pate.rdp_gaussian(logq, sigma, orders)
            ax.plot(orders,
                    rdp,
                    label=r'{} $\sigma$={}'.format(labels[i], int(sigma)),
                    linestyle=styles[i],
                    linewidth=5)

    for sigma in sigmas:
        ax.plot(orders,
                pate.rdp_data_independent_gaussian(sigma, orders),
                alpha=.3,
                label=r'Data-ind bound $\sigma$={}'.format(int(sigma)),
                linewidth=10)

    plt.yticks([0, .01])
    plt.xlabel(r'Order $\lambda$', fontsize=16)
    plt.ylabel(r'RDP value $\varepsilon$ at $\lambda$', fontsize=16)
    ax.tick_params(labelsize=14)

    fout_name = os.path.join(FLAGS.figures_dir, 'rdp_flow1.pdf')
    print('Saving the graph to ' + fout_name)
    fig.savefig(fout_name, bbox_inches='tight')
    # NOTE(review): the legend is added after the figure is saved, so the PDF
    # contains no legend - confirm this is intended.
    plt.legend(loc=0, fontsize=13)
    plt.show()

Exemple #12

0

Afficher le fichier

def plot_rdp_of_sigma(v, order):
  """Plots the data-dependent RDP of one vote vector as a function of sigma.

  Args:
    v: A vote vector passed to `pate.compute_logq_gaussian`.
    order: The Renyi order at which RDP is evaluated.
  """
  sigmas = np.linspace(1., 1000., endpoint=True, num=1000)
  _, ax = setup_plot()

  y = np.zeros(len(sigmas))

  for i, sigma in enumerate(sigmas):
    logq = pate.compute_logq_gaussian(v, sigma)
    y[i] = pate.rdp_gaussian(logq, sigma, order)

  ax.plot(sigmas, y, alpha=.8, linewidth=5)

  # Limits are passed positionally / as `bottom`: the `xmin`, `xmax` and `ymin`
  # keyword aliases are rejected by current Matplotlib releases.
  plt.xlim(1, 1000)
  plt.ylim(bottom=0)
  # plt.yticks([0, .0004, .0008, .0012])
  # Hide the y tick labels. A boolean is required: the string 'off' is truthy
  # and would leave the labels visible.
  ax.tick_params(labelleft=False)
  plt.xlabel(r'Noise $\sigma$', fontsize=16)
  plt.ylabel(r'RDP at order $\alpha={}$'.format(order), fontsize=16)
  ax.tick_params(labelsize=14)

  # plt.legend(loc=0, fontsize=13)
  plt.show()

Exemple #13

0

Afficher le fichier

Fichier : smooth_sensitivity.py Projet : BTM520/MusicObjectDetector-TF

def compute_local_sensitivity_bounds_gnmax(votes, num_teachers, sigma, order):
    """Computes a list of max-LS-at-distance-d for the GNMax mechanism.

    A more efficient implementation of Algorithms 4 and 5 working in time
    O(teachers*classes). A naive implementation is O(teachers^2*classes) or
    worse.

    Args:
        votes: A numpy array of votes.
        num_teachers: Total number of voting teachers.
        sigma: Standard deviation of the Gaussian noise.
        order: The Renyi order.

    Returns:
        A numpy array of length num_teachers whose entry d is the local
        sensitivity at distance d, 0 <= d < num_teachers. Entries past the
        last distance explored keep the plateau value.
    """

    num_classes = len(votes)  # Called m in the paper.

    logq0 = _compute_logq0(sigma, order)
    logq1 = _compute_logq1(sigma, order, num_classes)
    logq = pate.compute_logq_gaussian(votes, sigma)
    plateau = _compute_local_sens_gnmax(logq1, sigma, num_classes, order)

    # Start with every distance at the plateau value; entries are overwritten
    # below only for the distances that are actually explored.
    res = np.full(num_teachers, plateau)

    # Already inside [logq1, logq0]: nothing to explore.
    if logq1 <= logq <= logq0:
        return res

    # Invariant: votes is sorted in the non-increasing order.
    # `sorted` builds a new list, so the caller's array is not modified by the
    # in-place updates below.
    votes = sorted(votes, reverse=True)

    res[0] = _compute_local_sens_gnmax(logq, sigma, num_classes, order)
    curr_d = 0

    go_left = logq > logq0  # Otherwise logq < logq1 and we go right.

    # Iterate while the following is true:
    #    1. If we are going left, logq is still larger than logq0 and we may still
    #       increase the gap between votes[0] and votes[1].
    #    2. If we are going right, logq is still smaller than logq1.
    while ((go_left and logq > logq0 and votes[1] > 0)
           or (not go_left and logq < logq1)):
        curr_d += 1
        if go_left:  # Try decreasing logq.
            # Move one vote from the runner-up to the leader.
            votes[0] += 1
            votes[1] -= 1
            idx = 1
            # Restore the invariant. (Can be implemented more efficiently by keeping
            # track of the range of indices equal to votes[1]. Does not seem to matter
            # for the overall running time.)
            while idx < len(votes) - 1 and votes[idx] < votes[idx + 1]:
                votes[idx], votes[idx + 1] = votes[idx + 1], votes[idx]
                idx += 1
        else:  # Go right, i.e., try increasing logq.
            # Move one vote from the leader to the runner-up.
            votes[0] -= 1
            votes[1] += 1  # The invariant holds since otherwise logq >= logq1.

        # Re-evaluate on the modified votes and record the value for distance
        # curr_d.
        logq = pate.compute_logq_gaussian(votes, sigma)
        res[curr_d] = _compute_local_sens_gnmax(logq, sigma, num_classes,
                                                order)

    return res

Exemple #14

0

Afficher le fichier

Fichier : smooth_sensitivity.py Projet : codeinpeace/models

def compute_local_sensitivity_bounds_gnmax(votes, num_teachers, sigma, order):
  """Computes a list of max-LS-at-distance-d for the GNMax mechanism.

  A more efficient implementation of Algorithms 4 and 5 working in time
  O(teachers*classes). A naive implementation is O(teachers^2*classes) or worse.

  Args:
    votes: A numpy array of votes.
    num_teachers: Total number of voting teachers.
    sigma: Standard deviation of the Gaussian noise.
    order: The Renyi order.

  Returns:
    A numpy array of length num_teachers whose entry d is the local sensitivity
    at distance d, 0 <= d < num_teachers. Entries past the last distance
    explored keep the plateau value.
  """

  num_classes = len(votes)  # Called m in the paper.

  logq0 = _compute_logq0(sigma, order)
  logq1 = _compute_logq1(sigma, order, num_classes)
  logq = pate.compute_logq_gaussian(votes, sigma)
  plateau = _compute_local_sens_gnmax(logq1, sigma, num_classes, order)

  # Start with every distance at the plateau value; entries are overwritten
  # below only for the distances that are actually explored.
  res = np.full(num_teachers, plateau)

  # Already inside [logq1, logq0]: nothing to explore.
  if logq1 <= logq <= logq0:
    return res

  # Invariant: votes is sorted in the non-increasing order.
  # `sorted` builds a new list, so the caller's array is not modified by the
  # in-place updates below.
  votes = sorted(votes, reverse=True)

  res[0] = _compute_local_sens_gnmax(logq, sigma, num_classes, order)
  curr_d = 0

  go_left = logq > logq0  # Otherwise logq < logq1 and we go right.

  # Iterate while the following is true:
  #    1. If we are going left, logq is still larger than logq0 and we may still
  #       increase the gap between votes[0] and votes[1].
  #    2. If we are going right, logq is still smaller than logq1.
  while ((go_left and logq > logq0 and votes[1] > 0) or
         (not go_left and logq < logq1)):
    curr_d += 1
    if go_left:  # Try decreasing logq.
      # Move one vote from the runner-up to the leader.
      votes[0] += 1
      votes[1] -= 1
      idx = 1
      # Restore the invariant. (Can be implemented more efficiently by keeping
      # track of the range of indices equal to votes[1]. Does not seem to matter
      # for the overall running time.)
      while idx < len(votes) - 1 and votes[idx] < votes[idx + 1]:
        votes[idx], votes[idx + 1] = votes[idx + 1], votes[idx]
        idx += 1
    else:  # Go right, i.e., try increasing logq.
      # Move one vote from the leader to the runner-up.
      votes[0] -= 1
      votes[1] += 1  # The invariant holds since otherwise logq >= logq1.

    # Re-evaluate on the modified votes and record the value for distance
    # curr_d.
    logq = pate.compute_logq_gaussian(votes, sigma)
    res[curr_d] = _compute_local_sens_gnmax(logq, sigma, num_classes, order)

  return res

Exemple #15

0

Afficher le fichier

def _find_optimal_smooth_sensitivity_parameters(votes, baseline, num_teachers,
                                                threshold, sigma1, sigma2,
                                                delta, ind_step1, ind_step2,
                                                order):
    """Optimizes smooth sensitivity parameters by minimizing a cost function.

    The cost function is
          exact_eps + cost of GNSS + two stds of noise,
    which captures that upper bound of the confidence interval of the sanitized
    privacy budget.

    Since optimization is done with full view of sensitive data, the results
    cannot be released.

    Args:
        votes: A matrix of votes, iterated row by row (`votes.shape[0]` rows).
        baseline: Per-query values subtracted from the votes for step 1; only
            read when `threshold` is not None.
        num_teachers: Number of teachers; length of the local-sensitivity
            arrays.
        threshold: Threshold of step 1, or None to skip thresholding.
        sigma1: Noise scale of step 1 (thresholding).
        sigma2: Noise scale of step 2 (GNMax).
        delta: Target delta; contributes log(1 / delta) / (order - 1).
        ind_step1: If true, apply the data-independent bound for step 1.
        ind_step2: If true, apply the data-independent bound for step 2.
        order: The Renyi order.

    Returns:
        The tuple (beta_opt, ss_opt, sigma_ss_opt) found for the last query.
    """
    rdp_cum = 0
    answered_cum = 0
    ls_cum = 0

    # Define a plausible range for the beta values.
    betas = np.arange(.3 / order, .495 / order, .01 / order)
    cost_delta = math.log(1 / delta) / (order - 1)

    for i, v in enumerate(votes):
        if threshold is None:
            # No thresholding: the query is always answered and step 1 is free.
            log_pr_answered = 0
            rdp1 = 0
            ls_step1 = np.zeros(num_teachers)
        else:
            log_pr_answered = pate.compute_logpr_answered(
                threshold, sigma1, v - baseline[i, ])
            if ind_step1:  # apply data-independent bound for step 1 (thresholding).
                rdp1 = pate.compute_rdp_data_independent_threshold(
                    sigma1, order)
                ls_step1 = np.zeros(num_teachers)
            else:
                rdp1 = pate.compute_rdp_threshold(log_pr_answered, sigma1,
                                                  order)
                ls_step1 = pate_ss.compute_local_sensitivity_bounds_threshold(
                    v - baseline[i, ], num_teachers, threshold, sigma1, order)

        pr_answered = math.exp(log_pr_answered)
        answered_cum += pr_answered

        if ind_step2:  # apply data-independent bound for step 2 (GNMax).
            rdp2 = pate.rdp_data_independent_gaussian(sigma2, order)
            ls_step2 = np.zeros(num_teachers)
        else:
            logq_step2 = pate.compute_logq_gaussian(v, sigma2)
            rdp2 = pate.rdp_gaussian(logq_step2, sigma2, order)
            # Compute smooth sensitivity.
            ls_step2 = pate_ss.compute_local_sensitivity_bounds_gnmax(
                v, num_teachers, sigma2, order)

        # Step 2 only contributes when the query is answered, hence the
        # weighting by pr_answered.
        rdp_cum += rdp1 + pr_answered * rdp2
        ls_cum += ls_step1 + pr_answered * ls_step2  # Expected local sensitivity.

        # The parameters are re-optimized for every query although they are
        # only used in the periodic report and in the final return value.
        if ind_step1 and ind_step2:
            # Data-independent bounds.
            # NOTE(review): with ss_opt = 0. and sigma_ss_opt = inf the product
            # printed below is 0 * inf = nan - confirm this is acceptable.
            cost_opt, beta_opt, ss_opt, sigma_ss_opt = None, 0., 0., np.inf
        else:
            # Data-dependent bounds.
            cost_opt, beta_opt, ss_opt, sigma_ss_opt = np.inf, None, None, None

            # Grid search over beta for the cheapest total cost.
            for beta in betas:
                ss = pate_ss.compute_discounted_max(beta, ls_cum)

                # Solution to the minimization problem:
                #   min_sigma {order * exp(2 * beta)/ sigma^2 + 2 * ss * sigma}
                sigma_ss = ((order * math.exp(2 * beta)) / ss)**(1 / 3)
                cost_ss = pate_ss.compute_rdp_of_smooth_sensitivity_gaussian(
                    beta, sigma_ss, order)

                # Cost captures exact_eps + cost of releasing SS + two stds of noise.
                cost = rdp_cum + cost_ss + 2 * ss * sigma_ss
                if cost < cost_opt:
                    cost_opt, beta_opt, ss_opt, sigma_ss_opt = cost, beta, ss, sigma_ss

        # Report every 100 queries and after the last one.
        if ((i + 1) % 100 == 0) or (i == votes.shape[0] - 1):
            eps_before_ss = rdp_cum + cost_delta
            eps_with_ss = (eps_before_ss +
                           pate_ss.compute_rdp_of_smooth_sensitivity_gaussian(
                               beta_opt, sigma_ss_opt, order))
            print(
                '{}: E[answered queries] = {:.1f}, RDP at {} goes from {:.3f} to '
                '{:.3f} +/- {:.3f} (ss = {:.4}, beta = {:.4f}, sigma_ss = {:.3f})'
                .format(i + 1, answered_cum, order, eps_before_ss, eps_with_ss,
                        ss_opt * sigma_ss_opt, ss_opt, beta_opt, sigma_ss_opt))
            sys.stdout.flush()

    # Return optimal parameters for the last iteration.
    return beta_opt, ss_opt, sigma_ss_opt

Exemple #16

0

Afficher le fichier

Fichier : plot_partition.py Projet : 812864539/models

def analyze_gnmax_conf_data_dep(votes, threshold, sigma1, sigma2, delta):
  """Tracks the data-dependent privacy cost, query by query, over many orders.

  For every query the cumulative RDP of step 1 (thresholding), step 2 (GNMax)
  and the smooth-sensitivity release is updated for each order, the optimal
  order is selected with `pate.compute_eps_from_delta`, and the split of the
  cost at that order is recorded and printed.

  Args:
    votes: A 2-D array of votes (queries x classes); the sum of the first row
      is taken as the number of teachers.
    threshold: Threshold of step 1, or None to always answer.
    sigma1: Noise scale of step 1, or None to always answer.
    sigma2: Noise scale of step 2.
    delta: Target delta.

  Returns:
    A tuple (eps_partitioned, answered, ss_std_opt, order_opt) of per-query
    arrays: the `Partition` of the cost at the optimal order, the cumulative
    expected number of answered queries, `ss * sigma_ss` at the optimal order,
    and the optimal order itself.
  """
  # Short list of orders.
  # orders = np.round(np.logspace(np.log10(20), np.log10(200), num=20))

  # Long list of orders.
  orders = np.concatenate((np.arange(20, 40, .2),
                           np.arange(40, 75, .5),
                            np.logspace(np.log10(75), np.log10(200), num=20)))

  n = votes.shape[0]
  num_classes = votes.shape[1]
  num_teachers = int(sum(votes[0,]))

  if threshold is not None and sigma1 is not None:
    is_data_ind_step1 = pate.is_data_independent_always_opt_gaussian(
        num_teachers, num_classes, sigma1, orders)
  else:
    # No thresholding step: no local sensitivity is needed for step 1.
    is_data_ind_step1 = [True] * len(orders)

  is_data_ind_step2 = pate.is_data_independent_always_opt_gaussian(
      num_teachers, num_classes, sigma2, orders)

  # Per-query outputs.
  # NOTE(review): `Partition` is defined elsewhere; it is used below with
  # keyword fields step1/step2/ss/delta and summed, presumably a namedtuple.
  eps_partitioned = np.full(n, None, dtype=Partition)
  order_opt = np.full(n, None, dtype=float)
  ss_std_opt = np.full(n, None, dtype=float)
  answered = np.zeros(n)

  # Running totals, one entry per order.
  rdp_step1_total = np.zeros(len(orders))
  rdp_step2_total = np.zeros(len(orders))

  ls_total = np.zeros((len(orders), num_teachers))
  answered_total = 0

  for i in range(n):
    v = votes[i,]

    if threshold is not None and sigma1 is not None:
      logq_step1 = pate.compute_logpr_answered(threshold, sigma1, v)
      rdp_step1_total += pate.compute_rdp_threshold(logq_step1, sigma1, orders)
    else:
      logq_step1 = 0.  # always answer

    pr_answered = np.exp(logq_step1)
    logq_step2 = pate.compute_logq_gaussian(v, sigma2)
    # Step 2 only contributes when the query is answered.
    rdp_step2_total += pr_answered * pate.rdp_gaussian(logq_step2, sigma2,
                                                       orders)

    answered_total += pr_answered

    # Recomputed from scratch for every query, sized to the current (possibly
    # shrunk) list of orders.
    rdp_ss = np.zeros(len(orders))
    ss_std = np.zeros(len(orders))

    for j, order in enumerate(orders):
      if not is_data_ind_step1[j]:
        ls_step1 = pate_ss.compute_local_sensitivity_bounds_threshold(v,
            num_teachers, threshold, sigma1, order)
      else:
        ls_step1 = np.full(num_teachers, 0, dtype=float)

      if not is_data_ind_step2[j]:
        ls_step2 = pate_ss.compute_local_sensitivity_bounds_gnmax(
            v, num_teachers, sigma2, order)
      else:
        ls_step2 = np.full(num_teachers, 0, dtype=float)

      ls_total[j,] += ls_step1 + pr_answered * ls_step2

      beta_ss = .49 / order

      ss = pate_ss.compute_discounted_max(beta_ss, ls_total[j,])
      sigma_ss = ((order * math.exp(2 * beta_ss)) / ss) ** (1 / 3)
      rdp_ss[j] = pate_ss.compute_rdp_of_smooth_sensitivity_gaussian(
          beta_ss, sigma_ss, order)
      ss_std[j] = ss * sigma_ss

    rdp_total = rdp_step1_total + rdp_step2_total + rdp_ss

    answered[i] = answered_total
    _, order_opt[i] = pate.compute_eps_from_delta(orders, rdp_total, delta)
    order_idx = np.searchsorted(orders, order_opt[i])

    # Since optimal orders are always non-increasing, shrink orders array
    # and all cumulative arrays to speed up computation.
    # (ls_total and the is_data_ind_* lists are not shrunk; only their leading
    # entries are indexed afterwards.)
    if order_idx < len(orders):
      orders = orders[:order_idx + 1]
      rdp_step1_total = rdp_step1_total[:order_idx + 1]
      rdp_step2_total = rdp_step2_total[:order_idx + 1]

    eps_partitioned[i] = Partition(step1=rdp_step1_total[order_idx],
                                   step2=rdp_step2_total[order_idx],
                                   ss=rdp_ss[order_idx],
                                   delta=-math.log(delta) / (order_opt[i] - 1))
    ss_std_opt[i] = ss_std[order_idx]
    # `(i + 1) % 1` is always 0, so this reports every query except the first.
    if i > 0 and (i + 1) % 1 == 0:
      print('queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} +/- {:.3f} '
            'at order = {:.2f}. Contributions: delta = {:.3f}, step1 = {:.3f}, '
            'step2 = {:.3f}, ss = {:.3f}'.format(
          i + 1,
          answered[i],
          sum(eps_partitioned[i]),
          ss_std_opt[i],
          order_opt[i],
          eps_partitioned[i].delta,
          eps_partitioned[i].step1,
          eps_partitioned[i].step2,
          eps_partitioned[i].ss))
      sys.stdout.flush()

  return eps_partitioned, answered, ss_std_opt, order_opt

Exemple #17

0

Afficher le fichier

def analyze_gnmax_conf_data_dep(votes, threshold, sigma1, sigma2, delta):
    """Tracks the data-dependent privacy cost, query by query, over many orders.

    For every query the cumulative RDP of step 1 (thresholding), step 2
    (GNMax) and the smooth-sensitivity release is updated for each order, the
    optimal order is selected with `pate.compute_eps_from_delta`, and the
    split of the cost at that order is recorded and printed.

    Args:
        votes: A 2-D array of votes (queries x classes); the sum of the first
            row is taken as the number of teachers.
        threshold: Threshold of step 1, or None to always answer.
        sigma1: Noise scale of step 1, or None to always answer.
        sigma2: Noise scale of step 2.
        delta: Target delta.

    Returns:
        A tuple (eps_partitioned, answered, ss_std_opt, order_opt) of
        per-query arrays: the `Partition` of the cost at the optimal order,
        the cumulative expected number of answered queries, `ss * sigma_ss`
        at the optimal order, and the optimal order itself.
    """
    # Short list of orders.
    # orders = np.round(np.logspace(np.log10(20), np.log10(200), num=20))

    # Long list of orders.
    orders = np.concatenate((np.arange(20, 40, .2), np.arange(40, 75, .5),
                             np.logspace(np.log10(75), np.log10(200), num=20)))

    n = votes.shape[0]
    num_classes = votes.shape[1]
    num_teachers = int(sum(votes[0, ]))

    if threshold is not None and sigma1 is not None:
        is_data_ind_step1 = pate.is_data_independent_always_opt_gaussian(
            num_teachers, num_classes, sigma1, orders)
    else:
        # No thresholding step: no local sensitivity is needed for step 1.
        is_data_ind_step1 = [True] * len(orders)

    is_data_ind_step2 = pate.is_data_independent_always_opt_gaussian(
        num_teachers, num_classes, sigma2, orders)

    # Per-query outputs.
    # NOTE(review): `Partition` is defined elsewhere; it is used below with
    # keyword fields step1/step2/ss/delta and summed, presumably a namedtuple.
    eps_partitioned = np.full(n, None, dtype=Partition)
    order_opt = np.full(n, None, dtype=float)
    ss_std_opt = np.full(n, None, dtype=float)
    answered = np.zeros(n)

    # Running totals, one entry per order.
    rdp_step1_total = np.zeros(len(orders))
    rdp_step2_total = np.zeros(len(orders))

    ls_total = np.zeros((len(orders), num_teachers))
    answered_total = 0

    for i in range(n):
        v = votes[i, ]

        if threshold is not None and sigma1 is not None:
            logq_step1 = pate.compute_logpr_answered(threshold, sigma1, v)
            rdp_step1_total += pate.compute_rdp_threshold(
                logq_step1, sigma1, orders)
        else:
            logq_step1 = 0.  # always answer

        pr_answered = np.exp(logq_step1)
        logq_step2 = pate.compute_logq_gaussian(v, sigma2)
        # Step 2 only contributes when the query is answered.
        rdp_step2_total += pr_answered * pate.rdp_gaussian(
            logq_step2, sigma2, orders)

        answered_total += pr_answered

        # Recomputed from scratch for every query, sized to the current
        # (possibly shrunk) list of orders.
        rdp_ss = np.zeros(len(orders))
        ss_std = np.zeros(len(orders))

        for j, order in enumerate(orders):
            if not is_data_ind_step1[j]:
                ls_step1 = pate_ss.compute_local_sensitivity_bounds_threshold(
                    v, num_teachers, threshold, sigma1, order)
            else:
                ls_step1 = np.full(num_teachers, 0, dtype=float)

            if not is_data_ind_step2[j]:
                ls_step2 = pate_ss.compute_local_sensitivity_bounds_gnmax(
                    v, num_teachers, sigma2, order)
            else:
                ls_step2 = np.full(num_teachers, 0, dtype=float)

            ls_total[j, ] += ls_step1 + pr_answered * ls_step2

            beta_ss = .49 / order

            ss = pate_ss.compute_discounted_max(beta_ss, ls_total[j, ])
            sigma_ss = ((order * math.exp(2 * beta_ss)) / ss)**(1 / 3)
            rdp_ss[j] = pate_ss.compute_rdp_of_smooth_sensitivity_gaussian(
                beta_ss, sigma_ss, order)
            ss_std[j] = ss * sigma_ss

        rdp_total = rdp_step1_total + rdp_step2_total + rdp_ss

        answered[i] = answered_total
        _, order_opt[i] = pate.compute_eps_from_delta(orders, rdp_total, delta)
        order_idx = np.searchsorted(orders, order_opt[i])

        # Since optimal orders are always non-increasing, shrink orders array
        # and all cumulative arrays to speed up computation.
        # (ls_total and the is_data_ind_* lists are not shrunk; only their
        # leading entries are indexed afterwards.)
        if order_idx < len(orders):
            orders = orders[:order_idx + 1]
            rdp_step1_total = rdp_step1_total[:order_idx + 1]
            rdp_step2_total = rdp_step2_total[:order_idx + 1]

        eps_partitioned[i] = Partition(step1=rdp_step1_total[order_idx],
                                       step2=rdp_step2_total[order_idx],
                                       ss=rdp_ss[order_idx],
                                       delta=-math.log(delta) /
                                       (order_opt[i] - 1))
        ss_std_opt[i] = ss_std[order_idx]
        # `(i + 1) % 1` is always 0, so this reports every query except the
        # first.
        if i > 0 and (i + 1) % 1 == 0:
            print(
                'queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} +/- {:.3f} '
                'at order = {:.2f}. Contributions: delta = {:.3f}, step1 = {:.3f}, '
                'step2 = {:.3f}, ss = {:.3f}'.format(
                    i + 1, answered[i], sum(eps_partitioned[i]), ss_std_opt[i],
                    order_opt[i], eps_partitioned[i].delta,
                    eps_partitioned[i].step1, eps_partitioned[i].step2,
                    eps_partitioned[i].ss))
            sys.stdout.flush()

    return eps_partitioned, answered, ss_std_opt, order_opt

Exemple #18

0

Afficher le fichier

def run_analysis(votes, mechanism, noise_scale, params):
  """Computes data-dependent privacy.

  Args:
    votes: A matrix of votes, where each row contains votes in one instance.
    mechanism: A name of the mechanism ('lnmax', 'gnmax', or 'gnmax_conf')
    noise_scale: A mechanism privacy parameter.
    params: Other privacy parameters. Only read for 'gnmax_conf', where
      params['t'] and params['sigma1'] are used for the thresholding step.

  Returns:
    A 4-tuple (eps_total, partition, answered, order_opt) with one entry per
    row of votes: cumulative privacy cost epsilon (array), how the privacy
    budget is split (list of lists of fractions), cumulative expected number
    of answered queries (array), and the optimal order (array).

  Raises:
    ValueError: If mechanism is not one of the supported names. The check is
      done up front, so it also fires when votes has no rows.
  """
  if mechanism not in ('lnmax', 'gnmax', 'gnmax_conf'):
    raise ValueError(
        'Mechanism must be one of ["lnmax", "gnmax", "gnmax_conf"]')

  def compute_partition(order_opt, eps):
    # Splits eps into the fractions contributed by each cost component at the
    # optimal order (selection / answering / delta term for 'gnmax_conf',
    # RDP / delta term otherwise).
    order_opt_idx = np.searchsorted(orders, order_opt)
    if mechanism == 'gnmax_conf':
      p = (rdp_select_cum[order_opt_idx],
           rdp_cum[order_opt_idx] - rdp_select_cum[order_opt_idx],
           -math.log(delta) / (order_opt - 1))
    else:
      p = (rdp_cum[order_opt_idx], -math.log(delta) / (order_opt - 1))
    return [x / eps for x in p]  # Ensures that sum(x) == 1

  # Short list of orders.
  # orders = np.round(np.concatenate((np.arange(2, 50 + 1, 1),
  #                   np.logspace(np.log10(50), np.log10(1000), num=20))))

  # Long list of orders. The linear part ends at 100.5 while the logarithmic
  # part restarts at 100, so the raw concatenation is neither sorted nor free
  # of duplicates. np.unique sorts and de-duplicates it, which is what the
  # np.searchsorted lookups below require.
  orders = np.unique(
      np.concatenate((np.arange(2, 100 + 1, .5),
                      np.logspace(np.log10(100), np.log10(500), num=100))))
  delta = 1e-8

  n = votes.shape[0]
  eps_total = np.zeros(n)
  partition = [None] * n
  order_opt = np.full(n, np.nan, dtype=float)
  answered = np.zeros(n, dtype=float)

  rdp_cum = np.zeros(len(orders))
  rdp_sqrd_cum = np.zeros(len(orders))
  rdp_select_cum = np.zeros(len(orders))
  answered_sum = 0

  for i in range(n):
    v = votes[i,]
    if mechanism == 'lnmax':
      logq_lnmax = pate.compute_logq_laplace(v, noise_scale)
      rdp_query = pate.rdp_pure_eps(logq_lnmax, 2. / noise_scale, orders)
      rdp_sqrd = rdp_query ** 2
      pr_answered = 1
    elif mechanism == 'gnmax':
      logq_gmax = pate.compute_logq_gaussian(v, noise_scale)
      rdp_query = pate.rdp_gaussian(logq_gmax, noise_scale, orders)
      rdp_sqrd = rdp_query ** 2
      pr_answered = 1
    else:  # 'gnmax_conf'; any other name was rejected above.
      logq_step1 = pate.compute_logpr_answered(params['t'], params['sigma1'], v)
      logq_step2 = pate.compute_logq_gaussian(v, noise_scale)
      q_step1 = np.exp(logq_step1)
      logq_step1_min = min(logq_step1, math.log1p(-q_step1))
      rdp_gnmax_step1 = pate.rdp_gaussian(logq_step1_min,
                                          2 ** .5 * params['sigma1'], orders)
      rdp_gnmax_step2 = pate.rdp_gaussian(logq_step2, noise_scale, orders)
      rdp_query = rdp_gnmax_step1 + q_step1 * rdp_gnmax_step2
      # The expression below evaluates
      #     E[(cost_of_step_1 + Bernoulli(pr_of_step_2) * cost_of_step_2)^2]
      rdp_sqrd = (
          rdp_gnmax_step1 ** 2 + 2 * rdp_gnmax_step1 * q_step1 * rdp_gnmax_step2
          + q_step1 * rdp_gnmax_step2 ** 2)
      rdp_select_cum += rdp_gnmax_step1
      pr_answered = q_step1

    rdp_cum += rdp_query
    rdp_sqrd_cum += rdp_sqrd
    answered_sum += pr_answered

    answered[i] = answered_sum
    eps_total[i], order_opt[i] = pate.compute_eps_from_delta(
        orders, rdp_cum, delta)
    partition[i] = compute_partition(order_opt[i], eps_total[i])

    # Progress report every 1000 queries.
    if (i + 1) % 1000 == 0:
      # The cumulative sums cover i + 1 queries, so that is the sample count.
      num_queries = i + 1
      rdp_var = rdp_sqrd_cum / num_queries - (
          rdp_cum / num_queries) ** 2  # Ignore Bessel's correction.
      order_opt_idx = np.searchsorted(orders, order_opt[i])
      # Clamp at zero: rounding can make the variance slightly negative,
      # which would turn the square root into NaN.
      eps_std = (num_queries *
                 max(rdp_var[order_opt_idx], 0.)) ** .5  # Std of the sum.
      print(
          'queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} (std = {:.5f}) '
          'at order = {:.2f} (contribution from delta = {:.3f})'.format(
              i + 1, answered_sum, eps_total[i], eps_std, order_opt[i],
              -math.log(delta) / (order_opt[i] - 1)))
      sys.stdout.flush()

  return eps_total, partition, answered, order_opt

Exemple #19

0

Afficher le fichier

Fichier : smooth_sensitivity_table.py Projet : 812864539/models

def _find_optimal_smooth_sensitivity_parameters(
    votes, baseline, num_teachers, threshold, sigma1, sigma2, delta, ind_step1,
    ind_step2, order):
  """Optimizes smooth sensitivity parameters by minimizing a cost function.

  The cost function is
        exact_eps + cost of GNSS + two stds of noise,
  which captures that upper bound of the confidence interval of the sanitized
  privacy budget.

  Since optimization is done with full view of sensitive data, the results
  cannot be released.

  Args:
    votes: A matrix of votes with one row per query (must expose .shape).
    baseline: Per-query values subtracted from the votes for the thresholding
      step; only read when threshold is not None.
    num_teachers: Number of teachers, forwarded to the local sensitivity
      helpers and used to size the zero sensitivity vectors.
    threshold: Threshold of step 1, or None to skip thresholding (every query
      is then counted as answered with probability 1 at zero RDP cost).
    sigma1: Noise parameter of step 1 (thresholding).
    sigma2: Noise parameter of step 2 (GNMax).
    delta: Target delta; contributes log(1 / delta) / (order - 1) to the
      reported epsilon.
    ind_step1: If true, use the data-independent bound for step 1.
    ind_step2: If true, use the data-independent bound for step 2.
    order: The RDP order at which costs are evaluated.

  Returns:
    A tuple (beta_opt, ss_opt, sigma_ss_opt) for the cumulative cost after the
    last query. When both steps use data-independent bounds this is
    (0., 0., np.inf).

  Raises:
    ValueError: If votes contains no queries.
  """
  num_queries = votes.shape[0]
  if num_queries == 0:
    raise ValueError('votes must contain at least one query.')

  rdp_cum = 0
  answered_cum = 0
  ls_cum = 0

  # Define a plausible range for the beta values.
  betas = np.arange(.3 / order, .495 / order, .01 / order)
  cost_delta = math.log(1 / delta) / (order - 1)

  for i, v in enumerate(votes):
    if threshold is None:
      log_pr_answered = 0
      rdp1 = 0
      ls_step1 = np.zeros(num_teachers)
    else:
      log_pr_answered = pate.compute_logpr_answered(threshold, sigma1,
                                                    v - baseline[i,])
      if ind_step1:  # apply data-independent bound for step 1 (thresholding).
        rdp1 = pate.compute_rdp_data_independent_threshold(sigma1, order)
        ls_step1 = np.zeros(num_teachers)
      else:
        rdp1 = pate.compute_rdp_threshold(log_pr_answered, sigma1, order)
        ls_step1 = pate_ss.compute_local_sensitivity_bounds_threshold(
            v - baseline[i,], num_teachers, threshold, sigma1, order)

    pr_answered = math.exp(log_pr_answered)
    answered_cum += pr_answered

    if ind_step2:  # apply data-independent bound for step 2 (GNMax).
      rdp2 = pate.rdp_data_independent_gaussian(sigma2, order)
      ls_step2 = np.zeros(num_teachers)
    else:
      logq_step2 = pate.compute_logq_gaussian(v, sigma2)
      rdp2 = pate.rdp_gaussian(logq_step2, sigma2, order)
      # Compute smooth sensitivity.
      ls_step2 = pate_ss.compute_local_sensitivity_bounds_gnmax(
          v, num_teachers, sigma2, order)

    rdp_cum += rdp1 + pr_answered * rdp2
    ls_cum += ls_step1 + pr_answered * ls_step2  # Expected local sensitivity.

    # The optimum is only consumed by the progress report and by the final
    # return value. Since the last query always reports, running the search
    # only on reporting iterations yields the same result without repeating
    # the grid search for every query.
    if (i + 1) % 100 != 0 and i != num_queries - 1:
      continue

    if ind_step1 and ind_step2:
      # Data-independent bounds.
      beta_opt, ss_opt, sigma_ss_opt = 0., 0., np.inf
    else:
      # Data-dependent bounds.
      cost_opt, beta_opt, ss_opt, sigma_ss_opt = np.inf, None, None, None

      for beta in betas:
        ss = pate_ss.compute_discounted_max(beta, ls_cum)

        # Solution to the minimization problem:
        #   min_sigma {order * exp(2 * beta)/ sigma^2 + 2 * ss * sigma}
        sigma_ss = ((order * math.exp(2 * beta)) / ss)**(1 / 3)
        cost_ss = pate_ss.compute_rdp_of_smooth_sensitivity_gaussian(
            beta, sigma_ss, order)

        # Cost captures exact_eps + cost of releasing SS + two stds of noise.
        cost = rdp_cum + cost_ss + 2 * ss * sigma_ss
        if cost < cost_opt:
          cost_opt, beta_opt, ss_opt, sigma_ss_opt = cost, beta, ss, sigma_ss

    eps_before_ss = rdp_cum + cost_delta
    eps_with_ss = (
        eps_before_ss + pate_ss.compute_rdp_of_smooth_sensitivity_gaussian(
            beta_opt, sigma_ss_opt, order))
    print('{}: E[answered queries] = {:.1f}, RDP at {} goes from {:.3f} to '
          '{:.3f} +/- {:.3f} (ss = {:.4}, beta = {:.4f}, sigma_ss = {:.3f})'.
          format(i + 1, answered_cum, order, eps_before_ss, eps_with_ss,
                 ss_opt * sigma_ss_opt, ss_opt, beta_opt, sigma_ss_opt))
    sys.stdout.flush()

  # Return optimal parameters for the last iteration.
  return beta_opt, ss_opt, sigma_ss_opt