Beispiel #1
0
def csmooth_tune_t_experiment(eps, num_samples, num_trials,
                              num_quantiles_range, data_low, data_high,
                              log_t_low, log_t_high, num_t):
    """Averages tuned CSmooth t values over trials, per number of quantiles.

    For every entry of num_quantiles_range, num_trials datasets are drawn with
    base.gen_gaussian(num_samples, 0, 1), csmooth_tune_and_return_ts is run on
    evenly spaced quantiles in (0, 1), and the results are averaged.

    Args:
      eps: Privacy parameter epsilon; each run receives eps / sqrt(num_quantiles).
      num_samples: Number of standard Gaussian samples to draw for each trial.
      num_trials: Number of trials to average.
      num_quantiles_range: Array of number of quantiles to estimate.
      data_low: Lower bound for data, used by CSmooth.
      data_high: Upper bound for data, used by CSmooth.
      log_t_low: Tuning range for t has lower bound 10^(log_t_low).
      log_t_high: Tuning range for t has upper bound 10^(log_t_high).
      num_t: Number of logarithmically spaced t used to populate tuning range.

    Returns:
      A list with one array per entry of num_quantiles_range; the i-th array
      has length num_quantiles_range[i] and holds the trial-averaged ts.
    """
    averaged_ts = [np.zeros(count) for count in num_quantiles_range]
    for count, running_mean in zip(num_quantiles_range, averaged_ts):
        per_run_eps = eps / np.sqrt(count)
        for _ in range(num_trials):
            sample = base.gen_gaussian(num_samples, 0, 1)
            # count evenly spaced interior points of [0, 1] (endpoints dropped).
            targets = np.linspace(0, 1, count + 2)[1:-1]
            tuned = csmooth_tune_and_return_ts(
                sample, data_low, data_high, targets, per_run_eps, log_t_low,
                log_t_high, num_t)
            # In-place add updates the array stored in averaged_ts.
            running_mean += tuned / num_trials
        print("Finished num_quantiles: {}".format(count))
    return averaged_ts
Beispiel #2
0
def comparison(methods, error_func, fixed_data, distribution, num_samples,
               data_low, data_high, num_trials, qs, eps, delta, swap, ts):
  """Helper function to run the trials set up by synthetic/real_comparison.

  Args:
    methods: Array of private quantiles algorithms to test.
    error_func: Function for computing quantile estimation error; called as
      error_func(sampled_data, true_quantiles, estimates).
    fixed_data: In the case of real data, an array of data to subsample in each
      trial. In the case of synthetic data, an empty array.
    distribution: In the case of real data, an empty string. In the case of
      synthetic data, either "gaussian" or "uniform".
    num_samples: Number of samples to use in each trial.
    data_low: Lower bound for data, used by private quantiles algorithms.
    data_high: Upper bound for data, used by private quantiles algorithms.
    num_trials: Number of trials to average over.
    qs: Array of quantiles to estimate.
    eps: Privacy parameter epsilon.
    delta: Privacy parameter delta, used by the APP_IND_EXP, SMOOTH and
      GAUSS_TREE methods.
    swap: If true, uses swap privacy definition. Otherwise uses add-remove.
    ts: Matrix of smooth sensitivity parameters passed to CSmooth.

  Returns:
    Arrays errors and times storing, respectively, the average error reported
    by error_func and the average time in seconds for each of the methods, in
    the same order as methods.

  Raises:
    ValueError: If the Smooth or CSmooth method is used in conjunction with
      swap=False, if one of the specified methods is unrecognized, or if
      fixed_data is empty and distribution is neither "gaussian" nor
      "uniform".
  """
  # Create an array of DP quantile functions from the array of method names.
  quant_funcs = []
  for method in methods:
    # NOTE(review): assumes _PARTIAL_METHODS is a dict-like mapping, so a miss
    # surfaces as KeyError; report it as the documented ValueError instead.
    try:
      base_func = _PARTIAL_METHODS[method]
    except KeyError:
      raise ValueError("Unrecognized method name: {}".format(method))
    quant_func = functools.partial(
        base_func, data_low=data_low, data_high=data_high, qs=qs)

    if method == QuantilesEstimationMethod.JOINT_EXP:
      quant_func = functools.partial(quant_func, eps=eps, swap=swap)
    elif method == QuantilesEstimationMethod.IND_EXP:
      quant_func = functools.partial(
          quant_func, divided_eps=eps / len(qs), swap=swap)
    elif method == QuantilesEstimationMethod.APP_IND_EXP:
      quant_func = functools.partial(
          quant_func,
          divided_eps=ind_exp.opt_comp_calculator(eps, delta, len(qs)),
          swap=swap)
    elif method == QuantilesEstimationMethod.SMOOTH:
      if not swap:
        raise ValueError("Smooth method is only implemented for swap DP.")
      quant_func = functools.partial(
          quant_func, divided_eps=eps / len(qs), divided_delta=delta / len(qs))
    elif method == QuantilesEstimationMethod.CSMOOTH:
      if not swap:
        raise ValueError("CSmooth method is only implemented for swap DP.")
      quant_func = functools.partial(
          quant_func, divided_eps=eps / np.sqrt(len(qs)), ts=ts)
    elif method == QuantilesEstimationMethod.LAP_TREE:
      quant_func = functools.partial(quant_func, eps=eps, delta=0, swap=swap)
    elif method == QuantilesEstimationMethod.GAUSS_TREE:
      quant_func = functools.partial(
          quant_func, eps=eps, delta=delta, swap=swap)
    else:
      raise ValueError("Unrecognized method name: {}".format(method))
    quant_funcs.append(quant_func)

  # Exactly one function is appended per method, so this equals len(methods).
  num_methods = len(quant_funcs)
  errors = np.zeros(num_methods)
  times = np.zeros(num_methods)
  for _ in range(num_trials):
    # Sample a dataset.
    if fixed_data.size > 0:
      sampled_data = np.sort(
          np.random.choice(fixed_data, num_samples, replace=False))
    elif distribution == "gaussian":
      sampled_data = base.gen_gaussian(num_samples, 0, 5)
    elif distribution == "uniform":
      sampled_data = base.gen_uniform(num_samples, -5, 5)
    else:
      # Without this branch sampled_data would be unbound below.
      raise ValueError("Unrecognized distribution: {}".format(distribution))
    true_quantiles = base.quantiles(sampled_data, qs)

    for method_num, quant_func in enumerate(quant_funcs):
      begin = time.time()
      estimates = quant_func(sampled_data)
      end = time.time()
      # Accumulate (not overwrite) so times holds the mean over all trials.
      times[method_num] += (end - begin) / num_trials
      errors[method_num] += error_func(sampled_data, true_quantiles,
                                       estimates) / num_trials

  return errors, times
def comparison(fixed_data, distribution, num_samples, data_low, data_high,
               num_trials, qs, eps, delta, swap, ts):
    """Helper function to run the trials set up by synthetic/real_comparison.

    Args:
      fixed_data: In the case of real data, an array of data to subsample in
        each trial. In the case of synthetic data, an empty array.
      distribution: In the case of real data, an empty string. In the case of
        synthetic data, either "gaussian" or "uniform".
      num_samples: Number of samples to use in each trial.
      data_low: Lower bound for data, used by private quantiles algorithms.
      data_high: Upper bound for data, used by private quantiles algorithms.
      num_trials: Number of trials to average over.
      qs: Array of quantiles to estimate.
      eps: Privacy parameter epsilon.
      delta: Privacy parameter delta, passed to ind_exp.opt_comp_calculator
        and (divided by len(qs)) to smooth.smooth.
      swap: If true, uses swap privacy definition. Otherwise uses add-remove.
      ts: Matrix of smooth sensitivity parameters passed to CSmooth.

    Returns:
      Arrays errors and times storing, respectively, average number of
      misclassified points and time in seconds for each of the five methods.
      Index 0 is joint_exp, 1 is ind_exp with eps / len(qs), 2 is ind_exp with
      the epsilon from opt_comp_calculator, 3 is smooth and 4 is csmooth.

    Raises:
      ValueError: If fixed_data is empty and distribution is neither
        "gaussian" nor "uniform".
    """
    errors = np.zeros(5)
    times = np.zeros(5)

    def _run_and_record(slot, data, true_quantiles, estimator, *extra_args):
        """Times one estimator call and adds its share to the slot averages."""
        begin = time.time()
        estimates = estimator(data, data_low, data_high, qs, *extra_args)
        end = time.time()
        errors[slot] += base.quantiles_error(data, qs, true_quantiles,
                                             estimates) / num_trials
        times[slot] += (end - begin) / num_trials

    for _ in range(num_trials):
        # Sample a dataset.
        if fixed_data.size > 0:
            sampled_data = np.sort(
                np.random.choice(fixed_data, num_samples, replace=False))
        elif distribution == "gaussian":
            sampled_data = base.gen_gaussian(num_samples, 0, 5)
        elif distribution == "uniform":
            sampled_data = base.gen_uniform(num_samples, -5, 5)
        else:
            # Without this branch sampled_data would be unbound below.
            raise ValueError(
                "Unrecognized distribution: {}".format(distribution))
        true_quantiles = base.quantiles(sampled_data, qs)

        _run_and_record(0, sampled_data, true_quantiles, joint_exp.joint_exp,
                        eps, swap)
        _run_and_record(1, sampled_data, true_quantiles, ind_exp.ind_exp,
                        swap, eps / len(qs))
        # Computed outside the timed call so it does not count toward times[2].
        app_ind_exp_eps = ind_exp.opt_comp_calculator(eps, delta, len(qs))
        _run_and_record(2, sampled_data, true_quantiles, ind_exp.ind_exp,
                        swap, app_ind_exp_eps)
        _run_and_record(3, sampled_data, true_quantiles, smooth.smooth,
                        eps / len(qs), delta / len(qs))
        _run_and_record(4, sampled_data, true_quantiles, csmooth.csmooth,
                        eps / np.sqrt(len(qs)), ts)
    return errors, times