def __init__(self,
             f,
             delta_f,
             epsilon,
             delta,
             num_queries=1,
             random_state=None):
    """Creates a Gaussian mechanism for privately releasing f's output.

    Args:
      f: Function mapping a database to a numpy array.
      delta_f: Sensitivity of f, i.e. the largest possible change in the
        output when two databases differ by a single row.
      epsilon: The epsilon differential privacy parameter.
      delta: The delta differential privacy parameter.
      num_queries: Number of queries the mechanism must support; the
        mechanism is (epsilon, delta)-differentially private when answering
        at most this many queries.
      random_state: Optional numpy.random.RandomState used to seed the
        random number generator; a fresh one is created when omitted.
    """
    self._func = f
    self._delta_f = delta_f
    # Smallest Gaussian noise scale that still satisfies the overall
    # (epsilon, delta) guarantee across num_queries queries.
    self._sigma = accountant.get_smallest_gaussian_noise(
        common.DifferentialPrivacyParameters(epsilon, delta),
        num_queries,
        sensitivity=delta_f)
    if random_state is None:
        random_state = np.random.RandomState()
    self._random_state = random_state
Code example #2
0
def get_private_average(nonprivate_points: np.ndarray, private_count: int,
                        clustering_param: clustering_params.ClusteringParam,
                        dim: int) -> np.ndarray:
    """Computes a differentially private mean of the provided points.

    Args:
      nonprivate_points: points to be averaged; may be empty.
      private_count: differentially private count of the points. Required to
        be >= 1. Passed in (rather than recomputed) because it is often
        already available, which saves privacy budget.
      clustering_param: parameters of the clustering algorithm.
      dim: dimension of the data points.

    Returns:
      A differentially private average of the given data points.

    Raises:
      ValueError: if private_count is less than 1.
    """
    if private_count < 1:
        raise ValueError(
            f"get_private_average() called with private_count={private_count}")

    point_sum = np.sum(nonprivate_points, axis=0)
    budget_split = clustering_param.privacy_budget_split
    epsilon_sum = (budget_split.frac_sum *
                   clustering_param.privacy_param.epsilon)

    # Infinite budget means no noise is required: return the exact average.
    if epsilon_sum == np.inf:
        return point_sum / private_count

    sigma = accountant.get_smallest_gaussian_noise(
        common.DifferentialPrivacyParameters(
            epsilon_sum, clustering_param.privacy_param.delta),
        num_queries=1,
        sensitivity=clustering_param.radius)
    noisy_sum = point_sum + np.random.normal(scale=sigma, size=dim)
    return noisy_sum / private_count
Code example #3
0
 def test_discrete_laplace_from_privacy_parameters_value_errors(
         self, sensitivity, sampling_prob, epsilon, delta):
     """Checks that invalid privacy parameters raise ValueError."""
     # Construction of the parameters stays inside the context manager: the
     # ValueError may come from the parameters themselves or from the loss.
     with self.assertRaises(ValueError):
         loss_cls = privacy_loss_mechanism.DiscreteLaplacePrivacyLoss
         loss_cls.from_privacy_guarantee(
             common.DifferentialPrivacyParameters(epsilon, delta),
             sensitivity,
             sampling_prob=sampling_prob)
Code example #4
0
 def test_discrete_gaussian_from_privacy_parameters(self, sensitivity, epsilon,
                                                    delta, expected_sigma):
   """Checks the sigma derived from an (epsilon, delta) guarantee."""
   params = common.DifferentialPrivacyParameters(epsilon, delta)
   loss = privacy_loss_mechanism.DiscreteGaussianPrivacyLoss.from_privacy_guarantee(
       params, sensitivity)
   self.assertAlmostEqual(expected_sigma, loss._sigma, 3)
Code example #5
0
 def test_gaussian_from_privacy_parameters(self, sensitivity, epsilon, delta,
                                           expected_standard_deviation):
   """Checks the standard deviation derived from the privacy guarantee."""
   params = common.DifferentialPrivacyParameters(epsilon, delta)
   loss = privacy_loss_mechanism.GaussianPrivacyLoss.from_privacy_guarantee(
       params, sensitivity)
   self.assertAlmostEqual(
       expected_standard_deviation, loss.standard_deviation, 3)
Code example #6
0
  def test_default_tree_param(self, points, returned_private_count, k, epsilon,
                              expected_min_num_points_in_branching_node,
                              expected_min_num_points_in_node,
                              expected_max_depth, mock_gaussian_noise,
                              mock_private_count):
    """Checks default_tree_param's output and its calls to its helpers.

    NOTE(review): the trailing mock_gaussian_noise / mock_private_count
    arguments are presumably injected by @mock.patch decorators outside this
    chunk — verify against the full test class.
    """
    dim = 10
    # Pin the (mocked) private count to a known, parameterized value.
    mock_private_count.return_value = returned_private_count
    data = clustering_params.Data(np.ones(shape=(points, dim)), radius=1.0)
    privacy_param = clustering_params.DifferentialPrivacyParam(
        epsilon=epsilon, delta=1e-2)
    budget_split = clustering_params.PrivacyBudgetSplit(
        frac_sum=0.8, frac_group_count=0.2)

    (tree_param, private_count) = default_clustering_params.default_tree_param(
        k, data, privacy_param, budget_split)
    self.assertEqual(tree_param.max_depth, expected_max_depth)
    # With infinite epsilon no noise is needed, so the noise accountant must
    # not be consulted at all.
    if epsilon == np.inf:
      mock_gaussian_noise.assert_not_called()
    else:
      # The sum noise uses the frac_sum (0.8) share of the epsilon budget.
      mock_gaussian_noise.assert_called_once_with(
          common.DifferentialPrivacyParameters(0.8 * epsilon, 1e-2), 1, 1.0)
    # The count uses the frac_group_count (0.2) share, divided across the
    # max_depth + 1 tree levels.
    mock_private_count.assert_called_once_with(
        nonprivate_count=points,
        count_privacy_param=central_privacy_utils.CountPrivacyParam(
            epsilon=0.2 * epsilon / (tree_param.max_depth + 1), delta=1e-2))
    self.assertEqual(private_count, returned_private_count)
    self.assertEqual(tree_param.min_num_points_in_node,
                     expected_min_num_points_in_node)
    self.assertEqual(tree_param.min_num_points_in_branching_node,
                     expected_min_num_points_in_branching_node)
Code example #7
0
def default_tree_param(
    k: int, data: clustering_params.Data,
    privacy_param: clustering_params.DifferentialPrivacyParam,
    privacy_budget_split: clustering_params.PrivacyBudgetSplit
) -> typing.Tuple[clustering_params.TreeParam, PrivateCount]:
    """Heuristic tree parameters derived from the data and cluster count.

    Args:
      k: Number of clusters to divide the data into.
      data: Data to find centers for.
      privacy_param: privacy parameters for the algorithm.
      privacy_budget_split: budget split between different computations.

    Returns:
      Tuple of (default TreeParam, private count). The private count is
      returned so callers do not need to spend budget recomputing it.
    """
    # max_depth feeds into the private count calculation, so it must be a
    # constant rather than something derived from the count. 20 was chosen
    # experimentally over multiple datasets.
    max_depth = 20

    # Standard deviation of the sum noise for sensitivity 1. An infinite
    # epsilon means no noise is added at all.
    if privacy_param.epsilon == np.inf:
        sum_sigma = 0
    else:
        sum_epsilon = privacy_param.epsilon * privacy_budget_split.frac_sum
        sum_sigma = accountant.get_smallest_gaussian_noise(
            common.DifferentialPrivacyParameters(sum_epsilon,
                                                 privacy_param.delta),
            num_queries=1,
            sensitivity=1.0)

    count_param = central_privacy_utils.PrivateCountParam(
        privacy_param, privacy_budget_split, max_depth)
    private_count = central_privacy_utils.get_private_count(
        data.num_points, count_param)

    # Treat the noise as shared among the summed points: its l2-norm is about
    # sqrt(dimension) * sum_sigma * radius, so spreading it over
    # 10 * sqrt(dimension) * sum_sigma points leaves roughly 0.1 * radius of
    # noise per point.
    low_noise_threshold = int(10 * np.sqrt(data.dim) * sum_sigma)

    # Still allow about one node per cluster, even when that means the sums
    # are noisier than we would like.
    min_num_points_in_node = min(low_noise_threshold,
                                 private_count // (2 * k))

    # The private count can be negative, so the clamp to at least 1 must
    # happen after the min above has been taken.
    min_num_points_in_node = max(1, min_num_points_in_node)
    min_num_points_in_branching_node = 3 * min_num_points_in_node

    tree_param = clustering_params.TreeParam(
        min_num_points_in_branching_node=min_num_points_in_branching_node,
        min_num_points_in_node=min_num_points_in_node,
        max_depth=max_depth)
    return (tree_param, private_count)
Code example #8
0
 def test_discrete_laplace_from_privacy_parameters(self, sensitivity, epsilon,
                                                   delta, expected_parameter):
   """Checks the parameter derived from an (epsilon, delta) guarantee."""
   params = common.DifferentialPrivacyParameters(epsilon, delta)
   loss = privacy_loss_mechanism.DiscreteLaplacePrivacyLoss.from_privacy_guarantee(
       params, sensitivity)
   self.assertAlmostEqual(expected_parameter, loss.parameter)
Code example #9
0
 def test_get_smallest_gaussian_noise(self, epsilon, delta, num_queries,
                                      sensitivity, expected_std):
   """Checks the smallest Gaussian noise for the given privacy budget."""
   std = accountant.get_smallest_gaussian_noise(
       common.DifferentialPrivacyParameters(epsilon, delta),
       num_queries,
       sensitivity=sensitivity)
   self.assertAlmostEqual(expected_std, std)
Code example #10
0
 def test_laplace_from_privacy_parameters(self, sensitivity, sampling_prob,
                                          adjacency_type, epsilon, delta,
                                          expected_parameter):
     """Checks parameter and adjacency type of the constructed loss."""
     params = common.DifferentialPrivacyParameters(epsilon, delta)
     loss = privacy_loss_mechanism.LaplacePrivacyLoss.from_privacy_guarantee(
         params,
         sensitivity,
         sampling_prob=sampling_prob,
         adjacency_type=adjacency_type)
     self.assertAlmostEqual(expected_parameter, loss.parameter)
     self.assertEqual(adjacency_type, loss.adjacency_type)
Code example #11
0
 def test_get_smallest_discrete_laplace_noise(self, epsilon, delta,
                                              num_queries, sensitivity,
                                              expected_parameter):
   """Checks the smallest discrete Laplace parameter for the budget."""
   parameter = accountant.get_smallest_discrete_laplace_noise(
       common.DifferentialPrivacyParameters(epsilon, delta),
       num_queries,
       sensitivity=sensitivity)
   self.assertAlmostEqual(expected_parameter, parameter, delta=1e-3)
Code example #12
0
 def test_get_smallest_epsilon_from_advanced_composition(
     self, total_epsilon, total_delta, num_queries, delta, expected_epsilon):
   """Checks the per-query epsilon computed via advanced composition."""
   epsilon = accountant.get_smallest_epsilon_from_advanced_composition(
       common.DifferentialPrivacyParameters(total_epsilon, total_delta),
       num_queries, delta)
   # None signals that no feasible per-query epsilon exists for the budget.
   if expected_epsilon is None:
     self.assertIsNone(epsilon)
   else:
     self.assertAlmostEqual(expected_epsilon, epsilon, places=6)
Code example #13
0
 def test_advanced_composition(self, epsilon, delta, num_queries, total_delta,
                               expected_total_epsilon):
   """Checks the total epsilon computed via advanced composition."""
   total_epsilon = accountant.advanced_composition(
       common.DifferentialPrivacyParameters(epsilon, delta), num_queries,
       total_delta)
   # None signals that no total epsilon is achievable at total_delta.
   if expected_total_epsilon is None:
     self.assertIsNone(total_epsilon)
   else:
     self.assertAlmostEqual(expected_total_epsilon, total_epsilon)
Code example #14
0
 def test_from_privacy_parameters(
         self, epsilon, delta, value_discretization_interval,
         expected_rounded_probability_mass_function,
         expected_infinity_mass):
     """Checks the PLD constructed from (epsilon, delta) parameters."""
     params = common.DifferentialPrivacyParameters(epsilon, delta)
     pld = privacy_loss_distribution.PrivacyLossDistribution.from_privacy_parameters(
         params,
         value_discretization_interval=value_discretization_interval)
     self.assertAlmostEqual(expected_infinity_mass, pld.infinity_mass)
     test_util.dictionary_almost_equal(
         self, expected_rounded_probability_mass_function,
         pld.rounded_probability_mass_function)
Code example #15
0
 def test_self_composition_truncation_account_for_truncated_mass(self):
     """Truncated tail mass must show up as delta after self-composition."""
     compositions = 2
     truncation = 0.5
     base_epsilon = 1
     pld = privacy_loss_distribution.PrivacyLossDistribution.from_privacy_parameters(
         common.DifferentialPrivacyParameters(base_epsilon, 0))
     composed = pld.self_compose(compositions,
                                 tail_mass_truncation=truncation)
     # The mass removed by truncation should be accounted for in delta at
     # the composed epsilon.
     self.assertAlmostEqual(
         truncation,
         composed.get_delta_for_epsilon(compositions * base_epsilon))
Code example #16
0
    def _compose_distributions(
            self, noise_standard_deviation: float
    ) -> 'pldlib.PrivacyLossDistribution':
        """Uses the Privacy Loss Distribution library to compose distributions.

        Args:
            noise_standard_deviation: The noise of the distributions to construct.

        Returns:
            A PrivacyLossDistribution object for the pipeline, or None when
            self._mechanisms is empty.

        Raises:
            NotImplementedError: if a mechanism has an unsupported type.
        """
        composed = None

        for mechanism_spec_internal in self._mechanisms:
            mechanism_type = mechanism_spec_internal.mechanism_spec.mechanism_type
            if mechanism_type == MechanismType.LAPLACE:
                # The Laplace distribution parameter = std/sqrt(2).
                pld = pldlib.PrivacyLossDistribution.from_laplace_mechanism(
                    mechanism_spec_internal.sensitivity *
                    noise_standard_deviation / math.sqrt(2) /
                    mechanism_spec_internal.weight,
                    value_discretization_interval=self._pld_discretization)
            elif mechanism_type == MechanismType.GAUSSIAN:
                pld = pldlib.PrivacyLossDistribution.from_gaussian_mechanism(
                    mechanism_spec_internal.sensitivity *
                    noise_standard_deviation / mechanism_spec_internal.weight,
                    value_discretization_interval=self._pld_discretization)
            elif mechanism_type == MechanismType.GENERIC:
                # It is required to convert between the noise_standard_deviation of a Laplace or Gaussian mechanism
                # and the (epsilon, delta) Generic mechanism because the calibration is defined by one parameter.
                # There are multiple ways to do this; here it is assumed that (epsilon, delta) specifies the Laplace
                # mechanism and epsilon is computed based on this. The delta is computed to be proportional to epsilon.
                epsilon_0_interim = math.sqrt(2) / noise_standard_deviation
                delta_0_interim = epsilon_0_interim / self._total_epsilon * self._total_delta
                pld = pldlib.PrivacyLossDistribution.from_privacy_parameters(
                    common.DifferentialPrivacyParameters(
                        epsilon_0_interim, delta_0_interim),
                    value_discretization_interval=self._pld_discretization)
            else:
                # BUG FIX: previously an unrecognized mechanism type silently
                # fell through, re-composing the previous iteration's pld (or
                # referencing an unset pld on the first iteration). Fail
                # loudly instead of corrupting the privacy accounting.
                raise NotImplementedError(
                    f"Unsupported mechanism type: {mechanism_type}")

            composed = pld if composed is None else composed.compose(pld)

        return composed
Code example #17
0
 def test_epsilon_delta_value_errors(self, epsilon, delta):
     """Invalid (epsilon, delta) pairs must be rejected with ValueError."""
     self.assertRaises(ValueError, common.DifferentialPrivacyParameters,
                       epsilon, delta)
Code example #18
0
 def get_total_epsilon_for_epsilon(epsilon):
     """Returns the total epsilon after composing num_queries queries.

     NOTE(review): this is a nested helper — `delta`, `num_queries`,
     `total_privacy_parameters` and `advanced_composition` are free
     variables captured from an enclosing scope not visible in this chunk;
     verify against the outer function.
     """
     # Each individual query is (epsilon, delta)-DP; composition is evaluated
     # at the caller's target total delta.
     privacy_parameters = common.DifferentialPrivacyParameters(
         epsilon, delta)
     return advanced_composition(privacy_parameters, num_queries,
                                 total_privacy_parameters.delta)
Code example #19
0
 def test_discrete_gaussian_from_privacy_parameters_value_errors(
     self, sensitivity, epsilon, delta):
   """Checks that invalid parameters raise ValueError."""
   # Parameter construction stays inside the context manager: the ValueError
   # may come from the parameters themselves or from the loss constructor.
   with self.assertRaises(ValueError):
     loss_cls = privacy_loss_mechanism.DiscreteGaussianPrivacyLoss
     loss_cls.from_privacy_guarantee(
         common.DifferentialPrivacyParameters(epsilon, delta), sensitivity)