def __init__(self, f, delta_f, epsilon, delta, num_queries=1, random_state=None):
  """Instantiates a gaussian mechanism.

  Args:
    f: A function mapping a database to a numpy array.
    delta_f: The sensitivity parameter, i.e., the maximum amount by which the
      output of f can change between two databases differing in a single row.
    epsilon: Differential privacy parameter.
    delta: Differential privacy parameter.
    num_queries: Number of queries the mechanism will answer; the mechanism is
      (epsilon, delta)-differentially private over at most num_queries queries.
    random_state: Optional numpy.random.RandomState used to seed the random
      number generator; a fresh one is created when omitted.
  """
  self._func = f
  self._delta_f = delta_f
  privacy_parameters = common.DifferentialPrivacyParameters(epsilon, delta)
  self._sigma = accountant.get_smallest_gaussian_noise(
      privacy_parameters, num_queries, sensitivity=delta_f)
  if random_state is None:
    random_state = np.random.RandomState()
  self._random_state = random_state
def get_private_average(nonprivate_points: np.ndarray, private_count: int,
                        clustering_param: clustering_params.ClusteringParam,
                        dim: int) -> np.ndarray:
  """Returns a differentially private average of the given data points.

  Args:
    nonprivate_points: data points to be averaged, may be empty.
    private_count: differentially private count of the number of data points.
      This is provided to save privacy budget since, in our applications, it is
      often already computed elsewhere. Required to be >= 1.
    clustering_param: parameters of the clustering algorithm.
    dim: dimension of the data points.

  Returns:
    A differentially private average of the given data points.

  Raises:
    ValueError: If private_count < 1.
  """
  if private_count < 1:
    raise ValueError(
        f"get_private_average() called with private_count={private_count}")
  sum_points = np.sum(nonprivate_points, axis=0)
  epsilon_sum = (clustering_param.privacy_budget_split.frac_sum *
                 clustering_param.privacy_param.epsilon)
  if epsilon_sum == np.inf:
    # Infinite budget for the sum: no noise is needed.
    return sum_points / private_count
  gaussian_standard_deviation = accountant.get_smallest_gaussian_noise(
      common.DifferentialPrivacyParameters(
          epsilon_sum, clustering_param.privacy_param.delta),
      num_queries=1,
      sensitivity=clustering_param.radius)
  # Out-of-place addition so integer-dtype sums are upcast to float; an
  # in-place `+=` of float noise onto an integer array raises a numpy
  # same-kind casting error.
  sum_points = sum_points + np.random.normal(
      scale=gaussian_standard_deviation, size=dim)
  return sum_points / private_count
def test_discrete_laplace_from_privacy_parameters_value_errors(
    self, sensitivity, sampling_prob, epsilon, delta):
  """Invalid parameter combinations must raise ValueError."""
  construct = (
      privacy_loss_mechanism.DiscreteLaplacePrivacyLoss.from_privacy_guarantee)
  # The parameters object is built inside the context manager so that a
  # ValueError from its constructor also satisfies the assertion.
  with self.assertRaises(ValueError):
    construct(
        common.DifferentialPrivacyParameters(epsilon, delta),
        sensitivity,
        sampling_prob=sampling_prob)
def test_discrete_gaussian_from_privacy_parameters(self, sensitivity, epsilon,
                                                   delta, expected_sigma):
  """Sigma derived from (epsilon, delta) matches the expected value."""
  privacy_parameters = common.DifferentialPrivacyParameters(epsilon, delta)
  privacy_loss = (
      privacy_loss_mechanism.DiscreteGaussianPrivacyLoss
      .from_privacy_guarantee(privacy_parameters, sensitivity))
  self.assertAlmostEqual(expected_sigma, privacy_loss._sigma, 3)
def test_gaussian_from_privacy_parameters(self, sensitivity, epsilon, delta,
                                          expected_standard_deviation):
  """Standard deviation derived from (epsilon, delta) matches expectation."""
  privacy_parameters = common.DifferentialPrivacyParameters(epsilon, delta)
  privacy_loss = privacy_loss_mechanism.GaussianPrivacyLoss.from_privacy_guarantee(
      privacy_parameters, sensitivity)
  self.assertAlmostEqual(expected_standard_deviation,
                         privacy_loss.standard_deviation, 3)
def test_default_tree_param(self, points, returned_private_count, k, epsilon,
                            expected_min_num_points_in_branching_node,
                            expected_min_num_points_in_node, expected_max_depth,
                            mock_gaussian_noise, mock_private_count):
  """default_tree_param produces the expected TreeParam and budget usage."""
  dim = 10
  mock_private_count.return_value = returned_private_count
  dataset = clustering_params.Data(np.ones(shape=(points, dim)), radius=1.0)
  dp_param = clustering_params.DifferentialPrivacyParam(
      epsilon=epsilon, delta=1e-2)
  split = clustering_params.PrivacyBudgetSplit(
      frac_sum=0.8, frac_group_count=0.2)

  tree_param, private_count = default_clustering_params.default_tree_param(
      k, dataset, dp_param, split)

  self.assertEqual(tree_param.max_depth, expected_max_depth)
  # With infinite epsilon no noise calibration should happen at all.
  if epsilon == np.inf:
    mock_gaussian_noise.assert_not_called()
  else:
    mock_gaussian_noise.assert_called_once_with(
        common.DifferentialPrivacyParameters(0.8 * epsilon, 1e-2), 1, 1.0)
  mock_private_count.assert_called_once_with(
      nonprivate_count=points,
      count_privacy_param=central_privacy_utils.CountPrivacyParam(
          epsilon=0.2 * epsilon / (tree_param.max_depth + 1), delta=1e-2))
  self.assertEqual(private_count, returned_private_count)
  self.assertEqual(tree_param.min_num_points_in_node,
                   expected_min_num_points_in_node)
  self.assertEqual(tree_param.min_num_points_in_branching_node,
                   expected_min_num_points_in_branching_node)
def default_tree_param(
    k: int, data: clustering_params.Data,
    privacy_param: clustering_params.DifferentialPrivacyParam,
    privacy_budget_split: clustering_params.PrivacyBudgetSplit
) -> typing.Tuple[clustering_params.TreeParam, PrivateCount]:
  """Heuristic tree param based on the data and number of clusters.

  Args:
    k: Number of clusters to divide the data into.
    data: Data to find centers for.
    privacy_param: privacy parameters for the algorithm.
    privacy_budget_split: budget split between different computations.

  Returns:
    (default TreeParam, private count). The private count is provided so that
    it doesn't need to be re-computed.
  """
  # max_depth feeds the private count calculation, so it must be fixed up
  # front rather than derived from the count.
  # Chosen experimentally over multiple datasets.
  max_depth = 20

  # Standard deviation of the sum noise for a sensitivity of 1.
  if privacy_param.epsilon == np.inf:
    sum_sigma = 0
  else:
    sum_epsilon = privacy_param.epsilon * privacy_budget_split.frac_sum
    sum_sigma = accountant.get_smallest_gaussian_noise(
        common.DifferentialPrivacyParameters(sum_epsilon, privacy_param.delta),
        num_queries=1,
        sensitivity=1.0)

  private_count = central_privacy_utils.get_private_count(
      data.num_points,
      central_privacy_utils.PrivateCountParam(privacy_param,
                                              privacy_budget_split, max_depth))

  # Treat the noise as spread over the summed points: its l2-norm is roughly
  # sqrt(dimension) * sum_sigma * radius, so distributing it among
  # 10 * sqrt(dimension) * sum_sigma points leaves each point with about
  # 0.1 * radius of noise.
  low_noise_point_threshold = int(10 * np.sqrt(data.dim) * sum_sigma)

  # Cap by private_count // (2 * k) so that at least one node per cluster
  # stays viable, even if its noise exceeds the low-noise target.
  min_num_points_in_node = min(low_noise_point_threshold,
                               private_count // (2 * k))
  # Enforce the >= 1 invariant last: private_count may be negative, which
  # would otherwise drag the minimum below 1.
  min_num_points_in_node = max(1, min_num_points_in_node)

  min_num_points_in_branching_node = 3 * min_num_points_in_node
  return (clustering_params.TreeParam(
      min_num_points_in_branching_node=min_num_points_in_branching_node,
      min_num_points_in_node=min_num_points_in_node,
      max_depth=max_depth), private_count)
def test_discrete_laplace_from_privacy_parameters(self, sensitivity, epsilon,
                                                  delta, expected_parameter):
  """Discrete Laplace parameter derived from (epsilon, delta) is as expected."""
  privacy_parameters = common.DifferentialPrivacyParameters(epsilon, delta)
  privacy_loss = (
      privacy_loss_mechanism.DiscreteLaplacePrivacyLoss.from_privacy_guarantee(
          privacy_parameters, sensitivity))
  self.assertAlmostEqual(expected_parameter, privacy_loss.parameter)
def test_get_smallest_gaussian_noise(self, epsilon, delta, num_queries,
                                     sensitivity, expected_std):
  """Smallest gaussian noise for the given budget matches the expected std."""
  std = accountant.get_smallest_gaussian_noise(
      common.DifferentialPrivacyParameters(epsilon, delta),
      num_queries,
      sensitivity=sensitivity)
  self.assertAlmostEqual(expected_std, std)
def test_laplace_from_privacy_parameters(self, sensitivity, sampling_prob,
                                         adjacency_type, epsilon, delta,
                                         expected_parameter):
  """Laplace parameter and adjacency type round-trip through construction."""
  privacy_parameters = common.DifferentialPrivacyParameters(epsilon, delta)
  privacy_loss = privacy_loss_mechanism.LaplacePrivacyLoss.from_privacy_guarantee(
      privacy_parameters,
      sensitivity,
      sampling_prob=sampling_prob,
      adjacency_type=adjacency_type)
  self.assertAlmostEqual(expected_parameter, privacy_loss.parameter)
  self.assertEqual(adjacency_type, privacy_loss.adjacency_type)
def test_get_smallest_discrete_laplace_noise(self, epsilon, delta, num_queries,
                                             sensitivity, expected_parameter):
  """Smallest discrete Laplace parameter matches the expected value."""
  parameter = accountant.get_smallest_discrete_laplace_noise(
      common.DifferentialPrivacyParameters(epsilon, delta),
      num_queries,
      sensitivity=sensitivity)
  self.assertAlmostEqual(expected_parameter, parameter, delta=1e-3)
def test_get_smallest_epsilon_from_advanced_composition(
    self, total_epsilon, total_delta, num_queries, delta, expected_epsilon):
  """Per-query epsilon from advanced composition matches expectation."""
  epsilon = accountant.get_smallest_epsilon_from_advanced_composition(
      common.DifferentialPrivacyParameters(total_epsilon, total_delta),
      num_queries, delta)
  # None signals that no feasible epsilon exists for this budget.
  if expected_epsilon is None:
    self.assertIsNone(epsilon)
    return
  self.assertAlmostEqual(expected_epsilon, epsilon, places=6)
def test_advanced_composition(self, epsilon, delta, num_queries, total_delta,
                              expected_total_epsilon):
  """Total epsilon from advanced composition matches expectation."""
  total_epsilon = accountant.advanced_composition(
      common.DifferentialPrivacyParameters(epsilon, delta), num_queries,
      total_delta)
  # None signals that composition is infeasible for this budget.
  if expected_total_epsilon is None:
    self.assertIsNone(total_epsilon)
    return
  self.assertAlmostEqual(expected_total_epsilon, total_epsilon)
def test_from_privacy_parameters(self, epsilon, delta,
                                 value_discretization_interval,
                                 expected_rounded_probability_mass_function,
                                 expected_infinity_mass):
  """PLD built from (epsilon, delta) has the expected discretized masses."""
  privacy_parameters = common.DifferentialPrivacyParameters(epsilon, delta)
  pld = privacy_loss_distribution.PrivacyLossDistribution.from_privacy_parameters(
      privacy_parameters,
      value_discretization_interval=value_discretization_interval)
  self.assertAlmostEqual(expected_infinity_mass, pld.infinity_mass)
  test_util.dictionary_almost_equal(
      self, expected_rounded_probability_mass_function,
      pld.rounded_probability_mass_function)
def test_self_composition_truncation_account_for_truncated_mass(self):
  """Truncated tail mass is surfaced as delta at the composed epsilon."""
  num_composition = 2
  tail_mass_truncation = 0.5
  epsilon_initial = 1
  base_pld = privacy_loss_distribution.PrivacyLossDistribution.from_privacy_parameters(
      common.DifferentialPrivacyParameters(epsilon_initial, 0))
  composed_pld = base_pld.self_compose(
      num_composition, tail_mass_truncation=tail_mass_truncation)
  total_epsilon = num_composition * epsilon_initial
  self.assertAlmostEqual(tail_mass_truncation,
                         composed_pld.get_delta_for_epsilon(total_epsilon))
def _compose_distributions(
    self, noise_standard_deviation: float
) -> 'pldlib.PrivacyLossDistribution':
  """Uses the Privacy Loss Distribution library to compose distributions.

  Args:
    noise_standard_deviation: The noise of the distributions to construct.

  Returns:
    A PrivacyLossDistribution object for the pipeline.

  Raises:
    ValueError: If a mechanism has an unsupported mechanism type. Previously an
      unrecognized type silently re-composed the pld from the prior iteration
      (or composed None on the first), corrupting the accounting.
  """
  composed = None
  for mechanism_spec_internal in self._mechanisms:
    mechanism_type = mechanism_spec_internal.mechanism_spec.mechanism_type
    if mechanism_type == MechanismType.LAPLACE:
      # The Laplace distribution parameter = std/sqrt(2).
      pld = pldlib.PrivacyLossDistribution.from_laplace_mechanism(
          mechanism_spec_internal.sensitivity * noise_standard_deviation /
          math.sqrt(2) / mechanism_spec_internal.weight,
          value_discretization_interval=self._pld_discretization)
    elif mechanism_type == MechanismType.GAUSSIAN:
      pld = pldlib.PrivacyLossDistribution.from_gaussian_mechanism(
          mechanism_spec_internal.sensitivity * noise_standard_deviation /
          mechanism_spec_internal.weight,
          value_discretization_interval=self._pld_discretization)
    elif mechanism_type == MechanismType.GENERIC:
      # It is required to convert between the noise_standard_deviation of a
      # Laplace or Gaussian mechanism and the (epsilon, delta) Generic
      # mechanism because the calibration is defined by one parameter. There
      # are multiple ways to do this; here it is assumed that (epsilon, delta)
      # specifies the Laplace mechanism and epsilon is computed based on this.
      # The delta is computed to be proportional to epsilon.
      epsilon_0_interim = math.sqrt(2) / noise_standard_deviation
      delta_0_interim = (
          epsilon_0_interim / self._total_epsilon * self._total_delta)
      pld = pldlib.PrivacyLossDistribution.from_privacy_parameters(
          common.DifferentialPrivacyParameters(epsilon_0_interim,
                                               delta_0_interim),
          value_discretization_interval=self._pld_discretization)
    else:
      raise ValueError(f"Unsupported mechanism type: {mechanism_type}")
    composed = pld if composed is None else composed.compose(pld)
  return composed
def test_epsilon_delta_value_errors(self, epsilon, delta):
  """Invalid (epsilon, delta) pairs are rejected at construction time."""
  self.assertRaises(ValueError, common.DifferentialPrivacyParameters, epsilon,
                    delta)
def get_total_epsilon_for_epsilon(epsilon):
  # Closure: delta, num_queries and total_privacy_parameters come from the
  # enclosing scope.
  return advanced_composition(
      common.DifferentialPrivacyParameters(epsilon, delta), num_queries,
      total_privacy_parameters.delta)
def test_discrete_gaussian_from_privacy_parameters_value_errors(
    self, sensitivity, epsilon, delta):
  """Invalid parameter combinations must raise ValueError."""
  construct = (
      privacy_loss_mechanism.DiscreteGaussianPrivacyLoss.from_privacy_guarantee)
  # The parameters object is built inside the context manager so that a
  # ValueError from its constructor also satisfies the assertion.
  with self.assertRaises(ValueError):
    construct(common.DifferentialPrivacyParameters(epsilon, delta), sensitivity)