def test_get_private_count_infinite_eps(self):
    """Checks that an infinite epsilon returns the nonprivate count as is."""
    true_count = 60
    infinite_eps_param = CountPrivacyParam(epsilon=np.inf, delta=1e-2)

    noised_count = central_privacy_utils.get_private_count(
        true_count, infinite_eps_param)

    # With epsilon = inf the result is expected to equal the input count.
    self.assertEqual(noised_count, true_count)
def default_tree_param(
    k: int, data: clustering_params.Data,
    privacy_param: clustering_params.DifferentialPrivacyParam,
    privacy_budget_split: clustering_params.PrivacyBudgetSplit
) -> typing.Tuple[clustering_params.TreeParam, PrivateCount]:
    """Builds a heuristic TreeParam from the data and the number of clusters.

    Args:
      k: Number of clusters to divide the data into.
      data: Data to find centers for.
      privacy_param: Privacy parameters for the algorithm.
      privacy_budget_split: Budget split between different computations.

    Returns:
      A pair (default TreeParam, private count). The private count is handed
      back so that callers do not need to compute it a second time.
    """
    # Chosen experimentally over multiple datasets. The depth feeds into the
    # private count computation below, so it must not depend on the count.
    max_depth = 20

    # Standard deviation of the sum noise for a sensitivity of 1; it is set to
    # zero when epsilon is infinite.
    infinite_epsilon = privacy_param.epsilon == np.inf
    if infinite_epsilon:
        noise_std = 0
    else:
        sum_budget = common.DifferentialPrivacyParameters(
            privacy_param.epsilon * privacy_budget_split.frac_sum,
            privacy_param.delta)
        noise_std = accountant.get_smallest_gaussian_noise(
            sum_budget, num_queries=1, sensitivity=1.0)

    nonprivate_count = data.num_points
    count_param = central_privacy_utils.PrivateCountParam(
        privacy_param, privacy_budget_split, max_depth)
    private_count = central_privacy_utils.get_private_count(
        nonprivate_count, count_param)

    # Think of the noise as being shared by the points that are summed. Its
    # l2-norm is roughly sqrt(dimension) * noise_std * radius, so spreading it
    # over 10 * sqrt(dimension) * noise_std points leaves each point with
    # noise of roughly 0.1 * radius.
    low_noise_node_size = int(10 * np.sqrt(data.dim) * noise_std)

    # Keep the ability to consider one node per cluster, even when that means
    # accepting more noise than we would like.
    per_cluster_node_size = private_count // (2 * k)

    # The private count may be negative, so the clamp to at least 1 has to be
    # the outermost (last) operation.
    node_size = max(1, min(low_noise_node_size, per_cluster_node_size))

    tree_param = clustering_params.TreeParam(
        min_num_points_in_branching_node=3 * node_size,
        min_num_points_in_node=node_size,
        max_depth=max_depth)
    return (tree_param, private_count)
def test_get_private_count(self, dlaplace_noise, expected_private_count,
                           mock_dlaplace_fn):
    """Checks the private count produced for a fixed, mocked noise value.

    Args:
      dlaplace_noise: Value the mocked noise function is set to return.
      expected_private_count: Private count expected for that noise value.
      mock_dlaplace_fn: Mock standing in for the noise function; presumably
        injected by a patch decorator outside this view.
    """
    mock_dlaplace_fn.return_value = dlaplace_noise
    true_count = 60
    privacy_param = CountPrivacyParam(epsilon=2.0, delta=1e-2)

    noised_count = central_privacy_utils.get_private_count(
        true_count, privacy_param)

    self.assertEqual(noised_count, expected_private_count)
    # The noise function must be invoked exactly once, with argument 2.
    mock_dlaplace_fn.assert_called_once_with(2)
def test_get_private_count_infinite_eps(self):
    """Checks that an infinite epsilon returns the nonprivate count as is."""
    true_count = 60
    infinite_eps_clustering_param = test_utils.get_test_clustering_param(
        epsilon=np.inf,
        delta=1e-2,
        frac_sum=0.2,
        frac_group_count=0.8,
        max_depth=4)

    noised_count = central_privacy_utils.get_private_count(
        true_count,
        CentralPrivateCountParam.from_clustering_param(
            infinite_eps_clustering_param))

    # With epsilon = inf the result is expected to equal the input count.
    self.assertEqual(noised_count, true_count)
def get_private_count(self) -> int:
    """Returns the private count of the node's points, caching the result.

    The count is computed the first time it is requested and stored on
    `self.private_count`; later calls return the stored value.

    Raises:
      NotImplementedError: If the privacy model is not CENTRAL.
    """
    if self.private_count is None:
        privacy_param = self.clustering_param.privacy_param
        if privacy_param.privacy_model != clustering_params.PrivacyModel.CENTRAL:
            raise NotImplementedError(
                f"Currently unsupported privacy model: {privacy_param.privacy_model}"
            )

        num_points = len(self.nonprivate_points)
        count_param = (
            central_privacy_utils.PrivateCountParam.from_clustering_param(
                self.clustering_param))
        self.private_count = central_privacy_utils.get_private_count(
            num_points, count_param)

    return self.private_count
def test_get_private_count(self, dlaplace_noise, expected_private_count,
                           mock_dlaplace_fn):
    """Checks the private count produced for a fixed, mocked noise value.

    Args:
      dlaplace_noise: Value the mocked noise function is set to return.
      expected_private_count: Private count expected for that noise value.
      mock_dlaplace_fn: Mock standing in for the noise function; presumably
        injected by a patch decorator outside this view.
    """
    mock_dlaplace_fn.return_value = dlaplace_noise
    true_count = 60
    test_clustering_param = test_utils.get_test_clustering_param(
        epsilon=10,
        delta=1e-2,
        frac_sum=0.2,
        frac_group_count=0.8,
        max_depth=3)

    noised_count = central_privacy_utils.get_private_count(
        true_count,
        CentralPrivateCountParam.from_clustering_param(test_clustering_param))

    self.assertEqual(noised_count, expected_private_count)
    # The noise function must be invoked exactly once, with argument 2.
    # NOTE(review): the value 2 presumably follows from the clustering
    # parameters above -- confirm against CentralPrivateCountParam.
    mock_dlaplace_fn.assert_called_once_with(2)