コード例 #1
0
 def test_get_private_count_infinite_eps(self):
     """Checks that an infinite epsilon leaves the count unchanged.

     The privacy parameter is built with epsilon=np.inf, and the value
     returned by get_private_count is expected to equal the input count.
     """
     true_count = 60
     infinite_eps_param = CountPrivacyParam(epsilon=np.inf, delta=1e-2)
     returned_count = central_privacy_utils.get_private_count(
         true_count, infinite_eps_param)
     self.assertEqual(returned_count, true_count)
コード例 #2
0
def default_tree_param(
    k: int, data: clustering_params.Data,
    privacy_param: clustering_params.DifferentialPrivacyParam,
    privacy_budget_split: clustering_params.PrivacyBudgetSplit
) -> typing.Tuple[clustering_params.TreeParam, PrivateCount]:
    """Builds a heuristic TreeParam from the data and the number of clusters.

    Args:
      k: Number of clusters to divide the data into.
      data: Data to find centers for; its `num_points` and `dim` are read.
      privacy_param: Privacy parameters for the algorithm; its `epsilon` and
        `delta` are read.
      privacy_budget_split: Budget split between the different computations;
        its `frac_sum` scales the epsilon used for the sum noise.

    Returns:
      A `(tree_param, private_count)` pair. The private count is returned as
      well so that callers do not have to compute it a second time.
    """
    # The depth limit feeds into the private count calculation below, so it
    # must not depend on the count. The value was chosen experimentally over
    # multiple datasets.
    depth_limit = 20

    # Standard deviation of the sum noise, using a sensitivity of 1. With an
    # infinite epsilon the standard deviation is set to zero.
    infinite_epsilon = privacy_param.epsilon == np.inf
    if infinite_epsilon:
        noise_std = 0
    else:
        sum_budget = common.DifferentialPrivacyParameters(
            privacy_param.epsilon * privacy_budget_split.frac_sum,
            privacy_param.delta)
        noise_std = accountant.get_smallest_gaussian_noise(
            sum_budget, num_queries=1, sensitivity=1.0)

    num_points = data.num_points
    count_param = central_privacy_utils.PrivateCountParam(
        privacy_param, privacy_budget_split, depth_limit)
    private_count = central_privacy_utils.get_private_count(
        num_points, count_param)

    # Think of the noise as being spread over the points that are summed. Its
    # l2-norm is roughly sqrt(dimension) * noise_std * radius, so spreading it
    # over 10 * sqrt(dimension) * noise_std points leaves each point with
    # noise of roughly 0.1 * radius.
    low_noise_node_size = int(10 * np.sqrt(data.dim) * noise_std)

    # Allow for at least one node per cluster, even when that means more
    # noise than we would like.
    per_cluster_node_size = private_count // (2 * k)

    # The node size must be at least 1. The private count may be negative, so
    # the clamp to 1 has to be applied last.
    node_size = max(1, min(low_noise_node_size, per_cluster_node_size))

    tree_param = clustering_params.TreeParam(
        min_num_points_in_branching_node=3 * node_size,
        min_num_points_in_node=node_size,
        max_depth=depth_limit)
    return (tree_param, private_count)
コード例 #3
0
    def test_get_private_count(self, dlaplace_noise, expected_private_count,
                               mock_dlaplace_fn):
        """Checks get_private_count against a mocked noise sample.

        The mock is set to return `dlaplace_noise`; the result for a count of
        60 must equal `expected_private_count`, and the mock must have been
        called exactly once with the argument 2.
        """
        mock_dlaplace_fn.return_value = dlaplace_noise

        true_count = 60
        privacy_param = CountPrivacyParam(epsilon=2.0, delta=1e-2)
        noisy_count = central_privacy_utils.get_private_count(
            true_count, privacy_param)

        self.assertEqual(noisy_count, expected_private_count)
        mock_dlaplace_fn.assert_called_once_with(2)
コード例 #4
0
 def test_get_private_count_infinite_eps(self):
     """Checks that an infinite epsilon leaves the count unchanged.

     The count parameter is derived from a test clustering param built with
     epsilon=np.inf; get_private_count is expected to return the input
     count as-is.
     """
     true_count = 60
     infinite_eps_clustering_param = test_utils.get_test_clustering_param(
         epsilon=np.inf,
         delta=1e-2,
         frac_sum=0.2,
         frac_group_count=0.8,
         max_depth=4)
     count_param = CentralPrivateCountParam.from_clustering_param(
         infinite_eps_clustering_param)
     returned_count = central_privacy_utils.get_private_count(
         true_count, count_param)
     self.assertEqual(returned_count, true_count)
コード例 #5
0
    def get_private_count(self) -> int:
        """Returns the private count of the node's points, caching the result.

        The count is computed on the first call and stored on
        `self.private_count`; later calls return the stored value.

        Raises:
          NotImplementedError: If the clustering param's privacy model is not
            the central model.
        """
        if self.private_count is None:
            privacy_param = self.clustering_param.privacy_param
            if privacy_param.privacy_model != clustering_params.PrivacyModel.CENTRAL:
                raise NotImplementedError(
                    f"Currently unsupported privacy model: {privacy_param.privacy_model}"
                )
            num_points = len(self.nonprivate_points)
            count_param = (
                central_privacy_utils.PrivateCountParam.from_clustering_param(
                    self.clustering_param))
            self.private_count = central_privacy_utils.get_private_count(
                num_points, count_param)
        return self.private_count
コード例 #6
0
    def test_get_private_count(self, dlaplace_noise, expected_private_count,
                               mock_dlaplace_fn):
        """Checks get_private_count against a mocked noise sample.

        The mock is set to return `dlaplace_noise`; with a count parameter
        derived from a test clustering param, the result for a count of 60
        must equal `expected_private_count`, and the mock must have been
        called exactly once with the argument 2.
        """
        mock_dlaplace_fn.return_value = dlaplace_noise

        true_count = 60
        test_clustering_param = test_utils.get_test_clustering_param(
            epsilon=10,
            delta=1e-2,
            frac_sum=0.2,
            frac_group_count=0.8,
            max_depth=3)
        count_param = CentralPrivateCountParam.from_clustering_param(
            test_clustering_param)
        noisy_count = central_privacy_utils.get_private_count(
            true_count, count_param)

        self.assertEqual(noisy_count, expected_private_count)
        mock_dlaplace_fn.assert_called_once_with(2)