Ejemplo n.º 1
0
  def test_default_tree_param(self, points, returned_private_count, k, epsilon,
                              expected_min_num_points_in_branching_node,
                              expected_min_num_points_in_node,
                              expected_max_depth, mock_gaussian_noise,
                              mock_private_count):
    """Checks the tree parameters and private count produced by default.

    Builds `points` all-ones datapoints, stubs the private count mock to
    return `returned_private_count`, calls `default_tree_param`, and then
    verifies the returned tree parameter fields, the returned private count,
    and the arguments both mocks were called with.
    """
    num_dims = 10
    delta = 1e-2
    frac_sum, frac_group_count = 0.8, 0.2
    mock_private_count.return_value = returned_private_count

    dataset = clustering_params.Data(np.ones((points, num_dims)), radius=1.0)
    dp_param = clustering_params.DifferentialPrivacyParam(
        epsilon=epsilon, delta=delta)
    split = clustering_params.PrivacyBudgetSplit(
        frac_sum=frac_sum, frac_group_count=frac_group_count)

    actual_tree_param, actual_private_count = (
        default_clustering_params.default_tree_param(
            k, dataset, dp_param, split))

    self.assertEqual(actual_tree_param.max_depth, expected_max_depth)

    # The gaussian noise mock must be untouched only when epsilon is infinite.
    if epsilon != np.inf:
      mock_gaussian_noise.assert_called_once_with(
          common.DifferentialPrivacyParameters(frac_sum * epsilon, delta),
          1, 1.0)
    else:
      mock_gaussian_noise.assert_not_called()

    # The count share of epsilon is divided by (max_depth + 1) in the
    # expected call arguments.
    expected_count_param = central_privacy_utils.CountPrivacyParam(
        epsilon=frac_group_count * epsilon / (actual_tree_param.max_depth + 1),
        delta=delta)
    mock_private_count.assert_called_once_with(
        nonprivate_count=points, count_privacy_param=expected_count_param)

    self.assertEqual(actual_private_count, returned_private_count)
    self.assertEqual(actual_tree_param.min_num_points_in_node,
                     expected_min_num_points_in_node)
    self.assertEqual(actual_tree_param.min_num_points_in_branching_node,
                     expected_min_num_points_in_branching_node)
Ejemplo n.º 2
0
def get_test_clustering_param(epsilon=1.0,
                              delta=1e-2,
                              frac_sum=0.2,
                              frac_group_count=0.8,
                              min_num_points_in_branching_node=4,
                              min_num_points_in_node=2,
                              max_depth=4,
                              radius=1):
  # pylint: disable=g-doc-args
  """Builds a ClusteringParam for tests, defaulting whatever is not passed.

  Usage: A test should pass explicitly every parameter it relies on and
  leave the remaining ones at their defaults.
  """
  return clustering_params.ClusteringParam(
      privacy_param=clustering_params.DifferentialPrivacyParam(
          epsilon=epsilon, delta=delta),
      privacy_budget_split=clustering_params.PrivacyBudgetSplit(
          frac_sum=frac_sum, frac_group_count=frac_group_count),
      tree_param=clustering_params.TreeParam(
          min_num_points_in_branching_node=min_num_points_in_branching_node,
          min_num_points_in_node=min_num_points_in_node,
          max_depth=max_depth),
      short_description='TestClusteringParam',
      radius=radius)
 def test_private_count_param(self):
     """Checks the group-count privacy parameters for a fixed configuration."""
     delta = 1e-2
     dp_param = clustering_params.DifferentialPrivacyParam(
         epsilon=10, delta=delta)
     split = clustering_params.PrivacyBudgetSplit(
         frac_sum=0.2, frac_group_count=0.8)
     depth = 3

     result = CountPrivacyParam.compute_group_count_privacy_param(
         dp_param, split, depth)

     # NOTE(review): 2.0 presumably equals 10 * 0.8 / (3 + 1) -- confirm
     # against the implementation of compute_group_count_privacy_param.
     self.assertEqual(result.epsilon, 2.0)
     self.assertEqual(result.delta, delta)
Ejemplo n.º 4
0
 def test_private_count_param(self):
     """Checks that CentralPrivateCountParam exposes its constructor inputs."""
     dp_param = clustering_params.DifferentialPrivacyParam(
         epsilon=2.0, delta=1e-3)
     split = clustering_params.PrivacyBudgetSplit(
         frac_sum=0.7, frac_group_count=0.3)
     depth = 10

     count_param = CentralPrivateCountParam(dp_param, split, depth)

     # Each attribute must hand back the value passed to the constructor.
     expected_by_attribute = (
         ('privacy_param', dp_param),
         ('privacy_budget_split', split),
         ('max_tree_depth', depth),
     )
     for attribute, expected in expected_by_attribute:
         self.assertEqual(getattr(count_param, attribute), expected)
Ejemplo n.º 5
0
 def run_clustering(k: int, eps: float) -> None:
     """Runs private LSH clustering once and prints one table row of metrics.

     `data` and `FLAGS` are not parameters; they are read from the enclosing
     scope.

     Args:
       k: Forwarded as the first argument of `private_lsh_clustering`.
       eps: Epsilon of the differential privacy parameter; delta is fixed at
         1e-6.
     """
     dp_param = clustering_params.DifferentialPrivacyParam(
         epsilon=eps, delta=1e-6)
     clustering_result = clustering_algorithm.private_lsh_clustering(
         k, data, dp_param)
     metrics = clustering_result.get_clustering_metrics()

     # These names are referenced by the row template below.
     correct_pred = metrics.dominant_label_correct_count
     accuracy = metrics.dominant_label_accuracy
     false_match_frac = metrics.false_match_frac
     true_nonmatch_frac = metrics.true_nonmatch_frac

     row = (
         f'| {k:>2} | {eps:>7} '
         f'| {clustering_result.loss:>15.8} '
         f'| {accuracy:>6.2} ({correct_pred:>6} out of {FLAGS.num_points:>6}) '
         f'| {false_match_frac:>20.4} '
         f'| {true_nonmatch_frac:>23.4} |')
     print(row)
Ejemplo n.º 6
0
 def test_clustering_param(self):
   """Checks that ClusteringParam exposes the values it was built with."""
   dp_param = clustering_params.DifferentialPrivacyParam()
   split = clustering_params.PrivacyBudgetSplit()
   tree = clustering_params.TreeParam(
       min_num_points_in_branching_node=4,
       min_num_points_in_node=2,
       max_depth=5)

   param = clustering_params.ClusteringParam(
       privacy_param=dp_param,
       privacy_budget_split=split,
       tree_param=tree,
       short_description="TestClusteringParam",
       radius=20)

   # Every field should round-trip unchanged.
   self.assertEqual(param.privacy_param, dp_param)
   self.assertEqual(param.privacy_budget_split, split)
   self.assertEqual(param.tree_param, tree)
   self.assertEqual(param.short_description, "TestClusteringParam")
   self.assertEqual(param.radius, 20)
Ejemplo n.º 7
0
    def test_clipped_data_used_for_clustering_and_not_result_calculation(self):
        """Centers use the clipped points; the loss uses the original ones."""
        # With radius=1 the points clip to
        # [[0.3, 0.2], [0.6, 0.8], [0.6, 0.8]].
        raw_points = np.array([[0.3, 0.2], [3, 4], [6, 8]])
        # The radius is very small, so datapoints get clipped for the
        # center calculation.
        data = clustering_params.Data(datapoints=raw_points, radius=1)
        # No noise.
        noiseless_param = clustering_params.DifferentialPrivacyParam(np.inf)
        # No branching: the coreset is just the average of the points.
        single_node_tree = clustering_params.TreeParam(1, 1, 0)

        result = clustering_algorithm.private_lsh_clustering(
            3, data, noiseless_param, tree_param=single_node_tree)

        # The single center is the mean of the clipped points.
        clipped_mean = np.array([0.5, 0.6])
        self.assertLen(result.centers, 1)
        self.assertSequenceAlmostEqual(result.centers[0], clipped_mean)

        self.assertListEqual(list(result.labels), [0, 0, 0])

        # The loss is still measured against the original, unclipped points.
        self.assertAlmostEqual(result.loss, 103.02)
Ejemplo n.º 8
0
 def setUp(self):
     """Sets the baseline cluster count and privacy parameter on the test."""
     super().setUp()
     self.baseline_k = 2
     # Constructed with no arguments, so the library defaults apply.
     default_privacy_param = clustering_params.DifferentialPrivacyParam()
     self.baseline_privacy_param = default_privacy_param
Ejemplo n.º 9
0
 def test_privacy_param_defaults(self):
   """Checks the field values of a default DifferentialPrivacyParam."""
   defaults = clustering_params.DifferentialPrivacyParam()
   self.assertEqual(defaults.epsilon, 1.0)
   self.assertEqual(defaults.delta, 1e-6)
   expected_model = clustering_params.PrivacyModel.CENTRAL
   self.assertEqual(defaults.privacy_model, expected_model)