Exemple #1
0
    def test_get_children(self):
        """Checks that children() splits a node's points between two children.

        A node with prefix '0' holding five 5-dimensional points is expanded.
        The test expects children '00' and '01' that keep the parent's
        clustering param and SimHash, with points 0-1 under '00' and points
        2-4 under '01'.
        """
        hash_prefix, dim, max_hash_len = '0', 5, 2
        datapoints = np.array([[1.5, 0, 1, 0.5, 0], [1, 1, 0, 0.7, 0.1],
                               [0.8, 0.1, 1, 0.2, 0.4],
                               [0.1, 0.5, 0.3, 0.7, 0.8],
                               [-0.5, 0.1, -0.3, -0.4, 0.2]])
        # Returns children regardless of whether the node should branch. The
        # filtering in the algorithm is done after.
        clustering_param = test_utils.get_test_clustering_param(
            max_depth=max_hash_len)
        projection_vectors = np.array([[0, 1, 1, -1, 0], [1, 0, -1, 0, 0]])
        sh = lsh.SimHash(dim, max_hash_len, projection_vectors)
        node = lsh_tree.LshTreeNode(hash_prefix, datapoints, clustering_param,
                                    sh)
        children = node.children()

        self.assertSameElements([child.hash_prefix for child in children],
                                ['00', '01'])
        # Rows of `datapoints` each child is expected to own. The prefix
        # assertion above guarantees every child's prefix is a key here.
        expected_points = {
            '00': datapoints[[0, 1]],
            '01': datapoints[[2, 3, 4]],
        }
        for child in children:
            self.assertEqual(child.clustering_param, clustering_param)
            self.assertEqual(child.sim_hash, sh)
            # assert_array_equal checks shape as well as values, so a child
            # with the wrong number of points cannot pass via broadcasting,
            # and a failure reports the mismatching entries.
            np.testing.assert_array_equal(child.nonprivate_points,
                                          expected_points[child.hash_prefix])
Exemple #2
0
 def test_get_next_level(self):
     """Checks that a branching node is expanded into both of its children.

     A single node with prefix '0' is passed through
     filter_branching_nodes and then get_next_level; the result is compared
     against two expected child nodes with prefixes '00' and '01'.
     """
     sim_hash = get_test_sim_hash()
     clustering_param = test_utils.get_test_clustering_param(
         min_num_points_in_branching_node=10, min_num_points_in_node=5)
     parent = TestLshTreeNode('0',
                              get_test_origin_points(nonprivate_count=16),
                              clustering_param,
                              sim_hash,
                              private_count=20)
     current_level: lsh_tree.LshTreeLevel = [parent]
     # Both expected children are built identically apart from the prefix.
     expected_children = [
         TestLshTreeNode(prefix,
                         get_test_origin_points(nonprivate_count=8),
                         clustering_param,
                         sim_hash,
                         private_count=9) for prefix in ('00', '01')
     ]
     nodes_to_branch = lsh_tree.LshTree.filter_branching_nodes(current_level)
     actual_children = lsh_tree.LshTree.get_next_level(nodes_to_branch)
     self.assertSequenceEqual(actual_children, expected_children)
Exemple #3
0
 def test_lsh_tree_leaves(self):
     """Checks the leaves of a tree grown from an unevenly splitting root."""
     # Intended tree shape:
     #   - each node is labelled "nonprivate count + 1",
     #   - a left branch appends 0 to the prefix, a right branch appends 1,
     #   - nodes shown in parentheses are filtered out.
     #
     #           64+1
     #          /    \
     #      8+1      56+1
     #     /  \      /   \
     # (1+1)   7+1  7+1   49+1
     #                   /   \
     #                (6+1)  43+1
     points_at_root = 64
     sim_hash = get_test_sim_hash()
     clustering_param = test_utils.get_test_clustering_param(
         min_num_points_in_node=8,
         min_num_points_in_branching_node=9,
         max_depth=3)
     root = TestLshTreeNode('',
                            get_test_origin_points(points_at_root),
                            clustering_param,
                            sim_hash,
                            frac_zero=0.125)
     # (hash prefix, nonprivate point count) of every expected leaf.
     leaf_specs = (('01', 7), ('10', 7), ('111', 43))
     expected_leaves = [
         TestLshTreeNode(prefix, get_test_origin_points(count),
                         clustering_param, sim_hash)
         for prefix, count in leaf_specs
     ]
     tree = lsh_tree.LshTree(root)
     self.assertEqual(tree.leaves, expected_leaves)
Exemple #4
0
 def test_get_next_level_filters_children_node(self):
     """Checks that get_next_level drops children that are too small.

     Two parent nodes are branched; only the child with prefix '11' is
     expected to appear in the next level.
     """
     sim_hash = get_test_sim_hash()
     clustering_param = test_utils.get_test_clustering_param(
         min_num_points_in_branching_node=10, min_num_points_in_node=9)
     # The children test nodes of this parent have a private count of 6,
     # which is less than min_num_points_in_node.
     evenly_split_parent = TestLshTreeNode(
         '0',
         get_test_origin_points(nonprivate_count=10),
         clustering_param,
         sim_hash,
         private_count=11)
     # The children test nodes of this parent have private counts of 3 and
     # 9; only the node with 9 should be in the result.
     skewed_parent = TestLshTreeNode(
         '1',
         get_test_origin_points(nonprivate_count=10),
         clustering_param,
         sim_hash,
         private_count=11,
         frac_zero=0.2)
     current_level: lsh_tree.LshTreeLevel = [
         evenly_split_parent,
         skewed_parent,
     ]
     surviving_child = TestLshTreeNode(
         '11',
         get_test_origin_points(nonprivate_count=8),
         clustering_param,
         sim_hash,
         private_count=9)
     nodes_to_branch = lsh_tree.LshTree.filter_branching_nodes(current_level)
     actual_next_level = lsh_tree.LshTree.get_next_level(nodes_to_branch)
     self.assertSequenceEqual(actual_next_level, [surviving_child])
Exemple #5
0
 def test_lsh_tree_leaves_branching_node_becomes_leaf(self):
     """Checks that a node becomes a leaf when all its children are dropped."""
     # Intended tree shape:
     #   - each node is labelled "nonprivate count + 1",
     #   - a left branch appends 0 to the prefix, a right branch appends 1,
     #   - nodes shown in parentheses are filtered out.
     #
     #             64+1
     #          /       \
     #      32+1          32+1
     #     /   \         /   \
     # (16+1) (16+1)  (16+1) (16+1)
     points_at_root = 64
     sim_hash = get_test_sim_hash()
     clustering_param = test_utils.get_test_clustering_param(
         min_num_points_in_node=20,
         min_num_points_in_branching_node=30,
         max_depth=5)
     root = TestLshTreeNode('',
                            get_test_origin_points(points_at_root),
                            clustering_param,
                            sim_hash,
                            frac_zero=0.5)
     # Both depth-1 nodes are expected as leaves, each holding 32 points.
     expected_leaves = [
         TestLshTreeNode(prefix, get_test_origin_points(32),
                         clustering_param, sim_hash)
         for prefix in ('0', '1')
     ]
     tree = lsh_tree.LshTree(root)
     self.assertEqual(tree.leaves, expected_leaves)
Exemple #6
0
 def test_lsh_tree_negative_count_root_errors(self):
     """Checks that LshTree rejects a root with a negative private count."""
     origin_points = get_test_origin_points(nonprivate_count=15)
     clustering_param = test_utils.get_test_clustering_param()
     sim_hash = get_test_sim_hash()
     negative_root = lsh_tree.LshTreeNode('0',
                                          origin_points,
                                          clustering_param,
                                          sim_hash,
                                          private_count=-10)
     # Constructing the tree itself is expected to raise.
     self.assertRaises(ValueError, lsh_tree.LshTree, negative_root)
Exemple #7
0
 def test_private_count_param_from_clustering_param(self):
     """Checks the fields CentralPrivateCountParam takes from a clustering param.

     privacy_param and privacy_budget_split must match the source object,
     and max_tree_depth must match its tree_param.max_depth.
     """
     source = test_utils.get_test_clustering_param()
     derived = CentralPrivateCountParam.from_clustering_param(source)
     self.assertEqual(derived.privacy_param, source.privacy_param)
     self.assertEqual(derived.privacy_budget_split,
                      source.privacy_budget_split)
     self.assertEqual(derived.max_tree_depth, source.tree_param.max_depth)
 def test_private_count_param_from_clustering_param(self):
     """Checks epsilon and delta of a CountPrivacyParam built from a clustering param."""
     # NOTE(review): this method has the same name as the test defined
     # directly before it in this file. If both live in the same class, the
     # earlier definition is shadowed and never runs -- confirm.
     source = test_utils.get_test_clustering_param(
         epsilon=10,
         delta=1e-2,
         frac_sum=0.2,
         frac_group_count=0.8,
         max_depth=3)
     derived = CountPrivacyParam.from_clustering_param(source)
     self.assertEqual(derived.epsilon, 2.0)
     self.assertEqual(derived.delta, 1e-2)
Exemple #9
0
 def test_filter_branching_nodes_too_few_points(self):
     """Checks that a node with too small a private count is not branched."""
     sim_hash = get_test_sim_hash()
     # The node holds 15 nonprivate points but has a private count of 1.
     # The check should use private_count, not the nonprivate count, so the
     # node must be filtered out.
     sparse_node = lsh_tree.LshTreeNode(
         '0',
         get_test_origin_points(nonprivate_count=15),
         test_utils.get_test_clustering_param(
             min_num_points_in_branching_node=10),
         sim_hash,
         private_count=1)
     current_level: lsh_tree.LshTreeLevel = [sparse_node]
     remaining = lsh_tree.LshTree.filter_branching_nodes(current_level)
     self.assertEmpty(remaining)
 def test_average_privacy_param(self):
     """Checks epsilon, delta and sensitivity of AveragePrivacyParam.

     The param is built from a clustering param with epsilon=10, delta=1e-2,
     frac_sum=0.7 and radius=4.3; the expected values are 7.0, 1e-2 and 4.3.
     """
     source = test_utils.get_test_clustering_param(
         epsilon=10,
         delta=1e-2,
         frac_sum=0.7,
         frac_group_count=0.3,
         radius=4.3)
     derived = AveragePrivacyParam.from_clustering_param(source)
     self.assertEqual(derived.epsilon, 7.0)
     self.assertEqual(derived.delta, 1e-2)
     self.assertEqual(derived.sensitivity, 4.3)
Exemple #11
0
 def test_filter_branching_nodes_enough_points(self):
     """Checks that a node with a large enough private count is kept."""
     sim_hash = get_test_sim_hash()
     # private_count (20) exceeds min_num_points_in_branching_node (10), so
     # the level is expected to come back unchanged.
     dense_node = lsh_tree.LshTreeNode(
         '0',
         get_test_origin_points(nonprivate_count=15),
         test_utils.get_test_clustering_param(
             min_num_points_in_branching_node=10),
         sim_hash,
         private_count=20)
     current_level: lsh_tree.LshTreeLevel = [dense_node]
     remaining = lsh_tree.LshTree.filter_branching_nodes(current_level)
     self.assertSequenceEqual(remaining, current_level)
Exemple #12
0
 def test_root_node_provide_private_count(self):
     """Checks the node root_node() builds when private_count is supplied.

     The root must have an empty hash prefix, carry the given points and
     clustering param, use a SimHash with dim 3 and max_hash_len 20, and
     report the private count that was passed in.
     """
     raw_points = [[1, 2, 1], [0.4, 0.2, 0.8], [3, 0, 3]]
     dataset = clustering_params.Data(raw_points, radius=4.3)
     clustering_param = test_utils.get_test_clustering_param(radius=4.3,
                                                             max_depth=20)
     root = lsh_tree.root_node(dataset, clustering_param, private_count=10)

     self.assertEqual(root.hash_prefix, '')
     self.assertSequenceEqual(root.nonprivate_points, raw_points)
     self.assertEqual(root.clustering_param, clustering_param)

     root_hash = root.sim_hash
     self.assertEqual(root_hash.dim, 3)
     self.assertEqual(root_hash.max_hash_len, 20)

     self.assertEqual(root.private_count, 10)
Exemple #13
0
 def test_get_private_count_infinite_eps(self):
     """Checks that with epsilon=inf the private count equals the true count."""
     nonprivate_count = 60
     clustering_param = test_utils.get_test_clustering_param(
         epsilon=np.inf,
         delta=1e-2,
         frac_sum=0.2,
         frac_group_count=0.8,
         max_depth=4)
     count_param = CentralPrivateCountParam.from_clustering_param(
         clustering_param)
     private_count = central_privacy_utils.get_private_count(
         nonprivate_count, count_param)
     self.assertEqual(private_count, nonprivate_count)
Exemple #14
0
 def test_get_private_count_basic(self, mock_dlaplace_fn):
     """Checks LshTreeNode.get_private_count() against a mocked noise source.

     Args:
       mock_dlaplace_fn: mock for the noise function; presumably injected by
         a patch decorator that is not visible here, which would also set
         the return value behind the expected count of 25 -- confirm.
     """
     point_count = 30
     points = get_test_origin_points(nonprivate_count=point_count)
     clustering_param = test_utils.get_test_clustering_param(
         epsilon=5, frac_sum=0.2, frac_group_count=0.8, max_depth=9)
     sim_hash = get_test_sim_hash()
     root = lsh_tree.LshTreeNode(hash_prefix='',
                                 nonprivate_points=points,
                                 clustering_param=clustering_param,
                                 sim_hash=sim_hash)
     noisy_count = root.get_private_count()
     self.assertEqual(noisy_count, 25)
     mock_dlaplace_fn.assert_called_once_with(0.4)
Exemple #15
0
    def test_get_children_error(self):
        """Checks that children() raises ValueError for this node.

        The node's prefix '00' has the same length as max_hash_len (2);
        presumably that is why no further children can be produced --
        confirm against LshTreeNode.children.
        """
        prefix = '00'
        point_dim = 5
        hash_len_limit = 2
        points = np.array([[1.5, 0, 1, 0.5, 0], [1, 1, 0, 0.7, 0.1]])
        clustering_param = test_utils.get_test_clustering_param(
            max_depth=hash_len_limit)
        projections = np.array([[0, 1, 1, -1, 0], [1, 0, -1, 0, 0]])
        sim_hash = lsh.SimHash(point_dim, hash_len_limit, projections)
        node = lsh_tree.LshTreeNode(prefix, points, clustering_param,
                                    sim_hash)

        self.assertRaises(ValueError, node.children)
Exemple #16
0
 def test_get_private_average_infinite_eps(self):
     """Checks that with epsilon=inf the private average is the plain mean.

     The three points sum to [4.2, 2.1, 4.8]; divided by the private count
     of 3 this gives the expected center [1.4, 0.7, 1.6].
     """
     points = [[1, 2, 1], [0.2, 0.1, 0.8], [3, 0, 3]]
     point_count = 3
     expected_center = [1.4, 0.7, 1.6]
     clustering_param = test_utils.get_test_clustering_param(
         epsilon=np.inf,
         delta=1e-2,
         frac_sum=0.7,
         frac_group_count=0.3,
         radius=4.3)
     actual_center = central_privacy_utils.get_private_average(
         points, point_count, clustering_param, dim=3)
     self.assertSequenceAlmostEqual(actual_center, expected_center)
Exemple #17
0
    def test_get_private_count_cache(self):
        """Checks that repeated get_private_count() calls return the same value."""
        point_count = 30
        points = get_test_origin_points(nonprivate_count=point_count)
        clustering_param = test_utils.get_test_clustering_param(epsilon=0.01)
        sim_hash = get_test_sim_hash()
        node = lsh_tree.LshTreeNode(hash_prefix='',
                                    nonprivate_points=points,
                                    clustering_param=clustering_param,
                                    sim_hash=sim_hash)

        # Two consecutive calls on the same node must agree.
        first_call = node.get_private_count()
        second_call = node.get_private_count()
        self.assertEqual(first_call, second_call)
Exemple #18
0
    def test_get_private_count(self, dlaplace_noise, expected_private_count,
                               mock_dlaplace_fn):
        """Checks get_private_count() for a given mocked noise value.

        Args:
          dlaplace_noise: value the mocked noise function returns.
          expected_private_count: private count expected for a nonprivate
            count of 60 with that noise.
          mock_dlaplace_fn: mock standing in for the noise function;
            presumably supplied by a patch decorator outside this view.
        """
        mock_dlaplace_fn.return_value = dlaplace_noise

        true_count = 60
        clustering_param = test_utils.get_test_clustering_param(
            epsilon=10,
            delta=1e-2,
            frac_sum=0.2,
            frac_group_count=0.8,
            max_depth=3)
        count_param = CentralPrivateCountParam.from_clustering_param(
            clustering_param)

        noisy_count = central_privacy_utils.get_private_count(
            true_count, count_param)

        self.assertEqual(noisy_count, expected_private_count)
        mock_dlaplace_fn.assert_called_once_with(2)
Exemple #19
0
    def test_get_private_average_error(self):
        """Checks that get_private_average() rejects non-positive private counts."""
        points = [[1, 2, 1], [0.4, 0.2, 0.8], [3, 0, 3]]
        clustering_param = test_utils.get_test_clustering_param(
            epsilon=10,
            delta=1e-2,
            frac_sum=0.7,
            frac_group_count=0.3,
            radius=4.3)

        # A zero count and a negative count must each raise ValueError.
        for invalid_count in (0, -2):
            with self.assertRaises(ValueError):
                central_privacy_utils.get_private_average(points,
                                                          invalid_count,
                                                          clustering_param,
                                                          dim=3)
Exemple #20
0
    def test_get_private_average(self, nonprivate_points, expected_center,
                                 mock_normal_fn):
        """Checks get_private_average() and the arguments of its noise call.

        Args:
          nonprivate_points: points to average.
          expected_center: center expected for those points with a private
            count of 4.
          mock_normal_fn: mock standing in for the normal-noise function;
            presumably supplied by a patch decorator outside this view.
        """
        point_count = 4
        clustering_param = test_utils.get_test_clustering_param(
            epsilon=10,
            delta=1e-2,
            frac_sum=0.7,
            frac_group_count=0.3,
            radius=4.3)

        actual_center = central_privacy_utils.get_private_average(
            nonprivate_points, point_count, clustering_param, dim=3)

        self.assertSequenceAlmostEqual(actual_center, expected_center)
        mock_normal_fn.assert_called_once()
        # Index 1 of call_args holds the keyword arguments of the call.
        noise_kwargs = mock_normal_fn.call_args[1]
        self.assertEqual(noise_kwargs['size'], 3)
        self.assertAlmostEqual(noise_kwargs['scale'], 1.927768, delta=1e-5)