def test_get_children(self):
    """Check that `children()` splits a '0' node into '00' and '01'.

    Each child must carry the parent's clustering parameter and hash
    object, and hold exactly the rows of `datapoints` expected for its
    hash prefix.
    """
    hash_prefix, dim, max_hash_len = '0', 5, 2
    datapoints = np.array([[1.5, 0, 1, 0.5, 0],
                           [1, 1, 0, 0.7, 0.1],
                           [0.8, 0.1, 1, 0.2, 0.4],
                           [0.1, 0.5, 0.3, 0.7, 0.8],
                           [-0.5, 0.1, -0.3, -0.4, 0.2]])
    # Returns children regardless of whether the node should branch. The
    # filtering in the algorithm is done after.
    clustering_param = test_utils.get_test_clustering_param(
        max_depth=max_hash_len)
    projection_vectors = np.array([[0, 1, 1, -1, 0], [1, 0, -1, 0, 0]])
    sh = lsh.SimHash(dim, max_hash_len, projection_vectors)
    node = lsh_tree.LshTreeNode(hash_prefix, datapoints, clustering_param, sh)

    children = node.children()

    # Row indices of `datapoints` expected in each child, keyed by prefix.
    expected_rows_by_prefix = {'00': [0, 1], '01': [2, 3, 4]}
    # assertCountEqual also rejects a duplicated prefix, so every child seen
    # in the loop below is guaranteed to have an entry in the table.
    self.assertCountEqual([child.hash_prefix for child in children],
                          list(expected_rows_by_prefix))
    for child in children:
        self.assertEqual(child.clustering_param, clustering_param)
        self.assertEqual(child.sim_hash, sh)
        # assert_array_equal fails on a shape mismatch instead of silently
        # broadcasting, and reports the differing elements on failure.
        np.testing.assert_array_equal(
            child.nonprivate_points,
            datapoints[expected_rows_by_prefix[child.hash_prefix]])
def test_get_next_level(self):
    """Check that the next level after node '0' is ['00', '01']."""
    sh = get_test_sim_hash()
    cp = test_utils.get_test_clustering_param(
        min_num_points_in_branching_node=10, min_num_points_in_node=5)
    parent = TestLshTreeNode(
        '0', get_test_origin_points(nonprivate_count=16), cp, sh,
        private_count=20)
    level: lsh_tree.LshTreeLevel = [parent]
    # Both expected children have 8 nonprivate points and private count 9.
    expected_next_level = [
        TestLshTreeNode(
            prefix, get_test_origin_points(nonprivate_count=8), cp, sh,
            private_count=9)
        for prefix in ('00', '01')
    ]

    branching_nodes = lsh_tree.LshTree.filter_branching_nodes(level)
    actual_next_level = lsh_tree.LshTree.get_next_level(branching_nodes)

    self.assertSequenceEqual(actual_next_level, expected_next_level)
def test_lsh_tree_leaves(self):
    """Check that `LshTree.leaves` matches the tree sketched below."""
    # Test tree:
    #   Nodes are nonprivate count + 1.
    #   Branches to the left are 0, to the right are 1.
    #   Nodes in parentheses are filtered out.
    #
    #               64+1
    #             /      \
    #          8+1        56+1
    #         /   \      /    \
    #     (1+1)   7+1  7+1    49+1
    #                        /    \
    #                    (6+1)    43+1
    root_point_count = 64
    sim_hash = get_test_sim_hash()
    clustering_param = test_utils.get_test_clustering_param(
        min_num_points_in_node=8,
        min_num_points_in_branching_node=9,
        max_depth=3)
    root = TestLshTreeNode(
        '', get_test_origin_points(root_point_count), clustering_param,
        sim_hash, frac_zero=0.125)
    # (hash prefix, nonprivate point count) of every leaf that should remain.
    expected_leaves = [
        TestLshTreeNode(
            prefix, get_test_origin_points(point_count), clustering_param,
            sim_hash)
        for prefix, point_count in (('01', 7), ('10', 7), ('111', 43))
    ]

    tree = lsh_tree.LshTree(root)

    self.assertEqual(tree.leaves, expected_leaves)
def test_get_next_level_filters_children_node(self):
    """Check that only the '11' child is kept in the next level."""
    sh = get_test_sim_hash()
    cp = test_utils.get_test_clustering_param(
        min_num_points_in_branching_node=10, min_num_points_in_node=9)
    # The children test nodes have a private count of 6, which is less than
    # min_num_points_in_node.
    parent_zero = TestLshTreeNode(
        '0', get_test_origin_points(nonprivate_count=10), cp, sh,
        private_count=11)
    # The children test nodes have a private count of 3 and 9, only the node
    # with 9 should be in the result.
    parent_one = TestLshTreeNode(
        '1', get_test_origin_points(nonprivate_count=10), cp, sh,
        private_count=11, frac_zero=0.2)
    level: lsh_tree.LshTreeLevel = [parent_zero, parent_one]
    surviving_child = TestLshTreeNode(
        '11', get_test_origin_points(nonprivate_count=8), cp, sh,
        private_count=9)
    expected_next_level = [surviving_child]

    branching_nodes = lsh_tree.LshTree.filter_branching_nodes(level)
    actual_next_level = lsh_tree.LshTree.get_next_level(branching_nodes)

    self.assertSequenceEqual(actual_next_level, expected_next_level)
def test_lsh_tree_leaves_branching_node_becomes_leaf(self):
    """Check that '0' and '1' are the leaves when all their children drop."""
    # Test tree:
    #   Nodes are nonprivate count + 1.
    #   Branches to the left are 0, to the right are 1.
    #   Nodes in parentheses are filtered out.
    #
    #                 64+1
    #               /      \
    #           32+1        32+1
    #          /    \      /    \
    #     (16+1) (16+1) (16+1) (16+1)
    root_point_count = 64
    sim_hash = get_test_sim_hash()
    clustering_param = test_utils.get_test_clustering_param(
        min_num_points_in_node=20,
        min_num_points_in_branching_node=30,
        max_depth=5)
    root = TestLshTreeNode(
        '', get_test_origin_points(root_point_count), clustering_param,
        sim_hash, frac_zero=0.5)
    # Both expected leaves hold 32 nonprivate points.
    expected_leaves = [
        TestLshTreeNode(
            prefix, get_test_origin_points(32), clustering_param, sim_hash)
        for prefix in ('0', '1')
    ]

    tree = lsh_tree.LshTree(root)

    self.assertEqual(tree.leaves, expected_leaves)
def test_lsh_tree_negative_count_root_errors(self):
    """Check that a root with private_count=-10 makes `LshTree` raise."""
    points = get_test_origin_points(nonprivate_count=15)
    clustering_param = test_utils.get_test_clustering_param()
    sim_hash = get_test_sim_hash()
    negative_root = lsh_tree.LshTreeNode(
        '0', points, clustering_param, sim_hash, private_count=-10)

    with self.assertRaises(ValueError):
        lsh_tree.LshTree(negative_root)
def test_private_count_param_from_clustering_param(self):
    """Check the fields copied by `CentralPrivateCountParam.from_clustering_param`.

    The privacy parameter, the privacy budget split and the maximum tree
    depth of the result must equal the corresponding values of the source
    clustering parameter.
    """
    # NOTE(review): a later definition in the visible source reuses this
    # method name. If both live in the same class, the later one replaces
    # this one and this test never runs -- confirm.
    source = test_utils.get_test_clustering_param()

    derived = CentralPrivateCountParam.from_clustering_param(source)

    self.assertEqual(derived.privacy_param, source.privacy_param)
    self.assertEqual(derived.privacy_budget_split,
                     source.privacy_budget_split)
    self.assertEqual(derived.max_tree_depth, source.tree_param.max_depth)
def test_private_count_param_from_clustering_param(self):
    """Check epsilon and delta of `CountPrivacyParam.from_clustering_param`."""
    # NOTE(review): an earlier definition in the visible source has this
    # same method name. If both live in the same class, this definition
    # shadows the earlier one -- confirm.
    clustering_param = test_utils.get_test_clustering_param(
        epsilon=10, delta=1e-2, frac_sum=0.2, frac_group_count=0.8,
        max_depth=3)

    count_privacy_param = CountPrivacyParam.from_clustering_param(
        clustering_param)

    # 2.0 is not one of the inputs above, so it is the result of some float
    # arithmetic; compare with a tolerance rather than bit-for-bit.
    self.assertAlmostEqual(count_privacy_param.epsilon, 2.0)
    # delta matches an input literal and is compared exactly, as before.
    self.assertEqual(count_privacy_param.delta, 1e-2)
def test_filter_branching_nodes_too_few_points(self):
    """Check that a node with private_count=1 is filtered out."""
    sim_hash = get_test_sim_hash()
    # private_count, not the nonprivate_count, should be used for the check.
    points = get_test_origin_points(nonprivate_count=15)
    clustering_param = test_utils.get_test_clustering_param(
        min_num_points_in_branching_node=10)
    sparse_node = lsh_tree.LshTreeNode(
        '0', points, clustering_param, sim_hash, private_count=1)
    level: lsh_tree.LshTreeLevel = [sparse_node]

    branching_nodes = lsh_tree.LshTree.filter_branching_nodes(level)

    self.assertEmpty(branching_nodes)
def test_average_privacy_param(self):
    """Check the fields of `AveragePrivacyParam.from_clustering_param`."""
    clustering_param = test_utils.get_test_clustering_param(
        epsilon=10, delta=1e-2, frac_sum=0.7, frac_group_count=0.3,
        radius=4.3)

    average_privacy_param = AveragePrivacyParam.from_clustering_param(
        clustering_param)

    # 7.0 is not one of the inputs above (presumably epsilon * frac_sum --
    # confirm), so it comes from float arithmetic; compare with a tolerance
    # rather than bit-for-bit.
    self.assertAlmostEqual(average_privacy_param.epsilon, 7.0)
    # delta and sensitivity match input literals and are compared exactly,
    # as before.
    self.assertEqual(average_privacy_param.delta, 1e-2)
    self.assertEqual(average_privacy_param.sensitivity, 4.3)
def test_filter_branching_nodes_enough_points(self):
    """Check that a node with private_count=20 passes the branching filter."""
    sim_hash = get_test_sim_hash()
    points = get_test_origin_points(nonprivate_count=15)
    clustering_param = test_utils.get_test_clustering_param(
        min_num_points_in_branching_node=10)
    dense_node = lsh_tree.LshTreeNode(
        '0', points, clustering_param, sim_hash, private_count=20)
    level: lsh_tree.LshTreeLevel = [dense_node]

    branching_nodes = lsh_tree.LshTree.filter_branching_nodes(level)

    # The filter must return the level unchanged.
    self.assertSequenceEqual(branching_nodes, level)
def test_root_node_provide_private_count(self):
    """Check the node built by `root_node` when private_count is supplied."""
    raw_points = [[1, 2, 1], [0.4, 0.2, 0.8], [3, 0, 3]]
    data = clustering_params.Data(raw_points, radius=4.3)
    clustering_param = test_utils.get_test_clustering_param(
        radius=4.3, max_depth=20)

    root = lsh_tree.root_node(data, clustering_param, private_count=10)

    # Identity of the node: empty prefix, the input points, the same params.
    self.assertEqual(root.hash_prefix, '')
    self.assertSequenceEqual(root.nonprivate_points, raw_points)
    self.assertEqual(root.clustering_param, clustering_param)
    # The hash object is sized from the 3-d points and max_depth=20.
    root_hash = root.sim_hash
    self.assertEqual(root_hash.dim, 3)
    self.assertEqual(root_hash.max_hash_len, 20)
    # The supplied private count is stored as given.
    self.assertEqual(root.private_count, 10)
def test_get_private_count_infinite_eps(self):
    """Check that with epsilon=inf the private count equals the true count."""
    true_count = 60
    clustering_param = test_utils.get_test_clustering_param(
        epsilon=np.inf, delta=1e-2, frac_sum=0.2, frac_group_count=0.8,
        max_depth=4)
    count_param = CentralPrivateCountParam.from_clustering_param(
        clustering_param)

    private_count = central_privacy_utils.get_private_count(
        true_count, count_param)

    self.assertEqual(private_count, true_count)
def test_get_private_count_basic(self, mock_dlaplace_fn):
    """Check a node's private count and the argument passed to the mock.

    The mock is injected from outside this block (presumably by a patch
    decorator that also sets its return value -- not visible here).
    """
    point_count = 30
    points = get_test_origin_points(nonprivate_count=point_count)
    clustering_param = test_utils.get_test_clustering_param(
        epsilon=5, frac_sum=0.2, frac_group_count=0.8, max_depth=9)
    sim_hash = get_test_sim_hash()
    root = lsh_tree.LshTreeNode(
        hash_prefix='',
        nonprivate_points=points,
        clustering_param=clustering_param,
        sim_hash=sim_hash)

    private_count = root.get_private_count()

    self.assertEqual(private_count, 25)
    mock_dlaplace_fn.assert_called_once_with(0.4)
def test_get_children_error(self):
    """Check that `children()` raises ValueError for prefix '00'."""
    hash_prefix, dim, max_hash_len = '00', 5, 2
    points = np.array([[1.5, 0, 1, 0.5, 0], [1, 1, 0, 0.7, 0.1]])
    clustering_param = test_utils.get_test_clustering_param(
        max_depth=max_hash_len)
    projections = np.array([[0, 1, 1, -1, 0], [1, 0, -1, 0, 0]])
    sim_hash = lsh.SimHash(dim, max_hash_len, projections)
    full_depth_node = lsh_tree.LshTreeNode(
        hash_prefix, points, clustering_param, sim_hash)

    # The prefix already has max_hash_len characters; presumably that is why
    # no further split is possible -- confirm against LshTreeNode.children.
    with self.assertRaises(ValueError):
        full_depth_node.children()
def test_get_private_average_infinite_eps(self):
    """Check that with epsilon=inf the private average is the plain mean."""
    points = [[1, 2, 1], [0.2, 0.1, 0.8], [3, 0, 3]]
    point_count = 3
    # Coordinate-wise mean of the three points above.
    expected_center = [1.4, 0.7, 1.6]
    clustering_param = test_utils.get_test_clustering_param(
        epsilon=np.inf, delta=1e-2, frac_sum=0.7, frac_group_count=0.3,
        radius=4.3)

    center = central_privacy_utils.get_private_average(
        points, point_count, clustering_param, dim=3)

    self.assertSequenceAlmostEqual(center, expected_center)
def test_get_private_count_cache(self):
    """Check that two calls to `get_private_count` return the same value."""
    point_count = 30
    points = get_test_origin_points(nonprivate_count=point_count)
    clustering_param = test_utils.get_test_clustering_param(epsilon=0.01)
    sim_hash = get_test_sim_hash()
    node = lsh_tree.LshTreeNode(
        hash_prefix='',
        nonprivate_points=points,
        clustering_param=clustering_param,
        sim_hash=sim_hash)

    first_count = node.get_private_count()
    second_count = node.get_private_count()

    self.assertEqual(first_count, second_count)
def test_get_private_count(self, dlaplace_noise, expected_private_count,
                           mock_dlaplace_fn):
    """Check the private count of 60 points for a given mocked noise value.

    `dlaplace_noise` is installed as the mock's return value. The result
    must equal `expected_private_count`, and the mock must have been called
    exactly once with the argument 2.
    """
    mock_dlaplace_fn.return_value = dlaplace_noise
    true_count = 60
    clustering_param = test_utils.get_test_clustering_param(
        epsilon=10, delta=1e-2, frac_sum=0.2, frac_group_count=0.8,
        max_depth=3)
    count_param = CentralPrivateCountParam.from_clustering_param(
        clustering_param)

    private_count = central_privacy_utils.get_private_count(
        true_count, count_param)

    self.assertEqual(private_count, expected_private_count)
    mock_dlaplace_fn.assert_called_once_with(2)
def test_get_private_average_error(self):
    """Check that private counts of 0 and -2 make `get_private_average` raise."""
    points = [[1, 2, 1], [0.4, 0.2, 0.8], [3, 0, 3]]
    clustering_param = test_utils.get_test_clustering_param(
        epsilon=10, delta=1e-2, frac_sum=0.7, frac_group_count=0.3,
        radius=4.3)

    # Same order as before: zero first, then a negative count.
    for invalid_count in (0, -2):
        with self.assertRaises(ValueError):
            central_privacy_utils.get_private_average(
                points, invalid_count, clustering_param, dim=3)
def test_get_private_average(self, nonprivate_points, expected_center,
                             mock_normal_fn):
    """Check the private average and how the mocked normal sampler is called.

    The result must be almost equal to `expected_center`. The mock must be
    called exactly once, with keyword `size` equal to 3 and keyword `scale`
    within 1e-5 of 1.927768.
    """
    point_count = 4
    clustering_param = test_utils.get_test_clustering_param(
        epsilon=10, delta=1e-2, frac_sum=0.7, frac_group_count=0.3,
        radius=4.3)

    center = central_privacy_utils.get_private_average(
        nonprivate_points, point_count, clustering_param, dim=3)

    self.assertSequenceAlmostEqual(center, expected_center)
    mock_normal_fn.assert_called_once()
    # Index 1 of call_args holds the keyword arguments of the call.
    noise_kwargs = mock_normal_fn.call_args[1]
    self.assertEqual(noise_kwargs['size'], 3)
    self.assertAlmostEqual(noise_kwargs['scale'], 1.927768, delta=1e-5)