def test_clip_empty_vector_of_count(self, epsilon, clip_threshold, expected):
    """Check clip_empty_vector_of_count on a two-bucket sketch with stats [2, 2].

    Args:
      epsilon: forwarded to PairwiseEstimator as `epsilon`.
      clip_threshold: forwarded to PairwiseEstimator as `clip_threshold`.
      expected: array the `stats` of the returned object must equal exactly.
    """
    vector = VectorOfCounts(num_buckets=2, random_seed=0)
    # Overwrite the stats directly so the input does not depend on hashing.
    vector.stats = np.array([2, 2])

    estimator = PairwiseEstimator(
        clip=True,
        epsilon=epsilon,
        clip_threshold=clip_threshold,
    )
    clipped = estimator.clip_empty_vector_of_count(vector)

    np.testing.assert_array_equal(clipped.stats, expected)
def test_get_std_of_intersection(self, epsilon, intersection_cardinality, expected):
    """Check _get_std_of_intersection against an expected value (2 places).

    Args:
      epsilon: forwarded to PairwiseEstimator as `epsilon`.
      intersection_cardinality: first argument of _get_std_of_intersection.
      expected: value the result must match to two decimal places.
    """
    # Two four-bucket sketches with hand-written stats, built in order.
    fixtures = []
    for bucket_stats in ([2, 2, 0, 0], [2, 0, 2, 0]):
        fixture = VectorOfCounts(num_buckets=4, random_seed=0)
        fixture.stats = np.array(bucket_stats)
        fixtures.append(fixture)
    left, right = fixtures

    estimator = PairwiseEstimator(clip=True, epsilon=epsilon)
    std = estimator._get_std_of_intersection(
        intersection_cardinality, left, right)

    self.assertAlmostEqual(std, expected, places=2)
def test_merge_no_clip(self):
    """Merge two identically seeded single-id sketches with a default estimator.

    The sorted stats of the merged result must equal [0, 1.5].
    """
    first = VectorOfCounts(num_buckets=2, random_seed=2)
    first.add_ids([1])
    second = VectorOfCounts(num_buckets=2, random_seed=2)
    second.add_ids([1])

    estimator = PairwiseEstimator()
    merged = estimator.merge(first, second)

    # Sort so the assertion does not depend on which bucket the id lands in.
    sorted_stats = np.sort(merged.stats)
    np.testing.assert_array_equal(sorted_stats, np.array([0, 1.5]))
def test_has_full_intersection(self):
    """Sketches built from the same ids (0-99) must report full intersection."""
    estimator = PairwiseEstimator()

    left = VectorOfCounts(num_buckets=64, random_seed=2)
    left.add_ids(range(100))
    right = VectorOfCounts(num_buckets=64, random_seed=2)
    right.add_ids(range(100))

    cardinality = estimator._intersection(left, right)
    is_full = estimator.has_full_intersection(cardinality, left, right)

    self.assertTrue(is_full)
def test_has_zero_intersection(self):
    """Sketches built from disjoint ids (0-99, 100-199) must report zero intersection."""
    estimator = PairwiseEstimator()

    # Clip relies on hypothesis testing and hence requires a minimum size.
    left = VectorOfCounts(num_buckets=64, random_seed=2)
    left.add_ids(range(100))
    right = VectorOfCounts(num_buckets=64, random_seed=2)
    right.add_ids(range(100, 200))

    cardinality = estimator._intersection(left, right)
    is_zero = estimator.has_zero_intersection(cardinality, left, right)

    self.assertTrue(is_zero)
def test_evaluate_closeness_to_a_value(self, epsilon, intersection_cardinality,
                                       value_to_compare_with, expected):
    """Check evaluate_closeness_to_a_value against an expected value (2 places).

    Args:
      epsilon: forwarded to PairwiseEstimator as `epsilon`.
      intersection_cardinality: first argument of the method under test.
      value_to_compare_with: second argument of the method under test.
      expected: value the result must match to two decimal places.
    """
    # Two four-bucket sketches with hand-written stats, built in order.
    fixtures = []
    for bucket_stats in ([2, 2, 0, 0], [2, 0, 2, 0]):
        fixture = VectorOfCounts(num_buckets=4, random_seed=0)
        fixture.stats = np.array(bucket_stats)
        fixtures.append(fixture)
    left, right = fixtures

    estimator = PairwiseEstimator(clip=True, epsilon=epsilon)
    closeness = estimator.evaluate_closeness_to_a_value(
        intersection_cardinality, value_to_compare_with, left, right)

    self.assertAlmostEqual(closeness, expected, places=2)
def test_merge_with_clip(self):
    """Check that merge with clip=True detects the two extreme overlaps.

    Case 1: the sketches hold disjoint ids (0-99 vs 100-199); the merged
    stats must equal the element-wise sum of the two inputs' stats.
    Case 2: the sketches hold the same ids (0-99); the merged stats must
    equal the stats of `this_sketch`.
    """
    this_sketch = VectorOfCounts(num_buckets=64, random_seed=2)
    this_sketch.add_ids(range(100))
    pairwise_estimator = PairwiseEstimator(clip=True)

    # First test no intersection.
    that_sketch = VectorOfCounts(num_buckets=64, random_seed=2)
    that_sketch.add_ids(range(100, 200))
    merged = pairwise_estimator.merge(this_sketch, that_sketch)
    # The arrays are passed positionally: the `x`/`y` keyword names were
    # renamed to `actual`/`desired` in NumPy 2.0, so only the positional
    # form is warning-free on every NumPy version.
    np.testing.assert_array_equal(
        merged.stats,
        this_sketch.stats + that_sketch.stats,
        err_msg='Fail to detect the no-intersection case.')

    # Then test full intersection.
    that_sketch = VectorOfCounts(num_buckets=64, random_seed=2)
    that_sketch.add_ids(range(100))
    merged = pairwise_estimator.merge(this_sketch, that_sketch)
    np.testing.assert_array_equal(
        merged.stats,
        this_sketch.stats,
        err_msg='Fail to detect the full-intersection case.')
def test_assert_compatible_not_vector_of_count(self):
    """assert_compatible must raise AssertionError when either argument is a list."""
    vector = VectorOfCounts(num_buckets=4, random_seed=2)
    pairwise_estimator = PairwiseEstimator()

    # Exercise the non-sketch argument in both positions.
    for operands in ((vector, []), ([], vector)):
        with self.assertRaises(AssertionError):
            pairwise_estimator.assert_compatible(*operands)
def test_assert_compatible_not_same_hash_function(self):
    """assert_compatible must raise AssertionError for sketches with different seeds."""
    seeded_one, seeded_two = (
        VectorOfCounts(num_buckets=4, random_seed=seed) for seed in (1, 2)
    )
    pairwise_estimator = PairwiseEstimator()

    with self.assertRaises(AssertionError):
        pairwise_estimator.assert_compatible(seeded_one, seeded_two)
def test_assert_compatible_not_equal_length(self):
    """assert_compatible must raise AssertionError for sketches with 4 vs 8 buckets."""
    narrow, wide = (
        VectorOfCounts(num_buckets=buckets, random_seed=2) for buckets in (4, 8)
    )
    pairwise_estimator = PairwiseEstimator()

    with self.assertRaises(AssertionError):
        pairwise_estimator.assert_compatible(narrow, wide)
def test_get_std_of_sketch_sum(self, epsilon, expected):
    """Check _get_std_of_sketch_sum on a two-bucket sketch with stats [2, 2].

    Args:
      epsilon: forwarded to PairwiseEstimator as `epsilon`.
      expected: value the result must equal exactly.
    """
    vector = VectorOfCounts(num_buckets=2, random_seed=0)
    # Overwrite the stats directly so the input does not depend on hashing.
    vector.stats = np.array([2, 2])

    estimator = PairwiseEstimator(clip=True, epsilon=epsilon)
    std = estimator._get_std_of_sketch_sum(vector)

    self.assertEqual(std, expected)