def test_intersection3(self):
    """Chain two intersections and check membership of the surviving range.

    ``a`` holds 0..999, ``b`` holds 500..999 and ``c`` holds 600..699.
    ``b`` is intersected with ``c`` and then ``a`` with ``b``.  ``a`` is
    queried afterwards, so the test relies on ``intersection`` updating the
    filter it is called on.  Only 600..699 is common to all three inputs.

    ``n``, ``p`` and ``buffer`` are names defined outside this method.
    """
    a = CountingFilter(n, p)
    b = CountingFilter(n, p)
    c = CountingFilter(n, p)
    for i in range(1000):
        a.insert(str(i))
    for i in range(500, 1000):
        b.insert(str(i))
    for i in range(600, 700):
        c.insert(str(i))

    b.intersection(c)
    a.intersection(b)

    # only 100 elements in intersect
    fpr = self.get_false_positive_rate(100, a.getBitSize(), a.getHashCount())
    # NOTE(review): the bound is scaled by n, not by the number of keys
    # probed in each range (600 and 300) -- confirm this slack is intended.
    max_errors = n * (fpr + buffer)

    # False positive errors should be expected in this range
    errors = sum(1 for i in range(600) if a.contains(str(i)))
    self.assertLess(errors, max_errors)

    # 100% accuracy is expected in this range
    for i in range(600, 700):
        key = str(i)
        self.assertTrue(a.contains(key), "missing intersection element " + key)

    # False positive errors should be expected in this range
    errors = sum(1 for i in range(700, 1000) if a.contains(str(i)))
    self.assertLess(errors, max_errors)
def test_check_if_in(self):
    """Check ``howMany`` before and after intersecting with an equal filter.

    The same key is inserted twice into ``a`` and twice into ``b``.  The
    reported count must be 2 on ``a`` alone and must still be 2 after
    ``a.intersection(b)``.

    ``n`` and ``p`` are names defined outside this method.
    """
    key = str(1034)

    a = CountingFilter(n, p)
    a.insert(key)
    a.insert(key)
    # assertEqual reports the actual count on failure, unlike assertTrue(x == 2).
    self.assertEqual(a.howMany(key), 2)

    b = CountingFilter(n, p)
    b.insert(key)
    b.insert(key)

    a.intersection(b)
    self.assertEqual(a.howMany(key), 2)
def test_intersection1(self):
    """Intersect two filters built from disjoint key ranges.

    ``a`` holds 0..999 and ``b`` holds 1000..1999, so no key is shared.
    After ``a.intersection(b)`` the test probes all 2000 keys on ``a`` and
    requires the number of hits to stay below ``n * 0.01``.

    ``n`` and ``p`` are names defined outside this method.
    """
    # we need to use a smaller fpr here (p / 50) so we don't have too many
    # intersects
    a = CountingFilter(n, p / 50)
    b = CountingFilter(n, p / 50)
    for i in range(1000):
        a.insert(str(i))
    for i in range(1000, 2000):
        b.insert(str(i))

    a.intersection(b)

    # There are no elements in the intersection, so this bloom filter should
    # contain nothing except for a few elements caused by hash collisions
    errors = sum(1 for i in range(2000) if a.contains(str(i)))
    print("number of intersections is", errors)
    # assertLess shows both operands on failure, unlike assertTrue(x < y).
    self.assertLess(errors, n * 0.01)
def test_intersection2(self):
    """Intersect a filter with one holding the upper half of its keys.

    ``a`` holds 0..999 and ``b`` holds 500..999.  After
    ``a.intersection(b)`` every key in 500..999 must still be reported by
    ``a``, while hits in 0..499 are counted as errors and bounded by
    ``n * (fpr + buffer)``.

    ``n``, ``p`` and ``buffer`` are names defined outside this method.
    """
    a = CountingFilter(n, p)
    b = CountingFilter(n, p)
    for i in range(1000):
        a.insert(str(i))
    for i in range(500, 1000):
        b.insert(str(i))

    a.intersection(b)

    # There are no elements from 0 - 499, so in this range the bloom filter
    # should only contain false positives
    errors = sum(1 for i in range(500) if a.contains(str(i)))
    print("intersect2 errors", errors)
    # only 500 elements in intersect
    fpr = self.get_false_positive_rate(500, a.getBitSize(), a.getHashCount())
    print("intersect2 fpr", fpr)
    # assertLess shows both operands on failure, unlike assertTrue(x < y).
    self.assertLess(errors, n * (fpr + buffer))

    # These elements all should be in the intersection, so they must be in
    # the intersect bloom filter
    for i in range(500, 1000):
        key = str(i)
        self.assertTrue(a.contains(key), "missing intersection element " + key)