def test_intersection_size(self):
    """Check ``count`` on two filled filters and on their intersection.

    Both filters are populated with random numeric strings. Each filter's
    ``count`` must equal the number of distinct strings added to it, and the
    intersection's ``count`` must fall within a relative band of ``2 * fpr``
    around the size of the exact set intersection.
    """
    # False positive rate with small numbers is high, therefore let's test
    # with bigger sets.
    fpr = 0.001
    bloom_one = BloomFilter(100000, fpr)
    bloom_two = BloomFilter(100000, fpr)

    # 14-bit values for the first filter, 12-bit values for the second.
    first_items = [str(random.getrandbits(14)) for _ in range(71000)]
    second_items = [str(random.getrandbits(12)) for _ in range(69000)]

    for item in first_items:
        bloom_one.add(item)
    for item in second_items:
        bloom_two.add(item)

    merged_bloom = bloom_one.intersection(bloom_two)

    first_unique = set(first_items)
    second_unique = set(second_items)
    common_size = len(first_unique & second_unique)

    assert bloom_one.count == len(first_unique)
    assert bloom_two.count == len(second_unique)

    # Intersection guarantees to have all elements of the intersection but
    # the false positive rate might be slightly higher than that of the pure
    # intersection.
    lower_bound = common_size * (1 - 2 * fpr)
    upper_bound = common_size * (1 + 2 * fpr)
    assert lower_bound <= merged_bloom.count <= upper_bound
def test_intersection(self):
    """Intersection keeps shared members and drops the unshared ones.

    ``bloom_one`` receives every character built from ``range_fn(97, 123)``;
    ``bloom_two`` receives only the first half of them. The intersection
    must contain each first-half character and none of the second half.
    """
    bloom_one = BloomFilter(100, 0.001)
    bloom_two = BloomFilter(100, 0.001)

    letters = [chr(code) for code in range_fn(97, 123)]
    midpoint = int(len(letters) / 2)
    shared = letters[:midpoint]
    only_in_one = letters[midpoint:]

    for letter in letters:
        bloom_one.add(letter)
    for letter in shared:
        bloom_two.add(letter)

    new_bloom = bloom_one.intersection(bloom_two)

    for letter in shared:
        self.assertTrue(letter in new_bloom)
    for letter in only_in_one:
        self.assertTrue(letter not in new_bloom)
def test_intersection(self):
    """Verify membership in the intersection of two overlapping filters.

    The first filter holds the lowercase letters ``a``-``z``; the second
    holds only the first thirteen. Every letter added to both filters must
    be reported by the intersection, and every letter added to the first
    filter alone must not.
    """
    alphabet = [chr(code_point) for code_point in range(97, 123)]
    half = len(alphabet) // 2
    in_both = alphabet[:half]
    in_first_only = alphabet[half:]

    bloom_one = BloomFilter(100, 0.001)
    bloom_two = BloomFilter(100, 0.001)
    for symbol in alphabet:
        bloom_one.add(symbol)
    for symbol in in_both:
        bloom_two.add(symbol)

    new_bloom = bloom_one.intersection(bloom_two)

    for symbol in in_both:
        self.assertTrue(symbol in new_bloom)
    for symbol in in_first_only:
        self.assertTrue(symbol not in new_bloom)
def test_nstar_intersection_1(self):
    """Check ``nstar()`` on a filter, a half-filled filter and their intersection.

    ``bloom_one`` receives all characters built from ``range_fn(0, 200)`` and
    ``bloom_two`` only the first half. Each ``nstar()`` value must lie
    strictly within 10 of the number of characters expected in that filter
    (all of them for ``bloom_one``, half for ``bloom_two`` and for the
    intersection).
    """
    bloom_one = BloomFilter(200, 0.001)
    bloom_two = BloomFilter(200, 0.001)

    symbols = [chr(code) for code in range_fn(0, 200)]
    first_half = symbols[:int(len(symbols) / 2)]

    for symbol in symbols:
        bloom_one.add(symbol)
    for symbol in first_half:
        bloom_two.add(symbol)

    new_bloom = bloom_one.intersection(bloom_two)

    full_size = len(symbols)
    half_size = len(symbols) / 2

    # Each range check is split into its lower and upper half; nstar() is
    # still evaluated once per comparison.
    self.assertTrue(bloom_one.nstar() > full_size - 10)
    self.assertTrue(bloom_one.nstar() < full_size + 10)

    self.assertTrue(bloom_two.nstar() > half_size - 10)
    self.assertTrue(bloom_two.nstar() < half_size + 10)

    self.assertTrue(new_bloom.nstar() > half_size - 10)
    self.assertTrue(new_bloom.nstar() < half_size + 10)
def test_nstar_intersection_2(self):
    """Compare ``nstar()`` and ``nstar_intersection()`` on disjoint inputs.

    ``bloom_one`` receives the second half of the characters built from
    ``range_fn(0, 200)`` and ``bloom_two`` the first half, so no character
    is added to both. Each filter's ``nstar()`` must lie strictly within 10
    of half the character count. ``nstar()`` on the intersection filter is
    expected to exceed 10, while ``bloom_one.nstar_intersection(bloom_two)``
    must stay below 10.
    """
    bloom_one = BloomFilter(200, 0.001)
    bloom_two = BloomFilter(200, 0.001)

    symbols = [chr(code) for code in range_fn(0, 200)]
    midpoint = int(len(symbols) / 2)

    for symbol in symbols[midpoint:]:
        bloom_one.add(symbol)
    for symbol in symbols[:midpoint]:
        bloom_two.add(symbol)

    new_bloom = bloom_one.intersection(bloom_two)

    lower = len(symbols) / 2 - 10
    upper = len(symbols) / 2 + 10
    self.assertTrue(bloom_one.nstar() > lower and bloom_one.nstar() < upper)
    self.assertTrue(bloom_two.nstar() > lower and bloom_two.nstar() < upper)

    # The nstar operator will fail on the intersection of the filters..
    self.assertTrue(new_bloom.nstar() > 10)
    self.assertTrue(bloom_one.nstar_intersection(bloom_two) < 10)