class DoubleBloom:
    """Pair of Bloom filters that maps a key to '0', '1', 'Both' or None.

    Keys inserted with value '0' go to ``zero_bloom``; keys inserted with any
    other value go to ``one_bloom``. When a key is reported by both filters
    the lookup is delegated to ``next_level`` if one has been attached.
    """

    def __init__(self, values, probability):
        """Create the two filters; ``values`` and ``probability`` are
        forwarded unchanged to ``BloomFilter`` together with ``False``."""
        self.zero_bloom = BloomFilter(values, probability, False)
        self.one_bloom = BloomFilter(values, probability, False)
        self.next_level = None

    def insert(self, key, value):
        """Record ``key`` in the filter selected by ``value``.

        Only the exact string '0' selects ``zero_bloom``; everything else is
        stored in ``one_bloom``.
        """
        target = self.zero_bloom if value == '0' else self.one_bloom
        target.insert(key)

    def get_value(self, key):
        """Return the value associated with ``key`` as seen by the filters.

        Returns:
            '0' or '1' when exactly one filter reports the key, None when
            neither does. When both report it, the answer of ``next_level``
            is returned if a next level exists, otherwise 'Both'.
        """
        # Both filters are always consulted exactly once, zero filter first.
        hit_zero = self.zero_bloom.contains(key)
        hit_one = self.one_bloom.contains(key)

        if hit_zero and hit_one:
            if self.next_level is None:
                return 'Both'
            return self.next_level.get_value(key)
        if hit_zero:
            return '0'
        if hit_one:
            return '1'
        return None

    def add_level(self, values, probability):
        """Attach a fresh ``DoubleBloom`` as the next level, replacing any
        level that was attached before."""
        self.next_level = DoubleBloom(values, probability)
class FastPrimes(object):
    """Approximate primality lookup built from two Bloom filters.

    ``primes_bloom_filter`` stores the known primes. ``fps_bloom_filter``
    stores the non-primes that the first filter wrongly reports as present.
    A number is reported prime only when the first filter contains it and the
    second filter does not.
    """

    def __init__(self, primes, num_prime_funcs, num_prime_bits, num_fp_funcs, num_fp_bits):
        """Populate both filters from ``primes``.

        Args:
            primes: indexable collection of primes used as ground truth;
                ``primes[0]`` and ``primes[-1]`` bound the range that is
                scanned (presumably sorted ascending -- confirm with callers).
            num_prime_funcs, num_prime_bits: forwarded positionally to
                ``BloomFilter`` for the primes filter.
            num_fp_funcs, num_fp_bits: forwarded positionally to
                ``BloomFilter`` for the false-positive filter.

        Raises:
            AssertionError: if the primes filter fails to report a prime that
                lies inside the scanned range.
        """
        self.primes_bloom_filter = BloomFilter(num_prime_funcs, num_prime_bits)
        self.fps_bloom_filter = BloomFilter(num_fp_funcs, num_fp_bits)

        print('Adding primes')
        # NOTE(review): the last prime is skipped here, and the scan below
        # also stops before primes[-1], so that value is never looked up
        # during construction -- confirm the exclusion is intended.
        for p in primes[:-1]:
            self.primes_bloom_filter.add(p)

        print('Adding false positives..')
        # Build the lookup set once; testing membership against the raw
        # sequence for every integer in the range would be quadratic.
        prime_set = set(primes)
        for i in range(primes[0], primes[-1]):
            true_prime = i in prime_set
            bf_prime = self.primes_bloom_filter.contains(i)
            if true_prime and not bf_prime:
                # Raised explicitly (not via ``assert``) so the check is
                # still performed when Python runs with -O.
                raise AssertionError('False negatives NEVER happen')
            elif not true_prime and bf_prime:
                self.fps_bloom_filter.add(i)

    def isPrime(self, n):
        """Return True when the primes filter reports ``n`` and the
        false-positive filter does not; False otherwise."""
        bf_prime = self.primes_bloom_filter.contains(n)
        bf_composite = self.fps_bloom_filter.contains(n)
        return bool(bf_prime and not bf_composite)

    def evaluate(self, primes):
        """Tests every number between the first and last primes, including
        the numbers not in primes. We use the primes array as the source of
        truth.

        The range checked is ``range(primes[0], primes[-1])``, so the last
        prime itself is not tested.

        Returns:
            A tuple ``(num_false_positives, num_double_fps,
            min_false_positive)``: the count of non-primes reported prime,
            the count of primes reported non-prime, and the smallest
            non-prime reported prime (``primes[-1] + 1`` when there is none).
        """
        num_false_positives = 0
        num_double_fps = 0
        min_false_positive = primes[-1] + 1
        prime_set = set(primes)
        for i in range(primes[0], primes[-1]):
            true_prime = i in prime_set
            my_prime = self.isPrime(i)
            if true_prime and not my_prime:
                num_double_fps += 1
            elif not true_prime and my_prime:
                num_false_positives += 1
                if i < min_false_positive:
                    min_false_positive = i
        return (num_false_positives, num_double_fps, min_false_positive)

    def getCounts(self):
        """Return ``(num_adds of the primes filter, num_adds of the
        false-positive filter)``."""
        return (self.primes_bloom_filter.num_adds, self.fps_bloom_filter.num_adds)
def test(self):
    """Check that a Bloom filter answers membership correctly >95% of the time.

    Adds 100 random integers from [0, 999] to a filter constructed with
    ``size=1000`` and seven multiplicative hash functions, then probes 100
    more random integers and compares each answer with exact membership in
    the values that were added.
    """
    size = 1000

    def make_hash(multiplier):
        # Bind the multiplier per function. A bare lambda inside the
        # comprehension looks its loop variable up at call time, so all
        # seven functions would end up sharing a single multiplier.
        return lambda x, size: (x * multiplier) % size

    h_funcs = [make_hash(m) for m in (3, 5, 7, 11, 13, 17, 19)]
    added = set()
    bf = BloomFilter(size=size, hash_functions=h_funcs)
    for _ in range(100):
        v = randint(0, 999)
        bf.add(v)
        added.add(v)

    c = 0   # probes where the filter agrees with exact membership
    nc = 0  # probes where it disagrees
    for _ in range(100):
        t = randint(0, 999)
        if (bf.contains(t)) == (t in added):
            c += 1
        else:
            nc += 1
    print("c: " + str(c), " nc: " + str(nc))
    self.assertGreater((c) / float(c + nc), 0.95)