class DoubleBloom:
    """Pair of Bloom filters that records whether a key maps to '0' or '1'.

    Keys inserted with the value '0' go into ``zero_bloom``; keys inserted
    with any other value go into ``one_bloom``. A lookup that hits both
    filters is ambiguous and is delegated to an optional ``next_level``
    ``DoubleBloom`` when one has been attached.
    """

    def __init__(self, values, probability):
        """Create the two filters; no further level is attached initially.

        ``values`` and ``probability`` are forwarded unchanged to
        ``BloomFilter`` (presumably expected item count and target
        false-positive rate -- confirm against BloomFilter). The meaning of
        the third positional ``False`` argument is not visible here.
        """
        self.zero_bloom = BloomFilter(values, probability, False)
        self.one_bloom = BloomFilter(values, probability, False)
        self.next_level = None

    def insert(self, key, value):
        """Record ``key`` in the zero filter if ``value == '0'``, else in the one filter."""
        target = self.zero_bloom if value == '0' else self.one_bloom
        target.insert(key)

    def get_value(self, key):
        """Look up the value recorded for ``key``.

        Returns '0' or '1' when exactly one filter reports the key, ``None``
        when neither does. When both do, returns whatever ``next_level``
        reports for the key, or 'Both' if there is no next level.
        """
        # Both filters are always consulted once, zero filter first.
        in_zero = self.zero_bloom.contains(key)
        in_one = self.one_bloom.contains(key)

        if in_zero and in_one:
            # Ambiguous at this level: defer to the next one if it exists.
            if self.next_level is None:
                return 'Both'
            return self.next_level.get_value(key)
        if in_zero:
            return '0'
        if in_one:
            return '1'
        return None

    def add_level(self, values, probability):
        """Attach a fresh ``DoubleBloom`` as the next level, replacing any existing one."""
        self.next_level = DoubleBloom(values, probability)
Example #2
0
class FastPrimes(object):
  """Approximate primality test built from two Bloom filters.

  One filter holds the known primes; the second holds the composites that
  the first filter wrongly reports as prime. A number is reported prime when
  the first filter contains it and the second does not.
  """

  def __init__(self, primes, num_prime_funcs, num_prime_bits, num_fp_funcs, num_fp_bits):
    """Populate both filters from ``primes``.

    ``primes`` must be a non-empty indexable sequence of hashable numbers;
    ``primes[0]`` and ``primes[-1]`` bound the scanned range (indexing an
    empty list raises IndexError). The ``num_*_funcs`` / ``num_*_bits``
    arguments are forwarded to ``BloomFilter`` as-is (presumably hash-function
    count and bit-array size -- confirm against BloomFilter).

    Raises AssertionError if the primes filter fails to report a prime that
    was added to it.
    """
    self.primes_bloom_filter = BloomFilter(num_prime_funcs, num_prime_bits)
    self.fps_bloom_filter = BloomFilter(num_fp_funcs, num_fp_bits)
    # Set membership is O(1); testing `i in primes` against the sequence for
    # every integer in the range below would be quadratic.
    prime_set = set(primes)

    # Single-argument call form prints identically on Python 2 and Python 3.
    print('Adding primes')
    # NOTE(review): the final prime is skipped here, and the scan below also
    # stops before primes[-1], so primes[-1] is never inserted into either
    # filter by this constructor. Unclear whether that is intentional --
    # kept as-is so getCounts() is unchanged; confirm with the author.
    for p in primes[:-1]:
      self.primes_bloom_filter.add(p)

    print('Adding false positives..')
    for i in range(primes[0], primes[-1]):
      true_prime = i in prime_set
      bf_prime = self.primes_bloom_filter.contains(i)
      if true_prime and not bf_prime:
        # Raised explicitly (not via `assert`) so the invariant check is
        # not stripped when running under -O.
        raise AssertionError('False negatives NEVER happen')
      if bf_prime and not true_prime:
        self.fps_bloom_filter.add(i)

  def isPrime(self, n):
    """Return True if the primes filter contains ``n`` and the
    false-positive filter does not; otherwise return False."""
    bf_prime = self.primes_bloom_filter.contains(n)
    bf_composite = self.fps_bloom_filter.contains(n)
    return bool(bf_prime and not bf_composite)

  def evaluate(self, primes):
    """Compare isPrime against ``primes`` for every integer in
    ``range(primes[0], primes[-1])`` (the last prime itself is excluded).

    ``primes`` is the source of truth. Returns a tuple
    ``(num_false_positives, num_double_fps, min_false_positive)``:
    non-primes reported prime, true primes reported non-prime, and the
    smallest non-prime reported prime (``primes[-1] + 1`` if there is none).
    """
    prime_set = set(primes)  # O(1) lookups instead of scanning the sequence
    num_false_positives = 0
    num_double_fps = 0
    min_false_positive = primes[-1] + 1
    for i in range(primes[0], primes[-1]):
      true_prime = i in prime_set
      my_prime = self.isPrime(i)
      if true_prime and not my_prime:
        num_double_fps += 1
      elif my_prime and not true_prime:
        num_false_positives += 1
        min_false_positive = min(min_false_positive, i)
    return (num_false_positives, num_double_fps, min_false_positive)

  def getCounts(self):
    """Return ``(num_adds of the primes filter, num_adds of the false-positive filter)``."""
    return (self.primes_bloom_filter.num_adds, self.fps_bloom_filter.num_adds)
    def test(self):
        """Check that a populated BloomFilter answers random membership
        queries correctly more than 95% of the time.

        Adds 100 random integers from [0, 999] to a filter of size 1000 that
        uses seven multiplicative hash functions, then probes 100 random
        integers and compares each answer with the real set of added values.
        """
        size = 1000

        def make_hash(multiplier):
            # Factory so each hash function binds its OWN multiplier.
            # Lambdas created directly inside a comprehension all share the
            # one loop variable, which made the seven hash functions
            # identical instead of distinct.
            return lambda x, size: (x * multiplier) % size

        h_funcs = [make_hash(c) for c in (3, 5, 7, 11, 13, 17, 19)]

        added = set()  # ground truth for membership checks
        bf = BloomFilter(size=size, hash_functions=h_funcs)
        for _ in range(100):
            v = randint(0, 999)
            bf.add(v)
            added.add(v)

        c = 0   # probes where the filter agrees with the ground truth
        nc = 0  # probes where it disagrees
        for _ in range(100):
            t = randint(0, 999)
            if (bf.contains(t)) == (t in added):
                c += 1
            else:
                nc += 1

        print("c: " + str(c), " nc: " + str(nc))
        self.assertGreater((c) / float(c + nc), 0.95)