def testFalseNegativeNeverHappens(self):
    """Check that keys already inserted are never reported as absent.

    Ten keys are inserted one at a time; after every insertion each key
    inserted so far is queried again and must be reported as present.
    """
    size = 10
    bloom = BloomFilter(size, size)
    seen = []
    for key in range(size):
        # NOTE(review): keys are inserted as str but queried as int below;
        # presumably BloomFilter.query normalizes its argument - confirm.
        bloom.insert(str(key))
        seen.append(key)
        # Later insertions must never make an earlier key disappear.
        for earlier in seen:
            self.assertTrue(bloom.query(earlier))
def testInputIsFalsePositiveUntilInserted(self):
    """Check that a false positive stays positive until the key is inserted.

    Keys 0..n-1 are inserted one by one into a filter built with size
    argument n // 2. After each insertion every key that has not been
    inserted yet is probed: once such a key has been reported as present,
    every later probe of it must report it as present too. The test also
    requires that at least one false positive was observed overall.
    """
    n = 100
    # Floor division keeps the size argument an int on both Python 2 and
    # Python 3 (plain `/` would pass 50.0 under Python 3).
    bf = BloomFilter(n // 2, n)
    fp = [False] * n
    inserted = [False] * n
    fp_cnt = 0
    for i in range(n):
        # NOTE(review): keys are inserted as str but queried as int below;
        # presumably BloomFilter.query normalizes its argument - confirm.
        bf.insert(str(i))
        inserted[i] = True
        for j in range(n):
            if inserted[j]:
                continue
            if fp[j]:
                # It was a false positive before, so it must continue to
                # be one (it still has not been inserted).
                self.assertTrue(bf.query(j))
            elif bf.query(j):
                # First time this non-inserted key is reported as present:
                # record it as a false positive.
                fp[j] = True
                fp_cnt += 1
    # We're inserting more elements than the size of the array, so there
    # must be false positives.
    self.assertTrue(fp_cnt > 0)
def testInputIsFalsePositiveUntilInserted(self):
    """Check that a false positive stays positive until the key is inserted.

    Keys 0..n-1 are inserted one by one into a filter built with size
    argument n // 2. After each insertion every key that has not been
    inserted yet is probed: once such a key has been reported as present,
    every later probe of it must report it as present too. The test also
    requires that at least one false positive was observed overall.
    """
    n = 100
    # Floor division keeps the size argument an int on both Python 2 and
    # Python 3 (plain `/` would pass 50.0 under Python 3).
    bf = BloomFilter(n // 2, n)
    fp = [False] * n
    inserted = [False] * n
    fp_cnt = 0
    for i in range(n):
        # NOTE(review): keys are inserted as str but queried as int below;
        # presumably BloomFilter.query normalizes its argument - confirm.
        bf.insert(str(i))
        inserted[i] = True
        for j in range(n):
            if inserted[j]:
                continue
            if fp[j]:
                # It was a false positive before, so it must continue to
                # be one (it still has not been inserted).
                self.assertTrue(bf.query(j))
            elif bf.query(j):
                # First time this non-inserted key is reported as present:
                # record it as a false positive.
                fp[j] = True
                fp_cnt += 1
    # We're inserting more elements than the size of the array, so there
    # must be false positives.
    self.assertTrue(fp_cnt > 0)
def bloom_filter_run(n, m, k=None):
    """Measure the false positive rate of a filter as keys are inserted.

    The keys 0..n-1 are inserted in random order into
    ``BloomFilter(m, n, k)``. After each insertion every key that has not
    been inserted yet is queried, and the fraction of those reported as
    present is recorded.

    Args:
        n: Number of keys to insert; also forwarded as the second
            constructor argument of BloomFilter.
        m: Forwarded as the first constructor argument of BloomFilter
            (presumably the size of the bit array - confirm).
        k: Forwarded as the third constructor argument of BloomFilter
            (presumably the number of hash functions - confirm).

    Returns:
        A list of floats, one per insertion after which at least one key
        was still missing from the filter, in insertion order. Empty when
        ``n`` is 0 or 1.
    """
    # random.shuffle needs a mutable sequence; on Python 3 range() is an
    # immutable lazy object, so materialize it first.
    keys = list(range(n))
    # Random sampling without replacement
    random.shuffle(keys)
    probs = []
    bloom = BloomFilter(m, n, k)
    inserted = [False] * n
    for entry in keys:
        # NOTE(review): keys are inserted as str but queried as int below;
        # presumably BloomFilter.query normalizes its argument - confirm.
        bloom.insert(str(entry))
        inserted[entry] = True
        false_positives, total = 0, 0
        # Any key not inserted yet that the filter reports as present is a
        # false positive.
        for probe in range(n):
            if inserted[probe]:
                continue
            if bloom.query(probe):
                false_positives += 1
            total += 1
        # After the final insertion nothing is left to probe; skip the
        # sample instead of dividing by zero.
        if total != 0:
            probs.append(float(false_positives) / total)
    return probs