def _test_prefix_false_positives(constructor = BloomFilter): for error_rate in [0.0001, 0.001, 0.01, 0.1, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]: a = constructor(error_rate, 10374, prefix="A") b = constructor(error_rate, 10374, prefix="B") c = constructor(error_rate, 10374, prefix="C") d = constructor(error_rate, 10374, prefix="D") p(a) print "Estimated errors:", a.error_rate, "->", a.error_rate * b.error_rate, "->", a.error_rate * b.error_rate * c.error_rate, "->", a.error_rate * b.error_rate * c.error_rate * d.error_rate #we fill each bloomfilter up to its capacity data = ["%i" % i for i in xrange(a.capacity)] map(a.add, data) map(b.add, data) map(c.add, data) map(d.add, data) errors = 0 two_errors = 0 three_errors = 0 four_errors = 0 #we check what happens if we check twice the capacity for i in xrange(a.capacity * 2): if "X%i" % i in a: errors += 1 if "X%i" % i in b: two_errors += 1 if "X%i" % i in c: three_errors += 1 if "X%i" % i in d: four_errors += 1 print "Errors:", errors, "~", errors / (i + 1.0), "Two-Errors:", two_errors, "~", two_errors / (i + 1.0), "Three-Errors:", three_errors, "~", three_errors / (i + 1.0), four_errors, "~", four_errors / (i + 1.0) print
def test(bits, count, constructor = BloomFilter): ok = 0 create_begin = time() bloom = constructor(0.0001, bits) fill_begin = time() for i in xrange(count): if i % 2 == 0: bloom.add(str(i)) check_begin = time() for i in xrange(count): if (str(i) in bloom) == (i % 2 == 0): ok += 1 write_begin = time() string = str(bloom) write_end = time() print "create: {create:.1f}; fill: {fill:.1f}; check: {check:.1f}; write: {write:.1f}".format(create=fill_begin-create_begin, fill=check_begin-fill_begin, check=write_begin-check_begin, write=write_end-write_begin) print string.encode("HEX")[:100], "{len} bytes; ({ok}/{total} ~{part:.0%})".format(len=len(string), ok=ok, total=count, part=1.0*ok/count)
def _test_false_positives(constructor = BloomFilter): for error_rate in [0.001, 0.01, 0.1, 0.5]: begin = time() # if constructor == BloomFilter: # a = constructor(error_rate, 1024*8) # capacity = a.capacity # else: a = constructor(1024*8, error_rate) capacity = a.get_capacity(error_rate) print "capacity:", capacity, " error-rate:", error_rate, "bits:", a.size, "bytes:", a.size / 8 data = ["%i" % i for i in xrange(capacity)] map(a.add, data) errors = 0 for i in xrange(200000): if "X%i" % i in a: errors += 1 end = time() print "%.3f"%(end-begin), "Errors:", errors, "/", i + 1, " ~ ", errors / (i + 1.0) print