def mainScalable(capacity=100000, request_error_rate=0.001):
    """Benchmark ScalableRedisBloomFilter: insert ``capacity`` integer keys,
    report bit statistics read back from Redis, then measure the
    experimental false-positive rate.

    Args:
        capacity: number of integer keys inserted into the filter.
        request_error_rate: error rate requested from the filter.
    """
    server = StrictRedis.from_url("redis://192.168.254.106:6379")
    bfkeypreffix = "test-sbf"
    f = ScalableRedisBloomFilter(initial_capacity=10000,
                                 server=server,
                                 bfkeypreffix=bfkeypreffix,
                                 error_rate=request_error_rate,
                                 max_filters=5)
    # NOTE: no capacity assertion here -- the scalable filter grows on
    # demand, so f.capacity need not equal the requested capacity.
    start = time.time()
    for i in range_fn(0, capacity):
        f.add(i)
    end = time.time()
    print("{:5.3f} seconds to add to capacity, {:10.2f} entries/second".format(
        end - start, f.capacity / (end - start)))
    # Count the 1 bits across every slice key of every sub-filter; the
    # sub-filter names are stored in the "<prefix>:fs" Redis list as
    # "<name>||<suffix>" entries.
    oneBits = 0
    filterKeys = server.lrange('%s:fs' % bfkeypreffix, 0, -1)
    for filterKey in filterKeys:
        filterKey, _ = filterKey.rsplit("||", 1)
        keys = server.keys("%s:s:%s" % (filterKey, "*"))
        for key in keys:
            oneBits += server.bitcount(key)
    # Hoisted: the total bit count was previously recomputed twice.
    totalBits = sum(ff.num_bits for ff in f.filtersMap.values())
    print("Number of Filter Bits:", totalBits)
    print("Number of slices:", sum(ff.num_slices for ff in f.filtersMap.values()))
    print("Bits per slice:", sum(ff.bits_per_slice for ff in f.filtersMap.values()))
    print("------")
    print("Fraction of 1 bits at capacity: {:5.3f}".format(
        oneBits / float(totalBits)))
    # Probe keys that were never added; every hit is a false positive.
    trials = f.capacity
    fp = 0
    start = time.time()
    # BUGFIX: upper bound was f.capacity + trials + 1, which probed
    # trials + 1 keys while the rates below divide by trials.
    for i in range_fn(f.capacity, f.capacity + trials):
        if i in f:
            fp += 1
    end = time.time()
    print(("{:5.3f} seconds to check false positives, "
           "{:10.2f} checks/second".format(end - start, trials / (end - start))))
    print("Requested FP rate: {:2.4f}".format(request_error_rate))
    print("Experimental false positive rate: {:2.4f}".format(fp / float(trials)))
def main(capacity=100000, request_error_rate=0.001):
    """Benchmark RedisBloomFilter: fill it to capacity, report bit
    statistics read back from Redis, then measure the experimental
    false-positive rate and compare it with the theoretical bound.

    Args:
        capacity: filter capacity and number of integer keys inserted.
        request_error_rate: error rate requested from the filter.
    """
    server = StrictRedis.from_url("redis://127.0.0.1:6379")
    bfkeypreffix = "test-bf"
    # BUGFIX: removed stray debug statement print("1231").
    f = RedisBloomFilter(capacity=capacity,
                         server=server,
                         bfkeypreffix=bfkeypreffix,
                         error_rate=request_error_rate)
    assert (capacity == f.capacity)
    start = time.time()
    for i in range_fn(0, f.capacity):
        f.add(i, skip_check=True)
    end = time.time()
    print("{:5.3f} seconds to add to capacity, {:10.2f} entries/second".format(
        end - start, f.capacity / (end - start)))
    # Count the 1 bits across every slice key stored under the prefix.
    keys = server.keys("%s:s:%s" % (bfkeypreffix, "*"))
    oneBits = 0
    for key in keys:
        oneBits += server.bitcount(key)
    print("Number of Filter Bits:", f.num_bits)
    print("Number of slices:", f.num_slices)
    print("Bits per slice:", f.bits_per_slice)
    print("------")
    print("Fraction of 1 bits at capacity: {:5.3f}".format(oneBits / float(f.num_bits)))
    # Probe keys that were never inserted; every hit is a false positive.
    trials = f.capacity
    fp = 0
    start = time.time()
    # BUGFIX: upper bound was f.capacity + trials + 1, which probed
    # trials + 1 keys while the rates below divide by trials.
    for i in range_fn(f.capacity, f.capacity + trials):
        if i in f:
            fp += 1
    end = time.time()
    print(("{:5.3f} seconds to check false positives, "
           "{:10.2f} checks/second".format(end - start, trials / (end - start))))
    print("Requested FP rate: {:2.4f}".format(request_error_rate))
    print("Experimental false positive rate: {:2.4f}".format(fp / float(trials)))
    # Theoretical worst-case fp rate (Goel/Gupta bound).
    k = f.num_slices
    m = f.num_bits
    n = f.capacity
    fp_theory = math.pow((1 - math.exp(-k * (n + 0.5) / (m - 1))), k)
    print("Projected FP rate (Goel/Gupta): {:2.6f}".format(fp_theory))
def make_hashfuncs(num_slices, num_bits):
    """Return a closure mapping a key to ``num_slices`` slice indices.

    Every yielded value lies in ``[0, num_bits)``; values are derived
    from salted cryptographic digests of the key.
    """
    # Width of one unpacked hash chunk: 'Q' = 8 bytes, 'I' = 4, 'H' = 2.
    if num_bits >= (1 << 31):
        fmt_code, chunk_size = 'Q', 8
    elif num_bits >= (1 << 15):
        fmt_code, chunk_size = 'I', 4
    else:
        fmt_code, chunk_size = 'H', 2
    total_hash_bits = 8 * num_slices * chunk_size
    # Choose the cheapest digest that still provides enough output bits
    # (sha1 digests are 20 bytes, md5 digests 16 bytes).
    if total_hash_bits > 384:
        hashfn = hashlib.sha512
    elif total_hash_bits > 256:
        hashfn = hashlib.sha384
    elif total_hash_bits > 160:
        hashfn = hashlib.sha256
    elif total_hash_bits > 128:
        hashfn = hashlib.sha1
    else:
        hashfn = hashlib.md5
    chunks_per_digest = hashfn().digest_size // chunk_size
    fmt = fmt_code * chunks_per_digest
    # Enough salted digests to cover num_slices chunks, rounding up.
    num_salts, remainder = divmod(num_slices, chunks_per_digest)
    if remainder:
        num_salts += 1
    salts = tuple(hashfn(hashfn(pack('I', n)).digest())
                  for n in range_fn(num_salts))

    def _make_hashfuncs(key):
        """Yield num_slices hash values for *key*."""
        # Normalize the key to utf-8 bytes (Py3) or a byte str (Py2).
        if running_python_3:
            if isinstance(key, str):
                key = key.encode('utf-8')
            else:
                key = str(key).encode('utf-8')
        else:
            if isinstance(key, unicode):
                key = key.encode('utf-8')
            else:
                key = str(key)
        emitted = 0
        for salt in salts:
            hasher = salt.copy()
            hasher.update(key)
            for value in unpack(fmt, hasher.digest()):
                yield value % num_bits
                emitted += 1
                if emitted == num_slices:
                    return

    return _make_hashfuncs
def main(capacity=100000, request_error_rate=0.1):
    """Benchmark the in-memory BloomFilter: fill it to capacity, report
    bit statistics, measure the experimental false-positive rate and
    compare it with the theoretical bound.

    Args:
        capacity: filter capacity and number of integers inserted.
        request_error_rate: error rate requested from the filter.
    """
    f = BloomFilter(capacity=capacity, error_rate=request_error_rate)
    assert (capacity == f.capacity)
    start = time.time()
    for i in range_fn(0, f.capacity):
        f.add(i, skip_check=True)
    end = time.time()
    print("{:5.3f} seconds to add to capacity, {:10.2f} entries/second".format(
        end - start, f.capacity / (end - start)))
    # Removed unused zeroBits (it fed only a commented-out print).
    oneBits = f.bitarray.count(True)
    print("Number of Filter Bits:", f.num_bits)
    print("Number of slices:", f.num_slices)
    print("Bits per slice:", f.bits_per_slice)
    print("------")
    print("Fraction of 1 bits at capacity: {:5.3f}".format(oneBits / float(f.num_bits)))
    # Probe keys that were never inserted; every hit is a false positive.
    trials = f.capacity
    fp = 0
    start = time.time()
    # BUGFIX: upper bound was f.capacity + trials + 1, which probed
    # trials + 1 keys while the rates below divide by trials.
    for i in range_fn(f.capacity, f.capacity + trials):
        if i in f:
            fp += 1
    end = time.time()
    print(("{:5.3f} seconds to check false positives, "
           "{:10.2f} checks/second".format(end - start, trials / (end - start))))
    print("Requested FP rate: {:2.4f}".format(request_error_rate))
    print("Experimental false positive rate: {:2.4f}".format(fp / float(trials)))
    # Theoretical worst-case fp rate (Goel/Gupta bound).
    k = f.num_slices
    m = f.num_bits
    n = f.capacity
    fp_theory = math.pow((1 - math.exp(-k * (n + 0.5) / (m - 1))), k)
    print("Projected FP rate (Goel/Gupta): {:2.6f}".format(fp_theory))
def main(capacity=100000, request_error_rate=0.1):
    """Benchmark the in-memory BloomFilter: fill it to capacity, report
    bit statistics, measure the experimental false-positive rate and
    compare it with the theoretical bound.

    Args:
        capacity: filter capacity and number of integers inserted.
        request_error_rate: error rate requested from the filter.
    """
    f = BloomFilter(capacity=capacity, error_rate=request_error_rate)
    assert (capacity == f.capacity)
    start = time.time()
    for i in range_fn(0, f.capacity):
        f.add(i, skip_check=True)
    end = time.time()
    print("{:5.3f} seconds to add to capacity, {:10.2f} entries/second".format(
        end - start, f.capacity / (end - start)))
    # Removed unused zeroBits (it fed only a commented-out print).
    oneBits = f.bitarray.count(True)
    print("Number of Filter Bits:", f.num_bits)
    print("Number of slices:", f.num_slices)
    print("Bits per slice:", f.bits_per_slice)
    print("------")
    print("Fraction of 1 bits at capacity: {:5.3f}".format(
        oneBits / float(f.num_bits)))
    # Probe keys that were never inserted; every hit is a false positive.
    trials = f.capacity
    fp = 0
    start = time.time()
    # BUGFIX: upper bound was f.capacity + trials + 1, which probed
    # trials + 1 keys while the rates below divide by trials.
    for i in range_fn(f.capacity, f.capacity + trials):
        if i in f:
            fp += 1
    end = time.time()
    print(("{:5.3f} seconds to check false positives, "
           "{:10.2f} checks/second".format(end - start, trials / (end - start))))
    print("Requested FP rate: {:2.4f}".format(request_error_rate))
    print("Experimental false positive rate: {:2.4f}".format(fp / float(trials)))
    # Theoretical worst-case fp rate (Goel/Gupta bound).
    k = f.num_slices
    m = f.num_bits
    n = f.capacity
    fp_theory = math.pow((1 - math.exp(-k * (n + 0.5) / (m - 1))), k)
    print("Projected FP rate (Goel/Gupta): {:2.6f}".format(fp_theory))
def make_hashfuncs(num_slices, num_bits):
    """Create a hashing closure for a bloom-filter slice layout.

    The returned callable maps a key to a generator yielding exactly
    ``num_slices`` integers, each in ``[0, num_bits)``.
    """
    # Smallest unsigned struct code that can index num_bits positions.
    if num_bits >= (1 << 31):
        fmt_code, chunk_size = 'Q', 8
    elif num_bits >= (1 << 15):
        fmt_code, chunk_size = 'I', 4
    else:
        fmt_code, chunk_size = 'H', 2
    total_hash_bits = 8 * num_slices * chunk_size
    # Smallest digest wide enough to cover the required hash bits.
    for threshold, algorithm in ((384, hashlib.sha512), (256, hashlib.sha384),
                                 (160, hashlib.sha256), (128, hashlib.sha1),
                                 (0, hashlib.md5)):
        if total_hash_bits > threshold:
            hashfn = algorithm
            break
    fmt = fmt_code * (hashfn().digest_size // chunk_size)
    # Each salted digest yields len(fmt) chunks; round the salt count up.
    num_salts, extra = divmod(num_slices, len(fmt))
    num_salts += 1 if extra else 0
    salts = tuple(hashfn(hashfn(pack('I', index)).digest())
                  for index in range_fn(num_salts))

    def _make_hashfuncs(key):
        # Normalize the key to utf-8 bytes (Py3) or a byte str (Py2).
        if running_python_3:
            key = key.encode('utf-8') if isinstance(key, str) else str(key).encode('utf-8')
        else:
            key = key.encode('utf-8') if isinstance(key, unicode) else str(key)
        produced = 0
        for salt in salts:
            digest = salt.copy()
            digest.update(key)
            for chunk in unpack(fmt, digest.digest()):
                yield chunk % num_bits
                produced += 1
                if produced >= num_slices:
                    return

    return _make_hashfuncs
def make_hashfuncs(num_slices, num_bits, hashmac):
    """Return a function mapping a key to ``num_slices`` hash values,
    each in ``[0, num_bits)``.

    Args:
        num_slices: number of hash values the returned generator yields.
        num_bits: modulus applied to each raw hash chunk.
        hashmac: 0 selects plain salted hashlib digests; any other value
            selects double-HMAC digests keyed with a random per-call key.

    Returns:
        ``key -> generator of int`` suitable for indexing filter slices.
    """
    # Smallest unsigned struct code able to index num_bits positions.
    if num_bits >= (1 << 31):
        fmt_code, chunk_size = 'Q', 8
    elif num_bits >= (1 << 15):
        fmt_code, chunk_size = 'I', 4
    else:
        fmt_code, chunk_size = 'H', 2
    total_hash_bits = 8 * num_slices * chunk_size
    # Smallest digest wide enough for the required hash bits.  The five
    # duplicated hashmac if/else arms of the original are collapsed: the
    # branch only ever chose a (digest_size, algorithm) pair.
    if total_hash_bits > 384:
        digest_size, digestmod = 64, hashlib.sha512
    elif total_hash_bits > 256:
        digest_size, digestmod = 48, hashlib.sha384
    elif total_hash_bits > 160:
        digest_size, digestmod = 32, hashlib.sha256
    elif total_hash_bits > 128:
        digest_size, digestmod = 20, hashlib.sha1
    else:
        digest_size, digestmod = 16, hashlib.md5
    # One digest yields len(fmt) chunks; num_salts digests cover
    # num_slices chunks (rounded up).
    fmt = fmt_code * (digest_size // chunk_size)
    num_salts, extra = divmod(num_slices, len(fmt))
    if extra:
        num_salts += 1
    if hashmac == 0:
        salts = tuple(digestmod(digestmod(pack('I', i)).digest())
                      for i in range_fn(num_salts))
    else:
        # NOTE(review): getrandbits(digest_size) yields only digest_size
        # *bits* (16-64) of key material -- digest_size * 8 was probably
        # intended -- and `random` is not a CSPRNG; confirm before relying
        # on this for security.
        # BUGFIX: the key was passed to hmac.new() as a text str, which
        # raises TypeError on Python 3; the ASCII-encoded decimal digits
        # are byte-identical to the Python 2 str the original used.
        hmackey = str(random.getrandbits(digest_size)).encode('ascii')
        salts = tuple(
            hmac.new(hmackey,
                     hmac.new(hmackey, pack('I', i), digestmod).digest(),
                     digestmod)
            for i in range_fn(num_salts))

    def _make_hashfuncs(key):
        # Normalize the key to utf-8 bytes (Py3) or a byte str (Py2).
        if running_python_3:
            if isinstance(key, str):
                key = key.encode('utf-8')
            else:
                key = str(key).encode('utf-8')
        else:
            if isinstance(key, unicode):
                key = key.encode('utf-8')
            else:
                key = str(key)
        i = 0
        for salt in salts:
            h = salt.copy()
            h.update(key)
            # BUGFIX: dropped a stray h.hexdigest() call -- its result was
            # discarded and it does not affect the state of h.
            for uint in unpack(fmt, h.digest()):
                yield uint % num_bits
                i += 1
                if i >= num_slices:
                    return

    return _make_hashfuncs
def make_hashfuncs(num_slices, num_bits, hashmac):
    """Return a function that maps a key to ``num_slices`` hash values,
    each in ``[0, num_bits)``.

    Args:
        num_slices: number of hash values the returned generator yields.
        num_bits: modulus applied to each raw hash chunk.
        hashmac: 0 selects plain salted hashlib digests; any other value
            selects double-HMAC digests keyed with a random per-call key.

    Returns:
        ``key -> generator of int`` suitable for indexing filter slices.
    """
    # Smallest unsigned struct code able to index num_bits positions.
    if num_bits >= (1 << 31):
        fmt_code, chunk_size = 'Q', 8
    elif num_bits >= (1 << 15):
        fmt_code, chunk_size = 'I', 4
    else:
        fmt_code, chunk_size = 'H', 2
    total_hash_bits = 8 * num_slices * chunk_size
    # Pick the smallest digest that provides total_hash_bits of output;
    # in HMAC mode also draw a random key.
    # NOTE(review): random.getrandbits(digest_size) produces only
    # digest_size *bits* of key material (16-64 bits) and `random` is not
    # a CSPRNG -- confirm this matches the intended threat model.
    if total_hash_bits > 384:
        digest_size = 64
        if hashmac == 0:
            hashfn = hashlib.sha512
        else:
            hmackey = random.getrandbits(digest_size)
            hashfn = hmac
    elif total_hash_bits > 256:
        digest_size = 48
        if hashmac == 0:
            hashfn = hashlib.sha384
        else:
            hmackey = random.getrandbits(digest_size)
            hashfn = hmac
    elif total_hash_bits > 160:
        digest_size = 32
        if hashmac == 0:
            hashfn = hashlib.sha256
        else:
            hmackey = random.getrandbits(digest_size)
            hashfn = hmac
    elif total_hash_bits > 128:
        digest_size = 20
        if hashmac == 0:
            hashfn = hashlib.sha1
        else:
            hmackey = random.getrandbits(digest_size)
            hashfn = hmac
    else:
        digest_size = 16
        if hashmac == 0:
            hashfn = hashlib.md5
        else:
            hmackey = random.getrandbits(digest_size)
            hashfn = hmac
    # One digest yields len(fmt) chunks; num_salts digests cover
    # num_slices chunks (rounded up).
    fmt = fmt_code * (digest_size // chunk_size)
    num_salts, extra = divmod(num_slices, len(fmt))
    if extra:
        num_salts += 1
    if hashmac == 0:
        salts = tuple(
            hashfn(hashfn(pack('I', i)).digest()) for i in range_fn(num_salts))
    else:
        # Double HMAC per salt index, dispatched on digest_size so the
        # matching hashlib digestmod is supplied to hmac.new().
        # NOTE(review): hmac.new() requires a bytes key on Python 3, so
        # str(hmackey) works only on Python 2 -- verify target version.
        if digest_size == 64:
            salts = tuple(
                hashfn.new(
                    str(hmackey),
                    hashfn.new(str(hmackey), pack('I', i),
                               hashlib.sha512).digest(), hashlib.sha512)
                for i in range_fn(num_salts))
        elif digest_size == 48:
            salts = tuple(
                hashfn.new(
                    str(hmackey),
                    hashfn.new(str(hmackey), pack('I', i),
                               hashlib.sha384).digest(), hashlib.sha384)
                for i in range_fn(num_salts))
        elif digest_size == 32:
            salts = tuple(
                hashfn.new(
                    str(hmackey),
                    hashfn.new(str(hmackey), pack('I', i),
                               hashlib.sha256).digest(), hashlib.sha256)
                for i in range_fn(num_salts))
        elif digest_size == 20:
            salts = tuple(
                hashfn.new(
                    str(hmackey),
                    hashfn.new(str(hmackey), pack('I', i),
                               hashlib.sha1).digest(), hashlib.sha1)
                for i in range_fn(num_salts))
        else:
            salts = tuple(
                hashfn.new(
                    str(hmackey),
                    hashfn.new(str(hmackey), pack('I', i),
                               hashlib.md5).digest(),
                    hashlib.md5) for i in range_fn(num_salts))

    def _make_hashfuncs(key):
        # Normalize the key to utf-8 bytes (Py3) or a byte str (Py2).
        if running_python_3:
            if isinstance(key, str):
                key = key.encode('utf-8')
            else:
                key = str(key).encode('utf-8')
        else:
            if isinstance(key, unicode):
                key = key.encode('utf-8')
            else:
                key = str(key)
        i = 0
        for salt in salts:
            h = salt.copy()
            h.update(key)
            # NOTE(review): this hexdigest() result is discarded and does
            # not alter h's state; it looks like leftover debug code.
            h.hexdigest()
            for uint in unpack(fmt, h.digest()):
                yield uint % num_bits
                i += 1
                if i >= num_slices:
                    return

    return _make_hashfuncs