# Imports reconstructed for the code below; the pybloom-style package and
# module paths are assumed.
import hashlib
import random
import tempfile
import unittest
from struct import pack, unpack

try:
    from StringIO import StringIO
    import cStringIO
except ImportError:  # Python 3
    from io import BytesIO as StringIO

from pybloom import BloomFilter, ScalableBloomFilter
from pybloom.utils import range_fn, running_python_3


class Serialization(unittest.TestCase):
    SIZE = 12345
    EXPECTED = set([random.randint(0, 10000100) for _ in range_fn(SIZE)])

    def test_serialization(self):
        for klass, args in [(BloomFilter, (self.SIZE,)),
                            (ScalableBloomFilter, ())]:
            filter = klass(*args)
            for item in self.EXPECTED:
                filter.add(item)

            # Serialize to a real temporary file and to in-memory streams.
            f = tempfile.TemporaryFile()
            filter.tofile(f)
            stringio = StringIO()
            filter.tofile(stringio)
            streams_to_test = [f, stringio]
            if not running_python_3:
                cstringio = cStringIO.StringIO()
                filter.tofile(cstringio)
                streams_to_test.append(cstringio)
            del filter

            # Every stream must round-trip to an equivalent filter.
            for stream in streams_to_test:
                stream.seek(0)
                filter = klass.fromfile(stream)
                for item in self.EXPECTED:
                    self.assertTrue(item in filter)
                del filter
                stream.close()
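

# A minimal sketch (a hypothetical helper, not part of the suite) of the
# tofile/fromfile round trip exercised above, using an in-memory buffer in
# place of a real file:
def _roundtrip_sketch():
    from io import BytesIO
    bf = BloomFilter(capacity=100, error_rate=0.01)
    bf.add('walrus')
    buf = BytesIO()
    bf.tofile(buf)
    buf.seek(0)
    restored = BloomFilter.fromfile(buf)
    return 'walrus' in restored  # expected: True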


# Union/intersection and cardinality-estimate tests; the TestCase name here
# is assumed.
class TestUnionIntersection(unittest.TestCase):

    def test_union(self):
        bloom_one = BloomFilter(100, 0.001)
        bloom_two = BloomFilter(100, 0.001)
        chars = [chr(i) for i in range_fn(97, 123)]
        for char in chars[int(len(chars) / 2):]:
            bloom_one.add(char)
        for char in chars[:int(len(chars) / 2)]:
            bloom_two.add(char)
        new_bloom = bloom_one.union(bloom_two)
        for char in chars:
            self.assertTrue(char in new_bloom)
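
    # union() presumably ORs the two filters' underlying bit arrays, which is
    # why both operands above are created with identical capacity and
    # error_rate; the result answers membership for items added to either.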

    def test_nstar_union(self):
        bloom_one = BloomFilter(200, 0.001)
        bloom_two = BloomFilter(200, 0.001)
        chars = [chr(i) for i in range_fn(0, 200)]
        for char in chars[:int(len(chars) / 2)]:
            bloom_one.add(char)
        for char in chars[int(len(chars) / 2):]:
            bloom_two.add(char)
        new_bloom = bloom_one.union(bloom_two)
        self.assertTrue(len(chars) / 2 - 10 < bloom_one.nstar() < len(chars) / 2 + 10)
        self.assertTrue(len(chars) / 2 - 10 < bloom_two.nstar() < len(chars) / 2 + 10)
        self.assertTrue(len(chars) - 10 < new_bloom.nstar() < len(chars) + 10)
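
    # nstar() presumably implements the usual set-bit cardinality estimate
    # n* = -(m / k) * ln(1 - X / m), with m bits, k hash slices, and X set
    # bits (Swamidass & Baldi, 2007); the +/-10 windows in these tests leave
    # room for its variance at these sizes.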

    def test_nstar_intersection_2(self):
        bloom_one = BloomFilter(200, 0.001)
        bloom_two = BloomFilter(200, 0.001)
        chars = [chr(i) for i in range_fn(0, 200)]
        for char in chars[int(len(chars) / 2):]:
            bloom_one.add(char)
        for char in chars[:int(len(chars) / 2)]:
            bloom_two.add(char)
        new_bloom = bloom_one.intersection(bloom_two)
        self.assertTrue(len(chars) / 2 - 10 < bloom_one.nstar() < len(chars) / 2 + 10)
        self.assertTrue(len(chars) / 2 - 10 < bloom_two.nstar() < len(chars) / 2 + 10)
        # The two filters hold disjoint halves, so the true intersection is
        # empty. nstar() is unreliable on the intersected filter itself and
        # overestimates, while nstar_intersection() stays close to zero.
        self.assertTrue(new_bloom.nstar() > 10)
        self.assertTrue(bloom_one.nstar_intersection(bloom_two) < 10)
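
    # nstar_intersection(other) presumably estimates |A ∩ B| by inclusion-
    # exclusion, n*(A) + n*(B) - n*(A ∪ B), which remains accurate even
    # though the bitwise AND of two filters is not itself a well-formed
    # filter.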

    def test_nstar(self):
        bloom = BloomFilter(1000, 0.001)
        chars = [chr(i) for i in range_fn(0, 200)]
        for char in chars:
            bloom.add(char)
        self.assertTrue(len(chars) - 10 < bloom.nstar() < len(chars) + 10)


def make_hashfuncs(num_slices, num_bits):
    # Pick the smallest unsigned struct format that can index every bit.
    if num_bits >= (1 << 31):
        fmt_code, chunk_size = 'Q', 8
    elif num_bits >= (1 << 15):
        fmt_code, chunk_size = 'I', 4
    else:
        fmt_code, chunk_size = 'H', 2
    # Choose a digest wide enough to supply all the hash bits needed.
    total_hash_bits = 8 * num_slices * chunk_size
    if total_hash_bits > 384:
        hashfn = hashlib.sha512
    elif total_hash_bits > 256:
        hashfn = hashlib.sha384
    elif total_hash_bits > 160:
        hashfn = hashlib.sha256
    elif total_hash_bits > 128:
        hashfn = hashlib.sha1
    else:
        hashfn = hashlib.md5
    fmt = fmt_code * (hashfn().digest_size // chunk_size)
    num_salts, extra = divmod(num_slices, len(fmt))
    if extra:
        num_salts += 1
    salts = tuple(hashfn(hashfn(pack('I', i)).digest())
                  for i in range_fn(num_salts))

    def _make_hashfuncs(key):
        if running_python_3:
            if isinstance(key, str):
                key = key.encode('utf-8')
            else:
                key = str(key).encode('utf-8')
        else:
            if isinstance(key, unicode):
                key = key.encode('utf-8')
            else:
                key = str(key)
        i = 0
        for salt in salts:
            h = salt.copy()
            h.update(key)
            # Each digest yields several bit indices; stop after num_slices.
            for uint in unpack(fmt, h.digest()):
                yield uint % num_bits
                i += 1
                if i >= num_slices:
                    return

    return _make_hashfuncs


if __name__ == '__main__':
    # unittest.main()
    f = BloomFilter(capacity=10000, error_rate=0.001)
    for i in range_fn(0, f.capacity):
        f.add(i)
    print(0 in f)
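

# A quick sketch (a hypothetical helper) of how the generator factory above
# is used: each call to the returned function yields num_slices bit indices
# for a key, one per slice.
def _hashfuncs_demo():
    hashfunc = make_hashfuncs(num_slices=5, num_bits=1024)
    indices = list(hashfunc('walrus'))
    assert len(indices) == 5
    assert all(0 <= ix < 1024 for ix in indices)
    return indices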