class Serialization(unittest.TestCase):
    """Round-trip serialization tests for BloomFilter and ScalableBloomFilter.

    Fills each filter with random integers, writes it out via ``tofile`` to
    several stream flavors, reads it back with ``fromfile``, and checks that
    every original item is still reported as present.
    """

    SIZE = 12345
    # Random sample drawn once at class-definition time; shared by all tests.
    EXPECTED = set([random.randint(0, 10000100) for _ in range_fn(0, SIZE)])

    def test_serialization(self):
        cases = [(BloomFilter, (self.SIZE,)), (ScalableBloomFilter, ())]
        for klass, args in cases:
            bloom = klass(*args)
            for item in self.EXPECTED:
                bloom.add(item)

            # Serialize to each supported stream type.
            tmp = tempfile.TemporaryFile()
            bloom.tofile(tmp)
            sio = StringIO()
            bloom.tofile(sio)
            streams_to_test = [tmp, sio]
            if not running_python_3:
                # cStringIO only exists on Python 2.
                csio = cStringIO.StringIO()
                bloom.tofile(csio)
                streams_to_test.append(csio)
            del bloom

            # Deserialize from each stream and verify membership.
            for stream in streams_to_test:
                stream.seek(0)
                restored = klass.fromfile(stream)
                for item in self.EXPECTED:
                    self.assertTrue(item in restored)
                del restored
                stream.close()
class TestSerialization:
    """pytest-style round-trip serialization tests.

    Parametrized over the filter class and the stream type; each case
    serializes a populated filter, restores it, and verifies membership.
    """

    SIZE = 12345
    # Random sample drawn once at class-definition time; shared by all cases.
    EXPECTED = set([random.randint(0, 10000100) for _ in range_fn(0, SIZE)])

    @pytest.mark.parametrize("cls,args", [
        (BloomFilter, (SIZE,)),
        (ScalableBloomFilter, ()),
    ])
    @pytest.mark.parametrize("stream_factory", [
        lambda: tempfile.TemporaryFile,
        lambda: io.BytesIO,
        # cStringIO / StringIO modules only exist on Python 2.
        pytest.param(
            lambda: cStringIO.StringIO,
            marks=pytest.mark.skipif(running_python_3,
                                     reason="Python 2 only"),
        ),
        pytest.param(
            lambda: StringIO.StringIO,
            marks=pytest.mark.skipif(running_python_3,
                                     reason="Python 2 only"),
        ),
    ])
    def test_serialization(self, cls, args, stream_factory):
        bloom = cls(*args)
        for item in self.EXPECTED:
            bloom.add(item)
        # The factory returns the stream class; call it again to instantiate.
        stream = stream_factory()()
        bloom.tofile(stream)
        del bloom
        stream.seek(0)
        restored = cls.fromfile(stream)
        for item in self.EXPECTED:
            assert item in restored
def test_union(self):
    """Union of two half-populated BloomFilters must contain every element."""
    left = BloomFilter(100, 0.001)
    right = BloomFilter(100, 0.001)
    chars = [chr(i) for i in range_fn(97, 123)]
    half = int(len(chars) / 2)
    for ch in chars[half:]:
        left.add(ch)
    for ch in chars[:half]:
        right.add(ch)
    combined = left.union(right)
    for ch in chars:
        self.assertTrue(ch in combined)
def test_union_scalable_bloom_filter(self):
    """Union of two half-populated ScalableBloomFilters contains everything."""
    left = ScalableBloomFilter(
        mode=ScalableBloomFilter.SMALL_SET_GROWTH)
    right = ScalableBloomFilter(
        mode=ScalableBloomFilter.SMALL_SET_GROWTH)
    numbers = [i for i in range_fn(1, 10000)]
    middle = int(len(numbers) / 2)
    for number in numbers[middle:]:
        left.add(number)
    for number in numbers[:middle]:
        right.add(number)
    combined = left.union(right)
    for number in numbers:
        self.assertTrue(number in combined)
def make_hashfuncs(num_slices, num_bits):
    """Build the hashing closure used by a Bloom filter.

    Returns a callable that, given a key, yields exactly ``num_slices``
    integers, each in ``[0, num_bits)`` — one bit index per slice.
    Each yielded index is derived from a salted digest of the key.
    """
    # Pick the smallest unsigned struct code wide enough to index num_bits.
    if num_bits >= (1 << 31):
        fmt_code, chunk_size = 'Q', 8
    elif num_bits >= (1 << 15):
        fmt_code, chunk_size = 'I', 4
    else:
        fmt_code, chunk_size = 'H', 2

    # Choose the cheapest digest that still covers the bits we need.
    total_hash_bits = 8 * num_slices * chunk_size
    if total_hash_bits > 384:
        hashfn = hashlib.sha512
    elif total_hash_bits > 256:
        hashfn = hashlib.sha384
    elif total_hash_bits > 160:
        hashfn = hashlib.sha256
    elif total_hash_bits > 128:
        hashfn = hashlib.sha1
    else:
        hashfn = hashlib.md5

    fmt = fmt_code * (hashfn().digest_size // chunk_size)
    # One salted digest yields len(fmt) chunks; round up so the salts
    # collectively produce at least num_slices chunks.
    num_salts, extra = divmod(num_slices, len(fmt))
    if extra:
        num_salts += 1
    salts = tuple(hashfn(hashfn(pack('I', salt_index)).digest())
                  for salt_index in range_fn(stop=num_salts))

    def _make_hashfuncs(key):
        # Normalize the key to bytes (py3) / str (py2) before hashing.
        if running_python_3:
            if isinstance(key, str):
                key = key.encode('utf-8')
            else:
                key = str(key).encode('utf-8')
        else:
            if isinstance(key, str):
                key = key.encode('utf-8')
            else:
                key = str(key)
        produced = 0
        for salt in salts:
            digest = salt.copy()
            digest.update(key)
            for uint in unpack(fmt, digest.digest()):
                yield uint % num_bits
                produced += 1
                # Stop as soon as every slice has its index.
                if produced == num_slices:
                    return

    return _make_hashfuncs