コード例 #1
0
class Serialization(unittest.TestCase):
    """Round-trip each filter class through ``tofile`` / ``fromfile``.

    Every filter is written to several stream types and read back; each
    item that was added before serialization must still be reported as
    present by the restored filter.
    """

    SIZE = 12345
    # Random integers added to every filter under test. Because this is a
    # set, duplicates collapse and it may hold fewer than SIZE items.
    EXPECTED = set(random.randint(0, 10000100) for _ in range_fn(0, SIZE))

    def test_serialization(self):
        """Serialize to each stream type and verify membership on reload."""
        for klass, args in [(BloomFilter, (self.SIZE, )),
                            (ScalableBloomFilter, ())]:
            bloom = klass(*args)
            for item in self.EXPECTED:
                bloom.add(item)

            stream_factories = [tempfile.TemporaryFile, StringIO]
            if not running_python_3:
                # cStringIO is only exercised when not running Python 3.
                stream_factories.append(cStringIO.StringIO)

            streams_to_test = []
            try:
                for make_stream in stream_factories:
                    stream = make_stream()
                    # Register before writing so the stream is closed even
                    # if tofile() raises.
                    streams_to_test.append(stream)
                    bloom.tofile(stream)

                # Drop the populated filter so only the deserialized copy
                # is consulted below.
                del bloom

                for stream in streams_to_test:
                    stream.seek(0)
                    restored = klass.fromfile(stream)
                    for item in self.EXPECTED:
                        self.assertTrue(item in restored)
                    del restored
            finally:
                # Close every opened stream, including when an assertion
                # or (de)serialization error aborts the test early.
                for stream in streams_to_test:
                    stream.close()
コード例 #2
0
class TestSerialization:
    """Round-trip each filter class through ``tofile`` / ``fromfile``.

    The test is parametrized over the filter class (with its constructor
    arguments) and over the kind of stream used for serialization.
    """

    SIZE = 12345
    # Random integers added to every filter under test. Because this is a
    # set, duplicates collapse and it may hold fewer than SIZE items.
    EXPECTED = set([random.randint(0, 10000100) for _ in range_fn(0, SIZE)])

    @pytest.mark.parametrize("cls,args", [
        (BloomFilter, (SIZE, )),
        (ScalableBloomFilter, ()),
    ])
    # Each factory is a lambda returning the stream class, so the name is
    # only looked up when the test body calls it; the last two cases are
    # skipped when running_python_3 is true.
    @pytest.mark.parametrize("stream_factory", [
        lambda: tempfile.TemporaryFile,
        lambda: io.BytesIO,
        pytest.param(lambda: cStringIO.StringIO,
                     marks=pytest.mark.skipif(running_python_3,
                                              reason="Python 2 only")),
        pytest.param(lambda: StringIO.StringIO,
                     marks=pytest.mark.skipif(running_python_3,
                                              reason="Python 2 only")),
    ])
    def test_serialization(self, cls, args, stream_factory):
        """Write a populated filter to a stream and verify it after reload.

        Args:
            cls: Filter class under test; must provide ``add``, ``tofile``
                and ``fromfile``.
            args: Positional arguments used to construct ``cls``.
            stream_factory: Zero-argument callable returning the stream
                class to instantiate.
        """
        bloom = cls(*args)
        for item in self.EXPECTED:
            bloom.add(item)

        # First call resolves the stream class, second call instantiates it.
        stream = stream_factory()()
        try:
            bloom.tofile(stream)
            # Drop the populated filter so only the deserialized copy is
            # consulted below.
            del bloom

            stream.seek(0)
            restored = cls.fromfile(stream)
            for item in self.EXPECTED:
                assert item in restored
        finally:
            # Explicit close() instead of `with`: the Python 2 stream types
            # are not assumed to support the context-manager protocol.
            stream.close()
コード例 #3
0
 def test_union(self):
     """Union of two equally configured filters must contain both inputs.

     The characters ``chr(97)`` .. ``chr(122)`` are split in two halves,
     each half is added to its own ``BloomFilter(100, 0.001)``, and every
     character is then expected to be found in the union.
     """
     letters = [chr(code) for code in range_fn(97, 123)]
     midpoint = int(len(letters) / 2)

     upper_half_filter = BloomFilter(100, 0.001)
     lower_half_filter = BloomFilter(100, 0.001)
     for letter in letters[midpoint:]:
         upper_half_filter.add(letter)
     for letter in letters[:midpoint]:
         lower_half_filter.add(letter)

     merged = upper_half_filter.union(lower_half_filter)
     for letter in letters:
         self.assertTrue(letter in merged)
コード例 #4
0
 def test_union_scalable_bloom_filter(self):
     """Union of two scalable filters must contain both inputs.

     The values produced by ``range_fn(1, 10000)`` are split in two
     halves, each half is added to its own ``SMALL_SET_GROWTH`` scalable
     filter, and every value is then expected to be found in the union.
     """
     growth_mode = ScalableBloomFilter.SMALL_SET_GROWTH
     upper_half_filter = ScalableBloomFilter(mode=growth_mode)
     lower_half_filter = ScalableBloomFilter(mode=growth_mode)

     values = list(range_fn(1, 10000))
     split_at = int(len(values) / 2)
     for value in values[split_at:]:
         upper_half_filter.add(value)
     for value in values[:split_at]:
         lower_half_filter.add(value)

     merged = upper_half_filter.union(lower_half_filter)
     for value in values:
         self.assertTrue(value in merged)
コード例 #5
0
def make_hashfuncs(num_slices, num_bits):
    """Build a generator function yielding hash positions for a key.

    Args:
        num_slices: Number of values the returned generator stops after.
        num_bits: Modulus applied to every unpacked integer.

    Returns:
        A function ``f(key)`` that returns a generator of integers, each
        being an unsigned integer unpacked from a salted digest of ``key``
        and reduced modulo ``num_bits``.
    """
    # Pick the narrowest unsigned struct code matching the size of num_bits.
    if num_bits < (1 << 15):
        fmt_code, chunk_size = 'H', 2
    elif num_bits < (1 << 31):
        fmt_code, chunk_size = 'I', 4
    else:
        fmt_code, chunk_size = 'Q', 8

    # Choose a digest based on the total number of hash bits requested;
    # the first threshold that is exceeded wins, md5 is the fallback.
    total_hash_bits = 8 * num_slices * chunk_size
    digest_choices = (
        (384, hashlib.sha512),
        (256, hashlib.sha384),
        (160, hashlib.sha256),
        (128, hashlib.sha1),
    )
    for threshold, candidate in digest_choices:
        if total_hash_bits > threshold:
            hashfn = candidate
            break
    else:
        hashfn = hashlib.md5

    # One struct code per whole chunk that fits in a single digest.
    fmt = fmt_code * (hashfn().digest_size // chunk_size)

    # Number of salted digests needed: num_slices / len(fmt), rounded up.
    whole, extra = divmod(num_slices, len(fmt))
    num_salts = whole + 1 if extra else whole

    # Pre-seeded hash objects; each one is copied per key in the closure.
    salts = tuple(
        hashfn(hashfn(pack('I', i)).digest())
        for i in range_fn(stop=num_salts))

    def _make_hashfuncs(key):
        # Normalise the key before feeding it to the hash objects.
        if isinstance(key, str):
            key = key.encode('utf-8')
        elif running_python_3:
            key = str(key).encode('utf-8')
        else:
            key = str(key)

        remaining = num_slices
        for salt in salts:
            hasher = salt.copy()
            hasher.update(key)
            for value in unpack(fmt, hasher.digest()):
                yield value % num_bits
                remaining -= 1
                # The check runs after the yield, so the budget is only
                # enforced once a value has been produced.
                if remaining <= 0:
                    return

    return _make_hashfuncs