コード例 #1
0
ファイル: tests.py プロジェクト: grafke/python-bloomfilter
class Serialization(unittest.TestCase):
    SIZE = 12345
    EXPECTED = set([random.randint(0, 10000100) for _ in range_fn(SIZE)])

    def test_serialization(self):
        for klass, args in [(BloomFilter, (self.SIZE,)),
                            (ScalableBloomFilter, ())]:
            filter = klass(*args)
            for item in self.EXPECTED:
                filter.add(item)

            f = tempfile.TemporaryFile()
            filter.tofile(f)
            stringio = StringIO()
            filter.tofile(stringio)
            streams_to_test = [f, stringio]
            if not running_python_3:
                cstringio = cStringIO.StringIO()
                filter.tofile(cstringio)
                streams_to_test.append(cstringio)

            del filter

            for stream in streams_to_test:
                stream.seek(0)
                filter = klass.fromfile(stream)
                for item in self.EXPECTED:
                    self.assertTrue(item in filter)
                del(filter)
                stream.close()
コード例 #2
0
 def test_union(self):
     bloom_one = BloomFilter(100, 0.001)
     bloom_two = BloomFilter(100, 0.001)
     chars = [chr(i) for i in range_fn(97, 123)]
     for char in chars[int(len(chars)/2):]:
         bloom_one.add(char)
     for char in chars[:int(len(chars)/2)]:
         bloom_two.add(char)
     new_bloom = bloom_one.union(bloom_two)
     for char in chars:
         self.assertTrue(char in new_bloom)
コード例 #3
0
ファイル: tests.py プロジェクト: grafke/python-bloomfilter
 def test_union(self):
     bloom_one = BloomFilter(100, 0.001)
     bloom_two = BloomFilter(100, 0.001)
     chars = [chr(i) for i in range_fn(97, 123)]
     for char in chars[int(len(chars)/2):]:
         bloom_one.add(char)
     for char in chars[:int(len(chars)/2)]:
         bloom_two.add(char)
     new_bloom = bloom_one.union(bloom_two)
     for char in chars:
         self.assertTrue(char in new_bloom)
コード例 #4
0
ファイル: tests.py プロジェクト: adewes/python-bloomfilter
    def test_nstar_union(self):
        bloom_one = BloomFilter(200, 0.001)
        bloom_two = BloomFilter(200, 0.001)
        chars = [chr(i) for i in range_fn(0, 200)]
        for char in chars[:int(len(chars)/2)]:
            bloom_one.add(char)
        for char in chars[int(len(chars)/2):]:
            bloom_two.add(char)
        new_bloom = bloom_one.union(bloom_two)

        self.assertTrue(bloom_one.nstar() > len(chars)/2-10 and bloom_one.nstar() < len(chars)/2+10)
        self.assertTrue(bloom_two.nstar() > len(chars)/2-10 and bloom_two.nstar() < len(chars)/2+10)
        self.assertTrue(new_bloom.nstar() > len(chars)-10 and new_bloom.nstar() < len(chars)+10)
コード例 #5
0
ファイル: pybloom.py プロジェクト: grafke/python-bloomfilter
def make_hashfuncs(num_slices, num_bits):
    if num_bits >= (1 << 31):
        fmt_code, chunk_size = 'Q', 8
    elif num_bits >= (1 << 15):
        fmt_code, chunk_size = 'I', 4
    else:
        fmt_code, chunk_size = 'H', 2
    total_hash_bits = 8 * num_slices * chunk_size
    if total_hash_bits > 384:
        hashfn = hashlib.sha512
    elif total_hash_bits > 256:
        hashfn = hashlib.sha384
    elif total_hash_bits > 160:
        hashfn = hashlib.sha256
    elif total_hash_bits > 128:
        hashfn = hashlib.sha1
    else:
        hashfn = hashlib.md5
    fmt = fmt_code * (hashfn().digest_size // chunk_size)
    num_salts, extra = divmod(num_slices, len(fmt))
    if extra:
        num_salts += 1
    salts = tuple(
        hashfn(hashfn(pack('I', i)).digest()) for i in range_fn(num_salts))

    def _make_hashfuncs(key):
        if running_python_3:
            if isinstance(key, str):
                key = key.encode('utf-8')
            else:
                key = str(key).encode('utf-8')
        else:
            if isinstance(key, unicode):
                key = key.encode('utf-8')
            else:
                key = str(key)
        i = 0
        for salt in salts:
            h = salt.copy()
            h.update(key)
            for uint in unpack(fmt, h.digest()):
                yield uint % num_bits
                i += 1
                if i >= num_slices:
                    return

    return _make_hashfuncs
コード例 #6
0
ファイル: tests.py プロジェクト: adewes/python-bloomfilter
    def test_nstar_intersection_2(self):
        bloom_one = BloomFilter(200, 0.001)
        bloom_two = BloomFilter(200, 0.001)
        chars = [chr(i) for i in range_fn(0, 200)]
        for char in chars[int(len(chars)/2):]:
            bloom_one.add(char)
        for char in chars[:int(len(chars)/2)]:
            bloom_two.add(char)
        new_bloom = bloom_one.intersection(bloom_two)

        self.assertTrue(bloom_one.nstar() > len(chars)/2-10 and bloom_one.nstar() < len(chars)/2+10)
        self.assertTrue(bloom_two.nstar() > len(chars)/2-10 and bloom_two.nstar() < len(chars)/2+10)

        #The nstar operator will fail on the intersection of the filters..
        self.assertTrue(new_bloom.nstar() > 10)

        self.assertTrue(bloom_one.nstar_intersection(bloom_two) < 10)
コード例 #7
0
def make_hashfuncs(num_slices, num_bits):
    if num_bits >= (1 << 31):
        fmt_code, chunk_size = 'Q', 8
    elif num_bits >= (1 << 15):
        fmt_code, chunk_size = 'I', 4
    else:
        fmt_code, chunk_size = 'H', 2
    total_hash_bits = 8 * num_slices * chunk_size
    if total_hash_bits > 384:
        hashfn = hashlib.sha512
    elif total_hash_bits > 256:
        hashfn = hashlib.sha384
    elif total_hash_bits > 160:
        hashfn = hashlib.sha256
    elif total_hash_bits > 128:
        hashfn = hashlib.sha1
    else:
        hashfn = hashlib.md5
    fmt = fmt_code * (hashfn().digest_size // chunk_size)
    num_salts, extra = divmod(num_slices, len(fmt))
    if extra:
        num_salts += 1
    salts = tuple(hashfn(hashfn(pack('I', i)).digest()) for i in range_fn(num_salts))
    def _make_hashfuncs(key):
        if running_python_3:
            if isinstance(key, str):
                key = key.encode('utf-8')
            else:
                key = str(key).encode('utf-8')
        else:
            if isinstance(key, unicode):
                key = key.encode('utf-8')
            else:
                key = str(key)
        i = 0
        for salt in salts:
            h = salt.copy()
            h.update(key)
            for uint in unpack(fmt, h.digest()):
                yield uint % num_bits
                i += 1
                if i >= num_slices:
                    return

    return _make_hashfuncs
コード例 #8
0
ファイル: filter_tests.py プロジェクト: caidao/zhihu_spider
            filter = klass(*args)
            for item in self.EXPECTED:
                filter.add(item)

            f = tempfile.TemporaryFile()
            filter.tofile(f)
            stringio = StringIO()
            filter.tofile(stringio)
            streams_to_test = [f, stringio]
            if not running_python_3:
                cstringio = cStringIO.StringIO()
                filter.tofile(cstringio)
                streams_to_test.append(cstringio)

            del filter

            for stream in streams_to_test:
                stream.seek(0)
                filter = klass.fromfile(stream)
                for item in self.EXPECTED:
                    self.assertTrue(item in filter)
                del(filter)
                stream.close()

if __name__ == '__main__':
    # unittest.main()
    f = BloomFilter(capacity=10000, error_rate=0.001)
    for i in range_fn(0, f.capacity):
        f.add(i)
    print (0 in f)
コード例 #9
0
ファイル: tests.py プロジェクト: adewes/python-bloomfilter
 def test_nstar(self):
     bloom = BloomFilter(1000, 0.001)
     chars = [chr(i) for i in range_fn(0,200)]
     for char in chars:
         bloom.add(char)
     self.assertTrue(bloom.nstar() > len(chars)-10 and bloom.nstar() < len(chars)+10)