Esempio n. 1
0
 def test_params_for_capacity(self):
     """
     Check ``pyBloom.params_for_capacity`` against known sane reference
     values for the arguments (1e6, 1e-4).
     """
     # Reference figures from http://hur.st/bloomfilter?n=1e6&p=1e-4
     expected_bits = 19170117
     expected_k = 14  # Parameters uses the ceiling instead of rounding

     size, num_hashes = pyBloom.params_for_capacity(1e6, 1e-4)

     # extra_buffer() is subtracted before comparing with the reference
     # bit count converted to bytes (presumably the returned size
     # includes that buffer -- confirm against pyBloom).
     payload_size = size - pyBloom.extra_buffer()
     assert payload_size == round(expected_bits / 8.0)
     assert num_hashes == expected_k
Esempio n. 2
0
 def get_bloom(cls, bloom_data):
     """
     Build a BloomFilter using the fixed arguments (100000, .01).

     :param bloom_data: Existing filter data. When truthy, it is installed
         as the ``mmap`` attribute of a fresh ``Bitmap`` and wrapped in a
         ``BloomFilter``. When falsy, a new filter is created with
         ``BloomFilter.for_capacity``.
     :return: A ``BloomFilter`` instance.
     """
     bloom_args = (100000, .01)

     if not bloom_data:
         bf = BloomFilter.for_capacity(*bloom_args)
         # Pass the size as a logging argument rather than pre-formatting
         # with %, so the message is only built when DEBUG is enabled.
         logging.debug("get_bloom(): data size %dkB",
                       round(len(bf.bitmap.mmap) / 1024))
         return bf

     # Size the bitmap from the same arguments used for new filters, then
     # swap in the supplied data as its backing store.
     # NOTE(review): presumably bloom_data was produced by a filter built
     # with these same arguments -- confirm against callers.
     size, ideal_k = BloomFilter.params_for_capacity(*bloom_args)
     bitmap = Bitmap(size)
     bitmap.mmap = bloom_data
     return BloomFilter(bitmap, ideal_k)
Esempio n. 3
0
    def test_swap_2(self):
        """
        Swaps the mmap files from one implementation to another and
        checks that things work. Starts with pyBloom, then cBloom.
        """
        count = 20000
        keys = ["foo%d" % x for x in xrange(count)]

        size, k = pyBloom.params_for_capacity(2e4, 1e-3)
        bitmap = Bitmap(size, "testswap2.mmap")
        bf1 = pyBloom(bitmap, k)
        try:
            # Plain loop: add() is called purely for its side effect.
            for key in keys:
                bf1.add(key)
        finally:
            # Close the filter even if an add() fails.
            bf1.close()

        # Make a new bitmap over the same file
        bitmap = Bitmap(size, "testswap2.mmap")
        # 50 is presumably a deliberately wrong k: the assertions below
        # expect the filter to report the k written by bf1 instead.
        bf2 = cBloom(bitmap, 50)
        try:
            assert len(bf2) == count  # Should reload size and k
            assert bf2.k_num == k

            # Check all the entries
            assert all(key in bf2 for key in keys)
        finally:
            # Close the filter even when an assertion fails.
            bf2.close()