Esempio n. 1
0
 def test_params_for_capacity(self):
     """
     Tests that params_for_capacity returns the expected byte count
     and k for a known capacity / false-positive probability pair.
     """
     # Reference values from http://hur.st/bloomfilter?n=1e6&p=1e-4
     expected_bytes = round(19170117 / 8.0)
     num_bytes, k_num = cBloom.params_for_capacity(1e6, 1e-4)

     # The returned size includes extra_buffer(); strip it before comparing
     assert num_bytes - cBloom.extra_buffer() == expected_bytes

     # Parameters uses the ceiling instead of rounding, hence 14
     assert k_num == 14
Esempio n. 2
0
    def test_for_capacity(self):
        """
        Tests that for_capacity builds a bloom filter whose bitmap size
        and k num match known sane values for the given capacity and
        probability.
        """
        # Reference values from http://hur.st/bloomfilter?n=1e6&p=1e-4
        bloom = cBloom.for_capacity(1e6, 1e-4)

        # Bitmap size: len() is divided by 8 and the extra buffer removed
        # before comparing against the reference size
        bitmap_size = len(bloom.bitmap) / 8
        assert bitmap_size - cBloom.extra_buffer() == round(19170117 / 8.0)

        # k num: parameters uses the ceiling instead of rounding, hence 14
        assert bloom.k_num == 14
Esempio n. 3
0
 def test_check_int(self):
     """
     Tests that a membership check with an int key is rejected
     with a TypeError.
     """
     bloom = cBloom.for_capacity(1000, 1e-4)
     with pytest.raises(TypeError):
         1234 in bloom
Esempio n. 4
0
 def test_check_none(self):
     """
     Tests that a membership check with None as the key is rejected
     with a TypeError.
     """
     bloom = cBloom.for_capacity(1000, 1e-4)
     with pytest.raises(TypeError):
         None in bloom
Esempio n. 5
0
 def test_add_int(self):
     """
     Tests that adding an int key is rejected with a TypeError.
     """
     bloom = cBloom.for_capacity(1000, 1e-4)
     with pytest.raises(TypeError):
         bloom.add(1234)
Esempio n. 6
0
 def test_add_none(self):
     """
     Tests that adding None as a key is rejected with a TypeError.
     """
     bloom = cBloom.for_capacity(1000, 1e-4)
     with pytest.raises(TypeError):
         bloom.add(None)
Esempio n. 7
0
 def test_expected_prob(self):
     """
     Tests that expected_probability reports the known false positive
     rate (to 4 decimal places) for a known-sane size and capacity.
     """
     # Reference values from http://hur.st/bloomfilter?n=1e6&p=1e-4
     probability = cBloom.expected_probability(19170117, 1e6)
     assert round(probability, 4) == 1e-4
Esempio n. 8
0
 def test_required_bytes(self):
     """
     Tests that the size reported by required_bits, converted to
     bytes, is correct given some known-sane values.
     """
     # Reference values from http://hur.st/bloomfilter?n=1e6&p=1e-4
     needed_bytes = round(cBloom.required_bits(1e6, 1e-4) / 8.0)
     assert needed_bytes == round(19170117 / 8.0)
Esempio n. 9
0
 def test_small_bitmap(self):
     """
     Tests that initializing on a bitmap that is too small
     (here exactly the extra_buffer() size) raises a ValueError.
     """
     with pytest.raises(ValueError):
         undersized = Bitmap(cBloom.extra_buffer())
         cBloom(undersized, 3)
Esempio n. 10
0
 def test_length(self):
     """
     Tests that len() of a filter starts at zero and equals the
     number of distinct keys added to it.
     """
     bf = cBloom.for_capacity(1000, 1e-4)
     assert len(bf) == 0

     # Plain loop: add() is called only for its side effect, so there
     # is no reason to build a throwaway list of its return values.
     for x in xrange(1000):
         bf.add("test%d" % x)
     assert len(bf) == 1000
Esempio n. 11
0
    def test_prob(self):
        """
        Tests that the number of wrong answers the bloom filter gives
        stays within a threshold around the configured error rate.
        """
        # Configured to be wrong only once per hundred
        bloom = cBloom.for_capacity(1000, 0.01)

        # Count the adds (made with the second argument set to True)
        # that come back as exactly False
        num_wrong = 0
        for x in xrange(1000):
            if bloom.add("test%d" % x, True) is False:
                num_wrong += 1

        # Should get about 10 wrong
        assert 5 <= num_wrong <= 15
Esempio n. 12
0
    def test_add_without_check(self):
        """
        Tests that adding to a bloom filter WITHOUT checking for
        existing entries (second argument to add() is False) works:
        every add succeeds, the keys are found afterwards, and adding
        the same keys again is counted a second time.
        """
        bf = cBloom.for_capacity(1000, 1e-4)

        # First pass: every add reports success and every key is found
        for x in xrange(1000):
            assert bf.add("test%d" % x, False)
        assert all(("test%d" % x) in bf for x in xrange(1000))
        assert len(bf) == 1000

        # Second pass with the same keys: the adds still report success
        # and the length doubles
        for x in xrange(1000):
            assert bf.add("test%d" % x, False)
        assert len(bf) == 2000
Esempio n. 13
0
    def test_swap(self):
        """
        Swaps the mmap file from one implementation to another and
        checks that things work: the file is written by cBloom, then
        reopened and read back by pyBloom.
        """
        # `num_bytes` rather than `bytes` to avoid shadowing the builtin
        num_bytes, k = cBloom.params_for_capacity(2e4, 1e-3)
        bitmap = Bitmap(num_bytes, "testswap1.mmap")
        bf1 = cBloom(bitmap, k)
        for x in xrange(20000):
            bf1.add("foo%d" % x)
        bf1.close()

        # Reopen the same file with the other implementation. The k
        # passed here (50) is deliberately different from the one used
        # above: the size and k should be reloaded from the file.
        bitmap = Bitmap(num_bytes, "testswap1.mmap")
        bf2 = pyBloom(bitmap, 50)
        assert len(bf2) == 20000  # Should reload size and k
        assert bf2.k_num == k

        # Check all the entries
        assert all(("foo%d" % x) in bf2 for x in xrange(20000))
        bf2.close()
Esempio n. 14
0
    def test_equality_2(self):
        """
        Tests that the two implementations generate matching mmaps
        when given the same parameters and the same keys.
        """
        # `num_bytes` rather than `bytes` to avoid shadowing the builtin
        num_bytes, k = cBloom.params_for_capacity(2e4, 1e-3)

        # Fill a filter backed by its own file using cBloom
        bitmap = Bitmap(num_bytes, "testcompatc2.mmap")
        bf1 = cBloom(bitmap, k)
        for x in xrange(20000):
            bf1.add("foo%d" % x)

        # Fill a second filter, backed by a separate file, using pyBloom
        bitmap = Bitmap(num_bytes, "testcompatpy2.mmap")
        bf2 = pyBloom(bitmap, k)
        for x in xrange(20000):
            bf2.add("foo%d" % x)

        # Check the lengths
        assert len(bf1) == len(bf2)
        bf1.close()
        bf2.close()

        # Compare the mmap files
        self.compare_files("testcompatc2.mmap", "testcompatpy2.mmap")
Esempio n. 15
0
 def test_ideal_k(self):
     """
     Tests that ideal_k, once rounded, matches the known-sane value.
     """
     # Reference values from http://hur.st/bloomfilter?n=1e6&p=1e-4
     ideal = cBloom.ideal_k(19170117, 1e6)
     assert round(ideal) == 13
Esempio n. 16
0
 def test_expected_capacity(self):
     """
     Tests that expected_capacity, once rounded, matches the
     known-sane value.
     """
     # Reference values from http://hur.st/bloomfilter?n=1e6&p=1e-4
     capacity = cBloom.expected_capacity(19170117, 1e-4)
     assert round(capacity) == 1e6