Ejemplo n.º 1
0
 def test_cbf_jaccard_ident(self):
     ''' two filters holding the same element and count give 1.0 '''
     settings = dict(est_elements=10, false_positive_rate=0.01)
     first = CountingBloomFilter(**settings)
     second = CountingBloomFilter(**settings)
     # populate both filters with exactly the same data
     for cbf in (first, second):
         cbf.add('this is a test', 10)
     similarity = first.jaccard_index(second)
     self.assertEqual(similarity, 1.0)
Ejemplo n.º 2
0
 def test_cbf_jaccard_ident_2(self):
     ''' same element added with different counts still gives 1.0 '''
     settings = dict(est_elements=10, false_positive_rate=0.01)
     first = CountingBloomFilter(**settings)
     second = CountingBloomFilter(**settings)
     # identical key, but the two filters record different counts
     for cbf, count in ((first, 10), (second, 15)):
         cbf.add('this is a test', count)
     similarity = first.jaccard_index(second)
     self.assertEqual(similarity, 1.0)
Ejemplo n.º 3
0
 def test_cbf_jaccard_different(self):
     ''' filters built from different elements are expected to give 0.0 '''
     settings = dict(est_elements=10, false_positive_rate=0.05)
     first = CountingBloomFilter(**settings)
     second = CountingBloomFilter(**settings)
     first.add('this is a test', 10)
     second.add('this is also a test', 10)
     similarity = first.jaccard_index(second)
     self.assertEqual(similarity, 0.0)
Ejemplo n.º 4
0
 def test_cbf_all_bits_set(self):
     """estimate_elements is expected to report -1 for an overfull filter"""
     cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     # insert ten times more elements than the filter was sized for
     for value in map(str, range(100)):
         cbf.add(value)
     # NOTE(review): an earlier comment here said this case raises an
     # exception; the assertion expects the sentinel -1 instead - confirm
     estimate = cbf.estimate_elements()
     self.assertEqual(-1, estimate)
Ejemplo n.º 5
0
 def test_cbf_jaccard_different(self):
     """ filters built from different elements are expected to give 0.0 """
     settings = dict(est_elements=10, false_positive_rate=0.05)
     left = CountingBloomFilter(**settings)
     right = CountingBloomFilter(**settings)
     left.add("this is a test", 10)
     right.add("this is also a test", 10)
     index = left.jaccard_index(right)
     self.assertEqual(index, 0.0)
Ejemplo n.º 6
0
 def test_cbf_jaccard_ident_2(self):
     """ same element added with different counts still gives 1.0 """
     settings = dict(est_elements=10, false_positive_rate=0.01)
     left = CountingBloomFilter(**settings)
     right = CountingBloomFilter(**settings)
     # identical key, but the two filters record different counts
     for cbf, count in ((left, 10), (right, 15)):
         cbf.add("this is a test", count)
     index = left.jaccard_index(right)
     self.assertEqual(index, 1.0)
Ejemplo n.º 7
0
 def test_cbf_in_check(self):
     """ membership lookups through the `in` operator """
     cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     for item in ("this is a test", "this is another test"):
         cbf.add(item)
     # (element, expected membership) pairs, checked in order
     expectations = (
         ("this is a test", True),
         ("this is another test", True),
         ("this is yet another test", False),
         ("this is not another test", False),
     )
     for item, present in expectations:
         self.assertEqual(item in cbf, present)
Ejemplo n.º 8
0
 def test_cbf_in_check(self):
     ''' membership lookups through the `in` operator '''
     cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     for item in ('this is a test', 'this is another test'):
         cbf.add(item)
     # (element, expected membership) pairs, checked in order
     expectations = (
         ('this is a test', True),
         ('this is another test', True),
         ('this is yet another test', False),
         ('this is not another test', False),
     )
     for item, present in expectations:
         self.assertEqual(item in cbf, present)
Ejemplo n.º 9
0
 def test_cbf_check(self):
     """ lookups through the explicit check() method """
     cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     for item in ("this is a test", "this is another test"):
         cbf.add(item)
     # (element, expected check() result) pairs, checked in order
     expectations = (
         ("this is a test", True),
         ("this is another test", True),
         ("this is yet another test", False),
         ("this is not another test", False),
     )
     for item, expected in expectations:
         self.assertEqual(cbf.check(item), expected)
Ejemplo n.º 10
0
 def test_cbf_check(self):
     ''' lookups through the explicit check() method '''
     cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     for item in ('this is a test', 'this is another test'):
         cbf.add(item)
     # (element, expected check() result) pairs, checked in order
     expectations = (
         ('this is a test', True),
         ('this is another test', True),
         ('this is yet another test', False),
         ('this is not another test', False),
     )
     for item, expected in expectations:
         self.assertEqual(cbf.check(item), expected)
Ejemplo n.º 11
0
    def test_cbf_export_c_header(self):
        """test exporting a c header

        Adds ten elements to a filter, exports it with export_c_header()
        into a temporary file, then reads the file back and checks the
        fixed header lines plus the hex payload rebuilt from the array
        body.
        """
        hex_val = ("01000000000000000100000002000000000000000100000001000000"
                   "00000000000000000000000001000000000000000000000002000000"
                   "00000000010000000200000000000000000000000000000001000000"
                   "00000000000000000200000000000000010000000200000000000000"
                   "00000000000000000100000000000000000000000100000000000000"
                   "01000000020000000000000000000000000000000100000001000000"
                   "00000000010000000000000001000000020000000000000000000000"
                   "01000000000000000100000001000000010000000000000001000000"
                   "03000000000000000100000001000000000000000000000001000000"
                   "000000000000000a000000000000000a3d4ccccd")
        blm = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
        for i in range(0, 10):
            tmp = "this is a test {0}".format(i)
            blm.add(tmp)

        with NamedTemporaryFile(dir=os.getcwd(),
                                suffix=".blm",
                                delete=DELETE_TEMP_FILES) as fobj:
            blm.export_c_header(fobj.name)

            # now load the file, parse it and do some tests!
            # A separate name is used for the reader so the temporary-file
            # handle bound by the outer `with` is not shadowed.
            with open(fobj.name, "r") as header:
                data = [line.strip() for line in header]

        self.assertEqual("/* BloomFilter Export of a CountingBloomFilter */",
                         data[0])
        self.assertEqual("#include <inttypes.h>", data[1])
        self.assertEqual(
            "const uint64_t estimated_elements = {};".format(
                blm.estimated_elements), data[2])
        self.assertEqual(
            "const uint64_t elements_added = {};".format(blm.elements_added),
            data[3])
        self.assertEqual(
            "const float false_positive_rate = {};".format(
                blm.false_positive_rate), data[4])
        self.assertEqual(
            "const uint64_t number_bits = {};".format(blm.number_bits),
            data[5])
        self.assertEqual(
            "const unsigned int number_hashes = {};".format(blm.number_hashes),
            data[6])
        self.assertEqual("const unsigned char bloom[] = {", data[7])
        self.assertEqual("};", data[-1])

        # rebuild the hex version!
        # lines 8..-1 hold the comma-separated 0x.. byte literals
        new_hex = "".join([
            x.strip().replace("0x", "")
            for x in " ".join(data[8:-1]).split(",")
        ])
        self.assertEqual(hex_val, new_hex)
Ejemplo n.º 12
0
    def test_cbf_load_file(self):
        """ test loading bloom filter from file

            The exported file is removed in a finally clause so a failing
            assertion does not leave test.cbm in the working directory. """
        filename = "test.cbm"
        blm = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
        blm.add("this is a test")
        try:
            blm.export(filename)

            blm2 = CountingBloomFilter(filepath=filename)
            self.assertEqual("this is a test" in blm2, True)
            self.assertEqual("this is not a test" in blm2, False)
        finally:
            # export() may have failed before the file was created
            if os.path.exists(filename):
                os.remove(filename)
Ejemplo n.º 13
0
 def test_cbf_jaccard_invalid_msg(self):
     ''' check invalid type in a jaccard index message

         jaccard_index() must raise TypeError for a non-filter argument
         and the exception text must match the expected message. '''
     msg = 'The parameter second must be of type CountingBloomFilter'
     blm = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     blm.add('this is another test')
     # assertRaises fails with a descriptive message when nothing is
     # raised, replacing the manual try/except/else bookkeeping
     with self.assertRaises(TypeError) as ctx:
         blm.jaccard_index(15)
     self.assertEqual(str(ctx.exception), msg)
Ejemplo n.º 14
0
    def test_cbf_load_file(self):
        ''' test loading bloom filter from file

            The exported file is removed in a finally clause so a failing
            assertion does not leave test.cbm in the working directory. '''
        filename = 'test.cbm'
        blm = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
        blm.add('this is a test')
        try:
            blm.export(filename)

            blm2 = CountingBloomFilter(filepath=filename)
            self.assertEqual('this is a test' in blm2, True)
            self.assertEqual('this is not a test' in blm2, False)
        finally:
            # export() may have failed before the file was created
            if os.path.exists(filename):
                os.remove(filename)
Ejemplo n.º 15
0
 def test_cbf_jaccard_invalid_msg(self):
     """ check invalid type in a jaccard index message

         jaccard_index() must raise TypeError for a non-filter argument
         and the exception text must match the expected message. """
     msg = "The parameter second must be of type CountingBloomFilter"
     blm = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     blm.add("this is another test")
     # assertRaises fails with a descriptive message when nothing is
     # raised, replacing the manual try/except/else bookkeeping
     with self.assertRaises(TypeError) as ctx:
         blm.jaccard_index(15)
     self.assertEqual(str(ctx.exception), msg)
Ejemplo n.º 16
0
    def test_cbf_clear(self):
        ''' clear() resets elements_added and zeroes every slot '''
        cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
        self.assertEqual(cbf.elements_added, 0)
        for num in range(10):
            cbf.add('this is a test {0}'.format(num))
        self.assertEqual(cbf.elements_added, 10)

        cbf.clear()
        self.assertEqual(cbf.elements_added, 0)
        # every position of the underlying array must read back as zero
        for slot in range(cbf.bloom_length):
            stored = cbf._get_element(slot)
            self.assertEqual(stored, 0)
Ejemplo n.º 17
0
    def test_cbf_clear(self):
        """ clear() resets elements_added and zeroes every slot """
        cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
        self.assertEqual(cbf.elements_added, 0)
        for num in range(10):
            cbf.add("this is a test {0}".format(num))
        self.assertEqual(cbf.elements_added, 10)

        cbf.clear()
        self.assertEqual(cbf.elements_added, 0)
        # every position of the underlying array must read back as zero
        for slot in range(cbf.bloom_length):
            stored = cbf._get_element(slot)
            self.assertEqual(stored, 0)
Ejemplo n.º 18
0
 def test_cbf_very_large_add(self):
     """adding huge counts: expected return value and elements_added"""
     large = 1 << 32
     very_large = 1 << 64
     key = "this is a test 0"
     cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     self.assertEqual(cbf.elements_added, 0)

     # first insertion: 2**32 occurrences of the key
     first_result = cbf.add(key, large)
     self.assertEqual(cbf.elements_added, large)
     self.assertEqual(first_result, large - 1)

     # second insertion: 2**64 more occurrences of the same key
     second_result = cbf.add(key, very_large)
     self.assertEqual(cbf.elements_added, very_large - 1)
     self.assertEqual(second_result, large - 1)
Ejemplo n.º 19
0
 def test_cbf_remove_mult(self):
     """removing several occurrences at once, including more than remain"""
     key = "this is a test 0"
     cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     self.assertEqual(cbf.elements_added, 0)
     cbf.add(key, 15)
     self.assertEqual(cbf.elements_added, 15)

     # 15 - 11 leaves four occurrences
     remaining = cbf.remove(key, 11)
     self.assertEqual(cbf.elements_added, 4)
     self.assertEqual(remaining, 4)

     # asking for ten more than are left is expected to bottom out at zero
     remaining = cbf.remove(key, 10)
     self.assertEqual(cbf.elements_added, 0)
     self.assertEqual(remaining, 0)
Ejemplo n.º 20
0
 def test_cbf_remove_mult(self):
     ''' removing several occurrences at once, including more than remain
     '''
     key = 'this is a test 0'
     cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     self.assertEqual(cbf.elements_added, 0)
     cbf.add(key, 15)
     self.assertEqual(cbf.elements_added, 15)

     # 15 - 11 leaves four occurrences
     remaining = cbf.remove(key, 11)
     self.assertEqual(cbf.elements_added, 4)
     self.assertEqual(remaining, 4)

     # asking for ten more than are left is expected to bottom out at zero
     remaining = cbf.remove(key, 10)
     self.assertEqual(cbf.elements_added, 0)
     self.assertEqual(remaining, 0)
Ejemplo n.º 21
0
    def test_cbf_load_file(self):
        """round-trip a filter through export() and the filepath argument"""
        with NamedTemporaryFile(dir=os.getcwd(),
                                suffix=".cbm",
                                delete=DELETE_TEMP_FILES) as tmp_file:
            exported = CountingBloomFilter(est_elements=10,
                                           false_positive_rate=0.05)
            exported.add("this is a test")
            exported.export(tmp_file.name)

            # load a second filter from the file that was just written
            loaded = CountingBloomFilter(filepath=tmp_file.name)
            expectations = (
                ("this is a test", True),
                ("this is not a test", False),
            )
            for item, present in expectations:
                self.assertEqual(item in loaded, present)
Ejemplo n.º 22
0
 def test_cbf_remove(self):
     """ test to see if the remove functionality works correctly

         Removes the same element twice: the first removal drops
         elements_added from 5 to 4, the second must leave it unchanged.
         The return value of each call is checked individually. """
     blm = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     self.assertEqual(blm.elements_added, 0)
     for i in range(0, 5):
         tmp = "this is a test {0}".format(i)
         blm.add(tmp)
     self.assertEqual(blm.elements_added, 5)
     res = blm.remove("this is a test 0")
     self.assertEqual(blm.elements_added, 4)
     self.assertEqual(res, 0)
     # capture the second result as well; without the assignment the
     # assertion below would only re-check the first removal
     res = blm.remove("this is a test 0")
     self.assertEqual(blm.elements_added, 4)
     self.assertEqual(res, 0)
Ejemplo n.º 23
0
 def test_cbf_remove(self):
     ''' test to see if the remove functionality works correctly

         Removes the same element twice: the first removal drops
         elements_added from 5 to 4, the second must leave it unchanged.
         The return value of each call is checked individually. '''
     blm = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     self.assertEqual(blm.elements_added, 0)
     for i in range(0, 5):
         tmp = 'this is a test {0}'.format(i)
         blm.add(tmp)
     self.assertEqual(blm.elements_added, 5)
     res = blm.remove('this is a test 0')
     self.assertEqual(blm.elements_added, 4)
     self.assertEqual(res, 0)
     # capture the second result as well; without the assignment the
     # assertion below would only re-check the first removal
     res = blm.remove('this is a test 0')
     self.assertEqual(blm.elements_added, 4)
     self.assertEqual(res, 0)
Ejemplo n.º 24
0
 def test_cbf_very_large_add(self):
     ''' adding 2**32 occurrences: expected return and elements_added '''
     large = 1 << 32
     cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     self.assertEqual(cbf.elements_added, 0)
     returned = cbf.add('this is a test 0', large)
     # elements_added tracks the full amount; the return is one less
     self.assertEqual(cbf.elements_added, large)
     self.assertEqual(returned, large - 1)
Ejemplo n.º 25
0
 def test_cbf_jaccard_similar(self):
     ''' filters sharing one of two elements land between 0.33 and 0.50 '''
     settings = dict(est_elements=10, false_positive_rate=0.01)
     first = CountingBloomFilter(**settings)
     for item in ('this is a test', 'this is a different test'):
         first.add(item, 10)
     second = CountingBloomFilter(**settings)
     for item in ('this is a test', 'this is also a test'):
         second.add(item, 10)
     similarity = first.jaccard_index(second)
     # only a range is asserted, not an exact value
     self.assertGreater(similarity, 0.33)
     self.assertLess(similarity, 0.50)
Ejemplo n.º 26
0
 def test_cbf_jaccard_similar(self):
     """ filters sharing one of two elements land between 0.33 and 0.50 """
     settings = dict(est_elements=10, false_positive_rate=0.01)
     left = CountingBloomFilter(**settings)
     for item in ("this is a test", "this is a different test"):
         left.add(item, 10)
     right = CountingBloomFilter(**settings)
     for item in ("this is a test", "this is also a test"):
         right.add(item, 10)
     index = left.jaccard_index(right)
     # only a range is asserted, not an exact value
     self.assertGreater(index, 0.33)
     self.assertLess(index, 0.50)
Ejemplo n.º 27
0
 def test_cbf_stats(self):
     ''' str() of a ten-element filter matches the expected report '''
     expected = ('CountingBloom:\n'
                 '\tbits: 63\n'
                 '\testimated elements: 10\n'
                 '\tnumber hashes: 4\n'
                 '\tmax false positive rate: 0.050000\n'
                 '\telements added: 10\n'
                 '\tcurrent false positive rate: 0.048806\n'
                 '\tis on disk: no\n'
                 '\tindex fullness: 0.634921\n'
                 '\tmax index usage: 3\n'
                 '\tmax index id: 2\n'
                 '\tcalculated elements: 10\n')
     cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     for num in range(10):
         cbf.add('this is a test {0}'.format(num))
     self.assertEqual(str(cbf), expected)
Ejemplo n.º 28
0
 def test_cbf_stats(self):
     """ str() of a ten-element filter matches the expected report """
     expected = ("CountingBloom:\n"
                 "\tbits: 63\n"
                 "\testimated elements: 10\n"
                 "\tnumber hashes: 4\n"
                 "\tmax false positive rate: 0.050000\n"
                 "\telements added: 10\n"
                 "\tcurrent false positive rate: 0.048806\n"
                 "\tis on disk: no\n"
                 "\tindex fullness: 0.634921\n"
                 "\tmax index usage: 3\n"
                 "\tmax index id: 2\n"
                 "\tcalculated elements: 10\n")
     cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     for num in range(10):
         cbf.add("this is a test {0}".format(num))
     self.assertEqual(str(cbf), expected)
Ejemplo n.º 29
0
    def test_cbf_export_hex(self):
        """ export_hex() of a ten-element filter yields the known string """
        expected_hex = (
            "01000000000000000300000000000000000000000000000000000000010"
            "00000000000000200000000000000000000000200000001000000010000"
            "00020000000000000000000000000000000000000000000000000000000"
            "10000000000000000000000010000000000000000000000000000000100"
            "00000100000000000000020000000100000000000000010000000100000"
            "00100000000000000000000000200000000000000010000000100000000"
            "00000002000000010000000000000000000000000000000200000000000"
            "00001000000000000000000000001000000010000000000000000000000"
            "01000000020000000000000002000000000000000000000a00000000000"
            "0000a3d4ccccd")

        cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
        for num in range(10):
            cbf.add("this is a test {0}".format(num))

        self.assertEqual(cbf.export_hex(), expected_hex)
Ejemplo n.º 30
0
    def test_cbf_export_hex(self):
        ''' export_hex() of a ten-element filter yields the known string '''
        expected_hex = (
            '01000000000000000300000000000000000000000000000000000000010'
            '00000000000000200000000000000000000000200000001000000010000'
            '00020000000000000000000000000000000000000000000000000000000'
            '10000000000000000000000010000000000000000000000000000000100'
            '00000100000000000000020000000100000000000000010000000100000'
            '00100000000000000000000000200000000000000010000000100000000'
            '00000002000000010000000000000000000000000000000200000000000'
            '00001000000000000000000000001000000010000000000000000000000'
            '01000000020000000000000002000000000000000000000a00000000000'
            '0000a3d4ccccd')

        cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
        for num in range(10):
            cbf.add('this is a test {0}'.format(num))

        self.assertEqual(cbf.export_hex(), expected_hex)
Ejemplo n.º 31
0
 def test_cbf_jaccard_similar_2(self):
     ''' one shared element out of two is expected to give exactly 0.50 '''
     settings = dict(est_elements=100, false_positive_rate=0.01)
     first = CountingBloomFilter(**settings)
     for item in ('this is a test', 'this is a different test'):
         first.add(item, 10)
     # the second filter only holds the shared element
     second = CountingBloomFilter(**settings)
     second.add('this is a test', 10)
     similarity = first.jaccard_index(second)
     self.assertEqual(similarity, 0.50)
Ejemplo n.º 32
0
    def test_cbf_union(self):
        ''' union() of two filters: expected counts and elements_added '''
        settings = dict(est_elements=100, false_positive_rate=0.05)
        first = CountingBloomFilter(**settings)
        for item in ('this is a test', 'this is a different test'):
            first.add(item, 10)
        second = CountingBloomFilter(**settings)
        second.add('this is a test', 10)
        merged = first.union(second)

        # (element, expected check() result on the merged filter)
        expected_counts = (
            ('this is a test', 20),
            ('this is a different test', 10),
            ('this is not a test', 0),
        )
        for item, count in expected_counts:
            self.assertEqual(merged.check(item), count)
        self.assertEqual(merged.elements_added, 2)
Ejemplo n.º 33
0
    def test_cbf_union(self):
        """ union() of two filters: expected counts and elements_added """
        settings = dict(est_elements=100, false_positive_rate=0.05)
        left = CountingBloomFilter(**settings)
        for item in ("this is a test", "this is a different test"):
            left.add(item, 10)
        right = CountingBloomFilter(**settings)
        right.add("this is a test", 10)
        combined = left.union(right)

        # (element, expected check() result on the combined filter)
        expected_counts = (
            ("this is a test", 20),
            ("this is a different test", 10),
            ("this is not a test", 0),
        )
        for item, count in expected_counts:
            self.assertEqual(combined.check(item), count)
        self.assertEqual(combined.elements_added, 2)
Ejemplo n.º 34
0
    def test_cbf_inter(self):
        ''' intersection() of two filters: expected counts and
            elements_added '''
        settings = dict(est_elements=100, false_positive_rate=0.05)
        first = CountingBloomFilter(**settings)
        for item in ('this is a test', 'this is a different test'):
            first.add(item, 10)
        second = CountingBloomFilter(**settings)
        second.add('this is a test', 10)
        common = first.intersection(second)

        # (element, expected check() result on the intersection)
        expected_counts = (
            ('this is a test', 20),
            ('this is a different test', 0),
            ('this is not a test', 0),
        )
        for item, count in expected_counts:
            self.assertEqual(common.check(item), count)
        self.assertEqual(common.elements_added, 1)
Ejemplo n.º 35
0
 def test_cbf_jaccard_empty(self):
     ''' jaccard index against a filter with nothing added is 0.0 '''
     settings = dict(est_elements=10, false_positive_rate=0.01)
     populated = CountingBloomFilter(**settings)
     populated.add('this is a test', 10)
     # the second filter is left untouched on purpose
     empty = CountingBloomFilter(**settings)
     similarity = populated.jaccard_index(empty)
     self.assertEqual(similarity, 0.0)
Ejemplo n.º 36
0
 def test_cbf_estimate_2(self):
     ''' estimate_elements() for this pair of inputs is expected to be 1 '''
     cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     additions = (('this is a test', 10), ('this is a different test', 5))
     for item, count in additions:
         cbf.add(item, count)
     estimate = cbf.estimate_elements()
     self.assertEqual(estimate, 1)
Ejemplo n.º 37
0
    def test_cbf_export_file(self):
        ''' test exporting bloom filter to file

            The md5 of the exported file is compared against a known
            digest. The file is removed in a finally clause so a failed
            comparison does not leave test.cbm in the working directory. '''
        filename = 'test.cbm'
        md5_val = '941b499746dd72d36658399b209d4869'
        blm = CountingBloomFilter(est_elements=10, false_positive_rate=0.01)
        blm.add('test')
        blm.add('out')
        blm.add('the')
        blm.add('counting')
        blm.add('bloom')
        blm.add('filter')

        # repeated (and differently cased) words
        blm.add('test')
        blm.add('Test')
        blm.add('out')
        blm.add('test')
        try:
            blm.export(filename)

            md5_out = calc_file_md5(filename)
            self.assertEqual(md5_out, md5_val)
        finally:
            # export() may have failed before the file was created
            if os.path.exists(filename):
                os.remove(filename)
Ejemplo n.º 38
0
 def test_cbf_estimate_easy(self):
     ''' estimate_elements() after adding two distinct elements is 2 '''
     cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     additions = (('this is a test', 10), ('this is also a test', 5))
     for item, count in additions:
         cbf.add(item, count)
     estimate = cbf.estimate_elements()
     self.assertEqual(estimate, 2)
Ejemplo n.º 39
0
 def test_cbf_jaccard_invalid(self):
     ''' jaccard_index() with a non-filter argument raises TypeError '''
     cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     cbf.add('this is a test')
     # an int is passed where another filter is expected
     with self.assertRaises(TypeError):
         cbf.jaccard_index(1)
Ejemplo n.º 40
0
 def test_cbf_estimate_2(self):
     """ estimate_elements() for this pair of inputs is expected to be 1 """
     cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     additions = (("this is a test", 10), ("this is a different test", 5))
     for item, count in additions:
         cbf.add(item, count)
     estimate = cbf.estimate_elements()
     self.assertEqual(estimate, 1)
Ejemplo n.º 41
0
 def test_cbf_ea(self):
     ''' elements_added starts at 0 and becomes 1 after one add() '''
     cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     before = cbf.elements_added
     self.assertEqual(before, 0)
     cbf.add('this is a test')
     after = cbf.elements_added
     self.assertEqual(after, 1)
Ejemplo n.º 42
0
 def test_cbf_jaccard_invalid(self):
     """ jaccard_index() with a non-filter argument raises TypeError """
     cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     cbf.add("this is a test")
     # an int is passed where another filter is expected
     with self.assertRaises(TypeError):
         cbf.jaccard_index(1)
Ejemplo n.º 43
0
 def test_cbf_jaccard_empty(self):
     """ jaccard index against a filter with nothing added is 0.0 """
     settings = dict(est_elements=10, false_positive_rate=0.01)
     populated = CountingBloomFilter(**settings)
     populated.add("this is a test", 10)
     # the second filter is left untouched on purpose
     empty = CountingBloomFilter(**settings)
     index = populated.jaccard_index(empty)
     self.assertEqual(index, 0.0)
Ejemplo n.º 44
0
 def test_cbf_estimate_easy(self):
     """estimate_elements() after adding two distinct elements is 2"""
     cbf = CountingBloomFilter(est_elements=20, false_positive_rate=0.05)
     additions = (("this is a test", 10), ("this is also a test", 5))
     for item, count in additions:
         cbf.add(item, count)
     estimate = cbf.estimate_elements()
     self.assertEqual(estimate, 2)
Ejemplo n.º 45
0
    def test_cbf_export_file(self):
        """ test exporting bloom filter to file

            The md5 of the exported file is compared against a known
            digest. The file is removed in a finally clause so a failed
            comparison does not leave test.cbm in the working directory. """
        filename = "test.cbm"
        md5_val = "941b499746dd72d36658399b209d4869"
        blm = CountingBloomFilter(est_elements=10, false_positive_rate=0.01)
        blm.add("test")
        blm.add("out")
        blm.add("the")
        blm.add("counting")
        blm.add("bloom")
        blm.add("filter")

        # repeated (and differently cased) words
        blm.add("test")
        blm.add("Test")
        blm.add("out")
        blm.add("test")
        try:
            blm.export(filename)

            md5_out = calc_file_md5(filename)
            self.assertEqual(md5_out, md5_val)
        finally:
            # export() may have failed before the file was created
            if os.path.exists(filename):
                os.remove(filename)
Ejemplo n.º 46
0
 def test_cbf_ea(self):
     """ elements_added starts at 0 and becomes 1 after one add() """
     cbf = CountingBloomFilter(est_elements=10, false_positive_rate=0.05)
     before = cbf.elements_added
     self.assertEqual(before, 0)
     cbf.add("this is a test")
     after = cbf.elements_added
     self.assertEqual(after, 1)