Beispiel #1
0
 def test_bf_all_bits_set(self):
     """Overfill a small filter and expect the element estimate to be -1."""
     saturated = BloomFilter(est_elements=10, false_positive_rate=0.05)
     for value in map(str, range(100)):
         saturated.add(value)
     # NOTE: per the original note, estimating with all bits set hits an
     # exception path; -1 is the value this test expects back
     estimate = saturated.estimate_elements()
     self.assertEqual(-1, estimate)
Beispiel #2
0
 def test_bf_bytes(self):
     """Serialize a BloomFilter with bytes() and compare its MD5 digest."""
     expected_digest = "8d27e30e1c5875b0edcf7413c7bdb221"
     blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
     blm.add("this is a test")
     actual_digest = hashlib.md5(bytes(blm)).hexdigest()
     self.assertEqual(actual_digest, expected_digest)
Beispiel #3
0
    def test_bf_jaccard_diff(self):
        """Jaccard index of filters created with different sizes is None."""
        first = BloomFilter(est_elements=10, false_positive_rate=0.05)
        first.add("this is a test")
        second = BloomFilter(est_elements=100, false_positive_rate=0.05)

        # the two filters were built with different est_elements
        self.assertEqual(first.jaccard_index(second), None)
Beispiel #4
0
    def test_bf_jaccard_diff(self):
        ''' jaccard index between differently sized filters must be None '''
        small = BloomFilter(est_elements=10, false_positive_rate=0.05)
        small.add('this is a test')
        large = BloomFilter(est_elements=100, false_positive_rate=0.05)

        index = small.jaccard_index(large)
        self.assertEqual(index, None)
Beispiel #5
0
 def test_bf_in_check(self):
     """Verify membership testing through the ``in`` operator."""
     blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
     added = ("this is a test", "this is another test")
     missing = ("this is yet another test", "this is not another test")
     for key in added:
         blm.add(key)
     # keys that were added must be reported as present ...
     for key in added:
         self.assertEqual(key in blm, True)
     # ... and the keys that were never added as absent
     for key in missing:
         self.assertEqual(key in blm, False)
Beispiel #6
0
    def test_bf_intersection_diff(self):
        ''' intersection of filters created with different sizes must
            be None '''
        left = BloomFilter(est_elements=10, false_positive_rate=0.05)
        left.add('this is a test')
        right = BloomFilter(est_elements=100, false_positive_rate=0.05)

        self.assertEqual(left.intersection(right), None)
Beispiel #7
0
 def test_bf_in_check(self):
     ''' membership via the in operator reflects what was added '''
     blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
     blm.add('this is a test')
     blm.add('this is another test')
     # (key, expected result of ``key in blm``), checked in this order
     expectations = (
         ('this is a test', True),
         ('this is another test', True),
         ('this is yet another test', False),
         ('this is not another test', False),
     )
     for key, present in expectations:
         self.assertEqual(key in blm, present)
Beispiel #8
0
 def test_bf_check(self):
     ''' check() reports added keys as present and others as absent '''
     blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
     blm.add('this is a test')
     blm.add('this is another test')
     # (key, expected result of blm.check(key)), checked in this order
     expectations = (
         ('this is a test', True),
         ('this is another test', True),
         ('this is yet another test', False),
         ('this is not another test', False),
     )
     for key, present in expectations:
         self.assertEqual(blm.check(key), present)
Beispiel #9
0
    def test_bf_union_diff(self):
        ''' union of filters using different hash functions must be None '''
        default_hashed = BloomFilter(est_elements=10, false_positive_rate=0.05)
        default_hashed.add('this is a test')
        # same size and rate, but built with another hash function
        other_hashed = BloomFilter(est_elements=10, false_positive_rate=0.05,
                                   hash_function=different_hash)

        self.assertEqual(default_hashed.union(other_hashed), None)
Beispiel #10
0
 def test_bf_check(self):
     """Ensure check() finds added keys and rejects keys never added."""
     blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
     present = ("this is a test", "this is another test")
     absent = ("this is yet another test", "this is not another test")
     for key in present:
         blm.add(key)
     for key in present:
         self.assertEqual(blm.check(key), True)
     for key in absent:
         self.assertEqual(blm.check(key), False)
Beispiel #11
0
    def test_bf_export_hex(self):
        """Export a filled filter as hex and compare to the known string."""
        expected_hex = "85f240623b6d9459000000000000000a000000000000000a3d4ccccd"
        blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
        for number in range(10):
            blm.add("this is a test {0}".format(number))

        self.assertEqual(blm.export_hex(), expected_hex)
Beispiel #12
0
    def test_bf_export_hex(self):
        ''' the hex export of a filter with ten keys matches a fixed value '''
        expected = '85f240623b6d9459000000000000000a000000000000000a3d4ccccd'
        blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
        keys = ['this is a test {0}'.format(n) for n in range(0, 10)]
        for key in keys:
            blm.add(key)

        actual = blm.export_hex()
        self.assertEqual(actual, expected)
Beispiel #13
0
    def test_bf_union_diff(self):
        """Union of filters built with different hash functions is None."""
        shared_settings = {"est_elements": 10, "false_positive_rate": 0.05}
        base = BloomFilter(**shared_settings)
        base.add("this is a test")
        # only the hash function differs from ``base``
        rehashed = BloomFilter(hash_function=different_hash, **shared_settings)

        merged = base.union(rehashed)
        self.assertEqual(merged, None)
Beispiel #14
0
 def test_bf_add(self):
     ''' the element estimate goes from 0 to 1 after a single add '''
     blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
     before = blm.estimate_elements()
     blm.add('this is a test')
     after = blm.estimate_elements()
     self.assertNotEqual(before, after)
     self.assertEqual(before, 0)
     self.assertEqual(after, 1)
     # the exact insertion counter must agree with the estimate here
     self.assertEqual(blm.elements_added, 1)
Beispiel #15
0
 def test_bf_add(self):
     """Check estimate_elements() before and after adding one key."""
     blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
     estimate_empty = blm.estimate_elements()
     blm.add("this is a test")
     estimate_one = blm.estimate_elements()
     self.assertNotEqual(estimate_empty, estimate_one)
     self.assertEqual(estimate_empty, 0)
     self.assertEqual(estimate_one, 1)
     self.assertEqual(blm.elements_added, 1)
Beispiel #16
0
    def test_bf_export_file(self):
        """Export a bloom filter to a file and verify the file's MD5 digest.

        The exported file is removed in a ``finally`` clause so that a
        failing export or assertion does not leave ``test.blm`` behind.
        """
        filename = "test.blm"
        md5_val = "7f590086f9b962387e145899dd001256"
        blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
        blm.add("this is a test")
        try:
            blm.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5_out, md5_val)
        finally:
            # export may have failed before creating the file
            if os.path.exists(filename):
                os.remove(filename)
Beispiel #17
0
 def test_bf_intersec_invalid_msg(self):
     """Check the TypeError message raised by intersection() on a bad type.

     Uses the ``assertRaises`` context manager so that a missing exception
     is reported as "TypeError not raised" rather than ``True != False``.
     """
     msg = ("The parameter second must be of type BloomFilter or "
            "a BloomFilterOnDisk")
     blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
     blm.add("this is a test")
     with self.assertRaises(TypeError) as caught:
         blm.intersection(1)
     self.assertEqual(str(caught.exception), msg)
Beispiel #18
0
    def test_bf_export_file(self):
        ''' test exporting bloom filter to file

            the exported file is deleted even if the export or the digest
            comparison fails '''
        filename = 'test.blm'
        md5_val = '7f590086f9b962387e145899dd001256'
        blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
        blm.add('this is a test')
        try:
            blm.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5_out, md5_val)
        finally:
            # nothing to delete when export failed before creating the file
            if os.path.exists(filename):
                os.remove(filename)
Beispiel #19
0
    def test_bf_export_file(self):
        ''' test exporting bloom filter to file; cleanup of the exported
            file is guaranteed by a finally clause '''
        filename = 'test.blm'
        md5_val = '7f590086f9b962387e145899dd001256'
        blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
        blm.add('this is a test')
        try:
            blm.export(filename)
            self.assertEqual(calc_file_md5(filename), md5_val)
        finally:
            # guard the removal: the file may never have been written
            if os.path.exists(filename):
                os.remove(filename)
Beispiel #20
0
 def test_bf_union_invalid_msg(self):
     ''' check the TypeError message raised by union() on an invalid type

         assertRaises is used as a context manager so a missing exception
         fails with a descriptive message instead of "True != False" '''
     msg = ('The parameter second must be of type BloomFilter or '
            'a BloomFilterOnDisk')
     blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
     blm.add('this is a test')
     with self.assertRaises(TypeError) as caught:
         blm.union(1)
     self.assertEqual(str(caught.exception), msg)
Beispiel #21
0
    def test_bf_load_file(self):
        ''' test loading bloom filter from file

            the exported file is removed even when loading or one of the
            membership assertions fails '''
        filename = 'test.blm'

        blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
        blm.add('this is a test')
        try:
            blm.export(filename)

            blm2 = BloomFilter(filepath=filename)
            self.assertEqual('this is a test' in blm2, True)
            self.assertEqual('this is not a test' in blm2, False)
        finally:
            # export may have failed before the file was created
            if os.path.exists(filename):
                os.remove(filename)
Beispiel #22
0
    def test_bf_load_file(self):
        ''' test loading bloom filter from file; the temporary export is
            always cleaned up through a finally clause '''
        filename = 'test.blm'

        blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
        blm.add('this is a test')
        try:
            blm.export(filename)

            loaded = BloomFilter(filepath=filename)
            self.assertEqual('this is a test' in loaded, True)
            self.assertEqual('this is not a test' in loaded, False)
        finally:
            # only remove what actually exists so cleanup cannot mask
            # the original failure
            if os.path.exists(filename):
                os.remove(filename)
Beispiel #23
0
 def test_bf_intersec_invalid_msg(self):
     ''' check the TypeError message raised by intersection() on an
         invalid type

         a missing exception is reported by assertRaises itself rather
         than by a bare assertEqual(True, False) '''
     msg = ('The parameter second must be of type BloomFilter or '
            'a BloomFilterOnDisk')
     blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
     blm.add('this is a test')
     with self.assertRaises(TypeError) as caught:
         blm.intersection(1)
     self.assertEqual(str(caught.exception), msg)
Beispiel #24
0
    def test_bf_load_file(self):
        """Export a filter, load it back from the file and check membership.

        The exported file is removed in a ``finally`` clause so that a
        failed load or assertion does not leave ``test.blm`` on disk.
        """
        filename = "test.blm"

        blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
        blm.add("this is a test")
        try:
            blm.export(filename)

            blm2 = BloomFilter(filepath=filename)
            self.assertEqual("this is a test" in blm2, True)
            self.assertEqual("this is not a test" in blm2, False)
        finally:
            # export may have failed before creating the file
            if os.path.exists(filename):
                os.remove(filename)
Beispiel #25
0
    def test_bf_export_file(self):
        """Export to a temporary file and compare the file's MD5 digest."""
        expected_md5 = "8d27e30e1c5875b0edcf7413c7bdb221"
        blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
        blm.add("this is a test")

        temp_options = {
            "dir": os.getcwd(),
            "suffix": ".blm",
            "delete": DELETE_TEMP_FILES,
        }
        with NamedTemporaryFile(**temp_options) as handle:
            blm.export(handle.name)
            # digest is taken while the temporary file is still open
            actual_md5 = calc_file_md5(handle.name)
        self.assertEqual(actual_md5, expected_md5)
Beispiel #26
0
    def test_bfod_load_on_disk(self):
        """Load a previously exported filter through BloomFilterOnDisk.

        The on-disk filter is closed and the exported file removed in
        ``finally`` clauses, so a failing assertion leaves neither an open
        filter nor ``tmp.blm`` behind.
        """
        filename = "tmp.blm"

        blm = BloomFilter(10, 0.05)
        blm.add("this is a test")
        try:
            blm.export(filename)

            blmd = BloomFilterOnDisk(filename)
            try:
                self.assertEqual("this is a test" in blmd, True)
                self.assertEqual("this is not a test" in blmd, False)
            finally:
                blmd.close()
        finally:
            # export may have failed before creating the file
            if os.path.exists(filename):
                os.remove(filename)
Beispiel #27
0
    def test_bf_clear(self):
        ''' clear() resets the counter and zeroes every bloom element '''
        blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
        self.assertEqual(blm.elements_added, 0)
        for number in range(10):
            blm.add('this is a test {0}'.format(number))
        self.assertEqual(blm.elements_added, 10)

        blm.clear()
        self.assertEqual(blm.elements_added, 0)
        # walk the whole underlying array; each element must read back 0
        length = blm.bloom_length
        for position in range(length):
            self.assertEqual(blm._get_element(position), 0)
Beispiel #28
0
    def test_bf_clear(self):
        """Fill a filter, clear it, and verify it is empty again."""
        blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
        self.assertEqual(blm.elements_added, 0)
        keys = ["this is a test {0}".format(n) for n in range(0, 10)]
        for key in keys:
            blm.add(key)
        self.assertEqual(blm.elements_added, 10)

        blm.clear()
        self.assertEqual(blm.elements_added, 0)
        # every element of the bloom array is expected to be zero
        for offset in range(blm.bloom_length):
            element = blm._get_element(offset)
            self.assertEqual(element, 0)
Beispiel #29
0
    def test_bfod_load_on_disk(self):
        """Export a filter to a temporary file and reopen it on disk."""
        temp_options = {
            "dir": os.getcwd(),
            "suffix": ".blm",
            "delete": DELETE_TEMP_FILES,
        }
        with NamedTemporaryFile(**temp_options) as handle:
            in_memory = BloomFilter(10, 0.05)
            in_memory.add("this is a test")
            in_memory.export(handle.name)

            on_disk = BloomFilterOnDisk(handle.name)
            self.assertEqual("this is a test" in on_disk, True)
            self.assertEqual("this is not a test" in on_disk, False)
            on_disk.close()
Beispiel #30
0
    def test_bf_load_file(self):
        """Round-trip a filter through a temporary file and check membership."""
        source = BloomFilter(est_elements=10, false_positive_rate=0.05)
        source.add("this is a test")

        temp_options = {
            "dir": os.getcwd(),
            "suffix": ".blm",
            "delete": DELETE_TEMP_FILES,
        }
        with NamedTemporaryFile(**temp_options) as handle:
            source.export(handle.name)
            loaded = BloomFilter(filepath=handle.name)

        # membership is checked after the temporary file context has exited
        self.assertEqual("this is a test" in loaded, True)
        self.assertEqual("this is not a test" in loaded, False)
Beispiel #31
0
    def test_bfod_load_on_disk(self):
        ''' test loading a previously saved blm on disk

            closing the on-disk filter and deleting the exported file both
            happen in finally clauses, so a failed assertion cannot leak
            them '''
        filename = 'tmp.blm'

        blm = BloomFilter(10, 0.05)
        blm.add('this is a test')
        try:
            blm.export(filename)

            blmd = BloomFilterOnDisk(filename)
            try:
                self.assertEqual('this is a test' in blmd, True)
                self.assertEqual('this is not a test' in blmd, False)
            finally:
                blmd.close()
        finally:
            # export may have failed before creating the file
            if os.path.exists(filename):
                os.remove(filename)
Beispiel #32
0
    def test_bfod_load_on_disk(self):
        ''' test loading a previously saved blm on disk; the filter is
            closed and tmp.blm removed even when an assertion fails '''
        filename = 'tmp.blm'

        blm = BloomFilter(10, 0.05)
        blm.add('this is a test')
        try:
            blm.export(filename)

            on_disk = BloomFilterOnDisk(filename)
            try:
                self.assertEqual('this is a test' in on_disk, True)
                self.assertEqual('this is not a test' in on_disk, False)
            finally:
                # close before the file is deleted, as the original did
                on_disk.close()
        finally:
            if os.path.exists(filename):
                os.remove(filename)
Beispiel #33
0
class Doorkeeper:
    """Wrapper around a BloomFilter that records keys as they are queried.

    ``allow(key)`` returns what the filter's ``check`` reported for ``key``
    before the key was added by the same call.
    """

    def __init__(self, cap=100000, false_positive=0.01):
        """Create the backing BloomFilter from ``cap`` and ``false_positive``.

        Both values are passed positionally to ``BloomFilter``.
        """
        self.bloom = BloomFilter(cap, false_positive)

    def __insert(self, key: str):
        """Add ``key`` and return the ``check`` result taken beforehand."""
        seen_before = self.bloom.check(key)
        # the membership test must run first; afterwards the key is
        # recorded regardless of the outcome
        self.bloom.add(key)
        return seen_before

    def allow(self, key: str):
        """Record ``key`` and return whether the filter already reported it."""
        was_present = self.__insert(key)
        return was_present

    def reset(self):
        """Clear the backing filter."""
        self.bloom.clear()
Beispiel #34
0
    def test_bfod_jaccard(self):
        """Jaccard index between an on-disk filter and an in-memory filter.

        The filters share one of three distinct keys; the index is expected
        to lie strictly between 0.33 and 0.50. Closing the on-disk filter
        and removing its file happen in a ``finally`` clause so a failing
        assertion does not leave ``tmp.blm`` behind.
        """
        filename = "tmp.blm"
        blm = BloomFilterOnDisk(filename, 10, 0.05)
        try:
            blm.add("this is a test")
            blm.add("this is another test")
            blm2 = BloomFilter(10, 0.05)
            blm2.add("this is another test")
            blm2.add("this is yet another test")

            res = blm.jaccard_index(blm2)
            self.assertGreater(res, 0.33)
            self.assertLess(res, 0.50)
        finally:
            blm.close()
            if os.path.exists(filename):
                os.remove(filename)
Beispiel #35
0
    def test_bfod_jaccard(self):
        ''' test the on disk jaccard index of two bloom filters

            the on-disk filter is closed and tmp.blm removed in a finally
            clause so that a failing assertion cannot leak them '''
        filename = 'tmp.blm'
        blm = BloomFilterOnDisk(filename, 10, 0.05)
        try:
            blm.add('this is a test')
            blm.add('this is another test')
            blm2 = BloomFilter(10, 0.05)
            blm2.add('this is another test')
            blm2.add('this is yet another test')

            res = blm.jaccard_index(blm2)
            # one shared key out of three distinct keys
            self.assertGreater(res, 0.33)
            self.assertLess(res, 0.50)
        finally:
            blm.close()
            if os.path.exists(filename):
                os.remove(filename)
Beispiel #36
0
    def test_bf_frombytes(self):
        """Round-trip a filter through bytes() and BloomFilter.frombytes()."""
        original = BloomFilter(est_elements=10, false_positive_rate=0.05)
        original.add("this is a test")

        restored = BloomFilter.frombytes(bytes(original))

        # NOTE(review): the restored rate is not exactly 0.05; presumably it
        # is stored with reduced precision in the byte form - confirm
        self.assertEqual(restored.false_positive_rate, 0.05000000074505806)
        self.assertEqual(restored.estimated_elements, 10)
        self.assertEqual(restored.number_hashes, 4)
        self.assertEqual(restored.number_bits, 63)
        self.assertEqual(restored.elements_added, 1)
        self.assertEqual(restored.is_on_disk, False)
        self.assertEqual(restored.bloom_length, 63 // 8 + 1)
Beispiel #37
0
    def test_another_hashing_algo(self):
        """Use a fully custom hashing strategy with the bloom filter.

        Verifies that the exported file differs from the one produced by the
        default hash, that added keys are found, and that ``blm.hashes``
        returns the values of the custom function. The exported file is
        removed in a ``finally`` clause before the digest is asserted, so a
        failing comparison does not leave ``test.blm`` behind.
        """
        md5_val = "7f590086f9b962387e145899dd001256"  # for default hash used
        filename = "test.blm"
        results = [
            14409285476674975580,
            1383622036369840193,
            10825905054403519891,
            3456253732347153957,
            1494124715262089992,
        ]

        def my_hash(key, depth, encoding="utf-8"):
            """Return ``depth`` hashes: SHA-512 of each left-rotation of
            ``key``, reduced modulo ``UINT64_T_MAX + 1``."""
            max64mod = UINT64_T_MAX + 1
            hashes = []  # distinct name: do not shadow the expected results
            for i in range(depth):
                rotated = key[i:] + key[:i]
                val = int(hashlib.sha512(rotated.encode(encoding)).hexdigest(), 16)
                hashes.append(val % max64mod)
            return hashes

        blm = BloomFilter(est_elements=10,
                          false_positive_rate=0.05,
                          hash_function=my_hash)

        self.assertEqual(blm.elements_added, 0)
        blm.add("this is a test")
        try:
            blm.export(filename)
            md5_out = calc_file_md5(filename)
        finally:
            # export may have failed before creating the file
            if os.path.exists(filename):
                os.remove(filename)
        self.assertNotEqual(md5_out, md5_val)

        for i in range(10):
            blm.add("this is a test {0}".format(i))

        self.assertEqual(blm.elements_added, 11)

        for i in range(10):
            self.assertTrue(blm.check("this is a test {0}".format(i)))

        self.assertEqual(blm.hashes("this is a test", 5), results)
        res = blm.hashes("this is a test", 1)
        self.assertEqual(len(res), 1)
        self.assertEqual(res[0], results[0])
Beispiel #38
0
    def test_bfod_jaccard(self):
        """Jaccard index of an on-disk filter against an in-memory filter."""
        temp_options = {
            "dir": os.getcwd(),
            "suffix": ".blm",
            "delete": DELETE_TEMP_FILES,
        }
        with NamedTemporaryFile(**temp_options) as handle:
            on_disk = BloomFilterOnDisk(handle.name, 20, 0.05)
            for key in ("this is a test", "this is another test"):
                on_disk.add(key)
            in_memory = BloomFilter(20, 0.05)
            for key in ("this is another test", "this is yet another test"):
                in_memory.add(key)

            # one key is shared between the two filters
            similarity = on_disk.jaccard_index(in_memory)
            self.assertGreater(similarity, 0.33)
            self.assertLess(similarity, 0.50)
            on_disk.close()
Beispiel #39
0
    def test_another_hashing_algo(self):
        ''' test defining a completely different hashing strategy

            the exported file is removed in a finally clause before the
            digest is compared, so a failing comparison cannot leave
            test.blm behind '''
        md5_val = '7f590086f9b962387e145899dd001256'  # for default hash used
        filename = 'test.blm'
        results = [14409285476674975580,
                   1383622036369840193,
                   10825905054403519891,
                   3456253732347153957,
                   1494124715262089992]

        def my_hash(key, depth, encoding='utf-8'):
            ''' return depth hashes: sha512 of each left-rotation of key,
                reduced modulo UINT64_T_MAX + 1 '''
            max64mod = UINT64_T_MAX + 1
            hashes = []  # distinct name: do not shadow the expected results
            for i in range(depth):
                rotated = key[i:] + key[:i]
                val = int(hashlib.sha512(rotated.encode(encoding)).hexdigest(), 16)
                hashes.append(val % max64mod)
            return hashes

        blm = BloomFilter(est_elements=10, false_positive_rate=0.05,
                          hash_function=my_hash)

        self.assertEqual(blm.elements_added, 0)
        blm.add('this is a test')
        try:
            blm.export(filename)
            md5_out = calc_file_md5(filename)
        finally:
            # export may have failed before creating the file
            if os.path.exists(filename):
                os.remove(filename)
        self.assertNotEqual(md5_out, md5_val)

        for i in range(10):
            blm.add('this is a test {0}'.format(i))

        self.assertEqual(blm.elements_added, 11)

        for i in range(10):
            self.assertTrue(blm.check('this is a test {0}'.format(i)))

        self.assertEqual(blm.hashes('this is a test', 5), results)
        res = blm.hashes('this is a test', 1)
        self.assertEqual(len(res), 1)
        self.assertEqual(res[0], results[0])
Beispiel #40
0
    def test_bf_use_different_hash(self):
        """Plug a decorated custom hash into the filter and verify its use."""
        default_md5 = "7f590086f9b962387e145899dd001256"  # for default hash used
        expected_hashes = [
            14409285476674975580,
            6203976290780191624,
            5074829385518853901,
            3953072760750514173,
            11782747630324011555,
        ]

        @hash_with_depth_int
        def my_hash(key, depth=1, encoding="utf-8"):
            """SHA-512 of the key reduced modulo UINT64_T_MAX + 1."""
            digest = hashlib.sha512(key.encode(encoding)).hexdigest()
            return int(digest, 16) % (UINT64_T_MAX + 1)

        blm = BloomFilter(est_elements=10,
                          false_positive_rate=0.05,
                          hash_function=my_hash)
        self.assertEqual(blm.elements_added, 0)
        blm.add("this is a test")
        temp_options = {
            "dir": os.getcwd(),
            "suffix": ".blm",
            "delete": DELETE_TEMP_FILES,
        }
        with NamedTemporaryFile(**temp_options) as handle:
            blm.export(handle.name)
            exported_md5 = calc_file_md5(handle.name)
        # a different hash function must produce a different export
        self.assertNotEqual(exported_md5, default_md5)

        keys = ["this is a test {0}".format(n) for n in range(0, 10)]
        for key in keys:
            blm.add(key)

        self.assertEqual(blm.elements_added, 11)

        for key in keys:
            self.assertTrue(blm.check(key))

        self.assertEqual(blm.hashes("this is a test", 5), expected_hashes)
        single = blm.hashes("this is a test", 1)
        self.assertEqual(len(single), 1)
        self.assertEqual(single[0], expected_hashes[0])
Beispiel #41
0
    def test_bf_export_c_header(self):
        """Export a filter as a C header and validate the generated text.

        Checks the fixed leading lines of the header against the filter's
        attributes, then reassembles the hex bytes of the ``bloom[]`` array
        and compares them with the expected hex string.
        """

        hex_val = "6da491461a6bba4d000000000000000a000000000000000a3d4ccccd"
        blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
        for i in range(10):
            blm.add("this is a test {0}".format(i))
        with NamedTemporaryFile(dir=os.getcwd(),
                                suffix=".blm",
                                delete=DELETE_TEMP_FILES) as fobj:
            blm.export_c_header(fobj.name)

            # now load the file, parse it and do some tests!
            # (a separate name is used so the temp-file handle is not rebound)
            with open(fobj.name, "r") as header:
                data = [line.strip() for line in header]

        self.assertEqual("/* BloomFilter Export of a standard BloomFilter */",
                         data[0])
        self.assertEqual("#include <inttypes.h>", data[1])
        self.assertEqual(
            "const uint64_t estimated_elements = {};".format(
                blm.estimated_elements), data[2])
        self.assertEqual(
            "const uint64_t elements_added = {};".format(blm.elements_added),
            data[3])
        self.assertEqual(
            "const float false_positive_rate = {};".format(
                blm.false_positive_rate), data[4])
        self.assertEqual(
            "const uint64_t number_bits = {};".format(blm.number_bits),
            data[5])
        self.assertEqual(
            "const unsigned int number_hashes = {};".format(blm.number_hashes),
            data[6])
        self.assertEqual("const unsigned char bloom[] = {", data[7])
        self.assertEqual("};", data[-1])

        # rebuild the hex version: join the array body lines, split on the
        # commas and drop each element's "0x" prefix
        new_hex = "".join(
            x.strip().replace("0x", "")
            for x in " ".join(data[8:-1]).split(",")
        )
        self.assertEqual(hex_val, new_hex)
Beispiel #42
0
    def test_bfod_union(self):
        """Union of an on-disk filter with an in-memory filter.

        The union must contain all three added keys and report three
        elements. The on-disk filter is closed and its file removed in a
        ``finally`` clause so a failing assertion does not leave
        ``tmp.blm`` behind.
        """
        filename = "tmp.blm"
        blm = BloomFilterOnDisk(filename, 10, 0.05)
        try:
            blm.add("this is a test")
            blm.add("this is another test")
            blm2 = BloomFilter(10, 0.05)
            blm2.add("this is yet another test")

            blm3 = blm.union(blm2)
            self.assertEqual(blm3.estimate_elements(), 3)
            self.assertEqual(blm3.elements_added, 3)
            self.assertEqual(blm3.check("this is a test"), True)
            self.assertEqual(blm3.check("this is another test"), True)
            self.assertEqual(blm3.check("this is yet another test"), True)
            self.assertEqual(blm3.check("this is not another test"), False)
        finally:
            blm.close()
            if os.path.exists(filename):
                os.remove(filename)
Beispiel #43
0
    def test_bfod_union(self):
        ''' test the union of two bloom filters on disk

            the on-disk filter is closed and tmp.blm removed in a finally
            clause so that a failing assertion cannot leak them '''
        filename = 'tmp.blm'
        blm = BloomFilterOnDisk(filename, 10, 0.05)
        try:
            blm.add('this is a test')
            blm.add('this is another test')
            blm2 = BloomFilter(10, 0.05)
            blm2.add('this is yet another test')

            blm3 = blm.union(blm2)
            self.assertEqual(blm3.estimate_elements(), 3)
            self.assertEqual(blm3.elements_added, 3)
            self.assertEqual(blm3.check('this is a test'), True)
            self.assertEqual(blm3.check('this is another test'), True)
            self.assertEqual(blm3.check('this is yet another test'), True)
            self.assertEqual(blm3.check('this is not another test'), False)
        finally:
            blm.close()
            if os.path.exists(filename):
                os.remove(filename)
Beispiel #44
0
    def test_bf_jaccard(self):
        ''' jaccard index of two filters sharing one of three keys '''
        first = BloomFilter(est_elements=10, false_positive_rate=0.05)
        for key in ('this is a test', 'this is another test'):
            first.add(key)
        second = BloomFilter(est_elements=10, false_positive_rate=0.05)
        for key in ('this is another test', 'this is yet another test'):
            second.add(key)

        similarity = first.jaccard_index(second)
        # expected to fall strictly between 0.33 and 0.50
        self.assertGreater(similarity, 0.33)
        self.assertLess(similarity, 0.50)
Beispiel #45
0
    def test_bf_jaccard(self):
        """Compute the jaccard index of two partially overlapping filters."""
        shared_key = "this is another test"
        left = BloomFilter(est_elements=10, false_positive_rate=0.05)
        left.add("this is a test")
        left.add(shared_key)
        right = BloomFilter(est_elements=10, false_positive_rate=0.05)
        right.add(shared_key)
        right.add("this is yet another test")

        index = left.jaccard_index(right)
        self.assertGreater(index, 0.33)
        self.assertLess(index, 0.50)
Beispiel #46
0
 def test_bf_stats(self):
     ''' str() of a filter holding ten keys matches the expected report '''
     expected_lines = [
         'BloomFilter:',
         '\tbits: 63',
         '\testimated elements: 10',
         '\tnumber hashes: 4',
         '\tmax false positive rate: 0.050000',
         '\tbloom length (8 bits): 8',
         '\telements added: 10',
         '\testimated elements added: 9',
         '\tcurrent false positive rate: 0.048806',
         '\texport size (bytes): 28',
         '\tnumber bits set: 29',
         '\tis on disk: no',
     ]
     # every line of the report, including the last, ends with a newline
     expected = '\n'.join(expected_lines) + '\n'
     blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
     for number in range(10):
         blm.add('this is a test {0}'.format(number))
     self.assertEqual(str(blm), expected)
Beispiel #47
0
    def test_bf_use_different_hash(self):
        ''' test that the different hash works as intended

            the exported file is removed in a finally clause before the
            digest is compared, so a failing comparison cannot leave
            test.blm behind '''
        md5_val = '7f590086f9b962387e145899dd001256'  # for default hash used
        filename = 'test.blm'
        results = [14409285476674975580,
                   6203976290780191624,
                   5074829385518853901,
                   3953072760750514173,
                   11782747630324011555]

        @hash_with_depth_int
        def my_hash(key, encoding='utf-8'):
            ''' sha512 of the key reduced modulo UINT64_T_MAX + 1 '''
            max64mod = UINT64_T_MAX + 1
            val = int(hashlib.sha512(key.encode(encoding)).hexdigest(), 16)
            return val % max64mod

        blm = BloomFilter(est_elements=10, false_positive_rate=0.05,
                          hash_function=my_hash)
        self.assertEqual(blm.elements_added, 0)
        blm.add('this is a test')
        try:
            blm.export(filename)
            md5_out = calc_file_md5(filename)
        finally:
            # export may have failed before creating the file
            if os.path.exists(filename):
                os.remove(filename)
        self.assertNotEqual(md5_out, md5_val)

        for i in range(10):
            blm.add('this is a test {0}'.format(i))

        self.assertEqual(blm.elements_added, 11)

        for i in range(10):
            self.assertTrue(blm.check('this is a test {0}'.format(i)))

        self.assertEqual(blm.hashes('this is a test', 5), results)
        res = blm.hashes('this is a test', 1)
        self.assertEqual(len(res), 1)
        self.assertEqual(res[0], results[0])
Beispiel #48
0
    def test_bf_intersection(self):
        ''' test the intersection of two bloom filters '''
        shared_key = 'this is another test'
        first = BloomFilter(est_elements=10, false_positive_rate=0.05)
        first.add('this is a test')
        first.add(shared_key)
        second = BloomFilter(est_elements=10, false_positive_rate=0.05)
        second.add(shared_key)
        second.add('this is yet another test')

        common = first.intersection(second)
        self.assertEqual(common.estimate_elements(), 1)
        self.assertEqual(common.elements_added, 1)
        # only the key present in both inputs is expected to be found
        expectations = (
            ('this is a test', False),
            ('this is another test', True),
            ('this is yet another test', False),
            ('this is not another test', False),
        )
        for key, present in expectations:
            self.assertEqual(common.check(key), present)
Beispiel #49
0
 def test_bf_intersection_invalid(self):
     ''' use an invalid type in a intersection

         the call under test is intersection(); the previous body invoked
         jaccard_index(), which did not match this test's name '''
     blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
     blm.add('this is a test')
     self.assertRaises(TypeError, blm.intersection, 1)
Beispiel #50
0
 def test_bf_ea(self):
     ''' elements_added starts at 0 and becomes 1 after a single add '''
     blm = BloomFilter(est_elements=10, false_positive_rate=0.05)
     count_before = blm.elements_added
     self.assertEqual(count_before, 0)
     blm.add('this is a test')
     count_after = blm.elements_added
     self.assertEqual(count_after, 1)