Exemple #1
0
 def test_cbf_jaccard_invalid(self):
     ''' use an invalid type in a jaccard index cbf

         jaccard_index(1) must raise TypeError. The on disk filter is
         closed and its backing file removed even if the assertion fails,
         so 'tmp.blm' is not left behind in the working directory.
     '''
     import os  # local import: cleanup must not depend on module scope

     filename = 'tmp.blm'
     blm = BloomFilterOnDisk(filename, est_elements=10,
                             false_positive_rate=0.05)
     try:
         blm.add('this is a test')
         with self.assertRaises(TypeError):
             blm.jaccard_index(1)
     finally:
         # release the filter before deleting the file it is backed by
         blm.close()
         os.remove(filename)
Exemple #2
0
 def test_bfod_ea(self):
     ''' test on disk elements added is correct

         elements_added must read 0 on a new filter and 1 after one add.
     '''
     filename = 'tmp.blm'
     blmd = BloomFilterOnDisk(filename, 10, 0.05)
     try:
         self.assertEqual(blmd.elements_added, 0)
         blmd.add('this is a test')
         self.assertEqual(blmd.elements_added, 1)
     finally:
         # always close and delete, even when an assertion above fails,
         # so the fixed file name 'tmp.blm' is never left behind
         blmd.close()
         os.remove(filename)
Exemple #3
0
 def test_bfod_ee(self):
     ''' test on disk estimate elements is correct on disk

         The estimate must be 0 for a new filter and 1 after a single add.
     '''
     filename = 'tmp.blm'
     blmd = BloomFilterOnDisk(filename, 10, 0.05)
     try:
         res1 = blmd.estimate_elements()
         blmd.add('this is a test')
         res2 = blmd.estimate_elements()
         self.assertNotEqual(res1, res2)
         self.assertEqual(res1, 0)
         self.assertEqual(res2, 1)
     finally:
         # close and delete even on failure so 'tmp.blm' never lingers
         blmd.close()
         os.remove(filename)
Exemple #4
0
 def test_bfod_check(self):
     ''' ensure the use of check works on disk bloom

         Two strings are added; check() must return True for both of them
         and False for two strings that were never added.
     '''
     filename = 'tmp.blm'
     blm = BloomFilterOnDisk(filename, 10, 0.05)
     try:
         blm.add('this is a test')
         blm.add('this is another test')
         self.assertEqual(blm.check('this is a test'), True)
         self.assertEqual(blm.check('this is another test'), True)
         self.assertEqual(blm.check('this is yet another test'), False)
         self.assertEqual(blm.check('this is not another test'), False)
     finally:
         # cleanup must run even when an assertion fails
         blm.close()
         os.remove(filename)
Exemple #5
0
 def test_bfod_check(self):
     """check() is True for added strings and False for strings never added"""
     added = ("this is a test", "this is another test")
     never_added = ("this is yet another test", "this is not another test")
     with NamedTemporaryFile(dir=os.getcwd(),
                             suffix=".blm",
                             delete=DELETE_TEMP_FILES) as tmp_file:
         bloom = BloomFilterOnDisk(tmp_file.name, 10, 0.05)
         for value in added:
             bloom.add(value)
         for value in added:
             self.assertEqual(bloom.check(value), True)
         for value in never_added:
             self.assertEqual(bloom.check(value), False)
         bloom.close()
Exemple #6
0
 def test_bfod_ee(self):
     """estimate_elements on disk goes from 0 to 1 after a single add"""
     with NamedTemporaryFile(dir=os.getcwd(),
                             suffix=".blm",
                             delete=DELETE_TEMP_FILES) as tmp_file:
         bloom = BloomFilterOnDisk(tmp_file.name, 20, 0.05)
         before = bloom.estimate_elements()
         bloom.add("this is a test")
         after = bloom.estimate_elements()
         # the two estimates must differ, and be exactly 0 and 1
         self.assertNotEqual(before, after)
         self.assertEqual(before, 0)
         self.assertEqual(after, 1)
         bloom.close()
Exemple #7
0
 def test_cbf_intersec_invalid_msg(self):
     ''' check invalid type in a intersection message cbf

         intersection(1) must raise TypeError carrying the expected
         message. The test fails if no exception is raised at all.
     '''
     msg = ('The parameter second must be of type BloomFilter or '
            'a BloomFilterOnDisk')
     filename = 'tmp.blm'
     blm = BloomFilterOnDisk(filename, est_elements=10,
                             false_positive_rate=0.05)
     try:
         blm.add('this is a test')
         # assertRaises fails the test when nothing is raised; a bare
         # try/except without an else branch would pass silently
         with self.assertRaises(TypeError) as ctx:
             blm.intersection(1)
         self.assertEqual(str(ctx.exception), msg)
     finally:
         # release the filter before deleting its backing file
         blm.close()
         os.remove(filename)
Exemple #8
0
    def test_bfod_union_diff(self):
        ''' make sure checking for different bloom filters on disk works union

            The second filter is built with hash_function=different_hash;
            union() with it is expected to return None.
        '''
        filename = 'tmp.blm'
        blm = BloomFilterOnDisk(filename, est_elements=10,
                                false_positive_rate=0.05)
        try:
            blm.add('this is a test')
            blm2 = BloomFilter(est_elements=10, false_positive_rate=0.05,
                               hash_function=different_hash)

            blm3 = blm.union(blm2)
            self.assertEqual(blm3, None)
        finally:
            # close first, then delete; runs even if the assertion fails
            blm.close()
            os.remove(filename)
Exemple #9
0
 def test_cbf_intersec_invalid_msg(self):
     """check invalid type in a intersection message cbf

     intersection(1) must raise TypeError with the expected message; the
     test fails if no exception is raised.
     """
     msg = "The parameter second must be of type BloomFilter or a BloomFilterOnDisk"
     with NamedTemporaryFile(dir=os.getcwd(),
                             suffix=".blm",
                             delete=DELETE_TEMP_FILES) as fobj:
         blm = BloomFilterOnDisk(fobj.name,
                                 est_elements=10,
                                 false_positive_rate=0.05)
         blm.add("this is a test")
         # assertRaises turns "nothing was raised" into a failure, which a
         # try/except without an else branch does not
         with self.assertRaises(TypeError) as ctx:
             blm.intersection(1)
         self.assertEqual(str(ctx.exception), msg)
Exemple #10
0
 def test_cbf_intersec_invalid_msg(self):
     ''' check invalid type in a intersection message cbf

         Passing an int to intersection() must raise TypeError with the
         expected text; not raising is treated as a failure.
     '''
     msg = ('The parameter second must be of type BloomFilter or '
            'a BloomFilterOnDisk')
     filename = 'tmp.blm'
     blm = BloomFilterOnDisk(filename,
                             est_elements=10,
                             false_positive_rate=0.05)
     try:
         blm.add('this is a test')
         # without assertRaises the test would pass when no error occurs
         with self.assertRaises(TypeError) as ctx:
             blm.intersection(1)
         self.assertEqual(str(ctx.exception), msg)
     finally:
         # guaranteed cleanup: close the filter, then delete 'tmp.blm'
         blm.close()
         os.remove(filename)
Exemple #11
0
    def test_bfod_export_c_header(self):
        """export an on disk filter as a C header and verify what was written"""
        hex_val = "6da491461a6bba4d000000000000000a000000000000000a3d4ccccd"
        with NamedTemporaryFile(dir=os.getcwd(),
                                suffix=".blm",
                                delete=DELETE_TEMP_FILES) as bloom_file:
            blm = BloomFilterOnDisk(bloom_file.name,
                                    est_elements=10,
                                    false_positive_rate=0.05)
            for num in range(0, 10):
                blm.add("this is a test {0}".format(num))
            with NamedTemporaryFile(dir=os.getcwd(),
                                    suffix=".blm",
                                    delete=DELETE_TEMP_FILES) as header_file:
                blm.export_c_header(header_file.name)

                # read the generated header back in for inspection
                with open(header_file.name, "r") as reader:
                    data = reader.readlines()

        data = [line.strip() for line in data]

        # the first eight lines of the header, in order
        expected_head = [
            "/* BloomFilter Export of a standard BloomFilter */",
            "#include <inttypes.h>",
            "const uint64_t estimated_elements = {};".format(
                blm.estimated_elements),
            "const uint64_t elements_added = {};".format(blm.elements_added),
            "const float false_positive_rate = {};".format(
                blm.false_positive_rate),
            "const uint64_t number_bits = {};".format(blm.number_bits),
            "const unsigned int number_hashes = {};".format(blm.number_hashes),
            "const unsigned char bloom[] = {",
        ]
        for idx, expected in enumerate(expected_head):
            self.assertEqual(expected, data[idx])
        self.assertEqual("};", data[-1])

        # rebuild the hex string from the comma separated 0x.. array body
        array_body = " ".join(data[8:-1])
        new_hex = "".join(
            chunk.strip().replace("0x", "") for chunk in array_body.split(","))
        self.assertEqual(hex_val, new_hex)
Exemple #12
0
 def test_bfod_close_del(self):
     """ close an on disk bloom using the del syntax

     After ``del blm`` any use of the name must raise UnboundLocalError.
     Only the exception type is asserted: the message text differs
     between Python versions (it was reworded in Python 3.11).
     """
     filename = "tmp.blm"
     blm = BloomFilterOnDisk(filename, 10, 0.05)
     try:
         blm.add("this is a test")
         del blm
         # assertRaises also fails the test if nothing is raised
         with self.assertRaises(UnboundLocalError):
             self.assertEqual(True, blm)
     finally:
         # remove the backing file even if an assertion above fails
         os.remove(filename)
Exemple #13
0
    def test_bfod_union_diff(self):
        """make sure checking for different bloom filters on disk works union

        The in-memory filter uses hash_function=different_hash; union()
        with it is expected to return None.
        """
        filename = "tmp.blm"
        blm = BloomFilterOnDisk(filename,
                                est_elements=10,
                                false_positive_rate=0.05)
        try:
            blm.add("this is a test")
            blm2 = BloomFilter(est_elements=10,
                               false_positive_rate=0.05,
                               hash_function=different_hash)

            blm3 = blm.union(blm2)
            self.assertEqual(blm3, None)
        finally:
            # close the filter before deleting its file; always executed
            blm.close()
            os.remove(filename)
Exemple #14
0
 def test_bfod_export_hex(self):
     """export_hex of an on disk filter holding ten strings matches a known value"""
     expected_hex = "6da491461a6bba4d000000000000000a000000000000000a3d4ccccd"
     with NamedTemporaryFile(dir=os.getcwd(),
                             suffix=".blm",
                             delete=DELETE_TEMP_FILES) as tmp_file:
         bloom = BloomFilterOnDisk(tmp_file.name,
                                   est_elements=10,
                                   false_positive_rate=0.05)
         for num in range(10):
             bloom.add("this is a test {0}".format(num))
         self.assertEqual(bloom.export_hex(), expected_hex)
Exemple #15
0
 def test_bfod_close_del(self):
     ''' close an on disk bloom using the del syntax

         Using the name after ``del blm`` must raise UnboundLocalError.
         The message is not compared because its wording depends on the
         Python version (reworded in 3.11).
     '''
     filename = 'tmp.blm'
     blm = BloomFilterOnDisk(filename, 10, 0.05)
     try:
         blm.add('this is a test')
         del blm
         # fails the test when no UnboundLocalError is raised
         with self.assertRaises(UnboundLocalError):
             self.assertEqual(True, blm)
     finally:
         # delete 'tmp.blm' regardless of the outcome above
         os.remove(filename)
Exemple #16
0
    def test_bfod_export(self):
        ''' export to on disk to new file

            The exported copy must have the same MD5 as the original
            backing file once the filter has been closed.
        '''
        filename = 'tmp.blm'
        filename2 = 'tmp2.blm'
        blm = BloomFilterOnDisk(filename, 10, 0.05)
        try:
            blm.add('this is a test')

            blm.export(filename2)
            blm.close()

            md5_1 = calc_file_md5(filename)
            md5_2 = calc_file_md5(filename2)
            self.assertEqual(md5_1, md5_2)
        finally:
            # remove whatever was created, even after a failure; the
            # export target may not exist if export() itself failed
            for path in (filename, filename2):
                if os.path.exists(path):
                    os.remove(path)
Exemple #17
0
    def test_bfod_jaccard(self):
        ''' test the on disk jaccard index of two bloom filters

            The two filters share one of their two strings; the index is
            required to lie strictly between 0.33 and 0.50.
        '''
        filename = 'tmp.blm'
        blm = BloomFilterOnDisk(filename, 10, 0.05)
        try:
            blm.add('this is a test')
            blm.add('this is another test')
            blm2 = BloomFilter(10, 0.05)
            blm2.add('this is another test')
            blm2.add('this is yet another test')

            res = blm.jaccard_index(blm2)
            self.assertGreater(res, 0.33)
            self.assertLess(res, 0.50)
        finally:
            # cleanup must not be skipped when an assertion fails
            blm.close()
            os.remove(filename)
Exemple #18
0
 def test_cbf_union_invalid_msg(self):
     """ check invalid type in a union message cbf

     union(1) must raise TypeError with the expected message; the test
     fails when no exception is raised.
     """
     msg = "The parameter second must be of type BloomFilter or a BloomFilterOnDisk"
     filename = "tmp.blm"
     blm = BloomFilterOnDisk(filename,
                             est_elements=10,
                             false_positive_rate=0.05)
     try:
         blm.add("this is a test")
         # assertRaises reports "TypeError not raised" instead of the
         # uninformative "True != False" of a manual else branch
         with self.assertRaises(TypeError) as ctx:
             blm.union(1)
         self.assertEqual(str(ctx.exception), msg)
     finally:
         # close the filter, then delete its file, even after a failure
         blm.close()
         os.remove(filename)
Exemple #19
0
 def test_bfod_frombytes_msg(self):
     """frombytes on the on disk filter raises NotSupportedError with a fixed message"""
     with NamedTemporaryFile(dir=os.getcwd(),
                             suffix=".blm",
                             delete=DELETE_TEMP_FILES) as tmp_file:
         bloom = BloomFilterOnDisk(tmp_file.name, 10, 0.05)
         bloom.add("this is a test")
         raw = bytes(bloom)
     try:
         BloomFilterOnDisk.frombytes(raw)
     except NotSupportedError as err:
         expected = "Loading from bytes is currently not supported by the on disk Bloom Filter"
         self.assertEqual(str(err), expected)
     else:
         # reaching this branch means no exception was raised
         self.assertEqual(True, False)
Exemple #20
0
    def test_bfod_export(self):
        """ export to on disk to new file

        After closing the filter, the exported file and the original
        backing file must have identical MD5 digests.
        """
        filename = "tmp.blm"
        filename2 = "tmp2.blm"
        blm = BloomFilterOnDisk(filename, 10, 0.05)
        try:
            blm.add("this is a test")

            blm.export(filename2)
            blm.close()

            md5_1 = calc_file_md5(filename)
            md5_2 = calc_file_md5(filename2)
            self.assertEqual(md5_1, md5_2)
        finally:
            # delete both files even on failure; guard each removal since
            # the export target is missing when export() did not succeed
            for path in (filename, filename2):
                if os.path.exists(path):
                    os.remove(path)
Exemple #21
0
    def test_bfod_jaccard(self):
        """ test the on disk jaccard index of two bloom filters

        Each filter holds two strings, one of which is common to both;
        the index must be greater than 0.33 and less than 0.50.
        """
        filename = "tmp.blm"
        blm = BloomFilterOnDisk(filename, 10, 0.05)
        try:
            blm.add("this is a test")
            blm.add("this is another test")
            blm2 = BloomFilter(10, 0.05)
            blm2.add("this is another test")
            blm2.add("this is yet another test")

            res = blm.jaccard_index(blm2)
            self.assertGreater(res, 0.33)
            self.assertLess(res, 0.50)
        finally:
            # always close and delete so "tmp.blm" is not left behind
            blm.close()
            os.remove(filename)
Exemple #22
0
    def test_bfod_jaccard_diff(self):
        """jaccard_index against a filter built with another hash function gives None"""
        with NamedTemporaryFile(dir=os.getcwd(),
                                suffix=".blm",
                                delete=DELETE_TEMP_FILES) as tmp_file:
            on_disk = BloomFilterOnDisk(tmp_file.name,
                                        est_elements=10,
                                        false_positive_rate=0.05)
            on_disk.add("this is a test")
            # same sizing as the on disk filter, different hash function
            other = BloomFilter(est_elements=10,
                                false_positive_rate=0.05,
                                hash_function=different_hash)

            self.assertEqual(on_disk.jaccard_index(other), None)
Exemple #23
0
 def test_bfod_close_del(self):
     """close an on disk bloom using the del syntax

     Using the name after ``del blm`` must raise UnboundLocalError. The
     message text is deliberately not compared: its wording depends on
     the Python version (it was reworded in 3.11).
     """
     with NamedTemporaryFile(dir=os.getcwd(),
                             suffix=".blm",
                             delete=DELETE_TEMP_FILES) as fobj:
         blm = BloomFilterOnDisk(fobj.name, 10, 0.05)
         blm.add("this is a test")
         del blm
         # assertRaises also fails the test if nothing is raised
         with self.assertRaises(UnboundLocalError):
             self.assertEqual(True, blm)
Exemple #24
0
    def test_bfod_export(self):
        ''' export to on disk to new file

            The export target and the closed original file are compared
            by MD5 digest and must match.
        '''
        filename = 'tmp.blm'
        filename2 = 'tmp2.blm'
        blm = BloomFilterOnDisk(filename, 10, 0.05)
        try:
            blm.add('this is a test')

            blm.export(filename2)
            blm.close()

            md5_1 = calc_file_md5(filename)
            md5_2 = calc_file_md5(filename2)
            self.assertEqual(md5_1, md5_2)
        finally:
            # unconditional cleanup; the second file only exists when
            # export() got far enough to create it
            for path in (filename, filename2):
                if os.path.exists(path):
                    os.remove(path)
Exemple #25
0
    def test_bfod_jaccard(self):
        """jaccard index of an on disk and an in-memory filter sharing one string"""
        shared = "this is another test"
        with NamedTemporaryFile(dir=os.getcwd(),
                                suffix=".blm",
                                delete=DELETE_TEMP_FILES) as tmp_file:
            on_disk = BloomFilterOnDisk(tmp_file.name, 20, 0.05)
            on_disk.add("this is a test")
            on_disk.add(shared)
            in_memory = BloomFilter(20, 0.05)
            in_memory.add(shared)
            in_memory.add("this is yet another test")

            index = on_disk.jaccard_index(in_memory)
            # the index must lie strictly between the two bounds
            self.assertGreater(index, 0.33)
            self.assertLess(index, 0.50)
            on_disk.close()
Exemple #26
0
    def test_bfod_jaccard_diff(self):
        ''' make sure checking for different bloom filters on disk works
            jaccard

            The in-memory filter uses hash_function=different_hash;
            jaccard_index() with it is expected to return None.
        '''
        filename = 'tmp.blm'
        blm = BloomFilterOnDisk(filename,
                                est_elements=10,
                                false_positive_rate=0.05)
        try:
            blm.add('this is a test')
            blm2 = BloomFilter(est_elements=10,
                               false_positive_rate=0.05,
                               hash_function=different_hash)

            blm3 = blm.jaccard_index(blm2)
            self.assertEqual(blm3, None)
        finally:
            # close before deleting the backing file; always executed
            blm.close()
            os.remove(filename)
Exemple #27
0
    def test_bfod_clear(self):
        ''' test clearing out the bloom filter on disk

            After ten adds, clear() must reset elements_added to 0 and
            every element read through _get_element must be 0.
        '''
        filename = 'tmp.blm'
        blm = BloomFilterOnDisk(filepath=filename, est_elements=10,
                                false_positive_rate=0.05)
        try:
            self.assertEqual(blm.elements_added, 0)
            for i in range(0, 10):
                tmp = 'this is a test {0}'.format(i)
                blm.add(tmp)
            self.assertEqual(blm.elements_added, 10)

            blm.clear()
            self.assertEqual(blm.elements_added, 0)
            for idx in range(blm.bloom_length):
                self.assertEqual(blm._get_element(idx), 0)
        finally:
            # close the filter and delete its file even after a failure
            blm.close()
            os.remove(filename)
Exemple #28
0
    def test_bfod_export(self):
        """exported copy of an on disk filter has the same MD5 as the original file"""
        with NamedTemporaryFile(dir=os.getcwd(),
                                suffix=".blm",
                                delete=DELETE_TEMP_FILES) as source_file:
            with NamedTemporaryFile(dir=os.getcwd(),
                                    suffix=".blm",
                                    delete=DELETE_TEMP_FILES) as export_file:
                bloom = BloomFilterOnDisk(source_file.name, 10, 0.05)
                bloom.add("this is a test")

                bloom.export(export_file.name)
                bloom.close()

                # compare the two files on disk by digest
                source_md5 = calc_file_md5(source_file.name)
                export_md5 = calc_file_md5(export_file.name)
                self.assertEqual(source_md5, export_md5)
Exemple #29
0
    def test_bfod_clear(self):
        """ test clearing out the bloom filter on disk

        Ten strings are added, then clear() must bring elements_added
        back to 0 and leave every _get_element(idx) equal to 0.
        """
        filename = "tmp.blm"
        blm = BloomFilterOnDisk(filepath=filename,
                                est_elements=10,
                                false_positive_rate=0.05)
        try:
            self.assertEqual(blm.elements_added, 0)
            for i in range(0, 10):
                tmp = "this is a test {0}".format(i)
                blm.add(tmp)
            self.assertEqual(blm.elements_added, 10)

            blm.clear()
            self.assertEqual(blm.elements_added, 0)
            for idx in range(blm.bloom_length):
                self.assertEqual(blm._get_element(idx), 0)
        finally:
            # guaranteed cleanup: close, then remove "tmp.blm"
            blm.close()
            os.remove(filename)
Exemple #30
0
    def test_bfod_union(self):
        """ test the union of two bloom filters on disk

        The union of a two-string on disk filter and a one-string
        in-memory filter must report three elements and contain all
        three strings, but not a string that was never added.
        """
        filename = "tmp.blm"
        blm = BloomFilterOnDisk(filename, 10, 0.05)
        try:
            blm.add("this is a test")
            blm.add("this is another test")
            blm2 = BloomFilter(10, 0.05)
            blm2.add("this is yet another test")

            blm3 = blm.union(blm2)
            self.assertEqual(blm3.estimate_elements(), 3)
            self.assertEqual(blm3.elements_added, 3)
            self.assertEqual(blm3.check("this is a test"), True)
            self.assertEqual(blm3.check("this is another test"), True)
            self.assertEqual(blm3.check("this is yet another test"), True)
            self.assertEqual(blm3.check("this is not another test"), False)
        finally:
            # cleanup runs even when one of the assertions fails
            blm.close()
            os.remove(filename)
Exemple #31
0
    def test_bfod_clear(self):
        """clear() resets the element count and zeroes every element on disk"""
        with NamedTemporaryFile(dir=os.getcwd(),
                                suffix=".blm",
                                delete=DELETE_TEMP_FILES) as tmp_file:
            bloom = BloomFilterOnDisk(filepath=tmp_file.name,
                                      est_elements=10,
                                      false_positive_rate=0.05)
            self.assertEqual(bloom.elements_added, 0)
            for num in range(10):
                bloom.add("this is a test {0}".format(num))
            self.assertEqual(bloom.elements_added, 10)

            bloom.clear()
            self.assertEqual(bloom.elements_added, 0)
            # walk the whole filter and make sure nothing is left set
            for position in range(bloom.bloom_length):
                self.assertEqual(bloom._get_element(position), 0)
Exemple #32
0
    def test_bfod_union(self):
        ''' test the union of two bloom filters on disk

            The result must report three elements, contain the three
            added strings and not contain a string that was never added.
        '''
        filename = 'tmp.blm'
        blm = BloomFilterOnDisk(filename, 10, 0.05)
        try:
            blm.add('this is a test')
            blm.add('this is another test')
            blm2 = BloomFilter(10, 0.05)
            blm2.add('this is yet another test')

            blm3 = blm.union(blm2)
            self.assertEqual(blm3.estimate_elements(), 3)
            self.assertEqual(blm3.elements_added, 3)
            self.assertEqual(blm3.check('this is a test'), True)
            self.assertEqual(blm3.check('this is another test'), True)
            self.assertEqual(blm3.check('this is yet another test'), True)
            self.assertEqual(blm3.check('this is not another test'), False)
        finally:
            # always close and delete so 'tmp.blm' is not left behind
            blm.close()
            os.remove(filename)
Exemple #33
0
    def test_bfod_intersection(self):
        ''' test the intersection of two bloom filters on disk

            Both filters hold two strings with one in common; the
            intersection must report one element and contain only the
            common string.
        '''
        filename = 'tmp.blm'
        blm = BloomFilterOnDisk(filename, 10, 0.05)
        try:
            blm.add('this is a test')
            blm.add('this is another test')
            blm2 = BloomFilter(10, 0.05)
            blm2.add('this is another test')
            blm2.add('this is yet another test')

            blm3 = blm.intersection(blm2)
            self.assertEqual(blm3.estimate_elements(), 1)
            self.assertEqual(blm3.elements_added, 1)
            self.assertEqual(blm3.check('this is a test'), False)
            self.assertEqual(blm3.check('this is another test'), True)
            self.assertEqual(blm3.check('this is yet another test'), False)
            self.assertEqual(blm3.check('this is not another test'), False)
        finally:
            # cleanup must not be skipped when an assertion fails
            blm.close()
            os.remove(filename)
Exemple #34
0
    def test_bfod_union(self):
        """union of an on disk and an in-memory filter holds the strings of both"""
        on_disk_items = ("this is a test", "this is another test")
        in_memory_item = "this is yet another test"
        with NamedTemporaryFile(dir=os.getcwd(),
                                suffix=".blm",
                                delete=DELETE_TEMP_FILES) as tmp_file:
            on_disk = BloomFilterOnDisk(tmp_file.name, 20, 0.05)
            for item in on_disk_items:
                on_disk.add(item)
            in_memory = BloomFilter(20, 0.05)
            in_memory.add(in_memory_item)

            merged = on_disk.union(in_memory)
            self.assertEqual(merged.estimate_elements(), 3)
            self.assertEqual(merged.elements_added, 3)
            # everything added to either side must be found in the union
            for item in on_disk_items + (in_memory_item,):
                self.assertEqual(merged.check(item), True)
            self.assertEqual(merged.check("this is not another test"), False)
            on_disk.close()