예제 #1
0
 def test_cbf_jaccard_invalid(self):
     ''' jaccard_index must raise TypeError for a non Bloom Filter argument

         The on disk filter is backed by `tmp.blm`; the filter is closed and
         the file removed even if the assertion fails, so no artifact is
         left behind for other tests that reuse the same filename.
     '''
     import os  # function-scope import: only needed for the cleanup below

     filename = 'tmp.blm'
     blm = BloomFilterOnDisk(filename, est_elements=10,
                             false_positive_rate=0.05)
     try:
         blm.add('this is a test')
         # an int is neither a BloomFilter nor a BloomFilterOnDisk
         self.assertRaises(TypeError, lambda: blm.jaccard_index(1))
     finally:
         blm.close()
         os.remove(filename)
예제 #2
0
 def test_cbf_jaccard_invalid(self):
     """ use an invalid type in a jaccard index on an on disk bloom filter

         The backing file "tmp.blm" is released in a ``finally`` block so
         that a failing assertion cannot leave the file on disk.
     """
     import os  # function-scope import: only needed for the cleanup below

     filename = "tmp.blm"
     blm = BloomFilterOnDisk(filename,
                             est_elements=10,
                             false_positive_rate=0.05)
     try:
         blm.add("this is a test")
         # the argument is an int, so a TypeError is expected
         self.assertRaises(TypeError, lambda: blm.jaccard_index(1))
     finally:
         blm.close()
         os.remove(filename)
예제 #3
0
 def test_cbf_intersection_invalid(self):
     ''' intersection must raise TypeError for a non Bloom Filter argument '''
     filename = 'tmp.blm'
     blm = BloomFilterOnDisk(filename,
                             est_elements=10,
                             false_positive_rate=0.05)
     try:
         blm.add('this is a test')
         # exercise intersection (not jaccard_index) so the test covers what
         # its name and docstring say it covers
         self.assertRaises(TypeError, lambda: blm.intersection(1))
     finally:
         # always release the backing file, even when the assertion fails
         blm.close()
         os.remove(filename)
예제 #4
0
 def test_bfod_ea(self):
     """The on disk filter reports how many elements have been added."""
     path = "tmp.blm"
     on_disk = BloomFilterOnDisk(path, 10, 0.05)

     # nothing has been inserted yet
     count_before = on_disk.elements_added
     self.assertEqual(count_before, 0)

     on_disk.add("this is a test")
     count_after = on_disk.elements_added
     self.assertEqual(count_after, 1)

     on_disk.close()
     os.remove(path)
예제 #5
0
 def test_bfod_frombytes(self):
     """Building an on disk Bloom Filter from bytes raises NotSupportedError."""
     with NamedTemporaryFile(
         dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES
     ) as tmp_file:
         on_disk = BloomFilterOnDisk(tmp_file.name, 10, 0.05)
         on_disk.add("this is a test")
         serialized = bytes(on_disk)
     # the callable and its argument are handed to assertRaises directly
     self.assertRaises(NotSupportedError, BloomFilterOnDisk.frombytes, serialized)
예제 #6
0
 def test_bfod_jaccard_invalid(self):
     """jaccard_index on an on disk filter rejects an int with TypeError."""
     with NamedTemporaryFile(
         dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES
     ) as tmp_file:
         on_disk = BloomFilterOnDisk(
             tmp_file.name, est_elements=10, false_positive_rate=0.05
         )
         on_disk.add("this is a test")
         with self.assertRaises(TypeError):
             on_disk.jaccard_index(1)
예제 #7
0
 def test_bfod_bytes(self):
     """The bytes export of an on disk Bloom Filter has the expected MD5."""
     expected_digest = "8d27e30e1c5875b0edcf7413c7bdb221"
     with NamedTemporaryFile(
         dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES
     ) as tmp_file:
         on_disk = BloomFilterOnDisk(tmp_file.name, 10, 0.05)
         on_disk.add("this is a test")
         # hash the exported bytes and compare against the known digest
         actual_digest = hashlib.md5(bytes(on_disk)).hexdigest()
         self.assertEqual(actual_digest, expected_digest)
예제 #8
0
 def test_bfod_all_bits_set(self):
     """Adding 100 elements to a filter sized for 10 makes the estimate -1."""
     with NamedTemporaryFile(
         dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES
     ) as tmp_file:
         blm = BloomFilterOnDisk(
             tmp_file.name, est_elements=10, false_positive_rate=0.05
         )
         for value in map(str, range(100)):
             blm.add(value)
     # NOTE: this causes an exception when all bits are set
     estimate = blm.estimate_elements()
     self.assertEqual(-1, estimate)
예제 #9
0
 def test_bfod_check(self):
     """check() is True for added strings and False for the others tested."""
     path = "tmp.blm"
     on_disk = BloomFilterOnDisk(path, 10, 0.05)
     for phrase in ("this is a test", "this is another test"):
         on_disk.add(phrase)

     # (phrase, expected result of check), in the original assertion order
     expectations = (
         ("this is a test", True),
         ("this is another test", True),
         ("this is yet another test", False),
         ("this is not another test", False),
     )
     for phrase, expected in expectations:
         self.assertEqual(on_disk.check(phrase), expected)

     on_disk.close()
     os.remove(path)
예제 #10
0
 def test_bfod_export_hex_msg(self):
     """export_hex raises NotSupportedError carrying the expected message."""
     path = "tmp.blm"
     on_disk = BloomFilterOnDisk(path, 10, 0.05)
     expected = "`export_hex` is currently not supported by the on disk Bloom Filter"
     # the test fails if export_hex returns without raising
     with self.assertRaises(NotSupportedError) as caught:
         on_disk.export_hex()
     self.assertEqual(str(caught.exception), expected)
     os.remove(path)
예제 #11
0
 def test_cbf_intersec_invalid_msg(self):
     ''' intersection with an invalid type raises TypeError with the
         expected message

         assertRaises is used so the test fails when no exception is raised
         at all, instead of silently passing.
     '''
     msg = ('The parameter second must be of type BloomFilter or '
            'a BloomFilterOnDisk')
     filename = 'tmp.blm'
     blm = BloomFilterOnDisk(filename, est_elements=10,
                             false_positive_rate=0.05)
     try:
         blm.add('this is a test')
         with self.assertRaises(TypeError) as caught:
             blm.intersection(1)
         self.assertEqual(str(caught.exception), msg)
     finally:
         # remove the backing file whether or not the assertions pass
         os.remove(filename)
예제 #12
0
    def test_bfod_union_diff(self):
        """union with a filter using a different hash function gives None."""
        path = "tmp.blm"
        on_disk = BloomFilterOnDisk(
            path, est_elements=10, false_positive_rate=0.05
        )
        on_disk.add("this is a test")
        # same sizing, but built with `different_hash`
        mismatched = BloomFilter(
            est_elements=10,
            false_positive_rate=0.05,
            hash_function=different_hash,
        )

        merged = on_disk.union(mismatched)
        self.assertEqual(merged, None)
        os.remove(path)
예제 #13
0
 def test_bfod_export_hex_msg(self):
     """Check the message of the NotSupportedError raised by export_hex."""
     tmp_path = "tmp.blm"
     bloom = BloomFilterOnDisk(tmp_path, 10, 0.05)
     with self.assertRaises(NotSupportedError) as ctx:
         bloom.export_hex()
     # compare the raised error's text with the expected wording
     wanted = (
         "`export_hex` is currently not supported by the on disk "
         "Bloom Filter"
     )
     self.assertEqual(str(ctx.exception), wanted)
     os.remove(tmp_path)
예제 #14
0
    def test_bfod_load_on_disk(self):
        """A filter exported to a file can be reopened as an on disk filter."""
        path = "tmp.blm"

        # build an in-memory filter and write it out
        in_memory = BloomFilter(10, 0.05)
        in_memory.add("this is a test")
        in_memory.export(path)

        # reopen the same file through the on disk implementation
        on_disk = BloomFilterOnDisk(path)
        has_added = "this is a test" in on_disk
        self.assertEqual(has_added, True)
        has_missing = "this is not a test" in on_disk
        self.assertEqual(has_missing, False)
        on_disk.close()
        os.remove(path)
예제 #15
0
    def test_bfod_load_on_disk(self):
        """Load a previously exported Bloom Filter through BloomFilterOnDisk."""
        saved_to = "tmp.blm"

        source = BloomFilter(10, 0.05)
        source.add("this is a test")
        source.export(saved_to)

        loaded = BloomFilterOnDisk(saved_to)
        # membership is checked with the `in` operator, as in the original
        for phrase, expected in (
            ("this is a test", True),
            ("this is not a test", False),
        ):
            self.assertEqual(phrase in loaded, expected)
        loaded.close()
        os.remove(saved_to)
예제 #16
0
 def test_bfod_init(self):
     """A new on disk filter exposes the expected initial attributes."""
     path = "tmp.blm"
     on_disk = BloomFilterOnDisk(path, 10, 0.05)

     # (attribute name, expected value), checked in the original order
     expected_state = (
         ("false_positive_rate", 0.05000000074505806),
         ("estimated_elements", 10),
         ("number_hashes", 4),
         ("number_bits", 63),
         ("elements_added", 0),
         ("is_on_disk", True),
         ("bloom_length", 63 // 8 + 1),
     )
     for attribute, value in expected_state:
         self.assertEqual(getattr(on_disk, attribute), value)

     on_disk.close()
     os.remove(path)
예제 #17
0
    def test_bfod_load_on_disk(self):
        """An exported in-memory filter can be opened as an on disk filter."""
        with NamedTemporaryFile(
            dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES
        ) as tmp_file:
            in_memory = BloomFilter(10, 0.05)
            in_memory.add("this is a test")
            in_memory.export(tmp_file.name)

            on_disk = BloomFilterOnDisk(tmp_file.name)
            was_added = "this is a test" in on_disk
            self.assertEqual(was_added, True)
            never_added = "this is not a test" in on_disk
            self.assertEqual(never_added, False)
            on_disk.close()
예제 #18
0
    def test_bfod_load_on_disk(self):
        """Export a BloomFilter to a file and read it back from disk."""
        blm_path = "tmp.blm"

        exported = BloomFilter(10, 0.05)
        exported.add("this is a test")
        exported.export(blm_path)

        reopened = BloomFilterOnDisk(blm_path)
        # the added string is found; the other string is not
        found = "this is a test" in reopened
        self.assertEqual(found, True)
        not_found = "this is not a test" in reopened
        self.assertEqual(not_found, False)
        reopened.close()
        os.remove(blm_path)
예제 #19
0
 def test_bfod_init(self):
     """Check the attributes of a freshly created on disk Bloom Filter."""
     tmp_path = "tmp.blm"
     bloom = BloomFilterOnDisk(tmp_path, 10, 0.05)

     # sizing parameters
     fpr = bloom.false_positive_rate
     self.assertEqual(fpr, 0.05000000074505806)
     capacity = bloom.estimated_elements
     self.assertEqual(capacity, 10)
     hashes = bloom.number_hashes
     self.assertEqual(hashes, 4)
     bits = bloom.number_bits
     self.assertEqual(bits, 63)

     # state and storage
     added = bloom.elements_added
     self.assertEqual(added, 0)
     on_disk_flag = bloom.is_on_disk
     self.assertEqual(on_disk_flag, True)
     length = bloom.bloom_length
     self.assertEqual(length, 63 // 8 + 1)

     bloom.close()
     os.remove(tmp_path)
예제 #20
0
 def test_bfod_close_del(self):
     ''' close an on disk bloom using the del syntax

         After `del blm` the local name is unbound, so reading it must
         raise UnboundLocalError.  Only the exception type is checked: the
         message text is interpreter-version specific (it was reworded in
         Python 3.11), so comparing it makes the test fail on newer
         interpreters.
     '''
     filename = 'tmp.blm'
     blm = BloomFilterOnDisk(filename, 10, 0.05)
     blm.add('this is a test')
     del blm
     # the test fails if the lookup of `blm` does not raise
     with self.assertRaises(UnboundLocalError):
         self.assertEqual(True, blm)
     os.remove(filename)
예제 #21
0
 def test_bfod_init(self):
     """Verify the initial attribute values of an on disk Bloom Filter."""
     with NamedTemporaryFile(
         dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES
     ) as tmp_file:
         on_disk = BloomFilterOnDisk(tmp_file.name, 10, 0.05)
         # attribute name paired with its expected value, in original order
         checks = [
             ("false_positive_rate", 0.05000000074505806),
             ("estimated_elements", 10),
             ("number_hashes", 4),
             ("number_bits", 63),
             ("elements_added", 0),
             ("is_on_disk", True),
             ("bloom_length", 63 // 8 + 1),
         ]
         for name, expected in checks:
             self.assertEqual(getattr(on_disk, name), expected)
         on_disk.close()
예제 #22
0
 def test_bfod_close_del(self):
     """ close an on disk bloom using the del syntax

         Reading `blm` after `del blm` must raise UnboundLocalError.  The
         exception message is not compared because its wording depends on
         the Python version (it changed in 3.11), which made the previous
         exact-match assertion fail on newer interpreters.
     """
     filename = "tmp.blm"
     blm = BloomFilterOnDisk(filename, 10, 0.05)
     blm.add("this is a test")
     del blm
     # assertRaises fails the test when no UnboundLocalError is raised
     with self.assertRaises(UnboundLocalError):
         self.assertEqual(True, blm)
     os.remove(filename)
예제 #23
0
    def test_bfod_union_diff(self):
        """union returns None when the other filter uses `different_hash`."""
        tmp_path = "tmp.blm"
        shared_sizing = {"est_elements": 10, "false_positive_rate": 0.05}

        disk_filter = BloomFilterOnDisk(tmp_path, **shared_sizing)
        disk_filter.add("this is a test")
        # identical sizing, different hash function
        other_filter = BloomFilter(hash_function=different_hash, **shared_sizing)

        self.assertEqual(disk_filter.union(other_filter), None)
        os.remove(tmp_path)
예제 #24
0
 def test_bfod_export_hex(self):
     """export_hex on an on disk filter returns the expected hex string."""
     expected_hex = "6da491461a6bba4d000000000000000a000000000000000a3d4ccccd"
     with NamedTemporaryFile(
         dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES
     ) as tmp_file:
         on_disk = BloomFilterOnDisk(
             tmp_file.name, est_elements=10, false_positive_rate=0.05
         )
         # add ten numbered strings before exporting
         for idx in range(10):
             on_disk.add("this is a test {0}".format(idx))
         self.assertEqual(on_disk.export_hex(), expected_hex)
예제 #25
0
 def test_bfod_close_del(self):
     """close an on disk bloom using the del syntax

     Once `del blm` has run, referencing `blm` must raise
     UnboundLocalError.  The message is deliberately not asserted: its
     text differs between Python versions (reworded in 3.11), so an exact
     comparison fails on newer interpreters.
     """
     with NamedTemporaryFile(dir=os.getcwd(),
                             suffix=".blm",
                             delete=DELETE_TEMP_FILES) as fobj:
         blm = BloomFilterOnDisk(fobj.name, 10, 0.05)
         blm.add("this is a test")
         del blm
         # assertRaises fails the test when no UnboundLocalError is raised
         with self.assertRaises(UnboundLocalError):
             self.assertEqual(True, blm)
예제 #26
0
    def test_bfod_jaccard_diff(self):
        """jaccard_index is None when the other filter uses `different_hash`."""
        with NamedTemporaryFile(
            dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES
        ) as tmp_file:
            on_disk = BloomFilterOnDisk(
                tmp_file.name, est_elements=10, false_positive_rate=0.05
            )
            on_disk.add("this is a test")
            # same sizing, but a different hash function
            mismatched = BloomFilter(
                est_elements=10,
                false_positive_rate=0.05,
                hash_function=different_hash,
            )

            self.assertEqual(on_disk.jaccard_index(mismatched), None)
예제 #27
0
 def test_bfod_load_hex(self):
     """Constructing an on disk filter from a hex string is not supported."""
     path = "tmp.blm"
     hex_repr = "85f240623b6d9459000000000000000a000000000000000a3d4ccccd"
     # the constructor is called by assertRaises with these keyword arguments
     self.assertRaises(
         NotSupportedError,
         BloomFilterOnDisk,
         filepath=path,
         hex_string=hex_repr,
     )
예제 #28
0
    def test_bfod_jaccard_diff(self):
        """Jaccard index against a filter with another hash function is None."""
        tmp_path = "tmp.blm"
        sizing = {"est_elements": 10, "false_positive_rate": 0.05}

        disk_filter = BloomFilterOnDisk(tmp_path, **sizing)
        disk_filter.add("this is a test")
        # second filter differs only in its hash function
        other_filter = BloomFilter(hash_function=different_hash, **sizing)

        index = disk_filter.jaccard_index(other_filter)
        self.assertEqual(index, None)
        os.remove(tmp_path)
예제 #29
0
 def test_bfod_load_hex(self):
     """Passing hex_string to BloomFilterOnDisk raises NotSupportedError."""
     tmp_path = "tmp.blm"
     encoded = "85f240623b6d9459000000000000000a000000000000000a3d4ccccd"
     with self.assertRaises(NotSupportedError):
         BloomFilterOnDisk(filepath=tmp_path, hex_string=encoded)
예제 #30
0
    def test_bfod_jaccard(self):
        """Jaccard index of an on disk and an in-memory filter is in range."""
        path = "tmp.blm"

        on_disk = BloomFilterOnDisk(path, 10, 0.05)
        for phrase in ("this is a test", "this is another test"):
            on_disk.add(phrase)

        in_memory = BloomFilter(10, 0.05)
        for phrase in ("this is another test", "this is yet another test"):
            in_memory.add(phrase)

        # one of the three distinct strings is shared by both filters
        similarity = on_disk.jaccard_index(in_memory)
        self.assertGreater(similarity, 0.33)
        self.assertLess(similarity, 0.50)
        on_disk.close()
        os.remove(path)
예제 #31
0
 def test_bfod_invalid_params_msg(self):
     """Opening "invalid.blm" with no other parameters gives the expected
     InitializationError message."""
     path = "invalid.blm"
     expected = "Insufecient parameters to set up the On Disk Bloom Filter"
     # the test fails if the constructor does not raise
     with self.assertRaises(InitializationError) as caught:
         BloomFilterOnDisk(filepath=path)
     self.assertEqual(str(caught.exception), expected)
예제 #32
0
 def test_bfod_ee(self):
     """estimate_elements goes from 0 to 1 after a single add on disk."""
     path = "tmp.blm"
     on_disk = BloomFilterOnDisk(path, 10, 0.05)

     estimate_empty = on_disk.estimate_elements()
     on_disk.add("this is a test")
     estimate_one = on_disk.estimate_elements()

     # the two estimates differ and match the number of inserted elements
     self.assertNotEqual(estimate_empty, estimate_one)
     self.assertEqual(estimate_empty, 0)
     self.assertEqual(estimate_one, 1)

     on_disk.close()
     os.remove(path)
예제 #33
0
    def test_bfod_jaccard(self):
        """The on disk Jaccard index of two overlapping filters is in range."""
        with NamedTemporaryFile(
            dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES
        ) as tmp_file:
            on_disk = BloomFilterOnDisk(tmp_file.name, 20, 0.05)
            on_disk.add("this is a test")
            on_disk.add("this is another test")

            in_memory = BloomFilter(20, 0.05)
            in_memory.add("this is another test")
            in_memory.add("this is yet another test")

            # result must fall strictly between the two bounds
            index = on_disk.jaccard_index(in_memory)
            self.assertGreater(index, 0.33)
            self.assertLess(index, 0.50)
            on_disk.close()
예제 #34
0
 def test_bfod_ee(self):
     """Element estimate of an on disk filter is 0 when empty, 1 after add."""
     with NamedTemporaryFile(
         dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES
     ) as tmp_file:
         on_disk = BloomFilterOnDisk(tmp_file.name, 20, 0.05)
         before = on_disk.estimate_elements()
         on_disk.add("this is a test")
         after = on_disk.estimate_elements()
         # estimates taken before and after the single insertion
         self.assertNotEqual(before, after)
         self.assertEqual(before, 0)
         self.assertEqual(after, 1)
         on_disk.close()
예제 #35
0
 def test_bfod_load_hex(self):
     """Loading an on disk filter from hex_string raises NotSupportedError."""
     with NamedTemporaryFile(
         dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES
     ) as tmp_file:
         encoded = "85f240623b6d9459000000000000000a000000000000000a3d4ccccd"
         with self.assertRaises(NotSupportedError):
             BloomFilterOnDisk(filepath=tmp_file.name, hex_string=encoded)
예제 #36
0
 def test_bfod_load_hex_msg(self):
     """Loading from hex_string raises NotSupportedError with its message."""
     encoded = "85f240623b6d9459000000000000000a000000000000000a3d4ccccd"
     path = "tmp.blm"
     expected = (
         "Loading from hex_string is currently not supported by "
         "the on disk Bloom Filter"
     )
     # the test fails if the constructor returns without raising
     with self.assertRaises(NotSupportedError) as caught:
         BloomFilterOnDisk(filepath=path, hex_string=encoded)
     self.assertEqual(str(caught.exception), expected)
예제 #37
0
    def test_bfod_union(self):
        """Union of an on disk and an in-memory filter holds all elements."""
        path = "tmp.blm"

        on_disk = BloomFilterOnDisk(path, 10, 0.05)
        on_disk.add("this is a test")
        on_disk.add("this is another test")
        in_memory = BloomFilter(10, 0.05)
        in_memory.add("this is yet another test")

        merged = on_disk.union(in_memory)
        self.assertEqual(merged.estimate_elements(), 3)
        self.assertEqual(merged.elements_added, 3)

        # (phrase, expected result of check), in the original order
        membership = (
            ("this is a test", True),
            ("this is another test", True),
            ("this is yet another test", True),
            ("this is not another test", False),
        )
        for phrase, expected in membership:
            self.assertEqual(merged.check(phrase), expected)

        on_disk.close()
        os.remove(path)
예제 #38
0
 def test_bfod_load_hex_msg(self):
     """Check the NotSupportedError message when loading from hex_string."""
     encoded = "85f240623b6d9459000000000000000a000000000000000a3d4ccccd"
     with NamedTemporaryFile(
         dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES
     ) as tmp_file:
         with self.assertRaises(NotSupportedError) as caught:
             BloomFilterOnDisk(filepath=tmp_file.name, hex_string=encoded)
         # compare the raised error's text with the expected wording
         expected = (
             "Loading from hex_string is currently not supported by "
             "the on disk Bloom Filter"
         )
         self.assertEqual(str(caught.exception), expected)
예제 #39
0
    def test_bfod_jaccard(self):
        """Compute the Jaccard index between an on disk and a plain filter."""
        tmp_path = "tmp.blm"

        disk_filter = BloomFilterOnDisk(tmp_path, 10, 0.05)
        disk_filter.add("this is a test")
        disk_filter.add("this is another test")

        plain_filter = BloomFilter(10, 0.05)
        plain_filter.add("this is another test")
        plain_filter.add("this is yet another test")

        # result is expected strictly between 0.33 and 0.50
        score = disk_filter.jaccard_index(plain_filter)
        self.assertGreater(score, 0.33)
        self.assertLess(score, 0.50)
        disk_filter.close()
        os.remove(tmp_path)
예제 #40
0
    def test_bfod_export(self):
        """Exporting an on disk filter writes a file with the same MD5."""
        source_path, copy_path = "tmp.blm", "tmp2.blm"

        on_disk = BloomFilterOnDisk(source_path, 10, 0.05)
        on_disk.add("this is a test")
        on_disk.export(copy_path)
        on_disk.close()

        # both files are hashed after the filter has been closed
        source_md5 = calc_file_md5(source_path)
        copy_md5 = calc_file_md5(copy_path)
        self.assertEqual(source_md5, copy_md5)

        for leftover in (source_path, copy_path):
            os.remove(leftover)
예제 #41
0
 def test_bfod_ee(self):
     """Check estimate_elements on disk before and after one insertion."""
     tmp_path = "tmp.blm"
     bloom = BloomFilterOnDisk(tmp_path, 10, 0.05)

     # take one estimate on the empty filter, one after a single add
     initial = bloom.estimate_elements()
     bloom.add("this is a test")
     updated = bloom.estimate_elements()

     self.assertNotEqual(initial, updated)
     self.assertEqual(initial, 0)
     self.assertEqual(updated, 1)
     bloom.close()
     os.remove(tmp_path)
예제 #42
0
    def test_bfod_union(self):
        """The union of two filters reports 3 elements and finds each one."""
        tmp_path = "tmp.blm"

        disk_filter = BloomFilterOnDisk(tmp_path, 10, 0.05)
        for phrase in ("this is a test", "this is another test"):
            disk_filter.add(phrase)
        plain_filter = BloomFilter(10, 0.05)
        plain_filter.add("this is yet another test")

        combined = disk_filter.union(plain_filter)
        # both the estimate and the stored count are expected to be 3
        estimated = combined.estimate_elements()
        self.assertEqual(estimated, 3)
        counted = combined.elements_added
        self.assertEqual(counted, 3)

        for phrase in (
            "this is a test",
            "this is another test",
            "this is yet another test",
        ):
            self.assertEqual(combined.check(phrase), True)
        self.assertEqual(combined.check("this is not another test"), False)

        disk_filter.close()
        os.remove(tmp_path)
예제 #43
0
    def test_bfod_export(self):
        """An on disk filter exported to a new file matches the original."""
        original_file = "tmp.blm"
        exported_file = "tmp2.blm"

        bloom = BloomFilterOnDisk(original_file, 10, 0.05)
        bloom.add("this is a test")
        bloom.export(exported_file)
        bloom.close()

        # hash the original first, then the export, and compare
        digests = [calc_file_md5(name) for name in (original_file, exported_file)]
        self.assertEqual(digests[0], digests[1])

        os.remove(original_file)
        os.remove(exported_file)
예제 #44
0
    def test_bfod_clear(self):
        """clear() resets the element count and zeroes every stored element."""
        path = "tmp.blm"
        on_disk = BloomFilterOnDisk(
            filepath=path, est_elements=10, false_positive_rate=0.05
        )
        self.assertEqual(on_disk.elements_added, 0)

        # add ten numbered strings
        for number in range(10):
            on_disk.add("this is a test {0}".format(number))
        self.assertEqual(on_disk.elements_added, 10)

        on_disk.clear()
        self.assertEqual(on_disk.elements_added, 0)
        # every index up to bloom_length must read back as 0
        for position in range(on_disk.bloom_length):
            self.assertEqual(on_disk._get_element(position), 0)

        os.remove(path)
예제 #45
0
 def runner():
     ''' create an on disk bloom filter at `filename` and call export_hex

         `filename` is not defined in this function; presumably it comes
         from the enclosing scope - confirm against the caller.
     '''
     blm = BloomFilterOnDisk(filename, 10, 0.05)
     # NOTE(review): the result of export_hex is discarded; this helper looks
     # like it exists only to trigger the call (e.g. for an assertRaises
     # check) - confirm
     blm.export_hex()