def test_cbf_jaccard_invalid(self):
    """jaccard_index on an on disk Bloom Filter raises TypeError for a non-filter argument"""
    filename = "tmp.blm"
    blm = BloomFilterOnDisk(filename, est_elements=10, false_positive_rate=0.05)
    try:
        blm.add("this is a test")
        with self.assertRaises(TypeError):
            blm.jaccard_index(1)
    finally:
        # release and delete the backing file so it cannot leak into other tests
        blm.close()
        os.remove(filename)
def test_cbf_jaccard_invalid(self):
    """jaccard_index on an on disk Bloom Filter raises TypeError for a non-filter argument"""
    filename = "tmp.blm"
    blm = BloomFilterOnDisk(filename, est_elements=10, false_positive_rate=0.05)
    try:
        blm.add("this is a test")
        with self.assertRaises(TypeError):
            blm.jaccard_index(1)
    finally:
        # release and delete the backing file so it cannot leak into other tests
        blm.close()
        os.remove(filename)
def test_cbf_intersection_invalid(self):
    """intersection on an on disk Bloom Filter raises TypeError for a non-filter argument"""
    filename = "tmp.blm"
    blm = BloomFilterOnDisk(filename, est_elements=10, false_positive_rate=0.05)
    try:
        blm.add("this is a test")
        with self.assertRaises(TypeError):
            # exercise intersection, as the test name states (previously called jaccard_index)
            blm.intersection(1)
    finally:
        # clean up even when the assertion fails
        blm.close()
        os.remove(filename)
def test_bfod_ea(self):
    """elements_added is 0 for a new on disk filter and 1 after a single add"""
    filename = "tmp.blm"
    blmd = BloomFilterOnDisk(filename, 10, 0.05)
    try:
        self.assertEqual(blmd.elements_added, 0)
        blmd.add("this is a test")
        self.assertEqual(blmd.elements_added, 1)
    finally:
        # clean up even when an assertion fails
        blmd.close()
        os.remove(filename)
def test_bfod_frombytes(self):
    """loading an on disk Bloom Filter via frombytes raises NotSupportedError"""
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as tmp_file:
        on_disk = BloomFilterOnDisk(tmp_file.name, 10, 0.05)
        on_disk.add("this is a test")
        serialized = bytes(on_disk)
        with self.assertRaises(NotSupportedError):
            BloomFilterOnDisk.frombytes(serialized)
def test_bfod_jaccard_invalid(self):
    """jaccard_index on an on disk Bloom Filter raises TypeError for a non-filter argument"""
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as tmp_file:
        on_disk = BloomFilterOnDisk(tmp_file.name, est_elements=10, false_positive_rate=0.05)
        on_disk.add("this is a test")
        with self.assertRaises(TypeError):
            on_disk.jaccard_index(1)
def test_bfod_bytes(self):
    """bytes() of an on disk Bloom Filter hashes to the known md5 digest"""
    expected_md5 = "8d27e30e1c5875b0edcf7413c7bdb221"
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as tmp_file:
        on_disk = BloomFilterOnDisk(tmp_file.name, 10, 0.05)
        on_disk.add("this is a test")
        # hash the exported bytes and compare against the recorded digest
        digest = hashlib.md5(bytes(on_disk)).hexdigest()
        self.assertEqual(digest, expected_md5)
def test_bfod_all_bits_set(self):
    """over-filling the on disk filter so all bits are set makes estimate_elements return -1"""
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as tmp_file:
        on_disk = BloomFilterOnDisk(tmp_file.name, est_elements=10, false_positive_rate=0.05)
        # insert ten times the estimated capacity
        for value in map(str, range(100)):
            on_disk.add(value)
        # NOTE: this causes an exception when all bits are set
        estimate = on_disk.estimate_elements()
        self.assertEqual(-1, estimate)
def test_bfod_check(self):
    """check() reports added items as present and other items as absent on disk"""
    filename = "tmp.blm"
    blm = BloomFilterOnDisk(filename, 10, 0.05)
    try:
        blm.add("this is a test")
        blm.add("this is another test")
        self.assertEqual(blm.check("this is a test"), True)
        self.assertEqual(blm.check("this is another test"), True)
        self.assertEqual(blm.check("this is yet another test"), False)
        self.assertEqual(blm.check("this is not another test"), False)
    finally:
        # clean up even when an assertion fails
        blm.close()
        os.remove(filename)
def test_bfod_export_hex_msg(self):
    """export_hex on an on disk Bloom Filter raises NotSupportedError with the expected message"""
    filename = "tmp.blm"
    msg = "`export_hex` is currently not supported by the on disk Bloom Filter"
    blm = BloomFilterOnDisk(filename, 10, 0.05)
    try:
        # assertRaises fails the test with a clear message if nothing is raised
        with self.assertRaises(NotSupportedError) as ctx:
            blm.export_hex()
        self.assertEqual(str(ctx.exception), msg)
    finally:
        # clean up even when an assertion fails
        blm.close()
        os.remove(filename)
def test_cbf_intersec_invalid_msg(self):
    """intersection with a non-filter argument raises TypeError with the expected message"""
    msg = "The parameter second must be of type BloomFilter or a BloomFilterOnDisk"
    filename = "tmp.blm"
    blm = BloomFilterOnDisk(filename, est_elements=10, false_positive_rate=0.05)
    try:
        blm.add("this is a test")
        # previously the test passed silently when no TypeError was raised
        with self.assertRaises(TypeError) as ctx:
            blm.intersection(1)
        self.assertEqual(str(ctx.exception), msg)
    finally:
        # clean up even when an assertion fails
        blm.close()
        os.remove(filename)
def test_bfod_union_diff(self):
    """union returns None when the other filter was built with a different hash function"""
    filename = "tmp.blm"
    blm = BloomFilterOnDisk(filename, est_elements=10, false_positive_rate=0.05)
    try:
        blm.add("this is a test")
        blm2 = BloomFilter(est_elements=10, false_positive_rate=0.05, hash_function=different_hash)
        blm3 = blm.union(blm2)
        self.assertEqual(blm3, None)
    finally:
        # clean up even when the assertion fails
        blm.close()
        os.remove(filename)
def test_bfod_export_hex_msg(self):
    """export_hex on an on disk Bloom Filter raises NotSupportedError with the expected message"""
    filename = "tmp.blm"
    msg = "`export_hex` is currently not supported by the on disk Bloom Filter"
    blm = BloomFilterOnDisk(filename, 10, 0.05)
    try:
        # assertRaises fails the test with a clear message if nothing is raised
        with self.assertRaises(NotSupportedError) as ctx:
            blm.export_hex()
        self.assertEqual(str(ctx.exception), msg)
    finally:
        # clean up even when an assertion fails
        blm.close()
        os.remove(filename)
def test_bfod_load_on_disk(self):
    """a filter exported to a file can be re-opened as an on disk Bloom Filter"""
    filename = "tmp.blm"
    blm = BloomFilter(10, 0.05)
    blm.add("this is a test")
    blm.export(filename)
    try:
        blmd = BloomFilterOnDisk(filename)
        try:
            self.assertEqual("this is a test" in blmd, True)
            self.assertEqual("this is not a test" in blmd, False)
        finally:
            blmd.close()
    finally:
        # remove the exported file even when loading or an assertion fails
        os.remove(filename)
def test_bfod_init(self):
    """a new on disk Bloom Filter exposes the expected sizing attributes"""
    filename = "tmp.blm"
    blmd = BloomFilterOnDisk(filename, 10, 0.05)
    try:
        self.assertEqual(blmd.false_positive_rate, 0.05000000074505806)
        self.assertEqual(blmd.estimated_elements, 10)
        self.assertEqual(blmd.number_hashes, 4)
        self.assertEqual(blmd.number_bits, 63)
        self.assertEqual(blmd.elements_added, 0)
        self.assertEqual(blmd.is_on_disk, True)
        self.assertEqual(blmd.bloom_length, 63 // 8 + 1)
    finally:
        # clean up even when an assertion fails
        blmd.close()
        os.remove(filename)
def test_bfod_load_on_disk(self):
    """a filter exported to a file can be re-opened as an on disk Bloom Filter"""
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as tmp_file:
        in_memory = BloomFilter(10, 0.05)
        in_memory.add("this is a test")
        in_memory.export(tmp_file.name)
        on_disk = BloomFilterOnDisk(tmp_file.name)
        # (item, expected membership) pairs, checked in the original order
        cases = (("this is a test", True), ("this is not a test", False))
        for item, expected in cases:
            self.assertEqual(item in on_disk, expected)
        on_disk.close()
def test_bfod_load_on_disk(self):
    """a filter exported to a file can be re-opened as an on disk Bloom Filter"""
    filename = "tmp.blm"
    blm = BloomFilter(10, 0.05)
    blm.add("this is a test")
    blm.export(filename)
    try:
        blmd = BloomFilterOnDisk(filename)
        try:
            self.assertEqual("this is a test" in blmd, True)
            self.assertEqual("this is not a test" in blmd, False)
        finally:
            blmd.close()
    finally:
        # remove the exported file even when loading or an assertion fails
        os.remove(filename)
def test_bfod_init(self):
    """a new on disk Bloom Filter exposes the expected sizing attributes"""
    filename = "tmp.blm"
    blmd = BloomFilterOnDisk(filename, 10, 0.05)
    try:
        self.assertEqual(blmd.false_positive_rate, 0.05000000074505806)
        self.assertEqual(blmd.estimated_elements, 10)
        self.assertEqual(blmd.number_hashes, 4)
        self.assertEqual(blmd.number_bits, 63)
        self.assertEqual(blmd.elements_added, 0)
        self.assertEqual(blmd.is_on_disk, True)
        self.assertEqual(blmd.bloom_length, 63 // 8 + 1)
    finally:
        # clean up even when an assertion fails
        blmd.close()
        os.remove(filename)
def test_bfod_close_del(self):
    """after `del blm` the local name is unbound and using it raises UnboundLocalError"""
    filename = "tmp.blm"
    blm = BloomFilterOnDisk(filename, 10, 0.05)
    try:
        blm.add("this is a test")
        del blm
        with self.assertRaises(UnboundLocalError) as ctx:
            self.assertEqual(True, blm)
        # the interpreter's wording changed in Python 3.11; accept either form
        expected = (
            "local variable 'blm' referenced before assignment",
            "cannot access local variable 'blm' where it is not associated with a value",
        )
        self.assertIn(str(ctx.exception), expected)
    finally:
        # blm is already deleted on the success path, so only the file is removed here
        os.remove(filename)
def test_bfod_init(self):
    """a new on disk Bloom Filter exposes the expected sizing attributes"""
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as tmp_file:
        on_disk = BloomFilterOnDisk(tmp_file.name, 10, 0.05)
        # (attribute, expected value) pairs, checked in the original order
        expectations = (
            ("false_positive_rate", 0.05000000074505806),
            ("estimated_elements", 10),
            ("number_hashes", 4),
            ("number_bits", 63),
            ("elements_added", 0),
            ("is_on_disk", True),
            ("bloom_length", 63 // 8 + 1),
        )
        for attribute, expected in expectations:
            self.assertEqual(getattr(on_disk, attribute), expected)
        on_disk.close()
def test_bfod_close_del(self):
    """after `del blm` the local name is unbound and using it raises UnboundLocalError"""
    filename = "tmp.blm"
    blm = BloomFilterOnDisk(filename, 10, 0.05)
    try:
        blm.add("this is a test")
        del blm
        with self.assertRaises(UnboundLocalError) as ctx:
            self.assertEqual(True, blm)
        # the interpreter's wording changed in Python 3.11; accept either form
        expected = (
            "local variable 'blm' referenced before assignment",
            "cannot access local variable 'blm' where it is not associated with a value",
        )
        self.assertIn(str(ctx.exception), expected)
    finally:
        # blm is already deleted on the success path, so only the file is removed here
        os.remove(filename)
def test_bfod_union_diff(self):
    """union returns None when the other filter was built with a different hash function"""
    filename = "tmp.blm"
    blm = BloomFilterOnDisk(filename, est_elements=10, false_positive_rate=0.05)
    try:
        blm.add("this is a test")
        blm2 = BloomFilter(est_elements=10, false_positive_rate=0.05, hash_function=different_hash)
        blm3 = blm.union(blm2)
        self.assertEqual(blm3, None)
    finally:
        # clean up even when the assertion fails
        blm.close()
        os.remove(filename)
def test_bfod_export_hex(self):
    """export_hex of an on disk Bloom Filter holding ten items equals the known hex string"""
    expected_hex = "6da491461a6bba4d000000000000000a000000000000000a3d4ccccd"
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as tmp_file:
        on_disk = BloomFilterOnDisk(tmp_file.name, est_elements=10, false_positive_rate=0.05)
        for idx in range(10):
            on_disk.add("this is a test {0}".format(idx))
        exported = on_disk.export_hex()
        self.assertEqual(exported, expected_hex)
def test_bfod_close_del(self):
    """after `del blm` the local name is unbound and using it raises UnboundLocalError"""
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as fobj:
        blm = BloomFilterOnDisk(fobj.name, 10, 0.05)
        blm.add("this is a test")
        del blm
        with self.assertRaises(UnboundLocalError) as ctx:
            self.assertEqual(True, blm)
        # the interpreter's wording changed in Python 3.11; accept either form
        expected = (
            "local variable 'blm' referenced before assignment",
            "cannot access local variable 'blm' where it is not associated with a value",
        )
        self.assertIn(str(ctx.exception), expected)
def test_bfod_jaccard_diff(self):
    """jaccard_index returns None when the other filter was built with a different hash function"""
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as tmp_file:
        on_disk = BloomFilterOnDisk(tmp_file.name, est_elements=10, false_positive_rate=0.05)
        on_disk.add("this is a test")
        mismatched = BloomFilter(est_elements=10, false_positive_rate=0.05, hash_function=different_hash)
        result = on_disk.jaccard_index(mismatched)
        self.assertEqual(result, None)
def test_bfod_load_hex(self):
    """building an on disk Bloom Filter from a hex string raises NotSupportedError"""
    hex_val = "85f240623b6d9459000000000000000a000000000000000a3d4ccccd"
    filename = "tmp.blm"
    with self.assertRaises(NotSupportedError):
        BloomFilterOnDisk(filepath=filename, hex_string=hex_val)
def test_bfod_jaccard_diff(self):
    """jaccard_index returns None when the other filter was built with a different hash function"""
    filename = "tmp.blm"
    blm = BloomFilterOnDisk(filename, est_elements=10, false_positive_rate=0.05)
    try:
        blm.add("this is a test")
        blm2 = BloomFilter(est_elements=10, false_positive_rate=0.05, hash_function=different_hash)
        blm3 = blm.jaccard_index(blm2)
        self.assertEqual(blm3, None)
    finally:
        # clean up even when the assertion fails
        blm.close()
        os.remove(filename)
def test_bfod_load_hex(self):
    """building an on disk Bloom Filter from a hex string raises NotSupportedError"""
    hex_val = "85f240623b6d9459000000000000000a000000000000000a3d4ccccd"
    filename = "tmp.blm"
    with self.assertRaises(NotSupportedError):
        BloomFilterOnDisk(filepath=filename, hex_string=hex_val)
def test_bfod_jaccard(self):
    """jaccard index of two filters sharing one of three items lies between 0.33 and 0.50"""
    filename = "tmp.blm"
    blm = BloomFilterOnDisk(filename, 10, 0.05)
    try:
        blm.add("this is a test")
        blm.add("this is another test")
        blm2 = BloomFilter(10, 0.05)
        blm2.add("this is another test")
        blm2.add("this is yet another test")
        res = blm.jaccard_index(blm2)
        self.assertGreater(res, 0.33)
        self.assertLess(res, 0.50)
    finally:
        # clean up even when an assertion fails
        blm.close()
        os.remove(filename)
def test_bfod_invalid_params_msg(self):
    """building an on disk Bloom Filter from an invalid filepath raises InitializationError with the expected message"""
    filename = "invalid.blm"
    # NOTE: spelling is kept verbatim; it is compared against the raised message
    msg = "Insufecient parameters to set up the On Disk Bloom Filter"
    # assertRaises fails the test with a clear message if nothing is raised
    with self.assertRaises(InitializationError) as ctx:
        BloomFilterOnDisk(filepath=filename)
    self.assertEqual(str(ctx.exception), msg)
def test_bfod_ee(self):
    """estimate_elements is 0 for a new on disk filter and 1 after a single add"""
    filename = "tmp.blm"
    blmd = BloomFilterOnDisk(filename, 10, 0.05)
    try:
        res1 = blmd.estimate_elements()
        blmd.add("this is a test")
        res2 = blmd.estimate_elements()
        self.assertNotEqual(res1, res2)
        self.assertEqual(res1, 0)
        self.assertEqual(res2, 1)
    finally:
        # clean up even when an assertion fails
        blmd.close()
        os.remove(filename)
def test_bfod_jaccard(self):
    """jaccard index of two filters sharing one of three items lies between 0.33 and 0.50"""
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as tmp_file:
        on_disk = BloomFilterOnDisk(tmp_file.name, 20, 0.05)
        for item in ("this is a test", "this is another test"):
            on_disk.add(item)
        in_memory = BloomFilter(20, 0.05)
        for item in ("this is another test", "this is yet another test"):
            in_memory.add(item)
        similarity = on_disk.jaccard_index(in_memory)
        self.assertGreater(similarity, 0.33)
        self.assertLess(similarity, 0.50)
        on_disk.close()
def test_bfod_ee(self):
    """estimate_elements is 0 for a new on disk filter and 1 after a single add"""
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as tmp_file:
        on_disk = BloomFilterOnDisk(tmp_file.name, 20, 0.05)
        before = on_disk.estimate_elements()
        on_disk.add("this is a test")
        after = on_disk.estimate_elements()
        self.assertNotEqual(before, after)
        self.assertEqual(before, 0)
        self.assertEqual(after, 1)
        on_disk.close()
def test_bfod_load_hex(self):
    """building an on disk Bloom Filter from a hex string raises NotSupportedError"""
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as tmp_file:
        hex_val = "85f240623b6d9459000000000000000a000000000000000a3d4ccccd"
        with self.assertRaises(NotSupportedError):
            BloomFilterOnDisk(filepath=tmp_file.name, hex_string=hex_val)
def test_bfod_load_hex_msg(self):
    """building an on disk Bloom Filter from a hex string raises NotSupportedError with the expected message"""
    hex_val = "85f240623b6d9459000000000000000a000000000000000a3d4ccccd"
    filename = "tmp.blm"
    msg = "Loading from hex_string is currently not supported by the on disk Bloom Filter"
    # assertRaises fails the test with a clear message if nothing is raised
    with self.assertRaises(NotSupportedError) as ctx:
        BloomFilterOnDisk(filepath=filename, hex_string=hex_val)
    self.assertEqual(str(ctx.exception), msg)
def test_bfod_union(self):
    """union of an on disk filter and an in-memory filter contains the items of both"""
    filename = "tmp.blm"
    blm = BloomFilterOnDisk(filename, 10, 0.05)
    try:
        blm.add("this is a test")
        blm.add("this is another test")
        blm2 = BloomFilter(10, 0.05)
        blm2.add("this is yet another test")
        blm3 = blm.union(blm2)
        self.assertEqual(blm3.estimate_elements(), 3)
        self.assertEqual(blm3.elements_added, 3)
        self.assertEqual(blm3.check("this is a test"), True)
        self.assertEqual(blm3.check("this is another test"), True)
        self.assertEqual(blm3.check("this is yet another test"), True)
        self.assertEqual(blm3.check("this is not another test"), False)
    finally:
        # clean up even when an assertion fails
        blm.close()
        os.remove(filename)
def test_bfod_load_hex_msg(self):
    """building an on disk Bloom Filter from a hex string raises NotSupportedError with the expected message"""
    hex_val = "85f240623b6d9459000000000000000a000000000000000a3d4ccccd"
    msg = "Loading from hex_string is currently not supported by the on disk Bloom Filter"
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as fobj:
        # assertRaises fails the test with a clear message if nothing is raised
        with self.assertRaises(NotSupportedError) as ctx:
            BloomFilterOnDisk(filepath=fobj.name, hex_string=hex_val)
        self.assertEqual(str(ctx.exception), msg)
def test_bfod_jaccard(self):
    """jaccard index of two filters sharing one of three items lies between 0.33 and 0.50"""
    filename = "tmp.blm"
    blm = BloomFilterOnDisk(filename, 10, 0.05)
    try:
        blm.add("this is a test")
        blm.add("this is another test")
        blm2 = BloomFilter(10, 0.05)
        blm2.add("this is another test")
        blm2.add("this is yet another test")
        res = blm.jaccard_index(blm2)
        self.assertGreater(res, 0.33)
        self.assertLess(res, 0.50)
    finally:
        # clean up even when an assertion fails
        blm.close()
        os.remove(filename)
def test_bfod_export(self):
    """exporting an on disk filter to a second file produces a file with the same md5"""
    filename = "tmp.blm"
    filename2 = "tmp2.blm"
    blm = BloomFilterOnDisk(filename, 10, 0.05)
    try:
        try:
            blm.add("this is a test")
            blm.export(filename2)
        finally:
            # close before hashing the files, as the original flow did
            blm.close()
        md5_1 = calc_file_md5(filename)
        md5_2 = calc_file_md5(filename2)
        self.assertEqual(md5_1, md5_2)
    finally:
        for path in (filename, filename2):
            # filename2 only exists if the export got that far
            if os.path.exists(path):
                os.remove(path)
def test_bfod_ee(self):
    """estimate_elements is 0 for a new on disk filter and 1 after a single add"""
    filename = "tmp.blm"
    blmd = BloomFilterOnDisk(filename, 10, 0.05)
    try:
        res1 = blmd.estimate_elements()
        blmd.add("this is a test")
        res2 = blmd.estimate_elements()
        self.assertNotEqual(res1, res2)
        self.assertEqual(res1, 0)
        self.assertEqual(res2, 1)
    finally:
        # clean up even when an assertion fails
        blmd.close()
        os.remove(filename)
def test_bfod_union(self):
    """union of an on disk filter and an in-memory filter contains the items of both"""
    filename = "tmp.blm"
    blm = BloomFilterOnDisk(filename, 10, 0.05)
    try:
        blm.add("this is a test")
        blm.add("this is another test")
        blm2 = BloomFilter(10, 0.05)
        blm2.add("this is yet another test")
        blm3 = blm.union(blm2)
        self.assertEqual(blm3.estimate_elements(), 3)
        self.assertEqual(blm3.elements_added, 3)
        self.assertEqual(blm3.check("this is a test"), True)
        self.assertEqual(blm3.check("this is another test"), True)
        self.assertEqual(blm3.check("this is yet another test"), True)
        self.assertEqual(blm3.check("this is not another test"), False)
    finally:
        # clean up even when an assertion fails
        blm.close()
        os.remove(filename)
def test_bfod_export(self):
    """exporting an on disk filter to a second file produces a file with the same md5"""
    filename = "tmp.blm"
    filename2 = "tmp2.blm"
    blm = BloomFilterOnDisk(filename, 10, 0.05)
    try:
        try:
            blm.add("this is a test")
            blm.export(filename2)
        finally:
            # close before hashing the files, as the original flow did
            blm.close()
        md5_1 = calc_file_md5(filename)
        md5_2 = calc_file_md5(filename2)
        self.assertEqual(md5_1, md5_2)
    finally:
        for path in (filename, filename2):
            # filename2 only exists if the export got that far
            if os.path.exists(path):
                os.remove(path)
def test_bfod_clear(self):
    """clear() resets elements_added to 0 and zeroes every element of the on disk filter"""
    filename = "tmp.blm"
    blm = BloomFilterOnDisk(filepath=filename, est_elements=10, false_positive_rate=0.05)
    try:
        self.assertEqual(blm.elements_added, 0)
        for i in range(10):
            blm.add("this is a test {0}".format(i))
        self.assertEqual(blm.elements_added, 10)

        blm.clear()
        self.assertEqual(blm.elements_added, 0)
        # every stored element must read back as 0 after clearing
        for idx in range(blm.bloom_length):
            self.assertEqual(blm._get_element(idx), 0)
    finally:
        # clean up even when an assertion fails
        blm.close()
        os.remove(filename)
def runner():
    """create an on disk Bloom Filter at `filename` (from the enclosing scope) and call export_hex"""
    on_disk = BloomFilterOnDisk(filename, 10, 0.05)
    on_disk.export_hex()