def test_cms_different_hash(self):
    """Check that supplying a custom hash function changes a key's hashes."""
    key = "this is a test"
    # Sketch built without a hash_function argument.
    baseline_hashes = CountMinSketch(width=1000, depth=5).hashes(key)
    # Same dimensions, but hashing is delegated to different_hash.
    custom_hashes = CountMinSketch(width=1000, depth=5, hash_function=different_hash).hashes(key)
    self.assertNotEqual(baseline_hashes, custom_hashes)
def test_cms_different_hash(self):
    ''' hashes of one key must differ once the hash function is swapped '''
    phrase = 'this is a test'
    # Both sketches share the same dimensions; only hash_function differs.
    dimensions = {'width': 1000, 'depth': 5}

    baseline = CountMinSketch(**dimensions)
    baseline_hashes = baseline.hashes(phrase)

    swapped = CountMinSketch(hash_function=different_hash, **dimensions)
    swapped_hashes = swapped.hashes(phrase)

    self.assertNotEqual(baseline_hashes, swapped_hashes)
def test_cms_load_diff_hash(self):
    """Test loading an exported count-min sketch with a different hash function.

    A sketch holding one key with a count of 100 is exported to a temporary
    file and the MD5 of that file is compared against a known value. The file
    is then loaded into a second sketch that uses ``different_hash``; the
    element total must survive the round trip, but a lookup of the key through
    the second sketch must not report the stored count, and the two sketches
    must hash the key differently.
    """
    md5_val = "fb1c39dd1a73f1ef0d7fc79f60fc028e"
    key = "this is a test"
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".cms", delete=DELETE_TEMP_FILES) as fobj:
        cms = CountMinSketch(width=1000, depth=5)
        self.assertEqual(cms.add(key, 100), 100)
        cms.export(fobj.name)
        md5_out = calc_file_md5(fobj.name)
        self.assertEqual(md5_out, md5_val)

        cms2 = CountMinSketch(filepath=fobj.name, hash_function=different_hash)
        self.assertEqual(cms2.elements_added, 100)
        # should not work since it is a different hash: the lookup has to go
        # through the reloaded sketch (cms2), not the original one, and is
        # compared against the count that was stored rather than against True
        # (which an integer count of 100 can never equal).
        # NOTE(review): assumes different_hash maps the key to at least one
        # bin other than the one the exporting sketch used -- confirm.
        self.assertNotEqual(cms2.check(key), 100)
        self.assertNotEqual(cms.hashes(key), cms2.hashes(key))
def test_cms_load_diff_hash(self):
    ''' test loading a count-min sketch from file with a different hash

        A sketch holding one key with a count of 100 is exported to
        `test.cms` and the MD5 of that file is compared against a known
        value. The file is then loaded into a second sketch that uses
        `different_hash`; the element total must survive the round trip, but
        a lookup of the key through the second sketch must not report the
        stored count, and the two sketches must hash the key differently.
    '''
    md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
    filename = 'test.cms'
    key = 'this is a test'
    cms = CountMinSketch(width=1000, depth=5)
    self.assertEqual(cms.add(key, 100), 100)
    try:
        cms.export(filename)
        md5_out = calc_file_md5(filename)
        self.assertEqual(md5_out, md5_val)

        cms2 = CountMinSketch(filepath=filename, hash_function=different_hash)
        self.assertEqual(cms2.elements_added, 100)
        # should not work since it is a different hash: the lookup has to go
        # through the reloaded sketch (cms2), not the original one, and is
        # compared against the count that was stored rather than against True
        # (which an integer count of 100 can never equal).
        # NOTE(review): assumes different_hash maps the key to at least one
        # bin other than the one the exporting sketch used -- confirm.
        self.assertNotEqual(cms2.check(key), 100)
        self.assertNotEqual(cms.hashes(key), cms2.hashes(key))
    finally:
        # Remove the exported file even when an assertion above fails, so a
        # failing run does not leave test.cms behind in the working directory.
        # The existence guard keeps a failed export from being masked by a
        # FileNotFoundError raised during cleanup.
        if os.path.exists(filename):
            os.remove(filename)