def test_cms_min_val(self):
    """Check the lower bounds reported after an oversized removal.

    Removes ``INT64_T_MAX + 5`` occurrences of one key from a freshly
    constructed sketch, then expects ``check`` to report ``INT32_T_MIN``
    for that key and ``elements_added`` to equal ``INT64_T_MIN``.
    """
    amount = INT64_T_MAX + 5
    sketch = CountMinSketch(width=1000, depth=5)
    key = "this is a test"

    sketch.remove(key, amount)

    self.assertEqual(sketch.check(key), INT32_T_MIN)
    self.assertEqual(sketch.elements_added, INT64_T_MIN)
def test_cms_min_val(self):
    '''Verify the minimum values seen after removing too many elements.

    A single key has ``INT64_T_MAX + 5`` occurrences removed from a new
    sketch; the key's count is expected to be ``INT32_T_MIN`` and the
    sketch's ``elements_added`` is expected to be ``INT64_T_MIN``.
    '''
    removal_size = INT64_T_MAX + 5
    sketch = CountMinSketch(width=1000, depth=5)
    element = 'this is a test'
    sketch.remove(element, removal_size)

    observed_count = sketch.check(element)
    self.assertEqual(observed_count, INT32_T_MIN)
    self.assertEqual(sketch.elements_added, INT64_T_MIN)
def test_cms_join_underflow(self):
    """Check the lower bounds reported when a sketch is joined with itself.

    Two rounds are exercised, each a removal followed by joining the
    sketch with itself:

    1. Remove ``(INT32_T_MAX + 5) // 2`` of the first key; the key's count
       is expected to be ``INT32_T_MIN`` and ``elements_added`` is expected
       to be ``-(INT32_T_MAX + 5)``.
    2. Remove ``INT64_T_MAX // 2`` of a second key; ``elements_added`` is
       expected to be ``INT64_T_MIN``.
    """
    overshoot = INT32_T_MAX + 5
    sketch = CountMinSketch(width=1000, depth=5)
    first_key = "this is a test"

    # Round one: per-key count lower bound.
    sketch.remove(first_key, overshoot // 2)
    sketch.join(sketch)
    self.assertEqual(INT32_T_MIN, sketch.check(first_key))
    self.assertEqual(sketch.elements_added, -overshoot)

    # Round two: elements_added lower bound.
    sketch.remove("this is a test 2 ", INT64_T_MAX // 2)
    sketch.join(sketch)
    self.assertEqual(sketch.elements_added, INT64_T_MIN)
def test_cms_remove_mult(self):
    '''Remove several occurrences of a key in a single call.

    Adds 16 of one key, removes 4 in one call, and checks both the value
    returned by each call and ``elements_added`` after each step.
    '''
    sketch = CountMinSketch(width=1000, depth=5)
    key = 'this is a test'

    count_after_add = sketch.add(key, 16)
    self.assertEqual(count_after_add, 16)
    self.assertEqual(sketch.elements_added, 16)

    count_after_remove = sketch.remove(key, 4)
    self.assertEqual(count_after_remove, 12)
    self.assertEqual(sketch.elements_added, 12)
def test_cms_remove_mult(self):
    """Check a removal of more than one occurrence at a time.

    After adding 16 occurrences of a key, ``add`` is expected to return 16;
    after removing 4 of them in one call, ``remove`` is expected to return
    12. ``elements_added`` is expected to match after each step.
    """
    element = "this is a test"
    sketch = CountMinSketch(width=1000, depth=5)

    added = sketch.add(element, 16)
    self.assertEqual(added, 16)
    self.assertEqual(sketch.elements_added, 16)

    remaining = sketch.remove(element, 4)
    self.assertEqual(remaining, 12)
    self.assertEqual(sketch.elements_added, 12)
def test_cms_remove_single(self):
    '''Remove a key one occurrence at a time.

    Adds 4 of one key, then calls ``remove`` twice without a count; the
    calls are expected to return 3 and then 2, leaving ``elements_added``
    at 2.
    '''
    sketch = CountMinSketch(width=1000, depth=5)
    key = 'this is a test'

    self.assertEqual(sketch.add(key, 4), 4)
    self.assertEqual(sketch.elements_added, 4)

    # Each removal without an explicit count should lower the result by one.
    for expected in (3, 2):
        self.assertEqual(sketch.remove(key), expected)

    self.assertEqual(sketch.elements_added, 2)
def test_cms_remove_single(self):
    """Check removals that use the default count.

    Starting from 4 occurrences of a key, two ``remove`` calls with no
    explicit count are expected to return 3 and 2 in turn, and
    ``elements_added`` is expected to end at 2.
    """
    element = "this is a test"
    sketch = CountMinSketch(width=1000, depth=5)

    initial = sketch.add(element, 4)
    self.assertEqual(initial, 4)
    self.assertEqual(sketch.elements_added, 4)

    after_first = sketch.remove(element)
    self.assertEqual(after_first, 3)

    after_second = sketch.remove(element)
    self.assertEqual(after_second, 2)

    self.assertEqual(sketch.elements_added, 2)
class CM4:
    """Key counter built on a ``CountMinSketch`` plus a set of seen keys.

    The sketch holds the counts; the ``keys`` set records which keys have
    been added so that ``reset`` can visit each of them and scale its
    count down.
    """

    def __init__(self, width=128):
        """Create the sketch and an empty key set.

        Args:
            width: Passed as the first argument to ``CountMinSketch``; the
                second argument is fixed at 4 (presumably the depth --
                confirm against the sketch's signature).

        Raises:
            RuntimeError: If ``width`` is less than 1.
        """
        if width < 1:
            raise RuntimeError("bad width for cm4")
        self.keys = set()
        self.cm4 = CountMinSketch(width, 4)

    def add(self, key: str) -> None:
        """Count one occurrence of ``key`` and remember it for ``reset``."""
        # Update the sketch first so a failing add leaves ``keys`` untouched.
        self.cm4.add(key)
        self.keys.add(key)

    def estimate(self, key: str):
        """Return whatever the sketch's ``check`` reports for ``key``."""
        estimated = self.cm4.check(key)
        return estimated

    def reset(self) -> None:
        """Scale down the count of every tracked key.

        For each tracked key the current estimate is shifted right by one
        bit and masked to its low 63 bits; that amount is then removed from
        the sketch. When the amount works out to zero, one is removed
        instead and the key is dropped from ``keys``.
        """
        # Low 63 bits set; in Python the AND always yields a non-negative
        # int (presumably the intent is to guard against negative
        # estimates -- confirm).
        low_63_bits = 0x7FFFFFFFFFFFFFFF

        # Iterate over a copy because keys may be discarded inside the loop.
        snapshot = self.keys.copy()
        for tracked in snapshot:
            decrement = (self.cm4.check(tracked) >> 1) & low_63_bits
            if decrement == 0:
                # Nothing left to halve: stop tracking the key and remove
                # one occurrence instead.
                self.keys.discard(tracked)
                decrement = 1
            self.cm4.remove(tracked, decrement)