def test_heavy_hitters_window():
    """Run heavy_hitters_window over a twelve-item stream and print the result.

    The input is written as three groups of ``window_size`` items.  Nothing
    is asserted; the recent values of the output stream are only printed.
    """
    sketch = HeavyHitters(width=1000, depth=5)
    in_stream = Stream('input')
    window_size = 4
    out_stream = heavy_hitters_window(in_stream, window_size, sketch)

    # One list per group of four items, concatenated in order.
    first_window = ['a', 'a', 'a', 'b']
    second_window = ['a', 'b', 'c', 'a']
    third_window = ['b', 'c', 'b', 'b']
    in_stream.extend(first_window + second_window + third_window)

    run()
    print(recent_values(out_stream))
def test_hh_bytes(self):
    """test that the bytes form of a heavy hitters sketch has the expected md5"""
    sketch = HeavyHitters(num_hitters=1000, width=1000, depth=5)
    sketch.add("this is a test", 100)

    digest = hashlib.md5(bytes(sketch)).hexdigest()
    self.assertEqual(digest, "fb1c39dd1a73f1ef0d7fc79f60fc028e")
def test_hh_export(self):
    """test that an exported heavy hitters sketch file has the expected md5"""
    expected_md5 = "fb1c39dd1a73f1ef0d7fc79f60fc028e"
    tmp_kwargs = {"dir": os.getcwd(), "suffix": ".cms", "delete": DELETE_TEMP_FILES}
    with NamedTemporaryFile(**tmp_kwargs) as tmp:
        sketch = HeavyHitters(num_hitters=1000, width=1000, depth=5)
        sketch.add("this is a test", 100)
        sketch.export(tmp.name)

        # Hash the file on disk rather than the in-memory sketch.
        self.assertEqual(calc_file_md5(tmp.name), expected_md5)
def test_heavyhitters_init_ce(self):
    """test initializing heavy hitters from a confidence and an error rate"""
    sketch = HeavyHitters(num_hitters=1000, confidence=0.96875, error_rate=0.002)

    # (attribute, expected value) pairs, checked in this order.
    expected = (
        ("width", 1000),
        ("depth", 5),
        ("confidence", 0.96875),
        ("error_rate", 0.002),
        ("elements_added", 0),
        ("heavy_hitters", {}),
        ("number_heavy_hitters", 1000),
    )
    for attr, value in expected:
        self.assertEqual(getattr(sketch, attr), value)
def test_heavyhitters_init_wd(self):
    ''' test initializing heavy hitters from a width and a depth '''
    sketch = HeavyHitters(num_hitters=1000, width=1000, depth=5)

    # dimensions passed to the constructor
    self.assertEqual(sketch.width, 1000)
    self.assertEqual(sketch.depth, 5)

    # confidence / error rate reported for those dimensions
    self.assertEqual(sketch.confidence, 0.96875)
    self.assertEqual(sketch.error_rate, 0.002)

    # a fresh sketch has nothing recorded yet
    self.assertEqual(sketch.elements_added, 0)
    self.assertEqual(sketch.heavy_hitters, {})
    self.assertEqual(sketch.number_heavy_hitters, 1000)
def test_hh_export(self):
    ''' test exporting a heavy hitters sketch '''
    md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
    filename = 'test.cms'
    hh1 = HeavyHitters(num_hitters=1000, width=1000, depth=5)
    hh1.add('this is a test', 100)
    try:
        hh1.export(filename)
        md5_out = calc_file_md5(filename)
    finally:
        # Clean up the exported file even if exporting or hashing raised;
        # the existence check avoids masking the original error when the
        # export never created the file.
        if os.path.exists(filename):
            os.remove(filename)
    self.assertEqual(md5_out, md5_val)
def test_hh_load(self):
    """test loading a heavy hitters sketch back from an exported file"""
    expected_md5 = "fb1c39dd1a73f1ef0d7fc79f60fc028e"
    key = "this is a test"
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".cms", delete=DELETE_TEMP_FILES) as tmp:
        path = tmp.name

        # build and export the source sketch
        source = HeavyHitters(num_hitters=1000, width=1000, depth=5)
        self.assertEqual(source.add(key, 100), 100)
        self.assertEqual(source.elements_added, 100)
        self.assertEqual(source.heavy_hitters, {key: 100})
        source.export(path)
        self.assertEqual(calc_file_md5(path), expected_md5)

        # construct a second sketch directly from the exported file
        loaded = HeavyHitters(num_hitters=1000, filepath=path)
        self.assertEqual(loaded.width, 1000)
        self.assertEqual(loaded.depth, 5)
        self.assertEqual(loaded.elements_added, 100)
        self.assertEqual(loaded.check(key), 100)

        # the counts survive the round trip, the heavy hitter tracking does not
        self.assertEqual(loaded.heavy_hitters, {})
        self.assertEqual(loaded.add(key, 1), 101)
        self.assertEqual(loaded.heavy_hitters, {key: 101})
def test_heavy_hitters_stream():
    """Send add/query commands through ``ggg`` and print the output stream.

    Three batches of four ``('add', item)`` commands are sent, each batch
    followed by a ``'heavy_hitters'`` command.  Nothing is asserted; the
    recent values of the output stream are only printed.
    """
    sketch = HeavyHitters(width=1000, depth=5)
    in_stream = Stream('input')
    out_stream = ggg(in_stream, heavy_hitters_object=sketch)

    # NOTE(review): the query command is a bare string, not a 1-tuple; the
    # original spelled it ('heavy_hitters'), which is the same value.
    # Confirm that this is what ``ggg`` expects.
    query = 'heavy_hitters'

    def adds(items):
        # One ('add', item) command per character of *items*.
        return [('add', item) for item in items]

    commands = (
        adds('aaab') + [query]
        + adds('abca') + [query]
        + adds('bcbb') + [query]
    )
    in_stream.extend(commands)

    run()
    print(recent_values(out_stream))
def test_hh_str(self):
    ''' test the string representation of the heavy hitters sketch '''
    sketch = HeavyHitters(num_hitters=2, width=1000, depth=5)
    self.assertEqual(sketch.add('this is a test', 100), 100)

    # one entry per output line; joined with newlines, no trailing newline
    expected_lines = [
        'Heavy Hitters Count-Min Sketch:',
        '\tWidth: 1000',
        '\tDepth: 5',
        '\tConfidence: 0.96875',
        '\tError Rate: 0.002',
        '\tElements Added: 100',
        '\tNumber Hitters: 2',
        '\tNumber Recorded: 1',
    ]
    self.assertEqual(str(sketch), '\n'.join(expected_lines))
def test_hh_remove_msg(self):
    ''' test remove from heavy hitters exception message '''
    hh1 = HeavyHitters(num_hitters=2, width=1000, depth=5)
    self.assertEqual(hh1.add('this is a test', 3), 3)

    # NOTE(review): the adjacent literals join as "does notmake" (no space).
    # The text is compared verbatim against str(exception), so it is kept
    # as-is; presumably it mirrors the library's message -- confirm before
    # "fixing" it here.
    msg = ('Unable to remove elements in the HeavyHitters '
           'class as it is an un supported action (and does not'
           'make sense)!')

    # assertRaises fails with a descriptive message if nothing is raised,
    # instead of the opaque "True != False".
    with self.assertRaises(NotSupportedError) as caught:
        hh1.remove('this is a test')
    self.assertEqual(str(caught.exception), msg)
def test_hh_frombytes(self):
    """test initializing a heavy hitters sketch from bytes"""
    key = "this is a test"
    original = HeavyHitters(num_hitters=1000, width=1000, depth=5)
    original.add(key, 100)

    # rebuild from the serialized form, asking for a different hitter count
    restored = HeavyHitters.frombytes(bytes(original), num_hitters=500)

    self.assertEqual(restored.width, 1000)
    self.assertEqual(restored.depth, 5)
    self.assertEqual(restored.number_heavy_hitters, 500)
    self.assertEqual(restored.elements_added, 100)
    self.assertEqual(bytes(restored), bytes(original))
    self.assertEqual(restored.check(key), 100)
def test_heavyhitters_add_mult(self):
    """test adding elements with explicit counts to the heavy hitters"""
    is_test = "this is a test"
    also_test = "this is also a test"
    not_test = "this is not a test"
    sketch = HeavyHitters(num_hitters=2, width=1000, depth=5)

    # three keys go in, only two are tracked
    self.assertEqual(sketch.add(is_test, 3), 3)
    self.assertEqual(sketch.add(also_test), 1)
    self.assertEqual(sketch.add(not_test, 2), 2)
    self.assertEqual(sketch.heavy_hitters, {is_test: 3, not_test: 2})

    # raising also_test to 4 swaps it in for not_test
    self.assertEqual(sketch.add(also_test, 3), 4)
    self.assertEqual(sketch.heavy_hitters, {is_test: 3, also_test: 4})

    # four more additions of 2 bring not_test to 10
    for running_total in (4, 6, 8, 10):
        self.assertEqual(sketch.add(not_test, 2), running_total)
    self.assertEqual(sketch.heavy_hitters, {not_test: 10, also_test: 4})
def test_hh_load(self):
    ''' test loading a heavy hitters from file '''
    md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
    filename = 'test.cms'
    hh1 = HeavyHitters(num_hitters=1000, width=1000, depth=5)
    self.assertEqual(hh1.add('this is a test', 100), 100)
    self.assertEqual(hh1.elements_added, 100)
    self.assertEqual(hh1.heavy_hitters, {'this is a test': 100})
    try:
        hh1.export(filename)
        md5_out = calc_file_md5(filename)
        self.assertEqual(md5_out, md5_val)

        # try loading directly from the exported file
        hh2 = HeavyHitters(num_hitters=1000, filepath=filename)
        self.assertEqual(hh2.width, 1000)
        self.assertEqual(hh2.depth, 5)
        self.assertEqual(hh2.elements_added, 100)
        self.assertEqual(hh2.check('this is a test'), 100)
        # show on load that the tracking of heavy hitters is gone
        self.assertEqual(hh2.heavy_hitters, dict())
        self.assertEqual(hh2.add('this is a test', 1), 101)
        self.assertEqual(hh2.heavy_hitters, {'this is a test': 101})
    finally:
        # Remove the exported file even when an assertion above fails, so a
        # failing run does not leave test.cms behind in the working directory.
        if os.path.exists(filename):
            os.remove(filename)
def test_heavyhitters_add(self):
    """test adding elements one at a time to the heavy hitters"""
    is_test = "this is a test"
    also_test = "this is also a test"
    not_test = "this is not a test"
    sketch = HeavyHitters(num_hitters=2, width=1000, depth=5)

    # each add returns the running count for that key
    for count in (1, 2, 3):
        self.assertEqual(sketch.add(is_test), count)
    self.assertEqual(sketch.add(also_test), 1)
    for count in (1, 2):
        self.assertEqual(sketch.add(not_test), count)
    self.assertEqual(sketch.heavy_hitters, {is_test: 3, not_test: 2})

    # once also_test reaches 4 it replaces not_test among the two hitters
    for count in (2, 3, 4):
        self.assertEqual(sketch.add(also_test), count)
    self.assertEqual(sketch.heavy_hitters, {is_test: 3, also_test: 4})
def test_hh_remove_msg(self):
    """test remove from heavy hitters exception message"""
    hh1 = HeavyHitters(num_hitters=2, width=1000, depth=5)
    self.assertEqual(hh1.add("this is a test", 3), 3)

    # NOTE(review): the adjacent literals join as "does notmake" (no space).
    # The text is compared verbatim against str(exception), so it is kept
    # as-is; presumably it mirrors the library's message -- confirm before
    # "fixing" it here.
    msg = (
        "Unable to remove elements in the HeavyHitters "
        "class as it is an un supported action (and does not"
        "make sense)!"
    )

    # assertRaises fails with a descriptive message if nothing is raised,
    # instead of the opaque "True != False".
    with self.assertRaises(NotSupportedError) as caught:
        hh1.remove("this is a test")
    self.assertEqual(str(caught.exception), msg)
def test_hh_clear(self):
    """test that clear() resets the element count and the tracked hitters"""
    key = "this is a test"
    sketch = HeavyHitters(num_hitters=1000, width=1000, depth=5)

    # state of a freshly built sketch
    self.assertEqual(sketch.width, 1000)
    self.assertEqual(sketch.depth, 5)
    self.assertEqual(sketch.confidence, 0.96875)
    self.assertEqual(sketch.error_rate, 0.002)
    self.assertEqual(sketch.elements_added, 0)
    self.assertEqual(sketch.heavy_hitters, {})
    self.assertEqual(sketch.number_heavy_hitters, 1000)

    # record something so there is state to clear
    self.assertEqual(sketch.add(key, 3), 3)
    self.assertEqual(sketch.elements_added, 3)
    self.assertEqual(sketch.heavy_hitters, {key: 3})

    sketch.clear()

    self.assertEqual(sketch.elements_added, 0)
    self.assertEqual(sketch.heavy_hitters, {})
def test_heavyhitters_add(self):
    ''' test adding elements one at a time to the heavy hitters '''
    is_test = 'this is a test'
    also_test = 'this is also a test'
    not_test = 'this is not a test'
    sketch = HeavyHitters(num_hitters=2, width=1000, depth=5)

    # (key, count returned by add) in call order
    first_round = [
        (is_test, 1), (is_test, 2), (is_test, 3),
        (also_test, 1),
        (not_test, 1), (not_test, 2),
    ]
    for key, count in first_round:
        self.assertEqual(sketch.add(key), count)
    self.assertEqual(sketch.heavy_hitters, {is_test: 3, not_test: 2})

    # once also_test reaches 4 it replaces not_test among the two hitters
    for count in (2, 3, 4):
        self.assertEqual(sketch.add(also_test), count)
    self.assertEqual(sketch.heavy_hitters, {is_test: 3, also_test: 4})
def test_heavyhitters_add_mult(self):
    ''' test adding elements with explicit counts to the heavy hitters '''
    is_test = 'this is a test'
    also_test = 'this is also a test'
    not_test = 'this is not a test'
    sketch = HeavyHitters(num_hitters=2, width=1000, depth=5)

    # three keys go in, only two are tracked
    self.assertEqual(sketch.add(is_test, 3), 3)
    self.assertEqual(sketch.add(also_test), 1)
    self.assertEqual(sketch.add(not_test, 2), 2)
    tracked = {is_test: 3, not_test: 2}
    self.assertEqual(sketch.heavy_hitters, tracked)

    # raising also_test to 4 swaps it in for not_test
    self.assertEqual(sketch.add(also_test, 3), 4)
    tracked = {is_test: 3, also_test: 4}
    self.assertEqual(sketch.heavy_hitters, tracked)

    # four more additions of 2 bring not_test to 10
    for running_total in range(4, 11, 2):
        self.assertEqual(sketch.add(not_test, 2), running_total)
    tracked = {not_test: 10, also_test: 4}
    self.assertEqual(sketch.heavy_hitters, tracked)
def test_hh_join(self):
    """test that joining two heavy hitters sketches raises NotSupportedError"""
    left = HeavyHitters(num_hitters=2, width=1000, depth=5)
    right = HeavyHitters(num_hitters=2, width=1000, depth=5)
    with self.assertRaises(NotSupportedError):
        left.join(right)
def test_hh_remove(self):
    """test that removing from heavy hitters raises NotSupportedError"""
    key = "this is a test"
    sketch = HeavyHitters(num_hitters=2, width=1000, depth=5)
    self.assertEqual(sketch.add(key, 3), 3)
    with self.assertRaises(NotSupportedError):
        sketch.remove(key)