예제 #1
0
    def test_hh_bytes(self):
        """Check that bytes() of a heavy hitters sketch hashes to a known MD5."""
        expected_digest = "fb1c39dd1a73f1ef0d7fc79f60fc028e"

        sketch = HeavyHitters(num_hitters=1000, width=1000, depth=5)
        sketch.add("this is a test", 100)

        # Compare a digest of the serialised sketch rather than the raw payload.
        payload = bytes(sketch)
        actual_digest = hashlib.md5(payload).hexdigest()
        self.assertEqual(actual_digest, expected_digest)
예제 #2
0
    def test_hh_export(self):
        ''' test exporting a heavy hitters sketch

            The sketch is written to ``test.cms`` and the MD5 of that file
            is compared with a known digest. '''
        md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
        filename = 'test.cms'
        hh1 = HeavyHitters(num_hitters=1000, width=1000, depth=5)
        hh1.add('this is a test', 100)
        try:
            hh1.export(filename)
            md5_out = calc_file_md5(filename)
        finally:
            # clean up even when export or hashing raises, so a failed run
            # does not leave test.cms behind
            if os.path.exists(filename):
                os.remove(filename)

        self.assertEqual(md5_out, md5_val)
예제 #3
0
 def test_hh_str(self):
     ''' check that str() of a heavy hitters sketch gives the expected summary '''
     sketch = HeavyHitters(num_hitters=2, width=1000, depth=5)
     added = sketch.add('this is a test', 100)
     self.assertEqual(added, 100)

     # one header line followed by tab-indented "name: value" lines
     expected = ('Heavy Hitters Count-Min Sketch:\n'
                 '\tWidth: 1000\n'
                 '\tDepth: 5\n'
                 '\tConfidence: 0.96875\n'
                 '\tError Rate: 0.002\n'
                 '\tElements Added: 100\n'
                 '\tNumber Hitters: 2\n'
                 '\tNumber Recorded: 1')
     rendered = str(sketch)
     self.assertEqual(rendered, expected)
예제 #4
0
    def test_hh_frombytes(self):
        """Round-trip a heavy hitters sketch through bytes() and frombytes()."""
        original = HeavyHitters(num_hitters=1000, width=1000, depth=5)
        original.add("this is a test", 100)
        payload = bytes(original)

        # Rebuild with a different num_hitters than the source sketch used.
        restored = HeavyHitters.frombytes(payload, num_hitters=500)

        expected_attrs = (
            ("width", 1000),
            ("depth", 5),
            ("number_heavy_hitters", 500),
            ("elements_added", 100),
        )
        for attr_name, expected in expected_attrs:
            self.assertEqual(getattr(restored, attr_name), expected)

        self.assertEqual(bytes(restored), bytes(original))
        self.assertEqual(restored.check("this is a test"), 100)
예제 #5
0
 def test_hh_remove_msg(self):
     ''' test remove from heavy hitters exception message

         remove() must raise NotSupportedError and the text of that
         exception must equal the expected message. '''
     hh1 = HeavyHitters(num_hitters=2, width=1000, depth=5)
     self.assertEqual(hh1.add('this is a test', 3), 3)
     # assertRaises reports clearly when nothing is raised, instead of the
     # uninformative "True != False" a manual else-branch would produce
     with self.assertRaises(NotSupportedError) as ctx:
         hh1.remove('this is a test')
     # NOTE(review): the adjacent literals join as "does notmake" (no space);
     # kept as is because it is compared with the raised message - confirm
     msg = ('Unable to remove elements in the HeavyHitters '
            'class as it is an un supported action (and does not'
            'make sense)!')
     self.assertEqual(str(ctx.exception), msg)
예제 #6
0
 def test_hh_remove_msg(self):
     ''' test remove from heavy hitters exception message

         remove() must raise NotSupportedError and the text of that
         exception must equal the expected message. '''
     hh1 = HeavyHitters(num_hitters=2, width=1000, depth=5)
     self.assertEqual(hh1.add('this is a test', 3), 3)
     # assertRaises reports clearly when nothing is raised, instead of the
     # uninformative "True != False" a manual else-branch would produce
     with self.assertRaises(NotSupportedError) as ctx:
         hh1.remove('this is a test')
     # NOTE(review): the adjacent literals join as "does notmake" (no space);
     # kept as is because it is compared with the raised message - confirm
     msg = ('Unable to remove elements in the HeavyHitters '
            'class as it is an un supported action (and does not'
            'make sense)!')
     self.assertEqual(str(ctx.exception), msg)
예제 #7
0
 def test_hh_str(self):
     ''' check that str() of a heavy hitters sketch gives the expected summary '''
     sketch = HeavyHitters(num_hitters=2, width=1000, depth=5)
     added = sketch.add('this is a test', 100)
     self.assertEqual(added, 100)

     # one header line followed by tab-indented "name: value" lines
     expected = ('Heavy Hitters Count-Min Sketch:\n'
                 '\tWidth: 1000\n'
                 '\tDepth: 5\n'
                 '\tConfidence: 0.96875\n'
                 '\tError Rate: 0.002\n'
                 '\tElements Added: 100\n'
                 '\tNumber Hitters: 2\n'
                 '\tNumber Recorded: 1')
     rendered = str(sketch)
     self.assertEqual(rendered, expected)
예제 #8
0
 def test_hh_remove_msg(self):
     """test remove from heavy hitters exception message

     remove() must raise NotSupportedError and the text of that exception
     must equal the expected message.
     """
     hh1 = HeavyHitters(num_hitters=2, width=1000, depth=5)
     self.assertEqual(hh1.add("this is a test", 3), 3)
     # assertRaises reports clearly when nothing is raised, instead of the
     # uninformative "True != False" a manual else-branch would produce
     with self.assertRaises(NotSupportedError) as ctx:
         hh1.remove("this is a test")
     # NOTE(review): the adjacent literals join as "does notmake" (no space);
     # kept as is because it is compared with the raised message - confirm
     msg = (
         "Unable to remove elements in the HeavyHitters "
         "class as it is an un supported action (and does not"
         "make sense)!"
     )
     self.assertEqual(str(ctx.exception), msg)
예제 #9
0
def test_heavy_hitters_window():
    """Push twelve items through heavy_hitters_window and print the output.

    The input stream is connected to heavy_hitters_window with a window size
    of 4, the items are appended, run() is called, and the recent values of
    the output stream are printed. Nothing is asserted.
    """
    sketch = HeavyHitters(width=1000, depth=5)
    source = Stream('input')
    size = 4
    result = heavy_hitters_window(source, size, sketch)

    # Items grouped in runs of four, matching the window size.
    first_window = ['a', 'a', 'a', 'b']
    second_window = ['a', 'b', 'c', 'a']
    third_window = ['b', 'c', 'b', 'b']
    source.extend(first_window + second_window + third_window)

    run()
    print(recent_values(result))
예제 #10
0
    def test_hh_clear(self):
        """Verify clear() resets the element count and the tracked hitters."""
        sketch = HeavyHitters(num_hitters=1000, width=1000, depth=5)

        # Freshly built sketch: configured dimensions, nothing recorded yet.
        self.assertEqual(sketch.width, 1000)
        self.assertEqual(sketch.depth, 5)
        self.assertEqual(sketch.confidence, 0.96875)
        self.assertEqual(sketch.error_rate, 0.002)
        self.assertEqual(sketch.elements_added, 0)
        self.assertEqual(sketch.heavy_hitters, {})
        self.assertEqual(sketch.number_heavy_hitters, 1000)

        # One item added three times shows up in both counters.
        returned = sketch.add("this is a test", 3)
        self.assertEqual(returned, 3)
        self.assertEqual(sketch.elements_added, 3)
        self.assertEqual(sketch.heavy_hitters, {"this is a test": 3})

        # After clear() both counters are back to empty.
        sketch.clear()
        self.assertEqual(sketch.elements_added, 0)
        self.assertEqual(sketch.heavy_hitters, {})
예제 #11
0
    def test_hh_clear(self):
        ''' verify clear() resets the element count and the tracked hitters '''
        sketch = HeavyHitters(num_hitters=1000, width=1000, depth=5)

        # freshly built sketch: configured dimensions, nothing recorded yet
        self.assertEqual(sketch.width, 1000)
        self.assertEqual(sketch.depth, 5)
        self.assertEqual(sketch.confidence, 0.96875)
        self.assertEqual(sketch.error_rate, 0.002)
        self.assertEqual(sketch.elements_added, 0)
        self.assertEqual(sketch.heavy_hitters, {})
        self.assertEqual(sketch.number_heavy_hitters, 1000)

        # one item added three times shows up in both counters
        returned = sketch.add('this is a test', 3)
        self.assertEqual(returned, 3)
        self.assertEqual(sketch.elements_added, 3)
        self.assertEqual(sketch.heavy_hitters, {'this is a test': 3})

        # after clear() both counters are back to empty
        sketch.clear()
        self.assertEqual(sketch.elements_added, 0)
        self.assertEqual(sketch.heavy_hitters, {})
예제 #12
0
 def test_heavyhitters_init_ce(self):
     """Build a sketch from confidence and error_rate and check its attributes."""
     sketch = HeavyHitters(num_hitters=1000, confidence=0.96875, error_rate=0.002)

     # Width and depth are not passed in; the test expects 1000 and 5.
     self.assertEqual(sketch.width, 1000)
     self.assertEqual(sketch.depth, 5)
     self.assertEqual(sketch.confidence, 0.96875)
     self.assertEqual(sketch.error_rate, 0.002)

     # Nothing has been added yet.
     self.assertEqual(sketch.elements_added, 0)
     self.assertEqual(sketch.heavy_hitters, {})
     self.assertEqual(sketch.number_heavy_hitters, 1000)
예제 #13
0
 def test_heavyhitters_init_wd(self):
     ''' build a sketch from width and depth and check its attributes '''
     sketch = HeavyHitters(num_hitters=1000, width=1000, depth=5)

     # confidence and error rate are not passed in; the test expects
     # 0.96875 and 0.002
     self.assertEqual(sketch.width, 1000)
     self.assertEqual(sketch.depth, 5)
     self.assertEqual(sketch.confidence, 0.96875)
     self.assertEqual(sketch.error_rate, 0.002)

     # nothing has been added yet
     self.assertEqual(sketch.elements_added, 0)
     self.assertEqual(sketch.heavy_hitters, {})
     self.assertEqual(sketch.number_heavy_hitters, 1000)
예제 #14
0
 def test_heavyhitters_add(self):
     """Add items one at a time and check which two are tracked as hitters."""
     sketch = HeavyHitters(num_hitters=2, width=1000, depth=5)

     # Each add() is expected to return the running count for that item.
     opening_adds = (
         ("this is a test", 1),
         ("this is a test", 2),
         ("this is a test", 3),
         ("this is also a test", 1),
         ("this is not a test", 1),
         ("this is not a test", 2),
     )
     for item, running_count in opening_adds:
         self.assertEqual(sketch.add(item), running_count)
     self.assertEqual(
         sketch.heavy_hitters,
         {"this is a test": 3, "this is not a test": 2},
     )

     # Further adds push "this is also a test" past "this is not a test".
     for running_count in (2, 3, 4):
         self.assertEqual(sketch.add("this is also a test"), running_count)
     self.assertEqual(
         sketch.heavy_hitters,
         {"this is a test": 3, "this is also a test": 4},
     )
예제 #15
0
 def test_heavyhitters_add_mult(self):
     """Add items with explicit counts and check the tracked hitters."""
     sketch = HeavyHitters(num_hitters=2, width=1000, depth=5)

     self.assertEqual(sketch.add("this is a test", 3), 3)
     self.assertEqual(sketch.add("this is also a test"), 1)
     self.assertEqual(sketch.add("this is not a test", 2), 2)
     self.assertEqual(
         sketch.heavy_hitters,
         {"this is a test": 3, "this is not a test": 2},
     )

     # Three more of "this is also a test" takes it to four.
     self.assertEqual(sketch.add("this is also a test", 3), 4)
     self.assertEqual(
         sketch.heavy_hitters,
         {"this is a test": 3, "this is also a test": 4},
     )

     # Four adds of two each take "this is not a test" from 2 up to 10.
     for running_count in (4, 6, 8, 10):
         self.assertEqual(sketch.add("this is not a test", 2), running_count)
     self.assertEqual(
         sketch.heavy_hitters,
         {"this is not a test": 10, "this is also a test": 4},
     )
예제 #16
0
def test_heavy_hitters_stream():
    """Send 'add' commands and heavy-hitter queries through ggg; print output.

    The input stream is connected to ggg with a HeavyHitters object, the
    commands are appended, run() is called, and the recent values of the
    output stream are printed. Nothing is asserted.
    """
    sketch = HeavyHitters(width=1000, depth=5)
    source = Stream('input')
    result = ggg(source, heavy_hitters_object=sketch)

    # NOTE(review): the query marker is a plain string, not a 1-tuple -
    # confirm that this is what ggg expects.
    query = 'heavy_hitters'
    commands = [
        ('add', 'a'), ('add', 'a'), ('add', 'a'), ('add', 'b'),
        query,
        ('add', 'a'), ('add', 'b'), ('add', 'c'), ('add', 'a'),
        query,
        ('add', 'b'), ('add', 'c'), ('add', 'b'), ('add', 'b'),
        query,
    ]
    source.extend(commands)

    run()
    print(recent_values(result))
예제 #17
0
 def test_hh_export(self):
     """Export a sketch to a temporary .cms file and compare the file's MD5."""
     expected_md5 = "fb1c39dd1a73f1ef0d7fc79f60fc028e"
     with NamedTemporaryFile(
         dir=os.getcwd(),
         suffix=".cms",
         delete=DELETE_TEMP_FILES,
     ) as tmp:
         sketch = HeavyHitters(num_hitters=1000, width=1000, depth=5)
         sketch.add("this is a test", 100)
         export_path = tmp.name
         sketch.export(export_path)
         # Hash while the temporary file is still open.
         actual_md5 = calc_file_md5(export_path)
     self.assertEqual(actual_md5, expected_md5)
예제 #18
0
    def test_hh_export(self):
        ''' test exporting a heavy hitters sketch

            The sketch is written to ``test.cms`` and the MD5 of that file
            is compared with a known digest. '''
        md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
        filename = 'test.cms'
        hh1 = HeavyHitters(num_hitters=1000, width=1000, depth=5)
        hh1.add('this is a test', 100)
        try:
            hh1.export(filename)
            md5_out = calc_file_md5(filename)
        finally:
            # clean up even when export or hashing raises, so a failed run
            # does not leave test.cms behind
            if os.path.exists(filename):
                os.remove(filename)

        self.assertEqual(md5_out, md5_val)
예제 #19
0
    def test_hh_load(self):
        """Export a sketch to a temporary file, then load a new sketch from it."""
        expected_md5 = "fb1c39dd1a73f1ef0d7fc79f60fc028e"
        with NamedTemporaryFile(
            dir=os.getcwd(),
            suffix=".cms",
            delete=DELETE_TEMP_FILES,
        ) as tmp:
            source = HeavyHitters(num_hitters=1000, width=1000, depth=5)
            self.assertEqual(source.add("this is a test", 100), 100)
            self.assertEqual(source.elements_added, 100)
            self.assertEqual(source.heavy_hitters, {"this is a test": 100})

            export_path = tmp.name
            source.export(export_path)
            self.assertEqual(calc_file_md5(export_path), expected_md5)

            # Build a second sketch straight from the exported file.
            loaded = HeavyHitters(num_hitters=1000, filepath=export_path)
            self.assertEqual(loaded.width, 1000)
            self.assertEqual(loaded.depth, 5)
            self.assertEqual(loaded.elements_added, 100)
            self.assertEqual(loaded.check("this is a test"), 100)

            # The loaded sketch keeps the counts but starts with no tracked
            # heavy hitters; the next add() records the item again.
            self.assertEqual(loaded.heavy_hitters, {})
            self.assertEqual(loaded.add("this is a test", 1), 101)
            self.assertEqual(loaded.heavy_hitters, {"this is a test": 101})
예제 #20
0
 def test_heavyhitters_add(self):
     ''' add items one at a time and check which two are tracked as hitters '''
     sketch = HeavyHitters(num_hitters=2, width=1000, depth=5)

     # each add() is expected to return the running count for that item
     opening_adds = (
         ('this is a test', 1),
         ('this is a test', 2),
         ('this is a test', 3),
         ('this is also a test', 1),
         ('this is not a test', 1),
         ('this is not a test', 2),
     )
     for item, running_count in opening_adds:
         self.assertEqual(sketch.add(item), running_count)
     tracked = {'this is a test': 3, 'this is not a test': 2}
     self.assertEqual(sketch.heavy_hitters, tracked)

     # further adds push 'this is also a test' past 'this is not a test'
     for running_count in (2, 3, 4):
         self.assertEqual(sketch.add('this is also a test'), running_count)
     tracked = {'this is a test': 3, 'this is also a test': 4}
     self.assertEqual(sketch.heavy_hitters, tracked)
예제 #21
0
    def test_hh_load(self):
        ''' test loading a heavy hitters from file

            A sketch is exported to ``test.cms``, the file's MD5 is checked,
            and a second sketch is built from that file via ``filepath``. '''
        md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
        filename = 'test.cms'
        hh1 = HeavyHitters(num_hitters=1000, width=1000, depth=5)
        self.assertEqual(hh1.add('this is a test', 100), 100)
        self.assertEqual(hh1.elements_added, 100)
        self.assertEqual(hh1.heavy_hitters, {'this is a test': 100})
        try:
            hh1.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5_out, md5_val)

            # try loading directly from the file!
            hh2 = HeavyHitters(num_hitters=1000, filepath=filename)
            self.assertEqual(hh2.width, 1000)
            self.assertEqual(hh2.depth, 5)
            self.assertEqual(hh2.elements_added, 100)
            self.assertEqual(hh2.check('this is a test'), 100)
            # show on load that the tracking of heavy hitters is gone
            self.assertEqual(hh2.heavy_hitters, dict())
            self.assertEqual(hh2.add('this is a test', 1), 101)
            self.assertEqual(hh2.heavy_hitters, {'this is a test': 101})
        finally:
            # remove the exported file even when an assertion above fails,
            # so test.cms is never left behind
            if os.path.exists(filename):
                os.remove(filename)
예제 #22
0
 def test_heavyhitters_add(self):
     ''' add items one at a time and check which two are tracked as hitters '''
     sketch = HeavyHitters(num_hitters=2, width=1000, depth=5)

     # each add() is expected to return the running count for that item
     opening_adds = (
         ('this is a test', 1),
         ('this is a test', 2),
         ('this is a test', 3),
         ('this is also a test', 1),
         ('this is not a test', 1),
         ('this is not a test', 2),
     )
     for item, running_count in opening_adds:
         self.assertEqual(sketch.add(item), running_count)
     tracked = {'this is a test': 3, 'this is not a test': 2}
     self.assertEqual(sketch.heavy_hitters, tracked)

     # further adds push 'this is also a test' past 'this is not a test'
     for running_count in (2, 3, 4):
         self.assertEqual(sketch.add('this is also a test'), running_count)
     tracked = {'this is a test': 3, 'this is also a test': 4}
     self.assertEqual(sketch.heavy_hitters, tracked)
예제 #23
0
 def test_heavyhitters_add_mult(self):
     ''' add items with explicit counts and check the tracked hitters '''
     sketch = HeavyHitters(num_hitters=2, width=1000, depth=5)

     self.assertEqual(sketch.add('this is a test', 3), 3)
     self.assertEqual(sketch.add('this is also a test'), 1)
     self.assertEqual(sketch.add('this is not a test', 2), 2)
     tracked = {'this is a test': 3, 'this is not a test': 2}
     self.assertEqual(sketch.heavy_hitters, tracked)

     # three more of 'this is also a test' takes it to four
     self.assertEqual(sketch.add('this is also a test', 3), 4)
     tracked = {'this is a test': 3, 'this is also a test': 4}
     self.assertEqual(sketch.heavy_hitters, tracked)

     # four adds of two each take 'this is not a test' from 2 up to 10
     for running_count in (4, 6, 8, 10):
         self.assertEqual(sketch.add('this is not a test', 2), running_count)
     tracked = {'this is not a test': 10, 'this is also a test': 4}
     self.assertEqual(sketch.heavy_hitters, tracked)
예제 #24
0
 def test_heavyhitters_add_mult(self):
     ''' add items with explicit counts and check the tracked hitters '''
     sketch = HeavyHitters(num_hitters=2, width=1000, depth=5)

     self.assertEqual(sketch.add('this is a test', 3), 3)
     self.assertEqual(sketch.add('this is also a test'), 1)
     self.assertEqual(sketch.add('this is not a test', 2), 2)
     tracked = {'this is a test': 3, 'this is not a test': 2}
     self.assertEqual(sketch.heavy_hitters, tracked)

     # three more of 'this is also a test' takes it to four
     self.assertEqual(sketch.add('this is also a test', 3), 4)
     tracked = {'this is a test': 3, 'this is also a test': 4}
     self.assertEqual(sketch.heavy_hitters, tracked)

     # four adds of two each take 'this is not a test' from 2 up to 10
     for running_count in (4, 6, 8, 10):
         self.assertEqual(sketch.add('this is not a test', 2), running_count)
     tracked = {'this is not a test': 10, 'this is also a test': 4}
     self.assertEqual(sketch.heavy_hitters, tracked)
예제 #25
0
    def test_hh_load(self):
        ''' test loading a heavy hitters from file

            A sketch is exported to ``test.cms``, the file's MD5 is checked,
            and a second sketch is built from that file via ``filepath``. '''
        md5_val = '61d2ea9d0cb09b7bb284e1cf1a860449'
        filename = 'test.cms'
        hh1 = HeavyHitters(num_hitters=1000, width=1000, depth=5)
        self.assertEqual(hh1.add('this is a test', 100), 100)
        self.assertEqual(hh1.elements_added, 100)
        self.assertEqual(hh1.heavy_hitters, {'this is a test': 100})
        try:
            hh1.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5_out, md5_val)

            # try loading directly from the file!
            hh2 = HeavyHitters(num_hitters=1000, filepath=filename)
            self.assertEqual(hh2.width, 1000)
            self.assertEqual(hh2.depth, 5)
            self.assertEqual(hh2.elements_added, 100)
            self.assertEqual(hh2.check('this is a test'), 100)
            # show on load that the tracking of heavy hitters is gone
            self.assertEqual(hh2.heavy_hitters, dict())
            self.assertEqual(hh2.add('this is a test', 1), 101)
            self.assertEqual(hh2.heavy_hitters, {'this is a test': 101})
        finally:
            # remove the exported file even when an assertion above fails,
            # so test.cms is never left behind
            if os.path.exists(filename):
                os.remove(filename)
예제 #26
0
 def test_hh_join(self):
     """Check that joining two heavy hitters sketches raises NotSupportedError."""
     left = HeavyHitters(num_hitters=2, width=1000, depth=5)
     right = HeavyHitters(num_hitters=2, width=1000, depth=5)
     with self.assertRaises(NotSupportedError):
         left.join(right)
예제 #27
0
 def test_hh_remove(self):
     """Check that remove() on a heavy hitters sketch raises NotSupportedError."""
     sketch = HeavyHitters(num_hitters=2, width=1000, depth=5)
     added = sketch.add("this is a test", 3)
     self.assertEqual(added, 3)
     with self.assertRaises(NotSupportedError):
         sketch.remove("this is a test")
예제 #28
0
 def test_hh_remove(self):
     ''' check that remove() on a heavy hitters sketch raises NotSupportedError '''
     sketch = HeavyHitters(num_hitters=2, width=1000, depth=5)
     added = sketch.add('this is a test', 3)
     self.assertEqual(added, 3)
     with self.assertRaises(NotSupportedError):
         sketch.remove('this is a test')