Ejemplo n.º 1
0
 def test_c_cuckoo_filter_bytes(self):
     """Check the bytes form of a populated counting cuckoo filter.

     The strings "0".."99" are inserted into a filter created with
     ``auto_expand=False`` and the MD5 digest of ``bytes(filter)`` is
     compared with a known value.
     """
     expected_digest = "6a98c2df1ec9fbb4f75f8e6392696b9b"
     filt = CountingCuckooFilter(capacity=1000, bucket_size=2, auto_expand=False)
     for key in map(str, range(100)):
         filt.add(key)
     serialized = bytes(filt)
     self.assertEqual(expected_digest, hashlib.md5(serialized).hexdigest())
Ejemplo n.º 2
0
 def runner():
     """Insert 175 keys into a capacity-100 filter built with auto_expand=False."""
     # NOTE(review): presumably handed to an assertRaises call by the
     # enclosing test - confirm against the caller
     settings = dict(capacity=100, bucket_size=2, max_swaps=100, auto_expand=False)
     filt = CountingCuckooFilter(**settings)
     for key in map(str, range(175)):
         filt.add(key)
Ejemplo n.º 3
0
 def test_c_cuckoo_filter_auto_exp(self):
     """Insert past the initial capacity and check that the filter grew."""
     total = 375
     filt = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=100)
     # inserting this many keys would fail if the filter did not expand
     for n in range(total):
         filt.add(str(n))
     self.assertEqual(400, filt.capacity)
     self.assertEqual(total, filt.elements_added)
     # every inserted key must still be reported with a positive count
     for n in range(total):
         self.assertGreater(filt.check(str(n)), 0)
Ejemplo n.º 4
0
 def test_c_cuckoo_filter_expand_els(self):
     """Check that inserted keys stay findable after an explicit ``expand()``."""
     keys = [str(n) for n in range(200)]
     filt = CountingCuckooFilter()
     for key in keys:
         filt.add(key)
     filt.expand()
     for key in keys:
         self.assertGreater(filt.check(key), 0)
     # capacity expected after one expansion of a default-constructed filter
     self.assertEqual(20000, filt.capacity)
Ejemplo n.º 5
0
 def test_c_cuckoo_filter_auto_exp(self):
     """Fill a small filter beyond its capacity and verify it expanded."""
     keys = [str(n) for n in range(375)]
     growing = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=100)
     # this would fail if the filter doesn't expand
     for key in keys:
         growing.add(key)
     self.assertEqual(400, growing.capacity)
     self.assertEqual(375, growing.elements_added)
     for key in keys:
         self.assertGreater(growing.check(key), 0)
Ejemplo n.º 6
0
 def test_c_cuckoo_filter_expand_els(self):
     """Expand a populated filter by hand and re-check its contents."""
     count = 200
     filt = CountingCuckooFilter()
     for n in range(count):
         filt.add(str(n))
     filt.expand()
     # all keys added before the expansion must still be present
     for n in range(count):
         self.assertGreater(filt.check(str(n)), 0)
     self.assertEqual(20000, filt.capacity)
Ejemplo n.º 7
0
 def test_c_cuckoo_full_msg(self):
     """Verify the message of the error raised by a full counting cuckoo filter.

     A capacity-100 filter built with ``auto_expand=False`` is fed 175
     keys. The test requires a ``CuckooFilterFullError`` carrying the
     expected text; it fails if no such error is raised.
     """
     cko = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=100, auto_expand=False)
     # assertRaises fails the test on its own when nothing is raised, which
     # replaces the former ``else: self.assertEqual(True, False)`` branch
     with self.assertRaises(CuckooFilterFullError) as ctx:
         for i in range(175):
             cko.add(str(i))
     msg = "The CountingCuckooFilter is currently full"
     self.assertEqual(str(ctx.exception), msg)
Ejemplo n.º 8
0
    def test_c_cuckoo_filter_export(self):
        """Export a populated counting cuckoo filter and check the file's MD5."""
        expected_digest = "6a98c2df1ec9fbb4f75f8e6392696b9b"
        tmp_options = dict(dir=os.getcwd(), suffix=".cck", delete=DELETE_TEMP_FILES)
        with NamedTemporaryFile(**tmp_options) as tmp:
            filt = CountingCuckooFilter(capacity=1000, bucket_size=2, auto_expand=False)
            for key in map(str, range(100)):
                filt.add(key)

            # write to the temporary file's path, then hash what landed on disk
            filt.export(tmp.name)
            self.assertEqual(expected_digest, calc_file_md5(tmp.name))
Ejemplo n.º 9
0
 def test_c_cuckoo_full_msg(self):
     """Verify the message of the error raised by a full counting cuckoo filter.

     175 keys are added to a capacity-100 filter built with
     ``auto_expand=False``. A ``CuckooFilterFullError`` with the expected
     message must be raised; the test fails if it is not.
     """
     cko = CountingCuckooFilter(capacity=100, bucket_size=2,
                                max_swaps=100, auto_expand=False)
     # the context manager reports a failure by itself when no exception
     # occurs, so no manual ``else`` branch is needed
     with self.assertRaises(CuckooFilterFullError) as ctx:
         for i in range(175):
             cko.add(str(i))
     msg = 'The CountingCuckooFilter is currently full'
     self.assertEqual(str(ctx.exception), msg)
Ejemplo n.º 10
0
    def test_c_cuckoo_filter_export(self):
        """Export a populated counting cuckoo filter and check the file's MD5.

        The exported file is deleted in a ``finally`` block so that a failing
        assertion or export does not leave ``./test.cck`` behind.
        """
        filename = './test.cck'
        md5sum = '60e7072e44947b9b6e5d7bd08a64d8a3'
        cko = CountingCuckooFilter(capacity=1000, bucket_size=2,
                                   auto_expand=False)
        for i in range(100):
            cko.add(str(i))

        try:
            cko.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5sum, md5_out)
        finally:
            # export may have failed before the file was created
            if os.path.exists(filename):
                os.remove(filename)
Ejemplo n.º 11
0
    def test_c_cuckoo_filter_frombytes(self):
        """Round-trip a counting cuckoo filter through ``bytes`` and ``frombytes``."""
        keys = [str(n) for n in range(100)]
        original = CountingCuckooFilter(capacity=1000, bucket_size=2, auto_expand=False)
        for key in keys:
            original.add(key)
        payload = bytes(original)

        restored = CountingCuckooFilter.frombytes(payload)

        # the rebuilt filter must serialize to the very same bytes ...
        self.assertEqual(payload, bytes(restored))
        # ... report every inserted key, and not report a key never added
        for key in keys:
            self.assertTrue(restored.check(key))
        self.assertFalse(restored.check("999"))
Ejemplo n.º 12
0
 def test_c_cuckoo_filter_str(self):
     """Compare ``str(filter)`` with the expected multi-line summary."""
     filt = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=100)
     for n in range(75):
         filt.add(str(n))
     # one entry per output line; joined with newlines, no trailing newline
     expected_lines = [
         'CountingCuckooFilter:',
         '\tCapacity: 100',
         '\tTotal Bins: 200',
         '\tLoad Factor: 37.5%',
         '\tInserted Elements: 75',
         '\tMax Swaps: 100',
         '\tExpansion Rate: 2',
         '\tAuto Expand: True',
     ]
     self.assertEqual(str(filt), '\n'.join(expected_lines))
Ejemplo n.º 13
0
    def test_c_cuckoo_filter_export(self):
        """Export a populated counting cuckoo filter and check the file's MD5.

        Cleanup runs in a ``finally`` block, so ``./test.cck`` is removed even
        when the export or the digest comparison fails.
        """
        filename = "./test.cck"
        md5sum = "60e7072e44947b9b6e5d7bd08a64d8a3"
        cko = CountingCuckooFilter(capacity=1000,
                                   bucket_size=2,
                                   auto_expand=False)
        for i in range(100):
            cko.add(str(i))

        try:
            cko.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5sum, md5_out)
        finally:
            # nothing to delete if export failed before creating the file
            if os.path.exists(filename):
                os.remove(filename)
Ejemplo n.º 14
0
 def test_c_cuckoo_filter_str(self):
     """Check the text produced by ``str()`` for a filter holding 75 keys."""
     filt = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=100)
     for key in map(str, range(75)):
         filt.add(key)
     # expected summary, one list entry per line of output
     expected = "\n".join([
         "CountingCuckooFilter:",
         "\tCapacity: 100",
         "\tTotal Bins: 200",
         "\tLoad Factor: 37.5%",
         "\tInserted Elements: 75",
         "\tMax Swaps: 100",
         "\tExpansion Rate: 2",
         "\tAuto Expand: True",
     ])
     self.assertEqual(str(filt), expected)
Ejemplo n.º 15
0
    def test_c_cuckoo_filter_load(self):
        """Export a counting cuckoo filter, reload it from the file and compare."""
        expected_digest = "6a98c2df1ec9fbb4f75f8e6392696b9b"
        with NamedTemporaryFile(dir=os.getcwd(), suffix=".cck", delete=DELETE_TEMP_FILES) as tmp:
            path = tmp.name
            source = CountingCuckooFilter(capacity=1000, bucket_size=2, auto_expand=False)
            for n in range(100):
                source.add(str(n))

            source.export(path)
            self.assertEqual(expected_digest, calc_file_md5(path))

            loaded = CountingCuckooFilter(filepath=path)
            # each key was inserted exactly once above
            for n in range(100):
                self.assertEqual(loaded.check(str(n)), 1)

            # properties reported by the reloaded filter
            self.assertEqual(1000, loaded.capacity)
            self.assertEqual(2, loaded.bucket_size)
            self.assertEqual(500, loaded.max_swaps)
            self.assertEqual(0.05, loaded.load_factor())
Ejemplo n.º 16
0
    def test_c_cuckoo_filter_load(self):
        """Export a counting cuckoo filter, reload it from disk and compare.

        The exported file is removed in a ``finally`` block, so a failing
        assertion does not leave ``./test.cck`` behind.
        """
        filename = './test.cck'
        md5sum = '60e7072e44947b9b6e5d7bd08a64d8a3'
        cko = CountingCuckooFilter(capacity=1000, bucket_size=2,
                                   auto_expand=False)
        for i in range(100):
            cko.add(str(i))

        try:
            cko.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5sum, md5_out)

            ckf = CountingCuckooFilter(filepath=filename)
            # every key was added once to the exported filter
            for i in range(100):
                self.assertEqual(ckf.check(str(i)), 1)

            self.assertEqual(1000, ckf.capacity)
            self.assertEqual(2, ckf.bucket_size)
            self.assertEqual(500, ckf.max_swaps)
            self.assertEqual(0.05, ckf.load_factor())
        finally:
            # export may have failed before the file was created
            if os.path.exists(filename):
                os.remove(filename)
Ejemplo n.º 17
0
    def test_c_cuckoo_filter_load(self):
        """Export a counting cuckoo filter, reload it from disk and compare.

        Cleanup happens in a ``finally`` block so that ``./test.cck`` is
        deleted even when one of the assertions fails.
        """
        filename = "./test.cck"
        md5sum = "60e7072e44947b9b6e5d7bd08a64d8a3"
        cko = CountingCuckooFilter(capacity=1000,
                                   bucket_size=2,
                                   auto_expand=False)
        for i in range(100):
            cko.add(str(i))

        try:
            cko.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5sum, md5_out)

            ckf = CountingCuckooFilter(filepath=filename)
            # every key was added once to the exported filter
            for i in range(100):
                self.assertEqual(ckf.check(str(i)), 1)

            self.assertEqual(1000, ckf.capacity)
            self.assertEqual(2, ckf.bucket_size)
            self.assertEqual(500, ckf.max_swaps)
            self.assertEqual(0.05, ckf.load_factor())
        finally:
            # nothing to delete if export failed before creating the file
            if os.path.exists(filename):
                os.remove(filename)
Ejemplo n.º 18
0
 def test_c_cuckoo_filter_add(self):
     """Check that ``elements_added`` grows by one with each distinct insert."""
     phrases = (
         "this is a test",
         "this is another test",
         "this is yet another test",
     )
     filt = CountingCuckooFilter()
     # running_total is the count expected right after each insertion
     for running_total, phrase in enumerate(phrases, start=1):
         filt.add(phrase)
         self.assertEqual(filt.elements_added, running_total)
Ejemplo n.º 19
0
 def test_c_cuckoo_filter_add(self):
     """Add three distinct strings and check ``elements_added`` after each."""
     filt = CountingCuckooFilter()
     steps = [
         ('this is a test', 1),
         ('this is another test', 2),
         ('this is yet another test', 3),
     ]
     for phrase, expected_total in steps:
         filt.add(phrase)
         self.assertEqual(filt.elements_added, expected_total)
Ejemplo n.º 20
0
    def test_c_cuckoo_filter_remove(self):
        """Remove a stored string and check the counters and membership."""
        first = 'this is a test'
        second = 'this is another test'
        third = 'this is yet another test'
        filt = CountingCuckooFilter()
        for running_total, phrase in enumerate((first, second, third), start=1):
            filt.add(phrase)
            self.assertEqual(filt.elements_added, running_total)
        self.assertEqual(filt.unique_elements, 3)

        # three repeat insertions raise the total but not the unique count
        for _ in range(3):
            filt.add(first)
        self.assertEqual(filt.elements_added, 6)
        self.assertEqual(filt.unique_elements, 3)

        removed = filt.remove(second)
        self.assertTrue(removed)
        self.assertEqual(filt.elements_added, 5)
        self.assertEqual(filt.unique_elements, 2)

        # only the removed string may be missing afterwards
        self.assertTrue(filt.check(first))
        self.assertFalse(filt.check(second))
        self.assertTrue(filt.check(third))
Ejemplo n.º 21
0
    def test_c_cuckoo_filter_remove(self):
        """Check counters and lookups after removing one stored string."""
        repeated = "this is a test"
        victim = "this is another test"
        survivor = "this is yet another test"
        filt = CountingCuckooFilter()
        for expected_total, phrase in enumerate((repeated, victim, survivor), start=1):
            filt.add(phrase)
            self.assertEqual(filt.elements_added, expected_total)
        self.assertEqual(filt.unique_elements, 3)

        # re-adding an existing string: total goes up, unique count does not
        for _ in range(3):
            filt.add(repeated)
        self.assertEqual(filt.elements_added, 6)
        self.assertEqual(filt.unique_elements, 3)

        outcome = filt.remove(victim)
        self.assertTrue(outcome)
        self.assertEqual(filt.elements_added, 5)
        self.assertEqual(filt.unique_elements, 2)

        self.assertTrue(filt.check(repeated))
        self.assertFalse(filt.check(victim))
        self.assertTrue(filt.check(survivor))
Ejemplo n.º 22
0
 def test_c_cuckoo_filter_in(self):
     """Check membership of a counting cuckoo filter through the ``in`` operator.

     Three strings are added and must be reported as members; two strings
     that were never added must not be.
     """
     cko = CountingCuckooFilter()
     cko.add("this is a test")
     cko.add("this is another test")
     cko.add("this is yet another test")
     # assertIn / assertNotIn pass and fail exactly like comparing the
     # ``in`` result with True / False, but name the operands on failure
     self.assertIn("this is a test", cko)
     self.assertIn("this is another test", cko)
     self.assertIn("this is yet another test", cko)
     self.assertNotIn("this is not another test", cko)
     self.assertNotIn("this is not a test", cko)
Ejemplo n.º 23
0
 def test_c_cuckoo_filter_check(self):
     """Call ``check()`` for added and for never-added strings."""
     present = (
         "this is a test",
         "this is another test",
         "this is yet another test",
     )
     absent = ("this is not another test", "this is not a test")
     filt = CountingCuckooFilter()
     for phrase in present:
         filt.add(phrase)
     # compared with == against True / False, exactly as before
     for phrase in present:
         self.assertEqual(filt.check(phrase), True)
     for phrase in absent:
         self.assertEqual(filt.check(phrase), False)
Ejemplo n.º 24
0
 def test_c_cuckoo_filter_check(self):
     """Verify what ``check()`` reports for stored and for unknown strings."""
     filt = CountingCuckooFilter()
     stored = ['this is a test', 'this is another test', 'this is yet another test']
     for phrase in stored:
         filt.add(phrase)
     # (string, value its check() result must equal)
     expectations = [(phrase, True) for phrase in stored]
     expectations.append(('this is not another test', False))
     expectations.append(('this is not a test', False))
     for phrase, expected in expectations:
         self.assertEqual(filt.check(phrase), expected)
Ejemplo n.º 25
0
 def test_c_cuckoo_filter_in(self):
     """Check membership of a counting cuckoo filter through the ``in`` operator.

     The three added strings must be found; the two strings that were never
     added must not be.
     """
     cko = CountingCuckooFilter()
     cko.add('this is a test')
     cko.add('this is another test')
     cko.add('this is yet another test')
     # dedicated membership assertions give the same verdict as
     # assertEqual(<x in cko>, True/False) and show the operands on failure
     self.assertIn('this is a test', cko)
     self.assertIn('this is another test', cko)
     self.assertIn('this is yet another test', cko)
     self.assertNotIn('this is not another test', cko)
     self.assertNotIn('this is not a test', cko)
Ejemplo n.º 26
0
 def test_c_cuckoo_filter_in(self):
     """Exercise the ``in`` operator on a counting cuckoo filter.

     Strings that were added must test as members, strings that were not
     added must test as non-members.
     """
     cko = CountingCuckooFilter()
     cko.add('this is a test')
     cko.add('this is another test')
     cko.add('this is yet another test')
     # assertIn / assertNotIn evaluate the same ``in`` expression as the
     # former assertEqual(..., True/False) calls, with clearer failure output
     self.assertIn('this is a test', cko)
     self.assertIn('this is another test', cko)
     self.assertIn('this is yet another test', cko)
     self.assertNotIn('this is not another test', cko)
     self.assertNotIn('this is not a test', cko)
Ejemplo n.º 27
0
 def test_c_cuckoo_filter_check(self):
     """Check the result of ``check()`` for added and missing strings."""
     added = ('this is a test', 'this is another test', 'this is yet another test')
     missing = ('this is not another test', 'this is not a test')
     filt = CountingCuckooFilter()
     for phrase in added:
         filt.add(phrase)
     for phrase in added:
         self.assertEqual(filt.check(phrase), True)
     for phrase in missing:
         self.assertEqual(filt.check(phrase), False)
Ejemplo n.º 28
0
    def test_c_cuckoo_filter_rmv_miss(self):
        """Try to remove a string that was never added and check nothing changed."""
        stored = ('this is a test', 'this is another test', 'this is yet another test')
        filt = CountingCuckooFilter()
        for running_total, phrase in enumerate(stored, start=1):
            filt.add(phrase)
            self.assertEqual(filt.elements_added, running_total)

        outcome = filt.remove('this is still a test')
        self.assertFalse(outcome)
        # the failed removal must leave the total and the stored strings alone
        self.assertEqual(filt.elements_added, 3)
        for phrase in stored:
            self.assertTrue(filt.check(phrase))
Ejemplo n.º 29
0
    def test_c_cuckoo_filter_rmv_miss(self):
        """Check that removing an absent string is reported and has no effect."""
        filt = CountingCuckooFilter()
        kept = [
            "this is a test",
            "this is another test",
            "this is yet another test",
        ]
        for position, phrase in enumerate(kept):
            filt.add(phrase)
            self.assertEqual(filt.elements_added, position + 1)

        removed = filt.remove("this is still a test")
        self.assertFalse(removed)
        self.assertEqual(filt.elements_added, 3)
        # everything added earlier must still be found
        for phrase in kept:
            self.assertTrue(filt.check(phrase))
Ejemplo n.º 30
0
    def test_c_cuckoo_filter_l_fact(self):
        """Track ``load_factor()`` while a counting cuckoo filter fills up."""
        filt = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=10)
        self.assertEqual(filt.load_factor(), 0.0)
        for n in range(50):
            filt.add(str(n))
        self.assertEqual(filt.load_factor(), 0.25)
        for n in range(50, 100):
            filt.add(str(n))

        # a capacity of 200 means the filter expanded itself
        expected = 0.25 if filt.capacity == 200 else 0.50
        self.assertEqual(filt.load_factor(), expected)

        # add the same 100 keys a second time and re-check
        for n in range(100):
            filt.add(str(n))
        expected = 0.25 if filt.capacity == 200 else 0.50
        self.assertEqual(filt.load_factor(), expected)
Ejemplo n.º 31
0
    def test_c_cuckoo_filter_l_fact(self):
        """Check ``load_factor()`` at several fill levels of the filter."""
        filt = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=10)
        self.assertEqual(filt.load_factor(), 0.0)

        first_half = [str(n) for n in range(50)]
        second_half = [str(n + 50) for n in range(50)]

        for key in first_half:
            filt.add(key)
        self.assertEqual(filt.load_factor(), 0.25)
        for key in second_half:
            filt.add(key)

        # capacity 200 indicates the filter expanded on its own
        grew = filt.capacity == 200
        self.assertEqual(filt.load_factor(), 0.25 if grew else 0.50)

        # second pass over the very same keys
        for key in first_half + second_half:
            filt.add(key)
        grew = filt.capacity == 200
        self.assertEqual(filt.load_factor(), 0.25 if grew else 0.50)
Ejemplo n.º 32
0
 def test_c_cuckoo_filter_lots(self):
     """Insert 125 keys into a capacity-100 filter and check the total."""
     total = 125
     filt = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=100)
     for key in map(str, range(total)):
         filt.add(key)
     self.assertEqual(filt.elements_added, total)
Ejemplo n.º 33
0
 def test_c_cuckoo_filter_dup_add(self):
     """Add the same three strings twice and check both counters."""
     phrases = ('this is a test', 'this is another test', 'this is yet another test')
     filt = CountingCuckooFilter()
     for phrase in phrases:
         filt.add(phrase)
     self.assertEqual(filt.elements_added, 3)
     # second round: the total doubles, the unique count stays at three
     for phrase in phrases:
         filt.add(phrase)
     self.assertEqual(filt.elements_added, 6)
     self.assertEqual(filt.unique_elements, 3)
Ejemplo n.º 34
0
 def test_c_cuckoo_filter_bin(self):
     """Check the string form of the first bucket after a single insert."""
     filt = CountingCuckooFilter(capacity=1, bucket_size=2, max_swaps=100)
     filt.add('this is a test')
     first_bucket = filt.buckets[0]
     expected = '[(fingerprint:3057276164 count:1)]'
     self.assertEqual(expected, str(first_bucket))
Ejemplo n.º 35
0
 def test_c_cuckoo_filter_bin(self):
     """Verify how a bucket holding one entry renders through ``str()``."""
     expected_repr = "[(fingerprint:3057276164 count:1)]"
     filt = CountingCuckooFilter(capacity=1, bucket_size=2, max_swaps=100)
     filt.add("this is a test")
     # capacity is 1, so the entry is looked up in buckets[0]
     rendered = str(filt.buckets[0])
     self.assertEqual(expected_repr, rendered)
Ejemplo n.º 36
0
 def test_c_cuckoo_filter_lots(self):
     """Add more keys than the initial capacity and check ``elements_added``."""
     keys = [str(n) for n in range(125)]
     filt = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=100)
     for key in keys:
         filt.add(key)
     self.assertEqual(filt.elements_added, 125)
Ejemplo n.º 37
0
from probables import (CuckooFilter)
from probables import (CountingCuckooFilter)
# Plain cuckoo filter: add one key, then query two
cko = CuckooFilter(capacity=100, max_swaps=10)
cko.add('google.com')
cko.check('facebook.com')  # should return False
cko.check('google.com')  # should return True

# Counting cuckoo filter: add the same key three times, print the check
# result, remove the key once and print the check result again
cck = CountingCuckooFilter(capacity=100, max_swaps=10)
for _ in range(3):
    cck.add("google")
print(cck.check("google"))
cck.remove("google")
print(cck.check("google"))
Ejemplo n.º 38
0
 def test_c_cuckoo_filter_dup_add(self):
     """Insert three strings in two identical rounds and check the counters."""
     filt = CountingCuckooFilter()
     batch = [
         "this is a test",
         "this is another test",
         "this is yet another test",
     ]
     # elements_added is checked after each full round of insertions
     for expected_total in (3, 6):
         for phrase in batch:
             filt.add(phrase)
         self.assertEqual(filt.elements_added, expected_total)
     self.assertEqual(filt.unique_elements, 3)
Ejemplo n.º 39
0
 def runner():
     """Add 175 keys to a capacity-100 filter created with auto_expand=False."""
     # NOTE(review): presumably invoked through assertRaises by the
     # enclosing test - confirm against the caller
     filt = CountingCuckooFilter(
         capacity=100,
         bucket_size=2,
         max_swaps=100,
         auto_expand=False,
     )
     for n in range(175):
         filt.add(str(n))
Ejemplo n.º 40
0
 def test_c_cuckoo_filter_bin(self):
     """Check what ``str()`` returns for the first bucket of a one-entry filter."""
     filt = CountingCuckooFilter(capacity=1, bucket_size=2, max_swaps=100)
     filt.add('this is a test')
     rendered = str(filt.buckets[0])
     self.assertEqual('[(fingerprint:3057276164 count:1)]', rendered)