Example #1
    def test_c_cuckoo_filter_load(self):
        """Test loading a saved error-rate counting cuckoo filter.

        Every key ``str(i)`` is added once and odd keys a second time, so the
        reloaded filter must report a count of ``(i % 2) + 1`` for each key.
        """
        md5sum = "88bc3a08bfc967f9ba60e9d57c21207f"
        with NamedTemporaryFile(dir=os.getcwd(), suffix=".cko", delete=DELETE_TEMP_FILES) as fobj:
            cko = CountingCuckooFilter.init_error_rate(0.00001)
            for i in range(1000):
                cko.add(str(i))
                if i % 2 == 1:
                    cko.add(str(i))  # odd keys end up with a count of 2
            # NOTE(review): the export call was missing from this snippet; it
            # is restored so there is a file to hash and reload - confirm.
            cko.export(fobj.name)
            md5_out = calc_file_md5(fobj.name)
            self.assertEqual(md5sum, md5_out)

            ckf = CountingCuckooFilter.load_error_rate(error_rate=0.00001, filepath=fobj.name)
            for i in range(1000):
                self.assertEqual(ckf.check(str(i)), (i % 2) + 1)

            # the reloaded filter must carry the same configuration
            self.assertEqual(10000, ckf.capacity)
            self.assertEqual(4, ckf.bucket_size)
            self.assertEqual(500, ckf.max_swaps)
            self.assertEqual(2, ckf.expansion_rate)
            self.assertEqual(True, ckf.auto_expand)
            self.assertEqual(20, ckf.fingerprint_size_bits)
            self.assertEqual(3, ckf.fingerprint_size)
            self.assertEqual(0.00001, ckf.error_rate)
            self.assertEqual(0.025, ckf.load_factor())
Example #2
 def test_c_cuckoo_filter_bytes(self):
     """Test exporting a counting cuckoo filter to bytes.

     The md5 digest of ``bytes(cko)`` is compared with a known value after
     inserting the keys ``"0"`` .. ``"99"``.
     """
     md5sum = "6a98c2df1ec9fbb4f75f8e6392696b9b"
     cko = CountingCuckooFilter(capacity=1000, bucket_size=2, auto_expand=False)
     for i in range(100):
         cko.add(str(i))  # loop body was missing from the snippet; restored
     md5_out = hashlib.md5(bytes(cko)).hexdigest()
     self.assertEqual(md5sum, md5_out)
 def runner():
     """Fill a fixed-size counting cuckoo filter past its capacity.

     Inserts 175 keys into a 100-capacity filter that is not allowed to
     expand; presumably used as the callable for an ``assertRaises`` check.
     """
     # NOTE(review): arguments after ``capacity`` were cut off in the snippet;
     # restored from the complete ``runner`` variant - confirm.
     cko = CountingCuckooFilter(capacity=100, bucket_size=2,
                                max_swaps=100, auto_expand=False)
     for i in range(175):
         cko.add(str(i))
 def test_c_cuckoo_filter_auto_exp(self):
     """Test inserting until the counting cuckoo filter must expand.

     375 keys are added to a 100-capacity filter; the capacity is expected
     to have grown to 400 and every key must still be found.
     """
     cko = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=100)
     for i in range(375):  # this would fail if it doesn't expand
         cko.add(str(i))  # loop body was missing from the snippet; restored
     self.assertEqual(400, cko.capacity)
     self.assertEqual(375, cko.elements_added)
     for i in range(375):
         self.assertGreater(cko.check(str(i)), 0)
 def test_c_cuckoo_idx(self):
     """Test that indexing works correctly for the counting cuckoo filter swap.

     The indices returned together with a key's fingerprint must match the
     indices recomputed from that fingerprint alone.
     """
     cko = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=5)
     expected_1, expected_2, fingerprint = cko._generate_fingerprint_info("this is a test")
     actual_1, actual_2 = cko._indicies_from_fingerprint(fingerprint)
     for expected, actual in ((expected_1, actual_1), (expected_2, actual_2)):
         self.assertEqual(expected, actual)
 def test_c_cuckoo_idx(self):
     """Test that indexing works correctly for the counting cuckoo filter swap.

     The original docstring was never terminated, which swallowed the whole
     test body; it is closed here so the assertions actually run.
     """
     cko = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=5)
     txt = "this is a test"
     idx_1, idx_2, fingerprint = cko._generate_fingerprint_info(txt)
     # indices recomputed from the fingerprint must match the generated ones
     index_1, index_2 = cko._indicies_from_fingerprint(fingerprint)
     self.assertEqual(idx_1, index_1)
     self.assertEqual(idx_2, index_2)
 def test_c_cuckoo_filter_auto_exp(self):
     """Test that the counting cuckoo filter grows instead of filling up.

     After adding 375 keys to a 100-capacity filter the capacity must be
     400, 375 elements must be recorded and each key must be present.
     """
     cko = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=100)
     total = 375
     for i in range(total):  # this would fail if it doesn't expand
         cko.add(str(i))  # loop body was missing from the snippet; restored
     self.assertEqual(400, cko.capacity)
     self.assertEqual(total, cko.elements_added)
     for i in range(total):
         self.assertGreater(cko.check(str(i)), 0)
Example #8
 def test_c_cuckoo_full_msg(self):
     """Test the exception message of a full counting cuckoo filter.

     175 keys are inserted into a 100-capacity filter with
     ``auto_expand=False``; a ``CuckooFilterFullError`` with the expected
     message must be raised, and the test fails if nothing is raised.
     """
     # The snippet had lost its ``try:``/``else:`` lines; assertRaises gives
     # the same pass/fail behavior without the ``assertEqual(True, False)`` hack.
     with self.assertRaises(CuckooFilterFullError) as ctx:
         cko = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=100, auto_expand=False)
         for i in range(175):
             cko.add(str(i))
     msg = "The CountingCuckooFilter is currently full"
     self.assertEqual(str(ctx.exception), msg)
 def test_c_cuckoo_full_msg(self):
     """Test the exception message for a full counting cuckoo filter.

     Over-filling a non-expanding filter must raise
     ``CuckooFilterFullError`` whose text is checked verbatim; the test
     fails when no exception is raised at all.
     """
     expected_msg = 'The CountingCuckooFilter is currently full'
     # Replaces the truncated try/except/else of the snippet: the context
     # manager fails the test itself when the block completes normally.
     with self.assertRaises(CuckooFilterFullError) as raised:
         cko = CountingCuckooFilter(capacity=100, bucket_size=2,
                                    max_swaps=100, auto_expand=False)
         for i in range(175):
             cko.add(str(i))
     self.assertEqual(str(raised.exception), expected_msg)
    def test_c_cuckoo_filter_export(self):
        """Test exporting a counting cuckoo filter to a file.

        The keys ``'0'`` .. ``'99'`` are added, the filter is written to
        ``./test.cck`` and the file's md5 is compared with a known digest.
        """
        import os  # local import: only needed to clean up the exported file

        filename = './test.cck'
        md5sum = '60e7072e44947b9b6e5d7bd08a64d8a3'
        # NOTE(review): the call continuation, loop body and export call were
        # missing from the snippet; restored from sibling tests - confirm.
        cko = CountingCuckooFilter(capacity=1000, bucket_size=2,
                                   auto_expand=False)
        for i in range(100):
            cko.add(str(i))

        try:
            cko.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5sum, md5_out)
        finally:
            # do not leave the exported file behind in the working directory
            if os.path.exists(filename):
                os.remove(filename)
Example #11
    def test_c_cuckoo_filter_frombytes(self):
        """Test initializing a counting cuckoo filter from bytes.

        A filter holding the keys ``"0"`` .. ``"99"`` is serialized with
        ``bytes()`` and rebuilt with ``frombytes``; the copy must serialize
        to the same bytes and contain every key.
        """
        cko = CountingCuckooFilter(capacity=1000, bucket_size=2, auto_expand=False)
        for i in range(100):
            cko.add(str(i))  # loop body was missing from the snippet; restored
        bytes_out = bytes(cko)

        cko2 = CountingCuckooFilter.frombytes(bytes_out)

        self.assertEqual(bytes_out, bytes(cko2))
        for i in range(100):
            # NOTE(review): the assertion was missing from the snippet; a
            # membership check on the restored filter is assumed - confirm.
            self.assertTrue(cko2.check(str(i)))
Example #12
    def test_c_cuckoo_filter_er_frombytes(self):
        """Test initializing an error-rate counting cuckoo filter from bytes.

        The filter is created with ``init_error_rate``, filled with the keys
        ``"0"`` .. ``"999"``, serialized and rebuilt; the copy must serialize
        identically, contain every key and keep the capacity of 3000.
        """
        cko = CountingCuckooFilter.init_error_rate(0.00001, capacity=3000)
        for i in range(1000):
            cko.add(str(i))  # loop body was missing from the snippet; restored
        bytes_out = bytes(cko)

        cko2 = CountingCuckooFilter.frombytes(bytes_out, error_rate=0.00001)

        self.assertEqual(bytes_out, bytes(cko2))
        for i in range(1000):
            # NOTE(review): the assertion was missing from the snippet; a
            # membership check on the restored filter is assumed - confirm.
            self.assertTrue(cko2.check(str(i)))
        self.assertEqual(cko2.capacity, 3000)
 def test_c_cuckoo_filter_str(self):
     """Test the str representation of the counting cuckoo filter.

     75 keys are inserted so the summary reports 75 inserted elements and
     a load factor of 37.5% over 200 bins.
     """
     cko = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=100)
     for i in range(75):
         cko.add(str(i))  # loop body was missing from the snippet; restored
     msg = ('CountingCuckooFilter:\n'
            '\tCapacity: 100\n'
            '\tTotal Bins: 200\n'
            '\tLoad Factor: 37.5%\n'
            '\tInserted Elements: 75\n'
            '\tMax Swaps: 100\n'
            '\tExpansion Rate: 2\n'
            '\tAuto Expand: True')
     self.assertEqual(str(cko), msg)
 def test_c_cuckoo_filter_str(self):
     """Test the human-readable summary of the counting cuckoo filter.

     The expected text lists capacity, bins, load factor, element count,
     max swaps, expansion rate and the auto-expand flag.
     """
     cko = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=100)
     for i in range(75):
         cko.add(str(i))  # loop body was missing from the snippet; restored
     msg = ("CountingCuckooFilter:\n"
            "\tCapacity: 100\n"
            "\tTotal Bins: 200\n"
            "\tLoad Factor: 37.5%\n"
            "\tInserted Elements: 75\n"
            "\tMax Swaps: 100\n"
            "\tExpansion Rate: 2\n"
            "\tAuto Expand: True")
     self.assertEqual(str(cko), msg)
 def test_c_cuckoo_filter_add(self):
     """Test that each add bumps ``elements_added`` by one."""
     cko = CountingCuckooFilter()
     keys = ("this is a test", "this is another test", "this is yet another test")
     for expected_total, key in enumerate(keys, start=1):
         cko.add(key)
         self.assertEqual(cko.elements_added, expected_total)
Example #16
 def test_c_cuckoo_filter_er_bytes(self):
     """Test exporting an error-rate counting cuckoo filter to bytes.

     The md5 digest of ``bytes(cko)`` is compared with a known value after
     inserting the keys ``"0"`` .. ``"999"``.
     """
     md5sum = "f68767bd97b21426f5d2315fb38961ad"
     cko = CountingCuckooFilter.init_error_rate(0.00001)
     for i in range(1000):
         cko.add(str(i))  # loop body was missing from the snippet; restored
     md5_out = hashlib.md5(bytes(cko)).hexdigest()
     self.assertEqual(md5sum, md5_out)
 def test_c_cuckoo_filter_default(self):
     """Test the properties of a default-constructed counting cuckoo filter."""
     cko = CountingCuckooFilter()
     expected_defaults = (
         ("capacity", 10000),
         ("bucket_size", 4),
         ("max_swaps", 500),
         ("expansion_rate", 2),
         ("auto_expand", True),
     )
     for attr_name, expected in expected_defaults:
         self.assertEqual(expected, getattr(cko, attr_name))
Example #18
 def test_c_cuckoo_filter_in(self):
     """Test membership checks with ``in`` on the counting cuckoo filter."""
     cko = CountingCuckooFilter()
     added = ('this is a test', 'this is another test', 'this is yet another test')
     for key in added:
         cko.add(key)
     # keys that were added are reported as present ...
     for key in added:
         self.assertEqual(key in cko, True)
     # ... and keys that were never added are not
     for key in ('this is not another test', 'this is not a test'):
         self.assertEqual(key in cko, False)
 def test_c_cuckoo_filter_in(self):
     """Test the ``in`` operator against added and never-added keys."""
     cko = CountingCuckooFilter()
     expectations = (
         ("this is a test", True),
         ("this is another test", True),
         ("this is yet another test", True),
         ("this is not another test", False),
         ("this is not a test", False),
     )
     # only the keys expected to be present are inserted
     for key, present in expectations:
         if present:
             cko.add(key)
     for key, present in expectations:
         self.assertEqual(key in cko, present)
Example #20
 def test_c_cuckoo_filter_er_export(self):
     """Test exporting an error-rate counting cuckoo filter to a file.

     The keys ``"0"`` .. ``"999"`` are added, the filter is written to a
     temporary file and the file's md5 is compared with a known digest.
     """
     md5sum = "f68767bd97b21426f5d2315fb38961ad"
     with NamedTemporaryFile(dir=os.getcwd(), suffix=".cko", delete=DELETE_TEMP_FILES) as fobj:
         cko = CountingCuckooFilter.init_error_rate(0.00001)
         for i in range(1000):
             cko.add(str(i))
         # NOTE(review): loop body and export call were missing from the
         # snippet; restored so the hashed file has content - confirm.
         cko.export(fobj.name)
         md5_out = calc_file_md5(fobj.name)
         self.assertEqual(md5sum, md5_out)
 def test_c_cuckoo_filter_add(self):
     """Test that ``elements_added`` tracks the number of add calls."""
     cko = CountingCuckooFilter()
     inserted = 0
     for key in ('this is a test', 'this is another test', 'this is yet another test'):
         cko.add(key)
         inserted += 1
         self.assertEqual(cko.elements_added, inserted)
 def test_c_cuckoo_filter_expand_els(self):
     """Test that elements survive an expansion of the counting cuckoo filter.

     200 keys are added to a default filter, the filter is expanded, and
     every key must still be found with the capacity doubled to 20000.
     """
     cko = CountingCuckooFilter()
     for i in range(200):
         cko.add(str(i))
     # NOTE(review): the loop body and the expansion step were missing from
     # the snippet; an explicit ``expand()`` is assumed, since 200 keys would
     # not trigger auto-expansion of a 10000-capacity filter - confirm.
     cko.expand()
     for i in range(200):
         self.assertGreater(cko.check(str(i)), 0)
     self.assertEqual(20000, cko.capacity)
Example #23
 def test_c_cuckoo_filter_er_default(self):
     """Test default properties of a filter built with ``init_error_rate``."""
     cko = CountingCuckooFilter.init_error_rate(0.00001)
     expected_properties = (
         ("capacity", 10000),
         ("bucket_size", 4),
         ("max_swaps", 500),
         ("expansion_rate", 2),
         ("auto_expand", True),
         ("fingerprint_size", 3),
         ("fingerprint_size_bits", 20),
         ("error_rate", 0.00001),
     )
     for attr_name, expected in expected_properties:
         self.assertEqual(expected, getattr(cko, attr_name))
Example #24
    def test_c_cuckoo_filter_load(self):
        """Test loading a saved counting cuckoo filter.

        A filter holding the keys ``"0"`` .. ``"99"`` is exported to a
        temporary file, verified by md5, then reloaded through ``filepath``;
        the reloaded filter must report each key once and keep its settings.
        """
        md5sum = "6a98c2df1ec9fbb4f75f8e6392696b9b"
        with NamedTemporaryFile(dir=os.getcwd(), suffix=".cck", delete=DELETE_TEMP_FILES) as fobj:
            cko = CountingCuckooFilter(capacity=1000, bucket_size=2, auto_expand=False)
            for i in range(100):
                cko.add(str(i))

            # NOTE(review): loop body and export call were missing from the
            # snippet; restored so there is a file to hash and load - confirm.
            cko.export(fobj.name)
            md5_out = calc_file_md5(fobj.name)
            self.assertEqual(md5sum, md5_out)

            ckf = CountingCuckooFilter(filepath=fobj.name)
            for i in range(100):
                self.assertEqual(ckf.check(str(i)), 1)

            self.assertEqual(1000, ckf.capacity)
            self.assertEqual(2, ckf.bucket_size)
            self.assertEqual(500, ckf.max_swaps)
            self.assertEqual(0.05, ckf.load_factor())
Example #25
 def test_c_cuckoo_filter_diff(self):
     """Test counting cuckoo filter non-standard properties.

     The filter is built with explicit, non-default settings and each
     property must echo the value that was passed in.
     """
     # The constructor call was cut off after ``capacity``; the remaining
     # arguments are restored from the values asserted below.
     cko = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=5,
                                expansion_rate=4, auto_expand=False)
     self.assertEqual(100, cko.capacity)
     self.assertEqual(2, cko.bucket_size)
     self.assertEqual(5, cko.max_swaps)
     self.assertEqual(4, cko.expansion_rate)
     self.assertEqual(False, cko.auto_expand)
 def test_c_cuckoo_filter_in(self):
     """Test ``in`` lookups for present and absent keys."""
     cko = CountingCuckooFilter()
     present_keys = ['this is a test', 'this is another test', 'this is yet another test']
     absent_keys = ['this is not another test', 'this is not a test']
     for key in present_keys:
         cko.add(key)
     outcomes = [(key, True) for key in present_keys] + [(key, False) for key in absent_keys]
     for key, expected in outcomes:
         self.assertEqual(key in cko, expected)
    def test_c_cuckoo_filter_load(self):
        """Test loading a saved counting cuckoo filter.

        The keys ``"0"`` .. ``"99"`` are added, the filter is exported to
        ``./test.cck``, checked by md5 and reloaded through ``filepath``.
        """
        import os  # local import: only needed to clean up the exported file

        filename = "./test.cck"
        md5sum = "60e7072e44947b9b6e5d7bd08a64d8a3"
        # NOTE(review): the call continuation, loop body and export call were
        # missing from the snippet; restored from sibling tests - confirm.
        cko = CountingCuckooFilter(capacity=1000,
                                   bucket_size=2, auto_expand=False)
        for i in range(100):
            cko.add(str(i))

        try:
            cko.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5sum, md5_out)

            ckf = CountingCuckooFilter(filepath=filename)
            for i in range(100):
                self.assertEqual(ckf.check(str(i)), 1)

            self.assertEqual(1000, ckf.capacity)
            self.assertEqual(2, ckf.bucket_size)
            self.assertEqual(500, ckf.max_swaps)
            self.assertEqual(0.05, ckf.load_factor())
        finally:
            # do not leave the exported file behind in the working directory
            if os.path.exists(filename):
                os.remove(filename)
 def test_c_cuckoo_filter_expand_els(self):
     """Test out the expansion of the counting cuckoo filter.

     After adding 200 keys and expanding, every key must still be present
     and the capacity must be 20000.
     """
     cko = CountingCuckooFilter()
     element_count = 200
     for i in range(element_count):
         cko.add(str(i))
     # NOTE(review): the loop body and the expansion step were missing from
     # the snippet; an explicit ``expand()`` is assumed because the asserted
     # capacity is twice the default of 10000 - confirm.
     cko.expand()
     for i in range(element_count):
         self.assertGreater(cko.check(str(i)), 0)
     self.assertEqual(20000, cko.capacity)
Example #29
    def test_c_cuckoo_filter_export(self):
        """Test exporting a counting cuckoo filter to a file.

        The keys ``"0"`` .. ``"99"`` are added, the filter is written to a
        temporary file and the file's md5 is compared with a known digest.
        """
        md5sum = "6a98c2df1ec9fbb4f75f8e6392696b9b"
        with NamedTemporaryFile(dir=os.getcwd(), suffix=".cck", delete=DELETE_TEMP_FILES) as fobj:
            cko = CountingCuckooFilter(capacity=1000, bucket_size=2, auto_expand=False)
            for i in range(100):
                cko.add(str(i))

            # NOTE(review): loop body and export call were missing from the
            # snippet; restored so the hashed file has content - confirm.
            cko.export(fobj.name)
            md5_out = calc_file_md5(fobj.name)
            self.assertEqual(md5sum, md5_out)
    def test_c_cuckoo_filter_export(self):
        """Test exporting a counting cuckoo filter.

        The filter is filled with the keys ``"0"`` .. ``"99"``, exported to
        ``./test.cck`` and the resulting file is verified by md5.
        """
        import os  # local import: only needed to clean up the exported file

        filename = "./test.cck"
        md5sum = "60e7072e44947b9b6e5d7bd08a64d8a3"
        # NOTE(review): the call continuation, loop body and export call were
        # missing from the snippet; restored from sibling tests - confirm.
        cko = CountingCuckooFilter(capacity=1000,
                                   bucket_size=2, auto_expand=False)
        for i in range(100):
            cko.add(str(i))

        try:
            cko.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5sum, md5_out)
        finally:
            # do not leave the exported file behind in the working directory
            if os.path.exists(filename):
                os.remove(filename)
    def test_c_cuckoo_filter_load(self):
        """Test loading a saved counting cuckoo filter.

        A filter with the keys ``'0'`` .. ``'99'`` is exported to
        ``./test.cck``, verified by md5 and loaded back through ``filepath``;
        the loaded filter must report each key once and keep its settings.
        """
        import os  # local import: only needed to clean up the exported file

        filename = './test.cck'
        md5sum = '60e7072e44947b9b6e5d7bd08a64d8a3'
        # NOTE(review): the call continuation, loop body and export call were
        # missing from the snippet; restored from sibling tests - confirm.
        cko = CountingCuckooFilter(capacity=1000, bucket_size=2,
                                   auto_expand=False)
        for i in range(100):
            cko.add(str(i))

        try:
            cko.export(filename)
            md5_out = calc_file_md5(filename)
            self.assertEqual(md5sum, md5_out)

            ckf = CountingCuckooFilter(filepath=filename)
            for i in range(100):
                self.assertEqual(ckf.check(str(i)), 1)

            self.assertEqual(1000, ckf.capacity)
            self.assertEqual(2, ckf.bucket_size)
            self.assertEqual(500, ckf.max_swaps)
            self.assertEqual(0.05, ckf.load_factor())
        finally:
            # do not leave the exported file behind in the working directory
            if os.path.exists(filename):
                os.remove(filename)
Example #32
    def test_c_cuckoo_filter_er_add_check(self):
        """Test add, ``check`` and ``in`` on an error-rate counting cuckoo filter."""
        cko = CountingCuckooFilter.init_error_rate(0.00001)
        added = ("this is a test", "this is another test", "this is yet another test")
        missing = ("this is not another test", "this is not a test")

        for running_total, key in enumerate(added, start=1):
            cko.add(key)
            self.assertEqual(cko.elements_added, running_total)

        # check
        for key in added:
            self.assertEqual(cko.check(key), True)
        for key in missing:
            self.assertEqual(cko.check(key), False)

        # use of `in`
        for key in added:
            self.assertEqual(key in cko, True)
        for key in missing:
            self.assertEqual(key in cko, False)
Example #33
    def test_c_cuckoo_filter_er_remove(self):
        """Test removing from an error-rate counting cuckoo filter.

        Three keys are added (one of them four times in total); removing a
        key that was added once must drop both the element and unique counts
        and leave the other keys in place.
        """
        cko = CountingCuckooFilter.init_error_rate(0.00001)
        cko.add("this is a test")
        self.assertEqual(cko.elements_added, 1)
        cko.add("this is another test")
        self.assertEqual(cko.elements_added, 2)
        cko.add("this is yet another test")
        self.assertEqual(cko.elements_added, 3)
        self.assertEqual(cko.unique_elements, 3)
        cko.add("this is a test")
        cko.add("this is a test")
        cko.add("this is a test")
        self.assertEqual(cko.elements_added, 6)
        self.assertEqual(cko.unique_elements, 3)

        res = cko.remove("this is another test")
        # NOTE(review): ``res`` was assigned but never checked; assumes
        # remove() returns a truthy value on success - confirm with the docs.
        self.assertTrue(res)
        self.assertEqual(cko.elements_added, 5)
        self.assertEqual(cko.unique_elements, 2)

        self.assertTrue(cko.check("this is a test"))
        self.assertFalse(cko.check("this is another test"))
        self.assertTrue(cko.check("this is yet another test"))
 def test_c_cuckoo_filter_lots(self):
     """Test inserting lots into the counting cuckoo filter.

     125 keys are added to a 100-capacity filter and all of them must be
     counted in ``elements_added``.
     """
     cko = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=100)
     for i in range(125):
         cko.add(str(i))  # loop body was missing from the snippet; restored
     self.assertEqual(cko.elements_added, 125)
 def test_c_cuckoo_filter_check(self):
     """Test ``check`` for keys that were and were not added."""
     cko = CountingCuckooFilter()
     added = ("this is a test", "this is another test", "this is yet another test")
     for key in added:
         cko.add(key)
     for key in added:
         self.assertEqual(cko.check(key), True)
     for key in ("this is not another test", "this is not a test"):
         self.assertEqual(cko.check(key), False)
    def test_c_cuckoo_filter_l_fact(self):
        """Test the load factor of the counting cuckoo filter.

        The load factor is checked when empty, after 50 keys, after 100 keys
        and after re-adding the first 100 keys. The expected value depends
        on whether the filter expanded itself to a capacity of 200.
        """
        cko = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=10)
        self.assertEqual(cko.load_factor(), 0.0)
        for i in range(50):
            cko.add(str(i))  # loop body was missing from the snippet; restored
        self.assertEqual(cko.load_factor(), 0.25)
        for i in range(50):
            cko.add(str(i + 50))

        # The ``else:`` lines were missing, which made both (contradictory)
        # assertions run unconditionally; they are restored here.
        if cko.capacity == 200:  # self expanded
            self.assertEqual(cko.load_factor(), 0.25)
        else:
            self.assertEqual(cko.load_factor(), 0.50)

        # NOTE(review): assumes the final loop re-adds the same keys, which
        # would leave the load factor unchanged as asserted below - confirm.
        for i in range(100):
            cko.add(str(i))
        if cko.capacity == 200:  # self expanded
            self.assertEqual(cko.load_factor(), 0.25)
        else:
            self.assertEqual(cko.load_factor(), 0.50)
 def test_c_cuckoo_filter_lots(self):
     """Test that many insertions are all counted by the filter.

     Adds 125 distinct keys to a filter created with a capacity of 100 and
     expects ``elements_added`` to equal 125.
     """
     cko = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=100)
     total = 125
     for i in range(total):
         cko.add(str(i))  # loop body was missing from the snippet; restored
     self.assertEqual(cko.elements_added, total)
    def test_c_cuckoo_filter_rmv_miss(self):
        """Test removing a key that is not in the counting cuckoo filter.

        Removing an absent key must leave the element count and all stored
        keys untouched.
        """
        cko = CountingCuckooFilter()
        cko.add('this is a test')
        self.assertEqual(cko.elements_added, 1)
        cko.add('this is another test')
        self.assertEqual(cko.elements_added, 2)
        cko.add('this is yet another test')
        self.assertEqual(cko.elements_added, 3)

        res = cko.remove('this is still a test')
        # NOTE(review): ``res`` was assigned but never checked; assumes
        # remove() returns a falsy value on a miss - confirm with the docs.
        self.assertFalse(res)
        self.assertEqual(cko.elements_added, 3)
        self.assertTrue(cko.check('this is a test'))
        self.assertTrue(cko.check('this is another test'))
        self.assertTrue(cko.check('this is yet another test'))
    def test_c_cuckoo_filter_remove(self):
        """Test removing from the counting cuckoo filter.

        Three keys are added (one of them four times in total); removing a
        key that was added once must drop both the element and unique counts
        and leave the other keys in place.
        """
        cko = CountingCuckooFilter()
        cko.add('this is a test')
        self.assertEqual(cko.elements_added, 1)
        cko.add('this is another test')
        self.assertEqual(cko.elements_added, 2)
        cko.add('this is yet another test')
        self.assertEqual(cko.elements_added, 3)
        self.assertEqual(cko.unique_elements, 3)
        cko.add('this is a test')
        cko.add('this is a test')
        cko.add('this is a test')
        self.assertEqual(cko.elements_added, 6)
        self.assertEqual(cko.unique_elements, 3)

        res = cko.remove('this is another test')
        # NOTE(review): ``res`` was assigned but never checked; assumes
        # remove() returns a truthy value on success - confirm with the docs.
        self.assertTrue(res)
        self.assertEqual(cko.elements_added, 5)
        self.assertEqual(cko.unique_elements, 2)

        self.assertTrue(cko.check('this is a test'))
        self.assertFalse(cko.check('this is another test'))
        self.assertTrue(cko.check('this is yet another test'))
 def test_c_cuckoo_filter_bin(self):
     """Test the string form of a counting cuckoo bin.

     A single key is added to a capacity-1 filter and the textual form of
     its bucket is compared with the expected fingerprint/count listing.
     """
     cko = CountingCuckooFilter(capacity=1, bucket_size=2, max_swaps=100)
     cko.add("this is a test")
     # NOTE(review): the second argument was cut off in the snippet;
     # ``str(cko.buckets[0])`` is a reconstruction - confirm the attribute.
     self.assertEqual("[(fingerprint:3057276164 count:1)]",
                      str(cko.buckets[0]))
 def test_c_cuckoo_filter_dup_add(self):
     """Test adding the same items twice to the counting cuckoo filter."""
     cko = CountingCuckooFilter()
     keys = ('this is a test', 'this is another test', 'this is yet another test')
     for key in keys:
         cko.add(key)
     self.assertEqual(cko.elements_added, 3)
     # a second pass raises the total count but not the unique count
     for key in keys:
         cko.add(key)
     self.assertEqual(cko.elements_added, 6)
     self.assertEqual(cko.unique_elements, 3)
 def test_c_cuckoo_filter_check(self):
     """Test ``check`` results for present and absent keys."""
     cko = CountingCuckooFilter()
     expectations = (
         ('this is a test', True),
         ('this is another test', True),
         ('this is yet another test', True),
         ('this is not another test', False),
         ('this is not a test', False),
     )
     # only the keys expected to be present are inserted
     for key, present in expectations:
         if present:
             cko.add(key)
     for key, present in expectations:
         self.assertEqual(cko.check(key), present)
 def test_c_cuckoo_filter_dup_add(self):
     """Test that duplicate adds raise the total but not the unique count."""
     cko = CountingCuckooFilter()
     keys = ["this is a test", "this is another test", "this is yet another test"]
     for expected_total in (3, 6):
         for key in keys:
             cko.add(key)
         self.assertEqual(cko.elements_added, expected_total)
     self.assertEqual(cko.unique_elements, 3)
    def test_c_cuckoo_filter_rmv_miss(self):
        """Test removing from the counting cuckoo filter when not present.

        A removal of a key that was never added must not change the element
        count, and the keys that were added must still be found.
        """
        cko = CountingCuckooFilter()
        cko.add("this is a test")
        self.assertEqual(cko.elements_added, 1)
        cko.add("this is another test")
        self.assertEqual(cko.elements_added, 2)
        cko.add("this is yet another test")
        self.assertEqual(cko.elements_added, 3)

        res = cko.remove("this is still a test")
        # NOTE(review): ``res`` was assigned but never checked; assumes
        # remove() returns a falsy value on a miss - confirm with the docs.
        self.assertFalse(res)
        self.assertEqual(cko.elements_added, 3)
        self.assertTrue(cko.check("this is a test"))
        self.assertTrue(cko.check("this is another test"))
        self.assertTrue(cko.check("this is yet another test"))
    def test_c_cuckoo_filter_l_fact(self):
        """Test the load factor of the counting cuckoo filter.

        The load factor is checked at 0, 50 and 100 keys, and again after
        re-adding keys; the expectation branches on whether the filter grew
        to a capacity of 200 on its own.
        """
        cko = CountingCuckooFilter(capacity=100, bucket_size=2, max_swaps=10)
        self.assertEqual(cko.load_factor(), 0.0)
        for i in range(50):
            cko.add(str(i))  # loop body was missing from the snippet; restored
        self.assertEqual(cko.load_factor(), 0.25)
        for i in range(50):
            cko.add(str(i + 50))

        # Without the missing ``else:`` both assertions ran unconditionally
        # and contradicted each other; the branch is restored.
        if cko.capacity == 200:  # self expanded
            self.assertEqual(cko.load_factor(), 0.25)
        else:
            self.assertEqual(cko.load_factor(), 0.50)

        # NOTE(review): assumes the final loop re-adds the same keys, which
        # would leave the load factor unchanged as asserted below - confirm.
        for i in range(100):
            cko.add(str(i))
        if cko.capacity == 200:  # self expanded
            self.assertEqual(cko.load_factor(), 0.25)
        else:
            self.assertEqual(cko.load_factor(), 0.50)
 def runner():
     """Over-fill a non-expanding counting cuckoo filter.

     Adds 175 keys to a 100-capacity filter created with
     ``auto_expand=False``; presumably the callable handed to an
     ``assertRaises`` check for the filter-full error.
     """
     cko = CountingCuckooFilter(capacity=100, bucket_size=2,
                                max_swaps=100, auto_expand=False)
     for i in range(175):
         cko.add(str(i))  # loop body was missing from the snippet; restored
 def test_c_cuckoo_filter_bin(self):
     ''' test the cuckoo bin repr '''
     cko = CountingCuckooFilter(capacity=1, bucket_size=2, max_swaps=100)
     cko.add('this is a test')
     self.assertEqual('[(fingerprint:3057276164 count:1)]',