def test_cuckoo_insert_no_duplicates():
    """Check insert_no_duplicates on a new item and on a repeated item.

    The first insertion of "GCGTTT" must report True; inserting the very
    same string again must report False.
    """
    item = "GCGTTT"
    cf = cuckoo_filter.CuckooFilter(10, 8, 2, 500)

    # Strict equality with the bool constants is kept on purpose: a merely
    # truthy/falsy return value must not satisfy these checks.
    first_attempt = cf.insert_no_duplicates(item)
    assert first_attempt == True

    repeat_attempt = cf.insert_no_duplicates(item)
    assert repeat_attempt == False
def test_cuckoo_insert_no_duplicates_full_bucket():
    """Check insert_no_duplicates when the filter is built with bucket size 1.

    The third constructor argument (the bucket size) is 1 here. The first
    insertion of 'ABC' must be truthy and the repeated insertion falsy.
    """
    item = 'ABC'
    cf = cuckoo_filter.CuckooFilter(10, 8, 1, 500)

    accepted = cf.insert_no_duplicates(item)
    assert accepted

    accepted_again = cf.insert_no_duplicates(item)
    assert not accepted_again
def test_delete():
    """Check that a stored string can be deleted exactly once.

    After inserting "GCGTTT", the first delete must report True; a second
    delete of the same string (no longer present) must report False.
    """
    item = "GCGTTT"
    cf = cuckoo_filter.CuckooFilter(10, 8, 1, 500)
    cf.insert(item)

    # Strict equality with the bool constants is kept on purpose: a merely
    # truthy/falsy return value must not satisfy these checks.
    removed = cf.delete(item)
    assert removed == True

    removed_again = cf.delete(item)
    assert removed_again == False
def test_contains():
    """Check membership queries for an inserted and a never-inserted string.

    "GCGTTT" is inserted and must be reported as contained; "TTT" is never
    inserted and must be reported as not contained.
    """
    present, absent = "GCGTTT", "TTT"
    cf = cuckoo_filter.CuckooFilter(10, 8, 1, 500)
    cf.insert(present)

    # Strict equality with the bool constants is kept on purpose: a merely
    # truthy/falsy return value must not satisfy these checks.
    found_present = cf.contains(present)
    assert found_present == True

    found_absent = cf.contains(absent)
    assert found_absent == False
def test_insert():
    """Check that plain insert accepts an item, including a repeat of it.

    Both the first and the second insertion of "GCGTTT" must report True.
    """
    item = "GCGTTT"
    cf = cuckoo_filter.CuckooFilter(10, 8, 1, 500)

    # Strict equality with True is kept on purpose: a merely truthy return
    # value must not satisfy these checks.
    first_result = cf.insert(item)
    assert first_result == True

    second_result = cf.insert(item)
    assert second_result == True
def test_construction():
    """Check that the constructor stores the bucket count and fingerprint size.

    A filter built with (10, 8, 1, 500) must expose num_buckets == 10 and
    fp_size == 8.
    """
    expected_buckets = 10
    expected_fp_size = 8
    cf = cuckoo_filter.CuckooFilter(expected_buckets, expected_fp_size, 1, 500)

    assert cf.num_buckets == expected_buckets
    assert cf.fp_size == expected_fp_size
def _iter_insert_items(reads, k):
    """Yield the strings to insert: each read's whole line, or its k-mers if k != 0."""
    for read in reads:
        line = read.line
        if k == 0:
            yield line
            continue
        # NOTE(review): the window count len(line) - k is unchanged from the
        # original code. It covers every k-mer only if `line` carries one
        # trailing character (e.g. a newline); otherwise the last k-mer is
        # skipped -- confirm against the code that fills read_list.
        for offset in range(len(line) - k):
            yield line[offset:offset + k]


def _rate(count, seconds):
    """Return count / seconds, or 0.0 when the interval is too short to measure."""
    return count / seconds if seconds > 0 else 0.0


def create_cuckoo_filter(sketch_config, filter_stats):
    """Build the module-level cuckoo filter from read_list and record statistics.

    The filter is stored in the module global ``cuckooFilter``. Items are
    taken from the module-level ``read_list``: whole ``read.line`` strings
    when ``sketch_config.k == 0``, otherwise substrings of length ``k``.
    Insertion stops at the first item the filter refuses.

    Args:
        sketch_config: Configuration object. Attributes read: ``auto``,
            ``expected_items``, ``fp_prob``, ``num_buckets``, ``fp_size``,
            ``bucket_size``, ``max_iter``, ``stash``, ``bitarray_variant``
            and ``k``. When ``auto`` is set, ``num_buckets``, ``fp_size``
            and ``bucket_size`` are overwritten with the values returned by
            ``cuckoo_filter.get_cuckoo_filter_params``.
        filter_stats: Mutable mapping that receives the keys ``items``,
            ``constr_speed``, ``load_factor``, ``total_size``, ``bpi`` and
            ``insertion_tput``.

    Filter variant selection:
        ``bitarray_variant`` -> ``CuckooFilterBit``; otherwise a non-zero
        ``stash`` -> ``CuckooFilterStash``; otherwise ``CuckooFilter``.
        Previously the stash filter was constructed and then always
        overwritten by the following if/else, so it was never used. The
        bit-array variant keeps precedence so that every configuration that
        worked before still produces the same filter class.
    """
    global cuckooFilter

    if sketch_config.auto:
        (sketch_config.num_buckets,
         sketch_config.fp_size,
         sketch_config.bucket_size) = cuckoo_filter.get_cuckoo_filter_params(
            sketch_config.expected_items, sketch_config.fp_prob)

    geometry = (sketch_config.num_buckets, sketch_config.fp_size,
                sketch_config.bucket_size, sketch_config.max_iter)
    if sketch_config.bitarray_variant:
        cuckooFilter = cuckoo_filter.CuckooFilterBit(*geometry)
    elif sketch_config.stash != 0:
        cuckooFilter = cuckoo_filter.CuckooFilterStash(
            *geometry, sketch_config.stash)
    else:
        cuckooFilter = cuckoo_filter.CuckooFilter(*geometry)

    capacity = cuckooFilter.num_buckets * cuckooFilter.bucket_size
    # One throughput sample is taken each time another tenth of the total
    # slot capacity has been filled.
    load_factor_step_size = capacity / 10
    insertion_tput_records = []
    items = 0
    step = 1

    # perf_counter is monotonic and high resolution, so short intervals are
    # far less likely to collapse to zero than with time.time().
    start = interval_start = time.perf_counter()
    for item in _iter_insert_items(read_list, sketch_config.k):
        # Compared with == (not truthiness) to keep the original contract:
        # only a return value equal to False stops the insertion.
        if cuckooFilter.insert(item) == False:
            break
        items += 1
        if items >= load_factor_step_size * step:
            now = time.perf_counter()
            insertion_tput_records.append(
                _rate(load_factor_step_size, now - interval_start))
            step += 1
            interval_start = now
    end = time.perf_counter()

    total_size = cuckooFilter.get_size()
    filter_stats["items"] = items
    filter_stats["constr_speed"] = _rate(items, end - start)
    filter_stats["load_factor"] = items / capacity
    filter_stats["total_size"] = total_size
    # Bits per item is undefined when nothing was inserted (empty read_list
    # or a failed first insert); report 0.0 instead of dividing by zero.
    filter_stats["bpi"] = (total_size / items) * 8 if items else 0.0
    filter_stats["insertion_tput"] = insertion_tput_records