def test_trace_cache():
    """Check that TraceCache counts accesses and misses from the record itself.

    One record is replayed twice against ``TraceCache(0)``: first flagged as
    a hit (``is_hit=1``), then flagged as a miss (``is_hit=0``). After each
    replay the cumulative access and miss counters in
    ``cache.miss_ratio_stats`` are compared with the expected totals.
    """
    print("Test trace cache")
    cache = TraceCache(0)
    record_fields = {
        "access_time": 0,
        "block_id": 1,
        "block_type": 1,
        "block_size": 1,
        "cf_id": 0,
        "cf_name": "",
        "level": 0,
        "fd": 0,
        "caller": 1,
        "no_insert": 0,
        "get_id": 1,
        "key_id": 1,
        "kv_size": 0,
        "is_hit": 1,
        "referenced_key_exist_in_block": 1,
        "num_keys_in_block": 0,
        "table_id": 0,
        "seq_number": 0,
        "block_key_size": 0,
        "key_size": 0,
        "block_offset_in_file": 0,
        "next_access_seq_no": 7,
    }
    record = TraceRecord(**record_fields)
    # (is_hit flag to replay, expected total accesses, expected total misses)
    phases = ((1, 1, 0), (0, 2, 1))
    for hit_flag, total_accesses, total_misses in phases:
        record.is_hit = hit_flag
        cache.access(record)
        assert cache.miss_ratio_stats.num_accesses == total_accesses
        assert cache.miss_ratio_stats.num_misses == total_misses
    print("Test trace cache: Success")
def test_mix(cache):
    """Feed ``cache`` a long stream of randomly chosen blocks.

    Every one of the 100000 accesses draws a block id from ``[0, 199]`` and a
    block size from ``[0, 10]`` (both bounds inclusive, as with
    ``random.randint``). The same id is used for ``block_id``, ``get_id`` and
    ``key_id``. The only check afterwards is that the miss ratio reported by
    ``cache.miss_ratio_stats`` is strictly positive.

    Args:
        cache: object under test; must provide ``cache_name()``,
            ``access(record)`` and ``miss_ratio_stats.miss_ratio()``.
    """
    print("Test Mix {} cache".format(cache.cache_name()))
    total_accesses = 100000
    max_key_id = 199
    # Fields that are the same for every generated record.
    fixed_fields = {
        "block_type": 1,
        "cf_id": 0,
        "cf_name": "",
        "level": 0,
        "fd": 0,
        "caller": 1,
        "no_insert": 0,
        "kv_size": 5,
        "is_hit": 1,
    }
    for tick in range(total_accesses):
        # Draw the id before the size so the RNG is consumed in a fixed order.
        chosen_id = random.randint(0, max_key_id)
        chosen_size = random.randint(0, 10)
        cache.access(
            TraceRecord(
                access_time=tick,
                block_id=chosen_id,
                block_size=chosen_size,
                get_id=chosen_id,
                key_id=chosen_id,
                **fixed_fields
            )
        )
    assert cache.miss_ratio_stats.miss_ratio() > 0
    print("Test Mix {} cache: Success".format(cache.cache_name()))
def test_mix(cache):
    """Replay a randomized trace against ``cache`` and verify its bookkeeping.

    100000 records are generated. Each draws a block id from ``[0, 100]``
    (inclusive) and a candidate size from ``[0, 10]``; a block keeps the size
    it was first generated with, so later accesses to the same id reuse it.
    Records with an odd id are flagged ``is_hit=1`` and records with an even
    id ``is_hit=0``; the number of even-id records is tallied locally.

    Checks performed after the replay:
      * the reported miss ratio is strictly positive;
      * if ``cache.cache_name()`` is ``"Trace"``, the access counter equals
        the number of records and the miss counter equals the local tally;
      * otherwise ``cache.used_size`` does not exceed ``cache.cache_size``
        and equals the sum of ``value_size`` over ``cache.table.values()``.

    Args:
        cache: object under test; must provide ``cache_name()``,
            ``access(record)`` and ``miss_ratio_stats``, plus ``used_size``,
            ``cache_size`` and ``table`` when it is not the "Trace" cache.
    """
    print("Test Mix {} cache".format(cache.cache_name()))
    n = 100000
    records = 100
    block_size_table = {}
    trace_num_misses = 0
    for i in range(n):
        key_id = random.randint(0, records)
        vs = random.randint(0, 10)
        # kMicrosInSecond is defined elsewhere; presumably this spaces the
        # accesses one simulated second apart.
        now = i * kMicrosInSecond
        # The first size drawn for a block sticks for all later accesses.
        block_size = block_size_table.setdefault(key_id, vs)
        is_hit = key_id % 2
        if is_hit == 0:
            trace_num_misses += 1
        k = TraceRecord(
            access_time=now,
            block_id=key_id,
            block_type=1,
            block_size=block_size,
            cf_id=0,
            cf_name="",
            level=0,
            fd=0,
            caller=1,
            no_insert=0,
            get_id=key_id,
            key_id=key_id,
            kv_size=5,
            is_hit=is_hit,
            referenced_key_exist_in_block=1,
            num_keys_in_block=0,
            table_id=0,
            seq_number=0,
            block_key_size=0,
            key_size=0,
            block_offset_in_file=0,
            next_access_seq_no=vs,
        )
        cache.access(k)
    assert cache.miss_ratio_stats.miss_ratio() > 0
    if cache.cache_name() == "Trace":
        assert cache.miss_ratio_stats.num_accesses == n
        assert cache.miss_ratio_stats.num_misses == trace_num_misses
    else:
        assert cache.used_size <= cache.cache_size
        # Recompute the occupied size from the stored entries and compare it
        # with the running counter kept by the cache.
        cached_size = sum(value.value_size for value in cache.table.values())
        assert cached_size == cache.used_size, "Expected {} Actual {}".format(
            cache.used_size, cached_size
        )
    print("Test Mix {} cache: Success".format(cache.cache_name()))
def test_cache(policies, expected_value):
    """Run a fixed access pattern through a ThompsonSamplingCache.

    A cache is built as ``ThompsonSamplingCache(3, False, policies)`` and fed
    the sequence k1, k1, k2, k3, k3, k3, where k1..k4 are records for block
    ids 1..4 with access times 0..3. After every access the cache metrics are
    compared with a hard-coded expectation. Finally k4 is accessed and the
    metrics are compared with ``expected_value``.

    Args:
        policies: forwarded unchanged as the third argument of
            ``ThompsonSamplingCache``.
        expected_value: metrics expected after the final access to k4, in the
            list format accepted by ``assert_metrics``.
    """
    cache = ThompsonSamplingCache(3, False, policies)

    def make_record(access_time, block_id):
        # The four records differ only in access time and block id.
        return TraceRecord(
            access_time=access_time,
            block_id=block_id,
            block_type=1,
            block_size=1,
            cf_id=0,
            cf_name="",
            level=0,
            fd=0,
            caller=1,
            no_insert=0,
            get_id=1,
            key_id=1,
            kv_size=5,
            is_hit=1,
        )

    k1 = make_record(0, 1)
    k2 = make_record(1, 2)
    k3 = make_record(2, 3)
    k4 = make_record(3, 4)
    sequence = [k1, k1, k2, k3, k3, k3]
    expected_values = [
        [1, 1, 1, [1], []],  # Access k1, miss.
        [1, 2, 1, [1], []],  # Access k1, hit.
        [2, 3, 2, [1, 2], []],  # Access k2, miss.
        [3, 4, 3, [1, 2, 3], []],  # Access k3, miss.
        [3, 5, 3, [1, 2, 3], []],  # Access k3, hit.
        [3, 6, 3, [1, 2, 3], []],  # Access k3, hit.
    ]
    for access, expected in zip(sequence, expected_values):
        cache.access(access)
        assert_metrics(cache, expected)
    # The outcome of the fourth distinct block is supplied by the caller.
    cache.access(k4)
    assert_metrics(cache, expected_value)
def test_hybrid(cache):
    """Step a single, repeatedly mutated record through ``cache``.

    One TraceRecord is created and then modified in place before each call to
    ``cache.access``; after most calls ``assert_metrics`` compares the cache
    state with a hard-coded five-element list. The steps are strictly
    order-dependent because every step builds on the state left by the
    previous one. Per the step comments below, the cache under test is
    expected to hold both blocks and row ("get") keys.

    Args:
        cache: object under test; must provide ``cache_name()`` and
            ``access(record)`` and be accepted by ``assert_metrics``.
    """
    print("Test {} cache".format(cache.cache_name()))
    k = TraceRecord(
        access_time=0,
        block_id=1,
        block_type=1,
        block_size=1,
        cf_id=0,
        cf_name="",
        level=0,
        fd=0,
        caller=1,
        no_insert=0,
        get_id=1,  # the first get request.
        key_id=1,
        kv_size=0,  # no size.
        is_hit=1,
    )
    cache.access(k)  # Expect a miss.
    # Expected-metrics legend from the original author:
    #   used size, num accesses, num misses, hash table size, blocks, get keys.
    # NOTE(review): the lists passed below have five elements, so
    # "hash table size" has no slot of its own -- presumably it is derived
    # inside assert_metrics from the last two lists; TODO confirm.
    assert_metrics(cache, [1, 1, 1, [1], []])
    k.access_time += 1
    k.kv_size = 1
    k.block_id = 2
    cache.access(k)  # k should be inserted.
    assert_metrics(cache, [3, 2, 2, [1, 2], [1]])
    k.access_time += 1
    k.block_id = 3
    cache.access(k)  # k should not be inserted again.
    assert_metrics(cache, [4, 3, 3, [1, 2, 3], [1]])
    # A second get request referencing the same key.
    k.access_time += 1
    k.get_id = 2
    k.block_id = 4
    k.kv_size = 0
    cache.access(k)  # k should observe a hit. No block access.
    assert_metrics(cache, [4, 4, 3, [1, 2, 3], [1]])

    # A third get request searches three files, three different keys.
    # And the second key observes a hit.
    k.access_time += 1
    k.kv_size = 1
    k.get_id = 3
    k.block_id = 3
    k.key_id = 2
    cache.access(k)  # k should observe a miss. block 3 observes a hit.
    assert_metrics(cache, [5, 5, 3, [1, 2, 3], [1, 2]])

    k.access_time += 1
    k.kv_size = 1
    k.get_id = 3
    k.block_id = 4
    k.kv_size = 1  # Redundant: kv_size was already set to 1 above.
    k.key_id = 1
    cache.access(k)  # k1 should observe a hit.
    assert_metrics(cache, [5, 6, 3, [1, 2, 3], [1, 2]])

    k.access_time += 1
    k.kv_size = 1
    k.get_id = 3
    k.block_id = 4
    k.kv_size = 1  # Redundant: kv_size was already set to 1 above.
    k.key_id = 3
    # k3 should observe a miss.
    # However, as the get is already complete, we should not access k3 any more.
    cache.access(k)
    assert_metrics(cache, [5, 7, 3, [1, 2, 3], [1, 2]])

    # A fourth get request searches one file and two blocks. One row key.
    k.access_time += 1
    k.get_id = 4
    k.block_id = 5
    k.key_id = 4
    k.kv_size = 1
    cache.access(k)
    assert_metrics(cache, [7, 8, 4, [1, 2, 3, 5], [1, 2, 4]])

    # A bunch of insertions which evict cached row keys.
    # Block ids 6..99 are accessed with get_id 0; no metrics are checked
    # inside the loop.
    for i in range(6, 100):
        k.access_time += 1
        k.get_id = 0
        k.block_id = i
        cache.access(k)

    # Note: access_time is not advanced for this final access.
    k.get_id = 4
    k.block_id = 100  # A different block.
    k.key_id = 4  # Same row key and should not be inserted again.
    k.kv_size = 1
    cache.access(k)
    # kSampleSize is defined elsewhere in the file; the expected block list is
    # the last kSampleSize block ids ending at 100, with no row keys left.
    assert_metrics(cache, [16, 103, 99, [i for i in range(101 - kSampleSize, 101)], []])
    print("Test {} cache: Success".format(cache.cache_name()))
def test_opt_cache():
    """Walk ``OPTCache(3)`` through a scripted trace and check every eviction.

    A single record is reused for the whole trace: before each access its
    access time is advanced by one and its block id and next-access sequence
    number are overwritten. After each access ``assert_metrics`` compares the
    cache with the expected state. The eviction comments describe which
    cached block has the furthest next access at that point. The last step
    inserts a block of size 3, which is expected to leave only that block.
    """
    print("Test OPT cache")
    cache = OPTCache(3)
    # seq:         0,  1,  2,  3,  4,  5,  6,  7,  8
    # key:         k1, k2, k3, k4, k5, k6, k7, k1, k8
    # next_access: 7,  19, 18, M,  M,  17, 16, 25, M
    initial_fields = {
        "access_time": 0,
        "block_id": 1,
        "block_type": 1,
        "block_size": 1,
        "cf_id": 0,
        "cf_name": "",
        "level": 0,
        "fd": 0,
        "caller": 1,
        "no_insert": 0,
        "get_id": 1,  # the first get request.
        "key_id": 1,
        "kv_size": 0,  # no size.
        "is_hit": 1,
        "referenced_key_exist_in_block": 1,
        "num_keys_in_block": 0,
        "table_id": 0,
        "seq_number": 0,
        "block_key_size": 0,
        "key_size": 0,
        "block_offset_in_file": 0,
        "next_access_seq_no": 7,
    }
    record = TraceRecord(**initial_fields)
    cache.access(record)
    assert_metrics(
        cache, [1, 1, 1, [1], []], expected_value_size=1, custom_hashtable=False
    )

    never_again = sys.maxsize  # Marks a block that is never accessed again.
    # Each entry: (block id, next access seq no, expected metrics afterwards).
    script = [
        (2, 19, [2, 2, 2, [1, 2], []]),
        (3, 18, [3, 3, 3, [1, 2, 3], []]),
        # Evict 2 since its next access 19 is the furthest in the future.
        (4, never_again, [3, 4, 4, [1, 3, 4], []]),
        # Evict 4 since its next access MAXINT is the furthest in the future.
        (5, never_again, [3, 5, 5, [1, 3, 5], []]),
        # Evict 5 since its next access MAXINT is the furthest in the future.
        (6, 17, [3, 6, 6, [1, 3, 6], []]),
        # Evict 3 since its next access 18 is the furthest in the future.
        (7, 16, [3, 7, 7, [1, 6, 7], []]),
        # Block 1 is still cached, so only the access count changes.
        (1, 25, [3, 8, 7, [1, 6, 7], []]),
        # Evict 1 since its next access 25 is the furthest in the future.
        (8, never_again, [3, 9, 8, [6, 7, 8], []]),
    ]
    for block, next_seq, metrics in script:
        record.access_time += 1
        record.block_id = block
        record.next_access_seq_no = next_seq
        cache.access(record)
        assert_metrics(
            cache, metrics, expected_value_size=1, custom_hashtable=False
        )

    # Insert a large kv pair to evict all keys.
    record.access_time += 1
    record.block_id = 10
    record.block_size = 3
    record.next_access_seq_no = never_again
    cache.access(record)
    assert_metrics(
        cache, [3, 10, 9, [10], []], expected_value_size=3, custom_hashtable=False
    )
    print("Test OPT cache: Success")
def test_cache(cache, expected_value, custom_hashtable=True):
    """Run a fixed access pattern through ``cache`` and check it step by step.

    Records k1..k4 (block ids 1..4, all with block size 1) are created. The
    sequence k1, k1, k2, k3, k3, k3 is replayed with access times 0..5, and
    after every access the cache metrics are compared with a hard-coded
    expectation. Finally k4 is accessed at time 6 and the metrics are
    compared with ``expected_value``.

    Args:
        cache: object under test; must provide ``access(record)`` and be
            accepted by ``assert_metrics``.
        expected_value: metrics expected after the final access to k4, in the
            list format accepted by ``assert_metrics``.
        custom_hashtable: forwarded unchanged to ``assert_metrics``.
    """

    def make_record(access_time, block_id):
        # The four records differ only in access time and block id.
        return TraceRecord(
            access_time=access_time,
            block_id=block_id,
            block_type=1,
            block_size=1,
            cf_id=0,
            cf_name="",
            level=0,
            fd=0,
            caller=1,
            no_insert=0,
            get_id=1,
            key_id=1,
            kv_size=5,
            is_hit=1,
            referenced_key_exist_in_block=1,
            num_keys_in_block=0,
            table_id=0,
            seq_number=0,
            block_key_size=0,
            key_size=0,
            block_offset_in_file=0,
            next_access_seq_no=0,
        )

    k1 = make_record(0, 1)
    k2 = make_record(1, 2)
    k3 = make_record(2, 3)
    k4 = make_record(3, 4)
    sequence = [k1, k1, k2, k3, k3, k3]
    expected_values = [
        [1, 1, 1, [1], []],  # Access k1, miss.
        [1, 2, 1, [1], []],  # Access k1, hit.
        [2, 3, 2, [1, 2], []],  # Access k2, miss.
        [3, 4, 3, [1, 2, 3], []],  # Access k3, miss.
        [3, 5, 3, [1, 2, 3], []],  # Access k3, hit.
        [3, 6, 3, [1, 2, 3], []],  # Access k3, hit.
    ]
    # The position in the sequence doubles as the access time, overriding the
    # time each record was constructed with.
    for access_time, (access, expected) in enumerate(zip(sequence, expected_values)):
        access.access_time = access_time
        cache.access(access)
        assert_metrics(
            cache,
            expected,
            expected_value_size=1,
            custom_hashtable=custom_hashtable,
        )
    # k4 is accessed one tick after the last access of the sequence.
    k4.access_time = len(sequence)
    cache.access(k4)
    assert_metrics(
        cache, expected_value, expected_value_size=1, custom_hashtable=custom_hashtable
    )