예제 #1
0
 def default_alru_params():
     alru_params = FlushParametersAlru()
     alru_params.activity_threshold = Time(milliseconds=10000)
     alru_params.flush_max_buffers = 100
     alru_params.staleness_time = Time(seconds=120)
     alru_params.wake_up_time = Time(seconds=20)
     return alru_params
예제 #2
0
    def __init__(self, device_name: str, raw_stats: list = None):
        metrics_number = 13
        if raw_stats is None:
            raw_stats = [0] * metrics_number
        if len(raw_stats) < metrics_number:
            raw_stats.extend([0] * metrics_number)

        self.device_name = device_name
        # rrqm/s
        self.read_requests_merged_per_sec = float(raw_stats[0])
        # wrqm/s
        self.write_requests_merged_per_sec = float(raw_stats[1])
        # r/s
        self.read_requests_per_sec = float(raw_stats[2])
        # w/s
        self.write_requests_per_sec = float(raw_stats[3])
        # rkB/s
        self.reads_per_sec = Size(float(raw_stats[4]), UnitPerSecond(Unit.KiloByte))
        # wkB/s
        self.writes_per_sec = Size(float(raw_stats[5]), UnitPerSecond(Unit.KiloByte))
        # avgrq-sz
        self.average_request_size = float(raw_stats[6])  # `in sectors`
        # avgqu-sz
        self.average_queue_length = float(raw_stats[7])
        # await
        self.average_service_time = Time(milliseconds=float(raw_stats[8]))
        # r_await
        self.read_average_service_time = Time(milliseconds=float(raw_stats[9]))
        # w_await
        self.write_average_service_time = Time(milliseconds=float(raw_stats[10]))
        # iostat's documentation says to not trust 11th field
        # util
        self.utilization = float(raw_stats[12])
예제 #3
0
 def __init__(self, trace):
     self.average = Time(nanoseconds=int(trace["average"]))
     self.min = Time(nanoseconds=int(trace["min"]))
     self.max = Time(nanoseconds=int(trace["max"]))
     self.total = Time(nanoseconds=int(trace["total"]))
     self.percentiles = Percentiles(trace["percentiles"],
                                    "of reqs have latency lower than")
예제 #4
0
def prepare():
    cache_dev = TestRun.disks["cache"]
    core_dev = TestRun.disks["core"]

    cache_dev.create_partitions([Size(100, Unit.MiB)])
    core_dev.create_partitions([Size(200, Unit.MiB)])

    Udev.disable()
    cache = casadm.start_cache(cache_dev.partitions[0], force=True, cache_mode=CacheMode.WB)
    core = cache.add_core(core_dev.partitions[0])
    cache.set_seq_cutoff_policy(SeqCutOffPolicy.never)
    cache.set_cleaning_policy(CleaningPolicy.alru)
    cache.set_params_alru(
        FlushParametersAlru(
            activity_threshold=Time(seconds=100),
            staleness_time=Time(seconds=1),
        )
    )

    return cache, core
예제 #5
0
 def read_runtime(self):
     return Time(microseconds=self.job.read.runtime)
예제 #6
0
 def trim_completion_latency_average(self):
     return Time(nanoseconds=self.job.trim.lat_ns.mean)
예제 #7
0
 def trim_completion_latency_max(self):
     return Time(nanoseconds=self.job.trim.lat_ns.max)
예제 #8
0
 def trim_runtime(self):
     return Time(microseconds=self.job.trim.runtime)
예제 #9
0
 def write_completion_latency_average(self):
     return Time(nanoseconds=self.job.write.lat_ns.mean)
예제 #10
0
def test_acp_param_flush_max_buffers(cache_line_size, cache_mode):
    """
        title: Functional test for ACP flush-max-buffers parameter.
        description: |
          Verify if there is appropriate number of I/O requests between wake-up time intervals,
          which depends on flush-max-buffer parameter.
        pass_criteria:
          - ACP triggered dirty data flush
          - Number of writes to core is lower or equal than flush_max_buffers
    """
    with TestRun.step("Test prepare."):
        buffer_values = get_random_list(
            min_val=FlushParametersAcp.acp_params_range().flush_max_buffers[0],
            max_val=FlushParametersAcp.acp_params_range().flush_max_buffers[1],
            n=10,
        )

        default_config = FlushParametersAcp.default_acp_params()
        acp_configs = [
            FlushParametersAcp(flush_max_buffers=buf,
                               wake_up_time=Time(seconds=1))
            for buf in buffer_values
        ]
        acp_configs.append(default_config)

    with TestRun.step("Prepare partitions."):
        core_size = Size(5, Unit.GibiByte)
        cache_device = TestRun.disks["cache"]
        core_device = TestRun.disks["core"]
        cache_device.create_partitions([Size(10, Unit.GibiByte)])
        core_device.create_partitions([core_size])

    with TestRun.step(
            f"Start cache in {cache_mode} with {cache_line_size} and add core."
    ):
        cache = casadm.start_cache(cache_device.partitions[0], cache_mode,
                                   cache_line_size)
        core = cache.add_core(core_device.partitions[0])

    with TestRun.step("Set cleaning policy to NOP."):
        cache.set_cleaning_policy(CleaningPolicy.nop)

    with TestRun.step("Start IO in background."):
        fio = get_fio_cmd(core, core_size)
        fio.run_in_background()
        time.sleep(10)

    with TestRun.step("Set cleaning policy to ACP."):
        cache.set_cleaning_policy(CleaningPolicy.acp)

    with TestRun.group(
            "Verify IO number for different max_flush_buffers values."):
        for acp_config in acp_configs:
            with TestRun.step(f"Setting {acp_config}"):
                cache.set_params_acp(acp_config)

            with TestRun.step(
                    "Using blktrace verify if there is appropriate number of I/O requests, "
                    "which depends on flush-max-buffer parameter."):
                blktrace = BlkTrace(core.core_device, BlkTraceMask.write)
                blktrace.start_monitoring()
                time.sleep(20)
                blktrace_output = blktrace.stop_monitoring()

                cleaning_started = False
                flush_writes = 0
                for (prev, curr) in zip(blktrace_output, blktrace_output[1:]):
                    if cleaning_started and write_to_core(prev):
                        flush_writes += 1
                    if new_acp_iteration(prev, curr):
                        if cleaning_started:
                            if flush_writes <= acp_config.flush_max_buffers:
                                flush_writes = 0
                            else:
                                TestRun.LOGGER.error(
                                    f"Incorrect number of handled io requests. "
                                    f"Expected {acp_config.flush_max_buffers} - "
                                    f"actual {flush_writes}")
                                flush_writes = 0

                        cleaning_started = True

                if not cleaning_started:
                    TestRun.fail(f"ACP flush not triggered for {acp_config}")

    with TestRun.step("Stop all caches"):
        kill_all_io()
        casadm.stop_all_caches()
예제 #11
0
def test_recovery_all_options(cache_mode, cache_line_size, cleaning_policy,
                              filesystem):
    """
        title: Test for recovery after reset with various cache options.
        description: Verify that unflushed data can be safely recovered after reset.
        pass_criteria:
          - CAS recovers successfully after reboot
          - No data corruption
    """
    with TestRun.step("Prepare cache and core devices."):
        cache_disk = TestRun.disks['cache']
        core_disk = TestRun.disks['core']
        cache_disk.create_partitions([Size(200, Unit.MebiByte)])
        core_disk.create_partitions([Size(2000, Unit.MebiByte)] * 2)
        cache_device = cache_disk.partitions[0]
        core_device = core_disk.partitions[0]

        test_file = File(os.path.join(mount_point, filename))
        file_operation(test_file.full_path, pattern, ReadWrite.write)
        file_md5 = test_file.md5sum()

    with TestRun.step(f"Make {filesystem} on core device."):
        core_device.create_filesystem(filesystem)

    with TestRun.step("Mount core device."):
        core_device.mount(mount_point)
        file_operation(test_file.full_path, other_pattern, ReadWrite.write)
        os_utils.drop_caches(DropCachesMode.ALL)

    with TestRun.step("Unmount core device."):
        core_device.unmount()

    with TestRun.step(
            f"Start cache in {cache_mode.name} with given configuration."):
        cache = casadm.start_cache(cache_device,
                                   cache_mode,
                                   cache_line_size,
                                   force=True)
        cache.set_cleaning_policy(cleaning_policy)
        if cleaning_policy == CleaningPolicy.acp:
            cache.set_params_acp(
                FlushParametersAcp(wake_up_time=Time(seconds=1)))

    with TestRun.step("Add core."):
        core = cache.add_core(core_device)

    with TestRun.step("Mount CAS device."):
        core.mount(mount_point)
        file_operation(test_file.full_path, pattern, ReadWrite.write)

    with TestRun.step(
            "Change cache mode to Write-Through without flush option."):
        cache.set_cache_mode(CacheMode.WT, flush=False)

    with TestRun.step("Reset platform."):
        os_utils.sync()
        core.unmount()
        TestRun.LOGGER.info(
            f"Number of dirty blocks in cache: {cache.get_dirty_blocks()}")
        power_cycle_dut()

    with TestRun.step("Try to start cache without load and force option."):
        try:
            casadm.start_cache(cache_device, cache_mode, cache_line_size)
            TestRun.fail("Cache started without load or force option.")
        except Exception:
            TestRun.LOGGER.info(
                "Cache did not start without load and force option.")

    with TestRun.step("Load cache and stop it with flush."):
        cache = casadm.load_cache(cache_device)
        cache.stop()

    with TestRun.step("Check md5sum of tested file on core device."):
        core_device.mount(mount_point)
        cas_md5 = test_file.md5sum()
        core_device.unmount()
        if cas_md5 == file_md5:
            TestRun.LOGGER.info(
                "Source and target file checksums are identical.")
        else:
            TestRun.fail("Source and target file checksums are different.")
예제 #12
0
 def default_acp_params():
     acp_params = FlushParametersAcp()
     acp_params.flush_max_buffers = 128
     acp_params.wake_up_time = Time(milliseconds=10)
     return acp_params
예제 #13
0
def test_flush_inactive_devices():
    """
        title: Negative test for flushing inactive CAS devices.
        description: Validate that CAS prevents flushing dirty data from inactive CAS devices.
        pass_criteria:
          - No kernel error
          - Exported object appears after plugging core device
          - Flushing inactive CAS devices is possible neither by cleaning thread,
            nor by calling cleaning methods
    """
    with TestRun.step("Prepare devices."):
        devices = prepare_devices([("cache", 1), ("core1", 1), ("core2", 1)])
        cache_dev = devices["cache"].partitions[0]
        first_core_dev = devices["core1"].partitions[0]
        second_core_dev = devices["core2"].partitions[0]
        plug_device = devices["core1"]

    with TestRun.step("Start cache in WB mode and set alru cleaning policy."):
        cache = casadm.start_cache(cache_dev,
                                   cache_mode=CacheMode.WB,
                                   force=True)
        cache.set_cleaning_policy(CleaningPolicy.alru)
        cache.set_params_alru(
            FlushParametersAlru(staleness_time=Time(seconds=10),
                                wake_up_time=Time(seconds=1),
                                activity_threshold=Time(milliseconds=500)))

    with TestRun.step("Add two cores."):
        first_core = cache.add_core(first_core_dev)
        second_core = cache.add_core(second_core_dev)

    with TestRun.step(
            "Create init config file using running CAS configuration."):
        InitConfig.create_init_config_from_running_configuration()

    with TestRun.step("Run random writes to CAS device."):
        run_fio([first_core.path, second_core.path])

    with TestRun.step("Stop cache without flushing dirty data."):
        cache.stop(no_data_flush=True)

    with TestRun.step("Unplug one core disk."):
        plug_device.unplug()

    with TestRun.step("Load cache."):
        cache = casadm.load_cache(cache_dev)

    with TestRun.step(
            "Wait longer than required for alru cleaning thread to start and verify "
            "that dirty data is flushed only from active device."):
        dirty_lines_before = {
            first_core: first_core.get_dirty_blocks(),
            second_core: second_core.get_dirty_blocks()
        }
        time.sleep(30)
        check_amount_of_dirty_data(dirty_lines_before)

    with TestRun.step("Try to call 'flush cache' command."):
        dirty_lines_before = {
            first_core: first_core.get_dirty_blocks(),
            second_core: second_core.get_dirty_blocks()
        }
        try:
            cache.flush_cache()
            TestRun.fail(
                "Flush cache operation should be blocked due to inactive cache devices, "
                "but it executed successfully.")
        except Exception as e:
            TestRun.LOGGER.info(
                f"Flush cache operation is blocked as expected.\n{str(e)}")
            check_amount_of_dirty_data(dirty_lines_before)

    with TestRun.step("Try to call 'flush core' command for inactive core."):
        dirty_lines_before = {
            first_core: first_core.get_dirty_blocks(),
            second_core: second_core.get_dirty_blocks()
        }
        try:
            first_core.flush_core()
            TestRun.fail(
                "Flush core operation should be blocked for inactive CAS devices, "
                "but it executed successfully.")
        except Exception as e:
            TestRun.LOGGER.info(
                f"Flush core operation is blocked as expected.\n{str(e)}")
            check_amount_of_dirty_data(dirty_lines_before)

    with TestRun.step(
            "Plug core disk and verify that this change is reflected on the cache list."
    ):
        plug_device.plug()
        time.sleep(1)
        first_core.wait_for_status_change(CoreStatus.active)
        cache_status = cache.get_status()
        if cache_status != CacheStatus.running:
            TestRun.fail(
                f"Cache did not change status to 'running' after plugging core device. "
                f"Actual state: {cache_status}.")

    with TestRun.step("Stop cache."):
        cache.stop()
예제 #14
0
 def read_completion_latency_max(self):
     return Time(nanoseconds=self.job.read.lat_ns.max)
예제 #15
0
 def write_runtime(self):
     return Time(microseconds=self.job.write.runtime)
예제 #16
0
 def read_completion_latency_average(self):
     return Time(nanoseconds=self.job.read.lat_ns.mean)
예제 #17
0
 def write_completion_latency_max(self):
     return Time(nanoseconds=self.job.write.lat_ns.max)
예제 #18
0
def test_acp_param_wake_up_time(cache_line_size, cache_mode):
    """
        title: Functional test for ACP wake-up parameter.
        description: |
          Verify if interval between ACP cleaning iterations is not longer than
          wake-up time parameter value.
        pass_criteria:
          - ACP flush iterations are triggered with defined frequency.
    """
    with TestRun.step("Test prepare."):
        error_threshold_ms = 50
        generated_vals = get_random_list(
            min_val=FlushParametersAcp.acp_params_range().wake_up_time[0],
            max_val=FlushParametersAcp.acp_params_range().wake_up_time[1],
            n=10,
        )
        acp_configs = []
        for config in generated_vals:
            acp_configs.append(
                FlushParametersAcp(wake_up_time=Time(milliseconds=config)))
        acp_configs.append(FlushParametersAcp.default_acp_params())

    with TestRun.step("Prepare partitions."):
        core_size = Size(5, Unit.GibiByte)
        cache_device = TestRun.disks["cache"]
        core_device = TestRun.disks["core"]
        cache_device.create_partitions([Size(10, Unit.GibiByte)])
        core_device.create_partitions([core_size])

    with TestRun.step(
            f"Start cache in {cache_mode} with {cache_line_size} and add core."
    ):
        cache = casadm.start_cache(cache_device.partitions[0], cache_mode,
                                   cache_line_size)
        core = cache.add_core(core_device.partitions[0])

    with TestRun.step("Set cleaning policy to NOP."):
        cache.set_cleaning_policy(CleaningPolicy.nop)

    with TestRun.step("Start IO in background."):
        fio = get_fio_cmd(core, core_size)
        fio.run_in_background()
        time.sleep(10)

    with TestRun.step("Set cleaning policy to ACP."):
        cache.set_cleaning_policy(CleaningPolicy.acp)

    with TestRun.group("Verify IO number for different wake_up_time values."):
        for acp_config in acp_configs:
            with TestRun.step(f"Setting {acp_config}"):
                cache.set_params_acp(acp_config)
                accepted_interval_threshold = (
                    acp_config.wake_up_time.total_milliseconds() +
                    error_threshold_ms)
            with TestRun.step(
                    "Using blktrace verify if interval between ACP cleaning iterations "
                    f"is shorter or equal than wake-up parameter value "
                    f"(including {error_threshold_ms}ms error threshold)"):
                blktrace = BlkTrace(core.core_device, BlkTraceMask.write)
                blktrace.start_monitoring()
                time.sleep(15)
                blktrace_output = blktrace.stop_monitoring()

                for (prev, curr) in zip(blktrace_output, blktrace_output[1:]):
                    if not new_acp_iteration(prev, curr):
                        continue

                    interval_ms = (curr.timestamp - prev.timestamp) / 10**6

                    if interval_ms > accepted_interval_threshold:
                        TestRun.LOGGER.error(
                            f"{interval_ms} is not within accepted range for "
                            f"{acp_config.wake_up_time.total_milliseconds()} "
                            f"wake_up_time param value.")

    with TestRun.step("Stop all caches"):
        kill_all_io()
        casadm.stop_all_caches()
예제 #19
0
def test_alru_no_idle():
    """
        title: Test ALRU with activity threshold set to 0
        description: |
          Verify that ALRU is able to perform cleaning if cache is under constant load and
          activity threshold is set to 0. Constant load is performed by using fio instance running
          in background.
        pass_criteria:
          - Dirty cache lines are cleaned successfuly.
    """

    with TestRun.step("Prepare configuration"):
        cache, core = prepare()

    with TestRun.step("Prepare dirty data to be cleaned"):
        bg_size = Size(2, Unit.MiB)
        (
            Fio()
            .create_command()
            .io_engine(IoEngine.libaio)
            .offset(bg_size)
            .size(Size(10, Unit.MiB))
            .block_size(Size(4, Unit.KiB))
            .target(core)
            .direct()
            .read_write(ReadWrite.randwrite)
            .run()
        )

    with TestRun.step("Run background fio"):
        (
            Fio()
            .create_command()
            .io_engine(IoEngine.libaio)
            .size(bg_size)
            .block_size(Size(4, Unit.KiB))
            .target(core)
            .direct()
            .time_based(True)
            .run_time(timedelta(hours=1))
            .read_write(ReadWrite.randwrite)
            .run_in_background()
        )

    with TestRun.step("Verify that cache is dirty"):
        # Wait for bg fio to dirty whole workset
        time.sleep(5)
        dirty_before = cache.get_statistics().usage_stats.dirty

        if dirty_before == Size(0):
            TestRun.fail("Cache should be dirty")

    with TestRun.step("Check that cleaning doesn't occur under constant load"):
        time.sleep(5)

        dirty_now = cache.get_statistics().usage_stats.dirty

        if dirty_before > dirty_now:
            TestRun.fail(
                f"Cleaning has run, while it shouldn't"
                " (dirty down from {dirty_before} to {dirty_now}"
            )

    with TestRun.step("Set 0 idle time and wake up time for ALRU"):
        cache.set_params_alru(FlushParametersAlru(activity_threshold=Time(0), wake_up_time=Time(0)))

    with TestRun.step("Check that cleaning is progressing"):
        time.sleep(5)

        if dirty_before <= cache.get_statistics().usage_stats.dirty:
            TestRun.fail("Cleaning didn't run")

    kill_all_io()