Example #1
def block_size(self, value: size.Size):
    return self.set_param('bs', int(value.get_value()))

def write_bandwidth(self):
    return Size(self.job.write.bw, UnitPerSecond(Unit.KibiByte))

def write_bandwidth_deviation(self):
    return Size(self.job.write.bw_dev, UnitPerSecond(Unit.KibiByte))

def read_bandwidth(self):
    return Size(self.job.read.bw, UnitPerSecond(Unit.KibiByte))

def read_bandwidth_deviation(self):
    return Size(self.job.read.bw_dev, UnitPerSecond(Unit.KibiByte))

def trim_bandwidth(self):
    return Size(self.job.trim.bw, UnitPerSecond(Unit.KibiByte))

def trim_bandwidth_deviation(self):
    return Size(self.job.trim.bw_dev, UnitPerSecond(Unit.KibiByte))
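
A hedged usage sketch (not part of the original listing): the accessors above wrap raw fio job statistics in Size objects so results carry explicit units. The fio variable and the shape of run()'s return value are assumptions for illustration.

# Hypothetical usage of the bandwidth accessors above; assumes fio.run()
# returns one result object per job exposing these methods.
fio_output = fio.run()[0]
write_bw = fio_output.write_bandwidth()   # Size in KiB/s
read_bw = fio_output.read_bandwidth()     # Size in KiB/s
TestRun.LOGGER.info(f"write: {write_bw}, read: {read_bw}")
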
Example #8
def test_preserve_data_for_inactive_device():
    """
        title: Validate preserving data for inactive CAS devices.
        description: Validate that cached data for inactive CAS devices is preserved.
        pass_criteria:
          - No kernel error
          - File md5 checksums match in every iteration.
          - Cache read hits increase after reads (md5 checksum) from CAS device with attached core.
    """
    mount_dir = "/mnt/test"
    with TestRun.step("Prepare devices."):
        devices = prepare_devices([("cache", 1), ("core", 1)])
        cache_dev = devices["cache"].partitions[0]
        core_dev = devices["core"].partitions[0]
        plug_device = devices["core"]
    with TestRun.step("Start cache and add core."):
        cache = casadm.start_cache(cache_dev,
                                   cache_mode=CacheMode.WB,
                                   force=True)
        cache.set_seq_cutoff_policy(SeqCutOffPolicy.never)
        cache.set_cleaning_policy(CleaningPolicy.nop)
        core = cache.add_core(core_dev)
    with TestRun.step(
            "Create init config file using current CAS configuration."):
        InitConfig.create_init_config_from_running_configuration()
    with TestRun.step("Create filesystem on CAS device and mount it."):
        core.create_filesystem(Filesystem.ext3)
        core.mount(mount_dir)
    with TestRun.step(
            "Create a test file with random writes on mount point and count it's md5."
    ):
        file_path = f"{mount_dir}/test_file"
        test_file = File.create_file(file_path)
        dd = Dd().input("/dev/random") \
            .output(file_path) \
            .count(100) \
            .block_size(Size(1, Unit.Blocks512))
        dd.run()
        os_utils.sync()
        md5_after_create = test_file.md5sum()
        cache_stats_before_stop = cache.get_statistics()
        core_stats_before_stop = core.get_statistics()
    with TestRun.step("Unmount CAS device."):
        core.unmount()
    with TestRun.step("Stop cache without flushing dirty data."):
        cache.stop(no_data_flush=True)
    with TestRun.step("Unplug core device."):
        plug_device.unplug()
    with TestRun.step("Load cache."):
        cache = casadm.load_cache(cache_dev)
        cache_stats_after_load = cache.get_statistics()
        core_stats_after_load = core.get_statistics()
        stats_changed = (
            cache_stats_before_stop.usage_stats.clean != cache_stats_after_load.usage_stats.clean
            or cache_stats_before_stop.usage_stats.dirty != cache_stats_after_load.usage_stats.dirty
            or core_stats_before_stop.usage_stats.clean != core_stats_after_load.usage_stats.clean
            or core_stats_before_stop.usage_stats.dirty != core_stats_after_load.usage_stats.dirty
        )
        if stats_changed:
            TestRun.fail(
                f"Statistics after cache load differ from those before stopping the cache.\n"
                f"Cache stats before: {cache_stats_before_stop}\n"
                f"Cache stats after: {cache_stats_after_load}\n"
                f"Core stats before: {core_stats_before_stop}\n"
                f"Core stats after: {core_stats_after_load}")
    with TestRun.step(
            "Plug core disk using sysfs and verify this change is reflected "
            "on the cache list."):
        plug_device.plug()
        if cache.get_status() != CacheStatus.running or core.get_status() != CoreStatus.active:
            TestRun.fail(
                f"Expected cache status is running (actual - {cache.get_status()}).\n"
                f"Expected core status is active (actual - {core.get_status()})."
            )
    with TestRun.step("Mount CAS device"):
        core.mount(mount_dir)
    with TestRun.step(
            "Count md5 checksum for test file and compare it with previous value."
    ):
        cache_read_hits_before_md5 = cache.get_statistics().request_stats.read.hits
        md5_after_cache_load = test_file.md5sum()
        if md5_after_create != md5_after_cache_load:
            TestRun.fail(
                "Md5 checksum after cache load operation is different than before "
                "stopping cache.")
        else:
            TestRun.LOGGER.info(
                "Md5 checksum is identical before and after cache load operation "
                "with inactive CAS device.")
    with TestRun.step(
            "Verify that cache read hits increased after counting md5 checksum."
    ):
        cache_read_hits_after_md5 = cache.get_statistics().request_stats.read.hits
        if cache_read_hits_after_md5 - cache_read_hits_before_md5 <= 0:
            TestRun.fail(
                f"Cache read hits did not increase after counting md5 checksum. "
                f"Before: {cache_read_hits_before_md5}. "
                f"After: {cache_read_hits_after_md5}.")
        else:
            TestRun.LOGGER.info("Cache read hits increased as expected.")
    with TestRun.step("Unmount CAS device and stop cache."):
        core.unmount()
        cache.stop()
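
The prepare_devices helper used above is not included in this listing. Below is a minimal sketch of what it might look like, built only from calls that appear elsewhere in these examples; the partition size is an assumption.

def prepare_devices(device_specs):
    # Hypothetical reconstruction: create the requested number of partitions
    # on each named disk and return the disks keyed by name.
    devices = {}
    for name, part_count in device_specs:
        disk = TestRun.disks[name]
        disk.create_partitions([Size(1, Unit.GibiByte)] * part_count)  # assumed size
        devices[name] = disk
    return devices
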
Example #9
def test_remove_inactive_devices():
    """
        title: Validate removing inactive CAS devices.
        description: |
          Validate that it is possible to remove inactive CAS devices when there are no dirty
          cache lines associated with them and that removing CAS devices is prevented otherwise
          (unless ‘force’ option is used).
        pass_criteria:
          - No kernel error
          - Removing CAS devices without dirty data is successful.
          - Removing CAS devices with dirty data without ‘force’ option is blocked.
          - Removing CAS devices with dirty data with ‘force’ option is successful.
    """
    with TestRun.step("Prepare devices."):
        devices = prepare_devices([("cache", 1), ("core", 4)])
        cache_dev = devices["cache"].partitions[0]
        core_devs = devices["core"].partitions
        plug_device = devices["core"]
    with TestRun.step("Start cache and add four cores."):
        cache = casadm.start_cache(cache_dev,
                                   cache_mode=CacheMode.WB,
                                   force=True)
        cores = []
        for d in core_devs:
            cores.append(cache.add_core(d))
    with TestRun.step(
            "Create init config file using current CAS configuration."):
        InitConfig.create_init_config_from_running_configuration()
    with TestRun.step("Run random writes to all CAS devices."):
        run_fio([c.system_path for c in cores])
    with TestRun.step(
            "Flush dirty data from two CAS devices and verify than other two "
            "contain dirty data."):
        for core in cores:
            if core.core_id % 2 == 0:
                core.flush_core()
                if core.get_dirty_blocks() != Size.zero():
                    TestRun.fail("Failed to flush CAS device.")
            elif core.get_dirty_blocks() == Size.zero():
                TestRun.fail("There should be dirty data on CAS device.")
    with TestRun.step("Stop cache without flushing dirty data."):
        cache.stop(no_data_flush=True)
    with TestRun.step("Unplug core disk."):
        plug_device.unplug()
    with TestRun.step("Load cache."):
        casadm.load_cache(cache_dev)
    with TestRun.step(
            "Verify that all previously created CAS devices are listed with "
            "proper status."):
        for core in cores:
            if core.get_status() != CoreStatus.inactive:
                TestRun.fail(f"Each core should be in inactive state. "
                             f"Actual states:\n{casadm.list_caches().stdout}")
    with TestRun.step(
            "Try removing CAS device without ‘force’ option. Verify that for "
            "dirty CAS devices operation is blocked, proper message is displayed "
            "and device is still listed."):
        shuffle(cores)
        for core in cores:
            try:
                dirty_blocks = core.get_dirty_blocks()
                core.remove_core()
                if dirty_blocks != Size.zero():
                    TestRun.fail(
                        "Removing dirty CAS device should be impossible but remove "
                        "command executed without any error.")
                TestRun.LOGGER.info(
                    "Remove operation without force option succeeded for clean CAS device."
                )
            except CmdException as e:
                if dirty_blocks == Size.zero():
                    TestRun.fail(
                        "Removing clean CAS device should be possible but remove "
                        "command returned an error.")
                TestRun.LOGGER.info(
                    "Remove operation without force option is blocked for "
                    "dirty CAS device as expected.")
                cli_messages.check_stderr_msg(
                    e.output, cli_messages.remove_inactive_core)
                output = casadm.list_caches().stdout
                if core.system_path not in output:
                    TestRun.fail(
                        f"CAS device is not listed in casadm list output but it should be."
                        f"\n{output}")
                core.remove_core(force=True)
    with TestRun.step("Plug missing disk and stop cache."):
        plug_device.plug()
        casadm.stop_all_caches()
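
The run_fio helper is likewise external to this listing. A sketch under the assumption that it issues random writes (as the step descriptions state) using the Fio wrapper shown in other examples here; the engine, IO size and ReadWrite.randwrite member are assumptions.

def run_fio(targets):
    # Hypothetical reconstruction: dirty the cache with random writes.
    for target in targets:
        (Fio().create_command()
              .io_engine(IoEngine.libaio)
              .read_write(ReadWrite.randwrite)   # assumed enum member
              .size(Size(100, Unit.MebiByte))    # assumed IO size
              .target(target)).run()
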
Example #10
def test_trim_start_discard():
    """
    title: Check discarding cache device at cache start
    description: |
       Create 2 partitions on trim-supporting device, write pattern to both partitions,
       start blktrace against first one, start cache on first partition and check if discard
       requests were sent at all and only to the first partition.
    pass_criteria:
      - Partition used for cache is discarded.
      - Second partition is untouched - written pattern is preserved.
    """
    with TestRun.step("Clearing dmesg"):
        TestRun.executor.run_expect_success("dmesg -C")

    with TestRun.step("Preparing cache device"):
        dev = TestRun.disks['cache']
        dev.create_partitions([Size(500, Unit.MebiByte), Size(500, Unit.MebiByte)])
        cas_part = dev.partitions[0]
        non_cas_part = dev.partitions[1]

    with TestRun.step("Writing different pattern on partitions"):
        cas_fio = write_pattern(cas_part.path)
        non_cas_fio = write_pattern(non_cas_part.path)
        cas_fio.run()
        non_cas_fio.run()

    # TODO add blktracing for non-cas part
    with TestRun.step("Starting blktrace against first (cache) partition"):
        blktrace = BlkTrace(cas_part, BlkTraceMask.discard)
        blktrace.start_monitoring()

    with TestRun.step("Starting cache"):
        cache = casadm.start_cache(cas_part, force=True)
        metadata_size = get_metadata_size_from_dmesg()

    with TestRun.step("Stop blktrace and check if discard requests were issued"):
        cache_reqs = blktrace.stop_monitoring()
        cache_part_start = cas_part.begin

        # CAS should discard cache device during cache start
        if len(cache_reqs) == 0:
            TestRun.fail("No discard requests issued to the cas partition!")

        non_meta_sector = (cache_part_start + metadata_size).get_value(Unit.Blocks512)
        non_meta_size = (cas_part.size - metadata_size).get_value(Unit.Byte)
        for req in cache_reqs:
            if req.sector_number != non_meta_sector:
                TestRun.fail(f"Discard request issued to wrong sector: {req.sector_number}, "
                             f"expected: {non_meta_sector}")
            if req.byte_count != non_meta_size:
                TestRun.fail(f"Discard request issued with wrong bytes count: {req.byte_count}, "
                             f"expected: {non_meta_size} bytes")

        cas_fio.read_write(ReadWrite.read)
        non_cas_fio.read_write(ReadWrite.read)
        cas_fio.verification_with_pattern("0x00")
        cas_fio.offset(metadata_size)
        cas_fio.run()
        non_cas_fio.run()

    with TestRun.step("Stopping cache"):
        cache.stop()
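
write_pattern is referenced above but not defined in this listing. Since the returned object is later reused with read_write() and verification_with_pattern(), a plausible sketch is a pattern-verified write job; the engine, size and pattern are assumptions.

def write_pattern(device_path):
    # Hypothetical reconstruction: fill the target with a verifiable pattern.
    return (Fio().create_command()
                 .io_engine(IoEngine.libaio)
                 .read_write(ReadWrite.write)             # assumed enum member
                 .target(device_path)
                 .size(Size(400, Unit.MebiByte))          # assumed, fits the 500 MiB partition
                 .verification_with_pattern("0xabcd"))    # assumed pattern
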
Example #11

import random
import re
import pytest

from api.cas.cache_config import CacheMode, CacheLineSize, CacheModeTrait
from api.cas.casadm import OutputFormat, print_statistics, start_cache
from core.test_run import TestRun
from storage_devices.disk import DiskType, DiskTypeSet, DiskTypeLowerThan
from test_tools.dd import Dd
from test_tools.disk_utils import Filesystem
from test_utils.size import Size, Unit

iterations = 64
cache_size = Size(8, Unit.GibiByte)


@pytest.mark.parametrizex("cache_line_size", CacheLineSize)
@pytest.mark.parametrizex(
    "cache_mode",
    CacheMode.with_any_trait(CacheModeTrait.InsertRead
                             | CacheModeTrait.InsertWrite))
@pytest.mark.parametrizex("test_object", ["cache", "core"])
@pytest.mark.require_disk("cache",
                          DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeLowerThan("cache"))
def test_output_consistency(cache_line_size, cache_mode, test_object):
    """
        title: Test consistency between different cache and core statistics' outputs.
        description: |
Example #12
def test_ioclass_stats_sum():
    """Check if stats for all set ioclasses sum up to cache stats"""
    cache, core = prepare()
    min_ioclass_id = 1
    max_ioclass_id = 11
    file_size_base = Unit.KibiByte.value * 4

    TestRun.LOGGER.info("Preparing ioclass config file")
    ioclass_config.create_ioclass_config(
        add_default_rule=True, ioclass_config_path=ioclass_config_path)
    for i in range(min_ioclass_id, max_ioclass_id):
        ioclass_config.add_ioclass(
            ioclass_id=i,
            eviction_priority=22,
            allocation=True,
            rule=f"file_size:le:{file_size_base*i}&done",
            ioclass_config_path=ioclass_config_path,
        )
    cache.load_io_class(ioclass_config_path)

    TestRun.LOGGER.info("Generating files with particular sizes")
    files_list = []
    for i in range(min_ioclass_id, max_ioclass_id):
        path = f"/tmp/test_file_{file_size_base*i}"
        File.create_file(path)
        f = File(path)
        f.padding(Size(file_size_base * i, Unit.Byte))
        files_list.append(f)

    core.create_filesystem(Filesystem.ext4)

    cache.reset_counters()

    # Names of stats which should not be compared
    not_compare_stats = ["clean", "occupancy"]
    ioclass_id_list = list(range(min_ioclass_id, max_ioclass_id))
    # Append default ioclass id
    ioclass_id_list.append(0)
    TestRun.LOGGER.info("Copying files to mounted core and stats check")
    for f in files_list:
        # To prevent stats pollution by filesystem requests, umount core device
        # after file is copied
        core.mount(mountpoint)
        f.copy(mountpoint)
        sync()
        core.unmount()
        sync()

        cache_stats = cache.get_cache_statistics(
            stat_filter=[StatsFilter.usage, StatsFilter.req, StatsFilter.blk])
        for ioclass_id in ioclass_id_list:
            ioclass_stats = cache.get_cache_statistics(
                stat_filter=[
                    StatsFilter.usage, StatsFilter.req, StatsFilter.blk
                ],
                io_class_id=ioclass_id,
            )
            for stat_name in cache_stats:
                if stat_name in not_compare_stats:
                    continue
                cache_stats[stat_name] -= ioclass_stats[stat_name]

        for stat_name in cache_stats:
            if stat_name in not_compare_stats:
                continue
            stat = cache_stats[stat_name]
            stat_val = stat.get_value() if isinstance(stat, Size) else stat
            assert stat_val == 0, f"{stat_name} diverged!\n"

    # Test cleanup
    for f in files_list:
        f.remove()
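
prepare(), mountpoint and ioclass_config_path are module-level helpers not shown in this listing. A minimal sketch of prepare(), assuming partition sizes and using only calls that appear elsewhere in these examples:

def prepare():
    # Hypothetical reconstruction of the helper returning (cache, core).
    cache_disk = TestRun.disks["cache"]
    core_disk = TestRun.disks["core"]
    cache_disk.create_partitions([Size(1, Unit.GibiByte)])  # assumed size
    core_disk.create_partitions([Size(2, Unit.GibiByte)])   # assumed size
    cache = casadm.start_cache(cache_disk.partitions[0], force=True)
    core = cache.add_core(core_disk.partitions[0])
    return cache, core
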
Example #13
def test_kedr_basic_io_fs(module, unload_modules, install_kedr):
    """
    title: Basic IO test on core with ext4 filesystem with kedr started with memory leaks profile
    description: |
        Load CAS modules, start kedr against one of them, create filesystem on core, start cache
        and add core, run simple random IO, stop cache and unload modules
    pass_criteria:
      - No memory leaks observed
    """
    with TestRun.step("Preparing cache device"):
        cache_device = TestRun.disks['cache']
        cache_device.create_partitions([Size(500, Unit.MebiByte)])
        cache_part = cache_device.partitions[0]

    with TestRun.step("Preparing core device (creating partition, "
                      "filesystem and mounting core)"):
        core_device = TestRun.disks['core']
        core_device.create_partitions([Size(1, Unit.GibiByte)])
        core_part = core_device.partitions[0]
        core_part.create_filesystem(Filesystem.ext4)
        sync()

    with TestRun.step("Unload CAS modules if needed"):
        if os_utils.is_kernel_module_loaded(module.value):
            cas_module.unload_all_cas_modules()

    with TestRun.step(f"Starting kedr against {module.value}"):
        Kedr.start(module.value)

    with TestRun.step(f"Loading CAS modules"):
        os_utils.load_kernel_module(cas_module.CasModule.cache.value)

    with TestRun.step("Starting cache"):
        cache = casadm.start_cache(cache_part, force=True)

    with TestRun.step("Adding core"):
        core = cache.add_core(core_part)

    with TestRun.step("Mounting core"):
        core.mount(mountpoint)

    with TestRun.step(f"Running IO"):
        (Fio().create_command()
              .io_engine(IoEngine.libaio)
              .size(cache.size * 2)
              .read_write(ReadWrite.randrw)
              .target(f"{core.mount_point}/test_file")).run()

    with TestRun.step("Unmounting core"):
        core.unmount()

    with TestRun.step("Stopping cache"):
        cache.stop()

    with TestRun.step(f"Unloading CAS modules"):
        cas_module.unload_all_cas_modules()

    with TestRun.step(f"Checking for memory leaks for {module.value}"):
        try:
            Kedr.check_for_mem_leaks(module.value)
        except Exception as e:
            TestRun.LOGGER.error(f"{e}")

    with TestRun.step(f"Stopping kedr"):
        Kedr.stop()
def test_core_pool_exclusive_open():
    """
    title: Exclusive open of core pool.
    description: |
      Check that CAS exclusively opens core devices from core device pool so that the core device
      cannot be used in any other way.
    pass_criteria:
      - No system crash while reloading CAS modules.
      - Core device was added successfully to core pool.
      - Core device is exclusively open in the core pool and cannot be used otherwise.
    """
    with TestRun.step("Prepare core device and create filesystem on it."):
        core_disk = TestRun.disks["core"]
        core_disk.create_partitions([Size(1, Unit.GibiByte)])
        core_dev = core_disk.partitions[0]
        core_dev.create_filesystem(Filesystem.ext4)

    with TestRun.step(
            "Add core device to core device pool using --try-add flag."):
        core = casadm.try_add(core_dev, 1)

    with TestRun.step(
            "Check if core status of added core in core pool is detached."):
        status = core.get_status()
        if status is not CoreStatus.detached:
            TestRun.fail(f"Core status should be detached but is {status}.")

    with TestRun.step(
            "Check if it is impossible to add core device from core pool to "
            "running cache."):
        TestRun.disks["cache"].create_partitions([Size(2, Unit.GibiByte)])
        cache_dev = TestRun.disks["cache"].partitions[0]
        cache = casadm.start_cache(cache_dev, force=True)
        try:
            cache.add_core(core_dev)
            TestRun.fail(
                "Core from core pool added to cache, this is unexpected behaviour."
            )
        except CmdException:
            TestRun.LOGGER.info(
                "Adding core from core pool to cache is blocked as expected.")
        cache.stop()

    with TestRun.step(
            "Check if it is impossible to start cache with casadm start command on the "
            "core device from core pool."):
        try:
            cache = casadm.start_cache(core_dev)
            cache.stop()
            TestRun.fail(
                "Cache started successfully on core device from core pool, "
                "this is unexpected behaviour.")
        except CmdException:
            TestRun.LOGGER.info(
                "Using core device from core pool as cache is blocked as expected."
            )

    with TestRun.step(
            "Check if it is impossible to make filesystem on the core device "
            "from core pool."):
        try:
            core_dev.create_filesystem(Filesystem.ext4, force=False)
            TestRun.fail(
                "Successfully created filesystem on core from core pool, "
                "this is unexpected behaviour.")
        except Exception:
            TestRun.LOGGER.info(
                "Creating filesystem on core device from core pool is "
                "blocked as expected.")

    with TestRun.step(
            "Check if it is impossible to mount the core device from core pool."
    ):
        try:
            core_dev.mount("/mnt")
            TestRun.fail(
                "Successfully mounted core pool device, this is unexpected behaviour."
            )
        except Exception:
            TestRun.LOGGER.info(
                "Mounting core device form core pool is blocked as expected.")

    with TestRun.step("Remove core from core pool."):
        casadm.remove_all_detached_cores()
Example #15
def test_recovery_flush_reset_raw(cache_mode):
    """
        title: Recovery after reset during cache flushing - test on raw device.
        description: |
          Verify that unflushed data can be safely recovered when reset is pressed during
          data flushing on a raw device.
        pass_criteria:
          - CAS recovers successfully after reboot
          - No data corruption
    """
    with TestRun.step("Prepare cache and core devices."):
        cache_disk = TestRun.disks['cache']
        core_disk = TestRun.disks['core']
        cache_disk.create_partitions([Size(2, Unit.GibiByte)])
        core_disk.create_partitions([Size(16, Unit.GibiByte)] * 2)
        cache_device = cache_disk.partitions[0]
        core_device = core_disk.partitions[0]
        core_device_link = core_device.get_device_link("/dev/disk/by-id")
        cache_device_link = cache_device.get_device_link("/dev/disk/by-id")

    with TestRun.step("Create test files."):
        source_file, target_file = create_test_files(test_file_size)

    with TestRun.step("Setup cache and add core."):
        cache = casadm.start_cache(cache_device, cache_mode)
        core = cache.add_core(core_device)
        cache.set_cleaning_policy(CleaningPolicy.nop)
        cache.set_seq_cutoff_policy(SeqCutOffPolicy.never)

    with TestRun.step("Copy file to CAS."):
        copy_file(source=source_file.full_path, target=core.system_path, size=test_file_size,
                  direct="oflag")

    with TestRun.step("Sync and flush buffers."):
        os_utils.sync()
        output = TestRun.executor.run(f"hdparm -f {core.system_path}")
        if output.exit_code != 0:
            raise CmdException("Error during hdparm", output)

    with TestRun.step("Trigger flush."):
        TestRun.executor.run_in_background(cli.flush_cache_cmd(f"{cache.cache_id}"))

    with TestRun.step("Hard reset DUT during data flushing."):
        power_cycle_dut(wait_for_flush_begin=True, core_device=core_device)
        cache_device.full_path = cache_device_link.get_target()
        core_device.full_path = core_device_link.get_target()

    with TestRun.step("Copy file from core and check if current md5sum is different than "
                      "before restart."):
        copy_file(source=core_device_link.get_target(), target=target_file.full_path,
                  size=test_file_size, direct="iflag")
        compare_files(source_file, target_file, should_differ=True)

    with TestRun.step("Load cache."):
        cache = casadm.load_cache(cache_device)
        if cache.get_dirty_blocks() == Size.zero():
            TestRun.fail("There are no dirty blocks on cache device.")

    with TestRun.step("Stop cache with dirty data flush."):
        core_writes_before = core_device.get_io_stats().sectors_written
        cache.stop()
        if core_writes_before >= core_device.get_io_stats().sectors_written:
            TestRun.fail("No data was flushed after stopping cache started with load option.")

    with TestRun.step("Copy test file from core device to temporary location. "
                      "Compare it with the first version – they should be the same."):
        copy_file(source=core_device_link.get_target(), target=target_file.full_path,
                  size=test_file_size, direct="iflag")
        compare_files(source_file, target_file)

    with TestRun.step("Cleanup core device and remove test files."):
        target_file.remove()
        source_file.remove()
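
copy_file, create_test_files and compare_files come from recovery_tests_methods (imported near the end of this listing). A hedged sketch of copy_file as a dd-based copy; the block size and flag handling are assumptions.

def copy_file(source, target, size, direct=None):
    # Hypothetical reconstruction: dd-based copy with optional O_DIRECT.
    dd = (Dd().input(source)
              .output(target)
              .block_size(Size(1, Unit.MebiByte))            # assumed block size
              .count(int(size.get_value(Unit.MebiByte))))
    if direct == "oflag":
        dd.oflag("direct")
    elif direct == "iflag":
        dd.iflag("direct")   # assumes Dd exposes iflag() like oflag()
    dd.run()
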
Example #16
def test_stop_cache_with_inactive_devices():
    """
        title: Validate stopping cache with inactive CAS devices.
        description: |
          Validate that cache with inactive CAS devices cannot be stopped
          unless ‘force’ option is used.
        pass_criteria:
          - No kernel error
          - Stopping cache with inactive CAS devices without ‘force’ option is blocked.
          - Stopping cache with inactive CAS devices with ‘force’ option is successful.
    """
    with TestRun.step("Prepare devices."):
        devices = prepare_devices([("cache", 1), ("core", 1)])
        cache_dev = devices["cache"].partitions[0]
        core_dev = devices["core"].partitions[0]
        plug_device = devices["core"]
    with TestRun.step("Start cache and add core."):
        cache = casadm.start_cache(cache_dev,
                                   cache_mode=CacheMode.WB,
                                   force=True)
        core = cache.add_core(core_dev)
    with TestRun.step(
            "Create init config file using current CAS configuration."):
        InitConfig.create_init_config_from_running_configuration()
    with TestRun.step(
            "Run random writes and verify that CAS device contains dirty data."
    ):
        run_fio([core.system_path])
        if core.get_dirty_blocks() == Size.zero():
            TestRun.fail("There is no dirty data on core device.")
    with TestRun.step("Stop cache without flushing dirty data."):
        cache.stop(no_data_flush=True)
    with TestRun.step("Unplug core disk."):
        plug_device.unplug()
    with TestRun.step("Load cache."):
        cache = casadm.load_cache(cache_dev)
    with TestRun.step(
            "Verify that previously created CAS device is listed with proper status."
    ):
        core_status = core.get_status()
        if core_status != CoreStatus.inactive:
            TestRun.fail(
                f"CAS device should be in inactive state. Actual status: {core_status}."
            )
    with TestRun.step(
            "Try stopping cache without ‘no data flush’ option, verify that operation "
            "was blocked and proper message is displayed."):
        try_stop_incomplete_cache(cache)
    with TestRun.step("Stop cache with force option."):
        cache.stop(no_data_flush=True)
    with TestRun.step("Plug missing core device."):
        plug_device.plug()
    with TestRun.step("Load cache."):
        cache = casadm.load_cache(cache_dev)
    with TestRun.step("Stop cache with flushing dirty data."):
        cache.stop()
    with TestRun.step("Unplug core device."):
        plug_device.unplug()
    with TestRun.step("Load cache and verify core status is inactive."):
        cache = casadm.load_cache(cache_dev)
        core_status = core.get_status()
        if core_status != CoreStatus.inactive:
            TestRun.fail(
                f"CAS device should be in inactive state. Actual state: {core_status}."
            )
    with TestRun.step(
            "Try stopping cache without ‘no data flush’ option, verify that "
            "operation was blocked and proper message is displayed."):
        try_stop_incomplete_cache(cache)
    with TestRun.step(
            "Stop cache with 'no data flush' option and plug missing core device."
    ):
        cache.stop(no_data_flush=True)
        plug_device.plug()
def trim_io(self):
    return Size(self.job.trim.io_kbytes, Unit.KibiByte)
def test_recovery_unplug_cache_fs(cache_mode, cls, filesystem, direct):
    """
        title: Test for recovery after cache drive removal - test with filesystem.
        description: |
          Verify that unflushed data can be safely recovered when the SSD drive is removed
          after write completion - test with filesystem.
        pass_criteria:
          - CAS recovers successfully after cache drive unplug
          - No data corruption
    """
    with TestRun.step("Prepare devices"):
        cache_disk = TestRun.disks['cache']
        core_disk = TestRun.disks['core']
        cache_disk.create_partitions([Size(2, Unit.GibiByte)])
        core_disk.create_partitions([Size(16, Unit.GibiByte)])
        cache_device = cache_disk.partitions[0]
        core_device = core_disk.partitions[0]

    with TestRun.step("Create test files."):
        source_file, target_file = create_test_files(test_file_size)

    with TestRun.step("Create filesystem on core device."):
        core_device.create_filesystem(filesystem)

    with TestRun.step("Start cache and add core."):
        cache = casadm.start_cache(cache_device, cache_mode, cls)
        core = cache.add_core(core_device)

    with TestRun.step("Mount CAS device."):
        core.mount(mount_point)

    with TestRun.step("Copy file to CAS."):
        copy_file(source=source_file.full_path,
                  target=test_file_path,
                  size=test_file_size,
                  direct="oflag" if direct else None)
        TestRun.LOGGER.info(str(core.get_statistics()))

    with TestRun.step("Unmount CAS device."):
        core.unmount()

    with TestRun.step("Unplug cache device."):
        cache_disk.unplug()
        TestRun.LOGGER.info(f"List caches:\n{casadm.list_caches().stdout}")
        TestRun.LOGGER.info(
            f"Dirty blocks on cache: "
            f"{cache.get_dirty_blocks().get_value(Unit.Blocks4096)}")

    with TestRun.step("Stop cache."):
        cache.stop()

    with TestRun.step("Plug missing cache device."):
        cache_disk.plug()

    with TestRun.step("Load cache."):
        cache = casadm.load_cache(cache_device)
        TestRun.LOGGER.info(
            f"Dirty blocks on cache: "
            f"{cache.get_dirty_blocks().get_value(Unit.Blocks4096)}")

    with TestRun.step("Stop cache with data flush."):
        cache.stop()

    with TestRun.step("Mount core device."):
        core_device.mount(mount_point)

    with TestRun.step("Copy file from core device and check md5sum."):
        copy_file(source=test_file_path,
                  target=target_file.full_path,
                  size=test_file_size,
                  direct="iflag" if direct else None)
        compare_files(source_file, target_file)

    with TestRun.step("Unmount core device and remove files."):
        core_device.unmount()
        target_file.remove()
        source_file.remove()
def trim_bandwidth_average(self):
    return Size(self.job.trim.bw_mean, UnitPerSecond(Unit.KibiByte))
    def start_tracing(self,
                      bdevs: list = [],
                      buffer: Size = None,
                      trace_file_size: Size = None,
                      timeout: timedelta = None,
                      label: str = None,
                      shortcut: bool = False):
        """
        Start tracing given block devices. Trace all available if none given.

        :param bdevs: Block devices to trace, can be empty
        (for all available)
        :param buffer: Size of the internal trace buffer in MiB
        :param trace_file_size: Max size of trace file in MiB
        :param timeout: Max trace duration time in seconds
        :param label: User defined custom label
        :param shortcut: Use shorter command
        :type bdevs: list of strings
        :type buffer: Size
        :type trace_file_size: Size
        :type timeout: timedelta
        :type label: str
        :type shortcut: bool
        """

        if len(bdevs) == 0:
            # Rebuild the list locally instead of appending to the mutable
            # default argument, which would leak state between calls.
            bdevs = [disk.system_path for disk in TestRun.dut.disks]

        buffer_range = range(1, 1025)
        trace_file_size_range = range(1, 100000001)
        timeout_range = range(1, 4294967296)

        command = 'iotrace' + (' -S' if shortcut else ' --start-tracing')
        command += (' -d ' if shortcut else ' --devices ') + ','.join(bdevs)

        if buffer is not None:
            if int(buffer.get_value(Unit.MebiByte)) not in buffer_range:
                raise CmdException(
                    f"Given buffer is out of range {buffer_range}.")
            command += ' -b ' if shortcut else ' --buffer '
            command += f'{int(buffer.get_value(Unit.MebiByte))}'

        if trace_file_size is not None:
            if int(trace_file_size.get_value(Unit.MebiByte)) not in trace_file_size_range:
                raise CmdException(
                    f"Given size is out of range {trace_file_size_range}.")
            command += ' -s ' if shortcut else ' --size '
            command += f'{int(trace_file_size.get_value(Unit.MebiByte))}'

        if timeout is not None:
            if int(timeout.total_seconds()) not in timeout_range:
                raise CmdException(
                    f"Given time is out of range {timeout_range}.")
            command += ' -t ' if shortcut else ' --time '
            command += f'{int(timeout.total_seconds())}'

        if label is not None:
            # Parenthesized so the flag and the label are concatenated in both
            # the short and the long form (the original appended the label only
            # in the long form due to operator precedence).
            command += (' -l ' if shortcut else ' --label ') + f'{label}'

        self.pid = str(TestRun.executor.run_in_background(command))
        TestRun.LOGGER.info("Started tracing of: " + ','.join(bdevs))
def read_io(self):
    return Size(self.job.read.io_kbytes, Unit.KibiByte)
def test_ioclass_directory_file_operations(filesystem):
    """
        title: Test IO classification by file operations.
        description: |
          Test if directory classification works properly after file operations like move or rename.
        pass_criteria:
          - No kernel bug.
          - The operations themselves should not cause reclassification but IO after those
            operations should be reclassified to proper IO class.
    """

    test_dir_path = f"{mountpoint}/test_dir"
    nested_dir_path = f"{test_dir_path}/nested_dir"
    dd_blocks = random.randint(5, 50)

    with TestRun.step("Prepare cache and core."):
        cache, core = prepare(default_allocation="1.00")
        Udev.disable()

    with TestRun.step("Create and load IO class config file."):
        ioclass_id = random.randint(2, ioclass_config.MAX_IO_CLASS_ID)
        ioclass_config.add_ioclass(ioclass_id=1,
                                   eviction_priority=1,
                                   allocation="1.00",
                                   rule="metadata",
                                   ioclass_config_path=ioclass_config_path)
        # directory IO class
        ioclass_config.add_ioclass(
            ioclass_id=ioclass_id,
            eviction_priority=1,
            allocation="1.00",
            rule=f"directory:{test_dir_path}",
            ioclass_config_path=ioclass_config_path,
        )
        casadm.load_io_classes(cache_id=cache.cache_id, file=ioclass_config_path)

    with TestRun.step(f"Prepare {filesystem.name} filesystem "
                      f"and mounting {core.path} at {mountpoint}."):
        core.create_filesystem(fs_type=filesystem)
        core.mount(mount_point=mountpoint)
        sync()

    with TestRun.step(f"Create directory {nested_dir_path}."):
        Directory.create_directory(path=nested_dir_path, parents=True)
        sync()
        drop_caches(DropCachesMode.ALL)

    with TestRun.step("Create test file."):
        classified_before = cache.get_io_class_statistics(
            io_class_id=ioclass_id).usage_stats.occupancy
        file_path = f"{test_dir_path}/test_file"
        (Dd().input("/dev/urandom").output(file_path).oflag("sync")
         .block_size(Size(1, Unit.MebiByte)).count(dd_blocks).run())
        sync()
        drop_caches(DropCachesMode.ALL)
        test_file = File(file_path).refresh_item()

    with TestRun.step("Check classified occupancy."):
        classified_after = cache.get_io_class_statistics(
            io_class_id=ioclass_id).usage_stats.occupancy
        check_occupancy(classified_before + test_file.size, classified_after)

    with TestRun.step("Move test file out of classified directory."):
        classified_before = classified_after
        non_classified_before = cache.get_io_class_statistics(io_class_id=0).usage_stats.occupancy
        test_file.move(destination=mountpoint)
        sync()
        drop_caches(DropCachesMode.ALL)

    with TestRun.step("Check classified occupancy."):
        classified_after = cache.get_io_class_statistics(
            io_class_id=ioclass_id).usage_stats.occupancy
        check_occupancy(classified_before, classified_after)
        TestRun.LOGGER.info("Checking non-classified occupancy")
        non_classified_after = cache.get_io_class_statistics(io_class_id=0).usage_stats.occupancy
        check_occupancy(non_classified_before, non_classified_after)

    with TestRun.step("Read test file."):
        classified_before = classified_after
        non_classified_before = non_classified_after
        (Dd().input(test_file.full_path).output("/dev/null")
         .block_size(Size(1, Unit.Blocks4096)).run())

    with TestRun.step("Check classified occupancy."):
        classified_after = cache.get_io_class_statistics(
            io_class_id=ioclass_id).usage_stats.occupancy
        check_occupancy(classified_before - test_file.size, classified_after)
        TestRun.LOGGER.info("Checking non-classified occupancy")
        non_classified_after = cache.get_io_class_statistics(io_class_id=0).usage_stats.occupancy
        check_occupancy(non_classified_before + test_file.size, non_classified_after)

    with TestRun.step(f"Move test file to {nested_dir_path}."):
        classified_before = classified_after
        non_classified_before = non_classified_after
        test_file.move(destination=nested_dir_path)
        sync()
        drop_caches(DropCachesMode.ALL)

    with TestRun.step("Check classified occupancy."):
        classified_after = cache.get_io_class_statistics(
            io_class_id=ioclass_id).usage_stats.occupancy
        check_occupancy(classified_before, classified_after)
        TestRun.LOGGER.info("Checking non-classified occupancy")
        non_classified_after = cache.get_io_class_statistics(io_class_id=0).usage_stats.occupancy
        check_occupancy(non_classified_before, non_classified_after)

    with TestRun.step("Read test file."):
        classified_before = classified_after
        non_classified_before = non_classified_after
        (Dd().input(test_file.full_path).output("/dev/null")
         .block_size(Size(1, Unit.Blocks4096)).run())

    with TestRun.step("Check classified occupancy."):
        classified_after = cache.get_io_class_statistics(
            io_class_id=ioclass_id).usage_stats.occupancy
        check_occupancy(classified_before + test_file.size, classified_after)

    with TestRun.step("Check non-classified occupancy."):
        non_classified_after = cache.get_io_class_statistics(io_class_id=0).usage_stats.occupancy
        check_occupancy(non_classified_before - test_file.size, non_classified_after)
def read_bandwidth_average(self):
    return Size(self.job.read.bw_mean, UnitPerSecond(Unit.KibiByte))
def test_ioclass_directory_depth(filesystem):
    """
        title: Test IO classification by directory.
        description: |
          Test if directory classification works properly for deeply nested directories for read and
          write operations.
        pass_criteria:
          - No kernel bug.
          - Read and write operations to directories are classified properly.
    """
    base_dir_path = f"{mountpoint}/base_dir"

    with TestRun.step("Prepare cache and core."):
        cache, core = prepare()
        Udev.disable()

    with TestRun.step(f"Prepare {filesystem.name} filesystem and mount {core.path} "
                      f"at {mountpoint}."):
        core.create_filesystem(filesystem)
        core.mount(mountpoint)
        sync()

    with TestRun.step(f"Create the base directory: {base_dir_path}."):
        fs_utils.create_directory(base_dir_path)

    with TestRun.step(f"Create a nested directory."):
        nested_dir_path = base_dir_path
        random_depth = random.randint(40, 80)
        for i in range(random_depth):
            nested_dir_path += f"/dir_{i}"
        fs_utils.create_directory(path=nested_dir_path, parents=True)

    # Test classification in nested dir by reading a previously unclassified file
    with TestRun.step("Create the first file in the nested directory."):
        test_file_1 = File(f"{nested_dir_path}/test_file_1")
        dd = (
            Dd().input("/dev/urandom")
                .output(test_file_1.full_path)
                .count(random.randint(1, 200))
                .block_size(Size(1, Unit.MebiByte))
        )
        dd.run()
        sync()
        drop_caches(DropCachesMode.ALL)
        test_file_1.refresh_item()

    with TestRun.step("Load IO class config."):
        ioclass_id = random.randint(1, ioclass_config.MAX_IO_CLASS_ID)
        # directory IO class
        ioclass_config.add_ioclass(
            ioclass_id=ioclass_id,
            eviction_priority=1,
            allocation="1.00",
            rule=f"directory:{base_dir_path}",
            ioclass_config_path=ioclass_config_path,
        )
        casadm.load_io_classes(cache_id=cache.cache_id, file=ioclass_config_path)

    with TestRun.step("Read the file in the nested directory"):
        base_occupancy = cache.get_io_class_statistics(io_class_id=ioclass_id).usage_stats.occupancy
        dd = (
            Dd().input(test_file_1.full_path)
                .output("/dev/null")
                .block_size(Size(1, Unit.MebiByte))
        )
        dd.run()

    with TestRun.step("Check occupancy after creating the file."):
        new_occupancy = cache.get_io_class_statistics(io_class_id=ioclass_id).usage_stats.occupancy
        if new_occupancy != base_occupancy + test_file_1.size:
            TestRun.LOGGER.error("Wrong occupancy after reading file!\n"
                                 f"Expected: {base_occupancy + test_file_1.size}, "
                                 f"actual: {new_occupancy}")

    # Test classification in nested dir by creating a file
    with TestRun.step("Create the second file in the nested directory"):
        base_occupancy = new_occupancy
        test_file_2 = File(f"{nested_dir_path}/test_file_2")
        dd = (
            Dd().input("/dev/urandom")
                .output(test_file_2.full_path)
                .count(random.randint(25600, 51200))  # 100MB to 200MB
                .block_size(Size(1, Unit.Blocks4096))
        )
        dd.run()
        sync()
        drop_caches(DropCachesMode.ALL)
        test_file_2.refresh_item()

    with TestRun.step("Check occupancy after creating the second file."):
        new_occupancy = cache.get_io_class_statistics(io_class_id=ioclass_id).usage_stats.occupancy
        expected_occupancy = (base_occupancy + test_file_2.size).set_unit(Unit.Blocks4096)
        if new_occupancy != expected_occupancy:
            TestRun.LOGGER.error("Wrong occupancy after creating file!\n"
                                 f"Expected: {expected_occupancy}, "
                                 f"actual: {new_occupancy}")
def write_io(self):
    return Size(self.job.write.io_kbytes, Unit.KibiByte)
Example #26
import pytest
from api.cas import casadm, cli
from api.cas.cache_config import CacheMode, CacheModeTrait, CleaningPolicy, SeqCutOffPolicy
from core.test_run import TestRun
from storage_devices.disk import DiskTypeSet, DiskType, DiskTypeLowerThan
from test_tools.dd import Dd
from test_tools.disk_utils import Filesystem
from test_utils import os_utils
from test_utils.os_utils import Udev
from test_utils.output import CmdException
from test_utils.size import Size, Unit
from tests.lazy_writes.recovery.recovery_tests_methods import create_test_files, copy_file, \
    compare_files, power_cycle_dut

mount_point = "/mnt"
test_file_size = Size(1.5, Unit.GibiByte)


@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeLowerThan("cache"))
@pytest.mark.parametrize("cache_mode", CacheMode.with_traits(CacheModeTrait.LazyWrites))
@pytest.mark.require_plugin("power_control")
def test_recovery_flush_reset_raw(cache_mode):
    """
        title: Recovery after reset during cache flushing - test on raw device.
        description: |
          Verify that unflushed data can be safely recovered when reset is pressed during
          data flushing on a raw device.
        pass_criteria:
          - CAS recovers successfully after reboot
          - No data corruption
def write_bandwidth_average(self):
    return Size(self.job.write.bw_mean, UnitPerSecond(Unit.KibiByte))
Example #28
def test_recovery_flush_reset_fs(cache_mode, fs):
    """
        title: Recovery after reset during cache flushing - test on filesystem.
        description: |
          Verify that unflushed data can be safely recovered when reset is pressed during
          data flushing on a filesystem.
        pass_criteria:
          - CAS recovers successfully after reboot
          - No data corruption
    """
    with TestRun.step("Prepare cache and core devices."):
        cache_disk = TestRun.disks['cache']
        core_disk = TestRun.disks['core']
        cache_disk.create_partitions([Size(2, Unit.GibiByte)])
        core_disk.create_partitions([Size(16, Unit.GibiByte)] * 2)
        cache_device = cache_disk.partitions[0]
        core_device = core_disk.partitions[0]
        core_device_link = core_device.get_device_link("/dev/disk/by-id")
        cache_device_link = cache_device.get_device_link("/dev/disk/by-id")

    with TestRun.step(f"Create {fs} filesystem on core."):
        core_device.create_filesystem(fs)

    with TestRun.step("Create test files."):
        source_file, target_file = create_test_files(test_file_size)

    with TestRun.step("Setup cache and add core."):
        cache = casadm.start_cache(cache_device, cache_mode)
        Udev.disable()
        core = cache.add_core(core_device)
        cache.set_cleaning_policy(CleaningPolicy.nop)
        cache.set_seq_cutoff_policy(SeqCutOffPolicy.never)

    with TestRun.step("Mount CAS device."):
        core.mount(mount_point)

    with TestRun.step("Copy file to CAS."):
        copy_file(source=source_file.full_path,
                  target=os.path.join(mount_point, "source_test_file"),
                  size=test_file_size, direct="oflag")

    with TestRun.step("Unmount CAS device."):
        core.unmount()

    with TestRun.step("Trigger flush."):
        TestRun.executor.run_in_background(cli.flush_cache_cmd(f"{cache.cache_id}"))

    with TestRun.step("Hard reset DUT during data flushing."):
        power_cycle_dut(True, core_device)
        cache_device.full_path = cache_device_link.get_target()
        core_device.full_path = core_device_link.get_target()

    with TestRun.step("Load cache."):
        cache = casadm.load_cache(cache_device)
        if cache.get_dirty_blocks() == Size.zero():
            TestRun.fail("There are no dirty blocks on cache device.")

    with TestRun.step("Stop cache with dirty data flush."):
        core_writes_before = core_device.get_io_stats().sectors_written
        cache.stop()
        if core_writes_before >= core_device.get_io_stats().sectors_written:
            TestRun.fail("No data was flushed after stopping cache started with load option.")

    with TestRun.step("Mount core device."):
        core_device.mount(mount_point)

    with TestRun.step("Copy test file from core device to temporary location. "
                      "Compare it with the first version – they should be the same."):
        copy_file(source=os.path.join(mount_point, "source_test_file"),
                  target=target_file.full_path,
                  size=test_file_size, direct="iflag")
        compare_files(source_file, target_file)

    with TestRun.step("Unmount core device and remove test files."):
        core_device.unmount()
        target_file.remove()
        source_file.remove()
        Udev.enable()
Example #29
def test_core_inactive_stats():
    """
        1. Start cache with 3 cores.
        2. Switch cache into WB mode.
        3. Issue IO to each core.
        4. Stop cache without flush.
        5. Remove two core devices.
        6. Load cache.
        7. Check if cache stats are equal to sum of valid and inactive cores stats.
        8. Check if percentage values are calculated properly.
    """
    cache, core_device = prepare()

    cache_device = cache.cache_device

    TestRun.LOGGER.info("Switching cache mode to WB")
    cache.set_cache_mode(cache_mode=CacheMode.WB)
    cores = cache.get_core_devices()
    TestRun.LOGGER.info("Issue IO to each core")
    for core in cores:
        (Dd().input("/dev/zero")
             .output(core.path)
             .count(1000)
             .block_size(Size(4, Unit.KibiByte))).run()

    TestRun.LOGGER.info("Stopping cache with dirty data")
    cores[2].flush_core()
    cache.stop(no_data_flush=True)

    TestRun.LOGGER.info("Removing two of core devices")
    core_device.remove_partitions()
    core_device.create_partitions([Size(1, Unit.GibiByte)])

    TestRun.LOGGER.info("Loading cache with missing core device")
    cache = casadm.start_cache(cache_device, load=True)

    # Accumulate valid cores stats
    cores_occupancy = 0
    cores_clean = 0
    cores_dirty = 0
    cores = cache.get_core_devices()
    for core in cores:
        core_stats = core.get_statistics()
        cores_occupancy += core_stats.usage_stats.occupancy.value
        cores_clean += core_stats.usage_stats.clean.value
        cores_dirty += core_stats.usage_stats.dirty.value

    cache_stats = cache.get_statistics()
    # Add inactive core stats
    cores_occupancy += cache_stats.inactive_usage_stats.inactive_occupancy.value
    cores_clean += cache_stats.inactive_usage_stats.inactive_clean.value
    cores_dirty += cache_stats.inactive_usage_stats.inactive_dirty.value

    assert cache_stats.usage_stats.occupancy.value == cores_occupancy
    assert cache_stats.usage_stats.dirty.value == cores_dirty
    assert cache_stats.usage_stats.clean.value == cores_clean

    cache_stats_percentage = cache.get_statistics(percentage_val=True)
    # Calculate expected percentage value of inactive core stats
    inactive_occupancy_perc = (
        cache_stats.inactive_usage_stats.inactive_occupancy.value /
        cache_stats.config_stats.cache_size.value)
    inactive_clean_perc = (
        cache_stats.inactive_usage_stats.inactive_clean.value /
        cache_stats.usage_stats.occupancy.value)
    inactive_dirty_perc = (
        cache_stats.inactive_usage_stats.inactive_dirty.value /
        cache_stats.usage_stats.occupancy.value)

    inactive_occupancy_perc = round(100 * inactive_occupancy_perc, 1)
    inactive_clean_perc = round(100 * inactive_clean_perc, 1)
    inactive_dirty_perc = round(100 * inactive_dirty_perc, 1)

    TestRun.LOGGER.info(str(cache_stats_percentage))
    assert (inactive_occupancy_perc ==
            cache_stats_percentage.inactive_usage_stats.inactive_occupancy)
    assert (inactive_clean_perc ==
            cache_stats_percentage.inactive_usage_stats.inactive_clean)
    assert (inactive_dirty_perc ==
            cache_stats_percentage.inactive_usage_stats.inactive_dirty)
import random
import datetime

import pytest

from api.cas import casadm
from api.cas.cache_config import CacheMode
from core.test_run import TestRun
from test_tools.fio.fio import Fio
from test_tools.fio.fio_param import ReadWrite, IoEngine, VerifyMethod
from storage_devices.disk import DiskType, DiskTypeSet, DiskTypeLowerThan
from test_utils.size import Unit, Size


start_size = int(Size(512, Unit.Byte).get_value())
step = int(Size(512, Unit.Byte).get_value())
stop_size = int(Size(128, Unit.KibiByte).get_value())
runtime = datetime.timedelta(hours=12) / (stop_size / 512)


@pytest.mark.parametrize("cache_mode", [CacheMode.WT, CacheMode.WB])
@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeLowerThan("cache"))
def test_data_integrity_12h(cache_mode):
    """
    title: Data integrity test in passed cache mode with duration time equal to 12h
        description: Create 1 cache with size between 40MB and 50MB and 1 core with size 150MB
        pass_criteria:
            - System does not crash.
            - All operations complete successfully.