# Imports below follow the Open CAS test-framework layout these tests are
# written against; the exact module paths are assumed.
import random
import time
from collections import namedtuple

from api.cas import casadm
from api.cas.cache_config import CleaningPolicy, FlushParametersAcp
from core.test_run import TestRun
from test_tools.blktrace import ActionKind, BlkTrace, BlkTraceMask, RwbsKind
from test_tools.fio.fio import Fio
from test_tools.fio.fio_param import IoEngine, ReadWrite
from test_utils.size import Size, Unit
from test_utils.time import Time


def start_monitoring(core_dev, cache_dev, cas_dev):
    """Start tracing discard requests on the core, cache and exported CAS devices."""
    blktrace_core_dev = BlkTrace(core_dev, BlkTraceMask.discard)
    blktrace_cache_dev = BlkTrace(cache_dev, BlkTraceMask.discard)
    blktrace_cas = BlkTrace(cas_dev, BlkTraceMask.discard)

    blktrace_core_dev.start_monitoring()
    blktrace_cache_dev.start_monitoring()
    blktrace_cas.start_monitoring()

    return {"core": blktrace_core_dev,
            "cache": blktrace_cache_dev,
            "cas": blktrace_cas}

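
# Hedged companion sketch (not part of the original tests): stop the three
# monitors returned by start_monitoring() above and collect the parsed discard
# requests per device. It relies only on BlkTrace.stop_monitoring(), which is
# already used by the tests below; the helper name is an assumption.
def stop_monitoring(blktraces):
    return {name: trace.stop_monitoring() for name, trace in blktraces.items()}
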

def test_acp_param_wake_up_time(cache_line_size, cache_mode):
    """
    title: Functional test for ACP wake-up parameter.
    description: |
        Verify that the interval between ACP cleaning iterations is not longer
        than the wake-up time parameter value.
    pass_criteria:
      - ACP flush iterations are triggered with defined frequency.
    """
    with TestRun.step("Test prepare."):
        error_threshold_ms = 50
        generated_vals = get_random_list(
            min_val=FlushParametersAcp.acp_params_range().wake_up_time[0],
            max_val=FlushParametersAcp.acp_params_range().wake_up_time[1],
            n=10,
        )
        acp_configs = []
        for config in generated_vals:
            acp_configs.append(
                FlushParametersAcp(wake_up_time=Time(milliseconds=config)))
        acp_configs.append(FlushParametersAcp.default_acp_params())

    with TestRun.step("Prepare partitions."):
        core_size = Size(5, Unit.GibiByte)
        cache_device = TestRun.disks["cache"]
        core_device = TestRun.disks["core"]
        cache_device.create_partitions([Size(10, Unit.GibiByte)])
        core_device.create_partitions([core_size])

    with TestRun.step(
            f"Start cache in {cache_mode} with {cache_line_size} and add core."):
        cache = casadm.start_cache(cache_device.partitions[0], cache_mode,
                                   cache_line_size)
        core = cache.add_core(core_device.partitions[0])

    with TestRun.step("Set cleaning policy to NOP."):
        cache.set_cleaning_policy(CleaningPolicy.nop)

    with TestRun.step("Start IO in background."):
        fio = get_fio_cmd(core, core_size)
        fio.run_in_background()
        time.sleep(10)

    with TestRun.step("Set cleaning policy to ACP."):
        cache.set_cleaning_policy(CleaningPolicy.acp)

    with TestRun.group("Verify IO number for different wake_up_time values."):
        for acp_config in acp_configs:
            with TestRun.step(f"Setting {acp_config}"):
                cache.set_params_acp(acp_config)
                accepted_interval_threshold = (
                    acp_config.wake_up_time.total_milliseconds()
                    + error_threshold_ms)

            with TestRun.step(
                    "Using blktrace verify if the interval between ACP cleaning "
                    "iterations is shorter than or equal to the wake-up parameter "
                    f"value (including {error_threshold_ms} ms error threshold)."):
                blktrace = BlkTrace(core.core_device, BlkTraceMask.write)
                blktrace.start_monitoring()
                time.sleep(15)
                blktrace_output = blktrace.stop_monitoring()

                for (prev, curr) in zip(blktrace_output, blktrace_output[1:]):
                    if not new_acp_iteration(prev, curr):
                        continue

                    # Blktrace timestamps are in nanoseconds - convert to ms.
                    interval_ms = (curr.timestamp - prev.timestamp) / 10**6

                    if interval_ms > accepted_interval_threshold:
                        TestRun.LOGGER.error(
                            f"Interval of {interval_ms} ms is not within accepted "
                            f"range for "
                            f"{acp_config.wake_up_time.total_milliseconds()} "
                            f"wake_up_time param value.")

    with TestRun.step("Stop all caches"):
        kill_all_io()
        casadm.stop_all_caches()

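
# The ACP parameter tests call get_random_list() and new_acp_iteration(),
# which are not shown in this section. The sketches below are assumptions,
# not the framework's actual implementations: get_random_list() is taken to
# return n uniformly drawn integers, and new_acp_iteration() to detect the
# idle gap between two cleaning iterations (blktrace timestamps are in
# nanoseconds, as implied by the /10**6 millisecond conversion above);
# the 5 ms gap threshold is illustrative only.
ASSUMED_ACP_IDLE_GAP_NS = 5 * 10**6


def get_random_list(min_val, max_val, n):
    # Sketch: n random values from the inclusive [min_val, max_val] range.
    return [random.randint(min_val, max_val) for _ in range(n)]


def new_acp_iteration(prev, curr):
    # Sketch: a pause longer than the assumed idle gap between two consecutive
    # requests is treated as the boundary of a new ACP cleaning iteration.
    return (curr.timestamp - prev.timestamp) > ASSUMED_ACP_IDLE_GAP_NS
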

def test_acp_functional(cache_mode):
    """
    title: Validate ACP behavior.
    description: |
        Validate that ACP cleans dirty data from chunk buckets sorted by the
        number of dirty pages (the dirtiest chunks first).
    pass_criteria:
      - All chunks are cleaned in proper order
    """
    chunks_count = 8
    chunk_size = Size(100, Unit.MebiByte)
    chunk_list = []

    def sector_in_chunk(chunk, blktrace_header):
        sector_to_size = Size(blktrace_header.sector_number, Unit.Blocks512)
        return chunk.offset <= sector_to_size < chunk.offset + chunk_size

    def get_header_chunk(bucket_chunks, blktrace_header):
        return next(
            (c for c in bucket_chunks if sector_in_chunk(c, blktrace_header)),
            None)

    def sector_in_tested_region(blktrace_header, list_of_chunks):
        return any(
            [sector_in_chunk(c, blktrace_header) for c in list_of_chunks])

    with TestRun.step("Prepare devices."):
        cache_device = TestRun.disks['cache']
        core_device = TestRun.disks['core']
        cache_device.create_partitions([chunk_size * chunks_count])
        cache_device = cache_device.partitions[0]

    with TestRun.step("Start cache in WB mode, set cleaning policy to NOP "
                      "and add whole disk as core."):
        cache = casadm.start_cache(cache_device, cache_mode)
        cache.set_cleaning_policy(CleaningPolicy.nop)
        core = cache.add_core(core_device)

    with TestRun.step(
            "Run separate random writes with random amount of data on every "
            "100 MiB part of CAS device."):
        Chunk = namedtuple('Chunk', 'offset writes_size')
        random_chunk_writes = random.sample(range(1, 101), chunks_count)
        for i in range(chunks_count):
            c = Chunk(chunk_size * i,
                      Size(random_chunk_writes[i], Unit.MebiByte))
            chunk_list.append(c)

        fio = (Fio().create_command()
               .io_engine(IoEngine.sync)
               .read_write(ReadWrite.randwrite)
               .direct()
               .size(chunk_size)
               .block_size(Size(1, Unit.Blocks4096))
               .target(f"{core.path}"))
        for chunk in chunk_list:
            fio.add_job().offset(chunk.offset).io_size(chunk.writes_size)
        fio.run()

        dirty_blocks = cache.get_dirty_blocks()
        if dirty_blocks == Size.zero():
            TestRun.fail("No dirty data on cache after IO.")
        TestRun.LOGGER.info(str(cache.get_statistics()))

    with TestRun.step(
            "Switch cleaning policy to ACP and start blktrace monitoring."):
        trace = BlkTrace(core.core_device, BlkTraceMask.write)
        trace.start_monitoring()

        initial_dirty_blocks = cache.get_dirty_blocks()
        cache.set_cleaning_policy(CleaningPolicy.acp)
        while cache.get_dirty_blocks() > Size.zero():
            time.sleep(10)
            if cache.get_dirty_blocks() == initial_dirty_blocks:
                TestRun.fail(
                    f"No data flushed in 10s.\n{str(cache.get_statistics())}")
            initial_dirty_blocks = cache.get_dirty_blocks()

        TestRun.LOGGER.info(str(cache.get_statistics()))

        action_kind = ActionKind.IoHandled
        output = trace.stop_monitoring()
        blktrace_output = [
            h for h in output
            if h.action == action_kind and RwbsKind.F not in h.rwbs
        ]

        if not blktrace_output:
            TestRun.fail(f"No {action_kind.name} entries in blktrace output!")
        TestRun.LOGGER.debug(
            f"Blktrace headers count: {len(blktrace_output)}.")

    with TestRun.step(
            "Using blktrace verify that cleaning thread cleans data from "
            "all CAS device parts in proper order."):
        all_writes_ok = True
        last_sector = None
        max_percent = 100
        bucket_chunks = []
        current_chunk = None
        write_counter = 0

        for header in blktrace_output:
            # Sector not in current chunk - search for the next chunk
            if current_chunk is None or \
                    not sector_in_chunk(current_chunk, header):
                # Search for bucket with chunks that contain most dirty data
                while not bucket_chunks and max_percent > 0:
                    bucket_chunks = [
                        chunk for chunk in chunk_list
                        if max_percent
                        >= chunk.writes_size.get_value(Unit.MebiByte)
                        > max_percent - 10
                    ]
                    max_percent -= 10

                if not bucket_chunks:
                    TestRun.fail(
                        f"No chunks left for sector {header.sector_number} "
                        f"({Size(header.sector_number, Unit.Blocks512)}).")

                # Get chunk within current bucket where current header sector
                # is expected
                chunk = get_header_chunk(bucket_chunks, header)
                if not chunk:
                    TestRun.LOGGER.error(
                        f"Sector {header.sector_number} "
                        f"({Size(header.sector_number, Unit.Blocks512)}) "
                        f"not in current bucket.")
                    all_writes_ok = False
                    if not sector_in_tested_region(header, chunk_list):
                        TestRun.LOGGER.error(
                            f"Sector {header.sector_number} "
                            f"({Size(header.sector_number, Unit.Blocks512)}) "
                            f"outside of any tested chunk.")
                    continue

                # Set new chunk as current
                if current_chunk:
                    TestRun.LOGGER.info(f"Writes to chunk: {write_counter}")
                current_chunk = chunk
                write_counter = 1
                bucket_chunks.remove(chunk)
                last_sector = header.sector_number
                TestRun.LOGGER.debug(
                    f"First written sector in new chunk: {header.sector_number} "
                    f"({Size(header.sector_number, Unit.Blocks512)})")
                continue

            # Sector in current chunk - check sequential order
            if last_sector is None or header.sector_number >= last_sector:
                last_sector = header.sector_number
            else:
                TestRun.LOGGER.error(
                    f"Sectors in chunk <{current_chunk.offset}, "
                    f"{str(current_chunk.offset + chunk_size)}) written in bad "
                    f"order - sector {header.sector_number} "
                    f"({Size(header.sector_number, Unit.Blocks512)}) after sector "
                    f"{last_sector} ({Size(last_sector, Unit.Blocks512)})")
                all_writes_ok = False
            write_counter += 1

        TestRun.LOGGER.info(f"Writes to chunk: {write_counter}")

        if all_writes_ok:
            TestRun.LOGGER.info("All sectors written in proper order.")

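
# Illustration only (not part of the test): how the bucket search above groups
# chunks. Chunks are bucketed by the amount of data written to them in 10 MiB
# wide bands, scanned from the fullest band downwards, so ACP is expected to
# clean the dirtiest chunks first. The helper name and band width below are
# assumptions for the example.
def bucket_order(chunks, band_mib=10, top_mib=100):
    buckets = []
    for upper in range(top_mib, 0, -band_mib):
        bucket = [
            c for c in chunks
            if upper >= c.writes_size.get_value(Unit.MebiByte) > upper - band_mib
        ]
        if bucket:
            buckets.append((upper, bucket))
    return buckets
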

def test_acp_param_flush_max_buffers(cache_line_size, cache_mode):
    """
    title: Functional test for ACP flush-max-buffers parameter.
    description: |
        Verify that the number of I/O requests issued between wake-up time
        intervals does not exceed the flush-max-buffers parameter value.
    pass_criteria:
      - ACP triggered dirty data flush
      - Number of writes to core is lower than or equal to flush_max_buffers
    """
    with TestRun.step("Test prepare."):
        buffer_values = get_random_list(
            min_val=FlushParametersAcp.acp_params_range().flush_max_buffers[0],
            max_val=FlushParametersAcp.acp_params_range().flush_max_buffers[1],
            n=10,
        )

        default_config = FlushParametersAcp.default_acp_params()
        acp_configs = [
            FlushParametersAcp(flush_max_buffers=buf,
                               wake_up_time=Time(seconds=1))
            for buf in buffer_values
        ]
        acp_configs.append(default_config)

    with TestRun.step("Prepare partitions."):
        core_size = Size(5, Unit.GibiByte)
        cache_device = TestRun.disks["cache"]
        core_device = TestRun.disks["core"]
        cache_device.create_partitions([Size(10, Unit.GibiByte)])
        core_device.create_partitions([core_size])

    with TestRun.step(
            f"Start cache in {cache_mode} with {cache_line_size} and add core."):
        cache = casadm.start_cache(cache_device.partitions[0], cache_mode,
                                   cache_line_size)
        core = cache.add_core(core_device.partitions[0])

    with TestRun.step("Set cleaning policy to NOP."):
        cache.set_cleaning_policy(CleaningPolicy.nop)

    with TestRun.step("Start IO in background."):
        fio = get_fio_cmd(core, core_size)
        fio.run_in_background()
        time.sleep(10)

    with TestRun.step("Set cleaning policy to ACP."):
        cache.set_cleaning_policy(CleaningPolicy.acp)

    with TestRun.group(
            "Verify IO number for different max_flush_buffers values."):
        for acp_config in acp_configs:
            with TestRun.step(f"Setting {acp_config}"):
                cache.set_params_acp(acp_config)

            with TestRun.step(
                    "Using blktrace verify that the number of I/O requests per "
                    "cleaning iteration does not exceed the flush-max-buffers "
                    "parameter value."):
                blktrace = BlkTrace(core.core_device, BlkTraceMask.write)
                blktrace.start_monitoring()
                time.sleep(20)
                blktrace_output = blktrace.stop_monitoring()

                cleaning_started = False
                flush_writes = 0
                for (prev, curr) in zip(blktrace_output, blktrace_output[1:]):
                    if cleaning_started and write_to_core(prev):
                        flush_writes += 1
                    if new_acp_iteration(prev, curr):
                        if cleaning_started:
                            if flush_writes <= acp_config.flush_max_buffers:
                                flush_writes = 0
                            else:
                                TestRun.LOGGER.error(
                                    f"Incorrect number of handled io requests. "
                                    f"Expected at most "
                                    f"{acp_config.flush_max_buffers} - "
                                    f"actual {flush_writes}")
                                flush_writes = 0
                        cleaning_started = True

                if not cleaning_started:
                    TestRun.fail(f"ACP flush not triggered for {acp_config}")

    with TestRun.step("Stop all caches"):
        kill_all_io()
        casadm.stop_all_caches()

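
# The two ACP parameter tests also rely on get_fio_cmd(), write_to_core() and
# kill_all_io(), which are not shown in this section. These are hedged
# sketches, not the framework's actual helpers: the fio options, the
# IoEngine.libaio value, the RwbsKind.W check and the pkill command are all
# assumptions (the real get_fio_cmd() most likely runs a time-based workload).
def get_fio_cmd(core, core_size):
    # Sketch: continuous write workload against the exported CAS device.
    return (Fio().create_command()
            .target(f"{core.path}")
            .read_write(ReadWrite.write)
            .io_engine(IoEngine.libaio)
            .size(core_size)
            .block_size(Size(1, Unit.Blocks4096))
            .direct())


def write_to_core(blktrace_header):
    # Sketch: count completed writes recorded by blktrace on the core device.
    return (blktrace_header.action == ActionKind.IoHandled
            and RwbsKind.W in blktrace_header.rwbs)


def kill_all_io():
    # Sketch: stop the fio jobs started in the background earlier in the tests.
    TestRun.executor.run("pkill -9 fio")
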

def test_trim_start_discard():
    """
    title: Check discarding cache device at cache start
    description: |
        Create 2 partitions on a trim-supporting device, write a pattern to
        both partitions, start blktrace against the first one, start cache on
        the first partition and check if discard requests were sent at all and
        only to the first partition.
    pass_criteria:
      - Partition used for cache is discarded.
      - Second partition is untouched - written pattern is preserved.
    """
    with TestRun.step("Clearing dmesg"):
        TestRun.executor.run_expect_success("dmesg -C")

    with TestRun.step("Preparing cache device"):
        dev = TestRun.disks['cache']
        dev.create_partitions(
            [Size(500, Unit.MebiByte), Size(500, Unit.MebiByte)])
        cas_part = dev.partitions[0]
        non_cas_part = dev.partitions[1]

    with TestRun.step("Writing different pattern on partitions"):
        cas_fio = write_pattern(cas_part.path)
        non_cas_fio = write_pattern(non_cas_part.path)
        cas_fio.run()
        non_cas_fio.run()

    # TODO add blktracing for non-cas part
    with TestRun.step("Starting blktrace against first (cache) partition"):
        blktrace = BlkTrace(cas_part, BlkTraceMask.discard)
        blktrace.start_monitoring()

    with TestRun.step("Starting cache"):
        cache = casadm.start_cache(cas_part, force=True)
        metadata_size = get_metadata_size_from_dmesg()

    with TestRun.step(
            "Stop blktrace and check if discard requests were issued"):
        cache_reqs = blktrace.stop_monitoring()
        cache_part_start = cas_part.begin

        # CAS should discard cache device during cache start
        if len(cache_reqs) == 0:
            TestRun.fail("No discard requests issued to the cas partition!")

        non_meta_sector = (cache_part_start + metadata_size).get_value(
            Unit.Blocks512)
        non_meta_size = (cas_part.size - metadata_size).get_value(Unit.Byte)

        for req in cache_reqs:
            if req.sector_number != non_meta_sector:
                TestRun.fail(
                    f"Discard request issued to wrong sector: {req.sector_number}, "
                    f"expected: {non_meta_sector}")
            if req.byte_count != non_meta_size:
                TestRun.fail(
                    f"Discard request issued with wrong bytes count: {req.byte_count}, "
                    f"expected: {non_meta_size} bytes")

        # Verify data integrity: the cache partition should read back zeroes
        # past the metadata area, while the second partition should still hold
        # the pattern written before cache start.
        cas_fio.read_write(ReadWrite.read)
        non_cas_fio.read_write(ReadWrite.read)
        cas_fio.verification_with_pattern("0x00")
        cas_fio.offset(metadata_size)
        cas_fio.run()
        non_cas_fio.run()

    with TestRun.step("Stopping cache"):
        cache.stop()

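
# test_trim_start_discard() uses write_pattern() and
# get_metadata_size_from_dmesg(), which are not shown in this section. Hedged
# sketches below: the pattern value and the dmesg line format ("Hash offset")
# are assumptions, not the actual CAS log format.
import re


def write_pattern(device):
    # Sketch: whole-device write with a verifiable pattern, using only Fio
    # builder methods that already appear in the tests above.
    return (Fio().create_command()
            .io_engine(IoEngine.sync)
            .read_write(ReadWrite.write)
            .target(device)
            .direct()
            .verification_with_pattern("0xabcd"))


def get_metadata_size_from_dmesg():
    # Sketch only: parse the cache-start messages in dmesg for the metadata
    # size; the line format and unit below are assumptions.
    dmesg_out = TestRun.executor.run_expect_success("dmesg").stdout
    for line in dmesg_out.splitlines():
        match = re.search(r"Hash offset\s*:\s*(\d+) kiB", line)
        if match:
            return Size(int(match.group(1)), Unit.KibiByte)
    TestRun.fail("Could not find cache metadata size in dmesg.")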