def debug_numa_node(self, pci_addr_heads):
    """Debug numa_node file by searching it in /sys and call hwloc-ls.

    Args:
        pci_addr_heads (list): List of PCI address head.
    """
    # Locate each PCI address head under /sys on every server host.
    for head in pci_addr_heads:
        self.log.debug("----- Search PCI Addr Head %s in /sys -----", head)
        run_pcmd(
            hosts=self.hostlist_servers,
            command="find /sys -name \"{}\"".format(head),
            verbose=True)

    # hwloc-ls --whole-io --verbose is another way to obtain the Socket ID.
    # Its output contains something like:
    #   Bridge Host->PCI L#9 (P#2 buses=0000:[80-81])
    #     Bridge PCI->PCI (P#524320 busid=0000:80:02.0 id=8086:2f04
    #       class=0604(PCI_B) buses=0000:[81-81])
    #       PCI 8086:2701 (P#528384 busid=0000:81:00.0 class=0108(NVMExp)
    #         PCISlot=801)
    # Here the PCI address is 0000:81:00.0; the NUMA node can be determined
    # from the section these lines appear in. This is far more cumbersome
    # than reading numa_node, so it is run here for debugging purposes only.
    self.log.debug("----- Show PCI Address in hwloc-ls -----")
    pcmd(hosts=self.hostlist_servers, command="hwloc-ls --whole-io --verbose")
def get_device_info(self):
    """Get the available device names, their numa nodes, and their domains.

    Populates self.interfaces as {interface: {"numa": int, "domain": str}}.
    Fails the test when the listing is non-homogeneous, an unexpected error
    occurs, or no ib* interfaces are found.
    """
    self.interfaces = {}
    net_path = os.path.join(os.path.sep, "sys", "class", "net")
    results = run_pcmd(self.hostlist_servers, "ls -1 {}".format(net_path))
    if len(results) != 1:
        self.fail("Error obtaining interfaces - non-homogeneous config")

    # Seed every ib* device found in the listing with default numa/domain
    # values (hfi1_* domain naming; numa equals the listing index).
    try:
        listing = "\n".join(results[0]["stdout"])
        for index, interface in enumerate(re.findall(r"ib\d", listing)):
            self.interfaces[interface] = {
                "numa": index, "domain": "hfi1_{}".format(index)}
    except (IndexError, KeyError) as error:
        self.log.error("Error obtaining interfaces: %s", str(error))
        self.fail("Error obtaining interfaces - unexpected error")

    # Update interface domain and NUMA node settings for Mellanox devices
    # through mst output, e.g.:
    #   DEVICE_TYPE       MST  PCI      RDMA    NET      NUMA
    #   ConnectX6(rev:0)  NA   86:00.0  mlx5_1  net-ib1  1
    #   ConnectX6(rev:0)  NA   37:00.0  mlx5_0  net-ib0  0
    results = run_pcmd(self.hostlist_servers, "sudo mst status -v")
    try:
        if results[0]["exit_status"] == 0:
            mst_output = "\n".join(results[0]["stdout"])
            for domain, interface, numa in re.findall(
                    r"(mlx\d_\d)\s+net-(ib\d)\s+(\d)", mst_output):
                self.interfaces[interface]["numa"] = int(numa)
                self.interfaces[interface]["domain"] = domain
    except (IndexError, KeyError, ValueError) as error:
        self.log.error("Error obtaining interfaces: %s", str(error))
        self.fail("Error obtaining interfaces - unexpected error")

    if not self.interfaces:
        self.fail("No ib* interfaces found!")
def test_cpu_usage(self):
    """
    JIRA ID: DAOS-4826

    Test Description: Test CPU usage of formatted and idle engine.

    :avocado: tags=all,full_regression
    :avocado: tags=server
    :avocado: tags=cpu_usage
    """
    # Locate the daos_engine PID with ps. The engine should already be
    # started, but retry a few times just in case it is still coming up.
    ps_cmd = r"ps -C daos_engine -o %\p"
    found = False
    for _ in range(5):
        for res in run_pcmd(hosts=self.hostlist_servers, command=ps_cmd):
            self.log.info("ps output = %s", "\n".join(res["stdout"]))
            pid = res["stdout"][-1]
            self.log.info("PID = %s", pid)
            if "PID" not in pid:
                found = True
        if found:
            break
        time.sleep(5)
    if not found:
        self.fail("daos_engine PID couldn't be obtained!")

    # Sample the (instantaneous) CPU usage of the PID with top until it
    # drops below 100% or we run out of attempts.
    for _ in range(10):
        top_cmd = "top -p {} -b -n 1".format(pid)
        usage = -1
        for res in run_pcmd(hosts=self.hostlist_servers, command=top_cmd):
            row = res["stdout"][-1]
            self.log.info("Process row = %s", row)
            fields = row.split()
            self.log.info("Values = %s", fields)
            if len(fields) < 9:
                self.fail("{} returned invalid output!".format(top_cmd))
            # Field 9 of the top process row is %CPU.
            usage = fields[8]
            self.log.info("CPU Usage = %s", usage)
        if usage != -1 and float(usage) < 100:
            break
        time.sleep(2)

    self.assertTrue(usage != -1, "daos_engine CPU usage couldn't be obtained!")
    self.assertTrue(
        float(usage) < 100, "CPU usage is above 100%: {}%".format(usage))
def dump_attachinfo(self):
    """Run dump-attachinfo on the daos_agent."""
    # Run the agent's dump-attachinfo sub-command with elevated privileges
    # and keep the stdout of the first host's result.
    self.manager.job.set_sub_command("dump-attachinfo")
    self.manager.job.sudo = True
    results = run_pcmd(self.hosts, str(self.manager.job))
    self.attachinfo = results[0]["stdout"]
    self.log.info("Agent attachinfo: %s", self.attachinfo)
def get_current_state(self):
    """Get the current state of the daos_server ranks.

    Returns:
        dict: dictionary of server rank keys, each referencing a dictionary
            of information containing at least the following information:
                {"host": <>, "uuid": <>, "state": <>}
            This will be empty if there was error obtaining the dmg system
            query output.

    """
    # Map each host to its rank (rank == index into self._hosts).
    host_rank = {host: rank for rank, host in enumerate(self._hosts)}
    if self._verify_socket_dir:
        command = "prep {}".format(self.manager.job.command)
    else:
        command = "systemctl is-active {}".format(self.manager.job.service_name)
    data = {}
    for result in run_pcmd(self._hosts, command, 30):
        # expecting single line output from run_pcmd
        state = result["stdout"][-1]
        for node in result["hosts"]:
            data[host_rank[node]] = {"host": node, "uuid": "-", "state": state}
    return data
def get_cpu_usage(self, pid, usage_limit):
    """Monitor CPU usage and return if it gets below usage_limit.

    Args:
        pid (str): daos_engine PID.
        usage_limit (int): Limit that we want daos_engine to use.

    Returns:
        str: daos_engine CPU usage (-1 if it could not be obtained).
    """
    usage = -1
    for _ in range(10):
        # Sample the (instantaneous) CPU usage of the PID with top.
        top_cmd = "top -p {} -b -n 1".format(pid)
        usage = -1
        for res in run_pcmd(hosts=self.hostlist_servers, command=top_cmd):
            row = res["stdout"][-1]
            self.log.info("Process row = %s", row)
            fields = row.split()
            self.log.info("Values = %s", fields)
            if len(fields) < 9:
                self.fail("{} returned invalid output!".format(top_cmd))
            # Field 9 of the top process row is %CPU.
            usage = fields[8]
            self.log.info("CPU Usage = %s", usage)
        if usage != -1 and float(usage) < usage_limit:
            break
        time.sleep(2)
    return usage
def run_metrics_check(self, logging=True, prefix=None):
    """Monitor telemetry data.

    Args:
        self (obj): soak obj
        logging (bool): If True; output is logged to file
        prefix (str): add prefix to name; ie initial or final
    """
    # Nothing to do unless telemetry was enabled for this run.
    if not self.params.get("enable_telemetry", "/run/*"):
        return
    engine_count = self.server_managers[0].get_config_value(
        "engines_per_host")
    for engine in range(engine_count):
        # Log file name: "<prefix>_metrics_N.csv" when a prefix is given,
        # otherwise "pass<loop>_metrics_N.csv".
        suffix = "_metrics_{}.csv".format(engine)
        if prefix:
            name = prefix + suffix
        else:
            name = "pass" + str(self.loop) + suffix
        destination = self.outputsoakdir
        results = run_pcmd(
            hosts=self.hostlist_servers,
            command="sudo daos_metrics -S {} --csv".format(engine),
            verbose=(not logging),
            timeout=60)
        if not logging:
            continue
        for result in results:
            log_name = name + "-" + str(result["hosts"])
            write_logfile(result["stdout"], log_name, destination)
def service_running(self):
    """Determine if the job's service is active via the systemctl command.

    The 'systemctl is-active <service>' command will return a string
    indicating one of the following states:
        active, inactive, activating, deactivating, failed, unknown
    If the <service> is "active" or "activating" return True.

    Returns:
        bool: True if the service is running, False otherwise

    """
    valid_states = ["active", "activating"]
    states = {}
    status = True
    self._systemctl.unit_command.value = "is-active"
    for result in run_pcmd(self._hosts, self.__str__(), False, self.timeout, None):
        if result["interrupted"]:
            # A timed-out command counts as not running.
            states["timeout"] = result["hosts"]
            status = False
        else:
            state = result["stdout"][-1]
            if state not in states:
                states[state] = NodeSet()
            states[state].add(result["hosts"])
            status &= state in valid_states
    data = ["{}={}".format(key, states[key]) for key in sorted(states)]
    self.log.info(
        " Detected %s states: %s",
        self._systemctl.service.value, ", ".join(data))
    return status
def prepare_storage(self, user, using_dcpm=None, using_nvme=None):
    """Prepare the server storage.

    Args:
        user (str): username
        using_dcpm (bool, optional): override option to prepare scm storage.
            Defaults to None, which uses the configuration file to determine
            if scm storage should be formatted.
        using_nvme (bool, optional): override option to prepare nvme
            storage. Defaults to None, which uses the configuration file to
            determine if nvme storage should be formatted.

    Raises:
        ServerFailed: if there was an error preparing the storage

    """
    cmd = DaosServerCommand(self.manager.job.command_path)
    cmd.sudo = False
    cmd.debug.value = False
    cmd.set_sub_command("storage")
    cmd.sub_command_class.set_sub_command("prepare")
    prepare = cmd.sub_command_class.sub_command_class
    prepare.target_user.value = user
    prepare.force.value = True

    # Fall back to the configuration file settings when not overridden.
    if using_dcpm is None:
        using_dcpm = self.manager.job.using_dcpm
    if using_nvme is None:
        using_nvme = self.manager.job.using_nvme

    # Restrict the prepare to one device class when only one is in use.
    if using_dcpm and not using_nvme:
        prepare.scm_only.value = True
    elif using_nvme and not using_dcpm:
        prepare.nvme_only.value = True

    self.log.info("Preparing DAOS server storage: %s", str(cmd))
    results = run_pcmd(
        self._hosts, str(cmd), timeout=self.storage_prepare_timeout.value)

    # Collate hosts per exit status and concatenate all stdout
    # (gratuitously lifted from pcmd() and get_current_state()).
    by_status = {}
    all_stdout = ""
    for res in results:
        all_stdout += '\n'.join(res["stdout"] + [''])
        if res["exit_status"] not in by_status:
            by_status[res["exit_status"]] = NodeSet()
        by_status[res["exit_status"]].add(res["hosts"])

    # Fail on any non-zero/non-uniform exit status, or when SCM preparation
    # was requested but silently skipped.
    command_failed = len(by_status) > 1 or 0 not in by_status
    scm_skipped = (
        using_dcpm
        and "No SCM modules detected; skipping operation" in all_stdout)
    if command_failed or scm_skipped:
        dev_type = "nvme"
        if using_dcpm and using_nvme:
            dev_type = "dcpm & nvme"
        elif using_dcpm:
            dev_type = "dcpm"
        # Dump hardware state to aid debugging before failing.
        pcmd(self._hosts, "sudo -n ipmctl show -v -dimm")
        pcmd(self._hosts, "ndctl list ")
        raise ServerFailed("Error preparing {} storage".format(dev_type))
def test_core_files(self):
    """Test to verify core file creation.

    This test will send a signal 6 to a random daos_engine process so
    that it will create a core file, allowing the core file collection
    code in launch.py to be tested.

    This test can be run in any CI stage: vm, small, medium, large

    :avocado: tags=all
    :avocado: tags=harness,harness_advanced_test,core_files
    :avocado: tags=test_core_files
    """
    # Choose a server find the pid of its daos_engine process
    host = choice(self.server_managers[0].hosts)  #nosec
    self.log.info("Obtaining pid of the daos_engine process on %s", host)
    pid = None
    result = run_pcmd([host], "pgrep --list-full daos_engine", 20)
    index = 0
    while not pid and index < len(result):
        matches = findall(
            r"(\d+)\s+[A-Za-z0-9/]+", "\n".join(result[index]["stdout"]))
        if matches:
            pid = matches[0]
        index += 1
    if pid is None:
        self.fail(
            "Error obtaining pid of the daos_engine process on "
            "{}".format(host))
    self.log.info("Found pid %s", pid)

    # Send a signal 6 to its daos_engine process
    self.log.info("Sending a signal 6 to %s", pid)
    result = run_pcmd([host], "sudo kill -6 {}".format(pid))
    if len(result) > 1 or result[0]["exit_status"] != 0:
        self.fail("Error sending a signal 6 to {} on {}".format(pid, host))

    # Display the journalctl log for the process that was sent the signal
    self.server_managers[0].manager.dump_logs([host])

    # Simplify resolving the host name to rank by marking all ranks as
    # expected to be either running or errored (sent a signal 6)
    self.server_managers[0].update_expected_states(
        None, ["Joined", "Errored"])
def verify_ssd_sockets(self, storage_dict):
    """Verify each NVMe device's socket ID against its sysfs numa_node.

    Args:
        storage_dict (dict): Dictionary under "storage"

    Returns:
        list: List of errors.
    """
    pci_addr_heads = []
    errors = []

    # For every PCI address, compare the reported Socket ID with the
    # numa_node value published under /sys for the device's PCI bus.
    for nvme_device in storage_dict["nvme_devices"]:
        cmd_socket_id = nvme_device["socket_id"]

        # Derive the PCI Address Head and the path to numa_node.
        addr_parts = nvme_device["pci_addr"].split(":")
        pci_addr_head = "{}:{}".format(addr_parts[0], addr_parts[1])
        pci_addr_heads.append(pci_addr_head)
        numa_node_path = "/sys/class/pci_bus/{}/device/numa_node".format(
            pci_addr_head)

        # Call cat on the server host, not necessarily the local test host.
        results = run_pcmd(
            hosts=[self.hostlist_servers[0]],
            command="cat {}".format(numa_node_path))

        # Check the numa_node content against the expected Socket ID.
        fs_socket_id = ""
        for result in results:
            fs_socket_id = result["stdout"][-1]
            if fs_socket_id != str(cmd_socket_id):
                errors.append(
                    "Unexpected socket ID! Cmd: {}; FS: {}".format(
                        cmd_socket_id, fs_socket_id))

    if errors:
        # Since we're dealing with system files and we don't have access to
        # them in CI, gather debugging info when the test fails to better
        # understand the result.
        self.debug_numa_node(pci_addr_heads)

    return errors
def test_nvme_io_stats(self):
    """Jira ID: DAOS-4722.

    Test Description: Purpose of this test is to run IO test and check
    when NVME_IO_STATS enabled in config, it generates the different
    statistics.

    Use case: Run ior and it will print the NVMe IO stats to control
    plane log file.

    :avocado: tags=all,hw,medium,nvme,ib2,nvme_io_stats,full_regression
    """
    # run ior
    self.run_ior_with_pool()

    # Get the NVMe IO statistics from server control_log file.
    cmd = 'cat {}'.format(get_log_file(self.control_log))
    results = run_pcmd(self.hostlist_servers, cmd)
    for result in results:
        # Bug fix: treat any non-zero exit status as a failure; the
        # original only checked for exit status 1.
        if result["exit_status"] != 0:
            self.fail("Failed to run cmd {} on {}".format(
                cmd, result["hosts"]))

        # Collect the log lines for each of the 8 targets.
        target_stats = []
        for _tmp in range(8):
            target_stats.append([
                line for line in result["stdout"]
                if "tgt[{}]".format(_tmp) in line])

        # Verify each statistic increased between the first and last
        # sample for every target.
        for stats in NVME_STATS:
            for _tgt, tgt_lines in enumerate(target_stats):
                first_stats = re.findall(r'\d+', [
                    x for x in tgt_lines[0].split()
                    if re.search(stats, x)][0])[0]
                last_stats = re.findall(r'\d+', [
                    x for x in tgt_lines[-1].split()
                    if re.search(stats, x)][0])[0]
                # Last statistic should be higher than initial statistics
                if int(first_stats) >= int(last_stats):
                    self.fail(
                        "Failed: Stats {} for target {} did not increased "
                        "First_stat={} < Last_stat={}".format(
                            stats, _tgt, first_stats, last_stats))
def stop_job_managers(self):
    """Cleanup dfuse in case of test failure.

    Returns:
        list: error messages from the dfuse cleanup commands plus any
            errors reported by the parent class cleanup.
    """
    error_list = []
    dfuse_cleanup_cmd = [
        "pkill dfuse --signal KILL",
        "fusermount3 -uz {}".format(self.dfuse.mount_dir.value)]
    for cmd in dfuse_cleanup_cmd:
        results = run_pcmd(self.hostlist_clients, cmd)
        for result in results:
            if result["exit_status"] != 0:
                # Bug fix: list.append() only takes one argument; the
                # original passed lazy logging-style arguments which
                # raised TypeError. Format the message explicitly.
                error_list.append(
                    "Errors detected during cleanup cmd {} on node {}".format(
                        cmd, result["hosts"]))
    error_list.extend(super().stop_job_managers())
    return error_list
def test_cpu_usage(self):
    """
    JIRA ID: DAOS-4826

    Test Description: Test CPU usage of formatted and idle engine.

    :avocado: tags=all,full_regression
    :avocado: tags=hw,small
    :avocado: tags=server,cpu_usage
    """
    # Locate the daos_engine PID with ps. The engine should already be
    # started, but retry a few times just in case it is still coming up.
    ps_cmd = r"ps -C daos_engine -o %\p"
    found = False
    for _ in range(5):
        for res in run_pcmd(hosts=self.hostlist_servers, command=ps_cmd):
            self.log.info("ps output = %s", "\n".join(res["stdout"]))
            pid = res["stdout"][-1]
            self.log.info("PID = %s", pid)
            if "PID" not in pid:
                found = True
        if found:
            break
        time.sleep(5)
    if not found:
        self.fail("daos_engine PID couldn't be obtained!")

    # Get and verify CPU usage.
    usage_limit = self.params.get("usage_limit", '/run/*')
    usage = self.get_cpu_usage(pid=pid, usage_limit=usage_limit)
    self.verify_usage(usage=usage, usage_limit=usage_limit)

    # Create a pool, container, and run IOR. IO will invoke CPU usage by
    # daos_engine.
    self.run_ior_with_pool()

    # Verify that the CPU usage goes down after IO.
    usage = self.get_cpu_usage(pid=pid, usage_limit=usage_limit)
    self.verify_usage(usage=usage, usage_limit=usage_limit)
def verify_storage_scam_scm(self, storage_dict):
    """Verify SCM namespaces exist under /dev and have the right NUMA node.

    Args:
        storage_dict (dict): Dictionary under "storage"

    Returns:
        list: List of errors.
    """
    errors = []
    RC_SUCCESS = 0

    for scm_namespace in storage_dict["scm_namespaces"]:
        # Verify that all namespaces exist under /dev.
        pmem_name = scm_namespace["blockdev"]
        lscmd = "{} {}".format("ls", os.path.join("/dev", pmem_name))
        # rc is a dictionary where return code is the key.
        rc = pcmd(hosts=self.hostlist_servers, command=lscmd)
        if RC_SUCCESS not in rc:
            errors.append("{} didn't exist under /dev!".format(pmem_name))

        # Verify the Socket ID.
        numa_node_path = "/sys/class/block/{}/device/numa_node".format(
            pmem_name)
        command = "cat {}".format(numa_node_path)
        out_list = run_pcmd(hosts=self.hostlist_servers, command=command)

        # Bug fix: the expected value is the one reported by the storage
        # command (config), and the actual value is the sysfs content; the
        # original had these swapped, producing a misleading error message.
        actual_numa_node = out_list[0]["stdout"][0]
        expected_numa_node = str(scm_namespace["numa_node"])
        if expected_numa_node != actual_numa_node:
            msg = "Unexpected Socket ID! Expected: {}, Actual: {}".format(
                expected_numa_node, actual_numa_node)
            errors.append(msg)

    return errors
def get_daos_metrics(self, verbose=False, timeout=60):
    """Get daos_metrics for the server.

    Args:
        verbose (bool, optional): pass verbose to run_pcmd. Defaults to
            False.
        timeout (int, optional): pass timeout to each execution of
            run_pcmd. Defaults to 60.

    Returns:
        list: list of pcmd results for each engine. See
            general_utils.run_pcmd for details.
            [
                general_utils.run_pcmd(),  # engine 0
                general_utils.run_pcmd()   # engine 1
            ]
    """
    engines_per_host = self.get_config_value("engines_per_host") or 1
    daos_metrics_exe = os.path.join(
        self.manager.job.command_path, "daos_metrics")
    # Run daos_metrics once per engine and collect the per-host results.
    return [
        run_pcmd(
            hosts=self._hosts,
            verbose=verbose,
            timeout=timeout,
            command="sudo {} -S {} --csv".format(daos_metrics_exe, engine))
        for engine in range(engines_per_host)]
def get_log_data(self, hosts, since, until=None, timeout=60):
    """Gather log output for the command running on each host.

    Note (from journalctl man page):
        Date specifications should be of the format "2012-10-30 18:17:16".
        If the time part is omitted, "00:00:00" is assumed. If only the
        seconds component is omitted, ":00" is assumed. If the date
        component is omitted, the current day is assumed. Alternatively
        the strings "yesterday", "today", "tomorrow" are understood, which
        refer to 00:00:00 of the day before the current day, the current
        day, or the day after the current day, respectively. "now" refers
        to the current time. Finally, relative times may be specified,
        prefixed with "-" or "+", referring to times before or after the
        current time, respectively.

    Args:
        hosts (list): list of hosts from which to gather log data.
        since (str): show log entries from this date.
        until (str, optional): show log entries up to this date. Defaults
            to None, in which case it is not utilized.
        timeout (int, optional): timeout for issuing the command. Defaults
            to 60 seconds.

    Raises:
        CommandFailure: if any host timed out or returned a non-zero exit
            status while gathering the log data.

    Returns:
        list: a list of dictionaries including:
            "hosts": <NodeSet() of hosts with this data>
            "data": <journalctl output>

    """
    # Setup the journalctl command to capture all unit activity from the
    # specified start date to now or a specified end date
    # --output=json?
    command = self.get_journalctl_command(since, until)
    self.log.info("Gathering log data on %s: %s", str(hosts), command)

    # Gather the log information per host
    # NOTE(review): positional args appear to be (verbose, timeout,
    # expect_rc) for run_pcmd — confirm against its signature.
    results = run_pcmd(hosts, command, False, timeout, None)

    # Determine if the command completed successfully without a timeout
    status = True
    for result in results:
        if result["interrupted"]:
            # A timeout on any host fails the whole gather.
            self.log.info(" Errors detected running \"%s\":", command)
            self.log.info(
                " %s: timeout detected after %s seconds",
                str(result["hosts"]), timeout)
            status = False
        elif result["exit_status"] != 0:
            self.log.info(" Errors detected running \"%s\":", command)
            status = False
        if not status:
            break

    # Display/return the command output
    log_data = []
    for result in results:
        if result["exit_status"] == 0 and not result["interrupted"]:
            # Add the successful output from each node to the dictionary
            log_data.append(
                {"hosts": result["hosts"], "data": result["stdout"]})
        else:
            # Display all of the results in the case of an error
            if len(result["stdout"]) > 1:
                self.log.info(
                    " %s: rc=%s, output:",
                    str(result["hosts"]), result["exit_status"])
                for line in result["stdout"]:
                    self.log.info(" %s", line)
            else:
                self.log.info(
                    " %s: rc=%s, output: %s",
                    str(result["hosts"]), result["exit_status"],
                    result["stdout"][0])

    # Report any errors through an exception
    if not status:
        raise CommandFailure(
            "Error(s) detected gathering {} log data on {}".format(
                self._systemctl.service.value, NodeSet.fromlist(hosts)))

    # Return the successful command output per set of hosts
    return log_data
def test_version(self):
    """Verify version number for dmg, daos, daos_server, and daos_agent against RPM.

    :avocado: tags=all,full_regression
    :avocado: tags=vm
    :avocado: tags=control
    :avocado: tags=version_number
    """
    errors = []

    # Bug fix: initialize every version to None so that a failed lookup is
    # reported via `errors` at the end instead of raising NameError when
    # the unassigned local is used in the final comparison.
    rpm_version = None
    dmg_version = None
    daos_version = None
    daos_agent_version = None
    daos_server_version = None

    # Get RPM version.
    rpm_command = "rpm -qa|grep daos-server"
    output = run_pcmd(hosts=self.hostlist_servers, command=rpm_command)
    self.log.info("RPM output = %s", output)
    stdout = output[0]["stdout"][0]
    self.log.info("RPM stdout = %s", stdout)
    result = re.findall(r"daos-server-[tests-|tests_openmpi-]*([\d.]+)", stdout)
    if not result:
        errors.append("RPM version is not in the output! {}".format(output))
    else:
        rpm_version = result[0]
        self.log.info("RPM version = %s", rpm_version)

    # Get dmg version.
    dmg_cmd = self.get_dmg_command()
    output = dmg_cmd.version().stdout.decode("utf-8")
    # Verify that "dmg version" is in the output.
    if "dmg version" not in output:
        errors.append("dmg version is not in the output! {}".format(output))
    result = re.findall(r"dmg version ([\d.]+)", output)
    if not result:
        errors.append("Failed to obtain dmg version! {}".format(output))
    else:
        dmg_version = result[0]
        self.log.info("dmg version = %s", dmg_version)

    # Get daos version.
    daos_cmd = self.get_daos_command()
    output = daos_cmd.version().stdout.decode("utf-8")
    # Verify that "daos version" is in the output.
    if "daos version" not in output:
        errors.append("daos version is not in the output! {}".format(output))
    result = re.findall(r"daos version ([\d.]+)", output)
    if not result:
        errors.append("Failed to obtain daos version! {}".format(output))
    else:
        daos_version = result[0]
        self.log.info("daos version = %s", daos_version)

    # Get daos_agent version.
    daos_agent_cmd = "daos_agent version"
    output = run_pcmd(hosts=self.hostlist_servers, command=daos_agent_cmd)
    stdout = output[0]["stdout"][0]
    # Verify that "DAOS Agent" is in the output.
    if "DAOS Agent" not in stdout:
        errors.append("DAOS Agent is not in the output! {}".format(stdout))
    result = re.findall(r"DAOS Agent v([\d.]+)", stdout)
    if not result:
        errors.append("Failed to obtain daos_agent version! {}".format(output))
    else:
        daos_agent_version = result[0]
        self.log.info("daos_agent version = %s", daos_agent_version)

    # Get daos_server version
    daos_server_cmd = "daos_server version"
    output = run_pcmd(hosts=self.hostlist_servers, command=daos_server_cmd)
    stdout = output[0]["stdout"][0]
    # Verify that "DAOS Control Server" is in the output.
    if "DAOS Control Server" not in stdout:
        errors.append(
            "DAOS Control Server is not in the output! {}".format(stdout))
    result = re.findall(r"DAOS Control Server v([\d.]+)", stdout)
    if not result:
        errors.append("Failed to obtain daos_server version! {}".format(output))
    else:
        daos_server_version = result[0]
        self.log.info("daos_server version = %s", daos_server_version)

    # Verify the tool versions against the RPM.
    tool_versions = [
        ("dmg", dmg_version),
        ("daos", daos_version),
        ("daos_agent", daos_agent_version),
        ("daos_server", daos_server_version)
    ]
    for tool, version in tool_versions:
        # Skip tools whose version could not be determined; the lookup
        # failure has already been recorded in errors above.
        if version is None or rpm_version is None:
            continue
        if version != rpm_version:
            msg = "Unexpected version! {} = {}, RPM = {}".format(
                tool, version, rpm_version)
            errors.append(msg)

    if errors:
        self.fail("\n---- Errors detected! ----\n{}".format("\n".join(errors)))
def test_ec_truncate(self):
    """Jira ID: DAOS-7328.

    Test Description:
        Verify the truncate on EC object class works fine over fuse.

    Use Cases:
        Create the container with EC class
        Create the data file with verify pattern over Fuse
        Truncate the file and increase the size
        Verify the data content and file size
        Truncate the file and reduce the size to original
        Verify the data content and file size

    :avocado: tags=all,full_regression
    :avocado: tags=hw,large,ib2
    :avocado: tags=ec,ec_fio,ec_posix
    :avocado: tags=ec_truncate
    """
    truncate_size = int(self.params.get("truncate_size", '/run/fio/*'))
    fname = self.params.get("names", '/run/fio/*')

    # Write the file using Fio
    self.execute_fio(stop_dfuse=False)

    # Get the fuse file name.
    testfile = "{}.0.0".format(
        os.path.join(self.dfuse.mount_dir.value, fname[0]))
    original_fs = int(self.fio_cmd._jobs['test'].size.value)

    # Read and verify the original data.
    self.fio_cmd._jobs['test'].rw = 'read'
    self.fio_cmd.run()

    # Get the file stats and confirm size
    file_size = get_remote_file_size(self.hostlist_clients[0], testfile)
    self.assertEqual(original_fs, file_size)

    # Truncate the original file which will extend the size of file.
    result = run_pcmd(
        self.hostlist_clients,
        "truncate -s {} {}".format(truncate_size, testfile))
    # Bug fix: treat any non-zero exit status as a failure; the original
    # only checked for exit status 1.
    if result[0]["exit_status"] != 0:
        self.fail("Failed to truncate file {}".format(testfile))

    # Verify the file size is extended.
    file_size = get_remote_file_size(self.hostlist_clients[0], testfile)
    self.assertEqual(truncate_size, file_size)

    # Read and verify the data after truncate.
    self.fio_cmd.run()

    # Truncate the original file and shrink to original size.
    result = run_pcmd(
        self.hostlist_clients,
        "truncate -s {} {}".format(original_fs, testfile))
    if result[0]["exit_status"] != 0:
        self.fail("Failed to truncate file {}".format(testfile))

    # Verify the file size is shrink to original.
    file_size = get_remote_file_size(self.hostlist_clients[0], testfile)
    self.assertEqual(original_fs, file_size)

    # Read and verify the data after truncate.
    self.fio_cmd.run()
def test_stat_parameters(self):
    """JIRA ID: DAOS-3769

    Create files of 1M, 10M, 100M, 500M, and verify the size and creation
    time.

    :avocado: tags=all,full_regression
    :avocado: tags=small
    :avocado: tags=dfuse
    :avocado: tags=stat_parameters
    """
    block_sizes = self.params.get("block_sizes", "/run/*")
    error_list = []
    self.add_pool(connect=False)
    self.add_container(pool=self.pool)
    i = 1
    for block_size in block_sizes:
        self.log.info("Block Size = %s", block_size)
        self.ior_cmd.block_size.update(block_size)

        # 1. Verify creation time.
        test_file_suffix = "_{}".format(i)
        i += 1

        # Run ior command.
        try:
            self.run_ior_with_pool(
                timeout=200, stop_dfuse=False, create_pool=False,
                create_cont=False, test_file_suffix=test_file_suffix)
        except TestFail:
            self.log.info("ior command failed!")

        # Get current epoch.
        current_epoch = -1
        output = run_pcmd(hosts=self.hostlist_clients, command="date +%s")
        stdout = output[0]["stdout"]
        self.log.info("date stdout = %s", stdout)
        current_epoch = stdout[-1]

        # Get epoch of the created file. (technically %Z is for last status
        # change. %W is file birth, but it returns 0.)
        creation_epoch = -1
        # As in date command, run stat command in the client node.
        stat_command = "stat -c%Z {}".format(self.ior_cmd.test_file.value)
        output = run_pcmd(hosts=self.hostlist_clients, command=stat_command)
        stdout = output[0]["stdout"]
        self.log.info("stat stdout = %s", stdout)
        creation_epoch = stdout[-1]

        # Calculate the epoch difference between the creation time and the
        # value in the file metadata. They're usually 2 sec apart.
        creation_epoch_int = int(creation_epoch)
        current_epoch_int = int(current_epoch)
        diff_epoch = creation_epoch_int - current_epoch_int
        # NOTE(review): this only flags a creation time more than 10s AFTER
        # the 'date' sample. A stale file's creation time would yield a
        # large negative diff and silently pass — confirm whether
        # abs(diff_epoch) was intended here.
        if diff_epoch > 10:
            msg = "Unexpected creation time! Expected = {}; Actual = {}"
            error_list.append(
                msg.format(current_epoch_int, creation_epoch_int))

        # 2. Verify file size.
        # Get file size.
        file_size = get_remote_file_size(
            self.hostlist_clients[0], self.ior_cmd.test_file.value)

        # Adjust the file size and verify that it matches the expected size.
        expected_size = block_size[:-1]
        # Obtained size is in byte, so convert it to MB.
        file_size_adjusted = file_size / 1024 / 1024
        if int(expected_size) != file_size_adjusted:
            msg = "Unexpected file size! Expected = {}; Actual = {}"
            error_list.append(
                msg.format(int(expected_size), file_size_adjusted))

    if error_list:
        self.fail("\n----- Errors detected! -----\n{}".format(
            "\n".join(error_list)))