def run_conf(self, dmg_config_file):
    """Run the daos_run_io_conf command as a foreground process.

    Args:
        dmg_config_file (str): dmg config file used by the test.

    Returns:
        bool: True if the command succeeded; False on any error.
    """
    success_msg = 'daos_run_io_conf completed successfully'
    command = " ".join([os.path.join(self._path, "daos_run_io_conf"),
                        "-n", dmg_config_file, self.filename.value])
    manager = Orterun(command, mpi_type=self.mpi_type)
    # Run the daos_run_io_conf command using OpenMPI
    try:
        out = manager.run()

        # Return False if "ERROR" appears in stdout
        for line in out.stdout_text.splitlines():
            if 'ERROR' in line:
                return False
        # Return False if the last line is not the expected message
        # confirming the test completed.
        if success_msg not in out.stdout_text.splitlines()[-1]:
            return False
    except CommandFailure:
        # Return False if the command failed.
        return False
    return True
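# Hypothetical usage sketch for run_conf() above. The caller name and the
# io_conf attribute are illustrative assumptions, not part of the original:
# because run_conf() returns a bool instead of raising, callers must check
# the return value to fail the test.
def test_io_conf(self):
    """Fail the test when daos_run_io_conf reports an error."""
    if not self.io_conf.run_conf(self.dmg_config_file):
        self.fail("daos_run_io_conf detected an error")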
def run_daos_perf(self):
    """Run the daos_perf command."""
    # Obtain the number of processes listed with the daos_perf options
    processes = self.params.get("processes", "/run/daos_perf/*")

    # Create the daos_perf command from the test yaml file
    daos_perf = DaosPerfCommand(self.bin)
    daos_perf.get_params(self)
    self.log.info("daos_perf command: %s", str(daos_perf))
    daos_perf_env = daos_perf.get_environment(self.server_managers[0])

    # Create the orterun command
    orterun = Orterun(daos_perf)
    orterun.assign_hosts(self.hostlist_clients, self.workdir, None)
    orterun.assign_processes(processes)
    orterun.assign_environment(daos_perf_env)
    self.log.info("orterun command: %s", str(orterun))

    # Run the daos_perf command and check for errors
    result = orterun.run()
    errors = re.findall(
        r"(.*(?:non-zero exit code|errors|failed|Failed).*)",
        result.stdout_text)
    if errors:
        self.fail("Errors detected in daos_perf output:\n{}".format(
            "\n".join(errors)))
def run_conf(self):
    """Run the daos_run_io_conf command as a foreground process.

    Raises:
        CommandFailure: if the daos_run_io_conf command fails.
    """
    command = " ".join([os.path.join(self._path, "daos_run_io_conf"),
                        self.filename.value])
    manager = Orterun(command)
    # Run the daos_run_io_conf command using OpenMPI
    manager.run()
def test_self_test(self):
    """Run a few CaRT self-test scenarios.

    :avocado: tags=all,pr,smoke,unittest,tiny,cartselftest
    """
    # Set up the orterun command
    orterun = Orterun(SelfTest(self.bin))
    orterun.map_by.update(None, "orterun/map_by")
    orterun.enable_recovery.update(False, "orterun/enable_recovery")

    # Get the self_test command line parameters
    orterun.job.get_params(self)
    orterun.job.group_name.update(self.server_group, "group_name")
    orterun.job.message_sizes.update(
        self.params.get("size", "/run/muxtestparams/message_size/*")[0],
        "message_sizes")
    orterun.job.attach_info.update(
        os.path.dirname(self.uri_file), "attach_info")

    # Set up the environment variables for the self_test orterun command
    orterun.assign_environment(self.cart_env)

    # Run the test
    try:
        orterun.run()
    except CommandFailure as error:
        self.test_log.info(
            "CaRT self_test returned non-zero: %s", str(error))
        self.fail("CaRT self_test returned non-zero")
def test_self_test(self):
    """Run a few CaRT self-test scenarios.

    :avocado: tags=all,smoke,unittest,tiny,cartselftest
    """
    # Set up the orterun command
    orterun = Orterun(SelfTest(self.cart_bin))
    orterun.ompi_server.update(
        "file:{}".format(self.uri_file), "orterun/ompi_server")
    orterun.map_by.update(None, "orterun/map_by")
    orterun.enable_recovery.update(False, "orterun/enable_recovery")

    # Get the self_test command line parameters
    orterun.job.get_params(self)
    orterun.job.group_name.value = self.server_group

    # Set up the environment variables for the self_test orterun command
    orterun.assign_environment(self.cart_env)

    # Run the test
    try:
        orterun.run()
    except CommandFailure as error:
        self.test_log.info(
            "CaRT self_test returned non-zero: %s", str(error))
        self.fail("CaRT self_test returned non-zero")
def get_mdtest_job_manager_command(self, manager):
    """Get the MPI job manager command for Mdtest.

    Args:
        manager (str): the MPI job manager to use, e.g. "MPICH".

    Returns:
        JobManager: the object for the mpi job manager command
    """
    # Initialize MpioUtils if mdtest needs to be run using mpich
    if manager == "MPICH":
        mpio_util = MpioUtils()
        if mpio_util.mpich_installed(self.hostlist_clients) is False:
            self.fail("Exiting Test: Mpich not installed")
        return Mpirun(self.mdtest_cmd, mpitype="mpich")

    return Orterun(self.mdtest_cmd)
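# Hypothetical usage sketch for get_mdtest_job_manager_command() above (the
# yaml path and default are assumptions): the manager name from the test
# yaml selects between the Mpirun/mpich and the default Orterun/openmpi job
# manager, after which either manager is configured the same way.
manager_name = self.params.get("manager", "/run/mdtest/*", "MPICH")
mdtest_manager = self.get_mdtest_job_manager_command(manager_name)
mdtest_manager.assign_hosts(self.hostlist_clients, self.workdir, None)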
def test_load_mpi(self):
    """Simple test of apricot test code to load the openmpi module.

    :avocado: tags=all
    :avocado: tags=harness,harness_basic_test,test_load_mpi
    :avocado: tags=load_mpi
    """
    try:
        Orterun(None)
    except CommandFailure as error:
        self.fail("Orterun initialization failed: {}".format(error))

    try:
        Mpirun(None, mpi_type="mpich")
    except CommandFailure as error:
        self.fail("Mpirun initialization failed: {}".format(error))
def test_metadata_server_restart(self):
    """JIRA ID: DAOS-1512.

    Test Description:
        This test will verify 2000 IOR small size containers after server
        restart. The test will run IOR in 5 different threads for faster
        execution time. Each thread will create 400 (8 byte) containers in
        the same pool. Restart the servers, read the IOR container files
        written previously, and validate data integrity by using the IOR
        options "-R -G 1".

    Use Cases:
        ?

    :avocado: tags=metadata,metadata_ior,nvme,large
    """
    files_per_thread = 400
    total_ior_threads = 5
    self.out_queue = queue.Queue()

    processes = self.params.get("slots", "/run/ior/clientslots/*")

    list_of_uuid_lists = [
        [str(uuid.uuid4()) for _ in range(files_per_thread)]
        for _ in range(total_ior_threads)]

    # Launch threads to run IOR to write data, restart the agents and
    # servers, and then run IOR to read the data
    for operation in ("write", "read"):
        # Create the IOR threads
        threads = []
        for index in range(total_ior_threads):
            # Define the arguments for the ior_runner_thread method
            ior_cmd = IorCommand()
            ior_cmd.get_params(self)
            ior_cmd.set_daos_params(self.server_group, self.pool)
            ior_cmd.flags.value = self.params.get(
                "F", "/run/ior/ior{}flags/".format(operation))

            # Define the job manager for the IOR command
            manager = Orterun(ior_cmd)
            env = ior_cmd.get_default_env(str(manager))
            manager.assign_hosts(self.hostlist_clients, self.workdir, None)
            manager.assign_processes(processes)
            manager.assign_environment(env)

            # Add a thread for these IOR arguments
            threads.append(
                threading.Thread(
                    target=ior_runner_thread,
                    kwargs={
                        "manager": manager,
                        "uuids": list_of_uuid_lists[index],
                        "results": self.out_queue}))

            self.log.info(
                "Created %s thread %s with container uuids %s",
                operation, index, list_of_uuid_lists[index])

        # Launch the IOR threads
        if self.thread_control(threads, operation) == "FAIL":
            self.d_log.error("IOR {} Thread FAIL".format(operation))
            self.fail("IOR {} Thread FAIL".format(operation))

        # Restart the agents and servers after the write / before the read
        if operation == "write":
            # Stop the agents
            errors = self.stop_agents()
            self.assertEqual(
                len(errors), 0,
                "Error stopping agents:\n  {}".format("\n  ".join(errors)))

            # Stop the servers
            errors = self.stop_servers()
            self.assertEqual(
                len(errors), 0,
                "Error stopping servers:\n  {}".format("\n  ".join(errors)))

            # Start the agents
            self.start_agent_managers()

            # Start the servers
            self.start_server_managers()
def test_metadata_server_restart(self):
    """JIRA ID: DAOS-1512.

    Test Description:
        This test will verify 2000 IOR small size containers after server
        restart. The test will run IOR in 5 different threads for faster
        execution time. Each thread will create 400 (8 byte) containers in
        the same pool. Restart the servers, read the IOR container files
        written previously, and validate data integrity by using the IOR
        options "-R -G 1".

    Use Cases:
        ?

    :avocado: tags=all,full_regression
    :avocado: tags=hw,large
    :avocado: tags=server,metadata,metadata_ior,nvme
    """
    self.create_pool()
    files_per_thread = 400
    total_ior_threads = 5

    processes = self.params.get("slots", "/run/ior/clientslots/*")

    list_of_uuid_lists = [
        [str(uuid.uuid4()) for _ in range(files_per_thread)]
        for _ in range(total_ior_threads)]

    # Set up the thread manager
    thread_manager = ThreadManager(run_ior_loop, self.timeout - 30)

    # Launch threads to run IOR to write data, restart the agents and
    # servers, and then run IOR to read the data
    for operation in ("write", "read"):
        # Create the IOR threads
        for index in range(total_ior_threads):
            # Define the arguments for the run_ior_loop method
            ior_cmd = IorCommand()
            ior_cmd.get_params(self)
            ior_cmd.set_daos_params(self.server_group, self.pool)
            ior_cmd.flags.value = self.params.get(
                "F", "/run/ior/ior{}flags/".format(operation))

            # Define the job manager for the IOR command
            self.ior_managers.append(Orterun(ior_cmd))
            env = ior_cmd.get_default_env(str(self.ior_managers[-1]))
            self.ior_managers[-1].assign_hosts(
                self.hostlist_clients, self.workdir, None)
            self.ior_managers[-1].assign_processes(processes)
            self.ior_managers[-1].assign_environment(env)
            self.ior_managers[-1].verbose = False

            # Add a thread for these IOR arguments
            thread_manager.add(
                manager=self.ior_managers[-1],
                uuids=list_of_uuid_lists[index],
                tmpdir_base=self.test_dir)
            self.log.info(
                "Created %s thread %s with container uuids %s",
                operation, index, list_of_uuid_lists[index])

        # Launch the IOR threads
        self.log.info(
            "Launching %d IOR %s threads", thread_manager.qty, operation)
        failed_thread_count = thread_manager.check_run()
        if failed_thread_count > 0:
            msg = "{} FAILED IOR {} Thread(s)".format(
                failed_thread_count, operation)
            self.d_log.error(msg)
            self.fail(msg)

        # Restart the agents and servers after the write / before the read
        if operation == "write":
            # Stop the agents
            errors = self.stop_agents()
            self.assertEqual(
                len(errors), 0,
                "Error stopping agents:\n  {}".format("\n  ".join(errors)))

            # Restart the servers w/o formatting the storage
            errors = self.restart_servers()
            self.assertEqual(
                len(errors), 0,
                "Error stopping servers:\n  {}".format("\n  ".join(errors)))

            # Start the agents
            self.start_agent_managers()

    self.log.info("Test passed")
def build_cmd(self, env, host, **kwargs):
    """Build a command string."""
    env_CCSA = self.params.get("env", "/run/env_CRT_CTX_SHARE_ADDR/*/")
    test_name = self.params.get("name", "/run/tests/*/")

    # Write memcheck result file(s) to $HOME or DAOS_TEST_SHARED_DIR.
    daos_test_shared_dir = os.getenv('DAOS_TEST_SHARED_DIR',
                                     os.getenv('HOME'))

    if env_CCSA is None:
        env_CCSA = ""

    f = r"{}/valgrind.%q\{{PMIX_ID\}}_{}-{}.memcheck"
    memcheck_xml = f.format(daos_test_shared_dir, test_name, env_CCSA)

    index = kwargs.get('index', None)

    # Return 0 on memory leaks while the suppression file is being
    # completed (CART-975 and CART-977)
    memcheck_error_code = 0

    tst_vgd = " valgrind --xml=yes " + \
              "--xml-file={}".format(memcheck_xml) + " " + \
              "--fair-sched=yes --partial-loads-ok=yes " + \
              "--leak-check=full --show-leak-kinds=all " + \
              "--gen-suppressions=all " + \
              "--suppressions=" + self.supp_file + " " + \
              "--track-origins=yes " + \
              "--error-exitcode=" + str(memcheck_error_code) + " " \
              "--show-reachable=yes --trace-children=yes"

    _tst_bin = self.params.get("{}_bin".format(host), "/run/tests/*/")
    _tst_arg = self.params.get("{}_arg".format(host), "/run/tests/*/")
    _tst_env = self.params.get("{}_env".format(host), "/run/tests/*/")
    _tst_slt = self.params.get("{}_slt".format(host), "/run/tests/*/")
    _tst_ctx = "16"
    if "{}_CRT_CTX_NUM".format(host) in os.environ:
        _tst_ctx = os.environ["{}_CRT_CTX_NUM".format(host)]

    # If the yaml parameter is a list, return the n-th element
    tst_bin = self.get_yaml_list_elem(_tst_bin, index)
    tst_arg = self.get_yaml_list_elem(_tst_arg, index)
    tst_env = self.get_yaml_list_elem(_tst_env, index)
    tst_slt = self.get_yaml_list_elem(_tst_slt, index)
    tst_ctx = self.get_yaml_list_elem(_tst_ctx, index)

    tst_host = self.params.get("{}".format(host), "/run/hosts/*/")
    tst_ppn = self.params.get("{}_ppn".format(host), "/run/tests/*/")
    tst_processes = len(tst_host) * int(tst_ppn)
    logparse = self.params.get("logparse", "/run/tests/*/")

    if tst_slt is not None:
        hostfile = write_host_file(tst_host, daos_test_shared_dir, tst_slt)
    else:
        hostfile = write_host_file(tst_host, daos_test_shared_dir, tst_ppn)

    mca_flags = ["btl self,tcp"]
    if self.provider == "ofi+psm2":
        mca_flags.append("pml ob1")

    tst_cmd = env

    # CRT_TEST_CONT defaults to "0", so a simple equality check suffices
    tst_cont = os.getenv("CRT_TEST_CONT", "0")
    if tst_cont == "1":
        tst_cmd += " --continuous"

    if tst_ctx is not None:
        tst_cmd += " -x CRT_CTX_NUM=" + tst_ctx

    if tst_env is not None:
        tst_cmd += " " + tst_env

    if logparse:
        tst_cmd += " -x D_LOG_FILE_APPEND_PID=1"

    tst_mod = os.getenv("WITH_VALGRIND", "native")
    if tst_mod == "memcheck":
        tst_cmd += tst_vgd

    if tst_bin is not None:
        tst_cmd += " " + tst_bin

    if tst_arg is not None:
        tst_cmd += " " + tst_arg

    job = Orterun(tst_cmd)
    job.mca.update(mca_flags)
    job.hostfile.update(hostfile)
    job.pprnode.update(tst_ppn)
    job.processes.update(tst_processes)
    return str(job)
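# Hypothetical usage sketch for build_cmd() above (the env string and host
# key are assumptions based on the "{host}_bin"/"{host}_ppn" yaml naming):
# build_cmd() returns the fully formed orterun command line as a string,
# ready to be logged or launched on the test node.
cmd = self.build_cmd(" -x D_LOG_MASK=INFO", "test_clients", index=0)
self.log.info("Launching: %s", cmd)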
def run_test(self):
    """Run the HDF5 VOL testsuites.

    Raises:
        VolFailed: for an invalid test name or test execution failure
    """
    # Initialize test specific variables
    mpi_type = self.params.get("mpi_type", default="mpich")
    test_repo = self.params.get("daos_vol_repo")
    plugin_path = self.params.get("plugin_path")
    # test_list = self.params.get("daos_vol_tests", default=[])
    testname = self.params.get("testname")
    client_processes = self.params.get("client_processes")

    # Create the pool and container
    self.add_pool(connect=False)
    self.add_container(self.pool)

    # VOL needs to run from a file system that supports xattr. Currently
    # NFS does not have this attribute, so it was recommended to create a
    # dfuse mount and run the VOL tests from there.
    self.start_dfuse(self.hostlist_clients, self.pool, self.container)

    # for test_param in test_list:
    #     testname = test_param[0][1]
    #     client_processes = test_param[1][1]
    exe = os.path.join(test_repo, testname)
    if mpi_type == "openmpi":
        manager = Orterun(exe, subprocess=False)
    else:
        manager = Mpirun(exe, subprocess=False, mpitype="mpich")

    env = EnvironmentVariables()
    env["DAOS_POOL"] = "{}".format(self.pool.uuid)
    env["DAOS_SVCL"] = "{}".format(self.pool.svc_ranks[0])
    env["DAOS_CONT"] = "{}".format(self.container.uuid)
    env["HDF5_VOL_CONNECTOR"] = "daos"
    env["HDF5_PLUGIN_PATH"] = "{}".format(plugin_path)
    manager.assign_hosts(self.hostlist_clients)
    manager.assign_processes(client_processes)
    manager.assign_environment(env, True)
    manager.working_dir.value = self.dfuse.mount_dir.value

    # Run the VOL command
    try:
        manager.run()
    except CommandFailure as _error:
        self.fail("{} FAILED> \nException occurred: {}".format(
            exe, str(_error)))