class DfuseTestBase(TestWithServers):
    """Base class adding dfuse start/stop support to server tests.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a DfuseTestBase object."""
        super(DfuseTestBase, self).__init__(*args, **kwargs)
        # Active Dfuse command object; None when dfuse is not running.
        self.dfuse = None

    def stop_job_managers(self):
        """Stop the test job manager followed by dfuse.

        Returns:
            list: a list of exceptions raised stopping the agents

        """
        errors = super(DfuseTestBase, self).stop_job_managers()
        try:
            self.stop_dfuse()
        except CommandFailure as error:
            errors.append("Error stopping dfuse: {}".format(error))
        return errors

    def start_dfuse(self, hosts, pool=None, container=None, mount_dir=None):
        """Create a DfuseCommand object and use it to start Dfuse.

        Args:
            hosts (list): list of hosts on which to start Dfuse
            pool (TestPool, optional): pool to use with Dfuse
            container (TestContainer, optional): container to use with Dfuse
            mount_dir (str, optional): updated mount dir name. Defaults to
                None.
        """
        dfuse = Dfuse(hosts, self.tmp)
        dfuse.get_params(self)

        # Apply any optional overrides to the dfuse configuration
        if mount_dir:
            dfuse.mount_dir.update(mount_dir)
        if pool:
            dfuse.set_dfuse_params(pool)
        if container:
            dfuse.set_dfuse_cont_param(container)
        dfuse.set_dfuse_exports(self.server_managers[0], self.client_log)

        self.dfuse = dfuse
        try:
            # Launch the dfuse process on the requested hosts
            self.dfuse.run()
        except CommandFailure as error:
            self.log.error(
                "Dfuse command %s failed on hosts %s", str(self.dfuse),
                str(NodeSet.fromlist(self.dfuse.hosts)), exc_info=error)
            self.fail("Test was expected to pass but it failed.")

    def stop_dfuse(self):
        """Stop Dfuse and unset the DfuseCommand object."""
        if self.dfuse:
            self.dfuse.stop()
            self.dfuse = None
class DfuseTestBase(TestWithServers):
    """Base class adding dfuse start/stop support to server tests.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a DfuseTestBase object."""
        super(DfuseTestBase, self).__init__(*args, **kwargs)
        # Active Dfuse command object; None when dfuse is not running.
        self.dfuse = None

    def tearDown(self):
        """Tear down each test case."""
        try:
            self.stop_dfuse()
        finally:
            # Stop the servers and agents
            super(DfuseTestBase, self).tearDown()

    def start_dfuse(self, hosts, pool, container):
        """Create a DfuseCommand object and use it to start Dfuse.

        Args:
            hosts (list): list of hosts on which to start Dfuse
            pool (TestPool): pool to use with Dfuse
            container (TestContainer): container to use with Dfuse
        """
        dfuse = Dfuse(hosts, self.tmp)
        dfuse.get_params(self)

        # Point dfuse at the pool and container under test
        dfuse.set_dfuse_params(pool)
        dfuse.set_dfuse_cont_param(container)
        dfuse.set_dfuse_exports(self.server_managers[0], self.client_log)

        self.dfuse = dfuse
        try:
            # Launch the dfuse process on the requested hosts
            self.dfuse.run()
        except CommandFailure as error:
            self.log.error(
                "Dfuse command %s failed on hosts %s", str(self.dfuse),
                str(NodeSet.fromlist(self.dfuse.hosts)), exc_info=error)
            self.fail("Test was expected to pass but it failed.")

    def stop_dfuse(self):
        """Stop Dfuse and unset the DfuseCommand object."""
        if self.dfuse:
            self.dfuse.stop()
            self.dfuse = None
class IorTestBase(TestWithServers):
    """Base IOR test class.

    :avocado: recursive
    """

    # Patterns matched against IOR stdout to identify which phase a
    # subprocess IOR run has reached (see check_subprocess_status).
    IOR_WRITE_PATTERN = "Commencing write performance test"
    IOR_READ_PATTERN = "Commencing read performance test"

    def __init__(self, *args, **kwargs):
        """Initialize a IorTestBase object."""
        super(IorTestBase, self).__init__(*args, **kwargs)
        self.ior_cmd = None
        self.processes = None
        self.hostfile_clients_slots = None
        self.dfuse = None
        self.container = None
        self.lock = None
        self.mpirun = None

    def setUp(self):
        """Set up each test case."""
        # obtain separate logs
        self.update_log_file_names()
        # Start the servers and agents
        super(IorTestBase, self).setUp()

        # Get the parameters for IOR
        self.ior_cmd = IorCommand()
        self.ior_cmd.get_params(self)
        self.processes = self.params.get("np", '/run/ior/client_processes/*')
        # When True, IOR is launched as a background subprocess (see
        # get_ior_job_manager_command / check_subprocess_status).
        self.subprocess = self.params.get("subprocess", '/run/ior/*', False)

        # lock is needed for run_multiple_ior method.
        self.lock = threading.Lock()

    def tearDown(self):
        """Tear down each test case."""
        try:
            # Unmount dfuse before the servers/agents go away
            if self.dfuse:
                self.dfuse.stop()
        finally:
            # Stop the servers and agents
            super(IorTestBase, self).tearDown()

    def create_pool(self):
        """Create a TestPool object to use with ior."""
        # Get the pool params
        self.pool = TestPool(
            self.context, dmg_command=self.get_dmg_command())
        self.pool.get_params(self)

        # Create a pool
        self.pool.create()

    def create_cont(self):
        """Create a TestContainer object to be used to create container."""
        # Get container params
        self.container = TestContainer(
            self.pool, daos_command=DaosCommand(self.bin))
        self.container.get_params(self)

        # create container
        self.container.create()

    def _start_dfuse(self):
        """Create a DfuseCommand object to start dfuse.

        Uses self.pool and self.container, which must exist before this is
        called (see run_ior_with_pool).
        """
        # Get Dfuse params
        self.dfuse = Dfuse(self.hostlist_clients, self.tmp)
        self.dfuse.get_params(self)

        # update dfuse params
        self.dfuse.set_dfuse_params(self.pool)
        self.dfuse.set_dfuse_cont_param(self.container)
        self.dfuse.set_dfuse_exports(self.server_managers[0], self.client_log)

        try:
            # start dfuse
            self.dfuse.run()
        except CommandFailure as error:
            self.log.error("Dfuse command %s failed on hosts %s",
                           str(self.dfuse),
                           str(NodeSet.fromlist(self.dfuse.hosts)),
                           exc_info=error)
            self.fail("Test was expected to pass but it failed.\n")

    def run_ior_with_pool(self, intercept=None, test_file_suffix="",
                          test_file="daos:testFile", create_pool=True,
                          create_cont=True, stop_dfuse=True):
        """Execute ior with optional overrides for ior flags and object_class.

        If specified the ior flags and ior daos object class parameters will
        override the values read from the yaml file.

        Args:
            intercept (str, optional): path to the interception library. Shall
                be used only for POSIX through DFUSE. Defaults to None.
            test_file_suffix (str, optional): suffix to add to the end of the
                test file name. Defaults to "".
            test_file (str, optional): ior test file name. Defaults to
                "daos:testFile". Is ignored when using POSIX through DFUSE.
            create_pool (bool, optional): If it is true, create pool and
                container else just run the ior. Defaults to True.
            create_cont (bool, optional): Create new container. Default is
                True
            stop_dfuse (bool, optional): Stop dfuse after ior command is
                finished. Default is True.

        Returns:
            CmdResult: result of the ior command execution

        """
        if create_pool:
            self.update_ior_cmd_with_pool(create_cont)

        # start dfuse if api is POSIX
        if self.ior_cmd.api.value == "POSIX":
            # Connect to the pool, create container and then start dfuse
            if not self.dfuse:
                self._start_dfuse()
            # test_file argument is replaced by a path under the dfuse mount
            test_file = os.path.join(self.dfuse.mount_dir.value, "testfile")
        elif self.ior_cmd.api.value == "DFS":
            test_file = os.path.join("/", "testfile")

        self.ior_cmd.test_file.update("".join([test_file, test_file_suffix]))
        out = self.run_ior(self.get_ior_job_manager_command(), self.processes,
                           intercept)

        if stop_dfuse and self.dfuse:
            self.dfuse.stop()
            self.dfuse = None

        return out

    def update_ior_cmd_with_pool(self, create_cont=True):
        """Update ior_cmd with pool.

        Args:
            create_cont (bool, optional): connect to the pool and create a
                new container in it. Defaults to True.
        """
        # Create a pool if one does not already exist
        if self.pool is None:
            self.create_pool()
        # Create a container, if needed.
        # Don't pass uuid and pool handle to IOR.
        # It will not enable checksum feature
        if create_cont:
            self.pool.connect()
            self.create_cont()
        # Update IOR params with the pool and container params
        self.ior_cmd.set_daos_params(self.server_group, self.pool,
                                     self.container.uuid)

    def get_ior_job_manager_command(self):
        """Get the MPI job manager command for IOR.

        Also stores the manager in self.mpirun so stop_ior() and
        check_subprocess_status() can reach the running process.

        Returns:
            Mpirun: the mpi job manager command

        """
        # Initialize MpioUtils if IOR is running in MPIIO or DFS mode
        if self.ior_cmd.api.value in ["MPIIO", "POSIX", "DFS"]:
            mpio_util = MpioUtils()
            if mpio_util.mpich_installed(self.hostlist_clients) is False:
                self.fail("Exiting Test: Mpich not installed")
        else:
            self.fail("Unsupported IOR API")

        # The second positional argument runs IOR as a background subprocess
        if self.subprocess:
            self.mpirun = Mpirun(self.ior_cmd, True, mpitype="mpich")
        else:
            self.mpirun = Mpirun(self.ior_cmd, mpitype="mpich")

        return self.mpirun

    def check_subprocess_status(self, operation="write"):
        """Check that the IOR subprocess is running and in the given phase.

        Args:
            operation (str, optional): "write" or "read"; selects the IOR
                stdout pattern to look for. Defaults to "write".
        """
        if operation == "write":
            self.ior_cmd.pattern = self.IOR_WRITE_PATTERN
        elif operation == "read":
            self.ior_cmd.pattern = self.IOR_READ_PATTERN
        else:
            self.fail("Exiting Test: Inappropriate operation type \
                for subprocess status check")

        if not self.ior_cmd.check_ior_subprocess_status(
                self.mpirun.process, self.ior_cmd):
            self.fail("Exiting Test: Subprocess not running")

    def run_ior(self, manager, processes, intercept=None, display_space=True):
        """Run the IOR command.

        Args:
            manager (Mpirun): mpi job manager command
            processes (int): number of host processes
            intercept (str, optional): path to interception library.
                Defaults to None.
            display_space (bool, optional): log pool space before/after the
                run. Defaults to True.
        """
        env = self.ior_cmd.get_default_env(str(manager), self.client_log)
        if intercept:
            env["LD_PRELOAD"] = intercept
        manager.assign_hosts(
            self.hostlist_clients, self.workdir, self.hostfile_clients_slots)
        manager.assign_processes(processes)
        manager.assign_environment(env)
        try:
            if display_space:
                self.pool.display_pool_daos_space()
            out = manager.run()

            # A foreground run's stdout is scanned for IOR warnings;
            # a subprocess run has no output to scan yet.
            if not self.subprocess:
                for line in out.stdout.splitlines():
                    if 'WARNING' in line:
                        self.fail("IOR command issued warnings.\n")
            return out
        except CommandFailure as error:
            self.log.error("IOR Failed: %s", str(error))
            self.fail("Test was expected to pass but it failed.\n")
        finally:
            if not self.subprocess and display_space:
                self.pool.display_pool_daos_space()

    def stop_ior(self):
        """Stop the in-progress IOR subprocess via its mpirun job manager.

        Returns:
            CmdResult: result of stopping the mpirun process

        """
        self.log.info(
            "<IOR> Stopping in-progress IOR command: %s",
            self.mpirun.__str__())

        try:
            out = self.mpirun.stop()
            return out
        except CommandFailure as error:
            self.log.error("IOR stop Failed: %s", str(error))
            self.fail("Test was expected to pass but it failed.\n")
        finally:
            self.pool.display_pool_daos_space()

    def run_multiple_ior_with_pool(self, results, intercept=None):
        """Execute ior with optional overrides for ior flags and object_class.

        If specified the ior flags and ior daos object class parameters will
        override the values read from the yaml file.

        Args:
            results (dict): A dictionary object to store the ior metrics
            intercept (str): path to the interception library. Shall be used
                only for POSIX through DFUSE.
        """
        self.update_ior_cmd_with_pool()

        # start dfuse for POSIX api. This is specific to interception
        # library test requirements.
        self._start_dfuse()

        # Create two jobs and run in parallel.
        # Job1 will have 3 client set up to use dfuse + interception
        # library
        # Job2 will have 1 client set up to use only dfuse.
        job1 = self.get_new_job(self.hostlist_clients[:-1], 1,
                                results, intercept)
        job2 = self.get_new_job([self.hostlist_clients[-1]], 2,
                                results, None)

        job1.start()
        # Since same ior_cmd is used to trigger the MPIRUN
        # with different parameters, pausing for 2 seconds to
        # avoid data collisions.
        time.sleep(2)
        job2.start()
        job1.join()
        job2.join()
        self.dfuse.stop()
        self.dfuse = None

    def get_new_job(self, clients, job_num, results, intercept=None):
        """Create a new thread for ior run.

        Args:
            clients (list): hosts on which to run ior
            job_num (int): Assigned job number
            results (dict): A dictionary object to store the ior metrics
            intercept (path): Path to interception library

        Returns:
            Thread: un-started thread that runs run_multiple_ior

        """
        job = threading.Thread(target=self.run_multiple_ior, args=[
            clients, results, job_num, intercept])
        return job

    def run_multiple_ior(self, clients, results, job_num, intercept=None):
        """Run the IOR command.

        Args:
            clients (list): hosts on which to run ior
            results (dict): A dictionary object to store the ior metrics
            job_num (int): Assigned job number
            intercept (str, optional): path to interception library. Defaults
                to None.
        """
        # The shared ior_cmd / job manager is mutated here, so serialize
        # setup between the concurrently running job threads.
        self.lock.acquire(True)
        tsize = self.ior_cmd.transfer_size.value
        testfile = os.path.join(self.dfuse.mount_dir.value,
                                "testfile{}{}".format(tsize, job_num))
        if intercept:
            testfile += "intercept"
        self.ior_cmd.test_file.update(testfile)
        manager = self.get_ior_job_manager_command()
        # Split the configured process count proportionally to this job's
        # share of the client hosts.
        procs = (self.processes // len(self.hostlist_clients)) * len(clients)
        env = self.ior_cmd.get_default_env(str(manager), self.client_log)
        if intercept:
            env["LD_PRELOAD"] = intercept
        manager.assign_hosts(clients, self.workdir,
                             self.hostfile_clients_slots)
        manager.assign_processes(procs)
        manager.assign_environment(env)
        self.lock.release()
        try:
            self.pool.display_pool_daos_space()
            out = manager.run()
            # Guard the shared results dict while storing this job's metrics
            self.lock.acquire(True)
            results[job_num] = IorCommand.get_ior_metrics(out)
            self.lock.release()
        except CommandFailure as error:
            self.log.error("IOR Failed: %s", str(error))
            self.fail("Test was expected to pass but it failed.\n")
        finally:
            self.pool.display_pool_daos_space()

    def verify_pool_size(self, original_pool_info, processes):
        """Validate the pool size.

        Args:
            original_pool_info (PoolInfo): Pool info prior to IOR
            processes (int): number of processes
        """
        # Get the current pool size for comparison
        current_pool_info = self.pool.pool.pool_query()

        # If transfer size is >= 4K, pool size is verified against NVMe,
        # else it is checked against SCM
        if self.ior_cmd.transfer_size.value >= 4096:
            self.log.info(
                "Size is > 4K,Size verification will be done with NVMe size")
            storage_index = 1
        else:
            self.log.info(
                "Size is < 4K,Size verification will be done with SCM size")
            storage_index = 0

        # Space consumed = free space before IOR minus free space after
        actual_pool_size = \
            original_pool_info.pi_space.ps_space.s_free[storage_index] - \
            current_pool_info.pi_space.ps_space.s_free[storage_index]
        expected_pool_size = self.ior_cmd.get_aggregate_total(processes)

        if actual_pool_size < expected_pool_size:
            self.fail(
                "Pool Free Size did not match: actual={}, expected={}".format(
                    actual_pool_size, expected_pool_size))

    def execute_cmd(self, cmd, fail_on_err=True, display_output=True):
        """Execute cmd using general_utils.pcmd.

        Args:
            cmd (str): String command to be executed
            fail_on_err (bool): Boolean for whether to fail the test if
                command execution returns non zero return code.
            display_output (bool): Boolean for whether to display output.

        Returns:
            dict: a dictionary of return codes keys and accompanying NodeSet
                values indicating which hosts yielded the return code.

        """
        try:
            # execute bash cmds
            ret = pcmd(
                self.hostlist_clients, cmd, verbose=display_output,
                timeout=300)
            if 0 not in ret:
                error_hosts = NodeSet(
                    ",".join(
                        [str(node_set) for code, node_set in ret.items()
                         if code != 0]))
                if fail_on_err:
                    raise CommandFailure(
                        "Error running '{}' on the following "
                        "hosts: {}".format(cmd, error_hosts))
        # report error if any command fails
        except CommandFailure as error:
            self.log.error("DfuseSparseFile Test Failed: %s", str(error))
            self.fail("Test was expected to pass but "
                      "it failed.\n")
        return ret
class FioBase(TestWithServers):
    """Base fio class.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a FioBase object."""
        super(FioBase, self).__init__(*args, **kwargs)
        self.fio_cmd = None
        self.processes = None
        self.manager = None
        self.dfuse = None
        self.daos_cmd = None

    def setUp(self):
        """Set up each test case."""
        # obtain separate logs
        self.update_log_file_names()

        # Start the servers and agents
        super(FioBase, self).setUp()

        # initialise daos_cmd
        self.daos_cmd = DaosCommand(self.bin)

        # Get the parameters for Fio
        self.fio_cmd = FioCommand()
        self.fio_cmd.get_params(self)
        self.processes = self.params.get("np", '/run/fio/client_processes/*')
        self.manager = self.params.get("manager", '/run/fio/*', "MPICH")

    def tearDown(self):
        """Tear down each test case."""
        try:
            # Unmount dfuse before the servers/agents go away
            if self.dfuse:
                self.dfuse.stop()
        finally:
            # Stop the servers and agents
            super(FioBase, self).tearDown()

    def _create_pool(self):
        """Create a TestPool object and create the pool."""
        # Get the pool params
        # pylint: disable=attribute-defined-outside-init
        self.pool = TestPool(self.context, dmg_command=self.get_dmg_command())
        self.pool.get_params(self)
        # Create a pool
        self.pool.create()

    def _create_cont(self):
        """Create a container.

        Returns:
            str: UUID of the created container

        """
        cont_type = self.params.get("type", "/run/container/*")
        result = self.daos_cmd.container_create(pool=self.pool.uuid,
                                                svc=self.pool.svc_ranks,
                                                cont_type=cont_type)

        # Extract the container UUID from the daos container create output
        cont_uuid = re.findall(r"created\s+container\s+([0-9a-f-]+)",
                               result.stdout)
        if not cont_uuid:
            self.fail("Error obtaining the container uuid from: {}".format(
                result.stdout))
        return cont_uuid[0]

    def _start_dfuse(self):
        """Create a DfuseCommand object to start dfuse.

        Requires self.pool to exist; creates a new container for the mount.
        """
        # Get Dfuse params
        self.dfuse = Dfuse(self.hostlist_clients, self.tmp)
        self.dfuse.get_params(self)

        # update dfuse params
        self.dfuse.set_dfuse_params(self.pool)
        self.dfuse.set_dfuse_cont_param(self._create_cont())
        self.dfuse.set_dfuse_exports(self.server_managers[0], self.client_log)

        try:
            # start dfuse
            self.dfuse.run()
        except CommandFailure as error:
            self.log.error("Dfuse command %s failed on hosts %s",
                           str(self.dfuse),
                           str(NodeSet.fromlist(self.dfuse.hosts)),
                           exc_info=error)
            self.fail("Unable to launch Dfuse.\n")

    def execute_fio(self):
        """Runner method for Fio."""
        # Create a pool if one does not already exist
        if self.pool is None:
            self._create_pool()

        # start dfuse if api is POSIX
        if self.fio_cmd.api.value == "POSIX":
            # Connect to the pool, create container and then start dfuse
            # Uncomment below two lines once DAOS-3355 is resolved
            # self.pool.connect()
            # self.create_cont()
            self._start_dfuse()
            # Point fio's global "directory" option at the dfuse mount
            self.fio_cmd.update(
                "global", "directory", self.dfuse.mount_dir.value,
                "fio --name=global --directory")

        # Run Fio
        self.fio_cmd.hosts = self.hostlist_clients
        self.fio_cmd.run()

        # Clean up the dfuse mount used by this run
        if self.dfuse:
            self.dfuse.stop()
            self.dfuse = None
class BashCmd(TestWithServers):
    """Base BashCmd test class.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a BashCmd object."""
        super(BashCmd, self).__init__(*args, **kwargs)
        self.dfuse = None
        # NOTE(review): file_name is never assigned again; setUp() populates
        # file_name1/file_name2 instead -- confirm whether this is dead.
        self.file_name = None
        self.dir_name = None
        self.pool_count = None
        self.cont_count = None

    def setUp(self):
        """Set up each test case."""
        # Start the servers and agents
        super(BashCmd, self).setUp()

        # Get the parameters for BashCmd
        self.dir_name = self.params.get("dirname", '/run/bashcmd/*')
        self.file_name1 = self.params.get("filename1", '/run/bashcmd/*')
        self.file_name2 = self.params.get("filename2", '/run/bashcmd/*')
        self.dd_count = self.params.get("dd_count", '/run/bashcmd/*')
        self.dd_blocksize = self.params.get("dd_blocksize", '/run/bashcmd/*')

    def tearDown(self):
        """Tear down each test case."""
        try:
            # Unmount dfuse before the servers/agents go away
            if self.dfuse:
                self.dfuse.stop()
        finally:
            # Stop the servers and agents
            super(BashCmd, self).tearDown()

    def create_pool(self):
        """Create a TestPool object to use with ior."""
        # Get the pool params
        self.pool = TestPool(self.context,
                             dmg_command=self.get_dmg_command())
        self.pool.get_params(self)

        # Create a pool
        self.pool.create()

    def create_cont(self):
        """Create a TestContainer object to be used to create container."""
        # Get container params
        self.container = TestContainer(self.pool,
                                       daos_command=DaosCommand(self.bin))
        self.container.get_params(self)

        # create container
        self.container.create()

    def start_dfuse(self, count):
        """Create a DfuseCommand object to start dfuse.

        Args:
            count (int): container index, used to build a unique mount point
        """
        # Get Dfuse params
        self.dfuse = Dfuse(self.hostlist_clients, self.tmp)
        self.dfuse.get_params(self)

        # update dfuse params; one mount point per pool/container pair
        self.dfuse.mount_dir.update("/tmp/" + self.pool.uuid + "_daos_dfuse"
                                    + str(count))
        self.dfuse.set_dfuse_params(self.pool)
        self.dfuse.set_dfuse_cont_param(self.container)
        self.dfuse.set_dfuse_exports(self.server_managers[0], self.client_log)

        try:
            # start dfuse
            self.dfuse.run()
        except CommandFailure as error:
            self.log.error("Dfuse command %s failed on hosts %s",
                           str(self.dfuse), self.dfuse.hosts,
                           exc_info=error)
            self.fail("Test was expected to pass but it failed.\n")

    def test_bashcmd(self):
        """Jira ID: DAOS-3508.

        Test Description:
            Purpose of this test is to mount different mount points of dfuse
            for different container and pool sizes and perform basic bash
            commands.

        Use cases:
            Following list of bash commands have been incorporated
            as part of this test: mkdir, touch, ls, chmod, rm, dd, stat, cp,
            cmp, mv, rmdir.
              Create a directory.
              Create a file under that directory.
              List the created file.
              Remove the file.
              Write a file to the dfuse mounted location using dd.
              List the written file to verify if it's create.
              Verify the file created is of right size as desired.
              Copy the file
              Compare the copied file with original to verify the content is
              same.
              Remove copied file.
              Rename file
              Verify renamed file exist using list.
              Remove a directory

        :avocado: tags=all,hw,daosio,medium,ib2,full_regression,bashcmd
        """
        self.cont_count = self.params.get("cont_count", '/run/container/*')
        self.pool_count = self.params.get("pool_count", '/run/pool/*')

        # Create a pool if one does not already exist.
        # NOTE(review): loop nesting reconstructed from collapsed source --
        # each pool hosts cont_count containers, each with its own mount.
        for _ in range(self.pool_count):
            self.create_pool()
            # perform test for multiple containers.
            for count in range(self.cont_count):
                self.create_cont()
                self.start_dfuse(count)
                abs_dir_path = os.path.join(self.dfuse.mount_dir.value,
                                            self.dir_name)
                abs_file_path1 = os.path.join(abs_dir_path, self.file_name1)
                abs_file_path2 = os.path.join(abs_dir_path, self.file_name2)
                # list of commands to be executed.
                commands = [
                    u"mkdir -p {}".format(abs_dir_path),
                    u"touch {}".format(abs_file_path1),
                    u"ls -a {}".format(abs_file_path1),
                    u"rm {}".format(abs_file_path1),
                    u"dd if=/dev/zero of={} count={} bs={}".format(
                        abs_file_path1, self.dd_count, self.dd_blocksize),
                    u"ls -al {}".format(abs_file_path1),
                    u"filesize=$(stat -c%s '{}');\
                    if (( filesize != {}*{} )); then exit 1;\
                    fi".format(abs_file_path1, self.dd_count,
                               self.dd_blocksize),
                    u"cp -r {} {}".format(abs_file_path1, abs_file_path2),
                    u"cmp --silent {} {}".format(abs_file_path1,
                                                 abs_file_path2),
                    u"rm {}".format(abs_file_path2),
                    u"mv {} {}".format(abs_file_path1, abs_file_path2),
                    u"ls -al {}".format(abs_file_path2),
                    u"rm {}".format(abs_file_path2),
                    u"rmdir {}".format(abs_dir_path)
                ]
                for cmd in commands:
                    try:
                        # execute bash cmds
                        ret_code = general_utils.pcmd(self.hostlist_clients,
                                                      cmd, timeout=30)
                        if 0 not in ret_code:
                            error_hosts = NodeSet(",".join([
                                str(node_set)
                                for code, node_set in ret_code.items()
                                if code != 0
                            ]))
                            raise CommandFailure(
                                "Error running '{}' on the following "
                                "hosts: {}".format(cmd, error_hosts))
                    # report error if any command fails
                    except CommandFailure as error:
                        self.log.error("BashCmd Test Failed: %s", str(error))
                        self.fail("Test was expected to pass but "
                                  "it failed.\n")

                # stop dfuse
                self.dfuse.stop()
                # destroy container
                self.container.destroy()
            # destroy pool
            self.pool.destroy()
class IorTestBase(TestWithServers):
    """Base IOR test class.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a IorTestBase object."""
        super(IorTestBase, self).__init__(*args, **kwargs)
        self.ior_cmd = None
        self.processes = None
        self.hostfile_clients_slots = None
        self.dfuse = None
        self.container = None
        self.lock = None

    def setUp(self):
        """Set up each test case."""
        # obtain separate logs
        self.update_log_file_names()
        # Start the servers and agents
        super(IorTestBase, self).setUp()

        # Get the parameters for IOR
        self.ior_cmd = IorCommand()
        self.ior_cmd.get_params(self)
        self.processes = self.params.get("np", '/run/ior/client_processes/*')

        # Until DAOS-3320 is resolved run IOR for POSIX
        # with single client node
        if self.ior_cmd.api.value == "POSIX":
            self.hostlist_clients = [self.hostlist_clients[0]]
            self.hostfile_clients = write_host_file.write_host_file(
                self.hostlist_clients, self.workdir,
                self.hostfile_clients_slots)

        # lock is needed for run_multiple_ior method.
        self.lock = threading.Lock()

    def tearDown(self):
        """Tear down each test case."""
        try:
            # Unmount dfuse before the servers/agents go away
            if self.dfuse:
                self.dfuse.stop()
        finally:
            # Stop the servers and agents
            super(IorTestBase, self).tearDown()

    def create_pool(self):
        """Create a TestPool object to use with ior."""
        # Get the pool params
        self.pool = TestPool(self.context,
                             dmg_command=self.get_dmg_command())
        self.pool.get_params(self)

        # Create a pool
        self.pool.create()

    def create_cont(self):
        """Create a TestContainer object to be used to create container."""
        # Get container params
        self.container = TestContainer(self.pool,
                                       daos_command=DaosCommand(self.bin))
        self.container.get_params(self)

        # create container
        self.container.create()

    def _start_dfuse(self):
        """Create a DfuseCommand object to start dfuse.

        Uses self.pool and self.container, which must exist before this is
        called (see run_ior_with_pool).
        """
        # Get Dfuse params
        self.dfuse = Dfuse(self.hostlist_clients, self.tmp)
        self.dfuse.get_params(self)

        # update dfuse params
        self.dfuse.set_dfuse_params(self.pool)
        self.dfuse.set_dfuse_cont_param(self.container)
        self.dfuse.set_dfuse_exports(self.server_managers[0], self.client_log)

        try:
            # start dfuse
            self.dfuse.run()
        except CommandFailure as error:
            self.log.error("Dfuse command %s failed on hosts %s",
                           str(self.dfuse),
                           str(NodeSet.fromlist(self.dfuse.hosts)),
                           exc_info=error)
            self.fail("Test was expected to pass but it failed.\n")

    def run_ior_with_pool(self, intercept=None, test_file_suffix="",
                          test_file="daos:testFile"):
        """Execute ior with optional overrides for ior flags and object_class.

        If specified the ior flags and ior daos object class parameters will
        override the values read from the yaml file.

        Args:
            intercept (str, optional): path to the interception library. Shall
                be used only for POSIX through DFUSE. Defaults to None.
            test_file_suffix (str, optional): suffix to add to the end of the
                test file name. Defaults to "".
            test_file (str, optional): ior test file name. Defaults to
                "daos:testFile". Is ignored when using POSIX through DFUSE.

        Returns:
            CmdResult: result of the ior command execution.
                NOTE(review): the 256B POSIX path returns a skip-message str
                instead of a CmdResult -- callers should be aware.

        """
        self.update_ior_cmd_with_pool()

        # start dfuse if api is POSIX
        if self.ior_cmd.api.value == "POSIX":
            # Connect to the pool, create container and then start dfuse
            # Uncomment below two lines once DAOS-3355 is resolved
            if self.ior_cmd.transfer_size.value == "256B":
                return "Skipping the case for transfer_size=256B"
            self._start_dfuse()
            test_file = os.path.join(self.dfuse.mount_dir.value, "testfile")
        elif self.ior_cmd.api.value == "DFS":
            test_file = os.path.join("/", "testfile")

        self.ior_cmd.test_file.update("".join([test_file, test_file_suffix]))
        out = self.run_ior(self.get_ior_job_manager_command(), self.processes,
                           intercept)

        if self.dfuse:
            self.dfuse.stop()
            self.dfuse = None

        return out

    def update_ior_cmd_with_pool(self):
        """Update ior_cmd with pool."""
        # Create a pool if one does not already exist
        if self.pool is None:
            self.create_pool()
        # Always create a container
        # Don't pass uuid and pool handle to IOR.
        # It will not enable checksum feature
        self.pool.connect()
        self.create_cont()
        # Update IOR params with the pool and container params
        self.ior_cmd.set_daos_params(self.server_group, self.pool,
                                     self.container.uuid)

    def get_ior_job_manager_command(self):
        """Get the MPI job manager command for IOR.

        Returns:
            Mpirun: the mpi job manager command

        """
        # Initialize MpioUtils if IOR is running in MPIIO or DAOS mode
        if self.ior_cmd.api.value in ["MPIIO", "DAOS", "POSIX", "DFS"]:
            mpio_util = MpioUtils()
            if mpio_util.mpich_installed(self.hostlist_clients) is False:
                self.fail("Exiting Test: Mpich not installed")
        else:
            self.fail("Unsupported IOR API")

        return Mpirun(self.ior_cmd, mpitype="mpich")

    def run_ior(self, manager, processes, intercept=None):
        """Run the IOR command.

        Args:
            manager (Mpirun): mpi job manager command
            processes (int): number of host processes
            intercept (str, optional): path to interception library.
                Defaults to None.
        """
        env = self.ior_cmd.get_default_env(str(manager), self.client_log)
        if intercept:
            env["LD_PRELOAD"] = intercept
        manager.setup_command(env, self.hostfile_clients, processes)
        try:
            self.pool.display_pool_daos_space()
            out = manager.run()
            return out
        except CommandFailure as error:
            self.log.error("IOR Failed: %s", str(error))
            self.fail("Test was expected to pass but it failed.\n")
        finally:
            self.pool.display_pool_daos_space()

    def run_multiple_ior_with_pool(self, results, intercept=None):
        """Execute ior with optional overrides for ior flags and object_class.

        If specified the ior flags and ior daos object class parameters will
        override the values read from the yaml file.

        Args:
            results (dict): A dictionary object to store the ior metrics
            intercept (str): path to the interception library. Shall be used
                only for POSIX through DFUSE.
        """
        self.update_ior_cmd_with_pool()

        # start dfuse for POSIX api. This is specific to interception
        # library test requirements.
        self._start_dfuse()

        # Create two jobs and run in parallel.
        # Job1 will have 3 client set up to use dfuse + interception
        # library
        # Job2 will have 1 client set up to use only dfuse.
        job1 = self.get_new_job(self.hostlist_clients[:-1], 1,
                                results, intercept)
        job2 = self.get_new_job([self.hostlist_clients[-1]], 2,
                                results, None)

        job1.start()
        # Since same ior_cmd is used to trigger the MPIRUN
        # with different parameters, pausing for 2 seconds to
        # avoid data collisions.
        time.sleep(2)
        job2.start()
        job1.join()
        job2.join()
        self.dfuse.stop()
        self.dfuse = None

    def get_new_job(self, clients, job_num, results, intercept=None):
        """Create a new thread for ior run.

        Args:
            clients (list): client hosts the ior job will run against
            job_num (int): Assigned job number
            results (dict): A dictionary object to store the ior metrics
            intercept (path): Path to interception library

        Returns:
            Thread: un-started thread that runs run_multiple_ior

        """
        hostfile = write_host_file.write_host_file(
            clients, self.workdir, self.hostfile_clients_slots)
        job = threading.Thread(
            target=self.run_multiple_ior,
            args=[hostfile, len(clients), results, job_num, intercept])
        return job

    def run_multiple_ior(self, hostfile, num_clients, results, job_num,
                         intercept=None):
        # pylint: disable=too-many-arguments
        """Run the IOR command.

        Args:
            hostfile (str): path to the host file for this job's clients
            num_clients (int): number of client hosts in the host file
            results (dict): A dictionary object to store the ior metrics
            job_num (int): Assigned job number
            intercept (str, optional): path to interception library.
                Defaults to None.
        """
        # The shared ior_cmd / job manager is mutated here, so serialize
        # setup between the concurrently running job threads.
        self.lock.acquire(True)
        tsize = self.ior_cmd.transfer_size.value
        testfile = os.path.join(self.dfuse.mount_dir.value,
                                "testfile{}{}".format(tsize, job_num))
        if intercept:
            testfile += "intercept"
        self.ior_cmd.test_file.update(testfile)
        manager = self.get_ior_job_manager_command()
        # Split the configured process count proportionally to this job's
        # share of the client hosts.
        procs = (self.processes // len(self.hostlist_clients)) * num_clients
        env = self.ior_cmd.get_default_env(str(manager), self.client_log)
        if intercept:
            env["LD_PRELOAD"] = intercept
        manager.setup_command(env, hostfile, procs)
        self.lock.release()
        try:
            self.pool.display_pool_daos_space()
            out = manager.run()
            # Guard the shared results dict while storing this job's metrics
            self.lock.acquire(True)
            results[job_num] = IorCommand.get_ior_metrics(out)
            self.lock.release()
        except CommandFailure as error:
            self.log.error("IOR Failed: %s", str(error))
            self.fail("Test was expected to pass but it failed.\n")
        finally:
            self.pool.display_pool_daos_space()

    def verify_pool_size(self, original_pool_info, processes):
        """Validate the pool size.

        Args:
            original_pool_info (PoolInfo): Pool info prior to IOR
            processes (int): number of processes
        """
        # Get the current pool size for comparison
        current_pool_info = self.pool.pool.pool_query()

        # If transfer size is >= 4K, pool size is verified against NVMe,
        # else it is checked against SCM
        if self.ior_cmd.transfer_size.value >= 4096:
            self.log.info(
                "Size is > 4K,Size verification will be done with NVMe size")
            storage_index = 1
        else:
            self.log.info(
                "Size is < 4K,Size verification will be done with SCM size")
            storage_index = 0

        # Space consumed = free space before IOR minus free space after
        actual_pool_size = \
            original_pool_info.pi_space.ps_space.s_free[storage_index] - \
            current_pool_info.pi_space.ps_space.s_free[storage_index]
        expected_pool_size = self.ior_cmd.get_aggregate_total(processes)

        if actual_pool_size < expected_pool_size:
            self.fail(
                "Pool Free Size did not match: actual={}, expected={}".format(
                    actual_pool_size, expected_pool_size))
class MdtestBase(TestWithServers):
    """Base mdtest class.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a MdtestBase object."""
        super(MdtestBase, self).__init__(*args, **kwargs)
        self.mdtest_cmd = None
        self.processes = None
        # Job manager type name, e.g. "MPICH"; assigned in setUp()
        self.manager = None
        self.hostfile_clients_slots = None
        self.dfuse = None
        self.daos_cmd = None

    def setUp(self):
        """Set up each test case."""
        # obtain separate logs
        self.update_log_file_names()
        # Start the servers and agents
        super(MdtestBase, self).setUp()

        # initialize daos_cmd
        self.daos_cmd = DaosCommand(self.bin)

        # Get the parameters for Mdtest
        self.mdtest_cmd = MdtestCommand()
        self.mdtest_cmd.get_params(self)
        self.processes = self.params.get("np", '/run/mdtest/client_processes/*')
        self.manager = self.params.get("manager", '/run/mdtest/*', "MPICH")

        self.log.info('Clients %s', self.hostlist_clients)
        self.log.info('Servers %s', self.hostlist_servers)

    def tearDown(self):
        """Tear down each test case."""
        try:
            self._stop_dfuse()
        finally:
            # Stop the servers and agents
            super(MdtestBase, self).tearDown()

    def _stop_dfuse(self):
        """Stop dfuse, if running, and clear the stale reference."""
        if self.dfuse:
            self.dfuse.stop()
            self.dfuse = None

    def create_pool(self):
        """Create a TestPool object and create the pool."""
        # Get the pool params
        self.pool = TestPool(self.context, dmg_command=self.get_dmg_command())
        self.pool.get_params(self)

        # Create a pool
        self.pool.create()

    def _create_cont(self):
        """Create a container with the daos command.

        Returns:
            str: UUID of the created container

        """
        cont_type = self.params.get("type", "/run/container/*")
        result = self.daos_cmd.container_create(pool=self.pool.uuid,
                                                svc=self.pool.svc_ranks,
                                                cont_type=cont_type)

        # Extract the container UUID from the daos container create output
        cont_uuid = re.findall(
            r"created\s+container\s+([0-9a-f-]+)", result.stdout)
        if not cont_uuid:
            self.fail("Error obtaining the container uuid from: {}".format(
                result.stdout))
        return cont_uuid[0]

    def _start_dfuse(self):
        """Create a Dfuse object and use it to start dfuse."""
        # Get Dfuse params
        self.dfuse = Dfuse(self.hostlist_clients, self.tmp)
        self.dfuse.get_params(self)

        # update dfuse params
        self.dfuse.set_dfuse_params(self.pool)
        self.dfuse.set_dfuse_cont_param(self._create_cont())
        self.dfuse.set_dfuse_exports(self.server_managers[0], self.client_log)

        try:
            # start dfuse
            self.dfuse.run()
        except CommandFailure as error:
            self.log.error("Dfuse command %s failed on hosts %s",
                           str(self.dfuse), self.dfuse.hosts,
                           exc_info=error)
            self.fail("Unable to launch Dfuse.\n")

    def execute_mdtest(self):
        """Runner method for Mdtest."""
        # Create a pool if one does not already exist
        if self.pool is None:
            self.create_pool()
        # set Mdtest params
        self.mdtest_cmd.set_daos_params(self.server_group, self.pool)

        # start dfuse if api is POSIX
        if self.mdtest_cmd.api.value == "POSIX":
            # Connect to the pool, create container and then start dfuse
            # Uncomment below two lines once DAOS-3355 is resolved
            # self.pool.connect()
            # self.create_cont()
            self._start_dfuse()
            self.mdtest_cmd.test_dir.update(self.dfuse.mount_dir.value)

        # Run Mdtest
        self.run_mdtest(self.get_mdtest_job_manager_command(self.manager),
                        self.processes)

        self._stop_dfuse()

    def get_mdtest_job_manager_command(self, manager):
        """Get the MPI job manager command for Mdtest.

        Args:
            manager (str): job manager type name, e.g. "MPICH"

        Returns:
            JobManager: the object for the mpi job manager command

        """
        # Initialize MpioUtils if mdtest needs to be run using mpich
        if manager == "MPICH":
            mpio_util = MpioUtils()
            if mpio_util.mpich_installed(self.hostlist_clients) is False:
                self.fail("Exiting Test: Mpich not installed")
            return Mpirun(self.mdtest_cmd, mpitype="mpich")

        return Orterun(self.mdtest_cmd)

    def run_mdtest(self, manager, processes):
        """Run the Mdtest command.

        Args:
            manager (JobManager): mpi job manager command
            processes (int): number of host processes
        """
        env = self.mdtest_cmd.get_default_env(str(manager), self.client_log)
        manager.assign_hosts(
            self.hostlist_clients, self.workdir, self.hostfile_clients_slots)
        manager.assign_processes(processes)
        manager.assign_environment(env)
        try:
            self.pool.display_pool_daos_space()
            manager.run()
        except CommandFailure as error:
            self.log.error("Mdtest Failed: %s", str(error))
            self.fail("Test was expected to pass but it failed.\n")
        finally:
            self.pool.display_pool_daos_space()
class DfuseContainerCheck(TestWithServers):
    """Base Dfuse Container check test class.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a DfuseContainerCheck object."""
        super(DfuseContainerCheck, self).__init__(*args, **kwargs)
        self.dfuse = None
        self.pool = None
        self.container = None

    def setUp(self):
        """Set up each test case."""
        # Start the servers and agents
        super(DfuseContainerCheck, self).setUp()

    def tearDown(self):
        """Tear down each test case."""
        try:
            if self.dfuse:
                self.dfuse.stop()
                # Clear the stale reference so dfuse is not stopped twice
                self.dfuse = None
        finally:
            # Stop the servers and agents
            super(DfuseContainerCheck, self).tearDown()

    def create_pool(self):
        """Create a TestPool object to use with ior."""
        # Get the pool params
        self.pool = TestPool(self.context, dmg_command=self.get_dmg_command())
        self.pool.get_params(self)

        # Create a pool
        self.pool.create()

    def start_dfuse(self):
        """Create a Dfuse object and use it to start dfuse."""
        # Get Dfuse params
        self.dfuse = Dfuse(self.hostlist_clients, self.tmp)
        self.dfuse.get_params(self)

        # update dfuse params
        self.dfuse.set_dfuse_params(self.pool)
        self.dfuse.set_dfuse_cont_param(self.container)
        self.dfuse.set_dfuse_exports(self.server_managers[0], self.client_log)

        try:
            # start dfuse without verifying that it is running; the caller
            # verifies the mount with check_running()
            self.dfuse.run(False)
        except CommandFailure as error:
            self.log.error("Dfuse command %s failed on hosts %s",
                           str(self.dfuse), self.dfuse.hosts,
                           exc_info=error)
            self.fail("Test was expected to pass but it failed.\n")

    def test_dfusecontainercheck(self):
        """Jira ID: DAOS-3635.

        Test Description:
            Purpose of this test is to try and mount different container
            types to dfuse and check the behavior.
        Use cases:
            Create pool
            Create container of type default
            Try to mount to dfuse and check the behaviour.
            Create container of type POSIX.
            Try to mount to dfuse and check the behaviour.
        :avocado: tags=all,small,full_regression,dfusecontainercheck
        """
        # get test params for cont and pool count
        cont_types = self.params.get("cont_types", '/run/container/*')

        # Create a pool and start dfuse.
        self.create_pool()

        for cont_type in cont_types:
            # Get container params
            self.container = TestContainer(
                self.pool, daos_command=DaosCommand(self.bin))
            self.container.get_params(self)
            # create container; only a "POSIX" entry overrides the default
            # container type
            if cont_type == "POSIX":
                self.container.type.update(cont_type)
            self.container.create()

            try:
                # mount fuse
                self.start_dfuse()
                # check if fuse got mounted
                self.dfuse.check_running()
                # fail the test if fuse mounts with non-posix type container
                if cont_type == "":
                    self.fail(
                        "Non-Posix type container got mounted over dfuse")
            except CommandFailure as error:
                # expected to throw CommandFailure exception for non-posix
                # type container
                if cont_type == "":
                    self.log.info(
                        "Expected behaviour: Default container type is "
                        "expected to fail on dfuse mount: %s", str(error))
                # fail the test if exception is caught for POSIX type
                # container
                elif cont_type == "POSIX":
                    self.log.error(
                        "Posix Container dfuse mount failed: %s", str(error))
                    self.fail(
                        "Posix container type was expected to mount over "
                        "dfuse")
            # stop fuse and container for next iteration
            if cont_type != "":
                self.dfuse.stop()
                self.container.destroy(1)