def create_containers(self, pool=None, pool_num=10, num_containers=100, with_io=False):
    """Create a number of containers in parallel on a pool.

    Args:
        pool (TestPool): pool in which to create the containers.
        pool_num (int): pool number used to label log messages.
        num_containers (int): number of containers to create.
        with_io (bool): enable container testing with execute_io.
    """
    self.log.info("==(2.%d) create_containers start.", pool_num)
    thread_manager = ThreadManager(self.create_container_and_test, self.timeout - 30)
    for cont_num in range(num_containers):
        thread_manager.add(
            pool=pool, pool_num=pool_num, cont_num=cont_num, with_io=with_io)

    # Launch the create_container_and_test threads
    self.log.info(
        "==Launching %d create_container_and_test threads", thread_manager.qty)
    failed_thread_count = thread_manager.check_run()
    self.log.info(
        "==(2.%d) thread manager run complete: %d of %d containers created.",
        pool_num, num_containers - failed_thread_count, num_containers)
    if failed_thread_count > 0:
        msg = "#(2.{}) {} FAILED create_container_and_test thread(s)".format(
            pool_num, failed_thread_count)
        self.d_log.error(msg)
        self.fail(msg)
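# The per-container worker dispatched above, create_container_and_test(), is not
# shown in this excerpt. The sketch below is a hypothetical minimal version,
# assuming TestContainer (from the DAOS ftest test_utils_container module) is
# imported at module level and that the class provides an execute_io()-style
# helper named in the docstring above; the real method may differ.
def create_container_and_test(self, pool=None, pool_num=1, cont_num=1, with_io=False):
    """Create one container on the pool and optionally exercise it with I/O (sketch)."""
    container = TestContainer(pool)
    container.get_params(self)
    container.create()
    self.log.info("==(2.%d.%d) container created.", pool_num, cont_num)
    if with_io:
        # Hypothetical I/O helper; its actual signature is not shown in this excerpt.
        self.execute_io(container)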
def create_pools(self, num_pools=10, num_containers=100, with_io=False):
    """Create a number of pools and their containers in parallel.

    Args:
        num_pools (int): number of pools to create.
        num_containers (int): number of containers to create per pool.
        with_io (bool): enable container testing with execute_io.
    """
    # Setup the thread manager
    thread_manager = ThreadManager(self.create_containers, self.timeout - 30)
    for pool_number in range(num_pools):
        pool = self.get_pool()
        thread_manager.add(
            pool=pool, pool_num=pool_number, num_containers=num_containers, with_io=with_io)
        self.log.info("=(1.%d) pool created: %s.", pool_number, pool)

    # Launch the create_containers threads
    self.log.info("=Launching %d create_containers threads", thread_manager.qty)
    failed_thread_count = thread_manager.check_run()
    if failed_thread_count > 0:
        msg = "#(1) {} FAILED create_containers thread(s)".format(failed_thread_count)
        self.d_log.error(msg)
        self.fail(msg)
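# Minimal usage sketch (hypothetical test method, not part of this excerpt):
# create_pools() drives the whole pool/container fan-out, so a test only needs
# a single call with the desired counts.
def test_many_pools_and_containers(self):
    """Create 10 pools with 100 containers each and run I/O in every container (sketch)."""
    self.create_pools(num_pools=10, num_containers=100, with_io=True)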
def test_metadata_server_restart(self):
    """JIRA ID: DAOS-1512.

    Test Description:
        Verify 2000 small-size IOR containers after a server restart. The test
        writes IOR data in 5 threads for faster execution; each thread creates
        400 (8 byte) containers in the same pool. Restart the servers, read the
        IOR container files written previously, and validate data integrity
        using the IOR options "-R -G 1".

    Use Cases:
        ?

    :avocado: tags=all,full_regression
    :avocado: tags=hw,large
    :avocado: tags=server,metadata,metadata_ior,nvme
    """
    self.create_pool()
    files_per_thread = 400
    total_ior_threads = 5
    processes = self.params.get("slots", "/run/ior/clientslots/*")

    list_of_uuid_lists = [
        [str(uuid.uuid4()) for _ in range(files_per_thread)]
        for _ in range(total_ior_threads)]

    # Setup the thread manager
    thread_manager = ThreadManager(run_ior_loop, self.timeout - 30)

    # Launch threads to run IOR to write data, restart the agents and
    # servers, and then run IOR to read the data
    for operation in ("write", "read"):
        # Create the IOR threads
        for index in range(total_ior_threads):
            # Define the arguments for the run_ior_loop method
            ior_cmd = IorCommand()
            ior_cmd.get_params(self)
            ior_cmd.set_daos_params(self.server_group, self.pool)
            ior_cmd.flags.value = self.params.get(
                "F", "/run/ior/ior{}flags/".format(operation))

            # Define the job manager for the IOR command
            self.ior_managers.append(Orterun(ior_cmd))
            env = ior_cmd.get_default_env(str(self.ior_managers[-1]))
            self.ior_managers[-1].assign_hosts(self.hostlist_clients, self.workdir, None)
            self.ior_managers[-1].assign_processes(processes)
            self.ior_managers[-1].assign_environment(env)
            self.ior_managers[-1].verbose = False

            # Add a thread for these IOR arguments
            thread_manager.add(
                manager=self.ior_managers[-1],
                uuids=list_of_uuid_lists[index],
                tmpdir_base=self.test_dir)
            self.log.info(
                "Created %s thread %s with container uuids %s",
                operation, index, list_of_uuid_lists[index])

        # Launch the IOR threads
        self.log.info("Launching %d IOR %s threads", thread_manager.qty, operation)
        failed_thread_count = thread_manager.check_run()
        if failed_thread_count > 0:
            msg = "{} FAILED IOR {} Thread(s)".format(failed_thread_count, operation)
            self.d_log.error(msg)
            self.fail(msg)

        # Restart the agents and servers after the write / before the read
        if operation == "write":
            # Stop the agents
            errors = self.stop_agents()
            self.assertEqual(
                len(errors), 0,
                "Error stopping agents:\n  {}".format("\n  ".join(errors)))

            # Restart the servers w/o formatting the storage
            errors = self.restart_servers()
            self.assertEqual(
                len(errors), 0,
                "Error restarting servers:\n  {}".format("\n  ".join(errors)))

            # Start the agents
            self.start_agent_managers()

    self.log.info("Test passed")
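# run_ior_loop() is used as the thread target above but defined elsewhere. The
# sketch below is a hedged outline of what such a helper could look like,
# assuming IorCommand exposes a dfs_cont parameter with the standard update()
# API and that CommandFailure (raised by the job manager's run()) is imported
# at module level; the real helper may also use tmpdir_base for per-run
# temporary directories, which is omitted here.
def run_ior_loop(manager, uuids, tmpdir_base):
    """Run the IOR job once per container uuid and collect any failures (sketch)."""
    errors = []
    for index, cont_uuid in enumerate(uuids):
        # Point the IOR command at the next container (assumed parameter name).
        manager.job.dfs_cont.update(cont_uuid, "ior.dfs_cont")
        try:
            manager.run()
        except CommandFailure as error:
            errors.append(
                "IOR run {} against container {} failed: {}".format(index, cont_uuid, error))
    if errors:
        raise CommandFailure("\n".join(errors))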
def test_ior_intercept_verify_data(self):
    """Jira ID: DAOS-3502.

    Test Description:
        The purpose of this test is to run ior through dfuse with the
        interception library on 5 clients and without the interception
        library on 1 client for at least 30 minutes and verify the data
        integrity using ior's Read Verify and Write Verify options.

    Use case:
        Run ior with read, write, fpp, read verify, write verify for 30 minutes.
        Run ior with read, write, read verify, write verify for 30 minutes.

    :avocado: tags=all,full_regression
    :avocado: tags=hw,large
    :avocado: tags=daosio,dfuse,il,ior_intercept
    :avocado: tags=ior_intercept_verify_data
    """
    self.add_pool()
    self.add_container(self.pool)

    # Start dfuse for the POSIX api. This is specific to the interception library
    # test requirements.
    self.start_dfuse(self.hostlist_clients, self.pool, self.container)

    # Setup the thread manager
    thread_manager = ThreadManager(run_ior, self.timeout - 30)
    index_clients_intercept_file = [
        (0, self.hostlist_clients[0:-1],
         os.path.join(self.prefix, 'lib64', 'libioil.so'),
         os.path.join(self.dfuse.mount_dir.value, "testfile_0_intercept")),
        (1, self.hostlist_clients[-1:], None,
         os.path.join(self.dfuse.mount_dir.value, "testfile_1")),
    ]
    self.job_manager = []
    for index, clients, intercept, test_file in index_clients_intercept_file:
        # Add a job manager for each ior command. Use a timeout for the ior command
        # that leaves enough time to report the summary of all the threads.
        job_manager = get_job_manager(
            self, "Mpirun", None, False, "mpich", self.get_remaining_time() - 30)

        # Define the parameters that will be used to run an ior command in this thread
        thread_manager.add(
            test=self,
            manager=job_manager,
            log=self.client_log,
            hosts=clients,
            path=self.workdir,
            slots=None,
            group=self.server_group,
            pool=self.pool,
            container=self.container,
            processes=(self.processes // len(self.hostlist_clients)) * len(clients),
            intercept=intercept,
            ior_params={"test_file": test_file})
        self.log.info(
            "Created thread %s for %s with intercept: %s", index, clients, str(intercept))

    # Launch the IOR threads
    self.log.info("Launching %d IOR threads", thread_manager.qty)
    results = thread_manager.run()

    # Stop dfuse
    self.stop_dfuse()

    # Check the ior thread results
    failed_thread_count = thread_manager.check(results)
    if failed_thread_count > 0:
        msg = "{} FAILED IOR Thread(s)".format(failed_thread_count)
        self.d_log.error(msg)
        self.fail(msg)

    for index, clients, intercept, _ in index_clients_intercept_file:
        with_intercept = "without" if intercept is None else "with"
        IorCommand.log_metrics(
            self.log,
            "{} clients {} interception library".format(len(clients), with_intercept),
            IorCommand.get_ior_metrics(results[index].result))
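# The run_ior() thread target above comes from the DAOS test utilities and is not
# shown here. The fragment below is a hedged sketch of how the intercept path is
# typically applied: the interception library (libioil.so) only takes effect when
# preloaded into the IOR processes, so the job environment gets an LD_PRELOAD
# entry whenever an intercept path is supplied. The function name is illustrative;
# get_default_env() and assign_environment() are the calls already used above.
def apply_intercept(ior_cmd, manager, intercept=None):
    """Assign the IOR job environment, preloading the interception library if given (sketch)."""
    env = ior_cmd.get_default_env(str(manager))
    if intercept:
        # e.g. <prefix>/lib64/libioil.so from index_clients_intercept_file above
        env["LD_PRELOAD"] = intercept
    manager.assign_environment(env)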