    def test_query_attr(self):
        """JIRA ID: DAOS-4624

        Test Description:
            Test daos pool query and attribute commands as described above.

        Use Cases:
            Test query, set-attr, list-attr, and get-attr commands.

        :avocado: tags=all,pool,tiny,full_regression,pool_query_attr
        """
        # 1. Test pool query.
        # Specify the SCM size in bytes, the same format pool query reports,
        # so the create and query values can be compared directly.
        expected_size = "1000000000"
        kwargs = {"scm_size": expected_size}
        pool_create_result = self.get_dmg_command().get_output(
            "pool_create", **kwargs)
        expected_uuid = pool_create_result[0]
        sr = pool_create_result[1]
        daos_cmd = DaosCommand(self.bin)
        # Call daos pool query, obtain the pool UUID and SCM size, and
        # compare them against the values used when creating the pool.
        kwargs = {"pool": expected_uuid, "svc": sr}
        query_result = daos_cmd.get_output("pool_query", **kwargs)
        actual_uuid = query_result[0][0]
        actual_size = query_result[2][4]
        self.assertEqual(actual_uuid, expected_uuid)
        self.assertEqual(actual_size, expected_size)

        # 2. Test pool set-attr, get-attr, and list-attrs.
        expected_attrs = []
        expected_attrs_dict = {}
        sample_attrs = []
        sample_vals = []
        # Create 5 attributes.
        for i in range(5):
            sample_attr = "attr" + str(i)
            sample_val = "val" + str(i)
            sample_attrs.append(sample_attr)
            sample_vals.append(sample_val)
            _ = daos_cmd.pool_set_attr(
                pool=actual_uuid, attr=sample_attr, value=sample_val,
                svc=sr).stdout
            expected_attrs.append(sample_attr)
            expected_attrs_dict[sample_attr] = sample_val
        # List the attribute names and compare them against those set.
        kwargs = {"pool": actual_uuid, "svc": sr}
        actual_attrs = daos_cmd.get_output("pool_list_attrs", **kwargs)
        actual_attrs.sort()
        expected_attrs.sort()
        self.assertEqual(actual_attrs, expected_attrs)
        # Get each attribute's value and compare it against the value set.
        for i in range(5):
            kwargs = {"pool": actual_uuid, "attr": sample_attrs[i], "svc": sr}
            actual_val = daos_cmd.get_output("pool_get_attr", **kwargs)[0]
            self.assertEqual(sample_vals[i], actual_val)
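

# Note on the indexing above: get_output() runs the named dmg/daos subcommand
# and returns the regex captures the framework registers for that command (a
# re.findall result). With a single capture group that is a flat list, so
# pool_create_result[0] is the pool UUID; with several groups it is a list of
# per-match tuples, so query_result[2][4] is the fifth capture of the third
# matched line (the SCM size). The exact group layout is defined by the
# framework's command patterns, not by this test.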


class ContainerQueryAttributeTest(TestWithServers):
    # pylint: disable=anomalous-backslash-in-string
    """Test class for daos container query and attribute tests.

    Test Class Description:
        Query test: Create a pool and a container in it, and call daos
        container query. From the output, verify that the pool/container
        UUID matches the one returned when the pool/container was created.

        Attribute test:
        1. Prepare 7 types of strings: alphabets, numbers, special
           characters, etc.
        2. Create attributes with each of these 7 types in attr and in
           value; i.e., 14 attributes are created in total.
        3. Call get-attr for each of the 14 attrs and verify the returned
           values.
        4. Call list-attrs and verify the returned attrs.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a ContainerQueryAttributeTest object."""
        super(ContainerQueryAttributeTest, self).__init__(*args, **kwargs)
        self.expected_cont_uuid = None
        self.daos_cmd = None

    def create_pool_container(self):
        """Create a pool and a container in the pool.

        Save the container UUID and the DaosCommand object for use in the
        tests.
        """
        self.add_pool()
        self.daos_cmd = DaosCommand(self.bin)
        self.expected_cont_uuid = self.daos_cmd.get_output(
            "container_create", pool=self.pool.uuid)[0]

    def test_container_query_attr(self):
        """JIRA ID: DAOS-4640

        Test Description:
            Test daos container query and attribute commands as described
            above.

        Use Cases:
            Test container query, set-attr, get-attr, and list-attrs.

        :avocado: tags=all,pool,small,full_regression,cont_query_attr
        """
        # Test container query.
        self.create_pool_container()

        # Call daos container query, obtain the pool and container UUIDs,
        # and compare them against those returned when creating the pool and
        # the container.
        kwargs = {"pool": self.pool.uuid, "cont": self.expected_cont_uuid}
        query_output = self.daos_cmd.get_output(
            "container_query", **kwargs)[0]
        actual_pool_uuid = query_output[0]
        actual_cont_uuid = query_output[1]
        self.assertEqual(actual_pool_uuid, self.pool.uuid.lower())
        self.assertEqual(actual_cont_uuid, self.expected_cont_uuid)

        # Test container set-attr, get-attr, and list-attrs with different
        # types of characters.
        test_strings = [
            "abcd",
            "1234",
            "abc123",
            "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij",
            # Characters that don't require a backslash. The backslashes
            # here are required for the code to work, but not by daos.
            "~@#$%^*-=_+[]\{\}:/?,.",
            # Characters that require a backslash.
            "\`\&\(\)\\\;\\'\\\"\!\<\>",
            # A string that includes a space.
            "\"aa bb\""
        ]
        # The backslashes were added for the code to work, but the get-attr
        # output does not contain them, so prepare the expected output
        # without backslashes.
        escape_to_not = {}
        escape_to_not[test_strings[-3]] = "~@#$%^*-=_+[]{}:/?,."
        # A backslash is still needed before the double quote for the code
        # to work.
        escape_to_not[test_strings[-2]] = "`&()\;'\"!<>"
        escape_to_not[test_strings[-1]] = "aa bb"

        # Prepare attr-value pairs. Use the test_strings as the value for
        # the first 7 pairs and as the attr for the next 7.
        attr_values = []
        j = 0
        for i in range(2):
            for test_string in test_strings:
                if i == 0:
                    attr_values.append(["attr" + str(j), test_string])
                else:
                    attr_values.append([test_string, "attr" + str(j)])
                j += 1
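
        # As a worked example of the pairs built above, attr_values[0] is
        # ["attr0", "abcd"] and attr_values[7] is ["abcd", "attr7"]: the
        # attrN names are generated, while the other element cycles through
        # test_strings.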

        # Set and verify get-attr.
        errors = []
        expected_attrs = []
        for attr_value in attr_values:
            self.daos_cmd.container_set_attr(
                pool=actual_pool_uuid, cont=actual_cont_uuid,
                attr=attr_value[0], val=attr_value[1])
            kwargs["attr"] = attr_value[0]
            output = self.daos_cmd.container_get_attr(**kwargs)
            actual_val = output["value"]
            if attr_value[1] in escape_to_not:
                # Special-character string.
                if actual_val != escape_to_not[attr_value[1]]:
                    errors.append(
                        "Unexpected output for get_attr: {} != {}\n".format(
                            actual_val, escape_to_not[attr_value[1]]))
            else:
                # Standard-character string.
                if actual_val != attr_value[1]:
                    errors.append(
                        "Unexpected output for get_attr: {} != {}\n".format(
                            actual_val, attr_value[1]))
            # Collect the comparable attr names in preparation for the
            # list-attrs test.
            if attr_value[0] in escape_to_not:
                expected_attrs.append(escape_to_not[attr_value[0]])
            else:
                expected_attrs.append(attr_value[0])
        self.assertEqual(len(errors), 0, "; ".join(errors))

        # Verify that list-attrs works with the test_strings.
        expected_attrs.sort()
        kwargs = {"pool": actual_pool_uuid, "cont": actual_cont_uuid}
        data = self.daos_cmd.container_list_attrs(**kwargs)
        actual_attrs = data["attrs"]
        actual_attrs.sort()
        self.log.debug(str(actual_attrs))
        self.assertEqual(actual_attrs, expected_attrs)

    def test_list_attrs_long(self):
        """JIRA ID: DAOS-4640

        Test Description:
            Set many attributes and verify that list-attrs works.

        Use Cases:
            Test daos container list-attrs with 50 attributes.

        :avocado: tags=all,pool,small,full_regression,cont_list_attrs
        """
        self.create_pool_container()
        expected_attrs = []
        vals = []
        for i in range(50):
            expected_attrs.append("attr" + str(i))
            vals.append("val" + str(i))
        for expected_attr, val in zip(expected_attrs, vals):
            _ = self.daos_cmd.container_set_attr(
                pool=self.pool.uuid, cont=self.expected_cont_uuid,
                attr=expected_attr, val=val)
        expected_attrs.sort()
        kwargs = {"pool": self.pool.uuid, "cont": self.expected_cont_uuid}
        data = self.daos_cmd.container_list_attrs(**kwargs)
        actual_attrs = data["attrs"]
        actual_attrs.sort()
        self.assertEqual(
            expected_attrs, actual_attrs,
            "Unexpected output from list_attrs")
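

# A minimal stdlib-only sketch of the quoting relationship the attribute test
# above relies on. _shell_unescape_sketch is a hypothetical helper, not used
# by the tests: a POSIX shell strips the backslash escapes before daos sees
# its arguments, so the stored value matches the plain strings collected in
# escape_to_not.
def _shell_unescape_sketch():
    """Illustrate shell-style unescaping with shlex (illustration only)."""
    import shlex
    escaped = "~@#$%^*-=_+[]\\{\\}:/?,."
    # shlex.split in POSIX mode (the default) removes backslash escapes,
    # mirroring what the daos command receives after shell parsing.
    assert shlex.split(escaped) == ["~@#$%^*-=_+[]{}:/?,."]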


class IoAggregation(IorTestBase):
    # pylint: disable=too-many-ancestors
    """Test class Description: Verify aggregation across system shutdown.

    :avocado: recursive
    """

    def setUp(self):
        """Set up the test before executing."""
        super(IoAggregation, self).setUp()
        self.dmg = self.get_dmg_command()
        self.daos_cmd = DaosCommand(self.bin)

    def get_nvme_free_space(self):
        """Log and return the pool's free NVMe space."""
        free_space = self.pool.get_pool_free_space("nvme")
        self.log.info("Free nvme space: %s", free_space)
        return free_space

    def highest_epoch(self, kwargs):
        """Return the highest epoch for the container.

        Args:
            kwargs (dict): Dictionary of arguments to be passed to the
                container_query method.

        Returns:
            Highest epoch value for the given container.
        """
        highest_epoch = self.daos_cmd.get_output(
            "container_query", **kwargs)[0][4]
        return highest_epoch

    def test_ioaggregation(self):
        """Jira ID: DAOS-4332.

        Test Description:
            Verify aggregation across system shutdown.

        Use Cases:
            Create a pool and a container.
            Run IOR and keep the written data.
            Capture the free space available after the first IOR write.
            Create a snapshot and obtain its epoch ID.
            Write the same amount of data to the same IOR file without
            overwriting the previous data.
            Capture the free space again after the second IOR write.
            Capture the highest epoch ID before destroying the snapshot.
            Destroy the snapshot that was created.
            Shut down the servers and restart them.
            After the servers have restarted successfully, wait for
            aggregation to finish by checking the free space available and
            the highest epoch value, which should be higher than the value
            recorded before the snapshot destroy.
            If the current free space equals the free space after the first
            IOR write, pass; otherwise fail the test after 4 attempts.

        :avocado: tags=all,daosio,hw,small,full_regression,ioaggregation
        """
        # Update the ior signature option.
        self.ior_cmd.signature.update("123")
        # Run the ior write process.
        self.run_ior_with_pool()

        # Capture the free space before taking the snapshot.
        self.get_nvme_free_space()
        # Create the snapshot.
        self.container.create_snap()

        # Write to the same ior file again.
        self.ior_cmd.signature.update("456")
        self.run_ior_with_pool(create_cont=False)
        # Capture the free space after the second ior write.
        free_space_before_snap_destroy = self.get_nvme_free_space()
        # Obtain the highest epoch before the snapshot destroy via container
        # query.
        kwargs = {
            "pool": self.pool.uuid,
            "cont": self.container.uuid
        }
        highest_epc_before_snap_destroy = self.highest_epoch(kwargs)

        # Delete the snapshot.
        self.container.destroy_snap(epc=self.container.epoch)

        # Shut down the servers and restart them.
        self.get_dmg_command().system_stop(True)
        time.sleep(5)
        self.get_dmg_command().system_start()

        # Check that all servers started as expected.
        scan_info = self.get_dmg_command().system_query()
        if not check_system_query_status(scan_info):
            self.fail("One or more servers crashed")

        # Now check whether the space has been returned and the highest
        # epoch value is higher than the value just before the snapshot
        # destroy.
        counter = 1
        returned_space = (self.get_nvme_free_space() -
                          free_space_before_snap_destroy)
        while returned_space < int(self.ior_cmd.block_size.value) or \
                highest_epc_before_snap_destroy >= self.highest_epoch(kwargs):
            # Wait up to 4 x 60 seconds for aggregation to complete;
            # otherwise exit the test with a failure.
            if counter > 4:
                self.log.info("Free space before snapshot destroy: %s",
                              free_space_before_snap_destroy)
                self.log.info("Free space when test terminated: %s",
                              self.get_nvme_free_space())
                self.log.info("Highest epoch before aggregation: %s",
                              highest_epc_before_snap_destroy)
                self.log.info("Highest epoch when test terminated: %s",
                              self.highest_epoch(kwargs))
                self.fail("Aggregation did not complete as expected")

            time.sleep(60)
            returned_space = (self.get_nvme_free_space() -
                              free_space_before_snap_destroy)
            counter += 1
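

# The aggregation wait loop above polls free space and the highest epoch up
# to 4 times with a 60-second delay between attempts. A generic sketch of
# that pattern as a standalone helper (hypothetical; the test inlines the
# logic instead, and this relies on the module's existing time import):
def _poll_until(condition, attempts=4, delay=60):
    """Return True once condition() holds, polling up to attempts times."""
    for _ in range(attempts):
        if condition():
            return True
        time.sleep(delay)
    return condition()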


class NvmeEnospace(ServerFillUp):
    # pylint: disable=too-many-ancestors
    """Test Class Description: Validate DER_NOSPACE for SCM and NVMe.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a NvmeEnospace object."""
        super(NvmeEnospace, self).__init__(*args, **kwargs)
        self.daos_cmd = None

    def setUp(self):
        """Set up the test before executing."""
        super(NvmeEnospace, self).setUp()

        # Initialize the daos command.
        self.daos_cmd = DaosCommand(self.bin)
        self.create_pool_max_size()
        self.der_nospace_count = 0
        self.other_errors_count = 0

    def verify_enospace_log(self, der_nospace_err_count):
        """Verify that DER_NOSPACE is the only error in the client log and
        that its count has grown as expected.

        Args:
            der_nospace_err_count (int): Expected DER_NOSPACE count from the
                client log.
        """
        # Get the DER_NOSPACE and other error counts from the log.
        self.der_nospace_count, self.other_errors_count = error_count(
            "-1007", self.hostlist_clients, self.client_log)

        # Check that there are no other errors in the log file.
        if self.other_errors_count > 0:
            self.fail('Found other errors, count {} in client log {}'.format(
                self.other_errors_count, self.client_log))
        # Check that the DER_NOSPACE error count is high enough; if not, the
        # test fails.
        if self.der_nospace_count < der_nospace_err_count:
            self.fail(
                'Expected DER_NOSPACE should be > {} and found {}'.format(
                    der_nospace_err_count, self.der_nospace_count))

    def delete_all_containers(self):
        """Delete all the containers in the pool."""
        # List all the containers.
        kwargs = {"pool": self.pool.uuid, "svc": self.pool.svc_ranks}
        containers = self.daos_cmd.get_output("pool_list_cont", **kwargs)

        # Destroy all the containers.
        for cont in containers:
            kwargs["cont"] = cont
            self.daos_cmd.container_destroy(**kwargs)

    def ior_bg_thread(self, results):
        """Start an IOR background thread.

        Writes a small data set and keeps reading it in a loop until it
        fails or the main program exits.

        Args:
            results (queue): queue for returning thread results
        """
        mpio_util = MpioUtils()
        if mpio_util.mpich_installed(self.hostlist_clients) is False:
            self.fail("Exiting Test: Mpich not installed")

        # Define the IOR command using the parameters from the yaml file.
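        # A separate IorCommand instance is built here so the background job
        # can use its own transfer size and test file while inheriting the
        # other settings from the self.ior_cmd used by the foreground runs.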
        ior_bg_cmd = IorCommand()
        ior_bg_cmd.get_params(self)
        ior_bg_cmd.set_daos_params(self.server_group, self.pool)
        ior_bg_cmd.dfs_oclass.update(self.ior_cmd.dfs_oclass.value)
        ior_bg_cmd.api.update(self.ior_cmd.api.value)
        ior_bg_cmd.transfer_size.update(self.ior_scm_xfersize)
        ior_bg_cmd.block_size.update(self.ior_cmd.block_size.value)
        ior_bg_cmd.flags.update(self.ior_cmd.flags.value)
        ior_bg_cmd.test_file.update('/testfile_background')

        # Define the job manager for the IOR command.
        manager = Mpirun(ior_bg_cmd, mpitype="mpich")
        self.create_cont()
        manager.job.dfs_cont.update(self.container.uuid)
        env = ior_bg_cmd.get_default_env(str(manager))
        manager.assign_hosts(self.hostlist_clients, self.workdir, None)
        manager.assign_processes(1)
        manager.assign_environment(env, True)
        print('----Run IOR in Background-------')

        # Run the IOR write command.
        try:
            manager.run()
        except (CommandFailure, TestFail) as _error:
            results.put("FAIL")
            return

        # Run the IOR read command in a loop.
        ior_bg_cmd.flags.update(self.ior_read_flags)
        while True:
            try:
                manager.run()
            except (CommandFailure, TestFail) as _error:
                results.put("FAIL")
                break

    def run_enospace_foreground(self):
        """Run IOR in the foreground and validate DER_NOSPACE and the
        expected storage usage.
        """
        # Fill 75% more of the SCM pool. Aggregation is enabled, so NVMe
        # space will start filling up.
        print('Starting main IOR load')
        self.start_ior_load(storage='SCM', percent=75)
        print(self.pool.pool_percentage_used())

        # Fill 50% more of the SCM pool. Aggregation is enabled, so NVMe
        # space will keep filling up.
        self.start_ior_load(storage='SCM', percent=50)
        print(self.pool.pool_percentage_used())

        # Fill 60% more of the SCM pool. NVMe is now full, so data will not
        # be moved to NVMe; instead SCM will keep filling up until it is
        # full, and this command is expected to fail with DER_NOSPACE.
        try:
            self.start_ior_load(storage='SCM', percent=60)
            self.fail('This test is supposed to FAIL because of DER_NOSPACE '
                      'but it passed')
        except TestFail as _error:
            self.log.info('Test expected to fail because of DER_NOSPACE')

        # Display the pool usage %.
        print(self.pool.pool_percentage_used())

        # Verify that the DER_NOSPACE error count is as expected and that
        # there are no other errors in the client log.
        self.verify_enospace_log(self.der_nospace_count)

        # Check that both NVMe and SCM are full.
        pool_usage = self.pool.pool_percentage_used()
        # NVMe should be almost full; if not, the test fails.
        if pool_usage['nvme'] > 8:
            self.fail(
                'Pool NVMe used percentage should be < 8%, instead {}'.format(
                    pool_usage['nvme']))
        # Some SCM space is used by the system, so SCM won't be 100% full.
        if pool_usage['scm'] > 50:
            self.fail(
                'Pool SCM used percentage should be < 50%, instead {}'.format(
                    pool_usage['scm']))

    def run_enospace_with_bg_job(self):
        """Run the test and validate DER_NOSPACE and the expected storage
        usage while a single background IOR job runs as space fills up.
        """
        # Get the initial DER_NOSPACE count.
        self.der_nospace_count, self.other_errors_count = error_count(
            "-1007", self.hostlist_clients, self.client_log)

        # Start the IOR background thread, which writes a small data set and
        # reads it in a loop until the storage space is full.
        out_queue = queue.Queue()
        job = threading.Thread(target=self.ior_bg_thread,
                               kwargs={"results": out_queue})
        job.daemon = True
        job.start()

        # Run IOR in the foreground.
        self.run_enospace_foreground()

        # Verify the background job queue and make sure no IOR run failed.
        while not out_queue.empty():
            if out_queue.get() == "FAIL":
                self.fail("One of the background IOR jobs failed")

    def test_enospace_lazy_with_bg(self):
        """Jira ID: DAOS-4756.

        Test Description: IO gets DER_NOSPACE when SCM and NVMe are full
            with the default (lazy) aggregation mode.

        Use Case: Create the pool and fill 75% of the SCM size, which
            triggers aggregation because of space pressure; then fill 75%
            more, which should fill NVMe. Try to fill 60% more, at which
            point the SCM size will be full too. Verify that the last IO
            fails with DER_NOSPACE and that the SCM/NVMe pool capacity is
            full. One background IO job runs continuously.

        :avocado: tags=all,hw,medium,nvme,ib2,full_regression
        :avocado: tags=der_enospace,enospc_lazy,enospc_lazy_bg
        """
        print(self.pool.pool_percentage_used())

        # Run IOR to fill the pool.
        self.run_enospace_with_bg_job()

    def test_enospace_lazy_with_fg(self):
        """Jira ID: DAOS-4756.

        Test Description: Fill up the system (default aggregation mode) and
            delete all containers in a loop, which should release the space.

        Use Case: Create the pool and fill 75% of the SCM size, which
            triggers aggregation because of space pressure; then fill 75%
            more, which should fill NVMe. Try to fill 60% more, at which
            point the SCM size will be full too. Verify that the last IO
            fails with DER_NOSPACE and that the SCM/NVMe pool capacity is
            full. Delete all the containers. Repeat 10 times in a loop and
            verify that the space is released.

        :avocado: tags=all,hw,medium,nvme,ib2,full_regression
        :avocado: tags=der_enospace,enospc_lazy,enospc_lazy_fg
        """
        print(self.pool.pool_percentage_used())

        # Repeat the test in a loop.
        for _loop in range(10):
            print("-------enospc_lazy_fg Loop--------- {}".format(_loop))
            # Run IOR to fill the pool.
            self.run_enospace_foreground()
            # Delete all the containers.
            self.delete_all_containers()
            # Container deletion takes some time to release the space.
            time.sleep(60)

        # Run the last IO.
        self.start_ior_load(storage='SCM', percent=1)

    def test_enospace_time_with_bg(self):
        """Jira ID: DAOS-4756.

        Test Description: IO gets DER_NOSPACE when SCM is full, and the
            space is released on container destroy with aggregation set to
            time mode.

        Use Case: Create the pool and set the aggregation mode to time.
            Start filling 75% of the SCM size; aggregation will be triggered
            from time to time. Then fill 75% more, which fills up NVMe. Try
            to fill 60% more, at which point the SCM size will be full too.
            Verify that the last IO fails with DER_NOSPACE and that the
            SCM/NVMe pool capacity is full. One background IO job runs
            continuously.

        :avocado: tags=all,hw,medium,nvme,ib2,full_regression
        :avocado: tags=der_enospace,enospc_time,enospc_time_bg
        """
        print(self.pool.pool_percentage_used())

        # Enable time mode for aggregation.
        self.pool.set_property("reclaim", "time")

        # Run IOR to fill the pool.
        self.run_enospace_with_bg_job()

    def test_enospace_time_with_fg(self):
        """Jira ID: DAOS-4756.

        Test Description: Fill up the system (time aggregation mode) and
            delete all containers in a loop, which should release the space.

        Use Case: Create the pool and set the aggregation mode to time.
            Start filling 75% of the SCM size; aggregation will be triggered
            from time to time. Then fill 75% more, which fills up NVMe. Try
            to fill 60% more, at which point the SCM size will be full too.
            Verify that the last IO fails with DER_NOSPACE and that the
            SCM/NVMe pool capacity is full. Delete all the containers.
            Repeat 10 times in a loop and verify that the space is released.

        :avocado: tags=all,hw,medium,nvme,ib2,full_regression
        :avocado: tags=der_enospace,enospc_time,enospc_time_fg
        """
        print(self.pool.pool_percentage_used())

        # Enable time mode for aggregation.
        self.pool.set_property("reclaim", "time")

        # Repeat the test in a loop.
        for _loop in range(10):
            print("-------enospc_time_fg Loop--------- {}".format(_loop))
            # Run IOR to fill the pool.
            self.run_enospace_with_bg_job()
            # Delete all the containers.
            self.delete_all_containers()
            # Container deletion takes some time to release the space.
            time.sleep(60)

        # Run the last IO.
        self.start_ior_load(storage='SCM', percent=1)

    @skipForTicket("DAOS-5403")
    def test_performance_storage_full(self):
        """Jira ID: DAOS-4756.

        Test Description: Verify IO read performance when the pool is full.

        Use Case: Create the pool and run a small set of IOR as a baseline.
            Start IOR with a transfer size < 4K, which starts filling SCM,
            triggers aggregation, and starts filling up NVMe. Re-run the
            baseline IOR read and make sure its number is within +-5% of the
            number obtained before the system storage was full.

        :avocado: tags=all,hw,medium,nvme,ib2,full_regression
        :avocado: tags=der_enospace,enospc_performance
        """
        # Write the IOR baseline and get the read BW for later comparison.
        print(self.pool.pool_percentage_used())
        # Write first.
        self.start_ior_load(storage='SCM', percent=1)
        # Read the baseline data set.
        self.start_ior_load(storage='SCM', operation='Read', percent=1)
        max_mib_baseline = float(self.ior_matrix[0][int(IorMetrics.Max_MiB)])
        baseline_cont_uuid = self.ior_cmd.dfs_cont.value
        print("IOR Baseline Read MiB {}".format(max_mib_baseline))

        # Run IOR to fill the pool.
        self.run_enospace_with_bg_job()

        # Read the same container that was written at the beginning.
        self.container.uuid = baseline_cont_uuid
        self.start_ior_load(storage='SCM', operation='Read', percent=1)
        max_mib_latest = float(self.ior_matrix[0][int(IorMetrics.Max_MiB)])
        print("IOR Latest Read MiB {}".format(max_mib_latest))

        # Check that the latest IOR read performance is within the 5%
        # tolerance when the storage space is full.
        if abs(max_mib_baseline - max_mib_latest) > (
                max_mib_baseline / 100 * 5):
            self.fail(
                'Latest IOR read performance is not within the 5% tolerance.'
                ' Baseline Read MiB = {} and latest IOR Read MiB = {}'.format(
                    max_mib_baseline, max_mib_latest))

    def test_enospace_no_aggregation(self):
        """Jira ID: DAOS-4756.

        Test Description: IO gets DER_NOSPACE when SCM is full, and the
            space is released on container destroy with aggregation
            disabled.

        Use Case: Create the pool and disable aggregation. Fill 75% of the
            SCM size, which should work; then try to fill 10% more, which
            should fail with DER_NOSPACE. Destroy the container and validate
            that the pool's free SCM size is close to full (> 95%). Repeat
            ~10 times in a loop and verify the DER_NOSPACE count and the
            free SCM size after each container destroy.

        :avocado: tags=all,hw,medium,nvme,ib2,full_regression
        :avocado: tags=der_enospace,enospc_no_aggregation
        """
        # pylint: disable=attribute-defined-outside-init
        # pylint: disable=too-many-branches
        print(self.pool.pool_percentage_used())

        # Disable aggregation.
        self.pool.set_property("reclaim", "disabled")

        # Get the initial DER_NOSPACE and other error counts from the log.
        self.der_nospace_count, self.other_errors_count = error_count(
            "-1007", self.hostlist_clients, self.client_log)

        # Repeat the test in a loop.
        for _loop in range(10):
            print(
                "-------enospc_no_aggregation Loop--------- {}".format(_loop))
            # Fill 40% of the SCM pool.
            self.start_ior_load(storage='SCM', percent=40)
            print(self.pool.pool_percentage_used())

            try:
                # Fill 40% more of SCM, which should fail because there is
                # no SCM space left.
                self.start_ior_load(storage='SCM', percent=40)
                self.fail('This test is supposed to fail because of '
                          'DER_NOSPACE but it passed')
            except TestFail as _error:
                self.log.info('Expected to fail because of DER_NOSPACE')

            # Verify that the DER_NOSPACE error count is as expected and
            # that there are no other errors in the client log.
            self.verify_enospace_log(self.der_nospace_count)

            # Delete all the containers.
            self.delete_all_containers()

            # Container deletion takes some time to release the SCM space.
            time.sleep(60)
            # Get the pool usage after the space has been released.
            pool_usage = self.pool.pool_percentage_used()
            print(pool_usage)
            # The SCM pool space should be released (some is still used by
            # the system).
            if pool_usage['scm'] > 55:
                self.fail(
                    'SCM pool used percentage should be < 55, instead {}'.
                    format(pool_usage['scm']))

        # Run the last IO.
        self.start_ior_load(storage='SCM', percent=1)
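

# A minimal sketch of what the error_count() helper used above might do on a
# single local log file (hypothetical; the real helper also collects counts
# from the remote hosts in hostlist_clients, and the exact DER log format is
# an assumption here):
def _count_der_errors(log_path, der_code="-1007"):
    """Return (matching, other) DER error counts found in a client log."""
    import re
    der_count, other_count = 0, 0
    with open(log_path) as log_file:
        for line in log_file:
            # DER errors are assumed to appear as, e.g., DER_NOSPACE(-1007).
            for code in re.findall(r"DER_[A-Z_]+\((-\d+)\)", line):
                if code == der_code:
                    der_count += 1
                else:
                    other_count += 1
    return der_count, other_count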