def test_valid_labels(self):
    """Test ID: DAOS-7942

    Test Description: Create and destroy pool with the following labels.
    * Random alphanumeric string of length 126.
    * Random alphanumeric string of length 127.
    * Random upper case string of length 50.
    * Random lower case string of length 50.
    * Random number string of length 50.

    :avocado: tags=all,full_regression
    :avocado: tags=vm
    :avocado: tags=pool,pool_label
    :avocado: tags=create_valid_labels
    """
    self.pool = []
    errors = []
    labels = [
        get_random_string(126),
        get_random_string(127),
        get_random_string(length=50, include=string.ascii_uppercase),
        get_random_string(length=50, include=string.ascii_lowercase),
        get_random_string(length=50, include=string.digits)
    ]
    for label in labels:
        errors.extend(self.verify_create(label, False))
        errors.extend(self.verify_destroy(self.pool[-1], False))
    report_errors(self, errors)
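# NOTE: The tests and helpers in this file all rely on a get_random_string()
# utility. The sketch below is an illustrative reimplementation inferred from
# the call sites seen here (positional length, optional exclude list, optional
# include character set); it is an assumption for readability, not the actual
# DAOS general_utils implementation.
import random
import string


def get_random_string(length, exclude=None, include=None):
    """Create a random string of the requested length.

    Args:
        length (int): length of the string to create
        exclude (list, optional): strings that must not be returned
        include (str, optional): characters to choose from; defaults to
            uppercase letters, lowercase letters, and digits

    Returns:
        str: a random string that does not appear in the exclude list

    """
    exclude = exclude if isinstance(exclude, list) else []
    include = include if include else string.ascii_letters + string.digits
    random_string = None
    while not isinstance(random_string, str) or random_string in exclude:
        random_string = "".join(
            random.choice(include) for _ in range(length))
    return random_string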
def write_until_full(container):
    """Write until we get enospace back.

    Args:
        container (DaosContainer): container in which to write the data

    Returns:
        int: number of bytes written to the container

    """
    total_written = 0
    size = 2048
    _oid = None

    try:
        while True:
            # make some stuff up and write
            dkey = get_random_string(5)
            akey = get_random_string(5)
            data = get_random_string(size)
            _oid = container.write_an_obj(data, size, dkey, akey)
            total_written += size

            # collapse down the committed epochs
            container.slip_epoch()
    except ValueError as exp:
        print(exp)

    return total_written
def write_object(self, container, record_qty, akey_size, dkey_size,
                 data_size, rank=None, obj_class=None):
    """Write an object to the container.

    Args:
        container (TestContainer): container in which to write the object
        record_qty (int): the number of records to write to the object
        akey_size (int): the akey length
        dkey_size (int): the dkey length
        data_size (int): the length of data to write in each record
        rank (int, optional): server rank to write to. Defaults to None.
        obj_class (int, optional): daos object class. Defaults to None.

    Raises:
        DaosTestError: if there was an error writing the object

    """
    for _ in range(record_qty):
        akey = get_random_string(akey_size, self.get_akeys())
        dkey = get_random_string(dkey_size, self.get_dkeys())
        data = get_random_string(data_size)

        # Write single data to the container
        self.write_record(container, akey, dkey, data, rank, obj_class)

        # Verify the data was written correctly
        data_read = self.read_record(container, akey, dkey, data_size)
        if data != data_read:
            raise DaosTestError(
                "Written data confirmation failed:"
                "\n wrote: {}\n read: {}".format(data, data_read))
def write_single_objects(
        container, obj_qty, rec_qty, akey_size, dkey_size, data_size, rank,
        object_class, log=None):
    """Write random single objects to the container.

    Args:
        container (DaosContainer): the container in which to write objects
        obj_qty (int): the number of objects to create in the container
        rec_qty (int): the number of records to create in each object
        akey_size (int): the akey length
        dkey_size (int): the dkey length
        data_size (int): the length of data to write in each record
        rank (int): the server rank to which to write the records
        object_class (int): the object class to use when writing records
        log (DaosLog|None): object for logging messages

    Returns:
        list: a list of dictionaries containing the object, transaction
            number, and data written to the container

    Raises:
        DaosTestError: if an error is detected writing the objects or
            verifying the write of the objects

    """
    if log:
        log.info("Creating objects in the container")
    object_list = []
    for index in range(obj_qty):
        object_list.append({"obj": None, "record": []})
        for _ in range(rec_qty):
            akey = get_random_string(
                akey_size,
                [record["akey"] for record in object_list[index]["record"]])
            dkey = get_random_string(
                dkey_size,
                [record["dkey"] for record in object_list[index]["record"]])
            data = get_random_string(data_size)
            object_list[index]["record"].append(
                {"akey": akey, "dkey": dkey, "data": data})

            # Write single data to the container
            try:
                object_list[index]["obj"] = container.write_an_obj(
                    data, len(data), dkey, akey, object_list[index]["obj"],
                    rank, object_class)
            except DaosApiError as error:
                raise DaosTestError(
                    "Error writing data (dkey={}, akey={}, data={}) to "
                    "the container: {}".format(dkey, akey, data, error))

            # Verify the single data was written to the container
            data_read = read_single_objects(
                container, data_size, dkey, akey, object_list[index]["obj"])
            if data != data_read:
                raise DaosTestError(
                    "Written data confirmation failed:"
                    "\n wrote: {}\n read: {}".format(data, data_read))

    return object_list
def write_object(self, container, record_qty, akey_size, dkey_size,
                 data_size, rank=None, obj_class=None, data_array_size=0):
    """Write an object to the container.

    Args:
        container (TestContainer): container in which to write the object
        record_qty (int): the number of records to write
        akey_size (int): the akey length
        dkey_size (int): the dkey length
        data_size (int): the length of data to write in each record
        rank (int, optional): rank. Defaults to None.
        obj_class (int, optional): daos object class. Defaults to None.
        data_array_size (optional): write an array or single value.
            Defaults to 0.

    Raises:
        DaosTestError: if there was an error writing the object

    """
    for _ in range(record_qty):
        akey = get_random_string(akey_size, self.get_akeys())
        dkey = get_random_string(dkey_size, self.get_dkeys())
        if data_array_size == 0:
            data = get_random_string(data_size)
        else:
            data = [
                get_random_string(data_size)
                for _ in range(data_array_size)]

        # Write single data to the container
        self.write_record(container, akey, dkey, data, rank, obj_class)

        # Verify the data was written correctly
        data_read = self.read_record(
            container, akey, dkey, data_size, data_array_size)
        if data != data_read:
            raise DaosTestError(
                "Written data confirmation failed:"
                "\n wrote: {}\n read: {}".format(data, data_read))
def start_dfuse(self, pool):
    """Create dfuse start command line for slurm.

    Args:
        pool (obj): TestPool obj

    Returns:
        dfuse (obj): Dfuse obj
        cmd (list): list of dfuse commands to add to jobscript

    """
    commands = []
    # Get Dfuse params
    dfuse = Dfuse(self.hostlist_clients, self.tmp)
    dfuse.get_params(self)
    # update dfuse params; mountpoint for each container
    unique = get_random_string(5, self.used)
    self.used.append(unique)
    mount_dir = dfuse.mount_dir.value + unique
    dfuse.mount_dir.update(mount_dir)
    dfuse.set_dfuse_params(pool)
    dfuse.set_dfuse_cont_param(self.create_dfuse_cont(pool))

    # create dfuse mount point
    commands.append(slurm_utils.srun_str(
        hosts=None,
        cmd="mkdir -p {}".format(dfuse.mount_dir.value),
        srun_params=None))
    commands.append(slurm_utils.srun_str(
        hosts=None,
        cmd="{}".format(str(dfuse)),
        srun_params=None))
    commands.append("sleep 10")
    commands.append(slurm_utils.srun_str(
        hosts=None,
        cmd="df -h {}".format(dfuse.mount_dir.value),
        srun_params=None))
    return dfuse, commands
def start_dfuse(self, pool):
    """Create dfuse start command line for slurm.

    Args:
        pool (obj): TestPool obj

    Returns:
        dfuse (obj): Dfuse obj
        cmd (list): list of dfuse commands to add to jobscript

    """
    # Get Dfuse params
    dfuse = Dfuse(self.hostlist_clients, self.tmp)
    dfuse.get_params(self)
    # update dfuse params; mountpoint for each container
    unique = get_random_string(5, self.used)
    self.used.append(unique)
    mount_dir = dfuse.mount_dir.value + unique
    dfuse.mount_dir.update(mount_dir)
    dfuse.set_dfuse_params(pool)
    dfuse.set_dfuse_cont_param(self.get_container(pool))
    dfuse_start_cmds = [
        "mkdir -p {}".format(dfuse.mount_dir.value),
        "{}".format(str(dfuse)),
        "df -h {}".format(dfuse.mount_dir.value)
    ]
    return dfuse, dfuse_start_cmds
def continuous_io(container, seconds):
    """Perform a combination of reads/writes for the specified time period.

    Args:
        container (DaosContainer): container in which to write the data
        seconds (int): how long to write data

    Returns:
        int: number of bytes written to the container

    Raises:
        ValueError: if a data mismatch is detected

    """
    finish_time = time.time() + seconds
    oid = None
    total_written = 0
    size = 500

    while time.time() < finish_time:
        # make some stuff up
        dkey = get_random_string(5)
        akey = get_random_string(5)
        data = get_random_string(size)

        # write it then read it back
        oid = container.write_an_obj(data, size, dkey, akey, oid, 5)
        data2 = container.read_an_obj(size, dkey, akey, oid)

        # verify it came back correctly
        if data != data2.value:
            raise ValueError("Data mismatch in ContinuousIo")

        # collapse down the committed epochs
        container.consolidate_epochs()

        total_written += size

    return total_written
def write_quantity(container, size_in_bytes):
    """Write a specific number of bytes.

    Note:
        The minimum amount that will be written is 2048 bytes.

    Args:
        container (DaosContainer): which container to write to, it should be
            in an open state prior to the call
        size_in_bytes (int): total number of bytes to be written, although
            no less than 2048 will be written.

    Returns:
        int: number of bytes written to the container

    """
    total_written = 0
    size = 2048
    _oid = None

    try:
        while total_written < size_in_bytes:
            # make some stuff up and write
            dkey = get_random_string(5)
            akey = get_random_string(5)
            data = get_random_string(size)
            _oid = container.write_an_obj(data, size, dkey, akey)
            total_written += size

            # collapse down the committed epochs
            container.slip_epoch()
    except ValueError as exp:
        print(exp)

    return total_written
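# Illustrative driver for the fill/IO helpers above (write_until_full and
# write_quantity). This is a hypothetical sketch: the open DaosContainer would
# come from the test's own pool/container fixtures, and the byte count shown
# (1 MiB) is a placeholder, not a value taken from any test yaml.
def _example_fill_and_verify(container):
    # Fill the container until ENOSPACE is returned, then report the total.
    written = write_until_full(container)
    print("wrote {} bytes before running out of space".format(written))

    # Write a bounded amount; the helper rounds up to its 2048-byte record
    # size, so the reported total is at least the requested quantity when
    # the pool has space.
    quantity = write_quantity(container, 1 << 20)
    print("wrote {} bytes for a 1 MiB request".format(quantity))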
def start_dfuse(self, pool, container, nodesperjob, resource_mgr=None,
                name=None, job_spec=None):
    """Create dfuse start command line for slurm.

    Args:
        self (obj): soak obj
        pool (obj): TestPool obj
        container (obj): TestContainer obj
        nodesperjob (int): number of nodes per job
        resource_mgr (str, optional): resource manager; "SLURM" wraps the
            commands with srun. Defaults to None.
        name (str, optional): job name used in the dfuse log file name.
            Defaults to None.
        job_spec (str, optional): job spec name used to select the dfuse
            params namespace. Defaults to None.

    Returns:
        dfuse (obj): Dfuse obj
        cmd (list): list of dfuse commands to add to jobscript

    """
    # Get Dfuse params
    dfuse = Dfuse(self.hostlist_clients, self.tmp)
    dfuse.namespace = os.path.join(os.sep, "run", job_spec, "dfuse", "*")
    dfuse.get_params(self)
    # update dfuse params; mountpoint for each container
    unique = get_random_string(5, self.used)
    self.used.append(unique)
    mount_dir = dfuse.mount_dir.value + unique
    dfuse.mount_dir.update(mount_dir)
    dfuse.set_dfuse_params(pool)
    dfuse.set_dfuse_cont_param(container)
    dfuse_log = os.path.join(
        self.test_log_dir,
        self.test_name + "_" + name + "_${SLURM_JOB_NODELIST}_" +
        "${SLURM_JOB_ID}_" + "daos_dfuse_" + unique)
    dfuse_env = "export D_LOG_MASK=ERR;export D_LOG_FILE={}".format(dfuse_log)
    dfuse_start_cmds = [
        "mkdir -p {}".format(dfuse.mount_dir.value),
        "clush -S -w $SLURM_JOB_NODELIST \"cd {};{};{}\"".format(
            dfuse.mount_dir.value, dfuse_env, str(dfuse)),
        "sleep 10",
        "df -h {}".format(dfuse.mount_dir.value),
    ]
    if resource_mgr == "SLURM":
        cmds = []
        for cmd in dfuse_start_cmds:
            if cmd.startswith("clush") or cmd.startswith("sleep"):
                cmds.append(cmd)
            else:
                cmds.append(get_srun_cmd(cmd, nodesperjob))
        dfuse_start_cmds = cmds
    return dfuse, dfuse_start_cmds
def build_job_script(self, commands, job, nodesperjob):
    """Create a slurm batch script that will execute a list of cmdlines.

    Args:
        self (obj): soak obj
        commands (list): commandlines and cmd specific log_name
        job (str): the job name that will be defined in the slurm script
        nodesperjob (int): number of nodes per job

    Returns:
        script_list: list of slurm batch scripts

    """
    job_timeout = self.params.get("job_timeout", "/run/" + job + "/*", 10)
    self.log.info("<<Build Script>> at %s", time.ctime())
    script_list = []
    # if additional cmds are needed in the batch script
    prepend_cmds = [
        "set -e",
        "/usr/bin/daos pool query --pool {} ".format(self.pool[1].uuid),
        "/usr/bin/daos pool query --pool {} ".format(self.pool[0].uuid)
    ]
    append_cmds = [
        "/usr/bin/daos pool query --pool {} ".format(self.pool[1].uuid),
        "/usr/bin/daos pool query --pool {} ".format(self.pool[0].uuid)
    ]
    exit_cmd = ["exit $status"]
    # Create the sbatch script for each list of cmdlines
    for cmd, log_name in commands:
        if isinstance(cmd, str):
            cmd = [cmd]
        output = os.path.join(
            self.test_log_dir,
            self.test_name + "_" + log_name + "_%N_" + "%j_")
        error = os.path.join(str(output) + "ERROR_")
        sbatch = {
            "time": str(job_timeout) + ":00",
            "exclude": NodeSet.fromlist(self.exclude_slurm_nodes),
            "error": str(error),
            "export": "ALL"
        }
        # include the cluster specific params
        sbatch.update(self.srun_params)
        unique = get_random_string(5, self.used)
        script = slurm_utils.write_slurm_script(
            self.test_log_dir, job, output, nodesperjob,
            prepend_cmds + cmd + append_cmds + exit_cmd, unique, sbatch)
        script_list.append(script)
        self.used.append(unique)
    return script_list
def build_job_script(self, commands, job, ppn, nodesperjob):
    """Create a slurm batch script that will execute a list of cmdlines.

    Args:
        commands (list): commandlines and cmd specific log_name
        job (str): the job name that will be defined in the slurm script
        ppn (int): number of tasks to run on each node
        nodesperjob (int): number of nodes per job

    Returns:
        script_list: list of slurm batch scripts

    """
    self.log.info("<<Build Script>> at %s", time.ctime())
    script_list = []
    # Start the daos_agent in the batch script for now
    # TO-DO: daos_agents start with systemd
    agent_launch_cmds = [
        "mkdir -p {}".format(os.environ.get("DAOS_TEST_LOG_DIR"))
    ]
    agent_launch_cmds.append(
        " ".join([str(self.agent_managers[0].manager.job), "&"]))
    # Create the sbatch script for each cmdline
    used = []
    for cmd, log_name in commands:
        output = os.path.join(
            self.test_log_dir,
            "%N_" + self.test_name + "_" + job + "_%j_%t_" +
            str(ppn * nodesperjob) + "_" + log_name + "_")
        error = os.path.join(
            self.test_log_dir,
            "%N_" + self.test_name + "_" + job + "_%j_%t_" +
            str(ppn * nodesperjob) + "_" + log_name + "_ERROR_")
        sbatch = {
            "time": str(self.job_timeout) + ":00",
            "exclude": NodeSet.fromlist(self.exclude_slurm_nodes),
            "error": str(error)
        }
        # include the cluster specific params
        sbatch.update(self.srun_params)
        unique = get_random_string(5, used)
        script = slurm_utils.write_slurm_script(
            self.test_log_dir, job, output, nodesperjob,
            agent_launch_cmds + [cmd], unique, sbatch)
        script_list.append(script)
        used.append(unique)
    return script_list
def build_job_script(self, commands, job, ppn, nodesperjob):
    """Create a slurm batch script that will execute a list of cmdlines.

    Args:
        commands (list): commandlines and cmd specific log_name
        job (str): the job name that will be defined in the slurm script
        ppn (int): number of tasks to run on each node
        nodesperjob (int): number of nodes per job

    Returns:
        script_list: list of slurm batch scripts

    """
    self.log.info("<<Build Script>> at %s", time.ctime())
    script_list = []
    # if additional cmds are needed in the batch script
    additional_cmds = []
    # Create the sbatch script for each list of cmdlines
    for cmd, log_name in commands:
        if isinstance(cmd, str):
            cmd = [cmd]
        output = os.path.join(
            self.test_log_dir,
            "%N_" + self.test_name + "_" + job + "_%j_%t_" +
            str(ppn * nodesperjob) + "_" + log_name + "_")
        error = os.path.join(
            self.test_log_dir,
            "%N_" + self.test_name + "_" + job + "_%j_%t_" +
            str(ppn * nodesperjob) + "_" + log_name + "_ERROR_")
        sbatch = {
            "time": str(self.job_timeout) + ":00",
            "exclude": NodeSet.fromlist(self.exclude_slurm_nodes),
            "error": str(error),
            "export": "ALL"
        }
        # include the cluster specific params
        sbatch.update(self.srun_params)
        unique = get_random_string(5, self.used)
        script = slurm_utils.write_slurm_script(
            self.test_log_dir, job, output, nodesperjob,
            additional_cmds + cmd, unique, sbatch)
        script_list.append(script)
        self.used.append(unique)
    return script_list
def test_invalid_labels(self):
    """Test ID: DAOS-7942

    Test Description: Create pool with the following invalid labels.
    * UUID format string: 23ab123e-5296-4f95-be14-641de40b4d5a
    * Long label - 128 random chars.

    :avocado: tags=all,full_regression
    :avocado: tags=vm
    :avocado: tags=pool,pool_label
    :avocado: tags=create_invalid_labels
    """
    self.pool = []
    errors = []
    label_outs = [
        ("23ab123e-5296-4f95-be14-641de40b4d5a", "invalid label"),
        (get_random_string(128), "invalid label")
    ]
    for label_out in label_outs:
        errors.extend(self.verify_create(label_out[0], True, label_out[1]))
    report_errors(self, errors)
def start_dfuse(self, pool, nodesperjob, resource_mgr=None):
    """Create dfuse start command line for slurm.

    Args:
        self (obj): soak obj
        pool (obj): TestPool obj
        nodesperjob (int): number of nodes per job
        resource_mgr (str, optional): resource manager; "SLURM" wraps the
            commands with srun. Defaults to None.

    Returns:
        dfuse (obj): Dfuse obj
        cmd (list): list of dfuse commands to add to jobscript

    """
    # Get Dfuse params
    dfuse = Dfuse(self.hostlist_clients, self.tmp)
    dfuse.get_params(self)
    # update dfuse params; mountpoint for each container
    unique = get_random_string(5, self.used)
    self.used.append(unique)
    add_containers(self, pool)
    mount_dir = dfuse.mount_dir.value + unique
    dfuse.mount_dir.update(mount_dir)
    dfuse.set_dfuse_params(pool)
    dfuse.set_dfuse_cont_param(self.container[-1])
    dfuse_start_cmds = [
        "mkdir -p {}".format(dfuse.mount_dir.value),
        "clush -w $SLURM_JOB_NODELIST \"cd {};{}\"".format(
            dfuse.mount_dir.value, str(dfuse)),
        "sleep 10",
        "df -h {}".format(dfuse.mount_dir.value),
    ]
    if resource_mgr == "SLURM":
        cmds = []
        for cmd in dfuse_start_cmds:
            if cmd.startswith("clush") or cmd.startswith("sleep"):
                cmds.append(cmd)
            else:
                cmds.append(get_srun_cmd(cmd, nodesperjob))
        dfuse_start_cmds = cmds
    return dfuse, dfuse_start_cmds
def test_no_space_cont_create(self):
    """Fill a pool completely, then verify container create and write behavior.

    :avocado: tags=all,container,tiny,full_regression,fullpoolcontcreate
    """
    # full storage rc
    err = "-1007"
    # probably should be -1007, revisit later
    err2 = "-1009"

    # create pool and connect
    self.prepare_pool()

    # query the pool
    self.d_log.debug("querying pool info")
    self.pool.get_info()
    self.d_log.debug("queried pool info")

    # create a container
    try:
        self.d_log.debug("creating container")
        self.cont = DaosContainer(self.context)
        self.cont.create(self.pool.pool.handle)
        self.d_log.debug("created container")
    except DaosApiError as excep:
        self.d_log.error(
            "caught exception creating container: {0}".format(excep))
        self.fail("caught exception creating container: {0}".format(excep))

    self.d_log.debug("opening container")
    self.cont.open()
    self.d_log.debug("opened container")

    # generate a random dkey and akey each time; write 1mb objects until no
    # space, then progressively smaller sizes to fill the pool quickly
    for obj_sz in [1048576, 10240, 10, 1]:
        write_count = 0
        while True:
            self.d_log.debug(
                "writing obj {0}, sz {1} to container".format(
                    write_count, obj_sz))
            my_str = "a" * obj_sz
            my_str_sz = obj_sz
            dkey = get_random_string(5)
            akey = get_random_string(5)
            try:
                dummy_oid = self.cont.write_an_obj(
                    my_str, my_str_sz, dkey, akey, obj_cls="OC_SX")
                self.d_log.debug(
                    "wrote obj {0}, sz {1}".format(write_count, obj_sz))
                write_count += 1
            except DaosApiError as excep:
                if not (err in repr(excep) or err2 in repr(excep)):
                    self.d_log.error(
                        "caught exception while writing object: "
                        "{0}".format(repr(excep)))
                    self.fail(
                        "caught exception while writing object: "
                        "{0}".format(repr(excep)))
                else:
                    self.d_log.debug(
                        "pool is too full for {0} byte objects".format(
                            obj_sz))
                    break

    self.d_log.debug("closing container")
    self.cont.close()
    self.d_log.debug("closed container")

    # create a 2nd container now that pool is full
    try:
        self.d_log.debug("creating 2nd container")
        self.cont2 = DaosContainer(self.context)
        self.cont2.create(self.pool.pool.handle)
        self.d_log.debug("created 2nd container")
        self.d_log.debug("opening container 2")
        self.cont2.open()
        self.d_log.debug("opened container 2")
        self.d_log.debug("writing one more object, write expected to fail")
        self.cont2.write_an_obj(
            my_str, my_str_sz, dkey, akey, obj_cls="OC_SX")
        self.fail(
            "wrote one more object after pool was completely filled, "
            "this should never print")
    except DaosApiError as excep:
        if not (err in repr(excep) or err2 in repr(excep)):
            self.d_log.error(
                "caught unexpected exception while writing object: "
                "{0}".format(repr(excep)))
            self.fail(
                "caught unexpected exception while writing object: "
                "{0}".format(repr(excep)))
        else:
            self.d_log.debug(
                "correctly caught -1007 while attempting to write object "
                "in full pool")
def test_ior_small(self):
    """Jira ID: DAOS-2715, DAOS-3657, DAOS-4909.

    Test Description:
        Run a small ior test to check basic functionality for the DFS,
        MPIIO and HDF5 APIs.

    Use case:
        Run ior with read, write, CheckWrite, CheckRead in ssf mode.
        Run ior with read, write, CheckWrite, CheckRead in fpp mode.
        Run ior with read, write, CheckWrite and access to random offsets
        instead of sequential.
        All three cases are run with single client and multiple client
        processes on two separate nodes.

    :avocado: tags=all,pr,daily_regression
    :avocado: tags=hw,large
    :avocado: tags=daosio,checksum,iorsmall,mpich
    :avocado: tags=DAOS_5610
    """
    results = []
    cncl_tickets = []
    dfuse_mount_dir = None
    ior_timeout = self.params.get("ior_timeout", '/run/ior/*')
    flags = self.params.get("ior_flags", '/run/ior/iorflags/*')
    apis = self.params.get("ior_api", '/run/ior/iorflags/*')
    mount_dir = self.params.get("mount_dir", "/run/dfuse/*")
    transfer_block_size = self.params.get(
        "transfer_block_size", '/run/ior/iorflags/*')
    obj_class = self.params.get("obj_class", '/run/ior/iorflags/*')

    for oclass in obj_class:
        self.ior_cmd.dfs_oclass.update(oclass)
        for api in apis:
            if api == "HDF5-VOL":
                self.ior_cmd.api.update("HDF5")
                hdf5_plugin_path = self.params.get(
                    "plugin_path", '/run/hdf5_vol/*')
                flags_w_k = " ".join([flags[0]] + ["-k"])
                self.ior_cmd.flags.update(flags_w_k, "ior.flags")
            else:
                # run tests for different variants
                self.ior_cmd.flags.update(flags[0], "ior.flags")
                hdf5_plugin_path = None
                self.ior_cmd.api.update(api)
            for test in transfer_block_size:
                # update transfer and block size
                self.ior_cmd.transfer_size.update(test[0])
                self.ior_cmd.block_size.update(test[1])
                # run ior
                if api == "HDF5-VOL":
                    sub_dir = get_random_string(5)
                    dfuse_mount_dir = os.path.join(mount_dir, sub_dir)
                try:
                    self.run_ior_with_pool(
                        plugin_path=hdf5_plugin_path, timeout=ior_timeout,
                        mount_dir=dfuse_mount_dir)
                    results.append(["PASS", str(self.ior_cmd)])
                except TestFail:
                    results.append(["FAIL", str(self.ior_cmd)])

    # Running a variant for ior fpp
    self.ior_cmd.flags.update(flags[1])
    self.ior_cmd.api.update(apis[0])
    self.ior_cmd.block_size.update((transfer_block_size[1])[1])
    self.ior_cmd.transfer_size.update((transfer_block_size[1])[0])
    self.ior_cmd.dfs_oclass.update(obj_class[0])
    # run ior
    try:
        self.run_ior_with_pool(plugin_path=None, timeout=ior_timeout)
        results.append(["PASS", str(self.ior_cmd)])
    except TestFail:
        results.append(["FAIL", str(self.ior_cmd)])

    self.log.error("Summary of IOR small test results:")
    errors = False
    for item in results:
        self.log.info("  %s  %s", item[0], item[1])
        if item[0] == "FAIL":
            errors = True
    if errors:
        self.fail("Test FAILED")
    if cncl_tickets:
        self.cancelForTicket(",".join(cncl_tickets))
def launch_snapshot(self, pool, name):
    """Create a basic snapshot of the reserved pool.

    Args:
        self (obj): soak obj
        pool (obj): TestPool obj
        name (str): harasser

    """
    self.log.info(
        "<<<PASS %s: %s started at %s>>>", self.loop, name, time.ctime())
    status = True
    # Create container
    container = TestContainer(pool)
    container.namespace = "/run/container_reserved/*"
    container.get_params(self)
    container.create()
    container.open()
    obj_cls = self.params.get(
        "object_class", '/run/container_reserved/*')

    # write data to object
    data_pattern = get_random_string(500)
    datasize = len(data_pattern) + 1
    dkey = "dkey"
    akey = "akey"
    obj = container.container.write_an_obj(
        data_pattern, datasize, dkey, akey, obj_cls=obj_cls)
    obj.close()

    # Take a snapshot of the container
    snapshot = DaosSnapshot(self.context)
    try:
        snapshot.create(container.container.coh)
    except (RuntimeError, TestFail, DaosApiError) as error:
        self.log.error("Snapshot failed", exc_info=error)
        status &= False
    if status:
        self.log.info("Snapshot Created")
        # write more data to object
        data_pattern2 = get_random_string(500)
        datasize2 = len(data_pattern2) + 1
        dkey = "dkey"
        akey = "akey"
        obj2 = container.container.write_an_obj(
            data_pattern2, datasize2, dkey, akey, obj_cls=obj_cls)
        obj2.close()
        self.log.info("Wrote additional data to container")

        # open the snapshot and read the data
        obj.open()
        snap_handle = snapshot.open(container.container.coh)
        try:
            data_pattern3 = container.container.read_an_obj(
                datasize, dkey, akey, obj, txn=snap_handle.value)
        except (RuntimeError, TestFail, DaosApiError) as error:
            self.log.error(
                "Error when retrieving the snapshot data %s", error)
            status &= False
        if status:
            # Compare the snapshot to the original written data.
            if data_pattern3.value != data_pattern:
                self.log.error("Snapshot data miscompare")
                status &= False
        # Destroy the snapshot
        try:
            snapshot.destroy(container.container.coh)
        except (RuntimeError, TestFail, DaosApiError) as error:
            self.log.error("Failed to destroy snapshot %s", error)
            status &= False
    # cleanup
    container.close()
    container.destroy()
    params = {"name": name, "status": status, "vars": {}}
    with H_LOCK:
        self.harasser_job_done(params)
    self.log.info(
        "<<<PASS %s: %s completed at %s>>>\n", self.loop, name, time.ctime())
def launch_snapshot(self):
    """Create a basic snapshot of the reserved pool."""
    self.log.info("<<Launch Snapshot>> at %s", time.ctime())
    status = True
    # Create container
    container = TestContainer(self.pool[0])
    container.namespace = "/run/container_reserved/*"
    container.get_params(self)
    container.create()
    container.open()
    obj_cls = self.params.get(
        "object_class", '/run/container_reserved/*')

    # write data to object
    data_pattern = get_random_string(500)
    datasize = len(data_pattern) + 1
    dkey = "dkey"
    akey = "akey"
    obj = container.container.write_an_obj(
        data_pattern, datasize, dkey, akey, obj_cls=obj_cls)
    obj.close()

    # Take a snapshot of the container
    snapshot = DaosSnapshot(self.context)
    try:
        snapshot.create(container.container.coh)
    except (RuntimeError, TestFail, DaosApiError) as error:
        self.log.error("Snapshot failed", exc_info=error)
        status &= False
    if status:
        self.log.info("Snapshot Created")
        # write more data to object
        data_pattern2 = get_random_string(500)
        datasize2 = len(data_pattern2) + 1
        dkey = "dkey"
        akey = "akey"
        obj2 = container.container.write_an_obj(
            data_pattern2, datasize2, dkey, akey, obj_cls=obj_cls)
        obj2.close()
        self.log.info("Wrote additional data to container")

        # open the snapshot and read the data
        obj.open()
        snap_handle = snapshot.open(container.container.coh)
        try:
            data_pattern3 = container.container.read_an_obj(
                datasize, dkey, akey, obj, txn=snap_handle.value)
        except (RuntimeError, TestFail, DaosApiError) as error:
            self.log.error(
                "Error when retrieving the snapshot data %s", error)
            status &= False
        if status:
            # Compare the snapshot to the original written data.
            if data_pattern3.value != data_pattern:
                self.log.error("Snapshot data miscompare")
                status &= False
        # Destroy the snapshot
        try:
            snapshot.destroy(container.container.coh)
        except (RuntimeError, TestFail, DaosApiError) as error:
            self.log.error("Failed to destroy snapshot %s", error)
            status &= False
    # cleanup
    container.close()
    container.destroy()
    with H_LOCK:
        self.harasser_results["SNAPSHOT"] = status
def run_ior_with_pool(self, intercept=None, test_file_suffix="",
                      test_file="daos:/testFile", create_pool=True,
                      create_cont=True, stop_dfuse=True, plugin_path=None,
                      timeout=None, fail_on_warning=False, mount_dir=None,
                      out_queue=None, env=None):
    # pylint: disable=too-many-arguments
    """Execute ior with optional overrides for ior flags and object_class.

    If specified the ior flags and ior daos object class parameters will
    override the values read from the yaml file.

    Args:
        intercept (str, optional): path to the interception library. Shall
            be used only for POSIX through DFUSE. Defaults to None.
        test_file_suffix (str, optional): suffix to add to the end of the
            test file name. Defaults to "".
        test_file (str, optional): ior test file name. Defaults to
            "daos:/testFile". Is ignored when using POSIX through DFUSE.
        create_pool (bool, optional): If it is true, create pool and
            container else just run the ior. Defaults to True.
        create_cont (bool, optional): Create new container. Default is True.
        stop_dfuse (bool, optional): Stop dfuse after ior command is
            finished. Default is True.
        plugin_path (str, optional): HDF5 vol connector library path.
            This will enable dfuse (xattr) working directory which is
            needed to run vol connector for DAOS. Default is None.
        timeout (int, optional): command timeout. Defaults to None.
        fail_on_warning (bool, optional): Controls whether the test
            should fail if a 'WARNING' is found. Default is False.
        mount_dir (str, optional): Create specific mount point.
        out_queue (queue, optional): Pass the exception to the queue.
            Defaults to None.
        env (EnvironmentVariables, optional): Pass the environment to be
            used when calling run_ior. Defaults to None.

    Returns:
        CmdResult: result of the ior command execution

    """
    if create_pool:
        self.update_ior_cmd_with_pool(create_cont)
    # start dfuse if api is POSIX or HDF5 with vol connector
    if self.ior_cmd.api.value == "POSIX" or plugin_path:
        # add a substring in case of HDF5-VOL
        if plugin_path:
            sub_dir = get_random_string(5)
            mount_dir = os.path.join(mount_dir, sub_dir)
        # Connect to the pool, create container and then start dfuse
        if not self.dfuse:
            self.start_dfuse(
                self.hostlist_clients, self.pool, self.container, mount_dir)
    # setup test file for POSIX or HDF5 with vol connector
    if self.ior_cmd.api.value == "POSIX" or plugin_path:
        test_file = os.path.join(self.dfuse.mount_dir.value, "testfile")
    elif self.ior_cmd.api.value == "DFS":
        test_file = os.path.join("/", "testfile")
    self.ior_cmd.test_file.update("".join([test_file, test_file_suffix]))
    job_manager = self.get_ior_job_manager_command()
    job_manager.timeout = timeout
    try:
        out = self.run_ior(
            job_manager, self.processes, intercept,
            plugin_path=plugin_path, fail_on_warning=fail_on_warning,
            out_queue=out_queue, env=env)
    finally:
        if stop_dfuse:
            self.stop_dfuse()
    return out
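# Illustrative call pattern for run_ior_with_pool() when using the HDF5 vol
# connector, mirroring the usage in test_ior_small above. This is a
# hypothetical sketch: the method name _example_run_hdf5_vol and the
# timeout value are placeholders, while the yaml namespaces shown are the
# ones already used in this file.
def _example_run_hdf5_vol(self):
    # Look up the vol connector plugin path and build a unique dfuse
    # mount point, as the HDF5-VOL branch of test_ior_small does.
    hdf5_plugin_path = self.params.get("plugin_path", "/run/hdf5_vol/*")
    dfuse_mount_dir = os.path.join(
        self.params.get("mount_dir", "/run/dfuse/*"), get_random_string(5))

    # Passing plugin_path makes run_ior_with_pool start dfuse and use the
    # mount point as the ior working directory.
    self.run_ior_with_pool(
        plugin_path=hdf5_plugin_path, timeout=600,
        mount_dir=dfuse_mount_dir)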
def test_basic_snapshot(self):
    """Test ID: DAOS-1370.

    Test Description:
        Create a pool, container in the pool, object in the container, add
        one key:value to the object. Commit the transaction. Perform a
        snapshot create on the container. Create 500 additional transactions
        with a small change to the object in each and commit each after the
        object update is done. Verify the snapshot is still available and
        the contents remain in their original state.

    :avocado: tags=snap,basicsnap
    """
    # Set up the pool and container.
    try:
        # initialize a pool object then create the underlying
        # daos storage, and connect
        self.prepare_pool()

        # create a container
        self.container = DaosContainer(self.context)
        self.container.create(self.pool.pool.handle)

        # now open it
        self.container.open()
    except DaosApiError as error:
        self.log.error(str(error))
        self.fail("Test failed before snapshot taken")

    try:
        # create an object and write some data into it
        obj_cls = self.params.get("obj_class", '/run/object_class/*')
        thedata = "Now is the winter of our discontent made glorious"
        datasize = len(thedata) + 1
        dkey = "dkey"
        akey = "akey"
        tx_handle = self.container.get_new_tx()
        obj = self.container.write_an_obj(
            thedata, datasize, dkey, akey, obj_cls=obj_cls, txn=tx_handle)
        self.container.commit_tx(tx_handle)
        obj.close()

        # Take a snapshot of the container
        self.snapshot = DaosSnapshot(self.context)
        self.snapshot.create(self.container.coh, tx_handle)
        self.log.info("Wrote an object and created a snapshot")
    except DaosApiError as error:
        self.fail(
            "Test failed during the initial object write.\n{0}".format(
                error))

    # Make 500 changes to the data object. The write_an_obj function does
    # a commit when the update is complete.
    try:
        self.log.info(
            "Committing 500 additional transactions to the same KV")
        more_transactions = 500
        while more_transactions:
            size = random.randint(1, 250) + 1
            new_data = get_random_string(size)
            new_obj = self.container.write_an_obj(
                new_data, size, dkey, akey, obj_cls=obj_cls)
            new_obj.close()
            more_transactions -= 1
    except DaosApiError as error:
        self.fail(
            "Test failed during the write of 500 objects.\n{0}".format(
                error))

    # List the snapshot
    try:
        reported_epoch = self.snapshot.list(self.container.coh)
    except DaosApiError as error:
        self.fail(
            "Test was unable to list the snapshot\n{0}".format(error))

    # Make sure the snapshot reflects the original epoch
    if self.snapshot.epoch != reported_epoch:
        self.fail(
            "The snapshot epoch returned from snapshot list is not the "
            "same as the original epoch snapshotted.")

    self.log.info(
        "After 500 additional commits the snapshot is still available")

    # Make sure the data in the snapshot is the original data.
    # Get a handle for the snapshot and read the object at dkey, akey.
    try:
        obj.open()
        snap_handle = self.snapshot.open(self.container.coh)
        thedata2 = self.container.read_an_obj(
            datasize, dkey, akey, obj, txn=snap_handle.value)
    except DaosApiError as error:
        self.fail(
            "Error when retrieving the snapshot data.\n{0}".format(error))

    # Compare the snapshot to the originally written data.
    if thedata2.value != thedata:
        self.fail(
            "The data in the snapshot is not the same as the original data")
    self.log.info(
        "The snapshot data matches the data originally written.")

    # Now destroy the snapshot
    try:
        self.snapshot.destroy(self.container.coh)
        self.log.info("Snapshot successfully destroyed")
    except DaosApiError as error:
        self.fail(str(error))