def create_ior_cmdline(self, job_spec, pool, ppn, nodesperjob):
    """Create IOR cmdlines to run in slurm batch.

    Builds one srun command line per combination of api, block size,
    transfer size and object class read from the test yaml under
    /run/<job_spec>/.

    Args:
        job_spec (str): ior job in yaml to run
        pool (obj): TestPool obj
        ppn (int): number of tasks to run on each node
        nodesperjob (int): number of nodes per job

    Returns:
        list: [cmdline string, log file name] pairs, one per parameter
            combination
    """
    commands = []
    iteration = self.test_iteration
    ior_params = "/run/" + job_spec + "/*"
    # IOR job specs with a list of parameters; update each value
    api_list = self.params.get("api", ior_params + "*")
    tsize_list = self.params.get("transfer_size", ior_params + "*")
    bsize_list = self.params.get("block_size", ior_params + "*")
    oclass_list = self.params.get("daos_oclass", ior_params + "*")
    # check if capable of doing rebuild; if yes then daos_oclass = RP_*GX
    if self.is_harasser("rebuild"):
        oclass_list = self.params.get("daos_oclass", "/run/rebuild/*")
    # update IOR cmdline for each additional IOR obj
    for api in api_list:
        for b_size in bsize_list:
            for t_size in tsize_list:
                for o_type in oclass_list:
                    ior_cmd = IorCommand()
                    ior_cmd.namespace = ior_params
                    ior_cmd.get_params(self)
                    # a negative iteration count means "run until the
                    # soak timeout"; use a very large repetition count
                    if iteration is not None and iteration < 0:
                        ior_cmd.repetitions.update(1000000)
                    if self.job_timeout is not None:
                        ior_cmd.max_duration.update(self.job_timeout)
                    else:
                        ior_cmd.max_duration.update(10)
                    ior_cmd.api.update(api)
                    ior_cmd.block_size.update(b_size)
                    ior_cmd.transfer_size.update(t_size)
                    ior_cmd.daos_oclass.update(o_type)
                    ior_cmd.set_daos_params(self.server_group, pool)
                    # srun cmdline
                    nprocs = nodesperjob * ppn
                    env = ior_cmd.get_default_env("srun")
                    if ior_cmd.api.value == "MPIIO":
                        env["DAOS_CONT"] = ior_cmd.daos_cont.value
                    cmd = Srun(ior_cmd)
                    cmd.assign_processes(nprocs)
                    cmd.assign_environment(env, True)
                    cmd.ntasks_per_node.update(ppn)
                    log_name = "{}_{}_{}_{}".format(
                        api, b_size, t_size, o_type)
                    commands.append([str(cmd), log_name])
                    # BUGFIX: log the generated command line itself;
                    # previously this logged the repr of the whole
                    # [cmd, log_name] list
                    self.log.info(
                        "<<IOR cmdline>>: %s \n", commands[-1][0])
    return commands
def create_ior_cmdline(self, job_params, job_spec, pool):
    """Create an IOR cmdline to run in slurm batch.

    One "srun ... ior" command string is generated for every combination
    of block size, object class and transfer size found in the yaml job
    spec under /run/<job_spec>/.

    Args:
        job_params (str): job params from yaml file
        job_spec (str): specific ior job to run
        pool (obj): TestPool obj

    Returns:
        cmd: cmdline string
    """
    command = []
    iteration = self.test_iteration
    ior_params = "/run/" + job_spec + "/"
    ior_cmd = IorCommand()
    ior_cmd.namespace = ior_params
    ior_cmd.get_params(self)
    # negative iteration count means "run until the soak timeout";
    # use a very large repetition count so IOR keeps running
    if iteration is not None and iteration < 0:
        ior_cmd.repetitions.update(1000000)
    ior_cmd.max_duration.update(self.params.get("time", job_params + '*'))
    # IOR job specs with a list of parameters; update each value
    #   transfer_size
    #   block_size
    #   daos object class
    tsize_list = ior_cmd.transfer_size.value
    bsize_list = ior_cmd.block_size.value
    oclass_list = ior_cmd.daos_oclass.value
    # NOTE: the single ior_cmd object is re-used and mutated in place
    # for each (block, oclass, transfer) combination; the command is
    # stringified immediately so each append captures the current state
    for b_size in bsize_list:
        ior_cmd.block_size.update(b_size)
        for o_type in oclass_list:
            ior_cmd.daos_oclass.update(o_type)
            for t_size in tsize_list:
                ior_cmd.transfer_size.update(t_size)
                ior_cmd.set_daos_params(self.server_group, pool)
                # export the user environment to test node
                exports = ["ALL"]
                if ior_cmd.api.value == "MPIIO":
                    # MPIIO tasks need the DAOS connection info in
                    # their environment
                    env = {
                        "CRT_ATTACH_INFO_PATH": os.path.join(
                            self.basepath, "install/tmp"),
                        "DAOS_POOL": str(ior_cmd.daos_pool.value),
                        "MPI_LIB": "\"\"",
                        "DAOS_SVCL": str(ior_cmd.daos_svcl.value),
                        "DAOS_SINGLETON_CLI": 1,
                        "FI_PSM2_DISCONNECT": 1
                    }
                    exports.extend(
                        ["{}={}".format(
                            key, val) for key, val in env.items()])
                cmd = "srun -l --mpi=pmi2 --export={} {}".format(
                    ",".join(exports), ior_cmd)
                command.append(cmd)
                self.log.debug("<<IOR cmdline >>: %s \n", cmd)
    return command
def create_ior_cmdline(self, job_spec, pool, ppn, nodesperjob):
    """Create IOR cmdlines to run in slurm batch.

    Builds a list of sbatch command lists (module load, optional dfuse
    start, srun+ior, optional dfuse stop, exit status), one per
    combination of api, block size, transfer size and object class read
    from the yaml under /run/<job_spec>/.

    Args:
        job_spec (str): ior job in yaml to run
        pool (obj): TestPool obj
        ppn (int): number of tasks to run on each node
        nodesperjob (int): number of nodes per job

    Returns:
        list: [sbatch command list, log file name] pairs
    """
    commands = []
    iteration = self.test_iteration
    ior_params = "/run/" + job_spec + "/*"
    mpi_module = self.params.get(
        "mpi_module", "/run/", default="mpi/mpich-x86_64")
    # IOR job specs with a list of parameters; update each value
    api_list = self.params.get("api", ior_params + "*")
    tsize_list = self.params.get("transfer_size", ior_params + "*")
    bsize_list = self.params.get("block_size", ior_params + "*")
    oclass_list = self.params.get("dfs_oclass", ior_params + "*")
    plugin_path = self.params.get("plugin_path", "/run/hdf5_vol/")
    # check if capable of doing rebuild; if yes then dfs_oclass = RP_*GX
    if is_harasser(self, "rebuild"):
        oclass_list = self.params.get("dfs_oclass", "/run/rebuild/*")
    # update IOR cmdline for each additional IOR obj
    for api in api_list:
        for b_size in bsize_list:
            for t_size in tsize_list:
                for o_type in oclass_list:
                    ior_cmd = IorCommand()
                    ior_cmd.namespace = ior_params
                    ior_cmd.get_params(self)
                    # negative iteration count means "run until the
                    # soak timeout"; use a huge repetition count
                    if iteration is not None and iteration < 0:
                        ior_cmd.repetitions.update(1000000)
                    if self.job_timeout is not None:
                        ior_cmd.max_duration.update(self.job_timeout)
                    else:
                        ior_cmd.max_duration.update(10)
                    # HDF5-VOL runs IOR with the plain HDF5 api plus
                    # the DAOS VOL connector env vars set below
                    if api == "HDF5-VOL":
                        ior_cmd.api.update("HDF5")
                    else:
                        ior_cmd.api.update(api)
                    ior_cmd.block_size.update(b_size)
                    ior_cmd.transfer_size.update(t_size)
                    ior_cmd.dfs_oclass.update(o_type)
                    if ior_cmd.api.value == "DFS":
                        ior_cmd.test_file.update(
                            os.path.join("/", "testfile"))
                    ior_cmd.set_daos_params(self.server_group, pool)
                    env = ior_cmd.get_default_env("srun")
                    sbatch_cmds = ["module load -q {}".format(mpi_module)]
                    # include dfuse cmdlines
                    if api in ["HDF5-VOL", "POSIX"]:
                        dfuse, dfuse_start_cmdlist = start_dfuse(
                            self, pool, nodesperjob, "SLURM")
                        sbatch_cmds.extend(dfuse_start_cmdlist)
                        ior_cmd.test_file.update(
                            os.path.join(dfuse.mount_dir.value,
                                         "testfile"))
                    # add envs if api is HDF5-VOL
                    if api == "HDF5-VOL":
                        env["HDF5_VOL_CONNECTOR"] = "daos"
                        env["HDF5_PLUGIN_PATH"] = "{}".format(plugin_path)
                        # env["H5_DAOS_BYPASS_DUNS"] = 1
                    srun_cmd = Srun(ior_cmd)
                    srun_cmd.assign_processes(nodesperjob * ppn)
                    srun_cmd.assign_environment(env, True)
                    srun_cmd.ntasks_per_node.update(ppn)
                    srun_cmd.nodes.update(nodesperjob)
                    sbatch_cmds.append(str(srun_cmd))
                    # capture srun's exit code before dfuse teardown so
                    # the batch script reports the IOR result
                    sbatch_cmds.append("status=$?")
                    if api in ["HDF5-VOL", "POSIX"]:
                        sbatch_cmds.extend(
                            stop_dfuse(dfuse, nodesperjob, "SLURM"))
                    sbatch_cmds.append("exit $status")
                    log_name = "{}_{}_{}_{}".format(
                        api, b_size, t_size, o_type)
                    commands.append([sbatch_cmds, log_name])
                    # FIX: lazy %-style logging args; previously the
                    # message was eagerly built with str.format
                    self.log.info("<<IOR %s cmdlines>>:", api)
                    for cmd in sbatch_cmds:
                        self.log.info("%s", cmd)
    return commands
def create_ior_cmdline(self, job_spec, pool, ppn, nodesperjob):
    """Create IOR cmdlines to run in slurm batch.

    Builds a list of sbatch command lists (module load, optional dfuse
    start, srun+ior, optional dfuse stop), one per combination of api,
    block size, transfer size and object class read from the yaml under
    /run/<job_spec>/.  Known-bad combinations are skipped and recorded
    as cancel tickets.

    Args:
        self (obj): soak obj
        job_spec (str): ior job in yaml to run
        pool (obj): TestPool obj
        ppn (int): number of tasks to run on each node
        nodesperjob (int): number of nodes per job

    Returns:
        list: [sbatch command list, log file name] pairs
    """
    commands = []
    ior_params = os.path.join(os.sep, "run", job_spec, "*")
    ior_timeout = self.params.get("job_timeout", ior_params, 10)
    mpi_module = self.params.get(
        "mpi_module", "/run/*", default="mpi/mpich-x86_64")
    # IOR job specs with a list of parameters; update each value
    api_list = self.params.get("api", ior_params)
    tsize_list = self.params.get("transfer_size", ior_params)
    bsize_list = self.params.get("block_size", ior_params)
    oclass_list = self.params.get("dfs_oclass", ior_params)
    plugin_path = self.params.get("plugin_path", "/run/hdf5_vol/")
    # update IOR cmdline for each additional IOR obj
    for api in api_list:
        for b_size in bsize_list:
            for t_size in tsize_list:
                for o_type in oclass_list:
                    # Cancel for ticket DAOS-6095
                    if (api in ["HDF5-VOL", "HDF5", "POSIX"]
                            and t_size == "4k"
                            and o_type in ["RP_2G1", "RP_2GX"]):
                        self.add_cancel_ticket(
                            "DAOS-6095",
                            "IOR -a {} with -t {} and -o {}".format(
                                api, t_size, o_type))
                        continue
                    # Cancel for ticket DAOS-6308
                    if api == "MPIIO" and o_type == "RP_2GX":
                        self.add_cancel_ticket(
                            "DAOS-6308",
                            "IOR -a {} with -o {}".format(api, o_type))
                        continue
                    # dfuse-backed apis are not run at high task counts
                    if api in ["HDF5-VOL", "HDF5", "POSIX"] and ppn > 16:
                        continue
                    ior_cmd = IorCommand()
                    ior_cmd.namespace = ior_params
                    ior_cmd.get_params(self)
                    ior_cmd.max_duration.update(ior_timeout)
                    # HDF5-VOL runs IOR with the plain HDF5 api plus
                    # the DAOS VOL connector env vars set below
                    if api == "HDF5-VOL":
                        ior_cmd.api.update("HDF5")
                    else:
                        ior_cmd.api.update(api)
                    ior_cmd.block_size.update(b_size)
                    ior_cmd.transfer_size.update(t_size)
                    # dfuse-backed apis take the oclass from the
                    # container instead of the IOR dfs options
                    if api in ["HDF5-VOL", "POSIX"]:
                        ior_cmd.dfs_oclass.update(None)
                        ior_cmd.dfs_dir_oclass.update(None)
                    else:
                        ior_cmd.dfs_oclass.update(o_type)
                        ior_cmd.dfs_dir_oclass.update(o_type)
                    if ior_cmd.api.value == "DFS":
                        ior_cmd.test_file.update(
                            os.path.join("/", "testfile"))
                    # create a fresh container for this combination and
                    # point IOR at it
                    add_containers(self, pool, o_type)
                    ior_cmd.set_daos_params(
                        self.server_group, pool, self.container[-1].uuid)
                    env = ior_cmd.get_default_env("srun")
                    sbatch_cmds = ["module load -q {}".format(mpi_module)]
                    # include dfuse cmdlines
                    log_name = "{}_{}_{}_{}_{}_{}_{}_{}".format(
                        job_spec, api, b_size, t_size, o_type,
                        nodesperjob * ppn, nodesperjob, ppn)
                    if api in ["HDF5-VOL", "POSIX"]:
                        dfuse, dfuse_start_cmdlist = start_dfuse(
                            self, pool, self.container[-1], nodesperjob,
                            "SLURM", name=log_name, job_spec=job_spec)
                        sbatch_cmds.extend(dfuse_start_cmdlist)
                        ior_cmd.test_file.update(
                            os.path.join(dfuse.mount_dir.value,
                                         "testfile"))
                    # add envs if api is HDF5-VOL
                    if api == "HDF5-VOL":
                        env["HDF5_VOL_CONNECTOR"] = "daos"
                        env["HDF5_PLUGIN_PATH"] = "{}".format(plugin_path)
                        # env["H5_DAOS_BYPASS_DUNS"] = 1
                    srun_cmd = Srun(ior_cmd)
                    srun_cmd.assign_processes(nodesperjob * ppn)
                    srun_cmd.assign_environment(env, True)
                    srun_cmd.ntasks_per_node.update(ppn)
                    srun_cmd.nodes.update(nodesperjob)
                    sbatch_cmds.append(str(srun_cmd))
                    # capture srun's exit code before dfuse teardown
                    sbatch_cmds.append("status=$?")
                    if api in ["HDF5-VOL", "POSIX"]:
                        sbatch_cmds.extend(
                            stop_dfuse(dfuse, nodesperjob, "SLURM"))
                    commands.append([sbatch_cmds, log_name])
                    # FIX: lazy %-style logging args; previously the
                    # message was eagerly built with str.format
                    self.log.info("<<IOR %s cmdlines>>:", api)
                    for cmd in sbatch_cmds:
                        self.log.info("%s", cmd)
    return commands