Example #1
    def create_ior_cmdline(self, job_spec, pool, ppn, nodesperjob):
        """Create an IOR cmdline to run in slurm batch.

        Args:

            job_spec (str):   ior job in yaml to run
            pool (obj):       TestPool obj
            ppn(int):         number of tasks to run on each node
            nodesperjob(int): number of nodes per job

        Returns:
            cmd: cmdline string

        """
        commands = []

        iteration = self.test_iteration
        ior_params = "/run/" + job_spec + "/*"
        # IOR job specs with a list of parameters; update each value
        api_list = self.params.get("api", ior_params + "*")
        tsize_list = self.params.get("transfer_size", ior_params + "*")
        bsize_list = self.params.get("block_size", ior_params + "*")
        oclass_list = self.params.get("daos_oclass", ior_params + "*")
        # check if capable of doing rebuild; if yes then daos_oclass = RP_*GX
        if self.is_harasser("rebuild"):
            oclass_list = self.params.get("daos_oclass", "/run/rebuild/*")
        # update IOR cmdline for each additional IOR obj
        for api in api_list:
            for b_size in bsize_list:
                for t_size in tsize_list:
                    for o_type in oclass_list:
                        ior_cmd = IorCommand()
                        ior_cmd.namespace = ior_params
                        ior_cmd.get_params(self)
                        if iteration is not None and iteration < 0:
                            ior_cmd.repetitions.update(1000000)
                        if self.job_timeout is not None:
                            ior_cmd.max_duration.update(self.job_timeout)
                        else:
                            ior_cmd.max_duration.update(10)
                        ior_cmd.api.update(api)
                        ior_cmd.block_size.update(b_size)
                        ior_cmd.transfer_size.update(t_size)
                        ior_cmd.daos_oclass.update(o_type)
                        ior_cmd.set_daos_params(self.server_group, pool)
                        # srun cmdline
                        nprocs = nodesperjob * ppn
                        env = ior_cmd.get_default_env("srun")
                        if ior_cmd.api.value == "MPIIO":
                            env["DAOS_CONT"] = ior_cmd.daos_cont.value
                        cmd = Srun(ior_cmd)
                        cmd.assign_processes(nprocs)
                        cmd.assign_environment(env, True)
                        cmd.ntasks_per_node.update(ppn)
                        log_name = "{}_{}_{}_{}".format(
                            api, b_size, t_size, o_type)
                        commands.append([str(cmd), log_name])
                        self.log.info("<<IOR cmdline>>: %s", commands[-1][0])
        return commands
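
Each returned entry pairs a complete srun command line with a log name derived from the parameter combination. A minimal sketch of how a caller might consume the result, assuming a hypothetical write_slurm_script helper that is not part of the code above:

import os

def write_slurm_script(path, cmdline, log_name):
    # Sketch only: wrap one srun cmdline in a single-command sbatch script.
    with open(path, "w") as script:
        script.write("#!/bin/bash\n")
        script.write("#SBATCH --output={}.out\n".format(log_name))
        script.write(cmdline + "\n")
    os.chmod(path, 0o755)

# Hypothetical usage with the method above:
# for cmdline, log_name in self.create_ior_cmdline("ior_smoke", pool, 4, 2):
#     write_slurm_script("/tmp/{}.sh".format(log_name), cmdline, log_name)
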
Example #2
    def create_ior_cmdline(self, job_params, job_spec, pool):
        """Create an IOR cmdline to run in slurm batch.

        Args:
            job_params (str): job params from yaml file
            job_spec (str): specific ior job to run
            pool (obj):   TestPool obj

        Returns:
            cmd: cmdline string

        """
        commands = []
        iteration = self.test_iteration
        ior_params = "/run/" + job_spec + "/"

        ior_cmd = IorCommand()
        ior_cmd.namespace = ior_params
        ior_cmd.get_params(self)
        if iteration is not None and iteration < 0:
            ior_cmd.repetitions.update(1000000)
        ior_cmd.max_duration.update(self.params.get("time", job_params + "*"))
        # IOR job specs with a list of parameters; update each value
        #   transfer_size
        #   block_size
        #   daos object class
        tsize_list = ior_cmd.transfer_size.value
        bsize_list = ior_cmd.block_size.value
        oclass_list = ior_cmd.daos_oclass.value
        for b_size in bsize_list:
            ior_cmd.block_size.update(b_size)
            for o_type in oclass_list:
                ior_cmd.daos_oclass.update(o_type)
                for t_size in tsize_list:
                    ior_cmd.transfer_size.update(t_size)
                    ior_cmd.set_daos_params(self.server_group, pool)
                    # export the user environment to test node
                    exports = ["ALL"]
                    if ior_cmd.api.value == "MPIIO":
                        env = {
                            "CRT_ATTACH_INFO_PATH": os.path.join(
                                self.basepath, "install/tmp"),
                            "DAOS_POOL": str(ior_cmd.daos_pool.value),
                            "MPI_LIB": "\"\"",
                            "DAOS_SVCL": str(ior_cmd.daos_svcl.value),
                            "DAOS_SINGLETON_CLI": 1,
                            "FI_PSM2_DISCONNECT": 1
                        }
                        exports.extend(
                            ["{}={}".format(
                                key, val) for key, val in env.items()])
                    cmd = "srun -l --mpi=pmi2 --export={} {}".format(
                        ",".join(exports), ior_cmd)
                    commands.append(cmd)
                    self.log.debug("<<IOR cmdline>>: %s", cmd)
        return commands
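
For the MPIIO branch above, the srun --export list starts with ALL (forward the submitting shell's environment) and appends the DAOS-specific overrides. A standalone sketch of that construction; the basepath, pool UUID, and service list values are illustrative placeholders, not values from a real run:

import os

env = {
    "CRT_ATTACH_INFO_PATH": os.path.join("/path/to/basepath", "install/tmp"),
    "DAOS_POOL": "pool-uuid-placeholder",
    "MPI_LIB": "\"\"",
    "DAOS_SVCL": "0",
    "DAOS_SINGLETON_CLI": 1,
    "FI_PSM2_DISCONNECT": 1,
}
exports = ["ALL"]
exports.extend("{}={}".format(key, val) for key, val in env.items())
# Yields: srun -l --mpi=pmi2 --export=ALL,CRT_ATTACH_INFO_PATH=...,DAOS_POOL=...
print("srun -l --mpi=pmi2 --export={} <ior_cmd>".format(",".join(exports)))
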
Example #3
def create_ior_cmdline(self, job_spec, pool, ppn, nodesperjob):
    """Create an IOR cmdline to run in slurm batch.

    Args:

        job_spec (str):   ior job in yaml to run
        pool (obj):       TestPool obj
        ppn(int):         number of tasks to run on each node
        nodesperjob(int): number of nodes per job

    Returns:
        cmd: cmdline string

    """
    commands = []
    iteration = self.test_iteration
    ior_params = "/run/" + job_spec + "/*"
    mpi_module = self.params.get(
        "mpi_module", "/run/", default="mpi/mpich-x86_64")
    # IOR job specs with a list of parameters; update each value
    api_list = self.params.get("api", ior_params + "*")
    tsize_list = self.params.get("transfer_size", ior_params + "*")
    bsize_list = self.params.get("block_size", ior_params + "*")
    oclass_list = self.params.get("dfs_oclass", ior_params + "*")
    plugin_path = self.params.get("plugin_path", "/run/hdf5_vol/")
    # check if capable of doing rebuild; if yes then dfs_oclass = RP_*GX
    if is_harasser(self, "rebuild"):
        oclass_list = self.params.get("dfs_oclass", "/run/rebuild/*")
    # update IOR cmdline for each additional IOR obj
    for api in api_list:
        for b_size in bsize_list:
            for t_size in tsize_list:
                for o_type in oclass_list:
                    ior_cmd = IorCommand()
                    ior_cmd.namespace = ior_params
                    ior_cmd.get_params(self)
                    if iteration is not None and iteration < 0:
                        ior_cmd.repetitions.update(1000000)
                    if self.job_timeout is not None:
                        ior_cmd.max_duration.update(self.job_timeout)
                    else:
                        ior_cmd.max_duration.update(10)
                    if api == "HDF5-VOL":
                        ior_cmd.api.update("HDF5")
                    else:
                        ior_cmd.api.update(api)
                    ior_cmd.block_size.update(b_size)
                    ior_cmd.transfer_size.update(t_size)
                    ior_cmd.dfs_oclass.update(o_type)
                    if ior_cmd.api.value == "DFS":
                        ior_cmd.test_file.update(
                            os.path.join("/", "testfile"))
                    ior_cmd.set_daos_params(self.server_group, pool)
                    env = ior_cmd.get_default_env("srun")
                    sbatch_cmds = ["module load -q {}".format(mpi_module)]
                    # include dfuse cmdlines
                    if api in ["HDF5-VOL", "POSIX"]:
                        dfuse, dfuse_start_cmdlist = start_dfuse(
                            self, pool, nodesperjob, "SLURM")
                        sbatch_cmds.extend(dfuse_start_cmdlist)
                        ior_cmd.test_file.update(
                            os.path.join(dfuse.mount_dir.value, "testfile"))
                    # add envs if api is HDF5-VOL
                    if api == "HDF5-VOL":
                        env["HDF5_VOL_CONNECTOR"] = "daos"
                        env["HDF5_PLUGIN_PATH"] = "{}".format(plugin_path)
                        # env["H5_DAOS_BYPASS_DUNS"] = 1
                    srun_cmd = Srun(ior_cmd)
                    srun_cmd.assign_processes(nodesperjob * ppn)
                    srun_cmd.assign_environment(env, True)
                    srun_cmd.ntasks_per_node.update(ppn)
                    srun_cmd.nodes.update(nodesperjob)
                    sbatch_cmds.append(str(srun_cmd))
                    sbatch_cmds.append("status=$?")
                    if api in ["HDF5-VOL", "POSIX"]:
                        sbatch_cmds.extend(
                            stop_dfuse(dfuse, nodesperjob, "SLURM"))
                    sbatch_cmds.append("exit $status")
                    log_name = "{}_{}_{}_{}".format(
                        api, b_size, t_size, o_type)
                    commands.append([sbatch_cmds, log_name])
                    self.log.info("<<IOR %s cmdlines>>:", api)
                    for cmd in sbatch_cmds:
                        self.log.info("%s", cmd)
    return commands
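
Unlike Example #1, each entry here carries a list of shell lines destined for an sbatch script body: module load, optional dfuse mount, the srun invocation, status capture, optional dfuse teardown, and an exit that propagates IOR's status past the cleanup steps. A sketch of the resulting skeleton for a POSIX run, with placeholder lines standing in for whatever start_dfuse and stop_dfuse emit:

sbatch_cmds = [
    "module load -q mpi/mpich-x86_64",
    "<dfuse start cmdline(s)>",  # placeholder for start_dfuse() output
    "srun --ntasks=8 --ntasks-per-node=4 --nodes=2 <ior_cmd>",  # illustrative
    "status=$?",                 # capture IOR's exit code before teardown
    "<dfuse stop cmdline(s)>",   # placeholder for stop_dfuse() output
    "exit $status",              # report IOR's status as the job status
]
print("\n".join(sbatch_cmds))
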
Example #4
def create_ior_cmdline(self, job_spec, pool, ppn, nodesperjob):
    """Create an IOR cmdline to run in slurm batch.

    Args:

        self (obj): soak obj
        job_spec (str):   ior job in yaml to run
        pool (obj):       TestPool obj
        ppn(int):         number of tasks to run on each node
        nodesperjob(int): number of nodes per job

    Returns:
        cmd: cmdline string

    """
    commands = []
    ior_params = os.path.join(os.sep, "run", job_spec, "*")
    ior_timeout = self.params.get("job_timeout", ior_params, 10)
    mpi_module = self.params.get("mpi_module",
                                 "/run/*",
                                 default="mpi/mpich-x86_64")
    # IOR job specs with a list of parameters; update each value
    api_list = self.params.get("api", ior_params)
    tsize_list = self.params.get("transfer_size", ior_params)
    bsize_list = self.params.get("block_size", ior_params)
    oclass_list = self.params.get("dfs_oclass", ior_params)
    plugin_path = self.params.get("plugin_path", "/run/hdf5_vol/")
    # update IOR cmdline for each additional IOR obj
    for api in api_list:
        for b_size in bsize_list:
            for t_size in tsize_list:
                for o_type in oclass_list:
                    # Cancel for ticket DAOS-6095
                    if (api in ["HDF5-VOL", "HDF5", "POSIX"] and t_size == "4k"
                            and o_type in ["RP_2G1", "RP_2GX"]):
                        self.add_cancel_ticket(
                            "DAOS-6095",
                            "IOR -a {} with -t {} and -o {}".format(
                                api, t_size, o_type))
                        continue
                    # Cancel for ticket DAOS-6308
                    if api == "MPIIO" and o_type == "RP_2GX":
                        self.add_cancel_ticket(
                            "DAOS-6308",
                            "IOR -a {} with -o {}".format(api, o_type))
                        continue
                    # skip HDF5/POSIX runs with more than 16 tasks per node
                    if api in ["HDF5-VOL", "HDF5", "POSIX"] and ppn > 16:
                        continue
                    ior_cmd = IorCommand()
                    ior_cmd.namespace = ior_params
                    ior_cmd.get_params(self)
                    ior_cmd.max_duration.update(ior_timeout)
                    if api == "HDF5-VOL":
                        ior_cmd.api.update("HDF5")
                    else:
                        ior_cmd.api.update(api)
                    ior_cmd.block_size.update(b_size)
                    ior_cmd.transfer_size.update(t_size)
                    if api in ["HDF5-VOL", "POSIX"]:
                        ior_cmd.dfs_oclass.update(None)
                        ior_cmd.dfs_dir_oclass.update(None)
                    else:
                        ior_cmd.dfs_oclass.update(o_type)
                        ior_cmd.dfs_dir_oclass.update(o_type)
                    if ior_cmd.api.value == "DFS":
                        ior_cmd.test_file.update(os.path.join("/", "testfile"))
                    add_containers(self, pool, o_type)
                    ior_cmd.set_daos_params(self.server_group, pool,
                                            self.container[-1].uuid)
                    env = ior_cmd.get_default_env("srun")
                    sbatch_cmds = ["module load -q {}".format(mpi_module)]
                    # include dfuse cmdlines
                    log_name = "{}_{}_{}_{}_{}_{}_{}_{}".format(
                        job_spec, api, b_size, t_size, o_type,
                        nodesperjob * ppn, nodesperjob, ppn)
                    if api in ["HDF5-VOL", "POSIX"]:
                        dfuse, dfuse_start_cmdlist = start_dfuse(
                            self,
                            pool,
                            self.container[-1],
                            nodesperjob,
                            "SLURM",
                            name=log_name,
                            job_spec=job_spec)
                        sbatch_cmds.extend(dfuse_start_cmdlist)
                        ior_cmd.test_file.update(
                            os.path.join(dfuse.mount_dir.value, "testfile"))
                    # add envs if api is HDF5-VOL
                    if api == "HDF5-VOL":
                        env["HDF5_VOL_CONNECTOR"] = "daos"
                        env["HDF5_PLUGIN_PATH"] = "{}".format(plugin_path)
                        # env["H5_DAOS_BYPASS_DUNS"] = 1
                    srun_cmd = Srun(ior_cmd)
                    srun_cmd.assign_processes(nodesperjob * ppn)
                    srun_cmd.assign_environment(env, True)
                    srun_cmd.ntasks_per_node.update(ppn)
                    srun_cmd.nodes.update(nodesperjob)
                    sbatch_cmds.append(str(srun_cmd))
                    sbatch_cmds.append("status=$?")
                    if api in ["HDF5-VOL", "POSIX"]:
                        sbatch_cmds.extend(
                            stop_dfuse(dfuse, nodesperjob, "SLURM"))
                    commands.append([sbatch_cmds, log_name])
                    self.log.info("<<IOR %s cmdlines>>:", api)
                    for cmd in sbatch_cmds:
                        self.log.info("%s", cmd)
    return commands
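
The guards at the top of the loop body skip parameter combinations with known open tickets before any command is built. A standalone sketch of the same conditions as a predicate; the helper name is hypothetical:

def blocking_ticket(api, t_size, o_type, ppn):
    # Return the ticket (or reason) blocking this combination, or None.
    if (api in ["HDF5-VOL", "HDF5", "POSIX"] and t_size == "4k"
            and o_type in ["RP_2G1", "RP_2GX"]):
        return "DAOS-6095"
    if api == "MPIIO" and o_type == "RP_2GX":
        return "DAOS-6308"
    if api in ["HDF5-VOL", "HDF5", "POSIX"] and ppn > 16:
        return "untracked: ppn > 16"  # skipped silently in the source
    return None

print(blocking_ticket("MPIIO", "1m", "RP_2GX", 8))  # -> DAOS-6308
print(blocking_ticket("DFS", "1m", "SX", 8))        # -> None
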