Python Orterun Examples, job_manager_utils.Orterun Python Examples

Example #1

0

Show file

    def run_conf(self, dmg_config_file):
        """Run the daos_run_io_conf command as a foreground process.

        The command is launched through Orterun and its standard output is
        inspected to decide whether the run succeeded.

        Args:
            dmg_config_file: dmg file to run test.

        Returns:
            bool: True if the command ran, printed no line containing
                "ERROR" and ended with the expected success message; False
                if the command failed, produced no output, reported an
                error or did not finish with the success message.

        """
        success_msg = 'daos_run_io_conf completed successfully'
        command = " ".join([os.path.join(self._path, "daos_run_io_conf"), " -n ",
                            dmg_config_file, self.filename.value])

        # Run the daos_run_io_conf command using Openmpi
        manager = Orterun(command, mpi_type=self.mpi_type)
        try:
            out = manager.run()
        except CommandFailure:
            # The command itself failed
            return False

        output_lines = out.stdout_text.splitlines()

        # No output at all means the success message cannot be present;
        # report a failure instead of raising IndexError on the [-1] lookup.
        if not output_lines:
            return False

        # Any "ERROR" in stdout is treated as a failure
        if any('ERROR' in line for line in output_lines):
            return False

        # The last line must confirm the test completed
        return success_msg in output_lines[-1]

Example #2

0

Show file

File: daos_perf_base.py Project: zzh-wisdom/daos

    def run_daos_perf(self):
        """Launch daos_perf through orterun and fail the test on reported errors.

        The daos_perf command is populated from the test yaml, wrapped in an
        Orterun job manager configured with the client hosts, process count
        and environment, and then executed. Any stdout line matching one of
        the known error phrases causes the test to fail.
        """
        # Process count configured alongside the daos_perf options
        num_processes = self.params.get("processes", "/run/daos_perf/*")

        # Populate the daos_perf command from the test yaml file
        perf_cmd = DaosPerfCommand(self.bin)
        perf_cmd.get_params(self)
        self.log.info("daos_perf command: %s", str(perf_cmd))
        perf_env = perf_cmd.get_environment(self.server_managers[0])

        # Wrap the command in an orterun job manager
        job_manager = Orterun(perf_cmd)
        job_manager.assign_hosts(self.hostlist_clients, self.workdir, None)
        job_manager.assign_processes(num_processes)
        job_manager.assign_environment(perf_env)
        self.log.info("orterun command: %s", str(job_manager))

        # Execute and scan stdout for lines that indicate a problem
        output = job_manager.run().stdout_text
        error_pattern = r"(.*(?:non-zero exit code|errors|failed|Failed).*)"
        error_lines = re.findall(error_pattern, output)
        if not error_lines:
            return

        details = "  \n".join(error_lines)
        self.fail("Errors detected in daos_perf output:\n{}".format(details))

Example #3

0

Show file

    def run_conf(self):
        """Execute daos_run_io_conf in the foreground through Orterun.

        The executable is looked up under ``self._path`` and is passed
        ``self.filename.value`` as its only argument. Nothing is returned and
        no exception is handled here, so anything raised while running the
        command propagates to the caller.

        """
        executable = os.path.join(self._path, "daos_run_io_conf")
        full_command = " ".join((executable, self.filename.value))

        # Launch the daos_run_io_conf command using Openmpi
        Orterun(full_command).run()

Example #4

0

Show file

    def test_self_test(self):
        """Run the CaRT self_test command under orterun.

        The test fails if running the command raises CommandFailure.

        :avocado: tags=all,pr,smoke,unittest,tiny,cartselftest
        """
        # Build the orterun job manager around the self_test command
        launcher = Orterun(SelfTest(self.bin))
        launcher.map_by.update(None, "orterun/map_by")
        launcher.enable_recovery.update(False, "orterun/enable_recovery")

        # Fill in the self_test command line parameters
        self_test = launcher.job
        self_test.get_params(self)
        self_test.group_name.update(self.server_group, "group_name")
        sizes = self.params.get("size", "/run/muxtestparams/message_size/*")
        self_test.message_sizes.update(sizes[0], "message_sizes")
        attach_dir = os.path.dirname(self.uri_file)
        self_test.attach_info.update(attach_dir, "attach_info")

        # Environment variables for the self_test orterun command
        launcher.assign_environment(self.cart_env)

        # Execute; log the failure details before failing the test
        try:
            launcher.run()
        except CommandFailure as error:
            self.test_log.info(
                "CaRT self_test returned non-zero: %s", str(error))
            self.fail("CaRT self_test returned non-zero")

Example #5

0

Show file

File: cart_self_test.py Project: marcelarosalesj/daos

    def test_self_test(self):
        """Run the CaRT self_test command under orterun.

        The test fails if running the command raises CommandFailure.

        :avocado: tags=all,smoke,unittest,tiny,cartselftest
        """
        # Build the orterun job manager around the self_test command
        launcher = Orterun(SelfTest(self.cart_bin))
        server_uri = "file:{}".format(self.uri_file)
        launcher.ompi_server.update(server_uri, "orterun/ompi_server")
        launcher.map_by.update(None, "orterun/map_by")
        launcher.enable_recovery.update(False, "orterun/enable_recovery")

        # Fill in the self_test command line parameters
        self_test = launcher.job
        self_test.get_params(self)
        self_test.group_name.value = self.server_group

        # Environment variables for the self_test orterun command
        launcher.assign_environment(self.cart_env)

        # Execute; log the failure details before failing the test
        try:
            launcher.run()
        except CommandFailure as error:
            self.test_log.info(
                "CaRT self_test returned non-zero: %s", str(error))
            self.fail("CaRT self_test returned non-zero")

Example #6

0

Show file

    def get_mdtest_job_manager_command(self, manager):
        """Select the MPI job manager object used to launch Mdtest.

        Args:
            manager: name of the requested job manager. The value "MPICH"
                selects Mpirun; any other value selects Orterun.

        Returns:
            JobManager: the object for the mpi job manager command

        """
        # Anything other than an explicit MPICH request uses orterun
        if manager != "MPICH":
            return Orterun(self.mdtest_cmd)

        # Running with mpich: fail the test when it is reported as missing
        mpich_missing = (
            MpioUtils().mpich_installed(self.hostlist_clients) is False)
        if mpich_missing:
            self.fail("Exiting Test: Mpich not installed")
        return Mpirun(self.mdtest_cmd, mpitype="mpich")

Example #7

0

Show file

    def test_load_mpi(self):
        """Check that the Orterun and Mpirun job managers can be created.

        Each job manager is constructed without a job; a CommandFailure raised
        during construction fails the test.

        :avocado: tags=all
        :avocado: tags=harness,harness_basic_test,test_load_mpi
        :avocado: tags=load_mpi
        """
        # Constructors are wrapped in lambdas so each one is only invoked
        # inside its own try block, in order.
        launchers = (
            (lambda: Orterun(None),
             "Orterun initialization failed: {}"),
            (lambda: Mpirun(None, mpi_type="mpich"),
             "Mpirun initialization failed: {}"),
        )
        for create, failure_msg in launchers:
            try:
                create()
            except CommandFailure as error:
                self.fail(failure_msg.format(error))

Example #8

0

Show file

File: metadata.py Project: zzh-wisdom/daos

    def test_metadata_server_restart(self):
        """JIRA ID: DAOS-1512.

        Test Description:
            This test will verify 2000 IOR small size container after server
            restart. Test will write IOR in 5 different threads for faster
            execution time. Each thread will create 400 (8bytes) containers to
            the same pool. Restart the servers, read IOR container file written
            previously and validate data integrity by using IOR option
            "-R -G 1".

        Use Cases:
            ?

        :avocado: tags=metadata,metadata_ior,nvme,large
        """
        files_per_thread = 400
        total_ior_threads = 5
        self.out_queue = queue.Queue()

        processes = self.params.get("slots", "/run/ior/clientslots/*")

        # One list of container uuids per thread; the same uuids are used for
        # the write pass and the read pass.
        list_of_uuid_lists = [
            [str(uuid.uuid4()) for _ in range(files_per_thread)]
            for _ in range(total_ior_threads)]

        # Launch threads to run IOR to write data, restart the agents and
        # servers, and then run IOR to read the data
        for operation in ("write", "read"):
            # Create the IOR threads
            threads = []
            for index in range(total_ior_threads):
                # Define the arguments for the ior_runner_thread method
                ior_cmd = IorCommand()
                ior_cmd.get_params(self)
                ior_cmd.set_daos_params(self.server_group, self.pool)
                ior_cmd.flags.value = self.params.get(
                    "F", "/run/ior/ior{}flags/".format(operation))

                # Define the job manager for the IOR command
                manager = Orterun(ior_cmd)
                env = ior_cmd.get_default_env(str(manager))
                manager.assign_hosts(self.hostlist_clients, self.workdir, None)
                manager.assign_processes(processes)
                manager.assign_environment(env)

                # Add a thread for these IOR arguments
                threads.append(
                    threading.Thread(
                        target=ior_runner_thread,
                        kwargs={
                            "manager": manager,
                            "uuids": list_of_uuid_lists[index],
                            "results": self.out_queue}))

                self.log.info(
                    "Created %s thread %s with container uuids %s", operation,
                    index, list_of_uuid_lists[index])

            # Launch the IOR threads
            if self.thread_control(threads, operation) == "FAIL":
                failure_msg = "IOR {} Thread FAIL".format(operation)
                self.d_log.error(failure_msg)
                self.fail(failure_msg)

            # Restart the agents and servers after the write / before the read
            if operation == "write":
                # Stop the agents
                errors = self.stop_agents()
                self.assertEqual(
                    len(errors), 0,
                    "Error stopping agents:\n  {}".format("\n  ".join(errors)))

                # Stop the servers
                errors = self.stop_servers()
                self.assertEqual(
                    len(errors), 0,
                    "Error stopping servers:\n  {}".format("\n  ".join(errors)))

                # Start the agents
                self.start_agent_managers()

                # Start the servers
                self.start_server_managers()

Example #9

0

Show file

    def test_metadata_server_restart(self):
        """JIRA ID: DAOS-1512.

        Test Description:
            This test will verify 2000 IOR small size container after server
            restart. Test will write IOR in 5 different threads for faster
            execution time. Each thread will create 400 (8bytes) containers to
            the same pool. Restart the servers, read IOR container file written
            previously and validate data integrity by using IOR option
            "-R -G 1".

        Use Cases:
            ?

        :avocado: tags=all,full_regression
        :avocado: tags=hw,large
        :avocado: tags=server,metadata,metadata_ior,nvme
        """
        self.create_pool()
        files_per_thread = 400
        total_ior_threads = 5

        processes = self.params.get("slots", "/run/ior/clientslots/*")

        # One list of container uuids per thread; the same uuids are used for
        # the write pass and the read pass.
        list_of_uuid_lists = [[
            str(uuid.uuid4()) for _ in range(files_per_thread)
        ] for _ in range(total_ior_threads)]

        # Setup the thread manager
        # NOTE(review): the same ThreadManager instance receives add() calls
        # in both the write and the read pass - confirm check_run() clears
        # the previously added jobs.
        thread_manager = ThreadManager(run_ior_loop, self.timeout - 30)

        # Launch threads to run IOR to write data, restart the agents and
        # servers, and then run IOR to read the data
        for operation in ("write", "read"):
            # Create the IOR threads
            for index in range(total_ior_threads):
                # Define the arguments for the run_ior_loop method
                ior_cmd = IorCommand()
                ior_cmd.get_params(self)
                ior_cmd.set_daos_params(self.server_group, self.pool)
                ior_cmd.flags.value = self.params.get(
                    "F", "/run/ior/ior{}flags/".format(operation))

                # Define the job manager for the IOR command
                self.ior_managers.append(Orterun(ior_cmd))
                env = ior_cmd.get_default_env(str(self.ior_managers[-1]))
                self.ior_managers[-1].assign_hosts(self.hostlist_clients,
                                                   self.workdir, None)
                self.ior_managers[-1].assign_processes(processes)
                self.ior_managers[-1].assign_environment(env)
                self.ior_managers[-1].verbose = False

                # Add a thread for these IOR arguments
                thread_manager.add(manager=self.ior_managers[-1],
                                   uuids=list_of_uuid_lists[index],
                                   tmpdir_base=self.test_dir)
                self.log.info("Created %s thread %s with container uuids %s",
                              operation, index, list_of_uuid_lists[index])

            # Launch the IOR threads
            self.log.info("Launching %d IOR %s threads", thread_manager.qty,
                          operation)
            failed_thread_count = thread_manager.check_run()
            if failed_thread_count > 0:
                msg = "{} FAILED IOR {} Thread(s)".format(
                    failed_thread_count, operation)
                self.d_log.error(msg)
                self.fail(msg)

            # Restart the agents and servers after the write / before the read
            if operation == "write":
                # Stop the agents
                errors = self.stop_agents()
                self.assertEqual(
                    len(errors), 0,
                    "Error stopping agents:\n  {}".format("\n  ".join(errors)))

                # Restart the servers w/o formatting the storage
                errors = self.restart_servers()
                self.assertEqual(
                    len(errors), 0, "Error restarting servers:\n  {}".format(
                        "\n  ".join(errors)))

                # Start the agents
                self.start_agent_managers()

        self.log.info("Test passed")

Example #10

0

Show file

File: cart_utils.py Project: liw/daos

    def build_cmd(self, env, host, **kwargs):
        """Build the orterun command string for a test binary.

        Args:
            env: leading part of the job command; the options assembled here
                are appended to it with ``+=``.
            host: prefix used to look up the per-host yaml parameters
                ("<host>_bin", "<host>_arg", "<host>_env", "<host>_slt",
                "<host>_ppn"), the host list under "/run/hosts/*/" and the
                "<host>_CRT_CTX_NUM" environment variable.
            **kwargs: optional "index" entry, passed to get_yaml_list_elem()
                to pick one element when a yaml parameter is a list.

        Returns:
            str: the string form of the configured Orterun job

        """
        env_ccsa = self.params.get("env", "/run/env_CRT_CTX_SHARE_ADDR/*/")
        test_name = self.params.get("name", "/run/tests/*/")

        # Write memcheck result file(s) to $HOME or DAOS_TEST_SHARED_DIR.
        daos_test_shared_dir = os.getenv('DAOS_TEST_SHARED_DIR',
                                         os.getenv('HOME'))

        if env_ccsa is None:
            env_ccsa = ""

        # The doubled braces survive format() as literal \{PMIX_ID\}
        memcheck_template = r"{}/valgrind.%q\{{PMIX_ID\}}_{}-{}.memcheck"
        memcheck_xml = memcheck_template.format(
            daos_test_shared_dir, test_name, env_ccsa)

        index = kwargs.get('index', None)

        # Return 0 on memory leaks while suppression file is completed
        # (CART-975 and CART-977)
        memcheck_error_code = 0

        # Valgrind prefix; only appended below when WITH_VALGRIND=memcheck
        tst_vgd = (
            " valgrind --xml=yes "
            + "--xml-file={}".format(memcheck_xml) + " "
            + "--fair-sched=yes --partial-loads-ok=yes "
            + "--leak-check=full --show-leak-kinds=all "
            + " --gen-suppressions=all "
            + "--suppressions=" + self.supp_file + " "
            + "--track-origins=yes "
            + "--error-exitcode=" + str(memcheck_error_code) + " "
            + "--show-reachable=yes --trace-children=yes"
        )

        _tst_bin = self.params.get("{}_bin".format(host), "/run/tests/*/")
        _tst_arg = self.params.get("{}_arg".format(host), "/run/tests/*/")
        _tst_env = self.params.get("{}_env".format(host), "/run/tests/*/")
        _tst_slt = self.params.get("{}_slt".format(host), "/run/tests/*/")
        # Context count defaults to "16" unless overridden in the environment
        _tst_ctx = os.environ.get("{}_CRT_CTX_NUM".format(host), "16")

        # If the yaml parameter is a list, return the n-th element
        tst_bin = self.get_yaml_list_elem(_tst_bin, index)
        tst_arg = self.get_yaml_list_elem(_tst_arg, index)
        tst_env = self.get_yaml_list_elem(_tst_env, index)
        tst_slt = self.get_yaml_list_elem(_tst_slt, index)
        tst_ctx = self.get_yaml_list_elem(_tst_ctx, index)

        tst_host = self.params.get("{}".format(host), "/run/hosts/*/")
        tst_ppn = self.params.get("{}_ppn".format(host), "/run/tests/*/")
        tst_processes = len(tst_host) * int(tst_ppn)
        logparse = self.params.get("logparse", "/run/tests/*/")

        # Use the slot count for the host file when one is configured,
        # otherwise fall back to the processes-per-node value
        host_slots = tst_slt if tst_slt is not None else tst_ppn
        hostfile = write_host_file(tst_host, daos_test_shared_dir, host_slots)

        mca_flags = ["btl self,tcp"]
        if self.provider == "ofi+psm2":
            mca_flags.append("pml ob1")

        # Assemble the job command starting from the supplied prefix
        tst_cmd = env

        if os.getenv("CRT_TEST_CONT", "0") == "1":
            tst_cmd += " --continuous"

        if tst_ctx is not None:
            tst_cmd += " -x CRT_CTX_NUM=" + tst_ctx

        if tst_env is not None:
            tst_cmd += " " + tst_env

        if logparse:
            tst_cmd += " -x D_LOG_FILE_APPEND_PID=1"

        if os.getenv("WITH_VALGRIND", "native") == "memcheck":
            tst_cmd += tst_vgd

        if tst_bin is not None:
            tst_cmd += " " + tst_bin

        if tst_arg is not None:
            tst_cmd += " " + tst_arg

        job = Orterun(tst_cmd)
        job.mca.update(mca_flags)
        job.hostfile.update(hostfile)
        job.pprnode.update(tst_ppn)
        job.processes.update(tst_processes)
        return str(job)

Example #11

0

Show file

File: vol_test_base.py Project: wli5/daos

    def run_test(self):
        """Run one HDF5 VOL test executable from inside a dfuse mount.

        A pool, a container and a dfuse mount are created, the executable
        named by the "testname" parameter is launched through Orterun or
        Mpirun depending on the "mpi_type" parameter, and the test is failed
        through self.fail() if running the command raises CommandFailure.

        """
        # Test specific parameters
        mpi_type = self.params.get("mpi_type", default="mpich")
        test_repo = self.params.get("daos_vol_repo")
        plugin_path = self.params.get("plugin_path")
        testname = self.params.get("testname")
        client_processes = self.params.get("client_processes")
        # NOTE: a loop over a "daos_vol_tests" list parameter was sketched
        # here in commented-out form; only a single test is run.

        # Create the pool and container
        self.add_pool(connect=False)
        self.add_container(self.pool)

        # VOL needs to run from a file system that supports xattr. Currently
        # nfs does not have this attribute, so it was recommended to create a
        # dfuse dir and run the vol tests from there.
        self.start_dfuse(self.hostlist_clients, self.pool, self.container)

        # Pick the job manager matching the requested MPI flavor
        exe = os.path.join(test_repo, testname)
        manager = (
            Orterun(exe, subprocess=False)
            if mpi_type == "openmpi"
            else Mpirun(exe, subprocess=False, mpitype="mpich"))

        # Environment handed to the VOL test processes
        env = EnvironmentVariables()
        settings = (
            ("DAOS_POOL", "{}".format(self.pool.uuid)),
            ("DAOS_SVCL", "{}".format(self.pool.svc_ranks[0])),
            ("DAOS_CONT", "{}".format(self.container.uuid)),
            ("HDF5_VOL_CONNECTOR", "daos"),
            ("HDF5_PLUGIN_PATH", "{}".format(plugin_path)),
        )
        for key, value in settings:
            env[key] = value

        manager.assign_hosts(self.hostlist_clients)
        manager.assign_processes(client_processes)
        manager.assign_environment(env, True)
        # Run from inside the dfuse mount point
        manager.working_dir.value = self.dfuse.mount_dir.value

        # Run the VOL command
        try:
            manager.run()
        except CommandFailure as _error:
            self.fail("{} FAILED> \nException occurred: {}".format(
                exe, str(_error)))