def run_daos_perf(self):
    """Run the daos_perf command via orterun and fail on reported errors."""
    # Number of processes to launch, taken from the test yaml
    proc_count = self.params.get("processes", "/run/daos_perf/*")

    # Build the daos_perf command from the test yaml file
    command = DaosPerfCommand(self.bin)
    command.get_params(self)
    self.log.info("daos_perf command: %s", str(command))
    command_env = command.get_environment(self.server_managers[0])

    # Wrap the command in an orterun job manager
    runner = Orterun(command)
    runner.assign_hosts(self.hostlist_clients, self.workdir, None)
    runner.assign_processes(proc_count)
    runner.assign_environment(command_env)
    self.log.info("orterun command: %s", str(runner))

    # Execute the command and scan its stdout for error indications
    output = runner.run()
    error_lines = re.findall(
        r"(.*(?:non-zero exit code|errors|failed|Failed).*)",
        output.stdout_text)
    if error_lines:
        self.fail("Errors detected in daos_perf output:\n{}".format(
            " \n".join(error_lines)))
def run_test(self):
    """Run the HDF5 VOL testsuites.

    Raises:
        VolFailed: for an invalid test name or test execution failure

    """
    # Gather the test-specific parameters from the yaml
    mpi_type = self.params.get("mpi_type", default="mpich")
    test_repo = self.params.get("daos_vol_repo")
    plugin_path = self.params.get("plugin_path")
    testname = self.params.get("testname")
    client_processes = self.params.get("client_processes")

    # Create a pool and a container for the test
    self.add_pool(connect=False)
    self.add_container(self.pool)

    # VOL needs to run from a file system that supports xattr.
    # Currently nfs does not have this attribute so it was recommended
    # to create a dfuse dir and run vol tests from there.
    self.start_dfuse(self.hostlist_clients, self.pool, self.container)

    # Select the job manager matching the requested MPI flavor
    full_test_path = os.path.join(test_repo, testname)
    if mpi_type == "openmpi":
        job = Orterun(full_test_path, subprocess=False)
    else:
        job = Mpirun(full_test_path, subprocess=False, mpitype="mpich")

    # Environment required by the HDF5 DAOS VOL connector
    env = EnvironmentVariables()
    env["DAOS_POOL"] = str(self.pool.uuid)
    env["DAOS_SVCL"] = str(self.pool.svc_ranks[0])
    env["DAOS_CONT"] = str(self.container.uuid)
    env["HDF5_VOL_CONNECTOR"] = "daos"
    env["HDF5_PLUGIN_PATH"] = str(plugin_path)

    job.assign_hosts(self.hostlist_clients)
    job.assign_processes(client_processes)
    job.assign_environment(env, True)
    # Run from the dfuse mount point (xattr support, see above)
    job.working_dir.value = self.dfuse.mount_dir.value

    # Run the VOL test executable; any command failure fails the test
    try:
        job.run()
    except CommandFailure as error:
        self.fail("{} FAILED> \nException occurred: {}".format(
            full_test_path, str(error)))
def test_metadata_server_restart(self):
    """JIRA ID: DAOS-1512.

    Test Description:
        This test will verify 2000 IOR small size container after server
        restart. Test will write IOR in 5 different threads for faster
        execution time. Each thread will create 400 (8bytes) containers
        to the same pool. Restart the servers, read IOR container file
        written previously and validate data integrity by using IOR
        option "-R -G 1".

    Use Cases:
        ?

    :avocado: tags=metadata,metadata_ior,nvme,large
    """
    files_per_thread = 400
    total_ior_threads = 5
    # Queue used by the ior_runner_thread workers to report their results
    self.out_queue = queue.Queue()

    processes = self.params.get("slots", "/run/ior/clientslots/*")

    # Pre-generate one list of container uuids per thread so the read
    # phase targets exactly the containers written in the write phase.
    list_of_uuid_lists = [
        [str(uuid.uuid4()) for _ in range(files_per_thread)]
        for _ in range(total_ior_threads)]

    # Launch threads to run IOR to write data, restart the agents and
    # servers, and then run IOR to read the data
    for operation in ("write", "read"):
        # Create the IOR threads
        threads = []
        for index in range(total_ior_threads):
            # Define the arguments for the ior_runner_thread method
            ior_cmd = IorCommand()
            ior_cmd.get_params(self)
            ior_cmd.set_daos_params(self.server_group, self.pool)
            # Per-operation flags come from /run/ior/iorwriteflags/ or
            # /run/ior/iorreadflags/ in the test yaml
            ior_cmd.flags.value = self.params.get(
                "F", "/run/ior/ior{}flags/".format(operation))

            # Define the job manager for the IOR command
            manager = Orterun(ior_cmd)
            env = ior_cmd.get_default_env(str(manager))
            manager.assign_hosts(self.hostlist_clients, self.workdir, None)
            manager.assign_processes(processes)
            manager.assign_environment(env)

            # Add a thread for these IOR arguments
            threads.append(
                threading.Thread(
                    target=ior_runner_thread,
                    kwargs={
                        "manager": manager,
                        "uuids": list_of_uuid_lists[index],
                        "results": self.out_queue}))
            # Fixed typo in log message: "Creatied" -> "Created"
            self.log.info(
                "Created %s thread %s with container uuids %s",
                operation, index, list_of_uuid_lists[index])

        # Launch the IOR threads and fail the test if any thread failed
        if self.thread_control(threads, operation) == "FAIL":
            self.d_log.error("IOR {} Thread FAIL".format(operation))
            self.fail("IOR {} Thread FAIL".format(operation))

        # Restart the agents and servers after the write / before the read
        if operation == "write":
            # Stop the agents
            errors = self.stop_agents()
            self.assertEqual(
                len(errors), 0,
                "Error stopping agents:\n {}".format("\n ".join(errors)))

            # Stop the servers
            errors = self.stop_servers()
            self.assertEqual(
                len(errors), 0,
                "Error stopping servers:\n {}".format("\n ".join(errors)))

            # Start the agents
            self.start_agent_managers()

            # Start the servers
            self.start_server_managers()