Example 1
    def get_job_manager_command(self):
        """Get the MPI job manager command for IOR.

        Returns:
            Mpirun: the object for the MPI job manager command

        """
        # Initialize MpioUtils if IOR is running in MPIIO, DAOS, or POSIX mode
        if self.ior_cmd.api.value in ["MPIIO", "DAOS", "POSIX"]:
            mpio_util = MpioUtils()
            if not mpio_util.mpich_installed(self.hostlist_clients):
                self.fail("Exiting Test: Mpich not installed")
        else:
            self.fail("Unsupported IOR API")

        mpirun_path = os.path.join(mpio_util.mpichinstall, "bin")
        return Mpirun(self.ior_cmd, mpirun_path)
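
A minimal usage sketch for the manager returned above. This is illustrative only: the run_ior_with_mpirun name is made up, and mpirun.run() is assumed to exist on the returned object; setup_command(), set_daos_params(), and get_default_env() usage mirrors Example 3 below.

    def run_ior_with_mpirun(self, pool):
        """Illustrative only: drive the Mpirun object returned above."""
        mpirun = self.get_job_manager_command()
        mpirun.job.set_daos_params(self.server_group, pool)
        mpirun.setup_command(
            mpirun.job.get_default_env("mpirun", self.tmp),
            self.hostfile_clients, len(self.hostlist_clients))
        mpirun.run()    # assumed: the job manager exposes a run() method
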
Example 2
    def get_job_manager_command(self, manager):
        """Get the MPI job manager command for Mdtest.

        Args:
            manager (str): name of the MPI job manager to use; "MPICH"
                selects Mpirun, anything else falls back to Orterun

        Returns:
            JobManager: the object for the mpi job manager command

        """
        # Initialize MpioUtils if mdtest needs to be run using mpich
        if manager == "MPICH":
            mpio_util = MpioUtils()
            if not mpio_util.mpich_installed(self.hostlist_clients):
                self.fail("Exiting Test: Mpich not installed")
            path = os.path.join(mpio_util.mpichinstall, "bin")
            return Mpirun(self.mdtest_cmd, path)

        path = os.path.join(self.ompi_prefix, "bin")
        return Orterun(self.mdtest_cmd, path)
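
A similar hedged sketch showing how the manager choice could be driven by a test parameter. The "manager" yaml path, the run_mdtest_with_manager name, and the setup_command()/run() calls on the returned object are assumptions, not the project's actual API beyond what Example 3 shows for Mpirun.

    def run_mdtest_with_manager(self):
        """Illustrative only: pick a job manager and run mdtest with it."""
        manager = self.params.get("manager", "/run/mdtest/*", "MPICH")
        job_manager = self.get_job_manager_command(manager)
        job_manager.setup_command(
            job_manager.job.get_default_env("mpirun", self.tmp),
            self.hostfile_clients, len(self.hostlist_clients))
        job_manager.run()   # assumed: both Mpirun and Orterun expose run()
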
Example 3
    def test_rebuild_container_create(self):
        """Jira ID: DAOS-1168.

        Test Description:
            Configure 4 servers and 1 client with 1 or 2 pools and a pool
            service leader quantity of 2.  Add 1 container to the first pool
            configured with 3 replicas.  Populate the container with 1GB of
            objects.  Exclude a server that has shards of this object and
            verify that rebuild is initiated.  While rebuild is active, create
            1000 additional containers in the same pool or the second pool
            (when available).  Finally verify that rebuild completes and the
            pool info indicates the correct number of rebuilt objects and
            records.  Also confirm that all 1000 additional containers created
            during rebuild are accessible.

        Use Cases:
            Basic rebuild of container objects of array values, covering both
            sufficient numbers of rebuild targets and no available rebuild
            targets.

        :avocado: tags=all,medium,full_regression,rebuild,rebuildcontcreate
        """
        # Get test params
        targets = self.params.get("targets", "/run/server_config/*")
        pool_qty = self.params.get("pools", "/run/test/*")
        loop_qty = self.params.get("loops", "/run/test/*")
        cont_qty = self.params.get("containers", "/run/test/*")
        cont_obj_cls = self.params.get("container_obj_class", "/run/test/*")
        rank = self.params.get("rank", "/run/test/*")
        use_ior = self.params.get("use_ior", "/run/test/*", False)
        node_qty = len(self.hostlist_servers)

        # Get pool params
        self.pool = []
        for index in range(pool_qty):
            self.pool.append(TestPool(self.context, self.log))
            self.pool[-1].get_params(self)

        if use_ior:
            # Get ior params
            mpirun_path = os.path.join(self.ompi_prefix, "bin")
            mpirun = Mpirun(IorCommand(), mpirun_path)
            mpirun.job.get_params(self)
            mpirun.setup_command(
                mpirun.job.get_default_env("mpirun", self.tmp),
                self.hostfile_clients, len(self.hostlist_clients))

        # Cancel any tests with tickets already assigned
        if rank in (1, 2):
            self.cancelForTicket("DAOS-2434")

        errors = [0 for _ in range(loop_qty)]
        for loop in range(loop_qty):
            # Log the start of the loop
            loop_id = "LOOP {}/{}".format(loop + 1, loop_qty)
            self.log.info("%s", "-" * 80)
            self.log.info("%s: Starting loop", loop_id)

            # Start this loop with a fresh list of containers
            self.container = []

            # Create the requested number of pools
            info_checks = []
            rebuild_checks = []
            for pool in self.pool:
                pool.create()
                info_checks.append(
                    {
                        "pi_uuid": pool.uuid,
                        "pi_ntargets": node_qty * targets,
                        "pi_nnodes": node_qty,
                        "pi_ndisabled": 0,
                    }
                )
                rebuild_checks.append(
                    {
                        "rs_errno": 0,
                        "rs_done": 1,
                        "rs_obj_nr": 0,
                        "rs_rec_nr": 0,
                    }
                )

            # Check the pool info
            status = True
            for index, pool in enumerate(self.pool):
                status &= pool.check_pool_info(**info_checks[index])
                status &= pool.check_rebuild_status(**rebuild_checks[index])
                pool.display_pool_daos_space("after creation")
            self.assertTrue(
                status,
                "Error verifying pool info prior to excluding rank {}".format(
                    rank))

            # Create a container with 1GB of data in the first pool
            if use_ior:
                mpirun.job.flags.update("-v -w -W -G 1 -k", "ior.flags")
                mpirun.job.daos_destroy.update(False, "ior.daos_destroy")
                mpirun.job.set_daos_params(self.server_group, self.pool[0])
                self.log.info(
                    "%s: Running IOR on pool %s to fill container %s with data",
                    loop_id, self.pool[0].uuid, mpirun.job.daos_cont.value)
                self.run_ior(loop_id, mpirun)
            else:
                self.container.append(TestContainer(self.pool[0]))
                self.container[-1].get_params(self)
                self.container[-1].create()
                self.log.info(
                    "%s: Writing to pool %s to fill container %s with data",
                    loop_id, self.pool[0].uuid, self.container[-1].uuid)
                self.container[-1].object_qty.value = 8
                self.container[-1].record_qty.value = 64
                self.container[-1].data_size.value = 1024 * 1024
                self.container[-1].write_objects(rank, cont_obj_cls, False)
                rank_list = self.container[-1].get_target_rank_lists(
                    " after writing data")
                self.container[-1].get_target_rank_count(rank, rank_list)

            # Display the updated pool space usage
            for pool in self.pool:
                pool.display_pool_daos_space("after container creation")

            # Exclude the first rank from the first pool to initiate rebuild
            self.pool[0].start_rebuild([rank], self.d_log)

            # Wait for rebuild to start
            self.pool[0].wait_for_rebuild(True, 1)

            # Create additional containers in the last pool
            start_index = len(self.container)
            self.add_containers_during_rebuild(
                loop_id, cont_qty, self.pool[0], self.pool[-1])

            # Confirm rebuild completes
            self.pool[0].wait_for_rebuild(False, 1)

            # Check the pool info
            info_checks[0]["pi_ndisabled"] += targets
            rebuild_checks[0]["rs_done"] = 1
            rebuild_checks[0]["rs_obj_nr"] = ">=0"
            rebuild_checks[0]["rs_rec_nr"] = ">=0"
            for index, pool in enumerate(self.pool):
                status &= pool.check_pool_info(**info_checks[index])
                status &= pool.check_rebuild_status(**rebuild_checks[index])
            self.assertTrue(status, "Error verifying pool info after rebuild")

            # Verify each container created during rebuild exists by opening it
            for index in range(start_index, len(self.container)):
                count = "{}/{}".format(
                    index - start_index + 1, len(self.container) - start_index)
                if not self.access_container(loop_id, index, count):
                    errors[loop] += 1

            # Destroy the containers created during rebuild
            for index in range(start_index, len(self.container)):
                self.container[index].destroy()

            # Read the data from the container created before rebuild
            if use_ior:
                self.log.info(
                    "%s: Running IOR on pool %s to verify container %s",
                    loop_id, self.pool[0].uuid, mpirun.job.daos_cont.value)
                mpirun.job.flags.update("-v -r -R -G 1 -E", "ior.flags")
                mpirun.job.daos_destroy.update(True, "ior.daos_destroy")
                self.run_ior(loop_id, mpirun)
            else:
                self.log.info(
                    "%s: Reading pool %s to verify container %s",
                    loop_id, self.pool[0].uuid, self.container[0].uuid)
                self.assertTrue(
                    self.container[0].read_objects(),
                    "Error verifying data written before rebuild")
                self.container[0].destroy()

            # Destroy the pools
            for pool in self.pool:
                pool.destroy(1)

            self.log.info(
                "%s: Loop %s", loop_id,
                "passed" if errors[loop] == 0 else "failed")

        self.log.info("Test %s", "passed" if sum(errors) == 0 else "failed")