Beispiel #1
0
class DfuseTestBase(TestWithServers):
    """Base class for tests that need to start and stop dfuse.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a DfuseTestBase object."""
        super(DfuseTestBase, self).__init__(*args, **kwargs)
        # Dfuse object created by start_dfuse(); None when no dfuse is set
        self.dfuse = None

    def stop_job_managers(self):
        """Stop the parent class job managers and then dfuse.

        Returns:
            list: the errors returned by the parent class, plus a message if
                stopping dfuse raised a CommandFailure

        """
        errors = super(DfuseTestBase, self).stop_job_managers()
        try:
            self.stop_dfuse()
        except CommandFailure as error:
            errors.append("Error stopping dfuse: {}".format(error))
        return errors

    def start_dfuse(self, hosts, pool=None, container=None, mount_dir=None):
        """Create a Dfuse object and use it to start dfuse.

        The test is failed if running dfuse raises a CommandFailure.

        Args:
            hosts (list): list of hosts on which to start Dfuse
            pool (TestPool, optional): pool to use with Dfuse
            container (TestContainer, optional): container to use with Dfuse
            mount_dir (str, optional): updated mount dir name. Defaults to None.
        """
        self.dfuse = Dfuse(hosts, self.tmp)
        dfuse = self.dfuse
        dfuse.get_params(self)

        # Apply only the overrides that were actually provided
        if mount_dir:
            dfuse.mount_dir.update(mount_dir)
        if pool:
            dfuse.set_dfuse_params(pool)
        if container:
            dfuse.set_dfuse_cont_param(container)
        dfuse.set_dfuse_exports(self.server_managers[0], self.client_log)

        try:
            dfuse.run()
        except CommandFailure as error:
            failed_hosts = str(NodeSet.fromlist(dfuse.hosts))
            self.log.error(
                "Dfuse command %s failed on hosts %s",
                str(dfuse), failed_hosts, exc_info=error)
            self.fail("Test was expected to pass but it failed.")

    def stop_dfuse(self):
        """Stop dfuse, if one is set, and clear the Dfuse object."""
        if not self.dfuse:
            return
        self.dfuse.stop()
        self.dfuse = None
Beispiel #2
0
class DfuseTestBase(TestWithServers):
    """Base class for tests that need to start and stop dfuse.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a DfuseTestBase object."""
        super(DfuseTestBase, self).__init__(*args, **kwargs)
        # Dfuse object created by start_dfuse(); None when no dfuse is set
        self.dfuse = None

    def tearDown(self):
        """Stop dfuse and then run the parent class tear down."""
        try:
            self.stop_dfuse()
        finally:
            # The servers and agents are stopped even if stopping dfuse raised
            super(DfuseTestBase, self).tearDown()

    def start_dfuse(self, hosts, pool, container):
        """Create a Dfuse object and use it to start dfuse.

        The test is failed if running dfuse raises a CommandFailure.

        Args:
            hosts (list): list of hosts on which to start Dfuse
            pool (TestPool): pool to use with Dfuse
            container (TestContainer): container to use with Dfuse
        """
        self.dfuse = Dfuse(hosts, self.tmp)
        dfuse = self.dfuse
        dfuse.get_params(self)

        # Point dfuse at the requested pool and container
        dfuse.set_dfuse_params(pool)
        dfuse.set_dfuse_cont_param(container)
        dfuse.set_dfuse_exports(self.server_managers[0], self.client_log)

        try:
            dfuse.run()
        except CommandFailure as error:
            failed_hosts = str(NodeSet.fromlist(dfuse.hosts))
            self.log.error(
                "Dfuse command %s failed on hosts %s",
                str(dfuse), failed_hosts, exc_info=error)
            self.fail("Test was expected to pass but it failed.")

    def stop_dfuse(self):
        """Stop dfuse, if one is set, and clear the Dfuse object."""
        if not self.dfuse:
            return
        self.dfuse.stop()
        self.dfuse = None
Beispiel #3
0
class IorTestBase(TestWithServers):
    """Base IOR test class.

    :avocado: recursive
    """

    IOR_WRITE_PATTERN = "Commencing write performance test"
    IOR_READ_PATTERN = "Commencing read performance test"

    def __init__(self, *args, **kwargs):
        """Initialize an IorTestBase object."""
        super(IorTestBase, self).__init__(*args, **kwargs)
        # IorCommand object and process count, assigned in setUp()
        self.ior_cmd = None
        self.processes = None
        # Read by get_ior_job_manager_command() and run_ior(); defined here so
        # the attribute exists before setUp() assigns the yaml value
        self.subprocess = False
        self.hostfile_clients_slots = None
        # Dfuse object set by _start_dfuse()
        self.dfuse = None
        # TestContainer object set by create_cont()
        self.container = None
        # threading.Lock created in setUp() for the run_multiple_ior method
        self.lock = None
        # Mpirun object set by get_ior_job_manager_command()
        self.mpirun = None

    def setUp(self):
        """Start the servers/agents and read the IOR test parameters."""
        # Obtain separate log file names, then start the servers and agents
        self.update_log_file_names()
        super(IorTestBase, self).setUp()

        # Build the IOR command from the test yaml parameters
        self.ior_cmd = IorCommand()
        self.ior_cmd.get_params(self)

        # Number of client processes and whether IOR runs as a subprocess
        ior_processes = self.params.get("np", '/run/ior/client_processes/*')
        run_as_subprocess = self.params.get("subprocess", '/run/ior/*', False)
        self.processes = ior_processes
        self.subprocess = run_as_subprocess

        # The run_multiple_ior method relies on this lock
        self.lock = threading.Lock()

    def tearDown(self):
        """Stop dfuse, if one is set, and then run the parent tear down."""
        try:
            dfuse = self.dfuse
            if dfuse:
                dfuse.stop()
        finally:
            # The servers and agents are stopped even if stopping dfuse raised
            super(IorTestBase, self).tearDown()

    def create_pool(self):
        """Create a TestPool object, assign it to self.pool and create it."""
        dmg = self.get_dmg_command()
        self.pool = TestPool(self.context, dmg_command=dmg)

        # Read the pool parameters from the test yaml before creating the pool
        self.pool.get_params(self)
        self.pool.create()

    def create_cont(self):
        """Create a TestContainer object in self.pool and create the container.

        The new object is stored in self.container.
        """
        daos_cmd = DaosCommand(self.bin)
        self.container = TestContainer(self.pool, daos_command=daos_cmd)

        # Read the container parameters from the test yaml, then create it
        self.container.get_params(self)
        self.container.create()

    def _start_dfuse(self):
        """Create a Dfuse object on the client hosts and start dfuse.

        The test is failed if running dfuse raises a CommandFailure.
        """
        self.dfuse = Dfuse(self.hostlist_clients, self.tmp)
        dfuse = self.dfuse
        dfuse.get_params(self)

        # Point dfuse at the current pool and container
        dfuse.set_dfuse_params(self.pool)
        dfuse.set_dfuse_cont_param(self.container)
        dfuse.set_dfuse_exports(self.server_managers[0], self.client_log)

        try:
            dfuse.run()
        except CommandFailure as error:
            failed_hosts = str(NodeSet.fromlist(dfuse.hosts))
            self.log.error(
                "Dfuse command %s failed on hosts %s",
                str(dfuse), failed_hosts, exc_info=error)
            self.fail("Test was expected to pass but it failed.\n")

    def run_ior_with_pool(self, intercept=None, test_file_suffix="",
                          test_file="daos:testFile", create_pool=True,
                          create_cont=True, stop_dfuse=True):
        """Run ior, optionally creating the pool and container first.

        For the POSIX api dfuse is started (unless a Dfuse object is already
        set) and the test file is placed in the dfuse mount directory. For the
        DFS api the test file is "/testfile".

        Args:
            intercept (str, optional): path to the interception library. Shall
                    be used only for POSIX through DFUSE. Defaults to None.
            test_file_suffix (str, optional): suffix to add to the end of the
                test file name. Defaults to "".
            test_file (str, optional): ior test file name. Defaults to
                "daos:testFile". Is ignored for the POSIX and DFS apis.
            create_pool (bool, optional): if True, update the ior command with
                the pool (creating it if needed) before running ior.
                Defaults to True.
            create_cont (bool, optional): Create new container. Default is True
            stop_dfuse (bool, optional): Stop dfuse after ior command is
                finished. Default is True.

        Returns:
            CmdResult: result of the ior command execution

        """
        if create_pool:
            self.update_ior_cmd_with_pool(create_cont)

        # The api decides where the test file lives
        api = self.ior_cmd.api.value
        if api == "POSIX":
            # Reuse dfuse when it was left running by a previous call
            if not self.dfuse:
                self._start_dfuse()
            test_file = os.path.join(self.dfuse.mount_dir.value, "testfile")
        elif api == "DFS":
            test_file = os.path.join("/", "testfile")

        self.ior_cmd.test_file.update(test_file + test_file_suffix)

        job_manager = self.get_ior_job_manager_command()
        result = self.run_ior(job_manager, self.processes, intercept)

        if stop_dfuse and self.dfuse:
            self.dfuse.stop()
            self.dfuse = None
        return result

    def update_ior_cmd_with_pool(self, create_cont=True):
        """Update ior_cmd with the pool and container parameters.

        Args:
            create_cont (bool, optional): connect to the pool and create a new
                container before updating ior_cmd. Defaults to True.
        """
        # Reuse an existing pool; only create one when none is set
        if self.pool is None:
            self.create_pool()

        # Create a container, if needed.
        # Don't pass uuid and pool handle to IOR.
        # It will not enable checksum feature
        if create_cont:
            self.pool.connect()
            self.create_cont()

        # Hand the server group, pool and container uuid to the IOR command
        cont_uuid = self.container.uuid
        self.ior_cmd.set_daos_params(self.server_group, self.pool, cont_uuid)

    def get_ior_job_manager_command(self):
        """Get the MPI job manager command for IOR.

        The test is failed for an unsupported IOR api or when the mpich check
        on the client hosts returns False. The new job manager is also stored
        in self.mpirun.

        Returns:
            Mpirun: the mpich job manager object wrapping the ior command

        """
        # Only the MPIIO, POSIX and DFS apis are supported, and they need mpich
        if self.ior_cmd.api.value not in ("MPIIO", "POSIX", "DFS"):
            self.fail("Unsupported IOR API")
        elif MpioUtils().mpich_installed(self.hostlist_clients) is False:
            self.fail("Exiting Test: Mpich not installed")

        if self.subprocess:
            job_manager = Mpirun(self.ior_cmd, True, mpitype="mpich")
        else:
            job_manager = Mpirun(self.ior_cmd, mpitype="mpich")
        self.mpirun = job_manager

        return job_manager

    def check_subprocess_status(self, operation="write"):
        """Check that the ior subprocess is running the given operation.

        Sets the ior command pattern for the operation and fails the test if
        the subprocess status check returns a false value.

        Args:
            operation (str, optional): either "write" or "read". Any other
                value fails the test. Defaults to "write".
        """
        if operation == "write":
            self.ior_cmd.pattern = self.IOR_WRITE_PATTERN
        elif operation == "read":
            self.ior_cmd.pattern = self.IOR_READ_PATTERN
        else:
            # Adjacent literals are used instead of a backslash continuation
            # inside the string, which put a long run of spaces in the message
            self.fail("Exiting Test: Inappropriate operation type "
                      "for subprocess status check")

        if not self.ior_cmd.check_ior_subprocess_status(
                self.mpirun.process, self.ior_cmd):
            self.fail("Exiting Test: Subprocess not running")

    def run_ior(self, manager, processes, intercept=None, display_space=True):
        """Run the IOR command.

        The test is failed if the command raises a CommandFailure or, when not
        running as a subprocess, if its stdout contains 'WARNING'.

        Args:
            manager (Mpirun): mpi job manager command
            processes (int): number of host processes
            intercept (str, optional): path to interception library. Defaults
                to None.
            display_space (bool, optional): display the pool space before the
                run and, when not running as a subprocess, after it. Defaults
                to True.

        Returns:
            object: the value returned by manager.run()

        """
        environment = self.ior_cmd.get_default_env(
            str(manager), self.client_log)
        if intercept:
            environment["LD_PRELOAD"] = intercept

        # Configure the job manager before launching
        manager.assign_hosts(
            self.hostlist_clients, self.workdir, self.hostfile_clients_slots)
        manager.assign_processes(processes)
        manager.assign_environment(environment)

        try:
            if display_space:
                self.pool.display_pool_daos_space()
            result = manager.run()

            # The stdout is only inspected when ior did not run as a subprocess
            if not self.subprocess and any(
                    'WARNING' in line for line in result.stdout.splitlines()):
                self.fail("IOR command issued warnings.\n")
            return result
        except CommandFailure as error:
            self.log.error("IOR Failed: %s", str(error))
            self.fail("Test was expected to pass but it failed.\n")
        finally:
            if display_space and not self.subprocess:
                self.pool.display_pool_daos_space()

    def stop_ior(self):
        """Stop the in-progress IOR command held in self.mpirun.

        The pool space is displayed afterwards whether or not the stop
        succeeded. The test is failed if stopping raises a CommandFailure.

        Returns:
            object: the value returned by self.mpirun.stop()

        """
        self.log.info(
            "<IOR> Stopping in-progress IOR command: %s", str(self.mpirun))

        try:
            return self.mpirun.stop()
        except CommandFailure as error:
            self.log.error("IOR stop Failed: %s", str(error))
            self.fail("Test was expected to pass but it failed.\n")
        finally:
            self.pool.display_pool_daos_space()



    def run_multiple_ior_with_pool(self, results, intercept=None):
        """Run two ior jobs in parallel through a dfuse mount point.

        One job runs on all but the last client host with the interception
        library; the other runs on the last client host using dfuse only.

        Args:
            results (dict): dictionary passed to each job to store its ior
                metrics
            intercept (str, optional): path to the interception library. Shall
                be used only for POSIX through DFUSE. Defaults to None.
        """
        self.update_ior_cmd_with_pool()

        # start dfuse for POSIX api. This is specific to interception
        # library test requirements.
        self._start_dfuse()

        # Job 1 uses dfuse + the interception library on every client but the
        # last one; job 2 uses only dfuse on the last client.
        intercept_job = self.get_new_job(
            self.hostlist_clients[:-1], 1, results, intercept)
        dfuse_only_job = self.get_new_job(
            [self.hostlist_clients[-1]], 2, results, None)

        intercept_job.start()
        # Both jobs trigger MPIRUN through the same ior_cmd with different
        # parameters, so pause for 2 seconds to avoid data collisions.
        time.sleep(2)
        dfuse_only_job.start()
        for job in (intercept_job, dfuse_only_job):
            job.join()

        self.dfuse.stop()
        self.dfuse = None

    def get_new_job(self, clients, job_num, results, intercept=None):
        """Create a new, not yet started, thread for an ior run.

        Args:
            clients (list): hosts on which to run ior
            job_num (int): Assigned job number
            results (dict): A dictionary object to store the ior metrics
            intercept (path): Path to interception library

        Returns:
            threading.Thread: thread whose target is run_multiple_ior

        """
        # Note the argument order expected by run_multiple_ior
        thread_args = [clients, results, job_num, intercept]
        return threading.Thread(target=self.run_multiple_ior, args=thread_args)

    def run_multiple_ior(self, clients, results, job_num, intercept=None):
        """Run the IOR command for one of the parallel jobs.

        The shared ior command is updated and the job manager is configured
        while holding self.lock. The ior metrics of the run are stored in
        results[job_num]. The test is failed if the run raises a
        CommandFailure.

        Args:
            clients (list): hosts on which to run ior
            results (dict): A dictionary object to store the ior metrics
            job_num (int): Assigned job number
            intercept (str, optional): path to interception library. Defaults to
                None.
        """
        # Use the lock as a context manager so it is released even if one of
        # the calls below raises (e.g. self.fail); otherwise the other job
        # thread would block forever waiting for the lock.
        with self.lock:
            tsize = self.ior_cmd.transfer_size.value
            testfile = os.path.join(self.dfuse.mount_dir.value,
                                    "testfile{}{}".format(tsize, job_num))
            if intercept:
                testfile += "intercept"
            self.ior_cmd.test_file.update(testfile)
            manager = self.get_ior_job_manager_command()
            # Scale the process count by this job's share of the client hosts
            procs = (
                self.processes // len(self.hostlist_clients)) * len(clients)
            env = self.ior_cmd.get_default_env(str(manager), self.client_log)
            if intercept:
                env["LD_PRELOAD"] = intercept
            manager.assign_hosts(
                clients, self.workdir, self.hostfile_clients_slots)
            manager.assign_processes(procs)
            manager.assign_environment(env)

        try:
            self.pool.display_pool_daos_space()
            # The run itself is not locked so both jobs execute in parallel
            out = manager.run()
            with self.lock:
                results[job_num] = IorCommand.get_ior_metrics(out)
        except CommandFailure as error:
            self.log.error("IOR Failed: %s", str(error))
            self.fail("Test was expected to pass but it failed.\n")
        finally:
            self.pool.display_pool_daos_space()

    def verify_pool_size(self, original_pool_info, processes):
        """Validate the pool space consumed since original_pool_info.

        The test is failed if the drop in free space is smaller than the
        aggregate total reported by the ior command.

        Args:
            original_pool_info (PoolInfo): Pool info prior to IOR
            processes (int): number of processes
        """
        # Query the current pool info for comparison
        current_pool_info = self.pool.pool.pool_query()

        # A transfer size of at least 4K is verified against the NVMe space
        # (index 1); anything smaller is verified against SCM (index 0)
        if self.ior_cmd.transfer_size.value >= 4096:
            self.log.info(
                "Size is > 4K,Size verification will be done with NVMe size")
            storage_index = 1
        else:
            self.log.info(
                "Size is < 4K,Size verification will be done with SCM size")
            storage_index = 0

        free_before = \
            original_pool_info.pi_space.ps_space.s_free[storage_index]
        free_after = current_pool_info.pi_space.ps_space.s_free[storage_index]
        actual_pool_size = free_before - free_after
        expected_pool_size = self.ior_cmd.get_aggregate_total(processes)

        if actual_pool_size < expected_pool_size:
            message = "Pool Free Size did not match: actual={}, expected={}"
            self.fail(message.format(actual_pool_size, expected_pool_size))

    def execute_cmd(self, cmd, fail_on_err=True, display_output=True):
        """Execute cmd on the client hosts using pcmd.

        Args:
            cmd (str): String command to be executed
            fail_on_err (bool): Boolean for whether to fail the test if command
                                execution returns non zero return code.
            display_output (bool): Boolean for whether to display output.

        Returns:
            dict: a dictionary of return codes keys and accompanying NodeSet
                  values indicating which hosts yielded the return code.
        """
        try:
            # execute bash cmds
            ret = pcmd(
                self.hostlist_clients, cmd, verbose=display_output, timeout=300)

            # Any non-zero return code is a failure, even when other hosts
            # returned zero. Checking only "0 not in ret" ignored commands
            # that failed on some of the hosts.
            failed_hosts = [
                str(node_set) for code, node_set in ret.items() if code != 0]
            if failed_hosts and fail_on_err:
                raise CommandFailure(
                    "Error running '{}' on the following "
                    "hosts: {}".format(cmd, NodeSet(",".join(failed_hosts))))

        # report error if any command fails
        except CommandFailure as error:
            self.log.error("DfuseSparseFile Test Failed: %s",
                           str(error))
            self.fail("Test was expected to pass but "
                      "it failed.\n")
        return ret
Beispiel #4
0
class FioBase(TestWithServers):
    """Base fio class.

    :avocado: recursive
    """
    def __init__(self, *args, **kwargs):
        """Initialize a FioBase object with its attributes unset."""
        super(FioBase, self).__init__(*args, **kwargs)
        # Assigned in setUp()
        self.fio_cmd = None
        self.daos_cmd = None
        self.processes = None
        self.manager = None
        # Dfuse object set by _start_dfuse()
        self.dfuse = None

    def setUp(self):
        """Start the servers/agents and read the fio test parameters."""
        # Obtain separate log file names, then start the servers and agents
        self.update_log_file_names()
        super(FioBase, self).setUp()

        # daos command used to create containers
        self.daos_cmd = DaosCommand(self.bin)

        # Build the fio command from the test yaml parameters
        self.fio_cmd = FioCommand()
        self.fio_cmd.get_params(self)

        # Number of client processes and the job manager name
        client_processes = self.params.get("np", '/run/fio/client_processes/*')
        job_manager = self.params.get("manager", '/run/fio/*', "MPICH")
        self.processes = client_processes
        self.manager = job_manager

    def tearDown(self):
        """Stop dfuse, if one is set, and then run the parent tear down."""
        try:
            dfuse = self.dfuse
            if dfuse:
                dfuse.stop()
        finally:
            # The servers and agents are stopped even if stopping dfuse raised
            super(FioBase, self).tearDown()

    def _create_pool(self):
        """Create a TestPool object, assign it to self.pool and create it."""
        dmg = self.get_dmg_command()
        # pylint: disable=attribute-defined-outside-init
        self.pool = TestPool(self.context, dmg_command=dmg)

        # Read the pool parameters from the test yaml before creating the pool
        self.pool.get_params(self)
        self.pool.create()

    def _create_cont(self):
        """Create a container in self.pool with the daos command.

        The test is failed if no container UUID is found in the command
        output.

        Returns:
            str: UUID of the created container

        """
        cont_type = self.params.get("type", "/run/container/*")
        result = self.daos_cmd.container_create(
            pool=self.pool.uuid, svc=self.pool.svc_ranks, cont_type=cont_type)

        # The first "created container <uuid>" match in the output is the UUID
        match = re.search(
            r"created\s+container\s+([0-9a-f-]+)", result.stdout)
        if match is None:
            self.fail("Error obtaining the container uuid from: {}".format(
                result.stdout))
        return match.group(1)

    def _start_dfuse(self):
        """Create a Dfuse object on the client hosts and start dfuse.

        A new container is created for dfuse. The test is failed if running
        dfuse raises a CommandFailure.
        """
        self.dfuse = Dfuse(self.hostlist_clients, self.tmp)
        dfuse = self.dfuse
        dfuse.get_params(self)

        # Point dfuse at the current pool and a newly created container
        dfuse.set_dfuse_params(self.pool)
        cont_uuid = self._create_cont()
        dfuse.set_dfuse_cont_param(cont_uuid)
        dfuse.set_dfuse_exports(self.server_managers[0], self.client_log)

        try:
            dfuse.run()
        except CommandFailure as error:
            failed_hosts = str(NodeSet.fromlist(dfuse.hosts))
            self.log.error(
                "Dfuse command %s failed on hosts %s",
                str(dfuse), failed_hosts, exc_info=error)
            self.fail("Unable to launch Dfuse.\n")

    def execute_fio(self):
        """Run fio on the client hosts, through dfuse for the POSIX api.

        A pool is created if none exists. Dfuse, when started, is stopped
        after fio has run.
        """
        # Reuse an existing pool; only create one when none is set
        if self.pool is None:
            self._create_pool()

        # The POSIX api runs against the dfuse mount directory
        if self.fio_cmd.api.value == "POSIX":
            # Uncomment below two lines once DAOS-3355 is resolved
            # self.pool.connect()
            # self.create_cont()
            self._start_dfuse()
            mount_dir = self.dfuse.mount_dir.value
            self.fio_cmd.update(
                "global", "directory", mount_dir,
                "fio --name=global --directory")

        # Run fio on every client host
        self.fio_cmd.hosts = self.hostlist_clients
        self.fio_cmd.run()

        if not self.dfuse:
            return
        self.dfuse.stop()
        self.dfuse = None
Beispiel #5
0
class BashCmd(TestWithServers):
    """Base BashCmd test class.

    :avocado: recursive
    """
    def __init__(self, *args, **kwargs):
        """Initialize a BashCmd object."""
        super(BashCmd, self).__init__(*args, **kwargs)
        # Dfuse object set by start_dfuse()
        self.dfuse = None
        # Kept for backward compatibility; setUp() assigns file_name1/2
        self.file_name = None
        # Assigned in setUp() from the /run/bashcmd/* test parameters; defined
        # here so the attributes exist on every instance
        self.dir_name = None
        self.file_name1 = None
        self.file_name2 = None
        self.dd_count = None
        self.dd_blocksize = None
        # Assigned in test_bashcmd() from the test parameters
        self.pool_count = None
        self.cont_count = None

    def setUp(self):
        """Start the servers/agents and read the bashcmd test parameters."""
        super(BashCmd, self).setUp()

        # Every BashCmd parameter lives under the same yaml namespace
        namespace = '/run/bashcmd/*'
        self.dir_name = self.params.get("dirname", namespace)
        self.file_name1 = self.params.get("filename1", namespace)
        self.file_name2 = self.params.get("filename2", namespace)
        self.dd_count = self.params.get("dd_count", namespace)
        self.dd_blocksize = self.params.get("dd_blocksize", namespace)

    def tearDown(self):
        """Stop dfuse, if one is set, and then run the parent tear down."""
        try:
            dfuse = self.dfuse
            if dfuse:
                dfuse.stop()
        finally:
            # The servers and agents are stopped even if stopping dfuse raised
            super(BashCmd, self).tearDown()

    def create_pool(self):
        """Create a TestPool object, assign it to self.pool and create it."""
        dmg = self.get_dmg_command()
        self.pool = TestPool(self.context, dmg_command=dmg)

        # Read the pool parameters from the test yaml before creating the pool
        self.pool.get_params(self)
        self.pool.create()

    def create_cont(self):
        """Create a TestContainer object in self.pool and create the container.

        The new object is stored in self.container.
        """
        daos_cmd = DaosCommand(self.bin)
        self.container = TestContainer(self.pool, daos_command=daos_cmd)

        # Read the container parameters from the test yaml, then create it
        self.container.get_params(self)
        self.container.create()

    def start_dfuse(self, count):
        """Create a Dfuse object and start dfuse on a per-container mount dir.

        The mount directory is /tmp/<pool uuid>_daos_dfuse<count>. The test is
        failed if running dfuse raises a CommandFailure.

        Args:
            count (int): container index
        """
        self.dfuse = Dfuse(self.hostlist_clients, self.tmp)
        dfuse = self.dfuse
        dfuse.get_params(self)

        # Use a mount point that is unique for this pool and container index
        mount_dir = "".join(
            ["/tmp/", self.pool.uuid, "_daos_dfuse", str(count)])
        dfuse.mount_dir.update(mount_dir)
        dfuse.set_dfuse_params(self.pool)
        dfuse.set_dfuse_cont_param(self.container)
        dfuse.set_dfuse_exports(self.server_managers[0], self.client_log)

        try:
            dfuse.run()
        except CommandFailure as error:
            self.log.error(
                "Dfuse command %s failed on hosts %s",
                str(dfuse), dfuse.hosts, exc_info=error)
            self.fail("Test was expected to pass but it failed.\n")

    def test_bashcmd(self):
        """Jira ID: DAOS-3508.

        Test Description:
            Purpose of this test is to mount different mount points of dfuse
            for different container and pool sizes and perform basic bash
            commands.
        Use cases:
            Following list of bash commands have been incorporated
            as part of this test: mkdir, touch, ls, chmod, rm, dd, stat,
            cp, cmp, mv, rmdir.
              Create a directory.
              Create a file under that directory.
              List the created file.
              Remove the file.
              Write a file to the dfuse mounted location using dd.
              List the written file to verify it was created.
              Verify the file created is of right size as desired.
              Copy the file
              Compare the copied file with original to verify the
              content is same.
              Remove copied file.
              Rename file
              Verify renamed file exist using list.
              Remove a directory
        :avocado: tags=all,hw,daosio,medium,ib2,full_regression,bashcmd
        """
        self.cont_count = self.params.get("cont_count", '/run/container/*')
        self.pool_count = self.params.get("pool_count", '/run/pool/*')

        # Repeat the test with a new pool for each requested pool.
        for _ in range(self.pool_count):
            self.create_pool()
            # perform test for multiple containers.
            for count in range(self.cont_count):
                self.create_cont()
                self.start_dfuse(count)
                abs_dir_path = os.path.join(self.dfuse.mount_dir.value,
                                            self.dir_name)
                abs_file_path1 = os.path.join(abs_dir_path, self.file_name1)
                abs_file_path2 = os.path.join(abs_dir_path, self.file_name2)
                # list of commands to be executed.
                commands = [
                    u"mkdir -p {}".format(abs_dir_path),
                    u"touch {}".format(abs_file_path1),
                    u"ls -a {}".format(abs_file_path1),
                    u"rm {}".format(abs_file_path1),
                    u"dd if=/dev/zero of={} count={} bs={}".format(
                        abs_file_path1, self.dd_count, self.dd_blocksize),
                    u"ls -al {}".format(abs_file_path1),
                    u"filesize=$(stat -c%s '{}');\
                            if (( filesize != {}*{} )); then exit 1;\
                            fi".format(abs_file_path1, self.dd_count,
                                       self.dd_blocksize),
                    u"cp -r {} {}".format(abs_file_path1, abs_file_path2),
                    u"cmp --silent {} {}".format(abs_file_path1,
                                                 abs_file_path2),
                    u"rm {}".format(abs_file_path2),
                    u"mv {} {}".format(abs_file_path1, abs_file_path2),
                    u"ls -al {}".format(abs_file_path2),
                    u"rm {}".format(abs_file_path2),
                    u"rmdir {}".format(abs_dir_path),
                ]
                for cmd in commands:
                    try:
                        # execute bash cmds
                        ret_code = general_utils.pcmd(
                            self.hostlist_clients, cmd, timeout=30)
                        # Any non-zero return code is a failure, even when
                        # other hosts returned zero. Checking only
                        # "0 not in ret_code" ignored commands that failed on
                        # some of the hosts.
                        failed_hosts = [
                            str(node_set)
                            for code, node_set in ret_code.items()
                            if code != 0
                        ]
                        if failed_hosts:
                            raise CommandFailure(
                                "Error running '{}' on the following "
                                "hosts: {}".format(
                                    cmd, NodeSet(",".join(failed_hosts))))
                    # report error if any command fails
                    except CommandFailure as error:
                        self.log.error("BashCmd Test Failed: %s", str(error))
                        self.fail("Test was expected to pass but "
                                  "it failed.\n")

                # stop dfuse
                self.dfuse.stop()
                # destroy container
                self.container.destroy()
            # destroy pool
            self.pool.destroy()
Beispiel #6
0
class IorTestBase(TestWithServers):
    """Base IOR test class.

    Creates the pool, the POSIX container and the dfuse mount point needed by
    the ior command, runs ior through an MPI job manager and offers a check
    of the pool space consumed by the run.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a IorTestBase object."""
        super(IorTestBase, self).__init__(*args, **kwargs)
        self.ior_cmd = None
        self.processes = None
        self.hostfile_clients_slots = None
        self.dfuse = None
        self.container = None

    def setUp(self):
        """Set up each test case.

        Reads the ior parameters from the test yaml and, when the POSIX api
        is selected, limits the run to the first client host.
        """
        # obtain separate logs
        self.update_log_file_names()
        # Start the servers and agents
        super(IorTestBase, self).setUp()

        # Get the parameters for IOR
        self.ior_cmd = IorCommand()
        self.ior_cmd.get_params(self)
        self.processes = self.params.get("np", '/run/ior/client_processes/*')
        # Until DAOS-3320 is resolved run IOR for POSIX
        # with single client node
        if self.ior_cmd.api.value == "POSIX":
            self.hostlist_clients = [self.hostlist_clients[0]]
            self.hostfile_clients = write_host_file.write_host_file(
                self.hostlist_clients, self.workdir,
                self.hostfile_clients_slots)

    def tearDown(self):
        """Tear down each test case."""
        try:
            # Only the reference is dropped here; dfuse is not stopped
            # explicitly.
            # NOTE(review): presumably the Dfuse object cleans up after
            # itself when released - confirm against the Dfuse class.
            self.dfuse = None
        finally:
            # Stop the servers and agents even if the step above raised
            super(IorTestBase, self).tearDown()

    def create_pool(self):
        """Create a TestPool object to use with ior."""
        # Get the pool params
        self.pool = TestPool(self.context, self.log)
        self.pool.get_params(self)

        # Create a pool
        self.pool.create()

    def create_cont(self):
        """Create a POSIX container with the daos command line tool.

        The test fails if the command cannot be started, exits with a
        non-zero status or prints fewer tokens than expected.

        Returns:
            str: the fourth whitespace separated token of the command output,
                which is logged as the UUID of the new container

        """
        # TO-DO: Enable container using TestContainer object,
        # once DAOS-3355 is resolved.
        env = Dfuse(self.hostlist_clients, self.tmp).get_default_env()
        # command to create container of posix type; the environment prefix
        # is part of the command string, hence the shell invocation below
        cmd = env + "daos cont create --pool={} --svc={} --type=POSIX".format(
            self.ior_cmd.daos_pool.value, self.ior_cmd.daos_svcl.value)
        try:
            container = subprocess.Popen(cmd,
                                         stdout=subprocess.PIPE,
                                         shell=True)
            output, _ = container.communicate()
        except OSError as err:
            self.fail("Container create failed:{}".format(err))

        # Popen never raises CalledProcessError, so a failing command has to
        # be detected through its exit status.
        if container.returncode != 0:
            self.fail("Container create failed: exit status {}".format(
                container.returncode))

        fields = output.split()
        if len(fields) < 4:
            self.fail(
                "Container create failed: unexpected output {!r}".format(
                    output))

        self.log.info("Container created with UUID %s", fields[3])
        return fields[3]

    def start_dfuse(self):
        """Create a Dfuse object for the pool and a new container and run it.

        The test fails if the dfuse command reports a CommandFailure.
        """
        # Get Dfuse params
        self.dfuse = Dfuse(self.hostlist_clients, self.tmp, True)
        self.dfuse.get_params(self)

        # update dfuse params
        self.dfuse.set_dfuse_params(self.pool)
        self.dfuse.set_dfuse_cont_param(self.create_cont())

        try:
            # start dfuse
            self.dfuse.run()
        except CommandFailure as error:
            self.log.error("Dfuse command %s failed on hosts %s",
                           str(self.dfuse),
                           str(NodeSet.fromlist(self.dfuse.hosts)),
                           exc_info=error)
            self.fail("Test was expected to pass but it failed.\n")

    def run_ior_with_pool(self, intercept=None):
        """Execute ior against a pool, going through dfuse for POSIX.

        A pool is created when the test does not have one yet. For the POSIX
        api dfuse is started and the ior test file is placed in its mount
        directory.

        Args:
            intercept (str, optional): path to the interception library.
                Shall be used only for POSIX through DFUSE. Defaults to None.

        Returns:
            object: the value returned by run_ior()

        """
        # Create a pool if one does not already exist
        if self.pool is None:
            self.create_pool()
        # Update IOR params with the pool
        self.ior_cmd.set_daos_params(self.server_group, self.pool)

        # start dfuse if api is POSIX
        if self.ior_cmd.api.value == "POSIX":
            # The container is created by start_dfuse() until DAOS-3355 is
            # resolved.
            if self.ior_cmd.transfer_size.value == "256B":
                self.cancelForTicket("DAOS-3449")
            self.start_dfuse()
            self.ior_cmd.test_file.update(self.dfuse.mount_dir.value +
                                          "/testfile")

        return self.run_ior(self.get_job_manager_command(), self.processes,
                            intercept)

    def get_job_manager_command(self):
        """Get the MPI job manager command for IOR.

        The test fails for an unsupported ior api or when mpich is reported
        as not installed on the clients.

        Returns:
            Mpirun: job manager object wrapping the ior command

        """
        if self.ior_cmd.api.value not in ["MPIIO", "DAOS", "POSIX"]:
            self.fail("Unsupported IOR API")

        # Every supported api is launched with mpich
        mpio_util = MpioUtils()
        if mpio_util.mpich_installed(self.hostlist_clients) is False:
            self.fail("Exiting Test: Mpich not installed")

        mpirun_path = os.path.join(mpio_util.mpichinstall, "bin")
        return Mpirun(self.ior_cmd, mpirun_path)

    def run_ior(self, manager, processes, intercept=None):
        """Run the IOR command.

        Args:
            manager (Mpirun): job manager object used to launch ior
            processes (int): number of host processes
            intercept (str, optional): path to interception library. When
                set it is exported as LD_PRELOAD. Defaults to None.

        Returns:
            object: the value returned by the job manager run() method

        """
        env = self.ior_cmd.get_default_env(str(manager), self.tmp,
                                           self.client_log)
        if intercept:
            env["LD_PRELOAD"] = intercept
        manager.setup_command(env, self.hostfile_clients, processes)
        try:
            return manager.run()
        except CommandFailure as error:
            self.log.error("IOR Failed: %s", str(error))
            self.fail("Test was expected to pass but it failed.\n")

    @staticmethod
    def _size_in_bytes(size):
        """Convert a size such as 4096, "4096", "256B" or "4K" into bytes.

        Args:
            size (object): number or string holding a size. Strings may end
                with a K, M, G or T multiplier (powers of 1024), optionally
                followed by "B" or "iB"; letter case is ignored.

        Returns:
            int: the size in bytes

        Raises:
            ValueError: if the size cannot be interpreted

        """
        try:
            return int(size)
        except (TypeError, ValueError):
            pass

        text = str(size).strip().upper()
        # Drop an optional byte marker: "256B" -> "256", "4KIB" -> "4K"
        for marker in ("IB", "B"):
            if text.endswith(marker):
                text = text[:-len(marker)]
                break
        multipliers = {
            "K": 1024, "M": 1024 ** 2, "G": 1024 ** 3, "T": 1024 ** 4}
        factor = multipliers.get(text[-1:], 1)
        if factor != 1:
            text = text[:-1]
        return int(float(text) * factor)

    def verify_pool_size(self, original_pool_info, processes):
        """Validate the pool size.

        The test fails if the free space consumed since original_pool_info
        was taken is smaller than the aggregate amount ior reports for the
        given number of processes.

        Args:
            original_pool_info (PoolInfo): Pool info prior to IOR
            processes (int): number of processes
        """
        # Get the current pool size for comparison
        current_pool_info = self.pool.pool.pool_query()

        # The transfer size may be given as a string (e.g. "256B"), so it is
        # converted to bytes before being compared with the 4K threshold.
        try:
            transfer_size = self._size_in_bytes(
                self.ior_cmd.transfer_size.value)
        except ValueError as error:
            self.fail("Invalid IOR transfer size: {}".format(error))

        # If Transfer size is >= 4K, Pool size will verified against NVMe,
        # else it will be checked against SCM
        if transfer_size >= 4096:
            self.log.info(
                "Size is > 4K,Size verification will be done with NVMe size")
            storage_index = 1
        else:
            self.log.info(
                "Size is < 4K,Size verification will be done with SCM size")
            storage_index = 0
        actual_pool_size = \
            original_pool_info.pi_space.ps_space.s_free[storage_index] - \
            current_pool_info.pi_space.ps_space.s_free[storage_index]
        expected_pool_size = self.ior_cmd.get_aggregate_total(processes)

        if actual_pool_size < expected_pool_size:
            self.fail(
                "Pool Free Size did not match: actual={}, expected={}".format(
                    actual_pool_size, expected_pool_size))
Beispiel #7
0
class IorTestBase(TestWithServers):
    """Base IOR test class.

    Creates the pool, the container and (for POSIX) the dfuse mount point
    needed by the ior command, runs one or several ior jobs through an MPI
    job manager and offers a check of the pool space consumed by the run.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a IorTestBase object."""
        super(IorTestBase, self).__init__(*args, **kwargs)
        self.ior_cmd = None
        self.processes = None
        self.hostfile_clients_slots = None
        self.dfuse = None
        self.container = None
        self.lock = None

    def setUp(self):
        """Set up each test case.

        Reads the ior parameters from the test yaml and, when the POSIX api
        is selected, limits the run to the first client host.
        """
        # obtain separate logs
        self.update_log_file_names()
        # Start the servers and agents
        super(IorTestBase, self).setUp()

        # Get the parameters for IOR
        self.ior_cmd = IorCommand()
        self.ior_cmd.get_params(self)
        self.processes = self.params.get("np", '/run/ior/client_processes/*')

        # Until DAOS-3320 is resolved run IOR for POSIX
        # with single client node
        if self.ior_cmd.api.value == "POSIX":
            self.hostlist_clients = [self.hostlist_clients[0]]
            self.hostfile_clients = write_host_file.write_host_file(
                self.hostlist_clients, self.workdir,
                self.hostfile_clients_slots)
        # lock is needed for run_multiple_ior method.
        self.lock = threading.Lock()

    def tearDown(self):
        """Tear down each test case."""
        try:
            # dfuse is still set only if a run did not reach its own stop
            if self.dfuse:
                self.dfuse.stop()
        finally:
            # Stop the servers and agents
            super(IorTestBase, self).tearDown()

    def create_pool(self):
        """Create a TestPool object to use with ior."""
        # Get the pool params
        self.pool = TestPool(self.context, dmg_command=self.get_dmg_command())
        self.pool.get_params(self)

        # Create a pool
        self.pool.create()

    def create_cont(self):
        """Create a TestContainer object to be used to create container."""
        # Get container params
        self.container = TestContainer(self.pool,
                                       daos_command=DaosCommand(self.bin))
        self.container.get_params(self)

        # create container
        self.container.create()

    def _start_dfuse(self):
        """Create a Dfuse object for the current pool/container and run it.

        The test fails if the dfuse command reports a CommandFailure.
        """
        # Get Dfuse params
        self.dfuse = Dfuse(self.hostlist_clients, self.tmp)
        self.dfuse.get_params(self)

        # update dfuse params
        self.dfuse.set_dfuse_params(self.pool)
        self.dfuse.set_dfuse_cont_param(self.container)
        self.dfuse.set_dfuse_exports(self.server_managers[0], self.client_log)

        try:
            # start dfuse
            self.dfuse.run()
        except CommandFailure as error:
            self.log.error("Dfuse command %s failed on hosts %s",
                           str(self.dfuse),
                           str(NodeSet.fromlist(self.dfuse.hosts)),
                           exc_info=error)
            self.fail("Test was expected to pass but it failed.\n")

    def run_ior_with_pool(self,
                          intercept=None,
                          test_file_suffix="",
                          test_file="daos:testFile"):
        """Execute ior against a pool and a freshly created container.

        For the POSIX api dfuse is started first and stopped again once ior
        has returned; the test file is then placed in the dfuse mount
        directory. For the DFS api the test file is "/testfile".

        Args:
            intercept (str, optional): path to the interception library. Shall
                    be used only for POSIX through DFUSE. Defaults to None.
            test_file_suffix (str, optional): suffix to add to the end of the
                test file name. Defaults to "".
            test_file (str, optional): ior test file name. Defaults to
                "daos:testFile". Is ignored when using POSIX through DFUSE
                or DFS.

        Returns:
            CmdResult: result of the ior command execution. For POSIX with a
                transfer size of "256B" ior is not run and a message string
                is returned instead.

        """
        self.update_ior_cmd_with_pool()
        # start dfuse if api is POSIX
        if self.ior_cmd.api.value == "POSIX":
            # The pool connection and the container were already set up by
            # update_ior_cmd_with_pool(); only dfuse is left to start.
            if self.ior_cmd.transfer_size.value == "256B":
                return "Skipping the case for transfer_size=256B"
            self._start_dfuse()
            test_file = os.path.join(self.dfuse.mount_dir.value, "testfile")
        elif self.ior_cmd.api.value == "DFS":
            test_file = os.path.join("/", "testfile")

        self.ior_cmd.test_file.update("".join([test_file, test_file_suffix]))

        out = self.run_ior(self.get_ior_job_manager_command(), self.processes,
                           intercept)

        if self.dfuse:
            self.dfuse.stop()
            self.dfuse = None
        return out

    def update_ior_cmd_with_pool(self):
        """Update ior_cmd with the pool and a new container."""
        # Create a pool if one does not already exist
        if self.pool is None:
            self.create_pool()
        # Always create a container
        # Don't pass uuid and pool handle to IOR.
        # It will not enable checksum feature
        self.pool.connect()
        self.create_cont()
        # Update IOR params with the pool and container params
        self.ior_cmd.set_daos_params(self.server_group, self.pool,
                                     self.container.uuid)

    def get_ior_job_manager_command(self):
        """Get the MPI job manager command for IOR.

        The test fails for an unsupported ior api or when mpich is reported
        as not installed on the clients.

        Returns:
            Mpirun: job manager object wrapping the ior command

        """
        if self.ior_cmd.api.value not in ["MPIIO", "DAOS", "POSIX", "DFS"]:
            self.fail("Unsupported IOR API")

        # Every supported api is launched with mpich
        mpio_util = MpioUtils()
        if mpio_util.mpich_installed(self.hostlist_clients) is False:
            self.fail("Exiting Test: Mpich not installed")

        return Mpirun(self.ior_cmd, mpitype="mpich")

    def run_ior(self, manager, processes, intercept=None):
        """Run the IOR command.

        The pool space is displayed before the run and again afterwards,
        whether or not the run succeeded.

        Args:
            manager (Mpirun): job manager object used to launch ior
            processes (int): number of host processes
            intercept (str, optional): path to interception library. When
                set it is exported as LD_PRELOAD. Defaults to None.

        Returns:
            object: the value returned by the job manager run() method

        """
        env = self.ior_cmd.get_default_env(str(manager), self.client_log)
        if intercept:
            env["LD_PRELOAD"] = intercept
        manager.setup_command(env, self.hostfile_clients, processes)
        try:
            self.pool.display_pool_daos_space()
            return manager.run()
        except CommandFailure as error:
            self.log.error("IOR Failed: %s", str(error))
            self.fail("Test was expected to pass but it failed.\n")
        finally:
            self.pool.display_pool_daos_space()

    def run_multiple_ior_with_pool(self, results, intercept=None):
        """Run two ior jobs in parallel through the same dfuse mount.

        The first job uses all but the last client together with the
        interception library; the second job uses the last client without it.

        Args:
            results (dict): dictionary filled with the ior metrics of each
                job, keyed by job number (1 and 2)
            intercept (str, optional): path to the interception library used
                by the first job. Defaults to None.
        """
        self.update_ior_cmd_with_pool()

        # start dfuse for POSIX api. This is specific to interception
        # library test requirements.
        self._start_dfuse()

        # Create two jobs and run in parallel.
        # Job1 runs on every client but the last one and uses
        # dfuse + interception library
        # Job2 runs on the last client and uses only dfuse.
        job1 = self.get_new_job(self.hostlist_clients[:-1], 1, results,
                                intercept)
        job2 = self.get_new_job([self.hostlist_clients[-1]], 2, results, None)

        job1.start()
        # Since same ior_cmd is used to trigger the MPIRUN
        # with different parameters, pausing for 2 seconds to
        # avoid data collisions.
        time.sleep(2)
        job2.start()
        job1.join()
        job2.join()
        self.dfuse.stop()
        self.dfuse = None

    def get_new_job(self, clients, job_num, results, intercept=None):
        """Create a new thread for ior run.

        Args:
            clients (list): hosts the ior job runs on
            job_num (int): Assigned job number
            results (dict): A dictionary object to store the ior metrics
            intercept (str, optional): Path to interception library.
                Defaults to None.

        Returns:
            threading.Thread: thread, not yet started, that will call
                run_multiple_ior()

        """
        hostfile = write_host_file.write_host_file(clients, self.workdir,
                                                   self.hostfile_clients_slots)
        job = threading.Thread(
            target=self.run_multiple_ior,
            args=[hostfile,
                  len(clients), results, job_num, intercept])
        return job

    def run_multiple_ior(self,
                         hostfile,
                         num_clients,
                         results,
                         job_num,
                         intercept=None):
        # pylint: disable=too-many-arguments
        """Run one ior job; intended as the target of a job thread.

        Args:
            hostfile (str): host file listing the clients of this job
            num_clients (int): number of clients used by this job
            results (dict): dictionary in which the ior metrics are stored
                under job_num
            job_num (int): job number, also used in the test file name
            intercept (str, optional): path to interception library. When
                set it is exported as LD_PRELOAD. Defaults to None.
        """
        # The ior command object is shared by all jobs, so it is configured
        # under the lock. The context manager releases the lock even if one
        # of the calls below raises, so the other job cannot be blocked
        # forever.
        with self.lock:
            tsize = self.ior_cmd.transfer_size.value
            testfile = os.path.join(self.dfuse.mount_dir.value,
                                    "testfile{}{}".format(tsize, job_num))
            if intercept:
                testfile += "intercept"
            self.ior_cmd.test_file.update(testfile)
            manager = self.get_ior_job_manager_command()
            procs = (
                self.processes // len(self.hostlist_clients)) * num_clients
            env = self.ior_cmd.get_default_env(str(manager), self.client_log)
            if intercept:
                env["LD_PRELOAD"] = intercept
            manager.setup_command(env, hostfile, procs)

        try:
            self.pool.display_pool_daos_space()
            out = manager.run()
            with self.lock:
                results[job_num] = IorCommand.get_ior_metrics(out)
        except CommandFailure as error:
            self.log.error("IOR Failed: %s", str(error))
            self.fail("Test was expected to pass but it failed.\n")
        finally:
            self.pool.display_pool_daos_space()

    @staticmethod
    def _size_in_bytes(size):
        """Convert a size such as 4096, "4096", "256B" or "4K" into bytes.

        Args:
            size (object): number or string holding a size. Strings may end
                with a K, M, G or T multiplier (powers of 1024), optionally
                followed by "B" or "iB"; letter case is ignored.

        Returns:
            int: the size in bytes

        Raises:
            ValueError: if the size cannot be interpreted

        """
        try:
            return int(size)
        except (TypeError, ValueError):
            pass

        text = str(size).strip().upper()
        # Drop an optional byte marker: "256B" -> "256", "4KIB" -> "4K"
        for marker in ("IB", "B"):
            if text.endswith(marker):
                text = text[:-len(marker)]
                break
        multipliers = {
            "K": 1024, "M": 1024 ** 2, "G": 1024 ** 3, "T": 1024 ** 4}
        factor = multipliers.get(text[-1:], 1)
        if factor != 1:
            text = text[:-1]
        return int(float(text) * factor)

    def verify_pool_size(self, original_pool_info, processes):
        """Validate the pool size.

        The test fails if the free space consumed since original_pool_info
        was taken is smaller than the aggregate amount ior reports for the
        given number of processes.

        Args:
            original_pool_info (PoolInfo): Pool info prior to IOR
            processes (int): number of processes
        """
        # Get the current pool size for comparison
        current_pool_info = self.pool.pool.pool_query()

        # The transfer size may be given as a string (e.g. "256B"), so it is
        # converted to bytes before being compared with the 4K threshold.
        try:
            transfer_size = self._size_in_bytes(
                self.ior_cmd.transfer_size.value)
        except ValueError as error:
            self.fail("Invalid IOR transfer size: {}".format(error))

        # If Transfer size is >= 4K, Pool size will verified against NVMe,
        # else it will be checked against SCM
        if transfer_size >= 4096:
            self.log.info(
                "Size is > 4K,Size verification will be done with NVMe size")
            storage_index = 1
        else:
            self.log.info(
                "Size is < 4K,Size verification will be done with SCM size")
            storage_index = 0
        actual_pool_size = \
            original_pool_info.pi_space.ps_space.s_free[storage_index] - \
            current_pool_info.pi_space.ps_space.s_free[storage_index]
        expected_pool_size = self.ior_cmd.get_aggregate_total(processes)

        if actual_pool_size < expected_pool_size:
            self.fail(
                "Pool Free Size did not match: actual={}, expected={}".format(
                    actual_pool_size, expected_pool_size))
Beispiel #8
0
class MdtestBase(TestWithServers):
    """Base mdtest class.

    Holds the mdtest command, creates the pool, container and dfuse mount it
    needs and launches it through an MPI job manager.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a MdtestBase object."""
        super(MdtestBase, self).__init__(*args, **kwargs)
        # Everything below is populated later, by setUp() or on demand
        self.mdtest_cmd = None
        self.daos_cmd = None
        self.dfuse = None
        self.processes = None
        self.hostfile_clients_slots = None

    def setUp(self):
        """Set up each test case.

        Reads the mdtest command parameters, the process count and the job
        manager name from the test yaml.
        """
        # Use log files dedicated to this test before starting anything
        self.update_log_file_names()
        # Bring up the servers and agents
        super(MdtestBase, self).setUp()

        # Command wrapper used for container creation
        self.daos_cmd = DaosCommand(self.bin)

        # Mdtest command and its yaml parameters
        self.mdtest_cmd = MdtestCommand()
        self.mdtest_cmd.get_params(self)
        self.processes = self.params.get(
            "np", '/run/mdtest/client_processes/*')
        # The job manager name falls back to "MPICH" when not set in the yaml
        self.manager = self.params.get("manager", '/run/mdtest/*', "MPICH")

        self.log.info('Clients %s', self.hostlist_clients)
        self.log.info('Servers %s', self.hostlist_servers)

    def tearDown(self):
        """Tear down each test case."""
        try:
            # dfuse is still set only when a run did not stop it itself
            if self.dfuse:
                self.dfuse.stop()
        finally:
            # The servers and agents are stopped no matter what happened above
            super(MdtestBase, self).tearDown()

    def create_pool(self):
        """Create the TestPool object used by mdtest."""
        test_pool = TestPool(self.context,
                             dmg_command=self.get_dmg_command())
        self.pool = test_pool
        # Read the pool parameters from the yaml, then create the pool
        test_pool.get_params(self)
        test_pool.create()

    def _create_cont(self):
        """Create a container of the type configured in the test yaml.

        The test fails if no container UUID can be found in the output of
        the create command.

        Returns:
            str: UUID of the created container

        """
        container_type = self.params.get("type", "/run/container/*")
        create_result = self.daos_cmd.container_create(
            pool=self.pool.uuid,
            svc=self.pool.svc_ranks,
            cont_type=container_type)

        # The UUID follows the words "created container" in the output
        uuid_match = re.search(r"created\s+container\s+([0-9a-f-]+)",
                               create_result.stdout)
        if uuid_match is None:
            self.fail("Error obtaining the container uuid from: {}".format(
                create_result.stdout))
        return uuid_match.group(1)

    def _start_dfuse(self):
        """Create a Dfuse object for the pool and a new container and run it.

        The test fails if the dfuse command reports a CommandFailure.
        """
        dfuse = Dfuse(self.hostlist_clients, self.tmp)
        self.dfuse = dfuse
        dfuse.get_params(self)

        # Point dfuse at the pool and at a newly created container
        dfuse.set_dfuse_params(self.pool)
        dfuse.set_dfuse_cont_param(self._create_cont())
        dfuse.set_dfuse_exports(self.server_managers[0], self.client_log)

        try:
            dfuse.run()
        except CommandFailure as error:
            self.log.error("Dfuse command %s failed on hosts %s",
                           str(self.dfuse),
                           self.dfuse.hosts,
                           exc_info=error)
            self.fail("Unable to launch Dfuse.\n")

    def execute_mdtest(self):
        """Runner method for Mdtest.

        Creates a pool when the test has none, starts dfuse for the POSIX api
        and stops it again after mdtest has run.
        """
        if self.pool is None:
            self.create_pool()
        # Hand the server group and the pool over to the mdtest command
        self.mdtest_cmd.set_daos_params(self.server_group, self.pool)

        if self.mdtest_cmd.api.value == "POSIX":
            # POSIX runs go through a dfuse mount; the container is created
            # as part of starting dfuse.
            self._start_dfuse()
            self.mdtest_cmd.test_dir.update(self.dfuse.mount_dir.value)

        job_manager = self.get_mdtest_job_manager_command(self.manager)
        self.run_mdtest(job_manager, self.processes)

        if self.dfuse:
            self.dfuse.stop()
            self.dfuse = None

    def get_mdtest_job_manager_command(self, manager):
        """Get the MPI job manager command for Mdtest.

        Args:
            manager (str): "MPICH" selects Mpirun with mpich; any other value
                selects Orterun

        Returns:
            JobManager: the object for the mpi job manager command

        """
        if manager != "MPICH":
            return Orterun(self.mdtest_cmd)

        # mpich has to be available on the clients before it can be used
        mpio_util = MpioUtils()
        if mpio_util.mpich_installed(self.hostlist_clients) is False:
            self.fail("Exiting Test: Mpich not installed")
        return Mpirun(self.mdtest_cmd, mpitype="mpich")

    def run_mdtest(self, manager, processes):
        """Run the Mdtest command.

        The pool space is displayed before the run and again afterwards,
        whether or not the run succeeded.

        Args:
            manager (JobManager): mpi job manager object used to launch mdtest
            processes (int): number of host processes
        """
        job_env = self.mdtest_cmd.get_default_env(str(manager),
                                                  self.client_log)
        manager.assign_hosts(
            self.hostlist_clients, self.workdir, self.hostfile_clients_slots)
        manager.assign_processes(processes)
        manager.assign_environment(job_env)
        try:
            self.pool.display_pool_daos_space()
            manager.run()
        except CommandFailure as error:
            self.log.error("Mdtest Failed: %s", str(error))
            self.fail("Test was expected to pass but it failed.\n")
        finally:
            self.pool.display_pool_daos_space()
Beispiel #9
0
class ParallelIo(FioBase, IorTestBase):
    """Base Parallel IO test class.

    :avocado: recursive
    """
    def __init__(self, *args, **kwargs):
        """Initialize a ParallelIo object."""
        super(ParallelIo, self).__init__(*args, **kwargs)
        # This test handles several pools and containers, so both are lists
        self.pool = []
        self.container = []
        self.dfuse = None
        # Counts used by the test methods
        self.cont_count = None
        self.pool_count = None
        # Free space readings taken at different stages of a test
        self.statvfs_info_initial = None
        self.statvfs_before_cont_destroy = None
        self.statvfs_after_cont_destroy = None

    def setUp(self):
        """Set up each test case by running the parent class set up."""
        # The parent set up starts the servers and agents
        super(ParallelIo, self).setUp()

    def tearDown(self):
        """Tear down each test case, stopping dfuse if it was started."""
        try:
            if self.dfuse:
                self.dfuse.stop()
        finally:
            # The servers and agents are stopped even if stopping dfuse fails
            super(ParallelIo, self).tearDown()

    def create_pool(self):
        """Create a TestPool object and add it to the list of pools."""
        new_pool = TestPool(self.context, dmg_command=self.get_dmg_command())
        # Read the pool parameters from the yaml, then create the pool
        new_pool.get_params(self)
        new_pool.create()

        self.pool.append(new_pool)

    # pylint: disable=arguments-differ
    def create_cont(self, pool):
        """Create a TestContainer in a pool and add it to the container list.

          Args:
            pool (TestPool): pool in which the container is created
        """
        new_container = TestContainer(
            pool, daos_command=DaosCommand(self.bin))
        # Read the container parameters from the yaml, then create it
        new_container.get_params(self)
        new_container.create()

        self.container.append(new_container)

    def start_dfuse(self, pool=None):
        """Create a Dfuse object and run it on the clients.

        The test fails if the dfuse command reports a CommandFailure.

          Args:
            pool (TestPool): Test pool object if dfuse is intended to be
                             started using pool uuid option.
        """
        dfuse = Dfuse(self.hostlist_clients, self.tmp)
        self.dfuse = dfuse
        dfuse.get_params(self)

        # The pool is only passed on to dfuse when one was supplied
        if pool:
            dfuse.set_dfuse_params(pool)
        dfuse.set_dfuse_exports(self.server_managers[0], self.client_log)

        try:
            dfuse.run()
        except CommandFailure as error:
            self.log.error("Dfuse command %s failed on hosts %s",
                           str(self.dfuse),
                           self.dfuse.hosts,
                           exc_info=error)
            self.fail("Test was expected to pass but it failed.\n")

    def stat_bfree(self, path):
        """Run ``stat -c%a -f`` for a path on the first client over ssh.

        The test fails if the command exits with a non-zero status.

          Args:
            path (str): path to query.
          Returns:
            int: the value printed by the stat command
        """
        remote_login = "{}@{}".format(getuser(), self.hostlist_clients[0])
        stat_cmd = ["ssh", remote_login, "stat -c%a -f {}".format(path)]
        try:
            stat_output = subprocess.check_output(stat_cmd)
        except subprocess.CalledProcessError as err:
            self.fail("Get free block size method failed with: {}".format(err))

        return int(stat_output)

    def statvfs_pool(self, path):
        """Collect the stat_bfree() value of every pool directory under path.

          Args:
            path (str): directory containing one sub directory per pool,
                        named after the pool uuid.

          Returns:
            list: one stat_bfree() result per pool in self.pool, in the same
                  order.
        """
        free_space = []
        for test_pool in self.pool:
            pool_dir = str("/".join([path, test_pool.uuid]))
            free_space.append(self.stat_bfree(pool_dir))
            # The list collected so far is logged after every pool
            self.log.info("Statvfs List Output: %s", free_space)

        return free_space

    def verify_aggregation(self, reduced_space, count):
        """Wait until the expected space has been returned to a pool.

           The free space after the container destroy is compared with the
           free space before it plus reduced_space. While it is lower, the
           method sleeps 60 secs and reads the free space again. After 4
           such waits the test is failed.

          Args:
            reduced_space: expected space to be returned
            count: index of the pool in the free space lists
        """
        waits_done = 0
        while (self.statvfs_before_cont_destroy[count] + reduced_space >
               self.statvfs_after_cont_destroy[count]):
            # give up once 4 x 60 secs have been spent waiting
            if waits_done >= 4:
                self.log.info("Free space before io: %s",
                              self.statvfs_info_initial)
                self.log.info("Free space after io: %s",
                              self.statvfs_before_cont_destroy)
                self.log.info("Free space at test termination: %s",
                              self.statvfs_after_cont_destroy)
                self.fail("Aggregation did not complete as expected")
            time.sleep(60)
            # refresh the readings of all pools before checking again
            mount_dir = self.dfuse.mount_dir.value
            self.statvfs_after_cont_destroy = self.statvfs_pool(mount_dir)
            waits_done += 1
    def test_parallelio(self):
        """Jira ID: DAOS-3775.

        Test Description:
            Purpose of this test is to mount dfuse and verify multiple
            containers using fio.
        Use cases:
            Mount dfuse using pool uuid.
            Create multiple containers under that dfuse mount point.
            Check those containers are accessible from that mount point.
            Perform io to those containers using FIO
            Delete one of the containers
            Check if dfuse is still running. If not, fail the test and exit.
            Otherwise, try accessing the deleted container.
            This should fail.
            Check dfuse again.
        :avocado: tags=all,hw,daosio,medium,ib2,full_regression,parallelio
        """
        # get the number of containers to create
        self.cont_count = self.params.get("cont_count", '/run/container/*')

        threads = []

        # Create a pool and start dfuse.
        self.create_pool()
        self.start_dfuse(self.pool[0])
        # create multiple containers
        for _ in range(self.cont_count):
            self.create_cont(self.pool[0])

        # check if all the created containers can be accessed and perform
        # io on each container using fio in parallel
        for cont in self.container:
            dfuse_cont_dir = self.dfuse.mount_dir.value + "/" + cont.uuid
            cmd = u"ls -a {}".format(dfuse_cont_dir)
            try:
                # execute bash cmds
                ret_code = general_utils.pcmd(self.hostlist_clients,
                                              cmd,
                                              timeout=30)
                # The result is keyed by return code. Any key other than 0
                # means at least one host failed, even if others succeeded.
                if 0 not in ret_code or len(ret_code) > 1:
                    error_hosts = NodeSet(",".join(
                        str(node_set)
                        for code, node_set in ret_code.items()
                        if code != 0))
                    raise CommandFailure("Error running '{}' on the following "
                                         "hosts: {}".format(cmd, error_hosts))
            # report error if any command fails
            except CommandFailure as error:
                self.log.error("ParallelIo Test Failed: %s", str(error))
                self.fail("Test was expected to pass but it failed.\n")
            # run fio on the container in its own thread
            thread = threading.Thread(target=self.execute_fio,
                                      args=(dfuse_cont_dir, False))
            threads.append(thread)
            thread.start()

        # wait for all fio jobs to be finished
        for job in threads:
            job.join()

        # destroy first container; its uuid is saved first so that it can
        # still be used for the path and messages below
        container_to_destroy = self.container[0].uuid
        self.container[0].destroy(1)

        # check dfuse if it is running fine
        self.dfuse.check_running()

        # try accessing destroyed container, it should fail
        try:
            self.execute_fio(
                self.dfuse.mount_dir.value + "/" + container_to_destroy,
                False)
        except CommandFailure:
            self.log.info("This run is expected to fail")

            # check dfuse is still running after attempting to access deleted
            # container.
            self.dfuse.check_running()
        else:
            self.fail("Fio was able to access destroyed container: {}".format(
                container_to_destroy))

    def test_multipool_parallelio(self):
        """Jira ID: DAOS-3775.

        Test Description:
            Purpose of this test is to verify aggregation across multiple
            pools and containers.
        Use cases:
            Create 10 pools
            Create 10 containers under each pool.
            Record statvfs free space for each pool.
            Perform parallel io to each pool without deleting the file
            after write.
            Record free space using statvfs after write.
            Delete half of the containers from each pool.
            Calculate the expected amount of data to be deleted when
            containers are destroyed.
            Record free space after container destroy.
            Loop until either all the space is returned back after
            aggregation completion or exit the loop after trying for
            240 secs of wait and fail the test.

        :avocado: tags=all,hw,daosio,medium,ib2,full_regression
        :avocado: tags=multipoolparallelio
        """
        # test params
        ior_threads = []
        cont_threads = []
        verify_threads = []
        self.pool_count = self.params.get("pool_count", '/run/pool/*')
        self.cont_count = self.params.get("cont_count", '/run/container/*')
        processes = self.params.get("np", '/run/ior/client_processes/*')

        # Create the pools one after another.  The earlier
        # Thread(target=self.create_pool()) form invoked create_pool() while
        # the Thread was being constructed and handed its return value to
        # the thread as the target, so the pools were never created
        # concurrently.  Calling it directly keeps the effective behaviour
        # and lets any failure surface in the test thread.
        for _ in range(self.pool_count):
            self.create_pool()

        # start dfuse using --svc option only.
        self.start_dfuse()

        # record free space using statvfs before any data is written.
        self.statvfs_info_initial = self.statvfs_pool(
            self.dfuse.mount_dir.value)

        # Create the containers for each pool. Container create process
        # cannot be parallelised as different container create could
        # complete at different times and get appended in the self.container
        # variable in unorderly manner, causing problems during the write
        # process (containers are looked up by position below).
        for pool in self.pool:
            for _ in range(self.cont_count):
                self.create_cont(pool)

        # Try to access each dfuse mounted container using ls. Once it is
        # accessed successfully, go ahead and perform io on that location
        # using ior. This process of performing io is done in parallel for
        # all containers using threads.
        for pool_count, pool in enumerate(self.pool):
            dfuse_pool_dir = str(self.dfuse.mount_dir.value + "/" + pool.uuid)
            for counter in range(self.cont_count):
                # containers were appended pool by pool, cont_count per pool
                cont_num = (pool_count * self.cont_count) + counter
                dfuse_cont_dir = str(dfuse_pool_dir + "/" +
                                     self.container[cont_num].uuid)
                # NOTE(review): the leading "###" looks like the ls check
                # was deliberately disabled (a shell would treat the whole
                # string as a comment) - confirm before re-enabling.
                cmd = u"###ls -a {}".format(dfuse_cont_dir)
                self.execute_cmd(cmd)

                # run ior on all containers
                # NOTE(review): self.ior_cmd is shared by every thread
                # started here and is updated again on the next iteration;
                # verify the job manager command captures what it needs.
                test_file = dfuse_cont_dir + "/testfile"
                self.ior_cmd.test_file.update(test_file)
                self.ior_cmd.set_daos_params(self.server_group, pool,
                                             self.container[cont_num].uuid)
                thread = threading.Thread(
                    target=self.run_ior,
                    args=(self.get_ior_job_manager_command(), processes, None,
                          False))
                ior_threads.append(thread)
                thread.start()

        # wait for all ior jobs to be finished
        for job in ior_threads:
            job.join()

        # Record free space after io
        self.statvfs_before_cont_destroy = self.statvfs_pool(
            self.dfuse.mount_dir.value)

        # Destroy half of the containers from each pool: drop the first
        # cont_count // 2 containers of every pool from self.container and
        # destroy the ones that remain in the list.  Selecting by position
        # within each pool works for any pool count and for odd container
        # counts, unlike repeated slice deletion with a running offset.
        half = self.cont_count // 2
        self.container[:] = [
            cont for index, cont in enumerate(self.container)
            if index % self.cont_count >= half]

        for cont in self.container:
            cont_thread = threading.Thread(target=cont.destroy)
            cont_threads.append(cont_thread)
            cont_thread.start()

        for destroy_job in cont_threads:
            destroy_job.join()

        # Record free space after container destroy.
        self.statvfs_after_cont_destroy = self.statvfs_pool(
            self.dfuse.mount_dir.value)

        # Calculate the expected space to be returned after containers
        # are destroyed.
        reduced_space = (self.cont_count *
                         int(self.ior_cmd.block_size.value)) / 2

        # Verify if expected space is returned for each pool after containers
        # were destroyed. If not, wait for 60 secs and check again. Wait 4
        # times, otherwise exit the test with a failure.
        for pool_index in range(self.pool_count):
            thread = threading.Thread(target=self.verify_aggregation,
                                      args=(reduced_space, pool_index))
            verify_threads.append(thread)
            thread.start()

        for job in verify_threads:
            job.join()
Beispiel #10
0
class ParallelIo(FioBase):
    """Base Parallel IO test class.

    Mounts dfuse on a pool, runs fio against several containers in parallel
    and checks that dfuse survives the destruction of one of them.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a ParallelIo object."""
        super(ParallelIo, self).__init__(*args, **kwargs)
        self.dfuse = None
        self.cont_count = None
        # TestContainer objects, in creation order
        self.container = []

    def setUp(self):
        """Set up each test case."""
        # Start the servers and agents
        super(ParallelIo, self).setUp()

    def tearDown(self):
        """Tear down each test case, stopping dfuse if it was started."""
        try:
            if self.dfuse:
                self.dfuse.stop()
        finally:
            # Stop the servers and agents
            super(ParallelIo, self).tearDown()

    def create_pool(self):
        """Create a TestPool object and the pool it describes."""
        # Get the pool params
        self.pool = TestPool(
            self.context, dmg_command=self.get_dmg_command())
        self.pool.get_params(self)

        # Create a pool
        self.pool.create()

    def create_cont(self):
        """Create a container in self.pool and record it in self.container."""
        # Get container params
        container = TestContainer(
            self.pool, daos_command=DaosCommand(self.bin))
        container.get_params(self)

        # create container
        container.create()
        self.container.append(container)

    def start_dfuse(self):
        """Create a Dfuse object for self.pool and start dfuse.

        The test is failed if the dfuse command raises CommandFailure.
        """
        # Get Dfuse params
        self.dfuse = Dfuse(self.hostlist_clients, self.tmp)
        self.dfuse.get_params(self)

        # update dfuse params
        self.dfuse.set_dfuse_params(self.pool)
        self.dfuse.set_dfuse_exports(self.server_managers[0], self.client_log)

        try:
            # start dfuse
            self.dfuse.run()
        except CommandFailure as error:
            self.log.error("Dfuse command %s failed on hosts %s",
                           str(self.dfuse),
                           self.dfuse.hosts,
                           exc_info=error)
            self.fail("Test was expected to pass but it failed.\n")

    def test_parallelio(self):
        """Jira ID: DAOS-3775.

        Test Description:
            Purpose of this test is to mount dfuse and verify multiple
            containers using fio.
        Use cases:
            Mount dfuse using pool uuid.
            Create multiple containers under that dfuse mount point.
            Check those containers are accessible from that mount point.
            Perform io to those containers using FIO
            Delete one of the containers
            Check if dfuse is still running. If not, fail the test and exit.
            Otherwise, try accessing the deleted container.
            This should fail.
            Check dfuse again.
        :avocado: tags=all,hw,daosio,medium,ib2,full_regression,parallelio
        """
        # number of containers to create under the dfuse mount point
        self.cont_count = self.params.get("cont_count", '/run/container/*')

        threads = []

        # Create a pool and start dfuse.
        self.create_pool()
        self.start_dfuse()

        # Create the containers one after another.  The earlier
        # Thread(target=self.create_cont()) form already called
        # create_cont() while constructing each Thread (the thread target
        # was the call's return value), so creation was never concurrent;
        # calling it directly keeps that behaviour without no-op threads.
        for _ in range(self.cont_count):
            self.create_cont()

        # check if all the created containers can be accessed and perform
        # io on each container using fio in parallel
        for cont in self.container:
            dfuse_cont_dir = self.dfuse.mount_dir.value + "/" + cont.uuid
            cmd = u"ls -a {}".format(dfuse_cont_dir)
            try:
                # Run the listing on every client; the result is consumed
                # below as a mapping of return code -> hosts.
                ret_code = general_utils.pcmd(
                    self.hostlist_clients, cmd, timeout=30)
                # Fail when any host reported a non-zero return code, not
                # only when every host failed.
                if 0 not in ret_code or len(ret_code) > 1:
                    error_hosts = NodeSet(
                        ",".join(
                            [str(node_set) for code, node_set in
                             ret_code.items() if code != 0]))
                    raise CommandFailure(
                        "Error running '{}' on the following "
                        "hosts: {}".format(cmd, error_hosts))
            # report error if any command fails
            except CommandFailure as error:
                self.log.error("ParallelIo Test Failed: %s",
                               str(error))
                self.fail("Test was expected to pass but "
                          "it failed.\n")
            # run fio against this container in its own thread
            thread = threading.Thread(
                target=self.execute_fio, args=(dfuse_cont_dir, False))
            threads.append(thread)
            thread.start()

        # wait for all fio jobs to be finished
        for job in threads:
            job.join()

        # Destroy the first container.  Its uuid is captured beforehand so
        # the path and any failure message below do not depend on what the
        # container object reports once it has been destroyed.
        container_to_destroy = self.container[0].uuid
        self.container[0].destroy(1)

        # check dfuse if it is running fine
        self.dfuse.check_running()

        # try accessing destroyed container, it should fail
        try:
            self.execute_fio(
                self.dfuse.mount_dir.value + "/" + container_to_destroy,
                False)
        except CommandFailure:
            self.log.info("This run is expected to fail")
        else:
            self.fail("Fio was able to access destroyed container: {}".format(
                container_to_destroy))

        # check dfuse is still running after attempting to access deleted
        # container (only reached when the access failed as expected).
        self.dfuse.check_running()
Beispiel #11
0
class FioBase(TestWithServers):
    """Base fio class.

    Provides pool/container/dfuse setup helpers and a runner method for
    tests that execute fio.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a FioBase object."""
        super(FioBase, self).__init__(*args, **kwargs)
        self.fio_cmd = None
        self.processes = None
        self.manager = None
        self.dfuse = None

    def setUp(self):
        """Set up each test case."""
        # obtain separate logs
        self.update_log_file_names()

        # Start the servers and agents
        super(FioBase, self).setUp()

        # removing runner node from hostlist_client, only need one client node.
        self.hostlist_clients = self.hostlist_clients[:-1]

        # Get the parameters for Fio
        self.fio_cmd = FioCommand()
        self.fio_cmd.get_params(self)
        self.processes = self.params.get("np", '/run/fio/client_processes/*')
        self.manager = self.params.get("manager", '/run/fio/*', "MPICH")

    def tearDown(self):
        """Tear down each test case."""
        try:
            # NOTE(review): this only drops the reference; unlike sibling
            # test classes it does not call self.dfuse.stop() - confirm the
            # Dfuse object cleans up after itself when released.
            self.dfuse = None
        finally:
            # Stop the servers and agents
            super(FioBase, self).tearDown()

    def _create_pool(self):
        """Create a TestPool object and the pool it describes."""
        # Get the pool params
        # pylint: disable=attribute-defined-outside-init
        self.pool = TestPool(self.context, dmg_command=self.get_dmg_command())
        self.pool.get_params(self)

        # Create a pool
        self.pool.create()

    def _create_cont(self):
        """Create a POSIX container in self.pool with the daos command.

        The test is failed if the command cannot be launched, exits with a
        non-zero status, or prints fewer fields than expected.

        Returns:
            the fourth whitespace-separated field of the command output,
            which is taken to be the new container UUID

        """
        # TO-DO: Enable container using TestContainer object,
        # once DAOS-3355 is resolved.
        # Get Container params
        # self.container = TestContainer(self.pool)
        # self.container.get_params(self)

        # create container
        # self.container.create()
        env = Dfuse(self.hostlist_clients, self.tmp).get_default_env()
        # command to create container of posix type
        cmd = env + "daos cont create --pool={} --svc={} --type=POSIX".format(
            self.pool.uuid, ":".join(
                [str(item) for item in self.pool.svc_ranks]))
        try:
            process = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                       shell=True)
            output, _ = process.communicate()
        except OSError as err:
            self.fail("Container create failed:{}".format(err))

        # Popen/communicate never raise CalledProcessError, so the exit
        # status and the shape of the output have to be checked explicitly;
        # otherwise a failed create only shows up as an IndexError below.
        fields = output.split()
        if process.returncode != 0 or len(fields) < 4:
            self.fail("Container create failed:{}".format(output))

        cont_uuid = fields[3]
        self.log.info("Container created with UUID %s", cont_uuid)
        return cont_uuid

    def _start_dfuse(self):
        """Create a Dfuse object for a new container and start dfuse.

        The test is failed if the dfuse command raises CommandFailure.
        """
        # Get Dfuse params
        self.dfuse = Dfuse(self.hostlist_clients, self.tmp, self.basepath)
        self.dfuse.get_params(self)

        # update dfuse params
        self.dfuse.set_dfuse_params(self.pool)
        self.dfuse.set_dfuse_cont_param(self._create_cont())

        try:
            # start dfuse
            self.dfuse.run()
        except CommandFailure as error:
            self.log.error("Dfuse command %s failed on hosts %s",
                           str(self.dfuse), str(
                               NodeSet.fromlist(self.dfuse.hosts)),
                           exc_info=error)
            self.fail("Unable to launch Dfuse.\n")

    def execute_fio(self):
        """Runner method for Fio."""
        # Create a pool if one does not already exist
        if self.pool is None:
            self._create_pool()

        # start dfuse if api is POSIX
        if self.fio_cmd.api.value == "POSIX":
            # Connect to the pool, create container and then start dfuse
            # Uncomment below two lines once DAOS-3355 is resolved
            # self.pool.connect()
            # self.create_cont()
            self._start_dfuse()
            # point fio at the dfuse mount directory
            self.fio_cmd.update(
                "global", "directory", self.dfuse.mount_dir.value,
                "fio --name=global --directory")

        # Run Fio
        self.fio_cmd.hosts = self.hostlist_clients
        self.fio_cmd.run()
Beispiel #12
0
class MdtestBase(TestWithServers):
    """Base mdtest class.

    Provides pool/container/dfuse setup helpers and runner methods for
    tests that execute mdtest through an MPI job manager.

    :avocado: recursive
    """
    def __init__(self, *args, **kwargs):
        """Initialize a MdtestBase object."""
        super(MdtestBase, self).__init__(*args, **kwargs)
        self.mdtest_cmd = None
        self.processes = None
        # assigned from the test params in setUp()
        self.manager = None
        self.hostfile_clients_slots = None
        self.dfuse = None
        self.container = None

    def setUp(self):
        """Set up each test case."""
        # obtain separate logs
        self.update_log_file_names()
        # Start the servers and agents
        super(MdtestBase, self).setUp()

        # Get the parameters for Mdtest
        self.mdtest_cmd = MdtestCommand()
        self.mdtest_cmd.get_params(self)
        self.processes = self.params.get("np",
                                         '/run/mdtest/client_processes/*')
        self.manager = self.params.get("manager", '/run/mdtest/*', "MPICH")

        # Until DAOS-3320 is resolved run Mdtest for POSIX
        # with single client node
        if self.mdtest_cmd.api.value == "POSIX":
            self.hostlist_clients = [self.hostlist_clients[0]]
            self.hostfile_clients = write_host_file.write_host_file(
                self.hostlist_clients, self.workdir,
                self.hostfile_clients_slots)

    def tearDown(self):
        """Tear down each test case."""
        try:
            # NOTE(review): this only drops the reference; unlike sibling
            # test classes it does not call self.dfuse.stop() - confirm the
            # Dfuse object cleans up after itself when released.
            self.dfuse = None
        finally:
            # Stop the servers and agents
            super(MdtestBase, self).tearDown()

    def _create_pool(self):
        """Create a TestPool object and the pool it describes."""
        # Get the pool params
        self.pool = TestPool(self.context, self.log)
        self.pool.get_params(self)

        # Create a pool
        self.pool.create()

    def _create_cont(self):
        """Create a POSIX container with the daos command.

        The pool UUID and service list are taken from self.mdtest_cmd.  The
        test is failed if the command cannot be launched, exits with a
        non-zero status, or prints fewer fields than expected.

        Returns:
            the fourth whitespace-separated field of the command output,
            which is taken to be the new container UUID

        """
        # TO-DO: Enable container using TestContainer object,
        # once DAOS-3355 is resolved.
        # Get Container params
        # self.container = TestContainer(self.pool)
        # self.container.get_params(self)

        # create container
        # self.container.create()
        env = Dfuse(self.hostlist_clients, self.tmp).get_default_env()
        # command to create container of posix type
        cmd = env + "daos cont create --pool={} --svc={} --type=POSIX".format(
            self.mdtest_cmd.dfs_pool_uuid.value,
            self.mdtest_cmd.dfs_svcl.value)
        try:
            process = subprocess.Popen(cmd,
                                       stdout=subprocess.PIPE,
                                       shell=True)
            output, _ = process.communicate()
        except OSError as err:
            self.fail("Container create failed:{}".format(err))

        # Popen/communicate never raise CalledProcessError, so the exit
        # status and the shape of the output have to be checked explicitly;
        # otherwise a failed create only shows up as an IndexError below.
        fields = output.split()
        if process.returncode != 0 or len(fields) < 4:
            self.fail("Container create failed:{}".format(output))

        cont_uuid = fields[3]
        self.log.info("Container created with UUID %s", cont_uuid)
        return cont_uuid

    def _start_dfuse(self):
        """Create a Dfuse object for a new container and start dfuse.

        The test is failed if the dfuse command raises CommandFailure.
        """
        # Get Dfuse params
        self.dfuse = Dfuse(self.hostlist_clients, self.tmp, self.basepath)
        self.dfuse.get_params(self)

        # update dfuse params
        self.dfuse.set_dfuse_params(self.pool)
        self.dfuse.set_dfuse_cont_param(self._create_cont())

        try:
            # start dfuse
            self.dfuse.run()
        except CommandFailure as error:
            # The hosts are handed to Dfuse as a list, so build the NodeSet
            # with fromlist() as the sibling test classes do.
            self.log.error("Dfuse command %s failed on hosts %s",
                           str(self.dfuse),
                           str(NodeSet.fromlist(self.dfuse.hosts)),
                           exc_info=error)
            self.fail("Unable to launch Dfuse.\n")

    def execute_mdtest(self):
        """Runner method for Mdtest."""
        # Create a pool if one does not already exist
        if self.pool is None:
            self._create_pool()
        # set Mdtest params
        self.mdtest_cmd.set_daos_params(self.server_group, self.pool)

        # start dfuse if api is POSIX
        if self.mdtest_cmd.api.value == "POSIX":
            # Connect to the pool, create container and then start dfuse
            # Uncomment below two lines once DAOS-3355 is resolved
            # self.pool.connect()
            # self.create_cont()
            self._start_dfuse()
            self.mdtest_cmd.test_dir.update(self.dfuse.mount_dir.value)

        # Run Mdtest
        self.run_mdtest(self.get_job_manager_command(self.manager),
                        self.processes)

    def get_job_manager_command(self, manager):
        """Get the MPI job manager command for Mdtest.

        Args:
            manager (str): "MPICH" selects Mpirun; any other value selects
                Orterun

        Returns:
            JobManager: the object for the mpi job manager command

        """
        # Initialize MpioUtils if mdtest needs to be run using mpich
        if manager == "MPICH":
            mpio_util = MpioUtils()
            if mpio_util.mpich_installed(self.hostlist_clients) is False:
                self.fail("Exiting Test: Mpich not installed")
            path = os.path.join(mpio_util.mpichinstall, "bin")
            return Mpirun(self.mdtest_cmd, path)

        path = os.path.join(self.ompi_prefix, "bin")
        return Orterun(self.mdtest_cmd, path)

    def run_mdtest(self, manager, processes):
        """Run the Mdtest command.

        The test is failed if the job manager raises CommandFailure.

        Args:
            manager (JobManager): mpi job manager command object, as
                returned by get_job_manager_command()
            processes (int): number of host processes
        """
        env = self.mdtest_cmd.get_default_env(str(manager), self.tmp,
                                              self.client_log)
        manager.setup_command(env, self.hostfile_clients, processes)
        try:
            manager.run()
        except CommandFailure as error:
            self.log.error("Mdtest Failed: %s", str(error))
            self.fail("Test was expected to pass but it failed.\n")
Beispiel #13
0
class DfuseContainerCheck(TestWithServers):
    """Base Dfuse Container check test class.

    Checks how dfuse behaves when asked to mount containers of different
    types.

    :avocado: recursive
    """
    def __init__(self, *args, **kwargs):
        """Initialize a DfuseContainerCheck object."""
        super(DfuseContainerCheck, self).__init__(*args, **kwargs)
        self.dfuse = None
        self.pool = None
        self.container = None

    def setUp(self):
        """Set up each test case."""
        # Start the servers and agents
        super(DfuseContainerCheck, self).setUp()

    def tearDown(self):
        """Tear down each test case, stopping dfuse if it was started."""
        try:
            if self.dfuse:
                self.dfuse.stop()
        finally:
            # Stop the servers and agents
            super(DfuseContainerCheck, self).tearDown()

    def create_pool(self):
        """Create a TestPool object and the pool it describes."""
        # Get the pool params
        self.pool = TestPool(self.context, dmg_command=self.get_dmg_command())
        self.pool.get_params(self)

        # Create a pool
        self.pool.create()

    def start_dfuse(self):
        """Create a Dfuse object for self.pool/self.container and start it.

        The test is failed if the dfuse command raises CommandFailure.
        """
        # Get Dfuse params
        self.dfuse = Dfuse(self.hostlist_clients, self.tmp)
        self.dfuse.get_params(self)

        # update dfuse params
        self.dfuse.set_dfuse_params(self.pool)
        self.dfuse.set_dfuse_cont_param(self.container)
        self.dfuse.set_dfuse_exports(self.server_managers[0], self.client_log)

        try:
            # start dfuse
            self.dfuse.run(False)
        except CommandFailure as error:
            self.log.error("Dfuse command %s failed on hosts %s",
                           str(self.dfuse),
                           self.dfuse.hosts,
                           exc_info=error)
            self.fail("Test was expected to pass but it failed.\n")

    def test_dfusecontainercheck(self):
        """Jira ID: DAOS-3635.

        Test Description:
            Purpose of this test is to try and mount different container types
            to dfuse and check the behavior.
        Use cases:
            Create pool
            Create container of type default
            Try to mount to dfuse and check the behaviour.
            Create container of type POSIX.
            Try to mount to dfuse and check the behaviour.
        :avocado: tags=all,small,full_regression,dfusecontainercheck
        """
        # container types to try; "" stands for the default type
        cont_types = self.params.get("cont_types", '/run/container/*')

        # Create a pool; dfuse is started once per container type below.
        self.create_pool()

        for cont_type in cont_types:
            # Get container params
            self.container = TestContainer(self.pool,
                                           daos_command=DaosCommand(self.bin))
            self.container.get_params(self)
            # create container
            if cont_type == "POSIX":
                self.container.type.update(cont_type)
            self.container.create()
            try:
                # mount fuse
                self.start_dfuse()
                # check if fuse got mounted
                self.dfuse.check_running()
                # fail the test if fuse mounts with non-posix type container
                if cont_type == "":
                    self.fail(
                        "Non-Posix type container got mounted over dfuse")
            except CommandFailure as error:
                # expected to throw CommandFailure exception for non-posix type
                # container
                if cont_type == "":
                    # Adjacent literals instead of a backslash continuation
                    # inside the string, which embedded the source
                    # indentation in the message.
                    self.log.info(
                        "Expected behaviour: Default container type "
                        "is expected to fail on dfuse mount: %s", str(error))
                # fail the test if exception is caught for POSIX type container
                elif cont_type == "POSIX":
                    self.log.error(
                        "Posix Container dfuse mount "
                        "failed: %s", str(error))
                    self.fail("Posix container type was expected to mount "
                              "over dfuse")
            # stop fuse and container for next iteration
            if cont_type != "":
                self.dfuse.stop()
            self.container.destroy(1)
Beispiel #14
0
class RootContainerTest(TestWithServers):
    """Base Dfuse Container check test class.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a RootContainerTest object.

        Reads the temporary file and container count settings from the
        ``/run/container/*`` namespace of the test parameters.
        """
        super(RootContainerTest, self).__init__(*args, **kwargs)
        # Pools and containers created by the helper methods are kept here
        self.pool = []
        self.container = []

        namespace = '/run/container/*'
        self.tmp_file_count = self.params.get("tmp_file_count", namespace)
        self.cont_count = self.params.get("cont_count", namespace)
        self.tmp_file_size = self.params.get("tmp_file_size", namespace)
        self.tmp_file_name = self.params.get("tmp_file_name", namespace)

        # device where the pools and containers are created
        self.device = "scm"

    def setUp(self):
        """Run the parent set up, then reset the dfuse state of the test."""
        # Start the servers and agents
        super(RootContainerTest, self).setUp()
        # Nothing is mounted yet: both are assigned later by the test itself
        self.dfuse_hosts = None
        self.dfuse = None

    def tearDown(self):
        """Stop dfuse, if it was started, then tear down the test case.

        The parent tearDown() is always run, even if stopping dfuse raises.
        """
        # tearDown() may run after a setUp() that failed before self.dfuse
        # was assigned, so do not assume the attribute exists; otherwise an
        # AttributeError here would hide the original set up failure.
        dfuse = getattr(self, "dfuse", None)
        try:
            if dfuse:
                dfuse.stop()
        finally:
            # Stop the servers and agents
            super(RootContainerTest, self).tearDown()

    def _create_pool(self):
        """Create a new pool and record it in self.pool.

        Returns:
            TestPool: the newly created pool

        """
        new_pool = TestPool(self.context, dmg_command=self.get_dmg_command())
        # Load the pool settings from the test parameters before creating it
        new_pool.get_params(self)
        new_pool.create()
        self.pool.append(new_pool)
        return new_pool

    def _create_cont(self, pool, path=None):
        """Create a container in the pool and record it in self.container.

        Args:
            pool (TestPool): pool in which to create the container
            path (str, optional): unified namespace path for the container.
                Defaults to None, which leaves the path parameter unchanged.

        Returns:
            TestContainer: the newly created container

        """
        new_cont = TestContainer(pool, daos_command=DaosCommand(self.bin))
        # Load the container settings from the test parameters
        new_cont.get_params(self)
        if path is not None:
            new_cont.path.update(path)
        new_cont.create()
        self.container.append(new_cont)
        return new_cont

    def _start_dfuse(self, pool, container):
        """Create a Dfuse object for the pool and container and start it.

        The test is failed if running the dfuse command raises a
        CommandFailure.

        Args:
            pool: pool to use with dfuse
            container: container to mount with dfuse
        """
        # Build the dfuse command from the test parameters
        self.dfuse = dfuse = Dfuse(self.dfuse_hosts, self.tmp)
        dfuse.get_params(self)

        # Point dfuse at the requested pool and container
        dfuse.set_dfuse_params(pool)
        dfuse.set_dfuse_cont_param(container)
        dfuse.set_dfuse_exports(self.server_managers[0], self.client_log)

        try:
            dfuse.run()
        except CommandFailure as error:
            self.log.error(
                "Dfuse command %s failed on hosts %s",
                str(dfuse), dfuse.hosts, exc_info=error)
            self.fail("Test was expected to pass but it failed.\n")

    def test_rootcontainer(self):
        """Jira ID: DAOS-3782.

        Test Description:
            Create a container, mount it over dfuse and use it as a root
            container. Create sub containers underneath it, insert several
            files and check that they can be accessed using ls and cd.
            Verify the pool size reflects the space occupied by the
            containers, then remove containers and verify the space is
            reclaimed.
            Repeat the procedure with 100 sub containers and with several
            pools whose containers are mounted under the root container.
        :avocado: tags=all,hw,small,full_regression,container
        :avocado: tags=rootcontainer
        """
        # Create a pool with a root container and mount it over dfuse
        root_pool = self._create_pool()
        root_cont = self._create_cont(root_pool)
        self.dfuse_hosts = self.agent_managers[0].hosts
        self._start_dfuse(root_pool, root_cont)

        # Add a first sub container underneath the root container
        first_sub_cont = "cont0"
        self._create_cont(
            root_pool,
            path=str(self.dfuse.mount_dir.value + "/" + first_sub_cont))

        # Insert files into the root container, then into the sub container
        for cont_name in ("", first_sub_cont):
            self.insert_files_and_verify(cont_name)

        # Create 100 sub containers and verify the temp files
        self.verify_create_delete_containers(root_pool, 100)
        self.verify_multi_pool_containers()

    def verify_multi_pool_containers(self):
        """Create containers in several new pools under the root container.

        Creates ``pool_count`` pools (read from ``/run/pool/*``) holding
        ``self.cont_count`` containers each. Every container is created at a
        path below the dfuse mount point and is then checked by inserting
        and listing files in it.
        """
        pool_count = self.params.get("pool_count", "/run/pool/*")
        for pool_idx in range(pool_count):
            pool = self._create_pool()
            for cont_idx in range(self.cont_count):
                # Separate the two indices so that e.g. (1, 11) and (11, 1)
                # cannot map to the same directory name, and keep the name
                # free of a leading '/' because both the path built here and
                # insert_files_and_verify() add their own separator.
                cont_name = "cont_{}_{}".format(pool_idx, cont_idx)
                sub_cont = "{}/{}".format(
                    self.dfuse.mount_dir.value, cont_name)
                self._create_cont(pool=pool, path=sub_cont)
                self.insert_files_and_verify(cont_name)

    def verify_create_delete_containers(self, pool, cont_count):
        """Check pool space usage while creating and destroying containers.

        Creates cont_count sub containers, inserts the temporary files into
        each of them and asserts that the free pool space dropped by at
        least the amount of data written. Then destroys half of the
        containers and asserts that at least that much space was reclaimed.

        Args:
            pool: pool in which to create the containers; its free space is
                queried with get_pool_free_space()
            cont_count (int): Number of containers to be created.
        """
        self.log.info("Verifying multiple container create delete")
        free_before = pool.get_pool_free_space(self.device)
        self.log.info("Pool space before = %s", free_before)

        # Sub containers are named cont1 .. cont<cont_count>
        for index in range(1, cont_count + 1):
            cont_name = "cont{}".format(index)
            self._create_cont(
                pool, path=str(self.dfuse.mount_dir.value + "/" + cont_name))
            self.insert_files_and_verify(cont_name)

        # The inserted files must have consumed at least their own size
        expected = free_before - (
            cont_count * self.tmp_file_count * self.tmp_file_size)
        free_after_create = pool.get_pool_free_space(self.device)
        self.log.info("Pool space <= Expected")
        self.log.info("%s <= %s", free_after_create, expected)
        self.assertTrue(free_after_create <= expected)

        destroy_count = cont_count // 2
        self.log.info(
            "Destroying half of the containers = %s", destroy_count)
        for _ in range(destroy_count):
            # Destroy the most recently created container first and only
            # then drop it from the list of tracked containers
            self.container[-1].destroy(1)
            self.container.pop()

        # Destroying the containers must give back at least their file data
        expected = free_after_create + (
            destroy_count * self.tmp_file_count * self.tmp_file_size)
        free_after_destroy = pool.get_pool_free_space(self.device)
        self.log.info("After container destroy")
        self.log.info("Free Pool space >= Expected")
        self.log.info("%s >= %s", free_after_destroy, expected)
        self.assertTrue(free_after_destroy >= expected)

    def insert_files_and_verify(self, container_name):
        """Insert files into a container and verify they are accessible.

        Writes self.tmp_file_count files of self.tmp_file_size random bytes,
        named self.tmp_file_name suffixed with 1..N, into the container
        directory and then lists each of them from inside that directory.

        Args:
            container_name (str): name of the container directory below the
                dfuse mount point; an empty value selects the mount point
                (the root container) itself.
        """
        cont_dir = self.dfuse.mount_dir.value
        if container_name:
            cont_dir = "{}/{}".format(cont_dir, container_name)

        file_names = [
            "{}{}".format(self.tmp_file_name, index + 1)
            for index in range(self.tmp_file_count)]

        # Chain the commands with '&&' rather than ';': a ';' separated list
        # only yields the exit status of its last command, so a failure to
        # create (or list) any earlier file would go unnoticed.
        create_cmds = [
            "head -c {} /dev/urandom > {}/{}".format(
                self.tmp_file_size, cont_dir, name)
            for name in file_names]
        self._execute_cmd(" && ".join(create_cmds))

        # cd into the container and ls every file to verify it was created
        verify_cmds = ["cd {}".format(cont_dir)]
        verify_cmds.extend("ls {}".format(name) for name in file_names)
        self._execute_cmd(" && ".join(verify_cmds))

    def _execute_cmd(self, cmd):
        """Execute a command on the dfuse hosts and fail the test on error.

        Args:
            cmd (str): Command to run

        Returns:
            the pcmd() result, used here as a mapping of return code to the
            hosts that yielded that return code (presumably a dict; confirm
            against pcmd()).
        """
        try:
            # execute bash cmds
            ret = pcmd(self.dfuse_hosts, cmd, verbose=True, timeout=30)
            # Collect every group of hosts with a non-zero return code.
            # Checking only "0 not in ret" would miss a command that
            # succeeded on some hosts but failed on others.
            failed_hosts = [
                str(node_set) for code, node_set in ret.items() if code != 0]
            if failed_hosts or 0 not in ret:
                raise CommandFailure(
                    "Error running '{}' on the following "
                    "hosts: {}".format(
                        cmd, NodeSet(",".join(failed_hosts))))
        except CommandFailure as error:
            # report error if any command fails
            self.log.error("RootContainerTest command failed: %s",
                           str(error))
            self.fail("Test was expected to pass but "
                      "it failed.\n")
        return ret