def test_ior_intercept_multi_client(self):
        """Jira ID: DAOS-3499.

        Test Description:
            Purpose of this test is to run ior through dfuse on multiple
            clients for 5 minutes and capture the metrics, then use the
            interception library by exporting LD_PRELOAD with the libioil.so
            path, rerun the same ior, capture the metrics again, compare the
            performance difference, and verify that using the interception
            library provides a significant performance improvement.

        Use case:
            Run ior with read, write for 5 minutes
            Run ior with read, write for 5 minutes with interception library
            Compare the results and check whether using the interception
                library provides better performance.

        :avocado: tags=all,full_regression,hw,large
        :avocado: tags=daosio,iorinterceptmulticlient
        """
        suffix = self.ior_cmd.transfer_size.value
        out = self.run_ior_with_pool(test_file_suffix=suffix)
        without_intercept = IorCommand.get_ior_metrics(out)
        intercept = os.path.join(self.prefix, 'lib64', 'libioil.so')
        suffix = suffix + "intercept"
        out = self.run_ior_with_pool(intercept, test_file_suffix=suffix)
        with_intercept = IorCommand.get_ior_metrics(out)
        max_mib = int(IorMetrics.Max_MiB)
        min_mib = int(IorMetrics.Min_MiB)
        mean_mib = int(IorMetrics.Mean_MiB)

        write_x = self.params.get("write_x", "/run/ior/iorflags/ssf/*", 1)

        # Verifying write performance
        self.assertTrue(
            float(with_intercept[0][max_mib]) > write_x *
            float(without_intercept[0][max_mib]))
        self.assertTrue(
            float(with_intercept[0][min_mib]) > write_x *
            float(without_intercept[0][min_mib]))
        self.assertTrue(
            float(with_intercept[0][mean_mib]) > write_x *
            float(without_intercept[0][mean_mib]))

        # Verifying read performance
        # The read performance is almost the same with or without the
        # interception library, but occasionally the read performance with the
        # interception library can be a bit lower than without it. Verify that
        # it is not drastically lower by checking it is at least 60% of the
        # baseline.
        read_x = 0.6
        self.assertTrue(
            float(with_intercept[1][max_mib]) > read_x *
            float(without_intercept[1][max_mib]))
        self.assertTrue(
            float(with_intercept[1][min_mib]) > read_x *
            float(without_intercept[1][min_mib]))
        self.assertTrue(
            float(with_intercept[1][mean_mib]) > read_x *
            float(without_intercept[1][mean_mib]))
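
For reference, a minimal sketch (not from the test source) of how the tuple returned by IorCommand.get_ior_metrics() is indexed in the assertions above, assuming row 0 holds the write summary and row 1 the read summary, as this test does:

from ior_utils import IorMetrics  # assumed import path, matching the surrounding examples

def bandwidth_summary(metrics):
    # Sketch only: pull the Max/Min/Mean MiB/s columns out of the write (row 0)
    # and read (row 1) summaries using the IorMetrics enum values as column indices.
    max_mib = int(IorMetrics.Max_MiB)
    min_mib = int(IorMetrics.Min_MiB)
    mean_mib = int(IorMetrics.Mean_MiB)
    write, read = metrics[0], metrics[1]
    return ((float(write[max_mib]), float(write[min_mib]), float(write[mean_mib])),
            (float(read[max_mib]), float(read[min_mib]), float(read[mean_mib])))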
Example #2
    def test_ior_intercept(self):
        """Jira ID: DAOS-3498.

        Test Description:
            Purpose of this test is to run ior using dfuse for 5 minutes
            and capture the metrics, then use the interception library by
            exporting LD_PRELOAD with the libioil.so path, rerun the same
            ior, capture the metrics again, compare the performance
            difference, and verify that using the interception library
            provides a significant performance improvement.

        Use case:
            Run ior with read, write, CheckWrite, CheckRead
                for 5 minutes
            Run ior with read, write, CheckWrite, CheckRead
                for 5 minutes with interception library
            Compare the results and check whether using the interception
                library provides better performance.

        :avocado: tags=all,full_regression,hw,small,daosio,iorinterceptbasic
        """
        apis = self.params.get("ior_api", '/run/ior/iorflags/ssf/*')
        for api in apis:
            self.ior_cmd.api.update(api)
            out = self.run_ior_with_pool(fail_on_warning=False)
            without_intercept = IorCommand.get_ior_metrics(out)
            if api == "POSIX":
                intercept = os.path.join(self.prefix, 'lib64', 'libioil.so')
                out = self.run_ior_with_pool(intercept, fail_on_warning=False)
                with_intercept = IorCommand.get_ior_metrics(out)
                max_mib = int(IorMetrics.Max_MiB)
                min_mib = int(IorMetrics.Min_MiB)
                mean_mib = int(IorMetrics.Mean_MiB)
                write_x = self.params.get("write_x", "/run/ior/iorflags/ssf/*",
                                          1)
                read_x = self.params.get("read_x", "/run/ior/iorflags/ssf/*",
                                         1)

                # Verifying write performance
                self.assertTrue(
                    float(with_intercept[0][max_mib]) > write_x *
                    float(without_intercept[0][max_mib]))
                self.assertTrue(
                    float(with_intercept[0][min_mib]) > write_x *
                    float(without_intercept[0][min_mib]))
                self.assertTrue(
                    float(with_intercept[0][mean_mib]) > write_x *
                    float(without_intercept[0][mean_mib]))

                # Verifying read performance
                self.assertTrue(
                    float(with_intercept[1][max_mib]) > read_x *
                    float(without_intercept[1][max_mib]))
                self.assertTrue(
                    float(with_intercept[1][min_mib]) > read_x *
                    float(without_intercept[1][min_mib]))
                self.assertTrue(
                    float(with_intercept[1][mean_mib]) > read_x *
                    float(without_intercept[1][mean_mib]))
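
As a point of reference, the interception library is enabled simply by preloading libioil.so into the environment of the IOR processes; a minimal sketch, assuming run_ior_with_pool() builds the job environment the same way the threaded examples further below do:

def build_ior_env(ior_command, manager, client_log, intercept=None):
    # Sketch only: start from the default IOR environment and, when an
    # interception library path is supplied, preload it so POSIX I/O on the
    # dfuse mount is routed through libioil.
    env = ior_command.get_default_env(str(manager), client_log)
    if intercept:
        env["LD_PRELOAD"] = intercept
    return env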
Example #3
    def run_il_perf_check(self):
        """Verify IOR performance with DFUSE + IL is similar to DFS.

        Steps:
            Run IOR with DFS.
            Run IOR with DFUSE + IL.
            Verify performance with DFUSE + IL is similar to DFS.

        """
        # Write and read performance thresholds
        write_x = self.params.get("write_x", self.ior_cmd.namespace, None)
        read_x = self.params.get("read_x", self.ior_cmd.namespace, None)
        if write_x is None or read_x is None:
            self.fail("Failed to get write_x and read_x from config")

        # Run IOR with DFS
        self.ior_cmd.api.update("DFS")
        dfs_out = self.run_ior_with_pool(fail_on_warning=self.log.info)
        dfs_perf = IorCommand.get_ior_metrics(dfs_out)

        # Destroy and use a new pool and container
        self.container.destroy()
        self.container = None
        self.pool.destroy()
        self.pool = None

        # Run IOR with dfuse + IL
        self.ior_cmd.api.update("POSIX")
        dfuse_out = self.run_ior_with_pool(intercept=os.path.join(
            self.prefix, 'lib64', 'libioil.so'),
                                           fail_on_warning=self.log.info)
        dfuse_perf = IorCommand.get_ior_metrics(dfuse_out)

        # Verify write and read performance are within the thresholds.
        # Since Min can have a lot of variance, don't check Min or Mean.
        # Ideally, we might want to look at the Std Dev to ensure the results are admissible.
        dfs_max_write = float(dfs_perf[0][IorMetrics.Max_MiB])
        dfuse_max_write = float(dfuse_perf[0][IorMetrics.Max_MiB])
        actual_write_x = percent_change(dfs_max_write, dfuse_max_write)
        self.log.info("DFS Max Write:      %.2f", dfs_max_write)
        self.log.info("DFUSE IL Max Write: %.2f", dfuse_max_write)
        self.log.info("Percent Diff:       %.2f%%", actual_write_x * 100)
        self.assertLessEqual(abs(actual_write_x), write_x,
                             "Max Write Diff too large")

        dfs_max_read = float(dfs_perf[1][IorMetrics.Max_MiB])
        dfuse_max_read = float(dfuse_perf[1][IorMetrics.Max_MiB])
        actual_read_x = percent_change(dfs_max_read, dfuse_max_read)
        self.log.info("DFS Max Read:      %.2f", dfs_max_read)
        self.log.info("DFUSE IL Max Read: %.2f", dfuse_max_read)
        self.log.info("Percent Diff:      %.2f%%", actual_read_x * 100)
        self.assertLessEqual(abs(actual_read_x), read_x,
                             "Max Read Diff too large")
Example #4
    def start_ior_thread(self, results, create_cont, operation='WriteRead'):
        """Start IOR write/read threads and wait until all threads are finished.

        Args:
            results (queue): queue for returning thread results
            create_cont (bool): whether to create a new container.
            operation (str): IOR operation for read/write. By default it
                             runs whatever is specified in the ior_flags
                             setting.
        """
        self.ior_cmd.flags.value = self.ior_default_flags

        # For the IOR Write operation, calculate the block size based on the
        # server % to fill up. Store the container UUID for the future read
        # operation.
        if operation == 'Write':
            block_size = self.calculate_ior_block_size()
            self.ior_cmd.block_size.update('{}'.format(block_size))
        # For the IOR Read-only operation, retrieve the stored container UUID
        elif operation == 'Read':
            create_cont = False
            self.ior_cmd.flags.value = self.ior_read_flags

        # run IOR Command
        try:
            out = self.run_ior_with_pool(create_cont=create_cont,
                                         fail_on_warning=self.fail_on_warning)
            self.ior_matrix = IorCommand.get_ior_metrics(out)
            results.put("PASS")
        except (CommandFailure, TestFail) as _error:
            results.put("FAIL")
Example #5
    def start_ior_thread(self, results, create_cont, operation):
        """Start IOR write/read threads and wait until all threads are finished.

        Args:
            results (queue): queue for returning thread results
            create_cont (bool): whether to create a new container.
            operation (str):
                Write/WriteRead: Write or Write/Read based on the IOR parameters in the yaml file.
                Auto_Write/Auto_Read: calculate the IOR block size based on the requested
                                      storage % to be filled.
        """
        # IOR flag can be Write only or Write/Read based on test yaml
        self.ior_cmd.flags.value = self.ior_default_flags

        # Calculate the block size based on server % to fill up.
        if 'Auto' in operation:
            block_size = self.calculate_ior_block_size()
            self.ior_cmd.block_size.update('{}'.format(block_size))

        # For the IOR Read operation, update the read flags from the yaml file.
        if 'Auto_Read' in operation or operation == "Read":
            create_cont = False
            self.ior_cmd.flags.value = self.ior_read_flags

        # run IOR Command
        try:
            out = self.run_ior_with_pool(create_cont=create_cont,
                                         fail_on_warning=self.fail_on_warning)
            self.ior_matrix = IorCommand.get_ior_metrics(out)
            results.put("PASS")
        except (CommandFailure, TestFail) as _error:
            results.put("FAIL")
Example #6
    def run_ior_collect_error(self, results, job_num, file_name, clients):
        """Run IOR command and store error in results.

        Args:
            results (dict): A dictionary object to store the ior metrics.
            job_num (int): Assigned job number.
            file_name (str): File name used for self.ior_cmd.test_file.
            clients (list): Client hostnames to run IOR from.
        """
        ior_cmd = IorCommand()
        ior_cmd.get_params(self)
        ior_cmd.set_daos_params(
            group=self.server_group, pool=self.pool, cont_uuid=self.container.uuid)
        testfile = os.path.join("/", file_name)
        ior_cmd.test_file.update(testfile)

        manager = get_job_manager(
            test=self, class_name="Mpirun", job=ior_cmd, subprocess=self.subprocess,
            mpi_type="mpich")
        manager.assign_hosts(clients, self.workdir, self.hostfile_clients_slots)
        ppn = self.params.get("ppn", '/run/ior/client_processes/*')
        manager.ppn.update(ppn, 'mpirun.ppn')
        manager.processes.update(None, 'mpirun.np')

        try:
            ior_output = manager.run()
            results[job_num] = [True]
            # For debugging.
            results[job_num].extend(IorCommand.get_ior_metrics(ior_output))
            # We'll verify the error message.
            results[job_num].append(ior_output.stderr_text)
        except CommandFailure as error:
            results[job_num] = [False, "IOR failed: {}".format(error)]
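
The per-job result lists built above take two shapes: [True, <ior metrics...>, stderr_text] on success and [False, message] on failure. A small sketch of how a caller might interpret them (the helper name is illustrative, not from the source):

def check_ior_job_results(test, results):
    # Illustrative helper: fail the test on any unsuccessful job and log the
    # captured stderr of the successful ones for later verification.
    for job_num, result in results.items():
        if not result[0]:
            test.fail(result[1])
        test.log.info("Job %d stderr: %s", job_num, result[-1])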
Example #7
    def run_custom_ior_cmd(self,
                           ior_command,
                           clients,
                           results,
                           job_num,
                           intercept=None):
        """Run customized IOR command, not self.ior_cmd.

        Expected to be used with threaded code where multiple IOR commands are
        executed in parallel.

        Display the pool space before running, for reference.

        Args:
            ior_command (IorCommand): Custom IOR command instance.
            clients (list): hosts on which to run ior
            results (dict): A dictionary object to store the ior metrics
            job_num (int): Assigned job number
            intercept (str, optional): path to interception library. Defaults to
                None.
        """
        self.log.info("--- IOR Thread %d: Start ---", job_num)
        tsize = ior_command.transfer_size.value
        testfile = os.path.join(self.dfuse.mount_dir.value,
                                "testfile{}{}".format(tsize, job_num))
        if intercept:
            testfile += "intercept"
        ior_command.test_file.update(testfile)

        # Get the custom job manager that's associated with this thread.
        manager = get_job_manager(self, "Mpirun", ior_command, self.subprocess,
                                  "mpich")

        procs = (self.processes // len(self.hostlist_clients)) * len(clients)
        env = ior_command.get_default_env(str(manager), self.client_log)
        if intercept:
            env["LD_PRELOAD"] = intercept
        manager.assign_hosts(clients, self.workdir,
                             self.hostfile_clients_slots)
        manager.assign_processes(procs)
        manager.assign_environment(env)

        self.log.info("--- IOR Thread %d: Starting IOR ---", job_num)
        self.display_pool_space()
        try:
            ior_output = manager.run()
            results[job_num] = [True]
            results[job_num].extend(IorCommand.get_ior_metrics(ior_output))
        except CommandFailure as error:
            results[job_num] = [False, "IOR failed: {}".format(error)]
        finally:
            self.display_pool_space()

        self.log.info("--- IOR Thread %d: End ---", job_num)
Example #8
    def start_ior_thread(self, create_cont, operation):
        """Start IOR write/read threads and wait until all threads are finished.

        Args:
            create_cont (bool): whether to create a new container.
            operation (str):
                Write/WriteRead: Write or Write/Read based on the IOR parameters in the yaml file.
                Auto_Write/Auto_Read: calculate the IOR block size based on the requested
                                      storage % to be filled.
        """
        # IOR flags can be Write/Read based on the test yaml
        self.ior_local_cmd.flags.value = self.ior_default_flags

        # Calculate the block size based on server % to fill up.
        if 'Auto' in operation:
            block_size = self.calculate_ior_block_size()
            self.ior_local_cmd.block_size.update('{}'.format(block_size))

        # For the IOR Read operation, update the read-only flags from the yaml file.
        if 'Auto_Read' in operation or operation == "Read":
            create_cont = False
            self.ior_local_cmd.flags.value = self.ior_read_flags

        self.ior_local_cmd.set_daos_params(self.server_group, self.pool)
        self.ior_local_cmd.test_file.update('/testfile')

        # Create a new container or use the existing container for reading
        if create_cont:
            self.create_container()
        self.ior_local_cmd.dfs_cont.update(self.nvme_local_cont.uuid)

        # Define the job manager for the IOR command
        job_manager_main = get_job_manager(self, "Mpirun", self.ior_local_cmd, mpi_type="mpich")
        env = self.ior_local_cmd.get_default_env(str(job_manager_main))
        job_manager_main.assign_hosts(self.hostlist_clients, self.workdir, None)
        job_manager_main.assign_environment(env, True)
        job_manager_main.assign_processes(self.params.get("np", '/run/ior/client_processes/*'))

        # run IOR Command
        try:
            output = job_manager_main.run()
            self.ior_matrix = IorCommand.get_ior_metrics(output)

            for line in output.stdout_text.splitlines():
                if 'WARNING' in line and self.fail_on_warning:
                    self.result.append("FAIL-IOR command issued warnings.")
        except (CommandFailure, TestFail) as error:
            self.result.append("FAIL - {}".format(error))
Example #9
    def run_ior_report_error(self, results, job_num, file_name, pool,
                             container, namespace):
        """Run IOR command and store the results to results dictionary.

        Create a new IorCommand object instead of using the one in IorTestBase because
        we'll run a test that runs multiple IOR processes at the same time.

        Args:
            results (dict): A dictionary object to store the ior metrics
            job_num (int): Assigned job number
            file_name (str): File name used for self.ior_cmd.test_file.
            pool (TestPool): Pool to run IOR.
            container (TestContainer): Container to run IOR.
            namespace (str): Namespace used to create the IorCommand and
                look up its parameters.
        """
        # Update the object class depending on the test case.
        ior_cmd = IorCommand(namespace=namespace)
        ior_cmd.get_params(self)

        # Standard IOR prep sequence.
        ior_cmd.set_daos_params(self.server_group, pool, container.uuid)
        testfile = os.path.join("/", file_name)
        ior_cmd.test_file.update(testfile)

        manager = get_job_manager(test=self,
                                  class_name="Mpirun",
                                  job=ior_cmd,
                                  subprocess=self.subprocess,
                                  mpi_type="mpich")
        manager.assign_hosts(self.hostlist_clients, self.workdir,
                             self.hostfile_clients_slots)
        ppn = self.params.get("ppn", '/run/ior/client_processes/*')
        manager.ppn.update(ppn, 'mpirun.ppn')
        manager.processes.update(None, 'mpirun.np')

        # Run the command.
        try:
            self.log.info("--- IOR command %d start ---", job_num)
            ior_output = manager.run()
            results[job_num] = [True]
            # For debugging.
            results[job_num].extend(IorCommand.get_ior_metrics(ior_output))
            # Command worked, but append the error message if any.
            results[job_num].append(ior_output.stderr_text)
            self.log.info("--- IOR command %d end ---", job_num)
        except CommandFailure as error:
            self.log.info("--- IOR command %d failed ---", job_num)
            results[job_num] = [False, "IOR failed: {}".format(error)]
Example #10
    def run_multiple_ior(self,
                         hostfile,
                         num_clients,
                         results,
                         job_num,
                         intercept=None):
        # pylint: disable=too-many-arguments
        """Run the IOR command.

        Args:
            hostfile (str): hostfile to pass to the job manager.
            num_clients (int): number of client hosts to run IOR on.
            results (dict): A dictionary object to store the ior metrics.
            job_num (int): Assigned job number.
            intercept (str, optional): path to interception library. Defaults
                to None.
        """
        self.lock.acquire(True)
        tsize = self.ior_cmd.transfer_size.value
        testfile = os.path.join(self.dfuse.mount_dir.value,
                                "testfile{}{}".format(tsize, job_num))
        if intercept:
            testfile += "intercept"
        self.ior_cmd.test_file.update(testfile)
        manager = self.get_ior_job_manager_command()
        procs = (self.processes // len(self.hostlist_clients)) * num_clients
        env = self.ior_cmd.get_default_env(str(manager), self.client_log)
        if intercept:
            env["LD_PRELOAD"] = intercept
        manager.setup_command(env, hostfile, procs)
        self.lock.release()
        try:
            self.pool.display_pool_daos_space()
            out = manager.run()
            self.lock.acquire(True)
            results[job_num] = IorCommand.get_ior_metrics(out)
            self.lock.release()
        except CommandFailure as error:
            self.log.error("IOR Failed: %s", str(error))
            self.fail("Test was expected to pass but it failed.\n")
        finally:
            self.pool.display_pool_daos_space()
Example #11
    def run_multiple_ior(self, clients, results, job_num, intercept=None):
        """Run the IOR command.

        Args:
            clients (list): hosts on which to run ior
            results (dict): A dictionary object to store the ior metrics
            job_num (int): Assigned job number
            intercept (str, optional): path to interception library. Defaults to
                None.
        """
        self.lock.acquire(True)
        tsize = self.ior_cmd.transfer_size.value
        testfile = os.path.join(self.dfuse.mount_dir.value,
                                "testfile{}{}".format(tsize, job_num))
        if intercept:
            testfile += "intercept"
        self.ior_cmd.test_file.update(testfile)
        manager = self.get_ior_job_manager_command()
        procs = (self.processes // len(self.hostlist_clients)) * len(clients)
        env = self.ior_cmd.get_default_env(str(manager), self.client_log)
        if intercept:
            env["LD_PRELOAD"] = intercept
        manager.assign_hosts(clients, self.workdir,
                             self.hostfile_clients_slots)
        manager.assign_processes(procs)
        manager.assign_environment(env)
        self.lock.release()
        try:
            self.pool.display_pool_daos_space()
            out = manager.run()
            self.lock.acquire(True)
            results[job_num] = IorCommand.get_ior_metrics(out)
            self.lock.release()
        except CommandFailure as error:
            self.log.error("IOR Failed: %s", str(error))
            self.fail("Test was expected to pass but it failed.\n")
        finally:
            self.pool.display_pool_daos_space()
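
As a worked example of the per-thread process count used by both run_multiple_ior variants above (numbers are made up for illustration):

# With self.processes = 32 total IOR processes spread over 4 client hosts,
# a thread given 2 of those hosts runs (32 // 4) * 2 = 16 processes.
total_processes, total_clients, thread_clients = 32, 4, 2
procs = (total_processes // total_clients) * thread_clients
assert procs == 16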
Example #12
    def test_aggregation_throttling(self):
        """Jira ID: DAOS-3749

        Test Description:
            Verify that ior throttling while aggregation runs
            in the background affects the ior performance
            by no more than +/- 30%.
        Use case:
            Create a pool and container
            Disable the aggregation
            Run ior with same file option
            Capture the initial ior performance.
            Run ior the second time with same file option, so the
            logical partitioning can be overwritten.
            Enable the aggregation and wait for 90 seconds
            Now that the aggregation is running in the background, run
            ior again so aggregation and ior run in parallel.
            Capture the ior performance now and verify that it is
            within +/- 30% of the initial performance.
            Also, verify the aggregation reclaimed the space used by
            second ior.

        :avocado: tags=all,hw,large,full_regression,aggregate,daosio
        :avocado: tags=aggregatethrottling
        """

        # Create pool and container
        self.update_ior_cmd_with_pool()

        # Disable the aggregation
        self.pool.set_property("reclaim", "disabled")

        # Run ior with -k option to retain the file and not delete it
        out = self.run_ior_with_pool()
        metric_before_aggregate = IorCommand.get_ior_metrics(out)

        # Run ior the second time on the same pool and container, so another
        # copy of the file is inserted in DAOS.
        out = self.run_ior_with_pool(create_pool=False)

        # wait 90 seconds for files to get old enough for aggregation
        self.log.info("Waiting for 90 seconds for aggregation to start")
        time.sleep(90)
        # Enable the aggregation
        self.pool.set_property("reclaim", "time")

        # Run ior the third time while the aggregation of first two runs
        # are running in the background.
        out = self.run_ior_with_pool(create_pool=False)
        metric_after_aggregate = IorCommand.get_ior_metrics(out)

        # When DAOS-5057 is fixed, adjust the percentage. For now,
        # keep it at 30%.
        expected_perf_diff = 30.0

        self.verify_performance(
            metric_before_aggregate,
            metric_after_aggregate,
            0,  # write_perf
            expected_perf_diff)  # 30% perf difference

        self.verify_performance(
            metric_before_aggregate,
            metric_after_aggregate,
            1,  # read_perf
            expected_perf_diff)
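
The verify_performance() helper is not included in this excerpt; the following is a hypothetical sketch of a comparable +/- 30% check on the Max_MiB column (an illustration under assumptions, not the actual helper; it assumes IorMetrics is imported as in the other examples):

def verify_max_bw_within(test, before, after, row, max_diff_pct):
    # Hypothetical illustration: compare the Max_MiB value of the given row
    # (0 = write, 1 = read) before and after aggregation and fail if the
    # difference exceeds max_diff_pct percent.
    max_mib = int(IorMetrics.Max_MiB)
    before_bw = float(before[row][max_mib])
    after_bw = float(after[row][max_mib])
    diff_pct = abs(after_bw - before_bw) / before_bw * 100
    test.log.info("Row %d Max_MiB difference: %.2f%%", row, diff_pct)
    if diff_pct > max_diff_pct:
        test.fail("Difference {:.2f}% exceeds {}%".format(diff_pct, max_diff_pct))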
Example #13
    def test_caching_check(self):
        """Jira ID: DAOS-4874.

        Test Description:
            Purpose of this test is to check if dfuse caching is working.
        Use case:
            Write using ior over dfuse with caching disabled.
            Perform ior read twice to get base read performance.
            Unmount dfuse and mount it again with caching enabled.
            Perform ior read after fresh mount to get read performance.
            Run ior again to get second read performance numbers with caching enabled.
            Compare the cached read performance after the remount to the
            baseline read performance and confirm the cached read performance
            is several times higher than with caching disabled.

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium,ib2
        :avocado: tags=daosio,dfuse
        :avocado: tags=dfusecachingcheck
        """
        # get params
        flags = self.params.get("iorflags", '/run/ior/*')
        read_x = self.params.get("read_x", "/run/ior/*", 1)

        # update flag
        self.ior_cmd.flags.update(flags[0])

        # run ior to write to the dfuse mount point
        self.run_ior_with_pool(fail_on_warning=False, stop_dfuse=False)

        # update ior flag to read
        self.ior_cmd.flags.update(flags[1])
        # run ior to read and store the read performance
        base_read_arr = []
        out = self.run_ior_with_pool(fail_on_warning=False, stop_dfuse=False)
        base_read_arr.append(IorCommand.get_ior_metrics(out))
        # run ior again to read with caching disabled and store performance
        out = self.run_ior_with_pool(fail_on_warning=False, stop_dfuse=False)
        base_read_arr.append(IorCommand.get_ior_metrics(out))

        # the index of max_mib
        max_mib = int(IorMetrics.Max_MiB)

        # unmount dfuse and mount again with caching enabled
        pcmd(self.hostlist_clients,
             self.dfuse.get_umount_command(),
             expect_rc=None)
        self.dfuse.disable_caching.update(False)
        self.dfuse.run()
        # run ior to obtain first read performance after mount
        out = self.run_ior_with_pool(fail_on_warning=False, stop_dfuse=False)
        base_read_arr.append(IorCommand.get_ior_metrics(out))
        # run ior again to obtain second read performance with caching enabled
        # second read should be multiple times greater than first read
        out = self.run_ior_with_pool(fail_on_warning=False)
        with_caching = IorCommand.get_ior_metrics(out)
        # verify cached read performance is multiple times greater than without caching
        for base_read in base_read_arr:
            actual_change = percent_change(base_read[0][max_mib],
                                           with_caching[0][max_mib])
            self.log.info('assert actual_change > min_change: %f > %f',
                          actual_change, read_x)
            self.assertTrue(actual_change > read_x)
Example #14
    def test_ior_intercept_verify_data(self):
        """Jira ID: DAOS-3502.

        Test Description:
            Purpose of this test is to run ior through dfuse with the
            interception library on 5 clients and without the interception
            library on 1 client for at least 30 minutes, and verify the
            data integrity using ior's Read Verify and Write Verify
            options.

        Use case:
            Run ior with read, write, fpp, read verify
            write verify for 30 minutes
            Run ior with read, write, read verify
            write verify for 30 minutes

        :avocado: tags=all,full_regression
        :avocado: tags=hw,large
        :avocado: tags=daosio,dfuse,il,ior_intercept
        :avocado: tags=ior_intercept_verify_data
        """
        self.add_pool()
        self.add_container(self.pool)

        # Start dfuse for POSIX api. This is specific to interception library test requirements.
        self.start_dfuse(self.hostlist_clients, self.pool, self.container)

        # Setup the thread manager
        thread_manager = ThreadManager(run_ior, self.timeout - 30)
        index_clients_intercept_file = [
            (0, self.hostlist_clients[0:-1],
             os.path.join(self.prefix, 'lib64', 'libioil.so'),
             os.path.join(self.dfuse.mount_dir.value, "testfile_0_intercept")),
            (1, self.hostlist_clients[-1:], None,
             os.path.join(self.dfuse.mount_dir.value, "testfile_1")),
        ]
        self.job_manager = []
        for index, clients, intercept, test_file in index_clients_intercept_file:
            # Add a job manager for each ior command. Use a timeout for the ior command that leaves
            # enough time to report the summary of all the threads
            job_manager = get_job_manager(self, "Mpirun", None, False, "mpich",
                                          self.get_remaining_time() - 30)

            # Define the parameters that will be used to run an ior command in this thread
            thread_manager.add(
                test=self,
                manager=job_manager,
                log=self.client_log,
                hosts=clients,
                path=self.workdir,
                slots=None,
                group=self.server_group,
                pool=self.pool,
                container=self.container,
                processes=(self.processes // len(self.hostlist_clients)) *
                len(clients),
                intercept=intercept,
                ior_params={"test_file": test_file})
            self.log.info("Created thread %s for %s with intercept: %s", index,
                          clients, str(intercept))

        # Launch the IOR threads
        self.log.info("Launching %d IOR threads", thread_manager.qty)
        results = thread_manager.run()

        # Stop dfuse
        self.stop_dfuse()

        # Check the ior thread results
        failed_thread_count = thread_manager.check(results)
        if failed_thread_count > 0:
            msg = "{} FAILED IOR Thread(s)".format(failed_thread_count)
            self.d_log.error(msg)
            self.fail(msg)

        for index, clients, intercept, _ in index_clients_intercept_file:
            with_intercept = "without" if intercept is None else "with"
            IorCommand.log_metrics(
                self.log, "{} clients {} interception library".format(
                    len(clients), with_intercept),
                IorCommand.get_ior_metrics(results[index].result))
Example #15
    def test_dfuse_caching_check(self):
        """Jira ID: DAOS-4874.

        Test Description:
            Purpose of this test is to check if dfuse caching is working.
        Use case:
            Write using ior over dfuse with caching disabled.
            Perform ior read to get base read performance.
            Run ior read to get second read performance with caching disabled.
            Compare first and second read performance numbers and they should
            be similar.
            Unmount dfuse and mount it again with caching enabled.
            Perform ior read after fresh mount to get read performance.
            Run ior again to get second read performance numbers with caching
            enabled.
            Compare the first and second read performance numbers after the
            dfuse remount; the second read should be several times higher
            than the first one.

        :avocado: tags=all,full_regression
        :avocado: tags=hw,small
        :avocado: tags=daosio,dfuse
        :avocado: tags=dfusecachingcheck
        """
        # get params
        flags = self.params.get("iorflags", '/run/ior/*')
        read_x = self.params.get("read_x", "/run/ior/*", 1)

        # update flag
        self.ior_cmd.flags.update(flags[0])

        # run ior to write to the dfuse mount point
        self.run_ior_with_pool(fail_on_warning=False, stop_dfuse=False)

        # update ior flag to read
        self.ior_cmd.flags.update(flags[1])
        # run ior to read and store the read performance
        out = self.run_ior_with_pool(fail_on_warning=False, stop_dfuse=False)
        base_read = IorCommand.get_ior_metrics(out)
        # run ior again to read with caching disabled and store performance
        out = self.run_ior_with_pool(fail_on_warning=False, stop_dfuse=False)
        without_caching = IorCommand.get_ior_metrics(out)
        max_mib = int(IorMetrics.Max_MiB)
        # Compare read performance with caching disabled
        # it should be similar to last read
        lower_bound = (float(base_read[0][max_mib]) -
                       (float(base_read[0][max_mib]) * read_x[0]))
        upper_bound = (float(base_read[0][max_mib]) +
                       (float(base_read[0][max_mib]) * read_x[0]))
        # verify the read performance is similar to the last read and within
        # read_x[0] (nominally 1%) above or below the first read performance
        self.assertTrue(
            lower_bound <= float(without_caching[0][max_mib]) <= upper_bound)

        # unmount dfuse and mount again with caching enabled
        pcmd(self.hostlist_clients,
             self.dfuse.get_umount_command(),
             expect_rc=None)
        self.dfuse.disable_caching.update(False)
        self.dfuse.run()
        # run ior to obtain first read performance after mount
        out = self.run_ior_with_pool(fail_on_warning=False, stop_dfuse=False)
        base_read = IorCommand.get_ior_metrics(out)
        # run ior again to obtain second read performance with caching enabled
        # second read should be multiple times greater than first read
        out = self.run_ior_with_pool(fail_on_warning=False)
        with_caching = IorCommand.get_ior_metrics(out)
        # verifying read performance
        self.assertTrue(
            float(with_caching[0][max_mib]) > read_x[1] *
            float(base_read[0][max_mib]))
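
For illustration of the two read_x thresholds used above (the numbers are hypothetical, not from the yaml): with a baseline uncached read of 1000 MiB/s and read_x = [0.01, 10], the second uncached read must stay within 1% of the baseline, while the cached second read must exceed 10x the post-remount baseline:

# Hypothetical numbers only, to show how the bounds above are formed.
base_read_mib, read_x = 1000.0, [0.01, 10]
lower_bound = base_read_mib - base_read_mib * read_x[0]   # 990.0
upper_bound = base_read_mib + base_read_mib * read_x[0]   # 1010.0
cached_read_minimum = read_x[1] * base_read_mib           # 10000.0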