Example #1
    def setup(self):
        """
        Setting up test parameters
        """
        log.info("Starting the test setup")
        self.benchmark_name = "FIO"
        self.client_pod_name = "fio-client"
        if config.PERF.get("deploy_internal_es"):
            self.es = ElasticSearch()
        else:
            if config.PERF.get("internal_es_server") == "":
                self.es = None
                return
            else:
                self.es = {
                    "server": config.PERF.get("internal_es_server"),
                    "port": config.PERF.get("internal_es_port"),
                    "url":
                    f"http://{config.PERF.get('internal_es_server')}:{config.PERF.get('internal_es_port')}",
                    "parallel": True,
                }
                # verify that the connection to the elasticsearch server is OK
                if not super(TestFIOBenchmark, self).es_connect():
                    self.es = None
                    return

        super(TestFIOBenchmark, self).setup()
        # deploy the benchmark-operator
        self.deploy_benchmark_operator()
Example #2
    def setup(self):
        self.es = ElasticSearch()

        # Deploy the benchmark operator
        log.info("Apply Operator CRD")
        self.operator = benchmark_operator.BenchmarkOperator()
        self.operator.deploy()
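A matching teardown is not shown in this example; a minimal sketch, assuming the same `self.es` and `self.operator` attributes and mirroring the teardown shown in Example #8:

    def teardown(self):
        # remove the benchmark-operator and the internal ES instance
        self.operator.cleanup()
        self.es.cleanup()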
Example #3
    def setup(self):
        """
        Setting up test parameters
        """
        log.info("Starting the test setup")
        self.benchmark_name = "SmallFiles"
        self.client_pod_name = "smallfile-client"
        if config.PERF.get("deploy_internal_es"):
            self.es = ElasticSearch()
        else:
            if config.PERF.get("internal_es_server") == "":
                self.es = None
                return
            else:
                self.es = {
                    "server":
                    config.PERF.get("internal_es_server"),
                    "port":
                    config.PERF.get("internal_es_port"),
                    "url":
                    f"http://{config.PERF.get('internal_es_server')}:{config.PERF.get('internal_es_port')}",
                }
                # verify that the connection to the elasticsearch server is OK
                if not super(TestSmallFileWorkload, self).es_connect():
                    self.es = None
                    return

        super(TestSmallFileWorkload, self).setup()
        # deploy the benchmark-operator (ripsaw)
        self.ripsaw = RipSaw()
        self.ripsaw_deploy(self.ripsaw)
Example #4
def es(request):
    def teardown():
        es.cleanup()

    request.addfinalizer(teardown)
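    # Note: teardown() looks up `es` only when it runs (after the test),
    # so it cleans up the instance created below.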
    es = ElasticSearch()
    return es
Example #5
    def setup(self):
        """
        Initialize the test environment

        """
        # Deploy an internal ES server - no need to keep the results,
        # so don't use the production ES
        self.es = ElasticSearch()

        # Initialize the Small Files workload, based on the benchmark-operator
        self.small_files = SmallFiles(self.es)

        self.ceph_cluster = CephCluster()

        # Get the total storage capacity
        self.ceph_capacity = self.ceph_cluster.get_ceph_capacity()
        log.info(f"Total storage capacity is {self.ceph_capacity:,.2f} GiB")

        # Collect the pools usage before the test starts
        self.orig_data = self.get_cephfs_data()
Example #6
    def setup_internal_es(self):
        """
        Setting up the internal ElasticSearch server to be used by the benchmark

        """
        if config.PERF.get("deploy_internal_es"):
            self.es = ElasticSearch()
        else:
            if config.PERF.get("internal_es_server") == "":
                self.es = None
            else:
                self.es = {
                    "server":
                    config.PERF.get("internal_es_server"),
                    "port":
                    config.PERF.get("internal_es_port"),
                    "url":
                    f"http://{config.PERF.get('internal_es_server')}:{config.PERF.get('internal_es_port')}",
                }
                # verify that the connection to the elasticsearch server is OK
                if not self.es_connect():
                    self.es = None
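For reference, a minimal sketch of the `config.PERF` entries this helper reads; the key names come from the code above, while the values shown are hypothetical:

PERF = {
    "deploy_internal_es": False,              # deploy an in-cluster ES instead of using an external one
    "internal_es_server": "es.example.com",   # hypothetical external ES host
    "internal_es_port": 9200,                 # hypothetical external ES port
}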
Example #7
def es(request):

    # Create internal ES only if Cloud platform is tested
    if node.get_provider().lower() in constants.CLOUD_PLATFORMS:
        es = ElasticSearch()
    else:
        es = None

    def teardown():
        if es is not None:
            es.cleanup()
            time.sleep(10)

    request.addfinalizer(teardown)
    return es
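A minimal sketch of a test consuming this fixture (the test name and skip message are hypothetical, and pytest is assumed to be imported); since the fixture returns None on non-cloud platforms, the test should handle that case:

def test_workload_with_internal_es(es):
    if es is None:
        pytest.skip("internal Elasticsearch is only deployed on cloud platforms")
    # ... run the benchmark and point its results at `es` ...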
Example #8
class TestFIOBenchmark(PASTest):
    """
    Run FIO perf test using benchmark operator

    """
    def setup(self):
        """
        Setting up test parameters
        """
        log.info("Starting the test setup")
        self.benchmark_name = "FIO"
        self.client_pod_name = "fio-client"
        if config.PERF.get("deploy_internal_es"):
            self.es = ElasticSearch()
        else:
            if config.PERF.get("internal_es_server") == "":
                self.es = None
                return
            else:
                self.es = {
                    "server": config.PERF.get("internal_es_server"),
                    "port": config.PERF.get("internal_es_port"),
                    "url":
                    f"http://{config.PERF.get('internal_es_server')}:{config.PERF.get('internal_es_port')}",
                    "parallel": True,
                }
                # verify that the connection to the elasticsearch server is OK
                if not super(TestFIOBenchmark, self).es_connect():
                    self.es = None
                    return

        super(TestFIOBenchmark, self).setup()
        # deploy the benchmark-operator
        self.deploy_benchmark_operator()

    def setting_storage_usage(self):
        """
        Get the storage capacity, calculate the storage usage and
        set the workload CR file parameters.

        """

        # for development mode - use parameters for short test run
        if self.dev_mode:
            log.info("Setting up parameters for development mode")
            self.crd_data["spec"]["workload"]["args"]["filesize"] = "1GiB"
            self.crd_data["spec"]["workload"]["args"]["storagesize"] = "5Gi"
            self.crd_data["spec"]["workload"]["args"]["servers"] = 2
            self.crd_data["spec"]["workload"]["args"]["samples"] = 2
            self.crd_data["spec"]["workload"]["args"]["read_runtime"] = 30
            self.crd_data["spec"]["workload"]["args"]["write_runtime"] = 30
            self.crd_data["spec"]["workload"]["args"]["bs"] = ["64KiB"]
            self.total_data_set = 20
            self.filesize = 3
            return

        ceph_cluster = CephCluster()
        ceph_capacity = ceph_cluster.get_ceph_capacity()
        log.info(f"Total storage capacity is {ceph_capacity} GiB")
        self.total_data_set = int(ceph_capacity * 0.4)
        self.filesize = int(
            self.crd_data["spec"]["workload"]["args"]["filesize"].replace(
                "GiB", ""))
        # To make sure the number of App pods will not be more than 50 in case
        # of a large data set, change the size of the file each pod will work on
        if self.total_data_set > 500:
            self.filesize = int(ceph_capacity * 0.008)
            self.crd_data["spec"]["workload"]["args"][
                "filesize"] = f"{self.filesize}GiB"
            # make sure that the storage size is larger than the file size
            self.crd_data["spec"]["workload"]["args"][
                "storagesize"] = f"{int(self.filesize * 1.2)}Gi"
        self.crd_data["spec"]["workload"]["args"]["servers"] = int(
            self.total_data_set / self.filesize)
        log.info(f"Total Data set to work on is : {self.total_data_set} GiB")
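        # Worked example, assuming a hypothetical 1,500 GiB cluster:
        #   total_data_set = int(1500 * 0.4)   = 600 GiB (> 500, so filesize is rescaled)
        #   filesize       = int(1500 * 0.008) = 12 GiB
        #   storagesize    = int(12 * 1.2)     = 14 -> "14Gi"
        #   servers        = int(600 / 12)     = 50 app pods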

    def setting_io_pattern(self, io_pattern):
        """
        Setting the test jobs according to the io pattern - random / sequential

        Args:
            io_pattern (str): the I/O pattern to run (random / sequential)

        """
        if io_pattern == "sequential":
            self.crd_data["spec"]["workload"]["args"]["jobs"] = [
                "write", "read"
            ]
            self.crd_data["spec"]["workload"]["args"]["iodepth"] = 1
        if io_pattern == "random":
            self.crd_data["spec"]["workload"]["args"]["jobs"] = [
                "randwrite",
                "randread",
            ]

    def init_full_results(self, full_results):
        """
        Initialize the full results object which will be sent to the ES server

        Args:
            full_results (obj): an empty FIOResultsAnalyse object

        Returns:
            FIOResultsAnalyse (obj): the input object filled with data

        """
        for key in self.environment:
            full_results.add_key(key, self.environment[key])

        # Setting the global parameters of the test
        full_results.add_key("dataset", f"{self.total_data_set}GiB")
        full_results.add_key(
            "file_size", self.crd_data["spec"]["workload"]["args"]["filesize"])
        full_results.add_key(
            "servers", self.crd_data["spec"]["workload"]["args"]["servers"])
        full_results.add_key(
            "samples", self.crd_data["spec"]["workload"]["args"]["samples"])
        full_results.add_key("operations",
                             self.crd_data["spec"]["workload"]["args"]["jobs"])
        full_results.add_key("block_sizes",
                             self.crd_data["spec"]["workload"]["args"]["bs"])
        full_results.add_key(
            "io_depth", self.crd_data["spec"]["workload"]["args"]["iodepth"])
        full_results.add_key(
            "jobs", self.crd_data["spec"]["workload"]["args"]["numjobs"])
        full_results.add_key(
            "runtime",
            {
                "read":
                self.crd_data["spec"]["workload"]["args"]["read_runtime"],
                "write":
                self.crd_data["spec"]["workload"]["args"]["write_runtime"],
            },
        )
        full_results.add_key(
            "storageclass",
            self.crd_data["spec"]["workload"]["args"]["storageclass"])
        full_results.add_key(
            "vol_size",
            self.crd_data["spec"]["workload"]["args"]["storagesize"])
        return full_results

    def cleanup(self):
        """
        Do cleanup in the benchmark-operator namespace:
        delete the benchmark, and make sure no PVCs and no PVs are left.

        """
        log.info("Deleting FIO benchmark")
        self.benchmark_obj.delete()
        time.sleep(180)

        # Getting all PVCs created in the test (if left).
        NL = "\\n"  # NewLine character
        command = ["oc", "get", "pvc", "-n"]
        command.append(benchmark_operator.BMO_NAME)
        command.append("-o")
        command.append("template")
        command.append("--template")
        command.append("'{{range .items}}{{.metadata.name}}{{\"" + NL +
                       "\"}}{{end}}'")
        pvcs_list = run_command(command, out_format="list")
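        # The command assembled above is equivalent to:
        #   oc get pvc -n <namespace> -o template \
        #     --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}'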
        log.info(f"list of all PVCs :{pvcs_list}")
        for pvc in pvcs_list:
            pvc = pvc.replace("'", "")
            run_command(
                f"oc -n {benchmark_operator.BMO_NAME} delete pvc {pvc}")

        # Getting all PVs created in the test (if left).
        command[2] = "pv"
        command[8] = (
            "'{{range .items}}{{.metadata.name}} {{.spec.claimRef.namespace}}{{\""
            + NL + "\"}}{{end}}'")
        command.remove("-n")
        command.remove(benchmark_operator.BMO_NAME)
        pvs_list = run_command(command, out_format="list")
        log.info(f"list of all PVs :{pvs_list}")

        for line in pvs_list:
            try:
                pv, ns = line.split(" ")
                pv = pv.replace("'", "")
                if ns == benchmark_operator.BMO_NAME:
                    log.info(f"Going to delete {pv}")
                    run_command(f"oc delete pv {pv}")
            except Exception:
                pass

    def run(self):
        """
        Run the test and wait until it finishes

        """
        self.deploy_and_wait_for_wl_to_start(timeout=900)
        # Getting the UUID from inside the benchmark pod
        self.uuid = self.operator.get_uuid(self.client_pod)
        # Setting back the original elastic-search information
        if hasattr(self, "backup_es"):
            self.crd_data["spec"]["elasticsearch"] = self.backup_es
        if self.dev_mode:
            sleeptime = 30
        else:
            sleeptime = 300

        self.wait_for_wl_to_finish(sleep=sleeptime)

        try:
            if "Fio failed to execute" not in self.test_logs:
                log.info("FIO has completed successfully")
        except IOError:
            log.warning("FIO failed to complete")

    def teardown(self):
        """
        The teardown of the test environment at the end of the test.

        """
        log.info("cleanup the environment")
        self.operator.cleanup()
        if isinstance(self.es, ElasticSearch):
            self.es.cleanup()

        sleep_time = 5
        log.info(
            f"Going to sleep for {sleep_time} minutes, for background cleanup to complete"
        )
        time.sleep(sleep_time * 60)

    @pytest.mark.parametrize(
        argnames=["interface", "io_pattern"],
        argvalues=[
            pytest.param(
                *[constants.CEPHBLOCKPOOL, "sequential"],
                marks=pytest.mark.polarion_id("OCS-844"),
            ),
            pytest.param(
                *[constants.CEPHFILESYSTEM, "sequential"],
                marks=pytest.mark.polarion_id("OCS-845"),
            ),
            pytest.param(
                *[constants.CEPHBLOCKPOOL, "random"],
                marks=pytest.mark.polarion_id("OCS-846"),
            ),
            pytest.param(
                *[constants.CEPHFILESYSTEM, "random"],
                marks=pytest.mark.polarion_id("OCS-847"),
            ),
        ],
    )
    def test_fio_workload_simple(self, interface, io_pattern):
        """
        This is a basic fio perf test - non-compressed volumes

        Args:
            interface (str): the interface that needs to be tested - CephFS / RBD
            io_pattern (str): the I/O pattern to do - random / sequential

        """

        # verify that there is an elasticsearch server for the benchmark
        if not self.es:
            log.error("This test must have an Elasticsearch server")
            return False

        # Getting the full path for the test logs
        self.full_log_path = get_full_test_logs_path(cname=self)
        self.full_log_path += f"-{interface}-{io_pattern}"
        log.info(f"Logs file path name is : {self.full_log_path}")

        log.info("Create resource file for fio workload")
        self.crd_data = templating.load_yaml(constants.FIO_CR_YAML)

        # Saving the Original elastic-search IP and PORT - if defined in yaml
        self.es_info_backup(self.es)

        self.set_storageclass(interface=interface)

        # Setting the data set to 40% of the total storage capacity
        self.setting_storage_usage()

        self.get_env_info()

        self.setting_io_pattern(io_pattern)

        self.run()

        # Initialize the results doc file.
        full_results = self.init_full_results(
            FIOResultsAnalyse(self.uuid, self.crd_data, self.full_log_path,
                              self.main_es))

        # Setting the global parameters of the test
        full_results.add_key("io_pattern", io_pattern)

        # Clean up fio benchmark
        self.cleanup()

        log.debug(f"Full results is : {full_results.results}")
        if isinstance(self.es, ElasticSearch):
            # Using internal deployed elasticsearch
            # if self.es:
            log.info("Getting data from internal ES")
            if self.main_es:
                self.copy_es_data(self.es)
            else:
                log.info("Dumping data from the Internal ES to tar ball file")
                self.es.dumping_all_data(self.full_log_path)

        full_results.analyze_results(self)  # Analyze the results
        full_results.add_key("test_time", {
            "start": self.start_time,
            "end": self.end_time
        })

        # Writing the analyzed test results to the Elastic-Search server
        if full_results.es_write():
            log.info(
                f"The Result can be found at : {full_results.results_link()}")

    @skipif_ocs_version("<4.6")
    @pytest.mark.parametrize(
        argnames=["io_pattern", "bs", "cmp_ratio"],
        argvalues=[
            pytest.param(*["random", "1024KiB", 60]),
            pytest.param(*["random", "64KiB", 60]),
            pytest.param(*["random", "16KiB", 60]),
            pytest.param(*["sequential", "1024KiB", 60]),
            pytest.param(*["sequential", "64KiB", 60]),
            pytest.param(*["sequential", "16KiB", 60]),
        ],
    )
    @pytest.mark.polarion_id("OCS-2617")
    def test_fio_compressed_workload(self, storageclass_factory, io_pattern,
                                     bs, cmp_ratio):
        """
        This is a basic fio perf test which runs on a compression-enabled volume

        Args:
            io_pattern (str): the I/O pattern to do - random / sequential
            bs (str): block size to use in the test
            cmp_ratio (int): the expected compression ratio

        """

        # Getting the full path for the test logs
        self.full_log_path = get_full_test_logs_path(cname=self)
        self.full_log_path += f"-{io_pattern}-{bs}-{cmp_ratio}"
        log.info(f"Logs file path name is : {self.full_log_path}")

        log.info("Create resource file for fio workload")
        self.crd_data = templating.load_yaml(
            "ocs_ci/templates/workloads/fio/benchmark_fio_cmp.yaml")

        # Saving the Original elastic-search IP and PORT - if defined in yaml
        self.es_info_backup(self.es)

        log.info("Creating compressed pool & SC")
        sc_obj = storageclass_factory(
            interface=constants.CEPHBLOCKPOOL,
            new_rbd_pool=True,
            replica=3,
            compression="aggressive",
        )

        sc = sc_obj.name
        pool_name = run_cmd(
            f"oc get sc {sc} -o jsonpath={{'.parameters.pool'}}")
        # Create fio benchmark
        self.crd_data["spec"]["workload"]["args"]["bs"] = [bs]
        self.crd_data["spec"]["workload"]["args"]["cmp_ratio"] = cmp_ratio

        # Setting the data set to 40% of the total storage capacity
        self.setting_storage_usage()
        self.crd_data["spec"]["workload"]["args"][
            "prefill_bs"] = self.crd_data["spec"]["workload"]["args"]["bs"][0]

        self.get_env_info()

        self.crd_data["spec"]["workload"]["args"]["storageclass"] = sc
        self.setting_io_pattern(io_pattern)
        self.run()

        # Initialize the results doc file.
        full_results = self.init_full_results(
            FIOResultsAnalyse(self.uuid, self.crd_data, self.full_log_path,
                              self.main_es))

        # Setting the global parameters of the test
        full_results.add_key("io_pattern", io_pattern)

        if isinstance(self.es, ElasticSearch):
            # Using internal deployed elasticsearch
            # if self.es:
            log.info("Getting data from internal ES")
            if self.main_es:
                self.copy_es_data(self.es)
            else:
                log.info("Dumping data from the Internal ES to tar ball file")
                self.es.dumping_all_data(self.full_log_path)

        log.info("verifying compression ratio")
        ratio = calculate_compression_ratio(pool_name)

        full_results.add_key("cmp_ratio", {
            "expected": cmp_ratio,
            "actual": ratio
        })
        log.debug(f"Full results is : {full_results.results}")
        full_results.analyze_results(self)  # Analyze the results
        if (cmp_ratio + 5) < ratio or ratio < (cmp_ratio - 5):
            log.warning(f"The compression ratio is {ratio}% "
                        f"while the expected ratio is {cmp_ratio}%")
        else:
            log.info(f"The compression ratio is {ratio}%")
        full_results.add_key("test_time", {
            "start": self.start_time,
            "end": self.end_time
        })

        # Writing the analyzed test results to the Elastic-Search server
        if full_results.es_write():
            log.info(
                f"The Result can be found at : {full_results.results_link()}")

        # Clean up fio benchmark
        self.cleanup()
        sc_obj.delete()
        sc_obj.ocp.wait_for_delete(resource_name=sc, timeout=300, sleep=5)
Example #9
    def test_smallfile_workload(self, file_size, files, threads, samples,
                                clients, interface):
        """
        Run SmallFile Workload

        Args:
            file_size (int) : the size of the file to be used
            files (int) : number of files to use
            threads (int) : number of threads to be used in the test
            samples (int) : how many samples to run for each test
            clients (int) : number of client pods to use in the test
            interface (str) : the volume type (rbd / cephfs)

        """
        if config.PERF.get("deploy_internal_es"):
            self.es = ElasticSearch()
        else:
            if config.PERF.get("internal_es_server") == "":
                self.es = None
                return
            else:
                self.es = {
                    "server":
                    config.PERF.get("internal_es_server"),
                    "port":
                    config.PERF.get("internal_es_port"),
                    "url":
                    f"http://{config.PERF.get('internal_es_server')}:{config.PERF.get('internal_es_port')}",
                }
                # verify that the connection to the elasticsearch server is OK
                if not super(TestSmallFileWorkload, self).es_connect():
                    self.es = None
                    return

        # deploy the benchmark-operator
        self.deploy_benchmark_operator()

        # verify that there is an elasticsearch server for the benchmark
        if not self.es:
            log.error("This test must have an Elasticsearch server")
            return False

        # Getting the full path for the test logs
        self.full_log_path = get_full_test_logs_path(cname=self)
        self.results_path = get_full_test_logs_path(cname=self)
        self.full_log_path += (
            f"-{file_size}-{files}-{threads}-{samples}-{clients}-{interface}")
        log.info(f"Logs file path name is : {self.full_log_path}")

        # Loading the main template yaml file for the benchmark
        log.info("Create resource file for small_files workload")
        self.crd_data = templating.load_yaml(
            constants.SMALLFILE_BENCHMARK_YAML)

        # Saving the Original elastic-search IP and PORT - if defined in yaml
        self.es_info_backup(self.es)

        self.set_storageclass(interface=interface)

        # Setting the data set to 40% of the total storage capacity
        self.setting_storage_usage(file_size, files, threads, samples, clients)

        self.get_env_info()

        if not self.run():
            log.error("The benchmark failed to run !")
            return

        # Setting back the original elastic-search information
        if self.backup_es:
            self.crd_data["spec"]["elasticsearch"] = self.backup_es

        # Initialize the results doc file.
        full_results = self.init_full_results(
            SmallFileResultsAnalyse(self.uuid, self.crd_data,
                                    self.full_log_path, self.main_es))

        log.info(f"Full results is : {full_results.results}")
        if isinstance(self.es, ElasticSearch):
            # Using internal deployed elasticsearch
            log.info("Getting data from internal ES")
            if self.main_es:
                self.copy_es_data(self.es)
                full_results.read()
            else:
                log.info("Dumping data from the Internal ES to tar ball file")
                self.es.dumping_all_data(self.full_log_path)
        else:
            log.info(self.es)
            self.es = Elasticsearch(hosts=[{
                "host": self.es["server"],
                "port": self.es["port"]
            }])
            full_results.read()

        full_results.add_key("test_time", {
            "start": self.start_time,
            "end": self.end_time
        })

        if self.main_es:
            full_results.es = self.main_es

        if not full_results.dont_check:
            full_results.add_key("hosts", full_results.get_clients_list())
            full_results.init_full_results()
            full_results.aggregate_host_results()
            test_status = full_results.aggregate_samples_results()

            # Generate a link for all the data in kibana
            columns = "optype,files,filesPerSec,elapsed,sample,tid"
            klink = self.generate_kibana_link("ripsaw-smallfile-results",
                                              columns)

            # Generate a link for all the response-time data in kibana
            columns = "optype,sample,iops,max,min,mean,'90%25','95%25','99%25'"
            rtlink = self.generate_kibana_link("ripsaw-smallfile-rsptimes",
                                               columns)

            full_results.all_results = {
                "kibana_all": klink,
                "kibana_rsptime": rtlink
            }

            if full_results.es_write():
                res_link = full_results.results_link()
                log.info(f"The Result can be found at : {res_link}")

                # Create a text file with the results of all subtests (4 - according to the parameters)
                self.write_result_to_file(res_link)

        else:
            test_status = True

        assert test_status, "Test Failed !"
Example #10
    def test_pvc_snapshot_performance_multiple_files(self, file_size, files,
                                                     threads, interface):
        """
        Run the SmallFile workload and then take a snapshot.
        The test will run with 1M files on the volume - the total data set
        is the same for all tests, ~30GiB - then take a snapshot and measure
        the time it takes.
        The test will run 3 times to check consistency.

        Args:
            file_size (int): the size of the files to be created - in KiB
            files (int): number of files each thread will create
            threads (int): number of threads to be used in the workload
            interface (str): the volume interface that will be used
                             CephBlockPool / CephFileSystem

        Raises:
            TimeoutError : in case file creation takes too long
                           (more than 2 hours)

        """

        # Deploying elastic-search server in the cluster for use by the
        # SmallFiles workload, since it is mandatory for the workload.
        # This is deployed once for all test iterations and will be deleted
        # at the end of the test.
        self.es = ElasticSearch()

        # Loading the main template yaml file for the benchmark and update some
        # fields with new values
        sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

        if interface == constants.CEPHBLOCKPOOL:
            storageclass = constants.DEFAULT_STORAGECLASS_RBD
        else:
            storageclass = constants.DEFAULT_STORAGECLASS_CEPHFS
        log.info(f"Using {storageclass} Storageclass")

        # Setting up the parameters for this test
        sf_data["spec"]["workload"]["args"]["samples"] = 1
        sf_data["spec"]["workload"]["args"]["operation"] = ["create"]
        sf_data["spec"]["workload"]["args"]["file_size"] = file_size
        sf_data["spec"]["workload"]["args"]["files"] = files
        sf_data["spec"]["workload"]["args"]["threads"] = threads
        sf_data["spec"]["workload"]["args"]["storageclass"] = storageclass
        sf_data["spec"]["elasticsearch"] = {
            "url": f"http://{self.es.get_ip()}:{self.es.get_port()}"
        }
        """
        Calculate the size of the volume to be tested; it should be at
        least twice the size of the files, and at least 100Gi.

        Since file_size is in KB and vol_size needs to be in GB, some
        extra calculation is needed.
        """
        total_files = int(files * threads)
        total_data = int(files * threads * file_size / constants.GB2KB)
        data_set = int(total_data * 3)  # calculate data with replica
        vol_size = data_set if data_set >= 100 else 100
        sf_data["spec"]["workload"]["args"]["storagesize"] = f"{vol_size}Gi"

        environment = get_environment_info()
        if not environment["user"] == "":
            sf_data["spec"]["test_user"] = environment["user"]
        else:
            # since the full results object needs this parameter, initialize it from the CR file
            environment["user"] = sf_data["spec"]["test_user"]

        sf_data["spec"]["clustername"] = environment["clustername"]
        log.debug(f"The smallfile yaml file is {sf_data}")

        # Deploy the benchmark-operator, so we can use the SmallFiles workload
        # to fill up the volume with files, and switch to the benchmark-operator namespace.
        log.info("Deploy the benchmark-operator")
        self.deploy_benchmark_operator()
        switch_to_project(BMO_NAME)

        all_results = []

        self.results_path = get_full_test_logs_path(cname=self)
        log.info(f"Logs file path name is : {self.full_log_path}")

        # Produce ES report
        # Collecting environment information
        self.get_env_info()

        # Initialize the results doc file.
        self.full_results = self.init_full_results(
            ResultsAnalyse(
                self.uuid,
                self.crd_data,
                self.full_log_path,
                "pvc_snapshot_perf_multiple_files",
            ))
        self.full_results.add_key("file_size_inKB", file_size)
        self.full_results.add_key("threads", threads)
        self.full_results.add_key("interface", interface)
        for test_num in range(self.tests_numbers):

            test_results = {"creation_time": None, "csi_creation_time": None}

            # deploy the smallfile workload
            log.info("Running SmallFile bench")
            sf_obj = OCS(**sf_data)
            sf_obj.create()

            # wait for benchmark pods to get created - takes a while
            for bench_pod in TimeoutSampler(
                    240,
                    10,
                    get_pod_name_by_pattern,
                    "smallfile-client",
                    BMO_NAME,
            ):
                try:
                    if bench_pod[0] is not None:
                        small_file_client_pod = bench_pod[0]
                        break
                except IndexError:
                    log.info("Bench pod not ready yet")

            bench_pod = OCP(kind="pod", namespace=BMO_NAME)
            log.info("Waiting for SmallFile benchmark to Run")
            assert bench_pod.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                resource_name=small_file_client_pod,
                sleep=30,
                timeout=600,
            )
            # Initialize the pvc_name variable so it will not be in loop scope only.
            pvc_name = ""
            for item in bench_pod.get()["items"]:
                if item.get("metadata").get("name") == small_file_client_pod:
                    for volume in item.get("spec").get("volumes"):
                        if "persistentVolumeClaim" in volume:
                            pvc_name = volume["persistentVolumeClaim"][
                                "claimName"]
                            break
            log.info(f"Benchmark PVC name is : {pvc_name}")
            # Creation of 1M files on CephFS can take a lot of time
            timeout = 7200
            while timeout >= 0:
                logs = bench_pod.get_logs(name=small_file_client_pod)
                if "RUN STATUS DONE" in logs:
                    break
                timeout -= 30
                if timeout == 0:
                    raise TimeoutError(
                        "Timed out waiting for benchmark to complete")
                time.sleep(30)
            log.info(f"Smallfile test ({test_num + 1}) finished.")

            # Taking snapshot of the PVC (which contain files)
            snap_name = pvc_name.replace("claim", "snapshot-")
            log.info(f"Taking snapshot of the PVC {pvc_name}")
            log.info(f"Snapshot name : {snap_name}")

            start_time = datetime.datetime.utcnow().strftime(
                "%Y-%m-%dT%H:%M:%SZ")

            test_results["creation_time"] = self.measure_create_snapshot_time(
                pvc_name=pvc_name,
                snap_name=snap_name,
                namespace=BMO_NAME,
                interface=interface,
                start_time=start_time,
            )
            log.info(
                f"Snapshot with name {snap_name} and id {self.snap_uid} creation time is"
                f' {test_results["creation_time"]} seconds')

            test_results[
                "csi_creation_time"] = performance_lib.measure_csi_snapshot_creation_time(
                    interface=interface,
                    snapshot_id=self.snap_uid,
                    start_time=start_time)
            log.info(
                f"Snapshot with name {snap_name} and id {self.snap_uid} csi creation time is"
                f' {test_results["csi_creation_time"]} seconds')

            all_results.append(test_results)

            # Delete the smallfile workload - which will delete also the PVC
            log.info("Deleting the smallfile workload")
            if sf_obj.delete(wait=True):
                log.info("The smallfile workload was deleted successfully")

            # Delete VolumeSnapshots
            log.info("Deleting the snapshots")
            if self.snap_obj.delete(wait=True):
                log.info("The snapshot deleted successfully")
            log.info("Verify (and wait if needed) that ceph health is OK")
            ceph_health_check(tries=45, delay=60)

            # Sleep for 1 Min. between test samples
            time.sleep(60)

        # Cleanup the elasticsearch instance.
        log.info("Deleting the elastic-search instance")
        self.es.cleanup()

        creation_times = [t["creation_time"] for t in all_results]
        avg_c_time = statistics.mean(creation_times)
        csi_creation_times = [t["csi_creation_time"] for t in all_results]
        avg_csi_c_time = statistics.mean(csi_creation_times)

        t_dataset = int(data_set / 3)

        log.info(f"Full test report for {interface}:")
        log.info(f"Test ran {self.tests_numbers} times, "
                 f"All snapshot creation results are {creation_times} seconds")
        log.info(
            f"The average snapshot creation time is : {avg_c_time} seconds")
        log.info(f"Test ran {self.tests_numbers} times, "
                 f"All snapshot csi creation results are {csi_creation_times}")
        log.info(
            f"The average csi snapshot creation time is : {avg_csi_c_time}")

        log.info(f"Number of Files on the volume : {total_files:,}, "
                 f"Total dataset : {t_dataset} GiB")

        self.full_results.add_key("avg_snapshot_creation_time_insecs",
                                  avg_c_time)
        self.full_results.all_results["total_files"] = total_files
        self.full_results.all_results["total_dataset"] = t_dataset
        self.full_results.all_results["creation_time"] = creation_times
        self.full_results.all_results["csi_creation_time"] = csi_creation_times

        # Write the test results into the ES server
        log.info("writing results to elastic search server")
        if self.full_results.es_write():
            res_link = self.full_results.results_link()
            # write the ES link to the test results in the test log.
            log.info(f"The result can be found at : {res_link}")

            # Create a text file with the results of all subtests
            self.write_result_to_file(res_link)
Example #11
class TestPvcSnapshotPerformance(PASTest):
    """
    Tests to verify PVC snapshot creation and deletion performance
    """

    tests_numbers = 3  # number of tests to run

    @pytest.fixture()
    def base_setup(
        self,
        interface_iterate,
        storageclass_factory,
        pvc_size,
    ):
        """
        A setup phase for the test - creating resources

        Args:
            interface_iterate: A fixture to iterate over ceph interfaces
            storageclass_factory: A fixture to create everything needed for a
                storageclass
            pvc_size: The size of the PVC in Gi

        """
        self.interface = interface_iterate
        self.sc_obj = storageclass_factory(self.interface)

        if self.interface == constants.CEPHBLOCKPOOL:
            self.sc = "RBD"
        elif self.interface == constants.CEPHFILESYSTEM:
            self.sc = "CephFS"
        elif self.interface == constants.CEPHBLOCKPOOL_THICK:
            self.sc = "RBD-Thick"

        self.create_test_project()

        self.pvc_obj = helpers.create_pvc(sc_name=self.sc_obj.name,
                                          size=pvc_size + "Gi",
                                          namespace=self.namespace)
        helpers.wait_for_resource_state(self.pvc_obj, constants.STATUS_BOUND)
        self.pvc_obj.reload()

        # Create a POD and attach it to the PVC
        try:
            self.pod_object = helpers.create_pod(
                interface_type=self.interface,
                pvc_name=self.pvc_obj.name,
                namespace=self.namespace,
            )
            helpers.wait_for_resource_state(self.pod_object,
                                            constants.STATUS_RUNNING)
            self.pod_object.reload()
        except Exception as e:
            log.error(
                f"Pod on PVC {self.pvc_obj.name} was not created, exception {str(e)}"
            )
            raise ex.PodNotCreated("Pod on PVC was not created.")

    def setup(self):
        """
        Setting up test parameters
        """
        log.info("Starting the test setup")
        super(TestPvcSnapshotPerformance, self).setup()
        self.benchmark_name = "pvc_snaspshot_performance"
        self.uuid = uuid4().hex
        self.crd_data = {
            "spec": {
                "test_user": "******",
                "clustername": "test_cluster",
                "elasticsearch": {
                    "server":
                    config.PERF.get("production_es_server"),
                    "port":
                    config.PERF.get("production_es_port"),
                    "url":
                    f"http://{config.PERF.get('production_es_server')}:{config.PERF.get('production_es_port')}",
                },
            }
        }
        # during development use the dev ES so the data in the Production ES will be clean.
        if self.dev_mode:
            self.crd_data["spec"]["elasticsearch"] = {
                "server":
                config.PERF.get("dev_es_server"),
                "port":
                config.PERF.get("dev_es_port"),
                "url":
                f"http://{config.PERF.get('dev_es_server')}:{config.PERF.get('dev_es_port')}",
            }

    def init_full_results(self, full_results):
        """
        Initialize the full results object which will be sent to the ES server

        Args:
            full_results (obj): an empty ResultsAnalyse object

        Returns:
            ResultsAnalyse (obj): the input object filled with data

        """
        for key in self.environment:
            full_results.add_key(key, self.environment[key])
        full_results.add_key("index", full_results.new_index)
        return full_results

    def measure_create_snapshot_time(self,
                                     pvc_name,
                                     snap_name,
                                     namespace,
                                     interface,
                                     start_time=None):
        """
        Create a volume snapshot, and measure the creation time

        Args:
            pvc_name (str): the PVC name to create a snapshot of
            snap_name (str): the name of the snapshot to be created
            namespace (str): the namespace in which the snapshot is created
            interface (str): the interface (rbd / cephfs) to be used
            start_time (str): the UTC time (%Y-%m-%dT%H:%M:%SZ) from which to measure

        Returns:
            int : the snapshot creation time in seconds

        """

        # Find the snapshot yaml according to the interface
        snap_yaml = constants.CSI_RBD_SNAPSHOT_YAML
        if interface == constants.CEPHFILESYSTEM:
            snap_yaml = constants.CSI_CEPHFS_SNAPSHOT_YAML

        # Create the Snapshot of the PVC
        self.snap_obj = pvc.create_pvc_snapshot(
            pvc_name=pvc_name,
            snap_yaml=snap_yaml,
            snap_name=snap_name,
            namespace=namespace,
            sc_name=helpers.default_volumesnapshotclass(interface).name,
        )

        # Wait until the snapshot is bound and ready to use
        self.snap_obj.ocp.wait_for_resource(
            condition="true",
            resource_name=self.snap_obj.name,
            column=constants.STATUS_READYTOUSE,
            timeout=600,
        )

        # Getting the snapshot content name
        self.snap_content = helpers.get_snapshot_content_obj(self.snap_obj)
        self.snap_uid = (self.snap_content.data.get("spec").get(
            "volumeSnapshotRef").get("uid"))
        log.info(f"The snapshot UID is :{self.snap_uid}")

        # Measure the snapshot creation time
        c_time = performance_lib.measure_total_snapshot_creation_time(
            snap_name, start_time)

        return c_time

    @pytest.mark.parametrize(
        argnames=["pvc_size"],
        argvalues=[
            pytest.param(*["1"]),
            pytest.param(*["10"]),
            pytest.param(*["100"])
        ],
    )
    @pytest.mark.usefixtures(base_setup.__name__)
    def test_pvc_snapshot_performance(self, pvc_size):
        """
        1. Run I/O on a pod file
        2. Calculate md5sum of the file
        3. Take a snapshot of the PVC and measure the total and CSI snapshot creation time
        4. Restore from the snapshot and measure the time
        5. Attach a new pod to it
        6. Verify that the file is present on the new pod also
        7. Verify that the md5sum of the file on the new pod matches
           with the md5sum of the file on the original pod

        This scenario runs 3 times, reports the average results of the 3 runs,
        and sends them to the ES server
        Args:
            pvc_size: the size of the PVC to be tested - parametrized

        """

        # Getting the total Storage capacity
        ceph_cluster = CephCluster()
        ceph_capacity = ceph_cluster.get_ceph_capacity()

        log.info(f"Total capacity size is : {ceph_capacity}")
        log.info(f"PVC Size is : {pvc_size}")
        log.info(f"Needed capacity is {int(int(pvc_size) * 5)}")
        if int(ceph_capacity) < int(pvc_size) * 5:
            log.error(
                f"PVC size is {pvc_size}GiB and it is too large for this system"
                f" which has only {ceph_capacity}GiB")
            return
        # Calculating the file size as 25% of the PVC size
        # in the end the PVC will be 75% full
        filesize = self.pvc_obj.size * 0.25
        # Change the file size to MB and from int to str
        file_size = f"{int(filesize * 1024)}M"
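        # Worked example, assuming a hypothetical pvc_size of "10":
        #   filesize  = 10 * 0.25 = 2.5 GiB written per test iteration
        #   file_size = "2560M"
        #   after the 3 iterations (tests_numbers) the PVC holds ~7.5 GiB, i.e. ~75% full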

        all_results = []

        self.results_path = get_full_test_logs_path(cname=self)
        log.info(f"Logs file path name is : {self.full_log_path}")

        # Produce ES report
        # Collecting environment information
        self.get_env_info()

        # Initialize the results doc file.
        self.full_results = self.init_full_results(
            ResultsAnalyse(
                self.uuid,
                self.crd_data,
                self.full_log_path,
                "pvc_snapshot_perf",
            ))
        self.full_results.add_key("pvc_size", pvc_size + " GiB")
        self.full_results.add_key("interface", self.sc)
        self.full_results.all_results["creation_time"] = []
        self.full_results.all_results["csi_creation_time"] = []
        self.full_results.all_results["creation_speed"] = []
        self.full_results.all_results["restore_time"] = []
        self.full_results.all_results["restore_speed"] = []
        self.full_results.all_results["restore_csi_time"] = []
        for test_num in range(self.tests_numbers):
            test_results = {
                "test_num": test_num + 1,
                "dataset": (test_num + 1) * filesize * 1024,  # size in MiB
                "create": {
                    "time": None,
                    "csi_time": None,
                    "speed": None
                },
                "restore": {
                    "time": None,
                    "speed": None
                },
            }
            log.info(f"Starting test phase number {test_num}")
            # Step 1. Run I/O on a pod file.
            file_name = f"{self.pod_object.name}-{test_num}"
            log.info(f"Starting IO on the POD {self.pod_object.name}")
            # Going to run only write IO to fill the PVC for the snapshot
            self.pod_object.fillup_fs(size=file_size, fio_filename=file_name)

            # Wait for fio to finish
            fio_result = self.pod_object.get_fio_results()
            err_count = fio_result.get("jobs")[0].get("error")
            assert (
                err_count == 0
            ), f"IO error on pod {self.pod_object.name}. FIO result: {fio_result}"
            log.info("IO on the PVC Finished")

            # Verify presence of the file
            file_path = pod.get_file_path(self.pod_object, file_name)
            log.info(f"Actual file path on the pod {file_path}")
            assert pod.check_file_existence(
                self.pod_object, file_path), f"File {file_name} doesn't exist"
            log.info(f"File {file_name} exists in {self.pod_object.name}")

            # Step 2. Calculate md5sum of the file.
            orig_md5_sum = pod.cal_md5sum(self.pod_object, file_name)

            # Step 3. Take a snapshot of the PVC and measure the time of creation.
            snap_name = self.pvc_obj.name.replace("pvc-test",
                                                  f"snapshot-test{test_num}")
            log.info(f"Taking snapshot of the PVC {snap_name}")

            start_time = datetime.datetime.utcnow().strftime(
                "%Y-%m-%dT%H:%M:%SZ")

            test_results["create"]["time"] = self.measure_create_snapshot_time(
                pvc_name=self.pvc_obj.name,
                snap_name=snap_name,
                namespace=self.pod_object.namespace,
                interface=self.interface,
                start_time=start_time,
            )

            test_results["create"][
                "csi_time"] = performance_lib.measure_csi_snapshot_creation_time(
                    interface=self.interface,
                    snapshot_id=self.snap_uid,
                    start_time=start_time,
                )

            test_results["create"]["speed"] = int(
                test_results["dataset"] / test_results["create"]["time"])
            log.info(
                f' Test {test_num} dataset is {test_results["dataset"]} MiB')
            log.info(
                f"Snapshot name {snap_name} and id {self.snap_uid} creation time is"
                f' : {test_results["create"]["time"]} sec.')
            log.info(
                f"Snapshot name {snap_name} and id {self.snap_uid} csi creation time is"
                f' : {test_results["create"]["csi_time"]} sec.')
            log.info(
                f'Snapshot speed is : {test_results["create"]["speed"]} MB/sec'
            )

            # Step 4. Restore the PVC from the snapshot and measure the time
            # Same storage class as the original PVC
            sc_name = self.pvc_obj.backed_sc

            # Size should be the same as the original PVC
            pvc_size = str(self.pvc_obj.size) + "Gi"

            # Create a PVC out of the snapshot
            # Both the snapshot and the restore PVC should be in the same namespace

            log.info("Restoring from the Snapshot")
            restore_pvc_name = self.pvc_obj.name.replace(
                "pvc-test", f"restore-pvc{test_num}")
            restore_pvc_yaml = constants.CSI_RBD_PVC_RESTORE_YAML
            if self.interface == constants.CEPHFILESYSTEM:
                restore_pvc_yaml = constants.CSI_CEPHFS_PVC_RESTORE_YAML

            csi_start_time = self.get_time("csi")
            log.info("Restoring the PVC from Snapshot")
            restore_pvc_obj = pvc.create_restore_pvc(
                sc_name=sc_name,
                snap_name=self.snap_obj.name,
                namespace=self.snap_obj.namespace,
                size=pvc_size,
                pvc_name=restore_pvc_name,
                restore_pvc_yaml=restore_pvc_yaml,
            )
            helpers.wait_for_resource_state(
                restore_pvc_obj,
                constants.STATUS_BOUND,
                timeout=3600  # setting this to 60 Min.
                # since the restore can take a long time, and we want it to finish.
            )
            restore_pvc_obj.reload()
            log.info("PVC was restored from the snapshot")
            test_results["restore"][
                "time"] = helpers.measure_pvc_creation_time(
                    self.interface, restore_pvc_obj.name)

            test_results["restore"]["speed"] = int(
                test_results["dataset"] / test_results["restore"]["time"])
            log.info(
                f'Snapshot restore time is : {test_results["restore"]["time"]}'
            )
            log.info(
                f'restore speed is : {test_results["restore"]["speed"]} MB/sec'
            )

            test_results["restore"][
                "csi_time"] = performance_lib.csi_pvc_time_measure(
                    self.interface, restore_pvc_obj, "create", csi_start_time)
            log.info(
                f'Snapshot csi restore time is : {test_results["restore"]["csi_time"]}'
            )

            # Step 5. Attach a new pod to the restored PVC
            restore_pod_object = helpers.create_pod(
                interface_type=self.interface,
                pvc_name=restore_pvc_obj.name,
                namespace=self.snap_obj.namespace,
            )

            # Confirm that the pod is running
            helpers.wait_for_resource_state(resource=restore_pod_object,
                                            state=constants.STATUS_RUNNING)
            restore_pod_object.reload()

            # Step 6. Verify that the file is present on the new pod also.
            log.info(f"Checking the existence of {file_name} "
                     f"on restore pod {restore_pod_object.name}")
            assert pod.check_file_existence(
                restore_pod_object,
                file_path), f"File {file_name} doesn't exist"
            log.info(f"File {file_name} exists in {restore_pod_object.name}")

            # Step 7. Verify that the md5sum matches
            log.info(
                f"Verifying that md5sum of {file_name} "
                f"on pod {self.pod_object.name} matches with md5sum "
                f"of the same file on restore pod {restore_pod_object.name}")
            assert pod.verify_data_integrity(
                restore_pod_object, file_name,
                orig_md5_sum), "Data integrity check failed"
            log.info("Data integrity check passed, md5sum are same")

            restore_pod_object.delete()
            restore_pvc_obj.delete()

            all_results.append(test_results)

        # clean the environment
        self.pod_object.delete()
        self.pvc_obj.delete()
        self.delete_test_project()

        # logging the test summary, all info in one place for easy log reading
        c_speed, c_runtime, c_csi_runtime, r_speed, r_runtime, r_csi_runtime = (
            0 for i in range(6))

        log.info("Test summary :")
        for tst in all_results:
            c_speed += tst["create"]["speed"]
            c_runtime += tst["create"]["time"]
            c_csi_runtime += tst["create"]["csi_time"]
            r_speed += tst["restore"]["speed"]
            r_runtime += tst["restore"]["time"]
            r_csi_runtime += tst["restore"]["csi_time"]

            self.full_results.all_results["creation_time"].append(
                tst["create"]["time"])
            self.full_results.all_results["csi_creation_time"].append(
                tst["create"]["csi_time"])
            self.full_results.all_results["creation_speed"].append(
                tst["create"]["speed"])
            self.full_results.all_results["restore_time"].append(
                tst["restore"]["time"])
            self.full_results.all_results["restore_speed"].append(
                tst["restore"]["speed"])
            self.full_results.all_results["restore_csi_time"].append(
                tst["restore"]["csi_time"])
            self.full_results.all_results["dataset_inMiB"] = tst["dataset"]
            log.info(
                f"Test {tst['test_num']} results : dataset is {tst['dataset']} MiB. "
                f"Take snapshot time is {tst['create']['time']} "
                f"at {tst['create']['speed']} MiB/Sec "
                f"Restore from snapshot time is {tst['restore']['time']} "
                f"at {tst['restore']['speed']} MiB/Sec ")

        avg_snap_c_time = c_runtime / self.tests_numbers
        avg_snap_csi_c_time = c_csi_runtime / self.tests_numbers
        avg_snap_c_speed = c_speed / self.tests_numbers
        avg_snap_r_time = r_runtime / self.tests_numbers
        avg_snap_r_speed = r_speed / self.tests_numbers
        avg_snap_r_csi_time = r_csi_runtime / self.tests_numbers
        log.info(f" Average snapshot creation time is {avg_snap_c_time} sec.")
        log.info(
            f" Average csi snapshot creation time is {avg_snap_csi_c_time} sec."
        )
        log.info(
            f" Average snapshot creation speed is {avg_snap_c_speed} MiB/sec")
        log.info(f" Average snapshot restore time is {avg_snap_r_time} sec.")
        log.info(
            f" Average snapshot restore speed is {avg_snap_r_speed} MiB/sec")
        log.info(
            f" Average snapshot restore csi time is {avg_snap_r_csi_time} sec."
        )

        self.full_results.add_key("avg_snap_creation_time_insecs",
                                  avg_snap_c_time)
        self.full_results.add_key("avg_snap_csi_creation_time_insecs",
                                  avg_snap_csi_c_time)
        self.full_results.add_key("avg_snap_creation_speed", avg_snap_c_speed)
        self.full_results.add_key("avg_snap_restore_time_insecs",
                                  avg_snap_r_time)
        self.full_results.add_key("avg_snap_restore_speed", avg_snap_r_speed)
        self.full_results.add_key("avg_snap_restore_csi_time_insecs",
                                  avg_snap_r_csi_time)

        # Write the test results into the ES server
        log.info("writing results to elastic search server")
        if self.full_results.es_write():
            res_link = self.full_results.results_link()

            # write the ES link to the test results in the test log.
            log.info(f"The result can be found at : {res_link}")

            self.write_result_to_file(res_link)

    @pytest.mark.parametrize(
        argnames=["file_size", "files", "threads", "interface"],
        argvalues=[
            pytest.param(
                *[32, 125000, 8, constants.CEPHBLOCKPOOL],
                marks=[pytest.mark.polarion_id("OCS-2624")],
            ),
            pytest.param(
                *[32, 125000, 8, constants.CEPHFILESYSTEM],
                marks=[pytest.mark.polarion_id("OCS-2625")],
            ),
        ],
    )
    def test_pvc_snapshot_performance_multiple_files(self, file_size, files,
                                                     threads, interface):
        """
        Run the SmallFile workload and then take a snapshot.
        The test runs with 1M files on the volume - the total data set is the
        same for all tests (~30GiB) - then takes a snapshot and measures the
        time it takes. The test runs 3 times to check consistency.

        Args:
            file_size (int): the size of the files to be created, in KiB
            files (int): number of files each thread will create
            threads (int): number of threads to be used in the workload
            interface (str): the volume interface that will be used
                             CephBlockPool / CephFileSystem

        Raises:
            TimeoutError : if file creation takes longer than 2 hours

        """

        # Deploy an elastic-search server in the cluster for use by the
        # SmallFiles workload, since it is mandatory for the workload.
        # It is deployed once for all test iterations and will be deleted
        # at the end of the test.
        self.es = ElasticSearch()

        # Loading the main template yaml file for the benchmark and update some
        # fields with new values
        sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

        if interface == constants.CEPHBLOCKPOOL:
            storageclass = constants.DEFAULT_STORAGECLASS_RBD
        else:
            storageclass = constants.DEFAULT_STORAGECLASS_CEPHFS
        log.info(f"Using {storageclass} Storageclass")

        # Setting up the parameters for this test
        sf_data["spec"]["workload"]["args"]["samples"] = 1
        sf_data["spec"]["workload"]["args"]["operation"] = ["create"]
        sf_data["spec"]["workload"]["args"]["file_size"] = file_size
        sf_data["spec"]["workload"]["args"]["files"] = files
        sf_data["spec"]["workload"]["args"]["threads"] = threads
        sf_data["spec"]["workload"]["args"]["storageclass"] = storageclass
        sf_data["spec"]["elasticsearch"] = {
            "url": f"http://{self.es.get_ip()}:{self.es.get_port()}"
        }
        """
        Calculate the size of the volume to be tested. It should be at least
        twice the size of the data set and at least 100Gi.

        Since the file_size is in KiB and the vol_size needs to be in GiB,
        a unit conversion is needed.
        """
        total_files = int(files * threads)
        total_data = int(files * threads * file_size / constants.GB2KB)
        data_set = int(total_data * 3)  # calculate data with replica
        vol_size = data_set if data_set >= 100 else 100
        sf_data["spec"]["workload"]["args"]["storagesize"] = f"{vol_size}Gi"
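        # Worked example for the parametrized values above, assuming
        # constants.GB2KB is the number of KiB in one GiB (1024 * 1024):
        # 125,000 files * 8 threads * 32KiB ~= 30GiB of data, * 3 replicas = 90GiB,
        # which is below the 100Gi minimum, so a 100Gi volume is requested.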

        environment = get_environment_info()
        if not environment["user"] == "":
            sf_data["spec"]["test_user"] = environment["user"]
        else:
            # since the full results object needs this parameter, initialize it from the CR file
            environment["user"] = sf_data["spec"]["test_user"]

        sf_data["spec"]["clustername"] = environment["clustername"]
        log.debug(f"The smallfile yaml file is {sf_data}")

        # Deploy the benchmark-operator, so we can use the SmallFiles workload
        # to fill up the volume with files, and switch to the benchmark-operator namespace.
        log.info("Deploy the benchmark-operator")
        self.deploy_benchmark_operator()
        switch_to_project(BMO_NAME)

        all_results = []

        self.results_path = get_full_test_logs_path(cname=self)
        log.info(f"Logs file path name is : {self.full_log_path}")

        # Produce ES report
        # Collecting environment information
        self.get_env_info()

        # Initialize the results doc file.
        self.full_results = self.init_full_results(
            ResultsAnalyse(
                self.uuid,
                self.crd_data,
                self.full_log_path,
                "pvc_snapshot_perf_multiple_files",
            ))
        self.full_results.add_key("file_size_inKB", file_size)
        self.full_results.add_key("threads", threads)
        self.full_results.add_key("interface", interface)
        for test_num in range(self.tests_numbers):

            test_results = {"creation_time": None, "csi_creation_time": None}

            # deploy the smallfile workload
            log.info("Running SmallFile bench")
            sf_obj = OCS(**sf_data)
            sf_obj.create()

            # wait for benchmark pods to get created - takes a while
            for bench_pod in TimeoutSampler(
                    240,
                    10,
                    get_pod_name_by_pattern,
                    "smallfile-client",
                    BMO_NAME,
            ):
                try:
                    if bench_pod[0] is not None:
                        small_file_client_pod = bench_pod[0]
                        break
                except IndexError:
                    log.info("Bench pod not ready yet")

            bench_pod = OCP(kind="pod", namespace=BMO_NAME)
            log.info("Waiting for SmallFile benchmark to Run")
            assert bench_pod.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                resource_name=small_file_client_pod,
                sleep=30,
                timeout=600,
            )
            # Initialize the pvc_name variable so it is not limited to the loop scope.
            pvc_name = ""
            for item in bench_pod.get()["items"]:
                if item.get("metadata").get("name") == small_file_client_pod:
                    for volume in item.get("spec").get("volumes"):
                        if "persistentVolumeClaim" in volume:
                            pvc_name = volume["persistentVolumeClaim"][
                                "claimName"]
                            break
            log.info(f"Benchmark PVC name is : {pvc_name}")
            # Creation of 1M files on CephFS can take a lot of time
            timeout = 7200
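            # Poll the client pod logs every 30 seconds until the workload
            # reports "RUN STATUS DONE", failing with TimeoutError after 2 hours.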
            while timeout >= 0:
                logs = bench_pod.get_logs(name=small_file_client_pod)
                if "RUN STATUS DONE" in logs:
                    break
                timeout -= 30
                if timeout == 0:
                    raise TimeoutError(
                        "Timed out waiting for benchmark to complete")
                time.sleep(30)
            log.info(f"Smallfile test ({test_num + 1}) finished.")

            # Take a snapshot of the PVC (which contains the files)
            snap_name = pvc_name.replace("claim", "snapshot-")
            log.info(f"Taking snapshot of the PVC {pvc_name}")
            log.info(f"Snapshot name : {snap_name}")

            start_time = datetime.datetime.utcnow().strftime(
                "%Y-%m-%dT%H:%M:%SZ")

            test_results["creation_time"] = self.measure_create_snapshot_time(
                pvc_name=pvc_name,
                snap_name=snap_name,
                namespace=BMO_NAME,
                interface=interface,
                start_time=start_time,
            )
            log.info(
                f"Snapshot with name {snap_name} and id {self.snap_uid} creation time is"
                f' {test_results["creation_time"]} seconds')
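            # The csi_creation_time below reflects only the CSI-side part of the
            # snapshot creation, as opposed to the end-to-end creation time
            # measured above.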

            test_results[
                "csi_creation_time"] = performance_lib.measure_csi_snapshot_creation_time(
                    interface=interface,
                    snapshot_id=self.snap_uid,
                    start_time=start_time)
            log.info(
                f"Snapshot with name {snap_name} and id {self.snap_uid} csi creation time is"
                f' {test_results["csi_creation_time"]} seconds')

            all_results.append(test_results)

            # Delete the smallfile workload - which will delete also the PVC
            log.info("Deleting the smallfile workload")
            if sf_obj.delete(wait=True):
                log.info("The smallfile workload was deleted successfully")

            # Delete VolumeSnapshots
            log.info("Deleting the snapshots")
            if self.snap_obj.delete(wait=True):
                log.info("The snapshot deleted successfully")
            log.info("Verify (and wait if needed) that ceph health is OK")
            ceph_health_check(tries=45, delay=60)

            # Sleep for 1 Min. between test samples
            time.sleep(60)

        # Cleanup the elasticsearch instance.
        log.info("Deleting the elastic-search instance")
        self.es.cleanup()

        creation_times = [t["creation_time"] for t in all_results]
        avg_c_time = statistics.mean(creation_times)
        csi_creation_times = [t["csi_creation_time"] for t in all_results]
        avg_csi_c_time = statistics.mean(csi_creation_times)

        t_dateset = int(data_set / 3)
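        # data_set includes the 3x replication factor, so dividing by 3 gives the
        # logical dataset size written by the workload.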

        log.info(f"Full test report for {interface}:")
        log.info(f"Test ran {self.tests_numbers} times, "
                 f"All snapshot creation results are {creation_times} seconds")
        log.info(
            f"The average snapshot creation time is : {avg_c_time} seconds")
        log.info(f"Test ran {self.tests_numbers} times, "
                 f"All snapshot csi creation results are {csi_creation_times}")
        log.info(
            f"The average csi snapshot creation time is : {avg_csi_c_time}")

        log.info(f"Number of Files on the volume : {total_files:,}, "
                 f"Total dataset : {t_dateset} GiB")

        self.full_results.add_key("avg_snapshot_creation_time_insecs",
                                  avg_c_time)
        self.full_results.all_results["total_files"] = total_files
        self.full_results.all_results["total_dataset"] = t_dateset
        self.full_results.all_results["creation_time"] = creation_times
        self.full_results.all_results["csi_creation_time"] = csi_creation_times

        # Write the test results into the ES server
        log.info("writing results to elastic search server")
        if self.full_results.es_write():
            res_link = self.full_results.results_link()
            # write the ES link to the test results in the test log.
            log.info(f"The result can be found at : {res_link}")

            # Create text file with results of all subtest
            self.write_result_to_file(res_link)

    def test_pvc_snapshot_performance_results(self):
        """
        This is not a test - it only checks that the previous tests ran and
        finished as expected, and reports the full results (links in the ES)
        of the previous tests (6 + 2)
        """

        workloads = [
            {
                "name": "test_pvc_snapshot_performance",
                "tests": 6,
                "test_name": "PVC Snapshot",
            },
            {
                "name": "test_pvc_snapshot_performance_multiple_files",
                "tests": 2,
                "test_name": "PVC Snapshot - Multiple Files",
            },
        ]
        for wl in workloads:
            self.number_of_tests = wl["tests"]
            self.results_path = get_full_test_logs_path(cname=self,
                                                        fname=wl["name"])
            self.results_file = os.path.join(self.results_path,
                                             "all_results.txt")
            log.info(
                f"Check results for [{wl['name']}] in : {self.results_file}")
            self.check_tests_results()
            self.push_to_dashboard(test_name=wl["test_name"])
Example No. 12
class TestPvcSnapshotPerformance(PASTest):
    """
    Tests to verify PVC snapshot creation and deletion performance
    """

    tests_numbers = 3  # number of tests to run

    @pytest.fixture()
    def base_setup(
        self,
        request,
        interface_iterate,
        storageclass_factory,
        pvc_factory,
        pod_factory,
        pvc_size,
    ):
        """
        A setup phase for the test - creating resources

        Args:
            interface_iterate: A fixture to iterate over ceph interfaces
            storageclass_factory: A fixture to create everything needed for a
                storageclass
            pvc_factory: A fixture to create new pvc
            pod_factory: A fixture to create new pod
            pvc_size: The size of the PVC in Gi

        """
        self.interface = interface_iterate
        self.sc_obj = storageclass_factory(self.interface)

        self.pvc_obj = pvc_factory(interface=self.interface,
                                   size=pvc_size,
                                   status=constants.STATUS_BOUND)

        self.pod_object = pod_factory(interface=self.interface,
                                      pvc=self.pvc_obj,
                                      status=constants.STATUS_RUNNING)

    def measure_create_snapshot_time(self, pvc_name, snap_name, interface):
        """
        Create a volume snapshot and measure the creation time

        Args:
            pvc_name (str): the PVC name to create a snapshot of
            snap_name (str): the name of the snapshot to be created
            interface (str): the interface (rbd / cephfs) to be used

        Returns:
            int : the snapshot creation time in seconds

        """

        # Find the snapshot yaml according to the interface
        snap_yaml = constants.CSI_RBD_SNAPSHOT_YAML
        if interface == constants.CEPHFILESYSTEM:
            snap_yaml = constants.CSI_CEPHFS_SNAPSHOT_YAML

        # Create the Snapshot of the PVC
        self.snap_obj = pvc.create_pvc_snapshot(
            pvc_name=pvc_name,
            snap_yaml=snap_yaml,
            snap_name=snap_name,
            sc_name=helpers.default_volumesnapshotclass(interface).name,
        )

        # Wait until the snapshot is bound and ready to use
        self.snap_obj.ocp.wait_for_resource(
            condition="true",
            resource_name=self.snap_obj.name,
            column=constants.STATUS_READYTOUSE,
            timeout=600,
        )

        # Getting the snapshot content name
        self.snap_content = helpers.get_snapshot_content_obj(self.snap_obj)
        self.snap_uid = (self.snap_content.data.get("spec").get(
            "volumeSnapshotRef").get("uid"))
        log.info(f"The snapshot UID is :{self.snap_uid}")

        # Measure the snapshot creation time
        c_time = helpers.measure_snapshot_creation_time(
            interface, snap_name, self.snap_content.name, self.snap_uid)
        return c_time
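
    # Typical call (values are illustrative):
    #   c_time = self.measure_create_snapshot_time(
    #       pvc_name=self.pvc_obj.name, snap_name="snapshot-test0",
    #       interface=constants.CEPHBLOCKPOOL)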

    @pytest.mark.parametrize(
        argnames=["pvc_size"],
        argvalues=[
            pytest.param(*["1"]),
            pytest.param(*["10"]),
            pytest.param(*["100"])
        ],
    )
    @pytest.mark.usefixtures(base_setup.__name__)
    def test_pvc_snapshot_performance(self, teardown_factory, pvc_size):
        """
        1. Run I/O on a pod file.
        2. Calculate md5sum of the file.
        3. Take a snapshot of the PVC and measure the time of creation.
        4. Restore From the snapshot and measure the time
        5. Attach a new pod to it.
        6. Verify that the file is present on the new pod also.
        7. Verify that the md5sum of the file on the new pod matches
           with the md5sum of the file on the original pod.

        This scenario runs 3 times and reports all results.

        Args:
            teardown_factory: A fixture to destroy objects
            pvc_size: the size of the PVC to be tested - parametrize

        """

        # Getting the total Storage capacity
        ceph_cluster = CephCluster()
        ceph_capacity = ceph_cluster.get_ceph_capacity()

        log.info(f"Total capacity size is : {ceph_capacity}")
        log.info(f"PVC Size is : {pvc_size}")
        log.info(f"Needed capacity is {int(int(pvc_size) * 5)}")
        if int(ceph_capacity) < int(pvc_size) * 5:
            log.error(
                f"PVC size is {pvc_size}GiB and it is too large for this system"
                f" which has only {ceph_capacity}GiB")
            return
        # Calculate the file size as 25% of the PVC size;
        # after the 3 test iterations the PVC will be 75% full
        filesize = self.pvc_obj.size * 0.25
        # Change the file size to MB and from int to str
        file_size = f"{int(filesize * 1024)}M"
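        # e.g. for a 10Gi PVC: filesize = 2.5 (GiB) -> file_size = "2560M"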

        all_results = []

        for test_num in range(self.tests_numbers):
            test_results = {
                "test_num": test_num + 1,
                "dataset": (test_num + 1) * filesize * 1024,  # size in MiB
                "create": {
                    "time": None,
                    "speed": None
                },
                "restore": {
                    "time": None,
                    "speed": None
                },
            }
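            # The dataset grows with each iteration: every phase writes one more
            # file of 25% of the PVC size, so the snapshot of phase N covers
            # (N + 1) * filesize GiB (recorded above in MiB).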
            log.info(f"Starting test phase number {test_num}")
            # Step 1. Run I/O on a pod file.
            file_name = f"{self.pod_object.name}-{test_num}"
            log.info(f"Starting IO on the POD {self.pod_object.name}")
            # Going to run only write IO to fill the PVC for the snapshot
            self.pod_object.fillup_fs(size=file_size, fio_filename=file_name)

            # Wait for fio to finish
            fio_result = self.pod_object.get_fio_results()
            err_count = fio_result.get("jobs")[0].get("error")
            assert (
                err_count == 0
            ), f"IO error on pod {self.pod_object.name}. FIO result: {fio_result}"
            log.info("IO on the PVC Finished")

            # Verify presence of the file
            file_path = pod.get_file_path(self.pod_object, file_name)
            log.info(f"Actual file path on the pod {file_path}")
            assert pod.check_file_existence(
                self.pod_object, file_path), f"File {file_name} doesn't exist"
            log.info(f"File {file_name} exists in {self.pod_object.name}")

            # Step 2. Calculate md5sum of the file.
            orig_md5_sum = pod.cal_md5sum(self.pod_object, file_name)

            # Step 3. Take a snapshot of the PVC and measure the time of creation.
            snap_name = self.pvc_obj.name.replace("pvc-test",
                                                  f"snapshot-test{test_num}")
            log.info(f"Taking snapshot of the PVC {snap_name}")

            test_results["create"]["time"] = self.measure_create_snapshot_time(
                pvc_name=self.pvc_obj.name,
                snap_name=snap_name,
                interface=self.interface,
            )
            test_results["create"]["speed"] = int(
                test_results["dataset"] / test_results["create"]["time"])
            log.info(
                f' Test {test_num} dataset is {test_results["dataset"]} MiB')
            log.info(
                f'Snapshot creation time is : {test_results["create"]["time"]} sec.'
            )
            log.info(
                f'Snapshot creation speed is : {test_results["create"]["speed"]} MiB/sec'
            )

            # Step 4. Restore the PVC from the snapshot and measure the time
            # Same Storage class of the original PVC
            sc_name = self.pvc_obj.backed_sc

            # Size should be same as of the original PVC
            pvc_size = str(self.pvc_obj.size) + "Gi"

            # Create pvc out of the snapshot
            # Both, the snapshot and the restore PVC should be in same namespace

            log.info("Restoring from the Snapshot")
            restore_pvc_name = self.pvc_obj.name.replace(
                "pvc-test", f"restore-pvc{test_num}")
            restore_pvc_yaml = constants.CSI_RBD_PVC_RESTORE_YAML
            if self.interface == constants.CEPHFILESYSTEM:
                restore_pvc_yaml = constants.CSI_CEPHFS_PVC_RESTORE_YAML

            log.info("Restoring the PVC from the snapshot")
            restore_pvc_obj = pvc.create_restore_pvc(
                sc_name=sc_name,
                snap_name=self.snap_obj.name,
                namespace=self.snap_obj.namespace,
                size=pvc_size,
                pvc_name=restore_pvc_name,
                restore_pvc_yaml=restore_pvc_yaml,
            )
            helpers.wait_for_resource_state(
                restore_pvc_obj,
                constants.STATUS_BOUND,
                timeout=3600  # setting this to 60 Min.
                # since it can be take long time to restore, and we want it to finished.
            )
            teardown_factory(restore_pvc_obj)
            restore_pvc_obj.reload()
            log.info("PVC was restored from the snapshot")
            test_results["restore"][
                "time"] = helpers.measure_pvc_creation_time(
                    self.interface, restore_pvc_obj.name)
            test_results["restore"]["speed"] = int(
                test_results["dataset"] / test_results["restore"]["time"])
            log.info(
                f'Snapshot restore time is : {test_results["restore"]["time"]}'
            )
            log.info(
                f'Restore speed is : {test_results["restore"]["speed"]} MiB/sec')

            # Step 5. Attach a new pod to the restored PVC
            restore_pod_object = helpers.create_pod(
                interface_type=self.interface,
                pvc_name=restore_pvc_obj.name,
                namespace=self.snap_obj.namespace,
                pod_dict_path=constants.NGINX_POD_YAML,
            )

            # Confirm that the pod is running
            helpers.wait_for_resource_state(resource=restore_pod_object,
                                            state=constants.STATUS_RUNNING)
            teardown_factory(restore_pod_object)
            restore_pod_object.reload()

            # Step 6. Verify that the file is present on the new pod also.
            log.info(f"Checking the existence of {file_name} "
                     f"on restore pod {restore_pod_object.name}")
            assert pod.check_file_existence(
                restore_pod_object,
                file_path), f"File {file_name} doesn't exist"
            log.info(f"File {file_name} exists in {restore_pod_object.name}")

            # Step 7. Verify that the md5sum matches
            log.info(
                f"Verifying that md5sum of {file_name} "
                f"on pod {self.pod_object.name} matches with md5sum "
                f"of the same file on restore pod {restore_pod_object.name}")
            assert pod.verify_data_integrity(
                restore_pod_object, file_name,
                orig_md5_sum), "Data integrity check failed"
            log.info("Data integrity check passed, md5sum are same")

            all_results.append(test_results)

        # logging the test summary - all info in one place for easy log reading
        c_speed, c_runtime, r_speed, r_runtime = (0 for i in range(4))
        log.info("Test summary :")
        for tst in all_results:
            c_speed += tst["create"]["speed"]
            c_runtime += tst["create"]["time"]
            r_speed += tst["restore"]["speed"]
            r_runtime += tst["restore"]["time"]
            log.info(
                f"Test {tst['test_num']} results : dataset is {tst['dataset']} MiB. "
                f"Snapshot creation time is {tst['create']['time']} "
                f"at {tst['create']['speed']} MiB/sec. "
                f"Restore from snapshot time is {tst['restore']['time']} "
                f"at {tst['restore']['speed']} MiB/sec.")
        log.info(
            f" Average snapshot creation time is {c_runtime / self.tests_numbers} sec."
        )
        log.info(
            f" Average snapshot creation speed is {c_speed / self.tests_numbers} MiB/sec"
        )
        log.info(
            f" Average snapshot restore time is {r_runtime / self.tests_numbers} sec."
        )
        log.info(
            f" Average snapshot restore speed is {r_speed / self.tests_numbers} MiB/sec"
        )

    @pytest.mark.parametrize(
        argnames=["file_size", "files", "threads", "interface"],
        argvalues=[
            pytest.param(
                *[32, 125000, 8, constants.CEPHBLOCKPOOL],
                marks=[pytest.mark.polarion_id("OCS-2624")],
            ),
            pytest.param(
                *[32, 125000, 8, constants.CEPHFILESYSTEM],
                marks=[pytest.mark.polarion_id("OCS-2625")],
            ),
        ],
    )
    def test_pvc_snapshot_performance_multiple_files(self, file_size, files,
                                                     threads, interface):
        """
        Run the SmallFile workload and then take a snapshot.
        The test runs with 1M files on the volume - the total data set is the
        same for all tests (~30GiB) - then takes a snapshot and measures the
        time it takes. The test runs 3 times to check consistency.

        Args:
            file_size (int): the size of the files to be created, in KiB
            files (int): number of files each thread will create
            threads (int): number of threads to be used in the workload
            interface (str): the volume interface that will be used
                             CephBlockPool / CephFileSystem

        Raises:
            TimeoutError : if file creation takes longer than 2 hours

        """

        # Deploy an elastic-search server in the cluster for use by the
        # SmallFiles workload, since it is mandatory for the workload.
        # It is deployed once for all test iterations and will be deleted
        # at the end of the test.
        self.es = ElasticSearch()

        # Loading the main template yaml file for the benchmark and update some
        # fields with new values
        sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

        if interface == constants.CEPHBLOCKPOOL:
            storageclass = constants.DEFAULT_STORAGECLASS_RBD
        else:
            storageclass = constants.DEFAULT_STORAGECLASS_CEPHFS
        log.info(f"Using {storageclass} Storageclass")

        # Setting up the parameters for this test
        sf_data["spec"]["workload"]["args"]["samples"] = 1
        sf_data["spec"]["workload"]["args"]["operation"] = ["create"]
        sf_data["spec"]["workload"]["args"]["file_size"] = file_size
        sf_data["spec"]["workload"]["args"]["files"] = files
        sf_data["spec"]["workload"]["args"]["threads"] = threads
        sf_data["spec"]["workload"]["args"]["storageclass"] = storageclass
        sf_data["spec"]["elasticsearch"] = {
            "url": f"http://{self.es.get_ip()}:{self.es.get_port()}"
        }
        """
        Calculate the size of the volume to be tested. It should be at least
        twice the size of the data set and at least 100Gi.

        Since the file_size is in KiB and the vol_size needs to be in GiB,
        a unit conversion is needed.
        """
        total_files = int(files * threads)
        total_data = int(files * threads * file_size / constants.GB2KB)
        data_set = int(total_data * 3)  # calculate data with replica
        vol_size = data_set if data_set >= 100 else 100
        sf_data["spec"]["workload"]["args"]["storagesize"] = f"{vol_size}Gi"

        environment = get_environment_info()
        if not environment["user"] == "":
            sf_data["spec"]["test_user"] = environment["user"]
        else:
            # since the full results object needs this parameter, initialize it from the CR file
            environment["user"] = sf_data["spec"]["test_user"]

        sf_data["spec"]["clustername"] = environment["clustername"]
        log.debug(f"The smallfile yaml file is {sf_data}")

        # Deploy the benchmark-operator, so we can use the SmallFiles workload
        # to fill up the volume with files, and switch to the benchmark-operator namespace.
        log.info("Deploy the benchmark-operator")
        self.deploy_benchmark_operator()
        switch_to_project(BMO_NAME)

        all_results = []

        for test_num in range(self.tests_numbers):

            # deploy the smallfile workload
            log.info("Running SmallFile bench")
            sf_obj = OCS(**sf_data)
            sf_obj.create()

            # wait for benchmark pods to get created - takes a while
            for bench_pod in TimeoutSampler(
                    240,
                    10,
                    get_pod_name_by_pattern,
                    "smallfile-client",
                    BMO_NAME,
            ):
                try:
                    if bench_pod[0] is not None:
                        small_file_client_pod = bench_pod[0]
                        break
                except IndexError:
                    log.info("Bench pod not ready yet")

            bench_pod = OCP(kind="pod", namespace=BMO_NAME)
            log.info("Waiting for SmallFile benchmark to Run")
            assert bench_pod.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                resource_name=small_file_client_pod,
                sleep=30,
                timeout=600,
            )
            # Initialize the pvc_name variable so it is not limited to the loop scope.
            pvc_name = ""
            for item in bench_pod.get()["items"]:
                if item.get("metadata").get("name") == small_file_client_pod:
                    for volume in item.get("spec").get("volumes"):
                        if "persistentVolumeClaim" in volume:
                            pvc_name = volume["persistentVolumeClaim"][
                                "claimName"]
                            break
            log.info(f"Benchmark PVC name is : {pvc_name}")
            # Creation of 1M files on CephFS can take a lot of time
            timeout = 7200
            while timeout >= 0:
                logs = bench_pod.get_logs(name=small_file_client_pod)
                if "RUN STATUS DONE" in logs:
                    break
                timeout -= 30
                if timeout == 0:
                    raise TimeoutError(
                        "Timed out waiting for benchmark to complete")
                time.sleep(30)
            log.info(f"Smallfile test ({test_num + 1}) finished.")

            # Take a snapshot of the PVC (which contains the files)
            snap_name = pvc_name.replace("claim", "snapshot-")
            log.info(f"Taking snapshot of the PVC {pvc_name}")
            log.info(f"Snapshot name : {snap_name}")
            creation_time = self.measure_create_snapshot_time(
                pvc_name=pvc_name, snap_name=snap_name, interface=interface)
            log.info(f"Snapshot creation time is {creation_time} seconds")
            all_results.append(creation_time)

            # Delete the smallfile workload - which will delete also the PVC
            log.info("Deleting the smallfile workload")
            if sf_obj.delete(wait=True):
                log.info("The smallfile workload was deleted successfully")

            # Delete VolumeSnapshots
            log.info("Deleting the snapshots")
            if self.snap_obj.delete(wait=True):
                log.info("The snapshot deleted successfully")
            log.info("Verify (and wait if needed) that ceph health is OK")
            ceph_health_check(tries=45, delay=60)

            # Sleep for 1 Min. between test samples
            time.sleep(60)

        # Cleanup the elasticsearch instance.
        log.info("Deleting the elastic-search instance")
        self.es.cleanup()

        log.info(f"Full test report for {interface}:")
        log.info(f"Test ran {self.tests_numbers} times, "
                 f"All results are {all_results}")
        log.info(
            f"The average creation time is : {statistics.mean(all_results)}")
        log.info(f"Number of Files on the volume : {total_files:,}, "
                 f"Total dataset : {int(data_set / 3)} GiB")
class TestSmallFileWorkloadScale(E2ETest):
    """
    Deploy benchmark operator and run different scale tests.
    Call common small files workload routine to run SmallFile workload
    """

    def setup(self):
        """
        Initialize the test environment

        """
        # Deploy internal ES server - not need to keep results,
        # so don't use production ES
        self.es = ElasticSearch()

        # Initial the Small Files workload, based on benchmark-operator
        self.small_files = SmallFiles(self.es)

        self.ceph_cluster = CephCluster()

        # Get the total storage capacity
        self.ceph_capacity = self.ceph_cluster.get_ceph_capacity()
        log.info(f"Total storage capacity is {self.ceph_capacity:,.2f} GiB")

        # Collect the pools usage before the test starts
        self.orig_data = self.get_cephfs_data()

    def teardown(self):
        """
        Teardown the test environment

        """
        self.small_files.cleanup()
        self.es.cleanup()

    def get_cephfs_data(self):
        """
        Look through the Ceph pools and find the space usage of all CephFS pools

        Returns:
            dict: bytes used per pool, indexed by pool name
        """
        ceph_status = self.ceph_cluster.toolbox.exec_ceph_cmd(ceph_cmd="ceph df")
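        # Each pool entry of interest in the "ceph df" output looks roughly like
        # the following (the pool name is illustrative and cluster dependent):
        #   {"name": "ocs-storagecluster-cephfilesystem-data0", "stats": {"bytes_used": 52428800}, ...}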
        ret_value = {}
        for pool in ceph_status["pools"]:
            # We are only interested in the CephFS pools
            if "cephfilesystem" in pool["name"]:
                ret_value[pool["name"]] = pool["stats"]["bytes_used"]
        return ret_value

    def display_ceph_usage(self, msg, data):
        """
        Display the pool usage in a pretty way

        Args:
            msg (str): the message string to display with the values
            data (dict): dictionary of pools -> capacity (in bytes)

        """
        log.info(f"The pools usage {msg} is :")
        for entry in data:
            log.info(f"{entry} now uses {data[entry]:,} bytes")

    @pytest.mark.parametrize(
        argnames=["file_size", "files", "threads", "interface"],
        argvalues=[
            # 500K Files, ~4GB
            pytest.param(*[8, 125000, 4, constants.CEPHFILESYSTEM]),
            # 5M Files, ~152GB
            pytest.param(*[32, 1250000, 4, constants.CEPHFILESYSTEM]),
        ],
    )
    def test_scale_smallfile_workload(self, file_size, files, threads, interface):
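        """
        Run the SmallFiles workload with the parametrized values, then delete
        the data and verify that the backend (Ceph) capacity is reclaimed
        without a significant leak.

        Args:
            file_size (int): the size of each file, in KiB
            files (int): number of files each thread will create
            threads (int): number of threads used by the workload
            interface (str): the volume interface (CephBlockPool / CephFileSystem)

        """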
        # updating the benchmark parameters
        self.small_files.setup_storageclass(interface)
        self.small_files.setup_test_params(file_size, files, threads, 1)

        # Verify we have enough storage capacity to run the test.
        self.small_files.setup_vol_size(file_size, files, threads, self.ceph_capacity)

        # Run the benchmark to create files on the volume
        self.small_files.setup_operations("create")
        self.small_files.run()

        # Collect pools usage after creation is done.
        self.run_data = self.get_cephfs_data()

        # Delete the benchmark data
        self.small_files.delete()

        # Getting the usage capacity immediately after deletion
        self.now_data = self.get_cephfs_data()

        # Wait 3 minutes for the backend deletion to actually start.
        time.sleep(180)

        # Query the storage usage every 2 minutes; when there is no difference
        # between two samples, the backend cleanup is done.
        still_going_down = True
        while still_going_down:
            log.info("Waiting for Ceph to finish cleaning up")
            time.sleep(120)
            self.new_data = self.get_cephfs_data()
            still_going_down = False
            for entry in self.new_data:
                if self.new_data[entry] < self.now_data[entry]:
                    still_going_down = True
                    self.now_data[entry] = self.new_data[entry]

        self.display_ceph_usage("Before the test", self.orig_data)
        self.display_ceph_usage("After data creation", self.run_data)

        # Make sure that the test actually wrote data to the volume
        # at least 1GiB.
        for entry in self.run_data:
            if re.search("metadata", entry):
                # Since we are interested in the data written and not the metadata,
                # skipping the metadata pool
                continue
            written = self.run_data[entry] - self.orig_data[entry]
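            # constants.GB is assumed here to be 1GiB in bytes (1024 ** 3), so the
            # check requires at least 1GiB of data written to the data pool.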
            check = written > constants.GB
            errmsg = (
                f"{written:,.2f} bytes was written to {entry} -"
                "This is not enough for the test"
            )
            assert check, errmsg

        self.display_ceph_usage("After data deletion", self.now_data)

        for entry in self.now_data:
            # A leak is indicated if over 20% more storage is used and more than 3 GiB.
            try:
                ratio = self.now_data[entry] / self.orig_data[entry]
            except ZeroDivisionError:
                ratio = self.now_data[entry]

            added_data = (self.now_data[entry] - self.orig_data[entry]) / constants.GB
            # in some cases (especially for metadata), it might be that after the
            # test there is less data in the pool than before the test.
            if added_data < 0:
                added_data = 0
                ratio = 1

            log.info(
                "The ratio between capacity before and after the test "
                f"on {entry} is : {ratio:.2f} ; {added_data:,.2f} GiB"
            )

            check = (ratio < 1.20) or (added_data < 3)
            errmsg = f"{entry} is over 20% (or 3 GiB) larger [{ratio} ; {added_data}]-- possible leak"
            assert check, errmsg
    def test_pvc_snapshot_performance_multiple_files(self, file_size, files,
                                                     threads, interface):
        """
        Run the SmallFile workload and then take a snapshot.
        The test runs with 1M files on the volume - the total data set is the
        same for all tests (~30GiB) - then takes a snapshot and measures the
        time it takes. The test runs 3 times to check consistency.

        Args:
            file_size (int): the size of the files to be created, in KiB
            files (int): number of files each thread will create
            threads (int): number of threads to be used in the workload
            interface (str): the volume interface that will be used
                             CephBlockPool / CephFileSystem

        Raises:
            TimeoutError : if file creation takes longer than 2 hours

        """

        # Loading the main template yaml file for the benchmark and update some
        # fields with new values
        sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

        # Deploy an elastic-search server in the cluster for use by the
        # SmallFiles workload, since it is mandatory for the workload.
        # It is deployed once for all test iterations and will be deleted
        # at the end of the test.
        if config.PERF.get("deploy_internal_es"):
            self.es = ElasticSearch()
            sf_data["spec"]["elasticsearch"] = {
                "url": f"http://{self.es.get_ip()}:{self.es.get_port()}"
            }
        else:
            if config.PERF.get("internal_es_server") == "":
                self.es = None
                return
            else:
                self.es = {
                    "server":
                    config.PERF.get("internal_es_server"),
                    "port":
                    config.PERF.get("internal_es_port"),
                    "url":
                    f"http://{config.PERF.get('internal_es_server')}:{config.PERF.get('internal_es_port')}",
                }
                # verify that the connection to the elasticsearch server is OK
                if not super(TestPvcSnapshotPerformance, self).es_connect():
                    self.es = None
                    log.error(
                        "ElasticSearch doesn't exist ! The test cannot run")
                    return
                sf_data["spec"]["elasticsearch"] = {"url": self.es["url"]}

        if interface == constants.CEPHBLOCKPOOL:
            storageclass = constants.DEFAULT_STORAGECLASS_RBD
        else:
            storageclass = constants.DEFAULT_STORAGECLASS_CEPHFS
        log.info(f"Using {storageclass} Storageclass")

        # Setting up the parameters for this test
        sf_data["spec"]["workload"]["args"]["samples"] = 1
        sf_data["spec"]["workload"]["args"]["operation"] = ["create"]
        sf_data["spec"]["workload"]["args"]["file_size"] = file_size
        sf_data["spec"]["workload"]["args"]["files"] = files
        sf_data["spec"]["workload"]["args"]["threads"] = threads
        sf_data["spec"]["workload"]["args"]["storageclass"] = storageclass
        """
        Calculate the size of the volume to be tested. It should be at least
        twice the size of the data set and at least 100Gi.

        Since the file_size is in KiB and the vol_size needs to be in GiB,
        a unit conversion is needed.
        """
        total_files = int(files * threads)
        total_data = int(files * threads * file_size / constants.GB2KB)
        data_set = int(total_data * 3)  # calculate data with replica
        vol_size = data_set if data_set >= 100 else 100
        sf_data["spec"]["workload"]["args"]["storagesize"] = f"{vol_size}Gi"

        environment = get_environment_info()
        if not environment["user"] == "":
            sf_data["spec"]["test_user"] = environment["user"]
        else:
            # since the full results object needs this parameter, initialize it from the CR file
            environment["user"] = sf_data["spec"]["test_user"]

        sf_data["spec"]["clustername"] = environment["clustername"]
        log.debug(f"The smallfile yaml file is {sf_data}")

        # Deploy the benchmark-operator, so we can use the SmallFiles workload
        # to fill up the volume with files, and switch to the benchmark-operator namespace.
        log.info("Deploy the benchmark-operator")
        self.deploy_benchmark_operator()
        switch_to_project(BMO_NAME)

        all_results = []

        # Produce ES report
        # Collecting environment information
        self.get_env_info()

        # Initialize the results doc file.
        self.full_results = self.init_full_results(
            ResultsAnalyse(
                self.uuid,
                self.crd_data,
                self.full_log_path,
                "pvc_snapshot_perf_multiple_files",
            ))
        self.full_results.add_key("file_size_inKB", file_size)
        self.full_results.add_key("threads", threads)
        self.full_results.add_key("interface", interface)
        for test_num in range(self.tests_numbers):

            test_results = {"creation_time": None, "csi_creation_time": None}

            # deploy the smallfile workload
            self.crd_data = sf_data
            self.client_pod_name = "smallfile-client"
            self.deploy_and_wait_for_wl_to_start(timeout=240)
            # Initialize the pvc_name variable so it is not limited to the loop scope.
            pvc_name = (OCP(kind="pvc", namespace=BMO_NAME).get().get("items")
                        [0].get("metadata").get("name"))
            log.info(f"Benchmark PVC name is : {pvc_name}")
            self.wait_for_wl_to_finish(sleep=30)

            # Take a snapshot of the PVC (which contains the files)
            snap_name = pvc_name.replace("claim", "snapshot-")
            log.info(f"Taking snapshot of the PVC {pvc_name}")
            log.info(f"Snapshot name : {snap_name}")

            start_time = self.get_time("csi")
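            # get_time("csi") is assumed to return the current time in the
            # timestamp format expected by the CSI log parsing helpers, so it can
            # be used as the lower bound for the creation-time measurements below.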

            test_results["creation_time"] = self.measure_create_snapshot_time(
                pvc_name=pvc_name,
                snap_name=snap_name,
                namespace=BMO_NAME,
                interface=interface,
                start_time=start_time,
            )
            log.info(
                f"Snapshot with name {snap_name} and id {self.snap_uid} creation time is"
                f' {test_results["creation_time"]} seconds')

            test_results[
                "csi_creation_time"] = performance_lib.measure_csi_snapshot_creation_time(
                    interface=interface,
                    snapshot_id=self.snap_uid,
                    start_time=start_time)
            log.info(
                f"Snapshot with name {snap_name} and id {self.snap_uid} csi creation time is"
                f' {test_results["csi_creation_time"]} seconds')

            all_results.append(test_results)

            # Delete the smallfile workload - which will delete also the PVC
            log.info("Deleting the smallfile workload")
            if self.benchmark_obj.delete(wait=True):
                log.info("The smallfile workload was deleted successfully")

            # Delete VolumeSnapshots
            log.info("Deleting the snapshots")
            if self.snap_obj.delete(wait=True):
                log.info("The snapshot deleted successfully")
            log.info("Verify (and wait if needed) that ceph health is OK")
            ceph_health_check(tries=45, delay=60)

            # Sleep for 1 Min. between test samples
            time.sleep(60)

        # Cleanup the elasticsearch instance, if needed.
        if isinstance(self.es, ElasticSearch):
            log.info("Deleting the elastic-search instance")
            self.es.cleanup()

        creation_times = [t["creation_time"] for t in all_results]
        avg_c_time = statistics.mean(creation_times)
        csi_creation_times = [t["csi_creation_time"] for t in all_results]
        avg_csi_c_time = statistics.mean(csi_creation_times)

        t_dateset = int(data_set / 3)

        log.info(f"Full test report for {interface}:")
        log.info(f"Test ran {self.tests_numbers} times, "
                 f"All snapshot creation results are {creation_times} seconds")
        log.info(
            f"The average snapshot creation time is : {avg_c_time} seconds")
        log.info(f"Test ran {self.tests_numbers} times, "
                 f"All snapshot csi creation results are {csi_creation_times}")
        log.info(
            f"The average csi snapshot creation time is : {avg_csi_c_time}")

        log.info(f"Number of Files on the volume : {total_files:,}, "
                 f"Total dataset : {t_dateset} GiB")

        self.full_results.add_key("avg_snapshot_creation_time_insecs",
                                  avg_c_time)
        self.full_results.all_results["total_files"] = total_files
        self.full_results.all_results["total_dataset"] = t_dateset
        self.full_results.all_results["creation_time"] = creation_times
        self.full_results.all_results["csi_creation_time"] = csi_creation_times

        # Write the test results into the ES server
        log.info("writing results to elastic search server")
        self.results_path = helpers.get_full_test_logs_path(cname=self)
        if self.full_results.es_write():
            res_link = self.full_results.results_link()
            # write the ES link to the test results in the test log.
            log.info(f"The result can be found at : {res_link}")

            # Create text file with results of all subtest
            self.write_result_to_file(res_link)
Example No. 15
class TestElasticsearch:
    def setup(self):
        self.es = ElasticSearch()

        # Deploy the benchmark operator
        log.info("Apply Operator CRD")
        self.operator = benchmark_operator.BenchmarkOperator()
        self.operator.deploy()

    def teardown(self):
        self.es.cleanup()
        self.operator.cleanup()

    def smallfile_run(self, es):
        """
        Run the smallfiles workload so the elasticsearch server will have some data
        in it for copy

        Args:
            es (Elasticsearch): elastic search object

        Returns:
            str: the UUID of the test

        """

        # Loading the main template yaml file for the benchmark and update some
        # fields with new values
        sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

        # Setting up the parameters for this test
        sf_data["spec"]["elasticsearch"]["server"] = es.get_ip()
        sf_data["spec"]["elasticsearch"]["port"] = es.get_port()
        sf_data["spec"]["elasticsearch"][
            "url"
        ] = f"http://{es.get_ip()}:{es.get_port()}"

        sf_data["spec"]["workload"]["args"]["samples"] = 1
        sf_data["spec"]["workload"]["args"]["operation"] = ["create"]
        sf_data["spec"]["workload"]["args"]["file_size"] = 4
        sf_data["spec"]["workload"]["args"]["files"] = 500000
        sf_data["spec"]["workload"]["args"]["threads"] = 4
        sf_data["spec"]["workload"]["args"][
            "storageclass"
        ] = constants.DEFAULT_STORAGECLASS_RBD
        sf_data["spec"]["workload"]["args"]["storagesize"] = "100Gi"
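        # With these values the workload creates 2M files (500K per thread * 4
        # threads) of 4KiB each - roughly 7.6GiB of data on the 100Gi volume.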

        # deploy the smallfile workload
        log.info("Running SmallFile bench")
        sf_obj = OCS(**sf_data)
        sf_obj.create()

        # wait for benchmark pods to get created - takes a while
        for bench_pod in TimeoutSampler(
            240,
            10,
            get_pod_name_by_pattern,
            "smallfile-client",
            benchmark_operator.BMO_NAME,
        ):
            try:
                if bench_pod[0] is not None:
                    small_file_client_pod = bench_pod[0]
                    break
            except IndexError:
                log.info("Bench pod not ready yet")

        bench_pod = OCP(kind="pod", namespace=benchmark_operator.BMO_NAME)
        log.info("Waiting for SmallFile benchmark to Run")
        bench_pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            resource_name=small_file_client_pod,
            sleep=30,
            timeout=600,
        )
        for item in bench_pod.get()["items"][1]["spec"]["volumes"]:
            if "persistentVolumeClaim" in item:
                break
        uuid = self.operator.get_uuid(small_file_client_pod)
        timeout = 600
        while timeout >= 0:
            logs = bench_pod.get_logs(name=small_file_client_pod)
            if "RUN STATUS DONE" in logs:
                break
            timeout -= 30
            if timeout == 0:
                raise TimeoutError("Timed out waiting for benchmark to complete")
            time.sleep(30)
        return uuid

    def test_elasticsearch(self):
        """
        This test only deploys the elasticsearch module, connects to it with
        and without credentials, and tears down the environment.

        """

        full_log_path = get_full_test_logs_path(cname=self)
        log.info(f"Logs file path name is : {full_log_path}")
        log.info("The ElasticSearch deployment test started.")
        if self.es.get_health():
            log.info("The Status of the elasticsearch is OK")
        else:
            log.warning("The Status of the elasticsearch is Not OK")
            log.info("Waiting another 30 sec.")
            time.sleep(30)
            if self.es.get_health():
                log.info("The Status of the elasticsearch is OK")
            else:
                log.error("The Status of the elasticsearch is Not OK ! Exiting.")

        if self.es.get_health():
            log.info("\nThe Elastic-Search server information :\n")
            log.info(f"The Elasticsearch IP is {self.es.get_ip()}")
            log.info(f"The Elasticsearch port is {self.es.get_port()}")
            log.info(f"The Password to connect is {self.es.get_password()}")

        else:
            assert False, "The Elasticsearch module is not ready !"

        log.info(f"Test UUID is : {self.smallfile_run(self.es)}")

        assert self.es.dumping_all_data(full_log_path), "Cannot retrieve the test data"

        assert run_command(
            f"ls {full_log_path}/FullResults.tgz"
        ), "Results file was not retrieved from the pod"

        try:
            main_es = Elasticsearch(
                [
                    {
                        "host": defaults.ELASTICSEARCH_DEV_IP,
                        "port": defaults.ELASTICSEARCE_PORT,
                        "url": f"http://{defaults.ELASTICSEARCH_DEV_IP}:{defaults.ELASTICSEARCE_PORT}",
                    }
                ]
            )
        except esexp.ConnectionError:
            log.warning("Cannot connect to ES server in the LocalServer")
            main_es = None
        assert elasticsearch_load(
            main_es, full_log_path
        ), "Cannot load data into the main ES server"
Example No. 16
    def test_elasticsearch(self):
        """
        This test does the following operations:

            * deploy the elasticsearch module
            * connect to it
            * run a simple SmallFile benchmark (to verify usability)
            * dump the results to a file
            * push the results from the file to the Dev. ES.
            * teardown the environment

        """

        log.info("Test with 'Dummy' Storageclass")
        try:
            self.es = ElasticSearch(sc="dummy")
        except ElasticSearchNotDeployed:
            log.info("Raised as expected !")

        log.info("Test with 'Real' Storageclass")
        try:
            self.es = ElasticSearch()
        except ElasticSearchNotDeployed as ex:
            log.error("ElasticSearch deployment failed unexpectedly !")
            raise ex

        full_log_path = get_full_test_logs_path(cname=self)
        log.info(f"Logs file path name is : {full_log_path}")
        log.info("The ElasticSearch deployment test started.")
        if self.es.get_health():
            log.info("The Status of the elasticsearch is OK")
        else:
            log.warning("The Status of the elasticsearch is Not OK")
            log.info("Waiting another 30 sec.")
            time.sleep(30)
            if self.es.get_health():
                log.info("The Status of the elasticsearch is OK")
            else:
                log.error(
                    "The Status of the elasticsearch is Not OK ! Exiting.")

        if self.es.get_health():
            log.info("\nThe Elastic-Search server information :\n")
            log.info(f"The Elasticsearch IP is {self.es.get_ip()}")
            log.info(f"The Elasticsearch port is {self.es.get_port()}")
            log.info(f"The Password to connect is {self.es.get_password()}")

        else:
            assert False, "The Elasticsearch module is not ready !"

        log.info(f"Test UUID is : {self.smallfile_run(self.es)}")

        assert self.es.dumping_all_data(
            full_log_path), "Cannot retrieve the test data"

        assert run_command(f"ls {full_log_path}/FullResults.tgz"
                           ), "Results file was not retrieved from the pod"

        # Try to use the development ES server for testing the elasticsearch_load
        # function to push data into ES server
        try:
            main_es = Elasticsearch([{
                "host":
                defaults.ELASTICSEARCH_DEV_IP,
                "port":
                defaults.ELASTICSEARCE_PORT,
                "url":
                f"http://{defaults.ELASTICSEARCH_DEV_IP}:{defaults.ELASTICSEARCE_PORT}",
            }])
        except esexp.ConnectionError:
            log.warning("Cannot connect to ES server in the LocalServer")
            main_es = None
        assert elasticsearch_load(
            main_es, full_log_path), "Cannot load data into the main ES server"