Beispiel #1
0
def log_output_of_job_pods(job_name, namespace):
    """
    Dump the output of every pod of the given job into the standard logger.

    Meant for failure paths, where the pod output has to be preserved in the
    logs as evidence.

    Args:
        job_name (str): name of the job whose pods are inspected
        namespace (str): name of the namespace where the job is running
    """
    pod_names = get_job_pods(
        job_name=job_name, namespace=namespace, names_only=True
    )
    pod_client = OCP(kind="Pod", namespace=namespace)
    for name in pod_names:
        # INFO only announces the fetch; the pod output itself goes to DEBUG.
        log.info(
            "fetching output of pod %s of job/%s (see DEBUG logs)", name, job_name
        )
        log.debug(pod_client.get_logs(name))
Beispiel #2
0
    def smallfile_run(self, es):
        """
        Run the smallfile workload so the elasticsearch server will have some
        data in it for copy.

        The benchmark CR is loaded from the template, pointed at the given
        elasticsearch server, deployed, and then the client pod log is polled
        until it reports that the run is done.

        Args:
            es (Elasticsearch): elastic search object

        Returns:
            str: the UUID of the test

        Raises:
            TimeoutError: if the benchmark does not report completion within
                the 600 seconds polling budget

        """

        ripsaw = RipSaw()

        # Loading the main template yaml file for the benchmark and update some
        # fields with new values
        sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

        # Point the workload at the elasticsearch server
        sf_data["spec"]["elasticsearch"]["server"] = es.get_ip()
        sf_data["spec"]["elasticsearch"]["port"] = es.get_port()

        # Setting up the parameters for this test
        workload_args = sf_data["spec"]["workload"]["args"]
        workload_args["samples"] = 1
        workload_args["operation"] = ["create"]
        workload_args["file_size"] = 4
        workload_args["files"] = 500000
        workload_args["threads"] = 4
        workload_args["storageclass"] = constants.DEFAULT_STORAGECLASS_RBD
        workload_args["storagesize"] = "100Gi"

        # Deploy the ripsaw operator
        log.info("Apply Operator CRD")
        ripsaw.apply_crd("resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml")

        # deploy the smallfile workload
        log.info("Running SmallFile bench")
        sf_obj = OCS(**sf_data)
        sf_obj.create()

        # wait for benchmark pods to get created - takes a while
        for pod_names in TimeoutSampler(
                240,
                10,
                get_pod_name_by_pattern,
                "smallfile-client",
                constants.RIPSAW_NAMESPACE,
        ):
            try:
                if pod_names[0] is not None:
                    small_file_client_pod = pod_names[0]
                    break
            except IndexError:
                # An empty sample means the client pod does not exist yet
                log.info("Bench pod not ready yet")

        bench_pod = OCP(kind="pod", namespace=constants.RIPSAW_NAMESPACE)
        log.info("Waiting for SmallFile benchmark to Run")
        bench_pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            resource_name=small_file_client_pod,
            sleep=30,
            timeout=600,
        )
        test_uuid = ripsaw.get_uuid(small_file_client_pod)

        # Poll the client pod log until the benchmark reports completion.
        # The budget check uses "<= 0" so the loop can never end silently
        # without either seeing the completion marker or raising.
        poll_interval = 30
        remaining = 600
        while "RUN STATUS DONE" not in bench_pod.get_logs(
                name=small_file_client_pod):
            remaining -= poll_interval
            if remaining <= 0:
                raise TimeoutError(
                    "Timed out waiting for benchmark to complete")
            time.sleep(poll_interval)
        ripsaw.cleanup()
        return test_uuid
    def test_pvc_snapshot_performance_multiple_files(self, file_size, files,
                                                     threads, interface):
        """
        Run SmallFile Workload and then take snapshot.

        For every sample (``self.tests_numbers`` samples in total) the
        SmallFile benchmark is deployed, the test waits until it finished
        creating the files, a snapshot of the benchmark PVC is taken and both
        the snapshot creation time and the csi snapshot creation time are
        recorded. The workload and the snapshot are deleted before the next
        sample starts. At the end the results are logged and written through
        ``self.full_results.es_write()``.

        Args:
            file_size (int): the size of the file to be create - in KiB
            files (int): number of files each thread will create
            threads (int): number of threads will be used in the workload
            interface (str): the volume interface that will be used
                             CephBlockPool / CephFileSystem

        Raises:
            TimeoutError : in case of creation files take too long time
                           more than 2 Hours
            AssertionError : if the benchmark pod does not reach Running
                             state or no PVC is found on it

        """

        # Deploying elastic-search server in the cluster for use by the
        # SmallFiles workload, since it is mandatory for the workload.
        # This is deployed once for all test iterations and will be deleted
        # in the end of the test.
        self.es = ElasticSearch()

        # Loading the main template yaml file for the benchmark and update some
        # fields with new values
        sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

        if interface == constants.CEPHBLOCKPOOL:
            storageclass = constants.DEFAULT_STORAGECLASS_RBD
        else:
            storageclass = constants.DEFAULT_STORAGECLASS_CEPHFS
        log.info(f"Using {storageclass} Storageclass")

        # Setting up the parameters for this test
        sf_data["spec"]["workload"]["args"]["samples"] = 1
        sf_data["spec"]["workload"]["args"]["operation"] = ["create"]
        sf_data["spec"]["workload"]["args"]["file_size"] = file_size
        sf_data["spec"]["workload"]["args"]["files"] = files
        sf_data["spec"]["workload"]["args"]["threads"] = threads
        sf_data["spec"]["workload"]["args"]["storageclass"] = storageclass
        sf_data["spec"]["elasticsearch"] = {
            "url": f"http://{self.es.get_ip()}:{self.es.get_port()}"
        }

        # Calculating the size of the volume that needs to be tested: it is
        # three times the amount of data written by the workload, and at
        # least 100Gi.
        #
        # Since the file_size is in KiB and the vol_size needs to be in GiB,
        # the total is divided by constants.GB2KB (presumably the number of
        # KiB in one GiB - verify against the constants module).
        total_files = int(files * threads)
        total_data = int(files * threads * file_size / constants.GB2KB)
        data_set = int(total_data * 3)  # calculate data with replica
        vol_size = max(data_set, 100)
        sf_data["spec"]["workload"]["args"]["storagesize"] = f"{vol_size}Gi"

        environment = get_environment_info()
        if environment["user"] != "":
            sf_data["spec"]["test_user"] = environment["user"]
        else:
            # since full results object need this parameter, initialize it from CR file
            environment["user"] = sf_data["spec"]["test_user"]

        sf_data["spec"]["clustername"] = environment["clustername"]
        log.debug(f"The smallfile yaml file is {sf_data}")

        # Deploy the benchmark-operator, so we can use the SmallFiles workload
        # to fill up the volume with files, and switch to the benchmark-operator namespace.
        log.info("Deploy the benchmark-operator")
        self.deploy_benchmark_operator()
        switch_to_project(BMO_NAME)

        all_results = []

        self.results_path = get_full_test_logs_path(cname=self)
        log.info(f"Logs file path name is : {self.full_log_path}")

        # Produce ES report
        # Collecting environment information
        self.get_env_info()

        # Initialize the results doc file.
        self.full_results = self.init_full_results(
            ResultsAnalyse(
                self.uuid,
                self.crd_data,
                self.full_log_path,
                "pvc_snapshot_perf_multiple_files",
            ))
        self.full_results.add_key("file_size_inKB", file_size)
        self.full_results.add_key("threads", threads)
        self.full_results.add_key("interface", interface)
        for test_num in range(self.tests_numbers):

            test_results = {"creation_time": None, "csi_creation_time": None}

            # deploy the smallfile workload
            log.info("Running SmallFile bench")
            sf_obj = OCS(**sf_data)
            sf_obj.create()

            # wait for benchmark pods to get created - takes a while
            for pod_names in TimeoutSampler(
                    240,
                    10,
                    get_pod_name_by_pattern,
                    "smallfile-client",
                    BMO_NAME,
            ):
                try:
                    if pod_names[0] is not None:
                        small_file_client_pod = pod_names[0]
                        break
                except IndexError:
                    # An empty sample means the client pod does not exist yet
                    log.info("Bench pod not ready yet")

            bench_pod = OCP(kind="pod", namespace=BMO_NAME)
            log.info("Waiting for SmallFile benchmark to Run")
            assert bench_pod.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                resource_name=small_file_client_pod,
                sleep=30,
                timeout=600,
            )

            # Find the benchmark client pod by name and read the name of the
            # PVC it mounts. pvc_name is initialized up front so it is
            # defined even when no matching volume is found.
            pvc_name = ""
            for item in bench_pod.get()["items"]:
                if item.get("metadata").get("name") != small_file_client_pod:
                    continue
                for volume in item.get("spec").get("volumes"):
                    if "persistentVolumeClaim" in volume:
                        pvc_name = volume["persistentVolumeClaim"][
                            "claimName"]
                        break
                # The client pod was found - no need to scan further pods
                break
            log.info(f"Benchmark PVC name is : {pvc_name}")
            # Without a PVC there is nothing to snapshot, so fail here with
            # a clear message instead of inside the snapshot creation.
            assert pvc_name, (
                f"No PVC was found on the benchmark pod {small_file_client_pod}"
            )

            # Creation of 1M files on CephFS can take a lot of time.
            # Poll the client pod log until the benchmark reports completion;
            # the "<= 0" check guarantees the loop cannot end silently.
            poll_interval = 30
            remaining = 7200
            while "RUN STATUS DONE" not in bench_pod.get_logs(
                    name=small_file_client_pod):
                remaining -= poll_interval
                if remaining <= 0:
                    raise TimeoutError(
                        "Timed out waiting for benchmark to complete")
                time.sleep(poll_interval)
            log.info(f"Smallfile test ({test_num + 1}) finished.")

            # Taking snapshot of the PVC (which contain files)
            snap_name = pvc_name.replace("claim", "snapshot-")
            log.info(f"Taking snapshot of the PVC {pvc_name}")
            log.info(f"Snapshot name : {snap_name}")

            # The same start time is handed to both measurements below
            start_time = datetime.datetime.utcnow().strftime(
                "%Y-%m-%dT%H:%M:%SZ")

            test_results["creation_time"] = self.measure_create_snapshot_time(
                pvc_name=pvc_name,
                snap_name=snap_name,
                namespace=BMO_NAME,
                interface=interface,
                start_time=start_time,
            )
            log.info(
                f"Snapshot with name {snap_name} and id {self.snap_uid} creation time is"
                f' {test_results["creation_time"]} seconds')

            test_results[
                "csi_creation_time"] = performance_lib.measure_csi_snapshot_creation_time(
                    interface=interface,
                    snapshot_id=self.snap_uid,
                    start_time=start_time)
            log.info(
                f"Snapshot with name {snap_name} and id {self.snap_uid} csi creation time is"
                f' {test_results["csi_creation_time"]} seconds')

            all_results.append(test_results)

            # Delete the smallfile workload - which will delete also the PVC
            log.info("Deleting the smallfile workload")
            if sf_obj.delete(wait=True):
                log.info("The smallfile workload was deleted successfully")

            # Delete VolumeSnapshots
            log.info("Deleting the snapshots")
            if self.snap_obj.delete(wait=True):
                log.info("The snapshot deleted successfully")
            log.info("Verify (and wait if needed) that ceph health is OK")
            ceph_health_check(tries=45, delay=60)

            # Sleep for 1 Min. between test samples
            time.sleep(60)

        # Cleanup the elasticsearch instance.
        # NOTE(review): this is reached only when all samples succeeded; if a
        # sample fails, this method does not clean the instance up.
        log.info("Deleting the elastic-search instance")
        self.es.cleanup()

        creation_times = [t["creation_time"] for t in all_results]
        avg_c_time = statistics.mean(creation_times)
        csi_creation_times = [t["csi_creation_time"] for t in all_results]
        avg_csi_c_time = statistics.mean(csi_creation_times)

        # data_set holds three times the written data - report the net size
        t_dateset = int(data_set / 3)

        log.info(f"Full test report for {interface}:")
        log.info(f"Test ran {self.tests_numbers} times, "
                 f"All snapshot creation results are {creation_times} seconds")
        log.info(
            f"The average snapshot creation time is : {avg_c_time} seconds")
        log.info(f"Test ran {self.tests_numbers} times, "
                 f"All snapshot csi creation results are {csi_creation_times}")
        log.info(
            f"The average csi snapshot creation time is : {avg_csi_c_time}")

        log.info(f"Number of Files on the volume : {total_files:,}, "
                 f"Total dataset : {t_dateset} GiB")

        self.full_results.add_key("avg_snapshot_creation_time_insecs",
                                  avg_c_time)
        self.full_results.all_results["total_files"] = total_files
        self.full_results.all_results["total_dataset"] = t_dateset
        self.full_results.all_results["creation_time"] = creation_times
        self.full_results.all_results["csi_creation_time"] = csi_creation_times

        # Write the test results into the ES server
        log.info("writing results to elastic search server")
        if self.full_results.es_write():
            res_link = self.full_results.results_link()
            # write the ES link to the test results in the test log.
            log.info(f"The result can be found at : {res_link}")

            # Create text file with results of all subtest
            self.write_result_to_file(res_link)
    def test_pvc_snapshot_performance_multiple_files(self, ripsaw, file_size,
                                                     files, threads,
                                                     interface):
        """
        Run SmallFile Workload and then take snapshot.

        For every sample (``self.tests_numbers`` samples in total) the
        SmallFile benchmark is deployed, the test waits until it finished
        creating the files, a snapshot of the benchmark PVC is taken and the
        time it took is recorded. The workload and the snapshot are deleted
        before the next sample starts, and a summary is logged at the end.

        Args:
            ripsaw : benchmark operator fixture which will run the workload
            file_size (int): the size of the file to be create - in KiB
            files (int): number of files each thread will create
            threads (int): number of threads will be used in the workload
            interface (str): the volume interface that will be used
                             CephBlockPool / CephFileSystem

        Raises:
            TimeoutError : in case of creation files take too long time
                           more than 2 Hours
            AssertionError : if the benchmark pod does not reach Running
                             state or no PVC is found on it

        """

        # Loading the main template yaml file for the benchmark and update some
        # fields with new values
        sf_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

        if interface == constants.CEPHBLOCKPOOL:
            storageclass = constants.DEFAULT_STORAGECLASS_RBD
        else:
            storageclass = constants.DEFAULT_STORAGECLASS_CEPHFS
        log.info(f"Using {storageclass} Storageclass")

        # Setting up the parameters for this test
        sf_data["spec"]["workload"]["args"]["samples"] = 1
        sf_data["spec"]["workload"]["args"]["operation"] = ["create"]
        sf_data["spec"]["workload"]["args"]["file_size"] = file_size
        sf_data["spec"]["workload"]["args"]["files"] = files
        sf_data["spec"]["workload"]["args"]["threads"] = threads
        sf_data["spec"]["workload"]["args"]["storageclass"] = storageclass
        # This variant runs without an elasticsearch server
        del sf_data["spec"]["elasticsearch"]

        # Calculating the size of the volume that needs to be tested: it is
        # three times the amount of data written by the workload, and at
        # least 100Gi.
        #
        # Since the file_size is in KiB and the vol_size needs to be in GiB,
        # the total is divided by constants.GB2KB (presumably the number of
        # KiB in one GiB - verify against the constants module).
        total_files = int(files * threads)
        total_data = int(files * threads * file_size / constants.GB2KB)
        data_set = int(total_data * 3)  # calculate data with replica
        vol_size = max(data_set, 100)
        sf_data["spec"]["workload"]["args"]["storagesize"] = f"{vol_size}Gi"

        environment = get_environment_info()
        if environment["user"] != "":
            sf_data["spec"]["test_user"] = environment["user"]
        else:
            # since full results object need this parameter, initialize it from CR file
            environment["user"] = sf_data["spec"]["test_user"]

        sf_data["spec"]["clustername"] = environment["clustername"]
        log.debug(f"The smallfile yaml file is {sf_data}")

        # Deploy the ripsaw operator
        log.info("Apply Operator CRD")
        ripsaw.apply_crd("resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml")

        all_results = []

        for test_num in range(self.tests_numbers):

            # deploy the smallfile workload
            log.info("Running SmallFile bench")
            sf_obj = OCS(**sf_data)
            sf_obj.create()

            # wait for benchmark pods to get created - takes a while
            for pod_names in TimeoutSampler(
                    240,
                    10,
                    get_pod_name_by_pattern,
                    "smallfile-client",
                    constants.RIPSAW_NAMESPACE,
            ):
                try:
                    if pod_names[0] is not None:
                        small_file_client_pod = pod_names[0]
                        break
                except IndexError:
                    # An empty sample means the client pod does not exist yet
                    log.info("Bench pod not ready yet")

            bench_pod = OCP(kind="pod", namespace=constants.RIPSAW_NAMESPACE)
            log.info("Waiting for SmallFile benchmark to Run")
            assert bench_pod.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                resource_name=small_file_client_pod,
                sleep=30,
                timeout=600,
            )

            # Find the benchmark client pod by name, rather than relying on
            # its position in the pod list, and read the name of the PVC it
            # mounts. pvc_name is reset for every sample so a value from a
            # previous sample can never be reused by mistake.
            pvc_name = ""
            for item in bench_pod.get()["items"]:
                if item.get("metadata").get("name") != small_file_client_pod:
                    continue
                for volume in item.get("spec").get("volumes"):
                    if "persistentVolumeClaim" in volume:
                        pvc_name = volume["persistentVolumeClaim"][
                            "claimName"]
                        break
                # The client pod was found - no need to scan further pods
                break
            log.info(f"Benchmark PVC name is : {pvc_name}")
            # Without a PVC there is nothing to snapshot, so fail here with
            # a clear message instead of inside the snapshot creation.
            assert pvc_name, (
                f"No PVC was found on the benchmark pod {small_file_client_pod}"
            )

            # Creation of 4M files on CephFS can take a lot of time.
            # Poll the client pod log until the benchmark reports completion;
            # the "<= 0" check guarantees the loop cannot end silently.
            poll_interval = 30
            remaining = 7200
            while "RUN STATUS DONE" not in bench_pod.get_logs(
                    name=small_file_client_pod):
                remaining -= poll_interval
                if remaining <= 0:
                    raise TimeoutError(
                        "Timed out waiting for benchmark to complete")
                time.sleep(poll_interval)
            log.info(f"Smallfile test ({test_num + 1}) finished.")

            # Taking snapshot of the PVC (which contain files)
            snap_name = pvc_name.replace("claim", "snapshot-")
            log.info(f"Taking snapshot of the PVC {pvc_name}")
            log.info(f"Snapshot name : {snap_name}")
            creation_time = self.measure_create_snapshot_time(
                pvc_name=pvc_name, snap_name=snap_name, interface=interface)
            log.info(f"Snapshot creation time is {creation_time} seconds")
            all_results.append(creation_time)

            # Delete the smallfile workload
            log.info("Deleting the smallfile workload")
            if sf_obj.delete(wait=True):
                log.info("The smallfile workload was deleted successfully")

            # Delete VolumeSnapshots
            log.info("Deleting the snapshots")
            if self.snap_obj.delete(wait=True):
                log.info("The snapshot deleted successfully")
            log.info("Verify (and wait if needed) that ceph health is OK")
            ceph_health_check(tries=45, delay=60)

        log.info(f"Full test report for {interface}:")
        log.info(f"Test ran {self.tests_numbers} times, "
                 f"All results are {all_results}")
        log.info(
            f"The average creation time is : {statistics.mean(all_results)}")
        # data_set holds three times the written data - report the net size
        log.info(f"Number of Files on the volume : {total_files:,}, "
                 f"Total dataset : {int(data_set / 3)} GiB")