Example no. 1
0
def test_start_fio_job(
    tmp_path,
    fio_pvc_dict,
    fio_job_dict,
    fio_configmap_dict,
):
    """
    Start a fio job performing IO load, check that it's running, and keep
    it running even after the test finishes.
    """
    # creating the project directly to set its name and prevent its deletion
    project = ocp.OCP(kind="Project", namespace=TEST_NS)
    project.new_project(TEST_NS)

    # size of the volume for fio
    pvc_size = 10  # GiB

    # the test uses a cephfs based volume; this could be parametrized, or we
    # could start more jobs
    storage_class_name = "ocs-storagecluster-cephfs"

    # fio config file: random mixed read and write IO will run for one day
    # (we expect that the other test will stop it); only 1/2 of the volume is
    # used, as we don't need to utilize the PV at 100%
    fio_size = int(pvc_size / 2)  # GiB
    fio_conf = textwrap.dedent(f"""
        [readwrite]
        readwrite=randrw
        buffered=1
        blocksize=4k
        ioengine=libaio
        directory=/mnt/target
        size={fio_size}G
        time_based
        runtime=24h
        """)

    # put the dicts together into yaml file of the Job
    fio_configmap_dict["data"]["workload.fio"] = fio_conf
    fio_pvc_dict["spec"]["storageClassName"] = storage_class_name
    fio_pvc_dict["spec"]["resources"]["requests"]["storage"] = f"{pvc_size}Gi"
    fio_objs = [fio_pvc_dict, fio_configmap_dict, fio_job_dict]
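    # the PVC and ConfigMap are listed before the Job so they get created first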
    job_file = ObjectConfFile("fio_continuous", fio_objs, project, tmp_path)

    # deploy the Job to the cluster and start it
    job_file.create()

    # wait for a pod for the job to be deployed and running
    ocp_pod = ocp.OCP(kind="Pod", namespace=project.namespace)
    try:
        ocp_pod.wait_for_resource(resource_count=1,
                                  condition=constants.STATUS_RUNNING,
                                  timeout=300,
                                  sleep=30)
    except TimeoutExpiredError:
        logger.error("pod for fio job wasn't deployed properly")
        raise
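Example no. 2
0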
    def test_scale_obc_creation_noobaa_pod_respin(self, tmp_path, pod_name,
                                                  sc_name, mcg_job_factory):
        """
        OBC creation using RGW storage class
        This test case only runs on vSphere cluster deployment
        """

        # Create OBCs with FIO running, using mcg_job_factory()
        fio_jobs = [mcg_job_factory() for _ in range(self.scale_obc_count_io)]

        log.info(f"Start creating  {self.scale_obc_count} "
                 f"OBC in a batch of {self.num_obc_batch}")
        for i in range(int(self.scale_obc_count / self.num_obc_batch)):
            obc_dict_list = (
                scale_noobaa_lib.construct_obc_creation_yaml_bulk_for_kube_job(
                    no_of_obc=self.num_obc_batch,
                    sc_name=sc_name,
                    namespace=self.namespace,
                ))
            # Create job profile
            job_file = ObjectConfFile(
                name="job_profile",
                obj_dict_list=obc_dict_list,
                project=self.namespace,
                tmp_path=tmp_path,
            )
            # Create kube_job
            job_file.create(namespace=self.namespace)

            # Check all the OBCs reached Bound state
            obc_bound_list = (
                scale_noobaa_lib.check_all_obc_reached_bound_state_in_kube_job(
                    kube_job_obj=job_file,
                    namespace=self.namespace,
                    no_of_obc=self.num_obc_batch,
                ))
            log.info(f"Number of OBCs in Bound state: {len(obc_bound_list)}")

        # Restart the node which the noobaa pod is running on,
        # and validate that the noobaa pods are respun and in Running state
        scale_noobaa_lib.noobaa_running_node_restart(pod_name=pod_name)

        # Verify all OBCs are in Bound state after node restart
        log.info("Verify all OBCs are in Bound state after node restart")
        obc_status_list = scale_noobaa_lib.check_all_obcs_status(
            namespace=self.namespace)
        log.info(f"Number of OBCs in Bound state after node reset: "
                 f"{len(obc_status_list[0])}")
        assert (len(obc_status_list[0]) == self.scale_obc_count
                ), "Not all OBCs in Bound state"
Example no. 3
0
    def test_bulk_clone_performance(self, tmp_path, interface_iterate):
        """
        Creates a number of PVCs in bulk using a kube job
        Writes 60% of the PVC capacity to each of the created PVCs
        Creates one clone per PVC, all together in a bulk
        Measures total and CSI creation times for the bulk of clones

        """
        self.interface = interface_iterate
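        # the job file handles start as None so the finally block below only
        # deletes resources that were actually created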
        job_pod_file, job_pvc_file, job_clone_file = [None, None, None]
        log.info(f"Start creating {self.interface} {self.pvc_count} PVC")

        try:
            pvc_dict_list = scale_lib.construct_pvc_creation_yaml_bulk_for_kube_job(
                no_of_pvc=self.pvc_count,
                access_mode=Interfaces_info[self.interface]["accessmode"],
                sc_name=Interfaces_info[self.interface]["sc_name"],
                pvc_size=self.vol_size,
            )

            job_pvc_file = ObjectConfFile(
                name="job_profile_pvc",
                obj_dict_list=pvc_dict_list,
                project=self.namespace,
                tmp_path=tmp_path,
            )

            # Create kube_job
            job_pvc_file.create(namespace=self.namespace)

            # Check all the PVC reached Bound state
            performance_lib.wait_for_resource_bulk_status(
                resource="pvc",
                resource_count=self.pvc_count,
                namespace=self.namespace,
                status=constants.STATUS_BOUND,
                timeout=120,
                sleep_time=5,
            )
            log.info(
                f"All the PVCs ({self.pvc_count}) were created and are in Bound state"
            )

            # Getting the list of the PVC names
            pvc_bound_list = [
                p.name for p in pvc.get_all_pvc_objs(namespace=self.namespace)
            ]

            # Kube_job to Create pod
            log.info(
                "Attaching PODs to the PVCs and filling them with data (60%)")
            pod_dict_list = self.attach_pvcs_to_pod_dict(pvc_bound_list)
            job_pod_file = ObjectConfFile(
                name="job_profile_pod",
                obj_dict_list=pod_dict_list,
                project=self.namespace,
                tmp_path=tmp_path,
            )
            job_pod_file.create(namespace=self.namespace)

            # Check all PODs are in Completed state
            performance_lib.wait_for_resource_bulk_status(
                resource="pod",
                resource_count=self.pvc_count,
                namespace=self.namespace,
                status=constants.STATUS_COMPLETED,
                timeout=1200,
                sleep_time=30,
            )
            log.info("All the PODs completed writing data to the PVC's")

            clone_dict_list = scale_lib.construct_pvc_clone_yaml_bulk_for_kube_job(
                pvc_dict_list,
                Interfaces_info[self.interface]["clone_yaml"],
                Interfaces_info[self.interface]["sc_name"],
            )

            log.info("Created clone dict list")

            csi_bulk_start_time = self.get_time(time_format="csi")
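            # (this CSI-format timestamp is used below by csi_bulk_pvc_time_measure()
            # to compute the CSI-level clone creation time)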

            job_clone_file = ObjectConfFile(
                name="job_profile_clone",
                obj_dict_list=clone_dict_list,
                project=self.namespace,
                tmp_path=tmp_path,
            )

            # Create kube_job that creates clones
            job_clone_file.create(namespace=self.namespace)

            log.info("Going to check bound status for clones")
            # Check all the clones reached Bound state
            try:
                performance_lib.wait_for_resource_bulk_status(
                    resource="pvc",
                    resource_count=self.pvc_count * 2,
                    namespace=self.namespace,
                    status=constants.STATUS_BOUND,
                    timeout=1200,
                    sleep_time=30,
                )
            except Exception as ex:
                log.error("Failed to cvreate clones for PVCs")
                raise ex

            log.info(
                f"All the Clones ({self.pvc_count}) were created and are in Bound state"
            )

            all_pvc_objs = pvc.get_all_pvc_objs(namespace=self.namespace)
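            # clone PVCs are identified by names starting with "clone"
            # (re.match anchors at the beginning of the string)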
            clone_objs = [
                cl for cl in all_pvc_objs if re.match("clone", cl.name)
            ]
            for clone_yaml in clone_dict_list:
                name = clone_yaml["metadata"]["name"]
                size = clone_yaml["spec"]["resources"]["requests"]["storage"]
                log.info(f"Clone {name} of size {size} created")

            start_time = get_provision_time(self.interface,
                                            clone_objs,
                                            status="start")
            end_time = get_provision_time(self.interface,
                                          clone_objs,
                                          status="end")
            total_time = (end_time - start_time).total_seconds()
            speed = round(self.total_files_size / total_time, 2)

            csi_creation_time = performance_lib.csi_bulk_pvc_time_measure(
                self.interface, clone_objs, "create", csi_bulk_start_time)

            log.info(
                f"Total creation time = {total_time} secs, csi creation time = {csi_creation_time},"
                f" data size = {self.total_files_size} MB, speed = {speed} MB/sec "
                f"for {self.interface} clone in bulk of {self.pvc_count} clones."
            )

            # Produce ES report
            # Collecting environment information
            self.get_env_info()

            # Initialize the results doc file.
            full_results = self.init_full_results(
                ResultsAnalyse(
                    self.uuid,
                    self.crd_data,
                    self.full_log_path,
                    "bulk_clone_perf_fullres",
                ))

            full_results.add_key("interface", self.interface)
            full_results.add_key("bulk_size", self.pvc_count)
            full_results.add_key("clone_size", self.vol_size)
            full_results.add_key("bulk_creation_time", total_time)
            full_results.add_key("bulk_csi_creation_time", csi_creation_time)
            full_results.add_key("data_size(MB)", self.total_files_size)
            full_results.add_key("speed", speed)
            full_results.add_key("es_results_link",
                                 full_results.results_link())

            # Write the test results into the ES server
            full_results.es_write()
            self.results_path = get_full_test_logs_path(cname=self)
            res_link = full_results.results_link()
            # write the ES link to the test results in the test log.
            log.info(f"The result can be found at : {res_link}")

            # Create text file with results of all subtests (3 - according to the parameters)
            self.write_result_to_file(res_link)

        # The finally block is used to clean up the resources created;
        # it is executed irrespective of whether the try block passed or failed.
        finally:
            # Cleanup activities
            log.info(
                "Cleanup of all the resources created during test execution")
            for object_file in [job_pod_file, job_clone_file, job_pvc_file]:
                if object_file:
                    object_file.delete(namespace=self.namespace)
                    try:
                        object_file.wait_for_delete(
                            resource_name=object_file.name,
                            namespace=self.namespace)
                    except Exception:
                        log.error(f"{object_file['name']} didnt deleted !")

            # Check ceph health status
            utils.ceph_health_check(tries=20)
Example no. 4
0
def workload_fio_storageutilization(
    fixture_name,
    target_percentage,
    project,
    fio_pvc_dict,
    fio_job_dict,
    fio_configmap_dict,
    measurement_dir,
    tmp_path,
):
    """
    This function implements the core functionality of the fio storage
    utilization workload fixture. This is necessary because we can't
    parametrize a single general fixture over multiple parameters (it would
    mess with the test case id and polarion test case tracking).
    """
    if fixture_name.endswith("rbd"):
        storage_class_name = "ocs-storagecluster-ceph-rbd"
        ceph_pool_name = "ocs-storagecluster-cephblockpool"
    elif fixture_name.endswith("cephfs"):
        storage_class_name = "ocs-storagecluster-cephfs"
        ceph_pool_name = "ocs-storagecluster-cephfilesystem-data0"
    else:
        raise UnexpectedVolumeType(
            "unexpected volume type, ocs-ci code is wrong")

    # make sure we communicate what is going to happen
    logger.info((f"starting {fixture_name} fixture, "
                 f"using {storage_class_name} storage class "
                 f"backed by {ceph_pool_name} ceph pool"))

    pvc_size = get_storageutilization_size(target_percentage, ceph_pool_name)
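    # the computed PVC size is meant to bring the Ceph pool to the requested
    # target utilization once fio fills the volume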

    # For cephfs we can't use fill_fs because of BZ 1763808 (the process
    # will get *Disk quota exceeded* error instead of *No space left on
    # device* error).
    # On the other hand, we can't use size={pvc_size} for rbd, as we can't
    # write pvc_size bytes to a filesystem on a block device of {pvc_size}
    # size (obviously, some space is used by filesystem metadata).
    if fixture_name.endswith("rbd"):
        fio_conf = textwrap.dedent("""
            [simple-write]
            readwrite=write
            buffered=1
            blocksize=4k
            ioengine=libaio
            directory=/mnt/target
            fill_fs=1
            """)
    else:
        fio_conf = textwrap.dedent(f"""
            [simple-write]
            readwrite=write
            buffered=1
            blocksize=4k
            ioengine=libaio
            directory=/mnt/target
            size={pvc_size}G
            """)

    # put the dicts together into yaml file of the Job
    fio_configmap_dict["data"]["workload.fio"] = fio_conf
    fio_pvc_dict["spec"]["storageClassName"] = storage_class_name
    fio_pvc_dict["spec"]["resources"]["requests"]["storage"] = f"{pvc_size}Gi"
    fio_objs = [fio_pvc_dict, fio_configmap_dict, fio_job_dict]
    fio_job_file = ObjectConfFile(fixture_name, fio_objs, project, tmp_path)

    # how long do we let the job running while writing data to the volume
    # TODO: increase this value or make it configurable
    write_timeout = pvc_size * 30  # seconds
    logger.info((f"fixture will wait {write_timeout} seconds for the Job "
                 f"to write {pvc_size} Gi data on OCS backed volume"))

    def write_data():
        """
        Write data via the fio Job (defined in ``fio_job_file`` above) to reach
        the desired utilization level, and keep this level for ``minimal_time``
        seconds.
        """
        # deploy the fio Job to the cluster
        fio_job_file.create()

        # This is a WORKAROUND of particular ocs-ci design choices: I just wait
        # for one pod in the namespace, and then ask for the pod again to get
        # its name (but it would be much better to just wait for the job to
        # finish instead, then ask for the name of the successful pod and use
        # it to get logs ...)
        ocp_pod = ocp.OCP(kind="Pod", namespace=project.namespace)
        ocp_pod.wait_for_resource(resource_count=1,
                                  condition=constants.STATUS_COMPLETED,
                                  timeout=write_timeout,
                                  sleep=30)
        pod_data = ocp_pod.get()

        # explicit list of assumptions; if these assumptions are not met, the
        # code won't work, and it either means that something went terribly
        # wrong or that the code needs to be changed
        assert pod_data['kind'] == "List"
        pod_dict = pod_data['items'][0]
        assert pod_dict['kind'] == "Pod"
        pod_name = pod_dict['metadata']['name']
        logger.info(f"Identified pod name of the finished fio Job: {pod_name}")

        fio_output = ocp_pod.exec_oc_cmd(f"logs {pod_name}",
                                         out_yaml_format=False)

        # parse fio output
        fio_report = fio_to_dict(fio_output)

        logger.info(fio_report)

        # data which will be available to the test via:
        # fixture_name['result']
        result = {
            'fio': fio_report,
            'pvc_size': pvc_size,
            'target_p': target_percentage,
            'namespace': project.namespace
        }

        return result

    test_file = os.path.join(measurement_dir, f"{fixture_name}.json")
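    # measure_operation() runs write_data(), keeps the reached utilization
    # level for at least minimal_time seconds and (judging by the first_run
    # check below) persists the result in test_file so a rerun can reuse it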
    measured_op = measure_operation(write_data,
                                    test_file,
                                    measure_after=True,
                                    minimal_time=480)
    # we don't need to delete anything if this fixture has already
    # been executed
    if measured_op['first_run']:
        # make sure we communicate what is going to happen
        logger.info(f"going to delete {fixture_name} Job")
        fio_job_file.delete()

    return measured_op
Example no. 5
0
class TestBulkPodAttachPerformance(PASTest):
    """
    Test to measure the performance of attaching pods to PVCs in bulk
    """

    pvc_size = "1Gi"

    def setup(self):
        """
        Setting up test parameters
        """
        log.info("Starting the test setup")
        super(TestBulkPodAttachPerformance, self).setup()
        self.benchmark_name = "bulk_pod_attach_time"

        self.create_test_project()
        # Pulling the pod image to the worker nodes, so that image pulling is
        # not counted in the total attach time
        helpers.pull_images(constants.PERF_IMAGE)

        # Initializing some parameters
        self.pvc_objs = list()
        self.pods_obj = locals()
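        # pods_obj is only a placeholder here; the test replaces it with the
        # ObjectConfFile of the created pods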

    def teardown(self):
        """
        Cleanup the test environment
        """
        log.info("Starting the test environment celanup")

        # Deleting All POD(s)
        log.info("Try to delete all created PODs")
        try:
            self.pods_obj.delete(namespace=self.namespace)
        except Exception as ex:
            log.warn(f"Failed to delete POD(s) [{ex}]")
        log.info("Wait for all PODs to be deleted")
        performance_lib.wait_for_resource_bulk_status("pod", 0, self.namespace,
                                                      constants.STATUS_BOUND,
                                                      len(self.pvc_objs) * 2,
                                                      10)
        log.info("All POD(s) was deleted")

        # Deleting PVC(s) for deletion time mesurment
        log.info("Try to delete all created PVCs")
        for pvc_obj in self.pvc_objs:
            pvc_obj.delete()
        log.info("Wait for all PVC(s) to be deleted")
        performance_lib.wait_for_resource_bulk_status("pvc", 0, self.namespace,
                                                      constants.STATUS_BOUND,
                                                      len(self.pvc_objs) * 2,
                                                      10)
        log.info("All PVC(s) was deleted")
        log.info("Wait for all PVC(s) backed PV(s) to be deleted")
        # Timeout for each PV to be deleted is 20 sec.
        performance_lib.wait_for_resource_bulk_status("pv", 0, self.namespace,
                                                      self.namespace,
                                                      len(self.pvc_objs) * 20,
                                                      10)
        log.info("All backed PV(s) was deleted")

        # Delete the test project (namespace)
        self.delete_test_project()

        super(TestBulkPodAttachPerformance, self).teardown()

    @pytest.mark.parametrize(
        argnames=["interface_type", "bulk_size"],
        argvalues=[
            pytest.param(*[constants.CEPHBLOCKPOOL, 120], ),
            pytest.param(*[constants.CEPHBLOCKPOOL, 240], ),
            pytest.param(*[constants.CEPHFILESYSTEM, 120], ),
            pytest.param(*[constants.CEPHFILESYSTEM, 240], ),
        ],
    )
    @polarion_id("OCS-1620")
    def test_bulk_pod_attach_performance(self, interface_type, bulk_size):
        """
        Measures pod attach time for a bulk of bulk_size pods

        Args:
            interface_type (str): The interface type to be tested - CephBlockPool / CephFileSystem.
            bulk_size (int): Size of the bulk to be tested
        Returns:

        """
        self.interface = interface_type

        if self.dev_mode:
            bulk_size = 3

        # Initialize some variables
        timeout = bulk_size * 5
        pvc_names_list = list()
        pod_data_list = list()

        # Getting the test start time
        test_start_time = self.get_time()
        csi_start_time = self.get_time("csi")

        log.info(f"Start creating bulk of new {bulk_size} PVCs")
        self.pvc_objs, _ = helpers.create_multiple_pvcs(
            sc_name=Interfaces_info[self.interface]["sc"],
            namespace=self.namespace,
            number_of_pvc=bulk_size,
            size=self.pvc_size,
            burst=True,
            do_reload=False,
        )
        log.info("Wait for all of the PVCs to be in Bound state")
        performance_lib.wait_for_resource_bulk_status("pvc", bulk_size,
                                                      self.namespace,
                                                      constants.STATUS_BOUND,
                                                      timeout, 10)
        # in case of creation failure, the wait_for_resource_bulk_status function
        # will raise an exception, so at this point the creation succeeded
        log.info("All PVCs were created and are in Bound state.")

        # Reload all PVC(s) information
        for pvc_obj in self.pvc_objs:
            pvc_obj.reload()
            pvc_names_list.append(pvc_obj.name)
        log.debug(f"The PVCs names are : {pvc_names_list}")

        # Create kube_job for pod creation
        pod_data_list.extend(
            scale_lib.attach_multiple_pvc_to_pod_dict(
                pvc_list=pvc_names_list,
                namespace=self.namespace,
                pvcs_per_pod=1,
            ))
        self.pods_obj = ObjectConfFile(
            name="pod_kube_obj",
            obj_dict_list=pod_data_list,
            project=self.namespace,
            tmp_path=pathlib.Path(ocsci_log_path()),
        )
        log.debug(f"PODs data list is : {json.dumps(pod_data_list, indent=3)}")

        log.info(f"{self.interface} : Before pod attach")
        bulk_start_time = time.time()
        self.pods_obj.create(namespace=self.namespace)
        # Check all the PODs reached Running state
        log.info("Checking that pods are running")
        performance_lib.wait_for_resource_bulk_status("pod", bulk_size,
                                                      self.namespace,
                                                      constants.STATUS_RUNNING,
                                                      timeout, 2)
        log.info("All POD(s) are in Running State.")
        bulk_end_time = time.time()
        bulk_total_time = bulk_end_time - bulk_start_time
        log.info(
            f"Bulk attach time of {bulk_size} pods is {bulk_total_time} seconds"
        )

        csi_bulk_total_time = performance_lib.pod_bulk_attach_csi_time(
            self.interface, self.pvc_objs, csi_start_time, self.namespace)

        # Collecting environment information
        self.get_env_info()

        # Initialize the results doc file.
        full_results = self.init_full_results(
            ResultsAnalyse(self.uuid, self.crd_data, self.full_log_path,
                           "pod_bulk_attachtime"))

        full_results.add_key("storageclass",
                             Interfaces_info[self.interface]["name"])
        full_results.add_key("pod_bulk_attach_time", bulk_total_time)
        full_results.add_key("pod_csi_bulk_attach_time", csi_bulk_total_time)
        full_results.add_key("pvc_size", self.pvc_size)
        full_results.add_key("bulk_size", bulk_size)

        # Getting the test end time
        test_end_time = self.get_time()

        # Add the test time to the ES report
        full_results.add_key("test_time", {
            "start": test_start_time,
            "end": test_end_time
        })

        # Write the test results into the ES server
        self.results_path = helpers.get_full_test_logs_path(cname=self)
        if full_results.es_write():
            res_link = full_results.results_link()
            # write the ES link to the test results in the test log.
            log.info(f"The result can be found at : {res_link}")

            # Create text file with results of all subtests (4 - according to the parameters)
            self.write_result_to_file(res_link)

    def test_bulk_pod_attach_results(self):
        """
        This is not a test - it only checks that the previous test ran and finished
        as expected, and reports the full results (links in the ES) of the previous tests (4)
        """

        self.add_test_to_results_check(
            test="test_bulk_pod_attach_performance",
            test_count=4,
            test_name="Bulk Pod Attach Time",
        )
        self.check_results_and_push_to_dashboard()

    def init_full_results(self, full_results):
        """
        Initialize the full results object which will be sent to the ES server

        Args:
            full_results (obj): an empty ResultsAnalyse object

        Returns:
            ResultsAnalyse (obj): the input object filled with data

        """
        for key in self.environment:
            full_results.add_key(key, self.environment[key])
        return full_results
Example no. 6
0
    def test_bulk_pod_attach_performance(self, interface_type, bulk_size):
        """
        Measures pod attach time for a bulk of bulk_size pods

        Args:
            interface_type (str): The interface type to be tested - CephBlockPool / CephFileSystem.
            bulk_size (int): Size of the bulk to be tested
        Returns:

        """
        self.interface = interface_type

        if self.dev_mode:
            bulk_size = 3

        # Initialize some variables
        timeout = bulk_size * 5
        pvc_names_list = list()
        pod_data_list = list()

        # Getting the test start time
        test_start_time = self.get_time()
        csi_start_time = self.get_time("csi")

        log.info(f"Start creating bulk of new {bulk_size} PVCs")
        self.pvc_objs, _ = helpers.create_multiple_pvcs(
            sc_name=Interfaces_info[self.interface]["sc"],
            namespace=self.namespace,
            number_of_pvc=bulk_size,
            size=self.pvc_size,
            burst=True,
            do_reload=False,
        )
        log.info("Wait for all of the PVCs to be in Bound state")
        performance_lib.wait_for_resource_bulk_status("pvc", bulk_size,
                                                      self.namespace,
                                                      constants.STATUS_BOUND,
                                                      timeout, 10)
        # in case of creation failure, the wait_for_resource_bulk_status function
        # will raise an exception, so at this point the creation succeeded
        log.info("All PVCs were created and are in Bound state.")

        # Reload all PVC(s) information
        for pvc_obj in self.pvc_objs:
            pvc_obj.reload()
            pvc_names_list.append(pvc_obj.name)
        log.debug(f"The PVCs names are : {pvc_names_list}")

        # Create kube_job for pod creation
        pod_data_list.extend(
            scale_lib.attach_multiple_pvc_to_pod_dict(
                pvc_list=pvc_names_list,
                namespace=self.namespace,
                pvcs_per_pod=1,
            ))
        self.pods_obj = ObjectConfFile(
            name="pod_kube_obj",
            obj_dict_list=pod_data_list,
            project=self.namespace,
            tmp_path=pathlib.Path(ocsci_log_path()),
        )
        log.debug(f"PODs data list is : {json.dumps(pod_data_list, indent=3)}")

        log.info(f"{self.interface} : Before pod attach")
        bulk_start_time = time.time()
        self.pods_obj.create(namespace=self.namespace)
        # Check all the PODs reached Running state
        log.info("Checking that pods are running")
        performance_lib.wait_for_resource_bulk_status("pod", bulk_size,
                                                      self.namespace,
                                                      constants.STATUS_RUNNING,
                                                      timeout, 2)
        log.info("All POD(s) are in Running State.")
        bulk_end_time = time.time()
        bulk_total_time = bulk_end_time - bulk_start_time
        log.info(
            f"Bulk attach time of {bulk_size} pods is {bulk_total_time} seconds"
        )

        csi_bulk_total_time = performance_lib.pod_bulk_attach_csi_time(
            self.interface, self.pvc_objs, csi_start_time, self.namespace)

        # Collecting environment information
        self.get_env_info()

        # Initialize the results doc file.
        full_results = self.init_full_results(
            ResultsAnalyse(self.uuid, self.crd_data, self.full_log_path,
                           "pod_bulk_attachtime"))

        full_results.add_key("storageclass",
                             Interfaces_info[self.interface]["name"])
        full_results.add_key("pod_bulk_attach_time", bulk_total_time)
        full_results.add_key("pod_csi_bulk_attach_time", csi_bulk_total_time)
        full_results.add_key("pvc_size", self.pvc_size)
        full_results.add_key("bulk_size", bulk_size)

        # Getting the test end time
        test_end_time = self.get_time()

        # Add the test time to the ES report
        full_results.add_key("test_time", {
            "start": test_start_time,
            "end": test_end_time
        })

        # Write the test results into the ES server
        self.results_path = helpers.get_full_test_logs_path(cname=self)
        if full_results.es_write():
            res_link = full_results.results_link()
            # write the ES link to the test results in the test log.
            log.info(f"The result can be found at : {res_link}")

            # Create text file with results of all subtests (4 - according to the parameters)
            self.write_result_to_file(res_link)
Example no. 7
0
def setup_netsplit(tmp_path,
                   master_zones,
                   worker_zones,
                   x_addr_list=None,
                   arbiter_zone=None):
    """
    Deploy machineconfig with network split scripts and configuration, tailored
    for the current cluster state.

    Args:
        tmp_path(pathlib.Path): Directory where a temporary yaml file will
                be created. In test context, use pytest fixture ``tmp_path``.
        master_zones(list[str]): zones where master nodes are placed
        worker_zones(list[str]): zones where worker nodes are placed
        x_addr_list(list[str]): IP addresses of external services (zone x)
        arbiter_zone(str): name of arbiter zone if arbiter deployment is used

    Raises:
        UnexpectedDeploymentConfiguration: in case of invalid cluster
            configuration, which prevents deployment of network split scripts
        ValueError: in case given zone configuration doesn't make any sense
    """
    logger.info("going to deploy ocpnetsplit scripts")
    # checking assumptions: each node has a zone label
    if not are_zone_labels_present():
        msg = "to use network_split_setup, all nodes needs a zone label"
        logger.error(msg)
        raise exceptions.UnexpectedDeploymentConfiguration(msg)
    # check zone assumptions: all worker zones are master zones as well
    worker_zones_without_master = set(worker_zones).difference(
        set(master_zones))
    if len(worker_zones_without_master) != 0:
        msg = ("there are zones which contains worker nodes, "
               f"but no master nodes: {worker_zones_without_master}")
        logger.error(msg)
        raise exceptions.UnexpectedDeploymentConfiguration(msg)
    if (arbiter_zone is not None) and (arbiter_zone not in master_zones):
        msg = "given arbiter zone not found among master zones"
        logger.error(msg)
        raise ValueError(msg)
    if len(master_zones) == 3:
        zone_a, zone_b, zone_c = master_zones
        # handle arbiter (so that zone a is always arbiter) if specified
        if arbiter_zone is not None:
            zone_a = arbiter_zone
            other_zones = master_zones.copy()
            other_zones.remove(arbiter_zone)
            zone_b, zone_c = other_zones
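            # at this point zone_a is the arbiter zone (when one is given)
            # and zone_b, zone_c are the remaining master zones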
    else:
        msg = "ocpnetsplit can handle only 3 zones, setup can't continue"
        logger.error(msg)
        raise exceptions.UnexpectedDeploymentConfiguration(msg)
    # we assume that there are just 2 machine config pools: master and worker
    mcp_h = OCP(kind="MachineConfigPool", namespace="openshift-config")
    mcp_objects = mcp_h.get()
    mcp_names = [i["metadata"]["name"] for i in mcp_objects["items"]]
    if len(mcp_names) != 2:
        msg = ("ocpnetsplit can handle only 2 machine config pools, "
               f"but there are {mcp_names}")
        logger.error(msg)
        raise exceptions.UnexpectedDeploymentConfiguration(msg)
    for exp_pool in ("master", "worker"):
        if exp_pool not in mcp_names:
            msg = f"MachineConfigPool/{exp_pool} not found"
            logger.error(msg)
            raise exceptions.UnexpectedDeploymentConfiguration(msg)
    # generate zone config (list of node ip addresses for each zone)
    zone_config = ocpnetsplit.main.get_zone_config(zone_a, zone_b, zone_c,
                                                   x_addr_list)
    zone_env = zone_config.get_env_file()
    # get the machineconfig for the network split firewall scripts
    mc = ocpnetsplit.main.get_networksplit_mc_spec(zone_env)
    # deploy it within openshift-config namespace
    mc_file = ObjectConfFile("network-split", mc, None, tmp_path)
    mc_file.create(namespace="openshift-config")
    # now let's make sure the MCO (machine config operator) noticed the just
    # deployed network-split machineconfig and started to process it
    logger.info("waiting for both machineconfigpools to be updating "
                "as a result of deployment of network-split machineconfig")
    mcp_h.wait_for_resource(
        resource_count=2,
        condition="True",
        column="UPDATING",
        sleep=5,
        timeout=120,
    )
    # and now wait for MachineConfigPools to be updated and ready
    logger.info("waiting for both machineconfigpools to be updated and ready")
    mcp_h.wait_for_resource(
        resource_count=2,
        condition="True",
        column="UPDATED",
        sleep=60,
        timeout=1800,
    )
    # also check that no pools are degraded
    mcp_h.wait_for_resource(
        resource_count=2,
        condition="False",
        column="DEGRADED",
        sleep=10,
        timeout=120,
    )