def test_rwo_dynamic_pvc(self, setup_base):
        """
        RWO Dynamic PVC creation tests with Reclaim policy set to Delete/Retain

        A second pod using the same PVC is created on another worker node and
        is expected to remain in ContainerCreating state while the first pod
        exists. After the first pod is deleted, the second pod must reach
        Running state and see the data written by the first pod, both before
        and after running its own IO.
        """
        second_node = self.worker_nodes_list[1]
        logger.info(
            f"Creating second pod on node: {second_node}")

        second_pod = helpers.create_pod(
            interface_type=self.interface_type,
            pvc_name=self.pvc_obj.name,
            do_reload=False,
            namespace=self.namespace,
            node_name=second_node,
            pod_dict_path=constants.NGINX_POD_YAML,
        )
        first_pod_node = self.pod_obj1.get().get('spec').get('nodeName')
        second_pod_node = second_pod.get().get('spec').get('nodeName')
        assert first_pod_node != second_pod_node, (
            'Both pods are on the same node'
        )

        # Write data from the first pod; the fio file is named after the pod
        logger.info(f"Running IO on pod {self.pod_obj1.name}")
        io_file = self.pod_obj1.name
        self.pod_obj1.run_io(
            storage_type=self.storage_type,
            size=self.io_size,
            runtime=30,
            fio_filename=io_file,
        )
        pod.get_fio_rw_iops(self.pod_obj1)
        first_pod_md5sum = pod.cal_md5sum(
            pod_obj=self.pod_obj1, file_name=io_file
        )

        # The second pod must still be stuck in ContainerCreating and must
        # have reported the expected failure event
        helpers.wait_for_resource_state(
            resource=second_pod, state=constants.STATUS_CONTAINER_CREATING
        )
        self.verify_expected_failure_event(
            ocs_obj=second_pod, failure_str=self.expected_pod_failure
        )

        logger.info(
            f"Deleting first pod so that second pod can attach "
            f"{self.pvc_obj.name}"
        )
        self.pod_obj1.delete()
        self.pod_obj1.ocp.wait_for_delete(resource_name=self.pod_obj1.name)

        # With the first pod gone, the second pod should come up
        helpers.wait_for_resource_state(
            resource=second_pod, state=constants.STATUS_RUNNING, timeout=240
        )

        # Same arguments are used for the check before and after new IO
        integrity_check = dict(
            pod_obj=second_pod,
            file_name=io_file,
            original_md5sum=first_pod_md5sum,
        )
        assert pod.verify_data_integrity(**integrity_check)

        second_pod.run_io(
            storage_type=self.storage_type,
            size=self.io_size,
            runtime=30,
            fio_filename=second_pod.name,
        )
        pod.get_fio_rw_iops(second_pod)

        # Data written by the first pod must be unaffected by the new IO
        assert pod.verify_data_integrity(**integrity_check)
    def test_snapshot_at_different_usage_level(
        self, snapshot_factory, snapshot_restore_factory, pod_factory
    ):
        """
        Test to take multiple snapshots of same PVC when the PVC usage is at
        0%, 20%, 40%, 60%, and 80%, then delete the parent PVC and restore the
        snapshots to create new PVCs. Delete snapshots and attach the restored
        PVCs to pods to verify the data.

        The method reads ``self.pods``, ``self.pvcs`` and ``self.pvc_size``,
        which are set outside this method (presumably by a setup fixture of
        the enclosing class - confirm).

        Args:
            snapshot_factory: Callable used to create a snapshot of a PVC
            snapshot_restore_factory: Callable used to create a PVC from a
                snapshot
            pod_factory: Callable used to create a pod attached to a PVC

        """
        snapshots = []
        usage_percent = [0, 20, 40, 60, 80]
        for usage in usage_percent:
            # No IO is run for the 0% step; the snapshot is taken of the PVC
            # in its current state
            if usage != 0:
                for pod_obj in self.pods:
                    log.info(f"Running IO on pod {pod_obj.name} to utilize {usage}%")
                    # A new file is written at every usage level, so files
                    # from earlier levels remain on the volume
                    pod_obj.pvc.filename = f"{pod_obj.name}_{usage}"
                    # Each step writes pvc_size / len(usage_percent) GB
                    pod_obj.run_io(
                        storage_type="fs",
                        size=f"{int(self.pvc_size/len(usage_percent))}G",
                        runtime=20,
                        fio_filename=pod_obj.pvc.filename,
                    )
                log.info(f"IO started on all pods to utilize {usage}%")

                for pod_obj in self.pods:
                    # Wait for fio to finish
                    pod_obj.get_fio_results()
                    log.info(
                        f"IO to utilize {usage}% finished on pod " f"{pod_obj.name}"
                    )
                    # Calculate md5sum
                    md5_sum = pod.cal_md5sum(pod_obj, pod_obj.pvc.filename)
                    # Lazily create the filename -> md5sum mapping on the PVC
                    # object the first time a file is written
                    if not getattr(pod_obj.pvc, "md5_sum", None):
                        setattr(pod_obj.pvc, "md5_sum", {})
                    pod_obj.pvc.md5_sum[pod_obj.pvc.filename] = md5_sum

            # Take snapshot of all PVCs
            log.info(f"Creating snapshot of all PVCs at {usage}%")
            for pvc_obj in self.pvcs:
                log.info(f"Creating snapshot of PVC {pvc_obj.name} at {usage}%")
                # Readiness of all snapshots is verified after this loop
                # (presumably wait=False makes the factory return without
                # waiting for the snapshot to become ready - confirm)
                snap_obj = snapshot_factory(pvc_obj, wait=False)
                # Set a dict containing filename:md5sum for later verification
                # deepcopy keeps the mapping as it was at this usage level; the
                # PVC's own dict gets more entries in later iterations
                setattr(snap_obj, "md5_sum", deepcopy(getattr(pvc_obj, "md5_sum", {})))
                # Record the usage on the first attached pod for comparison
                # with the restored PVC later
                snap_obj.usage_on_mount = get_used_space_on_mount_point(
                    pvc_obj.get_attached_pods()[0]
                )
                snapshots.append(snap_obj)
                log.info(f"Created snapshot of PVC {pvc_obj.name} at {usage}%")
            log.info(f"Created snapshot of all PVCs at {usage}%")
        log.info("Snapshots creation completed.")

        # Verify snapshots are ready
        log.info("Verify snapshots are ready")
        for snapshot in snapshots:
            snapshot.ocp.wait_for_resource(
                condition="true",
                resource_name=snapshot.name,
                column=constants.STATUS_READYTOUSE,
                timeout=90,
            )

        # Delete pods
        log.info("Deleting the pods")
        for pod_obj in self.pods:
            pod_obj.delete()
            pod_obj.ocp.wait_for_delete(resource_name=pod_obj.name)
        log.info("Deleted all the pods")

        # Delete parent PVCs
        log.info("Deleting parent PVCs")
        for pvc_obj in self.pvcs:
            # Fetch the PV object before deleting the PVC so that deletion of
            # the PV can be verified afterwards
            pv_obj = pvc_obj.backed_pv_obj
            pvc_obj.delete()
            pvc_obj.ocp.wait_for_delete(resource_name=pvc_obj.name)
            log.info(
                f"Deleted PVC {pvc_obj.name}. Verifying whether PV "
                f"{pv_obj.name} is deleted."
            )
            pv_obj.ocp.wait_for_delete(resource_name=pv_obj.name)
        log.info(
            "Deleted parent PVCs before restoring snapshot. " "PVs are also deleted."
        )

        restore_pvc_objs = []

        # Create PVCs out of the snapshots
        log.info("Creating new PVCs from snapshots")
        for snapshot in snapshots:
            log.info(f"Creating a PVC from snapshot {snapshot.name}")
            # Bound state is verified separately below (presumably status=""
            # tells the factory not to wait for a state - confirm)
            restore_pvc_obj = snapshot_restore_factory(
                snapshot_obj=snapshot,
                size=f"{self.pvc_size}Gi",
                volume_mode=snapshot.parent_volume_mode,
                access_mode=snapshot.parent_access_mode,
                status="",
            )

            log.info(
                f"Created PVC {restore_pvc_obj.name} from snapshot " f"{snapshot.name}"
            )
            restore_pvc_objs.append(restore_pvc_obj)
        log.info("Created new PVCs from all the snapshots")

        # Confirm that the restored PVCs are Bound
        # Increased wait time to 600 seconds as a workaround for BZ 1899968
        # TODO: Revert wait time to 200 seconds once BZ 1899968 is fixed
        log.info("Verify the restored PVCs are Bound")
        for pvc_obj in restore_pvc_objs:
            wait_for_resource_state(
                resource=pvc_obj, state=constants.STATUS_BOUND, timeout=600
            )
            pvc_obj.reload()
        log.info("Verified: Restored PVCs are Bound.")

        snapcontent_objs = []
        # Get VolumeSnapshotContent from VolumeSnapshots and delete
        # VolumeSnapshots
        log.info("Deleting snapshots")
        for snapshot in snapshots:
            # The content object is fetched before the snapshot is deleted so
            # that its deletion can be verified afterwards
            snapcontent_objs.append(get_snapshot_content_obj(snap_obj=snapshot))
            snapshot.delete()

        # Verify volume snapshots are deleted
        log.info("Verify snapshots are deleted")
        for snapshot in snapshots:
            snapshot.ocp.wait_for_delete(resource_name=snapshot.name)
        log.info("Verified: Snapshots are deleted")

        # Verify VolumeSnapshotContents are deleted
        for snapcontent_obj in snapcontent_objs:
            snapcontent_obj.ocp.wait_for_delete(
                resource_name=snapcontent_obj.name, timeout=180
            )

        # Attach the restored PVCs to pods
        log.info("Attach the restored PVCs to pods")
        restore_pod_objs = []
        for restore_pvc_obj in restore_pvc_objs:
            # Pick the interface from the parent storage class recorded on the
            # snapshot (the "snapshot" attribute is presumably set on the
            # restored PVC by snapshot_restore_factory - confirm)
            interface = (
                constants.CEPHFILESYSTEM
                if (constants.CEPHFS_INTERFACE in restore_pvc_obj.snapshot.parent_sc)
                else constants.CEPHBLOCKPOOL
            )
            restore_pod_obj = pod_factory(
                interface=interface, pvc=restore_pvc_obj, status=""
            )
            log.info(
                f"Attached the PVC {restore_pvc_obj.name} to pod "
                f"{restore_pod_obj.name}"
            )
            restore_pod_objs.append(restore_pod_obj)

        # Verify the new pods are running
        log.info("Verify the new pods are running")
        for pod_obj in restore_pod_objs:
            # A longer timeout is used on the IBM Cloud platform
            timeout = (
                300
                if config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM
                else 60
            )
            wait_for_resource_state(pod_obj, constants.STATUS_RUNNING, timeout)
        log.info("Verified: New pods are running")

        # Verify md5sum of files
        log.info("Verifying md5sum of files on all the pods")
        for restore_pod_obj in restore_pod_objs:
            log.info(
                f"Verifying md5sum of these files on pod "
                f"{restore_pod_obj.name}:"
                f"{restore_pod_obj.pvc.snapshot.md5_sum}"
            )
            # "actual_md5_sum" is the checksum recorded when the snapshot was
            # taken, i.e. the value the restored file is expected to have
            for (
                file_name,
                actual_md5_sum,
            ) in restore_pod_obj.pvc.snapshot.md5_sum.items():
                file_path = pod.get_file_path(restore_pod_obj, file_name)
                log.info(
                    f"Checking the existence of file {file_name} on pod "
                    f"{restore_pod_obj.name}"
                )
                assert pod.check_file_existence(restore_pod_obj, file_path), (
                    f"File {file_name} does not exist on pod " f"{restore_pod_obj.name}"
                )
                log.info(f"File {file_name} exists on pod {restore_pod_obj.name}")

                # Verify that the md5sum matches
                log.info(
                    f"Verifying md5sum of file {file_name} on pod "
                    f"{restore_pod_obj.name}"
                )
                # NOTE(review): the return value is not asserted here;
                # presumably verify_data_integrity raises on mismatch - confirm
                pod.verify_data_integrity(restore_pod_obj, file_name, actual_md5_sum)
                log.info(
                    f"Verified md5sum of file {file_name} on pod "
                    f"{restore_pod_obj.name}"
                )
            log.info(
                f"Verified md5sum of these files on pod "
                f"{restore_pod_obj.name}:"
                f"{restore_pod_obj.pvc.snapshot.md5_sum}"
            )
        log.info("md5sum verified")

        # Verify usage on mount point
        log.info("Verify usage on new pods")
        for pod_obj in restore_pod_objs:
            # Usage on the restored volume must equal the usage recorded on
            # the parent volume when the snapshot was taken
            usage_on_pod = get_used_space_on_mount_point(pod_obj)
            assert usage_on_pod == pod_obj.pvc.snapshot.usage_on_mount, (
                f"Usage on mount point is not the expected value on pod "
                f"{pod_obj.name}. Usage in percentage {usage_on_pod}. "
                f"Expected usage in percentage "
                f"{pod_obj.pvc.snapshot.usage_on_mount}"
            )
            log.info(
                f"Verified usage on new pod {pod_obj.name}. Usage in "
                f"percentage {usage_on_pod}. Expected usage in percentage "
                f"{pod_obj.pvc.snapshot.usage_on_mount}"
            )
        log.info("Verified usage on new pods")
    def test_pvc_rwx_writeable_after_pod_deletions(
        self, pvc_factory, teardown_factory
    ):
        """
        Verify a RWX CephFS PVC stays readable and writeable from the
        surviving pod after all other pods using it are deleted

        1. Create a new project.
        2. Create a RWX CEPHFS based PVC
        3. Attach the same PVC to multiple PODs and start IO on all the PODs
        4. Delete all but one pod.
        5. Verify mount point is still write-able.
             - Start IO again on the Running pod.
        6. Also, access the data written by deleted pods from the Running pod

        Args:
            pvc_factory: Callable used to create the RWX PVC
            teardown_factory: Callable that registers a resource for cleanup

        """
        worker_nodes_list = helpers.get_worker_nodes()
        # Fail with a clear message instead of an IndexError further down
        assert worker_nodes_list, "No worker nodes found to schedule pods on"

        # Create a RWX PVC
        pvc_obj = pvc_factory(
            interface=constants.CEPHFILESYSTEM, access_mode=constants.ACCESS_MODE_RWX,
            size=10, status=constants.STATUS_BOUND
        )
        logger.info(
            f"Creating pods on all worker nodes backed "
            f"with same pvc {pvc_obj.name}"
        )

        # One pod per worker node; pod_list keeps the same order as
        # worker_nodes_list
        pod_list = []
        for each_node in worker_nodes_list:
            pod_obj = helpers.create_pod(
                interface_type=constants.CEPHFILESYSTEM, pvc_name=pvc_obj.name,
                namespace=pvc_obj.namespace, node_name=each_node,
                pod_dict_path=constants.NGINX_POD_YAML
            )
            pod_list.append(pod_obj)
            teardown_factory(pod_obj)

        # Confirm pods are created and are running on designated nodes
        for pod_obj, expected_node in zip(pod_list, worker_nodes_list):
            helpers.wait_for_resource_state(
                resource=pod_obj, state=constants.STATUS_RUNNING,
                timeout=120
            )
            pod_obj.reload()
            assert pod.verify_node_name(pod_obj, expected_node), (
                f'Pod {pod_obj.name} is running on a different node '
                f'than the selected node'
            )

        # Run IOs on all pods. FIO Filename is kept same as pod name
        io_futures = []
        with ThreadPoolExecutor() as executor:
            for pod_obj in pod_list:
                logger.info(f"Running IO on pod {pod_obj.name}")
                io_futures.append(
                    executor.submit(
                        pod_obj.run_io, storage_type='fs', size='512M',
                        runtime=30, fio_filename=pod_obj.name
                    )
                )
        # Leaving the "with" block waits for all submitted calls. result()
        # re-raises any exception raised by run_io, which would otherwise be
        # silently dropped with the unused future.
        for io_future in io_futures:
            io_future.result()

        # Check IO from all pods
        for pod_obj in pod_list:
            pod.get_fio_rw_iops(pod_obj)

        # Calculate md5sum of each file
        md5sum_pod_data = [
            pod.cal_md5sum(pod_obj=pod_obj, file_name=pod_obj.name)
            for pod_obj in pod_list
        ]

        # Delete all but the last app pod.
        surviving_pod = pod_list[-1]
        for doomed_pod in pod_list[:-1]:
            doomed_pod.delete()
            doomed_pod.ocp.wait_for_delete(resource_name=doomed_pod.name)

        # Verify presence of files written by each pod
        logger.info(
            f"Verify existence of each file from app pod "
            f"{surviving_pod.name} "
        )
        for pod_obj in pod_list:
            file_path = pod.get_file_path(surviving_pod, pod_obj.name)
            assert pod.check_file_existence(surviving_pod, file_path), (
                f"File {pod_obj.name} doesnt exist"
            )
            logger.info(
                f"File {pod_obj.name} exists in {surviving_pod.name}"
            )

        # From surviving pod, verify data integrity of files
        # written by deleted pods
        logger.info(f"verify all data from {surviving_pod.name}")

        for pod_obj, original_md5sum in zip(pod_list, md5sum_pod_data):
            assert pod.verify_data_integrity(
                pod_obj=surviving_pod, file_name=pod_obj.name,
                original_md5sum=original_md5sum
            )

        # From surviving pod, confirm mount point is still write-able
        logger.info(f"Re-running IO on pod {surviving_pod.name}")
        fio_new_file = f"{surviving_pod.name}-new-file"
        surviving_pod.run_io(
            storage_type='fs', size='512M', runtime=30,
            fio_filename=fio_new_file
        )
        pod.get_fio_rw_iops(surviving_pod)
        file_path = pod.get_file_path(surviving_pod, fio_new_file)
        assert pod.check_file_existence(surviving_pod, file_path), (
            f"File {fio_new_file} doesnt exist"
        )
        logger.info(
            f"File {fio_new_file} exists in {surviving_pod.name} "
        )
    def test_rwx_dynamic_pvc(self, setup_base):
        """
        RWX Dynamic PVC creation tests with Reclaim policy set to Delete/Retain

        Two pods on different worker nodes share the same PVC. Each pod writes
        its own file, the md5sum of each file is verified from the other pod,
        and each pod renames the file written by the other one to confirm the
        data can be modified from any pod.
        """
        logger.info("CephFS RWX test")
        second_node = self.worker_nodes_list[1]
        logger.info(
            f"Creating second pod on node: {second_node} "
            f"with pvc {self.pvc_obj.name}"
        )

        second_pod = helpers.create_pod(
            interface_type=self.interface_type,
            pvc_name=self.pvc_obj.name,
            namespace=self.namespace,
            node_name=second_node,
            pod_dict_path=constants.NGINX_POD_YAML,
        )
        helpers.wait_for_resource_state(second_pod, constants.STATUS_RUNNING)
        second_pod.reload()

        first_pod_node = self.pod_obj1.get().get('spec').get('nodeName')
        second_pod_node = second_pod.get().get('spec').get('nodeName')
        assert first_pod_node != second_pod_node, (
            'Both pods are on the same node'
        )

        # Run IO on both the pods; each fio file is named after its pod
        logger.info(f"Running IO on pod {self.pod_obj1.name}")
        first_file = self.pod_obj1.name
        logger.info(first_file)
        self.pod_obj1.run_io(
            storage_type=self.storage_type,
            size=self.io_size,
            runtime=30,
            fio_filename=first_file,
        )

        logger.info(f"Running IO on pod {second_pod.name}")
        second_file = second_pod.name
        second_pod.run_io(
            storage_type=self.storage_type,
            size=self.io_size,
            runtime=30,
            fio_filename=second_file,
        )

        # Check IO and calculate md5sum of files
        pod.get_fio_rw_iops(self.pod_obj1)
        first_file_md5sum = pod.cal_md5sum(
            pod_obj=self.pod_obj1, file_name=first_file
        )

        pod.get_fio_rw_iops(second_pod)
        second_file_md5sum = pod.cal_md5sum(
            pod_obj=second_pod, file_name=second_file
        )

        # Each file is read back from the pod that did NOT write it
        logger.info("verify data from alternate pods")
        cross_checks = (
            (second_pod, first_file, first_file_md5sum),
            (self.pod_obj1, second_file, second_file_md5sum),
        )
        for reader_pod, written_file, expected_md5sum in cross_checks:
            assert pod.verify_data_integrity(
                pod_obj=reader_pod,
                file_name=written_file,
                original_md5sum=expected_md5sum,
            )

        # Verify that data is mutable from any pod
        logger.info("Perform modification of files from alternate pod")

        # Access and rename file written by pod-2 from pod-1
        second_file_path = pod.get_file_path(second_pod, second_file)
        logger.info(second_file_path)
        self.pod_obj1.exec_cmd_on_pod(
            command=f'bash -c "mv {second_file_path} {second_file_path}-renamed"',
            out_yaml_format=False,
        )

        # Access and rename file written by pod-1 from pod-2
        first_file_path = pod.get_file_path(self.pod_obj1, first_file)
        logger.info(first_file_path)
        second_pod.exec_cmd_on_pod(
            command=f'bash -c "mv {first_file_path} {first_file_path}-renamed"',
            out_yaml_format=False,
        )

        logger.info("Verify presence of renamed files from both pods")
        renamed_paths = [
            f"{first_file_path}-renamed",
            f"{second_file_path}-renamed",
        ]
        for renamed_path in renamed_paths:
            found_on_first = pod.check_file_existence(
                self.pod_obj1, renamed_path
            )
            assert found_on_first, f"File {renamed_path} doesn't exist"
            logger.info(
                f"File {renamed_path} exists in {self.pod_obj1.name} "
            )

            found_on_second = pod.check_file_existence(
                second_pod, renamed_path
            )
            assert found_on_second, f"File {renamed_path} doesn't exist"
            logger.info(f"File {renamed_path} exists in {second_pod.name}")
    def test_rbd_block_pvc_snapshot(self, snapshot_factory,
                                    snapshot_restore_factory, pod_factory):
        """
        Test to take snapshots of RBD Block VolumeMode PVCs

        Flow: run write IO on every pod and confirm the device md5sum changed,
        snapshot each PVC, delete the pods and the parent PVCs, restore every
        snapshot to a new PVC, attach the restored PVCs to new pods, verify
        the device md5sum on the new pods and run IO on them.

        """

        def device_md5sum(target_pod):
            # md5sum of the block storage path of the given pod
            return cal_md5sum(
                pod_obj=target_pod,
                file_name=target_pod.get_storage_path(storage_type="block"),
                block=True,
            )

        # Run IO
        log.info("Find initial md5sum value and run IO on all pods")
        for app_pod in self.pod_objs:
            # Find initial md5sum
            app_pod.md5sum_before_io = device_md5sum(app_pod)
            app_pod.run_io(
                storage_type="block",
                size=f"{self.pvc_size - 1}G",
                io_direction="write",
                runtime=60,
            )
        log.info("IO started on all pods")

        # Wait for IO completion
        for app_pod in self.pod_objs:
            app_pod.get_fio_results()
        log.info("IO completed on all pods")

        # Verify md5sum has changed after IO. Create snapshot
        log.info(
            "Verify md5sum has changed after IO and create snapshot from all PVCs"
        )
        snapshots = []
        for app_pod in self.pod_objs:
            checksum_after_io = device_md5sum(app_pod)
            assert app_pod.md5sum_before_io != checksum_after_io, (
                f"md5sum has not changed after IO on pod {app_pod.name}"
            )
            log.info(f"Creating snapshot of PVC {app_pod.pvc.name}")
            snapshot = snapshot_factory(app_pod.pvc, wait=False)
            # Remember the checksum so the restored volume can be compared
            snapshot.md5sum = checksum_after_io
            snapshots.append(snapshot)
        log.info("Snapshots created")

        # Verify snapshots are ready
        log.info("Verify snapshots are ready")
        for snapshot in snapshots:
            snapshot.ocp.wait_for_resource(
                condition="true",
                resource_name=snapshot.name,
                column=constants.STATUS_READYTOUSE,
                timeout=180,
            )

        # Delete pods
        log.info("Deleting the pods")
        for app_pod in self.pod_objs:
            app_pod.delete()
            app_pod.ocp.wait_for_delete(resource_name=app_pod.name)
        log.info("Deleted all the pods")

        # Delete parent PVCs to verify snapshot is independent
        log.info("Deleting parent PVCs")
        for parent_pvc in self.pvc_objs:
            # Grab the PV object first so its deletion can be verified
            backing_pv = parent_pvc.backed_pv_obj
            parent_pvc.delete()
            parent_pvc.ocp.wait_for_delete(resource_name=parent_pvc.name)
            log.info(
                f"Deleted PVC {parent_pvc.name}. Verifying whether PV "
                f"{backing_pv.name} is deleted."
            )
            backing_pv.ocp.wait_for_delete(resource_name=backing_pv.name)
        log.info(
            "Deleted parent PVCs before restoring snapshot. PVs are also deleted."
        )

        # Create PVCs out of the snapshots
        log.info("Creating new PVCs from snapshots")
        restored_pvcs = []
        for snapshot in snapshots:
            log.info(f"Creating a PVC from snapshot {snapshot.name}")
            restored_pvc = snapshot_restore_factory(
                snapshot_obj=snapshot,
                size=f"{self.pvc_size}Gi",
                volume_mode=snapshot.parent_volume_mode,
                access_mode=snapshot.parent_access_mode,
                status="",
            )
            log.info(
                f"Created PVC {restored_pvc.name} from snapshot {snapshot.name}"
            )
            # Carry the expected checksum over to the restored PVC
            restored_pvc.md5sum = snapshot.md5sum
            restored_pvcs.append(restored_pvc)
        log.info("Created new PVCs from all the snapshots")

        # Confirm that the restored PVCs are Bound
        log.info("Verify the restored PVCs are Bound")
        for restored_pvc in restored_pvcs:
            wait_for_resource_state(
                resource=restored_pvc,
                state=constants.STATUS_BOUND,
                timeout=180,
            )
            restored_pvc.reload()
        log.info("Verified: Restored PVCs are Bound.")

        # Attach the restored PVCs to pods. Attach RWX PVC on two pods
        log.info("Attach the restored PVCs to pods")
        restored_pods = create_pods(
            restored_pvcs,
            pod_factory,
            constants.CEPHBLOCKPOOL,
            pods_for_rwx=2,
            status="",
        )

        # Verify the new pods are running
        log.info("Verify the new pods are running")
        for new_pod in restored_pods:
            wait_for_resource_state(new_pod, constants.STATUS_RUNNING)
        log.info("Verified: New pods are running")

        log.info("Verifying md5sum on new pods")
        for new_pod in restored_pods:
            log.info(f"Verifying md5sum on pod {new_pod.name}")
            device_path = new_pod.get_storage_path(storage_type="block")
            expected_md5sum = new_pod.pvc.md5sum
            verify_data_integrity(
                pod_obj=new_pod,
                file_name=device_path,
                original_md5sum=expected_md5sum,
                block=True,
            )
            log.info(f"Verified md5sum on pod {new_pod.name}")
        log.info("Verified md5sum on all pods")

        # Run IO on new pods
        log.info("Starting IO on new pods")
        for new_pod in restored_pods:
            new_pod.run_io(storage_type="block", size="500M", runtime=15)

        # Wait for IO completion on new pods
        log.info("Waiting for IO completion on new pods")
        for new_pod in restored_pods:
            new_pod.get_fio_results()
        log.info("IO completed on new pods.")
Example #6
0
    def test_rwo_pvc_fencing_node_prolonged_and_short_network_failure(
            self, nodes, setup, node_restart_teardown):
        """
        OCS-1431/OCS-1436:
        - Start DeploymentConfig based app pods on 1 node
        - Make the node (where app pods are running) unresponsive
            by bringing its main network interface down
        - Disrupt the leader provisioner pods if not running on above selected
            node
        - Check new app pods and/or mon, osd pods scheduled on another node
            are stuck due to Multi-Attach error.
        - Power off the unresponsive node
        - Force delete the app pods and/or mon,osd pods on the unresponsive node
        - Check new app pods and/or mon, osd pods scheduled on another node comes
            into Running state
        - Run IOs on new app pods
        - Again make the node (where app pods are running) unresponsive
            by bringing its main network interface down
        - Check new app pods scheduled on another node are stuck due to
            Multi-Attach error.
        - Reboot the unresponsive node
        - When unresponsive node recovers, run IOs on new app pods

        """
        ceph_cluster, dc_pods, ceph_pods, app_pod_nodes, test_nodes, disruptor = setup

        external_mode = helpers.storagecluster_independent_check()
        extra_nodes = list(set(test_nodes) - set(app_pod_nodes))
        helpers.remove_label_from_worker_node(node_list=extra_nodes[:-1],
                                              label_key="nodetype")

        # Run IO on pods
        md5sum_data = self.run_and_verify_io(pod_list=dc_pods,
                                             fio_filename="io_file1",
                                             run_io_in_bg=True)

        # Disrupt leader plugin-provisioner pods, skip if running on node to be failed
        if disruptor:
            [disruption.delete_resource() for disruption in disruptor]

        # Induce network failure on the nodes
        node.node_network_failure(app_pod_nodes)
        logger.info(f"Waiting for {self.prolong_nw_fail_time} seconds")
        sleep(self.prolong_nw_fail_time)

        # Wait for pods to be rescheduled
        for pod_obj in dc_pods + ceph_pods:
            pod_obj.ocp.wait_for_resource(
                condition=constants.STATUS_TERMINATING,
                resource_name=pod_obj.name)

        # Fetch info of new pods and verify Multi-Attach error
        new_dc_pods = self.get_new_pods(dc_pods)
        assert len(new_dc_pods) == len(
            dc_pods), "Unexpected number of app pods"
        self.verify_multi_attach_error(new_dc_pods)

        new_ceph_pods = []
        if ceph_pods:
            new_ceph_pods = self.get_new_pods(ceph_pods)
            assert len(new_ceph_pods) > 0, "Unexpected number of osd pods"
            self.verify_multi_attach_error(new_ceph_pods)

        logger.info("Executing manual recovery steps")
        # Power off the unresponsive node
        logger.info(f"Powering off the unresponsive node: {app_pod_nodes}")
        nodes.stop_nodes(node.get_node_objs(app_pod_nodes))

        # Force delete the app pods and/or mon,osd pods on the unresponsive node
        for pod_obj in dc_pods + ceph_pods:
            pod_obj.delete(force=True)

        # Wait for new app pods to reach Running state
        for pod_obj in new_dc_pods:
            pod_obj.ocp.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                resource_name=pod_obj.name,
                timeout=1200,
                sleep=30,
            ), (f"App pod with name {pod_obj.name} did not reach Running state"
                )

        if not external_mode:
            # Wait for mon and osd pods to reach Running state
            selectors_to_check = {
                constants.MON_APP_LABEL: self.expected_mon_count,
                constants.OSD_APP_LABEL: ceph_cluster.osd_count,
            }
            for selector, count in selectors_to_check.items():
                assert ceph_cluster.POD.wait_for_resource(
                    condition=constants.STATUS_RUNNING,
                    selector=selector,
                    resource_count=count,
                    timeout=1800,
                    sleep=60,
                ), f"{count} expected pods with selector {selector} are not in Running state"

            if ceph_cluster.mon_count == self.expected_mon_count:
                # Check ceph health
                toolbox_status = ceph_cluster.POD.get_resource_status(
                    ceph_cluster.toolbox.name)
                if toolbox_status == constants.STATUS_TERMINATING:
                    ceph_cluster.toolbox.delete(force=True)

                assert ceph_health_check(), "Ceph cluster health is not OK"
                logger.info("Ceph cluster health is OK")

        # Verify data integrity from new pods
        for num, pod_obj in enumerate(new_dc_pods):
            pod.verify_data_integrity(pod_obj=pod_obj,
                                      file_name="io_file1",
                                      original_md5sum=md5sum_data[num])

        # Run IO on new pods
        md5sum_data2 = self.run_and_verify_io(pod_list=new_dc_pods,
                                              fio_filename="io_file2",
                                              run_io_in_bg=True)

        helpers.label_worker_node(node_list=extra_nodes[:-1],
                                  label_key="nodetype",
                                  label_value="app-pod")

        # Induce network failure on the node
        node.node_network_failure(extra_nodes[-1])
        logger.info(f"Waiting for {self.short_nw_fail_time} seconds")
        sleep(self.short_nw_fail_time)

        # Wait for pods to be rescheduled
        for pod_obj in new_dc_pods:
            pod_obj.ocp.wait_for_resource(
                condition=constants.STATUS_TERMINATING,
                resource_name=pod_obj.name,
                timeout=600,
                sleep=30,
            )

        # Fetch info of new pods and verify Multi-Attach error
        new_dc_pods2 = self.get_new_pods(new_dc_pods)
        assert len(new_dc_pods2) == len(
            new_dc_pods), "Unexpected number of app pods"
        self.verify_multi_attach_error(new_dc_pods2)

        # Reboot the unresponsive node
        logger.info(f"Rebooting the unresponsive node: {extra_nodes[-1]}")
        nodes.restart_nodes_by_stop_and_start(
            node.get_node_objs([extra_nodes[-1]]))
        node.wait_for_nodes_status(node_names=[extra_nodes[-1]],
                                   status=constants.NODE_READY)

        # Wait for new app pods to reach Running state
        for pod_obj in new_dc_pods2:
            pod_obj.ocp.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                resource_name=pod_obj.name,
                timeout=1200,
                sleep=30,
            ), (f"App pod with name {pod_obj.name} did not reach Running state"
                )

        if not external_mode:
            # Wait for mon and osd pods to reach Running state
            for selector, count in selectors_to_check.items():
                assert ceph_cluster.POD.wait_for_resource(
                    condition=constants.STATUS_RUNNING,
                    selector=selector,
                    resource_count=count,
                    timeout=1800,
                    sleep=60,
                ), f"{count} expected pods with selector {selector} are not in Running state"

            if ceph_cluster.mon_count == 3:
                # Check ceph health
                assert ceph_health_check(), "Ceph cluster health is not OK"
                logger.info("Ceph cluster health is OK")

        # Verify data integrity from new pods
        for num, pod_obj in enumerate(new_dc_pods2):
            pod.verify_data_integrity(pod_obj=pod_obj,
                                      file_name="io_file2",
                                      original_md5sum=md5sum_data2[num])

        for num, pod_obj in enumerate(new_dc_pods2):
            pod.verify_data_integrity(pod_obj=pod_obj,
                                      file_name="io_file1",
                                      original_md5sum=md5sum_data[num])

        # Run IO on new pods
        self.run_and_verify_io(pod_list=new_dc_pods2,
                               fio_filename="io_file3",
                               return_md5sum=False)
Example #7
0
    def test_rwo_pvc_fencing_node_short_network_failure(
            self, nodes, setup, node_restart_teardown):
        """
        OCS-1423/OCS-1428/OCS-1426:
        - Start DeploymentConfig based app pods on 1 OCS/Non-OCS node
        - Make the node (where app pods are running) unresponsive
            by bringing its main network interface down
        - Check new app pods and/or mon, osd pods scheduled on another node
            are stuck due to Multi-Attach error.
        - Reboot the unresponsive node
        - When unresponsive node recovers, run IOs on new app pods

        OCS-1424/OCS-1434:
        - Start DeploymentConfig based app pods on multiple node
            Colocated scenario: Select 1 node where osd and/or mon is running,
                select other 2 nodes where mon/osd are not running
            Dedicated scenario: 3 Non-OCS nodes
        - Disrupt the leader provisioner pods if not running on above selected
            nodes
        - Make the nodes (where app pods are running) unresponsive
            by bringing their main network interface down
        - Check new app pods and/or mon, osd pods scheduled on another node and
            are stuck due to Multi-Attach error.
        - Reboot the unresponsive nodes
        - When unresponsive nodes recover, run IOs on new app pods

        Args:
            nodes: Object whose ``restart_nodes_by_stop_and_start`` is used to
                reboot the unresponsive node(s)
            setup (tuple): ``(ceph_cluster, dc_pods, ceph_pods, app_pod_nodes,
                test_nodes, disruptor)``; ``test_nodes`` is not used here
            node_restart_teardown: Not referenced in the body; presumably
                requested only for its teardown side effect (TODO confirm)

        """
        (ceph_cluster, dc_pods, ceph_pods, app_pod_nodes, _test_nodes,
         disruptor) = setup

        # Run IO on pods; the returned md5sums are compared again after the
        # pods have been rescheduled
        md5sum_data = self.run_and_verify_io(pod_list=dc_pods,
                                             fio_filename="io_file1",
                                             run_io_in_bg=True)

        # OCS-1424/OCS-1434
        # Disrupt leader plugin-provisioner pods, skip if running on node to be failed
        if disruptor:
            for disruption in disruptor:
                disruption.delete_resource()

        # Induce network failure on the nodes
        node.node_network_failure(app_pod_nodes)
        logger.info(f"Waiting for {self.short_nw_fail_time} seconds")
        sleep(self.short_nw_fail_time)

        # Wait for pods to be rescheduled
        for pod_obj in dc_pods + ceph_pods:
            pod_obj.ocp.wait_for_resource(
                condition=constants.STATUS_TERMINATING,
                resource_name=pod_obj.name,
                timeout=600,
                sleep=30,
            )

        # Fetch info of new pods and verify Multi-Attach error
        new_dc_pods = self.get_new_pods(dc_pods)
        assert len(new_dc_pods) == len(
            dc_pods), "Unexpected number of app pods"
        self.verify_multi_attach_error(new_dc_pods)

        if ceph_pods:
            new_ceph_pods = self.get_new_pods(ceph_pods)
            assert len(new_ceph_pods) > 0, "Unexpected number of osd pods"
            self.verify_multi_attach_error(new_ceph_pods)

        # Reboot the unresponsive node(s)
        logger.info(f"Rebooting the unresponsive node(s): {app_pod_nodes}")
        nodes.restart_nodes_by_stop_and_start(
            node.get_node_objs(app_pod_nodes))
        node.wait_for_nodes_status(node_names=app_pod_nodes,
                                   status=constants.NODE_READY)

        # Wait for new app pods to reach Running state.
        # The result is asserted (as done for the mon/osd pods below) so the
        # failure message is actually reported instead of being discarded.
        for pod_obj in new_dc_pods:
            assert pod_obj.ocp.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                resource_name=pod_obj.name,
                timeout=1200,
                sleep=30,
            ), f"App pod with name {pod_obj.name} did not reach Running state"

        if not helpers.storagecluster_independent_check():
            # Wait for mon and osd pods to reach Running state
            selectors_to_check = {
                constants.MON_APP_LABEL: ceph_cluster.mon_count,
                constants.OSD_APP_LABEL: ceph_cluster.osd_count,
            }
            for selector, count in selectors_to_check.items():
                assert ceph_cluster.POD.wait_for_resource(
                    condition=constants.STATUS_RUNNING,
                    selector=selector,
                    resource_count=count,
                    timeout=1800,
                    sleep=60,
                ), f"{count} expected pods with selector {selector} are not in Running state"

            assert ceph_health_check(), "Ceph cluster health is not OK"
            logger.info("Ceph cluster health is OK")

        # Verify data integrity from new pods
        for num, pod_obj in enumerate(new_dc_pods):
            assert pod.verify_data_integrity(pod_obj=pod_obj,
                                             file_name="io_file1",
                                             original_md5sum=md5sum_data[num]
                                             ), "Data integrity check failed"

        # Run IO on new pods
        self.run_and_verify_io(pod_list=new_dc_pods,
                               fio_filename="io_file2",
                               return_md5sum=False)
    def test_pvc_to_pvc_clone(self, kv_version, kms_provider, pod_factory):
        """
        Clone encrypted RBD block PVCs and validate the clones.

        Flow: confirm each parent pod sees a "crypt" device and write data to
        it, clone every parent PVC, attach the clones to new pods, confirm the
        clones show a "crypt" device and match the parent's md5sum, run IO on
        the new pods, then delete all pods and PVCs. When the KMS provider is
        Vault, key presence is checked after cloning and key removal is
        checked after deletion.

        """

        log.info("Checking for encrypted device and running IO on all pods")
        for parent_handle, parent_pod in zip(self.vol_handles, self.pod_objs):
            crypt_dev_found = parent_pod.exec_sh_cmd_on_pod(
                command=f"lsblk | grep {parent_handle} | grep crypt")
            if not crypt_dev_found:
                raise ResourceNotFoundError(
                    f"Encrypted device not found in {parent_pod.name}")
            log.info(f"Encrypted device found in {parent_pod.name}")
            log.info(f"File created during IO {parent_pod.name}")
            parent_pod.run_io(
                storage_type="block",
                size="500M",
                io_direction="write",
                runtime=60,
                end_fsync=1,
                direct=1,
            )
        log.info("IO started on all pods")

        # Collect fio results from every parent pod before cloning
        for parent_pod in self.pod_objs:
            parent_pod.get_fio_results()
        log.info("IO completed on all pods")

        clone_pvcs = []
        clone_handles = []

        # Record the checksum of each parent device and clone its PVC
        log.info("Calculate the md5sum after IO and create clone of all PVCs")
        for parent_pod in self.pod_objs:
            block_dev_path = parent_pod.get_storage_path(storage_type="block")
            parent_pod.md5sum_after_io = pod.cal_md5sum(
                pod_obj=parent_pod,
                file_name=block_dev_path,
                block=True,
            )

            clone_pvc = pvc.create_pvc_clone(
                self.sc_obj.name,
                parent_pod.pvc.name,
                constants.CSI_RBD_PVC_CLONE_YAML,
                self.proj_obj.namespace,
                volume_mode=constants.VOLUME_MODE_BLOCK,
                access_mode=parent_pod.pvc.access_mode,
            )
            helpers.wait_for_resource_state(clone_pvc, constants.STATUS_BOUND)
            clone_pvc.reload()
            # Carry the parent's checksum on the clone for the later comparison
            clone_pvc.md5sum = parent_pod.md5sum_after_io
            clone_pvcs.append(clone_pvc)
        log.info("Clone of all PVCs created")

        # Attach a pod to each cloned PVC
        clone_pods = helpers.create_pods(
            clone_pvcs,
            pod_factory,
            constants.CEPHBLOCKPOOL,
            pods_for_rwx=1,
            status="",
        )

        # Pods were created without a status check, so wait for Running here
        log.info("Verify the new pods are running")
        for clone_pod in clone_pods:
            helpers.wait_for_resource_state(clone_pod,
                                            constants.STATUS_RUNNING)
            clone_pod.reload()
        log.info("Verified: New pods are running")

        # Gather volume handles of the clones; with Vault, also require that
        # a key exists for each handle
        for clone_pvc in clone_pvcs:
            backing_pv = clone_pvc.backed_pv_obj
            clone_handle = (
                backing_pv.get().get("spec").get("csi").get("volumeHandle"))
            clone_handles.append(clone_handle)

            if kms_provider != constants.VAULT_KMS_PROVIDER:
                continue
            if not kms.is_key_present_in_path(
                    key=clone_handle, path=self.kms.vault_backend_path):
                raise ResourceNotFoundError(
                    f"Vault: Key not found for restored PVC {clone_pvc.name}"
                )
            log.info(f"Vault: Found key for restore PVC {clone_pvc.name}")

        # Check for the encrypted device and compare md5sum on each new pod
        for clone_handle, clone_pod in zip(clone_handles, clone_pods):
            crypt_dev_found = clone_pod.exec_sh_cmd_on_pod(
                command=f"lsblk | grep {clone_handle} | grep crypt")
            if not crypt_dev_found:
                raise ResourceNotFoundError(
                    f"Encrypted device not found in {clone_pod.name}")
            log.info(f"Encrypted device found in {clone_pod.name}")

            log.info(f"Verifying md5sum on pod {clone_pod.name}")
            pod.verify_data_integrity(
                pod_obj=clone_pod,
                file_name=clone_pod.get_storage_path(storage_type="block"),
                original_md5sum=clone_pod.pvc.md5sum,
                block=True,
            )
            log.info(f"Verified md5sum on pod {clone_pod.name}")

        # Start IO on the pods using the clones
        log.info("Starting IO on new pods")
        for clone_pod in clone_pods:
            clone_pod.run_io(storage_type="block", size="100M", runtime=10)

        # Collect fio results from the pods using the clones
        log.info("Waiting for IO completion on new pods")
        for clone_pod in clone_pods:
            clone_pod.get_fio_results()
        log.info("IO completed on new pods.")

        # Remove clone and parent pods, then clone and parent PVCs
        log.info("Deleting all pods")
        for any_pod in clone_pods + self.pod_objs:
            any_pod.delete()
            any_pod.ocp.wait_for_delete(resource_name=any_pod.name)

        log.info("Deleting all PVCs")
        for any_pvc in clone_pvcs + self.pvc_objs:
            # Look up the PV first; it is what we wait on after PVC deletion
            backing_pv = any_pvc.backed_pv_obj
            any_pvc.delete()
            backing_pv.ocp.wait_for_delete(resource_name=backing_pv.name)

        # Key removal is checked only for Vault, and only for kv v1 or
        # OCS version 4.9 and above
        if kms_provider == constants.VAULT_KMS_PROVIDER and (
                kv_version == "v1" or Version.coerce(
                    config.ENV_DATA["ocs_version"]) >= Version.coerce("4.9")):
            log.info(
                "Verify whether the keys for cloned PVCs are deleted from vault"
            )
            for key in clone_handles + self.vol_handles:
                if kms.is_key_present_in_path(
                        key=key, path=self.kms.vault_backend_path):
                    raise KMSResourceCleaneupError(
                        f"Vault: Key deletion failed for {key}")
                log.info(f"Vault: Key deleted for {key}")
            log.info("All keys from vault were deleted")
Example #9
0
    def test_snapshot_restore_with_different_access_mode(
            self, pod_factory, snapshot_factory, snapshot_restore_factory):
        """
        Restore snapshots into PVCs using each access mode listed for the
        parent's interface and volume mode, then check the restored data.

        Steps:
        - Run IO on all pods and store the md5sum on each parent PVC
        - Snapshot every PVC and wait for the snapshots to be ready
        - Restore each snapshot once per applicable access mode
        - Attach the restored PVCs to pods (2 pods for RWX, otherwise 1)
        - Compare the md5sum seen from the new pods with the stored value

        """
        fio_file = "fio_test"
        # Access modes to restore with, keyed by interface, then volume mode
        restore_modes = {
            constants.CEPHBLOCKPOOL: {
                constants.VOLUME_MODE_FILESYSTEM: [constants.ACCESS_MODE_RWO],
                constants.VOLUME_MODE_BLOCK: [
                    constants.ACCESS_MODE_RWX,
                    constants.ACCESS_MODE_RWO,
                ],
            },
            constants.CEPHFILESYSTEM: {
                constants.VOLUME_MODE_FILESYSTEM: [
                    constants.ACCESS_MODE_RWX,
                    constants.ACCESS_MODE_RWO,
                ]
            },
        }

        # Kick off IO on every pod
        log.info("Starting IO on all pods")
        for app_pod in self.pods:
            if app_pod.pvc.volume_mode == constants.VOLUME_MODE_BLOCK:
                io_type = "block"
            else:
                io_type = "fs"
            app_pod.run_io(
                storage_type=io_type,
                size="1G",
                runtime=20,
                fio_filename=fio_file,
                end_fsync=1,
            )
            log.info(f"IO started on pod {app_pod.name}")
        log.info("Started IO on all pods")

        # Collect fio results and record a checksum per parent PVC
        log.info("Wait for IO to finish on pods")
        for app_pod in self.pods:
            app_pod.get_fio_results()
            log.info(f"IO finished on pod {app_pod.name}")
            # Filesystem volumes are checksummed via the fio file name,
            # other volumes via the block storage path
            if app_pod.pvc.volume_mode == constants.VOLUME_MODE_FILESYSTEM:
                md5_target = fio_file
            else:
                md5_target = app_pod.get_storage_path(storage_type="block")
            is_block = app_pod.pvc.volume_mode == constants.VOLUME_MODE_BLOCK
            app_pod.pvc.md5sum = pod.cal_md5sum(app_pod, md5_target, is_block)
        log.info("IO finished on all pods")

        # Take a snapshot of every PVC without waiting for readiness
        log.info("Creating snapshot of the PVCs")
        snapshots = []
        for parent_pvc in self.pvcs:
            log.info(f"Creating snapshot of PVC {parent_pvc.name}")
            snapshot = snapshot_factory(parent_pvc, wait=False)
            # Carry checksum and interface forward for the restore phase
            snapshot.md5sum = parent_pvc.md5sum
            snapshot.interface = parent_pvc.interface
            snapshots.append(snapshot)
            log.info(f"Created snapshot of PVC {parent_pvc.name}")

        log.info(
            "Snapshots are created. Wait for the snapshots to be in Ready state"
        )
        for snapshot in snapshots:
            snapshot.ocp.wait_for_resource(
                condition="true",
                resource_name=snapshot.name,
                column=constants.STATUS_READYTOUSE,
                timeout=180,
            )
            snapshot.reload()
        log.info("Snapshots are Ready")

        # Restore each snapshot once per access mode applicable to it
        log.info("Restoring snapshots to create new PVCs")
        restored_pvcs = []
        for snapshot in snapshots:
            modes_for_snapshot = restore_modes[snapshot.interface][
                snapshot.parent_volume_mode]
            for mode in modes_for_snapshot:
                restored_pvc = snapshot_restore_factory(
                    snapshot_obj=snapshot,
                    volume_mode=snapshot.parent_volume_mode,
                    access_mode=mode,
                    status="",
                )
                restored_pvc.interface = snapshot.interface
                restored_pvc.md5sum = snapshot.md5sum
                log.info(
                    f"Created PVC {restored_pvc.name} with accessMode "
                    f"{mode} from snapshot {snapshot.name}. "
                    f"Parent PVC accessMode: {snapshot.parent_access_mode}")
                restored_pvcs.append(restored_pvc)
        log.info(
            "Restored all the snapshots to create PVCs with different access modes"
        )

        log.info("Verifying restored PVCs are Bound")
        for restored_pvc in restored_pvcs:
            helpers.wait_for_resource_state(resource=restored_pvc,
                                            state=constants.STATUS_BOUND,
                                            timeout=200)
            restored_pvc.reload()
        log.info("Verified: Restored PVCs are Bound")

        # The restored PVC must keep the volume mode of the snapshot's parent
        for restored_pvc in restored_pvcs:
            restored_mode = restored_pvc.data["spec"]["volumeMode"]
            expected_mode = restored_pvc.snapshot.parent_volume_mode
            assert restored_mode == expected_mode, (
                f"Volume mode mismatch in PVC {restored_pvc.name}")

        # Endless iterator over worker node names, used to spread the pods
        worker_cycle = cycle(node.get_worker_nodes())

        # Create pods that consume the restored PVCs
        log.info("Attach the restored PVCs to pods")
        restored_pods = []
        for restored_pvc in restored_pvcs:
            is_raw_block = restored_pvc.data["spec"]["volumeMode"] == "Block"
            pod_yaml = (constants.CSI_RBD_RAW_BLOCK_POD_YAML
                        if is_raw_block else "")
            # Two pods when the access mode is RWX, a single pod otherwise
            if restored_pvc.get_pvc_access_mode != constants.ACCESS_MODE_RWX:
                pod_count = 1
            else:
                pod_count = 2
            for _ in range(pod_count):
                new_pod = pod_factory(
                    interface=restored_pvc.interface,
                    pvc=restored_pvc,
                    status="",
                    node_name=next(worker_cycle),
                    pod_dict_path=pod_yaml,
                    raw_block_pv=is_raw_block,
                )
                log.info(f"Attaching the PVC {restored_pvc.name} to pod "
                         f"{new_pod.name}")
                restored_pods.append(new_pod)

        # Pods were created without a status check, so wait for Running here
        log.info("Verify the new pods are running")
        for new_pod in restored_pods:
            helpers.wait_for_resource_state(new_pod, constants.STATUS_RUNNING)
        log.info("Verified: New pods are running")

        # Compare checksums seen from the new pods with the stored value
        for new_pod in restored_pods:
            pod_volume_mode = new_pod.pvc.data["spec"]["volumeMode"]
            if pod_volume_mode == constants.VOLUME_MODE_FILESYSTEM:
                md5_target = fio_file
            else:
                md5_target = new_pod.get_storage_path(storage_type="block")
            pod.verify_data_integrity(
                new_pod,
                md5_target,
                new_pod.pvc.md5sum,
                new_pod.pvc.data["spec"]["volumeMode"] ==
                constants.VOLUME_MODE_BLOCK,
            )
            log.info(
                f"Verified: md5sum of {md5_target} on pod {new_pod.name} "
                "matches the original md5sum")
        log.info("Data integrity check passed on all pods")
    def test_rwx_dynamic_pvc(self, interface_type, reclaim_policy, setup,
                             pvc_factory, pod_factory):
        """
        Check that one RWX PVC can be used by two pods on different nodes.

        Both pods write a file, each pod verifies the md5sum of the file
        written by the other, and each pod renames the other's file. The
        renamed files must then exist when checked from both pods.

        """
        sc_obj, worker_nodes_list = setup
        rwx_mode = constants.ACCESS_MODE_RWX
        io_type = "fs"

        logger.info("CephFS RWX test")
        logger.info(f"Creating PVC with {rwx_mode} access mode")
        shared_pvc = pvc_factory(
            interface=interface_type,
            storageclass=sc_obj,
            size=self.pvc_size,
            access_mode=rwx_mode,
            status=constants.STATUS_BOUND,
        )

        # Arguments shared by both pods; only the target node differs
        common_pod_args = dict(
            interface=interface_type,
            pvc=shared_pvc,
            status=constants.STATUS_RUNNING,
            pod_dict_path=constants.NGINX_POD_YAML,
        )

        logger.info(f"Creating first pod on node: {worker_nodes_list[0]} "
                    f"with pvc {shared_pvc.name}")
        first_pod = pod_factory(node_name=worker_nodes_list[0],
                                **common_pod_args)

        logger.info(f"Creating second pod on node: {worker_nodes_list[1]} "
                    f"with pvc {shared_pvc.name}")
        second_pod = pod_factory(node_name=worker_nodes_list[1],
                                 **common_pod_args)

        # The pods must have been scheduled on distinct nodes
        first_pod_node = first_pod.get().get("spec").get("nodeName")
        second_pod_node = second_pod.get().get("spec").get("nodeName")
        assert first_pod_node != second_pod_node, (
            "Both pods are on the same node")

        # Start IO on each pod, using the pod name as the fio file name
        logger.info(f"Running IO on pod {first_pod.name}")
        first_file = first_pod.name
        logger.info(first_file)
        first_pod.run_io(storage_type=io_type,
                         size="1G",
                         fio_filename=first_file)

        logger.info(f"Running IO on pod {second_pod.name}")
        second_file = second_pod.name
        second_pod.run_io(storage_type=io_type,
                          size="1G",
                          fio_filename=second_file)

        # Fetch IO results and checksum the file written by each pod
        pod.get_fio_rw_iops(first_pod)
        first_md5 = pod.cal_md5sum(pod_obj=first_pod, file_name=first_file)

        pod.get_fio_rw_iops(second_pod)
        second_md5 = pod.cal_md5sum(pod_obj=second_pod, file_name=second_file)

        # Each pod verifies the file written by the other one
        logger.info("verify data from alternate pods")

        pod.verify_data_integrity(pod_obj=second_pod,
                                  file_name=first_file,
                                  original_md5sum=first_md5)

        pod.verify_data_integrity(pod_obj=first_pod,
                                  file_name=second_file,
                                  original_md5sum=second_md5)

        # Each pod renames the file written by the other one
        logger.info("Perform modification of files from alternate pod")
        second_path = pod.get_file_path(second_pod, second_file)
        logger.info(second_path)
        first_pod.exec_cmd_on_pod(
            command=f'bash -c "mv {second_path} {second_path}-renamed"',
            out_yaml_format=False,
        )

        first_path = pod.get_file_path(first_pod, first_file)
        logger.info(first_path)
        second_pod.exec_cmd_on_pod(
            command=f'bash -c "mv {first_path} {first_path}-renamed"',
            out_yaml_format=False,
        )

        # Both renamed files must exist when checked from either pod
        logger.info("Verify presence of renamed files from both pods")
        for original_path in (first_path, second_path):
            renamed = f"{original_path}-renamed"
            missing_msg = f"File {renamed} doesn't exist"
            assert pod.check_file_existence(first_pod, renamed), missing_msg
            logger.info(f"File {renamed} exists in {first_pod.name} ")
            assert pod.check_file_existence(second_pod, renamed), missing_msg
            logger.info(f"File {renamed} exists in {second_pod.name}")
    def test_rwo_dynamic_pvc(self, interface_type, reclaim_policy, setup,
                             pvc_factory, pod_factory):
        """
        Check that an RWO PVC is usable by only one pod at a time.

        A first pod runs IO on the PVC while a second pod on another node is
        expected to stay in ContainerCreating with a Multi-Attach error event.
        After the first pod is deleted, the second pod must reach Running,
        see the first pod's data unchanged, and be able to run IO itself.

        """
        sc_obj, worker_nodes_list = setup
        rwo_mode = constants.ACCESS_MODE_RWO
        multi_attach_msg = "Multi-Attach error for volume"
        io_type = "fs"

        logger.info(f"Creating PVC with {rwo_mode} access mode")
        rwo_pvc = pvc_factory(
            interface=interface_type,
            storageclass=sc_obj,
            size=self.pvc_size,
            access_mode=rwo_mode,
            status=constants.STATUS_BOUND,
        )

        # Arguments shared by both pods; node and expected status differ
        common_pod_args = dict(
            interface=interface_type,
            pvc=rwo_pvc,
            pod_dict_path=constants.NGINX_POD_YAML,
        )

        logger.info(f"Creating first pod on node: {worker_nodes_list[0]} "
                    f"with pvc {rwo_pvc.name}")
        first_pod = pod_factory(status=constants.STATUS_RUNNING,
                                node_name=worker_nodes_list[0],
                                **common_pod_args)

        logger.info(f"Creating second pod on node: {worker_nodes_list[1]} "
                    f"with pvc {rwo_pvc.name}")
        second_pod = pod_factory(status=constants.STATUS_CONTAINER_CREATING,
                                 node_name=worker_nodes_list[1],
                                 **common_pod_args)

        # The pods must have been scheduled on distinct nodes
        first_pod_node = first_pod.get().get("spec").get("nodeName")
        second_pod_node = second_pod.get().get("spec").get("nodeName")
        assert first_pod_node != second_pod_node, (
            "Both pods are on the same node")

        # Write data from the first pod and keep its checksum
        logger.info(f"Running IO on first pod {first_pod.name}")
        first_file = first_pod.name
        first_pod.run_io(storage_type=io_type,
                         size="1G",
                         fio_filename=first_file)
        pod.get_fio_rw_iops(first_pod)
        first_md5 = pod.cal_md5sum(pod_obj=first_pod, file_name=first_file)

        # The second pod must still be held in ContainerCreating and must
        # report the expected failure event
        logger.info(
            f"Verify that second pod {second_pod.name} is still in ContainerCreating state"
        )
        helpers.wait_for_resource_state(
            resource=second_pod, state=constants.STATUS_CONTAINER_CREATING)
        self.verify_expected_failure_event(ocs_obj=second_pod,
                                           failure_str=multi_attach_msg)

        logger.info(
            f"Deleting first pod so that second pod can attach PVC {rwo_pvc.name}"
        )
        first_pod.delete()
        first_pod.ocp.wait_for_delete(resource_name=first_pod.name)

        # With the first pod gone, the second pod should reach Running
        helpers.wait_for_resource_state(resource=second_pod,
                                        state=constants.STATUS_RUNNING,
                                        timeout=240)

        logger.info(f"Verify data from second pod {second_pod.name}")
        pod.verify_data_integrity(pod_obj=second_pod,
                                  file_name=first_file,
                                  original_md5sum=first_md5)

        # Write a new file from the second pod
        second_pod.run_io(storage_type=io_type,
                          size="1G",
                          fio_filename=second_pod.name)
        pod.get_fio_rw_iops(second_pod)

        # The first pod's file must be unchanged after the new IO
        logger.info(f"Again verify data from second pod {second_pod.name}")
        pod.verify_data_integrity(pod_obj=second_pod,
                                  file_name=first_file,
                                  original_md5sum=first_md5)
Example #12
0
    def test_clone_when_full(self, pvc_clone_factory, pod_factory):
        """
        Create a clone from an existing PVC when the PVC is 100% utilized.
        Verify data integrity.
        Verify utilization alert in cloned PVC.
        Expand cloned PVC and ensure utilization alerts are stopped.

        """
        pvc_size_expanded = 6
        file_name = "fio_full"
        prometheus_api = PrometheusAPI()

        # Run IO to utilize 100% of volume
        log.info("Run IO on all pods to utilise 100% of PVCs")
        for pod_obj in self.pods:
            # Get available free space in M
            df_avail_size = pod_obj.exec_cmd_on_pod(
                command=f"df {pod_obj.get_storage_path()} -B M --output=avail")
            # Get the numeral value of available space. eg: 3070 from '3070M'
            available_size = int(df_avail_size.strip().split()[1][0:-1])
            pod_obj.run_io(
                "fs",
                size=f"{available_size-2}M",
                runtime=20,
                rate="100M",
                fio_filename=file_name,
                end_fsync=1,
            )
        log.info("Started IO on all pods to utilise 100% of PVCs")

        # Wait for IO to finish
        log.info("Wait for IO to finish on pods")
        for pod_obj in self.pods:
            pod_obj.get_fio_results()
            log.info(f"IO finished on pod {pod_obj.name}")

            # Verify used space on pod is 100%
            used_space = pod.get_used_space_on_mount_point(pod_obj)
            assert used_space == "100%", (
                f"The used space on pod {pod_obj.name} is not 100% "
                f"but {used_space}")
            log.info(f"Verified: Used space on pod {pod_obj.name} is 100%")
            # Calculate md5sum of the file
            pod_obj.pvc.md5sum = pod.cal_md5sum(pod_obj, file_name)

        log.info("Creating clone of the PVCs")
        cloned_pvcs = [pvc_clone_factory(pvc_obj) for pvc_obj in self.pvcs]
        log.info("Created clone of the PVCs. Cloned PVCs are Bound")

        # Attach the cloned PVCs to pods
        log.info("Attach the cloned PVCs to pods")
        clone_pod_objs = []
        for clone_pvc_obj in cloned_pvcs:
            interface = (constants.CEPHFILESYSTEM if
                         (constants.CEPHFS_INTERFACE
                          in clone_pvc_obj.backed_sc) else
                         constants.CEPHBLOCKPOOL)
            clone_pod_obj = pod_factory(interface=interface,
                                        pvc=clone_pvc_obj,
                                        status="")
            log.info(f"Attached the PVC {clone_pvc_obj.name} to pod "
                     f"{clone_pod_obj.name}")
            clone_pod_objs.append(clone_pod_obj)

        # Verify the new pods are running
        log.info("Verify the new pods are running")
        for pod_obj in clone_pod_objs:
            wait_for_resource_state(pod_obj, constants.STATUS_RUNNING)
        log.info("Verified: New pods are running")

        # Verify that the md5sum matches
        for pod_obj in clone_pod_objs:
            log.info(f"Verifying md5sum of {file_name} "
                     f"on pod {pod_obj.name}")
            pod.verify_data_integrity(pod_obj, file_name,
                                      pod_obj.pvc.parent.md5sum)
            log.info(f"Verified: md5sum of {file_name} on pod {pod_obj.name} "
                     f"matches with the original md5sum")

        # Wait till utilization alerts starts
        for response in TimeoutSampler(180, 5, prometheus_api.get, "alerts"):
            alerts = response.json()["data"]["alerts"]
            for pvc_obj in cloned_pvcs:
                alerts_pvc = [
                    alert for alert in alerts if alert.get("labels", {}).get(
                        "persistentvolumeclaim") == pvc_obj.name
                ]
                # At least 2 alerts should be present
                if len(alerts_pvc) < 2:
                    break

                # Verify 'PersistentVolumeUsageNearFull' alert is firing
                if not getattr(pvc_obj, "near_full_alert", False):
                    try:
                        log.info(
                            f"Checking 'PersistentVolumeUsageNearFull' alert "
                            f"for PVC {pvc_obj.name}")
                        near_full_msg = (
                            f"PVC {pvc_obj.name} is nearing full. Data "
                            f"deletion or PVC expansion is required.")
                        check_alert_list(
                            label="PersistentVolumeUsageNearFull",
                            msg=near_full_msg,
                            alerts=alerts_pvc,
                            states=["firing"],
                            severity="warning",
                        )
                        pvc_obj.near_full_alert = True
                    except AssertionError:
                        log.info(f"'PersistentVolumeUsageNearFull' alert not "
                                 f"started firing for PVC {pvc_obj.name}")

                # Verify 'PersistentVolumeUsageCritical' alert is firing
                if not getattr(pvc_obj, "critical_alert", False):
                    try:
                        log.info(
                            f"Checking 'PersistentVolumeUsageCritical' alert "
                            f"for PVC {pvc_obj.name}")
                        critical_msg = (
                            f"PVC {pvc_obj.name} is critically full. Data "
                            f"deletion or PVC expansion is required.")
                        check_alert_list(
                            label="PersistentVolumeUsageCritical",
                            msg=critical_msg,
                            alerts=alerts_pvc,
                            states=["firing"],
                            severity="error",
                        )
                        pvc_obj.critical_alert = True
                    except AssertionError:
                        log.info(f"'PersistentVolumeUsageCritical' alert not "
                                 f"started firing for PVC {pvc_obj.name}")

            # Collect list of PVCs for which alerts are not firing
            not_near_full_pvc = [
                pvc_ob.name for pvc_ob in cloned_pvcs
                if not getattr(pvc_ob, "near_full_alert", False)
            ]
            not_critical_pvc = [
                pvc_ob.name for pvc_ob in cloned_pvcs
                if not getattr(pvc_ob, "critical_alert", False)
            ]

            if (not not_near_full_pvc) and (not not_critical_pvc):
                log.info("'PersistentVolumeUsageNearFull' and "
                         "'PersistentVolumeUsageCritical' alerts are firing "
                         "for all cloned PVCs.")
                break
        log.info("Verified: Utilization alerts are firing")

        log.info("Expanding cloned PVCs.")
        for pvc_obj in cloned_pvcs:
            log.info(f"Expanding size of PVC {pvc_obj.name} to "
                     f"{pvc_size_expanded}Gi")
            # Expand PVC
            pvc_obj.resize_pvc(pvc_size_expanded, True)

        # Verify utilization alerts are stopped
        for response in TimeoutSampler(180, 5, prometheus_api.get, "alerts"):
            alerts = response.json()["data"]["alerts"]
            for pvc_obj in cloned_pvcs:
                alerts_pvc = [
                    alert for alert in alerts if alert.get("labels", {}).get(
                        "persistentvolumeclaim") == pvc_obj.name
                ]
                if not alerts_pvc:
                    pvc_obj.near_full_alert = False
                    pvc_obj.critical_alert = False
                    continue

                # Verify 'PersistentVolumeUsageNearFull' alert stopped firing
                if getattr(pvc_obj, "near_full_alert"):
                    try:
                        log.info(
                            f"Checking 'PrsistentVolumeUsageNearFull' alert "
                            f"is cleared for PVC {pvc_obj.name}")
                        near_full_msg = (
                            f"PVC {pvc_obj.name} is nearing full. Data "
                            f"deletion or PVC expansion is required.")
                        check_alert_list(
                            label="PersistentVolumeUsageNearFull",
                            msg=near_full_msg,
                            alerts=alerts_pvc,
                            states=["firing"],
                            severity="warning",
                        )
                        log.info(
                            f"'PersistentVolumeUsageNearFull' alert is not "
                            f"stopped for PVC {pvc_obj.name}")
                    except AssertionError:
                        pvc_obj.near_full_alert = False
                        log.info(
                            f"'PersistentVolumeUsageNearFull' alert stopped "
                            f"firing for PVC {pvc_obj.name}")

                # Verify 'PersistentVolumeUsageCritical' alert stopped firing
                if getattr(pvc_obj, "critical_alert"):
                    try:
                        log.info(
                            f"Checking 'PersistentVolumeUsageCritical' alert "
                            f"is cleared for PVC {pvc_obj.name}")
                        critical_msg = (
                            f"PVC {pvc_obj.name} is critically full. Data "
                            f"deletion or PVC expansion is required.")
                        check_alert_list(
                            label="PersistentVolumeUsageCritical",
                            msg=critical_msg,
                            alerts=alerts_pvc,
                            states=["firing"],
                            severity="error",
                        )
                        log.info(
                            f"'PersistentVolumeUsageCritical' alert is not "
                            f"stopped for PVC {pvc_obj.name}")
                    except AssertionError:
                        pvc_obj.critical_alert = False
                        log.info(
                            f"'PersistentVolumeUsageCritical' alert stopped "
                            f"firing for PVC {pvc_obj.name}")

            # Collect list of PVCs for which alerts are still firing
            near_full_pvcs = [
                pvc_ob.name for pvc_ob in cloned_pvcs
                if getattr(pvc_ob, "near_full_alert")
            ]
            critical_pvcs = [
                pvc_ob.name for pvc_ob in cloned_pvcs
                if getattr(pvc_ob, "critical_alert")
            ]

            if (not near_full_pvcs) and (not critical_pvcs):
                log.info(
                    "'PersistentVolumeUsageNearFull' and "
                    "'PersistentVolumeUsageCritical' alerts are cleared for "
                    "all cloned PVCs.")
                break

        log.info("Verified: Utilization alerts stopped firing")
Example #13
0
    def test_pvc_snapshot(self, interface, teardown_factory):
        """
        Snapshot a PVC that holds data, restore it to a new PVC and validate
        the restored data from a new pod.

        Steps:
            1. Run fio on the original pod and check the run reported no error.
            2. Check the written file exists and record its md5sum.
            3. Create a snapshot of the PVC and wait for it to be ready to use.
            4. Create a PVC from the snapshot, in the snapshot's namespace,
               using the storage class and size of the original PVC.
            5. Create a pod on the restored PVC and wait for it to be Running.
            6. Check the file exists on the new pod and that its md5sum
               matches the one recorded in step 2.
            7. Run fio on the new pod.

        Args:
            interface (str): The type of the interface
                (e.g. CephBlockPool, CephFileSystem)
            teardown_factory: A fixture to destroy objects

        """
        src_pod = self.pod_obj

        # --- Write data on the original pod ---
        log.info(f"Running IO on pod {src_pod.name}")
        # The fio output file is named after the pod itself
        file_name = src_pod.name
        log.info(f"File created during IO {file_name}")
        src_pod.run_io(storage_type="fs", size="1G", fio_filename=file_name)

        # Fetching the results is what waits for fio to finish
        fio_result = src_pod.get_fio_results()
        io_errors = fio_result.get("jobs")[0].get("error")
        assert io_errors == 0, (
            f"IO error on pod {src_pod.name}. FIO result: {fio_result}"
        )
        log.info(f"Verified IO on pod {src_pod.name}.")

        # --- The file must be present before it is snapshotted ---
        file_path = pod.get_file_path(src_pod, file_name)
        log.info(f"Actual file path on the pod {file_path}")
        assert pod.check_file_existence(src_pod, file_path), (
            f"File {file_name} doesn't exist"
        )
        log.info(f"File {file_name} exists in {src_pod.name}")

        # Reference checksum, compared against the restored copy later on
        orig_md5_sum = pod.cal_md5sum(src_pod, file_name)

        # --- Take the snapshot ---
        src_pvc = self.pvc_obj
        snap_yaml = (
            constants.CSI_CEPHFS_SNAPSHOT_YAML
            if interface == constants.CEPHFILESYSTEM
            else constants.CSI_RBD_SNAPSHOT_YAML
        )
        snap_name = helpers.create_unique_resource_name("test", "snapshot")
        snap_obj = pvc.create_pvc_snapshot(
            src_pvc.name,
            snap_yaml,
            snap_name,
            src_pvc.namespace,
            helpers.default_volumesnapshotclass(interface).name,
        )
        snap_obj.ocp.wait_for_resource(
            condition="true",
            resource_name=snap_obj.name,
            column=constants.STATUS_READYTOUSE,
            timeout=60,
        )
        teardown_factory(snap_obj)

        # --- Restore the snapshot into a new PVC ---
        # Storage class and size are taken from the original PVC
        sc_name = src_pvc.backed_sc
        pvc_size = f"{src_pvc.size}Gi"

        # Both, the snapshot and the restore PVC should be in same namespace
        restore_pvc_name = helpers.create_unique_resource_name(
            "test", "restore-pvc"
        )
        restore_pvc_yaml = (
            constants.CSI_CEPHFS_PVC_RESTORE_YAML
            if interface == constants.CEPHFILESYSTEM
            else constants.CSI_RBD_PVC_RESTORE_YAML
        )
        restore_pvc_obj = pvc.create_restore_pvc(
            sc_name=sc_name,
            snap_name=snap_obj.name,
            namespace=snap_obj.namespace,
            size=pvc_size,
            pvc_name=restore_pvc_name,
            restore_pvc_yaml=restore_pvc_yaml,
        )
        helpers.wait_for_resource_state(
            restore_pvc_obj, constants.STATUS_BOUND
        )
        restore_pvc_obj.reload()
        teardown_factory(restore_pvc_obj)

        # --- Start a pod on the restored PVC ---
        restore_pod_obj = helpers.create_pod(
            interface_type=interface,
            pvc_name=restore_pvc_obj.name,
            namespace=snap_obj.namespace,
            pod_dict_path=constants.NGINX_POD_YAML,
        )
        helpers.wait_for_resource_state(
            resource=restore_pod_obj, state=constants.STATUS_RUNNING
        )
        restore_pod_obj.reload()
        teardown_factory(restore_pod_obj)

        # --- Validate the restored data ---
        # The path resolved on the original pod is reused on the new pod
        log.info(
            f"Checking the existence of {file_name} "
            f"on restore pod {restore_pod_obj.name}"
        )
        assert pod.check_file_existence(restore_pod_obj, file_path), (
            f"File {file_name} doesn't exist"
        )
        log.info(f"File {file_name} exists in {restore_pod_obj.name}")

        log.info(
            f"Verifying that md5sum of {file_name} "
            f"on pod {src_pod.name} matches with md5sum "
            f"of the same file on restore pod {restore_pod_obj.name}"
        )
        assert pod.verify_data_integrity(
            restore_pod_obj, file_name, orig_md5_sum
        ), "Data integrity check failed"
        log.info("Data integrity check passed, md5sum are same")

        # --- The restored volume must also accept new IO ---
        log.info("Running IO on new pod")
        restore_pod_obj.run_io(storage_type="fs", size="1G", runtime=20)

        # Wait for fio to finish
        restore_pod_obj.get_fio_results()
        log.info("IO finished o new pod")
    def test_pvc_snapshot_performance(self, teardown_factory, pvc_size):
        """
        Measure the creation and restore times of PVC snapshots.

        The scenario below is run ``self.tests_numbers`` times; every run
        adds one more file to the same PVC before taking the snapshot:

        1. Fill the PVC with a new file (``fillup_fs``) on the pod.
        2. Calculate md5sum of the file.
        3. Take a snapshot of the PVC and measure the time of creation.
        4. Restore from the snapshot and measure the time.
        5. Attach a new pod to the restored PVC.
        6. Verify that the file is present on the new pod also.
        7. Verify that the md5sum of the file on the new pod matches
           with the md5sum of the file on the original pod.

        The results of every run and the averages over all runs are logged
        at the end of the test.

        Args:
            teardown_factory: A fixture to destroy objects
            pvc_size: the size of the PVC to be tested - parametrize

        """

        # Getting the total Storage capacity
        ceph_cluster = CephCluster()
        ceph_capacity = ceph_cluster.get_ceph_capacity()

        log.info(f"Total capacity size is : {ceph_capacity}")
        log.info(f"PVC Size is : {pvc_size}")
        needed_capacity = int(pvc_size) * 5
        log.info(f"Needed capacity is {needed_capacity}")
        if int(ceph_capacity) < needed_capacity:
            log.error(
                f"PVC size is {pvc_size}GiB and it is too large for this system"
                f" which have only {ceph_capacity}GiB")
            # NOTE(review): returning here makes the test PASS without
            # measuring anything - consider skipping the test explicitly.
            return
        # Calculating the file size as 25% of the PVC size
        # in the end the PVC will be 75% full
        # (presumably assumes self.tests_numbers == 3 - TODO confirm)
        filesize = self.pvc_obj.size * 0.25
        # Change the file size to MB and from int to str
        file_size = f"{int(filesize * 1024)}M"

        # The restore parameters depend only on the original PVC and on the
        # interface, so they are resolved once and not on every run.
        # Same Storage class of the original PVC
        sc_name = self.pvc_obj.backed_sc
        # Size should be same as of the original PVC. It gets its own name so
        # that the 'pvc_size' argument is not overwritten with a string.
        restore_size = str(self.pvc_obj.size) + "Gi"
        restore_pvc_yaml = constants.CSI_RBD_PVC_RESTORE_YAML
        if self.interface == constants.CEPHFILESYSTEM:
            restore_pvc_yaml = constants.CSI_CEPHFS_PVC_RESTORE_YAML

        all_results = []

        for test_num in range(self.tests_numbers):
            test_results = {
                "test_num": test_num + 1,
                # One more file is written on every run, so the dataset
                # covered by the snapshot grows with the run number.
                "dataset": (test_num + 1) * filesize * 1024,  # size in MiB
                "create": {
                    "time": None,
                    "speed": None
                },
                "restore": {
                    "time": None,
                    "speed": None
                },
            }
            log.info(f"Starting test phase number {test_num}")
            # Step 1. Run I/O on a pod file.
            file_name = f"{self.pod_obj.name}-{test_num}"
            log.info(f"Starting IO on the POD {self.pod_obj.name}")
            # Going to run only write IO to fill the PVC for the snapshot
            self.pod_obj.fillup_fs(size=file_size, fio_filename=file_name)

            # Wait for fio to finish
            fio_result = self.pod_obj.get_fio_results()
            err_count = fio_result.get("jobs")[0].get("error")
            assert (
                err_count == 0
            ), f"IO error on pod {self.pod_obj.name}. FIO result: {fio_result}"
            log.info("IO on the PVC Finished")

            # Verify presence of the file
            file_path = pod.get_file_path(self.pod_obj, file_name)
            log.info(f"Actual file path on the pod {file_path}")
            assert pod.check_file_existence(
                self.pod_obj, file_path), f"File {file_name} doesn't exist"
            log.info(f"File {file_name} exists in {self.pod_obj.name}")

            # Step 2. Calculate md5sum of the file.
            orig_md5_sum = pod.cal_md5sum(self.pod_obj, file_name)

            # Step 3. Take a snapshot of the PVC and measure the time of creation.
            snap_name = self.pvc_obj.name.replace("pvc-test",
                                                  f"snapshot-test{test_num}")
            log.info(f"Taking snapshot of the PVC {snap_name}")

            test_results["create"]["time"] = self.measure_create_snapshot_time(
                pvc_name=self.pvc_obj.name,
                snap_name=snap_name,
                interface=self.interface,
            )
            test_results["create"]["speed"] = int(
                test_results["dataset"] / test_results["create"]["time"])
            log.info(
                f' Test {test_num} dataset is {test_results["dataset"]} MiB')
            log.info(
                f'Snapshot creation time is : {test_results["create"]["time"]} sec.'
            )
            log.info(
                f'Snapshot speed is : {test_results["create"]["speed"]} MB/sec'
            )

            # Step 4. Restore the PVC from the snapshot and measure the time
            # Both, the snapshot and the restore PVC should be in same namespace
            log.info("Restoring from the Snapshot")
            restore_pvc_name = self.pvc_obj.name.replace(
                "pvc-test", f"restore-pvc{test_num}")

            log.info("Resorting the PVC from Snapshot")
            # NOTE(review): self.snap_obj is not assigned in this method -
            # presumably set by measure_create_snapshot_time(); verify.
            restore_pvc_obj = pvc.create_restore_pvc(
                sc_name=sc_name,
                snap_name=self.snap_obj.name,
                namespace=self.snap_obj.namespace,
                size=restore_size,
                pvc_name=restore_pvc_name,
                restore_pvc_yaml=restore_pvc_yaml,
            )
            helpers.wait_for_resource_state(
                restore_pvc_obj,
                constants.STATUS_BOUND,
                timeout=3600  # setting this to 60 Min.
                # since it can be take long time to restore, and we want it to finished.
            )
            teardown_factory(restore_pvc_obj)
            restore_pvc_obj.reload()
            log.info("PVC was restored from the snapshot")
            test_results["restore"][
                "time"] = helpers.measure_pvc_creation_time(
                    self.interface, restore_pvc_obj.name)
            test_results["restore"]["speed"] = int(
                test_results["dataset"] / test_results["restore"]["time"])
            log.info(
                f'Snapshot restore time is : {test_results["restore"]["time"]}'
            )
            log.info(
                f'restore sped is : {test_results["restore"]["speed"]} MB/sec')

            # Step 5. Attach a new pod to the restored PVC
            restore_pod_obj = helpers.create_pod(
                interface_type=self.interface,
                pvc_name=restore_pvc_obj.name,
                namespace=self.snap_obj.namespace,
                pod_dict_path=constants.NGINX_POD_YAML,
            )

            # Confirm that the pod is running
            helpers.wait_for_resource_state(resource=restore_pod_obj,
                                            state=constants.STATUS_RUNNING)
            teardown_factory(restore_pod_obj)
            restore_pod_obj.reload()

            # Step 6. Verify that the file is present on the new pod also.
            log.info(f"Checking the existence of {file_name} "
                     f"on restore pod {restore_pod_obj.name}")
            assert pod.check_file_existence(
                restore_pod_obj, file_path), f"File {file_name} doesn't exist"
            log.info(f"File {file_name} exists in {restore_pod_obj.name}")

            # Step 7. Verify that the md5sum matches
            log.info(f"Verifying that md5sum of {file_name} "
                     f"on pod {self.pod_obj.name} matches with md5sum "
                     f"of the same file on restore pod {restore_pod_obj.name}")
            assert pod.verify_data_integrity(
                restore_pod_obj, file_name,
                orig_md5_sum), "Data integrity check failed"
            log.info("Data integrity check passed, md5sum are same")

            all_results.append(test_results)

        # logging the test summery, all info in one place for easy log reading
        log.info("Test summery :")
        for tst in all_results:
            log.info(
                f"Test {tst['test_num']} results : dataset is {tst['dataset']} MiB. "
                f"Take snapshot time is {tst['create']['time']} "
                f"at {tst['create']['speed']} MiB/Sec "
                f"Restore from snapshot time is {tst['restore']['time']} "
                f"at {tst['restore']['speed']} MiB/Sec ")

        # Totals over all runs, used for the averages below
        c_speed = sum(tst["create"]["speed"] for tst in all_results)
        c_runtime = sum(tst["create"]["time"] for tst in all_results)
        r_speed = sum(tst["restore"]["speed"] for tst in all_results)
        r_runtime = sum(tst["restore"]["time"] for tst in all_results)
        log.info(
            f" Average snapshot creation time is {c_runtime / self.tests_numbers} sec."
        )
        log.info(
            f" Average snapshot creation speed is {c_speed / self.tests_numbers} MiB/sec"
        )
        log.info(
            f" Average snapshot restore time is {r_runtime / self.tests_numbers} sec."
        )
        log.info(
            f" Average snapshot restore speed is {r_speed / self.tests_numbers} MiB/sec"
        )
Example #15
0
    def test_rwo_dynamic_pvc(self, setup_base):
        """
        Verify that a PVC shared by two pods on different nodes is usable by
        only one of them at a time.

        Steps:
            1. Create a first pod using the PVC on the first worker node and
               wait for it to be Running.
            2. Create a second pod using the same PVC on the second worker
               node, without waiting for it.
            3. Run IO on the first pod and record the md5sum of its file.
            4. Verify the second pod stays Pending with the expected
               failure event.
            5. Delete the first pod and verify the second pod reaches
               Running and sees the data written by the first pod.
            6. Run IO on the second pod, verify the original data is still
               intact and delete the second pod.

        Args:
            setup_base: A fixture requested for its setup; it is not
                referenced in the test body (presumably it prepares
                ``self.pvc_obj`` and related attributes - verify).

        """
        logger.info(f"Creating two pods using same PVC {self.pvc_obj.name}")
        logger.info(f"Creating first pod on node: {self.worker_nodes_list[0]}")
        pod_obj1 = helpers.create_pod(interface_type=self.interface_type,
                                      pvc_name=self.pvc_obj.name,
                                      desired_status=constants.STATUS_RUNNING,
                                      wait=True,
                                      namespace=self.namespace,
                                      node_name=self.worker_nodes_list[0],
                                      pod_dict_path=constants.NGINX_POD_YAML)
        node_pod1 = pod_obj1.get().get('spec').get('nodeName')

        logger.info(
            f"Creating second pod on node: {self.worker_nodes_list[1]}")

        # wait=False: this pod is not expected to start while the first pod
        # still holds the volume
        pod_obj2 = helpers.create_pod(interface_type=self.interface_type,
                                      pvc_name=self.pvc_obj.name,
                                      wait=False,
                                      namespace=self.namespace,
                                      node_name=self.worker_nodes_list[1],
                                      pod_dict_path=constants.NGINX_POD_YAML)
        node_pod2 = pod_obj2.get().get('spec').get('nodeName')

        assert node_pod1 != node_pod2, 'Both pods are on the same node'

        logger.info(f"Running IO on pod {pod_obj1.name}")
        file_name = pod_obj1.name
        pod_obj1.run_io(storage_type=self.storage_type,
                        size=self.io_size,
                        runtime=30,
                        fio_filename=file_name)
        pod.get_fio_rw_iops(pod_obj1)
        md5sum_pod1_data = pod.cal_md5sum(pod_obj=pod_obj1,
                                          file_name=file_name)

        # Verify that second pod is still in Pending state and not able to
        # attain Running state due to expected failure
        assert helpers.wait_for_resource_state(resource=pod_obj2,
                                               state=constants.STATUS_PENDING)
        self.verify_expected_failure_event(
            ocs_obj=pod_obj2, failure_str=self.expected_pod_failure)

        # Remove the first pod so the second one can attach the volume
        pod_obj1.delete()
        pod_obj1.ocp.wait_for_delete(resource_name=pod_obj1.name)

        # Wait for second pod to be in Running state
        assert helpers.wait_for_resource_state(resource=pod_obj2,
                                               state=constants.STATUS_RUNNING,
                                               timeout=240)

        # The data written through the first pod must be visible here
        assert pod.verify_data_integrity(pod_obj=pod_obj2,
                                         file_name=file_name,
                                         original_md5sum=md5sum_pod1_data)

        pod_obj2.run_io(storage_type=self.storage_type,
                        size=self.io_size,
                        runtime=30,
                        fio_filename=pod_obj2.name)
        pod.get_fio_rw_iops(pod_obj2)

        # Again verify data integrity
        assert pod.verify_data_integrity(pod_obj=pod_obj2,
                                         file_name=file_name,
                                         original_md5sum=md5sum_pod1_data)

        pod_obj2.delete()
        # Wait through the second pod's own handle; the original went through
        # pod_obj1, which was already deleted above.
        pod_obj2.ocp.wait_for_delete(resource_name=pod_obj2.name)
    def test_snapshot_restore_using_different_sc(
        self,
        storageclass_factory,
        snapshot_factory,
        snapshot_restore_factory,
        pod_factory,
    ):
        """
        Restore snapshots into PVCs that use newly created storage classes
        and validate the restored data.

        The md5sum recorded on each parent PVC is carried over to its
        snapshot and compared on the pods attached to the restored PVCs.

        """
        file_name = "file_snapshot"

        # Start fio on every pod first, collect the results afterwards
        log.info("Start IO on all pods")
        for io_pod in self.pods:
            io_pod.run_io(
                storage_type="fs",
                size=f"{self.pvc_size - 1}G",
                runtime=30,
                fio_filename=file_name,
            )
        log.info("IO started on all pods")

        for io_pod in self.pods:
            # Wait for IO completion
            io_pod.get_fio_results()
            # Keep the checksum on the PVC so the snapshot can inherit it
            io_pod.pvc.md5sum = cal_md5sum(pod_obj=io_pod, file_name=file_name)
        log.info("IO completed on all pods")

        # Request all the snapshots without waiting on each of them
        log.info("Create snapshots of all PVCs")
        snap_objs = []
        for parent_pvc in self.pvcs:
            log.info(f"Creating snapshot of PVC {parent_pvc.name}")
            new_snap = snapshot_factory(parent_pvc, wait=False)
            new_snap.md5sum = parent_pvc.md5sum
            new_snap.interface = parent_pvc.interface
            snap_objs.append(new_snap)
        log.info("Snapshots created")

        # Now wait for every snapshot to be ready to use
        log.info("Verify snapshots are ready")
        for ready_snap in snap_objs:
            ready_snap.ocp.wait_for_resource(
                condition="true",
                resource_name=ready_snap.name,
                column=constants.STATUS_READYTOUSE,
                timeout=180,
            )

        # New storage classes, one list of names per interface
        rbd_sc_names = [
            storageclass_factory(interface=constants.CEPHBLOCKPOOL).name
        ]
        cephfs_sc_names = [
            storageclass_factory(interface=constants.CEPHFILESYSTEM).name
        ]

        # If ODF >=4.9 create one more storage class that will use new pool
        # to verify the bug 1901954
        if version.get_semantic_ocs_version_from_config() >= version.VERSION_4_9:
            new_pool_sc = storageclass_factory(
                interface=constants.CEPHBLOCKPOOL, new_rbd_pool=True
            )
            rbd_sc_names.append(new_pool_sc.name)

        sc_names_by_interface = {
            constants.CEPHBLOCKPOOL: rbd_sc_names,
            constants.CEPHFILESYSTEM: cephfs_sc_names,
        }

        # One restored PVC per (snapshot, storage class of its interface)
        log.info("Creating new PVCs from snapshots")
        restore_pvc_objs = []
        for src_snap in snap_objs:
            for sc_name in sc_names_by_interface[src_snap.interface]:
                log.info(f"Creating a PVC from snapshot {src_snap.name}")
                restored_pvc = snapshot_restore_factory(
                    snapshot_obj=src_snap,
                    storageclass=sc_name,
                    size=f"{self.pvc_size}Gi",
                    volume_mode=src_snap.parent_volume_mode,
                    access_mode=src_snap.parent_access_mode,
                    status="",
                )

                log.info(
                    f"Created PVC {restored_pvc.name} from snapshot {src_snap.name}."
                    f"Used the storage class {sc_name}"
                )
                restored_pvc.md5sum = src_snap.md5sum
                restore_pvc_objs.append(restored_pvc)
        log.info("Created new PVCs from all the snapshots")

        # The restored PVCs were created with status="", so wait here
        log.info("Verify the restored PVCs are Bound")
        for bound_pvc in restore_pvc_objs:
            wait_for_resource_state(
                resource=bound_pvc, state=constants.STATUS_BOUND, timeout=180
            )
            bound_pvc.reload()
        log.info("Verified: Restored PVCs are Bound.")

        # Create one pod per restored PVC
        log.info("Attach the restored PVCs to pods")
        restore_pod_objs = []
        for restored_pvc in restore_pvc_objs:
            new_pod = pod_factory(
                interface=restored_pvc.snapshot.interface,
                pvc=restored_pvc,
                status="",
            )
            log.info(
                f"Attached the PVC {restored_pvc.name} to pod {new_pod.name}"
            )
            restore_pod_objs.append(new_pod)

        # The pods were created with status="", so wait here
        log.info("Verify the new pods are running")
        for new_pod in restore_pod_objs:
            wait_for_resource_state(new_pod, constants.STATUS_RUNNING)
        log.info("Verified: New pods are running")

        # Compare the restored data with the checksum kept on the snapshot
        log.info("Verifying md5sum on new pods")
        for new_pod in restore_pod_objs:
            log.info(f"Verifying md5sum on pod {new_pod.name}")
            verify_data_integrity(
                pod_obj=new_pod,
                file_name=file_name,
                original_md5sum=new_pod.pvc.snapshot.md5sum,
            )
            log.info(f"Verified md5sum on pod {new_pod.name}")
        log.info("Verified md5sum on all pods")

        # The restored volumes must also accept new IO
        log.info("Starting IO on new pods")
        for new_pod in restore_pod_objs:
            new_pod.run_io(storage_type="fs", size="500M", runtime=15)

        log.info("Waiting for IO completion on new pods")
        for new_pod in restore_pod_objs:
            new_pod.get_fio_results()
        log.info("IO completed on new pods.")
    def test_worker_node_restart_during_pvc_clone(
        self, nodes, pvc_clone_factory, pod_factory
    ):
        """
        Check that PVC clones are created successfully even though a worker
        node is restarted while the clone requests are in flight

        Flow:
            1. Write data from every pod in self.pods and store the md5sum
               on the pod's PVC
            2. Submit the restart of one worker node to a thread pool
            3. Submit the clone creation of every PVC in self.pvcs
            4. Wait for the restart to return and the node to be Ready
            5. Wait for the clones to be Bound and attach them to new pods
            6. Compare the md5sum seen by the new pods with the parent's
            7. Run IO from the new pods

        """
        base_file = "fio_test"
        # One worker per clone request plus one for the node restart
        thread_pool = ThreadPoolExecutor(max_workers=len(self.pvcs) + 1)
        restart_targets = node.get_nodes(
            node_type=constants.WORKER_MACHINE, num_of_nodes=1
        )

        # Write the initial data set
        log.info("Starting IO on all pods")
        for app_pod in self.pods:
            if app_pod.pvc.volume_mode == constants.VOLUME_MODE_BLOCK:
                io_target = "block"
            else:
                io_target = "fs"
            app_pod.run_io(
                storage_type=io_target,
                size="1G",
                runtime=20,
                fio_filename=base_file,
                end_fsync=1,
            )
            log.info(f"IO started on pod {app_pod.name}")
        log.info("Started IO on all pods")

        # Collect fio results and record the checksum on each parent PVC
        log.info("Wait for IO to finish on pods")
        for app_pod in self.pods:
            app_pod.get_fio_results()
            log.info(f"IO finished on pod {app_pod.name}")
            # Filesystem volumes are checksummed by file name, any other
            # volume mode by the storage path of the block device
            if app_pod.pvc.volume_mode == constants.VOLUME_MODE_FILESYSTEM:
                md5_target = base_file
            else:
                md5_target = app_pod.get_storage_path(storage_type="block")
            app_pod.pvc.md5sum = pod.cal_md5sum(
                app_pod,
                md5_target,
                app_pod.pvc.volume_mode == constants.VOLUME_MODE_BLOCK,
            )

        # The restart is submitted first so that it overlaps with cloning
        log.info(f"Restart node {restart_targets[0].name}")
        restart_future = thread_pool.submit(
            nodes.restart_nodes, nodes=restart_targets
        )

        log.info("Creating clone of all PVCs.")
        for parent_pvc in self.pvcs:
            log.info(f"Creating clone of {parent_pvc.name}")
            parent_pvc.clone_proc = thread_pool.submit(
                pvc_clone_factory, pvc_obj=parent_pvc, status=""
            )

        # Propagate any exception raised by 'restart_nodes'
        restart_future.result()

        log.info("Verify status of node.")
        node.wait_for_nodes_status(
            node_names=[node.get_node_name(restart_targets[0])],
            status=constants.NODE_READY,
            timeout=300,
        )

        # Collect the clones in the same order as the parent PVCs
        clones = []
        for parent_pvc in self.pvcs:
            clones.append(parent_pvc.clone_proc.result())

        log.info("Verifying cloned PVCs are Bound")
        for clone in clones:
            wait_for_resource_state(
                resource=clone, state=constants.STATUS_BOUND, timeout=540
            )
            clone.reload()
        log.info("Verified: Cloned PVCs are Bound")

        # Create one pod per clone
        log.info("Attach the cloned PVCs to pods")
        clone_pods = []
        for clone in clones:
            # Block mode clones need the raw block pod template
            yaml_path = (
                constants.CSI_RBD_RAW_BLOCK_POD_YAML
                if clone.volume_mode == "Block"
                else ""
            )
            new_pod = pod_factory(
                interface=clone.parent.interface,
                pvc=clone,
                status="",
                pod_dict_path=yaml_path,
                raw_block_pv=clone.volume_mode == "Block",
            )
            log.info(f"Attaching the PVC {clone.name} to pod {new_pod.name}")
            clone_pods.append(new_pod)

        # Pods were created with status="", so wait for Running explicitly
        log.info("Verify the new pods are running")
        for new_pod in clone_pods:
            wait_for_resource_state(new_pod, constants.STATUS_RUNNING)
        log.info("Verified: New pods are running")

        # Compare the data on each clone with the checksum of its parent
        for new_pod in clone_pods:
            if new_pod.pvc.volume_mode == constants.VOLUME_MODE_FILESYSTEM:
                md5_target = base_file
            else:
                md5_target = new_pod.get_storage_path(storage_type="block")
            pod.verify_data_integrity(
                new_pod,
                md5_target,
                new_pod.pvc.parent.md5sum,
                new_pod.pvc.volume_mode == constants.VOLUME_MODE_BLOCK,
            )
            log.info(
                f"Verified: md5sum of {md5_target} on pod {new_pod.name} "
                f"matches with the original md5sum"
            )
        log.info("Data integrity check passed on all pods")

        # Write a second file from the new pods
        log.info("Starting IO on the new pods")
        for new_pod in clone_pods:
            if new_pod.pvc.volume_mode == constants.VOLUME_MODE_BLOCK:
                io_target = "block"
            else:
                io_target = "fs"
            new_pod.run_io(
                storage_type=io_target,
                size="1G",
                runtime=20,
                fio_filename=f"{base_file}_1",
                end_fsync=1,
            )
            log.info(f"IO started on pod {new_pod.name}")
        log.info("Started IO on the new pods")

        # Collect fio results from the new pods
        log.info("Wait for IO to finish on the new pods")
        for new_pod in clone_pods:
            new_pod.get_fio_results()
            log.info(f"IO finished on pod {new_pod.name}")
        log.info("IO finished on the new pods")
    def test_resource_deletion_during_snapshot_restore(
            self, snapshot_factory, snapshot_restore_factory, pod_factory):
        """
        Verify PVC snapshot and restore will succeed if rook-ceph and
        csi pods are re-spun while creating the snapshot and while creating
        the restore PVC

        Steps:
            1. Run fio from every pod in self.pods and store the md5sum on
               the pod's PVC
            2. Submit snapshot creation for every PVC in self.pvcs and,
               concurrently, delete the pods listed in 'pods_to_delete'
            3. Wait for every snapshot to be Ready
            4. Submit the restore of every snapshot and, concurrently,
               delete the same pod types again
            5. Wait for the restored PVCs to be Bound, attach them to new
               pods, verify md5sum and run IO from the new pods

        Args:
            snapshot_factory: Callable invoked as
                snapshot_factory(pvc_obj, wait=False) to create a snapshot
            snapshot_restore_factory: Callable used to create a PVC from a
                snapshot
            pod_factory: Callable used to create a pod for a restored PVC

        """
        pods_to_delete = [
            "rbdplugin_provisioner",
            "cephfsplugin_provisioner",
            "cephfsplugin",
            "rbdplugin",
            "osd",
            "mgr",
        ]
        # Enough workers to run every snapshot/restore request and every
        # pod deletion at the same time
        executor = ThreadPoolExecutor(max_workers=len(self.pvcs) +
                                      len(pods_to_delete))
        # One Disruptions object per pod type, matched by position
        disruption_ops = [
            disruption_helpers.Disruptions() for _ in pods_to_delete
        ]
        file_name = "file_snap"

        # Run IO
        log.info("Running fio on all pods to create a file")
        for pod_obj in self.pods:
            storage_type = ("block" if
                            (pod_obj.pvc.volume_mode
                             == constants.VOLUME_MODE_BLOCK) else "fs")
            pod_obj.run_io(
                storage_type=storage_type,
                size="1G",
                runtime=30,
                fio_filename=file_name,
                end_fsync=1,
            )

        log.info("Wait for IO to complete on pods")
        for pod_obj in self.pods:
            pod_obj.get_fio_results()
            log.info(f"Verified IO on pod {pod_obj.name}")
            # Calculate md5sum. Filesystem volumes use the file name, any
            # other volume mode uses the storage path of the block device
            file_name_pod = (file_name if
                             (pod_obj.pvc.volume_mode
                              == constants.VOLUME_MODE_FILESYSTEM) else
                             pod_obj.get_storage_path(storage_type="block"))
            pod_obj.pvc.md5sum = cal_md5sum(
                pod_obj,
                file_name_pod,
                pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK,
            )
            log.info(f"md5sum obtained from pod {pod_obj.name}")
        log.info("IO is successful on all pods")

        # Select the pods to be deleted
        for disruption, pod_type in zip(disruption_ops, pods_to_delete):
            # Select snapshotter leader if the pod is provisioner pod
            disruption.set_resource(
                resource=pod_type,
                leader_type="snapshotter" if "provisioner" in pod_type else "",
            )

        log.info("Start taking snapshot of all PVCs.")
        for pvc_obj in self.pvcs:
            log.info(f"Taking snapshot of PVC {pvc_obj.name}")
            pvc_obj.snap_proc = executor.submit(snapshot_factory,
                                                pvc_obj,
                                                wait=False)
        log.info("Started taking snapshot of all PVCs.")

        # Delete the pods 'pods_to_delete' while the snapshots are created
        log.info(f"Deleting pods {pods_to_delete}")
        for disruption in disruption_ops:
            disruption.delete_proc = executor.submit(
                disruption.delete_resource)

        # Wait for delete and recovery; result() re-raises any exception
        # raised by 'delete_resource'
        for disruption in disruption_ops:
            disruption.delete_proc.result()

        # Get snapshots and carry the parent's md5sum over to each of them
        snap_objs = []
        for pvc_obj in self.pvcs:
            snap_obj = pvc_obj.snap_proc.result()
            snap_obj.md5sum = pvc_obj.md5sum
            snap_objs.append(snap_obj)

        # Wait for snapshots to be Ready
        log.info("Waiting for all snapshots to be Ready")
        for snap_obj in snap_objs:
            snap_obj.ocp.wait_for_resource(
                condition="true",
                resource_name=snap_obj.name,
                column=constants.STATUS_READYTOUSE,
                timeout=300,
            )
            log.info(f"Snapshot {snap_obj.name} is Ready")
            snap_obj.reload()
        log.info("All snapshots are Ready")

        # Select the pods to be deleted during restore (no leader type
        # is requested this time)
        for disruption, pod_type in zip(disruption_ops, pods_to_delete):
            disruption.set_resource(resource=pod_type)

        restore_pvc_objs = []

        # Create PVCs out of the snapshots
        log.info("Start creating new PVCs from snapshots")
        for snap_obj in snap_objs:
            log.info(f"Creating a PVC from snapshot {snap_obj.name}")
            snap_obj.restore_proc = executor.submit(
                snapshot_restore_factory,
                snapshot_obj=snap_obj,
                size=f"{self.pvc_size}Gi",
                volume_mode=snap_obj.parent_volume_mode,
                access_mode=snap_obj.parent_access_mode,
                status="",
            )
        log.info("Started creating new PVCs from snapshots")

        # Delete the pods 'pods_to_delete' while the restore is running
        log.info(f"Deleting pods {pods_to_delete}")
        for disruption in disruption_ops:
            disruption.delete_proc = executor.submit(
                disruption.delete_resource)

        # Wait for delete and recovery
        for disruption in disruption_ops:
            disruption.delete_proc.result()

        # Get restored PVCs
        for snap_obj in snap_objs:
            restore_pvc_obj = snap_obj.restore_proc.result()
            restore_pvc_objs.append(restore_pvc_obj)
            log.info(f"Created PVC {restore_pvc_obj.name} from snapshot "
                     f"{snap_obj.name}")
        log.info("Created new PVCs from all the snapshots")

        # Confirm that the restored PVCs are Bound
        log.info("Verifying the restored PVCs are Bound")
        for pvc_obj in restore_pvc_objs:
            wait_for_resource_state(resource=pvc_obj,
                                    state=constants.STATUS_BOUND,
                                    timeout=300)
            pvc_obj.reload()
            # Record the volume mode read from the reloaded PVC spec
            pvc_obj.volume_mode = pvc_obj.data["spec"]["volumeMode"]
        log.info("Verified: Restored PVCs are Bound.")

        restore_pod_objs = []

        # Attach the restored PVCs to pods
        log.info("Attach the restored PVCs to pods")
        for pvc_obj in restore_pvc_objs:
            if pvc_obj.volume_mode == constants.VOLUME_MODE_BLOCK:
                pod_dict_path = constants.CSI_RBD_RAW_BLOCK_POD_YAML
            else:
                pod_dict_path = ""
            restore_pod_obj = pod_factory(
                interface=pvc_obj.interface,
                pvc=pvc_obj,
                status="",
                pod_dict_path=pod_dict_path,
                raw_block_pv=pvc_obj.volume_mode ==
                constants.VOLUME_MODE_BLOCK,
            )
            restore_pod_objs.append(restore_pod_obj)
        log.info("Attached the restored PVCs to pods")

        # Verify the new pods are running
        log.info("Verify the new pods are running")
        for pod_obj in restore_pod_objs:
            wait_for_resource_state(pod_obj, constants.STATUS_RUNNING)
        log.info("Verified: New pods are running")

        # Verify md5sum against the value stored on the source snapshot
        log.info("Verify md5sum")
        for pod_obj in restore_pod_objs:
            file_name_pod = (file_name if
                             (pod_obj.pvc.volume_mode
                              == constants.VOLUME_MODE_FILESYSTEM) else
                             pod_obj.get_storage_path(storage_type="block"))
            verify_data_integrity(
                pod_obj,
                file_name_pod,
                pod_obj.pvc.snapshot.md5sum,
                pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK,
            )
            log.info(
                f"Verified: md5sum of {file_name_pod} on pod {pod_obj.name} "
                f"matches with the original md5sum")
        log.info("Data integrity check passed on all pods")

        # Run IO
        log.info("Running IO on new pods")
        for pod_obj in restore_pod_objs:
            storage_type = ("block" if
                            (pod_obj.pvc.volume_mode
                             == constants.VOLUME_MODE_BLOCK) else "fs")
            pod_obj.run_io(
                storage_type=storage_type,
                size="1G",
                runtime=20,
                fio_filename=file_name,
                end_fsync=1,
            )

        log.info("Wait for IO to complete on new pods")
        for pod_obj in restore_pod_objs:
            pod_obj.get_fio_results()
            log.info(f"Verified IO on new pod {pod_obj.name}")
        log.info("IO completed on new pods")
Example #19
0
    def test_clone_with_different_access_mode(self, pvc_clone_factory, pod_factory):
        """
        Clone every PVC in self.pvcs once per access mode listed for its
        interface and volume mode, attach the clones to pods and confirm
        that the data written on the parent is intact on every clone

        """
        fio_file = "fio_test"
        # Access modes to request for the clones, keyed by the interface
        # and then by the volume mode of the parent PVC
        clone_access_modes = {
            constants.CEPHBLOCKPOOL: {
                constants.VOLUME_MODE_FILESYSTEM: [constants.ACCESS_MODE_RWO],
                constants.VOLUME_MODE_BLOCK: [
                    constants.ACCESS_MODE_RWX,
                    constants.ACCESS_MODE_RWO,
                ],
            },
            constants.CEPHFILESYSTEM: {
                constants.VOLUME_MODE_FILESYSTEM: [
                    constants.ACCESS_MODE_RWX,
                    constants.ACCESS_MODE_RWO,
                ]
            },
        }

        # Write the initial data set
        log.info("Starting IO on all pods")
        for app_pod in self.pods:
            if app_pod.pvc.volume_mode == constants.VOLUME_MODE_BLOCK:
                io_target = "block"
            else:
                io_target = "fs"
            app_pod.run_io(
                storage_type=io_target,
                size="1G",
                runtime=20,
                fio_filename=fio_file,
                end_fsync=1,
            )
            log.info(f"IO started on pod {app_pod.name}")
        log.info("Started IO on all pods")

        # Collect fio results and record the checksum on each parent PVC
        log.info("Wait for IO to finish on pods")
        for app_pod in self.pods:
            app_pod.get_fio_results()
            log.info(f"IO finished on pod {app_pod.name}")
            # Filesystem volumes are checksummed by file name, any other
            # volume mode by the storage path of the block device
            if app_pod.pvc.volume_mode == constants.VOLUME_MODE_FILESYSTEM:
                md5_target = fio_file
            else:
                md5_target = app_pod.get_storage_path(storage_type="block")
            app_pod.pvc.md5sum = pod.cal_md5sum(
                app_pod,
                md5_target,
                app_pod.pvc.volume_mode == constants.VOLUME_MODE_BLOCK,
            )

        log.info("Creating clone of the PVCs with different access modes")
        clones = []
        for parent_pvc in self.pvcs:
            modes_for_parent = clone_access_modes[parent_pvc.interface][
                parent_pvc.volume_mode
            ]
            for requested_mode in modes_for_parent:
                clone = pvc_clone_factory(
                    pvc_obj=parent_pvc, status="", access_mode=requested_mode
                )
                # Stored on the clone so it can be passed to pod_factory later
                clone.interface = parent_pvc.interface
                log.info(
                    f"Clone {clone.name} created. "
                    f"Parent PVC: {parent_pvc.name}. "
                    f"Parent accessMode: {parent_pvc.get_pvc_access_mode}. "
                    f"Cloned PVC accessMode: {requested_mode}"
                )
                clones.append(clone)
        log.info("Created clone of the PVCs with different access modes")

        log.info("Verifying cloned PVCs are Bound")
        for clone in clones:
            helpers.wait_for_resource_state(
                resource=clone, state=constants.STATUS_BOUND, timeout=200
            )
            clone.reload()
        log.info("Verified: Cloned PVCs are Bound")

        # Endless iterator over the worker nodes; each new pod takes the
        # next node from it
        worker_cycle = cycle(node.get_worker_nodes())

        # Create the pods which consume the clones
        log.info("Attach the cloned PVCs to pods")
        clone_pods = []
        for clone in clones:
            # Block mode clones need the raw block pod template
            yaml_path = (
                constants.CSI_RBD_RAW_BLOCK_POD_YAML
                if clone.volume_mode == "Block"
                else ""
            )
            # An RWX clone gets 2 pods, a clone with any other mode gets 1
            if clone.get_pvc_access_mode == constants.ACCESS_MODE_RWX:
                pods_needed = 2
            else:
                pods_needed = 1
            for _ in range(pods_needed):
                new_pod = pod_factory(
                    interface=clone.interface,
                    pvc=clone,
                    status="",
                    node_name=next(worker_cycle),
                    pod_dict_path=yaml_path,
                    raw_block_pv=clone.volume_mode == "Block",
                )
                log.info(f"Attaching the PVC {clone.name} to pod {new_pod.name}")
                clone_pods.append(new_pod)

        # Pods were created with status="", so wait for Running explicitly
        log.info("Verify the new pods are running")
        for new_pod in clone_pods:
            helpers.wait_for_resource_state(new_pod, constants.STATUS_RUNNING)
        log.info("Verified: New pods are running")

        # Compare the data on each clone with the checksum of its parent
        for new_pod in clone_pods:
            if new_pod.pvc.volume_mode == constants.VOLUME_MODE_FILESYSTEM:
                md5_target = fio_file
            else:
                md5_target = new_pod.get_storage_path(storage_type="block")
            pod.verify_data_integrity(
                new_pod,
                md5_target,
                new_pod.pvc.parent.md5sum,
                new_pod.pvc.volume_mode == constants.VOLUME_MODE_BLOCK,
            )
            log.info(
                f"Verified: md5sum of {md5_target} on pod {new_pod.name} "
                f"matches with the original md5sum"
            )
        log.info("Data integrity check passed on all pods")
Example #20
0
    def test_rwo_pvc_fencing_node_prolonged_network_failure(
            self, nodes, setup, node_restart_teardown):
        """
        OCS-1427/OCS-1429:
        - Start DeploymentConfig based app pods on 1 OCS/Non-OCS node
        - Make the node (where app pods are running) unresponsive
            by bringing its main network interface down
        - Check new app pods and/or mon, osd pods scheduled on another node
            are stuck due to Multi-Attach error.
        - Power off the unresponsive node
        - Force delete the app pods and/or mon,osd pods on the unresponsive node
        - Check new app pods and/or mon, osd pods scheduled on another node comes
            into Running state
        - Run IOs on new app pods

        OCS-1430/OCS-1435:
        - Start DeploymentConfig based app pods on multiple node
            Colocated scenario: Select 1 node where osd and/or mon is running,
                select other 2 nodes where mon/osd are not running
            Dedicated scenario: 3 Non-OCS nodes
        - Disrupt the leader provisioner pods if not running on above selected
            nodes
        - Make the nodes (where app pods are running) unresponsive
            by bringing their main network interface down
        - Check new app pods and/or mon, osd pods scheduled on another node
            are stuck due to Multi-Attach error.
        - Power off the unresponsive nodes
        - Force delete the app pods and/or mon,osd pods on the unresponsive node
        - Check new app pods and/or mon, osd pods scheduled on another node comes
            into Running state
        - Run IOs on new app pods

        Args:
            nodes: Platform nodes object; only 'stop_nodes' is used here
            setup: Fixture value unpacked into (ceph_cluster, dc_pods,
                ceph_pods, app_pod_nodes, test_nodes, disruptor)
            node_restart_teardown: Fixture requested for its side effects
                only; it is not referenced in the test body

        """
        ceph_cluster, dc_pods, ceph_pods, app_pod_nodes, test_nodes, disruptor = setup

        external_mode = helpers.storagecluster_independent_check()
        # Run IO on pods
        md5sum_data = self.run_and_verify_io(pod_list=dc_pods,
                                             fio_filename="io_file1",
                                             run_io_in_bg=True)

        # OCS-1430/OCS-1435
        # Disrupt leader plugin-provisioner pods, skip if running on node to be failed
        if disruptor:
            for disruption in disruptor:
                disruption.delete_resource()

        # Induce network failure on the nodes
        node.node_network_failure(app_pod_nodes)
        logger.info(f"Waiting for {self.prolong_nw_fail_time} seconds")
        sleep(self.prolong_nw_fail_time)

        # Wait for pods to be rescheduled
        for pod_obj in dc_pods + ceph_pods:
            pod_obj.ocp.wait_for_resource(
                condition=constants.STATUS_TERMINATING,
                resource_name=pod_obj.name)

        # Fetch info of new pods and verify Multi-Attach error
        new_dc_pods = self.get_new_pods(dc_pods)
        assert len(new_dc_pods) == len(
            dc_pods), "Unexpected number of app pods"
        self.verify_multi_attach_error(new_dc_pods)

        if ceph_pods:
            new_ceph_pods = self.get_new_pods(ceph_pods)
            assert len(new_ceph_pods) > 0, "Unexpected number of osd pods"
            self.verify_multi_attach_error(new_ceph_pods)

        logger.info("Executing manual recovery steps")
        # Power off the unresponsive node(s)
        logger.info(f"Powering off the unresponsive node(s): {app_pod_nodes}")
        nodes.stop_nodes(node.get_node_objs(app_pod_nodes))

        # Force delete the app pods and/or mon,osd pods on the unresponsive node.
        # Compare the version as (major, minor) integers: a float comparison
        # would read "4.10" as 4.1 and wrongly treat it as older than 4.4.
        ocs_version = tuple(
            int(part)
            for part in str(config.ENV_DATA["ocs_version"]).split(".")[:2]
        )
        if ocs_version < (4, 4) and ceph_cluster.mon_count == 5:
            for pod_obj in ceph_cluster.mons:
                if pod.get_pod_node(pod_obj).name in app_pod_nodes:
                    ceph_pods.append(pod_obj)

        for pod_obj in dc_pods + ceph_pods:
            pod_obj.delete(force=True)

        # Wait for new app pods to reach Running state. The result is
        # asserted so that a falsy return value fails the test with the
        # message below instead of being silently discarded.
        for pod_obj in new_dc_pods:
            assert pod_obj.ocp.wait_for_resource(
                condition=constants.STATUS_RUNNING,
                resource_name=pod_obj.name,
                timeout=1200,
                sleep=30,
            ), f"App pod with name {pod_obj.name} did not reach Running state"

        if not external_mode:
            # Wait for mon and osd pods to reach Running state
            selectors_to_check = {
                constants.MON_APP_LABEL: self.expected_mon_count,
                constants.OSD_APP_LABEL: ceph_cluster.osd_count,
            }
            for selector, count in selectors_to_check.items():
                assert ceph_cluster.POD.wait_for_resource(
                    condition=constants.STATUS_RUNNING,
                    selector=selector,
                    resource_count=count,
                    timeout=1800,
                    sleep=60,
                ), f"{count} expected pods with selector {selector} are not in Running state"

            if ceph_cluster.mon_count == self.expected_mon_count:
                # Check ceph health. A toolbox pod stuck in Terminating is
                # force deleted first.
                toolbox_status = ceph_cluster.POD.get_resource_status(
                    ceph_cluster.toolbox.name)
                if toolbox_status == constants.STATUS_TERMINATING:
                    ceph_cluster.toolbox.delete(force=True)

                assert ceph_health_check(), "Ceph cluster health is not OK"
                logger.info("Ceph cluster health is OK")

        # Verify data integrity from new pods; md5sum_data is indexed in
        # the same order as new_dc_pods
        for num, pod_obj in enumerate(new_dc_pods):
            pod.verify_data_integrity(pod_obj=pod_obj,
                                      file_name="io_file1",
                                      original_md5sum=md5sum_data[num])

        # Run IO on new pods
        self.run_and_verify_io(pod_list=new_dc_pods,
                               fio_filename="io_file2",
                               return_md5sum=False)
Example #21
0
def expand_verify_pvcs(pvc_objs, pod_objs, pvc_size_new, file_name, fio_size):
    """
    Resize every PVC in the list, check that the data written before the
    expansion is intact, then run FIO on the expanded PVCs and confirm
    that the file it writes exists.

    The PVC used for pod_objs[i] is pvc_objs[i].

    Args:
        pvc_objs (list) : List of PVC objects which are to be expanded.
        pod_objs (list) : List of POD objects attached to the PVCs.
        pvc_size_new (int) : Size of the expanded PVC in GB.
        file_name (str) : Name of the file on which FIO is performed.
        fio_size (int) : Size in MB of FIO.

    """
    # Resize the PVCs one after the other
    log.info("Started expansion of the PVCs.")
    for target_pvc in pvc_objs:
        log.info(f"Expanding size of PVC {target_pvc.name} to {pvc_size_new}G")
        target_pvc.resize_pvc(pvc_size_new, True)
    log.info("Successfully expanded the PVCs.")

    # Check the pre-expansion data on every pod
    for index, attached_pod in enumerate(pod_objs):
        if attached_pod.pvc.get_pvc_vol_mode == constants.VOLUME_MODE_BLOCK:
            pod.verify_data_integrity_after_expansion_for_block_pvc(
                attached_pod, pvc_objs[index], fio_size
            )
        else:
            pod.verify_data_integrity(
                attached_pod, file_name, pvc_objs[index].md5sum
            )

    # Start IO sized at half of the new PVC size
    log.info(
        "Run IO on all pods to utilise 50% of the expanded PVC used space")
    new_file = "fio_50"
    for attached_pod in pod_objs:
        log.info(f"Running IO on pod {attached_pod.name}")
        log.info(f"File created during IO {new_file}")
        # Half of the new size, expressed in MB (1 GB counted as 1000 MB)
        io_size_mb = int(0.50 * pvc_size_new * 1000)
        if attached_pod.pvc.get_pvc_vol_mode == constants.VOLUME_MODE_BLOCK:
            io_mode = "block"
        else:
            io_mode = "fs"
        # The workload object is created here and the pod is flagged as
        # already set up
        attached_pod.wl_setup_done = True
        attached_pod.wl_obj = workload.WorkLoad(
            "test_workload_fio",
            attached_pod.get_storage_path(io_mode),
            "fio",
            io_mode,
            attached_pod,
            1,
        )
        attached_pod.run_io(
            storage_type=io_mode,
            size=f"{io_size_mb}M",
            runtime=20,
            fio_filename=new_file,
            end_fsync=1,
        )

    log.info("Started IO on all pods to utilise 50% of PVCs")

    for attached_pod in pod_objs:
        # Block until fio reports its results (3600 is passed as the
        # timeout argument)
        attached_pod.get_fio_results(3600)
        log.info(f"IO finished on pod {attached_pod.name}")

        # Block volumes are addressed by their storage path, filesystem
        # volumes by the path of the file inside the pod
        if attached_pod.pvc.get_pvc_vol_mode == constants.VOLUME_MODE_BLOCK:
            target_name = attached_pod.get_storage_path(storage_type="block")
            target_path = target_name
        else:
            target_name = new_file
            target_path = pod.get_file_path(attached_pod, target_name)

        # Confirm the IO target is present
        log.info(f"Actual file path on the pod {target_path}")
        assert pod.check_file_existence(
            attached_pod, target_path
        ), f"File {target_name} does not exist"
        log.info(f"File {target_name} exists in {attached_pod.name}")
Example #22
0
    def test_pvc_to_pvc_clone(self, interface_type, teardown_factory):
        """
        Create a clone from an existing pvc,
        verify data is preserved in the cloning.

        Args:
            interface_type (str): Interface of the parent PVC. The CephFS
                clone template is used when it equals
                constants.CEPHFILESYSTEM, the RBD template otherwise.
            teardown_factory: Callable that receives every resource created
                here (presumably registers it for cleanup at teardown).

        """
        logger.info(f"Running IO on pod {self.pod_obj.name}")
        file_name = self.pod_obj.name
        logger.info(f"File created during IO {file_name}")
        self.pod_obj.run_io(storage_type="fs",
                            size="500M",
                            fio_filename=file_name)

        # Wait for fio to finish
        self.pod_obj.get_fio_results()
        logger.info(f"Io completed on pod {self.pod_obj.name}.")

        # Verify presence of the file
        file_path = pod.get_file_path(self.pod_obj, file_name)
        logger.info(f"Actual file path on the pod {file_path}")
        assert pod.check_file_existence(
            self.pod_obj, file_path), f"File {file_name} does not exist"
        logger.info(f"File {file_name} exists in {self.pod_obj.name}")

        # Calculate md5sum of the file.
        orig_md5_sum = pod.cal_md5sum(self.pod_obj, file_name)

        # Create a clone of the existing pvc.
        sc_name = self.pvc_obj.backed_sc
        parent_pvc = self.pvc_obj.name
        clone_yaml = constants.CSI_RBD_PVC_CLONE_YAML
        if interface_type == constants.CEPHFILESYSTEM:
            clone_yaml = constants.CSI_CEPHFS_PVC_CLONE_YAML
        cloned_pvc_obj = pvc.create_pvc_clone(sc_name, parent_pvc, clone_yaml)
        teardown_factory(cloned_pvc_obj)
        helpers.wait_for_resource_state(cloned_pvc_obj, constants.STATUS_BOUND)
        cloned_pvc_obj.reload()

        # Create and attach pod to the pvc
        clone_pod_obj = helpers.create_pod(
            interface_type=interface_type,
            pvc_name=cloned_pvc_obj.name,
            namespace=cloned_pvc_obj.namespace,
            pod_dict_path=constants.NGINX_POD_YAML,
        )
        # Hand the pod to teardown_factory before waiting on it, as is done
        # for the cloned PVC, so that the pod is still passed on when the
        # wait below fails
        teardown_factory(clone_pod_obj)
        # Confirm that the pod is running
        helpers.wait_for_resource_state(resource=clone_pod_obj,
                                        state=constants.STATUS_RUNNING)
        clone_pod_obj.reload()

        # Verify file's presence on the new pod
        logger.info(f"Checking the existence of {file_name} on cloned pod "
                    f"{clone_pod_obj.name}")
        assert pod.check_file_existence(
            clone_pod_obj, file_path), f"File {file_path} does not exist"
        logger.info(f"File {file_name} exists in {clone_pod_obj.name}")

        # Verify Contents of a file in the cloned pvc
        # by validating if md5sum matches.
        logger.info(f"Verifying that md5sum of {file_name} "
                    f"on pod {self.pod_obj.name} matches with md5sum "
                    f"of the same file on restore pod {clone_pod_obj.name}")
        assert pod.verify_data_integrity(
            clone_pod_obj, file_name,
            orig_md5_sum), "Data integrity check failed"
        logger.info("Data integrity check passed, md5sum are same")

        logger.info("Run IO on new pod")
        clone_pod_obj.run_io(storage_type="fs", size="100M", runtime=10)

        # Wait for IO to finish on the new pod
        clone_pod_obj.get_fio_results()
        logger.info(f"IO completed on pod {clone_pod_obj.name}")
    def test_pvc_snapshot_performance(self, pvc_size):
        """
        Measure PVC snapshot creation and restore performance.

        Every test phase performs the following steps:

        1. Run I/O on a pod file
        2. Calculate md5sum of the file
        3. Take a snapshot of the PVC and measure the total snapshot
           creation time and the CSI snapshot creation time
        4. Restore from the snapshot and measure the time
        5. Attach a new pod to the restored PVC
        6. Verify that the file is present on the new pod also
        7. Verify that the md5sum of the file on the new pod matches
           with the md5sum of the file on the original pod

        The scenario runs ``self.tests_numbers`` times. The results of every
        run and the averages over all runs are logged and sent to the ES.

        If the cluster capacity is lower than five times the PVC size, an
        error is logged and the method returns without running any phase.

        Args:
            pvc_size: the size of the PVC to be tested, in GiB - parametrize.
                It is converted with ``int()`` and concatenated to a string,
                so a numeric string is expected.

        """

        # Getting the total Storage capacity
        ceph_cluster = CephCluster()
        ceph_capacity = ceph_cluster.get_ceph_capacity()

        log.info(f"Total capacity size is : {ceph_capacity}")
        log.info(f"PVC Size is : {pvc_size}")
        # The cluster must hold at least five times the requested PVC size.
        needed_capacity = int(pvc_size) * 5
        log.info(f"Needed capacity is {needed_capacity}")
        if int(ceph_capacity) < needed_capacity:
            log.error(
                f"PVC size is {pvc_size}GiB and it is too large for this system"
                f" which have only {ceph_capacity}GiB")
            return
        # Every phase writes one more file of 25% of the PVC size, so the
        # used capacity grows by a quarter of the PVC in each phase.
        filesize = self.pvc_obj.size * 0.25
        # Change the file size to MB and from int to str
        file_size = f"{int(filesize * 1024)}M"

        all_results = []

        self.results_path = get_full_test_logs_path(cname=self)
        log.info(f"Logs file path name is : {self.full_log_path}")

        # Produce ES report
        # Collecting environment information
        self.get_env_info()

        # Initialize the results doc file.
        self.full_results = self.init_full_results(
            ResultsAnalyse(
                self.uuid,
                self.crd_data,
                self.full_log_path,
                "pvc_snapshot_perf",
            ))
        self.full_results.add_key("pvc_size", pvc_size + " GiB")
        self.full_results.add_key("interface", self.sc)
        self.full_results.all_results["creation_time"] = []
        self.full_results.all_results["csi_creation_time"] = []
        self.full_results.all_results["creation_speed"] = []
        self.full_results.all_results["restore_time"] = []
        self.full_results.all_results["restore_speed"] = []
        self.full_results.all_results["restore_csi_time"] = []
        for test_num in range(self.tests_numbers):
            # Both sections carry the same three measurements.
            test_results = {
                "test_num": test_num + 1,
                "dataset": (test_num + 1) * filesize * 1024,  # size in MiB
                "create": {
                    "time": None,
                    "csi_time": None,
                    "speed": None
                },
                "restore": {
                    "time": None,
                    "csi_time": None,
                    "speed": None
                },
            }
            log.info(f"Starting test phase number {test_num}")
            # Step 1. Run I/O on a pod file.
            file_name = f"{self.pod_object.name}-{test_num}"
            log.info(f"Starting IO on the POD {self.pod_object.name}")
            # Going to run only write IO to fill the PVC for the snapshot
            self.pod_object.fillup_fs(size=file_size, fio_filename=file_name)

            # Wait for fio to finish
            fio_result = self.pod_object.get_fio_results()
            err_count = fio_result.get("jobs")[0].get("error")
            assert (
                err_count == 0
            ), f"IO error on pod {self.pod_object.name}. FIO result: {fio_result}"
            log.info("IO on the PVC Finished")

            # Verify presence of the file
            file_path = pod.get_file_path(self.pod_object, file_name)
            log.info(f"Actual file path on the pod {file_path}")
            assert pod.check_file_existence(
                self.pod_object, file_path), f"File {file_name} doesn't exist"
            log.info(f"File {file_name} exists in {self.pod_object.name}")

            # Step 2. Calculate md5sum of the file.
            orig_md5_sum = pod.cal_md5sum(self.pod_object, file_name)

            # Step 3. Take a snapshot of the PVC and measure the time of creation.
            snap_name = self.pvc_obj.name.replace("pvc-test",
                                                  f"snapshot-test{test_num}")
            log.info(f"Taking snapshot of the PVC {snap_name}")

            # The timestamp must be taken before the snapshot is requested,
            # it is the starting point of both creation measurements.
            start_time = datetime.datetime.utcnow().strftime(
                "%Y-%m-%dT%H:%M:%SZ")

            # NOTE(review): self.snap_uid and self.snap_obj are not assigned in
            # this method; presumably measure_create_snapshot_time sets them
            # - confirm.
            test_results["create"]["time"] = self.measure_create_snapshot_time(
                pvc_name=self.pvc_obj.name,
                snap_name=snap_name,
                namespace=self.pod_object.namespace,
                interface=self.interface,
                start_time=start_time,
            )

            test_results["create"]["csi_time"] = (
                performance_lib.measure_csi_snapshot_creation_time(
                    interface=self.interface,
                    snapshot_id=self.snap_uid,
                    start_time=start_time,
                ))

            test_results["create"]["speed"] = int(
                test_results["dataset"] / test_results["create"]["time"])
            log.info(
                f' Test {test_num} dataset is {test_results["dataset"]} MiB')
            log.info(
                f"Snapshot name {snap_name} and id {self.snap_uid} creation time is"
                f' : {test_results["create"]["time"]} sec.')
            log.info(
                f"Snapshot name {snap_name} and id {self.snap_uid} csi creation time is"
                f' : {test_results["create"]["csi_time"]} sec.')
            # The dataset is measured in MiB, so the speed is in MiB/sec.
            log.info(
                f'Snapshot speed is : {test_results["create"]["speed"]} MiB/sec'
            )

            # Step 4. Restore the PVC from the snapshot and measure the time
            # Same Storage class of the original PVC
            sc_name = self.pvc_obj.backed_sc

            # Size should be same as of the original PVC. A dedicated name is
            # used so the `pvc_size` argument is not overwritten.
            restore_size = str(self.pvc_obj.size) + "Gi"

            # Create pvc out of the snapshot
            # Both, the snapshot and the restore PVC should be in same namespace

            log.info("Restoring from the Snapshot")
            restore_pvc_name = self.pvc_obj.name.replace(
                "pvc-test", f"restore-pvc{test_num}")
            restore_pvc_yaml = constants.CSI_RBD_PVC_RESTORE_YAML
            if self.interface == constants.CEPHFILESYSTEM:
                restore_pvc_yaml = constants.CSI_CEPHFS_PVC_RESTORE_YAML

            # Taken before the restore request, used for the CSI measurement.
            csi_start_time = self.get_time("csi")
            log.info("Restoring the PVC from Snapshot")
            restore_pvc_obj = pvc.create_restore_pvc(
                sc_name=sc_name,
                snap_name=self.snap_obj.name,
                namespace=self.snap_obj.namespace,
                size=restore_size,
                pvc_name=restore_pvc_name,
                restore_pvc_yaml=restore_pvc_yaml,
            )
            helpers.wait_for_resource_state(
                restore_pvc_obj,
                constants.STATUS_BOUND,
                timeout=3600  # setting this to 60 Min.
                # since it can take a long time to restore, and we want it to finish.
            )
            restore_pvc_obj.reload()
            log.info("PVC was restored from the snapshot")
            test_results["restore"]["time"] = (
                helpers.measure_pvc_creation_time(
                    self.interface, restore_pvc_obj.name))

            test_results["restore"]["speed"] = int(
                test_results["dataset"] / test_results["restore"]["time"])
            log.info(
                f'Snapshot restore time is : {test_results["restore"]["time"]}'
            )
            log.info(
                f'restore speed is : {test_results["restore"]["speed"]} MiB/sec'
            )

            test_results["restore"]["csi_time"] = (
                performance_lib.csi_pvc_time_measure(
                    self.interface, restore_pvc_obj, "create", csi_start_time))
            log.info(
                f'Snapshot csi restore time is : {test_results["restore"]["csi_time"]}'
            )

            # Step 5. Attach a new pod to the restored PVC
            restore_pod_object = helpers.create_pod(
                interface_type=self.interface,
                pvc_name=restore_pvc_obj.name,
                namespace=self.snap_obj.namespace,
            )

            # Confirm that the pod is running
            helpers.wait_for_resource_state(resource=restore_pod_object,
                                            state=constants.STATUS_RUNNING)
            restore_pod_object.reload()

            # Step 6. Verify that the file is present on the new pod also.
            log.info(f"Checking the existence of {file_name} "
                     f"on restore pod {restore_pod_object.name}")
            assert pod.check_file_existence(
                restore_pod_object,
                file_path), f"File {file_name} doesn't exist"
            log.info(f"File {file_name} exists in {restore_pod_object.name}")

            # Step 7. Verify that the md5sum matches
            log.info(
                f"Verifying that md5sum of {file_name} "
                f"on pod {self.pod_object.name} matches with md5sum "
                f"of the same file on restore pod {restore_pod_object.name}")
            assert pod.verify_data_integrity(
                restore_pod_object, file_name,
                orig_md5_sum), "Data integrity check failed"
            log.info("Data integrity check passed, md5sum are same")

            # The restored pod and PVC are only needed inside this phase.
            restore_pod_object.delete()
            restore_pvc_obj.delete()

            all_results.append(test_results)

        # clean the environment
        self.pod_object.delete()
        self.pvc_obj.delete()
        self.delete_test_project()

        # logging the test summary, all info in one place for easy log reading
        c_speed = c_runtime = c_csi_runtime = 0
        r_speed = r_runtime = r_csi_runtime = 0

        log.info("Test summary :")
        for tst in all_results:
            c_speed += tst["create"]["speed"]
            c_runtime += tst["create"]["time"]
            c_csi_runtime += tst["create"]["csi_time"]
            r_speed += tst["restore"]["speed"]
            r_runtime += tst["restore"]["time"]
            r_csi_runtime += tst["restore"]["csi_time"]

            self.full_results.all_results["creation_time"].append(
                tst["create"]["time"])
            self.full_results.all_results["csi_creation_time"].append(
                tst["create"]["csi_time"])
            self.full_results.all_results["creation_speed"].append(
                tst["create"]["speed"])
            self.full_results.all_results["restore_time"].append(
                tst["restore"]["time"])
            self.full_results.all_results["restore_speed"].append(
                tst["restore"]["speed"])
            self.full_results.all_results["restore_csi_time"].append(
                tst["restore"]["csi_time"])
            # Overwritten on every iteration: the dataset of the last run
            # is the value that ends up in the report.
            self.full_results.all_results["dataset_inMiB"] = tst["dataset"]
            log.info(
                f"Test {tst['test_num']} results : dataset is {tst['dataset']} MiB. "
                f"Take snapshot time is {tst['create']['time']} "
                f"at {tst['create']['speed']} MiB/Sec "
                f"Restore from snapshot time is {tst['restore']['time']} "
                f"at {tst['restore']['speed']} MiB/Sec ")

        avg_snap_c_time = c_runtime / self.tests_numbers
        avg_snap_csi_c_time = c_csi_runtime / self.tests_numbers
        avg_snap_c_speed = c_speed / self.tests_numbers
        avg_snap_r_time = r_runtime / self.tests_numbers
        avg_snap_r_speed = r_speed / self.tests_numbers
        avg_snap_r_csi_time = r_csi_runtime / self.tests_numbers
        log.info(f" Average snapshot creation time is {avg_snap_c_time} sec.")
        log.info(
            f" Average csi snapshot creation time is {avg_snap_csi_c_time} sec."
        )
        log.info(
            f" Average snapshot creation speed is {avg_snap_c_speed} MiB/sec")
        log.info(f" Average snapshot restore time is {avg_snap_r_time} sec.")
        log.info(
            f" Average snapshot restore speed is {avg_snap_r_speed} MiB/sec")
        log.info(
            f" Average snapshot restore csi time is {avg_snap_r_csi_time} sec."
        )

        self.full_results.add_key("avg_snap_creation_time_insecs",
                                  avg_snap_c_time)
        self.full_results.add_key("avg_snap_csi_creation_time_insecs",
                                  avg_snap_csi_c_time)
        self.full_results.add_key("avg_snap_creation_speed", avg_snap_c_speed)
        self.full_results.add_key("avg_snap_restore_time_insecs",
                                  avg_snap_r_time)
        self.full_results.add_key("avg_snap_restore_speed", avg_snap_r_speed)
        self.full_results.add_key("avg_snap_restore_csi_time_insecs",
                                  avg_snap_r_csi_time)

        # Write the test results into the ES server
        log.info("writing results to elastic search server")
        if self.full_results.es_write():
            res_link = self.full_results.results_link()

            # write the ES link to the test results in the test log.
            log.info(f"The result can be found at : {res_link}")

            self.write_result_to_file(res_link)
Example #24
0
    def test_snapshot_at_different_usage_level(self, snapshot_factory,
                                               snapshot_restore_factory,
                                               pod_factory):
        """
        Test to take multiple snapshots of same PVC when the PVC usage is at
        0%, 20%, 40%, 60%, and 80%, then delete the parent PVC and restore the
        snapshots to create new PVCs. Delete snapshots and attach the restored
        PVCs to pods to verify the data.

        Args:
            snapshot_factory: callable used to create a snapshot of a PVC
                (called as ``snapshot_factory(pvc_obj, wait=False)``)
            snapshot_restore_factory: callable used to create a PVC out of a
                snapshot
            pod_factory: callable used to create a pod attached to a PVC

        Raises:
            AssertionError: if a file is missing on a restored pod or if its
                md5sum does not match the value recorded before the snapshot

        """
        snapshots = []
        usage_percent = [0, 20, 40, 60, 80]
        # Every IO round writes the same amount of data, so compute it once.
        io_size = f'{int(self.pvc_size/len(usage_percent))}G'
        for usage in usage_percent:
            # The first snapshot is taken at 0% usage, before any IO.
            if usage != 0:
                for pod_obj in self.pods:
                    log.info(
                        f"Running IO on pod {pod_obj.name} to utilize {usage}%"
                    )
                    pod_obj.pvc.filename = f'{pod_obj.name}_{usage}'
                    pod_obj.run_io(
                        storage_type='fs',
                        size=io_size,
                        runtime=20,
                        fio_filename=pod_obj.pvc.filename)
                log.info(f"IO started on all pods to utilize {usage}%")

                for pod_obj in self.pods:
                    # Wait for fio to finish
                    pod_obj.get_fio_results()
                    log.info(f"IO to utilize {usage}% finished on pod "
                             f"{pod_obj.name}")
                    # Calculate md5sum
                    md5_sum = pod.cal_md5sum(pod_obj, pod_obj.pvc.filename)
                    # The PVC accumulates filename:md5sum of all files
                    # written so far.
                    if not getattr(pod_obj.pvc, 'md5_sum', None):
                        pod_obj.pvc.md5_sum = {}
                    pod_obj.pvc.md5_sum[pod_obj.pvc.filename] = md5_sum

            # Take snapshot of all PVCs
            log.info(f"Creating snapshot of all PVCs at {usage}%")
            for pvc_obj in self.pvcs:
                log.info(
                    f"Creating snapshot of PVC {pvc_obj.name} at {usage}%")
                snap_obj = snapshot_factory(pvc_obj, wait=False)
                # Set a dict containing filename:md5sum for later verification.
                # A copy is stored because the dict of the PVC keeps growing
                # in the next rounds.
                snap_obj.md5_sum = deepcopy(getattr(pvc_obj, 'md5_sum', {}))
                snapshots.append(snap_obj)
                log.info(f"Created snapshot of PVC {pvc_obj.name} at {usage}%")
            log.info(f"Created snapshot of all PVCs at {usage}%")
        log.info("Snapshots creation completed.")

        # Verify snapshots are ready
        log.info("Verify snapshots are ready")
        for snapshot in snapshots:
            snapshot.ocp.wait_for_resource(condition='true',
                                           resource_name=snapshot.name,
                                           column=constants.STATUS_READYTOUSE,
                                           timeout=90)

        # Delete pods
        log.info("Deleting the pods")
        for pod_obj in self.pods:
            pod_obj.delete()
            pod_obj.ocp.wait_for_delete(resource_name=pod_obj.name)
        log.info("Deleted all the pods")

        # Delete parent PVCs
        log.info("Deleting parent PVCs")
        for pvc_obj in self.pvcs:
            # TODO: Unblock parent PVC deletion for cephfs PVC when the bug 1854501 is fixed
            if constants.RBD_INTERFACE in pvc_obj.backed_sc:
                # Keep the PV object, it is needed after the PVC is deleted.
                pv_obj = pvc_obj.backed_pv_obj
                pvc_obj.delete()
                pvc_obj.ocp.wait_for_delete(resource_name=pvc_obj.name)
                log.info(f"Deleted PVC {pvc_obj.name}. Verifying whether PV "
                         f"{pv_obj.name} is deleted.")
                pv_obj.ocp.wait_for_delete(resource_name=pv_obj.name)
        log.info("Deleted parent PVCs before restoring snapshot. "
                 "PVs are also deleted.")

        restore_pvc_objs = []

        # Create PVCs out of the snapshots
        log.info("Creating new PVCs from snapshots")
        for snapshot in snapshots:
            log.info(f"Creating a PVC from snapshot {snapshot.name}")
            restore_pvc_obj = snapshot_restore_factory(
                snapshot_obj=snapshot,
                size=f'{self.pvc_size}Gi',
                volume_mode=snapshot.parent_volume_mode,
                access_mode=snapshot.parent_access_mode,
                status='')

            log.info(f"Created PVC {restore_pvc_obj.name} from snapshot "
                     f"{snapshot.name}")
            restore_pvc_objs.append(restore_pvc_obj)
        log.info("Created new PVCs from all the snapshots")

        # Confirm that the restored PVCs are Bound
        log.info("Verify the restored PVCs are Bound")
        for pvc_obj in restore_pvc_objs:
            wait_for_resource_state(resource=pvc_obj,
                                    state=constants.STATUS_BOUND,
                                    timeout=90)
            pvc_obj.reload()
        log.info("Verified: Restored PVCs are Bound.")

        # Delete volume snapshots
        log.info("Deleting snapshots")
        for snapshot in snapshots:
            snapshot.delete()

        # Verify volume snapshots are deleted
        log.info("Verify snapshots are deleted")
        for snapshot in snapshots:
            snapshot.ocp.wait_for_delete(resource_name=snapshot.name)
        log.info("Verified: Snapshots are deleted")

        # Attach the restored PVCs to pods
        log.info("Attach the restored PVCs to pods")
        restore_pod_objs = []
        for restore_pvc_obj in restore_pvc_objs:
            interface = constants.CEPHFILESYSTEM if (
                constants.CEPHFS_INTERFACE
                in restore_pvc_obj.snapshot.parent_sc
            ) else constants.CEPHBLOCKPOOL
            restore_pod_obj = pod_factory(interface=interface,
                                          pvc=restore_pvc_obj,
                                          status='')
            log.info(f"Attached the PVC {restore_pvc_obj.name} to pod "
                     f"{restore_pod_obj.name}")
            restore_pod_objs.append(restore_pod_obj)

        # Verify the new pods are running
        log.info("Verify the new pods are running")
        for pod_obj in restore_pod_objs:
            wait_for_resource_state(pod_obj, constants.STATUS_RUNNING)
        log.info("Verified: New pods are running")

        # Verify md5sum of files
        log.info("Verifying md5sum of files on all the pods")
        for restore_pod_obj in restore_pod_objs:
            log.info(f"Verifying md5sum of these files on pod "
                     f"{restore_pod_obj.name}:"
                     f"{restore_pod_obj.pvc.snapshot.md5_sum}")
            # The dict holds the md5sum recorded on the parent PVC at the
            # time the snapshot was taken, i.e. the expected values.
            for file_name, expected_md5_sum in (
                    restore_pod_obj.pvc.snapshot.md5_sum.items()):
                file_path = pod.get_file_path(restore_pod_obj, file_name)
                log.info(f"Checking the existence of file {file_name} on pod "
                         f"{restore_pod_obj.name}")
                assert pod.check_file_existence(
                    restore_pod_obj,
                    file_path), (f"File {file_name} does not exist on pod "
                                 f"{restore_pod_obj.name}")
                log.info(
                    f"File {file_name} exists on pod {restore_pod_obj.name}")

                # Verify that the md5sum matches. The result is asserted so a
                # failed verification cannot pass unnoticed.
                log.info(f"Verifying md5sum of file {file_name} on pod "
                         f"{restore_pod_obj.name}")
                assert pod.verify_data_integrity(
                    restore_pod_obj, file_name, expected_md5_sum), (
                        f"md5sum mismatch for file {file_name} on pod "
                        f"{restore_pod_obj.name}")
                log.info(f"Verified md5sum of file {file_name} on pod "
                         f"{restore_pod_obj.name}")
            log.info(f"Verified md5sum of these files on pod "
                     f"{restore_pod_obj.name}:"
                     f"{restore_pod_obj.pvc.snapshot.md5_sum}")
        log.info("md5sum verified")
    def test_expansion_snapshot_clone(self, snapshot_factory,
                                      snapshot_restore_factory,
                                      pvc_clone_factory, pod_factory):
        """
        This test performs the following operations :

        Expand parent PVC --> Take snapshot --> Expand parent PVC -->
        Take clone --> Restore snapshot --> Expand cloned and restored PVC -->
        Clone restored PVC --> Snapshot and restore of cloned PVCs -->
        Expand new PVCs

        Data integrity will be checked in each stage as required.
        This test verifies that the clone, snapshot and parent PVCs are
        independent and any operation in one will not impact the other.

        """
        filename = "fio_file"
        filename_restore_clone = "fio_file_restore_clone"
        pvc_size_expand_1 = 4
        pvc_size_expand_2 = 6
        pvc_size_expand_3 = 8
        snapshots = []

        # Run IO
        log.info("Start IO on pods")
        for pod_obj in self.pods:
            log.info(f"Running IO on pod {pod_obj.name}")
            pod_obj.run_io(storage_type="fs",
                           size="1G",
                           runtime=20,
                           fio_filename=filename)
        log.info("IO started on all pods")

        log.info("Wait for IO completion on pods")
        for pod_obj in self.pods:
            pod_obj.get_fio_results()
            log.info(f"IO finished on pod {pod_obj.name}")
            # Calculate md5sum
            md5sum = pod.cal_md5sum(pod_obj, filename)
            pod_obj.pvc.md5sum = md5sum
        log.info("IO completed on all pods")

        # Expand PVCs
        log.info(f"Expanding PVCs to {pvc_size_expand_1}Gi")
        for pvc_obj in self.pvcs:
            log.info(
                f"Expanding size of PVC {pvc_obj.name} to {pvc_size_expand_1}Gi"
            )
            pvc_obj.resize_pvc(pvc_size_expand_1, True)
        log.info(
            f"Verified: Size of all PVCs are expanded to {pvc_size_expand_1}Gi"
        )

        # Take snapshot of all PVCs
        log.info("Creating snapshot of all PVCs")
        for pvc_obj in self.pvcs:
            log.info(f"Creating snapshot of PVC {pvc_obj.name}")
            snap_obj = snapshot_factory(pvc_obj, wait=False)
            snap_obj.md5sum = pvc_obj.md5sum
            snapshots.append(snap_obj)
            log.info(f"Created snapshot of PVC {pvc_obj.name}")
        log.info("Created snapshot of all PVCs")

        # Verify snapshots are ready
        log.info("Verify snapshots are ready")
        for snap_obj in snapshots:
            snap_obj.ocp.wait_for_resource(
                condition="true",
                resource_name=snap_obj.name,
                column=constants.STATUS_READYTOUSE,
                timeout=180,
            )
            snap_obj.reload()
        log.info("Verified: Snapshots are Ready")

        # Expand PVCs
        log.info(f"Expanding PVCs to {pvc_size_expand_2}Gi")
        for pvc_obj in self.pvcs:
            log.info(
                f"Expanding size of PVC {pvc_obj.name} to {pvc_size_expand_2}Gi"
            )
            pvc_obj.resize_pvc(pvc_size_expand_2, True)
        log.info(
            f"Verified: Size of all PVCs are expanded to {pvc_size_expand_2}Gi"
        )

        # Clone PVCs
        log.info("Creating clone of all PVCs")
        clone_objs = []
        for pvc_obj in self.pvcs:
            log.info(f"Creating clone of PVC {pvc_obj.name}")
            clone_obj = pvc_clone_factory(
                pvc_obj=pvc_obj,
                status="",
                volume_mode=constants.VOLUME_MODE_FILESYSTEM)
            clone_obj.md5sum = pvc_obj.md5sum
            clone_objs.append(clone_obj)
            log.info(f"Created clone of PVC {pvc_obj.name}")
        log.info("Created clone of all PVCs")

        log.info("Wait for cloned PVcs to reach Bound state and verify size")
        for pvc_obj in clone_objs:
            helpers.wait_for_resource_state(resource=pvc_obj,
                                            state=constants.STATUS_BOUND,
                                            timeout=180)
            assert pvc_obj.size == pvc_size_expand_2, (
                f"Size is not {pvc_size_expand_2} but {pvc_obj.size} in "
                f"cloned PVC {pvc_obj.name}")
        log.info(
            f"Cloned PVCs reached Bound state. Verified the size of all PVCs "
            f"as {pvc_size_expand_2}Gi")

        # Ensure restore size is not impacted by parent PVC expansion
        log.info("Verify restore size of snapshots")
        for snapshot_obj in snapshots:
            snapshot_info = snapshot_obj.get()
            assert snapshot_info["status"]["restoreSize"] == (
                f"{pvc_size_expand_1}Gi"), (
                    f"Restore size mismatch in snapshot {snapshot_obj.name}\n"
                    f"{snapshot_info}")
        log.info(
            f"Verified: Restore size of all snapshots are {pvc_size_expand_1}Gi"
        )

        # Restore snapshots
        log.info("Restore snapshots")
        restore_objs = []
        for snap_obj in snapshots:
            restore_obj = snapshot_restore_factory(snapshot_obj=snap_obj,
                                                   status="")
            restore_obj.md5sum = snap_obj.md5sum
            restore_objs.append(restore_obj)

        log.info("Verify restored PVCs are Bound")
        for pvc_obj in restore_objs:
            helpers.wait_for_resource_state(resource=pvc_obj,
                                            state=constants.STATUS_BOUND,
                                            timeout=180)
            pvc_obj.reload()
        log.info("Verified: Restored PVCs are Bound.")

        # Attach the restored and cloned PVCs to pods
        log.info("Attach the restored and cloned PVCs to pods")
        restore_clone_pod_objs = []
        for pvc_obj in restore_objs + clone_objs:
            interface = (constants.CEPHFILESYSTEM if
                         (constants.CEPHFS_INTERFACE
                          in pvc_obj.backed_sc) else constants.CEPHBLOCKPOOL)
            pod_obj = pod_factory(interface=interface, pvc=pvc_obj, status="")
            log.info(f"Attached the PVC {pvc_obj.name} to pod {pod_obj.name}")
            restore_clone_pod_objs.append(pod_obj)

        log.info("Verify pods are Running")
        for pod_obj in restore_clone_pod_objs:
            helpers.wait_for_resource_state(resource=pod_obj,
                                            state=constants.STATUS_RUNNING,
                                            timeout=180)
            pod_obj.reload()
        log.info("Verified: Pods reached Running state")

        # Expand cloned and restored PVCs
        log.info(
            f"Expanding cloned and restored PVCs to {pvc_size_expand_3}Gi")
        for pvc_obj in clone_objs + restore_objs:
            log.info(f"Expanding size of PVC {pvc_obj.name} to "
                     f"{pvc_size_expand_3}Gi from {pvc_obj.size}")
            pvc_obj.resize_pvc(pvc_size_expand_3, True)
        log.info(
            f"Verified: Size of all cloned and restored PVCs are expanded to "
            f"{pvc_size_expand_3}G")

        # Run IO on pods attached with cloned and restored PVCs
        log.info("Starting IO on pods attached with cloned and restored PVCs")
        for pod_obj in restore_clone_pod_objs:
            log.info(f"Running IO on pod {pod_obj.name}")
            pod_obj.run_io(
                storage_type="fs",
                size="1G",
                runtime=20,
                fio_filename=filename_restore_clone,
            )
        log.info("IO started on all pods")

        log.info("Waiting for IO completion on pods attached with cloned and "
                 "restored PVCs")
        for pod_obj in restore_clone_pod_objs:
            pod_obj.get_fio_results()
            log.info(f"IO finished on pod {pod_obj.name}")
            # Calculate md5sum of second file 'filename_restore_clone'
            md5sum = pod.cal_md5sum(pod_obj, filename_restore_clone)
            pod_obj.pvc.md5sum_new = md5sum
        log.info(f"IO completed on all pods. Obtained md5sum of file "
                 f"{filename_restore_clone}")

        # Verify md5sum of first file 'filename'
        log.info(f"Verify md5sum of file {filename} on pods")
        for pod_obj in restore_clone_pod_objs:
            pod.verify_data_integrity(pod_obj, filename, pod_obj.pvc.md5sum)
            log.info(f"Verified: md5sum of {filename} on pod {pod_obj.name} "
                     f"matches with the original md5sum")
        log.info("Data integrity check passed on all pods where restored and "
                 "cloned PVCs are attached")

        # Clone the restored PVCs
        log.info("Creating clone of restored PVCs")
        restored_clone_objs = []
        for pvc_obj in restore_objs:
            log.info(f"Creating clone of restored PVC {pvc_obj.name}")
            clone_obj = pvc_clone_factory(
                pvc_obj=pvc_obj,
                status="",
                volume_mode=constants.VOLUME_MODE_FILESYSTEM)
            clone_obj.md5sum = pvc_obj.md5sum
            clone_obj.md5sum_new = pvc_obj.md5sum_new
            restored_clone_objs.append(clone_obj)
            log.info(f"Created clone of restored PVC {pvc_obj.name}")
        log.info("Created clone of restored all PVCs")

        log.info("Wait for cloned PVcs to reach Bound state and verify size")
        for pvc_obj in restored_clone_objs:
            helpers.wait_for_resource_state(resource=pvc_obj,
                                            state=constants.STATUS_BOUND,
                                            timeout=180)
            assert pvc_obj.size == pvc_size_expand_3, (
                f"Size is not {pvc_size_expand_3} but {pvc_obj.size} in "
                f"cloned PVC {pvc_obj.name}")
        log.info(
            f"Cloned PVCs reached Bound state. Verified the size of all PVCs "
            f"as {pvc_size_expand_3}Gi")

        # Take snapshot of all cloned PVCs
        snapshots_new = []
        log.info("Creating snapshot of all cloned PVCs")
        for pvc_obj in clone_objs + restored_clone_objs:
            log.info(f"Creating snapshot of PVC {pvc_obj.name}")
            snap_obj = snapshot_factory(pvc_obj, wait=False)
            snap_obj.md5sum = pvc_obj.md5sum
            snap_obj.md5sum_new = pvc_obj.md5sum_new
            snapshots_new.append(snap_obj)
            log.info(f"Created snapshot of PVC {pvc_obj.name}")
        log.info("Created snapshot of all cloned PVCs")

        # Verify snapshots are ready
        log.info("Verify snapshots of cloned PVCs are Ready")
        for snap_obj in snapshots_new:
            snap_obj.ocp.wait_for_resource(
                condition="true",
                resource_name=snap_obj.name,
                column=constants.STATUS_READYTOUSE,
                timeout=180,
            )
            snap_obj.reload()
        log.info("Verified: Snapshots of cloned PVCs are Ready")

        # Restore snapshots
        log.info("Restoring snapshots of cloned PVCs")
        restore_objs_new = []
        for snap_obj in snapshots_new:
            restore_obj = snapshot_restore_factory(snap_obj, status="")
            restore_obj.md5sum = snap_obj.md5sum
            restore_obj.md5sum_new = snap_obj.md5sum_new
            restore_objs_new.append(restore_obj)

        log.info("Verify restored PVCs are Bound")
        for pvc_obj in restore_objs_new:
            helpers.wait_for_resource_state(resource=pvc_obj,
                                            state=constants.STATUS_BOUND,
                                            timeout=180)
            pvc_obj.reload()
        log.info("Verified: Restored PVCs are Bound.")

        # Delete pods to attach the cloned PVCs to new pods
        log.info("Delete pods")
        for pod_obj in restore_clone_pod_objs:
            pod_obj.delete()

        for pod_obj in restore_clone_pod_objs:
            pod_obj.ocp.wait_for_delete(resource_name=pod_obj.name)
        log.info("Pods are deleted")

        # Attach the restored and cloned PVCs to new pods
        log.info("Attach the restored and cloned PVCs to new pods")
        restore_clone_pod_objs.clear()
        for pvc_obj in restore_objs_new + clone_objs:
            interface = (constants.CEPHFILESYSTEM if
                         (constants.CEPHFS_INTERFACE
                          in pvc_obj.backed_sc) else constants.CEPHBLOCKPOOL)
            pod_obj = pod_factory(interface=interface, pvc=pvc_obj, status="")
            log.info(f"Attached the PVC {pvc_obj.name} to pod {pod_obj.name}")
            restore_clone_pod_objs.append(pod_obj)

        log.info("Verify pods are Running")
        for pod_obj in restore_clone_pod_objs:
            helpers.wait_for_resource_state(resource=pod_obj,
                                            state=constants.STATUS_RUNNING,
                                            timeout=180)
            pod_obj.reload()
        log.info("Verified: Pods reached Running state")

        # Expand PVCs
        pvc_size_expand_4 = pvc_size_expand_3 + 2
        log.info(
            f"Expanding restored and cloned PVCs to {pvc_size_expand_4}Gi")
        for pvc_obj in restore_objs_new + clone_objs:
            log.info(
                f"Expanding size of PVC {pvc_obj.name} to {pvc_size_expand_4}Gi"
            )
            pvc_obj.resize_pvc(pvc_size_expand_4, True)
        log.info(
            f"Verified: Size of all PVCs are expanded to {pvc_size_expand_4}Gi"
        )

        # Verify md5sum of both files
        log.info(
            f"Verify md5sum of files {filename} and {filename_restore_clone}")
        for pod_obj in restore_clone_pod_objs:
            pod.verify_data_integrity(pod_obj, filename, pod_obj.pvc.md5sum)
            log.info(f"Verified: md5sum of {filename} on pod {pod_obj.name} "
                     f"matches with the original md5sum")
            pod.verify_data_integrity(pod_obj, filename_restore_clone,
                                      pod_obj.pvc.md5sum_new)
            log.info(f"Verified: md5sum of {filename_restore_clone} on pod "
                     f"{pod_obj.name} matches with the original md5sum")
        log.info("Data integrity check passed on all pods where restored and "
                 "cloned PVCs are attached")
Example #26
0
    def test_resource_deletion_during_pvc_clone(self, pvc_clone_factory,
                                                pod_factory):
        """
        Verify PVC clone will succeed if rook-ceph, csi pods are re-spun
        while creating the clone

        Steps performed by this test:
            1. Run fio on every pod in 'self.pods' and store the md5sum of
               the written data on the pod's PVC object.
            2. Prepare one Disruptions object per entry of 'pods_to_delete'.
            3. Start cloning every PVC in 'self.pvcs' and delete the selected
               pods while the clone requests are in progress.
            4. Wait for the cloned PVCs to be Bound, attach each of them to a
               new pod and compare the md5sum with the value stored for the
               parent PVC.
            5. Run fio on the new pods.

        Args:
            pvc_clone_factory: Callable which creates a clone of the given
                PVC. It is invoked with 'pvc_obj', 'status', 'access_mode'
                and 'volume_mode' (presumably a pytest fixture - confirm
                against conftest).
            pod_factory: Callable which creates a pod using the given PVC.
                It is invoked with 'interface', 'pvc', 'status',
                'pod_dict_path' and 'raw_block_pv' (presumably a pytest
                fixture - confirm against conftest).

        """
        pods_to_delete = [
            "rbdplugin_provisioner",
            "cephfsplugin_provisioner",
            "cephfsplugin",
            "rbdplugin",
            "osd",
            "mgr",
        ]
        disruption_ops = [
            disruption_helpers.Disruptions() for _ in pods_to_delete
        ]
        file_name = "file_clone"

        # Run IO
        log.info("Running fio on all pods to create a file")
        for pod_obj in self.pods:
            storage_type = ("block" if
                            (pod_obj.pvc.volume_mode
                             == constants.VOLUME_MODE_BLOCK) else "fs")
            pod_obj.run_io(
                storage_type=storage_type,
                size="1G",
                runtime=30,
                fio_filename=file_name,
                end_fsync=1,
            )

        log.info("Wait for IO to complete on pods")
        for pod_obj in self.pods:
            pod_obj.get_fio_results()
            log.info(f"Verified IO on pod {pod_obj.name}")
            # Calculate md5sum. For Block volume mode the checksum is taken
            # from the storage path returned by the pod, not from the file.
            file_name_pod = (file_name if
                             (pod_obj.pvc.volume_mode
                              == constants.VOLUME_MODE_FILESYSTEM) else
                             pod_obj.get_storage_path(storage_type="block"))
            pod_obj.pvc.md5sum = cal_md5sum(
                pod_obj,
                file_name_pod,
                pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK,
            )
            log.info(f"md5sum obtained from pod {pod_obj.name}")
        log.info("IO is successful on all pods")

        # Select the pods to be deleted
        for disruption, pod_type in zip(disruption_ops, pods_to_delete):
            cluster_index = None
            # 'provider_index' will not be None if the platform is Managed Services
            if self.provider_index is not None:
                if pod_type in ["osd", "mgr"]:
                    cluster_index = self.provider_index
                    config.switch_to_provider()
                else:
                    cluster_index = self.consumer_index
                    config.switch_ctx(cluster_index)

            disruption.set_resource(resource=pod_type,
                                    cluster_index=cluster_index)

        # Switch cluster context if the platform is MS. 'provider_index' will not be None if platform is MS.
        if self.provider_index is not None:
            config.switch_ctx(self.consumer_index)

        # The executor is used as a context manager so that its worker
        # threads are released even when a clone or a pod deletion fails.
        # One worker per submitted task lets all of them start immediately.
        with ThreadPoolExecutor(max_workers=len(self.pvcs) +
                                len(pods_to_delete)) as executor:
            # Clone PVCs
            log.info("Start creating clone of PVCs")
            for pvc_obj in self.pvcs:
                log.info(f"Creating clone of PVC {pvc_obj.name}")
                pvc_obj.clone_proc = executor.submit(
                    pvc_clone_factory,
                    pvc_obj=pvc_obj,
                    status="",
                    access_mode=pvc_obj.get_pvc_access_mode,
                    volume_mode=pvc_obj.volume_mode,
                )
            log.info("Started creating clone")

            # Delete the pods 'pods_to_delete'
            log.info(f"Deleting pods {pods_to_delete}")
            for disruption in disruption_ops:
                disruption.delete_proc = executor.submit(
                    disruption.delete_resource)

            # Wait for delete and recovery. result() re-raises any exception
            # which occurred in the worker thread.
            for disruption in disruption_ops:
                disruption.delete_proc.result()

            # Get cloned PVCs
            clone_pvc_objs = []
            for pvc_obj in self.pvcs:
                clone_obj = pvc_obj.clone_proc.result()
                clone_pvc_objs.append(clone_obj)
                log.info(
                    f"Created clone {clone_obj.name} of PVC {pvc_obj.name}")
            log.info("Created clone of all PVCs")

        # Confirm that the cloned PVCs are Bound
        log.info("Verifying the cloned PVCs are Bound")
        for pvc_obj in clone_pvc_objs:
            wait_for_resource_state(resource=pvc_obj,
                                    state=constants.STATUS_BOUND,
                                    timeout=300)
            pvc_obj.reload()
            # Cache the volume mode on the object; it is read below to
            # select the pod yaml and the md5sum calculation method.
            pvc_obj.volume_mode = pvc_obj.data["spec"]["volumeMode"]
        log.info("Verified: Cloned PVCs are Bound.")

        clone_pod_objs = []

        # Attach the cloned PVCs to pods
        log.info("Attach the cloned PVCs to pods")
        for pvc_obj in clone_pvc_objs:
            if pvc_obj.volume_mode == constants.VOLUME_MODE_BLOCK:
                pod_dict_path = constants.CSI_RBD_RAW_BLOCK_POD_YAML
            else:
                pod_dict_path = ""
            clone_pod_obj = pod_factory(
                interface=pvc_obj.interface,
                pvc=pvc_obj,
                status="",
                pod_dict_path=pod_dict_path,
                raw_block_pv=pvc_obj.volume_mode ==
                constants.VOLUME_MODE_BLOCK,
            )
            clone_pod_objs.append(clone_pod_obj)

        # Verify the new pods are running
        log.info("Verify the new pods are running")
        for pod_obj in clone_pod_objs:
            wait_for_resource_state(pod_obj, constants.STATUS_RUNNING)
        log.info("Verified: New pods are running")

        # Verify md5sum
        log.info("Verify md5sum")
        for pod_obj in clone_pod_objs:
            file_name_pod = (file_name if
                             (pod_obj.pvc.volume_mode
                              == constants.VOLUME_MODE_FILESYSTEM) else
                             pod_obj.get_storage_path(storage_type="block"))
            # NOTE(review): 'parent' is presumably set on the clone by
            # pvc_clone_factory and refers to the source PVC - confirm.
            verify_data_integrity(
                pod_obj,
                file_name_pod,
                pod_obj.pvc.parent.md5sum,
                pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK,
            )
            log.info(
                f"Verified: md5sum of {file_name_pod} on pod {pod_obj.name} "
                f"matches with the original md5sum")
        log.info("Data integrity check passed on all pods")

        # Run IO
        log.info("Running IO on new pods")
        for pod_obj in clone_pod_objs:
            storage_type = ("block" if
                            (pod_obj.pvc.volume_mode
                             == constants.VOLUME_MODE_BLOCK) else "fs")
            pod_obj.run_io(
                storage_type=storage_type,
                size="1G",
                runtime=20,
                fio_filename=file_name,
                end_fsync=1,
            )

        log.info("Wait for IO to complete on new pods")
        for pod_obj in clone_pod_objs:
            pod_obj.get_fio_results()
            log.info(f"Verified IO on new pod {pod_obj.name}")
        log.info("IO completed on new pods")
    def test_encrypted_rbd_block_pvc_snapshot(
        self,
        kms_provider,
        snapshot_factory,
        snapshot_restore_factory,
        pod_factory,
        kv_version,
    ):
        """
        Snapshot and restore of encrypted RBD PVCs having Block volume mode

        Stages of the test:
            1. On each pod, check that 'lsblk' lists a 'crypt' entry for the
               volume handle, record the md5sum of the block device and
               write data using fio.
            2. Check that the md5sum changed, create a snapshot of each PVC
               and wait for the snapshots to be ready. When the KMS provider
               is Vault, check that a key exists for each snapshot handle.
            3. Delete the pods, the parent PVCs and wait for the PVs to be
               deleted.
            4. Create a PVC from each snapshot, attach the PVCs to new pods,
               check the encrypted device and the md5sum, then run fio.
            5. Delete the new pods, restored PVCs and snapshots. When the KMS
               provider is Vault and either kv version is "v1" or the OCS
               version is 4.9 or above, check that all keys are removed.

        Args:
            kms_provider: KMS provider in use. Vault specific checks run only
                when it equals constants.VAULT_KMS_PROVIDER.
            snapshot_factory: Callable which creates a snapshot of a PVC.
            snapshot_restore_factory: Callable which creates a PVC from a
                snapshot.
            pod_factory: Callable handed over to create_pods() for pod
                creation.
            kv_version: Vault kv version. Only compared against "v1" here.

        Raises:
            ResourceNotFoundError: If the encrypted device is not found in a
                pod or an expected key is not found in Vault.
            KMSResourceCleaneupError: If a key is still present in Vault
                after the PVCs and snapshots are deleted.

        """
        # Plain comparison, evaluated once and reused by every Vault check
        vault_in_use = kms_provider == constants.VAULT_KMS_PROVIDER

        log.info(
            "Check for encrypted device, find initial md5sum value and run IO on all pods"
        )
        for handle, app_pod in zip(self.vol_handles, self.pod_objs):
            # The encrypted device must be visible from inside the pod
            crypt_device = app_pod.exec_sh_cmd_on_pod(
                command=f"lsblk | grep {handle} | grep crypt")
            if not crypt_device:
                raise ResourceNotFoundError(
                    f"Encrypted device not found in {app_pod.name}")
            log.info(f"Encrypted device found in {app_pod.name}")

            # md5sum of the device before any data is written
            device_path = app_pod.get_storage_path(storage_type="block")
            app_pod.md5sum_before_io = cal_md5sum(
                pod_obj=app_pod,
                file_name=device_path,
                block=True,
            )

            io_size = f"{self.pvc_size - 1}G"
            app_pod.run_io(
                storage_type="block",
                size=io_size,
                io_direction="write",
                runtime=60,
            )
        log.info("IO started on all pods")

        # Block until fio has finished on every pod
        for app_pod in self.pod_objs:
            app_pod.get_fio_results()
        log.info("IO completed on all pods")

        snap_objs = []
        snap_handles = []

        # The data must have changed before a snapshot is requested
        log.info(
            "Verify md5sum has changed after IO and create snapshot from all PVCs"
        )
        for app_pod in self.pod_objs:
            device_path = app_pod.get_storage_path(storage_type="block")
            md5sum_after_io = cal_md5sum(
                pod_obj=app_pod,
                file_name=device_path,
                block=True,
            )
            assert app_pod.md5sum_before_io != md5sum_after_io, (
                f"md5sum has not changed after IO on pod {app_pod.name}")
            log.info(f"Creating snapshot of PVC {app_pod.pvc.name}")
            new_snapshot = snapshot_factory(app_pod.pvc, wait=False)
            new_snapshot.md5sum = md5sum_after_io
            snap_objs.append(new_snapshot)
        log.info("Snapshots created")

        # Wait for the snapshots and look up their keys in Vault
        log.info("Verify snapshots are ready")
        for snapshot in snap_objs:
            snapshot.ocp.wait_for_resource(
                condition="true",
                resource_name=snapshot.name,
                column=constants.STATUS_READYTOUSE,
                timeout=180,
            )
            content_obj = get_snapshot_content_obj(snap_obj=snapshot)
            content_status = content_obj.get().get("status")
            snap_handle = content_status.get("snapshotHandle")
            if vault_in_use:
                key_found = kms.is_key_present_in_path(
                    key=snap_handle, path=self.kms.vault_backend_path)
                if not key_found:
                    raise ResourceNotFoundError(
                        f"Vault: Key not found for snapshot {snapshot.name}")
                log.info(f"Vault: Found key for snapshot {snapshot.name}")
            snap_handles.append(snap_handle)

        # Remove the pods which are using the parent PVCs
        log.info("Deleting the pods")
        for app_pod in self.pod_objs:
            app_pod.delete()
            app_pod.ocp.wait_for_delete(resource_name=app_pod.name)
        log.info("Deleted all the pods")

        # Remove the parent PVCs so that the restore cannot depend on them
        log.info("Deleting parent PVCs")
        for parent_pvc in self.pvc_objs:
            # Fetch the PV object first; it is needed after the PVC is gone
            parent_pv = parent_pvc.backed_pv_obj
            parent_pvc.delete()
            parent_pvc.ocp.wait_for_delete(resource_name=parent_pvc.name)
            log.info(f"Deleted PVC {parent_pvc.name}. Verifying whether PV "
                     f"{parent_pv.name} is deleted.")
            parent_pv.ocp.wait_for_delete(resource_name=parent_pv.name)
        log.info(
            "All parent PVCs and PVs are deleted before restoring snapshot.")

        restore_pvc_objs = []
        restore_vol_handles = []

        # Restore every snapshot to a new PVC
        log.info("Creating new PVCs from snapshots")
        for snapshot in snap_objs:
            log.info(f"Creating a PVC from snapshot {snapshot.name}")
            restored_pvc = snapshot_restore_factory(
                snapshot_obj=snapshot,
                storageclass=self.sc_obj.name,
                size=f"{self.pvc_size}Gi",
                volume_mode=snapshot.parent_volume_mode,
                access_mode=snapshot.parent_access_mode,
                status="",
            )
            log.info(f"Created PVC {restored_pvc.name} from snapshot "
                     f"{snapshot.name}")
            # Carry the expected checksum over to the restored PVC
            restored_pvc.md5sum = snapshot.md5sum
            restore_pvc_objs.append(restored_pvc)
        log.info("Created new PVCs from all the snapshots")

        # Restored PVCs must reach Bound state
        log.info("Verify the restored PVCs are Bound")
        for restored_pvc in restore_pvc_objs:
            wait_for_resource_state(resource=restored_pvc,
                                    state=constants.STATUS_BOUND,
                                    timeout=180)
            restored_pvc.reload()
        log.info("Verified: Restored PVCs are Bound.")

        # Attach the restored PVCs to pods (pods_for_rwx is 1)
        log.info("Attach the restored PVCs to pods")
        restore_pod_objs = create_pods(
            restore_pvc_objs,
            pod_factory,
            constants.CEPHBLOCKPOOL,
            pods_for_rwx=1,
            status="",
        )

        # New pods must reach Running state
        log.info("Verify the new pods are running")
        for new_pod in restore_pod_objs:
            # A longer timeout is used on the IBM Cloud platform
            if config.ENV_DATA["platform"] == constants.IBMCLOUD_PLATFORM:
                timeout = 300
            else:
                timeout = 60
            wait_for_resource_state(new_pod, constants.STATUS_RUNNING, timeout)
        log.info("Verified: New pods are running")

        # Collect volume handles of restored PVCs and look up keys in Vault
        for restored_pvc in restore_pvc_objs:
            restored_pv = restored_pvc.backed_pv_obj
            csi_spec = restored_pv.get().get("spec").get("csi")
            restored_handle = csi_spec.get("volumeHandle")
            restore_vol_handles.append(restored_handle)
            if vault_in_use:
                key_found = kms.is_key_present_in_path(
                    key=restored_handle, path=self.kms.vault_backend_path)
                if not key_found:
                    raise ResourceNotFoundError(
                        f"Vault: Key not found for restored PVC {restored_pvc.name}"
                    )
                log.info(
                    f"Vault: Found key for restore PVC {restored_pvc.name}")

        # Encrypted device and data checks on the new pods
        for handle, new_pod in zip(restore_vol_handles, restore_pod_objs):
            crypt_device = new_pod.exec_sh_cmd_on_pod(
                command=f"lsblk | grep {handle} | grep crypt")
            if not crypt_device:
                raise ResourceNotFoundError(
                    f"Encrypted device not found in {new_pod.name}")
            log.info(f"Encrypted device found in {new_pod.name}")

            log.info(f"Verifying md5sum on pod {new_pod.name}")
            device_path = new_pod.get_storage_path(storage_type="block")
            verify_data_integrity(
                pod_obj=new_pod,
                file_name=device_path,
                original_md5sum=new_pod.pvc.md5sum,
                block=True,
            )
            log.info(f"Verified md5sum on pod {new_pod.name}")

        # Start fio on the new pods
        log.info("Starting IO on new pods")
        for new_pod in restore_pod_objs:
            new_pod.run_io(storage_type="block", size="500M", runtime=15)

        # Block until fio has finished on the new pods
        log.info("Waiting for IO completion on new pods")
        for new_pod in restore_pod_objs:
            new_pod.get_fio_results()
        log.info("IO completed on new pods.")

        # Clean up: pods first, then restored PVCs, then snapshots
        log.info("Deleting pods using restored PVCs")
        for new_pod in restore_pod_objs:
            new_pod.delete()
            new_pod.ocp.wait_for_delete(resource_name=new_pod.name)

        log.info("Deleting restored PVCs")
        for restored_pvc in restore_pvc_objs:
            restored_pv = restored_pvc.backed_pv_obj
            restored_pvc.delete()
            restored_pv.ocp.wait_for_delete(resource_name=restored_pv.name)

        log.info("Deleting the snapshots")
        for snapshot in snap_objs:
            # The content object is looked up before the snapshot is deleted
            content_obj = get_snapshot_content_obj(snap_obj=snapshot)
            snapshot.delete()
            content_obj.ocp.wait_for_delete(resource_name=content_obj.name)

        # Key removal is verified only for Vault, and only when kv version
        # is v1 or the OCS version is 4.9 or above
        if vault_in_use and (
                kv_version == "v1" or Version.coerce(
                    config.ENV_DATA["ocs_version"]) >= Version.coerce("4.9")):
            log.info(
                "Verify whether the keys for PVCs and snapshots are deleted in vault"
            )
            for key in self.vol_handles + snap_handles + restore_vol_handles:
                if kms.is_key_present_in_path(
                        key=key, path=self.kms.vault_backend_path):
                    raise KMSResourceCleaneupError(
                        f"Vault: Key deletion failed for {key}")
                log.info(f"Vault: Key deleted for {key}")
            log.info("All keys from vault were deleted")