Ejemplo n.º 1
0
def osd_node_reboot():
    """
    Reboot the worker node that runs an OSD and check ceph-tools recovery

    The target is the first node name returned by
    ``get_osd_running_nodes()``. The matching node object is looked up in
    ``get_nodes()`` and restarted through the platform nodes object, after
    which the function sleeps for a fixed period and asserts that
    ``wait_for_ct_pod_recovery()`` succeeded.

    Raises:
        IndexError: in case no OSD node name is reported, or no node object
            matches the reported name
        AssertionError: in case the ceph-tools pod was not recovered

    """
    # Single source of truth for the wait, so the log message below can
    # never drift from the actual sleep duration again.
    recovery_wait_seconds = 320

    nodes = PlatformNodesFactory().get_nodes_platform()
    osd_nodes_names = get_osd_running_nodes()
    # Only the first reported OSD node is rebooted
    target_node_name = osd_nodes_names[0]
    osd_node_to_reboot = [
        node for node in get_nodes() if get_node_name(node) == target_node_name
    ]
    log.info(f"Rebooting OSD node: {get_node_name(osd_node_to_reboot[0])}")
    nodes.restart_nodes(osd_node_to_reboot)

    log.info(f"Sleeping {recovery_wait_seconds} seconds")
    time.sleep(recovery_wait_seconds)
    assert (
        wait_for_ct_pod_recovery()
    ), "Ceph tools pod failed to come up on another node"
    def test_worker_node_restart_during_pvc_clone(
        self, nodes, pvc_clone_factory, pod_factory
    ):
        """
        Check that PVC clones are created successfully when a worker node
        is restarted while the clone requests are in progress

        Steps:
            1. Run IO on all pods and store the md5sum on each parent PVC
            2. Restart one worker node and, in parallel, clone every PVC
            3. Wait for the node to be Ready and the clones to be Bound
            4. Attach each clone to a new pod and wait for it to run
            5. Compare the md5sum on the clone with the parent's md5sum
            6. Run IO on the new pods

        """
        file_name = "fio_test"
        # One worker per clone request plus one for the node restart
        worker_count = len(self.pvcs) + 1
        executor = ThreadPoolExecutor(max_workers=worker_count)
        selected_node = node.get_nodes(
            node_type=constants.WORKER_MACHINE, num_of_nodes=1
        )

        # Start fio on every parent pod
        log.info("Starting IO on all pods")
        for parent_pod in self.pods:
            if parent_pod.pvc.volume_mode == constants.VOLUME_MODE_BLOCK:
                storage_type = "block"
            else:
                storage_type = "fs"
            parent_pod.run_io(
                storage_type=storage_type,
                size="1G",
                runtime=20,
                fio_filename=file_name,
                end_fsync=1,
            )
            log.info(f"IO started on pod {parent_pod.name}")
        log.info("Started IO on all pods")

        # Collect fio results and record the md5sum on the parent PVC
        log.info("Wait for IO to finish on pods")
        for parent_pod in self.pods:
            parent_pod.get_fio_results()
            log.info(f"IO finished on pod {parent_pod.name}")
            # Filesystem volumes are checked by file name, others by the
            # storage path of the device
            if parent_pod.pvc.volume_mode == constants.VOLUME_MODE_FILESYSTEM:
                md5_target = file_name
            else:
                md5_target = parent_pod.get_storage_path(storage_type="block")
            parent_pod.pvc.md5sum = pod.cal_md5sum(
                parent_pod,
                md5_target,
                parent_pod.pvc.volume_mode == constants.VOLUME_MODE_BLOCK,
            )

        # Restart the node in a worker thread so that cloning overlaps it
        log.info(f"Restart node {selected_node[0].name}")
        restart_thread = executor.submit(nodes.restart_nodes, nodes=selected_node)

        log.info("Creating clone of all PVCs.")
        for parent_pvc in self.pvcs:
            log.info(f"Creating clone of {parent_pvc.name}")
            parent_pvc.clone_proc = executor.submit(
                pvc_clone_factory, pvc_obj=parent_pvc, status=""
            )

        # Surface any exception raised by 'restart_nodes'
        restart_thread.result()

        log.info("Verify status of node.")
        restarted_node_name = node.get_node_name(selected_node[0])
        node.wait_for_nodes_status(
            node_names=[restarted_node_name],
            status=constants.NODE_READY,
            timeout=300,
        )

        # Gather the clone produced by each worker thread
        cloned_pvcs = []
        for parent_pvc in self.pvcs:
            cloned_pvcs.append(parent_pvc.clone_proc.result())

        log.info("Verifying cloned PVCs are Bound")
        for cloned_pvc in cloned_pvcs:
            wait_for_resource_state(
                resource=cloned_pvc, state=constants.STATUS_BOUND, timeout=540
            )
            cloned_pvc.reload()
        log.info("Verified: Cloned PVCs are Bound")

        # Create one pod per cloned PVC
        log.info("Attach the cloned PVCs to pods")
        clone_pod_objs = []
        for cloned_pvc in cloned_pvcs:
            # Block volumes use the raw block pod YAML
            pod_dict_path = (
                constants.CSI_RBD_RAW_BLOCK_POD_YAML
                if cloned_pvc.volume_mode == "Block"
                else ""
            )
            clone_pod_obj = pod_factory(
                interface=cloned_pvc.parent.interface,
                pvc=cloned_pvc,
                status="",
                pod_dict_path=pod_dict_path,
                raw_block_pv=cloned_pvc.volume_mode == "Block",
            )
            log.info(
                f"Attaching the PVC {cloned_pvc.name} to pod {clone_pod_obj.name}"
            )
            clone_pod_objs.append(clone_pod_obj)

        # Pods were created without waiting, so wait for Running here
        log.info("Verify the new pods are running")
        for clone_pod in clone_pod_objs:
            wait_for_resource_state(clone_pod, constants.STATUS_RUNNING)
        log.info("Verified: New pods are running")

        # Compare data on each clone with the md5sum stored on its parent
        for clone_pod in clone_pod_objs:
            if clone_pod.pvc.volume_mode == constants.VOLUME_MODE_FILESYSTEM:
                md5_target = file_name
            else:
                md5_target = clone_pod.get_storage_path(storage_type="block")
            pod.verify_data_integrity(
                clone_pod,
                md5_target,
                clone_pod.pvc.parent.md5sum,
                clone_pod.pvc.volume_mode == constants.VOLUME_MODE_BLOCK,
            )
            log.info(
                f"Verified: md5sum of {md5_target} on pod {clone_pod.name} "
                f"matches with the original md5sum"
            )
        log.info("Data integrity check passed on all pods")

        # Write a second file on the pods that use the clones
        log.info("Starting IO on the new pods")
        for clone_pod in clone_pod_objs:
            if clone_pod.pvc.volume_mode == constants.VOLUME_MODE_BLOCK:
                storage_type = "block"
            else:
                storage_type = "fs"
            clone_pod.run_io(
                storage_type=storage_type,
                size="1G",
                runtime=20,
                fio_filename=f"{file_name}_1",
                end_fsync=1,
            )
            log.info(f"IO started on pod {clone_pod.name}")
        log.info("Started IO on the new pods")

        # Collect fio results from the new pods
        log.info("Wait for IO to finish on the new pods")
        for clone_pod in clone_pod_objs:
            clone_pod.get_fio_results()
            log.info(f"IO finished on pod {clone_pod.name}")
        log.info("IO finished on the new pods")
Ejemplo n.º 3
0
    def test_worker_node_restart_during_pvc_expansion(self, nodes):
        """
        Check that PVC expansion succeeds when a worker node is restarted
        while the expansion requests are in progress

        Steps:
            1. Restart one worker node and, in parallel, resize every PVC
            2. Wait for the node to be Ready and find the respun pods
            3. Verify the expansion result and the reported capacity
            4. Run IO on the pods and check fio reported no errors

        """
        pvc_size_expanded = 30
        executor = ThreadPoolExecutor(max_workers=len(self.pods))
        selected_node = node.get_nodes(
            node_type=constants.WORKER_MACHINE, num_of_nodes=1
        )

        # Restart the node in a worker thread so that expansion overlaps it
        log.info(f"Restart node {selected_node[0].name}")
        restart_thread = executor.submit(nodes.restart_nodes, nodes=selected_node)

        log.info("Expanding all PVCs.")
        for pvc_obj in self.pvcs:
            log.info(f"Expanding size of PVC {pvc_obj.name} to {pvc_size_expanded}G")
            pvc_obj.expand_proc = executor.submit(
                pvc_obj.resize_pvc, pvc_size_expanded, False
            )

        # Surface any exception raised by 'restart_nodes'
        restart_thread.result()

        log.info("Verify status of node.")
        restarted_node_name = node.get_node_name(selected_node[0])
        node.wait_for_nodes_status(
            node_names=[restarted_node_name],
            status=constants.NODE_READY,
            timeout=300,
        )

        # Look up the pods again by their 'deploymentconfig' label. Only the
        # first lookup waits; by then the pods had enough time to respin.
        new_pods_list = []
        for position, original_pod in enumerate(self.pods):
            respun_pods = get_all_pods(
                namespace=original_pod.namespace,
                selector=[original_pod.labels.get("deploymentconfig")],
                selector_label="deploymentconfig",
                wait=position == 0,
            )
            # Carry the PVC reference over to the newly found pod objects
            for respun_pod in respun_pods:
                respun_pod.pvc = original_pod.pvc
            new_pods_list.extend(respun_pods)
        assert len(new_pods_list) == len(
            self.pods
        ), "Couldn't find all pods after node reboot"

        # Check the resize result and the capacity reported in PVC status
        for pvc_obj in self.pvcs:
            assert (
                pvc_obj.expand_proc.result()
            ), f"Expansion failed for PVC {pvc_obj.name}"
            pvc_status = pvc_obj.get().get("status")
            capacity = pvc_status.get("capacity").get("storage")
            assert capacity == f"{pvc_size_expanded}Gi", (
                f"Capacity of PVC {pvc_obj.name} is not {pvc_size_expanded}Gi as "
                f"expected, but {capacity}."
            )
        log.info("PVC expansion was successful on all PVCs")

        # Start fio on every pod through the executor
        log.info("Run IO after PVC expansion.")
        for pod_obj in new_pods_list:
            wait_for_resource_state(pod_obj, constants.STATUS_RUNNING)
            if pod_obj.pvc.volume_mode == "Block":
                storage_type = "block"
            else:
                storage_type = "fs"
            pod_obj.io_proc = executor.submit(
                pod_obj.run_io,
                storage_type=storage_type,
                size="6G",
                runtime=30,
                fio_filename=f"{pod_obj.name}_file",
                end_fsync=1,
            )

        log.info("Wait for IO to complete on all pods")
        for pod_obj in new_pods_list:
            pod_obj.io_proc.result()
            fio_result = pod_obj.get_fio_results()
            first_job = fio_result.get("jobs")[0]
            err_count = first_job.get("error")
            assert err_count == 0, (
                f"IO error on pod {pod_obj.name}. " f"FIO result: {fio_result}"
            )
            log.info(f"Verified IO on pod {pod_obj.name}.")
        log.info("IO is successful on all pods after PVC expansion.")
Ejemplo n.º 4
0
    def test_worker_node_restart_during_pvc_expansion(self, nodes):
        """
        Check that PVC expansion succeeds when a worker node is restarted
        while the expansion requests are in progress

        Steps:
            1. Restart one worker node and, in parallel, resize every PVC
            2. Wait for the node to be Ready and find the respun pods
            3. Verify the expansion result and the reported capacity
            4. Verify the new size in 'df' output on non-Block pods
            5. Run IO on the pods and check fio reported no errors

        """
        pvc_size_expanded = 30
        executor = ThreadPoolExecutor(max_workers=len(self.pods))
        selected_node = node.get_nodes(
            node_type=constants.WORKER_MACHINE, num_of_nodes=1
        )

        # Restart the node in a worker thread so that expansion overlaps it
        log.info(f"Restart node {selected_node[0].name}")
        restart_thread = executor.submit(nodes.restart_nodes, nodes=selected_node)

        log.info("Expanding all PVCs.")
        for pvc_obj in self.pvcs:
            log.info(f"Expanding size of PVC {pvc_obj.name} to {pvc_size_expanded}G")
            pvc_obj.expand_proc = executor.submit(
                pvc_obj.resize_pvc, pvc_size_expanded, False
            )

        # Surface any exception raised by 'restart_nodes'
        restart_thread.result()

        log.info("Verify status of node.")
        restarted_node_name = node.get_node_name(selected_node[0])
        node.wait_for_nodes_status(
            node_names=[restarted_node_name],
            status=constants.NODE_READY,
            timeout=300,
        )

        # Look up the pods again by their 'deploymentconfig' label. Only the
        # first lookup waits; by then the pods had enough time to respin.
        new_pods_list = []
        for position, original_pod in enumerate(self.pods):
            respun_pods = get_all_pods(
                namespace=original_pod.namespace,
                selector=[original_pod.labels.get("deploymentconfig")],
                selector_label="deploymentconfig",
                wait=position == 0,
            )
            # Carry the PVC reference over to the newly found pod objects
            for respun_pod in respun_pods:
                respun_pod.pvc = original_pod.pvc
            new_pods_list.extend(respun_pods)
        assert len(new_pods_list) == len(
            self.pods
        ), "Couldn't find all pods after node reboot"

        # Check the resize result and the capacity reported in PVC status.
        # The describe output is only collected when the assertion fails.
        for pvc_obj in self.pvcs:
            assert pvc_obj.expand_proc.result(), (
                f"Expansion failed for PVC {pvc_obj.name}\nDescribe output "
                f"of PVC and PV:\n{pvc_obj.describe()}\n"
                f"{pvc_obj.backed_pv_obj.describe()}"
            )
            pvc_status = pvc_obj.get().get("status")
            capacity = pvc_status.get("capacity").get("storage")
            assert capacity == f"{pvc_size_expanded}Gi", (
                f"Capacity of PVC {pvc_obj.name} is not {pvc_size_expanded}Gi as "
                f"expected, but {capacity}."
            )
        log.info("PVC expansion was successful on all PVCs")

        # Size strings accepted in the 'df' output for the expanded volume
        accepted_sizes = (
            f"{pvc_size_expanded - 0.1}G",
            f"{float(pvc_size_expanded)}G",
            f"{pvc_size_expanded}G",
        )

        log.info("Verifying new size on pods.")
        for pod_obj in new_pods_list:
            if pod_obj.pvc.volume_mode == "Block":
                log.info(
                    f"Skipping check on pod {pod_obj.name} as volume mode is Block."
                )
                continue

            # Wait for 240 seconds to reflect the change on pod
            log.info(f"Checking pod {pod_obj.name} to verify the change.")
            df_sampler = TimeoutSampler(
                240, 3, pod_obj.exec_cmd_on_pod, command="df -kh"
            )
            for df_output in df_sampler:
                df_fields = df_output.split()
                # presumably the 'Size' column, which sits four fields
                # before the mount point in 'df' output
                mount_index = df_fields.index(pod_obj.get_storage_path())
                new_size_mount = df_fields[mount_index - 4]
                if new_size_mount in accepted_sizes:
                    log.info(
                        f"Verified: Expanded size of PVC {pod_obj.pvc.name} "
                        f"is reflected on pod {pod_obj.name}"
                    )
                    break
                log.info(
                    f"Expanded size of PVC {pod_obj.pvc.name} is not reflected"
                    f" on pod {pod_obj.name}. New size on mount is not "
                    f"{pvc_size_expanded}G as expected, but {new_size_mount}. "
                    f"Checking again."
                )
        log.info(
            f"Verified: Expanded size {pvc_size_expanded}G is reflected "
            f"on all pods."
        )

        # Start fio on every pod through the executor
        log.info("Run IO after PVC expansion.")
        for pod_obj in new_pods_list:
            wait_for_resource_state(pod_obj, constants.STATUS_RUNNING)
            if pod_obj.pvc.volume_mode == "Block":
                storage_type = "block"
            else:
                storage_type = "fs"
            pod_obj.io_proc = executor.submit(
                pod_obj.run_io,
                storage_type=storage_type,
                size="6G",
                runtime=30,
                fio_filename=f"{pod_obj.name}_file",
                end_fsync=1,
            )

        log.info("Wait for IO to complete on all pods")
        for pod_obj in new_pods_list:
            pod_obj.io_proc.result()
            fio_result = pod_obj.get_fio_results()
            first_job = fio_result.get("jobs")[0]
            err_count = first_job.get("error")
            assert err_count == 0, (
                f"IO error on pod {pod_obj.name}. " f"FIO result: {fio_result}"
            )
            log.info(f"Verified IO on pod {pod_obj.name}.")
        log.info("IO is successful on all pods after PVC expansion.")