Esempio n. 1
0
    def test_pvc_snapshot_and_clone(
        self, nodes, multiple_snapshot_and_clone_of_postgres_pvc_factory
    ):
        """
        1. Deploy PGSQL workload
        2. Take a snapshot of the pgsql PVC.
        3. Create a new PVC out of that snapshot or restore snapshot
        4. Create a clone of restored snapshot
        5. Attach a new pgsql pod to it.
         5. Resize cloned pvc
        7. Create snapshots of cloned pvc and restore those snapshots
        8. Attach a new pgsql pod to it and Resize the new restored pvc
        9. Repeat the above steps in bg when performing base operation:
            restart pods, worker node reboot, node drain, device replacement

        """

        log.info("Starting multiple creation & clone of postgres PVC in Background")
        bg_handler = flowtest.BackgroundOps()
        executor_run_bg_ops = ThreadPoolExecutor(max_workers=1)
        pgsql_snapshot_and_clone = executor_run_bg_ops.submit(
            bg_handler.handler,
            multiple_snapshot_and_clone_of_postgres_pvc_factory,
            pvc_size_new=25,
            pgsql=self.pgsql,
            iterations=1,
        )
        log.info("Started creation of snapshots & clones in background")

        flow_ops = flowtest.FlowOperations()
        log.info("Starting operation 1: Pod Restarts")
        disruption = Disruptions()
        pod_obj_list = [
            "osd",
            "mon",
            "mgr",
            "operator",
            "rbdplugin",
            "rbdplugin_provisioner",
        ]
        for pod in pod_obj_list:
            disruption.set_resource(resource=f"{pod}")
            disruption.delete_resource()
        log.info("Verifying exit criteria for operation 1: Pod Restarts")
        flow_ops.validate_cluster(
            node_status=True, pod_status=True, operation_name="Pod Restarts"
        )

        log.info("Starting operation 2: Node Reboot")
        node_names = flow_ops.node_operations_entry_criteria(
            node_type="worker", number_of_nodes=3, operation_name="Node Reboot"
        )
        # Reboot node
        nodes.restart_nodes(node_names)
        log.info("Verifying exit criteria for operation 2: Node Reboot")
        flow_ops.validate_cluster(
            node_status=True, pod_status=True, operation_name="Node Reboot"
        )

        log.info("Starting operation 3: Node Drain")
        node_name = flow_ops.node_operations_entry_criteria(
            node_type="worker", number_of_nodes=1, operation_name="Node Drain"
        )
        # Node maintenance - to gracefully terminate all pods on the node
        drain_nodes([node_name[0].name])
        # Make the node schedulable again
        schedule_nodes([node_name[0].name])
        log.info("Verifying exit criteria for operation 3: Node Drain")
        flow_ops.validate_cluster(
            node_status=True, pod_status=True, operation_name="Node Drain"
        )

        log.info("Waiting for background operations to be completed")
        bg_handler.wait_for_bg_operations([pgsql_snapshot_and_clone], timeout=600)
    def test_base_operation_node_drain(
        self,
        node_drain_teardown,
        node_restart_teardown,
        nodes,
        pgsql_factory_fixture,
        project_factory,
        multi_pvc_factory,
        mcg_obj,
        bucket_factory,
    ):
        """
        Test covers following flow operations while running workloads in the background:
        1. Node drain
        2. Add capacity
        3. Node reboot
        4. Node n/w failure

        """
        logger.info("Starting IO operations in Background")
        project = project_factory()
        bg_handler = flowtest.BackgroundOps()
        executor_run_bg_ios_ops = ThreadPoolExecutor(max_workers=3)

        pgsql_workload = executor_run_bg_ios_ops.submit(
            bg_handler.handler,
            pgsql_factory_fixture,
            replicas=1,
            clients=1,
            transactions=100,
            timeout=100,
            iterations=1,
        )
        logging.info("Started pgsql workload in background")

        flow_ops = flowtest.FlowOperations()

        obc_ios = executor_run_bg_ios_ops.submit(
            bg_handler.handler,
            flow_ops.sanity_helpers.obc_put_obj_create_delete,
            mcg_obj,
            bucket_factory,
            iterations=30,
        )
        logging.info("Started object IOs in background")

        pvc_create_delete = executor_run_bg_ios_ops.submit(
            bg_handler.handler,
            flow_ops.sanity_helpers.create_pvc_delete,
            multi_pvc_factory,
            project,
            iterations=70,
        )
        logging.info("Started pvc create and delete in background")

        logger.info("Starting operation 1: Node Drain")
        node_name = flow_ops.node_operations_entry_criteria(
            node_type="worker", number_of_nodes=1, operation_name="Node Drain")
        # Node maintenance - to gracefully terminate all pods on the node
        node.drain_nodes([node_name[0].name])
        # Make the node schedulable again
        node.schedule_nodes([node_name[0].name])
        logger.info("Verifying exit criteria for operation 1: Node Drain")
        flow_ops.validate_cluster(node_status=True,
                                  pod_status=True,
                                  operation_name="Node Drain")

        logger.info("Starting operation 2: Add Capacity")
        osd_pods_before, restart_count_before = flow_ops.add_capacity_entry_criteria(
        )
        # Add capacity
        osd_size = storage_cluster.get_osd_size()
        result = storage_cluster.add_capacity(osd_size)
        pod = OCP(kind=constants.POD,
                  namespace=config.ENV_DATA["cluster_namespace"])
        if is_flexible_scaling_enabled:
            replica_count = 1
        else:
            replica_count = 3
        pod.wait_for_resource(
            timeout=300,
            condition=constants.STATUS_RUNNING,
            selector="app=rook-ceph-osd",
            resource_count=result * replica_count,
        )
        logger.info("Verifying exit criteria for operation 2: Add Capacity")
        flow_ops.add_capacity_exit_criteria(restart_count_before,
                                            osd_pods_before)

        logger.info("Starting operation 3: Node Restart")
        node_name = flow_ops.node_operations_entry_criteria(
            node_type="worker",
            number_of_nodes=1,
            operation_name="Node Restart")
        # Node failure (reboot)
        nodes.restart_nodes(nodes=node_name)
        logger.info("Verifying exit criteria for operation 3: Node Restart")
        flow_ops.validate_cluster(node_status=True,
                                  pod_status=True,
                                  operation_name="Node Restart")

        logger.info("Starting operation 4: Node network fail")
        node_name, nw_fail_time = flow_ops.node_operations_entry_criteria(
            node_type="worker",
            number_of_nodes=1,
            network_fail_time=300,
            operation_name="Node N/W failure",
        )
        # Node n/w interface failure
        node.node_network_failure(node_name[0].name)
        logger.info(f"Waiting for {nw_fail_time} seconds")
        sleep(nw_fail_time)
        # Reboot the unresponsive node(s)
        logger.info(
            f"Stop and start the unresponsive node(s): {node_name[0].name}")
        nodes.restart_nodes_by_stop_and_start(nodes=node_name)
        logger.info(
            "Verifying exit criteria for operation 4: Node network fail")
        flow_ops.validate_cluster(node_status=True,
                                  pod_status=True,
                                  operation_name="Node N/W failure")

        logger.info(
            "Waiting for final iteration of background operations to be completed"
        )
        bg_ops = [pvc_create_delete, obc_ios, pgsql_workload]
        bg_handler.wait_for_bg_operations(bg_ops, timeout=600)
    def run_in_bg(self,
                  nodes,
                  multiple_snapshot_and_clone_of_postgres_pvc_factory,
                  sc_name=None):
        log.info(
            "Starting multiple creation & clone of postgres PVC in Background")
        bg_handler = flowtest.BackgroundOps()
        executor_run_bg_ops = ThreadPoolExecutor(max_workers=1)
        pgsql_snapshot_and_clone = executor_run_bg_ops.submit(
            bg_handler.handler,
            multiple_snapshot_and_clone_of_postgres_pvc_factory,
            pvc_size_new=25,
            pgsql=self.pgsql,
            sc_name=sc_name,
            iterations=1,
        )
        log.info("Started creation of snapshots & clones in background")

        flow_ops = flowtest.FlowOperations()
        log.info("Starting operation 1: Pod Restarts")
        disruption = Disruptions()
        pod_obj_list = [
            "osd",
            "mon",
            "mgr",
            "operator",
            "rbdplugin",
            "rbdplugin_provisioner",
        ]
        for pod in pod_obj_list:
            disruption.set_resource(resource=f"{pod}")
            disruption.delete_resource()
        log.info("Verifying exit criteria for operation 1: Pod Restarts")
        flow_ops.validate_cluster(node_status=True,
                                  pod_status=True,
                                  operation_name="Pod Restarts")

        log.info("Starting operation 2: Node Reboot")
        node_names = flow_ops.node_operations_entry_criteria(
            node_type="worker",
            number_of_nodes=3,
            operation_name="Node Reboot")
        # Reboot node
        nodes.restart_nodes(node_names)
        log.info("Verifying exit criteria for operation 2: Node Reboot")
        flow_ops.validate_cluster(node_status=True,
                                  pod_status=True,
                                  operation_name="Node Reboot")

        log.info("Starting operation 3: Node Drain")
        node_name = flow_ops.node_operations_entry_criteria(
            node_type="worker", number_of_nodes=1, operation_name="Node Drain")
        # Node maintenance - to gracefully terminate all pods on the node
        drain_nodes([node_name[0].name])
        # Make the node schedulable again
        schedule_nodes([node_name[0].name])
        log.info("Verifying exit criteria for operation 3: Node Drain")
        flow_ops.validate_cluster(node_status=True,
                                  pod_status=True,
                                  operation_name="Node Drain")

        log.info("Waiting for background operations to be completed")
        bg_handler.wait_for_bg_operations([pgsql_snapshot_and_clone],
                                          timeout=600)