def test_pvc_snapshot_and_clone(
    self, nodes, multiple_snapshot_and_clone_of_postgres_pvc_factory
):
    """
    1. Deploy PGSQL workload
    2. Take a snapshot of the pgsql PVC.
    3. Create a new PVC out of that snapshot or restore snapshot
    4. Create a clone of restored snapshot
    5. Attach a new pgsql pod to it.
    6. Resize cloned pvc
    7. Create snapshots of cloned pvc and restore those snapshots
    8. Attach a new pgsql pod to it and Resize the new restored pvc
    9. Repeat the above steps in bg when performing base operation:
       restart pods, worker node reboot, node drain, device replacement

    """
    # Kick off snapshot/clone creation in a single background worker so the
    # disruptive operations below run concurrently with it.
    log.info("Starting multiple creation & clone of postgres PVC in Background")
    bg_handler = flowtest.BackgroundOps()
    executor_run_bg_ops = ThreadPoolExecutor(max_workers=1)
    pgsql_snapshot_and_clone = executor_run_bg_ops.submit(
        bg_handler.handler,
        multiple_snapshot_and_clone_of_postgres_pvc_factory,
        pvc_size_new=25,
        pgsql=self.pgsql,
        iterations=1,
    )
    log.info("Started creation of snapshots & clones in background")
    flow_ops = flowtest.FlowOperations()

    log.info("Starting operation 1: Pod Restarts")
    disruption = Disruptions()
    pod_obj_list = [
        "osd",
        "mon",
        "mgr",
        "operator",
        "rbdplugin",
        "rbdplugin_provisioner",
    ]
    for pod in pod_obj_list:
        # `pod` is already a str, so no f-string wrapper is needed
        disruption.set_resource(resource=pod)
        disruption.delete_resource()
    log.info("Verifying exit criteria for operation 1: Pod Restarts")
    flow_ops.validate_cluster(
        node_status=True, pod_status=True, operation_name="Pod Restarts"
    )

    log.info("Starting operation 2: Node Reboot")
    node_names = flow_ops.node_operations_entry_criteria(
        node_type="worker", number_of_nodes=3, operation_name="Node Reboot"
    )
    # Reboot node
    nodes.restart_nodes(node_names)
    log.info("Verifying exit criteria for operation 2: Node Reboot")
    flow_ops.validate_cluster(
        node_status=True, pod_status=True, operation_name="Node Reboot"
    )

    log.info("Starting operation 3: Node Drain")
    node_name = flow_ops.node_operations_entry_criteria(
        node_type="worker", number_of_nodes=1, operation_name="Node Drain"
    )
    # Node maintenance - to gracefully terminate all pods on the node
    drain_nodes([node_name[0].name])
    # Make the node schedulable again
    schedule_nodes([node_name[0].name])
    log.info("Verifying exit criteria for operation 3: Node Drain")
    flow_ops.validate_cluster(
        node_status=True, pod_status=True, operation_name="Node Drain"
    )

    log.info("Waiting for background operations to be completed")
    bg_handler.wait_for_bg_operations([pgsql_snapshot_and_clone], timeout=600)
def test_base_operation_node_drain(
    self,
    node_drain_teardown,
    node_restart_teardown,
    nodes,
    pgsql_factory_fixture,
    project_factory,
    multi_pvc_factory,
    mcg_obj,
    bucket_factory,
):
    """
    Test covers following flow operations while running workloads in the
    background:
    1. Node drain
    2. Add capacity
    3. Node reboot
    4. Node n/w failure
    """
    # Use the module-level ``logger`` consistently (the original mixed it with
    # ``logging.info``, which logs via the root logger and bypasses this
    # module's logger configuration).
    logger.info("Starting IO operations in Background")
    project = project_factory()
    bg_handler = flowtest.BackgroundOps()
    executor_run_bg_ios_ops = ThreadPoolExecutor(max_workers=3)

    pgsql_workload = executor_run_bg_ios_ops.submit(
        bg_handler.handler,
        pgsql_factory_fixture,
        replicas=1,
        clients=1,
        transactions=100,
        timeout=100,
        iterations=1,
    )
    logger.info("Started pgsql workload in background")

    flow_ops = flowtest.FlowOperations()

    obc_ios = executor_run_bg_ios_ops.submit(
        bg_handler.handler,
        flow_ops.sanity_helpers.obc_put_obj_create_delete,
        mcg_obj,
        bucket_factory,
        iterations=30,
    )
    logger.info("Started object IOs in background")

    pvc_create_delete = executor_run_bg_ios_ops.submit(
        bg_handler.handler,
        flow_ops.sanity_helpers.create_pvc_delete,
        multi_pvc_factory,
        project,
        iterations=70,
    )
    logger.info("Started pvc create and delete in background")

    logger.info("Starting operation 1: Node Drain")
    node_name = flow_ops.node_operations_entry_criteria(
        node_type="worker", number_of_nodes=1, operation_name="Node Drain"
    )
    # Node maintenance - to gracefully terminate all pods on the node
    node.drain_nodes([node_name[0].name])
    # Make the node schedulable again
    node.schedule_nodes([node_name[0].name])
    logger.info("Verifying exit criteria for operation 1: Node Drain")
    flow_ops.validate_cluster(
        node_status=True, pod_status=True, operation_name="Node Drain"
    )

    logger.info("Starting operation 2: Add Capacity")
    osd_pods_before, restart_count_before = flow_ops.add_capacity_entry_criteria()
    # Add capacity
    osd_size = storage_cluster.get_osd_size()
    result = storage_cluster.add_capacity(osd_size)
    pod = OCP(kind=constants.POD, namespace=config.ENV_DATA["cluster_namespace"])
    # BUG FIX: the original tested the function *object* for truthiness
    # (``if is_flexible_scaling_enabled:``), which is always True, so
    # replica_count was unconditionally 1. The helper must be called to
    # query the actual cluster setting.
    if is_flexible_scaling_enabled():
        replica_count = 1
    else:
        replica_count = 3
    pod.wait_for_resource(
        timeout=300,
        condition=constants.STATUS_RUNNING,
        selector="app=rook-ceph-osd",
        resource_count=result * replica_count,
    )
    logger.info("Verifying exit criteria for operation 2: Add Capacity")
    flow_ops.add_capacity_exit_criteria(restart_count_before, osd_pods_before)

    logger.info("Starting operation 3: Node Restart")
    node_name = flow_ops.node_operations_entry_criteria(
        node_type="worker", number_of_nodes=1, operation_name="Node Restart"
    )
    # Node failure (reboot)
    nodes.restart_nodes(nodes=node_name)
    logger.info("Verifying exit criteria for operation 3: Node Restart")
    flow_ops.validate_cluster(
        node_status=True, pod_status=True, operation_name="Node Restart"
    )

    logger.info("Starting operation 4: Node network fail")
    node_name, nw_fail_time = flow_ops.node_operations_entry_criteria(
        node_type="worker",
        number_of_nodes=1,
        network_fail_time=300,
        operation_name="Node N/W failure",
    )
    # Node n/w interface failure
    node.node_network_failure(node_name[0].name)
    logger.info(f"Waiting for {nw_fail_time} seconds")
    sleep(nw_fail_time)
    # Reboot the unresponsive node(s)
    logger.info(f"Stop and start the unresponsive node(s): {node_name[0].name}")
    nodes.restart_nodes_by_stop_and_start(nodes=node_name)
    logger.info("Verifying exit criteria for operation 4: Node network fail")
    flow_ops.validate_cluster(
        node_status=True, pod_status=True, operation_name="Node N/W failure"
    )

    logger.info("Waiting for final iteration of background operations to be completed")
    bg_ops = [pvc_create_delete, obc_ios, pgsql_workload]
    bg_handler.wait_for_bg_operations(bg_ops, timeout=600)
def run_in_bg(
    self, nodes, multiple_snapshot_and_clone_of_postgres_pvc_factory, sc_name=None
):
    """
    Run multiple snapshot & clone creations of the postgres PVC in the
    background while disrupting the cluster (pod restarts, worker node
    reboots, node drain), validating cluster health after each disruption.

    Args:
        nodes: platform nodes object used to reboot worker nodes
        multiple_snapshot_and_clone_of_postgres_pvc_factory: factory fixture
            executed by the background handler thread
        sc_name (str): optional storage class name forwarded to the factory

    """
    log.info("Starting multiple creation & clone of postgres PVC in Background")
    bg_handler = flowtest.BackgroundOps()
    executor_run_bg_ops = ThreadPoolExecutor(max_workers=1)
    pgsql_snapshot_and_clone = executor_run_bg_ops.submit(
        bg_handler.handler,
        multiple_snapshot_and_clone_of_postgres_pvc_factory,
        pvc_size_new=25,
        pgsql=self.pgsql,
        sc_name=sc_name,
        iterations=1,
    )
    log.info("Started creation of snapshots & clones in background")
    flow_ops = flowtest.FlowOperations()

    log.info("Starting operation 1: Pod Restarts")
    disruption = Disruptions()
    pod_obj_list = [
        "osd",
        "mon",
        "mgr",
        "operator",
        "rbdplugin",
        "rbdplugin_provisioner",
    ]
    for pod in pod_obj_list:
        # `pod` is already a str, so the f-string wrapper was redundant
        disruption.set_resource(resource=pod)
        disruption.delete_resource()
    log.info("Verifying exit criteria for operation 1: Pod Restarts")
    flow_ops.validate_cluster(
        node_status=True, pod_status=True, operation_name="Pod Restarts"
    )

    log.info("Starting operation 2: Node Reboot")
    node_names = flow_ops.node_operations_entry_criteria(
        node_type="worker", number_of_nodes=3, operation_name="Node Reboot"
    )
    # Reboot node
    nodes.restart_nodes(node_names)
    log.info("Verifying exit criteria for operation 2: Node Reboot")
    flow_ops.validate_cluster(
        node_status=True, pod_status=True, operation_name="Node Reboot"
    )

    log.info("Starting operation 3: Node Drain")
    node_name = flow_ops.node_operations_entry_criteria(
        node_type="worker", number_of_nodes=1, operation_name="Node Drain"
    )
    # Node maintenance - to gracefully terminate all pods on the node
    drain_nodes([node_name[0].name])
    # Make the node schedulable again
    schedule_nodes([node_name[0].name])
    log.info("Verifying exit criteria for operation 3: Node Drain")
    flow_ops.validate_cluster(
        node_status=True, pod_status=True, operation_name="Node Drain"
    )

    log.info("Waiting for background operations to be completed")
    bg_handler.wait_for_bg_operations([pgsql_snapshot_and_clone], timeout=600)