def verify_pv_not_exists(pvc_obj, cbp_name, rbd_image_id):
    """
    Ensure that the PV does not exist
    """
    # Validate on ceph side
    logger.info(
        f"Verifying PV {pvc_obj.backed_pv} does not exist on backend"
    )
    status = helpers.verify_volume_deleted_in_backend(
        interface=constants.CEPHBLOCKPOOL, image_uuid=rbd_image_id,
        pool_name=cbp_name
    )
    if not status:
        raise UnexpectedBehaviour(
            f"PV {pvc_obj.backed_pv} exists on backend"
        )
    logger.info(
        f"Expected: PV {pvc_obj.backed_pv} "
        f"doesn't exist on backend after deleting PVC"
    )

    # Validate on oc side
    logger.info("Verifying whether PV is deleted")
    try:
        assert helpers.validate_pv_delete(pvc_obj.backed_pv)
    except AssertionError as ecf:
        assert "not found" in str(ecf), (
            f"Unexpected: PV {pvc_obj.backed_pv} still exists"
        )
    logger.info(
        "Expected: PV should not be found after deleting corresponding PVC"
    )
def create_pvc_and_verify_pvc_exists(sc_name, cbp_name):
    """
    Create a PVC, verify that the PVC is in Bound state and that the
    backing volume exists on the ceph side
    """
    pvc_obj = helpers.create_pvc(sc_name=sc_name, size='10Gi')
    helpers.wait_for_resource_state(pvc_obj, constants.STATUS_BOUND)
    pvc_obj.reload()

    # Validate the volume is created on ceph
    logger.info("Verifying PV exists on backend")
    assert not helpers.verify_volume_deleted_in_backend(
        interface=constants.CEPHBLOCKPOOL, image_uuid=pvc_obj.image_uuid,
        pool_name=cbp_name
    )
    return pvc_obj
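# Illustrative sketch (not part of the test suite): one way the two helpers
# above could be combined to exercise the create/delete flow. The function
# name is hypothetical; the delete and wait calls assume the PVC object API
# used elsewhere in this module.
def _example_pvc_lifecycle(sc_name, cbp_name):
    pvc_obj = create_pvc_and_verify_pvc_exists(sc_name, cbp_name)
    rbd_image_id = pvc_obj.image_uuid
    # Delete the PVC and wait for the deletion to complete
    pvc_obj.delete()
    pvc_obj.ocp.wait_for_delete(pvc_obj.name)
    # The backing RBD image and the PV should now be gone
    verify_pv_not_exists(pvc_obj, cbp_name, rbd_image_id)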
def test_ceph_daemon_kill_during_pod_pvc_deletion(
    self, interface, operation_to_disrupt, resource_name, setup_base
):
    """
    Kill 'resource_name' daemon while deletion of PVCs/pods is progressing
    """
    pvc_objs, self.pod_objs = setup_base
    sc_obj = pvc_objs[0].storageclass
    self.namespace = pvc_objs[0].project.namespace

    pod_functions = {
        'mds': partial(get_mds_pods),
        'mon': partial(get_mon_pods),
        'mgr': partial(get_mgr_pods),
        'osd': partial(get_osd_pods),
        'rbdplugin': partial(get_plugin_pods, interface=interface),
        'cephfsplugin': partial(get_plugin_pods, interface=interface),
        'cephfsplugin_provisioner': partial(
            get_cephfsplugin_provisioner_pods
        ),
        'rbdplugin_provisioner': partial(
            get_rbdfsplugin_provisioner_pods
        ),
        'operator': partial(get_operator_pods)
    }
    disruption = disruption_helpers.Disruptions()
    disruption.set_resource(resource=resource_name)
    executor = ThreadPoolExecutor(max_workers=1)

    # Get number of pods of type 'resource_name'
    num_of_resource_pods = len(pod_functions[resource_name]())

    # Fetch the number of Pods and PVCs
    initial_num_of_pods = len(get_all_pods(namespace=self.namespace))
    initial_num_of_pvc = len(
        get_all_pvcs(namespace=self.namespace)['items']
    )

    # Fetch PV names
    pv_objs = []
    for pvc_obj in pvc_objs:
        pvc_obj.reload()
        pv_objs.append(pvc_obj.backed_pv_obj)

    # Fetch volume details from pods for the purpose of verification
    node_pv_dict = {}
    for pod_obj in self.pod_objs:
        pod_info = pod_obj.get()
        node = pod_info['spec']['nodeName']
        pvc = pod_info['spec']['volumes'][0]['persistentVolumeClaim'][
            'claimName'
        ]
        for pvc_obj in pvc_objs:
            if pvc_obj.name == pvc:
                pvc_obj.reload()
                pv = pvc_obj.backed_pv
                break
        if node in node_pv_dict:
            node_pv_dict[node].append(pv)
        else:
            node_pv_dict[node] = [pv]

    # Do setup for running IO on pods
    log.info("Setting up pods for running IO")
    for pod_obj in self.pod_objs:
        pod_obj.workload_setup(storage_type='fs')
    log.info("Setup for running IO is completed on pods")

    # Start IO on each pod. RWX PVC will be used on two pods. So split the
    # size accordingly
    log.info("Starting IO on pods")
    for pod_obj in self.pod_objs:
        if pod_obj.pvc.access_mode == constants.ACCESS_MODE_RWX:
            io_size = int((self.pvc_size - 1) / 2)
        else:
            io_size = self.pvc_size - 1
        pod_obj.run_io(
            storage_type='fs', size=f'{io_size}G',
            fio_filename=f'{pod_obj.name}_io'
        )
    log.info("IO started on all pods.")

    # Set the daemon to be killed
    disruption.select_daemon()

    # Start deleting pods
    pod_bulk_delete = executor.submit(self.delete_pods)

    if operation_to_disrupt == 'delete_pods':
        ret = self.verify_resource_deletion(
            get_all_pods, initial_num_of_pods
        )
        assert ret, "Wait timeout: Pods are not being deleted."
        log.info("Pods deletion has started.")
        disruption.kill_daemon()

    pods_deleted = pod_bulk_delete.result()
    assert pods_deleted, "Deletion of pods failed."
    # Verify pods are deleted
    for pod_obj in self.pod_objs:
        assert pod_obj.ocp.wait_for_delete(pod_obj.name, 180), (
            f"Pod {pod_obj.name} is not deleted"
        )
    log.info("Verified: Pods are deleted.")

    # Verify that the mount point is removed from nodes after deleting pod
    for node, pvs in node_pv_dict.items():
        cmd = f'oc debug nodes/{node} -- df'
        df_on_node = run_cmd(cmd)
        for pv in pvs:
            assert pv not in df_on_node, (
                f"{pv} is still present on node {node} after "
                f"deleting the pods."
            )
    log.info(
        "Verified: mount points are removed from nodes after deleting "
        "the pods."
    )

    # Fetch image uuid associated with PVCs
    pvc_uuid_map = {}
    for pvc_obj in pvc_objs:
        pvc_uuid_map[pvc_obj.name] = pvc_obj.image_uuid
    log.info("Fetched image uuid associated with each PVC")

    # Start deleting PVCs
    pvc_bulk_delete = executor.submit(delete_pvcs, pvc_objs)

    if operation_to_disrupt == 'delete_pvcs':
        ret = self.verify_resource_deletion(
            get_all_pvcs, initial_num_of_pvc
        )
        assert ret, "Wait timeout: PVCs are not being deleted."
        log.info("PVCs deletion has started.")
        disruption.kill_daemon()

    pvcs_deleted = pvc_bulk_delete.result()
    assert pvcs_deleted, "Deletion of PVCs failed."

    # Verify PVCs are deleted
    for pvc_obj in pvc_objs:
        assert pvc_obj.ocp.wait_for_delete(pvc_obj.name), (
            f"PVC {pvc_obj.name} is not deleted"
        )
    log.info("Verified: PVCs are deleted.")

    # Verify PVs are deleted
    for pv_obj in pv_objs:
        assert pv_obj.ocp.wait_for_delete(pv_obj.name, 120), (
            f"PV {pv_obj.name} is not deleted"
        )
    log.info("Verified: PVs are deleted.")

    # Verify PV using ceph toolbox. Image/Subvolume should be deleted.
    for pvc_name, uuid in pvc_uuid_map.items():
        if interface == constants.CEPHBLOCKPOOL:
            ret = verify_volume_deleted_in_backend(
                interface=interface, image_uuid=uuid,
                pool_name=sc_obj.ceph_pool.name
            )
        if interface == constants.CEPHFILESYSTEM:
            ret = verify_volume_deleted_in_backend(
                interface=interface, image_uuid=uuid
            )
        assert ret, (
            f"Volume associated with PVC {pvc_name} still exists "
            f"in backend"
        )

    # Verify number of pods of type 'resource_name'
    final_num_of_resource_pods = len(pod_functions[resource_name]())
    assert final_num_of_resource_pods == num_of_resource_pods, (
        f"Total number of {resource_name} pods is not matching with "
        f"initial value. Total number of pods before daemon kill: "
        f"{num_of_resource_pods}. Total number of pods present now: "
        f"{final_num_of_resource_pods}"
    )

    # Check ceph status
    ceph_health_check(namespace=config.ENV_DATA['cluster_namespace'])
    log.info("Ceph cluster health is OK")
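# Illustrative sketch (an assumption, not the class's actual helper):
# 'verify_resource_deletion' used in the test above is expected to poll the
# resource count until it drops below its initial value, i.e. until the bulk
# deletion has visibly started. The 60 second timeout and 2 second poll
# interval are placeholders.
def verify_resource_deletion_sketch(self, func_to_use, previous_num):
    import time
    end_time = time.time() + 60
    while time.time() < end_time:
        resources = func_to_use(namespace=self.namespace)
        if isinstance(resources, dict):
            # get_all_pvcs returns a dict with an 'items' list
            resources = resources['items']
        if len(resources) < previous_num:
            return True
        time.sleep(2)
    return False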
def test_disruptive_during_pod_pvc_deletion_and_io(
    self, interface, resource_to_delete, setup_base
):
    """
    Delete ceph/rook pod while PVCs deletion, pods deletion and IO are
    progressing
    """
    pvc_objs, pod_objs, rwx_pod_objs = setup_base
    sc_obj = pvc_objs[0].storageclass
    namespace = pvc_objs[0].project.namespace

    num_of_pods_to_delete = 10
    num_of_io_pods = 5

    # Select pods to be deleted
    pods_to_delete = pod_objs[:num_of_pods_to_delete]
    pods_to_delete.extend(
        [pod for pod in rwx_pod_objs for pod_obj in pods_to_delete if (
            pod_obj.pvc == pod.pvc
        )]
    )

    # Select pods to run IO
    io_pods = pod_objs[
        num_of_pods_to_delete:num_of_pods_to_delete + num_of_io_pods
    ]
    io_pods.extend(
        [pod for pod in rwx_pod_objs for pod_obj in io_pods if (
            pod_obj.pvc == pod.pvc
        )]
    )

    # Select pods which are having PVCs to delete
    pods_for_pvc = pod_objs[num_of_pods_to_delete + num_of_io_pods:]
    pvcs_to_delete = [pod_obj.pvc for pod_obj in pods_for_pvc]
    pods_for_pvc.extend(
        [pod for pod in rwx_pod_objs for pod_obj in pods_for_pvc if (
            pod_obj.pvc == pod.pvc
        )]
    )

    log.info(
        f"{len(pods_to_delete)} pods selected for deletion in which "
        f"{len(pods_to_delete) - num_of_pods_to_delete} pairs of pod "
        f"share same RWX PVC"
    )
    log.info(
        f"{len(io_pods)} pods selected for running IO in which "
        f"{len(io_pods) - num_of_io_pods} pairs of pod share same "
        f"RWX PVC"
    )
    no_of_rwx_pvcs_delete = len(pods_for_pvc) - len(pvcs_to_delete)
    log.info(
        f"{len(pvcs_to_delete)} PVCs selected for deletion. "
        f"RWO PVCs: {len(pvcs_to_delete) - no_of_rwx_pvcs_delete}, "
        f"RWX PVCs: {no_of_rwx_pvcs_delete}"
    )

    pod_functions = {
        'mds': get_mds_pods, 'mon': get_mon_pods, 'mgr': get_mgr_pods,
        'osd': get_osd_pods
    }
    disruption = disruption_helpers.Disruptions()
    disruption.set_resource(resource=resource_to_delete)
    executor = ThreadPoolExecutor(max_workers=len(pod_objs))

    # Get number of pods of type 'resource_to_delete'
    num_of_resource_to_delete = len(pod_functions[resource_to_delete]())

    # Fetch the number of Pods and PVCs
    initial_num_of_pods = len(get_all_pods(namespace=namespace))
    initial_num_of_pvc = len(
        get_all_pvcs(namespace=namespace)['items']
    )

    # Fetch PV names to verify after deletion
    pv_objs = []
    for pvc_obj in pvcs_to_delete:
        pvc_obj.reload()
        pv_objs.append(pvc_obj.backed_pv_obj)

    # Fetch volume details from pods for the purpose of verification
    node_pv_dict = {}
    for pod_obj in pods_to_delete:
        pod_info = pod_obj.get()
        node = pod_info['spec']['nodeName']
        pvc = pod_info['spec']['volumes'][0][
            'persistentVolumeClaim'
        ]['claimName']
        for pvc_obj in pvc_objs:
            if pvc_obj.name == pvc:
                pvc_obj.reload()
                pv = pvc_obj.backed_pv
                break
        if node in node_pv_dict:
            node_pv_dict[node].append(pv)
        else:
            node_pv_dict[node] = [pv]

    # Fetch image uuid associated with PVCs to be deleted
    pvc_uuid_map = {}
    for pvc_obj in pvcs_to_delete:
        pvc_uuid_map[pvc_obj.name] = pvc_obj.image_uuid
    log.info("Fetched image uuid associated with each PVC")

    # Do setup on pods for running IO
    log.info("Setting up pods for running IO.")
    for pod_obj in pod_objs + rwx_pod_objs:
        executor.submit(pod_obj.workload_setup, storage_type='fs')

    # Wait for setup on pods to complete
    for pod_obj in pod_objs + rwx_pod_objs:
        for sample in TimeoutSampler(
            180, 2, getattr, pod_obj, 'wl_setup_done'
        ):
            if sample:
                log.info(
                    f"Setup for running IO is completed on pod "
                    f"{pod_obj.name}."
                )
                break
    log.info("Setup for running IO is completed on all pods.")

    # Start IO on pods having PVCs to delete to load data
    log.info("Starting IO on pods having PVCs to delete.")
    self.run_io_on_pods(pods_for_pvc)
    log.info("IO started on pods having PVCs to delete.")

    log.info("Fetching IO results from the pods having PVCs to delete.")
    for pod_obj in pods_for_pvc:
        get_fio_rw_iops(pod_obj)
    log.info("Verified IO result on pods having PVCs to delete.")

    # Delete pods having PVCs to delete.
    assert self.delete_pods(pods_for_pvc), (
        "Couldn't delete pods which are having PVCs to delete."
    )
    for pod_obj in pods_for_pvc:
        pod_obj.ocp.wait_for_delete(pod_obj.name)
    log.info("Verified: Deleted pods which are having PVCs to delete.")

    # Start IO on pods to be deleted
    log.info("Starting IO on pods to be deleted.")
    self.run_io_on_pods(pods_to_delete)
    log.info("IO started on pods to be deleted.")

    # Start deleting PVCs
    pvc_bulk_delete = executor.submit(delete_pvcs, pvcs_to_delete)
    log.info("Started deleting PVCs")

    # Start deleting pods
    pod_bulk_delete = executor.submit(self.delete_pods, pods_to_delete)
    log.info("Started deleting pods")

    # Start IO on IO pods
    self.run_io_on_pods(io_pods)
    log.info("Started IO on IO pods")

    # Verify pvc deletion has started
    pvc_deleting = executor.submit(
        wait_for_resource_count_change, func_to_use=get_all_pvcs,
        previous_num=initial_num_of_pvc, namespace=namespace,
        change_type='decrease', min_difference=1, timeout=30, interval=0.01
    )

    # Verify pod deletion has started
    pod_deleting = executor.submit(
        wait_for_resource_count_change, func_to_use=get_all_pods,
        previous_num=initial_num_of_pods, namespace=namespace,
        change_type='decrease', min_difference=1, timeout=30, interval=0.01
    )

    assert pvc_deleting.result(), (
        "Wait timeout: PVCs are not being deleted."
    )
    log.info("PVCs deletion has started.")

    assert pod_deleting.result(), (
        "Wait timeout: Pods are not being deleted."
    )
    log.info("Pods deletion has started.")

    # Delete pod of type 'resource_to_delete'
    disruption.delete_resource()

    pods_deleted = pod_bulk_delete.result()
    assert pods_deleted, "Deletion of pods failed."

    # Verify pods are deleted
    for pod_obj in pods_to_delete:
        pod_obj.ocp.wait_for_delete(pod_obj.name)
    log.info("Verified: Pods are deleted.")

    # Verify that the mount point is removed from nodes after deleting pod
    node_pv_mounted = verify_pv_mounted_on_node(node_pv_dict)
    for node, pvs in node_pv_mounted.items():
        assert not pvs, (
            f"PVs {pvs} are still present on node {node} after "
            f"deleting the pods."
        )
    log.info(
        "Verified: mount points are removed from nodes after deleting "
        "the pods"
    )

    pvcs_deleted = pvc_bulk_delete.result()
    assert pvcs_deleted, "Deletion of PVCs failed."

    # Verify PVCs are deleted
    for pvc_obj in pvcs_to_delete:
        pvc_obj.ocp.wait_for_delete(pvc_obj.name)
    log.info("Verified: PVCs are deleted.")

    # Verify PVs are deleted
    for pv_obj in pv_objs:
        pv_obj.ocp.wait_for_delete(resource_name=pv_obj.name, timeout=300)
    log.info("Verified: PVs are deleted.")

    # Verify PV using ceph toolbox. Image/Subvolume should be deleted.
    for pvc_name, uuid in pvc_uuid_map.items():
        if interface == constants.CEPHBLOCKPOOL:
            ret = verify_volume_deleted_in_backend(
                interface=interface, image_uuid=uuid,
                pool_name=sc_obj.ceph_pool.name
            )
        if interface == constants.CEPHFILESYSTEM:
            ret = verify_volume_deleted_in_backend(
                interface=interface, image_uuid=uuid
            )
        assert ret, (
            f"Volume associated with PVC {pvc_name} still exists "
            f"in backend"
        )

    log.info("Fetching IO results from the pods.")
    for pod_obj in io_pods:
        fio_result = pod_obj.get_fio_results()
        err_count = fio_result.get('jobs')[0].get('error')
        assert err_count == 0, (
            f"FIO error on pod {pod_obj.name}. FIO result: {fio_result}"
        )
    log.info("Verified IO result on pods.")

    # Verify number of pods of type 'resource_to_delete'
    final_num_resource_to_delete = len(pod_functions[resource_to_delete]())
    assert final_num_resource_to_delete == num_of_resource_to_delete, (
        f"Total number of {resource_to_delete} pods is not matching with "
        f"initial value. Total number of pods before deleting a pod: "
        f"{num_of_resource_to_delete}. Total number of pods present now: "
        f"{final_num_resource_to_delete}"
    )

    # Check ceph status
    ceph_health_check(namespace=config.ENV_DATA['cluster_namespace'])
    log.info("Ceph cluster health is OK")
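# Illustrative sketch (an assumption, not the imported helper itself):
# 'verify_pv_mounted_on_node' used in the test above is expected to mirror
# the inline check in test_ceph_daemon_kill_during_pod_pvc_deletion,
# returning for each node the subset of PVs that still show up in 'df'
# output on that node. 'run_cmd' is the same command runner used there.
def verify_pv_mounted_on_node_sketch(node_pv_dict):
    existing_pvs = {}
    for node, pvs in node_pv_dict.items():
        df_on_node = run_cmd(f'oc debug nodes/{node} -- df')
        # Keep only the PVs whose mount is still visible on the node
        existing_pvs[node] = [pv for pv in pvs if pv in df_on_node]
    return existing_pvs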
def test_multiple_pvc_concurrent_creation_deletion(
    self, interface, multi_pvc_factory
):
    """
    To exercise resource creation and deletion
    """
    proj_obj = self.pvc_objs[0].project
    executor = ThreadPoolExecutor(max_workers=1)

    # Get PVs
    pv_objs = []
    for pvc in self.pvc_objs:
        pv_objs.append(pvc.backed_pv_obj)

    # Fetch image uuid associated with PVCs
    pvc_uuid_map = {}
    for pvc_obj in self.pvc_objs:
        pvc_uuid_map[pvc_obj.name] = pvc_obj.image_uuid
    log.info("Fetched image uuid associated with each PVC")

    # Start deleting the initial set of PVCs
    log.info('Start deleting PVCs.')
    pvc_delete = executor.submit(delete_pvcs, self.pvc_objs)

    # Create new PVCs while the deletion is in progress
    log.info('Start creating new PVCs')
    self.new_pvc_objs = multi_pvc_factory(
        interface=interface, project=proj_obj, size=self.pvc_size,
        access_modes=self.access_modes, status='',
        num_of_pvc=self.num_of_pvcs, wait_each=False
    )
    for pvc_obj in self.new_pvc_objs:
        wait_for_resource_state(pvc_obj, constants.STATUS_BOUND)
        pvc_obj.reload()
    log.info(f'Newly created {self.num_of_pvcs} PVCs are in Bound state.')

    # Verify PVCs are deleted
    res = pvc_delete.result()
    assert res, 'Deletion of PVCs failed'
    log.info('PVC deletion was successful.')
    for pvc in self.pvc_objs:
        pvc.ocp.wait_for_delete(resource_name=pvc.name)
    log.info(f'Successfully deleted initial {self.num_of_pvcs} PVCs')

    # Verify PVs are deleted
    for pv_obj in pv_objs:
        pv_obj.ocp.wait_for_delete(resource_name=pv_obj.name, timeout=180)
    log.info(f'Successfully deleted initial {self.num_of_pvcs} PVs')

    # Verify PV using ceph toolbox. Image/Subvolume should be deleted.
    for pvc_name, uuid in pvc_uuid_map.items():
        pool_name = None
        if interface == constants.CEPHBLOCKPOOL:
            pool_name = default_ceph_block_pool()
        ret = verify_volume_deleted_in_backend(
            interface=interface, image_uuid=uuid, pool_name=pool_name
        )
        assert ret, (
            f"Volume associated with PVC {pvc_name} still exists "
            f"in backend"
        )

    # Verify status of nodes
    for node in get_node_objs():
        node_status = node.ocp.get_resource_status(node.name)
        assert node_status == constants.NODE_READY, (
            f"Node {node.name} is in {node_status} state."
        )
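# Illustrative sketch (an assumption, not the imported helper itself):
# 'delete_pvcs' submitted to the executor in the tests above is expected to
# issue delete calls for the given PVC objects and report overall success,
# leaving the callers to confirm completion via 'ocp.wait_for_delete'.
def delete_pvcs_sketch(pvc_objs):
    for pvc_obj in pvc_objs:
        pvc_obj.delete()
    return True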
def test_change_reclaim_policy_of_pv(
    self, interface, reclaim_policy, pod_factory
):
    """
    This test case tests update of reclaim policy of PV
    """
    reclaim_policy_to = 'Delete' if reclaim_policy == 'Retain' else (
        'Retain'
    )

    # Fetch name of PVs
    pvs = [pvc_obj.backed_pv_obj for pvc_obj in self.pvc_objs]

    # Fetch image uuid associated with PVCs
    pvc_uuid_map = {}
    for pvc_obj in self.pvc_objs:
        pvc_uuid_map[pvc_obj.name] = pvc_obj.image_uuid
    log.info("Fetched image uuid associated with each PVC")

    # Select PVs to change reclaim policy
    changed_pvs = pvs[:5]

    # Run IO on pods
    self.run_and_verify_io(self.pod_objs)
    log.info("Verified IO result on pods.")

    # Change reclaimPolicy to 'reclaim_policy_to'
    for pv_obj in changed_pvs:
        pv_name = pv_obj.name
        patch_param = (
            f'{{"spec":{{"persistentVolumeReclaimPolicy":'
            f'"{reclaim_policy_to}"}}}}'
        )
        assert pv_obj.ocp.patch(
            resource_name=pv_name, params=patch_param,
            format_type='strategic'
        ), (
            f"Failed to change persistentVolumeReclaimPolicy of pv "
            f"{pv_name} to {reclaim_policy_to}"
        )
        log.info(
            f"Changed persistentVolumeReclaimPolicy of pv {pv_name} "
            f"to {reclaim_policy_to}"
        )

    retain_pvs = []
    delete_pvs = []

    # Verify reclaim policy of all PVs
    for pv_obj in pvs:
        policy = pv_obj.get().get('spec').get(
            'persistentVolumeReclaimPolicy'
        )
        if policy == 'Retain':
            retain_pvs.append(pv_obj)
        else:
            delete_pvs.append(pv_obj)
        if pv_obj in changed_pvs:
            assert policy == reclaim_policy_to, (
                f"Reclaim policy of {pv_obj.name} is {policy}. "
                f"It has not changed to {reclaim_policy_to}"
            )
        else:
            assert policy == reclaim_policy, (
                f"Reclaim policy of {pv_obj.name} is {policy} instead "
                f"of {reclaim_policy}."
            )
    log.info("Verified reclaim policy of all PVs")

    # Run IO on pods
    self.run_and_verify_io(self.pod_objs, do_setup=False)
    log.info("Ran and verified IO on pods after changing reclaim policy.")

    # Delete all pods
    log.info("Deleting all pods")
    for pod_obj in self.pod_objs:
        pod_obj.delete()

    # Verify pods are deleted
    for pod_obj in self.pod_objs:
        pod_obj.ocp.wait_for_delete(pod_obj.name, 300)
    log.info("Verified: Pods are deleted.")

    # Create new pods mounting one volume on each pod
    log.info("Creating new pods.")
    new_pod_objs = []
    for pvc_obj in self.pvc_objs:
        new_pod_objs.append(
            pod_factory(interface=interface, pvc=pvc_obj, status=None)
        )
    for pod in new_pod_objs:
        wait_for_resource_state(pod, constants.STATUS_RUNNING)
        pod.reload()

    # Run IO on new pods
    self.run_and_verify_io(new_pod_objs)
    log.info("Ran and verified IO on new pods.")

    # Delete all pods
    log.info("Deleting all new pods.")
    for pod_obj in new_pod_objs:
        pod_obj.delete()

    # Verify pods are deleted
    for pod_obj in new_pod_objs:
        pod_obj.ocp.wait_for_delete(pod_obj.name, 300)
    log.info("Verified: All new pods are deleted.")

    # Delete PVCs
    log.info("Deleting all PVCs.")
    for pvc_obj in self.pvc_objs:
        pvc_obj.delete()

    # Verify PVCs are deleted
    for pvc_obj in self.pvc_objs:
        pvc_obj.ocp.wait_for_delete(pvc_obj.name, 300)
    log.info("Verified: All PVCs are deleted")

    # PVs having reclaim policy 'Delete' will be deleted
    for pv_obj in delete_pvs:
        pv_obj.ocp.wait_for_delete(pv_obj.name, 300)
    log.info(
        "Verified: All PVs having reclaim policy 'Delete' are deleted."
    )

    # PVs having reclaim policy 'Retain' will be in Released state
    for pv_obj in retain_pvs:
        wait_for_resource_state(
            resource=pv_obj, state=constants.STATUS_RELEASED
        )
    log.info(
        "Verified: All PVs having reclaim policy 'Retain' are "
        "in 'Released' state."
    )

    # Change reclaimPolicy to Delete
    for pv_obj in retain_pvs:
        pv_name = pv_obj.name
        patch_param = '{"spec":{"persistentVolumeReclaimPolicy":"Delete"}}'
'{"spec":{"persistentVolumeReclaimPolicy":"Delete"}}' assert pv_obj.ocp.patch( resource_name=pv_name, params=patch_param, format_type='strategic'), ( f"Failed to change persistentVolumeReclaimPolicy " f"for pv {pv_name} to Delete") log.info("Changed reclaim policy of all remaining PVs to Delete") # Verify PVs deleted. PVs will be deleted immediately after setting # reclaim policy to Delete for pv_obj in retain_pvs: pv_obj.ocp.wait_for_delete(pv_obj.name, 300) log.info( "Verified: All remaining PVs are deleted after changing reclaim " "policy to Delete.") # Verify PV using ceph toolbox. Wait for Image/Subvolume to be deleted. pool_name = default_ceph_block_pool( ) if interface == constants.CEPHBLOCKPOOL else None for pvc_name, uuid in pvc_uuid_map.items(): assert verify_volume_deleted_in_backend( interface=interface, image_uuid=uuid, pool_name=pool_name ), f"Volume associated with PVC {pvc_name} still exists in backend" log.info("Verified: Image/Subvolume removed from backend.")