def create_resources(self, pvc_factory, pod_factory, run_io=True):
    """
    Sanity validation - Create resources (FS and RBD) and run IO

    Args:
        pvc_factory (function): A call to pvc_factory function
        pod_factory (function): A call to pod_factory function
        run_io (bool): True for run IO, False otherwise

    """
    logger.info(
        "Creating resources and running IO as a sanity functional validation"
    )

    # One PVC plus one attached app pod per supported interface
    for storage_interface in (constants.CEPHBLOCKPOOL, constants.CEPHFILESYSTEM):
        claim = pvc_factory(storage_interface)
        self.pvc_objs.append(claim)
        self.pod_objs.append(pod_factory(pvc=claim, interface=storage_interface))

    if run_io:
        # Kick off fio on every pod first, then collect the IO results
        for pod_obj in self.pod_objs:
            pod_obj.run_io("fs", "1G", runtime=30)
        for pod_obj in self.pod_objs:
            get_fio_rw_iops(pod_obj)

    self.create_obc()
    self.verify_obc()
def run_and_verify_io(
    self, pod_list, fio_filename='io_file', return_md5sum=True,
    run_io_in_bg=False
):
    """
    Start IO on the given pods, verify the fio results and optionally
    collect md5sums of the fio files for later data-integrity checks.

    Args:
        pod_list (list): list of pod objects to run ios
        fio_filename (str): name of the file for fio
        return_md5sum (bool): True if md5sum of fio file to be calculated,
            else False
        run_io_in_bg (bool): True if more background ios to be run,
            else False

    Returns:
        list: list of md5sum values for the fio file if return_md5sum is
            True

    """
    num_pods = len(pod_list)

    # Fan fio out across the pods; leaving the `with` block waits for
    # every submitted job to finish before results are verified.
    logger.info(f"Starting IO on {num_pods} app pods")
    with ThreadPoolExecutor(max_workers=4) as io_executor:
        for app_pod in pod_list:
            logger.info(f"Starting IO on pod {app_pod.name}")
            io_executor.submit(
                app_pod.run_io, storage_type='fs', size='1G', runtime=30,
                fio_filename=fio_filename
            )
    logger.info(f"IO started on all {num_pods} app pods")

    # Verify IO results
    for app_pod in pod_list:
        pod.get_fio_rw_iops(app_pod)

    if run_io_in_bg:
        # Long-running background IO writes to a separate file so it
        # does not disturb the md5sum of the primary fio file
        logger.info(f"Starting IO in background on {num_pods} app pods")
        for app_pod in pod_list:
            logger.info(f"Starting IO on pod {app_pod.name}")
            app_pod.run_io(
                storage_type='fs', size='100M', runtime=600,
                fio_filename='bg_io_file'
            )
        logger.info(f"IO started in background on all {num_pods} app pods")

    # Calculate md5sum of io files
    md5sum_data = []
    if return_md5sum:
        with ThreadPoolExecutor() as md5_executor:
            for app_pod in pod_list:
                md5sum_data.append(
                    md5_executor.submit(pod.cal_md5sum, app_pod, fio_filename)
                )
        md5sum_data = [future_obj.result() for future_obj in md5sum_data]

    return md5sum_data
def create_resources(
    self, pvc_factory, pod_factory, bucket_factory, rgw_bucket_factory,
    run_io=True
):
    """
    Sanity validation: Create resources - pods, OBCs (RGW and MCG),
    PVCs (FS and RBD) and run IO

    Args:
        pvc_factory (function): A call to pvc_factory function
        pod_factory (function): A call to pod_factory function
        bucket_factory (function): A call to bucket_factory function
        rgw_bucket_factory (function): A call to rgw_bucket_factory function
        run_io (bool): True for run IO, False otherwise

    """
    logger.info(
        "Creating resources and running IO as a sanity functional validation"
    )

    # One PVC plus one attached app pod per supported interface
    for storage_interface in (constants.CEPHBLOCKPOOL, constants.CEPHFILESYSTEM):
        claim = pvc_factory(storage_interface)
        self.pvc_objs.append(claim)
        self.pod_objs.append(pod_factory(pvc=claim, interface=storage_interface))

    if run_io:
        # Start fio on all pods first, then gather the results
        for pod_obj in self.pod_objs:
            pod_obj.run_io("fs", "1G", runtime=30)
        for pod_obj in self.pod_objs:
            get_fio_rw_iops(pod_obj)

    # Object bucket claims: RGW (when a factory is supplied) and MCG
    if rgw_bucket_factory:
        self.obc_objs.extend(rgw_bucket_factory(1, "rgw-oc"))
    self.obc_objs.extend(bucket_factory(amount=1, interface="OC"))

    self.ceph_cluster.wait_for_noobaa_health_ok()
def test_rwo_dynamic_pvc(self, setup_base):
    """
    RWO Dynamic PVC creation tests with Reclaim policy set to Delete/Retain
    """
    # Schedule a second pod, attached to the same RWO PVC, on a
    # different worker node; it is expected to stay stuck because the
    # volume is already attached to the first pod's node.
    logger.info(
        f"Creating second pod on node: {self.worker_nodes_list[1]}")
    second_pod = helpers.create_pod(
        interface_type=self.interface_type,
        pvc_name=self.pvc_obj.name,
        do_reload=False,
        namespace=self.namespace,
        node_name=self.worker_nodes_list[1],
        pod_dict_path=constants.NGINX_POD_YAML,
    )
    first_node = self.pod_obj1.get().get('spec').get('nodeName')
    second_node = second_pod.get().get('spec').get('nodeName')
    assert first_node != second_node, 'Both pods are on the same node'

    # Write data from the first pod and record its md5sum so data
    # integrity can be verified from the second pod later on
    logger.info(f"Running IO on pod {self.pod_obj1.name}")
    io_filename = self.pod_obj1.name
    self.pod_obj1.run_io(
        storage_type=self.storage_type, size=self.io_size, runtime=30,
        fio_filename=io_filename,
    )
    pod.get_fio_rw_iops(self.pod_obj1)
    md5_before = pod.cal_md5sum(pod_obj=self.pod_obj1, file_name=io_filename)

    # Verify that second pod is still in ContainerCreating state and not
    # able to attain Running state due to expected failure
    helpers.wait_for_resource_state(
        resource=second_pod, state=constants.STATUS_CONTAINER_CREATING)
    self.verify_expected_failure_event(
        ocs_obj=second_pod, failure_str=self.expected_pod_failure)

    logger.info(f"Deleting first pod so that second pod can attach"
                f" {self.pvc_obj.name}")
    self.pod_obj1.delete()
    self.pod_obj1.ocp.wait_for_delete(resource_name=self.pod_obj1.name)

    # Wait for second pod to be in Running state
    helpers.wait_for_resource_state(
        resource=second_pod, state=constants.STATUS_RUNNING, timeout=240)

    # The file written by the first pod must be intact on the volume
    assert pod.verify_data_integrity(
        pod_obj=second_pod, file_name=io_filename,
        original_md5sum=md5_before,
    )

    second_pod.run_io(
        storage_type=self.storage_type, size=self.io_size, runtime=30,
        fio_filename=second_pod.name,
    )
    pod.get_fio_rw_iops(second_pod)

    # Again verify data integrity
    assert pod.verify_data_integrity(
        pod_obj=second_pod, file_name=io_filename,
        original_md5sum=md5_before,
    )
def test_run_io_multiple_dc_pods(self, dc_pods):
    """
    Run IO on multiple dc pods in parallel
    """
    io_size = f"{self.pvc_size - 1}G"
    # Start fio on every DC pod before collecting any results so the
    # workloads overlap
    for dc_pod_obj in dc_pods:
        dc_pod_obj.run_io("fs", io_size)
    for dc_pod_obj in dc_pods:
        get_fio_rw_iops(dc_pod_obj)
def test_create_multiple_sc_with_same_pool_name(self, interface_type, resources):
    """
    This test function does below,
    *. Creates multiple Storage Classes with same pool name
    *. Creates PVCs using each Storage Class
    *. Mount each PVC to an app pod
    *. Run IO on each app pod
    """
    # Unpack resources
    pods, pvcs, storageclasses = resources

    # Create 3 Storage Classes with same pool name
    if interface_type == constants.CEPHBLOCKPOOL:
        secret = self.rbd_secret_obj.name
        interface_name = self.cbp_obj.name
    else:
        # Any non-RBD value is normalized to CephFS and uses its data pool
        interface_type = constants.CEPHFILESYSTEM
        secret = self.cephfs_secret_obj.name
        interface_name = helpers.get_cephfs_data_pool_name()
    for i in range(3):
        log.info(f"Creating a {interface_type} storage class")
        storageclasses.append(
            helpers.create_storage_class(interface_type=interface_type,
                                         interface_name=interface_name,
                                         secret_name=secret))
        log.info(f"{interface_type}StorageClass: {storageclasses[i].name} "
                 f"created successfully")

    # Create PVCs using each SC
    for i in range(3):
        log.info(f"Creating a PVC using {storageclasses[i].name}")
        pvcs.append(helpers.create_pvc(storageclasses[i].name))
    # Wait for each PVC exactly once. The original nested this loop
    # inside the creation loop, re-waiting every PVC created so far on
    # each iteration.
    for pvc_obj in pvcs:
        helpers.wait_for_resource_state(pvc_obj, constants.STATUS_BOUND)
        pvc_obj.reload()

    # Create app pod and mount each PVC
    for i in range(3):
        log.info(f"Creating an app pod and mount {pvcs[i].name}")
        pods.append(
            helpers.create_pod(interface_type=interface_type,
                               pvc_name=pvcs[i].name,
                               namespace=defaults.ROOK_CLUSTER_NAMESPACE))
    # Wait for each pod exactly once and log its mount a single time.
    # The original waited on the whole pod list per iteration and logged
    # the same "created successfully" message once per already-created
    # pod. The loop variable is also renamed so it no longer shadows the
    # `pod` module used elsewhere in this file.
    for pod_obj, pvc_obj in zip(pods, pvcs):
        helpers.wait_for_resource_state(pod_obj, constants.STATUS_RUNNING)
        pod_obj.reload()
        log.info(f"{pod_obj.name} created successfully and "
                 f"mounted {pvc_obj.name}")

    # Run IO on each app pod for sometime
    for pod_obj in pods:
        log.info(f"Running FIO on {pod_obj.name}")
        pod_obj.run_io('fs', size='2G')

    for pod_obj in pods:
        get_fio_rw_iops(pod_obj)
def test_run_io_multiple_pods(self, pods):
    """
    Run IO on multiple pods in parallel
    """
    io_size = f"{self.pvc_size - 1}G"
    # Launch fio on all pods first so the IO runs concurrently, then
    # collect the results. Loop variable avoids shadowing the `pod`
    # module used elsewhere in this file.
    for pod_obj in pods:
        pod_obj.run_io("fs", io_size)
    for pod_obj in pods:
        get_fio_rw_iops(pod_obj)
def test_rwx_pvc_assign_pod_node(self, pvc_factory, teardown_factory):
    """
    Test assign nodeName to a pod using RWX pvc
    """
    interface = constants.CEPHFILESYSTEM
    worker_nodes_list = helpers.get_worker_nodes()

    # Create a RWX PVC
    pvc_obj = pvc_factory(
        interface=interface,
        access_mode=constants.ACCESS_MODE_RWX,
        status=constants.STATUS_BOUND
    )

    # Pin one pod to each of two randomly chosen worker nodes
    pod_list = []
    selected_nodes = random.sample(worker_nodes_list, k=2)
    logger.info(
        f"Creating {len(selected_nodes)} pods with pvc {pvc_obj.name}"
    )
    for node in selected_nodes:
        logger.info(f"Creating pod on node: {node}")
        app_pod = helpers.create_pod(
            interface_type=interface,
            pvc_name=pvc_obj.name,
            namespace=pvc_obj.namespace,
            node_name=node,
            pod_dict_path=constants.NGINX_POD_YAML
        )
        pod_list.append(app_pod)
        teardown_factory(app_pod)

    # Confirm that both pods are running on the selected_nodes
    logger.info('Checking whether pods are running on the selected nodes')
    for app_pod, selected_node in zip(pod_list, selected_nodes):
        helpers.wait_for_resource_state(
            resource=app_pod, state=constants.STATUS_RUNNING, timeout=120
        )
        app_pod.reload()
        assert pod.verify_node_name(app_pod, selected_node), (
            f"Pod {app_pod.name} is running on a different node "
            f"than the selected node"
        )

    # Run IOs on all pods. FIO Filename is kept same as pod name
    with ThreadPoolExecutor() as executor:
        for app_pod in pod_list:
            logger.info(f"Running IO on pod {app_pod.name}")
            executor.submit(
                app_pod.run_io, storage_type='fs', size='512M', runtime=30,
                fio_filename=app_pod.name
            )

    # Check IO from all pods
    for app_pod in pod_list:
        pod.get_fio_rw_iops(app_pod)
def run_io_multiple_dc_pods(self):
    """
    Run IO on multiple dc pods in parallel
    """
    fill_size = f'{self.pvc_size_int - 1}G'
    # Start fio on every DC pod first so the IO runs overlap, then
    # collect all results
    for dc_pod in self.dc_pod_objs:
        dc_pod.run_io('fs', fill_size)
    for dc_pod in self.dc_pod_objs:
        pod.get_fio_rw_iops(dc_pod)
def test_create_delete_pool(
    self,
    replica,
    compression,
    namespace,
    storage,
    pvc,
    pod,
):
    """
    test create delete pool have the following workflow
    .* Create new RBD pool
    .* Associate the pool with storageclass
    .* Create PVC based on the storageclass
    .* Create POD based on the PVC
    .* Run IO on the POD
    .* Check replication and compression

    """
    # Guard clause: the pool must already show the requested replica and
    # compression values at the Ceph level before any IO is run
    if not check_pool_compression_replica_ceph_level(
            self.pool_name, compression, replica):
        raise PoolCephValueNotMatch(
            f"Pool {self.pool_name} values do not match configuration")

    # Running IO on POD
    self.pod_obj.run_io(
        "fs",
        size="100m",
        rate="1500m",
        runtime=0,
        buffer_compress_percentage=60,
        buffer_pattern="0xdeadface",
        bs="8K",
        jobs=5,
        readwrite="readwrite",
    )
    # Getting IO results
    get_fio_rw_iops(self.pod_obj)

    # Checking Results for compression and replication
    if compression and validate_compression(self.pool_name) is False:
        raise PoolNotCompressedAsExpected(
            f"Pool {self.pool_name} compression did not reach expected value"
        )
    if validate_replica_data(self.pool_name, replica) is False:
        raise PoolNotReplicatedAsNeeded(
            f"Pool {self.pool_name} not replicated to size {replica}")
def test_rwo_pvc_assign_pod_node(self, interface, pvc_factory, teardown_factory):
    """
    Test assign nodeName to a pod using RWO pvc
    """
    worker_nodes_list = get_worker_nodes()

    # Create a RWO PVC
    rwo_pvc = pvc_factory(
        interface=interface,
        access_mode=constants.ACCESS_MODE_RWO,
        status=constants.STATUS_BOUND,
    )

    # Pin the pod to one randomly picked worker node
    selected_node = random.choice(worker_nodes_list)
    logger.info(
        f"Creating a pod on node: {selected_node} with pvc {rwo_pvc.name}")
    app_pod = helpers.create_pod(
        interface_type=interface,
        pvc_name=rwo_pvc.name,
        namespace=rwo_pvc.namespace,
        node_name=selected_node,
        pod_dict_path=constants.NGINX_POD_YAML,
    )
    teardown_factory(app_pod)

    # Confirm that the pod is running on the selected_node
    helpers.wait_for_resource_state(
        resource=app_pod, state=constants.STATUS_RUNNING, timeout=120)
    app_pod.reload()
    assert pod.verify_node_name(
        app_pod, selected_node
    ), "Pod is running on a different node than the selected node"

    # Run IO
    logger.info(f"Running IO on pod {app_pod.name}")
    app_pod.run_io(storage_type="fs", size="512M", runtime=30, invalidate=0)
    pod.get_fio_rw_iops(app_pod)
def test_new_sc_new_rbd_pool(
    self, replica, compression, storageclass_factory, pvc_factory, pod_factory
):
    """
    This test function does below,
    *. Creates Storage Class with creating new rbd pool
    *. Creates PVCs using new Storage Class
    *. Mount PVC to an app pod
    *. Run IO on an app pod
    """
    interface_type = constants.CEPHBLOCKPOOL

    # Build a storage class backed by a brand-new RBD pool with the
    # requested replica count and compression mode
    new_sc = storageclass_factory(
        interface=interface_type,
        new_rbd_pool=True,
        replica=replica,
        compression=compression,
    )

    log.info(f"Creating a PVC using {new_sc.name}")
    test_pvc = pvc_factory(interface=interface_type, storageclass=new_sc)
    log.info(f"PVC: {test_pvc.name} created successfully using " f"{new_sc.name}")

    # Create app pod and mount each PVC
    log.info(f"Creating an app pod and mount {test_pvc.name}")
    app_pod = pod_factory(interface=interface_type, pvc=test_pvc)
    log.info(f"{app_pod.name} created successfully and mounted {test_pvc.name}")

    # Run IO on each app pod for sometime
    log.info(f"Running FIO on {app_pod.name}")
    app_pod.run_io("fs", size="1G")
    get_fio_rw_iops(app_pod)

    used_space = get_percent_used_capacity()
    log.info(
        f"Cluster used space with replica size {replica}, "
        f"compression mode {compression}={used_space}"
    )

    # The backing pool name comes from the generated storage class
    pool_name = new_sc.get().get("parameters").get("pool")
    if compression != "none":
        validate_compression(pool_name)
    validate_replica_data(pool_name, replica)
def test_delete_plugin_pod(self, resource_to_delete):
    """
    Test case to verify the impact of plugin pod deletion on app pod.
    Verifies bug 1970352.
    """
    DISRUPTION_OPS.set_resource(resource=resource_to_delete)
    pod_node = self.pod_obj.get_node()

    # Pick the plugin pod co-located with the app pod, since only that
    # pod's deletion can impact the app pod
    log.info(
        f"Selecting {resource_to_delete} pod which is running on the same "
        f"node as that of the app pod")
    colocated = (
        (index, res_obj)
        for index, res_obj in enumerate(DISRUPTION_OPS.resource_obj)
        if res_obj.get_node() == pod_node
    )
    selection = next(colocated, None)
    assert (
        selection is not None
    ), f"No {resource_to_delete} pod is running on the node {pod_node}"
    resource_id, selected_pod = selection
    log.info(f"Selected the pod {selected_pod.name}")

    log.info(
        f"Deleting the pod {DISRUPTION_OPS.resource_obj[resource_id].name}"
        f" which is running on the node {pod_node}")
    DISRUPTION_OPS.delete_resource(resource_id=resource_id)
    log.info(
        f"Deleted {DISRUPTION_OPS.resource_obj[resource_id].name} pod and "
        f"new {resource_to_delete} pod reached Running state")

    # Run IO
    self.pod_obj.run_io(storage_type="fs", size="1G", runtime=20)
    log.info("FIO started on pod")
    log.info("Waiting for fio result")
    pod.get_fio_rw_iops(self.pod_obj)
    log.info("Fio completed on pod")
def test_rwx_pvc_assign_pod_node(self, interface, pvc_factory, teardown_factory):
    """
    Test assign nodeName to a pod using RWX pvc
    """
    worker_nodes_list = get_worker_nodes()

    # Pick the volume/pod configuration matching the interface under test
    if interface == constants.CEPHBLOCKPOOL:
        volume_mode, storage_type = "Block", "block"
        block_pv = True
        pod_yaml = constants.CSI_RBD_RAW_BLOCK_POD_YAML
    else:
        volume_mode, storage_type = "", "fs"
        block_pv = False
        pod_yaml = ""

    # Create a RWX PVC
    rwx_pvc = pvc_factory(
        interface=interface,
        access_mode=constants.ACCESS_MODE_RWX,
        status=constants.STATUS_BOUND,
        volume_mode=volume_mode,
    )

    # Pin one pod to each of two randomly chosen worker nodes
    pod_list = []
    selected_nodes = random.sample(worker_nodes_list, k=2)
    logger.info(f"Creating {len(selected_nodes)} pods with pvc {rwx_pvc.name}")
    for node in selected_nodes:
        logger.info(f"Creating pod on node: {node}")
        app_pod = helpers.create_pod(
            interface_type=interface,
            pvc_name=rwx_pvc.name,
            namespace=rwx_pvc.namespace,
            node_name=node,
            pod_dict_path=pod_yaml,
            raw_block_pv=block_pv,
        )
        pod_list.append(app_pod)
        teardown_factory(app_pod)

    # Confirm that both pods are running on the selected_nodes
    logger.info("Checking whether pods are running on the selected nodes")
    for app_pod, selected_node in zip(pod_list, selected_nodes):
        helpers.wait_for_resource_state(
            resource=app_pod, state=constants.STATUS_RUNNING, timeout=120
        )
        app_pod.reload()
        assert pod.verify_node_name(app_pod, selected_node), (
            f"Pod {app_pod.name} is running on a different node "
            f"than the selected node"
        )

    # Run IOs on all pods. FIO Filename is kept same as pod name
    with ThreadPoolExecutor() as executor:
        for app_pod in pod_list:
            logger.info(f"Running IO on pod {app_pod.name}")
            executor.submit(
                app_pod.run_io,
                storage_type=storage_type,
                size="512M",
                runtime=30,
                fio_filename=app_pod.name,
            )

    # Check IO from all pods
    for app_pod in pod_list:
        pod.get_fio_rw_iops(app_pod)
def test_create_resize_delete_pvc(
    self,
    project_factory,
    teardown_factory,
    setup_ui,
    sc_name,
    access_mode,
    pvc_size,
    vol_mode,
):
    """
    Test create, resize and delete pvc via UI

    Args:
        project_factory (function): project fixture
        teardown_factory (function): resource cleanup fixture
        setup_ui: UI session fixture
        sc_name (str): storage class to create the PVC from
        access_mode (str): PVC access mode
        pvc_size (str): requested PVC size in GiB
        vol_mode (str): PVC volume mode (Filesystem/Block)
    """
    # Creating a test project via CLI
    pro_obj = project_factory()
    project_name = pro_obj.namespace

    pvc_ui_obj = PvcUI(setup_ui)

    # Creating PVC via UI
    pvc_name = create_unique_resource_name("test", "pvc")
    pvc_ui_obj.create_pvc_ui(
        project_name, sc_name, pvc_name, access_mode, pvc_size, vol_mode
    )

    # Cross-check the UI-created PVC against the cluster state
    pvc_objs = get_all_pvc_objs(namespace=project_name)
    pvc = [pvc_obj for pvc_obj in pvc_objs if pvc_obj.name == pvc_name]

    assert pvc[0].size == int(pvc_size), (
        f"size error| expected size:{pvc_size} \n "
        f"actual size:{str(pvc[0].size)}"
    )

    assert pvc[0].get_pvc_access_mode == access_mode, (
        f"access mode error| expected access mode:{access_mode} "
        f"\n actual access mode:{pvc[0].get_pvc_access_mode}"
    )

    assert pvc[0].backed_sc == sc_name, (
        f"storage class error| expected storage class:{sc_name} "
        f"\n actual storage class:{pvc[0].backed_sc}"
    )

    assert pvc[0].get_pvc_vol_mode == vol_mode, (
        f"volume mode error| expected volume mode:{vol_mode} "
        f"\n actual volume mode:{pvc[0].get_pvc_vol_mode}"
    )

    # Verifying PVC via UI
    logger.info("Verifying PVC Details via UI")
    pvc_ui_obj.verify_pvc_ui(
        pvc_size=pvc_size,
        access_mode=access_mode,
        vol_mode=vol_mode,
        sc_name=sc_name,
        pvc_name=pvc_name,
        project_name=project_name,
    )
    logger.info("PVC Details Verified via UI..!!")

    # Creating Pod via CLI
    logger.info("Creating Pod")
    if sc_name in (constants.DEFAULT_STORAGECLASS_RBD,):
        interface_type = constants.CEPHBLOCKPOOL
    else:
        interface_type = constants.CEPHFILESYSTEM

    new_pod = helpers.create_pod(
        interface_type=interface_type,
        pvc_name=pvc_name,
        namespace=project_name,
        raw_block_pv=vol_mode == constants.VOLUME_MODE_BLOCK,
    )

    logger.info(f"Waiting for Pod: state= {constants.STATUS_RUNNING}")
    wait_for_resource_state(resource=new_pod, state=constants.STATUS_RUNNING)

    # Calling the Teardown Factory Method to make sure Pod is deleted
    teardown_factory(new_pod)

    # Expanding the PVC
    logger.info("Pvc Resizing")
    new_size = int(pvc_size) + 3
    pvc_ui_obj.pvc_resize_ui(
        pvc_name=pvc_name, new_size=new_size, project_name=project_name
    )

    # Sanity check on the computed size (always +3 of the original)
    assert new_size > int(
        pvc_size
    ), f"New size of the PVC cannot be less than existing size: new size is {new_size})"

    ocp_version = get_ocp_version()
    self.pvc_loc = locators[ocp_version]["pvc"]

    # Verifying PVC expansion
    logger.info("Verifying PVC resize")
    expected_capacity = f"{new_size} GiB"
    pvc_resize = pvc_ui_obj.verify_pvc_resize_ui(
        project_name=project_name,
        pvc_name=pvc_name,
        expected_capacity=expected_capacity,
    )

    assert pvc_resize, "PVC resize failed"
    logger.info(
        "Pvc resize verified..!!"
        f"New Capacity after PVC resize is {expected_capacity}"
    )

    # Running FIO
    logger.info("Execute FIO on a Pod")
    if vol_mode == constants.VOLUME_MODE_BLOCK:
        storage_type = constants.WORKLOAD_STORAGE_TYPE_BLOCK
    else:
        storage_type = constants.WORKLOAD_STORAGE_TYPE_FS

    # NOTE(review): size is passed as a bare int here, unlike the
    # "<n>G" strings used elsewhere in this file — confirm run_io
    # formats integer sizes as GiB
    new_pod.run_io(storage_type, size=(new_size - 1), invalidate=0, rate="1000m")

    get_fio_rw_iops(new_pod)
    logger.info("FIO execution on Pod successfully completed..!!")

    # Checking if the Pod is deleted or not
    new_pod.delete(wait=True)
    new_pod.ocp.wait_for_delete(resource_name=new_pod.name)

    # Deleting the PVC via UI
    logger.info(f"Delete {pvc_name} pvc")
    pvc_ui_obj.delete_pvc_ui(pvc_name, project_name)

    pvc[0].ocp.wait_for_delete(pvc_name, timeout=120)

    pvc_objs = get_all_pvc_objs(namespace=project_name)
    pvcs = [pvc_obj for pvc_obj in pvc_objs if pvc_obj.name == pvc_name]
    # BUGFIX: the original `assert f"PVC ... does not deleted"` asserted
    # a non-empty string, which is always truthy and could never fail.
    # Fail explicitly when the PVC still exists.
    assert not pvcs, f"PVC {pvc_name} was not deleted"
def test_disruptive_during_pod_pvc_deletion_and_io(
    self, interface, resource_to_delete, setup_base
):
    """
    Delete ceph/rook pod while PVCs deletion, pods deletion and IO are
    progressing

    Args:
        interface: storage interface under test (RBD or CephFS constant)
        resource_to_delete (str): key into ``pod_functions`` below naming
            the ceph/rook pod type to disrupt (e.g. 'osd', 'mon')
        setup_base: fixture providing (pvc_objs, pod_objs, rwx_pod_objs)
    """
    pvc_objs, pod_objs, rwx_pod_objs = setup_base
    namespace = pvc_objs[0].project.namespace

    num_of_pods_to_delete = 3
    num_of_io_pods = 1

    # Select pods to be deleted
    # Each selection below is widened with the RWX pods that share a PVC
    # with an already-selected pod, so both users of an RWX volume are
    # handled together.
    pods_to_delete = pod_objs[:num_of_pods_to_delete]
    pods_to_delete.extend(
        [
            pod
            for pod in rwx_pod_objs
            for pod_obj in pods_to_delete
            if (pod_obj.pvc == pod.pvc)
        ]
    )

    # Select pods to run IO
    io_pods = pod_objs[
        num_of_pods_to_delete : num_of_pods_to_delete + num_of_io_pods
    ]
    io_pods.extend(
        [
            pod
            for pod in rwx_pod_objs
            for pod_obj in io_pods
            if (pod_obj.pvc == pod.pvc)
        ]
    )

    # Select pods which are having PVCs to delete
    pods_for_pvc = pod_objs[num_of_pods_to_delete + num_of_io_pods :]
    pvcs_to_delete = [pod_obj.pvc for pod_obj in pods_for_pvc]
    pods_for_pvc.extend(
        [
            pod
            for pod in rwx_pod_objs
            for pod_obj in pods_for_pvc
            if (pod_obj.pvc == pod.pvc)
        ]
    )

    log.info(
        f"{len(pods_to_delete)} pods selected for deletion in which "
        f"{len(pods_to_delete) - num_of_pods_to_delete} pairs of pod "
        f"share same RWX PVC"
    )
    log.info(
        f"{len(io_pods)} pods selected for running IO in which "
        f"{len(io_pods) - num_of_io_pods} pairs of pod share same "
        f"RWX PVC"
    )
    no_of_rwx_pvcs_delete = len(pods_for_pvc) - len(pvcs_to_delete)
    log.info(
        f"{len(pvcs_to_delete)} PVCs selected for deletion. "
        f"RWO PVCs: {len(pvcs_to_delete) - no_of_rwx_pvcs_delete}, "
        f"RWX PVCs: {no_of_rwx_pvcs_delete}"
    )

    # Maps each deletable ceph/rook pod type to its pod-listing helper
    pod_functions = {
        "mds": partial(get_mds_pods),
        "mon": partial(get_mon_pods),
        "mgr": partial(get_mgr_pods),
        "osd": partial(get_osd_pods),
        "rbdplugin": partial(get_plugin_pods, interface=interface),
        "cephfsplugin": partial(get_plugin_pods, interface=interface),
        "cephfsplugin_provisioner": partial(get_cephfsplugin_provisioner_pods),
        "rbdplugin_provisioner": partial(get_rbdfsplugin_provisioner_pods),
        "operator": partial(get_operator_pods),
    }
    disruption = disruption_helpers.Disruptions()
    disruption.set_resource(resource=resource_to_delete)
    # Sized so every app pod's background task can run concurrently
    executor = ThreadPoolExecutor(max_workers=len(pod_objs) + len(rwx_pod_objs))

    # Get number of pods of type 'resource_to_delete'
    num_of_resource_to_delete = len(pod_functions[resource_to_delete]())

    # Fetch the number of Pods and PVCs
    initial_num_of_pods = len(get_all_pods(namespace=namespace))
    initial_num_of_pvc = len(get_all_pvcs(namespace=namespace)["items"])

    # Fetch PV names to verify after deletion
    pv_objs = []
    for pvc_obj in pvcs_to_delete:
        pvc_obj.reload()
        pv_objs.append(pvc_obj.backed_pv_obj)

    # Fetch volume details from pods for the purpose of verification
    node_pv_dict = {}
    for pod_obj in pods_to_delete:
        pod_info = pod_obj.get()
        node = pod_info["spec"]["nodeName"]
        pvc = pod_info["spec"]["volumes"][0]["persistentVolumeClaim"]["claimName"]
        # NOTE(review): if no PVC in pvc_objs matches, `pv` keeps the
        # value from the previous iteration — assumes every pod's claim
        # is in pvc_objs; confirm against setup_base
        for pvc_obj in pvc_objs:
            if pvc_obj.name == pvc:
                pvc_obj.reload()
                pv = pvc_obj.backed_pv
                break
        if node in node_pv_dict:
            node_pv_dict[node].append(pv)
        else:
            node_pv_dict[node] = [pv]

    # Fetch image uuid associated with PVCs to be deleted
    pvc_uuid_map = {}
    for pvc_obj in pvcs_to_delete:
        pvc_uuid_map[pvc_obj.name] = pvc_obj.image_uuid
    log.info("Fetched image uuid associated with each PVC")

    # Do setup on pods for running IO
    log.info("Setting up pods for running IO.")
    for pod_obj in pod_objs + rwx_pod_objs:
        pvc_info = pod_obj.pvc.get()
        if pvc_info["spec"]["volumeMode"] == "Block":
            storage_type = "block"
        else:
            storage_type = "fs"
        executor.submit(pod_obj.workload_setup, storage_type=storage_type)

    # Wait for setup on pods to complete
    # Polls the pod's `wl_setup_done` flag set by workload_setup
    for pod_obj in pod_objs + rwx_pod_objs:
        log.info(f"Waiting for IO setup to complete on pod {pod_obj.name}")
        for sample in TimeoutSampler(180, 2, getattr, pod_obj, "wl_setup_done"):
            if sample:
                log.info(
                    f"Setup for running IO is completed on pod "
                    f"{pod_obj.name}."
                )
                break
    log.info("Setup for running IO is completed on all pods.")

    # Start IO on pods having PVCs to delete to load data
    log.info("Starting IO on pods having PVCs to delete.")
    self.run_io_on_pods(pods_for_pvc)
    log.info("IO started on pods having PVCs to delete.")

    log.info("Fetching IO results from the pods having PVCs to delete.")
    for pod_obj in pods_for_pvc:
        get_fio_rw_iops(pod_obj)
    log.info("Verified IO result on pods having PVCs to delete.")

    # Delete pods having PVCs to delete.
    delete_pods(pods_for_pvc)
    for pod_obj in pods_for_pvc:
        pod_obj.ocp.wait_for_delete(pod_obj.name)
    log.info("Verified: Deleted pods which are having PVCs to delete.")

    # Start IO on pods to be deleted
    log.info("Starting IO on pods to be deleted.")
    self.run_io_on_pods(pods_to_delete)
    log.info("IO started on pods to be deleted.")

    # Start deleting PVCs
    pvc_bulk_delete = executor.submit(delete_pvcs, pvcs_to_delete)
    log.info("Started deleting PVCs")

    # Start deleting pods
    pod_bulk_delete = executor.submit(delete_pods, pods_to_delete, wait=False)
    log.info("Started deleting pods")

    # Start IO on IO pods
    self.run_io_on_pods(io_pods)
    log.info("Started IO on IO pods")

    # Verify pvc deletion has started
    pvc_deleting = executor.submit(
        wait_for_resource_count_change,
        func_to_use=get_all_pvcs,
        previous_num=initial_num_of_pvc,
        namespace=namespace,
        change_type="decrease",
        min_difference=1,
        timeout=30,
        interval=0.01,
    )

    # Verify pod deletion has started
    pod_deleting = executor.submit(
        wait_for_resource_count_change,
        func_to_use=get_all_pods,
        previous_num=initial_num_of_pods,
        namespace=namespace,
        change_type="decrease",
        min_difference=1,
        timeout=30,
        interval=0.01,
    )

    assert pvc_deleting.result(), "Wait timeout: PVCs are not being deleted."
    log.info("PVCs deletion has started.")

    assert pod_deleting.result(), "Wait timeout: Pods are not being deleted."
    log.info("Pods deletion has started.")

    # Delete pod of type 'resource_to_delete'
    # This is the disruption: it happens while pod/PVC deletion and IO
    # from the steps above are still in flight
    disruption.delete_resource()

    pod_bulk_delete.result()

    # Verify pods are deleted
    for pod_obj in pods_to_delete:
        pod_obj.ocp.wait_for_delete(pod_obj.name, 300)
    log.info("Verified: Pods are deleted.")

    # Verify that the mount point is removed from nodes after deleting pod
    node_pv_mounted = verify_pv_mounted_on_node(node_pv_dict)
    for node, pvs in node_pv_mounted.items():
        assert not pvs, (
            f"PVs {pvs} is still present on node {node} after "
            f"deleting the pods."
        )
    log.info(
        "Verified: mount points are removed from nodes after deleting "
        "the pods"
    )

    pvcs_deleted = pvc_bulk_delete.result()
    assert pvcs_deleted, "Deletion of PVCs failed."

    # Verify PVCs are deleted
    for pvc_obj in pvcs_to_delete:
        pvc_obj.ocp.wait_for_delete(pvc_obj.name)
    log.info("Verified: PVCs are deleted.")

    # Verify PVs are deleted
    for pv_obj in pv_objs:
        pv_obj.ocp.wait_for_delete(resource_name=pv_obj.name, timeout=300)
    log.info("Verified: PVs are deleted.")

    # Verify PV using ceph toolbox. Image/Subvolume should be deleted.
    pool_name = default_ceph_block_pool()
    for pvc_name, uuid in pvc_uuid_map.items():
        # `ret` is only assigned for the two known interfaces; any other
        # interface value would reuse the previous iteration's result
        if interface == constants.CEPHBLOCKPOOL:
            ret = verify_volume_deleted_in_backend(
                interface=interface, image_uuid=uuid, pool_name=pool_name
            )
        if interface == constants.CEPHFILESYSTEM:
            ret = verify_volume_deleted_in_backend(
                interface=interface, image_uuid=uuid
            )
        assert ret, (
            f"Volume associated with PVC {pvc_name} still exists "
            f"in backend"
        )

    log.info("Fetching IO results from the pods.")
    for pod_obj in io_pods:
        fio_result = pod_obj.get_fio_results()
        err_count = fio_result.get("jobs")[0].get("error")
        assert (
            err_count == 0
        ), f"FIO error on pod {pod_obj.name}. FIO result: {fio_result}"
    log.info("Verified IO result on pods.")

    # Verify number of pods of type 'resource_to_delete'
    final_num_resource_to_delete = len(pod_functions[resource_to_delete]())
    assert final_num_resource_to_delete == num_of_resource_to_delete, (
        f"Total number of {resource_to_delete} pods is not matching with "
        f"initial value. Total number of pods before deleting a pod: "
        f"{num_of_resource_to_delete}. Total number of pods present now: "
        f"{final_num_resource_to_delete}"
    )

    # Check ceph status
    ceph_health_check(namespace=config.ENV_DATA["cluster_namespace"])
    log.info("Ceph cluster health is OK")
def test_add_capacity(
    self,
    project_factory,
    multi_dc_pod,
    multi_pvc_factory,
    pod_factory,
    mcg_obj,
    awscli_pod,
    bucket_factory,
    percent_to_fill,
):
    """
    Expand cluster capacity (add OSDs) while heavy IO, PVC create/delete
    and NooBaa object operations run in the background, then verify the
    expansion's exit criteria.

    Flow visible in this test:
    1. Entry criteria: ceph health OK and all OCS pods Running.
    2. Fill the cluster to ``percent_to_fill`` via DC app pods.
    3. Kick off background copy/raw-block IO, PVC create/delete loops,
       NooBaa s3/OBC create/delete loops and node status checks.
    4. Call ``storage_cluster.add_capacity`` and wait for the new OSD pods.
    5. Exit criteria: no pod restarts, exactly 3 OSDs added, raw capacity
       grew by 3 * osd_size, 'ceph osd tree' layout is valid for the
       platform/AZ layout, new PVCs/pods still work, cluster not HEALTH_ERR.

    Args:
        project_factory (function): fixture creating a test namespace
        multi_dc_pod (function): fixture creating multiple DC-based app pods
        multi_pvc_factory (function): fixture creating multiple PVCs
        pod_factory (function): fixture creating a single pod
        mcg_obj (obj): NooBaa/MCG handle used by the s3/OBC background ops
        awscli_pod (obj): helper pod used for s3 IO
        bucket_factory (function): fixture creating object buckets
        percent_to_fill (int): target cluster fill percentage before expansion
    """
    #####################################
    #           ENTRY CRITERIA          #
    #####################################
    # Prepare initial configuration : logger, cluster filling, loop for creating & deleting of PVCs and Pods,
    # noobaa IOs etc.,

    # Perform Health checks:
    # Make sure cluster is healthy
    assert ceph_health_check(
        defaults.ROOK_CLUSTER_NAMESPACE
    ), "Entry criteria FAILED: Cluster is Unhealthy"

    # All OCS pods are in running state:
    # ToDo https://github.com/red-hat-storage/ocs-ci/issues/2361
    assert (
        pod_helpers.check_pods_in_running_state()
    ), "Entry criteria FAILED: one or more OCS pods are not in running state"
    # Create the namespace under which this test will execute:
    project = project_factory()

    # total pvc created will be 'num_of_pvcs' * 4 types of pvcs(rbd-rwo,rwx
    # & cephfs-rwo,rwx)
    num_of_pvcs = 40

    rwo_rbd_pods = multi_dc_pod(
        num_of_pvcs=num_of_pvcs,
        pvc_size=175,
        project=project,
        access_mode="RWO",
        pool_type="rbd",
        timeout=360,
    )
    # Note: Skipping cephfs pods creation
    # observing bug https://bugzilla.redhat.com/show_bug.cgi?id=1785399,
    # https://bugzilla.redhat.com/show_bug.cgi?id=1779421#c14
    # Todo: https://github.com/red-hat-storage/ocs-ci/issues/2360

    # Create rwx-rbd pods
    pods_ios_rwx_rbd = multi_dc_pod(
        num_of_pvcs=10,
        pvc_size=175,
        project=project,
        access_mode="RWX-BLK",
        pool_type="rbd",
        timeout=360,
    )

    cluster_fill_io_pods = rwo_rbd_pods
    logger.info("The DC pods are up. Running IOs from them to fill the cluster")
    filler = cluster_exp_helpers.ClusterFiller(
        cluster_fill_io_pods, percent_to_fill, project.namespace
    )
    assert filler.cluster_filler(), "IOs failed"

    # create separate threadpool for running IOs in the background
    executor_run_bg_ios_ops = ThreadPoolExecutor()
    bg_wrap = cluster_exp_helpers.BackgroundOps()
    status_cluster_ios = []
    pods_for_copy = rwo_rbd_pods[0:5] + pods_ios_rwx_rbd

    # Raw-block pods get raw_block_io; filesystem pods get copy operations.
    for p in pods_for_copy:
        logger.info(f"running IOs on {p.name}")
        if p.pod_type == "rbd_block_rwx":
            status_cluster_ios.append(
                executor_run_bg_ios_ops.submit(
                    bg_wrap.wrap, cluster_exp_helpers.raw_block_io, p, iterations=10
                )
            )
        else:
            status_cluster_ios.append(
                executor_run_bg_ios_ops.submit(
                    bg_wrap.wrap,
                    cluster_exp_helpers.cluster_copy_ops,
                    p,
                    iterations=200,
                )
            )

    # Start pvc ops in the background.:
    logger.info("Started pvc create delete operations")
    executor_run_bg_ios_ops.submit(
        bg_wrap.wrap,
        test_create_delete_pvcs,
        multi_pvc_factory,
        pod_factory,
        project,
        iterations=200,
    )

    # Start NooBaa IOs in the background.:
    logger.info("Started s3_io_create_delete...")
    executor_run_bg_ios_ops.submit(
        bg_wrap.wrap,
        s3_io_create_delete,
        mcg_obj,
        awscli_pod,
        bucket_factory,
        iterations=200,
    )
    logger.info("Started obc_io_create_delete...")
    executor_run_bg_ios_ops.submit(
        bg_wrap.wrap,
        obc_io_create_delete,
        mcg_obj,
        awscli_pod,
        bucket_factory,
        iterations=200,
    )

    # All ocs nodes are in Ready state (including master):
    executor_run_bg_ios_ops.submit(
        bg_wrap.wrap, cluster_exp_helpers.check_nodes_status, iterations=100
    )

    # Get restart count of ocs pods before expanstion
    restart_count_before = pod_helpers.get_pod_restarts_count(
        defaults.ROOK_CLUSTER_NAMESPACE
    )

    # Get osd pods before expansion
    osd_pods_before = pod_helpers.get_osd_pods()

    # Get the total space in cluster before expansion
    ct_pod = pod_helpers.get_ceph_tools_pod()
    output = ct_pod.exec_ceph_cmd(ceph_cmd="ceph osd df")
    total_space_b4_expansion = int(output.get("summary").get("total_kb"))
    logger.info(f"total_space_b4_expansion == {total_space_b4_expansion}")

    logger.info("############## Calling add_capacity $$$$$$$$$$")
    #####################
    # Call add_capacity #
    #####################
    osd_size = storage_cluster.get_osd_size()
    result = storage_cluster.add_capacity(osd_size)
    pod = OCP(kind=constants.POD, namespace=config.ENV_DATA["cluster_namespace"])

    # New osd (all) pods corresponding to the additional capacity should be
    # in running state
    pod.wait_for_resource(
        timeout=1200,
        condition=constants.STATUS_RUNNING,
        selector="app=rook-ceph-osd",
        resource_count=result * 3,
    )

    #################################
    # Exit criteria verification:   #
    #################################
    # NOTE(review): class-level flag read by the background wrappers to stop
    # looping — presumably signals them that expansion finished; confirm in
    # cluster_exp_helpers.BackgroundOps.
    cluster_exp_helpers.BackgroundOps.EXPANSION_COMPLETED = True

    # No ocs pods should get restarted unexpectedly
    # Get restart count of ocs pods after expansion and see any pods got
    # restated
    restart_count_after = pod_helpers.get_pod_restarts_count(
        defaults.ROOK_CLUSTER_NAMESPACE
    )
    #
    # # TO DO
    # # Handle Bug 1814254 - All Mons respinned during add capacity and OSDs took longtime to come up
    # # implement function to make sure no pods are respun after expansion

    logger.info(
        f"sum(restart_count_before.values()) = {sum(restart_count_before.values())}"
    )
    logger.info(
        f" sum(restart_count_after.values()) = {sum(restart_count_after.values())}"
    )
    assert sum(restart_count_before.values()) == sum(
        restart_count_after.values()
    ), "Exit criteria verification FAILED: One or more pods got restarted"

    logger.info("Exit criteria verification Success: No pods were restarted")

    # Make sure right number of OSDs are added:
    # Get osd pods after expansion
    osd_pods_after = pod_helpers.get_osd_pods()
    number_of_osds_added = len(osd_pods_after) - len(osd_pods_before)
    logger.info(
        f"### number_of_osds_added = {number_of_osds_added}, "
        f"before = {len(osd_pods_before)}, after = {len(osd_pods_after) }"
    )
    # If the difference b/w updated count of osds and old osd count is not
    # 3 then expansion failed
    assert (
        number_of_osds_added == 3
    ), "Exit criteria verification FAILED: osd count mismatch"

    logger.info(
        "Exit criteria verification Success: Correct number of OSDs are added"
    )

    # The newly added capacity takes into effect at the storage level
    ct_pod = pod_helpers.get_ceph_tools_pod()
    output = ct_pod.exec_ceph_cmd(ceph_cmd="ceph osd df")
    total_space_after_expansion = int(output.get("summary").get("total_kb"))
    osd_size = int(output.get("nodes")[0].get("kb"))
    expanded_space = osd_size * 3  # 3 OSDS are added of size = 'osd_size'
    logger.info(f"space output == {output} ")
    logger.info(f"osd size == {osd_size} ")
    logger.info(f"total_space_after_expansion == {total_space_after_expansion} ")
    expected_total_space_after_expansion = total_space_b4_expansion + expanded_space
    logger.info(
        f"expected_total_space_after_expansion == {expected_total_space_after_expansion} "
    )
    assert (
        total_space_after_expansion == expected_total_space_after_expansion
    ), "Exit criteria verification FAILED: Expected capacity mismatch"
    logger.info(
        "Exit criteria verification Success: Newly added capacity took into effect"
    )

    logger.info("Exit criteria verification Success: IOs completed successfully")

    # 'ceph osd tree' should show the new osds under right nodes/hosts
    # Verification is different for 3 AZ and 1 AZ configs
    ct_pod = pod_helpers.get_ceph_tools_pod()
    tree_output = ct_pod.exec_ceph_cmd(ceph_cmd="ceph osd tree")
    logger.info(f"### OSD tree output = {tree_output}")
    if config.ENV_DATA["platform"].lower() == constants.VSPHERE_PLATFORM:
        assert cluster_helpers.check_osd_tree_1az_vmware(
            tree_output, len(osd_pods_after)
        ), "Exit criteria verification FAILED: Incorrect ceph osd tree formation found"

    aws_number_of_zones = 3
    if config.ENV_DATA["platform"].lower() == constants.AWS_PLATFORM:
        # parse the osd tree. if it contains a node 'rack' then it's a
        # AWS_1AZ cluster. Else, 3 AWS_3AZ cluster
        for i in range(len(tree_output["nodes"])):
            if tree_output["nodes"][i]["name"] in "rack0":
                aws_number_of_zones = 1
        if aws_number_of_zones == 1:
            assert cluster_helpers.check_osd_tree_1az_aws(
                tree_output, len(osd_pods_after)
            ), "Exit criteria verification FAILED: Incorrect ceph osd tree formation found"
        else:
            assert cluster_helpers.check_osd_tree_3az_aws(
                tree_output, len(osd_pods_after)
            ), "Exit criteria verification FAILED: Incorrect ceph osd tree formation found"

    logger.info("Exit criteria verification Success: osd tree verification success")

    # Make sure new pvcs and pods can be created and IOs can be run from
    # the pods
    num_of_pvcs = 1
    rwo_rbd_pods = multi_dc_pod(
        num_of_pvcs=num_of_pvcs,
        pvc_size=5,
        project=project,
        access_mode="RWO",
        pool_type="rbd",
    )
    rwo_cephfs_pods = multi_dc_pod(
        num_of_pvcs=num_of_pvcs,
        pvc_size=5,
        project=project,
        access_mode="RWO",
        pool_type="cephfs",
    )
    rwx_cephfs_pods = multi_dc_pod(
        num_of_pvcs=num_of_pvcs,
        pvc_size=5,
        project=project,
        access_mode="RWX",
        pool_type="cephfs",
    )
    # Create rwx-rbd pods
    pods_ios_rwx_rbd = multi_dc_pod(
        num_of_pvcs=num_of_pvcs,
        pvc_size=5,
        project=project,
        access_mode="RWX-BLK",
        pool_type="rbd",
    )
    cluster_io_pods = (
        rwo_rbd_pods + rwo_cephfs_pods + rwx_cephfs_pods + pods_ios_rwx_rbd
    )

    with ThreadPoolExecutor() as pod_ios_executor:
        for p in cluster_io_pods:
            if p.pod_type == "rbd_block_rwx":
                logger.info(f"Calling block fio on pod {p.name}")
                pod_ios_executor.submit(cluster_exp_helpers.raw_block_io, p, "100M")
            else:
                logger.info(f"calling file fio on pod {p.name}")
                pod_ios_executor.submit(p.run_io, "fs", "100M")

    for pod_io in cluster_io_pods:
        pod_helpers.get_fio_rw_iops(pod_io)

    # Verify OSDs are encrypted
    if config.ENV_DATA.get("encryption_at_rest"):
        osd_encryption_verification()

    cluster_obj = cluster_helpers.CephCluster()
    assert (
        cluster_obj.get_ceph_health() != "HEALTH_ERR"
    ), "Ceph cluster health checking failed"
    logger.info("ALL Exit criteria verification successfully")
    logger.info(
        "********************** TEST PASSED *********************************"
    )
def test_daemon_kill_during_pvc_pod_creation_deletion_and_io(
    self, setup_base, multi_pvc_factory, pod_factory
):
    """
    Kill ceph daemons while PVCs creation, PVCs deletion, pods creation,
    pods deletion and IO are progressing.

    One mgr, mon, osd and mds daemon each is killed (via ``kill -9`` through
    ``oc debug node``) roughly one second after bulk pod/PVC create and
    delete operations plus IO have been started in background threads.
    The test then verifies that all in-flight operations completed, that
    deleted volumes are gone from the Ceph backend, that new daemons came
    up, and that the cluster is healthy.

    Args:
        setup_base (fixture): provides (pvc_objs, pod_objs, rwx_pod_objs,
            cephfs_pvc_for_pods, rbd_pvc_for_pods)
        multi_pvc_factory (function): fixture creating multiple PVCs
        pod_factory (function): fixture creating a single pod
    """
    daemons_to_kill = [
        "mgr",
        "mon",
        "osd",
        "mds",
    ]

    (
        pvc_objs,
        pod_objs,
        rwx_pod_objs,
        cephfs_pvc_for_pods,
        rbd_pvc_for_pods,
    ) = setup_base

    num_of_pods_to_delete = 3
    num_of_io_pods = 1
    num_pvc_create_during_disruption = len(
        self.access_modes_cephfs + self.access_modes_rbd
    )

    # Select pods to be deleted
    pods_to_delete = pod_objs[:num_of_pods_to_delete]
    # Also pick the second pod of any RWX PVC shared with a selected pod,
    # so both consumers of the shared PVC get deleted together.
    pods_to_delete.extend(
        [
            pod
            for pod in rwx_pod_objs
            for pod_obj in pods_to_delete
            if (pod_obj.pvc == pod.pvc)
        ]
    )

    # Select pods to run IO
    io_pods = pod_objs[
        num_of_pods_to_delete : num_of_pods_to_delete + num_of_io_pods
    ]
    io_pods.extend(
        [
            pod
            for pod in rwx_pod_objs
            for pod_obj in io_pods
            if (pod_obj.pvc == pod.pvc)
        ]
    )

    # Select pods which are having PVCs to delete
    pods_for_pvc = pod_objs[num_of_pods_to_delete + num_of_io_pods :]
    pvcs_to_delete = [pod_obj.pvc for pod_obj in pods_for_pvc]
    pods_for_pvc.extend(
        [
            pod
            for pod in rwx_pod_objs
            for pod_obj in pods_for_pvc
            if (pod_obj.pvc == pod.pvc)
        ]
    )

    # Keep only one pod per PVC for IO, to avoid two pods writing the
    # same shared RWX volume concurrently.
    io_pods = [
        pod_obj
        for pod_obj in io_pods
        if pod_obj.pvc in select_unique_pvcs([pod_obj.pvc for pod_obj in io_pods])
    ]

    log.info(
        f"{len(pods_to_delete)} pods selected for deletion in which "
        f"{len(pods_to_delete) - num_of_pods_to_delete} pairs of pod "
        f"share same RWX PVC"
    )
    log.info(
        f"{len(io_pods)} pods selected for running IO in which one "
        f"pair of pod share same RWX PVC"
    )
    no_of_rwx_pvcs_delete = len(pods_for_pvc) - len(pvcs_to_delete)
    log.info(
        f"{len(pvcs_to_delete)} PVCs selected for deletion. "
        f"RWO PVCs: {len(pvcs_to_delete) - no_of_rwx_pvcs_delete}, "
        f"RWX PVCs: {no_of_rwx_pvcs_delete}"
    )

    # Fetchers for counting pods of each daemon type (before/after check)
    pod_functions = {
        "mds": partial(get_mds_pods),
        "mon": partial(get_mon_pods),
        "mgr": partial(get_mgr_pods),
        "osd": partial(get_osd_pods),
    }

    # Disruption object for each daemon type
    disruption_ops = [disruption_helpers.Disruptions() for _ in daemons_to_kill]

    # Select the resource of each type
    for disruption, pod_type in zip(disruption_ops, daemons_to_kill):
        disruption.set_resource(resource=pod_type)

    # One worker per concurrent task submitted below
    executor = ThreadPoolExecutor(
        max_workers=len(pod_objs)
        + len(rwx_pod_objs)
        + len(rbd_pvc_for_pods)
        + len(cephfs_pvc_for_pods)
        + len(daemons_to_kill)
        + num_pvc_create_during_disruption
    )

    # Get number of pods of the type given in daemons_to_kill list
    num_of_resource_pods = [
        len(pod_functions[resource_name]()) for resource_name in daemons_to_kill
    ]

    # Fetch PV names to verify after deletion
    pv_objs = []
    for pvc_obj in pvcs_to_delete:
        pv_objs.append(pvc_obj.backed_pv_obj)

    # Fetch volume details from pods for the purpose of verification
    node_pv_dict = {}
    for pod_obj in pods_to_delete:
        pod_info = pod_obj.get()
        node = pod_info["spec"]["nodeName"]
        pvc = pod_info["spec"]["volumes"][0]["persistentVolumeClaim"]["claimName"]
        for pvc_obj in pvc_objs:
            if pvc_obj.name == pvc:
                pv = pvc_obj.backed_pv
                break
        if node in node_pv_dict:
            node_pv_dict[node].append(pv)
        else:
            node_pv_dict[node] = [pv]

    # Fetch image uuid associated with PVCs to be deleted
    pvc_uuid_map = {}
    for pvc_obj in pvcs_to_delete:
        pvc_uuid_map[pvc_obj] = pvc_obj.image_uuid
    log.info("Fetched image uuid associated with each PVC")

    # Do setup on pods for running IO
    log.info("Setting up pods for running IO.")
    for pod_obj in pod_objs + rwx_pod_objs:
        if pod_obj.pvc.get_pvc_vol_mode == "Block":
            storage_type = "block"
        else:
            storage_type = "fs"
        executor.submit(pod_obj.workload_setup, storage_type=storage_type)

    # Wait for setup on pods to complete
    for pod_obj in pod_objs + rwx_pod_objs:
        log.info(f"Waiting for IO setup to complete on pod {pod_obj.name}")
        for sample in TimeoutSampler(360, 2, getattr, pod_obj, "wl_setup_done"):
            if sample:
                log.info(
                    f"Setup for running IO is completed on pod "
                    f"{pod_obj.name}."
                )
                break
    log.info("Setup for running IO is completed on all pods.")

    # Start IO on pods having PVCs to delete to load data
    pods_for_pvc_io = [
        pod_obj
        for pod_obj in pods_for_pvc
        if pod_obj.pvc
        in select_unique_pvcs([pod_obj.pvc for pod_obj in pods_for_pvc])
    ]
    log.info("Starting IO on pods having PVCs to delete.")
    self.run_io_on_pods(pods_for_pvc_io)
    log.info("IO started on pods having PVCs to delete.")

    log.info("Fetching IO results from the pods having PVCs to delete.")
    for pod_obj in pods_for_pvc_io:
        get_fio_rw_iops(pod_obj)
    log.info("Verified IO result on pods having PVCs to delete.")

    # Delete pods having PVCs to delete.
    assert self.delete_pods(
        pods_for_pvc
    ), "Couldn't delete pods which are having PVCs to delete."
    for pod_obj in pods_for_pvc:
        pod_obj.ocp.wait_for_delete(pod_obj.name)
    log.info("Verified: Deleted pods which are having PVCs to delete.")

    # Select daemon of each type of resource and identify the daemons running on each node
    nodes_and_pids = {}
    for disruption in disruption_ops:
        disruption.select_daemon()
        node_name = disruption.resource_obj[0].pod_data.get("spec").get("nodeName")
        # Create node-daemons dict. Value as string for passing in the 'kill' command
        nodes_and_pids[
            node_name
        ] = f"{nodes_and_pids.get(node_name, '')} {disruption.daemon_pid}"

    # Start IO on pods to be deleted
    pods_to_delete_io = [
        pod_obj
        for pod_obj in pods_to_delete
        if pod_obj.pvc
        in select_unique_pvcs([pod_obj.pvc for pod_obj in pods_to_delete])
    ]
    log.info("Starting IO on selected pods to be deleted.")
    self.run_io_on_pods(pods_to_delete_io)
    log.info("IO started on selected pods to be deleted.")

    # Start creating new pods
    log.info("Start creating new pods.")
    pod_create_rbd = executor.submit(
        helpers.create_pods,
        rbd_pvc_for_pods,
        pod_factory,
        constants.CEPHBLOCKPOOL,
        2,
    )
    pod_create_cephfs = executor.submit(
        helpers.create_pods,
        cephfs_pvc_for_pods,
        pod_factory,
        constants.CEPHFILESYSTEM,
        2,
    )

    # Start creation of new CephFS PVCs.
    log.info("Start creating new CephFS PVCs.")
    pvc_create_cephfs = executor.submit(
        multi_pvc_factory,
        interface=constants.CEPHFILESYSTEM,
        project=self.project,
        storageclass=None,
        size=self.pvc_size,
        access_modes=self.access_modes_cephfs,
        access_modes_selection="distribute_random",
        status="",
        num_of_pvc=len(self.access_modes_cephfs),
        wait_each=False,
    )

    # Start creation of new RBD PVCs
    log.info("Start creating new RBD PVCs.")
    pvc_create_rbd = executor.submit(
        multi_pvc_factory,
        interface=constants.CEPHBLOCKPOOL,
        project=self.project,
        storageclass=None,
        size=self.pvc_size,
        access_modes=self.access_modes_rbd,
        access_modes_selection="distribute_random",
        status="",
        num_of_pvc=len(self.access_modes_rbd),
        wait_each=False,
    )

    # Start deleting PVCs
    pvc_bulk_delete = executor.submit(delete_pvcs, pvcs_to_delete)
    log.info("Started deleting PVCs")

    # Start deleting pods
    pod_bulk_delete = executor.submit(self.delete_pods, pods_to_delete)
    log.info("Started deleting pods")

    # Start IO on IO pods
    self.run_io_on_pods(io_pods)
    log.info("Started IO on IO pods")

    # Wait for 1 second before killing daemons. This is to wait for the create/delete operations to start
    sleep(1)

    # Kill daemons
    node_and_kill_proc = {}
    log.info(f"Killing daemons of {daemons_to_kill}")
    for node_name, pids in nodes_and_pids.items():
        # Command to kill the daemon
        kill_cmd = f"oc debug node/{node_name} -- chroot /host kill -9 {pids}"
        # Create node-kill process map for verifying the result
        node_and_kill_proc[node_name] = executor.submit(run_cmd, kill_cmd)

    # Verify daemon kill process
    for node_name, daemon_kill_proc in node_and_kill_proc.items():
        # Get the type of daemons killed on the particular node
        resources = [
            disruption.resource
            for disruption in disruption_ops
            if disruption.daemon_pid in nodes_and_pids[node_name]
        ]
        # 'daemon_kill_proc' result will be an empty string if command is success
        cmd_out = daemon_kill_proc.result()
        assert isinstance(cmd_out, str) and (not cmd_out), (
            f"Failed to kill {resources } daemons in the node {node_name}. "
            f"Daemon kill command output - {cmd_out}"
        )

    # Wait for new daemon to come up
    [disruption.check_new_pid() for disruption in disruption_ops]
    log.info("Verified daemons kill")

    pods_deleted = pod_bulk_delete.result()
    assert pods_deleted, "Deletion of pods failed."

    # Verify pods are deleted
    for pod_obj in pods_to_delete:
        pod_obj.ocp.wait_for_delete(pod_obj.name, 300)
    log.info("Verified: Pods are deleted.")

    # Verify that the mount point is removed from nodes after deleting pod
    node_pv_mounted = verify_pv_mounted_on_node(node_pv_dict)
    for node, pvs in node_pv_mounted.items():
        assert not pvs, (
            f"PVs {pvs} is still present on node {node} after "
            f"deleting the pods."
        )
    log.info(
        "Verified: mount points are removed from nodes after deleting "
        "the pods"
    )

    pvcs_deleted = pvc_bulk_delete.result()
    assert pvcs_deleted, "Deletion of PVCs failed."

    # Verify PVCs are deleted
    for pvc_obj in pvcs_to_delete:
        pvc_obj.ocp.wait_for_delete(pvc_obj.name)
    log.info("Verified: PVCs are deleted.")

    # Getting result of PVC creation as list of PVC objects
    log.info("Getting the result of CephFS PVC creation process")
    pvc_objs_cephfs_new = pvc_create_cephfs.result()

    log.info("Getting the result of RBD PVC creation process")
    pvc_objs_rbd_new = pvc_create_rbd.result()

    # Set interface argument for reference
    for pvc_obj in pvc_objs_cephfs_new:
        pvc_obj.interface = constants.CEPHFILESYSTEM

    # Set interface argument for reference
    for pvc_obj in pvc_objs_rbd_new:
        pvc_obj.interface = constants.CEPHBLOCKPOOL

    # Confirm PVCs are Bound
    log.info("Verifying the new CephFS and RBD PVCs are Bound")
    for pvc_obj in pvc_objs_cephfs_new + pvc_objs_rbd_new:
        helpers.wait_for_resource_state(
            resource=pvc_obj, state=constants.STATUS_BOUND, timeout=180
        )
        pvc_obj.reload()
    log.info("Verified: New CephFS and RBD PVCs are Bound.")

    # Getting result of pods creation as list of Pod objects
    log.info("Getting the result of pods creation process")
    pod_objs_rbd_new = pod_create_rbd.result()
    pod_objs_cephfs_new = pod_create_cephfs.result()

    # Verify new pods are Running
    log.info("Verifying the new pods are Running")
    for pod_obj in pod_objs_rbd_new + pod_objs_cephfs_new:
        helpers.wait_for_resource_state(
            resource=pod_obj, state=constants.STATUS_RUNNING, timeout=90
        )
        pod_obj.reload()
    log.info("Verified: All new pods are Running.")

    # Verify PVs are deleted
    for pv_obj in pv_objs:
        pv_obj.ocp.wait_for_delete(resource_name=pv_obj.name, timeout=300)
    log.info("Verified: PVs are deleted.")

    # Verify PV using ceph toolbox. Image/Subvolume should be deleted.
    pool_name = default_ceph_block_pool()
    for pvc_obj, uuid in pvc_uuid_map.items():
        if pvc_obj.interface == constants.CEPHBLOCKPOOL:
            ret = verify_volume_deleted_in_backend(
                interface=constants.CEPHBLOCKPOOL,
                image_uuid=uuid,
                pool_name=pool_name,
            )
        if pvc_obj.interface == constants.CEPHFILESYSTEM:
            ret = verify_volume_deleted_in_backend(
                interface=constants.CEPHFILESYSTEM, image_uuid=uuid
            )
        assert (
            ret
        ), f"Volume associated with PVC {pvc_obj.name} still exists in the backend"

    log.info("Fetching IO results from the pods.")
    for pod_obj in io_pods:
        fio_result = pod_obj.get_fio_results()
        err_count = fio_result.get("jobs")[0].get("error")
        assert (
            err_count == 0
        ), f"FIO error on pod {pod_obj.name}. FIO result: {fio_result}"
    log.info("Verified IO result on pods.")

    # Verify that the new PVCs are usable by creating new pods
    log.info("Verify that the new PVCs are usable by creating new pods")
    pod_objs_rbd_re = helpers.create_pods(
        pvc_objs_rbd_new, pod_factory, constants.CEPHBLOCKPOOL, 2
    )
    pod_objs_cephfs_re = helpers.create_pods(
        pvc_objs_cephfs_new, pod_factory, constants.CEPHFILESYSTEM, 2
    )

    # Verify pods are Running
    log.info("Verifying the pods are Running")
    for pod_obj in pod_objs_rbd_re + pod_objs_cephfs_re:
        helpers.wait_for_resource_state(
            resource=pod_obj, state=constants.STATUS_RUNNING, timeout=90
        )
        pod_obj.reload()
    log.info(
        "Successfully created and verified the status of the pods using the new CephFS and RBD PVCs."
    )

    new_pods = (
        pod_objs_rbd_new
        + pod_objs_cephfs_new
        + pod_objs_rbd_re
        + pod_objs_cephfs_re
    )

    # Do setup on the new pods for running IO
    log.info("Setting up the new pods for running IO.")
    for pod_obj in new_pods:
        if pod_obj.pvc.get_pvc_vol_mode == "Block":
            storage_type = "block"
        else:
            storage_type = "fs"
        executor.submit(pod_obj.workload_setup, storage_type=storage_type)

    # Wait for setup on the new pods to complete
    for pod_obj in new_pods:
        log.info(f"Waiting for IO setup to complete on pod {pod_obj.name}")
        for sample in TimeoutSampler(360, 2, getattr, pod_obj, "wl_setup_done"):
            if sample:
                log.info(
                    f"Setup for running IO is completed on pod "
                    f"{pod_obj.name}."
                )
                break
    log.info("Setup for running IO is completed on the new pods.")

    # Start IO on the new pods
    log.info("Start IO on the new pods")
    self.run_io_on_pods(new_pods)
    log.info("Started IO on the new pods")

    log.info("Fetching IO results from the new pods.")
    for pod_obj in new_pods:
        get_fio_rw_iops(pod_obj)
    log.info("Verified IO result on the new pods.")

    # Verify number of pods of each daemon type
    final_num_resource_name = [
        len(pod_functions[resource_name]()) for resource_name in daemons_to_kill
    ]
    assert final_num_resource_name == num_of_resource_pods, (
        f"Total number of pods of each type is not matching with "
        f"initial value. Total number of pods of each type before daemon kill: "
        f"{num_of_resource_pods}. Total number of pods of each type present now: "
        f"{final_num_resource_name}"
    )

    # Check ceph status
    ceph_health_check(namespace=config.ENV_DATA["cluster_namespace"])
    log.info("Ceph cluster health is OK")
def test_rwx_dynamic_pvc(self, interface_type, reclaim_policy, setup, pvc_factory, pod_factory):
    """
    RWX Dynamic PVC creation tests with Reclaim policy set to Retain/Delete

    Steps exercised:
    1. Create a single RWX PVC and attach two pods to it, pinned to two
       different worker nodes.
    2. Run fio from both pods (distinct file names) and record md5sums.
    3. Cross-verify data integrity: each pod validates the file written
       by the other.
    4. Verify the shared data is mutable from either pod by renaming each
       file from the opposite pod, then checking both renamed files are
       visible from both pods.

    Args:
        interface_type (str): storage interface (parametrized fixture)
        reclaim_policy (str): reclaim policy of the SC (parametrized fixture)
        setup (fixture): provides (sc_obj, worker_nodes_list)
        pvc_factory (function): fixture creating a PVC
        pod_factory (function): fixture creating a pod
    """
    access_mode = constants.ACCESS_MODE_RWX
    storage_type = "fs"
    sc_obj, worker_nodes_list = setup
    logger.info("CephFS RWX test")
    logger.info(f"Creating PVC with {access_mode} access mode")
    pvc_obj = pvc_factory(
        interface=interface_type,
        storageclass=sc_obj,
        size=self.pvc_size,
        access_mode=access_mode,
        status=constants.STATUS_BOUND,
    )

    logger.info(
        f"Creating first pod on node: {worker_nodes_list[0]} "
        f"with pvc {pvc_obj.name}"
    )
    pod_obj1 = pod_factory(
        interface=interface_type,
        pvc=pvc_obj,
        status=constants.STATUS_RUNNING,
        node_name=worker_nodes_list[0],
        pod_dict_path=constants.NGINX_POD_YAML,
    )

    logger.info(
        f"Creating second pod on node: {worker_nodes_list[1]} "
        f"with pvc {pvc_obj.name}"
    )
    pod_obj2 = pod_factory(
        interface=interface_type,
        pvc=pvc_obj,
        status=constants.STATUS_RUNNING,
        node_name=worker_nodes_list[1],
        pod_dict_path=constants.NGINX_POD_YAML,
    )

    # Confirm the two pods really landed on different nodes — the point
    # of RWX is simultaneous multi-node access.
    node_pod1 = pod_obj1.get().get("spec").get("nodeName")
    node_pod2 = pod_obj2.get().get("spec").get("nodeName")
    assert node_pod1 != node_pod2, "Both pods are on the same node"

    # Run IO on both the pods
    logger.info(f"Running IO on pod {pod_obj1.name}")
    file_name1 = pod_obj1.name
    logger.info(file_name1)
    pod_obj1.run_io(storage_type=storage_type, size="1G", fio_filename=file_name1)

    logger.info(f"Running IO on pod {pod_obj2.name}")
    file_name2 = pod_obj2.name
    pod_obj2.run_io(storage_type=storage_type, size="1G", fio_filename=file_name2)

    # Check IO and calculate md5sum of files
    pod.get_fio_rw_iops(pod_obj1)
    md5sum_pod1_data = pod.cal_md5sum(pod_obj=pod_obj1, file_name=file_name1)

    pod.get_fio_rw_iops(pod_obj2)
    md5sum_pod2_data = pod.cal_md5sum(pod_obj=pod_obj2, file_name=file_name2)

    logger.info("verify data from alternate pods")

    # Each pod checks the md5sum of the file the OTHER pod wrote
    pod.verify_data_integrity(
        pod_obj=pod_obj2, file_name=file_name1, original_md5sum=md5sum_pod1_data
    )

    pod.verify_data_integrity(
        pod_obj=pod_obj1, file_name=file_name2, original_md5sum=md5sum_pod2_data
    )

    # Verify that data is mutable from any pod
    logger.info("Perform modification of files from alternate pod")
    # Access and rename file written by pod-2 from pod-1
    file_path2 = pod.get_file_path(pod_obj2, file_name2)
    logger.info(file_path2)
    pod_obj1.exec_cmd_on_pod(
        command=f'bash -c "mv {file_path2} {file_path2}-renamed"',
        out_yaml_format=False,
    )

    # Access and rename file written by pod-1 from pod-2
    file_path1 = pod.get_file_path(pod_obj1, file_name1)
    logger.info(file_path1)
    pod_obj2.exec_cmd_on_pod(
        command=f'bash -c "mv {file_path1} {file_path1}-renamed"',
        out_yaml_format=False,
    )

    logger.info("Verify presence of renamed files from both pods")
    file_names = [f"{file_path1}-renamed", f"{file_path2}-renamed"]
    for file in file_names:
        assert pod.check_file_existence(pod_obj1, file), f"File {file} doesn't exist"
        logger.info(f"File {file} exists in {pod_obj1.name} ")
        assert pod.check_file_existence(pod_obj2, file), f"File {file} doesn't exist"
        logger.info(f"File {file} exists in {pod_obj2.name}")
def test_sc_reclaim_policy_retain_rep2_comp(
    self,
    storageclass_factory,
    pvc_factory,
    pod_factory,
):
    """
    Exercise a storageclass with reclaim policy Retain on a replica-2
    compressed RBD pool.

    Flow:
    1. Create the storageclass (new RBD pool, self.replica replication,
       self.compression compression, self.reclaim_policy reclaim policy).
    2. Create a PVC and an app pod, run fio on the pod.
    3. Validate the pool is actually replicated and compressed as
       configured (raises PoolNotReplicatedAsNeeded /
       PoolNotCompressedAsExpected otherwise).
    4. Tear down pod, PVC and PV, then delete the backing RBD image and
       verify the deletion (raises ImageIsNotDeletedOrNotFound on failure).
    """
    log.info(
        f"Creating storageclass with replica {self.replica}"
        f", compression {self.compression} and"
        f"reclaim policy {self.reclaim_policy}"
    )
    storage_class = storageclass_factory(
        interface=CEPHBLOCKPOOL,
        new_rbd_pool=True,
        replica=self.replica,
        compression=self.compression,
        reclaim_policy=self.reclaim_policy,
    )
    # The factory provisioned a fresh pool; pull its name from the SC spec
    pool = storage_class.get()["parameters"]["pool"]

    log.info("Creating PVCs and PODs")
    pvc = pvc_factory(interface=CEPHBLOCKPOOL, storageclass=storage_class)
    app_pod = pod_factory(interface=CEPHBLOCKPOOL, pvc=pvc)

    log.info("Running IO on pod")
    app_pod.run_io("fs", size="1G")
    get_fio_rw_iops(app_pod)

    log.info(f"validating info on pool {pool}")
    if validate_replica_data(pool, self.replica) is False:
        raise PoolNotReplicatedAsNeeded(f"pool {pool} not replicated as expected")
    if validate_compression(pool) is False:
        raise PoolNotCompressedAsExpected(f"pool {pool} not compressed as expected")

    log.info("Deleting pod")
    delete_pods([app_pod], wait=True)

    log.info("Deleting pvc, pv and rbd image")
    pvc.reload()
    image_uuid = pvc.image_uuid
    backing_pv = pvc.backed_pv_obj
    pvc.delete()
    backing_pv.delete()
    # Reclaim policy Retain leaves the image behind; remove it explicitly
    if not delete_volume_in_backend(img_uuid=image_uuid, pool_name=pool):
        raise ImageIsNotDeletedOrNotFound(
            f"Could not delete or find image csi-vol-{image_uuid}"
        )
def test_multiple_sc_comp_rep_data_deletion(self, storageclass_factory, pvc_factory, pod_factory):
    """
    Verify that data written via compressed/replicated storageclasses is
    really erased from the backing pools after the PODs and PVCs are gone.

    Flow:
    1. Create two storageclasses, each on a new RBD pool — one replica-3
       and one replica-2, both with aggressive compression.
    2. Create a PVC plus an app pod on each storageclass and run fio.
    3. Delete all pods and PVCs.
    4. After a short settle period, confirm no PVCs remain on either
       storageclass (else PvcNotDeleted) and that each pool's byte usage
       is below MAX_BYTES_IN_POOL_AFTER_DATA_DELETE (else PoolDataNotErased).
    """
    interface_type = constants.CEPHBLOCKPOOL

    log.info("Creating storageclasses with compression and replica3")
    sc_replica3 = storageclass_factory(
        interface=interface_type,
        new_rbd_pool=True,
        replica=3,
        compression="aggressive",
    )
    log.info("Creating storageclasses with compression and replica2")
    sc_replica2 = storageclass_factory(
        interface=interface_type,
        new_rbd_pool=True,
        replica=2,
        compression="aggressive",
    )
    storage_classes = [sc_replica3, sc_replica2]

    log.info("Creating PVCs and PODs")
    claims = []
    app_pods = []
    for sc in storage_classes:
        claim = pvc_factory(interface=interface_type, storageclass=sc)
        claims.append(claim)
        app_pods.append(pod_factory(interface=interface_type, pvc=claim))

    log.info("Running IO on pods")
    for app_pod in app_pods:
        app_pod.run_io("fs", size="1G")
    for app_pod in app_pods:
        get_fio_rw_iops(app_pod)

    log.info("deleting PODs and PVCs")
    delete_pods(app_pods, wait=True)
    delete_pvcs(claims, concurrent=True)

    log.info("Wait for 15 seconds for all data to delete")
    sleep(15)

    log.info("Checking stats after deleting PODs and PVCs")
    for sc in storage_classes:
        remaining_pvcs = get_all_pvcs_in_storageclass(sc.name)
        # Any PVC still attached to the SC means the cleanup failed
        if len(remaining_pvcs) != 0:
            raise PvcNotDeleted(f"PVC {remaining_pvcs} were not deleted")
        pool_name = sc.get()["parameters"]["pool"]
        bytes_used = get_byte_used_by_pool(pool_name)
        log.info(f"pool {pool_name} has {bytes_used} bytes used")
        if bytes_used > MAX_BYTES_IN_POOL_AFTER_DATA_DELETE:
            raise PoolDataNotErased(
                f"Pool {pool_name} has {bytes_used} bytes which were not deleted"
            )
def test_all_worker_nodes_short_network_failure(
    self, nodes, setup, node_restart_teardown
):
    """
    OCS-1432/OCS-1433:
    - Start DeploymentConfig based app pods
    - Make all the worker nodes unresponsive by doing abrupt network failure
    - Reboot the unresponsive node after short duration of ~300 seconds
    - When unresponsive node recovers, app pods and ceph cluster should recover
    - Again run IOs from app pods

    Args:
        nodes (obj): platform nodes object used to stop/start/restart nodes
        setup (fixture): provides the list of DC-based app pod objects
        node_restart_teardown (fixture): restores node state on teardown
    """
    pod_objs = setup
    worker_nodes = node.get_worker_nodes()

    # Run IO on pods
    logger.info(f"Starting IO on {len(pod_objs)} app pods")
    with ThreadPoolExecutor() as executor:
        for pod_obj in pod_objs:
            logger.info(f"Starting IO on pod {pod_obj.name}")
            storage_type = (
                "block" if pod_obj.pvc.get_pvc_vol_mode == "Block" else "fs"
            )
            executor.submit(
                pod_obj.run_io,
                storage_type=storage_type,
                size="2G",
                runtime=30,
                fio_filename=f"{pod_obj.name}_io_f1",
            )
    logger.info(f"IO started on all {len(pod_objs)} app pods")

    # Wait for IO results
    for pod_obj in pod_objs:
        pod.get_fio_rw_iops(pod_obj)

    # Induce network failure on all worker nodes
    with ThreadPoolExecutor() as executor:
        for node_name in worker_nodes:
            executor.submit(node.node_network_failure, node_name, False)

    node.wait_for_nodes_status(
        node_names=worker_nodes, status=constants.NODE_NOT_READY
    )

    logger.info(f"Waiting for {self.short_nw_fail_time} seconds")
    sleep(self.short_nw_fail_time)

    # Reboot the worker nodes
    logger.info(f"Stop and start the worker nodes: {worker_nodes}")
    nodes.restart_nodes_by_stop_and_start(node.get_node_objs(worker_nodes))

    try:
        node.wait_for_nodes_status(
            node_names=worker_nodes, status=constants.NODE_READY
        )
        logger.info("Wait for OCS pods to be in running state")
        if not pod.wait_for_pods_to_be_running(timeout=720):
            raise ResourceWrongStatusException("Pods are not in running state")
    except ResourceWrongStatusException:
        # One more restart attempt if nodes/pods did not recover in time,
        # then re-check cluster health with generous retries.
        # Restart nodes
        nodes.restart_nodes(node.get_node_objs(worker_nodes))

    ceph_health_check(tries=80)

    # Get current info of app pods
    # DC pods may have been rescheduled with new names while the nodes were
    # down, so rediscover each app pod via its 'deploymentconfig' label.
    new_pod_objs = list()
    for pod_obj in pod_objs:
        pod_label = pod_obj.labels.get("deploymentconfig")
        pods_data = pod.get_pods_having_label(
            f"deploymentconfig={pod_label}", pod_obj.namespace
        )
        # Skip the deployer pods ('-deploy' in the name)
        current_pods = [
            pod_data.get("metadata").get("name")
            for pod_data in pods_data
            if "-deploy" not in pod_data.get("metadata").get("name")
        ]
        logger.info(f"Pods with label {pod_label}: {current_pods}")

        # Remove the older pod from the list if pod is rescheduled
        if len(current_pods) > 1:
            current_pods.remove(pod_obj.name)

        new_pod_obj = pod.get_pod_obj(current_pods.pop(), pod_obj.namespace)
        # Carry the PVC reference over so the IO below can derive storage_type
        new_pod_obj.pvc = pod_obj.pvc
        new_pod_objs.append(new_pod_obj)

    logger.info("Wait for app pods are in running state")
    for pod_obj in new_pod_objs:
        pod_obj.ocp.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            resource_name=pod_obj.name,
            timeout=720,
            sleep=20,
        )
    logger.info("All the app pods reached running state")

    # Run more IOs on app pods
    with ThreadPoolExecutor() as executor:
        for pod_obj in new_pod_objs:
            logger.info(f"Starting IO on pod {pod_obj.name}")
            # Force workload setup to run again on the (possibly new) pod
            pod_obj.wl_setup_done = False
            storage_type = (
                "block" if pod_obj.pvc.get_pvc_vol_mode == "Block" else "fs"
            )
            executor.submit(
                pod_obj.run_io,
                storage_type=storage_type,
                size="1G",
                runtime=30,
                fio_filename=f"{pod_obj.name}_io_f2",
            )

    for pod_obj in new_pod_objs:
        pod.get_fio_rw_iops(pod_obj)
def operations_base(self, resource_to_delete):
    """
    Delete resource 'resource_to_delete' while PVCs creation, Pods
    creation and IO operation are progressing.
    Verifies PVCs can be re-used by creating new pods.

    Args:
        resource_to_delete (str): one of 'mds', 'mon', 'mgr', 'osd' —
            the ceph resource pod to delete during the disruption

    Steps:
    1. Create pods for running IO and verify they are Running.
    2. Start creating more pods.
    3. Start creating new PVCs.
    4. Start IO on pods created in Step 1.
    5. Delete the resource 'resource_to_delete'.
    6. Verify that PVCs created in Step 3 are in Bound state.
    7. Verify that pods created in Step 2 are Running.
    8. Verify IO results.
    9. Delete pods created in Steps 1 and 2.
    10. Verify the total number of 'resource_to_delete' pods.
    11. Verify volumes are unmapped from nodes after deleting pods.
    12. Use all PVCs to create new pods. One PVC for one pod.
    13. Start IO on all pods created in Step 12.
    14. Verify IO results.
    """
    # Separate the available PVCs: first chunk backs IO pods, the rest
    # back the pods created in parallel with the disruption
    pvc_objs_for_io_pods = self.pvc_objs[0:self.pvc_num_for_io_pods]
    pvc_objs_new_pods = self.pvc_objs[self.pvc_num_for_io_pods:]
    # Map resource name -> function that lists its pods
    pod_functions = {
        'mds': get_mds_pods, 'mon': get_mon_pods, 'mgr': get_mgr_pods,
        'osd': get_osd_pods
    }
    executor = ThreadPoolExecutor(max_workers=2)
    disruption = disruption_helpers.Disruptions()
    disruption.set_resource(resource=resource_to_delete)

    # Get number of pods before the disruption, for later comparison
    initial_pods_num = len(pod_functions[resource_to_delete]())

    # Create pods for running IO
    io_pods = helpers.create_pods(
        pvc_objs_list=pvc_objs_for_io_pods, interface_type=self.interface,
        desired_status=constants.STATUS_RUNNING, wait=True,
        namespace=self.namespace
    )

    # Updating self.pod_objs for the purpose of teardown
    self.pod_objs.extend(io_pods)

    # Do setup for running IO on pods
    log.info("Setting up pods for running IO")
    for pod_obj in io_pods:
        pod_obj.workload_setup(storage_type='fs')
    log.info("Setup for running IO is completed on pods")

    # Start creating new pods in the background
    log.info("Start creating new pods.")
    bulk_pod_create = executor.submit(
        helpers.create_pods, pvc_objs_list=pvc_objs_new_pods,
        interface_type=self.interface, wait=False,
        namespace=self.namespace
    )

    # Start creation of new PVCs in the background
    log.info("Start creating new PVCs.")
    bulk_pvc_create = executor.submit(
        helpers.create_multiple_pvcs, sc_name=self.sc_obj.name,
        namespace=self.namespace, number_of_pvc=self.num_of_new_pvcs,
        size=self.pvc_size, wait=False
    )

    # Start IO on each pod
    log.info("Start IO on pods")
    for pod_obj in io_pods:
        pod_obj.run_io(storage_type='fs', size=f'{self.pvc_size_int - 1}G')
    log.info("IO started on all pods.")

    # Delete the resource while creations and IO are in progress
    disruption.delete_resource()

    # Getting result of PVC creation as list of PVC objects
    pvc_objs_new = bulk_pvc_create.result()

    # Updating self.pvc_objs_new for the purpose of teardown
    self.pvc_objs_new.extend(pvc_objs_new)

    # Verify PVCs are Bound
    for pvc_obj in pvc_objs_new:
        assert pvc_obj.ocp.wait_for_resource(
            condition=constants.STATUS_BOUND, resource_name=pvc_obj.name,
            timeout=240, sleep=10
        ), (f"Wait timeout: PVC {pvc_obj.name} is not in 'Bound' status")
    log.info("Verified: New PVCs are Bound.")

    # Getting result of pods creation as list of Pod objects
    pod_objs_new = bulk_pod_create.result()

    # Updating self.pod_objs for the purpose of teardown
    self.pod_objs.extend(pod_objs_new)

    # Verify new pods are Running
    # (message fixed to match the actual 240 second timeout)
    for pod_obj in pod_objs_new:
        assert pod_obj.ocp.wait_for_resource(
            condition=constants.STATUS_RUNNING, resource_name=pod_obj.name,
            timeout=240, sleep=10), (
            f"Wait timeout: Pod {pod_obj.name} is not in 'Running' "
            f"state even after 240 seconds.")
    log.info("Verified: All pods are Running.")

    # Verify IO
    log.info("Fetching IO results.")
    for pod_obj in io_pods:
        get_fio_rw_iops(pod_obj)
    log.info("Verified IO result on pods.")

    all_pod_objs = io_pods + pod_objs_new

    # Fetch volume details from pods for the purpose of verification.
    # Loop variables are deliberately NOT named 'pod'/'node' so the
    # file-level 'pod' and 'node' modules are not shadowed.
    node_pv_dict = {}
    for pod_obj in all_pod_objs:
        pod_info = pod_obj.get()
        node_name = pod_info['spec']['nodeName']
        pvc_name = pod_info['spec']['volumes'][0]['persistentVolumeClaim'][
            'claimName']
        for pvc_obj in self.pvc_objs:
            if pvc_obj.name == pvc_name:
                pvc_obj.reload()
                pv = pvc_obj.backed_pv
                break
        if node_name in node_pv_dict:
            node_pv_dict[node_name].append(pv)
        else:
            node_pv_dict[node_name] = [pv]

    # Delete pods
    for pod_obj in all_pod_objs:
        pod_obj.delete(wait=False)

    # Verify pods are deleted
    for pod_obj in all_pod_objs:
        pod_obj.ocp.wait_for_delete(resource_name=pod_obj.name)

    # Updating self.pod_objs for the purpose of teardown
    self.pod_objs.clear()

    # Verify number of 'resource_to_delete' type pods is unchanged
    final_pods_num = len(pod_functions[resource_to_delete]())
    assert final_pods_num == initial_pods_num, (
        f"Total number of {resource_to_delete} pods is not matching with "
        f"initial value. Total number of pods before deleting a pod: "
        f"{initial_pods_num}. Total number of pods present now: "
        f"{final_pods_num}")

    # Verify volumes are unmapped from nodes after deleting the pods
    for node_name, pvs in node_pv_dict.items():
        cmd = f'oc debug nodes/{node_name} -- df'
        df_on_node = run_cmd(cmd)
        for pv in pvs:
            assert pv not in df_on_node, (
                f"{pv} is still present on node {node_name} after "
                f"deleting the pods.")

    # Verify that PVCs are reusable by creating new pods
    all_pvc_objs = self.pvc_objs + pvc_objs_new
    pod_objs_re = helpers.create_pods(
        pvc_objs_list=all_pvc_objs, interface_type=self.interface,
        desired_status=constants.STATUS_RUNNING, wait=True,
        namespace=self.namespace)
    log.info("Successfully created new pods using all PVCs.")

    # Updating self.pod_objs for the purpose of teardown
    self.pod_objs.extend(pod_objs_re)

    # Run IO on each of the newly created pods
    for pod_obj in pod_objs_re:
        pod_obj.run_io(storage_type='fs', size='100M', runtime=10,
                       fio_filename='fio-file-retest')

    log.info("Fetching IO results from newly created pods")
    for pod_obj in pod_objs_re:
        get_fio_rw_iops(pod_obj)
    log.info("Verified IO result on newly created pods.")
def test_create_sc_and_make_it_as_a_default(
    self, interface_type, storageclass_factory, pvc_factory, pod_factory
):
    """
    Create a StorageClass, promote it to the cluster default, consume it
    via PVC/pod with IO, then restore the original default StorageClass
    (or unset the default annotation if there was no initial default).

    Args:
        interface_type (str): interface (RBD/CephFS) for the StorageClass
        storageclass_factory (function): factory creating a StorageClass
        pvc_factory (function): factory creating a PVC
        pod_factory (function): factory creating an app pod
    """
    # Get default StorageClass (may be empty right after deployment)
    initial_default_sc = helpers.get_default_storage_class()

    # Create a Storage Class
    sc_obj = storageclass_factory(interface=interface_type)
    log.info(
        f"{interface_type}StorageClass: {sc_obj.name} "
        f"created successfully"
    )

    # Change the above created StorageClass to default
    log.info(f"Changing the default StorageClass to {sc_obj.name}")
    helpers.change_default_storageclass(scname=sc_obj.name)

    # Confirm that the default StorageClass is changed
    tmp_default_sc = helpers.get_default_storage_class()
    assert len(tmp_default_sc) == 1, "More than 1 default storage class exist"
    log.info(f"Current Default StorageClass is:{tmp_default_sc[0]}")
    assert tmp_default_sc[0] == sc_obj.name, "Failed to change default StorageClass"
    log.info(f"Successfully changed the default StorageClass to "
             f"{sc_obj.name}")

    # Create a PVC using the default StorageClass
    log.info(f"Creating a PVC using {sc_obj.name}")
    pvc_obj = pvc_factory(interface=interface_type)
    log.info(f"PVC: {pvc_obj.name} created successfully using "
             f"{sc_obj.name}")

    # Create app pod and mount each PVC
    log.info(f"Creating an app pod and mount {pvc_obj.name}")
    pod_obj = pod_factory(interface=interface_type)
    log.info(f"{pod_obj.name} created successfully and mounted {pvc_obj.name}")

    # Run IO on each app pod for sometime
    log.info(f"Running FIO on {pod_obj.name}")
    pod_obj.run_io("fs", size="2G")
    get_fio_rw_iops(pod_obj)

    # Switch back to initial default storageclass
    # Currently we are not setting default SC after deployment
    # hence handling the initial_default_sc None case
    # This check can be removed once the default sc is set
    if len(initial_default_sc) != 0:
        helpers.change_default_storageclass(initial_default_sc[0])
        # Confirm that the default StorageClass is changed back.
        # BUG FIX: log the freshly fetched end_default_sc[0], not the
        # stale tmp_default_sc[0] from before the switch.
        end_default_sc = helpers.get_default_storage_class()
        log.info(f"Current Default StorageClass is:{end_default_sc[0]}")
        assert (
            end_default_sc[0] == initial_default_sc[0]
        ), "Failed to change back to default StorageClass"
        log.info(
            f"Successfully changed back to default StorageClass "
            f"{end_default_sc[0]}"
        )
    else:
        # No initial default existed: clear the default annotation that
        # this test set, leaving the cluster as it was found
        ocp_obj = ocp.OCP()
        patch = (
            ' \'{"metadata": {"annotations":'
            '{"storageclass.kubernetes.io/is-default-class"'
            ':"false"}}}\' '
        )
        patch_cmd = f"patch storageclass {tmp_default_sc[0]} -p" + patch
        ocp_obj.exec_oc_cmd(command=patch_cmd)
        log.info(
            "Initially there is no default StorageClass, hence "
            "setting the current default StorageClass to False"
        )
def test_rwo_dynamic_pvc(self, setup_base):
    """
    RWO dynamic PVC test: two pods on different nodes share one RWO PVC.
    The second pod must stay Pending while the first holds the volume,
    then attach and see the first pod's data after the first is deleted.

    Args:
        setup_base: fixture preparing self.pvc_obj, self.interface_type,
            self.worker_nodes_list, self.namespace, self.storage_type,
            self.io_size and self.expected_pod_failure
    """
    logger.info(f"Creating two pods using same PVC {self.pvc_obj.name}")
    logger.info(f"Creating first pod on node: {self.worker_nodes_list[0]}")
    pod_obj1 = helpers.create_pod(
        interface_type=self.interface_type, pvc_name=self.pvc_obj.name,
        desired_status=constants.STATUS_RUNNING, wait=True,
        namespace=self.namespace, node_name=self.worker_nodes_list[0],
        pod_dict_path=constants.NGINX_POD_YAML
    )
    node_pod1 = pod_obj1.get().get('spec').get('nodeName')

    logger.info(
        f"Creating second pod on node: {self.worker_nodes_list[1]}")
    # Second pod is expected to fail attaching, hence wait=False
    pod_obj2 = helpers.create_pod(
        interface_type=self.interface_type, pvc_name=self.pvc_obj.name,
        wait=False, namespace=self.namespace,
        node_name=self.worker_nodes_list[1],
        pod_dict_path=constants.NGINX_POD_YAML
    )
    node_pod2 = pod_obj2.get().get('spec').get('nodeName')

    assert node_pod1 != node_pod2, 'Both pods are on the same node'

    logger.info(f"Running IO on pod {pod_obj1.name}")
    file_name = pod_obj1.name
    pod_obj1.run_io(storage_type=self.storage_type, size=self.io_size,
                    runtime=30, fio_filename=file_name)
    pod.get_fio_rw_iops(pod_obj1)
    # Record md5sum so data integrity can be verified from the second pod
    md5sum_pod1_data = pod.cal_md5sum(pod_obj=pod_obj1,
                                      file_name=file_name)

    # Verify that second pod is still in Pending state and not able to
    # attain Running state due to expected failure
    assert helpers.wait_for_resource_state(
        resource=pod_obj2, state=constants.STATUS_PENDING)
    self.verify_expected_failure_event(
        ocs_obj=pod_obj2, failure_str=self.expected_pod_failure)

    # Release the volume by deleting the first pod
    pod_obj1.delete()
    pod_obj1.ocp.wait_for_delete(resource_name=pod_obj1.name)

    # Wait for second pod to be in Running state
    assert helpers.wait_for_resource_state(
        resource=pod_obj2, state=constants.STATUS_RUNNING, timeout=240)

    assert pod.verify_data_integrity(pod_obj=pod_obj2,
                                     file_name=file_name,
                                     original_md5sum=md5sum_pod1_data)

    pod_obj2.run_io(storage_type=self.storage_type, size=self.io_size,
                    runtime=30, fio_filename=pod_obj2.name)
    pod.get_fio_rw_iops(pod_obj2)

    # Again verify data integrity
    assert pod.verify_data_integrity(pod_obj=pod_obj2,
                                     file_name=file_name,
                                     original_md5sum=md5sum_pod1_data)

    pod_obj2.delete()
    # BUG FIX: wait for pod_obj2 deletion via its own ocp object —
    # the original waited via pod_obj1.ocp (pod_obj1 already deleted)
    pod_obj2.ocp.wait_for_delete(resource_name=pod_obj2.name)
def test_pvc_rwx_writeable_after_pod_deletions(
    self, pvc_factory, teardown_factory
):
    """
    Test assign nodeName to a pod using RWX pvc

    1. Create a new project.
    2. Create a RWX CEPHFS based PVC
    3. Attach the same PVC to multiple PODs and start IO on all the PODs
    4. Delete all but one pod.
    5. Verify mount point is still write-able.
         - Start IO again on the Running pod.
    6. Also, access the data written by deleted pods from the Running pod
    """
    worker_nodes_list = helpers.get_worker_nodes()

    # Provision a single RWX CephFS-backed PVC shared by every pod
    pvc_obj = pvc_factory(
        interface=constants.CEPHFILESYSTEM,
        access_mode=constants.ACCESS_MODE_RWX,
        size=10,
        status=constants.STATUS_BOUND
    )
    logger.info(
        f"Creating pods on all worker nodes backed"
        f"with same pvc {pvc_obj.name}"
    )

    # One pod per worker node, each pinned to its node via nodeName
    pod_list = []
    for each_node in worker_nodes_list:
        pod_obj = helpers.create_pod(
            interface_type=constants.CEPHFILESYSTEM,
            pvc_name=pvc_obj.name,
            namespace=pvc_obj.namespace,
            node_name=each_node,
            pod_dict_path=constants.NGINX_POD_YAML
        )
        pod_list.append(pod_obj)
        teardown_factory(pod_obj)

    # Confirm pods are created and are running on designated nodes
    for expected_node, pod_obj in zip(worker_nodes_list, pod_list):
        helpers.wait_for_resource_state(
            resource=pod_obj, state=constants.STATUS_RUNNING,
            timeout=120
        )
        pod_obj.reload()
        assert pod.verify_node_name(pod_obj, expected_node), (
            f'Pod {pod_obj.name} is running on a different node '
            f'than the selected node'
        )

    # Run IOs on all pods. FIO Filename is kept same as pod name
    with ThreadPoolExecutor() as p:
        for pod_obj in pod_list:
            logger.info(f"Running IO on pod {pod_obj.name}")
            p.submit(
                pod_obj.run_io, storage_type='fs', size='512M',
                runtime=30, fio_filename=pod_obj.name
            )

    # Check IO from all pods
    for pod_obj in pod_list:
        pod.get_fio_rw_iops(pod_obj)

    # Calculate md5sum of each file (ordered to match pod_list)
    md5sum_pod_data = [
        pod.cal_md5sum(pod_obj=pod_obj, file_name=pod_obj.name)
        for pod_obj in pod_list
    ]

    # Delete all but the last app pod.
    survivor = pod_list[-1]
    for doomed_pod in pod_list[:-1]:
        doomed_pod.delete()
        doomed_pod.ocp.wait_for_delete(
            resource_name=doomed_pod.name
        )

    # Verify presence of files written by each pod
    logger.info(
        f"Verify existence of each file from app pod "
        f"{survivor.name} "
    )
    for pod_obj in pod_list:
        file_path = pod.get_file_path(survivor, pod_obj.name)
        assert pod.check_file_existence(survivor, file_path), (
            f"File {pod_obj.name} doesnt exist"
        )
        logger.info(
            f"File {pod_obj.name} exists in {survivor.name}"
        )

    # From surviving pod, verify data integrity of files
    # written by deleted pods
    logger.info(f"verify all data from {survivor.name}")

    for original_md5, pod_obj in zip(md5sum_pod_data, pod_list):
        assert pod.verify_data_integrity(
            pod_obj=survivor,
            file_name=pod_obj.name,
            original_md5sum=original_md5
        )

    # From surviving pod, confirm mount point is still write-able
    logger.info(f"Re-running IO on pod {survivor.name}")
    fio_new_file = f"{survivor.name}-new-file"
    survivor.run_io(
        storage_type='fs', size='512M', runtime=30,
        fio_filename=fio_new_file
    )
    pod.get_fio_rw_iops(survivor)
    file_path = pod.get_file_path(survivor, fio_new_file)
    assert pod.check_file_existence(survivor, file_path), (
        f"File {fio_new_file} doesnt exist"
    )
    logger.info(
        f"File {fio_new_file} exists in {survivor.name} "
    )
def test_rwx_dynamic_pvc(self, setup_base):
    """
    RWX Dynamic PVC creation tests with Reclaim policy set to Delete/Retain

    Two pods on different nodes mount the same RWX PVC simultaneously;
    each pod must be able to read, verify and rename the file written
    by the other pod.

    Args:
        setup_base: fixture preparing self.pod_obj1, self.pvc_obj,
            self.interface_type, self.worker_nodes_list, self.namespace,
            self.storage_type and self.io_size
    """
    logger.info(f"CephFS RWX test")
    logger.info(
        f"Creating second pod on node: {self.worker_nodes_list[1]} "
        f"with pvc {self.pvc_obj.name}")
    # Second pod mounts the same PVC as self.pod_obj1 (created in setup)
    pod_obj2 = helpers.create_pod(interface_type=self.interface_type,
                                  pvc_name=self.pvc_obj.name,
                                  namespace=self.namespace,
                                  node_name=self.worker_nodes_list[1],
                                  pod_dict_path=constants.NGINX_POD_YAML)
    helpers.wait_for_resource_state(pod_obj2, constants.STATUS_RUNNING)
    pod_obj2.reload()
    # Ensure the two pods really landed on different nodes
    node_pod1 = self.pod_obj1.get().get('spec').get('nodeName')
    node_pod2 = pod_obj2.get().get('spec').get('nodeName')
    assert node_pod1 != node_pod2, 'Both pods are on the same node'

    # Run IO on both the pods; each pod writes a file named after itself
    logger.info(f"Running IO on pod {self.pod_obj1.name}")
    file_name1 = self.pod_obj1.name
    logger.info(file_name1)
    self.pod_obj1.run_io(storage_type=self.storage_type,
                         size=self.io_size, runtime=30,
                         fio_filename=file_name1)

    logger.info(f"Running IO on pod {pod_obj2.name}")
    file_name2 = pod_obj2.name
    pod_obj2.run_io(storage_type=self.storage_type, size=self.io_size,
                    runtime=30, fio_filename=file_name2)

    # Check IO and calculate md5sum of files
    pod.get_fio_rw_iops(self.pod_obj1)
    md5sum_pod1_data = pod.cal_md5sum(pod_obj=self.pod_obj1,
                                      file_name=file_name1)

    pod.get_fio_rw_iops(pod_obj2)
    md5sum_pod2_data = pod.cal_md5sum(pod_obj=pod_obj2,
                                      file_name=file_name2)

    # Cross-verify: each pod must see the other pod's file unchanged
    logger.info(f"verify data from alternate pods")

    assert pod.verify_data_integrity(pod_obj=pod_obj2,
                                     file_name=file_name1,
                                     original_md5sum=md5sum_pod1_data)

    assert pod.verify_data_integrity(pod_obj=self.pod_obj1,
                                     file_name=file_name2,
                                     original_md5sum=md5sum_pod2_data)

    # Verify that data is mutable from any pod
    logger.info(f"Perform modification of files from alternate pod")
    # Access and rename file written by pod-2 from pod-1
    file_path2 = pod.get_file_path(pod_obj2, file_name2)
    logger.info(file_path2)
    self.pod_obj1.exec_cmd_on_pod(
        command=f"bash -c \"mv {file_path2} {file_path2}-renamed\"",
        out_yaml_format=False)

    # Access and rename file written by pod-1 from pod-2
    file_path1 = pod.get_file_path(self.pod_obj1, file_name1)
    logger.info(file_path1)
    pod_obj2.exec_cmd_on_pod(
        command=f"bash -c \"mv {file_path1} {file_path1}-renamed\"",
        out_yaml_format=False)

    # Both pods must observe both renamed files on the shared volume
    logger.info(f"Verify presence of renamed files from both pods")
    file_names = [f"{file_path1}-renamed", f"{file_path2}-renamed"]
    for file in file_names:
        assert pod.check_file_existence(
            self.pod_obj1, file), (f"File {file} doesn't exist")
        logger.info(f"File {file} exists in {self.pod_obj1.name} ")
        assert pod.check_file_existence(
            pod_obj2, file), (f"File {file} doesn't exist")
        logger.info(f"File {file} exists in {pod_obj2.name}")
def test_create_multiple_sc_with_different_pool_name(
    self, teardown_factory
):
    """
    This test function does below,
    *. Creates multiple Storage Classes with different pool name
    *. Creates PVCs using each Storage Class
    *. Mount each PVC to an app pod
    *. Run IO on each app pod

    Args:
        teardown_factory (function): registers created resources for
            deletion on teardown
    """
    # Create 3 storageclasses, each with different pool name
    cbp_list = []
    sc_list = []
    for i in range(3):
        log.info("Creating cephblockpool")
        cbp_obj = helpers.create_ceph_block_pool()
        log.info(
            f"{cbp_obj.name} created successfully"
        )
        log.info(
            f"Creating a RBD storage class using {cbp_obj.name}"
        )
        cbp_list.append(cbp_obj)
        sc_obj = helpers.create_storage_class(
            interface_type=constants.CEPHBLOCKPOOL,
            interface_name=cbp_obj.name,
            secret_name=self.rbd_secret_obj.name
        )

        log.info(
            f"StorageClass: {sc_obj.name} "
            f"created successfully using {cbp_obj.name}"
        )
        sc_list.append(sc_obj)
        teardown_factory(cbp_obj)
        teardown_factory(sc_obj)

    # Create PVCs using each SC
    pvc_list = []
    for i in range(3):
        log.info(f"Creating a PVC using {sc_list[i].name}")
        pvc_obj = helpers.create_pvc(sc_list[i].name)
        log.info(
            f"PVC: {pvc_obj.name} created successfully using "
            f"{sc_list[i].name}"
        )
        pvc_list.append(pvc_obj)
        teardown_factory(pvc_obj)
        helpers.wait_for_resource_state(pvc_obj, constants.STATUS_BOUND)
        pvc_obj.reload()

    # Create app pod and mount each PVC
    pod_list = []
    for i in range(3):
        log.info(f"Creating an app pod and mount {pvc_list[i].name}")
        pod_obj = helpers.create_pod(
            interface_type=constants.CEPHBLOCKPOOL,
            pvc_name=pvc_list[i].name,
        )
        log.info(
            f"{pod_obj.name} created successfully and "
            f"mounted {pvc_list[i].name}"
        )
        pod_list.append(pod_obj)
        teardown_factory(pod_obj)
        helpers.wait_for_resource_state(pod_obj, constants.STATUS_RUNNING)
        pod_obj.reload()

    # Run IO on each app pod for sometime.
    # Loop variable renamed from 'pod' to 'pod_obj' so the file-level
    # 'pod' helper module is not shadowed.
    for pod_obj in pod_list:
        log.info(f"Running FIO on {pod_obj.name}")
        pod_obj.run_io('fs', size='2G')

    for pod_obj in pod_list:
        get_fio_rw_iops(pod_obj)
def test_delete_provisioner_pod_while_thick_provisioning(
    self,
    pvc_factory,
    pod_factory,
):
    """
    Test to delete RBD provisioner leader pod while creating a PVC using
    thick provision enabled storage class

    Args:
        pvc_factory (function): factory creating a PVC
        pod_factory (function): factory creating an app pod
    """
    pvc_size = 20
    pool_name = default_ceph_block_pool()
    executor = ThreadPoolExecutor(max_workers=1)
    DISRUPTION_OPS.set_resource(
        resource="rbdplugin_provisioner", leader_type="provisioner"
    )

    # Start creation of PVC in the background
    pvc_create = executor.submit(
        pvc_factory,
        interface=constants.CEPHBLOCKPOOL,
        project=self.proj_obj,
        storageclass=default_thick_storage_class(),
        size=pvc_size,
        access_mode=constants.ACCESS_MODE_RWO,
        status="",
    )

    # Ensure that the PVC is being created before deleting the rbd provisioner pod
    ret = helpers.wait_for_resource_count_change(
        get_all_pvcs, 0, self.proj_obj.namespace, "increase"
    )
    assert ret, "Wait timeout: PVC is not being created."
    logger.info("PVC creation has started.")
    DISRUPTION_OPS.delete_resource()
    logger.info("Deleted RBD provisioner leader pod.")

    pvc_obj = pvc_create.result()

    # Confirm that the PVC is Bound
    helpers.wait_for_resource_state(
        resource=pvc_obj, state=constants.STATUS_BOUND, timeout=600
    )
    pvc_obj.reload()
    logger.info(f"Verified: PVC {pvc_obj.name} reached Bound state.")
    image_name = pvc_obj.get_rbd_image_name
    pv_obj = pvc_obj.backed_pv_obj

    # Verify thick provision by checking the image used size
    assert check_rbd_image_used_size(
        pvc_objs=[pvc_obj],
        usage_to_compare=f"{pvc_size}GiB",
        rbd_pool=pool_name,
        expect_match=True,
    ), f"PVC {pvc_obj.name} is not thick provisioned.\n PV describe :\n {pv_obj.describe()}"
    logger.info("Verified: The PVC is thick provisioned")

    # Create pod and run IO
    pod_obj = pod_factory(
        interface=constants.CEPHBLOCKPOOL,
        pvc=pvc_obj,
        status=constants.STATUS_RUNNING,
    )
    pod_obj.run_io(
        storage_type="fs",
        size=f"{pvc_size-1}G",
        fio_filename=f"{pod_obj.name}_io",
        end_fsync=1,
    )

    # Get IO result
    get_fio_rw_iops(pod_obj)

    logger.info(f"Deleting pod {pod_obj.name}")
    pod_obj.delete()
    # BUG FIX: original wrote `wait_for_delete(...), "msg"` — a bare
    # tuple expression, so the message was dead code and the result was
    # never asserted. Same fix applied to the PVC and PV waits below.
    assert pod_obj.ocp.wait_for_delete(
        pod_obj.name, 180
    ), f"Pod {pod_obj.name} is not deleted"

    # Fetch image id for verification
    image_uid = pvc_obj.image_uuid

    logger.info(f"Deleting PVC {pvc_obj.name}")
    pvc_obj.delete()
    assert pvc_obj.ocp.wait_for_delete(
        pvc_obj.name
    ), f"PVC {pvc_obj.name} is not deleted"
    logger.info(f"Verified: PVC {pvc_obj.name} is deleted.")
    assert pv_obj.ocp.wait_for_delete(
        pv_obj.name
    ), f"PV {pv_obj.name} is not deleted"
    logger.info(f"Verified: PV {pv_obj.name} is deleted.")

    # Verify the rbd image is deleted
    logger.info(f"Wait for the RBD image {image_name} to get deleted")
    assert verify_volume_deleted_in_backend(
        interface=constants.CEPHBLOCKPOOL,
        image_uuid=image_uid,
        pool_name=pool_name,
        timeout=300,
    ), f"Wait timeout - RBD image {image_name} is not deleted"
    logger.info(f"Verified: RBD image {image_name} is deleted")
def test_rwo_dynamic_pvc(self, interface_type, reclaim_policy, setup,
                         pvc_factory, pod_factory):
    """
    RWO Dynamic PVC creation tests with Reclaim policy set to Retain/Delete

    Two pods on different nodes try to use the same RWO PVC: the second
    must hit a Multi-Attach error until the first pod is deleted, after
    which it runs and sees the first pod's data intact.

    Args:
        interface_type (str): interface (RBD/CephFS) for the PVC
        reclaim_policy (str): reclaim policy of the StorageClass (Retain/Delete)
        setup: fixture returning (sc_obj, worker_nodes_list)
        pvc_factory (function): factory creating a PVC
        pod_factory (function): factory creating an app pod
    """
    access_mode = constants.ACCESS_MODE_RWO
    expected_failure_str = "Multi-Attach error for volume"
    storage_type = "fs"
    sc_obj, worker_nodes_list = setup

    logger.info(f"Creating PVC with {access_mode} access mode")
    pvc_obj = pvc_factory(
        interface=interface_type,
        storageclass=sc_obj,
        size=self.pvc_size,
        access_mode=access_mode,
        status=constants.STATUS_BOUND,
    )

    logger.info(f"Creating first pod on node: {worker_nodes_list[0]} "
                f"with pvc {pvc_obj.name}")
    pod_obj1 = pod_factory(
        interface=interface_type,
        pvc=pvc_obj,
        status=constants.STATUS_RUNNING,
        node_name=worker_nodes_list[0],
        pod_dict_path=constants.NGINX_POD_YAML,
    )

    logger.info(f"Creating second pod on node: {worker_nodes_list[1]} "
                f"with pvc {pvc_obj.name}")
    # Second pod is expected to be stuck in ContainerCreating because the
    # RWO volume is already attached to the first pod's node
    pod_obj2 = pod_factory(
        interface=interface_type,
        pvc=pvc_obj,
        status=constants.STATUS_CONTAINER_CREATING,
        node_name=worker_nodes_list[1],
        pod_dict_path=constants.NGINX_POD_YAML,
    )

    # Sanity check: the two pods are scheduled on different nodes
    node_pod1 = pod_obj1.get().get("spec").get("nodeName")
    node_pod2 = pod_obj2.get().get("spec").get("nodeName")
    assert node_pod1 != node_pod2, "Both pods are on the same node"

    logger.info(f"Running IO on first pod {pod_obj1.name}")
    file_name = pod_obj1.name
    pod_obj1.run_io(storage_type=storage_type, size="1G",
                    fio_filename=file_name)
    pod.get_fio_rw_iops(pod_obj1)
    # Record md5sum for later data-integrity checks from the second pod
    md5sum_pod1_data = pod.cal_md5sum(pod_obj=pod_obj1,
                                      file_name=file_name)

    # Verify that second pod is still in ContainerCreating state and not
    # able to attain Running state due to expected failure
    logger.info(
        f"Verify that second pod {pod_obj2.name} is still in ContainerCreating state"
    )
    helpers.wait_for_resource_state(
        resource=pod_obj2, state=constants.STATUS_CONTAINER_CREATING)
    self.verify_expected_failure_event(ocs_obj=pod_obj2,
                                       failure_str=expected_failure_str)

    logger.info(
        f"Deleting first pod so that second pod can attach PVC {pvc_obj.name}"
    )
    pod_obj1.delete()
    pod_obj1.ocp.wait_for_delete(resource_name=pod_obj1.name)

    # Wait for second pod to be in Running state
    helpers.wait_for_resource_state(
        resource=pod_obj2, state=constants.STATUS_RUNNING, timeout=240)

    logger.info(f"Verify data from second pod {pod_obj2.name}")
    pod.verify_data_integrity(pod_obj=pod_obj2,
                              file_name=file_name,
                              original_md5sum=md5sum_pod1_data)

    pod_obj2.run_io(storage_type=storage_type, size="1G",
                    fio_filename=pod_obj2.name)
    pod.get_fio_rw_iops(pod_obj2)

    # Again verify data integrity
    logger.info(f"Again verify data from second pod {pod_obj2.name}")
    pod.verify_data_integrity(pod_obj=pod_obj2,
                              file_name=file_name,
                              original_md5sum=md5sum_pod1_data)