def test_rwx_pvc_assign_pod_node(self, pvc_factory, teardown_factory):
    """
    Test assign nodeName to a pod using RWX pvc

    Creates a CephFS-backed RWX PVC, starts one pod on each of two
    randomly selected worker nodes by setting ``nodeName``, verifies each
    pod landed on its intended node, then runs FIO concurrently from all
    pods against the shared volume and validates the IO results.
    """
    interface = constants.CEPHFILESYSTEM
    worker_nodes_list = helpers.get_worker_nodes()

    # Create a RWX PVC
    pvc_obj = pvc_factory(
        interface=interface,
        access_mode=constants.ACCESS_MODE_RWX,
        status=constants.STATUS_BOUND
    )

    # Create two pods on selected nodes
    pod_list = []
    selected_nodes = random.sample(worker_nodes_list, k=2)
    logger.info(
        f"Creating {len(selected_nodes)} pods with pvc {pvc_obj.name}"
    )
    for node in selected_nodes:
        logger.info(f"Creating pod on node: {node}")
        pod_obj = helpers.create_pod(
            interface_type=interface,
            pvc_name=pvc_obj.name,
            namespace=pvc_obj.namespace,
            node_name=node,
            pod_dict_path=constants.NGINX_POD_YAML
        )
        pod_list.append(pod_obj)
        teardown_factory(pod_obj)

    # Confirm that both pods are running on the selected_nodes.
    # Iterate pods and their target nodes in lockstep instead of indexing.
    logger.info('Checking whether pods are running on the selected nodes')
    for pod_obj, selected_node in zip(pod_list, selected_nodes):
        helpers.wait_for_resource_state(
            resource=pod_obj, state=constants.STATUS_RUNNING,
            timeout=120
        )
        pod_obj.reload()
        assert pod.verify_node_name(pod_obj, selected_node), (
            f"Pod {pod_obj.name} is running on a different node "
            f"than the selected node"
        )

    # Run IOs on all pods. FIO Filename is kept same as pod name.
    # The 'with' block waits for all submitted IO jobs to finish.
    with ThreadPoolExecutor() as p:
        for pod_obj in pod_list:
            logger.info(f"Running IO on pod {pod_obj.name}")
            p.submit(
                pod_obj.run_io, storage_type='fs', size='512M',
                runtime=30, fio_filename=pod_obj.name
            )

    # Check IO from all pods
    for pod_obj in pod_list:
        pod.get_fio_rw_iops(pod_obj)
def test_rwo_pvc_assign_pod_node(self, interface, pvc_factory, teardown_factory):
    """
    Test assign nodeName to a pod using RWO pvc

    Provisions a Bound RWO PVC on the given interface, pins a pod to one
    randomly chosen worker node via ``nodeName``, verifies the scheduler
    honored the placement, then runs FIO on the volume and validates it.
    """
    workers = get_worker_nodes()

    # Provision a Bound RWO PVC on the requested storage interface
    claim = pvc_factory(
        interface=interface,
        access_mode=constants.ACCESS_MODE_RWO,
        status=constants.STATUS_BOUND,
    )

    # Pin the pod to a single randomly chosen worker node
    target_node = random.choice(workers)
    logger.info(
        f"Creating a pod on node: {target_node} with pvc {claim.name}")
    app_pod = helpers.create_pod(
        interface_type=interface,
        pvc_name=claim.name,
        namespace=claim.namespace,
        node_name=target_node,
        pod_dict_path=constants.NGINX_POD_YAML,
    )
    teardown_factory(app_pod)

    # The pod must reach Running state on the node we selected
    helpers.wait_for_resource_state(
        resource=app_pod,
        state=constants.STATUS_RUNNING,
        timeout=120,
    )
    app_pod.reload()
    assert pod.verify_node_name(
        app_pod, target_node
    ), "Pod is running on a different node than the selected node"

    # Exercise the volume with FIO and validate the IO results
    logger.info(f"Running IO on pod {app_pod.name}")
    app_pod.run_io(storage_type="fs", size="512M", runtime=30, invalidate=0)
    pod.get_fio_rw_iops(app_pod)
def setup(
    self,
    request,
    scenario,
    num_of_nodes,
    num_of_fail_nodes,
    disrupt_provisioner,
    project_factory,
    multi_pvc_factory,
    dc_pod_factory,
):
    """
    Identify the nodes and start DeploymentConfig based app pods using PVC
    with ReadWriteOnce (RWO) access mode on selected nodes

    Args:
        scenario (str): Scenario of app pods running on OCS or dedicated nodes
            (eg., 'colocated', 'dedicated')
        num_of_nodes (int): number of nodes required for running test
        num_of_fail_nodes (int): number of nodes to make unresponsive during test
        disrupt_provisioner (bool): True to disrupt the leader provisioner
            pods if not running on selected nodes, else False
        project_factory: A fixture to create new project
        multi_pvc_factory: A fixture create a set of new PVCs
        dc_pod_factory: A fixture to create deploymentconfig pods

    Returns:
        tuple: containing the params used in test cases
            (ceph_cluster, dc_pods, ceph_pods, app_pod_nodes, test_nodes,
            disruptor)
    """
    # Pick test nodes: OCS-hosting nodes for 'colocated', otherwise the rest
    ocs_nodes, non_ocs_nodes = self.identify_and_add_nodes(
        scenario, num_of_nodes)
    test_nodes = ocs_nodes if (scenario == "colocated") else non_ocs_nodes
    logger.info(f"Using nodes {test_nodes} for running test")

    def finalizer():
        # Undo the 'nodetype' label applied to test nodes during setup
        helpers.remove_label_from_worker_node(node_list=test_nodes,
                                              label_key="nodetype")

        # Check ceph health
        ceph_health_check(tries=40)

    request.addfinalizer(finalizer)

    project = project_factory()

    if helpers.storagecluster_independent_check():
        # External-mode cluster: mons run outside, skip the mon wait below
        ceph_cluster = CephClusterExternal()
    else:
        ceph_cluster = CephCluster()
        # Wait for mon pods to reach expected count
        # Bug 1778273 - [RFE]: Configure 5 MONs for OCS cluster with 5 or more nodes
        # This wait is required for some of the previous OCS versions (< 4.5)
        current_mon_count = int(
            ceph_cluster.CEPHCLUSTER.get_resource(resource_name="",
                                                  column="MONCOUNT"))
        assert ceph_cluster.POD.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector=constants.MON_APP_LABEL,
            resource_count=current_mon_count,
            timeout=900,
        )
        # Re-scan so ceph_cluster.mons reflects the pods that just came up
        ceph_cluster.mons = []
        ceph_cluster.scan_cluster()

    # Select nodes for running app pods and inducing network failure later
    app_pod_nodes = self.select_nodes_for_app_pods(scenario, ceph_cluster,
                                                   ocs_nodes, non_ocs_nodes,
                                                   num_of_fail_nodes)

    # Create multiple RBD and CephFS backed PVCs with RWO accessmode
    num_of_pvcs = self.num_of_app_pods_per_node * num_of_fail_nodes
    rbd_pvcs = multi_pvc_factory(
        interface=constants.CEPHBLOCKPOOL,
        project=project,
        size=self.pvc_size,
        access_modes=[constants.ACCESS_MODE_RWO],
        num_of_pvc=num_of_pvcs,
    )
    cephfs_pvcs = multi_pvc_factory(
        interface=constants.CEPHFILESYSTEM,
        project=project,
        size=self.pvc_size,
        access_modes=[constants.ACCESS_MODE_RWO],
        num_of_pvc=num_of_pvcs,
    )

    # Create deploymentconfig based pods
    dc_pods = []
    # Start app-pods on selected node(s): label one node at a time so the
    # node_selector forces each DC pod onto exactly that node
    for node_name in app_pod_nodes:
        logger.info(f"Starting app pods on the node {node_name}")
        helpers.label_worker_node(node_list=[node_name],
                                  label_key="nodetype",
                                  label_value="app-pod")

        for num in range(self.num_of_app_pods_per_node):
            dc_pods.append(
                dc_pod_factory(
                    interface=constants.CEPHBLOCKPOOL,
                    pvc=rbd_pvcs.pop(0),
                    node_selector={"nodetype": "app-pod"},
                ))
            assert pod.verify_node_name(
                dc_pods[-1], node_name
            ), f"Pod {dc_pods[-1].name} is not running on labeled node {node_name}"
            dc_pods.append(
                dc_pod_factory(
                    interface=constants.CEPHFILESYSTEM,
                    pvc=cephfs_pvcs.pop(0),
                    node_selector={"nodetype": "app-pod"},
                ))
            assert pod.verify_node_name(
                dc_pods[-1], node_name
            ), f"Pod {dc_pods[-1].name} is not running on labeled node {node_name}"
        helpers.remove_label_from_worker_node(node_list=[node_name],
                                              label_key="nodetype")

    # Label other test nodes to be able to run app pods later
    helpers.label_worker_node(node_list=test_nodes,
                              label_key="nodetype",
                              label_value="app-pod")

    # Get ceph mon,osd pods running on selected node if colocated scenario
    # and extra OCS nodes are present
    # Recovery steps for MON and OSDS not required from OCS 4.4 onwards
    # Refer to BZ 1830015 and BZ 1835908
    ceph_pods = []
    # NOTE(review): float() version comparison breaks for two-digit minors
    # (float("4.10") == 4.1 < 4.4) — presumably fine for the < 4.5 versions
    # this branch targets; confirm before reusing for newer releases.
    if float(config.ENV_DATA["ocs_version"]) < 4.4 and (
            scenario == "colocated" and len(test_nodes) > 3):
        pods_to_check = ceph_cluster.osds
        # Skip mon pods if mon_count is 5 as there may not be enough nodes
        # for all mons to run after multiple node failures
        if ceph_cluster.mon_count == 3:
            pods_to_check.extend(ceph_cluster.mons)
        for pod_obj in pods_to_check:
            if pod.get_pod_node(pod_obj).name in app_pod_nodes[0]:
                ceph_pods.append(pod_obj)
        logger.info(
            f"Colocated Mon, OSD pods: {[pod_obj.name for pod_obj in ceph_pods]}"
        )

    disruptor = []
    if disrupt_provisioner:
        # Optionally kill leader plugin/provisioner pods not on test nodes
        disruptor = self.disrupt_plugin_provisioner_pods(app_pod_nodes)

    return ceph_cluster, dc_pods, ceph_pods, app_pod_nodes, test_nodes, disruptor
def setup(
    self, request, scenario, num_of_nodes, num_of_fail_nodes,
    disrupt_provisioner, project_factory, multi_pvc_factory,
    dc_pod_factory
):
    """
    Identify the nodes and start DeploymentConfig based app pods using PVC
    with ReadWriteOnce (RWO) access mode on selected nodes

    Args:
        scenario (str): Scenario of app pods running on OCS or dedicated nodes
            (eg., 'colocated', 'dedicated')
        num_of_nodes (int): number of nodes required for running test
        num_of_fail_nodes (int): number of nodes to make unresponsive during test
        disrupt_provisioner (bool): True to disrupt the leader provisioner
            pods if not running on selected nodes, else False
        project_factory: A fixture to create new project
        multi_pvc_factory: A fixture create a set of new PVCs
        dc_pod_factory: A fixture to create deploymentconfig pods

    Returns:
        tuple: containing the params used in test cases
            (ceph_cluster, dc_pods, ceph_pods, app_pod_nodes, test_nodes,
            disruptor)
    """
    # Pick test nodes: OCS-hosting nodes for 'colocated', otherwise the rest
    ocs_nodes, non_ocs_nodes = self.identify_and_add_nodes(
        scenario, num_of_nodes
    )
    test_nodes = ocs_nodes if (scenario == "colocated") else non_ocs_nodes
    logger.info(f"Using nodes {test_nodes} for running test")

    def finalizer():
        # Undo the 'nodetype' label applied to test nodes during setup
        helpers.remove_label_from_worker_node(
            node_list=test_nodes, label_key="nodetype"
        )
    request.addfinalizer(finalizer)

    # Clusters with >4 OCS nodes on OCS >= 4.3 run 5 mons — wait for them
    # NOTE(review): float() version comparison breaks for two-digit minors
    # (float("4.10") == 4.1 < 4.3) — verify against the versions this suite
    # actually runs on.
    if len(ocs_nodes) > 4 and float(config.ENV_DATA['ocs_version']) >= 4.3:
        pod_obj = ocp.OCP(
            kind=constants.POD,
            namespace=config.ENV_DATA['cluster_namespace']
        )
        assert pod_obj.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector=constants.MON_APP_LABEL, resource_count=5,
            timeout=900
        )
    ceph_cluster = CephCluster()
    project = project_factory()

    # Select nodes for running app pods and inducing network failure later
    app_pod_nodes = self.select_nodes_for_app_pods(
        scenario, ceph_cluster, ocs_nodes, non_ocs_nodes, num_of_fail_nodes
    )

    # Create multiple RBD and CephFS backed PVCs with RWO accessmode
    num_of_pvcs = self.num_of_app_pods_per_node * num_of_fail_nodes
    rbd_pvcs = multi_pvc_factory(
        interface=constants.CEPHBLOCKPOOL, project=project,
        size=self.pvc_size,
        access_modes=[constants.ACCESS_MODE_RWO], num_of_pvc=num_of_pvcs
    )
    cephfs_pvcs = multi_pvc_factory(
        interface=constants.CEPHFILESYSTEM, project=project,
        size=self.pvc_size,
        access_modes=[constants.ACCESS_MODE_RWO],
        num_of_pvc=num_of_pvcs
    )

    # Create deploymentconfig based pods
    dc_pods = []
    # Start app-pods on selected node(s): label one node at a time so the
    # node_selector forces each DC pod onto exactly that node
    for node_name in app_pod_nodes:
        logger.info(f"Starting app pods on the node {node_name}")
        helpers.label_worker_node(
            node_list=[node_name], label_key="nodetype",
            label_value="app-pod"
        )
        for num in range(self.num_of_app_pods_per_node):
            dc_pods.append(
                dc_pod_factory(
                    interface=constants.CEPHBLOCKPOOL,
                    pvc=rbd_pvcs.pop(0),
                    node_selector={'nodetype': 'app-pod'}
                )
            )
            assert pod.verify_node_name(dc_pods[-1], node_name), (
                f"Pod {dc_pods[-1].name} is not running on labeled node {node_name}"
            )
            dc_pods.append(
                dc_pod_factory(
                    interface=constants.CEPHFILESYSTEM,
                    pvc=cephfs_pvcs.pop(0),
                    node_selector={'nodetype': 'app-pod'}
                )
            )
            assert pod.verify_node_name(dc_pods[-1], node_name), (
                f"Pod {dc_pods[-1].name} is not running on labeled node {node_name}"
            )
        helpers.remove_label_from_worker_node(
            node_list=[node_name], label_key="nodetype"
        )

    # Label other test nodes to be able to run app pods later
    helpers.label_worker_node(
        node_list=test_nodes, label_key="nodetype",
        label_value="app-pod"
    )

    # Get ceph mon,osd pods running on selected node if colocated scenario
    # and extra OCS nodes are present
    ceph_pods = []
    if scenario == "colocated" and len(test_nodes) > len(ceph_cluster.osds):
        pods_to_check = ceph_cluster.osds
        # Skip mon pods if mon_count is 5 as there may not be enough nodes
        # for all mons to run after multiple node failures
        if ceph_cluster.mon_count == 3:
            pods_to_check.extend(ceph_cluster.mons)
        for pod_obj in pods_to_check:
            if pod.get_pod_node(pod_obj).name in app_pod_nodes[0]:
                ceph_pods.append(pod_obj)
        logger.info(
            f"Colocated Mon, OSD pods: {[pod_obj.name for pod_obj in ceph_pods]}"
        )

    disruptor = []
    if disrupt_provisioner:
        # Optionally kill leader plugin/provisioner pods not on test nodes
        disruptor = self.disrupt_plugin_provisioner_pods(app_pod_nodes)

    return ceph_cluster, dc_pods, ceph_pods, app_pod_nodes, test_nodes, disruptor
def test_rwx_pvc_assign_pod_node(self, interface, pvc_factory, teardown_factory):
    """
    Test assign nodeName to a pod using RWX pvc

    For RBD the RWX PVC must be a raw Block volume, so a raw-block pod
    spec is used; for CephFS a regular filesystem mount is used. Two pods
    sharing the PVC are pinned to two random worker nodes via ``nodeName``,
    placement is verified, then FIO runs on all pods concurrently and the
    results are validated.
    """
    worker_nodes_list = get_worker_nodes()
    if interface == constants.CEPHBLOCKPOOL:
        # RWX on RBD is only supported with volumeMode: Block
        volume_mode = "Block"
        storage_type = "block"
        block_pv = True
        pod_yaml = constants.CSI_RBD_RAW_BLOCK_POD_YAML
    else:
        # Empty values fall back to the factory/helper defaults
        # (presumably Filesystem mode and the default pod spec — confirm)
        volume_mode = ""
        storage_type = "fs"
        block_pv = False
        pod_yaml = ""

    # Create a RWX PVC
    pvc_obj = pvc_factory(
        interface=interface,
        access_mode=constants.ACCESS_MODE_RWX,
        status=constants.STATUS_BOUND,
        volume_mode=volume_mode,
    )

    # Create two pods on selected nodes
    pod_list = []
    selected_nodes = random.sample(worker_nodes_list, k=2)
    logger.info(f"Creating {len(selected_nodes)} pods with pvc {pvc_obj.name}")
    for node in selected_nodes:
        logger.info(f"Creating pod on node: {node}")
        pod_obj = helpers.create_pod(
            interface_type=interface,
            pvc_name=pvc_obj.name,
            namespace=pvc_obj.namespace,
            node_name=node,
            pod_dict_path=pod_yaml,
            raw_block_pv=block_pv,
        )
        pod_list.append(pod_obj)
        teardown_factory(pod_obj)

    # Confirm that both pods are running on the selected_nodes.
    # Iterate pods and their target nodes in lockstep instead of indexing.
    logger.info("Checking whether pods are running on the selected nodes")
    for pod_obj, selected_node in zip(pod_list, selected_nodes):
        helpers.wait_for_resource_state(
            resource=pod_obj, state=constants.STATUS_RUNNING, timeout=120
        )
        pod_obj.reload()
        assert pod.verify_node_name(pod_obj, selected_node), (
            f"Pod {pod_obj.name} is running on a different node "
            f"than the selected node"
        )

    # Run IOs on all pods. FIO Filename is kept same as pod name.
    # The 'with' block waits for all submitted IO jobs to finish.
    with ThreadPoolExecutor() as p:
        for pod_obj in pod_list:
            logger.info(f"Running IO on pod {pod_obj.name}")
            p.submit(
                pod_obj.run_io,
                storage_type=storage_type,
                size="512M",
                runtime=30,
                fio_filename=pod_obj.name,
            )

    # Check IO from all pods
    for pod_obj in pod_list:
        pod.get_fio_rw_iops(pod_obj)
def test_pvc_rwx_writeable_after_pod_deletions(
    self, pvc_factory, teardown_factory
):
    """
    Test assign nodeName to a pod using RWX pvc

    1. Create a new project.
    2. Create a RWX CEPHFS based PVC
    3. Attach the same PVC to multiple PODs and start IO on all the PODs
    4. Delete all but one pod.
    5. Verify mount point is still write-able.
         - Start IO again on the Running pod.
    6. Also, access the data written by deleted pods from the Running pod

    """
    worker_nodes_list = helpers.get_worker_nodes()

    # Create a RWX PVC
    pvc_obj = pvc_factory(
        interface=constants.CEPHFILESYSTEM,
        access_mode=constants.ACCESS_MODE_RWX,
        size=10, status=constants.STATUS_BOUND
    )
    # Fix: trailing space added — the implicit f-string concatenation
    # previously logged "...backedwith same pvc..."
    logger.info(
        f"Creating pods on all worker nodes backed "
        f"with same pvc {pvc_obj.name}"
    )
    pod_list = []
    for each_node in worker_nodes_list:
        pod_obj = helpers.create_pod(
            interface_type=constants.CEPHFILESYSTEM,
            pvc_name=pvc_obj.name,
            namespace=pvc_obj.namespace,
            node_name=each_node,
            pod_dict_path=constants.NGINX_POD_YAML
        )
        pod_list.append(pod_obj)
        teardown_factory(pod_obj)

    # Confirm pods are created and are running on designated nodes.
    # Pods were created in worker_nodes_list order, so iterate in lockstep
    # instead of keeping a manual counter.
    for pod_obj, assigned_node in zip(pod_list, worker_nodes_list):
        helpers.wait_for_resource_state(
            resource=pod_obj, state=constants.STATUS_RUNNING,
            timeout=120
        )
        pod_obj.reload()
        assert pod.verify_node_name(pod_obj, assigned_node), (
            f'Pod {pod_obj.name} is running on a different node '
            f'than the selected node'
        )

    # Run IOs on all pods. FIO Filename is kept same as pod name.
    # The 'with' block waits for all submitted IO jobs to finish.
    with ThreadPoolExecutor() as p:
        for pod_obj in pod_list:
            logger.info(f"Running IO on pod {pod_obj.name}")
            p.submit(
                pod_obj.run_io, storage_type='fs', size='512M',
                runtime=30, fio_filename=pod_obj.name
            )

    # Check IO from all pods
    for pod_obj in pod_list:
        pod.get_fio_rw_iops(pod_obj)

    # Calculate md5sum of each file (one file per pod, named after the pod)
    md5sum_pod_data = []
    for pod_obj in pod_list:
        md5sum_pod_data.append(pod.cal_md5sum(
            pod_obj=pod_obj, file_name=pod_obj.name
        ))

    # Delete all but the last app pod.
    for doomed_pod in pod_list[:-1]:
        doomed_pod.delete()
        doomed_pod.ocp.wait_for_delete(resource_name=doomed_pod.name)

    # Verify presence of files written by each pod
    logger.info(
        f"Verify existence of each file from app pod "
        f"{pod_list[-1].name} "
    )
    for pod_obj in pod_list:
        file_path = pod.get_file_path(pod_list[-1], pod_obj.name)
        assert pod.check_file_existence(pod_list[-1], file_path), (
            f"File {pod_obj.name} doesnt exist"
        )
        logger.info(
            f"File {pod_obj.name} exists in {pod_list[-1].name}"
        )

    # From surviving pod, verify data integrity of files
    # written by deleted pods
    logger.info(f"verify all data from {pod_list[-1].name}")
    for pod_obj, md5sum in zip(pod_list, md5sum_pod_data):
        assert pod.verify_data_integrity(
            pod_obj=pod_list[-1],
            file_name=pod_obj.name,
            original_md5sum=md5sum
        )

    # From surviving pod, confirm mount point is still write-able
    logger.info(f"Re-running IO on pod {pod_list[-1].name}")
    fio_new_file = f"{pod_list[-1].name}-new-file"
    pod_list[-1].run_io(
        storage_type='fs', size='512M', runtime=30,
        fio_filename=fio_new_file
    )
    pod.get_fio_rw_iops(pod_list[-1])
    file_path = pod.get_file_path(pod_list[-1], fio_new_file)
    assert pod.check_file_existence(pod_list[-1], file_path), (
        f"File {fio_new_file} doesnt exist"
    )
    logger.info(
        f"File {fio_new_file} exists in {pod_list[-1].name} "
    )