def test_glusterblock_logs_presence_verification(self):
    """Validate presence of glusterblock provisioner POD and its status"""

    # Get glusterblock provisioner dc name
    cmd = ("oc get dc | awk '{ print $1 }' | "
           "grep -e glusterblock -e provisioner")
    dc_name = cmd_run(cmd, self.ocp_master_node[0], True)

    # Get glusterblock provisioner pod name and its status
    gb_prov_name, gb_prov_status = oc_get_custom_resource(
        self.node, 'pod',
        custom=':.metadata.name,:.status.phase',
        selector='deploymentconfig=%s' % dc_name)[0]
    self.assertEqual(gb_prov_status, 'Running')

    # Create Secret, SC and PVC
    self.create_storage_class()
    self.create_and_wait_for_pvc()

    # Get list of Gluster nodes
    g_hosts = list(g.config.get("gluster_servers", {}).keys())
    self.assertGreater(
        len(g_hosts), 0,
        "We expect, at least, one Gluster Node/POD:\n %s" % g_hosts)

    # Perform checks on Gluster nodes/PODs
    logs = ("gluster-block-configshell", "gluster-blockd")

    gluster_pods = oc_get_pods(
        self.ocp_client[0], selector="glusterfs-node=pod")
    cmd = "tail -n 5 /var/log/glusterfs/gluster-block/%s.log"
    for g_host in g_hosts:
        for log in logs:
            out = cmd_run_on_gluster_pod_or_node(
                self.ocp_client[0], cmd % log, gluster_node=g_host)
            self.assertTrue(
                out, "Command '%s' output is empty." % (cmd % log))

def verify_pods_are_running(self):
    pods = oc_get_pods(self.ocp_master_node[0], selector='scale=scale')
    faulty_pods = {}
    for pod in pods.keys():
        if not (pods[pod]['ready'] == '1/1'
                and pods[pod]['status'] == 'Running'):
            faulty_pods[pod] = pods[pod]
    msg = "Out of {} pods {} pods are not running. Pods are {}".format(
        len(pods), len(faulty_pods), faulty_pods)
    self.assertFalse(faulty_pods, msg)

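A minimal, self-contained sketch of the pod dictionary shape these helpers assume. The field names ('ready', 'status', 'restarts', 'node') are taken from how the oc_get_pods() results are indexed throughout this collection; the pod name and values below are made up for illustration only.

sample_pods = {
    "storage-app-1-abcde": {          # hypothetical pod name
        "ready": "1/1",
        "status": "Running",
        "restarts": "0",
        "node": "node-1.example.com",
    },
}
# Same health check the helper above performs:
faulty_pods = {
    name: info for name, info in sample_pods.items()
    if not (info["ready"] == "1/1" and info["status"] == "Running")}
assert not faulty_pods, "Unexpected non-running pods: %s" % faulty_pods
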
def test_prometheus_volume_metrics_on_pod_restart(self):
    """Validate volume metrics using prometheus before and after
    pod restart"""

    # Create PVC and wait for it to be in 'Bound' state
    pvc_name = self.create_and_wait_for_pvc()
    pod_name = openshift_ops.oc_create_tiny_pod_with_volume(
        self._master, pvc_name, "autotest-volume",
        image=self.io_container_image_cirros)
    self.addCleanup(
        openshift_ops.oc_delete, self._master, 'pod', pod_name,
        raise_on_absence=False)

    # Wait for POD be up and running
    openshift_ops.wait_for_pod_be_ready(
        self._master, pod_name, timeout=60, wait_step=2)

    # Write data on the volume and wait for 2 mins; the sleep is needed
    # for prometheus to pick up the exact values of the metrics
    self._run_io_on_the_pod(pod_name, 30)
    time.sleep(120)

    # Fetch the metrics and store them in initial_metrics as a dictionary
    initial_metrics = self._get_and_manipulate_metric_data(
        self.metrics, pvc_name)

    # Mark the node on which the app pod is running as unschedulable
    openshift_ops.switch_oc_project(
        self._master, self.storage_project_name)
    pod_info = openshift_ops.oc_get_pods(self._master, name=pod_name)
    openshift_ops.oc_adm_manage_node(
        self._master, '--schedulable=false',
        nodes=[pod_info[pod_name]["node"]])
    self.addCleanup(
        openshift_ops.oc_adm_manage_node, self._master,
        '--schedulable=true', nodes=[pod_info[pod_name]["node"]])

    # Delete the existing pod and create a new pod
    openshift_ops.oc_delete(self._master, 'pod', pod_name)
    pod_name = openshift_ops.oc_create_tiny_pod_with_volume(
        self._master, pvc_name, "autotest-volume")
    self.addCleanup(openshift_ops.oc_delete, self._master, 'pod', pod_name)

    # Wait for the POD to be up and running and for prometheus to
    # refresh its data
    openshift_ops.wait_for_pod_be_ready(
        self._master, pod_name, timeout=60, wait_step=2)
    time.sleep(120)

    # Fetch the metrics again into final_metrics and validate them
    # against initial_metrics
    final_metrics = self._get_and_manipulate_metric_data(
        self.metrics, pvc_name)
    self.assertEqual(dict(initial_metrics), dict(final_metrics),
                     "Metrics are different post pod restart")

def verify_if_more_than_n_percentage_pod_restarted(
        self, pods_old, selector='scale=scale', percentage=33):

    # Make sure pods did not get restarted
    pods_new = oc_get_pods(self.ocp_master_node[0], selector=selector)
    pods_restart = []
    for pod in pods_new.keys():
        if pods_new[pod]['restarts'] != pods_old[pod]['restarts']:
            pods_restart += [pod]

    # Fail if more than 'percentage' percent of the pods restarted
    if len(pods_restart) > int(len(pods_new.keys()) / (100 / percentage)):
        msg = "Out of {} pods {} pods restarted {}".format(
            len(pods_new), len(pods_restart), pods_restart)
        raise AssertionError(msg)

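A quick arithmetic check (illustrative only, not part of the helper) of the restart threshold computed above: with the default percentage=33, int(len(pods) / (100 / percentage)) works out to roughly one third of the pod count.

pods_total, percentage = 10, 33
# int(10 / (100 / 33)) == int(10 / 3.0303...) == 3, so up to 3 restarted
# pods out of 10 are tolerated before an AssertionError is raised.
threshold = int(pods_total / (100 / percentage))
assert threshold == 3
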
def _fetch_metric_from_promtheus_pod(self, metric):
    """Fetch metric from prometheus pod using api call"""

    prometheus_pods = list(openshift_ops.oc_get_pods(
        self._master,
        selector=self._prometheus_resources_selector).keys())
    fetch_metric_cmd = ("curl 'http://localhost:9090/api/v1/query"
                        "?query={}'".format(metric))
    ret, metric_data, _ = openshift_ops.oc_rsh(
        self._master, prometheus_pods[0], fetch_metric_cmd)
    metric_result = json.loads(metric_data)["data"]["result"]
    if (not metric_result) or ret:
        raise exceptions.ExecutionError(
            "Failed to fetch data for metric {}, output {}".format(
                metric, metric_result))
    return metric_result

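A hedged illustration of the JSON document the Prometheus HTTP query API (/api/v1/query) is expected to return, which is why the helper above indexes json.loads(metric_data)["data"]["result"]. The metric name, timestamp and sample value below are made up for the example.

import json

sample_response = json.dumps({
    "status": "success",
    "data": {
        "resultType": "vector",
        "result": [{
            "metric": {"__name__": "kube_persistentvolumeclaim_info"},
            "value": [1614000000.0, "1"],  # [unix timestamp, sample value]
        }],
    },
})
# The same parsing step the helper performs on the curl output:
metric_result = json.loads(sample_response)["data"]["result"]
assert metric_result and metric_result[0]["value"][1] == "1"
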
def validate_multipath_info(self, hacount):
    """Validates multipath command output on the node hosting the pod

    Args:
        hacount (int): hacount for which multipath is to be checked
    """
    # Create pod using the PVC created earlier
    dc_name = oc_create_app_dc_with_io(
        self.ocp_master_node[0], self.pvc_name,
        image=self.io_container_image_cirros)
    pod_name = get_pod_name_from_dc(self.ocp_master_node[0], dc_name)
    self.addCleanup(oc_delete, self.ocp_master_node[0], "dc", dc_name)
    self.addCleanup(
        scale_dc_pod_amount_and_wait, self.ocp_master_node[0], dc_name, 0)

    wait_for_pod_be_ready(
        self.ocp_master_node[0], pod_name, timeout=120, wait_step=3)

    # Get pod info
    pod_info = oc_get_pods(
        self.ocp_master_node[0], selector='deploymentconfig=%s' % dc_name)
    node = pod_info[pod_name]['node']

    # Find iqn from volume info
    pv_name = get_pv_name_from_pvc(self.ocp_master_node[0], self.pvc_name)
    custom = [r':.metadata.annotations."gluster\.org\/volume\-id"']
    vol_id = oc_get_custom_resource(
        self.ocp_master_node[0], 'pv', custom, pv_name)[0]
    vol_info = heketi_blockvolume_info(
        self.heketi_client_node, self.heketi_server_url, vol_id, json=True)
    iqn = vol_info['blockvolume']['iqn']

    # Get the paths info from the node
    devices = get_iscsi_block_devices_by_path(node, iqn).keys()
    self.assertEqual(hacount, len(devices))

    # Validate mpath
    mpaths = set()
    for device in devices:
        mpaths.add(get_mpath_name_from_device_name(node, device))
    self.assertEqual(1, len(mpaths))
    validate_multipath_pod(
        self.ocp_master_node[0], pod_name, hacount, list(mpaths)[0])

def validate_multipath_info(self, hacount):
    """Validates multipath command output on the node hosting the pod

    Args:
        hacount (int): hacount for which multipath is to be checked
    """
    # Create pod using the PVC created earlier
    dc_name = oc_create_app_dc_with_io(
        self.ocp_master_node[0], self.pvc_name)
    pod_name = get_pod_name_from_dc(self.ocp_master_node[0], dc_name)
    self.addCleanup(oc_delete, self.ocp_master_node[0], "dc", dc_name)
    self.addCleanup(
        scale_dc_pod_amount_and_wait, self.ocp_master_node[0], dc_name, 0)

    wait_for_pod_be_ready(
        self.ocp_master_node[0], pod_name, timeout=120, wait_step=3)

    # Get pod info
    pod_info = oc_get_pods(
        self.ocp_master_node[0], selector='deploymentconfig=%s' % dc_name)
    node = pod_info[pod_name]['node']

    # Find iqn from volume info
    pv_name = get_pv_name_from_pvc(self.ocp_master_node[0], self.pvc_name)
    custom = [r':.metadata.annotations."gluster\.org\/volume\-id"']
    vol_id = oc_get_custom_resource(
        self.ocp_master_node[0], 'pv', custom, pv_name)[0]
    vol_info = heketi_blockvolume_info(
        self.heketi_client_node, self.heketi_server_url, vol_id, json=True)
    iqn = vol_info['blockvolume']['iqn']

    # Get the paths info from the node
    devices = get_iscsi_block_devices_by_path(node, iqn).keys()
    self.assertEqual(hacount, len(devices))

    # Validate mpath
    mpaths = set()
    for device in devices:
        mpaths.add(get_mpath_name_from_device_name(node, device))
    self.assertEqual(1, len(mpaths))
    validate_multipath_pod(
        self.ocp_master_node[0], pod_name, hacount, list(mpaths)[0])

def test_glusterblock_logs_presence_verification(self):
    """Validate presence of glusterblock provisioner POD and its status"""

    # Get glusterblock provisioner dc name
    cmd = ("oc get dc | awk '{ print $1 }' | "
           "grep -e glusterblock -e provisioner")
    dc_name = cmd_run(cmd, self.ocp_master_node[0], True)

    # Get glusterblock provisioner pod name and its status
    gb_prov_name, gb_prov_status = oc_get_custom_resource(
        self.node, 'pod',
        custom=':.metadata.name,:.status.phase',
        selector='deploymentconfig=%s' % dc_name)[0]
    self.assertEqual(gb_prov_status, 'Running')

    # Create Secret, SC and PVC
    self.create_storage_class()
    self.create_and_wait_for_pvc()

    # Get list of Gluster nodes
    g_hosts = list(g.config.get("gluster_servers", {}).keys())
    self.assertGreater(
        len(g_hosts), 0,
        "We expect, at least, one Gluster Node/POD:\n %s" % g_hosts)

    # Perform checks on Gluster nodes/PODs
    logs = ("gluster-block-configshell", "gluster-blockd")

    gluster_pods = oc_get_pods(
        self.ocp_client[0], selector="glusterfs-node=pod")
    if gluster_pods:
        cmd = "tail -n 5 /var/log/glusterfs/gluster-block/%s.log"
    else:
        cmd = "tail -n 5 /var/log/gluster-block/%s.log"
    for g_host in g_hosts:
        for log in logs:
            out = cmd_run_on_gluster_pod_or_node(
                self.ocp_client[0], cmd % log, gluster_node=g_host)
            self.assertTrue(
                out, "Command '%s' output is empty." % (cmd % log))

def test_glusterblock_logs_presence_verification(self):
    """Validate presence of glusterblock provisioner POD and its status"""
    gb_prov_cmd = ("oc get pods --all-namespaces "
                   "-l glusterfs=block-%s-provisioner-pod "
                   "-o=custom-columns=:.metadata.name,:.status.phase"
                   % self.storage_project_name)
    ret, out, err = g.run(self.ocp_client[0], gb_prov_cmd, "root")

    self.assertEqual(ret, 0, "Failed to get Glusterblock provisioner POD.")

    gb_prov_name, gb_prov_status = out.split()
    self.assertEqual(gb_prov_status, 'Running')

    # Create Secret, SC and PVC
    self.create_storage_class()
    self.create_and_wait_for_pvc()

    # Get list of Gluster nodes
    g_hosts = list(g.config.get("gluster_servers", {}).keys())
    self.assertGreater(
        len(g_hosts), 0,
        "We expect, at least, one Gluster Node/POD:\n %s" % g_hosts)

    # Perform checks on Gluster nodes/PODs
    logs = ("gluster-block-configshell", "gluster-blockd")

    gluster_pods = oc_get_pods(
        self.ocp_client[0], selector="glusterfs-node=pod")
    if gluster_pods:
        cmd = "tail -n 5 /var/log/glusterfs/gluster-block/%s.log"
    else:
        cmd = "tail -n 5 /var/log/gluster-block/%s.log"
    for g_host in g_hosts:
        for log in logs:
            out = cmd_run_on_gluster_pod_or_node(
                self.ocp_client[0], cmd % log, gluster_node=g_host)
            self.assertTrue(
                out, "Command '%s' output is empty." % (cmd % log))

def verify_iscsi_sessions_and_multipath(
        self, pvc_name, rname, rtype='dc', heketi_server_url=None,
        is_registry_gluster=False):
    if not heketi_server_url:
        heketi_server_url = self.heketi_server_url

    # Get storage ips of glusterfs pods
    keys = (list(g.config['gluster_registry_servers'].keys())
            if is_registry_gluster else self.gluster_servers)
    servers_info = (g.config['gluster_registry_servers']
                    if is_registry_gluster else self.gluster_servers_info)
    gluster_ips = []
    for key in keys:
        gluster_ips.append(servers_info[key]['storage'])
    gluster_ips.sort()

    # Find iqn and hacount from volume info
    pv_name = get_pv_name_from_pvc(self.ocp_client[0], pvc_name)
    custom = [r':.metadata.annotations."gluster\.org\/volume\-id"']
    vol_id = oc_get_custom_resource(
        self.ocp_client[0], 'pv', custom, pv_name)[0]
    vol_info = heketi_blockvolume_info(
        self.heketi_client_node, heketi_server_url, vol_id, json=True)
    iqn = vol_info['blockvolume']['iqn']
    hacount = int(vol_info['hacount'])

    # Find node on which pod is running
    if rtype == 'dc':
        pod_name = get_pod_name_from_dc(self.ocp_client[0], rname)
        pod_info = oc_get_pods(
            self.ocp_client[0], selector='deploymentconfig=%s' % rname)
    elif rtype == 'pod':
        pod_info = oc_get_pods(self.ocp_client[0], name=rname)
        pod_name = rname
    elif rtype == 'rc':
        pod_name = get_pod_name_from_rc(self.ocp_client[0], rname)
        pod_info = oc_get_pods(
            self.ocp_client[0], selector='name=%s' % rname)
    else:
        raise NameError("Value of rtype should be 'dc', 'pod' or 'rc'")
    node = pod_info[pod_name]['node']

    # Get the iscsi sessions info from the node
    iscsi = get_iscsi_session(node, iqn)
    msg = ('Only %s iscsi sessions are present on node %s, expected %s.'
           % (iscsi, node, hacount))
    self.assertEqual(hacount, len(iscsi), msg)
    iscsi.sort()
    msg = ("Only gluster Nodes %s were expected in iscsi sessions, "
           "but got other Nodes %s on Node %s" % (gluster_ips, iscsi, node))
    self.assertEqual(set(iscsi), (set(gluster_ips) & set(iscsi)), msg)

    # Get the paths info from the node
    devices = get_iscsi_block_devices_by_path(node, iqn)
    msg = ("Only %s devices are present on Node %s, expected %s" % (
        devices, node, hacount,))
    self.assertEqual(hacount, len(devices), msg)

    # Get mpath names and verify that only one mpath is there
    mpaths = set()
    for device in devices.keys():
        mpaths.add(get_mpath_name_from_device_name(node, device))
    msg = ("Only one mpath was expected on Node %s, but got %s" % (
        node, mpaths))
    self.assertEqual(1, len(mpaths), msg)

    validate_multipath_pod(
        self.ocp_client[0], pod_name, hacount, mpath=list(mpaths)[0])

    return iqn, hacount, node

def initiator_side_failures(self):
    # Get storage ips of glusterfs pods
    keys = self.gluster_servers
    gluster_ips = []
    for key in keys:
        gluster_ips.append(self.gluster_servers_info[key]['storage'])
    gluster_ips.sort()

    self.create_storage_class()
    self.create_and_wait_for_pvc()

    # Find iqn and hacount from volume info
    pv_name = get_pv_name_from_pvc(self.node, self.pvc_name)
    custom = [r':.metadata.annotations."gluster\.org\/volume\-id"']
    vol_id = oc_get_custom_resource(self.node, 'pv', custom, pv_name)[0]
    vol_info = heketi_blockvolume_info(
        self.heketi_client_node, self.heketi_server_url, vol_id, json=True)
    iqn = vol_info['blockvolume']['iqn']
    hacount = int(self.sc['hacount'])

    # Create app pod
    dc_name, pod_name = self.create_dc_with_pvc(self.pvc_name)

    # When we have to verify iscsi login devices & mpaths, we run it twice
    for i in range(2):

        # Get node hostname from pod info
        pod_info = oc_get_pods(
            self.node, selector='deploymentconfig=%s' % dc_name)
        node = pod_info[pod_name]['node']

        # Get the iscsi sessions info from the node
        iscsi = get_iscsi_session(node, iqn)
        self.assertEqual(hacount, len(iscsi))
        iscsi.sort()
        self.assertEqual(set(iscsi), (set(gluster_ips) & set(iscsi)))

        # Get the paths info from the node
        devices = get_iscsi_block_devices_by_path(node, iqn).keys()
        self.assertEqual(hacount, len(devices))

        # Get mpath names and verify that only one mpath is there
        mpaths = set()
        for device in devices:
            mpaths.add(get_mpath_name_from_device_name(node, device))
        self.assertEqual(1, len(mpaths))

        validate_multipath_pod(
            self.node, pod_name, hacount, mpath=list(mpaths)[0])

        # The failover and logout verification below are done only once,
        # so stop after the second verification pass
        if i == 1:
            break

        # Make the node on which the pod is running unschedulable
        oc_adm_manage_node(
            self.node, '--schedulable=false', nodes=[node])

        # Make that node schedulable again during cleanup
        self.addCleanup(
            oc_adm_manage_node, self.node, '--schedulable=true',
            nodes=[node])

        # Delete the pod so it gets respun on any other node
        oc_delete(self.node, 'pod', pod_name)
        wait_for_resource_absence(self.node, 'pod', pod_name)

        # Wait for the new pod to come up
        pod_name = get_pod_name_from_dc(self.node, dc_name)
        wait_for_pod_be_ready(self.node, pod_name)

        # Get the iscsi session from the previous node to verify logout
        iscsi = get_iscsi_session(node, iqn, raise_on_error=False)
        self.assertFalse(iscsi)

def test_metrics_workload_on_prometheus(self):
    """Validate metrics workload on prometheus"""

    # Skip test if the prometheus pods are not present
    openshift_ops.switch_oc_project(
        self._master, self._prometheus_project_name)
    prometheus_pods = openshift_ops.oc_get_pods(
        self._master, selector=self._prometheus_resources_selector)
    if not prometheus_pods:
        self.skipTest("Skipping test as prometheus pod is not present")
    if not self.registry_sc:
        self.skipTest(
            "Skipping test as registry storage details are not provided")
    self._registry_project = self.registry_sc.get('restsecretnamespace')
    self.prefix = "autotest-{}".format(utils.get_random_str())

    # Get one of the prometheus pod names and the respective pvc name
    prometheus_pod = list(prometheus_pods.keys())[0]
    pvc_custom = ":.spec.volumes[*].persistentVolumeClaim.claimName"
    pvc_name = openshift_ops.oc_get_custom_resource(
        self._master, "pod", pvc_custom, prometheus_pod)[0]
    self.assertTrue(
        pvc_name, "Failed to get PVC name for prometheus"
        " pod {}".format(prometheus_pod))
    self.verify_iscsi_sessions_and_multipath(
        pvc_name, prometheus_pod, rtype='pod',
        heketi_server_url=self._registry_heketi_server_url,
        is_registry_gluster=True)

    # Try to fetch metric from the prometheus pod
    self._fetch_metric_from_promtheus_pod(
        metric='kube_persistentvolumeclaim_info')

    # Create storage class
    openshift_ops.switch_oc_project(
        self._master, self._registry_project)
    self.sc_name = self.create_storage_class(
        vol_name_prefix=self.prefix, glusterfs_registry=True)
    self.addCleanup(
        openshift_ops.switch_oc_project, self._master,
        self._registry_project)

    # Create PVCs and app pods
    pvc_size, pvc_count, batch_count = 1, 5, 5
    for _ in range(batch_count):
        test_pvc_names = self.create_and_wait_for_pvcs(
            pvc_size, pvc_name_prefix=self.prefix, pvc_amount=pvc_count,
            sc_name=self.sc_name, timeout=600, wait_step=10)
        self.create_dcs_with_pvc(
            test_pvc_names, timeout=600, wait_step=5,
            dc_name_prefix="autotests-dc-with-app-io",
            space_to_use=1048576)

    # Check from the prometheus pod for the PVC space usage
    openshift_ops.switch_oc_project(
        self._master, self._prometheus_project_name)
    mount_path = "/prometheus"
    cmd = "oc exec {0} -- df -PT {1} | grep {1}".format(
        prometheus_pod, mount_path)
    out = self.cmd_run(cmd)
    self.assertTrue(
        out, "Failed to get info about mounted volume. Output is empty.")

    # Try to fetch metric from prometheus pod
    self._fetch_metric_from_promtheus_pod(
        metric='kube_persistentvolumeclaim_info')
    self._fetch_metric_from_promtheus_pod(
        metric='kube_pod_spec_volumes_persistentvolumeclaims_info')
    self.addCleanup(
        openshift_ops.switch_oc_project, self._master,
        self._registry_project)

def test_restart_prometheus_glusterfs_pod(self):
    """Validate restarting glusterfs pod"""

    # Add check for CRS version
    openshift_ops.switch_oc_project(
        self._master, self._registry_project_name)
    if not self.is_containerized_gluster():
        self.skipTest(
            "Skipping this test case as CRS version check "
            "can not be implemented")

    # Get one of the prometheus pod names and the respective pvc name
    openshift_ops.switch_oc_project(
        self._master, self._prometheus_project_name)
    prometheus_pods = openshift_ops.oc_get_pods(
        self._master, selector=self._prometheus_resources_selector)
    if not prometheus_pods:
        self.skipTest("Skipping test as prometheus pod is not present")
    prometheus_pod = list(prometheus_pods.keys())[0]
    pvc_name = openshift_ops.oc_get_custom_resource(
        self._master, "pod",
        ":.spec.volumes[*].persistentVolumeClaim.claimName",
        prometheus_pod)[0]
    self.assertTrue(
        pvc_name,
        "Failed to get pvc name from {} pod".format(prometheus_pod))
    iqn, _, node = self.verify_iscsi_sessions_and_multipath(
        pvc_name, prometheus_pod, rtype='pod',
        heketi_server_url=self._registry_heketi_server_url,
        is_registry_gluster=True)

    # Get the ip of the active path
    devices = openshift_storage_libs.get_iscsi_block_devices_by_path(
        node, iqn)
    mpath = openshift_storage_libs.get_mpath_name_from_device_name(
        node, list(devices.keys())[0])
    mpath_dev = (
        openshift_storage_libs.get_active_and_enabled_devices_from_mpath(
            node, mpath))
    node_ip = devices[mpath_dev['active'][0]]

    # Get the name of the gluster pod from the ip
    openshift_ops.switch_oc_project(
        self._master, self._registry_project_name)
    gluster_pods = openshift_ops.get_ocp_gluster_pod_details(self._master)
    active_pod_name = list(
        filter(lambda pod: (pod["pod_host_ip"] == node_ip), gluster_pods)
    )[0]["pod_name"]
    err_msg = "Failed to get the gluster pod name {} with active path"
    self.assertTrue(active_pod_name, err_msg.format(active_pod_name))
    g_pods = [pod['pod_name'] for pod in gluster_pods]
    g_pods.remove(active_pod_name)
    pod_list = [active_pod_name, g_pods[0]]
    for pod_name in pod_list:

        # Delete the glusterfs pods
        openshift_ops.switch_oc_project(
            self._master, self._prometheus_project_name)
        self._fetch_metric_from_promtheus_pod(
            metric='heketi_device_brick_count')

        openshift_ops.switch_oc_project(
            self._master, self._registry_project_name)
        g_pod_list_before = [
            pod["pod_name"]
            for pod in openshift_ops.get_ocp_gluster_pod_details(
                self._master)]

        openshift_ops.oc_delete(self._master, 'pod', pod_name)
        self.addCleanup(self._guster_pod_delete, g_pod_list_before)

        # Wait for gluster pod to be absent
        openshift_ops.wait_for_resource_absence(
            self._master, 'pod', pod_name)

        # Try to fetch metric from prometheus pod
        openshift_ops.switch_oc_project(
            self._master, self._prometheus_project_name)
        self._fetch_metric_from_promtheus_pod(
            metric='heketi_device_brick_count')

        # Wait for new pod to come up
        openshift_ops.switch_oc_project(
            self._master, self._registry_project_name)
        self.assertTrue(
            self._get_newly_deployed_gluster_pod(g_pod_list_before),
            "Failed to get new pod")
        self._wait_for_gluster_pod_be_ready(g_pod_list_before)

        # Validate iscsi and multipath
        openshift_ops.switch_oc_project(
            self._master, self._prometheus_project_name)
        self.verify_iscsi_sessions_and_multipath(
            pvc_name, prometheus_pod, rtype='pod',
            heketi_server_url=self._registry_heketi_server_url,
            is_registry_gluster=True)

        # Try to fetch metric from prometheus pod
        self._fetch_metric_from_promtheus_pod(
            metric='heketi_device_brick_count')

def test_prometheous_kill_bhv_brick_process(self):
    """Validate kill brick process of block hosting
    volume with prometheus workload running"""

    # Add check for CRS version
    openshift_ops.switch_oc_project(
        self._master, self._registry_project_name)
    if not self.is_containerized_gluster():
        self.skipTest("Skipping this test case as CRS"
                      " version check can not be implemented")

    # Get one of the prometheus pod names and the respective pvc name
    openshift_ops.switch_oc_project(
        self._master, self._prometheus_project_name)
    prometheus_pods = openshift_ops.oc_get_pods(
        self._master, selector=self._prometheus_resources_selector)
    if not prometheus_pods:
        self.skipTest("Skipping test as prometheus pod is not present")

    # Validate iscsi and multipath
    prometheus_pod = list(prometheus_pods.keys())[0]
    pvc_name = openshift_ops.oc_get_custom_resource(
        self._master, "pod",
        ":.spec.volumes[*].persistentVolumeClaim.claimName",
        prometheus_pod)
    self.assertTrue(pvc_name, "Failed to get PVC name")
    pvc_name = pvc_name[0]
    self.verify_iscsi_sessions_and_multipath(
        pvc_name, prometheus_pod, rtype='pod',
        heketi_server_url=self._registry_heketi_server_url,
        is_registry_gluster=True)

    # Try to fetch metric from prometheus pod
    self._fetch_metric_from_promtheus_pod(
        metric='heketi_device_brick_count')

    # Kill the brick process of a BHV
    gluster_node = list(self._registry_servers_info.keys())[0]
    openshift_ops.switch_oc_project(
        self._master, self._registry_project_name)
    bhv_name = self.get_block_hosting_volume_by_pvc_name(
        pvc_name, heketi_server_url=self._registry_heketi_server_url,
        gluster_node=gluster_node, ocp_client_node=self._master)
    vol_status = gluster_ops.get_gluster_vol_status(bhv_name)
    gluster_node_ip, brick_pid = None, None
    for g_node, g_node_data in vol_status.items():
        for process_name, process_data in g_node_data.items():
            if process_name.startswith("/var"):
                gluster_node_ip = g_node
                brick_pid = process_data["pid"]
                break
        if gluster_node_ip and brick_pid:
            break
    self.assertIsNotNone(brick_pid, "Could not find pid for brick")
    cmd = "kill -9 {}".format(brick_pid)
    openshift_ops.cmd_run_on_gluster_pod_or_node(
        self._master, cmd, gluster_node_ip)
    self.addCleanup(self._guster_volume_cleanup, bhv_name)

    # Check if the brick process has been killed
    killed_pid_cmd = "ps -p {} -o pid --no-headers".format(brick_pid)
    try:
        openshift_ops.cmd_run_on_gluster_pod_or_node(
            self._master, killed_pid_cmd, gluster_node_ip)
    except exceptions.ExecutionError:
        g.log.info("Brick process {} was killed"
                   " successfully".format(brick_pid))

    # Try to fetch metric from prometheus pod
    openshift_ops.switch_oc_project(
        self._master, self._prometheus_project_name)
    self._fetch_metric_from_promtheus_pod(
        metric='heketi_device_brick_count')

    # Start the bhv using force
    openshift_ops.switch_oc_project(
        self._master, self._registry_project_name)
    start_vol, _, _ = volume_ops.volume_start(
        gluster_node_ip, bhv_name, force=True)
    self.assertFalse(
        start_vol,
        "Failed to start volume {} using force".format(bhv_name))

    # Validate iscsi and multipath
    openshift_ops.switch_oc_project(
        self._master, self._prometheus_project_name)
    self.verify_iscsi_sessions_and_multipath(
        pvc_name, prometheus_pod, rtype='pod',
        heketi_server_url=self._registry_heketi_server_url,
        is_registry_gluster=True)

    # Try to fetch metric from prometheus pod
    self._fetch_metric_from_promtheus_pod(
        metric='heketi_device_brick_count')

def test_prometheus_volume_metrics_on_node_reboot(self):
    """Validate volume metrics using prometheus before and after
    node reboot"""

    # Pod name prefix for the entire test
    prefix = "autotest-{}".format(utils.get_random_str())

    # Create I/O pod with PVC
    pvc_name = self.create_and_wait_for_pvc()
    pod_name = openshift_ops.oc_create_tiny_pod_with_volume(
        self._master, pvc_name, prefix,
        image=self.io_container_image_cirros)
    self.addCleanup(
        openshift_ops.oc_delete, self._master, 'pod', pod_name,
        raise_on_absence=False)
    openshift_ops.wait_for_pod_be_ready(
        self._master, pod_name, timeout=60, wait_step=5)

    # Write data on the volume and wait for 2 mins; the sleep is needed
    # for prometheus to pick up the exact values of the metrics
    ret, _, err = openshift_ops.oc_rsh(
        self._master, pod_name, "touch /mnt/file{1..1000}")
    self.assertEqual(
        ret, 0, "Failed to create files in the app pod "
        "with {}".format(err))
    time.sleep(120)

    # Fetch the metrics and store them in initial_metrics as a dictionary
    initial_metrics = self._get_and_manipulate_metric_data(
        self.metrics, pvc_name)
    openshift_ops.switch_oc_project(
        self._master, self.storage_project_name)

    # Get the hostname of the node to reboot, i.e. where the pod runs
    pod_info = openshift_ops.oc_get_pods(self._master, name=pod_name)
    node_for_reboot = pod_info[pod_name]['node']

    # Get the vm name by the hostname
    vm_name = node_ops.find_vm_name_by_ip_or_hostname(node_for_reboot)

    # Power the vm off and on, based on whether it is a gluster node or not
    if node_for_reboot in self.gluster_servers:
        self.power_off_gluster_node_vm(vm_name, node_for_reboot)
        self.power_on_gluster_node_vm(vm_name, node_for_reboot)
    else:
        self.power_off_vm(vm_name)
        self.power_on_vm(vm_name)
    openshift_ops.wait_for_ocp_node_be_ready(
        self._master, node_for_reboot)

    # Create the new pod and validate the prometheus metrics
    pod_name = openshift_ops.oc_create_tiny_pod_with_volume(
        self._master, pvc_name, prefix)
    self.addCleanup(openshift_ops.oc_delete, self._master, 'pod', pod_name)

    # Wait for the POD to be up and running and for prometheus to
    # refresh its data
    openshift_ops.wait_for_pod_be_ready(
        self._master, pod_name, timeout=60, wait_step=5)
    time.sleep(120)

    # Fetch the metrics again into final_metrics and validate them
    # against initial_metrics
    final_metrics = self._get_and_manipulate_metric_data(
        self.metrics, pvc_name)
    self.assertEqual(dict(initial_metrics), dict(final_metrics),
                     "Metrics are different post node reboot")