def power_on_gluster_node_vm(self, vm_name, gluster_hostname,
                             timeout=300, wait_step=3):
    # NOTE(Nitin Goyal): Same timeout is used for all functions.

    # Bring up the target node
    power_on_vm_by_name(vm_name)

    # Wait for gluster node and pod to be ready
    if self.is_containerized_gluster():
        wait_for_ocp_node_be_ready(
            self.node, gluster_hostname,
            timeout=timeout, wait_step=wait_step)
        wait_for_gluster_pod_be_ready_on_specific_node(
            self.node, gluster_hostname,
            timeout=timeout, wait_step=wait_step)

    # Wait for gluster services to be up
    for service in ('glusterd', 'gluster-blockd'):
        wait_for_service_status_on_gluster_pod_or_node(
            self.node, service, 'active', 'running', gluster_hostname,
            raise_on_error=False, timeout=timeout, wait_step=wait_step)
def _wait_for_gluster_pod_after_node_reboot(self, node_hostname):
    """Wait for glusterfs pod to be ready after node reboot"""
    # Wait for the rebooted node to be back in Ready state
    openshift_ops.wait_for_ocp_node_be_ready(
        self.ocp_client, node_hostname)

    # Wait for the glusterfs pod scheduled on that node to be ready
    gluster_pod = openshift_ops.get_gluster_pod_name_for_specific_node(
        self.ocp_client, node_hostname)
    openshift_ops.wait_for_pod_be_ready(self.ocp_client, gluster_pod)

    # Verify the expected state of each gluster service inside the pod
    services = (
        ("glusterd", "running"), ("gluster-blockd", "running"),
        ("tcmu-runner", "running"), ("gluster-block-target", "exited"))
    for service, state in services:
        openshift_ops.check_service_status_on_pod(
            self.ocp_client, gluster_pod, service, "active", state)
def test_heketi_metrics_validation_with_node_reboot(self):
    """Validate heketi metrics after node reboot using prometheus"""
    initial_metrics, final_metrics = {}, {}

    # Use storage project
    openshift_ops.switch_oc_project(
        self._master, self.storage_project_name)

    # Get initial metrics result
    h_node, h_server = self.heketi_client_node, self.heketi_server_url
    initial_metrics = tuple(
        heketi_ops.get_heketi_metrics(h_node, h_server).get(metric)[0]
        for metric in self.metrics)

    # Use prometheus project
    openshift_ops.switch_oc_project(
        self._master, self._prometheus_project_name)

    # Get initial prometheus result
    initial_prometheus = self._get_and_manipulate_metric_data(
        self.metrics)

    # Get the name of the node hosting the heketi pod
    openshift_ops.switch_oc_project(
        self._master, self.storage_project_name)
    heketi_pod = openshift_ops.get_pod_name_from_dc(
        self._master, self.heketi_dc_name)
    heketi_node = openshift_ops.oc_get_custom_resource(
        self._master, 'pod', '.:spec.nodeName', heketi_pod)[0]

    # Reboot the node on which the heketi pod is scheduled
    self.addCleanup(
        self._check_heketi_and_gluster_pod_after_node_reboot, heketi_node)
    node_ops.node_reboot_by_command(heketi_node)

    # Wait for the node to become NotReady
    custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
    for w in waiter.Waiter(300, 10):
        status = openshift_ops.oc_get_custom_resource(
            self._master, 'node', custom, heketi_node)
        if status[0] == 'False':
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "Failed to bring down node {}".format(heketi_node))

    # Wait for the node to become Ready again
    openshift_ops.wait_for_ocp_node_be_ready(self._master, heketi_node)

    # Wait for heketi and glusterfs pods to become ready
    self._check_heketi_and_gluster_pod_after_node_reboot(heketi_node)

    # Use prometheus project
    openshift_ops.switch_oc_project(
        self._master, self._prometheus_project_name)

    # Get final metrics result
    final_metrics = tuple(
        heketi_ops.get_heketi_metrics(h_node, h_server).get(metric)[0]
        for metric in self.metrics)

    # Get final prometheus result
    final_prometheus = self._get_and_manipulate_metric_data(
        self.metrics)

    err_msg = "Initial value {} is not same as final value {}"
    self.assertEqual(
        initial_metrics, final_metrics,
        err_msg.format(initial_metrics, final_metrics))
    self.assertEqual(
        initial_prometheus, final_prometheus,
        err_msg.format(initial_prometheus, final_prometheus))
def test_prometheus_volume_metrics_on_node_reboot(self):
    """Validate volume metrics using prometheus before and after node
    reboot
    """
    # Pod name prefix used for the entire test
    prefix = "autotest-{}".format(utils.get_random_str())

    # Create I/O pod with PVC
    pvc_name = self.create_and_wait_for_pvc()
    pod_name = openshift_ops.oc_create_tiny_pod_with_volume(
        self._master, pvc_name, prefix,
        image=self.io_container_image_cirros)
    self.addCleanup(
        openshift_ops.oc_delete, self._master, 'pod', pod_name,
        raise_on_absence=False)
    openshift_ops.wait_for_pod_be_ready(
        self._master, pod_name, timeout=60, wait_step=5)

    # Write data on the volume; the 2 minute sleep is required for
    # prometheus to pick up the exact values of the metrics
    ret, _, err = openshift_ops.oc_rsh(
        self._master, pod_name, "touch /mnt/file{1..1000}")
    self.assertEqual(
        ret, 0,
        "Failed to create files in the app pod with {}".format(err))
    time.sleep(120)

    # Fetch the metrics and store them in initial_metrics as a dictionary
    initial_metrics = self._get_and_manipulate_metric_data(
        self.metrics, pvc_name)
    openshift_ops.switch_oc_project(
        self._master, self.storage_project_name)

    # Get the hostname of the node to reboot, where the pod is running
    pod_info = openshift_ops.oc_get_pods(self._master, name=pod_name)
    node_for_reboot = pod_info[pod_name]['node']

    # Get the vm name by the hostname
    vm_name = node_ops.find_vm_name_by_ip_or_hostname(node_for_reboot)

    # Power the vm off and on, depending on whether it is a gluster node
    if node_for_reboot in self.gluster_servers:
        self.power_off_gluster_node_vm(vm_name, node_for_reboot)
        self.power_on_gluster_node_vm(vm_name, node_for_reboot)
    else:
        self.power_off_vm(vm_name)
        self.power_on_vm(vm_name)
    openshift_ops.wait_for_ocp_node_be_ready(
        self._master, node_for_reboot)

    # Create a new pod and validate the prometheus metrics
    pod_name = openshift_ops.oc_create_tiny_pod_with_volume(
        self._master, pvc_name, prefix)
    self.addCleanup(
        openshift_ops.oc_delete, self._master, 'pod', pod_name)

    # Wait for the pod to be up and running and for prometheus to
    # refresh its data
    openshift_ops.wait_for_pod_be_ready(
        self._master, pod_name, timeout=60, wait_step=5)
    time.sleep(120)

    # Fetch the metrics, store them in final_metrics as a dictionary and
    # validate them against initial_metrics
    final_metrics = self._get_and_manipulate_metric_data(
        self.metrics, pvc_name)
    self.assertEqual(
        dict(initial_metrics), dict(final_metrics),
        "Metrics are different post node reboot")
def test_udev_usage_in_container(self):
    """Validate LVM inside container does not use udev"""

    # Skip the TC if independent mode deployment
    if not self.is_containerized_gluster():
        self.skipTest(
            "Skipping this test case as it needs to run on converged mode"
            " deployment")

    h_client, h_url = self.heketi_client_node, self.heketi_server_url
    server_info = list(g.config.get('gluster_servers').values())[0]
    server_node = server_info.get('manage')
    additional_device = server_info.get('additional_devices')[0]

    # Command to run pvscan
    cmd_pvscan = "timeout 300 pvscan"

    # Get the gluster pod name running on the host
    for pod_info in self.pod_name:
        if pod_info.get('pod_hostname') == server_node:
            pod_name = pod_info.get('pod_name')
            break

    # Create file volume
    vol_info = heketi_ops.heketi_volume_create(
        h_client, h_url, self.volume_size, json=True)
    self.addCleanup(
        heketi_ops.heketi_volume_delete, h_client, h_url,
        vol_info.get("id"))

    # Create block volume
    block_vol_info = heketi_ops.heketi_blockvolume_create(
        h_client, h_url, self.volume_size, json=True)
    self.addCleanup(
        heketi_ops.heketi_blockvolume_delete, h_client, h_url,
        block_vol_info.get("id"))

    # Check dmeventd service in container
    err_msg = "dmeventd.service is running on setup"
    with self.assertRaises(AssertionError, msg=err_msg):
        openshift_ops.oc_rsh(
            self.oc_node, pod_name, "systemctl is-active dmeventd.service")

    # Service dmeventd should not be running in background
    with self.assertRaises(AssertionError, msg=err_msg):
        openshift_ops.oc_rsh(
            self.oc_node, pod_name, "ps aux | grep dmeventd.service")

    # Perform a pvscan in the container
    openshift_ops.oc_rsh(self.oc_node, pod_name, cmd_pvscan)

    # Get the heketi node on which to add the new device
    heketi_node_list = heketi_ops.heketi_node_list(h_client, h_url)
    for h_node_id in heketi_node_list:
        h_node_info = heketi_ops.heketi_node_info(
            h_client, h_url, h_node_id, json=True)
        h_node_host = h_node_info.get('hostnames', {}).get('manage')[0]
        if h_node_host == server_node:
            break

    # Add new device to the node
    heketi_ops.heketi_device_add(
        h_client, h_url, additional_device, h_node_id)
    h_node_info = heketi_ops.heketi_node_info(
        h_client, h_url, h_node_id, json=True)
    h_device_id = [
        device.get('id')
        for device in h_node_info.get('devices')
        if device.get('name') == additional_device]
    self.addCleanup(
        heketi_ops.heketi_device_delete, h_client, h_url, h_device_id[0])
    self.addCleanup(
        heketi_ops.heketi_device_remove, h_client, h_url, h_device_id[0])
    self.addCleanup(
        heketi_ops.heketi_device_disable, h_client, h_url, h_device_id[0])

    # Reboot the node on which the device was added
    self.addCleanup(
        self._check_heketi_and_gluster_pod_after_node_reboot, server_node)
    node_ops.node_reboot_by_command(server_node)

    # Wait for the node to become NotReady
    custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
    for w in waiter.Waiter(300, 10):
        status = openshift_ops.oc_get_custom_resource(
            self.oc_node, 'node', custom, server_node)
        if status[0] == 'False':
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "Failed to bring down node {}".format(server_node))

    # Wait for the node to become ready
    openshift_ops.wait_for_ocp_node_be_ready(self.oc_node, server_node)

    # Wait for heketi and glusterfs pods to become ready
    self._check_heketi_and_gluster_pod_after_node_reboot(server_node)

    # Perform a pvscan in the container
    openshift_ops.oc_rsh(self.oc_node, pod_name, cmd_pvscan)