def test_node_failure_pv_mounted(self):
    """Test node failure when PV is mounted with app pods running"""
    filepath = "/mnt/file_for_testing_volume.log"
    pvc_name = self.create_and_wait_for_pvc()

    dc_and_pod_names = self.create_dcs_with_pvc(pvc_name)
    dc_name, pod_name = dc_and_pod_names[pvc_name]

    # Find the gluster host serving the PV from the app pod's df output
    mount_point = "df -kh /mnt -P | tail -1 | awk '{{print $1}}'"
    pod_cmd = "oc exec {} -- {}".format(pod_name, mount_point)
    hostname = command.cmd_run(pod_cmd, hostname=self.node)
    hostname = hostname.split(":")[0]

    # Power off the gluster node VM and register power on as cleanup
    vm_name = find_vm_name_by_ip_or_hostname(hostname)
    self.addCleanup(power_on_vm_by_name, vm_name)
    power_off_vm_by_name(vm_name)

    # Verify I/O on the mounted volume while the node is down
    cmd = "dd if=/dev/urandom of={} bs=1K count=100".format(filepath)
    ret, _, err = oc_rsh(self.node, pod_name, cmd)
    self.assertFalse(
        ret, "Failed to execute command {} on {} with error {}".format(
            cmd, self.node, err))

    # Respin the app pod and wait for the new pod to be ready
    oc_delete(self.node, 'pod', pod_name)
    wait_for_resource_absence(self.node, 'pod', pod_name)
    pod_name = get_pod_name_from_dc(self.node, dc_name)
    wait_for_pod_be_ready(self.node, pod_name)

    # Verify I/O again from the respun pod
    ret, _, err = oc_rsh(self.node, pod_name, cmd)
    self.assertFalse(
        ret, "Failed to execute command {} on {} with error {}".format(
            cmd, self.node, err))
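# Illustrative sketch only (the sample value below is made up, not taken from
# a real run): for a gluster-backed PV the first column of `df -P` output is
# typically of the form "<gluster-host>:<volume-path>", so splitting on ":"
# yields the host serving the mount, which is then mapped to its VM name.
def _example_parse_df_source():
    sample_df_source = "192.168.100.10:/vol_0a1b2c3d"  # hypothetical df value
    gluster_host = sample_df_source.split(":")[0]
    assert gluster_host == "192.168.100.10"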
def detach_and_attach_vmdk(self, vm_name, node_hostname, devices_list):
    # Detach the disks in devices_list and re-attach the existing vmdks,
    # shifted by one device
    vmdk_list, modified_device_list = [], []
    devices_list.reverse()

    self.addCleanup(self.power_on_gluster_node_vm, vm_name, node_hostname)
    for device in devices_list:
        # Detach disk from the VM and register re-attach as cleanup
        vmdk = detach_disk_from_vm(vm_name, device)
        self.addCleanup(
            attach_existing_vmdk_from_vmstore, vm_name, device, vmdk)
        vmdk_list.append(vmdk)

    vmdk_list.reverse()
    devices_list.reverse()

    # Rotate the vmdk list by one so that each device is re-attached with
    # the vmdk that previously backed the neighbouring device
    modified_vmdk_list = vmdk_list[-1:] + vmdk_list[:-1]
    for device, vmdk in zip(devices_list, modified_vmdk_list):
        modified_device_list.append((device, vmdk))

    # Power off gluster node
    power_off_vm_by_name(vm_name)
    self.addCleanup(power_off_vm_by_name, vm_name)

    # Attach the rotated vmdks and register detach as cleanup
    for device, vdisk in modified_device_list:
        attach_existing_vmdk_from_vmstore(vm_name, device, vdisk)
        self.addCleanup(detach_disk_from_vm, vm_name, device)

    self.power_on_gluster_node_vm(vm_name, node_hostname)
    devices_list.sort()
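# Minimal sketch (device and vmdk names are illustrative, not from the
# original source) of the rotation performed above: the vmdk list is shifted
# right by one, so each device ends up re-attached with the vmdk that
# previously backed a neighbouring device.
def _example_vmdk_rotation():
    devices = ["sdb", "sdc", "sdd"]                        # hypothetical devices
    vmdks = ["disk_b.vmdk", "disk_c.vmdk", "disk_d.vmdk"]  # hypothetical vmdks
    rotated = vmdks[-1:] + vmdks[:-1]
    # sdb now maps to disk_d.vmdk, sdc to disk_b.vmdk, sdd to disk_c.vmdk
    assert list(zip(devices, rotated)) == [
        ("sdb", "disk_d.vmdk"), ("sdc", "disk_b.vmdk"), ("sdd", "disk_c.vmdk")]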
def _power_off_node_and_wait_node_to_be_not_ready(self, hostname):
    # Bring down the glusterfs node
    vm_name = node_ops.find_vm_name_by_ip_or_hostname(hostname)
    self.addCleanup(self._wait_for_gluster_pod_after_node_reboot, hostname)
    self.addCleanup(node_ops.power_on_vm_by_name, vm_name)
    node_ops.power_off_vm_by_name(vm_name)

    # Wait for the glusterfs node to become NotReady
    custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
    for w in waiter.Waiter(300, 20):
        status = openshift_ops.oc_get_custom_resource(
            self.ocp_client, 'node', custom, hostname)
        if status[0] in ['False', 'Unknown']:
            break

    if w.expired:
        raise exceptions.ExecutionError(
            "Failed to bring down node {}".format(hostname))
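# Hedged usage sketch (the hostname value is hypothetical, not from the
# original source): a test can bring a gluster node down with the helper
# above and rely on the cleanups registered inside it to power the VM back
# on and wait for the gluster pod once the test ends.
def _example_bring_node_down(self):
    hostname = "gluster-node-1.example.com"  # hypothetical node hostname
    self._power_off_node_and_wait_node_to_be_not_ready(hostname)
    # ... run the checks that require the node to be NotReady here ...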
def power_off_vm(self, vm_name):
    # Register power on as cleanup before powering off the VM
    self.addCleanup(self.power_on_vm, vm_name)
    power_off_vm_by_name(vm_name)
def test_brick_evict_with_node_down(self):
    """Test brick evict basic functionality and verify brick evict
    after node down"""
    h_node, h_server = self.heketi_client_node, self.heketi_server_url

    # Disable any nodes beyond the first three so only three stay enabled
    node_list = heketi_ops.heketi_node_list(h_node, h_server)
    if len(node_list) > 3:
        for node_id in node_list[3:]:
            heketi_ops.heketi_node_disable(h_node, h_server, node_id)
            self.addCleanup(
                heketi_ops.heketi_node_enable, h_node, h_server, node_id)

    # Create heketi volume
    vol_info = heketi_ops.heketi_volume_create(
        h_node, h_server, 1, json=True)
    self.addCleanup(
        heketi_ops.heketi_volume_delete, h_node, h_server,
        vol_info.get('id'))

    # Get the node on which the heketi pod is scheduled
    heketi_pod = openshift_ops.get_pod_name_from_dc(
        self.ocp_client, self.heketi_dc_name)
    heketi_node = openshift_ops.oc_get_custom_resource(
        self.ocp_client, 'pod', '.:spec.nodeName', heketi_pod)[0]

    # Get the hostnames of the disabled node ids
    host_list = []
    for node_id in node_list[3:]:
        node_info = heketi_ops.heketi_node_info(
            h_node, h_server, node_id, json=True)
        host_list.append(node_info.get('hostnames').get('manage')[0])

    # Pick a brick whose glusterfs node is neither the heketi node nor a
    # disabled node
    for brick in vol_info.get('bricks', {}):
        node_info = heketi_ops.heketi_node_info(
            h_node, h_server, brick.get('node'), json=True)
        hostname = node_info.get('hostnames').get('manage')[0]
        if (hostname != heketi_node) and (hostname not in host_list):
            brick_id = brick.get('id')
            break

    # Bring down the glusterfs node
    vm_name = node_ops.find_vm_name_by_ip_or_hostname(hostname)
    self.addCleanup(self._wait_for_gluster_pod_after_node_reboot, hostname)
    self.addCleanup(node_ops.power_on_vm_by_name, vm_name)
    node_ops.power_off_vm_by_name(vm_name)

    # Wait for the glusterfs node to become NotReady
    custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
    for w in waiter.Waiter(300, 20):
        status = openshift_ops.oc_get_custom_resource(
            self.ocp_client, 'node', custom, hostname)
        if status[0] in ['False', 'Unknown']:
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "Failed to bring down node {}".format(hostname))

    # Perform brick evict operation; a missing replacement brick is
    # tolerated here because one node is down
    try:
        heketi_ops.heketi_brick_evict(h_node, h_server, brick_id)
    except AssertionError as e:
        if 'No Replacement was found' not in six.text_type(e):
            raise
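# Hedged follow-up sketch (assumption, not part of the original test; helper
# name is hypothetical and heketi_ops.heketi_volume_info is assumed to be
# available in the same library): when the evict succeeds, the volume can be
# re-read and checked so that the evicted brick id is no longer present.
def _example_verify_brick_evicted(self, h_node, h_server, volume_id, brick_id):
    vol_info = heketi_ops.heketi_volume_info(
        h_node, h_server, volume_id, json=True)
    current_brick_ids = {brick['id'] for brick in vol_info.get('bricks', [])}
    self.assertNotIn(
        brick_id, current_brick_ids,
        "Brick {} is still part of volume {}".format(brick_id, volume_id))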