def test_pvc_deletion_while_pod_is_running(self):
    """Validate PVC deletion while pod is running"""
    if get_openshift_version() <= "3.9":
        self.skipTest(
            "PVC deletion while pod is running is not supported"
            " in OCP older than 3.9")

    # Create DC with POD and attached PVC to it
    sc_name = self.create_storage_class()
    pvc_name = self.create_and_wait_for_pvc(sc_name=sc_name)
    dc_name, pod_name = self.create_dc_with_pvc(pvc_name)

    # Delete PVC
    oc_delete(self.node, 'pvc', self.pvc_name)

    with self.assertRaises(ExecutionError):
        wait_for_resource_absence(
            self.node, 'pvc', self.pvc_name, interval=3, timeout=30)

    # Make sure we are able to work with files on the mounted volume
    # after deleting pvc.
    filepath = "/mnt/file_for_testing_volume.log"
    cmd = "dd if=/dev/urandom of=%s bs=1K count=100" % filepath
    ret, out, err = oc_rsh(self.node, pod_name, cmd)
    self.assertEqual(
        ret, 0, "Failed to execute command %s on %s" % (cmd, self.node))
def _guster_pod_delete_cleanup(self, g_pod_list_before):
    """Cleanup for deletion of gluster pod using force delete"""
    # Switch to gluster project
    openshift_ops.switch_oc_project(
        self._master, self._registry_project_name)
    try:
        # Fetch gluster pod after delete
        pod_name = self._get_newly_deployed_gluster_pod(g_pod_list_before)

        # If no new pod came up, fall back to the old pod name
        openshift_ops.wait_for_pod_be_ready(
            self._master,
            pod_name[0] if pod_name else g_pod_list_before[0],
            timeout=1)
    except exceptions.ExecutionError:
        # Force delete and wait for new pod to come up
        openshift_ops.oc_delete(
            self._master, 'pod', g_pod_list_before[0], is_force=True)
        openshift_ops.wait_for_resource_absence(
            self._master, 'pod', g_pod_list_before[0])

        # Fetch gluster pod after force delete
        g_new_pod = self._get_newly_deployed_gluster_pod(g_pod_list_before)
        openshift_ops.wait_for_pod_be_ready(self._master, g_new_pod[0])
def test_100gb_block_pvc_create_and_delete_twice(self):
    """Validate creation and deletion of a block volume of size 100GB"""
    # Define PVC size, block hosting volume size and required free space
    # for a 100GB block PVC
    size, bhv_size, required_space = 100, 103, 309
    h_node, h_url = self.heketi_client_node, self.heketi_server_url
    prefix = 'autotest-pvc-{}'.format(utils.get_random_str(size=5))

    # Skip test if required free space is not available
    free_space = get_total_free_space(
        self.heketi_client_node, self.heketi_server_url)[0]
    if free_space < required_space:
        self.skipTest("Available free space {} is less than the required "
                      "free space {}".format(free_space, required_space))

    # Create block hosting volume of 103GB required for 100GB block PVC
    bhv = heketi_volume_create(
        h_node, h_url, bhv_size, block=True, json=True)['id']
    self.addCleanup(heketi_volume_delete, h_node, h_url, bhv)

    for _ in range(2):
        # Create PVC of size 100GB
        pvc_name = self.create_and_wait_for_pvc(
            pvc_size=size, pvc_name_prefix=prefix)
        match_pvc_and_pv(self.node, prefix)

        # Delete the PVC
        oc_delete(self.node, 'pvc', pvc_name)
        wait_for_resource_absence(self.node, 'pvc', pvc_name)
def test_pv_resize_with_prefix_for_name_and_size(
        self, create_vol_name_prefix=False, valid_size=True):
    """Validate PV resize with and without name prefix"""
    dir_path = "/mnt/"
    node = self.ocp_client[0]

    # Create PVC
    self.create_storage_class(
        allow_volume_expansion=True,
        create_vol_name_prefix=create_vol_name_prefix)
    pvc_name = self.create_and_wait_for_pvc()

    # Create DC with POD and attached PVC to it.
    dc_name = oc_create_app_dc_with_io(node, pvc_name)
    self.addCleanup(oc_delete, node, 'dc', dc_name)
    self.addCleanup(scale_dc_pod_amount_and_wait, node, dc_name, 0)
    pod_name = get_pod_name_from_dc(node, dc_name)
    wait_for_pod_be_ready(node, pod_name)

    if create_vol_name_prefix:
        ret = heketi_ops.verify_volume_name_prefix(
            node, self.sc['volumenameprefix'],
            self.sc['secretnamespace'],
            pvc_name, self.heketi_server_url)
        self.assertTrue(ret, "verify volnameprefix failed")

    cmd = ("dd if=/dev/urandom of=%sfile "
           "bs=100K count=1000") % dir_path
    ret, out, err = oc_rsh(node, pod_name, cmd)
    self.assertEqual(
        ret, 0, "Failed to execute command %s on %s" % (cmd, node))
    pv_name = get_pv_name_from_pvc(node, pvc_name)

    # If resize size is invalid then size should not change
    if valid_size:
        cmd = ("dd if=/dev/urandom of=%sfile2 "
               "bs=100K count=10000") % dir_path
        with self.assertRaises(AssertionError):
            ret, out, err = oc_rsh(node, pod_name, cmd)
            msg = ("Command '%s' was expected to fail on '%s' node. "
                   "But it returned following: ret is '%s', err is '%s' "
                   "and out is '%s'" % (cmd, node, ret, err, out))
            raise ExecutionError(msg)
        pvc_size = 2
        resize_pvc(node, pvc_name, pvc_size)
        verify_pvc_size(node, pvc_name, pvc_size)
        verify_pv_size(node, pv_name, pvc_size)
    else:
        invalid_pvc_size = 'ten'
        with self.assertRaises(AssertionError):
            resize_pvc(node, pvc_name, invalid_pvc_size)
        verify_pvc_size(node, pvc_name, 1)
        verify_pv_size(node, pv_name, 1)

    oc_delete(node, 'pod', pod_name)
    wait_for_resource_absence(node, 'pod', pod_name)
    pod_name = get_pod_name_from_dc(node, dc_name)
    wait_for_pod_be_ready(node, pod_name)
    cmd = ("dd if=/dev/urandom of=%sfile_new "
           "bs=50K count=10000") % dir_path
    ret, out, err = oc_rsh(node, pod_name, cmd)
    self.assertEqual(
        ret, 0, "Failed to execute command %s on %s" % (cmd, node))
def test_dev_path_mapping_gluster_pod_reboot(self):
    """Validate dev path mapping for app pods with file volume
       after reboot
    """
    # Skip the tc for independent mode
    if not self.is_containerized_gluster():
        self.skipTest("Skip TC as it is not supported in independent mode")

    # Create file volume with app pod and verify IO's
    # and Compare path, uuid, vg_name
    pod_name, dc_name, use_percent = self._create_app_pod_and_verify_pvs()

    # Fetch the gluster pod name from node
    g_pod = self._get_gluster_pod()

    # Respin a gluster pod
    openshift_ops.oc_delete(self.node, "pod", g_pod)
    self.addCleanup(self._guster_pod_delete_cleanup)

    # Wait for pod to get absent
    openshift_ops.wait_for_resource_absence(self.node, "pod", g_pod)

    # Fetch gluster pod after delete
    g_pod = self._get_gluster_pod()
    openshift_ops.wait_for_pod_be_ready(self.node, g_pod)

    # Check if IO's are running after respin of gluster pod
    use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
    self.assertNotEqual(
        use_percent, use_percent_after,
        "Failed to execute IO's in the app pod {} after respin".format(
            pod_name))
def test_restart_gluster_block_provisioner_pod(self):
    """Restart gluster-block provisioner pod."""
    # Get glusterblock provisioner dc name
    cmd = ("oc get dc | awk '{ print $1 }' | "
           "grep -e glusterblock -e provisioner")
    dc_name = command.cmd_run(cmd, self.ocp_master_node[0], True)

    # create heketi block volume
    vol_info = heketi_blockvolume_create(
        self.heketi_client_node, self.heketi_server_url, size=5, json=True)
    self.assertTrue(vol_info, "Failed to create heketi block "
                              "volume of size 5")
    self.addCleanup(heketi_blockvolume_delete, self.heketi_client_node,
                    self.heketi_server_url, vol_info['id'])

    # restart gluster-block-provisioner-pod
    pod_name = get_pod_name_from_dc(self.ocp_master_node[0], dc_name)
    oc_delete(self.ocp_master_node[0], 'pod', pod_name)
    wait_for_resource_absence(self.ocp_master_node[0], 'pod', pod_name)

    # new gluster-pod name
    pod_name = get_pod_name_from_dc(self.ocp_master_node[0], dc_name)
    wait_for_pod_be_ready(self.ocp_master_node[0], pod_name)

    # create new heketi block volume
    vol_info = heketi_blockvolume_create(
        self.heketi_client_node, self.heketi_server_url, size=2, json=True)
    self.assertTrue(vol_info, "Failed to create heketi block "
                              "volume of size 2")
    heketi_blockvolume_delete(self.heketi_client_node,
                              self.heketi_server_url, vol_info['id'])
def test_restart_gluster_block_provisioner_pod(self):
    """Restart gluster-block provisioner pod"""
    # create heketi block volume
    vol_info = heketi_blockvolume_create(
        self.heketi_client_node, self.heketi_server_url, size=5, json=True)
    self.assertTrue(vol_info, "Failed to create heketi block "
                              "volume of size 5")
    self.addCleanup(heketi_blockvolume_delete, self.heketi_client_node,
                    self.heketi_server_url, vol_info['id'])

    # restart gluster-block-provisioner-pod
    dc_name = "glusterblock-%s-provisioner-dc" % self.storage_project_name
    pod_name = get_pod_name_from_dc(self.ocp_master_node[0], dc_name)
    oc_delete(self.ocp_master_node[0], 'pod', pod_name)
    wait_for_resource_absence(self.ocp_master_node[0], 'pod', pod_name)

    # new gluster-pod name
    pod_name = get_pod_name_from_dc(self.ocp_master_node[0], dc_name)
    wait_for_pod_be_ready(self.ocp_master_node[0], pod_name)

    # create new heketi block volume
    vol_info = heketi_blockvolume_create(
        self.heketi_client_node, self.heketi_server_url, size=2, json=True)
    self.assertTrue(vol_info, "Failed to create heketi block "
                              "volume of size 2")
    heketi_blockvolume_delete(self.heketi_client_node,
                              self.heketi_server_url, vol_info['id'])
def test_node_failure_pv_mounted(self):
    """Test node failure when PV is mounted with app pods running"""
    filepath = "/mnt/file_for_testing_volume.log"
    pvc_name = self.create_and_wait_for_pvc()

    dc_and_pod_names = self.create_dcs_with_pvc(pvc_name)
    dc_name, pod_name = dc_and_pod_names[pvc_name]

    mount_point = "df -kh /mnt -P | tail -1 | awk '{{print $1}}'"
    pod_cmd = "oc exec {} -- {}".format(pod_name, mount_point)
    hostname = command.cmd_run(pod_cmd, hostname=self.node)
    hostname = hostname.split(":")[0]
    vm_name = find_vm_name_by_ip_or_hostname(hostname)

    self.addCleanup(power_on_vm_by_name, vm_name)
    power_off_vm_by_name(vm_name)

    cmd = "dd if=/dev/urandom of={} bs=1K count=100".format(filepath)
    ret, _, err = oc_rsh(self.node, pod_name, cmd)
    self.assertFalse(
        ret, "Failed to execute command {} on {} with error {}".format(
            cmd, self.node, err))

    oc_delete(self.node, 'pod', pod_name)
    wait_for_resource_absence(self.node, 'pod', pod_name)
    pod_name = get_pod_name_from_dc(self.node, dc_name)
    wait_for_pod_be_ready(self.node, pod_name)

    ret, _, err = oc_rsh(self.node, pod_name, cmd)
    self.assertFalse(
        ret, "Failed to execute command {} on {} with error {}".format(
            cmd, self.node, err))
def test_heketi_logs_after_heketi_pod_restart(self):

    h_node, h_server = self.heketi_client_node, self.heketi_server_url
    find_string_in_log = r"Started background pending operations cleaner"
    ocp_node = self.ocp_master_node[0]

    # Restart heketi pod
    heketi_pod_name = get_pod_name_from_dc(ocp_node, self.heketi_dc_name)
    oc_delete(
        ocp_node, 'pod', heketi_pod_name,
        collect_logs=self.heketi_logs_before_delete)
    self.addCleanup(self._heketi_pod_delete_cleanup)
    wait_for_resource_absence(ocp_node, 'pod', heketi_pod_name)
    heketi_pod_name = get_pod_name_from_dc(ocp_node, self.heketi_dc_name)
    wait_for_pod_be_ready(ocp_node, heketi_pod_name)
    self.assertTrue(
        hello_heketi(h_node, h_server),
        "Heketi server {} is not alive".format(h_server))

    # Collect logs after heketi pod restart
    cmd = "oc logs {}".format(heketi_pod_name)
    out = cmd_run(cmd, hostname=ocp_node)

    # Validate string is present in heketi logs
    pending_check = re.compile(find_string_in_log)
    entry_list = pending_check.findall(out)
    self.assertTrue(entry_list, "Failed to find entries in heketi logs")

    for entry in entry_list:
        self.assertEqual(
            entry, find_string_in_log,
            "Failed to validate, Expected {}; Actual {}".format(
                find_string_in_log, entry))
def _pv_resize(self, exceed_free_space):
    dir_path = "/mnt"
    pvc_size_gb = 1
    available_size_gb = self._available_disk_free_space()

    # Create PVC
    self.create_storage_class(allow_volume_expansion=True)
    pvc_name = self.create_and_wait_for_pvc(pvc_size=pvc_size_gb)

    # Create DC with POD and attached PVC to it
    dc_name = oc_create_app_dc_with_io(
        self.node, pvc_name, image=self.io_container_image_cirros)
    self.addCleanup(oc_delete, self.node, 'dc', dc_name)
    self.addCleanup(scale_dc_pod_amount_and_wait, self.node, dc_name, 0)
    pod_name = get_pod_name_from_dc(self.node, dc_name)
    wait_for_pod_be_ready(self.node, pod_name)

    if exceed_free_space:
        exceed_size = available_size_gb + 10

        # Try to expand existing PVC exceeding free space
        resize_pvc(self.node, pvc_name, exceed_size)
        wait_for_events(self.node, obj_name=pvc_name,
                        event_reason='VolumeResizeFailed')

        # Check that app POD is up and running then try to write data
        wait_for_pod_be_ready(self.node, pod_name)
        cmd = ("dd if=/dev/urandom of=%s/autotest bs=100K count=1"
               % dir_path)
        ret, out, err = oc_rsh(self.node, pod_name, cmd)
        self.assertEqual(
            ret, 0,
            "Failed to write data after failed attempt to expand PVC.")
    else:
        # Expand existing PVC using all the available free space
        expand_size_gb = available_size_gb - pvc_size_gb
        resize_pvc(self.node, pvc_name, expand_size_gb)
        verify_pvc_size(self.node, pvc_name, expand_size_gb)
        pv_name = get_pv_name_from_pvc(self.node, pvc_name)
        verify_pv_size(self.node, pv_name, expand_size_gb)
        wait_for_events(self.node, obj_name=pvc_name,
                        event_reason='VolumeResizeSuccessful')

        # Recreate app POD
        oc_delete(self.node, 'pod', pod_name)
        wait_for_resource_absence(self.node, 'pod', pod_name)
        pod_name = get_pod_name_from_dc(self.node, dc_name)
        wait_for_pod_be_ready(self.node, pod_name)

        # Write data on the expanded PVC
        cmd = ("dd if=/dev/urandom of=%s/autotest "
               "bs=1M count=1025" % dir_path)
        ret, out, err = oc_rsh(self.node, pod_name, cmd)
        self.assertEqual(
            ret, 0, "Failed to write data on the expanded PVC")
def test_arbiter_volume_delete_using_pvc(self):
    """Test Arbiter volume delete using pvc when volume is not mounted
       on app pod
    """
    prefix = "autotest-%s" % utils.get_random_str()

    # Create sc with gluster arbiter info
    sc_name = self.create_storage_class(
        vol_name_prefix=prefix, is_arbiter_vol=True)

    # Create PVC and wait for it to be in 'Bound' state
    pvc_name = self.create_and_wait_for_pvc(
        pvc_name_prefix=prefix, sc_name=sc_name)

    # Get vol info
    gluster_vol_info = openshift_ops.get_gluster_vol_info_by_pvc_name(
        self.node, pvc_name)

    # Verify arbiter volume properties
    self.verify_amount_and_proportion_of_arbiter_and_data_bricks(
        gluster_vol_info)

    # Get volume ID
    gluster_vol_id = gluster_vol_info["gluster_vol_id"]

    # Delete the pvc
    openshift_ops.oc_delete(self.node, 'pvc', pvc_name)
    openshift_ops.wait_for_resource_absence(self.node, 'pvc', pvc_name)

    # Check the heketi volume list to verify the volume is deleted
    g.log.info("List heketi volumes")
    heketi_volumes = heketi_ops.heketi_volume_list(
        self.heketi_client_node, self.heketi_server_url)
    err_msg = "Failed to delete heketi volume by prefix %s" % prefix
    self.assertNotIn(prefix, heketi_volumes, err_msg)

    # Check absence of the gluster volume
    get_gluster_vol_info = volume_ops.get_volume_info(
        "auto_get_gluster_endpoint", gluster_vol_id)
    err_msg = "Failed to delete gluster volume %s" % gluster_vol_id
    self.assertFalse(get_gluster_vol_info, err_msg)

    # Check absence of bricks and lvs
    for brick in gluster_vol_info['bricks']['brick']:
        gluster_node_ip, brick_name = brick["name"].split(":")

        with self.assertRaises(exceptions.ExecutionError):
            cmd = "df %s" % brick_name
            openshift_ops.cmd_run_on_gluster_pod_or_node(
                self.node, cmd, gluster_node_ip)

        with self.assertRaises(exceptions.ExecutionError):
            lv_match = re.search(BRICK_REGEX, brick["name"])
            if lv_match:
                cmd = "lvs %s" % lv_match.group(2).strip()
                openshift_ops.cmd_run_on_gluster_pod_or_node(
                    self.node, cmd, gluster_node_ip)
def _check_heketi_pod_to_come_up_after_changing_env(self):
    # Wait for the old heketi pod to go away
    heketi_pod = openshift_ops.get_pod_names_from_dc(
        self.oc_node, self.heketi_dc_name)[0]
    openshift_ops.wait_for_resource_absence(
        self.oc_node, "pod", heketi_pod)

    # Fetch the new heketi pod and wait for it to be ready
    new_heketi_pod = openshift_ops.get_pod_names_from_dc(
        self.oc_node, self.heketi_dc_name)[0]
    openshift_ops.wait_for_pod_be_ready(
        self.oc_node, new_heketi_pod, wait_step=20)
def test_restart_heketi_pod(self):
    """Validate restarting heketi pod"""

    # create heketi volume
    vol_info = heketi_volume_create(self.heketi_client_node,
                                    self.heketi_server_url,
                                    size=1, json=True)
    self.assertTrue(vol_info, "Failed to create heketi volume of size 1")
    self.addCleanup(heketi_volume_delete,
                    self.heketi_client_node,
                    self.heketi_server_url,
                    vol_info['id'],
                    raise_on_error=False)
    topo_info = heketi_topology_info(self.heketi_client_node,
                                     self.heketi_server_url,
                                     json=True)

    # get heketi-pod name
    heketi_pod_name = get_pod_name_from_dc(self.ocp_master_node[0],
                                           self.heketi_dc_name)

    # delete heketi-pod (it restarts the pod)
    oc_delete(self.ocp_master_node[0], 'pod', heketi_pod_name,
              collect_logs=self.heketi_logs_before_delete)
    wait_for_resource_absence(self.ocp_master_node[0],
                              'pod', heketi_pod_name)

    # get new heketi-pod name
    heketi_pod_name = get_pod_name_from_dc(self.ocp_master_node[0],
                                           self.heketi_dc_name)
    wait_for_pod_be_ready(self.ocp_master_node[0], heketi_pod_name)

    # check heketi server is running
    self.assertTrue(
        hello_heketi(self.heketi_client_node, self.heketi_server_url),
        "Heketi server %s is not alive" % self.heketi_server_url)

    # compare the topology
    new_topo_info = heketi_topology_info(self.heketi_client_node,
                                         self.heketi_server_url,
                                         json=True)
    self.assertEqual(
        new_topo_info, topo_info,
        "topology info is not same, difference - %s"
        % diff(topo_info, new_topo_info))

    # create new volume
    vol_info = heketi_volume_create(self.heketi_client_node,
                                    self.heketi_server_url,
                                    size=2, json=True)
    self.assertTrue(vol_info, "Failed to create heketi volume of size 2")
    heketi_volume_delete(self.heketi_client_node,
                         self.heketi_server_url, vol_info['id'])
def test_dynamic_provisioning_glusterblock_reclaim_policy_retain(self):
    """Validate retain policy for gluster-block after PVC deletion"""
    if get_openshift_version() < "3.9":
        self.skipTest(
            "'Reclaim' feature is not supported in OCP older than 3.9")

    self.create_storage_class(reclaim_policy='Retain')
    self.create_and_wait_for_pvc()

    dc_name = oc_create_app_dc_with_io(self.node, self.pvc_name)

    try:
        pod_name = get_pod_name_from_dc(self.node, dc_name)
        wait_for_pod_be_ready(self.node, pod_name)
    finally:
        scale_dc_pod_amount_and_wait(self.node, dc_name, pod_amount=0)
        oc_delete(self.node, 'dc', dc_name)

    # get the name of volume
    pv_name = get_pv_name_from_pvc(self.node, self.pvc_name)

    custom = [r':.metadata.annotations."gluster\.org\/volume\-id"',
              r':.spec.persistentVolumeReclaimPolicy']
    vol_id, reclaim_policy = oc_get_custom_resource(
        self.node, 'pv', custom, pv_name)

    # checking the retainPolicy of pvc
    self.assertEqual(reclaim_policy, 'Retain')

    # delete the pvc
    oc_delete(self.node, 'pvc', self.pvc_name)

    # check if pv is also deleted or not
    with self.assertRaises(ExecutionError):
        wait_for_resource_absence(
            self.node, 'pvc', self.pvc_name, interval=3, timeout=30)

    # getting the blockvol list
    blocklist = heketi_blockvolume_list(self.heketi_client_node,
                                        self.heketi_server_url)
    self.assertIn(vol_id, blocklist)

    heketi_blockvolume_delete(self.heketi_client_node,
                              self.heketi_server_url, vol_id)
    blocklist = heketi_blockvolume_list(self.heketi_client_node,
                                        self.heketi_server_url)
    self.assertNotIn(vol_id, blocklist)
    oc_delete(self.node, 'pv', pv_name)
    wait_for_resource_absence(self.node, 'pv', pv_name)
def test_pv_resize_with_prefix_for_name(self,
                                        create_vol_name_prefix=False):
    """Validate PV resize with and without name prefix"""
    dir_path = "/mnt/"
    node = self.ocp_client[0]

    # Create PVC
    self.create_storage_class(
        allow_volume_expansion=True,
        create_vol_name_prefix=create_vol_name_prefix)
    pvc_name = self.create_and_wait_for_pvc()

    # Create DC with POD and attached PVC to it.
    dc_name = oc_create_app_dc_with_io(node, pvc_name)
    self.addCleanup(oc_delete, node, 'dc', dc_name)
    self.addCleanup(scale_dc_pod_amount_and_wait, node, dc_name, 0)
    pod_name = get_pod_name_from_dc(node, dc_name)
    wait_for_pod_be_ready(node, pod_name)

    if create_vol_name_prefix:
        ret = heketi_ops.verify_volume_name_prefix(
            node, self.sc['volumenameprefix'],
            self.sc['secretnamespace'],
            pvc_name, self.heketi_server_url)
        self.assertTrue(ret, "verify volnameprefix failed")

    cmd = ("dd if=/dev/urandom of=%sfile "
           "bs=100K count=1000") % dir_path
    ret, out, err = oc_rsh(node, pod_name, cmd)
    self.assertEqual(ret, 0, "failed to execute command %s on %s" % (
        cmd, node))

    cmd = ("dd if=/dev/urandom of=%sfile2 "
           "bs=100K count=10000") % dir_path
    with self.assertRaises(AssertionError):
        ret, out, err = oc_rsh(node, pod_name, cmd)
        msg = ("Command '%s' was expected to fail on '%s' node. "
               "But it returned following: ret is '%s', err is '%s' "
               "and out is '%s'" % (cmd, node, ret, err, out))
        raise ExecutionError(msg)

    pvc_size = 2
    resize_pvc(node, pvc_name, pvc_size)
    verify_pvc_size(node, pvc_name, pvc_size)
    pv_name = get_pv_name_from_pvc(node, pvc_name)
    verify_pv_size(node, pv_name, pvc_size)

    oc_delete(node, 'pod', pod_name)
    wait_for_resource_absence(node, 'pod', pod_name)
    pod_name = get_pod_name_from_dc(node, dc_name)
    wait_for_pod_be_ready(node, pod_name)

    cmd = ("dd if=/dev/urandom of=%sfile_new "
           "bs=50K count=10000") % dir_path
    ret, out, err = oc_rsh(node, pod_name, cmd)
    self.assertEqual(ret, 0, "failed to execute command %s on %s" % (
        cmd, node))
def _delete_and_wait_for_new_es_pod_to_come_up(self):

    # Force delete and wait for es pod to come up
    openshift_ops.switch_oc_project(
        self._master, self._logging_project_name)
    pod_name = openshift_ops.get_pod_name_from_dc(
        self._master, self._logging_es_dc)
    openshift_ops.oc_delete(self._master, 'pod', pod_name, is_force=True)
    openshift_ops.wait_for_resource_absence(self._master, 'pod', pod_name)
    new_pod_name = openshift_ops.get_pod_name_from_dc(
        self._master, self._logging_es_dc)
    openshift_ops.wait_for_pod_be_ready(
        self._master, new_pod_name, timeout=1800)
def test_restart_heketi_pod(self):
    """Validate restarting heketi pod"""

    # create heketi volume
    vol_info = heketi_volume_create(self.heketi_client_node,
                                    self.heketi_server_url,
                                    size=1, json=True)
    self.assertTrue(vol_info, "Failed to create heketi volume of size 1")
    self.addCleanup(
        heketi_volume_delete, self.heketi_client_node,
        self.heketi_server_url, vol_info['id'], raise_on_error=False)
    topo_info = heketi_topology_info(self.heketi_client_node,
                                     self.heketi_server_url,
                                     json=True)

    # get heketi-pod name
    heketi_pod_name = get_pod_name_from_dc(self.ocp_master_node[0],
                                           self.heketi_dc_name)

    # delete heketi-pod (it restarts the pod)
    oc_delete(self.ocp_master_node[0], 'pod', heketi_pod_name)
    wait_for_resource_absence(self.ocp_master_node[0],
                              'pod', heketi_pod_name)

    # get new heketi-pod name
    heketi_pod_name = get_pod_name_from_dc(self.ocp_master_node[0],
                                           self.heketi_dc_name)
    wait_for_pod_be_ready(self.ocp_master_node[0], heketi_pod_name)

    # check heketi server is running
    self.assertTrue(
        hello_heketi(self.heketi_client_node, self.heketi_server_url),
        "Heketi server %s is not alive" % self.heketi_server_url)

    # compare the topology
    new_topo_info = heketi_topology_info(self.heketi_client_node,
                                         self.heketi_server_url,
                                         json=True)
    self.assertEqual(new_topo_info, topo_info,
                     "topology info is not same, difference - %s"
                     % diff(topo_info, new_topo_info))

    # create new volume
    vol_info = heketi_volume_create(self.heketi_client_node,
                                    self.heketi_server_url,
                                    size=2, json=True)
    self.assertTrue(vol_info, "Failed to create heketi volume of size 2")
    heketi_volume_delete(
        self.heketi_client_node, self.heketi_server_url, vol_info['id'])
def test_metrics_during_cassandra_pod_respin(self, motive='delete'):
    """Validate cassandra pod respin"""
    old_cassandra_pod, pvc_name, _, _, node = (
        self.verify_cassandra_pod_multipath_and_iscsi())

    if motive == 'delete':
        # Delete the cassandra pod
        oc_delete(self.master, 'pod', old_cassandra_pod)
        self.addCleanup(self.cassandra_pod_delete_cleanup)
    elif motive == 'drain':
        # Get the number of infra nodes
        infra_node_count_cmd = (
            'oc get nodes '
            '--no-headers -l node-role.kubernetes.io/infra=true|wc -l')
        infra_node_count = command.cmd_run(
            infra_node_count_cmd, self.master)

        # Skip test case if the number of infra nodes is less than 2
        if int(infra_node_count) < 2:
            self.skipTest('Available number of infra nodes "{}", it '
                          'should be more than 1'.format(infra_node_count))

        # Drain the node
        drain_cmd = ('oc adm drain {} --force=true --ignore-daemonsets '
                     '--delete-local-data'.format(node))
        command.cmd_run(drain_cmd, hostname=self.master)

        # Cleanup to make node schedulable
        cmd_schedule = (
            'oc adm manage-node {} --schedulable=true'.format(node))
        self.addCleanup(
            command.cmd_run, cmd_schedule, hostname=self.master)

    # Wait for pod to get absent
    wait_for_resource_absence(self.master, 'pod', old_cassandra_pod)

    # Wait for new pod to come up
    new_cassandra_pod = get_pod_name_from_rc(
        self.master, self.metrics_rc_hawkular_cassandra)
    wait_for_pod_be_ready(self.master, new_cassandra_pod)

    # Validate iscsi and multipath
    self.verify_iscsi_sessions_and_multipath(
        pvc_name, self.metrics_rc_hawkular_cassandra, rtype='rc',
        heketi_server_url=self.registry_heketi_server_url,
        is_registry_gluster=True)
def _heketi_pod_delete_cleanup(self):
    """Cleanup for deletion of heketi pod using force delete"""
    try:
        # Fetch heketi pod after delete
        pod_name = openshift_ops.get_pod_name_from_dc(
            self.node, self.heketi_dc_name)
        openshift_ops.wait_for_pod_be_ready(self.node, pod_name, timeout=1)
    except exceptions.ExecutionError:
        # Force delete and wait for new pod to come up
        openshift_ops.oc_delete(self.node, 'pod', pod_name, is_force=True)
        openshift_ops.wait_for_resource_absence(self.node, 'pod', pod_name)
        new_pod_name = openshift_ops.get_pod_name_from_dc(
            self.node, self.heketi_dc_name)
        openshift_ops.wait_for_pod_be_ready(self.node, new_pod_name)
def test_pv_resize_with_prefix_for_name(self,
                                        create_vol_name_prefix=False):
    """Validate PV resize with and without name prefix"""
    dir_path = "/mnt/"
    node = self.ocp_client[0]

    # Create PVC
    self.create_storage_class(
        allow_volume_expansion=True,
        create_vol_name_prefix=create_vol_name_prefix)
    pvc_name = self.create_and_wait_for_pvc()

    # Create DC with POD and attached PVC to it.
    dc_name = oc_create_app_dc_with_io(node, pvc_name)
    self.addCleanup(oc_delete, node, 'dc', dc_name)
    self.addCleanup(scale_dc_pod_amount_and_wait, node, dc_name, 0)
    pod_name = get_pod_name_from_dc(node, dc_name)
    wait_for_pod_be_ready(node, pod_name)

    if create_vol_name_prefix:
        ret = heketi_ops.verify_volume_name_prefix(
            node, self.sc['volumenameprefix'],
            self.sc['secretnamespace'],
            pvc_name, self.heketi_server_url)
        self.assertTrue(ret, "verify volnameprefix failed")

    cmd = ("dd if=/dev/urandom of=%sfile "
           "bs=100K count=1000") % dir_path
    ret, out, err = oc_rsh(node, pod_name, cmd)
    self.assertEqual(
        ret, 0, "Failed to execute command %s on %s" % (cmd, node))

    cmd = ("dd if=/dev/urandom of=%sfile2 "
           "bs=100K count=10000") % dir_path
    ret, out, err = oc_rsh(node, pod_name, cmd)
    self.assertNotEqual(
        ret, 0, "This IO did not fail as expected: "
        "command %s on %s" % (cmd, node))

    pvc_size = 2
    resize_pvc(node, pvc_name, pvc_size)
    verify_pvc_size(node, pvc_name, pvc_size)
    pv_name = get_pv_name_from_pvc(node, pvc_name)
    verify_pv_size(node, pv_name, pvc_size)

    oc_delete(node, 'pod', pod_name)
    wait_for_resource_absence(node, 'pod', pod_name)
    pod_name = get_pod_name_from_dc(node, dc_name)
    wait_for_pod_be_ready(node, pod_name)

    cmd = ("dd if=/dev/urandom of=%sfile_new "
           "bs=50K count=10000") % dir_path
    ret, out, err = oc_rsh(node, pod_name, cmd)
    self.assertEqual(
        ret, 0, "Failed to execute command %s on %s" % (cmd, node))
def _guster_pod_delete_cleanup(self):
    """Cleanup for deletion of gluster pod using force delete"""
    try:
        # Fetch gluster pod after delete
        pod_name = self._get_gluster_pod()

        # Check if gluster pod is in ready state
        openshift_ops.wait_for_pod_be_ready(self.node, pod_name, timeout=1)
    except exceptions.ExecutionError:
        # Force delete and wait for new pod to come up
        openshift_ops.oc_delete(self.node, 'pod', pod_name, is_force=True)
        openshift_ops.wait_for_resource_absence(self.node, 'pod', pod_name)

        # Fetch gluster pod after force delete
        g_new_pod = self._get_gluster_pod()
        openshift_ops.wait_for_pod_be_ready(self.node, g_new_pod)
def test_verify_metrics_data_during_gluster_pod_respin(self):
    # Add check for CRS version
    switch_oc_project(self.master, self.registry_project_name)
    if not self.is_containerized_gluster():
        self.skipTest("Skipping this test case as CRS version check "
                      "can not be implemented")

    # Verify multipath and iscsi for cassandra pod
    switch_oc_project(self.master, self.metrics_project_name)
    hawkular_cassandra, pvc_name, iqn, _, node = (
        self.verify_cassandra_pod_multipath_and_iscsi())

    # Get the ip of active path
    device_and_ip = get_iscsi_block_devices_by_path(node, iqn)
    mpath = get_mpath_name_from_device_name(
        node, list(device_and_ip.keys())[0])
    active_passive_dict = get_active_and_enabled_devices_from_mpath(
        node, mpath)
    node_ip = device_and_ip[active_passive_dict['active'][0]]

    # Get the name of gluster pod from the ip
    switch_oc_project(self.master, self.registry_project_name)
    gluster_pods = get_ocp_gluster_pod_details(self.master)
    pod_name = list(
        filter(lambda pod: (pod["pod_host_ip"] == node_ip),
               gluster_pods))[0]["pod_name"]
    err_msg = "Failed to get the gluster pod name {} with active path"
    self.assertTrue(pod_name, err_msg.format(pod_name))

    # Delete the pod
    oc_delete(self.master, 'pod', pod_name)
    wait_for_resource_absence(self.master, 'pod', pod_name)

    # Wait for new pod to come up
    pod_count = len(self.registry_servers_info.keys())
    selector = "glusterfs-node=pod"
    wait_for_pods_be_ready(self.master, pod_count, selector)

    # Validate cassandra pod state, multipath and iscsi
    switch_oc_project(self.master, self.metrics_project_name)
    wait_for_pod_be_ready(self.master, hawkular_cassandra, timeout=2)
    self.verify_iscsi_sessions_and_multipath(
        pvc_name, self.metrics_rc_hawkular_cassandra, rtype='rc',
        heketi_server_url=self.registry_heketi_server_url,
        is_registry_gluster=True)
def _heketi_pod_delete_cleanup(self):
    """Cleanup for deletion of heketi pod using force delete"""
    try:
        pod_name = get_pod_name_from_dc(self.ocp_master_node[0],
                                        self.heketi_dc_name)

        # Check if heketi pod is in ready state
        wait_for_pod_be_ready(self.ocp_master_node[0], pod_name, timeout=1)
    except ExecutionError:
        # Force delete and wait for new pod to come up
        oc_delete(self.ocp_master_node[0], 'pod', pod_name, is_force=True)
        wait_for_resource_absence(self.ocp_master_node[0], 'pod', pod_name)

        # Fetch heketi pod after force delete
        pod_name = get_pod_name_from_dc(self.ocp_master_node[0],
                                        self.heketi_dc_name)
        wait_for_pod_be_ready(self.ocp_master_node[0], pod_name)
def test_resping_gluster_pod(self):
    """Validate gluster pod restart with no disruption to elasticsearch
       pod
    """
    restart_custom = ":status.containerStatuses[0].restartCount"

    # Fetch pod and validate iscsi and multipath
    es_pod, _ = self._get_es_pod_and_verify_iscsi_sessions()

    # Fetch the restart count for the es pod
    restart_count_before = openshift_ops.oc_get_custom_resource(
        self._master, "pod", restart_custom, es_pod)[0]

    # Switch to gluster project
    openshift_ops.switch_oc_project(
        self._master, self._registry_project_name)

    # Fetch the gluster pod list before
    g_pod_list_before = [
        pod["pod_name"]
        for pod in openshift_ops.get_ocp_gluster_pod_details(self._master)]

    # Respin a gluster pod
    openshift_ops.oc_delete(self._master, "pod", g_pod_list_before[0])
    self.addCleanup(self._guster_pod_delete_cleanup, g_pod_list_before)

    # Wait for pod to get absent
    openshift_ops.wait_for_resource_absence(
        self._master, "pod", g_pod_list_before[0])

    # Fetch gluster pod after delete
    g_new_pod = self._get_newly_deployed_gluster_pod(g_pod_list_before)
    openshift_ops.wait_for_pod_be_ready(self._master, g_new_pod[0])

    # Switch to logging project
    openshift_ops.switch_oc_project(
        self._master, self._logging_project_name)

    # Fetch the restart count for the es pod
    restart_count_after = openshift_ops.oc_get_custom_resource(
        self._master, "pod", restart_custom, es_pod)[0]
    self.assertEqual(
        restart_count_before, restart_count_after,
        "Disruption found on the es pod: expected the restart count "
        "before ({}) and after ({}) the gluster pod respin to be "
        "equal".format(restart_count_before, restart_count_after))
def cassandra_pod_delete_cleanup(self, raise_on_error=False):
    """Cleanup for deletion of cassandra pod using force delete"""
    switch_oc_project(self.master, self.metrics_project_name)
    try:
        # Check if pod is up or ready
        pod_name = get_pod_name_from_rc(
            self.master, self.metrics_rc_hawkular_cassandra)
        wait_for_pod_be_ready(self.master, pod_name, timeout=1)
    except exceptions.ExecutionError as err:
        # Force delete and wait for new pod to come up
        oc_delete(self.master, 'pod', pod_name, is_force=True)
        wait_for_resource_absence(self.master, 'pod', pod_name)
        new_pod_name = get_pod_name_from_rc(
            self.master, self.metrics_rc_hawkular_cassandra)
        wait_for_pod_be_ready(self.master, new_pod_name)
        if raise_on_error:
            raise err
def _guster_pod_delete(self, g_pod_list_before):
    """Delete the gluster pod using force delete"""
    openshift_ops.switch_oc_project(
        self._master, self._registry_project_name)

    # Fetch newly deployed gluster pod after delete
    try:
        pod_name = self._get_newly_deployed_gluster_pod(g_pod_list_before)
        openshift_ops.wait_for_pod_be_ready(
            self._master,
            pod_name[0] if pod_name else g_pod_list_before[0],
            timeout=120, wait_step=6)
    except exceptions.ExecutionError:
        openshift_ops.oc_delete(
            self._master, 'pod', g_pod_list_before[0], is_force=True)
        openshift_ops.wait_for_resource_absence(
            self._master, 'pod', g_pod_list_before[0])
        g_new_pod = self._get_newly_deployed_gluster_pod(g_pod_list_before)
        openshift_ops.wait_for_pod_be_ready(self._master, g_new_pod[0])
def _respin_heketi_pod(self):
    h_node, h_url = self.heketi_client_node, self.heketi_server_url
    ocp_node = self.ocp_master_node[0]

    # get heketi-pod name
    heketi_pod_name = get_pod_name_from_dc(ocp_node, self.heketi_dc_name)

    # delete heketi-pod (it restarts the pod)
    oc_delete(ocp_node, "pod", heketi_pod_name,
              collect_logs=self.heketi_logs_before_delete)
    wait_for_resource_absence(ocp_node, "pod", heketi_pod_name)

    # get new heketi-pod name
    heketi_pod_name = get_pod_name_from_dc(ocp_node, self.heketi_dc_name)
    wait_for_pod_be_ready(ocp_node, heketi_pod_name)

    # check heketi server is running
    err_msg = "Heketi server %s is not alive" % h_url
    self.assertTrue(hello_heketi(h_node, h_url), err_msg)
def test_dev_path_mapping_heketi_pod_reboot(self):
    """Validate dev path mapping for heketi pod reboot"""
    self.node = self.ocp_master_node[0]
    h_node, h_url = self.heketi_client_node, self.heketi_server_url

    # Create file volume with app pod and verify IO's
    # and Compare path, uuid, vg_name
    pod_name, dc_name, use_percent = self._create_app_pod_and_verify_pvs()

    # Fetch heketi-pod name
    heketi_pod_name = openshift_ops.get_pod_name_from_dc(
        self.node, self.heketi_dc_name)

    # Respin heketi-pod (it restarts the pod)
    openshift_ops.oc_delete(
        self.node, "pod", heketi_pod_name,
        collect_logs=self.heketi_logs_before_delete)
    self.addCleanup(self._heketi_pod_delete_cleanup)
    openshift_ops.wait_for_resource_absence(
        self.node, "pod", heketi_pod_name)

    # Fetch new heketi-pod name
    heketi_pod_name = openshift_ops.get_pod_name_from_dc(
        self.node, self.heketi_dc_name)
    openshift_ops.wait_for_pod_be_ready(self.node, heketi_pod_name)

    # Check heketi server is running
    self.assertTrue(
        heketi_ops.hello_heketi(h_node, h_url),
        "Heketi server {} is not alive".format(h_url))

    # Check if IO's are running after respin of heketi pod
    use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
    self.assertNotEqual(
        use_percent, use_percent_after,
        "Failed to execute IO's in the app pod {} after respin".format(
            pod_name))
def test_dynamic_provisioning_glusterfile_heketidown_pvc_delete(self):
    """Validate deletion of PVC's when heketi is down"""

    # Create storage class, secret and PVCs
    self.create_storage_class()
    self.pvc_name_list = self.create_and_wait_for_pvcs(
        1, 'pvc-heketi-down', 3)

    # remove heketi-pod
    scale_dc_pod_amount_and_wait(self.ocp_client[0],
                                 self.heketi_dc_name,
                                 0,
                                 self.storage_project_name)
    try:
        # delete pvc
        for pvc in self.pvc_name_list:
            oc_delete(self.ocp_client[0], 'pvc', pvc)
        for pvc in self.pvc_name_list:
            with self.assertRaises(ExecutionError):
                wait_for_resource_absence(
                    self.ocp_client[0], 'pvc', pvc,
                    interval=3, timeout=30)
    finally:
        # bring back heketi-pod
        scale_dc_pod_amount_and_wait(self.ocp_client[0],
                                     self.heketi_dc_name,
                                     1,
                                     self.storage_project_name)

    # verify PVC's are deleted
    for pvc in self.pvc_name_list:
        wait_for_resource_absence(self.ocp_client[0], 'pvc', pvc,
                                  interval=1, timeout=120)

    # create a new PVC
    self.create_and_wait_for_pvc()
def test_dev_path_mapping_app_pod_with_block_volume_reboot(self):
    """Validate dev path mapping for app pods with block volume
       after reboot
    """
    # Create block volume with app pod and verify IO's
    # and Compare path, uuid, vg_name
    pod_name, dc_name, use_percent = self._create_app_pod_and_verify_pvs()

    # Delete app pods
    openshift_ops.oc_delete(self.node, 'pod', pod_name)
    openshift_ops.wait_for_resource_absence(self.node, 'pod', pod_name)

    # Wait for the new app pod to come up
    self.assertTrue(
        dc_name, "Failed to get the dc name from {}".format(dc_name))
    pod_name = openshift_ops.get_pod_name_from_dc(self.node, dc_name)
    openshift_ops.wait_for_pod_be_ready(self.node, pod_name)

    # Check if IO's are running after respin of app pod
    use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
    self.assertNotEqual(
        use_percent, use_percent_after,
        "Failed to execute IO's in the app pod {} after respin".format(
            pod_name))
def test_dynamic_provisioning_glusterblock_heketidown_pvc_delete(self):
    """Validate PVC deletion when heketi is down"""

    # Create Secret, SC and PVCs
    self.create_storage_class()
    self.pvc_name_list = self.create_and_wait_for_pvcs(
        1, 'pvc-heketi-down', 3)

    # remove heketi-pod
    scale_dc_pod_amount_and_wait(self.ocp_client[0],
                                 self.heketi_dc_name,
                                 0,
                                 self.storage_project_name)
    try:
        # delete pvc
        for pvc in self.pvc_name_list:
            oc_delete(self.ocp_client[0], 'pvc', pvc)
        for pvc in self.pvc_name_list:
            with self.assertRaises(ExecutionError):
                wait_for_resource_absence(
                    self.ocp_client[0], 'pvc', pvc,
                    interval=3, timeout=30)
    finally:
        # bring back heketi-pod
        scale_dc_pod_amount_and_wait(self.ocp_client[0],
                                     self.heketi_dc_name,
                                     1,
                                     self.storage_project_name)

    # verify PVC's are deleted
    for pvc in self.pvc_name_list:
        wait_for_resource_absence(self.ocp_client[0], 'pvc', pvc,
                                  interval=1, timeout=120)

    # create a new PVC
    self.create_and_wait_for_pvc()
def test_dynamic_provisioning_glusterfile_reclaim_policy_retain(self):
    """Validate retain policy for glusterfs after deletion of pvc"""
    if get_openshift_version() < "3.9":
        self.skipTest(
            "'Reclaim' feature is not supported in OCP older than 3.9")

    self.create_storage_class(reclaim_policy='Retain')
    self.create_and_wait_for_pvc()

    # get the name of the volume
    pv_name = get_pv_name_from_pvc(self.node, self.pvc_name)
    custom = [r':.metadata.annotations.'
              r'"gluster\.kubernetes\.io\/heketi\-volume\-id"',
              r':.spec.persistentVolumeReclaimPolicy']

    vol_id, reclaim_policy = oc_get_custom_resource(
        self.node, 'pv', custom, pv_name)

    self.assertEqual(reclaim_policy, 'Retain')

    # Create DC with POD and attached PVC to it.
    try:
        dc_name = oc_create_app_dc_with_io(
            self.node, self.pvc_name,
            image=self.io_container_image_cirros)
        pod_name = get_pod_name_from_dc(self.node, dc_name)
        wait_for_pod_be_ready(self.node, pod_name)
    finally:
        scale_dc_pod_amount_and_wait(self.node, dc_name, 0)
        oc_delete(self.node, 'dc', dc_name)
        wait_for_resource_absence(self.node, 'pod', pod_name)

    oc_delete(self.node, 'pvc', self.pvc_name)

    with self.assertRaises(ExecutionError):
        wait_for_resource_absence(
            self.node, 'pvc', self.pvc_name, interval=3, timeout=30)

    heketi_volume_delete(self.heketi_client_node,
                         self.heketi_server_url, vol_id)

    vol_list = heketi_volume_list(self.heketi_client_node,
                                  self.heketi_server_url)

    self.assertNotIn(vol_id, vol_list)

    oc_delete(self.node, 'pv', pv_name)
    wait_for_resource_absence(self.node, 'pv', pv_name)
def test_dynamic_provisioning_glusterfile_reclaim_policy_retain(self):
    """Validate retain policy for glusterfs after deletion of pvc"""
    if get_openshift_version() < "3.9":
        self.skipTest(
            "'Reclaim' feature is not supported in OCP older than 3.9")

    self.create_storage_class(reclaim_policy='Retain')
    self.create_and_wait_for_pvc()

    # get the name of the volume
    pv_name = get_pv_name_from_pvc(self.node, self.pvc_name)
    custom = [r':.metadata.annotations.'
              r'"gluster\.kubernetes\.io\/heketi\-volume\-id"',
              r':.spec.persistentVolumeReclaimPolicy']

    vol_id, reclaim_policy = oc_get_custom_resource(
        self.node, 'pv', custom, pv_name)

    self.assertEqual(reclaim_policy, 'Retain')

    # Create DC with POD and attached PVC to it.
    try:
        dc_name = oc_create_app_dc_with_io(self.node, self.pvc_name)
        pod_name = get_pod_name_from_dc(self.node, dc_name)
        wait_for_pod_be_ready(self.node, pod_name)
    finally:
        scale_dc_pod_amount_and_wait(self.node, dc_name, 0)
        oc_delete(self.node, 'dc', dc_name)
        wait_for_resource_absence(self.node, 'pod', pod_name)

    oc_delete(self.node, 'pvc', self.pvc_name)

    with self.assertRaises(ExecutionError):
        wait_for_resource_absence(
            self.node, 'pvc', self.pvc_name, interval=3, timeout=30)

    heketi_volume_delete(self.heketi_client_node,
                         self.heketi_server_url, vol_id)

    vol_list = heketi_volume_list(self.heketi_client_node,
                                  self.heketi_server_url)

    self.assertNotIn(vol_id, vol_list)

    oc_delete(self.node, 'pv', pv_name)
    wait_for_resource_absence(self.node, 'pv', pv_name)
def test_respin_es_pod(self, motive):
    """Validate respin of elastic search pod"""

    # Get the pod name and PVC name
    es_pod = openshift_ops.get_pod_name_from_dc(
        self._master, self._logging_es_dc)
    pvc_custom = ":.spec.volumes[*].persistentVolumeClaim.claimName"
    pvc_name = openshift_ops.oc_get_custom_resource(
        self._master, "pod", pvc_custom, es_pod)[0]

    # Validate iscsi and multipath
    _, _, node = self.verify_iscsi_sessions_and_multipath(
        pvc_name, self._logging_es_dc,
        heketi_server_url=self._registry_heketi_server_url,
        is_registry_gluster=True)
    if motive == 'delete':
        # Delete the es pod
        self.addCleanup(self._delete_and_wait_for_new_es_pod_to_come_up)
        openshift_ops.oc_delete(self._master, "pod", es_pod)
    elif motive == 'drain':
        # Get the number of infra nodes
        infra_node_count_cmd = (
            'oc get nodes '
            '--no-headers -l node-role.kubernetes.io/infra=true|wc -l')
        infra_node_count = command.cmd_run(
            infra_node_count_cmd, self._master)

        # Skip test case if the number of infra nodes is less than 2
        if int(infra_node_count) < 2:
            self.skipTest('Available number of infra nodes "{}", it '
                          'should be more than 1'.format(infra_node_count))

        # Cleanup to make node schedulable
        cmd_schedule = (
            'oc adm manage-node {} --schedulable=true'.format(node))
        self.addCleanup(
            command.cmd_run, cmd_schedule, hostname=self._master)

        # Drain the node
        drain_cmd = ('oc adm drain {} --force=true --ignore-daemonsets '
                     '--delete-local-data'.format(node))
        command.cmd_run(drain_cmd, hostname=self._master)

    # Wait for pod to get absent
    openshift_ops.wait_for_resource_absence(self._master, "pod", es_pod)

    # Wait for new pod to come up
    try:
        pod_name = openshift_ops.get_pod_name_from_dc(
            self._master, self._logging_es_dc)
        openshift_ops.wait_for_pod_be_ready(self._master, pod_name)
    except exceptions.ExecutionError:
        self._delete_and_wait_for_new_es_pod_to_come_up()

    # Validate iscsi and multipath
    self.verify_iscsi_sessions_and_multipath(
        pvc_name, self._logging_es_dc,
        heketi_server_url=self._registry_heketi_server_url,
        is_registry_gluster=True)
def test_dynamic_provisioning_glusterfile_heketipod_failure(self):
    """Validate dynamic provisioning for gluster file when heketi pod
       is down
    """
    mount_path = "/mnt"
    datafile_path = '%s/fake_file_for_%s' % (mount_path, self.id())

    # Create secret and storage class
    sc_name = self.create_storage_class()

    # Create PVC
    app_1_pvc_name = self.create_and_wait_for_pvc(
        pvc_name_prefix="autotest-file", sc_name=sc_name)

    # Create app POD with attached volume
    app_1_pod_name = oc_create_tiny_pod_with_volume(
        self.node, app_1_pvc_name, "test-pvc-mount-on-app-pod",
        mount_path=mount_path)
    self.addCleanup(
        wait_for_resource_absence, self.node, 'pod', app_1_pod_name)
    self.addCleanup(oc_delete, self.node, 'pod', app_1_pod_name)

    # Wait for app POD be up and running
    wait_for_pod_be_ready(
        self.node, app_1_pod_name, timeout=60, wait_step=2)

    # Write data to the app POD
    write_data_cmd = (
        "dd if=/dev/urandom of=%s bs=1K count=100" % datafile_path)
    ret, out, err = oc_rsh(self.node, app_1_pod_name, write_data_cmd)
    self.assertEqual(
        ret, 0,
        "Failed to execute command %s on %s" % (write_data_cmd, self.node))

    # Remove Heketi pod
    heketi_down_cmd = "oc scale --replicas=0 dc/%s --namespace %s" % (
        self.heketi_dc_name, self.storage_project_name)
    heketi_up_cmd = "oc scale --replicas=1 dc/%s --namespace %s" % (
        self.heketi_dc_name, self.storage_project_name)
    self.addCleanup(self.cmd_run, heketi_up_cmd)
    heketi_pod_name = get_pod_name_from_dc(
        self.node, self.heketi_dc_name, timeout=10, wait_step=3)
    self.cmd_run(heketi_down_cmd)
    wait_for_resource_absence(self.node, 'pod', heketi_pod_name)

    # Create second PVC while Heketi is down
    app_2_pvc_name = oc_create_pvc(
        self.node, pvc_name_prefix="autotest-file2", sc_name=sc_name)
    self.addCleanup(
        wait_for_resource_absence, self.node, 'pvc', app_2_pvc_name)
    self.addCleanup(
        oc_delete, self.node, 'pvc', app_2_pvc_name,
        raise_on_absence=False)

    # Create second app POD
    app_2_pod_name = oc_create_tiny_pod_with_volume(
        self.node, app_2_pvc_name, "test-pvc-mount-on-app-pod",
        mount_path=mount_path)
    self.addCleanup(
        wait_for_resource_absence, self.node, 'pod', app_2_pod_name)
    self.addCleanup(oc_delete, self.node, 'pod', app_2_pod_name)

    # Bring Heketi POD back
    self.cmd_run(heketi_up_cmd)

    # Wait for Heketi POD be up and running
    new_heketi_pod_name = get_pod_name_from_dc(
        self.node, self.heketi_dc_name, timeout=10, wait_step=2)
    wait_for_pod_be_ready(
        self.node, new_heketi_pod_name, wait_step=5, timeout=120)

    # Wait for second PVC and app POD be ready
    verify_pvc_status_is_bound(self.node, app_2_pvc_name)
    wait_for_pod_be_ready(
        self.node, app_2_pod_name, timeout=60, wait_step=2)

    # Verify that we are able to write data
    ret, out, err = oc_rsh(self.node, app_2_pod_name, write_data_cmd)
    self.assertEqual(
        ret, 0,
        "Failed to execute command %s on %s" % (write_data_cmd, self.node))
def _pv_resize(self, exceed_free_space):
    dir_path = "/mnt"
    pvc_size_gb, min_free_space_gb = 1, 3

    # Get available free space disabling redundant devices and nodes
    heketi_url = self.heketi_server_url
    node_id_list = heketi_ops.heketi_node_list(
        self.heketi_client_node, heketi_url)
    self.assertTrue(node_id_list)
    nodes = {}
    min_free_space = min_free_space_gb * 1024**2
    for node_id in node_id_list:
        node_info = heketi_ops.heketi_node_info(
            self.heketi_client_node, heketi_url, node_id, json=True)
        if (node_info['state'].lower() != 'online'
                or not node_info['devices']):
            continue
        if len(nodes) > 2:
            out = heketi_ops.heketi_node_disable(
                self.heketi_client_node, heketi_url, node_id)
            self.assertTrue(out)
            self.addCleanup(
                heketi_ops.heketi_node_enable,
                self.heketi_client_node, heketi_url, node_id)
        for device in node_info['devices']:
            if device['state'].lower() != 'online':
                continue
            free_space = device['storage']['free']
            if (node_id in nodes.keys() or free_space < min_free_space):
                out = heketi_ops.heketi_device_disable(
                    self.heketi_client_node, heketi_url, device['id'])
                self.assertTrue(out)
                self.addCleanup(
                    heketi_ops.heketi_device_enable,
                    self.heketi_client_node, heketi_url, device['id'])
                continue
            nodes[node_id] = free_space
    if len(nodes) < 3:
        raise self.skipTest(
            "Could not find 3 online nodes with at least "
            "1 online device having free space "
            "bigger than %dGb." % min_free_space_gb)

    # Calculate maximum available size for PVC
    available_size_gb = int(min(nodes.values()) / (1024**2))

    # Create PVC
    self.create_storage_class(allow_volume_expansion=True)
    pvc_name = self.create_and_wait_for_pvc(pvc_size=pvc_size_gb)

    # Create DC with POD and attached PVC to it
    dc_name = oc_create_app_dc_with_io(self.node, pvc_name)
    self.addCleanup(oc_delete, self.node, 'dc', dc_name)
    self.addCleanup(scale_dc_pod_amount_and_wait, self.node, dc_name, 0)
    pod_name = get_pod_name_from_dc(self.node, dc_name)
    wait_for_pod_be_ready(self.node, pod_name)

    if exceed_free_space:
        # Try to expand existing PVC exceeding free space
        resize_pvc(self.node, pvc_name, available_size_gb)
        wait_for_events(self.node, obj_name=pvc_name,
                        event_reason='VolumeResizeFailed')

        # Check that app POD is up and running then try to write data
        wait_for_pod_be_ready(self.node, pod_name)
        cmd = (
            "dd if=/dev/urandom of=%s/autotest bs=100K count=1" % dir_path)
        ret, out, err = oc_rsh(self.node, pod_name, cmd)
        self.assertEqual(
            ret, 0,
            "Failed to write data after failed attempt to expand PVC.")
    else:
        # Expand existing PVC using all the available free space
        expand_size_gb = available_size_gb - pvc_size_gb
        resize_pvc(self.node, pvc_name, expand_size_gb)
        verify_pvc_size(self.node, pvc_name, expand_size_gb)
        pv_name = get_pv_name_from_pvc(self.node, pvc_name)
        verify_pv_size(self.node, pv_name, expand_size_gb)
        wait_for_events(
            self.node, obj_name=pvc_name,
            event_reason='VolumeResizeSuccessful')

        # Recreate app POD
        oc_delete(self.node, 'pod', pod_name)
        wait_for_resource_absence(self.node, 'pod', pod_name)
        pod_name = get_pod_name_from_dc(self.node, dc_name)
        wait_for_pod_be_ready(self.node, pod_name)

        # Write data on the expanded PVC
        cmd = ("dd if=/dev/urandom of=%s/autotest "
               "bs=1M count=1025" % dir_path)
        ret, out, err = oc_rsh(self.node, pod_name, cmd)
        self.assertEqual(
            ret, 0, "Failed to write data on the expanded PVC")
def test_create_delete_pvcs_to_make_gluster_reuse_released_space(self):
    """Validate reuse of volume space after deletion of PVCs"""
    min_storage_gb = 10

    # Set arbiter:disabled tags to the first 2 nodes
    data_nodes = []
    biggest_disks = []
    self.assertGreater(len(self.node_id_list), 2)
    for node_id in self.node_id_list[0:2]:
        node_info = heketi_ops.heketi_node_info(
            self.heketi_client_node, self.heketi_server_url,
            node_id, json=True)
        biggest_disk_free_space = 0
        for device in node_info['devices']:
            disk_free_space = int(device['storage']['free'])
            if disk_free_space < (min_storage_gb * 1024**2):
                self.skipTest(
                    "Devices are expected to have more than "
                    "%sGb of free space" % min_storage_gb)
            if disk_free_space > biggest_disk_free_space:
                biggest_disk_free_space = disk_free_space
            self._set_arbiter_tag_with_further_revert(
                self.heketi_client_node, self.heketi_server_url,
                'device', device['id'], 'disabled',
                revert_to=device.get('tags', {}).get('arbiter'))
        biggest_disks.append(biggest_disk_free_space)
        self._set_arbiter_tag_with_further_revert(
            self.heketi_client_node, self.heketi_server_url,
            'node', node_id, 'disabled',
            revert_to=node_info.get('tags', {}).get('arbiter'))
        data_nodes.append(node_info)

    # Set arbiter:required tag to all other nodes and their devices
    arbiter_nodes = []
    for node_id in self.node_id_list[2:]:
        node_info = heketi_ops.heketi_node_info(
            self.heketi_client_node, self.heketi_server_url,
            node_id, json=True)
        for device in node_info['devices']:
            self._set_arbiter_tag_with_further_revert(
                self.heketi_client_node, self.heketi_server_url,
                'device', device['id'], 'required',
                revert_to=device.get('tags', {}).get('arbiter'))
        self._set_arbiter_tag_with_further_revert(
            self.heketi_client_node, self.heketi_server_url,
            'node', node_id, 'required',
            revert_to=node_info.get('tags', {}).get('arbiter'))
        arbiter_nodes.append(node_info)

    # Calculate size and amount of volumes to be created
    pvc_size = int(min(biggest_disks) / 1024**2)
    pvc_amount = max([len(n['devices']) for n in data_nodes]) + 1

    # Create sc with gluster arbiter info
    self.create_storage_class(is_arbiter_vol=True)

    # Create and delete 3 small volumes concurrently
    pvc_names = []
    for i in range(3):
        pvc_name = oc_create_pvc(
            self.node, self.sc_name, pvc_name_prefix='arbiter-pvc',
            pvc_size=int(pvc_size / 3))
        pvc_names.append(pvc_name)
    exception_exists = False
    for pvc_name in pvc_names:
        try:
            verify_pvc_status_is_bound(self.node, pvc_name)
        except Exception:
            for pvc_name in pvc_names:
                self.addCleanup(
                    wait_for_resource_absence, self.node, 'pvc', pvc_name)
            for pvc_name in pvc_names:
                self.addCleanup(oc_delete, self.node, 'pvc', pvc_name)
            exception_exists = True
    if exception_exists:
        raise
    for pvc_name in pvc_names:
        oc_delete(self.node, 'pvc', pvc_name)
    for pvc_name in pvc_names:
        wait_for_resource_absence(self.node, 'pvc', pvc_name)

    # Create and delete big volumes in a loop
    for i in range(pvc_amount):
        pvc_name = oc_create_pvc(
            self.node, self.sc_name, pvc_name_prefix='arbiter-pvc',
            pvc_size=pvc_size)
        try:
            verify_pvc_status_is_bound(self.node, pvc_name)
        except Exception:
            self.addCleanup(
                wait_for_resource_absence, self.node, 'pvc', pvc_name)
            self.addCleanup(oc_delete, self.node, 'pvc', pvc_name)
            raise
        oc_delete(self.node, 'pvc', pvc_name)
        wait_for_resource_absence(self.node, 'pvc', pvc_name)
def test_dynamic_provisioning_glusterblock_heketipod_failure(self):
    """Validate PVC with glusterblock creation when heketi pod is down"""
    datafile_path = '/mnt/fake_file_for_%s' % self.id()

    # Create DC with attached PVC
    sc_name = self.create_storage_class()
    app_1_pvc_name = self.create_and_wait_for_pvc(
        pvc_name_prefix='autotest-block', sc_name=sc_name)
    app_1_dc_name, app_1_pod_name = self.create_dc_with_pvc(app_1_pvc_name)

    # Write test data
    write_data_cmd = (
        "dd if=/dev/urandom of=%s bs=1K count=100" % datafile_path)
    ret, out, err = oc_rsh(self.node, app_1_pod_name, write_data_cmd)
    self.assertEqual(
        ret, 0,
        "Failed to execute command %s on %s" % (write_data_cmd, self.node))

    # Remove Heketi pod
    heketi_down_cmd = "oc scale --replicas=0 dc/%s --namespace %s" % (
        self.heketi_dc_name, self.storage_project_name)
    heketi_up_cmd = "oc scale --replicas=1 dc/%s --namespace %s" % (
        self.heketi_dc_name, self.storage_project_name)
    self.addCleanup(self.cmd_run, heketi_up_cmd)
    heketi_pod_name = get_pod_name_from_dc(
        self.node, self.heketi_dc_name, timeout=10, wait_step=3)
    self.cmd_run(heketi_down_cmd)
    wait_for_resource_absence(self.node, 'pod', heketi_pod_name)

    # Create second PVC
    app_2_pvc_name = oc_create_pvc(
        self.node, pvc_name_prefix='autotest-block2', sc_name=sc_name)
    self.addCleanup(
        wait_for_resource_absence, self.node, 'pvc', app_2_pvc_name)
    self.addCleanup(oc_delete, self.node, 'pvc', app_2_pvc_name)

    # Create second app POD
    app_2_dc_name = oc_create_app_dc_with_io(self.node, app_2_pvc_name)
    self.addCleanup(oc_delete, self.node, 'dc', app_2_dc_name)
    self.addCleanup(
        scale_dc_pod_amount_and_wait, self.node, app_2_dc_name, 0)
    app_2_pod_name = get_pod_name_from_dc(self.node, app_2_dc_name)

    # Bring Heketi pod back
    self.cmd_run(heketi_up_cmd)

    # Wait for Heketi POD be up and running
    new_heketi_pod_name = get_pod_name_from_dc(
        self.node, self.heketi_dc_name, timeout=10, wait_step=2)
    wait_for_pod_be_ready(
        self.node, new_heketi_pod_name, wait_step=5, timeout=120)

    # Wait for second PVC and app POD be ready
    verify_pvc_status_is_bound(self.node, app_2_pvc_name)
    wait_for_pod_be_ready(
        self.node, app_2_pod_name, timeout=150, wait_step=3)

    # Verify that we are able to write data
    ret, out, err = oc_rsh(self.node, app_2_pod_name, write_data_cmd)
    self.assertEqual(
        ret, 0,
        "Failed to execute command %s on %s" % (write_data_cmd, self.node))
def initiator_side_failures(self):
    # get storage ips of glusterfs pods
    keys = self.gluster_servers
    gluster_ips = []
    for key in keys:
        gluster_ips.append(self.gluster_servers_info[key]['storage'])
    gluster_ips.sort()

    self.create_storage_class()
    self.create_and_wait_for_pvc()

    # find iqn and hacount from volume info
    pv_name = get_pv_name_from_pvc(self.node, self.pvc_name)
    custom = [r':.metadata.annotations."gluster\.org\/volume\-id"']
    vol_id = oc_get_custom_resource(self.node, 'pv', custom, pv_name)[0]
    vol_info = heketi_blockvolume_info(
        self.heketi_client_node, self.heketi_server_url, vol_id, json=True)
    iqn = vol_info['blockvolume']['iqn']
    hacount = int(self.sc['hacount'])

    # create app pod
    dc_name, pod_name = self.create_dc_with_pvc(self.pvc_name)

    # When we have to verify iscsi login devices & mpaths, we run it twice
    for i in range(2):

        # get node hostname from pod info
        pod_info = oc_get_pods(
            self.node, selector='deploymentconfig=%s' % dc_name)
        node = pod_info[pod_name]['node']

        # get the iscsi sessions info from the node
        iscsi = get_iscsi_session(node, iqn)
        self.assertEqual(hacount, len(iscsi))
        iscsi.sort()
        self.assertEqual(set(iscsi), (set(gluster_ips) & set(iscsi)))

        # get the paths info from the node
        devices = get_iscsi_block_devices_by_path(node, iqn).keys()
        self.assertEqual(hacount, len(devices))

        # get mpath names and verify that only one mpath is there
        mpaths = set()
        for device in devices:
            mpaths.add(get_mpath_name_from_device_name(node, device))
        self.assertEqual(1, len(mpaths))

        validate_multipath_pod(
            self.node, pod_name, hacount, mpath=list(mpaths)[0])

        # When we have to verify iscsi session logout, we run only once
        if i == 1:
            break

        # make node unschedulable where pod is running
        oc_adm_manage_node(
            self.node, '--schedulable=false', nodes=[node])

        # make node schedulable where pod is running
        self.addCleanup(
            oc_adm_manage_node, self.node, '--schedulable=true',
            nodes=[node])

        # delete pod so it gets respun on any other node
        oc_delete(self.node, 'pod', pod_name)
        wait_for_resource_absence(self.node, 'pod', pod_name)

        # wait for pod to come up
        pod_name = get_pod_name_from_dc(self.node, dc_name)
        wait_for_pod_be_ready(self.node, pod_name)

        # get the iscsi session from the previous node to verify logout
        iscsi = get_iscsi_session(node, iqn, raise_on_error=False)
        self.assertFalse(iscsi)