def test_brick_evict_on_three_node_with_one_down(self):
    """Test brick evict basic functionality and verify brick evict
    will fail after node down if nodes are three"""
    h_node, h_server = self.heketi_client_node, self.heketi_server_url

    # Disable node if more than 3
    node_list = heketi_ops.heketi_node_list(h_node, h_server)
    if len(node_list) > 3:
        for node_id in node_list[3:]:
            heketi_ops.heketi_node_disable(h_node, h_server, node_id)
            self.addCleanup(
                heketi_ops.heketi_node_enable, h_node, h_server, node_id)

    # Create heketi volume
    vol_info = heketi_ops.heketi_volume_create(
        h_node, h_server, 1, json=True)
    self.addCleanup(
        heketi_ops.heketi_volume_delete, h_node, h_server,
        vol_info.get('id'))

    # Get node on which heketi pod is scheduled
    heketi_pod = openshift_ops.get_pod_name_from_dc(
        self.ocp_client, self.heketi_dc_name)
    heketi_node = openshift_ops.oc_get_custom_resource(
        self.ocp_client, 'pod', '.:spec.nodeName', heketi_pod)[0]

    # Get list of hostname from node id
    host_list = []
    for node_id in node_list[3:]:
        node_info = heketi_ops.heketi_node_info(
            h_node, h_server, node_id, json=True)
        host_list.append(node_info.get('hostnames').get('manage')[0])

    # Get brick id and glusterfs node which is not heketi node
    for node in vol_info.get('bricks', {}):
        node_info = heketi_ops.heketi_node_info(
            h_node, h_server, node.get('node'), json=True)
        hostname = node_info.get('hostnames').get('manage')[0]
        if (hostname != heketi_node) and (hostname not in host_list):
            brick_id = node.get('id')
            break

    self._power_off_node_and_wait_node_to_be_not_ready(hostname)

    # Perform brick evict operation
    try:
        heketi_ops.heketi_brick_evict(h_node, h_server, brick_id)
    except AssertionError as e:
        if ('No Replacement was found' not in six.text_type(e)):
            raise

def delete_node_and_devices_on_it(self, node_id):
    heketi_ops.heketi_node_disable(self.h_node, self.h_url, node_id)
    heketi_ops.heketi_node_remove(self.h_node, self.h_url, node_id)
    node_info = heketi_ops.heketi_node_info(
        self.h_node, self.h_url, node_id, json=True)
    for device in node_info['devices']:
        heketi_ops.heketi_device_delete(
            self.h_node, self.h_url, device['id'])
    heketi_ops.heketi_node_delete(self.h_node, self.h_url, node_id)

def get_ready_for_node_add(self, hostname):
    self.configure_node_to_run_gluster(hostname)

    h_nodes = heketi_ops.heketi_node_list(self.h_node, self.h_url)

    # Disable nodes except first two nodes
    for node_id in h_nodes[2:]:
        heketi_ops.heketi_node_disable(self.h_node, self.h_url, node_id)
        self.addCleanup(
            heketi_ops.heketi_node_enable, self.h_node, self.h_url, node_id)

def test_heketi_node_states_enable_disable(self):
    """Test node enable and disable functionality"""
    h_client, h_server = self.heketi_client_node, self.heketi_server_url

    node_list = heketi_ops.heketi_node_list(h_client, h_server)
    online_hosts = []
    for node_id in node_list:
        node_info = heketi_ops.heketi_node_info(
            h_client, h_server, node_id, json=True)
        if node_info["state"] == "online":
            online_hosts.append(node_info)

    if len(online_hosts) < 3:
        raise self.skipTest(
            "This test can run only if online hosts are more than 2")

    # Disable n-3 nodes, in case we have n nodes
    for node_info in online_hosts[3:]:
        node_id = node_info["id"]
        heketi_ops.heketi_node_disable(h_client, h_server, node_id)
        self.addCleanup(
            heketi_ops.heketi_node_enable, h_client, h_server, node_id)

    # Create volume when 3 nodes are online
    vol_size = 1
    vol_info = heketi_ops.heketi_volume_create(
        h_client, h_server, vol_size, json=True)
    self.addCleanup(
        heketi_ops.heketi_volume_delete, h_client, h_server,
        vol_info['id'])

    node_id = online_hosts[0]['id']
    try:
        heketi_ops.heketi_node_disable(h_client, h_server, node_id)

        # Try to create a volume, volume creation should fail
        with self.assertRaises(AssertionError):
            heketi_volume = heketi_ops.heketi_volume_create(
                h_client, h_server, vol_size)
            self.addCleanup(
                heketi_ops.heketi_volume_delete, h_client, h_server,
                heketi_volume["id"])
    finally:
        # Enable heketi node
        heketi_ops.heketi_node_enable(h_client, h_server, node_id)

    # Create volume when heketi node is enabled
    vol_info = heketi_ops.heketi_volume_create(
        h_client, h_server, vol_size, json=True)
    heketi_ops.heketi_volume_delete(h_client, h_server, vol_info['id'])

def test_brick_multiplex_pids_with_diff_vol_option_values(self):
    """Test brick PIDs should be same when values of vol options differ"""
    h_client, h_url = self.heketi_client_node, self.heketi_server_url

    # Disable heketi nodes except first three nodes
    h_nodes_list = heketi_node_list(h_client, h_url)
    for node_id in h_nodes_list[3:]:
        heketi_node_disable(h_client, h_url, node_id)
        self.addCleanup(heketi_node_enable, h_client, h_url, node_id)

    # Create storage class with diff volumeoptions
    sc1 = self.create_storage_class(volumeoptions='user.heketi.abc 1')
    sc2 = self.create_storage_class(volumeoptions='user.heketi.abc 2')

    # Create PVC's with above SC
    pvc1 = self.create_and_wait_for_pvcs(sc_name=sc1)
    pvc2 = self.create_and_wait_for_pvcs(sc_name=sc2)

    # Get vol info and status
    vol_info1 = get_gluster_vol_info_by_pvc_name(self.node, pvc1[0])
    vol_info2 = get_gluster_vol_info_by_pvc_name(self.node, pvc2[0])
    vol_status1 = get_gluster_vol_status(vol_info1['gluster_vol_id'])
    vol_status2 = get_gluster_vol_status(vol_info2['gluster_vol_id'])

    # Verify vol options
    err_msg = ('Volume option "user.heketi.abc %s" did not match for '
               'volume %s in gluster vol info')
    self.assertEqual(
        vol_info1['options']['user.heketi.abc'], '1',
        err_msg % (1, vol_info1['gluster_vol_id']))
    self.assertEqual(
        vol_info2['options']['user.heketi.abc'], '2',
        err_msg % (2, vol_info2['gluster_vol_id']))

    # Get the PID's and match them
    pids1 = set()
    for brick in vol_info1['bricks']['brick']:
        host, bname = brick['name'].split(":")
        pids1.add(vol_status1[host][bname]['pid'])

    pids2 = set()
    for brick in vol_info2['bricks']['brick']:
        host, bname = brick['name'].split(":")
        pids2.add(vol_status2[host][bname]['pid'])

    err_msg = ('PIDs of both the volumes %s and %s are expected to be '
               'the same, but got different PIDs "%s" and "%s".' % (
                   vol_info1['gluster_vol_id'],
                   vol_info2['gluster_vol_id'], pids1, pids2))
    self.assertEqual(pids1, pids2, err_msg)

def get_online_nodes_disable_redundant(self):
    """
    Find online nodes, disable n-3 nodes and return
    the list of online nodes
    """
    node_list = heketi_node_list(self.heketi_client_node,
                                 self.heketi_server_url)
    self.assertTrue(node_list, "Failed to list heketi nodes")
    g.log.info("Successfully got the list of nodes")

    # Fetch online nodes from node list
    online_hosts = []
    for node in node_list:
        node_info = heketi_node_info(
            self.heketi_client_node, self.heketi_server_url,
            node, json=True)
        if node_info["state"] == "online":
            online_hosts.append(node_info)

    # Skip test if online node count is less than 3
    if len(online_hosts) < 3:
        raise self.skipTest(
            "This test can run only if online hosts are more than 2")

    # if we have n nodes, disable n-3 nodes
    for node_info in online_hosts[3:]:
        node_id = node_info["id"]
        g.log.info("going to disable node id %s", node_id)
        heketi_node_disable(self.heketi_client_node,
                            self.heketi_server_url, node_id)
        self.addCleanup(heketi_node_enable, self.heketi_client_node,
                        self.heketi_server_url, node_id)

    for host in online_hosts[1:3]:
        found_online = False
        for device in host["devices"]:
            if device["state"].strip().lower() == "online":
                found_online = True
                break
        if not found_online:
            self.skipTest("no device online on node %s" % host["id"])

    return online_hosts

def disable_node(self, node_id):
    """
    Disable node through heketi-cli.

    :param node_id: str node ID
    """
    out = heketi_node_disable(self.heketi_client_node,
                              self.heketi_server_url, node_id)

    self.assertNotEqual(out, False,
                        "Failed to disable node of id %s" % node_id)

def setUp(self):
    super(TestDevPathMapping, self).setUp()
    self.node = self.ocp_master_node[0]
    self.h_node, self.h_server = (self.heketi_client_node,
                                  self.heketi_server_url)
    h_nodes_list = heketi_ops.heketi_node_list(self.h_node, self.h_server)
    h_node_count = len(h_nodes_list)
    if h_node_count < 3:
        self.skipTest(
            "At least 3 nodes are required, found {}".format(h_node_count))

    # Disable 4th and other nodes
    for node_id in h_nodes_list[3:]:
        self.addCleanup(heketi_ops.heketi_node_enable, self.h_node,
                        self.h_server, node_id)
        heketi_ops.heketi_node_disable(self.h_node, self.h_server, node_id)

    h_info = heketi_ops.heketi_node_info(
        self.h_node, self.h_server, h_nodes_list[0], json=True)
    self.assertTrue(
        h_info, "Failed to get the heketi node info for node id"
        " {}".format(h_nodes_list[0]))

    self.node_ip = h_info['hostnames']['storage'][0]
    self.node_hostname = h_info["hostnames"]["manage"][0]
    self.vm_name = node_ops.find_vm_name_by_ip_or_hostname(
        self.node_hostname)
    self.devices_list = [device['name'] for device in h_info["devices"]]

    # Get list of additional devices for one of the Gluster nodes
    for gluster_server in list(g.config["gluster_servers"].values()):
        if gluster_server['storage'] == self.node_ip:
            additional_device = gluster_server.get("additional_devices")
            if additional_device:
                self.devices_list.extend(additional_device)

    # sort the devices list
    self.devices_list.sort()

def _get_vol_size(self):
    # Get available free space disabling redundant nodes
    min_free_space_gb = 5
    heketi_url = self.heketi_server_url
    node_ids = heketi_node_list(self.heketi_client_node, heketi_url)
    self.assertTrue(node_ids)
    nodes = {}
    min_free_space = min_free_space_gb * 1024**2
    for node_id in node_ids:
        node_info = heketi_node_info(
            self.heketi_client_node, heketi_url, node_id, json=True)
        if (node_info['state'].lower() != 'online'
                or not node_info['devices']):
            continue
        if len(nodes) > 2:
            out = heketi_node_disable(
                self.heketi_client_node, heketi_url, node_id)
            self.assertTrue(out)
            self.addCleanup(heketi_node_enable, self.heketi_client_node,
                            heketi_url, node_id)
        for device in node_info['devices']:
            if device['state'].lower() != 'online':
                continue
            free_space = device['storage']['free']
            if free_space < min_free_space:
                out = heketi_device_disable(
                    self.heketi_client_node, heketi_url, device['id'])
                self.assertTrue(out)
                self.addCleanup(heketi_device_enable,
                                self.heketi_client_node, heketi_url,
                                device['id'])
                continue
            if node_id not in nodes:
                nodes[node_id] = []
            nodes[node_id].append(device['storage']['free'])

    # Skip test if nodes requirements are not met
    if (len(nodes) < 3
            or not all(map((lambda _list: len(_list) > 1),
                           nodes.values()))):
        raise self.skipTest(
            "Could not find 3 online nodes with, at least, 2 online "
            "devices having free space bigger than %dGb."
            % min_free_space_gb)

    # Calculate size of a potential distributed vol
    vol_size_gb = int(min(map(max, nodes.values())) / (1024**2)) + 1
    return vol_size_gb

def _available_disk_free_space(self):
    min_free_space_gb = 3
    # Get available free space disabling redundant devices and nodes
    heketi_url = self.heketi_server_url
    node_id_list = heketi_ops.heketi_node_list(
        self.heketi_client_node, heketi_url)
    self.assertTrue(node_id_list)
    nodes = {}
    min_free_space = min_free_space_gb * 1024**2
    for node_id in node_id_list:
        node_info = heketi_ops.heketi_node_info(
            self.heketi_client_node, heketi_url, node_id, json=True)
        if (node_info['state'].lower() != 'online'
                or not node_info['devices']):
            continue
        if len(nodes) > 2:
            self.addCleanup(heketi_ops.heketi_node_enable,
                            self.heketi_client_node, heketi_url, node_id)
            out = heketi_ops.heketi_node_disable(
                self.heketi_client_node, heketi_url, node_id)
            self.assertTrue(out)
        for device in node_info['devices']:
            if device['state'].lower() != 'online':
                continue
            free_space = device['storage']['free']
            if (node_id in nodes.keys() or free_space < min_free_space):
                out = heketi_ops.heketi_device_disable(
                    self.heketi_client_node, heketi_url, device['id'])
                self.assertTrue(out)
                self.addCleanup(heketi_ops.heketi_device_enable,
                                self.heketi_client_node, heketi_url,
                                device['id'])
                continue
            nodes[node_id] = free_space
    if len(nodes) < 3:
        raise self.skipTest(
            "Could not find 3 online nodes with, at least, 1 online "
            "device having free space bigger than %dGb."
            % min_free_space_gb)

    # Calculate maximum available size for PVC
    available_size_gb = int(min(nodes.values()) / (1024**2))
    return available_size_gb

def test_create_volumes_enabling_and_disabling_heketi_devices(self):
    """Validate enable/disable of heketi device"""

    # Get nodes info
    node_id_list = heketi_ops.heketi_node_list(
        self.heketi_client_node, self.heketi_server_url)
    node_info_list = []
    for node_id in node_id_list[0:3]:
        node_info = heketi_ops.heketi_node_info(
            self.heketi_client_node, self.heketi_server_url,
            node_id, json=True)
        node_info_list.append(node_info)

    # Disable 4th and other nodes
    if len(node_id_list) > 3:
        for node_id in node_id_list[3:]:
            heketi_ops.heketi_node_disable(
                self.heketi_client_node, self.heketi_server_url, node_id)
            self.addCleanup(
                heketi_ops.heketi_node_enable, self.heketi_client_node,
                self.heketi_server_url, node_id)

    # Disable second and other devices on the first 3 nodes
    for node_info in node_info_list[0:3]:
        devices = node_info["devices"]
        self.assertTrue(
            devices, "Node '%s' does not have devices." % node_info["id"])
        if devices[0]["state"].strip().lower() != "online":
            self.skipTest("Test expects first device to be enabled.")
        if len(devices) < 2:
            continue
        for device in node_info["devices"][1:]:
            out = heketi_ops.heketi_device_disable(
                self.heketi_client_node, self.heketi_server_url,
                device["id"])
            self.assertTrue(
                out, "Failed to disable the device %s" % device["id"])
            self.addCleanup(
                heketi_ops.heketi_device_enable, self.heketi_client_node,
                self.heketi_server_url, device["id"])

    # Create heketi volume
    out = heketi_ops.heketi_volume_create(
        self.heketi_client_node, self.heketi_server_url, 1, json=True)
    self.assertTrue(out, "Failed to create heketi volume of size 1")
    g.log.info("Successfully created heketi volume of size 1")
    device_id = out["bricks"][0]["device"]
    self.addCleanup(
        heketi_ops.heketi_volume_delete, self.heketi_client_node,
        self.heketi_server_url, out["bricks"][0]["volume"])

    # Disable device
    g.log.info("Disabling '%s' device" % device_id)
    out = heketi_ops.heketi_device_disable(
        self.heketi_client_node, self.heketi_server_url, device_id)
    self.assertTrue(out, "Failed to disable the device %s" % device_id)
    g.log.info("Successfully disabled device %s" % device_id)

    try:
        # Get device info
        g.log.info("Retrieving '%s' device info" % device_id)
        out = heketi_ops.heketi_device_info(
            self.heketi_client_node, self.heketi_server_url,
            device_id, json=True)
        self.assertTrue(out, "Failed to get device info %s" % device_id)
        g.log.info("Successfully retrieved device info %s" % device_id)
        name = out["name"]
        self.assertEqual(
            out["state"].lower().strip(), "offline",
            "Device %s is not in offline state." % name)
        g.log.info("Device %s is now offline" % name)

        # Try to create heketi volume
        g.log.info("Creating heketi volume: Expected to fail.")
        try:
            out = heketi_ops.heketi_volume_create(
                self.heketi_client_node, self.heketi_server_url,
                1, json=True)
        except AssertionError:
            g.log.info("Volume was not created as expected.")
        else:
            self.addCleanup(
                heketi_ops.heketi_volume_delete, self.heketi_client_node,
                self.heketi_server_url, out["bricks"][0]["volume"])
            msg = "Volume unexpectedly created. Out: %s" % out
            assert False, msg
    finally:
        # Enable the device back
        g.log.info("Enable '%s' device back." % device_id)
        out = heketi_ops.heketi_device_enable(
            self.heketi_client_node, self.heketi_server_url, device_id)
        self.assertTrue(out, "Failed to enable the device %s" % device_id)
        g.log.info("Successfully enabled device %s" % device_id)

    # Get device info
    out = heketi_ops.heketi_device_info(
        self.heketi_client_node, self.heketi_server_url,
        device_id, json=True)
    self.assertTrue(out, ("Failed to get device info %s" % device_id))
    g.log.info("Successfully retrieved device info %s" % device_id)
    name = out["name"]
    self.assertEqual(
        out["state"], "online",
        "Device %s is not in online state." % name)

    # Create heketi volume of size
    out = heketi_ops.heketi_volume_create(
        self.heketi_client_node, self.heketi_server_url, 1, json=True)
    self.assertTrue(out, "Failed to create volume of size 1")
    self.addCleanup(
        heketi_ops.heketi_volume_delete, self.heketi_client_node,
        self.heketi_server_url, out["bricks"][0]["volume"])
    g.log.info("Successfully created volume of size 1")
    name = out["name"]

    # Get gluster volume info
    vol_info = get_volume_info('auto_get_gluster_endpoint', volname=name)
    self.assertTrue(vol_info, "Failed to get '%s' volume info." % name)
    g.log.info("Successfully got the '%s' volume info." % name)

def test_heketi_manual_cleanup_operation_in_bhv(self):
    """Validate heketi db cleanup will resolve the mismatch
       in the free size of the block hosting volume with failed
       block device create operations.
    """
    bhv_size_before, bhv_size_after, vol_count = [], [], 5
    ocp_node, g_node = self.ocp_master_node[0], self.gluster_servers[0]
    h_node, h_url = self.heketi_client_node, self.heketi_server_url

    # Get existing heketi volume list
    existing_volumes = heketi_volume_list(h_node, h_url, json=True)

    # Add function to clean stale volumes created during test
    self.addCleanup(
        self._cleanup_heketi_volumes, existing_volumes.get("volumes"))

    # Get nodes id list
    node_id_list = heketi_node_list(h_node, h_url)

    # Disable 4th and other nodes
    for node_id in node_id_list[3:]:
        heketi_node_disable(h_node, h_url, node_id)
        self.addCleanup(heketi_node_enable, h_node, h_url, node_id)

    # Calculate heketi volume size
    free_space, nodenum = get_total_free_space(h_node, h_url)
    free_space_available = int(free_space / nodenum)
    if free_space_available > vol_count:
        h_volume_size = int(free_space_available / vol_count)
        if h_volume_size > 50:
            h_volume_size = 50
    else:
        h_volume_size, vol_count = 1, free_space_available

    # Create BHV in case blockvolume size is greater than default BHV size
    default_bhv_size = get_default_block_hosting_volume_size(
        h_node, self.heketi_dc_name)
    if default_bhv_size < h_volume_size:
        h_volume_name = "autotest-{}".format(utils.get_random_str())
        bhv_info = self.create_heketi_volume_with_name_and_wait(
            h_volume_name, free_space_available,
            raise_on_cleanup_error=False, block=True, json=True)
        free_space_available -= (
            int(bhv_info.get("blockinfo").get("reservedsize")) + 1)
        h_volume_size = int(free_space_available / vol_count)

    # Get BHV list
    h_bhv_list = get_block_hosting_volume_list(h_node, h_url).keys()
    self.assertTrue(h_bhv_list, "Failed to get the BHV list")

    # Get BHV size
    for bhv in h_bhv_list:
        vol_info = heketi_volume_info(h_node, h_url, bhv, json=True)
        bhv_vol_size_before = vol_info.get("freesize")
        bhv_size_before.append(bhv_vol_size_before)

    # Kill Tcmu-runner service
    services = ("tcmu-runner", "gluster-block-target", "gluster-blockd")
    kill_service_on_gluster_pod_or_node(ocp_node, "tcmu-runner", g_node)

    # Restart the services
    for service in services:
        state = ('exited' if service == 'gluster-block-target'
                 else 'running')
        self.addCleanup(
            wait_for_service_status_on_gluster_pod_or_node,
            ocp_node, service, 'active', state, g_node)
        self.addCleanup(
            restart_service_on_gluster_pod_or_node,
            ocp_node, service, g_node)

    def run_async(cmd, hostname, raise_on_error=True):
        return g.run_async(host=hostname, command=cmd)

    # Create stale block volumes in async
    for count in range(vol_count):
        with mock.patch.object(json, 'loads', side_effect=(lambda j: j)):
            with mock.patch.object(
                    command, 'cmd_run', side_effect=run_async):
                heketi_blockvolume_create(
                    h_node, h_url, h_volume_size, json=True)

    # Wait for pending operation to get generated
    self._check_for_pending_operations(h_node, h_url)

    # Restart the services
    for service in services:
        state = ('exited' if service == 'gluster-block-target'
                 else 'running')
        restart_service_on_gluster_pod_or_node(ocp_node, service, g_node)
        wait_for_service_status_on_gluster_pod_or_node(
            ocp_node, service, 'active', state, g_node)

    # Cleanup pending operation
    heketi_server_operation_cleanup(h_node, h_url)

    # wait for pending operation to get cleaned up
    for w in waiter.Waiter(timeout=120, interval=10):
        # Get BHV size
        for bhv in h_bhv_list:
            vol_info = heketi_volume_info(h_node, h_url, bhv, json=True)
            bhv_vol_size_after = vol_info.get("freesize")
            bhv_size_after.append(bhv_vol_size_after)

        if (set(bhv_size_before) == set(bhv_size_after)):
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "Failed to Validate volume size Actual:{},"
            " Expected:{}".format(
                set(bhv_size_before), set(bhv_size_after)))

def test_brick_evict_with_node_down(self):
    """Test brick evict basic functionality and verify brick evict
    after node down"""
    h_node, h_server = self.heketi_client_node, self.heketi_server_url

    # Disable node if more than 3
    node_list = heketi_ops.heketi_node_list(h_node, h_server)
    if len(node_list) > 3:
        for node_id in node_list[3:]:
            heketi_ops.heketi_node_disable(h_node, h_server, node_id)
            self.addCleanup(
                heketi_ops.heketi_node_enable, h_node, h_server, node_id)

    # Create heketi volume
    vol_info = heketi_ops.heketi_volume_create(
        h_node, h_server, 1, json=True)
    self.addCleanup(
        heketi_ops.heketi_volume_delete, h_node, h_server,
        vol_info.get('id'))

    # Get node on which heketi pod is scheduled
    heketi_pod = openshift_ops.get_pod_name_from_dc(
        self.ocp_client, self.heketi_dc_name)
    heketi_node = openshift_ops.oc_get_custom_resource(
        self.ocp_client, 'pod', '.:spec.nodeName', heketi_pod)[0]

    # Get list of hostname from node id
    host_list = []
    for node_id in node_list[3:]:
        node_info = heketi_ops.heketi_node_info(
            h_node, h_server, node_id, json=True)
        host_list.append(node_info.get('hostnames').get('manage')[0])

    # Get brick id and glusterfs node which is not heketi node
    for node in vol_info.get('bricks', {}):
        node_info = heketi_ops.heketi_node_info(
            h_node, h_server, node.get('node'), json=True)
        hostname = node_info.get('hostnames').get('manage')[0]
        if (hostname != heketi_node) and (hostname not in host_list):
            brick_id = node.get('id')
            break

    # Bring down the glusterfs node
    vm_name = node_ops.find_vm_name_by_ip_or_hostname(hostname)
    self.addCleanup(
        self._wait_for_gluster_pod_after_node_reboot, hostname)
    self.addCleanup(node_ops.power_on_vm_by_name, vm_name)
    node_ops.power_off_vm_by_name(vm_name)

    # Wait glusterfs node to become NotReady
    custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
    for w in waiter.Waiter(300, 20):
        status = openshift_ops.oc_get_custom_resource(
            self.ocp_client, 'node', custom, hostname)
        if status[0] in ['False', 'Unknown']:
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "Failed to bring down node {}".format(hostname))

    # Perform brick evict operation
    try:
        heketi_ops.heketi_brick_evict(h_node, h_server, brick_id)
    except AssertionError as e:
        if ('No Replacement was found' not in six.text_type(e)):
            raise

def test_targetcli_failure_during_block_pvc_creation(self):
    h_node, h_server = self.heketi_client_node, self.heketi_server_url

    # Disable redundant nodes and leave just 3 nodes online
    h_node_id_list = heketi_node_list(h_node, h_server)
    self.assertGreater(len(h_node_id_list), 2)
    for node_id in h_node_id_list[3:]:
        heketi_node_disable(h_node, h_server, node_id)
        self.addCleanup(heketi_node_enable, h_node, h_server, node_id)

    # Gather info about the Gluster node we are going to use for killing
    # targetcli processes.
    chosen_g_node_id = h_node_id_list[0]
    chosen_g_node_info = heketi_node_info(
        h_node, h_server, chosen_g_node_id, json=True)
    chosen_g_node_ip = chosen_g_node_info['hostnames']['storage'][0]
    chosen_g_node_hostname = chosen_g_node_info['hostnames']['manage'][0]
    chosen_g_node_ip_and_hostname = set(
        (chosen_g_node_ip, chosen_g_node_hostname))

    g_pods = oc_get_custom_resource(
        self.node, 'pod',
        [':.metadata.name', ':.status.hostIP', ':.status.podIP',
         ':.spec.nodeName'],
        selector='glusterfs-node=pod')
    if g_pods and g_pods[0]:
        for g_pod in g_pods:
            if chosen_g_node_ip_and_hostname.intersection(set(g_pod[1:])):
                host_to_run_cmds = self.node
                g_pod_prefix, g_pod = 'oc exec %s -- ' % g_pod[0], g_pod[0]
                break
        else:
            err_msg = (
                'Failed to find Gluster pod filtering it by following IPs '
                'and hostnames: %s\nFound following Gluster pods: %s') % (
                    chosen_g_node_ip_and_hostname, g_pods)
            g.log.error(err_msg)
            raise AssertionError(err_msg)
    else:
        host_to_run_cmds, g_pod_prefix, g_pod = chosen_g_node_ip, '', ''

    # Schedule deletion of targetcli process
    file_for_bkp, pvc_number = "~/.targetcli/prefs.bin", 10
    self.cmd_run(
        "%scp %s %s_backup" % (g_pod_prefix, file_for_bkp, file_for_bkp),
        hostname=host_to_run_cmds)
    self.addCleanup(
        self.cmd_run,
        "%srm -f %s_backup" % (g_pod_prefix, file_for_bkp),
        hostname=host_to_run_cmds)
    kill_targetcli_services_cmd = (
        "while true; do "
        " %spkill targetcli || echo 'failed to kill targetcli process'; "
        "done" % g_pod_prefix)
    loop_for_killing_targetcli_process = g.run_async(
        host_to_run_cmds, kill_targetcli_services_cmd, "root")
    try:
        # Create bunch of PVCs
        sc_name, pvc_names = self.create_storage_class(), []
        for i in range(pvc_number):
            pvc_names.append(oc_create_pvc(self.node, sc_name, pvc_size=1))
        self.addCleanup(
            wait_for_resources_absence, self.node, 'pvc', pvc_names)
        self.addCleanup(oc_delete, self.node, 'pvc', ' '.join(pvc_names))

        # Check that we get expected number of provisioning errors
        timeout, wait_step, succeeded_pvcs, failed_pvcs = 120, 1, [], []
        _waiter, err_msg = Waiter(timeout=timeout, interval=wait_step), ""
        for pvc_name in pvc_names:
            _waiter._attempt = 0
            for w in _waiter:
                events = get_events(
                    self.node, pvc_name, obj_type="PersistentVolumeClaim")
                for event in events:
                    if event['reason'] == 'ProvisioningSucceeded':
                        succeeded_pvcs.append(pvc_name)
                        break
                    elif event['reason'] == 'ProvisioningFailed':
                        failed_pvcs.append(pvc_name)
                        break
                else:
                    continue
                break
            if w.expired:
                err_msg = (
                    "Failed to get neither 'ProvisioningSucceeded' nor "
                    "'ProvisioningFailed' statuses for all the PVCs in "
                    "time. Timeout was %ss, interval was %ss." % (
                        timeout, wait_step))
                g.log.error(err_msg)
                raise AssertionError(err_msg)
        self.assertGreater(len(failed_pvcs), len(succeeded_pvcs))
    finally:
        # Restore targetcli workability
        loop_for_killing_targetcli_process._proc.terminate()

        # Revert breakage back which can be caused by BZ-1769426
        check_bkp_file_size_cmd = (
            "%sls -lah %s | awk '{print $5}'" % (
                g_pod_prefix, file_for_bkp))
        bkp_file_size = self.cmd_run(
            check_bkp_file_size_cmd, hostname=host_to_run_cmds).strip()
        if bkp_file_size == "0":
            self.cmd_run(
                "%smv %s_backup %s" % (
                    g_pod_prefix, file_for_bkp, file_for_bkp),
                hostname=host_to_run_cmds)
            breakage_err_msg = (
                "File located at '%s' was corrupted (zero size) on the "
                "%s. Looks like BZ-1769426 took effect. \n"
                "Don't worry, it has been restored after test failure." % (
                    file_for_bkp,
                    "'%s' Gluster pod" % g_pod if g_pod
                    else "'%s' Gluster node" % chosen_g_node_ip))
            g.log.error(breakage_err_msg)
            if err_msg:
                breakage_err_msg = "%s\n%s" % (err_msg, breakage_err_msg)
            raise AssertionError(breakage_err_msg)

    # Wait for all the PVCs to be in bound state
    wait_for_pvcs_be_bound(self.node, pvc_names, timeout=300, wait_step=5)

def test_volume_expansion_no_free_space(self):
    """Validate volume expansion when there is no free space"""

    vol_size, expand_size, additional_devices_attached = None, 10, {}
    h_node, h_server_url = self.heketi_client_node, self.heketi_server_url

    # Get nodes info
    heketi_node_id_list = heketi_ops.heketi_node_list(h_node, h_server_url)
    if len(heketi_node_id_list) < 3:
        self.skipTest("3 Heketi nodes are required.")

    # Disable 4th and other nodes
    for node_id in heketi_node_id_list[3:]:
        heketi_ops.heketi_node_disable(h_node, h_server_url, node_id)
        self.addCleanup(
            heketi_ops.heketi_node_enable, h_node, h_server_url, node_id)

    # Prepare first 3 nodes
    smallest_size = None
    err_msg = ''
    for node_id in heketi_node_id_list[0:3]:
        node_info = heketi_ops.heketi_node_info(
            h_node, h_server_url, node_id, json=True)

        # Disable second and other devices
        devices = node_info["devices"]
        self.assertTrue(
            devices, "Node '%s' does not have devices." % node_id)
        if devices[0]["state"].strip().lower() != "online":
            self.skipTest("Test expects first device to be enabled.")
        if (smallest_size is None
                or devices[0]["storage"]["free"] < smallest_size):
            smallest_size = devices[0]["storage"]["free"]
        for device in node_info["devices"][1:]:
            heketi_ops.heketi_device_disable(
                h_node, h_server_url, device["id"])
            self.addCleanup(
                heketi_ops.heketi_device_enable,
                h_node, h_server_url, device["id"])

        # Gather info about additional devices
        additional_device_name = None
        for gluster_server in self.gluster_servers:
            gluster_server_data = self.gluster_servers_info[gluster_server]
            g_manage = gluster_server_data["manage"]
            g_storage = gluster_server_data["storage"]
            if not (g_manage in node_info["hostnames"]["manage"]
                    or g_storage in node_info["hostnames"]["storage"]):
                continue
            additional_device_name = ((
                gluster_server_data.get("additional_devices") or [''])[0])
            break

        if not additional_device_name:
            err_msg += ("No 'additional_devices' are configured for "
                        "'%s' node, which has following hostnames and "
                        "IP addresses: %s.\n" % (
                            node_id,
                            ', '.join(
                                node_info["hostnames"]["manage"]
                                + node_info["hostnames"]["storage"])))
            continue

        heketi_ops.heketi_device_add(
            h_node, h_server_url, additional_device_name, node_id)
        additional_devices_attached.update(
            {node_id: additional_device_name})

    # Schedule cleanup of the added devices
    for node_id in additional_devices_attached.keys():
        node_info = heketi_ops.heketi_node_info(
            h_node, h_server_url, node_id, json=True)
        for device in node_info["devices"]:
            if device["name"] != additional_devices_attached[node_id]:
                continue
            self.addCleanup(self.detach_devices_attached, device["id"])
            break
        else:
            self.fail("Could not find ID for added device on "
                      "'%s' node." % node_id)

    if err_msg:
        self.skipTest(err_msg)

    # Temporary disable new devices
    self.disable_devices(additional_devices_attached)

    # Create volume and save info about it
    vol_size = int(smallest_size / (1024**2)) - 1
    creation_info = heketi_ops.heketi_volume_create(
        h_node, h_server_url, vol_size, json=True)
    volume_name, volume_id = creation_info["name"], creation_info["id"]
    self.addCleanup(
        heketi_ops.heketi_volume_delete,
        h_node, h_server_url, volume_id, raise_on_error=False)

    volume_info_before_expansion = heketi_ops.heketi_volume_info(
        h_node, h_server_url, volume_id, json=True)
    num_of_bricks_before_expansion = self.get_num_of_bricks(volume_name)
    self.get_brick_and_volume_status(volume_name)
    free_space_before_expansion = self.get_devices_summary_free_space()

    # Try to expand volume with not enough device space
    self.assertRaises(
        AssertionError, heketi_ops.heketi_volume_expand,
        h_node, h_server_url, volume_id, expand_size)

    # Enable new devices to be able to expand our volume
    self.enable_devices(additional_devices_attached)

    # Expand volume and validate results
    heketi_ops.heketi_volume_expand(
        h_node, h_server_url, volume_id, expand_size, json=True)
    free_space_after_expansion = self.get_devices_summary_free_space()
    self.assertGreater(
        free_space_before_expansion, free_space_after_expansion,
        "Free space not consumed after expansion of %s" % volume_id)
    num_of_bricks_after_expansion = self.get_num_of_bricks(volume_name)
    self.get_brick_and_volume_status(volume_name)
    volume_info_after_expansion = heketi_ops.heketi_volume_info(
        h_node, h_server_url, volume_id, json=True)
    self.assertGreater(
        volume_info_after_expansion["size"],
        volume_info_before_expansion["size"],
        "Size of %s not increased" % volume_id)
    self.assertGreater(
        num_of_bricks_after_expansion, num_of_bricks_before_expansion)
    self.assertEqual(
        num_of_bricks_after_expansion % num_of_bricks_before_expansion, 0)

    # Delete volume and validate release of the used space
    heketi_ops.heketi_volume_delete(h_node, h_server_url, volume_id)
    free_space_after_deletion = self.get_devices_summary_free_space()
    self.assertGreater(
        free_space_after_deletion, free_space_after_expansion,
        "Free space not reclaimed after deletion of volume %s" % volume_id)

def _pv_resize(self, exceed_free_space):
    dir_path = "/mnt"
    pvc_size_gb, min_free_space_gb = 1, 3

    # Get available free space disabling redundant devices and nodes
    heketi_url = self.heketi_server_url
    node_id_list = heketi_ops.heketi_node_list(
        self.heketi_client_node, heketi_url)
    self.assertTrue(node_id_list)
    nodes = {}
    min_free_space = min_free_space_gb * 1024**2
    for node_id in node_id_list:
        node_info = heketi_ops.heketi_node_info(
            self.heketi_client_node, heketi_url, node_id, json=True)
        if (node_info['state'].lower() != 'online'
                or not node_info['devices']):
            continue
        if len(nodes) > 2:
            out = heketi_ops.heketi_node_disable(
                self.heketi_client_node, heketi_url, node_id)
            self.assertTrue(out)
            self.addCleanup(
                heketi_ops.heketi_node_enable,
                self.heketi_client_node, heketi_url, node_id)
        for device in node_info['devices']:
            if device['state'].lower() != 'online':
                continue
            free_space = device['storage']['free']
            if (node_id in nodes.keys() or free_space < min_free_space):
                out = heketi_ops.heketi_device_disable(
                    self.heketi_client_node, heketi_url, device['id'])
                self.assertTrue(out)
                self.addCleanup(
                    heketi_ops.heketi_device_enable,
                    self.heketi_client_node, heketi_url, device['id'])
                continue
            nodes[node_id] = free_space
    if len(nodes) < 3:
        raise self.skipTest(
            "Could not find 3 online nodes with, at least, 1 online "
            "device having free space bigger than %dGb."
            % min_free_space_gb)

    # Calculate maximum available size for PVC
    available_size_gb = int(min(nodes.values()) / (1024**2))

    # Create PVC
    self.create_storage_class(allow_volume_expansion=True)
    pvc_name = self.create_and_wait_for_pvc(pvc_size=pvc_size_gb)

    # Create DC with POD and attached PVC to it
    dc_name = oc_create_app_dc_with_io(self.node, pvc_name)
    self.addCleanup(oc_delete, self.node, 'dc', dc_name)
    self.addCleanup(scale_dc_pod_amount_and_wait, self.node, dc_name, 0)
    pod_name = get_pod_name_from_dc(self.node, dc_name)
    wait_for_pod_be_ready(self.node, pod_name)

    if exceed_free_space:
        # Try to expand existing PVC exceeding free space
        resize_pvc(self.node, pvc_name, available_size_gb)
        wait_for_events(self.node, obj_name=pvc_name,
                        event_reason='VolumeResizeFailed')

        # Check that app POD is up and running, then try to write data
        wait_for_pod_be_ready(self.node, pod_name)
        cmd = ("dd if=/dev/urandom of=%s/autotest bs=100K count=1"
               % dir_path)
        ret, out, err = oc_rsh(self.node, pod_name, cmd)
        self.assertEqual(
            ret, 0,
            "Failed to write data after failed attempt to expand PVC.")
    else:
        # Expand existing PVC using all the available free space
        expand_size_gb = available_size_gb - pvc_size_gb
        resize_pvc(self.node, pvc_name, expand_size_gb)
        verify_pvc_size(self.node, pvc_name, expand_size_gb)
        pv_name = get_pv_name_from_pvc(self.node, pvc_name)
        verify_pv_size(self.node, pv_name, expand_size_gb)
        wait_for_events(self.node, obj_name=pvc_name,
                        event_reason='VolumeResizeSuccessful')

        # Recreate app POD
        oc_delete(self.node, 'pod', pod_name)
        wait_for_resource_absence(self.node, 'pod', pod_name)
        pod_name = get_pod_name_from_dc(self.node, dc_name)
        wait_for_pod_be_ready(self.node, pod_name)

        # Write data on the expanded PVC
        cmd = ("dd if=/dev/urandom of=%s/autotest "
               "bs=1M count=1025" % dir_path)
        ret, out, err = oc_rsh(self.node, pod_name, cmd)
        self.assertEqual(
            ret, 0, "Failed to write data on the expanded PVC")

def test_heketi_device_replacement_in_node(self):
    """Validate device replacement operation on single node"""

    h_client, h_server = self.heketi_client_node, self.heketi_server_url

    try:
        gluster_server_0 = list(g.config["gluster_servers"].values())[0]
        manage_hostname = gluster_server_0["manage"]
        add_device_name = gluster_server_0["additional_devices"][0]
    except (KeyError, IndexError):
        self.skipTest(
            "Additional disk is not specified for node with following "
            "hostnames and IP addresses: {}, {}".format(
                gluster_server_0.get('manage', '?'),
                gluster_server_0.get('storage', '?')))

    # Get existing heketi volume list
    existing_volumes = heketi_volume_list(h_client, h_server, json=True)

    # Add cleanup function to clean stale volumes created during test
    self.addCleanup(
        self._cleanup_heketi_volumes, existing_volumes.get("volumes"))

    # Get nodes info
    node_id_list = heketi_node_list(h_client, h_server)

    # Disable 4th and other nodes
    if len(node_id_list) > 3:
        for node_id in node_id_list[3:]:
            heketi_node_disable(h_client, h_server, node_id)
            self.addCleanup(heketi_node_enable, h_client, h_server, node_id)

    # Create volume when 3 nodes are online
    vol_size, vol_count = 2, 4
    for _ in range(vol_count):
        vol_info = heketi_blockvolume_create(
            h_client, h_server, vol_size, json=True)
        self.addCleanup(
            heketi_blockvolume_delete, h_client, h_server, vol_info['id'])

    # Get node ID of the Gluster hostname
    topology_info = heketi_topology_info(h_client, h_server, json=True)
    self.assertIsNotNone(topology_info, "Failed to get topology info")
    self.assertIn(
        "clusters", topology_info.keys(),
        "Failed to get cluster details from topology info")
    node_list = topology_info["clusters"][0]["nodes"]
    self.assertTrue(
        node_list, "Cluster info command returned empty list of nodes")

    node_id = None
    for node in node_list:
        if manage_hostname == node['hostnames']["manage"][0]:
            node_id = node["id"]
            break
    self.assertTrue(
        node_id,
        "Failed to get node info for node id '{}'".format(manage_hostname))

    # Add extra device, then remember its ID and size
    device_id_new, device_size_new = self._add_heketi_device(
        add_device_name, node_id)

    # Remove one of the existing devices on node except new device
    device_name, device_id = None, None
    node_info_after_addition = heketi_node_info(
        h_client, h_server, node_id, json=True)
    for device in node_info_after_addition["devices"]:
        if (device["name"] != add_device_name
                and device["storage"]["total"] == device_size_new):
            device_name = device["name"]
            device_id = device["id"]
            break

    self.assertIsNotNone(device_name, "Failed to get device name")
    self.assertIsNotNone(device_id, "Failed to get device id")
    self.addCleanup(
        heketi_device_enable, h_client, h_server, device_id,
        raise_on_error=False)
    self.addCleanup(
        heketi_device_add, h_client, h_server, device_name, node_id,
        raise_on_error=False)
    heketi_device_disable(h_client, h_server, device_id)
    heketi_device_remove(h_client, h_server, device_id)
    heketi_device_delete(h_client, h_server, device_id)

def test_heketi_metrics_validation_after_node(self, condition):
    """Validate heketi metrics after adding and removing a node"""
    # Get additional node
    additional_host_info = g.config.get("additional_gluster_servers")
    if not additional_host_info:
        self.skipTest(
            "Skipping this test case as additional gluster server is "
            "not provided in config file")

    additional_host_info = list(additional_host_info.values())[0]
    storage_hostname = additional_host_info.get("manage")
    storage_ip = additional_host_info.get("storage")
    if not (storage_hostname and storage_ip):
        self.skipTest(
            "Config options 'additional_gluster_servers.manage' "
            "and 'additional_gluster_servers.storage' must be set.")

    h_client, h_server = self.heketi_client_node, self.heketi_server_url
    initial_node_count, final_node_count = 0, 0

    # Get initial node count from prometheus metrics
    metric_result = self._fetch_metric_from_promtheus_pod(
        metric='heketi_nodes_count')
    initial_node_count = reduce(
        lambda x, y: x + y,
        [result.get('value')[1] for result in metric_result])

    # Switch to storage project
    openshift_ops.switch_oc_project(
        self._master, self.storage_project_name)

    # Configure node before adding node
    self.configure_node_to_run_gluster(storage_hostname)

    # Get cluster list
    cluster_info = heketi_ops.heketi_cluster_list(
        h_client, h_server, json=True)

    # Add node to the cluster
    heketi_node_info = heketi_ops.heketi_node_add(
        h_client, h_server, len(self.gluster_servers),
        cluster_info.get('clusters')[0],
        storage_hostname, storage_ip, json=True)
    heketi_node_id = heketi_node_info.get("id")
    self.addCleanup(
        heketi_ops.heketi_node_delete, h_client, h_server, heketi_node_id,
        raise_on_error=False)
    self.addCleanup(
        heketi_ops.heketi_node_remove, h_client, h_server, heketi_node_id,
        raise_on_error=False)
    self.addCleanup(
        heketi_ops.heketi_node_disable, h_client, h_server, heketi_node_id,
        raise_on_error=False)
    self.addCleanup(
        openshift_ops.switch_oc_project,
        self._master, self.storage_project_name)

    if condition == 'delete':
        # Switch to openshift-monitoring project
        openshift_ops.switch_oc_project(
            self.ocp_master_node[0], self._prometheus_project_name)

        # Get updated node count from prometheus metrics
        for w in waiter.Waiter(timeout=60, interval=10):
            metric_result = self._fetch_metric_from_promtheus_pod(
                metric='heketi_nodes_count')
            node_count = reduce(
                lambda x, y: x + y,
                [result.get('value')[1] for result in metric_result])
            if node_count != initial_node_count:
                break
        if w.expired:
            raise exceptions.ExecutionError(
                "Failed to get updated node details from prometheus")

        # Remove node from cluster
        heketi_ops.heketi_node_disable(h_client, h_server, heketi_node_id)
        heketi_ops.heketi_node_remove(h_client, h_server, heketi_node_id)
        for device in heketi_node_info.get('devices'):
            heketi_ops.heketi_device_delete(
                h_client, h_server, device.get('id'))
        heketi_ops.heketi_node_delete(h_client, h_server, heketi_node_id)

    # Switch to openshift-monitoring project
    openshift_ops.switch_oc_project(
        self.ocp_master_node[0], self._prometheus_project_name)

    # Get final node count from prometheus metrics
    for w in waiter.Waiter(timeout=60, interval=10):
        metric_result = self._fetch_metric_from_promtheus_pod(
            metric='heketi_nodes_count')
        final_node_count = reduce(
            lambda x, y: x + y,
            [result.get('value')[1] for result in metric_result])

        if condition == 'delete':
            if final_node_count < node_count:
                break
        else:
            if final_node_count > initial_node_count:
                break
    if w.expired:
        raise exceptions.ExecutionError(
            "Failed to update node details in prometheus")

def test_check_node_disable_based_on_heketi_zone(
        self, zone_count, is_disable_on_different_zone, is_set_env=False):
    """Validate node disable in different heketi zones"""
    expected_node_count, heketi_zone_checking, sc_name = 4, "strict", None

    # Check amount of available online nodes
    online_node_count = len(self._get_online_nodes())
    if online_node_count < expected_node_count:
        self.skipTest(
            'Available node count {} is less than expected node '
            'count {}'.format(online_node_count, expected_node_count))

    # Check amount of available online heketi zones
    self._check_for_available_zones(zone_count)

    # Get the online devices and nodes w.r.t. to zone
    zone_devices_nodes = self._get_online_devices_and_nodes_with_zone()

    # Create sc or else directly set env to "strict" inside dc
    is_create_sc = not is_set_env
    if is_create_sc:
        sc_name = self.create_storage_class(
            sc_name_prefix=self.prefix, vol_name_prefix=self.prefix,
            heketi_zone_checking=heketi_zone_checking)
    if is_set_env:
        self._set_zone_check_env_in_heketi_dc(heketi_zone_checking)

    # Choose a zone and node_id to disable the device
    for zone, nodes_and_devices in zone_devices_nodes.items():
        if zone_count == 3:
            # Select a node with a zone having multiple nodes in same
            # zone to cover the test cases disable node in same zone
            if len(nodes_and_devices['nodes']) > 1:
                zone_with_disabled_node = zone
                disabled_node = nodes_and_devices['nodes'][0]
                break
        else:
            # Select node from any of the zones
            zone_with_disabled_node = zone
            disabled_node = nodes_and_devices['nodes'][0]
            break

    # Disable the selected node
    heketi_ops.heketi_node_disable(
        self.h_client, self.h_server, disabled_node)
    self.addCleanup(
        heketi_ops.heketi_node_enable, self.h_client, self.h_server,
        disabled_node)

    # Create some DCs with PVCs and check brick placement in heketi zones
    pod_names = self._create_dcs_and_check_brick_placement(
        self.prefix, sc_name, heketi_zone_checking, zone_count)

    # Enable disabled node
    heketi_ops.heketi_node_enable(
        self.h_client, self.h_server, disabled_node)

    if is_disable_on_different_zone:
        # Select the new node in a different zone
        for zone, nodes_and_devices in zone_devices_nodes.items():
            if zone != zone_with_disabled_node:
                new_node_to_disable = nodes_and_devices['nodes'][0]
                break
    else:
        # Select the new node in the same zone
        new_node_to_disable = zone_devices_nodes[
            zone_with_disabled_node]['nodes'][1]

    # Disable the newly selected node
    heketi_ops.heketi_node_disable(
        self.h_client, self.h_server, new_node_to_disable)
    self.addCleanup(
        heketi_ops.heketi_node_enable, self.h_client, self.h_server,
        new_node_to_disable)

    # Verify if pods are in ready state
    for pod_name in pod_names:
        openshift_ops.wait_for_pod_be_ready(
            self.node, pod_name, timeout=5, wait_step=2)

def test_volume_creation_no_free_devices(self):
    """Validate heketi error is returned when no free devices available"""
    node, server_url = self.heketi_client_node, self.heketi_server_url

    # Get nodes info
    node_id_list = heketi_ops.heketi_node_list(node, server_url)
    node_info_list = []
    for node_id in node_id_list[0:3]:
        node_info = heketi_ops.heketi_node_info(
            node, server_url, node_id, json=True)
        node_info_list.append(node_info)

    # Disable 4th and other nodes
    for node_id in node_id_list[3:]:
        heketi_ops.heketi_node_disable(node, server_url, node_id)
        self.addCleanup(
            heketi_ops.heketi_node_enable, node, server_url, node_id)

    # Disable second and other devices on the first 3 nodes
    for node_info in node_info_list[0:3]:
        devices = node_info["devices"]
        self.assertTrue(
            devices, "Node '%s' does not have devices." % node_info["id"])
        if devices[0]["state"].strip().lower() != "online":
            self.skipTest("Test expects first device to be enabled.")
        if len(devices) < 2:
            continue
        for device in node_info["devices"][1:]:
            out = heketi_ops.heketi_device_disable(
                node, server_url, device["id"])
            self.assertTrue(
                out, "Failed to disable the device %s" % device["id"])
            self.addCleanup(
                heketi_ops.heketi_device_enable,
                node, server_url, device["id"])

    # Calculate common available space
    available_spaces = [
        int(n["devices"][0]["storage"]["free"])
        for n in node_info_list[0:3]]
    min_space_gb = int(min(available_spaces) / 1024**2)
    self.assertGreater(min_space_gb, 3, "Not enough available free space.")

    # Create first small volume
    vol = heketi_ops.heketi_volume_create(node, server_url, 1, json=True)
    self.addCleanup(
        heketi_ops.heketi_volume_delete, self.heketi_client_node,
        self.heketi_server_url, vol["id"])

    # Try to create second volume getting "no free space" error
    try:
        vol_fail = heketi_ops.heketi_volume_create(
            node, server_url, min_space_gb, json=True)
    except AssertionError:
        g.log.info("Volume was not created as expected.")
    else:
        self.addCleanup(
            heketi_ops.heketi_volume_delete, self.heketi_client_node,
            self.heketi_server_url, vol_fail["bricks"][0]["volume"])
        self.assertFalse(
            vol_fail,
            "Volume should have not been created. Out: %s" % vol_fail)

def test_volume_expansion_no_free_space(self):
    """Validate volume expansion when there is no free space"""

    vol_size, expand_size, additional_devices_attached = None, 10, {}
    h_node, h_server_url = self.heketi_client_node, self.heketi_server_url

    # Get nodes info
    heketi_node_id_list = heketi_ops.heketi_node_list(h_node, h_server_url)
    if len(heketi_node_id_list) < 3:
        self.skipTest("3 Heketi nodes are required.")

    # Disable 4th and other nodes
    for node_id in heketi_node_id_list[3:]:
        heketi_ops.heketi_node_disable(h_node, h_server_url, node_id)
        self.addCleanup(
            heketi_ops.heketi_node_enable, h_node, h_server_url, node_id)

    # Prepare first 3 nodes
    smallest_size = None
    err_msg = ''
    for node_id in heketi_node_id_list[0:3]:
        node_info = heketi_ops.heketi_node_info(
            h_node, h_server_url, node_id, json=True)

        # Disable second and other devices
        devices = node_info["devices"]
        self.assertTrue(
            devices, "Node '%s' does not have devices." % node_id)
        if devices[0]["state"].strip().lower() != "online":
            self.skipTest("Test expects first device to be enabled.")
        if (smallest_size is None
                or devices[0]["storage"]["free"] < smallest_size):
            smallest_size = devices[0]["storage"]["free"]
        for device in node_info["devices"][1:]:
            heketi_ops.heketi_device_disable(
                h_node, h_server_url, device["id"])
            self.addCleanup(
                heketi_ops.heketi_device_enable,
                h_node, h_server_url, device["id"])

        # Gather info about additional devices
        additional_device_name = None
        for gluster_server in self.gluster_servers:
            gluster_server_data = self.gluster_servers_info[gluster_server]
            g_manage = gluster_server_data["manage"]
            g_storage = gluster_server_data["storage"]
            if not (g_manage in node_info["hostnames"]["manage"]
                    or g_storage in node_info["hostnames"]["storage"]):
                continue
            additional_device_name = ((
                gluster_server_data.get("additional_devices") or [''])[0])
            break

        if not additional_device_name:
            err_msg += ("No 'additional_devices' are configured for "
                        "'%s' node, which has following hostnames and "
                        "IP addresses: %s.\n" % (
                            node_id,
                            ', '.join(
                                node_info["hostnames"]["manage"]
                                + node_info["hostnames"]["storage"])))
            continue

        heketi_ops.heketi_device_add(
            h_node, h_server_url, additional_device_name, node_id)
        additional_devices_attached.update(
            {node_id: additional_device_name})

    # Schedule cleanup of the added devices
    for node_id in additional_devices_attached.keys():
        node_info = heketi_ops.heketi_node_info(
            h_node, h_server_url, node_id, json=True)
        for device in node_info["devices"]:
            if device["name"] != additional_devices_attached[node_id]:
                continue
            self.addCleanup(self.detach_devices_attached, device["id"])
            break
        else:
            self.fail("Could not find ID for added device on "
                      "'%s' node." % node_id)

    if err_msg:
        self.skipTest(err_msg)

    # Temporary disable new devices
    self.disable_devices(additional_devices_attached)

    # Create volume and save info about it
    vol_size = int(smallest_size / (1024**2)) - 1
    creation_info = heketi_ops.heketi_volume_create(
        h_node, h_server_url, vol_size, json=True)
    volume_name, volume_id = creation_info["name"], creation_info["id"]
    self.addCleanup(
        heketi_ops.heketi_volume_delete,
        h_node, h_server_url, volume_id, raise_on_error=False)

    volume_info_before_expansion = heketi_ops.heketi_volume_info(
        h_node, h_server_url, volume_id, json=True)
    num_of_bricks_before_expansion = self.get_num_of_bricks(volume_name)
    self.get_brick_and_volume_status(volume_name)
    free_space_before_expansion = self.get_devices_summary_free_space()

    # Try to expand volume with not enough device space
    self.assertRaises(
        ExecutionError, heketi_ops.heketi_volume_expand,
        h_node, h_server_url, volume_id, expand_size)

    # Enable new devices to be able to expand our volume
    self.enable_devices(additional_devices_attached)

    # Expand volume and validate results
    heketi_ops.heketi_volume_expand(
        h_node, h_server_url, volume_id, expand_size, json=True)
    free_space_after_expansion = self.get_devices_summary_free_space()
    self.assertGreater(
        free_space_before_expansion, free_space_after_expansion,
        "Free space not consumed after expansion of %s" % volume_id)
    num_of_bricks_after_expansion = self.get_num_of_bricks(volume_name)
    self.get_brick_and_volume_status(volume_name)
    volume_info_after_expansion = heketi_ops.heketi_volume_info(
        h_node, h_server_url, volume_id, json=True)
    self.assertGreater(
        volume_info_after_expansion["size"],
        volume_info_before_expansion["size"],
        "Size of %s not increased" % volume_id)
    self.assertGreater(
        num_of_bricks_after_expansion, num_of_bricks_before_expansion)
    self.assertEqual(
        num_of_bricks_after_expansion % num_of_bricks_before_expansion, 0)

    # Delete volume and validate release of the used space
    heketi_ops.heketi_volume_delete(h_node, h_server_url, volume_id)
    free_space_after_deletion = self.get_devices_summary_free_space()
    self.assertGreater(
        free_space_after_deletion, free_space_after_expansion,
        "Free space not reclaimed after deletion of volume %s" % volume_id)

def test_heketi_with_device_removal_insuff_space(self):
    """Validate heketi with device removal insufficient space"""
    # Disable 4+ nodes and 3+ devices on the first 3 nodes
    min_free_space_gb = 5
    min_free_space = min_free_space_gb * 1024**2
    heketi_url = self.heketi_server_url
    heketi_node = self.heketi_client_node
    nodes = {}

    node_ids = heketi_node_list(heketi_node, heketi_url)
    self.assertTrue(node_ids)
    for node_id in node_ids:
        node_info = heketi_node_info(
            heketi_node, heketi_url, node_id, json=True)
        if (node_info["state"].lower() != "online"
                or not node_info["devices"]):
            continue
        if len(nodes) > 2:
            heketi_node_disable(heketi_node, heketi_url, node_id)
            self.addCleanup(
                heketi_node_enable, heketi_node, heketi_url, node_id)
            continue
        for device in node_info["devices"]:
            if device["state"].lower() != "online":
                continue
            free_space = device["storage"]["free"]
            if node_id not in nodes:
                nodes[node_id] = []
            if (free_space < min_free_space or len(nodes[node_id]) > 1):
                heketi_device_disable(
                    heketi_node, heketi_url, device["id"])
                self.addCleanup(
                    heketi_device_enable,
                    heketi_node, heketi_url, device["id"])
                continue
            nodes[node_id].append({
                "device_id": device["id"], "free": free_space})

    # Skip test if nodes requirements are not met
    if (len(nodes) < 3
            or not all(map((lambda _list: len(_list) > 1),
                           nodes.values()))):
        raise self.skipTest(
            "Could not find 3 online nodes with 2 online devices "
            "having free space bigger than %dGb." % min_free_space_gb)

    # Calculate size of a potential distributed vol
    if nodes[node_ids[0]][0]["free"] > nodes[node_ids[0]][1]["free"]:
        index = 0
    else:
        index = 1
    vol_size_gb = int(nodes[node_ids[0]][index]["free"] / (1024**2)) + 1
    device_id = nodes[node_ids[0]][index]["device_id"]

    # Create volume with such size that we consume space more than
    # size of smaller disks
    try:
        heketi_vol = heketi_volume_create(
            heketi_node, heketi_url, vol_size_gb, json=True)
    except Exception as e:
        g.log.warning(
            "Got following error trying to create '%s'Gb vol: %s" % (
                vol_size_gb, e))
        vol_size_gb -= 1
        heketi_vol = heketi_volume_create(
            heketi_node, heketi_url, vol_size_gb, json=True)
    self.addCleanup(
        heketi_volume_delete, self.heketi_client_node,
        self.heketi_server_url, heketi_vol["bricks"][0]["volume"])

    # Try to 'remove' bigger Heketi disk expecting error,
    # because there is no space on smaller disk to relocate bricks to
    heketi_device_disable(heketi_node, heketi_url, device_id)
    self.addCleanup(
        heketi_device_enable, heketi_node, heketi_url, device_id)
    try:
        self.assertRaises(
            ExecutionError, heketi_device_remove,
            heketi_node, heketi_url, device_id)
    except Exception:
        self.addCleanup(
            heketi_device_disable, heketi_node, heketi_url, device_id)
        raise

def test_dev_path_mapping_heketi_device_delete(self): """Validate dev path mapping for heketi device delete lifecycle""" h_client, h_url = self.heketi_client_node, self.heketi_server_url node_ids = heketi_ops.heketi_node_list(h_client, h_url) self.assertTrue(node_ids, "Failed to get heketi node list") # Fetch #4th node for the operations h_disable_node = node_ids[3] # Fetch bricks on the devices before volume create h_node_details_before, h_node = self._get_bricks_and_device_details() # Bricks count on the node before pvc creation brick_count_before = [count[1] for count in h_node_details_before] # Create file volume with app pod and verify IO's # and compare path, UUID, vg_name pod_name, dc_name, use_percent = self._create_app_pod_and_verify_pvs() # Check if IO's are running use_percent_after = self._get_space_use_percent_in_app_pod(pod_name) self.assertNotEqual( use_percent, use_percent_after, "Failed to execute IO's in the app pod {} after respin".format( pod_name)) # Fetch bricks on the devices after volume create h_node_details_after, h_node = self._get_bricks_and_device_details() # Bricks count on the node after pvc creation brick_count_after = [count[1] for count in h_node_details_after] self.assertGreater( sum(brick_count_after), sum(brick_count_before), "Failed to add bricks on the node {}".format(h_node)) # Enable the #4th node heketi_ops.heketi_node_enable(h_client, h_url, h_disable_node) node_info = heketi_ops.heketi_node_info(h_client, h_url, h_disable_node, json=True) h_node_id = node_info['id'] self.assertEqual(node_info['state'], "online", "Failed to enable node {}".format(h_disable_node)) # Fetch device list i.e to be deleted h_node_info = heketi_ops.heketi_node_info(h_client, h_url, h_node, json=True) devices_list = [[device['id'], device['name']] for device in h_node_info['devices']] # Device deletion operation for device in devices_list: device_id, device_name = device[0], device[1] self.addCleanup(heketi_ops.heketi_device_enable, h_client, h_url, device_id, raise_on_error=False) # Disable device from heketi device_disable = heketi_ops.heketi_device_disable( h_client, h_url, device_id) self.assertTrue( device_disable, "Device {} could not be disabled".format(device_id)) device_info = heketi_ops.heketi_device_info(h_client, h_url, device_id, json=True) self.assertEqual(device_info['state'], "offline", "Failed to disable device {}".format(device_id)) # Remove device from heketi device_remove = heketi_ops.heketi_device_remove( h_client, h_url, device_id) self.assertTrue(device_remove, "Device {} could not be removed".format(device_id)) # Bricks after device removal device_info = heketi_ops.heketi_device_info(h_client, h_url, device_id, json=True) bricks_count_after = len(device_info['bricks']) self.assertFalse( bricks_count_after, "Failed to remove the bricks from the device {}".format( device_id)) # Delete device from heketi self.addCleanup(heketi_ops.heketi_device_add, h_client, h_url, device_name, h_node, raise_on_error=False) device_delete = heketi_ops.heketi_device_delete( h_client, h_url, device_id) self.assertTrue(device_delete, "Device {} could not be deleted".format(device_id)) # Check if IO's are running after device is deleted use_percent_after = self._get_space_use_percent_in_app_pod(pod_name) self.assertNotEqual( use_percent, use_percent_after, "Failed to execute IO's in the app pod {} after respin".format( pod_name)) # Add device operations for device in devices_list: device_name = device[1] # Add device back to the node heketi_ops.heketi_device_add(h_client, 
h_url, device_name, h_node) # Fetch device info after device add node_info = heketi_ops.heketi_node_info(h_client, h_url, h_node, json=True) device_id = None for device in node_info["devices"]: if device["name"] == device_name: device_id = device["id"] break self.assertTrue( device_id, "Failed to add device {} on node" " {}".format(device_name, h_node)) # Disable the #4th node heketi_ops.heketi_node_disable(h_client, h_url, h_node_id) node_info = heketi_ops.heketi_node_info(h_client, h_url, h_node_id, json=True) self.assertEqual(node_info['state'], "offline", "Failed to disable node {}".format(h_node_id)) pvc_amount, pvc_size = 5, 1 # Fetch bricks on the devices before volume create h_node_details_before, h_node = self._get_bricks_and_device_details() # Bricks count on the node before pvc creation brick_count_before = [count[1] for count in h_node_details_before] # Create file volumes pvc_name = self.create_and_wait_for_pvcs(pvc_size=pvc_size, pvc_amount=pvc_amount) self.assertEqual(len(pvc_name), pvc_amount, "Failed to create {} pvc".format(pvc_amount)) # Fetch bricks on the devices after volume create h_node_details_after, h_node = self._get_bricks_and_device_details() # Bricks count on the node after pvc creation brick_count_after = [count[1] for count in h_node_details_after] self.assertGreater( sum(brick_count_after), sum(brick_count_before), "Failed to add bricks on the node {}".format(h_node)) # Check if IO's are running after new device is added use_percent_after = self._get_space_use_percent_in_app_pod(pod_name) self.assertNotEqual( use_percent, use_percent_after, "Failed to execute IO's in the app pod {} after respin".format( pod_name))
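# Sketch (hypothetical helper, not part of heketi_ops or this class): the
# per-device teardown order used in the test above - disable, remove, verify
# the device is empty, then delete - kept together so the required
# sequencing is explicit.
def _evacuate_and_delete_device(self, h_client, h_url, device_id):
    heketi_ops.heketi_device_disable(h_client, h_url, device_id)
    heketi_ops.heketi_device_remove(h_client, h_url, device_id)
    device_info = heketi_ops.heketi_device_info(
        h_client, h_url, device_id, json=True)
    # A device may only be deleted once all of its bricks were migrated away
    self.assertFalse(device_info["bricks"])
    heketi_ops.heketi_device_delete(h_client, h_url, device_id)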
def test_dev_path_mapping_heketi_node_delete(self):
    """Validate dev path mapping for heketi node deletion lifecycle"""
    h_client, h_url = self.heketi_client_node, self.heketi_server_url
    node_ids = heketi_ops.heketi_node_list(h_client, h_url)
    self.assertTrue(node_ids, "Failed to get heketi node list")

    # Fetch the 4th node for the operations
    h_disable_node = node_ids[3]

    # Fetch bricks on the devices before volume create
    h_node_details_before, h_node = self._get_bricks_and_device_details()

    # Bricks count on the node before pvc creation
    brick_count_before = [count[1] for count in h_node_details_before]

    # Create file volume with app pod and verify IO's
    # and compare path, UUID, vg_name
    pod_name, dc_name, use_percent = self._create_app_pod_and_verify_pvs()

    # Check if IO's are running
    use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
    self.assertNotEqual(
        use_percent, use_percent_after,
        "Failed to execute IO's in the app pod {} after respin".format(
            pod_name))

    # Fetch bricks on the devices after volume create
    h_node_details_after, h_node = self._get_bricks_and_device_details()

    # Bricks count on the node after pvc creation
    brick_count_after = [count[1] for count in h_node_details_after]
    self.assertGreater(
        sum(brick_count_after), sum(brick_count_before),
        "Failed to add bricks on the node {}".format(h_node))
    self.addCleanup(
        heketi_ops.heketi_node_disable, h_client, h_url, h_disable_node)

    # Enable the 4th node
    heketi_ops.heketi_node_enable(h_client, h_url, h_disable_node)
    node_info = heketi_ops.heketi_node_info(
        h_client, h_url, h_disable_node, json=True)
    h_node_id = node_info['id']
    self.assertEqual(
        node_info['state'], "online",
        "Failed to enable node {}".format(h_disable_node))

    # Disable the node and check for brick migrations
    self.addCleanup(
        heketi_ops.heketi_node_enable, h_client, h_url, h_node,
        raise_on_error=False)
    heketi_ops.heketi_node_disable(h_client, h_url, h_node)
    node_info = heketi_ops.heketi_node_info(
        h_client, h_url, h_node, json=True)
    self.assertEqual(
        node_info['state'], "offline",
        "Failed to disable node {}".format(h_node))

    # Before bricks migration
    h_node_info = heketi_ops.heketi_node_info(
        h_client, h_url, h_node, json=True)

    # Bricks before migration on the node which is to be deleted
    bricks_counts_before = 0
    for device in h_node_info['devices']:
        bricks_counts_before += (len(device['bricks']))

    # Remove the node
    heketi_ops.heketi_node_remove(h_client, h_url, h_node)

    # After bricks migration
    h_node_info_after = heketi_ops.heketi_node_info(
        h_client, h_url, h_node, json=True)

    # Bricks after migration on the node which is to be deleted
    bricks_counts = 0
    for device in h_node_info_after['devices']:
        bricks_counts += (len(device['bricks']))
    self.assertFalse(
        bricks_counts,
        "Failed to remove all the bricks from node {}".format(h_node))

    # Old node which is to be deleted and new node where the bricks reside
    old_node, new_node = h_node, h_node_id

    # Node info for the new node where the bricks reside after migration
    h_node_info_new = heketi_ops.heketi_node_info(
        h_client, h_url, new_node, json=True)
    bricks_counts_after = 0
    for device in h_node_info_new['devices']:
        bricks_counts_after += (len(device['bricks']))
    self.assertEqual(
        bricks_counts_before, bricks_counts_after,
        "Failed to migrate bricks from node {} to node {}".format(
            old_node, new_node))

    # Fetch the list of devices which are to be deleted
    h_node_info = heketi_ops.heketi_node_info(
        h_client, h_url, h_node, json=True)
    devices_list = [
        [device['id'], device['name']]
        for device in h_node_info['devices']]
    for device in devices_list:
        device_id, device_name = device[0], device[1]
        self.addCleanup(
            heketi_ops.heketi_device_add, h_client, h_url,
            device_name, h_node, raise_on_error=False)

        # Device deletion from heketi node
        device_delete = heketi_ops.heketi_device_delete(
            h_client, h_url, device_id)
        self.assertTrue(
            device_delete,
            "Failed to delete the device {}".format(device_id))

    node_info = heketi_ops.heketi_node_info(
        h_client, h_url, h_node, json=True)
    cluster_id = node_info['cluster']
    zone = node_info['zone']
    storage_hostname = node_info['hostnames']['manage'][0]
    storage_ip = node_info['hostnames']['storage'][0]

    # Delete the node
    self.addCleanup(
        heketi_ops.heketi_node_add, h_client, h_url, zone, cluster_id,
        storage_hostname, storage_ip, raise_on_error=False)
    heketi_ops.heketi_node_delete(h_client, h_url, h_node)

    # Verify if the node is deleted
    node_ids = heketi_ops.heketi_node_list(h_client, h_url)
    self.assertNotIn(
        old_node, node_ids,
        "Failed to delete the node {}".format(old_node))

    # Check if IO's are running
    use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
    self.assertNotEqual(
        use_percent, use_percent_after,
        "Failed to execute IO's in the app pod {} after respin".format(
            pod_name))

    # Adding node back
    h_node_info = heketi_ops.heketi_node_add(
        h_client, h_url, zone, cluster_id, storage_hostname, storage_ip,
        json=True)
    self.assertTrue(
        h_node_info,
        "Failed to add the node in the cluster {}".format(cluster_id))
    h_node_id = h_node_info["id"]

    # Adding devices to the new node
    for device in devices_list:
        storage_device = device[1]

        # Add device to the new heketi node
        heketi_ops.heketi_device_add(
            h_client, h_url, storage_device, h_node_id)
        heketi_node_info = heketi_ops.heketi_node_info(
            h_client, h_url, h_node_id, json=True)
        device_id = None
        for device in heketi_node_info["devices"]:
            if device["name"] == storage_device:
                device_id = device["id"]
                break
        self.assertTrue(
            device_id,
            "Failed to add device {} on node {}".format(
                storage_device, h_node_id))

    # Create PVCs in order to verify that the bricks reside on the new node
    pvc_amount, pvc_size = 5, 1

    # Fetch bricks on the devices before volume create
    h_node_details_before, h_node = self._get_bricks_and_device_details()

    # Bricks count on the node before pvc creation
    brick_count_before = [count[1] for count in h_node_details_before]

    # Create file volumes
    pvc_name = self.create_and_wait_for_pvcs(
        pvc_size=pvc_size, pvc_amount=pvc_amount)
    self.assertEqual(
        len(pvc_name), pvc_amount,
        "Failed to create {} pvc".format(pvc_amount))

    # Fetch bricks on the devices after volume create
    h_node_details_after, h_node = self._get_bricks_and_device_details()

    # Bricks count on the node after pvc creation
    brick_count_after = [count[1] for count in h_node_details_after]
    self.assertGreater(
        sum(brick_count_after), sum(brick_count_before),
        "Failed to add bricks on the new node {}".format(new_node))

    # Check if IO's are running after new node is added
    use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
    self.assertNotEqual(
        use_percent, use_percent_after,
        "Failed to execute IO's in the app pod {} after respin".format(
            pod_name))
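# Sketch (hypothetical helper): brick counting across all devices of a node,
# as done before and after the node removal above. Only heketi_node_info is
# used; the helper name is an assumption.
def _count_bricks_on_node(self, h_client, h_url, node_id):
    node_info = heketi_ops.heketi_node_info(
        h_client, h_url, node_id, json=True)
    return sum(len(device["bricks"]) for device in node_info["devices"])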
def test_arbiter_required_tag_on_node_or_devices_other_disabled( self, r_node_tag, d_node_tag, r_device_tag, d_device_tag): """Validate arbiter vol creation with node or device tag""" pvc_amount = 3 # Get Heketi nodes info node_id_list = heketi_ops.heketi_node_list(self.heketi_client_node, self.heketi_server_url) # Disable n-3 nodes for node_id in node_id_list[3:]: heketi_ops.heketi_node_disable(self.heketi_client_node, self.heketi_server_url, node_id) self.addCleanup(heketi_ops.heketi_node_enable, self.heketi_client_node, self.heketi_server_url, node_id) # Set arbiter:required tags arbiter_node = heketi_ops.heketi_node_info(self.heketi_client_node, self.heketi_server_url, node_id_list[0], json=True) arbiter_nodes_ip_addresses = arbiter_node['hostnames']['storage'] self._set_arbiter_tag_with_further_revert( self.heketi_client_node, self.heketi_server_url, 'node', node_id_list[0], ('required' if r_node_tag else None), revert_to=arbiter_node.get('tags', {}).get('arbiter')) for device in arbiter_node['devices']: self._set_arbiter_tag_with_further_revert( self.heketi_client_node, self.heketi_server_url, 'device', device['id'], ('required' if r_device_tag else None), revert_to=device.get('tags', {}).get('arbiter')) # Set arbiter:disabled tags data_nodes_ip_addresses = [] for node_id in node_id_list[1:]: node_info = heketi_ops.heketi_node_info(self.heketi_client_node, self.heketi_server_url, node_id, json=True) if not any([ int(d['storage']['free']) > (pvc_amount * 1024**2) for d in node_info['devices'] ]): self.skipTest("Devices are expected to have more than " "%sGb of free space" % pvc_amount) data_nodes_ip_addresses.extend(node_info['hostnames']['storage']) for device in node_info['devices']: self._set_arbiter_tag_with_further_revert( self.heketi_client_node, self.heketi_server_url, 'device', device['id'], ('disabled' if d_device_tag else None), revert_to=device.get('tags', {}).get('arbiter')) self._set_arbiter_tag_with_further_revert( self.heketi_client_node, self.heketi_server_url, 'node', node_id, ('disabled' if d_node_tag else None), revert_to=node_info.get('tags', {}).get('arbiter')) # Create PVCs and check that their bricks are correctly located self.create_storage_class(is_arbiter_vol=True) for i in range(pvc_amount): self.create_and_wait_for_pvc(1) # Get gluster volume info vol_info = openshift_ops.get_gluster_vol_info_by_pvc_name( self.node, self.pvc_name) arbiter_bricks, data_bricks = [], [] for brick in vol_info['bricks']['brick']: if int(brick["isArbiter"]) == 1: arbiter_bricks.append(brick["name"]) else: data_bricks.append(brick["name"]) # Verify that all the arbiter bricks are located on # arbiter:required node and data bricks on all other nodes only. for arbiter_brick in arbiter_bricks: self.assertIn( arbiter_brick.split(':')[0], arbiter_nodes_ip_addresses) for data_brick in data_bricks: self.assertIn( data_brick.split(':')[0], data_nodes_ip_addresses)
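# Illustrative sketch (helper name assumed, not defined in the suite):
# splitting a gluster volume's bricks into arbiter and data bricks from the
# same volume-info structure used above, where "isArbiter" is 1 for the
# arbiter brick of a replica set.
def _split_arbiter_and_data_bricks(vol_info):
    arbiter_bricks, data_bricks = [], []
    for brick in vol_info['bricks']['brick']:
        if int(brick["isArbiter"]) == 1:
            arbiter_bricks.append(brick["name"])
        else:
            data_bricks.append(brick["name"])
    return arbiter_bricks, data_bricks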
def test_volume_creation_of_size_greater_than_the_device_size(self): """Validate creation of a volume of size greater than the size of a device. """ h_node, h_url = self.heketi_client_node, self.heketi_server_url # Remove existing BHV to calculate freespace bhv_list = heketi_ops.get_block_hosting_volume_list(h_node, h_url) if bhv_list: for bhv in bhv_list: bhv_info = heketi_ops.heketi_volume_info(h_node, h_url, bhv, json=True) if bhv_info['blockinfo'].get('blockvolume') is None: heketi_ops.heketi_volume_delete(h_node, h_url, bhv) topology = heketi_ops.heketi_topology_info(h_node, h_url, json=True) nodes_free_space, nodes_ips = [], [] selected_nodes, selected_devices = [], [] cluster = topology['clusters'][0] node_count = len(cluster['nodes']) msg = ("At least 3 Nodes are required in cluster. " "But only %s Nodes are present." % node_count) if node_count < 3: self.skipTest(msg) online_nodes_count = 0 for node in cluster['nodes']: nodes_ips.append(node['hostnames']['storage'][0]) if node['state'] != 'online': continue online_nodes_count += 1 # Disable nodes after 3rd online nodes if online_nodes_count > 3: heketi_ops.heketi_node_disable(h_node, h_url, node['id']) self.addCleanup(heketi_ops.heketi_node_enable, h_node, h_url, node['id']) continue selected_nodes.append(node['id']) device_count = len(node['devices']) msg = ("At least 2 Devices are required on each Node." "But only %s Devices are present." % device_count) if device_count < 2: self.skipTest(msg) sel_devices, online_devices_count, free_space = [], 0, 0 for device in node['devices']: if device['state'] != 'online': continue online_devices_count += 1 # Disable devices after 2nd online devices if online_devices_count > 2: heketi_ops.heketi_device_disable(h_node, h_url, device['id']) self.addCleanup(heketi_ops.heketi_device_enable, h_node, h_url, device['id']) continue sel_devices.append(device['id']) free_space += int(device['storage']['free'] / (1024**2)) selected_devices.append(sel_devices) nodes_free_space.append(free_space) msg = ("At least 2 online Devices are required on each Node. " "But only %s Devices are online on Node: %s." % (online_devices_count, node['id'])) if online_devices_count < 2: self.skipTest(msg) msg = ("At least 3 online Nodes are required in cluster. " "But only %s Nodes are online in Cluster: %s." 
           % (online_nodes_count, cluster['id']))
    if online_nodes_count < 3:
        self.skipTest(msg)

    # Select node with minimum free space
    min_free_size = min(nodes_free_space)
    index = nodes_free_space.index(min_free_size)

    # Get max device size from selected node
    device_size = 0
    for device in selected_devices[index]:
        device_info = heketi_ops.heketi_device_info(
            h_node, h_url, device, json=True)
        device_size = max(
            device_size, int(device_info['storage']['total'] / (1024**2)))

    vol_size = device_size + 1
    if vol_size >= min_free_size:
        self.skipTest('Required free space %s is not available' % vol_size)

    # Create heketi volume with device size + 1
    vol_info = self.create_heketi_volume_with_name_and_wait(
        name="volume_size_greater_than_device_size", size=vol_size,
        json=True)

    # Get gluster server IP's from heketi volume info
    glusterfs_servers = heketi_ops.get_vol_file_servers_and_hosts(
        h_node, h_url, vol_info['id'])

    # Verify gluster server IP's in heketi volume info
    msg = ("Gluster server IP's '%s' do not match the IP's '%s' found in "
           "heketi volume info" % (
               nodes_ips, glusterfs_servers['vol_servers']))
    self.assertEqual(
        set(glusterfs_servers['vol_servers']), set(nodes_ips), msg)

    vol_name = vol_info['name']
    gluster_v_info = self.get_gluster_vol_info(vol_name)

    # Verify replica count in gluster v info
    msg = "Volume %s is replica %s instead of replica 3" % (
        vol_name, gluster_v_info['replicaCount'])
    self.assertEqual('3', gluster_v_info['replicaCount'], msg)

    # Verify distCount in gluster v info
    msg = "Volume %s has distCount %s instead of %s" % (
        vol_name, gluster_v_info['distCount'],
        int(gluster_v_info['brickCount']) // 3)
    self.assertEqual(
        int(gluster_v_info['brickCount']) // 3,
        int(gluster_v_info['distCount']), msg)

    # Verify bricks count in gluster v info
    msg = ("Volume %s does not have a brick count that is a multiple of 3. "
           "It has %s" % (vol_name, gluster_v_info['brickCount']))
    self.assertFalse(int(gluster_v_info['brickCount']) % 3, msg)
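# Sketch (hypothetical helper): the replica-3 layout invariants verified at
# the end of the test above, expressed as a standalone check against a
# gluster volume info dict.
def _check_replica3_layout(self, gluster_v_info):
    brick_count = int(gluster_v_info['brickCount'])
    self.assertEqual('3', gluster_v_info['replicaCount'])
    # Every distribute subvolume holds exactly 3 replicated bricks
    self.assertEqual(brick_count // 3, int(gluster_v_info['distCount']))
    self.assertFalse(brick_count % 3)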
def test_heketi_device_removal_with_insuff_space(self):
    """Validate heketi device removal with insufficient space"""
    # Disable 4+ nodes and 3+ devices on the first 3 nodes
    min_free_space_gb = 5
    min_free_space = min_free_space_gb * 1024**2
    heketi_url = self.heketi_server_url
    heketi_node = self.heketi_client_node
    nodes = {}

    node_ids = heketi_node_list(heketi_node, heketi_url)
    self.assertTrue(node_ids)
    for node_id in node_ids:
        node_info = heketi_node_info(
            heketi_node, heketi_url, node_id, json=True)
        if (node_info["state"].lower() != "online"
                or not node_info["devices"]):
            continue
        if len(nodes) > 2:
            heketi_node_disable(heketi_node, heketi_url, node_id)
            self.addCleanup(
                heketi_node_enable, heketi_node, heketi_url, node_id)
            continue
        for device in node_info["devices"]:
            if device["state"].lower() != "online":
                continue
            free_space = device["storage"]["free"]
            if node_id not in nodes:
                nodes[node_id] = []
            if (free_space < min_free_space or len(nodes[node_id]) > 1):
                heketi_device_disable(
                    heketi_node, heketi_url, device["id"])
                self.addCleanup(
                    heketi_device_enable, heketi_node, heketi_url,
                    device["id"])
                continue
            nodes[node_id].append(
                {"device_id": device["id"], "free": free_space})

    # Skip test if nodes requirements are not met
    if (len(nodes) < 3
            or not all(map((lambda _l: len(_l) > 1), nodes.values()))):
        raise self.skipTest(
            "Could not find 3 online nodes with 2 online devices "
            "having free space bigger than %dGb." % min_free_space_gb)

    # Calculate size of a potential distributed vol
    if nodes[node_ids[0]][0]["free"] > nodes[node_ids[0]][1]["free"]:
        index = 0
    else:
        index = 1
    vol_size_gb = int(nodes[node_ids[0]][index]["free"] / (1024**2)) + 1
    device_id = nodes[node_ids[0]][index]["device_id"]

    # Create volume with such size that we consume space more than
    # size of smaller disks
    h_volume_name = "autotests-heketi-volume-%s" % utils.get_random_str()
    try:
        self.create_heketi_volume_with_name_and_wait(
            h_volume_name, vol_size_gb, json=True)
    except Exception as e:
        # NOTE: rare situation when we need to decrease size of a volume.
        g.log.info("Failed to create '%s'Gb volume. Trying to create "
                   "another one, smaller by 1Gb." % vol_size_gb)
        if not ('more required' in str(e)
                and ('Insufficient suitable allocatable extents for '
                     'logical volume' in str(e))):
            raise

        vol_size_gb -= 1
        self.create_heketi_volume_with_name_and_wait(
            h_volume_name, vol_size_gb, json=True)

    # Try to 'remove' bigger Heketi disk expecting error,
    # because there is no space on smaller disk to relocate bricks to
    heketi_device_disable(heketi_node, heketi_url, device_id)
    self.addCleanup(
        heketi_device_enable, heketi_node, heketi_url, device_id)
    try:
        self.assertRaises(
            AssertionError, heketi_device_remove, heketi_node, heketi_url,
            device_id)
    except Exception:
        self.addCleanup(
            heketi_device_disable, heketi_node, heketi_url, device_id)
        raise
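# Sketch (hypothetical helper): how the target volume size is derived above -
# one Gb more than the larger of the first node's two selected devices, so
# that bricks spill onto both devices and the smaller one cannot absorb the
# bigger one's bricks on removal. "nodes" is the dict built by the test.
def _oversized_volume_gb(nodes, node_ids):
    devices = nodes[node_ids[0]]
    biggest = max(devices, key=lambda d: d["free"])
    return int(biggest["free"] / (1024 ** 2)) + 1, biggest["device_id"]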
def test_create_volumes_enabling_and_disabling_heketi_devices(self):
    """Validate enable/disable of heketi device"""

    # Get nodes info
    node_id_list = heketi_ops.heketi_node_list(
        self.heketi_client_node, self.heketi_server_url)
    node_info_list = []
    for node_id in node_id_list[0:3]:
        node_info = heketi_ops.heketi_node_info(
            self.heketi_client_node, self.heketi_server_url,
            node_id, json=True)
        node_info_list.append(node_info)

    # Disable 4th and other nodes
    if len(node_id_list) > 3:
        for node_id in node_id_list[3:]:
            heketi_ops.heketi_node_disable(
                self.heketi_client_node, self.heketi_server_url, node_id)
            self.addCleanup(
                heketi_ops.heketi_node_enable, self.heketi_client_node,
                self.heketi_server_url, node_id)

    # Disable second and other devices on the first 3 nodes
    for node_info in node_info_list[0:3]:
        devices = node_info["devices"]
        self.assertTrue(
            devices, "Node '%s' does not have devices." % node_info["id"])
        if devices[0]["state"].strip().lower() != "online":
            self.skipTest("Test expects first device to be enabled.")
        if len(devices) < 2:
            continue
        for device in node_info["devices"][1:]:
            out = heketi_ops.heketi_device_disable(
                self.heketi_client_node, self.heketi_server_url,
                device["id"])
            self.assertTrue(
                out, "Failed to disable the device %s" % device["id"])
            self.addCleanup(
                heketi_ops.heketi_device_enable, self.heketi_client_node,
                self.heketi_server_url, device["id"])

    # Create heketi volume
    out = heketi_ops.heketi_volume_create(
        self.heketi_client_node, self.heketi_server_url, 1, json=True)
    self.assertTrue(out, "Failed to create heketi volume of size 1")
    g.log.info("Successfully created heketi volume of size 1")
    device_id = out["bricks"][0]["device"]
    self.addCleanup(
        heketi_ops.heketi_volume_delete, self.heketi_client_node,
        self.heketi_server_url, out["bricks"][0]["volume"])

    # Disable device
    g.log.info("Disabling '%s' device" % device_id)
    out = heketi_ops.heketi_device_disable(
        self.heketi_client_node, self.heketi_server_url, device_id)
    self.assertTrue(out, "Failed to disable the device %s" % device_id)
    g.log.info("Successfully disabled device %s" % device_id)

    try:
        # Get device info
        g.log.info("Retrieving '%s' device info" % device_id)
        out = heketi_ops.heketi_device_info(
            self.heketi_client_node, self.heketi_server_url,
            device_id, json=True)
        self.assertTrue(out, "Failed to get device info %s" % device_id)
        g.log.info("Successfully retrieved device info %s" % device_id)
        name = out["name"]
        if out["state"].lower().strip() != "offline":
            raise exceptions.ExecutionError(
                "Device %s is not in offline state." % name)
        g.log.info("Device %s is now offline" % name)

        # Try to create heketi volume
        g.log.info("Creating heketi volume: Expected to fail.")
        try:
            out = heketi_ops.heketi_volume_create(
                self.heketi_client_node, self.heketi_server_url, 1,
                json=True)
        except exceptions.ExecutionError:
            g.log.info("Volume was not created as expected.")
        else:
            self.addCleanup(
                heketi_ops.heketi_volume_delete, self.heketi_client_node,
                self.heketi_server_url, out["bricks"][0]["volume"])
            self.fail("Volume unexpectedly created. Out: %s" % out)
    finally:
        # Enable the device back
        g.log.info("Enable '%s' device back." % device_id)
        out = heketi_ops.heketi_device_enable(
            self.heketi_client_node, self.heketi_server_url, device_id)
        self.assertTrue(out, "Failed to enable the device %s" % device_id)
        g.log.info("Successfully enabled device %s" % device_id)

    # Get device info
    out = heketi_ops.heketi_device_info(
        self.heketi_client_node, self.heketi_server_url, device_id,
        json=True)
    self.assertTrue(out, "Failed to get device info %s" % device_id)
    g.log.info("Successfully retrieved device info %s" % device_id)
    name = out["name"]
    if out["state"] != "online":
        raise exceptions.ExecutionError(
            "Device %s is not in online state." % name)

    # Create heketi volume of size 1
    out = heketi_ops.heketi_volume_create(
        self.heketi_client_node, self.heketi_server_url, 1, json=True)
    self.assertTrue(out, "Failed to create volume of size 1")
    self.addCleanup(
        heketi_ops.heketi_volume_delete, self.heketi_client_node,
        self.heketi_server_url, out["bricks"][0]["volume"])
    g.log.info("Successfully created volume of size 1")
    name = out["name"]

    # Get gluster volume info
    vol_info = get_volume_info('auto_get_gluster_endpoint', volname=name)
    self.assertTrue(vol_info, "Failed to get '%s' volume info." % name)
    g.log.info("Successfully got the '%s' volume info." % name)
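# Sketch (hypothetical helper): the "volume creation must fail while the only
# usable device is offline" pattern from the try block above, including the
# cleanup registration that keeps the cluster tidy if a volume is created
# unexpectedly.
def _assert_volume_create_fails(self):
    try:
        out = heketi_ops.heketi_volume_create(
            self.heketi_client_node, self.heketi_server_url, 1, json=True)
    except exceptions.ExecutionError:
        return
    self.addCleanup(
        heketi_ops.heketi_volume_delete, self.heketi_client_node,
        self.heketi_server_url, out["bricks"][0]["volume"])
    self.fail("Volume unexpectedly created: %s" % out)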