def _check_heketi_and_gluster_pod_after_node_reboot(self, heketi_node):
    openshift_ops.switch_oc_project(
        self._master, self.storage_project_name)
    heketi_pod = openshift_ops.get_pod_names_from_dc(
        self._master, self.heketi_dc_name)[0]

    # Wait for heketi pod to become ready and running
    openshift_ops.wait_for_pod_be_ready(self._master, heketi_pod)
    heketi_ops.hello_heketi(self._master, self.heketi_server_url)

    # Wait for glusterfs pods to become ready if hosted on same node
    heketi_node_ip = openshift_ops.oc_get_custom_resource(
        self._master, 'pod', ':.status.hostIP', heketi_pod)[0]
    if heketi_node_ip in self.gluster_servers:
        gluster_pod = openshift_ops.get_gluster_pod_name_for_specific_node(
            self._master, heketi_node)

        # Wait for glusterfs pod to become ready
        openshift_ops.wait_for_pod_be_ready(self._master, gluster_pod)
        services = (
            ("glusterd", "running"), ("gluster-blockd", "running"),
            ("tcmu-runner", "running"), ("gluster-block-target", "exited"))
        for service, state in services:
            openshift_ops.check_service_status_on_pod(
                self._master, gluster_pod, service, "active", state)
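# --- Illustration (not part of the test suite) ---
# A minimal, stdlib-only sketch of the service-state check performed above,
# assuming a local `oc` binary with cluster access. The (service, expected
# SubState) pairs mirror the tuples used in the helper;
# check_pod_service_states itself is a hypothetical name.
import subprocess


def check_pod_service_states(pod, services):
    """Raise RuntimeError unless each service is active in the given SubState."""
    for service, expected_substate in services:
        out = subprocess.check_output(
            ["oc", "exec", pod, "--", "systemctl", "show", service,
             "--property=ActiveState,SubState"], text=True)
        props = dict(
            line.split("=", 1) for line in out.splitlines() if "=" in line)
        if (props.get("ActiveState") != "active"
                or props.get("SubState") != expected_substate):
            raise RuntimeError(
                "%s on %s: expected active/%s, got %s/%s" % (
                    service, pod, expected_substate,
                    props.get("ActiveState"), props.get("SubState")))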
def setUpClass(cls):
    """Initialize all the variables necessary for test cases."""
    super(BaseClass, cls).setUpClass()

    # Initializes OCP config variables
    cls.ocp_servers_info = g.config['ocp_servers']
    cls.ocp_master_node = list(g.config['ocp_servers']['master'].keys())
    cls.ocp_master_node_info = g.config['ocp_servers']['master']
    cls.ocp_client = list(g.config['ocp_servers']['client'].keys())
    cls.ocp_client_info = g.config['ocp_servers']['client']
    cls.ocp_nodes = list(g.config['ocp_servers']['nodes'].keys())
    cls.ocp_nodes_info = g.config['ocp_servers']['nodes']

    # Initializes storage project config variables
    openshift_config = g.config.get("cns", g.config.get("openshift"))
    cls.storage_project_name = openshift_config.get(
        'storage_project_name',
        openshift_config.get('setup', {}).get('cns_project_name'))

    # Initializes heketi config variables
    heketi_config = openshift_config['heketi_config']
    cls.heketi_dc_name = heketi_config['heketi_dc_name']
    cls.heketi_service_name = heketi_config['heketi_service_name']
    cls.heketi_client_node = heketi_config['heketi_client_node']
    cls.heketi_server_url = heketi_config['heketi_server_url']
    cls.heketi_cli_user = heketi_config['heketi_cli_user']
    cls.heketi_cli_key = heketi_config['heketi_cli_key']

    cls.gluster_servers = list(g.config['gluster_servers'].keys())
    cls.gluster_servers_info = g.config['gluster_servers']

    cls.storage_classes = openshift_config['dynamic_provisioning'][
        'storage_classes']
    cls.sc = cls.storage_classes.get(
        'storage_class1', cls.storage_classes.get('file_storage_class'))
    cmd = "echo -n %s | base64" % cls.heketi_cli_key
    ret, out, err = g.run(cls.ocp_master_node[0], cmd, "root")
    if ret != 0:
        raise ExecutionError("failed to execute cmd %s on %s out: %s "
                             "err: %s" % (
                                 cmd, cls.ocp_master_node[0], out, err))
    cls.secret_data_key = out.strip()

    # Checks if heketi server is alive
    if not hello_heketi(cls.heketi_client_node, cls.heketi_server_url):
        raise ConfigError("Heketi server %s is not alive"
                          % cls.heketi_server_url)

    # Switch to the storage project
    if not switch_oc_project(
            cls.ocp_master_node[0], cls.storage_project_name):
        raise ExecutionError("Failed to switch oc project on node %s"
                             % cls.ocp_master_node[0])

    if 'glustotest_run_id' not in g.config:
        g.config['glustotest_run_id'] = (
            datetime.datetime.now().strftime('%H_%M_%d_%m_%Y'))
    cls.glustotest_run_id = g.config['glustotest_run_id']
    msg = "Setupclass: %s : %s" % (cls.__name__, cls.glustotest_run_id)
    g.log.info(msg)
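# --- Illustration (not part of the test suite) ---
# setUpClass derives cls.secret_data_key by running `echo -n <key> | base64`
# on the master node. The same value can be computed locally with the stdlib;
# shown here only to document what that shell pipeline produces
# (encode_secret_key is a hypothetical helper name).
import base64


def encode_secret_key(heketi_cli_key):
    """Return the base64 form of the key, matching `echo -n <key> | base64`."""
    return base64.b64encode(heketi_cli_key.encode("utf-8")).decode("ascii")


# e.g. encode_secret_key("adminkey") == "YWRtaW5rZXk="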
def test_heketi_logs_after_heketi_pod_restart(self):

    h_node, h_server = self.heketi_client_node, self.heketi_server_url
    find_string_in_log = r"Started background pending operations cleaner"
    ocp_node = self.ocp_master_node[0]

    # Restart heketi pod
    heketi_pod_name = get_pod_name_from_dc(ocp_node, self.heketi_dc_name)
    oc_delete(ocp_node, 'pod', heketi_pod_name,
              collect_logs=self.heketi_logs_before_delete)
    self.addCleanup(self._heketi_pod_delete_cleanup)
    wait_for_resource_absence(ocp_node, 'pod', heketi_pod_name)
    heketi_pod_name = get_pod_name_from_dc(ocp_node, self.heketi_dc_name)
    wait_for_pod_be_ready(ocp_node, heketi_pod_name)
    self.assertTrue(
        hello_heketi(h_node, h_server),
        "Heketi server {} is not alive".format(h_server))

    # Collect logs after heketi pod restart
    cmd = "oc logs {}".format(heketi_pod_name)
    out = cmd_run(cmd, hostname=ocp_node)

    # Validate string is present in heketi logs; findall() returns a list
    # (never None), so check truthiness rather than identity with None
    pending_check = re.compile(find_string_in_log)
    entry_list = pending_check.findall(out)
    self.assertTrue(entry_list, "Failed to find entries in heketi logs")

    for entry in entry_list:
        self.assertEqual(
            entry, find_string_in_log,
            "Failed to validate, Expected {}; Actual {}".format(
                find_string_in_log, entry))
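# --- Illustration (not part of the test suite) ---
# Why the assertion above uses assertTrue: re.findall() always returns a
# list (possibly empty), never None, so an assertIsNotNone() check would
# pass even when the string is absent. A tiny self-contained demonstration:
import re

_PATTERN = r"Started background pending operations cleaner"

assert re.findall(_PATTERN, "") == []                 # empty list is falsy
assert re.findall(_PATTERN, "prefix " + _PATTERN) == [_PATTERN]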
def test_restart_heketi_pod(self):
    """Validate restarting heketi pod"""

    # create heketi volume
    vol_info = heketi_volume_create(self.heketi_client_node,
                                    self.heketi_server_url,
                                    size=1, json=True)
    self.assertTrue(vol_info, "Failed to create heketi volume of size 1")
    self.addCleanup(
        heketi_volume_delete, self.heketi_client_node,
        self.heketi_server_url, vol_info['id'], raise_on_error=False)
    topo_info = heketi_topology_info(self.heketi_client_node,
                                     self.heketi_server_url,
                                     json=True)

    # get heketi-pod name
    heketi_pod_name = get_pod_name_from_dc(self.ocp_master_node[0],
                                           self.heketi_dc_name)

    # delete heketi-pod (it restarts the pod)
    oc_delete(self.ocp_master_node[0], 'pod', heketi_pod_name,
              collect_logs=self.heketi_logs_before_delete)
    wait_for_resource_absence(self.ocp_master_node[0],
                              'pod', heketi_pod_name)

    # get new heketi-pod name
    heketi_pod_name = get_pod_name_from_dc(self.ocp_master_node[0],
                                           self.heketi_dc_name)
    wait_for_pod_be_ready(self.ocp_master_node[0], heketi_pod_name)

    # check heketi server is running
    self.assertTrue(
        hello_heketi(self.heketi_client_node, self.heketi_server_url),
        "Heketi server %s is not alive" % self.heketi_server_url)

    # compare the topology
    new_topo_info = heketi_topology_info(self.heketi_client_node,
                                         self.heketi_server_url,
                                         json=True)
    self.assertEqual(
        new_topo_info, topo_info, "topology info is not same,"
        " difference - %s" % diff(topo_info, new_topo_info))

    # create new volume
    vol_info = heketi_volume_create(self.heketi_client_node,
                                    self.heketi_server_url,
                                    size=2, json=True)
    self.assertTrue(vol_info, "Failed to create heketi volume of size 2")
    heketi_volume_delete(self.heketi_client_node,
                         self.heketi_server_url, vol_info['id'])
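# --- Illustration (not part of the test suite) ---
# The topology comparison above reports differences via a diff() helper from
# the test libraries. A rough stdlib-only equivalent for two JSON-like
# objects, shown purely for illustration (json_diff is a hypothetical name):
import difflib
import json


def json_diff(old, new):
    """Return unified-diff lines between two JSON-serializable objects."""
    a = json.dumps(old, indent=2, sort_keys=True).splitlines()
    b = json.dumps(new, indent=2, sort_keys=True).splitlines()
    return list(difflib.unified_diff(a, b, "before", "after", lineterm=""))


# json_diff({"nodes": 3}, {"nodes": 4}) yields only the changed "nodes" lines.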
def _respin_heketi_pod(self):
    h_node, h_url = self.heketi_client_node, self.heketi_server_url
    ocp_node = self.ocp_master_node[0]

    # get heketi-pod name
    heketi_pod_name = get_pod_name_from_dc(ocp_node, self.heketi_dc_name)

    # delete heketi-pod (it restarts the pod)
    oc_delete(ocp_node, "pod", heketi_pod_name,
              collect_logs=self.heketi_logs_before_delete)
    wait_for_resource_absence(ocp_node, "pod", heketi_pod_name)

    # get new heketi-pod name
    heketi_pod_name = get_pod_name_from_dc(ocp_node, self.heketi_dc_name)
    wait_for_pod_be_ready(ocp_node, heketi_pod_name)

    # check heketi server is running
    err_msg = "Heketi server %s is not alive" % h_url
    self.assertTrue(hello_heketi(h_node, h_url), err_msg)
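# --- Illustration (not part of the test suite) ---
# The respin helper above deletes the pod, waits for it to disappear, then
# waits for the replacement to become ready. A stdlib-only sketch of the
# absence-polling half, assuming a local `oc` binary; the timeout and
# interval values are arbitrary examples.
import subprocess
import time


def wait_for_pod_absence(pod, timeout=120, interval=5):
    """Poll `oc get pod` until the named pod is gone or the timeout expires."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        rc = subprocess.call(
            ["oc", "get", "pod", pod],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        if rc != 0:  # `oc get` fails once the pod no longer exists
            return
        time.sleep(interval)
    raise TimeoutError("pod %s still present after %ss" % (pod, timeout))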
def test_dev_path_mapping_heketi_pod_reboot(self):
    """Validate dev path mapping for heketi pod reboot"""
    self.node = self.ocp_master_node[0]
    h_node, h_url = self.heketi_client_node, self.heketi_server_url

    # Create file volume with app pod and verify IO's
    # and compare path, uuid, vg_name
    pod_name, dc_name, use_percent = self._create_app_pod_and_verify_pvs()

    # Fetch heketi-pod name
    heketi_pod_name = openshift_ops.get_pod_name_from_dc(
        self.node, self.heketi_dc_name)

    # Respin heketi-pod (it restarts the pod)
    openshift_ops.oc_delete(
        self.node, "pod", heketi_pod_name,
        collect_logs=self.heketi_logs_before_delete)
    self.addCleanup(self._heketi_pod_delete_cleanup)
    openshift_ops.wait_for_resource_absence(
        self.node, "pod", heketi_pod_name)

    # Fetch new heketi-pod name
    heketi_pod_name = openshift_ops.get_pod_name_from_dc(
        self.node, self.heketi_dc_name)
    openshift_ops.wait_for_pod_be_ready(self.node, heketi_pod_name)

    # Check heketi server is running
    self.assertTrue(
        heketi_ops.hello_heketi(h_node, h_url),
        "Heketi server {} is not alive".format(h_url))

    # Check if IO's are running after respin of heketi pod
    use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
    self.assertNotEqual(
        use_percent, use_percent_after,
        "Failed to execute IO's in the app pod {} after respin".format(
            pod_name))
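# --- Illustration (not part of the test suite) ---
# The IO check above compares a filesystem use-percent reading before and
# after the respin. A hedged sketch of how such a probe could look with a
# plain `oc exec` and GNU df; the mount path is an assumed example and
# get_use_percent is a hypothetical helper.
import subprocess


def get_use_percent(pod, mount_path="/mnt"):
    """Return the df use% of a mount inside a pod as an int (e.g. 42)."""
    out = subprocess.check_output(
        ["oc", "exec", pod, "--", "df", "--output=pcent", mount_path],
        text=True)
    # df prints a "Use%" header line followed by the value, e.g. " 42%"
    return int(out.splitlines()[-1].strip().rstrip("%"))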
def test_create_max_num_blockhostingvolumes(self):
    num_of_bv = 10
    new_bhv_list, bv_list, g_nodes = [], [], []
    free_space, nodenum = get_total_free_space(self.heketi_client_node,
                                               self.heketi_server_url)
    if nodenum < 3:
        self.skipTest("Skip the test case since number of "
                      "online nodes is less than 3.")
    free_space_available = int(free_space / nodenum)
    default_bhv_size = get_default_block_hosting_volume_size(
        self.heketi_client_node, self.heketi_dc_name)

    # Get existing list of BHV's
    existing_bhv_list = get_block_hosting_volume_list(
        self.heketi_client_node, self.heketi_server_url)

    # Skip the test if available space is less than default_bhv_size
    if free_space_available < default_bhv_size:
        self.skipTest("Skip the test case since free_space_available %s "
                      "is less than space_required_for_bhv %s." % (
                          free_space_available, default_bhv_size))

    # Create BHV's
    while free_space_available > default_bhv_size:
        block_host_create_info = heketi_volume_create(
            self.heketi_client_node, self.heketi_server_url,
            default_bhv_size, json=True, block=True)
        if block_host_create_info["id"] not in existing_bhv_list.keys():
            new_bhv_list.append(block_host_create_info["id"])
        self.addCleanup(
            heketi_volume_delete, self.heketi_client_node,
            self.heketi_server_url, block_host_create_info["id"],
            raise_on_error=False)
        block_vol_size = int(
            block_host_create_info["blockinfo"]["freesize"] / num_of_bv)

        # Create specified number of BV's in BHV's created
        for i in range(0, num_of_bv):
            block_vol = heketi_blockvolume_create(
                self.heketi_client_node, self.heketi_server_url,
                block_vol_size, json=True, ha=3, auth=True)
            self.addCleanup(
                heketi_blockvolume_delete, self.heketi_client_node,
                self.heketi_server_url, block_vol["id"],
                raise_on_error=False)
            bv_list.append(block_vol["id"])
        free_space_available = int(free_space_available - default_bhv_size)

    # Get gluster node ips
    h_nodes_ids = heketi_node_list(self.heketi_client_node,
                                   self.heketi_server_url)
    for h_node in h_nodes_ids[:2]:
        g_node = heketi_node_info(self.heketi_client_node,
                                  self.heketi_server_url,
                                  h_node, json=True)
        g_nodes.append(g_node['hostnames']['manage'][0])

    # Check if there is no crash in gluster related services & heketi
    services = (
        ("glusterd", "running"), ("gluster-blockd", "running"),
        ("tcmu-runner", "running"), ("gluster-block-target", "exited"))
    for g_node in g_nodes:
        for service, state in services:
            wait_for_service_status_on_gluster_pod_or_node(
                self.ocp_client[0], service, 'active', state, g_node,
                raise_on_error=False)
    out = hello_heketi(self.heketi_client_node, self.heketi_server_url)
    self.assertTrue(
        out, "Heketi server %s is not alive" % self.heketi_server_url)

    # Delete all the BHV's and BV's created
    for bv_volume in bv_list:
        heketi_blockvolume_delete(self.heketi_client_node,
                                  self.heketi_server_url, bv_volume)

    # Check if any blockvolume exists in heketi & gluster
    for bhv_volume in new_bhv_list[:]:
        heketi_vol_info = heketi_volume_info(self.heketi_client_node,
                                             self.heketi_server_url,
                                             bhv_volume, json=True)
        self.assertNotIn(
            "blockvolume", heketi_vol_info["blockinfo"].keys())
        gluster_vol_info = get_block_list(
            'auto_get_gluster_endpoint', volname="vol_%s" % bhv_volume)
        self.assertIsNotNone(
            gluster_vol_info, "Failed to get volume info %s" % bhv_volume)
        new_bhv_list.remove(bhv_volume)
        for blockvol in gluster_vol_info:
            self.assertNotIn("blockvol_", blockvol)
        heketi_volume_delete(self.heketi_client_node,
                             self.heketi_server_url, bhv_volume)

    # Check if all blockhosting volumes are deleted from heketi
    self.assertFalse(new_bhv_list)
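# --- Illustration (not part of the test suite) ---
# The creation loop above runs while per-node free space strictly exceeds
# the default BHV size, so the number of BHVs created is fixed by simple
# integer arithmetic. A tiny worked example (sizes in GB are made up;
# max_bhv_count is a hypothetical helper):
def max_bhv_count(free_space_per_node, default_bhv_size):
    """Iterations of a `while free > size: free -= size` loop."""
    return (free_space_per_node - 1) // default_bhv_size


assert max_bhv_count(500, 100) == 4  # 500 -> 400 -> 300 -> 200 -> 100, stop
assert max_bhv_count(550, 100) == 5
assert max_bhv_count(100, 100) == 0  # strict `>` means no volume is created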
def test_heketi_server_stale_operations_during_heketi_pod_reboot(self):
    """Validate failed/stale entries in db and perform a cleanup of
    those entries.
    """
    volume_id_list, async_obj, ocp_node = [], [], self.ocp_master_node[0]
    h_node, h_server = self.heketi_client_node, self.heketi_server_url
    for i in range(0, 8):
        volume_info = heketi_ops.heketi_volume_create(
            h_node, h_server, 1, json=True)
        volume_id_list.append(volume_info["id"])
        self.addCleanup(
            heketi_ops.heketi_volume_delete, h_node, h_server,
            volume_info["id"], raise_on_error=False)

    def run_async(cmd, hostname, raise_on_error=True):
        async_op = g.run_async(host=hostname, command=cmd)
        async_obj.append(async_op)
        return async_op

    # Temporarily replace the synchronous runner with g.run_async inside
    # heketi_volume_delete so the deletions run in the background.
    for vol_id in volume_id_list:
        with mock.patch.object(command, 'cmd_run', side_effect=run_async):
            heketi_ops.heketi_volume_delete(h_node, h_server, vol_id)

    # Restart heketi pod and check pod is running
    heketi_pod_name = openshift_ops.get_pod_name_from_dc(
        ocp_node, self.heketi_dc_name)
    openshift_ops.oc_delete(
        ocp_node, 'pod', heketi_pod_name,
        collect_logs=self.heketi_logs_before_delete)
    self.addCleanup(self._heketi_pod_delete_cleanup, ocp_node)
    openshift_ops.wait_for_resource_absence(
        ocp_node, 'pod', heketi_pod_name)
    heketi_pod_name = openshift_ops.get_pod_name_from_dc(
        ocp_node, self.heketi_dc_name)
    openshift_ops.wait_for_pod_be_ready(ocp_node, heketi_pod_name)
    self.assertTrue(
        heketi_ops.hello_heketi(h_node, h_server),
        "Heketi server {} is not alive".format(h_server))

    # Wait for pending operations to be generated
    for w in waiter.Waiter(timeout=30, interval=3):
        h_db_check = heketi_ops.heketi_db_check(h_node, h_server)
        h_db_check_vol = h_db_check.get("volumes")
        h_db_check_bricks = h_db_check.get("bricks")
        if (h_db_check_vol.get("pending")
                and h_db_check_bricks.get("pending")):
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "No pending operations found during volume deletion; "
            "volumes: {}, bricks: {}".format(
                h_db_check_vol.get("pending"),
                h_db_check_bricks.get("pending")))

    # Verify pending bricks are multiples of 3
    self.assertFalse(
        h_db_check_bricks.get("pending") % 3,
        "Expecting bricks pending count to be multiple of 3 but "
        "found {}".format(h_db_check_bricks.get("pending")))

    # Verify and wait for pending operations to complete
    for w in waiter.Waiter(timeout=120, interval=10):
        h_db_check = heketi_ops.heketi_db_check(h_node, h_server)
        h_db_check_vol = h_db_check.get("volumes")
        h_db_check_bricks = h_db_check.get("bricks")
        if (not h_db_check_bricks.get("pending")
                and not h_db_check_vol.get("pending")):
            break
    if w.expired:
        raise AssertionError("Failed to delete volumes after 120 secs")
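# --- Illustration (not part of the test suite) ---
# Standalone demonstration of the patching trick used above: a synchronous
# runner is temporarily replaced so calls made inside the patched scope
# return immediately while the real work runs in the background. All names
# here (_sync_run, demo) are hypothetical; threading stands in for the
# remote async runner used by the test libraries.
import threading
from unittest import mock


def _sync_run(cmd):
    """Stand-in for a blocking command runner."""
    print("running:", cmd)


def demo():
    original, started = _sync_run, []

    def _async_run(cmd):
        # Call the *original* runner in a thread to avoid recursing into
        # the mock, and keep a handle so the caller can join later.
        t = threading.Thread(target=original, args=(cmd,))
        t.start()
        started.append(t)
        return t

    # Within this block, any module-level call to _sync_run is non-blocking.
    with mock.patch(__name__ + "._sync_run", side_effect=_async_run):
        _sync_run("heketi-cli volume delete <id>")
    for t in started:
        t.join()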