def _get_heketi_client_version_str(hostname=None):
    """Fetch the installed heketi-client package version.

    Runs an ``rpm`` query on the heketi client node and trims the result
    down to the '<version>-<release>' form.

    Args:
        hostname (str): node on which the version check command should run;
            defaults to the configured heketi client node.
    Returns:
        str: heketi version, i.e. '7.0.0-1'
    Raises:
        AssertionError: when the rpm query command fails.
        exceptions.ExecutionError: when the command output is empty.
    """
    if not hostname:
        # Fall back to the heketi client node from the config file.
        ocp_cfg = g.config.get("cns", g.config.get("openshift"))
        hostname = ocp_cfg['heketi_config']['heketi_client_node'].strip()
    version_cmd = (
        "rpm -q heketi-client --queryformat '%{version}-%{release}\n' | "
        "cut -d '.' -f 1,2,3")
    rcode, stdout, stderr = g.run(hostname, version_cmd, "root")
    if rcode != 0:
        msg = ("Failed to get heketi client version. "
               "\n'err': %s\n 'out': %s" % (stderr, stdout))
        g.log.error(msg)
        raise AssertionError(msg)
    version = stdout.strip()
    if not version:
        error_msg = "Empty output for '%s' cmd: '%s'" % (version_cmd, version)
        g.log.error(error_msg)
        raise exceptions.ExecutionError(error_msg)
    return version
def test_delete_heketidb_volume(self):
    """Verify heketi-cli refuses to delete the 'heketidbstorage' volume."""
    # Create a couple of regular volumes so the volume list is non-empty.
    for _ in range(2):
        created = heketi_ops.heketi_volume_create(
            self.heketi_client_node, self.heketi_server_url, 10, json=True)
        self.addCleanup(
            heketi_ops.heketi_volume_delete, self.heketi_client_node,
            self.heketi_server_url, created["id"])

    listing = heketi_ops.heketi_volume_list(
        self.heketi_client_node, self.heketi_server_url, json=True)
    self.assertTrue(listing["volumes"], "Heketi volume list empty.")

    # Locate the heketi DB volume and make sure its deletion is rejected.
    for vol_id in listing["volumes"]:
        details = heketi_ops.heketi_volume_info(
            self.heketi_client_node, self.heketi_server_url, vol_id,
            json=True)
        if details["name"] == "heketidbstorage":
            self.assertRaises(
                AssertionError, heketi_ops.heketi_volume_delete,
                self.heketi_client_node, self.heketi_server_url, vol_id)
            return
    raise exceptions.ExecutionError(
        "Warning: heketidbstorage doesn't exist in list of volumes")
def _get_openshift_storage_version_str(hostname=None):
    """Extract the OpenShift Storage version from a gluster pod's buildinfo.

    Args:
        hostname (str): node on which the ocp command should run; defaults
            to the first configured OCP client node.
    Returns:
        str: OpenShift Storage version, i.e. '3.11.3'
    Raises:
        NotImplementedError: when no gluster pod exists (standalone setup).
        exceptions.ExecutionError: when the BUILD label cannot be parsed.
    """
    if not hostname:
        hostname = list(g.config['ocp_servers']['client'].keys())[0]

    # Any gluster pod will do for the buildinfo query — take the last one.
    get_gluster_pod_cmd = (
        "oc get --no-headers=true pods --selector glusterfs-node=pod "
        "-o=custom-columns=:.metadata.name | tail -1")
    gluster_pod = command.cmd_run(get_gluster_pod_cmd, hostname)
    if not gluster_pod:
        raise NotImplementedError(
            "OCS version check cannot be done on the standalone setup.")

    # Grep the last BUILD-label line out of the server Dockerfile inside
    # the pod's buildinfo directory.
    buildinfo_cmd = (
        "oc rsh %s "
        "find . -name \"Dockerfile-rhgs3-rhgs-server-rhel7*\" "
        r"-exec awk '/%s/{print $0}' {} \; "
        "| tail -1" % (gluster_pod, BUILDS_LABEL_TAG_REGEX))
    label_line = command.cmd_run(buildinfo_cmd, hostname)

    tag_match = re.search(BUILDS_LABEL_TAG_REGEX, label_line)
    if not tag_match:
        error_msg = "Unexpected BUILD LABEL tag expression: '%s'" % label_line
        g.log.error(error_msg)
        raise exceptions.ExecutionError(error_msg)
    return (tag_match.group(2)).strip()
def _set_log_level(self, node, level, msg, exec_time):
    """Set the tcmu-runner log level on a gluster node and verify it applied.

    Args:
        node (str): gluster node (or pod host) whose tcmu config is edited.
        level (int): numeric log level written into the tcmu config file.
        msg (str): human-readable level name expected in the runner log.
        exec_time (datetime.datetime): timestamp taken before the change;
            only log lines newer than this count as confirmation.
    Raises:
        exceptions.ExecutionError: if the new level is not reflected in
            the tcmu-runner log within 120 seconds.
    """
    # sed helpers: drop any existing 'log_level = N' line, append a new
    # one at end of file, and print the last line matching a message.
    delete_log_level = r'sed -i "/\(^log_level.*=.*[0-9]\)/d" {}'
    set_log_level = r'sed -i "\$alog_level = {}" {}'
    check_log_msg = r'sed -n "/.*\({}\).*/{{p;}}" {} | tail -1'

    # Set log level
    openshift_ops.cmd_run_on_gluster_pod_or_node(
        self.node, set_log_level.format(level, TCMU_CONF), gluster_node=node)
    self.addCleanup(
        openshift_ops.cmd_run_on_gluster_pod_or_node, self.node,
        delete_log_level.format(TCMU_CONF), gluster_node=node)

    # Validate log level
    log_msg = "log level now is {}".format(msg)
    for w in waiter.Waiter(120, 3):
        out = openshift_ops.cmd_run_on_gluster_pod_or_node(
            self.node, check_log_msg.format(log_msg, TCMU_RUNNER_LOG),
            gluster_node=node)
        match = re.match(LOG_REGEX, out)
        # Accept only log entries written after 'exec_time' so a stale
        # line from a previous run cannot satisfy the check.
        if (match
                and exec_time < datetime.datetime.strptime(
                    match.group(1), self.timeformat)):
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "Log level '{}:{}' of tcmu did not get changed on node"
            " {}".format(level, msg, node))

    # Remove the custom log level right away; the addCleanup above acts
    # as a safety net if the validation loop raised earlier.
    openshift_ops.cmd_run_on_gluster_pod_or_node(
        self.node, delete_log_level.format(TCMU_CONF), gluster_node=node)
def _check_for_pending_operations(self, h_node, h_url):
    """Wait until the heketi DB reports pending blockvolume operations.

    Polls ``heketi_db_check`` every 10 seconds for up to 120 seconds
    and raises if no pending blockvolume operation ever shows up.

    Raises:
        exceptions.ExecutionError: if the wait times out.
    """
    blockvolumes = None
    for w in waiter.Waiter(timeout=120, interval=10):
        blockvolumes = heketi_db_check(h_node, h_url).get("blockvolumes")
        if blockvolumes.get("pending"):
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "No pending operations found during blockvolumes creation "
            "{}".format(blockvolumes.get("pending")))
def _fetch_metric_from_promtheus_pod(self, metric):
    """Fetch a metric from the prometheus pod using its HTTP API.

    Args:
        metric (str): PromQL metric/query to fetch.
    Returns:
        list: the 'result' entries from the prometheus API response.
    Raises:
        exceptions.ExecutionError: if the API call fails or returns
            an empty result.
    """
    prometheus_pods = list(openshift_ops.oc_get_pods(
        self._master, selector=self._prometheus_resources_selector).keys())
    fetch_metric_cmd = ("curl 'http://localhost:9090/api/v1/query"
                        "?query={}'".format(metric))
    ret, metric_data, _ = openshift_ops.oc_rsh(
        self._master, prometheus_pods[0], fetch_metric_cmd)
    # BUGFIX: check the command status *before* parsing — on a failed curl
    # the output is not valid JSON and json.loads raised a misleading
    # JSONDecodeError instead of the intended ExecutionError.
    # (Also fixed "fecth" typo in the error message.)
    if ret:
        raise exceptions.ExecutionError(
            "Failed to fetch data for metric {}, output {}".format(
                metric, metric_data))
    metric_result = json.loads(metric_data)["data"]["result"]
    if not metric_result:
        raise exceptions.ExecutionError(
            "Failed to fetch data for metric {}, output {}".format(
                metric, metric_result))
    return metric_result
def _wrapper_for_get_ssh_connection(cls, host, user=None, recreate=False):
    """Return an SSH connection to ``host``, optionally recreating it.

    Args:
        host (str): hostname to connect to.
        user (str): user to connect as.
        recreate (bool): when True, close any cached connection first so
            a brand new one gets established.
    Returns:
        The SSH connection object.
    Raises:
        exceptions.ExecutionError: if a connection cannot be established
            even after dropping the cached one and retrying.
    """
    connection_key = "%s@%s" % (user, host)
    if recreate and connection_key in cls._ssh_connections:
        cls.ssh_close_connection(host=host, user=user)
    ssh = cls._get_ssh_connection(host, user)
    if not ssh:
        # The cached connection may have gone stale — drop it and retry once.
        if connection_key in cls._ssh_connections:
            cls.ssh_close_connection(host=host, user=user)
        ssh = cls._get_ssh_connection(host=host, user=user)
    # BUGFIX: corrected "Plese" -> "Please" in the user-facing error message.
    if not ssh:
        raise exceptions.ExecutionError(
            "Failed to establish SSH connection to the '%s' host "
            "using '%s' user. Please check availability of the "
            "hostname and make sure it has passwordless "
            "connection from your host." % (host, user))
    return ssh
def _power_off_node_and_wait_node_to_be_not_ready(self, hostname):
    """Power off the VM backing ``hostname`` and wait for NotReady state.

    Registers cleanups that power the VM back on and wait for the gluster
    pod to recover once the node returns.

    Raises:
        exceptions.ExecutionError: if the node never leaves Ready state.
    """
    # Bring down the glusterfs node; cleanups restore it afterwards.
    vm_name = node_ops.find_vm_name_by_ip_or_hostname(hostname)
    self.addCleanup(self._wait_for_gluster_pod_after_node_reboot, hostname)
    self.addCleanup(node_ops.power_on_vm_by_name, vm_name)
    node_ops.power_off_vm_by_name(vm_name)

    # Wait for the node's Ready condition to leave the 'True' state.
    ready_status = r'":.status.conditions[?(@.type==\"Ready\")]".status'
    for w in waiter.Waiter(300, 20):
        state = openshift_ops.oc_get_custom_resource(
            self.ocp_client, 'node', ready_status, hostname)
        if state[0] in ('False', 'Unknown'):
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "Failed to bring down node {}".format(hostname))
def _get_heketi_server_version_str(ocp_client_node=None):
    """Gets Heketi server package version from Heketi POD.

    Args:
        ocp_client_node (str): Node on which the version check command
            should run.
    Returns:
        str : heketi version, i.e. '7.0.0-1'
    Raises:
        'exceptions.ExecutionError' if failed to get version
    """
    if not ocp_client_node:
        ocp_client_node = list(g.config["ocp_servers"]["client"].keys())[0]
    get_package_version_cmd = (
        "rpm -q heketi --queryformat '%{version}-%{release}\n' | "
        "cut -d '.' -f 1,2,3")

    # NOTE(vponomar): we implement Heketi POD call command here, not in common
    # module for OC commands just to avoid cross-reference imports.
    get_pods_cmd = "oc get -o wide --no-headers=true pods --selector heketi"
    heketi_pods = command.cmd_run(get_pods_cmd, hostname=ocp_client_node)
    err_msg = ""
    for heketi_pod_line in heketi_pods.split("\n"):
        heketi_pod_data = heketi_pod_line.split()
        # BUGFIX: skip blank/truncated lines — empty 'oc get' output makes
        # split("\n") yield [''] and indexing below raised IndexError.
        if len(heketi_pod_data) < 3:
            continue
        # Consider only fully-ready ('1/1'), running, non-deploy PODs.
        if ("-deploy" in heketi_pod_data[0]
                or heketi_pod_data[1].lower() != "1/1"
                or heketi_pod_data[2].lower() != "running"):
            continue
        try:
            pod_cmd = "oc exec %s -- %s" % (
                heketi_pod_data[0], get_package_version_cmd)
            return command.cmd_run(pod_cmd, hostname=ocp_client_node)
        except Exception as e:
            err = ("Failed to run '%s' command on '%s' Heketi POD. "
                   "Error: %s\n" % (pod_cmd, heketi_pod_data[0], e))
            err_msg += err
            g.log.error(err)
    if not err_msg:
        err_msg += "Haven't found 'Running' and 'ready' (1/1) Heketi PODs.\n"
    err_msg += "Heketi PODs: %s" % heketi_pods
    raise exceptions.ExecutionError(err_msg)
def node_reboot_by_command(node, timeout=600, wait_step=10):
    """Reboot node and wait to start for given timeout.

    Args:
        node (str) : Node which needs to be rebooted.
        timeout (int) : Seconds to wait before node to be started.
        wait_step (int): Interval in seconds to wait before checking
            status of node again.
    Raises:
        AssertionError: if the reboot command does not drop the
            connection as expected.
        exceptions.ExecutionError: if the node is still unreachable
            after ``timeout`` seconds.
    """
    reboot_cmd = "sleep 3; /sbin/shutdown -r now 'Reboot triggered by Glusto'"
    ret, out, err = g.run(node, reboot_cmd)
    # A successful reboot drops the SSH session, surfacing as code 255.
    if ret != 255:
        err_msg = "failed to reboot host '%s' error %s" % (node, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)

    try:
        g.ssh_close_connection(node)
    except Exception as e:
        g.log.error("failed to close connection with host %s "
                    "with error: %s" % (node, e))
        raise

    # added sleep as node will restart after 3 sec
    time.sleep(3)

    # Poll until a fresh rpyc connection can be established again.
    for w in waiter.Waiter(timeout=timeout, interval=wait_step):
        try:
            if g.rpyc_get_connection(node, user="******"):
                g.rpyc_close_connection(node, user="******")
                return
        except Exception as err:
            g.log.info("exception while getting connection: '%s'" % err)
    if w.expired:
        error_msg = ("exceeded timeout %s sec, node '%s' is "
                     "not reachable" % (timeout, node))
        g.log.error(error_msg)
        raise exceptions.ExecutionError(error_msg)
def _get_openshift_version_str(hostname=None):
    """Read the OpenShift version via the 'oc version' command.

    Args:
        hostname (str): node on which the ocp command should run; defaults
            to the first configured OCP client node.
    Returns:
        str: oc version, i.e. 'v3.10.47'
    Raises:
        AssertionError: if the 'oc version' command fails.
        exceptions.ExecutionError: if the command output is empty.
    """
    if not hostname:
        hostname = list(g.config['ocp_servers']['client'].keys())[0]
    version_cmd = "oc version | grep openshift | cut -d ' ' -f 2"
    rcode, stdout, stderr = g.run(hostname, version_cmd, "root")
    if rcode != 0:
        msg = "Failed to get oc version. \n'err': %s\n 'out': %s" % (
            stderr, stdout)
        g.log.error(msg)
        raise AssertionError(msg)
    version = stdout.strip()
    if not version:
        error_msg = "Empty output from 'oc version' command: '%s'" % version
        g.log.error(error_msg)
        raise exceptions.ExecutionError(error_msg)
    return version
def restart_gluster_vol_brick_processes(ocp_client_node, file_vol,
                                        gluster_nodes):
    """Restarts brick process of a file volume.

    Args:
        ocp_client_node (str): Node to execute OCP commands on.
        file_vol (str): file volume name.
        gluster_nodes (str/list): One or several IPv4 addresses of Gluster
            nodes, where 'file_vol' brick processes must be recreated.
    Raises:
        AssertionError: if a brick PID cannot be found or the volume fails
            to start afterwards.
        exceptions.ExecutionError: if a killed brick process still exists
            after 60 seconds.
    """
    if not isinstance(gluster_nodes, (list, set, tuple)):
        gluster_nodes = [gluster_nodes]

    # Get Gluster vol brick PIDs
    gluster_volume_status = get_gluster_vol_status(file_vol)
    pids = []
    for gluster_node in gluster_nodes:
        pid = None
        for g_node, g_node_data in gluster_volume_status.items():
            if g_node != gluster_node:
                continue
            for process_name, process_data in g_node_data.items():
                if not process_name.startswith("/var"):
                    continue
                pid = process_data["pid"]
                # When brick is down, PID of the brick is returned as -1,
                # which is unexpected — assert on it.
                # BUGFIX: the message args were '("..%s..%s.." % file_vol,
                # gluster_node)', which raised a formatting TypeError
                # whenever the assertion actually fired.
                assert pid != "-1", (
                    "Got unexpected PID (-1) for '%s' gluster vol on '%s' "
                    "node." % (file_vol, gluster_node))
        assert pid, ("Could not find 'pid' in Gluster vol data for '%s' "
                     "Gluster node. Data: %s" % (
                         gluster_node, gluster_volume_status))
        pids.append((gluster_node, pid))

    # Restart Gluster vol brick processes using found PIDs
    for gluster_node, pid in pids:
        cmd = "kill -9 %s" % pid
        cmd_run_on_gluster_pod_or_node(ocp_client_node, cmd, gluster_node)

    # Wait for Gluster vol brick processes to be recreated
    for gluster_node, pid in pids:
        # BUGFIX: the PID was never substituted into the 'ps' command
        # ('% pid' was missing), so the loop grepped the literal '%s'
        # and always reported immediate success.
        killed_pid_cmd = (
            "ps -eaf | grep %s | grep -v grep | awk '{print $2}'" % pid)
        _waiter = waiter.Waiter(timeout=60, interval=2)
        for w in _waiter:
            result = cmd_run_on_gluster_pod_or_node(
                ocp_client_node, killed_pid_cmd, gluster_node)
            if result.strip() == pid:
                continue
            g.log.info("Brick process '%s' was killed successfully on '%s'" % (
                pid, gluster_node))
            break
        if w.expired:
            error_msg = ("Process ID '%s' still exists on '%s' after waiting "
                         "for it 60 seconds to get killed." % (
                             pid, gluster_node))
            g.log.error(error_msg)
            raise exceptions.ExecutionError(error_msg)

    # Start volume after gluster vol brick processes recreation
    ret, out, err = volume_start(
        "auto_get_gluster_endpoint", file_vol, force=True)
    if ret != 0:
        # NOTE(review): 'gluster_node' here is the leftover loop variable
        # (the last processed node) — message may be misleading; confirm.
        err_msg = "Failed to start gluster volume %s on %s. error: %s" % (
            file_vol, gluster_node, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)
def test_brick_evict_with_node_down(self):
    """Test brick evict basic functionality and verify brick evict
    after node down"""
    h_node, h_server = self.heketi_client_node, self.heketi_server_url

    # Disable node if more than 3
    node_list = heketi_ops.heketi_node_list(h_node, h_server)
    if len(node_list) > 3:
        for node_id in node_list[3:]:
            heketi_ops.heketi_node_disable(h_node, h_server, node_id)
            self.addCleanup(
                heketi_ops.heketi_node_enable, h_node, h_server, node_id)

    # Create heketi volume
    vol_info = heketi_ops.heketi_volume_create(
        h_node, h_server, 1, json=True)
    self.addCleanup(
        heketi_ops.heketi_volume_delete,
        h_node, h_server, vol_info.get('id'))

    # Get node on which heketi pod is scheduled
    heketi_pod = openshift_ops.get_pod_name_from_dc(
        self.ocp_client, self.heketi_dc_name)
    heketi_node = openshift_ops.oc_get_custom_resource(
        self.ocp_client, 'pod', '.:spec.nodeName', heketi_pod)[0]

    # Get list of hostname from node id (the disabled nodes above)
    host_list = []
    for node_id in node_list[3:]:
        node_info = heketi_ops.heketi_node_info(
            h_node, h_server, node_id, json=True)
        host_list.append(node_info.get('hostnames').get('manage')[0])

    # Get brick id and glusterfs node which is not heketi node
    # NOTE(review): if no brick matches (hostname == heketi node or in the
    # disabled list for all bricks), 'brick_id' stays unbound and the
    # evict call below would raise NameError — confirm this cannot happen
    # on a 3-replica volume.
    for node in vol_info.get('bricks', {}):
        node_info = heketi_ops.heketi_node_info(
            h_node, h_server, node.get('node'), json=True)
        hostname = node_info.get('hostnames').get('manage')[0]
        if (hostname != heketi_node) and (hostname not in host_list):
            brick_id = node.get('id')
            break

    # Bring down the glusterfs node
    vm_name = node_ops.find_vm_name_by_ip_or_hostname(hostname)
    self.addCleanup(
        self._wait_for_gluster_pod_after_node_reboot, hostname)
    self.addCleanup(node_ops.power_on_vm_by_name, vm_name)
    node_ops.power_off_vm_by_name(vm_name)

    # Wait glusterfs node to become NotReady
    custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
    for w in waiter.Waiter(300, 20):
        status = openshift_ops.oc_get_custom_resource(
            self.ocp_client, 'node', custom, hostname)
        if status[0] in ['False', 'Unknown']:
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "Failed to bring down node {}".format(hostname))

    # Perform brick evict operation; with one node down there may be no
    # replacement brick available, which heketi reports via an
    # AssertionError containing 'No Replacement was found' — tolerated.
    try:
        heketi_ops.heketi_brick_evict(h_node, h_server, brick_id)
    except AssertionError as e:
        if ('No Replacement was found' not in six.text_type(e)):
            raise
def test_heketi_manual_cleanup_operation_in_bhv(self):
    """Validate heketi db cleanup will resolve the mismatch
       in the free size of the block hosting volume with failed
       block device create operations.
    """
    bhv_size_before, bhv_size_after, vol_count = [], [], 5
    ocp_node, g_node = self.ocp_master_node[0], self.gluster_servers[0]
    h_node, h_url = self.heketi_client_node, self.heketi_server_url

    # Get existing heketi volume list
    existing_volumes = heketi_volume_list(h_node, h_url, json=True)

    # Add function to clean stale volumes created during test
    self.addCleanup(
        self._cleanup_heketi_volumes, existing_volumes.get("volumes"))

    # Get nodes id list
    node_id_list = heketi_node_list(h_node, h_url)

    # Disable 4th and other nodes
    for node_id in node_id_list[3:]:
        heketi_node_disable(h_node, h_url, node_id)
        self.addCleanup(heketi_node_enable, h_node, h_url, node_id)

    # Calculate heketi volume size (capped at 50 GB per volume)
    free_space, nodenum = get_total_free_space(h_node, h_url)
    free_space_available = int(free_space / nodenum)
    if free_space_available > vol_count:
        h_volume_size = int(free_space_available / vol_count)
        if h_volume_size > 50:
            h_volume_size = 50
    else:
        h_volume_size, vol_count = 1, free_space_available

    # Create BHV in case blockvolume size is greater than default BHV size
    default_bhv_size = get_default_block_hosting_volume_size(
        h_node, self.heketi_dc_name)
    if default_bhv_size < h_volume_size:
        h_volume_name = "autotest-{}".format(utils.get_random_str())
        bhv_info = self.create_heketi_volume_with_name_and_wait(
            h_volume_name, free_space_available,
            raise_on_cleanup_error=False, block=True, json=True)
        free_space_available -= (
            int(bhv_info.get("blockinfo").get("reservedsize")) + 1)
        h_volume_size = int(free_space_available / vol_count)

    # Get BHV list
    h_bhv_list = get_block_hosting_volume_list(h_node, h_url).keys()
    self.assertTrue(h_bhv_list, "Failed to get the BHV list")

    # Record free size of every BHV before breaking tcmu-runner
    for bhv in h_bhv_list:
        vol_info = heketi_volume_info(h_node, h_url, bhv, json=True)
        bhv_vol_size_before = vol_info.get("freesize")
        bhv_size_before.append(bhv_vol_size_before)

    # Kill Tcmu-runner service so blockvolume creates below fail mid-way
    services = ("tcmu-runner", "gluster-block-target", "gluster-blockd")
    kill_service_on_gluster_pod_or_node(ocp_node, "tcmu-runner", g_node)

    # Register cleanups that restart the services and wait for their
    # expected states ('gluster-block-target' ends up 'exited').
    for service in services:
        state = (
            'exited' if service == 'gluster-block-target' else 'running')
        self.addCleanup(
            wait_for_service_status_on_gluster_pod_or_node,
            ocp_node, service, 'active', state, g_node)
        self.addCleanup(
            restart_service_on_gluster_pod_or_node, ocp_node, service,
            g_node)

    def run_async(cmd, hostname, raise_on_error=True):
        # Replacement for command.cmd_run that fires the command in the
        # background instead of waiting for it.
        return g.run_async(host=hostname, command=cmd)

    # Create stale block volumes in async; json.loads is stubbed out
    # because the async call does not return parseable JSON.
    for count in range(vol_count):
        with mock.patch.object(json, 'loads', side_effect=(lambda j: j)):
            with mock.patch.object(
                    command, 'cmd_run', side_effect=run_async):
                heketi_blockvolume_create(
                    h_node, h_url, h_volume_size, json=True)

    # Wait for pending operation to get generated
    self._check_for_pending_operations(h_node, h_url)

    # Restart the services
    for service in services:
        state = (
            'exited' if service == 'gluster-block-target' else 'running')
        restart_service_on_gluster_pod_or_node(
            ocp_node, service, g_node)
        wait_for_service_status_on_gluster_pod_or_node(
            ocp_node, service, 'active', state, g_node)

    # Cleanup pending operation
    heketi_server_operation_cleanup(h_node, h_url)

    # wait for pending operation to get cleaned up: free sizes must
    # return to the values recorded before the failed creations.
    for w in waiter.Waiter(timeout=120, interval=10):
        # Get BHV size
        for bhv in h_bhv_list:
            vol_info = heketi_volume_info(h_node, h_url, bhv, json=True)
            bhv_vol_size_after = vol_info.get("freesize")
            bhv_size_after.append(bhv_vol_size_after)

        if (set(bhv_size_before) == set(bhv_size_after)):
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "Failed to Validate volume size Actual:{},"
            " Expected:{}".format(
                set(bhv_size_before), set(bhv_size_after)))
def test_heketi_server_stale_operations_during_heketi_pod_reboot(self):
    """ Validate failed/stale entries in db and performs a cleanup of
    those entries
    """
    volume_id_list, async_obj, ocp_node = [], [], self.ocp_master_node[0]
    h_node, h_server = self.heketi_client_node, self.heketi_server_url

    # Create eight 1 GB volumes to delete later in the background
    for i in range(0, 8):
        volume_info = heketi_ops.heketi_volume_create(
            h_node, h_server, 1, json=True)
        volume_id_list.append(volume_info["id"])
        self.addCleanup(
            heketi_ops.heketi_volume_delete, h_node, h_server,
            volume_info["id"], raise_on_error=False)

    def run_async(cmd, hostname, raise_on_error=True):
        # Background replacement for command.cmd_run; keeps handles so
        # the async processes could be inspected later.
        async_op = g.run_async(host=hostname, command=cmd)
        async_obj.append(async_op)
        return async_op

    # Temporary replace g.run with g.async_run in heketi_volume_delete
    # to be able to run it in background.
    for vol_id in volume_id_list:
        with mock.patch.object(command, 'cmd_run', side_effect=run_async):
            heketi_ops.heketi_volume_delete(h_node, h_server, vol_id)

    # Restart heketi pod and check pod is running
    heketi_pod_name = openshift_ops.get_pod_name_from_dc(
        ocp_node, self.heketi_dc_name)
    openshift_ops.oc_delete(
        ocp_node, 'pod', heketi_pod_name,
        collect_logs=self.heketi_logs_before_delete)
    self.addCleanup(self._heketi_pod_delete_cleanup, ocp_node)
    openshift_ops.wait_for_resource_absence(
        ocp_node, 'pod', heketi_pod_name)
    heketi_pod_name = openshift_ops.get_pod_name_from_dc(
        ocp_node, self.heketi_dc_name)
    openshift_ops.wait_for_pod_be_ready(ocp_node, heketi_pod_name)
    self.assertTrue(
        heketi_ops.hello_heketi(h_node, h_server),
        "Heketi server {} is not alive".format(h_server))

    # Wait for pending operations to get generate
    for w in waiter.Waiter(timeout=30, interval=3):
        h_db_check = heketi_ops.heketi_db_check(h_node, h_server)
        h_db_check_vol = h_db_check.get("volumes")
        h_db_check_bricks = h_db_check.get("bricks")
        if ((h_db_check_vol.get("pending"))
                and (h_db_check_bricks.get("pending"))):
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "No any pending operations found during volumes deletion "
            "volumes:{}, Bricks:{} ".format(
                h_db_check_vol.get("pending"),
                h_db_check_bricks.get("pending")))

    # Verify pending bricks are multiples of 3 (replica-3 volumes)
    self.assertFalse(
        h_db_check_bricks.get("pending") % 3,
        "Expecting bricks pending count to be multiple of 3 but "
        "found {}".format(h_db_check_bricks.get("pending")))

    # Verify and Wait for pending operations to complete
    for w in waiter.Waiter(timeout=120, interval=10):
        h_db_check = heketi_ops.heketi_db_check(h_node, h_server)
        h_db_check_vol = h_db_check.get("volumes")
        h_db_check_bricks = h_db_check.get("bricks")
        if ((not h_db_check_bricks.get("pending"))
                and (not h_db_check_vol.get("pending"))):
            break
    if w.expired:
        raise AssertionError("Failed to delete volumes after 120 secs")
def test_verify_create_heketi_volumes_pending_entries_in_db(
        self, vol_type):
    """Verify pending entries of file/block volumes in db during
       volumes creation from heketi side

    Args:
        vol_type (str): '' for file volumes or 'block' for block volumes;
            used to build 'heketi_{vol_type}volume_*' function names and
            '{vol_type}volumes' db-check keys.
    """
    # Create large volumes to observe the pending operations
    vol_count, h_vol_creation_async_op = 3, []
    h_node, h_url = self.heketi_client_node, self.heketi_server_url

    # Verify file/block volumes pending operation before creation,
    h_db_check_before = heketi_db_check(h_node, h_url)
    h_db_check_vol_before = (h_db_check_before.get(
        "{}volumes".format(vol_type)))

    # Delete heketi pod to clean db operations
    if (h_db_check_vol_before.get("pending")
            or h_db_check_before.get("bricks").get("pending")):
        self._respin_heketi_pod()

    # Calculate heketi volume size (capped at 30 GB per volume)
    free_space, nodenum = get_total_free_space(h_node, h_url)
    free_space_available = int(free_space / nodenum)
    if free_space_available > vol_count:
        h_volume_size = int(free_space_available / vol_count)
        if h_volume_size > 30:
            h_volume_size = 30
    else:
        h_volume_size, vol_count = 1, free_space_available

    # Get existing heketi volume list
    existing_volumes = heketi_volume_list(h_node, h_url, json=True)

    # Add cleanup function to clean stale volumes created during test
    self.addCleanup(
        self._cleanup_heketi_volumes, existing_volumes.get("volumes"))

    # Create BHV in case blockvolume size is greater than default BHV size
    if vol_type:
        default_bhv_size = get_default_block_hosting_volume_size(
            h_node, self.heketi_dc_name)
        if default_bhv_size < h_volume_size:
            h_volume_name = "autotest-{}".format(utils.get_random_str())
            bhv_info = self.create_heketi_volume_with_name_and_wait(
                h_volume_name, free_space_available,
                raise_on_cleanup_error=False, block=True, json=True)
            free_space_available -= (
                int(bhv_info.get("blockinfo").get("reservedsize")) + 1)
            h_volume_size = int(free_space_available / vol_count)

    # Temporary replace g.run with g.async_run in heketi_blockvolume_create
    # func to be able to run it in background.Also, avoid parsing the
    # output as it won't be json at that moment. Parse it after reading
    # the async operation results.
    def run_async(cmd, hostname, raise_on_error=True):
        return g.run_async(host=hostname, command=cmd)

    for count in range(vol_count):
        with mock.patch.object(json, 'loads', side_effect=(lambda j: j)):
            with mock.patch.object(
                    command, 'cmd_run', side_effect=run_async):
                h_vol_creation_async_op.append(
                    eval("heketi_{}volume_create".format(vol_type))(
                        h_node, h_url, h_volume_size, json=True))

    # Check for pending operations
    for w in waiter.Waiter(timeout=120, interval=10):
        h_db_check = heketi_db_check(h_node, h_url)
        h_db_check_vol = h_db_check.get("{}volumes".format(vol_type))
        if h_db_check_vol.get("pending"):
            h_db_check_bricks = h_db_check.get("bricks")
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "No any pending operations found during {}volumes creation "
            "{}".format(vol_type, h_db_check_vol.get("pending")))

    # Verify bricks pending operation during creation (file volumes only)
    if not vol_type:
        self.assertTrue(
            h_db_check_bricks.get("pending"),
            "Expecting at least one bricks pending count")
        self.assertFalse(
            h_db_check_bricks.get("pending") % 3,
            "Expecting bricks pending count to be multiple of 3 but "
            "found {}".format(h_db_check_bricks.get("pending")))

    # Wait for all counts of pending operations to be zero
    for w in waiter.Waiter(timeout=300, interval=10):
        h_db_check = heketi_db_check(h_node, h_url)
        h_db_check_vol = h_db_check.get("{}volumes".format(vol_type))
        if not h_db_check_vol.get("pending"):
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "Expecting no pending operations after 300 sec but "
            "found {} operation".format(h_db_check_vol.get("pending")))

    # Get heketi server DB details
    h_db_check_after = heketi_db_check(h_node, h_url)
    h_db_check_vol_after = (h_db_check_after.get(
        "{}volumes".format(vol_type)))
    h_db_check_bricks_after = h_db_check_after.get("bricks")

    # Verify if initial and final file/block volumes are same
    act_vol_count = h_db_check_vol_after.get("total")
    exp_vol_count = h_db_check_vol_before.get("total") + vol_count
    err_msg = (
        "Actual {} and expected {} {}volume counts are not matched".format(
            act_vol_count, exp_vol_count, vol_type))
    self.assertEqual(act_vol_count, exp_vol_count, err_msg)

    # Verify if initial and final bricks are same for file volume
    # NOTE(review): 'existing_volumes' is the raw dict returned by
    # heketi_volume_list, so set(existing_volumes) is the set of its
    # *keys* (e.g. {'volumes'}), not of volume ids — confirm whether
    # set(existing_volumes.get("volumes")) was intended.
    volumes = heketi_volume_list(h_node, h_url, json=True).get("volumes")
    new_volumes = list(set(volumes) - set(existing_volumes))
    exp_brick_count = 0
    for volume in new_volumes:
        vol_info = heketi_volume_info(h_node, h_url, volume, json=True)
        exp_brick_count += len(vol_info.get("bricks"))

    err_msg = "Actual {} and expected {} bricks counts are not matched"
    act_brick_count = h_db_check_bricks_after.get("total")
    self.assertEqual(
        act_brick_count, exp_brick_count,
        err_msg.format(act_brick_count, exp_brick_count))
def test_verify_delete_heketi_volumes_pending_entries_in_db(
        self, vol_type):
    """Verify pending entries of blockvolumes/volumes and bricks in heketi
       db during blockvolume/volume delete operation.

    Args:
        vol_type (str): '' for file volumes or 'block' for block volumes;
            used to build 'heketi_{vol_type}volume_*' function names and
            '{vol_type}volumes' db-check keys.
    """
    # Create a large volumes to observe the pending operation
    vol_count, volume_ids, async_obj = 10, [], []
    h_node, h_url = self.heketi_client_node, self.heketi_server_url

    # Verify file/block volumes pending operation before creation,
    h_db_check_before = heketi_db_check(h_node, h_url)
    h_db_check_bricks_before = h_db_check_before.get("bricks")
    h_db_check_vol_before = (h_db_check_before.get(
        "{}volumes".format(vol_type)))

    # Get existing heketi volume list
    existing_volumes = heketi_volume_list(h_node, h_url, json=True)

    # Add cleanup function to clean stale volumes created during test
    self.addCleanup(
        self._cleanup_heketi_volumes, existing_volumes.get("volumes"))

    # Delete heketi pod to clean db operations
    if (h_db_check_bricks_before.get("pending")
            or h_db_check_vol_before.get("pending")):
        self._respin_heketi_pod()

    # Calculate heketi volume size (capped at 50 GB per volume)
    free_space, nodenum = get_total_free_space(h_node, h_url)
    free_space_available = int(free_space / nodenum)
    if free_space_available > vol_count:
        h_volume_size = int(free_space_available / vol_count)
        if h_volume_size > 50:
            h_volume_size = 50
    else:
        h_volume_size, vol_count = 1, free_space_available

    # Create BHV in case blockvolume size is greater than default BHV size
    if vol_type:
        default_bhv_size = get_default_block_hosting_volume_size(
            h_node, self.heketi_dc_name)
        if default_bhv_size < h_volume_size:
            h_volume_name = "autotest-{}".format(utils.get_random_str())
            bhv_info = self.create_heketi_volume_with_name_and_wait(
                h_volume_name, free_space_available,
                raise_on_cleanup_error=False, block=True, json=True)
            free_space_available -= (
                int(bhv_info.get("blockinfo").get("reservedsize")) + 1)
            h_volume_size = int(free_space_available / vol_count)

    # Create file/block volumes
    for _ in range(vol_count):
        vol_id = eval("heketi_{}volume_create".format(vol_type))(
            h_node, h_url, h_volume_size, json=True).get("id")
        volume_ids.append(vol_id)
        self.addCleanup(
            eval("heketi_{}volume_delete".format(vol_type)),
            h_node, h_url, vol_id, raise_on_error=False)

    def run_async(cmd, hostname, raise_on_error=True):
        # Background replacement for command.cmd_run; handles are kept
        # so completion is verified further below.
        async_op = g.run_async(host=hostname, command=cmd)
        async_obj.append(async_op)
        return async_op

    bhv_list = []
    for vol_id in volume_ids:
        # Get BHV ids to delete in case of block volumes
        if vol_type:
            vol_info = (heketi_blockvolume_info(
                h_node, h_url, vol_id, json=True))
            if not vol_info.get("blockhostingvolume") in bhv_list:
                bhv_list.append(vol_info.get("blockhostingvolume"))

        # Temporary replace g.run with g.async_run in heketi_volume_delete
        # and heketi_blockvolume_delete func to be able to run it in
        # background.
        with mock.patch.object(command, 'cmd_run', side_effect=run_async):
            eval("heketi_{}volume_delete".format(vol_type))(
                h_node, h_url, vol_id)

    # Wait for pending operations to get generate
    for w in waiter.Waiter(timeout=30, interval=3):
        h_db_check = heketi_db_check(h_node, h_url)
        h_db_check_vol = h_db_check.get("{}volumes".format(vol_type))
        if h_db_check_vol.get("pending"):
            h_db_check_bricks = h_db_check.get("bricks")
            break
    if w.expired:
        raise exceptions.ExecutionError(
            "No any pending operations found during {}volumes deletion "
            "{}".format(vol_type, h_db_check_vol.get("pending")))

    # Verify bricks pending operation during creation (file volumes only)
    if not vol_type:
        self.assertTrue(
            h_db_check_bricks.get("pending"),
            "Expecting at least one bricks pending count")
        self.assertFalse(
            h_db_check_bricks.get("pending") % 3,
            "Expecting bricks pending count to be multiple of 3 but "
            "found {}".format(h_db_check_bricks.get("pending")))

    # Verify file/block volume pending operation during delete
    for w in waiter.Waiter(timeout=120, interval=10):
        h_db_check = heketi_db_check(h_node, h_url)
        h_db_check_vol = h_db_check.get("{}volumes".format(vol_type))
        h_db_check_bricks = h_db_check.get("bricks")
        if ((not h_db_check_bricks.get("pending"))
                and (not h_db_check_vol.get("pending"))):
            break
    if w.expired:
        raise AssertionError(
            "Failed to delete {}volumes after 120 secs".format(vol_type))

    # Check that all background processes got exited
    for obj in async_obj:
        ret, out, err = obj.async_communicate()
        self.assertFalse(
            ret, "Failed to delete {}volume due to error: {}".format(
                vol_type, err))

    # Delete BHV created during block volume creation
    if vol_type:
        for bhv_id in bhv_list:
            heketi_volume_delete(h_node, h_url, bhv_id)

    # Verify bricks and volume pending operations
    h_db_check_after = heketi_db_check(h_node, h_url)
    h_db_check_bricks_after = h_db_check_after.get("bricks")
    h_db_check_vol_after = (h_db_check_after.get(
        "{}volumes".format(vol_type)))
    act_brick_count = h_db_check_bricks_after.get("pending")
    act_vol_count = h_db_check_vol_after.get("pending")

    # Verify bricks pending operation after delete
    err_msg = "{} operations are pending for {} after {}volume deletion"
    if not vol_type:
        self.assertFalse(
            act_brick_count,
            err_msg.format(act_brick_count, "brick", vol_type))

    # Verify file/bock volumes pending operation after delete
    self.assertFalse(
        act_vol_count,
        err_msg.format(act_vol_count, "volume", vol_type))

    act_brick_count = h_db_check_bricks_after.get("total")
    act_vol_count = h_db_check_vol_after.get("total")
    exp_brick_count = h_db_check_bricks_before.get("total")
    exp_vol_count = h_db_check_vol_before.get("total")
    err_msg = "Actual {} and expected {} {} counts are not matched"

    # Verify if initial and final file/block volumes are same
    self.assertEqual(
        act_vol_count, exp_vol_count,
        err_msg.format(act_vol_count, exp_vol_count, "volume"))

    # Verify if initial and final bricks are same
    self.assertEqual(
        act_brick_count, exp_brick_count,
        err_msg.format(act_brick_count, exp_brick_count, "brick"))
    def test_targetcli_when_block_hosting_volume_down(self):
        """Validate no inconsistencies occur in targetcli when block
        volumes are created with one block hosting volume down.

        Flow:
            1. Make every existing BHV unusable for new block volumes
               (delete empty ones, fill the rest to capacity) so the next
               block volume lands on a known BHV.
            2. Create a block volume and stop its BHV.
            3. With the BHV down, restart gluster-block-target and expect
               the block device to be absent from targetcli; bring the BHV
               back, restart again, and expect the device to reappear.
        """
        h_node, h_server = self.heketi_client_node, self.heketi_server_url
        # egrep for the block volume id; prints "unavailable" when absent
        cmd = ("targetcli ls | egrep '%s' || echo unavailable")
        error_msg = ("targetcli has inconsistencies when block devices are "
                     "created with one block hosting volume %s is down")

        # Delete BHV which has no BV or fill it completely
        bhv_list = get_block_hosting_volume_list(h_node, h_server).keys()
        for bhv in bhv_list:
            bhv_info = heketi_volume_info(h_node, h_server, bhv, json=True)
            if not bhv_info["blockinfo"].get("blockvolume", []):
                heketi_volume_delete(h_node, h_server, bhv)
                continue
            free_size = bhv_info["blockinfo"].get("freesize", 0)
            if free_size:
                # Consume the remaining space with a file volume so no new
                # block volume can be placed on this BHV
                bv = heketi_volume_create(h_node, h_server, free_size,
                                          json=True)
                self.addCleanup(heketi_volume_delete, h_node, h_server,
                                bv["id"])

        # Create BV
        bv = heketi_blockvolume_create(h_node, h_server, 2, json=True)
        self.addCleanup(heketi_blockvolume_delete, h_node, h_server,
                        bv["id"])

        # Bring down BHV
        bhv_name = get_block_hosting_volume_name(h_node, h_server, bv["id"])
        ret, out, err = volume_stop("auto_get_gluster_endpoint", bhv_name)
        if ret != 0:
            err_msg = "Failed to stop gluster volume %s. error: %s" % (
                bhv_name, err)
            g.log.error(err_msg)
            raise AssertionError(err_msg)
        # Make sure the BHV is started again even if the test fails mid-way
        self.addCleanup(podcmd.GlustoPod()(volume_start),
                        "auto_get_gluster_endpoint", bhv_name)

        ocp_node = self.ocp_master_node[0]
        gluster_block_svc = "gluster-block-target"
        # Cleanups run LIFO: restart the service, then wait for it
        self.addCleanup(wait_for_service_status_on_gluster_pod_or_node,
                        ocp_node, gluster_block_svc, "active", "exited",
                        gluster_node=self.gluster_servers[0])
        self.addCleanup(restart_service_on_gluster_pod_or_node, ocp_node,
                        gluster_block_svc, self.gluster_servers[0])
        # First pass ("continue"): BHV is down, device must be absent;
        # the BHV is then started and the second pass ("break") expects
        # the device back in targetcli.
        for condition in ("continue", "break"):
            restart_service_on_gluster_pod_or_node(
                ocp_node, gluster_block_svc,
                gluster_node=self.gluster_servers[0])
            wait_for_service_status_on_gluster_pod_or_node(
                ocp_node, gluster_block_svc, "active", "exited",
                gluster_node=self.gluster_servers[0])

            targetcli = cmd_run_on_gluster_pod_or_node(
                ocp_node, cmd % bv["id"], self.gluster_servers[0])
            if condition == "continue":
                self.assertEqual(targetcli, "unavailable",
                                 error_msg % bhv_name)
            else:
                self.assertNotEqual(targetcli, "unavailable",
                                    error_msg % bhv_name)
                break

            # Bring up the same BHV
            ret, out, err = volume_start("auto_get_gluster_endpoint",
                                         bhv_name)
            if ret != 0:
                err = "Failed to start gluster volume %s on %s. error: %s" % (
                    bhv_name, h_node, err)
                raise exceptions.ExecutionError(err)
def test_heketi_prometheus_usedbytes_brickcount_on_device_delete( self, operation): """Validate used bytes,device count on heketi and prometheus""" h_node, h_server = self.heketi_client_node, self.heketi_server_url # Get list of additional devices for one of the Gluster nodes gluster_server_0 = list(self.gluster_servers_info.values())[0] manage_hostname = gluster_server_0.get("manage") self.assertTrue( manage_hostname, "IP Address is not specified for " "node {}".format(gluster_server_0)) device_name = gluster_server_0.get("additional_devices")[0] self.assertTrue( device_name, "Additional devices are not specified for " "node {}".format(gluster_server_0)) # Get node ID of the Gluster hostname node_list = heketi_ops.heketi_topology_info( h_node, h_server, json=True).get("clusters")[0].get("nodes") self.assertTrue( node_list, "Cluster info command returned empty list of nodes") node_id = [ node.get("id") for node in node_list if manage_hostname == node.get("hostnames").get("manage")[0]] self.assertTrue( node_id, "Failed to get node_id for {}".format(manage_hostname)) node_id = node_id[0] # Adding heketi device heketi_ops.heketi_device_add(h_node, h_server, device_name, node_id) node_info_after_addition = heketi_ops.heketi_node_info( h_node, h_server, node_id, json=True) device_id, bricks = None, None for device in node_info_after_addition.get("devices"): if device.get("name") == device_name: device_id, bricks = ( device.get("id"), len(device.get("bricks"))) break # Verify zero bricks on the device msg = ( "Number of bricks on the device {} of the nodes should be" "zero".format(device_name)) self.assertFalse(bricks, msg) self.addCleanup( heketi_ops.heketi_device_delete, h_node, h_server, device_id, raise_on_error=False) self.addCleanup( heketi_ops.heketi_device_remove, h_node, h_server, device_id, raise_on_error=False) self.addCleanup( heketi_ops.heketi_device_disable, h_node, h_server, device_id, raise_on_error=False) # Disable,Remove and Delete heketi device 
heketi_ops.heketi_device_disable(h_node, h_server, device_id) heketi_ops.heketi_device_remove(h_node, h_server, device_id) heketi_ops.heketi_device_delete(h_node, h_server, device_id) # Verify device deletion node_info_after_deletion = ( heketi_ops.heketi_node_info(h_node, h_server, node_id)) msg = ("Device {} should not be shown in node info of the node {}" "after the device deletion".format(device_id, node_id)) self.assertNotIn(device_id, node_info_after_deletion, msg) if operation == "usedbytes": # Validate heketi and prometheus device used bytes for w in waiter.Waiter(timeout=60, interval=10): device_used_bytes_prometheus = 0 device_used_bytes_metrics = 0 openshift_ops.switch_oc_project( self.ocp_master_node[0], 'openshift-monitoring') metric_result = self._fetch_metric_from_promtheus_pod( metric='heketi_device_used_bytes') for result in metric_result: if (node_id == result.get('cluster') and device_name == result.get('device')): device_used_bytes_prometheus += ( int(result.get('value')[1])) openshift_ops.switch_oc_project( self.ocp_master_node[0], 'glusterfs') metrics = heketi_ops.get_heketi_metrics(h_node, h_server) heketi_device_count_metric = ( metrics.get('heketi_device_used_bytes')) for result in heketi_device_count_metric: if (node_id == result.get('cluster') and device_name == result.get('device')): device_used_bytes_metrics = int(result.get('value')) if device_used_bytes_prometheus == device_used_bytes_metrics: break if w.expired: raise exceptions.ExecutionError( "Failed to update device details in prometheus") elif operation == "brickcount": # Validate heketi and prometheus device brick count for w in waiter.Waiter(timeout=60, interval=10): device_brick_count_prometheus = 0 device_brick_count_metrics = 0 metrics = heketi_ops.get_heketi_metrics(h_node, h_server) heketi_device_count_metric = metrics.get( 'heketi_device_brick_count') for result in heketi_device_count_metric: device_brick_count_metrics += int(result.get('value')) 
openshift_ops.switch_oc_project( self.ocp_master_node[0], 'openshift-monitoring') metric_result = self._fetch_metric_from_promtheus_pod( metric='heketi_device_brick_count') for result in metric_result: device_brick_count_prometheus += ( int(result.get('value')[1])) if device_brick_count_prometheus == device_brick_count_metrics: break if w.expired: raise exceptions.ExecutionError( "Failed to update device details in prometheus")
    def test_verify_pending_entries_in_db(self):
        """Verify pending entries of volumes and bricks in db during
        volume creation from heketi side

        Runs heketi volume create in the background (by patching
        ``command.cmd_run`` with ``g.run_async``), polls the heketi db for
        pending brick/volume entries while the create is in flight, then
        verifies the db is clean again after the create completes.
        """
        h_volume_size = 100
        h_db_chk_bfr_v_creation = heketi_db_check(self.heketi_client_node,
                                                  self.heketi_server_url)

        # Skip when the db already has leftover pending operations from
        # earlier runs -- they would make the later counts ambiguous.
        if (h_db_chk_bfr_v_creation["bricks"]["pending"] != 0
                or h_db_chk_bfr_v_creation["volumes"]["pending"] != 0):
            self.skipTest(
                "Skip TC due to unexpected bricks/volumes pending operations")

        # Verify bricks and volume pending operation before creation
        self.assertEqual(h_db_chk_bfr_v_creation["bricks"]["pending"], 0)
        self.assertEqual(h_db_chk_bfr_v_creation["volumes"]["pending"], 0)

        # Temporary replace g.run with g.async_run in heketi_volume_create
        # func to be able to run it in background. Also, avoid parsing the
        # output as it won't be json at that moment. Parse it after reading
        # the async operation results.
        def run_async(cmd, hostname, raise_on_error=True):
            return g.run_async(host=hostname, command=cmd)

        with mock.patch.object(json, 'loads', side_effect=(lambda j: j)):
            with mock.patch.object(command, 'cmd_run',
                                   side_effect=run_async):
                h_vol_creation_async_op = heketi_volume_create(
                    self.heketi_client_node,
                    self.heketi_server_url, h_volume_size, json=True)

        # Wait until pending brick entries appear, i.e. creation started
        for w in waiter.Waiter(timeout=5, interval=1):
            h_db_chk_during_v_creation = heketi_db_check(
                self.heketi_client_node, self.heketi_server_url)
            if h_db_chk_during_v_creation["bricks"]["pending"] != 0:
                break
        if w.expired:
            err_msg = "No pending operation in Heketi db"
            g.log.error(err_msg)
            raise exceptions.ExecutionError(err_msg)

        # Block until the background create finishes, then parse its
        # (real) json output to get the volume id for cleanup.
        # NOTE(review): retcode is not checked before json.loads; a failed
        # create would surface here as a parse error -- confirm acceptable.
        retcode, stdout, stderr = h_vol_creation_async_op.async_communicate()
        heketi_vol = json.loads(stdout)
        volume_id = heketi_vol["id"]
        self.addCleanup(
            heketi_volume_delete, self.heketi_client_node,
            self.heketi_server_url, volume_id, raise_on_error=True)

        # Verify volume pending operation during creation
        # (bricks come in replica-3 sets, hence the modulo-3 check)
        self.assertFalse(h_db_chk_during_v_creation["bricks"]["pending"] % 3)
        self.assertEqual(
            h_db_chk_bfr_v_creation["volumes"]["pending"] + 1,
            h_db_chk_during_v_creation["volumes"]["pending"])

        h_db_chk_after_v_creation = heketi_db_check(self.heketi_client_node,
                                                    self.heketi_server_url)

        # Verify bricks and volume pending operation after creation
        self.assertEqual(h_db_chk_after_v_creation["bricks"]["pending"], 0)
        self.assertEqual(h_db_chk_after_v_creation["volumes"]["pending"], 0)
def test_heketi_metrics_validation_after_node(self, condition): """Validate heketi metrics after adding and remove node""" # Get additional node additional_host_info = g.config.get("additional_gluster_servers") if not additional_host_info: self.skipTest( "Skipping this test case as additional gluster server is " "not provied in config file") additional_host_info = list(additional_host_info.values())[0] storage_hostname = additional_host_info.get("manage") storage_ip = additional_host_info.get("storage") if not (storage_hostname and storage_ip): self.skipTest( "Config options 'additional_gluster_servers.manage' " "and 'additional_gluster_servers.storage' must be set.") h_client, h_server = self.heketi_client_node, self.heketi_server_url initial_node_count, final_node_count = 0, 0 # Get initial node count from prometheus metrics metric_result = self._fetch_metric_from_promtheus_pod( metric='heketi_nodes_count') initial_node_count = reduce( lambda x, y: x + y, [result.get('value')[1] for result in metric_result]) # Switch to storage project openshift_ops.switch_oc_project( self._master, self.storage_project_name) # Configure node before adding node self.configure_node_to_run_gluster(storage_hostname) # Get cluster list cluster_info = heketi_ops.heketi_cluster_list( h_client, h_server, json=True) # Add node to the cluster heketi_node_info = heketi_ops.heketi_node_add( h_client, h_server, len(self.gluster_servers), cluster_info.get('clusters')[0], storage_hostname, storage_ip, json=True) heketi_node_id = heketi_node_info.get("id") self.addCleanup( heketi_ops.heketi_node_delete, h_client, h_server, heketi_node_id, raise_on_error=False) self.addCleanup( heketi_ops.heketi_node_remove, h_client, h_server, heketi_node_id, raise_on_error=False) self.addCleanup( heketi_ops.heketi_node_disable, h_client, h_server, heketi_node_id, raise_on_error=False) self.addCleanup( openshift_ops.switch_oc_project, self._master, self.storage_project_name) if condition == 'delete': # Switch to 
openshift-monitoring project openshift_ops.switch_oc_project( self.ocp_master_node[0], self._prometheus_project_name) # Get initial node count from prometheus metrics for w in waiter.Waiter(timeout=60, interval=10): metric_result = self._fetch_metric_from_promtheus_pod( metric='heketi_nodes_count') node_count = reduce( lambda x, y: x + y, [result.get('value')[1] for result in metric_result]) if node_count != initial_node_count: break if w.expired: raise exceptions.ExecutionError( "Failed to get updated node details from prometheus") # Remove node from cluster heketi_ops.heketi_node_disable(h_client, h_server, heketi_node_id) heketi_ops.heketi_node_remove(h_client, h_server, heketi_node_id) for device in heketi_node_info.get('devices'): heketi_ops.heketi_device_delete( h_client, h_server, device.get('id')) heketi_ops.heketi_node_delete(h_client, h_server, heketi_node_id) # Switch to openshift-monitoring project openshift_ops.switch_oc_project( self.ocp_master_node[0], self._prometheus_project_name) # Get final node count from prometheus metrics for w in waiter.Waiter(timeout=60, interval=10): metric_result = self._fetch_metric_from_promtheus_pod( metric='heketi_nodes_count') final_node_count = reduce( lambda x, y: x + y, [result.get('value')[1] for result in metric_result]) if condition == 'delete': if final_node_count < node_count: break else: if final_node_count > initial_node_count: break if w.expired: raise exceptions.ExecutionError( "Failed to update node details in prometheus")
    def test_heketi_metrics_validation_with_node_reboot(self):
        """Validate heketi metrics after node reboot using prometheus

        Captures heketi metrics (direct endpoint scrape) and the same
        metrics via prometheus, reboots the node hosting the heketi pod,
        waits for the pods to recover, and asserts both metric sources
        report the same values as before the reboot.
        """
        initial_metrics, final_metrics = {}, {}

        # Use storage project
        openshift_ops.switch_oc_project(
            self._master, self.storage_project_name)

        # Get initial metrics result (first sample of each metric name
        # listed in self.metrics, straight from heketi)
        h_node, h_server = self.heketi_client_node, self.heketi_server_url
        initial_metrics = tuple(
            heketi_ops.get_heketi_metrics(h_node, h_server).get(metric)[0]
            for metric in self.metrics)

        # Use prometheus project
        openshift_ops.switch_oc_project(
            self._master, self._prometheus_project_name)

        # Get initial prometheus result
        initial_prometheus = self._get_and_manipulate_metric_data(
            self.metrics)

        # Get hosted node IP of heketi pod
        openshift_ops.switch_oc_project(
            self._master, self.storage_project_name)
        heketi_pod = openshift_ops.get_pod_name_from_dc(
            self._master, self.heketi_dc_name)
        heketi_node = openshift_ops.oc_get_custom_resource(
            self._master, 'pod', '.:spec.nodeName', heketi_pod)[0]

        # Reboot the node on which heketi pod is scheduled; cleanup makes
        # sure pods are waited for even if the test fails mid-way
        self.addCleanup(
            self._check_heketi_and_gluster_pod_after_node_reboot,
            heketi_node)
        node_ops.node_reboot_by_command(heketi_node)

        # Wait node to become NotReady (proves the reboot actually started)
        custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
        for w in waiter.Waiter(300, 10):
            status = openshift_ops.oc_get_custom_resource(
                self._master, 'node', custom, heketi_node)
            if status[0] == 'False':
                break
        if w.expired:
            raise exceptions.ExecutionError(
                "Failed to bring down node {}".format(heketi_node))

        # Wait for node to become ready
        openshift_ops.wait_for_ocp_node_be_ready(self._master, heketi_node)

        # Wait for heketi and glusterfs pod to become ready
        self._check_heketi_and_gluster_pod_after_node_reboot(heketi_node)

        # Use prometheus project
        openshift_ops.switch_oc_project(
            self._master, self._prometheus_project_name)

        # Get final metrics result
        final_metrics = tuple(
            heketi_ops.get_heketi_metrics(h_node, h_server).get(metric)[0]
            for metric in self.metrics)

        # Get final prometheus result
        final_prometheus = self._get_and_manipulate_metric_data(
            self.metrics)

        err_msg = "Initial value {} is not same as final value {}"
        self.assertEqual(
            initial_metrics, final_metrics, err_msg.format(
                initial_metrics, final_metrics))
        self.assertEqual(
            initial_prometheus, final_prometheus, err_msg.format(
                initial_prometheus, final_prometheus))
def test_heketi_server_db_pending_entries_for_volume_operations( self, vol_type): """Verify pending entries of blockvolumes/volumes and bricks in db during heketi blockvolume/volume delete operation. """ # Create a large volumes to observe the pending operation h_volume_size, volume_ids, async_obj = 95, [], [] h_node, h_url = self.heketi_client_node, self.heketi_server_url h_db_check_before = heketi_db_check(h_node, h_url) h_db_check_bricks_before = h_db_check_before["bricks"] h_db_check_vol_before = h_db_check_before["{}volumes".format(vol_type)] # Check file/block volume pending operations before creation. if h_db_check_vol_before["pending"]: self.skipTest( "Skip TC due to unexpected {}volumes pending operations". format(vol_type)) # Check bricks pending operations before creation. if h_db_check_bricks_before["pending"]: self.skipTest( "Skip TC due to unexpected bricks pending operations for" " {}volume".format(vol_type)) # Create 5 file/block volumes to find out pending operations for count in range(5): vol_info = eval("heketi_{}volume_create".format(vol_type))( h_node, h_url, h_volume_size, json=True) volume_ids.append(vol_info["id"]) self.addCleanup(eval("heketi_{}volume_delete".format(vol_type)), h_node, h_url, vol_info["id"], raise_on_error=False) h_db_check_after = heketi_db_check(h_node, h_url) h_db_check_bricks_after = h_db_check_after["bricks"] h_db_check_vol_after = h_db_check_after["{}volumes".format( vol_type)] # Verify file/block volumes pending operation after creation err_msg = ("Expecting heketi db {}volume pending operation to be " "0 but found {}") self.assertFalse( h_db_check_vol_after["pending"], err_msg.format(vol_type, h_db_check_vol_after["pending"])) # Verify bricks pending operation after volume creation err_msg = ("Expecting heketi db bricks pending operation to be " "0 but found {} after {}volume creation") self.assertFalse( h_db_check_bricks_after["pending"], err_msg.format(h_db_check_bricks_after["pending"], vol_type)) def 
run_async(cmd, hostname, raise_on_error=True): async_op = g.run_async(host=hostname, command=cmd) async_obj.append(async_op) return async_op for vol_id in volume_ids: # Temporary replace g.run with g.async_run in heketi_volume_delete # and heketi_blockvolume_delete func to be able to run it in # background. with mock.patch.object(command, 'cmd_run', side_effect=run_async): eval("heketi_{}volume_delete".format(vol_type))(h_node, h_url, vol_id) for w in waiter.Waiter(timeout=10, interval=1): h_db_check = heketi_db_check(h_node, h_url) h_db_check_bricks = h_db_check["bricks"] h_db_check_vol = h_db_check["{}volumes".format(vol_type)] if h_db_check_vol["pending"] != 0: break if w.expired: err_msg = ("Expected some pending operations found {} operation" " for {}volume in Heketi db") g.log.error(err_msg.format(h_db_check_vol["pending"], vol_type)) raise exceptions.ExecutionError( err_msg.format(h_db_check_vol["pending"], vol_type)) # Verify pending operation during file/block volumes delete err_msg = ("Expecting pending operations for {}volume during" " deletion") self.assertTrue(h_db_check_vol["pending"], err_msg.format(vol_type)) # Verify brick pending operation during delete err_msg = ("Expecting bricks pending in multiple of 3 but found {}") if vol_type == '': self.assertFalse(h_db_check_bricks["pending"] % 3, err_msg.format(h_db_check_bricks["pending"])) # Verify volume/blockvolume pending operation during delete for w in waiter.Waiter(timeout=100, interval=5): h_db_check_vol_after = heketi_db_check(h_node, h_url) h_db_check_bricks_after = h_db_check_vol_after["bricks"] h_db_check_vol_after = h_db_check_vol_after["{}volumes".format( vol_type)] # verify if file/block volumes and bricks are properly deleted if (((not vol_type) and (not h_db_check_bricks_after["pending"])) or (not h_db_check_vol_after["pending"])): break if w.expired: err_msg = ("Failed to delete {}volumes after waiting for 100 secs") raise exceptions.AssertionError(err_msg.format(vol_type)) # Check 
that all background processes got exited for obj in async_obj: ret, out, err = obj.async_communicate() self.assertFalse(ret, err) # Verify bricks pending operation after delete if vol_type == "": err_msg = ("Expecting 0 bricks pending operations after deletion" " but found {} after {}volume deletion") self.assertFalse( h_db_check_bricks_after["pending"], err_msg.format(h_db_check_bricks_after["pending"], vol_type)) # Verify volumes/bockvolumes pending operation after delete err_msg = ("Expecting 0 {}volume pending operations after deletion" " but found {}") self.assertFalse( h_db_check_vol_after["pending"], err_msg.format(vol_type, h_db_check_vol_after["pending"])) # Verify if initial and final volumes/blockvolumes are same err_msg = ( "Total volume before {} and after {} creation not matched".format( h_db_check_vol_after["total"], h_db_check_vol_before["total"])) self.assertEqual(h_db_check_vol_after["total"], h_db_check_vol_before["total"], err_msg) # Verify if initial and final bricks are same err_msg = ( "Total bricks before {} and after {} creation not matched".format( h_db_check_bricks_after["total"], h_db_check_bricks_before["total"])) self.assertEqual(h_db_check_bricks_after["total"], h_db_check_bricks_before["total"], err_msg)
    def test_udev_usage_in_container(self):
        """Validate LVM inside container does not use udev

        Confirms dmeventd is not active inside the gluster pod, that a
        bounded pvscan succeeds before and after a device-add + node
        reboot, exercising LVM in the container without udev support.
        """
        # Skip the TC if independent mode deployment
        if not self.is_containerized_gluster():
            self.skipTest("Skipping this test case as it needs to run on "
                          "converged mode deployment")

        h_client, h_url = self.heketi_client_node, self.heketi_server_url
        server_info = list(g.config.get('gluster_servers').values())[0]
        server_node = server_info.get('manage')
        additional_device = server_info.get('additional_devices')[0]

        # command to run pvscan (timeout guards against a pvscan that
        # would hang waiting on udev)
        cmd_pvscan = "timeout 300 pvscan"

        # Get pod name from on host
        # NOTE(review): if no entry matches server_node, pod_name stays
        # unbound and the first oc_rsh raises NameError -- confirm the
        # fixture guarantees a gluster pod on this host.
        for pod_info in self.pod_name:
            if pod_info.get('pod_hostname') == server_node:
                pod_name = pod_info.get('pod_name')
                break

        # Create file volume
        vol_info = heketi_ops.heketi_volume_create(
            h_client, h_url, self.volume_size, json=True)
        self.addCleanup(
            heketi_ops.heketi_volume_delete,
            h_client, h_url, vol_info.get("id"))

        # Create block volume
        block_vol_info = heketi_ops.heketi_blockvolume_create(
            h_client, h_url, self.volume_size, json=True)
        self.addCleanup(
            heketi_ops.heketi_blockvolume_delete,
            h_client, h_url, block_vol_info.get("id"))

        # Check dmeventd service in container: oc_rsh is expected to fail
        # (raise AssertionError) because the service must not be active
        err_msg = "dmeventd.service is running on setup"
        with self.assertRaises(AssertionError, msg=err_msg):
            openshift_ops.oc_rsh(
                self.oc_node, pod_name,
                "systemctl is-active dmeventd.service")

        # Service dmeventd should not be running in background
        with self.assertRaises(AssertionError, msg=err_msg):
            openshift_ops.oc_rsh(
                self.oc_node, pod_name, "ps aux | grep dmeventd.service")

        # Perform a pvscan in container
        openshift_ops.oc_rsh(self.oc_node, pod_name, cmd_pvscan)

        # Get heketi node to add new device
        # NOTE(review): if no node matches server_node, h_node_id is left
        # at the last list entry -- TODO confirm topology always contains
        # this host.
        heketi_node_list = heketi_ops.heketi_node_list(h_client, h_url)
        for h_node_id in heketi_node_list:
            h_node_info = heketi_ops.heketi_node_info(
                h_client, h_url, h_node_id, json=True)
            h_node_host = h_node_info.get('hostnames', {}).get('manage')[0]
            if h_node_host == server_node:
                break

        # Add new device to the node
        heketi_ops.heketi_device_add(
            h_client, h_url, additional_device, h_node_id)
        h_node_info = heketi_ops.heketi_node_info(
            h_client, h_url, h_node_id, json=True)
        h_device_id = [
            device.get('id')
            for device in h_node_info.get('devices')
            if device.get('name') == additional_device
        ]
        # Cleanups run LIFO: disable, then remove, then delete the device
        self.addCleanup(
            heketi_ops.heketi_device_delete, h_client, h_url,
            h_device_id[0])
        self.addCleanup(
            heketi_ops.heketi_device_remove, h_client, h_url,
            h_device_id[0])
        self.addCleanup(
            heketi_ops.heketi_device_disable, h_client, h_url,
            h_device_id[0])

        # Reboot the node on which device is added
        self.addCleanup(
            self._check_heketi_and_gluster_pod_after_node_reboot,
            server_node)
        node_ops.node_reboot_by_command(server_node)

        # Wait node to become NotReady (proves the reboot actually started)
        custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
        for w in waiter.Waiter(300, 10):
            status = openshift_ops.oc_get_custom_resource(
                self.oc_node, 'node', custom, server_node)
            if status[0] == 'False':
                break
        if w.expired:
            raise exceptions.ExecutionError(
                "Failed to bring node down {}".format(server_node))

        # Wait for node to become ready
        openshift_ops.wait_for_ocp_node_be_ready(self.oc_node, server_node)

        # Wait for heketi and glusterfs pod to become ready
        self._check_heketi_and_gluster_pod_after_node_reboot(server_node)

        # Perform a pvscan in container
        openshift_ops.oc_rsh(self.oc_node, pod_name, cmd_pvscan)