def get_kver_ktree(ktree, split=False, proc=None, makecmd=None):
    """
    Return the version of the kernel found in the 'ktree' kernel sources directory.

    The 'ktree' directory must contain an already configured kernel. If the kernel was compiled
    out of tree ('make O=<ktree>'), 'ktree' should be the path to the kernel build directory.

    The version is obtained by running 'make -C <ktree> --quiet -- kernelrelease'. The 'makecmd'
    argument can be used to override the 'make -C <ktree>' part of that command.

    The 'split' and 'proc' arguments are the same as in 'get_kver()'.

    Raises 'Error' if the command fails.
    """

    proc = proc or Procs.Proc()
    make_prefix = makecmd or "make -C '%s'" % ktree
    cmd = make_prefix + " --quiet -- kernelrelease"

    try:
        kver = proc.run_verify(cmd)[0].strip()
    except proc.Error as err:
        msg = "cannot detect kernel version in '%s':\n%s\nMake sure kernel sources are " \
              "configured." % (ktree, err)
        raise Error(msg) from err

    return split_kver(kver) if split else kver
def find_processes(regex: str, proc=None):
    """
    Find all processes matching the 'regex' regular expression on the host defined by 'proc'.

    The regular expression is searched for in the process executable name + command-line
    arguments, as printed by 'ps axo pid,args'.

    By default this function operates on the local host, but the 'proc' argument can be used to
    pass a connected 'SSH' object, in which case this function operates on the remote host.

    Returns a list of '(PID, command line)' tuples, where PID is an integer. Raises 'Error' if
    'ps' printed less than two lines.
    """

    if not proc:
        proc = Procs.Proc()

    cmd = "ps axo pid,args"
    stdout, stderr = proc.run_verify(cmd, join=False)

    if len(stdout) < 2:
        raise Error(f"no processes found at all{proc.hostmsg}\nExecuted this command:\n{cmd}\n"
                    f"stdout:\n{stdout}\nstderr:{stderr}\n")

    matched = []
    # The first output line is skipped (it is the 'ps' column header).
    for entry in stdout[1:]:
        pid_str, cmdline = entry.strip().split(" ", 1)
        pid = int(pid_str)

        # Do not report the calling process itself.
        if proc.hostname == "localhost" and pid == Trivial.get_pid():
            continue

        if re.search(regex, cmdline):
            matched.append((pid, cmdline))

    return matched
def get_kver_bin(path, split=False, proc=None):
    """
    Return the version of the kernel binary at 'path'.

    The version is extracted from the output of the 'file' tool, which is run on the host
    defined by 'proc'. The 'split' and 'proc' arguments are the same as in 'get_kver()'.

    Raises 'Error' if 'file' does not report a Linux kernel executable or if the version string
    cannot be found in its output.
    """

    # Local import: the only stdlib dependency specific to this function.
    import shlex # pylint: disable=import-outside-toplevel

    if not proc:
        proc = Procs.Proc()

    # Quote the path so that whitespace and shell metacharacters in it do not break the command.
    cmd = f"file -- {shlex.quote(str(path))}"
    stdout = proc.run_verify(cmd)[0].strip()
    msg = f"ran this command: {cmd}, got output:\n{stdout}"

    if not re.match(r".* Linux kernel.* executable .*", stdout):
        raise Error(f"file at '{path}'{proc.hostmsg} is not a Linux kernel binary file\n{msg}")

    matchobj = re.match(r".* version ([^ ]+) .*", stdout)
    if not matchobj:
        raise Error(f"{msg}\nFailed to find kernel version in the output.")

    kver = matchobj.group(1)
    if split:
        return split_kver(kver)
    return kver
def get_proc(args, hostname):
    """
    Return a 'Procs.Proc' object if 'hostname' is "localhost", otherwise return an 'SSH' object
    for host 'hostname', created with the user name, private key path and timeout from 'args'.
    """

    if hostname != "localhost":
        return SSH.SSH(hostname=hostname, username=args.username, privkeypath=args.privkey,
                       timeout=args.timeout)
    return Procs.Proc()
def __init__(self, netif, tc_bin="tc", handover_delta=500000, phc2sys_bin="phc2sys", proc=None):
    """
    Class constructor. The arguments are as follows.
      * netif - the 'NetIface' object of the network device used for measurements.
      * tc_bin - path to the 'tc' tool that should be used for setting up the ETF qdisc.
      * handover_delta - the qdisc delta - the time offset in microseconds when the qdisc hands
                         the packet over to the network driver.
      * phc2sys_bin - path to the 'phc2sys' tool that will be run in background and
                      periodically synchronize the host and NIC clock.
      * proc - the 'Proc' or 'SSH' object that defines the host to configure the ETF qdisc on
               (default is the local host). This object will keep a 'proc' reference and use
               it in various methods.

    Note about phc2sys. When the ETF qdisc offloads packets to the NIC, it is important to keep
    host and NIC times in sync, because the Linux kernel API accepts absolute time values to
    send the packets at, and these values are passed down to the NIC as-is. If the NIC's time is
    different, the packets will be sent at an incorrect time or just dropped, causing errors
    like "missing deadline".
    """

    self._proc = proc
    self._netif = netif
    self._ifname = netif.ifname
    # 'True' when no 'proc' object was passed in, so the one used here is created below.
    self._close_proc = proc is None

    # Define all the remaining attributes before running anything that may raise.
    self._tchk = self._tc_bin = self._phc2sys_bin = None
    self._phc2sys_proc = self._handover_delta = self._old_tc_err_msg = None

    if not self._proc:
        self._proc = Procs.Proc()

    # The value is stored multiplied by 1000 (presumably microseconds to nanoseconds).
    self._handover_delta = int(handover_delta * 1000)

    self._tchk = ToolChecker.ToolChecker(proc=self._proc)
    self._tc_bin = self._tchk.check_tool(tc_bin)
    self._phc2sys_bin = self._tchk.check_tool(phc2sys_bin)

    self._old_tc_err_msg = (
        f"the 'tc' tool installed{self._proc.hostmsg} is not new enough "
        f"and does not support the ETF qdisc.\nPlease, install 'tc' "
        f"version '181023' or greater.\nThe 'tc' tool is part of the "
        f"'iproute2' project. Run 'tc -V' to check its version."
    )
def _get_pyhelper_dependencies(script_path):
    """
    Find and return the dependencies of a python helper script (pyhelper) at 'script_path'. Only
    wult module dependencies are returned. An example of such a dependency would be:
        /usr/lib/python3.9/site-packages/helperlibs/Trivial.py

    The script is executed on the local host. Returns a list of 'Path' objects, one per output
    line.
    """

    # Every pyhelper implements the '--print-module-paths' option, which prints the dependencies.
    stdout, _ = Procs.Proc().run_verify(f"{script_path} --print-module-paths")
    return list(map(Path, stdout.splitlines()))
def __init__(self, proc=None):
    """
    Class constructor. The 'proc' argument defines the host to operate on (a new 'Procs.Proc'
    object is created if it is not provided). Raises 'ErrorNotSupported' if the 'lspci' tool is
    not found on that host.
    """

    self._proc = proc or Procs.Proc()
    self._lspci_bin = "lspci"

    tool_path = FSHelpers.which(self._lspci_bin, default=None, proc=self._proc)
    if not tool_path:
        raise ErrorNotSupported(
            f"the '{self._lspci_bin}' tool is not installed{self._proc.hostmsg}")
def _setup_stats_collect_ssh_forwarding(self):
    """
    This is a helper function for '_start_stats_collect()' which sets up SSH forwarding between
    the local host and the SUT.

    'Statsd' always listens on a Unix socket, which means that we cannot directly connect to it
    when 'stats-collect' runs on a remote host. Therefore, we create an SSH tunnel which
    forwards the TCP stream between a local TCP port and the remote Unix socket.

    Raises 'Error' if the 'ssh' command exits or if connecting through the tunnel keeps failing
    until the timeout expires.
    """

    sut = self._proc

    self._ssht_port = RemoteHelpers.get_free_port()
    self._sc_id = f"{self._ssht_port}:{sut.hostname}:{self._uspath}"

    ssh_opts = sut.get_ssh_opts()
    cmd = f"ssh -L {self._ssht_port}:{self._uspath} -N {ssh_opts} {sut.hostname}"
    # The tunnel itself is an 'ssh' process running on the local host.
    self._ssht = Procs.Proc().run_async(cmd)

    # Wait for the tunnel to get established.
    start_time = time.time()
    timeout = max(sut.connection_timeout, 5)
    msg = f"failed to establish SSH tunnel between localhost and {sut.hostname} " \
          f"with this command:\n{cmd}"

    while time.time() - start_time <= timeout:
        _LOG.debug("trying to connect to localhost:%s", self._ssht_port)

        # pylint: disable=no-member
        stdout, stderr, exitcode = self._ssht.wait_for_cmd(timeout=1, capture_output=True)
        if exitcode is not None:
            # An exit code means that the 'ssh' process has finished, so there is no tunnel.
            raise Error(sut.cmd_failed_msg(cmd, stdout, stderr, exitcode, startmsg=msg))

        try:
            self._connect()
        except Error:
            # Could not connect yet, try again.
            continue

        # The connection attempt succeeded, the tunnel works.
        self._disconnect()
        return

    raise Error(f"{msg}\nTried for {timeout} seconds, but could not connect to "
                f"localhost:{self._ssht_port}\nCheck '{self._logpath}'{sut.hostmsg} for "
                f"details")
def _get_deployables(srcpath, proc=None):
    """
    Return the list of "deployables" (driver names or helper tool names) provided by the tools
    or drivers source code directory 'srcpath' on the host defined by 'proc'.

    The list is obtained by running the 'list_deployables' make target in 'srcpath'. If the
    command prints nothing, its (empty) output is returned as is.
    """

    proc = proc or Procs.Proc()

    stdout, _ = proc.run_verify(f"make --silent -C '{srcpath}' list_deployables")
    if not stdout:
        return stdout
    return Trivial.split_csv_line(stdout, sep=" ")
def __init__(self, proc=None):
    """
    Initialize a class instance for the host associated with the 'proc' object. By default it is
    going to be the local host, but 'proc' can be used to pass a connected 'SSH' object, in
    which case all operations will be done on the remote host. This object will keep a 'proc'
    reference and use it in various methods.

    Raises 'ErrorNotSupported' if the 'nmcli' tool is not found on the host.
    """

    self._proc = proc or Procs.Proc()
    self._saved_managed = {}

    nmcli_path = FSHelpers.which("nmcli", default=None, proc=self._proc)
    if not nmcli_path:
        raise ErrorNotSupported(f"the 'nmcli' tool is not installed{self._proc.hostmsg}")
def __init__(self, sutname, outdir=None, sc_path=None):
    """
    Initialize a class instance. The 'sutname' argument is the name of the SUT to collect the
    statistics for. This string will be passed over to 'stats-collect' and will affect its
    messages. It will also be used for distinguishing between multiple 'stats-collect'
    processes. This name will not be used for connecting to the SUT.

    The other arguments are the same as in 'StatsCollect.__init__()'.
    """

    # Initialize the base class with a new 'Procs.Proc' object.
    super().__init__(Procs.Proc(), sutname, outdir=outdir, sc_path=sc_path)

    # Clean up 'self.stinfo' by removing in-band statistics. The names are collected first,
    # because entries cannot be deleted while iterating over 'self.stinfo'.
    inband_names = [stname for stname in self.stinfo if self.stinfo[stname]["inband"]]
    for stname in inband_names:
        del self.stinfo[stname]
def get_kver(split=False, proc=None):
    """
    Return the version of the kernel running on the host associated with the 'proc' object
    (output of 'uname -r'). By default it is the local system, but a connected 'SSH' object can
    be passed via 'proc' in order to get the version of the kernel running on a remote system.

    By default this function returns the kernel version string (e.g., "4.18.1-build0"), but if
    'split' is 'True', it returns the split kernel version (refer to 'split_kver()' for the
    details).
    """

    proc = proc or Procs.Proc()
    kver = proc.run_verify("uname -r")[0].strip()
    return split_kver(kver) if split else kver
def kill_processes(regex: str, sig: str = "SIGTERM", log: bool = False, name: str = None,
                   proc=None):
    """
    Kill or signal all processes matching the 'regex' regular expression on the host defined by
    'proc'. The regular expression is matched against the process executable name +
    command-line arguments.

    By default the processes are killed (SIGTERM), but any signal can be specified either by
    name or by number.

    If 'log' is 'True', this function also prints a message which includes the PIDs of the
    processes which are going to be signalled. The 'name' argument is a human readable name of
    these processes, it becomes part of the printed message.

    By default this function operates on the local host, but the 'proc' argument can be used to
    pass a connected 'SSH' object, in which case this function operates on the remote host.

    Returns the list of found and signalled processes, as returned by 'find_processes()'.
    """

    proc = proc or Procs.Proc()

    procs = find_processes(regex, proc=proc)
    if not procs:
        return []

    pids = [pid for pid, _ in procs]

    if log:
        descr = name or "the following process(es)"
        _LOG.info("Sending '%s' signal to %s%s, PID(s): %s",
                  sig, descr, proc.hostmsg, ", ".join(str(pid) for pid in pids))

    # Children are taken down too, but only when the signal is SIGTERM or SIGKILL.
    kill_pids(pids, sig=sig, kill_children=_is_sigterm(sig) or _is_sigkill(sig), proc=proc)
    return procs
def __init__(self, ifid, proc=None):
    """
    Initialize a class instance for the network interface corresponding to 'ifid' on the host
    associated with the 'proc' object. The 'ifid' argument can be either the network interface
    name or its hardware address (e.g., the PCI address of the network card corresponding to
    the network interface).

    By default this class is initialized for the local host, but 'proc' can be used to pass a
    connected 'SSH' object, in which case all operations will be done on the remote host. This
    object will keep a 'proc' reference and use it in various methods.
    """

    if not proc:
        proc = Procs.Proc()

    self._ifid = ifid
    self._proc = proc

    self.ifname = None
    self.hwaddr = None
    self._sysfsbase = None
    self._saved_ip_info = {}
    self._ip_tool_present = None

    candidate = _SYSFSBASE.joinpath(ifid)
    if not FSHelpers.isdir(candidate, proc=proc):
        # No such sysfs directory, so 'ifid' is probably a HW address (e.g., PCI address).
        self.ifname = self._hw_addr_to_ifname()
        if not self.ifname:
            self._raise_iface_not_found()
        self.hwaddr = ifid
        self._sysfsbase = _SYSFSBASE.joinpath(self.ifname)
    else:
        # 'ifid' is a network interface name.
        self.ifname = ifid
        self._sysfsbase = candidate
        self.hwaddr = self._get_hw_addr()
def is_deploy_needed(proc, toolname, helpers=None, pyhelpers=None):
    """
    Wult and other tools require additional helper programs and drivers to be installed on the
    SUT. This function tries to analyze the SUT and figure out whether drivers and helper
    programs are present and up-to-date. Returns 'True' if re-deployment is needed, and 'False'
    otherwise.

    This function works by simply matching the modification date of sources and binaries for
    every required helper and driver. If sources have a later date, then re-deployment is
    probably needed.
      * proc - the 'Proc' or 'SSH' object associated with the SUT.
      * toolname - name of the tool to check the necessity of deployment for (e.g., "wult").
      o helpers - list of helpers required to be deployed on the SUT.
      o pyhelpers - list of python helpers required to be deployed on the SUT.

    Raises 'Error' if a driver or a helper is not found on the SUT, or if no source files are
    found.
    """

    def get_newest_mtime(paths):
        """
        Scan the list of paths 'paths', find and return the most recent modification time
        (mtime) among files in 'paths' and (in case a path is a directory) every file under it.
        """

        newest = 0
        for path in paths:
            if not path.is_dir():
                newest = max(newest, path.stat().st_mtime)
                continue

            for root, _, files in os.walk(path):
                for file in files:
                    newest = max(newest, Path(root, file).stat().st_mtime)

        if not newest:
            paths_str = "\n* ".join([str(path) for path in paths])
            raise Error(f"no files found in the following paths:\n{paths_str}")
        return newest

    def deployable_not_found(what):
        """Called when a helper or driver was not found on the SUT to raise an exception."""

        # Note, 'proc.hostmsg' is a ready-to-append suffix, so it is not prefixed with " on".
        err = f"{what} was not found{proc.hostmsg}. Please, run:\n{toolname} deploy"
        if proc.is_remote:
            err += f" -H {proc.hostname}"
        raise Error(err)

    # Build the deploy information dictionary. Start with drivers.
    dinfos = {}
    srcpath = find_app_data("wult", _DRV_SRC_SUBPATH / toolname, appname=toolname)
    dstpaths = []
    for deployable in _get_deployables(srcpath):
        dstpath = _get_module_path(proc, deployable)
        if not dstpath:
            deployable_not_found(f"the '{deployable}' kernel module")
        # Re-use the path found above instead of looking the module up for the second time.
        dstpaths.append(dstpath)
    dinfos["drivers"] = {"src" : [srcpath], "dst" : dstpaths}

    # Add non-python helpers' deploy information.
    if helpers or pyhelpers:
        helpers_deploy_path = get_helpers_deploy_path(proc, toolname)

    if helpers:
        for helper in helpers:
            srcpath = find_app_data("wult", _HELPERS_SRC_SUBPATH / helper, appname=toolname)
            dstpaths = []
            for deployable in _get_deployables(srcpath):
                dstpaths.append(helpers_deploy_path / deployable)
            dinfos[helper] = {"src" : [srcpath], "dst" : dstpaths}

    # Add python helpers' deploy information. Note, python helpers are deployed only to the remote
    # host. The local copy of python helpers comes via 'setup.py'. Therefore, check them only for
    # the remote case.
    if pyhelpers and proc.is_remote:
        for pyhelper in pyhelpers:
            datapath = find_app_data("wult", _HELPERS_SRC_SUBPATH / pyhelper, appname=toolname)
            srcpaths = []
            dstpaths = []
            for deployable in _get_deployables(datapath, Procs.Proc()):
                if datapath.joinpath(deployable).exists():
                    # This case is relevant for running wult from sources - python helpers are
                    # in the 'helpers/pyhelper' directory.
                    srcpath = datapath
                else:
                    # When wult is installed with 'pip', the python helpers go to the "bindir",
                    # and they are not installed to the data directory.
                    srcpath = FSHelpers.which(deployable).parent

                srcpaths += _get_pyhelper_dependencies(srcpath / deployable)
                dstpaths.append(helpers_deploy_path / deployable)
            dinfos[pyhelper] = {"src" : srcpaths, "dst" : dstpaths}

    # We are about to get timestamps for local and remote files. Take into account the possible
    # time shift between local and remote systems.
    time_delta = 0
    if proc.is_remote:
        time_delta = time.time() - RemoteHelpers.time_time(proc=proc)

    # Compare source and destination files' timestamps.
    for what, dinfo in dinfos.items():
        src = dinfo["src"]
        src_mtime = get_newest_mtime(src)
        for dst in dinfo["dst"]:
            try:
                dst_mtime = FSHelpers.get_mtime(dst, proc)
            except ErrorNotFound:
                # This always raises, so 'dst_mtime' is defined whenever it is used below.
                deployable_not_found(dst)

            if src_mtime > time_delta + dst_mtime:
                src_str = ", ".join([str(path) for path in src])
                _LOG.debug("%s src time %d + %d > dst_mtime %d\nsrc: %s\ndst %s",
                           what, src_mtime, time_delta, dst_mtime, src_str, dst)
                return True

    return False
def _deploy_helpers(args, proc):
    """
    Deploy helpers (including python helpers) to the SUT represented by 'proc'.

    The helper names come from 'args.helpers' and 'args.pyhelpers'. The 'args.ctmpdir' and
    'args.stmpdir' paths are used as temporary directories (judging by the comments below, on
    the controller and on the SUT correspondingly). Note, 'args.pyhelpers' is reset to an empty
    list when 'proc' is not remote. Raises 'Error' if the helper sources are not found.
    """

    # Python helpers need to be deployed only to a remote host. The local host already has them
    # deployed by 'setup.py'.
    if not proc.is_remote:
        args.pyhelpers = []

    helpers = args.helpers + args.pyhelpers
    if not helpers:
        return

    # We assume all helpers are in the same base directory.
    helper_path = _HELPERS_SRC_SUBPATH/f"{helpers[0]}"
    helpersrc = find_app_data("wult", helper_path, descr=f"{args.toolname} helper sources")
    helpersrc = helpersrc.parent

    if not helpersrc.is_dir():
        raise Error(f"path '{helpersrc}' does not exist or it is not a directory")

    # Make sure all helpers are available.
    for helper in helpers:
        helperdir = helpersrc / helper
        if not helperdir.is_dir():
            raise Error(f"path '{helperdir}' does not exist or it is not a directory")

    # Copy python helpers to the temporary directory on the controller.
    for pyhelper in args.pyhelpers:
        srcdir = helpersrc / pyhelper
        _LOG.debug("copying helper %s:\n '%s' -> '%s'", pyhelper, srcdir, args.ctmpdir)
        Procs.Proc().rsync(f"{srcdir}", args.ctmpdir, remotesrc=False, remotedst=False)

    # Build stand-alone version of every python helper.
    for pyhelper in args.pyhelpers:
        _LOG.info("Building a stand-alone version of '%s'", pyhelper)
        basedir = args.ctmpdir / pyhelper
        deployables = _get_deployables(basedir)
        for name in deployables:
            _create_standalone_python_script(name, basedir)

    # And copy the stand-alone version of python helpers to the SUT.
    if proc.is_remote:
        for pyhelper in args.pyhelpers:
            srcdir = args.ctmpdir / pyhelper
            _LOG.debug("copying helper '%s' to %s:\n '%s' -> '%s'", pyhelper, proc.hostname, srcdir, args.stmpdir)
            proc.rsync(f"{srcdir}", args.stmpdir, remotesrc=False, remotedst=True)

    # Copy non-python helpers to the temporary directory on the SUT.
    for helper in args.helpers:
        srcdir = helpersrc/ helper
        _LOG.debug("copying helper '%s' to %s:\n '%s' -> '%s'", helper, proc.hostname, srcdir, args.stmpdir)
        proc.rsync(f"{srcdir}", args.stmpdir, remotesrc=False, remotedst=True)

    deploy_path = get_helpers_deploy_path(proc, args.toolname)

    # Build the non-python helpers on the SUT.
    if args.helpers:
        for helper in args.helpers:
            _LOG.info("Compiling helper '%s'%s", helper, proc.hostmsg)
            helperpath = f"{args.stmpdir}/{helper}"
            stdout, stderr = proc.run_verify(f"make -C '{helperpath}'")
            _log_cmd_output(args, stdout, stderr)

    # Make sure the destination deployment directory exists.
    FSHelpers.mkdir(deploy_path, parents=True, exist_ok=True, proc=proc)

    # Deploy all helpers. Every helper is first installed to a staging directory under
    # 'args.stmpdir', and then the "bin" sub-directory of the staging directory is synchronized
    # to the deployment directory.
    _LOG.info("Deploying helpers to '%s'%s", deploy_path, proc.hostmsg)
    helpersdst = args.stmpdir / "helpers_deployed"
    _LOG.debug("deploying helpers to '%s'%s", helpersdst, proc.hostmsg)
    for helper in helpers:
        helperpath = f"{args.stmpdir}/{helper}"
        cmd = f"make -C '{helperpath}' install PREFIX='{helpersdst}'"
        stdout, stderr = proc.run_verify(cmd)
        _log_cmd_output(args, stdout, stderr)
        proc.rsync(str(helpersdst) + "/bin/", deploy_path, remotesrc=True, remotedst=True)
def kill_pids(pids, sig: str = "SIGTERM", kill_children: bool = False, must_die: bool = False,
              proc=None):
    """
    Kill or signal processes with PIDs in 'pids' on the host defined by 'proc'. The 'pids'
    argument can be a collection of PID numbers ('int' or 'str' types) or a single PID number.

    By default the processes are killed (SIGTERM), but any signal can be specified either by
    name or by number.

    The 'kill_children' and 'must_die' arguments must only be used when killing processes
    (SIGTERM or SIGKILL). The 'kill_children' argument controls whether this function should
    also try killing the children (and their children, and so on). If the 'must_die' argument
    is 'True', this function also verifies that the process(es) did actually die, and if any of
    them did not die, it raises an exception.

    By default this function operates on the local host, but the 'proc' argument can be used to
    pass a connected 'SSH' object, in which case this function operates on the remote host.

    Raises 'Error' if 'kill_children' or 'must_die' are used with a non-killing signal, if a
    non-killing signal could not be sent, or if 'must_die' is 'True' and a process survived.
    """

    def collect_zombies(proc):
        """In case of a local process we need to 'waitpid()' the children."""

        if not proc.is_remote:
            with contextlib.suppress(OSError):
                os.waitpid(0, os.WNOHANG)

    if not proc:
        proc = Procs.Proc()

    if not pids:
        return

    if not Trivial.is_iterable(pids):
        pids = (pids, )

    # Normalize PIDs to strings, validating that they are integers along the way.
    pids = [str(int(pid)) for pid in pids]

    sig = "SIGTERM" if sig is None else str(sig)

    killing = _is_sigterm(sig) or _is_sigkill(sig)
    if (kill_children or must_die) and not killing:
        raise Error(f"'kill_children' and 'must_die' arguments cannot be used with '{sig}' "
                    f"signal")

    if kill_children:
        # Find all the descendants of the processes. The 'pids' list grows while it is being
        # walked, so children of the children are looked up too.
        seen = set(pids)
        idx = 0
        while idx < len(pids):
            children, _, exitcode = proc.run(f"pgrep -P {pids[idx]}", join=False)
            idx += 1
            if exitcode != 0:
                # No children were found for this PID. Keep going, the remaining PIDs may still
                # have children.
                continue
            for child in children:
                child = child.strip()
                if child and child not in seen:
                    seen.add(child)
                    pids.append(child)

    pids_spc = " ".join(pids)
    pids_comma = ",".join(pids)
    _LOG.debug("sending '%s' signal to the following process%s: %s",
               sig, proc.hostmsg, pids_comma)

    try:
        proc.run_verify(f"kill -{sig} -- {pids_spc}")
    except Error as err:
        if not killing:
            raise Error(f"failed to send signal '{sig}' to PIDs '{pids_comma}'{proc.hostmsg}:\n"
                        f"{err}") from err
        # Some error happened on the first attempt. We've seen a couple of situations when this
        # happens.
        # 1. Most often, a PID does not exist anymore, the process exited already (race
        #    condition).
        # 2. One of the processes in the list is owned by a different user (e.g., root). Let's
        #    call it process A. We have no permissions to kill process A, but we can kill other
        #    processes in the 'pids' list. But often killing other processes in the 'pids' list
        #    will make process A exit. This is why we do not error out just yet.
        #
        # So the strategy is to do the second signal sending round and often times it happens
        # without errors, and all the processes that we want to kill just go away.

    if not killing:
        return

    # Give the processes up to 4 seconds to die.
    timeout = 4
    start_time = time.time()
    while time.time() - start_time <= timeout:
        collect_zombies(proc)
        _, _, exitcode = proc.run(f"kill -0 -- {pids_spc}")
        if exitcode == 1:
            return
        time.sleep(0.2)

    if _is_sigterm(sig):
        # Something refused to die, try SIGKILL.
        try:
            proc.run_verify(f"kill -9 -- {pids_spc}")
        except Error as err:
            # It is fine if one of the processes exited meanwhile.
            if "No such process" not in str(err):
                raise

    collect_zombies(proc)

    # Without 'must_die' this function does not verify that the processes are gone.
    if not must_die:
        return

    # Give the processes up to 4 seconds to die.
    timeout = 4
    start_time = time.time()
    while time.time() - start_time <= timeout:
        collect_zombies(proc)
        _, _, exitcode = proc.run(f"kill -0 -- {pids_spc}")
        if exitcode != 0:
            return
        time.sleep(0.2)

    # Something refused to die, find out what.
    lines, _ = proc.run_verify(f"ps -f {pids_spc}", join=False)
    if len(lines) < 2:
        msg = pids_comma
    else:
        # Present the 'ps' output as text rather than as a list representation.
        msg = "\n" + "\n".join(line.rstrip("\n") for line in lines)

    raise Error(f"one of the following processes{proc.hostmsg} did not die after 'SIGKILL': "
                f"{msg}")