Beispiel #1
0
    def _bash_login_cmd(cmd: List[str]) -> List[str]:
        """Wrap a command so that it is run by a bash login shell.

        The words of ``cmd`` are joined into one shell-quoted string and
        handed to ``bash -l -c``.  Going through a login shell allows users
        to set env vars for the command.

        Example:
        >>> HostSelector._bash_login_cmd(["echo", "-n", "Multiple words"])
        ['bash', '-l', '-c', "echo -n 'Multiple words'"]
        """
        # ``bash -c`` takes the whole command line as a single argument, so
        # the individual words have to be quoted and joined first.
        shell_line = RosePopener.shlex_join(cmd)
        return ['bash', '-l', '-c', shell_line]
Beispiel #2
0
def write_source_vc_info(run_source_dir, output=None, popen=None):
    """Write version control information of sources used in run time.

    For each supported version control system ("svn", then "git") whose
    executable can be found, run a fixed list of informational commands in
    run_source_dir and write the output of each, preceded by a banner
    showing the command line, to the output.

    run_source_dir -- The source directory we are interested in.
    output -- An open file handle or a string containing a writable path.
              If not specified, use sys.stdout. A handle passed in by the
              caller is left open; a file opened here from a path is closed
              before returning.
    popen -- A metomi.rose.popen.RosePopener instance for running vc commands.
             If not specified, use a new local instance.

    """
    if popen is None:
        popen = RosePopener()
    # Remember whether the handle was opened here: only then is it ours to
    # close. sys.stdout and caller supplied handles must stay open.
    owns_handle = False
    if output is None:
        handle = sys.stdout
    elif hasattr(output, "write"):
        handle = output
    else:
        handle = open(output, "wb")
        owns_handle = True
    try:
        msg = "%s\n" % run_source_dir
        write_safely(msg, handle)
        # Run the commands with LANG=C in a copy of the environment; the
        # environment of this process is not modified.
        environ = dict(os.environ)
        environ["LANG"] = "C"
        for vcs, args_list in [
            (
                "svn",
                [
                    ["info", "--non-interactive"],
                    ["status", "--non-interactive"],
                    ["diff", "--internal-diff", "--non-interactive"],
                ],
            ),
            ("git", [["describe"], ["status"], ["diff"]]),
        ]:
            if not popen.which(vcs):
                continue
            # The commands are run from within the source directory. The
            # change of directory affects the whole process, so the previous
            # working directory is always restored afterwards.
            cwd = os.getcwd()
            os.chdir(run_source_dir)
            try:
                for args in args_list:
                    cmd = [vcs, *args]
                    ret_code, out, _ = popen.run(*cmd, env=environ)
                    if out:
                        write_safely(("#" * 80 + "\n"), handle)
                        write_safely(
                            ("# %s\n" % popen.shlex_join(cmd)), handle)
                        write_safely(("#" * 80 + "\n"), handle)
                        write_safely(out, handle)
                    if ret_code:  # If cmd fails once, it will likely fail again
                        break
            finally:
                os.chdir(cwd)
    finally:
        # Do not leak the file descriptor of a file opened from a path.
        if owns_handle:
            handle.close()
Beispiel #3
0
    def select(
        self,
        names=None,
        rank_method=None,
        thresholds=None,
        ssh_cmd_timeout=None,
    ):
        """Return a list. Element 0 is most desirable.
        Each element of the list is a tuple (host, score).

        names: a list of known host groups or host names.
        rank_method: the ranking method. Can be one of:
                     load:1, load:5, load:15 (=load =default), fs:FS, mem and
                     random.  The "load" methods determine the load using the
                     average load as returned by the "uptime" command divided
                     by the number of CPUs. The "fs" method determines the load
                     using the usage in the file system specified by FS. The
                     "mem" method ranks by highest free memory. The "random"
                     method ranks everything by random.

        thresholds: a list of thresholds which each host must not exceed.
                    Should be in the format rank_method:value, where
                    rank_method is one of load:1, load:5, load:15 or fs:FS; and
                    value is a number that must not be exceeded. A threshold
                    given as a bare value uses the default rank method.

        ssh_cmd_timeout: timeout of SSH commands to hosts. A float in seconds.
                         If not specified, the "timeout" setting of the
                         "rose-host-select" configuration section is used.

        Raise ValueError if the value of a threshold cannot be converted to
        a float.
        Raise NoHostSelectError if no host can be selected.

        With the "random" method and no thresholds, the returned list has a
        single element: the first host found to be available, with score 1.

        """

        host_names, rank_method, thresholds = self.expand(
            names, rank_method, thresholds)

        # Load scorers, ranking and thresholds
        # A rank method may carry an argument after a colon, e.g. "load:5".
        rank_method_arg = None
        if rank_method:
            if ":" in rank_method:
                rank_method, rank_method_arg = rank_method.split(":", 1)
        else:
            rank_method = self.RANK_METHOD_DEFAULT
        rank_conf = ScorerConf(self.get_scorer(rank_method), rank_method_arg)
        self.handle_event(RankMethodEvent(rank_method, rank_method_arg))

        # Parse each threshold, which is one of:
        #   "value", "method:value" or "method:arg:value".
        threshold_confs = []
        if thresholds:
            for threshold in thresholds:
                method = self.RANK_METHOD_DEFAULT
                method_arg = None
                value = threshold
                if ":" in threshold:
                    # The value is whatever follows the last colon; the rest
                    # is the method, optionally with its own argument.
                    head, value = threshold.rsplit(":", 1)
                    method = head
                    if ":" in head:
                        method, method_arg = head.split(":", 1)
                # Validation only: the value is passed on unconverted.
                try:
                    float(value)
                except ValueError:
                    raise ValueError(threshold)
                scorer = self.get_scorer(method)
                if method_arg is None:
                    method_arg = scorer.ARG
                threshold_conf = ScorerConf(self.get_scorer(method),
                                            method_arg, value)
                threshold_confs.append(threshold_conf)

        # Fall back to the configured timeout (self.SSH_CMD_TIMEOUT is passed
        # as the second argument of get_value, presumably the default).
        if ssh_cmd_timeout is None:
            conf = ResourceLocator.default().get_conf()
            ssh_cmd_timeout = float(
                conf.get_value(["rose-host-select", "timeout"],
                               self.SSH_CMD_TIMEOUT))

        # Replace every name that refers to the local host by the name given
        # by self.get_local_host(), keeping at most one such entry. Other
        # host names are kept as they are, in order.
        host_name_list = list(host_names)
        host_names = []
        for host_name in host_name_list:
            if self.is_local_host(host_name):
                if self.get_local_host() not in host_names:
                    host_names.append(self.get_local_host())
            else:
                host_names.append(host_name)

        # Random selection with no thresholds. Return the 1st available host.
        if rank_conf.method == self.RANK_METHOD_RANDOM and not threshold_confs:
            shuffle(host_names)
            for host_name in host_names:
                # The local host needs no availability check.
                if self.is_local_host(host_name):
                    return [("localhost", 1)]
                # Check that the host can be reached by running "true" on it.
                # The process is made the leader of a new process group so
                # that the whole group can be killed on time out.
                command = self.popen.get_cmd("ssh", host_name, "true")
                proc = self.popen.run_bg(*command, preexec_fn=os.setpgrp)
                time0 = time()
                while (proc.poll() is None
                       and time() - time0 <= ssh_cmd_timeout):
                    sleep(self.SSH_CMD_POLL_DELAY)
                if proc.poll() is None:
                    # Still running after the time out: kill and move on.
                    os.killpg(proc.pid, signal.SIGTERM)
                    proc.wait()
                    self.handle_event(TimedOutHostEvent(host_name))
                elif proc.wait():
                    # Non-zero return code: report and try the next host.
                    self.handle_event(
                        HostSelectCommandFailedEvent(host_name,
                                                     proc.returncode))
                else:
                    return [(host_name, 1)]
            # The loop has no "break", so this is reached whenever no host
            # caused a return above, i.e. none of them is available.
            else:
                raise NoHostSelectError()

        # ssh to each host to return its score(s).
        # All processes are started first and left to run in parallel; their
        # results are collected by the polling loop further down.
        host_proc_dict = {}
        for host_name in sorted(host_names):
            # build host-select-client command
            command: List[str] = []

            # pass through CYLC_VERSION to support use of cylc wrapper script
            # (only possible if cylc.flow can be imported here)
            try:
                import cylc.flow
            except ModuleNotFoundError:
                pass
            else:
                command.extend([
                    'env',
                    f'CYLC_VERSION={cylc.flow.__version__}',
                ])
                cylc_env_name = os.getenv('CYLC_ENV_NAME')
                if cylc_env_name:
                    command.append(f'CYLC_ENV_NAME={cylc_env_name}')

            command.extend(self._bash_login_cmd(['rose',
                                                 'host-select-client']))

            # build list of metrics to obtain for each host
            # (those of the rank method plus any extra ones required by the
            # thresholds, without duplicates)
            metrics = rank_conf.get_command()
            for threshold_conf in threshold_confs:
                for metric in threshold_conf.get_command():
                    if metric not in metrics:
                        metrics.append(metric)

            # convert metrics list to JSON stdin
            # NOTE(review): the start marker has three leading asterisks, the
            # end marker two - presumably what host-select-client expects;
            # confirm before changing.
            stdin = '\n***start**\n' + json.dumps(metrics) + '\n**end**\n'

            # For a remote host, the whole command is passed to ssh as a
            # single shell-quoted string.
            if not self.is_local_host(host_name):
                command = [
                    *self.popen.get_cmd('ssh', host_name),
                    RosePopener.shlex_join(command)
                ]
            # fire off host-select-client processes
            # (each in its own process group, see the clean up of timed out
            # processes below)
            proc = self.popen.run_bg(*command,
                                     stdin=stdin,
                                     preexec_fn=os.setpgrp)
            proc.stdin.write(stdin)
            proc.stdin.flush()
            host_proc_dict[host_name] = (proc, metrics)

        # Retrieve score for each host name
        # Poll until every process has finished or the time out is reached.
        # Finished processes are removed from host_proc_dict, so anything
        # left in it after the loop has timed out.
        host_score_list = []
        time0 = time()
        while host_proc_dict:
            sleep(self.SSH_CMD_POLL_DELAY)
            # Iterate over a copy, as entries are removed in the loop.
            for host_name, (proc, metrics) in list(host_proc_dict.items()):
                if proc.poll() is None:  # still running
                    continue
                stdout, stderr = proc.communicate()
                if proc.returncode:
                    self.handle_event(
                        HostSelectCommandFailedEvent(host_name,
                                                     proc.returncode, stderr))
                    host_proc_dict.pop(host_name)
                else:
                    out = _deserialise(metrics, json.loads(stdout.strip()))

                    host_proc_dict.pop(host_name)
                    # A host is rejected by the first threshold that it does
                    # not meet, or whose score cannot be determined.
                    for threshold_conf in threshold_confs:
                        try:
                            score = threshold_conf.command_out_parser(
                                out, metrics)
                            is_bad = threshold_conf.check_threshold(score)
                        except ValueError:
                            is_bad = True
                            score = None
                        if is_bad:
                            self.handle_event(
                                HostThresholdNotMetEvent(
                                    host_name, threshold_conf, score))
                            break
                    # Reached only if no threshold rejected the host.
                    else:
                        # If the ranking score cannot be determined, the host
                        # is left out of the result, but the event is still
                        # reported, with a score of None.
                        try:
                            score = rank_conf.command_out_parser(out, metrics)
                            host_score_list.append((host_name, score))
                        except ValueError:
                            score = None
                        self.handle_event(
                            HostSelectScoreEvent(host_name, score))
            if time() - time0 > ssh_cmd_timeout:
                break

        # Report timed out hosts
        # and kill the process groups of their processes.
        for host_name, (proc, _) in sorted(host_proc_dict.items()):
            self.handle_event(TimedOutHostEvent(host_name))
            os.killpg(proc.pid, signal.SIGTERM)
            proc.wait()

        if not host_score_list:
            raise NoHostSelectError()
        # Sort by score in ascending order, or in descending order if the
        # SIGN of the scorer is negative.
        host_score_list.sort(key=lambda a: a[1],
                             reverse=rank_conf.scorer.SIGN < 0)
        return host_score_list