Beispiel #1
0
def _distribute(host, is_restart):
    """Re-invoke this command on a different host if requested."""
    # Check whether a run host is explicitly specified, else select one.
    if not host:
        host = select_suite_host()[0]
    if is_remote_host(host):
        # Prevent recursive host selection
        cmd = sys.argv[1:]
        cmd.append("--host=localhost")
        remote_cylc_cmd(cmd, host=host)
        sys.exit(0)
Beispiel #2
0
def _get_metrics(hosts, metrics, data=None):
    """Retrieve host metrics using SSH if necessary.

    Note hosts will not appear in the returned results if:
    * They are not contactable.
    * There is an error in the command which returns the results.

    Args:
        hosts (list):
            List of host fqdns.
        metrics (list):
            List in the form [(function, arg1, arg2, ...), ...]
        data (dict):
            Used for logging success/fail outcomes of the form {host: {}}

    Examples:
        Command failure:
        >>> _get_metrics(['localhost'], [['elephant']])
        ({}, {'localhost': {'get_metrics': 'Command failed (exit: 1)'}})

    Returns:
        dict - {host: {(function, arg1, arg2, ...): result}}

    """
    host_stats = {}
    proc_map = {}
    if not data:
        data = {host: dict() for host in hosts}

    # Start up commands on hosts
    cmd = ['psutil']
    kwargs = {'stdin_str': json.dumps(metrics), 'capture_process': True}
    for host in hosts:
        if is_remote_host(host):
            proc_map[host] = remote_cylc_cmd(cmd, host=host, **kwargs)
        else:
            proc_map[host] = run_cmd(['cylc'] + cmd, **kwargs)

    # Collect results from commands
    while proc_map:
        for host, proc in list(proc_map.copy().items()):
            if proc.poll() is None:
                continue
            del proc_map[host]
            out, err = (f.decode() for f in proc.communicate())
            if proc.wait():
                # Command failed in verbose/debug mode
                LOG.warning('Could not evaluate "%s" (return code %d)\n%s',
                            host, proc.returncode, err)
                data[host]['get_metrics'] = (
                    f'Command failed (exit: {proc.returncode})')
            else:
                host_stats[host] = dict(
                    zip(
                        metrics,
                        # convert JSON dicts -> namedtuples
                        _deserialise(metrics, parse_dirty_json(out))))
        sleep(0.01)
    return host_stats, data
Beispiel #3
0
def scheduler_cli(parser, options, args, is_restart=False):
    """CLI main."""
    reg = args[0]
    # Check suite is not already running before start of host selection.
    try:
        suite_files.detect_old_contact_file(reg)
    except SuiteServiceFileError as exc:
        sys.exit(exc)

    suite_run_dir = get_suite_run_dir(reg)

    if not os.path.exists(suite_run_dir):
        sys.stderr.write(f'suite service directory not found '
                         f'at: {suite_run_dir}\n')
        sys.exit(1)

    # Create auth files if needed.
    suite_files.create_auth_files(reg)

    # Extract job.sh from library, for use in job scripts.
    extract_resources(suite_files.get_suite_srv_dir(reg), ['etc/job.sh'])

    # Check whether a run host is explicitly specified, else select one.
    if not options.host:
        try:
            host = HostAppointer().appoint_host()
        except EmptyHostList as exc:
            if cylc.flow.flags.debug:
                raise
            else:
                sys.exit(str(exc))
        if is_remote_host(host):
            if is_restart:
                base_cmd = ["restart"] + sys.argv[1:]
            else:
                base_cmd = ["run"] + sys.argv[1:]
            # Prevent recursive host selection
            base_cmd.append("--host=localhost")
            return remote_cylc_cmd(base_cmd, host=host)
    if remrun(set_rel_local=True):  # State localhost as above.
        sys.exit()

    try:
        suite_files.get_suite_source_dir(args[0], options.owner)
    except SuiteServiceFileError:
        # Source path is assumed to be the run directory
        suite_files.register(args[0], get_suite_run_dir(args[0]))

    try:
        scheduler = Scheduler(is_restart, options, args)
    except SuiteServiceFileError as exc:
        sys.exit(exc)
    scheduler.start()
Beispiel #4
0
    def _get_host_metrics(self):
        """Run "cylc get-host-metrics" commands on hosts.

        Return (dict): {host: host-metrics-dict, ...}
        """
        host_stats = {}
        # Run "cylc get-host-metrics" commands on hosts
        host_proc_map = {}
        cmd = [self.CMD_BASE] + sorted(self._get_host_metrics_opts())
        # Start up commands on hosts
        for host in self.hosts:
            if is_remote_host(host):
                host_proc_map[host] = remote_cylc_cmd(cmd,
                                                      stdin=None,
                                                      host=host,
                                                      capture_process=True)
            elif 'localhost' in host_proc_map:
                continue  # Don't duplicate localhost
            else:
                # 1st instance of localhost
                host_proc_map['localhost'] = run_cmd(['cylc'] + cmd,
                                                     capture_process=True)
        # Collect results from commands
        while host_proc_map:
            for host, proc in list(host_proc_map.copy().items()):
                if proc.poll() is None:
                    continue
                del host_proc_map[host]
                out, err = (f.decode() for f in proc.communicate())
                if proc.wait():
                    # Command failed in verbose/debug mode
                    LOG.warning(
                        "can't get host metric from '%s'" +
                        "%s  # returncode=%d, err=%s\n", host, ' '.join(
                            (quote(item) for item in cmd)), proc.returncode,
                        err)
                else:
                    # Command OK
                    # Users may have profile scripts that write to STDOUT.
                    # Drop all output lines until the the first character of a
                    # line is '{'. Hopefully this is enough to find us the
                    # first line that denotes the beginning of the expected
                    # JSON data structure.
                    out = ''.join(
                        dropwhile(lambda s: not s.startswith('{'),
                                  out.splitlines(True)))
                    host_stats[host] = json.loads(out)
            sleep(0.01)
        return host_stats
Beispiel #5
0
    def _get_host_metrics(self):
        """Run "cylc get-host-metrics" commands on hosts.

        Return (dict): {host: host-metrics-dict, ...}
        """
        host_stats = {}
        # Run "cylc get-host-metrics" commands on hosts
        host_proc_map = {}
        cmd = [self.CMD_BASE] + sorted(self._get_host_metrics_opts())
        # Start up commands on hosts
        for host in self.hosts:
            if is_remote_host(host):
                host_proc_map[host] = remote_cylc_cmd(
                    cmd, stdin=None, host=host, capture_process=True)
            elif 'localhost' in host_proc_map:
                continue  # Don't duplicate localhost
            else:
                # 1st instance of localhost
                host_proc_map['localhost'] = run_cmd(
                    ['cylc'] + cmd, capture_process=True)
        # Collect results from commands
        while host_proc_map:
            for host, proc in list(host_proc_map.copy().items()):
                if proc.poll() is None:
                    continue
                del host_proc_map[host]
                out, err = (f.decode() for f in proc.communicate())
                if proc.wait():
                    # Command failed in verbose/debug mode
                    LOG.warning(
                        "can't get host metric from '%s'" +
                        "%s  # returncode=%d, err=%s\n",
                        host, ' '.join((quote(item) for item in cmd)),
                        proc.returncode, err)
                else:
                    # Command OK
                    # Users may have profile scripts that write to STDOUT.
                    # Drop all output lines until the the first character of a
                    # line is '{'. Hopefully this is enough to find us the
                    # first line that denotes the beginning of the expected
                    # JSON data structure.
                    out = ''.join(dropwhile(
                        lambda s: not s.startswith('{'), out.splitlines(True)))
                    host_stats[host] = json.loads(out)
            sleep(0.01)
        return host_stats
Beispiel #6
0
def scheduler_cli(parser, options, args, is_restart=False):
    """CLI main."""
    # Check suite is not already running before start of host selection.
    try:
        SuiteSrvFilesManager().detect_old_contact_file(args[0])
    except SuiteServiceFileError as exc:
        sys.exit(exc)

    # Create auth files if needed.
    SuiteSrvFilesManager().create_auth_files(args[0])

    # Check whether a run host is explicitly specified, else select one.
    if not options.host:
        try:
            host = HostAppointer().appoint_host()
        except EmptyHostList as exc:
            if cylc.flow.flags.debug:
                raise
            else:
                sys.exit(str(exc))
        if is_remote_host(host):
            if is_restart:
                base_cmd = ["restart"] + sys.argv[1:]
            else:
                base_cmd = ["run"] + sys.argv[1:]
            # Prevent recursive host selection
            base_cmd.append("--host=localhost")
            return remote_cylc_cmd(base_cmd, host=host)
    if remrun(set_rel_local=True):  # State localhost as above.
        sys.exit()

    try:
        SuiteSrvFilesManager().get_suite_source_dir(args[0], options.owner)
    except SuiteServiceFileError:
        # Source path is assumed to be the run directory
        SuiteSrvFilesManager().register(args[0], get_suite_run_dir(args[0]))

    try:
        scheduler = Scheduler(is_restart, options, args)
    except SuiteServiceFileError as exc:
        sys.exit(exc)
    scheduler.start()
Beispiel #7
0
def main(parser, options, *args, color=False):
    """Implement cylc cat-log CLI.

    Determine log path, user@host, batchview_cmd, and action (print, dir-list,
    cat, edit, or tail), and then if the log path is:
      a) local: perform action on log path, or
      b) remote: re-invoke cylc cat-log as a) on the remote account

    """
    if options.remote_args:
        # Invoked on job hosts for job logs only, as a wrapper to view_log().
        # Tail and batchview commands come from global config on suite host).
        logpath, mode, tail_tmpl = options.remote_args[0:3]
        if logpath.startswith('$'):
            logpath = os.path.expandvars(logpath)
        elif logpath.startswith('~'):
            logpath = os.path.expanduser(logpath)
        try:
            batchview_cmd = options.remote_args[3]
        except IndexError:
            batchview_cmd = None
        res = view_log(logpath,
                       mode,
                       tail_tmpl,
                       batchview_cmd,
                       remote=True,
                       color=color)
        if res == 1:
            sys.exit(res)
        return

    suite_name = args[0]
    # Get long-format mode.
    try:
        mode = MODES[options.mode]
    except KeyError:
        mode = options.mode

    if len(args) == 1:
        # Cat suite logs, local only.
        if options.filename is not None:
            raise UserInputError("The '-f' option is for job logs only.")

        logpath = get_suite_run_log_name(suite_name)
        if options.rotation_num:
            logs = glob('%s.*' % logpath)
            logs.sort(key=os.path.getmtime, reverse=True)
            try:
                logpath = logs[int(options.rotation_num)]
            except IndexError:
                raise UserInputError("max rotation %d" % (len(logs) - 1))
        tail_tmpl = str(glbl_cfg().get_host_item("tail command template"))
        out = view_log(logpath, mode, tail_tmpl, color=color)
        if out == 1:
            sys.exit(1)
        if mode == 'edit':
            tmpfile_edit(out, options.geditor)
        return

    if len(args) == 2:
        # Cat task job logs, may be on suite or job host.
        if options.rotation_num is not None:
            raise UserInputError("only suite (not job) logs get rotated")
        task_id = args[1]
        try:
            task, point = TaskID.split(task_id)
        except ValueError:
            parser.error("Illegal task ID: %s" % task_id)
        if options.submit_num != NN:
            try:
                options.submit_num = "%02d" % int(options.submit_num)
            except ValueError:
                parser.error("Illegal submit number: %s" % options.submit_num)
        if options.filename is None:
            options.filename = JOB_LOG_OUT
        else:
            # Convert short filename args to long (e.g. 'o' to 'job.out').
            try:
                options.filename = JOB_LOG_OPTS[options.filename]
            except KeyError:
                # Is already long form (standard log, or custom).
                pass
        user_at_host, batch_sys_name, live_job_id = get_task_job_attrs(
            suite_name, point, task, options.submit_num)
        user, host = split_user_at_host(user_at_host)
        batchview_cmd = None
        if live_job_id is not None:
            # Job is currently running. Get special batch system log view
            # command (e.g. qcat) if one exists, and the log is out or err.
            conf_key = None
            if options.filename == JOB_LOG_OUT:
                if mode == 'cat':
                    conf_key = "out viewer"
                elif mode == 'tail':
                    conf_key = "out tailer"
            elif options.filename == JOB_LOG_ERR:
                if mode == 'cat':
                    conf_key = "err viewer"
                elif mode == 'tail':
                    conf_key = "err tailer"
            if conf_key is not None:
                conf = glbl_cfg().get_host_item("batch systems", host, user)
                batchview_cmd_tmpl = None
                try:
                    batchview_cmd_tmpl = conf[batch_sys_name][conf_key]
                except KeyError:
                    pass
                if batchview_cmd_tmpl is not None:
                    batchview_cmd = batchview_cmd_tmpl % {
                        "job_id": str(live_job_id)
                    }

        log_is_remote = (is_remote(host, user)
                         and (options.filename not in JOB_LOGS_LOCAL))
        log_is_retrieved = (glbl_cfg().get_host_item('retrieve job logs', host)
                            and live_job_id is None)
        if log_is_remote and (not log_is_retrieved or options.force_remote):
            logpath = os.path.normpath(
                get_remote_suite_run_job_dir(host, user, suite_name, point,
                                             task, options.submit_num,
                                             options.filename))
            tail_tmpl = str(glbl_cfg().get_host_item("tail command template",
                                                     host, user))
            # Reinvoke the cat-log command on the remote account.
            cmd = ['cat-log']
            if cylc.flow.flags.debug:
                cmd.append('--debug')
            for item in [logpath, mode, tail_tmpl]:
                cmd.append('--remote-arg=%s' % quote(item))
            if batchview_cmd:
                cmd.append('--remote-arg=%s' % quote(batchview_cmd))
            cmd.append(suite_name)
            is_edit_mode = (mode == 'edit')
            try:
                proc = remote_cylc_cmd(cmd,
                                       user,
                                       host,
                                       capture_process=is_edit_mode,
                                       manage=(mode == 'tail'))
            except KeyboardInterrupt:
                # Ctrl-C while tailing.
                pass
            else:
                if is_edit_mode:
                    # Write remote stdout to a temp file for viewing in editor.
                    # Only BUFSIZE bytes at a time in case huge stdout volume.
                    out = NamedTemporaryFile()
                    data = proc.stdout.read(BUFSIZE)
                    while data:
                        out.write(data)
                        data = proc.stdout.read(BUFSIZE)
                    os.chmod(out.name, S_IRUSR)
                    out.seek(0, 0)
        else:
            # Local task job or local job log.
            logpath = os.path.normpath(
                get_suite_run_job_dir(suite_name, point, task,
                                      options.submit_num, options.filename))
            tail_tmpl = str(glbl_cfg().get_host_item("tail command template"))
            out = view_log(logpath,
                           mode,
                           tail_tmpl,
                           batchview_cmd,
                           color=color)
            if mode != 'edit':
                sys.exit(out)
        if mode == 'edit':
            tmpfile_edit(out, options.geditor)
Beispiel #8
0
def main(parser: COP,
         options: 'Values',
         reg: str,
         task_id: Optional[str] = None,
         color: bool = False) -> None:
    """Implement cylc cat-log CLI.

    Determine log path, user@host, batchview_cmd, and action (print, dir-list,
    cat, edit, or tail), and then if the log path is:
      a) local: perform action on log path, or
      b) remote: re-invoke cylc cat-log as a) on the remote account

    """
    if options.remote_args:
        # Invoked on job hosts for job logs only, as a wrapper to view_log().
        # Tail and batchview commands from global config on workflow host).
        logpath, mode, tail_tmpl = options.remote_args[0:3]
        logpath = expand_path(logpath)
        tail_tmpl = expand_path(tail_tmpl)
        try:
            batchview_cmd = options.remote_args[3]
        except IndexError:
            batchview_cmd = None
        res = view_log(logpath,
                       mode,
                       tail_tmpl,
                       batchview_cmd,
                       remote=True,
                       color=color)
        if res == 1:
            sys.exit(res)
        return

    workflow_name, _ = parse_reg(reg)
    # Get long-format mode.
    try:
        mode = MODES[options.mode]
    except KeyError:
        mode = options.mode

    if not task_id:
        # Cat workflow logs, local only.
        if options.filename is not None:
            raise UserInputError("The '-f' option is for job logs only.")

        logpath = get_workflow_run_log_name(workflow_name)
        if options.rotation_num:
            logs = glob('%s.*' % logpath)
            logs.sort(key=os.path.getmtime, reverse=True)
            try:
                logpath = logs[int(options.rotation_num)]
            except IndexError:
                raise UserInputError("max rotation %d" % (len(logs) - 1))
        tail_tmpl = os.path.expandvars(get_platform()["tail command template"])
        out = view_log(logpath, mode, tail_tmpl, color=color)
        if out == 1:
            sys.exit(1)
        if mode == 'edit':
            tmpfile_edit(out, options.geditor)
        return

    if task_id:
        # Cat task job logs, may be on workflow or job host.
        if options.rotation_num is not None:
            raise UserInputError("only workflow (not job) logs get rotated")
        try:
            task, point = TaskID.split(task_id)
        except ValueError:
            parser.error("Illegal task ID: %s" % task_id)
        if options.submit_num != NN:
            try:
                options.submit_num = "%02d" % int(options.submit_num)
            except ValueError:
                parser.error("Illegal submit number: %s" % options.submit_num)
        if options.filename is None:
            options.filename = JOB_LOG_OUT
        else:
            # Convert short filename args to long (e.g. 'o' to 'job.out').
            with suppress(KeyError):
                options.filename = JOB_LOG_OPTS[options.filename]
                # KeyError: Is already long form (standard log, or custom).
        platform_name, job_runner_name, live_job_id = get_task_job_attrs(
            workflow_name, point, task, options.submit_num)
        platform = get_platform(platform_name)
        batchview_cmd = None
        if live_job_id is not None:
            # Job is currently running. Get special job runner log view
            # command (e.g. qcat) if one exists, and the log is out or err.
            conf_key = None
            if options.filename == JOB_LOG_OUT:
                if mode == 'cat':
                    conf_key = "out viewer"
                elif mode == 'tail':
                    conf_key = "out tailer"
            elif options.filename == JOB_LOG_ERR:
                if mode == 'cat':
                    conf_key = "err viewer"
                elif mode == 'tail':
                    conf_key = "err tailer"
            if conf_key is not None:
                batchview_cmd_tmpl = None
                with suppress(KeyError):
                    batchview_cmd_tmpl = platform[conf_key]
                if batchview_cmd_tmpl is not None:
                    batchview_cmd = batchview_cmd_tmpl % {
                        "job_id": str(live_job_id)
                    }

        log_is_remote = (is_remote_platform(platform)
                         and (options.filename != JOB_LOG_ACTIVITY))
        log_is_retrieved = (platform['retrieve job logs']
                            and live_job_id is None)
        if log_is_remote and (not log_is_retrieved or options.force_remote):
            logpath = os.path.normpath(
                get_remote_workflow_run_job_dir(workflow_name, point, task,
                                                options.submit_num,
                                                options.filename))
            tail_tmpl = platform["tail command template"]
            # Reinvoke the cat-log command on the remote account.
            cmd = ['cat-log', *verbosity_to_opts(cylc.flow.flags.verbosity)]
            for item in [logpath, mode, tail_tmpl]:
                cmd.append('--remote-arg=%s' % shlex.quote(item))
            if batchview_cmd:
                cmd.append('--remote-arg=%s' % shlex.quote(batchview_cmd))
            cmd.append(workflow_name)
            is_edit_mode = (mode == 'edit')
            # TODO: Add Intelligent Host selection to this
            try:
                proc = remote_cylc_cmd(cmd,
                                       platform,
                                       capture_process=is_edit_mode,
                                       manage=(mode == 'tail'))
            except KeyboardInterrupt:
                # Ctrl-C while tailing.
                pass
            else:
                if is_edit_mode:
                    # Write remote stdout to a temp file for viewing in editor.
                    # Only BUFSIZE bytes at a time in case huge stdout volume.
                    out = NamedTemporaryFile()
                    data = proc.stdout.read(BUFSIZE)
                    while data:
                        out.write(data)
                        data = proc.stdout.read(BUFSIZE)
                    os.chmod(out.name, S_IRUSR)
                    out.seek(0, 0)
        else:
            # Local task job or local job log.
            logpath = os.path.normpath(
                get_workflow_run_job_dir(workflow_name, point, task,
                                         options.submit_num, options.filename))
            tail_tmpl = os.path.expandvars(platform["tail command template"])
            out = view_log(logpath,
                           mode,
                           tail_tmpl,
                           batchview_cmd,
                           color=color)
            if mode != 'edit':
                sys.exit(out)
        if mode == 'edit':
            tmpfile_edit(out, options.geditor)