Beispiel #1
0
    def _process_job_logs_retrieval(self, schd_ctx, ctx, id_keys):
        """Queue a command that copies task job logs from a remote host.

        The command is built from the configured "retrieve job logs command"
        and "ssh command" for the job's user@host, restricted by
        include/exclude filters to the job directories named in ``id_keys``,
        and then submitted to the process pool with
        ``self._job_logs_retrieval_callback`` as the callback.

        Args:
            schd_ctx: scheduler context; ``schd_ctx.suite`` is used to locate
                the remote and local job directories.
            ctx: retrieval context; ``ctx.user_at_host`` and ``ctx.max_size``
                are read here.
            id_keys: iterable of 4-item keys; the last three items are the
                cycle point, task name and submit number.
        """
        remote = ctx.user_at_host
        if remote and "@" in remote:
            s_user, s_host = remote.split("@", 1)
        else:
            # No explicit user part: the whole value is treated as the host.
            s_user, s_host = None, remote
        ssh_str = str(glbl_cfg().get_host_item("ssh command", s_host, s_user))
        rsync_str = str(glbl_cfg().get_host_item(
            "retrieve job logs command", s_host, s_user))

        cmd = [*shlex.split(rsync_str), "--rsh=" + ssh_str]
        if LOG.isEnabledFor(DEBUG):
            cmd.append("-v")
        if ctx.max_size:
            cmd.append("--max-size=%s" % (ctx.max_size, ))

        # Every directory level leading to a wanted job directory must be
        # included explicitly, because everything else is excluded below.
        wanted = set()
        for _, point, name, submit_num in id_keys:
            point_dir = "/%s" % (point)
            task_dir = "/%s/%s" % (point, name)
            submit_dir = "%s/%02d" % (task_dir, submit_num)
            wanted.update((point_dir, task_dir, submit_dir,
                           submit_dir + "/**"))
        cmd.extend("--include=%s" % (path) for path in sorted(wanted))
        cmd.append("--exclude=/**")  # exclude everything else

        # Remote source, then local target.
        remote_job_dir = get_remote_suite_run_job_dir(
            s_host, s_user, schd_ctx.suite)
        cmd.append("%s:%s/" % (ctx.user_at_host, remote_job_dir))
        cmd.append(get_suite_run_job_dir(schd_ctx.suite) + "/")

        sub_ctx = SubProcContext(
            ctx, cmd, env=dict(os.environ), id_keys=id_keys)
        self.proc_pool.put_command(
            sub_ctx, self._job_logs_retrieval_callback, [schd_ctx])
Beispiel #2
0
def _append_job_status_file(suite, task_job, event_time, messages):
    """Write messages to job status file."""
    job_log_name = os.getenv('CYLC_TASK_LOG_ROOT')
    if not job_log_name:
        job_log_name = get_suite_run_job_dir(suite, task_job, 'job')
    try:
        job_status_file = open(job_log_name + '.status', 'a')
    except IOError:
        if cylc.flow.flags.debug:
            import traceback
            traceback.print_exc()
        return
    for severity, message in messages:
        if message == TASK_OUTPUT_STARTED:
            job_id = os.getppid()
            if job_id > 1:
                # If os.getppid() returns 1, the original job process
                # is likely killed already
                job_status_file.write('%s=%s\n' % (CYLC_JOB_PID, job_id))
            job_status_file.write('%s=%s\n' % (CYLC_JOB_INIT_TIME, event_time))
        elif message == TASK_OUTPUT_SUCCEEDED:
            job_status_file.write(
                ('%s=%s\n' % (CYLC_JOB_EXIT, TASK_OUTPUT_SUCCEEDED.upper())) +
                ('%s=%s\n' % (CYLC_JOB_EXIT_TIME, event_time)))
        elif message.startswith(FAIL_MESSAGE_PREFIX):
            job_status_file.write(
                ('%s=%s\n' %
                 (CYLC_JOB_EXIT, message[len(FAIL_MESSAGE_PREFIX):])) +
                ('%s=%s\n' % (CYLC_JOB_EXIT_TIME, event_time)))
        elif message.startswith(ABORT_MESSAGE_PREFIX):
            job_status_file.write(
                ('%s=%s\n' %
                 (CYLC_JOB_EXIT, message[len(ABORT_MESSAGE_PREFIX):])) +
                ('%s=%s\n' % (CYLC_JOB_EXIT_TIME, event_time)))
        elif message.startswith(VACATION_MESSAGE_PREFIX):
            # Job vacated, remove entries related to current job
            job_status_file_name = job_status_file.name
            job_status_file.close()
            lines = []
            for line in open(job_status_file_name):
                if not line.startswith('CYLC_JOB_'):
                    lines.append(line)
            job_status_file = open(job_status_file_name, 'w')
            for line in lines:
                job_status_file.write(line)
            job_status_file.write(
                '%s=%s|%s|%s\n' %
                (CYLC_MESSAGE, event_time, severity, message))
        else:
            job_status_file.write(
                '%s=%s|%s|%s\n' %
                (CYLC_MESSAGE, event_time, severity, message))
    try:
        job_status_file.close()
    except IOError:
        if cylc.flow.flags.debug:
            import traceback
            traceback.print_exc()
Beispiel #3
0
def get_task_job_log(suite, point, name, submit_num=None, suffix=None):
    """Build the path to a task job log.

    The path is the suite's job directory joined with the task job ID for
    ``point``/``name``/``submit_num``; ``suffix`` is appended as a final
    path component when it is not None.
    """
    parts = (
        get_suite_run_job_dir(suite),
        get_task_job_id(point, name, submit_num),
    )
    if suffix is None:
        return os.path.join(*parts)
    return os.path.join(*parts, suffix)
Beispiel #4
0
def main(parser, options, *args, color=False):
    """Implement cylc cat-log CLI.

    Determine log path, user@host, batchview_cmd, and action (print, dir-list,
    cat, edit, or tail), and then if the log path is:
      a) local: perform action on log path, or
      b) remote: re-invoke cylc cat-log as a) on the remote account

    Args:
        parser: option parser; only ``parser.error()`` is used here, to
            report an illegal task ID or submit number.
        options: parsed command line options. The attributes read here are
            ``remote_args``, ``mode``, ``filename``, ``rotation_num``,
            ``submit_num``, ``force_remote`` and ``geditor``. For job logs,
            ``submit_num`` and ``filename`` are normalised in place.
        *args: ``(suite_name,)`` to view a suite log, or
            ``(suite_name, task_id)`` to view a task job log.
        color: passed through to ``view_log()``.

    Raises:
        UserInputError: if ``-f`` is given for a suite log, if the requested
            rotation number is out of range, or if a rotation number is
            given for a job log.
    """
    if options.remote_args:
        # Invoked on job hosts for job logs only, as a wrapper to view_log().
        # Tail and batchview commands come from global config on suite host).
        logpath, mode, tail_tmpl = options.remote_args[0:3]
        if logpath.startswith('$'):
            logpath = os.path.expandvars(logpath)
        elif logpath.startswith('~'):
            logpath = os.path.expanduser(logpath)
        try:
            batchview_cmd = options.remote_args[3]
        except IndexError:
            # The batchview command is optional.
            batchview_cmd = None
        res = view_log(logpath,
                       mode,
                       tail_tmpl,
                       batchview_cmd,
                       remote=True,
                       color=color)
        if res == 1:
            sys.exit(res)
        return

    suite_name = args[0]
    # Get long-format mode.
    try:
        mode = MODES[options.mode]
    except KeyError:
        mode = options.mode

    if len(args) == 1:
        # Cat suite logs, local only.
        if options.filename is not None:
            raise UserInputError("The '-f' option is for job logs only.")

        logpath = get_suite_run_log_name(suite_name)
        if options.rotation_num:
            # Rotated logs are ordered newest first by modification time.
            logs = glob('%s.*' % logpath)
            logs.sort(key=os.path.getmtime, reverse=True)
            try:
                logpath = logs[int(options.rotation_num)]
            except IndexError:
                raise UserInputError("max rotation %d" % (len(logs) - 1))
        tail_tmpl = str(glbl_cfg().get_host_item("tail command template"))
        out = view_log(logpath, mode, tail_tmpl, color=color)
        if out == 1:
            sys.exit(1)
        if mode == 'edit':
            tmpfile_edit(out, options.geditor)
        return

    if len(args) == 2:
        # Cat task job logs, may be on suite or job host.
        if options.rotation_num is not None:
            raise UserInputError("only suite (not job) logs get rotated")
        task_id = args[1]
        try:
            task, point = TaskID.split(task_id)
        except ValueError:
            parser.error("Illegal task ID: %s" % task_id)
        if options.submit_num != NN:
            try:
                options.submit_num = "%02d" % int(options.submit_num)
            except ValueError:
                parser.error("Illegal submit number: %s" % options.submit_num)
        if options.filename is None:
            options.filename = JOB_LOG_OUT
        else:
            # Convert short filename args to long (e.g. 'o' to 'job.out').
            try:
                options.filename = JOB_LOG_OPTS[options.filename]
            except KeyError:
                # Is already long form (standard log, or custom).
                pass
        user_at_host, batch_sys_name, live_job_id = get_task_job_attrs(
            suite_name, point, task, options.submit_num)
        user, host = split_user_at_host(user_at_host)
        batchview_cmd = None
        if live_job_id is not None:
            # Job is currently running. Get special batch system log view
            # command (e.g. qcat) if one exists, and the log is out or err.
            conf_key = None
            if options.filename == JOB_LOG_OUT:
                if mode == 'cat':
                    conf_key = "out viewer"
                elif mode == 'tail':
                    conf_key = "out tailer"
            elif options.filename == JOB_LOG_ERR:
                if mode == 'cat':
                    conf_key = "err viewer"
                elif mode == 'tail':
                    conf_key = "err tailer"
            if conf_key is not None:
                conf = glbl_cfg().get_host_item("batch systems", host, user)
                batchview_cmd_tmpl = None
                try:
                    batchview_cmd_tmpl = conf[batch_sys_name][conf_key]
                except KeyError:
                    # No special viewer configured for this batch system.
                    pass
                if batchview_cmd_tmpl is not None:
                    batchview_cmd = batchview_cmd_tmpl % {
                        "job_id": str(live_job_id)
                    }

        log_is_remote = (is_remote(host, user)
                         and (options.filename not in JOB_LOGS_LOCAL))
        log_is_retrieved = (glbl_cfg().get_host_item('retrieve job logs', host)
                            and live_job_id is None)
        if log_is_remote and (not log_is_retrieved or options.force_remote):
            logpath = os.path.normpath(
                get_remote_suite_run_job_dir(host, user, suite_name, point,
                                             task, options.submit_num,
                                             options.filename))
            tail_tmpl = str(glbl_cfg().get_host_item("tail command template",
                                                     host, user))
            # Reinvoke the cat-log command on the remote account.
            cmd = ['cat-log']
            if cylc.flow.flags.debug:
                cmd.append('--debug')
            for item in [logpath, mode, tail_tmpl]:
                cmd.append('--remote-arg=%s' % quote(item))
            if batchview_cmd:
                cmd.append('--remote-arg=%s' % quote(batchview_cmd))
            cmd.append(suite_name)
            is_edit_mode = (mode == 'edit')
            try:
                proc = remote_cylc_cmd(cmd,
                                       user,
                                       host,
                                       capture_process=is_edit_mode,
                                       manage=(mode == 'tail'))
            except KeyboardInterrupt:
                # Ctrl-C while tailing. Return here rather than fall through:
                # in edit mode "out" is not bound at this point, so reaching
                # the tmpfile_edit() call below would raise UnboundLocalError.
                return
            else:
                if is_edit_mode:
                    # Write remote stdout to a temp file for viewing in editor.
                    # Only BUFSIZE bytes at a time in case huge stdout volume.
                    out = NamedTemporaryFile()
                    data = proc.stdout.read(BUFSIZE)
                    while data:
                        out.write(data)
                        data = proc.stdout.read(BUFSIZE)
                    os.chmod(out.name, S_IRUSR)
                    out.seek(0, 0)
        else:
            # Local task job or local job log.
            logpath = os.path.normpath(
                get_suite_run_job_dir(suite_name, point, task,
                                      options.submit_num, options.filename))
            tail_tmpl = str(glbl_cfg().get_host_item("tail command template"))
            out = view_log(logpath,
                           mode,
                           tail_tmpl,
                           batchview_cmd,
                           color=color)
            if mode != 'edit':
                sys.exit(out)
        if mode == 'edit':
            tmpfile_edit(out, options.geditor)