def _process_job_logs_retrieval(self, schd_ctx, ctx, id_keys):
    """Queue a command that pulls task job logs back from remote user@host.

    The configured "retrieve job logs command" is extended with the
    configured "ssh command" (as --rsh), one include filter per directory
    level of each job in id_keys, and a final catch-all exclude.  The
    resulting command is put on the process pool with
    self._job_logs_retrieval_callback as its callback.
    """
    target = ctx.user_at_host
    if target and "@" in target:
        s_user, _, s_host = target.partition("@")
    else:
        s_user = None
        s_host = target
    ssh_str = str(glbl_cfg().get_host_item("ssh command", s_host, s_user))
    rsync_str = str(
        glbl_cfg().get_host_item("retrieve job logs command", s_host, s_user))

    cmd = shlex.split(rsync_str)
    cmd.append("--rsh=" + ssh_str)
    if LOG.isEnabledFor(DEBUG):
        cmd.append("-v")
    if ctx.max_size:
        cmd.append("--max-size=%s" % (ctx.max_size, ))

    # Include filters: every directory level leading to each job, plus the
    # whole content of the job directory itself.
    includes = set()
    for _, point, name, submit_num in id_keys:
        point_dir = "/%s" % (point)
        task_dir = "/%s/%s" % (point, name)
        job_dir = "/%s/%s/%02d" % (point, name, submit_num)
        includes.update((point_dir, task_dir, job_dir, job_dir + "/**"))
    cmd.extend("--include=%s" % (item) for item in sorted(includes))
    # Anything not explicitly included above is left behind.
    cmd.append("--exclude=/**")

    # Remote source, then local target.
    remote_job_dir = get_remote_suite_run_job_dir(
        s_host, s_user, schd_ctx.suite)
    cmd.append("%s:%s/" % (target, remote_job_dir))
    cmd.append(get_suite_run_job_dir(schd_ctx.suite) + "/")

    retrieval_ctx = SubProcContext(
        ctx, cmd, env=dict(os.environ), id_keys=id_keys)
    self.proc_pool.put_command(
        retrieval_ctx, self._job_logs_retrieval_callback, [schd_ctx])
def _append_job_status_file(suite, task_job, event_time, messages):
    """Write messages to job status file.

    The status file is "<root>.status", where <root> is the value of the
    CYLC_TASK_LOG_ROOT environment variable if it is set and non-empty,
    otherwise get_suite_run_job_dir(suite, task_job, 'job').

    Args:
        suite: suite name; only used to locate the job log directory when
            CYLC_TASK_LOG_ROOT is not set.
        task_job: task job identifier; only used likewise.
        event_time: time value written alongside each entry.
        messages: iterable of (severity, message) pairs.

    An IOError on opening the file for append, or on the final close, is
    not raised; a traceback is printed only if cylc.flow.flags.debug is
    set.  Errors raised while writing still propagate, but the file is now
    always closed first.
    """
    job_log_name = os.getenv('CYLC_TASK_LOG_ROOT')
    if not job_log_name:
        job_log_name = get_suite_run_job_dir(suite, task_job, 'job')
    status_path = job_log_name + '.status'
    try:
        job_status_file = open(status_path, 'a')
    except IOError:
        if cylc.flow.flags.debug:
            import traceback
            traceback.print_exc()
        return
    try:
        for severity, message in messages:
            if message == TASK_OUTPUT_STARTED:
                job_id = os.getppid()
                if job_id > 1:
                    # If os.getppid() returns 1, the original job process
                    # is likely killed already
                    job_status_file.write(
                        '%s=%s\n' % (CYLC_JOB_PID, job_id))
                job_status_file.write(
                    '%s=%s\n' % (CYLC_JOB_INIT_TIME, event_time))
            elif message == TASK_OUTPUT_SUCCEEDED:
                job_status_file.write(
                    ('%s=%s\n' % (
                        CYLC_JOB_EXIT, TASK_OUTPUT_SUCCEEDED.upper())) +
                    ('%s=%s\n' % (CYLC_JOB_EXIT_TIME, event_time)))
            elif message.startswith(FAIL_MESSAGE_PREFIX):
                job_status_file.write(
                    ('%s=%s\n' % (
                        CYLC_JOB_EXIT,
                        message[len(FAIL_MESSAGE_PREFIX):])) +
                    ('%s=%s\n' % (CYLC_JOB_EXIT_TIME, event_time)))
            elif message.startswith(ABORT_MESSAGE_PREFIX):
                job_status_file.write(
                    ('%s=%s\n' % (
                        CYLC_JOB_EXIT,
                        message[len(ABORT_MESSAGE_PREFIX):])) +
                    ('%s=%s\n' % (CYLC_JOB_EXIT_TIME, event_time)))
            elif message.startswith(VACATION_MESSAGE_PREFIX):
                # Job vacated, remove entries related to current job:
                # flush what has been written so far, re-read the file
                # keeping only lines that do not start with 'CYLC_JOB_',
                # then rewrite it from scratch.
                job_status_file.close()
                with open(status_path) as handle:
                    lines = [
                        line for line in handle
                        if not line.startswith('CYLC_JOB_')]
                # Later messages in this call go to the rewritten file.
                job_status_file = open(status_path, 'w')
                job_status_file.writelines(lines)
                job_status_file.write('%s=%s|%s|%s\n' % (
                    CYLC_MESSAGE, event_time, severity, message))
            else:
                job_status_file.write('%s=%s|%s|%s\n' % (
                    CYLC_MESSAGE, event_time, severity, message))
    finally:
        # Always release the handle, even if a write above raised.
        try:
            job_status_file.close()
        except IOError:
            if cylc.flow.flags.debug:
                import traceback
                traceback.print_exc()
def get_task_job_log(suite, point, name, submit_num=None, suffix=None):
    """Return the full job log path.

    The path is the suite run job directory joined with the task job ID
    for (point, name, submit_num), and with suffix appended as a final
    path component when suffix is not None.
    """
    job_dir = get_suite_run_job_dir(suite)
    job_id = get_task_job_id(point, name, submit_num)
    if suffix is None:
        return os.path.join(job_dir, job_id)
    return os.path.join(job_dir, job_id, suffix)
def main(parser, options, *args, color=False):
    """Implement cylc cat-log CLI.

    Determine log path, user@host, batchview_cmd, and action (print, dir-list,
    cat, edit, or tail), and then if the log path is:
      a) local: perform action on log path, or
      b) remote: re-invoke cylc cat-log as a) on the remote account

    Args:
        parser: option parser; its error() method is called to report an
            illegal task ID or submit number (presumably it does not
            return - the code after it relies on that).
        options: parsed options.  The attributes read here are
            remote_args, mode, filename, rotation_num, submit_num,
            force_remote and geditor; submit_num and filename may be
            rewritten in place to their normalised forms.
        *args: the suite name, optionally followed by a task ID.
        color: passed through to view_log().

    Raises:
        UserInputError: if '-f' is given for a suite log, if the requested
            rotation number is out of range, or if a rotation number is
            given for a job log.
    """
    if options.remote_args:
        # Invoked on job hosts for job logs only, as a wrapper to view_log().
        # Tail and batchview commands come from global config on suite host.
        logpath, mode, tail_tmpl = options.remote_args[0:3]
        if logpath.startswith('$'):
            logpath = os.path.expandvars(logpath)
        elif logpath.startswith('~'):
            logpath = os.path.expanduser(logpath)
        try:
            batchview_cmd = options.remote_args[3]
        except IndexError:
            batchview_cmd = None
        res = view_log(logpath, mode, tail_tmpl, batchview_cmd, remote=True,
                       color=color)
        if res == 1:
            sys.exit(res)
        return

    suite_name = args[0]
    # Get long-format mode.
    try:
        mode = MODES[options.mode]
    except KeyError:
        mode = options.mode

    if len(args) == 1:
        # Cat suite logs, local only.
        if options.filename is not None:
            raise UserInputError("The '-f' option is for job logs only.")

        logpath = get_suite_run_log_name(suite_name)
        if options.rotation_num:
            # Rotated logs are indexed newest first, by modification time.
            logs = glob('%s.*' % logpath)
            logs.sort(key=os.path.getmtime, reverse=True)
            try:
                logpath = logs[int(options.rotation_num)]
            except IndexError:
                raise UserInputError("max rotation %d" % (len(logs) - 1))
        tail_tmpl = str(glbl_cfg().get_host_item("tail command template"))
        out = view_log(logpath, mode, tail_tmpl, color=color)
        if out == 1:
            sys.exit(1)
        if mode == 'edit':
            tmpfile_edit(out, options.geditor)
        return

    if len(args) == 2:
        # Cat task job logs, may be on suite or job host.
        if options.rotation_num is not None:
            raise UserInputError("only suite (not job) logs get rotated")
        task_id = args[1]
        try:
            task, point = TaskID.split(task_id)
        except ValueError:
            parser.error("Illegal task ID: %s" % task_id)
        if options.submit_num != NN:
            try:
                options.submit_num = "%02d" % int(options.submit_num)
            except ValueError:
                parser.error("Illegal submit number: %s" % options.submit_num)
        if options.filename is None:
            options.filename = JOB_LOG_OUT
        else:
            # Convert short filename args to long (e.g. 'o' to 'job.out').
            try:
                options.filename = JOB_LOG_OPTS[options.filename]
            except KeyError:
                # Is already long form (standard log, or custom).
                pass
        user_at_host, batch_sys_name, live_job_id = get_task_job_attrs(
            suite_name, point, task, options.submit_num)
        user, host = split_user_at_host(user_at_host)
        batchview_cmd = None
        if live_job_id is not None:
            # Job is currently running. Get special batch system log view
            # command (e.g. qcat) if one exists, and the log is out or err.
            conf_key = None
            if options.filename == JOB_LOG_OUT:
                if mode == 'cat':
                    conf_key = "out viewer"
                elif mode == 'tail':
                    conf_key = "out tailer"
            elif options.filename == JOB_LOG_ERR:
                if mode == 'cat':
                    conf_key = "err viewer"
                elif mode == 'tail':
                    conf_key = "err tailer"
            if conf_key is not None:
                conf = glbl_cfg().get_host_item("batch systems", host, user)
                batchview_cmd_tmpl = None
                try:
                    batchview_cmd_tmpl = conf[batch_sys_name][conf_key]
                except KeyError:
                    pass
                if batchview_cmd_tmpl is not None:
                    batchview_cmd = batchview_cmd_tmpl % {
                        "job_id": str(live_job_id)
                    }

        log_is_remote = (is_remote(host, user)
                         and (options.filename not in JOB_LOGS_LOCAL))
        log_is_retrieved = (
            glbl_cfg().get_host_item('retrieve job logs', host)
            and live_job_id is None)
        if log_is_remote and (not log_is_retrieved or options.force_remote):
            logpath = os.path.normpath(
                get_remote_suite_run_job_dir(
                    host, user, suite_name, point, task,
                    options.submit_num, options.filename))
            tail_tmpl = str(glbl_cfg().get_host_item(
                "tail command template", host, user))
            # Reinvoke the cat-log command on the remote account.
            cmd = ['cat-log']
            if cylc.flow.flags.debug:
                cmd.append('--debug')
            for item in [logpath, mode, tail_tmpl]:
                cmd.append('--remote-arg=%s' % quote(item))
            if batchview_cmd:
                cmd.append('--remote-arg=%s' % quote(batchview_cmd))
            cmd.append(suite_name)
            is_edit_mode = (mode == 'edit')
            try:
                proc = remote_cylc_cmd(
                    cmd, user, host, capture_process=is_edit_mode,
                    manage=(mode == 'tail'))
            except KeyboardInterrupt:
                # Ctrl-C (typically while tailing).  Nothing was captured,
                # so stop here instead of falling through to the edit step
                # below with "out" never assigned.
                return
            else:
                if is_edit_mode:
                    # Write remote stdout to a temp file for viewing in editor.
                    # Only BUFSIZE bytes at a time in case huge stdout volume.
                    out = NamedTemporaryFile()
                    data = proc.stdout.read(BUFSIZE)
                    while data:
                        out.write(data)
                        data = proc.stdout.read(BUFSIZE)
                    os.chmod(out.name, S_IRUSR)
                    out.seek(0, 0)
        else:
            # Local task job or local job log.
            logpath = os.path.normpath(
                get_suite_run_job_dir(
                    suite_name, point, task,
                    options.submit_num, options.filename))
            tail_tmpl = str(glbl_cfg().get_host_item("tail command template"))
            out = view_log(logpath, mode, tail_tmpl, batchview_cmd,
                           color=color)
            if mode != 'edit':
                sys.exit(out)
        if mode == 'edit':
            tmpfile_edit(out, options.geditor)