def _receiver(self, message): """Wrap incoming messages and dispatch them to exposed methods. Args: message (dict): message contents """ # determine the server method to call try: method = getattr(self, message['command']) args = message['args'] args.update({'user': message['user']}) if 'meta' in message: args['meta'] = message['meta'] except KeyError: # malformed message return {'error': { 'message': 'Request missing required field(s).'}} except AttributeError: # no exposed method by that name return {'error': { 'message': 'No method by the name "%s"' % message['command']}} # generate response try: response = method(**args) except Exception as exc: # includes incorrect arguments (TypeError) LOG.exception(exc) # note the error server side import traceback return {'error': { 'message': str(exc), 'traceback': traceback.format_exc()}} return {'data': response}
def create_directory(dir_, name):
    """Create directory ``dir_`` (and any missing parents).

    Args:
        dir_ (str): path of the directory to create.
        name (str): human-readable name used in the error message.

    Raises:
        GlobalConfigError: if the directory cannot be created.
    """
    try:
        os.makedirs(dir_, exist_ok=True)
    except OSError as exc:
        LOG.exception(exc)
        # Chain the original OSError so the root cause stays visible
        # in the traceback.
        raise GlobalConfigError(
            'Failed to create directory "' + name + '"') from exc
def _manip_task_jobs_callback(
        self, ctx, suite, itasks, summary_callback, more_callbacks=None):
    """Callback when submit/poll/kill tasks command exits.

    Parse each prefixed line of the command output and dispatch it to
    the callback registered for that prefix.  Tasks for which the
    summary prefix reported no status are then handled as failures.
    """
    if ctx.ret_code:
        LOG.error(ctx)
    else:
        LOG.debug(ctx)
    # A dict for easy reference of (CYCLE, NAME, SUBMIT_NUM) -> TaskProxy
    #
    # Note for "reload": A TaskProxy instance may be replaced on reload, so
    # the "itasks" list may not reference the TaskProxy objects that
    # replace the old ones. The .reload_successor attribute provides the
    # link(s) for us to get to the latest replacement.
    #
    # Note for "kill": It is possible for a job to trigger its trap and
    # report back to the suite before this logic is called. If so, the task
    # will no longer be TASK_STATUS_SUBMITTED or TASK_STATUS_RUNNING, and
    # its output line will be ignored here.
    tasks = {}
    for itask in itasks:
        while itask.reload_successor is not None:
            itask = itask.reload_successor
        if itask.point is not None and itask.submit_num:
            submit_num = "%02d" % itask.submit_num
            tasks[(str(itask.point), itask.tdef.name, submit_num)] = itask
    handlers = [(self.batch_sys_mgr.OUT_PREFIX_SUMMARY, summary_callback)]
    if more_callbacks:
        for prefix, callback in more_callbacks.items():
            handlers.append((prefix, callback))
    out = ctx.out
    if not out:
        out = ""
    # Tasks still present here after the loop got no summary status.
    bad_tasks = dict(tasks)
    for line in out.splitlines(True):
        for prefix, callback in handlers:
            if line.startswith(prefix):
                line = line[len(prefix):].strip()
                try:
                    path = line.split("|", 2)[1]  # timestamp, path, status
                    point, name, submit_num = path.split(os.sep, 2)
                    if prefix == self.batch_sys_mgr.OUT_PREFIX_SUMMARY:
                        del bad_tasks[(point, name, submit_num)]
                    itask = tasks[(point, name, submit_num)]
                    callback(suite, itask, ctx, line)
                except (LookupError, ValueError) as exc:
                    # KeyError is a subclass of LookupError, so a single
                    # LookupError covers both dict and index failures.
                    LOG.warning(
                        'Unhandled %s output: %s', ctx.cmd_key, line)
                    LOG.exception(exc)
    # Task jobs that are in the original command but did not get a status
    # in the output. Handle as failures.
    for key, itask in sorted(bad_tasks.items()):
        line = (
            "|".join([ctx.timestamp, os.sep.join(key), "1"]) + "\n")
        summary_callback(suite, itask, ctx, line)
def _event_email_callback(self, proc_ctx, schd_ctx):
    """Call back when email notification command exits."""
    for id_key in proc_ctx.cmd_kwargs["id_keys"]:
        key1, point, name, submit_num = id_key
        try:
            if proc_ctx.ret_code != 0:
                # Notification failed: let the timer retry later.
                self.event_timers[id_key].unset_waiting()
                continue
            # Notification sent: retire the timer and record the
            # successful activity for the task job.
            del self.event_timers[id_key]
            log_ctx = SubProcContext((key1, submit_num), None)
            log_ctx.ret_code = 0
            log_task_job_activity(
                log_ctx, schd_ctx.suite, point, name, submit_num)
        except KeyError as exc:
            # Timer already gone; log and carry on with the next key.
            LOG.exception(exc)
def _event_email_callback(self, proc_ctx, schd_ctx):
    """Call back when email notification command exits.

    For each (key1, point, name, submit_num) id key attached to the
    completed command: on success, drop its event timer and record a
    zero-return activity entry; on failure, mark the timer as no
    longer waiting (presumably so it can retry — confirm with the
    timer implementation).
    """
    for id_key in proc_ctx.cmd_kwargs["id_keys"]:
        key1, point, name, submit_num = id_key
        try:
            if proc_ctx.ret_code == 0:
                # Email sent: this event's timer is finished.
                del self.event_timers[id_key]
                log_ctx = SubProcContext((key1, submit_num), None)
                log_ctx.ret_code = 0
                log_task_job_activity(log_ctx, schd_ctx.suite, point, name, submit_num)
            else:
                # Email failed: timer stays registered.
                self.event_timers[id_key].unset_waiting()
        except KeyError as exc:
            # Timer missing for this id key; log and continue.
            LOG.exception(exc)
def _run_command_init(cls, ctx, callback=None, callback_args=None):
    """Prepare and launch shell command in ctx.

    Returns the launched process object on success; on launch failure,
    records the error in ctx, invokes the exit callback, and returns
    None.
    """
    try:
        if ctx.cmd_kwargs.get('stdin_files'):
            if len(ctx.cmd_kwargs['stdin_files']) > 1:
                # Concatenate multiple stdin sources into one temp file.
                stdin_file = TemporaryFile()
                for file_ in ctx.cmd_kwargs['stdin_files']:
                    if hasattr(file_, 'read'):
                        stdin_file.write(file_.read())
                    else:
                        # Close each source promptly instead of leaking
                        # the handle from a bare open(...).read().
                        with open(file_, 'rb') as handle:
                            stdin_file.write(handle.read())
                stdin_file.seek(0)
            elif hasattr(ctx.cmd_kwargs['stdin_files'][0], 'read'):
                stdin_file = ctx.cmd_kwargs['stdin_files'][0]
            else:
                stdin_file = open(ctx.cmd_kwargs['stdin_files'][0], 'rb')
        elif ctx.cmd_kwargs.get('stdin_str'):
            # Binary temp file, so the string must be encoded.
            stdin_file = TemporaryFile('bw+')
            stdin_file.write(ctx.cmd_kwargs.get('stdin_str').encode())
            stdin_file.seek(0)
        else:
            stdin_file = open(os.devnull)
        proc = procopen(
            ctx.cmd, stdin=stdin_file, stdoutpipe=True, stderrpipe=True,
            # Execute command as a process group leader,
            # so we can use "os.killpg" to kill the whole group.
            preexec_fn=os.setpgrp,
            env=ctx.cmd_kwargs.get('env'),
            usesh=ctx.cmd_kwargs.get('shell'))
        # calls to open a shell are aggregated in cylc_subproc.procopen()
        # with logging for what is calling it and the commands given
    except (IOError, OSError) as exc:
        if exc.filename is None:
            exc.filename = ctx.cmd[0]
        LOG.exception(exc)
        ctx.ret_code = 1
        ctx.err = str(exc)
        cls._run_command_exit(ctx, callback, callback_args)
        return None
    else:
        LOG.debug(ctx.cmd)
        return proc
def _manip_task_jobs_callback(
        self, ctx, suite, itasks, summary_callback, more_callbacks=None):
    """Callback when submit/poll/kill tasks command exits.

    Dispatch each prefixed line of the command output to the callback
    registered for that prefix.
    """
    if ctx.ret_code:
        LOG.error(ctx)
    else:
        LOG.debug(ctx)
    # A dict for easy reference of (CYCLE, NAME, SUBMIT_NUM) -> TaskProxy
    tasks = {}
    # Note for "kill": It is possible for a job to trigger its trap and
    # report back to the suite back this logic is called. If so, the task
    # will no longer be TASK_STATUS_SUBMITTED or TASK_STATUS_RUNNING, and
    # its output line will be ignored here.
    for itask in itasks:
        if itask.point is not None and itask.submit_num:
            submit_num = "%02d" % (itask.submit_num)
            tasks[(str(itask.point), itask.tdef.name, submit_num)] = itask
    handlers = [(self.batch_sys_mgr.OUT_PREFIX_SUMMARY, summary_callback)]
    if more_callbacks:
        for prefix, callback in more_callbacks.items():
            handlers.append((prefix, callback))
    out = ctx.out
    if not out:
        out = ""
        # Something is very wrong here
        # Fallback to use "job_log_dirs" list to report the problem
        # by synthesizing a failure summary line for every job the
        # command was asked to handle.
        job_log_dirs = ctx.cmd_kwargs.get("job_log_dirs", [])
        for job_log_dir in job_log_dirs:
            point, name, submit_num = job_log_dir.split(os.sep, 2)
            # NOTE(review): an unknown job_log_dir raises an uncaught
            # KeyError here — confirm callers guarantee membership.
            itask = tasks[(point, name, submit_num)]
            out += (self.batch_sys_mgr.OUT_PREFIX_SUMMARY
                    + "|".join([ctx.timestamp, job_log_dir, "1"]) + "\n")
    for line in out.splitlines(True):
        for prefix, callback in handlers:
            if line.startswith(prefix):
                line = line[len(prefix):].strip()
                try:
                    path = line.split("|", 2)[1]  # timestamp, path, status
                    point, name, submit_num = path.split(os.sep, 2)
                    itask = tasks[(point, name, submit_num)]
                    callback(suite, itask, ctx, line)
                except (LookupError, ValueError) as exc:
                    # Malformed or unexpected output line: warn and
                    # continue with the rest of the output.
                    LOG.warning(
                        'Unhandled %s output: %s', ctx.cmd_key, line)
                    LOG.exception(exc)
def _run_command_init(cls, ctx, callback=None, callback_args=None):
    """Prepare and launch shell command in ctx.

    Returns the Popen object on success; on launch failure, records the
    error in ctx, invokes the exit callback, and returns None.
    """
    try:
        if ctx.cmd_kwargs.get('stdin_file_paths'):
            if len(ctx.cmd_kwargs['stdin_file_paths']) > 1:
                # Concatenate multiple stdin files into one temp file.
                stdin_file = TemporaryFile()
                for file_path in ctx.cmd_kwargs['stdin_file_paths']:
                    # Close each source promptly instead of leaking the
                    # handle from a bare open(...).read().
                    with open(file_path, 'rb') as handle:
                        stdin_file.write(handle.read())
                stdin_file.seek(0)
            else:
                stdin_file = open(
                    ctx.cmd_kwargs['stdin_file_paths'][0], 'rb')
        elif ctx.cmd_kwargs.get('stdin_str'):
            # TemporaryFile defaults to binary mode ('w+b'), so the
            # string must be encoded before writing (writing str would
            # raise TypeError on Python 3).
            stdin_file = TemporaryFile()
            stdin_file.write(ctx.cmd_kwargs.get('stdin_str').encode())
            stdin_file.seek(0)
        else:
            stdin_file = open(os.devnull)
        proc = Popen(
            ctx.cmd, stdin=stdin_file, stdout=PIPE, stderr=PIPE,
            # Execute command as a process group leader,
            # so we can use "os.killpg" to kill the whole group.
            preexec_fn=os.setpgrp,
            env=ctx.cmd_kwargs.get('env'),
            shell=ctx.cmd_kwargs.get('shell'))
    except (IOError, OSError) as exc:
        if exc.filename is None:
            exc.filename = ctx.cmd[0]
        LOG.exception(exc)
        ctx.ret_code = 1
        ctx.err = str(exc)
        cls._run_command_exit(ctx, callback, callback_args)
        return None
    else:
        LOG.debug(ctx.cmd)
        return proc
def log_task_job_activity(ctx, suite, point, name, submit_num=None):
    """Log an activity for a task job."""
    ctx_str = str(ctx)
    if not ctx_str:
        return
    if isinstance(ctx.cmd_key, tuple):
        # An event handler: the submit number is the last cmd_key item.
        submit_num = ctx.cmd_key[-1]
    activity_log_path = get_task_job_activity_log(
        suite, point, name, submit_num)
    try:
        with open(activity_log_path, "ab") as activity_file:
            activity_file.write((ctx_str + '\n').encode())
    except IOError as exc:
        # No job directory yet (e.g. a job host selection command failed
        # before submission created one): record the information in the
        # suite log instead.
        LOG.exception(exc)
        LOG.info(ctx_str)
    if ctx.cmd:
        if ctx.ret_code:
            LOG.error(ctx_str)
        else:
            LOG.debug(ctx_str)
def log_task_job_activity(ctx, suite, point, name, submit_num=None):
    """Log an activity for a task job.

    Append the string form of ctx to the task job's activity log; fall
    back to the suite log if the job directory does not exist.
    """
    ctx_str = str(ctx)
    if not ctx_str:
        return
    if isinstance(ctx.cmd_key, tuple):  # An event handler
        submit_num = ctx.cmd_key[-1]
    job_activity_log = get_task_job_activity_log(
        suite, point, name, submit_num)
    try:
        # The log is opened in binary mode, so the text must be encoded
        # (writing a str to a binary file raises TypeError on Python 3).
        with open(job_activity_log, "ab") as handle:
            handle.write((ctx_str + '\n').encode())
    except IOError as exc:
        # This happens when there is no job directory, e.g. if job host
        # selection command causes a submission failure, there will be no
        # job directory. In this case, just send the information to the
        # suite log.
        LOG.exception(exc)
        LOG.info(ctx_str)
    if ctx.cmd and ctx.ret_code:
        LOG.error(ctx_str)
    elif ctx.cmd:
        LOG.debug(ctx_str)
def _job_logs_retrieval_callback(self, proc_ctx, schd_ctx):
    """Call back when log job retrieval completes.

    Check that the expected job log files now exist locally for each id
    key; retire the event timer on success, or mark it for retry and
    record which files are missing on failure.
    """
    if proc_ctx.ret_code:
        LOG.error(proc_ctx)
    else:
        LOG.debug(proc_ctx)
    for id_key in proc_ctx.cmd_kwargs["id_keys"]:
        key1, point, name, submit_num = id_key
        try:
            # All completed jobs are expected to have a "job.out".
            fnames = [JOB_LOG_OUT]
            # Non-succeeded jobs should also have a "job.err".  Use an
            # equality test: the previous `key1[1] not in 'succeeded'`
            # was a substring check, so event names like 'succ' would
            # wrongly skip job.err.  Non-string values take the same
            # no-append path the old TypeError guard produced.
            if isinstance(key1[1], str) and key1[1] != 'succeeded':
                fnames.append(JOB_LOG_ERR)
            fname_oks = {}
            for fname in fnames:
                fname_oks[fname] = os.path.exists(
                    get_task_job_log(
                        schd_ctx.suite, point, name, submit_num, fname))
            # All expected paths must exist to record a good attempt
            log_ctx = SubProcContext((key1, submit_num), None)
            if all(fname_oks.values()):
                log_ctx.ret_code = 0
                del self.event_timers[id_key]
            else:
                log_ctx.ret_code = 1
                log_ctx.err = "File(s) not retrieved:"
                for fname, exist_ok in sorted(fname_oks.items()):
                    if not exist_ok:
                        log_ctx.err += " %s" % fname
                self.event_timers[id_key].unset_waiting()
            log_task_job_activity(log_ctx, schd_ctx.suite, point, name,
                                  submit_num)
        except KeyError as exc:
            # Timer already removed for this id key; log and continue.
            LOG.exception(exc)
def _job_logs_retrieval_callback(self, proc_ctx, schd_ctx):
    """Call back when log job retrieval completes.

    Check that the expected job log files now exist locally for each id
    key; retire the event timer on success, or mark it for retry and
    record which files are missing on failure.
    """
    if proc_ctx.ret_code:
        LOG.error(proc_ctx)
    else:
        LOG.debug(proc_ctx)
    for id_key in proc_ctx.cmd_kwargs["id_keys"]:
        key1, point, name, submit_num = id_key
        try:
            # All completed jobs are expected to have a "job.out".
            fnames = [JOB_LOG_OUT]
            try:
                # NOTE(review): `not in` here is a substring test, not
                # equality — event names that are substrings of
                # 'succeeded' would skip job.err; confirm intent.
                if key1[1] not in 'succeeded':
                    fnames.append(JOB_LOG_ERR)
            except TypeError:
                # key1[1] not a string; treat as succeeded (no job.err).
                pass
            fname_oks = {}
            for fname in fnames:
                fname_oks[fname] = os.path.exists(get_task_job_log(
                    schd_ctx.suite, point, name, submit_num, fname))
            # All expected paths must exist to record a good attempt
            log_ctx = SubProcContext((key1, submit_num), None)
            if all(fname_oks.values()):
                log_ctx.ret_code = 0
                del self.event_timers[id_key]
            else:
                log_ctx.ret_code = 1
                log_ctx.err = "File(s) not retrieved:"
                for fname, exist_ok in sorted(fname_oks.items()):
                    if not exist_ok:
                        log_ctx.err += " %s" % fname
                # Allow the retrieval to be attempted again.
                self.event_timers[id_key].unset_waiting()
            log_task_job_activity(
                log_ctx, schd_ctx.suite, point, name, submit_num)
        except KeyError as exc:
            # Timer already removed for this id key; log and continue.
            LOG.exception(exc)
def _receiver(self, message): """Wrap incoming messages and dispatch them to exposed methods.""" # determine the server method to call try: method = getattr(self, message['command']) args = message['args'] args.update({'user': message['user']}) if 'meta' in message: args['meta'] = message['meta'] except KeyError: # malformed message return {'error': {'message': 'Request missing required field(s).'}} except AttributeError: # no exposed method by that name return { 'error': { 'message': 'No method by the name "%s"' % message['command'] } } # generate response try: response = method(**args) except Exception as exc: # includes incorrect arguments (TypeError) LOG.exception(exc) # note the error server side import traceback return { 'error': { 'message': str(exc), 'traceback': traceback.format_exc() } } return {'data': response}