def _submit_task_job_callback(self, suite, itask, cmd_ctx, line):
    """Handle one "|"-delimited line of job-submit output for one task job.

    The first and third fields of the line become the timestamp and the
    integer return code of a new SuiteProcContext; the fourth field, when
    present, is recorded as the task's submit method ID. The context is
    logged, then either a "submitted" or a "submit failed" message is
    passed to the task events manager.
    """
    ctx = SuiteProcContext(self.JOBS_SUBMIT, None)
    ctx.out = line
    fields = line.split("|")
    if len(fields) < 3:
        # Malformed line: treat as a failure.
        ctx.ret_code = 1
        ctx.cmd = cmd_ctx.cmd  # print original command on failure
    else:
        ctx.timestamp = fields[0]
        ctx.ret_code = int(fields[2])
        if ctx.ret_code:
            ctx.cmd = cmd_ctx.cmd  # print original command on failure
    log_task_job_activity(ctx, suite, itask.point, itask.tdef.name)

    # Nothing more to record for this return code.
    if ctx.ret_code == SuiteProcPool.RET_CODE_SUITE_STOPPING:
        return

    # A missing field and the literal string "None" both mean "no ID".
    job_id = fields[3] if len(fields) > 3 else None
    if job_id == "None":
        job_id = None
    itask.summary['submit_method_id'] = job_id

    # Success needs both a job ID and a zero return code.
    if job_id and ctx.ret_code == 0:
        level, message = INFO, TASK_OUTPUT_SUBMITTED
    else:
        level, message = CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED
    self.task_events_mgr.process_message(itask, level, message, ctx.timestamp)
def _submit_task_job_callback(self, suite, itask, cmd_ctx, line):
    """Handle one "|"-delimited line of job-submit output for one task job.

    The first and third fields of the line become the timestamp and the
    integer return code of a new SuiteProcContext; the fourth field, when
    present, is recorded as the task's submit method ID. The context is
    logged through the task events manager, which then receives either a
    "submitted at <time>" or a "submit failed at <time>" style message.
    """
    ctx = SuiteProcContext(self.JOBS_SUBMIT, None)
    ctx.out = line
    fields = line.split("|")
    if len(fields) < 3:
        # Malformed line: treat as a failure.
        ctx.ret_code = 1
        ctx.cmd = cmd_ctx.cmd  # print original command on failure
    else:
        ctx.timestamp = fields[0]
        ctx.ret_code = int(fields[2])
        if ctx.ret_code:
            ctx.cmd = cmd_ctx.cmd  # print original command on failure
    self.task_events_mgr.log_task_job_activity(
        ctx, suite, itask.point, itask.tdef.name)

    # Nothing more to record for this return code.
    if ctx.ret_code == SuiteProcPool.JOB_SKIPPED_FLAG:
        return

    # A missing field and the literal string "None" both mean "no ID".
    submit_id = fields[3] if len(fields) > 3 else None
    if submit_id == "None":
        submit_id = None
    itask.summary['submit_method_id'] = submit_id

    # Success needs both a job ID and a zero return code.
    if submit_id and ctx.ret_code == 0:
        level, event = INFO, TASK_OUTPUT_SUBMITTED
    else:
        level, event = CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED
    self.task_events_mgr.process_message(
        itask, level, '%s at %s' % (event, ctx.timestamp))
def _poll_task_job_callback(self, suite, itask, cmd_ctx, line):
    """Helper for _poll_task_jobs_callback, on one task job.

    Split one "|"-delimited line of poll output, log the activity, and
    pass a message describing the polled job state to the task events
    manager.

    Args:
        suite: passed through to log_task_job_activity.
        itask: task proxy; its point, tdef.name and summary are used.
        cmd_ctx: context of the poll command; its cmd is copied onto the
            logged context when the line cannot be parsed.
        line: one line of poll output. Fields 5 to 10 (counting from 1)
            are read; see cylc.batch_sys_manager.JobPollContext.
    """
    ctx = SuiteProcContext(self.JOBS_POLL, None)
    ctx.out = line
    ctx.ret_code = 0

    items = line.split("|")
    # See cylc.batch_sys_manager.JobPollContext
    try:
        (
            batch_sys_exit_polled, run_status, run_signal,
            time_submit_exit, time_run, time_run_exit
        ) = items[4:10]
    except (IndexError, ValueError):
        # A slice never raises IndexError: a line with too few fields
        # fails in the tuple unpacking with ValueError instead, so that is
        # what must be caught for the "poll failed" path to be reachable.
        itask.summary['latest_message'] = 'poll failed'
        cylc.flags.iflag = True
        ctx.cmd = cmd_ctx.cmd  # print original command on failure
        return
    finally:
        # Log the activity whether or not the line could be parsed.
        log_task_job_activity(ctx, suite, itask.point, itask.tdef.name)

    flag = self.task_events_mgr.POLLED_FLAG
    if run_status == "1" and run_signal in ["ERR", "EXIT"]:
        # Failed normally
        self.task_events_mgr.process_message(
            itask, INFO, TASK_OUTPUT_FAILED, time_run_exit, flag)
    elif run_status == "1" and batch_sys_exit_polled == "1":
        # Failed by a signal, and no longer in batch system
        self.task_events_mgr.process_message(
            itask, INFO, TASK_OUTPUT_FAILED, time_run_exit, flag)
        self.task_events_mgr.process_message(
            itask, INFO, FAIL_MESSAGE_PREFIX + run_signal,
            time_run_exit, flag)
    elif run_status == "1":
        # The job has terminated, but is still managed by batch system.
        # Some batch system may restart a job in this state, so don't
        # mark as failed yet.
        self.task_events_mgr.process_message(
            itask, INFO, TASK_OUTPUT_STARTED, time_run, flag)
    elif run_status == "0":
        # The job succeeded
        self.task_events_mgr.process_message(
            itask, INFO, TASK_OUTPUT_SUCCEEDED, time_run_exit, flag)
    elif time_run and batch_sys_exit_polled == "1":
        # The job has terminated without executing the error trap
        self.task_events_mgr.process_message(
            itask, INFO, TASK_OUTPUT_FAILED, get_current_time_string(),
            flag)
    elif time_run:
        # The job has started, and is still managed by batch system
        self.task_events_mgr.process_message(
            itask, INFO, TASK_OUTPUT_STARTED, time_run, flag)
    elif batch_sys_exit_polled == "1":
        # The job never ran, and no longer in batch system
        self.task_events_mgr.process_message(
            itask, INFO, self.task_events_mgr.EVENT_SUBMIT_FAILED,
            time_submit_exit, flag)
    else:
        # The job never ran, and is in batch system
        self.task_events_mgr.process_message(
            itask, INFO, TASK_STATUS_SUBMITTED, time_submit_exit, flag)
def _poll_task_job_message_callback(self, suite, itask, cmd_ctx, line):
    """Handle one "|"-delimited task message line found by a job poll.

    Fields 3 to 5 of the line (counting from 1) are taken as the event
    time, severity and message text and passed to the task events
    manager. A line with too few fields is marked as failed instead. The
    activity is logged in both cases.
    """
    ctx = SuiteProcContext(self.JOBS_POLL, None)
    ctx.out = line
    fields = line.split("|")[2:5]
    if len(fields) == 3:
        event_time, severity, message = fields
        ctx.ret_code = 0
        self.task_events_mgr.process_message(
            itask, severity, message, self.poll_task_jobs, event_time)
    else:
        # Too few fields to be a valid message line.
        ctx.ret_code = 1
        ctx.cmd = cmd_ctx.cmd  # print original command on failure
    log_task_job_activity(ctx, suite, itask.point, itask.tdef.name)
def _poll_task_job_message_callback(self, suite, itask, cmd_ctx, line):
    """Handle one "|"-delimited task message line found by a job poll.

    Fields 3 to 5 of the line (counting from 1) are taken as the event
    time, priority and message text and passed to the task events
    manager. A line with too few fields is marked as failed instead. The
    activity is logged through the task events manager in both cases.
    """
    ctx = SuiteProcContext(self.JOBS_POLL, None)
    ctx.out = line
    fields = line.split("|")[2:5]
    if len(fields) == 3:
        event_time, priority, message = fields
        ctx.ret_code = 0
        self.task_events_mgr.process_message(
            itask, priority, message, self.poll_task_jobs, event_time)
    else:
        # Too few fields to be a valid message line.
        ctx.ret_code = 1
        ctx.cmd = cmd_ctx.cmd  # print original command on failure
    self.task_events_mgr.log_task_job_activity(
        ctx, suite, itask.point, itask.tdef.name)
def _kill_task_job_callback(self, suite, itask, cmd_ctx, line):
    """Handle one "|"-delimited line of job-kill output for one task job.

    The line is split into at most three fields; the first and third are
    the timestamp and the integer return code. After logging the
    activity, the outcome is decided from the return code and the current
    task status, stored as the task's latest message, and logged.
    """
    ctx = SuiteProcContext(self.JOBS_KILL, None)
    ctx.out = line
    parts = line.split("|", 2)
    if len(parts) == 3:
        ctx.timestamp = parts[0]
        ctx.ret_code = int(parts[2])
        if ctx.ret_code:
            ctx.cmd = cmd_ctx.cmd  # print original command on failure
    else:
        # Malformed line: treat as a failure.
        ctx.ret_code = 1
        ctx.cmd = cmd_ctx.cmd  # print original command on failure
    self.task_events_mgr.log_task_job_activity(
        ctx, suite, itask.point, itask.tdef.name)

    if ctx.ret_code:
        # Non-zero exit status from the kill command.
        level, outcome = WARNING, 'kill failed'
        itask.state.kill_failed = True
    elif itask.state.status == TASK_STATUS_SUBMITTED:
        level, outcome = INFO, 'killed'
        submit_failed = "%s at %s" % (
            self.task_events_mgr.EVENT_SUBMIT_FAILED, ctx.timestamp)
        self.task_events_mgr.process_message(
            itask, CRITICAL, submit_failed, self.poll_task_jobs)
        cylc.flags.iflag = True
    elif itask.state.status == TASK_STATUS_RUNNING:
        level, outcome = INFO, 'killed'
        self.task_events_mgr.process_message(
            itask, CRITICAL, TASK_OUTPUT_FAILED, self.poll_task_jobs)
        cylc.flags.iflag = True
    else:
        level = WARNING
        outcome = (
            'ignoring job kill result, unexpected task state: %s'
            % itask.state.status)

    itask.summary['latest_message'] = outcome
    report = "[%s] -job(%02d) %s" % (
        itask.identity, itask.submit_num, outcome)
    LOG.log(level, report)
def _kill_task_job_callback(self, suite, itask, cmd_ctx, line):
    """Handle one "|"-delimited line of job-kill output for one task job.

    The line is split into at most three fields; the first and third are
    the timestamp and the integer return code. After logging the
    activity, the outcome is decided from the return code and the current
    task status, stored as the task's latest message, and logged.
    """
    ctx = SuiteProcContext(self.JOBS_KILL, None)
    ctx.out = line
    parts = line.split("|", 2)
    if len(parts) == 3:
        ctx.timestamp = parts[0]
        ctx.ret_code = int(parts[2])
        if ctx.ret_code:
            ctx.cmd = cmd_ctx.cmd  # print original command on failure
    else:
        # Malformed line: treat as a failure.
        ctx.ret_code = 1
        ctx.cmd = cmd_ctx.cmd  # print original command on failure
    self.task_events_mgr.log_task_job_activity(
        ctx, suite, itask.point, itask.tdef.name)

    if ctx.ret_code:
        # Non-zero exit status from the kill command.
        level, outcome = WARNING, 'kill failed'
        itask.state.kill_failed = True
    elif itask.state.status == TASK_STATUS_SUBMITTED:
        level, outcome = INFO, 'killed'
        submit_failed = "%s at %s" % (
            self.task_events_mgr.EVENT_SUBMIT_FAILED, ctx.timestamp)
        self.task_events_mgr.process_message(itask, CRITICAL, submit_failed)
        cylc.flags.iflag = True
    elif itask.state.status == TASK_STATUS_RUNNING:
        level, outcome = INFO, 'killed'
        self.task_events_mgr.process_message(
            itask, CRITICAL, TASK_OUTPUT_FAILED)
        cylc.flags.iflag = True
    else:
        level = WARNING
        outcome = (
            'ignoring job kill result, unexpected task state: %s'
            % itask.state.status)

    itask.summary['latest_message'] = outcome
    report = "[%s] -job(%02d) %s" % (
        itask.identity, itask.submit_num, outcome)
    LOG.log(level, report)
def _poll_task_job_callback(self, suite, itask, cmd_ctx, line):
    """Helper for _poll_task_jobs_callback, on one task job.

    Split one "|"-delimited line of poll output, log the activity through
    the task events manager, and pass it a message describing the polled
    job state.

    Args:
        suite: passed through to log_task_job_activity.
        itask: task proxy; its point, tdef.name and summary are used.
        cmd_ctx: context of the poll command; its cmd is copied onto the
            logged context when the line cannot be parsed.
        line: one line of poll output. Fields 5 to 10 (counting from 1)
            are read; see cylc.batch_sys_manager.JobPollContext.
    """
    ctx = SuiteProcContext(self.JOBS_POLL, None)
    ctx.out = line
    ctx.ret_code = 0

    items = line.split("|")
    # See cylc.batch_sys_manager.JobPollContext
    try:
        (
            batch_sys_exit_polled, run_status, run_signal,
            time_submit_exit, time_run, time_run_exit
        ) = items[4:10]
    except (IndexError, ValueError):
        # A slice never raises IndexError: a line with too few fields
        # fails in the tuple unpacking with ValueError instead, so that is
        # what must be caught for the "poll failed" path to be reachable.
        itask.summary['latest_message'] = 'poll failed'
        cylc.flags.iflag = True
        ctx.cmd = cmd_ctx.cmd  # print original command on failure
        return
    finally:
        # Log the activity whether or not the line could be parsed.
        self.task_events_mgr.log_task_job_activity(
            ctx, suite, itask.point, itask.tdef.name)

    if run_status == "1" and run_signal in ["ERR", "EXIT"]:
        # Failed normally
        self.task_events_mgr.process_message(
            itask, INFO, TASK_OUTPUT_FAILED,
            self.poll_task_jobs, time_run_exit)
    elif run_status == "1" and batch_sys_exit_polled == "1":
        # Failed by a signal, and no longer in batch system
        self.task_events_mgr.process_message(
            itask, INFO, TASK_OUTPUT_FAILED,
            self.poll_task_jobs, time_run_exit)
        self.task_events_mgr.process_message(
            itask, INFO, TaskMessage.FAIL_MESSAGE_PREFIX + run_signal,
            self.poll_task_jobs, time_run_exit)
    elif run_status == "1":
        # The job has terminated, but is still managed by batch system.
        # Some batch system may restart a job in this state, so don't
        # mark as failed yet.
        self.task_events_mgr.process_message(
            itask, INFO, TASK_OUTPUT_STARTED,
            self.poll_task_jobs, time_run)
    elif run_status == "0":
        # The job succeeded
        self.task_events_mgr.process_message(
            itask, INFO, TASK_OUTPUT_SUCCEEDED,
            self.poll_task_jobs, time_run_exit)
    elif time_run and batch_sys_exit_polled == "1":
        # The job has terminated without executing the error trap;
        # an empty string is passed as the event time.
        self.task_events_mgr.process_message(
            itask, INFO, TASK_OUTPUT_FAILED,
            self.poll_task_jobs, "")
    elif time_run:
        # The job has started, and is still managed by batch system
        self.task_events_mgr.process_message(
            itask, INFO, TASK_OUTPUT_STARTED,
            self.poll_task_jobs, time_run)
    elif batch_sys_exit_polled == "1":
        # The job never ran, and no longer in batch system
        self.task_events_mgr.process_message(
            itask, INFO, self.task_events_mgr.EVENT_SUBMIT_FAILED,
            self.poll_task_jobs, time_submit_exit)
    else:
        # The job never ran, and is in batch system
        self.task_events_mgr.process_message(
            itask, INFO, TASK_STATUS_SUBMITTED,
            self.poll_task_jobs, time_submit_exit)