Esempio n. 1
0
 def _kill_task_job_callback(self, suite, itask, cmd_ctx, line):
     """Helper for _kill_task_jobs_callback, on one task job."""
     ctx = SubProcContext(self.JOBS_KILL, None)
     ctx.out = line
     try:
         ctx.timestamp, _, ctx.ret_code = line.split("|", 2)
     except ValueError:
         ctx.ret_code = 1
         ctx.cmd = cmd_ctx.cmd  # print original command on failure
     else:
         ctx.ret_code = int(ctx.ret_code)
         if ctx.ret_code:
             ctx.cmd = cmd_ctx.cmd  # print original command on failure
     log_task_job_activity(ctx, suite, itask.point, itask.tdef.name)
     log_lvl = INFO
     log_msg = 'killed'
     if ctx.ret_code:  # non-zero exit status
         log_lvl = WARNING
         log_msg = 'kill failed'
         itask.state.kill_failed = True
     elif itask.state.status == TASK_STATUS_SUBMITTED:
         self.task_events_mgr.process_message(
             itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED,
             ctx.timestamp)
     elif itask.state.status == TASK_STATUS_RUNNING:
         self.task_events_mgr.process_message(
             itask, CRITICAL, TASK_OUTPUT_FAILED)
     else:
         log_lvl = DEBUG
         log_msg = (
             'ignoring job kill result, unexpected task state: %s' %
             itask.state.status)
     itask.set_summary_message(log_msg)
     LOG.log(log_lvl, "[%s] -job(%02d) %s" % (
         itask.identity, itask.submit_num, log_msg))
Esempio n. 2
0
 def _kill_task_job_callback(self, suite, itask, cmd_ctx, line):
     """Helper for _kill_task_jobs_callback, on one task job."""
     ctx = SubProcContext(self.JOBS_KILL, None)
     ctx.out = line
     try:
         ctx.timestamp, _, ctx.ret_code = line.split("|", 2)
     except ValueError:
         ctx.ret_code = 1
         ctx.cmd = cmd_ctx.cmd  # print original command on failure
     else:
         ctx.ret_code = int(ctx.ret_code)
         if ctx.ret_code:
             ctx.cmd = cmd_ctx.cmd  # print original command on failure
     log_task_job_activity(ctx, suite, itask.point, itask.tdef.name)
     log_lvl = INFO
     log_msg = 'killed'
     if ctx.ret_code:  # non-zero exit status
         log_lvl = WARNING
         log_msg = 'kill failed'
         itask.state.kill_failed = True
     elif itask.state.status == TASK_STATUS_SUBMITTED:
         self.task_events_mgr.process_message(
             itask, CRITICAL, self.task_events_mgr.EVENT_SUBMIT_FAILED,
             ctx.timestamp)
     elif itask.state.status == TASK_STATUS_RUNNING:
         self.task_events_mgr.process_message(itask, CRITICAL,
                                              TASK_OUTPUT_FAILED)
     else:
         log_lvl = DEBUG
         log_msg = ('ignoring job kill result, unexpected task state: %s' %
                    itask.state.status)
     itask.set_summary_message(log_msg)
     LOG.log(
         log_lvl,
         "[%s] -job(%02d) %s" % (itask.identity, itask.submit_num, log_msg))
Esempio n. 3
0
    def _process_message_check(
        self,
        itask,
        severity,
        message,
        event_time,
        flag,
        submit_num,
    ):
        """Helper for `.process_message`.

        See `.process_message` for argument list
        Check whether to process/skip message.
        Return True if `.process_message` should contine, False otherwise.
        """
        logfmt = r'[%s] status=%s: %s%s at %s for job(%02d)'
        if flag == self.FLAG_RECEIVED and submit_num != itask.submit_num:
            # Ignore received messages from old jobs
            LOG.warning(logfmt + r' != current job(%02d)', itask, itask.state,
                        self.FLAG_RECEIVED_IGNORED, message, event_time,
                        submit_num, itask.submit_num)
            return False
        if itask.state.status in (TASK_STATUS_SUBMIT_RETRYING,
                                  TASK_STATUS_RETRYING):
            # Ignore polled messages if task is already in retrying statuses
            LOG.warning(logfmt, itask, itask.state, self.FLAG_POLLED_IGNORED,
                        message, event_time, submit_num)
            return False
        LOG.log(self.LEVELS.get(severity, INFO), logfmt, itask, itask.state,
                flag, message, event_time, submit_num)
        return True
Esempio n. 4
0
    def _process_message_check(
        self,
        itask,
        severity,
        message,
        event_time,
        flag,
        submit_num,
    ):
        """Helper for `.process_message`.

        See `.process_message` for argument list
        Check whether to process/skip message.
        Return True if `.process_message` should contine, False otherwise.
        """
        if self.timestamp:
            timestamp = " at %s " % event_time
        else:
            timestamp = ""
        logfmt = r'[%s] status=%s: %s%s%s for job(%02d) flow(%s)'
        if flag == self.FLAG_RECEIVED and submit_num != itask.submit_num:
            # Ignore received messages from old jobs
            LOG.warning(
                logfmt + r' != current job(%02d)',
                itask, itask.state, self.FLAG_RECEIVED_IGNORED, message,
                timestamp, submit_num, itask.flow_label, itask.submit_num)
            return False

        if (
                itask.state(TASK_STATUS_WAITING)
                and
                (
                    (
                        # task has a submit-retry lined up
                        TimerFlags.SUBMISSION_RETRY in itask.try_timers
                        and itask.try_timers[
                            TimerFlags.SUBMISSION_RETRY].num > 0
                    )
                    or
                    (
                        # task has an execution-retry lined up
                        TimerFlags.EXECUTION_RETRY in itask.try_timers
                        and itask.try_timers[
                            TimerFlags.EXECUTION_RETRY].num > 0
                    )
                )

        ):
            # Ignore polled messages if task has a retry lined up
            LOG.warning(
                logfmt,
                itask, itask.state, self.FLAG_POLLED_IGNORED, message,
                timestamp, submit_num, itask.flow_label)
            return False
        LOG.log(
            LOG_LEVELS.get(severity, INFO), logfmt, itask, itask.state, flag,
            message, timestamp, submit_num, itask.flow_label)
        return True
Esempio n. 5
0
    def upgrade(self):
        warnings = OrderedDict()
        for vn, upgs in self.upgrades.items():
            for u in upgs:
                try:
                    exp = self.expand(u)
                except (KeyError, UpgradeError):
                    continue

                for upg in exp:
                    try:
                        old = self.get_item(upg['old'])
                    except KeyError:
                        # OK: deprecated item not found
                        pass
                    else:
                        msg = self.show_keys(upg['old'])
                        if upg['new']:
                            msg += ' -> ' + self.show_keys(upg['new'])
                        else:
                            upg['new'] = upg['old']
                        msg += " - " + upg['cvt'].describe()
                        if not upg['silent']:
                            warnings.setdefault(vn, [])
                            warnings[vn].append(msg)
                        self.del_item(upg['old'])
                        if upg['cvt'].describe() != "DELETED (OBSOLETE)":
                            # check self.cfg does not already contain a
                            # non-deprecated item matching upg['new']:
                            try:
                                self.get_item(upg['new'])
                            except KeyError:
                                self.put_item(upg['new'],
                                              upg['cvt'].convert(old))
                            else:
                                raise UpgradeError(
                                    'ERROR: Cannot upgrade deprecated '
                                    f'item "{msg}" because the upgraded '
                                    'item already exists'
                                )
        if warnings:
            level = WARNING
            if self.descr == self.SITE_CONFIG:
                # Site level configuration, user cannot easily fix.
                # Only log at debug level.
                level = DEBUG
            else:
                # User level configuration, user should be able to fix.
                # Log at warning level.
                level = WARNING
            LOG.log(level,
                    'deprecated items were automatically upgraded in '
                    f'"{self.descr}"')
            for vn, msgs in warnings.items():
                for msg in msgs:
                    LOG.log(level, ' * (%s) %s', vn, msg)
Esempio n. 6
0
File: upgrade.py Progetto: cylc/cylc
    def upgrade(self):
        warnings = OrderedDict()
        for vn, upgs in self.upgrades.items():
            for u in upgs:
                try:
                    exp = self.expand(u)
                except (KeyError, UpgradeError):
                    continue

                for upg in exp:
                    try:
                        old = self.get_item(upg['old'])
                    except KeyError:
                        # OK: deprecated item not found
                        pass
                    else:
                        msg = self.show_keys(upg['old'])
                        if upg['new']:
                            msg += ' -> ' + self.show_keys(upg['new'])
                        else:
                            upg['new'] = upg['old']
                        msg += " - " + upg['cvt'].describe()
                        if not upg['silent']:
                            warnings.setdefault(vn, [])
                            warnings[vn].append(msg)
                        self.del_item(upg['old'])
                        if upg['cvt'].describe() != "DELETED (OBSOLETE)":
                            self.put_item(upg['new'], upg['cvt'].convert(old))
        if warnings:
            level = WARNING
            if self.descr == self.SITE_CONFIG:
                # Site level configuration, user cannot easily fix.
                # Only log at debug level.
                level = DEBUG
            else:
                # User level configuration, user should be able to fix.
                # Log at warning level.
                level = WARNING
            LOG.log(
                level,
                "deprecated items were automatically upgraded in '%s':",
                self.descr)
            for vn, msgs in warnings.items():
                for msg in msgs:
                    LOG.log(level, ' * (%s) %s', vn, msg)
Esempio n. 7
0
    def process_message(
        self,
        itask,
        severity,
        message,
        event_time=None,
        flag=FLAG_INTERNAL,
        submit_num=None,
    ):
        """Parse an task message and update task state.

        Incoming, e.g. "succeeded at <TIME>", may be from task job or polling.

        It is possible for the current state of a task to be inconsistent with
        a message (whether internal, received or polled) e.g. due to a late
        poll result, or a network outage, or manual state reset. To handle
        this, if a message would take the task state backward, issue a poll to
        confirm instead of changing state - then always believe the next
        message. Note that the next message might not be the result of this
        confirmation poll, in the unlikely event that a job emits a succession
        of messages very quickly, but this is the best we can do without
        somehow uniquely associating each poll with its result message.

        Arguments:
            itask (cylc.flow.task_proxy.TaskProxy):
                The task proxy object relevant for the message.
            severity (str or int):
                Message severity, should be a recognised logging level.
            message (str):
                Message content.
            event_time (str):
                Event time stamp. Expect ISO8601 date time string.
                If not specified, use current time.
            flag (str):
                If specified, can be:
                    FLAG_INTERNAL (default):
                        To indicate an internal message.
                    FLAG_RECEIVED:
                        To indicate a message received from a job or an
                        external source.
                    FLAG_POLLED:
                        To indicate a message resulted from a poll.
            submit_num (int):
                The submit number of the task relevant for the message.
                If not specified, use latest submit number.

        Return:
            None: in normal circumstances.
            True: if polling is required to confirm a reversal of status.

        """
        # Log messages
        if event_time is None:
            event_time = get_current_time_string()
        if submit_num is None:
            submit_num = itask.submit_num
        logfmt = r'[%s] status=%s: %s%s at %s for job(%02d)'
        if flag == self.FLAG_RECEIVED and submit_num != itask.submit_num:
            LOG.warning(logfmt + r' != current job(%02d)', itask, itask.state,
                        self.FLAG_RECEIVED_IGNORED, message, event_time,
                        submit_num, itask.submit_num)
            return
        LOG.log(self.LEVELS.get(severity, INFO), logfmt, itask, itask.state,
                flag, message, event_time, submit_num)

        # always update the suite state summary for latest message
        if flag == self.FLAG_POLLED:
            itask.set_summary_message('%s %s' % (message, self.FLAG_POLLED))
        else:
            itask.set_summary_message(message)

        # Satisfy my output, if possible, and record the result.
        completed_trigger = itask.state.outputs.set_msg_trg_completion(
            message=message, is_completed=True)

        if message == TASK_OUTPUT_STARTED:
            if (flag == self.FLAG_RECEIVED
                    and itask.state.is_gt(TASK_STATUS_RUNNING)):
                return True
            self._process_message_started(itask, event_time)
        elif message == TASK_OUTPUT_SUCCEEDED:
            self._process_message_succeeded(itask, event_time)
        elif message == TASK_OUTPUT_FAILED:
            if (flag == self.FLAG_RECEIVED
                    and itask.state.is_gt(TASK_STATUS_FAILED)):
                return True
            self._process_message_failed(itask, event_time, self.JOB_FAILED)
        elif message == self.EVENT_SUBMIT_FAILED:
            if (flag == self.FLAG_RECEIVED
                    and itask.state.is_gt(TASK_STATUS_SUBMIT_FAILED)):
                return True
            self._process_message_submit_failed(itask, event_time)
        elif message == TASK_OUTPUT_SUBMITTED:
            if (flag == self.FLAG_RECEIVED
                    and itask.state.is_gt(TASK_STATUS_SUBMITTED)):
                return True
            self._process_message_submitted(itask, event_time)
        elif message.startswith(FAIL_MESSAGE_PREFIX):
            # Task received signal.
            if (flag == self.FLAG_RECEIVED
                    and itask.state.is_gt(TASK_STATUS_FAILED)):
                return True
            signal = message[len(FAIL_MESSAGE_PREFIX):]
            self._db_events_insert(itask, "signaled", signal)
            self.suite_db_mgr.put_update_task_jobs(itask,
                                                   {"run_signal": signal})
            self._process_message_failed(itask, event_time, self.JOB_FAILED)
        elif message.startswith(ABORT_MESSAGE_PREFIX):
            # Task aborted with message
            if (flag == self.FLAG_RECEIVED
                    and itask.state.is_gt(TASK_STATUS_FAILED)):
                return True
            aborted_with = message[len(ABORT_MESSAGE_PREFIX):]
            self._db_events_insert(itask, "aborted", message)
            self.suite_db_mgr.put_update_task_jobs(
                itask, {"run_signal": aborted_with})
            self._process_message_failed(itask, event_time, aborted_with)
        elif message.startswith(VACATION_MESSAGE_PREFIX):
            # Task job pre-empted into a vacation state
            self._db_events_insert(itask, "vacated", message)
            itask.set_summary_time('started')  # unset
            if TASK_STATUS_SUBMIT_RETRYING in itask.try_timers:
                itask.try_timers[TASK_STATUS_SUBMIT_RETRYING].num = 0
            itask.job_vacated = True
            # Believe this and change state without polling (could poll?).
            self.pflag = True
            itask.state.reset(TASK_STATUS_SUBMITTED)
            self._reset_job_timers(itask)
            # We should really have a special 'vacated' handler, but given that
            # this feature can only be used on the deprecated loadleveler
            # system, we should probably aim to remove support for job vacation
            # instead. Otherwise, we should have:
            # self.setup_event_handlers(itask, 'vacated', message)
        elif completed_trigger:
            # Message of an as-yet unreported custom task output.
            # No state change.
            self.pflag = True
            self.suite_db_mgr.put_update_task_outputs(itask)
            self.setup_event_handlers(itask, completed_trigger, message)
        else:
            # Unhandled messages. These include:
            #  * general non-output/progress messages
            #  * poll messages that repeat previous results
            # Note that all messages are logged already at the top.
            # No state change.
            LOG.debug('[%s] status=%s: unhandled: %s', itask,
                      itask.state.status, message)
            if severity in [CRITICAL, ERROR, WARNING, INFO, DEBUG]:
                severity = getLevelName(severity)
            self._db_events_insert(itask,
                                   ("message %s" % str(severity).lower()),
                                   message)
        lseverity = str(severity).lower()
        if lseverity in self.NON_UNIQUE_EVENTS:
            itask.non_unique_events.setdefault(lseverity, 0)
            itask.non_unique_events[lseverity] += 1
            self.setup_event_handlers(itask, lseverity, message)