Ejemplo n.º 1
0
 def _setup_event_mail(self, itask, event):
     """Set up task event notification, by email."""
     if event in self.NON_UNIQUE_EVENTS:
         key1 = (self.HANDLER_MAIL,
                 '%s-%d' % (event, itask.non_unique_events.get(event, 1)))
     else:
         key1 = (self.HANDLER_MAIL, event)
     id_key = (key1, str(itask.point), itask.tdef.name, itask.submit_num)
     if (id_key in self.event_timers or event not in self._get_events_conf(
             itask, "mail events", [])):
         return
     retry_delays = self._get_events_conf(itask, "mail retry delays")
     if not retry_delays:
         retry_delays = [0]
     self.event_timers[id_key] = TaskActionTimer(
         TaskEventMailContext(
             self.HANDLER_MAIL,  # key
             self.HANDLER_MAIL,  # ctx_type
             self._get_events_conf(  # mail_from
                 itask,
                 "mail from",
                 "notifications@" + get_host(),
             ),
             self._get_events_conf(itask, "mail to", get_user()),  # mail_to
             self._get_events_conf(itask, "mail smtp"),  # mail_smtp
         ),
         retry_delays)
Ejemplo n.º 2
0
    def _setup_job_logs_retrieval(self, itask, event):
        """Set up remote job logs retrieval.

        For a task with a job completion event, i.e. succeeded, failed,
        (execution) retry.
        """
        id_key = ((self.HANDLER_JOB_LOGS_RETRIEVE, event), str(itask.point),
                  itask.tdef.name, itask.submit_num)
        if itask.task_owner:
            user_at_host = itask.task_owner + "@" + itask.task_host
        else:
            user_at_host = itask.task_host
        events = (self.EVENT_FAILED, self.EVENT_RETRY, self.EVENT_SUCCEEDED)
        if (event not in events
                or user_at_host in [get_user() + '@localhost', 'localhost']
                or not self.get_host_conf(itask, "retrieve job logs")
                or id_key in self.event_timers):
            return
        retry_delays = self.get_host_conf(itask,
                                          "retrieve job logs retry delays")
        if not retry_delays:
            retry_delays = [0]
        self.event_timers[id_key] = TaskActionTimer(
            TaskJobLogsRetrieveContext(
                self.HANDLER_JOB_LOGS_RETRIEVE,  # key
                self.HANDLER_JOB_LOGS_RETRIEVE,  # ctx_type
                user_at_host,
                self.get_host_conf(itask, "retrieve job logs max size"),
            ),
            retry_delays)
Ejemplo n.º 3
0
    def _set_retry_timers(itask, rtconfig=None, retry=True):
        """Set try number and retry delays."""
        if rtconfig is None:
            rtconfig = itask.tdef.rtconfig
        if (itask.tdef.run_mode + ' mode' in rtconfig and 'disable retries'
                in rtconfig[itask.tdef.run_mode + ' mode']):
            retry = False

        if retry:
            if rtconfig['job']['submission retry delays']:
                submit_delays = rtconfig['job']['submission retry delays']
            else:
                submit_delays = itask.platform['submission retry delays']
            # TODO: same for execution delays?

        if retry:
            for key, delays in [(TimerFlags.SUBMISSION_RETRY, submit_delays),
                                (TimerFlags.EXECUTION_RETRY,
                                 rtconfig['job']['execution retry delays'])]:
                if delays is None:
                    delays = []
                try:
                    itask.try_timers[key].set_delays(delays)
                except KeyError:
                    itask.try_timers[key] = TaskActionTimer(delays=delays)
Ejemplo n.º 4
0
    def _setup_job_logs_retrieval(self, itask, event):
        """Set up remote job logs retrieval.

        For a task with a job completion event, i.e. succeeded, failed,
        (execution) retry.
        """
        id_key = ((self.HANDLER_JOB_LOGS_RETRIEVE, event), str(itask.point),
                  itask.tdef.name, itask.submit_num)
        events = (self.EVENT_FAILED, self.EVENT_RETRY, self.EVENT_SUCCEEDED)
        host = get_host_from_platform(itask.platform)
        if (event not in events or not is_remote_host(host)
                or not self.get_host_conf(itask, "retrieve job logs")
                or id_key in self.event_timers):
            return
        retry_delays = self.get_host_conf(itask,
                                          "retrieve job logs retry delays")
        if not retry_delays:
            retry_delays = [0]
        self.event_timers[id_key] = TaskActionTimer(
            TaskJobLogsRetrieveContext(
                self.HANDLER_JOB_LOGS_RETRIEVE,  # key
                self.HANDLER_JOB_LOGS_RETRIEVE,  # ctx_type
                itask.platform['name'],
                self.get_host_conf(itask, "retrieve job logs max size"),
            ),
            retry_delays)
Ejemplo n.º 5
0
 def _set_retry_timers(itask, rtconfig=None):
     """Set try number and retry delays."""
     if rtconfig is None:
         rtconfig = itask.tdef.rtconfig
     try:
         no_retry = (rtconfig[itask.tdef.run_mode +
                              ' mode']['disable retries'])
     except KeyError:
         no_retry = False
     if not no_retry:
         for key, cfg_key in [
             (TASK_STATUS_SUBMIT_RETRYING, 'submission retry delays'),
             (TASK_STATUS_RETRYING, 'execution retry delays')
         ]:
             delays = rtconfig['job'][cfg_key]
             if delays is None:
                 delays = []
             try:
                 itask.try_timers[key].set_delays(delays)
             except KeyError:
                 itask.try_timers[key] = TaskActionTimer(delays=delays)
Ejemplo n.º 6
0
 def _reset_job_timers(self, itask):
     """Set up poll timer and timeout for task."""
     if not itask.state(*TASK_STATUSES_ACTIVE):
         # Reset, task not active
         itask.timeout = None
         itask.poll_timer = None
         return
     ctx = (itask.submit_num, itask.state.status)
     if itask.poll_timer and itask.poll_timer.ctx == ctx:
         return
     # Set poll timer
     # Set timeout
     timeref = None  # reference time, submitted or started time
     timeout = None  # timeout in setting
     if itask.state(TASK_STATUS_RUNNING):
         timeref = itask.summary['started_time']
         timeout_key = 'execution timeout'
         timeout = self._get_events_conf(itask, timeout_key)
         delays = list(
             self.get_host_conf(itask,
                                'execution polling intervals',
                                skey='job',
                                default=[900
                                         ]))  # Default 15 minute intervals
         if itask.summary[self.KEY_EXECUTE_TIME_LIMIT]:
             time_limit = itask.summary[self.KEY_EXECUTE_TIME_LIMIT]
             try:
                 host_conf = self.get_host_conf(itask, 'batch systems')
                 batch_sys_conf = host_conf[itask.summary['batch_sys_name']]
             except (TypeError, KeyError):
                 batch_sys_conf = {}
             time_limit_delays = batch_sys_conf.get(
                 'execution time limit polling intervals', [60, 120, 420])
             timeout = time_limit + sum(time_limit_delays)
             # Remove excessive polling before time limit
             while sum(delays) > time_limit:
                 del delays[-1]
             # But fill up the gap before time limit
             if delays:
                 size = int((time_limit - sum(delays)) / delays[-1])
                 delays.extend([delays[-1]] * size)
             time_limit_delays[0] += time_limit - sum(delays)
             delays += time_limit_delays
     else:  # if itask.state.status == TASK_STATUS_SUBMITTED:
         timeref = itask.summary['submitted_time']
         timeout_key = 'submission timeout'
         timeout = self._get_events_conf(itask, timeout_key)
         delays = list(
             self.get_host_conf(itask,
                                'submission polling intervals',
                                skey='job',
                                default=[900
                                         ]))  # Default 15 minute intervals
     try:
         itask.timeout = timeref + float(timeout)
         timeout_str = intvl_as_str(timeout)
     except (TypeError, ValueError):
         itask.timeout = None
         timeout_str = None
     itask.poll_timer = TaskActionTimer(ctx=ctx, delays=delays)
     # Log timeout and polling schedule
     message = 'health check settings: %s=%s' % (timeout_key, timeout_str)
     # Attempt to group identical consecutive delays as N*DELAY,...
     if itask.poll_timer.delays:
         items = []  # [(number of item - 1, item), ...]
         for delay in itask.poll_timer.delays:
             if items and items[-1][1] == delay:
                 items[-1][0] += 1
             else:
                 items.append([0, delay])
         message += ', polling intervals='
         for num, item in items:
             if num:
                 message += '%d*' % (num + 1)
             message += '%s,' % intvl_as_str(item)
         message += '...'
     LOG.info('[%s] -%s', itask, message)
     # Set next poll time
     self.check_poll_time(itask)
Ejemplo n.º 7
0
    def _setup_custom_event_handlers(self, itask, event, message):
        """Set up custom task event handlers."""
        handlers = self._get_events_conf(itask, event + ' handler')
        if (handlers is None and event in self._get_events_conf(
                itask, 'handler events', [])):
            handlers = self._get_events_conf(itask, 'handlers')
        if handlers is None:
            return
        retry_delays = self._get_events_conf(
            itask, 'handler retry delays',
            self.get_host_conf(itask, "task event handler retry delays"))
        if not retry_delays:
            retry_delays = [0]
        # There can be multiple custom event handlers
        for i, handler in enumerate(handlers):
            if event in self.NON_UNIQUE_EVENTS:
                key1 = ('%s-%02d' % (self.HANDLER_CUSTOM, i), '%s-%d' %
                        (event, itask.non_unique_events.get(event, 1)))
            else:
                key1 = ('%s-%02d' % (self.HANDLER_CUSTOM, i), event)
            id_key = (key1, str(itask.point), itask.tdef.name,
                      itask.submit_num)
            if id_key in self.event_timers:
                continue
            # Note: user@host may not always be set for a submit number, e.g.
            # on late event or if host select command fails. Use null string to
            # prevent issues in this case.
            user_at_host = itask.summary['job_hosts'].get(itask.submit_num, '')
            if user_at_host and '@' not in user_at_host:
                # (only has 'user@' on the front if user is not suite owner).
                user_at_host = '%s@%s' % (get_user(), user_at_host)
            # Custom event handler can be a command template string
            # or a command that takes 4 arguments (classic interface)
            # Note quote() fails on None, need str(None).
            try:
                handler_data = {
                    "event":
                    quote(event),
                    "suite":
                    quote(self.suite),
                    'suite_uuid':
                    quote(str(self.uuid_str)),
                    "point":
                    quote(str(itask.point)),
                    "name":
                    quote(itask.tdef.name),
                    "submit_num":
                    itask.submit_num,
                    "try_num":
                    itask.get_try_num(),
                    "id":
                    quote(itask.identity),
                    "message":
                    quote(message),
                    "batch_sys_name":
                    quote(str(itask.summary['batch_sys_name'])),
                    "batch_sys_job_id":
                    quote(str(itask.summary['submit_method_id'])),
                    "submit_time":
                    quote(str(itask.summary['submitted_time_string'])),
                    "start_time":
                    quote(str(itask.summary['started_time_string'])),
                    "finish_time":
                    quote(str(itask.summary['finished_time_string'])),
                    "user@host":
                    quote(user_at_host)
                }

                if self.suite_cfg:
                    for key, value in self.suite_cfg.items():
                        if key == "URL":
                            handler_data["suite_url"] = quote(value)
                        else:
                            handler_data["suite_" + key] = quote(value)

                if itask.tdef.rtconfig['meta']:
                    for key, value in itask.tdef.rtconfig['meta'].items():
                        if key == "URL":
                            handler_data["task_url"] = quote(value)
                        handler_data[key] = quote(value)

                cmd = handler % (handler_data)
            except KeyError as exc:
                message = "%s/%s/%02d %s bad template: %s" % (
                    itask.point, itask.tdef.name, itask.submit_num, key1, exc)
                LOG.error(message)
                continue

            if cmd == handler:
                # Nothing substituted, assume classic interface
                cmd = "%s '%s' '%s' '%s' '%s'" % (handler, event, self.suite,
                                                  itask.identity, message)
            LOG.debug("[%s] -Queueing %s handler: %s", itask, event, cmd)
            self.event_timers[id_key] = (TaskActionTimer(
                CustomTaskEventHandlerContext(
                    key1,
                    self.HANDLER_CUSTOM,
                    cmd,
                ), retry_delays))
Ejemplo n.º 8
0
    def _setup_custom_event_handlers(self, itask, event, message):
        """Set up custom task event handlers."""
        handlers = self._get_events_conf(itask, event + ' handler')
        if (handlers is None and event in self._get_events_conf(
                itask, 'handler events', [])):
            handlers = self._get_events_conf(itask, 'handlers')
        if handlers is None:
            return
        retry_delays = self._get_events_conf(itask, 'handler retry delays')
        if not retry_delays:
            retry_delays = [0]
        # There can be multiple custom event handlers
        for i, handler in enumerate(handlers):
            if event in self.NON_UNIQUE_EVENTS:
                key1 = ('%s-%02d' % (self.HANDLER_CUSTOM, i), '%s-%d' %
                        (event, itask.non_unique_events.get(event, 1)))
            else:
                key1 = ('%s-%02d' % (self.HANDLER_CUSTOM, i), event)
            id_key = (key1, str(itask.point), itask.tdef.name,
                      itask.submit_num)
            if id_key in self.event_timers:
                continue
            # Note: user@host may not always be set for a submit number, e.g.
            # on late event or if host select command fails. Use null string to
            # prevent issues in this case.
            platform_n = itask.summary['platforms_used'].get(
                itask.submit_num, '')
            # Custom event handler can be a command template string
            # or a command that takes 4 arguments (classic interface)
            # Note quote() fails on None, need str(None).
            try:
                handler_data = {
                    EventData.BatchSysJobID.value:
                    quote(str(itask.summary['submit_method_id'])),
                    EventData.BatchSysName.value:
                    quote(str(itask.summary['batch_sys_name'])),
                    EventData.CyclePoint.value:
                    quote(str(itask.point)),
                    EventData.Event.value:
                    quote(event),
                    EventData.FinishTime.value:
                    quote(str(itask.summary['finished_time_string'])),
                    EventData.ID.value:
                    quote(itask.identity),
                    EventData.Message.value:
                    quote(message),
                    EventData.TaskName.value:
                    quote(itask.tdef.name),
                    EventData.PlatformName.value:
                    quote(platform_n),
                    EventData.StartTime.value:
                    quote(str(itask.summary['started_time_string'])),
                    EventData.SubmitNum.value:
                    itask.submit_num,
                    EventData.SubmitTime.value:
                    quote(str(itask.summary['submitted_time_string'])),
                    EventData.Suite.value:
                    quote(self.suite),
                    EventData.SuiteUUID.value:
                    quote(str(self.uuid_str)),
                    EventData.TryNum.value:
                    itask.get_try_num(),
                    # task and suite metadata
                    **get_event_handler_data(itask.tdef.rtconfig, self.suite_cfg)
                }
                cmd = handler % (handler_data)
            except KeyError as exc:
                message = "%s/%s/%02d %s bad template: %s" % (
                    itask.point, itask.tdef.name, itask.submit_num, key1, exc)
                LOG.error(message)
                continue

            if cmd == handler:
                # Nothing substituted, assume classic interface
                cmd = "%s '%s' '%s' '%s' '%s'" % (handler, event, self.suite,
                                                  itask.identity, message)
            LOG.debug("[%s] -Queueing %s handler: %s", itask, event, cmd)
            self.event_timers[id_key] = (TaskActionTimer(
                CustomTaskEventHandlerContext(
                    key1,
                    self.HANDLER_CUSTOM,
                    cmd,
                ), retry_delays))