Example #1
0
 def check_job_time(self, itask, now):
     """Check/handle job timeout and poll timer.

     The poll timer is always checked first via ``self.check_poll_time``.
     If the task has a timeout and ``now`` is past it, a timeout event is
     logged and passed to ``self.setup_event_handlers``, and the timeout is
     cleared so that the event is emitted only once.

     Args:
         itask: task object; ``timeout``, ``state.status`` and ``summary``
             are read, and ``timeout`` is reset once it has expired.
         now: current time, compared against ``itask.timeout``.

     Returns:
         True if a timeout event was emitted, otherwise the result of
         ``self.check_poll_time(itask, now)``.
     """
     can_poll = self.check_poll_time(itask, now)
     if itask.timeout is None or now <= itask.timeout:
         return can_poll
     # Timeout reached for task, emit event and reset itask.timeout
     status = itask.state.status
     if status == TASK_STATUS_RUNNING:
         time_ref = itask.summary['started_time']
         event = 'execution timeout'
     elif status == TASK_STATUS_SUBMITTED:
         time_ref = itask.summary['submitted_time']
         event = 'submission timeout'
     else:
         # Neither running nor submitted: there is no event to emit.
         # Previously this path hit unbound locals; now the stale timeout
         # is simply dropped and the poll result is returned.
         itask.timeout = None
         return can_poll
     msg = event
     try:
         msg += ' after %s' % intvl_as_str(itask.timeout - time_ref)
     except (TypeError, ValueError):
         # Badness in time_ref? Fall back to the bare event name.
         pass
     itask.timeout = None  # emit event only once
     LOG.warning(msg, itask=itask)
     self.setup_event_handlers(itask, event, msg)
     return True
Example #2
0
 def check_job_time(self, itask, now):
     """Check/handle job timeout and poll timer.

     The poll timer is always checked first via ``self.check_poll_time``.
     If the task has a timeout and ``now`` is past it, a timeout event is
     logged and passed to ``self.setup_event_handlers``, and the timeout is
     cleared so that the event is emitted only once.

     Args:
         itask: task object; ``timeout``, ``state.status`` and ``summary``
             are read, and ``timeout`` is reset once it has expired.
         now: current time, compared against ``itask.timeout``.

     Returns:
         True if a timeout event was emitted, otherwise the result of
         ``self.check_poll_time(itask, now)``.
     """
     can_poll = self.check_poll_time(itask, now)
     if itask.timeout is None or now <= itask.timeout:
         return can_poll
     # Timeout reached for task, emit event and reset itask.timeout
     status = itask.state.status
     if status == TASK_STATUS_RUNNING:
         time_ref = itask.summary['started_time']
         event = 'execution timeout'
     elif status == TASK_STATUS_SUBMITTED:
         time_ref = itask.summary['submitted_time']
         event = 'submission timeout'
     else:
         # Neither running nor submitted: there is no event to emit.
         # Previously this path hit unbound locals; now the stale timeout
         # is simply dropped and the poll result is returned.
         itask.timeout = None
         return can_poll
     msg = event
     try:
         msg += ' after %s' % intvl_as_str(itask.timeout - time_ref)
     except (TypeError, ValueError):
         # Badness in time_ref? Fall back to the bare event name.
         pass
     itask.timeout = None  # emit event only once
     LOG.warning('[%s] -%s', itask, msg)
     self.setup_event_handlers(itask, event, msg)
     return True
Example #3
0
 def _reset_job_timers(self, itask):
     """Set up poll timer and timeout for task.

     If the task status is not in ``TASK_STATUSES_ACTIVE``, the timeout and
     the poll timer are both cleared. If a poll timer already exists for the
     current ``(submit_num, status)`` context, nothing is changed. Otherwise
     a new timeout and poll timer are set up, the resulting schedule is
     logged, and the next poll time is set via ``self.check_poll_time``.

     Args:
         itask: task object; ``state.status``, ``submit_num`` and ``summary``
             are read, ``timeout`` and ``poll_timer`` are (re)assigned.
     """
     if itask.state.status not in TASK_STATUSES_ACTIVE:
         # Reset, task not active
         itask.timeout = None
         itask.poll_timer = None
         return
     ctx = (itask.submit_num, itask.state.status)
     if itask.poll_timer and itask.poll_timer.ctx == ctx:
         return
     # Set poll timer
     # Set timeout
     timeref = None  # reference time, submitted or started time
     timeout = None  # timeout in setting
     if itask.state.status == TASK_STATUS_RUNNING:
         timeref = itask.summary['started_time']
         timeout_key = 'execution timeout'
         timeout = self._get_events_conf(itask, timeout_key)
         delays = self.get_host_conf(
             itask,
             'execution polling intervals',
             skey='job',
             default=[900])  # Default 15 minute intervals
         time_limit = itask.summary[self.KEY_EXECUTE_TIME_LIMIT]
         if time_limit:
             try:
                 host_conf = self.get_host_conf(itask, 'batch systems')
                 batch_sys_conf = host_conf[itask.summary['batch_sys_name']]
             except (TypeError, KeyError):
                 batch_sys_conf = {}
             # Work on copies: both lists are edited in place below and
             # may be owned by the configuration, which must not change.
             delays = list(delays)
             # An empty or unset value falls back to the default intervals.
             time_limit_delays = list(
                 batch_sys_conf.get(
                     'execution time limit polling intervals') or
                 [60, 120, 420])
             # With a time limit, the timeout is the limit plus the
             # time-limit polling intervals, replacing the configured one.
             timeout = time_limit + sum(time_limit_delays)
             # Remove excessive polling before time limit
             while sum(delays) > time_limit:
                 del delays[-1]
             # But fill up the gap before time limit
             if delays:
                 size = int((time_limit - sum(delays)) / delays[-1])
                 delays.extend([delays[-1]] * size)
             # Stretch the first time-limit interval over any remaining gap
             time_limit_delays[0] += time_limit - sum(delays)
             delays += time_limit_delays
     else:  # if itask.state.status == TASK_STATUS_SUBMITTED:
         timeref = itask.summary['submitted_time']
         timeout_key = 'submission timeout'
         timeout = self._get_events_conf(itask, timeout_key)
         delays = self.get_host_conf(
             itask,
             'submission polling intervals',
             skey='job',
             default=[900])  # Default 15 minute intervals
     try:
         itask.timeout = timeref + float(timeout)
         timeout_str = intvl_as_str(timeout)
     except (TypeError, ValueError):
         # No usable timeout setting or reference time: no timeout
         itask.timeout = None
         timeout_str = None
     itask.poll_timer = TaskActionTimer(ctx=ctx, delays=delays)
     # Log timeout and polling schedule
     message = 'health check settings: %s=%s' % (timeout_key, timeout_str)
     # Attempt to group identical consecutive delays as N*DELAY,...
     if itask.poll_timer.delays:
         groups = []  # [[number of repeats, delay], ...]
         for delay in itask.poll_timer.delays:
             if groups and groups[-1][1] == delay:
                 groups[-1][0] += 1
             else:
                 groups.append([1, delay])
         parts = []
         for count, delay in groups:
             if count > 1:
                 parts.append('%d*' % count)
             parts.append('%s,' % intvl_as_str(delay))
         message += ', polling intervals=' + ''.join(parts) + '...'
     LOG.info(message, itask=itask)
     # Set next poll time
     self.check_poll_time(itask)
Example #4
0
 def _reset_job_timers(self, itask):
     """Set up poll timer and timeout for task.

     If the task status is not in ``TASK_STATUSES_ACTIVE``, the timeout and
     the poll timer are both cleared. If a poll timer already exists for the
     current ``(submit_num, status)`` context, nothing is changed. Otherwise
     a new timeout and poll timer are set up, the resulting schedule is
     logged, and the next poll time is set via ``self.check_poll_time``.

     Args:
         itask: task object; ``state.status``, ``submit_num`` and ``summary``
             are read, ``timeout`` and ``poll_timer`` are (re)assigned.
     """
     if itask.state.status not in TASK_STATUSES_ACTIVE:
         # Reset, task not active
         itask.timeout = None
         itask.poll_timer = None
         return
     ctx = (itask.submit_num, itask.state.status)
     if itask.poll_timer and itask.poll_timer.ctx == ctx:
         return
     # Set poll timer
     # Set timeout
     timeref = None  # reference time, submitted or started time
     timeout = None  # timeout in setting
     if itask.state.status == TASK_STATUS_RUNNING:
         timeref = itask.summary['started_time']
         timeout_key = 'execution timeout'
         timeout = self._get_events_conf(itask, timeout_key)
         # Copied because the list is edited in place below
         delays = list(self.get_host_conf(
             itask, 'execution polling intervals', skey='job',
             default=[900]))  # Default 15 minute intervals
         time_limit = itask.summary[self.KEY_EXECUTE_TIME_LIMIT]
         if time_limit:
             try:
                 host_conf = self.get_host_conf(itask, 'batch systems')
                 batch_sys_conf = host_conf[itask.summary['batch_sys_name']]
             except (TypeError, KeyError):
                 batch_sys_conf = {}
             # Copy before editing: the list may be owned by the
             # configuration, and its first item is adjusted below.
             # An empty or unset value falls back to the default intervals.
             time_limit_delays = list(
                 batch_sys_conf.get(
                     'execution time limit polling intervals') or
                 [60, 120, 420])
             # With a time limit, the timeout is the limit plus the
             # time-limit polling intervals, replacing the configured one.
             timeout = time_limit + sum(time_limit_delays)
             # Remove excessive polling before time limit
             while sum(delays) > time_limit:
                 del delays[-1]
             # But fill up the gap before time limit
             if delays:
                 size = int((time_limit - sum(delays)) / delays[-1])
                 delays.extend([delays[-1]] * size)
             # Stretch the first time-limit interval over any remaining gap
             time_limit_delays[0] += time_limit - sum(delays)
             delays += time_limit_delays
     else:  # if itask.state.status == TASK_STATUS_SUBMITTED:
         timeref = itask.summary['submitted_time']
         timeout_key = 'submission timeout'
         timeout = self._get_events_conf(itask, timeout_key)
         delays = list(self.get_host_conf(
             itask, 'submission polling intervals', skey='job',
             default=[900]))  # Default 15 minute intervals
     try:
         itask.timeout = timeref + float(timeout)
         timeout_str = intvl_as_str(timeout)
     except (TypeError, ValueError):
         # No usable timeout setting or reference time: no timeout
         itask.timeout = None
         timeout_str = None
     itask.poll_timer = TaskActionTimer(ctx=ctx, delays=delays)
     # Log timeout and polling schedule
     message = 'health check settings: %s=%s' % (timeout_key, timeout_str)
     # Attempt to group identical consecutive delays as N*DELAY,...
     if itask.poll_timer.delays:
         groups = []  # [[number of repeats, delay], ...]
         for delay in itask.poll_timer.delays:
             if groups and groups[-1][1] == delay:
                 groups[-1][0] += 1
             else:
                 groups.append([1, delay])
         parts = []
         for count, delay in groups:
             if count > 1:
                 parts.append('%d*' % count)
             parts.append('%s,' % intvl_as_str(delay))
         message += ', polling intervals=' + ''.join(parts) + '...'
     LOG.info('[%s] -%s', itask, message)
     # Set next poll time
     self.check_poll_time(itask)