def check_job_time(self, itask, now):
    """Check/handle job timeout and poll timer.

    Args:
        itask: task object; this reads ``timeout``, ``state.status`` and
            ``summary`` from it and resets ``timeout`` to None once the
            timeout has been handled.
        now: time value compared against ``itask.timeout`` (also passed
            on to ``self.check_poll_time``).

    Returns:
        True if a timeout event was emitted for the task, otherwise the
        return value of ``self.check_poll_time(itask, now)``.
    """
    can_poll = self.check_poll_time(itask, now)
    if itask.timeout is None or now <= itask.timeout:
        return can_poll
    # Timeout reached for task, emit event and reset itask.timeout
    if itask.state.status == TASK_STATUS_RUNNING:
        time_ref = itask.summary['started_time']
        event = 'execution timeout'
    elif itask.state.status == TASK_STATUS_SUBMITTED:
        time_ref = itask.summary['submitted_time']
        event = 'submission timeout'
    else:
        # Neither running nor submitted: there is no timeout event to
        # emit. Clear the stale timeout and fall back to the poll result
        # instead of failing on unbound "event"/"time_ref".
        itask.timeout = None
        return can_poll
    msg = event
    try:
        msg += ' after %s' % intvl_as_str(itask.timeout - time_ref)
    except (TypeError, ValueError):
        # Badness in time_ref? Report the bare event name instead.
        pass
    itask.timeout = None  # emit event only once
    LOG.warning(msg, itask=itask)
    self.setup_event_handlers(itask, event, msg)
    return True
def check_job_time(self, itask, now):
    """Check/handle job timeout and poll timer.

    Args:
        itask: task object; this reads ``timeout``, ``state.status`` and
            ``summary`` from it and resets ``timeout`` to None once the
            timeout has been handled.
        now: time value compared against ``itask.timeout`` (also passed
            on to ``self.check_poll_time``).

    Returns:
        True if a timeout event was emitted for the task, otherwise the
        return value of ``self.check_poll_time(itask, now)``.
    """
    can_poll = self.check_poll_time(itask, now)
    if itask.timeout is None or now <= itask.timeout:
        return can_poll
    # Timeout reached for task, emit event and reset itask.timeout
    status = itask.state.status
    if status == TASK_STATUS_RUNNING:
        time_ref = itask.summary['started_time']
        event = 'execution timeout'
    elif status == TASK_STATUS_SUBMITTED:
        time_ref = itask.summary['submitted_time']
        event = 'submission timeout'
    else:
        # Neither running nor submitted: there is no timeout event to
        # emit. Clear the stale timeout and fall back to the poll result
        # instead of failing on unbound "event"/"time_ref".
        itask.timeout = None
        return can_poll
    msg = event
    try:
        msg += ' after %s' % intvl_as_str(itask.timeout - time_ref)
    except (TypeError, ValueError):
        # Badness in time_ref? Report the bare event name instead.
        pass
    itask.timeout = None  # emit event only once
    LOG.warning('[%s] -%s', itask, msg)
    self.setup_event_handlers(itask, event, msg)
    return True
def _reset_job_timers(self, itask):
    """Set up poll timer and timeout for task.

    If the task status is not in TASK_STATUSES_ACTIVE, both
    ``itask.timeout`` and ``itask.poll_timer`` are cleared. If a poll
    timer already exists for the same (submit number, status) context,
    nothing is changed. Otherwise ``itask.timeout`` and
    ``itask.poll_timer`` are (re)built from the relevant settings, the
    resulting schedule is logged, and ``self.check_poll_time(itask)`` is
    called to set the next poll time.

    Args:
        itask: task object; reads ``state.status``, ``submit_num`` and
            ``summary``; writes ``timeout`` and ``poll_timer``.
    """
    if itask.state.status not in TASK_STATUSES_ACTIVE:
        # Reset, task not active
        itask.timeout = None
        itask.poll_timer = None
        return
    ctx = (itask.submit_num, itask.state.status)
    if itask.poll_timer and itask.poll_timer.ctx == ctx:
        # Timer already set up for this submit number and status
        return
    # Set poll timer
    # Set timeout
    timeref = None  # reference time, submitted or started time
    timeout = None  # timeout in setting
    if itask.state.status == TASK_STATUS_RUNNING:
        timeref = itask.summary['started_time']
        timeout_key = 'execution timeout'
        timeout = self._get_events_conf(itask, timeout_key)
        # Work on a copy: the list is trimmed/extended in place below and
        # must not alter the object handed back by get_host_conf.
        delays = list(self.get_host_conf(
            itask, 'execution polling intervals', skey='job',
            default=[900]))  # Default 15 minute intervals
        if itask.summary[self.KEY_EXECUTE_TIME_LIMIT]:
            time_limit = itask.summary[self.KEY_EXECUTE_TIME_LIMIT]
            try:
                host_conf = self.get_host_conf(itask, 'batch systems')
                batch_sys_conf = host_conf[itask.summary['batch_sys_name']]
            except (TypeError, KeyError):
                batch_sys_conf = {}
            # Copy as well: the first element is adjusted in place below.
            time_limit_delays = list(batch_sys_conf.get(
                'execution time limit polling intervals', [60, 120, 420]))
            timeout = time_limit + sum(time_limit_delays)
            # Remove excessive polling before time limit
            while sum(delays) > time_limit:
                del delays[-1]
            # But fill up the gap before time limit
            if delays:
                size = int((time_limit - sum(delays)) / delays[-1])
                delays.extend([delays[-1]] * size)
            # Stretch the first post-limit delay to cover what remains of
            # the time limit after the regular delays.
            time_limit_delays[0] += time_limit - sum(delays)
            delays += time_limit_delays
    else:  # if itask.state.status == TASK_STATUS_SUBMITTED:
        timeref = itask.summary['submitted_time']
        timeout_key = 'submission timeout'
        timeout = self._get_events_conf(itask, timeout_key)
        delays = list(self.get_host_conf(
            itask, 'submission polling intervals', skey='job',
            default=[900]))  # Default 15 minute intervals
    try:
        itask.timeout = timeref + float(timeout)
        timeout_str = intvl_as_str(timeout)
    except (TypeError, ValueError):
        # No usable reference time or timeout setting: no timeout
        itask.timeout = None
        timeout_str = None
    itask.poll_timer = TaskActionTimer(ctx=ctx, delays=delays)
    # Log timeout and polling schedule
    message = 'health check settings: %s=%s' % (timeout_key, timeout_str)
    # Attempt to group identical consecutive delays as N*DELAY,...
    if itask.poll_timer.delays:
        items = []  # [[number of item - 1, item], ...]
        for delay in itask.poll_timer.delays:
            if items and items[-1][1] == delay:
                items[-1][0] += 1
            else:
                items.append([0, delay])
        message += ', polling intervals='
        for num, item in items:
            if num:
                message += '%d*' % (num + 1)
            message += '%s,' % intvl_as_str(item)
        message += '...'
    LOG.info(message, itask=itask)
    # Set next poll time
    self.check_poll_time(itask)
def _reset_job_timers(self, itask):
    """Set up poll timer and timeout for task.

    If the task status is not in TASK_STATUSES_ACTIVE, both
    ``itask.timeout`` and ``itask.poll_timer`` are cleared. If a poll
    timer already exists for the same (submit number, status) context,
    nothing is changed. Otherwise ``itask.timeout`` and
    ``itask.poll_timer`` are (re)built from the relevant settings, the
    resulting schedule is logged, and ``self.check_poll_time(itask)`` is
    called to set the next poll time.

    Args:
        itask: task object; reads ``state.status``, ``submit_num`` and
            ``summary``; writes ``timeout`` and ``poll_timer``.
    """
    if itask.state.status not in TASK_STATUSES_ACTIVE:
        # Reset, task not active
        itask.timeout = None
        itask.poll_timer = None
        return
    ctx = (itask.submit_num, itask.state.status)
    if itask.poll_timer and itask.poll_timer.ctx == ctx:
        # Timer already set up for this submit number and status
        return
    # Set poll timer
    # Set timeout
    timeref = None  # reference time, submitted or started time
    timeout = None  # timeout in setting
    if itask.state.status == TASK_STATUS_RUNNING:
        timeref = itask.summary['started_time']
        timeout_key = 'execution timeout'
        timeout = self._get_events_conf(itask, timeout_key)
        delays = list(self.get_host_conf(
            itask, 'execution polling intervals', skey='job',
            default=[900]))  # Default 15 minute intervals
        if itask.summary[self.KEY_EXECUTE_TIME_LIMIT]:
            time_limit = itask.summary[self.KEY_EXECUTE_TIME_LIMIT]
            try:
                host_conf = self.get_host_conf(itask, 'batch systems')
                batch_sys_conf = host_conf[itask.summary['batch_sys_name']]
            except (TypeError, KeyError):
                batch_sys_conf = {}
            # Copy, like "delays" above: the first element is adjusted in
            # place below and that must not alter the object stored in
            # batch_sys_conf.
            time_limit_delays = list(batch_sys_conf.get(
                'execution time limit polling intervals', [60, 120, 420]))
            timeout = time_limit + sum(time_limit_delays)
            # Remove excessive polling before time limit
            while sum(delays) > time_limit:
                del delays[-1]
            # But fill up the gap before time limit
            if delays:
                size = int((time_limit - sum(delays)) / delays[-1])
                delays.extend([delays[-1]] * size)
            # Stretch the first post-limit delay to cover what remains of
            # the time limit after the regular delays.
            time_limit_delays[0] += time_limit - sum(delays)
            delays += time_limit_delays
    else:  # if itask.state.status == TASK_STATUS_SUBMITTED:
        timeref = itask.summary['submitted_time']
        timeout_key = 'submission timeout'
        timeout = self._get_events_conf(itask, timeout_key)
        delays = list(self.get_host_conf(
            itask, 'submission polling intervals', skey='job',
            default=[900]))  # Default 15 minute intervals
    try:
        itask.timeout = timeref + float(timeout)
        timeout_str = intvl_as_str(timeout)
    except (TypeError, ValueError):
        # No usable reference time or timeout setting: no timeout
        itask.timeout = None
        timeout_str = None
    itask.poll_timer = TaskActionTimer(ctx=ctx, delays=delays)
    # Log timeout and polling schedule
    message = 'health check settings: %s=%s' % (timeout_key, timeout_str)
    # Attempt to group identical consecutive delays as N*DELAY,...
    if itask.poll_timer.delays:
        items = []  # [[number of item - 1, item], ...]
        for delay in itask.poll_timer.delays:
            if items and items[-1][1] == delay:
                items[-1][0] += 1
            else:
                items.append([0, delay])
        message += ', polling intervals='
        for num, item in items:
            if num:
                message += '%d*' % (num + 1)
            message += '%s,' % intvl_as_str(item)
        message += '...'
    LOG.info('[%s] -%s', itask, message)
    # Set next poll time
    self.check_poll_time(itask)