def testHasKey(self): ks = KeyedSet(firstLetter, ["apple", "avocado", "banana"]) self.assertTrue(ks.has_key("a")) self.assertTrue(ks.has_key("b")) self.assertFalse(ks.has_key("c"))
class Scheduler(object): """ A simple interval-based scheduler that makes use of the Twisted framework's LoopingCall construct. """ zope.interface.implements(IScheduler) CLEANUP_TASKS_INTERVAL = 10 # seconds ATTEMPTS = 3 def __init__(self, callableTaskFactory=CallableTaskFactory()): self._loopingCalls = {} self._tasks = {} self._taskCallback = {} self._taskStats = {} self._callableTaskFactory = callableTaskFactory self._shuttingDown = False # create a cleanup task that will periodically sweep the # cleanup dictionary for tasks that need to be cleaned self._tasksToCleanup = KeyedSet(getConfigId) self._cleanupTask = task.LoopingCall(self._cleanupTasks) self._cleanupTask.start(Scheduler.CLEANUP_TASKS_INTERVAL) self._executor = TwistedExecutor(1) # Ensure that we can cleanly shutdown all of our tasks reactor.addSystemEventTrigger('before', 'shutdown', self.shutdown, 'before') reactor.addSystemEventTrigger('during', 'shutdown', self.shutdown, 'during') reactor.addSystemEventTrigger('after', 'shutdown', self.shutdown, 'after') def __contains__(self, task): """ Returns True if the task has been added to the scheduler. Otherwise False is returned. """ # If task has no 'name' attribute, assume the task name was passed in. name = getattr(task, "name", task) return name in self._tasks def shutdown(self, phase): """ The reactor shutdown has three phases for event types: before - tasks can shut down safely at this time with access to all services (eg EventService, their own services) during - EventService and other services are gone before this starts after - not a lot left -- be careful Tasks that have the attribute 'stopPhase' can set the state for which they should be run, otherwise they will be shut down in the 'before' phase. Tasks that have the attribute 'stopOrder' can set the order in which they are shut down (lowest first). A stopOrder of 0 (zero) is assumed for tasks which do not declare a stopOrder. Returns a list of deferreds to wait on. """ self._shuttingDown = True doomedTasks = [] stopQ = {} log.debug("In shutdown stage %s", phase) for (taskName, taskWrapper) in self._tasks.iteritems(): task = taskWrapper.task stopPhase = getattr(task, 'stopPhase', 'before') if stopPhase in ('before', 'after', 'during') and \ stopPhase != phase: continue stopOrder = getattr(task, 'stopOrder', 0) queue = stopQ.setdefault(stopOrder, []) queue.append((taskName, taskWrapper, task)) for stopOrder in sorted(stopQ): for (taskName, taskWrapper, task) in stopQ[stopOrder]: loopTask = self._loopingCalls[taskName] if loopTask.running: log.debug("Stopping running task %s", taskName) loopTask.stop() log.debug("Removing task %s", taskName) doomedTasks.append(taskName) self.taskRemoved(taskWrapper) for taskName in doomedTasks: self._tasksToCleanup.add(self._tasks[taskName].task) del self._loopingCalls[taskName] del self._tasks[taskName] del self._taskStats[taskName] cleanupList = self._cleanupTasks() return defer.DeferredList(cleanupList) @property def executor(self): return self._executor def _getMaxTasks(self): return self._executor.getMax() def _setMaxTasks(self, max): return self._executor.setMax(max) maxTasks = property(_getMaxTasks, _setMaxTasks) def _ltCallback(self, result, task_name): """last call back in the chain, if it gets called as an errBack the looping will stop - shouldn't be called since CallableTask doesn't return a deferred, here for sanity and debug""" if task_name in self._loopingCalls: loopingCall = self._loopingCalls[task_name] log.debug("call finished %s : %s" % (loopingCall, result)) if isinstance(result, Failure): log.warn("Failure in looping call, will not reschedule %s" % task_name) log.error("%s" % result) def _startTask(self, result, task_name, interval, configId, delayed, attempts=0): """start the task using a callback so that its put at the bottom of the Twisted event queue, to allow other processing to continue and to support a task start-time jitter""" if task_name in self._loopingCalls: loopingCall = self._loopingCalls[task_name] if not loopingCall.running: if self._tasksToCleanup.has_key(configId): delay = random.randint(0, int(interval / 2)) delayed = delayed + delay if attempts > Scheduler.ATTEMPTS: obj = self._tasksToCleanup.pop_by_key(configId) log.debug("Forced cleanup of %s. Task: %s", configId, obj.name) attempts = 0 attempts += 1 log.debug( "Waiting for cleanup of %s. Task %s postponing its start %d seconds (%d so far). Attempt: %s", configId, task_name, delay, delayed, attempts) d = defer.Deferred() d.addCallback(self._startTask, task_name, interval, configId, delayed, attempts) reactor.callLater(delay, d.callback, None) else: log.debug( "Task %s starting (waited %d seconds) on %d second intervals", task_name, delayed, interval) d = loopingCall.start(interval) d.addBoth(self._ltCallback, task_name) def addTask(self, newTask, callback=None, now=False): """ Add a new IScheduledTask to the scheduler for execution. @param newTask the new task to schedule @type newTask IScheduledTask @param callback a callback to be notified each time the task completes @type callback a Python callable """ if newTask.name in self._tasks: raise ValueError("Task %s already exists" % newTask.name) log.debug("add task %s, %s using %s second interval", newTask.name, newTask, newTask.interval) callableTask = self._callableTaskFactory.getCallableTask(newTask, self) loopingCall = task.LoopingCall(callableTask) self._loopingCalls[newTask.name] = loopingCall self._tasks[newTask.name] = callableTask self._taskCallback[newTask.name] = callback self.taskAdded(callableTask) startDelay = getattr(newTask, 'startDelay', None) if startDelay is None: startDelay = 0 if now else self._getStartDelay(newTask) d = defer.Deferred() d.addCallback(self._startTask, newTask.name, newTask.interval, newTask.configId, startDelay) reactor.callLater(startDelay, d.callback, None) # just in case someone does not implement scheduled, lets be careful scheduled = getattr(newTask, 'scheduled', lambda x: None) scheduled(self) def _getStartDelay(self, task): """ amount of time to delay the start of a task. Prevents bunching up of task execution when a large amount of tasks are scheduled at the same time. """ # simple delay of random number between 0 and half the task interval delay = random.randint(0, int(task.interval / 2)) return delay def taskAdded(self, taskWrapper): """ Called whenever the scheduler adds a task. """ task = taskWrapper.task # watch the task's attribute changes task.attachAttributeObserver('state', self._taskStateChangeListener) task.attachAttributeObserver('interval', self._taskIntervalChangeListener) # create the statistics data for this task self._taskStats[task.name] = TaskStatistics(task) taskWrapper.taskStats = self._taskStats[task.name] def taskRemoved(self, taskWrapper): """ Called whenever the scheduler removes a task. """ task = taskWrapper.task task.detachAttributeObserver('state', self._taskStateChangeListener) task.detachAttributeObserver('interval', self._taskIntervalChangeListener) def taskPaused(self, taskWrapper): """ Called whenever the scheduler pauses a task. """ if IPausingScheduledTask.providedBy(taskWrapper.task): taskWrapper.task.pause() def taskResumed(self, taskWrapper): """ Called whenever the scheduler resumes a task. """ if IPausingScheduledTask.providedBy(taskWrapper.task): taskWrapper.task.resume() def getTasksForConfig(self, configId): """ Get all tasks associated with the specified identifier. """ tasks = [] for (taskName, taskWrapper) in self._tasks.iteritems(): task = taskWrapper.task if task.configId == configId: tasks.append(task) return tasks def getNextExpectedRun(self, taskName): """ Get the next expected execution time for given task. """ loopingCall = self._loopingCalls.get(taskName, None) if loopingCall: return getattr(loopingCall, '_expectNextCallAt', None) def setNextExpectedRun(self, taskName, taskInterval): """ Set the next expected execution time for given task. """ loopingCall = self._loopingCalls.get(taskName, None) if loopingCall: loopingCall._expectNextCallAt = time.time() + taskInterval log.debug( "Next expected run %s has been set for task %s", loopingCall._expectNextCallAt, taskName) def removeTasks(self, taskNames): """ Remove tasks """ doomedTasks = [] # child ids are any task that are children of the current task being # removed childIds = [] for taskName in taskNames: taskWrapper = self._tasks[taskName] task = taskWrapper.task subIds = getattr(task, "childIds", None) if subIds: childIds.extend(subIds) log.debug("Stopping task %s, %s", taskName, task) if self._loopingCalls[taskName].running: self._loopingCalls[taskName].stop() doomedTasks.append(taskName) self.taskRemoved(taskWrapper) for taskName in doomedTasks: task = self._tasks[taskName].task self._tasksToCleanup.add(task) del self._loopingCalls[taskName] del self._tasks[taskName] self._displayTaskStatistics(task) del self._taskStats[taskName] # TODO: ponder task statistics and keeping them around? map(self.removeTasksForConfig, childIds) def removeTasksForConfig(self, configId): """ Remove all tasks associated with the specified identifier. @paramater configId: the identifier to search for @type configId: string """ self.removeTasks( taskName for taskName, taskWrapper in self._tasks.iteritems() if taskWrapper.task.configId == configId) def pauseTasksForConfig(self, configId): for (taskName, taskWrapper) in self._tasks.items(): task = taskWrapper.task if task.configId == configId: log.debug("Pausing task %s", taskName) taskWrapper.paused = True self.taskPaused(taskWrapper) def resumeTasksForConfig(self, configId): for (taskName, taskWrapper) in self._tasks.iteritems(): task = taskWrapper.task if task.configId == configId: log.debug("Resuming task %s", taskName) taskWrapper.paused = False self.taskResumed(taskWrapper) def taskDone(self, taskName): if callable(self._taskCallback[taskName]): self._taskCallback[taskName](taskName=taskName) def _taskStateChangeListener(self, observable, attrName, oldValue, newValue): task = observable log.debug("Task %s changing state from %s to %s", task.name, oldValue, newValue) taskStat = self._taskStats[task.name] taskStat.trackStateChange(oldValue, newValue) def _taskIntervalChangeListener(self, observable, attrName, oldValue, newValue): """ Allows tasks to change their collection interval on the fly """ task = observable log.debug("Task %s changing run interval from %s to %s", task.name, oldValue, newValue) # TODO: should this be... # loopingCall = self._loopingCalls[task.name] loopingCall = task._dataService._scheduler._loopingCalls[task.name] loopingCall.interval = newValue @property def missedRuns(self): totalMissedRuns = 0 for taskWrapper in self._tasks.itervalues(): task = taskWrapper.task taskStats = self._taskStats[task.name] totalMissedRuns += taskStats.missedRuns return totalMissedRuns @property def failedRuns(self): totalFailedRuns = 0 for taskWrapper in self._tasks.itervalues(): task = taskWrapper.task taskStats = self._taskStats[task.name] totalFailedRuns += taskStats.failedRuns return totalFailedRuns @property def taskCount(self): return len(self._tasks) def displayStatistics(self, verbose): totalRuns = 0 totalFailedRuns = 0 totalMissedRuns = 0 totalTasks = 0 stateStats = {} for taskWrapper in self._tasks.itervalues(): task = taskWrapper.task taskStats = self._taskStats[task.name] totalTasks += 1 totalRuns += taskStats.totalRuns totalFailedRuns += taskStats.failedRuns totalMissedRuns += taskStats.missedRuns for state, stats in taskStats.states.iteritems(): if state in stateStats: totalStateStats = stateStats[state] else: totalStateStats = StateStatistics(state) stateStats[state] = totalStateStats totalStateStats.totalElapsedTime += stats.totalElapsedTime totalStateStats.totalElapsedTimeSquared += stats.totalElapsedTimeSquared totalStateStats.totalCalls += stats.totalCalls totalStateStats.minElapsedTime = min( totalStateStats.minElapsedTime, stats.minElapsedTime) totalStateStats.maxElapsedTime = max( totalStateStats.maxElapsedTime, stats.maxElapsedTime) log.info("Tasks: %d Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d " "Queued_Tasks: %d Running_Tasks: %d ", totalTasks, totalRuns, totalFailedRuns, totalMissedRuns, self.executor.queued, self.executor.running) if verbose: buffer = "Task States Summary:\n" buffer = self._displayStateStatistics(buffer, stateStats) buffer += "\nTasks:\n" for taskWrapper in self._tasks.itervalues(): task = taskWrapper.task taskStats = self._taskStats[task.name] buffer += "%s Current State: %s Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d\n" \ % (task.name, task.state, taskStats.totalRuns, taskStats.failedRuns, taskStats.missedRuns) buffer += "\nDetailed Task States:\n" for taskWrapper in self._tasks.itervalues(): task = taskWrapper.task taskStats = self._taskStats[task.name] if not taskStats.states: # Hasn't run yet continue buffer = self._displayStateStatistics(buffer, taskStats.states, "%s " % task.name) if hasattr(task, 'displayStatistics'): buffer += task.displayStatistics() buffer += "\n" log.info("Detailed Scheduler Statistics:\n%s", buffer) # TODO: the above logic doesn't print out any stats for the 'current' # state... i.e. enter the PAUSED state and wait there an hour, and # you'll see no data # TODO: should we reset the statistics here, or do it on a periodic # interval, like once an hour or day or something? def _displayTaskStatistics(self, task): verbose = log.isEnabledFor(logging.DEBUG) if not verbose: return taskStats = self._taskStats[task.name] buffer = "%s Current State: %s Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d\n" \ % (task.name, task.state, taskStats.totalRuns, taskStats.failedRuns, taskStats.missedRuns) buffer += "\nDetailed Task States:\n" if taskStats.states: # Hasn't run yet buffer = self._displayStateStatistics(buffer, taskStats.states, "%s " % task.name) if hasattr(task, 'displayStatistics'): buffer += task.displayStatistics() buffer += "\n" log.info("Detailed Task Statistics:\n%s", buffer) def _displayStateStatistics(self, buffer, stateStats, prefix=''): for state, stats in stateStats.iteritems(): buffer += "%sState: %s Total: %d Total Elapsed: %.4f Min: %.4f Max: %.4f Mean: %.4f StdDev: %.4f\n" \ % (prefix, state, stats.totalCalls, stats.totalElapsedTime, stats.minElapsedTime, stats.maxElapsedTime, stats.mean, stats.stddev) return buffer def _cleanupTasks(self): """ Periodically cleanup tasks that have been queued up for cleaning. """ # Build a list of the tasks that need to be cleaned up so that there # is no issue with concurrent modifications to the _tasksToCleanup # dictionary when tasks are quickly cleaned up. if self._tasksToCleanup: log.debug("tasks to clean %s", self._tasksToCleanup) todoList = [task for task in self._tasksToCleanup if self._isTaskCleanable(task)] cleanupWaitList = [] for task in todoList: # changing the state of the task will keep the next cleanup run # from processing it again task.state = TaskStates.STATE_CLEANING if self._shuttingDown: # let the task know the scheduler is shutting down task.state = TaskStates.STATE_SHUTDOWN log.debug("Cleanup on task %s %s", task.name, task) d = defer.maybeDeferred(task.cleanup) d.addBoth(self._cleanupTaskComplete, task) cleanupWaitList.append(d) return cleanupWaitList def _cleanupTaskComplete(self, result, task): """ Twisted callback to remove a task from the cleanup queue once it has completed its cleanup work. """ log.debug("Scheduler._cleanupTaskComplete: result=%s task.name=%s" % (result, task.name)) self._tasksToCleanup.discard(task) return result def _isTaskCleanable(self, task): """ Determines if a task is able to be cleaned up or not. """ return task.state in (TaskStates.STATE_IDLE, TaskStates.STATE_PAUSED, TaskStates.STATE_COMPLETED, TaskStates.STATE_CONNECTING) def resetStats(self, taskName): taskStats = self._taskStats[taskName] taskStats.totalRuns = 0 taskStats.failedRuns = 0 taskStats.missedRuns = 0
class Scheduler(object): """ A simple interval-based scheduler that makes use of the Twisted framework's LoopingCall construct. """ zope.interface.implements(IScheduler) CLEANUP_TASKS_INTERVAL = 10 # seconds def __init__(self, callableTaskFactory=CallableTaskFactory()): self._loopingCalls = {} self._tasks = {} self._taskCallback = {} self._taskStats = {} self._callableTaskFactory = callableTaskFactory self._shuttingDown = False # create a cleanup task that will periodically sweep the # cleanup dictionary for tasks that need to be cleaned self._tasksToCleanup = KeyedSet(getConfigId) self._cleanupTask = task.LoopingCall(self._cleanupTasks) self._cleanupTask.start(Scheduler.CLEANUP_TASKS_INTERVAL) self._executor = TwistedExecutor(1) # Ensure that we can cleanly shutdown all of our tasks reactor.addSystemEventTrigger('before', 'shutdown', self.shutdown, 'before') reactor.addSystemEventTrigger('during', 'shutdown', self.shutdown, 'during') reactor.addSystemEventTrigger('after', 'shutdown', self.shutdown, 'after') def __contains__(self, task): """ Returns True if the task has been added to the scheduler. Otherwise False is returned. """ # If task has no 'name' attribute, assume the task name was passed in. name = getattr(task, "name", task) return name in self._tasks def shutdown(self, phase): """ The reactor shutdown has three phases for event types: before - tasks can shut down safely at this time with access to all services (eg EventService, their own services) during - EventService and other services are gone before this starts after - not a lot left -- be careful Tasks that have the attribute 'stopPhase' can set the state for which they should be run, otherwise they will be shut down in the 'before' phase. Tasks that have the attribute 'stopOrder' can set the order in which they are shut down (lowest first). A stopOrder of 0 (zero) is assumed for tasks which do not declare a stopOrder. Returns a list of deferreds to wait on. """ self._shuttingDown = True doomedTasks = [] stopQ = {} log.debug("In shutdown stage %s", phase) for (taskName, taskWrapper) in self._tasks.iteritems(): task = taskWrapper.task stopPhase = getattr(task, 'stopPhase', 'before') if stopPhase in ('before', 'after', 'during') and \ stopPhase != phase: continue stopOrder = getattr(task, 'stopOrder', 0) queue = stopQ.setdefault(stopOrder, []) queue.append((taskName, taskWrapper, task)) for stopOrder in sorted(stopQ): for (taskName, taskWrapper, task) in stopQ[stopOrder]: loopTask = self._loopingCalls[taskName] if loopTask.running: log.debug("Stopping running task %s", taskName) loopTask.stop() log.debug("Removing task %s", taskName) doomedTasks.append(taskName) self.taskRemoved(taskWrapper) for taskName in doomedTasks: self._tasksToCleanup.add(self._tasks[taskName].task) del self._loopingCalls[taskName] del self._tasks[taskName] del self._taskStats[taskName] cleanupList = self._cleanupTasks() return defer.DeferredList(cleanupList) @property def executor(self): return self._executor def _getMaxTasks(self): return self._executor.getMax() def _setMaxTasks(self, max): return self._executor.setMax(max) maxTasks = property(_getMaxTasks, _setMaxTasks) def _ltCallback(self, result, task_name): """last call back in the chain, if it gets called as an errBack the looping will stop - shouldn't be called since CallableTask doesn't return a deferred, here for sanity and debug""" if task_name in self._loopingCalls: loopingCall = self._loopingCalls[task_name] log.debug("call finished %s : %s" % (loopingCall, result)) if isinstance(result, Failure): log.warn("Failure in looping call, will not reschedule %s" % task_name) log.error("%s" % result) def _startTask(self, result, task_name, interval, configId, delayed): """start the task using a callback so that its put at the bottom of the Twisted event queue, to allow other processing to continue and to support a task start-time jitter""" if task_name in self._loopingCalls: loopingCall = self._loopingCalls[task_name] if not loopingCall.running: if self._tasksToCleanup.has_key(configId): delay = random.randint(0, int(interval / 2)) delayed = delayed + delay log.debug( "Waiting for cleanup of %s. Task %s postponing its start %d seconds (%d so far)", configId, task_name, delay, delayed) d = defer.Deferred() d.addCallback(self._startTask, task_name, interval, configId, delayed) reactor.callLater(delay, d.callback, None) else: log.debug( "Task %s starting (waited %d seconds) on %d second intervals", task_name, delayed, interval) d = loopingCall.start(interval) d.addBoth(self._ltCallback, task_name) def addTask(self, newTask, callback=None, now=False): """ Add a new IScheduledTask to the scheduler for execution. @param newTask the new task to schedule @type newTask IScheduledTask @param callback a callback to be notified each time the task completes @type callback a Python callable """ if newTask.name in self._tasks: raise ValueError("Task %s already exists" % newTask.name) log.debug("add task %s, %s using %s second interval", newTask.name, newTask, newTask.interval) callableTask = self._callableTaskFactory.getCallableTask(newTask, self) loopingCall = task.LoopingCall(callableTask) self._loopingCalls[newTask.name] = loopingCall self._tasks[newTask.name] = callableTask self._taskCallback[newTask.name] = callback self.taskAdded(callableTask) startDelay = getattr(newTask, 'startDelay', None) if startDelay is None: startDelay = 0 if now else self._getStartDelay(newTask) d = defer.Deferred() d.addCallback(self._startTask, newTask.name, newTask.interval, newTask.configId, startDelay) reactor.callLater(startDelay, d.callback, None) # just in case someone does not implement scheduled, lets be careful scheduled = getattr(newTask, 'scheduled', lambda x: None) scheduled(self) def _getStartDelay(self, task): """ amount of time to delay the start of a task. Prevents bunching up of task execution when a large amount of tasks are scheduled at the same time. """ #simple delay of random number between 0 and half the task interval delay = random.randint(0, int(task.interval / 2)) return delay def taskAdded(self, taskWrapper): """ Called whenever the scheduler adds a task. """ task = taskWrapper.task # watch the task's attribute changes task.attachAttributeObserver('state', self._taskStateChangeListener) task.attachAttributeObserver('interval', self._taskIntervalChangeListener) # create the statistics data for this task self._taskStats[task.name] = TaskStatistics(task) taskWrapper.taskStats = self._taskStats[task.name] def taskRemoved(self, taskWrapper): """ Called whenever the scheduler removes a task. """ task = taskWrapper.task task.detachAttributeObserver('state', self._taskStateChangeListener) task.detachAttributeObserver('interval', self._taskIntervalChangeListener) def taskPaused(self, taskWrapper): """ Called whenever the scheduler pauses a task. """ pass def taskResumed(self, taskWrapper): """ Called whenever the scheduler resumes a task. """ pass def getTasksForConfig(self, configId): """ Get all tasks associated with the specified identifier. """ tasks = [] for (taskName, taskWrapper) in self._tasks.iteritems(): task = taskWrapper.task if task.configId == configId: tasks.append(task) return tasks def removeTasksForConfig(self, configId): """ Remove all tasks associated with the specified identifier. @paramater configId: the identifier to search for @type configId: string """ doomedTasks = [] # child ids are any task that are children of the current task being # removed childIds = [] for (taskName, taskWrapper) in self._tasks.iteritems(): task = taskWrapper.task if task.configId == configId: subIds = getattr(task, "childIds", None) if subIds: childIds.extend(subIds) log.debug("Stopping task %s, %s", taskName, task) if self._loopingCalls[taskName].running: self._loopingCalls[taskName].stop() doomedTasks.append(taskName) self.taskRemoved(taskWrapper) for taskName in doomedTasks: task = self._tasks[taskName].task self._tasksToCleanup.add(task) del self._loopingCalls[taskName] del self._tasks[taskName] self._displayTaskStatistics(task) del self._taskStats[taskName] # TODO: ponder task statistics and keeping them around? for childId in childIds: self.removeTasksForConfig(childId) def pauseTasksForConfig(self, configId): for (taskName, taskWrapper) in self._tasks.items(): task = taskWrapper.task if task.configId == configId: log.debug("Pausing task %s", taskName) taskWrapper.paused = True self.taskPaused(taskWrapper) def resumeTasksForConfig(self, configId): for (taskName, taskWrapper) in self._tasks.iteritems(): task = taskWrapper.task if task.configId == configId: log.debug("Resuming task %s", taskName) taskWrapper.paused = False self.taskResumed(taskWrapper) def taskDone(self, taskName): if callable(self._taskCallback[taskName]): self._taskCallback[taskName](taskName=taskName) def _taskStateChangeListener(self, observable, attrName, oldValue, newValue): task = observable log.debug("Task %s changing state from %s to %s", task.name, oldValue, newValue) taskStat = self._taskStats[task.name] taskStat.trackStateChange(oldValue, newValue) def _taskIntervalChangeListener(self, observable, attrName, oldValue, newValue): """ Allows tasks to change their collection interval on the fly """ task = observable log.debug("Task %s changing run interval from %s to %s", task.name, oldValue, newValue) #TODO: should this be... # loopingCall = self._loopingCalls[task.name] loopingCall = task._dataService._scheduler._loopingCalls[task.name] loopingCall.interval = newValue @property def missedRuns(self): totalMissedRuns = 0 for taskWrapper in self._tasks.itervalues(): task = taskWrapper.task taskStats = self._taskStats[task.name] totalMissedRuns += taskStats.missedRuns return totalMissedRuns @property def failedRuns(self): totalFailedRuns = 0 for taskWrapper in self._tasks.itervalues(): task = taskWrapper.task taskStats = self._taskStats[task.name] totalFailedRuns += taskStats.failedRuns return totalFailedRuns def displayStatistics(self, verbose): totalRuns = 0 totalFailedRuns = 0 totalMissedRuns = 0 totalTasks = 0 stateStats = {} for taskWrapper in self._tasks.itervalues(): task = taskWrapper.task taskStats = self._taskStats[task.name] totalTasks += 1 totalRuns += taskStats.totalRuns totalFailedRuns += taskStats.failedRuns totalMissedRuns += taskStats.missedRuns for state, stats in taskStats.states.iteritems(): if state in stateStats: totalStateStats = stateStats[state] else: totalStateStats = StateStatistics(state) stateStats[state] = totalStateStats totalStateStats.totalElapsedTime += stats.totalElapsedTime totalStateStats.totalElapsedTimeSquared += stats.totalElapsedTimeSquared totalStateStats.totalCalls += stats.totalCalls totalStateStats.minElapsedTime = min( totalStateStats.minElapsedTime, stats.minElapsedTime) totalStateStats.maxElapsedTime = max( totalStateStats.maxElapsedTime, stats.maxElapsedTime) log.info("Tasks: %d Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d " \ "Queued_Tasks: %d Running_Tasks: %d ", totalTasks, totalRuns, totalFailedRuns, totalMissedRuns, self.executor.queued, self.executor.running) if verbose: buffer = "Task States Summary:\n" buffer = self._displayStateStatistics(buffer, stateStats) buffer += "\nTasks:\n" for taskWrapper in self._tasks.itervalues(): task = taskWrapper.task taskStats = self._taskStats[task.name] buffer += "%s Current State: %s Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d\n" \ % (task.name, task.state, taskStats.totalRuns, taskStats.failedRuns, taskStats.missedRuns) buffer += "\nDetailed Task States:\n" for taskWrapper in self._tasks.itervalues(): task = taskWrapper.task taskStats = self._taskStats[task.name] if not taskStats.states: # Hasn't run yet continue buffer = self._displayStateStatistics(buffer, taskStats.states, "%s " % task.name) if hasattr(task, 'displayStatistics'): buffer += task.displayStatistics() buffer += "\n" log.info("Detailed Scheduler Statistics:\n%s", buffer) # TODO: the above logic doesn't print out any stats for the 'current' # state... i.e. enter the PAUSED state and wait there an hour, and # you'll see no data # TODO: should we reset the statistics here, or do it on a periodic # interval, like once an hour or day or something? def _displayTaskStatistics(self, task): verbose = log.isEnabledFor(logging.DEBUG) if not verbose: return taskStats = self._taskStats[task.name] buffer = "%s Current State: %s Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d\n" \ % (task.name, task.state, taskStats.totalRuns, taskStats.failedRuns, taskStats.missedRuns) buffer += "\nDetailed Task States:\n" if taskStats.states: # Hasn't run yet buffer = self._displayStateStatistics(buffer, taskStats.states, "%s " % task.name) if hasattr(task, 'displayStatistics'): buffer += task.displayStatistics() buffer += "\n" log.info("Detailed Task Statistics:\n%s", buffer) def _displayStateStatistics(self, buffer, stateStats, prefix=''): for state, stats in stateStats.iteritems(): buffer += "%sState: %s Total: %d Total Elapsed: %.4f Min: %.4f Max: %.4f Mean: %.4f StdDev: %.4f\n" \ % (prefix, state, stats.totalCalls, stats.totalElapsedTime, stats.minElapsedTime, stats.maxElapsedTime, stats.mean, stats.stddev) return buffer def _cleanupTasks(self): """ Periodically cleanup tasks that have been queued up for cleaning. """ # Build a list of the tasks that need to be cleaned up so that there # is no issue with concurrent modifications to the _tasksToCleanup # dictionary when tasks are quickly cleaned up. if self._tasksToCleanup: log.debug("tasks to clean %s", self._tasksToCleanup) todoList = [ task for task in self._tasksToCleanup if self._isTaskCleanable(task) ] cleanupWaitList = [] for task in todoList: # changing the state of the task will keep the next cleanup run # from processing it again task.state = TaskStates.STATE_CLEANING if self._shuttingDown: #let the task know the scheduler is shutting down task.state = TaskStates.STATE_SHUTDOWN log.debug("Cleanup on task %s %s", task.name, task) d = defer.maybeDeferred(task.cleanup) d.addBoth(self._cleanupTaskComplete, task) cleanupWaitList.append(d) return cleanupWaitList def _cleanupTaskComplete(self, result, task): """ Twisted callback to remove a task from the cleanup queue once it has completed its cleanup work. """ log.debug("Scheduler._cleanupTaskComplete: result=%s task.name=%s" % (result, task.name)) self._tasksToCleanup.discard(task) return result def _isTaskCleanable(self, task): """ Determines if a task is able to be cleaned up or not. """ return task.state in (TaskStates.STATE_IDLE, TaskStates.STATE_PAUSED, TaskStates.STATE_COMPLETED)