Example #1
0
 def testHasKey(self):
     ks = KeyedSet(firstLetter, ["apple", "avocado", "banana"])
     self.assertTrue(ks.has_key("a"))
     self.assertTrue(ks.has_key("b"))
     self.assertFalse(ks.has_key("c"))
Example #2
0
class Scheduler(object):

    """
    A simple interval-based scheduler that makes use of the Twisted framework's
    LoopingCall construct.
    """

    zope.interface.implements(IScheduler)

    CLEANUP_TASKS_INTERVAL = 10  # seconds
    ATTEMPTS = 3

    def __init__(self, callableTaskFactory=CallableTaskFactory()):
        self._loopingCalls = {}
        self._tasks = {}
        self._taskCallback = {}
        self._taskStats = {}
        self._callableTaskFactory = callableTaskFactory
        self._shuttingDown = False
        # create a cleanup task that will periodically sweep the
        # cleanup dictionary for tasks that need to be cleaned
        self._tasksToCleanup = KeyedSet(getConfigId)
        self._cleanupTask = task.LoopingCall(self._cleanupTasks)
        self._cleanupTask.start(Scheduler.CLEANUP_TASKS_INTERVAL)

        self._executor = TwistedExecutor(1)

        # Ensure that we can cleanly shutdown all of our tasks
        reactor.addSystemEventTrigger('before', 'shutdown', self.shutdown, 'before')
        reactor.addSystemEventTrigger('during', 'shutdown', self.shutdown, 'during')
        reactor.addSystemEventTrigger('after', 'shutdown', self.shutdown, 'after')

    def __contains__(self, task):
        """
        Returns True if the task has been added to the scheduler.  Otherwise
        False is returned.
        """
        # If task has no 'name' attribute, assume the task name was passed in.
        name = getattr(task, "name", task)
        return name in self._tasks

    def shutdown(self, phase):
        """
        The reactor shutdown has three phases for event types:

               before - tasks can shut down safely at this time with access to all
                        services (eg EventService, their own services)
               during - EventService and other services are gone before this starts
               after  - not a lot left -- be careful

        Tasks that have the attribute 'stopPhase' can set the state for which
        they should be run, otherwise they will be shut down in the 'before' phase.

        Tasks that have the attribute 'stopOrder' can set the order in which they
        are shut down (lowest first).  A stopOrder of 0 (zero) is assumed for tasks
        which do not declare a stopOrder.

        Returns a list of deferreds to wait on.
        """
        self._shuttingDown = True
        doomedTasks = []
        stopQ = {}
        log.debug("In shutdown stage %s", phase)
        for (taskName, taskWrapper) in self._tasks.iteritems():
            task = taskWrapper.task
            stopPhase = getattr(task, 'stopPhase', 'before')
            if stopPhase in ('before', 'after', 'during') and \
               stopPhase != phase:
                continue
            stopOrder = getattr(task, 'stopOrder', 0)
            queue = stopQ.setdefault(stopOrder, [])
            queue.append((taskName, taskWrapper, task))

        for stopOrder in sorted(stopQ):
            for (taskName, taskWrapper, task) in stopQ[stopOrder]:
                loopTask = self._loopingCalls[taskName]
                if loopTask.running:
                    log.debug("Stopping running task %s", taskName)
                    loopTask.stop()
                log.debug("Removing task %s", taskName)
                doomedTasks.append(taskName)
                self.taskRemoved(taskWrapper)

        for taskName in doomedTasks:
            self._tasksToCleanup.add(self._tasks[taskName].task)

            del self._loopingCalls[taskName]
            del self._tasks[taskName]
            del self._taskStats[taskName]

        cleanupList = self._cleanupTasks()
        return defer.DeferredList(cleanupList)

    @property
    def executor(self):
        return self._executor

    def _getMaxTasks(self):
        return self._executor.getMax()

    def _setMaxTasks(self, max):
        return self._executor.setMax(max)

    maxTasks = property(_getMaxTasks, _setMaxTasks)

    def _ltCallback(self, result, task_name):
        """last call back in the chain, if it gets called as an errBack
        the looping will stop - shouldn't be called since CallableTask
        doesn't return a deferred, here for sanity and debug"""
        if task_name in self._loopingCalls:
            loopingCall = self._loopingCalls[task_name]
            log.debug("call finished %s : %s" % (loopingCall, result))
        if isinstance(result, Failure):
            log.warn("Failure in looping call, will not reschedule %s" % task_name)
            log.error("%s" % result)

    def _startTask(self, result, task_name, interval, configId, delayed, attempts=0):
        """start the task using a callback so that its put at the bottom of
        the Twisted event queue, to allow other processing to continue and
        to support a task start-time jitter"""
        if task_name in self._loopingCalls:
            loopingCall = self._loopingCalls[task_name]
            if not loopingCall.running:
                if self._tasksToCleanup.has_key(configId):
                    delay = random.randint(0, int(interval / 2))
                    delayed = delayed + delay
                    if attempts > Scheduler.ATTEMPTS:
                        obj = self._tasksToCleanup.pop_by_key(configId)
                        log.debug("Forced cleanup of %s. Task: %s", configId, obj.name)
                        attempts = 0
                    attempts += 1
                    log.debug(
                        "Waiting for cleanup of %s. Task %s postponing its start %d seconds (%d so far). Attempt: %s",
                              configId, task_name, delay, delayed, attempts)
                    d = defer.Deferred()
                    d.addCallback(self._startTask, task_name, interval, configId, delayed, attempts)
                    reactor.callLater(delay, d.callback, None)
                else:
                    log.debug(
                        "Task %s starting (waited %d seconds) on %d second intervals", task_name, delayed, interval)
                    d = loopingCall.start(interval)
                    d.addBoth(self._ltCallback, task_name)

    def addTask(self, newTask, callback=None, now=False):
        """
        Add a new IScheduledTask to the scheduler for execution.
        @param newTask the new task to schedule
        @type newTask IScheduledTask
        @param callback a callback to be notified each time the task completes
        @type callback a Python callable
        """
        if newTask.name in self._tasks:
            raise ValueError("Task %s already exists" % newTask.name)
        log.debug("add task %s, %s using %s second interval",
                  newTask.name, newTask, newTask.interval)
        callableTask = self._callableTaskFactory.getCallableTask(newTask, self)
        loopingCall = task.LoopingCall(callableTask)
        self._loopingCalls[newTask.name] = loopingCall
        self._tasks[newTask.name] = callableTask
        self._taskCallback[newTask.name] = callback
        self.taskAdded(callableTask)
        startDelay = getattr(newTask, 'startDelay', None)
        if startDelay is None:
            startDelay = 0 if now else self._getStartDelay(newTask)
        d = defer.Deferred()
        d.addCallback(self._startTask, newTask.name, newTask.interval, newTask.configId, startDelay)
        reactor.callLater(startDelay, d.callback, None)

        # just in case someone does not implement scheduled, lets be careful
        scheduled = getattr(newTask, 'scheduled', lambda x: None)
        scheduled(self)

    def _getStartDelay(self, task):
        """
        amount of time to delay the start of a task. Prevents bunching up of
        task execution when a large amount of tasks are scheduled at the same
        time.
        """
        # simple delay of random number between 0 and half the task interval
        delay = random.randint(0, int(task.interval / 2))
        return delay

    def taskAdded(self, taskWrapper):
        """
        Called whenever the scheduler adds a task.
        """
        task = taskWrapper.task

        # watch the task's attribute changes
        task.attachAttributeObserver('state', self._taskStateChangeListener)
        task.attachAttributeObserver('interval', self._taskIntervalChangeListener)

        # create the statistics data for this task
        self._taskStats[task.name] = TaskStatistics(task)
        taskWrapper.taskStats = self._taskStats[task.name]

    def taskRemoved(self, taskWrapper):
        """
        Called whenever the scheduler removes a task.
        """
        task = taskWrapper.task
        task.detachAttributeObserver('state', self._taskStateChangeListener)
        task.detachAttributeObserver('interval', self._taskIntervalChangeListener)

    def taskPaused(self, taskWrapper):
        """
        Called whenever the scheduler pauses a task.
        """
        if IPausingScheduledTask.providedBy(taskWrapper.task):
            taskWrapper.task.pause()

    def taskResumed(self, taskWrapper):
        """
        Called whenever the scheduler resumes a task.
        """
        if IPausingScheduledTask.providedBy(taskWrapper.task):
            taskWrapper.task.resume()

    def getTasksForConfig(self, configId):
        """
        Get all tasks associated with the specified identifier.
        """
        tasks = []
        for (taskName, taskWrapper) in self._tasks.iteritems():
            task = taskWrapper.task
            if task.configId == configId:
                tasks.append(task)
        return tasks

    def getNextExpectedRun(self, taskName):
        """
        Get the next expected execution time for given task.
        """
        loopingCall = self._loopingCalls.get(taskName, None)
        if loopingCall:
            return getattr(loopingCall, '_expectNextCallAt', None)

    def setNextExpectedRun(self, taskName, taskInterval):
        """
        Set the next expected execution time for given task.
        """
        loopingCall = self._loopingCalls.get(taskName, None)
        if loopingCall:
            loopingCall._expectNextCallAt = time.time() + taskInterval
            log.debug(
                "Next expected run %s has been set for task %s", 
                loopingCall._expectNextCallAt, 
                taskName)

    def removeTasks(self, taskNames):
        """
        Remove tasks
        """
        doomedTasks = []
        # child ids are any task that are children of the current task being
        # removed
        childIds = []
        for taskName in taskNames:
            taskWrapper = self._tasks[taskName]
            task = taskWrapper.task
            subIds = getattr(task, "childIds", None)
            if subIds:
                childIds.extend(subIds)
            log.debug("Stopping task %s, %s", taskName, task)
            if self._loopingCalls[taskName].running:
                self._loopingCalls[taskName].stop()

            doomedTasks.append(taskName)
            self.taskRemoved(taskWrapper)

        for taskName in doomedTasks:
            task = self._tasks[taskName].task
            self._tasksToCleanup.add(task)
            del self._loopingCalls[taskName]
            del self._tasks[taskName]
            self._displayTaskStatistics(task)
            del self._taskStats[taskName]
            # TODO: ponder task statistics and keeping them around?

        map(self.removeTasksForConfig, childIds)

    def removeTasksForConfig(self, configId):
        """
        Remove all tasks associated with the specified identifier.

        @paramater configId: the identifier to search for
        @type configId: string
        """
        self.removeTasks(
            taskName for taskName, taskWrapper in self._tasks.iteritems() if taskWrapper.task.configId == configId)

    def pauseTasksForConfig(self, configId):
        for (taskName, taskWrapper) in self._tasks.items():
            task = taskWrapper.task
            if task.configId == configId:
                log.debug("Pausing task %s", taskName)
                taskWrapper.paused = True
                self.taskPaused(taskWrapper)

    def resumeTasksForConfig(self, configId):
        for (taskName, taskWrapper) in self._tasks.iteritems():
            task = taskWrapper.task
            if task.configId == configId:
                log.debug("Resuming task %s", taskName)
                taskWrapper.paused = False
                self.taskResumed(taskWrapper)

    def taskDone(self, taskName):
        if callable(self._taskCallback[taskName]):
            self._taskCallback[taskName](taskName=taskName)

    def _taskStateChangeListener(self, observable, attrName, oldValue, newValue):
        task = observable

        log.debug("Task %s changing state from %s to %s", task.name, oldValue,
                  newValue)

        taskStat = self._taskStats[task.name]
        taskStat.trackStateChange(oldValue, newValue)

    def _taskIntervalChangeListener(self, observable, attrName, oldValue, newValue):
        """
        Allows tasks to change their collection interval on the fly
        """
        task = observable
        log.debug("Task %s changing run interval from %s to %s", task.name, oldValue,
                  newValue)
        # TODO: should this be...
        # loopingCall = self._loopingCalls[task.name]
        loopingCall = task._dataService._scheduler._loopingCalls[task.name]
        loopingCall.interval = newValue

    @property
    def missedRuns(self):
        totalMissedRuns = 0
        for taskWrapper in self._tasks.itervalues():
            task = taskWrapper.task
            taskStats = self._taskStats[task.name]
            totalMissedRuns += taskStats.missedRuns
        return totalMissedRuns

    @property
    def failedRuns(self):
        totalFailedRuns = 0
        for taskWrapper in self._tasks.itervalues():
            task = taskWrapper.task
            taskStats = self._taskStats[task.name]
            totalFailedRuns += taskStats.failedRuns
        return totalFailedRuns

    @property
    def taskCount(self):
        return len(self._tasks)

    def displayStatistics(self, verbose):
        totalRuns = 0
        totalFailedRuns = 0
        totalMissedRuns = 0
        totalTasks = 0
        stateStats = {}

        for taskWrapper in self._tasks.itervalues():
            task = taskWrapper.task
            taskStats = self._taskStats[task.name]

            totalTasks += 1
            totalRuns += taskStats.totalRuns
            totalFailedRuns += taskStats.failedRuns
            totalMissedRuns += taskStats.missedRuns

            for state, stats in taskStats.states.iteritems():
                if state in stateStats:
                    totalStateStats = stateStats[state]
                else:
                    totalStateStats = StateStatistics(state)
                    stateStats[state] = totalStateStats
                totalStateStats.totalElapsedTime += stats.totalElapsedTime
                totalStateStats.totalElapsedTimeSquared += stats.totalElapsedTimeSquared
                totalStateStats.totalCalls += stats.totalCalls
                totalStateStats.minElapsedTime = min(
                    totalStateStats.minElapsedTime, stats.minElapsedTime)
                totalStateStats.maxElapsedTime = max(
                    totalStateStats.maxElapsedTime, stats.maxElapsedTime)

        log.info("Tasks: %d Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d "
                 "Queued_Tasks: %d Running_Tasks: %d ",
                 totalTasks, totalRuns, totalFailedRuns, totalMissedRuns,
                 self.executor.queued, self.executor.running)

        if verbose:
            buffer = "Task States Summary:\n"
            buffer = self._displayStateStatistics(buffer, stateStats)

            buffer += "\nTasks:\n"
            for taskWrapper in self._tasks.itervalues():
                task = taskWrapper.task
                taskStats = self._taskStats[task.name]

                buffer += "%s Current State: %s Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d\n" \
                    % (task.name, task.state, taskStats.totalRuns,
                       taskStats.failedRuns, taskStats.missedRuns)

            buffer += "\nDetailed Task States:\n"
            for taskWrapper in self._tasks.itervalues():
                task = taskWrapper.task
                taskStats = self._taskStats[task.name]

                if not taskStats.states:  # Hasn't run yet
                    continue

                buffer = self._displayStateStatistics(buffer,
                                                      taskStats.states,
                                                      "%s " % task.name)

                if hasattr(task, 'displayStatistics'):
                    buffer += task.displayStatistics()

                buffer += "\n"

            log.info("Detailed Scheduler Statistics:\n%s", buffer)

        # TODO: the above logic doesn't print out any stats for the 'current'
        # state... i.e. enter the PAUSED state and wait there an hour, and
        # you'll see no data

        # TODO: should we reset the statistics here, or do it on a periodic
        # interval, like once an hour or day or something?

    def _displayTaskStatistics(self, task):
        verbose = log.isEnabledFor(logging.DEBUG)
        if not verbose:
            return
        taskStats = self._taskStats[task.name]

        buffer = "%s Current State: %s Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d\n" \
            % (task.name, task.state, taskStats.totalRuns,
               taskStats.failedRuns, taskStats.missedRuns)

        buffer += "\nDetailed Task States:\n"
        if taskStats.states:  # Hasn't run yet
            buffer = self._displayStateStatistics(buffer,
                                                  taskStats.states,
                                                  "%s " % task.name)

            if hasattr(task, 'displayStatistics'):
                buffer += task.displayStatistics()

        buffer += "\n"

        log.info("Detailed Task Statistics:\n%s", buffer)

    def _displayStateStatistics(self, buffer, stateStats, prefix=''):
        for state, stats in stateStats.iteritems():
            buffer += "%sState: %s Total: %d Total Elapsed: %.4f Min: %.4f Max: %.4f Mean: %.4f StdDev: %.4f\n" \
                % (prefix,
                   state, stats.totalCalls, stats.totalElapsedTime,
                   stats.minElapsedTime, stats.maxElapsedTime, stats.mean,
                   stats.stddev)
        return buffer

    def _cleanupTasks(self):
        """
        Periodically cleanup tasks that have been queued up for cleaning.
        """
        # Build a list of the tasks that need to be cleaned up so that there
        # is no issue with concurrent modifications to the _tasksToCleanup
        # dictionary when tasks are quickly cleaned up.
        if self._tasksToCleanup:
            log.debug("tasks to clean %s", self._tasksToCleanup)

        todoList = [task for task in self._tasksToCleanup
                    if self._isTaskCleanable(task)]

        cleanupWaitList = []
        for task in todoList:
            # changing the state of the task will keep the next cleanup run
            # from processing it again
            task.state = TaskStates.STATE_CLEANING
            if self._shuttingDown:
                # let the task know the scheduler is shutting down
                task.state = TaskStates.STATE_SHUTDOWN
            log.debug("Cleanup on task %s %s", task.name, task)
            d = defer.maybeDeferred(task.cleanup)
            d.addBoth(self._cleanupTaskComplete, task)
            cleanupWaitList.append(d)
        return cleanupWaitList

    def _cleanupTaskComplete(self, result, task):
        """
        Twisted callback to remove a task from the cleanup queue once it has
        completed its cleanup work.
        """
        log.debug("Scheduler._cleanupTaskComplete: result=%s task.name=%s" % (result, task.name))
        self._tasksToCleanup.discard(task)
        return result

    def _isTaskCleanable(self, task):
        """
        Determines if a task is able to be cleaned up or not.
        """
        return task.state in (TaskStates.STATE_IDLE,
                              TaskStates.STATE_PAUSED,
                              TaskStates.STATE_COMPLETED,
                              TaskStates.STATE_CONNECTING)

    def resetStats(self, taskName):
        taskStats = self._taskStats[taskName]
        taskStats.totalRuns = 0
        taskStats.failedRuns = 0
        taskStats.missedRuns = 0
Example #3
0
class Scheduler(object):
    """
    A simple interval-based scheduler that makes use of the Twisted framework's
    LoopingCall construct.
    """

    zope.interface.implements(IScheduler)

    CLEANUP_TASKS_INTERVAL = 10  # seconds

    def __init__(self, callableTaskFactory=CallableTaskFactory()):
        self._loopingCalls = {}
        self._tasks = {}
        self._taskCallback = {}
        self._taskStats = {}
        self._callableTaskFactory = callableTaskFactory
        self._shuttingDown = False
        # create a cleanup task that will periodically sweep the
        # cleanup dictionary for tasks that need to be cleaned
        self._tasksToCleanup = KeyedSet(getConfigId)
        self._cleanupTask = task.LoopingCall(self._cleanupTasks)
        self._cleanupTask.start(Scheduler.CLEANUP_TASKS_INTERVAL)

        self._executor = TwistedExecutor(1)

        # Ensure that we can cleanly shutdown all of our tasks
        reactor.addSystemEventTrigger('before', 'shutdown', self.shutdown,
                                      'before')
        reactor.addSystemEventTrigger('during', 'shutdown', self.shutdown,
                                      'during')
        reactor.addSystemEventTrigger('after', 'shutdown', self.shutdown,
                                      'after')

    def __contains__(self, task):
        """
        Returns True if the task has been added to the scheduler.  Otherwise
        False is returned.
        """
        # If task has no 'name' attribute, assume the task name was passed in.
        name = getattr(task, "name", task)
        return name in self._tasks

    def shutdown(self, phase):
        """
        The reactor shutdown has three phases for event types:

               before - tasks can shut down safely at this time with access to all
                        services (eg EventService, their own services)
               during - EventService and other services are gone before this starts
               after  - not a lot left -- be careful

        Tasks that have the attribute 'stopPhase' can set the state for which
        they should be run, otherwise they will be shut down in the 'before' phase.

        Tasks that have the attribute 'stopOrder' can set the order in which they
        are shut down (lowest first).  A stopOrder of 0 (zero) is assumed for tasks
        which do not declare a stopOrder.

        Returns a list of deferreds to wait on.
        """
        self._shuttingDown = True
        doomedTasks = []
        stopQ = {}
        log.debug("In shutdown stage %s", phase)
        for (taskName, taskWrapper) in self._tasks.iteritems():
            task = taskWrapper.task
            stopPhase = getattr(task, 'stopPhase', 'before')
            if stopPhase in ('before', 'after', 'during') and \
               stopPhase != phase:
                continue
            stopOrder = getattr(task, 'stopOrder', 0)
            queue = stopQ.setdefault(stopOrder, [])
            queue.append((taskName, taskWrapper, task))

        for stopOrder in sorted(stopQ):
            for (taskName, taskWrapper, task) in stopQ[stopOrder]:
                loopTask = self._loopingCalls[taskName]
                if loopTask.running:
                    log.debug("Stopping running task %s", taskName)
                    loopTask.stop()
                log.debug("Removing task %s", taskName)
                doomedTasks.append(taskName)
                self.taskRemoved(taskWrapper)

        for taskName in doomedTasks:
            self._tasksToCleanup.add(self._tasks[taskName].task)

            del self._loopingCalls[taskName]
            del self._tasks[taskName]
            del self._taskStats[taskName]

        cleanupList = self._cleanupTasks()
        return defer.DeferredList(cleanupList)

    @property
    def executor(self):
        return self._executor

    def _getMaxTasks(self):
        return self._executor.getMax()

    def _setMaxTasks(self, max):
        return self._executor.setMax(max)

    maxTasks = property(_getMaxTasks, _setMaxTasks)

    def _ltCallback(self, result, task_name):
        """last call back in the chain, if it gets called as an errBack
        the looping will stop - shouldn't be called since CallableTask
        doesn't return a deferred, here for sanity and debug"""
        if task_name in self._loopingCalls:
            loopingCall = self._loopingCalls[task_name]
            log.debug("call finished %s : %s" % (loopingCall, result))
        if isinstance(result, Failure):
            log.warn("Failure in looping call, will not reschedule %s" %
                     task_name)
            log.error("%s" % result)

    def _startTask(self, result, task_name, interval, configId, delayed):
        """start the task using a callback so that its put at the bottom of
        the Twisted event queue, to allow other processing to continue and
        to support a task start-time jitter"""
        if task_name in self._loopingCalls:
            loopingCall = self._loopingCalls[task_name]
            if not loopingCall.running:
                if self._tasksToCleanup.has_key(configId):
                    delay = random.randint(0, int(interval / 2))
                    delayed = delayed + delay
                    log.debug(
                        "Waiting for cleanup of %s. Task %s postponing its start %d seconds (%d so far)",
                        configId, task_name, delay, delayed)
                    d = defer.Deferred()
                    d.addCallback(self._startTask, task_name, interval,
                                  configId, delayed)
                    reactor.callLater(delay, d.callback, None)
                else:
                    log.debug(
                        "Task %s starting (waited %d seconds) on %d second intervals",
                        task_name, delayed, interval)
                    d = loopingCall.start(interval)
                    d.addBoth(self._ltCallback, task_name)

    def addTask(self, newTask, callback=None, now=False):
        """
        Add a new IScheduledTask to the scheduler for execution.
        @param newTask the new task to schedule
        @type newTask IScheduledTask
        @param callback a callback to be notified each time the task completes
        @type callback a Python callable
        """
        if newTask.name in self._tasks:
            raise ValueError("Task %s already exists" % newTask.name)
        log.debug("add task %s, %s using %s second interval", newTask.name,
                  newTask, newTask.interval)
        callableTask = self._callableTaskFactory.getCallableTask(newTask, self)
        loopingCall = task.LoopingCall(callableTask)
        self._loopingCalls[newTask.name] = loopingCall
        self._tasks[newTask.name] = callableTask
        self._taskCallback[newTask.name] = callback
        self.taskAdded(callableTask)
        startDelay = getattr(newTask, 'startDelay', None)
        if startDelay is None:
            startDelay = 0 if now else self._getStartDelay(newTask)
        d = defer.Deferred()
        d.addCallback(self._startTask, newTask.name, newTask.interval,
                      newTask.configId, startDelay)
        reactor.callLater(startDelay, d.callback, None)

        # just in case someone does not implement scheduled, lets be careful
        scheduled = getattr(newTask, 'scheduled', lambda x: None)
        scheduled(self)

    def _getStartDelay(self, task):
        """
        amount of time to delay the start of a task. Prevents bunching up of
        task execution when a large amount of tasks are scheduled at the same
        time.
        """
        #simple delay of random number between 0 and half the task interval
        delay = random.randint(0, int(task.interval / 2))
        return delay

    def taskAdded(self, taskWrapper):
        """
        Called whenever the scheduler adds a task.
        """
        task = taskWrapper.task

        # watch the task's attribute changes
        task.attachAttributeObserver('state', self._taskStateChangeListener)
        task.attachAttributeObserver('interval',
                                     self._taskIntervalChangeListener)

        # create the statistics data for this task
        self._taskStats[task.name] = TaskStatistics(task)
        taskWrapper.taskStats = self._taskStats[task.name]

    def taskRemoved(self, taskWrapper):
        """
        Called whenever the scheduler removes a task.
        """
        task = taskWrapper.task
        task.detachAttributeObserver('state', self._taskStateChangeListener)
        task.detachAttributeObserver('interval',
                                     self._taskIntervalChangeListener)

    def taskPaused(self, taskWrapper):
        """
        Called whenever the scheduler pauses a task.
        """
        pass

    def taskResumed(self, taskWrapper):
        """
        Called whenever the scheduler resumes a task.
        """
        pass

    def getTasksForConfig(self, configId):
        """
        Get all tasks associated with the specified identifier.
        """
        tasks = []
        for (taskName, taskWrapper) in self._tasks.iteritems():
            task = taskWrapper.task
            if task.configId == configId:
                tasks.append(task)
        return tasks

    def removeTasksForConfig(self, configId):
        """
        Remove all tasks associated with the specified identifier.

        @paramater configId: the identifier to search for
        @type configId: string
        """
        doomedTasks = []
        # child ids are any task that are children of the current task being
        # removed
        childIds = []
        for (taskName, taskWrapper) in self._tasks.iteritems():
            task = taskWrapper.task
            if task.configId == configId:
                subIds = getattr(task, "childIds", None)
                if subIds:
                    childIds.extend(subIds)
                log.debug("Stopping task %s, %s", taskName, task)

                if self._loopingCalls[taskName].running:
                    self._loopingCalls[taskName].stop()

                doomedTasks.append(taskName)
                self.taskRemoved(taskWrapper)

        for taskName in doomedTasks:
            task = self._tasks[taskName].task
            self._tasksToCleanup.add(task)
            del self._loopingCalls[taskName]
            del self._tasks[taskName]
            self._displayTaskStatistics(task)
            del self._taskStats[taskName]
            # TODO: ponder task statistics and keeping them around?

        for childId in childIds:
            self.removeTasksForConfig(childId)

    def pauseTasksForConfig(self, configId):
        for (taskName, taskWrapper) in self._tasks.items():
            task = taskWrapper.task
            if task.configId == configId:
                log.debug("Pausing task %s", taskName)
                taskWrapper.paused = True
                self.taskPaused(taskWrapper)

    def resumeTasksForConfig(self, configId):
        for (taskName, taskWrapper) in self._tasks.iteritems():
            task = taskWrapper.task
            if task.configId == configId:
                log.debug("Resuming task %s", taskName)
                taskWrapper.paused = False
                self.taskResumed(taskWrapper)

    def taskDone(self, taskName):
        if callable(self._taskCallback[taskName]):
            self._taskCallback[taskName](taskName=taskName)

    def _taskStateChangeListener(self, observable, attrName, oldValue,
                                 newValue):
        task = observable

        log.debug("Task %s changing state from %s to %s", task.name, oldValue,
                  newValue)

        taskStat = self._taskStats[task.name]
        taskStat.trackStateChange(oldValue, newValue)

    def _taskIntervalChangeListener(self, observable, attrName, oldValue,
                                    newValue):
        """
        Allows tasks to change their collection interval on the fly
        """
        task = observable
        log.debug("Task %s changing run interval from %s to %s", task.name,
                  oldValue, newValue)
        #TODO: should this be...
        # loopingCall = self._loopingCalls[task.name]
        loopingCall = task._dataService._scheduler._loopingCalls[task.name]
        loopingCall.interval = newValue

    @property
    def missedRuns(self):
        totalMissedRuns = 0
        for taskWrapper in self._tasks.itervalues():
            task = taskWrapper.task
            taskStats = self._taskStats[task.name]
            totalMissedRuns += taskStats.missedRuns
        return totalMissedRuns

    @property
    def failedRuns(self):
        totalFailedRuns = 0
        for taskWrapper in self._tasks.itervalues():
            task = taskWrapper.task
            taskStats = self._taskStats[task.name]
            totalFailedRuns += taskStats.failedRuns
        return totalFailedRuns

    def displayStatistics(self, verbose):
        totalRuns = 0
        totalFailedRuns = 0
        totalMissedRuns = 0
        totalTasks = 0
        stateStats = {}

        for taskWrapper in self._tasks.itervalues():
            task = taskWrapper.task
            taskStats = self._taskStats[task.name]

            totalTasks += 1
            totalRuns += taskStats.totalRuns
            totalFailedRuns += taskStats.failedRuns
            totalMissedRuns += taskStats.missedRuns

            for state, stats in taskStats.states.iteritems():
                if state in stateStats:
                    totalStateStats = stateStats[state]
                else:
                    totalStateStats = StateStatistics(state)
                    stateStats[state] = totalStateStats
                totalStateStats.totalElapsedTime += stats.totalElapsedTime
                totalStateStats.totalElapsedTimeSquared += stats.totalElapsedTimeSquared
                totalStateStats.totalCalls += stats.totalCalls
                totalStateStats.minElapsedTime = min(
                    totalStateStats.minElapsedTime, stats.minElapsedTime)
                totalStateStats.maxElapsedTime = max(
                    totalStateStats.maxElapsedTime, stats.maxElapsedTime)

        log.info("Tasks: %d Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d " \
                 "Queued_Tasks: %d Running_Tasks: %d ",
                 totalTasks, totalRuns, totalFailedRuns, totalMissedRuns,
                 self.executor.queued, self.executor.running)

        if verbose:
            buffer = "Task States Summary:\n"
            buffer = self._displayStateStatistics(buffer, stateStats)

            buffer += "\nTasks:\n"
            for taskWrapper in self._tasks.itervalues():
                task = taskWrapper.task
                taskStats = self._taskStats[task.name]

                buffer += "%s Current State: %s Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d\n" \
                    % (task.name, task.state, taskStats.totalRuns,
                       taskStats.failedRuns, taskStats.missedRuns)

            buffer += "\nDetailed Task States:\n"
            for taskWrapper in self._tasks.itervalues():
                task = taskWrapper.task
                taskStats = self._taskStats[task.name]

                if not taskStats.states:  # Hasn't run yet
                    continue

                buffer = self._displayStateStatistics(buffer, taskStats.states,
                                                      "%s " % task.name)

                if hasattr(task, 'displayStatistics'):
                    buffer += task.displayStatistics()

                buffer += "\n"

            log.info("Detailed Scheduler Statistics:\n%s", buffer)

        # TODO: the above logic doesn't print out any stats for the 'current'
        # state... i.e. enter the PAUSED state and wait there an hour, and
        # you'll see no data

        # TODO: should we reset the statistics here, or do it on a periodic
        # interval, like once an hour or day or something?

    def _displayTaskStatistics(self, task):
        verbose = log.isEnabledFor(logging.DEBUG)
        if not verbose:
            return
        taskStats = self._taskStats[task.name]

        buffer = "%s Current State: %s Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d\n" \
            % (task.name, task.state, taskStats.totalRuns,
               taskStats.failedRuns, taskStats.missedRuns)

        buffer += "\nDetailed Task States:\n"
        if taskStats.states:  # Hasn't run yet
            buffer = self._displayStateStatistics(buffer, taskStats.states,
                                                  "%s " % task.name)

            if hasattr(task, 'displayStatistics'):
                buffer += task.displayStatistics()

        buffer += "\n"

        log.info("Detailed Task Statistics:\n%s", buffer)

    def _displayStateStatistics(self, buffer, stateStats, prefix=''):
        for state, stats in stateStats.iteritems():
            buffer += "%sState: %s Total: %d Total Elapsed: %.4f Min: %.4f Max: %.4f Mean: %.4f StdDev: %.4f\n" \
                % (prefix,
                   state, stats.totalCalls, stats.totalElapsedTime,
                   stats.minElapsedTime, stats.maxElapsedTime, stats.mean,
                   stats.stddev)
        return buffer

    def _cleanupTasks(self):
        """
        Periodically cleanup tasks that have been queued up for cleaning.
        """
        # Build a list of the tasks that need to be cleaned up so that there
        # is no issue with concurrent modifications to the _tasksToCleanup
        # dictionary when tasks are quickly cleaned up.
        if self._tasksToCleanup:
            log.debug("tasks to clean %s", self._tasksToCleanup)

        todoList = [
            task for task in self._tasksToCleanup
            if self._isTaskCleanable(task)
        ]

        cleanupWaitList = []
        for task in todoList:
            # changing the state of the task will keep the next cleanup run
            # from processing it again
            task.state = TaskStates.STATE_CLEANING
            if self._shuttingDown:
                #let the task know the scheduler is shutting down
                task.state = TaskStates.STATE_SHUTDOWN
            log.debug("Cleanup on task %s %s", task.name, task)
            d = defer.maybeDeferred(task.cleanup)
            d.addBoth(self._cleanupTaskComplete, task)
            cleanupWaitList.append(d)
        return cleanupWaitList

    def _cleanupTaskComplete(self, result, task):
        """
        Twisted callback to remove a task from the cleanup queue once it has
        completed its cleanup work.
        """
        log.debug("Scheduler._cleanupTaskComplete: result=%s task.name=%s" %
                  (result, task.name))
        self._tasksToCleanup.discard(task)
        return result

    def _isTaskCleanable(self, task):
        """
        Determines if a task is able to be cleaned up or not.
        """
        return task.state in (TaskStates.STATE_IDLE, TaskStates.STATE_PAUSED,
                              TaskStates.STATE_COMPLETED)
Example #4
0
 def testHasKey(self):
     ks = KeyedSet(firstLetter, ["apple", "avocado", "banana"])
     self.assertTrue(ks.has_key("a"))
     self.assertTrue(ks.has_key("b"))
     self.assertFalse(ks.has_key("c"))