def post_collect(self, config):
     self._executor = TwistedExecutor(self.conn_params['consess'])
     d = maybeDeferred(self._connect)
     d.addCallbacks(self.connectCallback, self.connectionFailed)
     # do not return internal connection handling deferred, but global
     # tasks deferred.
     self._task_defer = Deferred()
     return self._task_defer
Example #2
0
    def __init__(self, taskName, configId, scheduleIntervalSeconds,
                 taskConfig):
        """
        @param taskName: the unique identifier for this task
        @type taskName: string
        @param configId: configuration to watch
        @type configId: string
        @param scheduleIntervalSeconds: the interval at which this task will be
               collected
        @type scheduleIntervalSeconds: int
        @param taskConfig: the configuration for this task
        """
        super(SshPerformanceCollectionTask,
              self).__init__(taskName, configId, scheduleIntervalSeconds,
                             taskConfig)

        # Needed for interface
        self.name = taskName
        self.configId = configId
        self.state = TaskStates.STATE_IDLE
        self.interval = scheduleIntervalSeconds

        # The taskConfig corresponds to a DeviceProxy
        self._device = taskConfig

        self._devId = self._device.id
        self._manageIp = self._device.manageIp

        self._dataService = zope.component.queryUtility(IDataService)
        self._eventService = zope.component.queryUtility(IEventService)

        self._preferences = zope.component.queryUtility(
            ICollectorPreferences, COLLECTOR_NAME)
        self._lastErrorMsg = ''

        self._maxbackoffseconds = self._preferences.options.maxbackoffminutes * 60

        self._concurrentSessions = taskConfig.zSshConcurrentSessions
        self._executor = TwistedExecutor(self._concurrentSessions)
        self._useSsh = taskConfig.datasources[0].useSsh
        self._connection = None

        self._datasources = taskConfig.datasources
        self.pool = getPool('SSH Connections')
        self.executed = 0
        self._task_defer = None
Example #3
0
    def __init__(self, callableTaskFactory=CallableTaskFactory()):
        self._loopingCalls = {}
        self._tasks = {}
        self._taskCallback = {}
        self._taskStats = {}
        self._callableTaskFactory = callableTaskFactory
        self._shuttingDown = False
        # create a cleanup task that will periodically sweep the
        # cleanup dictionary for tasks that need to be cleaned
        self._tasksToCleanup = KeyedSet(getConfigId)
        self._cleanupTask = task.LoopingCall(self._cleanupTasks)
        self._cleanupTask.start(Scheduler.CLEANUP_TASKS_INTERVAL)

        self._executor = TwistedExecutor(1)

        # Ensure that we can cleanly shutdown all of our tasks
        reactor.addSystemEventTrigger('before', 'shutdown', self.shutdown, 'before')
        reactor.addSystemEventTrigger('during', 'shutdown', self.shutdown, 'during')
        reactor.addSystemEventTrigger('after', 'shutdown', self.shutdown, 'after')
    def __init__(self, taskName, configId, scheduleIntervalSeconds,
                 taskConfig):
        """
        @param taskName: the unique identifier for this task
        @type taskName: string
        @param configId: configuration to watch
        @type configId: string
        @param scheduleIntervalSeconds: the interval at which this task will be
               collected
        @type scheduleIntervalSeconds: int
        @param taskConfig: the configuration for this task
        """
        super(SshPerformanceCollectionTask,
              self).__init__(taskName, configId, scheduleIntervalSeconds,
                             taskConfig)

        # Needed for interface
        self.name = taskName
        self.configId = configId
        self.state = TaskStates.STATE_IDLE
        self.interval = scheduleIntervalSeconds

        # The taskConfig corresponds to a DeviceProxy
        self._device = taskConfig
        self._devId = taskConfig.id
        self._manageIp = taskConfig.manageIp
        self._datasources = taskConfig.datasources

        self._useSsh = taskConfig.datasources[0].useSsh
        client = MySshClient if self._useSsh else None
        self._runner = partial(runner.getRunner, taskConfig, client)
        self._connector = self._runner()

        self._dataService = zope.component.queryUtility(IDataService)
        self._eventService = zope.component.queryUtility(IEventService)

        preferences = zope.component.queryUtility(ICollectorPreferences,
                                                  COLLECTOR_NAME)
        self._maxbackoffseconds = preferences.options.maxbackoffminutes * 60
        self._showfullcommand = preferences.options.showfullcommand
        self._chosenDatasource = preferences.options.datasource

        self._executor = TwistedExecutor(taskConfig.zSshConcurrentSessions)

        self.executed = 0
        self._lastErrorMsg = ''

        self.manage_ip_event = {
            'eventClass': Cmd_Fail,
            'device': self._devId,
            'summary': 'IP address not set, collection will be attempted\
                        with host name',
            'component': COLLECTOR_NAME,
            'eventKey': 'Empty_IP_address'
        }
Example #5
0
 def post_collect(self, device, log):
     """
         Should get called from collect in subclass before finishing.
     """
     log.debug("%s: DDN collection for device", device.id)
     # prepare the connection
     d = self._internal_defer = maybeDeferred(self._connect)
     d.addCallbacks(self.connectCallback, self.connectionFailed)
     # prepare concurrent executor context
     self._executor = TwistedExecutor(self._conn_params['consess'])
     return self._task_defer
Example #6
0
    def __init__(self,
                 taskName,
                 configId,
                 scheduleIntervalSeconds,
                 taskConfig):
        """
        @param taskName: the unique identifier for this task
        @type taskName: string
        @param configId: configuration to watch
        @type configId: string
        @param scheduleIntervalSeconds: the interval at which this task will be
               collected
        @type scheduleIntervalSeconds: int
        @param taskConfig: the configuration for this task
        """
        super(SshPerformanceCollectionTask, self).__init__(
                 taskName, configId,
                 scheduleIntervalSeconds, taskConfig
               )

        # Needed for interface
        self.name = taskName
        self.configId = configId
        self.state = TaskStates.STATE_IDLE
        self.interval = scheduleIntervalSeconds

        # The taskConfig corresponds to a DeviceProxy
        self._device = taskConfig

        self._devId = self._device.id
        self._manageIp = self._device.manageIp

        self._dataService = zope.component.queryUtility(IDataService)
        self._eventService = zope.component.queryUtility(IEventService)

        self._preferences = zope.component.queryUtility(ICollectorPreferences,
                                                        COLLECTOR_NAME)
        self._lastErrorMsg = ''

        self._maxbackoffseconds = self._preferences.options.maxbackoffminutes * 60

        self._concurrentSessions = taskConfig.zSshConcurrentSessions
        self._executor = TwistedExecutor(self._concurrentSessions)
        self._useSsh = taskConfig.datasources[0].useSsh
        self._connection = None

        self._datasources = taskConfig.datasources
        self.pool = getPool('SSH Connections')
        self.executed = 0
        self._task_defer = None
Example #7
0
    def __init__(self, callableTaskFactory=CallableTaskFactory()):
        self._loopingCalls = {}
        self._tasks = {}
        self._taskCallback = {}
        self._taskStats = {}
        self._callableTaskFactory = callableTaskFactory
        self._shuttingDown = False
        # create a cleanup task that will periodically sweep the 
        # cleanup dictionary for tasks that need to be cleaned
        self._tasksToCleanup = set()
        self._cleanupTask = task.LoopingCall(self._cleanupTasks)
        self._cleanupTask.start(Scheduler.CLEANUP_TASKS_INTERVAL)
        
        self._executor = TwistedExecutor(1)

        # Ensure that we can cleanly shutdown all of our tasks
        reactor.addSystemEventTrigger('before', 'shutdown', self.shutdown, 'before')
        reactor.addSystemEventTrigger('during', 'shutdown', self.shutdown, 'during')
        reactor.addSystemEventTrigger('after', 'shutdown', self.shutdown, 'after')
Example #8
0
class Scheduler(object):
    """
    A simple interval-based scheduler that makes use of the Twisted framework's
    LoopingCall construct.
    """

    zope.interface.implements(IScheduler)

    CLEANUP_TASKS_INTERVAL = 10 # seconds

    def __init__(self, callableTaskFactory=CallableTaskFactory()):
        self._loopingCalls = {}
        self._tasks = {}
        self._taskCallback = {}
        self._taskStats = {}
        self._callableTaskFactory = callableTaskFactory
        self._shuttingDown = False
        # create a cleanup task that will periodically sweep the 
        # cleanup dictionary for tasks that need to be cleaned
        self._tasksToCleanup = set()
        self._cleanupTask = task.LoopingCall(self._cleanupTasks)
        self._cleanupTask.start(Scheduler.CLEANUP_TASKS_INTERVAL)
        
        self._executor = TwistedExecutor(1)

        # Ensure that we can cleanly shutdown all of our tasks
        reactor.addSystemEventTrigger('before', 'shutdown', self.shutdown, 'before')
        reactor.addSystemEventTrigger('during', 'shutdown', self.shutdown, 'during')
        reactor.addSystemEventTrigger('after', 'shutdown', self.shutdown, 'after')

    def __contains__(self, task):
        """
        Returns True if the task has been added to the scheduler.  Otherwise
        False is returned.
        """
        # If task has no 'name' attribute, assume the task name was passed in.
        name = getattr(task, "name", task)
        return name in self._tasks

    def shutdown(self, phase):
        """
        The reactor shutdown has three phases for event types:

               before - tasks can shut down safely at this time with access to all
                        services (eg EventService, their own services)
               during - EventService and other services are gone before this starts
               after  - not a lot left -- be careful

        Tasks that have the attribute 'stopPhase' can set the state for which
        they should be run, otherwise they will be shut down in the 'before' phase.

        Tasks that have the attribute 'stopOrder' can set the order in which they
        are shut down (lowest first).  A stopOrder of 0 (zero) is assumed for tasks
        which do not declare a stopOrder.

        Returns a list of deferreds to wait on.
        """
        self._shuttingDown = True
        doomedTasks = []
        stopQ = {}
        log.debug("In shutdown stage %s", phase)
        for (taskName, taskWrapper) in self._tasks.iteritems():
            task = taskWrapper.task
            stopPhase = getattr(task, 'stopPhase', 'before')
            if stopPhase in ('before', 'after', 'during') and \
               stopPhase != phase:
                continue
            stopOrder = getattr(task, 'stopOrder', 0)
            queue = stopQ.setdefault(stopOrder, [])
            queue.append( (taskName, taskWrapper, task) )

        for stopOrder in sorted(stopQ):
            for (taskName, taskWrapper, task) in stopQ[stopOrder]:
                loopTask = self._loopingCalls[taskName]
                if loopTask.running:
                    log.debug("Stopping running task %s", taskName)
                    loopTask.stop()
                log.debug("Removing task %s", taskName)
                doomedTasks.append(taskName)
                self.taskRemoved(taskWrapper)

        for taskName in doomedTasks:
            self._tasksToCleanup.add(self._tasks[taskName].task)

            del self._loopingCalls[taskName]
            del self._tasks[taskName]
            del self._taskStats[taskName]

        cleanupList = self._cleanupTasks()
        return defer.DeferredList(cleanupList)

    @property 
    def executor(self):
        return self._executor

    def _getMaxTasks(self):
        return self._executor.getMax()

    def _setMaxTasks(self, max):
        return self._executor.setMax(max)

    maxTasks = property(_getMaxTasks, _setMaxTasks)

    def _ltCallback(self, result, task_name):
        """last call back in the chain, if it gets called as an errBack
        the looping will stop - shouldn't be called since CallableTask
        doesn't return a deferred, here for sanity and debug"""
        if task_name in self._loopingCalls:
            loopingCall = self._loopingCalls[task_name]
            log.debug("call finished %s : %s" %(loopingCall, result))
        if isinstance(result, Failure):
            log.warn("Failure in looping call, will not reschedule %s" % task_name)
            log.error("%s" % result)

    def _startTask(self, result, task_name, interval):
        """start the task using a callback so that its put at the bottom of
        the Twisted event queue, to allow other processing to continue and
        to support a task start-time jitter"""
        if task_name in self._loopingCalls:
            loopingCall = self._loopingCalls[task_name]
            if not loopingCall.running:
                log.debug("Task %s starting on %d second intervals", task_name, interval)
                d = loopingCall.start(interval)
                d.addBoth(self._ltCallback, task_name)

    def addTask(self, newTask, callback=None, now=False):
        """
        Add a new IScheduledTask to the scheduler for execution.
        @param newTask the new task to schedule
        @type newTask IScheduledTask
        @param callback a callback to be notified each time the task completes
        @type callback a Python callable
        """
        if newTask.name in self._tasks:
            raise ValueError("Task %s already exists" % newTask.name)
        log.debug("add task %s, %s using %s second interval", newTask.name, newTask, newTask.interval)
        callableTask = self._callableTaskFactory.getCallableTask(newTask, self)
        loopingCall = task.LoopingCall(callableTask)
        self._loopingCalls[newTask.name] = loopingCall
        self._tasks[newTask.name] = callableTask
        self._taskCallback[newTask.name] = callback
        self.taskAdded(callableTask)
        d = defer.Deferred()
        d.addCallback(self._startTask, newTask.name, newTask.interval)

        startDelay = getattr(newTask, 'startDelay', None)
        if startDelay is None:
            startDelay = 0 if now else self._getStartDelay(newTask)
        # explicitly set, next expected call in case task was never executed/schedule
        loopingCall._expectNextCallAt = time.time() + startDelay
        reactor.callLater(startDelay, d.callback, None)

        # just in case someone does not implement scheduled, lets be careful
        scheduled = getattr(newTask, 'scheduled', lambda x: None)
        scheduled(self)

    def _getStartDelay(self, task):
        """
        amount of time to delay the start of a task. Prevents bunching up of 
        task execution when a large amount of tasks are scheduled at the same 
        time.
        """
        #simple delay of random number between 0 and half the task interval
        delay = random.randint(0, int(task.interval/2))
        return delay
    
    def taskAdded(self, taskWrapper):
        """
        Called whenever the scheduler adds a task.
        """
        task = taskWrapper.task

        # watch the task's attribute changes
        task.attachAttributeObserver('state', self._taskStateChangeListener)
        task.attachAttributeObserver('interval', self._taskIntervalChangeListener)

        # create the statistics data for this task
        self._taskStats[task.name] = TaskStatistics(task)
        taskWrapper.taskStats = self._taskStats[task.name]

    def taskRemoved(self, taskWrapper):
        """
        Called whenever the scheduler removes a task.
        """
        task = taskWrapper.task
        task.detachAttributeObserver('state', self._taskStateChangeListener)
        task.detachAttributeObserver('interval', self._taskIntervalChangeListener)

    def taskPaused(self, taskWrapper):
        """
        Called whenever the scheduler pauses a task.
        """
        pass

    def taskResumed(self, taskWrapper):
        """
        Called whenever the scheduler resumes a task.
        """
        pass

    def getTasksForConfig(self, configId):
        """
        Get all tasks associated with the specified identifier.
        """
        tasks = []
        for (taskName, taskWrapper) in self._tasks.iteritems():
            task = taskWrapper.task
            if task.configId == configId:
                tasks.append(task)
        return tasks

    def getNextExpectedRun(self, taskName):
        """
        Get the next expected execution time for given task
        """
        loopingCall = self._loopingCalls.get(taskName, None)
        if loopingCall:
            return loopingCall._expectNextCallAt

    def removeTasks(self, taskNames):
        """
        Remove tasks
        """
        doomedTasks = []
        # child ids are any task that are children of the current task being
        # removed
        childIds = []
        for taskName in taskNames:
            taskWrapper = self._tasks[taskName]
            task = taskWrapper.task
            subIds = getattr(task, "childIds", None)
            if subIds:
                childIds.extend(subIds)
            log.debug("Stopping task %s, %s", taskName, task)
            if self._loopingCalls[taskName].running:
                self._loopingCalls[taskName].stop()

            doomedTasks.append(taskName)
            self.taskRemoved(taskWrapper)

        for taskName in doomedTasks:
            task = self._tasks[taskName].task
            self._tasksToCleanup.add(task)
            del self._loopingCalls[taskName]
            del self._tasks[taskName]
            self._displayTaskStatistics(task)
            del self._taskStats[taskName]
            # TODO: ponder task statistics and keeping them around?

        map(self.removeTasksForConfig, childIds)

        # TODO: don't let any tasks for the same config start until
        # these old tasks are really gone

    def removeTasksForConfig(self, configId):
        """
        Remove all tasks associated with the specified identifier.

        @paramater configId: the identifier to search for
        @type configId: string
        """
        self.removeTasks(taskName for taskName, taskWrapper in self._tasks.iteritems() if taskWrapper.task.configId == configId)

    def pauseTasksForConfig(self, configId):
        for (taskName, taskWrapper) in self._tasks.items():
            task = taskWrapper.task
            if task.configId == configId:
                log.debug("Pausing task %s", taskName)
                taskWrapper.paused = True
                self.taskPaused(taskWrapper)

    def resumeTasksForConfig(self, configId):
        for (taskName, taskWrapper) in self._tasks.iteritems():
            task = taskWrapper.task
            if task.configId == configId:
                log.debug("Resuming task %s", taskName)
                taskWrapper.paused = False
                self.taskResumed(taskWrapper)

    def taskDone(self, taskName):
        if callable(self._taskCallback[taskName]):
            self._taskCallback[taskName](taskName=taskName)

    def _taskStateChangeListener(self, observable, attrName, oldValue, newValue):
        task = observable

        log.debug("Task %s changing state from %s to %s", task.name, oldValue,
                  newValue)

        taskStat = self._taskStats[task.name]
        taskStat.trackStateChange(oldValue, newValue)

    def _taskIntervalChangeListener(self, observable, attrName, oldValue, newValue):
        """
        Allows tasks to change their collection interval on the fly
        """
        task = observable
        log.debug("Task %s changing run interval from %s to %s", task.name, oldValue,
                  newValue)

        loopingCall = task._dataService._scheduler._loopingCalls[task.name]
        loopingCall.interval = newValue

    def displayStatistics(self, verbose):
        totalRuns = 0
        totalFailedRuns = 0
        totalMissedRuns = 0
        totalTasks = 0
        stateStats = {}

        for taskWrapper in self._tasks.itervalues():
            task = taskWrapper.task
            taskStats = self._taskStats[task.name]

            totalTasks += 1
            totalRuns += taskStats.totalRuns
            totalFailedRuns += taskStats.failedRuns
            totalMissedRuns += taskStats.missedRuns

            for state, stats in taskStats.states.iteritems():
                if state in stateStats:
                    totalStateStats = stateStats[state]
                else:
                    totalStateStats = StateStatistics(state)
                    stateStats[state] = totalStateStats
                totalStateStats.totalElapsedTime += stats.totalElapsedTime
                totalStateStats.totalElapsedTimeSquared += stats.totalElapsedTimeSquared
                totalStateStats.totalCalls += stats.totalCalls
                totalStateStats.minElapsedTime = min(totalStateStats.minElapsedTime, stats.minElapsedTime)
                totalStateStats.maxElapsedTime = max(totalStateStats.maxElapsedTime, stats.maxElapsedTime)

        log.info("Tasks: %d Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d " \
                 "Queued_Tasks: %d Running_Tasks: %d ",
                 totalTasks, totalRuns, totalFailedRuns, totalMissedRuns,
                 self.executor.queued, self.executor.running)

        if verbose:
            buffer = "Task States Summary:\n"
            buffer = self._displayStateStatistics(buffer, stateStats)

            buffer += "\nTasks:\n"
            for taskWrapper in self._tasks.itervalues():
                task = taskWrapper.task
                taskStats = self._taskStats[task.name]

                buffer += "%s Current State: %s Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d\n" \
                    % (task.name, task.state, taskStats.totalRuns, 
                       taskStats.failedRuns, taskStats.missedRuns)

            buffer += "\nDetailed Task States:\n"
            for taskWrapper in self._tasks.itervalues():
                task = taskWrapper.task
                taskStats = self._taskStats[task.name]

                if not taskStats.states: # Hasn't run yet
                    continue

                buffer = self._displayStateStatistics(buffer,
                                             taskStats.states,
                                             "%s " % task.name)

                if hasattr(task, 'displayStatistics'):
                    buffer += task.displayStatistics()

                buffer += "\n"

            log.info("Detailed Scheduler Statistics:\n%s", buffer)

        # TODO: the above logic doesn't print out any stats for the 'current'
        # state... i.e. enter the PAUSED state and wait there an hour, and 
        # you'll see no data

        # TODO: should we reset the statistics here, or do it on a periodic
        # interval, like once an hour or day or something?

    def _displayTaskStatistics(self, task):
        verbose = log.isEnabledFor(logging.DEBUG)
        if not verbose:
            return
        taskStats = self._taskStats[task.name]

        buffer = "%s Current State: %s Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d\n" \
            % (task.name, task.state, taskStats.totalRuns,
               taskStats.failedRuns, taskStats.missedRuns)

        buffer += "\nDetailed Task States:\n"
        if taskStats.states: # Hasn't run yet
            buffer = self._displayStateStatistics(buffer,
                                         taskStats.states,
                                         "%s " % task.name)

            if hasattr(task, 'displayStatistics'):
                buffer += task.displayStatistics()

        buffer += "\n"

        log.info("Detailed Task Statistics:\n%s", buffer)

    def _displayStateStatistics(self, buffer, stateStats, prefix=''):
        for state, stats in stateStats.iteritems():
            buffer += "%sState: %s Total: %d Total Elapsed: %.4f Min: %.4f Max: %.4f Mean: %.4f StdDev: %.4f\n" \
                % (prefix,
                   state, stats.totalCalls, stats.totalElapsedTime,
                   stats.minElapsedTime, stats.maxElapsedTime, stats.mean,
                   stats.stddev)
        return buffer

    def _cleanupTasks(self):
        """
        Periodically cleanup tasks that have been queued up for cleaning. 
        """
        # Build a list of the tasks that need to be cleaned up so that there
        # is no issue with concurrent modifications to the _tasksToCleanup
        # dictionary when tasks are quickly cleaned up.
        if self._tasksToCleanup:
            log.debug("tasks to clean %s" % self._tasksToCleanup)

        todoList = [task for task in self._tasksToCleanup
                             if self._isTaskCleanable(task)]

        cleanupWaitList = []
        for task in todoList:
            # changing the state of the task will keep the next cleanup run
            # from processing it again
            task.state = TaskStates.STATE_CLEANING
            if self._shuttingDown:
                #let the task know the scheduler is shutting down
                task.state = TaskStates.STATE_SHUTDOWN
            log.debug("Cleanup on task %s %s", task.name, task)
            d = defer.maybeDeferred(task.cleanup)
            d.addBoth(self._cleanupTaskComplete, task)
            cleanupWaitList.append(d)
        return cleanupWaitList

    def _cleanupTaskComplete(self, result, task):
        """
        Twisted callback to remove a task from the cleanup queue once it has
        completed its cleanup work.
        """
        log.debug("Scheduler._cleanupTaskComplete: result=%s task.name=%s" % (result, task.name))
        self._tasksToCleanup.discard(task)
        return result

    def _isTaskCleanable(self, task):
        """
        Determines if a task is able to be cleaned up or not.
        """
        return task.state in (TaskStates.STATE_IDLE,
                              TaskStates.STATE_PAUSED,
                              TaskStates.STATE_COMPLETED)
Example #9
0
class Scheduler(object):
    """
    A simple interval-based scheduler that makes use of the Twisted framework's
    LoopingCall construct.
    """

    zope.interface.implements(IScheduler)

    CLEANUP_TASKS_INTERVAL = 10  # seconds

    def __init__(self, callableTaskFactory=CallableTaskFactory()):
        self._loopingCalls = {}
        self._tasks = {}
        self._taskCallback = {}
        self._taskStats = {}
        self._callableTaskFactory = callableTaskFactory
        self._shuttingDown = False
        # create a cleanup task that will periodically sweep the
        # cleanup dictionary for tasks that need to be cleaned
        self._tasksToCleanup = KeyedSet(getConfigId)
        self._cleanupTask = task.LoopingCall(self._cleanupTasks)
        self._cleanupTask.start(Scheduler.CLEANUP_TASKS_INTERVAL)

        self._executor = TwistedExecutor(1)

        # Ensure that we can cleanly shutdown all of our tasks
        reactor.addSystemEventTrigger('before', 'shutdown', self.shutdown,
                                      'before')
        reactor.addSystemEventTrigger('during', 'shutdown', self.shutdown,
                                      'during')
        reactor.addSystemEventTrigger('after', 'shutdown', self.shutdown,
                                      'after')

    def __contains__(self, task):
        """
        Returns True if the task has been added to the scheduler.  Otherwise
        False is returned.
        """
        # If task has no 'name' attribute, assume the task name was passed in.
        name = getattr(task, "name", task)
        return name in self._tasks

    def shutdown(self, phase):
        """
        The reactor shutdown has three phases for event types:

               before - tasks can shut down safely at this time with access to all
                        services (eg EventService, their own services)
               during - EventService and other services are gone before this starts
               after  - not a lot left -- be careful

        Tasks that have the attribute 'stopPhase' can set the state for which
        they should be run, otherwise they will be shut down in the 'before' phase.

        Tasks that have the attribute 'stopOrder' can set the order in which they
        are shut down (lowest first).  A stopOrder of 0 (zero) is assumed for tasks
        which do not declare a stopOrder.

        Returns a list of deferreds to wait on.
        """
        self._shuttingDown = True
        doomedTasks = []
        stopQ = {}
        log.debug("In shutdown stage %s", phase)
        for (taskName, taskWrapper) in self._tasks.iteritems():
            task = taskWrapper.task
            stopPhase = getattr(task, 'stopPhase', 'before')
            if stopPhase in ('before', 'after', 'during') and \
               stopPhase != phase:
                continue
            stopOrder = getattr(task, 'stopOrder', 0)
            queue = stopQ.setdefault(stopOrder, [])
            queue.append((taskName, taskWrapper, task))

        for stopOrder in sorted(stopQ):
            for (taskName, taskWrapper, task) in stopQ[stopOrder]:
                loopTask = self._loopingCalls[taskName]
                if loopTask.running:
                    log.debug("Stopping running task %s", taskName)
                    loopTask.stop()
                log.debug("Removing task %s", taskName)
                doomedTasks.append(taskName)
                self.taskRemoved(taskWrapper)

        for taskName in doomedTasks:
            self._tasksToCleanup.add(self._tasks[taskName].task)

            del self._loopingCalls[taskName]
            del self._tasks[taskName]
            del self._taskStats[taskName]

        cleanupList = self._cleanupTasks()
        return defer.DeferredList(cleanupList)

    @property
    def executor(self):
        return self._executor

    def _getMaxTasks(self):
        return self._executor.getMax()

    def _setMaxTasks(self, max):
        return self._executor.setMax(max)

    maxTasks = property(_getMaxTasks, _setMaxTasks)

    def _ltCallback(self, result, task_name):
        """last call back in the chain, if it gets called as an errBack
        the looping will stop - shouldn't be called since CallableTask
        doesn't return a deferred, here for sanity and debug"""
        if task_name in self._loopingCalls:
            loopingCall = self._loopingCalls[task_name]
            log.debug("call finished %s : %s" % (loopingCall, result))
        if isinstance(result, Failure):
            log.warn("Failure in looping call, will not reschedule %s" %
                     task_name)
            log.error("%s" % result)

    def _startTask(self, result, task_name, interval, configId, delayed):
        """start the task using a callback so that its put at the bottom of
        the Twisted event queue, to allow other processing to continue and
        to support a task start-time jitter"""
        if task_name in self._loopingCalls:
            loopingCall = self._loopingCalls[task_name]
            if not loopingCall.running:
                if self._tasksToCleanup.has_key(configId):
                    delay = random.randint(0, int(interval / 2))
                    delayed = delayed + delay
                    log.debug(
                        "Waiting for cleanup of %s. Task %s postponing its start %d seconds (%d so far)",
                        configId, task_name, delay, delayed)
                    d = defer.Deferred()
                    d.addCallback(self._startTask, task_name, interval,
                                  configId, delayed)
                    reactor.callLater(delay, d.callback, None)
                else:
                    log.debug(
                        "Task %s starting (waited %d seconds) on %d second intervals",
                        task_name, delayed, interval)
                    d = loopingCall.start(interval)
                    d.addBoth(self._ltCallback, task_name)

    def addTask(self, newTask, callback=None, now=False):
        """
        Add a new IScheduledTask to the scheduler for execution.
        @param newTask the new task to schedule
        @type newTask IScheduledTask
        @param callback a callback to be notified each time the task completes
        @type callback a Python callable
        """
        if newTask.name in self._tasks:
            raise ValueError("Task %s already exists" % newTask.name)
        log.debug("add task %s, %s using %s second interval", newTask.name,
                  newTask, newTask.interval)
        callableTask = self._callableTaskFactory.getCallableTask(newTask, self)
        loopingCall = task.LoopingCall(callableTask)
        self._loopingCalls[newTask.name] = loopingCall
        self._tasks[newTask.name] = callableTask
        self._taskCallback[newTask.name] = callback
        self.taskAdded(callableTask)
        startDelay = getattr(newTask, 'startDelay', None)
        if startDelay is None:
            startDelay = 0 if now else self._getStartDelay(newTask)
        d = defer.Deferred()
        d.addCallback(self._startTask, newTask.name, newTask.interval,
                      newTask.configId, startDelay)
        reactor.callLater(startDelay, d.callback, None)

        # just in case someone does not implement scheduled, lets be careful
        scheduled = getattr(newTask, 'scheduled', lambda x: None)
        scheduled(self)

    def _getStartDelay(self, task):
        """
        amount of time to delay the start of a task. Prevents bunching up of
        task execution when a large amount of tasks are scheduled at the same
        time.
        """
        #simple delay of random number between 0 and half the task interval
        delay = random.randint(0, int(task.interval / 2))
        return delay

    def taskAdded(self, taskWrapper):
        """
        Called whenever the scheduler adds a task.
        """
        task = taskWrapper.task

        # watch the task's attribute changes
        task.attachAttributeObserver('state', self._taskStateChangeListener)
        task.attachAttributeObserver('interval',
                                     self._taskIntervalChangeListener)

        # create the statistics data for this task
        self._taskStats[task.name] = TaskStatistics(task)
        taskWrapper.taskStats = self._taskStats[task.name]

    def taskRemoved(self, taskWrapper):
        """
        Called whenever the scheduler removes a task.
        """
        task = taskWrapper.task
        task.detachAttributeObserver('state', self._taskStateChangeListener)
        task.detachAttributeObserver('interval',
                                     self._taskIntervalChangeListener)

    def taskPaused(self, taskWrapper):
        """
        Called whenever the scheduler pauses a task.
        """
        pass

    def taskResumed(self, taskWrapper):
        """
        Called whenever the scheduler resumes a task.
        """
        pass

    def getTasksForConfig(self, configId):
        """
        Get all tasks associated with the specified identifier.
        """
        tasks = []
        for (taskName, taskWrapper) in self._tasks.iteritems():
            task = taskWrapper.task
            if task.configId == configId:
                tasks.append(task)
        return tasks

    def removeTasksForConfig(self, configId):
        """
        Remove all tasks associated with the specified identifier.

        @paramater configId: the identifier to search for
        @type configId: string
        """
        doomedTasks = []
        # child ids are any task that are children of the current task being
        # removed
        childIds = []
        for (taskName, taskWrapper) in self._tasks.iteritems():
            task = taskWrapper.task
            if task.configId == configId:
                subIds = getattr(task, "childIds", None)
                if subIds:
                    childIds.extend(subIds)
                log.debug("Stopping task %s, %s", taskName, task)

                if self._loopingCalls[taskName].running:
                    self._loopingCalls[taskName].stop()

                doomedTasks.append(taskName)
                self.taskRemoved(taskWrapper)

        for taskName in doomedTasks:
            task = self._tasks[taskName].task
            self._tasksToCleanup.add(task)
            del self._loopingCalls[taskName]
            del self._tasks[taskName]
            self._displayTaskStatistics(task)
            del self._taskStats[taskName]
            # TODO: ponder task statistics and keeping them around?

        for childId in childIds:
            self.removeTasksForConfig(childId)

    def pauseTasksForConfig(self, configId):
        for (taskName, taskWrapper) in self._tasks.items():
            task = taskWrapper.task
            if task.configId == configId:
                log.debug("Pausing task %s", taskName)
                taskWrapper.paused = True
                self.taskPaused(taskWrapper)

    def resumeTasksForConfig(self, configId):
        for (taskName, taskWrapper) in self._tasks.iteritems():
            task = taskWrapper.task
            if task.configId == configId:
                log.debug("Resuming task %s", taskName)
                taskWrapper.paused = False
                self.taskResumed(taskWrapper)

    def taskDone(self, taskName):
        if callable(self._taskCallback[taskName]):
            self._taskCallback[taskName](taskName=taskName)

    def _taskStateChangeListener(self, observable, attrName, oldValue,
                                 newValue):
        task = observable

        log.debug("Task %s changing state from %s to %s", task.name, oldValue,
                  newValue)

        taskStat = self._taskStats[task.name]
        taskStat.trackStateChange(oldValue, newValue)

    def _taskIntervalChangeListener(self, observable, attrName, oldValue,
                                    newValue):
        """
        Allows tasks to change their collection interval on the fly
        """
        task = observable
        log.debug("Task %s changing run interval from %s to %s", task.name,
                  oldValue, newValue)
        #TODO: should this be...
        # loopingCall = self._loopingCalls[task.name]
        loopingCall = task._dataService._scheduler._loopingCalls[task.name]
        loopingCall.interval = newValue

    @property
    def missedRuns(self):
        totalMissedRuns = 0
        for taskWrapper in self._tasks.itervalues():
            task = taskWrapper.task
            taskStats = self._taskStats[task.name]
            totalMissedRuns += taskStats.missedRuns
        return totalMissedRuns

    @property
    def failedRuns(self):
        totalFailedRuns = 0
        for taskWrapper in self._tasks.itervalues():
            task = taskWrapper.task
            taskStats = self._taskStats[task.name]
            totalFailedRuns += taskStats.failedRuns
        return totalFailedRuns

    def displayStatistics(self, verbose):
        totalRuns = 0
        totalFailedRuns = 0
        totalMissedRuns = 0
        totalTasks = 0
        stateStats = {}

        for taskWrapper in self._tasks.itervalues():
            task = taskWrapper.task
            taskStats = self._taskStats[task.name]

            totalTasks += 1
            totalRuns += taskStats.totalRuns
            totalFailedRuns += taskStats.failedRuns
            totalMissedRuns += taskStats.missedRuns

            for state, stats in taskStats.states.iteritems():
                if state in stateStats:
                    totalStateStats = stateStats[state]
                else:
                    totalStateStats = StateStatistics(state)
                    stateStats[state] = totalStateStats
                totalStateStats.totalElapsedTime += stats.totalElapsedTime
                totalStateStats.totalElapsedTimeSquared += stats.totalElapsedTimeSquared
                totalStateStats.totalCalls += stats.totalCalls
                totalStateStats.minElapsedTime = min(
                    totalStateStats.minElapsedTime, stats.minElapsedTime)
                totalStateStats.maxElapsedTime = max(
                    totalStateStats.maxElapsedTime, stats.maxElapsedTime)

        log.info("Tasks: %d Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d " \
                 "Queued_Tasks: %d Running_Tasks: %d ",
                 totalTasks, totalRuns, totalFailedRuns, totalMissedRuns,
                 self.executor.queued, self.executor.running)

        if verbose:
            buffer = "Task States Summary:\n"
            buffer = self._displayStateStatistics(buffer, stateStats)

            buffer += "\nTasks:\n"
            for taskWrapper in self._tasks.itervalues():
                task = taskWrapper.task
                taskStats = self._taskStats[task.name]

                buffer += "%s Current State: %s Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d\n" \
                    % (task.name, task.state, taskStats.totalRuns,
                       taskStats.failedRuns, taskStats.missedRuns)

            buffer += "\nDetailed Task States:\n"
            for taskWrapper in self._tasks.itervalues():
                task = taskWrapper.task
                taskStats = self._taskStats[task.name]

                if not taskStats.states:  # Hasn't run yet
                    continue

                buffer = self._displayStateStatistics(buffer, taskStats.states,
                                                      "%s " % task.name)

                if hasattr(task, 'displayStatistics'):
                    buffer += task.displayStatistics()

                buffer += "\n"

            log.info("Detailed Scheduler Statistics:\n%s", buffer)

        # TODO: the above logic doesn't print out any stats for the 'current'
        # state... i.e. enter the PAUSED state and wait there an hour, and
        # you'll see no data

        # TODO: should we reset the statistics here, or do it on a periodic
        # interval, like once an hour or day or something?

    def _displayTaskStatistics(self, task):
        verbose = log.isEnabledFor(logging.DEBUG)
        if not verbose:
            return
        taskStats = self._taskStats[task.name]

        buffer = "%s Current State: %s Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d\n" \
            % (task.name, task.state, taskStats.totalRuns,
               taskStats.failedRuns, taskStats.missedRuns)

        buffer += "\nDetailed Task States:\n"
        if taskStats.states:  # Hasn't run yet
            buffer = self._displayStateStatistics(buffer, taskStats.states,
                                                  "%s " % task.name)

            if hasattr(task, 'displayStatistics'):
                buffer += task.displayStatistics()

        buffer += "\n"

        log.info("Detailed Task Statistics:\n%s", buffer)

    def _displayStateStatistics(self, buffer, stateStats, prefix=''):
        for state, stats in stateStats.iteritems():
            buffer += "%sState: %s Total: %d Total Elapsed: %.4f Min: %.4f Max: %.4f Mean: %.4f StdDev: %.4f\n" \
                % (prefix,
                   state, stats.totalCalls, stats.totalElapsedTime,
                   stats.minElapsedTime, stats.maxElapsedTime, stats.mean,
                   stats.stddev)
        return buffer

    def _cleanupTasks(self):
        """
        Periodically cleanup tasks that have been queued up for cleaning.
        """
        # Build a list of the tasks that need to be cleaned up so that there
        # is no issue with concurrent modifications to the _tasksToCleanup
        # dictionary when tasks are quickly cleaned up.
        if self._tasksToCleanup:
            log.debug("tasks to clean %s", self._tasksToCleanup)

        todoList = [
            task for task in self._tasksToCleanup
            if self._isTaskCleanable(task)
        ]

        cleanupWaitList = []
        for task in todoList:
            # changing the state of the task will keep the next cleanup run
            # from processing it again
            task.state = TaskStates.STATE_CLEANING
            if self._shuttingDown:
                #let the task know the scheduler is shutting down
                task.state = TaskStates.STATE_SHUTDOWN
            log.debug("Cleanup on task %s %s", task.name, task)
            d = defer.maybeDeferred(task.cleanup)
            d.addBoth(self._cleanupTaskComplete, task)
            cleanupWaitList.append(d)
        return cleanupWaitList

    def _cleanupTaskComplete(self, result, task):
        """
        Twisted callback to remove a task from the cleanup queue once it has
        completed its cleanup work.
        """
        log.debug("Scheduler._cleanupTaskComplete: result=%s task.name=%s" %
                  (result, task.name))
        self._tasksToCleanup.discard(task)
        return result

    def _isTaskCleanable(self, task):
        """
        Determines if a task is able to be cleaned up or not.
        """
        return task.state in (TaskStates.STATE_IDLE, TaskStates.STATE_PAUSED,
                              TaskStates.STATE_COMPLETED)
Example #10
0
class SshPerformanceCollectionTask(BaseTask):
    """
    A task that performs periodic performance collection for devices providing
    data via SSH connections.
    """
    zope.interface.implements(IScheduledTask)

    STATE_CONNECTING = 'CONNECTING'
    STATE_FETCH_DATA = 'FETCH_DATA'
    STATE_PARSE_DATA = 'PARSING_DATA'
    STATE_STORE_PERF = 'STORE_PERF_DATA'

    def __init__(self,
                 taskName,
                 configId,
                 scheduleIntervalSeconds,
                 taskConfig):
        """
        @param taskName: the unique identifier for this task
        @type taskName: string
        @param configId: configuration to watch
        @type configId: string
        @param scheduleIntervalSeconds: the interval at which this task will be
               collected
        @type scheduleIntervalSeconds: int
        @param taskConfig: the configuration for this task
        """
        super(SshPerformanceCollectionTask, self).__init__(
                 taskName, configId,
                 scheduleIntervalSeconds, taskConfig
               )

        # Needed for interface
        self.name = taskName
        self.configId = configId
        self.state = TaskStates.STATE_IDLE
        self.interval = scheduleIntervalSeconds

        # The taskConfig corresponds to a DeviceProxy
        self._device = taskConfig

        self._devId = self._device.id
        self._manageIp = self._device.manageIp

        self._dataService = zope.component.queryUtility(IDataService)
        self._eventService = zope.component.queryUtility(IEventService)

        self._preferences = zope.component.queryUtility(ICollectorPreferences,
                                                        COLLECTOR_NAME)
        self._lastErrorMsg = ''

        self._maxbackoffseconds = self._preferences.options.maxbackoffminutes * 60

        self._concurrentSessions = taskConfig.zSshConcurrentSessions
        self._executor = TwistedExecutor(self._concurrentSessions)
        self._useSsh = taskConfig.datasources[0].useSsh
        self._connection = None

        self._datasources = taskConfig.datasources
        self.pool = getPool('SSH Connections')
        self.executed = 0
        self._task_defer = None

    def __str__(self):
        return "COMMAND schedule Name: %s configId: %s Datasources: %d" % (
               self.name, self.configId, len(self._datasources))

    def cleanup(self):
        self._cleanUpPool()
        self._close()

    def _getPoolKey(self):
        """
        Get the key under which the client should be stored in the pool.
        """
        username = self._device.zCommandUsername
        password = self._device.zCommandPassword
        ip = self._manageIp
        port = self._device.zCommandPort
        return hash((username, password, ip, port))

    def _cleanUpPool(self):
        """
        Close the connection currently associated with this task.
        """
        poolkey = self._getPoolKey()
        if poolkey in self.pool:
            client = self.pool[poolkey]
            if not client._taskList:
                # No other tasks, so safe to clean up
                transport = client.transport 
                if transport:
                    transport.loseConnection()
                del self.pool[poolkey]

    def doTask(self):
        """
        Contact to one device and return a deferred which gathers data from
        the device.

        @return: Deferred actions to run against a device configuration
        @rtype: Twisted deferred object
        """

        # Create a deferred wrapper for the scheduler since the connection
        # deferred is shared among many tasks.  We have to initialize this
        # first to satisfy race condition with _connectCallback
        d = self._task_defer = defer.Deferred()
        d.addCallback(self._fetchPerf)

        # Call _finished for both success and error scenarios
        d.addBoth(self._finished)

        #----------------------------------------------------------------------

        # See if we need to connect first before doing any collection
        d = defer.maybeDeferred(self._connect)
        d.addCallbacks(self._connectCallback, self.connectionFailed)

        # Wait until the Deferred actually completes
        return self._task_defer

    def _connect(self):
        """
        If a local datasource executor, do nothing.

        If an SSH datasource executor, create a connection to object the remote device.
        Make a new SSH connection object if there isn't one available.  This doesn't
        actually connect to the device.
        """
        if not self._useSsh:
            return defer.succeed(None)

        connection = self.pool.get(self._getPoolKey(), None)
        if connection is None:
            self.state = SshPerformanceCollectionTask.STATE_CONNECTING
            log.debug("Creating connection object to %s", self._devId)
            username = self._device.zCommandUsername
            password = self._device.zCommandPassword
            loginTimeout = self._device.zCommandLoginTimeout
            commandTimeout = self._device.zCommandCommandTimeout
            keypath = self._device.zKeyPath
            options = SshOptions(username, password,
                              loginTimeout, commandTimeout,
                              keypath, self._concurrentSessions)

            connection = MySshClient(self._devId, self._manageIp,
                                 self._device.zCommandPort, options=options)
            connection.sendEvent = self._eventService.sendEvent

            # Opens SSH connection to device
            d = connection.run()
            self.pool[self._getPoolKey()] = d
            return d

        return connection

    def _close(self):
        """
        If a local datasource executor, do nothing.

        If an SSH datasource executor, relinquish a connection to the remote device.
        """
        if self._connection:
            self._connection._taskList.discard(self)
            if not self._connection._taskList:
                if self._getPoolKey() in self.pool:
                    client = self.pool[self._getPoolKey()]
                    client.clientFinished()
            self._connection = None

    def connectionFailed(self, msg):
        """
        This method is called by the SSH client when the connection fails.

        @parameter msg: message indicating the cause of the problem
        @type msg: string
        """
        # Note: Raising an exception and then catching it doesn't work
        #       as it appears that the exception is discarded in PBDaemon.py
        self.state = TaskStates.STATE_PAUSED
        log.error("Pausing task %s as %s [%s] connection failure: %s",
                  self.name, self._devId, self._manageIp, msg.getErrorMessage())
        self._eventService.sendEvent(STATUS_EVENT,
                                     device=self._devId,
                                     summary=msg.getErrorMessage(),
                                     component=COLLECTOR_NAME,
                                     severity=Event.Error)
        self._task_defer.errback(msg)
        return msg

    def _connectCallback(self, connection):
        """
        Callback called after a successful connect to the remote device.
        """
        if self._useSsh:
            self._connection = connection
            self._connection._taskList.add(self)

            msg = "Connected to %s [%s]" % (self._devId, self._manageIp)
            self._connection.sendEvent(STATUS_EVENT, device=self._devId, summary=msg, component=COLLECTOR_NAME, severity=Clear)
        else:
            msg = "Running command(s) locally"

        log.debug(msg)
        self._task_defer.callback(connection)
        return connection

    def _addDatasource(self, datasource):
        """
        Add a new instantiation of ProcessRunner or SshRunner
        for every datasource.
        """
        if self._preferences.options.showfullcommand:
            log.info("Datasource %s command: %s", datasource.name,
                     datasource.command)

        if self._useSsh:
            runner = SshRunner(self._connection)
        else:
            runner = ProcessRunner()

        d = runner.start(datasource)
        datasource.lastStart = time.time()
        d.addBoth(datasource.processCompleted)
        return d

    def _fetchPerf(self, ignored):
        """
        Get performance data for all the monitored components on a device

        @parameter ignored: required to keep Twisted's callback chain happy
        @type ignored: result of previous callback
        """
        self.state = SshPerformanceCollectionTask.STATE_FETCH_DATA

        # The keys are the datasource commands, which are by definition unique
        # to the command run.
        cacheableDS = {}

        # Bundle up the list of tasks
        deferredCmds = []
        for datasource in self._datasources:
            datasource.deviceConfig = self._device

            if datasource.command in cacheableDS:
                cacheableDS[datasource.command].append(datasource)
                continue
            cacheableDS[datasource.command] = []

            task = self._executor.submit(self._addDatasource, datasource)
            deferredCmds.append(task)

        # Run the tasks
        dl = defer.DeferredList(deferredCmds, consumeErrors=True)
        dl.addCallback(self._parseResults, cacheableDS)
        dl.addCallback(self._storeResults)
        dl.addCallback(self._updateStatus)

        # Save the list in case we need to cancel the commands
        self._commandsToExecute = dl
        return dl

    def _parseResults(self, resultList, cacheableDS):
        """
        Interpret the results retrieved from the commands and pass on
        the datapoint values and events.

        @parameter resultList: results of running the commands in a DeferredList
        @type resultList: array of (boolean, datasource)
        @parameter cacheableDS: other datasources that can use the same results
        @type cacheableDS: dictionary of arrays of datasources
        """
        self.state = SshPerformanceCollectionTask.STATE_PARSE_DATA
        parseableResults = []
        for success, datasource in resultList:
            results = ParsedResults()
            if not success:
                # In this case, our datasource is actually a defer.Failure
                reason = datasource
                datasource, = reason.value.args
                msg = "Datasource %s command timed out" % (
                         datasource.name)
                ev = self._makeCmdEvent(datasource, msg, event_key='Timeout')
                results.events.append(ev)

            else:
                # clear our timeout event
                msg = "Datasource %s command timed out" % datasource.name
                ev = self._makeCmdEvent(datasource, msg, severity=Clear, event_key='Timeout')
                # Re-use our results for any similar datasources
                cachedDsList = cacheableDS.get(datasource.command)
                if cachedDsList:
                    for ds in cachedDsList:
                        ds.result = copy(datasource.result)
                        results = ParsedResults()
                        self._processDatasourceResults(ds, results)
                        parseableResults.append( (ds, results) )
                    results = ParsedResults()
                results.events.append(ev)
                self._processDatasourceResults(datasource, results)

            parseableResults.append( (datasource, results) )
        return parseableResults

    def _makeParser(self, datasource, eventList):
        """
        Create a parser object to process data

        @parameter datasource: datasource containg information
        @type datasource: Cmd object
        @parameter eventList: list of events
        @type eventList: list of dictionaries
        """
        parser = None
        try:
            parser = datasource.parser.create()
        except Exception:
            msg = "Error loading parser %s" % datasource.parser
            log.exception("%s %s %s", self.name, datasource.name, msg)
            ev = self._makeCmdEvent(datasource, msg)
            ev['message'] = traceback.format_exc()
            eventList.append(ev)
        return parser

    def _processDatasourceResults(self, datasource, results):
        """
        Process a single datasource's results

        @parameter datasource: datasource containg information
        @type datasource: Cmd object
        @parameter results: empty results object
        @type results: ParsedResults object
        """
        showcommand = self._preferences.options.showfullcommand
        result =  datasource.result
        if result.exitCode == 0 and not result.output.strip():
            msg = "No data returned for command"
            if showcommand:
                msg += ": %s" % datasource.command
            log.warn("%s %s %s", self.name, datasource.name, msg)
            ev = self._makeCmdEvent(datasource, msg)
            if showcommand:
                ev['command'] = datasource.command
            results.events.append(ev)
            return

        parser = self._makeParser(datasource, results.events)
        if not parser:
            return

        try:
            parser.preprocessResults(datasource, log)
            parser.processResults(datasource, results)
            if not results.events and parser.createDefaultEventUsingExitCode:
                # Add a failsafe event guessing at the error codes
                self._addDefaultEvent(datasource, results)
            if datasource.result.stderr:
                self._addStderrMsg(datasource.result.stderr,
                                               results.events)
        except Exception:
            msg = "Error running parser %s" % datasource.parser
            log.exception("%s %s %s", self.name, datasource.name, msg)
            ev = self._makeCmdEvent(datasource, msg)
            ev['message'] = traceback.format_exc()
            ev['output'] = datasource.result.output
            results.events.append(ev)

    def _addDefaultEvent(self, datasource, results):
        """
        If there is no event, send one based on the exit code.
        """
        exitCode = datasource.result.exitCode
        if exitCode == 0:
            msg = ''
            severity = 0
        else:
            msg = 'Datasource: %s - Code: %s - Msg: %s' % (
                datasource.name, exitCode, getExitMessage(exitCode))
            severity = datasource.severity

        ev = self._makeCmdEvent(datasource, msg, severity)
        results.events.append(ev)

    def _addStderrMsg(self, stderrMsg, eventList):
        """
        Add the stderr output to error events.

        @parameter stderrMsg: stderr output from the command
        @type stderrMsg: string
        @parameter eventList: list of events
        @type eventList: list of dictionaries
        """
        for event in eventList:
            if event['severity'] not in ('Clear', 'Info', 'Debug'):
                event['stderr'] = stderrMsg

    def _storeResults(self, resultList):
        """
        Store the values in RRD files

        @parameter resultList: results of running the commands
        @type resultList: array of (datasource, dictionary)
        """
        self.state = SshPerformanceCollectionTask.STATE_STORE_PERF
        for datasource, results in resultList:
            for dp, value in results.values:
                threshData = {
                    'eventKey': datasource.getEventKey(dp),
                    'component': dp.component,
                }
                self._dataService.writeRRD(
                                  dp.rrdPath,
                                  value,
                                  dp.rrdType,
                                  dp.rrdCreateCommand,
                                  datasource.cycleTime,
                                  dp.rrdMin,
                                  dp.rrdMax,
                                  threshData)

        return resultList

    def _updateStatus(self, resultList):
        """
        Send any accumulated events

        @parameter resultList: results of running the commands
        @type resultList: array of (datasource, dictionary)
        """
        for datasource, results in resultList:
            self._clearEvent(datasource, results.events)
            for ev in results.events:
                self._eventService.sendEvent(ev, device=self._devId)
        return resultList

    def _clearEvent(self, datasource, eventList):
        """
        Ensure that a CLEAR event is sent for any command that
        successfully completes.
        """
        # If the result is a Failure, no exitCode exists
        exitCode = getattr(datasource.result, 'exitCode', -1)
        # Don't send if non-zero exit code or no output returned
        if exitCode is None or exitCode != 0 or not datasource.result.output.strip():
            return

        clearEvents = [ev for ev in eventList if ev['severity'] == Clear]
        if not clearEvents:
            msg = 'Datasource %s command completed successfully' % (
                    datasource.name)
            ev = self._makeCmdEvent(datasource, msg, severity=Clear)
            eventList.append(ev)

    def _makeCmdEvent(self, datasource, msg, severity=None, event_key=None):
        """
        Create an event using the info in the Cmd object.
        """
        severity = datasource.severity if severity is None else severity
        event_key = datasource.eventKey if event_key is None else event_key
        ev = dict(
                  device=self._devId,
                  component=datasource.component,
                  eventClass=datasource.eventClass,
                  eventKey=event_key,
                  severity=severity,
                  summary=msg
        )
        return ev

    def _finished(self, result):
        """
        Callback activated when the task is complete

        @parameter result: results of the task
        @type result: deferred object
        """
        if not isinstance(result, Failure):
            self._returnToNormalSchedule()

        try:
            self._close()
        except Exception, ex:
            log.warn("Failed to close device %s: error %s" %
                     (self._devId, str(ex)))

        # Return the result so the framework can track success/failure
        return result
Example #11
0
class SshPerformanceCollectionTask(BaseTask):
    """
    A task that performs periodic performance collection for devices providing
    data via SSH connections.
    """
    zope.interface.implements(IScheduledTask)

    STATE_CONNECTING = 'CONNECTING'
    STATE_FETCH_DATA = 'FETCH_DATA'
    STATE_PARSE_DATA = 'PARSING_DATA'
    STATE_STORE_PERF = 'STORE_PERF_DATA'

    def __init__(self, taskName, configId, scheduleIntervalSeconds,
                 taskConfig):
        """
        @param taskName: the unique identifier for this task
        @type taskName: string
        @param configId: configuration to watch
        @type configId: string
        @param scheduleIntervalSeconds: the interval at which this task will be
               collected
        @type scheduleIntervalSeconds: int
        @param taskConfig: the configuration for this task
        """
        super(SshPerformanceCollectionTask,
              self).__init__(taskName, configId, scheduleIntervalSeconds,
                             taskConfig)

        # Needed for interface
        self.name = taskName
        self.configId = configId
        self.state = TaskStates.STATE_IDLE
        self.interval = scheduleIntervalSeconds

        # The taskConfig corresponds to a DeviceProxy
        self._device = taskConfig

        self._devId = self._device.id
        self._manageIp = self._device.manageIp

        self._dataService = zope.component.queryUtility(IDataService)
        self._eventService = zope.component.queryUtility(IEventService)

        self._preferences = zope.component.queryUtility(
            ICollectorPreferences, COLLECTOR_NAME)
        self._lastErrorMsg = ''

        self._maxbackoffseconds = self._preferences.options.maxbackoffminutes * 60

        self._concurrentSessions = taskConfig.zSshConcurrentSessions
        self._executor = TwistedExecutor(self._concurrentSessions)
        self._useSsh = taskConfig.datasources[0].useSsh
        self._connection = None

        self._datasources = taskConfig.datasources
        self.pool = getPool('SSH Connections')
        self.executed = 0
        self._task_defer = None

    def __str__(self):
        return "COMMAND schedule Name: %s configId: %s Datasources: %d" % (
            self.name, self.configId, len(self._datasources))

    def cleanup(self):
        self._cleanUpPool()
        self._close()

    def _getPoolKey(self):
        """
        Get the key under which the client should be stored in the pool.
        """
        username = self._device.zCommandUsername
        password = self._device.zCommandPassword
        ip = self._manageIp
        port = self._device.zCommandPort
        return hash((username, password, ip, port))

    def _cleanUpPool(self):
        """
        Close the connection currently associated with this task.
        """
        poolkey = self._getPoolKey()
        if poolkey in self.pool:
            client = self.pool[poolkey]
            if not client._taskList:
                # No other tasks, so safe to clean up
                transport = client.transport
                if transport:
                    transport.loseConnection()
                del self.pool[poolkey]

    def doTask(self):
        """
        Contact to one device and return a deferred which gathers data from
        the device.

        @return: Deferred actions to run against a device configuration
        @rtype: Twisted deferred object
        """

        # Create a deferred wrapper for the scheduler since the connection
        # deferred is shared among many tasks.  We have to initialize this
        # first to satisfy race condition with _connectCallback
        d = self._task_defer = defer.Deferred()
        d.addCallback(self._fetchPerf)

        # Call _finished for both success and error scenarios
        d.addBoth(self._finished)

        #----------------------------------------------------------------------

        # See if we need to connect first before doing any collection
        d = defer.maybeDeferred(self._connect)
        d.addCallbacks(self._connectCallback, self.connectionFailed)

        # Wait until the Deferred actually completes
        return self._task_defer

    def _connect(self):
        """
        If a local datasource executor, do nothing.

        If an SSH datasource executor, create a connection to object the remote device.
        Make a new SSH connection object if there isn't one available.  This doesn't
        actually connect to the device.
        """
        if not self._useSsh:
            return defer.succeed(None)

        connection = self.pool.get(self._getPoolKey(), None)
        if connection is None:
            self.state = SshPerformanceCollectionTask.STATE_CONNECTING
            log.debug("Creating connection object to %s", self._devId)
            username = self._device.zCommandUsername
            password = self._device.zCommandPassword
            loginTimeout = self._device.zCommandLoginTimeout
            commandTimeout = self._device.zCommandCommandTimeout
            keypath = self._device.zKeyPath
            options = SshOptions(username, password, loginTimeout,
                                 commandTimeout, keypath,
                                 self._concurrentSessions)

            connection = MySshClient(self._devId,
                                     self._manageIp,
                                     self._device.zCommandPort,
                                     options=options)
            connection.sendEvent = self._eventService.sendEvent

            # Opens SSH connection to device
            d = connection.run()
            self.pool[self._getPoolKey()] = d
            return d

        return connection

    def _close(self):
        """
        If a local datasource executor, do nothing.

        If an SSH datasource executor, relinquish a connection to the remote device.
        """
        if self._connection:
            self._connection._taskList.discard(self)
            if not self._connection._taskList:
                if self._getPoolKey() in self.pool:
                    client = self.pool[self._getPoolKey()]
                    client.clientFinished()
            self._connection = None

    def connectionFailed(self, msg):
        """
        This method is called by the SSH client when the connection fails.

        @parameter msg: message indicating the cause of the problem
        @type msg: string
        """
        # Note: Raising an exception and then catching it doesn't work
        #       as it appears that the exception is discarded in PBDaemon.py
        self.state = TaskStates.STATE_PAUSED
        log.error("Pausing task %s as %s [%s] connection failure: %s",
                  self.name, self._devId, self._manageIp,
                  msg.getErrorMessage())
        self._eventService.sendEvent(STATUS_EVENT,
                                     device=self._devId,
                                     summary=msg.getErrorMessage(),
                                     component=COLLECTOR_NAME,
                                     severity=Event.Error)
        self._task_defer.errback(msg)
        return msg

    def _connectCallback(self, connection):
        """
        Callback called after a successful connect to the remote device.
        """
        if self._useSsh:
            self._connection = connection
            self._connection._taskList.add(self)

            msg = "Connected to %s [%s]" % (self._devId, self._manageIp)
            self._connection.sendEvent(STATUS_EVENT,
                                       device=self._devId,
                                       summary=msg,
                                       component=COLLECTOR_NAME,
                                       severity=Clear)
        else:
            msg = "Running command(s) locally"

        log.debug(msg)
        self._task_defer.callback(connection)
        return connection

    def _addDatasource(self, datasource):
        """
        Add a new instantiation of ProcessRunner or SshRunner
        for every datasource.
        """
        if self._preferences.options.showfullcommand:
            log.info("Datasource %s command: %s", datasource.name,
                     datasource.command)

        if self._useSsh:
            runner = SshRunner(self._connection)
        else:
            runner = ProcessRunner()

        d = runner.start(datasource)
        datasource.lastStart = time.time()
        d.addBoth(datasource.processCompleted)
        return d

    def _fetchPerf(self, ignored):
        """
        Get performance data for all the monitored components on a device

        @parameter ignored: required to keep Twisted's callback chain happy
        @type ignored: result of previous callback
        """
        self.state = SshPerformanceCollectionTask.STATE_FETCH_DATA

        # The keys are the datasource commands, which are by definition unique
        # to the command run.
        cacheableDS = {}

        # Bundle up the list of tasks
        deferredCmds = []
        for datasource in self._datasources:
            datasource.deviceConfig = self._device

            if datasource.command in cacheableDS:
                cacheableDS[datasource.command].append(datasource)
                continue
            cacheableDS[datasource.command] = []

            task = self._executor.submit(self._addDatasource, datasource)
            deferredCmds.append(task)

        # Run the tasks
        dl = defer.DeferredList(deferredCmds, consumeErrors=True)
        dl.addCallback(self._parseResults, cacheableDS)
        dl.addCallback(self._storeResults)
        dl.addCallback(self._updateStatus)

        # Save the list in case we need to cancel the commands
        self._commandsToExecute = dl
        return dl

    def _parseResults(self, resultList, cacheableDS):
        """
        Interpret the results retrieved from the commands and pass on
        the datapoint values and events.

        @parameter resultList: results of running the commands in a DeferredList
        @type resultList: array of (boolean, datasource)
        @parameter cacheableDS: other datasources that can use the same results
        @type cacheableDS: dictionary of arrays of datasources
        """
        self.state = SshPerformanceCollectionTask.STATE_PARSE_DATA
        parseableResults = []
        for success, datasource in resultList:
            results = ParsedResults()
            if not success:
                # In this case, our datasource is actually a defer.Failure
                reason = datasource
                datasource, = reason.value.args
                msg = "Datasource %s command timed out" % (datasource.name)
                ev = self._makeCmdEvent(datasource, msg, event_key='Timeout')
                results.events.append(ev)

            else:
                # clear our timeout event
                msg = "Datasource %s command timed out" % datasource.name
                ev = self._makeCmdEvent(datasource,
                                        msg,
                                        severity=Clear,
                                        event_key='Timeout')
                # Re-use our results for any similar datasources
                cachedDsList = cacheableDS.get(datasource.command)
                if cachedDsList:
                    for ds in cachedDsList:
                        ds.result = copy(datasource.result)
                        results = ParsedResults()
                        self._processDatasourceResults(ds, results)
                        parseableResults.append((ds, results))
                    results = ParsedResults()
                results.events.append(ev)
                self._processDatasourceResults(datasource, results)

            parseableResults.append((datasource, results))
        return parseableResults

    def _makeParser(self, datasource, eventList):
        """
        Create a parser object to process data

        @parameter datasource: datasource containg information
        @type datasource: Cmd object
        @parameter eventList: list of events
        @type eventList: list of dictionaries
        """
        parser = None
        try:
            parser = datasource.parser.create()
        except Exception:
            msg = "Error loading parser %s" % datasource.parser
            log.exception("%s %s %s", self.name, datasource.name, msg)
            ev = self._makeCmdEvent(datasource, msg)
            ev['message'] = traceback.format_exc()
            eventList.append(ev)
        return parser

    def _processDatasourceResults(self, datasource, results):
        """
        Process a single datasource's results

        @parameter datasource: datasource containg information
        @type datasource: Cmd object
        @parameter results: empty results object
        @type results: ParsedResults object
        """
        showcommand = self._preferences.options.showfullcommand
        result = datasource.result
        if result.exitCode == 0 and not result.output.strip():
            msg = "No data returned for command"
            if showcommand:
                msg += ": %s" % datasource.command
            log.warn("%s %s %s", self.name, datasource.name, msg)
            ev = self._makeCmdEvent(datasource, msg)
            if showcommand:
                ev['command'] = datasource.command
            results.events.append(ev)
            return

        parser = self._makeParser(datasource, results.events)
        if not parser:
            return

        try:
            parser.preprocessResults(datasource, log)
            parser.processResults(datasource, results)
            if not results.events and parser.createDefaultEventUsingExitCode:
                # Add a failsafe event guessing at the error codes
                self._addDefaultEvent(datasource, results)
            if datasource.result.stderr:
                self._addStderrMsg(datasource.result.stderr, results.events)
        except Exception:
            msg = "Error running parser %s" % datasource.parser
            log.exception("%s %s %s", self.name, datasource.name, msg)
            ev = self._makeCmdEvent(datasource, msg)
            ev['message'] = traceback.format_exc()
            ev['output'] = datasource.result.output
            results.events.append(ev)

    def _addDefaultEvent(self, datasource, results):
        """
        If there is no event, send one based on the exit code.
        """
        exitCode = datasource.result.exitCode
        if exitCode == 0:
            msg = ''
            severity = 0
        else:
            msg = 'Datasource: %s - Code: %s - Msg: %s' % (
                datasource.name, exitCode, getExitMessage(exitCode))
            severity = datasource.severity

        ev = self._makeCmdEvent(datasource, msg, severity)
        results.events.append(ev)

    def _addStderrMsg(self, stderrMsg, eventList):
        """
        Add the stderr output to error events.

        @parameter stderrMsg: stderr output from the command
        @type stderrMsg: string
        @parameter eventList: list of events
        @type eventList: list of dictionaries
        """
        for event in eventList:
            if event['severity'] not in ('Clear', 'Info', 'Debug'):
                event['stderr'] = stderrMsg

    def _storeResults(self, resultList):
        """
        Store the values in RRD files

        @parameter resultList: results of running the commands
        @type resultList: array of (datasource, dictionary)
        """
        self.state = SshPerformanceCollectionTask.STATE_STORE_PERF
        for datasource, results in resultList:
            for dp, value in results.values:
                threshData = {
                    'eventKey': datasource.getEventKey(dp),
                    'component': dp.component,
                }
                self._dataService.writeRRD(dp.rrdPath, value, dp.rrdType,
                                           dp.rrdCreateCommand,
                                           datasource.cycleTime, dp.rrdMin,
                                           dp.rrdMax, threshData)

        return resultList

    def _updateStatus(self, resultList):
        """
        Send any accumulated events

        @parameter resultList: results of running the commands
        @type resultList: array of (datasource, dictionary)
        """
        for datasource, results in resultList:
            self._clearEvent(datasource, results.events)
            for ev in results.events:
                self._eventService.sendEvent(ev, device=self._devId)
        return resultList

    def _clearEvent(self, datasource, eventList):
        """
        Ensure that a CLEAR event is sent for any command that
        successfully completes.
        """
        # If the result is a Failure, no exitCode exists
        exitCode = getattr(datasource.result, 'exitCode', -1)
        # Don't send if non-zero exit code or no output returned
        if exitCode is None or exitCode != 0 or not datasource.result.output.strip(
        ):
            return

        clearEvents = [ev for ev in eventList if ev['severity'] == Clear]
        if not clearEvents:
            msg = 'Datasource %s command completed successfully' % (
                datasource.name)
            ev = self._makeCmdEvent(datasource, msg, severity=Clear)
            eventList.append(ev)

    def _makeCmdEvent(self, datasource, msg, severity=None, event_key=None):
        """
        Create an event using the info in the Cmd object.
        """
        severity = datasource.severity if severity is None else severity
        event_key = datasource.eventKey if event_key is None else event_key
        ev = dict(device=self._devId,
                  component=datasource.component,
                  eventClass=datasource.eventClass,
                  eventKey=event_key,
                  severity=severity,
                  summary=msg)
        return ev

    def _finished(self, result):
        """
        Callback activated when the task is complete

        @parameter result: results of the task
        @type result: deferred object
        """
        if not isinstance(result, Failure):
            self._returnToNormalSchedule()

        try:
            self._close()
        except Exception, ex:
            log.warn("Failed to close device %s: error %s" %
                     (self._devId, str(ex)))

        # Return the result so the framework can track success/failure
        return result