class ThresholdNotifier(object):
    """
    Checks datapoint values against a set of thresholds and forwards every
    event produced by that check to a caller-supplied callback.
    Used by CollectorDaemon and DaemonStats.
    """
    def __init__(self, send_callback, thresholds):
        """
        @param send_callback: callable invoked with each generated event
        @param thresholds: either a list of thresholds (loaded into a fresh
                           Thresholds container), an existing Thresholds
                           instance (used as-is), or anything else (an empty
                           Thresholds container is created)
        """
        self._send_callback = send_callback
        if isinstance(thresholds, list):
            store = Thresholds()
            store.updateList(thresholds)
        elif isinstance(thresholds, Thresholds):
            store = thresholds
        else:
            store = Thresholds()
        self._thresholds = store

    def updateThresholds(self, thresholds):
        """
        Pass the given thresholds to the underlying container's updateList.
        """
        self._thresholds.updateList(thresholds)

    @defer.inlineCallbacks
    def notify(self,
               context_uuid,
               context_id,
               metric,
               timestamp,
               value,
               thresh_event_data=None):
        """
        Evaluate a value against the thresholds and send each resulting event
        through the send callback, one at a time.

        @param context_uuid: context name used to check thresholds; also
                             stored as 'component_guid' on events that carry
                             a non-empty 'component'
        @param context_id: accepted but not read by this method
        @param metric: name of the metric; used as the event key prefix when
                       thresh_event_data has no 'eventKey'
        @param timestamp: timestamp for the value
        @param value: the value to check; None means nothing is checked
        @param thresh_event_data: additional data to send with any events
        @return: a Deferred (via inlineCallbacks) that fires once every
                 generated event has been handed to the callback
        """
        if not self._thresholds or value is None:
            return

        extra_data = thresh_event_data or {}
        if 'eventKey' in extra_data:
            key_prefix = extra_data['eventKey']
        else:
            key_prefix = metric

        events = self._thresholds.check(context_uuid, metric, timestamp, value)
        for ev in events:
            key_parts = [key_prefix]
            if 'eventKey' in ev:
                key_parts.append(ev['eventKey'])
            ev['eventKey'] = '|'.join(key_parts)

            # Copy the extra fields onto the event, but never overwrite a
            # field the event already has a real (non-blank) value for.
            for field, extra in extra_data.items():
                if ev.get(field, None) in ('', None):
                    ev[field] = extra

            if ev.get("component"):
                ev['component_guid'] = context_uuid

            yield defer.maybeDeferred(self._send_callback, ev)
# Example no. 2
# 0
class ThresholdNotifier(object):
    """
    Runs threshold evaluation for datapoint values and dispatches the events
    that evaluation produces. Used by CollectorDaemon and DaemonStats.
    """

    def __init__(self, send_callback, thresholds):
        """
        @param send_callback: callable that receives each generated event
        @param thresholds: a list of thresholds, a Thresholds instance, or
                           any other value (treated as "no thresholds")
        """
        self._send_callback = send_callback
        from_list = isinstance(thresholds, list)
        if not from_list and isinstance(thresholds, Thresholds):
            # Reuse the caller's container directly.
            self._thresholds = thresholds
        else:
            self._thresholds = Thresholds()
            if from_list:
                self._thresholds.updateList(thresholds)

    def updateThresholds(self, thresholds):
        """
        Forward the supplied thresholds to the container's updateList.
        """
        self._thresholds.updateList(thresholds)

    @defer.inlineCallbacks
    def notify(self, context_uuid, context_id, metric, timestamp, value, thresh_event_data=None):
        """
        Check the specified value against thresholds and send any generated
        events through the send callback.

        @param context_uuid: context name used to check thresholds
        @param context_id: not read by this method
        @param metric: name of the metric
        @param timestamp: timestamp for the value
        @param value: the value to check (nothing happens when it is None)
        @param thresh_event_data: additional data to send with any events
        @return: a Deferred produced by inlineCallbacks
        """
        if self._thresholds and value is not None:
            additions = thresh_event_data or {}
            prefix = [additions['eventKey']] if 'eventKey' in additions else [metric]

            for event in self._thresholds.check(context_uuid, metric, timestamp, value):
                key_parts = list(prefix)
                if 'eventKey' in event:
                    key_parts.append(event['eventKey'])
                event['eventKey'] = '|'.join(key_parts)

                # Fill in only the fields that are absent, blank or None on
                # the event itself.
                for name, addition in additions.items():
                    if event.get(name, None) in ('', None):
                        event[name] = addition

                if event.get("component", None):
                    event['component_guid'] = context_uuid

                yield defer.maybeDeferred(self._send_callback, event)
# Example no. 3
# 0
class CollectorDaemon(RRDDaemon):
    """
    The daemon class for the entire ZenCollector framework. This class bridges
    the gap between the older daemon framework and ZenCollector. New collectors
    no longer should extend this class to implement a new collector.
    """
    zope.interface.implements(ICollector,
                              IDataService,
                              IEventService)

    _frameworkFactoryName = ""

    @property
    def preferences(self):
        """
        The preferences object stored on this daemon as _prefs.
        """
        prefs = self._prefs
        return prefs

    def __init__(self, preferences, taskSplitter, 
                 configurationListener=DUMMY_LISTENER,
                 initializationCallback=None,
                 stoppingCallback=None):
        """
        Constructs a new instance of the CollectorDaemon framework. Normally
        only a singleton instance of a CollectorDaemon should exist within a
        process, but this is not enforced.
        
        @param preferences: the collector configuration
        @type preferences: ICollectorPreferences
        @param taskSplitter: the task splitter to use for this collector
        @type taskSplitter: ITaskSplitter
        @param configurationListener: listener notified of configuration
                                      additions, updates and deletions
        @type configurationListener: IConfigurationListener
        @param initializationCallback: a callable that will be executed after
                                       connection to the hub but before
                                       retrieving configuration information
        @type initializationCallback: any callable
        @param stoppingCallback: a callable that will be executed first during
                                 the stopping process. Exceptions will be
                                 logged but otherwise ignored.
        @type stoppingCallback: any callable
        @raise TypeError: if preferences, taskSplitter or
                          configurationListener does not provide the
                          expected interface
        """
        # create the configuration first, so we have the collector name
        # available before activating the rest of the Daemon class hierarchy.
        if not ICollectorPreferences.providedBy(preferences):
            raise TypeError("configuration must provide ICollectorPreferences")
        else:
            # wrap the preferences so a change to configCycleInterval
            # triggers _rescheduleConfig
            self._prefs = ObservableProxy(preferences)
            self._prefs.attachAttributeObserver('configCycleInterval', self._rescheduleConfig)

        if not ITaskSplitter.providedBy(taskSplitter):
            raise TypeError("taskSplitter must provide ITaskSplitter")
        else:
            self._taskSplitter = taskSplitter
        
        if not IConfigurationListener.providedBy(configurationListener):
            raise TypeError(
                    "configurationListener must provide IConfigurationListener")
        # fan configuration notifications out to the caller's listener and
        # to a DeviceGuidListener bound to this daemon
        self._configListener = ConfigListenerNotifier()
        self._configListener.addListener(configurationListener)
        self._configListener.addListener(DeviceGuidListener(self))
        self._initializationCallback = initializationCallback
        self._stoppingCallback = stoppingCallback

        # register the various interfaces we provide the rest of the system so
        # that collector implementors can easily retrieve a reference back here
        # if needed
        zope.component.provideUtility(self, ICollector)
        zope.component.provideUtility(self, IEventService)
        zope.component.provideUtility(self, IDataService)

        # setup daemon statistics
        self._statService = StatisticsService()
        self._statService.addStatistic("devices", "GAUGE")
        self._statService.addStatistic("cyclePoints", "GAUGE")
        self._statService.addStatistic("dataPoints", "DERIVE")
        self._statService.addStatistic("runningTasks", "GAUGE")
        self._statService.addStatistic("queuedTasks", "GAUGE")
        self._statService.addStatistic("missedRuns", "GAUGE")
        zope.component.provideUtility(self._statService, IStatisticsService)

        # register the collector's own preferences object so it may be easily
        # retrieved by factories, tasks, etc.
        zope.component.provideUtility(self.preferences,
                                      ICollectorPreferences,
                                      self.preferences.collectorName)

        # NOTE(review): self.options is read below, so the base-class
        # constructor presumably parses the command line (buildOptions /
        # parseOptions) — confirm against the base class.
        super(CollectorDaemon, self).__init__(name=self.preferences.collectorName)

        # device id -> guid lookup used by generateEvent
        self._deviceGuids = {}
        # config ids of the devices currently being collected
        self._devices = set()
        self._thresholds = Thresholds()
        # devices whose tasks are currently paused by the maintenance cycle
        self._unresponsiveDevices = set()
        # created later by _configureRRD
        self._rrd = None
        # pending delayed call scheduled by remote_notifyConfigChanged
        self.reconfigureTimeout = None

        # keep track of pending tasks if we're doing a single run, and not a
        # continuous cycle
        if not self.options.cycle:
            self._completedTasks = 0
            self._pendingTasks = []

        # the framework factory supplies the config proxy, scheduler and
        # configuration loader task class used by this daemon
        frameworkFactory = zope.component.queryUtility(IFrameworkFactory, self._frameworkFactoryName)
        self._configProxy = frameworkFactory.getConfigurationProxy()
        self._scheduler = frameworkFactory.getScheduler()
        self._scheduler.maxTasks = self.options.maxTasks
        self._ConfigurationLoaderTask = frameworkFactory.getConfigurationLoaderTask()

        # OLD - set the initialServices attribute so that the PBDaemon class
        # will load all of the remote services we need.
        self.initialServices = PBDaemon.initialServices +\
            [self.preferences.configurationService]

        # trap SIGUSR2 so that we can display detailed statistics
        signal.signal(signal.SIGUSR2, self._signalHandler)

        # let the configuration do any additional startup it might need
        self.preferences.postStartup()
        # set to True by runPostConfigTasks once the post-startup tasks
        # have been scheduled
        self.addedPostStartupTasks = False

    def buildOptions(self):
        """
        Register the command-line options for this collector daemon: the
        base-class options, the daemon's own options, any options supplied
        by the framework factory, and finally the collector preferences'
        options. The original docstring notes that CmdBase.__init__ calls
        this method.
        """
        super(CollectorDaemon, self).buildOptions()

        # fall back to 500 when the preferences give no (or a falsy) maxTasks
        defaultMax = getattr(self.preferences, 'maxTasks', None) or 500

        parser = self.parser
        parser.add_option(
            '--maxparallel',
            dest='maxTasks',
            type='int',
            default=defaultMax,
            help='Max number of tasks to run at once, default %default')
        parser.add_option(
            '--logTaskStats',
            dest='logTaskStats',
            type='int',
            default=0,
            help='How often to logs statistics of current tasks, value in seconds; very verbose')
        parser.add_option(
            '--redis-url',
            default='redis://localhost:16379/0',
            help='redis connection string: redis://[hostname]:[port]/[db], default: %default')

        factory = zope.component.queryUtility(IFrameworkFactory, self._frameworkFactoryName)
        if hasattr(factory, 'getFrameworkBuildOptions'):
            # During upgrades we'll be missing this option
            addFrameworkOptions = factory.getFrameworkBuildOptions()
            self._frameworkBuildOptions = addFrameworkOptions
            if addFrameworkOptions:
                addFrameworkOptions(self.parser)

        # the collector configuration gets the last word on options
        self.preferences.buildOptions(self.parser)

    def parseOptions(self):
        """
        Run the base-class option parsing, then expose the resulting options
        object on the preferences as well.
        """
        super(CollectorDaemon, self).parseOptions()
        parsed = self.options
        self.preferences.options = parsed

    def connected(self):
        """
        Connection hook (per the original docstring, PBDaemon calls it once a
        ZenHub connection exists); kicks off daemon startup.

        @return: the Deferred returned by _startup
        """
        startup = self._startup()
        return startup

    def _getInitializationCallback(self):
        """
        Return the initialization callback given to the constructor, or a
        no-op function when none was supplied.
        """
        callback = self._initializationCallback
        if callback is None:
            def doNothing():
                pass
            callback = doNothing
        return callback

    def connectTimeout(self):
        """
        Run the base-class connect-timeout handling, then start the daemon
        anyway.

        @return: the Deferred returned by _startup
        """
        super(CollectorDaemon, self).connectTimeout()
        startup = self._startup()
        return startup

    def _startup(self):
        """
        Build the startup chain: initialization callback, then the config
        cycle, then maintenance. Any failure along the way is routed to
        _errorStop.

        @return: the Deferred driving the chain
        """
        initialize = self._getInitializationCallback()
        chain = defer.maybeDeferred(initialize)
        for step in (self._startConfigCycle, self._startMaintenance):
            chain.addCallback(step)
        chain.addErrback(self._errorStop)
        return chain

    def watchdogCycleTime(self):
        """
        Return the watchdog cycle time, which is twice the preferences'
        cycleInterval. (The original docstring describes the value as being
        in minutes and of integer type.)

        @return: cycle time
        """
        interval = self.preferences.cycleInterval
        return interval * 2

    def getRemoteConfigServiceProxy(self):
        """
        Look up the remote configuration service proxy by the service name
        held in the preferences; a FakeRemote stands in when the service is
        not registered.
        """
        serviceName = self.preferences.configurationService
        fallback = FakeRemote()
        return self.services.get(serviceName, fallback)

    def generateEvent(self, event, **kw):
        """
        Build the event through the base class and, when the event names a
        device whose guid is known, attach that guid as 'device_guid'.

        @return: the event produced by the base class, possibly augmented
        """
        generated = super(CollectorDaemon, self).generateEvent(event, **kw)
        deviceId = generated.get("device")
        if deviceId:
            deviceGuid = self._deviceGuids.get(deviceId)
            if deviceGuid:
                generated['device_guid'] = deviceGuid
        return generated

    def writeRRD(self, path, value, rrdType, rrdCommand=None, cycleTime=None,
                 min='U', max='U', threshEventData=None, timestamp='N', allowStaleDatapoint=True):
        """
        Write a datapoint through the RRD helper and, when thresholds are
        registered for the path, evaluate them and send resulting events.

        @param path: RRD path; also the key into the thresholds' byFilename
                     lookup
        @param value: the raw value to store
        @param rrdType: passed through to the RRD write function
        @param rrdCommand: passed through to the RRD write function
        @param cycleTime: passed through to the RRD write function
        @param min: passed through to the RRD write function
        @param max: passed through to the RRD write function
        @param threshEventData: optional mapping of extra fields copied onto
                                each threshold event; an 'eventKey' entry
                                replaces the default event key prefix
        @param timestamp: passed through to the RRD write function
        @param allowStaleDatapoint: passed through to the RRD write function
        @return: None
        """
        # A literal {} default would be one dict shared by every call; use
        # None as the sentinel and create a fresh dict per call instead.
        if threshEventData is None:
            threshEventData = {}

        # captured before the write so threshold checks use the call time
        now = time.time()

        hasThresholds = bool(self._thresholds.byFilename.get(path))
        if hasThresholds:
            rrd_write_fn = self._rrd.save
        else:
            rrd_write_fn = self._rrd.put

        # save the raw data directly to the RRD files; the value handed back
        # by the write function is what gets checked against thresholds
        value = rrd_write_fn(
            path,
            value,
            rrdType,
            rrdCommand,
            cycleTime,
            min,
            max,
            timestamp=timestamp,
            allowStaleDatapoint=allowStaleDatapoint,
        )

        # check for threshold breaches and send events when needed
        if hasThresholds:
            if 'eventKey' in threshEventData:
                eventKeyPrefix = [threshEventData['eventKey']]
            else:
                # default prefix is the last component of the path
                eventKeyPrefix = [path.rsplit('/')[-1]]

            for ev in self._thresholds.check(path, now, value):
                parts = eventKeyPrefix[:]
                if 'eventKey' in ev:
                    parts.append(ev['eventKey'])
                ev['eventKey'] = '|'.join(parts)

                # add any additional values for this threshold
                # (only update if key is not in event, or if
                # the event's value is blank or None)
                # The loop variable is deliberately not named `value`, so
                # the datapoint value above is never rebound.
                for key, extraValue in threshEventData.items():
                    if ev.get(key, None) in ('', None):
                        ev[key] = extraValue

                self.sendEvent(ev)

    def readRRD(self, path, consolidationFunction, start, end):
        """
        Delegate to RRDUtil.read with the same arguments and return its
        result unchanged.
        """
        data = RRDUtil.read(path, consolidationFunction, start, end)
        return data

    def stop(self, ignored=""):
        """
        Invoke the optional stopping callback, then stop the daemon through
        the base class. An exception raised by the callback is logged and
        does not prevent the shutdown.
        """
        onStop = self._stoppingCallback
        if onStop is not None:
            try:
                onStop()
            except Exception:
                self.log.exception('Exception while stopping daemon')
        super(CollectorDaemon, self).stop(ignored)

    def remote_deleteDevice(self, devId):
        """
        Remote entry point for a single device deletion (per the original
        docstring, invoked by ZenHub); forwards to _deleteDevice.
        """
        self._deleteDevice(deviceId=devId)

    def remote_deleteDevices(self, deviceIds):
        """
        Remote entry point for bulk device deletion (per the original
        docstring, invoked by ZenHub). The payload is unpacked with
        Zipper.load and each id is deleted in turn.
        """
        unpackedIds = Zipper.load(deviceIds)
        for deletedId in unpackedIds:
            self._deleteDevice(deletedId)

    def remote_updateDeviceConfig(self, config):
        """
        Remote entry point for an asynchronous configuration update (per the
        original docstring, invoked by ZenHub). When the daemon was started
        with a device option, configurations for any other device are
        ignored.
        """
        self.log.debug("Device %s updated", config.configId)

        onlyDevice = self.options.device
        if onlyDevice and onlyDevice not in (config.id, config.configId):
            return

        self._updateConfig(config)
        self._configProxy.updateConfigProxy(self.preferences, config)

    def remote_updateDeviceConfigs(self, configs):
        """
        Bulk variant of remote_updateDeviceConfig: unpack the payload with
        Zipper.load and apply each configuration individually.
        """
        unpackedConfigs = Zipper.load(configs)
        for deviceConfig in unpackedConfigs:
            self.remote_updateDeviceConfig(deviceConfig)
            
    def remote_notifyConfigChanged(self):
        """
        Remote notification that the whole configuration should be reloaded
        (per the original docstring, sent by zenhub). A rebuild is scheduled
        30 seconds out unless one is already pending.
        """
        pending = self.reconfigureTimeout
        if pending and pending.active():
            # the already scheduled rebuild will cover this notification too
            self.log.debug("notifyConfigChanged - using existing call")
            return

        self.log.debug("notifyConfigChanged - scheduling call in 30 seconds")
        self.reconfigureTimeout = reactor.callLater(30, self._rebuildConfig)

    def _rebuildConfig(self):
        """
        Drop the configuration loader tasks from the scheduler and start a
        new configuration cycle, so the configuration is rebuilt from
        scratch. A reconfigure timer that is no longer active is cleared
        first.
        """
        timer = self.reconfigureTimeout
        if timer and not timer.active():
            self.reconfigureTimeout = None

        self._scheduler.removeTasksForConfig(CONFIG_LOADER_NAME)
        self._startConfigCycle()

    def _rescheduleConfig(self, observable, attrName, oldValue, newValue, **kwargs):
        """
        Attribute observer for the preferences' configCycleInterval: when
        the value actually changes, remove the configuration loader tasks
        and re-add them so they start on the new interval.

        @param observable: not read by this method
        @param attrName: not read by this method
        @param oldValue: previous interval
        @param newValue: new interval; multiplied by 60 to obtain the start
                         delay handed to _startConfigCycle
        """
        if oldValue != newValue:
            # Pass the values as logging arguments rather than formatting
            # eagerly: the message is only built when debug is enabled, and
            # this matches the other log calls in this class.
            self.log.debug("Changing config task interval from %s to %s minutes",
                           oldValue, newValue)
            self._scheduler.removeTasksForConfig(CONFIG_LOADER_NAME)
            #values are in minutes, scheduler takes seconds
            self._startConfigCycle(startDelay=newValue*60)


    def _taskCompleteCallback(self, taskName):
        """
        Completion hook passed to the scheduler for each task. It only acts
        in single-run (non-cycle) mode: the finished task is crossed off the
        pending list and, once nothing is pending, statistics are displayed
        and the daemon is stopped.
        """
        if self.options.cycle:
            # a normal daemon cycle never shuts down on task completion
            return

        try:
            self._pendingTasks.remove(taskName)
        except ValueError:
            # the task was not being tracked; nothing to cross off
            pass

        self._completedTasks += 1

        if not self._pendingTasks:
            self._displayStatistics()
            self.stop()

    def _updateConfig(self, cfg):
        """
        Apply one device configuration: notify the configuration listeners
        (as an update for a known device, otherwise as an addition), split
        the configuration into tasks and hand those to the scheduler.

        @param cfg: a device configuration exposing configId and,
                    optionally, thresholds
        """
        configId = cfg.configId
        self.log.debug("Processing configuration for %s", configId)

        if configId not in self._devices:
            self._devices.add(configId)
            self._configListener.added(cfg)
        else:
            # known device: its old tasks go away before new ones are added
            self._scheduler.removeTasksForConfig(configId)
            self._configListener.updated(cfg)

        newTasks = self._taskSplitter.splitConfiguration([cfg])
        self.log.debug("Tasks for config %s: %s", configId, newTasks)

        # in single-run mode tasks run immediately and are tracked so the
        # daemon can shut down once they have all completed; otherwise the
        # scheduler decides when each task runs
        singleRun = not self.options.cycle

        for taskName, newTask in newTasks.iteritems():
            self._scheduler.addTask(newTask, self._taskCompleteCallback, singleRun)

            # TODO: another hack?
            if hasattr(cfg, 'thresholds'):
                self._thresholds.updateForDevice(configId, cfg.thresholds)

            if singleRun:
                self._pendingTasks.append(taskName)

    @defer.inlineCallbacks
    def _updateDeviceConfigs(self, updatedConfigs, purgeOmitted):
        """
        Apply a batch of device configurations, yielding to the reactor
        after each one, and optionally delete every device that is absent
        from the batch.

        @param updatedConfigs: the device configurations to apply
        @param purgeOmitted: when true, devices not present in
                             updatedConfigs are deleted afterwards
        """
        self.log.debug("updateDeviceConfigs: updatedConfigs=%s", (map(str, updatedConfigs)))

        for deviceConfig in updatedConfigs:
            self._updateConfig(deviceConfig)
            # give the reactor a turn so other work is not starved
            yield task.deferLater(reactor, 0, lambda: None)

        if not purgeOmitted:
            return

        keptIds = (deviceConfig.configId for deviceConfig in updatedConfigs)
        self._purgeOmittedDevices(keptIds)

    def _purgeOmittedDevices(self, updatedDevices):
        """
        Delete every currently tracked device whose id does not appear in
        updatedDevices.

        @param updatedDevices: a collection of device ids to keep
        @type updatedDevices: a sequence of strings
        """
        staleIds = set(self._devices).difference(updatedDevices)
        self.log.debug("purgeOmittedDevices: deletedConfigs=%s", ','.join(staleIds))
        for staleId in staleIds:
            self._deleteDevice(staleId)
            
    def _deleteDevice(self, deviceId):
        """
        Stop collecting for a device: forget its id, notify the
        configuration listeners, drop its configuration proxy and remove its
        tasks from the scheduler.

        @param deviceId: id of the device to remove
        """
        # Pass deviceId as a logging argument rather than formatting
        # eagerly: "%s" % deviceId raises when deviceId is a tuple, and lazy
        # arguments match the other log calls in this class.
        self.log.debug("Device %s deleted", deviceId)

        # discard() rather than remove(): the id may not be tracked
        self._devices.discard(deviceId)
        self._configListener.deleted(deviceId)
        self._configProxy.deleteConfigProxy(self.preferences, deviceId)
        self._scheduler.removeTasksForConfig(deviceId)

    def _errorStop(self, result):
        """
        Errback for fatal startup problems: log the error as critical and
        stop the daemon.

        @param result: the Twisted failure (any other object is logged via
                       str())
        @type result: failure object
        """
        msg = result.getErrorMessage() if isinstance(result, Failure) else str(result)
        self.log.critical("Unrecoverable Error: %s", msg)
        self.stop()

    def _startConfigCycle(self, result=None, startDelay=0):
        """
        Create the configuration loader task and add it to the scheduler
        unless the scheduler already holds one.

        @param result: not read by this method (allows use as a Deferred
                       callback)
        @param startDelay: assigned to the loader task's startDelay
        @return: an already-fired Deferred carrying a status string
        """
        loaderTask = self._ConfigurationLoaderTask(CONFIG_LOADER_NAME,
                                                   taskConfig=self.preferences)
        loaderTask.startDelay = startDelay

        if loaderTask in self._scheduler:
            # never schedule a second instance of the loader
            self.log.info("%s already added to scheduler", loaderTask.name)
        else:
            # TODO: should we not run maintenance if running in non-cycle mode?
            self._scheduler.addTask(loaderTask)

        return defer.succeed("Configuration loader task started")


    def setPropertyItems(self, items):
        """
        Extend the base-class behaviour so the same items are also applied
        to the collector preferences.
        """
        super(CollectorDaemon, self).setPropertyItems(items)
        preferenceItems = dict(items)
        self._setCollectorPreferences(preferenceItems)


    def _setCollectorPreferences(self, preferenceItems):
        """
        Copy each name/value pair onto the preferences object. Attributes
        the preferences do not have yet are set silently; existing
        attributes are only touched (and logged) when the value differs.

        @param preferenceItems: mapping of preference name to value
        """
        prefs = self.preferences
        for name, value in preferenceItems.iteritems():
            if hasattr(prefs, name):
                if getattr(prefs, name) != value:
                    self.log.debug("Updated %s preference to %s", name, value)
                    setattr(prefs, name, value)
            else:
                # TODO: make a super-low level debug mode?  Logging every
                # previously unknown attribute here isn't helpful.
                setattr(prefs, name, value)

    def _loadThresholdClasses(self, thresholdClasses):
        """
        Import every threshold class in the given collection. An ImportError
        is logged and the remaining classes are still attempted.
        """
        self.log.debug("Loading classes %s", thresholdClasses)
        for className in thresholdClasses:
            try:
                importClass(className)
            except ImportError:
                log.exception("Unable to import class %s", className)

    def _configureRRD(self, rrdCreateCommand, thresholds):
        """
        Create the RRD helper for this daemon and configure rrdStats with
        the monitor option, daemon name, thresholds and create command.
        """
        cycleInterval = self.preferences.cycleInterval
        self._rrd = RRDUtil.RRDUtil(rrdCreateCommand, cycleInterval)

        monitor = self.options.monitor
        self.rrdStats.config(monitor, self.name, thresholds, rrdCreateCommand)

    def _isRRDConfigured(self):
        """
        Truthy only when both rrdStats and the RRD helper are set. The
        result is the underlying object, not a strict bool.
        """
        stats = self.rrdStats
        if not stats:
            return stats
        return self._rrd

    def _startMaintenance(self, ignored=None):
        """
        Start daemon maintenance. In single-run mode the maintenance routine
        is invoked once, directly. Otherwise optional periodic task-stat
        logging is started and a MaintenanceCycle is created and started.

        @param ignored: unused (allows use as a Deferred callback)
        """
        unused(ignored)

        if not self.options.cycle:
            self._maintenanceCycle()
            return

        statsInterval = self.options.logTaskStats
        if statsInterval > 0:
            log.debug("Starting Task Stat logging")
            statsLoop = task.LoopingCall(self._displayStatistics, verbose=True)
            statsLoop.start(statsInterval, now=False)

        cycleInterval = self.preferences.cycleInterval
        self.log.debug("Initializing maintenance Cycle")
        MaintenanceCycle(cycleInterval, self, self._maintenanceCycle).start()

    def _maintenanceCycle(self, ignored=None):
        """
        Run one round of daemon maintenance. In cycle mode this posts daemon
        statistics and, unless the preferences disable
        pauseUnreachableDevices, pauses tasks for devices with ping issues
        and resumes tasks for devices that have recovered. In single-run
        mode nothing is done.

        @param ignored: not read by this method
        @return: a Deferred for the maintenance work
        """
        self.log.debug("Performing periodic maintenance")

        def _postStatistics():
            self._displayStatistics()

            # statistics are only posted once RRD has been configured
            if not self._isRRDConfigured():
                return

            # (statistic name, reader) pairs, evaluated strictly in order;
            # endCycle() is called before dataPoints is read
            readers = (
                ("devices", lambda: len(self._devices)),
                ("cyclePoints", lambda: self._rrd.endCycle()),
                ("dataPoints", lambda: self._rrd.dataPoints),
                # Scheduler statistics
                ("runningTasks", lambda: self._scheduler._executor.running),
                ("queuedTasks", lambda: self._scheduler._executor.queued),
                ("missedRuns", lambda: self._scheduler.missedRuns),
            )
            for statName, readValue in readers:
                stat = self._statService.getStatistic(statName)
                stat.value = readValue()

            events = self._statService.postStatistics(self.rrdStats,
                                                      self.preferences.cycleInterval)
            self.sendEvents(events)

        def _getDeviceIssues(result):
            # TODO: handle different types of device issues, such as WMI issues
            return self.getDevicePingIssues()

        def _processDeviceIssues(result):
            self.log.debug("deviceIssues=%r", result)
            if result is None:
                return result  # exception or some other problem

            # Each ping issue is a (deviceId, count, total) tuple; only the
            # device id matters here.
            stillDown = set(issue[0] for issue in result)

            recovered = self._unresponsiveDevices.difference(stillDown)
            for devId in recovered:
                self.log.debug("Resuming tasks for device %s", devId)
                self._scheduler.resumeTasksForConfig(devId)

            self._unresponsiveDevices = stillDown
            for devId in self._unresponsiveDevices:
                self.log.debug("Pausing tasks for device %s", devId)
                self._scheduler.pauseTasksForConfig(devId)

            return result

        if not self.options.cycle:
            return defer.succeed("No maintenance required")

        d = defer.maybeDeferred(_postStatistics)
        if getattr(self.preferences, 'pauseUnreachableDevices', True):
            d.addCallback(_getDeviceIssues)
            d.addCallback(_processDeviceIssues)
        return d

    def runPostConfigTasks(self, result=None):
        """
        Schedule the preferences' post-startup tasks, once. Nothing is
        scheduled when called with a Failure or when the tasks have already
        been added.

        @param result: ignored unless it is a Failure
        """
        if isinstance(result, Failure) or self.addedPostStartupTasks:
            return

        # preferences without postStartupTasks contribute no tasks
        listTasks = getattr(self.preferences, 'postStartupTasks', lambda : [])
        for startupTask in listTasks():
            self._scheduler.addTask(startupTask, now=True)
        self.addedPostStartupTasks = True

    def _displayStatistics(self, verbose=False):
        """
        Log how many devices and datapoints have been processed, then have
        the scheduler display its own statistics.

        @param verbose: passed through to the scheduler's displayStatistics
        """
        rrd = self._rrd
        deviceCount = len(self._devices)
        if not rrd:
            # no RRD helper yet, so no datapoints can have been written
            self.log.info("%d devices processed (0 datapoints)", deviceCount)
        else:
            self.log.info("%d devices processed (%d datapoints)",
                          deviceCount, rrd.dataPoints)

        self._scheduler.displayStatistics(verbose)

    def _signalHandler(self, signum, frame):
        """
        Signal handler (installed for SIGUSR2 by the constructor): display
        verbose statistics. Both arguments are unused.
        """
        self._displayStatistics(verbose=True)
# Example no. 4
# 0
class CollectorDaemon(RRDDaemon):
    """
    The daemon class for the entire ZenCollector framework. This class bridges
    the gap between the older daemon framework and ZenCollector. New collectors
    no longer should extend this class to implement a new collector.
    """
    zope.interface.implements(ICollector, IDataService, IEventService)

    _frameworkFactoryName = ""

    @property
    def preferences(self):
        """
        Read-only access to this daemon's preferences.

        @return: the object stored in self._prefs, i.e. the ObservableProxy
                 the constructor wrapped around the supplied preferences
        """
        return self._prefs

    def __init__(self,
                 preferences,
                 taskSplitter,
                 configurationListener=DUMMY_LISTENER,
                 initializationCallback=None,
                 stoppingCallback=None):
        """
        Build a CollectorDaemon and register it with the component registry.
        Usually one instance exists per process, although nothing here
        enforces that.

        @param preferences: the collector configuration
        @type preferences: ICollectorPreferences
        @param taskSplitter: the task splitter to use for this collector
        @type taskSplitter: ITaskSplitter
        @param configurationListener: listener notified about configuration
                                      additions, updates and deletions
        @type configurationListener: IConfigurationListener
        @param initializationCallback: a callable run at startup, before the
                                       configuration loader task is scheduled
        @type initializationCallback: any callable
        @param stoppingCallback: a callable run first when stopping;
                                 exceptions it raises are logged and ignored
        @type stoppingCallback: any callable
        @raise TypeError: when preferences, taskSplitter or
                          configurationListener does not provide the
                          required interface
        """
        # The preferences come first: the collector name is needed before the
        # rest of the Daemon class hierarchy is initialized.
        if not ICollectorPreferences.providedBy(preferences):
            raise TypeError("configuration must provide ICollectorPreferences")
        self._prefs = ObservableProxy(preferences)
        # reschedule the config loader whenever configCycleInterval changes
        self._prefs.attachAttributeObserver('configCycleInterval',
                                            self._rescheduleConfig)

        if not ITaskSplitter.providedBy(taskSplitter):
            raise TypeError("taskSplitter must provide ITaskSplitter")
        self._taskSplitter = taskSplitter

        if not IConfigurationListener.providedBy(configurationListener):
            raise TypeError(
                "configurationListener must provide IConfigurationListener")
        self._configListener = ConfigListenerNotifier()
        self._configListener.addListener(configurationListener)
        self._configListener.addListener(DeviceGuidListener(self))
        self._initializationCallback = initializationCallback
        self._stoppingCallback = stoppingCallback

        # Register the interfaces this daemon provides so collector
        # implementors can look the daemon up when they need it.
        for providedInterface in (ICollector, IEventService, IDataService):
            zope.component.provideUtility(self, providedInterface)

        # daemon statistics: (name, RRD type)
        self._statService = StatisticsService()
        for statName, statType in (("devices", "GAUGE"),
                                   ("cyclePoints", "GAUGE"),
                                   ("dataPoints", "DERIVE"),
                                   ("runningTasks", "GAUGE"),
                                   ("queuedTasks", "GAUGE"),
                                   ("missedRuns", "GAUGE")):
            self._statService.addStatistic(statName, statType)
        zope.component.provideUtility(self._statService, IStatisticsService)

        # Make the preferences themselves retrievable (by collector name) for
        # factories, tasks, etc.
        zope.component.provideUtility(self.preferences, ICollectorPreferences,
                                      self.preferences.collectorName)

        super(CollectorDaemon, self).__init__(
            name=self.preferences.collectorName)

        self._deviceGuids = {}
        self._devices = set()
        self._thresholds = Thresholds()
        self._unresponsiveDevices = set()
        self._rrd = None
        self.reconfigureTimeout = None

        # A single run (as opposed to a continuous cycle) needs to know which
        # tasks are still outstanding so it can shut down afterwards.
        if not self.options.cycle:
            self._completedTasks = 0
            self._pendingTasks = []

        factory = zope.component.queryUtility(IFrameworkFactory,
                                              self._frameworkFactoryName)
        self._configProxy = factory.getConfigurationProxy()
        self._scheduler = factory.getScheduler()
        self._scheduler.maxTasks = self.options.maxTasks
        self._ConfigurationLoaderTask = factory.getConfigurationLoaderTask()

        # OLD - extend initialServices so that the PBDaemon class loads the
        # remote configuration service as well.
        self.initialServices = (PBDaemon.initialServices +
                                [self.preferences.configurationService])

        # SIGUSR2 displays detailed statistics
        signal.signal(signal.SIGUSR2, self._signalHandler)

        # give the preferences a chance to do their own startup work
        self.preferences.postStartup()
        self.addedPostStartupTasks = False

    def buildOptions(self):
        """
        Register the command-line options of this collector daemon.

        Adds the daemon's own options on top of the parent's, then lets the
        framework factory (when it supports it) and the preferences add
        theirs to the same parser.
        """
        super(CollectorDaemon, self).buildOptions()

        # the preferences may suggest a limit; otherwise default to 500
        defaultMax = getattr(self.preferences, 'maxTasks', None) or 500

        addOption = self.parser.add_option
        addOption('--maxparallel',
                  dest='maxTasks',
                  type='int',
                  default=defaultMax,
                  help='Max number of tasks to run at once, default %default')
        addOption('--logTaskStats',
                  dest='logTaskStats',
                  type='int',
                  default=0,
                  help='How often to logs statistics of current tasks, '
                       'value in seconds; very verbose')
        addOption('--redis-url',
                  default='redis://localhost:16379/0',
                  help='redis connection string: '
                       'redis://[hostname]:[port]/[db], default: %default')

        factory = zope.component.queryUtility(IFrameworkFactory,
                                              self._frameworkFactoryName)
        # During upgrades the factory may not offer this hook yet
        if hasattr(factory, 'getFrameworkBuildOptions'):
            self._frameworkBuildOptions = factory.getFrameworkBuildOptions()
            if self._frameworkBuildOptions:
                self._frameworkBuildOptions(self.parser)

        # the collector configuration may contribute options, too
        self.preferences.buildOptions(self.parser)

    def parseOptions(self):
        """
        Parse the options through the parent class, then expose the parsed
        options on the preferences object as its ``options`` attribute.
        """
        super(CollectorDaemon, self).parseOptions()
        parsedOptions = self.options
        self.preferences.options = parsedOptions

    def connected(self):
        """
        Hook called by PBDaemon once the connection to ZenHub is established;
        kicks off the startup sequence.

        @return: the deferred returned by _startup
        """
        startupDeferred = self._startup()
        return startupDeferred

    def _getInitializationCallback(self):
        """
        Return the callable to run at startup.

        @return: the initialization callback given to the constructor, or a
                 do-nothing function when none was supplied
        """
        def _noop():
            pass

        callback = self._initializationCallback
        return _noop if callback is None else callback

    def connectTimeout(self):
        """
        Run the parent's connect-timeout handling and then go through the
        same startup sequence that connected() uses.

        @return: the deferred returned by _startup
        """
        super(CollectorDaemon, self).connectTimeout()
        startupDeferred = self._startup()
        return startupDeferred

    def _startup(self):
        """
        Run the initialization callback, then start the configuration cycle
        and the maintenance cycle; any failure along the way ends up in
        _errorStop.

        @return: the deferred driving the startup chain
        """
        initCallback = self._getInitializationCallback()
        startup = defer.maybeDeferred(initCallback)
        for step in (self._startConfigCycle, self._startMaintenance):
            startup.addCallback(step)
        startup.addErrback(self._errorStop)
        return startup

    def watchdogCycleTime(self):
        """
        Return the watchdog cycle time, which is twice the preferences'
        cycleInterval (and therefore in whatever unit cycleInterval uses).

        @return: cycle time
        """
        cycleInterval = self.preferences.cycleInterval
        return cycleInterval * 2

    def getRemoteConfigServiceProxy(self):
        """
        Look up the remote configuration service proxy.

        @return: the entry of self.services registered under the preferences'
                 configurationService name, or a FakeRemote when there is none
        """
        serviceName = self.preferences.configurationService
        return self.services.get(serviceName, FakeRemote())

    def generateEvent(self, event, **kw):
        """
        Generate the event through the parent class and, when the GUID of
        the event's device is known, add it as 'device_guid'.

        @param event: passed to the parent's generateEvent
        @param kw: passed to the parent's generateEvent
        @return: the event produced by the parent, possibly with
                 'device_guid' added
        """
        eventCopy = super(CollectorDaemon, self).generateEvent(event, **kw)
        deviceId = eventCopy.get("device")
        if not deviceId:
            return eventCopy

        guid = self._deviceGuids.get(deviceId)
        if guid:
            eventCopy['device_guid'] = guid
        return eventCopy

    def writeRRD(self,
                 path,
                 value,
                 rrdType,
                 rrdCommand=None,
                 cycleTime=None,
                 min='U',
                 max='U',
                 threshEventData=None,
                 timestamp='N',
                 allowStaleDatapoint=True):
        """
        Write a datapoint to the RRD file at path and send an event for each
        result of checking the written value against the path's thresholds.

        @param path: RRD path; also the key used to look up thresholds
        @param value: the raw value to write
        @param rrdType: passed through to the RRD write call
        @param rrdCommand: passed through to the RRD write call
        @param cycleTime: passed through to the RRD write call
        @param min: passed through to the RRD write call
        @param max: passed through to the RRD write call
        @param threshEventData: optional mapping of extra fields for threshold
                                events. A field is copied onto an event only
                                when the event has no value, '' or None for
                                it. Its 'eventKey', when present, replaces the
                                path-based event key prefix.
        @param timestamp: passed through to the RRD write call
        @param allowStaleDatapoint: passed through to the RRD write call
        """
        # A None default (instead of a shared {} instance) keeps calls
        # independent of each other; same idiom as ThresholdNotifier.notify.
        threshEventData = threshEventData or {}
        now = time.time()

        hasThresholds = bool(self._thresholds.byFilename.get(path))
        # paths with thresholds go through save(), all others through put()
        rrd_write_fn = self._rrd.save if hasThresholds else self._rrd.put

        # save the raw data directly to the RRD files; whatever the write
        # call returns is the value that gets checked against the thresholds
        value = rrd_write_fn(
            path,
            value,
            rrdType,
            rrdCommand,
            cycleTime,
            min,
            max,
            timestamp=timestamp,
            allowStaleDatapoint=allowStaleDatapoint,
        )

        if not hasThresholds:
            return

        # check for threshold breaches and send events when needed
        if 'eventKey' in threshEventData:
            eventKeyPrefix = [threshEventData['eventKey']]
        else:
            # fall back to the last component of the path
            eventKeyPrefix = [path.rsplit('/', 1)[-1]]

        for ev in self._thresholds.check(path, now, value):
            parts = eventKeyPrefix[:]
            if 'eventKey' in ev:
                parts.append(ev['eventKey'])
            ev['eventKey'] = '|'.join(parts)

            # add any additional values for this threshold (only update if
            # key is not in event, or if the event's value is blank or None);
            # the loop variable is deliberately not called `value` so the
            # datapoint value above is not overwritten
            for key, extraValue in threshEventData.items():
                if ev.get(key, None) in ('', None):
                    ev[key] = extraValue

            self.sendEvent(ev)

    def readRRD(self, path, consolidationFunction, start, end):
        """
        Read data from an RRD file by delegating to RRDUtil.read.

        @param path: passed through to RRDUtil.read
        @param consolidationFunction: passed through to RRDUtil.read
        @param start: passed through to RRDUtil.read
        @param end: passed through to RRDUtil.read
        @return: whatever RRDUtil.read returns
        """
        readResult = RRDUtil.read(path, consolidationFunction, start, end)
        return readResult

    def stop(self, ignored=""):
        """
        Stop the daemon, running the stopping callback (if any) first.

        An exception raised by the stopping callback is logged and does not
        prevent the parent's stop from running.

        @param ignored: passed on to the parent's stop
        """
        stoppingCallback = self._stoppingCallback
        if stoppingCallback is not None:
            try:
                stoppingCallback()
            except Exception:
                self.log.exception('Exception while stopping daemon')
        super(CollectorDaemon, self).stop(ignored)

    def remote_deleteDevice(self, devId):
        """
        Called remotely by ZenHub when a monitored device is deleted.

        @param devId: id of the deleted device; None is ignored, which guards
                      against parsing updates during a disconnect
        """
        if devId is not None:
            self._deleteDevice(devId)

    def remote_deleteDevices(self, deviceIds):
        """
        Called remotely by ZenHub when monitored devices are deleted.

        @param deviceIds: device ids in the form Zipper.load accepts; None is
                          ignored, which guards against parsing updates
                          during a disconnect
        """
        if deviceIds is not None:
            for deletedId in Zipper.load(deviceIds):
                self._deleteDevice(deletedId)

    def remote_updateDeviceConfig(self, config):
        """
        Called remotely by ZenHub when an asynchronous configuration update
        occurs.

        The update is applied unless the daemon is restricted to a single
        device (options.device) and the config belongs to a different one.

        @param config: the updated configuration; None is ignored, which
                       guards against parsing updates during a disconnect
        """
        if config is None:
            return
        self.log.debug("Device %s updated", config.configId)

        deviceFilter = self.options.device
        if deviceFilter and deviceFilter not in (config.id, config.configId):
            # the daemon is limited to one device and this is not it
            return

        self._updateConfig(config)
        self._configProxy.updateConfigProxy(self.preferences, config)

    def remote_updateDeviceConfigs(self, configs):
        """
        Called remotely by ZenHub when asynchronous configuration updates
        occur; applies each one through remote_updateDeviceConfig.

        @param configs: configurations in the form Zipper.load accepts; None
                        is ignored
        """
        if configs is not None:
            for updatedConfig in Zipper.load(configs):
                self.remote_updateDeviceConfig(updatedConfig)

    def remote_notifyConfigChanged(self):
        """
        Called from zenhub to notify that the entire config should be
        updated. Schedules a rebuild 30 seconds from now unless one is
        already pending.
        """
        pendingCall = self.reconfigureTimeout
        if pendingCall and pendingCall.active():
            # the already scheduled call will pick up this change as well
            self.log.debug("notifyConfigChanged - using existing call")
        else:
            self.log.debug(
                "notifyConfigChanged - scheduling call in 30 seconds")
            self.reconfigureTimeout = reactor.callLater(30,
                                                        self._rebuildConfig)

    def _rebuildConfig(self):
        """
        Remove the configuration loader tasks and start the configuration
        cycle again, so the configuration is rebuilt from scratch.
        """
        delayedCall = self.reconfigureTimeout
        if delayedCall and not delayedCall.active():
            # the delayed call is no longer active; forget it
            self.reconfigureTimeout = None
        self._scheduler.removeTasksForConfig(CONFIG_LOADER_NAME)
        self._startConfigCycle()

    def _rescheduleConfig(self, observable, attrName, oldValue, newValue,
                          **kwargs):
        """
        Attribute observer for configCycleInterval: when the value really
        changed, drop the configuration loader tasks and add them again so
        they start on the new interval.

        @param observable: not used
        @param attrName: not used
        @param oldValue: previous interval, in minutes
        @param newValue: new interval, in minutes
        """
        if oldValue != newValue:
            message = ("Changing config task interval from %s to %s minutes" %
                       (oldValue, newValue))
            self.log.debug(message)
            self._scheduler.removeTasksForConfig(CONFIG_LOADER_NAME)
            # values are in minutes, scheduler takes seconds
            delaySeconds = newValue * 60
            self._startConfigCycle(startDelay=delaySeconds)

    def _taskCompleteCallback(self, taskName):
        """
        Completion callback handed to the scheduler for every task.

        Only matters for a single run (options.cycle unset): the task is
        ticked off and, once nothing is pending anymore, the statistics are
        displayed and the daemon stops.

        @param taskName: name of the task that completed
        """
        if self.options.cycle:
            # a normal daemon cycle keeps running; nothing to track
            return

        try:
            self._pendingTasks.remove(taskName)
        except ValueError:
            # the task was not (or no longer) in the pending list
            pass

        self._completedTasks += 1

        # all pending tasks done: shut the daemon down
        if not self._pendingTasks:
            self._displayStatistics()
            self.stop()

    def _updateConfig(self, cfg):
        """
        Apply a single configuration: notify the configuration listeners,
        split the configuration into tasks and hand those to the scheduler.

        For a configId that is already known, its current tasks are removed
        first. Each removed task's next expected run is remembered so that a
        new task with the same name gets a startDelay equal to the time
        remaining until that run.

        @param cfg: the configuration to apply; must have a configId and may
                    have a thresholds attribute
        """
        configId = cfg.configId
        self.log.debug("Processing configuration for %s", configId)

        # task name -> next expected run; stays empty for a new configId
        nextExpectedRuns = {}
        if configId in self._devices:
            tasksToRemove = self._scheduler.getTasksForConfig(configId)
            nextExpectedRuns = {
                taskToRemove.name:
                self._scheduler.getNextExpectedRun(taskToRemove.name)
                for taskToRemove in tasksToRemove
            }
            self._scheduler.removeTasks(task.name for task in tasksToRemove)
            self._configListener.updated(cfg)
        else:
            self._devices.add(configId)
            self._configListener.added(cfg)

        newTasks = self._taskSplitter.splitConfiguration([cfg])
        self.log.debug("Tasks for config %s: %s", configId, newTasks)

        # one reference time for all delay computations below
        nowTime = time.time()
        for (taskName, task_) in newTasks.iteritems():
            #if not cycling run the task immediately otherwise let the scheduler
            #decide when to run the task
            now = not self.options.cycle
            nextExpectedRun = nextExpectedRuns.get(taskName, None)
            if nextExpectedRun:
                # a task with this name existed before: keep its schedule
                startDelay = nextExpectedRun - nowTime
                if startDelay <= 0:
                    # handle edge case where we are about to run
                    # so run immediately
                    now = True
                    task_.startDelay = 0
                else:
                    task_.startDelay = startDelay
            self._scheduler.addTask(task_, self._taskCompleteCallback, now)

            # TODO: another hack?
            # NOTE(review): this update does not depend on the loop
            # variables, so it is repeated for every task and skipped
            # entirely when newTasks is empty -- confirm that is intended.
            if hasattr(cfg, 'thresholds'):
                self._thresholds.updateForDevice(configId, cfg.thresholds)

            # if we're not running a normal daemon cycle then keep track of the
            # tasks we just added for this device so that we can shutdown once
            # all pending tasks have completed
            if not self.options.cycle:
                self._pendingTasks.append(taskName)

    @defer.inlineCallbacks
    def _updateDeviceConfigs(self, updatedConfigs, purgeOmitted):
        """
        Apply a batch of device configurations, yielding to the reactor
        after each one.

        @param updatedConfigs: the device configurations to apply
        @param purgeOmitted: when true, every known device whose configId is
                             not among updatedConfigs is deleted afterwards
        """
        self.log.debug("updateDeviceConfigs: updatedConfigs=%s",
                       (map(str, updatedConfigs)))

        for deviceConfig in updatedConfigs:
            self._updateConfig(deviceConfig)
            # yield time to reactor so other things can happen
            yield task.deferLater(reactor, 0, lambda: None)

        if purgeOmitted:
            keptIds = (deviceConfig.configId
                       for deviceConfig in updatedConfigs)
            self._purgeOmittedDevices(keptIds)

    def _purgeOmittedDevices(self, updatedDevices):
        """
        Delete every currently known device that is missing from the devices
        being updated.

        @param updatedDevices: the device ids that are being updated
        @type updatedDevices: an iterable of strings
        """
        # computed on a copy, so deleting devices below does not disturb it
        omittedDevices = set(self._devices).difference(updatedDevices)
        self.log.debug("purgeOmittedDevices: deletedConfigs=%s",
                       ','.join(omittedDevices))
        for omittedId in omittedDevices:
            self._deleteDevice(omittedId)

    def _deleteDevice(self, deviceId):
        """
        Forget a device: drop it from the known devices, notify the
        configuration listeners, delete its config proxy and remove its
        scheduled tasks.

        @param deviceId: id of the device to delete
        """
        self.log.debug("Device %s deleted" % deviceId)

        # discard() tolerates ids that were never registered
        self._devices.discard(deviceId)
        self._configListener.deleted(deviceId)
        self._configProxy.deleteConfigProxy(self.preferences, deviceId)
        self._scheduler.removeTasksForConfig(deviceId)

    def _errorStop(self, result):
        """
        Twisted errback for fatal problems: log the error as critical and
        stop the daemon.

        @param result: the Twisted failure (any other object is logged via
                       str())
        @type result: failure object
        """
        isFailure = isinstance(result, Failure)
        msg = result.getErrorMessage() if isFailure else str(result)
        self.log.critical("Unrecoverable Error: %s", msg)
        self.stop()

    def _startConfigCycle(self, result=None, startDelay=0):
        """
        Create the configuration loader task and add it to the scheduler,
        unless the scheduler already contains it.

        @param result: not used; lets the method serve as a deferred callback
        @param startDelay: start delay assigned to the loader task
        @return: an already fired deferred
        """
        loaderTask = self._ConfigurationLoaderTask(
            CONFIG_LOADER_NAME, taskConfig=self.preferences)
        loaderTask.startDelay = startDelay

        if loaderTask in self._scheduler:
            # never keep two instances of the config loader in the scheduler
            self.log.info("%s already added to scheduler", loaderTask.name)
        else:
            # Run initial maintenance cycle as soon as possible
            # TODO: should we not run maintenance if running in non-cycle mode?
            self._scheduler.addTask(loaderTask)
        return defer.succeed("Configuration loader task started")

    def setPropertyItems(self, items):
        """
        Set the property items through the parent class and mirror them
        onto the collector preferences.

        @param items: name/value pairs, in a form dict() accepts
        """
        super(CollectorDaemon, self).setPropertyItems(items)
        preferenceItems = dict(items)
        self._setCollectorPreferences(preferenceItems)

    def _setCollectorPreferences(self, preferenceItems):
        """
        Copy the given items onto the preferences object. Attributes the
        preferences lack are added silently; existing attributes are only
        written (and logged) when the value differs.

        @param preferenceItems: mapping of preference name to value
        """
        prefs = self.preferences
        for name, value in preferenceItems.iteritems():
            if hasattr(prefs, name):
                if getattr(prefs, name) != value:
                    self.log.debug("Updated %s preference to %s", name, value)
                    setattr(prefs, name, value)
            else:
                # TODO: make a super-low level debug mode?  Logging that the
                # preferences object lacks this attribute isn't helpful.
                setattr(prefs, name, value)

    def _loadThresholdClasses(self, thresholdClasses):
        """
        Import each of the given threshold classes. A class that fails with
        ImportError is logged and skipped.

        @param thresholdClasses: iterable of class references accepted by
                                 importClass
        """
        self.log.debug("Loading classes %s", thresholdClasses)
        for className in thresholdClasses:
            try:
                importClass(className)
            except ImportError:
                log.exception("Unable to import class %s", className)

    def _configureRRD(self, rrdCreateCommand, thresholds):
        """
        Create the RRD helper for this daemon and configure the daemon's
        statistics writer.

        @param rrdCreateCommand: passed to RRDUtil and to rrdStats.config
        @param thresholds: passed to rrdStats.config
        """
        cycleInterval = self.preferences.cycleInterval
        self._rrd = RRDUtil.RRDUtil(rrdCreateCommand, cycleInterval)
        # NOTE(review): the monitor is passed first and the daemon name
        # second, while DaemonStats.config in this file declares
        # (name, monitor, ...) -- confirm the order is intended.
        self.rrdStats.config(self.options.monitor, self.name, thresholds,
                             rrdCreateCommand)

    def _isRRDConfigured(self):
        """
        Tell whether both the statistics writer and the RRD helper are set.

        @return: a truthy value only when self.rrdStats and self._rrd are
                 both truthy (the objects themselves, not a strict boolean)
        """
        statsWriter = self.rrdStats
        return statsWriter and self._rrd

    def _startMaintenance(self, ignored=None):
        """
        Start periodic maintenance. For a single run (options.cycle unset)
        the maintenance routine is simply called once; otherwise a
        MaintenanceCycle is started and, when --logTaskStats is positive,
        a looping call that logs verbose statistics.

        @param ignored: not used; lets the method serve as a deferred callback
        """
        unused(ignored)
        options = self.options
        if not options.cycle:
            self._maintenanceCycle()
            return

        statsInterval = options.logTaskStats
        if statsInterval > 0:
            log.debug("Starting Task Stat logging")
            statsLoop = task.LoopingCall(self._displayStatistics, verbose=True)
            statsLoop.start(statsInterval, now=False)

        interval = self.preferences.cycleInterval
        self.log.debug("Initializing maintenance Cycle")
        MaintenanceCycle(interval, self, self._maintenanceCycle).start()

    def _maintenanceCycle(self, ignored=None):
        """
        Perform one round of daemon maintenance: display and post the daemon
        statistics and, unless the preferences disable
        pauseUnreachableDevices, pause the tasks of devices with ping issues
        and resume the tasks of devices that recovered.

        For a single run (options.cycle unset) nothing is done.

        @param ignored: not used
        @return: a deferred for the maintenance work
        """
        self.log.debug("Performing periodic maintenance")

        if not self.options.cycle:
            return defer.succeed("No maintenance required")

        def publishStatistics():
            self._displayStatistics()

            # update and post statistics if we've been configured to do so
            if not self._isRRDConfigured():
                return

            getStat = self._statService.getStatistic

            stat = getStat("devices")
            stat.value = len(self._devices)

            stat = getStat("cyclePoints")
            stat.value = self._rrd.endCycle()

            stat = getStat("dataPoints")
            stat.value = self._rrd.dataPoints

            # Scheduler statistics
            stat = getStat("runningTasks")
            stat.value = self._scheduler._executor.running

            stat = getStat("queuedTasks")
            stat.value = self._scheduler._executor.queued

            stat = getStat("missedRuns")
            stat.value = self._scheduler.missedRuns

            events = self._statService.postStatistics(
                self.rrdStats, self.preferences.cycleInterval)
            self.sendEvents(events)

        def fetchDeviceIssues(result):
            # TODO: handle different types of device issues, such as WMI issues
            return self.getDevicePingIssues()

        def applyDeviceIssues(issues):
            self.log.debug("deviceIssues=%r", issues)
            if issues is None:
                return issues  # exception or some other problem

            # Device ping issues come as tuples of (deviceId, count, total);
            # only the device id is of interest here
            stillUnresponsive = set(issue[0] for issue in issues)

            # devices that had issues last time but no longer do
            recovered = self._unresponsiveDevices - stillUnresponsive
            for devId in recovered:
                self.log.debug("Resuming tasks for device %s", devId)
                self._scheduler.resumeTasksForConfig(devId)

            self._unresponsiveDevices = stillUnresponsive
            for devId in self._unresponsiveDevices:
                self.log.debug("Pausing tasks for device %s", devId)
                self._scheduler.pauseTasksForConfig(devId)

            return issues

        maintenance = defer.maybeDeferred(publishStatistics)
        if getattr(self.preferences, 'pauseUnreachableDevices', True):
            maintenance.addCallback(fetchDeviceIssues)
            maintenance.addCallback(applyDeviceIssues)
        return maintenance

    def runPostConfigTasks(self, result=None):
        """
        Schedule the preferences' post-startup tasks, at most once.

        The method may be handed a Failure instead of a normal result; in
        that case nothing is scheduled.

        @param result: value passed by the caller; only inspected to detect
                       a Failure
        """
        if isinstance(result, Failure) or self.addedPostStartupTasks:
            # either the preceding step failed or the tasks were added before
            return

        # preferences that do not define postStartupTasks contribute no tasks
        taskFactory = getattr(self.preferences, 'postStartupTasks',
                              lambda: [])
        for startupTask in taskFactory():
            self._scheduler.addTask(startupTask, now=True)
        self.addedPostStartupTasks = True

    def _displayStatistics(self, verbose=False):
        """
        Log the device and datapoint counts, then let the scheduler display
        its own statistics.

        @param verbose: passed through to the scheduler's displayStatistics
        """
        deviceCount = len(self._devices)
        if not self._rrd:
            # no RRD helper has been configured, so report zero datapoints
            self.log.info("%d devices processed (0 datapoints)",
                          deviceCount)
        else:
            self.log.info("%d devices processed (%d datapoints)",
                          deviceCount, self._rrd.dataPoints)

        self._scheduler.displayStatistics(verbose)

    def _signalHandler(self, signum, frame):
        """
        Handler installed for SIGUSR2 by the constructor; logs verbose
        statistics.

        @param signum: signal number (not used)
        @param frame: interrupted stack frame (not used)
        """
        self._displayStatistics(verbose=True)
class DaemonStats(object):
    "Utility for a daemon to write out internal performance statistics"

    def __init__(self):
        self.name = ""
        self.monitor = ""
        self.rrdCreateCommand = ""
        # rrdFile() reads createCommand; define it here so the attribute
        # exists even before config() has been called
        self.createCommand = []

        self.thresholds = Thresholds()

    def config(self, name, monitor, thresholds, rrdCreateCommand=None):
        """Initialize the object.  We could do this in __init__, but
        that would delay creation to after configuration time, which
        may run asynchronously with collection or heartbeats.  By
        deferring initialization, this object implements the Null
        Object pattern until the application is ready to start writing
        real statistics.

        @param name: stored as self.name; used for the 'Daemons/<name>'
                     directory of the RRD files
        @param monitor: stored as self.monitor; used as file name prefix
        @param thresholds: list of thresholds loaded into a fresh Thresholds
        @param rrdCreateCommand: RRD creation arguments, either a
                                 newline-separated string or a sequence;
                                 defaults to
                                 PerformanceConf.defaultRRDCreateCommand
        """
        self.name = name
        self.monitor = monitor
        if not rrdCreateCommand:
            # imported lazily, only when the default is actually needed
            from Products.ZenModel.PerformanceConf import PerformanceConf
            rrdCreateCommand = PerformanceConf.defaultRRDCreateCommand
        if isinstance(rrdCreateCommand, basestring):
            # one rrdtool argument per line
            self.createCommand = rrdCreateCommand.split('\n')
        else:
            self.createCommand = rrdCreateCommand
        self.thresholds = Thresholds()
        self.thresholds.updateList(thresholds)

    def rrdFile(self, type, cycleTime, name, minVal='U', maxVal='U'):
        """Create an RRD file if it does not exist.
        Returns the basename of the rrdFile, suitable for checking thresholds.
        Returns None while the object has not been configured with a name.

        @param type: RRD data source type, e.g. 'DERIVE'
        @param cycleTime: RRD step; three times this value is used as the
                          data source heartbeat
        @param name: statistic name, appended to the monitor in the file name
        @param minVal: data source minimum
        @param maxVal: data source maximum
        """
        if not self.name:
            return None
        base = os.path.join('Daemons', self.name)
        directory = zenPath('perf', base)
        if not os.path.exists(directory):
            os.makedirs(directory)
        base = os.path.join(base, '%s_%s' % (self.monitor, name))
        fileName = fullname(base)
        if not os.path.exists(fileName):
            dataSource = 'DS:ds0:%s:%s:%s:%s' % (type, cycleTime * 3,
                                                 minVal, maxVal)
            rrdtool.create(fileName, '--step', "%d" % cycleTime, dataSource,
                           *self.createCommand)
        return base

    def derive(self, name, cycleTime, value):
        "Write a DERIVE value, return threshold events"
        return self.counter(name, cycleTime, value)

    def counter(self, name, cycleTime, value):
        "Write a DERIVE(! NOT COUNTER!) value, return threshold events"
        fileName = self.rrdFile('DERIVE', cycleTime, name, 0)
        if not fileName:
            # not configured yet: act as a null object
            return []

        full = fullname(fileName)
        try:
            rrdtool.update(full, 'N:%s' % int(value))
            # read back the averaged rate over the last two cycles; the
            # fetch result is (start/stop info, names, rows of values)
            values = rrdtool.fetch(full, 'AVERAGE',
                                   '-s', 'now-%d' % (cycleTime * 2),
                                   '-e', 'now')[2]
            # an empty fetch result means there is nothing to check yet
            if values and values[0]:
                value = values[0][0]
                if value is not None:
                    return self.thresholds.check(fileName, time.time(), value)
        except rrdtool.error as err:
            log.error('rrdtool reported error %s %s', err, full)
        return []
class DaemonStats(object):
    "Utility for a daemon to write out internal performance statistics"

    def __init__(self):
        self.name = ""
        self.monitor = ""
        self.rrdCreateCommand = ""
        # rrdFile() reads createCommand; define it here so the attribute
        # exists even before config() has been called
        self.createCommand = []

        self.thresholds = Thresholds()

    def config(self, name, monitor, thresholds, rrdCreateCommand=None):
        """Initialize the object.  We could do this in __init__, but
        that would delay creation to after configuration time, which
        may run asynchronously with collection or heartbeats.  By
        deferring initialization, this object implements the Null
        Object pattern until the application is ready to start writing
        real statistics.

        @param name: stored as self.name; used for the 'Daemons/<name>'
                     directory of the RRD files
        @param monitor: stored as self.monitor; used as file name prefix
        @param thresholds: list of thresholds loaded into a fresh Thresholds
        @param rrdCreateCommand: RRD creation arguments, either a
                                 newline-separated string or a sequence;
                                 defaults to
                                 PerformanceConf.defaultRRDCreateCommand
        """
        self.name = name
        self.monitor = monitor
        if not rrdCreateCommand:
            # imported lazily, only when the default is actually needed
            from Products.ZenModel.PerformanceConf import PerformanceConf
            rrdCreateCommand = PerformanceConf.defaultRRDCreateCommand
        if isinstance(rrdCreateCommand, basestring):
            # one rrdtool argument per line
            self.createCommand = rrdCreateCommand.split('\n')
        else:
            self.createCommand = rrdCreateCommand
        self.thresholds = Thresholds()
        self.thresholds.updateList(thresholds)

    def rrdFile(self, type, cycleTime, name, minVal='U', maxVal='U'):
        """Create an RRD file if it does not exist.
        Returns the basename of the rrdFile, suitable for checking thresholds.
        Returns None while the object has not been configured with a name.

        @param type: RRD data source type, e.g. 'DERIVE'
        @param cycleTime: RRD step; three times this value is used as the
                          data source heartbeat
        @param name: statistic name, appended to the monitor in the file name
        @param minVal: data source minimum
        @param maxVal: data source maximum
        """
        if not self.name:
            return None
        base = os.path.join('Daemons', self.name)
        directory = zenPath('perf', base)
        if not os.path.exists(directory):
            os.makedirs(directory)
        base = os.path.join(base, '%s_%s' % (self.monitor, name))
        fileName = fullname(base)
        if not os.path.exists(fileName):
            dataSource = 'DS:ds0:%s:%s:%s:%s' % (type, cycleTime * 3,
                                                 minVal, maxVal)
            rrdtool.create(fileName, '--step', "%d" % cycleTime, dataSource,
                           *self.createCommand)
        return base

    def derive(self, name, cycleTime, value):
        "Write a DERIVE value, return threshold events"
        return self.counter(name, cycleTime, value)

    def counter(self, name, cycleTime, value):
        "Write a DERIVE(! NOT COUNTER!) value, return threshold events"
        fileName = self.rrdFile('DERIVE', cycleTime, name, 0)
        if not fileName:
            # not configured yet: act as a null object
            return []

        full = fullname(fileName)
        try:
            rrdtool.update(full, 'N:%s' % int(value))
            # read back the averaged rate over the last two cycles; the
            # fetch result is (start/stop info, names, rows of values)
            values = rrdtool.fetch(full, 'AVERAGE',
                                   '-s', 'now-%d' % (cycleTime * 2),
                                   '-e', 'now')[2]
            # an empty fetch result means there is nothing to check yet
            if values and values[0]:
                value = values[0][0]
                if value is not None:
                    return self.thresholds.check(fileName, time.time(), value)
        except rrdtool.error as err:
            log.error('rrdtool reported error %s %s', err, full)
        return []