Example 1
    def __init__(self):
        signal.signal(signal.SIGUSR2, signal.SIG_IGN)
        ZCmdBase.__init__(self)
        if self.options.profiling:
            self.profiler = ContinuousProfiler('zenhubworker', log=self.log)
            self.profiler.start()
        self.current = IDLE
        self.currentStart = 0
        self.numCalls = Metrology.meter("zenhub.workerCalls")
        try:
            self.log.debug("establishing SIGUSR1 signal handler")
            signal.signal(signal.SIGUSR1, self.sighandler_USR1)
            self.log.debug("establishing SIGUSR2 signal handler")
            signal.signal(signal.SIGUSR2, self.sighandler_USR2)
        except ValueError:
            # If we get called multiple times, this will generate an exception:
            # ValueError: signal only works in main thread
            # Ignore it as we've already set up the signal handler.
            pass

        self.zem = self.dmd.ZenEventManager
        loadPlugins(self.dmd)
        self.pid = os.getpid()
        self.services = {}
        factory = ReconnectingPBClientFactory(pingPerspective=False)
        self.log.debug("Connecting to %s:%d", self.options.hubhost,
                       self.options.hubport)
        reactor.connectTCP(self.options.hubhost, self.options.hubport, factory)
        self.log.debug("Logging in as %s", self.options.username)
        c = credentials.UsernamePassword(self.options.username,
                                         self.options.password)
        factory.gotPerspective = self.gotPerspective

        def stop(*args):
            reactor.callLater(0, reactor.stop)

        factory.clientConnectionLost = stop
        factory.setCredentials(c)

        self.log.debug("Creating async MetricReporter")
        daemonTags = {
            'zenoss_daemon': 'zenhub_worker_%s' % self.options.workernum,
            'zenoss_monitor': self.options.monitor,
            'internal': True
        }

        def stopReporter():
            if self.metricreporter:
                return self.metricreporter.stop()

        # Order of the shutdown triggers matters: stop the reporter first,
        # since calling metricWriter() below registers shutdown triggers for
        # the actual metric HTTP and Redis publishers.
        reactor.addSystemEventTrigger('before', 'shutdown', stopReporter)
        self.metricreporter = TwistedMetricReporter(
            metricWriter=metricWriter(), tags=daemonTags)
        self.metricreporter.start()
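This example's pattern recurs throughout the listing: build a tags dict that identifies the daemon, register a 'before shutdown' trigger that stops the reporter, then construct and start a TwistedMetricReporter. A minimal sketch of that ordering, assuming the TwistedMetricReporter(metricWriter=..., tags=...) interface used above (the import path is a guess and varies across Zenoss versions):

# Sketch only: illustrates the start/stop ordering, not a drop-in implementation.
from twisted.internet import reactor
# Assumed import path; adjust for your Zenoss version.
from Products.ZenUtils.MetricReporter import TwistedMetricReporter

def setupMetricReporter(daemon, metricWriterFactory, workernum=0):
    daemonTags = {
        'zenoss_daemon': 'zenhub_worker_%s' % workernum,
        'internal': True,
    }

    def stopReporter():
        # Returning the reporter's Deferred (if any) lets the reactor
        # wait for a final flush before shutdown continues.
        if daemon.metricreporter:
            return daemon.metricreporter.stop()

    # Register our trigger first: calling the writer factory below (e.g.
    # metricWriter() in the example) may register its own shutdown
    # triggers for the HTTP and Redis publishers.
    reactor.addSystemEventTrigger('before', 'shutdown', stopReporter)
    daemon.metricreporter = TwistedMetricReporter(
        metricWriter=metricWriterFactory(), tags=daemonTags)
    daemon.metricreporter.start()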
Example 2
 def startStatsLoop():
     self.log.debug("Starting Statistic posting")
     loop = task.LoopingCall(self.postStatistics)
     loop.start(self.options.writeStatistics, now=False)
     daemonTags = {
         'zenoss_daemon': self.name,
         'zenoss_monitor': self.options.monitor,
         'internal': True
     }
     self._metrologyReporter = TwistedMetricReporter(
         self.options.writeStatistics, self.metricWriter(), daemonTags)
     self._metrologyReporter.start()
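The loop above hinges on Twisted's task.LoopingCall: postStatistics fires every writeStatistics seconds, and now=False delays the first run by one full interval. A self-contained demo of that scheduling, using only stock Twisted:

# Minimal LoopingCall demo mirroring startStatsLoop's scheduling.
from twisted.internet import reactor, task

def postStatistics():
    print("posting statistics")  # stand-in for self.postStatistics

def startStatsLoop(interval):
    loop = task.LoopingCall(postStatistics)
    # now=False: wait one full interval before the first call, exactly
    # like loop.start(self.options.writeStatistics, now=False) above.
    loop.start(interval, now=False)
    return loop

if __name__ == '__main__':
    reactor.callWhenRunning(startStatsLoop, 5)
    reactor.callLater(12, reactor.stop)  # let two iterations run, then exit
    reactor.run()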
Example 3
    def metricreporter(self):
        if not self._metric_reporter:
            self._metric_reporter = TwistedMetricReporter(
                metricWriter=self.metric_writer, tags=self.daemon_tags
            )

        return self._metric_reporter
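Note that this accessor only builds and caches the reporter; it never calls start(), which remains the caller's responsibility. The same memoization reads naturally as a property. A sketch of the enclosing shape, under the same assumptions about TwistedMetricReporter's constructor (the holder class itself is hypothetical):

# Hypothetical holder class showing the cached-accessor form of this example.
# Assumed import path; adjust for your Zenoss version.
from Products.ZenUtils.MetricReporter import TwistedMetricReporter

class MetricReporterHolder(object):
    def __init__(self, metric_writer, daemon_tags):
        self.metric_writer = metric_writer
        self.daemon_tags = daemon_tags
        self._metric_reporter = None

    @property
    def metricreporter(self):
        # Build once, reuse afterwards; starting it is the caller's job.
        if not self._metric_reporter:
            self._metric_reporter = TwistedMetricReporter(
                metricWriter=self.metric_writer, tags=self.daemon_tags)
        return self._metric_reporter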
Example 4
    def run(self):
        # Configure all actions with the command-line options
        self.abortIfWaiting()
        options_dict = dict(vars(self.options))
        for name, action in getUtilitiesFor(IAction):
            action.configure(options_dict)

        dao = NotificationDao(self.dmd)
        task = ISignalProcessorTask(dao)
        metric_destination = os.environ.get("CONTROLPLANE_CONSUMER_URL", "")
        if metric_destination == "":
            metric_destination = "http://localhost:22350/api/metrics/store"
        username = os.environ.get("CONTROLPLANE_CONSUMER_USERNAME", "")
        password = os.environ.get("CONTROLPLANE_CONSUMER_PASSWORD", "")
        pub = publisher.HttpPostPublisher(username, password,
                                          metric_destination)

        log.debug("Creating async MetricReporter")
        daemonTags = {'zenoss_daemon': 'zenactiond', 'internal': True}
        self.metricreporter = TwistedMetricReporter(
            prefix='zenoss.', metricWriter=MetricWriter(pub), tags=daemonTags)
        self.metricreporter.start()
        reactor.addSystemEventTrigger('before', 'shutdown',
                                      self.metricreporter.stop)

        if self.options.workerid == 0 and (self.options.daemon
                                           or self.options.cycle):
            self._callHomeCycler.start()
            self._schedule.start()  # maintenance windows

        if self.options.daemon or self.options.cycle:
            self._maintenanceCycle.start()  # heartbeats, etc.

        if (self.options.daemon
                or self.options.cycle) and self.options.workers > 1:
            self._workers.startWorkers()

        self._consumer = QueueConsumer(task, self.dmd)
        reactor.callWhenRunning(self._start)
        reactor.run()
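run() resolves its metric endpoint from CONTROLPLANE_* environment variables, falling back to a local consumer URL when the variable is unset or empty. That lookup-with-default is easy to factor out; a small sketch (URL and variable names taken from the example above):

import os

def consumerConfig(defaultUrl="http://localhost:22350/api/metrics/store"):
    """Return (url, username, password) from CONTROLPLANE_* env vars."""
    # `or` covers both an unset variable and one set to the empty string,
    # matching the explicit == "" check in run() above.
    url = os.environ.get("CONTROLPLANE_CONSUMER_URL", "") or defaultUrl
    username = os.environ.get("CONTROLPLANE_CONSUMER_USERNAME", "")
    password = os.environ.get("CONTROLPLANE_CONSUMER_PASSWORD", "")
    return url, username, password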
Example 5
    def main(self):
        """
        Start the main event loop.
        """
        if self.options.cycle:
            reactor.callLater(0, self.heartbeat)
            self.log.debug("Creating async MetricReporter")
            daemonTags = {
                'zenoss_daemon': 'zenhub',
                'zenoss_monitor': self.options.monitor,
                'internal': True
            }
            self.metricreporter = TwistedMetricReporter(
                metricWriter=self._metric_writer, tags=daemonTags)
            self.metricreporter.start()
            reactor.addSystemEventTrigger('before', 'shutdown',
                                          self.metricreporter.stop)

        self.check_workers_task = task.LoopingCall(self.check_workers)
        self.check_workers_task.start(CHECK_WORKER_INTERVAL)

        reactor.run()

        self.shutdown = True
        self.log.debug("Killing workers")
        for proc in self.workerprocessmap.itervalues():
            try:
                proc.signalProcess('KILL')
                self.log.debug("Killed worker %s", proc)
            except ProcessExitedAlready:
                pass
            except Exception:
                # Best-effort kill during shutdown; ignore any other errors.
                pass
        workerconfig = getattr(self, 'workerconfig', None)
        if workerconfig and os.path.exists(workerconfig):
            os.unlink(self.workerconfig)
        getUtility(IEventPublisher).close()
        if self.options.profiling:
            self.profiler.stop()
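main() leans on two reactor behaviors: a 'before'/'shutdown' trigger runs while the reactor is still usable, and reactor.run() only returns after shutdown completes, which is why the worker-kill cleanup can safely sit after it. A self-contained demonstration of both:

# Demonstrates shutdown-trigger ordering around reactor.run().
from twisted.internet import reactor

def beforeShutdown():
    # Fires once reactor.stop() is called, while the reactor still runs;
    # this is where the examples stop their metric reporters.
    print("before shutdown: stop reporters, flush queues")

reactor.addSystemEventTrigger('before', 'shutdown', beforeShutdown)
reactor.callLater(1, reactor.stop)
reactor.run()
# Execution resumes here only after shutdown, like the worker-kill loop
# and workerconfig cleanup at the end of main() above.
print("after reactor.run(): kill workers, remove temp files")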
Example 6
class PBDaemon(ZenDaemon, pb.Referenceable):

    name = 'pbdaemon'
    initialServices = ['EventService']
    heartbeatEvent = {'eventClass': Heartbeat}
    heartbeatTimeout = 60 * 3
    _customexitcode = 0
    _pushEventsDeferred = None
    _eventHighWaterMark = None
    _healthMonitorInterval = 30

    def __init__(self, noopts=0, keeproot=False, name=None):
        # if we were provided our collector name via the constructor instead of
        # via code, be sure to store it correctly.
        if name is not None:
            self.name = name
            self.mname = name

        try:
            ZenDaemon.__init__(self, noopts, keeproot)

        except IOError:
            import traceback
            self.log.critical(traceback.format_exc(0))
            sys.exit(1)

        self._thresholds = None
        self._threshold_notifier = None
        self.rrdStats = DaemonStats()
        self.lastStats = 0
        self.perspective = None
        self.services = {}
        self.eventQueueManager = EventQueueManager(self.options, self.log)
        self.startEvent = startEvent.copy()
        self.stopEvent = stopEvent.copy()
        details = dict(component=self.name, device=self.options.monitor)
        for evt in self.startEvent, self.stopEvent, self.heartbeatEvent:
            evt.update(details)
        self.initialConnect = defer.Deferred()
        self.stopped = False
        self.counters = collections.Counter()
        self._pingedZenhub = None
        self._connectionTimeout = None
        self._publisher = None
        self._internal_publisher = None
        self._metric_writer = None
        self._derivative_tracker = None
        self._metrologyReporter = None
        # Add a shutdown trigger to send a stop event and flush the event queue
        reactor.addSystemEventTrigger('before', 'shutdown', self._stopPbDaemon)

        # Set up a looping call to support the health check.
        self.healthMonitor = task.LoopingCall(self._checkZenHub)
        self.healthMonitor.start(self._healthMonitorInterval)

    def publisher(self):
        if not self._publisher:
            host, port = urlparse(self.options.redisUrl).netloc.split(':')
            try:
                port = int(port)
            except ValueError:
                self.log.exception("redis url contains non-integer port " +
                                   "value {port}, defaulting to {default}".
                                   format(port=port, default=publisher.defaultRedisPort))
                port = publisher.defaultRedisPort
            self._publisher = publisher.RedisListPublisher(
                host, port, self.options.metricBufferSize,
                channel=self.options.metricsChannel, maxOutstandingMetrics=self.options.maxOutstandingMetrics
            )
        return self._publisher

    def internalPublisher(self):
        if not self._internal_publisher:
            url = os.environ.get("CONTROLPLANE_CONSUMER_URL", None)
            username = os.environ.get("CONTROLPLANE_CONSUMER_USERNAME", "")
            password = os.environ.get("CONTROLPLANE_CONSUMER_PASSWORD", "")
            if url:
                self._internal_publisher = publisher.HttpPostPublisher(
                    username, password, url)
        return self._internal_publisher

    def metricWriter(self):
        if not self._metric_writer:
            metric_writer = MetricWriter(self.publisher())
            # Default to the plain writer; wrap it below only when an
            # internal (Control Center) publisher is actually available,
            # so this never returns None.
            self._metric_writer = metric_writer
            if os.environ.get("CONTROLPLANE", "0") == "1":
                internal_publisher = self.internalPublisher()
                if internal_publisher:
                    internal_metric_filter = (
                        lambda metric, value, timestamp, tags:
                            tags and tags.get("internal", False))
                    internal_metric_writer = FilteredMetricWriter(
                        internal_publisher, internal_metric_filter)
                    self._metric_writer = AggregateMetricWriter(
                        [metric_writer, internal_metric_writer])
        return self._metric_writer

    def derivativeTracker(self):
        if not self._derivative_tracker:
            self._derivative_tracker = DerivativeTracker()
        return self._derivative_tracker

    def connecting(self):
        """
        Called when about to connect to zenhub
        """
        self.log.info("Attempting to connect to zenhub")

    def getZenhubInstanceId(self):
        """
        Called after we connected to zenhub.
        """

        def callback(result):
            self.log.info("Connected to the zenhub/%s instance", result)

        def errback(result):
            self.log.info(
                "Unexpected error while getting zenhub instance id: %s",
                result)

        d = self.perspective.callRemote('getHubInstanceId')
        d.addCallback(callback)
        d.addErrback(errback)
        return d

    def gotPerspective(self, perspective):
        """
        This gets called every time we reconnect.

        @parameter perspective: Twisted perspective object
        @type perspective: Twisted perspective object
        """
        self.perspective = perspective
        self.getZenhubInstanceId()
        # Cancel the connection timeout timer as it's no longer needed.
        if self._connectionTimeout:
            try:
                self._connectionTimeout.cancel()
            except AlreadyCalled:
                pass
            self._connectionTimeout = None
        d2 = self.getInitialServices()
        if self.initialConnect:
            self.log.debug('Chaining getInitialServices with d2')
            self.initialConnect, d = None, self.initialConnect
            d2.chainDeferred(d)

    def connect(self):
        pingInterval = self.options.zhPingInterval
        factory = ReconnectingPBClientFactory(connectTimeout=60, pingPerspective=self.options.pingPerspective,
                                              pingInterval=pingInterval, pingtimeout=pingInterval * 5)
        self.log.info("Connecting to %s:%d" % (self.options.hubhost, self.options.hubport))
        factory.connectTCP(self.options.hubhost, self.options.hubport)
        username = self.options.hubusername
        password = self.options.hubpassword
        self.log.debug("Logging in as %s" % username)
        c = credentials.UsernamePassword(username, password)
        factory.gotPerspective = self.gotPerspective
        factory.connecting = self.connecting
        factory.setCredentials(c)

        def timeout(d):
            if not d.called:
                self.connectTimeout()
        self._connectionTimeout = reactor.callLater(
            self.options.hubtimeout, timeout, self.initialConnect)
        return self.initialConnect

    def connectTimeout(self):
        self.log.error('Timeout connecting to zenhub: is it running?')

    def eventService(self):
        return self.getServiceNow('EventService')

    def getServiceNow(self, svcName):
        if svcName not in self.services:
            self.log.warning(
                'No service named %r: ZenHub may be disconnected', svcName)
        return self.services.get(svcName, None) or FakeRemote()

    def getService(self, serviceName, serviceListeningInterface=None):
        """
        Attempt to get a service from zenhub.  Returns a deferred.
        When service is retrieved it is stashed in self.services with
        serviceName as the key.  When getService is called it will first
        check self.services and if serviceName is already there it will return
        the entry from self.services wrapped in a defer.succeed
        """
        if serviceName in self.services:
            return defer.succeed(self.services[serviceName])

        def removeService(ignored):
            self.log.debug('Removing service %s' % serviceName)
            if serviceName in self.services:
                del self.services[serviceName]

        def callback(result, serviceName):
            self.log.debug('Loaded service %s from zenhub' % serviceName)
            self.services[serviceName] = result
            result.notifyOnDisconnect(removeService)
            return result

        def errback(error, serviceName):
            self.log.debug('errback after getting service %s' % serviceName)
            self.log.error('Could not retrieve service %s' % serviceName)
            if serviceName in self.services:
                del self.services[serviceName]
            return error

        d = self.perspective.callRemote('getService',
                                        serviceName,
                                        self.options.monitor,
                                        serviceListeningInterface or self,
                                        self.options.__dict__)
        d.addCallback(callback, serviceName)
        d.addErrback(errback, serviceName)
        return d

    def getInitialServices(self):
        """
        After connecting to zenhub, gather our initial list of services.
        """
        def errback(error):
            if isinstance(error, Failure):
                self.log.critical( "Invalid monitor: %s" % self.options.monitor)
                reactor.stop()
                return defer.fail(RemoteBadMonitor(
                           "Invalid monitor: %s" % self.options.monitor, ''))
            return error

        self.log.debug('Setting up initial services: %s' % \
                ', '.join(self.initialServices))
        d = defer.DeferredList(
            [self.getService(name) for name in self.initialServices],
            fireOnOneErrback=True, consumeErrors=True)
        d.addErrback(errback)
        return d

    def connected(self):
        pass


    def _getThresholdNotifier(self):
        if not self._threshold_notifier:
            self._threshold_notifier = ThresholdNotifier(self.sendEvent, self.getThresholds())
        return self._threshold_notifier

    def getThresholds(self):
        if not self._thresholds:
            self._thresholds = Thresholds()
        return self._thresholds


    def run(self):
        def stopReporter():
            if self._metrologyReporter:
                return self._metrologyReporter.stop()

        # Order of the shutdown triggers matter. Want to stop reporter first, calling self.metricWriter() below
        # registers shutdown triggers for the actual metric http and redis publishers.
        reactor.addSystemEventTrigger('before', 'shutdown', stopReporter)

        threshold_notifier = self._getThresholdNotifier()
        self.rrdStats.config(self.name,
                             self.options.monitor,
                             self.metricWriter(),
                             threshold_notifier,
                             self.derivativeTracker())
        self.log.debug('Starting PBDaemon initialization')
        d = self.connect()

        def callback(result):
            self.sendEvent(self.startEvent)
            self.pushEventsLoop()
            self.log.debug('Calling connected.')
            self.connected()
            return result

        def startStatsLoop():
            self.log.debug("Starting Statistic posting")
            loop = task.LoopingCall(self.postStatistics)
            loop.start(self.options.writeStatistics, now=False)
            daemonTags = {
                'zenoss_daemon': self.name,
                'zenoss_monitor': self.options.monitor,
                'internal': True
            }
            self._metrologyReporter = TwistedMetricReporter(self.options.writeStatistics, self.metricWriter(), daemonTags)
            self._metrologyReporter.start()

        if self.options.cycle:
            reactor.callWhenRunning(startStatsLoop)
        d.addCallback(callback)
        d.addErrback(twisted.python.log.err)
        reactor.run()
        if self._customexitcode:
            sys.exit(self._customexitcode)

    def setExitCode(self, exitcode):
        self._customexitcode = exitcode

    def stop(self, ignored=''):
        if reactor.running:
            try:
                reactor.stop()
            except ReactorNotRunning:
                self.log.debug("Tried to stop reactor that was stopped")
        else:
            self.log.debug("stop() called when not running")

    def _stopPbDaemon(self):
        if self.stopped:
            return
        self.stopped = True
        if 'EventService' in self.services:
            # send stop event if we don't have an implied --cycle,
            # or if --cycle has been specified
            if not hasattr(self.options, 'cycle') or \
                    getattr(self.options, 'cycle', True):
                self.sendEvent(self.stopEvent)
                self.log.debug("Sent a 'stop' event")
            if self._pushEventsDeferred:
                self.log.debug("Currently sending events. Queueing next call")
                d = self._pushEventsDeferred
                # Schedule another call to flush any additional queued events
                d.addBoth(lambda unused: self.pushEvents())
            else:
                d = self.pushEvents()
            return d

        self.log.debug("No event sent as no EventService available.")

    def sendEvents(self, events):
        map(self.sendEvent, events)

    def sendEvent(self, event, **kw):
        """ Add event to queue of events to be sent.  If we have an event
        service then process the queue.
        """
        generatedEvent = self.generateEvent(event, **kw)
        self.eventQueueManager.addEvent(generatedEvent)
        self.counters['eventCount'] += 1

        if self._eventHighWaterMark:
            return self._eventHighWaterMark
        elif self.eventQueueManager.event_queue_length >= self.options.maxqueuelen * self.options.queueHighWaterMark:
            return self.pushEvents()
        else:
            return defer.succeed(None)

    def generateEvent(self, event, **kw):
        """ Add event to queue of events to be sent.  If we have an event
        service then process the queue.
        """
        if not reactor.running:
            return
        eventCopy = {}
        for k, v in chain(event.items(), kw.items()):
            if isinstance(v, basestring):
                #default max size is 512k
                size = LIMITS.get(k, DEFAULT_LIMIT)
                eventCopy[k] = v[0:size] if len(v)>size else v
            else:
                eventCopy[k] = v

        eventCopy['agent'] = self.name
        eventCopy['monitor'] = self.options.monitor
        eventCopy['manager'] = self.fqdn
        return eventCopy

    @defer.inlineCallbacks
    def pushEventsLoop(self):
        """Periodially, wake up and flush events to ZenHub.
        """
        reactor.callLater(self.options.eventflushseconds, self.pushEventsLoop)
        yield self.pushEvents()

        # Record the number of events in the queue up to every 2 seconds.
        now = time.time()
        if self.rrdStats.name and now >= (self.lastStats + 2):
            self.lastStats = now
            self.rrdStats.gauge(
                'eventQueueLength', self.eventQueueManager.event_queue_length)

    @defer.inlineCallbacks
    def pushEvents(self):
        """Flush events to ZenHub.
        """
        # are we already shutting down?
        if not reactor.running:
            self.log.debug("Skipping event sending - reactor not running.")
            return

        if (self.eventQueueManager.event_queue_length >=
                self.options.maxqueuelen * self.options.queueHighWaterMark
                and not self._eventHighWaterMark):
            self.log.debug(
                "Queue length exceeded high water mark, %s; creating high"
                " water mark deferred",
                self.eventQueueManager.event_queue_length)
            self._eventHighWaterMark = defer.Deferred()

        # are we still connected to ZenHub?
        evtSvc = self.services.get('EventService', None)
        if not evtSvc:
            self.log.error("No event service: %r", evtSvc)
            yield task.deferLater(reactor, 0, lambda:None)
            if self._eventHighWaterMark:
                d, self._eventHighWaterMark = self._eventHighWaterMark, None
                #not connected, release throttle and let things queue
                d.callback("No Event Service")
            defer.returnValue(None)

        if self._pushEventsDeferred:
            self.log.debug("Skipping event sending - previous call active.")
            defer.returnValue("Push Pending")

        sent = 0
        try:
            #only set _pushEventsDeferred after we know we have an evtSvc/connectivity
            self._pushEventsDeferred = defer.Deferred()

            def repush(val):
                if self.eventQueueManager.event_queue_length >= self.options.eventflushchunksize:
                    self.pushEvents()
                return val
            # conditionally push more events after this pushEvents call finishes
            self._pushEventsDeferred.addCallback(repush)

            discarded_events = self.eventQueueManager.discarded_events
            if discarded_events:
                self.log.error(
                    'Discarded oldest %d events because maxqueuelen was '
                    'exceeded: %d/%d',
                    discarded_events,
                    discarded_events + self.options.maxqueuelen,
                    self.options.maxqueuelen)
                self.counters['discardedEvents'] += discarded_events
                self.eventQueueManager.discarded_events = 0

            send_events_fn = partial(evtSvc.callRemote, 'sendEvents')
            try:
                sent = yield self.eventQueueManager.sendEvents(send_events_fn)
            except ConnectionLost as ex:
                self.log.error('Error sending event: %s', ex)
                #let the reactor have time to clean up any connection errors and make callbacks
                yield task.deferLater(reactor, 0, lambda:None)
        except Exception as ex:
            self.log.exception(ex)
            #let the reactor have time to clean up any connection errors and make callbacks
            yield task.deferLater(reactor, 0, lambda:None)
        finally:
            if self._pushEventsDeferred:
                d, self._pushEventsDeferred = self._pushEventsDeferred, None
                d.callback('sent %s' % sent)
            if (self._eventHighWaterMark and
                    self.eventQueueManager.event_queue_length <
                    self.options.maxqueuelen * self.options.queueHighWaterMark):
                self.log.debug("Queue restored to below high water mark: %s", self.eventQueueManager.event_queue_length)
                d, self._eventHighWaterMark = self._eventHighWaterMark, None
                d.callback("Queue length below high water mark")

    def heartbeat(self):
        """if cycling, send a heartbeat, else, shutdown"""
        if not self.options.cycle:
            self.stop()
            return
        heartbeatEvent = self.generateEvent(self.heartbeatEvent, timeout=self.heartbeatTimeout)
        self.eventQueueManager.addHeartbeatEvent(heartbeatEvent)
        # heartbeat is normally 3x cycle time
        self.niceDoggie(self.heartbeatTimeout / 3)

    def postStatisticsImpl(self):
        pass

    def postStatistics(self):
        # save daemon counter stats
        for name, value in self.counters.items():
            self.log.info("Counter %s, value %d", name, value)
            self.rrdStats.counter(name, value)

        # persist counters values
        self.postStatisticsImpl()

    def _pickleName(self):
        instance_id = os.environ.get('CONTROLPLANE_INSTANCE_ID')
        return 'var/%s_%s_counters.pickle' % (self.name, instance_id)

    def remote_getName(self):
        return self.name

    def remote_shutdown(self, unused):
        self.stop()
        self.sigTerm()

    def remote_setPropertyItems(self, items):
        pass

    @translateError
    def remote_updateThresholdClasses(self, classes):
        from Products.ZenUtils.Utils import importClass
        self.log.debug("Loading classes %s", classes)
        for c in classes:
            try:
                importClass(c)
            except ImportError:
                self.log.error("Unable to import class %s", c)

    def _checkZenHub(self):
        """
        Check status of ZenHub (using ping method of service).
        @return: if ping occurs, return deferred with result of ping attempt.
        """
        self.log.debug('_checkZenHub: entry')

        def callback(result):
            self.log.debug('ZenHub health check: Got result %s' % result)
            if result == 'pong':
                self.log.debug('ZenHub health check: Success - received pong from ZenHub ping service.')
                self._signalZenHubAnswering(True)
            else:
                self.log.error('ZenHub health check did not respond as expected.')
                self._signalZenHubAnswering(False)

        def errback(error):
            self.log.error('Error pinging ZenHub: %s (%s).' % (error, getattr(error, 'message', '')))
            self._signalZenHubAnswering(False)

        try:
            if self.perspective:
                self.log.debug('ZenHub health check: perspective found. attempting remote ping call.')
                d = self.perspective.callRemote('ping')
                d.addCallback(callback)
                d.addErrback(errback)
                return d
            else:
                self.log.debug('ZenHub health check: ZenHub may be down.')
                self._signalZenHubAnswering(False)
        except pb.DeadReferenceError:
            self.log.warning("ZenHub health check: DeadReferenceError - lost connection to ZenHub.")
            self._signalZenHubAnswering(False)
        except Exception as e:
            self.log.error('ZenHub health check: caught %s exception: %s' % (e.__class__, e.message))
            self._signalZenHubAnswering(False)


    def _signalZenHubAnswering(self, answering):
        """
        Write or remove the file that the ZenHub_answering health check uses
        to report status.
        @param answering: True if ZenHub is answering, False otherwise.
        """
        self.log.debug('_signalZenHubAnswering(%s)' % answering)
        filename = 'zenhub_connected'
        signalFilePath = zenPath('var', filename)
        if answering:
            self.log.debug('writing file at %s' % signalFilePath)
            atomicWrite(signalFilePath, '')
        else:
            try:
                self.log.debug('removing file at %s' % signalFilePath)
                os.remove(signalFilePath)
            except Exception as e:
                self.log.debug('ignoring %s exception (%s) removing file %s' % (e.__class__, e.message, signalFilePath))


    def buildOptions(self):
        ZenDaemon.buildOptions(self)

        self.parser.add_option('--hubhost',
                                dest='hubhost',
                                default=DEFAULT_HUB_HOST,
                                help='Host of zenhub daemon.'
                                ' Default is %s.' % DEFAULT_HUB_HOST)
        self.parser.add_option('--hubport',
                                dest='hubport',
                                type='int',
                                default=DEFAULT_HUB_PORT,
                                help='Port zenhub listens on.'
                                    ' Default is %s.' % DEFAULT_HUB_PORT)
        self.parser.add_option('--hubusername',
                                dest='hubusername',
                                default=DEFAULT_HUB_USERNAME,
                                help='Username for zenhub login.'
                                    ' Default is %s.' % DEFAULT_HUB_USERNAME)
        self.parser.add_option('--hubpassword',
                                dest='hubpassword',
                                default=DEFAULT_HUB_PASSWORD,
                                help='Password for zenhub login.'
                                    ' Default is %s.' % DEFAULT_HUB_PASSWORD)
        self.parser.add_option('--monitor',
                                dest='monitor',
                                default=DEFAULT_HUB_MONITOR,
                                help='Name of monitor instance to use for'
                                    ' configuration.  Default is %s.'
                                    % DEFAULT_HUB_MONITOR)
        self.parser.add_option('--initialHubTimeout',
                               dest='hubtimeout',
                               type='int',
                               default=30,
                               help='Initial time to wait for a ZenHub '
                                    'connection')
        self.parser.add_option('--allowduplicateclears',
                               dest='allowduplicateclears',
                               default=False,
                               action='store_true',
                               help='Send clear events even when the most '
                               'recent event was also a clear event.')

        self.parser.add_option('--duplicateclearinterval',
                               dest='duplicateclearinterval',
                               default=0,
                               type='int',
                               help=('Send a clear event every [DUPLICATECLEARINTERVAL] '
                                     'events.')
        )

        self.parser.add_option('--eventflushseconds',
                               dest='eventflushseconds',
                               default=5.,
                               type='float',
                               help='Seconds between attempts to flush '
                               'events to ZenHub.')

        self.parser.add_option('--eventflushchunksize',
                               dest='eventflushchunksize',
                               default=50,
                               type='int',
                               help='Number of events to send to ZenHub'
                               ' at one time')

        self.parser.add_option('--maxqueuelen',
                               dest='maxqueuelen',
                               default=5000,
                               type='int',
                               help='Maximum number of events to queue')

        self.parser.add_option('--queuehighwatermark',
                               dest='queueHighWaterMark',
                               default=0.75,
                               type='float',
                               help='Fraction of maxqueuelen at which event pushback starts')
        self.parser.add_option('--zenhubpinginterval',
                               dest='zhPingInterval',
                               default=120,
                               type='int',
                               help='How often to ping zenhub')

        self.parser.add_option('--disable-event-deduplication',
                               dest='deduplicate_events',
                               default=True,
                               action='store_false',
                               help='Disable event de-duplication')

        self.parser.add_option('--redis-url',
                               dest='redisUrl',
                               type='string',
                               default='redis://localhost:{default}/0'.format(default=publisher.defaultRedisPort),
                               help='redis connection string: redis://[hostname]:[port]/[db], default: %default')

        self.parser.add_option('--metricBufferSize',
                               dest='metricBufferSize',
                               type='int',
                               default=publisher.defaultMetricBufferSize,
                               help='Number of metrics to buffer if redis goes down')
        self.parser.add_option('--metricsChannel',
                               dest='metricsChannel',
                               type='string',
                               default=publisher.defaultMetricsChannel,
                               help='redis channel to which metrics are published')
        self.parser.add_option('--maxOutstandingMetrics',
                               dest='maxOutstandingMetrics',
                               type='int',
                               default=publisher.defaultMaxOutstandingMetrics,
                               help='Max Number of metrics to allow in redis')
        self.parser.add_option('--disable-ping-perspective',
                               dest='pingPerspective',
                               help="Enable or disable ping perspective",
                               default=True,
                               action='store_false')
        self.parser.add_option('--writeStatistics',
                               dest='writeStatistics',
                               type='int',
                               default=30,
                               help='How often to write internal statistics value in seconds')
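Taken together, every variant in this listing follows one lifecycle: build a metric writer, start a TwistedMetricReporter once the reactor is running, and stop the reporter before the writer's publishers shut down. A condensed, hedged sketch of that lifecycle (the class name is hypothetical; the reporter interface is assumed from the examples above):

# End-to-end lifecycle sketch, not a drop-in daemon.
from twisted.internet import reactor
# Assumed import path; adjust for your Zenoss version.
from Products.ZenUtils.MetricReporter import TwistedMetricReporter

class MetricsLifecycle(object):
    def __init__(self, metricWriter, daemonName):
        self._writer = metricWriter
        self._tags = {'zenoss_daemon': daemonName, 'internal': True}
        self._reporter = None
        # Stop the reporter on shutdown; the examples above register this
        # trigger before the writer's publishers add theirs, so the
        # reporter stops first.
        reactor.addSystemEventTrigger('before', 'shutdown', self._stop)

    def _start(self):
        self._reporter = TwistedMetricReporter(
            metricWriter=self._writer, tags=self._tags)
        self._reporter.start()

    def _stop(self):
        if self._reporter:
            return self._reporter.stop()

    def run(self):
        reactor.callWhenRunning(self._start)
        reactor.run()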
Esempio n. 9
0
class PBDaemon(ZenDaemon, pb.Referenceable):

    name = 'pbdaemon'
    initialServices = ['EventService']
    heartbeatEvent = {'eventClass': Heartbeat}
    heartbeatTimeout = 60 * 3
    _customexitcode = 0
    _pushEventsDeferred = None
    _eventHighWaterMark = None
    _healthMonitorInterval = 30

    def __init__(self, noopts=0, keeproot=False, name=None):
        # if we were provided our collector name via the constructor instead of
        # via code, be sure to store it correctly.
        if name is not None:
            self.name = name
            self.mname = name

        try:
            ZenDaemon.__init__(self, noopts, keeproot)

        except IOError:
            import traceback
            self.log.critical(traceback.format_exc(0))
            sys.exit(1)

        self._thresholds = None
        self._threshold_notifier = None
        self.rrdStats = DaemonStats()
        self.lastStats = 0
        self.perspective = None
        self.services = {}
        self.eventQueueManager = EventQueueManager(self.options, self.log)
        self.startEvent = startEvent.copy()
        self.stopEvent = stopEvent.copy()
        details = dict(component=self.name, device=self.options.monitor)
        for evt in self.startEvent, self.stopEvent, self.heartbeatEvent:
            evt.update(details)
        self.initialConnect = defer.Deferred()
        self.stopped = False
        self.counters = collections.Counter()
        self._pingedZenhub = None
        self._connectionTimeout = None
        self._publisher = None
        self._internal_publisher = None
        self._metric_writer = None
        self._derivative_tracker = None
        self._metrologyReporter = None
        # Add a shutdown trigger to send a stop event and flush the event queue
        reactor.addSystemEventTrigger('before', 'shutdown', self._stopPbDaemon)

        # Set up a looping call to support the health check.
        self.healthMonitor = task.LoopingCall(self._checkZenHub)
        self.healthMonitor.start(self._healthMonitorInterval)

    def publisher(self):
        if not self._publisher:
            host, port = urlparse(self.options.redisUrl).netloc.split(':')
            try:
                port = int(port)
            except ValueError:
                self.log.exception(
                    "redis url contains non-integer port " +
                    "value {port}, defaulting to {default}".format(
                        port=port, default=publisher.defaultRedisPort))
                port = publisher.defaultRedisPort
            self._publisher = publisher.RedisListPublisher(
                host,
                port,
                self.options.metricBufferSize,
                channel=self.options.metricsChannel,
                maxOutstandingMetrics=self.options.maxOutstandingMetrics)
        return self._publisher

    def internalPublisher(self):
        if not self._internal_publisher:
            url = os.environ.get("CONTROLPLANE_CONSUMER_URL", None)
            username = os.environ.get("CONTROLPLANE_CONSUMER_USERNAME", "")
            password = os.environ.get("CONTROLPLANE_CONSUMER_PASSWORD", "")
            if url:
                self._internal_publisher = publisher.HttpPostPublisher(
                    username, password, url)
        return self._internal_publisher

    def metricWriter(self):
        if not self._metric_writer:
            publisher = self.publisher()
            metric_writer = MetricWriter(publisher)
            if os.environ.get("CONTROLPLANE", "0") == "1":
                internal_publisher = self.internalPublisher()
                if internal_publisher:
                    internal_metric_filter = lambda metric, value, timestamp, tags:\
                        tags and tags.get("internal", False)
                    internal_metric_writer = FilteredMetricWriter(
                        internal_publisher, internal_metric_filter)
                    self._metric_writer = AggregateMetricWriter(
                        [metric_writer, internal_metric_writer])
            else:
                self._metric_writer = metric_writer
        return self._metric_writer

    def derivativeTracker(self):
        if not self._derivative_tracker:
            self._derivative_tracker = DerivativeTracker()
        return self._derivative_tracker

    def connecting(self):
        """
        Called when about to connect to zenhub
        """
        self.log.info("Attempting to connect to zenhub")

    def getZenhubInstanceId(self):
        """
        Called after we connected to zenhub.
        """
        def callback(result):
            self.log.info("Connected to the zenhub/%s instance", result)

        def errback(result):
            self.log.info(
                "Unexpected error appeared while getting zenhub instance number %s",
                result)

        d = self.perspective.callRemote('getHubInstanceId')
        d.addCallback(callback)
        d.addErrback(errback)
        return d

    def gotPerspective(self, perspective):
        """
        This gets called every time we reconnect.

        @parameter perspective: Twisted perspective object
        @type perspective: Twisted perspective object
        """
        self.perspective = perspective
        self.getZenhubInstanceId()
        # Cancel the connection timeout timer as it's no longer needed.
        if self._connectionTimeout:
            try:
                self._connectionTimeout.cancel()
            except AlreadyCalled:
                pass
            self._connectionTimeout = None
        d2 = self.getInitialServices()
        if self.initialConnect:
            self.log.debug('Chaining getInitialServices with d2')
            self.initialConnect, d = None, self.initialConnect
            d2.chainDeferred(d)

    def connect(self):
        pingInterval = self.options.zhPingInterval
        factory = ReconnectingPBClientFactory(
            connectTimeout=60,
            pingPerspective=self.options.pingPerspective,
            pingInterval=pingInterval,
            pingtimeout=pingInterval * 5)
        self.log.info("Connecting to %s:%d" %
                      (self.options.hubhost, self.options.hubport))
        factory.connectTCP(self.options.hubhost, self.options.hubport)
        username = self.options.hubusername
        password = self.options.hubpassword
        self.log.debug("Logging in as %s" % username)
        c = credentials.UsernamePassword(username, password)
        factory.gotPerspective = self.gotPerspective
        factory.connecting = self.connecting
        factory.setCredentials(c)

        def timeout(d):
            if not d.called:
                self.connectTimeout()

        self._connectionTimeout = reactor.callLater(self.options.hubtimeout,
                                                    timeout,
                                                    self.initialConnect)
        return self.initialConnect

    def connectTimeout(self):
        self.log.error('Timeout connecting to zenhub: is it running?')
        pass

    def eventService(self):
        return self.getServiceNow('EventService')

    def getServiceNow(self, svcName):
        if not svcName in self.services:
            self.log.warning(
                'No service named %r: ZenHub may be disconnected' % svcName)
        return self.services.get(svcName, None) or FakeRemote()

    def getService(self, serviceName, serviceListeningInterface=None):
        """
        Attempt to get a service from zenhub.  Returns a deferred.
        When service is retrieved it is stashed in self.services with
        serviceName as the key.  When getService is called it will first
        check self.services and if serviceName is already there it will return
        the entry from self.services wrapped in a defer.succeed
        """
        if serviceName in self.services:
            return defer.succeed(self.services[serviceName])

        def removeService(ignored):
            self.log.debug('Removing service %s' % serviceName)
            if serviceName in self.services:
                del self.services[serviceName]

        def callback(result, serviceName):
            self.log.debug('Loaded service %s from zenhub' % serviceName)
            self.services[serviceName] = result
            result.notifyOnDisconnect(removeService)
            return result

        def errback(error, serviceName):
            self.log.debug('errback after getting service %s' % serviceName)
            self.log.error('Could not retrieve service %s' % serviceName)
            if serviceName in self.services:
                del self.services[serviceName]
            return error

        d = self.perspective.callRemote('getService', serviceName,
                                        self.options.monitor,
                                        serviceListeningInterface or self,
                                        self.options.__dict__)
        d.addCallback(callback, serviceName)
        d.addErrback(errback, serviceName)
        return d

    def getInitialServices(self):
        """
        After connecting to zenhub, gather our initial list of services.
        """
        def errback(error):
            if isinstance(error, Failure):
                self.log.critical("Invalid monitor: %s" % self.options.monitor)
                reactor.stop()
                return defer.fail(
                    RemoteBadMonitor(
                        "Invalid monitor: %s" % self.options.monitor, ''))
            return error

        self.log.debug('Setting up initial services: %s' % \
                ', '.join(self.initialServices))
        d = defer.DeferredList(
            [self.getService(name) for name in self.initialServices],
            fireOnOneErrback=True,
            consumeErrors=True)
        d.addErrback(errback)
        return d

    def connected(self):
        pass

    def _getThresholdNotifier(self):
        if not self._threshold_notifier:
            self._threshold_notifier = ThresholdNotifier(
                self.sendEvent, self.getThresholds())
        return self._threshold_notifier

    def getThresholds(self):
        if not self._thresholds:
            self._thresholds = Thresholds()
        return self._thresholds

    def run(self):
        def stopReporter():
            if self._metrologyReporter:
                return self._metrologyReporter.stop()

        # Order of the shutdown triggers matter. Want to stop reporter first, calling self.metricWriter() below
        # registers shutdown triggers for the actual metric http and redis publishers.
        reactor.addSystemEventTrigger('before', 'shutdown', stopReporter)

        threshold_notifier = self._getThresholdNotifier()
        self.rrdStats.config(self.name, self.options.monitor,
                             self.metricWriter(), threshold_notifier,
                             self.derivativeTracker())
        self.log.debug('Starting PBDaemon initialization')
        d = self.connect()

        def callback(result):
            self.sendEvent(self.startEvent)
            self.pushEventsLoop()
            self.log.debug('Calling connected.')
            self.connected()
            return result

        def startStatsLoop():
            self.log.debug("Starting Statistic posting")
            loop = task.LoopingCall(self.postStatistics)
            loop.start(self.options.writeStatistics, now=False)
            daemonTags = {
                'zenoss_daemon': self.name,
                'zenoss_monitor': self.options.monitor,
                'internal': True
            }
            self._metrologyReporter = TwistedMetricReporter(
                self.options.writeStatistics, self.metricWriter(), daemonTags)
            self._metrologyReporter.start()

        reactor.callWhenRunning(startStatsLoop)
        d.addCallback(callback)
        d.addErrback(twisted.python.log.err)
        reactor.run()
        if self._customexitcode:
            sys.exit(self._customexitcode)

    def setExitCode(self, exitcode):
        self._customexitcode = exitcode

    def stop(self, ignored=''):
        if reactor.running:
            try:
                reactor.stop()
            except ReactorNotRunning:
                self.log.debug("Tried to stop reactor that was stopped")
        else:
            self.log.debug("stop() called when not running")

    def _stopPbDaemon(self):
        if self.stopped:
            return
        self.stopped = True
        if 'EventService' in self.services:
            # send stop event if we don't have an implied --cycle,
            # or if --cycle has been specified
            if not hasattr(self.options, 'cycle') or \
                    getattr(self.options, 'cycle', True):
                self.sendEvent(self.stopEvent)
                self.log.debug("Sent a 'stop' event")
            if self._pushEventsDeferred:
                self.log.debug("Currently sending events. Queueing next call")
                d = self._pushEventsDeferred
                # Schedule another call to flush any additional queued events
                d.addBoth(lambda unused: self.pushEvents())
            else:
                d = self.pushEvents()
            return d

        self.log.debug("No event sent as no EventService available.")

    def sendEvents(self, events):
        map(self.sendEvent, events)

    def sendEvent(self, event, **kw):
        """ Add event to queue of events to be sent.  If we have an event
        service then process the queue.
        """
        generatedEvent = self.generateEvent(event, **kw)
        self.eventQueueManager.addEvent(generatedEvent)
        self.counters['eventCount'] += 1

        if self._eventHighWaterMark:
            return self._eventHighWaterMark
        elif self.eventQueueManager.event_queue_length >= self.options.maxqueuelen * self.options.queueHighWaterMark:
            return self.pushEvents()
        else:
            return defer.succeed(None)

    def generateEvent(self, event, **kw):
        """ Add event to queue of events to be sent.  If we have an event
        service then process the queue.
        """
        if not reactor.running:
            return
        eventCopy = {}
        for k, v in chain(event.items(), kw.items()):
            if isinstance(v, basestring):
                #default max size is 512k
                size = LIMITS.get(k, DEFAULT_LIMIT)
                eventCopy[k] = v[0:size] if len(v) > size else v
            else:
                eventCopy[k] = v

        eventCopy['agent'] = self.name
        eventCopy['monitor'] = self.options.monitor
        eventCopy['manager'] = self.fqdn
        return eventCopy

    @defer.inlineCallbacks
    def pushEventsLoop(self):
        """Periodially, wake up and flush events to ZenHub.
        """
        reactor.callLater(self.options.eventflushseconds, self.pushEventsLoop)
        yield self.pushEvents()

        # Record the number of events in the queue up to every 2 seconds.
        now = time.time()
        if self.rrdStats.name and now >= (self.lastStats + 2):
            self.lastStats = now
            self.rrdStats.gauge('eventQueueLength',
                                self.eventQueueManager.event_queue_length)

    @defer.inlineCallbacks
    def pushEvents(self):
        """Flush events to ZenHub.
        """
        # are we already shutting down?
        if not reactor.running:
            self.log.debug("Skipping event sending - reactor not running.")
            return

        if self.eventQueueManager.event_queue_length >= self.options.maxqueuelen * self.options.queueHighWaterMark and not self._eventHighWaterMark:
            self.log.debug(
                "Queue length exceeded high water mark, %s ;creating high water mark deferred",
                self.eventQueueManager.event_queue_length)
            self._eventHighWaterMark = defer.Deferred()

        # are still connected to ZenHub?
        evtSvc = self.services.get('EventService', None)
        if not evtSvc:
            self.log.error("No event service: %r", evtSvc)
            yield task.deferLater(reactor, 0, lambda: None)
            if self._eventHighWaterMark:
                d, self._eventHighWaterMark = self._eventHighWaterMark, None
                #not connected, release throttle and let things queue
                d.callback("No Event Service")
            defer.returnValue(None)

        if self._pushEventsDeferred:
            self.log.debug("Skipping event sending - previous call active.")
            defer.returnValue("Push Pending")

        sent = 0
        try:
            #only set _pushEventsDeferred after we know we have an evtSvc/connectivity
            self._pushEventsDeferred = defer.Deferred()

            def repush(val):
                if self.eventQueueManager.event_queue_length >= self.options.eventflushchunksize:
                    self.pushEvents()
                return val

            # conditionally push more events after this pushEvents call finishes
            self._pushEventsDeferred.addCallback(repush)

            discarded_events = self.eventQueueManager.discarded_events
            if discarded_events:
                self.log.error(
                    'Discarded oldest %d events because maxqueuelen was '
                    'exceeded: %d/%d', discarded_events,
                    discarded_events + self.options.maxqueuelen,
                    self.options.maxqueuelen)
                self.counters['discardedEvents'] += discarded_events
                self.eventQueueManager.discarded_events = 0

            send_events_fn = partial(evtSvc.callRemote, 'sendEvents')
            try:
                sent = yield self.eventQueueManager.sendEvents(send_events_fn)
            except ConnectionLost as ex:
                self.log.error('Error sending event: %s', ex)
                #let the reactor have time to clean up any connection errors and make callbacks
                yield task.deferLater(reactor, 0, lambda: None)
        except Exception as ex:
            self.log.exception(ex)
            #let the reactor have time to clean up any connection errors and make callbacks
            yield task.deferLater(reactor, 0, lambda: None)
        finally:
            if self._pushEventsDeferred:
                d, self._pushEventsDeferred = self._pushEventsDeferred, None
                d.callback('sent %s' % sent)
            if self._eventHighWaterMark and self.eventQueueManager.event_queue_length < self.options.maxqueuelen * self.options.queueHighWaterMark:
                self.log.debug("Queue restored to below high water mark: %s",
                               self.eventQueueManager.event_queue_length)
                d, self._eventHighWaterMark = self._eventHighWaterMark, None
                d.callback("Queue length below high water mark")

    def heartbeat(self):
        """if cycling, send a heartbeat, else, shutdown"""
        if not self.options.cycle:
            self.stop()
            return
        heartbeatEvent = self.generateEvent(self.heartbeatEvent,
                                            timeout=self.heartbeatTimeout)
        self.eventQueueManager.addHeartbeatEvent(heartbeatEvent)
        # heartbeat is normally 3x cycle time
        self.niceDoggie(self.heartbeatTimeout / 3)

    def postStatisticsImpl(self):
        pass

    def postStatistics(self):
        # save daemon counter stats
        for name, value in self.counters.items():
            self.log.info("Counter %s, value %d", name, value)
            self.rrdStats.counter(name, value)

        # persist counters values
        self.postStatisticsImpl()

    def _pickleName(self):
        instance_id = os.environ.get('CONTROLPLANE_INSTANCE_ID')
        return 'var/%s_%s_counters.pickle' % (self.name, instance_id)

    def remote_getName(self):
        return self.name

    def remote_shutdown(self, unused):
        self.stop()
        self.sigTerm()

    def remote_setPropertyItems(self, items):
        pass

    @translateError
    def remote_updateThresholdClasses(self, classes):
        from Products.ZenUtils.Utils import importClass
        self.log.debug("Loading classes %s", classes)
        for c in classes:
            try:
                importClass(c)
            except ImportError:
                self.log.error("Unable to import class %s", c)

    def _checkZenHub(self):
        """
        Check status of ZenHub (using ping method of service).
        @return: if a ping is attempted, a deferred firing with the result.
        """
        self.log.debug('_checkZenHub: entry')

        def callback(result):
            self.log.debug('ZenHub health check: Got result %s' % result)
            if result == 'pong':
                self.log.debug(
                    'ZenHub health check: Success - received pong from ZenHub ping service.'
                )
                self._signalZenHubAnswering(True)
            else:
                self.log.error(
                    'ZenHub health check did not respond as expected.')
                self._signalZenHubAnswering(False)

        def errback(error):
            self.log.error('Error pinging ZenHub: %s (%s).' %
                           (error, getattr(error, 'message', '')))
            self._signalZenHubAnswering(False)

        try:
            if self.perspective:
                self.log.debug(
                    'ZenHub health check: perspective found. attempting remote ping call.'
                )
                d = self.perspective.callRemote('ping')
                d.addCallback(callback)
                d.addErrback(errback)
                return d
            else:
                self.log.debug('ZenHub health check: ZenHub may be down.')
                self._signalZenHubAnswering(False)
        except pb.DeadReferenceError:
            self.log.warning(
                "ZenHub health check: DeadReferenceError - lost connection to ZenHub."
            )
            self._signalZenHubAnswering(False)
        except Exception as e:
            self.log.error('ZenHub health check: caught %s exception: %s' %
                           (e.__class__, e.message))
            self._signalZenHubAnswering(False)

    def _signalZenHubAnswering(self, answering):
        """
        Write or remove file that the ZenHub_answering health check uses to report status.
        @param answering: True if ZenHub is answering, False otherwise.
        """
        self.log.debug('_signalZenHubAnswering(%s)' % answering)
        filename = 'zenhub_connected'
        signalFilePath = zenPath('var', filename)
        if answering:
            self.log.debug('writing file at %s' % signalFilePath)
            atomicWrite(signalFilePath, '')
        else:
            try:
                self.log.debug('removing file at %s' % signalFilePath)
                os.remove(signalFilePath)
            except Exception as e:
                self.log.debug('ignoring %s exception (%s) removing file %s' %
                               (e.__class__, e.message, signalFilePath))

    def buildOptions(self):
        self.parser.add_option('--hubhost',
                               dest='hubhost',
                               default=DEFAULT_HUB_HOST,
                               help='Host of zenhub daemon.'
                               ' Default is %s.' % DEFAULT_HUB_HOST)
        self.parser.add_option('--hubport',
                               dest='hubport',
                               type='int',
                               default=DEFAULT_HUB_PORT,
                               help='Port zenhub listens on.'
                               ' Default is %s.' % DEFAULT_HUB_PORT)
        self.parser.add_option('--hubusername',
                               dest='hubusername',
                               default=DEFAULT_HUB_USERNAME,
                               help='Username for zenhub login.'
                               ' Default is %s.' % DEFAULT_HUB_USERNAME)
        self.parser.add_option('--hubpassword',
                               dest='hubpassword',
                               default=DEFAULT_HUB_PASSWORD,
                               help='Password for zenhub login.'
                               ' Default is %s.' % DEFAULT_HUB_PASSWORD)
        self.parser.add_option('--monitor',
                               dest='monitor',
                               default=DEFAULT_HUB_MONITOR,
                               help='Name of monitor instance to use for'
                               ' configuration.  Default is %s.' %
                               DEFAULT_HUB_MONITOR)
        self.parser.add_option('--initialHubTimeout',
                               dest='hubtimeout',
                               type='int',
                               default=30,
                               help='Initial time to wait for a ZenHub '
                               'connection')
        self.parser.add_option('--allowduplicateclears',
                               dest='allowduplicateclears',
                               default=False,
                               action='store_true',
                               help='Send clear events even when the most '
                               'recent event was also a clear event.')

        self.parser.add_option(
            '--duplicateclearinterval',
            dest='duplicateclearinterval',
            default=0,
            type='int',
            help=('Send a clear event every [DUPLICATECLEARINTERVAL] '
                  'events.'))

        self.parser.add_option('--eventflushseconds',
                               dest='eventflushseconds',
                               default=5.,
                               type='float',
                               help='Seconds between attempts to flush '
                               'events to ZenHub.')

        self.parser.add_option('--eventflushchunksize',
                               dest='eventflushchunksize',
                               default=50,
                               type='int',
                               help='Number of events to send to ZenHub'
                               ' at one time.')

        self.parser.add_option('--maxqueuelen',
                               dest='maxqueuelen',
                               default=5000,
                               type='int',
                               help='Maximum number of events to queue')

        self.parser.add_option(
            '--queuehighwatermark',
            dest='queueHighWaterMark',
            default=0.75,
            type='float',
            help='The size of the event queue, as a fraction of maxqueuelen,'
                 ' at which event pushback starts')
        self.parser.add_option('--zenhubpinginterval',
                               dest='zhPingInterval',
                               default=120,
                               type='int',
                               help='How often, in seconds, to ping zenhub')

        self.parser.add_option('--disable-event-deduplication',
                               dest='deduplicate_events',
                               default=True,
                               action='store_false',
                               help='Disable event de-duplication')

        self.parser.add_option(
            '--redis-url',
            dest='redisUrl',
            type='string',
            default='redis://localhost:{default}/0'.format(
                default=publisher.defaultRedisPort),
            help=
            'redis connection string: redis://[hostname]:[port]/[db], default: %default'
        )

        self.parser.add_option(
            '--metricBufferSize',
            dest='metricBufferSize',
            type='int',
            default=publisher.defaultMetricBufferSize,
            help='Number of metrics to buffer if redis goes down')
        self.parser.add_option(
            '--metricsChannel',
            dest='metricsChannel',
            type='string',
            default=publisher.defaultMetricsChannel,
            help='redis channel to which metrics are published')
        self.parser.add_option('--maxOutstandingMetrics',
                               dest='maxOutstandingMetrics',
                               type='int',
                               default=publisher.defaultMaxOutstandingMetrics,
                               help='Max Number of metrics to allow in redis')
        self.parser.add_option('--disable-ping-perspective',
                               dest='pingPerspective',
                               help="Enable or disable ping perspective",
                               default=True,
                               action='store_false')
        self.parser.add_option(
            '--writeStatistics',
            dest='writeStatistics',
            type='int',
            default=30,
            help='How often, in seconds, to write internal statistics')

        ZenDaemon.buildOptions(self)
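
The pushEvents implementation above throttles producers with a high-water-mark deferred: once the queue reaches maxqueuelen * queueHighWaterMark a deferred is created, and callers wait on it until a flush drains the queue back below the mark (or connectivity is lost). A minimal sketch of that pattern, assuming Twisted; the names ThrottledQueue, QUEUE_LIMIT and HIGH_WATER are illustrative, not part of the original daemon:

from twisted.internet import defer

QUEUE_LIMIT = 5000   # mirrors --maxqueuelen
HIGH_WATER = 0.75    # mirrors --queuehighwatermark


class ThrottledQueue(object):
    """Illustrative queue with high-water-mark pushback."""

    def __init__(self):
        self.queue = []
        self._highWaterMark = None  # a Deferred while pushback is active

    def add(self, item):
        # Returns a deferred the producer should wait on; it has already
        # fired unless the queue is above the high water mark.
        self.queue.append(item)
        if (len(self.queue) >= QUEUE_LIMIT * HIGH_WATER
                and self._highWaterMark is None):
            self._highWaterMark = defer.Deferred()
        return self._highWaterMark or defer.succeed(None)

    def drain(self, n):
        # Called after a successful flush of n items to ZenHub.
        del self.queue[:n]
        if (self._highWaterMark is not None
                and len(self.queue) < QUEUE_LIMIT * HIGH_WATER):
            d, self._highWaterMark = self._highWaterMark, None
            d.callback("below high water mark")

A producer would yield q.add(event) inside an inlineCallbacks function, so event generation pauses while pushback is active.
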
Esempio n. 10
0
class ZenActionD(ZCmdBase):

    MSGS_TO_PREFETCH = 1

    def __init__(self):
        super(ZenActionD, self).__init__()
        self._consumer = None
        self._workers = ProcessWorkers(self.options.workers - 1,
                                       exec_worker,
                                       "zenactiond worker")
        self._heartbeatSender = QueueHeartbeatSender('localhost',
                                                 'zenactiond',
                                                 self.options.heartbeatTimeout)

        self._maintenanceCycle = MaintenanceCycle(self.options.maintenancecycle,
                                                  self._heartbeatSender)
        self._callHomeCycler = CallHomeCycler(self.dmd)
        self._schedule = Schedule(self.options, self.dmd)
        self._schedule.sendEvent = self.dmd.ZenEventManager.sendEvent
        self._schedule.monitor = self.options.monitor

    def buildOptions(self):
        super(ZenActionD, self).buildOptions()
        maintenanceBuildOptions(self.parser)
        workersBuildOptions(self.parser, 1)

        default_max_commands = 10
        self.parser.add_option('--maxcommands', dest="maxCommands", type="int", default=default_max_commands,
                               help='Max number of action commands to perform concurrently (default: %d)' % \
                                    default_max_commands)
        default_max_pagingworkers = 1
        self.parser.add_option('--maxpagingworkers', dest="maxPagingWorkers", type="int", default=default_max_pagingworkers,
                               help='max number of paging workers to perform concurrently (default: %d)' % \
                                       default_max_pagingworkers)
        default_pagingworkers_timeout = 30
        self.parser.add_option('--pagingworkerstimeout', dest="pagingWorkersTimeout", type="int", default=default_pagingworkers_timeout,
                               help='Timeout, in seconds, for paging workers (default: %d)' % \
                                       default_pagingworkers_timeout)
        default_url = getDefaultZopeUrl()
        self.parser.add_option('--zopeurl', dest='zopeurl', default=default_url,
                               help="http path to the root of the zope server (default: %s)" % default_url)
        self.parser.add_option("--monitor", dest="monitor",
            default=DEFAULT_MONITOR,
            help="Name of monitor instance to use for heartbeat "
                " events. Default is %s." % DEFAULT_MONITOR)
        self.parser.add_option('--maintenance-window-cycletime',
            dest='maintenceWindowCycletime', default=60, type="int",
            help="How often to check to see if there are any maintenance windows to execute")
        self.parser.add_option('--maintenance-window-batch-size',
            dest='maintenceWindowBatchSize', default=200, type="int",
            help="How many devices update per one transaction on maintenance windows execution")
        self.parser.add_option('--strip-email-body-tags',
            dest='stripEmailBodyTags', default=True, action="store_false",
            help="Strip HTML/XML tags from plaintext email notifications?")

        self.parser.add_option("--workerid", dest='workerid', type='int', default=None,
                               help="ID of the worker instance.")

    def run(self):
        # Configure all actions with the command-line options
        self.abortIfWaiting()
        options_dict = dict(vars(self.options))
        for name, action in getUtilitiesFor(IAction):
            action.configure(options_dict)

        dao = NotificationDao(self.dmd)
        task = ISignalProcessorTask(dao)
        metric_destination = os.environ.get("CONTROLPLANE_CONSUMER_URL", "")
        if metric_destination == "":
            metric_destination = "http://localhost:22350/api/metrics/store"
        username = os.environ.get("CONTROLPLANE_CONSUMER_USERNAME", "")
        password = os.environ.get("CONTROLPLANE_CONSUMER_PASSWORD", "")
        pub = publisher.HttpPostPublisher(username, password, metric_destination)

        log.debug("Creating async MetricReporter")
        daemonTags = {
            'zenoss_daemon': 'zenactiond',
            'internal': True
        }
        self.metricreporter = TwistedMetricReporter(prefix='zenoss.', metricWriter=MetricWriter(pub), tags=daemonTags)
        self.metricreporter.start()
        reactor.addSystemEventTrigger('before', 'shutdown', self.metricreporter.stop)

        if self.options.workerid == 0 and (self.options.daemon or self.options.cycle):
            self._callHomeCycler.start()
            self._schedule.start()  # maintenance windows

        if self.options.daemon or self.options.cycle:
            self._maintenanceCycle.start()  # heartbeats, etc.

        if (self.options.daemon or self.options.cycle) and self.options.workers > 1:
            self._workers.startWorkers()

        self._consumer = QueueConsumer(task, self.dmd)
        reactor.callWhenRunning(self._start)
        reactor.run()

    def abortIfWaiting(self):
        # If this process is doing nothing, abort the transaction to avoid long INNODB history
        try:
            transaction.abort()
        except Exception:
            pass
        reactor.callLater(30.0, self.abortIfWaiting)

    def _start(self):
        log.info('starting zenactiond consumer.')
        reactor.addSystemEventTrigger('before', 'shutdown', self._shutdown)
        self._consumer.run()


    @defer.inlineCallbacks
    def _shutdown(self, *ignored):
        log.info("Shutting down...")
        self._maintenanceCycle.stop()
        self._workers.shutdown()
        if self._consumer:
            yield self._consumer.shutdown()
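
abortIfWaiting above re-schedules itself with reactor.callLater(30.0, ...) so an idle process aborts its transaction every 30 seconds and avoids a long INNODB history. A hedged sketch of the same periodic abort using twisted.internet.task.LoopingCall, the more idiomatic equivalent; the function name is illustrative and a running reactor is assumed:

import transaction
from twisted.internet import task


def abort_idle_transaction():
    # If this process is doing nothing, abort the transaction to avoid
    # a long INNODB history (same rationale as abortIfWaiting above).
    try:
        transaction.abort()
    except Exception:
        pass


# now=False mirrors the 30-second delay before the first abort; requires
# a running reactor (e.g. reactor.run() elsewhere in the daemon).
abort_loop = task.LoopingCall(abort_idle_transaction)
abort_loop.start(30.0, now=False)
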
Esempio n. 11
0
class zenhubworker(ZCmdBase, pb.Referenceable):
    "Execute ZenHub requests in separate process"

    def __init__(self):
        signal.signal(signal.SIGUSR2, signal.SIG_IGN)
        ZCmdBase.__init__(self)
        if self.options.profiling:
            self.profiler = ContinuousProfiler('zenhubworker', log=self.log)
            self.profiler.start()
        self.current = IDLE
        self.currentStart = 0
        self.numCalls = Metrology.meter("zenhub.workerCalls")
        try:
            self.log.debug("establishing SIGUSR1 signal handler")
            signal.signal(signal.SIGUSR1, self.sighandler_USR1)
            self.log.debug("establishing SIGUSR2 signal handler")
            signal.signal(signal.SIGUSR2, self.sighandler_USR2)
        except ValueError:
            # If we get called multiple times, this will generate an exception:
            # ValueError: signal only works in main thread
            # Ignore it as we've already set up the signal handler.
            pass

        self.zem = self.dmd.ZenEventManager
        loadPlugins(self.dmd)
        self.pid = os.getpid()
        self.services = {}
        factory = ReconnectingPBClientFactory(pingPerspective=False)
        self.log.debug("Connecting to %s:%d", self.options.hubhost,
                       self.options.hubport)
        reactor.connectTCP(self.options.hubhost, self.options.hubport, factory)
        self.log.debug("Logging in as %s", self.options.username)
        c = credentials.UsernamePassword(self.options.username,
                                         self.options.password)
        factory.gotPerspective = self.gotPerspective

        def stop(*args):
            reactor.callLater(0, reactor.stop)

        factory.clientConnectionLost = stop
        factory.setCredentials(c)

        self.log.debug("Creating async MetricReporter")
        daemonTags = {
            'zenoss_daemon': 'zenhub_worker_%s' % self.options.workernum,
            'zenoss_monitor': self.options.monitor,
            'internal': True
        }

        def stopReporter():
            if self.metricreporter:
                return self.metricreporter.stop()

        # Order of the shutdown triggers matter. Want to stop reporter first, calling metricWriter() below
        # registers shutdown triggers for the actual metric http and redis publishers.
        reactor.addSystemEventTrigger('before', 'shutdown', stopReporter)
        self.metricreporter = TwistedMetricReporter(
            metricWriter=metricWriter(), tags=daemonTags)
        self.metricreporter.start()

    def audit(self, action):
        """
        zenhubworkers restart all the time, so there is no need to
        audit-log it.
        """
        pass

    def sighandler_USR1(self, signum, frame):
        try:
            if self.options.profiling:
                self.profiler.dump_stats()
            super(zenhubworker, self).sighandler_USR1(signum, frame)
        except:
            pass

    def sighandler_USR2(self, *args):
        try:
            self.reportStats()
        except:
            pass

    def reportStats(self):
        now = time.time()
        if self.current != IDLE:
            self.log.debug("(%d) Currently performing %s, elapsed %.2f s",
                           self.pid, self.current, now - self.currentStart)
        else:
            self.log.debug("(%d) Currently IDLE", self.pid)
        if self.services:
            loglines = ["(%d) Running statistics:" % self.pid]
            for svc, svcob in sorted(
                    self.services.iteritems(),
                    key=lambda kvp:
                (kvp[0][1], kvp[0][0].rpartition('.')[-1])):
                svc = "%s/%s" % (svc[1], svc[0].rpartition('.')[-1])
                for method, stats in sorted(svcob.callStats.items()):
                    loglines.append(
                        " - %-48s %-32s %8d %12.2f %8.2f %s" %
                        (svc, method, stats.numoccurrences, stats.totaltime,
                         stats.totaltime /
                         stats.numoccurrences if stats.numoccurrences else 0.0,
                         isoDateTime(stats.lasttime)))
            self.log.debug('\n'.join(loglines))
        else:
            self.log.debug("no service activity statistics")

    def gotPerspective(self, perspective):
        """Once we are connected to zenhub, register ourselves"""
        d = perspective.callRemote('reportingForWork', self, pid=self.pid)

        def reportProblem(why):
            self.log.error("Unable to report for work: %s", why)
            reactor.stop()

        d.addErrback(reportProblem)

    def _getService(self, name, instance):
        """Utility method to create the service (like PingConfig)
        for instance (like localhost)

        @type name: string
        @param name: the dotted-name of the module to load
        (uses @L{Products.ZenUtils.Utils.importClass})
        @type instance: string
        @param instance: each service serves only one specific collector
        instance (like 'localhost').  instance defines the collector's
        instance name.
        @return: a service loaded from ZenHub/services or one of the zenpacks.
        """
        try:
            return self.services[name, instance]
        except KeyError:
            from Products.ZenUtils.Utils import importClass
            try:
                ctor = importClass(name)
            except ImportError:
                ctor = importClass('Products.ZenHub.services.%s' % name, name)
            svc = ctor(self.dmd, instance)
            self.services[name, instance] = svc

            # dict for tracking statistics on method calls invoked on this service,
            # including number of times called and total elapsed time, keyed
            # by method name
            svc.callStats = defaultdict(_CumulativeWorkerStats)

            return svc

    @translateError
    @defer.inlineCallbacks
    def remote_execute(self, service, instance, method, args):
        """Execute requests on behalf of zenhub
        @type service: string
        @param service: the name of a service, like PingConfig

        @type instance: string
        @param instance: each service serves only one specific collector
        instance (like 'localhost').  instance defines the collector's
        instance name.

        @type method: string
        @param method: the name of the called method, like getPingTree

        @type args: tuple
        @param args: chunks of a pickled (args, kwargs) tuple holding the
        positional and keyword arguments for the method
        """
        svcstr = service.rpartition('.')[-1]
        self.current = "%s/%s" % (svcstr, method)
        self.log.debug("Servicing %s in %s", method, service)
        now = time.time()
        self.currentStart = now
        try:
            yield self.async_syncdb()
        except RemoteConflictError, ex:
            pass
        service = self._getService(service, instance)
        m = getattr(service, 'remote_' + method)
        # now that the service is loaded, we can unpack the arguments
        joinedArgs = "".join(args)
        args, kw = pickle.loads(joinedArgs)

        # see if this is our last call
        self.numCalls.mark()
        lastCall = self.numCalls.count >= self.options.calllimit

        def runOnce():
            res = m(*args, **kw)
            if lastCall:
                res = LastCallReturnValue(res)
            pickled_res = pickle.dumps(res, pickle.HIGHEST_PROTOCOL)
            chunkedres = []
            chunkSize = 102400
            while pickled_res:
                chunkedres.append(pickled_res[:chunkSize])
                pickled_res = pickled_res[chunkSize:]
            return chunkedres

        try:
            for i in range(4):
                try:
                    if i > 0:
                        #only sync for retries as it already happened above
                        yield self.async_syncdb()
                    result = runOnce()
                    defer.returnValue(result)
                except RemoteConflictError, ex:
                    pass
            # one last try, but don't hide the exception
            yield self.async_syncdb()
            result = runOnce()
            defer.returnValue(result)
        finally:
            # Reset the worker's idle state once the call completes. The
            # listing was truncated here; this finally block is an assumed
            # reconstruction, added so the outer try is syntactically valid.
            self.current = IDLE
            self.currentStart = 0
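
runOnce() above returns its result as a list of ~100 KB pickle chunks, since Perspective Broker limits the size of individual strings; ZenHub.finished (see Esempio n. 13 below) joins and unpickles them. A small self-contained sketch of both halves of that protocol; the function names are illustrative:

import pickle

CHUNK_SIZE = 102400  # same 100 KB chunk size as runOnce() above


def chunk_result(res):
    # Worker side: pickle the return value and split it into strings
    # small enough to pass through Perspective Broker.
    pickled = pickle.dumps(res, pickle.HIGHEST_PROTOCOL)
    return [pickled[i:i + CHUNK_SIZE]
            for i in range(0, len(pickled), CHUNK_SIZE)]


def unchunk_result(chunks):
    # Hub side: join the chunks and unpickle (compare ''.join(result)
    # in ZenHub.finished).
    return pickle.loads(b''.join(chunks))


assert unchunk_result(chunk_result({'config': [1, 2, 3]})) == {'config': [1, 2, 3]}
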
Esempio n. 13
0
class ZenHub(ZCmdBase):
    """
    Listen for changes to objects in the Zeo database and update the
    collectors' configuration.

    The remote collectors connect to ZenHub, request configuration
    information, and stay connected.  When changes are detected in the
    Zeo database, configuration updates are sent out to collectors
    asynchronously.  In this way, changes made in the web GUI can
    affect collection immediately, instead of waiting for a
    configuration cycle.

    Each collector uses a different, pluggable service within ZenHub
    to translate objects into configuration and data.  ZenPacks can
    add services for their collectors.  Collectors communicate using
    Twisted's Perspective Broker, which provides authenticated,
    asynchronous, bidirectional method invocation.

    ZenHub also provides an XmlRPC interface to some common services
    to support collectors written in other languages.

    ZenHub does very little work in its own process, but instead dispatches
    the work to a pool of zenhubworkers, running zenhubworker.py. zenhub
    manages these workers with 3 data structures:
    - workers - a list of remote PB instances
    - worker_processes - a set of WorkerRunningProtocol instances
    - workerprocessmap - a dict mapping pid to the process instance created
        by reactor.spawnProcess
    Callbacks and handlers that detect worker shutdown update these
    structures automatically. ONLY ONE HANDLER must take care of restarting
    new workers, to avoid accidentally spawning too many workers. This
    handler also verifies that zenhub is not in the process of shutting
    down, so that callbacks triggered during daemon shutdown don't keep
    starting new workers.

    TODO: document invalidation workers
    """

    totalTime = 0.
    totalEvents = 0
    totalCallTime = 0.
    name = 'zenhub'

    def __init__(self):
        """
        Hook ourselves up to the Zeo database and wait for collectors
        to connect.
        """
        # list of remote worker references
        self.workers = []
        self.workTracker = {}
        # zenhub execution stats:
        # [count, idle_total, running_total, last_called_time]
        self.executionTimer = collections.defaultdict(lambda: [0, 0.0, 0.0, 0])
        self.workList = _ZenHubWorklist()
        # set of worker processes
        self.worker_processes = set()
        # map of worker pids -> worker processes
        self.workerprocessmap = {}
        self.shutdown = False
        self.counters = collections.Counter()
        self._invalidations_paused = False

        wl = self.workList
        metricNames = {x[0] for x in registry}

        class EventWorkList(Gauge):
            @property
            def value(self):
                return len(wl.eventworklist)

        if 'zenhub.eventWorkList' not in metricNames:
            Metrology.gauge('zenhub.eventWorkList', EventWorkList())

        class ADMWorkList(Gauge):
            @property
            def value(self):
                return len(wl.applyworklist)

        if 'zenhub.admWorkList' not in metricNames:
            Metrology.gauge('zenhub.admWorkList', ADMWorkList())

        class OtherWorkList(Gauge):
            @property
            def value(self):
                return len(wl.otherworklist)

        if 'zenhub.otherWorkList' not in metricNames:
            Metrology.gauge('zenhub.otherWorkList', OtherWorkList())

        class WorkListTotal(Gauge):
            @property
            def value(self):
                return len(wl)

        if 'zenhub.workList' not in metricNames:
            Metrology.gauge('zenhub.workList', WorkListTotal())

        ZCmdBase.__init__(self)
        import Products.ZenHub
        load_config("hub.zcml", Products.ZenHub)
        notify(HubWillBeCreatedEvent(self))

        if self.options.profiling:
            self.profiler = ContinuousProfiler('zenhub', log=self.log)
            self.profiler.start()

        # Worker selection handler
        self.workerselector = WorkerSelector(self.options)
        self.workList.log = self.log

        # make sure we don't reserve more than n-1 workers for events
        maxReservedEventsWorkers = 0
        if self.options.workers:
            maxReservedEventsWorkers = self.options.workers - 1
        if self.options.workersReservedForEvents > maxReservedEventsWorkers:
            self.options.workersReservedForEvents = maxReservedEventsWorkers
            self.log.info(
                "reduced number of workers reserved for sending events to %d",
                self.options.workersReservedForEvents)

        self.zem = self.dmd.ZenEventManager
        loadPlugins(self.dmd)
        self.services = {}

        er = HubRealm(self)
        checker = self.loadChecker()
        pt = portal.Portal(er, [checker])
        interface = '::' if ipv6_available() else ''
        pbport = reactor.listenTCP(self.options.pbport,
                                   pb.PBServerFactory(pt),
                                   interface=interface)
        self.setKeepAlive(pbport.socket)

        xmlsvc = AuthXmlRpcService(self.dmd, checker)
        reactor.listenTCP(self.options.xmlrpcport,
                          server.Site(xmlsvc),
                          interface=interface)

        # responsible for sending messages to the queues
        import Products.ZenMessaging.queuemessaging
        load_config_override('twistedpublisher.zcml',
                             Products.ZenMessaging.queuemessaging)
        notify(HubCreatedEvent(self))
        self.sendEvent(eventClass=App_Start,
                       summary="%s started" % self.name,
                       severity=0)

        self._initialize_invalidation_filters()
        reactor.callLater(self.options.invalidation_poll_interval,
                          self.processQueue)

        self._metric_writer = metricWriter()
        self.rrdStats = self.getRRDStats()

        if self.options.workers:
            self.workerconfig = zenPath(
                'var', 'zenhub', '{}_worker.conf'.format(self._getConf().id))
            self._createWorkerConf()
            for i in range(self.options.workers):
                self.createWorker(i)

            # start cyclic call to giveWorkToWorkers
            reactor.callLater(2, self.giveWorkToWorkers, True)

        # set up SIGUSR2 handling
        try:
            signal.signal(signal.SIGUSR2, self.sighandler_USR2)
        except ValueError:
            # If we get called multiple times, this will generate an exception:
            # ValueError: signal only works in main thread
            # Ignore it as we've already set up the signal handler.
            pass
        # ZEN-26671 Wait at least this duration in secs
        # before signaling a worker process
        self.SIGUSR_TIMEOUT = 5

    def setKeepAlive(self, sock):
        import socket
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, OPTION_STATE)
        sock.setsockopt(socket.SOL_TCP, socket.TCP_KEEPIDLE, CONNECT_TIMEOUT)
        interval = max(CONNECT_TIMEOUT / 4, 10)
        sock.setsockopt(socket.SOL_TCP, socket.TCP_KEEPINTVL, interval)
        sock.setsockopt(socket.SOL_TCP, socket.TCP_KEEPCNT, 2)
        self.log.debug("set socket%s  CONNECT_TIMEOUT:%d  TCP_KEEPINTVL:%d",
                       sock.getsockname(), CONNECT_TIMEOUT, interval)

    def sighandler_USR2(self, signum, frame):
        # log zenhub's worker stats
        self._workerStats()

        # send SIGUSR2 signal to all workers
        now = time.time()
        for worker in self.workerprocessmap.values():
            try:
                elapsed_since_spawn = now - worker.spawn_time
                self.log.debug(
                    '%s secs elapsed since this worker proc was spawned',
                    elapsed_since_spawn)
                if elapsed_since_spawn >= self.SIGUSR_TIMEOUT:
                    worker.signalProcess(signal.SIGUSR2)
                time.sleep(0.5)
            except Exception:
                pass

    def sighandler_USR1(self, signum, frame):
        # handle it ourselves
        if self.options.profiling:
            self.profiler.dump_stats()

        super(ZenHub, self).sighandler_USR1(signum, frame)

        # send SIGUSR1 signal to all workers
        for worker in self.workerprocessmap.values():
            try:
                worker.signalProcess(signal.SIGUSR1)
                time.sleep(0.5)
            except Exception:
                pass

    def stop(self):
        self.shutdown = True

    def _getConf(self):
        confProvider = IHubConfProvider(self)
        return confProvider.getHubConf()

    def getRRDStats(self):
        """
        Return the most recent RRD statistic information.
        """
        rrdStats = DaemonStats()
        perfConf = self._getConf()

        from Products.ZenModel.BuiltInDS import BuiltInDS
        threshs = perfConf.getThresholdInstances(BuiltInDS.sourcetype)
        threshold_notifier = ThresholdNotifier(self.zem.sendEvent, threshs)

        derivative_tracker = DerivativeTracker()

        rrdStats.config('zenhub', perfConf.id, self._metric_writer,
                        threshold_notifier, derivative_tracker)

        return rrdStats

    @defer.inlineCallbacks
    def processQueue(self):
        """
        Periodically process database changes

        @return: None
        """
        now = time.time()
        try:
            self.log.debug("[processQueue] syncing....")
            yield self.async_syncdb()  # reads the object invalidations
            self.log.debug("[processQueue] synced")
        except Exception:
            self.log.warn("Unable to poll invalidations, will try again.")
        else:
            try:
                self.doProcessQueue()
            except Exception:
                self.log.exception("Unable to poll invalidations.")
        reactor.callLater(self.options.invalidation_poll_interval,
                          self.processQueue)
        self.totalEvents += 1
        self.totalTime += time.time() - now

    def _initialize_invalidation_filters(self):
        filters = (f for n, f in getUtilitiesFor(IInvalidationFilter))
        self._invalidation_filters = []
        for fltr in sorted(filters, key=lambda f: getattr(f, 'weight', 100)):
            fltr.initialize(self.dmd)
            self._invalidation_filters.append(fltr)
        self.log.debug('Registered %s invalidation filters.',
                       len(self._invalidation_filters))

    def _filter_oids(self, oids):
        app = self.dmd.getPhysicalRoot()
        for oid in oids:
            try:
                obj = app._p_jar[oid]
            except POSKeyError:
                # State is gone from the database. Send it along.
                yield oid
            else:
                if isinstance(obj,
                              (PrimaryPathObjectManager, DeviceComponent)):
                    try:
                        obj = obj.__of__(self.dmd).primaryAq()
                    except (AttributeError, KeyError):
                        # It's a delete. This should go through.
                        yield oid
                    else:
                        included = True
                        for fltr in self._invalidation_filters:
                            result = fltr.include(obj)
                            if result in (FILTER_INCLUDE, FILTER_EXCLUDE):
                                included = (result == FILTER_INCLUDE)
                                break
                        if included:
                            transformed = self._transformOid(oid, obj)
                            for transformed_oid in transformed:
                                yield transformed_oid

    def _transformOid(self, oid, obj):
        # First, get any subscription adapters registered as transforms
        adapters = subscribers((obj, ), IInvalidationOid)
        # Next check for an old-style (regular adapter) transform
        try:
            adapters = itertools.chain(adapters, (IInvalidationOid(obj), ))
        except TypeError:
            # No old-style adapter is registered
            pass
        transformed = set()
        for adapter in adapters:
            o = adapter.transformOid(oid)
            if isinstance(o, basestring):
                transformed.add(o)
            elif hasattr(o, '__iter__'):
                # If the transform didn't give back a string, it should have
                # given back an iterable
                transformed.update(o)
        # Get rid of any useless Nones
        transformed.discard(None)
        # Get rid of the original oid, if returned. We don't want to use it IF
        # any transformed oid came back.
        transformed.discard(oid)
        return transformed or (oid, )

    def doProcessQueue(self):
        """
        Perform one cycle of update notifications.

        @return: None
        """
        changes_dict = self.storage.poll_invalidations()
        if changes_dict is not None:
            processor = getUtility(IInvalidationProcessor)
            d = processor.processQueue(
                tuple(set(self._filter_oids(changes_dict))))

            def done(n):
                if n == INVALIDATIONS_PAUSED:
                    self.sendEvent({
                        'summary': "Invalidation processing is "
                                   "currently paused. To resume, set "
                                   "'dmd.pauseHubNotifications = False'",
                        'severity': SEVERITY_CRITICAL,
                        'eventkey': INVALIDATIONS_PAUSED,
                    })
                    self._invalidations_paused = True
                else:
                    msg = 'Processed %s oids' % n
                    self.log.debug(msg)
                    if self._invalidations_paused:
                        self.sendEvent({
                            'summary': msg,
                            'severity': SEVERITY_CLEAR,
                            'eventkey': INVALIDATIONS_PAUSED
                        })
                        self._invalidations_paused = False

            d.addCallback(done)

    def sendEvent(self, **kw):
        """
        Useful method for posting events to the EventManager.

        @type kw: keywords (dict)
        @param kw: the values for an event: device, summary, etc.
        @return: None
        """
        if 'device' not in kw:
            kw['device'] = self.options.monitor
        if 'component' not in kw:
            kw['component'] = self.name
        try:
            self.zem.sendEvent(Event(**kw))
        except Exception:
            self.log.exception("Unable to send an event")

    def loadChecker(self):
        """
        Load the password file

        @return: an object satisfying the ICredentialsChecker
        interface using a password file or an empty list if the file
        is not available.  Uses the file specified in the --passwd
        command line option.
        """
        try:
            checker = checkers.FilePasswordDB(self.options.passwordfile)
            # grab credentials for the workers to login
            u, p = checker._loadCredentials().next()
            self.workerUsername, self.workerPassword = u, p
            return checker
        except Exception:
            self.log.exception("Unable to load %s", self.options.passwordfile)
        return []

    def getService(self, name, instance):
        """
        Helper method to load services dynamically for a collector.
        Returned instances are cached: reconnecting collectors will
        get the same service object.

        @type name: string
        @param name: the dotted-name of the module to load
        (uses @L{Products.ZenUtils.Utils.importClass})
        @type instance: string
        @param instance: each service serves only one specific collector
        instance (like 'localhost').  instance defines the collector's
        instance name.
        @return: a service loaded from ZenHub/services or one of the zenpacks.
        """
        # Sanity check the names given to us
        if not self.dmd.Monitors.Performance._getOb(instance, False):
            raise RemoteBadMonitor(
                "The provided performance monitor '%s'" % instance +
                " is not in the current list", None)

        try:
            return self.services[name, instance]

        except KeyError:
            from Products.ZenUtils.Utils import importClass
            try:
                ctor = importClass(name)
            except ImportError:
                ctor = importClass('Products.ZenHub.services.%s' % name, name)
            try:
                svc = ctor(self.dmd, instance)
            except Exception:
                self.log.exception("Failed to initialize %s", ctor)
                # Module can't be used, so unload it.
                if ctor.__module__ in sys.modules:
                    del sys.modules[ctor.__module__]
                return None
            else:
                if self.options.workers:
                    svc = WorkerInterceptor(self, svc)
                self.services[name, instance] = svc
                notify(ServiceAddedEvent(name, instance))
                return svc

    def deferToWorker(self, svcName, instance, method, args):
        """Take a remote request and queue it for worker processes.

        @type svcName: string
        @param svcName: the name of the hub service to call
        @type instance: string
        @param instance: the name of the hub service instance to call
        @type method: string
        @param method: the name of the method on the hub service to call
        @type args: tuple
        @param args: the remaining arguments to the remote_execute()
            method in the worker
        @return: a Deferred for the eventual results of the method call
        """
        d = defer.Deferred()
        service = self.getService(svcName, instance).service
        priority = service.getMethodPriority(method)

        self.workList.append(
            HubWorklistItem(priority, time.time(), d, svcName, instance,
                            method, (svcName, instance, method, args)))

        reactor.callLater(0, self.giveWorkToWorkers)
        return d

    def updateStatusAtStart(self, wId, job):
        now = time.time()
        jobDesc = "%s:%s.%s" % (job.instance, job.servicename, job.method)
        stats = self.workTracker.pop(wId, None)
        idletime = now - stats.lastupdate if stats else 0
        self.executionTimer[job.method][0] += 1
        self.executionTimer[job.method][1] += idletime
        self.executionTimer[job.method][3] = now
        self.log.debug("Giving %s to worker %d, (%s)", job.method, wId,
                       jobDesc)
        self.workTracker[wId] = WorkerStats('Busy', jobDesc, now, idletime)

    def updateStatusAtFinish(self, wId, job, error=None):
        now = time.time()
        self.executionTimer[job.method][3] = now
        stats = self.workTracker.pop(wId, None)
        if stats:
            elapsed = now - stats.lastupdate
            self.executionTimer[job.method][2] += elapsed
            self.log.debug("worker %s, work %s finished in %s", wId,
                           stats.description, elapsed)
        # stats may be None if no start record exists for this worker
        self.workTracker[wId] = WorkerStats(
            'Error: %s' % error if error else 'Idle',
            stats.description if stats else '', now, 0)

    @inlineCallbacks
    def finished(self, job, result, finishedWorker, wId):
        finishedWorker.busy = False
        error = None
        if isinstance(result, Exception):
            job.deferred.errback(result)
        else:
            try:
                result = pickle.loads(''.join(result))
            except Exception as e:
                error = e
                self.log.exception("Error un-pickling result from worker")

            # if zenhubworker is about to shutdown, it will wrap the actual
            # result in a LastCallReturnValue tuple - remove worker from worker
            # list to keep from accidentally sending it any more work while
            # it shuts down
            if isinstance(result, LastCallReturnValue):
                self.log.debug("worker %s is shutting down", wId)
                result = result.returnvalue
                if finishedWorker in self.workers:
                    self.workers.remove(finishedWorker)

            # the job contains a deferred to be used to return the actual value
            job.deferred.callback(result)

        self.updateStatusAtFinish(wId, job, error)
        reactor.callLater(0.1, self.giveWorkToWorkers)
        yield returnValue(result)

    @inlineCallbacks
    def giveWorkToWorkers(self, requeue=False):
        """Parcel out a method invocation to an available worker process
        """
        if self.workList:
            self.log.debug("worklist has %d items", len(self.workList))
        incompleteJobs = []
        while self.workList:
            if all(w.busy for w in self.workers):
                self.log.debug("all workers are busy")
                yield wait(0.1)
                break

            allowADM = (self.dmd.getPauseADMLife() >
                        self.options.modeling_pause_timeout)
            job = self.workList.pop(allowADM)
            if job is None:
                self.log.info("Got None from the job worklist.  ApplyDataMaps"
                              " may be paused for zenpack"
                              " install/upgrade/removal.")
                yield wait(0.1)
                break

            candidateWorkers = list(
                self.workerselector.getCandidateWorkerIds(
                    job.method, self.workers))
            for i in candidateWorkers:
                worker = self.workers[i]
                worker.busy = True
                self.counters['workerItems'] += 1
                self.updateStatusAtStart(i, job)
                try:
                    result = yield worker.callRemote('execute', *job.args)
                except Exception as ex:
                    self.log.warning("Failed to execute job on zenhub worker")
                    result = ex
                finally:
                    yield self.finished(job, result, worker, i)
                break
            else:
                # could not complete this job, put it back in the queue once
                # we're finished saturating the workers
                incompleteJobs.append(job)

        for job in reversed(incompleteJobs):
            # could not complete this job, put it back in the queue
            self.workList.push(job)

        if incompleteJobs:
            self.log.debug("No workers available for %d jobs.",
                           len(incompleteJobs))
            reactor.callLater(0.1, self.giveWorkToWorkers)

        if requeue and not self.shutdown:
            reactor.callLater(5, self.giveWorkToWorkers, True)

    def _workerStats(self):
        now = time.time()
        lines = [
            'Worklist Stats:',
            '\tEvents:\t%s' % len(self.workList.eventworklist),
            '\tOther:\t%s' % len(self.workList.otherworklist),
            '\tApplyDataMaps:\t%s' % len(self.workList.applyworklist),
            '\tTotal:\t%s' % len(self.workList),
            '\nHub Execution Timings: [method, count, idle_total, running_total, last_called_time]'
        ]

        statline = " - %-32s %8d %12.2f %8.2f  %s"
        for method, stats in sorted(self.executionTimer.iteritems(),
                                    key=lambda v: -v[1][2]):
            lines.append(
                statline %
                (method, stats[0], stats[1], stats[2],
                 time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(stats[3]))))

        lines.append('\nWorker Stats:')
        for wId, worker in enumerate(self.workers):
            stat = self.workTracker.get(wId, None)
            linePattern = '\t%d(pid=%s):%s\t[%s%s]\t%.3fs'
            lines.append(linePattern % (
                wId,
                '{}'.format(worker.pid),
                'Busy' if worker.busy else 'Idle',
                '%s %s' % (stat.status, stat.description)
                if stat else 'No Stats',
                ' Idle:%.3fs' % stat.previdle
                if stat and stat.previdle else '',
                now - stat.lastupdate if stat else 0))
            if stat and ((worker.busy and stat.status == 'Idle') or
                         (not worker.busy and stat.status == 'Busy')):
                self.log.warn(
                    'worker.busy: %s and stat.status: %s do not match!',
                    worker.busy, stat.status)
        self.log.info('\n'.join(lines))

    def _createWorkerConf(self):
        workerconfigdir = os.path.dirname(self.workerconfig)
        if not os.path.exists(workerconfigdir):
            os.makedirs(workerconfigdir)
        with open(self.workerconfig, 'w') as workerfd:
            workerfd.write("hubport %s\n" % self.options.pbport)
            workerfd.write("username %s\n" % self.workerUsername)
            workerfd.write("password %s\n" % self.workerPassword)
            workerfd.write("logseverity %s\n" % self.options.logseverity)
            workerfd.write("zodb-cachesize %s\n" % self.options.zodb_cachesize)
            workerfd.write("calllimit %s\n" % self.options.worker_call_limit)
            workerfd.write("profiling %s\n" % self.options.profiling)
            workerfd.write("monitor %s\n" % self.options.monitor)

    def createWorker(self, workerNum):
        """Start a worker subprocess

        @return: None
        """
        # this probably can't happen, but let's make sure
        if len(self.worker_processes) >= self.options.workers:
            self.log.info("already at maximum number of worker processes,"
                          " no worker will be created")
            return

        # watch for output, and generally just take notice
        class WorkerRunningProtocol(protocol.ProcessProtocol):
            def __init__(self, parent, workerNum):
                self._pid = 0
                self.parent = parent
                self.log = parent.log
                self.workerNum = workerNum

            @property
            def pid(self):
                return self._pid

            def connectionMade(self):
                self._pid = self.transport.pid
                reactor.callLater(1, self.parent.giveWorkToWorkers)

            def outReceived(self, data):
                self.log.debug("Worker %s (%s) reports %s", self.workerNum,
                               self.pid, data.rstrip())

            def errReceived(self, data):
                self.log.info("Worker %s (%s) reports %s", self.workerNum,
                              self.pid, data.rstrip())

            def processEnded(self, reason):
                try:
                    self.parent.worker_processes.discard(self)
                    ended_proc = self.parent.workerprocessmap.pop(
                        self.pid, None)
                    # Guard against a worker that died before its pid
                    # was ever recorded in the process map.
                    ended_proc_age = (
                        time.time() - ended_proc.spawn_time
                        if ended_proc else 0)
                    self.log.warning(
                        "Worker %s (%s), age %f secs, exited with"
                        " status: %s (%s)", self.workerNum, self.pid,
                        ended_proc_age, reason.value.exitCode or -1,
                        getExitMessage(reason.value.exitCode))
                    # if not shutting down, restart a new worker
                    if not self.parent.shutdown:
                        self.log.info("Starting new zenhubworker")
                        self.parent.createWorker(self.workerNum)
                except Exception:
                    self.log.exception('Exception in zenhub worker'
                                       ' processEnded')

        try:
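            # Prefer launching the worker under nice(1) so hub workers
            # run at a lower scheduling priority than zenhub itself.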
            if NICE_PATH:
                exe = NICE_PATH
                args = (NICE_PATH, "-n",
                        "%+d" % self.options.hubworker_priority,
                        zenPath('bin', 'zenhubworker'), 'run', '--workernum',
                        '%s' % workerNum, '-C', self.workerconfig)
            else:
                exe = zenPath('bin', 'zenhubworker')
                args = (exe, 'run', '--workernum', '%s' % workerNum,
                        '-C', self.workerconfig)
            self.log.debug("Starting %s", ' '.join(args))
            prot = WorkerRunningProtocol(self, workerNum)
            proc = reactor.spawnProcess(prot, exe, args, os.environ)
            proc.spawn_time = time.time()
            self.workerprocessmap[proc.pid] = proc
            self.worker_processes.add(prot)
        except Exception:
            self.log.exception('Exception in createWorker')

    def heartbeat(self):
        """
        Since we don't do anything on a regular basis, just
        push heartbeats regularly.

        @return: None
        """
        seconds = 30
        evt = EventHeartbeat(self.options.monitor, self.name,
                             self.options.heartbeatTimeout)
        self.zem.sendEvent(evt)
        self.niceDoggie(seconds)
        reactor.callLater(seconds, self.heartbeat)
        r = self.rrdStats
        # self.totalTime/self.totalEvents track event processing; the
        # sum below is remote-call time across all loaded hub services.
        serviceCallTime = sum(s.callTime for s in self.services.values())
        r.counter('totalTime', int(self.totalTime * 1000))
        r.counter('totalEvents', self.totalEvents)
        r.gauge('services', len(self.services))
        r.counter('totalCallTime', serviceCallTime)
        r.gauge('workListLength', len(self.workList))

        for name, value in self.counters.items():
            r.counter(name, value)

        try:
            hbcheck = IHubHeartBeatCheck(self)
            hbcheck.check()
        except Exception:
            self.log.exception("Error processing heartbeat hook")

    def check_workers(self):
        try:
            self.log.debug("ZenHub check on workers")
            # Top the pool back up to the configured size; createWorker
            # requires a worker number, which serves as a label for
            # logging and metrics.
            for workerNum in range(len(self.worker_processes),
                                   self.options.workers):
                self.createWorker(workerNum)
        except Exception:
            self.log.exception("Failure in check_workers")

    def main(self):
        """
        Start the main event loop.
        """
        if self.options.cycle:
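            # Daemon (cycle) mode: start the heartbeat loop and report
            # internal metrics for this zenhub instance.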
            reactor.callLater(0, self.heartbeat)
            self.log.debug("Creating async MetricReporter")
            daemonTags = {
                'zenoss_daemon': 'zenhub',
                'zenoss_monitor': self.options.monitor,
                'internal': True
            }
            self.metricreporter = TwistedMetricReporter(
                metricWriter=self._metric_writer, tags=daemonTags)
            self.metricreporter.start()
            reactor.addSystemEventTrigger('before', 'shutdown',
                                          self.metricreporter.stop)

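        # Periodically top the worker pool back up; workers exit by
        # design after --worker-call-limit remote calls.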
        self.check_workers_task = task.LoopingCall(self.check_workers)
        self.check_workers_task.start(CHECK_WORKER_INTERVAL)

        reactor.run()
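        # reactor.run() blocks until reactor.stop() is called; everything
        # below is post-reactor shutdown cleanup.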

        self.shutdown = True
        self.log.debug("Killing workers")
        for proc in self.workerprocessmap.itervalues():
            try:
                proc.signalProcess('KILL')
                self.log.debug("Killed worker %s", proc)
            except ProcessExitedAlready:
                pass
            except Exception:
                self.log.exception("Error killing worker %s", proc)
        workerconfig = getattr(self, 'workerconfig', None)
        if workerconfig and os.path.exists(workerconfig):
            os.unlink(workerconfig)
        getUtility(IEventPublisher).close()
        if self.options.profiling:
            self.profiler.stop()

    def buildOptions(self):
        """
        Adds our command line options to ZCmdBase command line options.
        """
        ZCmdBase.buildOptions(self)
        self.parser.add_option(
            '--xmlrpcport',
            '-x',
            dest='xmlrpcport',
            type='int',
            default=XML_RPC_PORT,
            help='Port to use for XML-based Remote Procedure Calls (RPC)')
        self.parser.add_option('--pbport',
                               dest='pbport',
                               type='int',
                               default=PB_PORT,
                               help="Port to use for Twisted's pb service")
        self.parser.add_option('--passwd',
                               dest='passwordfile',
                               type='string',
                               default=zenPath('etc', 'hubpasswd'),
                               help='File where passwords are stored')
        self.parser.add_option(
            '--monitor',
            dest='monitor',
            default='localhost',
            help='Name of the distributed monitor this hub runs on')
        self.parser.add_option(
            '--workers',
            dest='workers',
            type='int',
            default=2,
            help="Number of worker instances to handle requests")
        self.parser.add_option(
            '--hubworker-priority',
            type='int',
            default=5,
            help="Relative process priority for hub workers (%default)")
        self.parser.add_option(
            '--prioritize',
            dest='prioritize',
            action='store_true',
            default=False,
            help="Run higher priority jobs before lower priority ones")
        self.parser.add_option(
            '--anyworker',
            dest='anyworker',
            action='store_true',
            default=False,
            help='Allow any priority job to run on any worker')
        self.parser.add_option(
            '--workers-reserved-for-events',
            dest='workersReservedForEvents',
            type='int',
            default=1,
            help="Number of worker instances to reserve for handling events")
        self.parser.add_option(
            '--worker-call-limit',
            dest='worker_call_limit',
            type='int',
            default=200,
            help="Maximum number of remote calls a worker can run"
                 " before restarting")
        self.parser.add_option(
            '--invalidation-poll-interval',
            type='int',
            default=30,
            help="Interval at which to poll invalidations (default: %default)")
        self.parser.add_option('--profiling',
                               dest='profiling',
                               action='store_true',
                               default=False,
                               help="Run with profiling on")
        self.parser.add_option(
            '--modeling-pause-timeout',
            type='int',
            default=3600,
            help="Maximum number of seconds to pause modeling during"
                 " ZenPack install/upgrade/removal (default: %default)")

        notify(ParserReadyForOptionsEvent(self.parser))