def setUp(t):
    """Patch TwistedMetricReporter and create the MetricManager fixture.

    The patcher, the started mock, the daemon tags and the MetricManager
    built from those tags are stored on the test instance as
    `tmr_patcher`, `TwistedMetricReporter`, `daemon_tags` and `mm`.
    """
    target = '{src}.TwistedMetricReporter'.format(**PATH)
    patcher = patch(target, autospec=True)
    t.tmr_patcher = patcher
    t.TwistedMetricReporter = patcher.start()
    # Register the patcher's stop so the patch is removed on cleanup.
    t.addCleanup(patcher.stop)

    tags = {
        'zenoss_daemon': 'zenhub',
        'zenoss_monitor': 'localhost',
        'internal': True
    }
    t.daemon_tags = tags
    t.mm = MetricManager(tags)
def __init__(self, reactor):
    """Build a ZenHubWorker: profiler, registry, hub client and metrics.

    @param reactor: object used to build the hub endpoint and to register
        shutdown triggers (presumably the Twisted reactor -- confirm
        against the caller).
    """
    ZCmdBase.__init__(self)

    self.__reactor = reactor
    options = self.options

    if options.profiling:
        profiler = ContinuousProfiler('ZenHubWorker', log=self.log)
        self.profiler = profiler
        profiler.start()
        # Stop the profiler as part of reactor shutdown.
        reactor.addSystemEventTrigger('before', 'shutdown', profiler.stop)

    self.instanceId = options.workerid
    self.current = IDLE
    self.currentStart = 0
    self.numCalls = Metrology.meter("zenhub.workerCalls")

    self.zem = self.dmd.ZenEventManager
    loadPlugins(self.dmd)

    reference_factory = ServiceReferenceFactory(self)
    self.__registry = HubServiceRegistry(self.dmd, reference_factory)

    # Configure/initialize the ZenHub client
    credentials = UsernamePassword(options.hubusername, options.hubpassword)
    descriptor = "tcp:{host}:{port}".format(
        host=options.hubhost, port=options.hubport,
    )
    hub_endpoint = clientFromString(reactor, descriptor)
    self.__client = ZenHubClient(
        reactor, hub_endpoint, credentials, self, 10.0,
    )

    # Setup Metric Reporting
    self.log.debug("Creating async MetricReporter")
    tags = {
        'zenoss_daemon': 'zenhub_worker_%s' % options.workerid,
        'zenoss_monitor': options.monitor,
        'internal': True,
    }
    self._metric_manager = MetricManager(daemon_tags=tags)
class ZenHub(ZCmdBase):
    """
    Listen for changes to objects in the Zeo database and update the
    collectors' configuration.

    The remote collectors connect to ZenHub, request configuration
    information, and stay connected.  When changes are detected in the
    Zeo database, configuration updates are sent out to collectors
    asynchronously.  In this way, changes made in the web GUI can affect
    collection immediately, instead of waiting for a configuration cycle.

    Each collector uses a different, pluggable service within ZenHub
    to translate objects into configuration and data.  ZenPacks can
    add services for their collectors.  Collectors communicate using
    Twisted's Perspective Broker, which provides authenticated,
    asynchronous, bidirectional method invocation.

    ZenHub also provides an XmlRPC interface to some common services
    to support collectors written in other languages.

    ZenHub does very little work in its own process, but instead dispatches
    the work to a pool of zenhubworkers, running zenhubworker.py. zenhub
    manages these workers with 1 data structure:
    - workers - a list of remote PB instances

    TODO: document invalidation workers
    """

    # Class-level statistics defaults.
    totalTime = 0.
    totalEvents = 0
    totalCallTime = 0.
    # Daemon name; used as the default event component and in the
    # "<name> started" event summary.
    mname = name = 'zenhub'

    def __init__(self):
        """
        Hook ourselves up to the Zeo database and wait for collectors
        to connect.

        Loads the hub ZCML configuration, fires HubWillBeCreatedEvent and
        HubCreatedEvent, sends an App_Start event, creates the service,
        invalidation and metric managers, and installs the SIGUSR2 handler.
        """
        self.shutdown = False
        self.counters = collections.Counter()

        super(ZenHub, self).__init__()

        load_config("hub.zcml", ZENHUB_MODULE)
        notify(HubWillBeCreatedEvent(self))

        if self.options.profiling:
            self.profiler = ContinuousProfiler('zenhub', log=self.log)
            self.profiler.start()

        self.zem = self.dmd.ZenEventManager

        # responsible for sending messages to the queues
        load_config_override('twistedpublisher.zcml', QUEUEMESSAGING_MODULE)
        notify(HubCreatedEvent(self))
        self.sendEvent(eventClass=App_Start,
                       summary="%s started" % self.name,
                       severity=0)

        # ZenHub Services (and XMLRPC)
        self._service_manager = HubServiceManager(
            modeling_pause_timeout=self.options.modeling_pause_timeout,
            passwordfile=self.options.passwordfile,
            pbport=self.options.pbport,
            xmlrpcport=self.options.xmlrpcport,
        )

        # Invalidation Processing
        self._invalidation_manager = InvalidationManager(
            self.dmd,
            self.log,
            self.async_syncdb,
            self.storage.poll_invalidations,
            self.sendEvent,
            poll_interval=self.options.invalidation_poll_interval,
        )

        # Setup Metric Reporting
        self._metric_manager = MetricManager(
            daemon_tags={
                'zenoss_daemon': 'zenhub',
                'zenoss_monitor': self.options.monitor,
                'internal': True
            })
        self._metric_writer = self._metric_manager.metric_writer
        self.rrdStats = self._metric_manager.get_rrd_stats(
            self._getConf(), self.zem.sendEvent
        )

        # set up SIGUSR2 handling
        try:
            signal.signal(signal.SIGUSR2, self.sighandler_USR2)
        except ValueError:
            # If we get called multiple times, this will generate an exception:
            # ValueError: signal only works in main thread
            # Ignore it as we've already set up the signal handler.
            pass
        # ZEN-26671 Wait at least this duration in secs
        # before signaling a worker process
        self.SIGUSR_TIMEOUT = 5

    @property
    def services(self):
        """Return the `services` attribute of the service manager."""
        return self._service_manager.services

    def main(self):
        """
        Start the main event loop.

        Blocks in reactor.run(); once the reactor returns, the shutdown
        flag is set, the IEventPublisher utility is closed and the
        profiler (when profiling is enabled) is stopped.
        """
        # NOTE(review): the extent of this `if` body was reconstructed from
        # whitespace-collapsed source -- confirm against the original file.
        if self.options.cycle:
            reactor.callLater(0, self.heartbeat)
            self.log.debug("Creating async MetricReporter")
            self._metric_manager.start()
            reactor.addSystemEventTrigger(
                'before', 'shutdown', self._metric_manager.stop
            )
            # preserve legacy API
            self.metricreporter = self._metric_manager.metricreporter

        # Start ZenHub services
        self._service_manager.start(self.dmd, reactor)
        self._service_manager.onExecute(self.__workerItemCounter)

        # Start Processing Invalidations
        self.process_invalidations_task = task.LoopingCall(
            self._invalidation_manager.process_invalidations
        )
        self.process_invalidations_task.start(
            self.options.invalidation_poll_interval
        )

        reactor.run()

        self.shutdown = True
        getUtility(IEventPublisher).close()

        if self.options.profiling:
            self.profiler.stop()

    def __workerItemCounter(self, job):
        """Increment the "workerItems" counter; `job` itself is not used."""
        self.counters["workerItems"] += 1

    def sighandler_USR2(self, signum, frame):
        """Handle SIGUSR2 by scheduling a statistics dump on the reactor."""
        reactor.callLater(0, self.__dumpStats)

    @inlineCallbacks
    def __dumpStats(self):
        """Log the service manager's status report, then ask it to report
        worker status.
        """
        self.log.info("\n%s\n", self._service_manager.getStatusReport())
        yield self._service_manager.reportWorkerStatus()

    def sighandler_USR1(self, signum, frame):
        """Handle SIGUSR1: dump profiler stats when profiling is enabled,
        then defer to the base class handler.
        """
        if self.options.profiling:
            self.profiler.dump_stats()
        super(ZenHub, self).sighandler_USR1(signum, frame)

    def stop(self):
        """Set the shutdown flag; nothing else is stopped here."""
        self.shutdown = True

    def _getConf(self):
        """Return the hub configuration from the IHubConfProvider adapter."""
        confProvider = IHubConfProvider(self)
        return confProvider.getHubConf()

    def getService(self, service, monitor):
        """Return the service for `service` and `monitor` as looked up
        through the service manager's `services` object.
        """
        return self._service_manager.services.getService(service, monitor)

    # Legacy API
    def getRRDStats(self):
        """Return the metric manager's RRD stats for the hub configuration."""
        return self._metric_manager.get_rrd_stats(
            self._getConf(), self.zem.sendEvent
        )

    # Legacy API
    @inlineCallbacks
    def processQueue(self):
        """
        Periodically process database changes

        Delegates to the invalidation manager's process_invalidations.

        @return: None
        """
        yield self._invalidation_manager.process_invalidations()

    # Legacy API
    def _initialize_invalidation_filters(self):
        """Store the invalidation manager's initialized filters on
        `self._invalidation_filters`.
        """
        self._invalidation_filters = self._invalidation_manager\
            .initialize_invalidation_filters()

    def sendEvent(self, **kw):
        """
        Useful method for posting events to the EventManager.

        'device' defaults to the configured monitor and 'component' to
        this daemon's name when the caller does not supply them.  A failure
        to send is logged rather than raised.

        @type kw: keywords (dict)
        @param kw: the values for an event: device, summary, etc.
        @return: None
        """
        if 'device' not in kw:
            kw['device'] = self.options.monitor
        if 'component' not in kw:
            kw['component'] = self.name
        try:
            self.zem.sendEvent(Event(**kw))
        except Exception:
            self.log.exception("Unable to send an event")

    def heartbeat(self):
        """
        Since we don't do anything on a regular basis, just
        push heartbeats regularly.

        Reschedules itself every 30 seconds, publishes the hub's counters
        and gauges through `self.rrdStats`, and finally runs the
        IHubHeartBeatCheck hook (errors from the hook are logged).

        @return: None
        """
        seconds = 30
        evt = EventHeartbeat(
            self.options.monitor, self.name, self.options.heartbeatTimeout
        )
        self.zem.sendEvent(evt)
        self.niceDoggie(seconds)
        # Reschedule before publishing statistics.
        reactor.callLater(seconds, self.heartbeat)
        r = self.rrdStats
        totalTime = sum(
            s.callTime for s in self._service_manager.services.values()
        )
        r.counter(
            'totalTime', int(self._invalidation_manager.totalTime * 1000)
        )
        r.counter('totalEvents', self._invalidation_manager.totalEvents)
        r.gauge('services', len(self._service_manager.services))
        r.counter('totalCallTime', totalTime)
        r.gauge('workListLength', len(self._service_manager.worklist))

        for name, value in self.counters.items():
            r.counter(name, value)

        try:
            hbcheck = IHubHeartBeatCheck(self)
            hbcheck.check()
        except Exception:
            self.log.exception("Error processing heartbeat hook")

    def buildOptions(self):
        """
        Adds our command line options to ZCmdBase command line options.

        Fires ParserReadyForOptionsEvent with the parser once ZenHub's own
        options have been added.
        """
        ZCmdBase.buildOptions(self)
        self.parser.add_option(
            '--xmlrpcport', '-x', dest='xmlrpcport',
            type='int', default=XML_RPC_PORT,
            help='Port to use for XML-based Remote Procedure Calls (RPC)')
        self.parser.add_option(
            '--pbport', dest='pbport',
            type='int', default=PB_PORT,
            help="Port to use for Twisted's pb service")
        self.parser.add_option(
            '--passwd', dest='passwordfile',
            type='string', default=zenPath('etc', 'hubpasswd'),
            help='File where passwords are stored')
        self.parser.add_option(
            '--monitor', dest='monitor',
            default='localhost',
            help='Name of the distributed monitor this hub runs on')
        self.parser.add_option(
            '--workers-reserved-for-events', dest='workersReservedForEvents',
            type='int', default=1,
            help="Number of worker instances to reserve for handling events")
        # No explicit dest: the value is read elsewhere in this class as
        # self.options.invalidation_poll_interval.
        self.parser.add_option(
            '--invalidation-poll-interval',
            type='int', default=30,
            help="Interval at which to poll invalidations (default: %default)")
        self.parser.add_option(
            '--profiling', dest='profiling',
            action='store_true', default=False,
            help="Run with profiling on")
        # No explicit dest: read as self.options.modeling_pause_timeout.
        self.parser.add_option(
            '--modeling-pause-timeout',
            type='int', default=3600,
            help='Maximum number of seconds to pause modeling during ZenPack'
                 ' install/upgrade/removal (default: %default)')

        notify(ParserReadyForOptionsEvent(self.parser))
def __init__(self):
    """Connect to the Zeo database and prepare ZenHub's managers.

    Loads the hub ZCML configuration, announces hub creation through
    HubWillBeCreatedEvent and HubCreatedEvent, sends an App_Start event,
    builds the service, invalidation and metric managers, and installs
    the SIGUSR2 handler.
    """
    self.shutdown = False
    self.counters = collections.Counter()

    super(ZenHub, self).__init__()
    options = self.options

    load_config("hub.zcml", ZENHUB_MODULE)
    notify(HubWillBeCreatedEvent(self))

    if options.profiling:
        profiler = ContinuousProfiler('zenhub', log=self.log)
        self.profiler = profiler
        profiler.start()

    self.zem = self.dmd.ZenEventManager

    # Load the publisher responsible for sending messages to the queues.
    load_config_override('twistedpublisher.zcml', QUEUEMESSAGING_MODULE)
    notify(HubCreatedEvent(self))
    self.sendEvent(
        eventClass=App_Start,
        summary="%s started" % self.name,
        severity=0,
    )

    # ZenHub Services (and XMLRPC)
    self._service_manager = HubServiceManager(
        modeling_pause_timeout=options.modeling_pause_timeout,
        passwordfile=options.passwordfile,
        pbport=options.pbport,
        xmlrpcport=options.xmlrpcport,
    )

    # Invalidation Processing
    self._invalidation_manager = InvalidationManager(
        self.dmd,
        self.log,
        self.async_syncdb,
        self.storage.poll_invalidations,
        self.sendEvent,
        poll_interval=options.invalidation_poll_interval,
    )

    # Setup Metric Reporting
    tags = {
        'zenoss_daemon': 'zenhub',
        'zenoss_monitor': options.monitor,
        'internal': True
    }
    metric_manager = MetricManager(daemon_tags=tags)
    self._metric_manager = metric_manager
    self._metric_writer = metric_manager.metric_writer
    self.rrdStats = metric_manager.get_rrd_stats(
        self._getConf(), self.zem.sendEvent,
    )

    # Install the SIGUSR2 handler.
    try:
        signal.signal(signal.SIGUSR2, self.sighandler_USR2)
    except ValueError:
        # Being called more than once raises
        #   ValueError: signal only works in main thread
        # which is ignored because the handler is already installed.
        pass

    # ZEN-26671 Wait at least this duration in secs
    # before signaling a worker process
    self.SIGUSR_TIMEOUT = 5
def __init__(self, reactor):
    """Build a ZenHubWorker: profiler, services, hub client and metrics.

    The metric manager is also registered as a named IMetricManager
    utility with the global site manager.

    @param reactor: object used to build the hub endpoint and to register
        shutdown triggers (presumably the Twisted reactor -- confirm
        against the caller).
    """
    ZCmdBase.__init__(self)

    self.__reactor = reactor
    options = self.options

    if options.profiling:
        profiler = ContinuousProfiler('ZenHubWorker', log=self.log)
        self.profiler = profiler
        profiler.start()
        # Stop the profiler as part of reactor shutdown.
        reactor.addSystemEventTrigger('before', 'shutdown', profiler.stop)

    self.current = IDLE
    self.currentStart = 0
    self.numCalls = Metrology.meter("zenhub.workerCalls")

    self.zem = self.dmd.ZenEventManager
    loadPlugins(self.dmd)

    registry = ServiceRegistry()
    self.__registry = registry
    service_loader = ServiceLoader()
    reference_factory = ServiceReferenceFactory(self)
    self.__manager = ServiceManager(
        registry, service_loader, reference_factory,
    )

    # Configure/initialize the ZenHub client
    credentials = UsernamePassword(options.hubusername, options.hubpassword)
    descriptor = "tcp:{host}:{port}".format(
        host=options.hubhost, port=options.hubport,
    )
    hub_endpoint = clientFromString(reactor, descriptor)
    self.__client = ZenHubClient(
        reactor, hub_endpoint, credentials, self, 10.0, self.worklistId,
    )

    # Setup Metric Reporting
    self.log.debug("Creating async MetricReporter")
    tags = {
        'zenoss_daemon': 'zenhub_worker_%s' % self.instanceId,
        'zenoss_monitor': options.monitor,
        'internal': True,
    }
    metric_manager = MetricManager(daemon_tags=tags)
    self._metric_manager = metric_manager

    # Make the metric manager available via zope.component.getUtility
    site_manager = getGlobalSiteManager()
    site_manager.registerUtility(
        metric_manager, IMetricManager, name='zenhub_worker_metricmanager',
    )
class ZenHubWorker(ZCmdBase, pb.Referenceable):
    """Execute ZenHub requests."""

    mname = name = "zenhubworker"

    def __init__(self, reactor):
        """Initialize a ZenHubWorker instance.

        @param reactor: object used to build the hub endpoint and to
            register shutdown triggers (presumably the Twisted reactor --
            confirm against the caller).
        """
        ZCmdBase.__init__(self)

        self.__reactor = reactor

        if self.options.profiling:
            self.profiler = ContinuousProfiler('ZenHubWorker', log=self.log)
            self.profiler.start()
            reactor.addSystemEventTrigger(
                'before', 'shutdown', self.profiler.stop,
            )

        self.current = IDLE
        self.currentStart = 0
        self.numCalls = Metrology.meter("zenhub.workerCalls")

        self.zem = self.dmd.ZenEventManager
        loadPlugins(self.dmd)

        self.__registry = ServiceRegistry()
        loader = ServiceLoader()
        factory = ServiceReferenceFactory(self)
        self.__manager = ServiceManager(self.__registry, loader, factory)

        # Configure/initialize the ZenHub client
        creds = UsernamePassword(
            self.options.hubusername, self.options.hubpassword,
        )
        endpointDescriptor = "tcp:{host}:{port}".format(
            host=self.options.hubhost, port=self.options.hubport,
        )
        endpoint = clientFromString(reactor, endpointDescriptor)
        self.__client = ZenHubClient(
            reactor, endpoint, creds, self, 10.0, self.worklistId,
        )

        # Setup Metric Reporting
        self.log.debug("Creating async MetricReporter")
        self._metric_manager = MetricManager(daemon_tags={
            'zenoss_daemon': 'zenhub_worker_%s' % self.instanceId,
            'zenoss_monitor': self.options.monitor,
            'internal': True,
        })
        # Make the metric manager available via zope.component.getUtility
        getGlobalSiteManager().registerUtility(
            self._metric_manager, IMetricManager,
            name='zenhub_worker_metricmanager',
        )

    def start(self):
        """Start zenhubworker processing.

        Installs the USR1/USR2 signal handlers, starts the hub client and
        the metric manager, and registers their shutdown triggers with the
        reactor.  Statistics are reported after shutdown.
        """
        self.log.debug("establishing SIGUSR1 signal handler")
        signal.signal(signal.SIGUSR1, self.sighandler_USR1)
        self.log.debug("establishing SIGUSR2 signal handler")
        signal.signal(signal.SIGUSR2, self.sighandler_USR2)

        self.__client.start()
        self.__reactor.addSystemEventTrigger(
            'before', 'shutdown', self.__client.stop,
        )

        self._metric_manager.start()
        self.__reactor.addSystemEventTrigger(
            'before', 'shutdown', self._metric_manager.stop,
        )

        self.__reactor.addSystemEventTrigger(
            "after", "shutdown", self.reportStats,
        )

    def audit(self, action):
        """Override default audit behavior.

        Zenhubworker restarts frequently, so no need to audit.
        """
        pass

    def parseOptions(self):
        """Parse options for zenhubworker.

        Override parseOptions to capture the worklistId argument.

        @raise OptParseError: when no worklist name was given on the
            command line.
        """
        super(ZenHubWorker, self).parseOptions()
        if not self.args:
            raise OptParseError("ZenHub worklist name not specified")
        self.worklistId = self.args[0]
        self.instanceId = "%s_%s" % (self.worklistId, self.options.workerid)

    def setupLogging(self):
        """Configure logging for zenhubworker.

        Override setupLogging to add instance id/count information to
        all log messages.
        """
        super(ZenHubWorker, self).setupLogging()
        template = (
            "%%(asctime)s %%(levelname)s %%(name)s: (%s) %%(message)s"
        ) % self.instanceId
        rootLog = logging.getLogger()
        formatter = logging.Formatter(template)
        for handler in rootLog.handlers:
            handler.setFormatter(formatter)

    def sighandler_USR1(self, signum, frame):
        """Handle USR1 signals.

        When a USR1 signals is caught and profiling is enabled, the
        zenhubworker's profiler will dump its current statistics before
        calling the base class's sighandler_USR1 method.

        Failures are logged and never propagate out of the handler.
        """
        try:
            if self.options.profiling:
                self.profiler.dump_stats()
            super(ZenHubWorker, self).sighandler_USR1(signum, frame)
        except Exception:
            # Best effort: a failing handler must not take the worker
            # down, but the failure should still be visible in the log.
            self.log.exception("Failed to handle USR1 signal")

    def sighandler_USR2(self, *args):
        """Handle USR2 signals by reporting the current statistics.

        Failures are logged and never propagate out of the handler.
        """
        try:
            self.reportStats()
        except Exception:
            # Best effort; see sighandler_USR1.
            self.log.exception("Failed to handle USR2 signal")

    def _work_started(self, startTime):
        """Record the start time of a unit of work and count the call."""
        self.currentStart = startTime
        self.numCalls.mark()

    def _work_finished(self, duration, method):
        """Mark the worker idle again and shut down at the call limit.

        @param duration: time spent in `method` (logged with "%.2f")
        @param method: name of the method that was executed
        """
        self.log.debug("Time in %s: %.2f", method, duration)
        self.current = IDLE
        self.currentStart = 0
        if self.numCalls.count >= self.options.call_limit:
            self.log.info(
                "Call limit of %s reached, "
                "proceeding to shutdown (and restart)",
                self.options.call_limit,
            )
            self.__reactor.callLater(0, self._shutdown)

    def reportStats(self):
        """Write zenhubworker's current statistics to the log."""
        now = time.time()
        if self.current != IDLE:
            self.log.info(
                "Currently performing %s, elapsed %.2f s",
                self.current, now - self.currentStart,
            )
        else:
            self.log.info("Currently IDLE")

        if not self.__registry:
            self.log.info("no service activity statistics")
            return

        loglines = ["Running statistics:"]
        # Order the services by the last dotted component of their name.
        sorted_data = sorted(
            self.__registry.iteritems(),
            key=lambda kv: kv[0].rpartition('.')[-1],
        )
        loglines.append(" %-50s %-32s %8s %12s %8s %s" % (
            "Service", "Method", "Count", "Total", "Average", "Last Run",
        ))
        for svc, svcob in sorted_data:
            svc = "%s" % svc.rpartition('.')[-1]
            for method, stats in sorted(svcob.callStats.items()):
                # Guard against division by zero for never-called methods.
                if stats.numoccurrences:
                    average = stats.totaltime / stats.numoccurrences
                else:
                    average = 0.0
                loglines.append(
                    " - %-48s %-32s %8d %12.2f %8.2f %s" % (
                        svc, method,
                        stats.numoccurrences,
                        stats.totaltime,
                        average,
                        isoDateTime(stats.lasttime),
                    ),
                )
        self.log.info('\n'.join(loglines))

    def remote_reportStatus(self):
        """Write zenhubworker's current statistics to the log.

        This method is the RPC interface to reportStats.
        """
        try:
            self.reportStats()
        except Exception:
            self.log.exception("Failed to report status")

    def remote_getService(self, name, monitor):
        """Return a reference to the named service.

        @param name {str} Name of the service to load
        @param monitor {str} Name of the collection monitor
        @raise RemoteBadMonitor: re-raised unchanged
        @raise UnknownServiceError: re-raised after logging an error
        @raise pb.Error: wraps any other failure
        """
        try:
            return self.__manager.getService(name, monitor)
        except RemoteBadMonitor:
            # Catch and rethrow this Exception derived exception.
            raise
        except UnknownServiceError:
            self.log.error("Service '%s' not found", name)
            raise
        except Exception as ex:
            self.log.exception("Failed to get service '%s'", name)
            raise pb.Error(str(ex))

    def remote_ping(self):
        """Return "pong".

        Used by ZenHub to determine whether zenhubworker is still active.
        """
        return "pong"

    def _shutdown(self):
        """Stop the reactor, ignoring the case where it is not running."""
        self.log.info("Shutting down")
        try:
            self.__reactor.stop()
        except error.ReactorNotRunning:
            pass

    def buildOptions(self):
        """Add optparse options to the options parser."""
        ZCmdBase.buildOptions(self)
        self.parser.add_option(
            '--hubhost', dest='hubhost', default='localhost',
            help="Host to use for connecting to ZenHub",
        )
        self.parser.add_option(
            '--hubport', dest='hubport', type='int', default=PB_PORT,
            help="Port to use for connecting to ZenHub",
        )
        self.parser.add_option(
            '--hubusername', dest='hubusername', default='admin',
            help="Login name to use when connecting to ZenHub",
        )
        self.parser.add_option(
            '--hubpassword', dest='hubpassword', default='zenoss',
            help="password to use when connecting to ZenHub",
        )
        self.parser.add_option(
            '--call-limit', dest='call_limit', type='int', default=200,
            help="Maximum number of remote calls before restarting worker",
        )
        self.parser.add_option(
            '--profiling', dest='profiling',
            action='store_true', default=False,
            help="Run with profiling on",
        )
        self.parser.add_option(
            '--monitor', dest='monitor', default='localhost',
            help='Name of the performance monitor this hub runs on',
        )
        self.parser.add_option(
            '--workerid', dest='workerid', type='int', default=0,
            help=SUPPRESS_HELP,
        )
class ZenHubWorker(ZCmdBase, pb.Referenceable):
    """Execute ZenHub requests."""

    mname = name = "zenhubworker"

    def __init__(self, reactor):
        """Initialize a ZenHubWorker instance.

        @param reactor: object used to build the hub endpoint and to
            register shutdown triggers (presumably the Twisted reactor --
            confirm against the caller).
        """
        ZCmdBase.__init__(self)

        self.__reactor = reactor

        if self.options.profiling:
            self.profiler = ContinuousProfiler('ZenHubWorker', log=self.log)
            self.profiler.start()
            reactor.addSystemEventTrigger(
                'before', 'shutdown', self.profiler.stop,
            )

        self.instanceId = self.options.workerid
        self.current = IDLE
        self.currentStart = 0
        self.numCalls = Metrology.meter("zenhub.workerCalls")

        self.zem = self.dmd.ZenEventManager
        loadPlugins(self.dmd)

        serviceFactory = ServiceReferenceFactory(self)
        self.__registry = HubServiceRegistry(self.dmd, serviceFactory)

        # Configure/initialize the ZenHub client
        creds = UsernamePassword(
            self.options.hubusername, self.options.hubpassword,
        )
        endpointDescriptor = "tcp:{host}:{port}".format(
            host=self.options.hubhost, port=self.options.hubport,
        )
        endpoint = clientFromString(reactor, endpointDescriptor)
        self.__client = ZenHubClient(reactor, endpoint, creds, self, 10.0)

        # Setup Metric Reporting
        self.log.debug("Creating async MetricReporter")
        self._metric_manager = MetricManager(
            daemon_tags={
                'zenoss_daemon': 'zenhub_worker_%s' % self.options.workerid,
                'zenoss_monitor': self.options.monitor,
                'internal': True,
            },
        )

    def start(self):
        """Start zenhubworker processing.

        Installs the USR1/USR2 signal handlers, starts the hub client and
        the metric manager, and registers their shutdown triggers with the
        reactor.  Statistics are reported after shutdown.
        """
        self.log.debug("establishing SIGUSR1 signal handler")
        signal.signal(signal.SIGUSR1, self.sighandler_USR1)
        self.log.debug("establishing SIGUSR2 signal handler")
        signal.signal(signal.SIGUSR2, self.sighandler_USR2)

        self.__client.start()
        self.__reactor.addSystemEventTrigger(
            'before', 'shutdown', self.__client.stop,
        )

        self._metric_manager.start()
        self.__reactor.addSystemEventTrigger(
            'before', 'shutdown', self._metric_manager.stop,
        )

        self.__reactor.addSystemEventTrigger(
            "after", "shutdown", self.reportStats,
        )

    def audit(self, action):
        """Override default audit behavior.

        Zenhubworker restarts frequently, so no need to audit.
        """
        pass

    def setupLogging(self):
        """Configure logging for zenhubworker.

        Override setupLogging to add instance id/count information to
        all log messages.
        """
        super(ZenHubWorker, self).setupLogging()
        instanceInfo = "(%s)" % (self.options.workerid,)
        template = (
            "%%(asctime)s %%(levelname)s %%(name)s: %s %%(message)s"
        ) % instanceInfo
        rootLog = logging.getLogger()
        formatter = logging.Formatter(template)
        for handler in rootLog.handlers:
            handler.setFormatter(formatter)

    def sighandler_USR1(self, signum, frame):
        """Handle USR1 signals.

        When a USR1 signals is caught and profiling is enabled, the
        zenhubworker's profiler will dump its current statistics before
        calling the base class's sighandler_USR1 method.

        Failures are logged and never propagate out of the handler.
        """
        try:
            if self.options.profiling:
                self.profiler.dump_stats()
            super(ZenHubWorker, self).sighandler_USR1(signum, frame)
        except Exception:
            # Best effort: a failing handler must not take the worker
            # down, but the failure should still be visible in the log.
            self.log.exception("Failed to handle USR1 signal")

    def sighandler_USR2(self, *args):
        """Handle USR2 signals by reporting the current statistics.

        Failures are logged and never propagate out of the handler.
        """
        try:
            self.reportStats()
        except Exception:
            # Best effort; see sighandler_USR1.
            self.log.exception("Failed to handle USR2 signal")

    def _work_started(self, startTime):
        """Record the start time of a unit of work and count the call."""
        self.currentStart = startTime
        self.numCalls.mark()

    def _work_finished(self, duration, method):
        """Mark the worker idle again and shut down at the call limit.

        @param duration: time spent in `method` (logged with "%.2f")
        @param method: name of the method that was executed
        """
        self.log.debug("Time in %s: %.2f", method, duration)
        self.current = IDLE
        self.currentStart = 0
        if self.numCalls.count >= self.options.call_limit:
            self.log.info(
                "Call limit of %s reached, "
                "proceeding to shutdown (and restart)",
                self.options.call_limit,
            )
            self.__reactor.callLater(0, self._shutdown)

    def reportStats(self):
        """Write zenhubworker's current statistics to the log."""
        now = time.time()
        if self.current != IDLE:
            self.log.info(
                "Currently performing %s, elapsed %.2f s",
                self.current, now - self.currentStart,
            )
        else:
            self.log.info("Currently IDLE")

        if not self.__registry:
            self.log.info("no service activity statistics")
            return

        loglines = ["Running statistics:"]
        # Registry keys are indexed as key[0] (a dotted name) and key[1];
        # sort by key[1] first, then by the last dotted component of key[0].
        sorted_data = sorted(
            self.__registry.iteritems(),
            key=lambda kvp: (kvp[0][1], kvp[0][0].rpartition('.')[-1]),
        )
        for svc, svcob in sorted_data:
            svc = "%s/%s" % (svc[1], svc[0].rpartition('.')[-1])
            for method, stats in sorted(svcob.callStats.items()):
                # Guard against division by zero for never-called methods.
                if stats.numoccurrences:
                    average = stats.totaltime / stats.numoccurrences
                else:
                    average = 0.0
                loglines.append(
                    " - %-48s %-32s %8d %12.2f %8.2f %s" % (
                        svc, method,
                        stats.numoccurrences,
                        stats.totaltime,
                        average,
                        isoDateTime(stats.lasttime),
                    ),
                )
        self.log.info('\n'.join(loglines))

    def remote_reportStatus(self):
        """Write zenhubworker's current statistics to the log.

        This method is the RPC interface to reportStats.
        """
        try:
            self.reportStats()
        except Exception:
            self.log.exception("Failed to report status")

    def remote_getService(self, name, monitor):
        """Return a reference to the named service.

        @param name {str} Name of the service to load
        @param monitor {str} Name of the collection monitor
        @raise RemoteBadMonitor: re-raised unchanged
        @raise UnknownServiceError: re-raised after logging an error
        @raise pb.Error: wraps any other failure
        """
        try:
            return self.__registry.getService(name, monitor)
        except RemoteBadMonitor:
            # Catch and rethrow this Exception derived exception.
            raise
        except UnknownServiceError:
            self.log.error("Service '%s' not found", name)
            raise
        except Exception as ex:
            self.log.exception("Failed to get service '%s'", name)
            raise pb.Error(str(ex))

    def _shutdown(self):
        """Stop the reactor, ignoring the case where it is not running."""
        self.log.info("Shutting down")
        try:
            self.__reactor.stop()
        except error.ReactorNotRunning:
            pass

    def buildOptions(self):
        """Add optparse options to the options parser."""
        ZCmdBase.buildOptions(self)
        self.parser.add_option(
            '--hubhost', dest='hubhost', default='localhost',
            help="Host to use for connecting to ZenHub",
        )
        self.parser.add_option(
            '--hubport', dest='hubport', type='int', default=PB_PORT,
            help="Port to use for connecting to ZenHub",
        )
        self.parser.add_option(
            '--hubusername', dest='hubusername', default='admin',
            help="Login name to use when connecting to ZenHub",
        )
        self.parser.add_option(
            '--hubpassword', dest='hubpassword', default='zenoss',
            help="password to use when connecting to ZenHub",
        )
        self.parser.add_option(
            '--call-limit', dest='call_limit', type='int', default=200,
            help="Maximum number of remote calls before restarting worker",
        )
        self.parser.add_option(
            '--profiling', dest='profiling',
            action='store_true', default=False,
            help="Run with profiling on",
        )
        self.parser.add_option(
            '--monitor', dest='monitor', default='localhost',
            help='Name of the performance monitor this hub runs on',
        )
        self.parser.add_option(
            '--workerid', dest='workerid', type='int', default=0,
            help=SUPPRESS_HELP,
        )
class ZenHub(ZCmdBase):
    """A server managing access to the Model and Event databases.

    Listen for changes to objects in the Zeo database and update the
    collectors' configuration.

    The remote collectors connect to ZenHub, request configuration
    information, and stay connected.  When changes are detected in the
    Zeo database, configuration updates are sent out to collectors
    asynchronously.  In this way, changes made in the web GUI can affect
    collection immediately, instead of waiting for a configuration cycle.

    Each collector uses a different, pluggable service within ZenHub
    to translate objects into configuration and data.  ZenPacks can
    add services for their collectors.  Collectors communicate using
    Twisted's Perspective Broker, which provides authenticated,
    asynchronous, bidirectional method invocation.

    ZenHub also provides an XmlRPC interface to some common services
    to support collectors written in other languages.

    ZenHub does very little work in its own process, but instead dispatches
    the work to a pool of zenhubworkers, running zenhubworker.py. zenhub
    manages these workers with 1 data structure:
    - workers - a list of remote PB instances

    TODO: document invalidation workers
    """

    # Class-level statistics defaults.
    totalTime = 0.
    totalEvents = 0
    totalCallTime = 0.
    # Daemon name; used as the default event component and in the
    # "<name> started" event summary.
    mname = name = 'zenhub'

    def __init__(self):
        """Hook up to the Zeo database and build ZenHub's components.

        Loads the hub ZCML configuration, fires HubWillBeCreatedEvent and
        HubCreatedEvent, sends an App_Start event, creates the RPC server
        pieces, the invalidation and metric managers, and installs the
        SIGUSR2 handler.
        """
        self.shutdown = False

        super(ZenHub, self).__init__()

        load_config("hub.zcml", ZENHUB_MODULE)
        notify(HubWillBeCreatedEvent(self))

        if self.options.profiling:
            self.profiler = ContinuousProfiler('zenhub', log=self.log)
            self.profiler.start()

        self.zem = self.dmd.ZenEventManager

        # responsible for sending messages to the queues
        load_config_override('twistedpublisher.zcml', QUEUEMESSAGING_MODULE)
        notify(HubCreatedEvent(self))
        self.sendEvent(eventClass=App_Start,
                       summary="%s started" % self.name,
                       severity=0)

        # Initialize ZenHub's RPC servers
        self._monitor = StatsMonitor()
        self._status_reporter = ZenHubStatusReporter(self._monitor)
        self._pools = make_pools()
        self._service_manager = make_service_manager(self._pools)
        authenticators = getCredentialCheckers(self.options.passwordfile)
        self._server_factory = make_server_factory(
            self._pools, self._service_manager, authenticators,
        )
        self._xmlrpc_manager = XmlRpcManager(self.dmd, authenticators[0])
        register_legacy_worklist_metrics()

        # Invalidation Processing
        self._invalidation_manager = InvalidationManager(
            self.dmd,
            self.log,
            self.async_syncdb,
            self.storage.poll_invalidations,
            self.sendEvent,
            poll_interval=self.options.invalidation_poll_interval,
        )

        # Setup Metric Reporting
        self._metric_manager = MetricManager(
            daemon_tags={
                'zenoss_daemon': 'zenhub',
                'zenoss_monitor': self.options.monitor,
                'internal': True,
            })
        provideUtility(self._metric_manager)
        self._metric_writer = self._metric_manager.metric_writer
        self.rrdStats = self._metric_manager.get_rrd_stats(
            self._getConf(), self.zem.sendEvent,
        )

        # set up SIGUSR2 handling
        try:
            signal.signal(signal.SIGUSR2, self.sighandler_USR2)
        except ValueError as ex:
            # If we get called multiple times, this will generate an exception:
            # ValueError: signal only works in main thread
            # Ignore it as we've already set up the signal handler.
            self.log.warning(
                "Exception registering USR2 signal handler: %s", ex,
            )
        # ZEN-26671 Wait at least this duration in secs
        # before signaling a worker process
        self.SIGUSR_TIMEOUT = 5

    def main(self):
        """Start the main event loop.

        Blocks in reactor.run(); once the reactor returns, the shutdown
        flag is set, the IEventPublisher utility is closed and the
        profiler (when profiling is enabled) is stopped.
        """
        # NOTE(review): the extent of this `if` body was reconstructed from
        # whitespace-collapsed source -- confirm against the original file.
        if self.options.cycle:
            reactor.callLater(0, self.heartbeat)
            self.log.debug("Creating async MetricReporter")
            self._metric_manager.start()
            reactor.addSystemEventTrigger(
                'before', 'shutdown', self._metric_manager.stop,
            )
            # preserve legacy API
            self.metricreporter = self._metric_manager.metricreporter

        # Start ZenHub services server
        start_server(reactor, self._server_factory)

        # Start XMLRPC server
        self._xmlrpc_manager.start(reactor)

        # Start Processing Invalidations
        self.process_invalidations_task = task.LoopingCall(
            self._invalidation_manager.process_invalidations,
        )
        self.process_invalidations_task.start(
            self.options.invalidation_poll_interval,
        )

        reactor.run()

        self.shutdown = True
        getUtility(IEventPublisher).close()

        if self.options.profiling:
            self.profiler.stop()

    @property
    def counters(self):
        """Return the `counters` attribute of the stats monitor."""
        return self._monitor.counters

    def sighandler_USR2(self, signum, frame):
        """Handle SIGUSR2: log the status report and notify workers.

        Failures are logged and never propagate out of the handler.
        """
        try:
            self.log.info("\n%s\n", self._status_reporter.getReport())
            notify(ReportWorkerStatus())
        except Exception:
            self.log.exception("Failed to produce report")

    def sighandler_USR1(self, signum, frame):
        """Handle SIGUSR1: dump profiler stats when profiling is enabled,
        then defer to the base class handler.
        """
        if self.options.profiling:
            self.profiler.dump_stats()
        super(ZenHub, self).sighandler_USR1(signum, frame)

    def stop(self):
        """Set the shutdown flag; nothing else is stopped here."""
        self.shutdown = True

    def _getConf(self):
        """Return the hub configuration from the IHubConfProvider adapter."""
        confProvider = IHubConfProvider(self)
        return confProvider.getHubConf()

    def getService(self, service, monitor):
        """Return the service manager's service for `service`/`monitor`."""
        return self._service_manager.getService(service, monitor)

    # Legacy API
    def getRRDStats(self):
        """Return the metric manager's RRD stats for the hub configuration."""
        return self._metric_manager.get_rrd_stats(
            self._getConf(), self.zem.sendEvent,
        )

    # Legacy API
    @inlineCallbacks
    def processQueue(self):
        """Periodically process database changes."""
        yield self._invalidation_manager.process_invalidations()

    # Legacy API
    def _initialize_invalidation_filters(self):
        """Store the invalidation manager's initialized filters on
        `self._invalidation_filters`.
        """
        self._invalidation_filters = self._invalidation_manager\
            .initialize_invalidation_filters()

    def sendEvent(self, **kw):
        """Post events to the EventManager.

        'device' defaults to the configured monitor and 'component' to
        this daemon's name when the caller does not supply them.  A failure
        to send is logged rather than raised.

        @type kw: keywords (dict)
        @param kw: the values for an event: device, summary, etc.
        @return: None
        """
        if 'device' not in kw:
            kw['device'] = self.options.monitor
        if 'component' not in kw:
            kw['component'] = self.name
        try:
            self.zem.sendEvent(Event(**kw))
        except Exception:
            self.log.exception("Unable to send an event")

    def heartbeat(self):
        """Send Heartbeat events.

        Also used to update legacy metrics/statistics data.  Reschedules
        itself every 30 seconds and finally runs the IHubHeartBeatCheck
        hook (errors from the hook are logged).
        """
        seconds = 30
        evt = EventHeartbeat(
            self.options.monitor, self.name, self.options.heartbeatTimeout,
        )
        self.zem.sendEvent(evt)
        self.niceDoggie(seconds)
        # Reschedule before publishing statistics.
        reactor.callLater(seconds, self.heartbeat)

        r = self.rrdStats
        r.counter(
            'totalTime', int(self._invalidation_manager.totalTime * 1000),
        )
        r.counter('totalEvents', self._invalidation_manager.totalEvents)
        self._monitor.update_rrd_stats(r, self._service_manager)

        try:
            hbcheck = IHubHeartBeatCheck(self)
            hbcheck.check()
        except Exception:
            self.log.exception("Error processing heartbeat hook")

    def buildOptions(self):
        """Add ZenHub command-line options.

        Fires ParserReadyForOptionsEvent with the parser once ZenHub's own
        options have been added.
        """
        ZCmdBase.buildOptions(self)
        self.parser.add_option(
            '--xmlrpcport', '-x', dest='xmlrpcport',
            type='int', default=server_config.defaults.xmlrpcport,
            help='Port to use for XML-based Remote Procedure Calls (RPC)')
        self.parser.add_option(
            '--pbport', dest='pbport',
            type='int', default=server_config.defaults.pbport,
            help="Port to use for Twisted's pb service")
        self.parser.add_option(
            '--passwd', dest='passwordfile',
            type='string', default=zenPath('etc', 'hubpasswd'),
            help='File where passwords are stored')
        self.parser.add_option(
            '--monitor', dest='monitor',
            default='localhost',
            help='Name of the distributed monitor this hub runs on')
        self.parser.add_option(
            '--workers-reserved-for-events', dest='workersReservedForEvents',
            type='int', default=1,
            help="Number of worker instances to reserve for handling events")
        # No explicit dest: the value is read elsewhere in this class as
        # self.options.invalidation_poll_interval.
        self.parser.add_option(
            '--invalidation-poll-interval',
            type='int', default=30,
            help="Interval at which to poll invalidations (default: %default)")
        self.parser.add_option(
            '--profiling', dest='profiling',
            action='store_true', default=False,
            help="Run with profiling on")
        # No explicit dest: read as self.options.modeling_pause_timeout.
        self.parser.add_option(
            '--modeling-pause-timeout',
            type='int', default=server_config.defaults.modeling_pause_timeout,
            help='Maximum number of seconds to pause modeling during ZenPack'
                 ' install/upgrade/removal (default: %default)')

        notify(ParserReadyForOptionsEvent(self.parser))

    def parseOptions(self):
        """Parse options and publish them as the hub server configuration.

        Override parseOptions to initialize and install the
        ServiceManager configuration utility.
        """
        super(ZenHub, self).parseOptions()
        server_config.modeling_pause_timeout = \
            int(self.options.modeling_pause_timeout)
        server_config.xmlrpcport = int(self.options.xmlrpcport)
        server_config.pbport = int(self.options.pbport)
        config_util = server_config.ModuleObjectConfig(server_config)
        provideUtility(config_util, IHubServerConfig)
def __init__(self):
    """Hook up to the Zeo database and build ZenHub's components.

    Loads the hub ZCML configuration, fires HubWillBeCreatedEvent and
    HubCreatedEvent, sends an App_Start event, creates the RPC server
    pieces, the invalidation and metric managers, and installs the
    SIGUSR2 handler.
    """
    self.shutdown = False

    super(ZenHub, self).__init__()

    load_config("hub.zcml", ZENHUB_MODULE)
    notify(HubWillBeCreatedEvent(self))

    if self.options.profiling:
        self.profiler = ContinuousProfiler('zenhub', log=self.log)
        self.profiler.start()

    self.zem = self.dmd.ZenEventManager

    # responsible for sending messages to the queues
    load_config_override('twistedpublisher.zcml', QUEUEMESSAGING_MODULE)
    notify(HubCreatedEvent(self))
    self.sendEvent(eventClass=App_Start,
                   summary="%s started" % self.name,
                   severity=0)

    # Initialize ZenHub's RPC servers
    self._monitor = StatsMonitor()
    self._status_reporter = ZenHubStatusReporter(self._monitor)
    self._pools = make_pools()
    self._service_manager = make_service_manager(self._pools)
    authenticators = getCredentialCheckers(self.options.passwordfile)
    self._server_factory = make_server_factory(
        self._pools, self._service_manager, authenticators,
    )
    self._xmlrpc_manager = XmlRpcManager(self.dmd, authenticators[0])
    register_legacy_worklist_metrics()

    # Invalidation Processing
    self._invalidation_manager = InvalidationManager(
        self.dmd,
        self.log,
        self.async_syncdb,
        self.storage.poll_invalidations,
        self.sendEvent,
        poll_interval=self.options.invalidation_poll_interval,
    )

    # Setup Metric Reporting
    self._metric_manager = MetricManager(
        daemon_tags={
            'zenoss_daemon': 'zenhub',
            'zenoss_monitor': self.options.monitor,
            'internal': True,
        })
    provideUtility(self._metric_manager)
    self._metric_writer = self._metric_manager.metric_writer
    self.rrdStats = self._metric_manager.get_rrd_stats(
        self._getConf(), self.zem.sendEvent,
    )

    # set up SIGUSR2 handling
    try:
        signal.signal(signal.SIGUSR2, self.sighandler_USR2)
    except ValueError as ex:
        # If we get called multiple times, this will generate an exception:
        # ValueError: signal only works in main thread
        # Ignore it as we've already set up the signal handler.
        self.log.warning(
            "Exception registering USR2 signal handler: %s", ex,
        )
    # ZEN-26671 Wait at least this duration in secs
    # before signaling a worker process
    self.SIGUSR_TIMEOUT = 5