def main():
    """Start up NagCat, profiling things as requested"""
    options = parse_options()

    # Decide once whether any profiling was requested; a single
    # profiler instance covers both init and the reactor run.
    profiling = options.profile_init or options.profile_run
    profiler = None
    if profiling:
        import cProfile
        profiler = cProfile.Profile()

    if options.profile_init:
        profiler.runcall(init, options)
    else:
        init(options)

    if options.profile_run:
        profiler.runcall(reactor.run)
    else:
        reactor.run()

    if profiling:
        if options.profile_dump:
            # Raw stats file for later offline analysis.
            log.info("Dumping profiler data to %s" % options.profile_dump)
            profiler.dump_stats(options.profile_dump)
        else:
            # No dump file requested: print the top 40 entries now.
            log.info("Generating profiler stats...")
            import pstats
            stats = pstats.Stats(profiler)
            stats.strip_dirs()
            stats.sort_stats('time', 'cumulative')
            stats.print_stats(40)
def shutdown(self):
    """Remove any unused spool files"""
    # Nothing queued means nothing to clean up.
    if not self._data_queue:
        return
    log.info("Removing unsubmitted results")
    while self._data_queue:
        self._cleanup()
def _log_stats(self):
    """Report the number of tasks"""
    stats = self._task_stats
    log.info("Tasks: %s", stats['count'])
    log.info("Groups: %s", stats['Group']['count'])
    log.info("Tests: %s", stats['Test']['count'])
    log.info("Queries: %s", stats['Query']['count'])
    # The 'Query' entry holds a per-type breakdown alongside the
    # aggregate 'count' key, which we must skip here.
    for qtype, qinfo in stats['Query'].iteritems():
        if qtype != "count":
            log.info("Query %s: %s", qtype, qinfo['count'])
def start(self):
    """Start up the scheduler!"""
    assert self._startup and not self._shutdown
    self._startup = False
    self._shutdown = deferred = defer.Deferred()
    del self._group_index

    # With nothing registered there is no work to do at all.
    if not self._registered:
        self.stop()
        return deferred

    if self.monitor:
        reactor.listenTCP(self._monitor_port, self.monitor)

    self._log_stats()

    # Bucket runnables by target host so a single host is never hit
    # by a burst of simultaneous queries.
    by_host = {}
    for task in self._registered:
        task.finalize()
        by_host.setdefault(task.host, []).append(task)

    for host, tasks in by_host.iteritems():
        log.debug("Scheduling host %s", host)
        # Spread the group's start times evenly over a one-minute
        # window: the first task starts at a random point inside the
        # first slot, each later task exactly one slot after the
        # previous one.
        slot = 60.0 / len(tasks)
        assert slot
        offset = random.random() * slot
        for task in tasks:
            self.schedule(task, offset)
            offset += slot

    # Start latency self-checker
    self._latency_call = reactor.callLater(1.0, self.latency, time.time())

    log.info("Startup complete, running...")
    return deferred
def __init__(self, config, nagios_cfg, **kwargs):
    """Read given Nagios config file and load tests"""
    # Only these four directives are needed from the Nagios config.
    wanted = ('object_cache_file', 'status_file',
              'command_file', 'check_result_path')
    cfg = nagios_objects.ConfigParser(nagios_cfg, wanted)

    self._nagios_obj = cfg['object_cache_file']
    self._nagios_cmd = nagios_api.NagiosCommander(
        cfg['command_file'],
        nagios_api.spool_path(cfg['check_result_path'], 'nagcat'))
    self._status_file = cfg['status_file']
    # Cached parse of the status file plus its mtime, so repeated
    # reads can be skipped while the file is unchanged.
    self._status_cache = None
    self._status_mtime = 0

    log.info("Using Nagios object cache: %s", self._nagios_obj)
    log.info("Using Nagios command file: %s", cfg['command_file'])
    log.info("Using Nagios status file: %s", self._status_file)

    return super(NagcatNagios, self).__init__(config, **kwargs)
def __init__(self, config, nagios_cfg, **kwargs): """Read given Nagios config file and load tests""" # TODO: The NagcatNagios class needs to be easier to test, # that way we can actually call the __init__ for it in unit tests # and test NagcatMerlin and NagcatNagios more effectively. # More specifically, nagios_cfg currently *has* to be a file, # which makes unit testing extraordinarily difficult. cfg = nagios_objects.ConfigParser(nagios_cfg, ('object_cache_file', 'status_file', 'command_file', 'check_result_path')) self._nagios_obj = cfg['object_cache_file'] spool = nagios_api.spool_path(cfg['check_result_path'], 'nagcat') self._nagios_cmd = nagios_api.NagiosCommander( cfg['command_file'], spool) self._status_file = cfg['status_file'] self._status_cache = None self._status_mtime = 0 log.info("Using Nagios object cache: %s", self._nagios_obj) log.info("Using Nagios command file: %s", cfg['command_file']) log.info("Using Nagios status file: %s", self._status_file) return super(NagcatNagios, self).__init__(config, **kwargs)
def _report(self, report):
    """Log the full text of a finished report.

    report -- mapping with at least a "text" entry.
    """
    # Pass the value as a lazy %-argument, matching the comma style
    # of every other log.info call in this file, instead of eagerly
    # formatting the string before the logger decides to emit it.
    log.info("REPORT:\n%s", report["text"])
def do_cmd(force=False):
    # Closure: 'calls', 'commands', and 'self' come from the
    # enclosing scope (not visible in this chunk).
    # Clearing these entries presumably marks the pending
    # shutdown/timer callbacks as consumed — confirm in caller.
    calls['shutdown'] = None
    calls['timer'] = None
    log.info("Canceling %s downtimes in Nagios" % len(commands))
    # Submit the accumulated cancel commands to Nagios.
    self._cmdobj.cmdlist(None, commands, force)
def xmlrpc_scheduleDowntime(self, expr, start, stop, user, comment):
    """schedule host and service downtimes

    expr: an expression defining the set to operate on
        operators:
            or (the union of two sets)
            and (the intersection of two sets)
        identifiers:
            host:hostname
            service:servicename
            hostgroup:groupname
            servicegroup:groupname

        Quotes (' or ") must be placed around service names when
        they contain whitespace.

        If a name contains any of the characters []?*+^$ it will
        be treated as a regular expression, otherwise it must be
        an exact match.

    start: date/time to start (in seconds since epoch!)
    stop: date/time to auto-cancel the downtime
    user: identifier defining who/what sent this request
    comment: arbitrary comment about the downtime

    returns a key to use to cancel this downtime early
    """
    # NOTE: the docstring above is user-visible API help (twisted
    # xmlrpc exposes it via system.methodHelp) so it is left as-is.

    # Only catch the conversion errors int() can actually raise;
    # the previous bare except also swallowed KeyboardInterrupt
    # and SystemExit.
    try:
        start = int(start)
        stop = int(stop)
    except (TypeError, ValueError):
        raise xmlrpc.Fault(1, "start/stop must be integers")

    # Clamp the start into the present; a stop in the past is an error.
    now = int(time.time())
    if start < now:
        start = now
    if stop < now:
        raise xmlrpc.Fault(1, "stop must be in the future")

    # Build a short, (probabilistically) unique cancellation key from
    # the current time plus a random byte, base64'd without padding.
    key = base64.urlsafe_b64encode(struct.pack('ib',
            now, random.randint(-127, 127))).strip('=')
    # Embed the key and expression into the comment so the downtime
    # can be found again later for early cancellation.
    comment += ' key:%s expr:%s' % (key, expr.strip())

    # The trailing ')' terminates the expression for the parser.
    tokenizer = self._groupTokenizer(expr + ')')
    group_set = self._groupParser(tokenizer)
    if not group_set:
        raise xmlrpc.Fault(1, "expression evaluated to an empty set")

    # Translate each (host|service, ...) entry into a Nagios
    # external command tuple; a set de-duplicates repeats.
    commands = set()
    for item in group_set:
        if item[0] == 'host':
            commands.add(('SCHEDULE_HOST_DOWNTIME', item[1],
                start, stop, 1, 0, 0, user, comment))
        elif item[0] == 'service':
            commands.add(('SCHEDULE_SVC_DOWNTIME', item[1], item[2],
                start, stop, 1, 0, 0, user, comment))
        else:
            # The parser should only ever yield host/service items.
            assert 0

    log.info("Scheduling %s downtimes in Nagios" % len(commands))
    self._cmdobj.cmdlist(now, commands)
    return key
def _reopen_file(self): """Attempt to reopen the pipe.""" if self._timer: if not self._timer.called: self._timer.cancel() self._timer = None try: self._open_file() except OSError, (errno, errmsg): log.warn("Failed to reopen nagios pipe: %s" % errmsg) self._timer = reactor.callLater(10.0, self._reopen_file) else: log.info("Reopened nagios pipe, resuming writes.") def _open_file(self): """Open a named pipe file for writing.""" self._close_file() self._fd = os.open(self._file, os.O_WRONLY | os.O_NONBLOCK) self.startWriting() def _close_file(self): """Close the named pipe if open""" if self._fd is not None: self.stopWriting() try: os.close(self._fd) except OSError: