Beispiel #1
0
def main():
    """Start NagCat, profiling initialisation and/or the reactor on request.

    Options are obtained from parse_options(). With ``profile_init`` set,
    ``init(options)`` runs under cProfile; with ``profile_run`` set,
    ``reactor.run()`` does. If either phase was profiled, the collected
    data is dumped to ``options.profile_dump`` when that is set, otherwise
    the first 40 entries (sorted by 'time', then 'cumulative') are printed.
    """
    options = parse_options()

    # A single profiler is shared by both phases so that the final report
    # covers whichever of them were requested.
    profiling = options.profile_init or options.profile_run
    if profiling:
        import cProfile
        profiler = cProfile.Profile()

    if options.profile_init:
        profiler.runcall(init, options)
    else:
        init(options)

    if options.profile_run:
        profiler.runcall(reactor.run)
    else:
        reactor.run()

    if not profiling:
        return

    if options.profile_dump:
        log.info("Dumping profiler data to %s" % options.profile_dump)
        profiler.dump_stats(options.profile_dump)
        return

    log.info("Generating profiler stats...")
    import pstats
    # Each Stats method returns the Stats object, so the calls chain.
    report = pstats.Stats(profiler)
    report.strip_dirs().sort_stats('time', 'cumulative').print_stats(40)
Beispiel #2
0
    def shutdown(self):
        """Remove any unused spool files.

        Does nothing when the data queue is already empty. Otherwise logs
        once and keeps calling ``_cleanup()`` until the queue is empty.
        """
        if not self._data_queue:
            return

        log.info("Removing unsubmitted results")

        # NOTE(review): relies on _cleanup() shrinking self._data_queue on
        # every call, otherwise this loop never ends -- confirm.
        while self._data_queue:
            self._cleanup()
Beispiel #3
0
    def _log_stats(self):
        """Log the task counters held in ``self._task_stats``.

        Emits the overall count, the 'Group', 'Test' and 'Query' counts,
        and then one line for each query type found under 'Query'.
        """
        stats = self._task_stats

        log.info("Tasks: %s", stats['count'])
        log.info("Groups: %s", stats['Group']['count'])
        log.info("Tests: %s", stats['Test']['count'])

        queries = stats['Query']
        log.info("Queries: %s", queries['count'])

        # Besides the aggregate "count", each key under 'Query' is a query
        # type carrying its own 'count'.
        for kind, info in queries.iteritems():
            if kind != "count":
                log.info("Query %s: %s", kind, info['count'])
Beispiel #4
0
    def start(self):
        """Start up the scheduler!

        Must be called exactly once, while still in the startup phase.
        Finalizes every registered runnable, schedules them spread out per
        host, starts the latency self-check and returns the Deferred that
        is stored in ``self._shutdown``. If nothing was registered,
        ``stop()`` is called immediately and the same Deferred is returned.
        """
        assert self._startup and not self._shutdown
        self._startup = False
        deferred = defer.Deferred()
        self._shutdown = deferred
        del self._group_index

        # Nothing to run: go straight to stop().
        if not self._registered:
            self.stop()
            return deferred

        if self.monitor:
            reactor.listenTCP(self._monitor_port, self.monitor)

        self._log_stats()

        # Bucket runnables by the host they query so that a single host is
        # not hit with many queries at once.
        by_host = {}
        for task in self._registered:
            task.finalize()
            by_host.setdefault(task.host, []).append(task)

        for host, tasks in by_host.iteritems():
            log.debug("Scheduling host %s", host)
            # Divide one minute into equal slots, one per runnable. The
            # first runnable starts at a random point inside the first
            # slot and each following one starts a full slot later, which
            # spreads the queries to this host evenly.
            slot = 60.0 / len(tasks)
            assert slot
            offset = random.random() * slot

            for task in tasks:
                self.schedule(task, offset)
                offset += slot

        # Start latency self-checker
        self._latency_call = reactor.callLater(1.0, self.latency, time.time())

        log.info("Startup complete, running...")
        return deferred
Beispiel #5
0
    def __init__(self, config, nagios_cfg, **kwargs):
        """Read given Nagios config file and load tests.

        Parses ``nagios_cfg`` for the object cache, status file, command
        file and check result path settings, builds the Nagios commander
        from them, and then hands ``config`` and ``kwargs`` on to the
        parent initializer.
        """
        wanted = ('object_cache_file', 'status_file',
                  'command_file', 'check_result_path')
        settings = nagios_objects.ConfigParser(nagios_cfg, wanted)

        self._nagios_obj = settings['object_cache_file']
        spool_dir = nagios_api.spool_path(
                settings['check_result_path'], 'nagcat')
        command_file = settings['command_file']
        self._nagios_cmd = nagios_api.NagiosCommander(command_file, spool_dir)

        # Nothing has been read from the status file yet.
        self._status_file = settings['status_file']
        self._status_cache = None
        self._status_mtime = 0

        log.info("Using Nagios object cache: %s", self._nagios_obj)
        log.info("Using Nagios command file: %s", command_file)
        log.info("Using Nagios status file: %s", self._status_file)
        return super(NagcatNagios, self).__init__(config, **kwargs)
Beispiel #6
0
    def __init__(self, config, nagios_cfg, **kwargs):
        """Read given Nagios config file and load tests.

        Pulls the object cache, status file, command file and check result
        path settings out of ``nagios_cfg``, creates the Nagios commander,
        and finally delegates to the parent initializer with ``config``
        and ``kwargs``.
        """
        # TODO: The NagcatNagios class needs to be easier to test,
        # that way we can actually call the __init__ for it in unit tests
        # and test NagcatMerlin and NagcatNagios more effectively.
        # More specifically, nagios_cfg currently *has* to be a file,
        # which makes unit testing extraordinarily difficult.
        option_names = ('object_cache_file', 'status_file',
                        'command_file', 'check_result_path')
        parsed = nagios_objects.ConfigParser(nagios_cfg, option_names)

        self._nagios_obj = parsed['object_cache_file']
        spool = nagios_api.spool_path(parsed['check_result_path'], 'nagcat')
        cmd_path = parsed['command_file']
        self._nagios_cmd = nagios_api.NagiosCommander(cmd_path, spool)

        # The status cache starts out empty with a zero mtime.
        self._status_file = parsed['status_file']
        self._status_cache = None
        self._status_mtime = 0

        log.info("Using Nagios object cache: %s", self._nagios_obj)
        log.info("Using Nagios command file: %s", cmd_path)
        log.info("Using Nagios status file: %s", self._status_file)
        return super(NagcatNagios, self).__init__(config, **kwargs)
Beispiel #7
0
 def _report(self, report):
     """Write the ``"text"`` entry of ``report`` to the info log."""
     text = report["text"]
     log.info("REPORT:\n%s" % text)
Beispiel #8
0
 def do_cmd(force=False):
     """Reset the pending call handles and submit the cancel commands.

     ``calls``, ``commands`` and ``self`` come from the enclosing scope.
     ``force`` is passed through unchanged to ``cmdlist``.
     """
     # Clear both handles before submitting.
     for handle in ('shutdown', 'timer'):
         calls[handle] = None

     total = len(commands)
     log.info("Canceling %s downtimes in Nagios" % total)
     self._cmdobj.cmdlist(None, commands, force)
Beispiel #9
0
    def xmlrpc_scheduleDowntime(self, expr, start, stop, user, comment):
        """Schedule host and service downtimes

        expr: an expression defining the set to operate on
            operators:
                or (the union of two sets)
                and (the intersection of two sets)
            identifiers:
                host:hostname
                service:servicename
                hostgroup:groupname
                servicegroup:groupname

            Quotes (' or ") must be placed around service names
            when they contain whitespace.

            If a name contains any of the characters []?*+^$ it
            will be treated as a regular expression, otherwise it
            must be an exact match.

        start: date/time to start (in seconds since epoch!)
            A start time in the past is moved forward to now.
        stop: date/time to auto-cancel the downtime
        user: identifier defining who/what sent this request
        comment: arbitrary comment about the downtime

        returns a key to use to cancel this downtime early

        raises xmlrpc.Fault (code 1) when start/stop cannot be converted
        to integers, when stop is in the past or earlier than start, or
        when expr evaluates to an empty set
        """
        # Only conversion failures mean "bad input". A bare except here
        # would also swallow KeyboardInterrupt and SystemExit.
        try:
            start = int(start)
            stop = int(stop)
        except (TypeError, ValueError, OverflowError):
            raise xmlrpc.Fault(1, "start/stop must be integers")

        now = int(time.time())

        if start < now:
            start = now
        if stop < now:
            raise xmlrpc.Fault(1, "stop must be in the future")
        # With a start time in the future, stop can be >= now and still
        # fall before start; such a window can never be valid.
        if stop < start:
            raise xmlrpc.Fault(1, "stop must not be before start")

        # The key is the request time plus one random byte, URL-safe
        # base64 encoded with the padding removed. It is embedded in the
        # downtime comment together with the expression.
        # NOTE(review): 'i' packs `now` as a C int, so this will fail once
        # the epoch time no longer fits -- confirm before changing the
        # key format, other code may depend on it.
        key = base64.urlsafe_b64encode(struct.pack('ib',
                now, random.randint(-127,127))).strip('=')
        comment += ' key:%s expr:%s' % (key, expr.strip())

        # NOTE(review): the trailing ')' presumably closes the implicit
        # top-level group for the parser -- confirm against _groupParser.
        tokenizer = self._groupTokenizer(expr+')')
        group_set = self._groupParser(tokenizer)

        if not group_set:
            raise xmlrpc.Fault(1, "expression evaluated to an empty set")

        # A set is used so identical commands are only submitted once.
        commands = set()
        for item in group_set:
            if item[0] == 'host':
                commands.add(('SCHEDULE_HOST_DOWNTIME', item[1],
                        start, stop, 1, 0, 0, user, comment))
            elif item[0] == 'service':
                commands.add(('SCHEDULE_SVC_DOWNTIME', item[1], item[2],
                        start, stop, 1, 0, 0, user, comment))
            else:
                assert 0, "unexpected item type: %r" % (item[0],)

        log.info("Scheduling %s downtimes in Nagios" % len(commands))
        self._cmdobj.cmdlist(now, commands)

        return key
Beispiel #10
0
    def _reopen_file(self):
        """Attempt to reopen the pipe.

        Any retry timer that is still pending is cancelled first. If
        ``_open_file()`` raises OSError, a warning is logged and another
        attempt is scheduled 10 seconds later; otherwise the successful
        reopen is logged.
        """
        if self._timer:
            # A timer that has not fired yet must be cancelled so that
            # only one retry is ever pending.
            if not self._timer.called:
                self._timer.cancel()
            self._timer = None

        try:
            self._open_file()
        except OSError as ex:
            # strerror is the message part of an (errno, message) OSError.
            # Fall back to str(ex) for errors built without one, rather
            # than failing inside the handler.
            errmsg = ex.strerror or str(ex)
            log.warn("Failed to reopen nagios pipe: %s" % errmsg)
            self._timer = reactor.callLater(10.0, self._reopen_file)
        else:
            log.info("Reopened nagios pipe, resuming writes.")

    def _open_file(self):
        """Open a named pipe file for writing."""
        self._close_file()
        self._fd = os.open(self._file, os.O_WRONLY | os.O_NONBLOCK)
        self.startWriting()

    def _close_file(self):
        """Close the named pipe if open"""

        if self._fd is not None:
            self.stopWriting()
            try:
                os.close(self._fd)
            except OSError: