Ejemplo n.º 1
0
Archivo: nagios.py Proyecto: mv/nagcat
    def _send_report(self, report, host_name, service_description):
        """Submit a service check result to Nagios.

        The 'time', 'state_id' and 'text' entries of report are handed,
        together with the host name and service description, to the
        Nagios command object as a PROCESS_SERVICE_CHECK_RESULT command.
        """
        log.debug("Submitting report for %s %s to Nagios",
                host_name, service_description)

        timestamp = report['time']
        arguments = (host_name, service_description,
                     report['state_id'], report['text'])
        self._nagios_cmd.command(
                timestamp, 'PROCESS_SERVICE_CHECK_RESULT', *arguments)
Ejemplo n.º 2
0
 def _set_peer_id_and_timestamp(self):
     """Refresh the peer id, its timestamp and the peer count from Merlin.

     Connects to the database described by self._merlin_db_info and
     selects the merlin_peers rows with state=3.  The number of rows is
     stored in self._num_peers.  For every row whose first column is
     "localhost", the sixth column is stored as self._peer_id and the
     current time as self._peer_id_timestamp.

     MySQLdb errors are logged and otherwise swallowed, so attributes
     that were not updated before the failure keep their old values.
     """
     try:
         db = MySQLdb.connect(
             user=self._merlin_db_info['merlin_db_user'],
             host=self._merlin_db_info['merlin_db_host'],
             passwd=self._merlin_db_info['merlin_db_pass'],
             db=self._merlin_db_info['merlin_db_name'])
         try:
             curs = db.cursor()
             num_rows = curs.execute(
                 """select * from merlin_peers where state=3;""")
             self._num_peers = num_rows
             log.debug("Setting self._num_peers = %s", self._num_peers)
             for row in curs.fetchall():
                 if row[0] == "localhost":
                     self._peer_id = row[5]
                     self._peer_id_timestamp = time.time()
                     # One format string with two arguments; the previous
                     # code passed a tuple as the message, which was never
                     # formatted.
                     log.debug("Setting self._peer_id = %s "
                         "and self._peer_id_timestamp = %s",
                         self._peer_id, self._peer_id_timestamp)
         finally:
             # This runs on every refresh, so the connection must not be
             # left open.
             db.close()
     except MySQLdb.Error, e:
         # Not every MySQLdb error carries an (errno, message) pair.
         if len(e.args) >= 2:
             log.error("Error reading merlin db %d: %s" %
                 (e.args[0], e.args[1]))
         else:
             log.error("Error reading merlin db: %s" % (e,))
Ejemplo n.º 3
0
    def _apply_time_limit(self, state):
        """Turn a WARNING into CRITICAL once it has lasted too long.

        Returns state unchanged unless a warning time limit is set, state
        is "WARNING", Nagios already reports this service with a last hard
        state of '1', and self._now is later than the last hard state
        change plus the limit.  In that case "CRITICAL" is returned.
        """
        if not (self._warning_time_limit and state == "WARNING"):
            return state

        services = self._nagcat.nagios_status()['service']

        # Locate this test's own entry in the Nagios status data.
        for found in services:
            if (found['service_description'] == self._description
                    and found['host_name'] == self.host):
                break
        else:
            return state

        if found['last_hard_state'] != '1': # WARNING
            return state

        deadline = (int(found['last_hard_state_change']) +
                    self._warning_time_limit)
        if self._now > deadline:
            log.debug("Warning time limit of %s exceeded for %s",
                    self._warning_time_limit, self)
            return "CRITICAL"

        return state
Ejemplo n.º 4
0
def daemonize(pid_file, cwd="/"):
    """Background the current process

    Only the PID-file setup is visible in this excerpt: pid_file is
    opened for writing and, if that fails with IOError, the failure is
    logged and the process exits with status 1.

    NOTE(review): cwd is not used in the visible code -- presumably the
    directory to change to while daemonizing; confirm against the rest
    of the function.
    """

    log.debug("daemonizing process")

    # BROKEN: the pid file may have already been created by write_pid
    # however, I'm not even using nagcat in daemon mode right now so
    # I'll just leave this commented out for now...
    # Also, this has a major race condition...
    #try:
    #    # A trivial check to see if we are already running
    #    pidfd = open(pid_file)
    #    pid = int(pidfd.readline().strip())
    #    pidfd.close()
    #    os.kill(pid, 0)
    #except (IOError, OSError):
    #    pass # Assume all is well if the test raised errors
    #else:
    #    log.error("PID file exits and process %s is running!" % pid)
    #    sys.exit(1)

    # Opening with 'w' creates the file or truncates an existing one.
    try:
        pidfd = open(pid_file, 'w')
    except IOError, ex:
        log.error("Failed to open PID file %s" % pid_file)
        log.error("Error: %s" % (ex,))
        sys.exit(1)
Ejemplo n.º 5
0
 def start(self):
     """Start the test only when _should_run() allows it.

     Returns the result of the parent class start() when the test runs,
     otherwise an already-fired Deferred carrying None.
     """
     if not self._should_run():
         log.debug("Skipping start of %s", self)
         return defer.succeed(None)

     log.debug("Running test %s", self)
     return super(MerlinTest, self).start()
Ejemplo n.º 6
0
 def _start_dependencies(self):
     """Start every dependency and return a Deferred covering all of them.

     With no dependencies an already-fired Deferred carrying None is
     returned; otherwise a DeferredList over each dependency's start().
     """
     if not self.__depends:
         return defer.succeed(None)

     log.debug("Starting dependencies for %s", self)
     pending = [dep.start() for dep in self.__depends]
     return defer.DeferredList(pending)
Ejemplo n.º 7
0
    def _filter_without_default(self, result):
        """Parse result as a delimited text table.

        The CSV dialect is detected from result itself with csv.Sniffer
        and every row is read into a list.

        Raises errors.TestCritical when the csv module cannot parse result.

        NOTE(review): the lookup of the self.row/self.col cell mentioned
        in the log message is not visible in this excerpt.
        """
        log.debug("Fetching cell %s,%s from table", self.row, self.col)

        try:
            # Detect delimiter and quoting from the data instead of
            # assuming a fixed format.
            sniffer = csv.Sniffer()
            dialect = sniffer.sniff(result)
            reader = csv.reader(StringIO(result), dialect)
            table = list(reader)
        except csv.Error, ex:
            raise errors.TestCritical("Failed to parse table: %s" % ex)
Ejemplo n.º 8
0
    def filter(self, result):
        """Convert a formatted date string into a Unix timestamp string.

        result is parsed with the strptime format in self.arguments and
        converted with time.mktime.  On ValueError self.default is
        returned when it is not None, otherwise errors.TestCritical is
        raised.
        """
        log.debug("Converting date using format '%s'", self.arguments)

        try:
            parsed = time.strptime(result, self.arguments)
            return str(time.mktime(parsed))
        except ValueError:
            if self.default is None:
                raise errors.TestCritical(
                    "Failed to parse date with format '%s'" % self.arguments)
            return self.default
Ejemplo n.º 9
0
 def _should_run(self):
     """Tell whether this scheduler instance should run the test.

     When both a peer id and a peer count are available, the test runs
     only if its test index modulo the peer count equals the peer id.
     Otherwise the test always runs.  Returns True or False.
     """
     peer_id, num_peers = self._nagcat.get_peer_id_num_peers()
     log.debug("Running should_run, test_index=%s, num_peers=%s, peer_id=%s",
         str(self._test_index), num_peers, peer_id)

     # NOTE(review): a peer_id of 0 is falsy and so is treated the same
     # as "no peer id" here -- confirm that this is intended.
     if not (peer_id and num_peers):
         return True

     return self._test_index % num_peers == peer_id
Ejemplo n.º 10
0
    def schedule(self, runnable, delay=None):
        """(re)schedule a top level runnable

        delay defaults to runnable.repeat.  A false delay is reported as
        an error and nothing is scheduled.  Otherwise runnable.start is
        called after delay seconds and the runnable is scheduled again,
        with its default delay, once that call has finished either way.
        """
        if delay is None:
            delay = runnable.repeat

        if not delay:
            log.error("Task %s has no repeat value.", runnable)
            return

        def reschedule(_result):
            return self.schedule(runnable)

        log.debug("Scheduling %s in %s seconds.", runnable, delay)
        pending = task.deferLater(reactor, delay, runnable.start)
        pending.addBoth(reschedule)
Ejemplo n.º 11
0
 def _update_peer_id(self):
     """Refresh the cached Merlin peer id when it is missing or stale.

     A peer id that has a timestamp is refreshed once it is at least 60
     seconds old.  When the peer id or its timestamp is missing, it is
     fetched only if Merlin database settings are configured.
     """
     # The settings include the database password (merlin_db_pass);
     # mask it so that it never reaches the log.
     shown_info = self._merlin_db_info
     if shown_info:
         shown_info = dict(shown_info)
         if 'merlin_db_pass' in shown_info:
             shown_info['merlin_db_pass'] = '********'
     log.debug("Updating peer_id with _merlin_db_info=%s", shown_info)

     if self._peer_id and self._peer_id_timestamp:
         if time.time() - self._peer_id_timestamp >= 60:
             # peer_id should be refreshed.
             self._set_peer_id_and_timestamp()
         else:
             # peer_id is still valid, return.
             return
     else: # We are missing peer_id or peer_id_timestamp...
         if self._merlin_db_info:
             self._set_peer_id_and_timestamp()
Ejemplo n.º 12
0
    def filter(self, result):
        """Parse result as XML.

        Raises errors.TestCritical when result is not well-formed XML.

        NOTE(review): the lookup of the element named by self.arguments
        and the use of the format() helper are not visible in this
        excerpt.
        """
        def format(data):
            # Elements are serialized as pretty-printed XML; anything else
            # is converted with str().  Surrounding whitespace is removed
            # in both cases.
            if etree.iselement(data):
                ret = etree.tostring(data, pretty_print=True)
            else:
                ret = str(data)
            return ret.strip()

        log.debug("Fetching XML element %s", self.arguments)

        try:
            root = etree.fromstring(result)
        except etree.XMLSyntaxError, ex:
            raise errors.TestCritical("Invalid XML: %s" % ex)
Ejemplo n.º 13
0
    def filter(self, result):
        """Return the part of result matched by self.regex.

        When the pattern has groups the first group is returned,
        otherwise the whole match.  Without a match self.default is
        returned when it is not None, otherwise errors.TestCritical is
        raised.
        """
        log.debug("Matching regex '%s'", self.arguments)

        match = self.regex.search(result)
        if not match:
            if self.default is None:
                raise errors.TestCritical(
                        "Failed to match regex '%s'" % self.arguments)
            return self.default

        if match.groups():
            return match.group(1)
        return match.group(0)
Ejemplo n.º 14
0
    def _done(self, result):
        """Record the finished run and log unexpected failures.

        Stores result and the current time, and clears self.deferred.
        A Failure wrapping a TestError is logged as a warning only when it
        carries a traceback; any other Failure is logged as an error.
        """

        log.debug("Stopping %s", self)
        log.debug("Result: %s", result)
        self.result = result
        self.lastrun = time.time()
        self.deferred = None

        if not isinstance(result, failure.Failure):
            return

        if not isinstance(result.value, errors.TestError):
            log.error("Unhandled error in %s:\n%s" %
                    (self, result.getTraceback()))
        elif result.tb is not None:
            log.warn("TestError with a traceback in %s:\n%s" %
                    (self, result.getTraceback()))
Ejemplo n.º 15
0
    def start(self):
        """Start up the scheduler!

        Finalizes every registered runnable, spreads their first runs out
        per host and starts the latency self-check.  Returns the Deferred
        stored in self._shutdown.  With nothing registered, stop() is
        called right away and the same Deferred is returned.
        """
        assert self._startup and not self._shutdown
        self._startup = False
        deferred = defer.Deferred()
        self._shutdown = deferred
        del self._group_index

        if not self._registered:
            self.stop()
            return deferred

        if self.monitor:
            reactor.listenTCP(self._monitor_port, self.monitor)

        self._log_stats()

        # Group runnables by the host they query so that a single host
        # is not hit with many queries at once.
        by_host = {}
        for runnable in self._registered:
            runnable.finalize()
            by_host.setdefault(runnable.host, []).append(runnable)

        for host_name, members in by_host.iteritems():
            log.debug("Scheduling host %s", host_name)
            # A 60 second window is split into one slot per runnable.
            # The first runnable starts at a random point inside the
            # first slot and each following one a full slot later, which
            # spreads the queries to this host evenly.
            slot = 60.0 / len(members)
            assert slot
            delay = random.random() * slot

            for runnable in members:
                self.schedule(runnable, delay)
                delay += slot

        # Start latency self-checker
        self._latency_call = reactor.callLater(1.0, self.latency, time.time())

        log.info("Startup complete, running...")
        return deferred
Ejemplo n.º 16
0
        def maybe_read(key, private=False):
            """Read the file whose path is configured under key.

            Returns None when no path is configured.  A read failure is
            appended to self.init_errors and None is returned.

            NOTE(review): filetype, data and the private flag are not used
            in the visible part of this function; the rest is not shown in
            this excerpt.
            """
            filetype = self.conf[key+'_type']
            path = self.conf[key]
            if not path:
                return None

            log.debug("Loading %s from %s", key, path)

            try:
                fd = open(path)
                # The inner try/finally closes the file even when read()
                # fails.
                try:
                    data = fd.read()
                finally:
                    fd.close()
            except IOError, ex:
                self.init_errors.append("Failed to read %s file %s: %s" %
                                        (key, path, ex.strerror))
                return None
Ejemplo n.º 17
0
    def _startProcess(self, command):
        """Spawn command as a child process under a SubprocessProtocol.

        Every element of command is converted to a string.  The
        protocol's timeout() is scheduled after self.conf['timeout']
        seconds and also registered to run after reactor shutdown; both
        registrations are cancelled through self.deferred callbacks.
        """
        argv = [str(part) for part in command]
        log.debug("Running process: %s", argv)

        protocol = SubprocessProtocol()
        protocol.factory = self

        # Setup timeout
        timeout_call = reactor.callLater(
                self.conf['timeout'], protocol.timeout)
        self.deferred.addBoth(self._cancelTimeout, timeout_call)

        # Setup shutdown cleanup
        shutdown_trigger = reactor.addSystemEventTrigger(
                'after', 'shutdown', protocol.timeout)
        self.deferred.addBoth(self._cancelCleanup, shutdown_trigger)

        process.Process.__init__(self, reactor, argv[0], argv,
                self.conf['environment'], path=None, proto=protocol)
Ejemplo n.º 18
0
    def filter(self, result):
        """Keep only the lines of result selected by self.regex.

        Matching lines are kept, or the non-matching ones when
        self.invert is set; line endings are preserved.  When nothing is
        kept self.default is returned if it is not None, otherwise
        errors.TestCritical is raised.
        """
        log.debug("Grepping regex '%s'", self.arguments)

        # A line is kept when "it matched" and "invert" disagree.
        kept = [line for line in result.splitlines(True)
                if bool(self.regex.search(line)) != bool(self.invert)]
        output = "".join(kept)

        if output:
            return output
        if self.default is None:
            raise errors.TestCritical(
                    "Failed to match regex '%s'" % self.arguments)
        return self.default
Ejemplo n.º 19
0
    def start(self):
        """Start a Runnable object

        Returns the pending Deferred when a run is already in progress,
        an already-fired Deferred carrying None when the previous run is
        still within the repeat interval, and otherwise a new Deferred
        that starts the dependencies, then this object, then _done.
        """

        # A run is already in progress; hand back its Deferred.
        if self.deferred is not None:
            return self.deferred

        # The last result is still fresh, so do not run again yet.
        if self.lastrun + self.repeat.seconds > time.time():
            log.debug("Skipping start of %s", self)
            return defer.succeed(None)

        # Keep a local reference: self.deferred may already have been
        # reset by _done by the time the callbacks below are attached.
        pending = self._start_dependencies()
        self.deferred = pending
        pending.addBoth(lambda _ignored: self._start_self())
        pending.addBoth(self._done)
        return pending
Ejemplo n.º 20
0
        def maybe_read(key, private=False):
            # Only support PEM for now
            filetype = crypto.FILETYPE_PEM
            path = self.conf[key]
            filetype = self.conf[key+'_type']
            if not path:
                return None

            log.debug("Loading %s from %s", key, path)

            try:
                fd = open(path)
                try:
                    data = fd.read()
                finally:
                    fd.close()
            except IOError, ex:
                raise errors.InitError("Failed to read %s file %s: %s" %
                                       (path, key, ex.strerror))
Ejemplo n.º 21
0
    def _computeReturn(self):
        """Compute this test's result from the results of its subtests.

        A simple test returns the result of its 'query' subtest.  A
        compound test evaluates the self._return expression against the
        subtest results, plus 'NOW', each wrapped in util.MathString, and
        returns the value as a string.

        Raises ChildError when a subtest result that is needed is a
        Failure.
        """
        if not self._compound:
            query = self._subtests['query']
            if isinstance(query.result, failure.Failure):
                raise ChildError()
            return query.result

        data = {'NOW': util.MathString(self._now)}
        for name, subtest in self._subtests.iteritems():
            if isinstance(subtest.result, failure.Failure):
                raise ChildError()
            data[name] = util.MathString(subtest.result)

        log.debug("Evaluating return '%s' with data = %s",
                self._return, data)

        # NOTE(review): eval() executes self._return as Python code; this
        # is only safe while that expression comes from a trusted source.
        return str(eval(self._return, {'data': data}))
Ejemplo n.º 22
0
    def new_query(self, conf, qcls=None):
        """Create a new query and register it or return an existing one

        Without qcls the query class is looked up from conf's 'type'
        through the plugin registry; errors.ConfigError is raised when
        none is found.  Queries are keyed by their str() form: an already
        registered query with the same key is updated with conf and
        returned in place of the new object.
        """

        # Find the correct Query class for this type
        if not qcls:
            qtype = conf.get('type')
            qcls = plugin.search(IQuery, qtype, None)
            if not qcls:
                raise errors.ConfigError(conf,
                        "Unknown query type '%s'" % qtype)

        candidate = qcls(self._nagcat, conf)
        key = str(candidate)

        if key not in self._queries:
            log.debug("Adding query '%s'", key)
            self._queries[key] = candidate
            return candidate

        log.debug("Reusing query '%s'", key)
        existing = self._queries[key]
        existing.update(conf)
        return existing
Ejemplo n.º 23
0
    def _parse_tests(self, tag):
        """Get the list of NagCat services in the object cache

        Returns a list of (defaults, overrides) pairs, one for each
        service that defines "_TEST" and, when tag is given, whose "_TAG"
        (falling back to the "_TAG" of its host) equals tag.  defaults
        holds the host name, the host address and the service description;
        overrides holds every service variable that starts with '_', with
        that prefix removed and the name lower-cased.
        """

        parser = nagios_objects.ObjectParser(
                self._nagios_obj, ('host', 'service'))

        hosts = dict((entry['host_name'], entry) for entry in parser['host'])
        tests = []

        for service in parser['service']:
            host = hosts[service['host_name']]
            if "_TEST" not in service:
                continue
            if tag and service.get("_TAG", host.get('_TAG', None)) != tag:
                continue

            test_defaults = {
                    'host': service['host_name'],
                    'addr': host['address'],
                    'description': service['service_description']}

            # save all vars that start with '_'
            # coil is normally in lower case and Nagios is case insensitive
            test_overrides = dict(
                    (key[1:].lower(), service[key])
                    for key in service
                    if len(key) >= 2 and key[0] == "_")

            log.debug("Found Nagios service: %s", test_defaults)
            log.debug("Service overrides: %s", test_overrides)
            tests.append((test_defaults, test_overrides))

        return tests
Ejemplo n.º 24
0
 def _start_self(self):
     """Hand self._start to the reactor with a delay of 0.

     Returns the Deferred produced by task.deferLater.
     """
     log.debug("Starting %s", self)
     pending = task.deferLater(reactor, 0, self._start)
     return pending