def doWrite(self):
    """Write data out to the pipe.

    Drains ``self._data`` (the chunk currently being written) and then the
    entries of ``self._data_queue`` in order, using ``os.write`` on
    ``self._fd``.  The method returns early, leaving the remaining data in
    place for the next call, when:

    * the write would block (EAGAIN / EWOULDBLOCK),
    * only part of the current chunk was written, or
    * the write failed with any other OSError, in which case the failure
      is logged and ``self._reopen_file()`` is called.

    Always returns None.
    """
    # Imported locally so the symbolic constants are available without
    # relying on the module's import block.
    import errno

    while self._data or self._data_queue:
        if not self._data:
            self._data = self._data_queue.popleft()

        log.trace("Writing Nagios command to fifo: %s", self._data)
        try:
            data_written = os.write(self._fd, self._data)
        except OSError as ex:
            # Compare against the platform's constants instead of the
            # literal 11, which is only EAGAIN on Linux.
            if ex.errno in (errno.EAGAIN, errno.EWOULDBLOCK):
                # Pause writing until the next doWrite()
                return
            log.warn("Failed to write to nagios pipe: %s"
                     % (ex.strerror or ex,))
            self._reopen_file()
            return

        if len(self._data) != data_written:
            # Partial write: keep the unwritten tail for the next call.
            self._data = self._data[data_written:]
            return
        else:
            self._data = None
def timeout(self):
    """Flag this object as timed out and ask the child process to exit.

    Sets ``self.timedout`` to True.  If ``self.transport.pid`` is set,
    SIGTERM is sent to that pid; an OSError from ``os.kill`` is logged as a
    warning rather than raised.
    """
    self.timedout = True

    if self.transport.pid:
        try:
            os.kill(self.transport.pid, signal.SIGTERM)
        except OSError as ex:
            log.warn("Failed to send TERM to a subprocess: %s", ex)
def timeout(self):
    """Flag this object as timed out, drop the connection, kill the child.

    Sets ``self.timedout`` to True and calls
    ``self.transport.loseConnection()``.  If ``self.transport.pid`` is set,
    SIGTERM is sent to the negated pid so that the signal goes to the
    whole process group; an OSError from ``os.kill`` is logged as a warning
    rather than raised.
    """
    self.timedout = True
    self.transport.loseConnection()

    # Kill all processes in the child's process group
    # NOTE(review): a negative pid addresses the group whose id equals the
    # child's pid, so this presumably relies on the child being a process
    # group leader -- confirm where the process is spawned.
    if self.transport.pid:
        try:
            os.kill(-int(self.transport.pid), signal.SIGTERM)
        except OSError as ex:
            log.warn("Failed to send TERM to a subprocess: %s", ex)
def latency(self, last):
    """Record how late this once-a-second callback ran and reschedule it.

    ``last`` is the ``time.time()`` value captured by the previous
    invocation.  The lateness is the elapsed time minus the 1.0 second
    scheduling delay; it is appended to ``self._latency`` and logged as an
    error above 5.0 seconds or as a warning above 1.5 seconds.
    """
    now = time.time()
    # Schedule the next sample first, handing it the current timestamp.
    self._latency_call = reactor.callLater(1.0, self.latency, now)

    delay = now - last - 1.0
    self._latency.append(delay)

    if delay > 5.0:
        report = log.error
    elif delay > 1.5:
        report = log.warn
    else:
        report = None

    if report is not None:
        report("Callback latency: %s" % delay)
def _reopen_file(self):
    """Attempt to reopen the pipe.

    Any retry held in ``self._timer`` is cancelled (if still pending) and
    cleared first.  ``self._open_file()`` is then called; if it raises
    OSError the failure is logged and another attempt is scheduled via
    ``reactor.callLater`` 10 seconds later, stored in ``self._timer``.
    """
    if self._timer:
        # active() is False once the call has either fired or been
        # cancelled; calling cancel() in either state raises.
        if self._timer.active():
            self._timer.cancel()
        self._timer = None

    try:
        self._open_file()
    except OSError as ex:
        # Fall back to the exception itself when it carries no strerror
        # (e.g. an OSError built from a single message argument).
        log.warn("Failed to reopen nagios pipe: %s" % (ex.strerror or ex,))
        self._timer = reactor.callLater(10.0, self._reopen_file)
def _done(self, result):
    """Save the result, log unhandled errors

    Stores ``result`` on ``self.result``, stamps ``self.lastrun`` with the
    current time and clears ``self.deferred``.  When ``result`` is a
    Failure, a non-TestError is logged as an error with its traceback,
    while a TestError is only logged (as a warning) if it carries a
    traceback.
    """
    log.debug("Stopping %s", self)
    log.debug("Result: %s", result)

    self.result = result
    self.lastrun = time.time()
    self.deferred = None

    # Plain (non-failure) results need no further reporting.
    if not isinstance(result, failure.Failure):
        return

    if not isinstance(result.value, errors.TestError):
        log.error("Unhandled error in %s:\n%s"
                  % (self, result.getTraceback()))
    elif result.tb is not None:
        log.warn("TestError with a traceback in %s:\n%s"
                 % (self, result.getTraceback()))
def __init__(self, type_, macros, config):
    """Store the object type, macros and config; try to attach a graph.

    ``type_`` must be ``'host'`` or ``'service'`` (enforced with an
    assert).  ``macros`` and ``config`` are stored as given; this method
    reads ``config['rradir']`` and the ``_SERVICETEST``, ``HOSTNAME`` and
    ``SERVICEDESC`` macros.

    ``self.trend`` is None unless a graph could be created: that is only
    attempted for services with a non-empty ``rradir`` and a truthy
    ``_SERVICETEST`` macro.  An ``errors.InitError`` from ``graph.Graph``
    is logged as a warning and leaves ``self.trend`` as None.
    """
    assert type_ in ('host', 'service')
    self.type = type_
    self.macros = macros
    self.config = config
    self.trend = None

    # Attempt to generate an rrdtool graph if this is a Nagcat service
    if (type_ == "service" and self.config['rradir']
            and self.macros.get('_SERVICETEST')):
        try:
            self.trend = graph.Graph(self.config['rradir'],
                                     self.macros['HOSTNAME'],
                                     self.macros['SERVICEDESC'])
        except errors.InitError as ex:
            log.warn("Unable to load RRDTool info for %s/%s: %s"
                     % (self.macros['HOSTNAME'],
                        self.macros['SERVICEDESC'], ex))
for item in os.listdir(self.spool_dir): path = "%s/%s" % (self.spool_dir, item) try: info = os.stat(path) except: continue if info.st_mtime < threshold: try: os.unlink(path) except OSError, ex: log.error("Failed to remove %s: %s" % (path, ex)) else: count += 1 if count: log.warn("Removed %d stale nagios command files" % count) # Schedule the next cleanup to run from a thread in 1 minute reactor.callLater(60, reactor.callInThread, self._cleanup_spool) class NagiosXMLRPC(xmlrpc.XMLRPC): """A XMLRPC Protocol for Nagios""" def __init__(self, nagios_cfg): xmlrpc.XMLRPC.__init__(self) xmlrpc.addIntrospection(self) cfg = nagios_objects.ConfigParser(nagios_cfg, ('object_cache_file', 'command_file', 'status_file', 'check_result_path'))