def check(self):
    """
    Method called from the base class.

    Walks over every HTTP status code listed in self.config.observables,
    asks self.sample() (called with the code converted to a string) for
    the current number of occurrences and compares it against
    self.thresholds, which are paired position-wise with self.levels.

    At most one alert per code is passed to self.sender: the first
    threshold, in list order, that is reached or exceeded decides the
    alert level. A None sample means there is nothing to compare and no
    alert is produced for that code.

    Returns None.

    """
    pollerName = self.__class__.__name__
    for code in self.config.observables:
        occurrences = self.sample(str(code))

        # first (threshold, level) pair that is reached, None otherwise
        breached = None
        if occurrences is not None:
            limits = zip(self.thresholds, self.levels)
            breached = next((pair for pair in limits
                             if occurrences >= pair[0]), None)

        if breached is not None:
            threshold, level = breached
            details = dict(HTTPCode=code,
                           occurrences=occurrences,
                           threshold=threshold)
            alert = Alert(**self.preAlert)
            alert.setTimestamp()
            alert["Source"] = pollerName
            alert["Details"] = details
            alert["Level"] = level
            logging.debug("Sending an alert (%s): %s" % (pollerName, alert))
            self.sender(alert)

        # result of the check is logged for every code, alert or not
        logging.debug("%s: checked code:%s current occurrences:%s" %
                      (pollerName, code, occurrences))
def check(self):
    """
    First gets number on directory usage.
    If the usage exceeds soft, resp. critical limits, the alert is sent.

    Nothing is done when self._dbDirectory is not set or when
    self.sample() returns None. Otherwise the usage is compared against
    self.thresholds (paired position-wise with self.levels) and at most
    one alert is passed to self.sender: the first threshold, in list
    order, that is reached or exceeded decides the alert level.

    Returns None.

    """
    if not self._dbDirectory:
        return
    usage = self.sample(self._dbDirectory)
    # identity test: None is the only "no measurement" marker, a usage
    # of 0 is a valid result
    if usage is None:
        # should be logged above
        return

    usageStr = "%s %s" % (usage, self._currSizeUnit)
    for threshold, level in zip(self.thresholds, self.levels):
        if usage >= threshold:
            details = dict(databasedir=self._dbDirectory,
                           usage=usageStr,
                           threshold=threshold)
            a = Alert(**self.preAlert)
            a.setTimestamp()
            a["Source"] = self._myName
            a["Details"] = details
            a["Level"] = level
            logging.debug("Sending an alert (%s): %s" %
                          (self.__class__.__name__, a))
            self.sender(a)
            break  # send only one alert, critical threshold tested first
    m = "%s: measurements results: %s" % (self._myName, usageStr)
    logging.debug(m)
def sendAlertFunc(level, **args):
    """
    Build an alert and hand it over to the sender.

    level - value stored under the alert's "Level" key.
    args - arbitrary keyword arguments, stored as a dictionary under the
        alert's "Details" key.

    Both sender and preAlert are not defined here, they are taken from
    the surrounding scope. If sender evaluates to false, the call does
    nothing.

    """
    if not sender:
        return
    msg = Alert(**preAlert)
    msg.setTimestamp()
    msg["Level"] = level
    msg["Details"] = args
    sender(msg)
def testSetTimestamp(self):
    """
    A fresh Alert has neither "Timestamp" nor "TimestampDecoded" set.
    After setTimestamp() the former is a float and the latter equals
    that float rendered as GMT time using Alert.TIMESTAMP_FORMAT.

    """
    alert = Alert()
    # nothing time related is filled in before setTimestamp() is called
    for key in ("Timestamp", "TimestampDecoded"):
        self.assertEqual(alert[key], None)

    alert.setTimestamp()

    stamp = alert["Timestamp"]
    self.assertTrue(isinstance(stamp, float))
    # the decoded form has to be derivable from the raw timestamp
    expected = time.strftime(alert.TIMESTAMP_FORMAT, time.gmtime(stamp))
    self.assertEqual(alert["TimestampDecoded"], expected)
def _handleFailedPolling(self, ex):
    """
    Handle (log and send alert) if polling failed.

    ex - the exception (or any object describing the failure) which is
        formatted into the error message.

    The traceback of the exception currently being handled, as reported
    by sys.exc_info(), is written into the error log together with the
    message. The alert itself carries only the message under
    "Details" and is passed to self.sender.

    """
    pollerName = self.__class__.__name__
    traceString = '\n '.join(traceback.format_exception(*sys.exc_info()))
    errMsg = "Polling failed in %s, reason: %s" % (pollerName, ex)
    logging.error("%s\n%s" % (errMsg, traceString))

    alert = Alert(**self.preAlert)
    alert.setTimestamp()
    alert["Source"] = pollerName
    alert["Details"] = dict(msg=errMsg)
    # NOTE(review): fixed level for polling failures, meaning of the
    # value 10 is not visible here - confirm against the level scale
    alert["Level"] = 10
    logging.info("Sending an alert (%s): %s" % (pollerName, alert))
    self.sender(alert)
def check(self):
    """
    Checks the output of df command for percentage of disk space usage.
    The command output pattern:
    '
    Filesystem           1K-blocks      Used Available Use% Mounted on
    /dev/sda2              1953276    382040   1467026  21% /
    udev                   4085528       336   4085192   1% /dev
    none                   4085528       628   4084900   1% /dev/shm
    '

    The output is obtained from self.sample(); nothing is done if it is
    None. The first (header) line is skipped, as is every line with
    fewer than 6 whitespace separated fields and the AFS cache
    directory (/usr/vice/cache).

    The usage of each remaining partition is compared against
    self.thresholds (paired position-wise with self.levels) and at most
    one alert per partition is passed to self.sender.

    A line which can not be processed (ValueError, IndexError) is
    logged as an error and skipped, the remaining partitions are still
    checked.

    Returns None.

    """
    out = self.sample()
    if out is None:
        # should be logged above
        return

    percs = []
    # don't do the first line; empty lines (such as the one after the
    # trailing newline) have fewer than 6 fields and are dropped below,
    # so output without a trailing newline does not lose its last entry
    for line in out.split('\n')[1:]:
        arr = line.split()
        if len(arr) < 6:
            # 6 elements on the partition entry of df output
            continue
        # the try block is per partition so that one odd entry
        # (e.g. '-' in the Use% column) does not stop the whole check
        try:
            percStr, mount = arr[4:6]  # see the df output pattern
            if mount == "/usr/vice/cache":
                # do not check AFS cache dir
                continue
            perc = int(percStr.rstrip('%'))  # without the percent sign
            for threshold, level in zip(self.thresholds, self.levels):
                if perc >= threshold:
                    details = dict(mountPoint=mount,
                                   usage="%s%%" % perc,
                                   threshold="%s%%" % threshold)
                    a = Alert(**self.preAlert)
                    a.setTimestamp()
                    a["Source"] = self.__class__.__name__
                    a["Details"] = details
                    a["Level"] = level
                    logging.debug("Sending an alert (%s): %s" %
                                  (self.__class__.__name__, a))
                    self.sender(a)
                    break  # send only one alert, critical threshold tested first
            percs.append(percStr)
        except (ValueError, IndexError) as ex:
            logging.error("Could not check available disk space, reason: %s" % ex)
    m = "%s: measurements results: %s" % (self.__class__.__name__, percs)
    logging.debug(m)
def _handleFailedPolling(self, ex):
    """
    Handle (log and send alert) if polling failed.

    ex - the exception (or any object describing the failure), it is
        formatted into the error message.

    The error message is logged together with the formatted traceback
    of the exception reported by sys.exc_info(). Then an alert carrying
    the message under "Details" is created from self.preAlert and
    passed to self.sender.

    """
    source = self.__class__.__name__
    formatted = traceback.format_exception(*sys.exc_info())
    reason = "Polling failed in %s, reason: %s" % (source, ex)
    logging.error("%s\n%s" % (reason, '\n '.join(formatted)))

    failure = Alert(**self.preAlert)
    failure.setTimestamp()
    failure["Source"] = source
    failure["Details"] = dict(msg=reason)
    # NOTE(review): hard-coded level used for failed polling, the scale
    # of levels is defined elsewhere - confirm 10 is the intended value
    failure["Level"] = 10
    logging.info("Sending an alert (%s): %s" % (source, failure))
    self.sender(failure)
def check(self):
    """
    Checks the output of df command for percentage of disk space usage.
    The command output pattern:
    '
    Filesystem           1K-blocks      Used Available Use% Mounted on
    /dev/sda2              1953276    382040   1467026  21% /
    udev                   4085528       336   4085192   1% /dev
    none                   4085528       628   4084900   1% /dev/shm
    '

    The output is obtained from self.sample(); nothing is done if it is
    None. The first (header) line is skipped, as is every line with
    fewer than 6 whitespace separated fields.

    The usage of each partition is compared against self.thresholds
    (paired position-wise with self.levels) and at most one alert per
    partition is passed to self.sender.

    A line which can not be processed (ValueError, IndexError) is
    logged as an error and skipped, the remaining partitions are still
    checked.

    Returns None.

    """
    out = self.sample()
    if out is None:
        # should be logged above
        return

    percs = []
    # don't do the first line; empty lines (such as the one after the
    # trailing newline) have fewer than 6 fields and are dropped below,
    # so output without a trailing newline does not lose its last entry
    for line in out.split('\n')[1:]:
        arr = line.split()
        if len(arr) < 6:
            # 6 elements on the partition entry of df output
            continue
        # the try block is per partition so that one odd entry
        # (e.g. '-' in the Use% column) does not stop the whole check
        try:
            percStr, mount = arr[4:6]  # see the df output pattern
            perc = int(percStr.rstrip('%'))  # without the percent sign
            for threshold, level in zip(self.thresholds, self.levels):
                if perc >= threshold:
                    details = dict(mountPoint=mount,
                                   usage="%s%%" % perc,
                                   threshold="%s%%" % threshold)
                    a = Alert(**self.preAlert)
                    a.setTimestamp()
                    a["Source"] = self.__class__.__name__
                    a["Details"] = details
                    a["Level"] = level
                    logging.debug("Sending an alert (%s): %s" %
                                  (self.__class__.__name__, a))
                    self.sender(a)
                    break  # send only one alert, critical threshold tested first
            percs.append(percStr)
        # "except ... as" works on Python 2.6+ as well as on Python 3
        except (ValueError, IndexError) as ex:
            logging.error("Could not check available disk space, reason: %s" % ex)
    # the collected values were otherwise unused; report them
    m = "%s: measurements results: %s" % (self.__class__.__name__, percs)
    logging.debug(m)
def check(self, pd, measurements):
    """
    Method is used commonly for system properties (e.g. overall CPU)
    as well as for particular process monitoring.

    pd - (processDetail) - information about monitored process, may be
        None if this method is called from system monitoring pollers
        (e.g. CPU usage). It is passed to self.sample() and, if true,
        its getDetails() output is merged into the alert details.
    measurements - Measurements class instance. Used here as a sized,
        iterable container providing append(), clear() and the
        _numOfMeasurements attribute.

    Every call adds one sample. Once _numOfMeasurements samples are
    collected, their average (rounded to 2 decimal places) is compared
    against self.thresholds (paired position-wise with self.levels),
    the container is emptied and at most one alert is passed to
    self.sender.

    Returns None.

    """
    v = self.sample(pd)
    measurements.append(v)
    if len(measurements) < measurements._numOfMeasurements:
        # not enough samples yet, nothing to evaluate
        return

    # evaluate: calculate average value and react
    numMeasurements = len(measurements)
    # float() keeps the fractional part for integer samples on Python 2,
    # where int / int is floor division
    avgPerc = round(float(sum(measurements)) / numMeasurements, 2)
    details = dict(period=self.config.period,
                   numMeasurements=numMeasurements,
                   average="%s%%" % avgPerc)
    if pd:
        details.update(pd.getDetails())
    measurements.clear()

    for threshold, level in zip(self.thresholds, self.levels):
        if avgPerc >= threshold:
            a = Alert(**self.preAlert)
            a.setTimestamp()
            a["Source"] = self.__class__.__name__
            details["threshold"] = "%s%%" % threshold
            a["Details"] = details
            a["Level"] = level
            logging.debug("Sending an alert (%s): %s" %
                          (self.__class__.__name__, a))
            self.sender(a)
            break  # send only one alert, critical threshold tested first

    m = ("%s: measurements result: %s%%" %
         (self.__class__.__name__, avgPerc))
    logging.debug(m)
def check(self, pd, measurements):
    """
    Method is used commonly for system properties (e.g. overall CPU)
    as well as for particular process monitoring.

    pd - (processDetail) - information about monitored process, may be
        None if this method is called from system monitoring pollers
        (e.g. CPU usage). It is passed to self.sample() and, if true,
        its getDetails() output is merged into the alert details.
    measurements - Measurements class instance. Used here as a sized,
        iterable container providing append(), clear() and the
        _numOfMeasurements attribute.

    The new sample is appended on every call. The evaluation happens
    only when at least _numOfMeasurements samples are available: the
    average (rounded to 2 decimal places) is calculated, the container
    is emptied and the average is compared against self.thresholds
    (paired position-wise with self.levels). At most one alert is
    passed to self.sender.

    Returns None.

    """
    v = self.sample(pd)
    measurements.append(v)
    avgPerc = None
    if len(measurements) >= measurements._numOfMeasurements:
        # evaluate: calculate average value and react
        count = len(measurements)
        # explicit float conversion: on Python 2 the division of two
        # integers would drop the fractional part of the average
        avgPerc = round(float(sum(measurements)) / count, 2)
        details = dict(period=self.config.period,
                       numMeasurements=count,
                       average="%s%%" % avgPerc)
        if pd:
            details.update(pd.getDetails())
        measurements.clear()

        for threshold, level in zip(self.thresholds, self.levels):
            if avgPerc >= threshold:
                a = Alert(**self.preAlert)
                a.setTimestamp()
                a["Source"] = self.__class__.__name__
                details["threshold"] = "%s%%" % threshold
                a["Details"] = details
                a["Level"] = level
                logging.debug("Sending an alert (%s): %s" %
                              (self.__class__.__name__, a))
                self.sender(a)
                break  # send only one alert, critical threshold tested first

    # log the result only if the evaluation took place
    if avgPerc is not None:
        m = ("%s: measurements result: %s%%" %
             (self.__class__.__name__, avgPerc))
        logging.debug(m)