def test_classify_single_metric(self):
    """
    Classify every sample in the OK, WARNING and CRITICAL fixture lists
    individually and check that each gets the expected label.
    """
    classifier = MetricClassifier(goodmetricdefs)
    expectations = (
        (okmetrics, 'OK'),
        (warnmetrics, 'WARNING'),
        (criticalmetrics, 'CRITICAL'),
    )
    for samples, expected in expectations:
        for sample in samples:
            # The first two elements of each sample are the arguments
            # passed to classify().
            self.assertEqual(classifier.classify(sample[0], sample[1]), expected)
def test_classify_process(self):
    """
    Run the sample metrics through the whole classification process:
    per-metric results, worst metric, and the return code with and
    without unknown_as_critical.
    """
    classifier = MetricClassifier(goodmetricdefs)

    classified = classifier.classify_metrics(samplemetrics)
    self.assertEqual(classified, samplemetricresults)

    worst = classifier.worst_metric(samplemetrics)
    self.assertEqual(worst, ('d', 3))

    default_rc = classifier.return_code(samplemetrics)
    self.assertEqual(default_rc, 3)

    strict_rc = classifier.return_code(samplemetrics, unknown_as_critical=True)
    self.assertEqual(strict_rc, 2)
def test_classify_multiple_metrics(self):
    """
    Classifying the sample metrics in one call must produce the expected
    results fixture.
    """
    classifier = MetricClassifier(goodmetricdefs)
    classified = classifier.classify_metrics(samplemetrics)
    self.assertEqual(classified, samplemetricresults)
def test_create_good_metricclassifier(self):
    """
    Constructing a MetricClassifier from valid metric definitions must
    succeed and yield an object.
    """
    mc = MetricClassifier(goodmetricdefs)
    # Identity check: unlike assertNotEqual(mc, None), this does not depend
    # on how the class implements equality comparison.
    self.assertIsNotNone(mc)
def __init__(self, checks, objs):
    """
    Store the requested checks and the metric source objects, and build
    the classifier from the module's metric definitions.
    """
    self.checks, self.objs = checks, objs
    self.mc = MetricClassifier(_metricdefs)
class NTPAlerter(object):
    """
    Collect metrics from the registered metric sources, classify them, and
    report the outcome in collectd or Nagios format.
    """

    def __init__(self, checks, objs):
        """
        checks: list of check names to evaluate.  It is extended in place
            with the derived checks when metrics are collected.
        objs: mapping of source name to metric source object.  Every value
            must provide getmetrics(); the 'proc', 'peers' and 'trace'
            entries are also used when building custom messages.
        """
        self.checks = checks
        self.objs = objs
        self.mc = MetricClassifier(_metricdefs)

    def _expand_checks(self):
        """
        Add the derived checks implied by the ones already requested.

        A derived check is only appended when it is not already present, so
        calling this repeatedly (once per collection cycle) does not grow
        self.checks with duplicates.
        """
        implied = []
        if 'proc' in self.checks:
            implied.append('runtime')
        if 'trace' in self.checks:
            implied.extend(('tracehosts', 'traceloops'))
        if 'vars' in self.checks and 'offset' not in self.checks:
            implied.append('sysoffset')
        for check in implied:
            if check not in self.checks:
                self.checks.append(check)

    def collectmetrics(self, debug):
        """
        Get metrics from each registered metric source and add all relevant aliases.

        debug: when true, pretty-print the raw metrics before aliasing.
        """
        self.metrics = {}
        for source in self.objs.values():
            self.metrics.update(source.getmetrics())
        if debug:
            pprint.pprint(self.metrics)
        metrics.addaliases(self.metrics, _aliases)
        self._expand_checks()

    def custom_message(self, metric, result):
        """
        Special cases for message formats.

        Returns the custom message for the metric, or None when the metric
        (or the result) has no special case.
        """
        if metric == 'runtime':
            return self.custom_message_runtime(result)
        elif metric == 'sync':
            return self.custom_message_sync(result)
        elif metric == 'tracehosts':
            return self.custom_message_tracehosts(result)
        elif metric == 'traceloops':
            return self.custom_message_traceloops(result)
        return None

    def custom_message_runtime(self, result):
        """
        Message for the 'runtime' metric; None for unhandled results.
        """
        proc = self.objs['proc']
        if result == 'CRITICAL':
            return '%s: No NTP process could be found.  Please check that an NTP server is installed and running.' % (result,)
        elif result == 'WARNING':
            # Reported with an OK prefix on purpose: return_code() maps a
            # runtime WARNING to exit status 0.
            return 'OK: %s has only been running %d seconds' % (proc.name, proc.getruntime())
        elif result == 'OK':
            return '%s: %s has been running %d seconds' % (result, proc.name, proc.getruntime())
        return None

    def custom_message_sync(self, result):
        """
        Message for the 'sync' metric; None for unhandled results.
        """
        if result == 'CRITICAL':
            return '%s: No sync peer selected' % (result,)
        elif result == 'OK':
            return '%s: Time is in sync with %s' % (result, self.objs['peers'].syncpeer())
        return None

    def custom_message_traceloops(self, result):
        """
        Message for the 'traceloops' metric; None for unhandled results.
        """
        if result == 'CRITICAL':
            return '%s: Trace loop detected at host %s' % (result, self.objs['trace'].loophost)
        elif result == 'OK':
            return '%s: Trace detected no loops' % (result,)
        return None

    def custom_message_tracehosts(self, result):
        """
        Message for the 'tracehosts' metric, listing the hosts in the trace.
        """
        trace = self.objs['trace']
        return '%s: %d hosts detected in trace: %s' % (
            result, trace.results['tracehosts'], ", ".join(trace.hostlist))

    def alert_collectd(self, hostname, interval):
        """
        Produce collectd output for the metrics.

        hostname: host identifier used in the PUTVAL line.
        interval: value for the PUTVAL interval option (formatted with %d).
        """
        self.collectmetrics(False)
        self.mc.classify_metrics(self.metrics)
        # The return value is not needed here.
        # NOTE(review): call kept in case worst_metric() updates classifier
        # state - confirm, and drop it if it is a pure query.
        self.mc.worst_metric(self.checks)
        self.metrics['result'] = self.return_code()
        for metric in sorted(_collectdtypes):
            if metric in self.metrics:
                print('PUTVAL "%s/ntpmon-%s" interval=%d N:%.9f' % (
                    hostname,
                    _collectdtypes[metric],
                    interval,
                    self.metrics[metric],
                ))
        if sys.stdout.isatty():
            # we're outputting to a terminal; must be test mode
            print('')
        else:
            # flush standard output to ensure metrics are sent to collectd immediately
            sys.stdout.flush()

    def alert_nagios(self, debug):
        """
        Produce nagios output for the metrics.

        debug: when true, print the message for every classified check
            instead of the single worst-metric line with performance data.
        """
        self.collectmetrics(debug)
        results = self.mc.classify_metrics(self.metrics)
        msgs = {}
        for metric in self.checks:
            if metric in results:
                msgs[metric] = self.custom_message(metric, results[metric])
                if msgs[metric] is None:
                    # No special case: fall back to the classifier's message.
                    msgs[metric] = self.mc.message(metric, _formats[metric][0], _formats[metric][1])
        if debug:
            for m in msgs:
                print(msgs[m])
        else:
            (m, _) = self.mc.worst_metric(self.checks)
            self.metrics['result'] = self.return_code()
            print("%s | %s" % (msgs[m], self.report()))

    def report(self):
        """
        Report metric values as space-separated name=value items, one per
        alias in sorted order; aliases with no collected value are emitted
        with an empty value.
        """
        items = []
        for m in sorted(_aliases):
            if m in self.metrics:
                if _formats[m] is None:
                    fmt = 'f'
                else:
                    # A '%' format is reported as a plain float instead.
                    fmt = _formats[m][1] if _formats[m][1] != '%' else 'f'
                val = self.mc.fmtstr(fmt) % self.metrics[m]
                items.append("%s=%s" % (m, val))
            else:
                items.append("%s=" % (m,))
        return " ".join(items)

    def return_code(self):
        """
        Don't return anything other than OK until ntpd has been running for
        at least enough time for 8 polling intervals of 64 seconds each.
        This prevents false positives due to ntpd restarts or short-lived VMs.
        """
        if 'runtime' in self.mc.results and self.mc.results['runtime'] == 'WARNING':
            return 0
        else:
            return self.mc.return_code(self.checks)
def __init__(self, checks):
    """
    Store the requested checks, build the classifier from the module's
    metric definitions, and start with no metrics and no metric sources.
    """
    self.checks = checks
    self.mc = MetricClassifier(_metricdefs)
    # Both containers start empty.
    self.metrics = dict()
    self.objs = dict()
class NTPAlerter(object):
    """
    Collect metrics from the registered metric sources, classify them, and
    report the outcome in collectd or Nagios format.
    """

    def __init__(self, checks, objs):
        """
        checks: list of check names to evaluate.  It is extended in place
            with the derived checks when metrics are collected.
        objs: mapping of source name to metric source object.  Every value
            must provide getmetrics(); the 'proc', 'peers' and 'trace'
            entries are also used when building custom messages.
        """
        self.checks = checks
        self.objs = objs
        self.mc = MetricClassifier(_metricdefs)

    def _expand_checks(self):
        """
        Add the derived checks implied by the ones already requested.

        A derived check is only appended when it is not already present, so
        calling this repeatedly (once per collection cycle) does not grow
        self.checks with duplicates.
        """
        implied = []
        if 'proc' in self.checks:
            implied.append('runtime')
        if 'trace' in self.checks:
            implied.extend(('tracehosts', 'traceloops'))
        if 'vars' in self.checks and 'offset' not in self.checks:
            implied.append('sysoffset')
        for check in implied:
            if check not in self.checks:
                self.checks.append(check)

    def collectmetrics(self, debug):
        """
        Get metrics from each registered metric source and add all relevant aliases.

        debug: when true, pretty-print the raw metrics before aliasing.
        """
        self.metrics = {}
        for source in self.objs.values():
            self.metrics.update(source.getmetrics())
        if debug:
            pprint.pprint(self.metrics)
        metrics.addaliases(self.metrics, _aliases)
        self._expand_checks()

    def custom_message(self, metric, result):
        """
        Special cases for message formats.

        Returns the custom message for the metric, or None when the metric
        (or the result) has no special case.
        """
        if metric == 'runtime':
            return self.custom_message_runtime(result)
        elif metric == 'sync':
            return self.custom_message_sync(result)
        elif metric == 'tracehosts':
            return self.custom_message_tracehosts(result)
        elif metric == 'traceloops':
            return self.custom_message_traceloops(result)
        return None

    def custom_message_runtime(self, result):
        """
        Message for the 'runtime' metric; None for unhandled results.
        """
        proc = self.objs['proc']
        if result == 'CRITICAL':
            return '%s: No NTP process could be found.  Please check that an NTP server is installed and running.' % (result,)
        elif result == 'WARNING':
            # Reported with an OK prefix on purpose: return_code() maps a
            # runtime WARNING to exit status 0.
            return 'OK: %s has only been running %d seconds' % (proc.name, proc.getruntime())
        elif result == 'OK':
            return '%s: %s has been running %d seconds' % (result, proc.name, proc.getruntime())
        return None

    def custom_message_sync(self, result):
        """
        Message for the 'sync' metric; None for unhandled results.
        """
        if result == 'CRITICAL':
            return '%s: No sync peer selected' % (result,)
        elif result == 'OK':
            return '%s: Time is in sync with %s' % (result, self.objs['peers'].syncpeer())
        return None

    def custom_message_traceloops(self, result):
        """
        Message for the 'traceloops' metric; None for unhandled results.
        """
        if result == 'CRITICAL':
            return '%s: Trace loop detected at host %s' % (result, self.objs['trace'].loophost)
        elif result == 'OK':
            return '%s: Trace detected no loops' % (result,)
        return None

    def custom_message_tracehosts(self, result):
        """
        Message for the 'tracehosts' metric, listing the hosts in the trace.
        """
        trace = self.objs['trace']
        return '%s: %d hosts detected in trace: %s' % (
            result, trace.results['tracehosts'], ", ".join(trace.hostlist))

    def alert_collectd(self, hostname, interval):
        """
        Produce collectd output for the metrics.

        hostname: host identifier used in the PUTVAL line.
        interval: value for the PUTVAL interval option (formatted with %d).
        """
        self.collectmetrics(False)
        self.mc.classify_metrics(self.metrics)
        # The return value is not needed here.
        # NOTE(review): call kept in case worst_metric() updates classifier
        # state - confirm, and drop it if it is a pure query.
        self.mc.worst_metric(self.checks)
        self.metrics['result'] = self.return_code()
        for metric in sorted(_collectdtypes):
            if metric in self.metrics:
                print('PUTVAL "%s/ntpmon-%s" interval=%d N:%.9f' % (
                    hostname,
                    _collectdtypes[metric],
                    interval,
                    self.metrics[metric],
                ))
        if sys.stdout.isatty():
            # we're outputting to a terminal; must be test mode
            print('')
        else:
            # flush standard output to ensure metrics are sent to collectd immediately
            sys.stdout.flush()

    def alert_nagios(self, debug):
        """
        Produce nagios output for the metrics.

        debug: when true, print the message for every classified check
            instead of the single worst-metric line with performance data.
        """
        self.collectmetrics(debug)
        results = self.mc.classify_metrics(self.metrics)
        msgs = {}
        for metric in self.checks:
            if metric in results:
                msgs[metric] = self.custom_message(metric, results[metric])
                if msgs[metric] is None:
                    # No special case: fall back to the classifier's message.
                    msgs[metric] = self.mc.message(metric, _formats[metric][0], _formats[metric][1])
        if debug:
            for m in msgs:
                print(msgs[m])
        else:
            (m, _) = self.mc.worst_metric(self.checks)
            self.metrics['result'] = self.return_code()
            print("%s | %s" % (msgs[m], self.report()))

    def report(self):
        """
        Report metric values as space-separated name=value items, one per
        alias in sorted order; aliases with no collected value are emitted
        with an empty value.
        """
        items = []
        for m in sorted(_aliases):
            if m in self.metrics:
                if _formats[m] is None:
                    fmt = 'f'
                else:
                    # A '%' format is reported as a plain float instead.
                    fmt = _formats[m][1] if _formats[m][1] != '%' else 'f'
                val = self.mc.fmtstr(fmt) % self.metrics[m]
                items.append("%s=%s" % (m, val))
            else:
                items.append("%s=" % (m,))
        return " ".join(items)

    def return_code(self):
        """
        Don't return anything other than OK until ntpd has been running for
        at least enough time for 8 polling intervals of 64 seconds each.
        This prevents false positives due to ntpd restarts or short-lived VMs.
        """
        if 'runtime' in self.mc.results and self.mc.results['runtime'] == 'WARNING':
            return 0
        else:
            return self.mc.return_code(self.checks)