def probe(self):
    """Write a small payload to ``self.path`` and force it onto the disk.

    :return: a single metric named after the path with the value ``True``
        in the ``null`` context, produced once write, flush and fsync
        have completed without raising.
    """
    _log.debug('probe: %r', self.path)
    with open(self.path, 'w') as handle:
        handle.write('asdf')
        handle.flush()
        # Flushing only empties Python's buffer; fsync asks the OS to
        # commit the data as well.
        os.fsync(handle.fileno())
    return nagiosplugin.Metric(self.path, True, context='null')
def probe(self):
    """Yield the ``alert_temp`` metric read over SNMP.

    The fetched OID value is converted to ``float``, divided by ten and
    rounded to one decimal place (presumably the device reports tenths
    of a degree -- confirm against the MIB).
    """
    raw_reading = self.argoss_snmp.fetch_oid(
        '1.3.6.1.4.1.674.10892.1.700.20.1.6.1.1')
    temperature = round(float(raw_reading) / 10.0, 1)
    yield nagiosplugin.Metric('alert_temp', temperature, None,
                              context='alert_temp')
def probe(self):
    """Report the percentage change between two CloudWatch datapoints.

    Two windows of ``period + lag`` seconds are queried: one ending
    ``delta`` seconds ago and one ending now. The first datapoint of each
    answer is compared, and the difference is expressed as a percentage
    of the more recent value.

    :return: a list holding one ``cloudwatchmetric`` metric in ``%``, or
        an empty list when either query returned no datapoints.
    """
    logging.info('getting stats from cloudwatch')
    connection = self._connect()
    window = timedelta(seconds=self.period) + timedelta(seconds=self.lag)
    shift = timedelta(seconds=self.delta)

    # Older window: same length as the recent one, moved back by ``delta``.
    earlier_start = datetime.utcnow() - window - shift
    earlier_end = datetime.utcnow() - shift
    earlier_stats = connection.get_metric_statistics(
        self.period, earlier_start, earlier_end, self.metric,
        self.namespace, self.statistic, self.dimensions)

    recent_start = datetime.utcnow() - window
    recent_end = datetime.utcnow()
    recent_stats = connection.get_metric_statistics(
        self.period, recent_start, recent_end, self.metric,
        self.namespace, self.statistic, self.dimensions)

    if len(earlier_stats) == 0 or len(recent_stats) == 0:
        return []

    earlier_point = earlier_stats[0]
    recent_point = recent_stats[0]
    change = recent_point[self.statistic] - earlier_point[self.statistic]
    percent_change = (100 / recent_point[self.statistic]) * change
    return [nagiosplugin.Metric('cloudwatchmetric', percent_change, '%')]
def probe(self):
    """
    Query the REST API and create one metric per certificate.

    Every ``entry`` element of the returned XML is inspected. The
    ``not-valid-after`` date is parsed and compared with ``get_now()``.
    Certificates listed in ``self.exclude`` and certificates whose status
    is ``revoked`` produce no metric.

    :return: generator of ``certificates`` metrics holding the number of
        days until expiry.
    """
    _log.info('Reading XML from: %s', self.xml_obj.build_request_url())
    document = self.xml_obj.read()
    for entry in document.find_all('entry'):
        raw_expiry = Finder.find_item(entry, 'not-valid-after')
        # The timestamp carries a "GMT" marker that strptime cannot parse.
        expiry = datetime.strptime(raw_expiry.replace("GMT", "").strip(),
                                   '%b %d %H:%M:%S %Y')
        remaining = expiry - get_now()
        _log.debug('Certificate %s difference: %s days',
                   entry.get('name'), remaining.days)
        try:
            status = Finder.find_item(entry, 'status')
        except np.CheckError:
            # A certificate without a status element is treated as not revoked.
            status = ""
        if entry.get('name') in self.exclude or status == "revoked":
            continue
        yield np.Metric(entry.get('name'), remaining.days,
                        context='certificates')
def probe(self):
    """Fetch ``self.url`` and yield the ``@val`` attribute of the XML root.

    Nothing is yielded when the connection fails (a message is printed
    instead) or when the response object evaluates as false.

    :raises nagiosplugin.CheckError: when the parsed document has no
        usable ``@val`` attribute on its first top-level element.
    """
    response = None
    try:
        # Empty proxy entries: presumably meant to bypass any configured
        # proxy -- confirm against the requests documentation.
        response = requests.get(self.url, proxies={'http': '', 'https': ''},
                                timeout=2)
    except requests.exceptions.ConnectionError:
        print("Connection refused")
    if not response:
        return
    document = xmltodict.parse(response.content)
    try:
        root_key = list(document.keys())[0]
        val = float(document[root_key]['@val'])
    except (KeyError, TypeError):
        raise nagiosplugin.CheckError('Value is not available')
    yield nagiosplugin.Metric(self.metric, val,
                              context='alert_dogate_' + self.metric)
def probe(self):
    """Return the Graylog2 throughput as a single ``throughput`` metric.

    The API is polled up to ``self._max_retry`` times, one second apart,
    until it reports a non-zero value. If every attempt reports zero (or
    no attempt is made at all) the metric value is 0.

    :return: list with one ``throughput`` metric (integer, ``min=0``).
    :raises nagiosplugin.CheckError: when the server cannot be reached or
        its answer is not JSON containing a ``throughput`` key.
    """
    log.debug("Graylog2Throughput.probe started")
    # Defined up front so a retry budget of zero cannot leave it unbound.
    throughput = 0
    try:
        # /system/throughput sometimes returns 0 (which is normal),
        # retry max_retry times before returning 0
        for attempt in range(self._max_retry):
            log.debug("try #%d", attempt)
            r = requests.get(
                self._api_url, auth=(self._username, self._password))
            log.debug("response: %s", r.content)
            throughput = r.json()['throughput']
            log.debug("throughput : %s", str(throughput))
            if throughput != 0:
                break
            time.sleep(1)  # take a break before retry
    except requests.ConnectionError:
        log.warning("Could not conect to server: %s", self._api_url)
        raise nagiosplugin.CheckError(
            'Could not connect to graylog2 server: {}'.format(
                self._api_url))
    except (ValueError, KeyError):
        # ValueError: body is not JSON; KeyError: JSON lacks 'throughput'.
        log.warning("Could not parse response")
        raise nagiosplugin.CheckError(
            'Invalid response from graylog2 server: {}'.format(
                self._api_url))
    log.debug("Graylog2Throughput finished")
    log.debug("returning %d", int(throughput))
    return [nagiosplugin.Metric('throughput', int(throughput), min=0)]
def probe(self):
    """Check that ``/master/redirect`` on the base URI answers with a 307.

    A ``leader redirect`` metric with the value ``UNHEALTHY`` is yielded
    when the status code is not 307 or when the request itself fails.
    A healthy redirect yields nothing here.
    """
    master_uri = self.baseuri
    log.debug('Looking at %s for redirect', master_uri)
    try:
        response = requests.head(master_uri + '/master/redirect', timeout=5)
        if response.status_code != 307:
            yield nagiosplugin.Metric('leader redirect', UNHEALTHY)
        # NOTE(review): master_uri is not used again in this method, and the
        # lookup raises KeyError when no Location header is present.
        master_uri = response.headers['Location']
        log.info('Redirect response is %s', response)
        # yield the leader redirect later, the summary takes the first check
        # which we want to be 'master health'
    except requests.exceptions.RequestException as e:
        # "except X as e" is valid on Python 2.6+ and Python 3; the old
        # comma form is a syntax error on Python 3.
        log.error('leader redirect %s', e)
        yield nagiosplugin.Metric('leader redirect', UNHEALTHY)
        return
def probe(self):
    """Yield one ``check_hwgroup`` metric built from ``self._probe()``.

    The value returned by ``self._probe()`` is unpacked into the
    positional arguments of the metric.

    :rtype: nagiosplugin.Metric
    :raise CheckHWGroupError: presumably raised inside ``self._probe()``;
        nothing in this method raises it directly.
    """
    metric_args = self._probe()
    yield nagiosplugin.Metric(*metric_args, context='check_hwgroup')
def probe(self):
    """Count the current alerts by severity.

    ``self.crit``, ``self.warn`` and ``self.info`` are incremented once
    per alert whose ``current_severity`` is ``critical``, ``warning`` or
    ``info`` respectively; other severities are ignored.

    :return: an empty list when there are no alerts, otherwise the
        ``critical``, ``warning`` and ``info`` metrics in that order.
    """
    alerts = self.get_alerts()
    if not alerts:
        return []

    # Tally every alert into the counter matching its severity.
    for entry in alerts:
        severity = entry['current_severity']
        if severity == 'critical':
            self.crit += 1
        elif severity == 'warning':
            self.warn += 1
        elif severity == 'info':
            self.info += 1

    critical_metric = nagiosplugin.Metric('critical', self.crit, min=0)
    warning_metric = nagiosplugin.Metric('warning', self.warn, min=0)
    info_metric = nagiosplugin.Metric('info', self.info, min=0)
    return [critical_metric, warning_metric, info_metric]
def probe(self):
    """Report whether the Docker daemon answers, plus its numeric info.

    A ``service`` metric is always yielded first: 0 when ``info()``
    succeeded, 1 when connecting or querying raised an exception. On
    success every numeric entry of the info dictionary follows as its
    own metric.
    """
    try:
        conn = docker.Client(base_url=self.url, timeout=20)
        docker_info = conn.info()
        self.running = 0
    except Exception:
        # Any client failure counts as "not running". Exception (rather
        # than a bare except) lets KeyboardInterrupt/SystemExit through.
        self.running = 1
    yield nagiosplugin.Metric('service', self.running)
    if self.running != 0:
        return
    for k, v in docker_info.items():
        # Only pick numbers that we can generate metrics from
        # Nagios is not a config management system.
        if isinstance(v, (int, float, complex)):
            yield nagiosplugin.Metric(k, v)
def probe(self):
    """Yield the ``alert_mem_percent`` metric read over SNMP.

    The fetched OID value is converted to ``int`` and reported unchanged.
    """
    raw_value = self.argoss_snmp.fetch_oid('1.3.6.1.4.1.25506.2.6.1.1.1.1.8.3')
    mem_percent = int(raw_value)
    yield nagiosplugin.Metric('alert_mem_percent', mem_percent, None,
                              context='alert_mem_percent')
def probe(self):
    """Yield the ``alert_throughput`` metric read over SNMP.

    The fetched OID value is converted to ``int`` and reported unchanged.
    """
    raw_value = self.argoss_snmp.fetch_oid('1.3.6.1.4.1.1872.2.5.1.2.12.2.0')
    throughput = int(raw_value)
    yield nagiosplugin.Metric('alert_throughput', throughput, None,
                              context='alert_throughput')
def probe(self):
    """Return the ratio of two sub-metrics as one ``cloudwatchmetric``.

    The first metric of ``self.dividend_metric.probe()`` is divided by
    the first metric of ``self.divisor_metric.probe()``. The unit is
    reported as ``"<dividend uom> / <divisor uom>"``.

    :return: list with one metric, or an empty list when either
        sub-probe produced no metric (instead of failing with IndexError).
    """
    dividend_metrics = self.dividend_metric.probe()
    divisor_metrics = self.divisor_metric.probe()
    if not dividend_metrics or not divisor_metrics:
        # Without both operands there is no ratio to report.
        return []
    dividend = dividend_metrics[0]
    divisor = divisor_metrics[0]
    ratio_unit = '%s / %s' % (dividend.uom, divisor.uom)
    return [nagiosplugin.Metric('cloudwatchmetric',
                                dividend.value / divisor.value,
                                ratio_unit)]
def probe(self):
    """Yield speed and duplex metrics for every existing interface.

    ``self.ifaces`` maps an interface name to a context prefix.
    Interfaces for which ``self.exists()`` is false are skipped. Each
    remaining one yields ``<iface>_spd`` (in ``Mb/s``, ``min=0``)
    followed by ``<iface>_dup``.
    """
    self.setup_interfaces()
    for iface, context in self.ifaces.items():
        if self.exists(iface):
            link_speed, link_duplex = self.query(iface)
            yield nagiosplugin.Metric("{}_spd".format(iface), link_speed,
                                      "Mb/s", min=0,
                                      context="{}_spd".format(context))
            yield nagiosplugin.Metric("{}_dup".format(iface), link_duplex,
                                      context="{}_dup".format(context))
def probe(self):
    """Check that ``self.path`` can be written and synced to disk.

    :return: one metric named after the path, value ``True``, in the
        ``null`` context. Any failure surfaces as the exception raised
        by the file operations.
    """
    _log.debug("probe: %r", self.path)
    with open(self.path, "w") as out:
        out.write("asdf")
        out.flush()
        # Ask the OS to commit the file as well, not just Python's buffer.
        os.fsync(out.fileno())
    return nagiosplugin.Metric(self.path, True, context="null")
def probe(self):
    """Yield one ``usage`` metric (in percent) per checked volume.

    The volume list is fetched from the cluster once. Either every volume
    it contains or only ``self.volumes`` is checked, depending on
    ``self.check_all_volumes``. All usages are calculated before the
    first metric is yielded, so an unknown volume aborts the whole probe.

    :raises LookupError: when a requested volume is missing from the
        cluster answer.
    """
    _log.debug('Getting volume information from the cluster')
    try:
        resp_json = self.sx.listVolumes.json_call()
    except sxclient.exceptions.SXClusterNonFatalError as exc:
        # NOTE(review): assumes this helper always raises; otherwise
        # resp_json is unbound below.
        self._raise_connection_error(exc)

    if self.check_all_volumes:
        volumes = list(resp_json['volumeList'])
    else:
        volumes = self.volumes
    _log.info('Number of checked volumes: %i', len(volumes))
    _log.debug('These volumes will be checked: %s',
               ', '.join(repr(vol) for vol in volumes))

    usages = {}
    for volume in volumes:
        try:
            usages[volume] = self.calculate_usage(volume, resp_json)
        except KeyError:
            raise LookupError("No such volume: '%s'" % volume)

    # items() works on Python 2 and 3; iteritems() exists on Python 2 only.
    for volume, usage in usages.items():
        # repr() quotes the name; use double quotes in the label.
        label = ('%r usage' % volume).replace("'", '"')
        yield nagiosplugin.Metric(label, usage, uom='%', context='usage')
def probe(self):
    """Report the status of ``self.component`` as a 0/1 metric.

    :return: an empty list when the component is ``not_installed`` or the
        status record belongs to a different name; otherwise a single
        ``"<component> status"`` metric in the ``default`` context whose
        value is 0 for status ``ok`` and 1 for anything else.
    """
    info = self.get_status()
    status = info.get('status')
    name = info.get('name')
    if status == 'not_installed' or name != self.component:
        return []
    value = 0 if status == 'ok' else 1
    return nagiosplugin.Metric(self.component + ' status', value,
                               context='default')
def probe(self):
    """Yield the ``state_tunnel`` metric for the tunnel ``self.tunnel``.

    The tunnel identifier is appended to the base OID, and the fetched
    value is converted to ``int``.
    """
    tunnel_oid = '1.3.6.1.4.1.12356.101.12.2.2.1.20.' + self.tunnel
    raw_state = self.argoss_snmp.fetch_oid(tunnel_oid)
    yield nagiosplugin.Metric('state_tunnel', int(raw_state),
                              context='state_tunnel')
def probe(self):
    """Compare ``self.left`` and ``self.right`` interface by interface.

    Two metrics are yielded. ``misconfigured`` counts the interfaces
    present on only one side. ``inconsistent`` counts the interfaces
    present on both sides whose values are equal. Offending interfaces
    are also logged as warnings.
    """
    misconfigured = sorted(set(self.left).symmetric_difference(self.right))
    yield nagiosplugin.Metric("misconfigured", len(misconfigured))
    if misconfigured:
        logger.warning("Misconfigured: %s", ", ".join(misconfigured))

    shared = set(self.left).intersection(self.right)
    # Equal values on both sides are what this check reports as inconsistent.
    inconsistent = sorted(
        "{} (both {})".format(name, self.left[name])
        for name in shared
        if self.left[name] == self.right[name]
    )
    yield nagiosplugin.Metric("inconsistent", len(inconsistent))
    if inconsistent:
        logger.warning("Inconsistent state: %s", ", ".join(inconsistent))
def probe(self):
    """
    Count the failed backup and copy runs and report them as one metric.

    ``self.failed_backup_runs()`` is indexed as ``[0]`` (backup failures)
    and ``[1]`` (copy failures). A summary line is logged, and when there
    are failures the first five of each kind are logged too.

    :return: metric ``Failed backup/copy runs`` holding the combined
        count, in the ``failed_runs`` context.
    """
    failed_runs = self.failed_backup_runs()
    backup_failures = failed_runs[0]
    copy_failures = failed_runs[1]
    total_failures = len(backup_failures) + len(copy_failures)
    prefix = ("Cluster ip = {}: ".format(self.args.cluster_vip) +
              "In the past " + str(self.args.days))

    if total_failures == 0:
        _log.info(prefix + " days, there are no backup/copy run failures")
    else:
        _log.info(prefix + " days, there are " +
                  str(len(backup_failures)) + " backup run failures and " +
                  str(len(copy_failures)) + " copy run failures")
        # Only the first five runs of each kind are listed.
        for backup_run in backup_failures[0:5]:
            _log.info(backup_run)
        for copy_run in copy_failures[0:5]:
            _log.info(copy_run)

    return nagiosplugin.Metric("Failed backup/copy runs", total_failures,
                               min=0, context='failed_runs')
def probe(self):
    """Run ``self.query`` against PostgreSQL and report its row count.

    A connection is opened with the configured host, port, user,
    password and database. The query is executed and the cursor's
    ``rowcount`` becomes the metric value. The connection is always
    closed again, whether the query succeeded or not.

    :return: list with one ``record`` metric in the ``records`` context.
    :raises nap.CheckError: on any ``psycopg2.Error`` while connecting,
        executing or fetching.
    """
    log.debug("PgSQLQuery.probe started")
    connection = None
    try:
        log.debug("connecting with postgresql")
        connection = psycopg2.connect(host=self.host, port=self.port,
                                      user=self.user, password=self.passwd,
                                      database=self.database)
        cursor = connection.cursor()
        log.debug("about to execute query: %s", self.query)
        cursor.execute(self.query)
        records = cursor.rowcount
        log.debug("resulted in %d records", records)
        log.debug(records)
        log.debug(cursor.fetchall())
    except psycopg2.Error as err:
        log.critical(err)
        raise nap.CheckError(
            'Something went wrong with '
            'PostgreSQL query operation, Error: {}'.format(err))
    finally:
        # Release the server-side session on success and on failure.
        if connection is not None:
            connection.close()
    log.debug("PgSQLQuery.probe finished")
    log.debug("returning %d", records)
    return [nap.Metric('record', records, context='records')]
def probe(self):
    """Send a test message to the own JID and report the resulting state.

    ``self.state`` and ``self.error`` are set to a critical state with an
    explanation when the connection cannot be established, when
    authentication fails, or when processing raises an ``XMPPError``.
    Otherwise ``self.state`` is reported as it was before the call.

    :return: list with one ``xmpp`` metric carrying ``self.state``.
    """
    log.debug("XMPPCheck.probe started")
    xmpp = SendMsg(self._jid, self._password, self._jid, 'a test message')

    def raise_failed_auth(event):
        # Event handler: stop the client and record the failure.
        log.warning("XMPPCheck authentication failed")
        xmpp.disconnect(wait=False)
        self.state = nagiosplugin.state.Critical
        self.error = "Authentication failed."

    if xmpp.connect(reattempt=False, address=self._address,
                    use_tls=self._use_tls):
        xmpp.add_event_handler("failed_auth", raise_failed_auth)
        log.debug("XMPPCheck connected to server")
        try:
            xmpp.process(block=True)
        except sleekxmpp.exceptions.XMPPError as err:
            self.state = nagiosplugin.state.Critical
            self.error = "Error occurs: {}".format(err)
    else:
        # The password is deliberately kept out of the log.
        log.debug("Could not connect, jid: %s.", self._jid)
        self.state = nagiosplugin.state.Critical
        self.error = "Could not connect to XMPP server."
    return [nagiosplugin.Metric('xmpp', self.state, context='xmpp')]
def test_fmt_callable(self):
    """``describe`` uses a callable ``fmt_metric``, given metric and context."""
    def describe_with_context(metric, context):
        return '{0} formatted by {1}'.format(metric.name, context.name)

    context = Context('describe_callable', fmt_metric=describe_with_context)
    metric = nagiosplugin.Metric('foo', 1, 's', min=0)
    self.assertEqual('foo formatted by describe_callable',
                     context.describe(metric))
def probe(self):
    """Report the hours elapsed since the timestamp stored under ``TS_KEY``.

    The value is read through salt's ``data.getval`` using the minion
    configuration in ``/etc/salt/minion``, and parsed as
    ``%Y-%m-%dT%H:%M:%S.%f``.

    :return: list with one ``last_success`` metric (hours, ``min=0``) in
        the ``hours`` context.
    :raises Exception: any error while loading the value or parsing the
        timestamp is logged as critical and re-raised unchanged.
    """
    try:
        # only load config from /etc/salt/minion, not configs in
        # minion.d as that may require higher permission and they are
        # unnecessary for this check
        #
        # Work on a copy: dict.update() returns None and would also
        # modify salt's module-level defaults in place.
        default_config = dict(salt.config.DEFAULT_MINION_OPTS)
        default_config['default_include'] = None
        opts = salt.config.minion_config('/etc/salt/minion',
                                         defaults=default_config)
        datamod = salt.loader.raw_mod(opts, 'data', None)
        ts = datamod['data.getval'](TS_KEY)
    except Exception as e:
        log.critical('Cannot get value of %s. Error: %s', TS_KEY, e,
                     exc_info=True)
        raise

    try:
        last_success = datetime.datetime.strptime(ts, "%Y-%m-%dT%H:%M:%S.%f")
        elapsed = datetime.datetime.now() - last_success
        hours = elapsed.total_seconds() / 3600
        return [nap.Metric('last_success', hours, min=0, context='hours')]
    except Exception:
        log.critical(('Expected a string presents time in ISO format, '
                      'got %r. If it is None, probably timestamps '
                      'returner has never returned.'), ts)
        raise
def probe(self):
    """Ask the Wowza REST API whether the incoming stream is connected.

    The URL is built from scheme, host, port and the server, vhost,
    application, instance and stream names. The request uses HTTP digest
    auth. ``self.status`` is set to ``True`` when the JSON answer has a
    truthy ``isConnected``. Connection and HTTP errors are printed and
    leave ``self.status`` untouched.

    :return: list with one ``status`` metric carrying ``self.status``.
    """
    endpoint = "/v2/servers/{}/vhosts/{}/applications/{}/instances/{}/incomingstreams/{}".format(
        self.serverName, self.vhostName, self.appName,
        self.instanceName, self.streamName)
    url = urlunparse((self.scheme, "{}:{}".format(self.host, self.port),
                      endpoint, None, None, None))
    request_headers = {
        'user-agent': 'nagiosplugin-check-wowza-stream/0.0.1',
        'Content-Type': 'application/json; charset=utf-8',
        'Accept': 'application/json; charset=utf-8',
    }
    credentials = HTTPDigestAuth(self.user, self.password)
    try:
        reply = requests.get(url, headers=request_headers, auth=credentials,
                             timeout=self.timeout)
        # Raise for all responses which are not 200
        reply.raise_for_status()
        if reply.json()['isConnected']:
            self.status = True
    except (requests.ConnectionError, requests.HTTPError) as err:
        print("Check Error: %s" % err)
    return [nagiosplugin.Metric('status', self.status, context='status')]
def probe(self):
    """Send a test event to Sentry and report the total event count.

    A monitoring message is first sent through raven, with the DSN forced
    onto the ``requests+`` transport prefix. ``self._url`` is then
    queried, and the ``count`` of every returned group is summed.

    :return: list with one ``number_of_events`` metric (``min=0``).
    :raises nagiosplugin.CheckError: when the query cannot connect.
    """
    logger.debug("EventCountCheck.probe started")
    # send an event for testing
    dsn = self._sentry_dsn
    if not dsn.startswith("requests+"):
        dsn = "requests+" + dsn
    client = raven.Client(dsn=dsn)
    logger.debug("send monitoring messaage to sentry")
    client.captureMessage("sentry monitoring")
    try:
        r = requests.get(self._url,
                         auth=(self._public_key, self._secret_key),
                         verify=self._verify_ssl)
        data = r.json()
        logger.debug("response: %s", data)
        events = sum(int(group["count"]) for group in data)
        logger.debug("number of events: %d", events)
        return [nagiosplugin.Metric('number_of_events', events, min=0)]
    except requests.ConnectionError as err:
        # Exceptions do not interpolate logging-style arguments, so the
        # message has to be formatted before it is raised.
        raise nagiosplugin.CheckError(
            "Could not connect to Sentry: %s" % err)
def probe(self):
    """Yield the ``alert_cpu_percent`` metric read over SNMP.

    The fetched OID value is converted to ``float`` and reported unchanged.
    """
    raw_value = self.argoss_snmp.fetch_oid('1.3.6.1.4.1.1872.2.5.1.2.2.3.0')
    cpu_percent = float(raw_value)
    yield nagiosplugin.Metric('alert_cpu_percent', cpu_percent, None,
                              context='alert_cpu_percent')
def probe(self):
    """Query AWS CloudWatch for health data

    One metric is yielded per returned datapoint. Its value is the
    datapoint entry whose key matches the requested statistic, compared
    case-insensitively, or ``None`` when no key matches. Nothing is
    yielded when the response has no datapoints.

    Returns:
        generator yielding nagiosplugin.Metric objects

    Raises:
        UnexpectedLabel: the response label differs from the requested metric.
        UnexpectedDatapointUnit: a unit was requested and a datapoint
            reports a different one.
    """
    response = self._statistics_get()
    if "Datapoints" not in response or not response["Datapoints"]:
        return

    label = response["Label"]
    if label != self.cmdargs.metric:
        raise UnexpectedLabel(
            f"Unexpected Metric in Response. Got: {label}, Expected: {self.cmdargs.metric}"
        )

    # str.capitalize() would turn "SampleCount" into "Samplecount" and miss
    # the key, so the statistic is matched case-insensitively instead.
    wanted = self.cmdargs.statistic.lower()
    for point in response["Datapoints"]:
        # If a unit was provided, make sure it matches the returned datapoint.unit
        if self.cmdargs.unit and point["Unit"] != self.cmdargs.unit:
            unit = point["Unit"]
            raise UnexpectedDatapointUnit(
                f"Unexpected datapoint unit: {unit}, expected: {self.cmdargs.unit}"
            )
        stat_val = next(
            (value for key, value in point.items() if key.lower() == wanted),
            None,
        )
        # @TODO - add uom from point["Unit"], preferably using ISO abbreviations, i.e. "Seconds"=>"s"
        yield nagiosplugin.Metric(self.cmdargs.metric, stat_val)
def probe(self):
    """Report ``CurrentStatus`` from the health endpoint.

    The JSON document at ``health_url(args)`` is loaded and its
    ``value.CurrentStatus`` entry becomes the metric value. When loading,
    decoding or the lookup fails, a metric with the value -1 is returned
    instead, and its name describes the failure.

    :return: one metric in the ``health`` context.
    """
    try:
        payload = loadJson(health_url(args))
        return np.Metric('CurrentStatus', payload['value']['CurrentStatus'],
                         context='health')
    except IOError as e:
        failure = 'Fetching network FAILED: ' + str(e)
    except ValueError as e:
        failure = 'Decoding Json FAILED: ' + str(e)
    except KeyError as e:
        failure = 'Getting Values FAILED: ' + str(e)
    # Only reached when one of the handlers above recorded a failure.
    return np.Metric(failure, -1, context='health')
def probe(self):
    """Poll the Mi Flora sensor and return its five readings.

    :return: list of the ``Temperature``, ``Moisture``, ``Light``,
        ``Conductivity`` and ``Battery`` metrics, each with ``min=0`` and
        a lower-case context of the same name.
    """
    poller = MiFloraPoller(self._mac, GatttoolBackend)
    separator = '-' * 50
    _log.info(separator)
    _log.info('Getting data from Mi Flora')
    _log.info(separator)
    # Firmware version and name are not logged on purpose: asking for
    # them creates another poll.
    readings = (
        ('Temperature', MI_TEMPERATURE, 'temperature'),
        ('Moisture', MI_MOISTURE, 'moisture'),
        ('Light', MI_LIGHT, 'light'),
        ('Conductivity', MI_CONDUCTIVITY, 'conductivity'),
        ('Battery', MI_BATTERY, 'battery'),
    )
    return [
        nagiosplugin.Metric(label, poller.parameter_value(parameter),
                            min=0, context=context)
        for label, parameter, context in readings
    ]