loc = 0 try: for i in sitemap.UrlSet.from_url(self.sitemap): loc += 1 return loc except Exception as e: print 'SITEMAP WARNING - %s' % str(e) sys.exit(1) def probe(self): return [nagiosplugin.Metric('sitemap', self.loc(), context="sitemap")] def check_sitemap(config): """ Required configs: - sitemap - warning - critical """ return ( SiteMap(config['sitemap']), nagiosplugin.ScalarContext('sitemap', config['warning'], config['critical']), ) if __name__ == "__main__": nrpe.check(check_sitemap)
log.debug('processing %s', filename) f = self.make_file(filename, None) if not f: log.debug('skipping') continue key, value = f.items()[0] # we may want to run fstat on this filename later on f[key]['filename'] = filename # keeps only the newest file for each facility if (key not in files) or (value['date'] > files[key]['date']): log.debug('first or newer.') files.update(f) else: log.debug('was old') # now fetch fstat for each file, and yield them for k, f in files.items(): log.debug('getting fstat for %s', f['filename']) filestat = ftp.stat(f['filename']) f['size'] = filestat.st_size yield {k: f} def check_backup(config): return base_check(SCPBackupFile, config) if __name__ == '__main__': nrpe.check(check_backup, defaults)
log = logging.getLogger("nagiosplugin.apt.half_installed")


class HalfRemoved(nagiosplugin.Resource):
    """Count packages dpkg reports as removed-but-not-purged (status 'rc')."""

    def probe(self):
        log.debug("HalfRemoved.probe started")
        pkgs = []
        dpkg = os.popen('dpkg -l')
        for line in dpkg.readlines():
            cols = line.split()
            # Guard against blank lines (empty split -> IndexError);
            # header lines never carry the 'rc' status tag.
            if cols and cols[0] == 'rc':
                # BUG FIX: log the package name (cols[1]), not cols[0],
                # which is always the literal status tag 'rc'.
                log.debug("Half-Removed package: %s", cols[1])
                pkgs.append(cols[1])
        log.debug("HalfRemoved.probe finished")
        log.debug("returning %d", len(pkgs))
        return [nagiosplugin.Metric('halfinstalled', len(pkgs), min=0)]


def half_installed_check(config):
    """Build the check; one threshold serves as both warning and critical."""
    return (
        HalfRemoved(),
        nagiosplugin.ScalarContext(
            'halfinstalled', config['warning'], config['warning'],
            fmt_metric='{value} half-installed packages'))


if __name__ == '__main__':
    nrpe.check(half_installed_check, {'warning': '0'})
list(all_ids - ids_from_mine)) log.debug('Diff minion IDs: %s', diff_ids) self.diff = diff_ids log.debug("MineMinion.probe ended") log.debug("returning %d", len(diff_ids)) return [ nap.Metric('mine_minions', len(diff_ids), min=0, context='minions') ] class MineSummary(nap.Summary): def problem(self, result): return ('{0} IDs do not match are: {1}'.format( result.results[0].metric.value, ', '.join(result.results[0].resource.diff))) def check_mine_minions(_): return ( MineMinion(), nap.ScalarContext('minions', '0:0', '0:0'), MineSummary(), ) if __name__ == "__main__": nrpe.check(check_mine_minions)
return [nagiosplugin.Metric( "git_branch", self._state, context="git_branch")] class GitBranchesContext(nagiosplugin.Context): def evaluate(self, metric, resource): if metric.value == nagiosplugin.state.Ok: result = self.result_cls( nagiosplugin.state.Ok, hint="no git branch name with bad characters found.", metric=metric) else: result = self.result_cls( nagiosplugin.state.Critical, hint=resource._error, metric=metric) return result def check_git(config): return( GitBranchesCheck(path=config["path"]), GitBranchesContext("git_branch"), ) if __name__ == '__main__': nrpe.check(check_git, { "path": "/etc/salt/master", })
def probe(self): import salt.syspaths as syspaths import salt.config import salt.minion config_file = os.path.join(syspaths.CONFIG_DIR, 'minion') opts = salt.config.minion_config(config_file) opts['auth_timeout'] = self.salt_timeout minion = salt.minion.SMinion(opts) output = minion.functions['pillar.items']() if "_errors" in output: error = output['_errors'] log.error('Error: %s', error) render_errors = 1 else: render_errors = 0 return [nap.Metric('pillar_render_errors', render_errors, context='errors')] def check_good_pillar(config): return ( PillarRender(config['salt_timeout']), nap.ScalarContext('errors', warning='0:0', critical='0:0'), ) if __name__ == "__main__": nrpe.check(check_good_pillar, {})
for prv_id, prv_id_data in providers.iteritems(): for prv_name, prv_data in prv_id_data.iteritems(): a_provider_images = all_images[prv_id][prv_name] try: ids.update(str(a_provider_images[inst]['id']) for inst in a_provider_images) except KeyError: # amazon uses key ``imageId`` ids.update(str(a_provider_images[inst]['imageId']) for inst in a_provider_images) log.debug("received ids: %s", ids) imgs = set(str(prof['image']) for prof in profile_list.values()) log.debug("profile images: %s", imgs) yield nagiosplugin.Metric('missing', imgs - ids) log.debug("ImageIds.probe ended") def check_saltcloud_images(config): return ( ImageIds(config['cloud_config_file']), MissingImageContext('missing'), Summary()) if __name__ == '__main__': nrpe.check(check_saltcloud_images, { 'cloud_config_file': '/etc/salt/cloud', })
time_in_log = ' '.join(log_msg.split()[:3]) logtime = '{0} {1}'.format(this_year, time_in_log) lt = datetime.datetime.strptime(logtime, '%Y %b %d %H:%M:%S') now = datetime.datetime.now() if now < lt: # this means log_msg is created in last year, and now is new year return False else: delta = (now - lt).total_seconds() return delta <= self.second_ago def number_of_oom_message(self): cntr = 0 syslog_files = glob.glob('/var/log/syslog*') for fn in syslog_files: with open(fn) as f: for line in f: if 'Out of memory' in line and self.is_near(line): cntr += 1 return cntr def check_oom(config): oom = (OOM_Message(config['seconds']) if config['seconds'] else OOM_Message()) return (oom, nap.ScalarContext('msg', '0:0', '0:0')) if __name__ == "__main__": nrpe.check(check_oom, {'seconds': None})
log.debug("resulted in %d records", records) log.debug(cursor.fetchall()) except pymysql.err.Error as err: log.critical(err) raise nap.CheckError( 'Something went wrong with ' 'MySQL query operation, Error: ()'.format(err)) log.debug("MysqlQuery.probe finished") log.debug("returning %d", records) return [nap.Metric('records', records, context='records')] def check_mysql_query(config): critical = config['critical'] return ( MysqlQuery(host=config['host'], user=config['user'], passwd=config['passwd'], database=config['database'], query=config['query']), nap.ScalarContext('records', critical, critical) ) if __name__ == "__main__": nrpe.check(check_mysql_query, { 'critical': '1:', 'query': 'select @@max_connections;', })
rp = robotparser.RobotFileParser() url = "http://%s/robots.txt" % self.domain rp.set_url(url) try: rp.read() except Exception, err: yield nagiosplugin.Metric("robotsfile", "Can't get %s: %s" % (url, err)) if not rp.can_fetch("*", "/"): yield nagiosplugin.Metric("robotsfile", "Can't fetch /") # "None" is a successful result yield nagiosplugin.Metric("robotsfile", None) def check_robots(config): """ Required configs: - domain """ return ( RobotsFile(domain=config['domain']), BoolishContext("robotsfile") ) if __name__ == '__main__': nrpe.check(check_robots)
self._url, auth=(self._public_key, self._secret_key), verify=self._verify_ssl) data = r.json() logger.debug("response: %s", data) events = 0 for group in data: events += int(group["count"]) logger.debug("number of events: %d", events) return [ nagiosplugin.Metric('number_of_events', events, min=0) ] except requests.ConnectionError as err: raise nagiosplugin.CheckError( "Could not connect to Sentry: %s", err) def count_events(config): return ( EventCountCheck(dsn_file=config["dsn_file"]), nagiosplugin.ScalarContext("number_of_events", critical="0:"), ) if __name__ == '__main__': nrpe.check(count_events, { "dsn_file": "/var/lib/deployments/sentry/monitoring_dsn.yml" })
class VBoxKernelModules(nap.Resource):
    """Count 'vbox' occurrences in the vboxdrv init-script status output."""

    def probe(self):
        log.debug("VBoxKernelModules.probe started")
        # BUG FIX: start from an empty result so the metric is 0 when the
        # init script is absent; the original left kernel_modules unbound
        # and raised NameError after `except OSError: pass`. Sibling
        # checks (iptables Rules) initialize their counter the same way.
        kernel_modules = []
        try:
            output = subprocess.check_output(['/etc/init.d/vboxdrv',
                                              'status'])
        except OSError:
            # /etc/init.d/vboxdrv missing or not executable
            pass
        else:
            kernel_modules = re.findall('vbox', output)
        log.debug("VBoxKernelModules.probe finished")
        return [nap.Metric('kernelmodules', len(kernel_modules), min=0)]


def check_vbox_kernel_modules(config):
    """Build the check; the same range is used for warning and critical."""
    return (
        VBoxKernelModules(),
        nap.ScalarContext('kernelmodules',
                          nap.Range('{}:{}'.format(config['warning'],
                                                   config['warning'])),
                          nap.Range('{}:{}'.format(config['warning'],
                                                   config['warning'])),
                          fmt_metric='{value} kernel modules are loaded')
    )


if __name__ == '__main__':
    nrpe.check(check_vbox_kernel_modules, {'warning': 4})
else: print 'SITEMAPLINK WARNING - %s' % check_ret['errors'] sys.exit(1) except IOError as e: print 'SITEMAPLINK WARNING - Cannot find check results: ' + str(e) sys.exit(1) def probe(self): return [nagiosplugin.Metric('sitemaplink', self.failed(), context="sitemaplink")] def check_sitemaplink(config): """ Required configs: - sitemap - warning - critical """ return ( SiteMapLink(config['sitemap']), nagiosplugin.ScalarContext('sitemaplink', config['warning'], config['critical']), ) if __name__ == "__main__": nrpe.check(check_sitemaplink)
import requests
import logging

from pysc import nrpe

log = logging.getLogger("nagiosplugin.elasticsearch.cluster_nodes")
logging.getLogger("requests").setLevel(logging.WARNING)


class ClusterNodes(nagiosplugin.Resource):
    """Query local Elasticsearch cluster health and report the node count."""

    def probe(self):
        log.debug("ClusterNode.probe started")
        rsc = "health"
        log.debug("calling localhost to get cluster %s", rsc)
        response = requests.get("http://127.0.0.1:9200/_cluster/" + rsc)
        log.debug("response: %s", response.content)
        log.debug("ClusterNode.probe finished")
        node_count = response.json()["number_of_nodes"]
        return [nagiosplugin.Metric("nodes", node_count, min=0)]


def check_procs(config):
    """Build the check; the critical range doubles as the warning range."""
    context = nagiosplugin.ScalarContext(
        "nodes",
        config["critical"],
        config["critical"],
        fmt_metric="{value} nodes in cluster",
    )
    return (ClusterNodes(), context)


if __name__ == "__main__":
    nrpe.check(check_procs)
query_time = self.get_query_time() return [nap.Metric('query time', query_time)] def main(config): kwargs = { 'server': config['server'], 'domain': config['domain'], 'record': config['record'] } warning = config['warning'] critical = config['critical'] return (DnsCaching(**kwargs), nap.ScalarContext('query time', warning, critical, fmt_metric="Query time: {value} msec")) if __name__ == '__main__': nrpe.check( main, { 'server': '127.0.0.1', 'domain': 'robotinfra.com', 'record': 'a', 'warning': '1', 'critical': '2', })
self._send_email_for_test('Testing spam with GTUBE', body) found = self.grep_msg(body, 'spam', msg_set='latest') return found def test_send_and_receive_email_in_inbox_mailbox(self): body = 'Test inbox' self._send_email_for_test('Testing send email to INBOX', body) found = self.grep_msg(body, 'INBOX', msg_set='latest') return found def test_send_virus_email_and_discarded_by_amavis(self): # http://en.wikipedia.org/wiki/EICAR_test_file body = (r'X5O!P%@AP[4\PZX54(P^)7CC)7}$' r'EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*') self._send_email_for_test('Testing send virus email ', body) found = self.grep_msg(body, 'INBOX', msg_set='latest') # not found the msg means antivirus worked return (not found) def check_mail_stack(config): return [ MailStackHealth(config['imap_server'], config['smtp_server'], config['username'], config['password'], config['smtp_wait'], config['ssl']) ] if __name__ == "__main__": nrpe.check(check_mail_stack, {'timeout': 300})
self.domain = domain def probe(self): rp = robotparser.RobotFileParser() url = "http://%s/robots.txt" % self.domain rp.set_url(url) try: rp.read() except Exception, err: yield nagiosplugin.Metric("robotsfile", "Can't get %s: %s" % (url, err)) if not rp.can_fetch("*", "/"): yield nagiosplugin.Metric("robotsfile", "Can't fetch /") # "None" is a successful result yield nagiosplugin.Metric("robotsfile", None) def check_robots(config): """ Required configs: - domain """ return (RobotsFile(domain=config["domain"]), BoolishContext("robotsfile")) if __name__ == "__main__": nrpe.check(check_robots)
if len(offline_slaves) > 0: state = nap.state.Critical else: state = nap.state.Ok return nap.Result(state, metric=metric) class Summary(nap.Summary): def ok(self, results): return 'All slaves are working' def problem(self, results): value = results.results[0].metric.value slaves_number, offline_slaves = value return "{0}/{1} slaves is/are offline: {2}".format( len(offline_slaves), slaves_number, offline_slaves) def check_jenkins_slaves(config): return (JenkinsSlaves(config['url'], config['username'], config['token']), SlavesContext('jenkins slaves'), Summary() ) if __name__ == "__main__": # disable urllib3 warnings requests.packages.urllib3.disable_warnings() nrpe.check(check_jenkins_slaves, {'timeout': 100})
def main(config):
    """
    Required configs:

    - host
    """
    kwargs = dict(host=config['host'], port=config['port'])
    # All four contexts share identical warning/critical score ranges.
    contexts = [
        nap.ScalarContext(name, nap.Range('@65:80'), nap.Range('@0:65'))
        for name in ('sslscore', 'serverHostname',
                     'validationResult', 'expireInDays')
    ]
    return tuple(
        [SslConfiguration(**kwargs)] + contexts + [SslSummary(**kwargs)])


if __name__ == "__main__":
    nrpe.check(main, {
        'port': 443,
        'timeout': 60,
    })
bad_branches = _branch_filter(repos) log.debug("bad branch names: %s", bad_branches) if bad_branches: log.debug("found branch name with bad character") self._error = "bad branches: {0}".format(bad_branches) self._state = nagiosplugin.state.Critical log.debug("GitBranchesCheck.probe finished") return [nagiosplugin.Metric("git_branch", self._state, context="git_branch")] class GitBranchesContext(nagiosplugin.Context): def evaluate(self, metric, resource): if metric.value == nagiosplugin.state.Ok: result = self.result_cls( nagiosplugin.state.Ok, hint="no git branch name with bad characters found.", metric=metric ) else: result = self.result_cls(nagiosplugin.state.Critical, hint=resource._error, metric=metric) return result def check_git(config): return (GitBranchesCheck(path=config["path"]), GitBranchesContext("git_branch")) if __name__ == "__main__": nrpe.check(check_git, {"path": "/etc/salt/master"})
self.link = link def probe(self): dead_links = [] for i in sitemap.UrlSet.from_url(self.link): url = i.loc.split()[0] try: if not requests.get(url).ok: dead_links.append(url) except requests.RequestException: dead_links.append(url) yield nagiosplugin.Metric("deadlinks", len(dead_links)) def check_links(config): """ Required configs: - sitemap: the simtemap link """ return ( DeadLinks(link=config['sitemap']), nagiosplugin.ScalarContext("deadlinks", ":0"), ) if __name__ == "__main__": nrpe.check(check_links)
sys.exit(1) except IOError as e: print 'SITEMAPLINK WARNING - Cannot find check results: ' + str(e) sys.exit(1) def probe(self): return [ nagiosplugin.Metric('sitemaplink', self.failed(), context="sitemaplink") ] def check_sitemaplink(config): """ Required configs: - sitemap - warning - critical """ return ( SiteMapLink(config['sitemap']), nagiosplugin.ScalarContext('sitemaplink', config['warning'], config['critical']), ) if __name__ == "__main__": nrpe.check(check_sitemaplink)
TS_KEY, e, exc_info=True) raise else: try: hours = (datetime.datetime.now() - datetime.datetime.strptime( ts, "%Y-%m-%dT%H:%M:%S.%f")).total_seconds() / 3600 ret = [nap.Metric('last_success', hours, min=0, context='hours')] return ret except Exception: log.critical(('Expected a string presents time in ISO format, ' 'got %r. If it is None, probably timestamps ' 'returner has never returned.'), ts) raise def check_last_success(config): threshold = '0:%s' % config['max_hours'] return ( LastSuccess(), nap.ScalarContext('hours', threshold, threshold, fmt_metric='{value} hours ago') ) if __name__ == "__main__": nrpe.check(check_last_success, {'max_hours': 24})
log.debug("Process %s - %s type %s", host, name, file_type) dates = hosts[host][name][file_type].keys() dates.sort() latest = hosts[host][name][file_type][dates[-1]] log.debug("Latest backup %s", latest.date.isoformat()) if now - latest.date > max_time: log.debug("Expired backup %s", latest) missing_backup.append('-'.join((host, file_type))) else: log.debug("Good backup %s", latest) number_backups += 1 log.info("check finished") log.debug("missing backups: %s", str(missing_backup)) yield nagiosplugin.Metric('missing', (missing_backup, number_backups)) def check_backups(config): return ( Backups(max_hours=config['max_hours'], backup_dir=config['backup_dir']), MissingBackupsContext('missing'), ) if __name__ == '__main__': nrpe.check(check_backups, { 'max_hours': 36, 'backup_dir': '/var/lib/backup' })
return "sslscore is 0 ({0})".format(ssl_configuration.check()[0]) elif results["sslscore"].context.name == "validationResult": return "sslscore is 0 ({0})".format(ssl_configuration.check()[1]) elif results["sslscore"].context.name == "expireInDays": return ("sslscore is 0 (The certificate expired {0} days" "ago)").format(ssl_configuration.check()[2]) def problem(self, results): return self.status_line(results) def main(config): """ Required configs: - host """ kwargs = dict(host=config["host"], port=config["port"]) return ( SslConfiguration(**kwargs), nap.ScalarContext("sslscore", nap.Range("@65:80"), nap.Range("@0:65")), nap.ScalarContext("serverHostname", nap.Range("@65:80"), nap.Range("@0:65")), nap.ScalarContext("validationResult", nap.Range("@65:80"), nap.Range("@0:65")), nap.ScalarContext("expireInDays", nap.Range("@65:80"), nap.Range("@0:65")), SslSummary(**kwargs), ) if __name__ == "__main__": nrpe.check(main, {"port": 443, "timeout": 60})
class Rules(nagiosplugin.Resource):
    """Count active iptables rules as reported by iptables-save."""

    def probe(self):
        log.debug("Rules.probe started")
        total = 0
        try:
            proc = subprocess.Popen(['iptables-save'],
                                    stdout=subprocess.PIPE)
        except OSError:
            # iptables-save unavailable: report zero rules
            pass
        else:
            # rule lines are the ones starting with '-'
            total = sum(1 for line in proc.stdout.readlines()
                        if line.startswith('-'))
        log.debug("Rules.probe finished")
        log.debug("Returning %d", total)
        return [nagiosplugin.Metric('rules', total, min=0)]


def check_firewall(config):
    """Build the check; one threshold serves as warning and critical."""
    context = nagiosplugin.ScalarContext(
        'rules', config['warning'], config['warning'],
        fmt_metric='{value} rules in iptables')
    return (Rules(), context)


if __name__ == '__main__':
    nrpe.check(check_firewall)
self._jid, self._password)) self.state = nagiosplugin.state.Critical self.error = "Could not connect to XMPP server." return [nagiosplugin.Metric('xmpp', self.state, context='xmpp')] class XMPPContext(nagiosplugin.Context): def evaluate(self, metric, resource): if metric.value == nagiosplugin.state.Ok: result = self.result_cls(nagiosplugin.state.Ok, hint="your message is successfully sent.", metric=metric) else: result = self.result_cls(nagiosplugin.state.Critical, hint=getattr(resource, 'error', ''), metric=metric) return result def check_xmpp(config): return (XMPPCheck(jid=config['jid'], password=config['password'], address=config['address'], use_tls=config['use_tls']), XMPPContext('xmpp')) if __name__ == '__main__': nrpe.check(check_xmpp, { 'address': None, })
timeout=self._timeout ) data = r.json() logger.debug("response: %s", data) events = 0 for group in data: events += int(group["count"]) logger.debug("number of events: %d", events) return [ nagiosplugin.Metric('number_of_events', events, min=0) ] except requests.ConnectionError as err: raise nagiosplugin.CheckError( "Could not connect to Sentry: %s", err) def count_events(config): return ( EventCountCheck(dsn_file=config["dsn_file"], api_key_file=config["api_key_file"], timeout=config["timeout"]), nagiosplugin.ScalarContext("number_of_events", critical="0:"), ) if __name__ == '__main__': nrpe.check(count_events, {})
TS_KEY, e, exc_info=True) raise else: try: hours = (datetime.datetime.now() - datetime.datetime.strptime( ts, "%Y-%m-%dT%H:%M:%S.%f")).total_seconds() / 3600 ret = [ nap.Metric('last_success', hours, min=0, context='hours') ] return ret except Exception: log.critical(('Expected a string presents time in ISO format, ' 'got %r. If it is None, probably timestamps ' 'returner has never returned.'), ts) raise def check_last_success(config): threshold = '0:%s' % config['max_hours'] return (LastSuccess(), nap.ScalarContext('hours', threshold, threshold, fmt_metric='{value} hours ago')) if __name__ == "__main__": nrpe.check(check_last_success, {'max_hours': 24})
'Could not connect to graylog2 server: {}'.format( self._api_url)) except ValueError: log.warn("Could not parse response") raise nagiosplugin.CheckError( 'Invalid response from graylog2 server: {}'.format( self._api_url)) log.debug("Graylog2Throughput finished") log.debug("returning %d", int(throughput)) return [ nagiosplugin.Metric('throughput', int(throughput), min=0) ] def check_new_logs(config): return ( Graylog2Throughput(api_url=config['api_url'], username=config['username'], password=config['password'], max_retry=config['max_retry']), nagiosplugin.ScalarContext('throughput', critical=config['crit_range']) ) if __name__ == '__main__': nrpe.check(check_new_logs, { 'crit_range': '1:10000', 'max_retry': '20', })
try: r = requests.get(self._url, auth=(self._public_key, self._secret_key), verify=self._verify_ssl) data = r.json() logger.debug("response: %s", data) events = 0 for group in data: events += int(group["count"]) logger.debug("number of events: %d", events) return [nagiosplugin.Metric('number_of_events', events, min=0)] except requests.ConnectionError as err: raise nagiosplugin.CheckError("Could not connect to Sentry: %s", err) def count_events(config): return ( EventCountCheck(dsn_file=config["dsn_file"]), nagiosplugin.ScalarContext("number_of_events", critical="0:"), ) if __name__ == '__main__': nrpe.check(count_events, {"dsn_file": "/var/lib/deployments/sentry/monitoring_dsn.yml"})
log.debug(records) log.debug(cursor.fetchall()) except psycopg2.Error as err: log.critical(err) raise nap.CheckError( 'Something went wrong with ' 'PostgreSQL query operation, Error: {}'.format(err)) log.debug("PgSQLQuery.probe finished") log.debug("returning %d", records) return [nap.Metric('record', records, context='records')] def check_pgsql_query(config): critical = config['critical'] return (PgSQLQuery(host=config['host'], port=config['port'], user=config['user'], passwd=config['passwd'], database=config['database'], query=config['query']), nap.ScalarContext('records', critical, critical)) if __name__ == "__main__": nrpe.check(check_pgsql_query, { 'critical': '1:', 'port': '5432', 'query': 'show max_connections;', })
class XMPPContext(nagiosplugin.Context):
    """Translate the xmpp metric into an Ok/Critical result with a hint."""

    def evaluate(self, metric, resource):
        if metric.value == nagiosplugin.state.Ok:
            state = nagiosplugin.state.Ok
            hint = "your message is successfully sent."
        else:
            state = nagiosplugin.state.Critical
            hint = getattr(resource, 'error', '')
        return self.result_cls(state, hint=hint, metric=metric)


def check_xmpp(config):
    """Build the XMPP message-delivery check from the configuration."""
    resource = XMPPCheck(
        jid=config['jid'],
        password=config['password'],
        address=config['address'],
        use_tls=config['use_tls'],
    )
    return (resource, XMPPContext('xmpp'))


if __name__ == '__main__':
    nrpe.check(check_xmpp, {
        'address': None,
    })
for prv_id, prv_id_data in providers.iteritems(): for prv_name, prv_data in prv_id_data.iteritems(): a_provider_images = all_images[prv_id][prv_name] try: ids.update( str(a_provider_images[inst]['id']) for inst in a_provider_images) except KeyError: # amazon uses key ``imageId`` ids.update( str(a_provider_images[inst]['imageId']) for inst in a_provider_images) log.debug("received ids: %s", ids) imgs = set(str(prof['image']) for prof in profile_list.values()) log.debug("profile images: %s", imgs) yield nagiosplugin.Metric('missing', imgs - ids) log.debug("ImageIds.probe ended") def check_saltcloud_images(config): return (ImageIds(config['cloud_config_file']), MissingImageContext('missing'), Summary()) if __name__ == '__main__': nrpe.check(check_saltcloud_images, { 'cloud_config_file': '/etc/salt/cloud', })
- ('path', help='Path used when backup') - ('bucket', help='s3://bucket/prefix to check uploaded file') """ try: parsed = boto.urlparse.urlparse(config['bucket']) bucket_name, prefix = parsed.netloc, parsed.path prefix = prefix[1:] # prefix must not start with / return (BackupAge( config['s3']['access_key'], config['s3']['secret_key'], bucket_name, prefix, config['path'], config['minion_id'], allow_empty=config['empty'], ), nap.ScalarContext('age', config['warning'], config['warning'])) except boto.exception.S3ResponseError: raise ValueError('Bad or non-existing bucket name') if __name__ == "__main__": nrpe.check( s3lite_backup_client_check, { 'empty': False, 'warning': '48', 'timeout': None, 'config': '/etc/nagios/backup.yml', })
log.debug(cursor.fetchall()) except psycopg2.Error as err: log.critical(err) raise nap.CheckError( 'Something went wrong with ' 'PostgreSQL query operation, Error: {}'.format(err)) log.debug("PgSQLQuery.probe finished") log.debug("returning %d", records) return [nap.Metric('record', records, context='records')] def check_pgsql_query(config): critical = config['critical'] return ( PgSQLQuery(host=config['host'], port=config['port'], user=config['user'], passwd=config['passwd'], database=config['database'], query=config['query']), nap.ScalarContext('records', critical, critical) ) if __name__ == "__main__": nrpe.check(check_pgsql_query, { 'critical': '1:', 'port': '5432', 'query': 'show max_connections;', })
try: ids.update(str(a_provider_images[inst]['id']) for inst in a_provider_images) except KeyError: # amazon uses key ``imageId`` ids.update(str(a_provider_images[inst]['imageId']) for inst in a_provider_images) log.debug("received ids: %s", ids) imgs = set(str(prof['image']) for prof in profile_list.values()) log.debug("profile images: %s", imgs) yield nagiosplugin.Metric('missing', imgs - ids) log.debug("ImageIds.probe ended") def check_saltcloud_images(config): return ( ImageIds(config['cloud_config_file'], config['wanted_slugs']), MissingImageContext('missing'), Summary()) if __name__ == '__main__': # disable urllib3 warnings requests.packages.urllib3.disable_warnings() nrpe.check(check_saltcloud_images, { 'cloud_config_file': '/etc/salt/cloud', 'wanted_slugs': ('ubuntu-14-04-x64', 'ubuntu-12-04-x64') })
def probe(self): query_time = self.get_query_time() return [nap.Metric('query time', query_time)] def main(config): kwargs = { 'server': config['server'], 'domain': config['domain'], 'record': config['record'] } warning = config['warning'] critical = config['critical'] return ( DnsCaching(**kwargs), nap.ScalarContext('query time', warning, critical, fmt_metric="Query time: {value} msec") ) if __name__ == '__main__': nrpe.check(main, { 'server': 'localhost', 'domain': 'robotinfra.com', 'record': 'a', 'warning': '3', 'critical': '5', })
log.debug(cmd) output = subprocess.check_output(cmd).split('\n') log.debug(output) for line in output: cols = line.split(' | ') if (self.dbname == cols[0].strip() and self.encoding == cols[2].strip()): log.debug(self.dbname) log.debug('Expect: {0}, found {1}'.format( self.encoding, cols[2].strip())) log.debug("Encoding.probe finished") log.debug("returning %d", 0) return [nap.Metric('encoding', 0, context='encoding')] log.debug("Ecoding.probe finished") log.debug("returning %d", 1) return [nap.Metric('encoding', 1, context='encoding')] def check_psql_encoding(config): """ Required configurations: - ('name', help="The database name to check") """ enc = Encoding(config['name'], config['encoding']) return (enc, nap.ScalarContext('encoding', '0:0', '0:0')) if __name__ == "__main__": nrpe.check(check_psql_encoding, {'encoding': 'UTF8'})
log = logging.getLogger("nagiosplugin.apt.half_installed")


class HalfRemoved(nagiosplugin.Resource):
    """Count packages in dpkg state 'rc' (removed, config files remain)."""

    def probe(self):
        log.debug("HalfRemoved.probe started")
        pkgs = []
        dpkg = os.popen('dpkg -l')
        for line in dpkg.readlines():
            cols = line.split()
            # Skip blank lines; cols[0] on an empty split would raise.
            if cols and cols[0] == 'rc':
                # BUG FIX: the debug message must show the package name
                # (cols[1]); cols[0] is the status tag 'rc' itself.
                log.debug("Half-Removed package: %s", cols[1])
                pkgs.append(cols[1])
        log.debug("HalfRemoved.probe finished")
        log.debug("returning %d", len(pkgs))
        return [nagiosplugin.Metric('halfinstalled', len(pkgs), min=0)]


def half_installed_check(config):
    """Build the check; warning threshold is reused as the critical one."""
    return (HalfRemoved(),
            nagiosplugin.ScalarContext(
                'halfinstalled', config['warning'], config['warning'],
                fmt_metric='{value} half-installed packages'))


if __name__ == '__main__':
    nrpe.check(half_installed_check, {'warning': '0'})
# :fixme: fails if one of these lines is missing in /proc/meminfo return memTotal, memCached, memFree def percentFreeMem(): memTotal, memCached, memFree = MemValues() return (((int(memFree) + int(memCached)) * 100) / int(memTotal)) class UsedMemory(nagiosplugin.Resource): def probe(self): pmemUsage = 100 - percentFreeMem() yield nagiosplugin.Metric('usedmemory', pmemUsage) def memory_check(config): return ( UsedMemory(), nagiosplugin.ScalarContext('usedmemory', warning=':{}'.format(config['warning']), critical=":{}".format(config['critical']), fmt_metric="Memory usage: {value:2.1f}%"), ) if __name__ == '__main__': nrpe.check(memory_check, { 'warning': '80', 'critical': '90', })
else: diff_ids = (set(ids_from_salt_key) ^ set(ids_from_mine)) diff_ids = diff_ids - set(ignored_ids) log.debug('Diff minion IDs: %s', diff_ids) self.diff = diff_ids log.debug("MineMinion.probe ended") log.debug("returning %d", len(diff_ids)) return [nap.Metric('mine_minions', len(diff_ids), min=0, context='minions')] class MineSummary(nap.Summary): def problem(self, result): return ( '{0} IDs do not match are: {1}'.format( result.results[0].metric.value, ', '.join(result.results[0].resource.diff) ) ) def check_mine_minions(_): return (MineMinion(), nap.ScalarContext('minions', '0:0', '0:0'), MineSummary(), ) if __name__ == "__main__": nrpe.check(check_mine_minions)
# cause unexpected errors later # validate=False, # S3 allows /// as a valid path, but we won't support that case # user may wrongly config path with/without ending '/', add one # and use delimiter to only list file at top prefix level, not # list all file recursively, which is expensive. prefix = self.prefix.strip("/") + "/" # if user set prefix = /, he means to use empty prefix if prefix == "/": prefix = "" for key in bucket.list(prefix=prefix, delimiter="/"): log.debug("Processing key %s", key.name) if isinstance(key, boto.s3.prefix.Prefix): # prefix is a concept same as "directory" log.debug("%s is a Prefix, skipping ...", key.name) continue backup_file = self.make_file(os.path.basename(key.name), key.size) # I expect file to have one and only one element if backup_file: yield backup_file log.info("finished iterating files") def check_backup(config): return base_check(S3BackupFile, config) if __name__ == "__main__": nrpe.check(check_backup, defaults)
""" try: parsed = boto.urlparse.urlparse(config['bucket']) bucket_name, prefix = parsed.netloc, parsed.path prefix = prefix[1:] # prefix must not start with / return ( BackupAge( config['s3']['access_key'], config['s3']['secret_key'], bucket_name, prefix, config['path'], config['minion_id'], allow_empty=config['empty'], ), nap.ScalarContext('age', config['warning'], config['warning']) ) except boto.exception.S3ResponseError: raise ValueError('Bad or non-existing bucket name') if __name__ == "__main__": nrpe.check(s3lite_backup_client_check, { 'empty': False, 'warning': '48', 'timeout': None, 'config': '/etc/nagios/backup.yml', })
log = logging.getLogger("nagiosplugin.firewall.rules")


class Rules(nagiosplugin.Resource):
    """Count iptables rules by scanning iptables-save output."""

    def probe(self):
        log.debug("Rules.probe started")
        total = 0
        try:
            proc = subprocess.Popen(['iptables-save'],
                                    stdout=subprocess.PIPE)
        except OSError:
            # binary not found: leave the count at zero
            pass
        else:
            # every actual rule line begins with '-'
            matching = [line for line in proc.stdout.readlines()
                        if line.startswith('-')]
            total = len(matching)
        log.debug("Rules.probe finished")
        log.debug("Returning %d", total)
        return [nagiosplugin.Metric('rules', total, min=0)]


def check_firewall(config):
    """Build the check; a single threshold is both warning and critical."""
    return (Rules(),
            nagiosplugin.ScalarContext(
                'rules', config['warning'], config['warning'],
                fmt_metric='{value} rules in iptables'))


if __name__ == '__main__':
    nrpe.check(check_firewall)
def percentFreeMem():
    """Percentage of memory effectively free (cached counts as free)."""
    memTotal, memCached, memFree = MemValues()
    available = int(memFree) + int(memCached)
    return (available * 100) / int(memTotal)


class UsedMemory(nagiosplugin.Resource):
    """Expose the used-memory percentage as the 'usedmemory' metric."""

    def probe(self):
        yield nagiosplugin.Metric('usedmemory', 100 - percentFreeMem())


def memory_check(config):
    """Warning/critical are upper bounds on the used-memory percentage."""
    context = nagiosplugin.ScalarContext(
        'usedmemory',
        warning=':{}'.format(config['warning']),
        critical=":{}".format(config['critical']),
        fmt_metric="Memory usage: {value:2.1f}%")
    return (UsedMemory(), context)


if __name__ == '__main__':
    nrpe.check(
        memory_check, {
            'warning': '80',
            'critical': '90',
        }
    )
class MailFeatureContext(nap.Context):
    """Map the count of working mail features (0-3) onto a Nagios state."""

    def evaluate(self, metric, resource):
        """All 3 features OK; 1-2 working warns; 0 is critical; else unknown."""
        value = metric.value
        if value == 3:
            return nap.Result(nap.state.Ok, metric=metric)
        if value in (1, 2):
            return nap.Result(nap.state.Warn, metric=metric)
        if value == 0:
            return nap.Result(nap.state.Critical, metric=metric)
        return nap.Result(nap.state.Unknown, metric=metric)


class Summary(nap.Summary):
    """Human-readable summary for the mail-stack check."""

    def problem(self, result):
        """Report how many of the three mail features are working."""
        working = result.results[0].metric.value
        return "%s/3 features is working" % (working)


def check_mail_stack(config):
    """Assemble the resource, context and summary for the mail-stack check."""
    features = MailFeatures(config['imap_server'],
                            config['smtp_server'],
                            config['username'],
                            config['password'],
                            config['smtp_wait'],
                            config['ssl'])
    return [features, MailFeatureContext("working"), Summary()]


if __name__ == "__main__":
    nrpe.check(check_mail_stack, {'timeout': 300})
log.debug("about to execute query: %s", self.query) records = cursor.execute(self.query) log.debug("resulted in %d records", records) log.debug(cursor.fetchall()) except pymysql.err.Error as err: log.critical(err) raise nap.CheckError( 'Something went wrong with ' 'MySQL query operation, Error: ()'.format(err)) log.debug("MysqlQuery.probe finished") log.debug("returning %d", records) return [nap.Metric('records', records, context='records')] def check_mysql_query(config): critical = config['critical'] return (MysqlQuery(host=config['host'], user=config['user'], passwd=config['passwd'], database=config['database'], query=config['query']), nap.ScalarContext('records', critical, critical)) if __name__ == "__main__": nrpe.check(check_mysql_query, { 'critical': '1:', 'query': 'select @@max_connections;', })
import logging

from pysc import nrpe

log = logging.getLogger("nagiosplugin.elasticsearch.cluster_nodes")
# requests is chatty at INFO; keep the plugin log readable
logging.getLogger("requests").setLevel(logging.WARNING)


class ClusterNodes(nagiosplugin.Resource):
    """Report the node count of the local Elasticsearch cluster."""

    def probe(self):
        """Query the local _cluster/health endpoint and yield a 'nodes' metric."""
        log.debug("ClusterNode.probe started")
        rsc = 'health'
        log.debug("calling localhost to get cluster %s", rsc)
        endpoint = 'http://127.0.0.1:9200/_cluster/' + rsc
        req = requests.get(endpoint)
        log.debug("response: %s", req.content)
        log.debug("ClusterNode.probe finished")
        return [
            nagiosplugin.Metric('nodes', req.json()['number_of_nodes'], min=0)
        ]


def check_procs(config):
    """Build the cluster-nodes resource/context pair.

    The critical value is used for both thresholds, so an unexpected node
    count is immediately critical.
    """
    threshold = config['critical']
    context = nagiosplugin.ScalarContext(
        'nodes', threshold, threshold,
        fmt_metric='{value} nodes in cluster')
    return (ClusterNodes(), context)


if __name__ == '__main__':
    nrpe.check(check_procs)
# NOTE(review): collapsed fragment — tail of a probe() method (presumably
# PillarRender.probe); `self`, `os`, `log` and `nap` are bound outside this
# chunk. Indentation reconstructed; confirm against the full file.
    import salt.syspaths as syspaths
    import salt.config
    import salt.minion

    # Build a masterless SMinion from the local minion config so pillar can
    # be rendered in-process.
    config_file = os.path.join(syspaths.CONFIG_DIR, 'minion')
    opts = salt.config.minion_config(config_file)
    opts['auth_timeout'] = self.salt_timeout
    minion = salt.minion.SMinion(opts)
    output = minion.functions['pillar.items']()
    # salt reports rendering problems under the "_errors" key
    if "_errors" in output:
        error = output['_errors']
        log.error('Error: %s', error)
        render_errors = 1
    else:
        render_errors = 0
    return [
        nap.Metric('pillar_render_errors', render_errors, context='errors')
    ]


def check_good_pillar(config):
    """Build the pillar-render resource and an 'errors' context where any
    non-zero error count is critical (range '0:0' on both thresholds)."""
    return (
        PillarRender(config['salt_timeout']),
        nap.ScalarContext('errors', warning='0:0', critical='0:0'),
    )


if __name__ == "__main__":
    nrpe.check(check_good_pillar, {})
# NOTE(review): collapsed fragment — tail of DnsCaching.probe(); the method's
# def line is above this chunk. Indentation reconstructed; confirm against
# the full file.
    query_time = self.get_query_time()
    return [nap.Metric('query time', query_time)]


def main(config):
    """Build the DNS-caching resource and a 'query time' context (msec)
    from the supplied config mapping."""
    kwargs = {
        'server': config['server'],
        'domain': config['domain'],
        'record': config['record']
    }
    warning = config['warning']
    critical = config['critical']
    return (DnsCaching(**kwargs),
            nap.ScalarContext('query time', warning, critical,
                              fmt_metric="Query time: {value} msec"))


if __name__ == '__main__':
    nrpe.check(
        main, {
            'server': 'localhost',
            'domain': 'robotinfra.com',
            'record': 'a',
            'warning': '3',
            'critical': '5',
        })
# In most cases, the second time is much faster. logger.debug("First query took %f ms", request.req().args["elapsed"]) answer = request.req() logger.debug("Second query took %f ms", answer.args["elapsed"]) except Exception as e: print("DNSCACHING WARNING - {0}".format(e)) sys.exit(1) return answer.args["elapsed"] def probe(self): query_time = self.get_query_time() return [nap.Metric("query time", query_time)] def main(config): kwargs = {"server": config["server"], "domain": config["domain"], "record": config["record"]} warning = config["warning"] critical = config["critical"] return ( DnsCaching(**kwargs), nap.ScalarContext("query time", warning, critical, fmt_metric="Query time: {value} msec"), ) if __name__ == "__main__": nrpe.check( main, {"server": "127.0.0.1", "domain": "robotinfra.com", "record": "a", "warning": "1", "critical": "2"} )