if all((x['lag'] == -1 for x in results)): avg_lag = -1 else: avg_lag = sum( (x['lag'] for x in results if x['lag'] >= 0)) / len(results) self.lag = avg_lag self.measures = [ nagiosplugin.Measure('alive-ssh_lag', self.lag, warning=self.warning, critical=self.critical) ] elif sent_data_recently: self.measures = [ nagiosplugin.Measure('alive', int(self.alive), critical=0) ] #'alive', self.alive, warning=self.warning, critical=0)] def default_message(self): if hasattr(self, 'lag'): return 'average latency to perform SSH banner exchange: %f' % self.lag elif hasattr(self, 'alive') and self.alive: return "has sent in data recently and is alive, but is having network problems" main = nagiosplugin.Controller(AliveCheck) if __name__ == '__main__': main()
except IndexError: print('What server am I supposed to check?!') import sys sys.exit(3) def obtain_data(self): db = pymongo.Connection(self.db_server).clio coll_name = 'system_%s' % datetime.utcnow().strftime('%Y%m') field = 'data.fs' res = db[coll_name].find_one({'host': self.server}, sort=[('ts', pymongo.DESCENDING)], fields=[field, 'ts']) assert (datetime.utcnow() - res['ts']).seconds < 60, "stale data! is arke running?" self.usages = {} self.measures = [] for fs in res['data']['fs']: percent_used = res['data']['fs'][fs]['percent'] self.usages[fs] = percent_used self.measures.append(nagiosplugin.Measure( fs, percent_used, '%', self.warning, self.critical, 0, 100)) def default_message(self): return '\n'.join(['%s is %i%% full' % (fs,used) for fs,used in self.usages.iteritems()]) main = nagiosplugin.Controller(AllDiskSpaceCheck) if __name__ == '__main__': main()
sc.execute('SELECT pg_last_xlog_replay_location()') slave_loc = xlog_to_bytes(sc.fetchone()[0]) self.slave_conn.commit() self.slave_conn.close() self.lag = (master_loc - slave_loc) / 1024 self.measures = [ nagiosplugin.Measure('lag', self.lag, 'kB', self.warning, self.critical) ] def default_message(self): return "lag is %s kB" % self.lag def xlog_to_bytes(xlog): """ Convert an xlog number like '0/C6321D98' to an integer representing the number of bytes into the xlog. Logic here is taken from https://github.com/mhagander/munin-plugins/blob/master/postgres/postgres_streaming_.in. I assume it's correct... """ logid, offset = xlog.split('/') return (int('ffffffff', 16) * int(logid, 16)) + int(offset, 16) if __name__ == '__main__': nagiosplugin.Controller(StreamingReplicationCheck)()
self.measures = [ nagiosplugin.Measure("Num_failed_checks", len(self.badChecks), warning=self.warning, critical=self.critical, minimum=0) ] def default_message(self): if len(self.badChecks): return "The following checks failed: %s" % (", ".join( self.badChecks)) return "All checks pass." def fetch(self, url): if self.username is not None: passman = urllib2.HTTPPasswordMgrWithDefaultRealm() # this creates a password manager passman.add_password(None, url, self.username, self.password) authhandler = urllib2.HTTPBasicAuthHandler(passman) opener = urllib2.build_opener(authhandler) urllib2.install_opener(opener) handle = urllib2.urlopen(url) data = handle.read() return data main = nagiosplugin.Controller(Symfony2Check) if __name__ == '__main__': main()
results = [x for x in found['data'] if x['to'] == self.server] assert len(results) > 0, "no results!" assert len( results ) > self.minimum, "not enough results! only found %i results." % len( results) if all((x['lag'] == -1 for x in results)): avg_lag = -1 else: avg_lag = sum( (x['lag'] for x in results if x['lag'] >= 0)) / len(results) self.lag = avg_lag self.measures = [ nagiosplugin.Measure('ssh_lag', self.lag, warning=self.warning, critical=self.critical) ] def default_message(self): return 'average latency to perform SSH banner exchange: %f' % self.lag main = nagiosplugin.Controller(SSHLagCheck) if __name__ == '__main__': main()
def main():
    """Build a nagiosplugin controller for CheckZopeTestRunner and call it."""
    controller = nagiosplugin.Controller(CheckZopeTestRunner)
    controller()
properties['name'], properties['cmdline'], )) if parent_pids: parent_pids = map(int, parent_pids) for properties in result['data']['processes']: if properties['ppid'] in parent_pids: processes.append(( properties['pid'], properties['name'], properties['cmdline'], )) self.processes = processes self.found_count = len(processes) self.measures = [nagiosplugin.Measure( 'processes_found', self.found_count, warning=self.warning, critical=self.critical)] def default_message(self): fields = ('pid', 'name', 'cmdline') processes = (pformat(zip(fields, p)) for p in self.processes) processes_str = '\n'.join(processes) return 'Found the following %i processes:\n%s' % (self.found_count, processes_str) main = nagiosplugin.Controller(ProcessCheck) if __name__ == '__main__': main()
if member.get('state', None) == 1: primary = member if primary is me: self.primary = True self.repl_lag = 0 else: self.primary = False self.repl_lag = max(0, primary['optime']['t'] - me['optime']['t']) #assert primary['optime']['t'] >= me['optime']['t'], "optime of master is less than the slave. the hell?\n%s" % pformat(res) self.measures = [ nagiosplugin.Measure('mongodb_repl_lag', self.repl_lag, warning=self.warning, critical=self.critical) ] def default_message(self): if self.primary is None: return 'not in a replica set' if self.primary: return 'currently the primary server. not lagging behind self.' return 'optime is %i behind primary' % self.repl_lag main = nagiosplugin.Controller(MongodbReplLagCheck) if __name__ == '__main__': main()
try: self.server = args[0] except IndexError: print('What server am I supposed to check?!') import sys sys.exit(3) def obtain_data(self): db = pymongo.Connection(self.db_server).clio coll_name = 'system_%s' % datetime.utcnow().strftime('%Y%m') field = 'data.fs.%s.percent' % self.filesystem res = db[coll_name].find_one({'host': self.server}, sort=[('ts', pymongo.DESCENDING)], fields=[field, 'ts']) assert (datetime.utcnow() - res['ts']).seconds < 60, "stale data! is arke running?" fs_perc = res['data']['fs'][self.filesystem]['percent'] self.usage = fs_perc self.measures = [ nagiosplugin.Measure('/', self.usage, '%', self.warning, self.critical, 0, 100) ] def default_message(self): return '%s is %i%% full' % (self.filesystem, self.usage) main = nagiosplugin.Controller(DiskCheck) if __name__ == '__main__': main()
me = None for slave in slaves: if slave['host'] == self.ec2_public_hostname: me = slave break self.receive_delay = master_num - calc_offset(me['r']) self.replay_delay = master_num - calc_offset(me['p']) self.measures = [ nagiosplugin.Measure('postgres_receive_delay', self.receive_delay, warning=self.warning, critical=self.critical), nagiosplugin.Measure('postgres_replay_delay', self.replay_delay, warning=self.warning, critical=self.critical), ] def default_message(self): if self.primary: return 'currently the primary server. not lagging behind self.' return 'receive time is %i behind primary, and replay time is %i behind primary' % \ (self.receive_delay, self.replay_delay) main = nagiosplugin.Controller(PostgresReplLagCheck) if __name__ == '__main__': main()