예제 #1
0
            if all((x['lag'] == -1 for x in results)):
                avg_lag = -1
            else:
                avg_lag = sum(
                    (x['lag']
                     for x in results if x['lag'] >= 0)) / len(results)

            self.lag = avg_lag
            self.measures = [
                nagiosplugin.Measure('alive-ssh_lag',
                                     self.lag,
                                     warning=self.warning,
                                     critical=self.critical)
            ]
        elif sent_data_recently:
            self.measures = [
                nagiosplugin.Measure('alive', int(self.alive), critical=0)
            ]
            #'alive', self.alive, warning=self.warning, critical=0)]

    def default_message(self):
        """Return the human-readable status text for this check.

        If a ``lag`` attribute has been set, the average SSH banner exchange
        latency is reported. Otherwise, if ``alive`` is set and truthy, a
        message about network problems is returned. In every other case the
        result is ``None``.
        """
        if hasattr(self, 'lag'):
            template = 'average latency to perform SSH banner exchange: %f'
            return template % self.lag
        is_alive = hasattr(self, 'alive') and self.alive
        if is_alive:
            return "has sent in data recently and is alive, but is having network problems"
        return None


# Module-level entry point: wrap AliveCheck in a nagiosplugin Controller and
# call it only when this file is executed directly (not when imported).
main = nagiosplugin.Controller(AliveCheck)
if __name__ == '__main__':
    main()
예제 #2
0
        except IndexError:
            print('What server am I supposed to check?!')
            import sys
            sys.exit(3)

    def obtain_data(self):
        """Load the newest filesystem usage sample for ``self.server``.

        Queries the current month's ``system_YYYYMM`` collection (UTC) of the
        ``clio`` database on ``self.db_server`` for the document with the
        highest ``ts`` for this host, then fills in:

        * ``self.usages`` -- mapping of filesystem name to percent used
        * ``self.measures`` -- one ``nagiosplugin.Measure`` per filesystem,
          bounded to 0..100 and using ``self.warning`` / ``self.critical``

        Raises:
            AssertionError: if no sample exists for the host, or the newest
                sample is not less than 60 seconds old.
        """
        db = pymongo.Connection(self.db_server).clio
        coll_name = 'system_%s' % datetime.utcnow().strftime('%Y%m')
        field = 'data.fs'
        res = db[coll_name].find_one({'host': self.server},
                                     sort=[('ts', pymongo.DESCENDING)],
                                     fields=[field, 'ts'])
        # find_one returns None when nothing matches; fail with a readable
        # message instead of a TypeError on the subscript below.
        assert res is not None, "no data found! is arke running?"

        # timedelta.seconds only holds the sub-day remainder, so a sample that
        # is days old could look fresh; require days == 0 as well.
        age = datetime.utcnow() - res['ts']
        assert age.days == 0 and age.seconds < 60, "stale data! is arke running?"

        self.usages = {}
        self.measures = []

        for fs, stats in res['data']['fs'].items():
            percent_used = stats['percent']
            self.usages[fs] = percent_used
            self.measures.append(nagiosplugin.Measure(
                fs, percent_used, '%', self.warning, self.critical, 0, 100))

    def default_message(self):
        """Return one ``"<fs> is <n>% full"`` line per entry of ``self.usages``.

        Lines are joined with newlines; an empty ``self.usages`` yields an
        empty string. The percentage is formatted with ``%i``, i.e. as an
        integer.
        """
        # items() instead of iteritems(): the latter does not exist on
        # Python 3 dicts, while items() works on both major versions.
        return '\n'.join('%s is %i%% full' % (fs, used)
                         for fs, used in self.usages.items())


# Module-level entry point: wrap AllDiskSpaceCheck in a nagiosplugin
# Controller and call it only when this file is executed directly.
main = nagiosplugin.Controller(AllDiskSpaceCheck)
if __name__ == '__main__':
   main()
예제 #3
0
        sc.execute('SELECT pg_last_xlog_replay_location()')
        slave_loc = xlog_to_bytes(sc.fetchone()[0])
        self.slave_conn.commit()
        self.slave_conn.close()

        self.lag = (master_loc - slave_loc) / 1024
        self.measures = [
            nagiosplugin.Measure('lag', self.lag, 'kB', self.warning,
                                 self.critical)
        ]

    def default_message(self):
        """Return the status text reporting ``self.lag`` with a kB suffix."""
        lag_kb = self.lag
        return "lag is %s kB" % lag_kb


def xlog_to_bytes(xlog):
    """
    Turn an xlog location string such as '0/C6321D98' into a single integer.

    The string is split on '/' into two hexadecimal numbers (log id and
    offset); the result is ``logid * 0xffffffff + offset``. A string that does
    not contain exactly one '/' raises ``ValueError``.

    The formula follows
    https://github.com/mhagander/munin-plugins/blob/master/postgres/postgres_streaming_.in.

    NOTE(review): the multiplier is 0xffffffff rather than 2**32 -- confirm
    this matches the xlog layout of the PostgreSQL version in use.
    """
    parts = xlog.split('/')
    logid_hex, offset_hex = parts
    return int(logid_hex, 16) * 0xffffffff + int(offset_hex, 16)


# When executed directly, wrap StreamingReplicationCheck in a nagiosplugin
# Controller and call it immediately.
if __name__ == '__main__':
    nagiosplugin.Controller(StreamingReplicationCheck)()
예제 #4
0
        self.measures = [
            nagiosplugin.Measure("Num_failed_checks",
                                 len(self.badChecks),
                                 warning=self.warning,
                                 critical=self.critical,
                                 minimum=0)
        ]

    def default_message(self):
        """Return a summary of the failed checks held in ``self.badChecks``.

        With no failed checks the text is "All checks pass."; otherwise the
        failed check names are listed, separated by ", ".
        """
        failed = self.badChecks
        if len(failed) == 0:
            return "All checks pass."
        names = ", ".join(failed)
        return "The following checks failed: %s" % names

    def fetch(self, url):
        """Download ``url`` and return the raw response body.

        When ``self.username`` is not ``None``, an opener with HTTP basic
        authentication for ``self.username`` / ``self.password`` is built and
        installed process-wide via ``urllib2.install_opener`` before the
        request is made.

        The response handle is always closed before returning, including when
        reading fails.
        """
        if self.username is not None:
            # Password manager with realm None: the credentials are used for
            # this URL regardless of the realm the server announces.
            passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
            passman.add_password(None, url, self.username, self.password)
            authhandler = urllib2.HTTPBasicAuthHandler(passman)
            opener = urllib2.build_opener(authhandler)
            urllib2.install_opener(opener)
        handle = urllib2.urlopen(url)
        try:
            return handle.read()
        finally:
            # Release the connection even if read() raises.
            handle.close()


# Module-level entry point: wrap Symfony2Check in a nagiosplugin Controller
# and call it only when this file is executed directly.
main = nagiosplugin.Controller(Symfony2Check)
if __name__ == '__main__':
    main()
예제 #5
0
        results = [x for x in found['data'] if x['to'] == self.server]

        assert len(results) > 0, "no results!"
        assert len(
            results
        ) > self.minimum, "not enough results! only found %i results." % len(
            results)

        if all((x['lag'] == -1 for x in results)):
            avg_lag = -1
        else:
            avg_lag = sum(
                (x['lag'] for x in results if x['lag'] >= 0)) / len(results)

        self.lag = avg_lag
        self.measures = [
            nagiosplugin.Measure('ssh_lag',
                                 self.lag,
                                 warning=self.warning,
                                 critical=self.critical)
        ]

    def default_message(self):
        """Return the status text with ``self.lag`` formatted via ``%f``."""
        template = 'average latency to perform SSH banner exchange: %f'
        return template % self.lag


# Module-level entry point: wrap SSHLagCheck in a nagiosplugin Controller and
# call it only when this file is executed directly.
main = nagiosplugin.Controller(SSHLagCheck)
if __name__ == '__main__':
    main()
예제 #6
0
def main():
    """Wrap CheckZopeTestRunner in a nagiosplugin Controller and call it."""
    controller = nagiosplugin.Controller(CheckZopeTestRunner)
    controller()
                        properties['name'],
                        properties['cmdline'],
                    ))

        if parent_pids:
            parent_pids = map(int, parent_pids)
            for properties in result['data']['processes']:
                if properties['ppid'] in parent_pids:
                    processes.append((
                        properties['pid'],
                        properties['name'],
                        properties['cmdline'],
                    ))


        self.processes = processes
        self.found_count = len(processes)
        self.measures = [nagiosplugin.Measure(
            'processes_found', self.found_count, warning=self.warning, critical=self.critical)]

    def default_message(self):
        """Return a status text listing every process in ``self.processes``.

        Each process tuple is paired with the labels ``pid``, ``name`` and
        ``cmdline`` and pretty-printed on its own line, below a header line
        that carries ``self.found_count``.
        """
        fields = ('pid', 'name', 'cmdline')
        # list(...) is required because zip() is a lazy iterator on Python 3
        # and would otherwise pretty-print as "<zip object at ...>".
        processes_str = '\n'.join(
            pformat(list(zip(fields, p))) for p in self.processes)
        return 'Found the following %i processes:\n%s' % (self.found_count, processes_str)


# Module-level entry point: wrap ProcessCheck in a nagiosplugin Controller and
# call it only when this file is executed directly.
main = nagiosplugin.Controller(ProcessCheck)
if __name__ == '__main__':
   main()
                if member.get('state', None) == 1:
                    primary = member

            if primary is me:
                self.primary = True
                self.repl_lag = 0
            else:
                self.primary = False
                self.repl_lag = max(0,
                                    primary['optime']['t'] - me['optime']['t'])

            #assert primary['optime']['t'] >= me['optime']['t'], "optime of master is less than the slave. the hell?\n%s" % pformat(res)
        self.measures = [
            nagiosplugin.Measure('mongodb_repl_lag',
                                 self.repl_lag,
                                 warning=self.warning,
                                 critical=self.critical)
        ]

    def default_message(self):
        """Return the status text for the replication state.

        ``self.primary`` selects the message: ``None`` means the server is not
        in a replica set, a truthy value means it is the primary, and any
        other value reports ``self.repl_lag`` behind the primary.
        """
        primary = self.primary
        if primary is None:
            return 'not in a replica set'
        if not primary:
            return 'optime is %i behind primary' % self.repl_lag
        return 'currently the primary server. not lagging behind self.'


# Module-level entry point: wrap MongodbReplLagCheck in a nagiosplugin
# Controller and call it only when this file is executed directly.
main = nagiosplugin.Controller(MongodbReplLagCheck)
if __name__ == '__main__':
    main()
예제 #9
0
        try:
            self.server = args[0]
        except IndexError:
            print('What server am I supposed to check?!')
            import sys
            sys.exit(3)

    def obtain_data(self):
        """Load the newest usage sample of ``self.filesystem`` on ``self.server``.

        Queries the current month's ``system_YYYYMM`` collection (UTC) of the
        ``clio`` database on ``self.db_server`` for the document with the
        highest ``ts`` for this host, then stores the filesystem's ``percent``
        value in ``self.usage`` and builds a single ``nagiosplugin.Measure``
        bounded to 0..100 in ``self.measures``.

        Raises:
            AssertionError: if no sample exists for the host, or the newest
                sample is not less than 60 seconds old.
        """
        db = pymongo.Connection(self.db_server).clio
        coll_name = 'system_%s' % datetime.utcnow().strftime('%Y%m')
        field = 'data.fs.%s.percent' % self.filesystem
        res = db[coll_name].find_one({'host': self.server},
                                     sort=[('ts', pymongo.DESCENDING)],
                                     fields=[field, 'ts'])
        # find_one returns None when nothing matches; fail with a readable
        # message instead of a TypeError on the subscript below.
        assert res is not None, "no data found! is arke running?"

        # timedelta.seconds only holds the sub-day remainder, so a sample that
        # is days old could look fresh; require days == 0 as well.
        age = datetime.utcnow() - res['ts']
        assert age.days == 0 and age.seconds < 60, "stale data! is arke running?"

        fs_perc = res['data']['fs'][self.filesystem]['percent']
        self.usage = fs_perc
        # NOTE(review): the measure is always named '/' even when
        # self.filesystem is a different mount point -- confirm this is intended.
        self.measures = [
            nagiosplugin.Measure('/', self.usage, '%', self.warning,
                                 self.critical, 0, 100)
        ]

    def default_message(self):
        """Return ``"<filesystem> is <usage>% full"`` for this check.

        ``self.usage`` is formatted with ``%i``, i.e. as an integer.
        """
        values = (self.filesystem, self.usage)
        return '%s is %i%% full' % values


# Module-level entry point: wrap DiskCheck in a nagiosplugin Controller and
# call it only when this file is executed directly.
main = nagiosplugin.Controller(DiskCheck)
if __name__ == '__main__':
    main()
            me = None
            for slave in slaves:
                if slave['host'] == self.ec2_public_hostname:
                    me = slave
                    break

            self.receive_delay = master_num - calc_offset(me['r'])
            self.replay_delay = master_num - calc_offset(me['p'])

        self.measures = [
            nagiosplugin.Measure('postgres_receive_delay',
                                 self.receive_delay,
                                 warning=self.warning,
                                 critical=self.critical),
            nagiosplugin.Measure('postgres_replay_delay',
                                 self.replay_delay,
                                 warning=self.warning,
                                 critical=self.critical),
        ]

    def default_message(self):
        """Return the status text for the replication delay.

        A truthy ``self.primary`` yields the "primary server" message;
        otherwise ``self.receive_delay`` and ``self.replay_delay`` are
        reported, each formatted with ``%i``.
        """
        if not self.primary:
            delays = (self.receive_delay, self.replay_delay)
            return 'receive time is %i behind primary, and replay time is %i behind primary' % delays
        return 'currently the primary server. not lagging behind self.'


# Module-level entry point: wrap PostgresReplLagCheck in a nagiosplugin
# Controller and call it only when this file is executed directly.
main = nagiosplugin.Controller(PostgresReplLagCheck)
if __name__ == '__main__':
    main()