def cmd_cahash(args):
    """
    Calculates a hash of all entries in the contact_access table.
    This is really only useful for debugging purposes, but doesn't
    block execution of anything, so run it if you feel like it.
    """
    # Host-level access rows are hashed first, then service-level rows, each
    # in a stable order so the digest is reproducible.
    selections = (
        "SELECT contact, host FROM contact_access "
        "WHERE service IS NULL "
        "ORDER BY contact, host",
        "SELECT contact, service FROM contact_access "
        "WHERE host IS NULL "
        "ORDER BY contact, service",
    )
    cursor = merlin_db.connect(mconf).cursor()
    digest = sha1()
    rows = 0
    for sql in selections:
        cursor.execute(sql)
        for record in cursor.fetchall():
            digest.update("%d %d" % (record[0], record[1]))
            rows += 1
    print("rows: %d; hash: %s" % (rows, digest.hexdigest()))
def cmd_purge(args):
    """[--remove-older-than=<difference>]
    Removes data no longer in use.

    If --remove-older-than is specified, also removes log files and database
    entries older than <difference>. The difference is specified as a number,
    followed by a unit - 'y' for year, 'm' for month, 'w' for week, 'd' for day.

    For instance, to delete all logs older than 1 year:

        mon log purge --remove-older-than=1y
    """
    # Returns False when the option value cannot be parsed, True when there
    # is nothing age-based to purge, otherwise whatever
    # purge_naemon_log_files() returns.
    import time

    option = '--remove-older-than='
    trash_dir = '/opt/monitor/op5/pnp/perfdata/.trash'
    # units rounded upwards
    units = {'y': 31622400, 'm': 2678400, 'w': 604800, 'd': 86400}

    # The perfdata trash is emptied unconditionally.
    if os.path.exists(trash_dir):
        subprocess.call(['find', trash_dir, '-mindepth', '1', '-delete'])

    oldest = False
    for arg in args:
        if not arg.startswith(option):
            continue
        unit = arg[-1]
        number = arg[len(option):-1]
        if unit not in units:
            print("Invalid unit: " + unit)
            return False
        try:
            amount = float(number)
        except ValueError:
            print("Invalid number: " + number)
            return False
        # A negative age would put the cutoff in the future and purge
        # everything; NaN and infinity cannot be turned into a timestamp.
        if not 0 <= amount < float('inf'):
            print("Invalid number: " + number)
            return False
        # NOTE(review): mktime() treats the gmtime() tuple as local time, so
        # the cutoff is shifted by the UTC offset; kept as-is.
        oldest = time.mktime(time.gmtime()) - amount * units[unit]

    if not oldest:
        return True

    # Imported late so argument errors are reported without needing the DB.
    import merlin_db
    conn = merlin_db.connect(mconf)
    dbc = conn.cursor()
    cutoff = int(oldest)
    # DB-API execute() takes a sequence of parameters, not a bare value.
    dbc.execute('DELETE FROM notification WHERE end_time < %s', [cutoff])
    dbc.execute('DELETE FROM report_data WHERE timestamp < %s', [cutoff])
    conn.commit()
    return purge_naemon_log_files(oldest)
def db_connect(self):
    """Open the database connection and cursor unless already connected.

    Stores the mock configuration in self.mconf, the connection in self.db
    and a cursor in self.dbc.

    Returns True both when a connection already exists and after a new one
    has been set up, so callers can test the result the same way in either
    case.
    """
    if self.db:
        return True
    self.mconf = mconf_mockup(
        host='localhost', user='******', dbname=self.db_name, dbpass='******'
    )
    self.db = merlin_db.connect(self.mconf, False)
    self.dbc = self.db.cursor()
    return True
def connect_to_db(self):
    """Connect to the database described by the db_* attributes.

    Does nothing when self.db is already set. On a fresh connect the
    connection is stored in self.db and a cursor in self.dbc.

    Returns True when a connection is available, False when connecting
    did not yield one.
    """
    if not self.db:
        settings = mconf_mockup(
            host=self.db_host,
            user=self.db_user,
            dbname=self.db_name,
            dbpass=self.db_pass,
        )
        self.db = merlin_db.connect(settings)
        if not self.db:
            return False
        self.dbc = self.db.cursor()
    return True
def cmd_mark(args):
    """--name=<name> [--table=<table>] <key1=value1> <keyN=valueN>
    Adds a timestamp marker into the table pointed to by --table, which
    lets a tester know when some event last occurred. This comes in pretty
    handy when making sure active checks, notifications and eventhandlers
    are working as expected.

    Each key translates to a column in the selected database table.
    Each value is the value which should go into that table (avoid
    strings that need quoting, pretty please).

    Each table used for such tests must contain the columns 'parent_pid'
    and 'timestamp', which are automagically added for tracking purposes.
    """
    # Exits the process with status 1 when no key=value pair is given or
    # when the INSERT fails.
    table = 'tests'
    param_keys = []
    param_values = []
    db = merlin_db.connect(mconf)
    dbc = db.cursor()
    for arg in args:
        if arg.startswith('--table='):
            table = arg.split('=')[1]
        elif '=' in arg:
            # NOTE(review): --name=<name> also lands here, so its key is
            # '--name'; confirm that is the intended column handling.
            (k, v) = arg.split('=', 1)
            param_keys.append(k)
            param_values.append(db.escape_string(v))

    if not param_keys:
        prettyprint_docstring('mark', cmd_mark.__doc__,
                              'No parameters. At least one must be supplied')
        sys.exit(1)

    # Tracking columns every marker table is required to have.
    param_keys.append('timestamp')
    param_values.append("%d" % time.time())
    param_keys.append('parent_pid')
    param_values.append("%d" % os.getppid())

    # Column names cannot be bound as parameters, so the statement is built
    # as text; the values were escaped above.
    query = "INSERT INTO %s(%s) VALUES('%s')" % (
        table, ', '.join(param_keys), "', '".join(param_values)
    )
    try:
        dbc.execute(query)
        # Make the marker durable on connections that do not autocommit.
        db.commit()
    except Exception as e:
        print("Failed to execute query: %s" % e)
        sys.exit(1)
    finally:
        # Release the cursor and connection on success as well as failure.
        dbc.close()
        db.close()
def cmd_install_time(args):
    """
    Print the time the system first recorded an event, as a unix timestamp.
    If no event is recorded, assume an event will happen quite soon, so print
    the current timestamp instead
    """
    fallback = int(time.time())
    cursor = merlin_db.connect(mconf).cursor()
    cursor.execute('SELECT MIN(timestamp) FROM report_data')
    # Keep only non-empty results; an empty report_data table yields NULL.
    recorded = [tstamp for (tstamp,) in cursor if tstamp]
    merlin_db.disconnect()
    if recorded:
        print(recorded[-1])
    else:
        print(fallback)
def cmd_install_time(args):
    """
    Print the time the system first recorded an event, as a unix timestamp.
    If no event is recorded, assume an event will happen quite soon, so print
    the current timestamp instead
    """
    # Default to "now"; replaced below if report_data holds a timestamp.
    starttime = int(time.time())
    cursor = merlin_db.connect(mconf).cursor()
    cursor.execute('SELECT MIN(timestamp) FROM report_data')
    for row in cursor:
        (earliest,) = row
        if not earliest:
            continue
        starttime = earliest
    merlin_db.disconnect()
    print(starttime)
def cmd_purge(args):
    """[--remove-older-than=<difference>]
    Remove data no longer in use.

    If --remove-older-than is specified, also removes log files and database
    entries older than <difference>. The difference is specified as a number,
    followed by a unit - 'y' for year, 'm' for month, 'w' for week, 'd' for day.

    For instance, to delete all logs older than 1 year:

        mon log purge --remove-older-than=1y
    """
    # Returns False when the option value cannot be parsed, True otherwise.
    import time

    option = '--remove-older-than='
    trash_dir = '/opt/monitor/op5/pnp/perfdata/.trash'
    # units rounded upwards
    units = {'y': 31622400, 'm': 2678400, 'w': 604800, 'd': 86400}

    # The perfdata trash is emptied unconditionally.
    if os.path.exists(trash_dir):
        subprocess.call([
            'find', trash_dir, '-mindepth', '1', '-delete'
        ])

    oldest = False
    for arg in args:
        if not arg.startswith(option):
            continue
        unit = arg[-1]
        number = arg[len(option):-1]
        if unit not in units:
            print("Invalid unit: " + unit)
            return False
        try:
            amount = float(number)
        except ValueError:
            print("Invalid number: " + number)
            return False
        # A negative age would put the cutoff in the future and purge
        # everything; NaN and infinity cannot be turned into a timestamp.
        if not 0 <= amount < float('inf'):
            print("Invalid number: " + number)
            return False
        # NOTE(review): mktime() treats the gmtime() tuple as local time, so
        # the cutoff is shifted by the UTC offset; kept as-is.
        oldest = time.mktime(time.gmtime()) - amount * units[unit]

    if not oldest:
        return True

    # Imported late so argument errors are reported without needing the DB.
    import glob
    import merlin_db
    conn = merlin_db.connect(mconf)
    dbc = conn.cursor()
    cutoff = int(oldest)
    # DB-API execute() takes a sequence of parameters, not a bare value.
    dbc.execute('DELETE FROM notification WHERE end_time < %s', [cutoff])
    dbc.execute('DELETE FROM report_data WHERE timestamp < %s', [cutoff])
    conn.commit()

    # Archived logs carry their rotation time in the file name.
    name_format = archive_dir + '/nagios-%m-%d-%Y-%H.log'
    for log in glob.glob(archive_dir + '/nagios-*.log'):
        try:
            rotated_at = time.mktime(time.strptime(log, name_format))
        except ValueError:
            # Name does not follow the rotation pattern; leave the file
            # alone instead of aborting the whole purge.
            continue
        if rotated_at < oldest:
            os.remove(log)
    return True
def cmd_fixindexes(args):
    """
    Fixes indexes on merlin tables containing historical data.
    Don't run this tool unless you're asked to by op5 support staff
    or told to do so by a message during an rpm or yum upgrade.
    """
    print('Adding indexes...')
    print('(if this takes forever, aborting this and running')
    print('\tmon log import --truncate-db')
    print('might be quicker, but could permanently remove some old logs)')
    conn = merlin_db.connect(mconf)
    cursor = conn.cursor()
    # NOTE(review): failures are collected here but nothing in the visible
    # code reports them; confirm whether reporting happens elsewhere.
    log = []
    for table in glob.glob(merlin_dir + '/sql/mysql/*-indexes.sql'):
        # 'with' closes the file even if reading or executing fails.
        with open(table) as queries:
            try:
                cursor.execute(queries.read())
            except Exception as ex:
                # Database errors usually carry (code, message); fall back
                # to the exception itself when there is no second argument.
                detail = ex.args[1] if len(ex.args) > 1 else ex
                log.append('%s: %s' % (os.path.basename(table), detail))
def cmd_purge(args):
    """[--remove-older-than=<difference>]
    Removes data no longer in use.

    If --remove-older-than is specified, also removes log files and database
    entries older than <difference>. The difference is specified as a number,
    followed by a unit - 'y' for year, 'm' for month, 'w' for week, 'd' for day.

    For instance, to delete all logs older than 1 year:

        mon log purge --remove-older-than=1y
    """
    # Returns False when the option value cannot be parsed, True when there
    # is nothing age-based to purge, otherwise whatever
    # purge_naemon_log_files() returns.
    import time

    option = '--remove-older-than='
    trash_dir = '/opt/monitor/op5/pnp/perfdata/.trash'
    # units rounded upwards
    units = {'y': 31622400, 'm': 2678400, 'w': 604800, 'd': 86400}

    # The perfdata trash is emptied unconditionally.
    if os.path.exists(trash_dir):
        subprocess.call([
            'find', trash_dir, '-mindepth', '1', '-delete'
        ])

    oldest = False
    for arg in args:
        if not arg.startswith(option):
            continue
        unit = arg[-1]
        number = arg[len(option):-1]
        if unit not in units:
            print("Invalid unit: " + unit)
            return False
        try:
            amount = float(number)
        except ValueError:
            print("Invalid number: " + number)
            return False
        # A negative age would put the cutoff in the future and purge
        # everything; NaN and infinity cannot be turned into a timestamp.
        if not 0 <= amount < float('inf'):
            print("Invalid number: " + number)
            return False
        # NOTE(review): mktime() treats the gmtime() tuple as local time, so
        # the cutoff is shifted by the UTC offset; kept as-is.
        oldest = time.mktime(time.gmtime()) - amount * units[unit]

    if not oldest:
        return True

    # Imported late so argument errors are reported without needing the DB.
    import merlin_db
    conn = merlin_db.connect(mconf)
    dbc = conn.cursor()
    cutoff = int(oldest)
    dbc.execute('DELETE FROM notification WHERE end_time < %s', [cutoff])
    # The newest row per host/service older than the cutoff is kept (the
    # MAX(id) subselect); every other row before the cutoff is deleted.
    dbc.execute(
        'DELETE FROM report_data WHERE id NOT IN '
        '(SELECT id FROM (SELECT MAX(id) AS id FROM report_data '
        'WHERE timestamp < %s GROUP BY service_description, host_name) '
        'as tbl) AND timestamp < %s',
        [cutoff, cutoff])
    conn.commit()
    return purge_naemon_log_files(oldest)
def cmd_purge(args):
    """[--remove-older-than=<difference>]
    Remove data no longer in use.

    If --remove-older-than is specified, also removes log files and database
    entries older than <difference>. The difference is specified as a number,
    followed by a unit - 'y' for year, 'm' for month, 'w' for week, 'd' for day.

    For instance, to delete all logs older than 1 year:

        mon log purge --remove-older-than=1y
    """
    # Returns False when the option value cannot be parsed, True otherwise.
    import time

    option = "--remove-older-than="
    trash_dir = "/opt/monitor/op5/pnp/perfdata/.trash"
    # units rounded upwards
    units = {"y": 31622400, "m": 2678400, "w": 604800, "d": 86400}

    # The perfdata trash is emptied unconditionally.
    if os.path.exists(trash_dir):
        subprocess.call(["find", trash_dir, "-mindepth", "1", "-delete"])

    oldest = False
    for arg in args:
        if not arg.startswith(option):
            continue
        unit = arg[-1]
        number = arg[len(option):-1]
        if unit not in units:
            print("Invalid unit: " + unit)
            return False
        try:
            amount = float(number)
        except ValueError:
            print("Invalid number: " + number)
            return False
        # A negative age would put the cutoff in the future and purge
        # everything; NaN and infinity cannot be turned into a timestamp.
        if not 0 <= amount < float("inf"):
            print("Invalid number: " + number)
            return False
        # NOTE(review): mktime() treats the gmtime() tuple as local time, so
        # the cutoff is shifted by the UTC offset; kept as-is.
        oldest = time.mktime(time.gmtime()) - amount * units[unit]

    if not oldest:
        return True

    # Imported late so argument errors are reported without needing the DB.
    import glob
    import merlin_db
    conn = merlin_db.connect(mconf)
    dbc = conn.cursor()
    cutoff = int(oldest)
    # DB-API execute() takes a sequence of parameters, not a bare value.
    dbc.execute("DELETE FROM notification WHERE end_time < %s", [cutoff])
    dbc.execute("DELETE FROM report_data WHERE timestamp < %s", [cutoff])
    conn.commit()

    # Archived logs carry their rotation time in the file name.
    name_format = archive_dir + "/nagios-%m-%d-%Y-%H.log"
    for log in glob.glob(archive_dir + "/nagios-*.log"):
        try:
            rotated_at = time.mktime(time.strptime(log, name_format))
        except ValueError:
            # Name does not follow the rotation pattern; leave the file
            # alone instead of aborting the whole purge.
            continue
        if rotated_at < oldest:
            os.remove(log)
    return True
def cmd_pasv(args):
    """
    Submits passive checkresults to the nagios.cmd pipe and
    verifies that the data gets written to database correctly
    and in a timely manner.
    Available options for 'mon test pasv'

      --nagios-cfg=<file>   default /opt/monitor/var/etc/nagios.cfg
      --counters=<int>      number of counters per object (default 30)
      --hosts=<int>         number of hosts (default 1)
      --services=<int>      number of services (default 5)
      --loops=<int>         number of loops (default 1)
      --interval=<int>      interval in seconds between loops (def 1800)
      --delay=<int>         delay between submitting and checking (def 25)

    !!! WARNING !!!     !!! WARNING !!!
    This command will disable active checks on your system and have other
    side-effects as well.
    !!! WARNING !!!     !!! WARNING !!!
    """
    # Results are recorded through test(); the summary at the end reads the
    # module-level 'failed' and 'passed' counters.
    global verbose
    global send_host_checks
    nagios_cfg = False
    num_hosts = 1
    num_services = 5
    num_loops = 1
    num_counters = 30
    interval = 1800
    delay = 25
    cmd_pipe = False

    for arg in args:
        if arg.startswith('--nagios-cfg='):
            nagios_cfg = arg.split('=')[1]
        elif arg.startswith('--counters='):
            num_counters = int(arg.split('=')[1])
        elif arg.startswith('--hosts='):
            num_hosts = int(arg.split('=')[1])
        elif arg.startswith('--services='):
            num_services = int(arg.split('=')[1])
        elif arg.startswith('--loops='):
            num_loops = int(arg.split('=')[1])
        elif arg.startswith('--interval='):
            interval = int(arg.split('=')[1])
        elif arg.startswith('--delay='):
            delay = int(arg.split('=')[1])
        elif arg == '--verbose' or arg == '-v':
            verbose = True
        elif arg == '--nohostchecks':
            send_host_checks = False
        else:
            prettyprint_docstring("pasv", cmd_pasv.__doc__,
                                  "Unknown argument: %s" % arg)
            # Asking for help is not an error; anything else unknown is.
            if arg == '--help' or arg == 'help':
                sys.exit(0)
            else:
                sys.exit(1)

    # Take the command pipe location from the given nagios config, if any.
    if nagios_cfg:
        comp = cconf.parse_conf(nagios_cfg)
        for v in comp.params:
            if v[0] == 'command_file':
                cmd_pipe = v[1]
                break

    db = merlin_db.connect(mconf)
    dbc = db.cursor()

    if not cmd_pipe:
        cmd_pipe = "/opt/monitor/var/rw/nagios.cmd"

    cmd_fd = _pasv_open_cmdpipe(cmd_pipe)
    # disable active checks while we test the passive ones, or
    # active checkresults might overwrite the passive ones and
    # contaminate the testresults
    test_cmd(cmd_fd, "STOP_EXECUTING_HOST_CHECKS")
    test_cmd(cmd_fd, "STOP_EXECUTING_SVC_CHECKS")
    test_cmd(cmd_fd, "START_ACCEPTING_PASSIVE_HOST_CHECKS")
    test_cmd(cmd_fd, "START_ACCEPTING_PASSIVE_SVC_CHECKS")
    os.close(cmd_fd)

    # now we update the database with impossible values so we
    # know we don't get the right test-data by mistake in case
    # the test-case is run multiple times directly following
    # each other
    dbc.execute("UPDATE host SET last_check = 5, current_state = 5")
    dbc.execute("UPDATE service SET last_check = 5, current_state = 5")

    host_list = []
    test_objs = []
    query = "SELECT host_name FROM host ORDER BY host_name ASC"
    dbc.execute(query)
    # Only the first num_hosts hosts take part in the test.
    for hi, row in enumerate(dbc.fetchall()):
        if hi >= num_hosts:
            break
        obj = pasv_test_host(row[0])
        host_list.append(obj)
        if send_host_checks:
            test_objs.append(obj)

    # arbitrary (very) large value
    min_services = 100123098
    min_services_host = ''
    for host in host_list:
        query = ("SELECT service_description FROM service "
                 "WHERE host_name = '%s' ORDER BY service_description ASC"
                 % host.name)
        dbc.execute(query)
        services = 0
        # Only the first num_services services per host take part.
        for si, row in enumerate(dbc.fetchall()):
            if si >= num_services:
                break
            services += 1
            obj = pasv_test_service(host.name, row[0])
            host.services.append(obj)
            test_objs.append(obj)
        # Remember the host with the fewest usable services for the
        # warning below.
        if services < min_services:
            min_services_host = host.name
            min_services = services

    # Compare against the number of hosts found; comparing against the list
    # object itself never triggers the warning.
    if num_hosts > len(host_list):
        print("Can't run tests for %d hosts when only %d are configured"
              % (num_hosts, len(host_list)))

    if num_services > min_services:
        print("Can't run tests for %d services / host "
              "when %s has only %d configured"
              % (num_services, min_services_host, min_services))

    # primary testing loop
    loops = 0
    while loops < num_loops:
        loops += 1

        # generate the counters we'll be using.
        # We get fresh ones for each iteration
        counters = _generate_counters(num_counters)
        cnt_string = "%s" % " ".join(counters)
        cnt_hash = hashlib.sha(cnt_string).hexdigest()

        # why we have to disconnect from db and re-open the
        # command pipe is beyond me, but that's the way it is, it
        # seems. It also matches real-world use a lot better,
        # since the reader imitates ninja and the writer imitates
        # nsca.
        cmd_fd = _pasv_open_cmdpipe(cmd_pipe)
        merlin_db.disconnect()

        # new status every time so we can differ between the values
        # and also test the worst-case scenario where the daemon has
        # to run two queries for each passive checkresult
        status = loops % 3

        loop_start = time.time()
        print("Submitting passive check results (%s) @ %s"
              % (cnt_hash, time.time()))
        for t in test_objs:
            cmd = _pasv_build_cmd(t, status)
            cmd += "%s|%s\n" % (cnt_hash, cnt_string)
            t.cmd_hash = hashlib.sha(cmd).hexdigest()
            t.submit_time = time.time()
            result = os.write(cmd_fd, cmd)
            test(result, len(cmd), "%d of %d bytes written for %s"
                 % (result, len(cmd), t.name))
        os.close(cmd_fd)

        db = merlin_db.connect(mconf)
        dbc = db.cursor()

        print("Sleeping %d seconds before reaping results" % delay)
        time.sleep(delay)
        for t in test_objs:
            query = ("SELECT "
                     "last_check, current_state, output, perf_data "
                     "FROM %s" % t.query())
            dbc.execute(query)
            row = dbc.fetchone()
            if row is None:
                # Record a failure instead of crashing on row[0] when the
                # object has no row in the database.
                test(False, True, "database row found for %s" % t.name)
                continue
            test(row[0] + delay > t.submit_time, True,
                 "reasonable delay for %s" % t.name)
            test(row[1], status, "status updated for %s" % t.name)
            test(str(row[2]), cnt_hash, "output update for %s" % t.name)
            test(str(row[3]), cnt_string,
                 "counter truncation check for %s" % t.name)

        if loops < num_loops:
            interval_sleep = (loop_start + interval) - time.time()
            if interval_sleep > 0:
                print("Sleeping %d seconds until next test-set"
                      % interval_sleep)
                time.sleep(interval_sleep)

    total_tests = failed + passed
    # Avoid ZeroDivisionError when nothing was tested (e.g. no hosts found).
    divisor = float(total_tests or 1)
    print("failed: %d/%.3f%%" % (failed, failed * 100 / divisor))
    print("passed: %d/%.3f%%" % (passed, passed * 100 / divisor))
    print("total tests: %d" % total_tests)
#!/usr/bin/env python
# Reads change records from stdin, one per line, and writes an entry to the
# rename_log table for every renamed host or service.
import sys

from nacoma.hooks import Change

reportable_types = ['host', 'service']
libexec_dir = "@@LIBEXECDIR@@/mon/modules"
# The merlin modules live outside the default path, so it has to be extended
# before they can be imported.
sys.path.insert(0, libexec_dir)
import merlin_conf as mconf
mconf.parse()
import merlin_db

conn = merlin_db.connect(mconf)
cursor = conn.cursor()

for line in sys.stdin:
    change = Change(line)
    if change.type in reportable_types and change.is_renamed():
        if change.type == 'host':
            # Host renames have no service part; those columns stay NULL.
            query = 'INSERT INTO rename_log(from_host_name, from_service_description, to_host_name, to_service_description) VALUES (%s, NULL, %s, NULL)'
            arg = (change.oldname, change.newname)
        else:
            # Old and new names are each split on ';' and concatenated to
            # fill the four placeholders.
            query = 'INSERT INTO rename_log(from_host_name, from_service_description, to_host_name, to_service_description) VALUES (%s, %s, %s, %s)'
            arg = change.oldname.split(';') + change.newname.split(';')
        cursor.execute(query, arg)

conn.commit()
conn.close()
#!/usr/bin/env python import sys from nacoma.hooks import Change reportable_types = ['host', 'service'] libexec_dir = "/usr/libexec/merlin/modules" sys.path.insert(0, libexec_dir) import merlin_conf as mconf mconf.parse() import merlin_db conn = merlin_db.connect(mconf) cursor = conn.cursor() # this terrible kludge is required because merlin_db gives us no way to access the database module mysqlparamstyle = 'format' oracleparamstyle = 'named' if 'Oracle' in str(type(conn)): paramstyle = oracleparamstyle else: paramstyle = mysqlparamstyle for line in sys.stdin: change = Change(line) if change.type not in reportable_types: continue if change.is_renamed(): if change.type == 'host': arg = (change.oldname, change.newname) if paramstyle == 'named': arg = dict(zip(('oldhost', 'newhost'), arg))