# (imports this plugin's module needs)
import Adafruit_DHT
from pynagios import Plugin, Response, UNKNOWN, make_option


class DHT(Plugin):
    devicetype = make_option("-d", "--device", dest="devicetype", type="int")
    pin = make_option("-p", "--pin", dest="pin", type="int")
    mode = make_option("-t", "--type", dest="mode", type="string",
                       help="temp|hum")
    unit = make_option("-u", "--unit", dest="unit", type="string",
                       help="c|f")

    def check(self):
        sensor = self.options.devicetype
        pin = self.options.pin
        mode = self.options.mode
        unit = self.options.unit

        # read from DHT
        humidity, temperature = Adafruit_DHT.read_retry(sensor, pin)

        # generate output
        if mode == 'temp':
            # the sensor reports Celsius; convert only when Fahrenheit
            # is requested, and label the output accordingly
            if unit == 'f':
                temperature = temperature * 1.8 + 32
                output = 'Temperature is {0:0.1f}*F'.format(temperature)
            else:
                output = 'Temperature is {0:0.1f}*C'.format(temperature)
            value = temperature
        elif mode == 'hum':
            value = humidity
            output = 'Humidity is {0:0.1f}%'.format(humidity)
        else:
            return Response(UNKNOWN,
                            "Unknown --type, expected 'temp' or 'hum'")

        # return response
        result = self.response_for_value(value, message=output)
        result.set_perf_data("Value", value, warn=self.options.warning,
                             crit=self.options.critical)
        return result

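# A minimal sketch of how one of these pynagios plugins is typically run,
# assuming the DHT class above lives in its own script (the flag values
# below are illustrative, not from the source):
#
#     python check_dht.py -d 22 -p 4 -t temp -u f -w 30 -c 35
#
# pynagios parses the command line when the plugin is instantiated, so the
# runner is a single line:
if __name__ == "__main__":
    DHT().check().exit()
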
class RabbitQueueCheck(BaseRabbitCheck):
    """
    performs a nagios compliant check on a single queue and
    attempts to catch all errors.
    expected usage is with a critical threshold of 0
    """
    vhost = make_option("--vhost", dest="vhost", help="RabbitMQ vhost",
                        type="string", default='%2F')
    queue = make_option("--queue", dest="queue",
                        help="Name of the queue to inspect", type="string")

    def makeUrl(self):
        """
        forms self.url, a correct url for polling a rabbit queue
        """
        try:
            if self.options.use_ssl is True:
                self.url = "https://%s:%s/api/queues/%s/%s" % (
                    self.options.hostname, self.options.port,
                    self.options.vhost, self.options.queue)
            else:
                self.url = "http://%s:%s/api/queues/%s/%s" % (
                    self.options.hostname, self.options.port,
                    self.options.vhost, self.options.queue)
            return True
        except Exception, e:
            self.rabbit_error = 3
            self.rabbit_note = "problem forming api url: " + str(e)
            return False

class BaseRabbitCheck(Plugin):
    """
    performs a nagios compliant check and
    attempts to catch all errors.
    expected usage is with a critical threshold of 0
    """
    username = make_option("--username", dest="username",
                           help="RabbitMQ API username", type="string",
                           default="guest")
    password = make_option("--password", dest="password",
                           help="RabbitMQ API password", type="string",
                           default="guest")
    port = make_option("--port", dest="port", help="RabbitMQ API port",
                       type="string", default="15672")
    use_ssl = make_option("--ssl", action="store_true", dest="use_ssl",
                          default=False, help="Use SSL")

    def doApiGet(self):
        """
        performs and returns content from an api get
        """
        password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, self.url, self.options.username,
                                  self.options.password)
        handler = urllib2.HTTPBasicAuthHandler(password_mgr)
        opener = urllib2.build_opener(handler)
        response = None
        try:
            request = opener.open(self.url)
            response = request.read()
            request.close()
        except Exception, e:
            response = False
            self.rabbit_error = 2
            self.rabbit_note = "problem with api get: " + str(e)
        return response

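# BaseRabbitCheck subclasses only provide makeUrl(); the driver that ties
# makeUrl() and doApiGet() together is not part of this excerpt. A rough
# sketch of what that check() presumably looks like (assumed, not the
# actual implementation):
#
#     def check(self):
#         self.rabbit_error = 0
#         self.rabbit_note = "action performed successfully"
#         if not self.makeUrl():
#             return Response(UNKNOWN, self.rabbit_note)
#         body = self.doApiGet()
#         if self.rabbit_error > 0:
#             return Response(CRITICAL, self.rabbit_note)
#         return self.parseResult(json.loads(body))
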
class CheckXtraBackupLog(Plugin):
    xtrabackup_log = make_option("-f", "--file", dest="xtrabackup_log",
                                 help="path to Xtrabackup-agent log",
                                 metavar="FILE")
    days = make_option('-d', '--days', dest='days',
                       help="number of days backup shouldn't be older than")

    def check(self):
        self.options.warning = (self.options.warning
                                if self.options.warning is not None
                                else Range('0'))
        status_arr = []
        for i in reversed(self.check_log(self.options.xtrabackup_log,
                                         self.options.days)):
            status_arr.append(i)
        if len(status_arr) > 0 and status_arr[0] == 0:
            return self.response_for_value(0, 'last backup is OK')
        else:
            return self.response_for_value(1, 'Problem found with last backup')

    def check_log(self, logfile, days_to_check):
        p1 = re.compile(r'msg (\d*)\..*prints "completed OK.*\..*')
        p2 = re.compile(r'.*innobackupex-.*: completed OK!.*')
        lines = []
        with open(logfile, 'r') as f:
            lines = f.readlines()
        chk_for_start = 1
        chk_for_end = 0
        backup_status = []
        for line in lines:
            m1 = p1.match(line)
            if m1:
                backdate = datetime.fromtimestamp(float(m1.group(1)))
                if backdate.timetuple() > (datetime.now() - timedelta(
                        days=int(days_to_check))).timetuple():
                    if chk_for_end == 1:
                        chk_for_end = 0
                        backup_status.append(2)
                    chk_for_start = 0
                    chk_for_end = 1
            if chk_for_end == 1:
                m2 = p2.match(line)
                if m2:
                    chk_for_end = 0
                    chk_for_start = 1
                    backup_status.append(0)
        return backup_status

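# For reference, the two patterns in check_log() match xtrabackup-agent log
# lines shaped roughly like the following (illustrative lines reconstructed
# from the regexes themselves, not copied from a real log):
#
#     msg 1361315143. ... prints "completed OK!".
#     130219 23:05:43  innobackupex-1.5.1: completed OK!
#
# The first captures the epoch timestamp used for the age check; the second
# marks the end of a successfully finished backup run.
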
class RabbitExchangeCheck(Plugin):
    """
    performs a nagios compliant check on a single exchange and
    attempts to catch all errors.
    expected usage is with a critical threshold of 0
    """
    username = make_option("--username", dest="username",
                           help="RabbitMQ API username", type="string",
                           default="guest")
    password = make_option("--password", dest="password",
                           help="RabbitMQ API password", type="string",
                           default="guest")
    port = make_option("--port", dest="port", help="RabbitMQ API port",
                       type="string", default="15672")
    vhost = make_option("--vhost", dest="vhost", help="RabbitMQ vhost",
                        type="string", default='%2F')
    exchange = make_option("--exchange", dest="exchange",
                           help="Name of the exchange to inspect",
                           type="string", default="")

    def doApiGet(self):
        """
        performs and returns content from an api get
        """
        password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, self.url, self.options.username,
                                  self.options.password)
        handler = urllib2.HTTPBasicAuthHandler(password_mgr)
        opener = urllib2.build_opener(handler)
        response = None
        try:
            request = opener.open(self.url)
            response = request.read()
            request.close()
        except Exception, e:
            response = False
            self.rabbit_error = 2
            self.rabbit_note = "problem with api get: " + str(e)
        return response

# (imports this plugin's module needs)
import json
import urllib2
from pynagios import Plugin, Response, make_option, OK, CRITICAL, UNKNOWN


class FacedetectCheck(Plugin):
    port = make_option('-p', '--port', type='int', default=4000,
                       help='Use the following port')
    # store_true so that passing -S actually enables HTTPS (the original
    # used store_false with a False default, which made -S a no-op)
    use_ssl = make_option('-S', '--use-ssl', action='store_true',
                          default=False, help="Use HTTPS instead of HTTP")
    expected = make_option('-e', '--expected', type='str', default=None,
                           help="Expect the following string in response")

    def check(self):
        hostname = self.options.hostname
        port = self.options.port
        use_ssl = self.options.use_ssl
        expected = self.options.expected
        timeout = self.options.timeout if self.options.timeout > 0 else 10

        if not hostname:
            return Response(UNKNOWN, 'Hostname is missing')

        url = "http%s://%s:%s/status" % ('s'[:use_ssl], hostname, port)
        try:
            f = urllib2.urlopen(url, timeout=timeout)
            response = json.load(f)
        except urllib2.URLError, e:
            return Response(CRITICAL, str(e.reason))

        if not response:
            return Response(CRITICAL, 'No data received')

        status = response.pop('status')
        ret = OK
        if expected and expected not in status:
            ret = CRITICAL

        result = Response(ret, status)
        for k, v in response.items():
            result.set_perf_data(k, int(v))
        return result

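# For reference, the /status endpoint polled above is expected to return a
# flat JSON object with a textual "status" key plus numeric counters, e.g.
# (illustrative values only):
#
#     {"status": "OK - 2 workers", "faces": 17, "queue": 0}
#
# Every key other than "status" is emitted as integer perf data.
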
class RabbitCheckServer(BaseRabbitCheck):
    """
    performs a nagios compliant check on a rabbit node and
    attempts to catch all errors.
    expected usage is with a critical threshold of 0
    """
    type = make_option("--type", dest="type",
                       help="Type of check - mem, fd, proc, sockets, disk",
                       type="string", default='%2F')

    def makeUrl(self):
        """
        forms self.url, a correct url for polling the rabbit nodes
        """
        try:
            if self.options.use_ssl is True:
                self.url = "https://%s:%s/api/nodes" % (self.options.hostname,
                                                        self.options.port)
            else:
                self.url = "http://%s:%s/api/nodes" % (self.options.hostname,
                                                       self.options.port)
            return True
        except Exception, e:
            print str(e)
            self.rabbit_error = 3
            self.rabbit_note = "problem forming api url: " + str(e)
            return False

class RabbitAllQueuesCheck(BaseRabbitCheck):
    vhost = make_option("--vhost", dest="vhost", help="RabbitMQ vhost",
                        type="string", default='%2F')

    def makeUrl(self):
        """
        forms self.url, a correct url for polling the queues of a vhost
        """
        try:
            if self.options.use_ssl is True:
                self.url = "https://%s:%s/api/queues/%s" % (
                    self.options.hostname, self.options.port,
                    self.options.vhost)
            else:
                self.url = "http://%s:%s/api/queues/%s" % (
                    self.options.hostname, self.options.port,
                    self.options.vhost)
            return True
        except Exception, e:
            self.rabbit_error = 3
            self.rabbit_note = "problem forming api url: " + str(e)
            return False

# (imports this plugin's module needs)
import socket
from pynagios import Plugin, Response, make_option, CRITICAL, UNKNOWN


class StatsdCheck(Plugin):
    port = make_option('-p', '--port', type='int', default=8126)
    metric = make_option('-m', '--metric', type='str', default='uptime')

    _socket = None

    def _connect(self, host, port, timeout):
        exception = None
        for (af, socktype, proto, cname, sa) in socket.getaddrinfo(
                host, port, socket.AF_UNSPEC, socket.SOCK_STREAM):
            try:
                self._socket = socket.socket(af, socktype, proto)
                self._socket.settimeout(timeout)
                self._socket.connect(sa)
                return
            except socket.error as e:
                if self._socket:
                    self._socket.close()
                    self._socket = None
                exception = e
        raise exception

    def check(self):
        timeout = self.options.timeout if self.options.timeout > 0 else 10
        metric = self.options.metric.strip().lower()
        if not self.options.hostname:
            return Response(UNKNOWN, 'Hostname is missing')
        try:
            self._connect(self.options.hostname, self.options.port, timeout)
            self._socket.sendall('stats')
            data = ''
            while True:
                data += self._socket.recv(1024)
                if data and 'END' in data:
                    break
            self._socket.close()
        # socket.timeout is a subclass of socket.error in Python 2, so it
        # must be caught first or the (errno, msg) unpacking below fails
        except socket.timeout, msg:
            return Response(CRITICAL, msg)
        except socket.error, (errno, msg):
            return Response(CRITICAL, msg)
        # Minimal completion (assumed, not from the original source, which
        # ends without returning a response): pick the requested metric out
        # of the "name: value" reply; assumes integer-valued stats.
        for line in data.splitlines():
            line = line.strip()
            if line.lower().startswith(metric + ':'):
                value = int(line.split(':', 1)[1].strip())
                return self.response_for_value(value,
                                               '%s is %s' % (metric, value))
        return Response(UNKNOWN, "Metric '%s' not found in response" % metric)

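# For reference, the statsd admin port answers the "stats" command with
# newline-separated "name: value" pairs terminated by END, roughly
# (illustrative):
#
#     uptime: 365
#     messages.last_msg_seen: 2
#     END
#
# which is what the completion above scans for the requested metric.
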
class CheckSplunkOpsecLea(pynagios.Plugin):
    username = pynagios.make_option("-U", type="string",
                                    help="Username used to log into Splunk")
    password = pynagios.make_option("-P", type="string",
                                    help="Password used to log into Splunk")
    port = pynagios.make_option("-p", type="int", default=8089,
                                help="splunkd Port on server, default 8089")
    use_ssl = pynagios.make_option("-n", action="store_false", default=True,
                                   help="Disable HTTPS (use http)")
    entity = pynagios.make_option("--entity", type="string",
                                  help="Name of OPSEC entity to check")

    def check(self):
        splunkd = SplunkServer(self.options.hostname, self.options.username,
                               self.options.password, self.options.port,
                               self.options.use_ssl)
        try:
            root = splunkd._get_url(
                "/servicesNS/nobody/Splunk_TA_opseclea_linux22/opsec/entity_log_status/{0}"
                .format(self.options.entity))
        except ApiError as e:
            return pynagios.Response(pynagios.CRITICAL, str(e))

        sdict = root.find(
            "./{http://www.w3.org/2005/Atom}entry/{http://www.w3.org/2005/Atom}content/{http://dev.splunk.com/ns/rest}dict"
        )
        skey = sdict.find(
            "./{http://dev.splunk.com/ns/rest}key[@name='last_log_update_timestamp']"
        )
        last_updated_at = datetime.datetime.strptime(skey.text,
                                                     "%Y-%m-%dT%H:%M:%SZ")
        now = datetime.datetime.utcnow()
        delta = now - last_updated_at
        # delta.seconds ignores the .days component of the timedelta
        age = int(delta.total_seconds())
        return self.response_for_value(
            age, "Last updated {0} seconds ago".format(age))

# (imports this plugin's module needs)
import socket
import xmlrpclib
from pynagios import Plugin, Response, make_option, OK, CRITICAL, UNKNOWN


class SupervisordXmlRpcCheck(Plugin):
    port = make_option('-p', '--port', type='int', default=9080,
                       help='Use the following port for XML-RPC connection')
    username = make_option('-u', '--user', type='str', default=None,
                           help='Username for XML-RPC connection')
    password = make_option('-P', '--password', type='str', default=None,
                           help='Password for XML-RPC connection')

    def check(self):
        hostname = self.options.hostname
        port = self.options.port
        username = self.options.username
        password = self.options.password
        timeout = self.options.timeout if self.options.timeout > 0 else 10

        if not hostname:
            return Response(UNKNOWN, 'Hostname is missing!')

        if username and password:
            auth = '%s:%s@' % (username, password)
        else:
            auth = ''

        procs = None
        try:
            url = "http://%s%s:%s" % (auth, hostname, port)
            socket.setdefaulttimeout(timeout)
            s = xmlrpclib.ServerProxy(url)
            procs = s.supervisor.getAllProcessInfo()
        except xmlrpclib.Fault, e:
            return Response(UNKNOWN, "getAllProcessInfo: %s" % e.faultString)
        except (socket.gaierror, socket.timeout, socket.error), e:
            return Response(CRITICAL, "%s: %s" % (hostname, e))
        # Minimal completion (assumed, not from the original source, which
        # ends without returning a response): flag any process that is not
        # in the RUNNING state.
        stopped = [p['name'] for p in procs if p['statename'] != 'RUNNING']
        if stopped:
            return Response(CRITICAL, 'Not running: %s' % ', '.join(stopped))
        return Response(OK, '%d processes running' % len(procs))

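# getAllProcessInfo() returns one dict per supervisord-managed process; the
# fields used by the completion above are (abridged from supervisord's
# XML-RPC API, values illustrative):
#
#     {"name": "worker", "statename": "RUNNING", "pid": 4242, ...}
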
class GraphiteNagios(Plugin):
    username = make_option("--username", "-U",
                           help="Username (HTTP Basic Auth)")
    password = make_option("--password", "-P",
                           help="Password (HTTP Basic Auth)")
    name = make_option("--name", "-N", help="Metric name", default="metric")
    target = make_option("--target", "-M",
                         help="Graphite target (series or query)")
    from_ = make_option("--from", "-F", help="Starting offset",
                        default="1minute")
    func = make_option("--algorithm", "-A",
                       help=("Algorithm for combining metrics, options: "
                             "{}, (default: avg)".format(F_OPTS)),
                       default="avg", choices=FUNCTIONS.keys())

    def check(self):
        value = check_graphite(self.options)
        if value is None:
            return Response(UNKNOWN, "No results returned!")

        message = "{} ({} = {})".format(self.options.name,
                                        self.options.func, value)
        response = self.response_for_value(value, message)
        response.set_perf_data(self.options.func, value)
        return response

class GraphiteNagios(Plugin):
    username = make_option("--username", "-U",
                           help="Username (HTTP Basic Auth)")
    password = make_option("--password", "-P",
                           help="Password (HTTP Basic Auth)")
    name = make_option("--name", "-N", help="Metric name", default="metric")
    target = make_option("--target", "-M",
                         help="Graphite target (series or query)")
    from_ = make_option("--from", "-F", help="Starting offset",
                        default="1minute")
    until = make_option("--until", "-u", help="Ending offset", default="")
    func = make_option("--algorithm", "-A",
                       help=("Algorithm for combining metrics, options: "
                             "{0}, (default: avg)".format(F_OPTS)),
                       default="avg", choices=FUNCTIONS.keys())
    http_timeout = make_option("--http-timeout", "-o",
                               help="HTTP request timeout", default=10,
                               type=int)

    def check(self):
        value = check_graphite(self.options)
        if value is None:
            return Response(UNKNOWN, "No results returned!")

        message = "{0} ({1} is {2})".format(self.options.name,
                                            self.options.func, value)
        response = self.response_for_value(value, message)
        try:
            response.set_perf_data(self.options.func, value)
        except ValueError as e:
            raise ValueError("failed to set {} as perf data: {}".format(
                value, str(e)))
        return response

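# check_graphite() is used by both versions of GraphiteNagios above, but its
# body is not part of this excerpt. A minimal sketch of what it plausibly
# does, under the assumption that it queries Graphite's render API and folds
# the datapoints with the selected function (everything here except
# FUNCTIONS and the option attributes is made up for illustration):
def _check_graphite_sketch(options):
    import json
    import urllib2
    url = "http://%s/render?target=%s&from=-%s&format=json" % (
        options.hostname, options.target, getattr(options, "from"))
    data = json.load(urllib2.urlopen(
        url, timeout=getattr(options, "http_timeout", 10)))
    if not data:
        return None  # the caller maps None to UNKNOWN
    # Graphite datapoints are [value, timestamp] pairs; drop null values
    points = [v for v, _ in data[0]["datapoints"] if v is not None]
    return FUNCTIONS[options.func](points) if points else None
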
class CheckZrmBackup(Plugin):
    days = make_option('-d', '--days', dest='days',
                       help="number of days backup shouldn't be older than")
    bsets = make_option('-b', '--backup-set', dest='bsets',
                        help="comma-separated backup sets to check",
                        default="allsets")
    bdir = make_option('-p', '--backup-dir', dest="bdir",
                       help="path to --destination for mysql-zrm-reporter")

    def check(self):
        self.options.warning = (self.options.warning
                                if self.options.warning is not None
                                else Range('0'))
        days = int(self.options.days)
        bset_msgs = {}
        exit_code = 0

        if self.options.bsets == 'allsets':
            bsets = self.get_backupset_list(self.options.bdir)
        else:
            bsets = self.options.bsets.split(',')

        for bset in bsets:
            ret = self.check_log(bset, self.options.bdir, days)
            msgs = []
            if len(ret[0]) == 0:
                exit_code = 1
                msgs.append('No level 0 backup found')
            elif ret[0][0] == 0:
                msgs.append('Level 0 backup is OK')
            else:
                exit_code = 1
                msgs.append('Level 0 backup is broken')

            if len(ret[1]) == 0:
                exit_code = 1
                msgs.append('No level 1 backup found')
            elif ret[1][0] == 0:
                msgs.append('Level 1 backup is OK')
            else:
                exit_code = 1
                msgs.append('Level 1 backup is broken')

            bset_msgs.update({bset: msgs})

        msg = '\n'
        for k, v in bset_msgs.items():
            msg += '%s\n' % (k, )
            for bset_msg in v:
                msg += '\t- %s\n' % (bset_msg, )
        return self.response_for_value(exit_code, msg)

    def get_backupset_list(self, bdir):
        cmd = subprocess.Popen(
            '/usr/bin/mysql-zrm-reporter --fields backup-set --noheader '
            '--destination %s 2> /dev/null' % (bdir, ),
            shell=True, stdout=subprocess.PIPE)
        lines = [line.strip() for line in cmd.stdout.readlines()]
        return sorted(set(lines))

    def check_log(self, bset, bdir, days_to_check):
        lev0 = []
        lev1 = []
        # Use --type html for easier parsing
        cmd = subprocess.Popen(
            '/usr/bin/mysql-zrm-reporter --fields '
            'backup-set,backup-status,backup-level,backup-date '
            '--type html --destination %s 2> /dev/null' % (bdir, ),
            shell=True, stdout=subprocess.PIPE)
        doc = cmd.stdout.read()
        doc_soup = BeautifulSoup(doc)
        for element in doc_soup.findAll(attrs={'class': 'r_normal'}):
            bset_name = element.find(attrs={'class': 'c_backup_set'}).text
            if bset != bset_name:
                continue
            stat_string = element.find(attrs={'class': 'c_backup_status'}).text
            level = element.find(attrs={'class': 'c_backup_level'}).text
            date_string = element.find(attrs={'class': 'c_backup_date'}).text
            # Remove the day name
            date_string = date_string.split(',', 1)[1]
            # Match strings like <day> <month>, <year> <hr:min:sec> <AM/PM>
            # (e.g. 19 February, 2013 11:05:43 PM)
            p = re.compile(
                r'\d{2}\s\w+[,]\s\d{4}\s\d{2}[:]\d{2}[:]\d{2}\s\w{2}')
            m = p.search(date_string)
            if m is not None:
                dtime_backup = datetime.strptime(m.group(),
                                                 '%d %B, %Y %I:%M:%S %p')
                if dtime_backup.timetuple() > (datetime.now() - timedelta(
                        days=days_to_check)).timetuple():
                    # Backup ok
                    if 'Backup succeeded' in stat_string:
                        if level == '0':
                            lev0.append(0)
                        if level == '1':
                            lev1.append(0)
                    # Backup in progress
                    elif '----' in stat_string:
                        if level == '0':
                            lev0.append(1)
                        if level == '1':
                            lev1.append(1)
                    # Backup broken
                    else:
                        if level == '0':
                            lev0.append(2)
                        if level == '1':
                            lev1.append(2)
        return lev0, lev1

class CheckMaxValue(Plugin):
    """A nagios plugin for checking Integer Overflow"""

    port = make_option('-P', '--port', dest='port', type='int', default=3306,
                       help='The port to be used')
    user = make_option('-u', '--user', dest='user', help='Database user')
    password = make_option('-p', '--password', dest='password',
                           help='Database password')
    use_dbs = make_option(
        '-d', '--use-dbs', dest='use_dbs',
        help='A comma-separated list of db names to be inspected')
    ignore_dbs = make_option(
        '-i', '--ignore-dbs', dest='ignore_dbs',
        help='A comma-separated list of db names to be ignored')
    config = make_option('-C', '--config', dest='config',
                         help='Configuration filename')
    threads = make_option('-T', '--threads', dest='threads', type=int,
                          default=2, help='Number of threads to spawn')
    exclude_columns = make_option(
        '-e', '--exclude-columns', dest='exclude_columns',
        help=('Specify columns to exclude in the following format: '
              'schema1.table1=col1,col2,colN;schemaN.tableN=colN;...'))
    row_count_max_ratio = make_option(
        '--row-count-max-ratio', default=50, type=float,
        help='If table row count is less than this value, exclude this '
             'column from display.')
    display_row_count_max_ratio_columns = make_option(
        '--display-row-count-max-ratio-columns', action='store_true',
        help='In a separate section, display columns containing high values '
             'compared to the maximum for the column datatype, but whose '
             'number of rows is less than the value of '
             '--row-count-max-ratio.')
    results_host = make_option('--results-host', default=None,
                               help='Results database hostname.')
    results_database = make_option('--results-database', default=None,
                                   help='Results database name.')
    results_user = make_option('--results-user', default=None,
                               help='Results database username.')
    results_password = make_option('--results-password', default=None,
                                   help='Results database password.')
    results_port = make_option('--results-port', default=None,
                               help='Results database port.')
    scan_all_columns = make_option('--scan-all-columns', action='store_true',
                                   help='All columns are searched.',
                                   default=False)
    secondary_keys = make_option('--secondary-keys', action='store_true',
                                 help='Secondary keys are also searched.',
                                 default=False)

    def get_options_from_config_file(self):
        """Returns options from YAML file."""
        if self.options.config:
            with open(self.options.config) as f:
                return yaml.load(f)
        else:
            return None

    def get_merged_options(self, additional_options):
        """Returns argument options merged with additional options."""
        options = {}
        if self.options.ignore_dbs:
            options['ignore_dbs'] = self.options.ignore_dbs
        if self.options.use_dbs:
            options['use_dbs'] = self.options.use_dbs
        if self.options.port:
            options['port'] = self.options.port
        if self.options.user:
            options['user'] = self.options.user
        if self.options.password:
            options['password'] = self.options.password
        if self.options.hostname:
            options['hostname'] = self.options.hostname
        if self.options.warning:
            options['warning'] = self.options.warning
        if self.options.critical:
            options['critical'] = self.options.critical
        if self.options.threads:
            options['threads'] = self.options.threads
        if self.options.exclude_columns:
            options['exclude_columns'] = self.options.exclude_columns
        if self.options.row_count_max_ratio:
            options['row_count_max_ratio'] = self.options.row_count_max_ratio
        if self.options.display_row_count_max_ratio_columns:
            options['display_row_count_max_ratio_columns'] = (
                self.options.display_row_count_max_ratio_columns)
        if self.options.results_host:
            options['results_host'] = self.options.results_host
        if self.options.results_database:
            options['results_database'] = self.options.results_database
        if self.options.results_user:
            options['results_user'] = self.options.results_user
        if self.options.results_password:
            options['results_password'] = self.options.results_password
        if self.options.results_port:
            options['results_port'] = self.options.results_port
        options['scan_all_columns'] = self.options.scan_all_columns
        options['secondary_keys'] = self.options.secondary_keys
        if additional_options:
            options.update(additional_options)
        return options

    def create_exclude_columns_dict(self, s):
        """Convert string of format 'schema.table=col1,colN;...' to dict."""
        d = {}
        items = s.split(';')
        for item in items:
            schema_table, columns = item.split('=')
            column_list = columns.split(',')
            d[schema_table] = column_list
        return d

    def merge_options(self):
        self.config_options = self.get_options_from_config_file()
        merged_options = self.get_merged_options(self.config_options)

        # Thresholds
        if self.config_options and 'critical' in self.config_options:
            critical = float(self.config_options['critical'])
        else:
            critical = (float(self.options.critical.__str__())
                        if self.options.critical is not None else 100)
        if self.config_options and 'warning' in self.config_options:
            warning = float(self.config_options['warning'])
        else:
            warning = (float(self.options.warning.__str__())
                       if self.options.warning is not None else 100)
        merged_options['critical'] = critical
        merged_options['warning'] = warning

        # fix string versions of ignore_dbs, use_dbs, exclude_columns
        if 'ignore_dbs' in merged_options:
            ignore_dbs = merged_options['ignore_dbs']
            if ignore_dbs and isinstance(ignore_dbs, basestring):
                # convert string to list
                ignore_dbs = ignore_dbs.strip()
                if ignore_dbs:
                    merged_options['ignore_dbs'] = ignore_dbs.split(',')
        if 'use_dbs' in merged_options:
            use_dbs = merged_options['use_dbs']
            if use_dbs and isinstance(use_dbs, basestring):
                # convert string to list
                use_dbs = use_dbs.strip()
                if use_dbs:
                    merged_options['use_dbs'] = use_dbs.split(',')
        if 'exclude_columns' in merged_options:
            exclude_columns = merged_options['exclude_columns']
            if exclude_columns and isinstance(exclude_columns, basestring):
                # convert string to dict
                exclude_columns = exclude_columns.strip('; ')
                if exclude_columns:
                    merged_options['exclude_columns'] = (
                        self.create_exclude_columns_dict(exclude_columns))
        self.merged_options = merged_options

    def get_schema_tables(self):
        merged_options = self.merged_options
        query = """
            SELECT c.TABLE_SCHEMA, c.TABLE_NAME, c.COLUMN_NAME,
                   c.COLUMN_TYPE, t.TABLE_ROWS, c.COLUMN_KEY, s.SEQ_IN_INDEX
            FROM INFORMATION_SCHEMA.COLUMNS c
            LEFT JOIN INFORMATION_SCHEMA.TABLES t
                ON c.TABLE_SCHEMA = t.TABLE_SCHEMA
                AND c.TABLE_NAME = t.TABLE_NAME
            LEFT JOIN INFORMATION_SCHEMA.STATISTICS s
                ON c.TABLE_SCHEMA = s.TABLE_SCHEMA
                AND c.TABLE_NAME = s.TABLE_NAME
                AND c.COLUMN_NAME = s.COLUMN_NAME
            WHERE c.COLUMN_TYPE LIKE '%int%'
        """
        if 'use_dbs' in merged_options:
            # set comma separated schema names enclosed in single-quotes
            use_dbs = ','.join("'%s'" % (db, )
                               for db in merged_options['use_dbs'])
            if use_dbs:
                query += """
                    AND c.TABLE_SCHEMA IN (%s)
                """ % (use_dbs, )
        if 'ignore_dbs' in merged_options:
            # set comma separated schema names enclosed in single-quotes
            ignore_dbs = ','.join("'%s'" % (db, )
                                  for db in merged_options['ignore_dbs'])
            if ignore_dbs:
                query += """
                    AND c.TABLE_SCHEMA NOT IN (%s)
                """ % (ignore_dbs, )

        conn = create_connection(merged_options)
        try:
            log.debug('%s' % (query, ))
            rows = fetchall(conn, query)
            log.debug('len(rows)=%s' % (len(rows), ))
            log.debug(pprint.pformat(rows))

            if 'exclude_columns' in self.merged_options:
                exclude_columns = self.merged_options['exclude_columns']
            else:
                exclude_columns = None

            schema_tables = {}
            added_columns = []
            for row in rows:
                schema = row[0]
                table = row[1]
                column = row[2]
                column_type = row[3]
                row_count = row[4]
                column_key = row[5]
                if column_key is not None:
                    column_key = column_key.strip().lower()
                seq_in_index = row[6]
                scan_secondary_keys = merged_options['secondary_keys']
                scan_all_columns = merged_options['scan_all_columns']
                schema_table = '%s.%s' % (schema, table)
                if (exclude_columns and schema_table in exclude_columns
                        and column in exclude_columns[schema_table]):
                    # this column is excluded
                    log.debug('Excluded column: %s.%s.%s' %
                              (schema, table, column))
                    pass
                else:
                    include_column = False
                    if column_key and column_key == 'pri':
                        # always include primary keys
                        include_column = True
                    if scan_secondary_keys:
                        if (column_key and column_key != 'pri'
                                and seq_in_index and seq_in_index == 1):
                            include_column = True
                    # if (
                    #         (not merged_options['primary_keys']) and
                    #         (not merged_options['secondary_keys'])):
                    #     include_column = True
                    if scan_all_columns:
                        include_column = True
                    if include_column:
                        column_to_add = '%s.%s.%s' % (schema, table, column)
                        if column_to_add in added_columns:
                            # prevent duplicates
                            include_column = False
                        else:
                            added_columns.append(column_to_add)
                    if include_column:
                        if schema_table in schema_tables:
                            schema_tables[schema_table]['columns'].append(
                                dict(column_name=column,
                                     column_type=column_type))
                        else:
                            schema_tables[schema_table] = dict(
                                schema=schema,
                                table=table,
                                row_count=row_count,
                                columns=[
                                    dict(column_name=column,
                                         column_type=column_type)
                                ])
                # end for
        finally:
            conn.close()
        return schema_tables

    def configure_logging(self):
        try:
            from logging.config import dictConfig
        except ImportError:
            from logutils.dictconfig import dictConfig
        if 'logging' in self.merged_options and self.merged_options['logging']:
            dictConfig(self.merged_options['logging'])

    def check(self):
        try:
            self.merge_options()
            self.configure_logging()
            merged_options = self.merged_options

            hostname = ''
            if 'hostname' in merged_options and merged_options['hostname']:
                hostname = merged_options['hostname']

            self.results_db_conn_opts = {}
            if ('results_host' in merged_options
                    and merged_options['results_host']):
                self.results_db_conn_opts['host'] = merged_options[
                    'results_host']
            if ('results_port' in merged_options
                    and merged_options['results_port']):
                self.results_db_conn_opts['port'] = merged_options[
                    'results_port']
            if ('results_user' in merged_options
                    and merged_options['results_user']):
                self.results_db_conn_opts['user'] = merged_options[
                    'results_user']
            if ('results_password' in merged_options
                    and merged_options['results_password']):
                self.results_db_conn_opts['passwd'] = merged_options[
                    'results_password']
            if ('results_database' in merged_options
                    and merged_options['results_database']):
                self.results_db_conn_opts['db'] = merged_options[
                    'results_database']
            if self.results_db_conn_opts:
                if not ('db' in self.results_db_conn_opts
                        and self.results_db_conn_opts['db']):
                    raise Error('results_database is required.')

            log.debug('Check started with the following options:\n%s' %
                      (pprint.pformat(self.merged_options), ))

            schema_tables = self.get_schema_tables()
            log.debug('Schema tables:\n%s' % (pprint.pformat(schema_tables), ))

            q = Queue.Queue()
            for v in schema_tables.itervalues():
                q.put(v)

            threads = self.merged_options['threads']
            results = Queue.Queue()
            thread_list = []
            for n in range(threads):
                thread = TableProcessor(schema_tables=q,
                                        merged_options=self.merged_options,
                                        results=results)
                thread.name = 'Thread #%d' % (n, )
                thread.daemon = True
                thread.start()
                thread_list.append(thread)

            # wait for all threads to finish
            log.debug('Waiting for all threads to finish running.')
            while True:
                dead = []
                for thread in thread_list:
                    dead.append(not thread.is_alive())
                if all(dead):
                    break
                time.sleep(0.01)
            log.debug('All threads finished.')

            critical_columns = []
            warning_columns = []
            errors = []
            investigate_columns = []
            while True:
                try:
                    result = results.get_nowait()
                    if 'critical_column' in result:
                        critical_columns.append(result['critical_column'])
                    if 'warning_column' in result:
                        warning_columns.append(result['warning_column'])
                    if 'error' in result:
                        errors.append(result['error'])
                    if 'investigate_column' in result:
                        investigate_columns.append(
                            result['investigate_column'])
                    results.task_done()
                except Queue.Empty, e:
                    break

            log.info('Critical columns:\n%s\n\nWarning columns:\n%s' %
                     (pprint.pformat(critical_columns),
                      pprint.pformat(warning_columns)))

            if len(critical_columns) > 0:
                columns = sorted(critical_columns) + sorted(warning_columns)
                status = pynagios.CRITICAL
            elif len(warning_columns) > 0:
                columns = warning_columns
                status = pynagios.WARNING
            else:
                status = pynagios.OK

            msg = ''
            if status != pynagios.OK:
                msg = '\n'.join(
                    '%s.%s\t%s\t%s\t%s\t%.2f%%' %
                    (col.get('schema'), col.get('table'),
                     col.get('column_name'), col.get('column_type'),
                     col.get('max_value'), col.get('overflow_percentage'))
                    for col in columns)
                msg = '\n' + msg

                ##############################################################
                # store critical/warning columns in db
                ##############################################################
                if self.results_db_conn_opts:
                    conn = MySQLdb.connect(**self.results_db_conn_opts)
                    with conn as cursor:
                        sql = ("INSERT INTO int_overflow_check_results("
                               "  hostname, dbname, table_name, column_name, "
                               "  max_size, percentage, reason, timestamp) "
                               "VALUE (%s, %s, %s, %s, %s, %s, %s, %s)")
                        for col in critical_columns:
                            cursor.execute(
                                sql,
                                (hostname, col.get('schema'),
                                 col.get('table'), col.get('column_name'),
                                 col.get('max_value'),
                                 col.get('overflow_percentage'), 'critical',
                                 datetime.datetime.now()))
                        for col in warning_columns:
                            cursor.execute(
                                sql,
                                (hostname, col.get('schema'),
                                 col.get('table'), col.get('column_name'),
                                 col.get('max_value'),
                                 col.get('overflow_percentage'), 'warning',
                                 datetime.datetime.now()))

            row_count_max_ratio = self.merged_options.get(
                'row_count_max_ratio', 0)
            if investigate_columns:
                log.info('Investigate columns:\n%s' %
                         (pprint.pformat(investigate_columns, )))
                if msg:
                    msg += '\n'
                msg += (('\nColumns containing high values compared to the '
                         'maximum for the column datatype, but whose number '
                         'of rows is less than %s%% of the maximum for the '
                         'column type:\n' % (row_count_max_ratio, )) +
                        ('\n'.join(
                            '%s.%s\t%s\t%s\t%s\t%.2f%%' %
                            (col.get('schema'), col.get('table'),
                             col.get('column_name'), col.get('column_type'),
                             col.get('max_value'),
                             col.get('overflow_percentage'))
                            for col in investigate_columns)))

                ##############################################################
                # store investigate columns in db
                ##############################################################
                if self.results_db_conn_opts:
                    conn = MySQLdb.connect(**self.results_db_conn_opts)
                    with conn as cursor:
                        sql = ("INSERT INTO int_overflow_check_results("
                               "  hostname, dbname, table_name, column_name, "
                               "  max_size, percentage, reason, timestamp) "
                               "VALUE (%s, %s, %s, %s, %s, %s, %s, %s)")
                        for col in investigate_columns:
                            cursor.execute(
                                sql,
                                (hostname, col.get('schema'),
                                 col.get('table'), col.get('column_name'),
                                 col.get('max_value'),
                                 col.get('overflow_percentage'),
                                 'investigate', datetime.datetime.now()))

            log.info('status: %s\n\nmsg:\n%s' % (status, msg))
            self.exit_code = status.exit_code
            return Response(status, msg)
        except Exception, e:
            log.exception('Exception.')
            return Response(pynagios.UNKNOWN, 'ERROR: {0}'.format(e))

# (imports this plugin's module needs; apikey and bugid are module-level
# settings defined elsewhere in the original script)
import json
import sys

import requests


class beerbug(Plugin):
    data = make_option(
        "-d", "--data", dest="data", type="int",
        help="1=Battery, 2=sg, 3=plato, 4=alcohol, 5=beerbug temperature, "
             "6=probe temperature, 7=progress")
    tempunit = make_option("-f", "--farenheit", dest="tempunit",
                           action="store_true",
                           help="Use Fahrenheit. Only for temperature options")

    def check(self):
        data = self.options.data
        tempunit = self.options.tempunit
        j = self.apijson()

        # Battery Percentage
        if data == 1:
            result = self.response_for_value(j['battPercentage'],
                                             message=j['battPercentage'])
            result.set_perf_data("Battery %", j['battPercentage'], uom="%",
                                 warn=self.options.warning,
                                 crit=self.options.critical)
            return result
        # Specific Gravity
        elif data == 2:
            result = self.response_for_value(j['sg'], message=j['sg'])
            result.set_perf_data("SG", j['sg'], warn=self.options.warning,
                                 crit=self.options.critical)
            return result
        # Plato
        elif data == 3:
            result = self.response_for_value(j['plato'], message=j['plato'])
            result.set_perf_data("Plato", j['plato'],
                                 warn=self.options.warning,
                                 crit=self.options.critical)
            return result
        # Alcohol
        elif data == 4:
            result = self.response_for_value(j['al'], message=j['al'])
            result.set_perf_data("Alcohol", j['al'],
                                 warn=self.options.warning,
                                 crit=self.options.critical)
            return result
        # Progress
        elif data == 7:
            result = self.response_for_value(j['progress'],
                                             message=j['progress'])
            result.set_perf_data("Progress", j['progress'], uom="%",
                                 warn=self.options.warning,
                                 crit=self.options.critical)
            return result
        # Beerbug Temp in F
        elif data == 5 and tempunit:
            result = self.response_for_value(j['t1f'], message=j['t1f'])
            result.set_perf_data("Beerbug Temperature", j['t1f'],
                                 warn=self.options.warning,
                                 crit=self.options.critical)
            return result
        # Probe Temp in F
        elif data == 6 and tempunit:
            result = self.response_for_value(j['t2f'], message=j['t2f'])
            result.set_perf_data("Probe Temperature", j['t2f'],
                                 warn=self.options.warning,
                                 crit=self.options.critical)
            return result
        # Beerbug Temp in C
        elif data == 5:
            result = self.response_for_value(j['t1c'], message=j['t1c'])
            result.set_perf_data("Beerbug Temperature", j['t1c'],
                                 warn=self.options.warning,
                                 crit=self.options.critical)
            return result
        # Probe Temp in C
        elif data == 6:
            result = self.response_for_value(j['t2c'], message=j['t2c'])
            result.set_perf_data("Probe Temperature", j['t2c'],
                                 warn=self.options.warning,
                                 crit=self.options.critical)
            return result
        else:
            print "options are not correct"
            sys.exit(3)  # nagios UNKNOWN

    def apijson(self):
        r = requests.get(
            'http://www.thebeerbug.com/api/?api_key=%s&beerbug_id=%s' %
            (apikey, bugid))
        if r.status_code != 200:
            print "Unable to contact server, errorcode: %s" % r.status_code
            sys.exit(3)
        j = json.loads(r.text)
        if j['success'] == False:
            print "Failure, Errorcode: %s" % j['reason']
            sys.exit(3)
        return j

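# The fields read from the API response above imply a JSON payload shaped
# roughly like this (values illustrative, keys taken from the code):
#
#     {"success": true, "battPercentage": 87, "sg": 1.012, "plato": 3.1,
#      "al": 4.7, "progress": 62, "t1f": 68.2, "t2f": 67.8,
#      "t1c": 20.1, "t2c": 19.9}
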
# (imports this plugin's module needs; trim_response is a helper defined
# elsewhere in the original module)
import logging
import time
from urlparse import urljoin

import requests
from requests.auth import HTTPBasicAuth


class GraphDBCheck(Plugin):
    url = make_option('-l', '--url', metavar='<url>', help='URL to check',
                      dest="url", type='string')
    repository = make_option('-r', '--repository', metavar='<repository>',
                             help='Repository to check', dest="repository",
                             type='string')
    user = make_option('-u', '--user', metavar='<basic_auth_user>',
                       help='User for auth', dest="user")
    password = make_option('-p', '--password',
                           metavar='<basic_auth_password>',
                           help='Password for auth', dest="password")
    checks = make_option('-k', '--checks', metavar='<checks>',
                         help='Semicolon-separated list of checks to '
                              'perform, e.g. '
                              'read-availability;long-running-queries',
                         dest="checks")
    debug = make_option('-d', '--debug', metavar='<debug>',
                        help='set log level to debug', action="store_true",
                        dest="debug")

    def check(self):
        if self.options.debug:
            logging.basicConfig(level=logging.DEBUG)
        else:
            logging.basicConfig(level=logging.INFO)
        logging.getLogger("urllib3").setLevel(logging.WARNING)

        payload = {}
        auth = None
        if self.options.user and self.options.password:
            auth = HTTPBasicAuth(self.options.user, self.options.password)
        if self.options.checks:
            payload['checks'] = []
            for check in self.options.checks.split(';'):
                payload['checks'].append(check)

        final_url = urljoin(self.options.url,
                            '/repositories/' + self.options.repository +
                            '/health')
        start = time.time()
        try:
            response = requests.get(final_url, params=payload, auth=auth)
            total_time = time.time() - start
        except Exception as ex:
            return Response(
                CRITICAL,
                "Error occurred while making connection to GraphDB "
                "instance:\n%s" % ex)

        if self.options.critical is None:
            self.options.critical = Range('@500:')
        if self.options.warning is None:
            self.options.warning = Range('@206:500')

        result = self.response_for_value(
            response.status_code,
            "Repository[%s] at [%s]\nResponse: %s\nResponse time: %ss\n" %
            (self.options.repository, self.options.url,
             trim_response(response.text), total_time))
        result.set_perf_data('response-time', total_time, uom='s')

        for check, state in response.json().iteritems():
            if check == 'status' or check == 'master-status':
                # overall statuses are green/yellow/red
                if state == 'green':
                    status_in_percent = 100
                elif state == 'yellow':
                    status_in_percent = 50
                else:
                    status_in_percent = 0
            else:
                # individual checks report OK or a failure message
                status_in_percent = 100 if state == 'OK' else 0
            result.set_perf_data(check, status_in_percent, uom='%',
                                 warn=Range('50'), crit=Range('0'))
        return result

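# For reference, the health endpoint polled above returns a JSON object
# whose keys are the individual checks, roughly like (illustrative, shaped
# to match what the loop above expects):
#
#     {"status": "green", "read-availability": "OK",
#      "long-running-queries": "OK"}
#
# which the loop converts into 0/50/100% perf-data values.
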
class CheckSplunk(pynagios.Plugin):
    __metaclass__ = PluginMeta

    _usage = "usage: %prog [options] command"

    hostname = pynagios.make_option("-H", type="string",
                                    help="IP or FQDN of the Splunk server")
    username = pynagios.make_option("-U", type="string",
                                    help="Username used to log into Splunk")
    password = pynagios.make_option("-P", type="string",
                                    help="Password used to log into Splunk")
    port = pynagios.make_option("-p", type="int", default=8089,
                                help="splunkd Port on server, default 8089")
    use_ssl = pynagios.make_option("-n", action="store_false", default=True,
                                   help="Disable HTTPS (use http)")
    zabbix = pynagios.make_option("-Z", action="store_true", default=False,
                                  help="Output in Zabbix format")
    warning = pynagios.make_option("-w", type="int", help="Warning level")
    critical = pynagios.make_option("-c", type="int", help="Critical level")

    check_index_opts = OptionGroup(
        "Index Check Options", "Options for the index checks",
        pynagios.make_option("--index", default="main",
                             help="Name of a Splunk index, default is 'main'"),
    )
    check_license_opts = OptionGroup(
        "License Check Options", "Options for license checks",
        pynagios.make_option(
            "--license-pool", default="auto_generated_pool_enterprise",
            help="Name of a Splunk license pool, default is "
                 "'auto_generated_pool_enterprise'"),
        pynagios.make_option(
            "--capacity", default=0,
            help="Capacity to consider 100%, defaults to pool capacity"),
    )
    check_search_peer_opts = OptionGroup(
        "check_search_peer Options", "Options for search peer check",
        pynagios.make_option(
            "--search-peer", type="string",
            help="Name of an indexer used by this search head"),
    )
    check_output_opts = OptionGroup(
        "check_output Options", "Options for TCP output check",
        pynagios.make_option("--appname", type="string",
                             help="App containing the output"),
        pynagios.make_option("--output", type="string",
                             help="Host/port pair of a forward-server"),
    )
    check_cluster_peer_opts = OptionGroup(
        "check_cluster_peer Options", "Options for cluster peer check",
        pynagios.make_option("--cluster-peer", type="string",
                             help="Name of a cluster slave (indexer)"),
    )
    check_deployment_client_opts = OptionGroup(
        "check_deployment_client Options",
        "Options for deployment client check",
        pynagios.make_option(
            "--deployment-client", type="string",
            help="IP, Hostname or ID of a deployment client"))

    def __init__(self, *args, **kwargs):
        epilog_lines = list()
        epilog_lines.append("Valid check commands:")
        for attr in dir(self):
            if attr.startswith("check_") and callable(getattr(self, attr)):
                f = getattr(self, attr)
                command = attr[6:]
                if hasattr(f, "description"):
                    epilog_lines.append("  {0}{1}".format(
                        string.ljust(command, 30), f.description))
                else:
                    epilog_lines.append("  {0}".format(command))
                if hasattr(f, "usage"):
                    epilog_lines.append("    Usage:")
                    epilog_lines.append(
                        self._option_parser.expand_prog_name(
                            "      %prog {0}".format(f.usage)))
        self._option_parser.epilog = "\n".join(epilog_lines)
        super(CheckSplunk, self).__init__(*args, **kwargs)

    def response_for_value(self, value, message=None, ok_value=None,
                           critical_value=None, zabbix_ok=None,
                           zabbix_critical=None):
        if critical_value is None and ok_value is None:
            if value >= self.options.critical:
                ret = pynagios.CRITICAL
            elif value >= self.options.warning:
                ret = pynagios.WARNING
            else:
                ret = pynagios.OK
        else:
            if ok_value is not None and critical_value is not None:
                if value == critical_value:
                    ret = pynagios.CRITICAL
                elif value == ok_value:
                    ret = pynagios.OK
                else:
                    ret = pynagios.UNKNOWN
            elif ok_value is None:
                if value == critical_value:
                    ret = pynagios.CRITICAL
                else:
                    ret = pynagios.OK
            elif critical_value is None:
                if value == ok_value:
                    ret = pynagios.OK
                else:
                    ret = pynagios.CRITICAL
            else:
                ret = pynagios.UNKNOWN

        if self.options.zabbix or message is None:
            if ret == pynagios.OK and zabbix_ok:
                return ZabbixResponse(ret, zabbix_ok)
            elif ret == pynagios.CRITICAL and zabbix_critical:
                return ZabbixResponse(ret, zabbix_critical)
            return ZabbixResponse(ret, value)
        else:
            return pynagios.Response(ret, message)

    def check(self):
        #try:
        splunkd = SplunkServer(self.options.hostname, self.options.username,
                               self.options.password, self.options.port,
                               self.options.use_ssl)
        #except:
        #    return pynagios.Response(pynagios.UNKNOWN,
        #                             "Failed to login to splunkd")

        check = getattr(self, "check_{0}".format(self.args[1]), None)
        if check is None:
            check = getattr(self, self.args[1], None)

        if callable(check):
            try:
                return check(splunkd)
            except ConnectionError:
                return pynagios.Response(pynagios.CRITICAL,
                                         "Unable to connect to splunkd")
            except ApiError as e:
                return pynagios.Response(pynagios.CRITICAL, str(e))
        else:
            return pynagios.Response(pynagios.UNKNOWN,
                                     "Invalid check requested")

    @add_description("Check the usage of a given index (indexer)")
    @add_usage("--index=main -w 80 -c 90")
    def check_index(self, splunkd):
        try:
            (used, capacity,
             pct) = splunkd.get_index_usage(self.options.index)
        except AttributeError:
            return pynagios.Response(
                pynagios.CRITICAL,
                "{0} index not found".format(self.options.index))

        output = "{0}% of MaxTotalDBSize ({1}) is used".format(pct, capacity)
        result = self.response_for_value(pct, output)
        result.set_perf_data("currentDBSizeMB", used * 1048576, "B")
        result.set_perf_data("maxTotalDataSizeMB", capacity * 1048576, "B")
        return result

    @add_description("Check the latency of a given index (indexer)")
    @add_usage("--index=main -w 5 -c 10")
    def check_index_latency(self, splunkd):
        latency = splunkd.get_index_latency(self.options.index)
        output = "Average latency is {0} seconds".format(latency)
        result = self.response_for_value(latency, output)
        result.set_perf_data("latency", latency, "s")
        return result

    @add_description("Check usage of a given license pool (licensemaster)")
    @add_usage("--license-pool=auto_generated_pool_enterprise")
    def check_license(self, splunkd):
        (used, capacity,
         pct) = splunkd.get_license_pool_usage(self.options.license_pool)
        if self.options.capacity != 0:
            capacity = int(self.options.capacity)
            pct = int(used * 100 / capacity)
        output = "{0}% of license capacity ({1}) is used".format(pct,
                                                                 capacity)
        result = self.response_for_value(pct, output)
        result.set_perf_data("license_used", used, "B")
        result.set_perf_data("license_capacity", capacity, "B")
        return result

    @add_description("Check connectivity to the license master (all)")
    @add_usage("-w 60 -c 120")
    def check_license_master(self, splunkd):
        info = splunkd.license_slave_info
        last_success = info["last_master_contact_success_time"]
        last_attempt = info["last_master_contact_attempt_time"]
        success_diff = int(time.time()) - int(last_success)
        output = "Last connected to master {0} seconds ago".format(
            success_diff)
        return self.response_for_value(success_diff, output,
                                       zabbix_ok="1", zabbix_critical="0")

    @add_description("Check connectivity to a given search peer "
                     "(searchhead, cluster-master)")
    @add_usage("--search-peer=acme-corp-indexer-01")
    def check_search_peer(self, splunkd):
        status = splunkd.get_search_peer_status(self.options.search_peer)
        output = "Search peer is {0}".format(status)
        return self.response_for_value(status, output, critical_value="Down",
                                       zabbix_ok="1", zabbix_critical="0")

    @add_description("Check the number of currently running searches "
                     "(searchhead)")
    @add_usage("-w 25 -c 50")
    def check_concurrent_searches(self, splunkd):
        searches = len(list(splunkd.running_jobs))
        output = "{0} searches are currently running".format(searches)
        result = self.response_for_value(searches, output)
        result.set_perf_data("searches", searches)
        return result

    @add_description("Check a TCP output for connectivity to the "
                     "forward-server (forwarder)")
    @add_usage("--output=192.168.1.1:9997")
    def check_output(self, splunkd):
        status = splunkd.get_tcp_output_status(self.options.appname,
                                               self.options.output)
        output = "{0} is currently in status '{1}'".format(
            self.options.output, status)
        return self.response_for_value(status, output,
                                       ok_value="connect_done",
                                       zabbix_ok="1", zabbix_critical="0")

    @add_description("Check that a cluster peer is connected to the master "
                     "(cluster-master)")
    @add_usage("--cluster-peer=acme-corp-indexer-01")
    def check_cluster_peer(self, splunkd):
        status = splunkd.get_cluster_peer_status(self.options.cluster_peer)
        output = "Cluster peer '{0}' is {1}".format(
            self.options.cluster_peer, status)
        return self.response_for_value(status, output, ok_value="Up",
                                       zabbix_ok="1", zabbix_critical="0")

    @add_description("Check that all buckets are valid (cluster-master)")
    @add_usage("")
    def check_cluster_valid(self, splunkd):
        config = splunkd.cluster_config
        invalid = list()
        for bucket in splunkd.cluster_buckets:
            if bucket["standalone"] == "1":
                continue
            valid = 0
            for (peer, info) in bucket["peers"].items():
                if info["search_state"] == "Searchable":
                    valid += 1
            if valid < int(config["search_factor"]):
                invalid.append(bucket)
        output = "{0} invalid buckets".format(len(invalid))
        result = self.response_for_value(len(invalid), output, ok_value=0)
        result.set_perf_data("invalid", len(invalid))
        return result

    @add_description("Check that all buckets are complete (cluster-master)")
    @add_usage("")
    def check_cluster_complete(self, splunkd):
        config = splunkd.cluster_config
        incomplete = list()
        for bucket in splunkd.cluster_buckets:
            if bucket["standalone"] == "1":
                continue
            complete = 0
            for (peer, info) in bucket["peers"].items():
                if info["status"] in ("Complete", "StreamingSource",
                                      "StreamingTarget"):
                    complete += 1
            if complete < int(config["replication_factor"]):
                incomplete.append(bucket)
        output = "{0} incomplete buckets".format(len(incomplete))
        result = self.response_for_value(len(incomplete), output, ok_value=0)
        result.set_perf_data("incomplete", len(incomplete))
        return result

    @add_description("Verify slave is connected to master (indexer)")
    @add_usage("")
    def check_cluster_connection(self, splunkd):
        connected = bool(splunkd.cluster_slave_info["is_registered"] == "1")
        master = splunkd.cluster_config["master_uri"]
        output = ("Connected to {0}".format(master)
                  if connected else "Disconnected")
        return self.response_for_value(connected, output, ok_value=True,
                                       zabbix_ok="1", zabbix_critical="0")

    @add_description("Verify clustering status of slave (indexer)")
    @add_usage("")
    def check_cluster_status(self, splunkd):
        status = splunkd.cluster_slave_info["status"]
        output = "Slave is {0}".format(status)
        return self.response_for_value(status, output, ok_value="Up",
                                       zabbix_ok="1", zabbix_critical="0")

    @add_description("Verify a deployment client has checked in "
                     "(deployment-server)")
    @add_usage("--deployment-client=192.168.1.1")
    def check_deployment_client(self, splunkd):
        try:
            phoneHomeTime = splunkd.get_deployment_client_info(
                self.options.deployment_client)["phoneHomeTime"]
        except StopIteration:
            return pynagios.Response(
                pynagios.CRITICAL,
                "Unable to get phone home time for {0}".format(
                    self.options.deployment_client))

        import datetime
        dt = datetime.datetime.strptime(phoneHomeTime,
                                        "%a %b %d %H:%M:%S %Y")
        # .seconds would ignore the .days component of the timedelta
        diff = int((datetime.datetime.now() - dt).total_seconds())

        output = "Client checked in {0} seconds ago".format(diff)
        return self.response_for_value(diff, output,
                                       zabbix_ok="1", zabbix_critical="0")

    # @add_description("Return the given field from the first search result of the given search")
    # @add_usage("--search='host=X sourcetype=Y' --earliest-time='-1h@h' --latest-time='@h' --field=Z")
    # def check_search_result(self, splunkd):
    #     result = splunkd.get_search_first_result(self.options.search, self.options.field, self.options.earliest, self.options.latest)
    #
    #     output = "Result: {0}={1}".format(field, result)
    #     return self.response_for_value(result, output)

    @add_description("Check bundle replication status")
    def check_distributed_search_peers(self, splunkd):
        failedPeers = list()
        for peer in splunkd.distributed_search_peers:
            if peer["replicationStatus"] != "Successful":
                failedPeers.append(peer["guid"])
        if len(failedPeers) == 0:
            return pynagios.Response(pynagios.OK,
                                     "All peers replicating successfully")
        else:
            return pynagios.Response(
                pynagios.CRITICAL,
                "Peers failed replication: %s" % ",".join(failedPeers))

    @add_description("Check for messages displayed in the Splunk UI")
    @add_usage("")
    def check_messages(self, splunkd):
        count = len(list(splunkd.messages))
        output = "{0} messages in Splunk UI".format(count)
        return self.response_for_value(count, output, ok_value=0,
                                       zabbix_ok="1", zabbix_critical="0")

class MyChild(Plugin):
    explode = make_option("-H", type="string")

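# MyChild above looks like a test fixture rather than a real check:
# pynagios.Plugin already defines a built-in -H/--hostname option, so
# redefining "-H" (note the option name "explode") presumably exercises
# option-conflict handling, roughly:
#
#     MyChild(["myscript", "-H", "foo"])  # expected to fail in optparse
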
class CheckSplunk(pynagios.Plugin):
    username = pynagios.make_option("-u", type="string")
    password = pynagios.make_option("-p", type="string")
    index = pynagios.make_option("-I", type="string", default="main")
    license_pool = pynagios.make_option(
        "-L", type="string", default="auto_generated_pool_enterprise")
    warn = pynagios.make_option("-W", type="int", default=75)
    crit = pynagios.make_option("-C", type="int", default=90)

    def __init__(self, *args, **kwargs):
        super(CheckSplunk, self).__init__(*args, **kwargs)
        splunk_kwargs = {
            'index_name': self.options.index,
            'license_pool': self.options.license_pool,
            # See note in check_license_expiration
            'license_hash': self.options.license_pool,
        }
        self.splunk = SplunkServer(self.options.hostname,
                                   self.options.username,
                                   self.options.password, **splunk_kwargs)

    def check(self):
        check = self.args[1]
        if hasattr(self, "check_%s" % check):
            return getattr(self, "check_%s" % check)()
        else:
            return pynagios.Response(pynagios.UNKNOWN,
                                     "Invalid check requested")

    def check_license(self):
        if self.splunk.isFree:
            return pynagios.Response(pynagios.OK, "Splunk Community Edition")
        if self.splunk.isTrial:
            return pynagios.Response(pynagios.OK, "Splunk Download Trial")

        # Request list of licenses
        licenses = self.splunk.licenses
        valid_licenses = filter(lambda l: licenses[l]['status'] == 'VALID',
                                licenses.keys())
        valid_licenses = filter(lambda l: licenses[l]['type'] == 'enterprise',
                                valid_licenses)
        try:
            quota = sum(
                map(lambda l: int(licenses[l]['quota']), valid_licenses))
        except:
            quota = 0
        if quota == 0:
            return pynagios.Response(pynagios.CRITICAL,
                                     "No valid licenses available")

        # Get the pool's current usedBytes value
        used_bytes = sum(map(lambda p: int(p['used_bytes']),
                             self.splunk.pools))

        WARN_QUOTA = self.options.warn * quota / 100
        CRIT_QUOTA = self.options.crit * quota / 100
        USED_PERCENT = int(used_bytes * 100 / quota)

        output_string = "%d%% of license capacity is used" % USED_PERCENT
        if used_bytes > CRIT_QUOTA:
            result = pynagios.Response(pynagios.CRITICAL, output_string)
        elif used_bytes > WARN_QUOTA:
            result = pynagios.Response(pynagios.WARNING, output_string)
        else:
            result = pynagios.Response(pynagios.OK, output_string)
        result.set_perf_data("used", used_bytes, "")
        result.set_perf_data("quota", quota, "")
        return result

    def check_index(self):
        USED_PERCENT = int(self.splunk.current_db_size * 100 /
                           self.splunk.max_db_size)
        output_string = "%d%% of MaxTotalDBSize is used" % USED_PERCENT
        if USED_PERCENT > self.options.crit:
            result = pynagios.Response(pynagios.CRITICAL, output_string)
        elif USED_PERCENT > self.options.warn:
            result = pynagios.Response(pynagios.WARNING, output_string)
        else:
            result = pynagios.Response(pynagios.OK, output_string)
        result.set_perf_data("currentDBSizeMB",
                             self.splunk.current_db_size, "")
        result.set_perf_data("maxTotalDataSizeMB",
                             self.splunk.max_db_size, "")
        return result

    def check_license_expiration(self):
        # For now I'm going to reuse -L, but this might change in the future.
        # This means we refer to the license hash as "license_pool"
        # internally in a few places. Boo. Let's fix that later.
        expire_ts = self.splunk.license_expiration_time
        now_ts = int(time.time())
        if expire_ts <= now_ts:
            # Exit now, it's expired
            return pynagios.Response(
                pynagios.CRITICAL,
                "License %s is expired" % self.options.license_pool)

        diff_secs = expire_ts - now_ts
        diff_days = int(diff_secs / 86400)
        if diff_days < self.options.crit:
            return pynagios.Response(
                pynagios.CRITICAL, "License %s expires in %d days" %
                (self.options.license_pool, diff_days))
        elif diff_days < self.options.warn:
            return pynagios.Response(
                pynagios.WARNING, "License %s expires in %d days" %
                (self.options.license_pool, diff_days))
        else:
            return pynagios.Response(
                pynagios.OK, "License %s expires in %d days" %
                (self.options.license_pool, diff_days))

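# A sketch of the command line this variant implies: the check name is the
# first positional argument, mapped to a check_<name> method (hostname and
# credential values below are illustrative):
#
#     python check_splunk.py license -H splunk01 -u admin -p changeme -W 75 -C 90
#     python check_splunk.py license_expiration -H splunk01 -u admin -p changeme -W 60 -C 30
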
# (imports this plugin's module needs)
import datetime
import os
import re
import subprocess
from signal import signal, alarm, SIGALRM, SIGKILL
from pynagios import Plugin, Response, UNKNOWN, make_option


class MyCheck(Plugin):
    """nagios check plugin that returns the state of the savesets of the
    last day by using the mminfo binary of the legato client.

    the legato client has to be installed and configured properly.
    try to execute an mminfo command separately first, e.g.:

        mminfo -o n -s serverxy -q "client='clientxy',savetime>=last day" \
            -r "client,name,savetime(17),nsavetime,level"

    usage:
        -H        = Hostname in fqn or normal form (no ip address)
        --server  = Name or IP of the networker server
        --timeout = timeout value for killing the mminfo process
    """
    server = make_option("--server", type="string")

    def check(self):
        class AlarmError(Exception):
            pass

        def alarm_handler(signum, frame):
            raise AlarmError

        def get_process_children(pid):
            args = ['ps', '--no-headers', '-o', 'pid', '--ppid', str(pid)]
            p = subprocess.Popen(args, stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env={'LANG': 'de_DE@euro'})
            stdout, stderr = p.communicate()
            return [int(child) for child in stdout.split()]

        ## timeout
        timeout = self.options.timeout
        ## kill_tree
        kill_tree = True
        #servername = 'nsr_srv'
        servername = self.options.server
        ## get the hostname
        hostname = self.options.hostname
        ## search for a dot in a possibly fully qualified hostname;
        ## if found, keep only the short hostname
        m = re.search(r"(.*)\.(.*)", hostname)
        if m is not None:
            hostname_fields = hostname.split(".")
            hostname = hostname_fields[0]

        # mminfo command
        queryspec = "client='%s',savetime>=last day" % hostname
        reportspec = "client,name,savetime(17),nsavetime,level,ssflags"
        args = [
            '/usr/sbin/mminfo', '-o', 'n', '-s', servername, '-q', queryspec,
            '-r', reportspec, '-x', 'c;'
        ]

        ## create a subprocess with LANG=de_DE@euro
        process = subprocess.Popen(args, stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   env={'LANG': 'de_DE@euro'})

        ## create an alarm with the timeout parameter
        if timeout != -1:
            signal(SIGALRM, alarm_handler)
            alarm(timeout)
        try:
            ## catch stdout and stderr
            output = process.communicate()

            ## get the current date and convert it to the expected form
            today = datetime.datetime.today()
            today_2 = today.strftime('%d.%m.%Y')
            yesterday = today - datetime.timedelta(days=1)
            yesterday = yesterday.strftime('%d.%m.%Y')

            lines = str(output[0]).strip("\n").split("\n")
            lines2 = ''.join(str(output[1]).strip("\n").split("\n"))
            del lines[0]
            anzahl_lines = len(lines)
            errorcounter = 0
            liste = []

            ## if there is no output in stdout, mminfo reports it on stderr
            no_data = '6095:mminfo: no matches found for the query'
            if anzahl_lines < 1 and lines2 == no_data:
                info = "The query has no results."
                liste.append(info)
            else:
                for line in lines:
                    fields = line.split(";")
                    date_full = fields[2].split()
                    date = date_full[0]
                    time = date_full[1]
                    ssflag = fields[5]
                    state = ''
                    if ssflag == 'vF':
                        state = 'no errors.'
                    elif ssflag == 'I':
                        state = 'working'
                    elif ssflag != '':
                        state = "with flags " + ssflag
                    if date == yesterday or date == today_2:
                        info = "'" + fields[1] + "' " + date + ", " + \
                            time + " , backup level: \"" + fields[4] + \
                            "\" => " + state + "\n"
                        liste.append(info)
                    else:
                        errorcounter = errorcounter + 1
                        info = "'" + fields[1] + "' " + date + ", " + \
                            time + " , backup level: \"" + fields[4] + \
                            "\" => " + state + "\n"
                        liste.append(info)
            if timeout != -1:
                ## reset the alarm
                alarm(0)
        except AlarmError:
            pids = [process.pid]
            if kill_tree:
                pids.extend(get_process_children(process.pid))
            for pid in pids:
                # process might have died before getting to this line
                # so wrap to avoid OSError: no such process
                try:
                    os.kill(pid, SIGKILL)
                except OSError:
                    pass
            return Response(UNKNOWN,
                            "mminfo timed out after %s seconds" % timeout)

        # Return a response
        finaloutput = "".join(liste).strip("\n")
        if errorcounter > 1:
            errorcounter = 1
        result = self.response_for_value(errorcounter, message=finaloutput)
        return result

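# The SIGALRM pattern used above in brief: alarm(timeout) schedules a
# SIGALRM, the handler turns it into an exception, and the except block
# kills the child process tree. A condensed, self-contained illustration
# (do_slow_thing is a placeholder):
#
#     from signal import signal, alarm, SIGALRM
#
#     def handler(signum, frame):
#         raise RuntimeError("timed out")
#
#     signal(SIGALRM, handler)
#     alarm(5)              # raise in 5 seconds unless cancelled
#     try:
#         do_slow_thing()
#     finally:
#         alarm(0)          # cancel the pending alarm
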