def run(self, args):
    """Check the Glance image service and exit with a Nagios-style status.

    Authenticates against Keystone, builds a Glance client with the
    resulting token, lists the images and then verifies the optional
    ``req_count`` and ``req_images`` requirements.

    :param args: mapping of plugin arguments. Reads ``username``,
        ``tenant``, ``password``, ``auth_url`` and ``endpoint``, plus the
        optional ``host``, ``req_count`` and ``req_images``.
    """
    # Here is the core of the plugin.
    # After doing your verifications, escape by doing:
    # self.exit(return_code, 'return_message', *performance_data)
    perfdata = []

    try:
        # Get a keystone token
        keystone = keystone_client.Client(
            username=args['username'],
            tenant_name=args['tenant'],
            password=args['password'],
            auth_url=args['auth_url'],
        )

        # Auth with glance
        start_time = datetime.datetime.now()
        client = glance_client.Client(
            auth_url=args['auth_url'],
            username=args['username'],
            tenant=args['tenant'],
            endpoint=args['endpoint'],
            host=args.get('host'),
            token=keystone.auth_token,
        )
        end_time = datetime.datetime.now()

        # total_seconds() is expressed in seconds while the perfdata is
        # declared in 'ms': scale up by 1000 (the original divided).
        auth_ms = (end_time - start_time).total_seconds() * 1000
        perfdata.append(
            PerfData('auth_time', auth_ms, min_='0', unit='ms'))
    except Exception as e:
        self.exit(STATES.UNKNOWN, str(e))

    # Get the images
    images = list(client.images.list())

    # Get the image count
    req_count = args.get('req_count')
    image_count = len(images)
    perfdata.append(PerfData('image_count', image_count, min_=req_count))

    # Check the count of images
    if req_count and image_count < req_count:
        self.exit(
            STATES.CRITICAL,
            'Not enough images (%s < %s)' % (image_count, req_count),
            *perfdata)

    # Check the required images
    # NOTE(review): membership is tested against the objects returned by
    # client.images.list(); confirm they compare equal to the configured
    # req_images values.
    missing_images = [image for image in (args.get('req_images') or [])
                      if image not in images]
    if missing_images:
        self.exit(STATES.CRITICAL,
                  'Images: %s are missing' % ' '.join(missing_images),
                  *perfdata)

    self.exit(STATES.OK, 'OK - %s images found' % image_count, *perfdata)
def run(self, args):
    """Report how many stations of a region are open.

    Walks the entries returned by ``self.get_data_list()``, keeps the ones
    belonging to the requested region (or every entry when the region is
    ``Tout le Québec``) and exits UNKNOWN when none of them has a
    determined state, CRITICAL when no station is open, OK otherwise.

    :param args: mapping of plugin arguments; reads ``region``.
    """
    # Here is the core of the plugin.
    # After doing your verifications, escape by doing:
    # self.exit(return_code, 'return_message', *performance_data)
    data_list = self.get_data_list()

    # reset these variables at 0
    number_stations = 0
    number_open_stations = 0
    day_downhill = 0
    day_open = 0
    night_downhill = 0
    night_open = 0
    # used to verify if all stations in region are undetermined
    number_unknown_stations = 0

    region = args.get("region").decode("utf8")
    whole_province = region == u"Tout le Québec"

    # NOTE(review): the nesting below was rebuilt from a whitespace-collapsed
    # source; confirm the downhill counters belong to the "Ouvert" branch.
    for station in data_list:
        if not (station["region"] == region or whole_province):
            continue
        number_stations += 1
        if station["state"] == u"Ouvert":
            number_open_stations += 1
            day_downhill += int(station["day"][1])
            day_open += int(station["day"][0])
            night_downhill += int(station["night"][1])
            night_open += int(station["night"][0])
        if station["state"] == u"Indéterminé":
            number_unknown_stations += 1

    # handle if all stations in region are undetermined
    if number_unknown_stations == number_stations:
        # The prefix now matches the UNKNOWN state actually returned
        # (it used to read "OK:").
        message = "UNKNOWN: There's not information for this region"
        self.exit(STATES.UNKNOWN, message)

    # calculate the number of stations per region
    # included open and close
    number_stations = number_stations - number_unknown_stations

    p1 = PerfData("open_stations", number_open_stations, unit='stations',
                  crit="0", min_="0", max_=number_stations)
    p2 = PerfData('day_open', day_open, unit='downhills',
                  min_="0", max_=day_downhill)
    p3 = PerfData('night_open', night_open, unit='downhills',
                  min_="0", max_=night_downhill)

    if number_open_stations == 0:
        message = ("CRITICAL: There's %s on %s stations open in %s"
                   % (number_open_stations, number_stations, region))
        self.exit(STATES.CRITICAL, message, p1, p2, p3)
    else:
        message = ("OK: There's %s on %s stations open in %s"
                   % (number_open_stations, number_stations, region))
        self.exit(STATES.OK, message, p1, p2, p3)
def run(self, args):
    """ Main Plugin function """
    # Here is the core of the plugin.
    # After doing your verifications, escape by doing:
    # self.exit(return_code, 'return_message', *performance_data)

    # (label, value, warning threshold, critical threshold)
    samples = (
        ('spam', 42, 70, 90),
        ('eggs', 6, 20, 30),
    )
    perfdatas = [
        PerfData(label, value, unit='%', warn=warn, crit=crit,
                 min_=0, max_=100)
        for label, value, warn, crit in samples
    ]
    self.ok("Everything was perfect", *perfdatas)
def run(self, args):
    """Check Keystone authentication and the content of its service catalog.

    Authenticates with the supplied credentials, then verifies that every
    requested service (or every service of the catalog when none is
    requested) exists, is not empty and exposes a ``publicURL``.

    :param args: mapping of plugin arguments; reads ``username``,
        ``tenant``, ``password``, ``auth_url`` and ``services``.
    """
    # Here is the core of the plugin.
    # After doing your verifications, escape by doing:
    # self.exit(return_code, 'return_message', *performance_data)
    perfdata = []

    try:
        start_time = datetime.datetime.now()
        c = client.Client(
            username=args['username'],
            tenant_name=args['tenant'],
            password=args['password'],
            auth_url=args['auth_url'],
        )
        if not c.authenticate():
            self.exit(STATES.UNKNOWN, 'Authentication failed')
        end_time = datetime.datetime.now()

        # total_seconds() is expressed in seconds while the perfdata is
        # declared in 'ms': scale up by 1000 (the original divided).
        auth_ms = (end_time - start_time).total_seconds() * 1000
        perfdata.append(
            PerfData('auth_time', auth_ms, min_='0', unit='ms'))
    except Exception as e:
        self.exit(STATES.UNKNOWN, str(e))

    endpoints = c.service_catalog.get_endpoints()
    services = args['services'] or endpoints.keys()

    msgs = []
    for service in services:
        if service not in endpoints:
            msgs.append("`%s' service is missing" % service)
            continue

        if not endpoints[service]:
            msgs.append("`%s' service is empty" % service)
            continue

        if not any("publicURL" in endpoint
                   for endpoint in endpoints[service]):
            msgs.append("`%s' service has no publicURL" % service)

    perfdata.append(PerfData('service_count', len(endpoints), min_='0'))

    if msgs:
        self.exit(STATES.CRITICAL, ' '.join(msgs), *perfdata)
    else:
        self.exit(
            STATES.OK,
            "Got token %s for user %s and tenant %s"
            % (c.auth_token, c.auth_user_id, c.auth_tenant_id),
            *perfdata)
def run(self, args):
    """Count the full and empty stations described by an XML feed.

    Each child of the XML root is expected to carry ``nbBikes`` and
    ``nbEmptyDocks`` elements. A station without any empty dock is counted
    as full, a station without any bike is counted as empty; the sum of
    both is compared with the warning and critical thresholds.

    :param args: mapping of plugin arguments; reads ``url``, ``warning``
        and ``critical``.
    """
    # Here is the core of the plugin.
    # After doing your verifications, escape by doing:
    # self.exit(return_code, 'return_message', *performance_data)
    url = args['url']
    xml = self.get_xml(url)

    try:
        tree = etree.fromstring(xml)
    except Exception as e:
        self.exit(STATES.UNKNOWN,
                  'Incorrect XML received or parser error: %s' % e)

    empty_stations = full_stations = 0
    total = len(tree)

    for station in tree:
        try:
            nb_bikes = int(station.find('nbBikes').text)
            nb_empty_docks = int(station.find('nbEmptyDocks').text)
        except Exception as e:
            self.exit(STATES.UNKNOWN,
                      'Incorrect XML received or parser error: %s' % e)

        # The two counters used to be swapped: no empty dock means the
        # station is full, no bike means the station is empty.
        if nb_empty_docks == 0:
            full_stations += 1
        if nb_bikes == 0:
            empty_stations += 1

    problems = empty_stations + full_stations
    str_pb = 'problems' if problems >= 2 else 'problem'

    # no warning and critical on the perfdata, since the thresholds apply
    # to empty + full
    p1 = PerfData('empty_stations', empty_stations, max_=total)
    p2 = PerfData('full_stations', full_stations, max_=total)

    if problems < int(args['warning']):
        self.exit(STATES.OK,
                  'OK - %d %s / %d stations' % (problems, str_pb, total),
                  p1, p2)
    elif problems < int(args['critical']):
        self.exit(
            STATES.WARNING,
            'WARNING - %d %s / %d stations' % (problems, str_pb, total),
            p1, p2)
    else:
        self.exit(
            STATES.CRITICAL,
            'CRITICAL - %d %s / %d stations' % (problems, str_pb, total),
            p1, p2)
def run(self, args):
    """Count the alert entities of a GTFS feed and compare to thresholds.

    :param args: mapping of plugin arguments; reads ``url``, ``token``,
        ``warning`` and ``critical``.
    """
    # Here is the core of the plugin.
    # After doing your verifications, escape by doing:
    # self.exit(return_code, 'return_message', *performance_data)

    # fetches data
    url = args['url'] + '?token=' + args['token']
    data = self.get_feed(url)

    alerts = gtfs.FeedMessage()
    try:
        alerts.ParseFromString(data)
    except Exception as e:
        self.exit(STATES.UNKNOWN,
                  'UNKNOWN - Bad data format or unknown error: %s' % e)

    warning = int(args['warning'])
    critical = int(args['critical'])

    nb = len(alerts.entity)
    str_plural = 'problems' if nb >= 2 else 'problem'
    message = '%d %s' % (nb, str_plural)

    # crit used to be filled with the warning threshold by mistake
    perfdata = PerfData('problems', nb, warn=warning, crit=critical, min_=0)

    if nb < warning:
        self.exit(STATES.OK, 'OK - ' + message, perfdata)
    elif nb < critical:
        self.exit(STATES.WARNING, 'WARNING - ' + message, perfdata)
    else:
        self.exit(STATES.CRITICAL, 'CRITICAL - ' + message, perfdata)
def run(self, args):
    """Fetch ``args.url`` and report the HTTP status and response time.

    Exits CRITICAL for a status code of 400 or more (or for an error that
    carries a ``code`` attribute), UNKNOWN for any other failure and OK
    otherwise, with the response time in milliseconds as perfdata.

    :param args: parsed arguments; reads ``url`` and ``user_agent``.
    """
    try:
        request = urllib2.Request(args.url)
        opener = urllib2.build_opener()
        request.add_header('User-Agent', args.user_agent)
        start = datetime.datetime.now()
        response = opener.open(request)
        try:
            code = response.getcode()
            end = datetime.datetime.now()
        finally:
            # The response used to be left open; release it explicitly.
            response.close()
    except Exception as err:
        try:
            # Only errors that carry an HTTP status expose `code`
            code = err.code
            self.critical('HTTP %s - %s' % (code, err))
        except Exception:
            self.unknown('%s' % err)
    else:
        if code >= 400:
            self.critical('HTTP %s' % code)
        else:
            response_time = end - start
            perfdata = PerfData(
                'response_time',
                response_time.microseconds / 1000
                + response_time.seconds * 1000,
                unit='ms', min_=0)
            self.ok('HTTP %s' % code, perfdata)
def check_used_memory_human(self, con, host, port, warning, critical, unit):
    """Compare Redis's ``used_memory_human`` value with the thresholds.

    The value read from ``con.info()`` ends with a magnitude suffix
    (for instance ``K``, ``M`` or ``G``). The suffix is now used to
    normalise the value to kilobytes before it is converted to ``unit``;
    the original code dropped it and treated every value as kilobytes.

    :param con: connection object exposing ``info()``.
    :param host: unused here.
    :param port: unused here.
    :param warning: warning threshold, expressed in ``unit``.
    :param critical: critical threshold, expressed in ``unit``.
    :param unit: ``"GB"``, ``"MB"`` or anything else for kilobytes.
    """
    result = con.info()
    raw_value = result["used_memory_human"]

    # Number of kilobytes represented by one unit of each suffix.
    # An unrecognised suffix keeps the historical "already KB" behaviour.
    kb_per_suffix = {
        "B": 1.0 / 1024,
        "K": 1.0,
        "M": 1024.0,
        "G": 1024.0 ** 2,
        "T": 1024.0 ** 3,
        "P": 1024.0 ** 4,
    }
    multiplier = kb_per_suffix.get(raw_value[-1:].upper(), 1.0)
    used_memory_human = float(raw_value[:-1]) * multiplier

    warning = float(warning)
    critical = float(critical)

    if unit == "GB":
        used_memory_human = used_memory_human / 1024**2
    elif unit == "MB":
        used_memory_human = used_memory_human / 1024

    if used_memory_human > critical:
        message = "CRITICAL: current used memory is %.2f %s" % (
            used_memory_human, unit)
        code = STATES.CRITICAL
    elif used_memory_human > warning:
        message = "WARNING: current used memory is %.2f %s" % (
            used_memory_human, unit)
        code = STATES.WARNING
    else:
        message = "OK: current used memory is %.2f %s" % (
            used_memory_human, unit)
        code = STATES.OK

    used_memory_human = "%0.2f" % (used_memory_human)
    warning = "%0.2f" % (warning)
    critical = "%0.2f" % (critical)
    p1 = PerfData("used_memory_human", used_memory_human, unit=unit,
                  warn=warning, crit=critical, min_=0)
    self.exit(code, message, p1)
def check_latency(self, con, host, port, warning, critical):
    """Time 10000 consecutive ``con.ping()`` calls against the thresholds.

    :param con: connection object exposing ``ping()``.
    :param host: unused here.
    :param port: unused here.
    :param warning: warning threshold in seconds for the whole run.
    :param critical: critical threshold in seconds for the whole run.
    """
    count = 10000

    started = time.time()
    for _ in range(count):
        con.ping()
    elapsed = float(time.time() - started)

    critical = float(critical)
    warning = float(warning)

    if elapsed > critical:
        label, code = "CRITICAL", STATES.CRITICAL
    elif elapsed > warning:
        label, code = "WARNING", STATES.WARNING
    else:
        label, code = "OK", STATES.OK

    # The thresholds are compared on the raw float; only the reported
    # value is rounded to two decimals.
    total_time = "%0.2f" % elapsed
    message = "%s: ping %d times cost %s seconds" % (label, count, total_time)

    p1 = PerfData("total_time", total_time, unit="s",
                  warn="%0.2f" % warning, crit="%0.2f" % critical, min_=0)
    self.exit(code, message, p1)
def check_connect(self, con, host, port, warning, critical, db):
    """Time one ``con.info()`` call and compare it with the thresholds.

    A 30 second alarm handled by ``self.handler_timeout`` is armed first.
    Any exception raised by ``con.info()`` is reported as CRITICAL.

    :param con: connection object exposing ``info()``.
    :param host: host name, used in the failure message only.
    :param port: port number, used in the failure message only.
    :param warning: warning threshold in seconds.
    :param critical: critical threshold in seconds.
    :param db: unused here.
    """
    signal.signal(signal.SIGALRM, self.handler_timeout)
    signal.alarm(30)

    try:
        start_time = time.time()
        con.info()
        end_time = time.time()
    except Exception:
        self.exit(
            STATES.CRITICAL,
            "Could not connect to Redis at host: %s, %d" % (host, port))

    connection_time = end_time - start_time
    warning = float(warning)
    critical = float(critical)

    # The warning and critical states share the same wording
    slow_message = "Connection to Redis seems slow: %0.2f s" % connection_time
    if connection_time > critical:
        code, message = STATES.CRITICAL, slow_message
    elif connection_time > warning:
        code, message = STATES.WARNING, slow_message
    else:
        code = STATES.OK
        message = "connection is good: %0.2f s" % connection_time

    p1 = PerfData("connection_time", "%0.2f" % connection_time, unit="s",
                  warn=warning, crit=critical, min_=0)
    self.exit(code, message, p1)
def run(self, args):
    """ Main Plugin function """
    # Here is the core of the plugin.
    # After doing your verifications, escape by doing:
    # self.exit(return_code, 'return_message', *performance_data)
    args.critical = float(args.critical)
    args.warning = float(args.warning)

    # Start one Worker per requested worker
    workers = []
    for _ in range(int(args.workers)):
        worker = Worker(args.url, int(args.queries))
        workers.append(worker)
        worker.start()

    # Poll every 0.1 second until each worker reports it is done
    while True:
        time.sleep(0.1)
        if all(worker.done.value for worker in workers):
            break

    # Drain the response times and add up the failures
    resp_time = []
    nb_fail = 0
    for worker in workers:
        while not worker.resp_time.empty():
            resp_time.append(worker.resp_time.get())
        nb_fail += worker.nb_fail.value

    # -1 marks "no response at all" and ends in the unknown branch below
    if resp_time:
        mean_time = sum(resp_time, 0.) / len(resp_time)
    else:
        mean_time = -1

    message = 'Mean time: %.2f seconds'
    if nb_fail > int(args.max_fail):
        self.critical('Too many requests failed')
    elif mean_time >= args.critical:
        code = STATES.CRITICAL
    elif mean_time >= args.warning:
        code = STATES.WARNING
    elif 0 < mean_time < args.warning:
        code = STATES.OK
    else:
        self.unknown("Exited in a unknown state")

    if args.perfdata:
        perf = PerfData('meantime', mean_time,
                        warn=args.warning, crit=args.critical,
                        min_=min(resp_time), max_=max(resp_time))
        self.exit(code, message % mean_time, perf)
    self.exit(code, message % mean_time)
def run(self, args):
    """Compute the stretcher occupation rate scraped from an HTML page.

    Evaluates two XPath expressions against the page, expects exactly one
    integer result for each and compares ``100 * occupied / functional``
    with the warning and critical thresholds.

    :param args: parsed arguments; reads ``url``,
        ``functional_stretchers``, ``occupied_stretchers``, ``warning``
        and ``critical``.
    """
    # Here is the core of the plugin.
    # After doing your verifications, escape by doing:
    # self.exit(return_code, 'return_message', *performance_data)
    html = self.get_html(args.url)
    tree = lxml.html.fromstring(html)
    data1 = tree.xpath(args.functional_stretchers)
    data2 = tree.xpath(args.occupied_stretchers)

    if len(data1) == len(data2) == 1:
        try:
            data1 = int(data1[0].strip())
        except Exception:
            self.exit(STATES.UNKNOWN,
                      'Not integer (functional stretchers): %s' % data1)
        try:
            data2 = int(data2[0].strip())
        except Exception:
            self.exit(STATES.UNKNOWN,
                      'Not integer (occupied stretchers): %s' % data2)

        # A rate cannot be computed without functional stretchers; this
        # used to raise ZeroDivisionError.
        if data1 == 0:
            self.exit(STATES.UNKNOWN,
                      'No functional stretchers: occupation is undefined')

        result = 100 * data2 / data1

        p1 = PerfData('functional_stretchers', data1, min_=0)
        p2 = PerfData('occupied_stretchers', data2, min_=0)
        p3 = PerfData('occupation', result,
                      warn=args.warning, crit=args.critical,
                      min_=0, max_=100)

        if result < int(args.warning):
            self.exit(STATES.OK, '%d%%' % result, p1, p2, p3)
        elif result < int(args.critical):
            self.exit(STATES.WARNING, '%d%%' % result, p1, p2, p3)
        else:
            self.exit(STATES.CRITICAL, '%d%%' % result, p1, p2, p3)

    self.exit(STATES.UNKNOWN,
              'Wrong data extracted: %s - %s' % (data1, data2))
def run(self, args):
    """Read one metric from a JSON report and compare it to thresholds.

    The metric is looked up under ``data['reports'][args.metric]``. When
    ``args.warning`` is set the value is handled as a number and compared
    with the thresholds; otherwise it is reported as-is with an OK state.

    :param args: parsed arguments; reads ``url``, ``metric``, ``warning``
        and ``critical``.
    """
    # Here is the core of the plugin.
    # After doing your verifications, escape by doing:
    # self.exit(return_code, 'return_message', *performance_data)
    raw = self.get_json(args.url)

    try:
        document = json.loads(raw)
    except Exception as e:
        self.exit(STATES.UNKNOWN, 'Error loading json data: %s' % e)

    try:
        result = document['reports'][args.metric]
    except Exception:
        self.exit(STATES.UNKNOWN, 'Metric not found: %s' % args.metric)

    if args.warning is None:
        # No threshold: the value is reported without being interpreted
        self.exit(STATES.OK, '%s' % result, PerfData(args.metric, result))
    else:
        # then we're working with numbers
        if result is None:
            self.unknown("No data available for %s (None)" % args.metric)
        try:
            result = float(result)
        except Exception:
            self.exit(STATES.UNKNOWN,
                      'Error, %s is not a number' % result)

        perfdata = PerfData(args.metric, result,
                            warn=args.warning, crit=args.critical)
        text = '%s' % result

        if result < float(args.warning):
            self.exit(STATES.OK, text, perfdata)
        elif result < float(args.critical):
            self.exit(STATES.WARNING, text, perfdata)
        else:
            self.exit(STATES.CRITICAL, text, perfdata)
def test_perfdata_none(self):
    """Assert that optional PerfData fields given as None read back as ''."""
    optional_fields = ('unit', 'warn', 'crit', 'min_', 'max_')

    # dict.fromkeys() maps every optional field to None
    perfdata = PerfData('label', 'value', **dict.fromkeys(optional_fields))

    for field in optional_fields:
        self.assertEqual(getattr(perfdata, field), '')
def run(self, args):
    """ Main Plugin function

    Fetches the machine-readable status page (``?auto``) of the server
    and reports every recognised metric found in it.

    :param args: parsed arguments; reads ``ssl``, ``hostname``, ``url``
        and ``perfdata``.
    """
    # Here is the core of the plugin.
    # After doing your verifications, escape by doing:
    # self.exit(return_code, 'return_message', *performance_data)
    scheme = 'https://' if args.ssl else 'http://'
    url = scheme + args.hostname + "/" + args.url + "?auto"

    try:
        filehandle = urllib.urlopen(url)
    except Exception as err:
        self.unknown("Unexpected error: %s" % err)

    # regular expression -> (perfdata name, unit)
    metrics = {
        "Total Accesses:(.*)": ("total_acc", "accesses"),
        "Total kBytes:(.*)": ("total_Kb", "Kb"),
        "CPULoad:(.*)": ("cpu_load", ""),
        "Uptime:(.*)": ("uptime", "s"),
        "ReqPerSec:(.*)": ("req_per_sec", "req_per_sec"),
        "BytesPerSec:(.*)": ("bytes_per_sec", "bytes_per_sec"),
        "BytesPerReq:(.*)": ("bytes_per_req", "bytes_per_req"),
        "BusyWorkers:(.*)": ("busy_workers", "workers"),
        "IdleWorkers:(.*)": ("idle_workers", "workers"),
    }

    # Read everything, then release the handle (it was never closed before)
    try:
        lines = filehandle.readlines()
    finally:
        filehandle.close()

    results = {}
    perfdatas = []
    for line in lines:
        for metric, (name, unit) in metrics.items():
            match = re.match(metric, line)
            if match:
                value = float(match.group(1).strip())
                results[name] = value
                if args.perfdata:
                    perfdatas.append(
                        PerfData(name, value, unit=unit, min_=0))

    if not results:
        # The %s placeholder used to be emitted verbatim: fill in the URL
        message = ("No data found on %s. "
                   "Please check you apache configuration" % url)
        self.unknown("Server seems not available (%s)" % message)

    message = " # ".join(
        ["%s: %0.2f" % (n, v) for n, v in results.items()])
    self.ok(message, *perfdatas)
def run(self, args):
    """Report the lines whose status differs from normal service.

    Extracts the statuses and the line names from the page returned by
    ``self.get_html()``; both lists must hold ``LINES_COUNT`` items,
    otherwise the plugin exits UNKNOWN.

    :param args: parsed arguments; reads ``warning`` and ``critical``.
    """
    # Here is the core of the plugin.
    # After doing your verifications, escape by doing:
    # self.exit(return_code, 'return_message', *performance_data)
    html = self.get_html()
    tree = lxml.html.fromstring(html)
    status = tree.xpath(STATUS_XPATH)
    lines = tree.xpath(LINES_XPATH)

    if len(status) == len(lines) == LINES_COUNT:
        # Both lists have the same length here, so they can be paired
        problems = [line.strip()
                    for state, line in zip(status, lines)
                    if state != u'Normal métro service']
        nb_problems = len(problems)

        if 0 <= nb_problems < args.warning:
            msg, code = 'OK', STATES.OK
        elif args.warning <= nb_problems < args.critical:
            msg, code = 'WARNING', STATES.WARNING
        else:
            msg, code = 'CRITICAL', STATES.CRITICAL

        perfdata = PerfData('problems', nb_problems,
                            warn=args.warning, crit=args.critical,
                            min_=0, max_=LINES_COUNT)

        if nb_problems > 0:
            listing = ': ' + ', '.join(problems)
        else:
            listing = ''

        final_msg = (
            '%(msg)s - %(problems)d problem%(plural)s %(list)s' % {
                'msg': msg,
                'problems': nb_problems,
                'plural': 's' if nb_problems >= 2 else '',
                'list': listing,
            })
        self.exit(code, final_msg, perfdata)

    self.unknown('Wrong data received: %s [...]' % html[:100])
def check_connected_clients(self, con, host, port, warning, critical):
    """Compare the ``connected_clients`` value of ``con.info()`` to thresholds.

    :param con: connection object exposing ``info()``.
    :param host: unused here.
    :param port: unused here.
    :param warning: warning threshold (number of clients).
    :param critical: critical threshold (number of clients).
    """
    connected_count = int(con.info()["connected_clients"])
    critical = int(critical)
    warning = int(warning)

    if connected_count > critical:
        label, code = "CRITICAL", STATES.CRITICAL
    elif connected_count > warning:
        label, code = "WARNING", STATES.WARNING
    else:
        label, code = "OK", STATES.OK

    message = "%s: there's %s connected clients" % (label, connected_count)
    p1 = PerfData("connected_count", connected_count, unit="clients",
                  warn=warning, crit=critical, min_=0)
    self.exit(code, message, p1)
def check_used_memory_rss(self, con, host, port, warning, critical):
    """Compare the ``used_memory_rss`` value of ``con.info()`` to thresholds.

    :param con: connection object exposing ``info()``.
    :param host: unused here.
    :param port: unused here.
    :param warning: warning threshold, in the unit reported by the server.
    :param critical: critical threshold, in the unit reported by the server.
    """
    used_memory_rss = float(con.info()["used_memory_rss"])
    critical = float(critical)
    warning = float(warning)

    # Each state has its own template: the OK one ends with a space
    if used_memory_rss > critical:
        code = STATES.CRITICAL
        template = "CRITICAL: current used memory rss is %s B"
    elif used_memory_rss > warning:
        code = STATES.WARNING
        template = "WARNING: current used memory rss is %s B"
    else:
        code = STATES.OK
        template = "OK: current used memory rss is %s B "

    message = template % used_memory_rss
    p1 = PerfData("used_memory_rss", used_memory_rss, unit="B",
                  warn=warning, crit=critical, min_=0)
    self.exit(code, message, p1)
def get_emc_temperature(self, args, values):
    """Report the EMC temperature of every unit against the thresholds.

    The message only lists the units of the worst severity reached: the
    first warning reading replaces what was gathered so far, and so does
    the first critical one. Perfdata is emitted for every unit.

    :param args: parsed arguments; reads ``warning`` and ``critical``.
    :param values: iterable of ``(oid, value)`` pairs where the value is
        in tenths of a degree (XXX stands for XX.X).
    """
    try:
        # Value is XXX for XX.X
        values = [(str(k), float(str(v)) / 10.0) for k, v in values]
    except Exception:
        self.unknown("Can't parse data for emc temperature")

    critical = False
    warning = False
    msg = ""
    perfdatas = []

    # NOTE(review): nesting rebuilt from a whitespace-collapsed source;
    # every unit of the current severity is assumed to be appended.
    for oid, val in values:
        # The last char of the oid is a number and identifies the unit
        unit_id = oid[-1]
        reading = " - Unit%s: %0.1fC" % (unit_id, val)

        if val >= args.critical:
            if not critical:
                msg = ("EMC Temperature - Critical threshold %d Celsius"
                       % args.critical)
                critical = True
            msg += reading
        elif not critical:
            if val >= args.warning:
                if not warning:
                    msg = ("EMC Temperature - Warning threshold %d Celsius"
                           % args.warning)
                    warning = True
                msg += reading
            elif not warning:
                if msg == "":
                    msg = "EMC Temperature"
                msg += reading

        perfdatas.append(
            PerfData("Unit%s" % unit_id, val, unit="C", min_=0))

    if critical:
        self.critical(msg, *perfdatas)
    elif warning:
        self.warning(msg, *perfdatas)
    else:
        self.ok(msg, *perfdatas)
def get_output_percent(self, args, values):
    """Report the output percentage of every unit against the thresholds.

    The message only lists the units of the worst severity reached: the
    first warning reading replaces what was gathered so far, and so does
    the first critical one. Perfdata is emitted for every unit.

    :param args: parsed arguments; reads ``warning`` and ``critical``.
    :param values: iterable of ``(oid, value)`` pairs with integer values.
    """
    try:
        values = [(str(k), int(str(v))) for k, v in values]
    except Exception:
        self.unknown("Can't parse data for output percent")

    critical = False
    warning = False
    msg = ""
    perfdatas = []

    # NOTE(review): nesting rebuilt from a whitespace-collapsed source;
    # every unit of the current severity is assumed to be appended.
    for oid, val in values:
        # The last char of the oid is a number and identifies the unit
        unit_id = oid[-1]
        reading = " - Unit%s: %d%%" % (unit_id, val)

        if val >= args.critical:
            if not critical:
                msg = ("Output percent - Critical threshold %d%%"
                       % args.critical)
                critical = True
            msg += reading
        elif not critical:
            if val >= args.warning:
                if not warning:
                    msg = ("Output percent - Warning threshold %d%%"
                           % args.warning)
                    warning = True
                msg += reading
            elif not warning:
                if msg == "":
                    msg = "Output percent"
                msg += reading

        perfdatas.append(
            PerfData("Unit%s" % unit_id, val, unit="%", min_=0))

    if critical:
        self.critical(msg, *perfdatas)
    elif warning:
        self.warning(msg, *perfdatas)
    else:
        self.ok(msg, *perfdatas)
def run(self, args):
    """Compare the length of an SQS queue with the thresholds.

    Connects with the explicit credentials when both are supplied (with
    the default ``boto`` configuration otherwise) and checks the first
    queue whose name starts with the requested prefix.

    :param args: mapping of plugin arguments; reads ``queue``,
        ``warning``, ``critical`` and the optional ``acceskey`` and
        ``secretkey``.
    """
    # Here is the core of the plugin.
    # After doing your verifications, escape by doing:
    # self.exit(return_code, 'return_message', *performance_data)
    acceskey = args.get('acceskey')
    secretkey = args.get('secretkey')

    if acceskey is not None and secretkey is not None:
        sqs = boto.connect_sqs(
            aws_access_key_id=acceskey,
            aws_secret_access_key=secretkey
        )
    else:
        sqs = boto.connect_sqs()

    # Indexing an empty result used to raise IndexError before the
    # "queue is None" test could ever run.
    queues = sqs.get_all_queues(prefix=args.get('queue'))
    if not queues:
        self.exit(STATES.UNKNOWN, "Could not find queue")
    queue = queues[0]

    # The perfdata used to read 'warn'/'crit', keys that the checks below
    # never used; both now rely on 'warning'/'critical'.
    raw_warning = args.get('warning')
    raw_critical = args.get('critical')
    try:
        # Compare numbers with numbers; a missing threshold is skipped
        warning = None if raw_warning is None else float(raw_warning)
        critical = None if raw_critical is None else float(raw_critical)
    except (TypeError, ValueError):
        self.exit(STATES.UNKNOWN, "Thresholds must be numbers")

    queue_size = queue.count()

    perf_data = [
        PerfData(
            'QueueLenght',
            queue_size,
            warn=raw_warning,
            crit=raw_critical,
            min_=0,
        )
    ]

    if critical is not None and queue_size > critical:
        self.exit(STATES.CRITICAL, "CRITICAL", *perf_data)
    elif warning is not None and queue_size > warning:
        self.exit(STATES.WARNING, "WARNING", *perf_data)
    else:
        self.exit(STATES.OK, "OK", *perf_data)
def get_battery_status(self, args, values):
    """Translate the numeric battery status into a plugin state.

    :param args: unused here.
    :param values: sequence holding exactly one ``(oid, value)`` pair; the
        value is converted to an integer status code.
    """
    if len(values) != 1:
        self.unknown("Only one battery is managed now sorry")

    try:
        out = int(str(values[0][1]))
    except Exception:
        self.unknown("Can't parse data for battery status")

    perfdatas = [PerfData("Status", out)]

    # status code -> (reporting method, message)
    outcomes = {
        1: (self.critical, "Battery status UNKNOWN"),
        2: (self.ok, "Battery Status Normal"),
        3: (self.critical, "Battery Status : LOW"),
        4: (self.critical, "Battery Status : DEPLETED"),
    }

    if out in outcomes:
        report, text = outcomes[out]
        report(text, *perfdatas)
    else:
        self.unknown('Plugin ERROR')
def compute_output(self, interface, args):
    """Prepare output

    Computes the in/out bandwidth of one interface from its current and
    previous octet counters and builds the message and perfdata.

    :param interface: mapping describing the interface; reads ``oper``,
        ``descr``, ``speed``, ``date``, ``old_date`` and the octet
        counters. ``speed`` is overwritten when ``realbandwidth`` is set.
    :param args: mapping of plugin arguments; reads ``only-up``,
        ``64bits``, ``realbandwidth``, ``perfdata`` and ``error``.
    :returns: ``None`` for a skipped interface, ``(state, message)`` when
        no time elapsed since the previous check, otherwise
        ``(state, message, perfdatas)``.
    """
    # If "only up" is enabled and the interface is down,
    # it is left out of the output
    if args['only-up'] and interface['oper'] != 1:
        return

    seconds = (int(interface.get('date', 0))
               - int(interface.get('old_date', 0)))
    # seconds == 0, it's impossible
    if seconds == 0:
        return STATES.OK, "Waiting next check"

    output_datas = {'name': interface['descr']}

    # HANDLE COUNTER RESET
    ## Set limit
    ## 32bits : 4294967295
    ## 64bits : 18446744073709551615
    if args['64bits']:
        # Check 64Bits support
        if self.out_octet_64 is None or self.in_octet_64 is None:
            print("This device doesn't support 64bits counters")
            sys.exit(STATES.UNKNOWN)
        limit = 18446744073709551615
        suffix = "_64"
    else:
        limit = 4294967295
        suffix = ""

    def counter_delta(counter):
        """Return the growth of `counter`, handling a counter reset."""
        previous = int(interface.get("old_" + counter + suffix, 0))
        current = int(interface.get(counter + suffix, 0))
        if previous > current:
            return limit - previous + current
        return current - previous

    ## Calc and handle Reset
    in_octets = counter_delta("in_octet")
    out_octets = counter_delta("out_octet")

    output_datas['raw_in_bandwidth'] = in_octets / seconds
    output_datas['raw_out_bandwidth'] = out_octets / seconds

    in_bandwidth, in_unit = convert_octets(output_datas['raw_in_bandwidth'])
    output_datas['in_bandwidth'] = in_bandwidth
    output_datas['in_unit'] = in_unit
    out_bandwidth, out_unit = convert_octets(
        output_datas['raw_out_bandwidth'])
    output_datas['out_bandwidth'] = out_bandwidth
    output_datas['out_unit'] = out_unit

    output_datas['state'] = IF_STATUS[interface.get('oper', 2)]

    if args['realbandwidth']:
        interface['speed'] = args['realbandwidth']
    # bit to byte
    output_datas['speed'] = interface['speed'] / 8.0

    msg = ("%(name)s:%(state)s "
           "(%(in_bandwidth).2f%(in_unit)s/"
           "%(out_bandwidth).2f%(out_unit)s)" % output_datas)

    name = output_datas['name'].replace(" ", "_")
    output_datas['name'] = name

    perf_datas = []
    directions = ('in', 'out')

    # NOTE(review): the nesting of the sections below was rebuilt from a
    # whitespace-collapsed source; confirm that the percentage and error
    # perfdata both depend on args['perfdata'].
    if args['perfdata']:
        # handle one interface
        for direction in directions:
            raw = output_datas['raw_%s_bandwidth' % direction]
            perf_datas.append(
                PerfData('%s_%s_Bps' % (name, direction),
                         '%0.2f' % raw,
                         unit='Bps',
                         min_=0.0))

        if output_datas['speed'] != 0:
            # Put % data in first for Nagvis; inserting "in" then "out"
            # at index 0 leaves "out" as the very first entry.
            for direction in directions:
                percent = (output_datas['raw_%s_bandwidth' % direction]
                           * 100 / output_datas['speed'])
                output_datas['prct_%s' % direction] = percent
                perf_datas.insert(
                    0,
                    PerfData('%s_%s_prct' % (name, direction),
                             '%0.2f' % percent,
                             '%',
                             min_=0.0,
                             max_=100.0))

        if args['error']:
            for kind in ('discard', 'error'):
                for direction in directions:
                    counter = getattr(self, '%s_%s' % (direction, kind))
                    output_datas['raw_%s_%s' % (direction, kind)] = counter
                    perf_datas.append(
                        PerfData('%s_%s_%s' % (name, direction, kind),
                                 '%0.2f' % counter,
                                 'c'))

    return STATES.OK, msg, perf_datas
class Plugin(BasePlugin):
    """Query a JSON attribute on every EC2 instance carrying a given tag.

    The lowest and highest values found are reported as perfdata and the
    highest one is compared with the warning and critical thresholds.
    """

    NAME = 'check-json-by-ec2-tags'
    VERSION = '0.1'
    DESCRIPTION = 'Runs check-json on all AWS ec2 instances with a particular tag.'
    AUTHOR = 'Alexandre Viau'
    EMAIL = '*****@*****.**'

    ARGS = [
        # Can't touch this:
        ('h', 'help', 'display plugin help', False),
        ('v', 'version', 'display plugin version number', False),
        # Hammer time^W^W Add your plugin arguments here:
        # ('short', 'long', 'description', 'does it expect a value?')
        ('t', 'tag', 'The tag to look for', True),
        ('e', 'endpoint', 'The endpoint of the json api. ex: /local_stats', True),
        # The example used to be copy-pasted from the endpoint argument
        ('p', 'port', 'The port of the json api. ex: 8080', True),
        ('a', 'attribute', 'The attribute to look for', True),
        ('k', 'acceskey', 'AWS Acces Key ID (optional)', True),
        ('s', 'secretkey', 'AWS Secret Key ID (optional)', True),
        ('w', 'warning', 'Limit to result in a warning state', True),
        ('c', 'critical', 'Limit to result in a critical state', True),
    ]

    def check_args(self, args):
        """Validate the arguments.

        :param args: mapping of plugin arguments.
        :returns: ``(True, None)`` when the arguments are usable, otherwise
            ``(False, reason)``.
        """
        # You can do your various arguments check here.
        # If you don't need to check things, you can safely remove the method.
        if args.get('url') and not args['url'].startswith('http'):
            return False, 'the url must be fetchable through http'

        required = (
            ('tag', 'you must specify a tag'),
            ('endpoint', 'you must specify an endpoint'),
            ('port', 'you must specify a port'),
            ('attribute', 'you must specify an attribute to look for'),
        )
        for key, reason in required:
            if not args.get(key):
                return False, reason

        return True, None

    def run(self, args):
        """Collect the attribute on every tagged instance and report it.

        Exits UNKNOWN when a threshold is not a number, when no instance
        matches the tag or when an instance cannot be queried.

        :param args: mapping of plugin arguments; reads ``tag``, ``port``,
            ``endpoint``, ``attribute`` and the optional ``acceskey``,
            ``secretkey``, ``warning`` and ``critical``.
        """
        # Here is the core of the plugin.
        # After doing your verifications, escape by doing:
        # self.exit(return_code, 'return_message', *performance_data)
        acceskey = args.get('acceskey')
        secretkey = args.get('secretkey')

        # The thresholds are compared with integers further down, so they
        # are converted to numbers first; an unset one disables its check.
        thresholds = {}
        for key in ('warning', 'critical'):
            raw = args.get(key)
            try:
                thresholds[key] = float(raw) if raw else None
            except (TypeError, ValueError):
                self.exit(STATES.UNKNOWN,
                          "UNKNOWN - invalid %s threshold: %s" % (key, raw))

        if acceskey is not None and secretkey is not None:
            ec2 = boto.connect_ec2(aws_access_key_id=acceskey,
                                   aws_secret_access_key=secretkey)
        else:
            ec2 = boto.connect_ec2()

        reservations = ec2.get_all_instances(
            filters={'tag-value': args.get('tag')})
        # Every instance of every reservation is checked; only the first
        # instance of each reservation used to be.
        ips = [instance.ip_address
               for reservation in reservations
               for instance in reservation.instances]

        values = []
        for ip in ips:
            try:
                r = requests.get(
                    'http://%s:%s%s'
                    % (ip, args.get('port'), args.get('endpoint')))
                values.append(int(r.json()[args.get('attribute')]))
            except requests.exceptions.RequestException as e:
                self.exit(STATES.UNKNOWN, "UNKNOWN - %s" % e)
            except (KeyError, TypeError, ValueError) as e:
                self.exit(
                    STATES.UNKNOWN,
                    "UNKNOWN - could not read %s from %s: %s"
                    % (args.get('attribute'), ip, e))

        # min() and max() would raise on an empty list
        if not values:
            self.exit(STATES.UNKNOWN,
                      "UNKNOWN - no value collected for tag %s"
                      % args.get('tag'))

        lowest_value = min(values)
        highest_value = max(values)

        performance_data = [
            PerfData(
                label,
                value,
                warn=args.get('warning') or '',
                crit=args.get('critical') or '',
                min_=0,
            )
            for label, value in (('lowest', lowest_value),
                                 ('highest', highest_value))
        ]

        critical = thresholds.get('critical')
        warning = thresholds.get('warning')
        if critical is not None and highest_value > critical:
            self.exit(STATES.CRITICAL, "CRITICAL", *performance_data)
        elif warning is not None and highest_value > warning:
            self.exit(STATES.WARNING, "WARNING", *performance_data)
        else:
            self.exit(STATES.OK, "OK", *performance_data)
def run(self, args):
    """Report the bandwidth used by an interface since the last reset day.

    The raw counters come from ``self.get_total``, ``self.get_receive``
    and ``self.get_transmit``. A cache file named after the interface,
    inside ``args.cache_folder``, stores ten space-separated fields:

        last reset date, total, total offset, received, received offset,
        transmitted, transmitted offset, and the three raw counters seen
        on the previous run.

    Usage for the current period is ``total - offset``. The result is
    reported through ``self.ok`` / ``self.warning`` / ``self.critical``
    (``self.unknown`` on cache problems), with performance data only when
    ``args.perfdata`` is set.
    NOTE(review): the reporting helpers are assumed to terminate the
    plugin, as the original control flow relied on it.
    """
    day = args.reset_day
    interface = args.interface_name
    cache_file = os.path.join(args.cache_folder, interface + ".txt")

    # Thresholds that are not PercentValue instances are scaled by one GB,
    # as is the limit.
    one_gb = 1024 * 1024 * 1024
    warning = args.warning
    critical = args.critical
    if not isinstance(warning, PercentValue):
        warning /= one_gb
    if not isinstance(critical, PercentValue):
        critical /= one_gb
    limit = args.limit / one_gb if args.limit is not None else None

    if not os.path.exists(args.cache_folder):
        os.makedirs(args.cache_folder)

    new_total = self.get_total(interface)
    new_received = self.get_receive(interface)
    new_transmit = self.get_transmit(interface)

    record_format = ("%s %0.30f %0.30f %0.30f %0.30f "
                     "%0.30f %0.30f %0.30f %0.30f %0.30f")

    # First launch: seed the cache so that usage starts from zero.
    if not os.path.isfile(cache_file):
        try:
            with open(cache_file, "w") as cache:
                cache.write(record_format % (
                    "00000000",
                    new_total, new_total,
                    new_received, new_received,
                    new_transmit, new_transmit,
                    new_total, new_received, new_transmit))
        except IOError:
            self.unknown("Cannot write temp file: %s" % cache_file)
        self.ok("First use of plugin")
        return

    try:
        cache = open(cache_file, "r+")
    except IOError:
        self.unknown("Cannot read/write temp file: %s" % cache_file)

    with cache:
        fields = cache.readline().strip().split()
        try:
            old_update = fields[0]
            (prev_used_total, prev_offset,
             prev_receive_total, prev_receive_offset,
             prev_transmit_total, prev_transmit_offset,
             old_data, old_receive, old_transmit) = [
                float(value) for value in fields[1:10]]
        except (IndexError, ValueError):
            self.unknown("Corrupted temp file: %s" % cache_file)

        reset_date = self.get_reset_day(day)
        to_day = time.strftime("%m%d%Y")

        if new_total >= prev_used_total:
            # The raw counter is still ahead of the stored total.
            used_total = new_total
            receive_total = new_received
            transmit_total = new_transmit
        elif new_total >= old_data:
            # Counter restarted earlier: add what was seen since last run.
            used_total = prev_used_total + (new_total - old_data)
            receive_total = prev_receive_total + (new_received - old_receive)
            transmit_total = (prev_transmit_total
                              + (new_transmit - old_transmit))
        else:
            # Counter restarted since the last run: everything is new.
            used_total = prev_used_total + new_total
            receive_total = prev_receive_total + new_received
            transmit_total = prev_transmit_total + new_transmit

        # Usage is computed against the offsets of the current period,
        # before a possible reset below.
        usage_per_month = used_total - prev_offset
        receive_per_month = receive_total - prev_receive_offset
        transmit_per_month = transmit_total - prev_transmit_offset

        # On the reset day, move the offsets up once.
        if to_day == reset_date and to_day != old_update:
            prev_offset = used_total
            prev_receive_offset = receive_total
            prev_transmit_offset = transmit_total
            old_update = reset_date

        cache.seek(0)
        cache.write(record_format % (
            old_update,
            used_total, prev_offset,
            receive_total, prev_receive_offset,
            transmit_total, prev_transmit_offset,
            new_total, new_received, new_transmit))
        # Drop leftovers of a previous, longer record.
        cache.truncate()

    zero = "%0.2f" % (0)

    def pick_exit(value, warn, crit):
        """Return the reporting method matching value and thresholds."""
        if value < warn:
            return self.ok
        if warn <= value < crit:
            return self.warning
        return self.critical

    def traffic_perfdata():
        """Return the received and transmitted perfdata, in GB."""
        return [
            PerfData(interface + "_received",
                     "%0.2f" % (float(receive_per_month)),
                     unit="GB", warn="", crit="", min_=zero),
            PerfData(interface + "_transmitted",
                     "%0.2f" % (float(transmit_per_month)),
                     unit="GB", warn="", crit="", min_=zero),
        ]

    # check_linux_bandwidth_usage -i eth0 -W 50 -C 90 -d 10 -l 500 [-f]
    if limit is not None and isinstance(warning, PercentValue):
        usage_percent = (usage_per_month / limit) * 100
        msg = "%s usage: %0.2f%% (%0.2f/%0.2fGB)" % (
            interface, usage_percent, usage_per_month, limit)
        func = pick_exit(usage_percent, warning, critical)

        perfdata = []
        if args.perfdata:
            # The thresholds are percentages; derive their GB equivalent.
            warning_gb = "%0.2f" % ((warning / 100) * limit)
            critical_gb = "%0.2f" % ((critical / 100) * limit)
            perfdata = [
                PerfData(interface + "_percent", "%0.2f" % usage_percent,
                         unit="%", warn=warning, crit=critical,
                         min_=zero, max_="%0.2f" % (100)),
                PerfData(interface + "_total", "%0.2f" % usage_per_month,
                         unit="GB", warn=warning_gb, crit=critical_gb,
                         min_=zero, max_="%0.2f" % (limit)),
            ] + traffic_perfdata()
        func(msg, *perfdata)
        return

    # check_linux_bandwidth_usage -i eth0 -w 50 -c 100 -d 10 -l 500 -f
    if args.perfdata and args.limit is not None:
        usage_percent = (usage_per_month / limit) * 100
        msg = "%s usage: %0.2f%% (%0.2f/%0.2fGB)" % (
            interface, usage_percent, usage_per_month, limit)
        func = pick_exit(usage_per_month, warning, critical)

        # The thresholds are in GB; derive their percent equivalent.
        warning_percent = "%0.2f" % ((float(warning) / limit) * 100)
        critical_percent = "%0.2f" % ((float(critical) / limit) * 100)
        perfdata = [
            PerfData(interface + "_percent", "%0.2f" % usage_percent,
                     unit="%", warn=warning_percent, crit=critical_percent,
                     min_=zero, max_="%0.2f" % (100)),
            PerfData(interface + "_total", "%0.2f" % usage_per_month,
                     unit="GB", warn="%0.2f" % float(warning),
                     crit="%0.2f" % float(critical),
                     min_=zero, max_="%0.2f" % (limit)),
        ] + traffic_perfdata()
        func(msg, *perfdata)
        return

    # check_linux_bandwidth_usage -i eth0 -w 50 -c 90 -d 5 [-f]
    msg = "%s usage: %0.2fGB" % (interface, usage_per_month)
    func = pick_exit(usage_per_month, warning, critical)
    perfdata = []
    if args.perfdata:
        perfdata = [
            PerfData(interface + "_total",
                     "%0.2f" % (float(usage_per_month)),
                     unit="GB", warn="%0.2f" % (warning),
                     crit="%0.2f" % (critical), min_=zero),
        ] + traffic_perfdata()
    func(msg, *perfdata)
def run(self, args):
    """Check that every expected flavor is listed by the Nova API.

    Authenticates against Keystone, lists the tenant's flavors through
    ``args['nova_endpoint']`` and exits CRITICAL when one of
    ``args['flavors']`` is missing, OK otherwise. Authentication or
    listing failures exit UNKNOWN. The time taken by the listing request
    and the number of flavors are reported as performance data.
    """
    perfdata = []
    expected_flavors = args['flavors']

    try:
        keystone = ksclient.Client(
            username=args['username'],
            tenant_name=args['tenant'],
            password=args['password'],
            auth_url=args['auth_url'],
        )
        token = keystone.auth_token
        tenant_id = keystone.auth_ref['token']['tenant']['id']
    except Exception as e:
        self.exit(STATES.UNKNOWN, "Authentification failed: " + str(e))

    try:
        headers = {
            "X-Auth-Token": token,
            "content-type": "application/json",
            "accept": "application/json",
        }

        start_time = datetime.datetime.now()
        resp = requests.get(
            "%s/%s/flavors" % (args['nova_endpoint'], tenant_id),
            headers=headers
        )
        end_time = datetime.datetime.now()
        # Fail with the HTTP error rather than with a missing JSON key.
        resp.raise_for_status()

        # total_seconds() is in seconds; the perfdata unit is ms.
        elapsed_ms = (end_time - start_time).total_seconds() * 1000
        perfdata.append(
            PerfData('flavor_list_time', elapsed_ms, min_='0', unit='ms'))

        flavors_names = [f['name'] for f in resp.json()['flavors']]
        perfdata.append(
            PerfData('flavors_count', len(flavors_names), min_='0'))
    except Exception as e:
        self.exit(STATES.UNKNOWN, "Could not list flavors: " + str(e))

    available = set(flavors_names)
    msgs = ["'%s' flavor is missing" % flavor
            for flavor in expected_flavors
            if flavor not in available]

    if msgs:
        self.exit(STATES.CRITICAL, ' '.join(msgs), *perfdata)
    else:
        self.exit(STATES.OK, "Nova API OK", *perfdata)
def run(self, args):
    """Report how much of a borough's streets have been cleared of snow.

    Downloads the statistics published at
    http://infoneige.ca/vdm/stats.json, picks the entry whose name (with
    spaces removed) equals ``args["borough"]`` and compares the cleared
    percentage with ``args["warning"]`` and ``args["critical"]``.
    Exits UNKNOWN when the data cannot be fetched or the borough is not
    listed.
    """
    url = "http://infoneige.ca/vdm/stats.json"
    try:
        response = urllib.urlopen(url)
        try:
            json_data = json.loads(response.read())
        finally:
            response.close()
    except (IOError, ValueError) as e:
        self.exit(STATES.UNKNOWN,
                  "UNKNOWN: cannot fetch snow removal stats: %s" % e)

    warning = float(args["warning"])
    critical = float(args["critical"])

    input_borough = args["borough"].decode("utf8")

    matches = [entry for entry in json_data["avancement"]
               if input_borough == entry["arrondissment"].replace(' ', '')]
    if not matches:
        self.exit(STATES.UNKNOWN,
                  "UNKNOWN: borough '%s' not found" % input_borough)
    # Should a name appear more than once, the last entry wins.
    entry = matches[-1]

    non_clear = float(entry[u"enneigé"])
    clear = float(entry[u"déneigé"])
    planned = float(entry[u"planifié"])
    in_action = float(entry[u"en cours"])
    without_info = float(entry[u"pas d info"])

    # Street sides with a known state, then all of them.
    total = non_clear + clear + planned + in_action
    total_without_info = total + without_info

    if total:
        clearance_percent = (clear / total) * 100.0
        planned_percent = (planned / total) * 100.0
        in_action_percent = (in_action / total) * 100.0
    else:
        clearance_percent = 100
        planned_percent = 0
        in_action_percent = 0

    if total_without_info:
        without_info_percent = (without_info / total_without_info) * 100.0
    else:
        without_info_percent = 100

    if clearance_percent < critical:
        message = ("CRITICAL: %0.1f%% in '%s' is clear"
                   % (clearance_percent, input_borough))
        state = STATES.CRITICAL
    elif clearance_percent <= warning:
        message = ("WARNING: %0.1f%% in '%s' is clear"
                   % (clearance_percent, input_borough))
        state = STATES.WARNING
    else:
        message = ("OK: %0.1f%% in '%s' is clear"
                   % (clearance_percent, input_borough))
        state = STATES.OK

    p1 = PerfData("clearance_percent", "%0.1f" % clearance_percent,
                  unit="%", warn=warning, crit=critical,
                  min_="0.0", max_=100.0)
    p2 = PerfData("planned_percent", "%0.1f" % planned_percent,
                  unit="%", warn=warning, crit=critical,
                  min_="0.0", max_=100.0)
    p3 = PerfData("in_action_percent", "%0.1f" % in_action_percent,
                  unit="%", warn=warning, crit=critical,
                  min_="0.0", max_=100.0)
    p4 = PerfData("without_info_percent", "%0.1f" % without_info_percent,
                  unit="%", warn=warning, crit=critical,
                  min_="0.0", max_=100.0)

    self.exit(state, message, p1, p2, p3, p4)
def run(self, args):
    """Check one InfluxDB metric, selected by ``args.mode``.

    The server is first pinged over HTTP (a 204 answer is expected), then
    the metric matching ``args.mode`` (one of ``CheckInfluxdb.mode``) is
    read from the ``_influxdb`` database and compared with
    ``args.warning`` and ``args.critical``.

    Exit codes: 0 when within limits, 1 above warning, 2 above critical,
    3 on timeout, connection error, bad ping answer, missing monitoring
    database, missing data or unknown mode.
    """
    # Wall-clock time; time.clock() would measure CPU time on Unix.
    started = time.time()
    try:
        r = requests.get('http://' + args.host + ':%d/ping' % args.port,
                         timeout=args.timeout)
    except requests.exceptions.Timeout:
        self.exit(3, 'Timeout ')
    except requests.exceptions.ConnectionError:
        self.exit(3, 'Connection Error ')
    ping_seconds = time.time() - started

    if 204 != r.status_code:
        self.exit(3, "Bad response from Influxdb")
        return

    client = InfluxDBClient(args.host, args.port, args.user, args.password)
    databases = client.get_list_database()
    # Look at every entry, including the last one.
    if not any('_influxdb' in database.values() for database in databases):
        self.exit(3, "Monitoring database doesn't exist")
        return
    client.switch_database('_influxdb')

    def series(query, measurement):
        """Run query and return the measurement's points as a list."""
        points = list(client.query(query)[measurement])
        if not points:
            self.exit(3, "No data in " + measurement)
        return points

    def latest(query, measurement, field):
        """Return field from the newest point of the measurement."""
        return series(query, measurement)[-1][field]

    def growth(query, measurement, field):
        """Return the difference between the two newest points.

        With a single point there is nothing to compare, so it is 0.
        """
        points = series(query, measurement)
        newest = points[-1][field]
        previous = points[-2][field] if len(points) > 1 else newest
        return newest - previous

    def shard_count():
        return latest("select numShards from server_diag",
                      'server_diag', 'numShards')

    def duration_seconds(text):
        """Convert a duration such as '1h2m3.5s' to seconds.

        NOTE(review): presumably a Go-style duration string; confirm
        against the server's actual output.
        """
        factors = {'h': 3600.0, 'm': 60.0, 's': 1.0,
                   'ms': 1e-3, 'us': 1e-6, 'ns': 1e-9}
        parts = re.findall(r'(\d+(?:\.\d+)?)(h|ms|us|ns|m|s)', text)
        if not parts:
            self.exit(3, "Cannot parse uptime: %s" % text)
        return sum(float(amount) * factors[unit] for amount, unit in parts)

    modes = CheckInfluxdb.mode
    mode = args.mode

    if mode == modes[0]:
        # Connection time, in seconds like args.timeout.
        answer = ping_seconds
        p = PerfData('ping', answer, unit='s', warn=args.warning,
                     crit=args.critical, min_=0.0, max_=args.timeout)
    elif mode == modes[1]:
        # Uptime
        answer = duration_seconds(
            latest("select uptime from server_diag ",
                   'server_diag', 'uptime'))
        p = PerfData('Uptime', answer, unit='s', warn=args.warning,
                     crit=args.critical, min_=0)
    elif mode == modes[2]:
        # Number of shards
        answer = shard_count()
        p = PerfData('Number of shards', answer, warn=args.warning,
                     crit=args.critical, min_=0)
    elif mode == modes[3]:
        # nb-write-total
        answer = latest("select value from server_batchWriteRx",
                        'server_batchWriteRx', 'value')
        p = PerfData('Number of write queries', answer, warn=args.warning,
                     crit=args.critical, min_=0)
    elif mode == modes[4]:
        # write-since-last
        answer = growth("select value from server_batchWriteRx",
                        'server_batchWriteRx', 'value')
        p = PerfData('Number of write since last check', answer,
                     warn=args.warning, crit=args.critical, min_=0)
    elif mode == modes[5]:
        # nb-read-total
        answer = latest("select value from server_queriesExecuted",
                        'server_queriesExecuted', 'value')
        p = PerfData('Number of read queries', answer, warn=args.warning,
                     crit=args.critical, min_=0)
    elif mode == modes[6]:
        # read-since-last
        answer = growth("select value from server_queriesExecuted",
                        'server_queriesExecuted', 'value')
        p = PerfData('Number of read queries since last check', answer,
                     warn=args.warning, crit=args.critical, min_=0)
    elif mode == modes[7]:
        # ROM-allocate
        rom_free = psutil.disk_usage('/')[2]
        answer = shard_count() * 1024 * 1024
        p = PerfData('ROM allocate', answer, unit='bytes',
                     warn=args.warning, crit=args.critical, min_=0,
                     max_=answer + rom_free)
    elif mode == modes[8]:
        # Ram used
        answer = latest("select alloc from server_memory",
                        'server_memory', 'alloc')
        ram_value = psutil.virtual_memory()[0]
        p = PerfData('RAM used', answer, unit='bytes', warn=args.warning,
                     crit=args.critical, min_=0, max_=ram_value)
    elif mode == modes[9]:
        # Rom Used: sum the newest point of each shard.
        rom_free = psutil.disk_usage('/')[2]
        nb_shards = int(shard_count())
        points = list(
            client.query("select value from shard_shardBytes")
            ['shard_shardBytes'])
        recent = points[-nb_shards:] if nb_shards > 0 else []
        answer = sum(point['value'] for point in recent)
        p = PerfData('ROM used', answer, unit='bytes', warn=args.warning,
                     crit=args.critical, min_=0, max_=answer + rom_free)
    elif mode == modes[10]:
        # continuous-query
        # NOTE(review): this converts the cqLastRun timestamp fields to a
        # number of seconds using 365-day years and 30.5-day months; it is
        # kept as it was. Confirm that it matches the perfdata label.
        ans = latest("select cqLastRun from server_diag",
                     'server_diag', 'cqLastRun')
        hour = re.split("[+,' ',:,-]", ans)
        s = ((int(hour[0]) - 1) * 365 * 24 * 3600
             + (int(hour[1]) - 1) * 30.5 * 24 * 3600
             + (int(hour[2]) - 1) * 24 * 3600
             + int(hour[3]) * 3600
             + int(hour[4]) * 60
             + int(hour[5]))
        answer = s + int(hour[7]) * 0.0001
        p = PerfData('Time since last continuous query launch', answer,
                     unit='s', warn=args.warning, crit=args.critical,
                     min_=0)
    elif mode == modes[11]:
        # routine-go
        answer = latest("select numGoRoutine from server_go",
                        'server_go', 'numGoRoutine')
        p = PerfData('Number of go routine', answer, warn=args.warning,
                     crit=args.critical, min_=0)
    else:
        self.exit(3, "Unknown mode: %s" % mode)
        return

    if answer > args.critical:
        self.exit(2, args.mode + " > %f" % args.critical, p)
    elif answer > args.warning:
        self.exit(1, args.mode + " > %f" % args.warning, p)
    else:
        self.exit(0, "all is good for " + args.mode, p)
def run(self, args):
    """Report the share of customers without power in a region.

    With ``'list'`` in ``args`` the known regions are printed and the
    plugin exits OK. Otherwise the blackout percentage of
    ``args['region']`` is compared with the optional ``warning`` and
    ``critical`` limits (both default to 100). Exits UNKNOWN when the
    region is not known or a limit is not a number.
    """
    result_dict = Plugin.create_result_dict()

    if 'list' in args:
        output = "\n".join([r.encode("utf-8") for r in result_dict.keys()])
        self.exit(STATES.OK, 'regions:\n' + output)
        return

    warning = args['warning'] if 'warning' in args else 100.00
    critical = args['critical'] if 'critical' in args else 100.00
    # Compare as Decimal: the limits may be text or float, the measured
    # percentage is a Decimal.
    try:
        warning_limit = Decimal(str(warning))
        critical_limit = Decimal(str(critical))
    except (ArithmeticError, TypeError, ValueError):
        self.exit(STATES.UNKNOWN,
                  'UNKNOWN: warning and critical must be numbers')
        return

    region = args['region']
    try:
        stats = result_dict[region]
    except KeyError:
        self.exit(STATES.UNKNOWN, "UNKNOWN: region '%s' not found" % region)
        return

    customers_blackout = Decimal(stats['customers_blackout'])
    customers = Decimal(stats['customers'])
    interruptions = stats['interruptions']

    # A region without customers cannot have anyone out of service.
    if customers:
        percent = customers_blackout / customers * 100
    else:
        percent = Decimal(0)
    round_percent = percent.quantize(Decimal('.01'), rounding=ROUND_HALF_UP)

    if round_percent < warning_limit:
        msg = 'OK: %s: %0.2f%% out of service' % (region, round_percent)
        code = STATES.OK
    elif round_percent < critical_limit:
        msg = 'WARNING: %s: %0.2f%% out of service' % (region,
                                                       round_percent)
        code = STATES.WARNING
    else:
        msg = 'CRITICAL: %s: %0.2f%% out of service' % (region,
                                                        round_percent)
        code = STATES.CRITICAL

    p1 = PerfData("percent_blackout", round_percent, unit="%",
                  warn=warning, crit=critical, min_=0, max_=100)
    p2 = PerfData("interruptions", interruptions, unit='',
                  min_=0, max_=None)
    p3 = PerfData("custumers_blackout", customers_blackout, unit='',
                  min_=0, max_=customers)

    self.exit(code, msg, p1, p2, p3)
def run(self, args):
    """Check that messages received on an SQS queue are being deleted.

    Reads the per-minute ``NumberOfMessagesReceived`` and
    ``NumberOfMessagesDeleted`` sums of ``args.get('queue')`` from
    CloudWatch over the last ten minutes and looks at the most recent
    datapoint of each. Exits CRITICAL when messages were received but
    none deleted, UNKNOWN when no datapoint is available, OK otherwise.
    """
    acceskey = args.get('acceskey')
    secretkey = args.get('secretkey')
    if acceskey is not None and secretkey is not None:
        cloudwatch = boto.connect_cloudwatch(
            aws_access_key_id=acceskey,
            aws_secret_access_key=secretkey)
    else:
        # Let boto find credentials on its own.
        cloudwatch = boto.connect_cloudwatch()

    queue = args.get('queue')
    # One window for both metrics, so they cover the same minutes.
    end_time = datetime.datetime.utcnow()
    start_time = end_time - datetime.timedelta(seconds=600)

    def fetch(metric_name):
        """Return the per-minute sums of an SQS metric for the queue."""
        return cloudwatch.get_metric_statistics(
            metric_name=metric_name,
            period=60,
            start_time=start_time,
            end_time=end_time,
            namespace='AWS/SQS',
            unit='Count',
            statistics=['Sum'],
            dimensions={'QueueName': [queue]})

    def latest_sum(datapoints):
        """Return the sum of the newest datapoint.

        Datapoints are not returned in chronological order, so the
        first one is not necessarily the newest.
        """
        newest = max(datapoints, key=lambda point: point['Timestamp'])
        return newest['Sum']

    msg_received = fetch('NumberOfMessagesReceived')
    msg_deleted = fetch('NumberOfMessagesDeleted')

    if not msg_deleted or not msg_received:
        self.exit(STATES.UNKNOWN, "Could not retrieve any metrics")

    number_msg_received = latest_sum(msg_received)
    number_msg_deleted = latest_sum(msg_deleted)

    perf_data = [
        PerfData(
            'NumberOfMessagesReceived',
            number_msg_received,
            min_=0,
        ),
        PerfData(
            'NumberOfMessagesDeleted',
            number_msg_deleted,
            min_=0,
        ),
    ]

    if number_msg_received > 0 and not number_msg_deleted > 0:
        self.exit(
            STATES.CRITICAL,
            "Queue %s is receiving messages but they are not being deleted"
            % queue,
            *perf_data)
    else:
        self.exit(STATES.OK, "OK", *perf_data)