def run(self):
    data = {}
    for profile in self.config:
        for region in self.config[profile]:
            for db in self.config[profile][region]:
                # Boto expects None for the default profile; use a separate
                # variable instead of mutating the loop variable.
                boto_profile = None if profile == 'default' else profile
                rds = BotoRDS(region=region, profile=boto_profile, identifier=db)
                for metric in self.metrics.keys():
                    # Fetch the CloudWatch datapoint first; the derived
                    # used/total values below are computed from it.
                    stats = rds.get_metric(metric)
                    if metric == 'FreeableMemory':
                        info = rds.get_info()
                        try:
                            memory = self.db_classes[info.instance_class] * 1024 ** 3
                        except KeyError:
                            print 'Unknown DB instance class "%s"' % info.instance_class
                            sys.exit(1)
                        data['{0}_{1}_{2}'.format(region, db, 'UsedMemory')] = memory - stats
                        data['{0}_{1}_{2}'.format(region, db, 'TotalMemory')] = memory
                    elif metric == 'FreeStorageSpace':
                        info = rds.get_info()
                        storage = float(info.allocated_storage) * 1024 ** 3
                        data['{0}_{1}_{2}'.format(region, db, 'UsedDiskUsage')] = storage - stats
                        data['{0}_{1}_{2}'.format(region, db, 'TotalDiskUsage')] = storage
                    else:
                        data['{0}_{1}_{2}'.format(region, db, metric)] = stats
    return data
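
# run() relies on a BotoRDS helper that wraps boto's RDS and CloudWatch
# connections. That class is not shown in this section; the sketch below is a
# minimal, hypothetical illustration of what its get_metric() call is assumed
# to do (fetch the most recent average datapoint from the AWS/RDS namespace),
# not the plugin's actual implementation. The function name and the 5-minute
# window are assumptions.
import datetime

import boto.ec2.cloudwatch


def example_get_latest_average(region, identifier, metric, minutes=5):
    """Return the most recent average value of an RDS CloudWatch metric."""
    cw = boto.ec2.cloudwatch.connect_to_region(region)
    now = datetime.datetime.utcnow()
    points = cw.get_metric_statistics(
        60,                                       # period: 60-second granularity
        now - datetime.timedelta(minutes=minutes),
        now,
        metric,                                   # e.g. 'FreeableMemory'
        'AWS/RDS',
        'Average',
        dimensions={'DBInstanceIdentifier': identifier})
    if not points:
        return None
    # CloudWatch does not guarantee ordering, so sort and take the newest point.
    points.sort(key=lambda p: p['Timestamp'])
    return points[-1]['Average']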
def fetch_stats(self, data, rds):
    for metric, metric_name in self.metrics.items():
        try:
            stats = rds.get_metric(metric)
            inst = rds.identifier
            if metric in self.byte_related:
                # Convert bytes to megabytes.
                stats = stats / 10 ** 6
            if metric == 'FreeableMemory':
                info = rds.get_info()
                try:
                    memory = self.db_classes[info.instance_class] * 1000
                    used_mem = memory - stats
                    data['{0}_{1}'.format(inst, 'used_memory')] = used_mem
                    data['{0}_{1}'.format(inst, 'total_memory')] = memory
                    data['{0}_{1}'.format(inst, metric_name)] = stats
                except KeyError:
                    msg = 'RDS: Unknown DB instance class "{}"'
                    self.checks_logger.error(msg.format(info.instance_class))
            elif metric == 'FreeStorageSpace':
                info = rds.get_info()
                storage = float(info.allocated_storage) * 1000
                used = storage - stats
                data['{0}_{1}'.format(inst, metric_name)] = stats
                data['{0}_{1}'.format(inst, 'used_diskusage')] = used
                data['{0}_{1}'.format(inst, 'total_diskusage')] = storage
            else:
                data['{0}_{1}'.format(inst, metric_name)] = stats
        except NoMetricError:
            msg = 'RDS: {} was not available for {}'
            self.checks_logger.info(msg.format(metric, rds.identifier))
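
# fetch_stats() assumes the plugin instance carries a mapping of CloudWatch
# metric names to friendly key names (self.metrics) plus a collection of
# byte-valued metrics (self.byte_related). The values below are illustrative
# assumptions derived from the code above, not the plugin's actual config.
EXAMPLE_METRICS = {
    'CPUUtilization': 'utilization',
    'DatabaseConnections': 'connections',
    'FreeableMemory': 'freeable_memory',
    'FreeStorageSpace': 'free_storage_space',
}
EXAMPLE_BYTE_RELATED = ('FreeableMemory', 'FreeStorageSpace', 'SwapUsage')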
def main(): """Main function""" global options short_status = { OK: 'OK', WARNING: 'WARN', CRITICAL: 'CRIT', UNKNOWN: 'UNK' } # DB instance classes as listed on # http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts.DBInstanceClass.html db_classes = { 'db.t1.micro': 0.615, 'db.m1.small': 1.7, 'db.m1.medium': 3.75, 'db.m1.large': 7.5, 'db.m1.xlarge': 15, 'db.m4.large': 8, 'db.m4.xlarge': 16, 'db.m4.2xlarge': 32, 'db.m4.4xlarge': 64, 'db.m4.10xlarge': 160, 'db.r3.large': 15, 'db.r3.xlarge': 30.5, 'db.r3.2xlarge': 61, 'db.r3.4xlarge': 122, 'db.r3.8xlarge': 244, 'db.t2.micro': 1, 'db.t2.small': 2, 'db.t2.medium': 4, 'db.t2.large': 8, 'db.m3.medium': 3.75, 'db.m3.large': 7.5, 'db.m3.xlarge': 15, 'db.m3.2xlarge': 30, 'db.m2.xlarge': 17.1, 'db.m2.2xlarge': 34.2, 'db.m2.4xlarge': 68.4, 'db.cr1.8xlarge': 244, } # RDS metrics http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/rds-metricscollected.html metrics = { 'status': 'RDS availability', 'load': 'CPUUtilization', 'memory': 'FreeableMemory', 'storage': 'FreeStorageSpace' } units = ('percent', 'GB') # Parse options parser = optparse.OptionParser() parser.add_option('-l', '--list', help='list of all DB instances', action='store_true', default=False, dest='db_list') parser.add_option('-n', '--profile', default=None, help='AWS profile from ~/.boto or /etc/boto.cfg. Default: None, fallbacks to "[Credentials]".') parser.add_option('-r', '--region', default='us-east-1', help='AWS region. Default: us-east-1. If set to "all", we try to detect the instance region ' 'across all of them, note this will be slower than if you specify the region explicitly.') parser.add_option('-i', '--ident', help='DB instance identifier') parser.add_option('-p', '--print', help='print status and other details for a given DB instance', action='store_true', default=False, dest='printinfo') parser.add_option('-m', '--metric', help='metric to check: [%s]' % ', '.join(metrics.keys())) parser.add_option('-w', '--warn', help='warning threshold') parser.add_option('-c', '--crit', help='critical threshold') parser.add_option('-u', '--unit', help='unit of thresholds for "storage" and "memory" metrics: [%s]. ' 'Default: percent' % ', '.join(units), default='percent') parser.add_option('-t', '--time', help='time period in minutes to query. Default: 5', type='int', default=5) parser.add_option('-a', '--avg', help='time average in minutes to request. Default: 1', type='int', default=1) parser.add_option('-d', '--debug', help='enable debug output', action='store_true', default=False) options, _ = parser.parse_args() if options.debug: boto.set_stream_logger('boto') rds = RDS(region=options.region, profile=options.profile, identifier=options.ident) # Check args if len(sys.argv) == 1: parser.print_help() sys.exit() elif options.db_list: info = rds.get_list() print 'List of all DB instances in %s region(s):' % (options.region,) pprint.pprint(info) sys.exit() elif not options.ident: parser.print_help() parser.error('DB identifier is not set.') elif options.printinfo: info = rds.get_info() if info: pprint.pprint(vars(info)) else: print 'No DB instance "%s" found on your AWS account and %s region(s).' 
% (options.ident, options.region) sys.exit() elif not options.metric or options.metric not in metrics.keys(): parser.print_help() parser.error('Metric is not set or not valid.') elif not options.warn and options.metric != 'status': parser.print_help() parser.error('Warning threshold is not set.') elif not options.crit and options.metric != 'status': parser.print_help() parser.error('Critical threshold is not set.') elif options.avg <= 0 and options.metric != 'status': parser.print_help() parser.error('Average must be greater than zero.') elif options.time <= 0 and options.metric != 'status': parser.print_help() parser.error('Time must be greater than zero.') now = datetime.datetime.utcnow() status = None note = '' perf_data = None # RDS Status if options.metric == 'status': info = rds.get_info() if not info: status = UNKNOWN note = 'Unable to get RDS instance' else: status = OK try: version = info.EngineVersion except: version = info.engine_version note = '%s %s. Status: %s' % (info.engine, version, info.status) # RDS Load Average elif options.metric == 'load': # Check thresholds try: warns = [float(x) for x in options.warn.split(',')] crits = [float(x) for x in options.crit.split(',')] fail = len(warns) + len(crits) except: fail = 0 if fail != 6: parser.error('Warning and critical thresholds should be 3 comma separated numbers, e.g. 20,15,10') loads = [] fail = False j = 0 perf_data = [] for i in [1, 5, 15]: if i == 1: # Some stats are delaying to update on CloudWatch. # Let's pick a few points for 1-min load avg and get the last point. points = 5 else: points = i load = rds.get_metric(metrics[options.metric], now - datetime.timedelta(seconds=points * 60), now, i * 60) if not load: status = UNKNOWN note = 'Unable to get RDS statistics' perf_data = None break loads.append(str(load)) perf_data.append('load%s=%s;%s;%s;0;100' % (i, load, warns[j], crits[j])) # Compare thresholds if not fail: if warns[j] > crits[j]: parser.error('Parameter inconsistency: warning threshold is greater than critical.') elif load >= crits[j]: status = CRITICAL fail = True elif load >= warns[j]: status = WARNING j = j + 1 if status != UNKNOWN: if status is None: status = OK note = 'Load average: %s%%' % '%, '.join(loads) perf_data = ' '.join(perf_data) # RDS Free Storage # RDS Free Memory elif options.metric in ['storage', 'memory']: # Check thresholds try: warn = float(options.warn) crit = float(options.crit) except: parser.error('Warning and critical thresholds should be integers.') if crit > warn: parser.error('Parameter inconsistency: critical threshold is greater than warning.') if options.unit not in units: parser.print_help() parser.error('Unit is not valid.') info = rds.get_info() free = rds.get_metric(metrics[options.metric], now - datetime.timedelta(seconds=options.time * 60), now, options.avg * 60) if not info or not free: status = UNKNOWN note = 'Unable to get RDS details and statistics' else: if options.metric == 'storage': storage = float(info.allocated_storage) elif options.metric == 'memory': try: storage = db_classes[info.instance_class] except: print 'Unknown DB instance class "%s"' % info.instance_class sys.exit(CRITICAL) free = '%.2f' % (free / 1024 ** 3) free_pct = '%.2f' % (float(free) / storage * 100) if options.unit == 'percent': val = float(free_pct) val_max = 100 elif options.unit == 'GB': val = float(free) val_max = storage # Compare thresholds if val <= crit: status = CRITICAL elif val <= warn: status = WARNING if status is None: status = OK note = 'Free %s: %s GB (%.0f%%) of %s GB' % 
(options.metric, free, float(free_pct), storage) perf_data = 'free_%s=%s;%s;%s;0;%s' % (options.metric, val, warn, crit, val_max) # Final output if status != UNKNOWN and perf_data: print '%s %s | %s' % (short_status[status], note, perf_data) else: print '%s %s' % (short_status[status], note) sys.exit(status)
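
# Example invocations of the Nagios-style check above (the script name is
# assumed here; adjust it to wherever this file is installed). With -m load
# the thresholds are three comma-separated values for the 1/5/15-minute
# averages; with -m storage or -m memory they apply to the free amount in the
# chosen unit (lower is worse, so warn must be >= crit):
#
#   ./aws-rds-check.py -i my-rds-instance -m load -w 90,85,80 -c 98,95,90
#   ./aws-rds-check.py -i my-rds-instance -m storage -w 10 -c 5 -u GB
#
# Output follows the Nagios plugin convention, "<STATUS> <note> | <perfdata>",
# and the exit code is the status constant (conventionally 0 OK, 1 WARN,
# 2 CRIT, 3 UNKNOWN).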
def main(): """Main function""" global options # DB instance classes as listed on # http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts.DBInstanceClass.html db_classes = { 'db.t1.micro': 0.615, 'db.m1.small': 1.7, 'db.m1.medium': 3.75, 'db.m1.large': 7.5, 'db.m1.xlarge': 15, 'db.m4.large': 8, 'db.m4.xlarge': 16, 'db.m4.2xlarge': 32, 'db.m4.4xlarge': 64, 'db.m4.10xlarge': 160, 'db.r3.large': 15, 'db.r3.xlarge': 30.5, 'db.r3.2xlarge': 61, 'db.r3.4xlarge': 122, 'db.r3.8xlarge': 244, 'db.t2.micro': 1, 'db.t2.small': 2, 'db.t2.medium': 4, 'db.t2.large': 8, 'db.m3.medium': 3.75, 'db.m3.large': 7.5, 'db.m3.xlarge': 15, 'db.m3.2xlarge': 30, 'db.m2.xlarge': 17.1, 'db.m2.2xlarge': 34.2, 'db.m2.4xlarge': 68.4, 'db.cr1.8xlarge': 244, } # RDS metrics http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/rds-metricscollected.html metrics = { 'BinLogDiskUsage': 'binlog_disk_usage', # The amount of disk space occupied by binary logs on the master. Units: Bytes 'CPUUtilization': 'utilization', # The percentage of CPU utilization. Units: Percent 'DatabaseConnections': 'connections', # The number of database connections in use. Units: Count 'DiskQueueDepth': 'disk_queue_depth', # The number of outstanding IOs (read/write requests) waiting to access the disk. Units: Count 'ReplicaLag': 'replica_lag', # The amount of time a Read Replica DB Instance lags behind the source DB Instance. Units: Seconds 'SwapUsage': 'swap_usage', # The amount of swap space used on the DB Instance. Units: Bytes 'FreeableMemory': 'used_memory', # The amount of available random access memory. Units: Bytes 'FreeStorageSpace': 'used_space', # The amount of available storage space. Units: Bytes 'ReadIOPS': 'read_iops', # The average number of disk I/O operations per second. Units: Count/Second 'WriteIOPS': 'write_iops', # The average number of disk I/O operations per second. Units: Count/Second 'ReadLatency': 'read_latency', # The average amount of time taken per disk I/O operation. Units: Seconds 'WriteLatency': 'write_latency', # The average amount of time taken per disk I/O operation. Units: Seconds 'ReadThroughput': 'read_throughput', # The average number of bytes read from disk per second. Units: Bytes/Second 'WriteThroughput': 'write_throughput', # The average number of bytes written to disk per second. Units: Bytes/Second } # Parse options parser = optparse.OptionParser() parser.add_option('-l', '--list', help='list DB instances', action='store_true', default=False, dest='db_list') parser.add_option('-n', '--profile', default=None, help='AWS profile from ~/.boto or /etc/boto.cfg. Default: None, fallbacks to "[Credentials]".') parser.add_option('-r', '--region', default='us-east-1', help='AWS region. Default: us-east-1. If set to "all", we try to detect the instance region ' 'across all of them, note this will be slower than if you specify the region explicitly.') parser.add_option('-i', '--ident', help='DB instance identifier') parser.add_option('-p', '--print', help='print status and other details for a given DB instance', action='store_true', default=False, dest='printinfo') parser.add_option('-m', '--metric', help='metrics to retrive separated by comma: [%s]' % ', '.join(metrics.keys())) parser.add_option('-d', '--debug', help='enable debugging', action='store_true', default=False) options, _ = parser.parse_args() # Strip a prefix _ which is sent by Cacti, so an empty argument is interpreted correctly. # Than set defaults if argument is supposed to be empty. 
options.region = options.region.lstrip('_') options.profile = options.profile.lstrip('_') if not options.region: options.region = 'us-east-1' if not options.profile: options.profile = None if options.debug: boto.set_stream_logger('boto') rds = RDS(region=options.region, profile=options.profile, identifier=options.ident) # Check args if len(sys.argv) == 1: parser.print_help() sys.exit() elif options.db_list: info = rds.get_list() print 'List of all DB instances in %s region(s):' % (options.region,) pprint.pprint(info) sys.exit() elif not options.ident: parser.print_help() parser.error('DB identifier is not set.') elif options.printinfo: info = rds.get_info() pprint.pprint(vars(info)) sys.exit() elif not options.metric: parser.print_help() parser.error('Metric is not set.') selected_metrics = options.metric.split(',') for metric in selected_metrics: if metric not in metrics.keys(): parser.print_help() parser.error('Invalid metric.') # Do not remove the empty lines in the start and end of this docstring perl_magic_vars = """ # Define the variables to output. I use shortened variable names so maybe # it'll all fit in 1024 bytes for Cactid and Spine's benefit. Strings must # have some non-hex characters (non a-f0-9) to avoid a Cacti bug. This list # must come right after the word MAGIC_VARS_DEFINITIONS. The Perl script # parses it and uses it as a Perl variable. $keys = array( 'binlog_disk_usage' => 'gg', 'utilization' => 'gh', 'connections' => 'gi', 'disk_queue_depth' => 'gj', 'replica_lag' => 'gk', 'swap_usage' => 'gl', 'used_memory' => 'gm', 'total_memory' => 'gn', 'used_space' => 'go', 'total_space' => 'gp', 'read_iops' => 'gq', 'write_iops' => 'gr', 'read_latency' => 'gs', 'write_latency' => 'gt', 'read_throughput' => 'gu', 'write_throughput' => 'gv', ); """ output = dict() for row in perl_magic_vars.split('\n'): if row.find('=>') >= 0: k = row.split(' => ')[0].strip().replace("'", '') v = row.split(' => ')[1].strip().replace("'", '').replace(',', '') output[k] = v debug('Perl magic vars: %s' % output) debug('Metric associations: %s' % dict((k, output[v]) for (k, v) in metrics.iteritems())) # Handle metrics results = [] for metric in selected_metrics: stats = rds.get_metric(metric) if metric == 'FreeableMemory': info = rds.get_info() try: memory = db_classes[info.instance_class] * 1024 ** 3 except IndexError: print 'Unknown DB instance class "%s"' % info.instance_class sys.exit(1) results.append('%s:%.0f' % (output['used_memory'], memory - stats)) results.append('%s:%.0f' % (output['total_memory'], memory)) elif metric == 'FreeStorageSpace': info = rds.get_info() storage = float(info.allocated_storage) * 1024 ** 3 results.append('%s:%.0f' % (output['used_space'], storage - stats)) results.append('%s:%.0f' % (output['total_space'], storage)) else: short_var = output.get(metrics[metric]) if not short_var: print 'Chosen metric does not have a correspondent entry in perl magic vars' sys.exit(1) results.append('%s:%s' % (short_var, stats)) print ' '.join(results)
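
# Example invocation of the Cacti data-input script above (script name
# assumed). Metrics are passed as a comma-separated list of CloudWatch names,
# and the output is a space-separated list of "<short var>:<value>" pairs in
# the format Cacti's ss_get_* scripts expect; the values shown here are
# purely illustrative:
#
#   ./aws-rds-cacti.py -i my-rds-instance -m CPUUtilization,FreeableMemory
#   gh:7.3 gm:1234567890 gn:8589934592
#
# Cacti prefixes empty arguments with "_", which is why the region and
# profile options are stripped with lstrip('_') above.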
def main(): """Main function""" global options # DB instance classes as listed on # http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts.DBInstanceClass.html db_classes = { 'db.t1.micro': 0.615, 'db.m1.small': 1.7, 'db.m1.medium': 3.75, 'db.m1.large': 7.5, 'db.m1.xlarge': 15, 'db.m4.large': 8, 'db.m4.xlarge': 16, 'db.m4.2xlarge': 32, 'db.m4.4xlarge': 64, 'db.m4.10xlarge': 160, 'db.r3.large': 15, 'db.r3.xlarge': 30.5, 'db.r3.2xlarge': 61, 'db.r3.4xlarge': 122, 'db.r3.8xlarge': 244, 'db.t2.micro': 1, 'db.t2.small': 2, 'db.t2.medium': 4, 'db.t2.large': 8, 'db.m3.medium': 3.75, 'db.m3.large': 7.5, 'db.m3.xlarge': 15, 'db.m3.2xlarge': 30, 'db.m2.xlarge': 17.1, 'db.m2.2xlarge': 34.2, 'db.m2.4xlarge': 68.4, 'db.cr1.8xlarge': 244, } # RDS metrics http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/rds-metricscollected.html metrics = { 'BinLogDiskUsage': 'binlog_disk_usage', # The amount of disk space occupied by binary logs on the master. Units: Bytes 'CPUUtilization': 'utilization', # The percentage of CPU utilization. Units: Percent 'DatabaseConnections': 'connections', # The number of database connections in use. Units: Count 'DiskQueueDepth': 'disk_queue_depth', # The number of outstanding IOs (read/write requests) waiting to access the disk. Units: Count 'ReplicaLag': 'replica_lag', # The amount of time a Read Replica DB Instance lags behind the source DB Instance. Units: Seconds 'SwapUsage': 'swap_usage', # The amount of swap space used on the DB Instance. Units: Bytes 'FreeableMemory': 'used_memory', # The amount of available random access memory. Units: Bytes 'FreeStorageSpace': 'used_space', # The amount of available storage space. Units: Bytes 'ReadIOPS': 'read_iops', # The average number of disk I/O operations per second. Units: Count/Second 'WriteIOPS': 'write_iops', # The average number of disk I/O operations per second. Units: Count/Second 'ReadLatency': 'read_latency', # The average amount of time taken per disk I/O operation. Units: Seconds 'WriteLatency': 'write_latency', # The average amount of time taken per disk I/O operation. Units: Seconds 'ReadThroughput': 'read_throughput', # The average number of bytes read from disk per second. Units: Bytes/Second 'WriteThroughput': 'write_throughput', # The average number of bytes written to disk per second. Units: Bytes/Second } # Parse options parser = optparse.OptionParser() parser.add_option('-l', '--list', help='list DB instances', action='store_true', default=False, dest='db_list') parser.add_option( '-n', '--profile', default=None, help= 'AWS profile from ~/.boto or /etc/boto.cfg. Default: None, fallbacks to "[Credentials]".' ) parser.add_option( '-r', '--region', default='us-east-1', help= 'AWS region. Default: us-east-1. If set to "all", we try to detect the instance region ' 'across all of them, note this will be slower than if you specify the region explicitly.' ) parser.add_option('-i', '--ident', help='DB instance identifier') parser.add_option( '-p', '--print', help='print status and other details for a given DB instance', action='store_true', default=False, dest='printinfo') parser.add_option('-m', '--metric', help='metrics to retrive separated by comma: [%s]' % ', '.join(metrics.keys())) parser.add_option('-d', '--debug', help='enable debugging', action='store_true', default=False) options, _ = parser.parse_args() # Strip a prefix _ which is sent by Cacti, so an empty argument is interpreted correctly. # Than set defaults if argument is supposed to be empty. 
options.region = options.region.lstrip('_') options.profile = options.profile.lstrip('_') if not options.region: options.region = 'us-east-1' if not options.profile: options.profile = None if options.debug: boto.set_stream_logger('boto') rds = RDS(region=options.region, profile=options.profile, identifier=options.ident) # Check args if len(sys.argv) == 1: parser.print_help() sys.exit() elif options.db_list: info = rds.get_list() print 'List of all DB instances in %s region(s):' % (options.region, ) pprint.pprint(info) sys.exit() elif not options.ident: parser.print_help() parser.error('DB identifier is not set.') elif options.printinfo: info = rds.get_info() pprint.pprint(vars(info)) sys.exit() elif not options.metric: parser.print_help() parser.error('Metric is not set.') selected_metrics = options.metric.split(',') for metric in selected_metrics: if metric not in metrics.keys(): parser.print_help() parser.error('Invalid metric.') # Do not remove the empty lines in the start and end of this docstring perl_magic_vars = """ # Define the variables to output. I use shortened variable names so maybe # it'll all fit in 1024 bytes for Cactid and Spine's benefit. Strings must # have some non-hex characters (non a-f0-9) to avoid a Cacti bug. This list # must come right after the word MAGIC_VARS_DEFINITIONS. The Perl script # parses it and uses it as a Perl variable. $keys = array( 'binlog_disk_usage' => 'gg', 'utilization' => 'gh', 'connections' => 'gi', 'disk_queue_depth' => 'gj', 'replica_lag' => 'gk', 'swap_usage' => 'gl', 'used_memory' => 'gm', 'total_memory' => 'gn', 'used_space' => 'go', 'total_space' => 'gp', 'read_iops' => 'gq', 'write_iops' => 'gr', 'read_latency' => 'gs', 'write_latency' => 'gt', 'read_throughput' => 'gu', 'write_throughput' => 'gv', ); """ output = dict() for row in perl_magic_vars.split('\n'): if row.find('=>') >= 0: k = row.split(' => ')[0].strip().replace("'", '') v = row.split(' => ')[1].strip().replace("'", '').replace(',', '') output[k] = v debug('Perl magic vars: %s' % output) debug('Metric associations: %s' % dict( (k, output[v]) for (k, v) in metrics.iteritems())) # Handle metrics results = [] for metric in selected_metrics: stats = rds.get_metric(metric) if metric == 'FreeableMemory': info = rds.get_info() try: memory = db_classes[info.instance_class] * 1024**3 except IndexError: print 'Unknown DB instance class "%s"' % info.instance_class sys.exit(1) results.append('%s:%.0f' % (output['used_memory'], memory - stats)) results.append('%s:%.0f' % (output['total_memory'], memory)) elif metric == 'FreeStorageSpace': info = rds.get_info() storage = float(info.allocated_storage) * 1024**3 results.append('%s:%.0f' % (output['used_space'], storage - stats)) results.append('%s:%.0f' % (output['total_space'], storage)) else: short_var = output.get(metrics[metric]) if not short_var: print 'Chosen metric does not have a correspondent entry in perl magic vars' sys.exit(1) results.append('%s:%s' % (short_var, stats)) print ' '.join(results)