def test_get_metric_statistics(self):
    c = CloudWatchConnection()
    m = c.list_metrics()[0]
    end = datetime.datetime.utcnow()
    start = end - datetime.timedelta(hours=24 * 14)
    c.get_metric_statistics(3600 * 24, start, end, m.name, m.namespace,
                            ['Average', 'Sum'])
Example #2
def test_get_metric_statistics(self):
    c = CloudWatchConnection()
    m = c.list_metrics()[0]
    end = datetime.datetime.now()
    start = end - datetime.timedelta(hours=24*14)
    c.get_metric_statistics(
        3600*24, start, end, m.name, m.namespace, ['Average', 'Sum'])
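A minimal standalone sketch of the same call, assuming default boto credential resolution. CloudWatch timestamps are UTC, so utcnow() (as in the first example) is the safer choice; datetime.now() only agrees with it on machines whose clock is set to UTC:

import datetime
from boto.ec2.cloudwatch import CloudWatchConnection

def fetch_daily_stats(metric):
    conn = CloudWatchConnection()
    end = datetime.datetime.utcnow()
    start = end - datetime.timedelta(days=14)
    # one datapoint per day; the period must be a multiple of 60 seconds
    return conn.get_metric_statistics(3600 * 24, start, end,
                                      metric.name, metric.namespace,
                                      ['Average', 'Sum'])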
Example #3
class BotoWatchInterface(WatchInterface):
    conn = None
    saveclcdata = False

    def __init__(self, clc_host, access_id, secret_key, token):
        #boto.set_stream_logger('foo')
        path = '/services/CloudWatch'
        port = 8773
        if clc_host.endswith('amazonaws.com'):
            # talk to AWS CloudWatch itself rather than a Eucalyptus CLC
            clc_host = clc_host.replace('ec2', 'monitoring', 1)
            path = '/'
            port = 443
        reg = RegionInfo(name='eucalyptus', endpoint=clc_host)
        self.conn = CloudWatchConnection(access_id, secret_key, region=reg,
                                  port=port, path=path,
                                  is_secure=True, security_token=token, debug=0)
        self.conn.https_validate_certificates = False
        self.conn.http_connection_kwargs['timeout'] = 30

    def __save_json__(self, obj, name):
        with open(name, 'w') as f:
            json.dump(obj, f, cls=BotoJsonWatchEncoder, indent=2)

    def get_metric_statistics(self, period, start_time, end_time, metric_name, namespace, statistics, dimensions, unit):
        obj = self.conn.get_metric_statistics(period, start_time, end_time, metric_name, namespace, statistics, dimensions, unit)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Statistics.json")
        return obj

    def list_metrics(self, next_token=None, dimensions=None, metric_name=None, namespace=None):
        obj = self.conn.list_metrics(next_token, dimensions, metric_name, namespace)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Metrics.json")
        return obj

    def put_metric_data(self, namespace, name, value, timestamp, unit, dimensions, statistics):
        return self.conn.put_metric_data(namespace, name, value, timestamp, unit, dimensions, statistics)

    def describe_alarms(self, action_prefix=None, alarm_name_prefix=None, alarm_names=None, max_records=None,
                        state_value=None, next_token=None):
        obj = self.conn.describe_alarms(action_prefix, alarm_name_prefix, alarm_names, max_records, state_value, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Alarms.json")
        return obj

    def delete_alarms(self, alarm_names):
        return self.conn.delete_alarms(alarm_names)

    def enable_alarm_actions(self, alarm_names):
        return self.conn.enable_alarm_actions(alarm_names)

    def disable_alarm_actions(self, alarm_names):
        return self.conn.disable_alarm_actions(alarm_names)

    def put_metric_alarm(self, alarm):
        return self.conn.put_metric_alarm(alarm)
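A minimal usage sketch for the class above (host, credentials, and token are hypothetical placeholders; list_metrics here relies on its keyword defaults):

iface = BotoWatchInterface('clc.example.com', 'my-access-id',
                           'my-secret-key', None)
iface.saveclcdata = True  # also dump each response under mockdata/
for metric in iface.list_metrics():
    print(metric)

Example #4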
def getCloudWatchMetric():
    end_time = datetime.datetime.now()
    # back off 65 seconds: CloudWatch datapoints lag real time slightly
    end_time = end_time - datetime.timedelta(seconds=65)
    start_time = end_time - datetime.timedelta(seconds=args.interval)

    if args.verbose:
        debug = args.verbose
    else:
        debug = 0
    # pick the endpoint matching the requested region; guard against an
    # unknown region name instead of failing later on an unbound name
    cloudwatch = None
    for reg in boto.ec2.cloudwatch.regions():
        if reg.name == args.region:
            cloudwatch = CloudWatchConnection(is_secure=True,
                                              debug=debug,
                                              region=reg)
            break
    if cloudwatch is None:
        sys.exit('Unknown region: %s' % args.region)
    cloudwatch_result = None

    # Check if the metric has collected statistics. If it does not, say so
    metricsList = cloudwatch.list_metrics(dimensions=dimension,
                                          namespace=args.namespace)
    metricTest = 'Metric:' + args.metric
    strMetricsList = []
    for item in metricsList:
        strMetricsList.append(str(item))
    if metricTest in strMetricsList:
        # Specify an application load balancer as the final portion of its
        # ARN: app/load-balancer-name/1234567890123456
        # Metrics tested for ALB: TargetResponseTime (Average),
        # RequestCount (Sum), ActiveConnectionCount (Sum),
        # NewConnectionCount (Sum), HTTPCode_Target_4XX_Count (Sum),
        # HTTPCode_Target_5XX_Count (Sum), HealthyHostCount (Average)
        cloudwatch_result = cloudwatch.get_metric_statistics(
            args.interval,
            start_time,
            end_time,
            args.metric,
            args.namespace,
            statistics=args.statistic,
            dimensions=dimension)
        if len(cloudwatch_result) > 0:
            cloudwatch_result = cloudwatch_result[0]
            if len(cloudwatch_result) > 0:
                # values wider than six characters are cast to long to
                # avoid float rounding on large counters
                if len(repr(cloudwatch_result[args.statistic])) > 6:
                    cloudwatch_result = long(cloudwatch_result[args.statistic])
                else:
                    cloudwatch_result = float(
                        cloudwatch_result[args.statistic])
        else:
            # Assume a value of 0 when AWS returns an empty list
            cloudwatch_result = 0
        print cloudwatch_result
    else:
        print 'Unsupported Metric'
    return
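getCloudWatchMetric() reads module-level args and dimension objects that this snippet never defines. A plausible, purely illustrative reconstruction of that setup (every default below is an assumption, not from the source):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--region', default='us-east-1')
parser.add_argument('--namespace', default='AWS/ApplicationELB')
parser.add_argument('--metric', default='RequestCount')
parser.add_argument('--statistic', default='Sum')
parser.add_argument('--interval', type=int, default=300)
parser.add_argument('--verbose', type=int, default=0)
args = parser.parse_args()

# per the ALB comment above: the final portion of the load balancer ARN
dimension = {'LoadBalancer': 'app/load-balancer-name/1234567890123456'}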
Example #5
def get_cloudwatch_top_metrics():
    conn = CloudWatchConnection()

    metrics_names = []
    next_token = None
    while True:
        res = conn.list_metrics(next_token=next_token,
                                dimensions=settings.CLOUDWATCH_DIMENSIONS,
                                namespace=settings.CLOUDWATCH_NAMESPACE)
        metrics_names.extend([m.name for m in res])
        next_token = res.next_token
        if next_token is None:
            break

    # List of tuples like [(metric_name, count), ...]
    metrics = []

    for metric_name in metrics_names:
        res = conn.get_metric_statistics(int(START_DELTA_AGO.total_seconds()),
                                         datetime.datetime.now() - START_DELTA_AGO,
                                         datetime.datetime.now(),
                                         metric_name,
                                         settings.CLOUDWATCH_NAMESPACE,
                                         'Sum',
                                         settings.CLOUDWATCH_DIMENSIONS,
                                         'Count')

        if not res:
            # Some metrics will not have (or no longer have) results
            continue

        count = int(res[0]['Sum'])

        if count >= TOP_THRESHOLD_COUNT:
            metrics.append((metric_name, count))

    metrics.sort(key=lambda x: x[1], reverse=True)

    text = 'Responses sent\n----------------------\n'
    for metric in metrics:
        metric_name = 'TOTAL' if metric[0] == settings.CLOUDWATCH_TOTAL_SENT_METRIC_NAME else metric[0]
        if metric_name == settings.CLOUDWATCH_PROCESSING_TIME_METRIC_NAME:
            continue
        text += '%s %s\n' % (str(metric[1]).rjust(5), metric_name)

    return text
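This helper leans on a Django-style settings object and two module-level constants that the snippet omits. A sketch of what it appears to assume (the values are illustrative, not from the source):

import datetime

START_DELTA_AGO = datetime.timedelta(days=1)  # how far back to aggregate
TOP_THRESHOLD_COUNT = 100                     # drop low-volume metrics

# settings.CLOUDWATCH_NAMESPACE                    e.g. 'MyApp'
# settings.CLOUDWATCH_DIMENSIONS                   e.g. {'Env': 'production'}
# settings.CLOUDWATCH_TOTAL_SENT_METRIC_NAME       reported as 'TOTAL'
# settings.CLOUDWATCH_PROCESSING_TIME_METRIC_NAME  excluded from the report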
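Example #7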
class BotoWatchInterface(WatchInterface):
    conn = None
    saveclcdata = False

    def __init__(self, clc_host, access_id, secret_key, token):
        #boto.set_stream_logger('foo')
        path = '/services/CloudWatch'
        port = 8773
        if clc_host.endswith('amazonaws.com'):
            # talk to AWS CloudWatch itself rather than a Eucalyptus CLC
            clc_host = clc_host.replace('ec2', 'monitoring', 1)
            path = '/'
            port = 443
        reg = RegionInfo(name='eucalyptus', endpoint=clc_host)
        # boto grew validate_certs in 2.6; note this naive string
        # comparison misorders versions such as '2.10'
        if boto.__version__ < '2.6':
            self.conn = CloudWatchConnection(access_id, secret_key, region=reg,
                                  port=port, path=path,
                                  is_secure=True, security_token=token, debug=0)
        else:
            self.conn = CloudWatchConnection(access_id, secret_key, region=reg,
                                  port=port, path=path, validate_certs=False,
                                  is_secure=True, security_token=token, debug=0)
        self.conn.http_connection_kwargs['timeout'] = 30

    def __save_json__(self, obj, name):
        with open(name, 'w') as f:
            json.dump(obj, f, cls=BotoJsonWatchEncoder, indent=2)

    def get_metric_statistics(self, period, start_time, end_time, metric_name, namespace, statistics, dimensions, unit):
        obj = self.conn.get_metric_statistics(period, start_time, end_time, metric_name, namespace, statistics, dimensions, unit)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Statistics.json")
        return obj

    def list_metrics(self, next_token, dimensions, metric_name, namespace):
        obj = self.conn.list_metrics(next_token, dimensions, metric_name, namespace)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Metrics.json")
        return obj

    def put_metric_data(self, namespace, name, value, timestamp, unit, dimensions, statistics):
        return self.conn.put_metric_data(namespace, name, value, timestamp, unit, dimensions, statistics)
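Example #8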
class BotoWatchInterface(WatchInterface):
    conn = None
    saveclcdata = False

    def __init__(self, clc_host, access_id, secret_key, token):
        # boto.set_stream_logger('foo')
        path = "/services/CloudWatch"
        port = 8773
        if clc_host.endswith("amazonaws.com"):
            # talk to AWS CloudWatch itself rather than a Eucalyptus CLC
            clc_host = clc_host.replace("ec2", "monitoring", 1)
            path = "/"
            port = 443
        reg = RegionInfo(name="eucalyptus", endpoint=clc_host)
        self.conn = CloudWatchConnection(
            access_id, secret_key, region=reg, port=port, path=path, is_secure=True, security_token=token, debug=0
        )
        self.conn.https_validate_certificates = False
        self.conn.http_connection_kwargs["timeout"] = 30

    def __save_json__(self, obj, name):
        with open(name, "w") as f:
            json.dump(obj, f, cls=BotoJsonWatchEncoder, indent=2)

    def get_metric_statistics(self, period, start_time, end_time, metric_name, namespace, statistics, dimensions, unit):
        obj = self.conn.get_metric_statistics(
            period, start_time, end_time, metric_name, namespace, statistics, dimensions, unit
        )
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Statistics.json")
        return obj

    def list_metrics(self, next_token=None, dimensions=None, metric_name=None, namespace=None):
        obj = self.conn.list_metrics(next_token, dimensions, metric_name, namespace)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Metrics.json")
        return obj

    def put_metric_data(self, namespace, name, value, timestamp, unit, dimensions, statistics):
        return self.conn.put_metric_data(namespace, name, value, timestamp, unit, dimensions, statistics)

    def describe_alarms(
        self,
        action_prefix=None,
        alarm_name_prefix=None,
        alarm_names=None,
        max_records=None,
        state_value=None,
        next_token=None,
    ):
        obj = self.conn.describe_alarms(
            action_prefix, alarm_name_prefix, alarm_names, max_records, state_value, next_token
        )
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Alarms.json")
        return obj

    def delete_alarms(self, alarm_names):
        return self.conn.delete_alarms(alarm_names)

    def enable_alarm_actions(self, alarm_names):
        return self.conn.enable_alarm_actions(alarm_names)

    def disable_alarm_actions(self, alarm_names):
        return self.conn.disable_alarm_actions(alarm_names)

    def put_metric_alarm(self, alarm):
        return self.conn.put_metric_alarm(alarm)
Example #9
class Monitor:
	def __init__(self, key, access):
		try:
			url = "http://169.254.169.254/latest/"

			self.userdata = json.load(urlopen(url + "user-data/"))
			public_hostname = urlopen(url + "meta-data/public-hostname/").read()
			zone = urlopen(url + "meta-data/placement/availability-zone/").read()
			region = zone[:-1]
		except:
			sys.exit("We should be getting user-data here...")

		# the name (and identity) of the cluster (the master)
		self.cluster = self.userdata['cluster']
		self.name = "{0}.{1}".format(self.userdata['name'], self.cluster)

		endpoint = "monitoring.{0}.amazonaws.com".format(region)
		region_info = RegionInfo(name=region, endpoint=endpoint)

		self.cloudwatch = CloudWatchConnection(key, access, region=region_info)
		self.namespace = '9apps/postgres'

		self.connection = psycopg2.connect(host=settings.host,
								port=5432,
								dbname=settings.database_name,
								user=settings.database_user,
								password=settings.database_password)

		# now, the non-system database connections
		self.databases = []
		try:
			database_cursor = self.connection.cursor()

			database_cursor.execute("select datname from pg_stat_database where datname !~ '(template[0-9]+|root|postgres)'")
			for database in database_cursor:
				self.databases.append([database[0],
								psycopg2.connect(host=settings.host, port=5432,
								dbname=database[0], user=settings.database_user,
								password=settings.database_password)])
		finally:
			database_cursor.close()

		self.pgbouncer = psycopg2.connect(host=settings.host,
								port=6432,
								dbname='pgbouncer',
								user=settings.database_user,
								password=settings.database_password)
		# pgbouncer's admin console only works with autocommit enabled
		self.pgbouncer.set_isolation_level(0)

	def __del__(self):
		self.connection.close()

	def is_in_recovery(self):
		self.connection.autocommit = True

		try:
			cur = self.connection.cursor()

			cur.execute("select pg_is_in_recovery()")
			in_recovery = cur.fetchone()[0]
		finally:
			cur.close()

		return in_recovery

	def collect(self, monitoring = 'on'):
		if monitoring not in ['on', 'all']:
			return [[], [], [], {}]

		now = datetime.now()

		names = []
		values = []
		units = []
		dimensions = { 'name' : self.name,
					'cluster' : self.cluster }

		if 'master' in self.userdata:
			[offset, receive_offset, replay_offset] = self._get_standby_lag()

			if receive_offset != None:
				names.append('receive_lag')
				values.append(int(offset - receive_offset))
				units.append('Bytes')

			if replay_offset != None:
				names.append('replay_lag')
				values.append(int(offset - replay_offset))
				units.append('Bytes')

		for database in self.databases:
			for relation in ["heap", "idx"]:
				[read, hit, hitratio] = self._get_hitratio(database[1], relation)

				names.append("{0}_{1}_read".format(database[0], relation))
				values.append(int(read))
				units.append("Count")

				names.append("{0}_{1}_hit".format(database[0], relation))
				values.append(int(hit))
				units.append("Count")

				if hitratio != None:
					names.append("{0}_{1}_hitratio".format(database[0], relation))
					values.append(float(hitratio * 100))
					units.append("Percent")

			conflicts = self._get_conflicts(database[0])
			names.append("{0}_{1}".format(database[0], 'confl_tablespace'))
			values.append(int(conflicts[0]))
			units.append("Count")

			names.append("{0}_{1}".format(database[0], 'confl_lock'))
			values.append(int(conflicts[1]))
			units.append("Count")

			names.append("{0}_{1}".format(database[0], 'confl_snapshot'))
			values.append(int(conflicts[2]))
			units.append("Count")

			names.append("{0}_{1}".format(database[0], 'confl_bufferpin'))
			values.append(int(conflicts[3]))
			units.append("Count")

			names.append("{0}_{1}".format(database[0], 'confl_deadlock'))
			values.append(int(conflicts[4]))
			units.append("Count")

			indexes_size = self._get_indexes_size(database[1])
			names.append("{0}_indexes_size".format(database[0]))
			values.append(int(indexes_size))
			units.append("Bytes")

			tables_size = self._get_tables_size(database[1])
			names.append("{0}_tables_size".format(database[0]))
			values.append(int(tables_size))
			units.append("Bytes")

		# nr of wal files
		size = self._get_nr_wal_files()
		names.append("wal_files")
		values.append(int(size))
		units.append("Count")

		# pgbouncer stats
		stats = self._get_pgbouncer_stats()
		names.append("pgbouncer_avg_req")
		values.append(int(stats[0]))
		units.append("Count/Second")

		names.append("pgbouncer_avg_recv")
		values.append(int(stats[1]))
		units.append("Bytes/Second")

		names.append("pgbouncer_avg_sent")
		values.append(int(stats[2]))
		units.append("Bytes/Second")

		names.append("pgbouncer_avg_query")
		# avg_query is reported in microseconds; convert to float before
		# dividing so Python 2 integer division does not truncate it
		values.append(float(stats[3]) / 1000000)
		units.append("Seconds")

		# pgbouncer pools
		pools = self._get_pgbouncer_pools()
		names.append("pgbouncer_cl_active")
		values.append(float(pools[0]))
		units.append("Count")

		names.append("pgbouncer_cl_waiting")
		values.append(float(pools[1]))
		units.append("Count")

		names.append("pgbouncer_sv_active")
		values.append(float(pools[2]))
		units.append("Count")

		names.append("pgbouncer_sv_idle")
		values.append(float(pools[3]))
		units.append("Count")

		names.append("pgbouncer_sv_used")
		values.append(float(pools[4]))
		units.append("Count")

		names.append("pgbouncer_sv_tested")
		values.append(float(pools[5]))
		units.append("Count")

		names.append("pgbouncer_sv_login")
		values.append(float(pools[6]))
		units.append("Count")

		names.append("pgbouncer_maxwait")
		values.append(float(pools[7]))
		units.append("Count")

		return [names, values, units, dimensions]

	def put(self):
		result = False
		try:
			# only monitor when told to; default to 'on' when unset
			monitoring = self.userdata['monitoring']
		except KeyError:
			monitoring = 'on'

		if monitoring in ['on', 'all']:
			# first get all we need
			[names, values, units, dimensions] = self.collect(monitoring)
			while len(names) > 0:
				names20 = names[:20]
				values20 = values[:20]
				units20 = units[:20]

				# we can't send all at once, only 20 at a time
				# first aggregated over all
				result = self.cloudwatch.put_metric_data(self.namespace,
								names20, value=values20, unit=units20)
				for dimension in dimensions:
					dimension = { dimension : dimensions[dimension] }
					result &= self.cloudwatch.put_metric_data(
								self.namespace, names20, value=values20,
								unit=units20, dimensions=dimension)

				del names[:20]
				del values[:20]
				del units[:20]
		else:
			print "we are not monitoring"

		return result
	
	def metrics(self):
		return self.cloudwatch.list_metrics()

	def _get_nr_wal_files(self):
		try:
			cursor = self.connection.cursor()

			sql = "select count(name) from (select pg_ls_dir('pg_xlog') as name) as xlogs where name != 'archive_status'"
			cursor.execute(sql)
			
			[size] = cursor.fetchone()
		finally:
			cursor.close()

		return size

	def _get_tables_size(self, connection):
		try:
			cursor = connection.cursor()

			sql = "select sum(pg_relation_size(relid)) from pg_stat_user_tables"
			cursor.execute(sql)
			
			[size] = cursor.fetchone()
		finally:
			cursor.close()

		return size

	def _get_indexes_size(self, connection):
		try:
			cursor = connection.cursor()

			sql = "select sum(pg_relation_size(indexrelid)) from pg_stat_user_indexes"
			cursor.execute(sql)
			
			[size] = cursor.fetchone()
		finally:
			cursor.close()

		return size

	def _get_conflicts(self, database):
		try:
			cursor = self.connection.cursor()

			sql = "select * from pg_stat_database_conflicts where datname = '{0}'".format(database)
			cursor.execute(sql)

			conflicts = cursor.fetchone()
		finally:
			cursor.close()

		return [conflicts[2], conflicts[3], conflicts[4], 
				conflicts[5], conflicts[6]] 

	def _get_hitratio(self, connection, relation="heap"):
		if relation == "heap":
			table = "tables"
		else:
			table = "indexes"

		try:
			cursor = connection.cursor()

			sql = "select sum({0}_blks_read) as read, sum({0}_blks_hit) as hit, (sum({0}_blks_hit) - sum({0}_blks_read)) / nullif(sum({0}_blks_hit),0) as hitratio from pg_statio_user_{1}".format(relation, table)
			cursor.execute(sql)
			
			[read, hit, hitratio] = cursor.fetchone()
		finally:
			cursor.close()

		return [read, hit, hitratio]

	def _get_standby_lag(self):
		try:
			master = psycopg2.connect(host=self.userdata['master'],
							dbname=settings.database_name,
							user=settings.database_user,
							password=settings.database_password)

			master.autocommit = True
			try:
				cursor = master.cursor()
				cursor.execute( "SELECT pg_current_xlog_location() AS location")
				[x, y] = (cursor.fetchone()[0]).split('/')
				offset = (int('ff000000', 16) * int(x, 16)) + int(y, 16)
			finally:
				cursor.close()

			try:
				cursor = self.connection.cursor()

				cursor.execute( "SELECT pg_last_xlog_receive_location(), pg_last_xlog_replay_location()")
				one = cursor.fetchone()
				
				try:
					[x, y] = (one[0]).split('/')
					receive_offset = (int('ff000000', 16) * int(x, 16)) + int(y, 16)
				except:
					receive_offset = None
				
				try:
					[x, y] = (one[1]).split('/')
					replay_offset = (int('ff000000', 16) * int(x, 16)) + int(y, 16)
				except:
					replay_offset = None
			finally:
				cursor.close()
		finally:
			master.close()

		return [offset, receive_offset, replay_offset]

	def _get_pgbouncer_stats(self):
		try:
			cursor = self.pgbouncer.cursor()
			cursor.execute('show stats')

			# ('pgbouncer\x00', 119L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)
			[name, total_requests, total_received,
				total_sent, total_query_time, avg_req,
				avg_recv, avg_sent, avg_query] = cursor.fetchone()
		finally:
			cursor.close()

		return [avg_req, avg_recv, avg_sent, avg_query]

	def _get_pgbouncer_pools(self):
		cl_active = cl_waiting = sv_active = sv_idle = 0
		sv_used = sv_tested = sv_login = maxwait = 0
		try:
			cursor = self.pgbouncer.cursor()
			cursor.execute('show pools')

			# ('pgbouncer\x00', 'pgbouncer\x00', 1, 0, 0, 0, 0, 0, 0, 0)
			for pool in cursor:
				cl_active += pool[2]
				cl_waiting += pool[3]
				sv_active += pool[4]
				sv_idle += pool[5]
				sv_used += pool[6]
				sv_tested += pool[7]
				sv_login += pool[8]
				maxwait = max(maxwait, pool[9])
		finally:
			cursor.close()

		return [cl_active, cl_waiting, sv_active, sv_idle,
					sv_used, sv_tested, sv_login, maxwait]
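A sketch of how this Monitor is typically driven, e.g. once a minute from cron; the credentials are placeholders. put() already batches the collected values twenty at a time, since CloudWatch rejects larger PutMetricData requests:

if __name__ == '__main__':
    monitor = Monitor('my-access-key-id', 'my-secret-key')
    monitor.put()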
Example #10
class Monitor:
	def __init__(self, key, access, cluster):
		try:
			url = "http://169.254.169.254/latest/"

			self.userdata = json.load(urlopen(url + "user-data/"))
			public_hostname = urlopen(url + "meta-data/public-hostname/").read()
			zone = urlopen(url + "meta-data/placement/availability-zone/").read()
			region = zone[:-1]
		except:
			sys.exit("We should be getting user-data here...")

		# the name (and identity) of the cluster (the master)
		self.cluster = cluster

		self.redis = redis.StrictRedis(host='localhost', port=6379)

		endpoint = "monitoring.{0}.amazonaws.com".format(region)
		region_info = RegionInfo(name=region, endpoint=endpoint)

		self.cloudwatch = CloudWatchConnection(key, access, region=region_info)
		self.namespace = '9apps/redis'

		# get the host, but without the logging
		self.node = public_hostname

	def collect(self, monitoring = 'on'):
		if monitoring not in ['on', 'all']:
			return [[], [], [], {}]

		now = datetime.now()

		items = self.redis.info()

		names = []
		values = []
		units = []
		dimensions = { 'node' : self.node,
					'cluster' : self.cluster }

		slowlog_len = self.redis.execute_command('SLOWLOG','LEN')
		names.append('slowlog_len')
		values.append(slowlog_len)
		units.append('Count')

		if items['aof_enabled']:
			names.append('bgrewriteaof_in_progress')
			values.append(items['bgrewriteaof_in_progress'])
			units.append('Count')

			names.append('aof_pending_bio_fsync')
			values.append(items['aof_pending_bio_fsync'])
			units.append('Count')

			names.append('aof_buffer_length')
			values.append(items['aof_buffer_length'])
			units.append('Count')

			names.append('aof_current_size')
			values.append(items['aof_current_size'])
			units.append('Bytes')

			names.append('aof_pending_rewrite')
			values.append(items['aof_pending_rewrite'])
			units.append('Count')

			names.append('aof_base_size')
			values.append(items['aof_base_size'])
			units.append('Bytes')

		# master/slave
		names.append(items['role'])
		values.append(1)
		units.append('Count')

		for item in items:
			# Redis INFO keyspace entries look like 'db0', 'db1', ...;
			# this string-range test selects exactly those keys
			if item >= 'db0' and item < 'dc':
				names.append("{0}_keys".format(item))
				values.append(items[item]['keys'])
				units.append('Count')

				names.append("{0}_expires".format(item))
				values.append(items[item]['expires'])
				units.append('Count')

				# and now add some info on the keys, if we want
				if monitoring == 'all':
					nr = item.lstrip('db')
					db = redis.StrictRedis(host='localhost', port=6379, db=nr)
					keys = db.keys('*')
					for key in keys:
						key_type = db.type(key)
						key = key.replace( '.', '_')

						if key_type == "list":
							llen = db.llen(key)
							names.append("{0}_{1}_llen".format(item, key))
							values.append(llen)
							units.append('Count')
						elif key_type == "hash":
							hlen = db.hlen(key)
							names.append("{0}_{1}_hlen".format(item, key))
							values.append(hlen)
							units.append('Count')
						elif key_type == "set":
							scard = db.scard(key)
							names.append("{0}_{1}_scard".format(item, key))
							values.append(scard)
							units.append('Count')
						elif key_type == "zset":
							zcard = db.zcard(key)
							names.append("{0}_{1}_zcard".format(item, key))
							values.append(zcard)
							units.append('Count')
						elif key_type == "string":
							strlen = db.strlen(key)
							names.append("{0}_{1}_strlen".format(item, key))
							values.append(strlen)
							units.append('Count')

		# pub/sub
		names.append('pubsub_channels')
		values.append(items['pubsub_channels'])
		units.append('Count')

		names.append('pubsub_patterns')
		values.append(items['pubsub_patterns'])
		units.append('Count')

		# memory
		names.append('used_memory')
		values.append(items['used_memory'])
		units.append('Bytes')

		names.append('used_memory_peak')
		values.append(items['used_memory_peak'])
		units.append('Bytes')

		names.append('used_memory_rss')
		values.append(items['used_memory_rss'])
		units.append('Bytes')

		names.append('mem_fragmentation_ratio')
		values.append(items['mem_fragmentation_ratio'])
		units.append('None')

		names.append('connected_slaves')
		values.append(items['connected_slaves'])
		units.append('Count')

		#
		names.append('loading')
		values.append(items['loading'])
		units.append('Count')

		names.append('bgsave_in_progress')
		values.append(items['bgsave_in_progress'])
		units.append('Count')

		# clients
		names.append('connected_clients')
		values.append(items['connected_clients'])
		units.append('Count')

		names.append('blocked_clients')
		values.append(items['blocked_clients'])
		units.append('Count')

		# connection/command totals
		#names.append('total_connections_received')
		#values.append(items['total_connections_received'])
		#units.append('Count')

		#names.append('total_commands_processed')
		#values.append(items['total_commands_processed'])
		#units.append('Count')

		# client input/output
		names.append('client_biggest_input_buf')
		values.append(items['client_biggest_input_buf'])
		units.append('Bytes')

		names.append('client_longest_output_list')
		values.append(items['client_longest_output_list'])
		units.append('Bytes')

		# keys
		names.append('expired_keys')
		values.append(items['expired_keys'])
		units.append('Count')

		names.append('evicted_keys')
		values.append(items['evicted_keys'])
		units.append('Count')

		# last_save
		names.append('changes_since_last_save')
		values.append(items['changes_since_last_save'])
		units.append('Count')

		# keyspace
		#names.append('keyspace_misses')
		#values.append(items['keyspace_misses'])
		#units.append('Count')

		#names.append('keyspace_hits')
		#values.append(items['keyspace_hits'])
		#units.append('Count')

		return [names, values, units, dimensions]

	def put(self):
		result = False
		try:
			# only monitor when told to; default to 'on' when unset
			monitoring = self.userdata['monitoring']
		except KeyError:
			monitoring = 'on'

		if monitoring in ['on', 'all']:
			# first get all we need
			[names, values, units, dimensions] = self.collect(monitoring)
			print [names, values, units, dimensions]
			while len(names) > 0:
				names20 = names[:20]
				values20 = values[:20]
				units20 = units[:20]

				# we can't send all at once, only 20 at a time
				# first aggregated over all
				result = self.cloudwatch.put_metric_data(self.namespace,
								names20, value=values20, unit=units20)
				for dimension in dimensions:
					dimension = { dimension : dimensions[dimension] }
					result &= self.cloudwatch.put_metric_data(
								self.namespace, names20, value=values20,
								unit=units20, dimensions=dimension)

				del names[:20]
				del values[:20]
				del units[:20]
		else:
			print "we are not monitoring"

		return result
	
	def metrics(self):
		return self.cloudwatch.list_metrics()
Example #11
class Monitor:
	def __init__(self, key, access, cluster):
		try:
			url = "http://169.254.169.254/latest/meta-data/"

			public_hostname = urlopen(url + "public-hostname").read()
			zone = urlopen(url + "placement/availability-zone").read()
			region = zone[:-1]
		except:
			sys.exit("We should be getting user-data here...")

		# the name (and identity) of the cluster (the master)
		self.cluster = cluster

		self.redis = redis.StrictRedis(host='localhost', port=6379)

		endpoint = "monitoring.{0}.amazonaws.com".format(region)
		region_info = RegionInfo(name=region, endpoint=endpoint)

		self.cloudwatch = CloudWatchConnection(key, access, region=region_info)
		self.namespace = '9apps/redis'

		self.events = Events(key, access, cluster)

		# get the host, but without the logging
		self.host = Host(cluster)
		self.node = self.host.get_node()

	def __log(self, message, logging='warning'):
		self.events.log(self.node, 'Monitor', message, logging)

	def collect(self):
		self.__log('collecting metrics data from Redis INFO', 'info')
		now = datetime.now()

		items = self.redis.info()

		names = []
		values = []
		units = []
		dimensions = { 'node' : self.node,
					'cluster' : self.cluster }

		if items['aof_enabled']:
			self.__log('aof enabled: getting metrics data for the AOF', 'info')
			names.append('bgrewriteaof_in_progress')
			values.append(items['bgrewriteaof_in_progress'])
			units.append('Count')

			names.append('aof_pending_bio_fsync')
			values.append(items['aof_pending_bio_fsync'])
			units.append('Count')

			names.append('aof_buffer_length')
			values.append(items['aof_buffer_length'])
			units.append('Count')

			names.append('aof_current_size')
			values.append(items['aof_current_size'])
			units.append('Bytes')

			names.append('aof_pending_rewrite')
			values.append(items['aof_pending_rewrite'])
			units.append('Count')

			names.append('aof_base_size')
			values.append(items['aof_base_size'])
			units.append('Bytes')

		# master/slave
		names.append(items['role'])
		values.append(1)
		units.append('Count')

		for item in items:
			# Redis INFO keyspace entries look like 'db0', 'db1', ...;
			# this string-range test selects exactly those keys
			if item >= 'db0' and item < 'dc':
				self.__log('adding metrics data for database: {0}'.format(item), 'info')
				names.append("{0}_keys".format(item))
				values.append(items[item]['keys'])
				units.append('Count')

				names.append("{0}_expires".format(item))
				values.append(items[item]['expires'])
				units.append('Count')

				# and now add some info on the keys
				nr = item.lstrip('db')
				db = redis.StrictRedis(host='localhost', port=6379, db=nr)
				keys = db.keys('*')
				for key in keys:
					# look up the type before trimming the key name,
					# otherwise db.type() would query a key that may not exist
					key_type = db.type(key)
					key = key.split('.')[-1]

					if key_type == "list":
						llen = db.llen(key)
						names.append("{0}_{1}_llen".format(item, key))
						values.append(llen)
						units.append('Count')
					elif key_type == "hash":
						hlen = db.hlen(key)
						names.append("{0}_{1}_hlen".format(item, key))
						values.append(hlen)
						units.append('Count')
					elif key_type == "set":
						scard = db.scard(key)
						names.append("{0}_{1}_scard".format(item, key))
						values.append(scard)
						units.append('Count')
					elif key_type == "zset":
						zcard = db.zcard(key)
						names.append("{0}_{1}_zcard".format(item, key))
						values.append(zcard)
						units.append('Count')
					elif key_type == "string":
						strlen = db.strlen(key)
						names.append("{0}_{1}_strlen".format(item, key))
						values.append(strlen)
						units.append('Count')

		# pub/sub
		names.append('pubsub_channels')
		values.append(items['pubsub_channels'])
		units.append('Count')

		names.append('pubsub_patterns')
		values.append(items['pubsub_patterns'])
		units.append('Count')

		# memory
		names.append('used_memory')
		values.append(items['used_memory'])
		units.append('Bytes')

		names.append('used_memory_peak')
		values.append(items['used_memory_peak'])
		units.append('Bytes')

		names.append('used_memory_rss')
		values.append(items['used_memory_rss'])
		units.append('Bytes')

		names.append('mem_fragmentation_ratio')
		values.append(items['mem_fragmentation_ratio'])
		units.append('None')

		names.append('connected_slaves')
		values.append(items['connected_slaves'])
		units.append('Count')

		#
		names.append('loading')
		values.append(items['loading'])
		units.append('Count')

		names.append('bgsave_in_progress')
		values.append(items['bgsave_in_progress'])
		units.append('Count')

		# clients
		names.append('connected_clients')
		values.append(items['connected_clients'])
		units.append('Count')

		names.append('blocked_clients')
		values.append(items['blocked_clients'])
		units.append('Count')

		# connection/command totals
		names.append('total_connections_received')
		values.append(items['total_connections_received'])
		units.append('Count')

		names.append('total_commands_processed')
		values.append(items['total_commands_processed'])
		units.append('Count')

		# client input/output
		names.append('client_biggest_input_buf')
		values.append(items['client_biggest_input_buf'])
		units.append('Bytes')

		names.append('client_longest_output_list')
		values.append(items['client_longest_output_list'])
		units.append('Bytes')

		# keys
		names.append('expired_keys')
		values.append(items['expired_keys'])
		units.append('Count')

		names.append('evicted_keys')
		values.append(items['evicted_keys'])
		units.append('Count')

		# last_save
		names.append('changes_since_last_save')
		values.append(items['changes_since_last_save'])
		units.append('Count')

		# keyspace
		names.append('keyspace_misses')
		values.append(items['keyspace_misses'])
		units.append('Count')

		names.append('keyspace_hits')
		values.append(items['keyspace_hits'])
		units.append('Count')

		return [names, values, units, dimensions]

	def put(self):
		# first get all we need
		[names, values, units, dimensions] = self.collect()
		while len(names) > 0:
			names20 = names[:20]
			values20 = values[:20]
			units20 = units[:20]

			# we can't send all at once, only 20 at a time
			# first aggregated over all
			self.__log('put aggregated ReDiS metrics data', 'info')
			result = self.cloudwatch.put_metric_data(self.namespace,
									names20, value=values20, unit=units20)
			for dimension in dimensions:
				self.__log('put ReDiS metrics data for {0}'.format(dimensions[dimension]), 'info')
				dimension = { dimension : dimensions[dimension] }
				result &= self.cloudwatch.put_metric_data(self.namespace,
									names20, value=values20, unit=units20,
									dimensions=dimension)

			del names[:20]
			del values[:20]
			del units[:20]

		return result
	
	def metrics(self):
		return self.cloudwatch.list_metrics()
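As with the PostgreSQL variant, a cron-style driver is all this class needs; the key, secret, and cluster name below are placeholders:

if __name__ == '__main__':
    monitor = Monitor('my-access-key-id', 'my-secret-key', 'my-cluster')
    monitor.put()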