def test_get_metric_statistics(self):
    """Request two weeks of daily statistics for the first listed metric.

    The test only checks that the call completes; the returned datapoints
    are not inspected.
    """
    conn = CloudWatchConnection()
    first_metric = conn.list_metrics()[0]
    window_end = datetime.datetime.now()
    window_start = window_end - datetime.timedelta(hours=24 * 14)
    one_day = 3600 * 24
    conn.get_metric_statistics(one_day, window_start, window_end,
                               first_metric.name, first_metric.namespace,
                               ['Average', 'Sum'])
def test_build_put_params_invalid(self):
    """``build_put_params`` must reject name/value lists of unequal length."""
    c = CloudWatchConnection()
    params = {}
    try:
        c.build_put_params(params, name=["N", "M"], value=[1, 2, 3])
    except Exception:
        # Any ordinary error counts as a rejection.  A bare ``except:`` would
        # also swallow KeyboardInterrupt/SystemExit and hide an aborted run.
        pass
    else:
        self.fail("Should not accept lists of different lengths.")
def get_stats(key, secret, db_id, metric):
    """Query the last five minutes of 60-second averages for an RDS metric.

    :param key: AWS access key id passed to ``CloudWatchConnection``.
    :param secret: AWS secret access key passed to ``CloudWatchConnection``.
    :param db_id: value used for the ``DBInstanceIdentifier`` dimension.
    :param metric: metric name queried in the ``AWS/RDS`` namespace.

    On any error from the query the exception is printed and the process
    exits with status 1.
    """
    end = datetime.now()
    start = end - timedelta(minutes=5)
    conn = CloudWatchConnection(key, secret)
    try:
        # NOTE(review): ``res`` is not returned or used in the visible code;
        # confirm whether callers expect the datapoints back.
        res = conn.get_metric_statistics(60, start, end, metric, "AWS/RDS",
                                         "Average",
                                         {"DBInstanceIdentifier": db_id})
    except Exception as e:  # ``as`` form parses on Python 2.6+ and Python 3
        print(e)
        sys.exit(1)
def test_build_list_params(self):
    """build_list_params stores each item under the label numbered from 1."""
    conn = CloudWatchConnection()
    built = {}
    things = ['thing1', 'thing2', 'thing3']
    conn.build_list_params(built, things, 'ThingName%d')
    self.assertEqual(built, {
        'ThingName1': 'thing1',
        'ThingName2': 'thing2',
        'ThingName3': 'thing3',
    })
def test_build_put_params_one(self):
    """A single metric with one dimension fills the ``member.1`` slots."""
    conn = CloudWatchConnection()
    built = {}
    conn.build_put_params(built, name="N", value=1, dimensions={"D": "V"})

    prefix = 'MetricData.member.1.'
    wanted = {
        prefix + 'MetricName': 'N',
        prefix + 'Value': 1,
        prefix + 'Dimensions.member.1.Name': 'D',
        prefix + 'Dimensions.member.1.Value': 'V',
    }
    self.assertEqual(built, wanted)
def test_build_get_params_multiple_parameter_dimension1(self):
    """Two single-valued dimensions become two numbered Name/Value pairs."""
    self.maxDiff = None
    conn = CloudWatchConnection()
    built = {}
    dims = OrderedDict([("D1", "V"), ("D2", "W")])
    conn.build_dimension_param(dims, built)

    wanted = {}
    for position, (dim_name, dim_value) in enumerate([("D1", "V"), ("D2", "W")], 1):
        wanted['Dimensions.member.%d.Name' % position] = dim_name
        wanted['Dimensions.member.%d.Value' % position] = dim_value
    self.assertEqual(built, wanted)
def AWSSendStatusSDK(service):
    """Publish a service's up/down flag to CloudWatch through boto.

    Requires boto (``pip install boto``).  ``service`` is indexed as
    ``service[0]`` (metric name) and ``service[1]`` (status); a truthy
    status is sent as ``"1"`` and a falsy one as ``"0"``.  Credentials and
    the namespace come from module-level names.
    """
    status = service[1]
    service_name = service[0]
    connection = CloudWatchConnection(aws_access_key_id,
                                      aws_secret_access_key)
    flag = 1 if status else 0
    connection.put_metric_data(namespace, name=service_name, value=str(flag))
def test_build_put_params_multiple_metrics(self):
    """Two metrics sharing one dimension dict each get that dimension."""
    conn = CloudWatchConnection()
    built = {}
    conn.build_put_params(built, name=["N", "M"], value=[1, 2],
                          dimensions={"D": "V"})

    wanted = {}
    for position, (metric_name, metric_value) in enumerate([("N", 1), ("M", 2)], 1):
        prefix = "MetricData.member.%d." % position
        wanted[prefix + "MetricName"] = metric_name
        wanted[prefix + "Value"] = metric_value
        wanted[prefix + "Dimensions.member.1.Name"] = "D"
        wanted[prefix + "Dimensions.member.1.Value"] = "V"
    self.assertEqual(built, wanted)
def test_build_put_params_multiple_dimensions(self):
    """One name with two values and two dimension dicts yields two members."""
    conn = CloudWatchConnection()
    built = {}
    conn.build_put_params(built, name="N", value=[1, 2],
                          dimensions=[{"D": "V"}, {"D": "W"}])

    wanted = {}
    for position, (metric_value, dim_value) in enumerate([(1, 'V'), (2, 'W')], 1):
        prefix = 'MetricData.member.%d.' % position
        wanted[prefix + 'MetricName'] = 'N'
        wanted[prefix + 'Value'] = metric_value
        wanted[prefix + 'Dimensions.member.1.Name'] = 'D'
        wanted[prefix + 'Dimensions.member.1.Value'] = dim_value
    self.assertEqual(built, wanted)
def put_cloudwatch_metric_data(name, value, unit, namespace, use_autoscaling_group=True):
    """Send one metric datapoint to CloudWatch.

    When ``use_autoscaling_group`` is true and ``_get_autoscaling_group()``
    returns a truthy value, the datapoint is tagged with an
    ``AutoScalingGroupName`` dimension; otherwise no dimensions are sent.
    """
    # TODO: Make this more efficient? There are some uses of this function that
    # call it multiple times in succession -- should there be a batch mode?
    dimensions = None
    if use_autoscaling_group:
        group_name = _get_autoscaling_group()
        if group_name:
            dimensions = {'AutoScalingGroupName': group_name}

    CloudWatchConnection().put_metric_data(namespace, name, value,
                                           unit=unit, dimensions=dimensions)
def __init__(self, key, access, cluster):
    """Set up CloudWatch, Redis, event and host helpers for one cluster.

    The region is derived from the instance metadata service; if any of the
    metadata requests fails the process exits with an explanatory message.

    :param key: AWS access key id.
    :param access: AWS secret access key.
    :param cluster: name (and identity) of the cluster (the master).
    """
    try:
        url = "http://169.254.169.254/latest/meta-data/"
        # The hostname value is not used below in this method.
        public_hostname = urlopen(url + "public-hostname").read()
        zone = urlopen(url + "placement/availability-zone").read()
        # Drop the last character of the zone string to get the region name.
        region = zone[:-1]
    except Exception:
        # Narrowed from a bare ``except:`` so Ctrl-C / SystemExit are not
        # rewritten into this message.
        sys.exit("We should be getting user-data here...")

    # the name (and identity) of the cluster (the master)
    self.cluster = cluster

    self.redis = redis.StrictRedis(host='localhost', port=6379)

    endpoint = "monitoring.{0}.amazonaws.com".format(region)
    region_info = RegionInfo(name=region, endpoint=endpoint)
    self.cloudwatch = CloudWatchConnection(key, access, region=region_info)
    self.namespace = '9apps/redis'

    self.events = Events(key, access, cluster)

    # get the host, but without the logging
    self.host = Host(cluster)
    self.node = self.host.get_node()
def __init__(self, aws_access_key_id=None, aws_secret_access_key=None,
             is_secure=False, port=None, proxy=None, proxy_port=None,
             proxy_user=None, proxy_pass=None, debug=0,
             https_connection_factory=None, region=None, path='/',
             security_token=None, validate_certs=True):
    """
    Create the load-balancing connection together with a CloudWatch
    connection (``self.cw_con``) built from the same arguments.

    When no region is given, one is built from ``DefaultRegionName`` and
    ``DefaultRegionEndpoint``.

    note:: The original docstring states that the region argument is
    overridden by the region specified in the boto configuration file;
    that override is not visible in this method.
    """
    if not region:
        region = RegionInfo(self, self.DefaultRegionName,
                            self.DefaultRegionEndpoint)
    self.region = region

    # Both connections take the same positional arguments in the same order.
    shared_args = (aws_access_key_id, aws_secret_access_key, is_secure,
                   port, proxy, proxy_port, proxy_user, proxy_pass, debug,
                   https_connection_factory, region, path, security_token)
    self.cw_con = CloudWatchConnection(*shared_args,
                                       validate_certs=validate_certs)
    ELBConnection.__init__(self, *shared_args,
                           validate_certs=validate_certs)
def connect(self, groupname):
    """Open EC2, CloudWatch and AutoScale connections and cache group state.

    Looks up the auto scaling group called ``groupname`` and records the
    group, its instance count, its desired capacity and its name on ``self``.
    """
    self.ec2 = boto.connect_ec2()
    self.cw = CloudWatchConnection()
    self.autoscale = AutoScaleConnection()

    matching_groups = self.autoscale.get_all_groups(names=[groupname])
    self.group = matching_groups[0]
    self.instances = len(self.group.instances)
    self.desired = self.group.desired_capacity
    self.name = groupname
def test_build_get_params_multiple_parameter_dimension2(self):
    """List values expand to one member each; a None value emits only a Name."""
    from collections import OrderedDict
    self.maxDiff = None
    conn = CloudWatchConnection()
    built = {}
    dims = OrderedDict([("D1", ["V1", "V2"]), ("D2", "W"), ("D3", None)])
    conn.build_dimension_param(dims, built)

    wanted = {}
    flattened = [("D1", "V1"), ("D1", "V2"), ("D2", "W")]
    for position, (dim_name, dim_value) in enumerate(flattened, 1):
        wanted['Dimensions.member.%d.Name' % position] = dim_name
        wanted['Dimensions.member.%d.Value' % position] = dim_value
    # D3 has no value, so only its name is expected.
    wanted['Dimensions.member.4.Name'] = 'D3'
    self.assertEqual(built, wanted)
def get_cloudwatch_top_metrics():
    """Build a text report of metrics whose recent ``Sum`` meets the threshold.

    Lists every metric name in the configured namespace and dimensions
    (following ``next_token`` pagination), queries each one's ``Sum`` over
    the last ``START_DELTA_AGO``, keeps those with a count of at least
    ``TOP_THRESHOLD_COUNT`` and renders them in descending order.

    :return: the report, starting with a ``Responses sent`` header, one
        ``<count> <name>`` line per metric.
    """
    conn = CloudWatchConnection()

    metrics_names = []
    next_token = None
    while True:
        res = conn.list_metrics(next_token=next_token,
                                dimensions=settings.CLOUDWATCH_DIMENSIONS,
                                namespace=settings.CLOUDWATCH_NAMESPACE)
        metrics_names.extend(m.name for m in res)
        next_token = res.next_token
        if next_token is None:
            break

    period = int(START_DELTA_AGO.total_seconds())

    # List of tuples like [(metric_name, count), ...]
    metrics = []
    for metric_name in metrics_names:
        # Take one timestamp per query so the window is exactly
        # START_DELTA_AGO long; two separate now() calls made it slightly
        # longer than the requested period.
        now = datetime.datetime.now()
        res = conn.get_metric_statistics(period,
                                         now - START_DELTA_AGO,
                                         now,
                                         metric_name,
                                         settings.CLOUDWATCH_NAMESPACE,
                                         'Sum',
                                         settings.CLOUDWATCH_DIMENSIONS,
                                         'Count')
        if not res:
            # Some metrics will not have (or no longer have) results
            continue

        count = int(res[0]['Sum'])
        if count >= TOP_THRESHOLD_COUNT:
            metrics.append((metric_name, count))

    metrics.sort(key=lambda x: x[1], reverse=True)

    lines = ['Responses sent\n----------------------\n']
    for name, count in metrics:
        label = 'TOTAL' if name == settings.CLOUDWATCH_TOTAL_SENT_METRIC_NAME else name
        if label == settings.CLOUDWATCH_PROCESSING_TIME_METRIC_NAME:
            continue
        lines.append('%s %s\n' % (str(count).rjust(5), label))

    return ''.join(lines)
def main():
    """Publish the message count of each configured RabbitMQ queue to CloudWatch.

    Configuration is read from the environment (names spelled as in the code):
    ``RABBITMQ_CLOUWATCH_QUEUES`` (comma separated), ``RABBITMQ_HTTP_URL``,
    ``RABBITMQ_CLOUWATCH_NAMESPACE`` (default ``rabbitmq_cloudwatch``),
    ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY``.

    Raises ``RabbitmqCloudwatchException`` when the queue list is empty, the
    URL is missing or lacks hostname/username/password, the AWS credentials
    are missing, or a queue request does not answer with HTTP 200.
    """
    env = os.getenv
    queues = env('RABBITMQ_CLOUWATCH_QUEUES', '').split(',')
    aws_access_key_id = env('AWS_ACCESS_KEY_ID')
    aws_secret_access_key = env('AWS_SECRET_ACCESS_KEY')
    cloudwatch_namespace = env('RABBITMQ_CLOUWATCH_NAMESPACE',
                               'rabbitmq_cloudwatch')

    if not (queues and queues[0]):
        raise RabbitmqCloudwatchException('Queues may not be empty')

    raw_url = env('RABBITMQ_HTTP_URL')
    if not raw_url:
        raise RabbitmqCloudwatchException('Invalid URL')

    broker_url = urlparse(raw_url)
    url_parts = (broker_url.hostname, broker_url.username, broker_url.password)
    if not all(url_parts):
        raise RabbitmqCloudwatchException('Invalid URL')

    if not (aws_access_key_id and aws_secret_access_key):
        raise RabbitmqCloudwatchException('Invalid AWS Credentials')

    cwc = CloudWatchConnection(aws_access_key_id, aws_secret_access_key)

    for queue in queues:
        queue_url = broker_url.geturl() + queue
        response = requests.get(queue_url)
        if response.status_code != 200:
            raise RabbitmqCloudwatchException(
                'Unable to fetch queue {} from url: {}. '
                'Error: {}={}'.format(
                    queue,
                    queue_url,
                    response.status_code,
                    response.reason))

        queue_messages = response.json()['messages']
        print('Queue {} currently has {} messages'.format(
            queue, queue_messages))
        cwc.put_metric_data(cloudwatch_namespace, queue, queue_messages)
def test_build_put_params_multiple_parameter_dimension(self):
    """One metric whose dimension dict has two keys gets two dimension members."""
    self.maxDiff = None
    conn = CloudWatchConnection()
    built = {}
    dims = [OrderedDict([("D1", "V"), ("D2", "W")])]
    conn.build_put_params(built, name="N", value=[1], dimensions=dims)

    prefix = 'MetricData.member.1.'
    wanted = {
        prefix + 'MetricName': 'N',
        prefix + 'Value': 1,
    }
    for position, (dim_name, dim_value) in enumerate([('D1', 'V'), ('D2', 'W')], 1):
        wanted['%sDimensions.member.%d.Name' % (prefix, position)] = dim_name
        wanted['%sDimensions.member.%d.Value' % (prefix, position)] = dim_value
    self.assertEqual(built, wanted)
def test_describe_alarms(self):
    """describe_alarms parses name, comparison and dimensions from a canned reply."""
    c = CloudWatchConnection()

    def make_request(*args, **kwargs):
        class Body(object):
            def __init__(self):
                self.status = 200

            def read(self):
                return DESCRIBE_ALARMS_BODY

        return Body()

    # Replace the network call with a stub that serves the fixture body.
    c.make_request = make_request
    alarms = c.describe_alarms()

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(alarms[0].name, 'FancyAlarm')
    self.assertEqual(alarms[0].comparison, '<')
    self.assertEqual(alarms[0].dimensions, {u'Job': [u'ANiceCronJob']})
    self.assertEqual(alarms[1].name, 'SuperFancyAlarm')
    self.assertEqual(alarms[1].comparison, '>')
    self.assertEqual(alarms[1].dimensions, {u'Job': [u'ABadCronJob']})
def __init__(self, clc_host, access_id, secret_key, token):
    """Open a CloudWatch connection to ``clc_host``.

    Hosts ending in ``amazonaws.com`` are addressed on port 443 at path
    ``/`` with the first ``ec2`` in the host name replaced by
    ``monitoring``; every other host uses port 8773 and path
    ``/services/CloudWatch``.  Certificate validation is switched off on
    boto versions that accept ``validate_certs`` (2.6 and later, per the
    version check).  The HTTP timeout is set to 30 seconds.

    :param clc_host: endpoint host name.
    :param access_id: access key id.
    :param secret_key: secret access key.
    :param token: security token passed to the connection.
    """
    import re

    # Uncomment for wire-level debugging: boto.set_stream_logger('foo')
    path = '/services/CloudWatch'
    port = 8773
    if clc_host.endswith('amazonaws.com'):
        clc_host = clc_host.replace('ec2', 'monitoring', 1)
        path = '/'
        port = 443
    reg = RegionInfo(name='eucalyptus', endpoint=clc_host)

    # Compare versions numerically: as strings, '2.10' < '2.6' is True and
    # newer boto releases would wrongly take the pre-2.6 branch.
    version = tuple(int(part)
                    for part in re.findall(r'\d+', boto.__version__)[:2])
    conn_kwargs = dict(region=reg, port=port, path=path, is_secure=True,
                       security_token=token, debug=0)
    if version >= (2, 6):
        conn_kwargs['validate_certs'] = False
    self.conn = CloudWatchConnection(access_id, secret_key, **conn_kwargs)
    self.conn.http_connection_kwargs['timeout'] = 30
class BotoWatchInterface(WatchInterface):
    """WatchInterface implementation that forwards calls to a boto CloudWatchConnection."""

    conn = None
    # When true, results of the read calls are also dumped as JSON under
    # mockdata/.
    saveclcdata = False

    def __init__(self, clc_host, access_id, secret_key, token):
        """Open a CloudWatch connection to ``clc_host``.

        Hosts ending in ``amazonaws.com`` are addressed on port 443 at path
        ``/`` with the first ``ec2`` in the host name replaced by
        ``monitoring``; every other host uses port 8773 and path
        ``/services/CloudWatch``.  Certificate validation is switched off
        on boto versions that accept ``validate_certs`` (2.6 and later, per
        the version check).  The HTTP timeout is set to 30 seconds.
        """
        import re

        # Uncomment for wire-level debugging: boto.set_stream_logger('foo')
        path = '/services/CloudWatch'
        port = 8773
        if clc_host.endswith('amazonaws.com'):
            clc_host = clc_host.replace('ec2', 'monitoring', 1)
            path = '/'
            port = 443
        reg = RegionInfo(name='eucalyptus', endpoint=clc_host)

        # Compare versions numerically: as strings, '2.10' < '2.6' is True
        # and newer boto releases would wrongly take the pre-2.6 branch.
        version = tuple(int(part)
                        for part in re.findall(r'\d+', boto.__version__)[:2])
        conn_kwargs = dict(region=reg, port=port, path=path, is_secure=True,
                           security_token=token, debug=0)
        if version >= (2, 6):
            conn_kwargs['validate_certs'] = False
        self.conn = CloudWatchConnection(access_id, secret_key, **conn_kwargs)
        self.conn.http_connection_kwargs['timeout'] = 30

    def __save_json__(self, obj, name):
        """Write ``obj`` to the file ``name`` as indented JSON."""
        # ``with`` closes the file even when serialisation raises.
        with open(name, 'w') as f:
            json.dump(obj, f, cls=BotoJsonWatchEncoder, indent=2)

    def get_metric_statistics(self, period, start_name, end_time, metric_name, namespace, statistics, dimensions, unit):
        """Forward to the connection; optionally save the result as mock data."""
        obj = self.conn.get_metric_statistics(period, start_name, end_time, metric_name, namespace, statistics, dimensions, unit)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Statistics.json")
        return obj

    def list_metrics(self, next_token, dimensions, metric_name, namespace):
        """Forward to the connection; optionally save the result as mock data."""
        obj = self.conn.list_metrics(next_token, dimensions, metric_name, namespace)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Metrics.json")
        return obj

    def put_metric_data(self, namespace, name, value, timestamp, unit, dimensions, statistics):
        """Forward the datapoint to the connection and return its result."""
        return self.conn.put_metric_data(namespace, name, value, timestamp, unit, dimensions, statistics)
def connect_cloudwatch(aws_access_key_id=None, aws_secret_access_key=None, **kwargs):
    """
    Build a CloudWatch connection; extra keyword arguments are passed through.

    :type aws_access_key_id: string
    :param aws_access_key_id: Your AWS Access Key ID

    :type aws_secret_access_key: string
    :param aws_secret_access_key: Your AWS Secret Access Key

    :rtype: :class:`boto.ec2.cloudwatch.CloudWatchConnection`
    :return: A connection to Amazon's EC2 Monitoring service
    """
    # Imported here rather than at module level, as in the original.
    from boto.ec2.cloudwatch import CloudWatchConnection

    connection = CloudWatchConnection(aws_access_key_id,
                                      aws_secret_access_key,
                                      **kwargs)
    return connection
def set_endpoint(self, endpoint):
    """Re-create ``self.conn`` so that it talks to ``endpoint``.

    Endpoints ending in ``amazonaws.com`` use port 443 and path ``/`` with
    the first ``ec2`` replaced by ``monitoring``; all others use port 8773,
    path ``/services/CloudWatch`` and a region named ``eucalyptus``.
    Certificate validation is disabled and the HTTP timeout is 30 seconds.
    """
    #boto.set_stream_logger('foo')
    if endpoint.endswith('amazonaws.com'):
        endpoint = endpoint.replace('ec2', 'monitoring', 1)
        region = RegionInfo(endpoint=endpoint)
        port, path = 443, '/'
    else:
        region = RegionInfo(name='eucalyptus', endpoint=endpoint)
        port, path = 8773, '/services/CloudWatch'

    connection = CloudWatchConnection(self.access_id, self.secret_key,
                                      region=region, port=port, path=path,
                                      is_secure=True,
                                      security_token=self.token, debug=0)
    self.conn = connection
    self.conn.https_validate_certificates = False
    self.conn.http_connection_kwargs['timeout'] = 30
def __init__(self, clc_host, access_id, secret_key, token):
    """Open a CloudWatch connection to ``clc_host``.

    Hosts ending in ``amazonaws.com`` use port 443 and path ``/`` with the
    first ``ec2`` replaced by ``monitoring``; all others use port 8773 and
    path ``/services/CloudWatch``.  Certificate validation is disabled and
    the HTTP timeout is 30 seconds.
    """
    # boto.set_stream_logger('foo')
    if clc_host.endswith("amazonaws.com"):
        clc_host = clc_host.replace("ec2", "monitoring", 1)
        port, path = 443, "/"
    else:
        port, path = 8773, "/services/CloudWatch"

    region = RegionInfo(name="eucalyptus", endpoint=clc_host)
    self.conn = CloudWatchConnection(
        access_id,
        secret_key,
        region=region,
        port=port,
        path=path,
        is_secure=True,
        security_token=token,
        debug=0,
    )
    self.conn.https_validate_certificates = False
    self.conn.http_connection_kwargs["timeout"] = 30
def __init__(self, args):
    """
    Store the parsed arguments and open the EC2, AutoScale, ELB and
    CloudWatch connections with the credentials they carry.
    """
    self.configs = ConfigParser.RawConfigParser()
    self.args = args
    self.test_props = {}
    self.props = {}

    # All four services are reached with the same key pair.
    credentials = (self.args.access_key, self.args.secret_key)
    self.ec2_connection = EC2Connection(*credentials)
    self.autoscale_connection = AutoScaleConnection(*credentials)
    self.elb_connection = ELBConnection(*credentials)
    self.cw_connection = CloudWatchConnection(*credentials)

    # Filled in later by the setup steps.
    self.firstInstance = None
    self.launchConfiguration = None
    self.healthCheck = None
def __init__(self, key, access):
    """Set up CloudWatch and PostgreSQL/pgbouncer connections for this node.

    User data and the region come from the instance metadata service; if
    any of those requests fails the process exits with a message.  One
    connection is opened to the configured database, one to every database
    listed by ``pg_stat_database`` whose name does not match the exclusion
    pattern, and one to pgbouncer on port 6432.

    :param key: AWS access key id.
    :param access: AWS secret access key.
    """
    try:
        url = "http://169.254.169.254/latest/"

        self.userdata = json.load(urlopen(url + "user-data/"))
        # The hostname value is not used below in this method.
        public_hostname = urlopen(url + "meta-data/public-hostname/").read()
        zone = urlopen(url + "meta-data/placement/availability-zone/").read()
        # Drop the last character of the zone string to get the region name.
        region = zone[:-1]
    except Exception:
        # Narrowed from a bare ``except:`` so Ctrl-C / SystemExit are not
        # rewritten into this message.
        sys.exit("We should be getting user-data here...")

    # the name (and identity) of the cluster (the master)
    self.cluster = self.userdata['cluster']
    self.name = "{0}.{1}".format(self.userdata['name'], self.cluster)

    endpoint = "monitoring.{0}.amazonaws.com".format(region)
    region_info = RegionInfo(name=region, endpoint=endpoint)
    self.cloudwatch = CloudWatchConnection(key, access, region=region_info)
    self.namespace = '9apps/postgres'

    self.connection = psycopg2.connect(host=settings.host,
                                       port=5432,
                                       dbname=settings.database_name,
                                       user=settings.database_user,
                                       password=settings.database_password)

    # now, the non-system database connections
    self.databases = []
    # Create the cursor before the try block: if cursor() itself failed
    # inside it, the finally clause would hit an unbound name and mask the
    # real error with a NameError.
    database_cursor = self.connection.cursor()
    try:
        database_cursor.execute(
            "select datname from pg_stat_database where datname !~ '(template[0-9]+|root|postgres)'")
        for database in database_cursor:
            self.databases.append([database[0],
                                   psycopg2.connect(host=settings.host,
                                                    port=5432,
                                                    dbname=database[0],
                                                    user=settings.database_user,
                                                    password=settings.database_password)])
    finally:
        database_cursor.close()

    self.pgbouncer = psycopg2.connect(host=settings.host,
                                      port=6432,
                                      dbname='pgbouncer',
                                      user=settings.database_user,
                                      password=settings.database_password)
    # without this it doesn't work
    self.pgbouncer.set_isolation_level(0)
def connect_cloudwatch(aws_access_key_id=None, aws_secret_access_key=None, **kwargs):
    """
    Build a CloudWatch connection bound to the GovCloud monitoring endpoint.

    The credentials are first passed through ``get_govcloud_creds``.

    :type aws_access_key_id: string
    :param aws_access_key_id: Your AWS Access Key ID

    :type aws_secret_access_key: string
    :param aws_secret_access_key: Your AWS Secret Access Key

    :rtype: :class:`boto.ec2.cloudwatch.CloudWatchConnection`
    :return: A connection to Amazon's EC2 Monitoring service
    """
    credentials = get_govcloud_creds(aws_access_key_id, aws_secret_access_key)
    access_key, secret_key = credentials

    from boto.ec2.cloudwatch import CloudWatchConnection

    govcloud = RegionInfo(name='govcloud',
                          endpoint='monitoring.us-gov-west-1.amazonaws.com')
    return CloudWatchConnection(access_key, secret_key, region=govcloud, **kwargs)
def cw_connect(region=None, *args, **kwargs):
    """Helper to connect to Amazon CloudWatch, using the identity provided
    by the environment and, optionally, a region given as an argument.

    When no region is given, ``env.get("ec2_region")`` is used.  A region
    name that matches one of ``boto.ec2.cloudwatch.regions()`` is replaced
    by that region object; otherwise the value is passed on unchanged.

    Raises ``EC2TemplateError`` when either credential variable is unset
    or empty.
    """
    for variable in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
        if not os_environ.get(variable, None):
            raise EC2TemplateError(
                "Environment variable %s is not set." % variable)

    if not region:
        region = env.get("ec2_region")

    # Swap the region name for the matching region object, if there is one.
    region = next(
        (known for known in boto.ec2.cloudwatch.regions()
         if known.name == region),
        region)

    return CloudWatchConnection(os_environ.get("AWS_ACCESS_KEY_ID"),
                                os_environ.get("AWS_SECRET_ACCESS_KEY"),
                                region=region)
def create_AutoScaling():
    """Create the launch configuration, group, policies and CPU alarms.

    Publishes the launch configuration and the auto scaling group through
    the module globals ``lc`` and ``ag``.  Two ``ChangeInCapacity``
    policies (+1 / -1, cooldown 60) are registered and wired to CloudWatch
    alarms on average ``CPUUtilization`` (> 50 over two periods scales up,
    < 20 over one period scales down).
    """
    global lc, ag

    print("Creating AutoScaling...")
    group_name = 'my_group'

    # establish connection
    autoscale = AutoScaleConnection(AWSAccessKeyId, AWSSecretKey)

    # create launch configuration
    lc = LaunchConfiguration(name='lc',
                             image_id=DATA_CEN_AMI,
                             key_name=ACCESS_KEY,
                             instance_monitoring=True,
                             security_groups=[SECURITY_GRP],
                             instance_type=MACHINE_TYPE)
    autoscale.create_launch_configuration(lc)

    # create tag for autoscaling group
    project_tag = Tag(key="Project", value="2.2", propagate_at_launch=True,
                      resource_id=group_name)

    # create autoscaling group and associate it with the launch configuration
    ag = AutoScalingGroup(group_name=group_name,
                          load_balancers=['myELB'],
                          availability_zones=['us-east-1a'],
                          launch_config=lc,
                          min_size=MIN_SIZE,
                          max_size=MAX_SIZE,
                          connection=autoscale,
                          tags=[project_tag])
    autoscale.create_auto_scaling_group(ag)

    # build the scale policies, then register them
    adjustments = (('scale_up', 1), ('scale_down', -1))
    policies = [ScalingPolicy(name=policy_name,
                              adjustment_type='ChangeInCapacity',
                              as_name=group_name,
                              scaling_adjustment=step,
                              cooldown=60)
                for policy_name, step in adjustments]
    for policy in policies:
        autoscale.create_scaling_policy(policy)

    # refresh the scale policies for extra information
    scale_up_policy, scale_down_policy = [
        autoscale.get_all_policies(as_group=group_name,
                                   policy_names=[policy_name])[0]
        for policy_name, _ in adjustments]

    # create cloudwatch connection
    watch = CloudWatchConnection(aws_access_key_id=AWSAccessKeyId,
                                 aws_secret_access_key=AWSSecretKey,
                                 is_secure=True)

    # associate the alarms with the group
    dims = {"AutoScalingGroupName": group_name}

    # create scale up alarm
    watch.create_alarm(MetricAlarm(name='scale_up_on_cpu',
                                   namespace='AWS/EC2',
                                   metric='CPUUtilization',
                                   statistic='Average',
                                   comparison='>',
                                   threshold='50',
                                   period='60',
                                   evaluation_periods=2,
                                   alarm_actions=[scale_up_policy.policy_arn],
                                   dimensions=dims))

    # create scale down alarm
    watch.create_alarm(MetricAlarm(name='scale_down_on_cpu',
                                   namespace='AWS/EC2',
                                   metric='CPUUtilization',
                                   statistic='Average',
                                   comparison='<',
                                   threshold='20',
                                   period='60',
                                   evaluation_periods=1,
                                   alarm_actions=[scale_down_policy.policy_arn],
                                   dimensions=dims))

    print("AutoScaling created successfully")
def connect_cloudwatch(self):
    """Return a CloudWatch connection built from the configured key pair.

    Reads ``access_key`` and ``secret_key`` from ``self.config`` and logs
    an info message before connecting.
    """
    config = self.config
    access_key, secret_key = config['access_key'], config['secret_key']
    LOGGER.info('connecting to cloudwatch server')
    return CloudWatchConnection(access_key, secret_key)
def __init__(self, environment, disco_sns=None, alarm_configs=None):
    """Open a CloudWatch connection and remember the collaborators.

    ``disco_sns`` and ``alarm_configs`` are stored as given (possibly
    ``None``) in the private ``_disco_sns`` / ``_alarm_configs`` slots.
    """
    self.cloudwatch = CloudWatchConnection()
    self.environment = environment
    self._disco_sns, self._alarm_configs = disco_sns, alarm_configs
from boto.ec2.cloudwatch import CloudWatchConnection
from boto.exception import NoAuthHandlerFound

from cloudmetrics.backends import MetricsBackend


# Limit defined by Amazon - don't change it unless they do.
AWS_MAX_BATCH_SIZE = 10

# This relies on /etc/boto.cfg being configured.
# The connection is created once, when the module is imported.
try:
    CLOUDWATCH_CONNECTION = CloudWatchConnection()
except NoAuthHandlerFound:
    # The handler only re-raises, so a missing-credentials error still
    # propagates out of the import.
    raise


class CloudWatchMetricsBackend(MetricsBackend):

    # Send as many items as possible in each CloudWatch API call.
    BUFFER_SIZE = AWS_MAX_BATCH_SIZE

    # Set this per environment.
    ENVIRONMENT = 'production'

    def _get_dimensions(self):
        """
        Create a dimensions dictionary. Use the hostname if "use_hostname"
        has been called. Differentiate between production, staging, dev, etc,
        because CloudWatch is a single pool of metric data.

        """
def create_cloudwatch_connection(key):
    """Return a CloudWatch connection for ``key``.

    ``key`` must expose ``access_key_id`` and ``secret_key`` attributes.
    """
    access_key_id = key.access_key_id
    secret_key = key.secret_key
    return CloudWatchConnection(access_key_id, secret_key)
def __init__(self, application, configuration=None, resource=None):
    """Initialise the base class, then open a CloudWatch connection.

    All three arguments are handed to the parent initialiser unchanged; the
    connection is created with no explicit credentials.
    """
    parent = super(CloudWatch, self)
    parent.__init__(application, configuration, resource)
    self.cloudwatch = CloudWatchConnection()
"value": None }, "ReadThroughput": { "type": "float", "value": None }, "FreeStorageSpace": { "type": "float", "value": None } } end = datetime.datetime.now() start = end - datetime.timedelta(minutes=5) conn = CloudWatchConnection(options.access_key, options.secret_key) for k, vh in metrics.items(): try: res = conn.get_metric_statistics( 60, start, end, k, "AWS/RDS", "Average", {"DBInstanceIdentifier": options.instance_id}) except Exception, e: print "status err Error running rds_stats: %s" % e.error_message sys.exit(1) average = res[-1]["Average"] # last item in result set if (k == "FreeStorageSpace" or k == "FreeableMemory"): average = average / 1024.0**3.0 if vh["type"] == "float": metrics[k]["value"] = "%.4f" % average if vh["type"] == "int": metrics[k]["value"] = "%i" % average
def test_put_metric_data(self):
    """Push one datapoint (value 5, unit Bytes) stamped with the current time.

    Only checks that the call completes; nothing is read back.
    """
    conn = CloudWatchConnection()
    timestamp = datetime.datetime.now()
    namespace = 'boto-unit-test'
    name = 'unit-test-metric'
    conn.put_metric_data(namespace, name, 5, timestamp, 'Bytes')
class EucaELBConnection(ELBConnection):
    """ELB connection with extra servo calls and a CloudWatch side connection."""

    def __init__(self, aws_access_key_id=None, aws_secret_access_key=None,
                 is_secure=False, port=None, proxy=None, proxy_port=None,
                 proxy_user=None, proxy_pass=None, debug=0,
                 https_connection_factory=None, region=None, path='/',
                 security_token=None, validate_certs=True):
        """
        Create the load-balancing connection together with a CloudWatch
        connection (``self.cw_con``) built from the same arguments.

        When no region is given, one is built from ``DefaultRegionName``
        and ``DefaultRegionEndpoint``.

        note:: The original docstring states that the region argument is
        overridden by the region specified in the boto configuration file;
        that override is not visible in this method.
        """
        if not region:
            region = RegionInfo(self, self.DefaultRegionName,
                                self.DefaultRegionEndpoint)
        self.region = region

        # Both connections take the same positional arguments.
        shared_args = (aws_access_key_id, aws_secret_access_key, is_secure,
                       port, proxy, proxy_port, proxy_user, proxy_pass,
                       debug, https_connection_factory, region, path,
                       security_token)
        self.cw_con = CloudWatchConnection(*shared_args,
                                           validate_certs=validate_certs)
        ELBConnection.__init__(self, *shared_args,
                               validate_certs=validate_certs)

    def put_cw_metric(self, servo_instance_id, metric):
        """Send the eight servo metrics of ``metric`` via ``PutServoStates``.

        :param servo_instance_id: sent as the ``InstanceId`` parameter.
        :param metric: object exposing ``Latency``, ``RequestCount`` and the
            ``HTTPCode_*`` attributes read below.
        :return: the result of ``get_status('PutServoStates', ...)``.
        """
        params = {'InstanceId': servo_instance_id}
        name = ['Latency', 'RequestCount',
                'HTTPCode_ELB_4XX', 'HTTPCode_ELB_5XX',
                'HTTPCode_Backend_2XX', 'HTTPCode_Backend_3XX',
                'HTTPCode_Backend_4XX', 'HTTPCode_Backend_5XX']
        value = [metric.Latency, metric.RequestCount,
                 metric.HTTPCode_ELB_4XX, metric.HTTPCode_ELB_5XX,
                 metric.HTTPCode_Backend_2XX, metric.HTTPCode_Backend_3XX,
                 metric.HTTPCode_Backend_4XX, metric.HTTPCode_Backend_5XX]
        unit = ['Milliseconds'] + ['Count'] * 7
        # The CloudWatch connection is used only to encode the metric
        # parameters; the request itself goes out through this connection.
        self.cw_con.build_put_params(params, name, value=value,
                                     timestamp=None, unit=unit,
                                     dimensions=None, statistics=None)
        return self.get_status('PutServoStates', params)

    def put_instance_health(self, servo_instance_id, instances):
        """
        Test the internal loadbalancer vms

        Sends ``instances`` (when non-empty) as ``Instances.member.N.InstanceId``
        parameters of a ``PutServoStates`` call and returns its status.
        """
        params = {'InstanceId': servo_instance_id}
        if instances:
            self.build_list_params(params, instances,
                                   'Instances.member.%d.InstanceId')
        return self.get_status('PutServoStates', params)

    def get_servo_load_balancers(self, servo_instance_id):
        """Return the load balancers reported for ``servo_instance_id``.

        Instance ids of the form ``<id>:<ip>`` are split: the pair is
        registered with ``hostname_cache`` and the instance keeps only the
        id part.
        """
        params = {"InstanceId": servo_instance_id}
        lbs = self.get_list('DescribeLoadBalancersByServo', params,
                            [('member', LoadBalancer)])
        for lb in lbs:
            if lb.instances is None or not isinstance(lb.instances, Iterable):
                continue
            for inst in lb.instances:
                inst_id = str(inst.id)
                if ':' in inst_id:
                    token = inst_id.split(':')
                    inst_id = token[0]
                    ipaddr = token[1]
                    hostname_cache.register(inst_id, ipaddr)
                    inst.id = inst_id

        return lbs
class AutoScale:
    """Drive the creation of an auto scaling cluster from parsed arguments."""

    def __init__(self, args):
        """
        Initializing basic variables needed for auto scaling
        """
        self.configs = ConfigParser.RawConfigParser()
        self.args = args
        self.test_props = {}
        # populateConfigs() writes into boto_props; it was never created,
        # which made the first populated section raise AttributeError.
        self.boto_props = {}
        self.props = {}
        self.ec2_connection = EC2Connection(self.args.access_key,
                                            self.args.secret_key)
        self.autoscale_connection = AutoScaleConnection(
            self.args.access_key, self.args.secret_key)
        self.elb_connection = ELBConnection(self.args.access_key,
                                            self.args.secret_key)
        self.cw_connection = CloudWatchConnection(self.args.access_key,
                                                  self.args.secret_key)
        self.firstInstance = None
        self.launchConfiguration = None
        self.healthCheck = None

    def loadConfigs(self):
        """
        FIX ME: Currently doesnt do anything
        This method will load the configurations from boto config file if
        present else will accept parameters passed by user.
        """
        # "~" is not expanded by os.path.isfile, so the per-user file was
        # never found; expand it explicitly.
        for config_path in ("/etc/boto.cfg", os.path.expanduser("~/.boto")):
            if os.path.isfile(config_path):
                self.configs.read(config_path)
                conf = self.configs.sections()
                self.populateConfigs(conf)

        print(">>> Loaded configs")

    def populateConfigs(self, sections):
        """Copy config items into ``props`` without overwriting existing keys."""
        for section in sections:
            self.boto_props[section] = self.configs.items(section)
            for key, value in self.boto_props[section]:
                if key not in self.props:
                    self.props[key] = value

    def createLaunchConfiguration(self, lc_name, ami_id, key_name):
        """
        Creates launch configuration for the auto scaling cluster
        """
        self.launchConfiguration = LaunchConfiguration(name=lc_name,
                                                       image_id=ami_id,
                                                       key_name=key_name)
        self.autoscale_connection.create_launch_configuration(
            self.launchConfiguration)
        print(">>> Created launch configuration: " + lc_name)

    def createAutoScaleGroup(self, asg_name):
        """
        Create a Auto scaling group for the auto scaling cluster
        """
        autoScalingGroup = AutoScalingGroup(
            group_name=asg_name,
            load_balancers=[self.args.lb_name],
            launch_config=self.launchConfiguration,
            min_size=self.args.min_size,
            max_size=self.args.max_size,
            availability_zones=['us-east-1a'])
        self.autoscale_connection.create_auto_scaling_group(autoScalingGroup)
        print(">>> Created auto scaling group: " + asg_name)

    def createTrigger(self, trigger_name, measure, asg_name):
        """
        Trigger to spawn new instances as per specific metrics
        """
        dimensions = {"AutoScalingGroupName": asg_name}
        # NOTE(review): createAutoScalePolicy registers '<name>-up' and
        # '<name>-down', while this lookup asks for the bare asp_name, so it
        # may match nothing -- confirm the intended policy names.
        policies = self.autoscale_connection.get_all_policies(
            as_group=self.args.asg_name, policy_names=[self.args.asp_name])
        alarm_actions = [policy.policy_arn for policy in policies]

        alarm = MetricAlarm(name=trigger_name,
                            namespace="AWS/EC2",
                            metric=measure,
                            statistic="Average",
                            comparison=">=",
                            threshold=50,
                            period=60,
                            unit="Percent",
                            evaluation_periods=2,
                            alarm_actions=alarm_actions,
                            dimensions=dimensions)
        self.cw_connection.create_alarm(alarm)
        # Report the name the alarm was actually created with.
        print(">>> Created trigger: " + trigger_name)

    def createAutoScalePolicy(self, asp_name):
        """
        Creates a Auto scaling policy to Add/Remove a instance from auto
        scaling cluster
        """
        self.autoScalingUpPolicy = ScalingPolicy(
            name=asp_name + '-up',
            adjustment_type="ChangeInCapacity",
            as_name=self.args.asg_name,
            scaling_adjustment=1,
            cooldown=180)
        self.autoScalingDownPolicy = ScalingPolicy(
            name=asp_name + '-down',
            adjustment_type="ChangeInCapacity",
            as_name=self.args.asg_name,
            scaling_adjustment=-1,
            cooldown=180)

        self.autoscale_connection.create_scaling_policy(
            self.autoScalingUpPolicy)
        self.autoscale_connection.create_scaling_policy(
            self.autoScalingDownPolicy)

        print(">>> Created auto scaling policy: " + asp_name)

    def configureHealthCheck(self, target):
        """
        Configures health check for the cluster
        """
        self.healthCheck = HealthCheck(target=target, timeout=5)
        print(">>> Configured health check for: " + target)

    def createLoadBalancer(self, lb_name, region, lb_port, instance_port,
                           protocol):
        """
        Creates a load balancer for cluster

        Nothing is created when a balancer called ``lb_name`` already exists.
        """
        listener = (int(lb_port), int(instance_port), protocol)
        existing = self.elb_connection.get_all_load_balancers()
        # Decide once for the whole list.  The per-balancer check created
        # nothing when the account had no balancers and tried to create one
        # for every balancer with a different name.
        if any(lb.name == lb_name for lb in existing):
            print("Load balancer with name '" + lb_name + "' already exists")
            return

        self.elb_connection.create_load_balancer(lb_name, [region],
                                                 [listener])
        self.elb_connection.configure_health_check(
            name=lb_name, health_check=self.healthCheck)
        print(">>> Created load balancer: " + lb_name)

    def startInstance(self, image_id, key_name, region, instance_type):
        """
        Starts the first instance which will be serving requests irrespective
        of auto scaling instances.
        """
        reservation = self.ec2_connection.run_instances(
            image_id=image_id, min_count=1, max_count=1, placement=region,
            key_name=key_name, instance_type=instance_type)

        self.firstInstance = reservation.instances[0].id.split('\'')[0]
        # Two spaces reproduce the output of the former two-item print.
        print(">>> Started instance:  %s" % self.firstInstance)

    def registerInstanceToELB(self, lb_name):
        """
        Register the first instance started to the Elastic Load Balancer.
        """
        self.elb_connection.register_instances(load_balancer_name=lb_name,
                                               instances=[self.firstInstance])
        # Spacing matches the output of the former multi-item print.
        print(">>> Registered instance ' %s ' to load balancer '%s'"
              % (self.firstInstance, lb_name))

    def setUp(self):
        """
        Set's up the auto scaling for the application
        """
        # STEP 1: Load the configurations
        self.loadConfigs()
        # STEP 2: Configure the health check for the instances
        self.configureHealthCheck(self.args.lb_target)
        # STEP 3: Create a load balancer
        self.createLoadBalancer(self.args.lb_name, self.args.region,
                                self.args.lb_port, self.args.instance_port,
                                self.args.protocol)
        # STEP 4: Start the first instance
        self.startInstance(self.args.ami_id, self.args.key_name,
                           self.args.region, self.args.instance_type)
        # STEP 5: Register the instance to the load balancer created in STEP 3
        self.registerInstanceToELB(self.args.lb_name)
        # STEP 6: Create launch configuration to launch instances by auto scale
        self.createLaunchConfiguration(self.args.lc_name, self.args.ami_id,
                                       self.args.key_name)
        # STEP 7: Create a auto scale group which will manage the instances
        # started by auto scaling
        self.createAutoScaleGroup(self.args.asg_name)
        # STEP 8: Create a auto scaling policy to say add/remove a node
        self.createAutoScalePolicy(self.args.asp_name)
        # STEP 9: Create a trigger, so that auto scaling can trigger it to
        # start or remove a instance from auto scaling group
        self.createTrigger(self.args.trigger, self.args.measure,
                           self.args.asg_name)
def __init__(self, aws_access_key, aws_secret_key):
    """Open EC2, ELB, AutoScale and CloudWatch connections with one key pair.

    Also sets ``default_cooldown`` to 60.
    """
    credentials = (aws_access_key, aws_secret_key)
    self.ec2_conn = EC2Connection(*credentials)
    self.elb_conn = ELBConnection(*credentials)
    self.auto_scale_conn = AutoScaleConnection(*credentials)
    self.cloud_watch_conn = CloudWatchConnection(*credentials)
    self.default_cooldown = 60
class EucaELBConnection(ELBConnection):
    """ELB connection that also reports servo metrics via ``PutServoStates``."""

    def __init__(self, aws_access_key_id=None, aws_secret_access_key=None,
                 is_secure=False, port=None, proxy=None, proxy_port=None,
                 proxy_user=None, proxy_pass=None, debug=0,
                 https_connection_factory=None, region=None, path='/',
                 security_token=None, validate_certs=True):
        """
        Init method to create a new connection to EC2 Load Balancing Service.

        note:: The region argument is overridden by the region specified in
            the boto configuration file.

        A ``CloudWatchConnection`` built from the same arguments is kept in
        ``self.cw_con``; it is only used to serialise metric parameters.
        """
        if not region:
            region = RegionInfo(self, self.DefaultRegionName,
                                self.DefaultRegionEndpoint)
        self.region = region
        self.cw_con = CloudWatchConnection(
            aws_access_key_id, aws_secret_access_key, is_secure, port,
            proxy, proxy_port, proxy_user, proxy_pass, debug,
            https_connection_factory, region, path, security_token,
            validate_certs=validate_certs)
        ELBConnection.__init__(
            self, aws_access_key_id, aws_secret_access_key, is_secure, port,
            proxy, proxy_port, proxy_user, proxy_pass, debug,
            https_connection_factory, region, path, security_token,
            validate_certs=validate_certs)

    def put_cw_metric(self, servo_instance_id, metric):
        """Send the servo's ELB metrics and apply the intervals returned.

        :param servo_instance_id: value sent as the ``InstanceId`` parameter.
        :param metric: object exposing one attribute per metric name listed
            below (``Latency``, ``RequestCount``, ...).

        The response's ``servo_response_metadata`` is used to update the
        polling/reporting periods in ``config``.  A failure while applying
        those periods is logged and otherwise ignored, so reporting metrics
        stays best-effort.
        """
        params = {'InstanceId': servo_instance_id}
        name = [
            'Latency', 'RequestCount',
            'HTTPCode_ELB_4XX', 'HTTPCode_ELB_5XX',
            'HTTPCode_Backend_2XX', 'HTTPCode_Backend_3XX',
            'HTTPCode_Backend_4XX', 'HTTPCode_Backend_5XX',
        ]
        # Each metric name doubles as the attribute name on ``metric``.
        value = [getattr(metric, metric_name) for metric_name in name]
        # Latency is reported in milliseconds, everything else is a count.
        unit = ['Milliseconds'] + ['Count'] * (len(name) - 1)
        self.cw_con.build_put_params(params, name, value=value,
                                     timestamp=None, unit=unit,
                                     dimensions=None, statistics=None)
        response = self.get_object('PutServoStates', params,
                                   PutServoStatesResponseType)
        try:
            metadata = response.servo_response_metadata
            config.set_query_period(metadata.get_lb_interval)
            config.set_cwatch_report_period(metadata.put_metric_interval)
            config.set_backend_instance_health_period(
                metadata.put_instance_health_interval)
        except Exception as err:
            # Previously swallowed silently; keep it non-fatal but visible.
            import logging
            logging.getLogger(__name__).warning(
                "could not apply servo intervals from PutServoStates: %s",
                err)
def __init__(self, settings):
    """Create the CloudWatch connection described by *settings*.

    :param settings: mapping providing the ``'key'``, ``'secret'`` and
        ``'namespace'`` entries read below.
    """
    access_key = settings['key']
    secret_key = settings['secret']
    self.conn = CloudWatchConnection(access_key, secret_key)
    self.namespace = settings['namespace']
class WatchData:
    """CPU-load snapshot of one Auto Scaling group plus the action taken.

    Instances are pickled to ``datafile`` between runs.  Live boto handles
    (``ec2``, ``cw``, ``autoscale``, ``group``) and ``instances_info`` are
    left out of the pickled state.
    """

    datafile = "/tmp/watchdata.p"
    dry = False          # when True, decisions are logged but AWS is not called
    low_limit = 70       # compared with the projected average load
    high_limit = 90      # compared with the average load
    high_urgent = 95     # compared with a single instance's load
    stats_period = 120   # period argument passed to get_metric_statistics
    history_size = 0     # entries kept by store(); 0 disables the history

    # Attributes that must never be pickled.
    _TRANSIENT = ('ec2', 'cw', 'autoscale', 'group', 'instances_info')

    def __init__(self):
        self.name = ''
        self.instances = 0
        self.new_desired = 0
        self.desired = 0
        self.instances_info = None
        self.previous_instances = 0
        self.action = ""
        self.action_ts = 0
        self.changed_ts = 0
        self.total_load = 0
        self.avg_load = 0
        self.max_load = 0
        self.up_ts = 0
        self.down_ts = 0
        self.max_loaded = None
        self.loads = {}
        self.measures = {}
        self.emergency = False
        self.history = None

    def __getstate__(self):
        """Return the instance dict without the transient attributes.

        Missing attributes are tolerated so an object that was never
        connected can still be serialised.
        """
        state = self.__dict__.copy()
        for key in self._TRANSIENT:
            state.pop(key, None)
        return state

    def connect(self, groupname):
        """Open the AWS connections and load the group named *groupname*."""
        self.ec2 = boto.connect_ec2()
        self.cw = CloudWatchConnection()
        self.autoscale = AutoScaleConnection()
        self.group = self.autoscale.get_all_groups(names=[groupname])[0]
        self.instances = len(self.group.instances)
        self.desired = self.group.desired_capacity
        self.name = groupname

    def get_instances_info(self):
        """Fetch the EC2 instance objects of every group member."""
        ids = [member.instance_id for member in self.group.instances]
        self.instances_info = self.ec2.get_only_instances(instance_ids=ids)

    def get_CPU_loads(self):
        """ Read instances load and store in data """
        for instance in self.group.instances:
            load = self.get_instance_CPU_load(instance.instance_id)
            if load is None:
                continue
            self.total_load += load
            self.loads[instance.instance_id] = load
            if load > self.max_load:
                self.max_load = load
                self.max_loaded = instance.instance_id
        # An empty group would otherwise raise ZeroDivisionError.
        if self.instances:
            self.avg_load = self.total_load / self.instances

    def get_instance_CPU_load(self, instance):
        """Return the newest ``Average`` CPU datapoint, or None if there is none.

        Also records the number of datapoints in ``self.measures``.
        """
        # CloudWatch timestamps are UTC; a naive local time shifts the window
        # on hosts whose clock is not set to UTC.
        end = datetime.datetime.utcnow()
        start = end - datetime.timedelta(seconds=300)
        datapoints = self.cw.get_metric_statistics(
            self.stats_period, start, end, "CPUUtilization", "AWS/EC2",
            ["Average"], {"InstanceId": instance})
        if len(datapoints) > 0:
            self.measures[instance] = len(datapoints)
            newest = max(datapoints, key=lambda point: point['Timestamp'])
            return newest['Average']
        return None

    @classmethod
    def from_file(cls):
        """Load the previous state from ``datafile``; fall back to a new object.

        NOTE(review): this unpickles a file under /tmp; unpickling data that
        another user can write is unsafe -- consider a private directory.
        """
        try:
            with open(cls.datafile, "rb") as handle:
                return pickle.load(handle)
        except Exception:
            # Missing, truncated or incompatible file: start from scratch.
            return cls()

    def store(self, annotation=False):
        """Pickle this object to ``datafile`` and optionally store an annotation."""
        if self.history_size > 0:
            if not self.history:
                self.history = []
            self.history.append([int(time.time()),
                                 len(self.group.instances),
                                 int(round(self.total_load))])
            self.history = self.history[-self.history_size:]
        with open(self.datafile, "wb") as handle:
            pickle.dump(self, handle)
        if annotation:
            import utils
            text = json.dumps(self.__getstate__(), skipkeys=True)
            utils.store_annotation("ec2_watch", text)

    def check_too_low(self):
        """Kill the first instance whose load is far below the average.

        Returns True when an instance was killed, else ``self.emergency``.
        """
        for instance, load in self.loads.items():
            if (load is not None and self.measures[instance] > 1
                    and self.instances > 1
                    and load < self.avg_load * 0.2 and load < 4):
                self.emergency = True
                self.check_avg_low()  # Check if the desired instances can be decreased
                self.action = "EMERGENCY LOW (%s %5.2f%%) " % (instance, load)
                self.kill_instance(instance)
                return True
        return self.emergency

    def check_too_high(self):
        """React to the first instance whose load exceeds ``high_urgent``.

        The instance is killed when it is an outlier, otherwise the group is
        grown by one.  Returns True when it acted, else ``self.emergency``.
        """
        for instance, load in self.loads.items():
            if (load is not None and self.measures[instance] > 1
                    and load > self.high_urgent):
                self.emergency = True
                self.action = "EMERGENCY HIGH (%s %5.2f%%) " % (instance, load)
                if self.instances > 1 and load > self.avg_load * 1.5:
                    self.action += " killing bad instance"
                    self.kill_instance(instance)
                else:
                    self.action += " increasing instances to %d" % (self.instances + 1,)
                    self.set_desired(self.instances + 1)
                return True
        return self.emergency

    def check_avg_high(self):
        """Grow the group by one when the average load is above the threshold.

        Returns True when the group was grown, False otherwise.
        """
        threshold = self.high_limit
        if self.instances == 1:
            threshold = threshold * 0.9  # Increase faster if there is just one instance
        if self.avg_load > threshold:
            self.action = "WARN, high load: %d -> %d " % (self.instances, self.instances + 1)
            self.set_desired(self.instances + 1)
            return True
        return False

    def check_avg_low(self):
        """Shrink the group by one if the remaining instances could carry the load.

        Returns False when the group cannot shrink; otherwise returns None
        whether or not it shrank (unchanged from the previous behaviour).
        """
        # The second test avoids dividing by zero when min_size is 0.
        if self.instances <= self.group.min_size or self.instances <= 1:
            return False
        if self.total_load / (self.instances - 1) < self.low_limit:
            self.action = "low load: %d -> %d " % (self.instances, self.instances - 1)
            self.set_desired(self.instances - 1)

    def kill_instance(self, id):
        """Log the decision and terminate instance *id* unless ``dry`` is set."""
        if self.action:
            print(self.action)
        print("Kill instance %s" % (id,))
        syslog.syslog(syslog.LOG_INFO,
                      "ec2_watch kill_instance: %s instances: %d (%s)"
                      % (id, self.instances, self.action))
        if self.dry:
            return
        self.ec2.terminate_instances(instance_ids=[id])
        self.action_ts = time.time()

    def set_desired(self, desired):
        """Log the decision and set the group capacity unless ``dry`` is set.

        Values below the group's ``min_size`` are ignored.
        """
        if self.action:
            print(self.action)
        print("Setting instances from %d to %d" % (self.instances, desired))
        syslog.syslog(syslog.LOG_INFO,
                      "ec2_watch set_desired: %d -> %d (%s)"
                      % (self.instances, desired, self.action))
        if self.dry:
            return
        if desired >= self.group.min_size:
            self.group.set_capacity(desired)
            self.action_ts = time.time()
            self.new_desired = desired
class EucaELBConnection(ELBConnection):
    """ELB connection extended with the servo-specific requests."""

    def __init__(self, aws_access_key_id=None, aws_secret_access_key=None,
                 is_secure=False, port=None, proxy=None, proxy_port=None,
                 proxy_user=None, proxy_pass=None, debug=0,
                 https_connection_factory=None, region=None, path='/',
                 security_token=None, validate_certs=True):
        """
        Init method to create a new connection to EC2 Load Balancing Service.

        note:: The region argument is overridden by the region specified in
            the boto configuration file.
        """
        if not region:
            region = RegionInfo(self, self.DefaultRegionName,
                                self.DefaultRegionEndpoint)
        self.region = region
        # Both connections receive exactly the same positional arguments.
        shared_args = (aws_access_key_id, aws_secret_access_key, is_secure,
                       port, proxy, proxy_port, proxy_user, proxy_pass,
                       debug, https_connection_factory, region, path,
                       security_token)
        self.cw_con = CloudWatchConnection(*shared_args,
                                           validate_certs=validate_certs)
        ELBConnection.__init__(self, *shared_args,
                               validate_certs=validate_certs)

    def put_cw_metric(self, servo_instance_id, metric):
        """Send the servo's ELB metrics with a ``PutServoStates`` request.

        :param servo_instance_id: value sent as the ``InstanceId`` parameter.
        :param metric: object exposing one attribute per metric name below.
        :return: whatever ``get_status`` returns for the request.
        """
        count_metrics = [
            'RequestCount',
            'HTTPCode_ELB_4XX', 'HTTPCode_ELB_5XX',
            'HTTPCode_Backend_2XX', 'HTTPCode_Backend_3XX',
            'HTTPCode_Backend_4XX', 'HTTPCode_Backend_5XX',
        ]
        name = ['Latency'] + count_metrics
        unit = ['Milliseconds'] + ['Count'] * len(count_metrics)
        value = [
            metric.Latency, metric.RequestCount,
            metric.HTTPCode_ELB_4XX, metric.HTTPCode_ELB_5XX,
            metric.HTTPCode_Backend_2XX, metric.HTTPCode_Backend_3XX,
            metric.HTTPCode_Backend_4XX, metric.HTTPCode_Backend_5XX,
        ]
        params = {'InstanceId': servo_instance_id}
        self.cw_con.build_put_params(params, name, value=value,
                                     timestamp=None, unit=unit,
                                     dimensions=None, statistics=None)
        return self.get_status('PutServoStates', params)

    def put_instance_health(self, servo_instance_id, instances):
        """Test the internal loadbalancer vms"""
        params = {'InstanceId': servo_instance_id}
        if instances:
            self.build_list_params(params, instances,
                                   'Instances.member.%d.InstanceId')
        return self.get_status('PutServoStates', params)

    def get_servo_load_balancers(self, servo_instance_id):
        """Return the load balancers assigned to the given servo instance.

        Instance ids of the form ``<id>:<address>`` are split: the address
        is registered in ``hostname_cache`` and the instance keeps the bare
        id.
        """
        lbs = self.get_list('DescribeLoadBalancersByServo',
                            {"InstanceId": servo_instance_id},
                            [('member', LoadBalancer)])
        for lb in lbs:
            members = lb.instances
            if members is None or not isinstance(members, Iterable):
                continue
            for inst in members:
                raw_id = str(inst.id)
                if ':' not in raw_id:
                    continue
                parts = raw_id.split(':')
                bare_id, ipaddr = parts[0], parts[1]
                hostname_cache.register(bare_id, ipaddr)
                inst.id = bare_id
        return lbs
class WatchData:
    """CPU-load snapshot of one Auto Scaling group plus the action taken.

    Instances are pickled to ``datafile`` between runs.  Live boto handles
    (``ec2``, ``cw``, ``autoscale``, ``group``) and ``instances_info`` are
    left out of the pickled state.
    """

    datafile = "/var/tmp/watchdata.p"
    dry = False          # when True, decisions are logged but AWS is not called
    low_limit = 72       # compared with the projected average load
    high_limit = 90      # compared with the average load
    high_urgent = 95     # compared with a single instance's load
    stats_period = 60    # period argument passed to get_metric_statistics
    history_size = 0     # entries kept by store(); 0 disables the history

    # Attributes that must never be pickled.
    _TRANSIENT = ('ec2', 'cw', 'autoscale', 'group', 'instances_info')

    def __init__(self):
        self.name = ''
        self.instances = 0
        self.new_desired = 0
        self.desired = 0
        self.instances_info = None
        self.previous_instances = 0
        self.action = ""
        self.action_ts = 0
        self.changed_ts = 0
        self.total_load = 0
        self.avg_load = 0
        self.max_load = 0
        self.up_ts = 0
        self.down_ts = 0
        self.max_loaded = None
        self.loads = {}
        self.measures = {}
        self.emergency = False
        self.history = None
        self.trend = 0
        self.exponential_average = 0
        self.ts = 0

    def __getstate__(self):
        """Return the instance dict without the transient attributes.

        Missing attributes are tolerated so an object that was never
        connected can still be serialised.
        """
        state = self.__dict__.copy()
        for key in self._TRANSIENT:
            state.pop(key, None)
        return state

    def connect(self, groupname):
        """Open the AWS connections and load the group named *groupname*."""
        self.ec2 = boto.connect_ec2()
        self.cw = CloudWatchConnection()
        self.autoscale = AutoScaleConnection()
        self.group = self.autoscale.get_all_groups(names=[groupname])[0]
        self.instances = len(self.group.instances)
        self.desired = self.group.desired_capacity
        self.name = groupname
        self.ts = int(time.time())

    def get_instances_info(self):
        """Fetch the EC2 instance objects of every group member."""
        ids = [member.instance_id for member in self.group.instances]
        self.instances_info = self.ec2.get_only_instances(instance_ids=ids)

    def get_CPU_loads(self):
        """ Read instances load and store in data """
        measured = 0
        for instance in self.group.instances:
            load = self.get_instance_CPU_load(instance.instance_id)
            if load is None:
                continue
            measured += 1
            self.total_load += load
            self.loads[instance.instance_id] = load
            if load > self.max_load:
                self.max_load = load
                self.max_loaded = instance.instance_id
        # Average only over instances that actually reported a load.
        if measured > 0:
            self.avg_load = self.total_load / measured

    def get_instance_CPU_load(self, instance):
        """Return a weighted CPU load for *instance*, or None without data.

        The two newest ``Average`` datapoints are combined as
        ``0.4 * older + 0.6 * newest``; a single datapoint is returned as
        is.  The number of datapoints is recorded in ``self.measures``.
        """
        # CloudWatch timestamps are UTC; a naive local time shifts the window
        # on hosts whose clock is not set to UTC.
        end = datetime.datetime.utcnow()
        start = end - datetime.timedelta(seconds=int(self.stats_period * 3))
        datapoints = self.cw.get_metric_statistics(
            self.stats_period, start, end, "CPUUtilization", "AWS/EC2",
            ["Average"], {"InstanceId": instance})
        if len(datapoints) > 0:
            self.measures[instance] = len(datapoints)
            ordered = sorted(datapoints, key=lambda point: point['Timestamp'])
            recent = [point['Average'] for point in ordered[-2:]]
            if len(recent) == 1:
                return recent[0]
            older, newest = recent
            return 0.4 * older + 0.6 * newest
        return None

    @classmethod
    def from_file(cls):
        """Load the previous state from ``datafile``; fall back to a new object.

        NOTE(review): this unpickles a file under /var/tmp; unpickling data
        that another user can write is unsafe -- consider a private
        directory.
        """
        try:
            with open(cls.datafile, "rb") as handle:
                return pickle.load(handle)
        except Exception:
            # Missing, truncated or incompatible file: start from scratch.
            return cls()

    def store(self, annotation=False):
        """Pickle this object to ``datafile`` and optionally store an annotation."""
        if self.history_size > 0:
            if not self.history:
                self.history = []
            self.history.append([
                int(time.time()),
                len(self.group.instances),
                int(round(self.total_load)),
                int(round(self.avg_load)),
            ])
            self.history = self.history[-self.history_size:]
        with open(self.datafile, "wb") as handle:
            pickle.dump(self, handle)
        if annotation:
            import utils
            text = json.dumps(self.__getstate__(), skipkeys=True)
            utils.store_annotation("ec2_watch", text)

    def check_too_low(self):
        """Kill the first instance whose load is far below the average.

        Returns True when an instance was killed, else ``self.emergency``.
        """
        for instance, load in self.loads.items():
            if (load is not None and self.measures[instance] > 1
                    and self.instances > 1
                    and load < self.avg_load * 0.2 and load < 4):
                self.emergency = True
                self.check_avg_low()  # Check if the desired instances can be decreased
                self.action = "EMERGENCY LOW (%s %5.2f%%) " % (instance, load)
                self.kill_instance(instance)
                return True
        return self.emergency

    def check_too_high(self):
        """React to the first instance whose load exceeds ``high_urgent``.

        The instance is killed when it is an outlier, otherwise the group is
        grown by one.  Returns True when it acted, else ``self.emergency``.
        """
        for instance, load in self.loads.items():
            if (load is not None and self.measures[instance] > 1
                    and load > self.high_urgent):
                self.emergency = True
                self.action = "EMERGENCY HIGH (%s %5.2f%%) " % (instance, load)
                if self.instances > 1 and load > self.avg_load * 1.5:
                    self.action += " killing bad instance"
                    self.kill_instance(instance)
                else:
                    self.action += " increasing instances to %d" % (self.instances + 1,)
                    self.set_desired(self.instances + 1)
                return True
        return self.emergency

    def check_avg_high(self):
        """Grow the group by one when the average load is above the threshold.

        Returns True when the group was grown, False otherwise.
        """
        threshold = self.high_limit
        if self.instances == 1:
            threshold = threshold * 0.9  # Increase faster if there is just one instance
        if self.avg_load > threshold:
            self.action = "WARN, high load: %d -> %d " % (self.instances, self.instances + 1)
            self.set_desired(self.instances + 1)
            return True
        return False

    def check_avg_low(self):
        """Shrink the group by one if the remaining instances could carry the load.

        Returns False when the group cannot shrink; otherwise returns None
        whether or not it shrank (unchanged from the previous behaviour).
        """
        # The second test avoids dividing by zero when min_size is 0.
        if self.instances <= self.group.min_size or self.instances <= 1:
            return False
        if self.total_load / (self.instances - 1) < self.low_limit:
            self.action = "low load: %d -> %d " % (self.instances, self.instances - 1)
            self.set_desired(self.instances - 1)

    def kill_instance(self, id):
        """Log the decision and terminate instance *id* unless ``dry`` is set."""
        if self.action:
            print(self.action)
        # A single formatted string prints the same text on Python 2 and 3
        # (a two-argument print() call shows a tuple on Python 2).
        print("Kill instance %s" % (id,))
        syslog.syslog(syslog.LOG_INFO,
                      "ec2_watch kill_instance: %s instances: %d (%s)"
                      % (id, self.instances, self.action))
        if self.dry:
            return
        self.ec2.terminate_instances(instance_ids=[id])
        self.action_ts = time.time()

    def set_desired(self, desired):
        """Log the decision and set the group capacity unless ``dry`` is set.

        Values below the group's ``min_size`` are ignored.
        """
        if self.action:
            print(self.action)
        print("Setting instances from %d to %d" % (self.instances, desired))
        syslog.syslog(syslog.LOG_INFO,
                      "ec2_watch set_desired: %d -> %d (%s)"
                      % (self.instances, desired, self.action))
        if self.dry:
            return
        if desired >= self.group.min_size:
            self.group.set_capacity(desired)
            self.action_ts = time.time()
            self.new_desired = desired
class BotoWatchInterface(WatchInterface):
    """WatchInterface implementation backed by a boto CloudWatchConnection.

    When ``saveclcdata`` is true, the results of the read calls are also
    dumped as JSON files under ``mockdata/``.
    """

    conn = None
    saveclcdata = False

    def __init__(self, clc_host, access_id, secret_key, token):
        """Remember the credentials and connect to *clc_host*."""
        self.access_id = access_id
        self.secret_key = secret_key
        self.token = token
        self.set_endpoint(clc_host)

    def set_endpoint(self, endpoint):
        """(Re)create ``self.conn`` for *endpoint*.

        Hosts ending in ``amazonaws.com`` are pointed at the
        ``monitoring`` service on port 443; any other host is treated as a
        Eucalyptus front end (port 8773, ``/services/CloudWatch``).
        """
        #boto.set_stream_logger('foo')
        if endpoint.endswith('amazonaws.com'):
            endpoint = endpoint.replace('ec2', 'monitoring', 1)
            path = '/'
            port = 443
            reg = RegionInfo(endpoint=endpoint)
        else:
            path = '/services/CloudWatch'
            port = 8773
            reg = RegionInfo(name='eucalyptus', endpoint=endpoint)
        self.conn = CloudWatchConnection(self.access_id, self.secret_key,
                                         region=reg, port=port, path=path,
                                         is_secure=True,
                                         security_token=self.token, debug=0)
        # NOTE(review): certificate validation is switched off for every
        # endpoint, including amazonaws.com -- confirm this is intended.
        self.conn.https_validate_certificates = False
        self.conn.http_connection_kwargs['timeout'] = 30

    def __save_json__(self, obj, name):
        """Dump *obj* as indented JSON into the file *name*."""
        # ``with`` closes the file even when serialisation fails.
        with open(name, 'w') as handle:
            json.dump(obj, handle, cls=BotoJsonWatchEncoder, indent=2)

    def get_metric_statistics(self, period, start_name, end_time,
                              metric_name, namespace, statistics,
                              dimensions, unit):
        """Forward to the connection; optionally save the result as JSON."""
        obj = self.conn.get_metric_statistics(period, start_name, end_time,
                                              metric_name, namespace,
                                              statistics, dimensions, unit)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Statistics.json")
        return obj

    def list_metrics(self, next_token=None, dimensions=None,
                     metric_name=None, namespace=None):
        """Forward to the connection; optionally save the result as JSON."""
        obj = self.conn.list_metrics(next_token, dimensions, metric_name,
                                     namespace)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Metrics.json")
        return obj

    def put_metric_data(self, namespace, name, value, timestamp, unit,
                        dimensions, statistics):
        """Forward to the connection's ``put_metric_data``."""
        return self.conn.put_metric_data(namespace, name, value, timestamp,
                                         unit, dimensions, statistics)

    def describe_alarms(self, action_prefix=None, alarm_name_prefix=None,
                        alarm_names=None, max_records=None,
                        state_value=None, next_token=None):
        """Forward to the connection; optionally save the result as JSON."""
        obj = self.conn.describe_alarms(action_prefix, alarm_name_prefix,
                                        alarm_names, max_records,
                                        state_value, next_token)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Alarms.json")
        return obj

    def delete_alarms(self, alarm_names):
        """Forward to the connection's ``delete_alarms``."""
        return self.conn.delete_alarms(alarm_names)

    def enable_alarm_actions(self, alarm_names):
        """Forward to the connection's ``enable_alarm_actions``."""
        return self.conn.enable_alarm_actions(alarm_names)

    def disable_alarm_actions(self, alarm_names):
        """Forward to the connection's ``disable_alarm_actions``."""
        return self.conn.disable_alarm_actions(alarm_names)

    def put_metric_alarm(self, alarm):
        """Forward to the connection's ``put_metric_alarm``."""
        return self.conn.put_metric_alarm(alarm)
def publish_depths_to_cloudwatch(depths, namespace, envrionment):
    """Publish every queue depth in *depths* through one CloudWatch connection.

    :param depths: mapping of queue to its depth.
    :param namespace: forwarded to ``publish_queue_depth_to_cloudwatch``.
    :param envrionment: forwarded unchanged (the spelling is part of the
        existing signature).
    """
    connection = CloudWatchConnection()
    for queue, depth in depths.items():
        publish_queue_depth_to_cloudwatch(connection, queue, depth,
                                          namespace, envrionment)
def test_put_metric_data(self):
    """Push one 5-byte datapoint stamped with the current UTC time."""
    connection = CloudWatchConnection()
    timestamp = datetime.datetime.utcnow()
    metric_name = 'unit-test-metric'
    metric_namespace = 'boto-unit-test'
    connection.put_metric_data(metric_namespace, metric_name, 5, timestamp,
                               'Bytes')
class Monitor:
    """Collect PostgreSQL and pgbouncer statistics and push them to CloudWatch."""

    # Largest number of metrics sent per put_metric_data call.
    _BATCH_SIZE = 20

    def __init__(self, key, access):
        """Read the instance user-data and open every connection needed.

        :param key: AWS access key handed to the CloudWatch connection.
        :param access: AWS secret key handed to the CloudWatch connection.

        Exits the process when the instance metadata cannot be read.
        """
        try:
            url = "http://169.254.169.254/latest/"
            self.userdata = json.load(urlopen(url + "user-data/"))
            # NOTE(review): the hostname itself is never used; the request is
            # kept so start-up behaviour is unchanged.
            urlopen(url + "meta-data/public-hostname/").read()
            zone = urlopen(url + "meta-data/placement/availability-zone/").read()
            # The region is the availability zone minus its last character.
            region = zone[:-1]
        except Exception:
            sys.exit("We should be getting user-data here...")

        # the name (and identity) of the cluster (the master)
        self.cluster = self.userdata['cluster']
        self.name = "{0}.{1}".format(self.userdata['name'], self.cluster)

        endpoint = "monitoring.{0}.amazonaws.com".format(region)
        region_info = RegionInfo(name=region, endpoint=endpoint)

        self.cloudwatch = CloudWatchConnection(key, access, region=region_info)
        self.namespace = '9apps/postgres'

        self.connection = psycopg2.connect(host=settings.host, port=5432,
                                           dbname=settings.database_name,
                                           user=settings.database_user,
                                           password=settings.database_password)

        # now, the non-system database connections
        self.databases = []
        # Create the cursor before the try block so a failure here is not
        # masked by a NameError in the finally clause.
        database_cursor = self.connection.cursor()
        try:
            database_cursor.execute("select datname from pg_stat_database where datname !~ '(template[0-9]+|root|postgres)'")
            for database in database_cursor:
                self.databases.append([database[0],
                                       psycopg2.connect(host=settings.host, port=5432,
                                                        dbname=database[0],
                                                        user=settings.database_user,
                                                        password=settings.database_password)])
        finally:
            database_cursor.close()

        self.pgbouncer = psycopg2.connect(host=settings.host, port=6432,
                                          dbname='pgbouncer',
                                          user=settings.database_user,
                                          password=settings.database_password)
        # without this it doesn't work
        self.pgbouncer.set_isolation_level(0)

    def __del__(self):
        """Close every connection that was actually opened."""
        handles = [getattr(self, 'connection', None),
                   getattr(self, 'pgbouncer', None)]
        handles.extend(entry[1] for entry in getattr(self, 'databases', []))
        for handle in handles:
            if handle is None:
                continue
            try:
                handle.close()
            except Exception:
                # Never raise from a finaliser.
                pass

    @staticmethod
    def _fetch_one(connection, sql, params=None):
        """Run *sql* on *connection* and return its first row; always close the cursor."""
        cursor = connection.cursor()
        try:
            if params is None:
                cursor.execute(sql)
            else:
                cursor.execute(sql, params)
            return cursor.fetchone()
        finally:
            cursor.close()

    @staticmethod
    def _xlog_offset(location):
        """Convert an ``X/Y`` hexadecimal xlog location into a byte offset."""
        # NOTE(review): 0xff000000 bytes per logical xlog id is presumably
        # tied to the PostgreSQL version in use -- confirm.
        [x, y] = location.split('/')
        return (int('ff000000', 16) * int(x, 16)) + int(y, 16)

    @staticmethod
    def _xlog_offset_or_none(row, index):
        """Like ``_xlog_offset`` for ``row[index]``; None when it cannot be parsed."""
        try:
            return Monitor._xlog_offset(row[index])
        except (AttributeError, IndexError, TypeError, ValueError):
            return None

    def is_in_recovery(self):
        """Return True when ``pg_is_in_recovery()`` reports true."""
        self.connection.autocommit = True
        row = self._fetch_one(self.connection, "select pg_is_in_recovery()")
        return row[0] == True

    def collect(self, monitoring='on'):
        """Gather every metric.

        :param monitoring: nothing is collected unless this is ``'on'`` or
            ``'all'``.
        :return: ``[names, values, units, dimensions]`` where the three
            lists are parallel and *dimensions* maps ``name`` and
            ``cluster``.
        """
        if monitoring not in ['on', 'all']:
            return [[], [], [], {}]

        names = []
        values = []
        units = []
        dimensions = {'name': self.name,
                      'cluster': self.cluster}

        def add(name, value, unit):
            names.append(name)
            values.append(value)
            units.append(unit)

        if 'master' in self.userdata:
            [offset, receive_offset, replay_offset] = self._get_standby_lag()
            if receive_offset is not None:
                add('receive_lag', int(offset - receive_offset), 'Bytes')
            if replay_offset is not None:
                add('replay_lag', int(offset - replay_offset), 'Bytes')

        for database in self.databases:
            db_name, db_connection = database[0], database[1]
            for relation in ["heap", "idx"]:
                [read, hit, hitratio] = self._get_hitratio(db_connection, relation)
                # SQL sum() yields NULL when there are no relations; report 0.
                add("{0}_{1}_read".format(db_name, relation), int(read or 0), "Count")
                add("{0}_{1}_hit".format(db_name, relation), int(hit or 0), "Count")
                if hitratio is not None:
                    add("{0}_{1}_hitratio".format(db_name, relation),
                        float(hitratio * 100), "Percent")

            conflicts = self._get_conflicts(db_name)
            conflict_names = ['confl_tablespace', 'confl_lock', 'confl_snapshot',
                              'confl_bufferpin', 'confl_deadlock']
            for position, conflict_name in enumerate(conflict_names):
                add("{0}_{1}".format(db_name, conflict_name),
                    int(conflicts[position]), "Count")

            indexes_size = self._get_indexes_size(db_connection)
            add("{0}_indexes_size".format(db_name), int(indexes_size or 0), "Bytes")

            tables_size = self._get_tables_size(db_connection)
            add("{0}_tables_size".format(db_name), int(tables_size or 0), "Bytes")

        # nr of wal files
        add("wal_files", int(self._get_nr_wal_files()), "Count")

        # pgbouncer stats
        stats = self._get_pgbouncer_stats()
        add("pgbouncer_avg_req", int(stats[0]), "Count/Second")
        add("pgbouncer_avg_recv", int(stats[1]), "Bytes/Second")
        add("pgbouncer_avg_sent", int(stats[2]), "Bytes/Second")
        # Convert before dividing: integer division would truncate every
        # sub-second average to 0.
        add("pgbouncer_avg_query", float(stats[3]) / 1000000, "Seconds")

        # pgbouncer pools
        pools = self._get_pgbouncer_pools()
        pool_names = ["pgbouncer_cl_active", "pgbouncer_cl_waiting",
                      "pgbouncer_sv_active", "pgbouncer_sv_idle",
                      "pgbouncer_sv_used", "pgbouncer_sv_tested",
                      "pgbouncer_sv_login", "pgbouncer_maxwait"]
        for position, pool_name in enumerate(pool_names):
            add(pool_name, float(pools[position]), "Count")

        return [names, values, units, dimensions]

    def put(self):
        """Collect the metrics and send them to CloudWatch in batches.

        Each batch is sent once without dimensions and once per dimension.
        Returns the combined result of the last batch, or False when
        nothing was sent.
        """
        result = False
        # only monitor if we are told to; default to 'on' when not set
        try:
            monitoring = self.userdata['monitoring']
        except (KeyError, TypeError, IndexError):
            monitoring = 'on'

        if monitoring in ['on', 'all']:
            # first get all we need
            [names, values, units, dimensions] = self.collect(monitoring)
            # we can't send all at once, only 20 at a time
            for begin in range(0, len(names), self._BATCH_SIZE):
                end = begin + self._BATCH_SIZE
                names20 = names[begin:end]
                values20 = values[begin:end]
                units20 = units[begin:end]

                # first aggregated over all
                result = self.cloudwatch.put_metric_data(self.namespace, names20,
                                                         value=values20, unit=units20)
                for key, val in dimensions.items():
                    result &= self.cloudwatch.put_metric_data(
                        self.namespace, names20, value=values20,
                        unit=units20, dimensions={key: val})
        else:
            print("we are not monitoring")

        return result

    def metrics(self):
        """Return ``list_metrics()`` of the CloudWatch connection."""
        return self.cloudwatch.list_metrics()

    def _get_nr_wal_files(self):
        """Count the entries of ``pg_xlog`` other than ``archive_status``."""
        sql = "select count(name) from (select pg_ls_dir('pg_xlog') as name) as xlogs where name != 'archive_status'"
        [size] = self._fetch_one(self.connection, sql)
        return size

    def _get_tables_size(self, connection):
        """Return the summed relation size of the user tables (may be None)."""
        sql = "select sum(pg_relation_size(relid)) from pg_stat_user_tables"
        [size] = self._fetch_one(connection, sql)
        return size

    def _get_indexes_size(self, connection):
        """Return the summed relation size of the user indexes (may be None)."""
        sql = "select sum(pg_relation_size(indexrelid)) from pg_stat_user_indexes"
        [size] = self._fetch_one(connection, sql)
        return size

    def _get_conflicts(self, database):
        """Return columns 2-6 of the ``pg_stat_database_conflicts`` row of *database*."""
        # The name is bound as a query parameter instead of being formatted
        # into the SQL text.
        sql = "select * from pg_stat_database_conflicts where datname = %s"
        conflicts = self._fetch_one(self.connection, sql, (database,))
        return [conflicts[2], conflicts[3], conflicts[4], conflicts[5], conflicts[6]]

    def _get_hitratio(self, connection, relation="heap"):
        """Return ``[read, hit, hitratio]`` block statistics.

        ``"heap"`` reads ``pg_statio_user_tables``; any other value reads
        ``pg_statio_user_indexes``.
        """
        if relation == "heap":
            table = "tables"
        else:
            table = "indexes"
        sql = "select sum({0}_blks_read) as read, sum({0}_blks_hit) as hit, (sum({0}_blks_hit) - sum({0}_blks_read)) / nullif(sum({0}_blks_hit),0) as hitratio from pg_statio_user_{1}".format(relation, table)
        [read, hit, hitratio] = self._fetch_one(connection, sql)
        return [read, hit, hitratio]

    def _get_standby_lag(self):
        """Return ``[offset, receive_offset, replay_offset]`` in bytes.

        *offset* is the master's current xlog location; the other two are
        the local receive/replay locations, or None when unavailable.
        """
        # Connect before the try block so a failed connect is not masked by
        # a NameError in the finally clause.
        master = psycopg2.connect(host=self.userdata['master'],
                                  dbname=settings.database_name,
                                  user=settings.database_user,
                                  password=settings.database_password)
        try:
            master.autocommit = True
            row = self._fetch_one(
                master, "SELECT pg_current_xlog_location() AS location")
            offset = self._xlog_offset(row[0])

            one = self._fetch_one(
                self.connection,
                "SELECT pg_last_xlog_receive_location(), pg_last_xlog_replay_location()")
            receive_offset = self._xlog_offset_or_none(one, 0)
            replay_offset = self._xlog_offset_or_none(one, 1)
        finally:
            master.close()

        return [offset, receive_offset, replay_offset]

    def _get_pgbouncer_stats(self):
        """Return ``[avg_req, avg_recv, avg_sent, avg_query]`` from ``show stats``."""
        # ('pgbouncer\x00', 119L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)
        [name, total_requests, total_received, total_sent, total_query_time,
         avg_req, avg_recv, avg_sent, avg_query] = self._fetch_one(
            self.pgbouncer, 'show stats')
        return [avg_req, avg_recv, avg_sent, avg_query]

    def _get_pgbouncer_pools(self):
        """Sum the ``show pools`` counters over all pools (max for maxwait)."""
        totals = [0, 0, 0, 0, 0, 0, 0]
        maxwait = 0
        cursor = self.pgbouncer.cursor()
        try:
            cursor.execute('show pools')
            # ('pgbouncer\x00', 'pgbouncer\x00', 1, 0, 0, 0, 0, 0, 0, 0)
            for pool in cursor:
                # Columns 2..8 are summed; column 9 is the wait time.
                for position in range(7):
                    totals[position] += pool[position + 2]
                maxwait = max(maxwait, pool[9])
        finally:
            cursor.close()

        return totals + [maxwait]
class DiscoAlarm(object):
    """
    Class orchestrating CloudWatch alarms
    """

    def __init__(self, disco_sns=None):
        self.cloudwatch = CloudWatchConnection()
        self._disco_sns = disco_sns

    def upsert_alarm(self, alarm):
        """
        Create an alarm, delete and re-create if it already exists
        """
        stale_alarms = self.cloudwatch.describe_alarms(alarm_names=[alarm.name])
        for stale in stale_alarms:
            throttled_call(stale.delete)
        throttled_call(self.cloudwatch.create_alarm, alarm)

    @property
    def disco_sns(self):
        """
        Lazy sns connection
        """
        if not self._disco_sns:
            self._disco_sns = DiscoSNS()
        return self._disco_sns

    def _sns_topic(self, alarm):
        """
        Retrieve the SNS topic corresponding to the alarm
        """
        topic_name = alarm.notification_topic
        return self.disco_sns.topic_arn_from_name(topic_name)

    def create_alarms(self, alarms):
        """
        Create alarms from dict of DiscoAlarmConfig objects.
        """
        for config in alarms:
            topic = self._sns_topic(config)
            self.upsert_alarm(config.to_metric_alarm(topic))

    def alarms(self):
        """
        Iterate alarms
        """
        token = None
        while True:
            page = throttled_call(
                self.cloudwatch.describe_alarms,
                next_token=token,
            )
            for alarm in page:
                yield alarm
            token = page.next_token
            if not token:
                return

    def get_alarms(self, desired=None):
        """
        Get all alarms for an environment filtered on the desired dictionary keys
        """
        desired = desired or {}
        keys = set(desired.keys())

        def _matches(alarm):
            # Compare only the decoded name fields that the caller asked for.
            decoded = DiscoAlarmConfig.decode_alarm_name(alarm.name)
            subset = dict(
                (key, value) for key, value in decoded.items() if key in keys
            )
            return subset == desired

        return [alarm for alarm in self.alarms() if _matches(alarm)]

    def _delete_alarms(self, alarms):
        """
        Delete the given alarms in batches of DELETE_BATCH_SIZE names
        """
        alarm_names = [alarm.name for alarm in alarms]
        alarm_len = len(alarm_names)
        logging.debug("Deleting %s alarms.", alarm_len)
        for start in range(0, alarm_len, DELETE_BATCH_SIZE):
            # Slicing clamps at the end of the list by itself.
            batch = alarm_names[start:start + DELETE_BATCH_SIZE]
            throttled_call(self.cloudwatch.delete_alarms, batch)

    def delete_hostclass_environment_alarms(self, environment, hostclass):
        """
        Delete alarm in an environment by hostclass name
        """
        wanted = {"env": environment, "hostclass": hostclass}
        self._delete_alarms(self.get_alarms(wanted))

    def delete_environment_alarms(self, environment):
        """
        Delete all alarms for an environment
        """
        wanted = {"env": environment}
        self._delete_alarms(self.get_alarms(wanted))
class BotoWatchInterface(WatchInterface):
    """WatchInterface implementation backed by a boto CloudWatchConnection.

    When ``saveclcdata`` is true, the results of the read calls are also
    dumped as JSON files under ``mockdata/``.
    """

    conn = None
    saveclcdata = False

    def __init__(self, clc_host, access_id, secret_key, token):
        """Connect to the CloudWatch service reachable through *clc_host*.

        Hosts ending in ``amazonaws.com`` are pointed at the
        ``monitoring`` service on port 443; any other host uses port 8773
        and the ``/services/CloudWatch`` path.
        """
        # boto.set_stream_logger('foo')
        path = "/services/CloudWatch"
        port = 8773
        if clc_host.endswith("amazonaws.com"):
            clc_host = clc_host.replace("ec2", "monitoring", 1)
            path = "/"
            port = 443
        # The region is always named "eucalyptus", as before; only the dead
        # ``reg = None`` store was dropped.
        reg = RegionInfo(name="eucalyptus", endpoint=clc_host)
        self.conn = CloudWatchConnection(
            access_id, secret_key, region=reg, port=port, path=path,
            is_secure=True, security_token=token, debug=0
        )
        # NOTE(review): certificate validation is switched off for every
        # endpoint, including amazonaws.com -- confirm this is intended.
        self.conn.https_validate_certificates = False
        self.conn.http_connection_kwargs["timeout"] = 30

    def __save_json__(self, obj, name):
        """Dump *obj* as indented JSON into the file *name*."""
        # ``with`` closes the file even when serialisation fails.
        with open(name, "w") as handle:
            json.dump(obj, handle, cls=BotoJsonWatchEncoder, indent=2)

    def get_metric_statistics(self, period, start_name, end_time, metric_name,
                              namespace, statistics, dimensions, unit):
        """Forward to the connection; optionally save the result as JSON."""
        obj = self.conn.get_metric_statistics(
            period, start_name, end_time, metric_name, namespace,
            statistics, dimensions, unit
        )
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Statistics.json")
        return obj

    def list_metrics(self, next_token=None, dimensions=None, metric_name=None,
                     namespace=None):
        """Forward to the connection; optionally save the result as JSON."""
        obj = self.conn.list_metrics(next_token, dimensions, metric_name,
                                     namespace)
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Metrics.json")
        return obj

    def put_metric_data(self, namespace, name, value, timestamp, unit,
                        dimensions, statistics):
        """Forward to the connection's ``put_metric_data``."""
        return self.conn.put_metric_data(namespace, name, value, timestamp,
                                         unit, dimensions, statistics)

    def describe_alarms(
        self,
        action_prefix=None,
        alarm_name_prefix=None,
        alarm_names=None,
        max_records=None,
        state_value=None,
        next_token=None,
    ):
        """Forward to the connection; optionally save the result as JSON."""
        obj = self.conn.describe_alarms(
            action_prefix, alarm_name_prefix, alarm_names, max_records,
            state_value, next_token
        )
        if self.saveclcdata:
            self.__save_json__(obj, "mockdata/CW_Alarms.json")
        return obj

    def delete_alarms(self, alarm_names):
        """Forward to the connection's ``delete_alarms``."""
        return self.conn.delete_alarms(alarm_names)

    def enable_alarm_actions(self, alarm_names):
        """Forward to the connection's ``enable_alarm_actions``."""
        return self.conn.enable_alarm_actions(alarm_names)

    def disable_alarm_actions(self, alarm_names):
        """Forward to the connection's ``disable_alarm_actions``."""
        return self.conn.disable_alarm_actions(alarm_names)

    def put_metric_alarm(self, alarm):
        """Forward to the connection's ``put_metric_alarm``."""
        return self.conn.put_metric_alarm(alarm)
def __init__(self, disco_sns=None):
    """Store the optional SNS helper and open a CloudWatch connection.

    :param disco_sns: object kept in ``self._disco_sns``; defaults to None.
    """
    self._disco_sns = disco_sns
    self.cloudwatch = CloudWatchConnection()
def cloud_watch_connection(self):
    """Return the cached CloudWatch connection, creating it on first use."""
    cached = self._cloud_watch_connection
    if cached is not None:
        return cached
    self._cloud_watch_connection = CloudWatchConnection(
        self.aws_access_key, self.aws_secret_access_key)
    return self._cloud_watch_connection
def conn_to_cw():
    """Return a new CloudWatchConnection built from ``get_region()``."""
    # NOTE(review): the region is passed as the first positional argument,
    # which other CloudWatchConnection calls use for the access key id --
    # confirm whether ``region=`` was intended.
    region = get_region()
    return CloudWatchConnection(region)