def get_elb_stats(name, metric, minutes=60, period=60):
    """
    Get CloudWatch statistics for a load balancer

    API docs:
    http://docs.amazonwebservices.com/AmazonCloudWatch/latest/APIReference/API_GetMetricStatistics.html

    Hint: to figure out the exact params, use the AWS console and look at the
    query params when clicking on cloudwatch metrics...

    @param name: string, ELB name
    @param metric: string, metric to retrieve (RequestCount, HTTPCode_Backend_2XX, etc)
    @param minutes: int, minutes to look back
    @param period: int, sample bucket size in seconds
    """
    print 'Stats for \'%s\' for the last %dm (bucket: %ds):' % (name, minutes, period)
    try:
        c = boto.connect_cloudwatch()
        end = datetime.datetime.utcnow()
        start = end - datetime.timedelta(minutes=minutes)
        stats = c.get_metric_statistics(period, start, end, metric, 'AWS/ELB',
                                        'Sum', InstanceDimension("LoadBalancerName", name))
        for stat in stats:
            print '\t%s: %f' % (stat[u'Timestamp'], stat[u'Sum'])
    except BotoServerError, error:
        print >> sys.stderr, 'Boto API error: ', error
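# Usage sketch for get_elb_stats above -- a minimal, hypothetical driver, not
# part of the original project. It assumes boto 2, and stands in a small dict
# helper for the InstanceDimension name the snippet references but never defines.
import sys
import datetime

import boto
from boto.exception import BotoServerError


class InstanceDimension(dict):
    """Hypothetical stand-in: a one-key CloudWatch dimensions dict."""
    def __init__(self, name, value):
        self[name] = [value]


# Sum of requests against a placeholder ELB over 2 hours, in 5-minute buckets.
get_elb_stats('my-elb', 'RequestCount', minutes=120, period=300)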
def test_create_alarm():
    conn = boto.connect_cloudwatch()

    alarm = MetricAlarm(
        name='tester',
        comparison='>=',
        threshold=2.0,
        period=60,
        evaluation_periods=5,
        statistic='Average',
        description='A test',
        dimensions={'InstanceId': ['i-0123456,i-0123457']},
        alarm_actions=['arn:alarm'],
        ok_actions=['arn:ok'],
        insufficient_data_actions=['arn:insufficient'],
        unit='Seconds',
    )
    conn.create_alarm(alarm)

    alarms = conn.describe_alarms()
    alarms.should.have.length_of(1)
    alarm = alarms[0]
    alarm.name.should.equal('tester')
    alarm.comparison.should.equal('>=')
    alarm.threshold.should.equal(2.0)
    alarm.period.should.equal(60)
    alarm.evaluation_periods.should.equal(5)
    alarm.statistic.should.equal('Average')
    alarm.description.should.equal('A test')
    dict(alarm.dimensions).should.equal({'InstanceId': ['i-0123456,i-0123457']})
    list(alarm.alarm_actions).should.equal(['arn:alarm'])
    list(alarm.ok_actions).should.equal(['arn:ok'])
    list(alarm.insufficient_data_actions).should.equal(['arn:insufficient'])
    alarm.unit.should.equal('Seconds')
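# The test_* snippets in this collection appear to exercise moto's CloudWatch
# mock with the `sure` assertion library. A minimal sketch of the harness they
# assume (decorator and assertion style come from those libraries; this test
# body is illustrative, not from the original suites):
import boto
import sure  # noqa: enables the .should.* assertion syntax
from moto import mock_cloudwatch


@mock_cloudwatch
def test_starts_empty():
    conn = boto.connect_cloudwatch()
    # A fresh mock account has no metrics and no alarms.
    conn.list_metrics().should.have.length_of(0)
    conn.describe_alarms().should.have.length_of(0)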
def __init__(self, ag_name, lower, upper, lower_threshold, upper_threshold):
    self.cloudwatch = boto.connect_cloudwatch()
    # Narrow the configured band by 3 on each side.
    self.upper = upper - 3
    self.lower = lower + 3
    self.lower_threshold = lower_threshold
    self.upper_threshold = upper_threshold
    self.ag_name = ag_name
def test_delete_alarm():
    conn = boto.connect_cloudwatch()

    alarm = MetricAlarm(
        name='tester',
        comparison='>=',
        threshold=2.0,
        period=60,
        evaluation_periods=5,
        statistic='Average',
        description='A test',
        dimensions={'InstanceId': ['i-0123456,i-0123457']},
        alarm_actions=['arn:alarm'],
        ok_actions=['arn:ok'],
        insufficient_data_actions=['arn:insufficient'],
        unit='Seconds',
    )
    conn.create_alarm(alarm)

    alarms = conn.describe_alarms()
    alarms.should.have.length_of(1)

    alarms[0].delete()

    alarms = conn.describe_alarms()
    alarms.should.have.length_of(0)
def get_rds_stats(db_ident, metric):
    """Function for fetching RDS statistics from CloudWatch"""
    cw = boto.connect_cloudwatch()
    result = cw.get_metric_statistics(300,
                                      datetime.datetime.utcnow() - datetime.timedelta(seconds=300),
                                      datetime.datetime.utcnow(),
                                      metric,
                                      'AWS/RDS',
                                      'Average',
                                      dimensions={'DBInstanceIdentifier': [db_ident]})
    debug('Result: %s' % result)
    if result:
        if metric in ('ReadLatency', 'WriteLatency'):
            # Transform into milliseconds
            result = '%.2f' % float(result[0]['Average'] * 1000)
        else:
            result = '%.2f' % float(result[0]['Average'])
    elif metric == 'ReplicaLag':
        # This metric can be missing on instances that are not replicas
        result = 0
    else:
        print 'Unable to get RDS statistics'
        sys.exit(1)
    return float(result)
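# For reference: boto 2's CloudWatchConnection.get_metric_statistics takes
# period, start_time, end_time, metric_name, namespace and statistics
# positionally, which is why the calls in these snippets read the way they do.
# A self-contained sketch against a placeholder RDS instance name:
import datetime

import boto

cw = boto.connect_cloudwatch()  # credentials come from boto config/environment
now = datetime.datetime.utcnow()
points = cw.get_metric_statistics(
    300,                                  # period: one 5-minute bucket
    now - datetime.timedelta(minutes=5),  # start_time
    now,                                  # end_time
    'CPUUtilization',                     # metric_name
    'AWS/RDS',                            # namespace
    'Average',                            # statistics
    dimensions={'DBInstanceIdentifier': ['mydb']})  # 'mydb' is a placeholder
for point in points:
    print point['Timestamp'], point['Average']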
def find_node_freespace(desc, elasticache_numnodes):
    end_time = datetime.datetime.utcnow()
    start_time = end_time - datetime.timedelta(minutes=1)

    # This sets up the connection information to CloudWatch.
    cloudwatch_connection = boto.connect_cloudwatch(autoscale_config.AWS_ACCESS_KEY,
                                                    autoscale_config.AWS_SECRET_KEY)
    #metrics = cloudwatch_connection.list_metrics(metric_name='FreeableMemory')
    metrics = cloudwatch_connection.list_metrics(
        namespace="AWS/ElastiCache",
        dimensions={'CacheClusterId': autoscale_config.CACHE_CLUSTER_NAME,
                    'CacheNodeId': '0001'})

    statistics = ['Average', 'Minimum']
    unit = 'Bytes'
    metric_name = 'UnusedMemory'
    namespace = "AWS/ElastiCache"

    elasticache_Nodeid = []
    free_space = []
    for i in range(0, elasticache_numnodes):
        Nodeid = desc['DescribeCacheClustersResponse']['DescribeCacheClustersResult']['CacheClusters'][0]['CacheNodes'][i]['CacheNodeId']
        dimensions = {'CacheClusterId': autoscale_config.CACHE_CLUSTER_NAME,
                      'CacheNodeId': Nodeid}
        freespace = cloudwatch_connection.get_metric_statistics(
            60, start_time, end_time, metric_name, namespace,
            statistics, dimensions, unit)
        free_space.append(freespace[0]['Average'])
    return free_space
def create_alarm(name='test-alarm', metric='CPUUtilization', namespace='AWS/EC2',
                 statistic='Average', comparison='>=', threshold=90, period=500,
                 evaluation_periods=1, unit='Percent', description='Test Alarm',
                 dimensions=None):
    if dimensions is None:
        dimensions = {}
    cw_conn = boto.connect_cloudwatch('us-east')
    metric_alarm = MetricAlarm(name=name, metric=metric, namespace=namespace,
                               statistic=statistic, comparison=comparison,
                               threshold=threshold, period=period,
                               evaluation_periods=evaluation_periods, unit=unit,
                               description=description, dimensions=dimensions)
    alarm_created = cw_conn.put_metric_alarm(metric_alarm)
    return alarm_created
def test_fetch_alarms_for_instance(self):
    instance_id = 'i-123456'
    self.create_alarm(dimensions={'InstanceId': [instance_id]})
    cw_conn = boto.connect_cloudwatch('us-east')
    instance_alarms = Alarm.get_alarms_for_resource(
        instance_id, dimension_key='InstanceId', cw_conn=cw_conn)
    self.assertEqual(len(instance_alarms), 1)
def test_describe_alarms():
    conn = boto.connect_cloudwatch()
    alarms = conn.describe_alarms()
    alarms.should.have.length_of(0)

    conn.create_alarm(alarm_fixture(name="nfoobar", action="afoobar"))
    conn.create_alarm(alarm_fixture(name="nfoobaz", action="afoobaz"))
    conn.create_alarm(alarm_fixture(name="nbarfoo", action="abarfoo"))
    conn.create_alarm(alarm_fixture(name="nbazfoo", action="abazfoo"))

    alarms = conn.describe_alarms()
    alarms.should.have.length_of(4)
    alarms = conn.describe_alarms(alarm_name_prefix="nfoo")
    alarms.should.have.length_of(2)
    alarms = conn.describe_alarms(alarm_names=["nfoobar", "nbarfoo", "nbazfoo"])
    alarms.should.have.length_of(3)
    alarms = conn.describe_alarms(action_prefix="afoo")
    alarms.should.have.length_of(2)

    for alarm in conn.describe_alarms():
        alarm.delete()

    alarms = conn.describe_alarms()
    alarms.should.have.length_of(0)
def collect(self):
    DBInstanceIdentifiers_arr = re.split(',', self.config['DBInstanceIdentifiers'])
    attribs = ['BinLogDiskUsage', 'CPUUtilization', 'DatabaseConnections',
               'DiskQueueDepth', 'FreeableMemory', 'FreeStorageSpace',
               'ReplicaLag', 'SwapUsage', 'ReadIOPS', 'WriteIOPS',
               'ReadLatency', 'WriteLatency', 'ReadThroughput',
               'WriteThroughput', 'NetworkReceiveThroughput',
               'NetworkTransmitThroughput']

    botoRDS = boto.connect_cloudwatch(aws_access_key_id=self.config['access_key_id'],
                                      aws_secret_access_key=self.config['secret_access_key'])

    for dbInstanceIdentifier in DBInstanceIdentifiers_arr:
        for attribute in attribs:
            instanceStats = []
            try:
                instanceStats = botoRDS.get_metric_statistics(
                    period=60,
                    start_time=datetime.datetime.utcnow() - datetime.timedelta(seconds=120),
                    end_time=datetime.datetime.utcnow(),
                    namespace="AWS/RDS",
                    metric_name=attribute,
                    statistics=["Sum"],
                    dimensions={'DBInstanceIdentifier': dbInstanceIdentifier})
            except Exception as e:
                self.log.error('An error occurred collecting from RDS, %s', e)

            if instanceStats != []:
                # Publish the most recent datapoint for this metric.
                sorted_metric_arr = sorted(instanceStats, key=itemgetter('Timestamp'))
                self.publish('%s.%s' % (dbInstanceIdentifier, attribute),
                             sorted_metric_arr[-1]['Sum'])
def test_describe_alarms():
    conn = boto.connect_cloudwatch()
    alarms = conn.describe_alarms()
    alarms.should.have.length_of(0)

    conn.create_alarm(alarm_fixture(name="nfoobar", action="afoobar"))
    conn.create_alarm(alarm_fixture(name="nfoobaz", action="afoobaz"))
    conn.create_alarm(alarm_fixture(name="nbarfoo", action="abarfoo"))
    conn.create_alarm(alarm_fixture(name="nbazfoo", action="abazfoo"))

    enabled = alarm_fixture(name="enabled1", action=["abarfoo"])
    enabled.add_alarm_action("arn:alarm")
    conn.create_alarm(enabled)

    alarms = conn.describe_alarms()
    alarms.should.have.length_of(5)
    alarms = conn.describe_alarms(alarm_name_prefix="nfoo")
    alarms.should.have.length_of(2)
    alarms = conn.describe_alarms(alarm_names=["nfoobar", "nbarfoo", "nbazfoo"])
    alarms.should.have.length_of(3)
    alarms = conn.describe_alarms(action_prefix="afoo")
    alarms.should.have.length_of(2)
    alarms = conn.describe_alarms(alarm_name_prefix="enabled")
    alarms.should.have.length_of(1)
    alarms[0].actions_enabled.should.equal("true")

    for alarm in conn.describe_alarms():
        alarm.delete()

    alarms = conn.describe_alarms()
    alarms.should.have.length_of(0)
def main():
    sns = boto.connect_sns()
    raw_input("Going to create the alpha and beta topics. Enter to continue")
    sns.create_topic("Alpha1")
    sns.create_topic("Beta1")
    raw_input("Alpha and Beta topics made. Here is the list of topics.")
    dic = sns.get_all_topics()['ListTopicsResponse']['ListTopicsResult']['Topics']
    print dic
    showTopics(dic)
    delete = raw_input("I am now going to delete the Beta topic. Copy beta and enter it here: ")
    sns.delete_topic(delete)
    arn = raw_input("Beta was deleted here is the new list of topics.")
    dic = sns.get_all_topics()['ListTopicsResponse']['ListTopicsResult']['Topics']
    showTopics(dic)
    arn = raw_input("We are now going to subscribe to the alpha topic. Copy and paste alpha here: ")
    sns.subscribe(arn, "email", "*****@*****.**")
    print arn
    sns.subscribe(arn, "email-json", "*****@*****.**")
    sns.subscribe(arn, "http", "http://cloud.comtor.org/csc470logger/logger")
    raw_input("There should now be 3 subscriptions added to the topic. Go check in console if needed. \nHere are the details")
    printinfo(arn)
    raw_input("I am now going to change the display name for the topic.")
    sns.set_topic_attributes(arn, "DisplayName", "NewName")
    raw_input("Name change made. The new information is... ")
    printinfo(arn)
    raw_input("I am now going to send a message to all of those who have subscribed.")
    print sns.publish(arn, "Hello classmates. What is int?")
    raw_input("Message sent. Please check mail.")
    raw_input("We are now going to make a cloud watch alarm.")
    cw = boto.connect_cloudwatch()
    myMetric = cw.list_metrics()[0]
    print cw.describe_alarms()[0]
    cw.create_alarm(cw.describe_alarms()[0])
def test_get_metric_statistics():
    conn = boto.connect_cloudwatch()

    metric_timestamp = datetime(2018, 4, 9, 13, 0, 0, 0)
    conn.put_metric_data(
        namespace="tester",
        name="metric",
        value=1.5,
        dimensions={"InstanceId": ["i-0123456,i-0123457"]},
        timestamp=metric_timestamp,
    )

    metric_kwargs = dict(
        namespace="tester",
        metric_name="metric",
        start_time=metric_timestamp,
        end_time=datetime.now(),
        period=3600,
        statistics=["Minimum"],
    )
    datapoints = conn.get_metric_statistics(**metric_kwargs)
    datapoints.should.have.length_of(1)

    datapoint = datapoints[0]
    datapoint.should.have.key("Minimum").which.should.equal(1.5)
    datapoint.should.have.key("Timestamp").which.should.equal(metric_timestamp)
def main():
    '''
    '''
    setup_logger(environ.get('AWS_ACCESS_KEY_ID'),
                 environ.get('AWS_SECRET_ACCESS_KEY'),
                 environ.get('AWS_SNS_ARN'))
    config = load_config()
    checkin_time = time()

    try:
        # Rely on boto AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY variables.
        cw = connect_cloudwatch()
    except:
        cw = False

    while True:
        try:
            with db_connect(config['DATABASE_URL']) as conn:
                task_Q = db_queue(conn, TASK_QUEUE)
                done_Q = db_queue(conn, DONE_QUEUE)
                due_Q = db_queue(conn, DUE_QUEUE)
                beat_Q = db_queue(conn, HEARTBEAT_QUEUE)
                pop_task_from_donequeue(done_Q, config['GITHUB_AUTH'])
                pop_task_from_duequeue(due_Q, config['GITHUB_AUTH'])
                flush_heartbeat_queue(beat_Q)

                if time() < checkin_time:
                    continue

                # Report basic information about current status.
                with beat_Q as db:
                    workers_n = len(get_recent_workers(db))

                task_n, done_n, due_n = map(len, (task_Q, done_Q, due_Q))
                _L.info('{workers_n} active workers; queue lengths: {task_n} tasks, {done_n} done, {due_n} due'.format(**locals()))

                if cw:
                    ns = environ.get('AWS_CLOUDWATCH_NS')
                    cw.put_metric_data(ns, 'tasks queue', task_n, unit='Count')
                    cw.put_metric_data(ns, 'done queue', done_n, unit='Count')
                    cw.put_metric_data(ns, 'due queue', due_n, unit='Count')
                    cw.put_metric_data(ns, 'expected results', task_n + workers_n, unit='Count')
                    cw.put_metric_data(ns, 'active workers', workers_n, unit='Count')

                checkin_time = time() + 30
        except KeyboardInterrupt:
            raise
        except:
            _L.error('Error in dequeue main()', exc_info=True)
            sleep(5)
def publish_cw_metrics(ri_account_credentials, region, ri_surplus):
    access_key_id, secret_access_key = ri_account_credentials
    conn = boto.connect_cloudwatch(aws_access_key_id=access_key_id,
                                   aws_secret_access_key=secret_access_key)
    for itype, surplus in ri_surplus.items():
        conn.put_metric_data("RI-usage-%s" % region,
                             "%s-available-RIs" % itype,
                             surplus)
    conn.close()
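# Hypothetical invocation of publish_cw_metrics above, with placeholder
# credentials and made-up surplus counts; each loop iteration publishes one
# datapoint, e.g. metric "m1.small-available-RIs" in namespace
# "RI-usage-us-east-1" with value 3.
creds = ('PLACEHOLDER_KEY_ID', 'PLACEHOLDER_SECRET')
publish_cw_metrics(creds, 'us-east-1', {'m1.small': 3, 'c3.large': -1})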
def main():
    ''' Single threaded worker to serve the job queue.
    '''
    args = parser.parse_args()
    setup_logger(None, None, args.sns_arn, log_level=args.loglevel)

    s3 = S3(None, None, args.bucket)
    autoscale = connect_autoscale(None, None)
    cloudwatch = connect_cloudwatch(None, None)
    github_auth = args.github_token, 'x-oauth-basic'
    next_queue_interval, next_autoscale_interval = 60, 8 * 3600

    try:
        with db_connect(args.database_url) as conn:
            task_Q = db_queue(conn, TASK_QUEUE)
            next_queue_report = time() + next_queue_interval
            next_autoscale_grow = time() + next_autoscale_interval
            minimum_capacity = count(1)

            with task_Q as db:
                run_times = get_batch_run_times(db, args.owner, args.repository)

            sources = find_batch_sources(args.owner, args.repository,
                                         github_auth, run_times)

            with task_Q as db:
                new_set = add_set(db, args.owner, args.repository)

            for expected_count in enqueue_sources(task_Q, new_set, sources):
                if time() >= next_queue_report:
                    next_queue_report, n = time() + next_queue_interval, len(task_Q)
                    _L.debug('Task queue has {} item{}, {} sources expected'.format(
                        n, 's' if n != 1 else '', expected_count))
                try:
                    if time() >= next_autoscale_grow:
                        next_autoscale_grow = time() + next_autoscale_interval
                        set_autoscale_capacity(autoscale, cloudwatch,
                                               args.cloudwatch_ns,
                                               next(minimum_capacity))
                except Exception as e:
                    _L.error('Problem during autoscale', exc_info=True)
                if expected_count:
                    sleep(2)

            with task_Q as db:
                _L.debug('Rendering that shit')
                render_set_maps(s3, db, new_set)
    except:
        _L.error('Error in worker main()', exc_info=True)
        return 1
    else:
        return 0
def main():
    p = argparse.ArgumentParser()
    p.add_argument('cluster_name')
    p.add_argument('--dry-run', action='store_true')
    a = p.parse_args()

    ec2 = boto.connect_ec2()
    spot_instances = [instance
                      for res in ec2.get_all_instances()
                      for instance in res.instances
                      if instance.state in set(['pending', 'running', 'stopping', 'stopped'])
                      and instance.spot_instance_request_id is not None]
    logging.info('%d spot instances', len(spot_instances))
    candidates = [i for i in spot_instances
                  if i.tags.get('spark_cluster_name') == a.cluster_name]
    logging.info('%d candidates', len(candidates))

    cloudwatch = boto.connect_cloudwatch()
    extant_alarms = set(i.name for i in cloudwatch.describe_alarms())

    for instance in candidates:
        dimensions = {'InstanceId': instance.id}
        alarm_name = '{0}-idle-term'.format(instance.id)

        # The name of the metric to request. This list can be retrieved by
        # calling ListMetrics
        metric_name = 'CPUUtilization'

        # The namespace of the metric. This can also be retrieved by calling
        # ListMetrics
        actions = ['arn:aws:automate:{0}:ec2:terminate'.format(instance.region.name),
                   'arn:aws:sns:us-west-2:602821995734:cmccoy-alarm']

        metric = cloudwatch.list_metrics(dimensions=dimensions,
                                         metric_name=metric_name)
        if not metric:
            raise ValueError("Missing: " + metric_name)
        metric = metric[0]

        if alarm_name in extant_alarms:
            logging.warn("Alarm %s already exists - overwriting", alarm_name)

        # Terminate instances when average CPU <= 10% for 24
        # periods of 5 minutes (two hours)
        res = metric.create_alarm(name=alarm_name,
                                  comparison='<=',
                                  threshold=10,
                                  period=300,
                                  evaluation_periods=24,
                                  statistic='Average',
                                  alarm_actions=actions,
                                  unit='Percent')
        logging.info("%s - %s", alarm_name, res)
        extant_alarms.add(alarm_name)
def get_instance_metric(start, end, step_size, metric_name, instance_id,
                        region_name="us-west-1"):
    """A wrapper function for getting the values of a metric for an instance
    over a time period.

    :param start: start time of observation.
    :type start: datetime.
    :param end: end time of observation.
    :type end: datetime.
    :param step_size: the length of each period
    :type step_size: int.
    :param metric_name: the metric's name.
    :type metric_name: str
    :param instance_id: the observed instance's id.
    :type instance_id: int
    :param region_name: the region's name.
    :type region_name: str
    :returns: datapoints -- datapoints of the metric values.
    """
    # Get RegionInfo object from region name
    cloudwatch_regions = boto.ec2.cloudwatch.regions()
    for r in cloudwatch_regions:
        if r.name == region_name:
            region = r
            break

    # Connect to AWS CloudWatch
    conn = boto.connect_cloudwatch(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY,
                                   region=region)

    # The statistics to request, i.e. Average, Sum, Maximum, Minimum or SampleCount
    operations_type = ['Average', 'Maximum', 'Minimum']

    # These are the dimensions to request from CloudWatch. To get data for a
    # specific instance, set InstanceId to the instance you're looking for.
    # Detailed monitoring adds additional dimensions, including AmiId and
    # InstanceType.
    dimensions = {'InstanceId': instance_id}

    # Get unit and namespace based on metric_name
    unit = UNIT_OF_METRIC[metric_name]
    namespace = NAMESPACE_OF_METRIC[metric_name]

    # Request the datapoints for the given period over the start-to-end window.
    datapoints = conn.get_metric_statistics(step_size, start, end, metric_name,
                                            namespace, operations_type,
                                            dimensions, unit)
    return datapoints
def test_fetch_alarms_for_load_balancer(self):
    elb_name = 'test_elb'
    alarm_kwargs = dict(
        metric='RequestCount',
        namespace='AWS/ELB',
        statistic='Sum',
        unit=None,
        dimensions={'LoadBalancerName': elb_name},
    )
    self.create_alarm(**alarm_kwargs)
    cw_conn = boto.connect_cloudwatch('us-east')
    elb_alarms = Alarm.get_alarms_for_resource(
        elb_name, dimension_key='LoadBalancerName', cw_conn=cw_conn)
    self.assertEqual(len(elb_alarms), 1)
def refresh_instance(instance, config):
    CW_METRIC_NAMES = [
        ['CPUUtilization', 'Percent'],
        ['CPUCreditBalance', 'Count'],
        ['DiskWriteBytes', 'Bytes'],
        ['DiskReadBytes', 'Bytes'],
        ['DiskWriteOps', 'Count'],
        ['DiskReadOps', 'Count'],
        ['NetworkIn', 'Bytes'],
        ['NetworkOut', 'Bytes'],
    ]

    # timeframe and period (2 weeks, and 20-minute period for AWS. No option to
    # set period for New Relic)
    START = datetime.now() - timedelta(days=14)
    # subtract a minute to get better granularity on New Relic metrics
    END = datetime.now() - timedelta(minutes=1)
    PERIOD = 1200  # 20 minutes

    id = instance['id']
    env = instance['env']
    ret = {'id': id}

    cw = boto.connect_cloudwatch(
        aws_access_key_id=config['aws_ec2']['aws_access_key'],
        aws_secret_access_key=config['aws_ec2']['aws_secret_key'])

    for metric, unit in CW_METRIC_NAMES:
        timeseries = cw.get_metric_statistics(period=PERIOD,
                                              start_time=START,
                                              end_time=END,
                                              metric_name=metric,
                                              namespace='AWS/EC2',
                                              statistics='Average',
                                              dimensions={'InstanceId': [id]},
                                              unit=unit)
        if not timeseries:
            perctile_01 = ''
            median = ''
            perctile_95 = ''
            perctile_99 = ''
        else:
            sorted_by_value = sorted(timeseries, key=lambda k: k['Average'])
            total_length = len(sorted_by_value)
            perctile_01 = sorted_by_value[int(0.01 * total_length)]['Average']
            median = sorted_by_value[int(total_length / 2)]['Average']
            perctile_95 = sorted_by_value[int(0.95 * total_length)]['Average']
            perctile_99 = sorted_by_value[int(0.99 * total_length)]['Average']

        # append unit to metric name, unless it's already part of the name
        metric_key = (metric + unit if unit not in metric else metric)
        ret[metric_key] = {'1': perctile_01, '50': median,
                           '95': perctile_95, '99': perctile_99}
    return ret
def main():
    '''
    '''
    setup_logger(environ.get('AWS_SNS_ARN'))
    config = load_config()
    checkin_time = time()

    try:
        # Rely on boto AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY variables.
        cw = connect_cloudwatch()
    except:
        cw = False

    while True:
        try:
            with db_connect(config['DATABASE_URL']) as conn:
                task_Q = db_queue(conn, TASK_QUEUE)
                done_Q = db_queue(conn, DONE_QUEUE)
                due_Q = db_queue(conn, DUE_QUEUE)
                pop_task_from_donequeue(done_Q, config['GITHUB_AUTH'])
                pop_task_from_duequeue(due_Q, config['GITHUB_AUTH'])

                if time() < checkin_time:
                    continue

                # Report basic information about current status.
                task_n, done_n, due_n = map(len, (task_Q, done_Q, due_Q))
                _L.info('Queue lengths: {task_n} tasks, {done_n} done, {due_n} due'.format(**locals()))

                if cw:
                    cw.put_metric_data('openaddr.ci', 'tasks queue', task_n, unit='Count')
                    cw.put_metric_data('openaddr.ci', 'done queue', done_n, unit='Count')
                    cw.put_metric_data('openaddr.ci', 'due queue', due_n, unit='Count')

                checkin_time = time() + 30
        except KeyboardInterrupt:
            raise
        except:
            _L.error('Error in dequeue main()', exc_info=True)
            sleep(5)
def probe(self):
    logging.info('getting stats from cloudwatch')
    cw = boto.connect_cloudwatch()
    start_time = datetime.utcnow() - timedelta(minutes=1)
    end_time = datetime.utcnow()
    stats = cw.get_metric_statistics(60, start_time, end_time, self.metric,
                                     self.namespace, self.statistic,
                                     self.dimensions)
    if len(stats) == 0:
        return []
    stat = stats[0]
    return [nagiosplugin.Metric('cloudwatchmetric',
                                stat[self.statistic],
                                stat['Unit'])]
def test_describe_alarms_for_metric():
    conn = boto.connect_cloudwatch()

    conn.create_alarm(alarm_fixture(name="nfoobar", action="afoobar"))
    conn.create_alarm(alarm_fixture(name="nfoobaz", action="afoobaz"))
    conn.create_alarm(alarm_fixture(name="nbarfoo", action="abarfoo"))
    conn.create_alarm(alarm_fixture(name="nbazfoo", action="abazfoo"))

    alarms = conn.describe_alarms_for_metric("nbarfoo_metric", "nbarfoo_namespace")
    alarms.should.have.length_of(1)

    alarms = conn.describe_alarms_for_metric("nbazfoo_metric", "nbazfoo_namespace")
    alarms.should.have.length_of(1)
def main(): """ Single threaded worker to serve the job queue. """ args = parser.parse_args() setup_logger(args.sns_arn, log_level=args.loglevel) s3 = S3(args.access_key, args.secret_key, args.bucket) autoscale = connect_autoscale(args.access_key, args.secret_key) cloudwatch = connect_cloudwatch(args.access_key, args.secret_key) github_auth = args.github_token, "x-oauth-basic" next_queue_interval, next_autoscale_interval = 60, 43200 try: with db_connect(args.database_url) as conn: task_Q = db_queue(conn, TASK_QUEUE) next_queue_report = time() + next_queue_interval next_autoscale_grow = time() + next_autoscale_interval minimum_capacity = count(1) with task_Q as db: run_times = get_batch_run_times(db, args.owner, args.repository) sources = find_batch_sources(args.owner, args.repository, github_auth, run_times) with task_Q as db: new_set = add_set(db, args.owner, args.repository) for expected_count in enqueue_sources(task_Q, new_set, sources): if time() >= next_queue_report: next_queue_report, n = time() + next_queue_interval, len(task_Q) args = n, "s" if n != 1 else "", expected_count _L.debug("Task queue has {} item{}, {} sources expected".format(*args)) try: if time() >= next_autoscale_grow: next_autoscale_grow = time() + next_autoscale_interval set_autoscale_capacity(autoscale, cloudwatch, next(minimum_capacity)) except Exception as e: _L.error("Problem during autoscale", exc_info=True) if expected_count: sleep(5) with task_Q as db: _L.debug("Rendering that shit") render_set_maps(s3, db, new_set) except: _L.error("Error in worker main()", exc_info=True) return 1 else: return 0
def run(self):
    end = datetime.datetime.utcnow()
    start = end - datetime.timedelta(minutes=5)

    try:
        self.cw = boto.connect_cloudwatch(self.identity, self.secret,
                                          validate_certs=False)
        self.rds = None
        regions = boto.rds.regions()
        for r in regions:
            if r.name == self.region:
                self.rds = boto.connect_rds(self.identity, self.secret,
                                            region=r, validate_certs=False)
    except Exception, e:
        print "Boto Error: %s" % (e,)
        sys.exit(1)
def get_cloudwatch_conn(self):
    """This function returns the CloudWatch connection object"""
    if self.cloudwatch_conn:
        return self.cloudwatch_conn
    else:
        import boto
        try:
            self.cloudwatch_conn = boto.connect_cloudwatch(self.api_key,
                                                           self.api_secret)
            return self.cloudwatch_conn
        except:
            print "Unable to get the CloudWatch connection: wrong API key/secret combination"
            return None
def setUpCloudWatch(self, instance_ids, env="stg"):
    alarm = MetricAlarm(
        name="servergmsextender_CloudWatchAlarm" + env,
        namespace="AWS/EC2",
        metric="CPUUtilization",
        comparison=">=",
        threshold="90",
        evaluation_periods=1,
        statistic="Average",
        period=300,
        dimensions={'InstanceId': instance_ids},
        alarm_actions=['arn:alarm'],
        ok_actions=['arn:ok'])
    watch_conn = boto.connect_cloudwatch()
    watch_conn.put_metric_alarm(alarm)
def main():
    '''
    '''
    setup_logger(environ.get('AWS_SNS_ARN'), None)
    config = load_config()
    checkin_time = time()

    try:
        # Rely on boto environment.
        cw = connect_cloudwatch()
    except:
        cw = False

    while True:
        try:
            with db_connect(config['DATABASE_URL']) as conn:
                task_Q = db_queue(conn, TASK_QUEUE)
                done_Q = db_queue(conn, DONE_QUEUE)
                due_Q = db_queue(conn, DUE_QUEUE)
                beat_Q = db_queue(conn, HEARTBEAT_QUEUE)
                pop_task_from_donequeue(done_Q, config['GITHUB_AUTH'])
                pop_task_from_duequeue(due_Q, config['GITHUB_AUTH'])
                flush_heartbeat_queue(beat_Q)

                if time() < checkin_time:
                    continue

                # Report basic information about current status.
                with beat_Q as db:
                    workers_n = len(get_recent_workers(db))

                task_n, done_n, due_n = map(len, (task_Q, done_Q, due_Q))
                _L.info('{workers_n} active workers; queue lengths: {task_n} tasks, {done_n} done, {due_n} due'.format(**locals()))

                if cw:
                    ns = environ.get('AWS_CLOUDWATCH_NS')
                    cw.put_metric_data(ns, 'tasks queue', task_n, unit='Count')
                    cw.put_metric_data(ns, 'done queue', done_n, unit='Count')
                    cw.put_metric_data(ns, 'due queue', due_n, unit='Count')
                    cw.put_metric_data(ns, 'expected results', task_n + workers_n, unit='Count')
                    cw.put_metric_data(ns, 'active workers', workers_n, unit='Count')

                checkin_time = time() + 30
        except KeyboardInterrupt:
            raise
        except:
            _L.error('Error in dequeue main()', exc_info=True)
            sleep(2)
def run(self):
    try:
        logger.debug("Starting CloudWatch sender process.")
        connection = boto.connect_cloudwatch(self._aws_access_key,
                                             self._aws_secret_key)
    except:
        logger.exception("Failed to connect to CloudWatch.")

    self._metrics.enable_deprecated()

    while True:
        metrics = {
            "name": [],
            "value": [],
            "unit": [],
            "timestamp": [],
            "dimensions": [],
        }

        # Block for the first metric, then drain the queue up to the batch cap.
        metric = self._metrics.get_deprecated()
        append_metric(metrics, metric)
        while len(metrics["name"]) < MAX_BATCH_METRICS:
            try:
                metric = self._metrics.get_nowait_deprecated()
                append_metric(metrics, metric)
            except Empty:
                break

        try:
            connection.put_metric_data(self._namespace, **metrics)
            logger.debug("Sent %d CloudWatch metrics", len(metrics["name"]))
        except:
            # Requeue the whole batch so it can be retried later.
            for i in range(len(metrics["name"])):
                self._metrics.put_deprecated(metrics["name"][i],
                                             metrics["value"][i],
                                             unit=metrics["unit"][i],
                                             dimensions=metrics["dimensions"][i],
                                             timestamp=metrics["timestamp"][i])
            logger.exception("Failed to write to CloudWatch: %s", metrics)
            logger.debug("Attempted to requeue %d metrics.", len(metrics["name"]))
            # random int between 1/2 and 1 1/2 of FAILED_SEND_SLEEP duration
            sleep_secs = random.randint(FAILED_SEND_SLEEP_SECS / 2,
                                        3 * FAILED_SEND_SLEEP_SECS / 2)
            time.sleep(sleep_secs)
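# The batching above presumably exists because CloudWatch's PutMetricData call
# caps how many datapoints one request may carry (historically 20, a plausible
# value for MAX_BATCH_METRICS). boto 2 accepts parallel lists for batch
# publishing -- a sketch with made-up metric names:
import boto

conn = boto.connect_cloudwatch()
conn.put_metric_data(namespace='myapp',  # placeholder namespace
                     name=['queue_depth', 'worker_count'],
                     value=[17, 4],
                     unit=['Count', 'Count'])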
def getStuff():
    c = boto.connect_cloudwatch(aws_access_key_id=config.access_key,
                                aws_secret_access_key=config.secret_key)
    networkIn = c.list_metrics(metric_name="NetworkIn")[0]
    end = datetime.datetime.now()
    start = end - datetime.timedelta(days=31)
    derp = []
    for item in networkIn.query(start, end, 'Sum', 'Bytes', period=3600):
        item['Timestamp'] = str(item['Timestamp'])
        derp.append(item)
    derp.sort(key=lambda r: r['Timestamp'])
    return json.dumps(derp)
def test_put_metric_data():
    conn = boto.connect_cloudwatch()
    conn.put_metric_data(
        namespace='tester',
        name='metric',
        value=1.5,
        dimensions={'InstanceId': ['i-0123456,i-0123457']},
    )

    metrics = conn.list_metrics()
    metrics.should.have.length_of(1)
    metric = metrics[0]
    metric.namespace.should.equal('tester')
    metric.name.should.equal('metric')
    dict(metric.dimensions).should.equal({'InstanceId': ['i-0123456,i-0123457']})
def test_delete_alarm():
    conn = boto.connect_cloudwatch()

    alarms = conn.describe_alarms()
    alarms.should.have.length_of(0)

    alarm = alarm_fixture()
    conn.create_alarm(alarm)

    alarms = conn.describe_alarms()
    alarms.should.have.length_of(1)

    alarms[0].delete()

    alarms = conn.describe_alarms()
    alarms.should.have.length_of(0)
def get_elb_stats(name, metric, minutes=60, period=60):
    print '%s for %s for the last %dm (bucket: %ds):' % (metric, name, minutes, period)
    try:
        c = boto.connect_cloudwatch(aws_access_key_id=creds[0],
                                    aws_secret_access_key=creds[1])
        end = datetime.datetime.utcnow()
        start = end - datetime.timedelta(minutes=minutes)
        stats = c.get_metric_statistics(period, start, end, metric, 'AWS/ELB',
                                        'Sum', MyInstances('LoadBalancerName', name))
        for stat in stats:
            print '\t%s: %f' % (stat[u'Timestamp'], stat[u'Sum'])
    except BotoServerError, error:
        print >> sys.stderr, 'Boto API error: ', error
def setUpCloudWatchWithWrongConfig(self, instance_ids, env="stg"):
    alarm = MetricAlarm(
        name="servergmsextender_CloudWatchAlarm" + env,
        namespace="AWS/EC2",
        metric="CPUUtilization",
        comparison="GreaterThanThreshold",  # wrong configuration that would generate an error
        threshold="90",
        evaluation_periods=1,
        statistic="Average",
        period=300,
        dimensions={'InstanceId': instance_ids},
        alarm_actions=['arn:alarm'],
        ok_actions=['arn:ok'])
    watch_conn = boto.connect_cloudwatch()
    watch_conn.put_metric_alarm(alarm)
def get_rds_stats(step, start_time, end_time, metric, identifier):
    """Function for fetching RDS statistics from CloudWatch"""
    cw = boto.connect_cloudwatch()
    result = cw.get_metric_statistics(step, start_time, end_time, metric,
                                      'AWS/RDS', 'Average',
                                      dimensions={'DBInstanceIdentifier': [identifier]})
    if result:
        if len(result) > 1:
            # Get the last point
            result = sorted(result, key=lambda k: k['Timestamp'])
            result.reverse()
        result = float('%.2f' % result[0]['Average'])
    return result
def update_alarm(alarm_name, description):
    conn = boto.connect_cloudwatch()

    def get_alarm():
        alarms = conn.describe_alarms(alarm_names=[alarm_name])
        if not alarms:
            raise Exception("Alarm '%s' not found" % alarm_name)
        return alarms[0]

    alarm = get_alarm()

    # work around boto comparison serialization issue
    # https://github.com/boto/boto/issues/1311
    alarm.comparison = alarm._cmp_map.get(alarm.comparison)

    print(alarm.name + " : " + alarm.description)
    alarm.description = "this is " + alarm.name
    conn.update_alarm(alarm)
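# Background on the workaround above: boto 2 parses DescribeAlarms responses
# into symbolic comparisons such as '>=', but expects the long CloudWatch form
# ('GreaterThanOrEqualToThreshold') when an alarm is re-submitted; _cmp_map
# translates the symbol back. A sketch of the round trip, assuming an alarm
# named 'my-alarm' (a placeholder) already exists:
import boto

conn = boto.connect_cloudwatch()
alarm = conn.describe_alarms(alarm_names=['my-alarm'])[0]
print(alarm.comparison)  # symbolic form, e.g. '>='
alarm.comparison = alarm._cmp_map.get(alarm.comparison)  # back to long form
conn.update_alarm(alarm)  # would otherwise fail request validation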
def aws_cost_count(app_name, aws_access_key, aws_secret_access_key, result):
    # Connect with boto
    conn = boto.connect_cloudwatch(aws_access_key_id=aws_access_key,
                                   aws_secret_access_key=aws_secret_access_key)

    # Set the time window: how many days (--day), hours (--hour) and
    # minutes (--minutes) ago to look
    time_now = datetime.datetime.utcnow() - datetime.timedelta(hours=4)
    time_start = time_now - datetime.timedelta(days=result["--day"],
                                               hours=result["--hour"],
                                               minutes=result["--minutes"])
    time_end = time_start + datetime.timedelta(hours=4)

    service_cost_list = []
    print time_start
    print time_end

    # Per-service cost for the app
    for aws_service in aws_service_list:
        app_service_result = conn.get_metric_statistics(
            dimensions={'ServiceName': aws_service, 'Currency': 'USD'},
            metric_name='EstimatedCharges',
            namespace='AWS/Billing',
            statistics='Maximum',
            start_time=time_start,
            end_time=time_end,
            period=60,
            unit='None')
        if len(app_service_result) == 0:
            service_cost = 0
        else:
            service_cost = app_service_result[0]['Maximum']
        print aws_service, int(service_cost)
        service_cost_list.append(service_cost)

    # Write the results to the Google spreadsheet
    sheet_name = app_name + "_cost"
    spreadsheets.update_sheet(sheet_name, app_name, service_cost_list, time_end)
def main(urls):
    # params = {
    #     'region': 'us-east-1',
    #     'aws_access_key_id': ACCESS_KEY,
    #     'aws_secret_access_key': SECRET_KEY
    # }
    route = boto.connect_route53()
    # route = boto.connect_route53(params)
    cw = boto.connect_cloudwatch()
    # cw = boto.connect_cloudwatch(params)
    url_to_healthcheck = {}
    for url in urls:
        url_to_healthcheck[url] = create_healthcheck(route, url)
    for url in url_to_healthcheck:
        create_cloudwatch_alarm(cw, url, url_to_healthcheck[url])
def __init__(self, programs, groups, any, sendmail, email, rpc,
             aws_access_key, aws_secret_key):
    self.programs = programs
    self.groups = groups
    self.any = any
    self.sendmail = sendmail
    self.email = email
    self.rpc = rpc
    self.stdin = sys.stdin
    self.stdout = sys.stdout
    self.stderr = sys.stderr
    self.pscommand = 'ps -orss= -p %s'
    self.mailed = False  # for unit tests
    self.aws_access_key = aws_access_key
    self.aws_secret_key = aws_secret_key
    self._hostname = socket.gethostname()
    self.cloudwatch = None
    if self.aws_access_key and self.aws_secret_key:
        self.cloudwatch = boto.connect_cloudwatch(self.aws_access_key,
                                                  self.aws_secret_key)
def retrieve_stats():
    c = boto.connect_cloudwatch()
    end = datetime.datetime.now()
    start = end - datetime.timedelta(minutes=3)
    dimension = {'DBInstanceIdentifier': options.identifier}
    stats = c.get_metric_statistics(60, start, end, options.metric,
                                    'AWS/RDS', 'Average', dimension)
    if len(stats) > 0:
        # sort datapoints based on their timestamps
        stats_sorted = sorted(stats, key=lambda stat: stat['Timestamp'])
        print stats_sorted[-1]['Average']
    else:
        sys.exit("ZBX_UNSUPPORTED")
def make_sleepy(self, parameters, instance_id):
    print "Making instance", instance_id, "sleepy..."
    credentials = parameters[self.PARAM_CREDENTIALS]
    ec2 = boto.connect_cloudwatch(str(credentials['EC2_ACCESS_KEY']),
                                  str(credentials['EC2_SECRET_KEY']))
    region = "us-east-1"
    terminate_arn = 'arn:aws:automate:{0}:ec2:terminate'.format(region)
    alarm_name = 'ec2_shutdown_sleepy_{0}'.format(instance_id)

    # define our alarm to terminate the instance if it gets sleepy,
    # i.e. if CPU utilisation stays below 10% for four consecutive
    # one-hour periods
    sleepy_alarm = MetricAlarm(name=alarm_name,
                               namespace='AWS/EC2',
                               metric='CPUUtilization',
                               statistic='Average',
                               comparison='<',
                               threshold='10',
                               period='3600',
                               evaluation_periods=4,
                               alarm_actions=[terminate_arn],
                               dimensions={'InstanceId': instance_id})
    # create the alarm.. Zzzz!
    ec2.create_alarm(sleepy_alarm)
def make_sleepy(self, parameters, instance_id, period='3600'):
    print "Making instance", instance_id, "sleepy..."
    credentials = parameters[self.PARAM_CREDENTIALS]
    ec2 = boto.connect_cloudwatch(str(credentials['EC2_ACCESS_KEY']),
                                  str(credentials['EC2_SECRET_KEY']))
    region = "us-east-1"
    terminate_arn = 'arn:aws:automate:{0}:ec2:terminate'.format(region)
    alarm_name = 'ec2_shutdown_sleepy_{0}'.format(instance_id)

    # define our alarm to terminate the instance if it gets sleepy,
    # i.e. if CPU utilisation stays below 10% for four consecutive
    # periods (one hour each by default)
    sleepy_alarm = MetricAlarm(name=alarm_name,
                               namespace='AWS/EC2',
                               metric='CPUUtilization',
                               statistic='Average',
                               comparison='<',
                               threshold='10',
                               period=period,
                               evaluation_periods=4,
                               alarm_actions=[terminate_arn],
                               dimensions={'InstanceId': instance_id})
    # create the alarm.. Zzzz!
    ec2.put_metric_alarm(sleepy_alarm)
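# Hypothetical call to make_sleepy above; `scaler`, PARAM_CREDENTIALS, and the
# credential values are placeholders. Note the variable named `ec2` in the
# helper is really a CloudWatch connection, since alarms live in CloudWatch.
params = {scaler.PARAM_CREDENTIALS: {'EC2_ACCESS_KEY': 'PLACEHOLDER_KEY',
                                     'EC2_SECRET_KEY': 'PLACEHOLDER_SECRET'}}
# Terminate i-0abc123 after four idle periods of 30 minutes each.
scaler.make_sleepy(params, 'i-0abc123', period='1800')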