class ImageAgeFilter(AgeFilter, LaunchConfigFilterBase):
    """Filter ASGs by the age of the AMI behind their launch configuration.

    Resolves each ASG's launch configuration to its ImageId and compares
    the AMI's ``CreationDate`` against the ``days`` threshold.  AMIs that
    can no longer be described fall back to a fixed ancient sentinel date
    so that "older than" policies still match them.
    """

    date_attribute = "CreationDate"

    schema = type_schema(
        'image-age',
        # materialize keys() so the enum is a real list under python3
        # (dict views are not valid json-schema enums)
        op={'type': 'string', 'enum': list(OPERATORS.keys())},
        days={'type': 'number'})

    def process(self, asgs, event=None):
        self.initialize(asgs)
        return super(ImageAgeFilter, self).process(asgs, event)

    def initialize(self, asgs):
        # local import to avoid a module-level cycle with c7n.resources.ami
        from c7n.resources.ami import AMI
        super(ImageAgeFilter, self).initialize(asgs)
        # Fetch all AMIs once and index by id for O(1) lookup per ASG.
        # (A previous revision also collected the launch configs' image ids
        # here, but that set was never used.)
        results = AMI(self.manager.ctx, {}).resources()
        self.images = {i['ImageId']: i for i in results}

    def get_resource_date(self, i):
        cfg = self.configs[i['LaunchConfigurationName']]
        # deregistered/missing AMIs resolve to the ancient sentinel date
        ami = self.images.get(cfg['ImageId'], {})
        return parse(ami.get(self.date_attribute, "2000-01-01T01:01:01.000Z"))
class LaunchConfigAge(AgeFilter):
    """Filter launch configurations by age in days (``CreatedTime``)."""

    date_attribute = "CreatedTime"

    schema = type_schema(
        'age',
        # list() so the enum serializes as a JSON array on python3
        op={'type': 'string', 'enum': list(OPERATORS.keys())},
        days={'type': 'number'})
class ImageAge(AgeFilter, InstanceImageBase):
    """EC2 AMI age filter

    Filters EC2 instances based on the age of their AMI image (in days)

    :Example:

        .. code-block: yaml

            policies:
              - name: ec2-ancient-ami
                resource: ec2
                filters:
                  - type: image-age
                    op: ge
                    days: 90
    """

    date_attribute = "CreationDate"

    schema = type_schema(
        'image-age',
        # list() so the enum is a JSON array under python3
        op={'type': 'string', 'enum': list(OPERATORS.keys())},
        days={'type': 'number'})

    def process(self, resources, event=None):
        # resolve instance -> AMI metadata once for the whole resource set
        self.image_map = self.get_image_mapping(resources)
        return super(ImageAge, self).process(resources, event)

    def get_resource_date(self, i):
        if i['ImageId'] not in self.image_map:
            # our image is no longer available; use an ancient sentinel
            # date so "older than" policies still match
            return parse("2000-01-01T01:01:01.000Z")
        image = self.image_map[i['ImageId']]
        return parse(image['CreationDate'])
class ElastiCacheSnapshotAge(AgeFilter):
    """Filter ElastiCache snapshots by age (in days).

    A cluster snapshot carries one creation time per node snapshot; the
    earliest of those is taken as the snapshot's effective creation date.
    """

    schema = type_schema(
        'age',
        days={'type': 'number'},
        # list() so the enum is a JSON array under python3
        op={'type': 'string', 'enum': list(OPERATORS.keys())})

    # AgeFilter.validate requires a date_attribute, but there is no single
    # snapshot date field here; get_resource_date below derives the value.
    date_attribute = 'dummy'

    def get_resource_date(self, snapshot):
        """Return the earliest node snapshot creation time, tz-aware."""

        def to_datetime(v):
            if not isinstance(v, datetime):
                v = parse(v)
            if not v.tzinfo:
                # normalize naive timestamps to UTC for safe comparison
                v = v.replace(tzinfo=tzutc())
            return v

        # Return the earliest of the node snapshot creation times.
        return min(
            to_datetime(ns['SnapshotCreateTime'])
            for ns in snapshot['NodeSnapshots'])
class InstanceAgeFilter(AgeFilter):
    """Filter EC2 instances by effective age.

    ``LaunchTime`` resets on stop/start, so when EBS volumes are attached
    the oldest volume ``AttachTime`` is used as a better proxy for the
    instance's true age; ephemeral-only instances fall back to the base
    ``LaunchTime`` attribute.
    """

    date_attribute = "LaunchTime"
    ebs_key_func = operator.itemgetter('AttachTime')

    schema = type_schema(
        'instance-age',
        # list() so the enum is a JSON array under python3
        op={'type': 'string', 'enum': list(OPERATORS.keys())},
        days={'type': 'number'})

    def get_resource_date(self, i):
        # LaunchTime is basically how long the instance has been on;
        # use the oldest ebs vol attach time instead when available.
        ebs_vols = [
            block['Ebs'] for block in i['BlockDeviceMappings']
            if 'Ebs' in block]
        if not ebs_vols:
            # Fall back to using age attribute (ephemeral instances)
            return super(InstanceAgeFilter, self).get_resource_date(i)
        # oldest attach time wins (removed an unused `found` local here)
        ebs_vols = sorted(ebs_vols, key=self.ebs_key_func)
        return ebs_vols[0]['AttachTime']
class RDSSnapshotAge(AgeFilter):
    """Filters RDS snapshots based on age (in days)

    :example:

        .. code-block: yaml

            policies:
              - name: rds-snapshot-expired
                resource: rds-snapshot
                filters:
                  - type: age
                    days: 28
                    op: ge
                actions:
                  - delete
    """

    schema = type_schema(
        'age',
        days={'type': 'number'},
        # list() so the enum is a JSON array under python3
        op={'type': 'string', 'enum': list(OPERATORS.keys())})

    date_attribute = 'SnapshotCreateTime'
class SnapshotAge(AgeFilter):
    """EBS Snapshot Age Filter

    Filters an EBS snapshot based on the age of the snapshot (in days)

    :example:

        .. code-block: yaml

            policies:
              - name: ebs-snapshots-week-old
                resource: ebs-snapshot
                filters:
                  - type: age
                    days: 7
                    op: ge
    """

    schema = type_schema(
        'age',
        days={'type': 'number'},
        # list() so the enum is a JSON array under python3
        op={'type': 'string', 'enum': list(OPERATORS.keys())})

    date_attribute = 'StartTime'
class StateTransitionAge(AgeFilter):
    """Age an instance has been in the given state.

    .. code-block: yaml

        policies:
          - name: ec2-state-running-7-days
            resource: ec2
            filters:
              - type: state-age
                op: ge
                days: 7
    """

    # raw string: "\(" in a plain literal is an invalid escape on python3
    RE_PARSE_AGE = re.compile(r"\(.*?\)")

    # this filter doesn't use date_attribute, but needs to define it
    # to pass AgeFilter's validate method
    date_attribute = "dummy"

    schema = type_schema(
        'state-age',
        op={'type': 'string', 'enum': list(OPERATORS.keys())},
        days={'type': 'number'})

    def get_resource_date(self, i):
        # StateTransitionReason embeds the timestamp in parentheses,
        # e.g. "User initiated (2017-01-01 00:00:00 GMT)"
        v = i.get('StateTransitionReason')
        if not v:
            return None
        dates = self.RE_PARSE_AGE.findall(v)
        if dates:
            # strip the surrounding parens before parsing
            return parse(dates[0][1:-1])
        return None
class ImageAgeFilter(AgeFilter):
    """Filter AMIs by their ``CreationDate`` age in days."""

    date_attribute = "CreationDate"

    schema = type_schema(
        'image-age',
        # list() so the enum is a JSON array under python3
        op={'type': 'string', 'enum': list(OPERATORS.keys())},
        days={'type': 'number', 'minimum': 0})
class ImageAgeFilter(AgeFilter, LaunchConfigFilterBase):
    """Filter asg by image age.

    Resolves each ASG's launch configuration to its AMI and compares the
    AMI ``CreationDate`` against the ``days`` threshold.
    """

    date_attribute = "CreationDate"

    schema = type_schema(
        'image-age',
        # list() so the enum is a JSON array under python3
        op={'type': 'string', 'enum': list(OPERATORS.keys())},
        days={'type': 'number'})

    def process(self, asgs, event=None):
        self.initialize(asgs)
        return super(ImageAgeFilter, self).process(asgs, event)

    def initialize(self, asgs):
        super(ImageAgeFilter, self).initialize(asgs)
        # collect the distinct image ids referenced by the launch configs
        image_ids = set()
        for cfg in self.configs.values():
            image_ids.add(cfg['ImageId'])
        ec2 = local_session(self.manager.session_factory).client('ec2')
        # NOTE(review): describe_images raises on unknown ids — assumes all
        # referenced AMIs are still describable; confirm against usage.
        results = ec2.describe_images(ImageIds=list(image_ids))
        self.images = {i['ImageId']: i for i in results['Images']}

    def get_resource_date(self, i):
        cfg = self.configs[i['LaunchConfigurationName']]
        # use .get with an ancient sentinel so a deregistered AMI doesn't
        # raise KeyError (consistent with the other asg image-age variants)
        ami = self.images.get(cfg['ImageId'], {})
        return parse(ami.get(self.date_attribute, "2000-01-01T01:01:01.000Z"))
class RedshiftSnapshotAge(AgeFilter):
    """Filter Redshift snapshots by age in days (``SnapshotCreateTime``)."""

    schema = type_schema(
        'age',
        days={'type': 'number'},
        # list() so the enum is a JSON array under python3
        op={'type': 'string', 'enum': list(OPERATORS.keys())})

    date_attribute = 'SnapshotCreateTime'
class TagCountFilter(Filter):
    """Simplify tag counting..

    ie. these two blocks are equivalent

    .. code-block :: yaml

       - filters:
           - type: value
             key: "[length(Tags)][0]"
             op: gte
             value: 8

       - filters:
           - type: tag-count
             value: 8
    """

    schema = utils.type_schema(
        'tag-count',
        count={'type': 'integer', 'minimum': 0},
        # list() so the enum is a JSON array under python3
        op={'enum': list(OPERATORS.keys())})

    def __call__(self, i):
        count = self.data.get('count', 10)
        op_name = self.data.get('op', 'gte')
        op = OPERATORS.get(op_name)
        # aws:-prefixed tags are system-managed and excluded from the count
        tag_count = len([
            t['Key'] for t in i.get('Tags', [])
            if not t['Key'].startswith('aws:')])
        return op(tag_count, count)
class ElastiCacheSnapshotAge(AgeFilter):
    """Filters elasticache snapshots based on their age (in days)

    :example:

        .. code-block: yaml

            policies:
              - name: elasticache-stale-snapshots
                resource: cache-snapshot
                filters:
                  - type: age
                    days: 30
                    op: ge
    """

    schema = type_schema(
        'age',
        days={'type': 'number'},
        # list() so the enum is a JSON array under python3
        op={'type': 'string', 'enum': list(OPERATORS.keys())})

    # AgeFilter.validate requires the attribute; the real date is derived
    # in get_resource_date below.
    date_attribute = 'dummy'

    def get_resource_date(self, snapshot):
        """Override superclass method as there is no single snapshot
        date attribute; use the earliest node snapshot creation time.
        """

        def to_datetime(v):
            if not isinstance(v, datetime):
                v = parse(v)
            if not v.tzinfo:
                # normalize naive timestamps to UTC for safe comparison
                v = v.replace(tzinfo=tzutc())
            return v

        # Return the earliest of the node snapshot creation times.
        return min(
            to_datetime(ns['SnapshotCreateTime'])
            for ns in snapshot['NodeSnapshots'])
class ImageAgeFilter(AgeFilter):
    """Filter AMIs whose age in days satisfies the given comparison.

    :example:

        .. code-block:: yaml

            policies:
              - name: ami-remove-launch-permissions
                resource: ami
                filters:
                  - type: image-age
                    days: 30
    """

    # age is derived from the AMI's creation timestamp
    date_attribute = "CreationDate"

    schema = type_schema(
        'image-age',
        op={'type': 'string', 'enum': list(OPERATORS.keys())},
        days={'type': 'number', 'minimum': 0})
def __call__(self, i):
    """Return True when the resource's non-``aws:`` tag count satisfies
    the configured op/count (defaults: gte 10).
    """
    threshold = self.data.get('count', 10)
    compare = OPERATORS.get(self.data.get('op', 'gte'))
    # tags in the aws: namespace are system-managed; exclude them
    n_tags = sum(
        1 for t in i.get('Tags', [])
        if not t['Key'].startswith('aws:'))
    return compare(n_tags, threshold)
class UpTimeFilter(AgeFilter):
    """Filter instances by uptime in days (based on ``LaunchTime``)."""

    date_attribute = "LaunchTime"

    schema = type_schema(
        'instance-uptime',
        # list() so the enum is a JSON array under python3
        op={'type': 'string', 'enum': list(OPERATORS.keys())},
        days={'type': 'number'})
class StateTransitionAge(AgeFilter):
    """Age an instance has been in the given state."""

    # raw string: "\(" in a plain literal is an invalid escape on python3
    RE_PARSE_AGE = re.compile(r"\(.*?\)")

    # this filter doesn't use date_attribute, but needs to define it
    # to pass AgeFilter's validate method
    date_attribute = "dummy"

    schema = type_schema(
        'state-age',
        # list() so the enum is a JSON array under python3
        op={'type': 'string', 'enum': list(OPERATORS.keys())},
        days={'type': 'number'})

    def get_resource_date(self, i):
        # StateTransitionReason embeds the timestamp in parentheses,
        # e.g. "User initiated (2017-01-01 00:00:00 GMT)"
        v = i.get('StateTransitionReason')
        if not v:
            return None
        dates = self.RE_PARSE_AGE.findall(v)
        if not dates:
            # no parenthesized timestamp present; guard against IndexError
            return None
        return parse(dates[0][1:-1])
class ImageAge(AgeFilter, InstanceImageBase):
    """Filter EC2 instances by the age of their AMI (``CreationDate``)."""

    date_attribute = "CreationDate"

    schema = type_schema(
        'image-age',
        # list() so the enum is a JSON array under python3
        op={'type': 'string', 'enum': list(OPERATORS.keys())},
        days={'type': 'number'})

    def process(self, resources, event=None):
        # resolve instance -> AMI metadata once for the whole resource set
        self.image_map = self.get_image_mapping(resources)
        return super(ImageAge, self).process(resources, event)

    def get_resource_date(self, i):
        if i['ImageId'] not in self.image_map:
            # our image is no longer available; use an ancient sentinel
            # date so "older than" policies still match
            return parse("2000-01-01T01:01:01.000Z")
        image = self.image_map[i['ImageId']]
        return parse(image['CreationDate'])
class ImageAgeFilter(AgeFilter, LaunchConfigFilterBase):
    """Filter asg by image age (in days).

    :example:

        .. code-block: yaml

            policies:
              - name: asg-older-image
                resource: asg
                filters:
                  - type: image-age
                    days: 90
                    op: ge
    """

    permissions = (
        "ec2:DescribeImages",
        "autoscaling:DescribeLaunchConfigurations")

    date_attribute = "CreationDate"

    schema = type_schema(
        'image-age',
        op={'type': 'string', 'enum': list(OPERATORS.keys())},
        days={'type': 'number'})

    def process(self, asgs, event=None):
        self.initialize(asgs)
        return super(ImageAgeFilter, self).process(asgs, event)

    def initialize(self, asgs):
        super(ImageAgeFilter, self).initialize(asgs)
        # Fetch all AMIs via the ami resource manager and index by id.
        # (Removed a dead loop that collected the configs' image ids into a
        # set that was never used.)
        results = self.manager.get_resource_manager('ami').resources()
        self.images = {i['ImageId']: i for i in results}

    def get_resource_date(self, i):
        cfg = self.configs[i['LaunchConfigurationName']]
        # deregistered/missing AMIs resolve to the ancient sentinel date
        ami = self.images.get(cfg['ImageId'], {})
        return parse(ami.get(self.date_attribute, "2000-01-01T01:01:01.000Z"))
class InstanceAgeFilter(AgeFilter):
    """Filter EC2 instances by age (days, hours, or minutes).

    :Example:

        .. code-block: yaml

            policies:
              - name: ec2-30-days-plus
                resource: ec2
                filters:
                  - type: instance-age
                    op: ge
                    days: 30
    """

    date_attribute = "LaunchTime"
    ebs_key_func = operator.itemgetter('AttachTime')

    schema = type_schema(
        'instance-age',
        op={'type': 'string', 'enum': list(OPERATORS.keys())},
        days={'type': 'number'},
        hours={'type': 'number'},
        minutes={'type': 'number'})

    def get_resource_date(self, i):
        # LaunchTime only says how long the instance has been on; the
        # oldest EBS volume attach time is a better proxy for true age.
        volumes = [
            mapping['Ebs']
            for mapping in i['BlockDeviceMappings']
            if 'Ebs' in mapping]
        if not volumes:
            # ephemeral-only instance: fall back to the base age attribute
            return super(InstanceAgeFilter, self).get_resource_date(i)
        oldest = sorted(volumes, key=self.ebs_key_func)[0]
        return oldest['AttachTime']
class LaunchConfigAge(AgeFilter):
    """Filter ASG launch configurations older or newer than N days.

    :example:

        .. code-block: yaml

            policies:
              - name: asg-launch-config-old
                resource: launch-config
                filters:
                  - type: age
                    days: 90
                    op: ge
    """

    # age is measured from the launch configuration's creation time
    date_attribute = "CreatedTime"

    schema = type_schema(
        'age',
        days={'type': 'number'},
        op={'type': 'string', 'enum': list(OPERATORS.keys())})
class RDSSnapshotAge(AgeFilter):
    """Filter RDS cluster snapshots whose age in days matches the op.

    :example:

        .. code-block:: yaml

            policies:
              - name: rds-cluster-snapshots-expired
                resource: rds-cluster-snapshot
                filters:
                  - type: age
                    days: 30
                    op: gt
    """

    schema = type_schema(
        'age',
        op={'type': 'string', 'enum': list(OPERATORS.keys())},
        days={'type': 'number'})

    # age is measured from the snapshot's creation timestamp
    date_attribute = 'SnapshotCreateTime'
class RedshiftSnapshotAge(AgeFilter):
    """Filters redshift snapshots based on age (in days)

    :example:

        .. code-block: yaml

            policies:
              - name: redshift-old-snapshots
                resource: redshift-snapshot
                filters:
                  - type: age
                    days: 21
                    op: gt
    """

    schema = type_schema(
        'age',
        days={'type': 'number'},
        # list() so the enum is a JSON array under python3
        op={'type': 'string', 'enum': list(OPERATORS.keys())})

    date_attribute = 'SnapshotCreateTime'
class ImageAge(AgeFilter, InstanceImageBase):
    """EC2 AMI age filter.

    Filters EC2 instances based on the age of their AMI image (in days).

    :Example:

        .. code-block: yaml

            policies:
              - name: ec2-ancient-ami
                resource: ec2
                filters:
                  - type: image-age
                    op: ge
                    days: 90
    """

    date_attribute = "CreationDate"

    schema = type_schema(
        'image-age',
        op={'type': 'string', 'enum': list(OPERATORS.keys())},
        days={'type': 'number'})

    def get_permissions(self):
        # delegate to the ami resource manager, which does the describes
        return self.manager.get_resource_manager('ami').get_permissions()

    def process(self, resources, event=None):
        # warm the instance -> image cache before the base-class matching
        self.prefetch_instance_images(resources)
        return super(ImageAge, self).process(resources, event)

    def get_resource_date(self, i):
        image = self.get_instance_image(i)
        if not image:
            # image no longer available: ancient sentinel date so that
            # "older than" policies still match
            return parse("2000-01-01T01:01:01.000Z")
        return parse(image['CreationDate'])
class MetricsFilter(Filter):
    """Supports cloud watch metrics filters on resources.

    Docs on cloud watch metrics

    - GetMetricStatistics - http://goo.gl/w8mMEY
    - Supported Metrics - http://goo.gl/n0E0L7

    usage:: yaml

      - name: ec2-underutilized
        resource: ec2
        filters:
          - type: metric
            name: CPUUtilization
            days: 4
            period: 86400
            value: 30
            op: less-than

    Note periods when a resource is not sending metrics are not part
    of calculated statistics as in the case of a stopped ec2 instance,
    nor for resources to new to have existed the entire period. ie.
    being stopped for an ec2 intsance wouldn't lower the average cpu
    utilization, nor would

    Todo

    - support offhours considerations (just run at night?)
    - support additional stats, values

    Use Case / Find Underutilized servers non-inclusive of offhour periods

    If server has no data for period, its omitted. So a server that's
    off reports no metrics for the relevant period.
    """

    schema = type_schema(
        'metrics',
        namespace={'type': 'string'},
        name={'type': 'string'},
        dimensions={'type': 'array', 'items': {'type': 'string'}},
        # Type choices
        statistics={'type': 'string', 'enum': [
            'Average', 'Sum', 'Maximum', 'Minimum', 'SampleCount']},
        days={'type': 'number'},
        # list() so the enum is a JSON array under python3
        op={'type': 'string', 'enum': list(OPERATORS.keys())},
        value={'type': 'number'},
        required=('value', 'name'))

    MAX_QUERY_POINTS = 50850
    MAX_RESULT_POINTS = 1440

    # Default per service, for overloaded services like ec2
    # we do type specific default namespace annotation
    # specifically AWS/EBS and AWS/EC2Spot
    # ditto for spot fleet
    DEFAULT_NAMESPACE = {
        'cloudfront': 'AWS/CloudFront',
        'cloudsearch': 'AWS/CloudSearch',
        'dynamodb': 'AWS/DynamoDB',
        'ecs': 'AWS/ECS',
        'elasticache': 'AWS/ElastiCache',
        'ec2': 'AWS/EC2',
        'elb': 'AWS/ELB',
        'elbv2': 'AWS/ApplicationELB',
        'emr': 'AWS/EMR',
        'es': 'AWS/ES',
        'events': 'AWS/Events',
        'firehose': 'AWS/Firehose',
        'kinesis': 'AWS/Kinesis',
        'lambda': 'AWS/Lambda',
        'logs': 'AWS/Logs',
        'redshift': 'AWS/Redshift',
        'rds': 'AWS/RDS',
        'route53': 'AWS/Route53',
        's3': 'AWS/S3',
        'sns': 'AWS/SNS',
        'sqs': 'AWS/SQS',
    }

    def process(self, resources, event=None):
        days = self.data.get('days', 14)
        duration = timedelta(days)

        self.metric = self.data['name']
        self.end = datetime.utcnow()
        self.start = self.end - duration
        # default period covers the whole window in a single datapoint
        self.period = int(self.data.get('period', duration.total_seconds()))
        self.statistics = self.data.get('statistics', 'Average')
        self.model = self.manager.get_model()
        self.op = OPERATORS[self.data.get('op', 'less-than')]
        self.value = self.data['value']

        ns = self.data.get('namespace')
        if not ns:
            # fall back to model-declared namespace, then service default
            ns = getattr(self.model, 'default_namespace', None)
            if not ns:
                ns = self.DEFAULT_NAMESPACE[self.model.service]
        self.namespace = ns

        self.log.debug("Querying metrics for %d", len(resources))
        matched = []
        with self.executor_factory(max_workers=3) as w:
            futures = []
            for resource_set in chunks(resources, 50):
                futures.append(
                    w.submit(self.process_resource_set, resource_set))
            for f in as_completed(futures):
                if f.exception():
                    self.log.warning(
                        "CW Retrieval error: %s" % f.exception())
                    continue
                matched.extend(f.result())
        return matched

    def get_dimensions(self, resource):
        return [{'Name': self.model.dimension,
                 'Value': resource[self.model.dimension]}]

    def process_resource_set(self, resource_set):
        client = local_session(
            self.manager.session_factory).client('cloudwatch')

        matched = []
        for r in resource_set:
            # if we overload dimensions with multiple resources we get
            # the statistics/average over those resources.
            dimensions = self.get_dimensions(r)

            r['Metrics'] = client.get_metric_statistics(
                Namespace=self.namespace,
                MetricName=self.metric,
                Statistics=[self.statistics],
                StartTime=self.start,
                EndTime=self.end,
                Period=self.period,
                Dimensions=dimensions)['Datapoints']
            if len(r['Metrics']) == 0:
                continue
            # NOTE(review): only the first datapoint is compared — assumes
            # period spans the whole window; confirm for multi-point periods
            if self.op(r['Metrics'][0][self.statistics], self.value):
                matched.append(r)
        return matched
class CredentialReport(Filter):
    """Use IAM Credential report to filter users.

    The IAM Credential report ( https://goo.gl/sbEPtM ) aggregates
    multiple pieces of information on iam users. This makes it highly
    efficient for querying multiple aspects of a user that would
    otherwise require per user api calls.

    For example if we wanted to retrieve all users with mfa who have
    never used their password but have active access keys from the
    last month

    .. code-block: yaml

     - name: iam-mfa-active-keys-no-login
       resource: iam-user
       filters:
         - type: credential
           key: mfa_active
           value: true
         - type: credential
           key: password_last_used
           value: absent
         - type: credential
           key: access_keys.last_used
           value_type: age
           value: 30
           op: less-than

    Credential Report Transforms

    We perform some default transformations from the raw
    credential report. Sub-objects (access_key_1, cert_2)
    are turned into array of dictionaries for matching purposes
    with their common prefixes stripped.
    N/A values are turned into None, TRUE/FALSE are turned into
    boolean values.
    """

    schema = type_schema(
        'credential',
        value_type={'type': 'string', 'enum': [
            'age', 'expiration', 'size', 'regex']},
        key={'type': 'string',
             'title': 'report key to search',
             'enum': [
                 'user',
                 'arn',
                 'user_creation_time',
                 'password_enabled',
                 'password_last_used',
                 'password_last_changed',
                 'password_next_rotation',
                 'mfa_active',
                 'access_keys',
                 'access_keys.active',
                 'access_keys.last_used_date',
                 'access_keys.last_used_region',
                 'access_keys.last_used_service',
                 'access_keys.last_rotated',
                 'certs',
                 'certs.active',
                 'certs.last_rotated',
             ]},
        value={'oneOf': [
            {'type': 'array'},
            {'type': 'string'},
            {'type': 'boolean'},
            {'type': 'number'},
            {'type': 'null'}]},
        # list() so the enum is a JSON array under python3
        op={'enum': list(OPERATORS.keys())},
        report_generate={
            'title': 'Generate a report if none is present.',
            'default': True,
            'type': 'boolean'},
        report_delay={
            'title': 'Number of seconds to wait for report generation.',
            'default': 10,
            'type': 'number'},
        report_max_age={
            'title': 'Number of seconds to consider a report valid.',
            'default': 60 * 60 * 24,
            'type': 'number'})

    # (prefix to strip, list attribute to append the sub-object to)
    list_sub_objects = (('access_key_1_', 'access_keys'),
                        ('access_key_2_', 'access_keys'),
                        ('cert_1_', 'certs'),
                        ('cert_2_', 'certs'))

    permissions = ('iam:GenerateCredentialReport',
                   'iam:GetCredentialReport')

    def get_value_or_schema_default(self, k):
        """Return the policy-supplied value for *k*, else its schema default."""
        if k in self.data:
            return self.data[k]
        return self.schema['properties'][k]['default']

    def get_credential_report(self):
        """Return the parsed credential report, cached per policy run."""
        report = self.manager._cache.get('iam-credential-report')
        if report:
            return report
        data = self.fetch_credential_report()
        report = {}
        # NOTE(review): assumes `data` is text; py3 get_credential_report
        # may return bytes — confirm decoding happens upstream.
        reader = csv.reader(StringIO(data))
        # next() builtin instead of reader.next() for python3 compatibility
        headers = next(reader)
        for line in reader:
            info = dict(zip(headers, line))
            report[info['user']] = self.process_user_record(info)
        self.manager._cache.save('iam-credential-report', report)
        return report

    @classmethod
    def process_user_record(cls, info):
        """Type convert the csv record, modifies in place."""
        # snapshot the keys: we pop from info below, and iterating a live
        # dict view while mutating raises RuntimeError on python3
        keys = list(info.keys())
        # Value conversion
        for k in keys:
            v = info[k]
            if v in ('N/A', 'no_information'):
                info[k] = None
            elif v == 'false':
                info[k] = False
            elif v == 'true':
                info[k] = True
        # Object conversion: fold prefixed columns into lists of dicts
        for p, t in cls.list_sub_objects:
            obj = dict([(k[len(p):], info.pop(k))
                        for k in keys if k.startswith(p)])
            if obj.get('active', False):
                info.setdefault(t, []).append(obj)
        return info

    def fetch_credential_report(self):
        """Fetch (and if needed, generate) the account credential report."""
        client = local_session(self.manager.session_factory).client('iam')
        try:
            report = client.get_credential_report()
        except ClientError as e:
            if e.response['Error']['Code'] != 'ReportNotPresent':
                raise
            report = None
        if report:
            # discard reports older than report_max_age
            threshold = datetime.datetime.now(tz=tzutc()) - timedelta(
                seconds=self.get_value_or_schema_default('report_max_age'))
            if not report['GeneratedTime'].tzinfo:
                threshold = threshold.replace(tzinfo=None)
            if report['GeneratedTime'] < threshold:
                report = None
        if report is None:
            if not self.get_value_or_schema_default('report_generate'):
                raise ValueError("Credential Report Not Present")
            client.generate_credential_report()
            time.sleep(self.get_value_or_schema_default('report_delay'))
            report = client.get_credential_report()
        return report['Content']

    def process(self, resources, event=None):
        # dotted keys match against sub-objects; pre-compute the sub-key
        # matcher config used by match() below
        if '.' in self.data['key']:
            self.matcher_config = dict(self.data)
            self.matcher_config['key'] = self.data['key'].split('.', 1)[1]
        return []

    def match(self, info):
        """Return True when the user's report record satisfies the filter."""
        if info is None:
            return False
        k = self.data.get('key')
        if '.' not in k:
            vf = ValueFilter(self.data)
            vf.annotate = False
            return vf(info)

        # dotted key: match any element of the prefixed sub-object list
        prefix, sk = k.split('.', 1)
        vf = ValueFilter(self.matcher_config)
        vf.annotate = False
        for v in info.get(prefix, ()):
            if vf.match(v):
                return True
        return False
class MetricsFilter(Filter):
    """Supports cloud watch metrics filters on resources.

    All resources that have cloud watch metrics are supported.

    Docs on cloud watch metrics

    - GetMetricStatistics - http://goo.gl/w8mMEY
    - Supported Metrics - http://goo.gl/n0E0L7

    .. code-block:: yaml

      - name: ec2-underutilized
        resource: ec2
        filters:
          - type: metric
            name: CPUUtilization
            days: 4
            period: 86400
            value: 30
            op: less-than

    Note periods when a resource is not sending metrics are not part
    of calculated statistics as in the case of a stopped ec2 instance,
    nor for resources to new to have existed the entire period. ie.
    being stopped for an ec2 intsance wouldn't lower the average cpu
    utilization, nor would
    """

    schema = type_schema(
        'metrics',
        **{'namespace': {'type': 'string'},
           'name': {'type': 'string'},
           'dimensions': {'type': 'array', 'items': {'type': 'string'}},
           # Type choices
           'statistics': {'type': 'string', 'enum': [
               'Average', 'Sum', 'Maximum', 'Minimum', 'SampleCount']},
           'days': {'type': 'number'},
           # list() so the enum is a JSON array under python3
           'op': {'type': 'string', 'enum': list(OPERATORS.keys())},
           'value': {'type': 'number'},
           'period': {'type': 'number'},
           'percent-attr': {'type': 'string'},
           'required': ('value', 'name')})

    permissions = ("cloudwatch:GetMetricStatistics",)

    MAX_QUERY_POINTS = 50850
    MAX_RESULT_POINTS = 1440

    # Default per service, for overloaded services like ec2
    # we do type specific default namespace annotation
    # specifically AWS/EBS and AWS/EC2Spot
    # ditto for spot fleet
    DEFAULT_NAMESPACE = {
        'cloudfront': 'AWS/CloudFront',
        'cloudsearch': 'AWS/CloudSearch',
        'dynamodb': 'AWS/DynamoDB',
        'ecs': 'AWS/ECS',
        'elasticache': 'AWS/ElastiCache',
        'ec2': 'AWS/EC2',
        'elb': 'AWS/ELB',
        'elbv2': 'AWS/ApplicationELB',
        'emr': 'AWS/EMR',
        'es': 'AWS/ES',
        'events': 'AWS/Events',
        'firehose': 'AWS/Firehose',
        'kinesis': 'AWS/Kinesis',
        'lambda': 'AWS/Lambda',
        'logs': 'AWS/Logs',
        'redshift': 'AWS/Redshift',
        'rds': 'AWS/RDS',
        'route53': 'AWS/Route53',
        's3': 'AWS/S3',
        'sns': 'AWS/SNS',
        'sqs': 'AWS/SQS',
    }

    def process(self, resources, event=None):
        days = self.data.get('days', 14)
        duration = timedelta(days)

        self.metric = self.data['name']
        self.end = datetime.utcnow()
        self.start = self.end - duration
        # default period covers the whole window in a single datapoint
        self.period = int(self.data.get('period', duration.total_seconds()))
        self.statistics = self.data.get('statistics', 'Average')
        self.model = self.manager.get_model()
        self.op = OPERATORS[self.data.get('op', 'less-than')]
        self.value = self.data['value']

        ns = self.data.get('namespace')
        if not ns:
            # fall back to model-declared namespace, then service default
            ns = getattr(self.model, 'metrics_namespace', None)
            if not ns:
                ns = self.DEFAULT_NAMESPACE[self.model.service]
        self.namespace = ns

        self.log.debug("Querying metrics for %d", len(resources))
        matched = []
        with self.executor_factory(max_workers=3) as w:
            futures = []
            for resource_set in chunks(resources, 50):
                futures.append(
                    w.submit(self.process_resource_set, resource_set))
            for f in as_completed(futures):
                if f.exception():
                    self.log.warning(
                        "CW Retrieval error: %s" % f.exception())
                    continue
                matched.extend(f.result())
        return matched

    def get_dimensions(self, resource):
        return [{'Name': self.model.dimension,
                 'Value': resource[self.model.dimension]}]

    def process_resource_set(self, resource_set):
        client = local_session(
            self.manager.session_factory).client('cloudwatch')

        matched = []
        for r in resource_set:
            # if we overload dimensions with multiple resources we get
            # the statistics/average over those resources.
            dimensions = self.get_dimensions(r)

            collected_metrics = r.setdefault('c7n.metrics', {})
            # Note this annotation cache is policy scoped, not across
            # policies, still the lack of full qualification on the key
            # means multiple filters within a policy using the same metric
            # across different periods or dimensions would be problematic.
            key = "%s.%s.%s" % (self.namespace, self.metric, self.statistics)
            if key not in collected_metrics:
                collected_metrics[key] = client.get_metric_statistics(
                    Namespace=self.namespace,
                    MetricName=self.metric,
                    Statistics=[self.statistics],
                    StartTime=self.start,
                    EndTime=self.end,
                    Period=self.period,
                    Dimensions=dimensions)['Datapoints']
            if len(collected_metrics[key]) == 0:
                continue
            if self.data.get('percent-attr'):
                # compare the metric as a percentage of a resource attribute
                # (e.g. utilization vs provisioned capacity)
                percent = (collected_metrics[key][0][self.statistics] /
                           r[self.data.get('percent-attr')]) * 100
                if self.op(percent, self.value):
                    matched.append(r)
            elif self.op(collected_metrics[key][0][self.statistics],
                         self.value):
                matched.append(r)
        return matched