def test_chunks(self):
    self.assertEqual(
        list(utils.chunks(range(100), size=50)),
        [list(range(50)), list(range(50, 100, 1))],
    )
    self.assertEqual(list(utils.chunks(range(1), size=50)), [[0]])
    self.assertEqual(
        list(utils.chunks(range(60), size=50)),
        [list(range(50)), list(range(50, 60, 1))],
    )
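# The tests above pin down the contract every caller in this collection relies
# on: chunks(iterable, size) lazily yields lists of at most `size` items and
# accepts any iterable (ranges, dict views, csv readers, zips). The sketch
# below is a minimal illustration of that contract, not necessarily the actual
# c7n.utils implementation.
from itertools import islice


def chunks(iterable, size=50):
    """Yield successive lists of at most `size` items from `iterable`."""
    it = iter(iterable)
    while True:
        batch = list(islice(it, size))
        if not batch:
            return
        yield batch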
def process(self, param_groups): client = local_session(self.manager.session_factory).client('rds') params = [] for param in self.data.get('params', []): params.append({ 'ParameterName': param['name'], 'ParameterValue': param['value'], 'ApplyMethod': param.get('apply-method', 'immediate'), }) for param_group in param_groups: name = self.get_pg_name(param_group) # Fetch the existing parameters for this DB, so we only try to change the ones that are # different. cur_params = self.get_current_params(client, name) changed_params = [] for param in params: param_name = param['ParameterName'] if (param_name not in cur_params or cur_params[param_name]['ParameterValue'] != param['ParameterValue']): changed_params.append(param) # Can only do 20 elements at a time per docs, so if we have more than that we will # break it into multiple requests: https://goo.gl/Z6oGNv for param_set in chunks(changed_params, 5): self.do_modify(client, name, param_set) self.log.info('Modified RDS parameter group %s (%i parameters changed, %i unchanged)', name, len(changed_params), len(params) - len(changed_params))
def flush(self):
    buf = self.buf
    self.buf = []
    for segment_set in utils.chunks(buf, 50):
        self.client.put_trace_segments(
            TraceSegmentDocuments=[
                s.serialize() for s in segment_set])
def process(self, resources): # Legacy msg = self.data.get('msg') msg = self.data.get('value') or msg tag = self.data.get('tag', DEFAULT_TAG) tag = self.data.get('key') or tag # Support setting multiple tags in a single go with a mapping tags = self.data.get('tags') if tags is None: tags = [] else: tags = [{'Key': k, 'Value': v} for k, v in tags.items()] if msg: tags.append({'Key': tag, 'Value': msg}) batch_size = self.data.get('batch_size', self.batch_size) with self.executor_factory(max_workers=self.concurrency) as w: futures = [] for resource_set in utils.chunks(resources, size=batch_size): futures.append( w.submit(self.process_resource_set, resource_set, tags)) for f in as_completed(futures): if f.exception(): self.log.error( "Exception removing tags: %s on resources:%s \n %s" % ( tags, self.id_key, f.exception()))
def _process_bucket(self, b, p, key_log, w):
    content_key = self.get_bucket_op(b, 'contents_key')
    count = 0

    for key_set in p:
        count += len(key_set.get(content_key, []))

        # Empty bucket check
        if content_key not in key_set and not key_set['IsTruncated']:
            return {'Bucket': b['Name'],
                    'Remediated': key_log.count,
                    'Count': count}

        futures = []
        for batch in chunks(key_set.get(content_key, []), size=100):
            if not batch:
                continue
            futures.append(w.submit(self.process_chunk, batch, b))

        for f in as_completed(futures):
            if f.exception():
                log.exception("Exception Processing bucket:%s key batch %s" % (
                    b['Name'], f.exception()))
                continue
            r = f.result()
            if r:
                key_log.add(r)

        # Log completion at info level, progress at debug level
        if key_set['IsTruncated']:
            log.debug('Scan progress bucket:%s keys:%d remediated:%d ...',
                      b['Name'], count, key_log.count)
        else:
            log.info('Scan Complete bucket:%s keys:%d remediated:%d',
                     b['Name'], count, key_log.count)

    return {'Bucket': b['Name'], 'Remediated': key_log.count, 'Count': count}
def add_tags_to_results(self, client, elbs): """ Gets the tags for the ELBs and adds them to the result set. """ elb_names = [elb['LoadBalancerName'] for elb in elbs] names_to_tags = {} fn = partial(self.process_tags, client=client) futures = [] with self.executor_factory(max_workers=3) as w: # max 20 ELBs per call (API limitation) for elb_names_chunk in chunks(elb_names, size=20): futures.append( w.submit(fn, elb_names_chunk)) for f in as_completed(futures): if f.exception(): self.log.exception("Exception Processing ELB: %s" % ( f.exception())) continue r = f.result() if r: names_to_tags.update(r) for elb in elbs: elb['Tags'] = names_to_tags[elb['LoadBalancerName']]
def process(self, resources, event=None):
    for batch in utils.chunks(resources, 500):
        message = {'resources': batch,
                   'event': event,
                   'action': self.data,
                   'policy': self.manager.data}
        self.send_data_message(message)
def process(self, resources, event=None): if not resources: return resources client = local_session(self.manager.session_factory).client( 'health', region_name='us-east-1') f = self.get_filter_parameters() resource_map = {r[self.manager.get_model().id]: r for r in resources} found = set() seen = set() for resource_set in chunks(resource_map.keys(), 100): f['entityValues'] = resource_set events = client.describe_events(filter=f)['events'] events = [e for e in events if e['arn'] not in seen] entities = self.process_event(events) event_map = {e['arn']: e for e in events} for e in entities: rid = e['entityValue'] if rid not in resource_map: continue resource_map[rid].setdefault( 'c7n:HealthEvent', []).append(event_map[e['eventArn']]) found.add(rid) seen.update(event_map.keys()) return [resource_map[resource_id] for resource_id in found]
def augment(self, resources):
    client = local_session(self.session_factory).client('health')
    for resource_set in chunks(resources, 10):
        event_map = {r['arn']: r for r in resource_set}
        event_details = client.describe_event_details(
            eventArns=list(event_map.keys()))['successfulSet']
        for d in event_details:
            event_map[d['event']['arn']][
                'Description'] = d['eventDescription']['latestDescription']
        event_arns = [r['arn'] for r in resource_set
                      if r['eventTypeCategory'] != 'accountNotification']
        if not event_arns:
            continue
        paginator = client.get_paginator('describe_affected_entities')
        entities = list(itertools.chain(
            *[p['entities'] for p in paginator.paginate(
                filter={'eventArns': event_arns})]))
        for e in entities:
            event_map[e.pop('eventArn')].setdefault(
                'AffectedEntities', []).append(e)
    return resources
def process(self, instances): instances = self._filter_ec2_with_volumes( self.filter_instance_state(instances)) if not len(instances): return client = utils.local_session(self.manager.session_factory).client('ec2') failures = {} # Play nice around aws having insufficient capacity... for itype, t_instances in utils.group_by( instances, 'InstanceType').items(): for izone, z_instances in utils.group_by( t_instances, 'Placement.AvailabilityZone').items(): for batch in utils.chunks(z_instances, self.batch_size): fails = self.process_instance_set(client, batch, itype, izone) if fails: failures["%s %s" % (itype, izone)] = [i['InstanceId'] for i in batch] if failures: fail_count = sum(map(len, failures.values())) msg = "Could not start %d of %d instances %s" % ( fail_count, len(instances), utils.dumps(failures)) self.log.warning(msg) raise RuntimeError(msg)
def _datapipeline_info(pipes, session_factory, executor_factory, retry): def process_tags(pipe_set): client = local_session(session_factory).client('datapipeline') pipe_map = {pipe['id']: pipe for pipe in pipe_set} while True: try: results = retry( client.describe_pipelines, pipelineIds=list(pipe_map.keys())) break except ClientError as e: if e.response['Error']['Code'] != 'PipelineNotFound': raise msg = e.response['Error']['Message'] _, lb_name = msg.strip().rsplit(' ', 1) pipe_map.pop(lb_name) if not pipe_map: results = {'TagDescriptions': []} break continue for pipe_desc in results['pipelineDescriptionList']: pipe = pipe_map[pipe_desc['pipelineId']] pipe['tags'] = pipe_desc['tags'] for field in pipe_desc['fields']: key = field['key'] if not key.startswith('@'): continue pipe[key[1:]] = field['stringValue'] with executor_factory(max_workers=2) as w: return list(w.map(process_tags, chunks(pipes, 20)))
def process(self, resources):
    if self.data['target_region'] == self.manager.config.region:
        self.log.warning(
            "Source and destination region are the same, skipping copy")
        return
    for resource_set in chunks(resources, 20):
        self.process_resource_set(resource_set)
def execute_in_parallel(resources, event, execution_method, executor_factory, log, max_workers=constants.DEFAULT_MAX_THREAD_WORKERS, chunk_size=constants.DEFAULT_CHUNK_SIZE): futures = [] results = [] exceptions = [] if ThreadHelper.disable_multi_threading: try: result = execution_method(resources, event) if result: results.extend(result) except Exception as e: exceptions.append(e) else: with executor_factory(max_workers=max_workers) as w: for resource_set in chunks(resources, chunk_size): futures.append(w.submit(execution_method, resource_set, event)) for f in as_completed(futures): if f.exception(): log.error( "Execution failed with error: %s" % f.exception()) exceptions.append(f.exception()) else: result = f.result() if result: results.extend(result) return results, list(set(exceptions))
def load_manifest_file(client, bucket, schema, versioned, ifilters, key_info): """Given an inventory csv file, return an iterator over keys """ # To avoid thundering herd downloads, we do an immediate yield for # interspersed i/o yield None # Inline these values to avoid the local var lookup, they are constants # rKey = schema['Key'] # 1 # rIsLatest = schema['IsLatest'] # 3 # rVersionId = schema['VersionId'] # 2 with tempfile.NamedTemporaryFile() as fh: client.download_fileobj(Bucket=bucket, Key=key_info['key'], Fileobj=fh) fh.seek(0) reader = csv.reader(gzip.GzipFile(fileobj=fh, mode='r')) for key_set in chunks(reader, 1000): keys = [] for kr in key_set: k = kr[1] if inventory_filter(ifilters, schema, kr): continue k = unquote_plus(k) if versioned: if kr[3] == 'true': keys.append((k, kr[2], True)) else: keys.append((k, kr[2])) else: keys.append(k) yield keys
def process_resource_set(self, resource_set):
    client = self.manager.session_factory(
        region=self.data['target_region']).client('ec2')

    # Whether this copy crosses regions; decides if we wait on completion below.
    cross_region = self.data['target_region'] != self.manager.config.region

    params = {}
    params['Encrypted'] = self.data.get('encrypted', True)
    if params['Encrypted']:
        params['KmsKeyId'] = self.data['target_key']

    for snapshot_set in chunks(resource_set, 5):
        for r in snapshot_set:
            snapshot_id = client.copy_snapshot(
                SourceRegion=self.manager.config.region,
                SourceSnapshotId=r['SnapshotId'],
                Description=r.get('Description', ''),
                **params)['SnapshotId']
            client.create_tags(
                Resources=[snapshot_id],
                Tags=r['Tags'])
            r['CopiedSnapshot'] = snapshot_id

        if not cross_region or len(snapshot_set) < 5:
            continue

        copy_ids = [r['CopiedSnapshot'] for r in snapshot_set]
        self.log.debug(
            "Waiting on cross-region snapshot copy %s", ",".join(copy_ids))
        waiter = client.get_waiter('snapshot_completed')
        waiter.wait(SnapshotIds=copy_ids)
        self.log.debug(
            "Cross region copy complete %s", ",".join(copy_ids))
def get_elb_bucket_locations(self): session = local_session(self.manager.session_factory) client = session.client('elb') # Try to use the cache if it exists elbs = self.manager._cache.get( {'region': self.manager.config.region, 'resource': 'elb'}) # Sigh, post query refactor reuse, we can't save our cache here # as that resource manager does extra lookups on tags. Not # worth paginating, since with cache usage we have full set in # mem. if elbs is None: p = client.get_paginator('describe_load_balancers') results = p.paginate() elbs = results.build_full_result().get( 'LoadBalancerDescriptions', ()) self.log.info("Queried %d elbs", len(elbs)) else: self.log.info("Using %d cached elbs", len(elbs)) get_elb_attrs = functools.partial( _query_elb_attrs, self.manager.session_factory) with self.executor_factory(max_workers=2) as w: futures = [] for elb_set in chunks(elbs, 100): futures.append(w.submit(get_elb_attrs, elb_set)) for f in as_completed(futures): if f.exception(): log.error("Error while scanning elb log targets: %s" % ( f.exception())) continue for tgt in f.result(): yield tgt
def process(self, resources): # Move this to policy? / no resources bypasses actions? if not len(resources): return msg_tmpl = self.data.get( 'msg', 'Resource does not meet policy: {op}@{action_date}') op = self.data.get('op', 'stop') tag = self.data.get('tag', DEFAULT_TAG) date = self.data.get('days', 4) n = datetime.now(tz=tzutc()) action_date = n + timedelta(days=date) msg = msg_tmpl.format( op=op, action_date=action_date.strftime('%Y/%m/%d')) self.log.info("Tagging %d resources for %s on %s" % ( len(resources), op, action_date.strftime('%Y/%m/%d'))) tags = [{'Key': tag, 'Value': msg}] with self.executor_factory(max_workers=2) as w: futures = [] for resource_set in utils.chunks(resources, size=200): futures.append( w.submit(self.process_resource_set, resource_set, tags)) for f in as_completed(futures): if f.exception(): self.log.error( "Exception tagging resource set: %s \n %s" % ( tags, f.exception()))
def process(self, resources): stopped_instances = self.filter_instance_state( resources, ('stopped',)) running_instances = self.filter_instance_state( resources, ('running',)) if self.data.get('restart') and running_instances: Stop({'terminate-ephemeral': False}, self.manager).process(running_instances) client = utils.local_session( self.manager.session_factory).client('ec2') waiter = client.get_waiter('instance_stopped') try: waiter.wait( InstanceIds=[r['InstanceId'] for r in running_instances]) except ClientError as e: self.log.exception( "Exception stopping instances for resize:\n %s" % e) for instance_set in utils.chunks(itertools.chain( stopped_instances, running_instances), 20): self.process_resource_set(instance_set) if self.data.get('restart') and running_instances: client.start_instances( InstanceIds=[i['InstanceId'] for i in running_instances]) return list(itertools.chain(stopped_instances, running_instances))
def resources(self, query=None): client = local_session(self.manager.session_factory).client('config') paginator = client.get_paginator('list_discovered_resources') pages = paginator.paginate( resourceType=self.manager.get_model().config_type) results = [] with self.manager.executor_factory(max_workers=5) as w: ridents = pager(pages, self.retry) resource_ids = [ r['resourceId'] for r in ridents.get('resourceIdentifiers', ())] self.manager.log.debug( "querying %d %s resources", len(resource_ids), self.manager.__class__.__name__.lower()) for resource_set in chunks(resource_ids, 50): futures = [] futures.append(w.submit(self.get_resources, resource_set)) for f in as_completed(futures): if f.exception(): self.manager.log.error( "Exception getting resources from config \n %s" % ( f.exception())) results.extend(f.result()) return results
def _elb_tags(elbs, session_factory, executor_factory, retry):

    def process_tags(elb_set):
        client = local_session(session_factory).client('elb')
        elb_map = {elb['LoadBalancerName']: elb for elb in elb_set}

        while True:
            try:
                results = retry(
                    client.describe_tags,
                    LoadBalancerNames=list(elb_map.keys()))
                break
            except ClientError as e:
                if e.response['Error']['Code'] != 'LoadBalancerNotFound':
                    raise
                msg = e.response['Error']['Message']
                _, lb_name = msg.strip().rsplit(' ', 1)
                elb_map.pop(lb_name)
                if not elb_map:
                    results = {'TagDescriptions': []}
                    break
                continue

        for tag_desc in results['TagDescriptions']:
            elb_map[tag_desc['LoadBalancerName']]['Tags'] = tag_desc['Tags']

    with executor_factory(max_workers=2) as w:
        list(w.map(process_tags, chunks(elbs, 20)))
def process_account(account_info): """Scan all buckets in an account and schedule processing""" log = logging.getLogger('salactus.bucket-iterator') log.info("processing account %s", account_info) session = get_session(account_info) client = session.client('s3', config=s3config) buckets = client.list_buckets()['Buckets'] connection.hset( 'bucket-accounts', account_info['name'], json.dumps(account_info)) for b in buckets: connection.hset( 'bucket-ages', bucket_id(account_info, b['Name']), b['CreationDate'].isoformat()) account_buckets = account_info.pop('buckets', None) buckets = [n['Name'] for n in buckets if not account_buckets or n['Name'] in account_buckets] account_not_buckets = account_info.pop('not-buckets', None) buckets = [n for n in buckets if not account_not_buckets or n not in account_not_buckets] log.info("processing %d buckets in account %s", len(buckets), account_info['name']) for bucket_set in chunks(buckets, 50): invoke(process_bucket_set, account_info, bucket_set)
def process(self, resources):
    if self.data['target_region'] == self.manager.config.region:
        self.log.info(
            "Source and destination region are the same, skipping")
        return
    with self.executor_factory(max_workers=2) as w:
        list(w.map(self.process_resource_set, chunks(resources, 20)))
def _process_bucket(self, b, p, key_log, w): count = 0 for key_set in p: keys = self.get_keys(b, key_set) count += len(keys) futures = [] for batch in chunks(keys, size=100): if not batch: continue futures.append(w.submit(self.process_chunk, batch, b)) for f in as_completed(futures): if f.exception(): log.exception("Exception Processing bucket:%s key batch %s" % ( b['Name'], f.exception())) continue r = f.result() if r: key_log.add(r) # Log completion at info level, progress at debug level if key_set['IsTruncated']: log.debug('Scan progress bucket:%s keys:%d remediated:%d ...', b['Name'], count, key_log.count) else: log.info('Scan Complete bucket:%s keys:%d remediated:%d', b['Name'], count, key_log.count) b['KeyScanCount'] = count b['KeyRemediated'] = key_log.count return { 'Bucket': b['Name'], 'Remediated': key_log.count, 'Count': count}
def bulk_invoke(func, args, nargs): """Bulk invoke a function via queues Uses internal implementation details of rq. """ # for comparison, simplest thing that works # for i in nargs: # argv = list(args) # argv.append(i) # func.delay(*argv) # some variances between cpy and pypy, sniff detect for closure in func.delay.func_closure: if getattr(closure.cell_contents, 'queue', None): ctx = closure.cell_contents break q = Queue(ctx.queue, connection=connection) argv = list(args) argv.append(None) job = Job.create( func, args=argv, connection=connection, description="bucket-%s" % func.func_name, origin=q.name, status=JobStatus.QUEUED, timeout=ctx.timeout, result_ttl=0, ttl=ctx.ttl) for n in chunks(nargs, 100): job.created_at = datetime.utcnow() with connection.pipeline() as pipe: for s in n: argv[-1] = s job._id = unicode(uuid4()) job.args = argv q.enqueue_job(job, pipeline=pipe) pipe.execute()
def process(self, resources, event=None):
    client = utils.local_session(
        self.manager.session_factory).client('lambda')

    params = dict(FunctionName=self.data['function'])
    if self.data.get('qualifier'):
        params['Qualifier'] = self.data['qualifier']

    if self.data.get('async', True):
        params['InvocationType'] = 'Event'

    payload = {
        'version': VERSION,
        'event': event,
        'action': self.data,
        'policy': self.manager.data}

    results = []
    for resource_set in utils.chunks(resources, self.data.get('batch_size', 250)):
        payload['resources'] = resource_set
        params['Payload'] = utils.dumps(payload)
        result = client.invoke(**params)
        result['Payload'] = result['Payload'].read()
        results.append(result)
    return results
def process(self, resources, event=None): days = self.data.get('days', 14) duration = timedelta(days) self.metric = self.data['name'] self.end = datetime.utcnow() self.start = self.end - duration self.period = int(self.data.get('period', duration.total_seconds())) self.statistics = self.data.get('statistics', 'Average') self.model = self.manager.get_model() self.op = OPERATORS[self.data.get('op', 'less-than')] self.value = self.data['value'] ns = self.data.get('namespace') if not ns: ns = getattr(self.model, 'default_namespace', None) if not ns: ns = self.DEFAULT_NAMESPACE[self.model.service] self.namespace = ns self.log.debug("Querying metrics for %d", len(resources)) matched = [] with self.executor_factory(max_workers=3) as w: futures = [] for resource_set in chunks(resources, 50): futures.append( w.submit(self.process_resource_set, resource_set)) for f in as_completed(futures): if f.exception(): self.log.warning( "CW Retrieval error: %s" % f.exception()) continue matched.extend(f.result()) return matched
def initialize(self, asgs): """Get launch configs for the set of asgs""" config_names = set() for a in asgs: config_names.add(a['LaunchConfigurationName']) session = local_session(self.manager.session_factory) client = session.client('autoscaling') self.configs = {} if len(asgs) > 50 and self.manager._cache.load(): configs = self.manager._cache.get( {'region': self.manager.config.region, 'resource': 'launch-config'}) if configs: self.log.info("Using cached configs") self.configs = { cfg['LaunchConfigurationName']: cfg for cfg in configs} return self.log.debug("querying %d launch configs" % len(config_names)) for cfg_set in chunks(config_names, 50): for cfg in client.describe_launch_configurations( LaunchConfigurationNames=cfg_set)['LaunchConfigurations']: self.configs[cfg['LaunchConfigurationName']] = cfg if len(asgs) > 50: self.manager._cache.save( {'region': self.manager.config.region, 'resource': 'launch-config'}, self.configs.values())
def process(self, resources):
    client = local_session(
        self.manager.session_factory).client('cloudwatch')
    for resource_set in chunks(resources, size=100):
        self.manager.retry(
            client.delete_alarms,
            AlarmNames=[r['AlarmName'] for r in resource_set])
def process_cluster_resources(self, client, cluster_id, services):
    results = []
    for service_set in chunks(services, self.manager.chunk_size):
        results.extend(
            client.describe_services(
                cluster=cluster_id,
                services=service_set).get('services', []))
    return results
def process(self, resources):
    if self.attr_filter:
        resources = self.filter_resources(resources)
    m = self.manager.get_model()
    session = local_session(self.manager.session_factory)
    client = self.get_client(session, m)
    for resource_set in chunks(resources, self.chunk_size):
        self.process_resource_set(client, m, resource_set)
def create_elb_active_attributes_tuples(self, elb_policy_tuples): """ creates a list of tuples for all attributes that are marked as "true" in the load balancer's polices, e.g. (myelb,['Protocol-SSLv1','Protocol-SSLv2']) """ active_policy_attribute_tuples = [] with self.executor_factory(max_workers=3) as w: futures = [] for elb_policy_set in chunks(elb_policy_tuples, 50): futures.append( w.submit(self.process_elb_policy_set, elb_policy_set)) for f in as_completed(futures): if f.exception(): self.log.error( "Exception processing elb policies \n %s" % (f.exception())) continue for elb_policies in f.result(): active_policy_attribute_tuples.append(elb_policies) return active_policy_attribute_tuples
def bulk_invoke(func, args, nargs): """Bulk invoke a function via queues Uses internal implementation details of rq. """ ctx = func.delay.func_closure[-1].cell_contents q = Queue(ctx.queue, connection=connection) argv = list(args) argv.append(None) job = Job.create( func, args=argv, connection=connection, description="bucket-%s" % func.func_name, origin=q.name, status=JobStatus.QUEUED, timeout=None, result_ttl=500, ttl=ctx.ttl) for n in chunks(nargs, 100): job.created_at = datetime.utcnow() with connection.pipeline() as pipe: for s in n: argv[-1] = s job._id = unicode(uuid4()) job.args = argv q.enqueue_job(job, pipeline=pipe)
def process(self, snapshots): self.image_snapshots = snaps = set() # Be careful re image snapshots, we do this by default # to keep things safe by default, albeit we'd get an error # if we did try to delete something associated to an image. if self.data.get('skip-ami-snapshots', True): # Auto filter ami referenced snapshots, build map c = local_session(self.manager.session_factory).client('ec2') for i in c.describe_images(Owners=['self'])['Images']: for dev in i.get('BlockDeviceMappings'): if 'Ebs' in dev: snaps.add(dev['Ebs']['SnapshotId']) log.info("Deleting %d snapshots", len(snapshots)) with self.executor_factory(max_workers=3) as w: futures = [] for snapshot_set in chunks(reversed(snapshots), size=50): futures.append( w.submit(self.process_snapshot_set, snapshot_set)) for f in as_completed(futures): if f.exception(): self.log.error( "Exception deleting snapshot set \n %s" % ( f.exception()))
def process(self, snapshots): self.image_snapshots = set() # Be careful re image snapshots, we do this by default # to keep things safe by default, albeit we'd get an error # if we did try to delete something associated to an image. pre = len(snapshots) snapshots = list(filter(None, _filter_ami_snapshots(self, snapshots))) post = len(snapshots) log.info("Deleting %d snapshots, auto-filtered %d ami-snapshots", post, pre - post) client = local_session(self.manager.session_factory).client('ec2') with self.executor_factory(max_workers=2) as w: futures = [] for snapshot_set in chunks(reversed(snapshots), size=50): futures.append( w.submit(self.process_snapshot_set, client, snapshot_set)) for f in as_completed(futures): if f.exception(): self.log.error( "Exception deleting snapshot set \n %s" % ( f.exception())) return snapshots
def _describe_route53_tags(model, resources, session_factory, executor_factory, retry): def process_tags(resources): client = local_session(session_factory).client('route53') resource_map = {} for r in resources: k = r[model.id] if "hostedzone" in k: k = k.split("/")[-1] resource_map[k] = r for resource_batch in chunks(list(resource_map.keys()), 10): results = retry(client.list_tags_for_resources, ResourceType=model.arn_type, ResourceIds=resource_batch) for resource_tag_set in results['ResourceTagSets']: if ('ResourceId' in resource_tag_set and resource_tag_set['ResourceId'] in resource_map): resource_map[resource_tag_set['ResourceId']][ 'Tags'] = resource_tag_set['Tags'] with executor_factory(max_workers=2) as w: return list(w.map(process_tags, chunks(resources, 20)))
def get_elb_bucket_locations(self): session = local_session(self.manager.session_factory) client = session.client('elb') # Try to use the cache if it exists elbs = self.manager._cache.get({ 'region': self.manager.config.region, 'resource': 'elb' }) # Sigh, post query refactor reuse, we can't save our cache here # as that resource manager does extra lookups on tags. Not # worth paginating, since with cache usage we have full set in # mem. if elbs is None: p = client.get_paginator('describe_load_balancers') results = p.paginate() elbs = results.build_full_result().get('LoadBalancerDescriptions', ()) self.log.info("Queried %d elbs", len(elbs)) else: self.log.info("Using %d cached elbs", len(elbs)) get_elb_attrs = functools.partial(_query_elb_attrs, self.manager.session_factory) with self.executor_factory(max_workers=2) as w: futures = [] for elb_set in chunks(elbs, 100): futures.append(w.submit(get_elb_attrs, elb_set)) for f in as_completed(futures): if f.exception(): log.error("Error while scanning elb log targets: %s" % (f.exception())) continue for tgt in f.result(): yield tgt
def execute_in_parallel(resources, event, execution_method, executor_factory, log, max_workers=constants.DEFAULT_MAX_THREAD_WORKERS, chunk_size=constants.DEFAULT_CHUNK_SIZE, **kwargs): futures = [] results = [] exceptions = [] if ThreadHelper.disable_multi_threading: try: result = execution_method(resources, event, **kwargs) if result: results.extend(result) except Exception as e: exceptions.append(e) else: with executor_factory(max_workers=max_workers) as w: for resource_set in chunks(resources, chunk_size): futures.append( w.submit(execution_method, resource_set, event, **kwargs)) for f in as_completed(futures): if f.exception(): log.error("Execution failed with error: %s" % f.exception()) exceptions.append(f.exception()) else: result = f.result() if result: results.extend(result) return results, list(set(exceptions))
def _datapipeline_info(pipes, session_factory, executor_factory, retry): client = local_session(session_factory).client('datapipeline') def process_tags(pipe_set): pipe_map = {pipe['id']: pipe for pipe in pipe_set} while True: try: results = retry(client.describe_pipelines, pipelineIds=list(pipe_map.keys())) break except ClientError as e: if e.response['Error']['Code'] != 'PipelineNotFound': raise msg = e.response['Error']['Message'] _, lb_name = msg.strip().rsplit(' ', 1) pipe_map.pop(lb_name) if not pipe_map: results = {'TagDescriptions': []} break continue for pipe_desc in results['pipelineDescriptionList']: pipe = pipe_map[pipe_desc['pipelineId']] pipe['Tags'] = [{ 'Key': t['key'], 'Value': t['value'] } for t in pipe_desc['tags']] for field in pipe_desc['fields']: key = field['key'] if not key.startswith('@'): continue pipe[key[1:]] = field['stringValue'] with executor_factory(max_workers=2) as w: return list(w.map(process_tags, chunks(pipes, 20)))
def process(self, param_groups): client = local_session(self.manager.session_factory).client('rds') params = [] for param in self.data.get('params', []): params.append({ 'ParameterName': param['name'], 'ParameterValue': param['value'], 'ApplyMethod': param.get('apply-method', 'immediate'), }) for param_group in param_groups: name = self.get_pg_name(param_group) # Fetch the existing parameters for this DB, so we only try to change the ones that are # different. cur_params = self.get_current_params(client, name) changed_params = [] for param in params: param_name = param['ParameterName'] if (param_name not in cur_params or cur_params[param_name]['ParameterValue'] != param['ParameterValue']): changed_params.append(param) # Can only do 20 elements at a time per docs, so if we have more than that we will # break it into multiple requests: https://goo.gl/Z6oGNv for param_set in chunks(changed_params, 5): self.do_modify(client, name, param_set) self.log.info( 'Modified RDS parameter group %s (%i parameters changed, %i unchanged)', name, len(changed_params), len(params) - len(changed_params))
def process_resource_set(self, resource_set):
    client = self.manager.session_factory(
        region=self.data['target_region']).client('ec2')

    # Whether this copy crosses regions; decides if we wait on completion below.
    cross_region = self.data['target_region'] != self.manager.config.region

    params = {}
    params['Encrypted'] = self.data.get('encrypted', True)
    if params['Encrypted']:
        params['KmsKeyId'] = self.data['target_key']

    for snapshot_set in chunks(resource_set, 5):
        for r in snapshot_set:
            snapshot_id = client.copy_snapshot(
                SourceRegion=self.manager.config.region,
                SourceSnapshotId=r['SnapshotId'],
                Description=r.get('Description', ''),
                **params)['SnapshotId']
            client.create_tags(
                Resources=[snapshot_id],
                Tags=r['Tags'])
            r['CopiedSnapshot'] = snapshot_id

        if not cross_region or len(snapshot_set) < 5:
            continue

        copy_ids = [r['CopiedSnapshot'] for r in snapshot_set]
        self.log.debug(
            "Waiting on cross-region snapshot copy %s", ",".join(copy_ids))
        waiter = client.get_waiter('snapshot_completed')
        waiter.config.delay = 60
        waiter.config.max_attempts = 60
        waiter.wait(SnapshotIds=copy_ids)
        self.log.debug(
            "Cross region copy complete %s", ",".join(copy_ids))
def process(self, instances):
    instances = self._filter_ec2_with_volumes(
        self.filter_instance_state(instances))
    if not len(instances):
        return

    client = utils.local_session(
        self.manager.session_factory).client('ec2')

    # Play nice around aws having insufficient capacity...
    for itype, t_instances in utils.group_by(
            instances, 'InstanceType').items():
        for izone, z_instances in utils.group_by(
                t_instances, 'Placement.AvailabilityZone').items():
            for batch in utils.chunks(z_instances, self.batch_size):
                self.process_instance_set(client, batch, itype, izone)

    # Raise an exception after all batches process
    if self.exception:
        if self.exception.response['Error']['Code'] not in (
                'InsufficientInstanceCapacity',):
            self.log.exception(
                "Error while starting instances error %s", self.exception)
            raise self.exception
def process_account(account_info): """Scan all buckets in an account and schedule processing""" log = logging.getLogger('salactus.bucket-iterator') log.info("processing account %s", account_info) session = get_session(account_info) client = session.client('s3', config=s3config) buckets = client.list_buckets()['Buckets'] connection.hset('bucket-accounts', account_info['name'], json.dumps(account_info)) for b in buckets: connection.hset('bucket-ages', bucket_id(account_info, b['Name']), b['CreationDate'].isoformat()) account_buckets = account_info.pop('buckets', None) buckets = [ n['Name'] for n in buckets if not account_buckets or n['Name'] in account_buckets ] log.info("processing %d buckets in account %s", len(buckets), account_info['name']) for bucket_set in chunks(buckets, 50): invoke(process_bucket_set, account_info, bucket_set)
def process(self, resources, event=None):
    params = dict(FunctionName=self.data['function'])
    if self.data.get('qualifier'):
        params['Qualifier'] = self.data['qualifier']

    if self.data.get('async', True):
        params['InvocationType'] = 'Event'

    config = Config(read_timeout=self.data.get('timeout', 90),
                    region_name=self.data.get('region', None))
    client = utils.local_session(self.manager.session_factory).client(
        'lambda', config=config)
    alias = utils.get_account_alias_from_sts(
        utils.local_session(self.manager.session_factory))

    payload = {
        'version': VERSION,
        'event': event,
        'account_id': self.manager.config.account_id,
        'account': alias,
        'region': self.manager.config.region,
        'action': self.data,
        'policy': self.manager.data
    }

    results = []
    for resource_set in utils.chunks(resources, self.data.get('batch_size', 250)):
        payload['resources'] = resource_set
        params['Payload'] = utils.dumps(payload)
        result = client.invoke(**params)
        result['Payload'] = result['Payload'].read()
        if isinstance(result['Payload'], bytes):
            result['Payload'] = result['Payload'].decode('utf-8')
        results.append(result)
    return results
def initialize(self, asgs): """Get launch configs for the set of asgs""" config_names = set() for a in asgs: config_names.add(a['LaunchConfigurationName']) session = local_session(self.manager.session_factory) client = session.client('autoscaling') self.configs = {} if len(asgs) > 50 and self.manager._cache.load(): configs = self.manager._cache.get({ 'region': self.manager.config.region, 'resource': 'launch-config' }) if configs: self.log.info("Using cached configs") self.configs = { cfg['LaunchConfigurationName']: cfg for cfg in configs } return self.log.debug("querying %d launch configs" % len(config_names)) for cfg_set in chunks(config_names, 50): for cfg in client.describe_launch_configurations( LaunchConfigurationNames=cfg_set)['LaunchConfigurations']: self.configs[cfg['LaunchConfigurationName']] = cfg if len(asgs) > 50: self.manager._cache.save( { 'region': self.manager.config.region, 'resource': 'launch-config' }, self.configs.values())
def process(self, resources): self.id_key = self.manager.get_model().id # Move this to policy? / no resources bypasses actions? if not len(resources): return msg_tmpl = self.data.get( 'msg', 'Resource does not meet policy: {op}@{action_date}') op = self.data.get('op', 'stop') tag = self.data.get('tag', DEFAULT_TAG) date = self.data.get('days', 4) n = datetime.now(tz=tzutc()) action_date = n + timedelta(days=date) msg = msg_tmpl.format( op=op, action_date=action_date.strftime('%Y/%m/%d')) self.log.info("Tagging %d resources for %s on %s" % ( len(resources), op, action_date.strftime('%Y/%m/%d'))) tags = [{'Key': tag, 'Value': msg}] with self.executor_factory(max_workers=2) as w: futures = [] for resource_set in utils.chunks(resources, size=self.batch_size): futures.append( w.submit(self.process_resource_set, resource_set, tags)) for f in as_completed(futures): if f.exception(): self.log.error( "Exception tagging resource set: %s \n %s" % ( tags, f.exception()))
def process(self, resources): self.id_key = self.manager.get_model().id # Legacy msg = self.data.get('msg') msg = self.data.get('value') or msg tag = self.data.get('tag', DEFAULT_TAG) tag = self.data.get('key') or tag # Support setting multiple tags in a single go with a mapping tags = self.data.get('tags') if tags is None: tags = [] else: tags = [{'Key': k, 'Value': v} for k, v in tags.items()] if msg: tags.append({'Key': tag, 'Value': msg}) batch_size = self.data.get('batch_size', self.batch_size) with self.executor_factory(max_workers=self.concurrency) as w: futures = {} for resource_set in utils.chunks(resources, size=batch_size): futures[w.submit(self.process_resource_set, resource_set, tags)] = resource_set for f in as_completed(futures): if f.exception(): self.log.error( "Exception removing tags: %s on resources:%s \n %s" % (tags, ", ".join( [r[self.id_key] for r in resource_set]), f.exception()))
def process(self, resources, event=None): client = local_session(self.manager.session_factory).client('ssm') item_template = self.get_item_template() resources = list( sorted(resources, key=operator.itemgetter(self.manager.resource_type.id))) items = self.get_items(client, item_template) if items: # - Use a copy of the template as we'll be passing in status changes on updates. # - The return resources will be those that we couldn't fit into updates # to existing resources. resources = self.update_items(client, items, dict(item_template), resources) item_ids = [i['OpsItemId'] for i in items[:5]] for resource_set in chunks(resources, 100): resource_arns = json.dumps([{ 'arn': arn } for arn in sorted(self.manager.get_arns(resource_set))]) item_template['OperationalData']['/aws/resources'] = { 'Type': 'SearchableString', 'Value': resource_arns } if items: item_template['RelatedOpsItems'] = [{ 'OpsItemId': item_ids[:5] }] try: oid = client.create_ops_item(**item_template).get('OpsItemId') item_ids.insert(0, oid) except client.exceptions.OpsItemAlreadyExistsException: pass for r in resource_set: r['c7n:opsitem'] = oid
def universal_augment(self, resources): # Resource Tagging API Support # https://docs.aws.amazon.com/awsconsolehelpdocs/latest/gsg/supported-resources.html # Bail on empty set if not resources: return resources # For global resources, tags don't populate in the get_resources call # unless the call is being made to us-east-1 region = getattr(self.resource_type, 'global_resource', None) and 'us-east-1' or self.region client = utils.local_session(self.session_factory).client( 'resourcegroupstaggingapi', region_name=region) # Lazy for non circular :-( from c7n.query import RetryPageIterator paginator = client.get_paginator('get_resources') paginator.PAGE_ITERATOR_CLS = RetryPageIterator rfetch = [r for r in resources if 'Tags' not in r] for arn_resource_set in utils.chunks(zip(self.get_arns(rfetch), rfetch), 100): arn_resource_map = dict(arn_resource_set) resource_tag_results = client.get_resources( ResourceARNList=list(arn_resource_map.keys())).get( 'ResourceTagMappingList', ()) resource_tag_map = { r['ResourceARN']: r['Tags'] for r in resource_tag_results } for arn, r in arn_resource_map.items(): r['Tags'] = resource_tag_map.get(arn, []) return resources
def process(self, resources):
    session = local_session(self.manager.session_factory)
    m = self.manager.get_model()
    client = session.client(m.group, m.version)
    for resource_set in chunks(resources, self.chunk_size):
        self.process_resource_set(client, resource_set)
def put_evaluations(self, client, token, evaluations):
    for eval_set in utils.chunks(evaluations, 100):
        self.policy.resource_manager.retry(
            client.put_evaluations,
            Evaluations=eval_set,
            ResultToken=token)
def process(self, resources):
    for resource_set in chunks(resources, 50):
        self.process_resource_set(resource_set)
def process(self, resources, event=None): results = [] filtered = [] attached = [] stats = Counter() marker_date = parse_date('2016-11-01T00:00:00+00:00') # Filter volumes for r in resources: # unsupported type if r['VolumeType'] == 'standard': stats['vol-type'] += 1 filtered.append(r['VolumeId']) continue # unattached are easy if not r.get('Attachments'): results.append(r) continue # check for attachment date older then supported date if r['Attachments'][0]['AttachTime'] < marker_date: stats['attach-time'] += 1 filtered.append(r['VolumeId']) continue attached.append(r) # Filter volumes attached to unsupported instance types ec2 = self.manager.get_resource_manager('ec2') instance_map = {} for v in attached: instance_map.setdefault( v['Attachments'][0]['InstanceId'], []).append(v) instances = ec2.get_resources(list(instance_map.keys())) for i in instances: if i['InstanceType'] in self.older_generation: stats['instance-type'] += len(instance_map[i['InstanceId']]) filtered.extend([v['VolumeId'] for v in instance_map.pop(i['InstanceId'])]) else: results.extend(instance_map.pop(i['InstanceId'])) # Filter volumes that are currently under modification client = local_session(self.manager.session_factory).client('ec2') modifying = set() for vol_set in chunks(list(results), 200): vol_ids = [v['VolumeId'] for v in vol_set] mutating = client.describe_volumes_modifications( Filters=[ {'Name': 'volume-id', 'Values': vol_ids}, {'Name': 'modification-state', 'Values': ['modifying', 'optimizing', 'failed']}]) for vm in mutating.get('VolumesModifications', ()): stats['vol-mutation'] += 1 filtered.append(vm['VolumeId']) modifying.add(vm['VolumeId']) self.log.debug( "filtered %d of %d volumes due to %s", len(filtered), len(resources), sorted(stats.items())) return [r for r in results if r['VolumeId'] not in modifying]
def enable_region(master_info, accounts_config, executor, message, region, enable_email_notification): master_session = get_session(master_info.get('role'), 'c7n-guardian', master_info.get('profile'), region=region) master_client = master_session.client('guardduty') detector_id = get_or_create_detector_id(master_client) if not detector_id: # Couldn't get a detector in this region; perhaps region not opted-in log.info( f"""{region}\tCouldn't get or create GuardDuty detector with role {master_info.get('role')} - Perhaps the region isn't enabled?""" ) return # The list gd_members contains active GuardDuty Member Accounts # If the account is in the specified account list (.yml) and not in gd_members, # we'll ensure it's emabled results = master_client.get_paginator('list_members').paginate( DetectorId=detector_id, OnlyAssociated="True") gd_members = results.build_full_result().get('Members', ()) gd_member_ids = {m['AccountId'] for m in gd_members} # Build this the ugly way to ensure we break if there's an unexpected RelationshipStatus; # rather break than silently pass active_ids = [] invited_ids = [] suspended_ids = [] resigned_ids = [] removed_ids = [] for m in gd_members: if m['RelationshipStatus'] == 'Enabled': active_ids.append(m['AccountId']) elif m['RelationshipStatus'] in ['Invited']: invited_ids.append(m['AccountId']) elif m['RelationshipStatus'] == 'Disabled': suspended_ids.append(m['AccountId']) elif m['RelationshipStatus'] == 'Removed': # The GD detector has been entirely removed - re-invite & accept removed_ids.append(m['AccountId']) elif m['RelationshipStatus'] == 'Resigned': # If member is Resigned = member has switched off the master (GD still enabled though) # We should not re-invite, but should re-accept resigned_ids.append(m['AccountId']) else: raise Exception(f'''GuardDuty member account {m["AccountId"]} had unknown RelationshipStatus "{m["RelationshipStatus"]}" - bailing''') # Filter by accounts under consideration per config and cli flags suspended_ids = { a['account_id'] for a in accounts_config['accounts'] if a['account_id'] in suspended_ids } if suspended_ids: unprocessed = master_client.start_monitoring_members( DetectorId=detector_id, AccountIds=list(suspended_ids)).get('UnprocessedAccounts') if unprocessed: log.warning( f"""{region}\tUnprocessed accounts on re-start monitoring {format_event(unprocessed)}""") log.info( "{region}\tRestarted monitoring on {len(suspended_ids)} accounts") accounts_not_members = [{ 'AccountId': account['account_id'], 'Email': account['email'] } for account in accounts_config['accounts'] if account['account_id'] not in gd_member_ids] if not accounts_not_members: if not suspended_ids and not invited_ids and not resigned_ids and not removed_ids: log.info(f"{region}\tAll accounts already enabled") return list(active_ids) if (len(accounts_not_members) + len(gd_member_ids)) > 1000: raise ValueError(f"""{region}\tGuard Duty only supports 1000 member accounts per master account""") log.info( f"{region}\tEnrolling {len(accounts_not_members)} accounts in guard duty" ) unprocessed = [] for account_set in chunks(accounts_not_members, 25): new_members = master_client.create_members(DetectorId=detector_id, AccountDetails=account_set) unprocessed.extend(new_members.get('UnprocessedAccounts', [])) # If the account was already a member, ignore unprocessed = list( filter(lambda x: x['Result'].find('already a membe') == -1, unprocessed)) if unprocessed: log.warning(f"""{region}\tAccounts were unprocessed - member create {format_event(unprocessed)}""") 
log.info(f"{region}\tInviting {len(accounts_not_members)} member accounts") unprocessed = [] for account_set in chunks([ m for m in accounts_not_members if not m['AccountId'] in invited_ids + resigned_ids ], 25): params = { 'AccountIds': [m['AccountId'] for m in account_set], 'DetectorId': detector_id, 'DisableEmailNotification': not enable_email_notification, } if message: params['Message'] = message unprocessed.extend( master_client.invite_members(**params).get('UnprocessedAccounts', [])) if unprocessed: log.warning(f"""{region}\tAccounts were unprocessed invite-members {format_event(unprocessed)}""") accounts_not_members = [{ 'AccountId': account['account_id'], 'Email': account['email'] } for account in accounts_config['accounts'] if account['account_id'] not in active_ids] log.info( f"{region}\tAccepting {len(accounts_not_members)} invitations in members" ) with executor(max_workers=WORKER_COUNT) as w: futures = {} for a in accounts_config['accounts']: if a == master_info: continue if a['account_id'] in active_ids: continue futures[w.submit(enable_account, a, master_info['account_id'], region)] = a for f in as_completed(futures): a = futures[f] if f.exception(): log.error( f"{region}\tError processing account:{a['name']} error:{f.exception()}" ) continue if f.result(): log.info( f"{region}\tEnabled guard duty on account:{a['name']}") return accounts_not_members
def flush(self):
    buf = self.buf
    self.buf = []
    for segment_set in utils.chunks(buf, 50):
        self.client.put_trace_segments(
            TraceSegmentDocuments=[s.serialize() for s in segment_set])
def process(self, resources):
    client = local_session(self.manager.session_factory).client('ssm')
    for resource_set in chunks(resources, 50):
        self.process_resource_set(client, resource_set)
def enable_region(master_info, accounts_config, executor, message, region): master_session = get_session( master_info.get('role'), 'c7n-guardian', master_info.get('profile'), region=region) master_client = master_session.client('guardduty') detector_id = get_or_create_detector_id(master_client) results = master_client.get_paginator( 'list_members').paginate(DetectorId=detector_id, OnlyAssociated="FALSE") extant_members = results.build_full_result().get('Members', ()) extant_ids = {m['AccountId'] for m in extant_members} # Find active members active_ids = {m['AccountId'] for m in extant_members if m['RelationshipStatus'] == 'Enabled'} # Find invited members invited_ids = {m['AccountId'] for m in extant_members if m['RelationshipStatus'] == 'Invited'} # Find extant members not currently enabled suspended_ids = {m['AccountId'] for m in extant_members if m['RelationshipStatus'] == 'Disabled'} # Filter by accounts under consideration per config and cli flags suspended_ids = {a['account_id'] for a in accounts_config['accounts'] if a['account_id'] in suspended_ids} if suspended_ids: unprocessed = master_client.start_monitoring_members( DetectorId=detector_id, AccountIds=list(suspended_ids)).get('UnprocessedAccounts') if unprocessed: log.warning( "Region: %s Unprocessed accounts on re-start monitoring %s", region, format_event(unprocessed)) log.info("Region: %s Restarted monitoring on %d accounts", region, len(suspended_ids)) members = [{'AccountId': account['account_id'], 'Email': account['email']} for account in accounts_config['accounts'] if account['account_id'] not in extant_ids] if not members: if not suspended_ids and not invited_ids: log.info("Region:%s All accounts already enabled", region) return list(active_ids) if (len(members) + len(extant_ids)) > 1000: raise ValueError( ("Region:%s Guard Duty only supports " "1000 member accounts per master account") % (region)) log.info( "Region:%s Enrolling %d accounts in guard duty", region, len(members)) unprocessed = [] for account_set in chunks(members, 25): unprocessed.extend(master_client.create_members( DetectorId=detector_id, AccountDetails=account_set).get('UnprocessedAccounts', [])) if unprocessed: log.warning( "Region:%s accounts where unprocessed - member create\n %s", region, format_event(unprocessed)) log.info("Region:%s Inviting %d member accounts", region, len(members)) unprocessed = [] for account_set in chunks( [m for m in members if not m['AccountId'] in invited_ids], 25): params = {'AccountIds': [m['AccountId'] for m in account_set], 'DetectorId': detector_id} if message: params['Message'] = message unprocessed.extend(master_client.invite_members( **params).get('UnprocessedAccounts', [])) if unprocessed: log.warning( "Region:%s accounts where unprocessed invite-members\n %s", region, format_event(unprocessed)) members = [{'AccountId': account['account_id'], 'Email': account['email']} for account in accounts_config['accounts'] if account['account_id'] not in active_ids] log.info("Region:%s Accepting %d invitations in members", region, len(members)) with executor(max_workers=WORKER_COUNT) as w: futures = {} for a in accounts_config['accounts']: if a == master_info: continue if a['account_id'] in active_ids: continue futures[w.submit(enable_account, a, master_info['account_id'], region)] = a for f in as_completed(futures): a = futures[f] if f.exception(): log.error("Region:%s Error processing account:%s error:%s", region, a['name'], f.exception()) continue if f.result(): log.info('Region:%s Enabled guard duty on account:%s', 
region, a['name']) return members
def process_keyset(bid, key_set): account, bucket = bid.split(':', 1) region = connection.hget('bucket-regions', bid) versioned = bool(int(connection.hget('bucket-versions', bid))) account_info = json.loads(connection.hget('bucket-accounts', account)) visitors = get_key_visitors(account_info) object_reporting = account_info.get('object-reporting') session = get_session(account_info) patch_ssl() s3 = session.client('s3', region_name=region, config=s3config) error_count = sesserr = connerr = enderr = missing_count = 0 throttle_count = denied_count = remediation_count = 0 key_count = len(key_set) start_time = time.time() objects = {v.visitor_name: [] for v in visitors} objects['objects_denied'] = [] with bucket_ops(bid, 'key'): #MainThreadExecutor.async = False #with MainThreadExecutor(max_workers=10) as w: with ThreadPoolExecutor(max_workers=10) as w: futures = {} for kchunk in chunks(key_set, 100): for v in visitors: processor = (versioned and v.process_version or v.process_key) futures[w.submit(process_key_chunk, s3, bucket, kchunk, processor, bool(object_reporting))] = v.visitor_name for f in as_completed(futures): if f.exception(): log.warning("key error: %s", f.exception()) error_count += 1 continue stats = f.result() remediation_count += stats['remediated'] denied_count += stats['denied'] missing_count += stats['missing'] throttle_count += stats['throttle'] sesserr += stats['session'] enderr += stats['endpoint'] connerr += stats['connection'] if object_reporting: vname = futures[f] objects[vname].extend(stats['objects']) objects['objects_denied'].extend(stats['objects_denied']) with connection.pipeline() as p: if remediation_count: p.hincrby('keys-matched', bid, remediation_count) if denied_count: p.hincrby('keys-denied', bid, denied_count) if missing_count: p.hincrby('keys-missing', bid, missing_count) if throttle_count: p.hincrby('keys-throttled', bid, throttle_count) if sesserr: p.hincrby('keys-sesserr', bid, sesserr) if connerr: p.hincrby('keys-connerr', bid, connerr) if enderr: p.hincrby('keys-enderr', bid, enderr) if error_count: p.hincrby('keys-error', bid, error_count) p.hincrby('keys-scanned', bid, key_count) # track count again as we reset metrics period p.hincrby('keys-count', bid, key_count) p.hincrby('keys-time', bid, int(time.time() - start_time)) p.execute() # write out object level info if object_reporting: publish_object_records(bid, objects, object_reporting) # trigger some mem collection if getattr(sys, 'pypy_version_info', None): gc.collect()
def enable(config, master, tags, accounts, debug, message, region): """enable guard duty on a set of accounts""" accounts_config, master_info, executor = guardian_init( config, debug, master, accounts, tags) master_session = get_session(master_info.get('role'), 'c7n-guardian', master_info.get('profile'), region=region) master_client = master_session.client('guardduty') detector_id = get_or_create_detector_id(master_client) extant_members = master_client.list_members(DetectorId=detector_id).get( 'Members', ()) extant_ids = {m['AccountId'] for m in extant_members} # Find extant members not currently enabled suspended_ids = { m['AccountId'] for m in extant_members if m['RelationshipStatus'] == 'Disabled' } # Filter by accounts under consideration per config and cli flags suspended_ids = { a['account_id'] for a in accounts_config['accounts'] if a['account_id'] in suspended_ids } if suspended_ids: unprocessed = master_client.start_monitoring_members( DetectorId=detector_id, AccountIds=list(suspended_ids)).get('UnprocessedAccounts') if unprocessed: log.warning("Unprocessed accounts on re-start monitoring %s" % (format_event(unprocessed))) log.info("Restarted monitoring on %d accounts" % (len(suspended_ids))) members = [{ 'AccountId': account['account_id'], 'Email': account['email'] } for account in accounts_config['accounts'] if account['account_id'] not in extant_ids] if not members: if not suspended_ids: log.info("All accounts already enabled") return if (len(members) + len(extant_ids)) > 1000: raise ValueError( "Guard Duty only supports 1000 member accounts per master account") log.info("Enrolling %d accounts in guard duty" % len(members)) log.info("Creating member accounts:%d region:%s", len(members), region) unprocessed = [] for account_set in chunks(members, 25): unprocessed.extend( master_client.create_members(DetectorId=detector_id, AccountDetails=account_set).get( 'UnprocessedAccounts', [])) if unprocessed: log.warning("Following accounts where unprocessed\n %s" % format_event(unprocessed)) log.info("Inviting member accounts") params = { 'AccountIds': [m['AccountId'] for m in members], 'DetectorId': detector_id } if message: params['Message'] = message unprocessed = master_client.invite_members( **params).get('UnprocessedAccounts') if unprocessed: log.warning("Following accounts where unprocessed\n %s" % format_event(unprocessed)) log.info("Accepting invitations") with executor(max_workers=WORKER_COUNT) as w: futures = {} for a in accounts_config['accounts']: if a == master_info: continue if a['account_id'] in extant_ids: continue futures[w.submit(enable_account, a, master_info['account_id'], region)] = a for f in as_completed(futures): a = futures[f] if f.exception(): log.error("Error processing account:%s error:%s", a['name'], f.exception()) continue if f.result(): log.info('Enabled guard duty on account:%s' % a['name'])
def process_bucket_partitions( account_info, bucket, prefix_set=('',), partition='/', strategy=None, limit=4): """Split up a bucket keyspace into smaller sets for parallel iteration. """ if strategy is None: return detect_partition_strategy(account_info, bucket) strategy = get_partition_strategy(account_info, bucket, strategy) strategy.limit = limit strategy.partition = partition (contents_key, contents_method, continue_tokens) = BUCKET_OBJ_DESC[bucket['versioned']] prefix_queue = strategy.initialize_prefixes(prefix_set) keyset = [] bid = bucket_id(account_info, bucket['name']) log.info("Process partition bid:%s strategy:%s delimiter:%s queue:%d limit:%d", bid, strategy.__class__.__name__[0], partition, len(prefix_queue), limit) session = get_session(account_info) s3 = session.client('s3', region_name=bucket['region'], config=s3config) def statm(prefix): return "keyset:%d queue:%d prefix:%s bucket:%s size:%d" % ( len(keyset), len(prefix_queue), prefix, bid, bucket['keycount']) while prefix_queue: connection.hincrby('bucket-partition', bid, 1) prefix = prefix_queue.pop() if strategy.is_depth_exceeded(prefix): log.info("Partition max depth reached, %s", statm(prefix)) invoke(process_bucket_iterator, account_info, bucket, prefix) continue method = getattr(s3, contents_method, None) results = page_strip(method( Bucket=bucket['name'], Prefix=prefix, Delimiter=partition), bucket) keyset.extend(results.get(contents_key, ())) # As we probe we find keys, process any found if len(keyset) > PARTITION_KEYSET_THRESHOLD: log.info("Partition, processing keyset %s", statm(prefix)) invoke( process_keyset, account_info, bucket, page_strip({contents_key: keyset}, bucket)) keyset = [] strategy.find_partitions(prefix_queue, results) # Do we have more than 1k keys at this level, continue iteration continuation_params = { k: results[k] for k in continue_tokens if k in results} if continuation_params: bp = int(connection.hget('bucket-partition', bid)) log.info("Partition has 1k keys, %s %s", statm(prefix), bp) if not prefix_queue and bp < 5: log.info("Recursive detection") return detect_partition_strategy(account_info, bucket, prefix=prefix) invoke(process_bucket_iterator, account_info, bucket, prefix, delimiter=partition, **continuation_params) # If the queue get too deep, then go parallel if len(prefix_queue) > PARTITION_QUEUE_THRESHOLD: log.info("Partition add friends, %s", statm(prefix)) for s_prefix_set in chunks( prefix_queue[PARTITION_QUEUE_THRESHOLD - 1:], PARTITION_QUEUE_THRESHOLD - 1): for s in list(s_prefix_set): if strategy.is_depth_exceeded(prefix): invoke(process_bucket_iterator, account_info, bucket, s) s_prefix_set.remove(s) if not s_prefix_set: continue invoke(process_bucket_partitions, account_info, bucket, prefix_set=s_prefix_set, partition=partition, strategy=strategy, limit=limit) prefix_queue = prefix_queue[:PARTITION_QUEUE_THRESHOLD - 1] if keyset: invoke(process_keyset, account_info, bucket, {contents_key: keyset})
def test_chunks(self):
    self.assertEqual(
        list(utils.chunks(range(100), size=50)),
        [list(range(50)), list(range(50, 100, 1))])
    self.assertEqual(
        list(utils.chunks(range(1), size=50)), [[0]])
    self.assertEqual(
        list(utils.chunks(range(60), size=50)),
        [list(range(50)), list(range(50, 60, 1))])