def poller_processor_handler(event, context):
    """
    Historical S3 Poller Processor.

    This will receive events from the Poller Tasker, and will list all objects of a given technology for an
    account/region pair. This will generate `polling events` which simulate changes. These polling events contain
    configuration data such as the account/region defining where the collector should attempt to gather data from.

    :param event: Invocation event. Its 'Records' entry is handed to `deserialize_records`; every resulting
                  record is read for its 'account_id' and 'region' keys.
    :param context: Invocation context (not used by this function).
    """
    log.debug('[@] Running Poller...')

    queue_url = get_queue_url(os.environ.get('POLLER_QUEUE_NAME', 'HistoricalS3Poller'))

    records = deserialize_records(event['Records'])

    for record in records:
        # Skip accounts that have role assumption errors:
        try:
            # List all buckets in the account:
            all_buckets = list_buckets(account_number=record['account_id'],
                                       assume_role=HISTORICAL_ROLE,
                                       session_name="historical-cloudwatch-s3list",
                                       region=record['region'])["Buckets"]

            events = [s3_polling_schema.serialize_me(record['account_id'], bucket) for bucket in all_buckets]
            produce_events(events, queue_url)
        except ClientError as e:
            log.error('[X] Unable to generate events for account. Account Id: {account_id} Reason: {reason}'.format(
                account_id=record['account_id'], reason=e))
            # Nothing was produced for this record, so there is no event count to report.
            continue

        # Report the number of events actually produced for this record. The count is taken from
        # `events`, not from the account id string (whose length is unrelated to the work done).
        log.debug('[@] Finished generating polling events. Events Created: {}'.format(len(events)))
def test_schema_serialization():
    """Serialize a polling event and check the raw JSON against the schema-loaded data."""
    # Second-precision timestamp with a trailing "Z", used as the bucket creation date:
    now = datetime.utcnow().replace(tzinfo=None, microsecond=0).isoformat() + "Z"
    account_id = "012345678910"
    bucket_details = dict(bucket_name="muhbucket", creation_date=now)

    serialized = s3_polling_schema.serialize_me(account_id, bucket_details)

    # Two views of the same payload: plain JSON, and the schema's own loader.
    raw = json.loads(serialized)
    parsed = s3_polling_schema.loads(serialized).data

    # Envelope fields: (key in raw JSON, key in loaded data, expected value)
    envelope_checks = (
        ("version", "version", "1"),
        ("detail-type", "detail_type", "Historical Polling Event"),
        ("source", "source", "historical"),
        ("account", "account", account_id),
    )
    for raw_key, parsed_key, expected in envelope_checks:
        assert raw[raw_key] == parsed[parsed_key] == expected

    raw_detail = raw["detail"]
    parsed_detail = parsed["detail"]

    # The event time is only compared between the two views, never against `now`,
    # since it may legitimately differ from it. The bucket creation date is the exception.
    assert raw_detail["eventTime"] == parsed_detail["event_time"]

    detail_checks = (
        ("eventSource", "event_source", "historical.s3.poller"),
        ("eventName", "event_name", "DescribeBucket"),
    )
    for raw_key, parsed_key, expected in detail_checks:
        assert raw_detail[raw_key] == parsed_detail[parsed_key] == expected

    raw_params = raw_detail["requestParameters"]
    parsed_params = parsed_detail["request_parameters"]

    assert raw_params["bucketName"] == parsed_params["bucket_name"] == "muhbucket"
    assert raw_params["creationDate"] == parsed_params["creation_date"] == now
def get_record(all_buckets, index, account):
    """
    Build a record dict for the bucket found at `all_buckets[index]`.

    :param all_buckets: Indexable collection of bucket dicts; each is read for "Name" and "CreationDate".
    :param index: Position of the bucket to use.
    :param account: Account identifier passed through to `s3_polling_schema.serialize_me`.
    :return: Dict with "Data" (the serialized polling event as UTF-8 bytes) and "PartitionKey"
             (a random UUID4 hex string).
    """
    bucket = all_buckets[index]

    bucket_name = bucket["Name"]
    # Drop tzinfo and sub-second precision, then mark the timestamp with a "Z" suffix.
    created = bucket["CreationDate"].replace(tzinfo=None, microsecond=0)
    details = {
        "bucket_name": bucket_name,
        "creation_date": created.isoformat() + "Z",
    }

    payload = s3_polling_schema.serialize_me(account, details)
    encoded = bytes(payload, "utf-8")

    return {"Data": encoded, "PartitionKey": uuid.uuid4().hex}
def handler(event, context):
    """
    Historical S3 Poller.

    This poller is run at a set interval in order to ensure that changes do not go undetected by historical.

    Historical pollers generate `polling events` which simulate changes. These polling events contain configuration
    data such as the account/region defining where the collector should attempt to gather data from.

    :param event: Invocation event; only used here for the initial debug log line.
    :param context: Invocation context (not used by this function).
    """
    log.debug('Running poller. Configuration: {}'.format(event))

    queue_url = get_queue_url(
        os.environ.get('POLLER_QUEUE_NAME', 'HistoricalS3Poller'))

    for account in get_historical_accounts():
        # Skip accounts that have role assumption errors:
        try:
            # List all buckets in the account:
            all_buckets = list_buckets(
                account_number=account['id'],
                assume_role=HISTORICAL_ROLE,
                session_name="historical-cloudwatch-s3list",
                region=CURRENT_REGION)["Buckets"]

            events = [
                s3_polling_schema.serialize_me(account['id'], bucket)
                for bucket in all_buckets
            ]
            produce_events(events, queue_url)
        except ClientError as e:
            log.warning(
                'Unable to generate events for account. AccountId: {account_id} Reason: {reason}'
                .format(account_id=account['id'], reason=e))
            # Nothing was produced for this account, so there is no event count to report.
            continue

        # Report the number of events actually produced for this account. The count is taken from
        # `events`, not from the account id string (whose length is unrelated to the work done).
        log.debug(
            'Finished generating polling events. Events Created: {}'.format(
                len(events)))