def lambda_handler(event, context):
    """
    This function adds a new hourly partition to the Athena table.
    It runs every hour, triggered by a CloudWatch event rule.
    """
    log = logging.getLogger()
    log.debug('[add-athena-partition lambda_handler] Start')

    try:
        # ---------------------------------------------------------
        # Set Log Level
        # ---------------------------------------------------------
        log_level = str(environ['LOG_LEVEL'].upper())
        if log_level not in ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']:
            log_level = 'ERROR'
        log.setLevel(log_level)

        # ----------------------------------------------------------
        # Process event
        # ----------------------------------------------------------
        log.info(event)

        athena_client = create_client('athena')
        database_name = event['glueAccessLogsDatabase']
        access_log_bucket = event['accessLogBucket']
        waf_log_bucket = event['wafLogBucket']
        athena_work_group = event['athenaWorkGroup']

        try:
            # Add Athena partition for CloudFront or ALB logs
            if len(access_log_bucket) > 0:
                execute_athena_query(log, access_log_bucket, database_name,
                                     event['glueAppAccessLogsTable'],
                                     athena_client, athena_work_group)
        except Exception as error:
            log.error('[add-athena-partition lambda_handler] App access log Athena query execution failed: %s' % str(error))

        try:
            # Add Athena partition for WAF logs
            if len(waf_log_bucket) > 0:
                execute_athena_query(log, waf_log_bucket, database_name,
                                     event['glueWafAccessLogsTable'],
                                     athena_client, athena_work_group)
        except Exception as error:
            log.error('[add-athena-partition lambda_handler] WAF access log Athena query execution failed: %s' % str(error))

    except Exception as error:
        log.error(str(error))
        raise

    log.debug('[add-athena-partition lambda_handler] End')
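
# Illustrative only: a minimal sketch of the event this handler expects, based on the
# fields read above (and the payload built by add_athena_partitions below). All names
# and values here are made up; the real ones come from the deployed stack.
SAMPLE_ADD_PARTITION_EVENT = {
    "resourceType": "ALB",                              # hypothetical value
    "glueAccessLogsDatabase": "my_glue_database",       # hypothetical value
    "accessLogBucket": "my-app-access-log-bucket",      # hypothetical value
    "wafLogBucket": "my-waf-log-bucket",                # hypothetical value
    "glueAppAccessLogsTable": "app_access_logs",        # hypothetical value
    "glueWafAccessLogsTable": "waf_access_logs",        # hypothetical value
    "athenaWorkGroup": "MyAthenaWorkGroup"              # hypothetical value
}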
def remove_s3_bucket_lambda_event(log, bucket_name, lambda_function_arn,
                                  lambda_log_partition_function_arn):
    if lambda_function_arn is not None:
        log.info("[remove_s3_bucket_lambda_event] Start")

        s3_client = create_client('s3')
        try:
            new_conf = {}
            notification_conf = s3_client.get_bucket_notification_configuration(Bucket=bucket_name)

            log.info("[remove_s3_bucket_lambda_event] notification_conf:\n %s" % (notification_conf))

            if 'TopicConfigurations' in notification_conf:
                new_conf['TopicConfigurations'] = notification_conf['TopicConfigurations']

            if 'QueueConfigurations' in notification_conf:
                new_conf['QueueConfigurations'] = notification_conf['QueueConfigurations']

            if 'LambdaFunctionConfigurations' in notification_conf:
                new_conf['LambdaFunctionConfigurations'] = []
                for lfc in notification_conf['LambdaFunctionConfigurations']:
                    if lfc['LambdaFunctionArn'] == lambda_function_arn or \
                            lfc['LambdaFunctionArn'] == lambda_log_partition_function_arn:
                        log.info("[remove_s3_bucket_lambda_event] %s match found, continue." % lfc['LambdaFunctionArn'])
                        continue  # remove all references
                    else:
                        new_conf['LambdaFunctionConfigurations'].append(lfc)
                        log.info("[remove_s3_bucket_lambda_event] lfc appended: %s" % lfc)

            log.info("[remove_s3_bucket_lambda_event] new_conf:\n %s" % (new_conf))

            s3_client.put_bucket_notification_configuration(
                Bucket=bucket_name, NotificationConfiguration=new_conf)

        except Exception as error:
            log.error(
                "Failed to remove S3 bucket Lambda event. Check that the bucket still exists, "
                "that you own it, and that it has a proper access policy.")
            log.error(str(error))

        log.info("[remove_s3_bucket_lambda_event] End")
def generate_waf_log_parser_conf_file(log, stack_name, request_threshold,
                                      block_period, waf_access_log_bucket,
                                      overwrite):
    log.debug("[generate_waf_log_parser_conf_file] Start")

    local_file = '/tmp/' + stack_name + '-waf_log_conf_LOCAL.json'
    remote_file = stack_name + '-waf_log_conf.json'
    default_conf = {
        'general': {
            'requestThreshold': request_threshold,
            'blockPeriod': block_period,
            'ignoredSufixes': []
        },
        'uriList': {}
    }

    if not overwrite:
        try:
            s3_resource = create_resource('s3')
            file_obj = s3_resource.Object(waf_access_log_bucket, remote_file)
            file_content = file_obj.get()['Body'].read()
            remote_conf = json.loads(file_content)

            if 'general' in remote_conf and 'ignoredSufixes' in remote_conf['general']:
                default_conf['general']['ignoredSufixes'] = remote_conf['general']['ignoredSufixes']

            if 'uriList' in remote_conf:
                default_conf['uriList'] = remote_conf['uriList']

        except Exception as e:
            log.debug("[generate_waf_log_parser_conf_file] \tFailed to merge existing conf file data.")
            log.debug(e)

    with open(local_file, 'w') as outfile:
        json.dump(default_conf, outfile)

    s3_client = create_client('s3')
    s3_client.upload_file(local_file, waf_access_log_bucket, remote_file,
                          ExtraArgs={'ContentType': "application/json"})

    log.debug("[generate_waf_log_parser_conf_file] End")
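
# Illustrative only: a sketch of what <stack_name>-waf_log_conf.json may contain after
# generation, plus a hypothetical per-URI override an operator could add later. The
# threshold and block-period values are made up; the key names match the code above
# (note the "ignoredSufixes" spelling is the key actually used by the parser).
EXAMPLE_WAF_LOG_CONF = {
    "general": {
        "requestThreshold": 100,   # from the request_threshold argument
        "blockPeriod": 240,        # from the block_period argument (minutes)
        "ignoredSufixes": []
    },
    "uriList": {
        "/login": {"requestThreshold": 50}   # hypothetical per-URI threshold
    }
}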
def check_app_log_bucket(log, region, bucket_name):
    log.info("[check_app_log_bucket] Start")

    if bucket_name.strip() == "":
        raise Exception('Failed to configure access log bucket. Name cannot be empty!')

    # ------------------------------------------------------------------------------------------------------------------
    # Check if the bucket exists
    # ------------------------------------------------------------------------------------------------------------------
    exists = True
    s3_client = create_client('s3')
    try:
        response = s3_client.head_bucket(Bucket=bucket_name)
        log.info("[check_app_log_bucket]response: \n%s" % response)
    except botocore.exceptions.ClientError as e:
        # If a client error is thrown, check whether it was a 404 error.
        # If it was a 404 error, the bucket does not exist.
        error_code = int(e.response['Error']['Code'])
        if error_code == 404:
            exists = False
        log.info("[check_app_log_bucket]error_code: %s." % error_code)

    # ------------------------------------------------------------------------------------------------------------------
    # Check if the existing bucket was created in the specified region
    # ------------------------------------------------------------------------------------------------------------------
    if exists:
        response = None
        try:
            response = s3_client.get_bucket_location(Bucket=bucket_name)
        except Exception as e:
            raise Exception(
                'Failed to access the existing bucket information. Check if you own this bucket and if it has proper access policy.')

        if response['LocationConstraint'] is None:
            response['LocationConstraint'] = 'us-east-1'
        elif response['LocationConstraint'] == 'EU':
            # Fix for GitHub issue #72
            response['LocationConstraint'] = 'eu-west-1'

        if response['LocationConstraint'] != region:
            raise Exception(
                'Bucket located in a different region. The S3 bucket and the Log Parser Lambda (and therefore your CloudFormation stack) must be created in the same region.')

    log.info("[check_app_log_bucket] End")
def write_output(log, bucket_name, key_name, output_key_name, outstanding_requesters):
    log.debug('[write_output] Start')

    try:
        current_data = '/tmp/' + key_name.split('/')[-1] + '_LOCAL.json'
        with open(current_data, 'w') as outfile:
            json.dump(outstanding_requesters, outfile)

        s3 = create_client('s3')
        s3.upload_file(current_data, bucket_name, output_key_name,
                       ExtraArgs={'ContentType': "application/json"})
        remove(current_data)

    except Exception as e:
        log.error("[write_output] \tError writing output file")
        log.error(e)

    log.debug('[write_output] End')
def add_athena_partitions(log, add_athena_partition_lambda_function,
                          resource_type, glue_database, access_log_bucket,
                          glue_access_log_table, glue_waf_log_table,
                          waf_log_bucket, athena_work_group):
    log.info("[add_athena_partitions] Start")

    lambda_client = create_client('lambda')
    response = lambda_client.invoke(
        FunctionName=add_athena_partition_lambda_function.rsplit(":", 1)[-1],
        Payload="""{
            "resourceType":"%s",
            "glueAccessLogsDatabase":"%s",
            "accessLogBucket":"%s",
            "glueAppAccessLogsTable":"%s",
            "glueWafAccessLogsTable":"%s",
            "wafLogBucket":"%s",
            "athenaWorkGroup":"%s"
        }""" % (resource_type, glue_database, access_log_bucket,
                glue_access_log_table, glue_waf_log_table, waf_log_bucket,
                athena_work_group))
    log.info("[add_athena_partitions] Lambda invocation response:\n%s" % response)

    log.info("[add_athena_partitions] End")
def process_athena_result(log, bucket_name, key_name, ip_set_type):
    log.debug('[process_athena_result] Start')

    try:
        # --------------------------------------------------------------------------------------------------------------
        log.info("[process_athena_result] \tDownload file from S3")
        # --------------------------------------------------------------------------------------------------------------
        local_file_path = '/tmp/' + key_name.split('/')[-1]
        s3 = create_client('s3')
        s3.download_file(bucket_name, key_name, local_file_path)

        # --------------------------------------------------------------------------------------------------------------
        log.info("[process_athena_result] \tRead file content")
        # --------------------------------------------------------------------------------------------------------------
        outstanding_requesters = {'general': {}, 'uriList': {}}
        utc_now_timestamp_str = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d %H:%M:%S %Z%z")
        with open(local_file_path, 'r') as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                # The entry mirrors the Lambda log parser data structure so that
                # update_ip_set can be reused as-is.
                outstanding_requesters['general'][row['client_ip']] = {
                    "max_counter_per_min": row['max_counter_per_min'],
                    "updated_at": utc_now_timestamp_str
                }
        remove(local_file_path)

        # --------------------------------------------------------------------------------------------------------------
        log.info("[process_athena_result] \tUpdate WAF IP Sets")
        # --------------------------------------------------------------------------------------------------------------
        update_ip_set(log, ip_set_type, outstanding_requesters)

    except Exception:
        log.error("[process_athena_result] \tError reading input file")

    log.debug('[process_athena_result] End')
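
# Illustrative only: a minimal sketch (made-up values) of the Athena result CSV that
# process_athena_result() reads above. The code only relies on the client_ip and
# max_counter_per_min columns.
EXAMPLE_ATHENA_RESULT_CSV = (
    "client_ip,max_counter_per_min\n"
    "192.0.2.10,512\n"
    "198.51.100.7,230\n"
)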
def __init__(self, log):
    self.log = log
    self.sns_client = create_client('sns')
def configure_s3_bucket(log, region, bucket_name, access_logging_bucket_name):
    log.info("[configure_s3_bucket] Start")

    if bucket_name.strip() == "":
        raise Exception('Failed to configure access log bucket. Name cannot be empty!')

    # ------------------------------------------------------------------------------------------------------------------
    # Create the S3 bucket (if it does not exist)
    # ------------------------------------------------------------------------------------------------------------------
    s3_client = create_client('s3')
    try:
        response = s3_client.head_bucket(Bucket=bucket_name)
        log.info("[configure_s3_bucket]response head_bucket: \n%s" % response)

        # Enable access logging if needed
        put_s3_bucket_access_logging(log, s3_client, bucket_name, access_logging_bucket_name)
    except botocore.exceptions.ClientError as e:
        # If a client error is thrown, check whether it was a 404 error.
        # If it was a 404 error, the bucket does not exist.
        error_code = int(e.response['Error']['Code'])
        if error_code == 404:
            log.info("[configure_s3_bucket]: %s doesn't exist. Create bucket." % bucket_name)

            if region == 'us-east-1':
                s3_client.create_bucket(Bucket=bucket_name, ACL='private')
            else:
                s3_client.create_bucket(
                    Bucket=bucket_name,
                    ACL='private',
                    CreateBucketConfiguration={'LocationConstraint': region})

            # Wait for the S3 bucket to exist before configuring it
            s3_bucket_exists_waiter = s3_client.get_waiter('bucket_exists')
            s3_bucket_exists_waiter.wait(Bucket=bucket_name)

            # Enable server-side encryption on the S3 bucket
            response = s3_client.put_bucket_encryption(
                Bucket=bucket_name,
                ServerSideEncryptionConfiguration={
                    'Rules': [
                        {
                            'ApplyServerSideEncryptionByDefault': {
                                'SSEAlgorithm': 'AES256'
                            }
                        },
                    ]
                })
            log.info("[configure_s3_bucket]response put_bucket_encryption: \n%s" % response)

            # Block public access
            response = s3_client.put_public_access_block(
                Bucket=bucket_name,
                PublicAccessBlockConfiguration={
                    'BlockPublicAcls': True,
                    'IgnorePublicAcls': True,
                    'BlockPublicPolicy': True,
                    'RestrictPublicBuckets': True
                })
            log.info("[configure_s3_bucket]response put_public_access_block: \n%s" % response)

            # Enable access logging
            put_s3_bucket_access_logging(log, s3_client, bucket_name, access_logging_bucket_name)

    log.info("[configure_s3_bucket] End")
def add_s3_bucket_lambda_event(log, bucket_name, lambda_function_arn,
                               lambda_log_partition_function_arn,
                               lambda_parser, athena_parser):
    log.info("[add_s3_bucket_lambda_event] Start")

    try:
        s3_client = create_client('s3')
        if lambda_function_arn is not None and (lambda_parser or athena_parser):
            notification_conf = s3_client.get_bucket_notification_configuration(Bucket=bucket_name)

            log.info("[add_s3_bucket_lambda_event] notification_conf:\n %s" % (notification_conf))

            new_conf = {}
            new_conf['LambdaFunctionConfigurations'] = []

            if 'TopicConfigurations' in notification_conf:
                new_conf['TopicConfigurations'] = notification_conf['TopicConfigurations']
            if 'QueueConfigurations' in notification_conf:
                new_conf['QueueConfigurations'] = notification_conf['QueueConfigurations']

            if lambda_parser:
                new_conf['LambdaFunctionConfigurations'].append({
                    'Id': 'Call Log Parser',
                    'LambdaFunctionArn': lambda_function_arn,
                    'Events': ['s3:ObjectCreated:*'],
                    'Filter': {
                        'Key': {
                            'FilterRules': [{
                                'Name': 'suffix',
                                'Value': 'gz'
                            }]
                        }
                    }
                })

            if athena_parser:
                new_conf['LambdaFunctionConfigurations'].append({
                    'Id': 'Call Athena Result Parser',
                    'LambdaFunctionArn': lambda_function_arn,
                    'Events': ['s3:ObjectCreated:*'],
                    'Filter': {
                        'Key': {
                            'FilterRules': [{
                                'Name': 'prefix',
                                'Value': 'athena_results/'
                            }, {
                                'Name': 'suffix',
                                'Value': 'csv'
                            }]
                        }
                    }
                })

            if lambda_log_partition_function_arn is not None:
                new_conf['LambdaFunctionConfigurations'].append({
                    'Id': 'Call s3 log partition function',
                    'LambdaFunctionArn': lambda_log_partition_function_arn,
                    'Events': ['s3:ObjectCreated:*'],
                    'Filter': {
                        'Key': {
                            'FilterRules': [{
                                'Name': 'prefix',
                                'Value': 'AWSLogs/'
                            }, {
                                'Name': 'suffix',
                                'Value': 'gz'
                            }]
                        }
                    }
                })

            log.info("[add_s3_bucket_lambda_event] LambdaFunctionConfigurations:\n %s" % (new_conf['LambdaFunctionConfigurations']))

            s3_client.put_bucket_notification_configuration(
                Bucket=bucket_name, NotificationConfiguration=new_conf)
    except Exception as error:
        log.error(error)

    log.info("[add_s3_bucket_lambda_event] End")
def merge_outstanding_requesters(log, bucket_name, key_name, log_type, output_key_name, outstanding_requesters):
    log.debug('[merge_outstanding_requesters] Start')

    force_update = False
    need_update = False
    s3 = create_client('s3')

    # --------------------------------------------------------------------------------------------------------------
    log.info("[merge_outstanding_requesters] \tCalculate Last Update Age")
    # --------------------------------------------------------------------------------------------------------------
    response = None
    try:
        response = s3.head_object(Bucket=bucket_name, Key=output_key_name)
    except Exception:
        log.info('[merge_outstanding_requesters] No file to be merged.')
        need_update = True
        return outstanding_requesters, need_update

    utc_last_modified = response['LastModified'].astimezone(datetime.timezone.utc)
    utc_now_timestamp = datetime.datetime.now(datetime.timezone.utc)

    utc_now_timestamp_str = utc_now_timestamp.strftime("%Y-%m-%d %H:%M:%S %Z%z")
    last_update_age = int(((utc_now_timestamp - utc_last_modified).total_seconds()) / 60)

    # --------------------------------------------------------------------------------------------------------------
    log.info("[merge_outstanding_requesters] \tDownload current blocked IPs")
    # --------------------------------------------------------------------------------------------------------------
    local_file_path = '/tmp/' + key_name.split('/')[-1] + '_REMOTE.json'
    s3.download_file(bucket_name, output_key_name, local_file_path)

    # ----------------------------------------------------------------------------------------------------------
    log.info("[merge_outstanding_requesters] \tProcess outstanding requesters files")
    # ----------------------------------------------------------------------------------------------------------
    remote_outstanding_requesters = {'general': {}, 'uriList': {}}
    with open(local_file_path, 'r') as file_content:
        remote_outstanding_requesters = json.loads(file_content.read())
    remove(local_file_path)

    threshold = 'requestThreshold' if log_type == 'waf' else "errorThreshold"
    try:
        if 'general' in remote_outstanding_requesters:
            for k, v in remote_outstanding_requesters['general'].items():
                try:
                    if k in outstanding_requesters['general'].keys():
                        log.info("[merge_outstanding_requesters] \t\tUpdating general data of BLOCK %s rule" % k)
                        outstanding_requesters['general'][k]['updated_at'] = utc_now_timestamp_str
                        if v['max_counter_per_min'] > outstanding_requesters['general'][k]['max_counter_per_min']:
                            outstanding_requesters['general'][k]['max_counter_per_min'] = v['max_counter_per_min']
                    else:
                        utc_prev_updated_at = datetime.datetime.strptime(
                            v['updated_at'], "%Y-%m-%d %H:%M:%S %Z%z").astimezone(datetime.timezone.utc)
                        total_diff_min = ((utc_now_timestamp - utc_prev_updated_at).total_seconds()) / 60

                        if v['max_counter_per_min'] < config['general'][threshold]:
                            force_update = True
                            log.info("[merge_outstanding_requesters] \t\t%s is below the current general threshold" % k)
                        elif total_diff_min < config['general']['blockPeriod']:
                            log.debug("[merge_outstanding_requesters] \t\tKeeping %s in general" % k)
                            outstanding_requesters['general'][k] = v
                        else:
                            force_update = True
                            log.info("[merge_outstanding_requesters] \t\t%s expired in general" % k)
                except Exception:
                    log.error("[merge_outstanding_requesters] \tError merging general %s rule" % k)
    except Exception:
        log.error('[merge_outstanding_requesters] Failed to process general group.')

    try:
        if 'uriList' in remote_outstanding_requesters:
            if 'uriList' not in config or len(config['uriList']) == 0:
                force_update = True
                log.info("[merge_outstanding_requesters] \t\tCurrent config file does not contain uriList anymore")
            else:
                for uri in remote_outstanding_requesters['uriList'].keys():
                    if 'ignoredSufixes' in config['general'] and uri.endswith(tuple(config['general']['ignoredSufixes'])):
                        force_update = True
                        log.info("[merge_outstanding_requesters] \t\t%s is in current ignored sufixes list." % uri)
                        continue

                    for k, v in remote_outstanding_requesters['uriList'][uri].items():
                        try:
                            if uri in outstanding_requesters['uriList'].keys() and k in \
                                    outstanding_requesters['uriList'][uri].keys():
                                log.info("[merge_outstanding_requesters] \t\tUpdating uriList (%s) data of BLOCK %s rule" % (uri, k))
                                outstanding_requesters['uriList'][uri][k]['updated_at'] = utc_now_timestamp_str
                                if v['max_counter_per_min'] > outstanding_requesters['uriList'][uri][k]['max_counter_per_min']:
                                    outstanding_requesters['uriList'][uri][k]['max_counter_per_min'] = v['max_counter_per_min']
                            else:
                                utc_prev_updated_at = datetime.datetime.strptime(
                                    v['updated_at'], "%Y-%m-%d %H:%M:%S %Z%z").astimezone(datetime.timezone.utc)
                                total_diff_min = ((utc_now_timestamp - utc_prev_updated_at).total_seconds()) / 60

                                if v['max_counter_per_min'] < config['uriList'][uri][threshold]:
                                    force_update = True
                                    log.info("[merge_outstanding_requesters] \t\t%s is below the current uriList (%s) threshold" % (k, uri))
                                elif total_diff_min < config['general']['blockPeriod']:
                                    log.debug("[merge_outstanding_requesters] \t\tKeeping %s in uriList (%s)" % (k, uri))
                                    if uri not in outstanding_requesters['uriList'].keys():
                                        outstanding_requesters['uriList'][uri] = {}
                                    outstanding_requesters['uriList'][uri][k] = v
                                else:
                                    force_update = True
                                    log.info("[merge_outstanding_requesters] \t\t%s expired in uriList (%s)" % (k, uri))
                        except Exception:
                            log.error("[merge_outstanding_requesters] \tError merging uriList (%s) %s rule" % (uri, k))
    except Exception:
        log.error('[merge_outstanding_requesters] Failed to process uriList group.')

    need_update = (force_update or
                   last_update_age > int(os.getenv('MAX_AGE_TO_UPDATE')) or
                   len(outstanding_requesters['general']) > 0 or
                   len(outstanding_requesters['uriList']) > 0)

    log.debug('[merge_outstanding_requesters] End')
    return outstanding_requesters, need_update
def get_outstanding_requesters(log, bucket_name, key_name, log_type):
    log.debug('[get_outstanding_requesters] Start')

    counter = {'general': {}, 'uriList': {}}
    outstanding_requesters = {'general': {}, 'uriList': {}}

    try:
        # --------------------------------------------------------------------------------------------------------------
        log.info("[get_outstanding_requesters] \tDownload file from S3")
        # --------------------------------------------------------------------------------------------------------------
        local_file_path = '/tmp/' + key_name.split('/')[-1]
        s3 = create_client('s3')
        s3.download_file(bucket_name, key_name, local_file_path)

        # --------------------------------------------------------------------------------------------------------------
        log.info("[get_outstanding_requesters] \tRead file content")
        # --------------------------------------------------------------------------------------------------------------
        error_count = 0
        with gzip.open(local_file_path, 'r') as content:
            for line in content:
                try:
                    request_key = ""
                    uri = ""
                    return_code_index = None

                    if log_type == 'waf':
                        line = line.decode()  # decode bytes so the b'' prefix is removed from each field
                        line_data = json.loads(str(line))
                        request_key = datetime.datetime.fromtimestamp(
                            int(line_data['timestamp']) / 1000.0).isoformat(sep='T', timespec='minutes')
                        request_key += ' ' + line_data['httpRequest']['clientIp']
                        uri = urlparse(line_data['httpRequest']['uri']).path

                    elif log_type == 'alb':
                        line = line.decode('utf8')
                        if line.startswith('#'):
                            continue
                        line_data = line.split(LINE_FORMAT_ALB['delimiter'])
                        request_key = line_data[LINE_FORMAT_ALB['timestamp']].rsplit(':', 1)[0]
                        request_key += ' ' + line_data[LINE_FORMAT_ALB['source_ip']].rsplit(':', 1)[0]
                        return_code_index = LINE_FORMAT_ALB['code']
                        uri = urlparse(line_data[LINE_FORMAT_ALB['uri']]).path

                    elif log_type == 'cloudfront':
                        line = line.decode('utf8')
                        if line.startswith('#'):
                            continue
                        line_data = line.split(LINE_FORMAT_CLOUD_FRONT['delimiter'])
                        request_key = line_data[LINE_FORMAT_CLOUD_FRONT['date']]
                        request_key += ' ' + line_data[LINE_FORMAT_CLOUD_FRONT['time']][:-3]
                        request_key += ' ' + line_data[LINE_FORMAT_CLOUD_FRONT['source_ip']]
                        return_code_index = LINE_FORMAT_CLOUD_FRONT['code']
                        uri = urlparse(line_data[LINE_FORMAT_CLOUD_FRONT['uri']]).path

                    else:
                        return outstanding_requesters

                    if 'ignoredSufixes' in config['general'] and uri.endswith(tuple(config['general']['ignoredSufixes'])):
                        log.debug("[get_outstanding_requesters] \t\tSkipping line %s. Included in ignoredSufixes." % line)
                        continue

                    if return_code_index is None or line_data[return_code_index] in config['general']['errorCodes']:
                        if request_key in counter['general'].keys():
                            counter['general'][request_key] += 1
                        else:
                            counter['general'][request_key] = 1

                        if 'uriList' in config and uri in config['uriList'].keys():
                            if uri not in counter['uriList'].keys():
                                counter['uriList'][uri] = {}

                            if request_key in counter['uriList'][uri].keys():
                                counter['uriList'][uri][request_key] += 1
                            else:
                                counter['uriList'][uri][request_key] = 1

                except Exception as e:
                    error_count += 1
                    log.error("[get_outstanding_requesters] \t\tError processing line: %s" % line)
                    log.error(str(e))
                    if error_count == 5:  # Allow up to 5 errors before stopping the function execution
                        raise
        remove(local_file_path)

        # --------------------------------------------------------------------------------------------------------------
        log.info("[get_outstanding_requesters] \tKeep only outstanding requesters")
        # --------------------------------------------------------------------------------------------------------------
        threshold = 'requestThreshold' if log_type == 'waf' else "errorThreshold"
        utc_now_timestamp_str = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d %H:%M:%S %Z%z")
        for k, num_reqs in counter['general'].items():
            try:
                k = k.split(' ')[-1]
                if num_reqs >= config['general'][threshold]:
                    if k not in outstanding_requesters['general'].keys() or num_reqs > \
                            outstanding_requesters['general'][k]['max_counter_per_min']:
                        outstanding_requesters['general'][k] = {
                            'max_counter_per_min': num_reqs,
                            'updated_at': utc_now_timestamp_str
                        }
            except Exception as e:
                log.error("[get_outstanding_requesters] \t\tError processing outstanding requester: %s" % k)

        for uri in counter['uriList'].keys():
            for k, num_reqs in counter['uriList'][uri].items():
                try:
                    k = k.split(' ')[-1]
                    if num_reqs >= config['uriList'][uri][threshold]:
                        if uri not in outstanding_requesters['uriList'].keys():
                            outstanding_requesters['uriList'][uri] = {}

                        if k not in outstanding_requesters['uriList'][uri].keys() or num_reqs > \
                                outstanding_requesters['uriList'][uri][k]['max_counter_per_min']:
                            outstanding_requesters['uriList'][uri][k] = {
                                'max_counter_per_min': num_reqs,
                                'updated_at': utc_now_timestamp_str
                            }
                except Exception as e:
                    log.error("[get_outstanding_requesters] \t\tError processing outstanding requester: (%s) %s" % (uri, k))
    except Exception as e:
        log.error("[get_outstanding_requesters] \tError reading input file")
        log.error(e)

    log.debug('[get_outstanding_requesters] End')
    return outstanding_requesters
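
# Illustrative only: a sketch of the structure get_outstanding_requesters() returns
# (and merge_outstanding_requesters() consumes). Addresses, counts, and timestamps
# are made up.
EXAMPLE_OUTSTANDING_REQUESTERS = {
    "general": {
        "192.0.2.10": {"max_counter_per_min": 512, "updated_at": "2020-04-09 23:00:00 UTC+0000"}
    },
    "uriList": {
        "/login": {
            "192.0.2.10": {"max_counter_per_min": 300, "updated_at": "2020-04-09 23:00:00 UTC+0000"}
        }
    }
}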
def send_anonymous_usage_data(log):
    try:
        if 'SEND_ANONYMOUS_USAGE_DATA' not in environ or os.getenv('SEND_ANONYMOUS_USAGE_DATA').lower() != 'yes':
            return

        log.info("[send_anonymous_usage_data] Start")

        cw = create_client('cloudwatch')
        usage_data = {
            "data_type": "log_parser",
            "scanners_probes_set_size": 0,
            "http_flood_set_size": 0,
            "allowed_requests": 0,
            "blocked_requests_all": 0,
            "blocked_requests_scanners_probes": 0,
            "blocked_requests_http_flood": 0,
            "allowed_requests_WAFWebACL": 0,
            "blocked_requests_WAFWebACL": 0,
            "waf_type": os.getenv('LOG_TYPE')
        }

        # --------------------------------------------------------------------------------------------------------------
        log.info("[send_anonymous_usage_data] Get num allowed requests")
        # --------------------------------------------------------------------------------------------------------------
        try:
            response = cw.get_metric_statistics(
                MetricName='AllowedRequests',
                Namespace='AWS/WAFV2',
                Statistics=['Sum'],
                Period=300,
                StartTime=datetime.datetime.utcnow() - datetime.timedelta(seconds=300),
                EndTime=datetime.datetime.utcnow(),
                Dimensions=[{
                    "Name": "Rule",
                    "Value": "ALL"
                }, {
                    "Name": "WebACL",
                    "Value": os.getenv('STACK_NAME')
                }, {
                    "Name": "Region",
                    "Value": os.getenv('AWS_REGION')
                }])
            if len(response['Datapoints']):
                usage_data['allowed_requests'] = response['Datapoints'][0]['Sum']
        except Exception as error:
            log.debug("[send_anonymous_usage_data] Failed to get Num Allowed Requests")
            log.debug(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.info("[send_anonymous_usage_data] Get num blocked requests - all rules")
        # --------------------------------------------------------------------------------------------------------------
        try:
            response = cw.get_metric_statistics(
                MetricName='BlockedRequests',
                Namespace='AWS/WAFV2',
                Statistics=['Sum'],
                Period=300,
                StartTime=datetime.datetime.utcnow() - datetime.timedelta(seconds=300),
                EndTime=datetime.datetime.utcnow(),
                Dimensions=[{
                    "Name": "Rule",
                    "Value": "ALL"
                }, {
                    "Name": "WebACL",
                    "Value": os.getenv('STACK_NAME')
                }, {
                    "Name": "Region",
                    "Value": os.getenv('AWS_REGION')
                }])
            if len(response['Datapoints']):
                usage_data['blocked_requests_all'] = response['Datapoints'][0]['Sum']
        except Exception as error:
            log.info("[send_anonymous_usage_data] Failed to get num blocked requests - all rules")
            log.error(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.info("[send_anonymous_usage_data] Get scanners probes data")
        # --------------------------------------------------------------------------------------------------------------
        if 'IP_SET_ID_SCANNERS_PROBESV4' in environ or 'IP_SET_ID_SCANNERS_PROBESV6' in environ:
            try:
                countv4 = 0
                response = waflib.get_ip_set(log, scope,
                                             os.getenv('IP_SET_NAME_SCANNERS_PROBESV4'),
                                             os.getenv('IP_SET_ID_SCANNERS_PROBESV4'))
                log.info(response)
                if response is not None:
                    countv4 = len(response['IPSet']['Addresses'])
                    log.info("Scanner Probes IPV4 address Count: %s", countv4)

                countv6 = 0
                response = waflib.get_ip_set(log, scope,
                                             os.getenv('IP_SET_NAME_SCANNERS_PROBESV6'),
                                             os.getenv('IP_SET_ID_SCANNERS_PROBESV6'))
                log.info(response)
                if response is not None:
                    countv6 = len(response['IPSet']['Addresses'])
                    log.info("Scanner Probes IPV6 address Count: %s", countv6)

                usage_data['scanners_probes_set_size'] = str(countv4 + countv6)

                response = cw.get_metric_statistics(
                    MetricName='BlockedRequests',
                    Namespace='AWS/WAFV2',
                    Statistics=['Sum'],
                    Period=300,
                    StartTime=datetime.datetime.utcnow() - datetime.timedelta(seconds=300),
                    EndTime=datetime.datetime.utcnow(),
                    Dimensions=[{
                        "Name": "Rule",
                        "Value": os.getenv('METRIC_NAME_PREFIX') + 'ScannersProbesRule'
                    }, {
                        "Name": "WebACL",
                        "Value": os.getenv('STACK_NAME')
                    }, {
                        "Name": "Region",
                        "Value": os.getenv('AWS_REGION')
                    }])
                if len(response['Datapoints']):
                    usage_data['blocked_requests_scanners_probes'] = response['Datapoints'][0]['Sum']
            except Exception as error:
                log.debug("[send_anonymous_usage_data] Failed to get scanners probes data")
                log.debug(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.info("[send_anonymous_usage_data] Get HTTP flood data")
        # --------------------------------------------------------------------------------------------------------------
        if 'IP_SET_ID_HTTP_FLOODV4' in environ or 'IP_SET_ID_HTTP_FLOODV6' in environ:
            try:
                countv4 = 0
                response = waflib.get_ip_set(log, scope,
                                             os.getenv('IP_SET_NAME_HTTP_FLOODV4'),
                                             os.getenv('IP_SET_ID_HTTP_FLOODV4'))
                log.info(response)
                if response is not None:
                    countv4 = len(response['IPSet']['Addresses'])
                    log.info("HTTP Flood IPV4 address Count: %s", countv4)

                countv6 = 0
                response = waflib.get_ip_set(log, scope,
                                             os.getenv('IP_SET_NAME_HTTP_FLOODV6'),
                                             os.getenv('IP_SET_ID_HTTP_FLOODV6'))
                log.info(response)
                if response is not None:
                    countv6 = len(response['IPSet']['Addresses'])
                    log.info("HTTP Flood IPV6 address Count: %s", countv6)

                usage_data['http_flood_set_size'] = str(countv4 + countv6)

                response = cw.get_metric_statistics(
                    MetricName='BlockedRequests',
                    Namespace='AWS/WAFV2',
                    Statistics=['Sum'],
                    Period=300,
                    StartTime=datetime.datetime.utcnow() - datetime.timedelta(seconds=300),
                    EndTime=datetime.datetime.utcnow(),
                    Dimensions=[{
                        "Name": "Rule",
                        "Value": os.getenv('METRIC_NAME_PREFIX') + 'HttpFloodRegularRule'
                    }, {
                        "Name": "WebACL",
                        "Value": os.getenv('STACK_NAME')
                    }, {
                        "Name": "Region",
                        "Value": os.getenv('AWS_REGION')
                    }])
                if len(response['Datapoints']):
                    usage_data['blocked_requests_http_flood'] = response['Datapoints'][0]['Sum']
            except Exception as error:
                log.info("[send_anonymous_usage_data] Failed to get HTTP flood data")
                log.error(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.info("[send_anonymous_usage_data] Get num allowed requests - WAF Web ACL")
        # --------------------------------------------------------------------------------------------------------------
        try:
            response = cw.get_metric_statistics(
                MetricName='AllowedRequests',
                Namespace='AWS/WAFV2',
                Statistics=['Sum'],
                Period=300,
                StartTime=datetime.datetime.utcnow() - datetime.timedelta(seconds=300),
                EndTime=datetime.datetime.utcnow(),
                Dimensions=[{
                    "Name": "Rule",
                    "Value": os.getenv('METRIC_NAME_PREFIX') + 'WAFWebACL'
                }, {
                    "Name": "WebACL",
                    "Value": os.getenv('STACK_NAME')
                }, {
                    "Name": "Region",
                    "Value": os.getenv('AWS_REGION')
                }])
            if len(response['Datapoints']):
                usage_data['allowed_requests_WAFWebACL'] = response['Datapoints'][0]['Sum']
        except Exception as error:
            log.info("[send_anonymous_usage_data] Failed to get num allowed requests - WAF Web ACL")
            log.error(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.info("[send_anonymous_usage_data] Get num blocked requests - WAF Web ACL")
        # --------------------------------------------------------------------------------------------------------------
        try:
            response = cw.get_metric_statistics(
                MetricName='BlockedRequests',
                Namespace='AWS/WAFV2',
                Statistics=['Sum'],
                Period=300,
                StartTime=datetime.datetime.utcnow() - datetime.timedelta(seconds=300),
                EndTime=datetime.datetime.utcnow(),
                Dimensions=[{
                    "Name": "Rule",
                    "Value": os.getenv('METRIC_NAME_PREFIX') + 'WAFWebACL'
                }, {
                    "Name": "WebACL",
                    "Value": os.getenv('STACK_NAME')
                }, {
                    "Name": "Region",
                    "Value": os.getenv('AWS_REGION')
                }])
            if len(response['Datapoints']):
                usage_data['blocked_requests_WAFWebACL'] = response['Datapoints'][0]['Sum']
        except Exception as error:
            log.info("[send_anonymous_usage_data] Failed to get num blocked requests - WAF Web ACL")
            log.error(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.info("[send_anonymous_usage_data] Send Data")
        # --------------------------------------------------------------------------------------------------------------
        response = send_metrics(data=usage_data)
        response_code = response.status_code
        log.info('[send_anonymous_usage_data] Response Code: {}'.format(response_code))
        log.info("[send_anonymous_usage_data] End")

    except Exception as error:
        log.info("[send_anonymous_usage_data] Failed to send data")
        log.error(str(error))
def send_anonymous_usage_data(log, scope):
    try:
        if 'SEND_ANONYMOUS_USAGE_DATA' not in os.environ or os.getenv('SEND_ANONYMOUS_USAGE_DATA').lower() != 'yes':
            return

        log.debug("[send_anonymous_usage_data] Start")

        cw = create_client('cloudwatch')
        usage_data = {
            "data_type": "reputation_lists",
            "ipv4_reputation_lists_size": 0,
            "ipv4_reputation_lists": 0,
            "ipv6_reputation_lists_size": 0,
            "ipv6_reputation_lists": 0,
            "allowed_requests": 0,
            "blocked_requests": 0,
            "blocked_requests_ip_reputation_lists": 0,
            "waf_type": os.getenv('LOG_TYPE')
        }

        # --------------------------------------------------------------------------------------------------------------
        log.debug("[send_anonymous_usage_data] Get size of the Reputation List IP set")
        # --------------------------------------------------------------------------------------------------------------
        try:
            response = waflib.get_ip_set(log, scope,
                                         os.getenv('IP_SET_NAME_REPUTATIONV4'),
                                         os.getenv('IP_SET_ID_REPUTATIONV4'))
            if response is not None:
                usage_data['ipv4_reputation_lists_size'] = len(response['IPSet']['Addresses'])
                usage_data['ipv4_reputation_lists'] = response['IPSet']['Addresses']
        except Exception as error:
            log.debug("[send_anonymous_usage_data] Failed to get size of the Reputation List IPV4 set")
            log.debug(str(error))

        try:
            response = waflib.get_ip_set(log, scope,
                                         os.getenv('IP_SET_NAME_REPUTATIONV6'),
                                         os.getenv('IP_SET_ID_REPUTATIONV6'))
            if response is not None:
                usage_data['ipv6_reputation_lists_size'] = len(response['IPSet']['Addresses'])
                usage_data['ipv6_reputation_lists'] = response['IPSet']['Addresses']
        except Exception as error:
            log.debug("[send_anonymous_usage_data] Failed to get size of the Reputation List IPV6 set")
            log.debug(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.debug("[send_anonymous_usage_data] Get total number of allowed requests")
        # --------------------------------------------------------------------------------------------------------------
        try:
            response = cw.get_metric_statistics(
                MetricName='AllowedRequests',
                Namespace='AWS/WAFV2',
                Statistics=['Sum'],
                Period=3600,
                StartTime=datetime.datetime.utcnow() - datetime.timedelta(seconds=3600),
                EndTime=datetime.datetime.utcnow(),
                Dimensions=[{
                    "Name": "Rule",
                    "Value": "ALL"
                }, {
                    "Name": "WebACL",
                    "Value": os.getenv('STACK_NAME')
                }, {
                    "Name": "Region",
                    "Value": os.getenv('AWS_REGION')
                }])
            if len(response['Datapoints']):
                usage_data['allowed_requests'] = response['Datapoints'][0]['Sum']
        except Exception as error:
            log.debug("[send_anonymous_usage_data] Failed to get Num Allowed Requests")
            log.debug(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.debug("[send_anonymous_usage_data] Get total number of blocked requests")
        # --------------------------------------------------------------------------------------------------------------
        try:
            response = cw.get_metric_statistics(
                MetricName='BlockedRequests',
                Namespace='AWS/WAFV2',
                Statistics=['Sum'],
                Period=3600,
                StartTime=datetime.datetime.utcnow() - datetime.timedelta(seconds=3600),
                EndTime=datetime.datetime.utcnow(),
                Dimensions=[{
                    "Name": "Rule",
                    "Value": "ALL"
                }, {
                    "Name": "WebACL",
                    "Value": os.getenv('STACK_NAME')
                }, {
                    "Name": "Region",
                    "Value": os.getenv('AWS_REGION')
                }])
            if len(response['Datapoints']):
                usage_data['blocked_requests'] = response['Datapoints'][0]['Sum']
        except Exception as error:
            log.debug("[send_anonymous_usage_data] Failed to get Num Blocked Requests")
            log.debug(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.debug("[send_anonymous_usage_data] Get total number of blocked requests for Reputation Lists Rule")
        # --------------------------------------------------------------------------------------------------------------
        try:
            response = cw.get_metric_statistics(
                MetricName='BlockedRequests',
                Namespace='AWS/WAFV2',
                Statistics=['Sum'],
                Period=3600,
                StartTime=datetime.datetime.utcnow() - datetime.timedelta(seconds=3600),
                EndTime=datetime.datetime.utcnow(),
                Dimensions=[{
                    "Name": "Rule",
                    "Value": os.getenv('IPREPUTATIONLIST_METRICNAME')
                }, {
                    "Name": "WebACL",
                    "Value": os.getenv('STACK_NAME')
                }, {
                    "Name": "Region",
                    "Value": os.getenv('AWS_REGION')
                }])
            if len(response['Datapoints']):
                usage_data['blocked_requests_ip_reputation_lists'] = response['Datapoints'][0]['Sum']
        except Exception as error:
            log.debug("[send_anonymous_usage_data] Failed to get Num Blocked Requests for Reputation Lists Rule")
            log.debug(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.info("[send_anonymous_usage_data] Send Data")
        # --------------------------------------------------------------------------------------------------------------
        response = send_metrics(data=usage_data)
        response_code = response.status_code
        log.debug('[send_anonymous_usage_data] Response Code: {}'.format(response_code))
        log.debug("[send_anonymous_usage_data] End")

    except Exception as error:
        log.debug("[send_anonymous_usage_data] Failed to send data")
#  or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
#  OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
#  and limitations under the License.
######################################################################################################################

# import boto3
# from botocore.config import Config
from botocore.exceptions import ClientError
from ipaddress import ip_address
from backoff import on_exception, expo, full_jitter
from lib.boto3_util import create_client

API_CALL_NUM_RETRIES = 5
MAX_TIME = 20

client = create_client('wafv2')


class WAFLIBv2(object):
    def __init__(self):
        return

    # Parse an IP set ARN into its ip_set_id (the trailing path segment)
    def arn_to_id(self, arn):
        if arn is None:
            return None
        tmp = arn.split('/')
        return tmp.pop()

    # Determine the IP version of source_ip
    def which_ip_version(self, log, source_ip):
def lambda_handler(event, context):
    """
    This function is triggered by an S3 event. Upon their arrival in S3, it moves
    log files from their original location into a partitioned folder structure
    built from the timestamps in the file names, which allows partitioning to be
    used within AWS Athena.

    Sample partitioned folder structure:
        AWSLogs-Partitioned/year=2020/month=04/day=09/hour=23/
    """
    logging.getLogger().debug('[partition_s3_logs lambda_handler] Start')
    try:
        # ---------------------------------------------------------
        # Set Log Level
        # ---------------------------------------------------------
        global log_level
        log_level = str(environ['LOG_LEVEL'].upper())
        if log_level not in ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']:
            log_level = 'ERROR'
        logging.getLogger().setLevel(log_level)

        # ----------------------------------------------------------
        # Process event
        # ----------------------------------------------------------
        logging.getLogger().info(event)

        keep_original_data = str(environ['KEEP_ORIGINAL_DATA'].upper())
        endpoint = str(environ['ENDPOINT'].upper())
        logging.getLogger().info(
            "\n[partition_s3_logs lambda_handler] KEEP ORIGINAL DATA: %s; ENDPOINT: %s." % (keep_original_data, endpoint))

        s3 = create_client('s3')

        count = 0
        # Iterate through all records in the event
        for record in event['Records']:
            # Get S3 bucket
            bucket = record['s3']['bucket']['name']

            # Get source S3 object key
            key = record['s3']['object']['key']

            # Get the file name, which is the last element of the key
            filename = ""
            number = len(key.split('/'))
            if number >= 1:
                number = number - 1
            filename = key.split('/')[number]

            if endpoint == 'CLOUDFRONT':
                dest = parse_cloudfront_logs(key, filename)
            else:  # ALB endpoint
                dest = parse_alb_logs(key, filename)

            source_path = bucket + '/' + key
            dest_path = bucket + '/' + dest

            # Copy S3 object to destination
            s3.copy_object(Bucket=bucket, Key=dest, CopySource=source_path)
            logging.getLogger().info("\n[partition_s3_logs lambda_handler] Copied file %s to destination %s" % (source_path, dest_path))

            # Only delete the source S3 object from its original folder if keeping original data is not requested
            if keep_original_data == 'NO':
                s3.delete_object(Bucket=bucket, Key=key)
                logging.getLogger().info("\n[partition_s3_logs lambda_handler] Removed file %s" % source_path)

            count = count + 1

        logging.getLogger().info("\n[partition_s3_logs lambda_handler] Successfully partitioned %s file(s)." % (str(count)))
    except Exception as error:
        logging.getLogger().error(str(error))
        raise

    logging.getLogger().debug('[partition_s3_logs lambda_handler] End')
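
# Illustrative only: the kind of key rewrite this handler performs for an ALB log,
# using a made-up source key. The actual destination layout comes from
# parse_alb_logs() / parse_cloudfront_logs(), defined elsewhere in this module, and
# follows the partition scheme shown in the docstring above.
EXAMPLE_PARTITION_MOVE = {
    "source": "AWSLogs/111122223333/elasticloadbalancing/us-east-1/2020/04/09/my-alb.20200409T2300Z.log.gz",
    "destination": "AWSLogs-Partitioned/year=2020/month=04/day=09/hour=23/my-alb.20200409T2300Z.log.gz",
}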