def lambda_handler(event, context):
    """
    This function adds a new hourly partition to the Athena table.
    It runs every hour, triggered by a CloudWatch event rule.
    """
    log = logging.getLogger()
    log.debug('[add-athena-partition lambda_handler] Start')
    try:
        # ---------------------------------------------------------
        # Set Log Level
        # ---------------------------------------------------------
        log_level = str(environ['LOG_LEVEL'].upper())
        if log_level not in ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']:
            log_level = 'ERROR'
        log.setLevel(log_level)

        # ----------------------------------------------------------
        # Process event
        # ----------------------------------------------------------
        log.info(event)

        athena_client = create_client('athena')
        database_name = event['glueAccessLogsDatabase']
        access_log_bucket = event['accessLogBucket']
        waf_log_bucket = event['wafLogBucket']
        athena_work_group = event['athenaWorkGroup']

        try:
            # Add athena partition for cloudfront or alb logs
            if len(access_log_bucket) > 0:
                execute_athena_query(log, access_log_bucket,
                                     database_name,
                                     event['glueAppAccessLogsTable'],
                                     athena_client,
                                     athena_work_group)
        except Exception as error:
            log.error('[add-athena-partition lambda_handler] App access log Athena query execution failed: %s'%str(error))

        try:
            # Add athena partition for waf logs
            if len(waf_log_bucket) > 0:
                execute_athena_query(log, waf_log_bucket,
                                     database_name,
                                     event['glueWafAccessLogsTable'],
                                     athena_client,
                                     athena_work_group)
        except Exception as error:
            log.error('[add-athena-partition lambda_handler] WAF access log Athena query execution failed: %s'%str(error))

    except Exception as error:
        log.error(str(error))
        raise

    log.debug('[add-athena-partition lambda_handler] End')
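# The execute_athena_query helper called above is not shown in this excerpt.
# A minimal sketch, assuming it registers the current hour's partition with an
# ALTER TABLE ... ADD IF NOT EXISTS PARTITION statement (the partition layout
# and output location are assumptions, not the solution's verbatim code):
import datetime

def execute_athena_query(log, log_bucket, database_name, table_name,
                         athena_client, athena_work_group):
    now = datetime.datetime.now(datetime.timezone.utc)
    query = ("ALTER TABLE %s.%s ADD IF NOT EXISTS "
             "PARTITION (year='%d', month='%02d', day='%02d', hour='%02d');"
             % (database_name, table_name, now.year, now.month, now.day,
                now.hour))
    response = athena_client.start_query_execution(
        QueryString=query,
        QueryExecutionContext={'Database': database_name},
        ResultConfiguration={
            'OutputLocation': 's3://%s/athena_results/' % log_bucket},
        WorkGroup=athena_work_group)
    log.info("[execute_athena_query] QueryExecutionId: %s"
             % response['QueryExecutionId'])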
# Example #2
def remove_s3_bucket_lambda_event(log, bucket_name, lambda_function_arn,
                                  lambda_log_partition_function_arn):
    if lambda_function_arn is not None:
        log.info("[remove_s3_bucket_lambda_event] Start")

        s3_client = create_client('s3')
        try:
            new_conf = {}
            notification_conf = s3_client.get_bucket_notification_configuration(
                Bucket=bucket_name)

            log.info("[remove_s3_bucket_lambda_event]notification_conf:\n %s" %
                     (notification_conf))

            if 'TopicConfigurations' in notification_conf:
                new_conf['TopicConfigurations'] = notification_conf[
                    'TopicConfigurations']
            if 'QueueConfigurations' in notification_conf:
                new_conf['QueueConfigurations'] = notification_conf[
                    'QueueConfigurations']

            if 'LambdaFunctionConfigurations' in notification_conf:
                new_conf['LambdaFunctionConfigurations'] = []
                for lfc in notification_conf['LambdaFunctionConfigurations']:
                    if lfc['LambdaFunctionArn'] == lambda_function_arn or  \
                       lfc['LambdaFunctionArn'] == lambda_log_partition_function_arn:
                        log.info(
                            "[remove_s3_bucket_lambda_event]%s match found, skipping."
                            % lfc['LambdaFunctionArn'])
                        continue  # drop this function's notifications from the new configuration
                    else:
                        new_conf['LambdaFunctionConfigurations'].append(lfc)
                        log.info(
                            "[remove_s3_bucket_lambda_event]lfc appended: %s" %
                            lfc)

            log.info("[remove_s3_bucket_lambda_event]new_conf:\n %s" %
                     (new_conf))

            s3_client.put_bucket_notification_configuration(
                Bucket=bucket_name, NotificationConfiguration=new_conf)

        except Exception as error:
            log.error(
                "Failed to remove S3 bucket lambda event. Check that the bucket still exists, that you own it, and that it has a proper access policy."
            )
            log.error(str(error))

        log.info("[remove_s3_bucket_lambda_event] End")
# Example #3
def generate_waf_log_parser_conf_file(log, stack_name, request_threshold,
                                      block_period, waf_access_log_bucket,
                                      overwrite):
    log.debug("[generate_waf_log_parser_conf_file] Start")

    local_file = '/tmp/' + stack_name + '-waf_log_conf_LOCAL.json'
    remote_file = stack_name + '-waf_log_conf.json'
    default_conf = {
        'general': {
            'requestThreshold': request_threshold,
            'blockPeriod': block_period,
            'ignoredSufixes': []
        },
        'uriList': {}
    }

    if not overwrite:
        try:
            s3_resource = create_resource('s3')
            file_obj = s3_resource.Object(waf_access_log_bucket, remote_file)
            file_content = file_obj.get()['Body'].read()
            remote_conf = json.loads(file_content)

            if 'general' in remote_conf and 'ignoredSufixes' in remote_conf[
                    'general']:
                default_conf['general']['ignoredSufixes'] = remote_conf[
                    'general']['ignoredSufixes']

            if 'uriList' in remote_conf:
                default_conf['uriList'] = remote_conf['uriList']

        except Exception as e:
            log.debug(
                "[generate_waf_log_parser_conf_file] \tFailed to merge existing conf file data."
            )
            log.debug(e)

    with open(local_file, 'w') as outfile:
        json.dump(default_conf, outfile)

    s3_client = create_client('s3')
    s3_client.upload_file(local_file,
                          waf_access_log_bucket,
                          remote_file,
                          ExtraArgs={'ContentType': "application/json"})

    log.debug("[generate_waf_log_parser_conf_file] End")
def check_app_log_bucket(log, region, bucket_name):
    log.info("[check_app_log_bucket] Start")

    if bucket_name.strip() == "":
        raise Exception(
            'Failed to configure access log bucket. Name cannot be empty!')

    # ------------------------------------------------------------------------------------------------------------------
    # Check if bucket exists (and inside the specified region)
    # ------------------------------------------------------------------------------------------------------------------
    exists = True
    s3_client = create_client('s3')
    try:
        response = s3_client.head_bucket(Bucket=bucket_name)
        log.info("[check_app_log_bucket]response: \n%s" % response)

    except botocore.exceptions.ClientError as e:
        # If a client error is thrown, then check that it was a 404 error.
        # If it was a 404 error, then the bucket does not exist.
        error_code = int(e.response['Error']['Code'])
        if error_code == 404:
            exists = False
        log.info("[check_app_log_bucket]error_code: %s." % error_code)
    # ------------------------------------------------------------------------------------------------------------------
    # Check if the bucket was created in the specified Region or create one (if not exists)
    # ------------------------------------------------------------------------------------------------------------------
    if exists:
        response = None
        try:
            response = s3_client.get_bucket_location(Bucket=bucket_name)
        except Exception as e:
            raise Exception(
                'Failed to access the existing bucket information. Check that you own this bucket and that it has a proper access policy.'
            )

        if response['LocationConstraint'] is None:
            response['LocationConstraint'] = 'us-east-1'
        elif response['LocationConstraint'] == 'EU':
            # Fix for github issue #72
            response['LocationConstraint'] = 'eu-west-1'

        if response['LocationConstraint'] != region:
            raise Exception(
                'Bucket is located in a different region. The S3 bucket and the Log Parser Lambda (and therefore your CloudFormation stack) must be created in the same region.'
            )

    log.info("[check_app_log_bucket] End")
def write_output(log, bucket_name, key_name, output_key_name,
                 outstanding_requesters):
    log.debug('[write_output] Start')

    try:
        current_data = '/tmp/' + key_name.split('/')[-1] + '_LOCAL.json'
        with open(current_data, 'w') as outfile:
            json.dump(outstanding_requesters, outfile)

        s3 = create_client('s3')
        s3.upload_file(current_data,
                       bucket_name,
                       output_key_name,
                       ExtraArgs={'ContentType': "application/json"})
        remove(current_data)

    except Exception as e:
        log.error("[write_output] \tError to write output file")
        log.error(e)

    log.debug('[write_output] End')
# Example #6
def add_athena_partitions(log, add_athena_partition_lambda_function,
                          resource_type, glue_database, access_log_bucket,
                          glue_access_log_table, glue_waf_log_table,
                          waf_log_bucket, athena_work_group):
    log.info("[add_athena_partitions] Start")

    lambda_client = create_client('lambda')
    response = lambda_client.invoke(
        FunctionName=add_athena_partition_lambda_function.rsplit(":", 1)[-1],
        Payload="""{
                "resourceType":"%s",
                "glueAccessLogsDatabase":"%s",
                "accessLogBucket":"%s",
                "glueAppAccessLogsTable":"%s",
                "glueWafAccessLogsTable":"%s",
                "wafLogBucket":"%s",
                "athenaWorkGroup":"%s"
            }""" % (resource_type, glue_database, access_log_bucket,
                    glue_access_log_table, glue_waf_log_table, waf_log_bucket,
                    athena_work_group))
    log.info("[add_athena_partitions] Lambda invocation response:\n%s" %
             response)
    log.info("[add_athena_partitions] End")
def process_athena_result(log, bucket_name, key_name, ip_set_type):
    log.debug('[process_athena_result] Start')

    try:
        # --------------------------------------------------------------------------------------------------------------
        log.info("[process_athena_result] \tDownload file from S3")
        # --------------------------------------------------------------------------------------------------------------
        local_file_path = '/tmp/' + key_name.split('/')[-1]
        s3 = create_client('s3')
        s3.download_file(bucket_name, key_name, local_file_path)

        # --------------------------------------------------------------------------------------------------------------
        log.info("[process_athena_result] \tRead file content")
        # --------------------------------------------------------------------------------------------------------------
        outstanding_requesters = {'general': {}, 'uriList': {}}
        utc_now_timestamp_str = datetime.datetime.now(
            datetime.timezone.utc).strftime("%Y-%m-%d %H:%M:%S %Z%z")
        with open(local_file_path, 'r') as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                # max_counter_per_min is set as 1 just to reuse lambda log parser data structure
                # and reuse update_ip_set.
                outstanding_requesters['general'][row['client_ip']] = {
                    "max_counter_per_min": row['max_counter_per_min'],
                    "updated_at": utc_now_timestamp_str
                }
        remove(local_file_path)

        # --------------------------------------------------------------------------------------------------------------
        log.info("[process_athena_result] \tUpdate WAF IP Sets")
        # --------------------------------------------------------------------------------------------------------------
        update_ip_set(log, ip_set_type, outstanding_requesters)

    except Exception as e:
        log.error("[process_athena_result] \tFailed to read input file")
        log.error(str(e))

    log.debug('[process_athena_result] End')
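# process_athena_result expects the Athena result CSV to expose at least a
# client_ip and a max_counter_per_min column, e.g. (illustrative rows):
#
# client_ip,max_counter_per_min
# 203.0.113.10,412
# 198.51.100.7,120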
# Example #8
# Constructor fragment from a helper class (the enclosing class definition is
# not part of this excerpt):
    def __init__(self, log):
        self.log = log
        self.sns_client = create_client('sns')
# Example #9
def configure_s3_bucket(log, region, bucket_name, access_logging_bucket_name):
    log.info("[configure_s3_bucket] Start")

    if bucket_name.strip() == "":
        raise Exception(
            'Failed to configure access log bucket. Name cannot be empty!')

    # ------------------------------------------------------------------------------------------------------------------
    # Create the S3 bucket (if not exist)
    # ------------------------------------------------------------------------------------------------------------------
    s3_client = create_client('s3')

    try:
        response = s3_client.head_bucket(Bucket=bucket_name)
        log.info("[configure_s3_bucket]response head_bucket: \n%s" % response)

        # Enable access logging if needed
        put_s3_bucket_access_logging(log, s3_client, bucket_name,
                                     access_logging_bucket_name)
    except botocore.exceptions.ClientError as e:
        # If a client error is thrown, then check that it was a 404 error.
        # If it was a 404 error, then the bucket does not exist.
        error_code = int(e.response['Error']['Code'])
        if error_code == 404:
            log.info(
                "[configure_s3_bucket]: %s doesn't exist. Create bucket." %
                bucket_name)
            if region == 'us-east-1':
                s3_client.create_bucket(Bucket=bucket_name, ACL='private')
            else:
                s3_client.create_bucket(
                    Bucket=bucket_name,
                    ACL='private',
                    CreateBucketConfiguration={'LocationConstraint': region})

            # Wait for the S3 bucket to exist
            s3_bucket_exists_waiter = s3_client.get_waiter('bucket_exists')
            s3_bucket_exists_waiter.wait(Bucket=bucket_name)

            # Enable server side encryption on the S3 bucket
            response = s3_client.put_bucket_encryption(
                Bucket=bucket_name,
                ServerSideEncryptionConfiguration={
                    'Rules': [
                        {
                            'ApplyServerSideEncryptionByDefault': {
                                'SSEAlgorithm': 'AES256'
                            }
                        },
                    ]
                })
            log.info(
                "[configure_s3_bucket]response put_bucket_encryption: \n%s" %
                response)

            # block public access
            response = s3_client.put_public_access_block(
                Bucket=bucket_name,
                PublicAccessBlockConfiguration={
                    'BlockPublicAcls': True,
                    'IgnorePublicAcls': True,
                    'BlockPublicPolicy': True,
                    'RestrictPublicBuckets': True
                })
            log.info(
                "[configure_s3_bucket]response put_public_access_block: \n%s" %
                response)

            # Enable access logging
            put_s3_bucket_access_logging(log, s3_client, bucket_name,
                                         access_logging_bucket_name)

    log.info("[configure_s3_bucket] End")
# Example #10
def add_s3_bucket_lambda_event(log, bucket_name, lambda_function_arn,
                               lambda_log_partition_function_arn,
                               lambda_parser, athena_parser):
    log.info("[add_s3_bucket_lambda_event] Start")

    try:
        s3_client = create_client('s3')
        if lambda_function_arn is not None and (lambda_parser
                                                or athena_parser):
            notification_conf = s3_client.get_bucket_notification_configuration(
                Bucket=bucket_name)

            log.info("[add_s3_bucket_lambda_event] notification_conf:\n %s" %
                     (notification_conf))

            new_conf = {}
            new_conf['LambdaFunctionConfigurations'] = []

            if 'TopicConfigurations' in notification_conf:
                new_conf['TopicConfigurations'] = notification_conf[
                    'TopicConfigurations']

            if 'QueueConfigurations' in notification_conf:
                new_conf['QueueConfigurations'] = notification_conf[
                    'QueueConfigurations']

            if lambda_parser:
                new_conf['LambdaFunctionConfigurations'].append({
                    'Id':
                    'Call Log Parser',
                    'LambdaFunctionArn':
                    lambda_function_arn,
                    'Events': ['s3:ObjectCreated:*'],
                    'Filter': {
                        'Key': {
                            'FilterRules': [{
                                'Name': 'suffix',
                                'Value': 'gz'
                            }]
                        }
                    }
                })

            if athena_parser:
                new_conf['LambdaFunctionConfigurations'].append({
                    'Id':
                    'Call Athena Result Parser',
                    'LambdaFunctionArn':
                    lambda_function_arn,
                    'Events': ['s3:ObjectCreated:*'],
                    'Filter': {
                        'Key': {
                            'FilterRules': [{
                                'Name': 'prefix',
                                'Value': 'athena_results/'
                            }, {
                                'Name': 'suffix',
                                'Value': 'csv'
                            }]
                        }
                    }
                })

            if lambda_log_partition_function_arn is not None:
                new_conf['LambdaFunctionConfigurations'].append({
                    'Id':
                    'Call s3 log partition function',
                    'LambdaFunctionArn':
                    lambda_log_partition_function_arn,
                    'Events': ['s3:ObjectCreated:*'],
                    'Filter': {
                        'Key': {
                            'FilterRules': [{
                                'Name': 'prefix',
                                'Value': 'AWSLogs/'
                            }, {
                                'Name': 'suffix',
                                'Value': 'gz'
                            }]
                        }
                    }
                })

            log.info(
                "[add_s3_bucket_lambda_event] LambdaFunctionConfigurations:\n %s"
                % (new_conf['LambdaFunctionConfigurations']))

            s3_client.put_bucket_notification_configuration(
                Bucket=bucket_name, NotificationConfiguration=new_conf)
    except Exception as error:
        log.error(error)

    log.info("[add_s3_bucket_lambda_event] End")
def merge_outstanding_requesters(log, bucket_name, key_name, log_type,
                                 output_key_name, outstanding_requesters):
    log.debug('[merge_outstanding_requesters] Start')

    force_update = False
    need_update = False
    s3 = create_client('s3')

    # --------------------------------------------------------------------------------------------------------------
    log.info("[merge_outstanding_requesters] \tCalculate Last Update Age")
    # --------------------------------------------------------------------------------------------------------------
    response = None
    try:
        response = s3.head_object(Bucket=bucket_name, Key=output_key_name)
    except Exception:
        log.info('[merge_outstanding_requesters] No file to be merged.')
        need_update = True
        return outstanding_requesters, need_update

    utc_last_modified = response['LastModified'].astimezone(
        datetime.timezone.utc)
    utc_now_timestamp = datetime.datetime.now(datetime.timezone.utc)

    utc_now_timestamp_str = utc_now_timestamp.strftime(
        "%Y-%m-%d %H:%M:%S %Z%z")
    last_update_age = int(
        ((utc_now_timestamp - utc_last_modified).total_seconds()) / 60)

    # --------------------------------------------------------------------------------------------------------------
    log.info("[merge_outstanding_requesters] \tDownload current blocked IPs")
    # --------------------------------------------------------------------------------------------------------------
    local_file_path = '/tmp/' + key_name.split('/')[-1] + '_REMOTE.json'
    s3.download_file(bucket_name, output_key_name, local_file_path)

    # ----------------------------------------------------------------------------------------------------------
    log.info(
        "[merge_outstanding_requesters] \tProcess outstanding requesters files"
    )
    # ----------------------------------------------------------------------------------------------------------
    remote_outstanding_requesters = {'general': {}, 'uriList': {}}
    with open(local_file_path, 'r') as file_content:
        remote_outstanding_requesters = json.loads(file_content.read())
    remove(local_file_path)

    threshold = 'requestThreshold' if log_type == 'waf' else "errorThreshold"
    try:
        if 'general' in remote_outstanding_requesters:
            for k, v in remote_outstanding_requesters['general'].items():
                try:
                    if k in outstanding_requesters['general'].keys():
                        log.info(
                            "[merge_outstanding_requesters] \t\tUpdating general data of BLOCK %s rule"
                            % k)
                        outstanding_requesters['general'][k][
                            'updated_at'] = utc_now_timestamp_str
                        if v['max_counter_per_min'] > outstanding_requesters[
                                'general'][k]['max_counter_per_min']:
                            outstanding_requesters['general'][k][
                                'max_counter_per_min'] = v[
                                    'max_counter_per_min']

                    else:
                        utc_prev_updated_at = datetime.datetime.strptime(
                            v['updated_at'],
                            "%Y-%m-%d %H:%M:%S %Z%z").astimezone(
                                datetime.timezone.utc)
                        total_diff_min = (
                            (utc_now_timestamp -
                             utc_prev_updated_at).total_seconds()) / 60

                        if v['max_counter_per_min'] < config['general'][
                                threshold]:
                            force_update = True
                            log.info(
                                "[merge_outstanding_requesters] \t\t%s is below the current general threshold"
                                % k)

                        elif total_diff_min < config['general']['blockPeriod']:
                            log.debug(
                                "[merge_outstanding_requesters] \t\tKeeping %s in general"
                                % k)
                            outstanding_requesters['general'][k] = v

                        else:
                            force_update = True
                            log.info(
                                "[merge_outstanding_requesters] \t\t%s expired in general"
                                % k)

                except Exception:
                    log.error(
                        "[merge_outstanding_requesters] \tError merging general %s rule"
                        % k)
    except Exception:
        log.error(
            '[merge_outstanding_requesters] Failed to process general group.')

    try:
        if 'uriList' in remote_outstanding_requesters:
            if 'uriList' not in config or len(config['uriList']) == 0:
                force_update = True
                log.info(
                    "[merge_outstanding_requesters] \t\tCurrent config file does not contain uriList anymore"
                )
            else:
                for uri in remote_outstanding_requesters['uriList'].keys():
                    if 'ignoredSufixes' in config['general'] and uri.endswith(
                            tuple(config['general']['ignoredSufixes'])):
                        force_update = True
                        log.info(
                            "[merge_outstanding_requesters] \t\t%s is in the current ignored suffixes list."
                            % uri)
                        continue

                    for k, v in remote_outstanding_requesters['uriList'][
                            uri].items():
                        try:
                            if uri in outstanding_requesters['uriList'].keys() and k in \
                                    outstanding_requesters['uriList'][uri].keys():
                                log.info(
                                    "[merge_outstanding_requesters] \t\tUpdating uriList (%s) data of BLOCK %s rule"
                                    % (uri, k))
                                outstanding_requesters['uriList'][uri][k][
                                    'updated_at'] = utc_now_timestamp_str
                                if v['max_counter_per_min'] > outstanding_requesters[
                                        'uriList'][uri][k][
                                            'max_counter_per_min']:
                                    outstanding_requesters['uriList'][uri][k][
                                        'max_counter_per_min'] = v[
                                            'max_counter_per_min']

                            else:
                                utc_prev_updated_at = datetime.datetime.strptime(
                                    v['updated_at'],
                                    "%Y-%m-%d %H:%M:%S %Z%z").astimezone(
                                        datetime.timezone.utc)
                                total_diff_min = (
                                    (utc_now_timestamp -
                                     utc_prev_updated_at).total_seconds()) / 60

                                if v['max_counter_per_min'] < config[
                                        'uriList'][uri][threshold]:
                                    force_update = True
                                    log.info(
                                        "[merge_outstanding_requesters] \t\t%s is below the current uriList (%s) threshold"
                                        % (k, uri))

                                elif total_diff_min < config['general'][
                                        'blockPeriod']:
                                    log.debug(
                                        "[merge_outstanding_requesters] \t\tKeeping %s in uriList (%s)"
                                        % (k, uri))

                                    if uri not in outstanding_requesters[
                                            'uriList'].keys():
                                        outstanding_requesters['uriList'][
                                            uri] = {}

                                    outstanding_requesters['uriList'][uri][
                                        k] = v
                                else:
                                    force_update = True
                                    log.info(
                                        "[merge_outstanding_requesters] \t\t%s expired in uriList (%s)"
                                        % (k, uri))

                        except Exception:
                            log.error(
                                "[merge_outstanding_requesters] \tError merging uriList (%s) %s rule"
                                % (uri, k))
    except Exception:
        log.error(
            '[merge_outstanding_requesters] Failed to process uriList group.')

    need_update = (force_update
                   or last_update_age > int(os.getenv('MAX_AGE_TO_UPDATE'))
                   or len(outstanding_requesters['general']) > 0
                   or len(outstanding_requesters['uriList']) > 0)

    log.debug('[merge_outstanding_requesters] End')
    return outstanding_requesters, need_update
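# Both the local and the remote outstanding_requesters objects share this
# shape (illustrative values):
#
# {
#     "general": {
#         "203.0.113.10": {
#             "max_counter_per_min": 412,
#             "updated_at": "2020-04-09 23:10:00 UTC+0000"
#         }
#     },
#     "uriList": {
#         "/login": {
#             "198.51.100.7": {
#                 "max_counter_per_min": 120,
#                 "updated_at": "2020-04-09 23:10:00 UTC+0000"
#             }
#         }
#     }
# }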
def get_outstanding_requesters(log, bucket_name, key_name, log_type):
    log.debug('[get_outstanding_requesters] Start')

    counter = {'general': {}, 'uriList': {}}
    outstanding_requesters = {'general': {}, 'uriList': {}}

    try:
        # --------------------------------------------------------------------------------------------------------------
        log.info("[get_outstanding_requesters] \tDownload file from S3")
        # --------------------------------------------------------------------------------------------------------------
        local_file_path = '/tmp/' + key_name.split('/')[-1]
        s3 = create_client('s3')
        s3.download_file(bucket_name, key_name, local_file_path)

        # --------------------------------------------------------------------------------------------------------------
        log.info("[get_outstanding_requesters] \tRead file content")
        # --------------------------------------------------------------------------------------------------------------
        error_count = 0
        with gzip.open(local_file_path, 'r') as content:
            for line in content:
                try:
                    request_key = ""
                    uri = ""
                    return_code_index = None

                    if log_type == 'waf':
                        line = line.decode()  # decode bytes to str
                        line_data = json.loads(str(line))

                        request_key = datetime.datetime.fromtimestamp(
                            int(line_data['timestamp']) / 1000.0).isoformat(
                                sep='T', timespec='minutes')
                        request_key += ' ' + line_data['httpRequest'][
                            'clientIp']
                        uri = urlparse(line_data['httpRequest']['uri']).path

                    elif log_type == 'alb':
                        line = line.decode('utf8')
                        if line.startswith('#'):
                            continue

                        line_data = line.split(LINE_FORMAT_ALB['delimiter'])
                        request_key = line_data[
                            LINE_FORMAT_ALB['timestamp']].rsplit(':', 1)[0]
                        request_key += ' ' + line_data[
                            LINE_FORMAT_ALB['source_ip']].rsplit(':', 1)[0]
                        return_code_index = LINE_FORMAT_ALB['code']
                        uri = urlparse(line_data[LINE_FORMAT_ALB['uri']]).path

                    elif log_type == 'cloudfront':
                        line = line.decode('utf8')
                        if line.startswith('#'):
                            continue

                        line_data = line.split(
                            LINE_FORMAT_CLOUD_FRONT['delimiter'])
                        request_key = line_data[
                            LINE_FORMAT_CLOUD_FRONT['date']]
                        request_key += ' ' + line_data[
                            LINE_FORMAT_CLOUD_FRONT['time']][:-3]
                        request_key += ' ' + line_data[
                            LINE_FORMAT_CLOUD_FRONT['source_ip']]
                        return_code_index = LINE_FORMAT_CLOUD_FRONT['code']
                        uri = urlparse(
                            line_data[LINE_FORMAT_CLOUD_FRONT['uri']]).path

                    else:
                        return outstanding_requesters

                    if 'ignoredSufixes' in config['general'] and uri.endswith(
                            tuple(config['general']['ignoredSufixes'])):
                        log.debug(
                            "[get_outstanding_requesters] \t\tSkipping line %s. Included in ignoredSufixes."
                            % line)
                        continue

                    if return_code_index is None or line_data[
                            return_code_index] in config['general'][
                                'errorCodes']:
                        if request_key in counter['general'].keys():
                            counter['general'][request_key] += 1
                        else:
                            counter['general'][request_key] = 1

                        if 'uriList' in config and uri in config[
                                'uriList'].keys():
                            if uri not in counter['uriList'].keys():
                                counter['uriList'][uri] = {}

                            if request_key in counter['uriList'][uri].keys():
                                counter['uriList'][uri][request_key] += 1
                            else:
                                counter['uriList'][uri][request_key] = 1

                except Exception as e:
                    error_count += 1
                    log.error(
                        "[get_outstanding_requesters] \t\tFailed to process line: %s"
                        % line)
                    log.error(str(e))
                    if error_count == 5:  # Allow 5 errors before stopping the function execution
                        raise
        remove(local_file_path)

        # --------------------------------------------------------------------------------------------------------------
        log.info(
            "[get_outstanding_requesters] \tKeep only outstanding requesters")
        # --------------------------------------------------------------------------------------------------------------
        threshold = 'requestThreshold' if log_type == 'waf' else "errorThreshold"
        utc_now_timestamp_str = datetime.datetime.now(
            datetime.timezone.utc).strftime("%Y-%m-%d %H:%M:%S %Z%z")
        for k, num_reqs in counter['general'].items():
            try:
                k = k.split(' ')[-1]
                if num_reqs >= config['general'][threshold]:
                    if k not in outstanding_requesters['general'].keys() or num_reqs > \
                            outstanding_requesters['general'][k]['max_counter_per_min']:
                        outstanding_requesters['general'][k] = {
                            'max_counter_per_min': num_reqs,
                            'updated_at': utc_now_timestamp_str
                        }
            except Exception as e:
                log.error(
                    "[get_outstanding_requesters] \t\tFailed to process outstanding requester: %s"
                    % k)
                log.error(str(e))

        for uri in counter['uriList'].keys():
            for k, num_reqs in counter['uriList'][uri].items():
                try:
                    k = k.split(' ')[-1]
                    if num_reqs >= config['uriList'][uri][threshold]:
                        if uri not in outstanding_requesters['uriList'].keys():
                            outstanding_requesters['uriList'][uri] = {}

                        if k not in outstanding_requesters['uriList'][uri].keys() or num_reqs > \
                                outstanding_requesters['uriList'][uri][k]['max_counter_per_min']:
                            outstanding_requesters['uriList'][uri][k] = {
                                'max_counter_per_min': num_reqs,
                                'updated_at': utc_now_timestamp_str
                            }
                except Exception as e:
                    log.error(
                        "[get_outstanding_requesters] \t\tFailed to process outstanding requester: (%s) %s"
                        % (uri, k))
                    log.error(str(e))

    except Exception as e:
        log.error("[get_outstanding_requesters] \tError to read input file")
        log.error(e)

    log.debug('[get_outstanding_requesters] End')
    return outstanding_requesters
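# The LINE_FORMAT_ALB and LINE_FORMAT_CLOUD_FRONT constants referenced above
# are not shown in this excerpt. A sketch of their likely shape, mapping field
# names to positions in a split log line; the index values below follow the
# published ALB and CloudFront access log layouts and are assumptions, not the
# solution's verbatim code:
LINE_FORMAT_ALB = {
    'delimiter': ' ',
    'timestamp': 1,   # e.g. 2020-04-09T23:10:15.123456Z
    'source_ip': 3,   # client:port
    'code': 8,        # elb_status_code
    'uri': 13         # URL portion of the quoted request field
}

LINE_FORMAT_CLOUD_FRONT = {
    'delimiter': '\t',
    'date': 0,
    'time': 1,
    'source_ip': 4,   # c-ip
    'uri': 7,         # cs-uri-stem
    'code': 8         # sc-status
}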
def send_anonymous_usage_data(log):
    try:
        if 'SEND_ANONYMOUS_USAGE_DATA' not in environ or os.getenv(
                'SEND_ANONYMOUS_USAGE_DATA').lower() != 'yes':
            return

        log.info("[send_anonymous_usage_data] Start")

        cw = create_client('cloudwatch')
        usage_data = {
            "data_type": "log_parser",
            "scanners_probes_set_size": 0,
            "http_flood_set_size": 0,
            "allowed_requests": 0,
            "blocked_requests_all": 0,
            "blocked_requests_scanners_probes": 0,
            "blocked_requests_http_flood": 0,
            "allowed_requests_WAFWebACL": 0,
            "blocked_requests_WAFWebACL": 0,
            "waf_type": os.getenv('LOG_TYPE')
        }

        # --------------------------------------------------------------------------------------------------------------
        log.info("[send_anonymous_usage_data] Get num allowed requests")
        # --------------------------------------------------------------------------------------------------------------
        try:
            response = cw.get_metric_statistics(
                MetricName='AllowedRequests',
                Namespace='AWS/WAFV2',
                Statistics=['Sum'],
                Period=300,
                StartTime=datetime.datetime.utcnow() -
                datetime.timedelta(seconds=300),
                EndTime=datetime.datetime.utcnow(),
                Dimensions=[{
                    "Name": "Rule",
                    "Value": "ALL"
                }, {
                    "Name": "WebACL",
                    "Value": os.getenv('STACK_NAME')
                }, {
                    "Name": "Region",
                    "Value": os.getenv('AWS_REGION')
                }])
            if len(response['Datapoints']):
                usage_data['allowed_requests'] = response['Datapoints'][0][
                    'Sum']

        except Exception as error:
            log.debug(
                "[send_anonymous_usage_data] Failed to get Num Allowed Requests"
            )
            log.debug(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.info(
            "[send_anonymous_usage_data] Get num blocked requests - all rules")
        # --------------------------------------------------------------------------------------------------------------
        try:
            response = cw.get_metric_statistics(
                MetricName='BlockedRequests',
                Namespace='AWS/WAFV2',
                Statistics=['Sum'],
                Period=300,
                StartTime=datetime.datetime.utcnow() -
                datetime.timedelta(seconds=300),
                EndTime=datetime.datetime.utcnow(),
                Dimensions=[{
                    "Name": "Rule",
                    "Value": "ALL"
                }, {
                    "Name": "WebACL",
                    "Value": os.getenv('STACK_NAME')
                }, {
                    "Name": "Region",
                    "Value": os.getenv('AWS_REGION')
                }])

            if len(response['Datapoints']):
                usage_data['blocked_requests_all'] = response['Datapoints'][0][
                    'Sum']

        except Exception as error:
            log.info(
                "[send_anonymous_usage_data] Failed to get num blocked requests - all rules"
            )
            log.error(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.info("[send_anonymous_usage_data] Get scanners probes data")
        # --------------------------------------------------------------------------------------------------------------
        if 'IP_SET_ID_SCANNERS_PROBESV4' in environ or 'IP_SET_ID_SCANNERS_PROBESV6' in environ:
            try:
                countv4 = 0
                response = waflib.get_ip_set(
                    log, scope, os.getenv('IP_SET_NAME_SCANNERS_PROBESV4'),
                    os.getenv('IP_SET_ID_SCANNERS_PROBESV4'))
                log.info(response)
                if response is not None:
                    countv4 = len(response['IPSet']['Addresses'])
                    log.info("Scanner Probes IPV4 address Count: %s", countv4)

                countv6 = 0
                response = waflib.get_ip_set(
                    log, scope, os.getenv('IP_SET_NAME_SCANNERS_PROBESV6'),
                    os.getenv('IP_SET_ID_SCANNERS_PROBESV6'))
                log.info(response)
                if response is not None:
                    countv6 = len(response['IPSet']['Addresses'])
                    log.info("Scanner Probes IPV6 address Count: %s", countv6)

                usage_data['scanners_probes_set_size'] = str(countv4 + countv6)

                response = cw.get_metric_statistics(
                    MetricName='BlockedRequests',
                    Namespace='AWS/WAFV2',
                    Statistics=['Sum'],
                    Period=300,
                    StartTime=datetime.datetime.utcnow() -
                    datetime.timedelta(seconds=300),
                    EndTime=datetime.datetime.utcnow(),
                    Dimensions=[{
                        "Name":
                        "Rule",
                        "Value":
                        os.getenv('METRIC_NAME_PREFIX') + 'ScannersProbesRule'
                    }, {
                        "Name": "WebACL",
                        "Value": os.getenv('STACK_NAME')
                    }, {
                        "Name": "Region",
                        "Value": os.getenv('AWS_REGION')
                    }])

                if len(response['Datapoints']):
                    usage_data['blocked_requests_scanners_probes'] = response[
                        'Datapoints'][0]['Sum']

            except Exception as error:
                log.debug(
                    "[send_anonymous_usage_data] Failed to get scanners probes data"
                )
                log.debug(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.info("[send_anonymous_usage_data] Get HTTP flood data")
        # --------------------------------------------------------------------------------------------------------------
        if 'IP_SET_ID_HTTP_FLOODV4' in environ or 'IP_SET_ID_HTTP_FLOODV6' in environ:
            try:
                countv4 = 0
                response = waflib.get_ip_set(
                    log, scope, os.getenv('IP_SET_NAME_HTTP_FLOODV4'),
                    os.getenv('IP_SET_ID_HTTP_FLOODV4'))
                log.info(response)
                if response is not None:
                    countv4 = len(response['IPSet']['Addresses'])
                    log.info("HTTP Flood IPV4 address Count: %s", countv4)

                countv6 = 0
                response = waflib.get_ip_set(
                    log, scope, os.getenv('IP_SET_NAME_HTTP_FLOODV6'),
                    os.getenv('IP_SET_ID_HTTP_FLOODV6'))
                log.info(response)
                if response is not None:
                    countv6 = len(response['IPSet']['Addresses'])
                    log.info("HTTP Flood IPV6 address Count: %s", countv6)

                usage_data['http_flood_set_size'] = str(countv4 + countv6)

                response = cw.get_metric_statistics(
                    MetricName='BlockedRequests',
                    Namespace='AWS/WAFV2',
                    Statistics=['Sum'],
                    Period=300,
                    StartTime=datetime.datetime.utcnow() -
                    datetime.timedelta(seconds=300),
                    EndTime=datetime.datetime.utcnow(),
                    Dimensions=[{
                        "Name":
                        "Rule",
                        "Value":
                        os.getenv('METRIC_NAME_PREFIX') +
                        'HttpFloodRegularRule'
                    }, {
                        "Name": "WebACL",
                        "Value": os.getenv('STACK_NAME')
                    }, {
                        "Name": "Region",
                        "Value": os.getenv('AWS_REGION')
                    }])

                if len(response['Datapoints']):
                    usage_data['blocked_requests_http_flood'] = response[
                        'Datapoints'][0]['Sum']

            except Exception as error:
                log.info(
                    "[send_anonymous_usage_data] Failed to get HTTP flood data"
                )
                log.error(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.info(
            "[send_anonymous_usage_data] Get num allowed requests - WAF Web ACL"
        )
        # --------------------------------------------------------------------------------------------------------------
        try:
            response = cw.get_metric_statistics(
                MetricName='AllowedRequests',
                Namespace='AWS/WAFV2',
                Statistics=['Sum'],
                Period=300,
                StartTime=datetime.datetime.utcnow() -
                datetime.timedelta(seconds=300),
                EndTime=datetime.datetime.utcnow(),
                Dimensions=[{
                    "Name":
                    "Rule",
                    "Value":
                    os.getenv('METRIC_NAME_PREFIX') + 'WAFWebACL'
                }, {
                    "Name": "WebACL",
                    "Value": os.getenv('STACK_NAME')
                }, {
                    "Name": "Region",
                    "Value": os.getenv('AWS_REGION')
                }])

            if len(response['Datapoints']):
                usage_data['allowed_requests_WAFWebACL'] = response[
                    'Datapoints'][0]['Sum']

        except Exception as error:
            log.info(
                "[send_anonymous_usage_data] Failed to get num allowed requests - WAF Web ACL"
            )
            log.error(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.info(
            "[send_anonymous_usage_data] Get num blocked requests - WAF Web ACL"
        )
        # --------------------------------------------------------------------------------------------------------------
        try:
            response = cw.get_metric_statistics(
                MetricName='BlockedRequests',
                Namespace='AWS/WAFV2',
                Statistics=['Sum'],
                Period=300,
                StartTime=datetime.datetime.utcnow() -
                datetime.timedelta(seconds=300),
                EndTime=datetime.datetime.utcnow(),
                Dimensions=[{
                    "Name":
                    "Rule",
                    "Value":
                    os.getenv('METRIC_NAME_PREFIX') + 'WAFWebACL'
                }, {
                    "Name": "WebACL",
                    "Value": os.getenv('STACK_NAME')
                }, {
                    "Name": "Region",
                    "Value": os.getenv('AWS_REGION')
                }])

            if len(response['Datapoints']):
                usage_data['blocked_requests_WAFWebACL'] = response[
                    'Datapoints'][0]['Sum']

        except Exception as error:
            log.info(
                "[send_anonymous_usage_data] Failed to get num blocked requests - WAF Web ACL"
            )
            log.error(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.info("[send_anonymous_usage_data] Send Data")
        # --------------------------------------------------------------------------------------------------------------
        response = send_metrics(data=usage_data)
        response_code = response.status_code
        log.info('[send_anonymous_usage_data] Response Code: {}'.format(
            response_code))
        log.info("[send_anonymous_usage_data] End")

    except Exception as error:
        log.info("[send_anonymous_usage_data] Failed to send data")
        log.error(str(error))
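# The send_metrics helper called above is not shown in this excerpt. A minimal
# sketch, assuming it POSTs the payload to the solution's metrics endpoint
# with requests (the endpoint URL and envelope fields are assumptions):
import datetime
import json
import os
import requests

def send_metrics(data):
    payload = {
        'Solution': os.getenv('SOLUTION_ID'),
        'UUID': os.getenv('UUID'),
        'TimeStamp': datetime.datetime.utcnow().isoformat(),
        'Data': data
    }
    return requests.post('https://metrics.awssolutionsbuilder.com/generic',
                         data=json.dumps(payload),
                         headers={'Content-Type': 'application/json'},
                         timeout=10)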
# Example #14
def send_anonymous_usage_data(log, scope):
    try:
        if 'SEND_ANONYMOUS_USAGE_DATA' not in os.environ or os.getenv(
                'SEND_ANONYMOUS_USAGE_DATA').lower() != 'yes':
            return

        log.debug("[send_anonymous_usage_data] Start")
        cw = create_client('cloudwatch')
        usage_data = {
            "data_type": "reputation_lists",
            "ipv4_reputation_lists_size": 0,
            "ipv4_reputation_lists": 0,
            "ipv6_reputation_lists_size": 0,
            "ipv6_reputation_lists": 0,
            "allowed_requests": 0,
            "blocked_requests": 0,
            "blocked_requests_ip_reputation_lists": 0,
            "waf_type": os.getenv('LOG_TYPE')
        }

        # --------------------------------------------------------------------------------------------------------------
        log.debug(
            "[send_anonymous_usage_data] Get size of the Reputation List IP set"
        )
        # --------------------------------------------------------------------------------------------------------------
        try:
            response = waflib.get_ip_set(log, scope,
                                         os.getenv('IP_SET_NAME_REPUTATIONV4'),
                                         os.getenv('IP_SET_ID_REPUTATIONV4'))

            if response is not None:
                usage_data['ipv4_reputation_lists_size'] = len(
                    response['IPSet']['Addresses'])
                usage_data['ipv4_reputation_lists'] = response['IPSet'][
                    'Addresses']

        except Exception as error:
            log.debug(
                "[send_anonymous_usage_data] Failed to get size of the Reputation List IPV4 set"
            )
            log.debug(str(error))

        try:
            response = waflib.get_ip_set(log, scope,
                                         os.getenv('IP_SET_NAME_REPUTATIONV6'),
                                         os.getenv('IP_SET_ID_REPUTATIONV6'))
            if response is not None:
                usage_data['ipv6_reputation_lists_size'] = len(
                    response['IPSet']['Addresses'])
                usage_data['ipv6_reputation_lists'] = response['IPSet'][
                    'Addresses']

        except Exception as error:
            log.debug(
                "[send_anonymous_usage_data] Failed to get size of the Reputation List IPV6 set"
            )
            log.debug(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.debug(
            "[send_anonymous_usage_data] Get total number of allowed requests")
        # --------------------------------------------------------------------------------------------------------------
        try:
            response = cw.get_metric_statistics(
                MetricName='AllowedRequests',
                Namespace='AWS/WAFV2',
                Statistics=['Sum'],
                Period=3600,
                StartTime=datetime.datetime.utcnow() -
                datetime.timedelta(seconds=3600),
                EndTime=datetime.datetime.utcnow(),
                Dimensions=[{
                    "Name": "Rule",
                    "Value": "ALL"
                }, {
                    "Name": "WebACL",
                    "Value": os.getenv('STACK_NAME')
                }, {
                    "Name": "Region",
                    "Value": os.getenv('AWS_REGION')
                }])

            if len(response['Datapoints']):
                usage_data['allowed_requests'] = response['Datapoints'][0][
                    'Sum']

        except Exception as error:
            log.debug(
                "[send_anonymous_usage_data] Failed to get Num Allowed Requests"
            )
            log.debug(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.debug(
            "[send_anonymous_usage_data] Get total number of blocked requests")
        # --------------------------------------------------------------------------------------------------------------
        try:
            response = cw.get_metric_statistics(
                MetricName='BlockedRequests',
                Namespace='AWS/WAFV2',
                Statistics=['Sum'],
                Period=3600,
                StartTime=datetime.datetime.utcnow() -
                datetime.timedelta(seconds=3600),
                EndTime=datetime.datetime.utcnow(),
                Dimensions=[{
                    "Name": "Rule",
                    "Value": "ALL"
                }, {
                    "Name": "WebACL",
                    "Value": os.getenv('STACK_NAME')
                }, {
                    "Name": "Region",
                    "Value": os.getenv('AWS_REGION')
                }])

            if len(response['Datapoints']):
                usage_data['blocked_requests'] = response['Datapoints'][0][
                    'Sum']

        except Exception as error:
            log.debug(
                "[send_anonymous_usage_data] Failed to get num blocked requests"
            )
            log.debug(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.debug(
            "[send_anonymous_usage_data] Get total number of blocked requests for Reputation Lists Rule"
        )
        # --------------------------------------------------------------------------------------------------------------
        try:
            response = cw.get_metric_statistics(
                MetricName='BlockedRequests',
                Namespace='AWS/WAFV2',
                Statistics=['Sum'],
                Period=3600,
                StartTime=datetime.datetime.utcnow() -
                datetime.timedelta(seconds=3600),
                EndTime=datetime.datetime.utcnow(),
                Dimensions=[{
                    "Name": "Rule",
                    "Value": os.getenv('IPREPUTATIONLIST_METRICNAME')
                }, {
                    "Name": "WebACL",
                    "Value": os.getenv('STACK_NAME')
                }, {
                    "Name": "Region",
                    "Value": os.getenv('AWS_REGION')
                }])

            if len(response['Datapoints']):
                usage_data['blocked_requests_ip_reputation_lists'] = response[
                    'Datapoints'][0]['Sum']

        except Exception as error:
            log.debug(
                "[send_anonymous_usage_data] Failed to get num blocked requests for Reputation Lists Rule"
            )
            log.debug(str(error))

        # --------------------------------------------------------------------------------------------------------------
        log.info("[send_anonymous_usage_data] Send Data")
        # --------------------------------------------------------------------------------------------------------------

        response = send_metrics(data=usage_data)
        response_code = response.status_code
        log.debug('[send_anonymous_usage_data] Response Code: {}'.format(
            response_code))
        log.debug("[send_anonymous_usage_data] End")
    except Exception as error:
        log.debug("[send_anonymous_usage_data] Failed to send data")
        log.debug(str(error))
# Example #15
# import boto3
# from botocore.config import Config
from botocore.exceptions import ClientError
from ipaddress import ip_address
from backoff import on_exception, expo, full_jitter
from lib.boto3_util import create_client

API_CALL_NUM_RETRIES = 5
MAX_TIME = 20

client = create_client('wafv2')


class WAFLIBv2(object):
    def __init__(self):
        return

    # Parse arn into ip_set_id
    def arn_to_id(self, arn):
        if arn == None:
            return None
        tmp = arn.split('/')
        return tmp.pop()
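    # Example: for an IP set ARN such as
    # arn:aws:wafv2:us-east-1:111122223333:regional/ipset/MySet/a1b2c3d4,
    # arn_to_id returns the trailing id segment, 'a1b2c3d4'.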

    # Determine network version for source_ip
    def which_ip_version(self, log, source_ip):
        # Body truncated in this excerpt; a minimal sketch, assuming it
        # classifies the address with ipaddress.ip_address (an assumption,
        # not the original code):
        try:
            return 'IPV%s' % ip_address(source_ip.strip()).version
        except Exception:
            log.error("Source IP %s is not a valid IP address.", source_ip)
            return None

def lambda_handler(event, context):
    """
    This function is triggered by an S3 event to move log files,
    upon their arrival in S3, from their original location to a
    partitioned folder structure derived from the timestamps in
    their file names, enabling partition-aware queries in AWS Athena.

    Sample partitioned folder structure:
      AWSLogs-Partitioned/year=2020/month=04/day=09/hour=23/

    """
    logging.getLogger().debug('[partition_s3_logs lambda_handler] Start')
    try:
        # ---------------------------------------------------------
        # Set Log Level
        # ---------------------------------------------------------
        global log_level
        log_level = str(environ['LOG_LEVEL'].upper())
        if log_level not in ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']:
            log_level = 'ERROR'
        logging.getLogger().setLevel(log_level)

        # ----------------------------------------------------------
        # Process event
        # ----------------------------------------------------------
        logging.getLogger().info(event)
        
        keep_original_data = str(environ['KEEP_ORIGINAL_DATA'].upper())
        endpoint = str(environ['ENDPOINT'].upper())
        logging.getLogger().info("\n[partition_s3_logs lambda_handler] KEEP ORIGINAL DATA: %s; ENDPOINT: %s."
                                 %(keep_original_data, endpoint))

        s3 = create_client('s3')

        count = 0
        
        # Iterate through all records in the event
        for record in event['Records']:
            # Get S3 bucket
            bucket = record['s3']['bucket']['name']

            # Get source S3 object key
            key = record['s3']['object']['key']

            # Get the file name, which is the last segment of the key
            filename = key.split('/')[-1]

            if endpoint == 'CLOUDFRONT':
                dest = parse_cloudfront_logs(key, filename)
            else:  # ALB endpoint
                dest = parse_alb_logs(key, filename)
                
            source_path = bucket + '/' + key
            dest_path = bucket + '/' + dest
            
            # Copy S3 object to destination
            s3.copy_object(Bucket=bucket, Key=dest, CopySource=source_path)

            logging.getLogger().info("\n[partition_s3_logs lambda_handler] Copied file %s to destination %s"%(source_path, dest_path))
            
            # Delete the source S3 object from its original folder only when KEEP_ORIGINAL_DATA is NO
            if keep_original_data == 'NO':
                s3.delete_object(Bucket=bucket, Key=key)
                logging.getLogger().info("\n[partition_s3_logs lambda_handler] Removed file %s"%source_path)
                
            count = count + 1
            
        logging.getLogger().info("\n[partition_s3_logs lambda_handler] Successfully partitioned %s file(s)."%(str(count)))

    except Exception as error:
        logging.getLogger().error(str(error))
        raise

    logging.getLogger().debug('[partition_s3_logs lambda_handler] End')
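# The parse_cloudfront_logs and parse_alb_logs helpers called above are not
# shown in this excerpt. A minimal sketch of the CloudFront variant, assuming
# the partition path is derived from the timestamp embedded in the log file
# name (e.g. E2EXAMPLE.2020-04-09-23.a1b2c3d4.gz); the destination prefix
# follows the docstring's sample layout and is otherwise an assumption:
def parse_cloudfront_logs(key, filename):
    # CloudFront log file names look like distribution-id.YYYY-MM-DD-HH.hash.gz
    time_stamp = filename.split('.')[1]
    year, month, day, hour = time_stamp.split('-')
    return ("AWSLogs-Partitioned/year=%s/month=%s/day=%s/hour=%s/%s"
            % (year, month, day, hour, filename))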