Example #1
def analyze_lambda_handler(event_data, lambda_context):
    """Lambda function entry point.

    Args:
        event_data: [dict] of the form: {
            'S3Objects': [...],  # S3 object keys.
            'SQSReceipts': [...]  # SQS receipt handles (to be deleted after processing).
        }
            There can be any number of S3Objects, but no more than 10 SQS receipts.
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier [string] to a summary [dict] of file info and matched
        YARA rule information.
    """
    result = {}
    binaries = []  # List of the BinaryInfo data.

    # Build the YaraAnalyzer now if we could not do it when this file was imported.
    global ANALYZER, NUM_YARA_RULES  # pylint: disable=global-statement
    if not ANALYZER:
        ANALYZER = yara_analyzer.YaraAnalyzer(COMPILED_RULES_FILEPATH)
        NUM_YARA_RULES = ANALYZER.num_rules

    # The Lambda version must be an integer.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        lambda_version = -1

    LOGGER.info('Processing %d record(s)', len(event_data['S3Objects']))
    for s3_key in event_data['S3Objects']:
        LOGGER.info('Analyzing %s', s3_key)

        with binary_info.BinaryInfo(os.environ['S3_BUCKET_NAME'], s3_key,
                                    ANALYZER) as binary:
            result[binary.s3_identifier] = binary.summary()
            binaries.append(binary)

            if binary.yara_matches:
                LOGGER.warning('%s matched YARA rules: %s', binary,
                               binary.matched_rule_ids)
                binary.save_matches_and_alert(
                    lambda_version,
                    os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                    os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])
            else:
                LOGGER.info('%s did not match any YARA rules', binary)

    # Delete all of the SQS receipts (mark them as completed).
    analyzer_aws_lib.delete_sqs_messages(os.environ['SQS_QUEUE_URL'],
                                         event_data['SQSReceipts'])

    # Publish metrics.
    try:
        analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
    except BotoError:
        LOGGER.exception('Error saving metric data')

    return result
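For context, the handler above can be exercised locally with a stub context object and an event of the documented shape. The following is a minimal sketch: FakeLambdaContext, the environment values, and the module name main are all hypothetical placeholders, not part of the project.

import os

class FakeLambdaContext:
    """Stub exposing the only attribute the handler reads."""
    function_version = '3'  # int() succeeds; '$LATEST' would map to -1.

# Hypothetical values for the environment variables the handler reads.
os.environ['S3_BUCKET_NAME'] = 'test-bucket'
os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'] = 'test-matches'
os.environ['YARA_ALERTS_SNS_TOPIC_ARN'] = 'arn:aws:sns:us-east-1:123456789012:alerts'
os.environ['SQS_QUEUE_URL'] = 'https://sqs.us-east-1.amazonaws.com/123456789012/queue'

event = {
    'S3Objects': ['path/to/file.exe'],   # Any number of keys.
    'SQSReceipts': ['receipt-handle-1']  # At most 10 receipt handles.
}

# Assumes the handler's module (e.g. main) has been imported.
result = main.analyze_lambda_handler(event, FakeLambdaContext())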
Example #2
def setUp(self):
    """Before each test, set up a BinaryInfo."""
    self._binary = binary_info.BinaryInfo('test-bucket', 'test-key', None)
    self._binary.s3_last_modified = 'time:right_now'
    self._binary.s3_metadata = {'test-filename': 'test.txt'}
    self._binary.computed_md5 = 'Computed_MD5'
    self._binary.computed_sha = 'Computed_SHA'
    self._binary.yara_matches = [yara_mocks.YaraMatchMock('file.yara', 'rule_name')]
Example #3
def setUp(self):
    """Before each test, create the mock environment."""
    # Create a mock Dynamo table.
    self._mock_dynamo_client = boto3_mocks.MockDynamoDBClient(
        MOCK_DYNAMO_TABLE_NAME, HASH_KEY, RANGE_KEY)
    self._mock_dynamo_table = self._mock_dynamo_client.tables[
        MOCK_DYNAMO_TABLE_NAME]

    # Set up mocks.
    boto3.client = mock.MagicMock(return_value=self._mock_dynamo_client)

    self._binary = binary_info.BinaryInfo('Bucket', 'Key', None)
    self._binary.reported_md5 = 'Original_MD5'
    self._binary.observed_path = '/bin/path/run.exe'
    self._binary.yara_matches = [
        yara_mocks.YaraMatchMock('file.yara', 'rule_name')
    ]
    self._binary.computed_sha = 'Computed_SHA'
    self._binary.computed_md5 = 'Computed_MD5'

    self._match_table = analyzer_aws_lib.DynamoMatchTable(
        MOCK_DYNAMO_TABLE_NAME)
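One caveat in the fixture above: assigning boto3.client = mock.MagicMock(...) replaces the real function for every later test in the process, since nothing restores it. A patch-based variant restores the original automatically. This is only a sketch reusing the same assumed mock helpers (boto3_mocks, MOCK_DYNAMO_TABLE_NAME, etc.), with a hypothetical test class name, not the project's actual fixture.

import unittest
from unittest import mock

import boto3

class MatchTableTest(unittest.TestCase):  # Hypothetical class name.
    def setUp(self):
        """Patch boto3.client for this test only."""
        self._mock_dynamo_client = boto3_mocks.MockDynamoDBClient(
            MOCK_DYNAMO_TABLE_NAME, HASH_KEY, RANGE_KEY)

        # patch.object swaps boto3.client in, and addCleanup restores the
        # real function after each test so the patch never leaks.
        patcher = mock.patch.object(
            boto3, 'client', return_value=self._mock_dynamo_client)
        patcher.start()
        self.addCleanup(patcher.stop)

        self._match_table = analyzer_aws_lib.DynamoMatchTable(
            MOCK_DYNAMO_TABLE_NAME)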
Example #4
def analyze_lambda_handler(event_data: Dict[str, Any],
                           lambda_context) -> Dict[str, Dict[str, Any]]:
    """Lambda function entry point.

    Args:
        event_data: [dict] of the form: {
            'S3Objects': [...],  # S3 object keys.
            'SQSReceipts': [...]  # SQS receipt handles (to be deleted after processing).
        }
            There can be any number of S3Objects, but no more than 10 SQS receipts.
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    result = {}
    binaries = []  # List of the BinaryInfo data.

    # The Lambda version must be an integer.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        lambda_version = -1

    LOGGER.info('Processing %d record(s)', len(event_data['S3Objects']))
    for s3_key in event_data['S3Objects']:
        # S3 keys in event notifications are url-encoded.
        s3_key = urllib.parse.unquote_plus(s3_key)
        LOGGER.info('Analyzing "%s"', s3_key)

        with binary_info.BinaryInfo(os.environ['S3_BUCKET_NAME'], s3_key,
                                    ANALYZER) as binary:
            result[binary.s3_identifier] = binary.summary()
            binaries.append(binary)

            if binary.yara_matches:
                LOGGER.warning('%s matched YARA rules: %s', binary,
                               binary.matched_rule_ids)
                binary.save_matches_and_alert(
                    lambda_version,
                    os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                    os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])
            else:
                LOGGER.info('%s did not match any YARA rules', binary)

    # Delete all of the SQS receipts (mark them as completed).
    analyzer_aws_lib.delete_sqs_messages(os.environ['SQS_QUEUE_URL'],
                                         event_data['SQSReceipts'])

    # Publish metrics.
    try:
        analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
    except BotoError:
        LOGGER.exception('Error saving metric data')

    return result
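The one functional change from Example #1 is the urllib.parse.unquote_plus call: S3 event notifications URL-encode object keys, so a key must be decoded before it is used to fetch the object. A quick illustration with a made-up key:

import urllib.parse

# S3 notifications encode spaces as '+' and special characters as %XX escapes.
encoded_key = 'malware+samples/evil%3Adropper.exe'
decoded_key = urllib.parse.unquote_plus(encoded_key)
print(decoded_key)  # malware samples/evil:dropper.exe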
Example #5
def analyze_lambda_handler(event: Dict[str, Any],
                           lambda_context: Any) -> Dict[str, Dict[str, Any]]:
    """Analyzer Lambda function entry point.

    Args:
        event: SQS message batch sent by the dispatcher: {
            'messages': [
                {
                    'body': (str) JSON-encoded S3 put event: {
                        'Records': [
                            {
                                's3': {
                                    'object': {
                                        'key': (str)
                                    },
                                    'bucket': {
                                        'name': (str)
                                    }
                                }
                            },
                            ...
                        ]
                    },
                    'receipt': (str) SQS message receipt handle,
                    'receive_count': (int) Approx. # of times this has been received
                },
                ...
            ],
            'queue_url': (str) SQS queue url from which the message originated
        }
            Alternatively, the event can be an S3 Put Event dictionary (with no SQS information).
            This allows the analyzer to be linked directly to an S3 bucket notification if needed.
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    # Executables in the root of the deployment package (upx, pdftotext, etc) are added to PATH.
    os.environ['PATH'] = '{}:{}'.format(os.environ['PATH'],
                                        os.environ['LAMBDA_TASK_ROOT'])
    os.environ['LD_LIBRARY_PATH'] = os.environ['LAMBDA_TASK_ROOT']

    result = {}
    binaries = []  # List of the BinaryInfo data.

    # The Lambda version must be an integer.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        LOGGER.warning('Invoked $LATEST instead of a versioned function')
        lambda_version = -1

    for bucket_name, object_key in _objects_to_analyze(event):
        LOGGER.info('Analyzing "%s:%s"', bucket_name, object_key)

        try:
            with binary_info.BinaryInfo(bucket_name, object_key,
                                        ANALYZER) as binary:
                result[binary.s3_identifier] = binary.summary()
                binaries.append(binary)
        except analyzer_aws_lib.FileDownloadError:
            LOGGER.exception('Unable to download %s from %s', object_key,
                             bucket_name)
            continue

        if binary.yara_matches:
            LOGGER.warning('%s matched YARA rules: %s', binary,
                           binary.matched_rule_ids)
            binary.save_matches_and_alert(
                lambda_version, os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])

    # Delete all of the SQS receipts (mark them as completed).
    receipts_to_delete = [msg['receipt'] for msg in event.get('messages', [])]
    if receipts_to_delete:
        analyzer_aws_lib.delete_sqs_messages(event['queue_url'],
                                             receipts_to_delete)

    # Publish metrics.
    if binaries:
        try:
            analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
        except ClientError:
            LOGGER.exception('Error saving metric data')

    return result
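Example #5 delegates event parsing to _objects_to_analyze, which is not shown here. Based solely on the two event shapes documented in the docstring (a dispatcher SQS batch, or a bare S3 put event), it might look roughly like the sketch below; the real implementation may differ.

import json
import urllib.parse
from typing import Any, Dict, Generator, Tuple

def _objects_to_analyze(event: Dict[str, Any]) -> Generator[Tuple[str, str], None, None]:
    """Yield (bucket_name, object_key) pairs from either documented event format."""
    if 'messages' in event:
        # Dispatcher batch: every message body is a JSON-encoded S3 put event.
        s3_records = [
            record
            for message in event['messages']
            for record in json.loads(message['body'])['Records']
        ]
    else:
        # Bare S3 put event, delivered straight from a bucket notification.
        s3_records = event['Records']

    for record in s3_records:
        # Keys arrive url-encoded, as decoded explicitly in Example #4.
        yield (record['s3']['bucket']['name'],
               urllib.parse.unquote_plus(record['s3']['object']['key']))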
Example #6
def analyze_lambda_handler(event_data: Dict[str, Any],
                           lambda_context) -> Dict[str, Dict[str, Any]]:
    """Lambda function entry point.

    Args:
        event_data: [dict] of the form: {
            'Records': [
                {
                    "s3": {
                        "object": {
                            "key": "FileName.txt"
                        },
                        "bucket": {
                            "name": "mybucket"
                        }
                    }
                }
            ],
            'SQSReceipts': [...]  # SQS receipt handles (to be deleted after processing).
        }
            There can be any number of S3Objects, but no more than 10 SQS receipts.
            The Records are the same format as the S3 Put event, which means the analyzer could be
            directly linked to an S3 bucket notification if needed.
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    result = {}
    binaries = []  # List of the BinaryInfo data.

    # The Lambda version must be an integer.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        lambda_version = -1

    LOGGER.info('Processing %d record(s)', len(event_data['Records']))
    for record in event_data['Records']:
        bucket_name = record['s3']['bucket']['name']
        s3_key = urllib.parse.unquote_plus(record['s3']['object']['key'])
        LOGGER.info('Analyzing "%s:%s"', bucket_name, s3_key)

        with binary_info.BinaryInfo(bucket_name, s3_key, ANALYZER) as binary:
            result[binary.s3_identifier] = binary.summary()
            binaries.append(binary)

            if binary.yara_matches:
                LOGGER.warning('%s matched YARA rules: %s', binary,
                               binary.matched_rule_ids)
                binary.save_matches_and_alert(
                    lambda_version,
                    os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                    os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])
            else:
                LOGGER.info('%s did not match any YARA rules', binary)

    # Delete all of the SQS receipts (mark them as completed).
    analyzer_aws_lib.delete_sqs_messages(os.environ['SQS_QUEUE_URL'],
                                         event_data.get('SQSReceipts', []))

    # Publish metrics.
    try:
        analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
    except BotoError:
        LOGGER.exception('Error saving metric data')

    return result
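Since the Records entries mirror the native S3 put-event format, this handler can also be triggered directly by a bucket notification, as the docstring notes. A hypothetical direct invocation, with no SQS receipts to delete:

direct_event = {
    'Records': [
        {
            's3': {
                'object': {'key': 'FileName.txt'},
                'bucket': {'name': 'mybucket'}
            }
        }
    ]
    # No 'SQSReceipts' key: the handler's event_data.get('SQSReceipts', [])
    # tolerates its absence.
}

result = analyze_lambda_handler(direct_event, lambda_context)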
Example #7
def analyze_lambda_handler(event: Dict[str, Any],
                           lambda_context: Any) -> Dict[str, Any]:
    """Analyzer Lambda function entry point.

    Args:
        event: SQS message batch - each message body is a JSON-encoded S3 notification - {
            'Records': [
                {
                    'body': json.dumps({
                        'Records': [
                            {
                                's3': {
                                    'bucket': {
                                        'name': '...'
                                    },
                                    'object': {
                                        'key': '...'
                                    }
                                }
                            }
                        ]
                    }),
                    'messageId': '...'
                }
            ]
        }
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    # Executables in the root of the deployment package (upx, pdftotext, etc) are added to PATH.
    os.environ['PATH'] = '{}:{}'.format(os.environ['PATH'],
                                        os.environ['LAMBDA_TASK_ROOT'])
    os.environ['LD_LIBRARY_PATH'] = os.environ['LAMBDA_TASK_ROOT']

    result = {}
    binaries = []  # List of the BinaryInfo data.

    # The Lambda version must be an integer.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        LOGGER.warning('Invoked $LATEST instead of a versioned function')
        lambda_version = -1

    for bucket_name, object_key in _objects_to_analyze(event):
        LOGGER.info('Analyzing "%s:%s"', bucket_name, object_key)

        try:
            with binary_info.BinaryInfo(bucket_name, object_key,
                                        ANALYZER) as binary:
                result[binary.s3_identifier] = binary.summary()
                binaries.append(binary)
        except analyzer_aws_lib.FileDownloadError:
            LOGGER.exception('Unable to download %s from %s', object_key,
                             bucket_name)
            continue

        if binary.yara_matches:
            LOGGER.warning('%s matched YARA rules: %s', binary,
                           binary.matched_rule_ids)
            binary.save_matches_and_alert(
                lambda_version, os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])
        else:
            LOGGER.info('%s did not match any YARA rules', binary)
            if os.environ.get('SAFE_SNS_TOPIC_ARN'):
                binary.safe_alert_only(os.environ['SAFE_SNS_TOPIC_ARN'])

    # Publish metrics.
    if binaries:
        try:
            analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
        except ClientError:
            LOGGER.exception('Error saving metric data')

    return result
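Example #7 switches to the standard SQS-to-Lambda batch format: each record's body is itself a JSON string. _objects_to_analyze is again not shown, but a version matching this event shape would presumably decode each body before reading the nested S3 records. A rough sketch, not the project's actual code:

import json
import urllib.parse
from typing import Any, Dict, Generator, Tuple

def _objects_to_analyze(event: Dict[str, Any]) -> Generator[Tuple[str, str], None, None]:
    """Yield (bucket_name, object_key) pairs from an SQS batch of S3 notifications."""
    for sqs_record in event['Records']:
        # Each SQS message body is a JSON-encoded S3 notification.
        notification = json.loads(sqs_record['body'])
        for s3_record in notification['Records']:
            yield (s3_record['s3']['bucket']['name'],
                   urllib.parse.unquote_plus(s3_record['s3']['object']['key']))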