Example #1
    def save_matches(self, binary: BinaryInfo, analyzer_version: int) -> bool:
        """Save YARA match results to the Dynamo table.

        Args:
            binary: Instance containing information about the binary.
            analyzer_version: Version of the currently executing Lambda function.

        Returns:
            Whether an alert should be fired. Returns True if:
                The current Lambda version is >= the most recent analysis version AND
                (a) Any YARA rule is matched now that was not matched in the previous version, OR
                (b) A new S3 object appears which is identical to an already matched binary.
        """
        needs_alert = False

        # Grab the most recent match results for the given SHA.
        item_tuple = self._most_recent_item(binary.computed_sha)

        if item_tuple is not None:
            # An entry already exists for this SHA.
            item_lambda_version, item_matched_rules, item_s3_objects, previous_objects = item_tuple

            # Update the DB appropriately.
            if analyzer_version != item_lambda_version:
                # This binary has never been matched by this Lambda version.
                self._create_new_entry(binary, analyzer_version)
            elif binary.s3_identifier not in item_s3_objects:
                # A new S3 object is identical to a previously-matched binary.
                self._add_s3_key(binary, analyzer_version)

            # Decide whether we need to alert.
            if analyzer_version < item_lambda_version:
                LOGGER.warning(
                    'Current Lambda version %d is < version %d from previous analysis',
                    analyzer_version, item_lambda_version)
            elif bool(binary.matched_rule_ids - item_matched_rules):
                # A new YARA rule matched this binary.
                needs_alert = True
            elif binary.s3_identifier not in item_s3_objects.union(
                    previous_objects):
                # A new S3 object matched (which did not match in the previous version).
                needs_alert = True
        else:
            # This binary has never been matched before.
            self._create_new_entry(binary, analyzer_version)
            needs_alert = True

        return needs_alert
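
The alert decision above comes down to set arithmetic over YARA rule IDs. A minimal standalone sketch with illustrative values (the real IDs come from BinaryInfo.matched_rule_ids and the stored Dynamo item, neither of which is shown here):

# Illustrative rule IDs only - not taken from the source.
item_matched_rules = {'rules/evil.yara:rule_a'}
current_matches = {'rules/evil.yara:rule_a', 'rules/evil.yara:rule_b'}

# bool(new_set - old_set) is True only if at least one rule matched now
# that was not matched in the previous analysis.
print(bool(current_matches - item_matched_rules))  # True: rule_b is new
print(bool(item_matched_rules - current_matches))  # False: nothing new matched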
Example #2
def analyze_lambda_handler(event_data: Dict[str, Any],
                           lambda_context) -> Dict[str, Dict[str, Any]]:
    """Lambda function entry point.

    Args:
        event_data: [dict] of the form: {
            'S3Objects': [...],  # S3 object keys.
            'SQSReceipts': [...]  # SQS receipt handles (to be deleted after processing).
        }
            There can be any number of S3 objects, but no more than 10 SQS receipts.
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    result = {}
    binaries = []  # List of BinaryInfo objects.

    # The Lambda version must be an integer.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        lambda_version = -1

    LOGGER.info('Processing %d record(s)', len(event_data['S3Objects']))
    for s3_key in event_data['S3Objects']:
        # S3 keys in event notifications are url-encoded.
        s3_key = urllib.parse.unquote_plus(s3_key)
        LOGGER.info('Analyzing "%s"', s3_key)

        with binary_info.BinaryInfo(os.environ['S3_BUCKET_NAME'], s3_key,
                                    ANALYZER) as binary:
            result[binary.s3_identifier] = binary.summary()
            binaries.append(binary)

            if binary.yara_matches:
                LOGGER.warning('%s matched YARA rules: %s', binary,
                               binary.matched_rule_ids)
                binary.save_matches_and_alert(
                    lambda_version,
                    os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                    os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])
            else:
                LOGGER.info('%s did not match any YARA rules', binary)

    # Delete all of the SQS receipts (mark them as completed).
    analyzer_aws_lib.delete_sqs_messages(os.environ['SQS_QUEUE_URL'],
                                         event_data['SQSReceipts'])

    # Publish metrics.
    try:
        analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
    except BotoError:
        LOGGER.exception('Error saving metric data')

    return result
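
For a local smoke test, invoking this handler might look like the following sketch. FakeContext and the event values are hypothetical, and the S3_BUCKET_NAME, YARA_MATCHES_DYNAMO_TABLE_NAME, YARA_ALERTS_SNS_TOPIC_ARN, and SQS_QUEUE_URL environment variables named in the handler must already be set:

# Hypothetical test harness - names and values are placeholders.
class FakeContext:
    function_version = '7'  # Versioned invocations report an integer string.

event = {
    'S3Objects': ['path/to/some%20file.exe'],  # URL-encoded, as in S3 events
    'SQSReceipts': []                          # Nothing to delete afterwards
}
# result = analyze_lambda_handler(event, FakeContext())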
Example #3
def analyze_lambda_handler(event_data: Dict[str, Any],
                           lambda_context) -> Dict[str, Dict[str, Any]]:
    """Lambda function entry point.

    Args:
        event_data: [dict] of the form: {
            'Records': [
                {
                    's3': {
                        'object': {
                            'key': 'FileName.txt'
                        },
                        'bucket': {
                            'name': 'mybucket'
                        }
                    }
                }
            ],
            'SQSReceipts': [...]  # SQS receipt handles (to be deleted after processing).
        }
            There can be any number of S3 objects, but no more than 10 SQS receipts.
            The Records are the same format as the S3 Put event, which means the analyzer could be
            directly linked to an S3 bucket notification if needed.
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    result = {}
    binaries = []  # List of BinaryInfo objects.

    # The Lambda version must be an integer.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        lambda_version = -1

    LOGGER.info('Processing %d record(s)', len(event_data['Records']))
    for record in event_data['Records']:
        bucket_name = record['s3']['bucket']['name']
        s3_key = urllib.parse.unquote_plus(record['s3']['object']['key'])
        LOGGER.info('Analyzing "%s:%s"', bucket_name, s3_key)

        with binary_info.BinaryInfo(bucket_name, s3_key, ANALYZER) as binary:
            result[binary.s3_identifier] = binary.summary()
            binaries.append(binary)

            if binary.yara_matches:
                LOGGER.warning('%s matched YARA rules: %s', binary,
                               binary.matched_rule_ids)
                binary.save_matches_and_alert(
                    lambda_version,
                    os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                    os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])
            else:
                LOGGER.info('%s did not match any YARA rules', binary)

    # Delete all of the SQS receipts (mark them as completed).
    analyzer_aws_lib.delete_sqs_messages(os.environ['SQS_QUEUE_URL'],
                                         event_data.get('SQSReceipts', []))

    # Publish metrics.
    try:
        analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
    except BotoError:
        LOGGER.exception('Error saving metric data')

    return result
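
This version consumes the S3 put-event record format instead of bare object keys. A sample event matching the docstring (bucket and key names are placeholders):

# Placeholder bucket and key names.
event = {
    'Records': [
        {
            's3': {
                'bucket': {'name': 'mybucket'},
                'object': {'key': 'FileName.txt'}
            }
        }
    ],
    'SQSReceipts': []  # Optional: the handler falls back to an empty list
}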
Example #4
def analyze_lambda_handler(event: Dict[str, Any],
                           lambda_context: Any) -> Dict[str, Dict[str, Any]]:
    """Analyzer Lambda function entry point.

    Args:
        event: SQS message batch sent by the dispatcher: {
            'messages': [
                {
                    'body': (str) JSON-encoded S3 put event: {
                        'Records': [
                            {
                                's3': {
                                    'object': {
                                        'key': (str)
                                    },
                                    'bucket': {
                                        'name': (str)
                                    }
                                }
                            },
                            ...
                        ]
                    },
                    'receipt': (str) SQS message receipt handle,
                    'receive_count': (int) Approx. # of times this has been received
                },
                ...
            ],
            'queue_url': (str) SQS queue url from which the message originated
        }
            Alternatively, the event can be an S3 Put Event dictionary (with no SQS information).
            This allows the analyzer to be linked directly to an S3 bucket notification if needed.
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    # Executables in the root of the deployment package (upx, pdftotext, etc) are added to PATH.
    os.environ['PATH'] = '{}:{}'.format(os.environ['PATH'],
                                        os.environ['LAMBDA_TASK_ROOT'])
    os.environ['LD_LIBRARY_PATH'] = os.environ['LAMBDA_TASK_ROOT']

    result = {}
    binaries = []  # List of BinaryInfo objects.

    # The Lambda version must be an integer.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        LOGGER.warning('Invoked $LATEST instead of a versioned function')
        lambda_version = -1

    for bucket_name, object_key in _objects_to_analyze(event):
        LOGGER.info('Analyzing "%s:%s"', bucket_name, object_key)

        try:
            with binary_info.BinaryInfo(bucket_name, object_key,
                                        ANALYZER) as binary:
                result[binary.s3_identifier] = binary.summary()
                binaries.append(binary)
        except analyzer_aws_lib.FileDownloadError:
            LOGGER.exception('Unable to download %s from %s', object_key,
                             bucket_name)
            continue

        if binary.yara_matches:
            LOGGER.warning('%s matched YARA rules: %s', binary,
                           binary.matched_rule_ids)
            binary.save_matches_and_alert(
                lambda_version, os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])

    # Delete all of the SQS receipts (mark them as completed).
    receipts_to_delete = [msg['receipt'] for msg in event.get('messages', [])]
    if receipts_to_delete:
        analyzer_aws_lib.delete_sqs_messages(event['queue_url'],
                                             receipts_to_delete)

    # Publish metrics.
    if binaries:
        try:
            analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
        except ClientError:
            LOGGER.exception('Error saving metric data')

    return result
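
The _objects_to_analyze helper is referenced above but not shown. One possible implementation, assuming only the two event shapes described in the docstring (a sketch, not the project's actual helper):

import json
import urllib.parse
from typing import Any, Dict, Generator, Tuple


def _objects_to_analyze(event: Dict[str, Any]) -> Generator[Tuple[str, str], None, None]:
    """Yield (bucket_name, object_key) for each S3 object in the event."""
    if 'messages' in event:
        # Dispatcher batch: each message body is a JSON-encoded S3 put event.
        records = []
        for message in event['messages']:
            records.extend(json.loads(message['body']).get('Records', []))
    else:
        # Direct S3 bucket notification.
        records = event.get('Records', [])

    for record in records:
        yield (record['s3']['bucket']['name'],
               urllib.parse.unquote_plus(record['s3']['object']['key']))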
Example #5
def analyze_lambda_handler(event: Dict[str, Any],
                           lambda_context: Any) -> Dict[str, Any]:
    """Analyzer Lambda function entry point.

    Args:
        event: SQS message batch - each message body is a JSON-encoded S3 notification - {
            'Records': [
                {
                    'body': json.dumps({
                        'Records': [
                            {
                                's3': {
                                    'bucket': {
                                        'name': '...'
                                    },
                                    'object': {
                                        'key': '...'
                                    }
                                }
                            }
                        ]
                    }),
                    'messageId': '...'
                }
            ]
        }
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    # Executables in the root of the deployment package (upx, pdftotext, etc) are added to PATH.
    os.environ['PATH'] = '{}:{}'.format(os.environ['PATH'],
                                        os.environ['LAMBDA_TASK_ROOT'])
    os.environ['LD_LIBRARY_PATH'] = os.environ['LAMBDA_TASK_ROOT']

    result = {}
    binaries = []  # List of BinaryInfo objects.

    # The Lambda version must be an integer.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        LOGGER.warning('Invoked $LATEST instead of a versioned function')
        lambda_version = -1

    for bucket_name, object_key in _objects_to_analyze(event):
        LOGGER.info('Analyzing "%s:%s"', bucket_name, object_key)

        try:
            with binary_info.BinaryInfo(bucket_name, object_key,
                                        ANALYZER) as binary:
                result[binary.s3_identifier] = binary.summary()
                binaries.append(binary)
        except analyzer_aws_lib.FileDownloadError:
            LOGGER.exception('Unable to download %s from %s', object_key,
                             bucket_name)
            continue

        if binary.yara_matches:
            LOGGER.warning('%s matched YARA rules: %s', binary,
                           binary.matched_rule_ids)
            binary.save_matches_and_alert(
                lambda_version, os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])
        else:
            LOGGER.info('%s did not match any YARA rules', binary)
            if os.environ['SAFE_SNS_TOPIC_ARN']:
                binary.safe_alert_only(os.environ['SAFE_SNS_TOPIC_ARN'])

    # Publish metrics.
    if binaries:
        try:
            analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
        except ClientError:
            LOGGER.exception('Error saving metric data')

    return result
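
Here the event is the standard SQS batch shape, with each message body a JSON-encoded S3 notification. A sample event for local testing (all identifiers are placeholders):

import json

# Placeholder values throughout; this mirrors the docstring's event shape.
event = {
    'Records': [
        {
            'body': json.dumps({
                'Records': [
                    {
                        's3': {
                            'bucket': {'name': 'mybucket'},
                            'object': {'key': 'path/to/file.exe'}
                        }
                    }
                ]
            }),
            'messageId': '00000000-0000-0000-0000-000000000000'
        }
    ]
}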