def _add_s3_key(self, binary: BinaryInfo, analyzer_version: int) -> None:
    """Record the binary's S3 identifier on an entry that already exists.

    The identifier is merged into the entry's S3Objects string set with an ADD
    update expression; if the S3 key is already present this is a no-op.

    Args:
        binary: Binary whose S3 identifier is added to the entry.
        analyzer_version: Analyzer version that, with the SHA256, keys the entry.
    """
    s3_id = binary.s3_identifier
    sha256 = binary.computed_sha

    LOGGER.info(
        'Adding %s to existing entry (SHA256: %s, AnalyzerVersion: %d)',
        s3_id, sha256, analyzer_version)

    # The entry is addressed by (SHA256, AnalyzerVersion).
    entry_key = {'SHA256': sha256, 'AnalyzerVersion': analyzer_version}
    self._table.update_item(
        Key=entry_key,
        UpdateExpression='ADD S3Objects :s3_string_set',
        ExpressionAttributeValues={':s3_string_set': {s3_id}})
def _create_new_entry(self, binary: BinaryInfo, analyzer_version: int) -> None:
    """Write a brand-new Dynamo item describing the binary and its YARA matches.

    Args:
        binary: Binary supplying the hashes, matched rule IDs and S3 details.
        analyzer_version: Analyzer version stored alongside the SHA256.
    """
    sha256 = binary.computed_sha
    LOGGER.info('Creating new entry (SHA256: %s, AnalyzerVersion: %d)',
                sha256, analyzer_version)

    self._table.put_item(Item={
        'SHA256': sha256,
        'AnalyzerVersion': analyzer_version,
        'MatchedRules': binary.matched_rule_ids,
        'MD5': binary.computed_md5,
        'S3LastModified': binary.s3_last_modified,
        'S3Metadata': binary.s3_metadata,
        # Stored as a set holding the single S3 identifier of this binary.
        'S3Objects': {binary.s3_identifier},
    })
def save_matches_and_alert(
        self, analyzer_version: int, dynamo_table_name: str, sns_topic_arn: str) -> None:
    """Persist match results to Dynamo, then publish an SNS alert when one is needed.

    Args:
        analyzer_version: The currently executing version of the Lambda function.
        dynamo_table_name: Name of the Dynamo table that stores YARA match results.
        sns_topic_arn: ARN of the SNS topic that receives match alerts.
    """
    match_table = analyzer_aws_lib.DynamoMatchTable(dynamo_table_name)

    # save_matches() stores the results; a falsy return value means no alert is sent.
    if not match_table.save_matches(self, analyzer_version):
        return

    LOGGER.info('Publishing an SNS alert')
    analyzer_aws_lib.publish_alert_to_sns(self, sns_topic_arn)
def analyze(self, target_file: str, original_target_path: str = '') -> List[YaraMatch]:
    """Scan a file with yara-python and yextend and return the combined matches.

    Args:
        target_file: Local path to target file to be analyzed.
        original_target_path: Path where the target file was originally discovered.

    Returns:
        List of YaraMatch tuples. If yextend fails, only the yara-python matches
        are returned.
    """
    # Try to UPX-unpack the file; a non-zero exit status means it was not packed.
    try:
        subprocess.check_call(['./upx', '-d', target_file])
    except subprocess.CalledProcessError:
        pass
    else:
        LOGGER.info('Unpacked UPX-compressed file %s', target_file)

    # Raw YARA matches (yara-python).
    # TODO: Once yextend is more robust, we may eventually not need yara-python anymore.
    yara_python_matches = []
    for raw_match in self._rules.match(
            target_file, externals=self._yara_variables(original_target_path)):
        # Element 1 of each entry in .strings is collected into a set.
        yara_python_matches.append(
            YaraMatch(raw_match.rule, raw_match.namespace, raw_match.meta,
                      {entry[1] for entry in raw_match.strings}))

    # Yextend matches.
    os.environ['LD_LIBRARY_PATH'] = os.environ['LAMBDA_TASK_ROOT']
    yextend_output = None
    try:
        yextend_output = subprocess.check_output(
            ['./yextend', '-r', self._compiled_rules_file, '-t', target_file, '-j'])
        parsed_output = json.loads(yextend_output.decode('utf-8'))
        yextend_matches = _convert_yextend_to_yara_match(parsed_output[0])
        return yara_python_matches + yextend_matches
    except Exception:  # pylint: disable=broad-except
        # Any yextend failure is logged and otherwise ignored.
        LOGGER.exception('Error running yextend or parsing its output')
        if yextend_output:
            LOGGER.error('yextend output: <%s>', yextend_output)

    return yara_python_matches
def _create_new_entry(self, binary: BinaryInfo, analyzer_version: int) -> None:
    """Write a brand-new Dynamo item describing the binary and its YARA matches.

    Args:
        binary: Binary supplying the hashes, matched rule IDs and S3 details.
        analyzer_version: Analyzer version stored alongside the SHA256.

    Raises:
        ClientError: Re-raised after the failing item has been logged.
    """
    sha256 = binary.computed_sha
    LOGGER.info('Creating new entry (SHA256: %s, AnalyzerVersion: %d)',
                sha256, analyzer_version)

    item = {
        'SHA256': sha256,
        'AnalyzerVersion': analyzer_version,
        'MatchedRules': binary.matched_rule_ids,
        'MD5': binary.computed_md5,
        'S3LastModified': binary.s3_last_modified,
    }
    # The metadata is passed through _replace_empty_strings() before being stored.
    item['S3Metadata'] = self._replace_empty_strings(binary.s3_metadata)
    item['S3Objects'] = {binary.s3_identifier}

    try:
        self._table.put_item(Item=item)
    except ClientError:
        # Log the offending item to aid debugging, then let the caller see the error.
        LOGGER.error('Error saving item %s', item)
        raise
def save_matches_and_alert(
        self, analyzer_version: int, dynamo_table_name: str, sns_topic_arn: str,
        sns_enabled: bool = True) -> None:
    """Persist match results to Dynamo, then publish an SNS alert when one is needed.

    Args:
        analyzer_version: The currently executing version of the Lambda function.
        dynamo_table_name: Name of the Dynamo table that stores YARA match results.
        sns_topic_arn: ARN of the SNS topic that receives match alerts.
        sns_enabled: If False, results are still saved but no alert is published.
    """
    match_table = analyzer_aws_lib.DynamoMatchTable(dynamo_table_name)
    needs_alert = match_table.save_matches(self, analyzer_version)

    # Results are always saved; alerting is skipped if unneeded or disabled.
    if not needs_alert or not sns_enabled:
        return

    LOGGER.info('Publishing a YARA match alert to %s', sns_topic_arn)
    # The subject names the file path, falling back to the SHA when the path is falsy.
    display_name = self.filepath or self.computed_sha
    subject = '[BiAlert] {} matches a YARA rule'.format(display_name)
    analyzer_aws_lib.publish_to_sns(self, sns_topic_arn, subject)
def analyze(self, target_file: str, original_target_path: str = '') -> List[YaraMatch]:
    """Scan a file with yara-python and append the yextend matches.

    Args:
        target_file: Local path to target file to be analyzed.
        original_target_path: Path where the target file was originally discovered.

    Returns:
        List of YaraMatch tuples: yara-python matches followed by whatever
        self._yextend_matches() returns for the same file.
    """
    def _printable(raw_bytes) -> str:
        """Decode matched bytes as UTF-8, falling back to their hex representation."""
        try:
            return raw_bytes.decode('utf-8')
        except UnicodeDecodeError:
            # Bytes string is not unicode - use its hex values instead.
            return raw_bytes.hex()

    # Try to UPX-unpack the file; a non-zero exit status means it was not packed.
    try:
        # stderr is folded into the captured stdout, so all UPX output is discarded.
        subprocess.check_output(['./upx', '-q', '-d', target_file], stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError:
        pass
    else:
        LOGGER.info('Unpacked UPX-compressed file %s', target_file)

    # Raw YARA matches (yara-python).
    externals = self._yara_variables(original_target_path)
    yara_python_matches = []
    for raw_match in self._rules.match(target_file, externals=externals):
        string_names = set()
        string_data = set()
        # Each entry is a 3-tuple; the first element is not used here.
        for _, string_name, matched_bytes in raw_match.strings:
            string_names.add(string_name)
            string_data.add(_printable(matched_bytes))
        yara_python_matches.append(YaraMatch(
            raw_match.rule, raw_match.namespace, raw_match.meta, string_names, string_data))

    return yara_python_matches + self._yextend_matches(target_file)
def _objects_to_analyze(
        event: Dict[str, Any]) -> Generator[Tuple[str, str], None, None]:
    """Walk the invocation event and yield each S3 object it refers to.

    An event whose keys are exactly {'messages', 'queue_url'} is treated as a
    dispatcher batch of SQS messages; anything else is treated as an S3 event
    dictionary with a top-level 'Records' list.

    Args:
        event: Invocation event, from either the dispatcher or an S3 bucket.

    Yields:
        (bucket_name, object_key) string tuples to analyze, as produced by _s3_objects().
    """
    if set(event) != {'messages', 'queue_url'}:
        LOGGER.info('Invoked with dictionary (S3 Event)')
        yield from _s3_objects(event['Records'])
        return

    sqs_messages = event['messages']
    LOGGER.info('Invoked from dispatcher with %d messages', len(sqs_messages))
    for sqs_record in sqs_messages:
        try:
            # Each message body is expected to be a JSON document with a 'Records' list.
            s3_records = json.loads(sqs_record['body'])['Records']
        except (json.JSONDecodeError, KeyError, TypeError):
            # A malformed message is logged and skipped; the rest are still processed.
            LOGGER.exception('Skipping invalid SQS message %s', sqs_record)
        else:
            yield from _s3_objects(s3_records)
def analyze(self, target_file: str, original_target_path: str = '') -> List[YaraMatch]:
    """Submit a file to the local THOR HTTP service and convert its matches.

    The file is UPX-unpacked if possible, then POSTed to
    http://127.0.0.1:8080/api/check. Every entry under a "matches" key in the
    returned log messages is converted to a YaraMatch.

    Args:
        target_file: Local path to target file to be analyzed.
        original_target_path: Path where the target file was originally discovered.
            Accepted for interface compatibility; this implementation does not use it.

    Returns:
        List of YaraMatch tuples. The list is empty when THOR answers with a
        non-200 status or reports no matches.
    """
    # Try to UPX-unpack the file; a non-zero exit status means it was not packed.
    try:
        # stderr is folded into the captured stdout, so all UPX output is discarded.
        subprocess.check_output(['./upx', '-q', '-d', target_file], stderr=subprocess.STDOUT)
        LOGGER.info('Unpacked UPX-compressed file %s', target_file)
    except subprocess.CalledProcessError:
        pass  # Not a packed file

    thor_matches = []

    # THOR matches. The upload handle is closed as soon as the request completes
    # (the response body is fully read by then because streaming is not enabled).
    with open(target_file, 'rb') as upload:
        response = requests.post('http://127.0.0.1:8080/api/check', files={'file': upload})

    try:
        if response.status_code != 200:
            LOGGER.warning('THOR returned HTTP status %d for %s',
                           response.status_code, target_file)
            return thor_matches

        for message in response.json():
            LOGGER.info("Received THOR log message: %s", message)
            if "matches" not in message:
                continue

            for match in message["matches"]:
                try:
                    metadata = {
                        "description": match["reason"],
                        "reference": match["ref"],
                        "date": match["ruledate"],
                        "tags": ", ".join(match["tags"]),
                        "score": match["subscore"],
                    }
                    # A sigtype of 1 or "custom" puts the rule in the custom namespace.
                    if match.get("sigtype") in (1, "custom"):
                        namespace = "custom"
                    else:
                        namespace = "THOR"

                    string_matches = match["matched"]
                    if string_matches is None:
                        string_matches = ["None"]

                    # String names are not taken from the THOR match; a placeholder is used.
                    thor_matches.append(
                        YaraMatch(match["rulename"], namespace, metadata,
                                  {"Unknown"}, set(string_matches)))
                except (IndexError, KeyError):
                    # THOR match with unexpected syntax
                    LOGGER.info("Could not parse THOR match: %s", match)
    finally:
        # Release the connection even if decoding or conversion raised.
        response.close()

    return thor_matches
def __init__(self) -> None:
    """Launch the THOR server subprocess and wait until it reports that it started.

    THOR's stdout is read line by line (and logged) until a line containing
    "service started" appears. Along the way, any line matching
    RULE_COUNT_REGEX updates the stored rule count.

    Raises:
        Exception: If the process exits before reporting a successful start.
    """
    LOGGER.info('Starting THOR server')
    thor_command = ['./thor-linux-64', '--thunderstorm', '--pure-yara']
    self.proc = subprocess.Popen(
        thor_command, stdout=subprocess.PIPE, universal_newlines=True)
    self._rule_count = 0

    started = False
    # Keep reading while the process is alive; stop at the first "service started" line.
    while self.proc.poll() is None:
        output_line = self.proc.stdout.readline()
        started = "service started" in output_line

        count_match = RULE_COUNT_REGEX.search(output_line)
        if count_match is not None:
            self._rule_count = int(count_match.group(1))

        LOGGER.info(output_line)
        if started:
            break

    if not started:
        # The process ended without starting up; log whatever output remains.
        LOGGER.info(self.proc.stdout.read())
        raise Exception("THOR startup was not successful")

    LOGGER.info('Started THOR server')
def analyze_lambda_handler(event_data: Dict[str, Any], lambda_context) -> Dict[str, Dict[str, Any]]:
    """Lambda function entry point.

    Args:
        event_data: [dict] of the form: {
            'S3Objects': [...],  # S3 object keys.
            'SQSReceipts': [...]  # SQS receipt handles (to be deleted after processing).
        }
            There can be any number of S3objects, but no more than 10 SQS receipts.
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    # The Lambda version must be an integer; a non-numeric version is recorded as -1.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        lambda_version = -1

    result = {}
    binaries = []  # Every BinaryInfo analyzed, used for the metrics below.

    encoded_keys = event_data['S3Objects']
    LOGGER.info('Processing %d record(s)', len(encoded_keys))
    for encoded_key in encoded_keys:
        # S3 keys in event notifications are url-encoded.
        s3_key = urllib.parse.unquote_plus(encoded_key)
        LOGGER.info('Analyzing "%s"', s3_key)

        with binary_info.BinaryInfo(os.environ['S3_BUCKET_NAME'], s3_key, ANALYZER) as binary:
            result[binary.s3_identifier] = binary.summary()
            binaries.append(binary)

            if not binary.yara_matches:
                LOGGER.info('%s did not match any YARA rules', binary)
                continue

            LOGGER.warning('%s matched YARA rules: %s', binary, binary.matched_rule_ids)
            binary.save_matches_and_alert(
                lambda_version,
                os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])

    # Delete all of the SQS receipts (mark them as completed).
    analyzer_aws_lib.delete_sqs_messages(os.environ['SQS_QUEUE_URL'], event_data['SQSReceipts'])

    # Publish metrics; a failure here is logged and does not fail the invocation.
    try:
        analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
    except BotoError:
        LOGGER.exception('Error saving metric data')

    return result
def analyze_lambda_handler(event: Dict[str, Any], lambda_context: Any) -> Dict[str, Dict[str, Any]]:
    """Analyzer Lambda function entry point.

    Args:
        event: SQS message batch sent by the dispatcher: {
            'messages': [
                {
                    'body': (str) JSON-encoded S3 put event: {
                        'Records': [
                            {
                                's3': {
                                    'object': { 'key': (str) },
                                    'bucket': { 'name': (str) }
                                }
                            },
                            ...
                        ]
                    },
                    'receipt': (str) SQS message receipt handle,
                    'receive_count': (int) Approx. # of times this has been received
                },
                ...
            ],
            'queue_url': (str) SQS queue url from which the message originated
        }
            Alternatively, the event can be an S3 Put Event dictionary (with no sqs information).
            This allows the analyzer to be linked directly to an S3 bucket notification if needed.
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    # Executables in the root of the deployment package (upx, pdftotext, etc) are added to PATH.
    # os.environ persists for as long as the process is reused across invocations, so the
    # task root is appended only if it is missing; otherwise PATH would grow on every call.
    task_root = os.environ['LAMBDA_TASK_ROOT']
    current_path = os.environ['PATH']
    if task_root not in current_path.split(':'):
        os.environ['PATH'] = '{}:{}'.format(current_path, task_root)
    os.environ['LD_LIBRARY_PATH'] = task_root

    result = {}
    binaries = []  # List of the BinaryInfo data.

    # The Lambda version must be an integer; a non-numeric version is recorded as -1.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        LOGGER.warning('Invoked $LATEST instead of a versioned function')
        lambda_version = -1

    for bucket_name, object_key in _objects_to_analyze(event):
        LOGGER.info('Analyzing "%s:%s"', bucket_name, object_key)

        try:
            with binary_info.BinaryInfo(bucket_name, object_key, ANALYZER) as binary:
                result[binary.s3_identifier] = binary.summary()
                binaries.append(binary)
        except analyzer_aws_lib.FileDownloadError:
            # A failed download skips this object only; the rest are still analyzed.
            LOGGER.exception('Unable to download %s from %s', object_key, bucket_name)
            continue

        if binary.yara_matches:
            LOGGER.warning('%s matched YARA rules: %s', binary, binary.matched_rule_ids)
            binary.save_matches_and_alert(
                lambda_version,
                os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])

    # Delete all of the SQS receipts (mark them as completed).
    # An S3 event has no 'messages' key, in which case there is nothing to delete.
    receipts_to_delete = [msg['receipt'] for msg in event.get('messages', [])]
    if receipts_to_delete:
        analyzer_aws_lib.delete_sqs_messages(event['queue_url'], receipts_to_delete)

    # Publish metrics; a failure here is logged and does not fail the invocation.
    if binaries:
        try:
            analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
        except ClientError:
            LOGGER.exception('Error saving metric data')

    return result
def analyze_lambda_handler(event_data: Dict[str, Any], lambda_context) -> Dict[str, Dict[str, Any]]:
    """Lambda function entry point.

    Args:
        event_data: [dict] of the form: {
            'Records': [
                {
                    "s3": {
                        "object": {
                            "key": "FileName.txt"
                        },
                        "bucket": {
                            "name": "mybucket"
                        }
                    }
                }
            ],
            'SQSReceipts': [...]  # SQS receipt handles (to be deleted after processing).
        }
            There can be any number of S3objects, but no more than 10 SQS receipts.
            The Records are the same format as the S3 Put event, which means the analyzer could be
            directly linked to an S3 bucket notification if needed. In that case 'SQSReceipts'
            is absent and no SQS deletion is attempted.
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    result = {}
    binaries = []  # List of the BinaryInfo data.

    # The Lambda version must be an integer; a non-numeric version is recorded as -1.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        lambda_version = -1

    LOGGER.info('Processing %d record(s)', len(event_data['Records']))
    for record in event_data['Records']:
        bucket_name = record['s3']['bucket']['name']
        # The object key is url-decoded before use.
        s3_key = urllib.parse.unquote_plus(record['s3']['object']['key'])
        LOGGER.info('Analyzing "%s:%s"', bucket_name, s3_key)

        with binary_info.BinaryInfo(bucket_name, s3_key, ANALYZER) as binary:
            result[binary.s3_identifier] = binary.summary()
            binaries.append(binary)

            if binary.yara_matches:
                LOGGER.warning('%s matched YARA rules: %s', binary, binary.matched_rule_ids)
                binary.save_matches_and_alert(
                    lambda_version,
                    os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                    os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])
            else:
                LOGGER.info('%s did not match any YARA rules', binary)

    # Delete all of the SQS receipts (mark them as completed).
    # With no receipts (e.g. a direct S3 notification) the queue URL is not read and
    # the delete helper is not called.
    sqs_receipts = event_data.get('SQSReceipts', [])
    if sqs_receipts:
        analyzer_aws_lib.delete_sqs_messages(os.environ['SQS_QUEUE_URL'], sqs_receipts)

    # Publish metrics; a failure here is logged and does not fail the invocation.
    try:
        analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
    except BotoError:
        LOGGER.exception('Error saving metric data')

    return result
def analyze_lambda_handler(event: Dict[str, Any], lambda_context: Any) -> Dict[str, Any]:
    """Analyzer Lambda function entry point.

    Args:
        event: SQS message batch - each message body is a JSON-encoded S3 notification - {
            'Records': [
                {
                    'body': json.dumps({
                        'Records': [
                            {
                                's3': {
                                    'bucket': { 'name': '...' },
                                    'object': { 'key': '...' }
                                }
                            }
                        ]
                    }),
                    'messageId': '...'
                }
            ]
        }
        lambda_context: LambdaContext object (with .function_version).

    Returns:
        A dict mapping S3 object identifier to a summary of file info and matched YARA rules.
        Example: {
            'S3:bucket:key': {
                'FileInfo': { ... },
                'MatchedRules': { ... },
                'NumMatchedRules': 1
            }
        }
    """
    # Executables in the root of the deployment package (upx, pdftotext, etc) are added to PATH.
    # os.environ persists for as long as the process is reused across invocations, so the
    # task root is appended only if it is missing; otherwise PATH would grow on every call.
    task_root = os.environ['LAMBDA_TASK_ROOT']
    current_path = os.environ['PATH']
    if task_root not in current_path.split(':'):
        os.environ['PATH'] = '{}:{}'.format(current_path, task_root)
    os.environ['LD_LIBRARY_PATH'] = task_root

    result = {}
    binaries = []  # List of the BinaryInfo data.

    # The Lambda version must be an integer; a non-numeric version is recorded as -1.
    try:
        lambda_version = int(lambda_context.function_version)
    except ValueError:
        LOGGER.warning('Invoked $LATEST instead of a versioned function')
        lambda_version = -1

    for bucket_name, object_key in _objects_to_analyze(event):
        LOGGER.info('Analyzing "%s:%s"', bucket_name, object_key)

        try:
            with binary_info.BinaryInfo(bucket_name, object_key, ANALYZER) as binary:
                result[binary.s3_identifier] = binary.summary()
                binaries.append(binary)
        except analyzer_aws_lib.FileDownloadError:
            # A failed download skips this object only; the rest are still analyzed.
            LOGGER.exception('Unable to download %s from %s', object_key, bucket_name)
            continue

        if binary.yara_matches:
            LOGGER.warning('%s matched YARA rules: %s', binary, binary.matched_rule_ids)
            binary.save_matches_and_alert(
                lambda_version,
                os.environ['YARA_MATCHES_DYNAMO_TABLE_NAME'],
                os.environ['YARA_ALERTS_SNS_TOPIC_ARN'])
        else:
            LOGGER.info('%s did not match any YARA rules', binary)
            # The "safe" topic is optional: an unset or empty variable disables it.
            safe_topic_arn = os.environ.get('SAFE_SNS_TOPIC_ARN')
            if safe_topic_arn:
                binary.safe_alert_only(safe_topic_arn)

    # Publish metrics; a failure here is logged and does not fail the invocation.
    if binaries:
        try:
            analyzer_aws_lib.put_metric_data(NUM_YARA_RULES, binaries)
        except ClientError:
            LOGGER.exception('Error saving metric data')

    return result