def test_process_with_threat_intel_enabled(self, mock_client):
    """Rules Engine - Threat Intel is enabled when process method is called"""
    @rule(datatypes=['sourceAddress'],
          outputs=['s3:sample_bucket'])
    def match_ipaddress(_):  # pylint: disable=unused-variable
        """Testing dummy rule"""
        return True

    mock_client.return_value = MockDynamoDBClient()
    toggled_config = self.config
    toggled_config['global']['threat_intel']['enabled'] = True
    toggled_config['global']['threat_intel']['dynamodb_table'] = 'test_table_name'

    new_rules_engine = StreamRules(toggled_config)
    kinesis_data_items = [
        {
            'account': 123456,
            'region': '123456123456',
            'source': '1.1.1.2',
            'detail': {
                'eventName': 'ConsoleLogin',
                'sourceIPAddress': '1.1.1.2',
                'recipientAccountId': '654321'
            }
        }
    ]

    for data in kinesis_data_items:
        kinesis_data = json.dumps(data)
        service, entity = 'kinesis', 'test_kinesis_stream'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = load_and_classify_payload(toggled_config, service, entity, raw_record)

        assert_equal(len(new_rules_engine.process(payload)[0]), 1)
def test_update(self):
    """Rules Engine - Update results passed to update method"""
    results = {'ipv4': [['key1']]}
    parent_key = 'key2'
    nested_results = {'username': [['sub_key1']], 'ipv4': [['sub_key2']]}

    StreamRules.update(results, parent_key, nested_results)
    expected_results = {
        'username': [['key2', 'sub_key1']],
        'ipv4': [['key1'], ['key2', 'sub_key2']]
    }
    assert_equal(results.keys(), expected_results.keys())
    assert_equal(results['ipv4'], expected_results['ipv4'])
    assert_equal(results['username'], expected_results['username'])

    results = {
        'ipv4': [['key1'], ['key3', 'sub_key3', 'sub_key4']],
        'type': [['key4']]
    }
    parent_key = 'key2'
    nested_results = {
        'username': [['sub_key1', 'sub_key11']],
        'type': [['sub_key2']]
    }

    StreamRules.update(results, parent_key, nested_results)
    expected_results = {
        'username': [['key2', 'sub_key1', 'sub_key11']],
        'type': [['key4'], ['key2', 'sub_key2']],
        'ipv4': [['key1'], ['key3', 'sub_key3', 'sub_key4']]
    }
    assert_equal(results.keys(), expected_results.keys())
    assert_equal(results['ipv4'], expected_results['ipv4'])
    assert_equal(results['username'], expected_results['username'])
    assert_equal(results['type'], expected_results['type'])
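# Illustrative sketch only, not the shipped StreamRules.update. The test above
# pins down the merge contract, so this hypothetical helper shows the assumed
# behavior: each key path found in nested_results is prefixed with parent_key
# and appended to the matching datatype entry in results.
def _illustrative_update(results, parent_key, nested_results):
    for datatype, key_paths in nested_results.items():
        # Prefix every nested key path with the parent key before merging it in
        results.setdefault(datatype, []).extend(
            [parent_key] + path for path in key_paths)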
def setup(self):
    """Setup before each method"""
    # Clear out the cached matchers and rules to avoid conflicts with production code
    StreamRules._StreamRules__matchers.clear()  # pylint: disable=no-member
    StreamRules._StreamRules__rules.clear()  # pylint: disable=no-member
    self.config = load_config('tests/unit/conf')
    self.config['global']['threat_intel']['enabled'] = False
    self.rules_engine = StreamRules(self.config)
def test_bad_rule(self, log_mock):
    """Rules Engine - Process Bad Rule Function"""
    bad_rule = namedtuple('BadRule', 'rule_function')

    def bad_rule_function(_):
        """A simple function that will raise an exception"""
        raise AttributeError('This rule raises an error')

    test_rule = bad_rule(bad_rule_function)

    StreamRules.process_rule({}, test_rule)
    log_mock.assert_called_with('Encountered error with rule: %s', 'bad_rule_function')
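# Hedged sketch of the error handling the test above relies on. It assumes that
# StreamRules.process_rule wraps the rule callable in a try/except and logs the
# failing function's name instead of letting the exception propagate. The helper
# name and the exact logger call are assumptions, not the production code.
def _illustrative_process_rule(record, rule):
    try:
        return rule.rule_function(record)
    except Exception:  # pylint: disable=broad-except
        # Log and swallow the failure so one bad rule cannot break processing
        LOGGER.exception('Encountered error with rule: %s',
                         rule.rule_function.__name__)
        return False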
def test_validate_datatypes(self):
    """Rules Engine - validate datatypes"""
    normalized_types, datatypes = None, ['type1']
    assert_equal(
        StreamRules.validate_datatypes(normalized_types, datatypes), False)

    normalized_types = {'type1': ['key1'], 'type2': ['key2']}
    datatypes = ['type1']
    assert_equal(
        StreamRules.validate_datatypes(normalized_types, datatypes), True)

    datatypes = ['type1', 'type3']
    assert_equal(
        StreamRules.validate_datatypes(normalized_types, datatypes), False)
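# Minimal sketch of the contract asserted above, under the assumption that
# validation simply requires every requested datatype to be present in the
# record's normalized types. The helper name is illustrative, not the real
# StreamRules.validate_datatypes.
def _illustrative_validate_datatypes(normalized_types, datatypes):
    if not normalized_types:
        # No normalization data means the requested datatypes cannot match
        return False
    return all(datatype in normalized_types for datatype in datatypes)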
def test_basic_rule_matcher_process(self):
    """Rules Engine - Basic Rule/Matcher"""
    @matcher
    def prod(rec):  # pylint: disable=unused-variable
        return rec['environment'] == 'prod'

    @rule()
    def incomplete_rule(_):  # pylint: disable=unused-variable
        return True

    @rule(logs=['test_log_type_json_nested_with_data'],
          outputs=['s3:sample_bucket'])
    def minimal_rule(rec):  # pylint: disable=unused-variable
        return rec['unixtime'] == 1483139547

    @rule(matchers=['foobar', 'prod'],
          logs=['test_log_type_json_nested_with_data'],
          outputs=['pagerduty:sample_integration'])
    def chef_logs(rec):  # pylint: disable=unused-variable
        return rec['application'] == 'chef'

    @rule(matchers=['foobar', 'prod'],
          logs=['test_log_type_json_nested_with_data'],
          outputs=['pagerduty:sample_integration'])
    def test_nest(rec):  # pylint: disable=unused-variable
        return rec['data']['source'] == 'eu'

    kinesis_data = {
        'date': 'Dec 01 2016',
        'unixtime': '1483139547',
        'host': 'host1.web.prod.net',
        'application': 'chef',
        'environment': 'prod',
        'data': {
            'category': 'web-server',
            'type': '1',
            'source': 'eu'
        }
    }

    # prepare the payloads
    service, entity = 'kinesis', 'test_kinesis_stream'
    raw_record = make_kinesis_raw_record(entity, json.dumps(kinesis_data))
    payload = load_and_classify_payload(self.config, service, entity, raw_record)

    # process payloads
    alerts = StreamRules.process(payload)

    # check alert output
    assert_equal(len(alerts), 3)
    rule_outputs_map = {
        'chef_logs': ['pagerduty:sample_integration'],
        'minimal_rule': ['s3:sample_bucket'],
        'test_nest': ['pagerduty:sample_integration']
    }
    # doing this because after kinesis_data is read in, types are cast per
    # the schema
    for alert in alerts:
        assert_items_equal(alert['record'].keys(), kinesis_data.keys())
        assert_items_equal(alert['outputs'], rule_outputs_map[alert['rule_name']])
def test_process_optional_logs(self):
    """Rules Engine - Logs are optional when datatypes are present"""
    @rule(datatypes=['sourceAddress'],
          outputs=['s3:sample_bucket'])
    def no_logs_has_datatypes(rec):  # pylint: disable=unused-variable
        """Testing rule when logs is not present, datatypes is"""
        results = fetch_values_by_datatype(rec, 'sourceAddress')

        for result in results:
            if result == '1.1.1.2':
                return True
        return False

    @rule(logs=['cloudwatch:test_match_types'],
          outputs=['s3:sample_bucket'])
    def has_logs_no_datatypes(rec):  # pylint: disable=unused-variable
        """Testing rule when logs is present, datatypes is not"""
        return (
            rec['source'] == '1.1.1.2' or
            rec['detail']['sourceIPAddress'] == '1.1.1.2'
        )

    @rule(logs=['cloudwatch:test_match_types'],
          datatypes=['sourceAddress'],
          outputs=['s3:sample_bucket'])
    def has_logs_datatypes(rec):  # pylint: disable=unused-variable
        """Testing rule when logs is present, datatypes is"""
        results = fetch_values_by_datatype(rec, 'sourceAddress')

        for result in results:
            if result == '1.1.1.2':
                return True
        return False

    kinesis_data_items = [{
        'account': 123456,
        'region': '123456123456',
        'source': '1.1.1.2',
        'detail': {
            'eventName': 'ConsoleLogin',
            'sourceIPAddress': '1.1.1.2',
            'recipientAccountId': '654321'
        }
    }]

    alerts = []
    for data in kinesis_data_items:
        kinesis_data = json.dumps(data)
        service, entity = 'kinesis', 'test_kinesis_stream'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = load_and_classify_payload(self.config, service, entity, raw_record)
        alerts.extend(StreamRules.process(payload))

    assert_equal(len(alerts), 3)
    rule_names = [
        'no_logs_has_datatypes',
        'has_logs_no_datatypes',
        'has_logs_datatypes'
    ]
    assert_items_equal([alerts[i]['rule_name'] for i in range(3)], rule_names)
def test_rule_disable(self):
    """Rules Engine - Disable Rule"""
    @disable
    @rule(logs=['test_log_type_json_2'],
          outputs=['pagerduty:sample_integration'])
    def nested_csv_disable_test(rec):  # pylint: disable=unused-variable
        return rec['host'] == 'unit-test-host.prod.test'

    kinesis_data = json.dumps({
        'key4': True,
        'key5': 0.0,
        'key6': 1,
        'key7': False
    })

    # prepare the payloads
    service, entity = 'kinesis', 'test_kinesis_stream'
    raw_record = make_kinesis_raw_record(entity, kinesis_data)
    payload = load_and_classify_payload(self.config, service, entity, raw_record)

    # process payloads
    alerts = StreamRules.process(payload)

    # alert tests
    assert_equal(len(alerts), 0)
def _process_alerts(self, classifier, payload, data):
    """Process records for alerts and send them to the correct places

    Args:
        classifier [StreamClassifier]: Handler for classifying a record's data
        payload [StreamPayload]: StreamAlert payload object being processed
        data [string]: Pre parsed data string from a raw_event to be parsed
    """
    classifier.classify_record(payload, data)
    if not payload.valid:
        if self.env['lambda_alias'] != 'development':
            LOGGER.error('Record does not match any defined schemas: %s\n%s',
                         payload, data)

        self._failed_record_count += 1
        return

    alerts = StreamRules.process(payload)

    LOGGER.debug('Processed %d valid record(s) that resulted in %d alert(s).',
                 len(payload.records), len(alerts))

    if not alerts:
        return

    # Extend the list of alerts with any new ones so they can be returned
    self._alerts.extend(alerts)

    if self.enable_alert_processor:
        self.sinker.sink(alerts)
def _process_alerts(self, classifier, payload, data):
    """Process records for alerts and send them to the correct places

    Args:
        classifier [StreamClassifier]: Handler for classifying a record's data
        payload [StreamPayload]: StreamAlert payload object being processed
        data [string]: Pre parsed data string from a raw_event to be parsed
    """
    classifier.classify_record(payload, data)
    if not payload.valid:
        LOGGER.error('Invalid data: %s\n%s',
                     payload,
                     json.dumps(payload.raw_record, indent=4))
        return

    alerts = StreamRules.process(payload)
    if not alerts:
        LOGGER.debug('Valid data, no alerts')
        return

    # If we want alerts returned to the caller, extend the list. Otherwise
    # attempt to send them to the alert processor
    if self.return_alerts:
        self.alerts.extend(alerts)
    else:
        self.sinker.sink(alerts)
def _process_alerts(self, payload):
    """Process records for alerts and send them to the correct places

    Args:
        payload (StreamPayload): StreamAlert payload object being processed
    """
    for record in payload.pre_parse():
        self.classifier.classify_record(record)
        if not record.valid:
            if self.env['lambda_alias'] != 'development':
                LOGGER.error(
                    'Record does not match any defined schemas: %s\n%s',
                    record, record.pre_parsed_record)

            self._failed_record_count += 1
            continue

        LOGGER.debug(
            'Classified and Parsed Payload: <Valid: %s, Log Source: %s, Entity: %s>',
            record.valid, record.log_source, record.entity)

        record_alerts = StreamRules.process(record)

        LOGGER.debug(
            'Processed %d valid record(s) that resulted in %d alert(s).',
            len(payload.records), len(record_alerts))

        if not record_alerts:
            continue

        # Extend the list of alerts with any new ones so they can be returned
        self._alerts.extend(record_alerts)

        if self.enable_alert_processor:
            self.sinker.sink(record_alerts)
def test_process_required_logs(self):
    """Rules Engine - Logs are required when no datatypes are defined"""
    @rule(outputs=['s3:sample_bucket'])
    def match_ipaddress():  # pylint: disable=unused-variable
        """Testing rule to detect matching IP address"""
        return True

    kinesis_data_items = [{
        'account': 123456,
        'region': '123456123456',
        'source': '1.1.1.2',
        'detail': {
            'eventName': 'ConsoleLogin',
            'sourceIPAddress': '1.1.1.2',
            'recipientAccountId': '654321'
        }
    }]

    for data in kinesis_data_items:
        kinesis_data = json.dumps(data)
        service, entity = 'kinesis', 'test_kinesis_stream'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = load_and_classify_payload(self.config, service, entity, raw_record)
        assert_false(StreamRules.process(payload))
def test_csv_rule(self):
    """Rules Engine - CSV Rule"""
    @rule(logs=['test_log_type_csv_nested'],
          outputs=['pagerduty:sample_integration'])
    def nested_csv(rec):  # pylint: disable=unused-variable
        return (
            rec['message']['application'] == 'chef' and
            rec['message']['cluster_size'] == 100
        )

    kinesis_data = (
        '"Jan 10, 2017","1485739910","host1.prod.test","Corp",'
        '"chef,web-server,1,100,fail"'
    )
    # prepare the payloads
    service, entity = 'kinesis', 'test_kinesis_stream'
    raw_record = make_kinesis_raw_record(entity, kinesis_data)
    payload = load_and_classify_payload(self.config, service, entity, raw_record)

    # process payloads
    alerts = StreamRules.process(payload)

    # alert tests
    assert_equal(len(alerts), 1)
    assert_equal(alerts[0]['rule_name'], 'nested_csv')
def test_syslog_rule(self):
    """Rules Engine - Syslog Rule"""
    @rule(logs=['test_log_type_syslog'],
          outputs=['s3:sample_bucket'])
    def syslog_sudo(rec):  # pylint: disable=unused-variable
        return (
            rec['application'] == 'sudo' and
            'root' in rec['message']
        )

    kinesis_data = (
        'Jan 26 19:35:33 vagrant-ubuntu-trusty-64 '
        'sudo: pam_unix(sudo:session): '
        'session opened for user root by (uid=0)'
    )
    # prepare the payloads
    service, entity = 'kinesis', 'test_stream_2'
    raw_record = make_kinesis_raw_record(entity, kinesis_data)
    payload = load_and_classify_payload(self.config, service, entity, raw_record)

    # process payloads
    alerts = StreamRules.process(payload)

    # alert tests
    assert_equal(len(alerts), 1)
    assert_equal(alerts[0]['rule_name'], 'syslog_sudo')
    assert_equal(alerts[0]['record']['host'], 'vagrant-ubuntu-trusty-64')
    assert_equal(alerts[0]['log_type'], 'syslog')
def test_kv_rule(self):
    """Rule Engine - KV Rule"""
    @rule(logs=['test_log_type_kv_auditd'],
          outputs=['pagerduty'])
    def auditd_bin_cat(rec):
        return (
            rec['type'] == 'SYSCALL' and
            rec['exe'] == '"/bin/cat"'
        )

    @rule(logs=['test_log_type_kv_auditd'],
          outputs=['pagerduty'])
    def gid_500(rec):
        return (
            rec['gid'] == 500 and
            rec['euid'] == 500
        )

    auditd_test_data = (
        'type=SYSCALL msg=audit(1364481363.243:24287): '
        'arch=c000003e syscall=2 success=no exit=-13 a0=7fffd19c5592 a1=0 '
        'a2=7fffd19c4b50 a3=a items=1 ppid=2686 pid=3538 auid=500 uid=500 '
        'gid=500 euid=500 suid=500 fsuid=500 egid=500 sgid=500 fsgid=500 tty=pts0 '
        'ses=1 comm="cat" exe="/bin/cat" '
        'subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 '
        'key="sshd_config" type=CWD msg=audit(1364481363.243:24287): '
        'cwd="/home/shadowman" type=PATH '
        'msg=audit(1364481363.243:24287): item=0 name="/etc/ssh/sshd_config" '
        'inode=409248 dev=fd:00 mode=0100600 ouid=0 ogid=0 '
        'rdev=00:00 obj=system_u:object_r:etc_t:s0'
    )

    # prepare the payloads
    payload = self.make_kinesis_payload(
        kinesis_stream='test_kinesis_stream',
        kinesis_data=auditd_test_data)

    # process payloads
    alerts = StreamRules.process(payload)

    # alert tests
    assert_equal(len(alerts), 2)

    rule_name_alerts = set([x['rule_name'] for x in alerts])
    assert_equal(rule_name_alerts, set(['gid_500', 'auditd_bin_cat']))
def test_threat_intel_match(self, mock_client):
    """Rules Engine - Threat Intel is enabled when threat_intel_match is called"""
    @rule(datatypes=['sourceAddress', 'destinationDomain', 'fileHash'],
          outputs=['s3:sample_bucket'])
    def match_rule(_):  # pylint: disable=unused-variable
        """Testing dummy rule"""
        return True

    mock_client.return_value = MockDynamoDBClient()
    toggled_config = self.config
    toggled_config['global']['threat_intel']['enabled'] = True
    toggled_config['global']['threat_intel']['dynamodb_table'] = 'test_table_name'

    new_rules_engine = StreamRules(toggled_config)
    records = mock_normalized_records()
    alerts = new_rules_engine.threat_intel_match(records)
    assert_equal(len(alerts), 2)
def process_alerts(self, payload):
    """Process records for alerts"""
    if payload.valid:
        alerts = StreamRules.process(payload)
        if alerts:
            self.alerts.extend(alerts)
    else:
        logger.debug('Invalid data: %s', payload)
def process_alerts(self, payload):
    """Process records for alerts"""
    if payload.valid:
        alerts = StreamRules.process(payload)
        if alerts:
            self.alerts.extend(alerts)
    else:
        logger.error('Invalid data: %s\n%s',
                     payload,
                     json.dumps(payload.raw_record, indent=4))
def test_reset_normalized_types(self):
    """Rules Engine - Normalized types should be reset after each iteration"""
    @rule(datatypes=['sourceAddress'],
          outputs=['s3:sample_bucket'])
    def test_01_matching_sourceaddress_datatypes(rec):  # pylint: disable=unused-variable
        """Testing rule to alert on matching sourceAddress"""
        results = fetch_values_by_datatype(rec, 'sourceAddress')

        for result in results:
            if result == '1.1.1.2':
                return True
        return False

    @rule(logs=['cloudwatch:test_match_types', 'test_log_type_json_nested'],
          outputs=['s3:sample_bucket'])
    def test_02_rule_without_datatypes(_):  # pylint: disable=unused-variable
        """Testing rule without datatypes parameter"""
        return True

    kinesis_data_items = [{
        'account': 123456,
        'region': '123456123456',
        'source': '1.1.1.2',
        'detail': {
            'eventName': 'ConsoleLogin',
            'sourceIPAddress': '1.1.1.2',
            'recipientAccountId': '654321'
        }
    }, {
        'date': 'Dec 01 2016',
        'unixtime': '1483139547',
        'host': 'host1.web.prod.net',
        'data': {
            'category': 'web-server',
            'type': '1',
            'source': 'eu'
        }
    }]

    alerts = []
    for data in kinesis_data_items:
        kinesis_data = json.dumps(data)
        service, entity = 'kinesis', 'test_kinesis_stream'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = load_and_classify_payload(self.config, service, entity, raw_record)
        alerts.extend(StreamRules.process(payload))

    assert_equal(len(alerts), 3)
    for alert in alerts:
        has_key_normalized_types = 'normalized_types' in alert['record']
        if alert.get('rule_name') == 'test_02_rule_without_datatypes':
            assert_equal(has_key_normalized_types, False)
        else:
            assert_equal(has_key_normalized_types, True)
def analyze_record_delta(self, rule_name, test_record):
    """Provide some additional context on why this test failed. This will
    perform some analysis of the test record to determine which keys are
    missing or which unnecessary keys are causing the test to fail. Any
    errors are appended to a list of errors so they can be printed at the
    end of the test run.

    Args:
        rule_name (str): Name of rule being tested
        test_record (dict): Actual record data being tested
    """
    logs = self.processor.classifier.get_log_info_for_source()
    rule_info = StreamRules.get_rules()[rule_name]
    test_record_keys = set(test_record['data'])
    for log in rule_info.logs:
        if log not in logs:
            message = 'Log declared in rule ({}) does not exist in logs.json'.format(log)
            self.status_messages.append(
                StatusMessage(StatusMessage.FAILURE, rule_name, message))
            continue

        all_record_schema_keys = set(logs[log]['schema'])
        optional_keys = set(
            logs[log].get('configuration', {}).get('optional_top_level_keys', {}))

        min_req_record_schema_keys = all_record_schema_keys.difference(optional_keys)

        schema_diff = min_req_record_schema_keys.difference(test_record_keys)
        if schema_diff:
            message = ('Data is invalid due to missing key(s) in test record: {}. '
                       'Rule: \'{}\'. Description: \'{}\''.format(
                           ', '.join('\'{}\''.format(key) for key in schema_diff),
                           rule_info.rule_name,
                           test_record['description']))

            self.status_messages.append(
                StatusMessage(StatusMessage.FAILURE, rule_name, message))
            continue

        unexpected_record_keys = test_record_keys.difference(all_record_schema_keys)
        if unexpected_record_keys:
            message = ('Data is invalid due to unexpected key(s) in test record: {}. '
                       'Rule: \'{}\'. Description: \'{}\''.format(
                           ', '.join('\'{}\''.format(key) for key in unexpected_record_keys),
                           rule_info.rule_name,
                           test_record['description']))

            self.status_messages.append(
                StatusMessage(StatusMessage.FAILURE, rule_name, message))
def check_untested_rules(all_test_rules):
    """Log warning message for rules that exist but do not have proper test events.

    Args:
        all_test_rules (set): A collection of all of the rules being tested
    """
    untested_rules = set(StreamRules.get_rules()).difference(all_test_rules)
    if untested_rules:
        LOGGER_CLI.warn(
            '%sNo test events configured for the following rules. Please add '
            'corresponding tests for these rules in \'%s\' to avoid seeing '
            'this warning\n\t%s%s',
            COLOR_YELLOW, TEST_EVENTS_DIR, '\n\t'.join(untested_rules), COLOR_RESET)
def __init__(self, context, enable_alert_processor=True):
    """Initializer

    Args:
        context (dict): An AWS context object which provides metadata on the
            currently executing lambda function.
        enable_alert_processor (bool): If the user wants to send the alerts using
            their own methods, 'enable_alert_processor' can be set to False to
            suppress sending with the StreamAlert alert processor.
    """
    # Load the config. Validation occurs during load, which will
    # raise exceptions on any ConfigErrors
    StreamAlert.config = StreamAlert.config or load_config()

    # Load the environment from the context arn
    self.env = load_env(context)

    # Instantiate the sink here to handle sending the triggered alerts to the
    # alert processor
    self.sinker = StreamSink(self.env)

    # Instantiate a classifier that is used for this run
    self.classifier = StreamClassifier(config=self.config)

    self.enable_alert_processor = enable_alert_processor
    self._failed_record_count = 0
    self._processed_size = 0
    self._alerts = []

    # Create a dictionary to hold parsed payloads by log type.
    # Firehose needs this information to send to its corresponding
    # delivery stream.
    self.categorized_payloads = defaultdict(list)

    # Firehose client initialization
    self.firehose_client = None

    # create an instance of the StreamRules class that gets cached in the
    # StreamAlert class as an instance property
    self._rule_engine = StreamRules(self.config)
def test_process_subkeys(self):
    """Rules Engine - Req Subkeys"""
    @rule(logs=['test_log_type_json_nested'],
          outputs=['s3:sample_bucket'],
          req_subkeys={'data': ['location']})
    def data_location(rec):  # pylint: disable=unused-variable
        return rec['data']['location'].startswith('us')

    @rule(logs=['test_log_type_json_nested'],
          outputs=['s3:sample_bucket'],
          req_subkeys={'data': ['category']})
    def web_server(rec):  # pylint: disable=unused-variable
        return rec['data']['category'] == 'web-server'

    kinesis_data_items = [
        {
            'date': 'Dec 01 2016',
            'unixtime': '1483139547',
            'host': 'host1.web.prod.net',
            'data': {
                'category': 'web-server',
                'type': '1',
                'source': 'eu'
            }
        },
        {
            'date': 'Dec 01 2016',
            'unixtime': '1483139547',
            'host': 'host1.web.prod.net',
            'data': {
                'location': 'us-west-2'
            }
        }
    ]

    # prepare payloads
    alerts = []
    for data in kinesis_data_items:
        kinesis_data = json.dumps(data)
        # prepare the payloads
        service, entity = 'kinesis', 'test_kinesis_stream'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = load_and_classify_payload(self.config, service, entity, raw_record)

        alerts.extend(StreamRules.process(payload))

    # check alert output
    assert_equal(len(alerts), 2)

    # alert tests
    assert_equal(alerts[0]['rule_name'], 'web_server')
    assert_equal(alerts[1]['rule_name'], 'data_location')
def check_untested_files(all_test_rules):
    """Log warning message for test events that exist with invalid rule names.

    Args:
        all_test_rules (set): A collection of all of the rules being tested
    """
    invalid_rules = all_test_rules.difference(set(StreamRules.get_rules()))
    if invalid_rules:
        LOGGER_CLI.warn(
            '%sNo rules found in \'rules/\' that match the rules declared within '
            '\'trigger_rules\' in a test event. Please update the list of '
            '\'trigger_rules\' with valid rule names to avoid seeing this '
            'warning and any associated errors above\n\t%s%s',
            COLOR_YELLOW, '\n\t'.join(invalid_rules), COLOR_RESET)
def test_basic_rule_matcher_process(self):
    """Rule Engine - Basic Rule/Matcher"""
    @matcher
    def prod(rec):
        return rec['environment'] == 'prod'

    @rule()
    def incomplete_rule(rec):
        return True

    @rule(logs=['test_log_type_json_nested_with_data'],
          outputs=['s3'])
    def minimal_rule(rec):
        return rec['unixtime'] == 1483139547

    @rule(matchers=['foobar', 'prod'],
          logs=['test_log_type_json_nested_with_data'],
          outputs=['pagerduty'])
    def chef_logs(rec):
        return rec['application'] == 'chef'

    kinesis_data = {
        'date': 'Dec 01 2016',
        'unixtime': '1483139547',
        'host': 'host1.web.prod.net',
        'application': 'chef',
        'environment': 'prod',
        'data': {
            'category': 'web-server',
            'type': '1',
            'source': 'eu'
        }
    }
    # prepare the payloads
    kinesis_data_json = json.dumps(kinesis_data)
    payload = self.make_kinesis_payload(
        kinesis_stream='test_kinesis_stream',
        kinesis_data=kinesis_data_json)

    # process payloads
    alerts = StreamRules.process(payload)

    # check alert output
    assert_equal(len(alerts), 2)

    # alert 1 tests
    assert_equal(alerts[1]['rule_name'], 'chef_logs')
    assert_equal(alerts[1]['metadata']['outputs'], ['pagerduty'])

    # alert 0 tests
    assert_equal(alerts[0]['rule_name'], 'minimal_rule')
    assert_equal(alerts[0]['metadata']['outputs'], ['s3'])
def test_process_req_subkeys(self):
    """Rule Engine - Req Subkeys"""
    @rule(logs=['test_log_type_json_nested'],
          outputs=['s3'],
          req_subkeys={'data': ['location']})
    def data_location(rec):
        return rec['data']['location'].startswith('us')

    @rule(logs=['test_log_type_json_nested'],
          outputs=['s3'],
          req_subkeys={'data': ['category']})
    def web_server(rec):
        return rec['data']['category'] == 'web-server'

    kinesis_data = [{
        'date': 'Dec 01 2016',
        'unixtime': '1483139547',
        'host': 'host1.web.prod.net',
        'data': {
            'category': 'web-server',
            'type': '1',
            'source': 'eu'
        }
    }, {
        'date': 'Dec 01 2016',
        'unixtime': '1483139547',
        'host': 'host1.web.prod.net',
        'data': {
            'location': 'us-west-2'
        }
    }]

    # prepare payloads
    payloads = []
    for data in kinesis_data:
        kinesis_data_json = json.dumps(data)
        payload = self.make_kinesis_payload(
            kinesis_stream='test_kinesis_stream',
            kinesis_data=kinesis_data_json)
        payloads.append(payload)

    alerts = []
    for payload in payloads:
        # process payloads
        alerts.extend(StreamRules.process(payload))

    # check alert output
    assert_equal(len(alerts), 2)

    # alert tests
    assert_equal(alerts[0]['rule_name'], 'web_server')
    assert_equal(alerts[1]['rule_name'], 'data_location')
def check_untested_rules():
    """Function that prints warning log messages for rules that exist
    but do not have proper integration tests configured.
    """
    all_test_files = {os.path.splitext(test_file)[0]
                      for _, _, test_rule_files in os.walk(DIR_RULES)
                      for test_file in test_rule_files}

    untested_rules = set(StreamRules.get_rules()).difference(all_test_files)
    for rule in untested_rules:
        LOGGER_CLI.warn('%sNo tests configured for rule: \'%s\'. Please add a '
                        'corresponding test file for this rule in \'%s\' with the '
                        'name \'%s.json\' to avoid seeing this warning%s',
                        COLOR_YELLOW, rule, DIR_RULES, rule, COLOR_RESET)
def test_alert_format(self):
    """Rules Engine - Alert Format"""
    @rule(logs=['test_log_type_json_nested_with_data'],
          outputs=['s3:sample_bucket'])
    def alert_format_test(rec):  # pylint: disable=unused-variable
        """'alert_format_test' docstring for testing rule_description"""
        return rec['application'] == 'web-app'

    kinesis_data = json.dumps({
        'date': 'Dec 01 2016',
        'unixtime': '1483139547',
        'host': 'host1.web.prod.net',
        'application': 'web-app',
        'environment': 'prod',
        'data': {
            'category': 'web-server',
            'type': '1',
            'source': 'eu'
        }
    })

    # prepare the payloads
    service, entity = 'kinesis', 'test_kinesis_stream'
    raw_record = make_kinesis_raw_record(entity, kinesis_data)
    payload = load_and_classify_payload(self.config, service, entity, raw_record)

    # process payloads
    alerts = StreamRules.process(payload)

    alert_keys = {
        'record',
        'rule_name',
        'rule_description',
        'log_type',
        'log_source',
        'outputs',
        'source_service',
        'source_entity'
    }
    assert_items_equal(alerts[0].keys(), alert_keys)
    assert_is_instance(alerts[0]['record'], dict)
    assert_is_instance(alerts[0]['outputs'], list)

    # test alert fields
    assert_is_instance(alerts[0]['rule_name'], str)
    assert_is_instance(alerts[0]['rule_description'], str)
    assert_is_instance(alerts[0]['outputs'], list)
    assert_is_instance(alerts[0]['log_type'], str)
    assert_is_instance(alerts[0]['log_source'], str)
def check_untested_files():
    """Function that prints warning log messages for integration test files
    that exist but do not have a corresponding rule configured.
    """
    all_test_files = {os.path.splitext(test_file)[0]
                      for _, _, test_rule_files in os.walk(DIR_RULES)
                      for test_file in test_rule_files}

    untested_rules = all_test_files.difference(set(StreamRules.get_rules()))
    for rule in untested_rules:
        LOGGER_CLI.warn('%sNo rules configured for test file: \'%s.json\'. Please '
                        'add a corresponding rule for this test file in \'rules/\' with '
                        'the name \'%s.py\' to avoid seeing this warning and any '
                        'associated errors above%s',
                        COLOR_YELLOW, rule, rule, COLOR_RESET)
def _process_alerts(self, payload):
    """Process records for alerts and send them to the correct places

    Args:
        payload (StreamPayload): StreamAlert payload object being processed
    """
    for record in payload.pre_parse():
        # Increment the processed size using the length of this record
        self._processed_size += len(record.pre_parsed_record)
        self.classifier.classify_record(record)
        if not record.valid:
            if self.env['lambda_alias'] != 'development':
                LOGGER.error('Record does not match any defined schemas: %s\n%s',
                             record, record.pre_parsed_record)

            self._failed_record_count += 1
            continue

        LOGGER.debug(
            'Classified and Parsed Payload: <Valid: %s, Log Source: %s, Entity: %s>',
            record.valid, record.log_source, record.entity)

        record_alerts = StreamRules.process(record)

        LOGGER.debug('Processed %d valid record(s) that resulted in %d alert(s).',
                     len(payload.records), len(record_alerts))

        # Add all parsed records to the categorized payload dict
        # only if Firehose is enabled
        if self.firehose_client:
            # Only send payloads with enabled types
            disabled_logs = self.config['global']['infrastructure'].get(
                'firehose', {}).get('disabled_logs', [])
            if payload.log_source.split(':')[0] not in disabled_logs:
                self.categorized_payloads[payload.log_source].extend(payload.records)

        if not record_alerts:
            continue

        # Extend the list of alerts with any new ones so they can be returned
        self._alerts.extend(record_alerts)

        if self.enable_alert_processor:
            self.sinker.sink(record_alerts)