def test_mult_schema_match(self, log_mock):
        """StreamClassifier - Multiple Schema Matching with Log Patterns"""
        kinesis_data = json.dumps({
            'name': 'file removal test',
            'identifier': 'host4.this.test.also',
            'time': 'Jan 01 2017',
            'type': 'random',
            'message': 'bad_001.txt was removed'
        })
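        # Make sure support for multiple schema matching is ON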
        sa_classifier.SUPPORT_MULTIPLE_SCHEMA_MATCHING = True

        service, entity = 'kinesis', 'test_stream_2'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = load_stream_payload(service, entity, raw_record)

        self.classifier.load_sources(service, entity)

        payload = list(payload.pre_parse())[0]

        schema_matches = self.classifier._process_log_schemas(payload)

        assert_equal(len(schema_matches), 2)
        self.classifier._check_schema_match(schema_matches)

        calls = [
            call('Log classification matched for multiple schemas: %s',
                 'test_multiple_schemas:01, test_multiple_schemas:02'),
            call('Proceeding with schema for: %s', 'test_multiple_schemas:01')
        ]

        log_mock.assert_has_calls(calls)

    def test_mult_schema_match_success(self):
        """StreamClassifier - Multiple Schema Matching with Log Patterns, Success"""
        kinesis_data = json.dumps({
            'name': 'file added test',
            'identifier': 'host4.this.test',
            'time': 'Jan 01 2017',
            'type': 'lol_file_added_event_test',
            'message': 'bad_001.txt was added'
        })
        # Make sure support for multiple schema matching is ON
        sa_classifier.SUPPORT_MULTIPLE_SCHEMA_MATCHING = True

        service, entity = 'kinesis', 'test_stream_2'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = load_stream_payload(service, entity, raw_record)

        self.classifier.load_sources(service, entity)

        payload = list(payload.pre_parse())[0]

        schema_matches = self.classifier._process_log_schemas(payload)

        assert_equal(len(schema_matches), 2)
        assert_equal(schema_matches[0].log_name, 'test_multiple_schemas:01')
        assert_equal(schema_matches[1].log_name, 'test_multiple_schemas:02')
        schema_match = self.classifier._check_schema_match(schema_matches)

        assert_equal(schema_match.log_name, 'test_multiple_schemas:01')

    def test_rule_disable(self):
        """Rules Engine - Disable Rule"""
        @disable
        @rule(logs=['test_log_type_json_2'],
              outputs=['pagerduty:sample_integration'])
        def nested_csv_disable_test(rec):  # pylint: disable=unused-variable
            return rec['host'] == 'unit-test-host.prod.test'

        kinesis_data = json.dumps({
            'key4': True,
            'key5': 0.0,
            'key6': 1,
            'key7': False
        })

        # prepare the payloads
        service, entity = 'kinesis', 'test_kinesis_stream'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = load_and_classify_payload(self.config, service, entity,
                                            raw_record)

        # process payloads
        alerts = StreamRules.process(payload)

        # alert tests
        assert_equal(len(alerts), 0)

    def test_rule_modify_context(self):
        """Rules Engine - Testing Context Modification"""
        @rule(logs=['test_log_type_json_nested_with_data'],
              outputs=['s3:sample_bucket'],
              context={'assigned_user': 'not_set', 'assigned_policy_id': 'not_set2'})
        def modify_context_test(rec, context):  # pylint: disable=unused-variable
            """Modify context rule"""
            context['assigned_user'] = 'valid_user'
            context['assigned_policy_id'] = 'valid_policy_id'
            return rec['application'] == 'web-app'

        kinesis_data = json.dumps({
            'date': 'Dec 01 2016',
            'unixtime': '1483139547',
            'host': 'host1.web.prod.net',
            'application': 'web-app',
            'environment': 'prod',
            'data': {
                'category': 'web-server',
                'type': '1',
                'source': 'eu'
            }
        })

        # prepare the payloads
        service, entity = 'kinesis', 'test_kinesis_stream'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = load_and_classify_payload(self.config, service, entity, raw_record)

        # process payloads
        alerts, _ = self.rules_engine.run(payload)

        # alert tests
        assert_equal(alerts[0].context['assigned_user'], 'valid_user')
        assert_equal(alerts[0].context['assigned_policy_id'], 'valid_policy_id')

    def test_csv_nested(self):
        """StreamClassifier - Classify Nested CSV"""
        csv_nested_data = (
            '"Jan 10 2017","1485635414","host1.prod.test","Corp",'
            '"chef,web-server,1,10,success"')

        service, entity = 'kinesis', 'test_kinesis_stream'
        raw_record = make_kinesis_raw_record(entity, csv_nested_data)
        payload = self._prepare_and_classify_payload(service, entity,
                                                     raw_record)

        # valid record test
        assert_equal(payload.valid, True)
        assert_is_instance(payload.records[0], dict)

        # record value tests
        assert_equal(payload.records[0]['date'], 'Jan 10 2017')
        assert_equal(payload.records[0]['host'], 'host1.prod.test')
        assert_equal(payload.records[0]['time'], 1485635414)
        assert_equal(payload.records[0]['message']['role'], 'web-server')
        assert_equal(payload.records[0]['message']['cluster_size'], 10)

        # type test
        assert_equal(payload.type, 'csv')

        # log source test
        assert_equal(payload.log_source, 'test_log_type_csv_nested')

    def test_process_required_logs(self):
        """Rules Engine - Logs is required when no datatypes are defined"""
        @rule(outputs=['s3:sample_bucket'])
        def match_ipaddress():  # pylint: disable=unused-variable
            """Testing rule to detect matching IP address"""
            return True

        kinesis_data_items = [{
            'account': 123456,
            'region': '123456123456',
            'source': '1.1.1.2',
            'detail': {
                'eventName': 'ConsoleLogin',
                'sourceIPAddress': '1.1.1.2',
                'recipientAccountId': '654321'
            }
        }]

        for data in kinesis_data_items:
            kinesis_data = json.dumps(data)
            service, entity = 'kinesis', 'test_kinesis_stream'
            raw_record = make_kinesis_raw_record(entity, kinesis_data)
            payload = load_and_classify_payload(self.config, service, entity,
                                                raw_record)

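            # with neither 'logs' nor 'datatypes' set on the rule, processing
            # should yield no alerts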
            assert_false(StreamRules.process(payload))

    def test_classify_kv(self):
        """StreamClassifier - Classify Key/Value"""
        auditd_test_data = (
            'type=SYSCALL msg=audit(1364481363.243:24287): '
            'arch=c000003e syscall=2 success=no exit=-13 a0=7fffd19c5592 a1=0 '
            'a2=7fffd19c4b50 a3=a items=1 ppid=2686 pid=3538 auid=500 uid=500 '
            'gid=500 euid=500 suid=500 fsuid=500 egid=500 sgid=500 fsgid=500 tty=pts0 '
            'ses=1 comm="cat" exe="/bin/cat" '
            'subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 '
            'key="sshd_config" type=CWD msg=audit(1364481363.243:24287):  '
            'cwd="/home/shadowman" type=PATH '
            'msg=audit(1364481363.243:24287): item=0 name="/etc/ssh/sshd_config" '
            'inode=409248 dev=fd:00 mode=0100600 ouid=0 ogid=0 '
            'rdev=00:00 obj=system_u:object_r:etc_t:s0')

        service, entity = 'kinesis', 'test_kinesis_stream'
        raw_record = make_kinesis_raw_record(entity, auditd_test_data)
        payload = self._prepare_and_classify_payload(service, entity,
                                                     raw_record)

        # valid record test
        assert_equal(payload.valid, True)
        assert_is_instance(payload.records[0], dict)

        # record value tests
        assert_equal(payload.records[0]['type'], 'SYSCALL')
        assert_equal(payload.records[0]['suid'], 500)
        assert_equal(payload.records[0]['pid'], 3538)
        assert_equal(payload.records[0]['type_3'], 'PATH')

        # type test
        assert_equal(payload.type, 'kv')

    def test_classify_nested_json(self):
        """StreamClassifier - Classify Nested JSON"""
        kinesis_data = json.dumps({
            'date': 'Jan 01 2017',
            'unixtime': '1485556524',
            'host': 'my-host-name',
            'data': {
                'key1': 'test',
                'key2': 'one'
            }
        })

        service, entity = 'kinesis', 'test_kinesis_stream'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = self._prepare_and_classify_payload(service, entity,
                                                     raw_record)

        # valid record test
        assert_equal(payload.valid, True)
        assert_is_instance(payload.records[0], dict)

        # log type test
        assert_equal(payload.log_source, 'test_log_type_json_nested')

        # payload type test
        assert_equal(payload.type, 'json')

        # record type test
        assert_is_instance(payload.records[0]['date'], str)
        assert_is_instance(payload.records[0]['unixtime'], int)
        assert_is_instance(payload.records[0]['data'], dict)

        # record value test
        assert_equal(payload.records[0]['date'], 'Jan 01 2017')
        assert_equal(payload.records[0]['data']['key1'], 'test')

    def test_basic_rule_matcher_process(self):
        """Rules Engine - Basic Rule/Matcher"""
        @matcher
        def prod(rec):  # pylint: disable=unused-variable
            return rec['environment'] == 'prod'

        @rule()
        def incomplete_rule(_):  # pylint: disable=unused-variable
            return True

        @rule(logs=['test_log_type_json_nested_with_data'],
              outputs=['s3:sample_bucket'])
        def minimal_rule(rec):  # pylint: disable=unused-variable
            return rec['unixtime'] == 1483139547

        @rule(matchers=['foobar', 'prod'],
              logs=['test_log_type_json_nested_with_data'],
              outputs=['pagerduty:sample_integration'])
        def chef_logs(rec):  # pylint: disable=unused-variable
            return rec['application'] == 'chef'

        @rule(matchers=['foobar', 'prod'],
              logs=['test_log_type_json_nested_with_data'],
              outputs=['pagerduty:sample_integration'])
        def test_nest(rec):  # pylint: disable=unused-variable
            return rec['data']['source'] == 'eu'

        kinesis_data = {
            'date': 'Dec 01 2016',
            'unixtime': '1483139547',
            'host': 'host1.web.prod.net',
            'application': 'chef',
            'environment': 'prod',
            'data': {
                'category': 'web-server',
                'type': '1',
                'source': 'eu'
            }
        }

        # prepare the payloads
        service, entity = 'kinesis', 'test_kinesis_stream'
        raw_record = make_kinesis_raw_record(entity, json.dumps(kinesis_data))
        payload = load_and_classify_payload(self.config, service, entity, raw_record)

        # process payloads
        alerts = StreamRules.process(payload)

        # check alert output
        assert_equal(len(alerts), 3)
        rule_outputs_map = {
            'chef_logs': ['pagerduty:sample_integration'],
            'minimal_rule': ['s3:sample_bucket'],
            'test_nest': ['pagerduty:sample_integration']
        }
        # compare keys only; once the data is classified, values are type-cast
        # per the schema and will no longer match the raw input
        for alert in alerts:
            assert_items_equal(alert['record'].keys(), kinesis_data.keys())
            assert_items_equal(alert['outputs'], rule_outputs_map[alert['rule_name']])

    def test_mult_schema_match_failure(self, log_mock):
        """StreamClassifier - Multiple Schema Matching with Log Patterns, Fail"""
        kinesis_data = json.dumps({
            'name': 'file removal test',
            'identifier': 'host4.this.test.also',
            'time': 'Jan 01 2017',
            'type': 'file_removed_event_test_file_added_event',
            'message': 'bad_001.txt was removed'
        })
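        # Make sure support for multiple schema matching is ON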
        sa_classifier.SUPPORT_MULTIPLE_SCHEMA_MATCHING = True

        service, entity = 'kinesis', 'test_stream_2'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = load_stream_payload(service, entity, raw_record)

        self.classifier.load_sources(service, entity)

        payload = list(payload.pre_parse())[0]

        schema_matches = self.classifier._process_log_schemas(payload)

        assert_equal(len(schema_matches), 2)
        self.classifier._check_schema_match(schema_matches)

        log_mock.assert_called_with('Proceeding with schema for: %s',
                                    'test_multiple_schemas:01')

    def test_csv_rule(self):
        """Rules Engine - CSV Rule"""
        @rule(logs=['test_log_type_csv_nested'],
              outputs=['pagerduty:sample_integration'])
        def nested_csv(rec):  # pylint: disable=unused-variable
            return (
                rec['message']['application'] == 'chef' and
                rec['message']['cluster_size'] == 100
            )

        kinesis_data = (
            '"Jan 10, 2017","1485739910","host1.prod.test","Corp",'
            '"chef,web-server,1,100,fail"'
        )
        # prepare the payloads
        service, entity = 'kinesis', 'test_kinesis_stream'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = load_and_classify_payload(self.config, service, entity, raw_record)

        # process payloads
        alerts, _ = self.rules_engine.run(payload)

        # alert tests
        assert_equal(len(alerts), 1)
        assert_equal(alerts[0].rule_name, 'nested_csv')

    def test_process_with_threat_intel_enabled(self, mock_client):
        """Rules Engine - Threat Intel is enabled when process method is called"""
        @rule(datatypes=['sourceAddress'], outputs=['s3:sample_bucket'])
        def match_ipaddress(_):  # pylint: disable=unused-variable
            """Testing dummy rule"""
            return True

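        # MockDynamoDBClient stands in for the DynamoDB table backing threat intel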
        mock_client.return_value = MockDynamoDBClient()
        toggled_config = self.config
        toggled_config['global']['threat_intel']['enabled'] = True
        toggled_config['global']['threat_intel']['dynamodb_table'] = 'test_table_name'

        new_rules_engine = RulesEngine(toggled_config)
        kinesis_data_items = [{
            'account': 123456,
            'region': '123456123456',
            'source': '1.1.1.2',
            'detail': {
                'eventName': 'ConsoleLogin',
                'sourceIPAddress': '1.1.1.2',
                'recipientAccountId': '654321'
            }
        }]

        for data in kinesis_data_items:
            kinesis_data = json.dumps(data)
            service, entity = 'kinesis', 'test_kinesis_stream'
            raw_record = make_kinesis_raw_record(entity, kinesis_data)
            payload = load_and_classify_payload(toggled_config, service,
                                                entity, raw_record)

            assert_equal(len(new_rules_engine.run(payload)[0]), 1)

    def test_syslog_rule(self):
        """Rules Engine - Syslog Rule"""
        @rule(logs=['test_log_type_syslog'],
              outputs=['s3:sample_bucket'])
        def syslog_sudo(rec):  # pylint: disable=unused-variable
            return (
                rec['application'] == 'sudo' and
                'root' in rec['message']
            )

        kinesis_data = (
            'Jan 26 19:35:33 vagrant-ubuntu-trusty-64 '
            'sudo: pam_unix(sudo:session): '
            'session opened for user root by (uid=0)'
        )
        # prepare the payloads
        service, entity = 'kinesis', 'test_stream_2'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = load_and_classify_payload(self.config, service, entity, raw_record)

        # process payloads
        alerts, _ = self.rules_engine.run(payload)

        # alert tests
        assert_equal(len(alerts), 1)
        assert_equal(alerts[0].rule_name, 'syslog_sudo')
        assert_equal(alerts[0].record['host'], 'vagrant-ubuntu-trusty-64')
        assert_equal(alerts[0].log_type, 'syslog')
    def test_run_threat_intel_enabled(self, mock_threat_intel, mock_query): # pylint: disable=no-self-use
        """StreamAlert Class - Run SA when threat intel enabled"""
        @rule(datatypes=['sourceAddress'], outputs=['s3:sample_bucket'])
        def match_ipaddress(_): # pylint: disable=unused-variable
            """Testing dummy rule"""
            return True

        mock_threat_intel.return_value = StreamThreatIntel('test_table_name', 'us-east-1')
        mock_query.return_value = ([], [])

        sa_handler = StreamAlert(get_mock_context(), False)
        event = {
            'account': 123456,
            'region': '123456123456',
            'source': '1.1.1.2',
            'detail': {
                'eventName': 'ConsoleLogin',
                'sourceIPAddress': '1.1.1.2',
                'recipientAccountId': '654321'
            }
        }
        events = []
        for i in range(10):
            event['source'] = '1.1.1.{}'.format(i)
            # append a copy so each record keeps its own source address instead
            # of ten references to the same dict
            events.append(dict(event))

        kinesis_events = {
            'Records': [make_kinesis_raw_record('test_kinesis_stream', json.dumps(event))
                        for event in events]
        }

        passed = sa_handler.run(kinesis_events)
        assert_true(passed)

        assert_equal(mock_query.call_count, 1)

    def test_process_optional_logs(self):
        """Rules Engine - Logs is optional when datatypes are present"""
        @rule(datatypes=['sourceAddress'], outputs=['s3:sample_bucket'])
        def no_logs_has_datatypes(rec):  # pylint: disable=unused-variable
            """Testing rule when logs is not present, datatypes is"""
            results = fetch_values_by_datatype(rec, 'sourceAddress')

            for result in results:
                if result == '1.1.1.2':
                    return True
            return False

        @rule(logs=['cloudwatch:test_match_types'],
              outputs=['s3:sample_bucket'])
        def has_logs_no_datatypes(rec):  # pylint: disable=unused-variable
            """Testing rule when logs is present, datatypes is not"""

            return (rec['source'] == '1.1.1.2'
                    or rec['detail']['sourceIPAddress'] == '1.1.1.2')

        @rule(logs=['cloudwatch:test_match_types'],
              datatypes=['sourceAddress'],
              outputs=['s3:sample_bucket'])
        def has_logs_datatypes(rec):  # pylint: disable=unused-variable
            """Testing rule when logs is present, datatypes is"""
            results = fetch_values_by_datatype(rec, 'sourceAddress')

            for result in results:
                if result == '1.1.1.2':
                    return True
            return False

        kinesis_data_items = [{
            'account': 123456,
            'region': '123456123456',
            'source': '1.1.1.2',
            'detail': {
                'eventName': 'ConsoleLogin',
                'sourceIPAddress': '1.1.1.2',
                'recipientAccountId': '654321'
            }
        }]

        alerts = []
        for data in kinesis_data_items:
            kinesis_data = json.dumps(data)
            service, entity = 'kinesis', 'test_kinesis_stream'
            raw_record = make_kinesis_raw_record(entity, kinesis_data)
            payload = load_and_classify_payload(self.config, service, entity,
                                                raw_record)

            alerts.extend(self.rules_engine.process(payload)[0])

        assert_equal(len(alerts), 3)
        rule_names = [
            'no_logs_has_datatypes', 'has_logs_no_datatypes',
            'has_logs_datatypes'
        ]
        assert_items_equal([alerts[i]['rule_name'] for i in range(3)],
                           rule_names)

    def test_json_type_casting(self):
        """StreamClassifier - JSON with various types (boolean, float, integer)"""
        kinesis_data = json.dumps({
            'key4': 'true',
            'key5': '10.001',
            'key6': '10',
            'key7': False
        })

        service, entity = 'kinesis', 'test_kinesis_stream'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = self._prepare_and_classify_payload(service, entity,
                                                     raw_record)

        # valid record test
        assert_equal(payload.valid, True)
        assert_is_instance(payload.records[0], dict)

        # log type test
        assert_equal(payload.log_source, 'test_log_type_json_2')

        # payload type test
        assert_equal(payload.type, 'json')

        # Check the types
        assert_is_instance(payload.records[0]['key4'], bool)
        assert_is_instance(payload.records[0]['key5'], float)
        assert_is_instance(payload.records[0]['key6'], int)
        assert_is_instance(payload.records[0]['key7'], bool)
    def test_process_subkeys_non_dict(self):
        """Rules Engine - Req Subkeys handles non dict subkeys"""
        @rule(logs=['test_log_type_json_nested'],
              outputs=['s3:sample_bucket'],
              req_subkeys={'data': ['value']})
        def value_none(rec):  # pylint: disable=unused-variable
            return rec['data']['value'] is None

        kinesis_data_items = [
            {
                'date': 'Dec 01 2016',
                'unixtime': '1483139547',
                'host': 'host1.web.prod.net',
                'data': 123
            }
        ]
        # prepare payloads
        alerts = []
        for data in kinesis_data_items:
            kinesis_data = json.dumps(data)
            # prepare the payloads
            service, entity = 'kinesis', 'test_kinesis_stream'
            raw_record = make_kinesis_raw_record(entity, kinesis_data)
            payload = load_and_classify_payload(self.config, service, entity, raw_record)

            alerts.extend(self.rules_engine.run(payload)[0])

        # alert tests
        assert_equal(len(alerts), 0)

    def test_reset_normalized_types(self):
        """Rules Engine - Normalized types should be reset after each iteration"""
        @rule(datatypes=['sourceAddress'],
              outputs=['s3:sample_bucket'])
        def test_01_matching_sourceaddress_datatypes(rec): # pylint: disable=unused-variable
            """Testing rule to alert on matching sourceAddress"""
            results = fetch_values_by_datatype(rec, 'sourceAddress')

            for result in results:
                if result == '1.1.1.2':
                    return True
            return False

        @rule(logs=['cloudwatch:test_match_types', 'test_log_type_json_nested'],
              outputs=['s3:sample_bucket'])
        def test_02_rule_without_datatypes(_): # pylint: disable=unused-variable
            """Testing rule without datatypes parameter"""
            return True

        kinesis_data_items = [
            {
                'account': 123456,
                'region': '123456123456',
                'source': '1.1.1.2',
                'detail': {
                    'eventName': 'ConsoleLogin',
                    'sourceIPAddress': '1.1.1.2',
                    'recipientAccountId': '654321'
                }
            },
            {
                'date': 'Dec 01 2016',
                'unixtime': '1483139547',
                'host': 'host1.web.prod.net',
                'data': {
                    'category': 'web-server',
                    'type': '1',
                    'source': 'eu'
                }
            }
        ]

        alerts = []
        for data in kinesis_data_items:
            kinesis_data = json.dumps(data)
            service, entity = 'kinesis', 'test_kinesis_stream'
            raw_record = make_kinesis_raw_record(entity, kinesis_data)
            payload = load_and_classify_payload(self.config, service, entity, raw_record)

            alerts.extend(self.rules_engine.run(payload)[0])

        assert_equal(len(alerts), 3)
        for alert in alerts:
            has_key_normalized_types = NORMALIZATION_KEY in alert.record
            if alert.rule_name == 'test_02_rule_without_datatypes':
                assert_equal(has_key_normalized_types, False)
            else:
                assert_equal(has_key_normalized_types, True)

    def test_alert_format(self):
        """Rules Engine - Alert Format"""
        @rule(logs=['test_log_type_json_nested_with_data'],
              outputs=['s3:sample_bucket'])
        def alert_format_test(rec):  # pylint: disable=unused-variable
            """'alert_format_test' docstring for testing rule_description"""
            return rec['application'] == 'web-app'

        kinesis_data = json.dumps({
            'date': 'Dec 01 2016',
            'unixtime': '1483139547',
            'host': 'host1.web.prod.net',
            'application': 'web-app',
            'environment': 'prod',
            'data': {
                'category': 'web-server',
                'type': '1',
                'source': 'eu'
            }
        })

        # prepare the payloads
        service, entity = 'kinesis', 'test_kinesis_stream'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = load_and_classify_payload(self.config, service, entity, raw_record)

        # process payloads
        alerts, _ = self.rules_engine.process(payload)

        alert_keys = {
            'id',
            'record',
            'rule_name',
            'rule_description',
            'log_type',
            'log_source',
            'outputs',
            'source_service',
            'source_entity',
            'context'
        }
        assert_items_equal(alerts[0].keys(), alert_keys)
        assert_is_instance(alerts[0]['id'], str)
        assert_is_instance(alerts[0]['record'], dict)
        assert_is_instance(alerts[0]['outputs'], list)
        assert_is_instance(alerts[0]['context'], dict)

        # test alert fields
        assert_is_instance(alerts[0]['rule_name'], str)
        assert_is_instance(alerts[0]['rule_description'], str)
        assert_is_instance(alerts[0]['outputs'], list)
        assert_is_instance(alerts[0]['log_type'], str)
        assert_is_instance(alerts[0]['log_source'], str)

    def test_process_subkeys(self):
        """Rules Engine - Req Subkeys"""
        @rule(logs=['test_log_type_json_nested'],
              outputs=['s3:sample_bucket'],
              req_subkeys={'data': ['location']})
        def data_location(rec):  # pylint: disable=unused-variable
            return rec['data']['location'].startswith('us')

        @rule(logs=['test_log_type_json_nested'],
              outputs=['s3:sample_bucket'],
              req_subkeys={'data': ['category']})
        def web_server(rec):  # pylint: disable=unused-variable
            return rec['data']['category'] == 'web-server'

        kinesis_data_items = [
            {
                'date': 'Dec 01 2016',
                'unixtime': '1483139547',
                'host': 'host1.web.prod.net',
                'data': {
                    'category': 'web-server',
                    'type': '1',
                    'source': 'eu'
                }
            },
            {
                'date': 'Dec 01 2016',
                'unixtime': '1483139547',
                'host': 'host1.web.prod.net',
                'data': {
                    'location': 'us-west-2'
                }
            }
        ]

        # prepare payloads
        alerts = []
        for data in kinesis_data_items:
            kinesis_data = json.dumps(data)
            # prepare the payloads
            service, entity = 'kinesis', 'test_kinesis_stream'
            raw_record = make_kinesis_raw_record(entity, kinesis_data)
            payload = load_and_classify_payload(self.config, service, entity, raw_record)

            alerts.extend(self.rules_engine.run(payload)[0])

        # check alert output
        assert_equal(len(alerts), 2)

        # alert tests
        assert_equal(alerts[0].rule_name, 'web_server')
        assert_equal(alerts[1].rule_name, 'data_location')

    def test_classify_json_optional(self):
        """StreamClassifier - Classify JSON with optional fields"""
        kinesis_data = json.dumps({
            'key1': [{
                'test': 1,
                'test2': 2
            }, {
                'test3': 3,
                'test4': 4
            }],
            'key2': 'more sample data',
            'key3': '1',
            'key10': {
                'test-field': 1,
                'test-field2': 2
            }
        })

        service, entity = 'kinesis', 'test_kinesis_stream'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = self._prepare_and_classify_payload(service, entity,
                                                     raw_record)

        # valid record test
        assert_equal(payload.valid, True)
        assert_is_instance(payload.records[0], dict)

        # log type test
        assert_equal(payload.log_source, 'test_log_type_json')

        # payload type test
        assert_equal(payload.type, 'json')

        # record value tests
        assert_equal(len(payload.records[0]['key1']), 2)
        assert_equal(payload.records[0]['key3'], 1)
        assert_equal(payload.records[0]['key1'][1]['test4'], 4)

        # optional field tests
        assert_equal(payload.records[0]['key11'], 0.0)
        assert_equal(payload.records[0]['key9'], False)
        assert_equal(len(payload.records[0]['key10']), 2)

        # record type tests
        assert_is_instance(payload.records[0]['key1'], list)
        assert_is_instance(payload.records[0]['key2'], str)
        assert_is_instance(payload.records[0]['key3'], int)
def test_pre_parse_kinesis(log_mock):
    """KinesisPayload - Pre Parse"""
    kinesis_data = json.dumps({'test': 'value'})
    entity = 'unit_test_entity'
    raw_record = make_kinesis_raw_record(entity, kinesis_data)
    kinesis_payload = load_stream_payload('kinesis', entity, raw_record)

    kinesis_payload = list(kinesis_payload.pre_parse())[0]

    assert_equal(kinesis_payload.pre_parsed_record, '{"test": "value"}')

    log_mock.assert_called_with(
        'Pre-parsing record from Kinesis. '
        'eventID: %s, eventSourceARN: %s', 'unit test event id',
        'arn:aws:kinesis:us-east-1:123456789012:stream/{}'.format(entity))

    def test_kv_rule(self):
        """Rules Engine - KV Rule"""
        @rule(logs=['test_log_type_kv_auditd'],
              outputs=['pagerduty:sample_integration'])
        def auditd_bin_cat(rec):  # pylint: disable=unused-variable
            return (
                rec['type'] == 'SYSCALL' and
                rec['exe'] == '"/bin/cat"'
            )

        @rule(logs=['test_log_type_kv_auditd'],
              outputs=['pagerduty:sample_integration'])
        def gid_500(rec):  # pylint: disable=unused-variable
            return (
                rec['gid'] == 500 and
                rec['euid'] == 500
            )

        auditd_test_data = (
            'type=SYSCALL msg=audit(1364481363.243:24287): '
            'arch=c000003e syscall=2 success=no exit=-13 a0=7fffd19c5592 a1=0 '
            'a2=7fffd19c4b50 a3=a items=1 ppid=2686 pid=3538 auid=500 uid=500 '
            'gid=500 euid=500 suid=500 fsuid=500 egid=500 sgid=500 fsgid=500 tty=pts0 '
            'ses=1 comm="cat" exe="/bin/cat" '
            'subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 '
            'key="sshd_config" type=CWD msg=audit(1364481363.243:24287):  '
            'cwd="/home/shadowman" type=PATH '
            'msg=audit(1364481363.243:24287): item=0 name="/etc/ssh/sshd_config" '
            'inode=409248 dev=fd:00 mode=0100600 ouid=0 ogid=0 '
            'rdev=00:00 obj=system_u:object_r:etc_t:s0'
        )

        # prepare the payloads
        service, entity = 'kinesis', 'test_kinesis_stream'
        raw_record = make_kinesis_raw_record(entity, auditd_test_data)
        payload = load_and_classify_payload(self.config, service, entity, raw_record)

        # process payloads
        alerts, _ = self.rules_engine.run(payload)

        # alert tests
        assert_equal(len(alerts), 2)

        rule_name_alerts = [x.rule_name for x in alerts]
        assert_items_equal(rule_name_alerts, ['gid_500', 'auditd_bin_cat'])

    def test_classify_syslog(self):
        """StreamClassifier - Classify syslog"""
        test_data_1 = ('Jan 26 19:35:33 vagrant-ubuntu-trusty-64 '
                       'sudo: pam_unix(sudo:session): '
                       'session opened for user root by (uid=0)')
        test_data_2 = (
            "Jan 26 12:28:06 macbook004154test authd[122]: "
            "Succeeded authorizing right 'com.apple.trust-settings.admin' "
            "by client '/usr/sbin/ocspd' [11835] for authorization created by"
            " '/usr/bin/security' [21322] (3,0)")

        fixtures = {'test_1': test_data_1, 'test_2': test_data_2}
        for name, syslog_message in fixtures.items():

            service, entity = 'kinesis', 'test_stream_2'
            raw_record = make_kinesis_raw_record(entity, syslog_message)
            payload = self._prepare_and_classify_payload(
                service, entity, raw_record)

            # valid record test
            assert_equal(payload.valid, True)
            assert_is_instance(payload.records[0], dict)

            # type test
            assert_equal(payload.type, 'syslog')

            # record value tests
            if name == 'test_1':
                assert_equal(payload.records[0]['host'],
                             'vagrant-ubuntu-trusty-64')
                assert_equal(payload.records[0]['application'], 'sudo')
                assert_equal(
                    payload.records[0]['message'], 'pam_unix(sudo:session):'
                    ' session opened for user'
                    ' root by (uid=0)')
            elif name == 'test_2':
                assert_equal(payload.records[0]['host'], 'macbook004154test')
                assert_equal(payload.records[0]['application'], 'authd')

    def test_csv(self):
        """StreamClassifier - Classify CSV"""
        csv_data = 'jan102017,0100,host1,thisis some data with keyword1 in it'

        service, entity = 'kinesis', 'test_kinesis_stream'
        raw_record = make_kinesis_raw_record(entity, csv_data)
        payload = self._prepare_and_classify_payload(service, entity,
                                                     raw_record)

        # valid record test
        assert_equal(payload.valid, True)
        assert_is_instance(payload.records[0], dict)

        # record value tests
        assert_equal(payload.records[0]['message'],
                     'thisis some data with keyword1 in it')
        assert_equal(payload.records[0]['host'], 'host1')

        # type test
        assert_equal(payload.type, 'csv')

        # log source test
        assert_equal(payload.log_source, 'test_log_type_csv')

    def test_rule_staged_only(self):
        """Rules Engine - Staged Rule"""
        @rule(logs=['cloudwatch:test_match_types'], outputs=['foobar'])
        def rule_staged_only(_):  # pylint: disable=unused-variable
            """Modify context rule"""
            return True

        kinesis_data = json.dumps({
            'account': 123456,
            'region': '123456123456',
            'source': '1.1.1.2',
            'detail': {
                'eventName': 'ConsoleLogin',
                'sourceIPAddress': '1.1.1.2',
                'recipientAccountId': '654321'
            }
        })
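        # mark the rule as staged; a staged rule should route its alerts only
        # to the default firehose output rather than its configured outputs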
        table = RuleTable('table')
        table._remote_rule_info = {'rule_staged_only': {'Staged': True}}
        self.config['global']['infrastructure']['rules_table']['enabled'] = True
        with patch.object(RulesEngine, '_RULE_TABLE', table), \
                patch.object(RulesEngine, '_RULE_TABLE_LAST_REFRESH', datetime.utcnow()):

            self.rules_engine._load_rule_table(self.config)

            # prepare the payloads
            service, entity = 'kinesis', 'test_kinesis_stream'
            raw_record = make_kinesis_raw_record(entity, kinesis_data)
            payload = load_and_classify_payload(self.config, service, entity,
                                                raw_record)

            # process payloads
            alerts, _ = self.rules_engine.run(payload)

            # alert tests
            assert_equal(list(alerts[0].outputs)[0], 'aws-firehose:alerts')

    def test_parse_convert_fail(self, log_mock):
        """StreamClassifier - Convert Failed"""
        service, entity = 'kinesis', 'unit_test_default_stream'

        result = self.classifier.load_sources(service, entity)

        assert_true(result)

        kinesis_data = json.dumps({
            'unit_key_01': 'not an integer',
            'unit_key_02': 'valid string'
        })

        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = load_stream_payload(service, entity, raw_record)
        payload = list(payload.pre_parse())[0]

        result = self.classifier._parse(payload)

        assert_false(result)

        log_mock.assert_called_with(
            'Invalid schema. Value for key [%s] is not an int: %s',
            'unit_key_01', 'not an integer')

    def test_match_types(self):
        """Rules Engine - Match normalized types against record"""
        @rule(logs=['cloudwatch:test_match_types'],
              outputs=['s3:sample_bucket'],
              datatypes=['sourceAddress'])
        def match_ipaddress(rec):  # pylint: disable=unused-variable
            """Testing rule to detect matching IP address"""
            results = fetch_values_by_datatype(rec, 'sourceAddress')

            for result in results:
                if result == '1.1.1.2':
                    return True
            return False

        @rule(logs=['cloudwatch:test_match_types'],
              outputs=['s3:sample_bucket'],
              datatypes=['sourceAddress', 'command'])
        def mismatch_types(rec):  # pylint: disable=unused-variable
            """Testing rule with non-existing normalized type in the record. It
            should not trigger an alert.
            """
            results = fetch_values_by_datatype(rec, 'sourceAddress')

            for result in results:
                if result == '2.2.2.2':
                    return True
            return False

        kinesis_data_items = [{
            'account': 123456,
            'region': '123456123456',
            'source': '1.1.1.2',
            'detail': {
                'eventName': 'ConsoleLogin',
                'sourceIPAddress': '1.1.1.2',
                'recipientAccountId': '654321'
            }
        }, {
            'account': 654321,
            'region': '654321654321',
            'source': '2.2.2.2',
            'detail': {
                'eventName': 'ConsoleLogin',
                'sourceIPAddress': '2.2.2.2',
                'recipientAccountId': '123456'
            }
        }]

        # prepare payloads
        alerts = []
        for data in kinesis_data_items:
            kinesis_data = json.dumps(data)
            # prepare the payloads
            service, entity = 'kinesis', 'test_kinesis_stream'
            raw_record = make_kinesis_raw_record(entity, kinesis_data)
            payload = load_and_classify_payload(self.config, service, entity,
                                                raw_record)

            alerts.extend(StreamRules.process(payload))

        # check alert output
        assert_equal(len(alerts), 1)

        # alert tests
        assert_equal(alerts[0]['rule_name'], 'match_ipaddress')

    def test_match_types(self):
        """Rules Engine - Match normalized types against record"""
        @rule(logs=['cloudwatch:test_match_types'],
              outputs=['s3:sample_bucket'],
              datatypes=['sourceAddress'])
        def match_ipaddress(rec):  # pylint: disable=unused-variable
            """Testing rule to detect matching IP address

            Datatype 'sourceAddress' is defined in tests/unit/conf/types.json
            for cloudwatch logs. This rule should be triggered by the testing event.
            """
            results = fetch_values_by_datatype(rec, 'sourceAddress')

            for result in results:
                if result == '1.1.1.2':
                    return True
            return False

        @rule(logs=['cloudwatch:test_match_types'],
              outputs=['s3:sample_bucket'],
              datatypes=['sourceAddress', 'command'])
        def mismatch_types(rec):  # pylint: disable=unused-variable
            """Testing rule with non-existing normalized type in the record.

            Datatype 'sourceAddress' is defined in tests/unit/conf/types.json
            for cloudwatch logs, but 'command' is not. This rule should still be
            triggered by the testing event since the 'datatypes' rule parameter
            was changed to an OR operation across CEF types. See the discussion at
            https://github.com/airbnb/streamalert/issues/365
            """
            results = fetch_values_by_datatype(rec, 'sourceAddress')

            for result in results:
                if result == '2.2.2.2':
                    return True
            return False

        kinesis_data_items = [{
            'account': 123456,
            'region': '123456123456',
            'source': '1.1.1.2',
            'detail': {
                'eventName': 'ConsoleLogin',
                'sourceIPAddress': '1.1.1.2',
                'recipientAccountId': '654321'
            }
        }, {
            'account': 654321,
            'region': '654321654321',
            'source': '2.2.2.2',
            'detail': {
                'eventName': 'ConsoleLogin',
                'sourceIPAddress': '2.2.2.2',
                'recipientAccountId': '123456'
            }
        }]

        # prepare payloads
        alerts = []
        for data in kinesis_data_items:
            kinesis_data = json.dumps(data)
            # prepare the payloads
            service, entity = 'kinesis', 'test_kinesis_stream'
            raw_record = make_kinesis_raw_record(entity, kinesis_data)
            payload = load_and_classify_payload(self.config, service, entity,
                                                raw_record)

            alerts.extend(StreamRules.process(payload))

        # check alert output
        assert_equal(len(alerts), 2)

        # alert tests
        assert_equal(alerts[0]['rule_name'], 'match_ipaddress')

    def test_process_allow_multi_around_normalization(self, mock_client):
        """Rules Engine - Threat Intel is enabled run multi-round_normalization"""
        @rule(datatypes=['fileHash'], outputs=['s3:sample_bucket'])
        def match_file_hash(rec):  # pylint: disable=unused-variable
            """Testing dummy rule to match file hash"""
            return 'streamalert:ioc' in rec and 'md5' in rec['streamalert:ioc']

        @rule(datatypes=['fileHash'], outputs=['s3:sample_bucket'])
        def match_file_hash_again(_):  # pylint: disable=unused-variable
            """Testing dummy rule to match file hash again"""
            return False

        @rule(datatypes=['fileHash', 'sourceDomain'],
              outputs=['s3:sample_bucket'])
        def match_source_domain(rec):  # pylint: disable=unused-variable
            """Testing dummy rule to match source domain and file hash"""
            return 'streamalert:ioc' in rec

        mock_client.return_value = MockDynamoDBClient()
        toggled_config = self.config
        toggled_config['global']['threat_intel']['enabled'] = True
        toggled_config['global']['threat_intel']['dynamodb_table'] = 'test_table_name'

        new_rules_engine = RulesEngine(toggled_config)
        kinesis_data = {
            "Field1": {
                "SubField1": {
                    "key1": 17,
                    "key2_md5": "md5-of-file",
                    "key3_source_domain": "evil.com"
                },
                "SubField2": 1
            },
            "Field2": {
                "Authentication": {}
            },
            "Field3": {},
            "Field4": {}
        }

        kinesis_data = json.dumps(kinesis_data)
        service, entity = 'kinesis', 'test_stream_threat_intel'
        raw_record = make_kinesis_raw_record(entity, kinesis_data)
        payload = load_and_classify_payload(toggled_config, service, entity,
                                            raw_record)
        alerts, normalized_records = new_rules_engine.run(payload)

        # Two of the testing rules match on threat intelligence results, so no
        # alerts are generated before threat intel takes effect.
        assert_equal(len(alerts), 0)

        # A single record is normalized by two different rules that use
        # different normalization keys.
        assert_equal(len(normalized_records), 1)
        assert_equal(
            normalized_records[0].pre_parsed_record['streamalert:normalization'].keys(),
            ['fileHash', 'sourceDomain'])

        # Pass normalized records to threat intel engine.
        alerts_from_threat_intel = new_rules_engine.threat_intel_match(
            normalized_records)
        assert_equal(len(alerts_from_threat_intel), 2)
        assert_equal(alerts_from_threat_intel[0].rule_name, 'match_file_hash')
        assert_equal(alerts_from_threat_intel[1].rule_name,
                     'match_source_domain')