def test_classify_record_kinesis_json(self):
        """Payload Classify JSON - boolean, float, integer types

        Sends string-encoded boolean/float/integer values (plus one native
        boolean) through the classifier and checks the values found on the
        classified record.
        """
        raw_fields = {
            'key4': 'true',
            'key5': '10.001',
            'key6': '10',
            'key7': False
        }
        payload = self.payload_generator(
            kinesis_stream='test_kinesis_stream',
            kinesis_data=json.dumps(raw_fields))
        stream_classifier = StreamClassifier(config=self.config)
        stream_classifier.map_source(payload)

        # Pre-parse the kinesis payload, then classify it
        parsed = self.pre_parse_kinesis(payload)
        stream_classifier.classify_record(payload, parsed)

        # The payload must be valid and yield a dict record
        assert_equal(payload.valid, True)
        record = payload.records[0]
        assert_equal(type(record), dict)

        # Matched log source
        assert_equal(payload.log_source, 'test_log_type_json_2')

        # Payload type
        assert_equal(payload.type, 'json')
        assert_not_equal(payload.type, 'csv')

        # Values expected on the classified record, checked in key order
        expected_values = (
            ('key4', True),
            ('key5', 10.001),
            ('key6', 10),
            ('key7', False),
        )
        for key, expected in expected_values:
            assert_equal(record[key], expected)
Exemple #2
0
    def test_classify_record_kinesis_json(self):
        """Payload Classify JSON

        Classifies a flat JSON record sent on ``test_kinesis_stream`` and
        checks the matched log source, the payload type and the Python type
        of each key on the classified record.
        """
        raw_fields = {
            'key1': 'sample data!!!!',
            'key2': 'more sample data',
            'key3': '1'
        }
        payload = self.payload_generator(
            kinesis_stream='test_kinesis_stream',
            kinesis_data=json.dumps(raw_fields))
        stream_classifier = StreamClassifier(config=self.config)
        stream_classifier.map_source(payload)

        # Pre-parse the kinesis payload, then classify it
        parsed = self.pre_parse_kinesis(payload)
        stream_classifier.classify_record(payload, parsed)

        # The payload must be valid and yield a dict record
        assert_equal(payload.valid, True)
        record = payload.records[0]
        assert_equal(type(record), dict)

        # Matched log source
        assert_equal(payload.log_source, 'test_log_type_json')

        # Payload type
        assert_equal(payload.type, 'json')
        assert_not_equal(payload.type, 'csv')

        # Expected Python type of each key on the classified record
        for key, expected_type in (('key1', str), ('key2', str), ('key3', int)):
            assert_equal(type(record[key]), expected_type)
    def test_classify_record_kinesis_csv_nested(self):
        """Payload Classify Nested CSV

        Classifies a CSV line whose last column is itself a quoted CSV string
        and checks both the top-level and the nested ``message`` values.
        """
        nested_csv_line = (
            '"Jan 10 2017","1485635414","host1.prod.test","Corp",'
            '"chef,web-server,1,10,success"')
        payload = self.payload_generator(kinesis_stream='test_kinesis_stream',
                                         kinesis_data=nested_csv_line)

        stream_classifier = StreamClassifier(config=self.config)
        stream_classifier.map_source(payload)

        parsed = self.pre_parse_kinesis(payload)
        stream_classifier.classify_record(payload, parsed)

        # The payload must be valid and yield a dict record
        assert_equal(payload.valid, True)
        record = payload.records[0]
        assert_equal(type(record), dict)

        # Top-level column values
        assert_equal(record['date'], 'Jan 10 2017')
        assert_equal(record['host'], 'host1.prod.test')
        assert_equal(record['time'], 1485635414)

        # Values parsed out of the nested message column
        nested_message = record['message']
        assert_equal(nested_message['role'], 'web-server')
        assert_equal(record['message']['cluster_size'], 10)

        # Payload type
        assert_equal(payload.type, 'csv')
        assert_not_equal(payload.type, 'json')

        # Matched log source
        assert_equal(payload.log_source, 'test_log_type_csv_nested')
    def test_multiple_schema_matching(self):
        """Test Matching Multiple Schemas with Log Patterns

        With multiple schema matching enabled, a record sent on
        ``test_stream_2`` is expected to parse against two schemas
        (``test_multiple_schemas:01`` and ``:02``), and
        ``_check_valid_parse`` is expected to select the first one.

        The module-level ``SUPPORT_MULTIPLE_SCHEMA_MATCHING`` flag is restored
        to its previous value when the test finishes, so the setting does not
        leak into tests that run afterwards.
        """
        kinesis_data = json.dumps({
            'name': 'file added test',
            'identifier': 'host4.this.test',
            'time': 'Jan 01 2017',
            'type': 'lol_file_added_event_test',
            'message': 'bad_001.txt was added'
        })

        # Make sure support for multiple schema matching is ON, but only for
        # the duration of this test: remember the current value first.
        previous_setting = sa_classifier.SUPPORT_MULTIPLE_SCHEMA_MATCHING
        sa_classifier.SUPPORT_MULTIPLE_SCHEMA_MATCHING = True
        try:
            payload = self.payload_generator(kinesis_stream='test_stream_2',
                                             kinesis_data=kinesis_data)
            classifier = StreamClassifier(config=self.config)
            classifier.map_source(payload)

            data = self.pre_parse_kinesis(payload)
            valid_parses = classifier._process_log_schemas(payload, data)

            # Both schemas should match, in this order
            assert_equal(len(valid_parses), 2)
            assert_equal(valid_parses[0].log_name, 'test_multiple_schemas:01')
            assert_equal(valid_parses[1].log_name, 'test_multiple_schemas:02')

            # The first matching schema is the one that gets selected
            valid_parse = classifier._check_valid_parse(valid_parses)
            assert_equal(valid_parse.log_name, 'test_multiple_schemas:01')
        finally:
            # Runs on success and on assertion failure alike
            sa_classifier.SUPPORT_MULTIPLE_SCHEMA_MATCHING = previous_setting
    def test_classify_record_kinesis_csv(self):
        """Payload Classify CSV

        Classifies a single flat CSV line sent on ``test_kinesis_stream`` and
        checks the parsed ``message`` and ``host`` columns, the payload type
        and the matched log source.
        """
        csv_line = 'jan102017,0100,host1,thisis some data with keyword1 in it'
        payload = self.payload_generator(kinesis_stream='test_kinesis_stream',
                                         kinesis_data=csv_line)

        stream_classifier = StreamClassifier(config=self.config)
        stream_classifier.map_source(payload)

        parsed = self.pre_parse_kinesis(payload)
        stream_classifier.classify_record(payload, parsed)

        # The payload must be valid and yield a dict record
        assert_equal(payload.valid, True)
        record = payload.records[0]
        assert_equal(type(record), dict)

        # Column values
        assert_equal(record['message'], 'thisis some data with keyword1 in it')
        assert_equal(record['host'], 'host1')

        # Payload type
        assert_equal(payload.type, 'csv')
        assert_not_equal(payload.type, 'json')

        # Matched log source
        assert_equal(payload.log_source, 'test_log_type_csv')
    def test_classify_record_kinesis_nested_json_missing_subkey_fields(self):
        """Payload Classify Nested JSON Missing Subkeys

        Sends a nested JSON record whose ``decorations`` sub-dict leaves out
        the ``cluster`` key and asserts that the payload is marked invalid
        with no records.
        """
        decorations = {
            'role': 'web-server',
            'env': 'production',
            # 'cluster' is deliberately left out of this record
            'number': '100'
        }
        raw_fields = {
            'name': 'testquery',
            'hostIdentifier': 'host1.test.prod',
            'calendarTime': 'Jan 01 2017',
            'unixTime': '12321412321',
            'columns': {
                'key1': 'test',
                'key2': 'one'
            },
            'action': 'added',
            'decorations': decorations
        }
        payload = self.payload_generator(
            kinesis_stream='test_stream_2',
            kinesis_data=json.dumps(raw_fields))

        stream_classifier = StreamClassifier(config=self.config)
        stream_classifier.map_source(payload)

        parsed = self.pre_parse_kinesis(payload)
        stream_classifier.classify_record(payload, parsed)

        # The record must be rejected
        assert_equal(payload.valid, False)
        assert_equal(payload.records, None)
    def test_classify_record_kinesis_json_optional(self):
        """Payload Classify JSON - optional fields

        The input carries ``key1``, ``key2``, ``key3`` and ``key10`` only.
        The test asserts that ``key9`` and ``key11``, which are absent from
        the input, are present on the classified record as ``False`` and
        ``0.0`` respectively.
        """
        raw_fields = {
            'key1': [
                {'test': 1, 'test2': 2},
                {'test3': 3, 'test4': 4},
            ],
            'key2': 'more sample data',
            'key3': '1',
            'key10': {'test-field': 1, 'test-field2': 2}
        }
        payload = self.payload_generator(
            kinesis_stream='test_kinesis_stream',
            kinesis_data=json.dumps(raw_fields))
        stream_classifier = StreamClassifier(config=self.config)
        stream_classifier.map_source(payload)

        # Pre-parse the kinesis payload, then classify it
        parsed = self.pre_parse_kinesis(payload)
        stream_classifier.classify_record(payload, parsed)

        # The payload must be valid and yield a dict record
        assert_equal(payload.valid, True)
        record = payload.records[0]
        assert_equal(type(record), dict)

        # Matched log source
        assert_equal(payload.log_source, 'test_log_type_json')

        # Payload type
        assert_equal(payload.type, 'json')
        assert_not_equal(payload.type, 'csv')

        # Values of the keys that were supplied
        assert_equal(len(record['key1']), 2)
        assert_equal(record['key3'], 1)
        assert_equal(record['key1'][1]['test4'], 4)

        # Keys that were not supplied (key9, key11) and the supplied key10
        assert_equal(record['key11'], 0.0)
        assert_equal(record['key9'], False)
        assert_equal(len(record['key10']), 2)

        # Expected Python type of each supplied key
        for key, expected_type in (('key1', list), ('key2', str), ('key3', int)):
            assert_equal(type(record[key]), expected_type)
    def test_classify_record_kinesis_nested_json_osquery(self):
        """Payload Classify JSON osquery

        Classifies an osquery-shaped nested JSON record sent on
        ``test_stream_2`` and checks types and values at both the top level
        and inside the ``columns`` / ``decorations`` sub-dicts.
        """
        raw_fields = {
            'name': 'testquery',
            'hostIdentifier': 'host1.test.prod',
            'calendarTime': 'Jan 01 2017',
            'unixTime': '1485556524',
            'columns': {
                'key1': 'test',
                'key2': 'one'
            },
            'action': 'added',
            'decorations': {
                'role': 'web-server',
                'env': 'production',
                'cluster': 'eu-east',
                'number': '100'
            }
        }
        payload = self.payload_generator(
            kinesis_stream='test_stream_2',
            kinesis_data=json.dumps(raw_fields))

        stream_classifier = StreamClassifier(config=self.config)
        stream_classifier.map_source(payload)

        parsed = self.pre_parse_kinesis(payload)
        stream_classifier.classify_record(payload, parsed)

        # The payload must be valid and yield a dict record
        assert_equal(payload.valid, True)
        record = payload.records[0]
        assert_equal(type(record), dict)

        # Matched log source
        assert_equal(payload.log_source, 'test_log_type_json_nested_osquery')

        # Payload type
        assert_equal(payload.type, 'json')
        assert_not_equal(payload.type, 'csv')

        # Expected Python type of selected keys
        expected_types = (
            ('hostIdentifier', str),
            ('unixTime', int),
            ('columns', dict),
            ('decorations', dict),
        )
        for key, expected_type in expected_types:
            assert_equal(type(record[key]), expected_type)

        # Record values, including nested ones
        assert_equal(record['unixTime'], 1485556524)
        assert_equal(record['columns']['key1'], 'test')
        assert_equal(record['decorations']['cluster'], 'eu-east')
        assert_equal(record['decorations']['number'], 100)
        # 'log_type' was not supplied in the input; it is asserted to be ''
        assert_equal(record['log_type'], '')
    def test_classify_record_syslog(self):
        """Payload Classify Syslog

        Classifies two sample syslog lines sent on ``test_stream_2``. For
        each one it checks that the payload is valid, that its type is
        ``syslog``, and that the listed fields were parsed to the expected
        values.
        """
        sudo_line = ('Jan 26 19:35:33 vagrant-ubuntu-trusty-64 '
                     'sudo: pam_unix(sudo:session): '
                     'session opened for user root by (uid=0)')
        authd_line = (
            "Jan 26 12:28:06 macbook004154test authd[122]: "
            "Succeeded authorizing right 'com.apple.trust-settings.admin' "
            "by client '/usr/sbin/ocspd' [11835] for authorization created by"
            " '/usr/bin/security' [21322] (3,0)")

        # Each case pairs a raw syslog line with the (field, value) pairs
        # expected on its record. A tuple keeps the order deterministic and
        # avoids dict.iteritems(), which does not exist on Python 3.
        cases = (
            (sudo_line, (
                ('host', 'vagrant-ubuntu-trusty-64'),
                ('application', 'sudo'),
                ('message', 'pam_unix(sudo:session):'
                            ' session opened for user'
                            ' root by (uid=0)'),
            )),
            (authd_line, (
                ('host', 'macbook004154test'),
                ('application', 'authd'),
            )),
        )

        for syslog_message, expected_fields in cases:
            payload = self.payload_generator(kinesis_stream='test_stream_2',
                                             kinesis_data=syslog_message)

            classifier = StreamClassifier(config=self.config)
            classifier.map_source(payload)

            data = self.pre_parse_kinesis(payload)
            classifier.classify_record(payload, data)

            # valid record test
            assert_equal(payload.valid, True)
            record = payload.records[0]
            assert_equal(type(record), dict)

            # type test
            assert_equal(payload.type, 'syslog')
            assert_not_equal(payload.type, 'csv')
            assert_not_equal(payload.type, 'json')
            assert_not_equal(payload.type, 'kv')

            # record value tests
            for field, expected in expected_fields:
                assert_equal(record[field], expected)
    def test_classify_record_kinesis_nested_json_with_data(self):
        """Payload Classify Nested JSON Generic

        Classifies a JSON record carrying a nested ``data`` dict and checks
        types and values on both the top level and the nested dict.
        """
        raw_fields = {
            'date': 'Jan 01 2017',
            'unixtime': '1485556524',
            'host': 'host1',
            'application': 'myapp',
            'environment': 'development',
            'data': {
                'category': 'test',
                'type': '1',
                'source': 'dev-app-1'
            }
        }
        payload = self.payload_generator(
            kinesis_stream='test_kinesis_stream',
            kinesis_data=json.dumps(raw_fields))

        stream_classifier = StreamClassifier(config=self.config)
        stream_classifier.map_source(payload)

        parsed = self.pre_parse_kinesis(payload)
        stream_classifier.classify_record(payload, parsed)

        # The payload must be valid and yield a dict record
        assert_equal(payload.valid, True)
        record = payload.records[0]
        assert_equal(type(record), dict)

        # Matched log source
        assert_equal(payload.log_source, 'test_log_type_json_nested_with_data')

        # Payload type
        assert_equal(payload.type, 'json')
        assert_not_equal(payload.type, 'csv')

        # Expected Python types, top level first and then inside 'data'
        assert_equal(type(record['date']), str)
        assert_equal(type(record['unixtime']), int)
        assert_equal(type(record['data']), dict)
        assert_equal(type(record['data']['type']), int)
        assert_equal(type(record['data']['category']), str)

        # Record values
        assert_equal(record['date'], 'Jan 01 2017')
        assert_equal(record['data']['source'], 'dev-app-1')
Exemple #11
0
    def run(self, event, context):
        """StreamAlert Lambda function handler.

        Loads the configuration for the StreamAlert function which contains:
        available data sources, log formats, parser modes, and sinks.  Classifies
        logs sent into the stream into a parsed type.  Matches records against
        rules.

        Args:
            event: An AWS event mapped to a specific source/entity (kinesis stream or
                an s3 bucket event) containing data emitted to the stream.
            context: An AWS context object which provides metadata on the currently
                executing lambda function.

        Returns:
            ``self.alerts`` when ``self.return_alerts`` is set; otherwise None
            (alerts are handed to ``self.send_alerts`` instead, unless the
            event contained no records).
        """
        records = event.get('Records', [])
        logger.debug('Number of Records: %d', len(records))

        config = load_config()
        env = load_env(context)

        # Stays None when the event carries no records, so the send step
        # below can tell that there is no payload to pass along.
        payload = None

        for record in records:
            payload = StreamPayload(raw_record=record)
            classifier = StreamClassifier(config=config)
            classifier.map_source(payload)

            # If the kinesis stream or s3 bucket is not in our config,
            # go onto the next record
            if not payload.valid_source:
                continue

            if payload.service == 's3':
                self.s3_process(payload, classifier)
            elif payload.service == 'kinesis':
                self.kinesis_process(payload, classifier)
            else:
                logger.info('Unsupported service: %s', payload.service)

        # returns the list of generated alerts
        if self.return_alerts:
            return self.alerts

        # An empty event used to fail here with UnboundLocalError because
        # `payload` was never assigned; there is nothing to send in that case.
        if payload is None:
            logger.debug('No records in event, skipping sending of alerts')
            return None

        # send alerts to SNS
        self.send_alerts(env, payload)
        return None
Exemple #12
0
    def test_map_source_2(self):
        """Payload Source Mapping 2

        Maps a payload sent on ``test_stream_2`` and checks the resolved
        service, entity and the set of log names returned by
        ``log_metadata``.
        """
        encoded = base64.b64encode('test_map_source_data_2')
        payload = self.payload_generator(kinesis_stream='test_stream_2',
                                         kinesis_data=encoded)

        stream_classifier = StreamClassifier(config=self.config)
        stream_classifier.map_source(payload)

        metadata = stream_classifier.log_metadata(payload)

        # Log names expected for test_stream_2
        expected_logs = {
            'test_log_type_json_2',
            'test_log_type_json_nested_osquery',
            'test_log_type_syslog',
        }

        # Service, entity and metadata checks
        assert_equal(payload.service, 'kinesis')
        assert_equal(payload.entity, 'test_stream_2')
        assert_equal(set(metadata.keys()), expected_logs)
Exemple #13
0
    def test_rule(self, rule_name, test_record, formatted_record):
        """Feed a formatted record into StreamAlert and collect its alerts

        Args:
            rule_name [str]: The rule name being tested
            test_record [dict]: A single record to test; its 'trigger_count'
                (when present and non-zero) or its 'trigger' flag gives the
                expected number of alerts
            formatted_record [dict]: A dictionary that includes the 'data' from the
                test record, formatted into a structure that resembles how
                an incoming record from a service would format it.
                See test/integration/templates for example of how each service
                formats records.

        Returns:
            [list] alerts that hit for this rule
            [integer] count of expected alerts for this rule
            [bool] boolean where False indicates errors occurred during processing
        """
        # A truthy 'trigger_count' is used as-is; otherwise the boolean
        # 'trigger' flag decides between one expected alert and none.
        expected_alert_count = test_record.get('trigger_count')
        if not expected_alert_count:
            expected_alert_count = int(bool(test_record['trigger']))

        # Run the rule processor. Passing mocked context object with fake
        # values and False for suppressing sending of alerts
        processor = StreamAlert(self.context, False)
        all_records_matched_schema = processor.run({'Records': [formatted_record]})

        if not all_records_matched_schema:
            # Re-map the record's source and report how the record differs
            # from the logs configured for it
            failed_payload = StreamPayload(raw_record=formatted_record)
            failed_classifier = StreamClassifier(config=load_config())
            failed_classifier.map_source(failed_payload)
            self.analyze_record_delta(failed_classifier._log_metadata(),
                                      rule_name,
                                      test_record)

        # we only want alerts for the specific rule being tested
        matching_alerts = [
            candidate for candidate in processor.get_alerts()
            if candidate['rule_name'] == rule_name
        ]

        return matching_alerts, expected_alert_count, all_records_matched_schema
    def make_kinesis_payload(self, kinesis_stream, kinesis_data):
        """Helper for creating the kinesis payload

        Wraps ``kinesis_data`` (base64-encoded) in a kinesis-style raw record
        for ``kinesis_stream``, then maps, pre-parses and classifies it.

        Returns:
            The classified payload when it is valid, otherwise None.
        """
        stream_arn = 'arn:aws:kinesis:us-east-1:123456789012:stream/{}'.format(
            kinesis_stream)
        payload = StreamPayload(raw_record={
            'eventSource': 'aws:kinesis',
            'eventSourceARN': stream_arn,
            'kinesis': {
                'data': base64.b64encode(kinesis_data)
            }
        })
        stream_classifier = StreamClassifier(config=self.config)

        stream_classifier.map_source(payload)
        parsed = self.pre_parse_kinesis(payload)
        stream_classifier.classify_record(payload, parsed)

        return payload if payload.valid else None
    def test_map_source_1(self):
        """Payload Source Mapping 1

        Maps a payload sent on ``test_kinesis_stream`` and checks the
        resolved service, entity and the set of log names returned by
        ``_log_metadata``.
        """
        encoded = base64.b64encode('test_map_source data')
        payload = self.payload_generator(kinesis_stream='test_kinesis_stream',
                                         kinesis_data=encoded)

        stream_classifier = StreamClassifier(config=self.config)
        stream_classifier.map_source(payload)

        metadata = stream_classifier._log_metadata()

        # Log names expected for test_kinesis_stream
        expected_logs = {
            'test_log_type_json',
            'test_log_type_json_2',
            'test_log_type_json_nested',
            'test_log_type_json_nested_with_data',
            'test_log_type_csv',
            'test_log_type_csv_nested',
            'test_log_type_kv_auditd',
        }

        # Service, entity and metadata checks
        assert_equal(payload.service, 'kinesis')
        assert_equal(payload.entity, 'test_kinesis_stream')
        assert_equal(set(metadata.keys()), expected_logs)
    def test_classify_record_kinesis_kv(self):
        """Payload Classify KV

        Classifies an auditd-style key=value line sent on
        ``test_kinesis_stream`` and checks a handful of parsed values and the
        payload type.
        """
        auditd_line = (
            'type=SYSCALL msg=audit(1364481363.243:24287): '
            'arch=c000003e syscall=2 success=no exit=-13 a0=7fffd19c5592 a1=0 '
            'a2=7fffd19c4b50 a3=a items=1 ppid=2686 pid=3538 auid=500 uid=500 '
            'gid=500 euid=500 suid=500 fsuid=500 egid=500 sgid=500 fsgid=500 tty=pts0 '
            'ses=1 comm="cat" exe="/bin/cat" '
            'subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 '
            'key="sshd_config" type=CWD msg=audit(1364481363.243:24287):  '
            'cwd="/home/shadowman" type=PATH '
            'msg=audit(1364481363.243:24287): item=0 name="/etc/ssh/sshd_config" '
            'inode=409248 dev=fd:00 mode=0100600 ouid=0 ogid=0 '
            'rdev=00:00 obj=system_u:object_r:etc_t:s0')

        payload = self.payload_generator(kinesis_stream='test_kinesis_stream',
                                         kinesis_data=auditd_line)

        stream_classifier = StreamClassifier(config=self.config)
        stream_classifier.map_source(payload)

        parsed = self.pre_parse_kinesis(payload)
        stream_classifier.classify_record(payload, parsed)

        # The payload must be valid and yield a dict record
        assert_equal(payload.valid, True)
        record = payload.records[0]
        assert_equal(type(record), dict)

        # Values expected on the classified record, checked in this order
        expected_values = (
            ('type', 'SYSCALL'),
            ('suid', 500),
            ('pid', 3538),
            ('type_3', 'PATH'),
        )
        for key, expected in expected_values:
            assert_equal(record[key], expected)

        # Payload type
        assert_equal(payload.type, 'kv')
        assert_not_equal(payload.type, 'csv')
        assert_not_equal(payload.type, 'json')
    def test_classify_record_kinesis_nested_json(self):
        """Payload Classify Nested JSON

        Classifies a JSON record with a nested ``data`` dict sent on
        ``test_kinesis_stream`` and checks the matched log source, payload
        type, value types and a couple of values.
        """
        raw_fields = {
            'date': 'Jan 01 2017',
            'unixtime': '1485556524',
            'host': 'my-host-name',
            'data': {
                'key1': 'test',
                'key2': 'one'
            }
        }
        payload = self.payload_generator(
            kinesis_stream='test_kinesis_stream',
            kinesis_data=json.dumps(raw_fields))
        stream_classifier = StreamClassifier(config=self.config)
        stream_classifier.map_source(payload)

        parsed = self.pre_parse_kinesis(payload)
        stream_classifier.classify_record(payload, parsed)

        # The payload must be valid and yield a dict record
        assert_equal(payload.valid, True)
        record = payload.records[0]
        assert_equal(type(record), dict)

        # Matched log source
        assert_equal(payload.log_source, 'test_log_type_json_nested')

        # Payload type
        assert_equal(payload.type, 'json')
        assert_not_equal(payload.type, 'csv')

        # Expected Python type of each checked key
        for key, expected_type in (('date', str), ('unixtime', int), ('data', dict)):
            assert_equal(type(record[key]), expected_type)

        # Record values
        assert_equal(record['date'], 'Jan 01 2017')
        assert_equal(record['data']['key1'], 'test')
Exemple #18
0
    def run(self, event):
        """StreamAlert Lambda function handler.

        Loads the StreamAlert configuration, then for every record in the
        event: builds a payload, maps it to a configured source and hands it
        to the processing method for its service (s3, kinesis or sns).
        Records whose source cannot be mapped are skipped; records from any
        other service are logged and otherwise ignored.

        Args:
            event: An AWS event mapped to a specific source/entity (kinesis stream or
                an s3 bucket event) containing data emitted to the stream.

        Returns:
            ``self.alerts`` when ``self.return_alerts`` is set, otherwise None.
        """
        records = event.get('Records', [])
        LOGGER.debug('Number of Records: %d', len(records))

        config = load_config()

        # Name of the processing method for each supported service; looked
        # up on self only when a record for that service actually arrives.
        handler_names = {
            's3': '_s3_process',
            'kinesis': '_kinesis_process',
            'sns': '_sns_process',
        }

        for raw_record in records:
            payload = StreamPayload(raw_record=raw_record)
            classifier = StreamClassifier(config=config)

            # If the kinesis stream, s3 bucket, or sns topic is not in our config,
            # go onto the next record
            if not classifier.map_source(payload):
                continue

            handler_name = handler_names.get(payload.service)
            if handler_name is None:
                LOGGER.info('Unsupported service: %s', payload.service)
                continue

            getattr(self, handler_name)(payload, classifier)

        LOGGER.debug('%s alerts triggered', len(self.alerts))
        LOGGER.debug('\n%s\n', json.dumps(self.alerts, indent=4))

        return self.alerts if self.return_alerts else None