Example #1
    def apply_template(self, test_event):
        """Apply default values to the given test event

        Args:
            test_event (dict): The loaded test event
        """
        event_log = self.cli_config['logs'].get(test_event['log'])

        parser = event_log['parser']
        schema = event_log['schema']
        configuration = event_log.get('configuration', {})

        # Add envelope keys
        schema.update(configuration.get('envelope_keys', {}))

        # Setup the parser to access default optional values
        self.parsers[parser] = self.parsers.get(parser, get_parser(parser))

        # Apply default values based on the declared schema
        default_test_event = {
            key: self.parsers[parser].default_optional_values(value)
            for key, value in schema.iteritems()
        }

        # Fill in the fields left out in the 'override_record' field,
        # and update the test event with a full 'data' key
        default_test_event.update(test_event['override_record'])
        test_event['data'] = default_test_event
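
The pattern in this example is: look up the log's declared schema, build a record of per-type defaults via the parser, then overlay the caller-supplied 'override_record'. A minimal, self-contained sketch of that default-then-override merge (the schema, defaults, and override values below are hypothetical, not taken from a StreamAlert config):

    # Hypothetical illustration of the default-then-override merge used above
    declared_schema = {'name': 'string', 'count': 'integer', 'active': 'boolean'}

    # Stand-in for parser.default_optional_values(); real defaults come from the parser class
    type_defaults = {'string': '', 'integer': 0, 'boolean': False}

    default_event = {key: type_defaults[value] for key, value in declared_schema.items()}
    default_event.update({'name': 'unit-test', 'count': 3})  # plays the role of 'override_record'

    print(default_event)  # {'name': 'unit-test', 'count': 3, 'active': False}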
Example #2
    def _process_log_schemas(self, payload):
        """Get any log schemas that matched this log format

        Args:
            payload: A StreamAlert payload object

        Returns:
            list: Contains any schemas that matched this log format
                Each list entry contains the namedtuple of 'SchemaMatch' with
                values of log_name, root_schema, parser, and parsed_data
        """
        schema_match = namedtuple(
            'SchemaMatch', 'log_name, root_schema, parser, parsed_data')
        schema_matches = []
        log_info = self.get_log_info_for_source()

        # Loop over all logs declared in logs.json
        for log_name, attributes in log_info.iteritems():
            # Get the parser type to use for this log
            parser_name = payload.type or attributes['parser']

            schema = attributes['schema']
            options = attributes.get('configuration', {})

            # Setup the parser class
            parser_class = get_parser(parser_name)
            parser = parser_class(options)

            # Get a list of parsed records
            LOGGER.debug('Trying schema: %s', log_name)
            parsed_data = parser.parse(schema, payload.pre_parsed_record)

            if not parsed_data:
                continue

            LOGGER.debug('Parsed %d records with schema %s', len(parsed_data),
                         log_name)

            if SUPPORT_MULTIPLE_SCHEMA_MATCHING:
                schema_matches.append(
                    schema_match(log_name, schema, parser, parsed_data))
                continue

            log_patterns = parser.options.get('log_patterns')
            if all(
                    parser.matched_log_pattern(rec, log_patterns)
                    for rec in parsed_data):
                return [schema_match(log_name, schema, parser, parsed_data)]

        return schema_matches
Example #3
    def _convert_type(self, parsed_data, schema, options):
        """Convert a parsed payload's values into their declared types.

        If the schema is incorrectly defined for a particular field,
        this function will return False which will make the payload
        invalid.

        Args:
            parsed_data: Parsed payload dict
            schema: data schema for a specific log source
            options: parser options dict

        Returns:
            parsed dict payload with typed values
        """
        # Cast each value in the payload to the type declared in the schema
        payload = parsed_data
        for key, value in schema.iteritems():
            key = str(key)
            # if the schema value is declared as string
            if value == 'string':
                payload[key] = str(payload[key])

            # if the schema value is declared as integer
            elif value == 'integer':
                try:
                    payload[key] = int(payload[key])
                except ValueError as e:
                    logger.error('Invalid schema - %s is not an int', key)
                    return False

            elif isinstance(value, OrderedDict):
                # only recurse into non-empty nested schemas
                if value:
                    # handle nested csv
                    if isinstance(payload[key], str):
                        options['hints'] = options['hints'][key]
                        parse_csv = get_parser('csv')
                        parsed_nested_key = parse_csv(payload[key],
                                                      schema[key],
                                                      options).parse()
                        # Take the first element since a list is returned
                        payload[key] = parsed_nested_key[0]

                    self._convert_type(payload[key], schema[key], options)
            else:
                logger.error('Invalid declared type - %s', value)

        return payload
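
_convert_type walks the declared schema and casts each parsed value to its declared type, recursing into nested OrderedDict schemas and falling back to the csv parser when a string value holds nested CSV. A stripped-down sketch of the flat case, using a made-up schema and record rather than anything from the StreamAlert config:

    # Minimal sketch of per-schema type casting; the schema and record are hypothetical
    def convert_types(record, schema):
        for key, declared in schema.items():
            if declared == 'string':
                record[key] = str(record[key])
            elif declared == 'integer':
                record[key] = int(record[key])  # raises ValueError if the data is not numeric
        return record

    print(convert_types({'bytes': '42', 'host': 10},
                        {'bytes': 'integer', 'host': 'string'}))
    # {'bytes': 42, 'host': '10'}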
Example #4
    def _process_log_schemas(self, payload, data):
        """Get any log schemas that matched this log format

        Args:
            payload: A StreamAlert payload object
            data: Pre-parsed data string from a raw_event to be parsed

        Returns:
            [list] A list containing any schemas that matched this log format
                Each list entry contains the namedtuple of 'ClassifiedLog' with
                values of log_name, root_schema, parser, and parsed_data
        """
        classified_log = namedtuple('ClassifiedLog', 'log_name, root_schema, parser, parsed_data')
        log_metadata = self._log_metadata()
        valid_parses = []

        # Loop over all logs declared in logs.json
        for log_name, attributes in log_metadata.iteritems():
            # get the parser type to use for this log
            parser_name = payload.type or attributes['parser']

            schema = attributes['schema']
            options = attributes.get('configuration', {})

            # Setup the parser class
            parser_class = get_parser(parser_name)
            parser = parser_class(options)

            # Get a list of parsed records
            parsed_data = parser.parse(schema, data)

            LOGGER.debug('schema: %s', schema)
            if not parsed_data:
                continue

            if SUPPORT_MULTIPLE_SCHEMA_MATCHING:
                valid_parses.append(classified_log(log_name, schema, parser, parsed_data))
                continue

            log_patterns = parser.options.get('log_patterns')
            if all(parser.matched_log_pattern(rec, log_patterns) for rec in parsed_data):
                return [classified_log(log_name, schema, parser, parsed_data)]

        return valid_parses
Example #5
    def test_process_subkeys_nested_records(self):
        """Rules Engine - Required Subkeys with Nested Records"""
        def cloudtrail_us_east_logs(rec):
            return ('us-east' in rec['awsRegion']
                    and 'AWS' in rec['requestParameters']['program'])

        rule_attrs = RuleAttributes(
            rule_name='cloudtrail_us_east_logs',
            rule_function=cloudtrail_us_east_logs,
            matchers=[],
            datatypes=[],
            logs=['test_log_type_json_nested'],
            outputs=['s3:sample_bucket'],
            req_subkeys={'requestParameters': ['program']})

        data = json.dumps({
            'Records': [
                {
                    'eventVersion': '1.05',
                    'eventID': '2',
                    'eventTime': '3',
                    'requestParameters': {
                        'program': 'AWS CLI'
                    },
                    'eventType': 'CreateSomeResource',
                    'responseElements': 'Response',
                    'awsRegion': 'us-east-1',
                    'eventName': 'CreateResource',
                    'userIdentity': {
                        'name': 'john',
                        'key': 'AVC124313414'
                    },
                    'eventSource': 'Kinesis',
                    'requestID': '12345',
                    'userAgent': 'AWS CLI v1.3109',
                    'sourceIPAddress': '127.0.0.1',
                    'recipientAccountId': '123456123456'
                },
                {
                    'eventVersion': '1.05',
                    'eventID': '2',
                    'eventTime': '3',
                    'requestParameters': {
                        'program': 'AWS UI'
                    },
                    'eventType': 'CreateSomeOtherResource',
                    'responseElements': 'Response',
                    'awsRegion': 'us-east-2',
                    'eventName': 'CreateResource',
                    'userIdentity': {
                        'name': 'ann',
                        'key': 'AD114313414'
                    },
                    'eventSource': 'Lambda',
                    'requestID': '12345',
                    'userAgent': 'Google Chrome 42',
                    'sourceIPAddress': '127.0.0.2',
                    'recipientAccountId': '123456123456'
                },
                {
                    'eventVersion': '1.05',
                    'eventID': '2',
                    'eventTime': '3',
                    # Translates from null in JSON to None in Python
                    'requestParameters': None,
                    'eventType': 'CreateSomeResource',
                    'responseElements': 'Response',
                    'awsRegion': 'us-east-1',
                    'eventName': 'CreateResource',
                    'userIdentity': {
                        'name': 'john',
                        'key': 'AVC124313414'
                    },
                    'eventSource': 'Kinesis',
                    'requestID': '12345',
                    'userAgent': 'AWS CLI',
                    'sourceIPAddress': '127.0.0.1',
                    'recipientAccountId': '123456123456'
                }
            ]
        })

        schema = self.config['logs']['test_cloudtrail']['schema']
        options = self.config['logs']['test_cloudtrail']['configuration']

        parser_class = get_parser('json')
        parser = parser_class(options)
        parsed_result = parser.parse(schema, data)

        valid_record = [
            rec for rec in parsed_result
            if rec['requestParameters'] is not None
        ][0]
        valid_subkey_check = StreamRules.process_subkeys(
            valid_record, 'json', rule_attrs)
        assert_true(valid_subkey_check)

        invalid_record = [
            rec for rec in parsed_result if rec['requestParameters'] is None
        ][0]
        invalid_subkey_check = StreamRules.process_subkeys(
            invalid_record, 'json', rule_attrs)
        assert_false(invalid_subkey_check)
Example #6
    def setup(self):
        """Setup before each method"""
        # load config
        self.config = load_config('test/unit/conf')
        # load the gzip-JSON parser class
        self.parser_class = get_parser('gzip-json')
Example #7
    def setup_class(cls):
        """Setup the class before any methods"""
        # load config
        cls.config = load_config('tests/unit/conf')
        # load the parser class
        cls.parser_class = get_parser(cls._parser_type())
Example #8
    def setup_class(cls):
        """Setup the class before any methods"""
        # load config
        cls.config = load_config('test/unit/conf')
        # load the key-value (kv) parser class
        cls.parser_class = get_parser('kv')
Example #9
    def _parse(self, payload, data):
        """Parse a record into a declared type.

        Args:
            payload: A StreamAlert payload object
            data: Pre-parsed data string from a raw_event to be parsed

        Sets:
            payload.log_source: The detected log name from the data_sources config.
            payload.type: The record's type.
            payload.records: The parsed record.

        Returns:
            A boolean representing the success of the parse.
        """

        log_metadata = self.log_metadata(payload)
        # TODO(jack) make this process more efficient.
        # Separate out parsing with key matching.
        # Right now, if keys match but the type/parser is not correct,
        # it has to start over
        for log_name, attributes in log_metadata.iteritems():
            # short circuit parser determination
            if not payload.type:
                parser_name = attributes['parser']
            else:
                parser_name = payload.type

            options = {}
            options['hints'] = attributes.get('hints')
            options['delimiter'] = attributes.get('delimiter')
            options['separator'] = attributes.get('separator')
            options['parser'] = parser_name
            options['service'] = payload.service
            schema = attributes['schema']

            # Setup the parser
            parser_class = get_parser(parser_name)
            parser = parser_class(data, schema, options)
            options['nested_keys'] = parser.__dict__.get('nested_keys')
            # A list of parsed records
            parsed_data = parser.parse()

            # Used for short circuiting parser determination
            if parser.payload_type:
                payload.type = parser.payload_type

            if parsed_data:
                logger.debug('log name: %s', log_name)
                logger.debug('parsed_data: %s', parsed_data)
                typed_data = []
                for record in parsed_data:
                    # convert data types per the schema
                    typed_data.append(self._convert_type(record, schema, options))

                if typed_data:
                    payload.log_source = log_name
                    payload.type = parser_name
                    payload.records = typed_data
                    return True
        return False
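
Across all of these examples, get_parser() is used in the same basic flow: resolve the parser class by name, instantiate it with the per-log options, and run it against the declared schema and the raw data (Example #9 shows an older signature that passes data and schema to the constructor instead of to parse()). A condensed sketch of that flow in the style of Examples #2 and #4, assuming the same StreamAlert imports as the snippets above and using invented log attributes:

    # Hypothetical usage of the get_parser() lookup shown in the examples above
    attributes = {
        'parser': 'json',
        'schema': {'eventName': 'string', 'awsRegion': 'string'},
        'configuration': {},
    }

    parser_class = get_parser(attributes['parser'])              # resolve the parser class by name
    parser = parser_class(attributes.get('configuration', {}))   # instantiate with per-log options
    parsed_records = parser.parse(
        attributes['schema'],
        '{"eventName": "CreateResource", "awsRegion": "us-east-1"}')

    if parsed_records:
        print('matched %d record(s)' % len(parsed_records))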