Exemple #1
0
    def _parse(self, payload, data):
        """Try each configured log type until one parses and type-converts `data`.

        Args:
            payload: A StreamAlert payload object
            data: Pre parsed data string from a raw_event to be parsed

        Sets:
            payload.log_source: The name of the log whose schema matched.
            payload.type: The name of the parser that produced the record.
            payload.record: The parsed and type-converted record.

        Returns:
            True when a log type matched and the payload was populated,
            False when every candidate log type was exhausted.
        """
        logger.debug(data)

        for log_name, attributes in self.log_metadata(payload).iteritems():
            # Once a parser has reported the payload type, keep using it
            # instead of the parser declared for each candidate log.
            parser_name = payload.type or attributes['parser']

            options = {
                'hints': attributes.get('hints'),
                'delimiter': attributes.get('delimiter'),
                'separator': attributes.get('separator'),
                'parser': parser_name,
                'service': payload.service,
            }
            schema = attributes['schema']

            parser = get_parser(parser_name)(data, schema, options)
            parsed_data = parser.parse()

            # Used for short circuiting parser determination
            if parser.payload_type:
                payload.type = parser.payload_type

            logger.debug('log name: %s', log_name)
            logger.debug('parsed_data: %s', parsed_data)

            # Nothing parsed for this log type; move on to the next one.
            if not parsed_data:
                continue

            typed_record = self._convert_type(parsed_data, schema, options)
            # A falsy result means the values did not fit the declared schema.
            if not typed_record:
                continue

            payload.log_source = log_name
            payload.type = parser_name
            payload.record = typed_record
            return True

        return False
Exemple #2
0
    def _convert_type(self, parsed_data, schema, options):
        """Convert a parsed payload's values into their declared types.

        Walks every key of `schema` and coerces the matching value in
        `parsed_data` in place:

        * 'string' values are passed through str()
        * 'integer' values are passed through int()
        * non-empty OrderedDict values describe a nested record, which is
          converted recursively (a nested value that is still a string is
          first parsed with the 'csv' parser, using the hints for that key)
        * empty OrderedDict values are left untouched
        * any other declared type is logged as an error and skipped

        If the schema is incorrectly defined for a particular field,
        this function will return False which will make the payload
        invalid. A failure inside a nested record invalidates the whole
        payload as well.

        Args:
            parsed_data: Parsed payload dict (modified in place)
            schema: data schema for a specific log source
            options: parser options dict (never modified by this method)

        Returns:
            The parsed dict payload with typed values, or False when a value
            cannot be converted to its declared type.
        """
        payload = parsed_data
        for key, value in schema.iteritems():
            key = str(key)
            # if the schema value is declared as string
            if value == 'string':
                payload[key] = str(payload[key])

            # if the schema value is declared as integer
            elif value == 'integer':
                try:
                    payload[key] = int(payload[key])
                except (ValueError, TypeError):
                    # TypeError covers values such as None that int() rejects
                    logger.error('Invalid schema - %s is not an int', key)
                    return False

            elif isinstance(value, OrderedDict):
                # An empty nested schema means "accept anything" for this key
                if not value:
                    continue

                # `value` is the nested schema. Do not rebind `schema` here:
                # later keys of this loop still belong to the outer schema.
                nested_options = options
                # handle nested csv
                if isinstance(payload[key], str):
                    # Narrow the hints on a copy so the caller's options stay
                    # valid for the remaining keys and for other records.
                    nested_options = dict(options)
                    nested_options['hints'] = options['hints'][key]
                    parse_csv = get_parser('csv')
                    parsed_nested_key = parse_csv(payload[key],
                                                  value,
                                                  nested_options).parse()
                    if not parsed_nested_key:
                        logger.error('Invalid schema - %s is not a valid nested csv', key)
                        return False
                    # Call the first element since a list is returned
                    payload[key] = parsed_nested_key[0]

                # Propagate nested failures instead of silently ignoring them
                if self._convert_type(payload[key], value, nested_options) is False:
                    return False
            else:
                logger.error('Invalid declared type - %s', value)

        return payload
Exemple #3
0
    def _parse(self, payload, data):
        """Parse a record into a declared type.

        Each log type returned by self.log_metadata(payload) is tried in
        turn: its parser is run against `data`, and every parsed record is
        converted to the types declared in the log's schema. The first log
        type for which parsing succeeds and every record converts cleanly
        wins.

        Args:
            payload: A StreamAlert payload object
            data: Pre parsed data string from a raw_event to be parsed

        Sets:
            payload.log_source: The detected log name from the data_sources config.
            payload.type: The record's type.
            payload.records: The list of parsed and typed records.

        Returns:
            A boolean representing the success of the parse.
        """

        log_metadata = self.log_metadata(payload)
        # TODO(jack) make this process more efficient.
        # Separate out parsing with key matching.
        # Right now, if keys match but the type/parser is incorrect,
        # it has to start over
        for log_name, attributes in log_metadata.iteritems():
            # short circuit parser determination
            if not payload.type:
                parser_name = attributes['parser']
            else:
                parser_name = payload.type

            options = {}
            options['hints'] = attributes.get('hints')
            options['delimiter'] = attributes.get('delimiter')
            options['separator'] = attributes.get('separator')
            options['parser'] = parser_name
            options['service'] = payload.service
            schema = attributes['schema']

            # Setup the parser
            parser_class = get_parser(parser_name)
            parser = parser_class(data, schema, options)
            options['nested_keys'] = parser.__dict__.get('nested_keys')
            # A list of parsed records
            parsed_data = parser.parse()

            # Used for short circuiting parser determination
            if parser.payload_type:
                payload.type = parser.payload_type

            if not parsed_data:
                continue

            logger.debug('log name: %s', log_name)
            logger.debug('parsed_data: %s', parsed_data)

            typed_data = []
            # The loop variable must not be called `data`: that would clobber
            # the raw input needed when the next log type is tried.
            for record in parsed_data:
                # convert data types per the schema
                typed_record = self._convert_type(record, schema, options)
                if typed_record is False:
                    # The schema does not fit this record, so this log type
                    # is not a match; fall through to the next candidate.
                    break
                typed_data.append(typed_record)
            else:
                # Reached only when every record converted successfully
                if typed_data:
                    payload.log_source = log_name
                    payload.type = parser_name
                    payload.records = typed_data
                    return True
        return False
 def setup(self):
     """Setup before each method.

     Stores the configuration loaded from 'test/unit/conf' on self.config
     and the class returned by get_parser('json') on self.parser_class.
     """
     # load config
     self.config = load_config('test/unit/conf')
     # load JSON parser class
     self.parser_class = get_parser('json')