def _parse(self, payload, data):
    """Parse a record into a declared type.

    Args:
        payload: A StreamAlert payload object
        data: Pre-parsed data string from a raw_event to be parsed

    Sets:
        payload.log_source: The detected log name from the data_sources config.
        payload.type: The record's type.
        payload.record: The parsed record.

    Returns:
        A boolean representing the success of the parse.
    """
    logger.debug(data)
    for log_name, attributes in self.log_metadata(payload).iteritems():
        if not payload.type:
            parser_name = attributes['parser']
        else:
            parser_name = payload.type

        options = {}
        options['hints'] = attributes.get('hints')
        options['delimiter'] = attributes.get('delimiter')
        options['separator'] = attributes.get('separator')
        options['parser'] = parser_name
        options['service'] = payload.service
        schema = attributes['schema']

        parser_class = get_parser(parser_name)
        parser = parser_class(data, schema, options)
        parsed_data = parser.parse()

        # Used for short circuiting parser determination
        if parser.payload_type:
            payload.type = parser.payload_type

        logger.debug('log name: %s', log_name)
        logger.debug('parsed_data: %s', parsed_data)

        if parsed_data:
            parsed_and_typed_data = self._convert_type(
                parsed_data, schema, options)
            if parsed_and_typed_data:
                payload.log_source = log_name
                payload.type = parser_name
                payload.record = parsed_and_typed_data
                return True

    return False
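
# Illustrative sketch only: the rough shape of a single log_metadata entry
# consumed by the loop in _parse above. The key names ('parser', 'schema',
# 'hints') mirror the attributes accessed in _parse; 'delimiter' and
# 'separator' are optional and omitted here. The log name, field names,
# and values are hypothetical, not taken from a real data_sources config.
EXAMPLE_LOG_METADATA = {
    'example_json_log': {
        'parser': 'json',
        'schema': {
            'name': 'string',
            'result': 'string',
            'count': 'integer'
        },
        'hints': None
    }
}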
def _convert_type(self, parsed_data, schema, options):
    """Convert a parsed payload's values into their declared types.

    If the schema is incorrectly defined for a particular field,
    this function will return False, which will make the payload invalid.

    Args:
        parsed_data: Parsed payload dict
        schema: data schema for a specific log source
        options: parser options dict

    Returns:
        parsed dict payload with typed values
    """
    # check for list types here
    payload = parsed_data
    for key, value in schema.iteritems():
        key = str(key)
        # if the schema value is declared as string
        if value == 'string':
            payload[key] = str(payload[key])
        # if the schema value is declared as integer
        elif value == 'integer':
            try:
                payload[key] = int(payload[key])
            except ValueError:
                logger.error('Invalid schema - %s is not an int', key)
                return False
        elif isinstance(value, OrderedDict):
            if len(value) == 0:
                pass
            else:
                schema = schema[key]
                # handle nested csv
                if isinstance(payload[key], str):
                    options['hints'] = options['hints'][key]
                    parse_csv = get_parser('csv')
                    parsed_nested_key = parse_csv(payload[key],
                                                  schema,
                                                  options).parse()
                    # Take the first element since a list is returned
                    payload[key] = parsed_nested_key[0]
                self._convert_type(payload[key], schema, options)
        else:
            logger.error('Invalid declared type - %s', value)

    return payload
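
# Minimal sketch of the schema-driven conversion performed by _convert_type
# above, assuming a flat schema containing only 'string' and 'integer'
# fields. The schema and record below are hypothetical examples, not real
# log definitions.
from collections import OrderedDict

EXAMPLE_SCHEMA = OrderedDict([('name', 'string'), ('count', 'integer')])
EXAMPLE_RECORD = {'name': 'osquery', 'count': '3'}

for example_key, declared_type in EXAMPLE_SCHEMA.iteritems():
    if declared_type == 'string':
        EXAMPLE_RECORD[example_key] = str(EXAMPLE_RECORD[example_key])
    elif declared_type == 'integer':
        EXAMPLE_RECORD[example_key] = int(EXAMPLE_RECORD[example_key])

# EXAMPLE_RECORD is now {'name': 'osquery', 'count': 3}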
def _parse(self, payload, data):
    """Parse a record into a declared type.

    Args:
        payload: A StreamAlert payload object
        data: Pre-parsed data string from a raw_event to be parsed

    Sets:
        payload.log_source: The detected log name from the data_sources config.
        payload.type: The record's type.
        payload.records: The parsed records.

    Returns:
        A boolean representing the success of the parse.
    """
    log_metadata = self.log_metadata(payload)
    # TODO(jack) make this process more efficient.
    # Separate out parsing with key matching.
    # Right now, if keys match but the type/parser is incorrect,
    # it has to start over
    for log_name, attributes in log_metadata.iteritems():
        # short circuit parser determination
        if not payload.type:
            parser_name = attributes['parser']
        else:
            parser_name = payload.type

        options = {}
        options['hints'] = attributes.get('hints')
        options['delimiter'] = attributes.get('delimiter')
        options['separator'] = attributes.get('separator')
        options['parser'] = parser_name
        options['service'] = payload.service
        schema = attributes['schema']

        # Setup the parser
        parser_class = get_parser(parser_name)
        parser = parser_class(data, schema, options)
        options['nested_keys'] = parser.__dict__.get('nested_keys')

        # A list of parsed records
        parsed_data = parser.parse()

        # Used for short circuiting parser determination
        if parser.payload_type:
            payload.type = parser.payload_type

        if parsed_data:
            logger.debug('log name: %s', log_name)
            logger.debug('parsed_data: %s', parsed_data)
            typed_data = []
            for data in parsed_data:
                # convert data types per the schema
                typed_data.append(self._convert_type(data, schema, options))

            if typed_data:
                payload.log_source = log_name
                payload.type = parser_name
                payload.records = typed_data
                return True

    return False
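
# Illustrative sketch (hypothetical values): unlike the earlier _parse,
# the version above expects parser.parse() to return a list of records,
# so a single raw event can yield several typed records on payload.records.
EXAMPLE_PARSED_DATA = [
    {'event_name': 'login', 'event_count': '1'},
    {'event_name': 'logout', 'event_count': '2'}
]
# With a schema declaring event_count as 'integer', payload.records would
# end up holding:
#   [{'event_name': 'login', 'event_count': 1},
#    {'event_name': 'logout', 'event_count': 2}]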
def setup(self):
    """Setup before each method"""
    # load config
    self.config = load_config('test/unit/conf')
    # load JSON parser class
    self.parser_class = get_parser('json')
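
# Hypothetical usage following setup(): construct the loaded JSON parser
# with the (data, schema, options) signature used in _parse above and parse
# a small record. The schema, options, and record here are made up for
# illustration and do not correspond to a fixture in test/unit/conf.
def test_json_parse_sketch(self):
    """Sketch of parsing a flat JSON record with the loaded parser class"""
    schema = {'name': 'string', 'result': 'string'}
    options = {'hints': None, 'parser': 'json', 'service': 's3'}
    data = '{"name": "example", "result": "ok"}'
    parser = self.parser_class(data, schema, options)
    parsed = parser.parse()
    assert parsed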