Example #1
0
    def _extract_records(self, json_payload, envelope):
        """Extract records from the original json payload using the JSON configuration

        Two mutually exclusive strategies are supported, selected by the parser
        options:

        * ``json_path``: every JSONPath match becomes a record. When the
          ``embedded_json`` option is set, each match is first decoded from a
          JSON string.
        * ``json_regex_key``: used only when no ``json_path`` is configured.
          The value stored under that key in the payload is searched with the
          parser regex and the captured text is decoded as a single record.

        Args:
            json_payload (dict): The parsed json data
            envelope: Value stored under ENVELOPE_KEY on every extracted
                record; nothing is added when it is falsy

        Returns:
            list: A list of JSON records extracted via JSON path or regex. The
                list is empty when neither strategy applies, or when every
                JSONPath match was skipped as invalid.
            bool: False when the JSONPath finds no matches, the regex does not
                match, or the regex capture is not a valid JSON object.
        """
        json_records = []
        json_path_expression = self.options.get('json_path')
        json_regex_key = self.options.get('json_regex_key')

        # Handle jsonpath extraction of records
        if json_path_expression:
            LOGGER.debug('Parsing records with JSONPath')
            records_jsonpath = jsonpath_rw.parse(json_path_expression)
            matches = records_jsonpath.find(json_payload)
            if not matches:
                return False

            # The option does not change between matches, so read it once
            embedded_json = self.options.get('embedded_json')
            for match in matches:
                record = match.value
                if embedded_json:
                    try:
                        record = json.loads(record)
                    except (ValueError, TypeError):
                        # ValueError: malformed JSON text.
                        # TypeError: the match is not a string at all
                        # (e.g. already a dict, or None).
                        LOGGER.warning('Embedded json is invalid')
                        continue

                # Valid embedded JSON may decode to a list or scalar, and a
                # JSONPath match may be any JSON type. Only dicts can carry
                # the envelope, so skip anything else instead of raising
                # AttributeError on update() below.
                if (embedded_json or envelope) and not isinstance(record, dict):
                    LOGGER.warning('Record is not a dict: %s', record)
                    continue

                if envelope:
                    # NOTE(review): without embedded_json the record is the
                    # matched value itself, which presumably is the same object
                    # held inside json_payload, so the payload would be
                    # mutated in place - confirm against jsonpath_rw.
                    record.update({ENVELOPE_KEY: envelope})
                json_records.append(record)

        # Handle nested json object regex matching
        elif json_regex_key and json_payload.get(json_regex_key):
            LOGGER.debug('Parsing records with JSON Regex Key')
            match = self.__regex.search(str(json_payload[json_regex_key]))
            if not match:
                return False

            # NOTE(review): Match.groups() takes a *default value*, not a group
            # name, so this reads the first capture group. Presumably the
            # pattern's first group is the one named 'json_blob' - confirm.
            match_str = match.groups('json_blob')[0]
            try:
                new_record = json.loads(match_str)
            except ValueError:
                LOGGER.debug('Matched regex string is not valid JSON: %s',
                             match_str)
                return False

            # Make sure the new_record is a dictionary and not a list.
            # Valid JSON can be either
            if not isinstance(new_record, dict):
                return False
            if envelope:
                new_record.update({ENVELOPE_KEY: envelope})

            json_records.append(new_record)

        return json_records
Example #2
0
    def _extract_records(self, json_payload):
        """Pull the individual records out of a parsed JSON payload

        JSONPath extraction (delegated to ``_extract_json_path``) is tried
        first. When it yields records they are returned as-is, unless the
        ``embedded_json`` option is set, in which case each one is decoded
        from JSON and only dict results are kept. When it yields nothing, the
        ``json_regex_key`` option is consulted: the payload value under that
        key is searched with the parser regex and the captured text is decoded
        as a single record.

        Args:
            json_payload (dict): The parsed json data

        Returns:
            list: The extracted records; empty when no strategy applies or
                every embedded record was skipped.
            bool: False when ``_extract_json_path`` returns False, the regex
                does not match, or the captured text is not a JSON object.
        """
        path_records = self._extract_json_path(json_payload)
        if path_records is False:
            return False

        if path_records:
            # Plain JSONPath results need no further processing
            if not self.options.get('embedded_json'):
                return path_records

            decoded_records = []
            for raw_record in path_records:
                try:
                    candidate = json.loads(raw_record)
                except (ValueError, TypeError):
                    LOGGER.debug('Embedded json is invalid')
                    continue

                if isinstance(candidate, dict):
                    decoded_records.append(candidate)
                else:
                    LOGGER.warning('Record is not a dict: %s', candidate)
            return decoded_records

        # No JSONPath results: fall back to nested json object regex matching
        regex_key = self.options.get('json_regex_key')
        if not (regex_key and json_payload.get(regex_key)):
            return []

        LOGGER.debug('Parsing records with JSON Regex Key')
        found = self.__regex.search(str(json_payload[regex_key]))
        if not found:
            return False

        blob = found.groups('json_blob')[0]
        try:
            parsed = json.loads(blob)
        except ValueError:
            LOGGER.debug('Matched regex string is not valid JSON: %s', blob)
            return False

        # Valid JSON can be a list as well as a dictionary; only a
        # dictionary counts as a record here.
        if not isinstance(parsed, dict):
            return False

        return [parsed]