コード例 #1
0
ファイル: parsers.py プロジェクト: youprofit/pants
def parse_json(json, symbol_table=None):
    """Parse the given JSON encoded string into a list of the top-level objects found.

    The parser accepts both blank lines and comment lines (those beginning with optional
    whitespace followed by the '#' character) as well as more than one top-level JSON object.
    A document, or the remainder of a document after its last object, that contains nothing but
    blank and comment lines contributes no objects.

    The parser also supports a simple protocol for serialized types that have an `_asdict`
    method. This includes `namedtuple` subtypes as well as any custom class with an `_asdict`
    method defined; see :class:`pants.engine.exp.serializable.Serializable`.

    :param string json: A json encoded document with extra support for blank lines, comments and
                        multiple top-level objects.
    :param symbol_table: Handed to `_get_decoder` to build the decoder's `object_hook`;
                         presumably a mapping used to resolve serialized type names -- confirm
                         against `_get_decoder`.
    :returns: A list of decoded json data.
    :rtype: list
    :raises: :class:`ParseError` if there were any problems encountered parsing the given `json`.
    """
    # NB: No `encoding` argument is passed. Python 3's `JSONDecoder` does not accept one (it
    # raises TypeError) and Python 2's already defaults to UTF-8.
    decoder = JSONDecoder(object_hook=_get_decoder(symbol_table), strict=True)

    def non_comment_line(text):
        # Returns the left-stripped line, or None when the line is blank or a '#' comment.
        stripped = text.lstrip()
        return stripped if (stripped and not stripped.startswith('#')) else None

    # Strip comment lines and blank lines, which we allow, but preserve enough information about
    # the stripping to constitute a reasonable error message that can be used to find the portion
    # of the JSON document containing the error.
    offset = 0
    objects = []
    while True:
        lines = json[offset:].splitlines()
        if not lines:
            break

        # Skip whitespace and comment lines preceding the next JSON object.
        for candidate in lines:
            line = non_comment_line(candidate)
            if not line:
                # +1 accounts for the line terminator removed by `splitlines`.
                offset += len(candidate) + 1
            elif line.startswith(('{', '[')):
                # Account for leading space in this line that starts off the JSON object.
                offset += len(candidate) - len(line)
                break
            else:
                raise ParseError('Unexpected json line:\n{}'.format(candidate))
        else:
            # Only blank and comment lines remained, so there is nothing left to parse.
            return objects

        lines = json[offset:].splitlines()
        if not lines:
            break

        # Prepare the JSON blob for parsing - strip blank and comment lines, recording enough
        # information to reconstitute original offsets after the parse.
        comment_lines = []
        non_comment_lines = []
        for line_number, line in enumerate(lines):
            if non_comment_line(line):
                non_comment_lines.append(line)
            else:
                comment_lines.append((line_number, line))

        data = '\n'.join(non_comment_lines)
        try:
            obj, idx = decoder.raw_decode(data)
        except ValueError as e:
            # Render the document with line numbers on the lines that were actually fed to the
            # decoder so the decoder's line/column report can be matched up by eye.
            json_lines = data.splitlines()
            col_width = len(str(len(json_lines)))
            col_padding = ' ' * col_width

            def format_line(line):
                return '{col_padding}  {line}'.format(col_padding=col_padding, line=line)

            header_lines = [format_line(line) for line in json[:offset].splitlines()]

            formatted_json_lines = [
                '{line_number:{col_width}}: {line}'.format(
                    col_width=col_width, line_number=line_number, line=line)
                for line_number, line in enumerate(json_lines, start=1)
            ]

            # Re-insert the stripped blank and comment lines at their original positions; the
            # recorded indices are ascending, so each insert lands where the line originally sat.
            for line_number, line in comment_lines:
                formatted_json_lines.insert(line_number, format_line(line))

            raise ParseError('{error}\nIn document:\n{json_data}'.format(
                error=e,
                json_data='\n'.join(header_lines + formatted_json_lines)))

        objects.append(obj)
        if idx >= len(data):
            break
        offset += idx

        # Add back in the offsets of any blank or comment lines interleaved with the lines the
        # decoder consumed, since `idx` only counts characters of the stripped `data`.
        parsed_line_count = len(data[:idx].splitlines())
        for line_number, line in comment_lines:
            if line_number >= parsed_line_count:
                break
            offset += len(line) + 1
            parsed_line_count += 1

    return objects