def parse_json(json, symbol_table=None):
    """Parses the given json encoded string into a list of top-level objects found.

    The parser accepts both blank lines and comment lines (those beginning with optional
    whitespace followed by the '#' character) as well as more than one top-level JSON object.

    The parser also supports a simple protocol for serialized types that have an `_asdict`
    method. This includes `namedtuple` subtypes as well as any custom class with an `_asdict`
    method defined; see :class:`pants.engine.exp.serializable.Serializable`.

    :param string json: A json encoded document with extra support for blank lines, comments and
                        multiple top-level objects.
    :param symbol_table: Passed through to `_get_decoder` to build the decoder's `object_hook`.
                         NOTE(review): presumably maps type names to types - confirm against
                         `_get_decoder`.
    :returns: A list of decoded json data; empty if the document holds only blank and comment
              lines.
    :rtype: list
    :raises: :class:`ParseError` if there were any problems encountered parsing the given `json`.
    """
    # NOTE(review): `encoding` is a Python 2 `JSONDecoder` keyword; current Python 3 releases
    # reject it - confirm the target runtime before porting.
    decoder = JSONDecoder(encoding='UTF-8', object_hook=_get_decoder(symbol_table), strict=True)

    # Strip comment lines and blank lines, which we allow, but preserve enough information about
    # the stripping to constitute a reasonable error message that can be used to find the portion
    # of the JSON document containing the error.

    def non_comment_line(text):
        # Returns the left-stripped line, or None for a blank or '#' comment line.
        stripped = text.lstrip()
        return stripped if (stripped and not stripped.startswith('#')) else None

    # `offset` is the index into `json` where the not-yet-parsed remainder begins.
    offset = 0
    objects = []
    while True:
        lines = json[offset:].splitlines()
        if not lines:
            break

        # Strip whitespace and comment lines preceding the next JSON object. The loop is bounded
        # by `lines` so that a document (or document tail) made up solely of blank and comment
        # lines ends the parse cleanly instead of indexing into an empty list.
        while lines:
            line = non_comment_line(lines[0])
            if not line:
                # The `+ 1` accounts for a single-character line terminator.
                offset += len(lines.pop(0)) + 1
            elif line.startswith(('{', '[')):
                # Account for leading space in this line that starts off the JSON object.
                offset += len(lines[0]) - len(line)
                break
            else:
                raise ParseError('Unexpected json line:\n{}'.format(lines[0]))

        # Re-split from the adjusted offset; nothing left means only comments/blanks remained.
        lines = json[offset:].splitlines()
        if not lines:
            break

        # Prepare the JSON blob for parsing - strip blank and comment lines, recording enough
        # information to reconstitute original offsets after the parse.
        comment_lines = []
        non_comment_lines = []
        for line_number, line in enumerate(lines):
            if non_comment_line(line):
                non_comment_lines.append(line)
            else:
                comment_lines.append((line_number, line))

        data = '\n'.join(non_comment_lines)
        try:
            obj, idx = decoder.raw_decode(data)
        except ValueError as e:
            # Rebuild the document for the error message: already-consumed lines and comment
            # lines are shown unnumbered, the lines handed to the decoder are numbered from 1.
            json_lines = data.splitlines()
            col_width = len(str(len(json_lines)))
            col_padding = ' ' * col_width

            def format_line(line):
                return '{col_padding} {line}'.format(col_padding=col_padding, line=line)

            header_lines = [format_line(line) for line in json[:offset].splitlines()]

            formatted_json_lines = [
                '{line_number:{col_width}}: {line}'.format(
                    col_width=col_width, line_number=line_number, line=line)
                for line_number, line in enumerate(json_lines, start=1)
            ]

            # Re-insert the stripped comment/blank lines at their original line positions.
            for line_number, line in comment_lines:
                formatted_json_lines.insert(line_number, format_line(line))

            raise ParseError('{error}\nIn document:\n{json_data}'.format(
                error=e,
                json_data='\n'.join(header_lines + formatted_json_lines)))

        objects.append(obj)
        if idx >= len(data):
            # The decoder consumed everything that was not a comment or blank line.
            break
        offset += idx

        # Add back in any parsed blank or comment line offsets: `idx` was measured against the
        # stripped `data`, so every comment/blank line that sat within the consumed region must
        # be counted as well.
        parsed_line_count = len(data[:idx].splitlines())
        for line_number, line in comment_lines:
            if line_number >= parsed_line_count:
                break
            offset += len(line) + 1
            parsed_line_count += 1

    return objects