def parse(self, filepath, filecontent):
    parse_globals = self._globals

    python = filecontent
    symbols = {}
    exec(python, parse_globals, symbols)

    objects = []
    for name, obj in symbols.items():
        if isinstance(obj, type):
            # Allow type imports.
            continue

        if not Serializable.is_serializable(obj):
            raise ParseError(f"Found a non-serializable top-level object: {obj}")

        attributes = obj._asdict()
        if "name" in attributes:
            attributes = attributes.copy()
            redundant_name = attributes.pop("name", None)
            if redundant_name and redundant_name != name:
                raise ParseError(
                    "The object named {!r} is assigned to a mismatching name {!r}".format(
                        redundant_name, name
                    )
                )
        obj_type = type(obj)
        named_obj = obj_type(name=name, **attributes)
        objects.append(named_obj)
    return objects
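# Illustrative sketch (not part of the original module) of the exec-and-rename
# mechanism parse() uses above, with a stand-in namedtuple playing the role of
# a Serializable type. The names "Fruit", "content", and "apple" are hypothetical.
from collections import namedtuple

Fruit = namedtuple("Fruit", ["name", "color"])

content = 'apple = Fruit(name=None, color="red")'
symbols = {}
# Assignments in the executed source land in `symbols`; the exposed types stay
# in the globals dict, which is why parse() skips entries that are types.
exec(content, {"Fruit": Fruit}, symbols)
for name, obj in symbols.items():
    attributes = obj._asdict()
    attributes.pop("name", None)
    # Re-instantiate the object under its assigned name, as parse() does.
    print(type(obj)(name=name, **attributes))  # Fruit(name='apple', color='red')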
def error_on_imports(
    build_file_content: str, filepath: str, behavior: BuildFileImportsBehavior
) -> None:
    # Perform this check after successful execution, so we know the Python is valid (and should
    # tokenize properly!)
    # Note that this is incredibly poor sandboxing. There are many ways to get around it.
    # But it's sufficient to tell most users who aren't being actively malicious that they're doing
    # something wrong, and it has a low performance overhead.
    if "import" in build_file_content:
        io_wrapped_python = StringIO(build_file_content)
        for token in tokenize.generate_tokens(io_wrapped_python.readline):
            token_str = token[1]
            lineno, _ = token[2]
            if token_str != "import":
                continue
            if behavior == BuildFileImportsBehavior.warn:
                logger = logging.getLogger(__name__)
                logger.warning(
                    f"Import used in {filepath} at line {lineno}. Import statements should "
                    "be avoided in BUILD files because they can easily break Pants caching and "
                    "lead to stale results. Instead, consider rewriting your code into a Pants "
                    "plugin: https://www.pantsbuild.org/howto_plugin.html"
                )
            else:
                raise ParseError(
                    f"Import used in {filepath} at line {lineno}. Import statements are banned in "
                    "BUILD files in this repository and should generally be avoided because "
                    "they can easily break Pants caching and lead to stale results. Instead, "
                    "consider rewriting your code into a Pants plugin: "
                    "https://www.pantsbuild.org/howto_plugin.html"
                )
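# Minimal sketch of the token-scanning approach above, stripped of the
# Pants-specific behavior switch. The sample content is hypothetical.
import tokenize
from io import StringIO

sample = "import os\nx = 1\nfrom sys import path\n"
for tok in tokenize.generate_tokens(StringIO(sample).readline):
    # A NAME token whose text is "import" catches both `import x` and
    # `from x import y`; string matching alone would also hit comments.
    if tok.type == tokenize.NAME and tok.string == "import":
        print(f"import at line {tok.start[0]}")  # prints lines 1 and 3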
def _import(typename):
    modulename, _, symbolname = typename.rpartition(".")
    if not modulename:
        raise ParseError(f"Expected a fully qualified type name, given {typename}")
    try:
        mod = importlib.import_module(modulename)
        try:
            return getattr(mod, symbolname)
        except AttributeError:
            raise ParseError(
                "The symbol {} was not found in module {} when attempting to convert "
                "type name {}".format(symbolname, modulename, typename)
            )
    except ImportError as e:
        raise ParseError(
            "Failed to import type name {} from module {}: {}".format(typename, modulename, e)
        )
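# Illustrative sketch of the import_module/getattr mechanism _import() relies
# on, using a real stdlib symbol; the error handling above is elided here.
import importlib

modulename, _, symbolname = "collections.OrderedDict".rpartition(".")
symbol = getattr(importlib.import_module(modulename), symbolname)
print(symbol)  # <class 'collections.OrderedDict'>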
def _object_encoder(obj, inline):
    if isinstance(obj, Resolvable):
        return obj.resolve() if inline else obj.address
    if isinstance(obj, Address):
        return obj.reference()
    if not Serializable.is_serializable(obj):
        raise ParseError(
            "Can only encode Serializable objects in JSON, given {!r} of type {}".format(
                obj, type(obj).__name__
            )
        )

    encoded = obj._asdict()
    if "type_alias" not in encoded:
        encoded = encoded.copy()
        encoded["type_alias"] = f"{inspect.getmodule(obj).__name__}.{type(obj).__name__}"
    return {k: v for k, v in encoded.items() if v}
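# Sketch of how an encoder hook like _object_encoder plugs into json.dumps via
# the `default` parameter. "Fruit" is a hypothetical stand-in exposing _asdict;
# the Resolvable/Address handling is elided. A plain class is used rather than
# a namedtuple because json.dumps serializes tuples natively and would never
# call the hook for them.
import inspect
import json

class Fruit:
    def __init__(self, name, color):
        self.name, self.color = name, color

    def _asdict(self):
        return {"name": self.name, "color": self.color}

def encode(obj):
    encoded = obj._asdict().copy()
    if "type_alias" not in encoded:
        encoded["type_alias"] = f"{inspect.getmodule(obj).__name__}.{type(obj).__name__}"
    return {k: v for k, v in encoded.items() if v}

print(json.dumps(Fruit("apple", "red"), default=encode))
# {"name": "apple", "color": "red", "type_alias": "__main__.Fruit"}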
def error_on_imports(build_file_content: str, filepath: str) -> None:
    # Perform this check after successful execution, so we know the Python is valid (and should
    # tokenize properly!)
    # Note that this is incredibly poor sandboxing. There are many ways to get around it.
    # But it's sufficient to tell most users who aren't being actively malicious that they're doing
    # something wrong, and it has a low performance overhead.
    if "import" not in build_file_content:
        return
    io_wrapped_python = StringIO(build_file_content)
    for token in tokenize.generate_tokens(io_wrapped_python.readline):
        token_str = token[1]
        lineno, _ = token[2]
        if token_str != "import":
            continue
        raise ParseError(
            f"Import used in {filepath} at line {lineno}. Import statements are banned in "
            "BUILD files because they can easily break Pants caching and lead to stale results. "
            "\n\nInstead, consider writing a macro (https://pants.readme.io/docs/macros) or "
            "writing a plugin (https://pants.readme.io/docs/plugins-overview). If you are using "
            "the V1 version of Pants, see https://www.pantsbuild.org/howto_plugin.html."
        )
def tokenize(self) -> list[tokenize.TokenInfo]:
    _bytes_stream = BytesIO("\n".join(self.lines).encode("utf-8"))
    try:
        return list(tokenize.tokenize(_bytes_stream.readline))
    except tokenize.TokenError as e:
        raise ParseError(f"Failed to parse {self.path}: {e}")
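# Minimal sketch of the bytes round-trip above: tokenize.tokenize() expects a
# readline callable that yields bytes, hence the encode/BytesIO dance. The
# sample lines are hypothetical.
import tokenize
from io import BytesIO

lines = ["x = 1", "y = x + 2"]
stream = BytesIO("\n".join(lines).encode("utf-8"))
for tok in tokenize.tokenize(stream.readline):
    print(tokenize.tok_name[tok.type], repr(tok.string))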
def parse(self, filepath, filecontent):
    """Parse the given JSON-encoded string into a list of the top-level objects found.

    The parser accepts both blank lines and comment lines (those beginning with optional
    whitespace followed by the '#' character) as well as more than one top-level JSON object.

    The parser also supports a simple protocol for serialized types that have an `_asdict`
    method. This includes `namedtuple` subtypes as well as any custom class with an `_asdict`
    method defined; see :class:`pants.engine.serializable.Serializable`.
    """
    json = ensure_text(filecontent)

    decoder = self._decoder

    # Strip comment lines and blank lines, which we allow, but preserve enough information
    # about the stripping to constitute a reasonable error message that can be used to find
    # the portion of the JSON document containing the error.

    def non_comment_line(l):
        stripped = l.lstrip()
        return stripped if (stripped and not stripped.startswith("#")) else None

    offset = 0
    objects = []
    while True:
        lines = json[offset:].splitlines()
        if not lines:
            break

        # Strip whitespace and comment lines preceding the next JSON object.
        while True:
            line = non_comment_line(lines[0])
            if not line:
                comment_line = lines.pop(0)
                offset += len(comment_line) + 1
            elif line.startswith("{") or line.startswith("["):
                # Account for leading space in this line that starts off the JSON object.
                offset += len(lines[0]) - len(line)
                break
            else:
                raise ParseError(f"Unexpected JSON line:\n{lines[0]}")

        lines = json[offset:].splitlines()
        if not lines:
            break

        # Prepare the JSON blob for parsing - strip blank and comment lines, recording enough
        # information to reconstitute the original offsets after the parse.
        comment_lines = []
        non_comment_lines = []
        for line_number, line in enumerate(lines):
            if non_comment_line(line):
                non_comment_lines.append(line)
            else:
                comment_lines.append((line_number, line))

        data = "\n".join(non_comment_lines)
        try:
            obj, idx = decoder.raw_decode(data)
            objects.append(obj)
            if idx >= len(data):
                break
            offset += idx

            # Add back in any parsed blank or comment line offsets.
            parsed_line_count = len(data[:idx].splitlines())
            for line_number, line in comment_lines:
                if line_number >= parsed_line_count:
                    break
                offset += len(line) + 1
                parsed_line_count += 1
        except ValueError as e:
            json_lines = data.splitlines()
            col_width = len(str(len(json_lines)))
            col_padding = " " * col_width

            def format_line(line):
                # Two trailing spaces align unnumbered lines with the "N: " prefix below.
                return f"{col_padding}  {line}"

            header_lines = [format_line(line) for line in json[:offset].splitlines()]
            formatted_json_lines = [
                "{line_number:{col_width}}: {line}".format(
                    col_width=col_width, line_number=line_number, line=line
                )
                for line_number, line in enumerate(json_lines, start=1)
            ]
            for line_number, line in comment_lines:
                formatted_json_lines.insert(line_number, format_line(line))

            raise ParseError(
                "{error}\nIn document at {filepath}:\n{json_data}".format(
                    error=e,
                    filepath=filepath,
                    json_data="\n".join(header_lines + formatted_json_lines),
                )
            )

    return objects
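# Sketch of the raw_decode() loop at the heart of parse() above: after comment
# and blank lines are stripped, successive top-level objects are decoded from a
# single string. The sample document is hypothetical, and the offset
# bookkeeping used for error reporting is elided.
import json

document = '# comment\n{"a": 1}\n\n{"b": 2}\n'
data = "\n".join(
    line for line in document.splitlines()
    if line.strip() and not line.lstrip().startswith("#")
)
decoder = json.JSONDecoder()
offset, objects = 0, []
while offset < len(data):
    # raw_decode returns the decoded object and the index just past it,
    # allowing multiple top-level objects in one string.
    obj, idx = decoder.raw_decode(data[offset:])
    objects.append(obj)
    offset += idx
    # Skip whitespace separating adjacent objects.
    while offset < len(data) and data[offset].isspace():
        offset += 1
print(objects)  # [{'a': 1}, {'b': 2}]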