def parse(self, filepath, filecontent):
    """Parse a BUILD file's content by exec'ing it against the registered symbols.

    :param filepath: Path of the BUILD file being parsed; used to scope the parse context
      and in diagnostics.
    :param filecontent: The raw bytes of the BUILD file.
    :returns: The list of objects registered with the parse context's storage during exec.
    :raises ParseError: If imports are configured as banned and the file imports, or if the
      configured ``build_file_imports_behavior`` value is unrecognized.
    """
    python = filecontent.decode()

    # Mutate the parse context for the new path, then exec, and copy the resulting objects.
    # We execute with a (shallow) clone of the symbols as a defense against accidental
    # pollution of the namespace via imports or variable definitions. Defending against
    # _intentional_ mutation would require a deep clone, which doesn't seem worth the cost at
    # this juncture.
    self._parse_context._storage.clear(os.path.dirname(filepath))
    exec(python, dict(self._symbols))

    # Perform this check after successful execution, so we know the python is valid (and should
    # tokenize properly!)
    # Note that this is incredibly poor sandboxing. There are many ways to get around it.
    # But it's sufficient to tell most users who aren't being actively malicious that they're doing
    # something wrong, and it has a low performance overhead.
    if self._build_file_imports_behavior != 'allow' and 'import' in python:
        io_wrapped_python = StringIO(python)
        for token in tokenize.generate_tokens(io_wrapped_python.readline):
            if token[1] == 'import':
                line_being_tokenized = token[4]
                if self._build_file_imports_behavior == 'warn':
                    # NB: logger.warn() is a deprecated alias of logger.warning().
                    logger.warning(
                        '{} tried to import - import statements should be avoided ({})'
                        .format(filepath, line_being_tokenized))
                elif self._build_file_imports_behavior == 'error':
                    raise ParseError(
                        'import statements have been banned, but tried to import: {}'
                        .format(line_being_tokenized))
                else:
                    raise ParseError(
                        "Didn't know what to do for build_file_imports_behavior value {}"
                        .format(self._build_file_imports_behavior))
    return list(self._parse_context._storage.objects)
def parse(self, filepath, filecontent):
    """Exec the given BUILD file content and collect its serializable top-level objects.

    Each serializable object found is re-instantiated with its assignment name bound as
    its `name` attribute; an explicit `name` already present on the object must agree
    with the assignment name. Plain type objects (imports) are permitted and skipped.
    """
    symbols = {}
    exec(filecontent, self._globals, symbols)

    objects = []
    for name, obj in symbols.items():
        # Allow type imports
        if isinstance(obj, type):
            continue
        if not Serializable.is_serializable(obj):
            raise ParseError(f'Found a non-serializable top-level object: {obj}')

        attributes = obj._asdict()
        if 'name' in attributes:
            attributes = attributes.copy()
            redundant_name = attributes.pop('name', None)
            # A declared name is redundant with the assignment name and must match it.
            if redundant_name and redundant_name != name:
                raise ParseError(
                    'The object named {!r} is assigned to a mismatching name {!r}'
                    .format(redundant_name, name))
        objects.append(type(obj)(name=name, **attributes))
    return objects
def _import(typename): modulename, _, symbolname = typename.rpartition('.') if not modulename: raise ParseError('Expected a fully qualified type name, given {}'.format(typename)) try: mod = importlib.import_module(modulename) try: return getattr(mod, symbolname) except AttributeError: raise ParseError('The symbol {} was not found in module {} when attempting to convert ' 'type name {}'.format(symbolname, modulename, typename)) except ImportError as e: raise ParseError('Failed to import type name {} from module {}: {}' .format(typename, modulename, e))
def _object_encoder(obj, inline):
    """Encode `obj` for JSON serialization.

    Resolvables encode as their resolved value when `inline` is true, else as their
    address; Addresses encode as their reference string; Serializable objects encode as
    a dict of their non-empty attributes, with a `type_alias` entry identifying the
    object's type when one is not already present.
    """
    if isinstance(obj, Resolvable):
        return obj.resolve() if inline else obj.address
    if isinstance(obj, Address):
        return obj.reference()
    if not Serializable.is_serializable(obj):
        raise ParseError('Can only encode Serializable objects in JSON, given {!r} of type {}'
                         .format(obj, type(obj).__name__))

    attrs = obj._asdict()
    if 'type_alias' not in attrs:
        attrs = attrs.copy()
        attrs['type_alias'] = '{}.{}'.format(inspect.getmodule(obj).__name__,
                                             type(obj).__name__)
    # Drop falsy attribute values to keep the encoded form compact.
    return {key: value for key, value in attrs.items() if value}
def parse(self, filepath: str, filecontent: bytes):
    """Parse a BUILD file's content by exec'ing it against the registered symbols.

    Also tokenizes the file to warn about deprecated `globs` usage and to enforce the
    configured import behavior for BUILD files.

    :param filepath: Path of the BUILD file being parsed.
    :param filecontent: The raw bytes of the BUILD file.
    :returns: The list of objects registered with the parse context's storage during exec.
    :raises ParseError: If imports are banned and the file contains an import statement.
    """
    python = filecontent.decode()

    # Mutate the parse context for the new path, then exec, and copy the resulting objects.
    # We execute with a (shallow) clone of the symbols as a defense against accidental
    # pollution of the namespace via imports or variable definitions. Defending against
    # _intentional_ mutation would require a deep clone, which doesn't seem worth the cost at
    # this juncture.
    self._parse_context._storage.clear(os.path.dirname(filepath))
    exec(python, dict(self._symbols))

    # Perform this check after successful execution, so we know the python is valid (and should
    # tokenize properly!)
    # Note that this is incredibly poor sandboxing. There are many ways to get around it.
    # But it's sufficient to tell most users who aren't being actively malicious that they're doing
    # something wrong, and it has a low performance overhead.
    if "globs" in python or (self._build_file_imports_behavior != BuildFileImportsBehavior.allow
                             and 'import' in python):
        io_wrapped_python = StringIO(python)
        for token in tokenize.generate_tokens(io_wrapped_python.readline):
            token_str = token[1]
            lineno, _ = token[2]
            self.check_for_deprecated_globs_usage(token_str, filepath, lineno)
            if token_str == 'import':
                if self._build_file_imports_behavior == BuildFileImportsBehavior.allow:
                    continue
                elif self._build_file_imports_behavior == BuildFileImportsBehavior.warn:
                    logger.warning(
                        f'Import used in {filepath} at line {lineno}. Import statements should '
                        f'be avoided in BUILD files because they can easily break Pants caching and lead to '
                        f'stale results. Instead, consider rewriting your code into a Pants plugin: '
                        f'https://www.pantsbuild.org/howto_plugin.html'
                    )
                else:
                    # NB: the literal below was previously broken across a physical line inside
                    # the quotes; reconstructed here with implicit string concatenation.
                    raise ParseError(
                        f'Import used in {filepath} at line {lineno}. Import statements are banned in '
                        f'BUILD files in this repository and should generally be avoided because '
                        f'they can easily break Pants caching and lead to stale results. '
                        f'Instead, consider '
                        f'rewriting your code into a Pants plugin: '
                        f'https://www.pantsbuild.org/howto_plugin.html'
                    )
    return list(self._parse_context._storage.objects)
def parse(self, filepath, filecontent):
    """Parse the given json encoded string into a list of top-level objects found.

    The parser accepts both blank lines and comment lines (those beginning with optional
    whitespace followed by the '#' character) as well as more than one top-level JSON object.

    The parse also supports a simple protocol for serialized types that have an `_asdict`
    method. This includes `namedtuple` subtypes as well as any custom class with an `_asdict`
    method defined; see :class:`pants.engine.serializable.Serializable`.

    :param filepath: Path of the file being parsed; used only in error messages.
    :param filecontent: The file's content; decoded to text if necessary.
    :returns: The list of decoded top-level JSON objects, in document order.
    :raises ParseError: If a non-comment line does not begin a JSON object/array, or if a
      JSON blob fails to decode (with a line-numbered rendering of the document).
    """
    json = ensure_text(filecontent)

    decoder = self._decoder

    # Strip comment lines and blank lines, which we allow, but preserve enough information about the
    # stripping to constitute a reasonable error message that can be used to find the portion of the
    # JSON document containing the error.

    def non_comment_line(l):
        stripped = l.lstrip()
        return stripped if (stripped and not stripped.startswith('#')) else None

    offset = 0
    objects = []
    while True:
        lines = json[offset:].splitlines()
        if not lines:
            break

        # Strip whitespace and comment lines preceding the next JSON object.
        # NB: `while lines` (not `while True`) — a document tail consisting only of blank or
        # comment lines would otherwise empty `lines` and raise IndexError on `lines[0]`; the
        # recheck just below handles the exhausted case by breaking out of the outer loop.
        while lines:
            line = non_comment_line(lines[0])
            if not line:
                comment_line = lines.pop(0)
                offset += len(comment_line) + 1
            elif line.startswith('{') or line.startswith('['):
                # Account for leading space in this line that starts off the JSON object.
                offset += len(lines[0]) - len(line)
                break
            else:
                raise ParseError(f'Unexpected json line:\n{lines[0]}')

        lines = json[offset:].splitlines()
        if not lines:
            break

        # Prepare the JSON blob for parsing - strip blank and comment lines recording enough
        # information to reconstitute original offsets after the parse.
        comment_lines = []
        non_comment_lines = []
        for line_number, line in enumerate(lines):
            if non_comment_line(line):
                non_comment_lines.append(line)
            else:
                comment_lines.append((line_number, line))

        data = '\n'.join(non_comment_lines)
        try:
            obj, idx = decoder.raw_decode(data)
            objects.append(obj)
            if idx >= len(data):
                break
            offset += idx

            # Add back in any parsed blank or comment line offsets.
            parsed_line_count = len(data[:idx].splitlines())
            for line_number, line in comment_lines:
                if line_number >= parsed_line_count:
                    break
                offset += len(line) + 1
                parsed_line_count += 1
        except ValueError as e:
            # Render the document with line numbers (and the stripped comment lines put back
            # in their original positions) so the error location is findable by eye.
            json_lines = data.splitlines()
            col_width = len(str(len(json_lines)))
            col_padding = ' ' * col_width

            def format_line(line):
                return f'{col_padding} {line}'

            header_lines = [format_line(line) for line in json[:offset].splitlines()]
            formatted_json_lines = [
                ('{line_number:{col_width}}: {line}'.format(
                    col_width=col_width, line_number=line_number, line=line))
                for line_number, line in enumerate(json_lines, start=1)
            ]
            for line_number, line in comment_lines:
                formatted_json_lines.insert(line_number, format_line(line))

            raise ParseError(
                '{error}\nIn document at {filepath}:\n{json_data}'.format(
                    error=e,
                    filepath=filepath,
                    json_data='\n'.join(header_lines + formatted_json_lines)))

    return objects