def parse_python_ast(
    source_code: str, node: python_ast.AST, source_id: int = 0
) -> vyper_ast.VyperNode:
    """
    Convert a Python AST node (or a list of nodes) into its Vyper AST form.

    Parameters
    ----------
    source_code : str
        Source text, forwarded unchanged to the helper builders.
    node : python_ast.AST
        Value to convert. Lists are delegated to `_build_vyper_ast_list`;
        anything that is neither a list nor an AST node is returned as-is.
    source_id : int, optional
        Source id forwarded to the helper builders.

    Returns
    -------
    VyperNode
        Instance of the `vyper_ast` class named after the node type.

    Raises
    ------
    SyntaxException
        If `vyper_ast` has no class with the resolved name, or if a field
        listed in the target class' `only_empty_fields` holds a truthy value.
    """
    if isinstance(node, list):
        return _build_vyper_ast_list(source_code, node, source_id)
    if not isinstance(node, python_ast.AST):
        return node

    target_name = type(node).__name__
    if isinstance(node, python_ast.Constant):
        # `Constant` is mapped back onto the pre-3.8 literal node names
        literal = node.value
        if literal is None or isinstance(literal, bool):
            target_name = "NameConstant"
        else:
            legacy_names = (((int, float), "Num"), (str, "Str"), (bytes, "Bytes"))
            for python_types, legacy_name in legacy_names:
                if isinstance(literal, python_types):
                    target_name = legacy_name
                    break

    missing = object()
    vyper_class = getattr(vyper_ast, target_name, missing)
    if vyper_class is missing:
        raise SyntaxException(
            f'Invalid syntax (unsupported "{target_name}" Python AST node).', node
        )

    for forbidden in vyper_class.only_empty_fields:
        if forbidden not in node._fields:
            continue
        if getattr(node, forbidden, None):
            raise SyntaxException(
                f'Invalid Vyper Syntax. "{forbidden}" is an unsupported attribute field '
                f'on Python AST "{target_name}" class.',
                forbidden,
            )

    return vyper_class(
        **_build_vyper_ast_init_kwargs(source_code, node, vyper_class, source_id)
    )
def visit_Num(self, node):
    """
    Adjust numeric node class based on the value type.

    Python uses `Num` to represent floats and integers. Integers may also
    be given in binary, octal, decimal, or hexadecimal format. This method
    modifies `ast_type` to separate `Num` into more granular Vyper node
    classes:

    * `0x` literals of total length 42 or 66 become `Hex` (source text kept
      in `n`); any other even-length `0x` literal becomes `Bytes`
    * `0b` literals become `Bytes` and must contain a multiple of 8 bits
    * floats become `Decimal`, built from the literal's source text
    * remaining integers become `Int`
    """
    self.generic_visit(node)

    literal = node.node_source_code
    prefix = literal[:2].lower()

    # non base-10 literals are recognised by their prefix
    if prefix == "0x":
        if len(literal) % 2:
            raise SyntaxException(
                "Hex notation requires an even number of digits",
                self._source_code,
                node.lineno,
                node.col_offset,
            )
        if len(literal) in (42, 66):
            node.ast_type = "Hex"
            node.n = literal
        else:
            node.ast_type = "Bytes"
            # two hex digits per byte, not counting the "0x" prefix
            byte_count = len(literal) // 2 - 1
            node.value = int(literal, 16).to_bytes(byte_count, "big")
        return node

    if prefix == "0b":
        node.ast_type = "Bytes"
        leftover = (len(literal) - 2) % 8
        if leftover:
            raise SyntaxException(
                f"Bit notation requires a multiple of 8 bits. {8 - leftover} bit(s) are missing.",
                self._source_code,
                node.lineno,
                node.col_offset,
            )
        node.value = int(literal, 2).to_bytes(len(literal) // 8, "big")
        return node

    if isinstance(node.n, float):
        node.ast_type = "Decimal"
        node.n = Decimal(literal)
        return node

    if isinstance(node.n, int):
        node.ast_type = "Int"
        return node

    raise CompilerPanic(f"Unexpected type for Constant value: {type(node.n).__name__}")
def visit_Constant(self, node):
    """
    Handle `Constant` when using Python >=3.8

    In Python 3.8, `NameConstant`, `Num`, `Str`, and `Bytes` are deprecated
    in favor of `Constant`. To maintain consistency across versions,
    `ast_type` is rewritten to the matching <=3.7 node class name.

    Non-boolean ints and floats are handed to `visit_Num`. Any value that is
    not None, bool, str or bytes raises a `SyntaxException`.
    """
    # bool is a subclass of int, so it has to be excluded explicitly
    is_number = isinstance(node.value, (int, float)) and not isinstance(node.value, bool)
    if is_number:
        return self.visit_Num(node)

    self.generic_visit(node)

    literal = node.value
    if literal is None or isinstance(literal, bool):
        legacy_type = "NameConstant"
    elif isinstance(literal, str):
        legacy_type = "Str"
    elif isinstance(literal, bytes):
        legacy_type = "Bytes"
    else:
        raise SyntaxException(
            "Invalid syntax (unsupported Python Constant AST node).",
            self._source_code,
            node.lineno,
            node.col_offset,
        )

    node.ast_type = legacy_type
    return node
def parse_to_ast(
    source_code: str, source_id: int = 0, contract_name: Optional[str] = None
) -> vy_ast.Module:
    """
    Parses a Vyper source string and generates basic Vyper AST nodes.

    Parameters
    ----------
    source_code : str
        The Vyper source code to parse.
    source_id : int, optional
        Source id, forwarded to `annotate_python_ast`.
    contract_name : str, optional
        Contract name, forwarded to `annotate_python_ast`.

    Returns
    -------
    Module
        Result of `vy_ast.get_node` applied to the annotated Python AST.

    Raises
    ------
    ParserException
        If the source contains a null byte.
    SyntaxException
        If the pre-parsed source is not valid Python syntax.
    """
    has_null_byte = "\x00" in source_code
    if has_null_byte:
        raise ParserException("No null bytes (\\x00) allowed in the source code.")

    class_types, python_source = pre_parse(source_code)

    try:
        tree = python_ast.parse(python_source)
    except SyntaxError as parse_error:
        # TODO: Ensure 1-to-1 match of source_code:reformatted_code SyntaxErrors
        raise SyntaxException(
            str(parse_error), source_code, parse_error.lineno, parse_error.offset
        ) from parse_error

    annotate_python_ast(tree, source_code, class_types, source_id, contract_name)

    # Convert to Vyper AST.
    return vy_ast.get_node(tree)  # type: ignore
def _build_vyper_ast_init_kwargs(
    source_code: str,
    node: python_ast.AST,
    vyper_class: vyper_ast.VyperNode,
    class_name: str,
) -> Generator:
    """
    Lazily yield the `(name, value)` pairs used to initialise a Vyper node.

    The position attributes (`col_offset`, `lineno`), `node_id` and
    `source_code` come first, then `class_type` for class definitions,
    then every field of `node` that `vyper_class` does not ignore. Field
    values are converted recursively with `parse_python_ast`.

    Raises
    ------
    SyntaxException
        If a field listed in `vyper_class.only_empty_fields` is truthy.
    """
    # position attributes default to None when the node does not carry them
    for position_attr in ("col_offset", "lineno"):
        yield (position_attr, getattr(node, position_attr, None))

    yield ("node_id", node.node_id)  # type: ignore
    yield ("source_code", source_code)

    if isinstance(node, python_ast.ClassDef):
        yield ("class_type", node.class_type)  # type: ignore

    for field in node._fields:
        field_value = getattr(node, field)

        if field in vyper_class.ignored_fields:
            continue

        if field_value and field in vyper_class.only_empty_fields:
            raise SyntaxException(
                "Invalid Vyper Syntax. "
                f'"{field}" is an unsupported attribute field '
                f'on Python AST "{class_name}" class.',
                field_value,
            )

        converted = parse_python_ast(source_code=source_code, node=field_value)
        yield (field, converted)
def get_node(
    ast_struct: typing.Union[typing.Dict, python_ast.AST],
    parent: typing.Optional["VyperNode"] = None,
) -> "VyperNode":
    """
    Converts an AST structure to a vyper AST node.

    This is a recursive call, all child nodes of the input value are also
    converted to vyper nodes.

    Attributes
    ----------
    ast_struct: (dict, AST)
        Annotated python AST node or vyper AST dict to generate the node from.
        Non-dict input is replaced by its `__dict__`.
    parent: VyperNode, optional
        Parent node of the node being created.

    Returns
    -------
    VyperNode
        The generated AST object.

    Raises
    ------
    SyntaxException
        If this module defines no class named after `ast_struct['ast_type']`.
    """
    if not isinstance(ast_struct, dict):
        ast_struct = ast_struct.__dict__

    ast_type = ast_struct["ast_type"]

    # node classes are looked up by name among this module's attributes
    vy_class = getattr(sys.modules[__name__], ast_type, None)
    if vy_class is None:
        # the type name is wrapped in exactly one pair of single quotes
        raise SyntaxException(
            f"Invalid syntax (unsupported '{ast_type}' Python AST node).", ast_struct
        )

    return vy_class(parent=parent, **ast_struct)
def _raise_syntax_exc(error_msg: str, ast_struct: dict) -> None:
    """
    Raise a `SyntaxException` for a node given in its dict representation.

    The source text and position are read with `dict.get`, so any of
    `full_source_code`, `lineno` or `col_offset` that is absent is passed
    on as None. This function never returns normally.
    """
    location_keys = ("full_source_code", "lineno", "col_offset")
    source, line_number, column = (ast_struct.get(key) for key in location_keys)
    raise SyntaxException(error_msg, source, line_number, column)
def __init__(self, **kwargs):
    """
    Populate the node from keyword arguments.

    Keys that appear in `self.get_slots()` are set as attributes. Any other
    key is ignored when its value is falsy and raises a `SyntaxException`
    when its value is truthy.
    """
    for key, val in kwargs.items():
        if key in self.get_slots():
            setattr(self, key, val)
            continue

        if not val:
            # empty values for unknown fields are silently dropped
            continue

        raise SyntaxException(
            "Unsupported non-empty value (valid in Python, but invalid in Vyper) \n"
            f" field_name: {key}, class: {type(self)} value: {val}"
        )
def visit_Attribute(self, node):
    """
    Reject `msg.data` unless its ancestor is a call to `slice` or `len`.

    Raises
    ------
    SyntaxException
        If `node` is `msg.data` and the `func.id` of its ancestor is
        neither "slice" nor "len".
    """
    if node.get("value.id") != "msg" or node.attr != "data":
        return

    enclosing_func = node.get_ancestor().get("func.id")
    if enclosing_func in ("slice", "len"):
        return

    raise SyntaxException(
        "msg.data is only allowed inside of the slice or len functions",
        node.node_source_code,
        node.lineno,
        node.col_offset,
    )
def _validate_msg_data_attribute(node: vy_ast.Attribute) -> None:
    """
    Reject `msg.data` unless its ancestor is a call to `slice` or `len`.

    Raises
    ------
    SyntaxException
        If `node` is `msg.data` and the `func.id` of its ancestor is
        neither "slice" nor "len".
    """
    base = node.value
    if not isinstance(base, vy_ast.Name):
        return
    if base.id != "msg" or node.attr != "data":
        return

    enclosing_func = node.get_ancestor().get("func.id")
    if enclosing_func in ("slice", "len"):
        return

    raise SyntaxException(
        "msg.data is only allowed inside of the slice or len functions",
        node.node_source_code,
        node.lineno,  # type: ignore[attr-defined]
        node.col_offset,  # type: ignore[attr-defined]
    )
def get_node(
    ast_struct: Union[dict, python_ast.AST], parent: Optional["VyperNode"] = None
) -> "VyperNode":
    """
    Converts an AST structure to a vyper AST node.

    This is a recursive call, all child nodes of the input value are also
    converted to vyper nodes.

    Attributes
    ----------
    ast_struct: (dict, AST)
        Annotated python AST node or vyper AST dict to generate the node from.
        Non-dict input is replaced by its `__dict__`.
    parent: VyperNode, optional
        Parent node of the node being created.

    Returns
    -------
    VyperNode
        The generated AST object.

    Raises
    ------
    SyntaxException
        If this module defines no class named after `ast_struct['ast_type']`.
        `Delete`, `ExtSlice` and `Slice` get dedicated messages.
    """
    if not isinstance(ast_struct, dict):
        ast_struct = ast_struct.__dict__

    ast_type = ast_struct["ast_type"]

    # node classes are looked up by name among this module's attributes
    vy_class = getattr(sys.modules[__name__], ast_type, None)
    if vy_class:
        return vy_class(parent=parent, **ast_struct)

    # Read the source location with .get(): a struct without position data
    # must still produce the SyntaxException below, not a KeyError.
    err_args = (
        ast_struct.get("full_source_code"),
        ast_struct.get("lineno"),
        ast_struct.get("col_offset"),
    )

    if ast_type == "Delete":
        raise SyntaxException(
            "Deleting is not supported, use built-in clear() function", *err_args
        )
    if ast_type in ("ExtSlice", "Slice"):
        raise SyntaxException("Vyper does not support slicing", *err_args)
    raise SyntaxException(
        f"Invalid syntax (unsupported '{ast_type}' Python AST node)", *err_args
    )
def __init__(self, parent: Optional["VyperNode"] = None, **kwargs: dict):
    """
    AST node initializer method.

    Node objects are not typically instantiated directly, you should instead
    create them using the get_node() method.

    Parameters
    ----------
    parent: VyperNode, optional
        Node which contains this node.
    **kwargs : dict
        Dictionary of fields to be included within the node.

    Raises
    ------
    SyntaxException
        If a field listed in `_only_empty_fields` (and not in the slots)
        is given a truthy value.
    """
    self._parent = parent
    self._children: set = set()

    for field_name in NODE_SRC_ATTRIBUTES:
        # when a source offset is not available, use the parent's source offset
        value = kwargs.get(field_name)
        if value is None:
            value = getattr(parent, field_name, None)
        setattr(self, field_name, value)

    for field_name, value in kwargs.items():
        if field_name in NODE_SRC_ATTRIBUTES:
            # already handled by the loop above
            continue

        if field_name in self._translated_fields:
            field_name = self._translated_fields[field_name]

        if field_name in self.get_slots():
            if isinstance(value, list):
                value = [_to_node(i, self) for i in value]
            else:
                value = _to_node(value, self)
            setattr(self, field_name, value)

        elif value and field_name in self._only_empty_fields:
            # Read the location with .get(): kwargs lacking position data must
            # still surface this SyntaxException instead of a KeyError.
            raise SyntaxException(
                "Syntax is valid Python but not valid for Vyper\n"
                f"class: {type(self).__name__}, field_name: {field_name}",
                kwargs.get("full_source_code"),
                kwargs.get("lineno"),
                kwargs.get("col_offset"),
            )

    # add to children of parent last to ensure an accurate hash is generated
    if parent is not None:
        parent._children.add(self)
def parse_to_ast(
    source_code: str,
    source_id: int = 0,
    contract_name: Optional[str] = None,
    add_fn_node: Optional[str] = None,
) -> vy_ast.Module:
    """
    Parses a Vyper source string and generates basic Vyper AST nodes.

    Parameters
    ----------
    source_code : str
        The Vyper source code to parse.
    source_id : int, optional
        Source id, forwarded to `annotate_python_ast`.
    contract_name: str, optional
        Name of contract, forwarded to `annotate_python_ast`.
    add_fn_node: str, optional
        If truthy, the parsed module body is wrapped in a dummy Python AST
        `FunctionDef` with this name.

    Returns
    -------
    Module
        Result of `vy_ast.get_node` applied to the annotated Python AST.

    Raises
    ------
    ParserException
        If the source contains a null byte.
    SyntaxException
        If the pre-parsed source is not valid Python syntax.
    """
    has_null_byte = "\x00" in source_code
    if has_null_byte:
        raise ParserException("No null bytes (\\x00) allowed in the source code.")

    class_types, python_source = pre_parse(source_code)

    try:
        tree = python_ast.parse(python_source)
    except SyntaxError as parse_error:
        # TODO: Ensure 1-to-1 match of source_code:reformatted_code SyntaxErrors
        raise SyntaxException(
            str(parse_error), source_code, parse_error.lineno, parse_error.offset
        ) from parse_error

    # Add dummy function node to ensure local variables are treated as `AnnAssign`
    # instead of state variables (`VariableDecl`)
    if add_fn_node:
        wrapper = python_ast.FunctionDef(
            name=add_fn_node,
            args=python_ast.arguments(defaults=[]),
            body=tree.body,
            decorator_list=[],
        )
        tree.body = [wrapper]

    annotate_python_ast(tree, source_code, class_types, source_id, contract_name)

    # Convert to Vyper AST.
    return vy_ast.get_node(tree)  # type: ignore
def vyperize(code, *, class_types=None):
    """Recovers the vyper contract source code from its python-valid representation.

    This more or less undoes what ``vyper.ast.pre_parser.pre_parse`` does:
    a column-0 ``class`` keyword is replaced by the keyword stored in
    ``class_types`` for the name that follows it, the parentheses between
    that name and the header's ``:`` are dropped, and the ``:`` is moved
    two columns to the left.

    Parameters
    ----------
    code : str
        The python-valid formatted vyper source code to be "un-formatted"
        back into "pure" vyper code aka "vyperized".
    class_types: dict
        Mapping of class types contained in the contract. It is indexed by
        class name whenever a ``class`` header is restored.

    Raises
    ------
    SyntaxException
        If the code cannot be tokenized.
    """
    restored = []
    class_keyword = None  # the pending "class" token while inside a class header

    try:
        stream = tokenize(io.BytesIO(code.encode("utf-8")).readline)
        for tok in stream:
            in_header = class_keyword is not None

            if tok.type == NAME:
                if in_header:
                    # swap the previously emitted "class" for its vyper keyword
                    restored[-1] = class_keyword._replace(string=class_types[tok.string])
                if tok.string == "class" and tok.start[1] == 0:
                    class_keyword = tok

            elif tok.type == OP and in_header:
                if tok.string in ("(", ")"):
                    continue
                if tok.string == ":":
                    # shift left by the two columns the dropped "()" occupied
                    start_row, start_col = tok.start[0], tok.start[1]
                    end_row, end_col = tok.end[0], tok.end[1]
                    tok = tok._replace(
                        start=(start_row, start_col - 2),
                        end=(end_row, end_col - 2),
                    )
                    class_keyword = None

            restored.append(tok)
    except TokenError as e:
        raise SyntaxException(e.args[0], code, e.args[1][0], e.args[1][1]) from e

    return untokenize(restored).decode("utf-8")
def parse_python_ast(source_code: str, node: python_ast.AST) -> vyper_ast.VyperNode:
    """
    Convert a Python AST node (or a list of nodes) into its Vyper AST form.

    Lists are delegated to `_build_vyper_ast_list`. Values that are neither
    a list nor an AST node are returned unchanged.

    Raises
    ------
    SyntaxException
        If `vyper_ast` has no class named after the node's class.
    """
    if isinstance(node, list):
        return _build_vyper_ast_list(source_code, node)

    if not isinstance(node, python_ast.AST):
        return node

    node_type_name = type(node).__name__
    if not hasattr(vyper_ast, node_type_name):
        raise SyntaxException(
            f'Invalid syntax (unsupported "{node_type_name}" Python AST node).', node
        )

    target_class = getattr(vyper_ast, node_type_name)
    return target_class(
        **_build_vyper_ast_init_kwargs(source_code, node, target_class, node_type_name)
    )
def _validate_address_code_attribute(node: vy_ast.Attribute) -> None:
    """
    Restrict `<address>.code` to `slice(<address>.code, start, length)`.

    The access is accepted only when the node's ancestor is a call to the
    name `slice` with exactly three arguments, the third being an `Int` node.

    Raises
    ------
    SyntaxException
        If `.code` is read from an address-typed value in any other position.
    """
    value_type = get_exact_type_from_node(node.value)
    if not isinstance(value_type, AddressDefinition) or node.attr != "code":
        return

    # Validate `slice(<address>.code, start, length)` where `length` is constant
    call = node.get_ancestor()
    if isinstance(call, vy_ast.Call):
        is_slice_call = isinstance(call.func, vy_ast.Name) and call.func.id == "slice"
        has_literal_length = len(call.args) == 3 and isinstance(call.args[2], vy_ast.Int)
        if is_slice_call and has_literal_length:
            return

    raise SyntaxException(
        "(address).code is only allowed inside of a slice function with a constant length",
        node.node_source_code,
        node.lineno,  # type: ignore[attr-defined]
        node.col_offset,  # type: ignore[attr-defined]
    )
def _build_vyper_ast_init_kwargs(
    source_code: str,
    node: python_ast.AST,
    vyper_class: vyper_ast.VyperNode,
    class_name: str,
    source_id: int,
) -> Generator:
    """
    Lazily yield the `(name, value)` pairs used to initialise a Vyper node.

    Start and end positions are taken from the node's `first_token` and
    `last_token` attributes when present, otherwise they are None. When
    `last_token` is present a `src` entry of the form
    "<start>:<length>:<source_id>" is emitted as well. After that comes
    `class_type` for class definitions and every field of `node` that
    `vyper_class` does not ignore, converted with `parse_python_ast`.

    Raises
    ------
    SyntaxException
        If a field listed in `vyper_class.only_empty_fields` is truthy.
    """
    if hasattr(node, "first_token"):
        start_line, start_col = node.first_token.start  # type: ignore
    else:
        start_line, start_col = None, None
    yield ("col_offset", start_col)
    yield ("lineno", start_line)

    yield ("node_id", node.node_id)  # type: ignore
    yield ("source_code", source_code)

    has_last_token = hasattr(node, "last_token")
    if has_last_token:
        end_line, end_col = node.last_token.end  # type: ignore
    else:
        end_line, end_col = None, None
    yield ("end_lineno", end_line)
    yield ("end_col_offset", end_col)

    if has_last_token:
        offset = node.first_token.startpos  # type: ignore
        length = node.last_token.endpos - offset  # type: ignore
        yield ("src", f"{offset}:{length}:{source_id}")

    if isinstance(node, python_ast.ClassDef):
        yield ("class_type", node.class_type)  # type: ignore

    for field in node._fields:
        field_value = getattr(node, field)

        if field in vyper_class.ignored_fields:
            continue

        if field_value and field in vyper_class.only_empty_fields:
            raise SyntaxException(
                "Invalid Vyper Syntax. "
                f'"{field}" is an unsupported attribute field '
                f'on Python AST "{class_name}" class.',
                field_value,
            )

        converted = parse_python_ast(
            source_code=source_code, node=field_value, source_id=source_id
        )
        yield (field, converted)
def visit_Constant(self, node):
    """
    Special case to handle the `Constant` node type of Python >=3.8.

    Non-boolean ints and floats are handed to `visit_Num`. Otherwise
    `ast_type` is set to "NameConstant" (None / bool), "Str" or "Bytes";
    any other value raises a `SyntaxException`.
    """
    # bool is a subclass of int, so it has to be excluded explicitly
    is_number = isinstance(node.value, (int, float)) and not isinstance(node.value, bool)
    if is_number:
        return self.visit_Num(node)

    self.generic_visit(node)

    literal = node.value
    if literal is None or isinstance(literal, bool):
        legacy_type = "NameConstant"
    elif isinstance(literal, str):
        legacy_type = "Str"
    elif isinstance(literal, bytes):
        legacy_type = "Bytes"
    else:
        raise SyntaxException("Invalid syntax (unsupported Python Constant AST node).", node)

    node.ast_type = legacy_type
    return node
def __init__(self, parent: typing.Optional["VyperNode"] = None, **kwargs: dict):
    """
    AST node initializer method.

    Node objects are not typically instantiated directly, you should instead
    create them using the get_node() method.

    Parameters
    ----------
    parent: VyperNode, optional
        Node which contains this node.
    **kwargs : dict
        Dictionary of fields to be included within the node.

    Raises
    ------
    SyntaxException
        If a field listed in `_only_empty_fields` (and not in the slots)
        is given a truthy value.
    """
    self._parent = parent
    self._children: set = set()

    for raw_name, raw_value in kwargs.items():
        # some incoming field names are stored under a different attribute name
        if raw_name in self._translated_fields:
            attr_name = self._translated_fields[raw_name]
        else:
            attr_name = raw_name

        if attr_name in self.get_slots():
            if isinstance(raw_value, list):
                converted = [_to_node(item, self) for item in raw_value]
            else:
                converted = _to_node(raw_value, self)
            setattr(self, attr_name, converted)
            continue

        if raw_value and attr_name in self._only_empty_fields:
            raise SyntaxException(
                "Unsupported non-empty value (valid in Python, but invalid in Vyper) \n"
                f" field_name: {attr_name}, class: {type(self)} value: {raw_value}"
            )

    # add to children of parent last to ensure an accurate hash is generated
    if parent is not None:
        parent._children.add(self)
def pre_parse(code: str) -> Tuple[ClassTypes, str]:
    """
    Re-formats a vyper source string into a python source string and performs
    some validation.  More specifically,

    * Translates the keywords in `VYPER_CLASS_TYPES` into python "class" keyword
    * Validates "@version" pragma against current compiler version
    * Prevents direct use of python "class" keyword
    * Prevents use of python semi-colon statement separator

    Also returns a mapping of detected class names to the vyper keyword that
    introduced them.

    Parameters
    ----------
    code : str
        The vyper source code to be re-formatted.

    Returns
    -------
    dict
        Mapping of class types for the given source.
    str
        Reformatted python source string.

    Raises
    ------
    SyntaxException
        On a column-0 `class` keyword, on a `;` operator, or when the source
        cannot be tokenized.
    """
    output_tokens = []
    pending_keyword = None
    class_types: ClassTypes = {}

    try:
        stream = tokenize(io.BytesIO(code.encode("utf-8")).readline)
        for tok in stream:
            kind, text, begin, finish, source_line = tok
            emitted = tok

            if kind == COMMENT:
                if "@version" in text:
                    validate_version_pragma(text[1:], begin)

            elif kind == NAME:
                at_line_start = begin[1] == 0

                if text == "class" and at_line_start:
                    raise SyntaxException(
                        "The `class` keyword is not allowed. Perhaps you meant `contract` or `struct`?",
                        code,
                        begin[0],
                        begin[1],
                    )

                # Make note of contract or struct name along with the type keyword
                # that preceded it
                if pending_keyword is not None:
                    class_types[text] = pending_keyword
                    pending_keyword = None

                # Translate vyper-specific class keywords into python "class"
                # keyword
                if text in VYPER_CLASS_TYPES and at_line_start:
                    emitted = TokenInfo(NAME, "class", begin, finish, source_line)
                    pending_keyword = text

            elif (kind, text) == (OP, ";"):
                raise SyntaxException(
                    "Semi-colon statements not allowed", code, begin[0], begin[1]
                )

            output_tokens.append(emitted)
    except TokenError as e:
        raise SyntaxException(e.args[0], code, e.args[1][0], e.args[1][1]) from e

    return class_types, untokenize(output_tokens).decode("utf-8")
def pre_parse(code: str) -> Tuple[ModificationOffsets, str]:
    """
    Re-formats a vyper source string into a python source string and performs
    some validation.  More specifically,

    * Translates the keywords in `VYPER_CLASS_TYPES` into python "class" keyword
    * Translates the keywords in `VYPER_EXPRESSION_TYPES` into python "yield"
    * Validates "@version" pragma against current compiler version
    * Prevents direct use of python "class" and "yield" keywords
    * Rejects the deprecated "contract" keyword and the `log.` object form
    * Prevents use of python semi-colon statement separator

    Also returns a mapping from the start position of every replaced keyword
    to the name recorded for it ("<Keyword>Def" for class keywords, the
    capitalized keyword for expression keywords).

    Parameters
    ----------
    code : str
        The vyper source code to be re-formatted.

    Returns
    -------
    dict
        Mapping of offsets where source was modified.
    str
        Reformatted python source string.

    Raises
    ------
    SyntaxException
        On any of the rejected constructs above, or when the source cannot
        be tokenized.
    """
    output_tokens = []
    modification_offsets: ModificationOffsets = {}

    try:
        all_tokens = list(tokenize(io.BytesIO(code.encode("utf-8")).readline))

        for index, tok in enumerate(all_tokens):
            kind, text, begin, finish, source_line = tok
            emitted = tok

            if kind == COMMENT:
                if "@version" in text:
                    validate_version_pragma(text[1:], begin)

            elif kind == NAME:
                if text in ("class", "yield"):
                    raise SyntaxException(
                        f"The `{text}` keyword is not allowed. ", code, begin[0], begin[1]
                    )

                at_line_start = begin[1] == 0

                if text == "contract" and at_line_start:
                    raise SyntaxException(
                        "The `contract` keyword has been deprecated. Please use `interface`",
                        code,
                        begin[0],
                        begin[1],
                    )

                # look one token ahead to catch the old `log.Event(...)` form
                if text == "log" and all_tokens[index + 1].string == ".":
                    raise SyntaxException(
                        "`log` is no longer an object, please use it as a statement instead",
                        code,
                        begin[0],
                        begin[1],
                    )

                if text in VYPER_CLASS_TYPES and at_line_start:
                    emitted = TokenInfo(NAME, "class", begin, finish, source_line)
                    modification_offsets[begin] = f"{text.capitalize()}Def"
                elif text in VYPER_EXPRESSION_TYPES:
                    emitted = TokenInfo(NAME, "yield", begin, finish, source_line)
                    modification_offsets[begin] = text.capitalize()

            elif (kind, text) == (OP, ";"):
                raise SyntaxException(
                    "Semi-colon statements not allowed", code, begin[0], begin[1]
                )

            output_tokens.append(emitted)
    except TokenError as e:
        raise SyntaxException(e.args[0], code, e.args[1][0], e.args[1][1]) from e

    return modification_offsets, untokenize(output_tokens).decode("utf-8")
def visit_AnnAssign(self, node):
    """
    Validate a module-level annotated assignment and register its name.

    Handles, in order:

    * `implements: <Interface>` - delegates to the interface's
      `validate_implements` and returns
    * annotations wrapped in `constant(...)`, `public(...)` or
      `immutable(...)` - sets the matching flag and unwraps the call
    * constants - require a literal value of the expected type and are
      stored directly in the namespace
    * immutables - require an assignment somewhere in the module and are
      stored directly in the namespace
    * everything else - added as a member of `self` in the namespace

    The resolved type is stored in `node._metadata["type"]`.

    Raises
    ------
    VariableDeclarationException
        If the target is not a plain name, a constant has no value, or a
        non-constant variable has an initial value.
    StateAccessViolation
        If a constant's value does not pass `check_constant`.
    SyntaxException
        If an immutable is never the target of an `Assign` in the module.
    NamespaceCollision
        If the name has already been declared.
    """
    name = node.get("target.id")
    if name is None:
        raise VariableDeclarationException("Invalid module-level assignment", node)

    # `implements: Foo` is not a variable declaration, only an interface check
    if name == "implements":
        interface_name = node.annotation.id
        self.namespace[interface_name].validate_implements(node)
        return

    is_constant, is_public, is_immutable = False, False, False
    annotation = node.annotation
    if isinstance(annotation, vy_ast.Call):
        # the annotation is a function call, e.g. `foo: constant(uint256)`
        call_name = annotation.get("func.id")
        if call_name in ("constant", "public", "immutable"):
            validate_call_args(annotation, 1)
            if call_name == "constant":
                # declaring a constant
                is_constant = True

            elif call_name == "public":
                # declaring a public variable
                is_public = True

                # generate function type and add to metadata
                # we need this when building the public getter
                node._metadata["func_type"] = ContractFunction.from_AnnAssign(node)

            elif call_name == "immutable":
                # declaring an immutable variable
                is_immutable = True

                # mutability is checked automatically preventing assignment
                # outside of the constructor, here we just check a value is assigned,
                # not necessarily where
                assignments = self.ast.get_descendants(
                    vy_ast.Assign, filters={"target.id": node.target.id}
                )
                if not assignments:
                    # Special error message for common wrong usages via `self.<immutable name>`
                    wrong_self_attribute = self.ast.get_descendants(
                        vy_ast.Attribute, {"value.id": "self", "attr": node.target.id}
                    )
                    message = (
                        "Immutable variables must be accessed without 'self'"
                        if len(wrong_self_attribute) > 0
                        else "Immutable definition requires an assignment in the constructor"
                    )
                    raise SyntaxException(
                        message, node.node_source_code, node.lineno, node.col_offset
                    )

            # remove the outer call node, to handle cases such as `public(map(..))`
            annotation = annotation.args[0]

    # immutables use the CODE data location, everything else uses STORAGE
    data_loc = DataLocation.CODE if is_immutable else DataLocation.STORAGE
    type_definition = get_type_from_annotation(
        annotation, data_loc, is_constant, is_public, is_immutable
    )
    node._metadata["type"] = type_definition

    if is_constant:
        if not node.value:
            raise VariableDeclarationException("Constant must be declared with a value", node)
        if not check_constant(node.value):
            raise StateAccessViolation("Value must be a literal", node.value)

        validate_expected_type(node.value, type_definition)
        try:
            # constants are stored directly in the namespace, not under `self`
            self.namespace[name] = type_definition
        except VyperException as exc:
            raise exc.with_annotation(node) from None
        return

    # past this point only non-constant declarations remain
    if node.value:
        var_type = "Immutable" if is_immutable else "Storage"
        raise VariableDeclarationException(
            f"{var_type} variables cannot have an initial value", node.value
        )

    if is_immutable:
        try:
            # block immutable if storage variable already exists
            if name in self.namespace["self"].members:
                raise NamespaceCollision(
                    f"Value '{name}' has already been declared", node
                ) from None
            self.namespace[name] = type_definition
        except VyperException as exc:
            raise exc.with_annotation(node) from None
        return

    # remaining case: the variable becomes a member of `self`
    try:
        self.namespace.validate_assignment(name)
    except NamespaceCollision as exc:
        raise exc.with_annotation(node) from None
    try:
        self.namespace["self"].add_member(name, type_definition)
        node.target._metadata["type"] = type_definition
    except NamespaceCollision:
        raise NamespaceCollision(f"Value '{name}' has already been declared", node) from None
    except VyperException as exc:
        raise exc.with_annotation(node) from None