def parse_function_definition(self) -> ast.FunctionDefinition:
    """Parse a function definition: ``func name(params) [: types] { body }``.

    The function name is registered in ``self.symbols_global`` (with an
    unknown type) as soon as it has been read. The body is parsed against a
    fresh ``SymbolTree`` that initially contains only the parameters; a
    snapshot of that tree, taken before the body is parsed, is stored in the
    returned node.

    Returns:
        An ``ast.FunctionDefinition`` holding the consumed tokens, the
        function name, parameter names and types, return types, the body
        and the symbol tree snapshot.

    Raises:
        Exception: If the current token is not ``token.FUNC``.
        ParseException: If the name, parentheses or body are missing, if the
            name already exists in ``self.symbols_global``, or if a
            parameter name occurs twice.
    """
    toks = TokenList()
    # FUNC foo (bar : int) : str { ... }
    if not toks.add(self.match(token.FUNC)):
        raise Exception("Expected function definition.")
    # func FOO (bar : int) : str { ... }
    if not toks.add(self.match(token.IDENTIFIER)):
        raise ParseException("Expected function name.")
    name = self.peek(-1).lexeme
    if name in self.symbols_global:
        raise ParseException(f"Name '{name}' already exists in symbol table. Function definition impossible.")
    # Register function name before parsing parameter names
    # (no parameter name should have the function name!)
    # NOTE: this registration is not rolled back if parsing fails later on.
    self.symbols_global[name] = bongtypes.UnknownType()
    # (
    if not toks.add(self.match(token.LPAREN)):
        raise ParseException("Expected ( to start the parameter list.")
    # Parameters
    parameter_names, parameter_types = self.parse_parameters()
    # )
    if not toks.add(self.match(token.RPAREN)):
        raise ParseException("Expected ) to end the parameter list.")
    # Return types (optional, comma-separated, introduced by a colon)
    return_types: typing.List[ast.BongtypeIdentifier] = []
    if toks.add(self.match(token.COLON)):
        self.check_eof("Return type list expected.")
        return_types.append(self.parse_type())
        while toks.add(self.match(token.COMMA)):
            return_types.append(self.parse_type())
    # {  (only peeked here; the brace itself is left for block_stmt())
    if not self.peek().type == token.LBRACE:
        raise ParseException("Expected function body.")
    # New local symbol table (tree) for statement block.
    # We could just store the global symbol table in the object because
    # it will always be the same. But remembering the previous symbol
    # table here theoretically allows to parse function definitions inside
    # other functions (the local symbol table would be properly restored
    # then).
    enclosing_symbol_tree = self.symbol_tree
    self.symbol_tree = SymbolTree()
    try:
        # Parameters
        for param, _ in zip(parameter_names, parameter_types):
            if param in self.symbol_tree:
                raise ParseException(f"Argument name '{param}' appears twice in function definition")
            self.symbol_tree.register(param, bongtypes.UnknownType())
        # Snapshot before block is parsed (this changes the state of the tree)
        func_symbol_tree_snapshot = self.symbol_tree.take_snapshot()
        # Function body
        body = self.block_stmt()
    finally:
        # Restore the enclosing symbol tree even when parsing fails, so a
        # parse error does not leave the parser stuck on the function-local
        # tree.
        self.symbol_tree = enclosing_symbol_tree
    return ast.FunctionDefinition(toks, name, parameter_names, parameter_types, return_types, body, func_symbol_tree_snapshot)
def parse_struct_definition(self) -> ast.StructDefinition:
    """Parse a struct definition: ``struct Name { field : type, ... }``.

    The struct name is registered in ``self.symbols_global`` (with an
    unknown type) only after the whole definition has been parsed
    successfully.

    Returns:
        An ``ast.StructDefinition`` holding the consumed tokens, the struct
        name and a mapping from field name to its type identifier.

    Raises:
        Exception: If the current token is not ``token.STRUCT``.
        ParseException: If the name or braces are missing, if the name
            already exists in ``self.symbols_global``, if the struct has no
            fields, or if a field name occurs more than once.
    """
    toks = TokenList()
    # STRUCT foo {bar : int, ...}
    if not toks.add(self.match(token.STRUCT)):
        raise Exception("Expected struct definition.")
    # struct FOO {bar : int, ...}
    if not toks.add(self.match(token.IDENTIFIER)):
        raise ParseException("Expected struct name.")
    name = self.peek(-1).lexeme
    if name in self.symbols_global:
        raise ParseException(f"Name '{name}' already exists in global symbol table. Struct definition impossible.")
    # {
    if not toks.add(self.match(token.LBRACE)):
        raise ParseException("Expected { to start the field list.")
    # Fields (same syntax as a parameter list)
    field_names, field_types = self.parse_parameters()
    if not field_names:
        raise ParseException(f"Struct {name} is empty.")
    fields: typing.Dict[str, ast.BongtypeIdentifier] = {}
    for field_name, field_type in zip(field_names, field_types):
        if field_name in fields:
            # Both literals need the f-prefix; otherwise '{name}' would be
            # emitted verbatim instead of the struct name.
            raise ParseException(
                f"Field '{field_name}' found multiple times"
                f" in struct '{name}'.")
        fields[field_name] = field_type
    # If } occurs on its own line, an implicit semicolon is inserted
    # after the fields
    self.match(token.SEMICOLON)
    # }
    self.check_eof("Expected } to end the field list.")
    if not toks.add(self.match(token.RBRACE)):
        raise ParseException("Expected } to end the field list.")
    # If everything went fine, register the struct name
    self.symbols_global[name] = bongtypes.UnknownType()
    return ast.StructDefinition(toks, name, fields)
def let_lhs(self) -> typing.Tuple[typing.List[str],typing.List[typing.Optional[ast.BongtypeIdentifier]]]:
    """Parse the left-hand side of a ``let`` statement.

    Consumes the ``let`` keyword and the variable list, rejects statements
    that declare the same name twice, and registers every declared name in
    ``self.symbol_tree`` with an unknown type.

    Returns:
        A pair ``(variable_names, variable_types)`` exactly as produced by
        ``self.parse_let_variables()``.

    Raises:
        Exception: If the current token is not ``token.LET``.
        ParseException: If a name occurs twice in the statement.
    """
    if not self.match(token.LET):
        raise Exception("Expected let statement.")

    # Variable names with their (optional) type identifiers
    variable_names, variable_types = self.parse_let_variables()

    # Validate the complete list first, so that nothing is registered when
    # the statement turns out to contain a duplicate.
    seen = set()
    for candidate in variable_names:
        if candidate in seen:
            raise ParseException(f"Name '{candidate}' found twice in let statement")
        seen.add(candidate)

    # All names are distinct: register them in one go
    for declared in variable_names:
        self.symbol_tree.register(declared, bongtypes.UnknownType())

    return variable_names, variable_types
def parse_import(self):
    """Parse an import statement: ``import "path" as alias [;]``.

    A relative module path is joined onto ``self.basepath``. The alias is
    registered in ``self.symbols_global`` with an unknown type.

    Returns:
        An ``ast.Import`` built from the consumed tokens, the alias and the
        module path.

    Raises:
        Exception: If the current token is not ``token.IMPORT``.
        ParseException: If the path string, ``as`` or the alias is missing,
            or if the alias already exists in ``self.symbols_global``.
    """
    consumed = TokenList()

    # IMPORT "path" as alias
    if not consumed.add(self.match(token.IMPORT)):
        raise Exception("Expected import statement.")

    # import "PATH" as alias
    if not consumed.add(self.match(token.STRING)):
        raise ParseException("Expected module path as string.")
    module_path = self.peek(-1).lexeme

    # import "path" AS alias
    if not consumed.add(self.match(token.AS)):
        raise ParseException("Expected as")

    # import "path" as ALIAS
    if not consumed.add(self.match(token.IDENTIFIER)):
        raise ParseException("Expected module alias name.")
    alias = self.peek(-1).lexeme

    # A trailing semicolon is recorded when present but is not required
    consumed.add(self.match(token.SEMICOLON))

    # Relative paths are interpreted relative to the parser's base path
    module_path = (
        module_path
        if os.path.isabs(module_path)
        else os.path.join(self.basepath, module_path)
    )

    if alias in self.symbols_global:
        raise ParseException(f"Name '{alias}' already exists in global symbol table. Import impossible.")
    self.symbols_global[alias] = bongtypes.UnknownType()

    return ast.Import(consumed, alias, module_path)
def resolve_type(self, identifier: ast.BongtypeIdentifier, unit: ast.TranslationUnit, node: ast.BaseNode) -> bongtypes.ValueType:
    """Resolve a type identifier to a value type within a translation unit.

    Resolution proceeds in this order:

    1. Array levels are peeled off one per recursive call and wrapped in
       ``bongtypes.Array``.
    2. A qualified name (more than one typename component) is forwarded to
       the module named by its first component.
    3. A name whose global symbol is no longer of unknown type must be a
       ``bongtypes.Typedef``; its wrapped value type is returned.
    4. Otherwise the name must be a struct definition of ``unit``; its
       field types are resolved recursively and the result is stored in
       ``unit.symbols_global`` as a ``bongtypes.Typedef``.

    Args:
        identifier: The type identifier to resolve.
        unit: The translation unit whose symbols are consulted.
        node: The AST node passed to ``TypecheckException`` on failure.

    Returns:
        The resolved value type.

    Raises:
        TypecheckException: If a module or type can not be found, a symbol
            is not of the expected kind, or a struct type refers to itself.
    """
    # Arrays are resolved recursively, one level at a time
    if identifier.num_array_levels > 0:
        element_identifier = ast.BongtypeIdentifier(
            identifier.typename, identifier.num_array_levels - 1)
        return bongtypes.Array(
            self.resolve_type(element_identifier, unit, node))

    # Qualified name: hand the remainder over to the named module
    if len(identifier.typename) > 1:
        modulename = identifier.typename[0]
        # The stepwise checks below also narrow the types for mypy
        if modulename not in unit.symbols_global:
            raise TypecheckException(
                f"Module {modulename} not found in symbol table.", node)
        module_sym = unit.symbols_global[modulename]
        if not isinstance(module_sym, bongtypes.Module):
            raise TypecheckException(
                f"Symbol {modulename} is not a module, instead it is {module_sym}.",
                node)
        module_path = module_sym.path
        if module_path not in self.modules:
            raise TypecheckException(
                f"Module {module_sym} not found in module dictionary.", node)
        inner_identifier = ast.BongtypeIdentifier(identifier.typename[1:], 0)
        return self.resolve_type(
            inner_identifier, self.modules[module_path], node)

    # Unqualified name: the list holds exactly one item
    typename = identifier.typename[0]
    if typename not in unit.symbols_global:
        raise TypecheckException(
            f"Type {typename} can not be resolved.", node)

    # Symbols that are no longer unknown have been looked at before
    symbol = unit.symbols_global[typename]
    if not symbol.sametype(bongtypes.UnknownType()):
        # Hitting the in-progress marker means the type contains itself
        if isinstance(symbol, bongtypes.UnfinishedType):
            raise TypecheckException(
                f"Type {typename} is recursive. This is currently not allowed for several reasons.",
                node)
        if not isinstance(symbol, bongtypes.Typedef):
            raise TypecheckException(
                f"Type {typename} can not be resolved.", node)
        return symbol.value_type  # unpack

    # Anything still unknown has to be a struct; build it from its fields
    if typename not in unit.struct_definitions:
        raise TypecheckException(
            f"Type {typename} can not be resolved.", node)
    struct_def = unit.struct_definitions[typename]

    # Mark the type as in progress so recursive references are detected
    unit.symbols_global[typename] = bongtypes.UnfinishedType()
    fields: typing.Dict[str, bongtypes.ValueType] = {
        field_name: self.resolve_type(field_identifier, unit, struct_def)
        for field_name, field_identifier in struct_def.fields.items()
    }
    value_type = bongtypes.Struct(typename, fields)
    unit.symbols_global[typename] = bongtypes.Typedef(value_type)
    return value_type