def interpret_FunctionDeclaration(code: str, node: Node, symbol_table: SymbolTable):
    """Register a function declaration in the symbol table.

    Raises a runtime error when the function identifier is already present,
    otherwise stores the declaration node under its identifier.

    Args:
        code         : The characters that are being interpreted
        node         : The FunctionDeclaration node to register
        symbol_table : The symbol table to register the function in

    Returns:
        The updated symbol table.
    """
    if symbol_table_symbol_exists(symbol_table, node.id_):
        # Bug fix: the original passed the previously stored node as an extra
        # positional argument before `code`, which does not match the
        # (node, code, message, raise) call shape used everywhere else in
        # this file — normalized to the common 4-argument form.
        # NOTE(review): confirm generate_error_message has no overload that
        # accepts the previous declaration node.
        generate_error_message(
            node, code,
            "Runtime Error, found duplicate function identifier", True)
    return symbol_table_set(symbol_table, node.id_, node)
def interpret_Identifier(code: str, node: Identifier, symbol_table: SymbolTable):
    """Resolve an identifier and push its value as the return symbol.

    Raises an error when the identifier is not present in the symbol table.

    Args:
        code         : The characters that are being interpreted
        node         : The Identifier node to resolve
        symbol_table : The symbol table to look the identifier up in

    Returns:
        The symbol table with the resolved value added as a return symbol.
    """
    value = symbol_table_get(symbol_table, node.name_)
    # `is None` (identity) instead of `== None` (equality) — PEP 8 idiom;
    # also avoids surprises from custom __eq__ implementations.
    if value is None:
        generate_error_message(node, code, f"{node.name_} is not defined", True)
    return symbol_table_add_return_symbol(symbol_table, value)
def parse_function_call_parameters_loop(
        characters: str,
        tokens: List[Token]) -> Tuple[List['Node'], List['Token']]:
    """Recursively parse the parameters of a function call.

    Grammar:
        1. The parameter list ends at the closing TokenType.CALL token.
        2. Consecutive parameters must be separated by a '|'.

    Args:
        characters : The characters that are being lexed, parsed, interpreted
        tokens     : List of tokens that need to be parsed

    Returns:
        A list of parameter nodes and the tokens that are left to parse.
        Raises a Syntax Error when a grammar rule is violated.
    """
    param, remaining = parse_expr.parse_expression(characters, tokens)
    first, *rest = remaining
    # The closing CALL token terminates the parameter list.
    if first.tokentype_ == TokenTypes.CALL:
        return [param], remaining
    if first.tokentype_ != TokenTypes.SEPARATOR:
        generate_error_message(first, characters,
                               "Missing '|' between multiple parameters", True)
    trailing, remaining = parse_function_call_parameters_loop(characters, rest)
    return [param] + trailing, remaining
def parse_if_statement_test(
        characters: str,
        tokens: List['Token']) -> Tuple['BinaryExpression', List['Token']]:
    """Parse the test (condition) expression of an if-statement.

    Grammar:
        1. The if keyword must be followed by a binary expression.
        2. The test must be followed by an '––>' token.
        3. The '––>' token must be followed by a newline.

    Args:
        characters : The characters that are being lexed, parsed, interpreted
        tokens     : List of tokens that need to be parsed

    Returns:
        The parsed test expression and the tokens left to parse.
        Raises a Syntax Error when a grammar rule is violated.
    """
    condition, remaining = parse_expr.parse_expression(characters, tokens)
    current, *remaining = remaining
    if current.tokentype_ != TokenTypes.INDENTATION:
        generate_error_message(current, characters,
                               "Expected '––>' after if statement", True)
    current, *remaining = remaining
    if current.tokentype_ != TokenTypes.NEW_LINE:
        generate_error_message(current, characters,
                               "Expected new line after if statement", True)
    return condition, remaining
def parse_function_params(
        characters: str,
        tokens: List['Token']) -> Tuple[List['Node'], List['Token']]:
    """Recursively parse the parameter declarations of a function.

    Grammar sequence:
        1. A TokenTypes.INDENTATION ends the parameter list and must be
           followed by a TokenTypes.NEW_LINE (the function body starts there).
        2. A TokenTypes.NEW_LINE found before a TokenTypes.INDENTATION is an
           error.
        3. Otherwise expect TokenTypes.SEPARATOR, then TokenTypes.PARAMETER,
           then TokenTypes.IDENTIFIER; build an Identifier node and recurse.

    Args:
        characters : The characters that are being lexed, parsed, interpreted
        tokens     : List of tokens that need to be parsed

    Returns:
        A list of Identifier nodes making up the function parameters and the
        tokens that are left to parse.
        Raises a Syntax Error when a grammar rule is violated.
    """
    current, *rest = tokens
    # An '––>' token terminates the parameter list.
    if current.tokentype_ == TokenTypes.INDENTATION:
        current, *rest = rest
        if current.tokentype_ != TokenTypes.NEW_LINE:
            generate_error_message(
                current, characters,
                "Expected newline '––>' after function declaration", True)
        return [], rest
    if current.tokentype_ == TokenTypes.NEW_LINE:
        generate_error_message(current, characters,
                               "Expected '––>' after function declaration",
                               True)
    if current.tokentype_ != TokenTypes.SEPARATOR:
        generate_error_message(
            current, characters,
            "Expected '|' or '––>' after function parameter declaration", True)
    current, *rest = rest
    if current.tokentype_ != TokenTypes.PARAMETER:
        generate_error_message(
            current, characters,
            "Expected 'parameter declaration' after function separator", True)
    current, *rest = rest
    if current.tokentype_ != TokenTypes.IDENTIFIER:
        generate_error_message(
            current, characters,
            "Expected 'identifier' after function parameter declaration", True)
    parameter = Identifier(loc_=current.loc_, range_=current.range_,
                           name_=current.value_)
    more_params, remaining = parse_function_params(characters, rest)
    return [parameter] + more_params, remaining
def parse_operand(
        characters: str,
        tokens: List['Token']) -> Optional[Tuple['Node', List['Token']]]:
    """Parse a single operand of an expression.

    Rules:
        - TokenTypes.PLUS / TokenTypes.MINUS -> UnaryExpression wrapping the
          next operand
        - TokenTypes.INT / TokenTypes.FLOAT  -> Literal
        - TokenTypes.CALL                    -> function-call expression
        - TokenTypes.IDENTIFIER              -> Identifier
        - TokenTypes.LEFT_PARENTHESIES       -> parenthesized expression,
          which must be closed by a TokenTypes.RIGHT_PARENTHESIES

    Args:
        characters : The characters that are being lexed, parsed, interpreted
        tokens     : The tokens that need to be parsed

    Returns:
        A tuple of the parsed operand node and the leftover tokens.
        Raises a Syntax Error when no valid operand is found.
    """
    head, *tail = tokens
    if head.tokentype_ in (TokenTypes.PLUS, TokenTypes.MINUS):
        node, tail = parse_operand(characters, tail)
        # The unary expression spans from the sign token to its argument.
        loc_ = {"start": head.loc_["start"], "end": node.loc_["end"]}
        range_ = [head.range_[0], node.range_[1]]
        return UnaryExpression(loc_=loc_,
                               range_=range_,
                               operator_=head.tokentype_,
                               argument_=node), tail
    if head.tokentype_ in (TokenTypes.INT, TokenTypes.FLOAT):
        # Bug fix: FLOAT tokens were previously converted with int(), which
        # raises ValueError for values such as "3.14"; convert per token type.
        value = (int(head.value_) if head.tokentype_ == TokenTypes.INT
                 else float(head.value_))
        return Literal(loc_=head.loc_,
                       range_=head.range_,
                       value_=value,
                       raw_=head.value_), tail
    if head.tokentype_ == TokenTypes.CALL:
        result, tokens = pass_func.parse_function_call(characters, tokens)
        return result, tokens
    if head.tokentype_ == TokenTypes.IDENTIFIER:
        return Identifier(loc_=head.loc_, range_=head.range_,
                          name_=head.value_), tail
    if head.tokentype_ == TokenTypes.LEFT_PARENTHESIES:
        node, tokens = parse_expression(characters, tail)
        head, *tail = tokens
        if head.tokentype_ != TokenTypes.RIGHT_PARENTHESIES:
            generate_error_message(head, characters,
                                   "Missing right parenthesies", True)
        return node, tail
    generate_error_message(head, characters,
                           "Expected expression, literal, or function call",
                           True)
def parse_variable_declaration(
        characters: str,
        tokens: List['Token']) -> Tuple['VariableDeclaration', List['Token']]:
    """Parse a variable declaration.

    Expected token sequence:
        1. TokenTypes.VARIABLE_DECLARATION
        2. TokenTypes.IDENTIFIER
        3. TokenTypes.IS
        4. Any token that can start an expression (CALL, IDENTIFIER, MINUS,
           PLUS, INT, FLOAT, LEFT_PARENTHESIES)

    Args:
        characters : The characters that are being lexed, parsed, interpreted
        tokens     : List of tokens that need to be parsed

    Returns:
        A VariableDeclaration node and the tokens left to parse.
        Raises a Syntax Error when a grammar rule is violated.
    """
    decl_token, name_token, *rest = tokens
    if name_token.tokentype_ != TokenTypes.IDENTIFIER:
        return generate_error_message(
            name_token, characters,
            "Expected identifier after variable declaration", True)
    current, *rest = rest
    if current.tokentype_ != TokenTypes.IS:
        return generate_error_message(current, characters, "Expected '='",
                                      True)
    current, *rest = rest
    init_node, remaining = parse_expr.parse_expression(characters,
                                                       [current] + rest)
    # The declaration spans from the declaration keyword to the initializer.
    span_loc = {
        "start": decl_token.loc_["start"],
        "end": init_node.loc_["end"]
    }
    span_range = [decl_token.range_[0], init_node.range_[1]]
    declaration = VariableDeclaration(loc_=span_loc,
                                      range_=span_range,
                                      id_=name_token.value_,
                                      init_=init_node)
    return declaration, remaining
def parse(
    characters: str,
    tokens: List['Token'],
    termination_tokens: Optional[List['TokenTypes']] = None,
) -> Tuple[List['Node'], List['Token']]:
    """Create an AST from the provided tokens.

    Raises error messages when illegal grammar is encountered.

    Args:
        characters         : Characters that are being lexed, parsed and
                             interpreted
        tokens             : List of tokens to create an AST from
        termination_tokens : Optional list of termination tokens. If the
                             parser encounters one of these tokens OR an EOF
                             token, stop parsing.

    Returns:
        If no errors occured:
            - A list of AST nodes
            - The tokens that were not consumed (terminator onwards)
        If a grammar error occured:
            Raises a Syntax Error with a message of where the error occured
    """
    # Avoid the mutable-default-argument pitfall: use None as the sentinel.
    if termination_tokens is None:
        termination_tokens = []
    if len(tokens) == 0:
        return [], []
    head, *tail = tokens
    # (Removed a leftover bare `breakpoint` name that was a no-op debug
    # statement in the original.)
    if head.tokentype_ in (TokenTypes.EOF, *termination_tokens):
        return [], tokens
    elif head.tokentype_ in (TokenTypes.NEW_LINE, TokenTypes.TAB):
        # Skip layout tokens and continue parsing.
        return parse(characters, tail, termination_tokens)
    elif head.tokentype_ == TokenTypes.VARIABLE_DECLARATION:
        node, tokens = parse_var_decl.parse_variable_declaration(
            characters, tokens)
    elif head.tokentype_ == TokenTypes.FUNCTION_DECLARATION:
        node, tokens = parse_func_decl.parse_function_declaration(
            characters, tokens)
    elif head.tokentype_ == TokenTypes.IF:
        node, tokens = parse_if_stmt.parse_if_statement(characters, tokens)
    elif head.tokentype_ == TokenTypes.RETURN:
        node, tokens = parse_func_decl.parse_return_statement(
            characters, tokens)
    elif head.tokentype_ == TokenTypes.CALL:
        node, tokens = parse_func_call.parse_function_call(characters, tokens)
    else:
        return generate_error_message(head, characters, "Invalid Syntax", True)
    # Parse the rest of the program and prepend the node just produced.
    nodes, tokens = parse(characters, tokens, termination_tokens)
    return [node] + nodes, tokens
def parse_return_statement(
        characters: str,
        tokens: List['Token']) -> Tuple['ReturnStatement', List['Token']]:
    """Parse a return statement.

    Grammar:
        1. A return statement must wrap an expression and be closed by a
           second TokenTypes.RETURN ('⮐') token.

    Args:
        characters : The characters that are being lexed, parsed, interpreted
        tokens     : List of tokens that need to be parsed

    Returns:
        A ReturnStatement node and the tokens left to parse.
        Raises a Syntax Error when a grammar rule is violated.
    """
    opening_token, first, *rest = tokens
    argument, remaining = parse_expr.parse_expression(characters,
                                                      [first] + rest)
    closing_token, *rest = remaining
    if closing_token.tokentype_ != TokenTypes.RETURN:
        generate_error_message(closing_token, characters,
                               "Expected closing '⮐' after return statement",
                               True)
    # The statement spans from the opening to the closing return token.
    span_loc = {
        "start": opening_token.loc_["start"],
        "end": closing_token.loc_["end"]
    }
    span_range = [opening_token.range_[0], closing_token.range_[1]]
    statement = ReturnStatement(loc_=span_loc, range_=span_range,
                                argument_=argument)
    return statement, rest
def parse_function_call(
        characters: str,
        tokens: List['Token']) -> Tuple['CallExpression', List['Token']]:
    """Parse a function call statement.

    Grammar:
        1. A function call starts with a TokenType.CALL and ends with a
           TokenType.CALL.
        2. Call parameters must be separated by a '|'.

    Args:
        characters : The characters that are being lexed, parsed, interpreted
        tokens     : List of tokens that need to be parsed

    Returns:
        A CallExpression node representing the call and the tokens left to
        parse. Raises a Syntax Error when a grammar rule is violated.
    """
    opening_token, name_token, *rest = tokens
    if name_token.tokentype_ not in (TokenTypes.PRINT, TokenTypes.IDENTIFIER):
        generate_error_message(name_token, characters,
                               "Expected identifier after call statement",
                               True)
    callee = Identifier(loc_=name_token.loc_, range_=name_token.range_,
                        name_=name_token.value_)
    arguments, remaining = parse_function_call_parameters(characters, rest)
    closing_token, *rest = remaining
    # The call expression spans from the opening to the closing CALL token.
    span_loc = {
        "start": opening_token.loc_["start"],
        "end": closing_token.loc_["end"]
    }
    span_range = [opening_token.range_[0], closing_token.range_[1]]
    call = CallExpression(loc_=span_loc, range_=span_range,
                          arguments_=arguments, callee_=callee)
    return call, rest
def parse_function_declaration(
        characters: str,
        tokens: List['Token']) -> Tuple['FunctionDeclaration', List['Token']]:
    """Parse a function declaration.

    Grammar:
        1. The declaration keyword must be followed by a TokenType.IDENTIFIER.
        2. After the identifier come the function parameters (if any).
        3. The function body is parsed like a normal piece of code, stopping
           at a TokenType.FUNCTION_DECLARATION_END token.

    Args:
        characters : The characters that are being lexed, parsed, interpreted
        tokens     : List of tokens that need to be parsed

    Returns:
        A FunctionDeclaration node and the tokens left to parse.
        Raises a Syntax Error when a grammar rule is violated.
    """
    decl_start, name_token, *rest = tokens
    if name_token.tokentype_ != TokenTypes.IDENTIFIER:
        generate_error_message(
            name_token, characters,
            "Expected identifier after function declaration", True)
    params, remaining = parse_function_params(characters, rest)
    body_nodes, remaining = parser.parse(
        characters,
        remaining,
        termination_tokens=[TokenTypes.FUNCTION_DECLARATION_END])
    decl_end, *rest = remaining
    if len(body_nodes) == 0:
        generate_error_message(name_token, characters,
                               "Function body cannot be empty", True)
    # Wrap the parsed body nodes into a single BlockStatement that spans
    # from the first to the last body node.
    body_loc = {
        "start": body_nodes[0].loc_["start"],
        "end": body_nodes[-1].loc_["end"]
    }
    body_range = [body_nodes[0].range_[0], body_nodes[-1].range_[1]]
    block = BlockStatement(loc_=body_loc, range_=body_range, body_=body_nodes)
    # The declaration itself spans from the keyword to the closing token.
    decl_loc = {
        "start": decl_start.loc_["start"],
        "end": decl_end.loc_["end"]
    }
    decl_range = [decl_start.range_[0], decl_end.range_[1]]
    declaration = FunctionDeclaration(loc_=decl_loc,
                                      range_=decl_range,
                                      id_=name_token.value_,
                                      params_=params,
                                      body_=block)
    return declaration, rest
def lex(characters: str,
        search_match_f: Callable[[str, List[Tuple[str, TokenTypes]], int],
                                 Optional[Tuple[re.Match, str]]],
        token_expressions: List[Tuple[str, TokenTypes]],
        line_no: int = 1,
        index: int = 0,
        total_index: int = 0) -> List[Token]:
    """Function converts the provided characters into tokens. Uses a provided
    function to search for matches in characters

    Args:
        characters        : The characters that need to be lexed.
        search_match_f    : A function to match the provided characters with
                            the provided token expressions.
        token_expressions : Regex/TokenTypes pairs to match tokens with.
        line_no           : The current line number that is being lexed,
                            default=1.
        index             : The current index within the line being lexed,
                            default=0.
        total_index       : The total index in characters that is being
                            lexed. Can be seen as characters[total_index:],
                            default=0.

    Returns:
        If no errors occured: Returns a list of lexed tokens
        If no match was found: Raises a Syntax Error with a message of where
        the error occured
    """
    if len(
            characters
    ) == total_index:  # If the end characters has been reached return an EOF token
        return [
            Token(loc_={
                "start": {
                    "line": line_no,
                    "index": index
                },
                "end": {
                    "line": line_no,
                    "index": index + 3
                }
            },
                  range_=[total_index, total_index + 3],
                  value_="\00",
                  tokentype_=TokenTypes.EOF)
        ]
    match, tokentype = search_match_f(characters, token_expressions,
                                      total_index)
    if not match:
        # NOTE(review): this error call passes (line_no, index, ...) instead
        # of a token like the parser-side calls do — presumably an overload
        # for the lexer; confirm against generate_error_message's signature.
        generate_error_message(line_no, index, characters, "Invalid Syntax",
                               True)
    matched_text = match.group(0)
    # Length of the matched lexeme; used to advance the per-line index.
    offset = match.end(0) - match.start(0)
    token_location = {
        "start": {
            "line": line_no,
            "index": index
        },
        "end": {
            "line": line_no,
            "index": index + offset
        }
    }
    token_range = [match.start(0), match.end(0)]
    token = Token(loc_=token_location,
                  range_=token_range,
                  value_=matched_text,
                  tokentype_=tokentype)
    if tokentype == TokenTypes.NEW_LINE:
        # A newline starts a new source line: bump the line counter and reset
        # the per-line index; offset is zeroed so the recursive call starts
        # the new line at index 0.
        line_no += 1
        index = 0
        offset = 0
    # Recurse on the remainder of the input, prepending the token just lexed.
    return [token] + lex(characters, search_match_f, token_expressions,
                         line_no, index + offset, match.end(0))
def parse_if_statement(
        characters: str,
        tokens: List['Token']) -> Tuple['IfStatement', List['Token']]:
    """ Function parses an if statement

    Note:
        The following grammar rules apply:
        1. An if statement starts with a TokenType.IF token
        2. An if statement, elif statement and else statement must each end
           with a TokenType.IF_STATEMENT_END
        3. An if statement must contain a test (binaryexpression)
        4. Parse an if statement body just like normal code
            4a. Look out for a TokenType.IF_STATEMENT_END to stop parsing the
                function body
        5. If a TokenTypes.ELSE_IF token is found, recurse this function

    Args:
        characters : The characters that are being lexed, parsed, interpreted
        tokens     : List of tokens that need to be parsed

    Returns:
        If no errors occured:
            - An IfStatement node
            - A list of tokens that still need to be parsed
        If a grammar error occured:
            - Raises a Syntax Error with a message of where the error occured
    """
    # Tokens that may legally terminate this if-statement's body.
    valid_termination_characters = [
        TokenTypes.IF_STATEMENT_END, TokenTypes.ELSE, TokenTypes.ELSE_IF
    ]
    if_statement_start, *tail = tokens
    test, tokens = parse_if_statement_test(characters, tail)
    # Parse the consequent body until one of the termination tokens is hit.
    body, tokens = parser.parse(
        characters, tokens, termination_tokens=valid_termination_characters)
    termination_token, *tail = tokens
    if len(body) == 0:
        generate_error_message(termination_token, characters,
                               "If statement body cannot be empty", True)
    if termination_token.tokentype_ not in valid_termination_characters:
        generate_error_message(termination_token, characters,
                               "Expected '¿', '⁈', or '⁇' after if statement",
                               True)
    if termination_token.tokentype_ == TokenTypes.ELSE_IF:
        # 'elif' branch: recurse with `tokens` still headed by the ELSE_IF
        # token, which acts as the start token of the nested if-statement;
        # the nested IfStatement becomes this statement's alternate.
        alternative, tokens = parse_if_statement(characters, tokens)
        loc_ = {"start": body[0].loc_["start"], "end": body[-1].loc_["end"]}
        range_ = [body[0].range_[0], body[-1].range_[1]]
        consequent_ = BlockStatement(loc_=loc_, range_=range_, body_=body)
        loc_ = {
            "start": if_statement_start.loc_["start"],
            "end": alternative.loc_["end"]
        }
        range_ = [if_statement_start.range_[0], alternative.range_[1]]
        return IfStatement(loc_=loc_,
                           range_=range_,
                           test_=test,
                           consequent_=consequent_,
                           alternate_=alternative), tokens
    elif termination_token.tokentype_ == TokenTypes.ELSE:
        head, *tail = tail
        if head.tokentype_ != TokenTypes.INDENTATION:
            generate_error_message(
                head, characters,
                "Expected '––>' statement after else block", True)
        # Parse the else-body until the closing IF_STATEMENT_END token.
        alternative, tokens = parser.parse(
            characters, tail, termination_tokens=[TokenTypes.IF_STATEMENT_END])
        if_statement_end, *tail = tokens
        if if_statement_end.tokentype_ != TokenTypes.IF_STATEMENT_END:
            generate_error_message(if_statement_end, characters,
                                   "Expected '¿' after if statement end", True)
        if len(alternative) == 0:
            generate_error_message(if_statement_end, characters,
                                   "Else statement body cannot be empty", True)
        # Wrap the consequent body nodes in a BlockStatement.
        loc_ = {"start": body[0].loc_["start"], "end": body[-1].loc_["end"]}
        range_ = [body[0].range_[0], body[-1].range_[1]]
        consequent_ = BlockStatement(loc_=loc_, range_=range_, body_=body)
        # Wrap the else-body nodes in a BlockStatement as the alternate.
        loc_ = {
            "start": alternative[0].loc_["start"],
            "end": alternative[-1].loc_["end"]
        }
        range_ = [alternative[0].range_[0], alternative[-1].range_[1]]
        alternative = BlockStatement(loc_=loc_, range_=range_,
                                     body_=alternative)
        loc_ = {
            "start": if_statement_start.loc_["start"],
            "end": if_statement_end.loc_["end"]
        }
        range_ = [if_statement_start.range_[0], if_statement_end.range_[1]]
        return IfStatement(loc_=loc_,
                           range_=range_,
                           test_=test,
                           consequent_=consequent_,
                           alternate_=alternative), tail
    # Plain 'if' without elif/else: the alternate is an empty list.
    loc_ = {"start": body[0].loc_["start"], "end": body[-1].loc_["end"]}
    range_ = [body[0].range_[0], body[-1].range_[1]]
    consequent_ = BlockStatement(loc_=loc_, range_=range_, body_=body)
    loc_ = {
        "start": if_statement_start.loc_["start"],
        "end": termination_token.loc_["end"]
    }
    range_ = [if_statement_start.range_[0], termination_token.range_[1]]
    return IfStatement(loc_=loc_,
                       range_=range_,
                       test_=test,
                       consequent_=consequent_,
                       alternate_=[]), tail