def __analyse_assignment_expression(self, ast: Ast):
    """
    <assignment-expression> ::=
        <identifier><assignment-operator><expression>

    Emit the address load for the target symbol, evaluate the right-hand
    expression, convert it to the target's type and emit the matching
    store instruction.

    Raises `AssignToConstant` when the target is a constant and
    `FunctionTypeCalculationNotSupported` when it names a function.
    """
    assert_ast_type(ast, AstType.ASSIGNMENT_EXPRESSION)

    target = ast.first_child()
    symbol_name = self.__analyse_identifier(target)

    # neither constants nor functions may appear on the left-hand side
    if self.symbol_table.is_const(symbol_name):
        raise AssignToConstant(get_pos(target))
    if self.symbol_table.is_function(symbol_name):
        raise FunctionTypeCalculationNotSupported(get_pos(target),
                                                  symbol_name)

    symbol_type = self.symbol_table.get_type(symbol_name)
    symbol_offset = self.symbol_table.get_offset(symbol_name)

    # the address is emitted first, the value to store second
    self.add_inst(PCode.LOADA, *symbol_offset)

    value_expr = ast.children[-1]
    type_, _ = self.__analyse_expression(value_expr)
    self.convert_from_type_to_type(to_type=symbol_type,
                                   from_type=type_,
                                   from_pos=get_pos(value_expr),
                                   to_pos=get_pos(target))

    store_op = (PCode.DSTORE if symbol_type == TokenType.DOUBLE
                else PCode.ISTORE)
    self.add_inst(store_op)
def __parse_const_qualifier(self) -> Ast:
    """
    <const-qualifier> ::= 'const'

    Consume the `const` keyword and return it wrapped in a
    `CONST_QUALIFIER` node.
    """
    qualifier = Ast(AstType.CONST_QUALIFIER)
    const_token = self.__assert_token('const', TokenType.CONST)
    qualifier.add_child(const_token)
    return qualifier
def __analyse_function_call(self, ast: Ast) -> Tuple[str, Any]:
    """
    <function-call> ::=
        <identifier> '(' [<expression-list>] ')'

    Check that the callee is a defined function, analyse the arguments,
    verify the argument count and emit the `CALL` instruction.

    Return: pair of (value_type, value)
        value is always None here (not known at compile time),
        value_type is the callee's declared return type as reported by
        `self.elf.function_return_type`
    """
    assert_ast_type(ast, AstType.FUNCTION_CALL)

    callee = ast.first_child()
    func_name = self.__analyse_identifier(callee)
    if func_name not in self.symbol_table:
        raise FunctionNotDefined(get_pos(callee), func_name)
    if not self.symbol_table.is_function(func_name):
        raise NotCallingFunction(get_pos(callee), func_name)

    # prepare parameters, put values on stack-top from left to right
    params_info = self.elf.function_params_info(func_name)
    maybe_args = ast.children[2]
    if maybe_args.type == AstType.EXPRESSION_LIST:
        arg_count = self.__analyse_expression_list(maybe_args, params_info)
    else:
        # third child is not an expression list: call without arguments
        arg_count = 0

    param_count = self.elf.function_param_count(func_name)
    if arg_count != param_count:
        raise ArgumentsNumberNotMatchException(get_pos(ast.children[1]),
                                               param_count, arg_count)

    self.add_inst(PCode.CALL, self.elf.function_index(func_name))
    return self.elf.function_return_type(func_name), None
def __parse_assignment_operator(self) -> Ast:
    """
    <assignment-operator> ::= '='

    Consume the `=` token and return it wrapped in an
    `ASSIGNMENT_OPERATOR` node.
    """
    operator = Ast(AstType.ASSIGNMENT_OPERATOR)
    assign_token = self.__assert_token('=', TokenType.ASSIGN)
    operator.add_child(assign_token)
    return operator
def __parse_cast_expression(self) -> Ast:
    """
    <cast-expression> ::=
        {'('<type-specifier>')'}<unary-expression>

    Parse zero or more `'(' <type-specifier> ')'` prefixes followed by a
    unary expression. Each cast prefix contributes three children
    ('(' token, type specifier, ')' token); the unary expression is always
    the last child.

    Raises `InvalidExpression` when the token stream ends where a cast
    prefix or the unary expression is expected.
    """
    ast = Ast(AstType.CAST_EXPRESSION)
    while True:
        token = self.__peek_token(suppress_exception=True)
        if token is None:
            raise InvalidExpression(self.__prev_token().ed_pos)
        if token.tok_type != TokenType.LEFT_PARENTHESES:
            break

        # Consume '(' but do NOT attach it yet: it only belongs to this
        # node when a type name follows. Otherwise it opens a
        # parenthesised expression that the unary-expression parser owns.
        left_paren = self.__assert_token('(', TokenType.LEFT_PARENTHESES)

        token = self.__peek_token(suppress_exception=True)
        if token is None:
            raise InvalidExpression(self.__prev_token().ed_pos)
        if token.tok_type not in TokenType.types:
            # not a cast: hand the '(' back to the token stream
            self.__unread_token()
            break

        ast.add_child(left_paren)
        ast.add_child(self.__parse_type_specifier())
        ast.add_child(self.__assert_token(')', TokenType.RIGHT_PARENTHESES))

    ast.add_child(self.__parse_unary_expression())
    return ast
def __parse_type_specifier(self) -> Ast:
    """
    <type-specifier> ::= <simple-type-specifier>

    Return a `TYPE_SPECIFIER` node whose only child is the parsed simple
    type specifier.
    """
    specifier = Ast(AstType.TYPE_SPECIFIER)
    simple_specifier = self.__parse_simple_type_specifier()
    specifier.add_child(simple_specifier)
    return specifier
def __parse_str_literal(self) -> Ast:
    """
    Consume the next token, which must be a string literal, and return it
    wrapped in a `STR_LITERAL` node.

    Raises `ExpectedStrLiteral` (at the token's start position) when the
    token has any other type.
    """
    literal = Ast(AstType.STR_LITERAL)
    token = self.__next_token()
    is_str_literal = token.tok_type == TokenType.STR_LITERAL
    if not is_str_literal:
        raise ExpectedStrLiteral(token.st_pos)
    leaf = Ast(AstType.TOKEN, token)
    literal.add_child(leaf)
    return literal
def __parse_integer_literal(self) -> Ast:
    """
    Consume the next token, which must be an integer literal, and return
    it wrapped in an `INTEGER_LITERAL` node.

    Raises `ExpectedInt32` (at the token's start position) when the token
    has any other type.
    """
    literal = Ast(AstType.INTEGER_LITERAL)
    token = self.__next_token()
    is_int_literal = token.tok_type == TokenType.INTEGER_LITERAL
    if not is_int_literal:
        raise ExpectedInt32(token.st_pos)
    leaf = Ast(AstType.TOKEN, token)
    literal.add_child(leaf)
    return literal
def __parse_expression(self) -> Ast:
    """
    <expression> ::= <additive-expression>

    Return an `EXPRESSION` node whose only child is the parsed additive
    expression.
    """
    expression = Ast(AstType.EXPRESSION)
    additive = self.__parse_additive_expression()
    expression.add_child(additive)
    return expression
def __analyse_condition(self, ast: Ast) -> str:
    """
    <condition> ::=
        <expression>[<relational-operator><expression>]

    Emit the code that evaluates the condition. For a bare expression a
    `DOUBLE` result is converted with `D2I`; for a comparison the operands
    are brought to a common type and an `ICMP`/`DCMP` is emitted.

    Return: the jump instruction the caller must emit, i.e. the jump that
        is taken when the condition is `False`
        (`JE` for `!=` and for a bare expression, `JL` for `>=`, ...)

    Raises `VoidTypeCalculationNotSupported` when either operand is `VOID`.
    """
    assert_ast_type(ast, AstType.CONDITION)

    lhs = ast.first_child()
    l_type, _ = self.__analyse_expression(lhs)
    if l_type == TokenType.VOID:
        raise VoidTypeCalculationNotSupported(get_pos(lhs))

    # Index right after the left operand's code: a late `I2D` for the left
    # operand has to be inserted here, before the right operand's code.
    after_lhs_idx = self.elf.next_inst_idx()

    if len(ast.children) == 1:
        # bare expression: treat it as "expression != 0"
        if l_type == TokenType.DOUBLE:
            self.add_inst(PCode.D2I)
        return PCode.JE

    cmp_op = self.__analyse_relational_operator(ast.children[1])
    rhs = ast.children[-1]
    r_type, _ = self.__analyse_expression(rhs)
    if r_type == TokenType.VOID:
        raise VoidTypeCalculationNotSupported(get_pos(rhs))

    # `char` operands are compared as `int`
    if l_type == TokenType.CHAR:
        l_type = TokenType.INT
    if r_type == TokenType.CHAR:
        r_type = TokenType.INT

    if r_type != l_type:
        if l_type == TokenType.INT:
            # `int` op `double`: widen the already-emitted left operand
            l_type = TokenType.DOUBLE
            self.add_inst(PCode.I2D, at_idx=after_lhs_idx)
        elif l_type == TokenType.DOUBLE:
            # `double` op `int`: widen the right operand on stack top
            self.add_inst(PCode.I2D)

    compare_op = PCode.DCMP if l_type == TokenType.DOUBLE else PCode.ICMP
    self.add_inst(compare_op)

    # each operator is paired with the jump taken when it does NOT hold
    inverse_jumps = (
        (TokenType.EQ, PCode.JNE),
        (TokenType.NEQ, PCode.JE),
        (TokenType.LESS, PCode.JGE),
        (TokenType.GREATER, PCode.JLE),
        (TokenType.LEQ, PCode.JG),
    )
    for operator, jump in inverse_jumps:
        if cmp_op == operator:
            return jump

    assert cmp_op == TokenType.GEQ
    return PCode.JL
def __parse_identifier(self) -> Ast:
    """
    Consume the next token, which must be an identifier, and return it
    wrapped in an `IDENTIFIER` node.

    Raises `ExpectedIdentifier` at the previous token's start position
    when no token is left, or at the offending token's start position when
    it is not an identifier.
    """
    identifier = Ast(AstType.IDENTIFIER)
    token = self.__next_token(suppress_exception=True)
    if token is None:
        # nothing left to read: report relative to the previous token
        error_pos = self.__prev_token().st_pos
        raise ExpectedIdentifier(error_pos)
    if token.tok_type != TokenType.IDENTIFIER:
        raise ExpectedIdentifier(token.st_pos)
    leaf = Ast(AstType.TOKEN, token)
    identifier.add_child(leaf)
    return identifier
def __parse_multiplicative_operator(self) -> Ast:
    """
    <multiplicative-operator> ::= '*' | '/'

    Consume the operator token and return it wrapped in a
    `MULTIPLICATIVE_OPERATOR` node. Raises `ExpectedSymbol` when the next
    token is neither `*` nor `/`.
    """
    operator = Ast(AstType.MULTIPLICATIVE_OPERATOR)
    token = self.__next_token()
    accepted = (TokenType.MUL, TokenType.DIV)
    if token.tok_type not in accepted:
        raise ExpectedSymbol(token.st_pos, '* or /')
    leaf = Ast(AstType.TOKEN, token)
    operator.add_child(leaf)
    return operator
def __parse_additive_operator(self) -> Ast:
    """
    <additive-operator> ::= '+' | '-'

    Consume the operator token and return it wrapped in an
    `ADDITIVE_OPERATOR` node. Raises `ExpectedSymbol` when the next token
    is neither `+` nor `-`.
    """
    operator = Ast(AstType.ADDITIVE_OPERATOR)
    token = self.__next_token()
    accepted = (TokenType.ADD, TokenType.SUB)
    if token.tok_type not in accepted:
        raise ExpectedSymbol(token.st_pos, '+ or -')
    leaf = Ast(AstType.TOKEN, token)
    operator.add_child(leaf)
    return operator
def __parse_relational_operator(self) -> Ast:
    """
    <relational-operator> ::= '<' | '<=' | '>' | '>=' | '!=' | '=='

    Consume the operator token and return it wrapped in a
    `RELATIONAL_OPERATOR` node. Raises `ExpectedSymbol` when the next
    token's type is not listed in `TokenType.relations`.
    """
    operator = Ast(AstType.RELATIONAL_OPERATOR)
    token = self.__next_token()
    is_relation = token.tok_type in TokenType.relations
    if not is_relation:
        raise ExpectedSymbol(token.st_pos,
                             "'<' | '<=' | '>' | '>=' | '!=' | '=='")
    leaf = Ast(AstType.TOKEN, token)
    operator.add_child(leaf)
    return operator
def __parse_simple_type_specifier(self) -> Ast:
    """
    <simple-type-specifier> ::= 'void'|'int'|'char'|'double'

    Consume the type keyword and return it wrapped in a
    `SIMPLE_TYPE_SPECIFIER` node. Raises `UnknownVariableType` when the
    next token's type is not listed in `TokenType.types`.
    """
    specifier = Ast(AstType.SIMPLE_TYPE_SPECIFIER)
    token = self.__next_token()
    if token.tok_type not in TokenType.types:
        raise UnknownVariableType(token.st_pos, token.literal)
    specifier.add_child(Ast(AstType.TOKEN, token))
    return specifier
def __analyse_primary_expression(self, ast: Ast) -> Tuple[str, Any]:
    """
    <primary-expression> ::=
         '('<expression>')'
        |<identifier>
        |<integer-literal>
        |<char-literal>
        |<floating-literal>
        |<function-call>

    Dispatch on the type of the first child and emit the code that
    produces the expression's value.

    After return, if value_type is not `VOID`, the value is left on the
    top of the current stack.

    Return: pair of (value_type, value)
        value is the literal's value for integer/char/float literals and
        None for identifiers and function calls; a parenthesised
        expression returns whatever the inner expression returns,
        value_type is `VOID` iff expression is consisted of single
        void function call, `CHAR` iff expression is consisted of single
        char-literal or char-variable, `INT` or `DOUBLE` (`CHAR` promoted
        to `INT`) for any other case
    """
    assert_ast_type(ast, AstType.PRIMARY_EXPRESSION)
    head = ast.first_child()
    kind = head.type

    if kind == AstType.TOKEN:
        # first child is a bare token: analyse the expression right after it
        return self.__analyse_expression(ast.children[1])

    if kind == AstType.IDENTIFIER:
        symbol_name = self.__analyse_identifier(head)
        if symbol_name not in self.symbol_table:
            raise UndefinedSymbol(get_pos(head), symbol_name)
        if self.symbol_table.is_function(symbol_name):
            raise FunctionTypeCalculationNotSupported(get_pos(head),
                                                      symbol_name)

        symbol_offset = self.symbol_table.get_offset(symbol_name)
        symbol_type = self.symbol_table.get_type(symbol_name)
        # load the address first, then the value it points at
        self.add_inst(PCode.LOADA, *symbol_offset)
        if symbol_type == TokenType.DOUBLE:
            self.add_inst(PCode.DLOAD)
        elif symbol_type in (TokenType.INT, TokenType.CHAR):
            self.add_inst(PCode.ILOAD)
        return self.symbol_table.get_type(symbol_name), None

    if kind == AstType.INTEGER_LITERAL:
        value = self.__analyse_integer_literal(head)
        self.add_inst(PCode.IPUSH, value)
        return TokenType.INT, value

    if kind == AstType.CHAR_LITERAL:
        # characters are pushed as their code point
        value = ord(self.__analyse_char_literal(head))
        self.add_inst(PCode.BIPUSH, value)
        return TokenType.CHAR, value

    if kind == AstType.FLOAT_LITERAL:
        value = self.__analyse_float_literal(head)
        # doubles are stored as constants and loaded by index
        idx = self.elf.add_constant(Constant.DOUBLE, value)
        self.add_inst(PCode.LOADC, idx)
        return TokenType.DOUBLE, value

    assert kind == AstType.FUNCTION_CALL, 'Unexpected error, invalid primary_expr'
    return self.__analyse_function_call(head)
def __analyse_assignment_operator(ast: Ast) -> str:
    """
    <assignment-operator> ::= '='

    Return the token type of the operator, which is one of the
    `TokenType` members.

    NOTE(review): takes no `self`; presumably declared as a static method
    outside this view - confirm.
    """
    assert_ast_type(ast, AstType.ASSIGNMENT_OPERATOR)
    operator_token = ast.first_child().token
    return operator_token.tok_type
def __analyse_type_specifier(self, ast: Ast) -> str:
    """
    <type-specifier> ::= <simple-type-specifier>

    Return the `TokenType` of the type named by the specifier, as
    reported by analysing its only child.
    """
    assert_ast_type(ast, AstType.TYPE_SPECIFIER)
    simple_specifier = ast.first_child()
    return self.__analyse_simple_type_specifier(simple_specifier)
def __analyse_relational_operator(ast: Ast) -> str:
    """
    <relational-operator> ::= '<' | '<=' | '>' | '>=' | '!=' | '=='

    Return the token type of the operator, which is one of the
    `TokenType` members.

    NOTE(review): takes no `self`; presumably declared as a static method
    outside this view - confirm.
    """
    assert_ast_type(ast, AstType.RELATIONAL_OPERATOR)
    operator_token = ast.first_child().token
    return operator_token.tok_type
def __analyse_multiplicative_operator(ast: Ast) -> str:
    """
    <multiplicative-operator> ::= '*' | '/'

    Return the token type of the operator, which is one of the
    `TokenType` members.

    NOTE(review): takes no `self`; presumably declared as a static method
    outside this view - confirm.
    """
    assert_ast_type(ast, AstType.MULTIPLICATIVE_OPERATOR)
    operator_token = ast.first_child().token
    return operator_token.tok_type
def __analyse_additive_operator(ast: Ast) -> str:
    """
    <additive-operator> ::= '+' | '-'

    Return the token type of the operator, which is one of the
    `TokenType` members.

    NOTE(review): takes no `self`; presumably declared as a static method
    outside this view - confirm.
    """
    assert_ast_type(ast, AstType.ADDITIVE_OPERATOR)
    operator_token = ast.first_child().token
    return operator_token.tok_type
def __analyse_unary_expression(self, ast: Ast) -> Tuple[str, Any]:
    """
    <unary-expression> ::=
        [<unary-operator>]<primary-expression>

    Analyse the primary expression and, when a unary `-` precedes it,
    emit the matching negate instruction. A unary `+` emits nothing.

    After return, if value_type is not `VOID`, the value is left on the
    top of the current stack.

    Return: pair of (value_type, value)
        value is always None here,
        value_type is whatever the primary expression reported when no
        operator is present; with an operator `CHAR` is promoted to `INT`

    Raises `VoidTypeCalculationNotSupported` when an operator is applied
    to a `VOID` operand.
    """
    assert_ast_type(ast, AstType.UNARY_EXPRESSION)

    operand = ast.children[-1]
    type_, _ = self.__analyse_primary_expression(operand)

    has_operator = len(ast.children) == 2
    if not has_operator:
        return type_, None

    if type_ == TokenType.VOID:
        raise VoidTypeCalculationNotSupported(get_pos(operand))

    # `char` must be converted to `int` before any calculation
    if type_ == TokenType.CHAR:
        type_ = TokenType.INT

    op = self.__analyse_unary_operator(ast.first_child())
    if op == TokenType.SUB:
        # DOUBLE negates with DNEG, everything else (INT or CHAR) with INEG
        negate = PCode.DNEG if type_ == TokenType.DOUBLE else PCode.INEG
        self.add_inst(negate)
    else:
        assert op == TokenType.ADD

    return type_, None
def __assert_token(self, symbol: str, tok_type: TokenType) -> Ast:
    """
    Consume the next token and require it to be of type `tok_type`.

    `symbol` is the human-readable spelling used in the error report.

    Return the consumed token wrapped in a `TOKEN` node.

    Raises `ExpectedSymbol` at the previous token's end position when no
    token is left, or at `self.__prev_token().st_pos` when the consumed
    token has a different type.
    """
    token = self.__next_token(suppress_exception=True)

    if token is None:
        # ran out of tokens
        error_pos = self.__prev_token().ed_pos
        raise ExpectedSymbol(error_pos, symbol)

    type_matches = token.tok_type == tok_type
    if not type_matches:
        error_pos = self.__prev_token().st_pos
        raise ExpectedSymbol(error_pos, symbol)

    return Ast(AstType.TOKEN, token)
def __analyse_unary_operator(ast: Ast):
    """
    <unary-operator> ::= '+' | '-'

    Return the token type of the operator, which is one of the
    `TokenType` members.

    NOTE(review): takes no `self`; presumably declared as a static method
    outside this view - confirm.
    """
    assert_ast_type(ast, AstType.UNARY_OPERATOR)
    operator_token = ast.first_child().token
    return operator_token.tok_type
def __analyse_loop_statement(self, ast: Ast) -> dict:
    """
    <loop-statement> ::=
         'while' '(' <condition> ')' <statement>
        |'do' <statement> 'while' '(' <condition> ')' ';'
        |'for' '('<for-init-statement> [<condition>]';'
               [<for-update-expression>]')' <statement>

    Only the `while` form is implemented. The emitted layout is:
    condition code, a conditional jump out of the loop, the body, and an
    unconditional jump back to the condition.

    Return the statistics dict produced by analysing the loop body
    (e.g. how many `return`s, `while`s).

    Raises `NotSupportedFeature` for `do` and `for` loops.
    """
    assert_ast_type(ast, AstType.LOOP_STATEMENT)

    # NOTE: only need to complete `while` loop
    keyword = ast.first_child().token.tok_type
    if keyword == TokenType.DO:
        raise NotSupportedFeature(get_pos(ast), 'do')
    if keyword != TokenType.WHILE:
        raise NotSupportedFeature(get_pos(ast), 'for')

    condition = ast.children[2]
    body = ast.children[4]

    loop_start_idx = self.elf.next_inst_idx()
    exit_jump = self.__analyse_condition(condition)

    # the exit target is unknown until the body is emitted: use 0 for now
    exit_jump_idx = self.elf.next_inst_idx()
    self.add_inst(exit_jump, 0)

    body_info = self.__analyse_statement(body)
    self.add_inst(PCode.JMP, loop_start_idx)

    # back-patch the exit jump with the first index after the loop
    loop_end_idx = self.elf.next_inst_idx()
    self.elf.update_instruction_at(exit_jump_idx, loop_end_idx)
    return body_info
def __analyse_jump_statement(self, ast: Ast) -> dict:
    """
    <jump-statement> ::=
         'break' ';'
        |'continue' ';'
        |<return-statement>

    Only the `<return-statement>` form is implemented.

    Return statement statistics, here always `{'return': 1}`.

    Raises `NotSupportedFeature` when the first child is a bare token.
    """
    assert_ast_type(ast, AstType.JUMP_STATEMENT)

    # NOTE: base part only contains <return-statement>
    first = ast.first_child()

    # A bare TOKEN child is presumably the `break`/`continue` keyword
    # (verify against the jump-statement parser). The check must compare
    # against `AstType.TOKEN`, the same way the other analysers in this
    # file recognise token nodes.
    if first.type == AstType.TOKEN:
        raise NotSupportedFeature(get_pos(ast), 'break and continue')

    self.__analyse_return_statement(first)
    return {'return': 1}
def __parse_assignment_expression(self) -> Ast:
    """
    <assignment-expression> ::=
        <identifier><assignment-operator><expression>

    Return an `ASSIGNMENT_EXPRESSION` node with three children, in order:
    the identifier, the assignment operator and the expression.
    """
    assignment = Ast(AstType.ASSIGNMENT_EXPRESSION)

    target = self.__parse_identifier()
    assignment.add_child(target)

    operator = self.__parse_assignment_operator()
    assignment.add_child(operator)

    value = self.__parse_expression()
    assignment.add_child(value)

    return assignment
def __parse_function_call(self) -> Ast:
    """
    <function-call> ::=
        <identifier> '(' [<expression-list>] ')'

    Return a `FUNCTION_CALL` node whose children are the identifier, the
    '(' token, the expression list (only when the token after '(' is not
    ')') and the ')' token.

    Raises `ExpectedSymbol` when the token stream ends right after '('.
    """
    call = Ast(AstType.FUNCTION_CALL)
    call.add_child(self.__parse_identifier())
    call.add_child(self.__assert_token('(', TokenType.LEFT_PARENTHESES))

    lookahead = self.__peek_token(suppress_exception=True)
    if lookahead is None:
        raise ExpectedSymbol(self.__prev_token().ed_pos, ')')

    # anything other than ')' starts the argument list
    has_arguments = lookahead.tok_type != TokenType.RIGHT_PARENTHESES
    if has_arguments:
        call.add_child(self.__parse_expression_list())

    call.add_child(self.__assert_token(')', TokenType.RIGHT_PARENTHESES))
    return call
def __parse_function_definition(self) -> Ast:
    """
    <function-definition> ::=
        <type-specifier><identifier><parameter-clause><compound-statement>

    Return a `FUNCTION_DEFINITION` node with four children, in grammar
    order.

    Raises `InvalidFunctionDefinition` (positioned where the definition
    started) when the token stream runs out part-way through; the
    underlying `TokenIndexOutOfRange` is printed to stderr and kept as the
    explicit cause of the raised exception.
    """
    ast = Ast(AstType.FUNCTION_DEFINITION)
    # remember where the definition starts for the error report
    start_pos = self.__current_pos()
    try:
        ast.add_child(self.__parse_type_specifier())
        ast.add_child(self.__parse_identifier())
        ast.add_child(self.__parse_parameter_clause())
        ast.add_child(self.__parse_compound_statement())
    except TokenIndexOutOfRange as e:
        print(e, file=sys.stderr)
        # chain explicitly so the traceback shows the original cause
        raise InvalidFunctionDefinition(start_pos) from e
    return ast
def __analyse_simple_type_specifier(ast: Ast):
    """
    <simple-type-specifier> ::= 'void'|'int'|'char'|'double'

    Return the token type of the type keyword held by the node.

    The membership check against `TokenType.types` is an internal sanity
    assertion.

    NOTE(review): takes no `self`; presumably declared as a static method
    outside this view - confirm.
    """
    assert_ast_type(ast, AstType.SIMPLE_TYPE_SPECIFIER)
    keyword_token = ast.first_child().token
    type_ = keyword_token.tok_type
    assert type_ in TokenType.types, 'Type error, it should be detected before analysing'
    return type_