def parse_identifier_list(self):
    """
    Parse a run of consecutive identifier tokens.

    Consumes tokens for as long as the current token is an IDENTIFIER and
    returns an IDENT_LIST_EXP node holding one IDENTIFIER_EXP child per
    consumed identifier (the list may be empty).
    """
    node = ASTNode(n_type=ASTNodeType.IDENT_LIST_EXP)
    while self.get_token_type() == TokenType.IDENTIFIER:
        token = self.pop_token()
        node.add_child(
            ASTNode(n_type=ASTNodeType.IDENTIFIER_EXP,
                    n_text=token.get_text(),
                    # the line number comes from get_line(); the token text
                    # must not be stored as the node's line
                    n_line=token.get_line()))
    return node
def parse_identifier_expression(self):
    """
    Parse an identifier, optionally followed by a parenthesised index list.

    Returns an IDENTIFIER_EXP node; when the identifier is followed by
    '(', the parsed index list is attached as its only child.
    """
    ident_token = self.pop_token()
    ident_node = ASTNode(n_type=ASTNodeType.IDENTIFIER_EXP,
                         n_text=ident_token.get_text(),
                         n_line=ident_token.get_line())
    # plain identifier: nothing more to consume
    if self.get_token_type() != TokenType.L_PAREN:
        return ident_node

    self.pop_token()  # discard the left paren
    index_list = self.parse_index_list()
    self.pop_token()  # discard the right paren
    ident_node.add_child(index_list)
    return ident_node
def parse_selection_statement(self):
    """
    Parse an if/switch statement with its follow-up clauses.

    Returns None when the current token is neither "if" nor 'switch'.
    Otherwise returns the SEL_STMT node after complete_statement() has
    processed it. Raises EndMissingError (positioned at the opening
    keyword) whenever a clause could not be parsed.
    """
    head = self.get_token()
    if head.get_text() not in ("if", 'switch'):
        return None

    statement = ASTNode(n_type=ASTNodeType.SEL_STMT,
                        n_text=head.get_text(),
                        n_line=head.get_line())
    start_token = self.pop_token()

    def attach_clause(keyword):
        # parse one clause and hang it on the statement; a missing clause
        # is reported at the position of the opening keyword
        parsed = self.parse_selection_clause(keyword)
        if parsed is None:
            raise EndMissingError(start_token.row, start_token.col)
        statement.add_child(parsed)

    # the leading if/switch clause
    attach_clause(start_token.get_text())

    repeatable, final = SEL_CLAUSES_MAP[start_token.get_text()]

    # any number of elseif/case clauses
    while self.get_token().get_text() == repeatable:
        attach_clause(self.pop_token().get_text())

    # at most one trailing else/otherwise clause
    if self.get_token().get_text() == final:
        attach_clause(self.pop_token().get_text())

    self.pop_token()  # remove 'end'
    return self.complete_statement(statement)
def parse_assignment_expression(self):
    """
    Parse `identifier <assign> expression`.

    Returns None without consuming anything unless the current token is an
    IDENTIFIER and the token after it is the assignment operator. Otherwise
    returns an ASS_EXP node whose children are the IDENTIFIER_EXP target and
    the parsed right-hand side.
    """
    starts_with_identifier = self.get_token_type() == TokenType.IDENTIFIER
    if not starts_with_identifier or self.get_token_type(1) != TokenType.ASS:
        return None

    target_token = self.pop_token()
    target = ASTNode(n_type=ASTNodeType.IDENTIFIER_EXP,
                     n_text=target_token.get_text(),
                     n_line=target_token.get_line())

    assign_token = self.pop_token()
    assign_text = assign_token.get_text()
    assign_line = assign_token.get_line()
    value = self.parse_logic_or_expression()
    return ASTNode(n_type=ASTNodeType.ASS_EXP,
                   n_text=assign_text,
                   n_line=assign_line,
                   children=[target, value])
def parse_statement_list(self, terminators=('None', )):
    """
    Parse statements until a terminator is reached.

    With the default terminators the loop only stops once no token is left
    (the string form of the missing token is 'None'), which is what parsing
    a whole program needs. For code blocks such as selection, iteration or
    function bodies, the caller passes the keywords that close the block.

    Returns a STMT_LIST node, or None if the tokens run out before one of
    the given terminators shows up; the caller raises the error.
    """
    statements = ASTNode(n_type=ASTNodeType.STMT_LIST)
    while True:
        current = self.get_token()
        if str(current) in terminators:
            return statements
        if current is None:
            # invalid code block; reported by the caller, outside the block
            return None
        if self.get_token_type() == TokenType.EO_STMT:
            # stray statement terminator: drop it
            self.pop_token()
        else:
            statements.add_child(self.parse_statement())
def parse_iteration_statement(self):
    """
    Parse a while/for statement.

    Returns None when the current token is neither 'while' nor 'for'.
    Otherwise returns the ITR_STMT node after complete_statement() has
    processed it. Raises EndMissingError (positioned at the opening
    keyword) if the loop clause could not be parsed.
    """
    head = self.get_token()
    if head.get_text() not in ('while', 'for'):
        return None

    loop = ASTNode(n_type=ASTNodeType.ITR_STMT,
                   n_text=head.get_text(),
                   n_line=head.get_line())
    start_token = self.pop_token()

    body_clause = self.parse_iteration_clause(start_token.get_text())
    if body_clause is None:
        raise EndMissingError(start_token.row, start_token.col)
    loop.add_child(body_clause)

    self.pop_token()  # remove 'end'
    return self.complete_statement(loop)
def parse_level_7_expression(self):
    """
    binary/trinary colon operator (left associate):
        :

    Each CLN_EXP node gets two children, plus a third one when a second
    ':' directly follows.
    """
    result = self.parse_level_6_expression()
    while self.get_token_type() == TokenType.COLON:
        colon = self.pop_token()  # the first ':'
        second = self.parse_level_6_expression()
        result = ASTNode(n_type=ASTNodeType.CLN_EXP,
                         n_text=colon.get_text(),
                         n_line=colon.get_line(),
                         children=[result, second])
        if self.get_token_type() != TokenType.COLON:
            continue
        self.pop_token()  # the second ':'
        result.add_child(self.parse_level_6_expression())
    return result
def parse_bracket_expression(self):
    """
    Parse a bracketed array literal, starting at its left bracket.

    Returns an ARRAY_LIST_EXP node whose children are the element
    expressions interleaved with EO_STMT nodes for the separators found
    between them.

    Raises:
        InvalidExpressionError3: a ',' separator directly follows another
            ',' separator.
        IncompleteStatementError: the tokens run out before the closing
            bracket is found.
    """
    token = self.pop_token()  # remove left bracket
    node = ASTNode(n_type=ASTNodeType.ARRAY_LIST_EXP, n_line=token.get_line())
    while True:
        # collect every separator that precedes the next element
        while self.get_token_type() == TokenType.EO_STMT:
            token = self.pop_token()  # EO_STMT
            if (str(token) == "," and node.children
                    and node.children[-1].get_text() == ','):
                raise InvalidExpressionError3(token.row, token.col)
            node.add_child(
                ASTNode(n_type=ASTNodeType.EO_STMT,
                        n_text=token.get_text(),
                        n_line=token.get_line()))
        if self.get_token_type() is None:
            self.pop_token()
            raise IncompleteStatementError(self.last_token.row,
                                           self.last_token.col)
        node.add_child(self.parse_logic_or_expression())
        token = self.get_token()
        # If the input ended right after an element there is no token to
        # inspect; keep looping so the check above reports the unterminated
        # bracket instead of failing on a None token.
        if token is not None and token.get_type() == TokenType.R_BRACKET:
            break
    self.pop_token()  # remove right bracket
    return node
def complete_statement(self, node):
    """
    Consume the statement terminator and attach it to `node`.

    Returns `node` with an extra EO_STMT child. Raises
    InvalidExpressionError1, positioned at the current token, when that
    token is not an EO_STMT.
    """
    if self.get_token_type() == TokenType.EO_STMT:
        terminator = self.pop_token()
        node.add_child(
            ASTNode(n_type=ASTNodeType.EO_STMT,
                    n_text=terminator.get_text(),
                    n_line=terminator.row))
        return node

    offending = self.get_token()
    raise InvalidExpressionError1(offending.row, offending.col)
def parse_logic_and_expression(self):
    """
    Parse a left-associative chain of level-8 expressions joined by SCA
    ('&') tokens, building one BOP_EXP node per operator.
    """
    left = self.parse_level_8_expression()
    while True:
        if self.get_token_type() != TokenType.SCA:
            return left
        operator = self.pop_token()  # '&'
        right = self.parse_level_8_expression()
        left = ASTNode(n_type=ASTNodeType.BOP_EXP,
                       n_text=operator.get_text(),
                       n_line=operator.get_line(),
                       children=[left, right])
def parse_level_4_expression(self, next_level=None):
    """
    unary prefix operators:
        + - ~

    Without a leading prefix operator the work is delegated to
    `next_level` when one is given, otherwise to
    parse_level_2_expression().
    """
    if self.get_token_type() not in (TokenType.ADD, TokenType.EWN):
        if next_level is None:
            return self.parse_level_2_expression()
        return next_level()

    operator = self.pop_token()  # unary operator symbol
    operand = self.parse_level_4_expression()
    return ASTNode(n_type=ASTNodeType.UOP_EXP,
                   n_text=operator.get_text(),
                   n_line=operator.get_line(),
                   children=[operand])
def parse_level_5_expression(self):
    """
    binary multiplicative operators (left associate):
        .* ./ .\\ * / \\
    """
    product = self.parse_level_4_expression()
    while self.get_token_type() == TokenType.MUL:
        symbol = self.pop_token()  # multiplicative symbol
        factor = self.parse_level_4_expression()
        operands = [product, factor]
        product = ASTNode(n_type=ASTNodeType.BOP_EXP,
                          n_text=symbol.get_text(),
                          n_line=symbol.get_line(),
                          children=operands)
    return product
def parse_level_6_expression(self):
    """
    binary additive operators (left associate):
        + -
    """
    total = self.parse_level_5_expression()
    while True:
        if self.get_token_type() != TokenType.ADD:
            break
        symbol = self.pop_token()  # additive symbol
        term = self.parse_level_5_expression()
        total = ASTNode(n_type=ASTNodeType.BOP_EXP,
                        n_text=symbol.get_text(),
                        n_line=symbol.get_line(),
                        children=[total, term])
    return total
def parse_level_2_expression(self):
    """
    unary postfix operators:
        .' '
    binary operators (left associate):
        .^ ^
    """
    result = self.parse_primary_expression()
    while self.get_token_type() in (TokenType.TRA, TokenType.POW):
        is_transpose = self.get_token_type() == TokenType.TRA
        operator = self.pop_token()  # transpose or power symbol
        if is_transpose:
            result = ASTNode(n_type=ASTNodeType.UOP_EXP,
                             n_text=operator.get_text(),
                             n_line=operator.get_line(),
                             children=[result])
            continue
        # the exponent may carry unary prefixes; without one it is parsed
        # as a primary expression
        exponent = self.parse_level_4_expression(
            next_level=self.parse_primary_expression)
        result = ASTNode(n_type=ASTNodeType.BOP_EXP,
                         n_text=operator.get_text(),
                         n_line=operator.get_line(),
                         children=[result, exponent])
    return result
def parse_level_8_expression(self):
    """
    binary relational operators (left associate):
    """
    lhs = self.parse_level_7_expression()
    while self.get_token_type() == TokenType.REL:
        relation = self.pop_token()  # relational symbol
        rhs = self.parse_level_7_expression()
        comparison = ASTNode(n_type=ASTNodeType.BOP_EXP,
                             n_text=relation.get_text(),
                             n_line=relation.get_line(),
                             children=[lhs, rhs])
        lhs = comparison
    return lhs
def parse_iteration_clause(self, clause):
    """
    Parse the header expression and the body of a loop.

    For 'while' the header is a logic-or expression; for any other keyword
    it is an assignment expression. The body is a statement list ending at
    'end'. Returns an ITR_CLS node with the header and the body as
    children, or None when either part is missing (the caller raises).
    """
    node = ASTNode(n_type=ASTNodeType.ITR_CLS, n_text=clause)

    if clause == 'while':
        header = self.parse_logic_or_expression()
    else:
        header = self.parse_assignment_expression()
    if header is None:
        return None  # exception raised outside
    node.add_child(header)

    body = self.parse_statement_list(terminators=('end', ))
    if body is None:
        return None  # exception raised outside
    node.add_child(body)
    return node
def parse_index_list(self):
    """
    Parse the comma separated arguments between a pair of parentheses.

    An argument that starts with ':' becomes a childless CLN_EXP node; any
    other argument is parsed as a logic-or expression. Parsing stops in
    front of the right paren, which is left for the caller to consume.
    Raises InvalidExpressionError3 when an argument is followed by anything
    other than ',' or the right paren.
    """
    index_list = ASTNode(n_type=ASTNodeType.INDEX_LIST_EXP)
    while True:
        if self.get_token_type() != TokenType.COLON:
            index_list.add_child(self.parse_logic_or_expression())
        else:
            colon = self.pop_token()  # COLON
            index_list.add_child(
                ASTNode(n_type=ASTNodeType.CLN_EXP,
                        n_text=colon.get_text(),
                        n_line=colon.get_line()))

        following = self.get_token()
        if str(following) == ",":
            # one argument finished, go on with the next one
            self.pop_token()
            continue
        if following.get_type() == TokenType.R_PAREN:
            return index_list
        raise InvalidExpressionError3(following.row, following.col)
def parse_selection_clause(self, clause):
    """
    Parse one clause of a selection statement.

    Every clause except 'else'/'otherwise' starts with a logic-or
    expression. Every clause except 'switch' is followed by a statement
    list that ends at the terminators registered for the clause in
    SEL_TERMINATOR_MAP. Returns a SEL_ClS node, or None when the statement
    list is missing (the caller raises).
    """
    node = ASTNode(n_type=ASTNodeType.SEL_ClS, n_text=clause)

    has_condition = clause not in ('else', 'otherwise')
    if has_condition:
        node.add_child(self.parse_logic_or_expression())

    if clause == 'switch':
        # remove redundant EO_STMT tokens after switch before the first case
        while self.get_token_type() == TokenType.EO_STMT:
            self.pop_token()
        return node

    body = self.parse_statement_list(terminators=SEL_TERMINATOR_MAP[clause])
    if body is None:
        return None  # exception raised outside
    node.add_child(body)
    return node
class Asmgen:
    """
    Generate assembly text for the program rooted at an AST node.

    Usage: ``Asmgen(root).generate()`` returns the assembly as one string.
    Only expression statements that assign `+`/`-` combinations of number
    literals are translated so far.
    """

    _root = ASTNode()
    _rspoffset = 0  # size of stack frame
    # temporaries of the running computation:
    # key - ASTNode, value - where its result lives (register or stack slot)
    _tempvar = dict()
    # local variables: key - variable node, value - its stack slot
    _localvar = dict()
    # registers that can be handed out to temporaries
    _registersl = [
        "%eax", "%ebx", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d"
    ]
    _pararegisl = ["%edi", "%esi", "%edx", "%ecx", "%r8d", "%r9d"]
    _pararegisq = ["%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9"]
    # code cache filled while scanning a function body
    _bodyAsm = ""
    # string literals
    stringliteral = []

    def __init__(self, node):
        """Remember the AST root and start from empty per-instance state."""
        self._root = node
        # The class-level containers above are shared by every instance;
        # give each generator its own so that runs cannot leak into each
        # other.
        self._tempvar = dict()
        self._localvar = dict()
        self.stringliteral = []

    def generate(self):
        """Return the assembly text for the whole program."""
        # head of code block
        string = "\t.section __TEXT,__text,regular,pure_instructions\n"
        # generate "main": visiting fills the body cache that
        # generateprogram() wraps into a function
        self.visitprogram(self._root)
        string = self.generateprogram("main", string)
        # text literals
        string += ("\n# string literals\n"
                   "\t.section __TEXT,__cstring,cstring_literals\n")
        for index, literal in enumerate(self.stringliteral):
            string += ("L.str" + str(index) + ":\n" + "\t.asciz\t\"" +
                       literal + "\"\n")
        # reset the instance list (a bare local assignment has no effect)
        self.stringliteral = []
        return string

    def generateprogram(self, name, string):
        """
        Append the function `name` to `string` and return the result.

        The cached body is wrapped in a prologue/epilogue, then the
        per-function state (frame size, variables, body cache) is reset.
        """
        # add label; `.globl` is the directive that exports the symbol
        string = (string + "\n# " + name + "\n\t.globl _" + name + "\n_" +
                  name + ":\n")
        string = string + "\n\tpushq\t%rbp\n\tmovq\t%rsp, %rbp\n"
        # set stack top, 16 bytes align; floor division keeps the frame
        # size an integer
        if (self._rspoffset % 16) != 0:
            self._rspoffset = (self._rspoffset // 16 + 1) * 16
        string = (string + "\n\t# set stack top\n\tsubq\t$" +
                  str(self._rspoffset) + ", %rsp\n")
        # save the values in the registers we used
        self.saveregisters()
        # function body
        string = string + "\n\t# process body\n" + self._bodyAsm
        # recover the values in the registers we used
        self.restoreregisters()
        # recover stack top
        string = (string + "\n\t# recover stack top\n\taddq\t$" +
                  str(self._rspoffset) + ", %rsp\n")
        if name == "main":
            string = string + "\n\t#return value\n\txorl\t%eax, %eax\n"
        string = string + "\n\t# end\n\tpopq\t%rbp\n\tretq\n"
        # reset temp vars
        self._rspoffset = 0
        self._localvar = dict()
        self._tempvar = dict()
        self._bodyAsm = ""
        return string

    def saveregisters(self):
        """Placeholder: saving the used registers is not implemented yet."""
        pass

    def restoreregisters(self):
        """Placeholder: restoring the used registers is not implemented yet."""
        pass

    def visitprogram(self, root):
        """Visit every child of `root`; return the concatenated results."""
        return "".join(
            self.visitblockstat(child) for child in root.get_children())

    def visitblockstat(self, node):
        """Visit one statement; only expression statements are handled."""
        if node.get_type() == ASTNodeType.EXP_STMT:
            # treated as an assignment statement
            return self.visitassign(node)
        return ""

    def visitassign(self, node):
        """
        Emit the code of an assignment statement.

        Returns the stack slot assigned to the target variable.
        """
        assignment = node.get_child(0)
        # register or stack slot the variable is stored in
        varaddress = self.visitassignid(assignment.get_child(0))
        value = self.visitexpression(assignment.get_child(1))
        # every emitted instruction ends its own line
        self._bodyAsm = (self._bodyAsm + "\tmovl\t" + str(value) + ", " +
                         varaddress + "\n")
        return varaddress

    def visitassignid(self, id):
        """Reserve a new 4-byte stack slot for `id` and return its address."""
        # NOTE: the parameter name shadows the builtin `id`; it is kept so
        # that keyword callers keep working.
        self._rspoffset += 4
        string = "-" + str(self._rspoffset) + "(%rbp)"
        self._localvar[id] = string
        return string

    def visitexpression(self, node):
        """
        Emit the code that evaluates `node`.

        Returns the operand text holding the result: an immediate for a
        number literal, the allocated location for `+`/`-`, and an empty
        string for anything else.
        """
        string = ""
        if node.num_children() >= 2:
            left = self.visitexpression(node.get_child(0))
            right = self.visitexpression(node.get_child(1))
            if node.get_text() == "+":
                string = self._emitbinary(node, left, right, "addl")
            elif node.get_text() == "-":
                string = self._emitbinary(node, left, right, "subl")
        elif node.get_type() == ASTNodeType.NUMBER_LIT_EXP:
            string = string + "$" + node.get_text()
        # TODO: support variables on the right-hand side, e.g. a = b + 1
        return string

    def _emitbinary(self, node, left, right, opcode):
        """Emit `left <opcode> right` into a fresh location; return it."""
        target = self.allocforexp(node)
        if target != left:
            self._bodyAsm = (self._bodyAsm + "\tmovl\t" + left + ", " +
                             target + "\n")
        self._bodyAsm = (self._bodyAsm + "\t" + opcode + "\t" + right +
                         ", " + target + "\n")
        return target

    def allocforexp(self, node):
        """
        Allocate a location for the result of `node`.

        A free register is preferred; once all of them are taken a new
        4-byte stack slot is reserved.
        """
        availregis = self.getavailregis()
        if availregis != -1:
            string = self._registersl[availregis]
        else:
            self._rspoffset += 4
            string = "-" + str(self._rspoffset) + "(%rbp)"
        # Key by the node itself: keying by the operator text would let a
        # later node with the same operator overwrite the entry and hand a
        # still-live register out again.
        self._tempvar[node] = string
        return string

    def getavailregis(self):
        """Return the index of the first unused register, or -1 if none."""
        in_use = set(self._tempvar.values())
        for index, register in enumerate(self._registersl):
            if register not in in_use:
                return index
        return -1
def parse_expression_statement(self):
    """
    Parse an expression followed by its statement terminator.

    Returns None when no expression could be parsed; otherwise the
    EXP_STMT node after complete_statement() has processed it.
    """
    expression = self.parse_expression()
    if expression is not None:
        statement = ASTNode(n_type=ASTNodeType.EXP_STMT,
                            children=[expression])
        return self.complete_statement(statement)
    return None
def parse_vector_literal(self):
    """
    Consume the current token and wrap it in a VECTOR_LIT_EXP node whose
    text has the surrounding single quotes stripped.
    """
    literal = self.pop_token()
    unquoted = literal.get_text().strip('\'')
    return ASTNode(n_type=ASTNodeType.VECTOR_LIT_EXP,
                   n_text=unquoted,
                   n_line=literal.get_line())
def parse_string_literal(self):
    """
    Consume the current token and wrap it in a STRING_LIT_EXP node whose
    text has the surrounding double quotes stripped.
    """
    literal = self.pop_token()
    unquoted = literal.get_text().strip('\"')
    return ASTNode(n_type=ASTNodeType.STRING_LIT_EXP,
                   n_text=unquoted,
                   n_line=literal.get_line())
def parse_number_literal(self):
    """
    Consume the current token and wrap it, text unchanged, in a
    NUMBER_LIT_EXP node.
    """
    literal = self.pop_token()
    number_text = literal.get_text()
    number_line = literal.get_line()
    return ASTNode(n_type=ASTNodeType.NUMBER_LIT_EXP,
                   n_text=number_text,
                   n_line=number_line)