def __init__(self, tokens):
    """Set up parser state for a token sequence.

    Stores ``tokens`` as ``lex_token``, creates an empty AST root and makes
    it the current node, creates empty constant / name / variable tables,
    and zeroes the token cursor, the three running table indices and the
    bytecode line counter.
    """
    self.lex_token = tokens

    # The AST root doubles as the initial insertion point.
    root = SyntaxThreeNodeTree()
    self.AST = root
    self.pos = 0
    self.cur_node = root

    # Symbol tables, created in the same order as before.
    self.const_table = ConstTable()
    self.name_table = NameTable()
    self.var_table = VarNameTable()

    # Emitted bytecode listing and its bookkeeping counters.
    self.bytecode = []
    self.gobol_const_index = self.gobol_name_index = self.gobol_var_index = 0
    self.code_line_number = 0
class Parser(object):
    """Single-pass parser that builds an AST and a textual bytecode listing.

    The parser walks ``lex_token`` (a sequence of objects exposing ``type``
    and ``value``) with the cursor ``pos``.  While walking it

    * attaches nodes to ``AST``,
    * registers entries in ``const_table`` / ``name_table`` / ``var_table``,
    * appends ``"<line> <OPCODE> <arg>"`` strings to ``bytecode``.

    NOTE(review): the opcode spellings ``STROKE_NAME`` and ``STOKE_NAME`` are
    emitted exactly as before; whatever consumes ``bytecode`` is not visible
    here, so they were not normalised.
    """

    # Precedence used when converting an assignment's right-hand side to
    # postfix; a larger number binds tighter.
    _OPERATOR_PRIORITY = {
        '>': 0, '<': 0, '>=': 0, '<=': 0, '(': 0, ')': 0,
        '+': 1, '-': 1,
        '*': 2, '/': 2,
        '++': 3, '--': 3, '!': 3,
    }

    def __init__(self, tokens):
        """Set up empty tables, an empty AST and counters for ``tokens``."""
        self.lex_token = tokens
        self.AST = SyntaxThreeNodeTree()
        self.pos = 0
        self.cur_node = self.AST
        self.const_table = ConstTable()
        self.name_table = NameTable()
        self.var_table = VarNameTable()
        self.bytecode = []
        self.gobol_const_index = 0
        self.gobol_name_index = 0
        self.gobol_var_index = 0
        self.code_line_number = 0

    # ------------------------------------------------------------------
    # small private helpers
    # ------------------------------------------------------------------
    def _current(self):
        """Return the token under the cursor."""
        return self.lex_token[self.pos]

    def _emit(self, opcode, arg):
        """Append ``"<line> <opcode> <arg>"`` and advance the line counter."""
        self.bytecode.append(
            str(self.code_line_number) + " " + opcode + " " + str(arg))
        self.code_line_number += 1

    def _lookup_name_index(self, name):
        """Return the index stored for ``name`` in ``name_table``, or -1.

        A direct key match stops the search.  Keys that are
        ``FuncCodeObject`` instances have their own ``name_table`` searched
        as well; a hit there does not stop the outer scan (as before).
        """
        index = -1
        for key in self.name_table.table:
            if key == name:
                index = self.name_table.table[key]
                break
            if isinstance(key, FuncCodeObject):
                for inner in key.name_table.table:
                    if inner == name:
                        # Fixed: the original indexed the nested table with
                        # the outer ``key`` instead of the matching entry.
                        index = key.name_table.table[inner]
                        break
        return index

    def _rebind_constant(self, var_index, result):
        """Register ``result`` in the constant table under ``var_index``.

        Scans ``const_table`` for an entry whose stored index equals
        ``var_index`` and adds ``result -> var_index`` next to it.  The scan
        stops right after a top-level insertion, so the table being iterated
        is never advanced after it has been modified.
        """
        for key in self.const_table.table:
            if isinstance(key, FuncCodeObject):
                nested = key.const_table.table
                for inner in nested:
                    if nested[inner] == var_index:
                        nested[result] = var_index
                        break
            elif self.const_table.table[key] == var_index:
                self.const_table.table[result] = var_index
                break

    # ------------------------------------------------------------------
    # statement level
    # ------------------------------------------------------------------
    def main(self):
        """Parse top-level statements until a token whose value is "EOF"."""
        while self._current().value != "EOF":
            self._parse_statement(self.cur_node, allow_func=True)

    def parser_block(self, node):
        """Parse statements into ``node`` until an "RB_BRACKET" token.

        The cursor is left on the closing bracket.
        """
        while self._current().type != "RB_BRACKET":
            self._parse_statement(node, allow_func=False)

    def _parse_statement(self, target, allow_func):
        """Handle the construct at the cursor, then advance one token.

        The checks are deliberately independent ``if`` statements: each
        handler moves the cursor, and the following checks look at the token
        the cursor ends up on, exactly as the original did.
        """
        if self._current().type == "VAR":
            # Fixed: attach the node that var_node() actually fills in
            # (previously an empty placeholder node was attached).
            target.set_value(self.var_node())

        if allow_func and self._current().type == "FUNC":
            self._parse_function(target)

        # NOTE(review): "IF" is routed through while_control_node, as in the
        # original; if_control_node is not called from inside this class.
        if self._current().type in ("WHILE", "IF"):
            control_node = SyntaxTwoNodeTree()
            body = SyntaxThreeNodeTree()
            self.while_control_node(control_node)
            self.parser_block(body)
            control_node.set_right(body)
            target.set_value(control_node)

        if self._current().type == "ASSIGN":
            assign = SyntaxTwoNodeTree()
            self.assign_node(assign)
            target.set_value(assign)

        if self._current().type == "RETURN":
            self.return_value()

        self.pos += 1

    def _parse_function(self, target):
        """Parse a function header and body and attach them to ``target``."""
        func_tmp_node = SyntaxTwoNodeTree()
        func_block_node = SyntaxThreeNodeTree()
        func_code_obj = FuncCodeObject()

        # The tables are saved and restored around the function body.  The
        # switch to per-function tables was disabled in the original, so at
        # the moment this round-trip leaves the tables unchanged.
        saved_tables = (self.const_table, self.name_table, self.var_table)

        self.func_node(func_code_obj, func_tmp_node)
        self.parser_block(func_block_node)
        func_tmp_node.set_right(func_block_node)
        target.set_value(func_tmp_node)

        self.const_table, self.name_table, self.var_table = saved_tables

    # ------------------------------------------------------------------
    # individual constructs
    # ------------------------------------------------------------------
    def var_node(self):
        """Parse a variable declaration and return its AST node.

        Advances to the "ASSIGN" token; the two tokens before it are read as
        type and name, the token after it as the initial value.  Registers
        the value and the name in their tables, emits LOAD_CONST and
        STROKE_NAME, and leaves the cursor on the value token.
        """
        node = SyntaxTwoNodeTree()
        while self._current().type != "ASSIGN":
            self.pos += 1

        type_value = self.lex_token[self.pos - 2].value
        name_value = self.lex_token[self.pos - 1].value
        init_value = self.lex_token[self.pos + 1].value

        node.setValue(type_value, name_value, init_value)
        node.setExtraInfo("VAR:" + str(type_value))

        # symbol tables
        self.const_table.add(init_value, self.gobol_const_index)
        self.name_table.add(name_value, self.gobol_name_index)

        # bytecode
        self._emit("LOAD_CONST", self.gobol_const_index)
        self.gobol_const_index += 1
        self._emit("STROKE_NAME", self.gobol_name_index)
        self.gobol_name_index += 1

        self.pos += 1
        return node

    def func_node(self, codeObj, node):
        """Parse a function header up to its "LB_BRACKET" token.

        The token before "LL_BRACKET" is taken as the function name; inside
        the parentheses, each token preceding a "MAOHAO" token is registered
        in ``var_table`` as a parameter.  Emits LOAD_CONST, STOKE_NAME and
        MAKE_FUNCTION (with the parameter count) and returns ``codeObj``.
        """
        var_num = 0
        func_name = None
        while self._current().type != "LB_BRACKET":
            if self._current().type == "LL_BRACKET":
                func_name = self.lex_token[self.pos - 1].value
                while self._current().type != "RL_BRACKET":
                    if self._current().type == "MAOHAO":
                        self.var_table.add(self.lex_token[self.pos - 1].value,
                                           self.gobol_var_index)
                        self.gobol_var_index += 1
                        var_num += 1
                    self.pos += 1
            self.pos += 1

        # add the node into the AST
        node.set_left(func_name)

        # symbol tables
        self.const_table.add(codeObj, self.gobol_const_index)
        self.name_table.add(func_name, self.gobol_name_index)

        # bytecode
        self._emit("LOAD_CONST", self.gobol_const_index)
        self.gobol_const_index += 1
        self._emit("STOKE_NAME", self.gobol_name_index)
        self.gobol_name_index += 1
        self._emit("MAKE_FUNCTION", var_num)
        return codeObj

    def assign_node(self, node):
        """Parse ``<name> = <expr> ;`` with the cursor on the "ASSIGN" token.

        The right-hand side is converted to reverse Polish notation,
        evaluated with ``postfixEval`` and stored as the node's right child;
        the constant table is updated and LOAD_NAME / STOKE_NAME are emitted
        for the target.  The cursor is left on the "FENGHAO" token.
        """
        target_name = self.lex_token[self.pos - 1].value
        node.set_left(target_name)

        # look the variable up in the symbol table
        var_index = self._lookup_name_index(target_name)

        # evaluate the postfix form of the right-hand side
        result = self.postfixEval(self._infix_to_postfix())
        node.set_right(result)

        # record the newly computed value in the symbol table
        self._rebind_constant(var_index, result)

        # bytecode
        self._emit("LOAD_NAME", var_index)
        self._emit("STOKE_NAME", var_index)

    def _infix_to_postfix(self):
        """Convert tokens up to "FENGHAO" into a postfix list of values.

        Shunting-yard conversion for the left-associative binary operators
        accepted by ``check_compute_operation``.  Tokens that are neither
        operands, operators nor parentheses (for instance the leading
        "ASSIGN") are skipped.

        Fixed relative to the original: an incoming operator is always
        pushed on the stack after popping operators of greater *or equal*
        precedence.  The original appended the incoming operator to the
        output instead (so ``1 + 2 - 3`` came out as ``1 2 - 3 +``) and
        popped from an empty stack on inputs such as ``2 * 3 + 1``.
        """
        priority = self._OPERATOR_PRIORITY
        op_stack = []
        output = []
        while self._current().type != "FENGHAO":
            token = self._current()
            if self.check_var_and_digit(token.type):
                output.append(token.value)
            elif self.check_compute_operation(token.type):
                while (op_stack and op_stack[-1] != "("
                       and priority[op_stack[-1]] >= priority[token.value]):
                    output.append(op_stack.pop())
                op_stack.append(token.value)
            elif token.type == "LL_BRACKET":
                op_stack.append(token.value)
            elif token.type == "RL_BRACKET":
                top = op_stack.pop()
                while top != "(":
                    output.append(top)
                    top = op_stack.pop()
            self.pos += 1

        # finally flush whatever is left on the operator stack
        while op_stack:
            output.append(op_stack.pop())
        return output

    def while_control_node(self, node):
        """Parse a parenthesised comparison and set it as ``node``'s left child.

        Emits LOAD_NAME / LOAD_CONST for the operands followed by COMPARE_OP,
        JUMP_IF_FALSE and POP_TOP, and leaves the cursor one token past the
        "LB_BRACKET" that opens the body.
        """
        self._parse_condition(node, emit_setup_loop=False)

    def if_control_node(self, node):
        """Same as ``while_control_node`` but also emits SETUP_LOOP first.

        Prints a ">>>>>>>" marker line on entry (kept from the original).
        """
        print(">>>>>>>")
        self._parse_condition(node, emit_setup_loop=True)

    def _parse_condition(self, node, emit_setup_loop):
        """Shared body of ``while_control_node`` / ``if_control_node``."""
        tmp_node = SyntaxTwoNodeTree()
        while self._current().type != "LB_BRACKET":
            self.pos += 1
            if self._current().type == "LL_BRACKET":
                while self._current().type != "RL_BRACKET":
                    self.pos += 1
                    if self.check_operation(self._current().type):
                        self._emit_comparison(tmp_node, emit_setup_loop)
        self.pos += 1
        node.set_left(tmp_node)

    def _emit_comparison(self, tmp_node, emit_setup_loop):
        """Record the comparison at the cursor in ``tmp_node`` and emit it."""
        operator_token = self._current()
        left = self.lex_token[self.pos - 1]
        right = self.lex_token[self.pos + 1]

        tmp_node.setExtraInfo("OP:" + str(operator_token.type))
        tmp_node.setValue(operator_token.value, left.value, right.value)

        if emit_setup_loop:
            self._emit("SETUP_LOOP", "0")

        # NOTE(review): the indices below are emitted *after* being
        # incremented, unlike var_node/func_node, and the VARIABLE branch
        # registers the left operand again rather than the right one.  Both
        # look unintended but are kept until the bytecode consumer is checked.
        self.name_table.add(left.value, self.gobol_name_index)
        self.gobol_name_index += 1
        self._emit("LOAD_NAME", self.gobol_name_index)

        if right.type == "VARIABLE":
            self.name_table.add(left.value, self.gobol_name_index)
            self.gobol_name_index += 1
            self._emit("LOAD_NAME", self.gobol_name_index)
        elif right.type == "DIGIT":
            self.const_table.add(right.value, self.gobol_const_index)
            self.gobol_const_index += 1
            self._emit("LOAD_CONST", self.gobol_const_index)

        # ``op_index`` is a module-level mapping defined outside this class.
        self._emit("COMPARE_OP", op_index[operator_token.value])
        self._emit("JUMP_IF_FALSE", "0")
        self._emit("POP_TOP", "-1")

    def return_value(self):
        """Emit LOAD_NAME / RETURN_VALUE for the token after the cursor.

        The name's index is looked up in ``name_table``; -1 is emitted when
        it is not found.  The cursor is not moved.
        """
        returned_name = self.lex_token[self.pos + 1].value
        return_index = self._lookup_name_index(returned_name)

        # bytecode
        self._emit("LOAD_NAME", return_index)
        self._emit("RETURN_VALUE", "-1")

    # ------------------------------------------------------------------
    # token classification
    # ------------------------------------------------------------------
    def check_operation(self, token):
        """Return True if ``token`` is one of LT, GT, GET or LET."""
        return token in ("LT", "GT", "GET", "LET")

    def check_compute_operation(self, token):
        """Return True if ``token`` is one of PLUS, MINUS, MUL or DIV."""
        return token in ("PLUS", "MINUS", "MUL", "DIV")

    def check_var_and_digit(self, token):
        """Return True if ``token`` is DIGIT or VARIABLE."""
        return token in ("DIGIT", "VARIABLE")

    # ------------------------------------------------------------------
    # expression evaluation
    # ------------------------------------------------------------------
    def postfixEval(self, tokens):
        """Evaluate a postfix token list and return the final value.

        Strings for which ``isdigit()`` is true are pushed as ints; every
        other token is treated as a binary operator applied to the two most
        recently pushed operands.
        """
        operandStack = Stack()
        for item in tokens:
            if item.isdigit():
                operandStack.push(int(item))
            else:
                operand2 = operandStack.pop()
                operand1 = operandStack.pop()
                operandStack.push(self.compute(operand1, operand2, item))
        return operandStack.pop()

    def compute(self, v1, v2, op):
        """Apply ``op`` ("+", "-", "*" or "/") to ``v1`` and ``v2``.

        Returns None for any other operator.  "/" uses the ``/`` operator of
        the running interpreter.
        """
        if op == "+":
            return v1 + v2
        elif op == "-":
            return v1 - v2
        elif op == "*":
            return v1 * v2
        elif op == "/":
            return v1 / v2