def p_while_stmt(p):
    "while_stmt : WHILE LEFTPAREN cond RIGHTPAREN decl_block_var stmt_list ENDWHILE"
    # The string above is the grammar production for this rule and must stay
    # the first statement of the function, so documentation lives in comments.
    #
    # Builds the IR for a while loop and stores it in p[0] as [IRRep]:
    #     LABEL <top> ; <cond code> ; <body code> ; JUMP <top> ; LABEL <exit>
    # where <exit> is the label carried in p[3][1].
    #
    # Also closes the block scope: steps currentScope back to its parent,
    # registers a "BLOCK n" symbol table under it and moves the pending
    # declarations from blockList into that table.
    #
    # lastBlock is declared global here so the assignment below is visible to
    # other rules, matching p_if_stmt and p_else_part.
    global currentScope, currentBlock, lastBlock

    currentScope = currentScope.getParent()
    blockName = "BLOCK " + str(currentBlock)
    blockScope = SymbolTable(blockName, currentScope)
    lastBlock = blockName
    currentScope.addChild(blockScope)
    currentBlock += 1

    # Move the variables declared inside the loop body into its scope.
    for var in blockList.getValues():
        blockScope.addVariable(var)
    blockList.clear()

    loopCode = IRRep()
    topLabel = irlist.nextLabel()
    loopCode.addToEnd(IRNode("LABEL", topLabel, "", "", None, None))
    loopCode.addToEnd(p[3][0].first)   # condition code
    loopCode.addToEnd(p[6][0].first)   # loop body
    loopCode.addToEnd(IRNode("JUMP", topLabel, "", "", None, None))
    loopCode.addToEnd(IRNode("LABEL", p[3][1], "", "", None, None))
    p[0] = [loopCode]
def p_if_stmt(p):
    "if_stmt : IF LEFTPAREN cond RIGHTPAREN decl_block_var stmt_list else_part ENDIF"
    # The string above is the grammar production; it has to remain the
    # function's first statement.
    #
    # Emits the IR for an if / if-else statement into p[0] as [IRRep] and
    # registers the "BLOCK n" scope that holds the then-branch declarations.
    global currentScope, currentBlock, lastBlock

    currentScope = currentScope.getParent()
    scopeName = "BLOCK " + str(currentBlock)
    blockScope = SymbolTable(scopeName, currentScope)
    lastBlock = scopeName
    currentScope.addChild(blockScope)
    currentBlock += 1

    if len(blockList.getValues()) > 0:
        for declared in blockList.getValues():
            blockScope.addVariable(declared)
        blockList.clear()

    ifCode = IRRep()
    ifCode.addToEnd(p[3][0].first)   # condition
    ifCode.addToEnd(p[6][0].first)   # then-branch
    if p[7] is None:
        # No else part: the label from the condition marks the end.
        ifCode.addToEnd(IRNode("LABEL", p[3][1], "", "", None, None))
    else:
        # then-branch jumps over the else-branch once it is done.
        skipLabel = irlist.nextLabel()
        jumpOverElse = IRNode("JUMP", skipLabel, "", "", None, None)
        elseEntry = IRNode("LABEL", p[3][1], "", "", None, None)
        ifCode.addToEnd(jumpOverElse)
        ifCode.addToEnd(elseEntry)
        ifCode.addToEnd(p[7][0].first)
        ifCode.addToEnd(IRNode("LABEL", skipLabel, "", "", None, None))
    p[0] = [ifCode]
class Assembler:
    """Translate a queue of assembly files into ``.hack`` binary text files.

    ``files`` is a list of input paths.  ``dummyPass`` records label
    addresses for the file at the head of the queue and ``processFile``
    pops that file and writes its translation.
    """

    def __init__(self, files):
        """Store the queue of input paths and create an empty symbol table."""
        self.files = files
        self.symtable = SymbolTable()
        # Next free address for a newly seen variable symbol; processFile
        # resets it to 16 at the start of every file.
        self.current_address = 16

    def hasFiles(self):
        """Are there more files to process?"""
        return len(self.files) > 0

    def dummyPass(self):
        """Do a soft pass to determine the location for labels (Xxx).

        Only the first queued file is scanned and it is left in the queue.
        A and C commands advance the instruction address; an L command binds
        its symbol to the address of the instruction that follows it.
        """
        p = Parser(self.files[0])
        current_address = 0
        while p.hasMoreCommands():
            p.advance()
            cmd_type = p.commandType()
            # Compare by value: identity comparison of constants only works
            # by accident of interning.
            if cmd_type == A_COMMAND or cmd_type == C_COMMAND:
                current_address += 1
            elif cmd_type == L_COMMAND:
                self.symtable.addEntry(p.symbol(), current_address)

    @staticmethod
    def _outputName(filename):
        """Return the output path: a trailing '.asm' becomes '.hack'.

        Only the suffix is rewritten, so a '.asm' that appears earlier in
        the path (e.g. in a directory name) is left untouched.  Names
        without the suffix simply get '.hack' appended.
        """
        if filename.endswith('.asm'):
            return filename[:-len('.asm')] + '.hack'
        return filename + '.hack'

    def processFile(self):
        """Translates the next file in the queue.

        Pops the first path, writes one 16-character binary line per A or C
        command to the matching ``.hack`` file and skips label commands.
        """
        filename = self.files.pop(0)
        p = Parser(filename)
        fileout = self._outputName(filename)
        # 'with' guarantees the output file is closed even if a command
        # fails to translate half-way through.
        with open(fileout, 'w') as f:
            print("Translating %s" % (filename))
            self.current_address = 16
            while p.hasMoreCommands():
                p.advance()
                cmd_type = p.commandType()
                if cmd_type == A_COMMAND:
                    address = self._getAddress(p.symbol())
                    instruction = '{0:016b}'.format(int(address))
                elif cmd_type == C_COMMAND:
                    # dest=comp;jump
                    instruction = ''.join(['111',
                                           Code.comp(p.comp()),
                                           Code.dest(p.dest()),
                                           Code.jump(p.jump())])
                else:
                    # L_COMMAND (Xxx) produces no output
                    continue
                f.write(instruction + '\n')

    def _getAddress(self, symbol):
        """Resolve an A-command operand to an address.

        Numeric operands are returned unchanged (as the original string).
        Any other symbol is looked up in the symbol table; unknown symbols
        are first bound to ``self.current_address``, which is then advanced.
        """
        if symbol.isdigit():
            return symbol
        if not self.symtable.contains(symbol):
            self.symtable.addEntry(symbol, self.current_address)
            self.current_address += 1
        return self.symtable.GetAddress(symbol)
def collect(self):
    """Build and return a SymbolTable describing ``self.decls``.

    Declarations whose exact type is M_var are inserted as "VAR" entries
    with consecutive offsets starting at 1.  Declarations whose exact type
    is M_fun are inserted as "FUN" entries carrying ("fn<label>", params);
    the module-level ``label`` counter is advanced for each of them.
    Anything else is ignored.
    """
    global label
    symbols = SymbolTable()
    nextOffset = 1
    for decl in self.decls:
        declKind = type(decl)
        if declKind == M_var:
            symbols.insert(decl.name, decl.internalType, "VAR", nextOffset)
            nextOffset += 1
        elif declKind == M_fun:
            entryPoint = "fn" + str(label)
            symbols.insert(decl.name, decl.returnType, "FUN",
                           (entryPoint, decl.params))
            label += 1
    return symbols
def subroutineCall(tokens):
    """Emit VM code for the subroutine call that starts at ``tokens[pos]``.

    Three shapes are handled:
      * ``var.name(...)``   - ``var`` is a known symbol: a method call on
        that object; the object is pushed first and counts as an argument.
      * ``Class.name(...)`` - receiver is not a known symbol: a plain
        function/constructor call.
      * ``name(...)``       - a method call on the current object;
        ``pointer 0`` is pushed as the implicit first argument.

    Advances the global ``pos`` past the closing ')' and writes to ``fpw``.
    """
    global pos, fpw

    follower = tokens[pos+1]
    if follower == '.':
        receiver = tokens[pos]
        kind = SymbolTable.kindOf(receiver)
        if kind != 'none':
            # Receiver is a variable: call Type.method with the object
            # as hidden argument 0.
            index = SymbolTable.indexOf(receiver)
            type_ = SymbolTable.typeOf(receiver)
            funcName = type_ + tokens[pos+1] + tokens[pos+2]
            pos += 4   # receiver '.' name '('
            # Map symbol kinds onto VM segment names.
            segment = {'var': 'local', 'field': 'this'}.get(kind, kind)
            fpw.write('push ' + segment + ' ' + str(index) + '\n')
            argNum = CompileExpressionList(tokens)
            fpw.write('call ' + funcName + ' ' + str(argNum+1) + '\n')
            pos += 1   # ')'
        else:
            # Receiver is a class name: 'className.funcName'
            funcName = tokens[pos] + tokens[pos+1] + tokens[pos+2]
            pos += 4
            argNum = CompileExpressionList(tokens)
            fpw.write('call ' + funcName + ' ' + str(argNum) + '\n')
            pos += 1   # ')'
    elif follower == '(':
        # No '.', so this is a method of the current class.
        funcName = tokens[pos]
        fpw.write('push pointer 0\n')
        pos += 2       # name '('
        argNum = CompileExpressionList(tokens)
        fpw.write('call ' + className + '.' + funcName + ' '
                  + str(argNum+1) + '\n')
        pos += 1       # ')'
def LookUp(symbol):
    """Is symbol in hash table?  Returns True or False.

    Calls ``Display("Lookup", _list)`` first, then reports whether
    ``SymbolTable.getPointer(symbol)`` produced something other than None.
    """
    Display("Lookup", _list)
    entry = SymbolTable.getPointer(symbol)
    return True if entry != None else False
def __init__(self, file):
    """Create the lexer, symbol table and VM writer, then compile ``file``.

    Construction performs the whole run: the output is opened, the class is
    compiled and the output is closed before ``__init__`` returns.
    """
    self.lex = Lex(file)
    self.symbols = SymbolTable()
    self.vm = VMWriter()
    # Order matters: the writer must be open before compile_class emits code.
    self.openout(file)
    self.compile_class()
    self.closeout()
def visit_CompoundInstruction(self, compInstr):
    """Analyse a compound instruction inside its own nested scope.

    A fresh SymbolTable, child of the current one, is active while the
    block's declarations and instructions are analysed; the previous table
    is put back afterwards.  Always returns 'void'.
    """
    if __debug__:
        # Single pre-formatted argument: prints the same text whether
        # print is a statement or a function.
        print("visit_CompoundInstruction in line %s" % (compInstr.lineno,))
    enclosing = self.symbolTable
    scopeLabel = "compInstr(" + str(compInstr.lineno) + ") scope"
    self.symbolTable = SymbolTable(enclosing, scopeLabel)
    self.analyzeDeclarations(compInstr.decl)
    self.analyzeInstrBlock(compInstr.instr)
    self.symbolTable = enclosing
    return 'void'
def CompileSubroutine(tokens):
    """Compile one subroutine declaration starting at ``tokens[pos]``.

    Expected token layout:
        kind returnType name '(' parameterList ')' '{' varDec* statements '}'
    where kind is 'method', 'function' or 'constructor'.

    Writes the ``function Class.name nLocals`` header to ``fpw``, followed
    by the prologue that sets up ``this`` (methods: from argument 0;
    constructors: from a fresh Memory.alloc of ``SymbolTable.field_index``
    words), then compiles the statements.  Advances the global ``pos`` past
    the closing '}'.
    """
    global pos, className

    # Clear the previous subroutine's symbols and start a new table.
    SymbolTable.startSubroutine()

    funcKind = tokens[pos]      # method, function or constructor
    pos += 1
    pos += 1                    # return type: not needed for code generation
    funcName = className + '.' + tokens[pos]

    if funcKind == 'method':
        # 'this' is the hidden first argument and its type is the enclosing
        # class (not the subroutine's return type).
        SymbolTable.Define('this', className, 'argument')

    pos += 2                    # subroutine name and '('
    CompileParameterList(tokens)
    pos += 1                    # ')'
    pos += 1                    # '{' opens the subroutine body

    # Count local variables across all leading 'var' declarations.
    var_num = 0
    while tokens[pos] == 'var':
        var_num += CompileVarDec(tokens)

    fpw.write('function ' + funcName + ' ' + str(var_num) + '\n')

    if funcKind == 'method':
        # Every method starts by anchoring 'this' to argument 0.
        fpw.write('push argument 0\n' + 'pop pointer 0\n')
    elif funcKind == 'constructor':
        # Allocate one word per field and point 'this' at the new object.
        size = SymbolTable.field_index
        fpw.write('push constant ' + str(size) + '\n')
        fpw.write('call Memory.alloc 1\n')
        fpw.write('pop pointer 0\n')

    if tokens[pos] in ['do', 'let', 'return', 'if', 'while']:
        CompileStatements(tokens)

    pos += 1                    # '}' ends the subroutine body
def visit_FunctionDef(self, node, table):
    """Register a function definition and visit its body in a child scope.

    The function symbol is put into ``table``; a duplicate is reported via
    ``handle_error``.  Each formal parameter is put into a new SymbolTable
    nested under ``table`` (duplicates are reported the same way), and the
    body is then visited with that sub-table.
    """
    funcSymbol = FunctionSymbol(node.rettype, node.name, node.fmlparams)
    inserted = table.put(funcSymbol)
    if not inserted:
        self.handle_error(node.pos,
                          'Symbol ' + funcSymbol.name + ' is already defined')

    subtable = SymbolTable(table, funcSymbol)
    for fmlparam in node.fmlparams.args:
        paramType = fmlparam.type
        paramName = fmlparam.id
        paramSymbol = VariableSymbol(paramType, paramName)
        if not subtable.put(paramSymbol):
            self.handle_error(node.pos,
                              'Parameter ' + paramName + ' already declared')

    node.body.accept(self, subtable)
def visit_Program(self, program):
    """Analyse a whole program in a fresh top-level scope.

    Installs a new root SymbolTable, then analyses the program's
    declarations, function definitions and instructions in that order.

    Returns True when ``self.errors`` equals 0 afterwards, False otherwise.
    """
    if __debug__:
        # Single pre-formatted argument: prints the same text whether
        # print is a statement or a function.
        print("visit_Program in line %s" % (program.lineno,))
    self.symbolTable = SymbolTable(None, "program scope")
    self.analyzeDeclarations(program.decl)
    self.analyzeFunDeclarations(program.fundef)
    self.analyzeInstrBlock(program.instr)
    # Value comparison: 'is 0' tests object identity, which only happens to
    # work for small ints on some interpreters.
    return self.errors == 0
def p_else_part(p):
    '''else_part : ELSE decl_block_var stmt_list
                 | empty'''
    # The string above is the grammar production (two alternatives) and has
    # to stay the first statement of the function.
    #
    # With an ELSE branch: registers a "BLOCK n" scope for it, moves the
    # pending declarations from blockList into that scope and passes the
    # branch's statement list up as [p[3][0]].  Without one, p[0] is None.
    global currentScope, currentBlock, lastBlock

    hasElseBranch = len(p) > 2
    if hasElseBranch:
        currentScope = currentScope.getParent()
        scopeName = "BLOCK " + str(currentBlock)
        blockScope = SymbolTable(scopeName, currentScope)
        lastBlock = scopeName
        currentScope.addChild(blockScope)
        currentBlock += 1

        if len(blockList.getValues()) > 0:
            for declared in blockList.getValues():
                blockScope.addVariable(declared)
            blockList.clear()

    p[0] = [p[3][0]] if p[1] is not None else None
def __init__(self, filename):
    """Prepare translation state for ``filename``.

    ``self.filename`` keeps everything before the first '.' in the given
    name (or the whole name when it contains no '.').  A Parser is opened
    on the full name and the buffers and symbol table start out empty.
    """
    # partition() yields the text before the first '.', or the whole
    # string when there is none.
    self.filename = filename.partition('.')[0]
    self.p = Parser(filename)
    self.buff = []
    self.ops = []
    self.table = SymbolTable()
def visit_FunctionDefinition(self, funDef):
    """Analyse a function body inside a scope holding its arguments.

    A child SymbolTable is made current, every argument (if any) is put in
    it as a VariableSymbol, the instruction block is analysed, and the
    previous table is put back.  Returns the function's declared type.
    """
    if __debug__:
        # Single pre-formatted argument: prints the same text whether
        # print is a statement or a function.
        print("visit_FunctionDefinition in line %s" % (funDef.lineno,))

    enclosing = self.symbolTable
    scopeLabel = "fundef(" + funDef.ident + ") scope"
    self.symbolTable = SymbolTable(enclosing, scopeLabel)

    arguments = funDef.args
    if arguments is not None:
        for arg in arguments:
            argSymbol = VariableSymbol(arg.ident, arg.typ, arg)
            self.symbolTable.put(arg.ident, argSymbol)

    self.analyzeInstrBlock(funDef.instr)
    self.symbolTable = enclosing
    return funDef.typ
def p_func_declaration(p):
    '''func_declaration : FUNCTION any_type IDENTIFIER LEFTPAREN param_decl_list RIGHTPAREN BEGIN func_body END '''
    # The string above is the grammar production and has to stay the first
    # statement of the function.
    #
    # Creates the function's symbol table (named after the IDENTIFIER),
    # inserts it under the parent scope relative to lastFunc, then fills it
    # with the collected parameters followed by the collected locals.
    global currentScope, lastFunc

    currentScope = currentScope.getParent()
    funcScope = SymbolTable(p[3], currentScope)
    currentScope.insertChild(funcScope, lastFunc)

    # Remember what the next function scope should be inserted after.
    lastFunc = p[3] if lastBlock == "" else lastBlock

    params = parameterList.getValues()
    if len(params) > 0:
        for param in params:
            funcScope.addVariable(param)
    parameterList.clear()

    if len(funcList.getValues()) > 0:
        for local in funcList.getValues():
            funcScope.addVariable(local)
    funcList.clear()
def main():
    """Drives the entire translation process.

    Reads the program named by ``sys.argv[1]`` through the preprocessor's
    ``.tmp`` file and writes ``<file_name>.hack``, where ``file_name`` comes
    from the parser.  Works in two passes: the first binds labels to
    instruction addresses, the second emits one binary line per A or C
    command, allocating unseen variable symbols from RAM address 16 upward.
    """
    # Preprocess the file.
    pp = Preprocessor()
    pp.remove_white_spaces_comments()
    pp.write_temp_file()

    # First pass - adds labels to the symbol table.
    parser = Parser(sys.argv[1] + '.tmp')
    symbol_table = SymbolTable()
    pc = -1   # address of the last instruction seen so far
    while parser.has_more_commands():
        parser.advance()
        command_type = parser.command_type()
        if command_type == 'A_COMMAND' or command_type == 'C_COMMAND':
            pc += 1
        elif command_type == 'L_COMMAND':
            # A label names the instruction that follows it.
            symbol_table.addEntry(parser.symbol(), pc + 1)

    # Second pass - handles variable names and writes the *.hack file.
    ram_address = 16
    parser = Parser(sys.argv[1] + '.tmp')
    code = Code()
    file_name = parser.get_file_name()
    # 'with' closes the output even if a command fails to translate.
    with open(file_name + '.hack', 'w') as hack_file:
        while parser.has_more_commands():
            parser.advance()
            command_type = parser.command_type()
            if command_type == 'A_COMMAND':
                a_symbol = parser.symbol()
                if a_symbol[0] in '0123456789':
                    address = a_symbol
                else:
                    # 'not' rather than 'is False': also correct when
                    # contains() reports absence with 0 or None.
                    if not symbol_table.contains(a_symbol):
                        symbol_table.addEntry(a_symbol, ram_address)
                        ram_address += 1
                    address = symbol_table.GetAddress(a_symbol)
                hack_file.write('0' + code.convert_to_binary(address) + '\n')
            elif command_type == 'C_COMMAND':
                comp = code.comp(parser.comp())
                dest = code.dest(parser.dest())
                jump = code.jump(parser.jump())
                hack_file.write('111' + comp + dest + jump + '\n')
def functiondec(op):
    """Register the function declared by ``op`` and skip over its body.

    ``op[1]`` is the function name, ``op[2]`` the number of input
    parameters and ``op[3]`` the number of output parameters (at most 1).
    Parameter lines are read from the global ``code`` list, advancing the
    global ``currentLine``.  The function is stored in ``funcSt`` together
    with the line its body starts on; scanning then continues until a line
    starting with "SRT" or the label "<name>END" is reached.

    Prints a message and exits on: more than one output parameter, a
    function that is already defined, or a repeated parameter name.
    """
    global currentLine

    seenNames = []

    def rejectDuplicate(name):
        # Parameter names must be unique within one declaration.
        if name in seenNames:
            print("Error: parameter cannot be the same name")
            exit()

    if int(op[3]) > 1:
        print("Error: cannot have several out parameters")
        exit()
    if op[1] in funcSt:
        print("Error: " + op[1] + " is defined")
        exit()

    # Input parameters: one per line, directly after the header.
    inPar = []
    for _ in range(int(op[2])):
        currentLine += 1
        fields = code[currentLine].split(' ')
        rejectDuplicate(fields[2])
        inPar.append(createSymbol(code[currentLine]))
        seenNames.append(fields[2])
    currentLine += 1

    # Optional output parameter.
    outPar = None
    if code[currentLine].startswith("OPT"):
        fields = code[currentLine].split(' ')
        rejectDuplicate(fields[2])
        outPar = createSymbol(code[currentLine])
        seenNames.append(fields[2])
        currentLine += 1

    par = SymbolTable.parList(inPar, outPar)
    startLine = currentLine
    funcSt[op[1]] = SymbolTable.Function(par, startLine)

    # Skip the body.
    endLabel = op[1] + "END"
    while not code[currentLine].startswith("SRT"):
        fields = code[currentLine].split(' ')
        if fields[0] == "LBL" and fields[1] == endLabel:
            break
        currentLine += 1
class Syntax():
    """Recursive-descent syntax pass that also populates a symbol table.

    Each grammar rule is a method that inspects ``self.Lexical`` (the token
    source) and advances it.  Errors are signalled with exceptions; many
    call sites route them to ``HandleException``, which terminates the
    process, while a few loops use a raised exception as their normal exit.

    NOTE(review): the source is Python 2 (print statements).
    NOTE(review): much of the control flow depends on exactly which calls
    raise; read the notes on individual methods before changing anything.
    """
    def __init__(self,lexicalAnalysis):
        """Keep the lexer and start with an empty symbol table."""
        self.Lexical = lexicalAnalysis
        self.SymbolTable = SymbolTable()
        # Set by run(); selects first-pass vs. later-pass behaviour.
        self.FirstPass = None
        # Debug flag switched on inside statement().
        self.Next = False
    def run(self,firstPass =True):
        """Run (first pass) or reset (later pass) the lexer, then parse."""
        if(firstPass):
            self.Lexical.Run()
        else:
            print "reset"
            self.Lexical.reset();
        self.FirstPass = firstPass
        self.Compilation_Unit()
    def HandleException(self, exception):
        """Print the active traceback and the exception, then exit.

        Does not return: the process ends through os._exit(0).
        """
        traceback.print_exc()
        print str(exception)
        os._exit(0)
    def expression(self):
        """Parse an expression starting at the current token.

        Accepts a parenthesised expression, a literal (true/false/null,
        number, char) or an identifier with optional call/index/member
        suffixes, each followed by an optional expressionz() tail.
        Raises a bare Exception when the current token starts none of them.
        """
        if(self.Lexical.getToken().lexem == "("):
            try:
                self.Lexical.GetNextToken()
                self.expression()
            except Exception as e:
                raise e
            self.Lexical.GetNextToken()
            if(self.Lexical.getToken().lexem != ")"):
                raise Exception(TossError(self,")"))
            self.Lexical.GetNextToken()
            try:
                self.expressionz()
            except Exception as e:
                self.HandleException(e)
        elif(self.Lexical.getToken().lexem == "true" or
             self.Lexical.getToken().lexem == "false" or
             self.Lexical.getToken().lexem == "null" or
             self.Lexical.getToken().myType == "number" or
             self.Lexical.getToken().myType == "char" ):
            try:
                self.Lexical.GetNextToken()
                ret = self.expressionz()
            except Exception as e:
                self.HandleException(e)
        elif(self.Lexical.getToken().myType == "Identifier"):
            try:
                self.Lexical.GetNextToken()
                # Each suffix parser returns None when it consumed nothing.
                ret = self.fn_arr_member()
                if (ret != None):
                    self.Lexical.GetNextToken()
                ret = self.member_refz()
                if (ret != None):
                    self.Lexical.GetNextToken()
                ret = self.expressionz()
            except Exception as e:
                self.HandleException(e)
        else:
            raise Exception()
    def expressionz(self):
        """Parse the optional tail of an expression; always returns 1.

        Handles '=', the logical operators, the comparison operators and
        tokens whose type is 'math'; any other token is left untouched.
        """
        token = self.Lexical.getToken()
        if(token.lexem == '='):
            self.Lexical.GetNextToken()
            self.assignment_expression()
        elif(token.lexem == "&&" or token.lexem == '||'):
            self.Lexical.GetNextToken()
            self.expression()
        elif(token.lexem == "==" or token.lexem == "!=" or
             token.lexem == '<=' or token.lexem == '>=' or
             token.lexem == '<' or token.lexem == '>'):
            self.Lexical.GetNextToken()
            # Debug output.
            print "EJG"+ self.Lexical.getToken().lexem
            self.expression()
        elif(token.myType == 'math'):
            self.Lexical.GetNextToken()
            self.expression()
        return 1
    def fn_arr_member(self):
        """Parse an optional '(' args ')' or '[' expr ']' suffix.

        Returns 1 when a suffix was parsed, None when the current token
        starts neither form.
        """
        if(self.Lexical.getToken().lexem == "("):
            self.Lexical.GetNextToken()
            try:
                self.argument_list()
            except Exception as e:
                # NOTE(review): relies on the exception having a ``msg``
                # attribute; a plain Exception may not - confirm.
                if e.msg != "NoneExistant":
                    self.HandleException(e)
            self.Lexical.GetNextToken()
            if(self.Lexical.getToken().lexem != ")"):
                raise Exception(TossError(self,")"));
            return 1
        elif( self.Lexical.getToken().lexem == "["):
            self.Lexical.GetNextToken()
            try:
                self.expression()
            except Exception as e:
                self.HandleException(e)
            self.Lexical.GetNextToken()
            if(self.Lexical.getToken().lexem != "]"):
                raise Exception(TossError(self,"]"))
            return 1
        else:
            return None
    def member_refz(self):
        """Parse an optional '.identifier' reference chain.

        Returns None when the current token is not '.', otherwise 1.
        """
        if(self.Lexical.getToken().lexem != "."):
            return
        self.Lexical.GetNextToken()
        if(self.Lexical.getToken().myType != "Identifier"):
            raise Exception(TossError(self,"Identifier"))
        try:
            self.Lexical.GetNextToken()
            ret = self.fn_arr_member()
            if( ret != None):
                self.Lexical.GetNextToken()
            ret = self.member_refz()
            if(ret != None):
                self.Lexical.GetNextToken()
        except Exception as e:
            self.HandleException(e)
        return 1
    def assignment_expression(self):
        """Parse the right-hand side of an assignment.

        Accepts 'this', 'new' type declaration, 'atoi' '(' expr ')',
        'itoa' '(' expr ')' or a plain expression.
        """
        if self.Lexical.getToken().lexem == 'this':
            self.Lexical.GetNextToken()
            return
        elif self.Lexical.getToken().lexem == 'new':
            self.Lexical.GetNextToken()
            try:
                self.isType()
                self.Lexical.GetNextToken()
                self.new_declaration()
            except Exception as e:
                self.HandleException(e)
        elif self.Lexical.getToken().lexem == 'atoi':
            self.Lexical.GetNextToken()
            if(self.Lexical.getToken().lexem != '('):
                self.HandleException(TossError(self,'('))
            self.Lexical.GetNextToken()
            try:
                self.expression()
            except Exception as e:
                self.HandleException(e)
            self.Lexical.GetNextToken()
            if(self.Lexical.getToken().lexem != ')'):
                self.HandleException(TossError(self,')'))
        elif(self.Lexical.getToken().lexem == 'itoa'):
            self.Lexical.GetNextToken()
            if(self.Lexical.getToken().lexem != '('):
                self.HandleException(TossError(self,'('))
            self.Lexical.GetNextToken()
            try:
                self.expression()
            except Exception as e:
                self.HandleException(e)
            self.Lexical.GetNextToken()
            if(self.Lexical.getToken().lexem != ')'):
                self.HandleException(TossError(self,')'))
        else:
            try:
                self.expression()
            except Exception as e:
                raise e
    # NOTE(review): ClassNameExists is defined a second time further down
    # in this class; the later definition replaces this one.
    def ClassNameExists(self):
        return self.SymbolTable.Exists(self.Lexical.getToken().lexem)
    def new_declaration(self):
        """Parse what follows ``new Type``: '(' args ')' and/or '[' expr ']'."""
        if(self.Lexical.getToken().lexem == "("):
            self.Lexical.GetNextToken()
            try:
                self.argument_list()
            except Exception as e:
                self.HandleException(e)
            self.Lexical.GetNextToken()
            if(self.Lexical.getToken().lexem != ")"):
                raise Exception(TossError(self,")"))
        if(self.Lexical.getToken().lexem == "["):
            self.Lexical.GetNextToken()
            try:
                self.expression()
            except Exception as e:
                self.HandleException(e)
            self.Lexical.GetNextToken()
            if(self.Lexical.getToken().lexem != "]"):
                raise Exception(TossError(self,"]"))
    def argument_list(self):
        """Parse one expression followed by further expressions."""
        try:
            self.expression()
        except Exception as e:
            raise e
        # NOTE(review): ``Peek`` is accessed without being called here,
        # whereas every other use in this class is ``Peek()``; the loop
        # also continues while the next lexeme is NOT "," - confirm intent.
        while(self.Lexical.Peek.lexem != ","):
            self.Lexical.GetNextToken()
            try:
                self.Lexical.GetNextToken()
                self.expression()
            except Exception as e:
                raise Exception(TossError(self,"Exception"))
    def Compilation_Unit(self):
        """Parse class declarations followed by ``void main ( ) body``.

        Class_declaration() is repeated until it returns 0 (current token
        is not 'class').  'main' is added to the symbol table if it is not
        already there, and its body is parsed inside a "main" scope.
        """
        ret = -1
        while ret != 0:
            try:
                ret =self.Class_declaration()
            except Exception as e:
                self.HandleException(e)
        # Debug output.
        print "Here"
        if( self.Lexical.getToken().lexem != "void"):
            raise Exception(TossError(self,"void"))
        else:
            self.Lexical.GetNextToken()
            if(self.Lexical.getToken().lexem == "main"):
                if(not self.SymbolTable.Exists("main")):
                    symid = self.SymbolTable.getSymID("method")
                    self.SymbolTable.addNode(SymbolNode(self.SymbolTable.getScope(),symid,"main","method",{"Return Type":"void"}))
                self.SymbolTable.startScope("main")
                self.Lexical.GetNextToken()
                if(self.Lexical.getToken().lexem != "("):
                    raise Exception(TossError(self, '('))
                else:
                    self.Lexical.GetNextToken()
                    if self.Lexical.getToken().lexem != ')':
                        raise Exception(TossError(self,')'))
                    else:
                        self.Lexical.GetNextToken()
                        try:
                            self.method_body()
                        except Exception as e:
                            self.HandleException(e)
                        self.SymbolTable.endScope()
            else:
                raise Exception(TossError(self,"Main"))
    def Class_declaration(self):
        """Parse ``class Name { members }``.

        Returns 0 when the current token is not 'class'; otherwise falls
        off the end (returns None) after closing the class scope.
        """
        if(self.Lexical.getToken().lexem != "class"):
            return 0
        try:
            self.Lexical.GetNextToken()
            ret =self.class_name()
            if(ret == 0):
                raise Exception(TossError(self,"ClassName"))
            self.SymbolTable.startScope(self.Lexical.getToken().lexem)
            self.Lexical.GetNextToken()
            if(self.Lexical.getToken().lexem != "{"):
                raise Exception(TossError(self,"{"))
        except Exception as e:
            self.HandleException(e)
        self.Lexical.GetNextToken()
        # NOTE(review): ``count`` is assigned but not read in this method.
        count =0
        while(True):
            try:
                self.class_member_declaration()
                if(self.Lexical.getToken().lexem == "}"and self.Lexical.Peek().lexem == "void"):
                    break
                self.Lexical.GetNextToken()
            except Exception as e:
                # An exception from a member declaration ends the member loop.
                break
        if(self.Lexical.getToken().lexem != "}"):
            raise Exception(TossError(self,"}"))
        else:
            self.Lexical.GetNextToken()
        self.SymbolTable.endScope()
    def class_name(self):
        """Check the current token as a class name; returns 1 or 0.

        Returns 0 for a non-identifier.  On the first pass an unknown name
        is added to the symbol table as a "Class"; on later passes the
        name must already exist.
        """
        if(self.Lexical.getToken().myType != "Identifier"):
            return 0
        if(self.FirstPass):
            if (self.SymbolTable.Exists(self.Lexical.getToken().lexem)):
                return 1
            else:
                symId = self.SymbolTable.getSymID("Class")
                node = self.Lexical.getToken()
                self.SymbolTable.addNode(SymbolNode(self.SymbolTable.getScope(),symId,node.lexem,"Class",None))
                return 1
        else:
            if (self.SymbolTable.Exists(self.Lexical.getToken().lexem)):
                return 1
            else:
                return 0
    def class_member_declaration(self):
        """Parse one class member: a constructor, field or method.

        Without a leading modifier the member is tried as a constructor;
        if that raises, a bare Exception is raised instead.  With a
        modifier, the member is recorded as "ivar" or "method" depending
        on whether '(' follows the name.
        """
        try:
            self.isModifier()
        except:
            try:
                self.constructor_declaration()
                return
            except Exception as e:
                raise Exception()
        modifier = self.Lexical.getToken().lexem
        self.Lexical.GetNextToken()
        method = False
        try:
            myType =self.isType()
            self.Lexical.GetNextToken()
            if(self.Lexical.getToken().myType != 'Identifier'):
                raise Exception(TossError(self,'Identifier'))
            if(not self.SymbolTable.Exists(self.Lexical.getToken().lexem)):
                if(self.Lexical.Peek().lexem != "("):
                    symId = self.SymbolTable.getSymID("Variable")
                    node = self.Lexical.getToken()
                    self.SymbolTable.addNode(SymbolNode(self.SymbolTable.getScope(),symId,node.lexem,"ivar", {"Type":myType, "modifier":modifier}))
                else:
                    symId = self.SymbolTable.getSymID("method")
                    method = True
                    node = self.Lexical.getToken()
                    self.SymbolTable.addNode(SymbolNode(self.SymbolTable.getScope(),symId,node.lexem,"method", {"return Type":myType, "modifier":modifier}))
                    self.SymbolTable.startScope(node.lexem)
            self.Lexical.GetNextToken()
            self.field_declaration()
            if(method):
                self.SymbolTable.endScope()
        except Exception as e:
            self.HandleException(e)
    def isType(self):
        """Check the current token as a type.

        Returns the lexeme for a built-in type token.  For anything else
        the result is None, or an exception when ClassNameExists() reports 0.
        """
        token = self.Lexical.getToken()
        if(self.FirstPass):
            if token.myType == 'type':
                return token.lexem
            else:
                ret = self.ClassNameExists()
                if(ret == 0):
                    raise Exception("TESTING")
                else:
                    pass
                return None
        else:
            if token.myType == 'type':
                return token.lexem
            else:
                ret = self.ClassNameExists()
                if ret == 0 :
                    raise Exception(TossError(self,"Type"))
    def ClassNameExists(self):
        """Return 1 on the first pass; later, 1/0 for a known class name.

        This is the definition in effect (it overrides the earlier one).
        """
        if(self.FirstPass):
            return 1
        else:
            if( self.SymbolTable.ClassExists(self.Lexical.getToken().lexem)):
                return 1
            else:
                return 0
    def isModifier(self):
        """Return quietly for 'public'/'private'; raise otherwise."""
        token = self.Lexical.getToken()
        # Debug output.
        print token.lexem
        if( token.lexem == "public"):
            return
        elif(token.lexem == 'private'):
            return
        else:
            raise Exception()
    def field_declaration(self):
        """Parse the rest of a member after its name.

        '(' starts a method (parameters then body); otherwise an optional
        '[' ']' or '= value' is parsed and ';' is required.
        """
        token = self.Lexical.getToken()
        if(token.lexem == '('):
            self.Lexical.GetNextToken()
            if(self.parameter_list()):
                self.Lexical.GetNextToken()
            if(self.Lexical.getToken().lexem != ')'):
                raise Exception(TossError(self,')'))
            try:
                self.Lexical.GetNextToken()
                self.method_body()
            except Exception as e:
                self.HandleException(e)
        else:
            if token.lexem == '[':
                self.Lexical.GetNextToken()
                if(self.Lexical.getToken().lexem != ']'):
                    raise Exception(TossError(self,']'))
                self.Lexical.GetNextToken()
            elif(token.lexem == '='):
                try:
                    self.Lexical.GetNextToken()
                    self.assignment_expression()
                    self.Lexical.GetNextToken()
                except Exception as e:
                    self.HandleException(e)
            if(self.Lexical.getToken().lexem != ';'):
                raise Exception(TossError(self,';'))
    def constructor_declaration(self):
        """Parse ``ClassName ( params ) body`` in a scope named after it.

        Raises when the current lexeme is not a known class.  The
        constructor is added to the symbol table only when this is not
        the first pass.
        """
        try:
            ret = self.SymbolTable.ClassExists(self.Lexical.getToken().lexem)
            if(not ret):
                raise Exception()
            if(not self.FirstPass):
                symId = self.SymbolTable.getSymID("method")
                token = self.Lexical.getToken()
                try:
                    node = SymbolNode(self.SymbolTable.getScope(),symId,token.lexem,"method",None)
                    self.SymbolTable.addNode(node)
                except Exception as e:
                    print str(e)
            self.SymbolTable.startScope(self.Lexical.getToken().lexem)
            self.Lexical.GetNextToken()
            if(self.Lexical.getToken().lexem != '('):
                raise Exception(TossError(self,'('))
            self.Lexical.GetNextToken()
            if(self.parameter_list() ):
                self.Lexical.GetNextToken()
            if (self.Lexical.getToken().lexem != ')'):
                raise Exception(TossError(self,')'))
            self.Lexical.GetNextToken()
            self.method_body()
            # Debug output.
            print self.Lexical.getToken().lexem
            self.SymbolTable.endScope()
        except Exception as e:
            raise e
    def method_body(self):
        """Parse ``{ variable_declaration* statement* }``.

        Both loops run until the rule they call raises; that exception is
        the normal way out of each loop.
        """
        if(self.Lexical.getToken().lexem != '{'):
            raise Exception(TossError(self,'{'))
        self.Lexical.GetNextToken()
        if(self.Lexical.getToken().lexem == "}"):
            return
        while(True):
            try:
                self.variable_declaration()
                self.Lexical.GetNextToken()
            except Exception as e:
                break
        while(True):
            try:
                self.statement()
                print "self = %s" % self.Lexical.getToken()
                self.Lexical.GetNextToken()
                print "self2 = %s" % self.Lexical.getToken()
            except:
                print "self3 = %s" % self.Lexical.getToken()
                break
        if(self.Lexical.getToken().lexem != '}'):
            raise Exception(TossError(self,'}'))
    def variable_declaration(self):
        """Parse ``type name ['[' ']'] ['=' value] ;`` and record the local.

        NOTE(review): two calls below use ``self.handleException`` (lower
        case h), but the method defined in this class is
        ``HandleException``.  Unless the lower-case name exists elsewhere,
        those calls raise AttributeError, and method_body's declaration
        loop would treat that as "no more declarations" rather than a
        fatal error.  Confirm before renaming: switching to
        HandleException would end the process instead.
        """
        try:
            mytype = self.isType()
        except Exception as e:
            raise e
        if(self.Lexical.Peek().myType != 'Identifier'):
            raise self.handleException(Exception(TossError(self,'Identifier')))
        self.Lexical.GetNextToken()
        if(self.Lexical.getToken().myType != 'Identifier'):
            self.handleException(Exception(TossError(self,'Identifier')))
        if( not self.SymbolTable.Exists(self.Lexical.getToken().lexem)):
            symid = self.SymbolTable.getSymID("local")
            self.SymbolTable.addNode(SymbolNode(self.SymbolTable.getScope(),symid,self.Lexical.getToken().lexem,"lvar",{"type":mytype}))
        self.Lexical.GetNextToken()
        if(self.Lexical.getToken().lexem == '['):
            self.Lexical.GetNextToken()
            if(self.Lexical.getToken().lexem != ']'):
                self.HandleException(Exception(TossError(self,']')))
            self.Lexical.GetNextToken()
        if(self.Lexical.getToken().lexem == '='):
            self.Lexical.GetNextToken()
            try:
                self.assignment_expression()
            except Exception as e:
                self.HandleException(e)
        if(self.Lexical.getToken().lexem != ';'):
            self.HandleException(Exception(TossError(self,';')))
    def statement(self):
        """Parse one statement.

        Handles a braced statement, if/else, while, return, cout <<,
        cin >> and, as the fallback, a bare expression.  Errors are
        re-raised to the caller.
        """
        if(self.Next ):
            print self.Lexical.getToken().lexem
        if(self.Lexical.getToken().lexem == "{"):
            self.Lexical.GetNextToken()
            try:
                self.statement()
            except Exception as e:
                raise e
            self.Lexical.GetNextToken()
            if self.Lexical.getToken().lexem != "}":
                raise Exception(TossError(self,"}"))
        elif(self.Lexical.getToken().lexem == "if"):
            self.Lexical.GetNextToken()
            if(self.Lexical.getToken().lexem != "("):
                raise Exception(TossError(self,"("))
            self.Lexical.GetNextToken()
            try:
                self.expression()
            except Exception as e:
                raise e
            if self.Lexical.getToken().lexem != ")":
                raise Exception(TossError(self,")"))
            try:
                self.Lexical.GetNextToken()
                self.statement()
            except Exception as e:
                raise e
            self.Lexical.GetNextToken()
            if(self.Lexical.getToken().lexem == "else"):
                try:
                    self.Lexical.GetNextToken()
                    self.statement()
                except Exception as e:
                    raise e
            return
        elif(self.Lexical.getToken().lexem == "while"):
            self.Lexical.GetNextToken()
            if(self.Lexical.getToken().lexem != "("):
                raise Exception(TossError(self,"("))
            self.Lexical.GetNextToken()
            try:
                self.expression()
            except Exception as e:
                raise e
            if(self.Lexical.getToken().lexem != ")"):
                raise Exception(TossError(self,")"))
            self.Lexical.GetNextToken()
            try:
                self.statement()
            except Exception as e:
                raise e
        elif (self.Lexical.getToken().lexem == "return"):
            self.Lexical.GetNextToken()
            try:
                self.expression()
            except Exception as e:
                raise e
            if(self.Lexical.getToken().lexem != ";"):
                raise Exception(TossError(self,";"))
        elif (self.Lexical.getToken().lexem == "cout"):
            self.Lexical.GetNextToken()
            if(self.Lexical.getToken().lexem != "<<"):
                raise Exception(TossError(self,"<<"))
            self.Lexical.GetNextToken()
            try:
                self.expression()
            except Exception as e:
                raise e
            if(self.Lexical.getToken().lexem != ";"):
                raise Exception(TossError(self,";"))
        elif self.Lexical.getToken().lexem == "cin":
            self.Lexical.GetNextToken()
            if(self.Lexical.getToken().lexem != ">>"):
                raise Exception(TossError(self,">>"))
            self.Lexical.GetNextToken()
            try:
                self.expression()
            except Exception as e:
                raise e
        else:
            # Fallback: expression statement.  The prints are debug output.
            print 'Here'
            try:
                print "selfExpre = %s" % self.Lexical.getToken()
                ret =self.expression()
                print "sele:w1 = %s" % self.Lexical.getToken()
                if(self.Lexical.Peek().lexem == "sum"):
                    print "h123:ere"
                    self.Next = True
            except Exception as e:
                print "Raising"
                raise e
    def parameter_list(self):
        """Parse a comma-separated list of parameters.

        NOTE(review): every path returns False, so the callers'
        ``if(self.parameter_list())`` branches are never taken - confirm
        this is intended.
        """
        if(self.Lexical.getToken().lexem == ")"):
            return False
        while(True):
            try:
                self.parameter()
                self.Lexical.GetNextToken()
                if(self.Lexical.getToken().lexem != ","):
                    break
                self.Lexical.GetNextToken()
            except Exception as e:
                self.HandleException(e)
        return False
    def parameter(self):
        """Parse ``type name`` and record it as a "parameter" symbol."""
        try:
            myType = self.isType()
        except:
            raise Exception(TossError(self,"Type"))
        self.Lexical.GetNextToken()
        if(self.Lexical.getToken().myType != "Identifier" ):
            raise Exception(TossError(self,"Identifier"))
        if(not self.SymbolTable.Exists(self.Lexical.getToken().lexem)):
            symid = self.SymbolTable.getSymID("parameter")
            node = self.Lexical.getToken()
            self.SymbolTable.addNode(SymbolNode(self.SymbolTable.getScope(),symid,node.lexem,"parameter",{"type": myType}))
    def printTable(self):
        """Print the current scope and the symbol table."""
        print self.SymbolTable.getScope()
        print self.SymbolTable
import SymbolTable

# Read the assembly source; 'with' closes the file even if reading fails.
with open('test.asm') as f:
    code = f.readlines()

# Strip only the line terminator.  Slicing off the last character
# unconditionally would eat a real character from a final line that has no
# trailing newline.
lines = []
for line in code:
    lines.append(line.rstrip('\n'))

symbols = SymbolTable.SymbolTable()
class Parser(object):
    # Single-pass compiler front end: each compile_* method consumes tokens
    # from self.lex for one grammar rule and emits VM code through self.vm.
    # NOTE(review): the class appears to continue past the portion
    # documented here.
    def __init__(self, file):
        # Construction performs the whole run: open the output, compile the
        # class, close the output.
        self.lex = Lex(file)
        self.symbols = SymbolTable()
        self.vm = VMWriter()
        self.openout(file)
        self.compile_class()
        self.closeout()

    # VMWriter support
    def openout(self, path):
        # Output goes to an 'output' directory next to the input file,
        # under the input's base name.
        outdir = os.path.join(os.path.dirname(path), 'output')
        file = os.path.join(outdir, os.path.basename(path))
        try:
            os.mkdir(outdir)
        except OSError as e:
            # Any OSError from mkdir is ignored here.
            pass
        self.vm.openout(file)

    def closeout(self):
        self.vm.closeout()

    def vm_function_name(self):
        # 'Class.subroutine' for the subroutine being compiled.
        return self._cur_class+'.'+self._cur_subroutine

    def vm_push_variable(self, name):
        # Look the variable up and push it from the segment for its kind.
        (type, kind, index) = self.symbols.lookup(name)
        self.vm.write_push(segments[kind], index)

    def vm_pop_variable(self, name):
        # Look the variable up and pop into the segment for its kind.
        (type, kind, index) = self.symbols.lookup(name)
        self.vm.write_pop(segments[kind], index)

    # Routines to advance the token
    def _require(self, tok, val=None):
        # Consume the next token and return its value.  Raises ParserError
        # when the token type differs, or - for keywords and symbols only -
        # when the value differs from ``val``.
        lextok, lexval = self._advance()
        if tok != lextok or tok in (T_KEYWORD, T_SYM) and val != lexval:
            raise ParserError(self._require_failed_msg(tok, val))
        else:
            return lexval

    def _require_failed_msg(self, tok, val):
        # Without an explicit value, fall back to the token type's name.
        if val == None:
            val = tokens[tok]
        return 'Expected '+val

    def _advance(self):
        return self.lex.advance()

    def _is_token(self, tok, val=None):
        # Peek only: matches on type alone when ``val`` is None, otherwise
        # on the (type, value) pair.
        lextok, lexval = self.lex.peek()
        return val == None and lextok == tok or (lextok, lexval) == (tok, val)

    def _is_keyword(self, *keywords):
        lextok, lexval = self.lex.peek()
        return lextok == T_KEYWORD and lexval in keywords

    def _is_sym(self, symbols):
        lextok, lexval = self.lex.peek()
        return lextok == T_SYM and lexval in symbols

    # Parser and compile Jack code
    # class: 'class' className '{' classVarDec* subroutineDec* '}'
    def compile_class(self):
        self._require(T_KEYWORD, KW_CLASS)
        self.compile_class_name()
        self._require(T_SYM, '{')
        while self._is_class_var_dec():
            self.compile_class_var_dec()
        while self._is_subroutine():
            self.compile_subroutine()
        self._require(T_SYM, '}')

    # className: identifier
    def compile_class_name(self):
        self._cur_class = self.compile_var_name() # Class names don't have to go into the symbol table

    # Variable declarations
    def _is_class_var_dec(self):
        return self._is_keyword(KW_STATIC, KW_FIELD)

    # classVarDec: {'static'|'field'} type varName (',' varName)* ';'
    def compile_class_var_dec(self):
        tok, kwd = self._advance() # static | field
        self._compile_dec(kwd_to_kind[kwd])

    # type varName (',' varName)* ';'
    def _compile_dec(self, kind):
        # Defines every declared name with the shared type and given kind.
        type = self.compile_type();
        name = self.compile_var_name()
        self.symbols.define(name, type, kind)
        while self._is_sym(','):
            self._advance()
            name = self.compile_var_name()
            self.symbols.define(name, type, kind)
        self._require(T_SYM, ';')

    def _is_type(self):
        return self._is_token(T_ID) or self._is_keyword(KW_INT, KW_CHAR, KW_BOOLEAN)

    # 'void' | type
    def compile_void_or_type(self):
        if self._is_keyword(KW_VOID):
            return self._advance()[1]
        else:
            return self.compile_type()

    # type: 'int' | 'char' | 'boolean' | className
    def compile_type(self):
        # Returns the type's text; raises ParserError on anything else.
        if self._is_type():
            return self._advance()[1]
        else:
            raise ParserError(self._require_failed_msg(*self.lex.peek()))

    def _is_var_name(self):
        return self._is_token(T_ID)

    # varName: identifier
    def compile_var_name(self):
        return self._require(T_ID)

    # Subroutine declarations
    def _is_subroutine(self):
        return self._is_keyword(KW_CONSTRUCTOR, KW_FUNCTION, KW_METHOD)

    # subroutineDec: ('constructor'|'function'|'method') ('void'|type)
    #                subroutineName '(' parameterList ')' subroutineBody
    def compile_subroutine(self):
        tok, kwd = self._advance()
        type = self.compile_void_or_type()
        self.compile_subroutine_name()
        self.symbols.start_subroutine()
        if kwd == KW_METHOD:
            # Methods receive the object as an extra argument named 'this'.
            self.symbols.define('this', self._cur_class, SK_ARG)
        self._require(T_SYM, '(')
        self.compile_parameter_list()
        self._require(T_SYM, ')')
        self.compile_subroutine_body(kwd)

    # subroutineName: identifier
    def compile_subroutine_name(self):
        self._cur_subroutine = self.compile_var_name() # subroutine names don't have to go in the symbol table

    # parameterList: (parameter (',' parameter)*)?
    def compile_parameter_list(self):
        if self._is_type():
            self.compile_parameter()
            while self._is_sym(','):
                self._advance()
                self.compile_parameter()

    # parameter: type varName
    def compile_parameter(self):
        if self._is_type():
            type = self.compile_type()
            name = self.compile_var_name()
            self.symbols.define(name, type, SK_ARG)

    # subroutineBody: '{' varDec* statements '}'
    def compile_subroutine_body(self, kwd):
        self._require(T_SYM, '{')
        while self._is_var_dec():
            self.compile_var_dec()
        # The header is written after the varDecs so the local count is known.
        self.write_func_decl(kwd)
        self.compile_statements()
        self._require(T_SYM, '}')

    def write_func_decl(self, kwd):
        self.vm.write_function(self.vm_function_name(), self.symbols.var_count(SK_VAR))
        self.load_this_ptr(kwd)

    def load_this_ptr(self, kwd):
        if kwd == KW_METHOD:
            self.vm.push_arg(0)
            self.vm.pop_this_ptr() # set up 'this' pointer to point to new object
        elif kwd == KW_CONSTRUCTOR:
            self.vm.push_const(self.symbols.var_count(SK_FIELD)) # object size
            self.vm.write_call('Memory.alloc', 1)
            self.vm.pop_this_ptr() # set up 'this' pointer to point to new object

    def _is_var_dec(self):
        return self._is_keyword(KW_VAR)

    # varDec: 'var' type varName (',' varName)* ';'
    def compile_var_dec(self):
        self._require(T_KEYWORD, KW_VAR)
        return self._compile_dec(SK_VAR)

    # Statements
    # statement: statement*
    def compile_statements(self):
        while self._is_statement():
            self._compile_statement()

    def _is_statement(self):
        return self._is_let() or self._is_if() or self._is_while() or self._is_do() or self._is_return()

    # statement: letStatement | ifStatement | whileStatement | doStatement | returnStatement
    def _compile_statement(self):
        if self._is_let():
            self.compile_let()
        elif self._is_if():
            self.compile_if()
        elif self._is_while():
            self.compile_while()
        elif self._is_do():
            self.compile_do()
        elif self._is_return():
            self.compile_return()

    def _is_let(self):
        return self._is_keyword(KW_LET)

    # letStatement: 'let' varName ('[' expression ']')? '=' expression ';'
    def compile_let(self):
        self._require(T_KEYWORD, KW_LET)
        name = self.compile_var_name()
        subscript = self._is_sym('[')
        if subscript:
            self.compile_base_plus_index(name) # calculate base+index
        self._require(T_SYM, '=')
        self.compile_expression() # calculate expression to assign
        self._require(T_SYM, ';')
        if subscript:
            self.pop_array_element() # *(base+index) = expr
        else:
            self.vm_pop_variable(name) # pop value directly into variable

    # ('[' expression ']')?
    def compile_base_plus_index(self, name):
        self.vm_push_variable(name) # push array ptr onto stack
        self._advance()
        self.compile_expression() # push index onto stack
        self._require(T_SYM, ']')
        self.vm.write_vm_cmd('add') # base+index - leave on the stack for later

    def pop_array_element(self):
        self.vm.pop_temp(TEMP_ARRAY) # Pop expr value to temp register
        self.vm.pop_that_ptr() # Pop base+index into 'that' register
        self.vm.push_temp(TEMP_ARRAY) # Push expr back onto stack
        self.vm.pop_that() # Pop value into *(base+index)

    def _is_if(self):
        return self._is_keyword(KW_IF)

    # ifStatement: 'if' '(' expression ')' '{' statements '}'
    #              ('else' '{' statements '}')?
def compile_if(self): self._require(T_KEYWORD, KW_IF) end_label = self.new_label() self._compile_cond_expression_statements(end_label) # VM code for condition and if statements if self._is_keyword(KW_ELSE): self._advance() self._require(T_SYM, '{') self.compile_statements() # VM code for else statements self._require(T_SYM, "}") self.vm.write_label(end_label) # label end_label def _is_while(self): return self._is_keyword(KW_WHILE) # whileStatement: 'while' '(' expression ')' '{' statements '}' def compile_while(self): self._require(T_KEYWORD, KW_WHILE) top_label = self.new_label() self.vm.write_label(top_label) # label top_label self._compile_cond_expression_statements(top_label) # VM code for condition and while statements # '(' expression ')' '{' statements '}' def _compile_cond_expression_statements(self, label): self._require(T_SYM, '(') self.compile_expression() self._require(T_SYM, ')') self.vm.write_vm_cmd('not') # ~(cond) notif_label = self.new_label() self.vm.write_if(notif_label) # if-goto notif_label self._require(T_SYM, '{') self.compile_statements() # VM code for if statements self._require(T_SYM, '}') self.vm.write_goto(label) # goto label self.vm.write_label(notif_label)# label notif_label label_num = 0 def new_label(self): self.label_num += 1 return 'label'+str(self.label_num) def _is_do(self): return self._is_keyword(KW_DO) # do_statement: 'do' subroutineCall ';' def compile_do(self): self._require(T_KEYWORD, KW_DO) name = self._require(T_ID) self.compile_subroutine_call(name) # VM code for subroutine call self.vm.pop_temp(TEMP_RETURN) # Pop return value and discard self._require(T_SYM, ';') def _is_return(self): return self._is_keyword(KW_RETURN) # returnStatement: 'return' expression? 
';' def compile_return(self): self._require(T_KEYWORD, KW_RETURN) if not self._is_sym(';'): self.compile_expression() # VM code for return expression if any else: self.vm.push_const(0) # push 0 if not returning a value self._require(T_SYM, ';') self.vm.write_return() # return # Expressions # expression: term (op term)* def compile_expression(self): self.compile_term() # Doesn't handle normal order of operations - just left to right for now while self._is_op(): op = self._advance() self.compile_term() self.vm.write_vm_cmd(vm_cmds[op[1]]) # op def _is_term(self): return self._is_const() or self._is_var_name() or self._is_sym('(') or self._is_unary_op() # term: integerConstant | stringConstant | keywordConstant | varName # | varName '[' expression ']' | subroutineCall | '(' expression ')' # | unaryOp term def compile_term(self): if self._is_const(): self.compile_const() elif self._is_sym('('): self._advance() self.compile_expression() # VM code to evaluate expression self._require(T_SYM, ')') elif self._is_unary_op(): tok, op = self._advance() self.compile_term() self.vm.write_vm_cmd(vm_unary_cmds[op]) # op elif self._is_var_name(): tok, name = self._advance() if self._is_sym('['): self.compile_array_subscript(name) # VM code for array subscript elif self._is_sym('(.'): self.compile_subroutine_call(name) # VM code for subroutine call else: self.vm_push_variable(name) # push variable on stack def _is_const(self): return self._is_token(T_NUM) or self._is_token(T_STR) or self._is_keyword_constant() def _is_keyword_constant(self): return self._is_keyword(KW_TRUE, KW_FALSE, KW_NULL, KW_THIS) def _is_op(self): return self._is_sym('+-*/&|<>=') def _is_unary_op(self): return self._is_sym('-~') # integerConstant | stringConstant | keywordConstant def compile_const(self): tok, val = self._advance() if tok == T_NUM: self.vm.push_const(val) # push constant val elif tok == T_STR: self.write_string_const_init(val) # initialize string & push str addr elif tok == T_KEYWORD: 
self.compile_kwd_const(val) # push TRUE, FALSE, NULL etc. def write_string_const_init(self, val): self.vm.push_const(len(val)) self.vm.write_call('String.new', 1) # String.new(len(str)) for c in val: self.vm.push_const(ord(c)) self.vm.write_call('String.appendChar', 2) # String.appendChar(nextchar) # keywordConstant: 'true' | 'false' | 'null' | 'this' def compile_kwd_const(self, kwd): if kwd == KW_THIS: self.vm.push_this_ptr() elif kwd == KW_TRUE: self.vm.push_const(1) self.vm.write_vm_cmd('neg') else: # KW_FALSE or KW_NULL self.vm.push_const(0) # '[' expression ']' def compile_array_subscript(self, name): self.vm_push_variable(name) # push array ptr onto stack self._require(T_SYM, '[') self.compile_expression() # push index onto stack self._require(T_SYM, ']') self.vm.write_vm_cmd('add') # base+index self.vm.pop_that_ptr() # pop into 'that' ptr self.vm.push_that() # push *(base+index) onto stack # subroutineCall: subroutineName '(' expressionList ')' # | (className | varName) '.' subroutineName '(' expressionList ')' def compile_subroutine_call(self, name): (type, kind, index) = self.symbols.lookup(name) if self._is_sym('.'): num_args, name = self.compile_dotted_subroutine_call(name, type) else: num_args = 1 self.vm.push_this_ptr() name = self._cur_class+'.'+name self._require(T_SYM, '(') num_args += self.compile_expr_list() # VM code to push arguments self._require(T_SYM, ')') self.vm.write_call(name, num_args) # call name num_args def compile_dotted_subroutine_call(self, name, type): num_args = 0 obj_name = name self._advance() name = self.compile_var_name() if self._is_builtin_type(type): # e.g. int.func(123) not allowed ParserError('Cannot use "." 
operator on builtin type') elif type == None: # Calling using class name name = obj_name+'.'+name else: # Calling using object variable name num_args = 1 self.vm_push_variable(obj_name) # push object ptr onto stack name = self.symbols.type_of(obj_name)+'.'+name return num_args, name def _is_builtin_type(self, type): return type in [KW_INT, KW_CHAR, KW_BOOLEAN, KW_VOID] # expressionList: (expression (',' expression)*)? def compile_expr_list(self): num_args = 0 if self._is_term(): self.compile_expression() num_args = 1 while self._is_sym(','): self._advance() self.compile_expression() num_args += 1 return num_args
class CompilationEngine:
    """Turns a tokenised Jack class (XML token list) into VM code.

    Tokens are kept in a reversed list so that the next token is always
    ``self.tokens[-1]``; ``advance()`` pops it into ``self.current_token``.
    Most compile* methods rely on exactly where that cursor sits on entry
    and exit, so the order of ``advance()`` calls matters.
    """

    def __init__(self, files):
        """Remember the class names (file names minus their last five characters)."""
        self.classes = [file[:-5] for file in files]
        self.variables = []
        # One independent counter per label family.
        self.counter = dict.fromkeys(
            (LOOP_LABEL, EXIT_LABEL, IF_LABEL, ELSE_LABEL), 0)

    def get_label(self, label):
        """Return ``label`` suffixed with its current counter, then bump the counter."""
        unique_name = label + str(self.counter.get(label))
        self.counter[label] += 1
        return unique_name

    def get_segment(self, var_name):
        """Map the symbol-table kind of ``var_name`` to a VM segment (None if unmapped)."""
        kind_to_segment = {
            VAR_CONSTANT: SEGMENT_LOCAL,
            ARG_CONSTANT: SEGMENT_ARGUMENT,
            FIELD_CONSTANT: 'this',
            STATIC_CONSTANT: SEGMENT_STATIC,
        }
        return kind_to_segment.get(self.st.kindOf(var_name))

    def closeFile(self, file):
        """Close the VM writer; the ``file`` argument is not used."""
        self.vm.close()

    def hasMoreTokens(self):
        """True while unread tokens remain."""
        return bool(self.tokens)

    def advance(self):
        """Move to the next token if there is one and return the current token."""
        if self.hasMoreTokens():
            self.current_token = self.tokens.pop()
        return self.current_token

    def openXMLFile(self, xml_file):
        """Load the token XML and prepare a symbol table and VM writer for it."""
        self.xml_tree = ET.parse(xml_file)
        token_elements = list(self.xml_tree.getroot())
        token_elements.reverse()  # so that pop() yields tokens in file order
        self.tokens = token_elements
        self.st = SymbolTable()
        self.current_class = xml_file[:-5]
        self.vm = VMWriter(self.current_class + '.vm')

    def compileClass(self):
        """Compile the class: its variable declarations, then its subroutines."""
        self.root = ET.Element('class')
        self.advance()
        self.advance()

        self.advance()
        while self.current_token.text in ('static', 'field'):
            self.compileClassVarDec()
            self.advance()

        while self.current_token.text != '}':
            self.compileSubroutine()
            self.advance()

    def compileClassVarDec(self):
        """Define every name of one static/field declaration, up to its ';'."""
        kind = self.current_token.text
        self.advance()
        var_type = self.current_token.text
        while True:
            self.advance()
            if self.current_token.tag == IDENTIFIER:
                self.st.define(self.current_token.text, var_type, kind)
            elif self.current_token.text == ';':
                break
            # Anything else (the ',' separators) is simply skipped.

    def compileSubroutine(self):
        """Compile one subroutine declaration, starting at its kind keyword."""
        self.st.startSubroutine()
        subroutine_type = self.current_token.text
        self.advance()
        self.advance()
        if subroutine_type == 'method':
            self.st.define('this', self.current_class, ARG_CONSTANT)
        subroutine_name = self.current_token.text
        self.advance()  # Skip opening (
        self.compileParameterList()
        next_token = self.advance()
        if next_token.text == '{':
            self.compileSubroutineBody(subroutine_name, subroutine_type)

    def compileSubroutineBody(self, subroutine_name, subroutine_type):
        """Compile var declarations, the function header and the statements."""
        self.advance()
        while self.current_token.text == 'var':
            self.compileVarDec()
            self.advance()

        self.vm.writeFunction(subroutine_name, self.st.varCount(VAR_CONSTANT))

        if subroutine_type == 'constructor':
            # Allocate one word per field and point 'this' at the new block.
            self.vm.writePush(SEGMENT_CONSTANT, self.st.varCount(FIELD_CONSTANT))
            self.vm.writeCall('Memory.alloc', 1)
            self.vm.writePop(SEGMENT_POINTER, 0)
        elif subroutine_type == 'method':
            # Point 'this' at argument 0.
            self.vm.writePush(SEGMENT_ARGUMENT, 0)
            self.vm.writePop(SEGMENT_POINTER, 0)

        self.compileStatements()

    def compileType(self):
        """Report whether the current token is a type name.

        NOTE(review): ``root`` is neither a parameter nor a local of this
        method, and ``add_sub_element`` is not defined in this class as
        shown - confirm whether this method is still used.
        """
        text = self.current_token.text
        if text == 'int' or text == 'char' or text == 'boolean':
            self.add_sub_element(root, KEYWORD)
            return True
        if self.compileClassName(root):
            return True
        if self.current_token.text == 'void':
            self.add_sub_element(root, KEYWORD)
            return True
        return False

    def compileParameterList(self,):
        """Define each parameter as an argument; return the count at ')'."""
        count_parameters = 0
        while True:
            self.advance()
            if self.current_token.tag in (KEYWORD, IDENTIFIER):
                param_type = self.current_token.text
                self.advance()
                self.st.define(self.current_token.text, param_type, ARG_CONSTANT)
                count_parameters += 1
            elif self.current_token.text == ')':
                return count_parameters
            # ',' separators fall through to the next iteration.

    def compileVarDec(self):
        """Define every local of one 'var' declaration, up to its ';'."""
        self.advance()
        var_type = self.current_token.text
        while True:
            self.advance()
            if self.current_token.tag == IDENTIFIER:
                self.st.define(self.current_token.text, var_type, VAR_CONSTANT)
            elif self.current_token.text == ',':
                continue
            elif self.current_token.text == ';':
                break

    def compileStatements(self):
        """Compile statements until the current token is a closing '}'."""
        handlers = {
            'let': self.compileLet,
            'if': self.compileIf,
            'while': self.compileWhile,
            'do': self.compileDo,
            'return': self.compileReturn,
        }
        while True:
            text = self.current_token.text
            handler = handlers.get(text)
            if handler is not None:
                handler()
            elif text == '}':
                break
            self.advance()

    def compileDo(self):
        """Compile the call after 'do' and discard its result into temp 0."""
        self.advance()
        self.compileExpression()
        self.vm.writePop(SEGMENT_TEMP, 0)

    def compileLet(self):
        """Compile an assignment to a variable or to an array element."""
        self.advance()
        var_name = self.current_token.text
        self.advance()

        array_flag = self.current_token.text == '['
        if array_flag:
            # Leave base+index on the stack until the right-hand side is done.
            self.vm.writePush(self.get_segment(var_name), self.st.indexOf(var_name))
            self.advance()
            self.compileExpression()
            self.vm.writeArithmetic('add')
            self.advance()

        if self.current_token.text == '=':
            self.advance()
            self.compileExpression()

        if self.current_token.text != ';':
            return
        if array_flag:
            self.vm.writePop(SEGMENT_TEMP, 0)
            self.vm.writePop(SEGMENT_POINTER, 1)
            self.vm.writePush(SEGMENT_TEMP, 0)
            self.vm.writePop(SEGMENT_THAT, 0)
        else:
            self.vm.writePop(self.get_segment(var_name), self.st.indexOf(var_name))

    def compileWhile(self):
        """Compile a while loop: label, negated test, body, jump back."""
        self.advance()
        while_label = self.get_label(LOOP_LABEL)
        exit_label = self.get_label(EXIT_LABEL)
        self.vm.writeLabel(while_label)
        self.advance()
        self.compileExpression()
        self.vm.writeArithmetic('not')
        self.vm.writeIf(exit_label)
        opening = self.advance()
        if opening.text == '{':
            self.advance()  # skip opening {
        self.compileStatements()
        # Add closing }
        self.vm.writeGoto(while_label)
        self.vm.writeLabel(exit_label)

    def compileReturn(self):
        """Compile a return; a bare 'return;' pushes constant 0 first."""
        self.advance()
        if self.current_token.text == ';':
            self.vm.writePush(SEGMENT_CONSTANT, 0)
        else:
            self.compileExpression()
        self.vm.writeReturn()

    def compileIf(self):
        """Compile an if statement with an optional else branch."""
        if_label = self.get_label(IF_LABEL)
        else_label = self.get_label(ELSE_LABEL)
        self.advance()  # Skip if statement
        self.advance()  # Skip opening (
        self.compileExpression()
        self.vm.writeArithmetic('not')
        self.vm.writeIf(if_label)
        self.advance()  # Skip closing )
        self.advance()  # Skip opening {
        self.compileStatements()
        self.vm.writeGoto(else_label)
        self.vm.writeLabel(if_label)
        upcoming = self.tokens[-1]
        if upcoming.text == 'else':
            self.advance()  # Skip closing }
            self.advance()  # Skip else statement
            self.advance()  # Skip opening {
            self.compileStatements()
        self.vm.writeLabel(else_label)

    def compileExpression(self):
        """Compile ``term (op term)*`` left to right."""
        self.compileTerm()
        self.advance()
        while self.current_token.text in '+-*/&|<>=':
            pending_op = self.current_token
            self.advance()
            self.compileTerm()
            # Put the operator back on the stream so that compileOp() sees it
            # as the current token after the right-hand term has been emitted.
            self.tokens.append(pending_op)
            self.advance()
            self.compileOp()
            self.advance()

    def compileOp(self):
        """Emit the VM command for the operator held in the current token."""
        symbol = self.current_token.text
        arithmetic = {
            '+': 'add',
            '-': 'sub',
            '&': 'and',
            '|': 'or',
            '<': 'lt',
            '>': 'gt',
            '=': 'eq',
        }
        library_calls = {'*': 'Math.multiply', '/': 'Math.divide'}
        if symbol in arithmetic:
            self.vm.writeArithmetic(arithmetic[symbol])
        elif symbol in library_calls:
            self.vm.writeCall(library_calls[symbol], 2)

    def compileClassName(self, root):
        """True when the current token names one of the known classes."""
        return self.current_token.text in self.classes

    def compileTerm(self):
        """Compile one term; returns True for constants, otherwise None."""
        tag = self.current_token.tag
        text = self.current_token.text

        if tag == INTEGER:  # Integer Constant
            self.vm.writePush(SEGMENT_CONSTANT, int(text))
            return True
        if tag == STRING:  # String Constant
            self.vm.writePush(SEGMENT_CONSTANT, len(text))
            self.vm.writeCall('String.new', 1)
            for char in text:
                self.vm.writePush(SEGMENT_CONSTANT, ord(char))
                self.vm.writeCall('String.appendChar', 2)
            return True
        if text == 'true':  # Keyword Constant
            self.vm.writePush(SEGMENT_CONSTANT, 1)
            self.vm.writeArithmetic('neg')
            return True
        if text == 'false' or text == 'null':
            self.vm.writePush(SEGMENT_CONSTANT, 0)
            return True
        if text == 'this':
            self.vm.writePush(SEGMENT_POINTER, 0)
            return True

        if tag == IDENTIFIER:
            lookahead = self.tokens[-1].text
            if lookahead == '[':
                # Array element: base + index, then read through 'that'.
                self.vm.writePush(self.get_segment(text), self.st.indexOf(text))
                self.advance()
                self.advance()
                self.compileExpression()
                self.vm.writeArithmetic('add')
                self.vm.writePop(SEGMENT_POINTER, 1)
                self.vm.writePush(SEGMENT_THAT, 0)
            elif lookahead == '.':
                call_function = text
                nArgs = 0
                # Method Call, push obj as first arg
                if self.st.kindOf(call_function) is not None:
                    self.vm.writePush(self.get_segment(call_function),
                                      self.st.indexOf(call_function))
                    nArgs = 1  # Obj is first argument
                    call_function = self.st.typeOf(call_function)
                self.advance()
                self.advance()
                call_function += '.' + self.current_token.text
                self.advance()
                self.advance()  # Skip opening (
                if self.current_token.text != ')':
                    nArgs += self.compileExpressionList()
                self.vm.writeCall(call_function, nArgs)
            elif lookahead == '(':
                call_function = self.current_class + '.' + text
                self.advance()
                self.advance()  # Skip opening (
                nArgs = 1
                self.vm.writePush(SEGMENT_POINTER, 0)
                if self.current_token.text != ')':
                    nArgs += self.compileExpressionList()
                self.vm.writeCall(call_function, nArgs)
            else:  # Regular Variable Name
                self.vm.writePush(self.get_segment(text), self.st.indexOf(text))
        elif text == '-' or text == '~':  # unaryOp
            command = 'neg' if text == '-' else 'not'
            self.advance()
            self.compileTerm()
            self.vm.writeArithmetic(command)
        elif text == '(':  # Recurse call expression
            self.advance()
            self.compileExpression()

    def compileExpressionList(self):
        """Compile comma-separated expressions; return how many there were."""
        count_expressions = 1
        while True:
            self.compileExpression()
            if self.current_token.text != ',':
                return count_expressions
            self.advance()
            count_expressions += 1
def __init__(self):
    """Set in_loop to 0 and create the 'Program' symbol table and the operations helper."""
    self.in_loop = 0
    # The root table is built with None as its first argument and the name 'Program'.
    program_table = SymbolTable(None, 'Program')
    self.table = program_table
    self.a = AllowedOperations()
class Parser(object):
    """Recursive-descent Jack parser that emits VM code while it parses.

    Constructing a Parser does all the work: it opens the output through the
    VMWriter, compiles a single class from the lexer's token stream and closes
    the output again.
    """

    # Backing counter for new_label(). The first ``+=`` on an instance shadows
    # this class-level default with a per-instance value.
    label_num = 0

    def __init__(self, file):
        """Compile the Jack source at path *file*."""
        self.lex = Lex(file)
        self.symbols = SymbolTable()
        self.vm = VMWriter()
        self.openout(file)
        self.compile_class()
        self.closeout()

    # VMWriter support
    def openout(self, path):
        """Open the VM output for *path* inside an "output" sibling directory."""
        outdir = os.path.join(os.path.dirname(path), "output")
        out_path = os.path.join(outdir, os.path.basename(path))
        try:
            os.mkdir(outdir)
        except OSError:
            # Best effort: the directory usually exists already. Any other
            # problem is left for self.vm.openout() to deal with.
            pass
        self.vm.openout(out_path)

    def closeout(self):
        """Close the VM output."""
        self.vm.closeout()

    def vm_function_name(self):
        """Return "<class>.<subroutine>" for the subroutine being compiled."""
        return self._cur_class + "." + self._cur_subroutine

    def vm_push_variable(self, name):
        """Emit a push of variable *name* from the segment matching its kind."""
        _, kind, index = self.symbols.lookup(name)
        self.vm.write_push(segments[kind], index)

    def vm_pop_variable(self, name):
        """Emit a pop into variable *name* in the segment matching its kind."""
        _, kind, index = self.symbols.lookup(name)
        self.vm.write_pop(segments[kind], index)

    # Routines to advance the token
    def _require(self, tok, val=None):
        """Consume the next token and return its value.

        Raises ParserError when the token type is not *tok*, or when *tok* is
        a keyword/symbol type and the token value differs from *val*.
        """
        lextok, lexval = self._advance()
        if tok != lextok or (tok in (T_KEYWORD, T_SYM) and val != lexval):
            raise ParserError(self._require_failed_msg(tok, val))
        return lexval

    def _require_failed_msg(self, tok, val):
        """Build an "Expected ..." message; falls back to tokens[tok] if no value."""
        if val is None:
            val = tokens[tok]
        return "Expected " + val

    def _advance(self):
        """Consume and return the next (token, value) pair from the lexer."""
        return self.lex.advance()

    def _is_token(self, tok, val=None):
        """Peek: is the next token of type *tok* (and value *val*, if given)?"""
        lextok, lexval = self.lex.peek()
        if val is None:
            return lextok == tok
        return (lextok, lexval) == (tok, val)

    def _is_keyword(self, *keywords):
        """Peek: is the next token one of the given keywords?"""
        lextok, lexval = self.lex.peek()
        return lextok == T_KEYWORD and lexval in keywords

    def _is_sym(self, symbols):
        """Peek: is the next token a symbol contained in the string *symbols*?"""
        lextok, lexval = self.lex.peek()
        return lextok == T_SYM and lexval in symbols

    # Parser and compile Jack code
    # class: 'class' className '{' classVarDec* subroutineDec* '}'
    def compile_class(self):
        """Compile a complete class declaration."""
        self._require(T_KEYWORD, KW_CLASS)
        self.compile_class_name()
        self._require(T_SYM, "{")
        while self._is_class_var_dec():
            self.compile_class_var_dec()
        while self._is_subroutine():
            self.compile_subroutine()
        self._require(T_SYM, "}")

    # className: identifier
    def compile_class_name(self):
        """Record the class name; class names do not go into the symbol table."""
        self._cur_class = self.compile_var_name()

    # Variable declarations
    def _is_class_var_dec(self):
        return self._is_keyword(KW_STATIC, KW_FIELD)

    # classVarDec: {'static'|'field'} type varName (',' varName)* ';'
    def compile_class_var_dec(self):
        """Compile one static/field declaration."""
        _, kwd = self._advance()  # static | field
        self._compile_dec(kwd_to_kind[kwd])

    # type varName (',' varName)* ';'
    def _compile_dec(self, kind):
        """Define every variable of a declaration list with the given *kind*."""
        var_type = self.compile_type()
        self.symbols.define(self.compile_var_name(), var_type, kind)
        while self._is_sym(","):
            self._advance()
            self.symbols.define(self.compile_var_name(), var_type, kind)
        self._require(T_SYM, ";")

    def _is_type(self):
        return self._is_token(T_ID) or self._is_keyword(KW_INT, KW_CHAR, KW_BOOLEAN)

    # 'void' | type
    def compile_void_or_type(self):
        """Consume and return 'void' or a type name."""
        if self._is_keyword(KW_VOID):
            return self._advance()[1]
        return self.compile_type()

    # type: 'int' | 'char' | 'boolean' | className
    def compile_type(self):
        """Consume and return a type name; raise ParserError if none follows."""
        if self._is_type():
            return self._advance()[1]
        # NOTE(review): the message is built from the *peeked* token, so it
        # reads "Expected <what was found>" - confirm whether that is intended.
        raise ParserError(self._require_failed_msg(*self.lex.peek()))

    def _is_var_name(self):
        return self._is_token(T_ID)

    # varName: identifier
    def compile_var_name(self):
        """Consume an identifier and return its text."""
        return self._require(T_ID)

    # Subroutine declarations
    def _is_subroutine(self):
        return self._is_keyword(KW_CONSTRUCTOR, KW_FUNCTION, KW_METHOD)

    # subroutineDec: ('constructor'|'function'|'method') ('void'|type)
    #                subroutineName '(' parameterList ')' subroutineBody
    def compile_subroutine(self):
        """Compile one constructor, function or method."""
        _, kwd = self._advance()
        self.compile_void_or_type()  # return type is parsed but not used
        self.compile_subroutine_name()
        self.symbols.start_subroutine()
        if kwd == KW_METHOD:
            # Defined before the parameters, so "this" is the first SK_ARG entry.
            self.symbols.define("this", self._cur_class, SK_ARG)
        self._require(T_SYM, "(")
        self.compile_parameter_list()
        self._require(T_SYM, ")")
        self.compile_subroutine_body(kwd)

    # subroutineName: identifier
    def compile_subroutine_name(self):
        """Record the subroutine name; it does not go into the symbol table."""
        self._cur_subroutine = self.compile_var_name()

    # parameterList: (parameter (',' parameter)*)?
    def compile_parameter_list(self):
        """Compile a possibly empty parameter list."""
        if self._is_type():
            self.compile_parameter()
            while self._is_sym(","):
                self._advance()
                self.compile_parameter()

    # parameter: type varName
    def compile_parameter(self):
        """Define a single parameter as an SK_ARG symbol."""
        if self._is_type():
            param_type = self.compile_type()
            name = self.compile_var_name()
            self.symbols.define(name, param_type, SK_ARG)

    # subroutineBody: '{' varDec* statements '}'
    def compile_subroutine_body(self, kwd):
        """Compile a body; the function header is written after the var decs."""
        self._require(T_SYM, "{")
        while self._is_var_dec():
            self.compile_var_dec()
        # The local count is only known once all 'var' declarations are read.
        self.write_func_decl(kwd)
        self.compile_statements()
        self._require(T_SYM, "}")

    def write_func_decl(self, kwd):
        """Emit the VM function header followed by the 'this' set-up."""
        self.vm.write_function(self.vm_function_name(), self.symbols.var_count(SK_VAR))
        self.load_this_ptr(kwd)

    def load_this_ptr(self, kwd):
        """Emit the code that initialises the 'this' pointer, if *kwd* needs it."""
        if kwd == KW_METHOD:
            self.vm.push_arg(0)
            self.vm.pop_this_ptr()  # 'this' pointer := argument 0
        elif kwd == KW_CONSTRUCTOR:
            self.vm.push_const(self.symbols.var_count(SK_FIELD))  # object size
            self.vm.write_call("Memory.alloc", 1)
            self.vm.pop_this_ptr()  # 'this' pointer := newly allocated object

    def _is_var_dec(self):
        return self._is_keyword(KW_VAR)

    # varDec: 'var' type varName (',' varName)* ';'
    def compile_var_dec(self):
        """Compile one 'var' declaration."""
        self._require(T_KEYWORD, KW_VAR)
        return self._compile_dec(SK_VAR)

    # Statements
    # statements: statement*
    def compile_statements(self):
        """Compile statements until the next token does not start one."""
        while self._is_statement():
            self._compile_statement()

    def _is_statement(self):
        return (
            self._is_let()
            or self._is_if()
            or self._is_while()
            or self._is_do()
            or self._is_return()
        )

    # statement: letStatement | ifStatement | whileStatement | doStatement | returnStatement
    def _compile_statement(self):
        """Dispatch on the statement keyword that comes next."""
        if self._is_let():
            self.compile_let()
        elif self._is_if():
            self.compile_if()
        elif self._is_while():
            self.compile_while()
        elif self._is_do():
            self.compile_do()
        elif self._is_return():
            self.compile_return()

    def _is_let(self):
        return self._is_keyword(KW_LET)

    # letStatement: 'let' varName ('[' expression ']')? '=' expression ';'
    def compile_let(self):
        """Compile an assignment to a variable or to an array element."""
        self._require(T_KEYWORD, KW_LET)
        name = self.compile_var_name()
        subscript = self._is_sym("[")
        if subscript:
            self.compile_base_plus_index(name)  # calculate base+index
        self._require(T_SYM, "=")
        self.compile_expression()  # calculate expression to assign
        self._require(T_SYM, ";")
        if subscript:
            self.pop_array_element()  # *(base+index) = expr
        else:
            self.vm_pop_variable(name)  # pop value directly into variable

    # ('[' expression ']')?
    def compile_base_plus_index(self, name):
        """Emit base+index for an array target and leave it on the stack."""
        self.vm_push_variable(name)  # push array ptr onto stack
        self._advance()  # the caller has already peeked the '['
        self.compile_expression()  # push index onto stack
        self._require(T_SYM, "]")
        self.vm.write_vm_cmd("add")  # base+index - leave on the stack for later

    def pop_array_element(self):
        """Store the value on top of the stack into *(base+index) beneath it."""
        self.vm.pop_temp(TEMP_ARRAY)  # Pop expr value to temp register
        self.vm.pop_that_ptr()  # Pop base+index into 'that' register
        self.vm.push_temp(TEMP_ARRAY)  # Push expr back onto stack
        self.vm.pop_that()  # Pop value into *(base+index)

    def _is_if(self):
        return self._is_keyword(KW_IF)

    # ifStatement: 'if' '(' expression ')' '{' statements '}'
    #              ('else' '{' statements '}')?
    def compile_if(self):
        """Compile an if statement with an optional else branch."""
        self._require(T_KEYWORD, KW_IF)
        end_label = self.new_label()
        # VM code for condition and if statements; the taken branch ends with
        # a jump to end_label, skipping the else part.
        self._compile_cond_expression_statements(end_label)
        if self._is_keyword(KW_ELSE):
            self._advance()
            self._require(T_SYM, "{")
            self.compile_statements()  # VM code for else statements
            self._require(T_SYM, "}")
        self.vm.write_label(end_label)  # label end_label

    def _is_while(self):
        return self._is_keyword(KW_WHILE)

    # whileStatement: 'while' '(' expression ')' '{' statements '}'
    def compile_while(self):
        """Compile a while loop; the body jumps back to the label at the top."""
        self._require(T_KEYWORD, KW_WHILE)
        top_label = self.new_label()
        self.vm.write_label(top_label)  # label top_label
        # VM code for condition and while statements
        self._compile_cond_expression_statements(top_label)

    # '(' expression ')' '{' statements '}'
    def _compile_cond_expression_statements(self, label):
        """Compile a guarded block shared by 'if' and 'while'.

        Emits: condition, "not", if-goto <skip>, block statements,
        goto *label*, label <skip>.
        """
        self._require(T_SYM, "(")
        self.compile_expression()
        self._require(T_SYM, ")")
        self.vm.write_vm_cmd("not")  # ~(cond)
        notif_label = self.new_label()
        self.vm.write_if(notif_label)  # if-goto notif_label
        self._require(T_SYM, "{")
        self.compile_statements()  # VM code for the guarded statements
        self._require(T_SYM, "}")
        self.vm.write_goto(label)  # goto label
        self.vm.write_label(notif_label)  # label notif_label

    def new_label(self):
        """Return the next label name: "label1", "label2", ..."""
        self.label_num += 1
        return "label" + str(self.label_num)

    def _is_do(self):
        return self._is_keyword(KW_DO)

    # do_statement: 'do' subroutineCall ';'
    def compile_do(self):
        """Compile a do statement and discard the call's return value."""
        self._require(T_KEYWORD, KW_DO)
        name = self._require(T_ID)
        self.compile_subroutine_call(name)  # VM code for subroutine call
        self.vm.pop_temp(TEMP_RETURN)  # Pop return value and discard
        self._require(T_SYM, ";")

    def _is_return(self):
        return self._is_keyword(KW_RETURN)

    # returnStatement: 'return' expression? ';'
    def compile_return(self):
        """Compile a return statement; a bare 'return;' pushes constant 0."""
        self._require(T_KEYWORD, KW_RETURN)
        if not self._is_sym(";"):
            self.compile_expression()  # VM code for return expression if any
        else:
            self.vm.push_const(0)  # push 0 if not returning a value
        self._require(T_SYM, ";")
        self.vm.write_return()  # return

    # Expressions
    # expression: term (op term)*
    def compile_expression(self):
        """Compile an expression strictly left to right."""
        self.compile_term()
        # Doesn't handle normal order of operations - just left to right for now
        while self._is_op():
            _, op = self._advance()
            self.compile_term()
            self.vm.write_vm_cmd(vm_cmds[op])  # op

    def _is_term(self):
        return (
            self._is_const()
            or self._is_var_name()
            or self._is_sym("(")
            or self._is_unary_op()
        )

    # term: integerConstant | stringConstant | keywordConstant | varName
    #     | varName '[' expression ']' | subroutineCall | '(' expression ')'
    #     | unaryOp term
    def compile_term(self):
        """Compile a single term; emits nothing if no term starts here."""
        if self._is_const():
            self.compile_const()
        elif self._is_sym("("):
            self._advance()
            self.compile_expression()  # VM code to evaluate expression
            self._require(T_SYM, ")")
        elif self._is_unary_op():
            _, op = self._advance()
            self.compile_term()
            self.vm.write_vm_cmd(vm_unary_cmds[op])  # op
        elif self._is_var_name():
            _, name = self._advance()
            if self._is_sym("["):
                self.compile_array_subscript(name)  # VM code for array subscript
            elif self._is_sym("(."):
                self.compile_subroutine_call(name)  # VM code for subroutine call
            else:
                self.vm_push_variable(name)  # push variable on stack

    def _is_const(self):
        return (
            self._is_token(T_NUM)
            or self._is_token(T_STR)
            or self._is_keyword_constant()
        )

    def _is_keyword_constant(self):
        return self._is_keyword(KW_TRUE, KW_FALSE, KW_NULL, KW_THIS)

    def _is_op(self):
        return self._is_sym("+-*/&|<>=")

    def _is_unary_op(self):
        return self._is_sym("-~")

    # integerConstant | stringConstant | keywordConstant
    def compile_const(self):
        """Compile a numeric, string or keyword constant."""
        tok, val = self._advance()
        if tok == T_NUM:
            self.vm.push_const(val)  # push constant val
        elif tok == T_STR:
            self.write_string_const_init(val)  # initialize string & push str addr
        elif tok == T_KEYWORD:
            self.compile_kwd_const(val)  # push TRUE, FALSE, NULL etc.

    def write_string_const_init(self, val):
        """Emit String.new(len) followed by one appendChar call per character."""
        self.vm.push_const(len(val))
        self.vm.write_call("String.new", 1)  # String.new(len(str))
        for c in val:
            self.vm.push_const(ord(c))
            self.vm.write_call("String.appendChar", 2)  # String.appendChar(nextchar)

    # keywordConstant: 'true' | 'false' | 'null' | 'this'
    def compile_kwd_const(self, kwd):
        """Emit the value for 'this', 'true', 'false' or 'null'."""
        if kwd == KW_THIS:
            self.vm.push_this_ptr()
        elif kwd == KW_TRUE:
            self.vm.push_const(1)
            self.vm.write_vm_cmd("neg")
        else:  # KW_FALSE or KW_NULL
            self.vm.push_const(0)

    # '[' expression ']'
    def compile_array_subscript(self, name):
        """Emit code that pushes the array element name[expression]."""
        self.vm_push_variable(name)  # push array ptr onto stack
        self._require(T_SYM, "[")
        self.compile_expression()  # push index onto stack
        self._require(T_SYM, "]")
        self.vm.write_vm_cmd("add")  # base+index
        self.vm.pop_that_ptr()  # pop into 'that' ptr
        self.vm.push_that()  # push *(base+index) onto stack

    # subroutineCall: subroutineName '(' expressionList ')'
    #               | (className | varName) '.' subroutineName '(' expressionList ')'
    def compile_subroutine_call(self, name):
        """Compile a call whose leading identifier *name* is already consumed."""
        var_type, _, _ = self.symbols.lookup(name)
        if self._is_sym("."):
            num_args, name = self.compile_dotted_subroutine_call(name, var_type)
        else:
            # Undotted call: pass the current 'this' pointer as first argument.
            num_args = 1
            self.vm.push_this_ptr()
            name = self._cur_class + "." + name
        self._require(T_SYM, "(")
        num_args += self.compile_expr_list()  # VM code to push arguments
        self._require(T_SYM, ")")
        self.vm.write_call(name, num_args)  # call name num_args

    def compile_dotted_subroutine_call(self, name, type):
        """Compile the '.subroutineName' part of a dotted call.

        *name* is the identifier before the dot and *type* is its symbol-table
        type (None when it is not a known variable, i.e. a class name).
        Returns (implicit argument count, qualified VM function name).
        Raises ParserError when *name* is a variable of a builtin type.
        """
        num_args = 0
        obj_name = name
        self._advance()  # the caller has already peeked the '.'
        name = self.compile_var_name()
        if self._is_builtin_type(type):  # e.g. int.func(123) not allowed
            # The original built this exception without raising it, so the
            # error was silently ignored.
            raise ParserError('Cannot use "." operator on builtin type')
        if type is None:  # Calling using class name
            name = obj_name + "." + name
        else:  # Calling using object variable name
            num_args = 1
            self.vm_push_variable(obj_name)  # push object ptr onto stack
            name = self.symbols.type_of(obj_name) + "." + name
        return num_args, name

    def _is_builtin_type(self, type):
        return type in [KW_INT, KW_CHAR, KW_BOOLEAN, KW_VOID]

    # expressionList: (expression (',' expression)*)?
    def compile_expr_list(self):
        """Compile a possibly empty argument list; return how many were pushed."""
        num_args = 0
        if self._is_term():
            self.compile_expression()
            num_args = 1
            while self._is_sym(","):
                self._advance()
                self.compile_expression()
                num_args += 1
        return num_args
def __init__(self):
    """Create an empty symbol table and set the symbol address counter to 16."""
    first_symbol_addr = 16
    self.symbols = SymbolTable.SymbolTable()
    self.symbol_addr = first_symbol_addr
class Assembler(object):
    """Two-pass assembler that turns a ``.asm`` file into a ``.hack`` file."""

    ##########################################
    #Constructor
    def __init__(self, target):
        """Prepare to assemble *target*.

        Raises RuntimeError when *target* does not contain ``.asm`` after
        at least one leading character.
        """
        index = target.find('.asm')
        if index < 1:
            raise RuntimeError("error, cannot use the filename: " + target)

        self.inputFileName = target
        self.outputFileName = self.inputFileName[:index] + '.hack'
        self.parser = Parser(self.inputFileName)
        self.code = Code()
        self.st = SymbolTable()

    ##########################################
    #public methods
    def assemble(self):
        '''Does the assembly and creates the file of machine commands,
           returning the name of that file '''
        self.__firstPass__()
        return self.__output__(self.__secondPass__())

    ##########################################
    #private/local methods
    def __output__(self, codeList):
        '''Write the machine code in codeList to the output file, one
           instruction per line, and return the output filename.'''
        # The context manager closes the file even if the write fails.
        with open(self.outputFileName, "w") as file:
            file.write("\n".join(codeList))
        return self.outputFileName

    def __firstPass__(self):
        '''Passes over the file contents to populate the symbol table'''
        # The parser does the mechanical label extraction; the assembler
        # only merges the returned mapping into the symbol table.
        self.st.table.update(self.parser.processLabels())

    def __secondPass__(self):
        '''Manage the translation to machine code, returning a list of
           machine instructions.

           Raises RuntimeError for a command that is neither an A nor a
           C command.'''
        machineCode = []

        command = self.parser.advance()
        while command:
            commandType = self.parser.commandType(command)
            if commandType == Parser.C_COMMAND:
                machineCode.append(self.__assembleC__(command))
            elif commandType == Parser.A_COMMAND:
                symbol = command[1:]
                if self.st.contains(symbol):
                    # known symbol: resolve through the table
                    bitString = self.__assembleA__(command)
                elif self.parser.symbol(command).isdigit():
                    # literal address
                    bitString = self.__assembleA__(symbol)
                else:
                    # new variable: allocate an address, then resolve
                    self.st.addEntry(symbol,
                                     self.st.getNextVariableAddress())
                    bitString = self.__assembleA__(command)
                machineCode.append(bitString)
            else:
                # This check used to be unreachable, so such a command
                # re-appended the previous instruction (or hit an
                # unbound name on the very first command).
                raise RuntimeError(
                    'There should be no labels on second pass, errant symbol is '
                    + str(command))
            command = self.parser.advance()
        return machineCode

    def __assembleC__(self, command):
        '''Do the mechanical work to translate a C_COMMAND, returns a
           string representation of a 16-bit binary word.'''
        return ('111'
                + self.code.comp(self.parser.comp(command))
                + self.code.dest(self.parser.dest(command))
                + self.code.jump(self.parser.jump(command)))

    def __assembleA__(self, command):
        '''Do the mechanical work to translate an A_COMMAND, returns a
           string representation of a 16-bit binary word.

           command is either a string of digits or a full A command
           whose symbol is looked up in the symbol table.'''
        if command.isdigit():
            symVal = int(command)  # literal value
        else:
            symVal = self.st.getAddress(self.parser.symbol(command))
        return '0' + "{0:015b}".format(symVal)
def __init__(self, name):
    """Initialise the node with the "ATOMIC_CONCEPT" tag and register *name*.

    The name is stored on the instance and added to a freshly constructed
    ``SymbolTable()``.
    """
    super().__init__("ATOMIC_CONCEPT")
    self.name = name
    table = SymbolTable()
    table.add_to_table(self.name)
def __init__(self):
    """Create the symbol table and set the variable address counter."""
    # Variable values are stored starting at index 16.
    first_var_addr = 16
    self.symbol_table = SymbolTable.SymbolTable()
    self.var_addr_count = first_var_addr
import Parser
import Code
import SymbolTable as st
from sys import argv

st.Constructor()

# Read the whole source up front. The context manager closes the handle,
# which the script previously left open for the life of the process.
with open(argv[1]) as f:
    lines = f.readlines()

symbol_count = 0
cmd_count = 0
bin_output = ''

# loop 1, check and build jump table
for l in lines:
    cleaned_str = Parser.clean(l)
    cmd_type = Parser.commandType(cleaned_str)
    # Only A and C commands advance the command counter.
    if cmd_type == Parser.A_COMMAND:
        cmd_count = cmd_count + 1
    if cmd_type == Parser.C_COMMAND:
        cmd_count = cmd_count + 1
    if cmd_type == Parser.L_COMMAND:
        # Drop the first and last characters to get the label name, and
        # record the current command count as its address.
        symbol = cleaned_str[1:-1]
        st.addEntry(symbol, cmd_count)

for l in lines:
    cleaned_str = Parser.clean(l)
    cmd_type = Parser.commandType(cleaned_str)
    # A command
def __init__(self):
    """Start with a root symbol table and no active type, function or loop."""
    self.isInLoop = False
    self.currentFun = None
    self.currentType = ""
    self.table = SymbolTable(None, "root")
class TypeChecker(NodeVisitor):
    """AST visitor that reports type and scope errors by printing them.

    Expression visitors return the type name of the expression; a visitor
    that reports an error on its failing path returns ``None`` there.
    """

    def __init__(self):
        self.table = SymbolTable(None, "root")
        self.currentType = ""    # type of the declaration being visited
        self.currentFun = None   # FunctionSymbol being checked, if any
        self.isInLoop = False    # True while visiting a loop body

    def visit_Integer(self, node):
        return 'int'

    def visit_Float(self, node):
        return 'float'

    def visit_String(self, node):
        return 'string'

    def visit_Variable(self, node):
        """Return the declared type of the variable, or report it as undeclared."""
        definition = self.table.getGlobal(node.id)
        if definition is None:
            print("Error: Usage of undeclared variable '{}': line {}".format(node.id, node.line))
        else:
            return definition.type

    def visit_Declaration(self, node):
        """Visit the initialisers with ``currentType`` set to the declared type."""
        self.currentType = node.type
        self.visit(node.inits)
        self.currentType = ""

    def visit_Init(self, node):
        """Check one initialiser and register the variable in the current scope."""
        exprType = self.visit(node.expression)
        # int and float are mutually assignable in an initialiser
        compatible = (
            (self.currentType == exprType)
            or (self.currentType == "int" and exprType == "float")
            or (self.currentType == "float" and exprType == "int")
        )
        if compatible:
            if self.table.get(node.id) is not None:
                print("Error: Variable '{}' already declared: line {}".format(node.id, node.line))
            else:
                self.table.put(node.id, VariableSymbol(node.id, self.currentType))
        else:
            print("Error: Assignment of '{}' to '{}': line {}".format(exprType, self.currentType, node.line))

    def visit_FunDef(self, node):
        """Register the function, then check its body in a new scope."""
        if self.table.get(node.id) is not None:
            print("Error: Redefinition of function '{}': line {}".format(node.id, node.line))
        else:
            function = FunctionSymbol(node.type, node.id, SymbolTable(self.table, node.id))
            self.table.put(node.id, function)
            self.table = function.symbolTable
            self.currentFun = function
            if node.args is not None:
                self.visit(node.args)
            # NOTE(review): source indentation was lost; extractParams() is
            # assumed to run whether or not there are arguments - confirm.
            function.extractParams()
            self.visit(node.body)
            self.table = self.table.getParentScope()
            self.currentFun = None

    def visit_Arg(self, node):
        """Register a formal parameter in the current (function) scope."""
        if self.table.get(node.id) is not None:
            print("Error: Variable '{}' already declared: line {}".format(node.id, node.line))
        else:
            self.table.put(node.id, VariableSymbol(node.id, node.type))

    def visit_Assignment(self, node):
        """Check that the assigned expression matches the variable's type."""
        declaration = self.table.getGlobal(node.id)
        exprType = self.visit(node.expression)
        if declaration is None:
            print("Error: Variable '{}' undefined in current scope: line {}".format(node.id, node.line))
        elif declaration.type == "int" and exprType == "float":
            print("Warning: Assignment of '{}' to '{}' may cause loss of precision: line {}".format(exprType, declaration.type, node.line))
        elif declaration.type == "float" and exprType == "int":
            pass  # int widens to float without a diagnostic
        elif exprType != declaration.type:
            print("Error: Assignment of '{}' to '{}': line {}".format(exprType, declaration.type, node.line))

    def visit_InstructionPrint(self, node):
        self.visit(node.expression)

    def visit_InstructionIf(self, node):
        self.visit(node.condition)
        self.visit(node.instructionIf)
        if node.instructionElse is not None:
            self.visit(node.instructionElse)

    def visit_InstructionWhile(self, node):
        """Check a while loop, marking its body as being inside a loop."""
        self.visit(node.condition)
        # Restore the enclosing value instead of forcing False; otherwise
        # a break/continue following a nested loop is wrongly reported as
        # being outside a loop.
        wasInLoop = self.isInLoop
        self.isInLoop = True
        self.visit(node.instruction)
        self.isInLoop = wasInLoop

    def visit_Repeat(self, node):
        """Check a repeat loop; the condition is visited after the body."""
        wasInLoop = self.isInLoop
        self.isInLoop = True
        self.visit(node.instructions)
        self.isInLoop = wasInLoop
        self.visit(node.condition)

    def visit_Return(self, node):
        """Check that return is inside a function and has a compatible type."""
        if self.currentFun is None:
            print("Error: return instruction outside a function: line {}".format(node.line))
        else:
            retType = self.visit(node.expression)
            # an int may be returned from a float function
            if retType != self.currentFun.type and (self.currentFun.type != "float" or retType != "int"):
                print("Error: Improper returned type, expected {}, got {}: line {}".format(self.currentFun.type, retType, node.line))

    def visit_Continue(self, node):
        if not self.isInLoop:
            print("Error: continue instruction outside a loop: line {}".format(node.line))

    def visit_Break(self, node):
        if not self.isInLoop:
            print("Error: break instruction outside a loop: line {}".format(node.line))

    def visit_CompoundInstruction(self, node):
        """Check a block in a fresh inner scope."""
        innerScope = SymbolTable(self.table, "innerScope")
        self.table = innerScope
        if node.declarations is not None:
            self.visit(node.declarations)
        self.visit(node.instructions)
        self.table = self.table.getParentScope()

    def visit_Expression(self, node=None):
        # node is optional so existing zero-argument calls keep working,
        # while a call that passes a node no longer raises TypeError.
        pass

    def visit_BinExpr(self, node):
        """Return the result type of a binary expression from ``ttype``."""
        type1 = self.visit(node.left)
        type2 = self.visit(node.right)
        op = node.op
        resultType = ttype[op][type1][type2]
        if resultType is None:
            print("Error: Illegal operation, '{} {} {}': line {}".format(type1, op, type2, node.line))
        return resultType

    def visit_FunCall(self, node):
        """Check argument count and types; return the function's return type."""
        funDef = self.table.getGlobal(node.id)
        if funDef is None or not isinstance(funDef, FunctionSymbol):
            print("Error: Call of undefined fun: '{}': line {}".format(node.id, node.line))
        else:
            if len(node.arglist.children) != len(funDef.params):
                print("Error: Improper number of args in '{}' call: line {}".format(funDef.id, node.line))
            else:
                types = [self.visit(x) for x in node.arglist.children]
                expectedTypes = funDef.params
                for actual, expected in zip(types, expectedTypes):
                    # an int argument is accepted for a float parameter
                    if actual != expected and not (actual == "int" and expected == "float"):
                        print("Error: Improper type of args in {} call: line {}".format(node.id, node.line))
            return funDef.type
def Start():
    """Handle the "Start" action: reset the symbol limit and open a scope.

    Sets the module-level ``_max_symbol`` to 256, initialises the symbol
    table on first use, and opens a new scope.
    """
    global _max_symbol

    Display("Start", _list)
    _max_symbol = 256

    already_initialised = SymbolTable._initilized
    if not already_initialised:
        SymbolTable.initilize(0, 32)
    # NOTE(review): source indentation was lost; newScope() is assumed to
    # run unconditionally, not only on first initialisation - confirm.
    SymbolTable.newScope()
def pass1(self):
    """Tokenise every source line and fill the per-section tables.

    For each line in ``self.line_list`` this adds the line to the current
    section's token table, records labels in the symbol table, records
    literal operands in the literal table, records EXTREF operands in the
    external table, records modification entries for operands that
    mention an external symbol, and advances ``self.locctr`` by the
    token's ``byte_size``.

    NOTE(review): the nesting below was reconstructed from source whose
    indentation had been lost; confirm it against the original file.
    """
    token_index = 0
    for line in self.line_list:
        # START opens section 0; CSECT opens the next section. Both reset
        # the location counter and token index and append fresh tables.
        if "START" in line:
            self.program_number = 0
            self.locctr = 0
            token_index = 0
            self.sym_tab_list.append(SymbolTable.SymbolTable())
            self.literal_tab_list.append(SymbolTable.SymbolTable())
            self.external_tab_list.append(SymbolTable.SymbolTable())
            self.modif_tab_list.append(SymbolTable.SymbolTable())
            self.token_tab_list.append(TokenTable.TokenTable())
            self.token_tab_list[self.program_number].set_table(
                self.sym_tab_list[self.program_number],
                self.literal_tab_list[self.program_number],
                self.external_tab_list[self.program_number],
                self.inst_table)
        elif "CSECT" in line:
            self.program_number += 1
            self.locctr = 0
            token_index = 0
            self.sym_tab_list.append(SymbolTable.SymbolTable())
            self.literal_tab_list.append(SymbolTable.SymbolTable())
            self.external_tab_list.append(SymbolTable.SymbolTable())
            self.modif_tab_list.append(SymbolTable.SymbolTable())
            self.token_tab_list.append(TokenTable.TokenTable())
            self.token_tab_list[self.program_number].set_table(
                self.sym_tab_list[self.program_number],
                self.literal_tab_list[self.program_number],
                self.external_tab_list[self.program_number],
                self.inst_table)
        self.token_tab_list[self.program_number].put_token(line)
        current_token = self.token_tab_list[self.program_number].get_token(
            token_index)
        # A label other than "" or "." defines a symbol: EQU takes the
        # value computed from its operand, anything else takes locctr.
        if (not eq(current_token.label, "")) and (not eq(
                current_token.label, ".")):
            if eq(current_token.operator, "EQU"):
                self.sym_tab_list[self.program_number].put_symbol(
                    current_token.label,
                    self.operate_address(current_token.operand[0]))
            else:
                self.sym_tab_list[self.program_number].put_symbol(
                    current_token.label, self.locctr)
        # An operand containing "=" is recorded as a literal with a
        # placeholder address of 0.
        if (not eq(current_token.label,
                   "")) and ("=" in current_token.operand[0]):
            self.literal_tab_list[self.program_number].put_symbol(
                current_token.operand[0], 0)
        if not eq(current_token.operator, ""):
            if eq(current_token.operator, "LTORG") or eq(
                    current_token.operator, "END"):
                # Give every recorded literal the current locctr and
                # advance locctr by its size.
                for j in range(
                        0, self.literal_tab_list[
                            self.program_number].get_size()):
                    literal = self.literal_tab_list[
                        self.program_number].get_symbol(j)
                    self.literal_tab_list[
                        self.program_number].modif_symbol(
                            literal, self.locctr)
                    if "X" in literal:
                        self.locctr += 1
                    elif "C" in literal:
                        # strip the C marker and the quotes, then count
                        # what is left
                        literal = literal.replace("C", "")
                        literal = literal.replace("\'", "")
                        self.locctr += len(literal)
            elif eq(current_token.operator, "EXTREF"):
                for j in range(0, len(current_token.operand)):
                    self.external_tab_list[self.program_number].put_symbol(
                        current_token.operand[j], 0)
            elif len(current_token.operand) > 0:
                # Record a modification entry when the first operand
                # mentions an external symbol.
                for j in range(
                        0, self.external_tab_list[
                            self.program_number].get_size()):
                    if self.external_tab_list[
                            self.program_number].get_symbol(
                                j) in current_token.operand[0]:
                        # size is 6, or 5 when the operator contains "+"
                        modif_size = 6
                        if "+" in current_token.operator:
                            modif_size = 5
                        if "-" in current_token.operand[0]:
                            # "A-B": one "+" entry and one "-" entry
                            op_symbols = current_token.operand[0].split(
                                "-")
                            self.modif_tab_list[
                                self.program_number].put_modif_symbol(
                                    "+" + op_symbols[0],
                                    self.locctr + (6 - modif_size),
                                    modif_size)
                            self.modif_tab_list[
                                self.program_number].put_modif_symbol(
                                    "-" + op_symbols[1],
                                    self.locctr + (6 - modif_size),
                                    modif_size)
                        else:
                            self.modif_tab_list[
                                self.program_number].put_modif_symbol(
                                    "+" + current_token.operand[0],
                                    self.locctr + (6 - modif_size),
                                    modif_size)
                        break
            self.locctr += current_token.byte_size
        token_index += 1
def Declaration_(instruction, ts, textEdit):
    """Evaluate a declaration and add or update its symbol in *ts*.

    The value of ``instruction.val`` is evaluated first. ``'#'`` means
    evaluation failed: a semantic error is recorded and nothing is
    stored. ``'array'`` means the target is an array (or a string built
    by ``valueArray``); anything else is stored as a scalar symbol.

    Any exception raised while processing is caught and reported with a
    printed message, so this function does not propagate errors.

    NOTE(review): the nesting below was reconstructed from source whose
    indentation had been lost; confirm it against the original file.
    """
    try:
        global la, co, contador
        # remember the position of the instruction being processed
        la = instruction.line
        co = instruction.column
        val = valueExpression(instruction.val, ts, textEdit)
        if val == '#':
            seob = seOb(
                f'Error Semantico: No se pudo declarar {instruction.id}.',
                instruction.line, instruction.column)
            semanticErrorList.append(seob)
            return
        if val != 'array':
            type_ = getType(val)
            sym = TS.Symbol(instruction.id, type_, val, currentAmbit)
            if isinstance(instruction.val, ReferenceBit):
                if isinstance(instruction.val.expression, Identifier):
                    # remember which identifier this symbol refers to
                    sym.referencia = instruction.val.expression.id
            if ts.exist(instruction.id) != 1:
                ts.add(sym)
            else:
                ts.update(sym)
            if sym.id[1] == 'a':
                # parameter: collect it and mark the current label as a
                # procedure
                currentParams.append(sym.id)
                ts.updateFunction(currentAmbit, TS.TypeData.PROCEDIMIENTO)
            elif sym.id[1] == 'v':
                # mark the current label as a function
                ts.updateFunction(currentAmbit, TS.TypeData.FUNCION)
        else:
            # start from the stored value when the symbol already exists
            valor = {}
            if ts.exist(instruction.id) == 1:
                valor = ts.get(instruction.id).valor
            if isinstance(instruction.val, ExpressionsDeclarationArray):
                valor = valueArray(instruction.id, instruction.val, ts, valor,
                                   textEdit)
            if isinstance(valor, str):
                type_ = TS.TypeData.STRING
                listaKeys = []
            else:
                type_ = TS.TypeData.ARRAY
                listaKeys = valor.values()
            sym = TS.Symbol(instruction.id, type_, valor, currentAmbit, 0,
                            len(listaKeys))
            if ts.exist(instruction.id) != 1:
                ts.add(sym)
            else:
                ts.update(sym)
        # validate the references
        UpdateReferences(instruction.id, val, ts)
    except Exception:
        # Still best-effort, but no longer a bare except, so
        # KeyboardInterrupt and SystemExit are not swallowed.
        print("error en la declaracion de variable")
def process(instructions, ts, printList, textEdit):
    """Execute *instructions* against the symbol table *ts*.

    *instructions* is normally a sequence that is walked with an index so
    that ``If`` and ``Goto`` can jump. If processing raises (for example
    because *instructions* is a single instruction rather than a
    sequence), the ``except`` branch treats *instructions* itself as one
    instruction and dispatches on its type.

    NOTE(review): the nesting below was reconstructed from source whose
    indentation had been lost; confirm it against the original file.
    """
    global currentAmbit, pasadas, currentParams, contador
    try:
        i = 0
        while i < len(instructions):
            # dispatch on the instruction type
            b = instructions[i]
            if isinstance(b, Print_):
                Print(b, ts, printList, textEdit)
            elif isinstance(b, Declaration):
                Declaration_(b, ts, textEdit)
            elif isinstance(b, If):
                result = valueExpression(b.expression, ts, textEdit)
                if result == 1:
                    tmp = i
                    i = goto(i + 1, instructions, b.label)
                    if i != 0:
                        pasadas = 0
                    else:
                        # goto() returned 0: treated as label not found,
                        # so keep the current position
                        i = tmp
                        se = seOb(f"Error: etiqueta {b.label} no existe",
                                  b.line, b.column)
                        semanticErrorList.append(se)
                elif result == '#':
                    se = seOb(f"Error: Condicion no valida", b.line, b.column)
                    semanticErrorList.append(se)
            elif isinstance(b, Goto):
                # keep the goto's own index in case the jump fails
                tmp = i
                i = goto(i, instructions, b.label)
                if i != 0:
                    pasadas = 0
                else:
                    i = tmp
                    se = seOb(f"Error: etiqueta {b.label} no existe", b.line,
                              b.column)
                    semanticErrorList.append(se)
            elif isinstance(b, Label):
                # insert the label into the symbol table
                if len(currentParams) > 0:
                    # procedure; becomes a function once a $Vn is read
                    if ts.exist(b.label) == 1:
                        type_ = ts.get(b.label).tipo
                    else:
                        type_ = TS.TypeData.PROCEDIMIENTO
                else:
                    type_ = TS.TypeData.CONTROL
                symbol = TS.Symbol(b.label, type_, 0, currentAmbit,
                                   currentParams.copy())
                currentParams[:] = []  # clear the collected parameters
                if ts.exist(symbol.id) != 1:
                    ts.add(symbol)
                else:
                    ts.update(symbol)
                currentAmbit = b.label
            elif isinstance(b, Exit):
                break
            elif isinstance(b, Unset):
                if ts.delete(b.id) == 1:
                    print('variable eliminada.')
                else:
                    se = seOb(
                        f'Error Semantico: No se pudo eliminar {b.id}, en funcion unset.',
                        b.line, b.column)
                    semanticErrorList.append(se)
            i += 1
    except Exception:
        # Fallback: handle *instructions* as a single instruction.
        if isinstance(instructions, Print_):
            Print(instructions, ts, printList, textEdit)
        elif isinstance(instructions, Declaration):
            Declaration_(instructions, ts, textEdit)
        elif isinstance(instructions, If):
            result = valueExpression(instructions.expression, ts, textEdit)
            if result == 1:
                tmp = i
                i = goto(i + 1, instructions, instructions.label)
                if i != 0:
                    pasadas = 0
                else:
                    i = tmp
                    se = seOb(
                        f"Error: etiqueta {instructions.label} no existe",
                        instructions.line, instructions.column)
                    semanticErrorList.append(se)
            elif result == '#':
                se = seOb(f"Error: Condicion no valida", instructions.line,
                          instructions.column)
                semanticErrorList.append(se)
        elif isinstance(instructions, Goto):
            tmp = i
            i = goto(i, instructions, instructions.label)
            if i != 0:
                pasadas = 0
            else:
                i = tmp
                se = seOb(f"Error: etiqueta {instructions.label} no existe",
                          instructions.line, instructions.column)
                semanticErrorList.append(se)
        elif isinstance(instructions, Label):
            # insert the label into the symbol table
            if len(currentParams) > 0:
                # procedure; becomes a function once a $Vn is read
                if ts.exist(instructions.label) == 1:
                    type_ = ts.get(instructions.label).tipo
                else:
                    type_ = TS.TypeData.PROCEDIMIENTO
            else:
                type_ = TS.TypeData.CONTROL
            symbol = TS.Symbol(instructions.label, type_, 0, currentAmbit,
                               currentParams.copy())
            currentParams[:] = []  # clear the collected parameters
            if ts.exist(symbol.id) != 1:
                ts.add(symbol)
            else:
                ts.update(symbol)
            currentAmbit = instructions.label
        elif isinstance(instructions, Exit):
            return
        elif isinstance(instructions, Unset):
            if ts.delete(instructions.id) == 1:
                print('variable eliminada.')
            else:
                # Report the instruction being handled. This branch used
                # to read the loop variable `b`, which is unbound when
                # the loop never ran.
                se = seOb(
                    f'Error Semantico: No se pudo eliminar {instructions.id}, en funcion unset.',
                    instructions.line, instructions.column)
                semanticErrorList.append(se)
def pass1(self):
    """
    Perform pass 1.
    1) Scan the program source, split it into tokens, and store them in
       tokenList per section.
    2) Assign addresses and build SYMTAB and LITTAB.

    Each line is split on tabs: field 0 is used as the label, field 1 as
    the operator and field 2 as the operand. When a section ends, its
    final location counter is stored as ``byteSize`` on the section's
    first token.

    NOTE(review): the nesting below was reconstructed from source whose
    indentation had been lost; confirm it against the original file.
    """
    section = -1
    locctr = 0
    # Read line by line and store into tokenList
    for line in self.lineList:
        temp = line.split("\t")
        # Skip comments
        if temp[0] == ".":
            continue
        # Store separately per section
        if temp[1] == "START" or temp[1] == "CSECT":
            if temp[1] == "CSECT":
                # close the previous section: record its total size
                self.tokenList[section].tokenList[0].byteSize = locctr
            section += 1
            locctr = 0
            self.symtabList.append(SymbolTable.SymbolTable())
            self.littabList.append(LiteralTable.LiteralTable())
            self.tokenList.append(
                TokenTable.TokenTable(self.symtabList[section],
                                      self.littabList[section],
                                      self.instTable))
        self.tokenList[section].puttoken(line, locctr)
        # Store symbol
        if len(temp[0]) > 0:
            self.tokenList[section].symtab.putsymbol(temp[0], locctr)
        # Temporarily store literal
        if len(temp) > 2:
            if temp[2].startswith("="):
                self.tokenList[section].littab.putliteral(temp[2], locctr)
        # Address calculation
        if self.instTable.instMap.get(temp[1].replace("+", "")) is not None:
            # instruction: advance by its format value, plus one more
            # when the operator has a "+" prefix
            locctr += self.instTable.instMap[temp[1].replace("+", "")].format
            if temp[1].startswith("+"):
                locctr += 1
        elif temp[1] == "RESB":
            locctr += int(temp[2])
        elif temp[1] == "RESW":
            locctr += int(temp[2]) * 3
        elif temp[1] == "BYTE":
            # X case: the 3 subtracted characters are the X and the two
            # quotes; the rest is halved
            if temp[2].startswith("X"):
                locctr += int((len(temp[2]) - 3) / 2)
            # C case: one per character between the quotes
            else:
                locctr += len(temp[2]) - 3
        elif temp[1] == "WORD":
            locctr += 3
        # Assign literal addresses
        elif temp[1] == "LTORG" or temp[1] == "END":
            for literal in self.littabList[section].littab.keys():
                self.littabList[section].modifyliteral(literal, locctr)
                # the 4 subtracted characters are "=", the type letter
                # and the two quotes
                if literal.startswith("=X"):
                    locctr += int((len(literal) - 4) / 2)
                else:
                    locctr += len(literal) - 4
        # Compute EQU value
        elif temp[1] == "EQU" and temp[2] != "*":
            symbol = temp[2].split("-")
            # Two terms joined by "-"
            if len(symbol) > 1:
                addr1 = self.symtabList[section].symtab.get(symbol[0])
                addr2 = self.symtabList[section].symtab.get(symbol[1])
                if addr1 is not None and addr2 is not None:
                    self.tokenList[section].symtab.modifysymbol(
                        temp[0], addr1 - addr2)
                else:
                    # a term is not in this section's table: use 0
                    self.tokenList[section].symtab.modifysymbol(temp[0], 0)
            # Single term
            else:
                addr = self.symtabList[section].symtab.get(symbol[0])
                if addr is not None:
                    self.tokenList[section].symtab.modifysymbol(
                        temp[0], addr)
                else:
                    self.tokenList[section].symtab.modifysymbol(temp[0], 0)
    # record the size of the last section
    self.tokenList[section].tokenList[0].byteSize = locctr
import JackTokenizer
import CompilationEngine
import os
import VMWriter
import SymbolTable

# Compile one Jack class file, writing an XML file and a VM file
# ("<name>MyVersion.xml" / "<name>MyVersion.vm") next to the source.
rfile = r"C:\Users\Liu_100\Desktop\nand2tetris\nand2tetris\projects\11\Pong\Ball.jack"
base = os.path.splitext(rfile)[0]

xml = open(base + 'MyVersion.xml', 'w')
try:
    vm = open(base + 'MyVersion.vm', 'w')
    vmWriter = VMWriter.VMWriter(vm)
    try:
        jackTokenizer = JackTokenizer.jacktokenizer(rfile, xml)
        symbolTable = SymbolTable.SymbolTable()
        compiler = CompilationEngine.compilationengine(xml, symbolTable,
                                                       vmWriter)
        compiler.compileClass(jackTokenizer)
    finally:
        # Close the VM writer even when compilation raises.
        vmWriter.close()
finally:
    # Likewise for the XML output.
    xml.close()
class TypeChecker(NodeVisitor):
    """AST visitor that type-checks a matrix language and prints diagnostics.

    Expression visitors return a ``VariableSymbol`` or ``ArraySymbol``
    describing the expression. Result types and array dimensions come
    from the ``AllowedOperations`` helper stored in ``self.a``.
    """

    def __init__(self):
        self.table = SymbolTable(None, 'Program')
        self.a = AllowedOperations()
        self.in_loop = 0  # loop nesting depth

    def visit_Program(self, node):
        logger.debug("Visiting program")
        self.visit(node.instructions_opt)

    def visit_InstructionsOpt(self, node):
        if node.instructions is not None:
            self.visit(node.instructions)
        else:
            logger.info("No instructions in instructions opt.")

    def visit_Instructions(self, node):
        self.visit(node.instruction)
        if node.instructions is not None:
            self.visit(node.instructions)

    def visit_Instruction(self, node):
        self.visit(node.instruction)

    def visit_InstructionIf(self, node):
        self.visit(node.condition)
        self.visit(node.instruction)

    def visit_InstructionIfElse(self, node):
        self.visit(node.condition)
        self.visit(node.then_part)
        self.visit(node.else_part)

    def visit_For(self, node):
        """Visit the loop header, then the body with the loop depth raised."""
        self.visit(node.iterator)
        self.visit(node.range_start)
        self.visit(node.range_end)
        self.in_loop += 1
        self.visit(node.instruction)
        self.in_loop -= 1

    def visit_While(self, node):
        """Visit the condition, then the body with the loop depth raised."""
        self.visit(node.condition)
        self.in_loop += 1
        self.visit(node.instruction)
        self.in_loop -= 1

    def visit_Assign(self, node):
        """Check an assignment and record the left-hand symbol's new type."""
        left_symbol = self.visit(node.left)
        op = node.op
        right_symbol = self.visit(node.right)
        result_type = self.a.get_type_from_symbols(op, left_symbol,
                                                   right_symbol)
        if result_type is not None:  # right hand side equals to something
            # Compare by value: `is 'array'` tested object identity,
            # which is not guaranteed for equal strings.
            if result_type == 'array':
                # assigning an array, check if it is legal
                new_dim = self.a.new_array_dimensions(op, left_symbol,
                                                      right_symbol)
                if new_dim is not None:
                    print("Line ", node.line, " array assignment l: ",
                          left_symbol, "op: ", op, "r: ", right_symbol)
                    self.table.put(
                        left_symbol.name,
                        ArraySymbol(left_symbol.name,
                                    right_symbol.dimensions))
                else:
                    print("Line ", node.line,
                          "! wrong array assignment: l: ", left_symbol,
                          "op: \"", op, "\" r: ", right_symbol)
            else:  # assigning variable, not an array
                print("Line ", node.line, " assignment l: ", left_symbol,
                      "op: ", op, "r: ", right_symbol)
                self.table.put(
                    left_symbol.name,
                    VariableSymbol(left_symbol.name, right_symbol.type))
        else:  # right hand side is wrong, cannot assign
            print("Line ", node.line, "! wrong assignment: l: ",
                  left_symbol, "op: \"", op, "\" r: ", right_symbol)
        logger.debug(str(["l: ", left_symbol, "op: ", op, "r: ",
                          right_symbol]))

    def visit_KeyPhrase(self, node):
        """Report break/continue outside a loop; otherwise visit the argument."""
        if (node.word == 'continue'
                or node.word == 'break') and not self.in_loop > 0:
            print("Line ", node.line, "! BREAK or CONTINUE outside a loop")
        else:
            if node.argument is not None:
                self.visit(node.argument)

    def visit_Condition(self, node):
        """Report a condition whose result type is not 'int'."""
        sym_left = self.visit(node.left)
        op = node.op
        sym_right = self.visit(node.right)
        new_type = self.a.get_type_from_symbols(op, sym_left, sym_right)
        if new_type != 'int':
            print(new_type, op, sym_left, sym_right)
            print("Wrong condition")

    def visit_Expressions(self, node):
        # NOTE(review): the source has "# self.visit(node.expression)"
        # here, read as commented out, so only the tail of the list is
        # visited - confirm.
        if node.expressions is not None:
            self.visit(node.expressions)

    def visit_NumericExpression(self, node):
        return self.visit(node.number)

    def visit_IntNum(self, node):
        logger.debug('int: ' + str(node.value))
        return VariableSymbol(None, 'int')

    def visit_FloatNum(self, node):
        return VariableSymbol(None, 'float')

    def visit_LValue(self, node):
        return self.visit(node.value)

    def visit_ID(self, node):
        """Return the symbol for the identifier, creating an untyped one if new."""
        logger.debug("ID: " + node.id)
        ret = self.table.get(node.id)
        if ret is None:
            new_id_symbol = VariableSymbol(node.id, None)
            self.table.put(node.id, new_id_symbol)
            return new_id_symbol
        else:
            return ret

    def visit_Number(self, node):
        return self.visit(node.number)

    def visit_MatrixExpression(self, node):
        return self.visit(node.matrix)

    def visit_Matrix(self, node):
        """Return the matrix symbol, or an empty ArraySymbol if rows differ."""
        vector = self.visit(node.elements)
        # unpacking vectors to check sizes... not so great
        while isinstance(vector.dimensions[0], ArraySymbol):
            for i, dim in enumerate(vector.dimensions):
                vector.dimensions[i] = dim.dimensions
        if all(dim == vector.dimensions[0] for dim in vector.dimensions):
            return vector
        else:
            if vector.name is not None:
                print("Line ", node.line, "! wrong dimensions in ",
                      vector.name)
            else:
                print("Line ", node.line,
                      "! wrong dimensions in unnamed vector.",
                      vector.dimensions)
            return ArraySymbol(None, None)

    def visit_IntNumbers(self, node):
        self.visit(node.number)
        if node.numbers is not None:
            self.visit(node.numbers)

    def visit_Matrices(self, node):
        """Collect the visit result of each matrix into one ArraySymbol."""
        if node.matrices is not None:
            return ArraySymbol(None, [self.visit(node.matrix)] +
                               self.visit(node.matrices).dimensions)
        return ArraySymbol(None, [self.visit(node.matrix)])

    def visit_Vectors(self, node):
        """Collect the visit result of each vector into one ArraySymbol."""
        if node.vectors is not None:
            return ArraySymbol(None, [self.visit(node.vector)] +
                               self.visit(node.vectors).dimensions)
        else:
            return ArraySymbol(None, [self.visit(node.vector)])

    def visit_AllNumbers(self, node):
        # returns size of the vector
        if node.numbers is not None:
            return 1 + self.visit(node.numbers)
        return 1

    def visit_BinOp(self, node):
        """Return the symbol describing the result of a binary operation."""
        left_symbol = self.visit(node.left)
        right_symbol = self.visit(node.right)
        op = node.op
        return_type = self.a.get_type_from_symbols(op, left_symbol,
                                                   right_symbol)
        if return_type == 'array':  # by value, not identity
            new_dim = self.a.new_array_dimensions(op, left_symbol,
                                                  right_symbol)
            if new_dim is None:
                print("Line ", node.line, "! wrong array binop: ", op,
                      " left: ", left_symbol, "right: ", right_symbol)
                return ArraySymbol(None, None)
            else:
                return ArraySymbol(None, new_dim)
        if return_type is None:
            print("Line ", node.line, "! wrong binop: ", op, " left: ",
                  left_symbol, "right: ", right_symbol)
        return VariableSymbol(None, return_type)

    def visit_DotOp(self, node):
        """Return the symbol describing the result of a dot operation."""
        left_symbol = self.visit(node.left)
        right_symbol = self.visit(node.right)
        op = node.op
        return_type = self.a.get_type_from_symbols(op, left_symbol,
                                                   right_symbol)
        if return_type == 'array':  # by value, not identity
            new_dim = self.a.new_array_dimensions(op, left_symbol,
                                                  right_symbol)
            if new_dim is None:
                print("Line ", node.line, "! wrong dotop: ", op, " left: ",
                      left_symbol, "right: ", right_symbol)
                return ArraySymbol(None, None)
            else:
                return ArraySymbol(None, new_dim)
        if return_type is None:
            print("Line ", node.line, "! wrong dotop: ", op, " left: ",
                  left_symbol, "right: ", right_symbol)
        return VariableSymbol(None, return_type)

    def visit_Transpose(self, node):
        # TODO change sizes? check if matrix
        return self.visit(node.expression)

    def visit_Negation(self, node):
        # TODO check if can be negated?
        return self.visit(node.expression)

    def visit_String(self, node):
        return VariableSymbol(None, 'string')

    def visit_Function(self, node):
        """Return an ArraySymbol sized by ``get_matrix_size`` of the argument."""
        args = get_matrix_size(node.argument)
        return ArraySymbol(None, args)

    def visit_ArrayIndex(self, node):
        """Check an indexed access; a valid one is typed as 'float'."""
        # TODO not sure how and when to infer this; certainly it is not
        # always possible...
        array_symbol = self.table.get(node.id.id)
        if array_symbol is not None:
            if array_symbol.type != 'array':
                print("Line ", node.line, ": ", node.id.id,
                      "! is not an array!")
                return VariableSymbol(None, None)
            else:
                size = []
                append_int_numbers_to_list(node.numbers, size)
                if len(size) != len(array_symbol.dimensions):
                    print("Line ", node.line, "! wrong dimensions in",
                          node.id.id)
                    return VariableSymbol(None, None)
                elif not all(
                        a < b
                        for a, b in zip(size, array_symbol.dimensions)):
                    print("Line ", node.line, "! out of bounds in",
                          node.id.id)
                    return VariableSymbol(None, None)
        else:
            print("Line ", node.line, "! no such array: ", node.id.id)
            return VariableSymbol(None, None)
        return VariableSymbol(None, 'float')

    def get_symbol_table(self):
        return self.table
import SymbolTable

filename = sys.argv[1]
# Read the whole source; the context manager closes the handle, which
# was previously left open.
with open(filename, 'r') as ifile:
    strfile = ifile.read()
instr = strfile.split('\n')
i = 0
l1 = ['', '\r', '\n']
instr_str = []
for x in instr:
    # Keep lines that are neither comments nor blank. The method is
    # str.startswith; the earlier spelling "startwith" raised
    # AttributeError on the first line.
    if not x.startswith('//'):
        if x not in l1:
            instr_str.append(x.strip('\r'))

address = 16
sym = SymbolTable.Constructor()
# Give every non-numeric symbol not yet in the table the next address.
for x in instr_str:
    if x.find('@') >= 0 or x.find('(') >= 0:
        symbol = Parser.symbol(x)
        if not SymbolTable.contains(symbol, sym) and not symbol.isdigit():
            sym = SymbolTable.addEntry(symbol, address, sym)
            address = address + 1

while Parser.hasMoreCommands(i, instr_str):
    c_type = Parser.commandType(instr_str[i])
    if c_type == 'A':
        str1 = Parser.symbol(instr_str[i])
        if str1.isdigit():
            # numeric address: 16-bit zero-padded binary string
            str1 = bin(int(str1))[2:]
            address = str1.zfill(16)
def _expand_signal_entity(entity, signalGroups, func, debug):
    """Return the list of signals *entity* stands for (expanding known groups)."""
    if entity in signalGroups:
        expanded = signalGroups[entity]["signals"]
        if debug:
            print("DEBUG: (%s): Found a group name within content '%s', Expandng to '%s'"%(func, entity, expanded))
        # Hand back a copy so callers never mutate the stored group.
        return list(expanded)
    return [entity]


def _check_group_ready(signalGroupName, signalGroupSignals):
    """Raise RuntimeError unless the group has both a name and signals."""
    if signalGroupName == "":
        raise RuntimeError("SignalGroup name is not set.")
    if signalGroupSignals == []:
        raise RuntimeError("SignalGroup list is empty.")


def _set_group_property(signalGroups, signalGroupName, signalGroupSignals,
                        key, value, func, debug):
    """Store property *key* on the group, creating the group entry if needed."""
    _check_group_ready(signalGroupName, signalGroupSignals)
    if signalGroupName in signalGroups:
        signalGroups[signalGroupName]['properties'][key] = value
        if debug:
            print("DEBUG: (%s): Added '%s' to signal %s' to signalGroups: %s"%(func, key, signalGroupName, signalGroups[signalGroupName]))
    else:
        signalGroups[signalGroupName] = {"signals": signalGroupSignals,
                                         "properties": {key: value}}
        if debug:
            print("DEBUG: (%s): Added '%s' to signalGroups: %s"%(func, signalGroupName, signalGroups[signalGroupName]))


def _parse_group_properties(tokens, first, last, signalGroups,
                            signalGroupName, signalGroupSignals, func, debug):
    """Parse the Base / Alignment / ScanOut statements in tokens[first..last]."""
    def store(key, value):
        _set_group_property(signalGroups, signalGroupName, signalGroupSignals,
                            key, value, func, debug)

    j = first
    while j <= last:
        tag = tokens[j]['tag']
        if tag == "Base":
            # Base (Hex|Dec) <identifier> ;
            if tokens[j+1]['tag'] not in ("Hex", "Dec"):
                raise SyntaxError("Expecting 'Hex' or 'Dec' directly after 'Base'.")
            if tokens[j+2]['tag'] != 'identifier':
                raise SyntaxError("Base needs waveform characters.")
            if tokens[j+3]['tag'] != ';':
                raise SyntaxError("Base needs to be terminated with semicolon.")
            store('Base', [tokens[j+1]['token'], tokens[j+2]['token']])
            j += 4
        elif tag == 'Alignment':
            # Alignment (MSB|LSB) ;
            if tokens[j+1]['tag'] not in ('MSB', 'LSB'):
                raise SyntaxError("Alignment is only allowed 'MSB' or 'LSB'")
            if tokens[j+2]['tag'] != ';':
                raise SyntaxError("Alignment must end in semicolon")
            # Always store the token text; the original stored the whole
            # token dict when the group entry did not exist yet.
            store('Alignment', tokens[j+1]['token'])
            j += 3
        elif tag == "ScanOut":
            # ScanOut [digits] ;
            if tokens[j+1]['tag'] == "digits":
                if tokens[j+2]['tag'] != ';':
                    raise SyntaxError("ScanOut must end in semicolon")
                store('ScanOut', tokens[j+1]['token'])
                j += 3
            elif tokens[j+1]['tag'] == ';':
                store('ScanOut', True)
                j += 2
            else:
                raise SyntaxError("Expecting ; or digit after ScanOut")
        else:
            # Unrecognised token inside the block: skip it.
            j += 1


def create_signalGroups(string, name = "", file = "", debug=False):
    """Parse a signal-groups definition and return a ``SignalGroups`` object.

    Args:
        string: source text; it is tokenised with ``sutils.lex``.
        name: domain name. When empty it is derived from the position of the
            first curly-bracket set found in the token stream.
        file: passed through unchanged to ``SignalGroups``.
        debug: when true, progress is printed to stdout.

    Returns:
        ``SignalGroups(name=..., file=..., mapping=...)`` where the mapping
        has the form ``{group: {"signals": [...], "properties": {...}}}``.

    Raises:
        ValueError: the domain name cannot be extracted.
        SyntaxError: a group definition is malformed.
        RuntimeError: a group ends up without a name or without signals.
    """
    func = "SignalGroups.create_signalGroups"
    tokens = sutils.lex(string=string, debug=debug)
    sytbl = STBL.SymbolTable(tokens=tokens, debug=debug)
    if debug:
        print("DEBUG: (%s): Tokens: %s "% (func, tokens))
        print("DEBUG: (%s): SymbolTable: %s"%(func, sytbl))

    signalGroups = {}
    signalGroupName = ""

    if not name:
        _start, _end = sytbl.get_next_set(0, 'curly-brackets')
        if _start == 1:
            name = KLU.References.GLOBAL
        elif _start == 2:
            name = tokens[1]["token"]
        else:
            raise ValueError("Unable to extract domain-name.")
    else:
        if debug:
            print("DEBUG: (%s): Received domain name: %s"%(func, name))

    # Every group definition has its signal expression in single quotes:
    #   <name> = '<expr>' ;      or      <name> = '<expr>' { <properties> }
    singleQuotePairs = sytbl.get_list(category="single-quotes")
    for _start, _end in singleQuotePairs:
        signalGroupName = ""
        signalGroupSignals = []
        # TODO: Can place some sanity checks here: length= 1|3|5|etc. even not allowed

        # Left-hand side: extract the signal group name.
        if tokens[_start-1]['tag'] != '=':
            raise SyntaxError("Invalid syntax: Expected '='.")
        if tokens[_start-2]['tag'] != 'identifier':
            raise SyntaxError("Invalid syntax: Expected signal group name.")
        signalGroupName = tokens[_start-2]['token']
        if debug:
            print("DEBUG: (%s): Found signalgroup name: %s"%(func, signalGroupName))

        # Quoted contents: first identifier, then '+ ident' / '- ident' pairs.
        j = _start + 1
        jend = _end - 1
        while j <= jend:
            if j == (_start + 1):
                # First element must be an identifier.
                if tokens[j]['tag'] != 'identifier':
                    raise SyntaxError("Expecting an identifier")
                signalGroupSignals += _expand_signal_entity(
                    tokens[j]['token'], signalGroups, func, debug)
                j += 1
                continue
            if tokens[j]['tag'] == '+':
                if tokens[j+1]['tag'] != 'identifier':
                    raise SyntaxError("An identifier must follow a '+'.")
                signalGroupSignals += _expand_signal_entity(
                    tokens[j+1]['token'], signalGroups, func, debug)
                j += 2
                continue
            if tokens[j]['tag'] == '-':
                if tokens[j+1]['tag'] != 'identifier':
                    raise SyntaxError("An identifier must follow a '-'.")
                # list.remove raises ValueError if the signal is not present.
                for substractSignal in _expand_signal_entity(
                        tokens[j+1]['token'], signalGroups, func, debug):
                    signalGroupSignals.remove(substractSignal)
                j += 2
                continue
            j += 1

        # Right-hand side: plain terminator or a property block.
        closing = tokens[_end+1]['tag']
        if closing == ';':
            _check_group_ready(signalGroupName, signalGroupSignals)
            signalGroups[signalGroupName] = {"signals": signalGroupSignals,
                                             "properties" : {}}
            if debug:
                print("DEBUG: (%s): Added '%s' to signalGroups: %s"%(func, signalGroupName, signalGroups[signalGroupName]))
        elif closing == '{':
            cs, ce = sytbl.get_next_set(_end, category='curly-brackets')
            _parse_group_properties(tokens, cs + 1, ce - 1, signalGroups,
                                    signalGroupName, signalGroupSignals,
                                    func, debug)
        else:
            raise SyntaxError("Expected ';' or '{'")

    if debug:
        for grp in signalGroups:
            print("DEBUG: (%s): Signal Group: %s"%(func, grp))
            print("DEBUG: (%s):    -> %s"%(func, signalGroups[grp]))

    return SignalGroups(name = name, file = file, mapping = signalGroups)
def __init__(self):
    """Attach a newly constructed ``SymbolTable.SymbolTable`` to the instance."""
    table = SymbolTable.SymbolTable()
    self._symbolTable = table
#!/usr/bin/python
import sys
import Parser
import Code
import SymbolTable

filename = sys.argv[1]
symboldict = SymbolTable.Constructor()

# First pass: record the instruction index of every "(LABEL)" line.
# ``i`` counts the lines that are not label declarations.
i = 0
with open(filename, 'r') as rfile:
    linepre = rfile.readline()
    flag = Parser.hasMoreCommands(linepre)
    while flag:
        # Skip blank and comment lines. Any whitespace-only line counts as
        # blank (the original only recognised '\r\n', so LF-only blank
        # lines were counted as instructions). ``linepre`` is '' at EOF,
        # which ends the skip loop.
        while linepre and (not linepre.strip()
                           or linepre.lstrip().startswith('//')):
            linepre = rfile.readline()
        if linepre.find('(') >= 0:
            linepre = linepre.strip()
            symbol = linepre.strip('()\n')
            if not SymbolTable.contains(symbol, symboldict):
                symboldict = SymbolTable.addEntry(symbol, i, symboldict)
        else:
            i += 1
        linepre = Parser.advance(rfile, linepre)
        flag = Parser.hasMoreCommands(linepre)
j = 0
def ClearScope():
    """Clear the local variables from SymbolTable upon completion of a subprogram."""
    Display("ClearScope", _list)
    SymbolTable.clearScope()
class CompilationEngine:
    """Recursive-descent compiler that turns one class into VM code.

    Tokens come from a ``Tokenizer``, identifiers are tracked in a
    ``SymbolTable`` and VM commands are emitted through a ``VMWriter``.
    Constructing the engine runs the whole compilation.
    """

    def __init__(self, inpath, outpath):
        """Compile the source at *inpath*, writing VM code to *outpath*."""
        self.tokenizer = Tokenizer(inpath)
        self.symboltable = SymbolTable()
        self.vmwriter = VMWriter(outpath)
        self._class_name = None
        if self.tokenizer.has_more_tokens():
            self.compile_class()
        self.vmwriter.close()
        print("{0} completed.".format(outpath))

    def _subroutine_init(self):
        """Reset the per-subroutine state (kind, name, return type)."""
        self._sub_kind = None
        self._sub_name = None
        self._ret_type = None

    def _advance(self):
        """Move to the next token; raise if the input is exhausted."""
        self._check_EOF()
        self.tokenizer.advance()

    @property
    def _current_token(self):
        """Value of the current token, read from the accessor for its type."""
        t_type = self.tokenizer.token_type
        return (self.tokenizer.keyword if t_type == T_KEYWORD else
                self.tokenizer.symbol if t_type == T_SYMBOL else
                self.tokenizer.identifier if t_type == T_ID else
                self.tokenizer.intval if t_type == T_INTEGER else
                self.tokenizer.stringval)

    @property
    def _current_tok_type(self):
        """Type of the current token."""
        return self.tokenizer.token_type

    @property
    def _current_tok_tag(self):
        """Tag name for the current token type (looked up in ``token_tags``)."""
        return token_tags[self._current_tok_type]

    @property
    def _next_token(self):
        """Return the raw next token of the tokenizer, as a string."""
        return str(self.tokenizer.next_token)

    def _require_token(self, tok_type, token=None):
        """Advance and check that the new current token meets the requirement.

        When *token* is given the token value must equal it; otherwise the
        token type must equal *tok_type*. A mismatch is reported through
        ``_error``.
        """
        self._advance()
        if token and self._current_token != token:
            return self._error(expect_toks=(token, ))
        elif self._current_tok_type != tok_type:
            return self._error(expect_types=(tok_type, ))

    def _require_id(self):
        """Advance and require an identifier."""
        self._require_token(T_ID)

    def _require_kw(self, token):
        """Advance and require the keyword *token*."""
        return self._require_token(T_KEYWORD, token=token)

    def _require_sym(self, token):
        """Advance and require the symbol *token*."""
        return self._require_token(T_SYMBOL, token=token)

    def _require_brackets(self, brackets, procedure):
        """Require ``brackets[0]``, run *procedure*, require ``brackets[1]``."""
        front, back = brackets
        self._require_sym(front)
        procedure()
        self._require_sym(back)

    def _fol_by_class_vardec(self):
        """True when the next token starts a class variable declaration."""
        return self._next_token in (KW_STATIC, KW_FIELD)

    def _fol_by_subroutine(self):
        """True when the next token starts a subroutine declaration."""
        return self._next_token in (KW_CONSTRUCTOR, KW_FUNCTION, KW_METHOD)

    def _fol_by_vardec(self):
        """True when the next token starts a local variable declaration."""
        return self._next_token == KW_VAR

    #########################
    # structure compilation #
    #########################

    def compile_class_name(self):
        """Consume an identifier and remember it as the class name."""
        self._require_id()
        self._class_name = self._current_token

    def compile_subroutine_name(self):
        """Consume an identifier and remember it as the subroutine name."""
        self._require_id()
        self._sub_name = self._current_token

    def compile_var_name(self, kind=None, type=None, declare=False):
        """Consume a variable name; define it or check a previous definition.

        With ``declare=True`` the name is defined in the symbol table using
        *kind* and *type* (both expected to be set). Otherwise the name must
        already be known, and must have type *type* when one is given.
        """
        self._require_id()
        name = self._current_token
        if declare is True:
            self.symboltable.define(name, type, kind)
        else:
            self.check_var_name(name, type)

    def check_var_name(self, name, type=None):
        """Report an error if *name* is undeclared or not of type *type*."""
        recorded_kind = self.symboltable.kindof(name)
        if recorded_kind is None:
            self._traceback('name used before declared: {0}'.format(name))
        elif type is not None:
            recorded_type = self.symboltable.typeof(name)
            if recorded_type != type:
                get = '{0} "{1}"'.format(recorded_type, name)
                self._error(expect_types=(type, ), get=get)

    def compile_type(self, advanced=False, expect='type'):
        """Check that the token is a built-in type or an identifier.

        Grammar: int, string, boolean or identifier (className). When
        *advanced* is false the tokenizer is advanced first; otherwise the
        current token is checked. *expect* is the wording used in the error.
        """
        if advanced is False:
            self._advance()
        if (self._current_token not in SymbolTable.builtIn_types
                and self._current_tok_type != T_ID):
            return self._error(expect=expect)

    def compile_return_type(self):
        """Consume ``void`` or a type and record it as the return type."""
        self._advance()
        if self._current_token != KW_VOID:
            self.compile_type(True, '"void" or type')
        self._ret_type = self._current_token
        if (self._sub_kind == KW_CONSTRUCTOR
                and self._ret_type != self._class_name):
            me = 'constructor expect current class as return type'
            self._traceback(me)

    @record_non_terminal('class')
    def compile_class(self):
        """'class' className '{' classVarDec* subroutineDec* '}'"""
        self._require_kw(KW_CLASS)
        self.compile_class_name()
        self._require_sym('{')

        while self._fol_by_class_vardec():
            self.compile_class_vardec()
        while self._fol_by_subroutine():
            self.compile_subroutine()

        self._advance()
        if self._current_token != '}':
            self._traceback("Except classVarDec first, subroutineDec second.")
        if self.tokenizer.has_more_tokens():
            if self._next_token == KW_CLASS:
                self._traceback('Only expect one classDec.')
            self._traceback('Unexpected extra tokens.')

    def compile_declare(self):
        """('static' | 'field' | 'var') type varName (',' varName)* ';'"""
        self._advance()
        id_kind = self._current_token
        self.compile_type()
        id_type = self._current_token
        self.compile_var_name(id_kind, id_type, declare=True)
        # compile ',' or ';'
        self._advance()
        while self._current_token == ',':
            self.compile_var_name(id_kind, id_type, declare=True)
            self._advance()
        if self._current_token != ';':
            return self._error((',', ';'))

    @record_non_terminal('classVarDec')
    def compile_class_vardec(self):
        """('static' | 'field') type varName (',' varName)* ';'"""
        self.compile_declare()

    @record_non_terminal('subroutineDec')
    def compile_subroutine(self):
        """Compile one subroutine declaration.

        ('constructor' | 'function' | 'method') ('void' | type)
        subroutineName '(' parameterList ')' subroutineBody
        """
        self._subroutine_init()
        self.symboltable.start_subroutine()
        self._advance()
        self._sub_kind = self._current_token
        if self._sub_kind == KW_METHOD:
            # A method gets the object itself as argument 0.
            self.symboltable.define('this', self._class_name, 'argument')
        self.compile_return_type()
        self.compile_subroutine_name()
        self._require_brackets('()', self.compile_parameter_list)
        self.compile_subroutine_body()

    @record_non_terminal('parameterList')
    def compile_parameter_list(self):
        """((type varName) (',' type varName)*)?"""
        if self._next_token == ')':
            return
        self.compile_type()
        self.compile_var_name('argument', self._current_token, True)
        while self._next_token != ')':
            self._require_sym(',')
            self.compile_type()
            self.compile_var_name('argument', self._current_token, True)

    @record_non_terminal('subroutineBody')
    def compile_subroutine_body(self):
        """'{' varDec* statements '}'"""
        self._require_sym('{')
        while self._fol_by_vardec():
            self.compile_vardec()
        # The function header is written only after all locals are declared,
        # because it needs their count.
        self.compile_function()
        self.compile_statements()
        self._require_sym('}')

    def compile_function(self):
        """Write the VM function header and set up the ``this`` pointer."""
        fn_name = '.'.join((self._class_name, self._sub_name))
        num_locals = self.symboltable.varcount(KW_VAR)
        self.vmwriter.write_function(fn_name, num_locals)
        if self._sub_kind == KW_CONSTRUCTOR:
            # Allocate one word per field and point ``this`` at the block.
            num_fields = self.symboltable.varcount(KW_FIELD)
            self.vmwriter.write_push('constant', num_fields)
            self.vmwriter.write_call('Memory.alloc', 1)
            self.vmwriter.write_pop('pointer', 0)
        elif self._sub_kind == KW_METHOD:
            # ``this`` arrives as argument 0.
            self.vmwriter.write_push('argument', 0)
            self.vmwriter.write_pop('pointer', 0)

    @record_non_terminal('varDec')
    def compile_vardec(self):
        """'var' type varName (',' varName)* ';'"""
        self.compile_declare()

    #########################
    # statement compilation #
    #########################

    @record_non_terminal('statements')
    def compile_statements(self):
        """(let | if | while | do | return statement)* up to the closing '}'."""
        last_statement = None
        while self._next_token != '}':
            self._advance()
            last_statement = self._current_token
            if last_statement == 'do':
                self.compile_do()
            elif last_statement == 'let':
                self.compile_let()
            elif last_statement == 'while':
                self.compile_while()
            elif last_statement == 'return':
                self.compile_return()
            elif last_statement == 'if':
                self.compile_if()
            else:
                return self._error(expect='statement expression')
        #if STACK[-2] == 'subroutineBody' and last_statement != 'return':
        #    self._error(expect='return statement', get=last_statement)

    @record_non_terminal('doStatement')
    def compile_do(self):
        """'do' subroutineCall ';'"""
        self._advance()
        self.compile_subroutine_call()
        self.vmwriter.write_pop('temp', 0)  # discard the unused return value
        self._require_sym(';')

    @record_non_terminal('letStatement')
    def compile_let(self):
        """'let' varName ('[' expression ']')? '=' expression ';'"""
        self.compile_var_name()
        var_name = self._current_token
        array = (self._next_token == '[')
        if array:
            # push (array base + subscript)
            self.compile_array_subscript(var_name)
        self._require_sym('=')
        self.compile_expression()  # push expression value
        self._require_sym(';')
        if array:
            # Park the value in temp[1] so ``that`` can be aimed at the
            # target cell before the value is stored.
            self.vmwriter.write_pop('temp', 1)
            self.vmwriter.write_pop('pointer', 1)
            self.vmwriter.write_push('temp', 1)
            self.vmwriter.write_pop('that', 0)
        else:
            self.assign_variable(var_name)

    # Symbol-table kind -> VM memory segment.
    kind_segment = {
        'static': 'static',
        'field': 'this',
        'argument': 'argument',
        'var': 'local'
    }

    def assign_variable(self, name):
        """Pop the stack top into the variable *name*."""
        kind = self.symboltable.kindof(name)
        index = self.symboltable.indexof(name)
        self.vmwriter.write_pop(self.kind_segment[kind], index)

    def load_variable(self, name):
        """Push the value of the variable *name*."""
        kind = self.symboltable.kindof(name)
        index = self.symboltable.indexof(name)
        self.vmwriter.write_push(self.kind_segment[kind], index)

    # Counter used to build the numbered while/if labels.
    label_num = 0

    @record_non_terminal('whileStatement')
    def compile_while(self):
        """'while' '(' expression ')' '{' statements '}'"""
        start_label = 'WHILE_START_' + str(self.label_num)
        end_label = 'WHILE_END_' + str(self.label_num)
        self.label_num += 1
        self.vmwriter.write_label(start_label)
        self.compile_cond_expression(start_label, end_label)

    @record_non_terminal('ifStatement')
    def compile_if(self):
        """'if' '(' expression ')' '{' statements '}' ('else' '{' statements '}')?"""
        else_label = 'IF_ELSE_' + str(self.label_num)
        end_label = 'IF_END_' + str(self.label_num)
        self.label_num += 1
        # After the 'then' body jump to end_label; a false condition jumps
        # to else_label, which is emitted right after the 'then' body.
        self.compile_cond_expression(end_label, else_label)
        if self._next_token == KW_ELSE:
            self._require_kw(KW_ELSE)
            self._require_brackets('{}', self.compile_statements)
        self.vmwriter.write_label(end_label)

    def compile_cond_expression(self, goto_label, end_label):
        """Compile '(' cond ')' '{' body '}' with the surrounding jumps.

        Emits: cond, ``not``, if-goto *end_label*, body, goto *goto_label*,
        label *end_label*.
        """
        self._require_brackets('()', self.compile_expression)
        self.vmwriter.write_arithmetic('not')
        self.vmwriter.write_if(end_label)
        self._require_brackets('{}', self.compile_statements)
        self.vmwriter.write_goto(goto_label)
        self.vmwriter.write_label(end_label)

    @record_non_terminal('returnStatement')
    def compile_return(self):
        """'return' expression? ';'"""
        if self._sub_kind == KW_CONSTRUCTOR:
            self._require_kw(KW_THIS)  # constructor must return 'this'
            self.vmwriter.write_push('pointer', 0)
        elif self._next_token != ';':
            self.compile_expression()
        else:
            if self._ret_type != KW_VOID:
                self._traceback('expect return ' + self._ret_type)
            # A bare return still pushes a value: constant 0.
            self.vmwriter.write_push('constant', 0)
        self._require_sym(';')
        self.vmwriter.write_return()

    ##########################
    # expression compilation #
    ##########################

    unary_ops = {'-': 'neg', '~': 'not'}
    # '*' and '/' map to None: they are compiled as calls, see
    # compile_binaryop.
    binary_ops = {
        '+': 'add',
        '-': 'sub',
        '*': None,
        '/': None,
        '&': 'and',
        '|': 'or',
        '<': 'lt',
        '>': 'gt',
        '=': 'eq'
    }

    @record_non_terminal('expression')
    def compile_expression(self):
        """term (op term)*  -- operators are applied left to right."""
        self.compile_term()
        while self._next_token in self.binary_ops:
            self._advance()
            if self._current_tok_type != T_SYMBOL:
                self._error(expect_types=(T_SYMBOL, ))
            op = self._current_token
            self.compile_term()
            self.compile_binaryop(op)

    def compile_binaryop(self, op):
        """Emit the VM command (or Math call) for the binary operator *op*."""
        if op == '*':
            self.vmwriter.write_call('Math.multiply', 2)
        elif op == '/':
            self.vmwriter.write_call('Math.divide', 2)
        else:
            self.vmwriter.write_arithmetic(self.binary_ops[op])

    kw_consts = (KW_TRUE, KW_FALSE, KW_NULL, KW_THIS)

    @record_non_terminal('term')
    def compile_term(self):
        """Compile a single term.

        integerConstant | stringConstant | keywordConstant | varName |
        varName '[' expression ']' | subroutineCall | '(' expression ')' |
        unaryOp term
        """
        if self._next_token == '(':
            self._require_brackets('()', self.compile_expression)
        else:
            self._advance()
            tok = self._current_token
            tok_type = self._current_tok_type
            if tok_type == T_KEYWORD and tok in self.kw_consts:
                self.compile_kw_consts(tok)
            elif tok_type == T_INTEGER:
                self.vmwriter.write_push('constant', tok)
            elif tok_type == T_STRING:
                self.compile_string(tok)
            elif tok_type == T_ID:
                # Tuple membership: the original ``in '(.'`` was a substring
                # test that also matched '' and '(.'.
                if self._next_token in ('(', '.'):
                    self.compile_subroutine_call()
                elif self._next_token == '[':
                    self.check_var_name(tok)
                    self.compile_array_subscript(tok)
                    self.vmwriter.write_pop('pointer', 1)
                    self.vmwriter.write_push('that', 0)
                else:
                    self.check_var_name(tok)
                    self.load_variable(tok)
            elif tok_type == T_SYMBOL and tok in self.unary_ops:
                self.compile_term()
                self.vmwriter.write_arithmetic(self.unary_ops[tok])
            else:
                self._error(expect='term')

    def compile_kw_consts(self, kw):
        """Push a keyword constant: 'true' | 'false' | 'null' | 'this'."""
        if kw == KW_THIS:
            self.vmwriter.write_push('pointer', 0)
        elif kw == KW_TRUE:
            # true is emitted as ``neg`` applied to constant 1
            self.vmwriter.write_push('constant', 1)
            self.vmwriter.write_arithmetic('neg')
        else:
            self.vmwriter.write_push('constant', 0)

    def compile_string(self, string):
        """Emit String.new followed by one String.appendChar per character."""
        self.vmwriter.write_push('constant', len(string))
        self.vmwriter.write_call('String.new', 1)
        for char in string:
            self.vmwriter.write_push('constant', ord(char))
            self.vmwriter.write_call('String.appendChar', 2)

    def compile_subroutine_call(self):
        """Compile a call whose first identifier is already the current token.

        subroutineName '(' expressionList ')' |
        (className | varName) '.' subroutineName '(' expressionList ')'
        """
        fn_name, num_args = self.compile_call_name()
        self._require_sym('(')
        num_args = self.compile_expressionlist(num_args)
        self._require_sym(')')
        self.vmwriter.write_call(fn_name, num_args)

    def compile_call_name(self):
        """Resolve the callee name and the number of implicit arguments.

        The first name is a className or varName when followed by '.', or a
        subroutineName when followed by '('.

        Returns:
            ``(function_name, num_args)`` where ``num_args`` is 1 when the
            receiver object has been pushed as first argument and 0
            otherwise.
        """
        if self._current_tok_type != T_ID:
            self._error(expect_types=(T_ID, ))
        name = self._current_token
        if self._next_token == '.':
            self._require_sym('.')
            self.compile_subroutine_name()
            sub_name = self._current_token
            if (name in self.symboltable.all_class_types()
                    or name in SymbolTable.builtIn_class
                    or name == self._class_name):
                return '.'.join((name, sub_name)), 0  # className
            else:
                self.check_var_name(name)  # varName with class type
                type = self.symboltable.typeof(name)
                if type in SymbolTable.builtIn_types:
                    return self._error(expect='class instance or class',
                                       get=type)
                self.load_variable(name)
                return '.'.join((type, sub_name)), 1
        elif self._next_token == '(':
            self.vmwriter.write_push('pointer', 0)  # push this as 1st arg
            return '.'.join((self._class_name, name)), 1  # subroutineName
        # Neither '.' nor '(' follows the name: report a compile error
        # instead of implicitly returning None (which made the caller fail
        # with an unpacking TypeError).
        return self._error(expect_toks=('.', '('), get=self._next_token)

    @record_non_terminal('expressionList')
    def compile_expressionlist(self, num_args):
        """(expression (',' expression)*)?  Returns *num_args* + count."""
        if self._next_token != ')':
            self.compile_expression()
            num_args += 1
        while self._next_token != ')':
            self._require_sym(',')
            self.compile_expression()
            num_args += 1
        return num_args

    def compile_array_subscript(self, var_name):
        """varName '[' expression ']'  -- leaves (base + subscript) pushed."""
        self.check_var_name(var_name, 'Array')
        self._require_brackets('[]', self.compile_expression)
        self.load_variable(var_name)
        self.vmwriter.write_arithmetic('add')  # base + subscript

    def _check_EOF(self):
        """Raise a compile error when no tokens are left."""
        if not self.tokenizer.has_more_tokens():
            self._traceback("Unexpected EOF.")

    def _error(self, expect_toks=(), expect_types=(), expect=None, get=None):
        """Raise ``CompileError`` for an "expected X but got Y" situation.

        Args:
            expect_toks: acceptable token values.
            expect_types: acceptable token types (keys of ``token_tags``).
            expect: ready-made description; overrides the two above.
            get: what was found instead; defaults to the current token.
        """
        if expect is None:
            exp_tok = ' or '.join('"{0}"'.format(t) for t in expect_toks)
            exp_type = ' or '.join(
                'type {0}'.format(token_tags[t]) for t in expect_types)
            if exp_tok and exp_type:
                # str.join takes ONE iterable; the original passed two
                # positional arguments and raised TypeError here.
                expect = ' or '.join((exp_tok, exp_type))
            else:
                expect = exp_tok + exp_type
        if get is None:
            get = self._current_token
        me = 'Expect {0} but get "{1}"'.format(expect, get)
        return self._traceback(me)

    def _traceback(self, message):
        """Raise ``CompileError`` with file and line information prepended."""
        if DEBUG:
            print('--------------------------------------------')
            print(self.symboltable)
            print(self.symboltable.all_class_types())
            print('--------------------------------------------')
        file_info = 'file: "{0}"'.format(self.tokenizer.filename)
        line_info = 'line {0}'.format(self.tokenizer.line_count)
        raise CompileError("{0}, {1}: {2}".format(file_info, line_info,
                                                  message))
def visit_Arg(self, node):
    """Register a function argument in the current scope.

    Puts a ``VariableSymbol`` for the argument into the current symbol
    table, appends its type to the ``argtypes`` of the function symbol held
    in ``carried_info["funsymbol"]``, and returns the argument type.
    """
    arg_name, arg_type = node.name, node.type
    arg_symbol = SymbolTable.VariableSymbol(arg_name, arg_type)
    self.current_symbol_table.put(arg_name, arg_symbol)
    self.carried_info["funsymbol"].argtypes.append(arg_type)
    return arg_type
def GenNewContext(self):
    """Build and return a ``Context`` created from this object.

    The new context is constructed from ``self.name``, ``self.context`` and
    ``self.startPos``; its symbol table is a new ``SymbolTable`` built from
    the symbol table of the new context's parent.
    """
    child = Context(self.name, self.context, self.startPos)
    parent_table = child.parent.symbTable
    child.symbTable = SymbolTable(parent_table)
    return child
def CompileLet(tokens):
    """Compile a ``let`` statement starting at the global cursor ``pos``.

    Emits VM commands to the global writer ``fpw`` and advances ``pos``.
    Variables are looked up first in ``SymbolTable.subroutineTable`` (local
    scope) and then in ``SymbolTable.classTable`` (class scope).
    """
    global pos
    global fpw

    def emit_assignment(push_base, pop_target):
        """Emit the code for one assignment target.

        *push_base* pushes the array base address (array case only);
        *pop_target* stores the value into a plain variable.
        """
        global pos
        if tokens[pos + 1] == '[':
            # Array element: let a[exp1] = exp2
            fpw.write(push_base)
            pos += 2                    # move to the expression inside [ ]
            CompileExpression(tokens)
            fpw.write('add\n')          # base + index = target address
            pos += 2                    # jump off '='
            CompileExpression(tokens)
            # Park the value, aim 'that' at a[exp1], then store the value.
            for line in ('pop temp 0\n', 'pop pointer 1\n',
                         'push temp 0\n', 'pop that 0\n'):
                fpw.write(line)
        else:
            # Plain variable: skip the name and '=', then compile the value.
            pos += 2
            CompileExpression(tokens)
            fpw.write(pop_target)

    # 'let'
    pos += 1
    varName = tokens[pos]

    if varName in SymbolTable.subroutineTable.keys():   # local scope
        index = SymbolTable.indexOf(varName)
        kind = SymbolTable.kindOf(varName)
        slot = str(index)
        if kind == 'var':
            emit_assignment('push local ' + slot + '\n',
                            'pop local ' + slot + '\n')
        else:
            emit_assignment('push ' + kind + ' ' + slot + '\n',
                            'pop ' + kind + ' ' + slot + ' \n')
    elif varName in SymbolTable.classTable.keys():      # class scope
        index = SymbolTable.indexOf(varName)
        kind = SymbolTable.kindOf(varName)
        slot = str(index)
        if kind == 'field':
            emit_assignment('push this ' + slot + ' \n',
                            'pop this ' + slot + ' \n')
        else:
            emit_assignment('push ' + kind + ' ' + slot + '\n',
                            'pop ' + kind + ' ' + slot + ' \n')

    pos += 1
def __init__(self,lexicalAnalysis):
    """Store the lexical analyser and set up the initial state.

    ``FirstPass`` starts as ``None`` and ``Next`` as ``False``; a new
    ``SymbolTable`` is created for this instance.
    """
    self.Lexical = lexicalAnalysis
    self.SymbolTable = SymbolTable()
    self.FirstPass, self.Next = None, False
def Enter(symbol):
    """Add a variable to the hash table and string space."""
    Display("Enter", _list)
    SymbolTable.enterSymbol(symbol)
def __init__(self, files):
    """Remember the given *files* and create a new ``SymbolTable``."""
    self.files, self.symtable = files, SymbolTable()
import sys
sys.path.append("../")
import Compiler
from SymbolTable import *
from Symbol import *

# Fill a table with 100 variables; the third Symbol argument alternates
# between 0 and 1 (i % 2).
s = SymbolTable()
for i in range(100):
    sym = Symbol("var" + str(i), SymbolType.VARIABLE, i % 2)
    s.insert(sym)

Compiler.Compiler.debugOn()
s.clearLocalTable()
s.dump()
# Parenthesised print is valid on both Python 2 and Python 3.
print(s.get("var2").toString())
def symbol_phase1(expression, line_count):
    """Register a label found in *expression* and return its address.

    When *expression* contains "(", the text between its first and last
    character is added to ``SymbolTable`` with *line_count* and the address
    reported by ``SymbolTable.getAddress`` is returned. Otherwise nothing
    is registered and ``None`` is returned.
    """
    if "(" not in expression:
        return None
    label = expression[1:-1]
    SymbolTable.addEntry(label, line_count)
    return SymbolTable.getAddress(label)
class Parser:
    """Recursive-descent parser that builds an XML parse tree and emits VM code.

    Tokens are produced by ``Lexer``.  Each ``compile_*`` method handles the
    grammar rule quoted in its docstring: it appends the tokens it consumes
    to an ``xml.etree.ElementTree`` node and, as a side effect, updates
    ``self.symbol_table`` and calls ``self.VMwriter``.

    Token cursor: ``current_token`` is the token being examined,
    ``previous_token`` the one before it and ``next_token`` a one-token
    look-ahead; ``token_index`` is the position of ``current_token`` in
    ``token_list``.
    """

    def __init__(self):
        self.cmd_path = None
        self.IS_directory = None
        self.input_file_path = None  # input file used to parse code
        self.output_file_path = None  # output file where xml will be written
        self.current_file_read = None
        self.lexer = None

        # iterative data structures
        self.token_list = None
        self.ITERABLE_token_list = None
        self.token_PEEKING_list = None
        self.current_token = None
        self.previous_token = None
        self.next_token = None
        self.token_index = -1

        # XML tree variables
        self.xml_root = None
        self.tree_list = []

        # variables for compiling VM code
        self.symbol_table = None
        self.VMwriter = VMwriter()

    def get_token_list(self):
        """Tokenise ``self.input_file_path`` and reset the token cursor."""
        # initialise and clean up previous iterations
        self.token_list = None
        self.ITERABLE_token_list = None
        self.token_PEEKING_list = None
        self.current_token = None
        self.previous_token = None
        self.next_token = None
        self.token_index = -1

        # create and store tokens
        self.lexer = Lexer(self.input_file_path)
        self.token_list = self.lexer.get_token_types()
        self.ITERABLE_token_list = iter(self.token_list)
        # The look-ahead pulls from the same underlying iterator.
        self.token_PEEKING_list = self.ITERABLE_token_list

    def print_iterated_tokens(self):
        """Debug helper: print the current token and the look-ahead."""
        print("| Current token:", self.current_token.ascii,
              " | Current token type:", self.current_token.type,
              "| Next token val:", self.next_token.ascii, " |")

    def print_all_tokens(self):
        """Debug helper: print every token in ``self.token_list``."""
        for t in self.token_list:
            print("|", "Token val: ", t.ascii, " | ", "Token type: ", t.type,
                  " |")

    def create_xml(self):
        """Parse the token stream and queue the resulting tree for writing."""
        # compile all statements
        self.parse()
        # create tree
        self.tree_list.append((self.xml_root, self.output_file_path))

    def write_xml(self):
        """Write every queued XML tree to its file, then emit the VM code."""
        for root in self.tree_list:
            pretty_tree = self.prettify(root[0])
            # Drop the first 22 characters of the pretty-printed document
            # (the leading XML declaration) plus the whitespace after it.
            pretty_tree = pretty_tree[22:].lstrip()
            with open(root[1], "w") as f:
                f.write(pretty_tree)
        self.VMwriter.print_VM_code()
        self.VMwriter.write_VM_code()

    def prettify(self, elem):
        """Return a pretty-printed XML string for the Element."""
        # with great help from: https://stackoverflow.com/a/17402424
        from xml.dom import minidom
        rough_string = ET.tostring(elem, 'utf-8')
        reparsed = minidom.parseString(rough_string)
        return reparsed.toprettyxml(indent="\t")

    def add_xml_element(self, parent_node):
        """Append the current token to ``parent_node`` as ``<type> text </type>``."""
        ET.SubElement(parent_node, self.current_token.type).text = (
            ' ' + self.current_token.ascii + ' ')

    # ------------------------------------------------------------------
    # fetching, checking and adding xml tokens
    # ------------------------------------------------------------------
    def get_token(self):
        """Advance the cursor by one token and refresh the look-ahead.

        Returns:
            The new current token.
        """
        self.previous_token = self.current_token
        self.current_token = next(self.ITERABLE_token_list)
        self.token_index += 1
        self.peek_next_token()
        return self.current_token

    def peek_next_token(self):
        """Load the look-ahead token without losing it from the stream.

        The token is pulled from the underlying iterator and then pushed
        back by chaining it in front of the remaining tokens.  At the last
        token nothing is pulled and the previous look-ahead is kept.
        """
        if self.token_index < len(self.token_list) - 1:
            self.next_token = next(self.token_PEEKING_list)
            self.ITERABLE_token_list = (
                value
                for g in ([self.next_token], self.ITERABLE_token_list)
                for value in g)
        return self.next_token

    def consume(self, tkn_val=(), tkn_type=None, parent_node=None,
                ast_parent=None):
        """Consume the current token if it matches, otherwise do nothing.

        Args:
            tkn_val: accepted token texts; when empty only the type is checked.
            tkn_type: required token type.
            parent_node: XML node that receives the consumed token.
            ast_parent: unused; kept for interface compatibility.
        """
        matches = self.current_token.type == tkn_type
        if len(tkn_val) > 0:
            matches = self.current_token.ascii in tkn_val and matches
        if matches:
            self.add_xml_element(parent_node)
            self.get_token()

    # ------------------------------------------------------------------
    # key driver: parse
    # ------------------------------------------------------------------
    def parse(self):
        """Load the first token, create the symbol table and compile the class."""
        self.get_token()
        # initialise key data structures
        self.symbol_table = SymbolTable()
        self.VMwriter.symbol_table = self.symbol_table
        # begin compiling
        self.compile_class()

    def compile_class(self):
        """'class' className '{' classVarDec* subroutineDec* '}'"""
        # initialise the tree
        Class_node = ET.Element("class")
        self.xml_root = Class_node

        self.consume(tkn_val=["class"], tkn_type="keyword",
                     parent_node=Class_node)
        # The class name is still the current token here.
        self.symbol_table.new_class_scope(self.current_token.ascii)
        self.compile_className(parent_node=Class_node)
        self.consume(tkn_val=["{"], tkn_type="symbol", parent_node=Class_node)

        # classVarDec* subroutineDec* until the look-ahead is EOF
        while not (self.next_token.type == "EOF"):
            self.compile_classVarDec(parent_node=Class_node)
            self.compile_subroutineDec(parent_node=Class_node)

        self.consume(tkn_val=["}"], tkn_type="symbol", parent_node=Class_node)

    def compile_className(self, parent_node):
        """identifier"""
        self.consume(tkn_val=[], tkn_type="identifier",
                     parent_node=parent_node)

    def compile_classVarDec(self, parent_node):
        """('static' | 'field' ) type varName (',' varName)* ';'"""
        while self.current_token.ascii in ['static', 'field']:
            classVarDec_node = ET.SubElement(parent_node, "classVarDec")

            # ('static' | 'field')
            self._update_class_table()
            self.consume(tkn_val=[], tkn_type="keyword",
                         parent_node=classVarDec_node)
            # type
            self._update_class_table()
            self.compile_type(parent_node=classVarDec_node)

            while ";" not in self.current_token.ascii:
                # varName, then ',' when one follows
                self._update_class_table()
                self.compile_varName(parent_node=classVarDec_node)
                self.consume(tkn_val=[","], tkn_type="symbol",
                             parent_node=classVarDec_node)

            self.consume(tkn_val=[";"], tkn_type="symbol",
                         parent_node=classVarDec_node)

    def _update_class_table(self):
        """Feed the current cursor state to the class-level symbol table."""
        self.symbol_table.update_class_table(
            self.token_index, self.current_token.ascii,
            self.previous_token.ascii, self.next_token.ascii)

    def _update_subroutine_table(self):
        """Feed the current cursor state to the subroutine-level symbol table."""
        self.symbol_table.update_subroutine_table(
            self.token_index, self.current_token.ascii,
            self.previous_token.ascii, self.next_token.ascii)

    def compile_type(self, parent_node):
        """'int' | 'char' | 'boolean' | className"""
        if self.current_token.ascii in ['int', 'char', 'boolean']:
            self.consume(tkn_val=['int', 'char', 'boolean'],
                         tkn_type="keyword", parent_node=parent_node)
        else:
            self.compile_className(parent_node=parent_node)

    def compile_varName(self, parent_node):
        """identifier"""
        self.consume(tkn_val=[], tkn_type="identifier",
                     parent_node=parent_node)

    def compile_subroutineName(self, parent_node):
        """identifier"""
        self.consume(tkn_val=[], tkn_type="identifier",
                     parent_node=parent_node)

    def compile_subroutineDec(self, parent_node):
        """('constructor' | 'function' | 'method') ('void' | type)
        subroutineName '(' parameterList ')' subroutineBody
        """
        # Capture the subroutine kind before the keyword is consumed.
        subroutine_kind = self.current_token.ascii

        subroutineDec_node = ET.SubElement(parent_node, "subroutineDec")

        # ('constructor' | 'function' | 'method')
        self.consume(tkn_val=['constructor', 'function', 'method'],
                     tkn_type="keyword", parent_node=subroutineDec_node)
        # Reset the if/while counters kept by the VM writer.
        self.VMwriter.num_functions["if"] = -1
        self.VMwriter.num_functions["while"] = -1

        # ('void' | type)
        if self.current_token.ascii == "void":
            self.consume(tkn_val=["void"], tkn_type="keyword",
                         parent_node=subroutineDec_node)
        else:
            self.compile_type(parent_node=subroutineDec_node)

        # subroutineName (previous token is the return type at this point)
        self.symbol_table.new_subroutine_scope(subroutine_kind,
                                               self.previous_token.ascii,
                                               self.current_token.ascii)
        self.compile_subroutineName(parent_node=subroutineDec_node)

        # '(' parameterList ')' -- an empty list is rendered specially
        empty = (self.current_token.ascii == "("
                 and self.next_token.ascii == ")")
        self.consume(tkn_val=["("], tkn_type="symbol",
                     parent_node=subroutineDec_node)
        if not empty:
            self.symbol_table.compiling_state = "args"
        self.compile_parameterList(empty=empty,
                                   parent_node=subroutineDec_node)
        self.consume(tkn_val=[")"], tkn_type="symbol",
                     parent_node=subroutineDec_node)

        self.compile_subroutineBody(parent_node=subroutineDec_node)

    def compile_parameterList(self, empty, parent_node):
        """( (type varName) (',' type varName)*)?"""
        parameterList_node = ET.SubElement(parent_node, "parameterList")
        if empty:
            # Whitespace text makes the empty element print with a closing tag.
            parameterList_node.text = " " + "\n"

        while not (self.current_token.ascii == ")"):
            self.compile_type(parent_node=parameterList_node)
            self._update_subroutine_table()
            self.compile_varName(parent_node=parameterList_node)
            self.consume(tkn_val=[","], tkn_type="symbol",
                         parent_node=parameterList_node)

    def compile_subroutineBody(self, parent_node):
        """'{' varDec* statements '}'"""
        subroutineBody_node = ET.SubElement(parent_node, "subroutineBody")

        self.consume(tkn_val=["{"], tkn_type="symbol",
                     parent_node=subroutineBody_node)
        self.compile_varDec(parent_node=subroutineBody_node)
        # The function header is written once the 'var' count is known.
        self.VMwriter.writeFunction(self.symbol_table.class_name,
                                    self.symbol_table.subroutine_name,
                                    self.symbol_table.var_count["var"])
        self.compile_statements(parent_node=subroutineBody_node)
        self.consume(tkn_val=["}"], tkn_type="symbol",
                     parent_node=subroutineBody_node)

    def compile_varDec(self, parent_node):
        """'var' type varName (',' varName)* ';'"""
        while self.current_token.ascii == "var":
            varDec_node = ET.SubElement(parent_node, "varDec")

            self.symbol_table.compiling_state = "vars"
            # 'var'
            self._update_subroutine_table()
            self.consume(tkn_val=["var"], tkn_type="keyword",
                         parent_node=varDec_node)
            # type
            self._update_subroutine_table()
            self.compile_type(parent_node=varDec_node)
            # varName
            self._update_subroutine_table()
            self.compile_varName(parent_node=varDec_node)

            while self.current_token.ascii == ',':
                self.consume(tkn_val=[","], tkn_type="symbol",
                             parent_node=varDec_node)
                self._update_subroutine_table()
                self.compile_varName(parent_node=varDec_node)

            self.consume(tkn_val=[";"], tkn_type="symbol",
                         parent_node=varDec_node)

    def compile_statements(self, parent_node):
        """statement*"""
        statements_node = ET.SubElement(parent_node, "statements")
        while self.current_token.ascii in [
                "let", "if", "while", "do", "return"
        ]:
            self.compile_statement(parent_node=statements_node)

    def compile_statement(self, parent_node):
        """letStatement | ifStatement | whileStatement | doStatement |
        returnStatement
        """
        keyword = self.current_token.ascii
        if keyword == "let":
            self.compile_letStatement(parent_node=parent_node)
        elif keyword == "if":
            self.compile_ifStatement(parent_node=parent_node)
        elif keyword == "while":
            self.compile_whileStatement(parent_node=parent_node)
        elif keyword == "do":
            self.compile_doStatement(parent_node=parent_node)
        elif keyword == "return":
            self.compile_returnStatement(parent_node=parent_node)

    def compile_letStatement(self, parent_node):
        """'let' varName ('[' expression ']')? '=' expression ';'"""
        letStatement_node = ET.SubElement(parent_node, "letStatement")

        self.consume(tkn_val=["let"], tkn_type="keyword",
                     parent_node=letStatement_node)

        # varName ('[' expression ']')?
        Array_status = self.next_token.ascii == "["
        token = self.token_list[self.token_index]  # the target variable
        self.compile_varName(parent_node=letStatement_node)
        if Array_status:
            self.consume(tkn_val=["["], tkn_type="symbol",
                         parent_node=letStatement_node)
            self.compile_expression(parent_node=letStatement_node)
            self.consume(tkn_val=["]"], tkn_type="symbol",
                         parent_node=letStatement_node)
            # base address + index = address of the target element
            self.VMwriter.writePush(token=token)
            self.VMwriter.WriteArithmetic("+")

        self.consume(tkn_val=["="], tkn_type="symbol",
                     parent_node=letStatement_node)
        self.compile_expression(parent_node=letStatement_node)
        self.consume(tkn_val=[";"], tkn_type="symbol",
                     parent_node=letStatement_node)

        if Array_status:
            # pop the expression into a temp
            self.VMwriter.writePop(segment="temp", index=0)
            # point `that` at the computed element address
            self.VMwriter.writePop(segment="pointer", index=1)
            self.VMwriter.writePush(segment="temp", index=0)
            self.VMwriter.writePop(segment="that", index=0)
        else:
            self.VMwriter.writePop(token=token)

    def compile_doStatement(self, parent_node):
        """'do' subroutineCall ';'"""
        doStatement_node = ET.SubElement(parent_node, "doStatement")

        self.consume(tkn_val=["do"], tkn_type="keyword",
                     parent_node=doStatement_node)
        self.compile_subroutineCall(parent_node=doStatement_node)
        self.consume(tkn_val=[";"], tkn_type="symbol",
                     parent_node=doStatement_node)
        # Discard the value left by the call.
        self.VMwriter.writePop(segment="temp", index=0)

    def compile_returnStatement(self, parent_node):
        """'return' expression? ';'"""
        returnStatement_node = ET.SubElement(parent_node, "returnStatement")

        self.consume(tkn_val=["return"], tkn_type="keyword",
                     parent_node=returnStatement_node)
        # expression?
        if not self.current_token.ascii == ";":
            self.compile_expression(returnStatement_node)
        self.VMwriter.writeReturn(self.symbol_table.subroutine_type)
        self.consume(tkn_val=[";"], tkn_type="symbol",
                     parent_node=returnStatement_node)

    def compile_ifStatement(self, parent_node):
        """'if' '(' expression ')' '{' statements '}'
        ( 'else' '{' statements '}' )?

        Returns:
            ``if_count``: how many of the two bodies start with an ``if``.
        """
        if_count = 0
        ifStatement_node = ET.SubElement(parent_node, "ifStatement")

        self.VMwriter.num_functions["if"] += 1
        self.consume(tkn_val=["if"], tkn_type="keyword",
                     parent_node=ifStatement_node)
        self.consume(tkn_val=["("], tkn_type="symbol",
                     parent_node=ifStatement_node)
        self.compile_expression(parent_node=ifStatement_node)
        # Branch on the condition before ')' is consumed.
        self.VMwriter.WriteIf(label="IF_TRUE", function="if")
        self.VMwriter.WriteGoto(label="IF_FALSE", function="if")
        self.consume(tkn_val=[")"], tkn_type="symbol",
                     parent_node=ifStatement_node)
        self.consume(tkn_val=["{"], tkn_type="symbol",
                     parent_node=ifStatement_node)
        self.VMwriter.WriteLabel(label="IF_TRUE", function="if")

        # 'statements'
        if self.current_token.ascii == "if":
            if_count += 1
        self.compile_statements(parent_node=ifStatement_node)
        self.consume(tkn_val=["}"], tkn_type="symbol",
                     parent_node=ifStatement_node)
        self.VMwriter.WriteGoto(label="IF_END", function="if", count=if_count)
        self.VMwriter.WriteLabel(label="IF_FALSE", function="if",
                                 count=if_count)

        # ( 'else' '{' statements '}' )?
        if self.current_token.ascii == "else":
            self.consume(tkn_val=["else"], tkn_type="keyword",
                         parent_node=ifStatement_node)
            self.consume(tkn_val=["{"], tkn_type="symbol",
                         parent_node=ifStatement_node)
            if self.current_token.ascii == "if":
                if_count += 1
            self.compile_statements(parent_node=ifStatement_node)
            self.consume(tkn_val=["}"], tkn_type="symbol",
                         parent_node=ifStatement_node)

        self.VMwriter.WriteLabel(label="IF_END", function="if", count=if_count)
        return if_count

    def compile_whileStatement(self, parent_node):
        """'while' '(' expression ')' '{' statements '}'

        Returns:
            ``while_count``: 1 when the body starts with a ``while``, else 0.
        """
        while_count = 0
        whileStatement_node = ET.SubElement(parent_node, "whileStatement")

        self.VMwriter.num_functions["while"] += 1
        self.VMwriter.WriteLabel(label="WHILE_EXP", function="while")
        self.consume(tkn_val=["while"], tkn_type="keyword",
                     parent_node=whileStatement_node)
        self.consume(tkn_val=["("], tkn_type="symbol",
                     parent_node=whileStatement_node)
        self.compile_expression(whileStatement_node)
        # Leave the loop when the negated condition is true.
        self.VMwriter.WriteArithmetic("~")
        self.VMwriter.WriteIf(label="WHILE_END", function="while",
                              count=while_count)
        self.consume(tkn_val=[")"], tkn_type="symbol",
                     parent_node=whileStatement_node)
        self.consume(tkn_val=["{"], tkn_type="symbol",
                     parent_node=whileStatement_node)

        # 'statements'
        if self.current_token.ascii == "while":
            while_count += 1
        self.compile_statements(whileStatement_node)
        self.consume(tkn_val=["}"], tkn_type="symbol",
                     parent_node=whileStatement_node)

        self.VMwriter.WriteGoto(label="WHILE_EXP", function="while",
                                count=while_count)
        self.VMwriter.WriteLabel(label="WHILE_END", function="while",
                                 count=while_count)
        return while_count

    def compile_expression(self, parent_node):
        """term (op term)*"""
        term_count = 0
        expression_node = ET.SubElement(parent_node, "expression")

        starts_with_unary = (
            self.current_token.ascii in self.lexer.symbols.unary_op
            and self.previous_token.ascii in ["=", ",", ", ", "("])
        if starts_with_unary:
            operator = self.current_token.ascii
            self.compile_term(parent_node=expression_node)
            if operator == "-":
                # A leading '-' is arithmetic negation, not subtraction.
                self.VMwriter.WriteArithmetic(operator="neg",
                                              term_count=term_count)
            else:
                self.VMwriter.WriteArithmetic(operator=operator,
                                              term_count=term_count)
        else:
            self.compile_term(parent_node=expression_node)
            term_count += 1

        # (op term)*
        while self.current_token.ascii in self.lexer.symbols.operator:
            # Remember the operator; it is emitted after its right operand.
            operator = self.current_token.ascii
            self.consume(tkn_val=self.lexer.symbols.operator,
                         tkn_type="symbol", parent_node=expression_node)
            self.compile_term(parent_node=expression_node)
            term_count += 1
            self.VMwriter.WriteArithmetic(operator=operator,
                                          term_count=term_count)

    def compile_expressionList(self, empty, parent_node):
        """(expression (',' expression)* )?

        Returns:
            The number of expressions compiled.
        """
        expression_counter = 0
        expressionList_node = ET.SubElement(parent_node, "expressionList")
        if empty:
            # Whitespace text makes the empty element print with a closing tag.
            expressionList_node.text = " " + "\n"

        while self.current_token.ascii not in [")"]:
            self.compile_expression(parent_node=expressionList_node)
            expression_counter += 1
            self.consume(tkn_val=[", ", ","], tkn_type="symbol",
                         parent_node=expressionList_node)
        return expression_counter

    def compile_term(self, parent_node):
        """integerConstant | stringConstant | keywordConstant | varName |
        varName '[' expression ']' | subroutineCall | '(' expression ')' |
        unaryOp term
        """
        # No <term> node is opened on a closing bracket; none of the
        # branches below matches such a token either.
        if self.current_token.ascii not in [")", "]"]:
            term_node = ET.SubElement(parent_node, "term")

        if self.current_token.type == "integerConstant":
            self.VMwriter.writePush(self.current_token, segment=None,
                                    index=None)
            return self.consume(tkn_val=[], tkn_type="integerConstant",
                                parent_node=term_node)

        elif self.current_token.type == "stringConstant":
            self.VMwriter.writePush(self.current_token, segment=None,
                                    index=None)
            return self.consume(tkn_val=[], tkn_type="stringConstant",
                                parent_node=term_node)

        elif self.current_token.ascii in self.lexer.symbols.keyword_constant:
            self.VMwriter.writePush(self.current_token, segment=None,
                                    index=None)
            return self.consume(tkn_val=[], tkn_type="keyword",
                                parent_node=term_node)

        elif self.current_token.type == "identifier":
            # varName '[' expression ']'
            if self.next_token.ascii == "[":
                token = self.token_list[self.token_index]
                self.compile_varName(parent_node=term_node)
                self.consume(tkn_val=["["], tkn_type="symbol",
                             parent_node=term_node)
                self.compile_expression(parent_node=term_node)
                self.consume(tkn_val=["]"], tkn_type="symbol",
                             parent_node=term_node)
                # base + index, point `that` at it, read the element
                self.VMwriter.writePush(token=token)
                self.VMwriter.WriteArithmetic("+")
                self.VMwriter.writePop(segment="pointer", index=1)
                self.VMwriter.writePush(segment="that", index=0)
                return
            # subroutineCall
            elif self.next_token.ascii == "(" or self.next_token.ascii == ".":
                return self.compile_subroutineCall(parent_node=term_node)
            # varName
            else:
                self.VMwriter.writePush(self.current_token)
                return self.compile_varName(parent_node=term_node)

        elif self.current_token.ascii == "(":
            # '(' expression ')'
            self.consume(tkn_val=["("], tkn_type="symbol",
                         parent_node=term_node)
            self.compile_expression(parent_node=term_node)
            self.consume(tkn_val=[")"], tkn_type="symbol",
                         parent_node=term_node)
            return

        elif self.current_token.ascii in self.lexer.symbols.unary_op:
            # unaryOp term
            self.consume(tkn_val=['-', '~'], tkn_type="symbol",
                         parent_node=term_node)
            self.compile_term(parent_node=term_node)
            return

        return

    def _compile_call_arguments(self, parent_node):
        """Compile '(' expressionList ')' and return the argument count."""
        # Decide emptiness before '(' is consumed and the cursor moves.
        empty = (self.current_token.ascii == "("
                 and self.next_token.ascii == ")")
        self.consume(tkn_val=["("], tkn_type="symbol", parent_node=parent_node)
        nArgs = self.compile_expressionList(empty=empty,
                                            parent_node=parent_node)
        self.consume(tkn_val=[")"], tkn_type="symbol", parent_node=parent_node)
        return nArgs

    def compile_subroutineCall(self, parent_node):
        """subroutineName '(' expressionList ')' |
        ( className | varName) '.' subroutineName '(' expressionList ')'
        """
        # subroutineName '(' expressionList ')'
        if self.next_token.ascii == "(":
            subroutineName = self.current_token.ascii
            self.compile_subroutineName(parent_node=parent_node)
            nArgs = self._compile_call_arguments(parent_node)
            self.VMwriter.writeCall(subroutineName=subroutineName,
                                    nArgs=nArgs)

        # ( className | varName) '.' subroutineName '(' expressionList ')'
        # Kept as a separate `if`: the look-ahead is re-tested after the
        # branch above has moved the cursor.
        if self.next_token.ascii == ".":
            className = self.current_token.ascii
            self.consume(tkn_val=[], tkn_type="identifier",
                         parent_node=parent_node)
            self.consume(tkn_val=["."], tkn_type="symbol",
                         parent_node=parent_node)
            subroutineName = self.current_token.ascii
            self.compile_subroutineName(parent_node=parent_node)
            nArgs = self._compile_call_arguments(parent_node)
            self.VMwriter.writeCall(className=className,
                                    subroutineName=subroutineName,
                                    nArgs=nArgs)
def assemble():
    """Assemble the ``.asm`` file named in ``sys.argv[1]`` into a ``.hack`` file.

    Two passes are made over the source.  The first records the instruction
    address of every label (L command) in the symbol table.  The second
    writes one 16-bit binary line per A or C command, allocating a new
    variable through ``symbol_table.addLocalVar`` for each A-command symbol
    that is neither numeric nor already known.

    The output file is opened only for the second pass and closed by a
    ``with`` block, so an error never leaks the handle and a failed first
    pass does not leave an empty ``.hack`` file behind.

    Raises:
        Exception: on wrong command-line usage, a label defined twice, or a
            command type the parser does not classify.
    """
    logging.debug(sys.argv)
    if (len(sys.argv) != 2) or (not sys.argv[1].endswith(".asm")):
        raise Exception("Usage: {} src_file".format(__file__))
    src_path = sys.argv[1]

    parser = Parser.Parser(src_path)
    code = Code.Code()
    symbol_table = SymbolTable.SymbolTable()

    # 1st pass: label symbols.  PC counts only real instructions (A and C).
    PC = 0
    while parser.advance():
        cmd_type = parser.commandType()
        if cmd_type is parser.A_COMMAND or cmd_type is parser.C_COMMAND:
            PC += 1
        elif cmd_type is parser.L_COMMAND:
            symbol = parser.symbol()
            if symbol_table.contains(symbol):
                raise Exception(
                    "Error: Dual Definition of l_symbol: {}".format(symbol))
            symbol_table.addEntry(symbol, PC)
        else:
            raise Exception("Error")

    # 2nd pass: code generation over a fresh parser.
    parser = Parser.Parser(src_path)
    # Raw string: "\." in a plain literal is an invalid escape sequence.
    dst_path = re.sub(r"\.asm$", ".hack", src_path)
    with open(dst_path, "w") as dst_file:
        while parser.advance():
            cmd_type = parser.commandType()
            logging.debug("cmd_type: {}".format(cmd_type))
            if cmd_type is parser.A_COMMAND:
                symbol = parser.symbol()
                try:
                    address = int(symbol)
                except ValueError:
                    if symbol_table.contains(symbol):
                        address = symbol_table.getAddress(symbol)
                    else:
                        # new local variable
                        address = symbol_table.addLocalVar(symbol)
                ostr = "{:016b}\n".format(address)
            elif cmd_type is parser.L_COMMAND:
                continue  # labels emit no code
            elif cmd_type is parser.C_COMMAND:
                dest = parser.dest()
                comp = parser.comp()
                jump = parser.jump()
                logging.debug(
                    "dest: {}, comp: {}, jump: {}".format(dest, comp, jump))
                ostr = "111{comp}{dest}{jump}\n".format(
                    comp=code.comp(comp), dest=code.dest(dest),
                    jump=code.jump(jump))
            else:
                raise Exception("Error")
            logging.debug("output: {}".format(ostr))
            dst_file.write(ostr)
def __init__(self, filename):
    """Set up the translation helpers and open the output file for writing.

    Args:
        filename: path of the assembly source. A trailing ``.asm`` is
            swapped for ``.hack``; any other name gets ``.hack`` appended.

    The output path is derived from the suffix only. Replacing every
    ``.asm`` occurrence would corrupt paths that contain ``.asm`` elsewhere,
    and a name without ``.asm`` would map onto itself, so opening it in
    ``'w'`` mode would truncate the source file.

    ``self.outputFile`` stays open after construction; closing it is left to
    the rest of the class.
    """
    self.symbolTable = SymbolTable.SymbolTable()
    self.code = Code.Code()
    self.filename = filename
    if filename.endswith('.asm'):
        output_path = filename[:-len('.asm')] + '.hack'
    else:
        output_path = filename + '.hack'
    self.outputFile = open(output_path, 'w')
class CompilationEngine:
    """Recursive-descent compiler that turns a tokenized Jack class into VM code.

    Tokens are pulled from ``tokenizer``; VM commands are emitted through a
    ``VMWriter``. Two symbol tables are kept: ``class_table`` for static and
    field variables and ``method_table`` for the arguments and locals of the
    subroutine currently being compiled.
    """

    # Binary operators that map directly onto one VM arithmetic command.
    # '*' and '/' are not listed: compile_expression routes them to
    # writer.write_multiply() / writer.write_divide().
    OP_DICT_BIN = {
        '+': 'add',
        '-': 'sub',
        '=': 'eq',
        '>': 'gt',
        '<': 'lt',
        '&': 'and',
        '|': 'or'
    }
    # Unary operators and their VM arithmetic command.
    OP_DICT_UN = {'-': 'neg', '~': 'not'}
    # Keyword constants as (segment, index) pairs. 'true' is special-cased in
    # compile_keyword_const and never looked up here.
    KEYWORD_DICT = {
        'true': ('constant', -1),
        'false': ('constant', 0),
        'null': ('constant', 0),
        'this': ('pointer', 0)
    }
    # Symbol-table kind -> VM memory segment.
    VAR_DICT = {
        'static': 'static',
        'field': 'this',
        'var': 'local',
        'argument': 'argument'
    }

    def __init__(self, tokenizer, output):
        """Creates a new compilation engine with the given input and output.

        The next routine called must be compile_class().
        """
        self.tokenizer = tokenizer
        self.tags = []
        self.scope = 0
        self.writer = VMWriter(output)
        self.class_table = SymbolTable()
        self.method_table = SymbolTable()
        self.class_name = ''
        # True while compiling a method or constructor; read by
        # compile_subroutine_call to decide whether an unqualified call
        # passes the current object as an extra argument.
        self.method_or_constructor = False

    def compile_class(self):
        """Compiles a complete class and closes the writer."""
        self.tokenizer.advance()  # class
        self.class_name = self.tokenizer.get_token()  # class name
        self.tokenizer.advance()
        self.tokenizer.advance()  # {
        while self.tokenizer.get_token() in ('static', 'field'):
            self.compile_class_var_dec()
        while self.tokenizer.get_token() in ('constructor', 'function',
                                             'method'):
            self.compile_subroutine()
        self.tokenizer.advance()  # }
        # Close exactly once (the writer used to be closed twice here).
        self.writer.close()
        return

    def compile_class_var_dec(self):
        """Compiles a static declaration or a field declaration.

        Every declared name is registered in the class symbol table.
        """
        kind = self.tokenizer.get_token()  # static/field
        self.tokenizer.advance()
        var_type = self.tokenizer.get_token()  # type
        self.tokenizer.advance()
        name = self.tokenizer.get_token()  # var name
        self.class_table.define(name, var_type, kind)
        self.tokenizer.advance()
        while self.tokenizer.get_token() == ',':
            self.tokenizer.advance()  # ,
            name = self.tokenizer.get_token()  # var name
            self.class_table.define(name, var_type, kind)
            self.tokenizer.advance()  # var name
        self.tokenizer.advance()  # ;
        return

    def compile_subroutine(self):
        """Compiles a complete method, function, or constructor.

        Emits the ``function`` header followed by the kind-specific prologue:
        a method anchors ``this`` from argument 0, a constructor allocates
        one word per field through ``Memory.alloc``.
        """
        func_type = self.tokenizer.get_token()  # method/function/constructor
        self.tokenizer.advance()
        self.tokenizer.advance()  # type/void
        func_name = self.tokenizer.get_token()  # subroutineName
        self.tokenizer.advance()  # (
        self.tokenizer.advance()
        self.method_table.start_subroutine()
        if func_type == 'method':
            self.method_or_constructor = True
            # 'this' must be defined before the declared parameters so that
            # it takes the first argument slot.
            self.method_table.define('this', self.class_name, 'argument')
            self._compile_function_header(func_name)
            self.writer.write_push('argument', 0)
            self.writer.write_pop('pointer', 0)
        elif func_type == 'function':
            self.method_or_constructor = False
            self._compile_function_header(func_name)
        elif func_type == 'constructor':
            self.method_or_constructor = True
            self._compile_function_header(func_name)
            self.writer.write_push('constant',
                                   self.class_table.var_count('field'))
            self.writer.write_call('Memory.alloc', 1)
            self.writer.write_pop('pointer', 0)
        self.compile_subroutine_body()
        return

    def _compile_function_header(self, func_name):
        """Compiles the parameter list and var declarations, then writes the
        VM ``function`` line with the number of locals found."""
        self.compile_parameter_list()
        num_locals = 0
        self.tokenizer.advance()  # {
        while self.tokenizer.get_token() == 'var':
            num_locals += self.compile_var_dec()
        self.writer.write_function(self.class_name + '.' + func_name,
                                   num_locals)

    def compile_subroutine_body(self):
        """Compiles the subroutine's body - the statements - and consumes the
        closing brace."""
        self.compile_statements()
        self.tokenizer.advance()  # }
        return

    def compile_parameter_list(self):
        """Compiles a (possibly empty) parameter list and consumes the
        closing ')'.

        Returns the number of parameters, each of which is defined as an
        'argument' in the method symbol table.
        """
        if self.tokenizer.get_token() == ')':
            self.tokenizer.advance()  # )
            return 0
        var_type = self.tokenizer.get_token()  # type
        self.tokenizer.advance()
        name = self.tokenizer.get_token()  # var name
        self.tokenizer.advance()
        num = 1
        self.method_table.define(name, var_type, 'argument')
        while self.tokenizer.get_token() == ',':
            self.tokenizer.advance()  # ,
            var_type = self.tokenizer.get_token()  # type
            self.tokenizer.advance()
            name = self.tokenizer.get_token()  # var name
            self.tokenizer.advance()
            num += 1
            self.method_table.define(name, var_type, 'argument')
        self.tokenizer.advance()  # )
        return num

    def compile_var_dec(self):
        """Compiles a var declaration.

        Returns the number of variables declared by the statement.
        """
        var = self.tokenizer.get_token()  # var
        self.tokenizer.advance()
        var_type = self.tokenizer.get_token()  # type
        self.tokenizer.advance()
        name = self.tokenizer.get_token()  # var name
        self.method_table.define(name, var_type, var)
        self.tokenizer.advance()
        num_vars = 1
        while self.tokenizer.get_token() == ',':
            num_vars += 1
            self.tokenizer.advance()  # ,
            name = self.tokenizer.get_token()  # var name
            self.method_table.define(name, var_type, var)
            self.tokenizer.advance()  # var name
        self.tokenizer.advance()  # ;
        return num_vars

    def compile_statements(self):
        """Compiles a sequence of statements, not including the enclosing
        braces."""
        current_token = self.tokenizer.get_token()
        while current_token in ('let', 'if', 'while', 'do', 'return'):
            if current_token == 'let':
                self.compile_let()
            elif current_token == 'if':
                self.compile_if()
            elif current_token == 'while':
                self.compile_while()
            elif current_token == 'do':
                self.compile_do()
            elif current_token == 'return':
                self.compile_return()
            current_token = self.tokenizer.get_token()
        return

    def compile_do(self):
        """Compiles a do statement.

        The value returned by the call is discarded into ``temp 0``.
        """
        self.tokenizer.advance()  # do
        name = self.tokenizer.get_token()
        self.tokenizer.advance()  # function name
        if self.tokenizer.get_token() == '(':
            # Unqualified call: a subroutine of the current class.
            name = self.class_name + '.' + name
            self.writer.write_comment('//function call')
            self.compile_subroutine_call(name, False)
        elif self.tokenizer.get_token() == '.':
            self.writer.write_comment('//function object call')
            # compile_subroutine_call pushes the receiver itself when `name`
            # is a known variable. Pushing it here as well left a stray
            # value on the stack after every `do obj.method(...)`.
            self.compile_subroutine_call(name, True)
        self.tokenizer.advance()  # ;
        self.writer.write_pop('temp', 0)
        return

    def compile_let(self):
        """Compiles a let statement (plain variable or array element)."""
        self.writer.write_comment('//let statement')
        self.tokenizer.advance()  # let
        name = self.tokenizer.get_token()  # var name
        self.tokenizer.advance()
        if self.tokenizer.get_token() == '[':
            # Target address = base + index, kept on the stack while the
            # right-hand side is evaluated.
            self.compile_var_name(name)
            self.tokenizer.advance()  # [
            self.compile_expression()
            self.writer.write_arithmetic('add')
            self.tokenizer.advance()  # ]
            self.tokenizer.advance()  # =
            self.compile_expression()
            # Park the value in temp 0 so the address can be popped into
            # pointer 1 before the value is stored through `that`.
            self.writer.write_pop('temp', 0)
            self.writer.write_pop('pointer', 1)
            self.writer.write_push('temp', 0)
            self.writer.write_pop('that', 0)
        else:
            self.tokenizer.advance()  # = symbol
            self.compile_expression()
            self.compile_pop_var_name(name)
        self.tokenizer.advance()  # ;
        return

    def compile_while(self):
        """Compiles a while statement."""
        self.writer.write_comment('//while statement')
        self.tokenizer.advance()  # while
        # Labels are made unique by suffixing the tokenizer's current_token
        # value at this point (presumably a token position - confirm against
        # the tokenizer).
        num_while = str(self.tokenizer.current_token)
        self.writer.write_label('while' + num_while)
        self.tokenizer.advance()  # (
        self.compile_expression()
        self.writer.write_arithmetic('not')
        self.tokenizer.advance()  # )
        end_num = str(self.tokenizer.current_token)
        self.writer.write_if('end' + end_num)
        self.tokenizer.advance()  # {
        self.compile_statements()
        self.tokenizer.advance()  # }
        self.writer.write_go_to('while' + num_while)
        self.writer.write_label('end' + end_num)
        return

    def compile_return(self):
        """Compiles a return statement.

        A bare ``return;`` pushes constant 0 as the return value.
        """
        self.writer.write_comment('//return statement')
        self.tokenizer.advance()  # return
        if self.tokenizer.get_token() != ';':
            self.compile_expression()
        else:
            self.writer.write_push('constant', 0)
        self.writer.write_return()
        self.tokenizer.advance()  # ;
        return

    def compile_if(self):
        """Compiles an if statement, possibly with a trailing else clause."""
        self.writer.write_comment('//if statement')
        self.tokenizer.advance()  # if
        self.tokenizer.advance()  # (
        self.compile_expression()
        self.writer.write_arithmetic('not')
        self.tokenizer.advance()  # )
        # Label suffixes come from the tokenizer's current_token value at the
        # moment each label name is created.
        else_num = str(self.tokenizer.current_token)
        self.writer.write_if('else' + else_num)
        self.tokenizer.advance()  # {
        self.compile_statements()
        end_num = str(self.tokenizer.current_token)
        self.writer.write_go_to('end' + end_num)
        self.tokenizer.advance()  # }
        self.writer.write_label('else' + else_num)
        if self.tokenizer.get_token() == 'else':
            self.tokenizer.advance()  # else
            self.tokenizer.advance()  # {
            self.compile_statements()
            self.tokenizer.advance()  # }
        self.writer.write_label('end' + end_num)
        return

    def compile_expression(self):
        """Compiles an expression: a term followed by any number of
        (operator, term) pairs, evaluated left to right."""
        self.compile_term()
        while self.tokenizer.operator():
            bin_op = self.tokenizer.get_token()
            self.tokenizer.advance()
            self.compile_term()
            if bin_op == '*':
                self.writer.write_multiply()
            elif bin_op == '/':
                self.writer.write_divide()
            else:
                self.writer.write_arithmetic(self.OP_DICT_BIN[bin_op])
        return

    def compile_term(self):
        """Compiles a term."""
        if self.tokenizer.int_val():
            self.writer.write_push('constant', self.tokenizer.get_token())
            self.tokenizer.advance()
        elif self.tokenizer.string_val():
            self.writer.write_string(self.tokenizer.get_token())
            self.tokenizer.advance()
        elif self.tokenizer.keyword_constant():
            self.compile_keyword_const()
        elif self.tokenizer.identifier():
            name = self.tokenizer.get_token()
            self.tokenizer.advance()
            if self.tokenizer.get_token() == '[':
                self.tokenizer.advance()  # '['
                self.compile_array(name)
            elif self.tokenizer.get_token() == '(':
                self.writer.write_comment('//function call')
                # Qualify with the class name exactly as compile_do does;
                # without the prefix the emitted call target was a bare
                # subroutine name.
                self.compile_subroutine_call(self.class_name + '.' + name,
                                             False)
            elif self.tokenizer.get_token() == '.':
                self.writer.write_comment('//function object call')
                self.compile_subroutine_call(name, True)
            else:
                self.compile_var_name(name)
        elif self.tokenizer.unary_op():
            unary_op = self.tokenizer.get_token()
            self.tokenizer.advance()
            self.compile_term()
            self.compile_un_op(unary_op)
        elif self.tokenizer.get_token() == '(':
            self.tokenizer.advance()  # (
            self.compile_expression()
            self.tokenizer.advance()  # )
        return

    def compile_expression_list(self):
        """Compiles a (possibly empty) comma-separated list of expressions.

        Returns the number of expressions compiled. The closing ')' is left
        for the caller to consume.
        """
        if self.tokenizer.get_token() == ')':
            return 0
        self.compile_expression()
        num = 1
        while self.tokenizer.get_token() == ',':
            self.tokenizer.advance()
            self.compile_expression()
            num += 1
        return num

    def compile_bin_op(self, operator):
        """compiles a binary operator"""
        self.writer.write_arithmetic(self.OP_DICT_BIN[operator])

    def compile_un_op(self, operator):
        """compiles an unary operator"""
        self.writer.write_arithmetic(self.OP_DICT_UN[operator])

    def compile_keyword_const(self):
        """compiles a keyword constant"""
        if self.tokenizer.get_token() == 'true':
            # true is emitted as `push constant 1` followed by `neg`.
            self.writer.write_push('constant', 1)
            self.writer.write_arithmetic('neg')
        else:
            segment, index = self.KEYWORD_DICT[self.tokenizer.get_token()]
            self.writer.write_push(segment, index)
        self.tokenizer.advance()

    def compile_var_name(self, name):
        """compile a identifier push command

        The subroutine-level table takes precedence over the class table.
        """
        if name in self.method_table.symbol_table:
            self.writer.write_push(
                self.VAR_DICT[self.method_table.kind_of(name)],
                self.method_table.index_of(name))
        else:
            self.writer.write_push(
                self.VAR_DICT[self.class_table.kind_of(name)],
                self.class_table.index_of(name))

    def compile_pop_var_name(self, name):
        """compile a identifier pop command

        The subroutine-level table takes precedence over the class table.
        """
        if name in self.method_table.symbol_table:
            self.writer.write_pop(
                self.VAR_DICT[self.method_table.kind_of(name)],
                self.method_table.index_of(name))
        else:
            self.writer.write_pop(
                self.VAR_DICT[self.class_table.kind_of(name)],
                self.class_table.index_of(name))

    def compile_array(self, name):
        """compiles an array operation (reads name[expression])"""
        self.compile_expression()
        self.tokenizer.advance()  # ']'
        self.compile_var_name(name)
        self.writer.write_arithmetic('add')
        self.writer.write_pop('pointer', 1)
        self.writer.write_push('that', 0)

    def compile_subroutine_call(self, name, obj):
        """compiles a subroutine call.

        obj is true if the call command was: obj_name.func_name(), and false
        if the call command was func_name()

        For obj calls the current token is expected to be '.'; for plain
        calls it is expected to be '('. The closing ')' is consumed here.
        """
        var_type = ''
        if obj:
            if name in self.method_table.symbol_table:
                # Receiver is a local/argument: push it as the hidden first
                # argument and call through its declared type.
                self.compile_var_name(name)
                var_type = self.method_table.type_of(name)
                self.tokenizer.advance()
                name = self.tokenizer.get_token()
                self.tokenizer.advance()
            elif name in self.class_table.symbol_table:
                # Receiver is a field/static: same treatment.
                self.compile_var_name(name)
                var_type = self.class_table.type_of(name)
                self.tokenizer.advance()
                name = self.tokenizer.get_token()
                self.tokenizer.advance()
            elif self.tokenizer.get_token() == '.':
                # Not a known variable: treat `name` as a class name.
                self.tokenizer.advance()
                name += '.' + self.tokenizer.get_token()
                self.tokenizer.advance()
        else:
            self.tokenizer.advance()
        if obj:
            self.tokenizer.advance()  # (
        else:
            # Unqualified call: pass the current object.
            self.writer.write_push('pointer', 0)
        num_args = self.compile_expression_list()
        #todo 11/27 could be problematic for the case 'do Output.printInt(1 + (2 * 3));'
        if var_type:
            num_args += 1
            name = var_type + '.' + name
        if not obj and self.method_or_constructor:
            num_args += 1
        self.writer.write_call(name, num_args)
        self.tokenizer.advance()
def ExtractRec(Record):
    """Return the memory-location string for a symbol (variable) record.

    Record must be a tuple of the form (<token>, <name/token>). The lookup
    itself is delegated to SymbolTable.getLocationString.
    """
    # Trace hook, called before the lookup.
    Display("ExtractRec", _list)
    location = SymbolTable.getLocationString(Record)
    return location
def CompileTerm(tokens):
    """Compile the term that starts at tokens[pos] and write its VM code.

    Output goes to the module-level file object ``fpw``; the module-level
    cursor ``pos`` is advanced past the tokens consumed here. Handles integer
    and string constants, parenthesised sub-expressions, unary operators,
    keyword constants, plain variables and array reads; anything else is
    handed to subroutineCall.
    """
    global fpw, pos

    def push_variable(var_name):
        # Push the variable's value from the segment matching its kind.
        kind = SymbolTable.kindOf(var_name)
        index = SymbolTable.indexOf(var_name)
        if kind == 'var':
            fpw.write('push local ' + str(index) + '\n')
        elif kind == 'field':
            fpw.write('push this ' + str(index) + '\n')
        else:
            fpw.write('push ' + kind + ' ' + str(index) + ' \n')

    current = tokens[pos]

    if JackTokenizer.findIntConst.match(current):
        # Integer constant.
        fpw.write('push constant ' + current + '\n')
        pos += 1
    elif JackTokenizer.findStrConst.match(current):
        # String constant: build it at run time with the OS String routines.
        # The token length is reduced by two before being used as the
        # String.new size.
        length = len(current) - 2
        fpw.write('push constant ' + str(length) + '\n')
        fpw.write('call String.new 1\n')
        for ch in current:
            if ch == '"':
                continue
            fpw.write('push constant ' + str(ord(ch)) + '\n')
            fpw.write('call String.appendChar 2\n')
        pos += 1
    elif current == '(':
        # Parenthesised sub-expression: compile it recursively.
        pos += 1
        CompileExpression(tokens)
        pos += 1  # step over ')'
    elif current in ['-', '~']:
        # Unary operator: operand first, then the operation.
        pos += 1
        CompileTerm(tokens)
        fpw.write('neg\n' if current == '-' else 'not\n')
    elif JackTokenizer.findKeyword.match(current):
        # Keyword constant; other keywords emit nothing but are consumed.
        if current == 'true':
            fpw.write('push constant 0\n')
            fpw.write('not\n')
        elif current in ['null', 'false']:
            fpw.write('push constant 0\n')
        elif current == 'this':
            fpw.write('push pointer 0\n')
        pos += 1
    elif (JackTokenizer.findIdentifier.match(current)
          and tokens[pos + 1] not in ['.', '[', '(']):
        # Plain variable reference.
        push_variable(current)
        pos += 1
    elif tokens[pos + 1] == '[':
        # Array read: base address + index, then read through `that`.
        push_variable(current)
        pos += 2  # step over the name and '['
        CompileExpression(tokens)
        fpw.write('add\n')
        fpw.write('pop pointer 1\n')
        fpw.write('push that 0\n')
        pos += 1  # step over ']'
    else:
        # a(...) or a.b(...): subroutine call.
        subroutineCall(tokens)
def CompileSubroutine(self):
    """Parse one subroutine declaration and emit its VM function header.

    Expected token sequence: (constructor|function|method) (void|type) name
    '(' parameterList ')' '{' varDec* statements '}'. A fresh function symbol
    table is created, void subroutines are recorded in
    ``self.void_functions`` as "<class>.<name>", and ``self.varCount`` is
    reset before the var declarations are compiled.

    Raises:
        Exception: on the first token that does not match the grammar.
    """
    tokenizer = self.tokenizer

    def at_keyword(*words):
        # True when the current token is a keyword and one of `words`.
        return (tokenizer.tokenType() == Config.TType.KEYWORD
                and tokenizer.keyword() in words)

    def expect_symbol(char):
        # Raise unless the current token is the symbol `char`.
        if (tokenizer.tokenType() == Config.TType.SYMBOL
                and tokenizer.symbol() == char):
            return
        raise Exception("Syntax Error; expected " + char + " instead of: " +
                        tokenizer.currentToken)

    # Per-subroutine state.
    self.isVoid = False
    self.function_sym_table = SymbolTable.SymbolTable()

    if not at_keyword("constructor", "function", "method"):
        raise Exception(
            'Syntax Error; expected "constructor","function" or "method" or '
            'class instead of:' + tokenizer.currentToken)

    # NOTE(review): this stores the token *type*, yet it is compared against
    # the strings "method" / "constructor" further down. Unless tokenType()
    # can return those strings, both branches are dead and every subroutine
    # takes the final `else` - confirm whether keyword() was intended.
    subroutine_type = tokenizer.tokenType()
    tokenizer.advance()

    # Return type: primitive, void, or a class name.
    if at_keyword("int", "char", "boolean"):
        pass
    elif at_keyword("void"):
        self.isVoid = True
    elif tokenizer.tokenType() == Config.TType.IDENTIFIER:
        pass
    else:
        raise Exception(
            "Syntax Error; expected void, primitive type or class instead of:"
            + tokenizer.currentToken)
    tokenizer.advance()

    # Subroutine name.
    if tokenizer.tokenType() != Config.TType.IDENTIFIER:
        raise Exception("Syntax Error; expected identifier instead of:" +
                        tokenizer.currentToken)
    tmp_name = tokenizer.currentToken
    qualified_name = self.class_name + "." + tmp_name
    if self.isVoid:
        self.void_functions.append(qualified_name)
    tokenizer.advance()

    expect_symbol("(")
    tokenizer.advance()
    self.CompileParameterList()
    expect_symbol(")")
    tokenizer.advance()

    expect_symbol("{")
    tokenizer.advance()

    # Local variable declarations.
    self.varCount = 0
    while at_keyword("var"):
        self.CompileVarDec()

    if subroutine_type == "method":
        self.varCount = self.varCount + 1
        self.out.WriteFunction(qualified_name, self.varCount)
        self.out.writePop("pointer", 0)
    elif subroutine_type == "constructor":
        #TODO
        pass
    else:
        self.out.WriteFunction(qualified_name, self.varCount)

    self.CompileStatements()

    expect_symbol("}")
    tokenizer.advance()
def symbolToBinary(symbolString):
    """Convert an A-instruction operand to binary via decimalToBinary(value, 15).

    If the symbol table ``st`` contains the operand, its stored address is
    converted; otherwise the operand is parsed as a decimal integer literal.
    """
    if not st.contains(symbolString):
        return decimalToBinary(int(symbolString), 15)
    return decimalToBinary(st.getAddress(symbolString), 15)
def toBinary(decNum):
    """Return the binary digits of a non-negative decimal value as a string.

    ``decNum`` may be an int or a decimal string. No padding is applied and
    zero yields the empty string; callers left-pad the result to 16 digits.
    """
    num = int(decNum)
    ans = ""
    while num > 0:
        ans = str(num % 2) + ans
        # Floor division keeps `num` an integer on every Python version.
        num = num // 2
    return ans


# First pass: record each label with the index of the line it occupies, then
# drop the label line so that the following instruction takes over that index.
while Parser.hasMoreCommands():
    Parser.advance()
    # Compare by value: `is` tests object identity, which is not guaranteed
    # to hold for equal strings.
    if Parser.commandType() == "L_COMMAND":
        SymbolTable.addEntry(Parser.symbol(), Parser.current)
        Parser.lines.pop(Parser.current)
        Parser.current -= 1

# Rewind the parser for the second pass.
Parser.current = -1

# Second pass: emit each A-instruction as a 16-digit binary line.
while Parser.hasMoreCommands():
    Parser.advance()
    if Parser.commandType() == "A_COMMAND":
        if Parser.symbol().isdigit():
            print(toBinary(Parser.symbol()).zfill(16))
        else:
            if SymbolTable.contains(Parser.symbol()):
                print(toBinary(SymbolTable.getAddress(Parser.symbol())).zfill(16))