def __init__(self, tokenizer, out_file_name):
    """Store the token source and create the output writer and symbol table.

    Args:
        tokenizer: Token source; stored unchanged.
        out_file_name: Handed straight to ``VMWriter``.
    """
    # Collaborators.
    self._tokenizer = tokenizer
    self._vm_writer = VMWriter(out_file_name)

    # Names are unknown until they are read from the input.
    self._class_name = self._subroutine_name = None
    self._symbol_table = SymbolTable()
    self._counter = 0
def p_ExprSwitchStmt(p):
    '''ExprSwitchStmt : SWITCH SimpleStmt SEMICOLON LCURLY ExprCaseClauseList RCURLY
                      | SWITCH SimpleStmt SEMICOLON Expression LCURLY ExprCaseClauseList RCURLY
                      | SWITCH LCURLY ExprCaseClauseList RCURLY
                      | SWITCH Expression LCURLY ExprCaseClauseList RCURLY
    '''
    # The string above is the grammar rule consumed by the parser generator;
    # it is not prose documentation and must keep its exact symbols.
    #
    # Only productions with five right-hand symbols (len(p) == 6, i.e.
    # `SWITCH Expression LCURLY ExprCaseClauseList RCURLY`) are translated.
    # For every other alternative p[0] is left unset.
    # NOTE(review): statement nesting was reconstructed from a collapsed
    # line -- confirm against the original file.
    global current_scope
    if len(p) == 6:
        # l1 is generated but never referenced below; l2 is the exit label.
        l1 = labelGen()
        l2 = labelGen()
        p[0] = TreeNode('ExprSwitchStmt', 0, 'INT')
        # Start with the code of the switch expression (p[2]).
        p[0].TAC.append_TAC(p[2].TAC)
        # Register a fresh temporary in the symbol table and copy the
        # switch expression's value into it.
        t1 = tempGen()
        node = symboltable_node()
        node.name = t1
        node.scope = current_scope
        node.value = p[2].data
        node.type = insertType(p[2].data)
        SymbolTable.add_node(node)
        p[0].TAC.add_line(['=', t1, p[2].data, ''])
        # p[4] is the case-clause list; its .data is appended as TAC.
        p[0].TAC.append_TAC(p[4].data)
        # One conditional jump per case, built from the first two items of
        # each child entry.
        for i in range(len(p[4].children)):
            p[0].TAC.add_line(
                ['ifgotoeq', t1, p[4].children[i][0], p[4].children[i][1]])
        # Copy the case bodies; before every leader except the first, jump
        # to the exit label so the previous case does not fall through.
        for i in range(p[4].TAC.length()):
            if i in p[4].TAC.leaders[1:]:
                p[0].TAC.add_line(['goto', l2, '', ''])
            p[0].TAC.add_line(p[4].TAC.code[i])
        # Exit label.
        p[0].TAC.add_line([l2])
    return
def __init__(self, file):
    """Prepare scanner state for ``file``.

    Args:
        file: Stored as ``self._file_name``; not opened here.
    """
    self._file_name = file

    # Output structures.
    self._st = SymbolTable()
    self._pif = PIF()

    # Language description taken from module-level definitions.
    self._tokens = get_tokens()
    self._separators = separators
    self._operators = operators
    self._reserved_words = reserved_words
def __init__(self, f):
    """Load an assembly file and prepare the parsing helpers.

    Args:
        f: Path of the assembly file, stored as ``self.asm_file``.

    ``self.filename`` is the input path without its final extension.
    """
    # Function-scope import keeps this block self-contained.
    import os.path

    self.asm_file = f
    self.table = SymbolTable()
    self.data = self.read_file()
    self.parser = Parser(self.data)
    self.commands = self.parser.commands
    self.translator = Translator()
    # splitext removes only the last extension. Splitting on the first "."
    # turned "./prog.asm" into "" and "dir.v2/prog.asm" into "dir".
    self.filename = os.path.splitext(self.asm_file)[0]
def parse(self):
    """Parse the whole text in ``self.file`` and return the errors found.

    The text is consumed up to the nearest of ``;``, ``{`` or ``}``:

    * ``;`` -- the preceding text goes to ``self.parsestatement``;
    * ``{`` -- the preceding text goes to ``self.parsefunction``;
    * ``}`` -- ``self.retscope()`` is called and ``self.table`` is
      replaced by its father table.

    Text left over with no delimiter in it (for example a newline after the
    last ``}``) ends the loop; it used to raise ``ValueError`` from
    ``min([])``.

    Returns:
        str: The collected messages, one per line, with repeated lines
        removed (first occurrence kept).
    """
    result = ""
    source = self.file
    source = re.sub(r'\t+', '', source)  # Drop tab characters.
    source = re.sub(r' {2,}', ' ', source)  # Collapse runs of spaces.
    line = 1
    while source:
        # First position of each delimiter, -1 when absent.
        semicolon = source.find(';')
        opening = source.find('{')
        closing = source.find('}')
        found = [pos for pos in (semicolon, opening, closing) if pos != -1]
        if not found:
            # Nothing left to delimit a statement or a block.
            break
        this = min(found)
        chunk = source[:this]
        # Keep the line counter in step with the newlines consumed.
        line += chunk.count('\n')
        if this == semicolon:
            result += self.parsestatement(
                self.expr['nl'].sub('', chunk.strip()), line)
        elif this == opening:
            result += self.parsefunction(
                self.expr['nl'].sub('', chunk.strip()), line)
        else:
            # Nearest delimiter is '}': leave the current scope.
            self.retscope()
            self.table = self.table.getfather()
        # Discard what was just parsed together with its delimiter.
        source = source[this + 1:]
    # Reset the scope state for the next run.
    self.table = SymbolTable()
    self.scope = None
    # Remove repeated lines while keeping their first-seen order.
    unique_lines = list(
        dict.fromkeys(re.sub(r'^\n', '', result).split('\n')))
    res = ''.join(s + '\n' for s in unique_lines)
    return re.sub(r'\n$', '', res)
def evaluate_tvm_expr(expr, suppress_errors=False):
    """
    Evaluate the given tvm expr.

    The variables and their values come from ``SymbolTable.variables`` and
    ``SymbolTable.binds``; the table is populated first if it has no binds.

    Parameters
    ----------
    expr : tvm.expr.PrimExpr
        expr to evaluate; anything that is not a ``tir.expr.ExprOp`` is
        returned unchanged
    suppress_errors : bool
        True to skip printing the traceback when evaluation fails,
        False to print it

    Returns
    -------
    result : int or float or bool or str
        result of evaluation, or the string "Runtime Exception" if any
        exception was raised while building or running the expression
    """
    if len(SymbolTable.binds) == 0:
        SymbolTable.populate()
    tvm_vars = SymbolTable.variables
    var_binds = SymbolTable.binds

    # Non-expression inputs need no evaluation.
    if not isinstance(expr, tir.expr.ExprOp):
        return expr
    dtype = expr.dtype

    try:
        shape = (1, )
        # Wrap the expression in a one-element compute and build it.
        c = te.compute(shape, lambda i: expr)
        s = te.create_schedule([c.op])
        f = tvm.build(s, tvm_vars + [c])
        c_tvm = tvm.nd.array(np.zeros(shape, dtype))

        # Convert each bound value to the Python type matching its dtype.
        tvm_binds = []
        for var in tvm_vars:
            value = var_binds[var.name]
            if var.dtype in ('uint32', 'int32'):
                value = int(value)
            elif var.dtype == "float":
                value = float(value)
            tvm_binds.append(value)

        f(*(tvm_binds + [c_tvm]))
        return c_tvm.asnumpy()[0]
    except Exception:
        if not suppress_errors:
            traceback.print_exc()
        return "Runtime Exception"
def populate_table(table: SymbolTable, instructions: Iterator[str]) -> Iterator[Instruction]:
    """First pass: record labels in ``table`` and yield everything else.

    Each line is parsed with ``Instruction.parse``. A ``Label`` is stored
    with the number of non-label instructions seen so far and is not
    yielded; any other result is counted and yielded.
    """
    next_number = 0
    for raw_line in instructions:
        item = Instruction.parse(raw_line)
        if not isinstance(item, Label):
            next_number += 1
            yield item
            continue
        table.put_label(item, next_number)
def p_SourceFile(p):
    '''SourceFile : PACKAGE IDENTIFIER SEMICOLON ImportDeclList TopLevelDeclList
    '''
    # The string above is the grammar rule, not prose documentation.
    # TODO: Ignoring package name and Imports for now
    program = p[5]
    p[0] = program

    # Dump the generated three-address code.
    print("Code length: ", len(program.TAC.code))
    program.TAC.print_code()

    # Run the optimiser; its return value is not used here.
    print("\n\nOPTMIZATION\n\n")
    optimize_tac.optimize_tac(SymbolTable, program.TAC)

    print("\nSYMBOL TABLE:")
    SymbolTable.print_symbol_table()
def __init__(self, filename):
    """Run the complete two-pass translation of ``filename``.

    Construction preprocesses the input, runs ``first_pass``, builds the
    parser, runs ``second_pass`` and finally calls ``write_output``.

    Args:
        filename: Path of the input file; '/' is treated as the directory
            separator when deriving ``self._name``.
    """
    self._input = self.preprocess(filename)

    # Base name: last path component, cut at its first dot.
    last_component = filename.split('/')[-1]
    self._name = last_component.split('.')[0]

    self._table = SymbolTable()
    self.first_pass()

    self._parser = Parser(self._input)
    self._output = []
    self._code = Code()
    self.second_pass()

    self.write_output()
def translate_symbols(
        table: SymbolTable,
        instructions: Iterator[Instruction]) -> Iterator[Instruction]:
    """Second pass: replace each ``Symbol`` by an ``AInstruction``.

    A symbol missing from ``table`` is added with ``put_symbol`` before it
    is looked up. Everything that is not a ``Symbol`` is yielded unchanged.
    """
    # Drain the incoming iterator completely before yielding anything, so
    # the symbol table is fully built by the upstream pass.
    buffered = list(instructions)
    for item in buffered:
        if not isinstance(item, Symbol):
            yield item
            continue
        if item not in table:
            table.put_symbol(item)
        yield AInstruction(table[item])
def __init__(self, file):
    """Prepare scanner state and the three finite automata for ``file``.

    Args:
        file: Stored as ``self._file_name``; not opened here.
    """
    # Output structures.
    self._st = SymbolTable()
    self._pif = PIF()

    # Language description taken from module-level definitions.
    self._tokens = get_tokens()
    self._file_name = file
    self._separators = separators
    self._operators = operators
    self._reserved_words = reserved_words

    # Recognisers loaded from their description files.
    self._identifier_FA = FiniteAutomatan("identifier.in")
    self._integer_FA = FiniteAutomatan("integer.in")
    string_fa = FiniteAutomatan("string.in")
    self._string_FA = string_fa
    # Extra Q2 -> Q2 transition on a blank, added on top of "string.in".
    string_fa.delta.add_transition("Q2", "Q2", " ")
def eval(self, st):
    """Evaluate a call to the function stored under ``self.symbol``.

    A new ``SymbolTable`` is created from ``st``. Each entry of the
    function's ``args`` is bound in it to the value of the call argument at
    the same position, evaluated in ``st``. The function's ``child`` is
    then evaluated in the new table and its result returned.
    """
    new_st = SymbolTable(st)
    func = st.getSymbol(self.symbol)
    params = func.args
    if params is not None:
        for position, param in enumerate(params):
            new_st.setSymbol(param, self.args[position].eval(st))
    return func.child.eval(new_st)
def setUp(self):
    """Create ``self.table`` and fill it with the letter-grade fixture."""
    self.table = SymbolTable()
    # Insertion order matches the original fixture; the two-character keys
    # with a trailing blank are intentional.
    grade_points = (
        ("A+", 4.33),
        ("A ", 4.00),
        ("A-", 3.67),
        ("B+", 3.33),
        ("B ", 3.00),
        ("B-", 2.67),
        ("C+", 2.33),
        ("C ", 2.00),
        ("C-", 1.67),
        ("D ", 1.00),
        ("F ", 0.00),
    )
    for letter, points in grade_points:
        self.table.put(LetterGrade(letter), points)
def test_check_scoping():
    """Check that the innermost definition of a name wins in nested scopes.

    Ten scopes are opened, each defining ``'a'`` with its own value. While
    unwinding, ``find('a')`` must return the value of the scope that is
    currently innermost.
    """
    st = SymbolTable()
    # range() instead of xrange(): xrange does not exist on Python 3, and
    # range iterates the same values on Python 2.
    for val in range(0, 10):
        st.enter_scope()
        st.add('a', val)
    for val in range(9, -1, -1):
        assert st.find('a') == val
        st.leave_scope()
def __init__(self, string=None):
    """Initialise every attribute and keep the text to be parsed.

    Args:
        string: Stored as ``self.file``; defaults to ``None``.
    """
    self.file = string

    # Scope tracking.
    self.table = SymbolTable()
    self.scopestack = []
    self.scope = None

    # Language vocabulary.
    self.types = {'void', 'int', 'float', 'string'}
    self.reserved = {'if', 'while'}

    # Each dict is created empty right before its generate_* method runs.
    self.err = {}
    self.generate_errors()
    self.expr = {}
    self.generate_expressions()
def p_Expression(p):
    '''Expression : UnaryExpr
                  | Expression OR_OR Expression
                  | Expression AMP_AMP Expression
                  | Expression EQ_EQ Expression
                  | Expression NOT_EQ Expression
                  | Expression LT Expression
                  | Expression LT_EQ Expression
                  | Expression GT Expression
                  | Expression GT_EQ Expression
                  | Expression PLUS Expression
                  | Expression MINUS Expression
                  | Expression OR Expression
                  | Expression CARET Expression
                  | Expression STAR Expression
                  | Expression DIVIDE Expression
                  | Expression MODULO Expression
                  | Expression LS Expression
                  | Expression RS Expression
                  | Expression AMP Expression
                  | Expression AND_OR Expression
    '''
    # The string above is the grammar rule consumed by the parser generator;
    # it is not prose documentation and must keep its exact symbols.
    # NOTE(review): statement nesting was reconstructed from a collapsed
    # line -- in particular the placement of the final `p[0].name`
    # assignment should be confirmed against the original file.
    global current_scope
    if len(p) == 2:
        # Single-symbol alternative: pass the UnaryExpr node through.
        p[0] = p[1]
    elif len(p) == 4:
        # Binary alternative: p[1] and p[3] are operand nodes, p[2] is the
        # operator text. The concatenated text is the lookup key.
        expression = p[1].data + p[2] + p[3].data
        expr_node = SymbolTable.search_expr(expression)
        if not expr_node:
            # First time this expression text is seen: create a temporary,
            # register it, and emit the three-address instruction.
            temp = tempGen()
            node = symboltable_node()
            node.name = temp
            node.value = p[1].data + p[2] + p[3].data
            node.expr = p[1].data + p[2] + p[3].data
            node.type = p[1].input_type
            node.scope = current_scope
            SymbolTable.add_node(node)
            # The textual value set above is overwritten by evalExpr's
            # result after the node has been added to the table.
            node.value = evalExpr(p[1], p[2], p[3])
            # The new node is built with p[1]'s TAC; p[3]'s TAC and the
            # operator line are appended after it.
            p[0] = TreeNode('IDENTIFIER', temp, 'INT', 1, [], p[1].TAC)
            node.exprnode = p[0]
            p[0].TAC.append_TAC(p[3].TAC)
            p[0].TAC.add_line([p[2], p[0].data, p[1].data, p[3].data])
        else:
            # Same expression text already registered: reuse its node.
            p[0] = expr_node.exprnode
    p[0].name = 'Expression'
    return
def __init__(self, tokenizer):
    """Compile the class supplied by ``tokenizer`` during construction.

    The output name is the tokenizer's file name with every ``.jack``
    removed, plus ``.vm``. The constructor primes the tokenizer, calls
    ``compile_class`` and then ``close``.

    Args:
        tokenizer: Token source for the input.
    """
    # Tokenizer for input and the name derived from it.
    self._tokenizer = tokenizer
    self._name = tokenizer.get_filename().replace('.jack','')
    # Symbol table.
    self._symbols = SymbolTable()
    # VM output file.
    self._writer = VMWriter(self._name + '.vm')

    # Input should be a tokenized .jack file containing one class.
    assert self._tokenizer.has_more_tokens()
    self._tokenizer.advance()

    self._class = self._subroutine = None
    self._counter = 0

    self.compile_class()
    self.close()
def eval(self, st):
    """Call the function stored under ``self.name`` and return its value.

    The definition is fetched from ``st``. Each declared parameter name is
    bound, in a new ``SymbolTable`` created from ``self.name`` and ``st``,
    to the matching call argument evaluated in ``st``. The function's
    ``block`` is then evaluated in that local table.

    Returns:
        ``result.eval(local_table)`` when the block returns a non-``None``
        result, otherwise ``None``.

    Raises:
        Exception: If the number of call arguments differs from the number
            of declared parameters.
    """
    func = st.getSymbol(self.name)  # function definition
    expected = len(func.argList)
    given = len(self.argList)
    if given != expected:
        # The message used to swap the two counts and print the parameter
        # list itself instead of its length.
        raise Exception(
            f"{self.name} receives {expected} arguments but {given} were given")
    funcSt = SymbolTable(self.name, st)  # local scope for the call
    for param_name, arg in zip(func.argList, self.argList):
        # Arguments are evaluated in the caller's table, not the local one.
        funcSt.setSymbol(IdentSymbol(param_name, arg.eval(st)))
    result = func.block.eval(funcSt)  # run the body in the local scope
    if result is not None:
        return result.eval(funcSt)
    return None
def parse():
    """Analyse the program, then run its main function.

    Returns:
        The value of evaluating a ``FuncCall`` to ``const.MAIN`` with no
        arguments in the table filled by ``Parser.analyze_program``.

    Raises:
        ValueError: If the parser's current token is still valid after the
            program was analysed; the error is printed first.
    """
    st = SymbolTable()
    Parser.analyze_program(st)
    if not Parser.is_valid(Parser.tok.curr):
        entry_call = nd.FuncCall(const.MAIN, [])
        return entry_call.eval(st)
    utils.print_error(Parser)
    raise ValueError('Found remaning values after last block')
def test_st_define():
    """Check which table each symbol kind lands in, and with which index.

    STATIC and FIELD definitions are expected in ``classTable``, ARG and
    VAR definitions in ``subroutineTable``, with the indices asserted below.
    """
    st = SymbolTable()
    declarations = (
        ("first", "int", SymbolType.STATIC),
        ("second", "SomeClass", SymbolType.FIELD),
        ("third", "String", SymbolType.ARG),
        ("fourth", "bool", SymbolType.VAR),
    )
    for name, type_name, kind in declarations:
        st.define(name, type_name, kind)

    expected_class = {
        "first": ("int", SymbolType.STATIC, 0),
        "second": ("SomeClass", SymbolType.FIELD, 1),
    }
    expected_subroutine = {
        "third": ("String", SymbolType.ARG, 0),
        "fourth": ("bool", SymbolType.VAR, 1),
    }
    assert (st.classTable == expected_class)
    assert (st.subroutineTable == expected_subroutine)
def main():
    """Read the file named by ``sys.argv[1]``, parse it and evaluate it.

    All newline characters are removed from the text before it is handed
    to the parser.
    """
    source_path = sys.argv[1]
    with open(source_path, 'r') as myfile:
        raw_text = myfile.read()
    input_data = raw_text.replace('\n', '')

    st = SymbolTable()
    Parser.tokens.origin = input_data
    tree = Parser.init_parse()
    tree.eval(st)
def main(input_path: Path, output_path: Path):
    """Assemble ``input_path`` into ``output_path``.

    The input lines flow through ``strip_whitespace``, ``populate_table``
    and ``translate_symbols``. Each resulting instruction is written as its
    string form on a line of its own.
    """
    with open(input_path) as input_file, open(output_path, 'w') as output_file:
        stripped = strip_whitespace(input_file)
        symbol_table = SymbolTable()
        # Both passes share one symbol table.
        unlabelled = populate_table(symbol_table, stripped)
        resolved = translate_symbols(symbol_table, unlabelled)
        for instruction in resolved:
            output_file.write(str(instruction) + "\n")
def Evaluate(self, st):
    # Evaluate a call to the routine stored in `st` under `self.value`.
    #
    # `st.getter` returns a pair: the declaration node and a tag that is
    # compared against "FUNCTION" below. The routine body runs in a new
    # SymbolTable created from `st`. For "FUNCTION" routines the value
    # stored under the routine's own name is returned when its type
    # matches the declared one; otherwise the method falls off the end and
    # returns None.
    #
    # Raises ValueError when the number of call arguments differs from the
    # number of declared parameters.
    #
    # NOTE(review): statement nesting was reconstructed from a collapsed
    # line -- confirm whether the body loop belongs inside
    # `if node.children:`.
    new_st = SymbolTable(st)
    get_node = st.getter(self.value)
    node = get_node[0]
    func_void = get_node[1]
    if func_void == "FUNCTION":
        # children[0] is used as a (name, type) pair for the return slot.
        new_st.declare(node.children[0][0], node.children[0][1])
    if node.children:
        # children[1] is the declared parameter list.
        if len(self.children) > len(node.children[1]):
            raise ValueError(f"Too many arguments in {self.value}")
        if len(self.children) < len(node.children[1]):
            raise ValueError(f"missing arguments in {self.value}")
        for i in range(len(node.children[1])):
            # Arguments are evaluated in the caller's table `st`.
            new_st.create(node.children[1][i][0], self.children[i].Evaluate(st), node.children[1][i][1])
    # children[2] holds the body statements; they run in the new table.
    for child in node.children[2]:
        child.Evaluate(new_st)
    if func_void == "FUNCTION":
        return_value = new_st.getter(self.value)
        # Return the stored value only if its type matches the declaration.
        if return_value[1] == node.children[0][1]:
            return return_value[0]
def p_id(self, p):
    '''id : ID'''
    # The string above is the grammar rule, not prose documentation.
    #
    # If the identifier names a symbol in the symbol table, its value is
    # substituted for the identifier; otherwise the identifier text is
    # passed through unchanged.
    macro = SymbolTable().lookup_symbol(p[1])
    if macro:
        # Parenthesised single-argument print: a syntax error as a Python 2
        # statement under Python 3, and identical output on both.
        print("Replacing %s with %s" % (p[1], macro.value))
        p[0] = macro.value
        return
    p[0] = p[1]
def p_UnaryExpr(p):
    '''UnaryExpr : PrimaryExpr
                 | unary_op UnaryExpr
    '''
    # The string above is the grammar rule consumed by the parser generator;
    # it is not prose documentation and must keep its exact symbols.
    # NOTE(review): statement nesting was reconstructed from a collapsed
    # line -- confirm the placement of the final `p[0].name` assignment.
    global current_scope
    if len(p) == 2:
        # Single-symbol alternative: pass the PrimaryExpr node through.
        p[0] = p[1]
    elif len(p) == 3:
        # Operator applied to an operand: register a fresh temporary that
        # initially carries the operand's data as its value.
        temp = tempGen()
        node = symboltable_node()
        node.name = temp
        node.value = p[2].data
        node.scope = current_scope
        SymbolTable.add_node(node)
        p[0] = TreeNode('IDENTIFIER', temp, 'INT', 1)
        # Three-item line: operator data, destination, operand.
        p[0].TAC.add_line([p[1].data, p[0].data, p[2].data])
    p[0].name = 'UnaryExpr'
    return
def main(debug, b_reg):
    """
    Assemble the file named by ``sys.argv[1]`` in two passes.

    in: bool debug, bool b register (both forwarded to ``Parser``)
    out: void

    The first pass stores each L_COMMAND symbol with the current ROM
    address. The second pass translates every command and hands the
    collected output to ``create_file`` together with the input path minus
    its extension.
    """
    machine_code = []
    symbols = SymbolTable()
    if len(sys.argv) < 2:
        no_file_arg()
    source_path = sys.argv[1]
    output_stem = os.path.splitext(source_path)[0]
    parser = Parser(source_path, debug, b_reg)
    next_rom = 0
    next_ram = 16

    # First pass: A and C commands advance the ROM address, labels record it.
    while parser.has_more_cmds():
        kind = parser.command_type()
        if kind in ("C_COMMAND", "A_COMMAND"):
            next_rom += 1
        elif kind == "L_COMMAND":
            symbols.add_entry(parser.symbol(), next_rom)
        parser.advance()
    parser.reset()

    # Second pass: translate each command.
    while parser.has_more_cmds():
        cc = parser.b_cc()  # account for b reg
        kind = parser.command_type()
        if kind != "A_COMMAND":
            machine_code.append(parser.c_to_binary(cc, kind, symbols))
        else:
            # Handle A commands; the symbol is the command minus its first
            # character.
            symbol = cc[1:]
            if symbols.contains(symbol):
                machine_code.append(
                    parser.a_int_to_binary(symbols.get_address(symbol)))
            elif parser.cc_is_int():
                machine_code.append(parser.a_int_to_binary(cc))
            else:
                # Unknown symbol: give it the next free RAM address.
                symbols.add_entry(symbol, next_ram)
                machine_code.append(
                    parser.a_int_to_binary(str(symbols.get_address(symbol))))
                next_ram += 1
        parser.advance()

    create_file(machine_code, output_stem)
def compile_class(self):
    """Compile one class: header tokens, then members until input ends.

    A fresh symbol table is created for the class. While tokens remain, the
    look-ahead token selects between class variable declarations,
    subroutines, and the closing brace.
    """
    # Create symbol table for class.
    self.symbol_table = SymbolTable()

    self.add_opening_tag('class')
    self.increase_indent()

    self.write_next_token()  # 'class'
    self.class_name = self.write_next_token()  # className
    self.write_next_token()  # {

    class_var_keywords = {'static', 'field'}
    subroutine_keywords = {'function', 'constructor', 'method'}
    while self.tokenizer.has_more_tokens():
        upcoming = self.tokenizer.look_ahead()[1]
        if upcoming in class_var_keywords:
            self.compile_class_var_dec()
        elif upcoming in subroutine_keywords:
            self.compile_subroutine()
        elif upcoming == '}':
            self.write_next_token()  # }

    self.decrease_indent()
    self.add_closing_tag('class')
def main():
    """Assemble the file named by ``sys.argv[1]`` into a ``.hack`` file.

    The output path is the input path with its final extension replaced by
    ``.hack``.

    First pass: every L_COMMAND symbol is stored with the ROM address of
    the instruction that follows it.

    Second pass, for each A_COMMAND with a non-empty symbol:
      * a numeric symbol is written directly;
      * a known symbol is written as its stored address;
      * an unknown symbol gets the next free RAM address (from 16) first.
    Every other command is written as a C-instruction when ``Code.comp``
    returns a value.
    """
    # Function-scope import keeps this block self-contained.
    import os.path

    filename = sys.argv[1]
    # splitext removes only the last extension; splitting on the first "."
    # turned "./Add.asm" into an output file named ".hack".
    out_path = os.path.splitext(filename)[0] + '.hack'

    # `with` closes the output even if parsing or translation raises.
    with open(out_path, 'w') as output:
        firstPass = Parser(filename)
        symbolTable = SymbolTable()
        rom_address = 0
        ramAddress = 16

        # First pass adds L_COMMANDs and ROM addresses to the symbol table.
        while firstPass.hasMoreCommands():
            firstPass.advance()
            command = firstPass.commandType()
            if command == 'A_COMMAND' or command == 'C_COMMAND':
                rom_address += 1
            elif command == 'L_COMMAND':
                symbolTable.addEntry(firstPass.symbol(), rom_address)

        secondPass = Parser(filename)
        while secondPass.hasMoreCommands():
            secondPass.advance()
            command = secondPass.commandType()
            symbol = secondPass.symbol()
            if command == 'A_COMMAND' and symbol:
                if symbol.isdigit():
                    address = symbol
                else:
                    if not symbolTable.contains(symbol):
                        # New variable: allocate the next RAM slot.
                        symbolTable.addEntry(symbol, ramAddress)
                        ramAddress += 1
                    address = symbolTable.getAddress(symbol)
                # A-instruction: leading 0 and a 15-bit binary address.
                output.write('0' + '{0:015b}'.format(int(address)) + '\n')
            else:
                dest = Code.dest(secondPass.dest())
                jump = Code.jump(secondPass.jump())
                comp = Code.comp(secondPass.comp())
                # Commands with no comp part produce no output line.
                if comp is not None:
                    output.write('111' + comp + dest + jump + '\n')
def main():
    """Report known misspellings found on standard input.

    ``misspellings.txt`` is read line by line; the first two
    blank-separated fields of each line become a key and its value. Every
    word read from standard input that is a key is written out followed by
    ``-->`` and its value.
    """
    entries = InStream('misspellings.txt').readAllLines()
    corrections = SymbolTable()
    for entry in entries:
        fields = entry.split(' ')
        corrections[fields[0]] = fields[1]

    while not stdio.isEmpty():
        candidate = stdio.readString()
        if candidate in corrections:
            stdio.write(candidate + '-->' + corrections[candidate])
def main():
    """Run both assembler passes on a hard-coded input file.

    Prints a greeting, then calls ``first_pass`` and ``second_pass`` with a
    shared symbol table; the output goes to ``test.hack``.
    """
    print("hello world")
    st = SymbolTable()
    # Every backslash is doubled. The original literal contained a lone
    # "\p", an invalid escape sequence that newer Python versions warn
    # about; the runtime value of the path is unchanged.
    # TODO: the original comments hint at taking both paths from sys.argv.
    filename = 'C:\\Users\\wuviv\\OneDrive\\Documents\\nand2tetris\\nand2tetris\\projects\\06\\add\\Add.asm'
    filenameout = 'test.hack'
    first_pass(st, filename)
    second_pass(st, filename, filenameout)
def run(source):
    """Preprocess, parse and evaluate ``source``.

    The text is filtered by ``PrePro.filter`` and tokenised; the parsed
    program is then evaluated in a root ``SymbolTable``.

    Raises:
        ValueError: If the current token after evaluation is not ``EOF``.
    """
    filtered = PrePro.filter(source)
    Parser.tokens = Tokenizer(filtered)
    Parser.tokens.selectNext()

    root_table = SymbolTable(None)
    Parser.program().Evaluate(root_table)

    leftover = Parser.tokens.actual
    if leftover.tp != "EOF":
        raise ValueError(f"{leftover.value} invalid at end of sentence")
class CompilationEngineXML: #------------------------------------------------------------------------------ # Var Declar: #------------------------------------------------------------------------------ #stores all the different key words key_class='CLASS' key_method='METHOD' key_function='FUNCTION' key_constructor='CONSTRUCTOR' key_int='INT' key_boolean='BOOLEAN' key_char='CHAR' key_void='VOID' key_var='VAR' key_static='STATIC' key_field='FIELD' key_let='LET' key_do='DO' key_if='IF' key_else='ELSE' key_while='WHILE' key_return='RETURN' key_true='TRUE' key_false='FALSE' key_null='NULL' key_this='THIS' #stores all the token types keyword='KEYWORD' sym='SYMBOL' ident='IDENTIFIER' intc='INT_CONST' string_c='STRING_CONST' #for off setting the xml attributes in the output file space = ' ' spaceCount = 0 #look up table for xml attributes xml={'classb':'<class>','classe':'</class>','classVarDecb':'<classVarDec>','classVarDece':'</classVarDec>' ,'subroutineDecb':'<subroutineDec>','subroutineDece':'</subroutineDec>','parameterListb':'<parameterList>','parameterListe':'</parameterList>' ,'subroutineBodyb':'<subroutineBody>','subroutineBodye':'</subroutineBody>','varDecb':'<varDec>','varDece':'</varDec>' ,'statementsb':'<statements>','statementse':'</statements>','letStatementb':'<letStatement>','letStatemente':'</letStatement>' ,'ifStatementb':'<ifStatement>','ifStatemente':'</ifStatement>','whileStatementb':'<whileStatement>','whileStatemente':'</whileStatement>' ,'doStatementb':'<doStatement>','doStatemente':'</doStatement>','ReturnStatementb':'<returnStatement>','ReturnStatemente':'</returnStatement>' ,'expressionb':'<expression>','expressione':'</expression>','termb':'<term>','terme':'</term>','expressionListb':'<expressionList>' ,'expressionListe':'</expressionList>','integerConstantb':'<integerConstant>','integerConstante':'</integerConstant>','StringConstantb':'<stringConstant>' 
,'StringConstante':'</stringConstant>','identifierb':'<identifier>','identifiere':'</identifier>','keywordb':'<keyword>','keyworde':'</keyword>', 'symbolb':'<symbol>', 'symbole':'</symbol>'} #-------------------------------------------------------------------------- # Class declaration: #-------------------------------------------------------------------------- #------------------------------------------------------------------------------ # This is the constructor def __init__(self,infile,outfile): self.of = open(outfile,'w') self.token = JackToken(infile) self.table = SymbolTable() #------------------------------------------------------------------------------ # This method compiles the entire class contained in the input file def compileClass(self): self.of.write((self.space*self.spaceCount)+self.xml['classb']+'\n') self.spaceCount += 1 self.token.advance() while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_class in tempkey: self.of.write((self.space*self.spaceCount)+self.xml['keywordb']+tempkey.lower()+self.xml['keyworde']+'\n') #if the keyword is static or field then it is known that it is a class var dec #at this level of compilation elif self.key_static in tempkey or self.key_field in tempkey: self.compileClassVarDec() continue #continue because there maybe more then one class var and don't want to advane tokenizer #if the keyword is a subroutine type elif self.key_constructor in tempkey or self.key_method in tempkey or self.key_function in tempkey: self.compileSubroutine() elif self.sym in tokentype: tempsym = self.token.symbol() #if we run into } at this level then we are at the end of the class if '}' in tempsym: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') break self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') elif self.ident in tokentype: tempident = 
self.token.identifier() self.currClassName = tempident self.of.write((self.space*self.spaceCount)+self.xml['identifierb']+tempident+self.xml['identifiere']+'\n') self.token.advance() self.spaceCount -= 1 self.of.write((self.space*self.spaceCount)+self.xml['classe']) self.of.close() #------------------------------------------------------------------------------ # This method compiles class var dec def compileClassVarDec(self): self.of.write((self.space*self.spaceCount)+self.xml['classVarDecb']+'\n') self.spaceCount += 1 curtype = "" curkind = "" curname = "" while self.token.hasMoreTokens: tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_int in tempkey or self.key_char in tempkey or self.key_boolean in tempkey: curtype = tempkey self.of.write((self.space*self.spaceCount)+self.xml['keywordb']+tempkey.lower()+self.xml['keyworde']+'\n') elif self.key_static in tempkey or self.key_field in tempkey: curkind = tempkey self.of.write((self.space*self.spaceCount)+self.xml['keywordb']+tempkey.lower()+self.xml['keyworde']+'\n') #if we run into a subroutine declaration then we break elif self.key_function in tempkey or self.key_method in tempkey or self.key_constructor in tempkey: break elif self.ident in tokentype: tempident = self.token.identifier() if len(curtype) == 0: curtype = tempident else: curname = tempident self.of.write((self.space*self.spaceCount)+self.xml['identifierb']+tempident+self.xml['identifiere']+'\n') elif self.sym in tokentype: tempsym = self.token.symbol() #if it runs into any of the below symboles then it is an invalid var decleration if re.search('[\(\)\{\}\[\]\.\+\-\*\/\&\<\>\=\~]{1}',tempsym) is not None: print(self.token.errorMsg()) sys.exit(0) #if we run into a ; then it is the end of this particular class var dec if ';' in tempsym: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') self.token.advance() self.table.Define(curname,curtype,curkind) 
break self.table.Define(curname,curtype,curkind) curname = '' self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') self.token.advance() self.spaceCount -= 1 self.of.write((self.space*self.spaceCount)+self.xml['classVarDece']+'\n') #------------------------------------------------------------------------------ # This method compiles the subroutines def compileSubroutine(self): self.of.write((self.space*self.spaceCount)+self.xml['subroutineDecb']+'\n') self.spaceCount += 1 self.table.startSubroutine() if_param = False #ensures that at least an empty param list is discovered isConstruct = False while self.token.hasMoreTokens: tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_method in tempkey or self.key_function in tempkey or self.key_constructor in tempkey: self.of.write((self.space*self.spaceCount)+self.xml['keywordb']+tempkey.lower()+self.xml['keyworde']+'\n') isConstruct = True if self.key_constructor in tempkey else False elif self.key_int in tempkey or self.key_char in tempkey or self.key_boolean in tempkey or self.key_void in tempkey: self.of.write((self.space*self.spaceCount)+self.xml['keywordb']+tempkey.lower()+self.xml['keyworde']+'\n') #if the keyward var is in tempkey then we need to compile a vardeck elif self.key_var in tempkey: self.compileVarDec() #if it runs into any keywords that aren't caught by the above statements then it is no longer #in a subroutine else: break elif self.sym in tokentype: tempsym = self.token.symbol() #if it runs into a ( then it is descovering a parameter list if '(' in tempsym: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') self.token.advance() #compiles the parameter list self.compileParameterList(isConstruct) self.of.write((self.space*self.spaceCount)+self.xml['subroutineBodyb']+'\n') 
self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+self.token.symbol()+self.xml['symbole']+'\n') if_param = True #set param list discovered to true #if it has fond at lest an empty paramlist then it can print the next symboles elif if_param: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') #error else: print(self.token.errorMsg()) sys.exit(0) elif self.ident in tokentype: self.of.write((self.space*self.spaceCount)+self.xml['identifierb']+self.token.identifier()+self.xml['identifiere']+'\n') self.token.advance() self.compileStatements() self.of.write((self.space*self.spaceCount)+self.xml['subroutineBodye']+'\n') self.spaceCount -= 1 self.of.write((self.space*self.spaceCount)+self.xml['subroutineDece']+'\n') #------------------------------------------------------------------------------ # This method compiles the parameter list def compileParameterList(self,isConstruct): self.of.write((self.space*self.spaceCount)+self.xml['parameterListb']+'\n') self.spaceCount += 1 curname = '' curtype = '' curkind = '' if not isConstruct: self.table.Define('this',self.currClassName,'ARG') while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() curtype = tempkey self.of.write((self.space*self.spaceCount)+self.xml['keywordb']+tempkey.lower()+self.xml['keyworde']+'\n') elif self.ident in tokentype: tempident = self.token.identifier() if len(curtype) == 0: curtype = tempident else: curname = tempident self.of.write((self.space*self.spaceCount)+self.xml['identifierb']+tempident+self.xml['identifiere']+'\n') elif self.sym in tokentype: tempsym = self.token.symbol() #if it runs into a ) means the end of the parameter list so break if ')' in tempsym: self.table.Define(curname, curtype, 'ARG') break #seperation of the parameters elif ',' in tempsym: self.table.Define(curname, curtype, 'ARG') curname = '' curtype = '' 
self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') #any other symbol results in a an error else: print(self.token.errorMsg()) sys.exit(0) self.token.advance() self.spaceCount -= 1 self.of.write((self.space*self.spaceCount)+self.xml['parameterListe']+'\n') self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') #advance twice because we are at ( so need to getpast that and need to get the next symbol self.token.advance() self.token.advance() #------------------------------------------------------------------------------ # This method compiles the var decliration def compileVarDec(self): self.of.write((self.space*self.spaceCount)+self.xml['varDecb']+'\n') self.spaceCount += 1 curname = '' curtype = '' while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_var in tempkey: self.of.write((self.space*self.spaceCount)+self.xml['keywordb']+tempkey.lower()+self.xml['keyworde']+'\n') elif self.key_int in tempkey or self.key_char in tempkey or self.key_boolean in tempkey: curtype = tempkey self.of.write((self.space*self.spaceCount)+self.xml['keywordb']+tempkey.lower()+self.xml['keyworde']+'\n') #if any keyword is docovered than what is above then the vardec is over else: break elif self.ident in tokentype: tempident = self.token.identifier() if len(curtype) == 0: curtype = tempident else: curname = tempident self.of.write((self.space*self.spaceCount)+self.xml['identifierb']+tempident+self.xml['identifiere']+'\n') elif self.sym in tokentype: tempsym = self.token.symbol() if ',' in tempsym: self.table.Define(curname,curtype, 'VAR') curname = '' self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') #once ; is found then at the end of a vardec elif ';' in tempsym: self.table.Define(curname,curtype, 'VAR') 
self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') break self.token.advance() self.spaceCount -= 1 self.of.write((self.space*self.spaceCount)+self.xml['varDece']+'\n') #------------------------------------------------------------------------------ # This method compiles the statements def compileStatements(self): self.of.write((self.space*self.spaceCount)+self.xml['statementsb']+'\n') self.spaceCount += 1 while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() #if 'let' is found then compilelet if self.key_let in tempkey: self.compileLet() elif self.key_if in tempkey: self.compileIf() #continue because we could have multiple if statements found and #the current token could be the key word if so we don't want to advance #the tokenizer prematurely continue elif self.key_while in tempkey: self.compileWhile() elif self.key_do in tempkey: self.compileDo() elif self.key_return in tempkey: self.compileReturn() #incorrect key word at this level of compilation else: print(self.token.errorMsg()) sys.exit(0) elif self.sym in tokentype: tempsym = self.token.symbol() #once we run into } thats the endof statments if '}' in tempsym: break #any other symbol discovered at this stage is an error else: print(self.token.errorMsg()) sys.exit(0) self.token.advance() self.spaceCount -= 1 self.of.write((self.space*self.spaceCount)+self.xml['statementse']+'\n') self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+self.token.symbol()+self.xml['symbole']+'\n') #------------------------------------------------------------------------------ # This method compiles the do def compileDo(self): self.of.write((self.space*self.spaceCount)+self.xml['doStatementb']+'\n') self.spaceCount += 1 while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_do in tempkey: 
self.of.write((self.space*self.spaceCount)+self.xml['keywordb']+tempkey.lower()+self.xml['keyworde']+'\n') #if any keyword other then do is discovered at this level it results in an error else: print(self.token.errorMsg()) sys.exit(0) elif self.ident in tokentype: #compiles the expression with the value for a subroutine call passed in being true self.compileExpression(True) #once compileexpression is done then the current token is a ; signalling the end of a dostatment self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+self.token.symbol()+self.xml['symbole']+'\n') self.token.advance() break self.token.advance() self.spaceCount -= 1 self.of.write((self.space*self.spaceCount)+self.xml['doStatemente']+'\n') #------------------------------------------------------------------------------ # This method compiles the letStatement def compileLet(self): self.of.write((self.space*self.spaceCount)+self.xml['letStatementb']+'\n') self.spaceCount += 1 while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_let in tempkey: self.of.write((self.space*self.spaceCount)+self.xml['keywordb']+tempkey.lower()+self.xml['keyworde']+'\n') #if any other keyword is discovered it is an error else: print(self.token.errorMsg()) sys.exit(0) elif self.ident in tokentype: tempident = self.token.identifier() self.of.write((self.space*self.spaceCount)+self.xml['identifierb']+'\n') self.of.write((self.space*(self.spaceCount+1))+'<name>'+tempident+'</name>'+'\n') self.of.write((self.space*(self.spaceCount+1))+'<type>'+self.table.typeOf(tempident)+'</type>'+'\n') self.of.write((self.space*(self.spaceCount+1))+'<kind>'+self.table.kindOf(tempident)+'</kind>'+'\n') self.of.write((self.space*(self.spaceCount+1))+'<index>'+repr(self.table.indexOf(tempident))+'</index>'+'\n') self.of.write((self.space*self.spaceCount)+self.xml['identifiere']+'\n') elif self.sym in tokentype: tempsym = self.token.symbol() #if [ is 
discovered it means that it is an array access if '[' in tempsym: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') self.token.advance() self.compileExpression(False) self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+self.token.symbol()+self.xml['symbole']+'\n') self.token.advance() #continue so that the bellow error catching isn't accidently triped hence the advance command #before this continue #this means that we compile th expression on the other side of the = sign elif '=' in tempsym: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') self.token.advance() self.compileExpression(False) #sets tempsym to the current symbole tempsym = self.token.symbol() #if tempsym at this point is ; then end of let statement if ';' in self.token.symbol(): self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') break #othre wise it is an error else: print(self.token.errorMsg()) sys.exit(0) self.token.advance() self.spaceCount -= 1 self.of.write((self.space*self.spaceCount)+self.xml['letStatemente']+'\n') #------------------------------------------------------------------------------ # This method compiles the whileStatement def compileWhile(self): self.of.write((self.space*self.spaceCount)+self.xml['whileStatementb']+'\n') self.spaceCount += 1 while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_while in tempkey: self.of.write((self.space*self.spaceCount)+self.xml['keywordb']+tempkey.lower()+self.xml['keyworde']+'\n') #if any other keyword is discovered at this level it is an error else: print(self.token.errorMsg()) sys.exit(0) elif self.sym in tokentype: tempsym = self.token.symbol() #the condition of the while loop if '(' in tempsym: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') 
self.token.advance() self.compileExpression(False) self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+self.token.symbol()+self.xml['symbole']+'\n') #body of the while loop elif '{' in tempsym: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') self.token.advance() self.compileStatements() #once the statments are compiled the whilestatment is done break #any other symbol at this level results in an error else: print(self.token.errorMsg()) sys.exit(0) self.token.advance() self.spaceCount -= 1 self.of.write((self.space*self.spaceCount)+self.xml['whileStatemente']+'\n') #------------------------------------------------------------------------------ # This method compiles the ReturnStatement def compileReturn(self): self.of.write((self.space*self.spaceCount)+self.xml['ReturnStatementb']+'\n') self.spaceCount += 1 while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_return in tempkey: self.of.write((self.space*self.spaceCount)+self.xml['keywordb']+tempkey.lower()+self.xml['keyworde']+'\n') #Any other keyword means that an exprssion is to be compiled and return is done else: self.compileExpression(False) self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+self.token.symbol()+self.xml['symbole']+'\n') self.token.advance() break #other wise compile expression elif self.ident in tokentype or self.string_c in tokentype or self.intc in tokentype: self.compileExpression(False) self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+self.token.symbol()+self.xml['symbole']+'\n') self.token.advance() break elif self.sym in tokentype: tempsym = self.token.symbol() #denotes the end of a return statment if ';' in tempsym: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') break #any other symbol at this level is an error else: print(self.token.errorMsg()) sys.exit(0) 
self.token.advance() self.spaceCount -= 1 self.of.write((self.space*self.spaceCount)+self.xml['ReturnStatemente']+'\n') #------------------------------------------------------------------------------ # This method compiles the ifStatement def compileIf(self): self.of.write((self.space*self.spaceCount)+self.xml['ifStatementb']+'\n') self.spaceCount += 1 #this means that keyword if has been seen only once so if it seen again #that means it is a seperate if statment seen_once = True while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_if in tempkey and seen_once: self.of.write((self.space*self.spaceCount)+self.xml['keywordb']+tempkey.lower()+self.xml['keyworde']+'\n') elif self.key_else in tempkey: self.of.write((self.space*self.spaceCount)+self.xml['keywordb']+tempkey.lower()+self.xml['keyworde']+'\n') #if any other keyword is seen then it is the end of an if statement else: break elif self.sym in tokentype: tempsym = self.token.symbol() #The condition of an if statment if '(' in tempsym: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') self.token.advance() self.compileExpression(False) self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+self.token.symbol()+self.xml['symbole']+'\n') #body of an if|else statment elif '{' in tempsym: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') self.token.advance() self.compileStatements() seen_once = False self.token.advance() self.spaceCount -= 1 self.of.write((self.space*self.spaceCount)+self.xml['ifStatemente']+'\n') #------------------------------------------------------------------------------ # This method compiles the expression def compileExpression(self,subCall): #if it a subrountine call don't want to print out the exprssion attribute if not subCall: self.of.write((self.space*self.spaceCount)+self.xml['expressionb']+'\n') 
self.spaceCount += 1 while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.sym in tokentype: tempsym = self.token.symbol() #if it is an operator then print out the appropriate xml attribute statement if tempsym in '+-*/&|<>=': if '<' in tempsym: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+"<"+self.xml['symbole']+'\n') elif '>' in tempsym: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+">"+self.xml['symbole']+'\n') elif '&' in tempsym: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+"&"+self.xml['symbole']+'\n') else: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') #this means that we have term to compile elif tempsym in '(~': self.compileTerm(subCall) #signifies the end of an expression elif tempsym in ';)],': break else: self.compileTerm(subCall) self.token.advance() self.spaceCount -= 1 if not subCall: self.of.write((self.space*self.spaceCount)+self.xml['expressione']+'\n') #------------------------------------------------------------------------------ # This method compiles the term def compileTerm(self,subCall): if not subCall: self.of.write((self.space*self.spaceCount)+self.xml['termb']+'\n') self.spaceCount += 1 while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_true in tempkey or self.key_false in tempkey or self.key_null in tempkey or self.key_this in tempkey: self.of.write((self.space*self.spaceCount)+self.xml['keywordb']+tempkey.lower()+self.xml['keyworde']+'\n') #any other keyword than the ones above results in an error else: print(self.token.errorMsg()) sys.exit(0) elif self.ident in tokentype: tempident = self.token.identifier() #peaks at the next token to determine the type of call peaks = self.token.peak() #means that it as a call to a var or class method if '.' 
in peaks: #replace this with code to do a look up self.of.write((self.space*self.spaceCount)+self.xml['identifierb']+'\n') self.of.write((self.space*(self.spaceCount+1))+'<name>'+tempident+'</name>'+'\n') self.of.write((self.space*(self.spaceCount+1))+'<type>'+self.table.typeOf(tempident)+'</type>'+'\n') self.of.write((self.space*(self.spaceCount+1))+'<kind>'+self.table.kindOf(tempident)+'</kind>'+'\n') self.of.write((self.space*(self.spaceCount+1))+'<index>'+repr(self.table.indexOf(tempident))+'</index>'+'\n') self.of.write((self.space*self.spaceCount)+self.xml['identifiere']+'\n') self.token.advance() tempsym = self.token.symbol() self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') self.token.advance() tempident = self.token.identifier() self.of.write((self.space*self.spaceCount)+self.xml['identifierb']+tempident+self.xml['identifiere']+'\n') self.token.advance() tempsym = self.token.symbol() self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') self.token.advance() #then compiles the expression list self.compileExpressionList() #this means that it is a subroutine call to one of its own methods elif '(' in peaks: self.of.write((self.space*self.spaceCount)+self.xml['identifierb']+tempident+self.xml['identifiere']+'\n') self.token.advance() tempsym = self.token.symbol() self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') self.token.advance() self.compileExpressionList() #this means that it is accessing an array element elif '[' in peaks: self.of.write((self.space*self.spaceCount)+self.xml['identifierb']+'\n') self.of.write((self.space*(self.spaceCount+1))+'<name>'+tempident+'</name>'+'\n') self.of.write((self.space*(self.spaceCount+1))+'<type>'+self.table.typeOf(tempident)+'</type>'+'\n') self.of.write((self.space*(self.spaceCount+1))+'<kind>'+self.table.kindOf(tempident)+'</kind>'+'\n') 
self.of.write((self.space*(self.spaceCount+1))+'<index>'+repr(self.table.indexOf(tempident))+'</index>'+'\n') self.of.write((self.space*self.spaceCount)+self.xml['identifiere']+'\n') self.token.advance() tempsym = self.token.symbol() self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') self.token.advance() self.compileExpression(subCall) tempsym = self.token.symbol() self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') #other wise it is just an identifier else: self.of.write((self.space*self.spaceCount)+self.xml['identifierb']+'\n') self.of.write((self.space*(self.spaceCount+1))+'<name>'+tempident+'</name>'+'\n') self.of.write((self.space*(self.spaceCount+1))+'<type>'+self.table.typeOf(tempident)+'</type>'+'\n') self.of.write((self.space*(self.spaceCount+1))+'<kind>'+self.table.kindOf(tempident)+'</kind>'+'\n') self.of.write((self.space*(self.spaceCount+1))+'<index>'+repr(self.table.indexOf(tempident))+'</index>'+'\n') self.of.write((self.space*self.spaceCount)+self.xml['identifiere']+'\n') elif self.intc in tokentype: self.of.write((self.space*self.spaceCount)+self.xml['integerConstantb']+self.token.intVal()+self.xml['integerConstante']+'\n') elif self.string_c in tokentype: self.of.write((self.space*self.spaceCount)+self.xml['StringConstantb']+self.token.stringVal()+self.xml['StringConstante']+'\n') elif self.sym in tokentype: tempsym = self.token.symbol() #this means that it is and expression surrounded by () if '(' in tempsym: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') self.token.advance() self.compileExpression(subCall) tempsym = self.token.symbol() self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') #not unary operator elif '~' in tempsym: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') self.token.advance() 
self.compileTerm(subCall) #operator elif tempsym in '+-*/&|<>=': self.spaceCount -= 1 self.of.write((self.space*self.spaceCount)+self.xml['terme']+'\n') if '<' in tempsym: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+"<"+self.xml['symbole']+'\n') elif '>' in tempsym: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+">"+self.xml['symbole']+'\n') elif '&' in tempsym: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+"&"+self.xml['symbole']+'\n') else: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') return #if the next token is ]);, means the end of a term if self.token.peak() in ']);,': break self.token.advance() self.spaceCount -= 1 if not subCall: self.of.write((self.space*self.spaceCount)+self.xml['terme']+'\n') #------------------------------------------------------------------------------ # This method compiles the expressionList def compileExpressionList(self): self.of.write((self.space*self.spaceCount)+self.xml['expressionListb']+'\n') self.spaceCount += 1 while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.sym in tokentype: tempsym = self.token.symbol() #indicates teh start of another expression if ',' in tempsym: self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+tempsym+self.xml['symbole']+'\n') self.token.advance() self.compileExpression(False) #indicates that end of expression list elif ')' in tempsym: break #other wise compile expression else: self.compileExpression(False) else: self.compileExpression(False) self.spaceCount -= 1 self.of.write((self.space*self.spaceCount)+self.xml['expressionListe']+'\n') self.of.write((self.space*self.spaceCount)+self.xml['symbolb']+self.token.symbol()+self.xml['symbole']+'\n') #-------------------End Class--------------------------------------------------
def __init__(self, infile, outfile):
    """Prepare the three collaborators this engine works with.

    infile  -- handed unchanged to JackToken
    outfile -- path opened in write mode; the resulting handle is kept on
               self.of for the other methods to write to
    """
    #the handle is stored on the instance; nothing in this constructor closes it
    out_handle = open(outfile, mode='w')
    self.of = out_handle
    #token source built from the input (presumably the Jack tokenizer -- confirm)
    self.token = JackToken(infile)
    #starts with a freshly constructed symbol table
    self.table = SymbolTable()
class SymbolVisitor(Visitor):
    """Visitor that enters declared symbols into a SymbolTable and reports
    symbols that are assigned although 'const', or that cannot be retrieved.

    Diagnostics are printed to ``self.output`` and flagged by setting
    ``self.table.errors`` to True.

    NOTE(review): the nesting below was reconstructed from a flattened source
    line; confirm the branch structure against the original file.
    """

    def __init__(self, file=stderr):
        """Build the visitor around a new SymbolTable.

        file -- stream passed to SymbolTable and used for this visitor's own
                messages (defaults to stderr).
        """
        self.table = SymbolTable(file=file)
        self.output = file

    @staticmethod
    def toTypeList(node):
        """Return the type information of ``node`` as a tuple.

        A node whose name is not 'TYPE' yields an empty tuple.  A 'TYPE' node
        with children yields every child ``data`` that is not None (a modifier
        and a type may both be present); a childless 'TYPE' node yields its
        own ``data``.
        """
        if node.name != 'TYPE':
            return ()
        if len(node.children) > 0:
            return tuple(
                child.data for child in node.children if child.data is not None
            )
        return (node.data,)

    def visit(self, node):
        """Process ``node`` according to its name and return the symbol table.

        The current scope stack is recorded on the node first.  Afterwards:
        DECL / MULTI_ASSIGN / MULTI enter a symbol and delegate to the base
        class visit; CODEBLOCK wraps ``node.accept(self)`` in an open/close
        scope pair; ASSIGN checks the target for 'const'; VALUE / IDENTIFIER
        check that a string symbol can be retrieved; anything else simply
        calls ``node.accept(self)`` (presumably the recursion into children --
        confirm against the node class).
        """
        node.scopestack = tuple(self.table.getCurrentScopeStack())
        kind = node.name

        if kind == 'DECL':
            # Declaration: children[0] carries the type, children[1] the name.
            declared_types = self.toTypeList(node.children[0])
            self.table.enterSymbol(node.children[1].data, declared_types)
            node.typelist = declared_types
            super().visit(node)
        elif kind in ('MULTI_ASSIGN', 'MULTI'):
            # Additional declarators reuse the type list stored on the parent.
            inherited_types = node.parent.typelist
            self.table.enterSymbol(node.children[0].data, inherited_types)
            node.typelist = inherited_types
            super().visit(node)
        elif kind == 'CODEBLOCK':
            self.table.openScope()
            node.accept(self)
            self.table.closeScope()
        elif kind == 'ASSIGN':
            # Const correctness: is the assignment target writable?
            # An unknown target (None) is not reported here; it is left to be
            # caught when the VALUE node is reached.
            target = self.table.retrieveScope(node.children[0].data)
            if target is not None and 'const' in target.symtype:
                print("Symbol %s cannot be assigned in scope %i" % (node.children[0].data, self.table.getCurrentScope()), file=self.output)
                self.table.errors = True
            node.accept(self)
        elif kind in ("VALUE", "IDENTIFIER"):
            # Only string data is looked up as a symbol name.
            if type(node.data) is str and self.table.retrieveScope(node.data) is None:
                print("The symbol %s is not accessible in scope %i" % (node.data, self.table.getCurrentScope()), file=self.output)
                self.table.errors = True
        else:
            node.accept(self)

        return self.table
def __init__(self, file=stderr):
    """Create the symbol table and remember the diagnostics stream.

    file -- stream passed to SymbolTable as its ``file`` argument and stored
            on ``self.output`` (defaults to stderr).
    """
    stream = file
    # The same stream object is shared by the table and this instance.
    self.table = SymbolTable(file=stream)
    self.output = stream
def __init__(self, infile, outfile):
    """Prepare the writer, token source and symbol table for one compilation.

    infile  -- handed unchanged to JackToken
    outfile -- handed unchanged to VMWriter
    """
    #output side: everything emitted goes through this writer
    vm_out = VMWriter(outfile)
    self.writer = vm_out
    #input side: token source built from infile
    self.token = JackToken(infile)
    #starts with a freshly constructed symbol table
    self.table = SymbolTable()
class CompilationEngine: #------------------------------------------------------------------------------ # Var Declar: #------------------------------------------------------------------------------ #stores all the different key words key_class='CLASS' key_method='METHOD' key_function='FUNCTION' key_constructor='CONSTRUCTOR' key_int='INT' key_boolean='BOOLEAN' key_char='CHAR' key_void='VOID' key_var='VAR' key_static='STATIC' key_field='FIELD' key_let='LET' key_do='DO' key_if='IF' key_else='ELSE' key_while='WHILE' key_return='RETURN' key_true='TRUE' key_false='FALSE' key_null='NULL' key_this='THIS' #stores all the token types keyword='KEYWORD' sym='SYMBOL' ident='IDENTIFIER' intc='INT_CONST' string_c='STRING_CONST' #This stores the convertions from the jack kind to the appropriate segment field segment = {'VAR':'local', 'STATIC':'static', 'FIELD':'this', 'ARG':'argument'} #Stores counters for lables of loops and if/else statments loopCounter = 0 ifCounter = 0 #-------------------------------------------------------------------------- # Class declaration: #-------------------------------------------------------------------------- #------------------------------------------------------------------------------ # This is the constructor def __init__(self,infile,outfile): self.writer = VMWriter(outfile) self.token = JackToken(infile) self.table = SymbolTable() #------------------------------------------------------------------------------ # This method compiles the entire class contained in the input file def compileClass(self): self.token.advance() while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_class in tempkey: s = "Place holder nothing to do here" #if the keyword is static or field then it is known that it is a class var dec #at this level of compilation elif self.key_static in tempkey or self.key_field in tempkey: self.compileClassVarDec() continue #continue because there 
maybe more then one class var and don't want to advane tokenizer #if the keyword is a subroutine type elif self.key_constructor in tempkey or self.key_method in tempkey or self.key_function in tempkey: self.compileSubroutine() elif self.sym in tokentype: tempsym = self.token.symbol() #if we run into } at this level then we are at the end of the class if '}' in tempsym: break elif self.ident in tokentype: tempident = self.token.identifier() #stores the name of the class we are in for calling methods from #with in this class and for other things as well self.currClassName = tempident self.token.advance() self.writer.close() #------------------------------------------------------------------------------ # This method compiles class var dec def compileClassVarDec(self): curtype = "" curkind = "" curname = "" while self.token.hasMoreTokens: tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_int in tempkey or self.key_char in tempkey or self.key_boolean in tempkey: curtype = tempkey elif self.key_static in tempkey or self.key_field in tempkey: curkind = tempkey #if we run into a subroutine declaration then we break elif self.key_function in tempkey or self.key_method in tempkey or self.key_constructor in tempkey: break elif self.ident in tokentype: tempident = self.token.identifier() #if the curtype string is empty then its type is an object if len(curtype) == 0: curtype = tempident else: curname = tempident elif self.sym in tokentype: tempsym = self.token.symbol() #if it runs into any of the below symboles then it is an invalid var decleration if re.search('[\(\)\{\}\[\]\.\+\-\*\/\&\<\>\=\~]{1}',tempsym) is not None: print(self.token.errorMsg()) sys.exit(0) #if we run into a ; then it is the end of this particular class var dec if ';' in tempsym: #want to advance past ; so the calling method can do the proper checks self.token.advance() self.table.Define(curname,curtype,curkind) break 
self.table.Define(curname,curtype,curkind) #clears the curname for cases like 'FIELD int haberdash, x, y' all have same #type and kind but different names curname = '' self.token.advance() #------------------------------------------------------------------------------ # This method compiles the subroutines def compileSubroutine(self): self.table.startSubroutine() self.curSubType = '' if_param = False #ensures that at least an empty param list is discovered #this is to tell other methods that the current block being read in is a constructor and to take #the appropriate actions self.isConstruct = False isFunct = False while self.token.hasMoreTokens: tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_method in tempkey or self.key_function in tempkey or self.key_constructor in tempkey: #sets isConstruct to true if the keyword is constructor or false other wise self.isConstruct = True if self.key_constructor in tempkey else False #sets isFunct to true if the keyword is function or false other wise isFunct = True if self.key_function in tempkey else False elif self.key_int in tempkey or self.key_char in tempkey or self.key_boolean in tempkey or self.key_void in tempkey: self.curSubType = tempkey #if the keyward var is in tempkey then we need to compile a vardeck elif self.key_var in tempkey: self.compileVarDec() #if it runs into any keywords that aren't caught by the above statements then it is no longer #in a subroutine else: self.writer.writeFunction(self.currClassName+'.'+self.curSubName,self.table.varCount('VAR')) break elif self.sym in tokentype: tempsym = self.token.symbol() #if it runs into a ( then it is descovering a parameter list if '(' in tempsym: self.token.advance() self.compileParameterList(self.isConstruct or isFunct) if_param = True #set param list discovered to true #if it has fond at lest an empty paramlist then it can print the next symboles elif if_param: s = "this is does nothing just place 
holeder" #error else: print(self.token.errorMsg()) sys.exit(0) elif self.ident in tokentype: #if cursubtype is empty then the return type is an object #used for compiling returns and type checking if len(self.curSubType) == 0: self.curSubType = self.token.identifier() else: self.curSubName = self.token.identifier() self.token.advance() #If this was defined as an argument then the subroutine is not a function or constructor #thus we need to set the this pointer in the subroutine to the first argument passed in if 'NONE' not in self.table.kindOf('this'): self.writer.writePush(self.segment[self.table.kindOf('this')],repr(self.table.indexOf('this'))) self.writer.writePop('pointer','0') #if it is a constructor then we need to allocate memory for the object if self.isConstruct: self.writer.writePush('constant',repr(self.table.varCount('FIELD'))) self.writer.writeCall('Memory.alloc',1) self.writer.writePop('pointer','0') #compile the body of the subroutine self.compileStatements() self.loopCounter = 0 self.ifCounter = 0 self.curSubName = '' #------------------------------------------------------------------------------ # This method compiles the parameter list def compileParameterList(self,isConstruct): curname = '' curtype = '' curkind = '' #If it isn't a constructor then we need to define this as the #first argument if not isConstruct: self.table.Define('this',self.currClassName,'ARG') while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() curtype = tempkey elif self.ident in tokentype: tempident = self.token.identifier() #if the curtype is empty then its type is an object if len(curtype) == 0: curtype = tempident else: curname = tempident elif self.sym in tokentype: tempsym = self.token.symbol() #if it runs into a ) means the end of the parameter list so break if ')' in tempsym: self.table.Define(curname, curtype, 'ARG') break #seperation of the parameters elif ',' in tempsym: 
self.table.Define(curname, curtype, 'ARG') curname = '' curtype = '' #any other symbol results in a an error else: print(self.token.errorMsg()) sys.exit(0) self.token.advance() #advance twice because we are at ( so need to getpast that and need to get the next symbol self.token.advance() self.token.advance() #------------------------------------------------------------------------------ # This method compiles the var decliration def compileVarDec(self): curname = '' curtype = '' while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_var in tempkey: s = 'Place holder does nothing just ensures that a var is seen' elif self.key_int in tempkey or self.key_char in tempkey or self.key_boolean in tempkey: curtype = tempkey #if any keyword is docovered than what is above then the vardec is over else: break elif self.ident in tokentype: tempident = self.token.identifier() #if the curtype is empty then its type is an object if len(curtype) == 0: curtype = tempident else: curname = tempident elif self.sym in tokentype: tempsym = self.token.symbol() if ',' in tempsym: self.table.Define(curname,curtype, 'VAR') curname = '' #once ; is found then at the end of a vardec elif ';' in tempsym: self.table.Define(curname,curtype, 'VAR') break self.token.advance() #------------------------------------------------------------------------------ # This method compiles the statements def compileStatements(self): while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() #if 'let' is found then compilelet if self.key_let in tempkey: self.compileLet() elif self.key_if in tempkey: self.compileIf() #continue because we could have multiple if statements found and #the current token could be the key word if so we don't want to advance #the tokenizer prematurely continue elif self.key_while in tempkey: self.compileWhile() elif self.key_do in 
tempkey: self.compileDo() elif self.key_return in tempkey: self.compileReturn() #incorrect key word at this level of compilation else: print(self.token.errorMsg()) sys.exit(0) elif self.sym in tokentype: tempsym = self.token.symbol() #once we run into } thats the endof statments if '}' in tempsym: break #any other symbol discovered at this stage is an error else: print(self.token.errorMsg()) sys.exit(0) self.token.advance() #------------------------------------------------------------------------------ # This method compiles the do def compileDo(self): while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_do in tempkey: s = 'Place holder this does nothing' #if any keyword other then do is discovered at this level it results in an error else: print(self.token.errorMsg()) sys.exit(0) elif self.ident in tokentype: #compiles the expression with the value for a subroutine call passed in being true self.compileExpression(True) self.token.advance() break self.token.advance() #need to pop the return value of the stack so that it doesn't interfeer #with other operations self.writer.writePop('temp','0') #------------------------------------------------------------------------------ # This method compiles the letStatement def compileLet(self): isArray = False leftSideEq = '' while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_let in tempkey: s = 'Place holder this does nothing' #if any other keyword is discovered it is an error else: print(self.token.errorMsg()) sys.exit(0) elif self.ident in tokentype: tempident = self.token.identifier() peak = self.token.peak() #if [ is discovered it means that it is an array access if '[' in peak: self.token.advance() self.token.advance() kind = self.table.kindOf(tempident) #if the identifiers kind is non then it is an udefined variable if "NONE" in kind: 
print(self.token.errorMsg()+"Undefined Variable\n") sys.exit(0) #pushs the arrays location on to the stack self.writer.writePush(self.segment[kind],repr(self.table.indexOf(tempident))) #compiles the expression for the index self.compileExpression(False) #adds the result of the expression to the base location self.writer.writeArithmetic('+') isArray = True self.token.advance() #continue so that the bellow error catching isn't accidently triped hence the advance command #before this continue else: kind = self.table.kindOf(tempident) if "NONE" in kind: print(self.token.errorMsg()+"Undefined Variable\n") sys.exit(0) #stores the lefside idetifier if it isn't an array leftSideEq = tempident elif self.sym in tokentype: tempsym = self.token.symbol() #this means that we compile th expression on the other side of the = sign if '=' in tempsym: self.token.advance() self.compileExpression(False) #if we are setting an array location (left side of =) to the expressions result if isArray: #pop expressions result into temp 0 self.writer.writePop('temp','0') #sets that to what the left side resulted in self.writer.writePop('pointer','1') #pushs temp back on to stack and pops it to that at 0 self.writer.writePush('temp','0') self.writer.writePop('that','0') #other wise pop it to the variables location else: kind = self.table.kindOf(leftSideEq) self.writer.writePop(self.segment[kind],repr(self.table.indexOf(leftSideEq))) #sets tempsym to the current symbole tempsym = self.token.symbol() #if tempsym at this point is ; then end of let statement if ';' in self.token.symbol(): break #othre wise it is an error else: print(self.token.errorMsg()) sys.exit(0) self.token.advance() #------------------------------------------------------------------------------ # This method compiles the whileStatement def compileWhile(self): #lables for the begenning and the exit of a loop curLoop = self.curSubName+'.loop.'+repr(self.loopCounter) curLoopExit = curLoop+'.EXIT' #incremets loop counter so that all 
loop for this subroutine will have #unique exit and begin label self.loopCounter += 1 while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_while in tempkey: self.writer.writeLabel(curLoop) #if any other keyword is discovered at this level it is an error else: print(self.token.errorMsg()) sys.exit(0) elif self.sym in tokentype: tempsym = self.token.symbol() #the condition of the while loop if '(' in tempsym: self.token.advance() self.compileExpression(False) #not the result of the exprssion that if the expression #is false we jump the loops exit self.writer.writeArithmetic('~') self.writer.writeIf(curLoopExit) #body of the while loop elif '{' in tempsym: self.token.advance() self.compileStatements() #bottom of loop need to go back to the top self.writer.writeGoto(curLoop) #once the statments are compiled the whilestatment is done break #any other symbol at this level results in an error else: print(self.token.errorMsg()) sys.exit(0) self.token.advance() self.writer.writeLabel(curLoopExit) #------------------------------------------------------------------------------ # This method compiles the ReturnStatement def compileReturn(self): while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_return in tempkey: s = "Place holder does nothing" #Any other keyword means that an exprssion is to be compiled and return is done else: self.compileExpression(False) self.token.advance() break #other wise compile expression elif self.ident in tokentype or self.string_c in tokentype or self.intc in tokentype: self.compileExpression(False) self.token.advance() break elif self.sym in tokentype: tempsym = self.token.symbol() #denotes the end of a return statment if ';' in tempsym: #if the current subroutines type is the same as the class #then it is a constructor and needs to return the this pointer if 
self.curSubType == self.currClassName: self.writer.writePush('pointer','0') #if we reach this point and void is not the subroutines type #then the user must need to return a value elif self.key_void not in self.curSubType: print(self.token.errorMsg()+'must return something\n') sys.exit(0) #if void is the subroutines type return 0 else: self.writer.writePush('constant','0') break #any other symbol at this level is an error else: print(self.token.errorMsg()) sys.exit(0) self.token.advance() self.writer.writeReturn() #------------------------------------------------------------------------------ # This method compiles the ifStatement def compileIf(self): #labels for the else part of if and the exit of both if and else statents currIf = self.curSubName+'.else.'+repr(self.ifCounter) currIfExit = self.curSubName+'.if.'+repr(self.ifCounter)+'.EXIT' #ensurest that all future if|else blocks have unique labels for this #subroutine self.ifCounter += 1 ifElse = False #this means that keyword if has been seen only once so if it seen again #that means it is a seperate if statment seen_once = True while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_if in tempkey and seen_once: s = 'Place holeder does nothing' elif self.key_else in tempkey and not ifElse: ifElse = True #write the jump to the exit of the if/else block self.writer.writeGoto(currIfExit) #Else part of the block self.writer.writeLabel(currIf) #if any other keyword is seen then it is the end of an if statement else: break elif self.sym in tokentype: tempsym = self.token.symbol() #The condition of an if statment if '(' in tempsym: self.token.advance() self.compileExpression(False) self.writer.writeArithmetic('~') self.writer.writeIf(currIf) #body of an if|else statment elif '{' in tempsym: self.token.advance() self.compileStatements() seen_once = False #if part of an if else block then break if ifElse: self.token.advance() break 
#just incase this catches } which means that its #the end of an if else block that isn't this one elif '}' in tempsym: break self.token.advance() #if an if/else block write the exit label if ifElse: self.writer.writeLabel(currIfExit) else: self.writer.writeLabel(currIf) #------------------------------------------------------------------------------ # This method compiles the expression # @param: if this is part of an enclosed statment meanig args to another sub # routine def compileExpression(self,enclosed): while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.sym in tokentype: tempsym = self.token.symbol() #this means that we have term to compile with a potential unary op if tempsym in '(~-': self.compileTerm(enclosed,True,False,'') #signifies the end of an expression elif tempsym in ';)],': break else: self.compileTerm(enclosed,False,False,'') self.token.advance() #------------------------------------------------------------------------------ # This method compiles the term # @param: if argument or array expression # @param: if the term contains a unary operator # @param: if the method was recursively called # @param: the previous sumbol if recursively called def compileTerm(self,enclosed,isUnary,callfromTerm,prevSym): while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_true in tempkey: #pushes -1 onto the stack self.writer.writePush('constant','1') self.writer.writeArithmetic('NEG') elif self.key_false in tempkey: self.writer.writePush('constant','0') elif self.key_null in tempkey: self.writer.writePush('constant','0') elif self.key_this in tempkey: self.writer.writePush('pointer','0') #any other keyword than the ones above results in an error else: print(self.token.errorMsg()) sys.exit(0) elif self.ident in tokentype: tempident = self.token.identifier() #peaks at the next token to determine the type of call peaks = self.token.peak() #means that it 
as a call to a var or class method if '.' in peaks: callName = '' numArgs = 0 typeof = self.table.typeOf(tempident) #if the type is none then we are calling a function or constructor not a method if 'NONE' in typeof: callName = tempident else: callName = typeof numArgs += 1 #push the objects location value as the first argument self.writer.writePush(self.segment[self.table.kindOf(tempident)],repr(self.table.indexOf(tempident))) self.token.advance() callName += self.token.symbol() self.token.advance() #checks to see if the next token is an identifier if not error if self.ident in self.token.tokenType(): callName += self.token.identifier() else: print(self.token.errorMsg()) sys.exit(0) self.token.advance() #if the token type is not a symbol then error if self.sym not in self.token.tokenType(): print(self.token.errorMsg()) sys.exit() self.token.advance() #then compiles the expression list and gets the number of arguments numArgs += self.compileExpressionList() self.writer.writeCall(callName,numArgs) #this means that it is a subroutine call to one of its own methods elif '(' in peaks: #calling one of its own methods so push this pointer onto the stack as the first argument #to the function self.writer.writePush('pointer','0') self.token.advance() self.token.advance() #gets the number of arguments from the expression list and adds 1 for the this pointer pushed #on earlier numArgs = self.compileExpressionList()+1 self.writer.writeCall(self.currClassName+'.'+tempident,numArgs if numArgs != 0 else 1) #this means that it is accessing an array element elif '[' in peaks: self.token.advance() self.token.advance() kind = self.table.kindOf(tempident) #if the kind of the identifier is none then it wasn't defined if "NONE" in kind: print(self.token.errorMsg()+"Undefined Variable\n") sys.exit(0) #push base location of the array onto the stack self.writer.writePush(self.segment[kind],repr(self.table.indexOf(tempident))) #calc offset self.compileExpression(enclosed) #add offset to 
base self.writer.writeArithmetic('+') #set that to the new value self.writer.writePop('pointer','1') #get the value at the offset self.writer.writePush('that','0') #other wise it is just an identifier else: kind = self.table.kindOf(tempident) if "NONE" in kind: print(self.token.errorMsg()+"Undefined Variable\n") sys.exit(0) self.writer.writePush(self.segment[kind],repr(self.table.indexOf(tempident))) elif self.intc in tokentype: self.writer.writePush('constant',self.token.intVal()) elif self.string_c in tokentype: string = self.token.stringVal() #creates a new string of the appropriate length self.writer.writePush('constant', repr(len(string))) self.writer.writeCall('String.new',1) #appends each new character to the string for c in string: self.writer.writePush('constant',repr(ord(c))) self.writer.writeCall('String.appendChar',2) elif self.sym in tokentype: tempsym = self.token.symbol() #this means that it is and expression surrounded by () if '(' in tempsym: self.token.advance() self.compileExpression(True) enclosed = True #not unary operator elif '~' in tempsym: self.token.advance() self.compileTerm(enclosed,False,False,prevSym) self.writer.writeArithmetic(tempsym) elif '-' in tempsym and isUnary and not enclosed: self.token.advance() self.compileTerm(enclosed,False,False,prevSym) self.writer.writeArithmetic('NEG') #operator elif tempsym in '+-*/&|<>=': self.token.advance() #if this was recursivelly called then need to print symble #of previous call ensures that the correct values on the stack #are used if callfromTerm: if '*' in prevSym: self.writer.writeCall('Math.multiply',2) elif '/' in prevSym: self.writer.writeCall('Math.divide',2) else: self.writer.writeArithmetic(prevSym) what = self.compileTerm(enclosed,False,True,tempsym) #if the return value is true and is the end of the expression if what and self.token.peak() in ']);,': if '*' in tempsym: self.writer.writeCall('Math.multiply',2) elif '/' in tempsym: self.writer.writeCall('Math.divide',2) else: 
self.writer.writeArithmetic(tempsym) #return false becuase we don't want to write anything #more from this block return False #if what is false and at the end of the expression #return false elif not what and self.token.peak() in ']);,': return False #if the next token is ]);, means the end of a term if self.token.peak() in ']);,': break self.token.advance() return True #------------------------------------------------------------------------------ # This method compiles the expressionList def compileExpressionList(self): expressCount = 0 while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.sym in tokentype: tempsym = self.token.symbol() #indicates teh start of another expression if ',' in tempsym: self.token.advance() self.compileExpression(False) expressCount += 1 #indicates that end of expression list elif ')' in tempsym: break else: self.compileExpression(False) expressCount += 1 else: self.compileExpression(False) expressCount += 1 return expressCount #-------------------End Class--------------------------------------------------
print('incorrect file name!') sys.exit(0) #If user passed in option to allow extra functionality if len(sys.argv) == 3: temp_op=sys.argv[2] if '-x' in temp_op: extended_op=True #Strips .asm of the end and adds .hack temp_out=re.search('(.*)(\.asm)',in_file) out_file+=temp_out.group(1)+'.hack' par = Parser(in_file,extended_op) code = Code() symT = SymbolTable() #------------------------------------------------------------------------------ # Main #------------------------------------------------------------------------------ #Builds the symbol table while par.hasMoreCommands(): par.advance() cType = par.commandType() #if the command type is a label if re.search('L_COMMAND',cType) is not None: temp_L = re.search('(.*)(;)(.*)',par.symbol()) symT.addEntry(temp_L.group(1),int(temp_L.group(3)),False) #if it is an adddress type command