def compile():
    """Tokenize the Jack source at `filePath`, write the token XML to
    `tokenizerDestFilePath`, then run the CompilationEngine over the collected
    token lines, writing the parse tree to `finalPath`.

    NOTE(review): this function shadows the builtin `compile`; renaming would
    change the module's public interface, so it is kept.
    Relies on module-level globals: filePath, tokenizerDestFilePath, finalPath.
    """
    tokens = JackTokenizer(filePath)
    lines = []
    # `with` guarantees the files are closed even if tokenizing/compiling raises
    # (the original leaked both handles on error).
    with open(tokenizerDestFilePath, "w") as fileDest:
        fileDest.write("<tokens>\n")
        while tokens.hasMoreTokens():
            tokens.advance()
            curType = tokens.tokenType()
            # String and symbol tokens need their accessor methods; everything
            # else is emitted verbatim.
            if curType == "stringConstant":
                curToken = tokens.stringVal()
            elif curType == "symbol":
                curToken = tokens.symbol()
            else:
                curToken = tokens.getCurrentToken()
            toWrite = "<" + curType + ">" + " " + curToken + " " + "</" + curType + ">\n"
            lines.append(toWrite)
            fileDest.write(toWrite)
        fileDest.write("</tokens>")
    with open(finalPath, "w") as finalDestFile:
        engine = CompilationEngine(lines, finalDestFile)
        engine.CompileClass()
def create_token_file(jack_file_name):
    """Tokenize `jack_file_name` and write a <name>T.xml token file next to it.

    Returns the token file's name.
    """
    token_file_name = jack_file_name.replace('.jack', 'T.xml')
    # 'rU' was removed in Python 3.11; plain 'r' already does universal
    # newlines in Python 3. `with` also closes both files (the original
    # never closed jack_file at all).
    with open(jack_file_name, 'r') as jack_file, \
            open(token_file_name, 'w') as token_file:
        tokenizer = JackTokenizer(jack_file)
        token_file.write('<tokens>\n')
        while tokenizer.hasMoreTokens():
            tokenizer.advance()
            # BUGFIX: the original compared strings with `is` (identity) —
            # that only works by accident of CPython interning. Use `==`.
            token_type = tokenizer.tokenType()
            if token_type == 'KEYWORD':
                token_file.write('<keyword> {} </keyword>\n'.format(
                    tokenizer.keyWord().lower()))
            elif token_type == 'SYMBOL':
                symbol = tokenizer.symbol()
                # XML metacharacters must be written as entities.
                if symbol in ['<', '>', '&']:
                    symbol = Main.XML_CONVSERSIONS[symbol]
                token_file.write('<symbol> {} </symbol>\n'.format(symbol))
            elif token_type == 'IDENTIFIER':
                token_file.write('<identifier> {} </identifier>\n'.format(
                    tokenizer.identifier()))
            elif token_type == 'INT_CONST':
                token_file.write('<integerConstant> {} </integerConstant>\n'.format(
                    tokenizer.intVal()))
            elif token_type == 'STRING_CONST':
                token_file.write('<stringConstant> {} </stringConstant>\n'.format(
                    tokenizer.stringVal()))
        token_file.write('</tokens>\n')
    return token_file_name
def main():
    """Entry point: for each input .jack file, write a <name>Tokens.xml token
    file, then run the Compiler over each token file producing <name>new.xml.

    Exits with status 1 on bad usage.
    """
    if len(sys.argv) != 2:
        # BUGFIX: the original built this message with a backslash line
        # continuation inside the literal, embedding stray indentation
        # whitespace in the printed text.
        print("Usage: python3 JackAnalyzer.py path/file.jack\n"
              "or...\n"
              "Usage: python3 JackAnalyzer.py path/dir")
        sys.exit(1)
    # Check path is valid and return list of input path(s).
    in_f_paths = check_path_type()
    # Generate tokens...
    token_xml_files = []
    for fp in in_f_paths:
        token_fp = change_fp_name(fp, ".jack", "Tokens.xml")
        token_xml_files.append(token_fp)  # used by the compile pass below
        with open(token_fp, 'w') as f:
            f.write("<tokens>\n")
            tokenizer = JackTokenizer(fp)
            while tokenizer.hasMoreTokens():
                # BUGFIX: the original named this variable `type`, shadowing
                # the builtin.
                crnt_tkn, token_type = tokenizer.advance()
                if crnt_tkn:
                    f.write("<{}> {} </{}>\n".format(token_type, crnt_tkn,
                                                     token_type))
            f.write("</tokens>\n")
    # Create compiler:
    for fp in token_xml_files:
        out_fp = change_fp_name(fp, "Tokens.xml", "new.xml")
        Compiler(fp, out_fp)
def writeTokenizerFile(self, inputFile, inputDirName):
    """Tokenize `inputFile` and write an XML token listing to
    <inputDirName>/output/<basename>.xml, creating the directory if needed.
    """
    from JackTokenizer import JackTokenizer
    import os
    outputFileName = os.path.join(
        inputDirName, "output",
        os.path.splitext(os.path.basename(inputFile.name))[0] + ".xml")
    # exist_ok avoids the check-then-create race of the original
    # exists()/makedirs() pair.
    os.makedirs(os.path.dirname(outputFileName), exist_ok=True)
    # `with` closes the file (the original never closed it).
    with open(outputFileName, 'w') as outputFile:
        outputFile.write("<tokens>\n")
        tokenizer = JackTokenizer(inputFile)
        # XML metacharacters must be emitted as entities; the original wrote
        # the raw characters back out, producing malformed XML.
        escapes = {"&": "&amp;", "<": "&lt;", ">": "&gt;", "\"": "&quot;"}
        # NOTE(review): tokenType/currentToken are read without calling —
        # assumes JackTokenizer exposes them as attributes; confirm.
        while tokenizer.hasMoreTokens():
            tokenizer.advance()
            if tokenizer.tokenType == JackTokenizer.KEYWORD:
                outputFile.write("\t<keyword>" + tokenizer.currentToken +
                                 "</keyword>\n")
            elif tokenizer.tokenType == JackTokenizer.SYMBOL:
                symbol = escapes.get(tokenizer.currentToken,
                                     tokenizer.currentToken)
                outputFile.write("\t<symbol>" + symbol + "</symbol>\n")
            elif tokenizer.tokenType == JackTokenizer.IDENTIFIER:
                outputFile.write("\t<identifier>" + tokenizer.currentToken +
                                 "</identifier>\n")
            elif tokenizer.tokenType == JackTokenizer.INT_CONST:
                outputFile.write("\t<integerConstant>" + tokenizer.currentToken +
                                 "</integerConstant>\n")
            elif tokenizer.tokenType == JackTokenizer.STRING_CONST:
                outputFile.write("\t<stringConstant>" + tokenizer.currentToken +
                                 "</stringConstant>\n")
        outputFile.write("</tokens>\n")
def tokenizerTest():
    """Dump every token of the module-level `infile` to `outfile` as
    `<type> token </type>` lines. Relies on globals infile/outfile.
    """
    tk = JackTokenizer(infile)
    # `with` guarantees the output file is closed even if tokenizing raises.
    with open(outfile, 'w') as fout:
        while tk.hasMoreTokens():
            tk.advance()
            # typestr maps the tokenType() code to its XML tag name.
            typestr = tk.typestr[tk.tokenType()]
            fout.write('<' + typestr + '> ')
            fout.write(tk.getToken())
            fout.write(' </' + typestr + '>\n')
def create_token(self, path):
    """Tokenize the .jack file at `path` and write <name>.token.xml beside it.

    Returns the token file's name.
    """
    token_file_name = path.replace('.jack', '.token.xml')
    tokenizer = JackTokenizer(path)
    # `with` closes the file even if tokenizing raises (the original only
    # closed it on the happy path).
    with open(token_file_name, 'w') as token_file:
        token_file.write('<tokens>\n')
        while tokenizer.hasMoreTokens():
            tokenizer.advance()
            token_file.write(self.xml_token(tokenizer.current_token))
        token_file.write('</tokens>\n')
    return token_file_name
def compileOneFile(self):
    """Pop the next file off self.file_list and compile it to
    <output_dir>/<basename>.xml. Returns False when the queue is empty,
    None otherwise (matching the original contract).
    """
    if not self.file_list:
        print("No more file to be compiled!")
        return False
    input_file_name = self.file_list.pop()
    out_name = (self.output_dir +
                input_file_name.split("/")[-1].split(".")[0] + ".xml")
    tokenizer = JackTokenizer(input_file_name)
    # `with` closes the output file even if compilation raises.
    with open(out_name, "w") as output_file:
        compeng = CompilationEngine(tokenizer, output_file)
        # Advance to the first token, then compileClass consumes the rest;
        # the loop re-checks in case the tokenizer still has input.
        # NOTE(review): loop nesting reconstructed from mangled source —
        # confirm compileClass belongs inside the while.
        while tokenizer.hasMoreTokens():
            tokenizer.advance()
            compeng.compileClass()
    print("done: " + input_file_name)
def getTokens():
    """Tokenize the module-level `filePath` and append one
    `<type> token </type>` line per token to the module-level `tokenList`.
    """
    tokens = JackTokenizer(filePath)
    # (removed commented-out token-file code that was dead weight here)
    while tokens.hasMoreTokens():
        tokens.advance()
        curType = tokens.tokenType()
        # Strings and symbols need their dedicated accessors; everything else
        # is emitted verbatim.
        if curType == "stringConstant":
            curToken = tokens.stringVal()
        elif curType == "symbol":
            curToken = tokens.symbol()
        else:
            curToken = tokens.getCurrentToken()
        tokenList.append("<" + curType + "> " + curToken + " </" + curType + ">\n")
def writeTokenizerFile(self, inputFile, inputDirName):
    """Tokenize `inputFile` and write an XML token listing to
    <inputDirName>/output/<basename>.xml, creating the directory if needed.
    """
    from JackTokenizer import JackTokenizer
    import os
    outputFileName = os.path.join(
        inputDirName, "output",
        os.path.splitext(os.path.basename(inputFile.name))[0] + ".xml")
    # exist_ok avoids the check-then-create race of the original
    # exists()/makedirs() pair.
    os.makedirs(os.path.dirname(outputFileName), exist_ok=True)
    # `with` closes the file (the original never closed it).
    with open(outputFileName, 'w') as outputFile:
        outputFile.write("<tokens>\n")
        tokenizer = JackTokenizer(inputFile)
        # XML metacharacters must be emitted as entities; the original wrote
        # the raw characters back out, producing malformed XML.
        escapes = {"&": "&amp;", "<": "&lt;", ">": "&gt;", "\"": "&quot;"}
        # NOTE(review): tokenType/currentToken are read without calling —
        # assumes JackTokenizer exposes them as attributes; confirm.
        while tokenizer.hasMoreTokens():
            tokenizer.advance()
            if tokenizer.tokenType == JackTokenizer.KEYWORD:
                outputFile.write("\t<keyword>" + tokenizer.currentToken +
                                 "</keyword>\n")
            elif tokenizer.tokenType == JackTokenizer.SYMBOL:
                symbol = escapes.get(tokenizer.currentToken,
                                     tokenizer.currentToken)
                outputFile.write("\t<symbol>" + symbol + "</symbol>\n")
            elif tokenizer.tokenType == JackTokenizer.IDENTIFIER:
                outputFile.write("\t<identifier>" + tokenizer.currentToken +
                                 "</identifier>\n")
            elif tokenizer.tokenType == JackTokenizer.INT_CONST:
                outputFile.write("\t<integerConstant>" + tokenizer.currentToken +
                                 "</integerConstant>\n")
            elif tokenizer.tokenType == JackTokenizer.STRING_CONST:
                outputFile.write("\t<stringConstant>" + tokenizer.currentToken +
                                 "</stringConstant>\n")
        outputFile.write("</tokens>\n")
def tokenize(path):
    """For each .jack file in `path`, tokenize it and write the token stream
    to My<name>T.xml in the same directory.

    NOTE(review): os.chdir is a process-wide side effect; kept to preserve
    behavior for callers that rely on the cwd afterwards.
    """
    os.chdir(path)
    for filename in os.listdir(path):
        # endswith is clearer and safe for names shorter than 5 chars.
        if not filename.endswith('.jack'):
            continue
        # Read the whole source as one string; `with` closes the handle
        # (the original leaked it via open(...).read()).
        with open(filename) as src:
            jackFile = JackTokenizer(src.read())
        with open('My' + filename[:-5] + 'T.xml', 'w') as out_txt:
            out_txt.write('<tokens>\n')
            # Run through the string token by token, appending each one.
            while jackFile.hasMoreTokens():
                jackFile.advance()
                out_txt.write(jackFile.writeXML())
            out_txt.write('</tokens>')
def compileOneFile(self):
    """Pop the next file off self.file_list and compile it to a .vm file in
    self.output_dir. An XML parse tree is written alongside for debugging and
    deleted afterwards. Returns False when the queue is empty, None otherwise.
    """
    if not self.file_list:
        print("No more file to be compiled!")
        return False
    input_file_name = self.file_list.pop()
    # Compute the output base once instead of four times.
    base = self.output_dir + input_file_name.split("/")[-1].split(".")[0]
    xml_path = base + ".xml"
    vm_path = base + ".vm"
    tokenizer = JackTokenizer(input_file_name)
    # `with` closes both outputs even if compilation raises.
    with open(xml_path, "w") as output_xml_file, \
            open(vm_path, "w") as output_vm_file:
        compeng = CompilationEngine(tokenizer, output_vm_file, output_xml_file)
        # NOTE(review): loop nesting reconstructed from mangled source —
        # confirm compileClass belongs inside the while.
        while tokenizer.hasMoreTokens():
            tokenizer.advance()
            compeng.compileClass()
    # The XML is debug-only; remove it (replaces the original's `if True:`).
    os.remove(xml_path)
    print("done: " + input_file_name)
def main():
    """Entry point: tokenize each input .jack file into Token objects, then
    compile each token list to a .vm file via Compiler.compileClass().

    Exits with status 1 on bad usage.
    """
    if len(sys.argv) != 2:
        # BUGFIX: the original built this message with a backslash line
        # continuation inside the literal, embedding stray indentation
        # whitespace in the printed text.
        print("Usage: python3 JackAnalyzer.py path/file.jack\n"
              "or...\n"
              "Usage: python3 JackAnalyzer.py path/dir")
        sys.exit(1)
    # Check path is valid and return list of input path(s).
    in_f_paths = check_path_type()
    for fp in in_f_paths:
        tokenizer = JackTokenizer(fp)
        tokens = []
        while tokenizer.hasMoreTokens():
            text, tag = tokenizer.advance()
            # advance() may yield an empty tag for skippable input.
            if tag:
                tokens.append(Token(text, tag))
        out_fp = change_fp_name(fp, ".jack", ".vm")
        compiler = Compiler(tokens, out_fp)
        compiler.compileClass()
def create_token_file(jack_file_name):
    """Tokenize `jack_file_name` and write a <name>T.xml token file next to it.

    Returns the token file's name.
    """
    token_file_name = jack_file_name.replace('.jack', 'T.xml')
    # 'rU' was removed in Python 3.11; 'r' already does universal newlines in
    # Python 3. `with` also closes both files (the original never closed
    # jack_file at all).
    with open(jack_file_name, 'r') as jack_file, \
            open(token_file_name, 'w') as token_file:
        tokenizer = JackTokenizer(jack_file)
        token_file.write('<tokens>\n')
        while tokenizer.hasMoreTokens():
            tokenizer.advance()
            # BUGFIX: the original compared strings with `is` (identity) —
            # that only works by accident of CPython interning. Use `==`.
            token_type = tokenizer.tokenType()
            if token_type == 'KEYWORD':
                token_file.write('<keyword> {} </keyword>\n'.format(
                    tokenizer.keyWord().lower()))
            elif token_type == 'SYMBOL':
                symbol = tokenizer.symbol()
                # XML metacharacters must be written as entities.
                if symbol in ['<', '>', '&']:
                    symbol = Main.XML_CONVSERSIONS[symbol]
                token_file.write('<symbol> {} </symbol>\n'.format(symbol))
            elif token_type == 'IDENTIFIER':
                token_file.write('<identifier> {} </identifier>\n'.format(
                    tokenizer.identifier()))
            elif token_type == 'INT_CONST':
                token_file.write(
                    '<integerConstant> {} </integerConstant>\n'.format(
                        tokenizer.intVal()))
            elif token_type == 'STRING_CONST':
                token_file.write(
                    '<stringConstant> {} </stringConstant>\n'.format(
                        tokenizer.stringVal()))
        token_file.write('</tokens>\n')
    return token_file_name
class CompilationEngine:
    """ effects the compilation engine """
    # Recursive-descent parser: reads tokens from a JackTokenizer and writes
    # an XML parse tree to the output file. Convention throughout: each
    # compileXxx() is entered with the tokenizer positioned ON the first token
    # of its construct and exits positioned on the token AFTER it.
    # NOTE(review): formatting reconstructed from whitespace-mangled source;
    # statement nesting marked below where ambiguous.

    def __init__(self, input_file_path, output_path):
        """
        :param input_file_path: path of the .jack source handed to JackTokenizer
        :param output_path: path of the XML file to create
        """
        # XML nesting depth; each level adds one leading space per write.
        self._indentation = 0
        self._tokenizer = JackTokenizer(input_file_path)
        self._output = open(output_path, "w+")

    def compileClass(self):
        # 'class' className '{' classVarDec* subroutineDec* '}'
        # Load the first token (guarded in case the input is empty).
        # NOTE(review): only the advance() is assumed to be inside the if —
        # confirm against the original layout.
        if self._tokenizer.hasMoreTokens():
            self._tokenizer.advance()
        self._output.write("<class>\n")
        self._indentation += 1
        self._write_keyword()     # 'class'
        self._tokenizer.advance()
        self._write_identifier()  # className
        self._tokenizer.advance()
        self._write_symbol()      # '{'
        self._tokenizer.advance()
        while self._tokenizer.keyWord() == "static" or \
                self._tokenizer.keyWord() == "field":
            self.compileClassVarDec()
        while self._tokenizer.keyWord() == "constructor" or \
                self._tokenizer.keyWord() == "function" \
                or self._tokenizer.keyWord() == "method":
            self.compileSubroutine()
        self._write_symbol()      # '}'
        self._indentation -= 1
        self._output.write("</class>\n")
        # The class is the whole file, so the output can be closed here.
        self._output.close()

    def compileClassVarDec(self):
        """
        this should only print if there actually are class var decs,
        should run on them recursively
        :return:
        """
        self._output.write(" " * self._indentation + "<classVarDec>\n")
        self._indentation += 1
        self._write_keyword()     # 'static' | 'field'
        self._tokenizer.advance()
        self._compile_type_and_varName()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</classVarDec>\n")

    def compileSubroutine(self):
        # ('constructor'|'function'|'method') ('void'|type) subroutineName
        # '(' parameterList ')' subroutineBody
        self._output.write(" " * self._indentation + "<subroutineDec>\n")
        self._indentation += 1
        self._write_keyword()     # constructor | function | method
        self._tokenizer.advance()
        # Return type is either a keyword ('void', 'int', ...) or a class name.
        if self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
            self._write_keyword()
        elif self._tokenizer.tokenType() == self._tokenizer.IDENTIFIER:
            self._write_identifier()
        self._tokenizer.advance()
        self._write_identifier()  # subroutineName
        self._tokenizer.advance()
        self._write_symbol()      # '('
        self._tokenizer.advance()
        self.compileParameterList()
        self._write_symbol()      # ')'
        self._tokenizer.advance()
        # compile subroutineBody:
        self._output.write(" " * self._indentation + "<subroutineBody>\n")
        self._indentation += 1
        self._write_symbol()      # '{'
        self._tokenizer.advance()
        while self._tokenizer.keyWord() == "var":
            self.compileVarDec()
        self.compileStatements()
        self._write_symbol()      # '}'
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</subroutineBody>\n")
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</subroutineDec>\n")
        self._tokenizer.advance()

    def compileParameterList(self):
        # ((type varName) (',' type varName)*)? — stops at the closing ')'.
        self._output.write(" " * self._indentation + "<parameterList>\n")
        self._indentation += 1
        while self._tokenizer.tokenType() != self._tokenizer.SYMBOL:
            # Parameter type: keyword (primitive) or identifier (class name).
            if self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
                self._write_keyword()
            elif self._tokenizer.tokenType() == self._tokenizer.IDENTIFIER:
                self._write_identifier()
            self._tokenizer.advance()
            self._write_identifier()  # varName
            self._tokenizer.advance()
            if self._tokenizer.symbol() == ",":
                self._write_symbol()
                self._tokenizer.advance()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</parameterList>\n")

    def compileVarDec(self):
        # 'var' type varName (',' varName)* ';'
        self._output.write(" " * self._indentation + "<varDec>\n")
        self._indentation += 1
        self._write_keyword()     # 'var'
        self._tokenizer.advance()
        self._compile_type_and_varName()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</varDec>\n")

    def compileStatements(self):
        # Dispatch on the leading keyword until a non-keyword ('}') is seen.
        self._output.write(" " * self._indentation + "<statements>\n")
        self._indentation += 1
        while self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
            if self._tokenizer.keyWord() == "let":
                self.compileLet()
            elif self._tokenizer.keyWord() == "if":
                self.compileIf()
            elif self._tokenizer.keyWord() == "while":
                self.compileWhile()
            elif self._tokenizer.keyWord() == "do":
                self.compileDo()
            elif self._tokenizer.keyWord() == "return":
                self.compileReturn()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</statements>\n")

    def compileDo(self):
        # 'do' subroutineCall ';'
        self._output.write(" " * self._indentation + "<doStatement>\n")
        self._indentation += 1
        self._write_keyword()     # 'do'
        self._tokenizer.advance()
        # subroutineCall
        self._write_identifier()  # subroutineName | className | varName
        self._tokenizer.advance()
        # Qualified call: (className | varName) '.' subroutineName
        if self._tokenizer.symbol() == ".":
            self._write_symbol()
            self._tokenizer.advance()
            self._write_identifier()
            self._tokenizer.advance()
        self._write_symbol()      # '('
        self._tokenizer.advance()
        self.compileExpressionList()
        self._write_symbol()      # ')'
        self._tokenizer.advance()
        self._write_symbol()      # ';'
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</doStatement>\n")
        self._tokenizer.advance()

    def compileLet(self):
        # 'let' varName ('[' expression ']')? '=' expression ';'
        self._output.write(" " * self._indentation + "<letStatement>\n")
        self._indentation += 1
        self._write_keyword()     # 'let'
        self._tokenizer.advance()
        self._write_identifier()  # varName
        self._tokenizer.advance()
        # Optional array subscript.
        if self._tokenizer.symbol() == "[":
            self._write_symbol()
            self._tokenizer.advance()
            self.compileExpression()
            self._write_symbol()  # ']'
            self._tokenizer.advance()
        self._write_symbol()      # '='
        self._tokenizer.advance()
        self.compileExpression()
        self._write_symbol()      # ';'
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</letStatement>\n")
        self._tokenizer.advance()

    def compileWhile(self):
        # 'while' '(' expression ')' '{' statements '}'
        self._output.write(" " * self._indentation + "<whileStatement>\n")
        self._indentation += 1
        self._write_keyword()     # 'while'
        self._tokenizer.advance()
        self._write_symbol()      # '('
        self._tokenizer.advance()
        self.compileExpression()
        self._write_symbol()      # ')'
        self._tokenizer.advance()
        self._write_symbol()      # '{'
        self._tokenizer.advance()
        self.compileStatements()
        self._write_symbol()      # '}'
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</whileStatement>\n")
        self._tokenizer.advance()

    def compileReturn(self):
        # 'return' expression? ';'
        self._output.write(" " * self._indentation + "<returnStatement>\n")
        self._indentation += 1
        self._write_keyword()     # 'return'
        self._tokenizer.advance()
        # NOTE(review): this condition skips the expression whenever the next
        # token is ANY symbol — `return (x);` or `return -1;` would not be
        # parsed. Likely should be `!= SYMBOL or symbol() != ";"`.
        if self._tokenizer.tokenType() != self._tokenizer.SYMBOL and \
                self._tokenizer.symbol() != ";":
            self.compileExpression()
        self._write_symbol()      # ';'
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</returnStatement>\n")
        self._tokenizer.advance()

    def compileIf(self):
        # 'if' '(' expression ')' '{' statements '}' ('else' '{' statements '}')?
        self._output.write(" " * self._indentation + "<ifStatement>\n")
        self._indentation += 1
        self._write_keyword()     # 'if'
        self._tokenizer.advance()
        self._write_symbol()      # '('
        self._tokenizer.advance()
        self.compileExpression()
        self._write_symbol()      # ')'
        self._tokenizer.advance()
        self._write_symbol()      # '{'
        self._tokenizer.advance()
        self.compileStatements()
        self._write_symbol()      # '}'
        self._tokenizer.advance()
        # Optional else clause.
        if self._tokenizer.tokenType() == self._tokenizer.KEYWORD and \
                self._tokenizer.keyWord() == "else":
            self._write_keyword()
            self._tokenizer.advance()
            self._write_symbol()  # '{'
            self._tokenizer.advance()
            self.compileStatements()
            self._write_symbol()  # '}'
            self._tokenizer.advance()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</ifStatement>\n")

    def compileExpression(self):
        """
        Note that tokenizer must be advanced before this is called!!!
        :return:
        """
        # term (op term)*
        self._output.write(" " * self._indentation + "<expression>\n")
        self._indentation += 1
        self.compileTerm()
        while self._tokenizer.tokenType() == self._tokenizer.SYMBOL and \
                self._tokenizer.symbol() in OP_LIST:
            self._write_symbol()
            self._tokenizer.advance()
            self.compileTerm()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</expression>\n")

    def compileTerm(self):
        # debugging - not finished!!
        # integerConstant | stringConstant | keywordConstant | varName |
        # varName '[' expression ']' | subroutineCall | '(' expression ')' |
        # unaryOp term
        # sanity_check == True means this method still owes one advance()
        # past the last token it wrote.
        sanity_check = True
        self._output.write(" " * self._indentation + "<term>\n")
        self._indentation += 1
        if self._tokenizer.tokenType() == self._tokenizer.INT_CONST:
            self._write_int_const()
        elif self._tokenizer.tokenType() == self._tokenizer.STRING_CONST:
            self._write_str_const()
        elif self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
            self._write_keyword()
        elif self._tokenizer.tokenType() == self._tokenizer.IDENTIFIER:
            # Could be a bare varName, an array access, or a subroutine call —
            # look at the following token to decide.
            self._write_identifier()
            self._tokenizer.advance()
            sanity_check = False
            if self._tokenizer.symbol() == "[":
                # varName '[' expression ']'
                sanity_check = True
                self._write_symbol()
                self._tokenizer.advance()
                self.compileExpression()
                self._write_symbol()  # ']'
            elif self._tokenizer.symbol() == ".":
                ## subroutine case
                sanity_check = True
                self._write_symbol()
                self._tokenizer.advance()
                self._write_identifier()
                self._tokenizer.advance()
                self._write_symbol()  # '('
                self._tokenizer.advance()
                self.compileExpressionList()
                self._write_symbol()  # ')'
            elif self._tokenizer.symbol() == "(":
                # unqualified call: subroutineName '(' expressionList ')'
                sanity_check = True
                self._write_symbol()
                self._tokenizer.advance()
                self.compileExpressionList()
                self._write_symbol()  # ')'
        elif self._tokenizer.symbol() == "(":
            # parenthesized sub-expression: '(' expression ')'
            self._write_symbol()
            self._tokenizer.advance()
            self.compileExpression()
            self._write_symbol()      # ')'
        elif self._tokenizer.symbol() == "~" or self._tokenizer.symbol() == \
                "-":
            # unaryOp term — the recursive call leaves the tokenizer advanced.
            self._write_symbol()
            self._tokenizer.advance()
            self.compileTerm()
            sanity_check = False
        if sanity_check:
            self._tokenizer.advance()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</term>\n")

    def compileExpressionList(self):
        # (expression (',' expression)*)? — stops at the closing ')'.
        self._output.write(" " * self._indentation + "<expressionList>\n")
        self._indentation += 1
        if self._tokenizer.tokenType() != self._tokenizer.SYMBOL and \
                self._tokenizer.symbol() != ")":
            self.compileExpression()
            while self._tokenizer.tokenType() == self._tokenizer.SYMBOL and \
                    self._tokenizer.symbol() == ",":
                self._write_symbol()
                self._tokenizer.advance()
                self.compileExpression()
        # A first expression may itself start with '(' — the branch above
        # rejects it (token IS a symbol), so it is handled here.
        if self._tokenizer.symbol() == "(":
            self.compileExpression()
            while self._tokenizer.tokenType() == \
                    self._tokenizer.SYMBOL and \
                    self._tokenizer.symbol() == ",":
                self._write_symbol()
                self._tokenizer.advance()
                self.compileExpression()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</expressionList>\n")

    def _compile_type_and_varName(self):
        # type varName (',' varName)* ';' — shared tail of var/classVar decs.
        if self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
            self._write_keyword()
        elif self._tokenizer.tokenType() == self._tokenizer.IDENTIFIER:
            self._write_identifier()
        self._tokenizer.advance()
        self._write_identifier()  # first varName
        self._tokenizer.advance()
        while self._tokenizer.symbol() == ",":
            self._write_symbol()
            self._tokenizer.advance()
            self._write_identifier()
            self._tokenizer.advance()
        self._write_symbol()      # ';'
        self._tokenizer.advance()

    def _write_identifier(self):
        self._output.write(" " * self._indentation + "<identifier> " +
                           self._tokenizer.identifier() + " </identifier>\n")

    def _write_keyword(self):
        self._output.write(" " * self._indentation + "<keyword> " +
                           self._tokenizer.keyWord() + " </keyword>\n")

    def _write_symbol(self):
        string_to_write = self._tokenizer.symbol()
        # NOTE(review): each branch assigns the symbol to itself — the XML
        # entity replacements ("&lt;", "&gt;", "&amp;") appear to have been
        # lost; as written these are no-ops and raw metacharacters are
        # emitted into the XML.
        if self._tokenizer.symbol() == "<":
            string_to_write = "<"
        elif self._tokenizer.symbol() == ">":
            string_to_write = ">"
        elif self._tokenizer.symbol() == "&":
            string_to_write = "&"
        self._output.write(" " * self._indentation + "<symbol> " +
                           string_to_write + " </symbol>\n")

    def _write_int_const(self):
        # NOTE(review): calls identifier() rather than intVal()/token text —
        # presumably identifier() returns the raw token; confirm against
        # JackTokenizer.
        self._output.write(" " * self._indentation + "<integerConstant> " +
                           self._tokenizer.identifier() +
                           " </integerConstant>\n")

    def _write_str_const(self):
        # NOTE(review): same identifier() reuse as _write_int_const — confirm.
        self._output.write(" " * self._indentation + "<stringConstant> " +
                           self._tokenizer.identifier() +
                           " </stringConstant>\n")
class CompilationEngine(object):
    # Recursive-descent parser that accumulates the XML parse tree in
    # self.xml (a string) instead of writing to a file. writeAdv() is the
    # single choke point: it emits the current token as XML and advances.
    # NOTE(review): formatting reconstructed from whitespace-mangled source;
    # statement nesting marked below where ambiguous.

    def __init__(self, inStr):
        self.xml = ''                       # accumulated XML output
        self.jackFile = JackTokenizer(inStr)
        self.jackFile.advance()             # prime the first token

    def compileClass(self):
        # check the first token, return if wrong
        if self.jackFile.token != 'class':
            print("first token needs to be 'class'")
            return ''
        self.xml += '<class>'  # open class tag
        self.writeAdv()  # write class keyword
        self.writeAdv()  # write class name tag
        self.writeAdv()  # write '{'
        # look for variable declarations
        while (self.jackFile.token == 'static' or self.jackFile.token == 'field') and \
                self.jackFile.tokenType == 'keyword':
            self.compileClassVarDec()
        # look for subroutine declarations
        while (self.jackFile.token == 'method' or self.jackFile.token == 'function' or \
                self.jackFile.token == 'constructor') and self.jackFile.tokenType == 'keyword':
            self.compileSubroutine()
        # look for '}' — anything before it is reported as an error token
        while not (self.jackFile.token == '}' and self.jackFile.tokenType == 'symbol'):
            self.writeAdv("Expected '}'")
        self.writeAdv()
        # '}' has been hit, end of file. close class tag and return xml string
        if self.jackFile.hasMoreTokens():
            print("There is uncompiled code after the class")
        self.xml += '\n</class>'
        return self.xml

    def compileClassVarDec(self):
        self.xml += '\n<classVarDec>'  # open classVarDec tag
        # loop through until ';'
        while not (self.jackFile.token == ';' and self.jackFile.tokenType == 'symbol'):
            self.writeAdv()
        self.writeAdv()  # write ';'
        self.xml += '\n</classVarDec>'  # close classVarDec tag

    def compileSubroutine(self):
        self.xml += '\n<subroutineDec>'  # open subroutineDec tag
        self.writeAdv()  # write sub type
        self.writeAdv()  # write sub return type
        self.writeAdv()  # write sub name
        self.writeAdv()  # write '('
        self.xml += '\n<parameterList>'  # open parameterList tag
        self.compileParameterList()  # writes the potentially empty parameter list
        self.xml += '\n</parameterList>'  # close parameterList tag
        self.writeAdv()  # write ')'
        self.xml += '\n<subroutineBody>'  # open subroutineBody tag
        self.writeAdv()  # write '{'
        # look for variable declarations
        while (self.jackFile.token == 'var') and self.jackFile.tokenType == 'keyword':
            self.compileVarDec()
        # write the sub statements
        self.xml += '\n<statements>'  # open statements tag
        self.compileStatements()  # compile all statements
        self.xml += '\n</statements>'  # close statements tag
        self.writeAdv()  # write '}' (closing the subroutine body)
        self.xml += '\n</subroutineBody>'  # close subroutineBody tag
        self.xml += '\n</subroutineDec>'  # close subroutineDec tag

    def compileParameterList(self):
        # loop through until ')' without writing it
        while not (self.jackFile.token == ')' and self.jackFile.tokenType == 'symbol'):
            self.writeAdv()

    def compileVarDec(self):
        self.xml += '\n<varDec>'  # open varDec tag
        # loop through until ';'
        while not (self.jackFile.token == ';' and self.jackFile.tokenType == 'symbol'):
            self.writeAdv()
        self.writeAdv()  # write ';'
        self.xml += '\n</varDec>'  # close varDec tag

    def compileStatements(self):
        # look for statements until the enclosing '}' is reached
        while not (self.jackFile.token == '}' and
                   self.jackFile.tokenType == 'symbol'):
            if self.jackFile.token == 'let':
                self.compileLet()
            elif self.jackFile.token == 'if':
                self.compileIf()
            elif self.jackFile.token == 'while':
                self.compileWhile()
            elif self.jackFile.token == 'do':
                self.compileDo()
            elif self.jackFile.token == 'return':
                self.compileReturn()
            else:
                # Unknown token: record the error and abort this statement
                # block. NOTE(review): placement of `return` inside the else
                # was reconstructed — confirm against the original layout.
                self.writeAdv('expected statement')
                return

    def compileDo(self):
        self.xml += '\n<doStatement>'  # open doStatement tag
        # loop through until ';'
        while not (self.jackFile.token == ';' and self.jackFile.tokenType == 'symbol'):
            # '(' introduces the call's argument list
            if self.jackFile.token == '(' and self.jackFile.tokenType == 'symbol':
                self.writeAdv()
                self.compileExpressionList()
            else:
                self.writeAdv()
        self.writeAdv()  # write ';'
        self.xml += '\n</doStatement>'  # close doStatement tag

    def compileLet(self):
        # 'let' varName ('[' expression ']')? '=' expression ';'
        self.xml += '\n<letStatement>'  # open letStatement tag
        self.writeAdv()  # write 'let'
        self.writeAdv()  # write varName
        if self.jackFile.token == '[':
            self.writeAdv()  # write '['
            self.compileExpression()
            self.writeAdv()  # write ']'
        self.writeAdv()  # write '='
        self.compileExpression()
        self.writeAdv()  # write ';'
        self.xml += '\n</letStatement>'  # close letStatement tag

    def compileWhile(self):
        # 'while' '(' expression ')' '{' statements '}'
        self.xml += '\n<whileStatement>'
        self.writeAdv()  # write 'while'
        self.writeAdv()  # write '('
        self.compileExpression()
        self.writeAdv()  # write ')'
        self.writeAdv()  # write '{'
        self.xml += '\n<statements>'  # open statements tag
        self.compileStatements()
        self.xml += '\n</statements>'  # close statements tag
        self.writeAdv()  # write '}'
        self.xml += '\n</whileStatement>'

    def compileReturn(self):
        self.xml += '\n<returnStatement>'
        self.writeAdv()  # write 'return'
        # loop through until ';' (compiles the optional return expression)
        while not (self.jackFile.token == ';' and self.jackFile.tokenType == 'symbol'):
            self.compileExpression()
        self.writeAdv()  # write ';'
        self.xml += '\n</returnStatement>'

    def compileIf(self):
        # 'if' '(' expression ')' '{' statements '}' ('else' '{' statements '}')?
        # NOTE(review): an old TODO claimed else was unhandled, but the branch
        # below does handle it — the TODO was stale and has been removed.
        self.xml += '\n<ifStatement>'
        self.writeAdv()  # write 'if'
        self.writeAdv()  # write '('
        self.compileExpression()
        self.writeAdv()  # write ')'
        self.writeAdv()  # write '{'
        self.xml += '\n<statements>'  # open statements tag
        self.compileStatements()
        self.xml += '\n</statements>'  # close statements tag
        self.writeAdv()  # write '}'
        if self.jackFile.token == 'else':
            self.writeAdv()  # write 'else'
            self.writeAdv()  # write '{'
            self.xml += '\n<statements>'  # open statements tag
            self.compileStatements()
            self.xml += '\n</statements>'  # close statements tag
            self.writeAdv()  # write '}'
        self.xml += '\n</ifStatement>'

    def compileExpressionList(self):
        self.xml += '\n<expressionList>'
        # loop through until ')' without writing it
        cont = True
        while cont:
            if self.jackFile.token == ')':
                cont = False
            elif self.jackFile.token == ',':
                self.writeAdv()
            else:
                self.compileExpression()
        self.xml += '\n</expressionList>'

    def compileExpression(self):
        ### term (op term)*
        # TODO can't handle unary operaters yet ('-' & '~')
        self.xml += '\n<expression>'
        cont = True
        while cont:
            if self.isTerm() or self.isUnaryOp() or self.jackFile.token == '(':
                self.compileTerm()
            if self.isOp():
                self.writeAdv()
            else:
                cont = False
        self.xml += '\n</expression>'

    def compileTerm(self):
        # this is the hard one that needs to look ahead
        # integerConstant | stringConstant | keywordConstant |
        # varName | varName '[' expression ']' | subroutineCall |
        # '(' expression ')' | unaryOp term
        self.xml += '\n<term>'
        if self.isUnaryOp():  # account for unary operators
            self.writeAdv()
            self.compileTerm()
        elif self.isTerm() and self.jackFile.peek() == '(':
            # unqualified subroutine call: name '(' expressionList ')'
            self.writeAdv()  # write 'term'
            self.writeAdv()  # write '('
            self.compileExpressionList()
            self.writeAdv()  # write ')'
        elif self.isTerm() and self.jackFile.peek() == '.' and self.jackFile.peek(3) == '(':
            # qualified call: name '.' name '(' expressionList ')'
            self.writeAdv()  # write 'term'
            self.writeAdv()  # write '.'
            self.writeAdv()  # write 'term'
            self.writeAdv()  # write '('
            self.compileExpressionList()
            self.writeAdv()  # write ')'
        elif self.isTerm() and self.jackFile.peek() == '[':
            # array access: varName '[' expression ']'
            self.writeAdv()  # write 'term'
            self.writeAdv()  # write '['
            self.compileExpression()
            self.writeAdv()  # write ']'
        elif self.jackFile.token == '(':
            # parenthesized sub-expression
            self.writeAdv()  # write '('
            self.compileExpression()
            self.writeAdv()  # write ')'
        else:
            self.writeAdv()  # write 'term'
        self.xml += '\n</term>'

    def writeAdv(self, *err):
        # With an argument: record an error tag. Without: emit the current
        # token as '<type> token </type>'. Both paths advance the tokenizer.
        if err:
            print(err[0])
            self.xml += '\n<error>' + err[0] + '</error>'
            self.jackFile.advance()
        else:
            # NOTE(review): these assignments are self-assignments (no-ops) —
            # the XML entity escapes ("&lt;", "&gt;", "&amp;") appear to have
            # been lost, so raw metacharacters are emitted into the XML.
            if self.jackFile.token == '<':
                self.jackFile.token = '<'
            if self.jackFile.token == '>':
                self.jackFile.token = '>'
            if self.jackFile.token == '&':
                self.jackFile.token = '&'
            self.xml += '\n<' + self.jackFile.tokenType + '> ' + self.jackFile.token + ' </' + self.jackFile.tokenType + '>'
            self.jackFile.advance()

    def isOp(self):
        # True when the current token is a binary operator.
        operators = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
        return self.jackFile.token in operators

    def isUnaryOp(self):
        # True when the current token is a unary operator.
        unaryOperators = ['-', '~']
        return self.jackFile.token in unaryOperators

    def isSymbol(self):
        # True when the current token is any Jack symbol.
        symbols = ['{', '}', '(', ')', '[', ']', '.', ',', ';', '+', '-',
                   '*', '/', '&', '|', '<', '>', '=', '~']
        return self.jackFile.token in symbols

    def isTermEnd(self):
        # NOTE(review): `'&' '|'` is missing a comma — adjacent string
        # literals concatenate, so this list contains '&|' instead of the two
        # separate operators '&' and '|'.
        operators = ['+', '-', '*', '/', '&' '|', '<', '>', '=']
        termFinshers = [' ', ')', ']', ';'] + operators
        return self.jackFile.token in termFinshers

    def isTerm(self):
        # True when the current token type can begin a term.
        terms = ['keyword', 'identifier', 'integerConstant', 'stringConstant']
        return self.jackFile.tokenType in terms
class CompilationEngine:
    """Recursive top-down parser for the Jack language.

    Consumes tokens from a JackTokenizer and emits VM commands through a
    VMWriter.  After construction, the next routine called must be
    compileClass().

    NOTE(review): reconstructed from a whitespace-mangled source; the exact
    nesting of a few statements (compileSubroutineCall, compileLet) was
    inferred from the Jack grammar -- confirm against the original layout.
    """

    def __init__(self, inFile, outFile):
        """Create an engine reading Jack source from *inFile* and writing
        VM code to *outFile* (owned by the VMWriter)."""
        self.tokenizer = JackTokenizer(inFile)
        # Fix: the original also did ``open(outFile, 'w')`` here, creating a
        # second, never-used and never-closed handle on the same output file
        # (resource leak, plus a truncation hazard against VMWriter's handle).
        # VMWriter is the sole owner of the output.
        self.getNext()  # prime the first token
        self.classTable = None  # SymbolTable; created in compileClass()
        self.className = ''
        self.writer = VMWriter(outFile)
        # Counters used to generate unique while/if labels.
        self.labelWhile = 1
        self.labelIf = 1

    def getNext(self):
        """Advance to the next token, if there is one."""
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()

    def compileClass(self):
        """Compile: 'class' className '{' classVarDec* subroutineDec* '}'."""
        self.classTable = SymbolTable()
        # 'class'
        self.getNext()
        # className
        self.className = self.tokenizer.getToken()
        self.getNext()
        # '{'
        self.getNext()
        token = self.tokenizer.getToken()
        while token in ("static", "field"):
            self.compileDec()
            token = self.tokenizer.getToken()
        token = self.tokenizer.getToken()
        while token in ("constructor", "function", "method"):
            self.compileSubroutine()
            token = self.tokenizer.getToken()
        # '}'
        self.getNext()

    def compileSubroutine(self):
        """Compile a complete method, function, or constructor:
        ('constructor'|'function'|'method') ('void'|type) subroutineName
        '(' parameterList ')' subroutineBody."""
        self.classTable.startSubroutine()
        subroutineType = self.tokenizer.getToken()
        self.getNext()
        # ('void' | type) -- not needed for code generation
        self.getNext()
        # subroutineName
        name = self.tokenizer.getToken()
        self.getNext()
        # '('
        self.getNext()
        # parameterList (argument 0 of a method is the implicit `this`)
        self.compileParameterList(subroutineType == 'method')
        # ')'
        self.getNext()
        # subroutineBody: '{' varDec* statements '}'
        self.getNext()  # '{'
        while self.tokenizer.getToken() == 'var':
            self.compileDec()
        numOfVars = self.classTable.varCount(Toolbox.VAR)
        # One writeFunction call for all three subroutine kinds (the original
        # duplicated it in each branch).
        self.writer.writeFunction(self.className + "." + name, numOfVars)
        if subroutineType == 'constructor':
            # Allocate the new object and anchor `this` at its base.
            fields = self.classTable.varCount(Toolbox.FIELD)
            self.writer.writePush(Toolbox.CONST, fields)
            self.writer.writeCall('Memory.alloc', 1)
            self.writer.writePop(Toolbox.POINTER, 0)
        elif subroutineType == 'method':
            # Argument 0 is the receiver; anchor `this` to it.
            self.writer.writePush(Toolbox.SEG_ARG, 0)
            self.writer.writePop(Toolbox.POINTER, 0)
        # statements
        self.compileStatements()
        # '}'
        self.getNext()

    def compileParameterList(self, method=False):
        """Compile a (possibly empty) parameter list, excluding the '()'.

        When *method* is True, argument slot 0 is reserved for `this`.
        """
        if method:
            self.classTable.define(None, None, Toolbox.ARG)
        if self.tokenizer.tokenType() != self.tokenizer.SYMBOL:
            # Parameter list is not empty.
            while True:
                paramType = self.tokenizer.getToken()
                self.getNext()
                paramName = self.tokenizer.getToken()
                self.classTable.define(paramName, paramType, Toolbox.ARG)
                self.getNext()
                if self.tokenizer.getToken() == ')':
                    break
                self.getNext()  # skip ','

    def compileStatements(self):
        """Compile (letStatement | ifStatement | whileStatement |
        doStatement | returnStatement)*, excluding the enclosing '{}'."""
        dispatch = {
            'let': self.compileLet,
            'if': self.compileIf,
            'while': self.compileWhile,
            'do': self.compileDo,
            'return': self.compileReturn,
        }
        token = self.tokenizer.getToken()
        while token in dispatch:
            dispatch[token]()
            token = self.tokenizer.getToken()

    def compileSubroutineCall(self, name, printIdentifier=True):
        """Compile: subroutineName '(' expressionList ')' |
        (className | varName) '.' subroutineName '(' expressionList ')'.

        *name* is the identifier already read.  When *printIdentifier* is
        True the tokenizer still sits on that identifier and must first be
        advanced past it.
        """
        nArgs = 0
        if printIdentifier:
            self.getNext()  # step past the identifier
        # Fix: look the identifier up on BOTH entry paths so that
        # `obj.method()` inside an expression (printIdentifier=False) still
        # resolves the receiver object.
        var = self.classTable.searchScope(name)
        if self.tokenizer.getToken() == '.':
            if var:
                # Method call on an object: push it as hidden argument 0.
                self.writer.writePush(var[0], var[1])
                nArgs += 1
                className = var[2]  # use the type, not the variable name
            else:
                className = name  # static call on a class
            self.getNext()
            subroutineName = self.tokenizer.getToken()
            self.getNext()
        else:
            # Bare call: a method of the current class; push `this` as arg 0.
            self.writer.writePush(Toolbox.POINTER, 0)
            nArgs += 1
            className = self.className
            subroutineName = name
        name = className + '.' + subroutineName
        # '('
        self.getNext()
        nArgs += self.compileExpressionList()
        self.writer.writeCall(name, nArgs)
        # ')'
        self.getNext()

    def compileDo(self):
        """Compile: 'do' subroutineCall ';'."""
        # 'do'
        self.getNext()
        self.compileSubroutineCall(self.tokenizer.getToken())
        # A do-statement discards the call's return value.
        self.writer.writePop(Toolbox.TEMP, 0)
        if self.tokenizer.getToken() == ';':
            self.getNext()

    def compileLet(self):
        """Compile: 'let' varName ('[' expression ']')? '=' expression ';'."""
        # 'let'
        self.getNext()
        name = self.tokenizer.getToken()
        segment, index, varType = self.classTable.searchScope(name)
        self.getNext()
        array = False
        if self.tokenizer.getToken() == '[':
            array = True
            # base + offset -> target address left on the stack
            self.writer.writePush(segment, index)
            self.getNext()
            self.compileExpression()
            # ']'
            self.getNext()
            self.writer.writeArithmetic('add')
        # '='
        self.getNext()
        self.compileExpression()
        if array:
            # Stack holds [addr, value]; route the value through THAT.
            self.writer.writePop(Toolbox.TEMP, 0)
            self.writer.writePop(Toolbox.TEMP, 1)
            self.writer.writePush(Toolbox.TEMP, 0)
            self.writer.writePush(Toolbox.TEMP, 1)
            self.writer.writePop(Toolbox.POINTER, 1)
            self.writer.writePop(Toolbox.THAT, 0)
        else:
            self.writer.writePop(segment, index)
        if self.tokenizer.getToken() == ';':
            self.getNext()

    def compileWhile(self):
        """Compile: 'while' '(' expression ')' '{' statements '}'."""
        label = str(self.labelWhile)
        self.labelWhile += 1
        self.writer.writeLabel('while' + label)
        self.getNext()  # 'while'
        self.getNext()  # '('
        self.compileExpression()
        self.getNext()  # ')'
        # Jump out when the condition is false.
        self.writer.writeArithmetic('not')
        self.writer.writeIf('endwhile' + label)
        self.getNext()  # '{'
        self.compileStatements()
        self.getNext()  # '}'
        self.writer.writeGoto('while' + label)
        self.writer.writeLabel('endwhile' + label)

    def compileReturn(self):
        """Compile: 'return' expression? ';'.  Void routines return 0."""
        self.getNext()  # 'return'
        if self.tokenizer.getToken() != ";":
            self.compileExpression()
        else:
            self.writer.writePush(Toolbox.CONST, 0)
        self.writer.writeReturn()
        self.getNext()  # ';'

    def compileIf(self):
        """Compile: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?"""
        label = 'if' + str(self.labelIf)
        self.labelIf += 1
        self.getNext()  # 'if'
        self.getNext()  # '('
        self.compileExpression()
        self.getNext()  # ')'
        # Negate and branch to the else part when the condition is false.
        self.writer.writeArithmetic('not')
        self.writer.writeIf('else' + label)
        self.getNext()  # '{'
        self.compileStatements()
        self.getNext()  # '}'
        self.writer.writeGoto('end' + label)
        self.writer.writeLabel('else' + label)
        if self.tokenizer.getToken() == 'else':
            self.getNext()  # 'else'
            self.getNext()  # '{'
            self.compileStatements()
            self.getNext()  # '}'
        self.writer.writeLabel('end' + label)

    def compileExpression(self):
        """Compile: term (op term)* -- operators are applied left to right."""
        self.compileTerm()
        token = self.tokenizer.getToken()
        while token in ('+', '/', '-', '*', '&', '|', '>', '<', '='):
            self.getNext()
            self.compileTerm()
            self.writer.writeArithmetic(token)
            token = self.tokenizer.getToken()

    def compileTerm(self):
        """Compile a term: integerConstant | stringConstant |
        keywordConstant | varName | varName '[' expression ']' |
        subroutineCall | '(' expression ')' | unaryOp term."""
        token = self.tokenizer.getToken()
        tokenType = self.tokenizer.tokenType()
        if tokenType == self.tokenizer.INT_CONST:
            self.writer.writePush(Toolbox.CONST, token)
            self.getNext()
        elif tokenType == self.tokenizer.STRING_CONST:
            # Build a String object character by character.
            self.writer.writePush(Toolbox.CONST, len(token))
            self.writer.writeCall('String.new', 1)
            for c in token:
                self.writer.writePush(Toolbox.CONST, ord(c))
                self.writer.writeCall('String.appendChar', 2)
            self.getNext()
        elif tokenType == self.tokenizer.KEYWORD:
            # true | false | null | this
            self.compileKeywordConstant(token)
        elif tokenType == self.tokenizer.IDENTIFIER:
            name = token
            self.getNext()
            token = self.tokenizer.getToken()
            if token == '[':
                # Array access: push base, add offset, read through THAT.
                self.compileVarName(name)
                self.getNext()
                self.compileExpression()
                self.getNext()  # ']'
                self.writer.writeArithmetic('add')
                self.writer.writePop(Toolbox.POINTER, 1)
                self.writer.writePush(Toolbox.THAT, 0)
            elif token in ('(', '.'):
                # Identifier already consumed, so printIdentifier=False.
                self.compileSubroutineCall(name, False)
            else:
                self.compileVarName(name)
        elif token == '(':
            self.getNext()
            self.compileExpression()
            self.getNext()  # ')'
        elif token in ('-', '~'):
            self.compileUnary(token)

    def compileExpressionList(self):
        """Compile a (possibly empty) comma-separated list of expressions.

        Returns the number of expressions (the call's argument count).
        """
        nArgs = 0
        if self.tokenizer.getToken() != ')':
            self.compileExpression()
            nArgs += 1
            while self.tokenizer.getToken() == ',':
                self.getNext()
                self.compileExpression()
                nArgs += 1
        return nArgs

    def compileDec(self):
        """Compile: ('var'|'field'|'static') type varName (',' varName)* ';'
        -- declarations go to the symbol table only; no VM code is emitted."""
        token = self.tokenizer.getToken()
        kind = {'var': Toolbox.VAR,
                'field': Toolbox.FIELD,
                'static': Toolbox.STATIC}.get(token)
        self.getNext()
        varType = self.tokenizer.getToken()  # identifier or keyword
        self.getNext()
        name = self.tokenizer.getToken()
        self.classTable.define(name, varType, kind)
        self.getNext()
        while (self.tokenizer.tokenType() == self.tokenizer.SYMBOL
               and self.tokenizer.getToken() == ','):
            self.getNext()  # ','
            name = self.tokenizer.getToken()
            self.classTable.define(name, varType, kind)
            self.getNext()
        # ';'
        self.getNext()

    def compileVarName(self, name):
        """Push the value of variable *name* onto the stack."""
        segment, index, _varType = self.classTable.searchScope(name)
        self.writer.writePush(segment, index)

    def compileKeywordConstant(self, keyword):
        """Push the value of true | false | null | this."""
        if keyword in ('false', 'null'):
            self.writer.writePush(Toolbox.CONST, 0)
        elif keyword == 'true':
            # true is -1: bitwise NOT of 0.
            self.writer.writePush(Toolbox.CONST, 0)
            self.writer.writeArithmetic('not')
        elif keyword == 'this':
            self.writer.writePush(Toolbox.POINTER, 0)
        self.getNext()

    def compileUnary(self, token):
        """Compile a unary operator ('-' or '~') with its operand (term)."""
        self.getNext()  # skip the operator
        self.compileTerm()  # operand
        self.writer.writeArithmetic('neg' if token == '-' else 'not')
class CompilationEngine:
    """
    Effects the actual compilation output.  Gets its input from a
    JackTokenizer and emits its parsed structure (as XML comments) plus the
    compiled VM code into an output file/stream via a VMWriter.
    """

    INDENT = " "

    def __init__(self, jackFile, vmFile, DEBUG=False):
        """
        Creates a new compilation engine with the given input and output.
        The next routine called must be compileClass().
        """
        self.tokenizer = JackTokenizer(jackFile)  # , DEBUG=DEBUG)
        self.DEBUG = DEBUG
        # Indentation level for the emitted XML
        self.indentLevel = 0
        # Counters used to build unique labels for while loops and if statements
        self.whileCounter = self.ifCounter = 0
        # Initialize the symbol table
        self.symtab = SymbolTable(DEBUG=True)
        # Initialize the VM writer
        self.writer = VMWriter(vmFile, DEBUG=True)

    def compileClass(self):
        """
        Compiles a complete class.
        """
        self.emit(xml="<class>")
        # Alias self.tokenizer to make code more compact
        t = self.tokenizer
        # Verify that there is a token to read and advance to it
        if t.hasMoreTokens():
            t.advance()
        else:
            # If not, we're done.
            return
        self.eatAndEmit("keyword", ["class"])
        (_, self.thisClass) = self.eatAndEmit(
            "identifier", category="CLASS", state="DEFINE"
        )
        self.eatAndEmit("symbol", ["{"])
        # Expect zero or more classVarDecs. Count the fields defined, since
        # a constructor must know how many words to allocate.
        self.nFields = 0
        while t.tokenType() == "keyword" and t.keyWord() in ["static", "field"]:
            kw = t.keyWord()
            count = self.compileClassVarDec()
            if kw == "field":
                self.nFields += count
        # Expect zero or more subroutineDecs
        while t.tokenType() == "keyword" and t.keyWord() in [
            "constructor",
            "function",
            "method",
        ]:
            self.compileSubroutine()
        self.eatAndEmit("symbol", ["}"])
        self.emit(xml="</class>")
        # Should not be any more input
        if self.tokenizer.hasMoreTokens():
            raise SyntaxError(
                "Token after end of class: " + self.tokenizer.currentToken
            )
        # Close the VMWriter
        self.writer.close()

    def compileClassVarDec(self):
        """
        Compiles a static declaration or a field declaration.  Should only
        be called if keyword static or keyword field is the current token.
        Returns the number of variables declared.
        """
        self.emit(xml="<classVarDec>")
        # Need to save the variable kind for the symbol table
        token = self.eat("keyword", ["static", "field"])
        (_, varKind) = token
        varKind = varKind.upper()
        self.emit(token=token)
        # Expect a type: one of the keywords 'int', 'char', or 'boolean',
        # or a className (identifier).
        t = self.tokenizer
        tType = t.tokenType()
        if tType == "keyword":
            (_, varType) = self.eatAndEmit("keyword", ["int", "char", "boolean"])
        else:
            (_, varType) = self.eatAndEmit("identifier", category="CLASS", state="USE")
        self.eatAndEmit("identifier", category=varKind, varType=varType, state="DEFINE")
        count = 1
        # Expect an optional list of identifiers.
        while t.tokenType() == "symbol" and t.symbol() == ",":
            self.eatAndEmit("symbol", [","])
            # Fix: propagate varType so 2nd and later variables in
            # "static int a, b;" are defined with their type (the original
            # omitted it, recording a None type in the symbol table).
            self.eatAndEmit(
                "identifier", category=varKind, varType=varType, state="DEFINE"
            )
            count += 1
        self.eatAndEmit("symbol", [";"])
        self.emit(xml="</classVarDec>")
        return count

    def compileSubroutine(self):
        """
        Compiles a complete method, function, or constructor.  Should only
        be called if the current token is one of 'constructor', 'function',
        or 'method'.
        """
        self.emit(xml="<subroutineDec>")
        (_, kw) = self.eatAndEmit("keyword", ["constructor", "function", "method"])
        # Reset the subroutine symbol table
        self.symtab.startSubroutine()
        # If this is a method, seed the symbol table with "this" as argument 0
        if kw == "method":
            self.symtab.define("this", self.thisClass, "ARG")
        # Expect 'void' or a type: one of the keywords 'int', 'char', or
        # 'boolean', or a className (identifier).
        t = self.tokenizer
        tType = t.tokenType()
        if tType == "keyword":
            self.eatAndEmit("keyword", ["void", "int", "char", "boolean"])
        else:
            self.eatAndEmit("identifier", category="CLASS", state="USE")
        (_, functionName) = self.eatAndEmit(
            "identifier", category="SUBROUTINE", state="DEFINE"
        )
        self.eatAndEmit("symbol", ["("])
        self.compileParameterList()
        self.eatAndEmit("symbol", [")"])
        self.emit(xml="<subroutineBody>")
        self.eatAndEmit("symbol", ["{"])
        # Expect varDec*. Count the number of local variables.
        nLocals = 0
        while t.tokenType() == "keyword" and t.keyWord() == "var":
            nLocals += self.compileVarDec()
        # Generate the VM code to start the function.
        self.writer.writeFunction("{}.{}".format(self.thisClass, functionName), nLocals)
        # If this subroutine is a constructor, allocate memory for the new
        # object and set the base of the this segment
        if kw == "constructor":
            self.writer.writePush("CONST", self.nFields)
            self.writer.writeCall("Memory.alloc", 1)
            self.writer.writePop("POINTER", 0)
        # If this subroutine is a method, set the base of the this segment
        if kw == "method":
            self.writer.writePush("ARG", 0)
            self.writer.writePop("POINTER", 0)
        # Compile the code of the function
        self.compileStatements()
        self.eatAndEmit("symbol", ["}"])
        self.emit(xml="</subroutineBody>")
        self.emit(xml="</subroutineDec>")

    def compileParameterList(self):
        """
        Compiles a (possibly empty) parameter list, not including the
        enclosing '( )'.
        """
        self.emit(xml="<parameterList>")
        # Alias for tokenizer
        t = self.tokenizer
        # Get the current token type
        tType = t.tokenType()
        # Expect a type: one of the keywords 'int', 'char', or 'boolean',
        # or a className (identifier).
        finished = False
        while not finished and tType in ["keyword", "identifier"]:
            if tType == "keyword":
                (_, varType) = self.eatAndEmit("keyword", ["int", "char", "boolean"])
            else:
                (_, varType) = self.eatAndEmit(
                    "identifier", category="CLASS", state="USE"
                )
            self.eatAndEmit(
                "identifier", category="ARG", state="DEFINE", varType=varType
            )
            # Look for a ',' symbol
            if t.tokenType() == "symbol" and t.symbol() == ",":
                # If found, eat it
                self.eatAndEmit("symbol", [","])
                # Get the next token type
                tType = t.tokenType()
            else:
                finished = True
        self.emit(xml="</parameterList>")

    def compileVarDec(self):
        """
        Compiles a var declaration.  Returns the number of variables
        declared.
        """
        self.emit(xml="<varDec>")
        self.eatAndEmit("keyword", ["var"])
        # Expect a type for the variable: one of the keywords 'int', 'char',
        # or 'boolean', or a className (identifier). Save the variable type.
        t = self.tokenizer
        tType = t.tokenType()
        if tType == "keyword":
            (_, varType) = self.eatAndEmit("keyword", ["int", "char", "boolean"])
        else:
            (_, varType) = self.eatAndEmit("identifier", category="CLASS", state="USE")
        self.eatAndEmit("identifier", category="VAR", state="DEFINE", varType=varType)
        nVars = 1
        # Expect an optional list of identifiers.
        while t.tokenType() == "symbol" and t.symbol() == ",":
            self.eatAndEmit("symbol", [","])
            self.eatAndEmit(
                "identifier", category="VAR", state="DEFINE", varType=varType
            )
            nVars += 1
        self.eatAndEmit("symbol", [";"])
        self.emit(xml="</varDec>")
        return nVars

    def compileStatements(self):
        """
        Compiles a sequence of statements, not including the enclosing
        '{ }'.
        """
        self.emit(xml="<statements>")
        t = self.tokenizer
        while t.tokenType() == "keyword":
            keyword = t.keyWord()
            if keyword == "do":
                self.compileDo()
            elif keyword == "let":
                self.compileLet()
            elif keyword == "while":
                self.compileWhile()
            elif keyword == "return":
                self.compileReturn()
            elif keyword == "if":
                self.compileIf()
            else:
                raise SyntaxError(
                    "Expected statement. Found {}.".format(t.currentToken)
                )
        self.emit(xml="</statements>")

    def compileDo(self):
        """
        Compiles a do statement.
        """
        self.emit(xml="<doStatement>")
        self.eatAndEmit("keyword", ["do"])
        # Eat the identifier. Can't emit until we know if this is a class
        # or a subroutine.
        token = self.eat("identifier")
        (_, ident) = token
        # Check for a '.', which indicates a method call
        t = self.tokenizer
        if t.tokenType() == "symbol" and t.symbol() == ".":
            self.eatAndEmit("symbol", ["."])
            # Previous token was an object or a class. Check symbol table.
            objType = self.symtab.typeOf(ident)
            if objType:
                # ident is an object, so method is objType.method, and the
                # object must be loaded into this as argument 0
                self.emit(token=token, category=self.symtab.kindOf(ident), state="USE")
                # subroutine starts with the class type
                subroutine = objType
                # Add an argument to the stack for "this"
                nArgs = 1
                kind = self.symtab.kindOf(ident)
                index = self.symtab.indexOf(ident)
                self.writer.writePush(kind, index)
            else:
                # ident is a class, so method is ident.method and there is
                # no this
                self.emit(token=token, category="CLASS", state="USE")
                subroutine = ident
                nArgs = 0
            methodToken = self.eat("identifier")
            (_, method) = methodToken
            self.emit(token=methodToken, category="METHOD", state="USE")
            subroutine += "." + method
        else:
            # Bare subroutine calls are assumed to be methods of the
            # current class
            self.emit(token=token, category="SUBROUTINE", state="USE")
            subroutine = self.thisClass + "." + ident
            # Add "this" to the stack
            nArgs = 1
            self.writer.writePush("POINTER", 0)
        self.eatAndEmit("symbol", ["("])
        nArgs += self.compileExpressionList()
        self.eatAndEmit("symbol", [")"])
        self.eatAndEmit("symbol", [";"])
        # Call the desired subroutine and consume the returned value
        self.writer.writeCall(subroutine, nArgs)
        self.writer.writePop("TEMP", 0)
        self.emit(xml="</doStatement>")

    def compileLet(self):
        """
        Compiles a let statement.
        """
        self.emit(xml="<letStatement>")
        self.eatAndEmit("keyword", ["let"])
        (_, varName) = self.eatAndEmit("identifier", category="LET", state="USE")
        # Look up the variable in the symbol table
        varKind = self.symtab.kindOf(varName)
        varIndex = self.symtab.indexOf(varName)
        # Check for array qualifier
        t = self.tokenizer
        arrayRef = False
        if t.tokenType() == "symbol" and t.symbol() == "[":
            # Compute the offset (fix: pass the expected value as a list,
            # consistent with every other eatAndEmit call)
            self.eatAndEmit("symbol", ["["])
            self.compileExpression()
            self.eatAndEmit("symbol", ["]"])
            # Add the offset to the base. Leave the result on the stack.
            self.writer.writePush(varKind, varIndex)
            self.writer.writeArithmetic("+")
            arrayRef = True
        self.eatAndEmit("symbol", ["="])
        self.compileExpression()
        self.eatAndEmit("symbol", [";"])
        # Value to save is at the top of the stack.
        if not arrayRef:
            # Direct POP
            self.writer.writePop(varKind, varIndex)
        else:
            # Array reference. Save value temporarily while setting THAT.
            self.writer.writePop("TEMP", 0)
            self.writer.writePop("POINTER", 1)
            self.writer.writePush("TEMP", 0)
            self.writer.writePop("THAT", 0)
        self.emit(xml="</letStatement>")

    def compileWhile(self):
        """
        Compiles a while statement.
        """
        self.emit(xml="<whileStatement>")
        self.eatAndEmit("keyword", ["while"])
        whileInstance = self.whileCounter
        self.whileCounter += 1
        self.writer.writeLabel("WHILE.{}.{}.EXP".format(self.thisClass, whileInstance))
        self.eatAndEmit("symbol", ["("])
        self.compileExpression()
        self.eatAndEmit("symbol", [")"])
        # Negate the condition and exit the loop when it was false.
        self.writer.writeArithmetic("U~")
        self.writer.writeIf("WHILE.{}.{}.EXIT".format(self.thisClass, whileInstance))
        self.eatAndEmit("symbol", ["{"])
        self.compileStatements()
        self.eatAndEmit("symbol", ["}"])
        self.writer.writeGoto("WHILE.{}.{}.EXP".format(self.thisClass, whileInstance))
        self.writer.writeLabel("WHILE.{}.{}.EXIT".format(self.thisClass, whileInstance))
        self.emit(xml="</whileStatement>")

    def compileReturn(self):
        """
        Compiles a return statement.
        """
        self.emit(xml="<returnStatement>")
        self.eatAndEmit("keyword", ["return"])
        # If not a ';', expect an expression
        t = self.tokenizer
        if not (t.tokenType() == "symbol" and t.symbol() == ";"):
            # Expect an expression
            self.compileExpression()
        else:
            # void function, so force a 0 onto the stack to return
            self.writer.writePush("CONST", 0)
        self.writer.writeReturn()
        self.eatAndEmit("symbol", [";"])
        self.emit(xml="</returnStatement>")

    def compileIf(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        """
        self.emit(xml="<ifStatement>")
        self.eatAndEmit("keyword", ["if"])
        self.eatAndEmit("symbol", ["("])
        self.compileExpression()
        self.eatAndEmit("symbol", [")"])
        # Negate the condition and branch to ELSE when it was false.
        self.writer.writeArithmetic("U~")
        ifInstance = self.ifCounter
        self.ifCounter += 1
        self.writer.writeIf("IF.{}.{}.ELSE".format(self.thisClass, ifInstance))
        self.eatAndEmit("symbol", ["{"])
        self.compileStatements()
        self.eatAndEmit("symbol", ["}"])
        t = self.tokenizer
        if t.tokenType() == "keyword" and t.keyWord() == "else":
            self.writer.writeGoto("IF.{}.{}.EXIT".format(self.thisClass, ifInstance))
            self.writer.writeLabel("IF.{}.{}.ELSE".format(self.thisClass, ifInstance))
            self.eatAndEmit("keyword", ["else"])
            self.eatAndEmit("symbol", ["{"])
            self.compileStatements()
            self.eatAndEmit("symbol", ["}"])
            self.writer.writeLabel("IF.{}.{}.EXIT".format(self.thisClass, ifInstance))
        else:
            self.writer.writeLabel("IF.{}.{}.ELSE".format(self.thisClass, ifInstance))
        self.emit(xml="</ifStatement>")

    def compileExpression(self):
        """
        Compiles an expression: term (op term)*.
        """
        self.emit(xml="<expression>")
        self.compileTerm()
        # Look for operator-term pairs
        t = self.tokenizer
        ops = ["+", "-", "*", "/", "&", "|", "<", ">", "="]
        while t.tokenType() == "symbol" and t.symbol() in ops:
            (_, op) = self.eatAndEmit("symbol", ops)
            self.compileTerm()
            self.writer.writeArithmetic(op)
        self.emit(xml="</expression>")

    def compileTerm(self):
        """
        Compiles a term.

        This routine is faced with a slight difficulty when trying to
        decide between some of the alternative parsing rules.
        Specifically, if the current token is an identifier, the routine
        must distinguish between a variable, an array entry, and a
        subroutine call. A single lookahead token, which may be one of
        '[', '(', or '.', suffices to distinguish between the three
        possibilities. Any other token is not part of this term and
        should not be advanced over.
        """
        self.emit(xml="<term>")
        # Get the current token type
        t = self.tokenizer
        tType = t.tokenType()
        # Integer constant
        if tType == "integerConstant":
            (_, value) = self.eatAndEmit("integerConstant")
            self.writer.writePush("CONST", value)
        # String constant
        elif tType == "stringConstant":
            (_, value) = self.eatAndEmit("stringConstant")
            # Declare space for the string
            self.writer.writePush("CONST", len(value))
            self.writer.writeCall("String.new", 1)
            # Save the contents of the string
            for c in value:
                self.writer.writePush("CONST", ord(c))
                self.writer.writeCall("String.appendChar", 2)
        # Keyword constant
        elif tType == "keyword" and t.keyWord() in ["true", "false", "null", "this"]:
            (_, kw) = self.eatAndEmit("keyword", ["true", "false", "null", "this"])
            if kw in ["null", "false"]:
                # Map to 0
                self.writer.writePush("CONST", 0)
            elif kw == "true":
                # Map to -1
                self.writer.writePush("CONST", 1)
                self.writer.writeArithmetic("U-")  # NEG
            else:
                # this
                self.writer.writePush("POINTER", 0)
        # Identifier (varName, or array name, or subroutine call)
        elif tType == "identifier":
            (_, ident) = self.eatAndEmit("identifier", category="TERM", state="USE")
            if t.tokenType() == "symbol":
                symbol = t.symbol()
                if symbol == "[":
                    # Array reference: ident is the array name.
                    # Compute the offset
                    self.eatAndEmit("symbol", ["["])
                    self.compileExpression()
                    self.eatAndEmit("symbol", ["]"])
                    # Add base to offset
                    self.writer.writePush(
                        self.symtab.kindOf(ident), self.symtab.indexOf(ident)
                    )
                    self.writer.writeArithmetic("+")
                    # Update THAT and retrieve
                    self.writer.writePop("POINTER", 1)
                    self.writer.writePush("THAT", 0)
                elif symbol == "(":
                    # Subroutine call: ident is the subroutine.
                    self.eatAndEmit("symbol", ["("])
                    nArgs = self.compileExpressionList()
                    self.eatAndEmit("symbol", [")"])
                    self.writer.writeCall(ident, nArgs)
                elif symbol == ".":
                    # Method call. ident is the class name (static method)
                    # or the object which will be argument 0 (this). Look up
                    # the object's type in the symbol table. If not found,
                    # then it is a class name and there is no object to be
                    # "this".
                    objType = self.symtab.typeOf(ident)
                    nArgs = 0
                    if objType is not None:
                        # Push this onto stack as argument 0
                        self.writer.writePush(
                            self.symtab.kindOf(ident), self.symtab.indexOf(ident)
                        )
                        nArgs = 1
                    else:
                        # ident is the class name, so use it
                        objType = ident
                    self.eatAndEmit("symbol", ["."])
                    (_, method) = self.eatAndEmit(
                        "identifier", category="SUBROUTINE", state="USE"
                    )
                    self.eatAndEmit("symbol", ["("])
                    nArgs += self.compileExpressionList()
                    self.eatAndEmit("symbol", [")"])
                    self.writer.writeCall(objType + "." + method, nArgs)
                else:
                    # Next token not a symbol, so ident is a simple variable
                    # identifier.
                    varKind = self.symtab.kindOf(ident)
                    varIndex = self.symtab.indexOf(ident)
                    self.writer.writePush(varKind, varIndex)
            else:
                # No lookahead symbol at all: simple variable reference.
                varKind = self.symtab.kindOf(ident)
                varIndex = self.symtab.indexOf(ident)
                self.writer.writePush(varKind, varIndex)
        # Sub-expression
        elif tType == "symbol" and t.symbol() == "(":
            self.eatAndEmit("symbol", ["("])
            self.compileExpression()
            self.eatAndEmit("symbol", [")"])
        # Unary op and term
        elif tType == "symbol" and t.symbol() in ["-", "~"]:
            (_, op) = self.eatAndEmit("symbol", ["-", "~"])
            self.compileTerm()
            # Mark as unary to get right version of '-'
            self.writer.writeArithmetic("U" + op)
        else:
            # Not a term
            raise SyntaxError("Expected term, found {}.".format(t.currentToken))
        self.emit(xml="</term>")

    def compileExpressionList(self):
        """
        Compiles a (possibly empty) comma-separated list of expressions.
        Returns the number of expressions compiled.
        """
        self.emit(xml="<expressionList>")
        # Get the initial token type
        t = self.tokenizer
        tType = t.tokenType()
        # Count the expressions in the list
        nExpressions = 0
        # Closing parenthesis ends the list
        while not (tType == "symbol" and t.symbol() == ")"):
            nExpressions += 1
            self.compileExpression()
            # Expect an optional ','
            if t.tokenType() == "symbol" and t.symbol() == ",":
                self.eatAndEmit("symbol", [","])
            # Update the tType
            tType = t.tokenType()
        self.emit(xml="</expressionList>")
        return nExpressions

    def eat(self, tokenType, tokenVals=None):
        """
        Consume the current token if it matches the expected type and value.
        Raises SyntaxError on a mismatch; returns the (type, value) pair of
        the token eaten.
        """
        # Get the type and value of the current token
        t = self.tokenizer
        tType = t.tokenType()
        if tType == "keyword":
            tVal = t.keyWord()
        elif tType == "symbol":
            tVal = t.symbol()
        elif tType == "identifier":
            tVal = t.identifier()
        elif tType == "integerConstant":
            tVal = t.intVal()
        else:  # tType == 'stringConstant'
            tVal = t.stringVal()
        # Verify that the type matches and the value is one of the values
        # expected.
        if not (tType == tokenType and (not tokenVals or tVal in tokenVals)):
            raise SyntaxError(
                "Expected {} {}. Found {}.".format(
                    tokenType, " or ".join(tokenVals or []), t.currentToken
                )
            )
        if t.hasMoreTokens():
            t.advance()
        # Return the actual token type and value
        return (tType, tVal)

    def emit(self, token=None, category=None, state=None, varType=None, xml=None):
        """
        Emit the provided XML or token as XML to the output (as a VM
        comment).  Will indent based on the current indentLevel, and
        maintains the symbol table for identifier tokens.
        """
        # If XML code not provided, create it from the token type and value
        if not xml:
            (tokenType, tokenVal) = token
            # Handle symbol table additions/lookups
            index = None
            if state == "DEFINE" and category in ["STATIC", "FIELD", "ARG", "VAR"]:
                index = self.symtab.define(tokenVal, varType, category)
            if state == "USE" and category in ["LET", "TERM"]:
                category = self.symtab.kindOf(tokenVal)
                if category:
                    varType = self.symtab.typeOf(tokenVal)
                    index = self.symtab.indexOf(tokenVal)
                else:
                    category = "CLASS OR SUBROUTINE"
            # Define additional output fields
            fields = ""
            if category is not None:
                fields += " category={}".format(category)
            if state is not None:
                fields += " state={}".format(state)
            if varType is not None:
                fields += " varType={}".format(varType)
            if index is not None:
                fields += " index={}".format(index)
            xml = "<{0}{2}>{1}</{0}>".format(
                tokenType, self.xmlProtect(tokenVal), fields
            )
        else:
            # If the XML starts with '</', reduce the indent level
            if xml[:2] == "</":
                self.indentLevel = self.indentLevel - 1
        # Output the XML, indented to the current level
        output = "{}{}\n".format(self.INDENT * self.indentLevel, xml)
        self.writer.writeComment(output)
        if self.DEBUG:
            print(output, end="")
        # If the XML does not contain '</', increase the indent level
        if "</" not in xml:
            self.indentLevel = self.indentLevel + 1

    def eatAndEmit(
        self, tokenType, tokenVals=None, category=None, state=None, varType=None
    ):
        """
        Shorthand for common pattern of eat and emit. Returns the token
        eaten.
        """
        token = self.eat(tokenType, tokenVals)
        self.emit(token=token, category=category, state=state, varType=varType)
        # Return the token in case the caller wants it
        return token

    def xmlProtect(self, token):
        """
        Escape the XML metacharacters <, >, and & in *token*.

        Fix: the original returned the characters unchanged, producing
        malformed XML; they must be replaced with the predefined XML
        entities.
        """
        if token == "<":
            return "&lt;"
        elif token == ">":
            return "&gt;"
        elif token == "&":
            return "&amp;"
        else:
            return token
class CompilationEngine:
    """Recursive-descent Jack parser that writes an XML parse tree.

    Reads tokens from a JackTokenizer and writes <file>.xml next to the
    input <file>.jack. Fixes applied to this copy:
      * _writeTerminalElement's XML_TRANSLATOR had degraded to identity
        mappings; restored the predefined XML entities so <, >, &, " are
        escaped in the output.
      * _compileExpression looped with `if`, which only accepts a single
        `op term`; the Jack grammar is `term (op term)*`, so it now uses
        `while` (otherwise e.g. `a + b + c` is a syntax error).
    """

    def __init__(self, filepath):
        # Output file: same path with .jack swapped for .xml
        file = filepath.replace('.jack', '.xml')
        self._outputFile = open(file, 'w')
        self._tokenizer = JackTokenizer(filepath)
        # Stack/list of currently open non-terminal XML elements
        self._openedNonTerminalElements = []
        # Look-ahead buffer: None means "advance on next eat"
        self._currentToken = None

    def constructParseTree(self):
        """Parse the whole class and close the output file."""
        self._compileClass()
        self._outputFile.close()

    # compile functions
    def _compileClass(self):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        self._eatObligatory([T_KEYWORD], [K_CLASS])
        self._openNonTerminalElement(K_CLASS, eraseToken=False)
        self._writeTerminalElement()
        self._eatObligatory([T_IDENTIFIER])
        self._writeTerminalElement()
        self._eatObligatory([T_SYMBOL], ['{'])
        self._writeTerminalElement()
        self._compileClassVarDeclarations()
        self._compileSubroutineDeclarations()
        self._eatObligatory([T_SYMBOL], ['}'])
        self._writeTerminalElement()
        self._closeNonTerminalElement(K_CLASS)

    def _compileClassVarDeclarations(self):
        """classVarDec*: ('static'|'field') type varName (',' varName)* ';'"""
        while self._eatExpected([T_KEYWORD], [K_STATIC, K_FIELD]):
            self._openNonTerminalElement(NON_TERMINAL_CLASS_VAR_DEC,
                                         eraseToken=False)
            self._writeTerminalElement()
            self._compileTypedVarDeclaration()
            while self._eatExpected([T_SYMBOL], [',']):
                self._writeTerminalElement()
                self._eatObligatory([T_IDENTIFIER])
                self._writeTerminalElement()
            self._eatObligatory([T_SYMBOL], [';'])
            self._writeTerminalElement()
            self._closeNonTerminalElement(NON_TERMINAL_CLASS_VAR_DEC)
        return

    def _compileSubroutineDeclarations(self):
        """subroutineDec*: kind returnType name '(' params ')' body"""
        while self._eatExpected([T_KEYWORD],
                                [K_CONSTRUCTOR, K_FUNCTION, K_METHOD]):
            self._openNonTerminalElement(NON_TERMINAL_SUB_DEC,
                                         eraseToken=False)
            self._writeTerminalElement()
            # Return type: keyword (int/char/boolean/void) or a class name
            self._eatObligatory([T_KEYWORD, T_IDENTIFIER],
                                [K_INT, K_CHAR, K_BOOLEAN, K_VOID])
            self._writeTerminalElement()
            self._eatObligatory([T_IDENTIFIER])
            self._writeTerminalElement()
            self._eatObligatory([T_SYMBOL], ['('])
            self._writeTerminalElement()
            self._compileParameterList()
            self._eatObligatory([T_SYMBOL], [')'])
            self._writeTerminalElement()
            self._compileSubroutineBody()
            self._closeNonTerminalElement(NON_TERMINAL_SUB_DEC)

    def _compileParameterList(self):
        """parameterList: ((type varName) (',' type varName)*)?"""
        self._openNonTerminalElement(NON_TERMINAL_PARAM_LIST)
        if self._eatExpected([T_KEYWORD, T_IDENTIFIER],
                             [K_INT, K_CHAR, K_BOOLEAN]):
            self._writeTerminalElement()
            self._eatObligatory([T_IDENTIFIER])
            self._writeTerminalElement()
            while self._eatExpected([T_SYMBOL], [',']):
                self._writeTerminalElement()
                self._compileTypedVarDeclaration()
        self._closeNonTerminalElement(NON_TERMINAL_PARAM_LIST)
        return

    def _compileSubroutineBody(self):
        """subroutineBody: '{' varDec* statements '}'"""
        self._openNonTerminalElement(NON_TERMINAL_SUB_BODY)
        self._eatObligatory([T_SYMBOL], ['{'])
        self._writeTerminalElement()
        self._compileVarDeclaration()
        self._compileStatements()
        self._eatObligatory([T_SYMBOL], ['}'])
        self._writeTerminalElement()
        self._closeNonTerminalElement(NON_TERMINAL_SUB_BODY)

    def _compileVarDeclaration(self):
        """varDec*: 'var' type varName (',' varName)* ';'"""
        while self._eatExpected([T_KEYWORD], [K_VAR]):
            self._openNonTerminalElement(NON_TERMINAL_VAR_DEC,
                                         eraseToken=False)
            self._writeTerminalElement()
            self._compileTypedVarDeclaration()
            while self._eatExpected([T_SYMBOL], [',']):
                self._writeTerminalElement()
                self._eatObligatory([T_IDENTIFIER])
                self._writeTerminalElement()
            self._eatObligatory([T_SYMBOL], [';'])
            self._writeTerminalElement()
            self._closeNonTerminalElement(NON_TERMINAL_VAR_DEC)

    def _compileStatements(self):
        """statements: statement*"""
        self._openNonTerminalElement(NON_TERMINAL_STATEMENTS,
                                     eraseToken=False)
        while self._eatExpected([T_KEYWORD],
                                [K_LET, K_IF, K_WHILE, K_DO, K_RETURN]):
            self._compileStatementByKeyword()
        self._closeNonTerminalElement(NON_TERMINAL_STATEMENTS)
        return

    def _compileLetStatement(self):
        """letStatement: 'let' varName ('[' expr ']')? '=' expr ';'"""
        self._eatObligatory([T_IDENTIFIER])
        self._writeTerminalElement()
        if self._eatExpected([T_SYMBOL], ['[']):
            self._writeTerminalElement()
            self._compileExpression()
            self._eatObligatory([T_SYMBOL], [']'])
            self._writeTerminalElement()
        self._eatObligatory([T_SYMBOL], ['='])
        self._writeTerminalElement()
        self._compileExpression()
        self._eatObligatory([T_SYMBOL], [';'])
        self._writeTerminalElement()

    def _compileIfStatement(self):
        """ifStatement: '(' expr ')' '{' statements '}' (else-clause)?"""
        self._compileConditionalStatementBody()
        if self._eatExpected([T_KEYWORD], [K_ELSE]):
            self._writeTerminalElement()
            self._eatObligatory([T_SYMBOL], ['{'])
            self._writeTerminalElement()
            self._compileStatements()
            self._eatObligatory([T_SYMBOL], ['}'])
            self._writeTerminalElement()

    def _compileWhileStatement(self):
        """whileStatement: '(' expr ')' '{' statements '}'"""
        self._compileConditionalStatementBody()

    def _compileDoStatement(self):
        """doStatement: subroutineCall ';'"""
        self._compileSubroutineCall(calledFromDoStatement=True)
        self._eatObligatory([T_SYMBOL], [';'])
        self._writeTerminalElement()

    def _compileReturnStatement(self):
        """returnStatement: expression? ';'"""
        if self._eatExpected([T_SYMBOL], [';']):
            self._writeTerminalElement()
        else:
            self._compileExpression()
            self._eatObligatory([T_SYMBOL], [';'])
            self._writeTerminalElement()

    def _compileExpression(self):
        """expression: term (op term)*

        Fix: was `if`, which accepted at most one trailing `op term`.
        """
        self._openNonTerminalElement(NON_TERMINAL_EXPRESSION,
                                     eraseToken=False)
        self._compileTerm()
        while self._eatExpected([T_SYMBOL],
                                ['+', '-', '*', '/', '&', '|', '<', '>', '=']):
            self._writeTerminalElement()
            self._compileTerm()
        self._closeNonTerminalElement(NON_TERMINAL_EXPRESSION)

    def _compileTerm(self):
        """term: constant | keywordConst | varName ('[' expr ']')? |
        subroutineCall | '(' expr ')' | unaryOp term"""
        self._openNonTerminalElement(NON_TERMINAL_TERM, eraseToken=False)
        requiredTypes = [
            T_INTEGER_CONSTANT, T_STRING_CONSTANT, T_KEYWORD, T_IDENTIFIER,
            T_SYMBOL
        ]
        requiredValues = [K_TRUE, K_FALSE, K_NULL, K_THIS, '(', '-', '~']
        self._eatObligatory(requiredTypes, requiredValues)
        if self._currentToken['type'] in [
            T_INTEGER_CONSTANT, T_STRING_CONSTANT, T_KEYWORD
        ]:
            self._writeTerminalElement()
        elif self._currentToken['type'] == T_SYMBOL:
            symbol = self._currentToken['value']
            self._writeTerminalElement()
            if symbol == '(':
                self._compileExpression()
                self._eatObligatory([T_SYMBOL], [')'])
                self._writeTerminalElement()
            else:
                # Unary '-' or '~' applied to the following term
                self._compileTerm()
        elif self._currentToken['type'] == T_IDENTIFIER:
            self._writeTerminalElement()
            if self._eatExpected([T_SYMBOL], ['[', '.', '(']):
                symbol = self._currentToken['value']
                if symbol == '[':
                    self._writeTerminalElement()
                    self._compileExpression()
                    self._eatObligatory([T_SYMBOL], [']'])
                    self._writeTerminalElement()
                else:
                    self._compileSubroutineCall()
        self._closeNonTerminalElement(NON_TERMINAL_TERM)

    def _compileExpressionList(self):
        """expressionList: (expression (',' expression)*)?"""
        self._openNonTerminalElement(NON_TERMINAL_EXPRESSION_LIST)
        if not self._eatExpected([T_SYMBOL], [')']):
            self._compileExpression()
            while self._eatExpected([T_SYMBOL], [',']):
                self._writeTerminalElement()
                self._compileExpression()
        self._closeNonTerminalElement(NON_TERMINAL_EXPRESSION_LIST)

    # aux compile functions
    def _compileTypedVarDeclaration(self):
        """type varName (shared by class/sub/param declarations)."""
        self._eatObligatory([T_KEYWORD, T_IDENTIFIER],
                            [K_INT, K_CHAR, K_BOOLEAN])
        self._writeTerminalElement()
        self._eatObligatory([T_IDENTIFIER])
        self._writeTerminalElement()

    def _compileStatementByKeyword(self):
        """Dispatch to the statement compiler for the current keyword."""
        COMPILE_FUNCTION_BY_KEYWORD = {
            K_LET: self._compileLetStatement,
            K_IF: self._compileIfStatement,
            K_WHILE: self._compileWhileStatement,
            K_DO: self._compileDoStatement,
            K_RETURN: self._compileReturnStatement
        }
        keyword = self._currentToken['value']
        self._openNonTerminalElement(keyword + NON_TERMINAL_STATEMENT,
                                     eraseToken=False)
        self._writeTerminalElement()
        COMPILE_FUNCTION_BY_KEYWORD[keyword]()
        self._closeNonTerminalElement(keyword + NON_TERMINAL_STATEMENT)

    def _compileConditionalStatementBody(self):
        """Shared if/while shape: '(' expr ')' '{' statements '}'"""
        self._eatObligatory([T_SYMBOL], ['('])
        self._writeTerminalElement()
        self._compileExpression()
        self._eatObligatory([T_SYMBOL], [')'])
        self._writeTerminalElement()
        self._eatObligatory([T_SYMBOL], ['{'])
        self._writeTerminalElement()
        self._compileStatements()
        self._eatObligatory([T_SYMBOL], ['}'])
        self._writeTerminalElement()

    def _compileSubroutineCall(self, calledFromDoStatement=False):
        """subroutineCall: (name '.')? name '(' expressionList ')'

        When called from a do-statement the leading identifier has not been
        consumed yet; otherwise _compileTerm already emitted it.
        """
        if calledFromDoStatement:
            self._eatObligatory([T_IDENTIFIER])
            self._writeTerminalElement()
        if self._eatExpected([T_SYMBOL], ['.']):
            self._writeTerminalElement()
            self._eatObligatory([T_IDENTIFIER])
            self._writeTerminalElement()
        self._eatObligatory([T_SYMBOL], ['('])
        self._writeTerminalElement()
        self._compileExpressionList()
        self._eatObligatory([T_SYMBOL], [')'])
        self._writeTerminalElement()

    # aux functions
    def _eatObligatory(self, requiredTokenTypes, requiredTokenValues=[]):
        """Advance (unless buffered) and abort compilation on a mismatch."""
        if self._currentToken is None and not self._tokenizer.hasMoreTokens():
            self._outputFile.write(
                '-- COMPILATION ERROR -> MORE TOKENS EXPECTED!! --')
            self._outputFile.close()
            exit(1)
        self._currentToken = self._currentToken or self._tokenizer.advance()
        if (self._currentToken['type'] not in requiredTokenTypes
                or (self._currentToken['type']
                    in TOKEN_TYPES_WITH_EXPECTABLE_VALUES
                    and len(requiredTokenValues) > 0
                    and self._currentToken['value']
                    not in requiredTokenValues)):
            self._outputFile.write('-- COMPILATION ERROR -> WRONG SYNTAX!! --')
            self._outputFile.close()
            exit(1)

    def _eatExpected(self, expectedTokenTypes, expectedTokenValues=[]):
        """Peek the next token; True iff it matches. Token stays buffered."""
        self._currentToken = self._currentToken or self._tokenizer.advance()
        return (self._currentToken['type'] in expectedTokenTypes
                and (self._currentToken['type']
                     not in TOKEN_TYPES_WITH_EXPECTABLE_VALUES
                     or len(expectedTokenValues) == 0
                     or self._currentToken['value'] in expectedTokenValues))

    def _openNonTerminalElement(self, element,
                                isNonTerminalElementUnique=False,
                                eraseToken=True):
        """Write <element> and track it; optionally drop the buffered token."""
        if (not isNonTerminalElementUnique
                or element not in self._openedNonTerminalElements):
            self._outputFile.write(f'<{element}>\n')
            self._openedNonTerminalElements.append(element)
        self._currentToken = None if eraseToken else self._currentToken

    def _closeNonTerminalElement(self, element):
        """Write </element> if it is currently open."""
        if element in self._openedNonTerminalElements:
            self._outputFile.write(f'</{element}>\n')
            self._openedNonTerminalElements.remove(element)

    def _writeTerminalElement(self):
        """Write the buffered token as <type> value </type> and consume it."""
        # Fix: these mappings had degraded to identities; restored the
        # predefined XML entities so the emitted XML stays well-formed.
        XML_TRANSLATOR = {
            '<': '&lt;',
            '>': '&gt;',
            '&': '&amp;',
            '"': '&quot;'
        }
        # NOTE(review): relies on the token dict's insertion order being
        # (type, value) — confirm against the tokenizer.
        tokenType, tokenValue = self._currentToken.values()
        tokenValue = XML_TRANSLATOR[
            tokenValue] if tokenValue in XML_TRANSLATOR else tokenValue.replace(
                '"', '')
        self._outputFile.write(
            f'<{TERMINAL_ELEMENT_BY_TOKEN_TYPE[tokenType]}>')
        self._outputFile.write(f' {tokenValue} ')
        self._outputFile.write(
            f'</{TERMINAL_ELEMENT_BY_TOKEN_TYPE[tokenType]}>\n')
        self._currentToken = None
# Teste de JackTokenizer from JackTokenizer import JackTokenizer tknz = JackTokenizer('Main.jack') tknz.advance() print("<tokens>") while (tknz.hasMoreTokens()): classeToken = tknz.tagToken() print(classeToken) tknz.advance() print("</tokens>")
class CompilationEngine:
    """Recursive-descent Jack compiler that emits VM code via VMWriter.

    Fixes applied to this copy:
      * _compileConstructorCode sized Memory.alloc by the *argument*
        count; a Jack constructor must allocate one word per *field* of
        the class.
      * _compileExpression looped with `if`, accepting only one `op term`;
        the grammar is `term (op term)*`, so it now uses `while`
        (left-to-right evaluation, standard for Jack).
      * Reassembled a statement in _compileWhileStatement that had been
        split mid-expression.
    """

    def __init__(self, filepath):
        self._tokenizer = JackTokenizer(filepath)
        self._writer = VMWriter(filepath)
        self._classVariables = SymbolTable()
        self._subroutineVariables = SymbolTable()
        # One-token look-ahead buffer plus a "reuse it" flag
        self._currentToken = None
        self._preserveCurrentToken = False
        self._className = ''
        self._currentCompilingFunction = {'kind': '', 'name': ''}
        # Counter used to generate unique if/while labels per function
        self._numberConditionalsStatementsCurrentFunction = 0

    def run(self):
        """Compile the whole class and close the VM writer."""
        self._compileClass()
        self._writer.close()
        return

    # compile functions
    def _compileClass(self):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        self._eatObligatory([T_KEYWORD], [K_CLASS])
        self._eatObligatory([T_IDENTIFIER])
        self._className = self._currentToken['value']
        self._eatObligatory([T_SYMBOL], ['{'])
        self._compileClassVarDeclarations()
        self._compileSubroutineDeclarations()
        self._eatObligatory([T_SYMBOL], ['}'])
        return

    def _compileClassVarDeclarations(self):
        """Register every static/field variable in the class symbol table."""
        self._classVariables.startSubroutine()
        while self._eatExpected([T_KEYWORD], [K_STATIC, K_FIELD]):
            kind = VAR_STATIC if self._currentToken[
                'value'] == K_STATIC else VAR_FIELD
            varType, name = self._compileTypedVarDeclaration()
            self._classVariables.insert(name, varType, kind)
            while self._eatExpected([T_SYMBOL], [',']):
                self._eatObligatory([T_IDENTIFIER])
                name = self._currentToken['value']
                self._classVariables.insert(name, varType, kind)
            self._eatObligatory([T_SYMBOL], [';'])
        return

    def _compileSubroutineDeclarations(self):
        """Compile each constructor/function/method declaration."""
        while self._eatExpected([T_KEYWORD],
                                [K_CONSTRUCTOR, K_FUNCTION, K_METHOD]):
            self._currentCompilingFunction['kind'] = self._currentToken['value']
            self._subroutineVariables.startSubroutine()
            # Return type (int/char/boolean/void or a class name) — unused
            self._eatObligatory([T_KEYWORD, T_IDENTIFIER],
                                [K_INT, K_CHAR, K_BOOLEAN, K_VOID])
            self._eatObligatory([T_IDENTIFIER])
            self._currentCompilingFunction['name'] = self._currentToken['value']
            self._eatObligatory([T_SYMBOL], ['('])
            self._compileParameterList()
            self._eatObligatory([T_SYMBOL], [')'])
            self._compileSubroutineBody()
        return

    def _compileParameterList(self):
        """Register each parameter as an ARG variable.

        NOTE(review): for a method, `this` implicitly occupies argument 0,
        so parameters should start at argument 1 — confirm the SymbolTable
        accounts for this.
        """
        if self._eatExpected([T_KEYWORD, T_IDENTIFIER],
                             [K_INT, K_CHAR, K_BOOLEAN]):
            varType = self._currentToken['value']
            self._eatObligatory([T_IDENTIFIER])
            name = self._currentToken['value']
            self._subroutineVariables.insert(name, varType, VAR_ARG)
            while self._eatExpected([T_SYMBOL], [',']):
                varType, name = self._compileTypedVarDeclaration()
                self._subroutineVariables.insert(name, varType, VAR_ARG)
        return

    def _compileSubroutineBody(self):
        """'{' varDec* statements '}' — emits the `function` header first."""
        self._eatObligatory([T_SYMBOL], ['{'])
        self._compileVarDeclaration()
        funcName = self._className + '.' + self._currentCompilingFunction['name']
        nLocalVars = self._subroutineVariables.getVarCountByKind(VAR_LOCAL)
        self._writer.writeFunction(funcName, nLocalVars)
        self._numberConditionalsStatementsCurrentFunction = 0
        # Constructors allocate the object; methods anchor `this` to arg 0
        if self._currentCompilingFunction['kind'] == K_CONSTRUCTOR:
            self._compileConstructorCode()
        elif self._currentCompilingFunction['kind'] == K_METHOD:
            self._compileMethodCode()
        self._compileStatements()
        self._eatObligatory([T_SYMBOL], ['}'])
        return

    def _compileVarDeclaration(self):
        """Register every `var` as a LOCAL variable."""
        while self._eatExpected([T_KEYWORD], [K_VAR]):
            varType, name = self._compileTypedVarDeclaration()
            self._subroutineVariables.insert(name, varType, VAR_LOCAL)
            while self._eatExpected([T_SYMBOL], [',']):
                self._eatObligatory([T_IDENTIFIER])
                name = self._currentToken['value']
                self._subroutineVariables.insert(name, varType, VAR_LOCAL)
            self._eatObligatory([T_SYMBOL], [';'])
        return

    def _compileStatements(self):
        """statements: statement*"""
        while self._eatExpected([T_KEYWORD],
                                [K_LET, K_IF, K_WHILE, K_DO, K_RETURN]):
            self._compileStatementByKeyword()
        return

    def _compileLetStatement(self):
        """let varName ('[' expr ']')? '=' expr ';'"""
        self._eatObligatory([T_IDENTIFIER])
        name = self._currentToken['value']
        segment, index = self._searchVariableByName(name)
        isArrayAssignment = False
        if self._eatExpected([T_SYMBOL], ['[']):
            self._compileArrayPosition(name)
            isArrayAssignment = True
        self._eatObligatory([T_SYMBOL], ['='])
        self._compileExpression()
        if isArrayAssignment:
            # Stack: [target address, value] — park value in temp 0,
            # anchor `that` to the address, then store through that 0.
            self._writer.writePop(SEGMENT_TEMP, 0)
            self._writer.writePop(SEGMENT_POINTER, 1)
            self._writer.writePush(SEGMENT_TEMP, 0)
            self._writer.writePop(SEGMENT_THAT, 0)
        else:
            self._writer.writePop(segment, index)
        self._eatObligatory([T_SYMBOL], [';'])
        return

    def _compileIfStatement(self):
        """if '(' expr ')' '{' ... '}' (else '{' ... '}')? with unique labels."""
        funcName = self._className + '.' + self._currentCompilingFunction['name']
        notIfLabel = f'{funcName}_NOT_IF_{self._numberConditionalsStatementsCurrentFunction}'
        endComparisonLabel = f'{funcName}_END_COMPARISON_BLOCK_{self._numberConditionalsStatementsCurrentFunction}'
        self._numberConditionalsStatementsCurrentFunction += 1
        self._eatObligatory([T_SYMBOL], ['('])
        self._compileExpression()
        # Negate the condition: jump over the if-block when it is false
        self._writer.writeArithmetic('not')
        self._eatObligatory([T_SYMBOL], [')'])
        self._writer.writeIf(notIfLabel)
        self._eatObligatory([T_SYMBOL], ['{'])
        self._compileStatements()
        self._eatObligatory([T_SYMBOL], ['}'])
        self._writer.writeGoto(endComparisonLabel)
        self._writer.writeLabel(notIfLabel)
        if self._eatExpected([T_KEYWORD], [K_ELSE]):
            self._eatObligatory([T_SYMBOL], ['{'])
            self._compileStatements()
            self._eatObligatory([T_SYMBOL], ['}'])
        self._writer.writeLabel(endComparisonLabel)
        return

    def _compileWhileStatement(self):
        """while '(' expr ')' '{' statements '}' with unique labels."""
        funcName = self._className + '.' + self._currentCompilingFunction['name']
        loopLabel = f'{funcName}_LOOP_{self._numberConditionalsStatementsCurrentFunction}'
        endLoopLabel = f'{funcName}_END_LOOP_{self._numberConditionalsStatementsCurrentFunction}'
        self._numberConditionalsStatementsCurrentFunction += 1
        self._writer.writeLabel(loopLabel)
        self._eatObligatory([T_SYMBOL], ['('])
        self._compileExpression()
        self._writer.writeArithmetic('not')
        self._eatObligatory([T_SYMBOL], [')'])
        self._writer.writeIf(endLoopLabel)
        self._eatObligatory([T_SYMBOL], ['{'])
        self._compileStatements()
        self._eatObligatory([T_SYMBOL], ['}'])
        self._writer.writeGoto(loopLabel)
        self._writer.writeLabel(endLoopLabel)
        return

    def _compileDoStatement(self):
        """do subroutineCall ';' — the returned value is discarded."""
        self._compileSubroutineCall()
        self._writer.writePop(SEGMENT_TEMP, 0)
        self._eatObligatory([T_SYMBOL], [';'])
        return

    def _compileReturnStatement(self):
        """return expression? ';' — void routines return constant 0."""
        if self._eatExpected([T_SYMBOL], [';']):
            self._writer.writePush(SEGMENT_CONST, 0)
        else:
            self._compileExpression()
            self._eatObligatory([T_SYMBOL], [';'])
        self._writer.writeReturn()
        return

    def _compileExpression(self):
        """expression: term (op term)*

        Fix: was `if`, which compiled at most one trailing `op term`;
        chained expressions such as `a + b + c` would then fail downstream.
        """
        self._compileTerm()
        while self._eatExpected([T_SYMBOL],
                                ['+', '-', '*', '/', '&', '|', '<', '>', '=']):
            operator = self._currentToken['value']
            self._compileTerm()
            self._writer.writeArithmetic(VM_COMMAND_BY_JACK_OPERATOR[operator])
        return

    def _compileTerm(self):
        """Compile one term, pushing its value onto the VM stack."""
        requiredTypes = [T_INTEGER_CONSTANT, T_STRING_CONSTANT, T_KEYWORD,
                         T_IDENTIFIER, T_SYMBOL]
        requiredValues = [K_TRUE, K_FALSE, K_NULL, K_THIS, '(', '-', '~']
        self._eatObligatory(requiredTypes, requiredValues)
        tokenType = self._currentToken['type']
        if tokenType == T_INTEGER_CONSTANT:
            integer = self._currentToken['value']
            self._writer.writePush(SEGMENT_CONST, integer)
        elif tokenType == T_STRING_CONSTANT:
            # Build the string at runtime: String.new + appendChar per char
            stringConst = self._currentToken['value'].replace('"', '')
            self._writer.writePush(SEGMENT_CONST, len(stringConst))
            self._writer.writeCall('String.new', 1)
            for char in stringConst:
                self._writer.writePush(SEGMENT_CONST, ord(char))
                self._writer.writeCall('String.appendChar', 2)
        elif tokenType == T_KEYWORD:
            constant = self._currentToken['value']
            if constant == K_FALSE or constant == K_NULL:
                self._writer.writePush(SEGMENT_CONST, 0)
            elif constant == K_TRUE:
                # true is -1 in the Hack platform
                self._writer.writePush(SEGMENT_CONST, 1)
                self._writer.writeArithmetic('neg')
            else:  # K_THIS
                self._writer.writePush(SEGMENT_POINTER, 0)
        elif tokenType == T_SYMBOL:
            symbol = self._currentToken['value']
            if symbol == '(':
                self._compileExpression()
                self._eatObligatory([T_SYMBOL], [')'])
            else:
                unaryOperation = 'neg' if symbol == '-' else 'not'
                self._compileTerm()
                self._writer.writeArithmetic(unaryOperation)
        elif tokenType == T_IDENTIFIER:
            name = self._currentToken['value']
            if self._eatExpected([T_SYMBOL], ['[', '.', '(']):
                symbol = self._currentToken['value']
                if symbol == '[':
                    self._compileArrayPosition(name)
                    self._writer.writePop(SEGMENT_POINTER, 1)
                    self._writer.writePush(SEGMENT_THAT, 0)
                else:
                    # Hand the already-read '.'/'(' back to the call parser
                    self._preserveCurrentToken = True
                    self._compileSubroutineCall(name)
            else:
                segment, index = self._searchVariableByName(name)
                self._writer.writePush(segment, index)
        return

    def _compileExpressionList(self):
        """Compile the comma-separated call arguments; return their count."""
        nArgs = 0
        if not self._eatExpected([T_SYMBOL], [')']):
            self._compileExpression()
            nArgs += 1
            while self._eatExpected([T_SYMBOL], [',']):
                self._compileExpression()
                nArgs += 1
        # Leave the closing ')' buffered for the caller's _eatObligatory
        self._preserveCurrentToken = True
        return nArgs

    # aux compile functions
    def _compileTypedVarDeclaration(self):
        """Parse `type varName`; return (type, name)."""
        self._eatObligatory([T_KEYWORD, T_IDENTIFIER],
                            [K_INT, K_CHAR, K_BOOLEAN])
        varType = self._currentToken['value']
        self._eatObligatory([T_IDENTIFIER])
        name = self._currentToken['value']
        return varType, name

    def _compileStatementByKeyword(self):
        """Dispatch to the statement compiler for the current keyword."""
        COMPILE_FUNCTION_BY_KEYWORD = {
            K_LET: self._compileLetStatement,
            K_IF: self._compileIfStatement,
            K_WHILE: self._compileWhileStatement,
            K_DO: self._compileDoStatement,
            K_RETURN: self._compileReturnStatement
        }
        keyword = self._currentToken['value']
        COMPILE_FUNCTION_BY_KEYWORD[keyword]()
        return

    def _compileSubroutineCall(self, name=None):
        """Compile (name '.')? name '(' expressionList ')' and emit `call`.

        NOTE(review): a method call on an object variable pushes the object
        but emits the *bare* subroutine name, and a plain `foo(...)` call is
        not qualified with the current class — both likely need the
        variable's declared type / self._className to form the full VM name.
        Left as-is because SymbolTable does not expose the type here.
        """
        if name is None:
            self._eatObligatory([T_IDENTIFIER])
            name = self._currentToken['value']
        nArgs = 0
        if self._eatExpected([T_SYMBOL], ['.']):
            self._eatObligatory([T_IDENTIFIER])
            funcName = self._currentToken["value"]
            varInfo = self._searchVariableByName(name)
            if varInfo is not None:
                # Method call on an object: push it as the hidden first arg
                segment, index = varInfo
                self._writer.writePush(segment, index)
                nArgs += 1
            else:
                funcName = f'{name}.{funcName}'
        else:
            funcName = name
        self._eatObligatory([T_SYMBOL], ['('])
        nArgs += self._compileExpressionList()
        self._eatObligatory([T_SYMBOL], [')'])
        self._writer.writeCall(funcName, nArgs)
        return

    def _compileConstructorCode(self):
        """Allocate the new object and anchor `this` to it.

        Fix: the allocation size was taken from the *argument* count; a
        constructor must allocate one word per *field* of the class.
        """
        nFields = self._classVariables.getVarCountByKind(VAR_FIELD)
        self._writer.writePush(SEGMENT_CONST, nFields)
        self._writer.writeCall('Memory.alloc', 1)
        self._writer.writePop(SEGMENT_POINTER, 0)
        return

    def _compileMethodCode(self):
        """Anchor `this` to the hidden first argument of a method."""
        self._writer.writePush(SEGMENT_ARG, 0)
        self._writer.writePop(SEGMENT_POINTER, 0)
        return

    def _compileArrayPosition(self, arrName):
        """Leave (base + index expression) on the stack; consumes ']'."""
        arrayBaseAddr = self._searchVariableByName(arrName)
        segment, index = arrayBaseAddr
        self._writer.writePush(segment, index)
        self._compileExpression()
        self._writer.writeArithmetic('add')
        self._eatObligatory([T_SYMBOL], [']'])
        return

    # aux functions
    def _eatObligatory(self, requiredTokenTypes, requiredTokenValues=[]):
        """Consume the next (or preserved) token; abort on a mismatch."""
        if not self._preserveCurrentToken and not self._tokenizer.hasMoreTokens():
            self._writer.writeCompilationError('MORE TOKENS EXPECTED!')
            exit(1)
        if self._preserveCurrentToken:
            self._preserveCurrentToken = False
        else:
            self._currentToken = self._tokenizer.advance()
        if (self._currentToken['type'] not in requiredTokenTypes
                or (self._currentToken['type']
                    in TOKEN_TYPES_WITH_EXPECTABLE_VALUES
                    and len(requiredTokenValues) > 0
                    and self._currentToken['value']
                    not in requiredTokenValues)):
            self._writer.writeCompilationError(f'SYNTAX ERROR!')
            self._writer.writeCompilationError(
                f'TOKEN GIVEN: {self._currentToken}')
            self._writer.writeCompilationError(
                f'EXPECTED: {requiredTokenValues} in {requiredTokenTypes}')
            exit(1)
        return

    def _eatExpected(self, expectedTokenTypes, expectedTokenValues=[]):
        """Advance (or reuse the preserved token) and test for a match.

        On a miss the token is preserved so the next eat can retry it.
        """
        self._currentToken = self._currentToken if self._preserveCurrentToken else self._tokenizer.advance()
        ateExpected = (self._currentToken['type'] in expectedTokenTypes
                       and (self._currentToken['type']
                            not in TOKEN_TYPES_WITH_EXPECTABLE_VALUES
                            or len(expectedTokenValues) == 0
                            or self._currentToken['value']
                            in expectedTokenValues))
        self._preserveCurrentToken = not ateExpected
        return ateExpected

    def _searchVariableByName(self, name):
        """Resolve a name: subroutine scope first, then class scope.

        Returns (segment, index) or None when the name is not a variable.
        """
        subroutineVar = self._subroutineVariables.getByName(name)
        if subroutineVar is not None:
            return subroutineVar['segment'], subroutineVar['index']
        classVar = self._classVariables.getByName(name)
        if classVar is not None:
            return classVar['segment'], classVar['index']
        return None
class CompilationEngine(object):
    """This class recursively compiles a .jack file into (eventually) vm
    code. For now, this just outputs a grammar xml file"""

    # NOTE(review): this class continues past the end of this excerpt
    # (__compileIfStatement is cut off below); only formatting and comments
    # were touched here.

    def __init__(self, inFile):
        super(CompilationEngine, self).__init__()
        # create an internal tokenizer to iterate through
        self.tokenizer = JackTokenizer(inFile)
        # spacing so I can make nicely formatted xml, this will increase by
        # 4 spaces every time I recurse
        # NOTE(review): despite the comment above, the helpers below add and
        # remove 2 spaces per level.
        self.spacing = ""
        # setup the output file: truncate it, then reopen for appending
        self.outputPath = inFile.name.replace(".jack", ".xml")
        self.outputFile = open(self.outputPath, 'w')
        self.outputFile.close()
        self.outputFile = open(self.outputPath, 'a')

    def __increaseSpacing(self):
        """Adds 2 spaces to self.spacing"""
        self.spacing += "  "

    def __decreaseSpacing(self):
        """Removes 2 spaces from self.spacing"""
        self.spacing = self.spacing[:-2]

    def __writeFullTag(self, tag, value):
        """Writes the spacing, then <tag> value </tag> to the output file"""
        self.outputFile.write(self.spacing + "<" + tag + "> " + value +
                              " </" + tag + ">\n")

    def __writeOpenTag(self, tag):
        """Writes spacing, then <tag>, then increases the spacing"""
        self.outputFile.write(self.spacing + "<" + tag + ">\n")
        self.__increaseSpacing()

    def __writeCloseTag(self, tag):
        """Decreases spacing, then writes spacing, then </tag>"""
        self.__decreaseSpacing()
        self.outputFile.write(self.spacing + "</" + tag + ">\n")

    def start(self):
        """Starts the compilation by creating the token XML file and then
        calling __compileClass()"""
        # start the tokenizer
        self.tokenizer.advance()
        # make token xml file
        self.__createTokenXML()
        # reset tokenizer and compile
        self.tokenizer.reset()
        self.tokenizer.advance()
        self.__compileClass()

    def __createTokenXML(self):
        """Creates the token XML file for a .jack file"""
        outputPath = self.outputPath.replace(".xml", "T.xml")
        f = open(outputPath, 'w')
        f.close()
        f = open(outputPath, 'a')
        f.write("<tokens>\n")
        # make an output file that is filename but with testXML.xml at end
        while self.tokenizer.hasMoreTokens():
            # output to xml to check
            tokenType = self.tokenizer.tokenType()
            if tokenType == "KEYWORD":
                f.write("<keyword>" + self.tokenizer.keyWord() +
                        "</keyword>\n")
            elif tokenType == "SYMBOL":
                symbol = self.tokenizer.symbol()
                # NOTE(review): the four replace() calls below lost their XML
                # entity replacements ("&amp;", "&lt;", "&gt;", "&quot;") in
                # this copy — as written they are identity no-ops and the
                # last line is not even valid Python. Restore the entities
                # before use.
                symbol = symbol.replace("&", "&")
                symbol = symbol.replace("<", "<")
                symbol = symbol.replace(">", ">")
                symbol = symbol.replace("\"", """)
                f.write("<symbol>" + symbol + "</symbol>\n")
            elif tokenType == "IDENTIFIER":
                f.write("<identifier>" + self.tokenizer.identifier() +
                        "</identifier>\n")
            elif tokenType == "INT_CONST":
                f.write("<integerConstant>" + self.tokenizer.intVal() +
                        "</integerConstant>\n")
            elif tokenType == "STRING_CONST":
                f.write("<stringConstant>" + self.tokenizer.stringVal() +
                        "</stringConstant>\n")
            self.tokenizer.advance()
        # close the xml tag
        f.write("</tokens>")

    def __compileType(self):
        """Compiles a complete jack type grammar. Returns false if there
        is an error"""
        # check for valid keyword
        if self.tokenizer.tokenType() == "KEYWORD":
            k = self.tokenizer.keyWord()
            if k not in ["int", "char", "boolean"]:
                print("Error: type keyword must be int, char, or boolean")
                return False
            self.__writeFullTag("keyword", k)
            self.tokenizer.advance()
            return True
        # check for className
        else:
            res = self.__compileClassName()
            # if __compileClassName() errors, this is not a valid type
            if not res:
                print("Error: type not a valid className")
            return res

    def __compileClassName(self):
        """Compiles a complete jack className grammar. Returns false if
        there is an error"""
        if self.tokenizer.tokenType() != "IDENTIFIER":
            return False
        self.__writeFullTag("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()
        return True

    def __compileSubroutineName(self):
        """Compiles a complete jack subroutineName. Returns false if there
        is an error"""
        if self.tokenizer.tokenType() != "IDENTIFIER":
            return False
        self.__writeFullTag("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()
        return True

    def __compileVarName(self):
        """Compiles a complete jack varName. Returns false if there is an
        error"""
        if self.tokenizer.tokenType() != "IDENTIFIER":
            return False
        self.__writeFullTag("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()
        return True

    def __compileClass(self):
        """Compiles a complete jack class grammar"""
        # find the class keyword
        if self.tokenizer.tokenType() != "KEYWORD" or \
                self.tokenizer.keyWord() != "class":
            print("Error: no class declaration found")
            sys.exit(1)
        # write both the class tag and the keyword tag for class
        self.__writeOpenTag("class")
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()
        # find the className
        if not self.__compileClassName():
            print("Error: no class name found in class declaration")
            sys.exit(1)
        # find the open curly brace
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "{":
            print("Error: no opening brace found after class")
            # NOTE(review): exits with status 0 here, unlike the other error
            # paths which use 1 — likely unintended.
            sys.exit(0)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # compile the classVarDecs
        while (self.tokenizer.tokenType() == "KEYWORD" and
               (self.tokenizer.keyWord() == "static" or
                self.tokenizer.keyWord() == "field")):
            self.__compileClassVarDec()
        # compile the subroutines
        while (self.tokenizer.tokenType() == "KEYWORD" and
               (self.tokenizer.keyWord() == "constructor" or
                self.tokenizer.keyWord() == "function" or
                self.tokenizer.keyWord() == "method")):
            self.__compileSubroutineDec()
        # find last curly brace
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "}":
            print("Error: no closing brace found after class definition")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # close class tag
        self.__writeCloseTag("class")
        self.tokenizer.advance()

    def __compileClassVarDec(self):
        """Compiles a complete jack class variable declaration. This
        advances the tokenizer completely through the variable
        declaration"""
        # since we already checked to make sure this is valid, we can write
        # the tag here and either static or filed
        self.__writeOpenTag("classVarDec")
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()
        # look for a valid type
        if not self.__compileType():
            sys.exit(1)
        # check for varName
        if not self.__compileVarName():
            print("Error: missing varName identifier in classVarDec")
            sys.exit(1)
        # check for comma then more varNames (possible not existing)
        while self.tokenizer.tokenType() == "SYMBOL" and \
                self.tokenizer.symbol() == ",":
            # write the comma
            self.__writeFullTag("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            # check for varName again
            if not self.__compileVarName():
                print("Error: missing varName identifier in classVarDec")
                sys.exit(1)
        # check for closing semicolon
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != ";":
            print("Error: missing semicolon after classVarDec")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # close classVarDec tag
        self.__writeCloseTag("classVarDec")

    def __compileSubroutineDec(self):
        """Compiles a complete jack subroutine description. This advances
        the tokenizer completely through the subroutine declaration"""
        # write the opening tag
        self.__writeOpenTag("subroutineDec")
        # since we already checked for constructor/function/method, write it
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()
        # look for void or type
        if self.tokenizer.tokenType() == "KEYWORD" and \
                self.tokenizer.keyWord() == "void":
            # if void, write it
            self.__writeFullTag("keyword", self.tokenizer.keyWord())
            self.tokenizer.advance()
        elif not self.__compileType():
            print("Error: subroutine return type not void or valid type")
            sys.exit(1)
        # check for subroutineName
        if not self.__compileSubroutineName():
            print("Error: missing subroutineName in subroutineDec")
            sys.exit(1)
        # check for open parentheses
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "(":
            print("Error: missing ( for parameter list")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # do parameter list (this could add nothing)
        self.__compileParameterList()
        # check for closing parentheses
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != ")":
            print("Error: missing ) for parameter list")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # compile subroutine body
        self.__compileSubroutineBody()
        # close subroutineDec tag
        self.__writeCloseTag("subroutineDec")

    def __compileParameterList(self):
        """Compiles a complete jack parameter list grammar"""
        # write opening tag
        self.__writeOpenTag("parameterList")
        # if the next symbol is a ), then there is no parameter list, so
        # just return
        # the rest of compileSubroutine will handle writing that
        if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol(
        ) == ")":
            # close tag
            self.__writeCloseTag("parameterList")
            return
        # look for a valid type
        else:
            res = self.__compileType()
            if not res:
                sys.exit(1)
            # check for varName
            if not self.__compileVarName():
                print("Error: missing varName identifier in parameterList")
                sys.exit(1)
            # check for comma separated list of type and varName
            while self.tokenizer.tokenType(
            ) == "SYMBOL" and self.tokenizer.symbol() == ",":
                # write the comma
                self.__writeFullTag("symbol", self.tokenizer.symbol())
                self.tokenizer.advance()
                # look for a valid type
                if not self.__compileType():
                    sys.exit(1)
                # check for varName
                if not self.__compileVarName():
                    print("Error: missing varName identifier in parameterList")
                    sys.exit(1)
            # write closing tag
            self.__writeCloseTag("parameterList")

    def __compileSubroutineBody(self):
        """Compile a complete jack subroutine body grammar"""
        # write opening tag
        self.__writeOpenTag("subroutineBody")
        # check for {
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "{":
            print("Error: missing { for subroutine body")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # check to see if we need to compile varDec
        while self.tokenizer.tokenType() == "KEYWORD" and \
                self.tokenizer.keyWord() == "var":
            self.__compileVarDec()
        # compile statements
        self.__compileStatements()
        # check for closing }
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "}":
            print("Error: missing closing } for subroutine body")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # close tag
        self.__writeCloseTag("subroutineBody")
        return

    def __compileVarDec(self):
        """Compiles a complete jack varDec grammar"""
        # write open tag
        self.__writeOpenTag("varDec")
        # since we already checked to make sure there is a var, write it
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()
        # check for type
        if not self.__compileType():
            sys.exit(1)
        # check for varName
        if not self.__compileVarName():
            print("Error: missing varName identifier in varDec")
            sys.exit(1)
        # check for comma separated list of type and varName
        while self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol(
        ) == ",":
            # write the comma
            self.__writeFullTag("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            # check for varName
            if not self.__compileVarName():
                print("Error: missing varName identifier in varDec")
                sys.exit(1)
        # check for semicolon
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != ";":
            print("Error: missing ; after varDec")
            sys.exit(1)
        # write ;
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # close tag
        self.__writeCloseTag("varDec")
        return

    def __compileStatements(self):
        """Compiles a complete jack statements grammar"""
        # write statements tag
        self.__writeOpenTag("statements")
        # check for the keywords for all the statements
        while self.tokenizer.tokenType() == "KEYWORD":
            k = self.tokenizer.keyWord()
            if k == "let":
                self.__compileLetStatement()
            elif k == "if":
                self.__compileIfStatement()
            elif k == "while":
                self.__compileWhileStatement()
            elif k == "do":
                self.__compileDoStatement()
            elif k == "return":
                self.__compileReturnStatement()
            else:
                print("Error: invalid statment " + k)
                sys.exit(1)
        # close statements tag
        self.__writeCloseTag("statements")

    def __compileLetStatement(self):
        """Compiles a complete jack let statment grammar"""
        # write opening tag
        self.__writeOpenTag("letStatement")
        # since we already checked for the keyword let, write it
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()
        # look for varName
        # NOTE(review): unlike the other checks, this error path does not
        # sys.exit — possibly unintended.
        if not self.__compileVarName():
            print("Error: missing varName for let statement")
        # check for [
        if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol(
        ) == "[":
            # write the bracket
            self.__writeFullTag("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            # compile expression
            self.__compileExpression()
            # write the closing bracket
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
            ) != "]":
                print("Error: missing closing ] in let statement")
                sys.exit(1)
            self.__writeFullTag("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
        # check for =
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "=":
            print("Error: missing = in let expression")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # compile expression
        self.__compileExpression()
        # look for ;
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != ";":
            print("Error: missing ; after let statement")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # write closing tag
        self.__writeCloseTag("letStatement")

    def __compileIfStatement(self):
        """Compiles a complete jack if statement grammar"""
        # write opening tag
        self.__writeOpenTag("ifStatement")
        # since we already checked for if, write it
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()
        # check for (
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "(":
            print("Error: missing ( in if statement")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # compile expression
        self.__compileExpression()
        # check for )
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != ")":
            print("Error: missing ) in if statement")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # check for {
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "{":
            print("Error: missing { for if statement")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # compile more statements
        self.__compileStatements()
        # check for }
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "}":
            print("Error: missing } after if statement")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # check for else
        if self.tokenizer.tokenType() == "KEYWORD" and self.tokenizer.keyWord(
        ) == "else":
            # write else
            self.__writeFullTag("keyword", self.tokenizer.keyWord())
self.tokenizer.advance() # check for { if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "{": print("Error: missing { for if statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile more statements self.__compileStatements() # check for } if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "}": print("Error: missing } after if statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # close tag self.__writeCloseTag("ifStatement") def __compileWhileStatement(self): """Compiles a complete jack while statement grammar""" # write opening tag self.__writeOpenTag("whileStatement") # since we checked for while already, write it self.__writeFullTag("keyword", self.tokenizer.keyWord()) self.tokenizer.advance() # check for ( if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "(": print("Error: missing ( in if statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile expression self.__compileExpression() # check for ) if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != ")": print("Error: missing ) in if statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # check for { if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "{": print("Error: missing { for if statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile more statements self.__compileStatements() # check for } if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "}": print("Error: missing } after if statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # write closing tag self.__writeCloseTag("whileStatement") def __compileDoStatement(self): """Compiles a complete jack do statement 
grammar""" # write opening tag self.__writeOpenTag("doStatement") # since we already checked for do, write it self.__writeFullTag("keyword", self.tokenizer.keyWord()) self.tokenizer.advance() # compile subroutine call self.__compileSubroutineCall() # check for semicolon if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != ";": print("Error: missing ; after do statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # write closing tag self.__writeCloseTag("doStatement") def __compileReturnStatement(self): """Compiles a complete jack return statement grammar""" # write opening tag self.__writeOpenTag("returnStatement") # since we checked for return already, write it self.__writeFullTag("keyword", self.tokenizer.keyWord()) self.tokenizer.advance() # if the next symbol isn't a symbol, it must be an expression if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != ";": self.__compileExpression() # write ;, checking again to make sure after calling compile expression # that the next symbol is still a valid ; if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != ";": print("Error: missing ; after return statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # write closing tag self.__writeCloseTag("returnStatement") def __convertOp(self, op): """Converts the operators that interfere with xml tags to their properly escaped versions""" op = op.replace("&", "&") op = op.replace("<", "<") op = op.replace(">", ">") op = op.replace("\"", """) return op def __compileExpression(self): """Compiles a complete jack expression grammar""" # write opening tag self.__writeOpenTag("expression") # compile term self.__compileTerm() # check for op while self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol( ) in op: s = self.tokenizer.symbol() # write op self.__writeFullTag("symbol", self.__convertOp(s)) self.tokenizer.advance() 
# compile another term self.__compileTerm() # close tag self.__writeCloseTag("expression") def __compileTerm(self): """Compiles a complete jack term grammar""" # write opening tag self.__writeOpenTag("term") # term logic # check for integerConstant if self.tokenizer.tokenType() == "INT_CONST": self.__writeFullTag("integerConstant", self.tokenizer.intVal()) self.tokenizer.advance() # check for string constant elif self.tokenizer.tokenType() == "STRING_CONST": self.__writeFullTag("stringConstant", self.tokenizer.stringVal()) self.tokenizer.advance() # check for keyword for KeywordConstant elif self.tokenizer.tokenType() == "KEYWORD": k = self.tokenizer.keyWord() if k not in KeyWordConstant: print("Error: invalid KeyWordConstant" + k + " in term") sys.exit(1) # write the keywordconstant self.__writeFullTag("keyword", k) self.tokenizer.advance() # check for symbol for either ( expression ) or unary op elif self.tokenizer.tokenType() == "SYMBOL": s = self.tokenizer.symbol() # ( expression ) if s == "(": self.__writeFullTag("symbol", s) self.tokenizer.advance() # compile expression self.__compileExpression() # check for ) if self.tokenizer.tokenType( ) != "SYMBOL" or self.tokenizer.symbol() != ")": print("Error: missing ) after expression in term") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # unaryOp term elif s in unaryOp: self.__writeFullTag("symbol", s) self.tokenizer.advance() # compile term self.__compileTerm() else: print("Error: invalid symbol " + s + " in term") sys.exit(1) # check for varName | varName [ expression ] | subroutineCall elif self.tokenizer.tokenType() == "IDENTIFIER": # advance the tokenizer one more step to check for [, (, or other self.tokenizer.advance() if self.tokenizer.tokenType() == "SYMBOL": s = self.tokenizer.symbol() # varName[expression] if s == "[": # go back to varName self.tokenizer.retreat() # compile varName if not self.__compileVarName(): print("Error: invalid varName in term") 
sys.exit(1) # write [ self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile expression self.__compileExpression() # write ] if self.tokenizer.tokenType( ) != "SYMBOL" or self.tokenizer.symbol() != "]": print("Error: missing ] after varName[expression]") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # subroutineCall elif s == "(" or s == ".": # go back to subroutineName self.tokenizer.retreat() # compile subroutineCall self.__compileSubroutineCall() else: # go back to varName self.tokenizer.retreat() # compile varName if not self.__compileVarName(): print("Error: invalid varName in term") sys.exit(1) else: # go back to varName self.tokenizer.retreat() # compile varName if not self.__compileVarName(): print("Error: invalid varName in term") sys.exit(1) else: print("Error: invalid term") sys.exit(1) # close tag self.__writeCloseTag("term") def __compileSubroutineCall(self): """Compiles a complete jack subroutine call grammar""" # look ahead one token to see if it is a ( or a . 
self.tokenizer.advance() # subroutineName if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol( ) == "(": # go back to subroutine name self.tokenizer.retreat() # compile subroutine name if not self.__compileSubroutineName(): print("Error: invalid subroutineName in subroutineCall") sys.exit(1) # check for ( if self.tokenizer.tokenType( ) != "SYMBOL" and self.tokenizer.symbol() != "(": print( "Error: missing ( in subroutineCall before expressionList") sys.exit(1) # write ( self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile expression list self.__compileExpressionList() # check for ) if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != ")": print( "Error: missing ) after expressionList in subroutineCall") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # className | varName elif self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol( ) == ".": # go back to varName/className self.tokenizer.retreat() if self.tokenizer.tokenType() != "IDENTIFIER": print("Error: missing varName|className in subroutineCall") # Hacky, but className and varName both correspond to just an # identitifer, so I just call compileVarName to handle both if not self.__compileVarName(): print("Error: invalid className or varName in subroutineCall") sys.exit(1) # check for . if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != ".": print("Error: missing . in subroutineCall") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile subroutineName if not self.__compileSubroutineName(): print( "Error: missing subroutineName after . 
in subroutineCall") sys.exit(1) # check for ( if self.tokenizer.tokenType( ) != "SYMBOL" and self.tokenizer.symbol() != "(": print( "Error: missing ( in subroutineCall before expressionList") sys.exit(1) # write ( self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile expression list self.__compileExpressionList() # check for ) if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != ")": print( "Error: missing ) after expressionList in subroutineCall") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() else: print("Error: invalid subroutineCall") sys.exit(1) def __compileExpressionList(self): """Compiles a complete jack expression list grammar""" # write open tag self.__writeOpenTag("expressionList") # if the symbol is ), there is no expression list if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol( ) == ")": # close tag self.__writeCloseTag("expressionList") return else: # compile expression self.__compileExpression() # loop until you dont see a comma while self.tokenizer.tokenType( ) == "SYMBOL" and self.tokenizer.symbol() == ",": # write , self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile expression self.__compileExpression() # write closing tag self.__writeCloseTag("expressionList")
from JackTokenizer import JackTokenizer tokenizer = JackTokenizer('foo') while tokenizer.hasMoreTokens(): tokenizer.advance()
class CompilationEngine:
    """Recursive-descent parser for the Jack grammar.

    Drives a JackTokenizer and accumulates the parse tree as a flat list of
    XML tag strings in self._tags.  Each compile_* method returns the list of
    tags it produced, or False if the construct was not matched (validate_tags
    collapses a list containing any False into False so callers can backtrack).
    """

    # Maps grammar-variable names to the tokenizer's token-type constants.
    # FIX: the single-element keys were parenthesized strings, not tuples,
    # so "token in t" did substring matching; they are now real 1-tuples.
    GRAMMAR_KEYWORD = {
        ('varName', 'className', 'subroutineName'): 'IDENTIFIER',
        ('integerConstant',): 'INT_CONST',
        ('stringConstant',): 'STRING_CONST',
    }

    def __init__(self, path, filename):
        self._jt = JackTokenizer(path, filename)
        self._opfilename = filename
        self._tags = []
        # parsing starts immediately on construction
        self.compileClass()
        #print(*self._tags)

    def exportXML(self):
        """Write the accumulated XML tags to the output file."""
        with open(self._opfilename, 'w') as f:
            f.writelines(self._tags)

    def compileClass(self):
        """class className { classVarDec* subroutineDec* }"""
        self.start_non_terminal_tag('class', addToTags=True)
        self.move_to_next_token()
        self.eat_token('class', addToTags=True)
        self.move_to_next_token()
        self.eat_token_type('IDENTIFIER', addToTags=True)
        self.move_to_next_token()
        self.eat_token('{', addToTags=True)
        # consume all classVarDecs, then all subroutineDecs
        # (the original used no-op while/else clauses here; since no break
        # ever ran, the else body always executed — flattened to plain code)
        class_var_dec = self.compile_class_var_dec()
        while class_var_dec:
            self._tags += class_var_dec
            class_var_dec = self.compile_class_var_dec()
        subroutine_dec = self.compile_subroutine_dec()
        while subroutine_dec:
            self._tags += subroutine_dec
            subroutine_dec = self.compile_subroutine_dec()
        self.eat_token('}', addToTags=True)
        self.end_non_terminal_tag('class', addToTags=True)

    def compile_class_var_dec(self):
        """(static | field) type varName (, varName)* ;  -> tags or False"""
        tags = []
        tags.append(self.start_non_terminal_tag('classVarDec'))
        tags.append(self.eat_token('static') or self.eat_token('field'))
        if False in tags:
            # not a classVarDec at all; let the caller try something else
            return False
        tags.append(self.compile_type())
        tags.append(self.eat_token_type(self.get_token_type('varName')))
        collect = self.eat_token(',')
        while collect:
            tags.append(collect)
            self.move_to_next_token()
            tags.append(self.eat_token_type(self.get_token_type('varName')))
            self.move_to_next_token()
            collect = self.eat_token(',')
        tags.append(self.eat_token(';'))
        tags.append(self.end_non_terminal_tag('classVarDec'))
        return self.validate_tags(tags)

    def compile_type(self):
        """int | char | boolean | className  -> tag string or False"""
        return (self.eat_token('int')
                or self.eat_token('char')
                or self.eat_token('boolean')
                or self.eat_token_type(self.get_token_type('className')))

    def compile_subroutine_dec(self):
        """(constructor|function|method) (void|type) subroutineName
        ( parameterList ) subroutineBody  -> tags or False"""
        tags = []
        tags.append(self.start_non_terminal_tag('subroutineDec'))
        self.move_to_next_token()
        tags.append(self.eat_token('constructor') or self.eat_token('function')
                    or self.eat_token('method'))
        if False in tags:
            return False
        self.move_to_next_token()
        tags.append(self.eat_token('void') or self.compile_type())
        self.move_to_next_token()
        tags.append(self.eat_token_type(self.get_token_type('subroutineName')))
        self.move_to_next_token()
        tags.append(self.eat_token('('))
        parameter_list_tags = self.compileParameterList()
        if parameter_list_tags:
            tags += parameter_list_tags
        self.move_to_next_token()
        tags.append(self.eat_token(')'))
        subroutine_body_tags = self.compile_subroutine_body()
        if subroutine_body_tags:
            tags += subroutine_body_tags
        tags.append(self.end_non_terminal_tag('subroutineDec'))
        return self.validate_tags(tags)

    def compileParameterList(self):
        """((type varName) (, type varName)*)?  -> tags or False"""
        self.move_to_next_token()
        # FIX: local was named `type`, shadowing the builtin
        param_type = self.compile_type()
        if not param_type:
            # empty parameter list is valid: just the open/close tags
            tags = [
                self.start_non_terminal_tag('parameterList'),
                self.end_non_terminal_tag('parameterList')
            ]
            return self.validate_tags(tags)
        else:
            tags = [self.start_non_terminal_tag('parameterList'), param_type]
            self.move_to_next_token()
            tags.append(self.eat_token_type(self.get_token_type('varName')))
            self.move_to_next_token()
            is_comma = self.eat_token(',')
            while is_comma:
                tags.append(is_comma)
                self.move_to_next_token()
                tags.append(self.compile_type())
                self.move_to_next_token()
                tags.append(self.eat_token_type(self.get_token_type('varName')))
                self.move_to_next_token()
                is_comma = self.eat_token(',')
            tags.append(self.end_non_terminal_tag('parameterList'))
            return self.validate_tags(tags)

    def compile_subroutine_body(self):
        """{ varDec* statements }  -> tags or False"""
        tags = [self.start_non_terminal_tag('subroutineBody')]
        self.move_to_next_token()
        tags.append(self.eat_token('{'))
        var_dec_tags = self.compile_var_dec()
        while var_dec_tags:
            tags += var_dec_tags
            var_dec_tags = self.compile_var_dec()
        statements_tags = self.compile_statements()
        if statements_tags:
            tags += statements_tags
        self.move_to_next_token()
        tags.append(self.eat_token('}'))
        tags.append(self.end_non_terminal_tag('subroutineBody'))
        return self.validate_tags(tags)

    def compile_var_dec(self):
        """var type varName (, varName)* ;  -> tags or False"""
        tags = [self.start_non_terminal_tag('varDec')]
        self.move_to_next_token()
        tags.append(self.eat_token('var'))
        self.move_to_next_token()
        tags.append(self.compile_type())
        self.move_to_next_token()
        tags.append(self.eat_token_type(self.get_token_type('varName')))
        self.move_to_next_token()
        is_comma = self.eat_token(',')
        while is_comma:
            tags.append(is_comma)
            self.move_to_next_token()
            tags.append(self.eat_token_type(self.get_token_type('varName')))
            self.move_to_next_token()
            is_comma = self.eat_token(',')
        tags.append(self.eat_token(';'))
        tags.append(self.end_non_terminal_tag('varDec'))
        return self.validate_tags(tags)

    def compile_statements(self):
        """statement*  -> tags (possibly just the open/close pair)"""
        tags = [self.start_non_terminal_tag('statements')]
        statement = self.compile_statement()
        while statement:
            tags += statement
            statement = self.compile_statement()
        tags.append(self.end_non_terminal_tag('statements'))
        return self.validate_tags(tags)

    def compile_statement(self):
        """Try each statement alternative in turn; first match wins."""
        return (self.compile_let_statement() or self.compile_if_statement()
                or self.compile_while_statement() or self.compile_do_statement()
                or self.compile_return_statement())

    def compile_let_statement(self):
        """let varName ([expression])? = expression ;  -> tags or False"""
        tags = [self.start_non_terminal_tag('letStatement')]
        self.move_to_next_token()
        tags.append(self.eat_token('let'))
        if False in tags:
            return False
        self.move_to_next_token()
        tags.append(self.eat_token_type(self.get_token_type('varName')))
        self.move_to_next_token()
        bracket = self.eat_token('[')
        if bracket:
            # optional array indexing
            tags.append(bracket)
            tags += self.compile_expression()
            tags.append(self.eat_token(']'))
        self.move_to_next_token()
        tags.append(self.eat_token('='))
        tags += self.compile_expression()
        self.move_to_next_token()
        tags.append(self.eat_token(';'))
        tags.append(self.end_non_terminal_tag('letStatement'))
        return self.validate_tags(tags)

    def compile_if_statement(self):
        """if ( expression ) { statements } (else { statements })?"""
        tags = [self.start_non_terminal_tag('ifStatement')]
        self.move_to_next_token()
        tags.append(self.eat_token('if'))
        if False in tags:
            return False
        self.move_to_next_token()
        tags.append(self.eat_token('('))
        tags += self.compile_expression()
        self.move_to_next_token()
        tags.append(self.eat_token(')'))
        self.move_to_next_token()
        tags.append(self.eat_token('{'))
        tags += self.compile_statements()
        self.move_to_next_token()
        tags.append(self.eat_token('}'))
        else_tag = self.eat_token('else')
        if else_tag:
            tags.append(else_tag)
            self.move_to_next_token()
            tags.append(self.eat_token('{'))
            tags += self.compile_statements()
            self.move_to_next_token()
            tags.append(self.eat_token('}'))
        tags.append(self.end_non_terminal_tag('ifStatement'))
        return self.validate_tags(tags)

    def compile_while_statement(self):
        """while ( expression ) { statements }  -> tags or False"""
        tags = [self.eat_token('while'), self.eat_token('(')]
        if False in tags:
            return False
        tags += self.compile_expression()
        tags.append(self.eat_token(')'))
        tags.append(self.eat_token('{'))
        tags += self.compile_statements()
        tags.append(self.eat_token('}'))
        tags = self.add_non_terminal_tags('whileStatement', tags)
        return self.validate_tags(tags)

    def compile_do_statement(self):
        """do subroutineCall ;  -> tags or False"""
        tags = [
            self.eat_token('do'),
            self.eat_token_type(self.get_token_type('subroutineName'))
        ]
        if False in tags:
            return False
        paren_tag = self.eat_token('(')
        if paren_tag:
            # call within this class: name ( expressionList )
            tags.append(paren_tag)
            tags += self.compile_expression_list()
            tags.append(self.eat_token(')'))
        else:
            # qualified call: name . subroutineName ( expressionList )
            tags.append(self.eat_token('.'))
            tags.append(self.eat_token_type(self.get_token_type('subroutineName')))
            tags.append(self.eat_token('('))
            tags += self.compile_expression_list()
            tags.append(self.eat_token(')'))
        tags.append(self.eat_token(';'))
        tags = self.add_non_terminal_tags('doStatement', tags)
        return self.validate_tags(tags)

    def compile_return_statement(self):
        """return expression? ;  -> tags or False"""
        tags = [self.eat_token('return')]
        if False in tags:
            return False
        expression_tags = self.compile_expression()
        if expression_tags:
            tags += expression_tags
        tags.append(self.eat_token(';'))
        tags = self.add_non_terminal_tags('returnStatement', tags)
        return self.validate_tags(tags)

    def compile_expression(self):
        """term (op term)*  -> tags or False"""
        tags = [self.start_non_terminal_tag('expression')]
        term_tags = self.compile_term()
        if term_tags:
            tags += term_tags
        else:
            return False
        self.move_to_next_token()
        while self._jt.token in ('+', '-', '*', '/', '&', '|', '<', '>', '='):
            tags.append(self.eat_token_type('SYMBOL'))
            tags += self.compile_term()
            self.move_to_next_token()
        tags.append(self.end_non_terminal_tag('expression'))
        return self.validate_tags(tags)

    def compile_term(self):
        """One Jack term: constant | keywordConstant | varName |
        varName[expression] | subroutineCall | (expression) | unaryOp term."""
        tags = []
        self.move_to_next_token()
        constant = (self.eat_token_type(self.get_token_type('integerConstant'))
                    or self.eat_token_type(self.get_token_type('stringConstant')))
        if not constant and self._jt.token in ('this', 'null', 'true', 'false'):
            constant = self.eat_token_type('KEYWORD')
        if not constant:
            identifier = self.eat_token_type('IDENTIFIER')
            if identifier:
                self.move_to_next_token()  # peek at the token after the identifier
                if self._jt.token == '[':
                    # array invocation: varName [ expression ]
                    tags.append(identifier)
                    tags.append(self.eat_token('['))
                    tags += self.compile_expression()
                    self.move_to_next_token()
                    tags.append(self.eat_token(']'))
                elif self._jt.token in ('.', '('):
                    # subroutine call
                    tags.append(identifier)
                    if self._jt.token == '(':
                        # call within this Jack class
                        tags.append(self.eat_token('('))
                        tags += self.compile_expression_list()
                        self.move_to_next_token()
                        # FIX: the closing ) tag was eaten but its result
                        # discarded, so it never reached the XML output
                        tags.append(self.eat_token(')'))
                    else:
                        # call on another class/object
                        tags.append(self.eat_token('.'))
                        self.move_to_next_token()
                        tags.append(self.eat_token_type(
                            self.get_token_type('subroutineName')))
                        self.move_to_next_token()
                        tags.append(self.eat_token('('))
                        tags += self.compile_expression_list()
                        self.move_to_next_token()
                        tags.append(self.eat_token(')'))
                else:
                    # just a bare varName
                    tags.append(identifier)
            elif self._jt.tokenType == 'SYMBOL':
                if self._jt.token == '(':
                    # parenthesized expression
                    tags.append(self.eat_token('('))
                    tags += self.compile_expression()
                    tags.append(self.eat_token(')'))
                elif self._jt.token in ('~', '-'):
                    # unaryOp term
                    tags.append(self.eat_token('~') or self.eat_token('-'))
                    tags += self.compile_term()
        else:
            tags.append(constant)
        if len(tags) > 0:
            tags = self.add_non_terminal_tags('term', tags)
            return self.validate_tags(tags)
        else:
            return False

    def compile_expression_list(self):
        """(expression (, expression)*)?  -> tags (may be just open/close)"""
        tags = [self.start_non_terminal_tag('expressionList')]
        expression_tags = self.compile_expression()
        if expression_tags:
            tags += expression_tags
            self.move_to_next_token()
            is_comma = self.eat_token(',')
            while is_comma:
                tags.append(is_comma)
                self.move_to_next_token()
                expression = self.compile_expression()
                if expression:
                    tags += expression
                is_comma = self.eat_token(',')
        tags.append(self.end_non_terminal_tag('expressionList'))
        return self.validate_tags(tags)

    def get_token_type(self, token):
        """Map a grammar variable (e.g. 'varName') to a tokenizer type constant.

        Raises if the name is not in GRAMMAR_KEYWORD."""
        for t in self.GRAMMAR_KEYWORD:
            if token in t:
                return self.GRAMMAR_KEYWORD[t]
        raise Exception(token + ' not found in grammar')

    def eat_token(self, token, addToTags=False):
        """Consume the current token if it equals `token`.

        Returns its XML tag string (truthy) on success, False otherwise."""
        self.move_to_next_token()
        if self._jt.token == token:
            self._jt.tokenConsumed = True
            # FIX: local was named `xmltag`, shadowing the method of the
            # same name
            tag = self.xmltag()
            if addToTags:
                self._tags.append(tag)
            return tag
        else:
            return False

    def eat_token_type(self, tokenType, addToTags=False):
        """Consume the current token if its type equals `tokenType`.

        Returns its XML tag string (truthy) on success, False otherwise."""
        self.move_to_next_token()
        if self._jt.tokenType == tokenType:
            self._jt.tokenConsumed = True
            tag = self.xmltag()
            if addToTags:
                self._tags.append(tag)
            return tag
        else:
            return False

    def move_to_next_token(self):
        """Advance the tokenizer if any tokens remain (no-op at EOF)."""
        if self._jt.hasMoreTokens():
            self._jt.advance()

    def xmltag(self):
        """XML line for the current token, e.g. '<symbol> ; </symbol>\\n'."""
        type_ = self._jt.tokenTypeXmlTags[self._jt.tokenType]
        return '<' + type_ + '> ' + self._jt.token + ' </' + type_ + '>\n'

    def add_non_terminal_tags(self, name, tags):
        """Wrap `tags` in <name>...</name> open/close lines."""
        return [self.start_non_terminal_tag(name)
                ] + tags + [self.end_non_terminal_tag(name)]

    def start_non_terminal_tag(self, tag, addToTags=False):
        tag_n_ = '<' + tag + '>\n'
        if addToTags:
            self._tags.append(tag_n_)
        return tag_n_

    def end_non_terminal_tag(self, tag, addToTags=False):
        tag_n_ = '</' + tag + '>\n'
        if addToTags:
            self._tags.append(tag_n_)
        return tag_n_

    def validate_tags(self, tags):
        """Return `tags` unchanged, or False if any sub-match failed."""
        if False not in tags:
            return tags
        else:
            return False