def create_token_file(jack_file_name):
    """Tokenize a .jack file and write its token stream as XML.

    Produces "<name>T.xml" next to the input, containing one XML element
    per token inside a <tokens> root element.

    :param jack_file_name: path to the input .jack source file.
    :return: the name of the generated token XML file.
    """
    token_file_name = jack_file_name.replace('.jack', 'T.xml')
    # Context managers close both files even on error (the original leaked
    # both handles).  'r' replaces the removed-in-3.11 'rU' mode; universal
    # newlines are the default in Python 3 text mode anyway.
    with open(jack_file_name, 'r') as jack_file, \
            open(token_file_name, 'w') as token_file:
        tokenizer = JackTokenizer(jack_file)
        token_file.write('<tokens>\n')
        while tokenizer.hasMoreTokens():
            tokenizer.advance()
            # Compare with '==', not 'is': identity comparison of strings
            # only works by accident of interning.  Also call tokenType()
            # once per token instead of once per branch.
            token_type = tokenizer.tokenType()
            if token_type == 'KEYWORD':
                token_file.write('<keyword> {} </keyword>\n'.format(
                    tokenizer.keyWord().lower()))
            elif token_type == 'SYMBOL':
                symbol = tokenizer.symbol()
                if symbol in ['<', '>', '&']:
                    # Escape characters that are markup-significant in XML.
                    symbol = Main.XML_CONVSERSIONS[symbol]
                token_file.write('<symbol> {} </symbol>\n'.format(symbol))
            elif token_type == 'IDENTIFIER':
                token_file.write('<identifier> {} </identifier>\n'.format(
                    tokenizer.identifier()))
            elif token_type == 'INT_CONST':
                token_file.write(
                    '<integerConstant> {} </integerConstant>\n'.format(
                        tokenizer.intVal()))
            elif token_type == 'STRING_CONST':
                token_file.write(
                    '<stringConstant> {} </stringConstant>\n'.format(
                        tokenizer.stringVal()))
        token_file.write('</tokens>\n')
    return token_file_name
def test_token_type(self):
    """Spot-check token types and values while scanning ArrayTest/Main.jack."""
    tokenizer = JackTokenizer("ArrayTest/Main.jack")

    # Token 1: the 'class' keyword.
    tokenizer.advance()
    self.assertEqual(TokenType.keyword, tokenizer.token_type())
    self.assertEqual(tokenizer.keyword(), KeywordType.CLASS)

    # Token 2: the class-name identifier.
    tokenizer.advance()
    self.assertEqual(TokenType.identifier, tokenizer.token_type())
    self.assertEqual(tokenizer.identifier(), "Main")

    # Token 3: the opening brace.
    tokenizer.advance()
    self.assertEqual(TokenType.symbol, tokenizer.token_type())
    self.assertEqual(tokenizer.symbol(), "{")

    # Scan the rest of the file, probing two known token positions
    # (counted from the token after the opening brace).
    position = 0
    while tokenizer.has_more_tokens():
        position += 1
        tokenizer.advance()
        if position == 28:
            self.assertEqual(TokenType.stringConstant, tokenizer.token_type())
            self.assertEqual(tokenizer.stringVal(), "HOW MANY NUMBERS? ")
        if position == 43:
            self.assertEqual(TokenType.integerConstant, tokenizer.token_type())
            self.assertEqual(tokenizer.intVal(), '0')
def create_token_file(jack_file_name):
    """Tokenize a .jack file and write its token stream as XML.

    Produces "<name>T.xml" next to the input, containing one XML element
    per token inside a <tokens> root element.

    :param jack_file_name: path to the input .jack source file.
    :return: the name of the generated token XML file.
    """
    token_file_name = jack_file_name.replace('.jack', 'T.xml')
    # Context managers close both files even on error (the original leaked
    # both handles).  'r' replaces the removed-in-3.11 'rU' mode; universal
    # newlines are the default in Python 3 text mode anyway.
    with open(jack_file_name, 'r') as jack_file, \
            open(token_file_name, 'w') as token_file:
        tokenizer = JackTokenizer(jack_file)
        token_file.write('<tokens>\n')
        while tokenizer.hasMoreTokens():
            tokenizer.advance()
            # Compare with '==', not 'is': identity comparison of strings
            # only works by accident of interning.  Also call tokenType()
            # once per token instead of once per branch.
            token_type = tokenizer.tokenType()
            if token_type == 'KEYWORD':
                token_file.write('<keyword> {} </keyword>\n'.format(
                    tokenizer.keyWord().lower()))
            elif token_type == 'SYMBOL':
                symbol = tokenizer.symbol()
                if symbol in ['<', '>', '&']:
                    # Escape characters that are markup-significant in XML.
                    symbol = Main.XML_CONVSERSIONS[symbol]
                token_file.write('<symbol> {} </symbol>\n'.format(symbol))
            elif token_type == 'IDENTIFIER':
                token_file.write('<identifier> {} </identifier>\n'.format(
                    tokenizer.identifier()))
            elif token_type == 'INT_CONST':
                token_file.write(
                    '<integerConstant> {} </integerConstant>\n'.format(
                        tokenizer.intVal()))
            elif token_type == 'STRING_CONST':
                token_file.write(
                    '<stringConstant> {} </stringConstant>\n'.format(
                        tokenizer.stringVal()))
        token_file.write('</tokens>\n')
    return token_file_name
class CompilationEngine(object):
    """CompilationEngine: generates the compiler's output.

    Parses the token stream produced by JackTokenizer and accumulates an
    XML parse tree, one element per line, in ``self.output``; call
    save_output_file() to persist it.
    """

    def __init__(self, input_file, output_file, use_text_as_input=False):
        """Creates a new compilation engine with the given input and output.

        The next routine called must be compile_class.

        :param input_file: path to (or, see below, text of) the Jack source.
        :param output_file: path the XML output will be saved to.
        :param use_text_as_input: forwarded to JackTokenizer; when true the
            input is treated as raw Jack text rather than a file path.
        """
        self.tokenizer = JackTokenizer(input_file, use_text_as_input)
        self.output_file = output_file
        self.output = []

    def save_output_file(self):
        """Write the accumulated XML lines to the output file.

        Uses a context manager so the handle is flushed and closed even if
        a write fails (the original never closed it).
        """
        with open(self.output_file, 'w') as output_file:
            for line in self.output:
                output_file.write(line + '\n')

    def compile_class(self):
        """Compiles a complete class.

        class: 'class' className '{' classVarDec* subroutineDec* '}'
        """
        self.output.append('<class>')
        self._handle_keyword()        # 'class'
        self._handle_identifier()     # className
        self._handle_symbol()         # '{'
        # classVarDec*
        while self.tokenizer.peek_at_next_token() in [STATIC, FIELD]:
            self.compile_class_var_dec()
        # subroutineDec*
        while self.tokenizer.peek_at_next_token() in [CONSTRUCTOR, FUNCTION,
                                                      METHOD]:
            self.compile_subroutine_dec()
        self._handle_symbol()         # '}'
        self.output.append('</class>')

    def compile_class_var_dec(self):
        """Compiles a static variable declaration, or a field declaration.

        classVarDec: ('static'|'field') type varName(',' varName)* ';'
        """
        self.output.append('<classVarDec>')
        self._handle_keyword()        # ('static'|'field')
        self._handle_type()           # type
        self._handle_identifier()     # varName
        while self.tokenizer.peek_at_next_token() == ',':
            self._handle_symbol()     # ','
            self._handle_identifier()  # varName
        self._handle_symbol()         # ';'
        self.output.append('</classVarDec>')

    def compile_subroutine_dec(self):
        """Compiles a complete method, function, or constructor.

        subroutineDec: ('constructor'|'function'|'method') ('void'|type)
        subroutineName '(' parameterList ')' subroutineBody
        """
        self.output.append('<subroutineDec>')
        self._handle_keyword()        # ('constructor'|'function'|'method')
        if self.tokenizer.peek_at_next_token() == VOID:
            self._handle_keyword()    # 'void'
        else:
            self._handle_type()       # type
        self._handle_identifier()     # subroutineName
        self._handle_symbol()         # '('
        self.compile_parameter_list()
        self._handle_symbol()         # ')'
        self.compile_subroutine_body()
        self.output.append('</subroutineDec>')

    def compile_parameter_list(self):
        """Compiles a (possibly empty) parameter list.

        Does not handle the enclosing "()".
        parameterList: ((type varName) (',' type varName)*)?
        """
        self.output.append('<parameterList>')
        # ((type varName) (',' type varName)*)? -- empty when ')' follows.
        if self.tokenizer.peek_at_next_token() != ')':
            self._handle_type()           # type
            self._handle_identifier()     # varName
            while self.tokenizer.peek_at_next_token() != ')':
                self._handle_symbol()     # ','
                self._handle_type()       # type
                self._handle_identifier()  # varName
        self.output.append('</parameterList>')

    def compile_subroutine_body(self):
        """Compiles a subroutine's body.

        subroutineBody: '{' varDec* statements '}'
        """
        self.output.append('<subroutineBody>')
        self._handle_symbol()         # '{'
        while self.tokenizer.peek_at_next_token() == VAR:
            self.compile_var_dec()
        self.compile_statements()
        self._handle_symbol()         # '}'
        self.output.append('</subroutineBody>')

    def compile_var_dec(self):
        """Compiles a var declaration.

        varDec: 'var' type varName (',' varName)* ';'
        """
        self.output.append('<varDec>')
        self._handle_keyword()        # 'var'
        self._handle_type()           # type
        self._handle_identifier()     # varName
        while self.tokenizer.peek_at_next_token() == ',':
            self._handle_symbol()     # ','
            self._handle_identifier()  # varName
        self._handle_symbol()         # ';'
        self.output.append('</varDec>')

    def compile_statements(self):
        """Compiles a sequence of statements.

        Does not handle the enclosing "{}".
        statements: statement*
        """
        self.output.append('<statements>')
        next_token = self.tokenizer.peek_at_next_token()
        while next_token in [LET, IF, WHILE, DO, RETURN]:
            if next_token == LET:
                self.compile_let()
            elif next_token == IF:
                self.compile_if()
            elif next_token == WHILE:
                self.compile_while()
            elif next_token == DO:
                self.compile_do()
            elif next_token == RETURN:
                self.compile_return()
            next_token = self.tokenizer.peek_at_next_token()
        self.output.append('</statements>')

    def compile_let(self):
        """Compiles a let statement.

        letStatement: 'let' varName('[' expression ']')? '=' expression ';'
        """
        self.output.append('<letStatement>')
        self._handle_keyword()        # 'let'
        self._handle_identifier()     # varName
        if self.tokenizer.peek_at_next_token() == '[':
            self._handle_symbol()     # '['
            self.compile_expression()  # expression
            self._handle_symbol()     # ']'
        self._handle_symbol()         # '='
        self.compile_expression()     # expression
        self._handle_symbol()         # ';'
        self.output.append('</letStatement>')

    def compile_if(self):
        """Compiles a if statement.

        ifStatement: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?
        """
        self.output.append('<ifStatement>')
        self._handle_keyword()        # 'if'
        self._handle_symbol()         # '('
        self.compile_expression()     # expression
        self._handle_symbol()         # ')'
        self._handle_symbol()         # '{'
        self.compile_statements()     # statements
        self._handle_symbol()         # '}'
        if self.tokenizer.peek_at_next_token() == ELSE:
            self._handle_keyword()    # 'else'
            self._handle_symbol()     # '{'
            self.compile_statements()  # statements
            self._handle_symbol()     # '}'
        self.output.append('</ifStatement>')

    def compile_while(self):
        """Compiles a while statement.

        whileStatement: 'while' '(' expression ')' '{' statements '}'
        """
        self.output.append('<whileStatement>')
        self._handle_keyword()        # 'while'
        self._handle_symbol()         # '('
        self.compile_expression()     # expression
        self._handle_symbol()         # ')'
        self._handle_symbol()         # '{'
        self.compile_statements()     # statements
        self._handle_symbol()         # '}'
        self.output.append('</whileStatement>')

    def compile_do(self):
        """Compiles a do statement.

        doStatement: 'do' subroutineCall ';'
        """
        self.output.append('<doStatement>')
        self._handle_keyword()        # 'do'
        self.compile_subroutine_call()  # subroutineCall
        self._handle_symbol()         # ';'
        self.output.append('</doStatement>')

    def compile_subroutine_call(self):
        """subroutineCall: subroutineName'('expressionList')'|
        (className|varName)'.'subroutineName'('expressionList')'
        """
        self._handle_identifier()     # subroutineName or (className|varName)
        if self.tokenizer.peek_at_next_token() == '.':
            self._handle_symbol()     # '.'
            self._handle_identifier()  # subroutineName
        self._handle_symbol()         # '('
        self.compile_expression_list()  # expressionList
        self._handle_symbol()         # ')'

    def compile_expression_list(self):
        """Compiles a (possibly empty) comma-separated list of expressions.

        expressionList: (expression (','expression)* )?
        """
        self.output.append('<expressionList>')
        if self.tokenizer.peek_at_next_token() != ')':
            self.compile_expression()     # expression
            while self.tokenizer.peek_at_next_token() != ')':
                self._handle_symbol()     # ','
                self.compile_expression()  # expression
        self.output.append('</expressionList>')

    def compile_return(self):
        """Compiles a return statement.

        returnStatement: 'return' expression? ';'
        """
        self.output.append('<returnStatement>')
        self._handle_keyword()        # 'return'
        if self.tokenizer.peek_at_next_token() != ';':
            self.compile_expression()
        self._handle_symbol()         # ';'
        self.output.append('</returnStatement>')

    def compile_expression(self):
        """Compiles an expression.

        expression: term (op term)*
        """
        self.output.append('<expression>')
        self.compile_term()
        while self.tokenizer.peek_at_next_token() in OPS:
            self._handle_symbol()     # op
            self.compile_term()       # term
        self.output.append('</expression>')

    def compile_term(self):
        """Compiles a term.

        If the current token is an identifier, the routine must distinguish
        between a variable, an array entry, or a subroutine call.  A single
        look-ahead token, which may be one of "[", "(", or ".", suffices to
        distinguish between the possibilities.  Any other token is not part
        of this term and should not be advanced over.

        term: integerConstant|stringConstant|keywordConstant|varName|
        varName'['expression']'|subroutineCall|'('expression')'|unaryOp term
        """
        self.output.append('<term>')
        self.tokenizer.advance()
        token_type = self.tokenizer.token_type()
        if token_type == INT_CONST:
            self.output.append(
                "<integerConstant> {} </integerConstant>".format(
                    self.tokenizer.int_val()))
        elif token_type == STRING_CONST:
            self.output.append(
                "<stringConstant> {} </stringConstant>".format(
                    self.tokenizer.string_val()))
        elif token_type == KEYWORD:
            self.output.append(
                "<keyword> {} </keyword>".format(self.tokenizer.keyword()))
        elif token_type == IDENTIFIER:
            # varName|varName'['expression']'|subroutineCall
            self.output.append(
                "<identifier> {} </identifier>".format(
                    self.tokenizer.identifier()))
            next_token = self.tokenizer.peek_at_next_token()
            if next_token == '[':
                # varName'['expression']'
                self._handle_symbol()     # '['
                self.compile_expression()  # expression
                self._handle_symbol()     # ']'
            elif next_token == '(':
                # subroutineCall
                self._handle_symbol()     # '('
                self.compile_expression_list()  # expressionList
                self._handle_symbol()     # ')'
            elif next_token == '.':
                # subroutineCall
                self._handle_symbol()     # '.'
                self._handle_identifier()  # subroutineName
                self._handle_symbol()     # '('
                self.compile_expression_list()  # expressionList
                self._handle_symbol()     # ')'
        elif self.tokenizer.current_token == '(':
            # '('expression')'
            self.output.append(
                "<symbol> {} </symbol>".format(self.tokenizer.symbol()))  # '('
            self.compile_expression()     # expression
            self._handle_symbol()         # ')'
        elif self.tokenizer.current_token in ['-', '~']:
            # unaryOp term
            self.output.append(
                "<symbol> {} </symbol>".format(self.tokenizer.symbol()))
            self.compile_term()
        else:
            raise Exception(
                "Token '{}' not matched to any term".format(
                    self.tokenizer.current_token))
        self.output.append('</term>')

    def _handle_type(self):
        """ type: 'int'|'char'|'boolean'|className"""
        self.tokenizer.advance()
        if self.tokenizer.current_token in [INT, CHAR, BOOLEAN]:
            self.output.append(
                "<keyword> {} </keyword>".format(self.tokenizer.keyword()))
        else:
            self.output.append(
                "<identifier> {} </identifier>".format(
                    self.tokenizer.identifier()))

    def _handle_keyword(self):
        """Advance and emit the current token as a <keyword> element."""
        self.tokenizer.advance()
        self.output.append(
            "<keyword> {} </keyword>".format(self.tokenizer.keyword()))

    def _handle_identifier(self):
        """Advance and emit the current token as an <identifier> element."""
        self.tokenizer.advance()
        self.output.append(
            "<identifier> {} </identifier>".format(
                self.tokenizer.identifier()))

    def _handle_symbol(self):
        """Advance and emit the current token as a <symbol> element."""
        self.tokenizer.advance()
        self.output.append(
            "<symbol> {} </symbol>".format(self.tokenizer.symbol()))

    def _handle_int_const(self):
        """Advance and emit the current token as an <integerConstant>."""
        self.tokenizer.advance()
        self.output.append(
            "<integerConstant> {} </integerConstant>".format(
                self.tokenizer.int_val()))

    def _handle_string_const(self):
        """Advance and emit the current token as a <stringConstant>."""
        self.tokenizer.advance()
        self.output.append(
            "<stringConstant> {} </stringConstant>".format(
                self.tokenizer.string_val()))
class CompilationEngine:
    """Compiles a tokenized Jack class directly to VM code.

    Drives a JackTokenizer, tracks identifiers in a SymbolTable, and emits
    VM commands through a VMWriter.  The whole compilation runs from the
    constructor.  The tokenizer cursor is advanced by side effect
    throughout, so statement order in every method is significant.

    Token-type codes used below (from the tokenizer): 0 = keyword,
    1 = symbol, 2 = identifier, 3 = integer constant, 4 = string constant.
    """

    def __init__(self, inputFile, outputFile):
        """Build the pipeline objects and compile the whole class.

        :param inputFile: Jack source handed to JackTokenizer.
        :param outputFile: destination handed to VMWriter.
        """
        self.tokenizer = JackTokenizer(inputFile)
        self.vmWriter = VMWriter(outputFile)
        self.symbolTable = SymbolTable()
        self.classname = ""
        # NOTE(review): CompileClass() runs before the counters below are
        # assigned; this works only because CompileSubroutine() resets both
        # to 0 before they are read — confirm before reordering.
        self.CompileClass()
        self.whilecounter = 0
        self.ifcounter = 0

    def CompileClass(self):
        """Compile: 'class' className '{' classVarDec* subroutineDec* '}'."""
        # classname
        self.tokenizer.advance()
        self.classname = self.tokenizer.identifier()
        self.tokenizer.advance()
        # ignore {
        self.tokenizer.advance()
        # classVarDec* — field/static declarations only populate the table.
        while self.tokenizer.keyWord() == "static" or \
                self.tokenizer.keyWord() == "field":
            self.CompileClassVarDec()
        # subroutineDec*
        while self.tokenizer.keyWord() == "constructor" or \
                self.tokenizer.keyWord() == "function" or \
                self.tokenizer.keyWord() == "method":
            self.CompileSubroutine()
        # ignore }
        self.tokenizer.advance()

    def CompileClassVarDec(self):
        """Compile: ('static'|'field') type varName (',' varName)* ';'.

        Emits no VM code; only records the variables in the symbol table.
        """
        kind = self.tokenizer.keyWord()
        self.tokenizer.advance()
        type = self.compileType()
        name = self.tokenizer.identifier()
        self.symbolTable.define(name, type, kind)
        self.tokenizer.advance()
        # add the rest of var names, if there are
        while self.tokenizer.symbol() == ",":
            self.tokenizer.advance()
            name = self.tokenizer.identifier()
            self.symbolTable.define(name, type, kind)
            self.tokenizer.advance()
        # ignore ;
        self.tokenizer.advance()

    def CompileSubroutine(self):
        """Compile one constructor/function/method declaration and body."""
        self.symbolTable.startSubroutine()
        # Label counters are per-subroutine in this implementation.
        self.ifcounter = 0
        self.whilecounter = 0
        # constructor | function | method
        functype = self.tokenizer.keyWord()
        self.tokenizer.advance()
        if functype == "method":
            # Methods receive the object as implicit argument 0.
            self.symbolTable.define("this", self.classname, "arg")
        self.tokenizer.advance()
        subrotineName = self.classname + "." + self.tokenizer.identifier()
        self.tokenizer.advance()
        # ( parameterList )
        self.tokenizer.advance()
        self.compileParameterList()
        self.tokenizer.advance()
        # subrotineBody
        # ignore {
        self.tokenizer.advance()
        # varDec* — must be scanned first so varCount("var") is right below.
        while self.tokenizer.keyWord() == "var":
            self.compileVarDec()
        self.vmWriter.writeFunction(subrotineName,
                                    self.symbolTable.varCount("var"))
        # allocate memory for constructor
        # if functype == "constructor":
        #     self.vmWriter.writePush("constant" , self.symbolTable.varCount("field"))
        #     self.vmWriter.writeCall("Memory.alloc", "1")
        if functype == "constructor" or functype == "method":
            if functype == "constructor":
                # Allocate one word per field; Memory.alloc leaves the base
                # address on the stack.
                self.vmWriter.writePush("constant",
                                        self.symbolTable.varCount("field"))
                self.vmWriter.writeCall("Memory.alloc", "1")
            else:
                # Methods: argument 0 is the receiver.
                self.vmWriter.writePush("argument", "0")
            # Anchor THIS at the object base address.
            self.vmWriter.writePop("pointer", "0")
        # statements
        self.compileStatements()
        # ignore }
        self.tokenizer.advance()

    def compileParameterList(self):
        """Compile ((type varName) (',' type varName)*)? into the table."""
        # if not ) — token type 1 is a symbol, i.e. the closing paren.
        if self.tokenizer.tokenType() != 1:
            # type varName
            argtype = self.compileType()
            argname = self.tokenizer.identifier()
            self.symbolTable.define(argname, argtype, "arg")
            self.tokenizer.advance()
            # (, type varName)*
            while self.tokenizer.symbol() == ",":
                self.tokenizer.advance()
                argtype = self.compileType()
                argname = self.tokenizer.identifier()
                self.symbolTable.define(argname, argtype, "arg")
                self.tokenizer.advance()

    def compileVarDec(self):
        """Compile: 'var' type varName (',' varName)* ';' into the table."""
        # var
        self.tokenizer.advance()
        # type
        type = self.compileType()
        # varName
        varname = self.tokenizer.identifier()
        self.symbolTable.define(varname, type, "var")
        self.tokenizer.advance()
        # (, varName)*
        while self.tokenizer.symbol() == ",":
            self.tokenizer.advance()
            varname = self.tokenizer.identifier()
            self.symbolTable.define(varname, type, "var")
            self.tokenizer.advance()
        # ignore ;
        self.tokenizer.advance()

    def compileStatements(self):
        """Dispatch let/if/while/do/return while the token is a keyword."""
        while self.tokenizer.tokenType() == 0:
            if self.tokenizer.keyWord() == "let":
                self.compileLet()
            elif self.tokenizer.keyWord() == "if":
                self.compileIf()
            elif self.tokenizer.keyWord() == "while":
                self.compileWhile()
            elif self.tokenizer.keyWord() == "do":
                self.compileDo()
            elif self.tokenizer.keyWord() == "return":
                self.compileReturn()

    def compileDo(self):
        """Compile: 'do' subroutineCall ';' — discard the return value."""
        self.tokenizer.advance()
        self.compileSubRoutineCall()
        # A do statement ignores the call's result.
        self.vmWriter.writePop("temp", "0")
        # ignore ;
        self.tokenizer.advance()

    def compileLet(self):
        """Compile: 'let' varName ('[' expression ']')? '=' expression ';'."""
        # let
        self.tokenizer.advance()
        # varName
        varname = self.tokenizer.identifier()
        varkind = self.symbolTable.kindOf(varname)
        self.tokenizer.advance()
        # ([ expression ])?
        if self.tokenizer.symbol() == "[":
            self.tokenizer.advance()
            self.CompileExpression()
            # Push the array base and add the index already on the stack.
            if varkind == "field":
                self.vmWriter.writePush("this",
                                        self.symbolTable.indexOf(varname))
            elif varkind == "var":
                self.vmWriter.writePush("local",
                                        self.symbolTable.indexOf(varname))
            elif varkind == "arg":
                self.vmWriter.writePush("argument",
                                        self.symbolTable.indexOf(varname))
            elif varkind == "static":
                self.vmWriter.writePush("static",
                                        self.symbolTable.indexOf(varname))
            self.vmWriter.writeArithmetic("add")
            # ignore ]
            self.tokenizer.advance()
            # ignore =
            self.tokenizer.advance()
            self.CompileExpression()
            # Stash the RHS in temp 0, anchor THAT at the target cell, then
            # store — the standard Jack array-assignment sequence.
            self.vmWriter.writePop("temp", "0")
            # that
            self.vmWriter.writePop("pointer", "1")
            self.vmWriter.writePush("temp", "0")
            self.vmWriter.writePop("that", "0")
            self.tokenizer.advance()
        else:
            # ignore =
            self.tokenizer.advance()
            # expression
            self.CompileExpression()
            if varkind == "field":
                self.vmWriter.writePop("this",
                                       self.symbolTable.indexOf(varname))
            elif varkind == "var":
                self.vmWriter.writePop("local",
                                       self.symbolTable.indexOf(varname))
            elif varkind == "arg":
                self.vmWriter.writePop("argument",
                                       self.symbolTable.indexOf(varname))
            elif varkind == "static":
                self.vmWriter.writePop("static",
                                       self.symbolTable.indexOf(varname))
            # ignore ;
            self.tokenizer.advance()

    def compileWhile(self):
        """Compile a while statement using WHILE_EXP/WHILE_END labels."""
        # while
        self.tokenizer.advance()
        # ( expression )
        self.tokenizer.advance()
        # Reserve this loop's label index before compiling the body, which
        # may contain nested whiles that bump the counter.
        whileindex = self.whilecounter
        self.whilecounter += 1
        self.vmWriter.writeLabel("WHILE_EXP" + str(whileindex))
        self.CompileExpression()
        # Exit the loop when the condition is false.
        self.vmWriter.writeArithmetic("not")
        self.vmWriter.writeIf("WHILE_END" + str(whileindex))
        self.tokenizer.advance()
        # ignore {
        self.tokenizer.advance()
        # statements
        self.compileStatements()
        # ignore }
        self.tokenizer.advance()
        self.vmWriter.writeGoto("WHILE_EXP" + str(whileindex))
        self.vmWriter.writeLabel("WHILE_END" + str(whileindex))

    def compileReturn(self):
        """Compile: 'return' expression? ';' (void returns push constant 0)."""
        # return
        self.tokenizer.advance()
        # expression?
        if self.isTerm():
            self.CompileExpression()
            self.vmWriter.writeReturn()
        else:
            # Void subroutine: the VM contract still requires a value.
            self.vmWriter.writePush("constant", "0")
            self.vmWriter.writeReturn()
        # ignore;
        self.tokenizer.advance()

    def compileIf(self):
        """Compile an if/else using IF_TRUE/IF_FALSE/IF_END labels."""
        # if
        self.tokenizer.advance()
        # ( expression )
        self.tokenizer.advance()
        self.CompileExpression()
        # Reserve this statement's label index before nested ifs run.
        ifindex = self.ifcounter
        self.ifcounter += 1
        self.vmWriter.writeIf("IF_TRUE" + str(ifindex))
        self.vmWriter.writeGoto("IF_FALSE" + str(ifindex))
        self.vmWriter.writeLabel("IF_TRUE" + str(ifindex))
        self.tokenizer.advance()
        # { statements }
        self.tokenizer.advance()
        self.compileStatements()
        self.tokenizer.advance()
        if self.tokenizer.tokenType() == 0 and \
                self.tokenizer.keyWord() == "else":
            # else
            self.vmWriter.writeGoto("IF_END" + str(ifindex))
            self.vmWriter.writeLabel("IF_FALSE" + str(ifindex))
            self.tokenizer.advance()
            # { statements }
            self.tokenizer.advance()
            self.compileStatements()
            self.tokenizer.advance()
            self.vmWriter.writeLabel("IF_END" + str(ifindex))
        else:
            self.vmWriter.writeLabel("IF_FALSE" + str(ifindex))

    def CompileExpression(self):
        """Compile: term (op term)* — operators applied left to right."""
        # term
        self.CompileTerm()
        # (op term)*
        op = self.tokenizer.symbol()
        while self.tokenizer.tokenType() == 1 and op in operators:
            self.tokenizer.advance()
            self.CompileTerm()
            # Emit the operator after its right operand (postfix VM order).
            if op == "=":
                self.vmWriter.writeArithmetic("eq")
            elif op == "+":
                self.vmWriter.writeArithmetic("add")
            elif op == "-":
                self.vmWriter.writeArithmetic("sub")
            elif op == "*":
                # No VM multiply instruction; call the OS routine.
                self.vmWriter.writeCall("Math.multiply", "2")
            elif op == "/":
                self.vmWriter.writeCall("Math.divide", "2")
            elif op == "&":
                self.vmWriter.writeArithmetic("and")
            elif op == "|":
                self.vmWriter.writeArithmetic("or")
            elif op == "<":
                self.vmWriter.writeArithmetic("lt")
            elif op == ">":
                self.vmWriter.writeArithmetic("gt")
            op = self.tokenizer.symbol()

    def CompileTerm(self):
        """Compile one term: constant, keyword constant, variable, array
        access, subroutine call, parenthesized expression, or unary op."""
        if self.tokenizer.tokenType() == 3:
            # integerConstant
            self.vmWriter.writePush("constant", self.tokenizer.intVal())
            self.tokenizer.advance()
        elif self.tokenizer.tokenType() == 4:
            # stringConstant: build a String object character by character.
            conststring = self.tokenizer.stringVal()
            self.vmWriter.writePush("constant", str(len(conststring)))
            self.vmWriter.writeCall("String.new", "1")
            for i in range(len(conststring)):
                self.vmWriter.writePush("constant", str(ord(conststring[i])))
                self.vmWriter.writeCall("String.appendChar", "2")
            self.tokenizer.advance()
        elif self.tokenizer.tokenType() == 0:
            # keywordConstant: true = -1 (not 0), false/null = 0, this = pointer 0.
            keywordconst = self.tokenizer.keyWord()
            if keywordconst == "true":
                self.vmWriter.writePush("constant", "0")
                self.vmWriter.writeArithmetic("not")
            elif keywordconst == "false" or keywordconst == "null":
                self.vmWriter.writePush("constant", "0")
            elif keywordconst == "this":
                self.vmWriter.writePush("pointer", "0")
            self.tokenizer.advance()
        elif self.tokenizer.tokenType() == 2:
            # Identifier: one token of lookahead picks the production.
            # varName [ expression]
            if self.tokenizer.tokens[self.tokenizer.currentToken + 1] == '[':
                varname = self.tokenizer.identifier()
                varkind = self.symbolTable.kindOf(varname)
                self.tokenizer.advance()
                # [ expression ]
                self.tokenizer.advance()
                self.CompileExpression()
                if varkind == "field":
                    self.vmWriter.writePush("this",
                                            self.symbolTable.indexOf(varname))
                elif varkind == "var":
                    self.vmWriter.writePush("local",
                                            self.symbolTable.indexOf(varname))
                elif varkind == "arg":
                    self.vmWriter.writePush("argument",
                                            self.symbolTable.indexOf(varname))
                elif varkind == "static":
                    self.vmWriter.writePush("static",
                                            self.symbolTable.indexOf(varname))
                self.vmWriter.writeArithmetic("add")
                # that — anchor THAT at the cell and push its contents.
                self.vmWriter.writePop("pointer", "1")
                self.vmWriter.writePush("that", "0")
                self.tokenizer.advance()
            # subrutine call
            elif self.tokenizer.tokens[self.tokenizer.currentToken + 1] == '(' \
                    or self.tokenizer.tokens[
                        self.tokenizer.currentToken + 1] == '.':
                self.compileSubRoutineCall()
            # varname
            else:
                varname = self.tokenizer.identifier()
                varkind = self.symbolTable.kindOf(varname)
                if varkind == "field":
                    self.vmWriter.writePush("this",
                                            self.symbolTable.indexOf(varname))
                elif varkind == "var":
                    self.vmWriter.writePush("local",
                                            self.symbolTable.indexOf(varname))
                elif varkind == "arg":
                    self.vmWriter.writePush("argument",
                                            self.symbolTable.indexOf(varname))
                elif varkind == "static":
                    self.vmWriter.writePush("static",
                                            self.symbolTable.indexOf(varname))
                self.tokenizer.advance()
        elif self.tokenizer.tokenType() == 1 and self.tokenizer.symbol() == '(':
            # ( expression )
            self.tokenizer.advance()
            self.CompileExpression()
            self.tokenizer.advance()
        else:
            # unary!!! — '-' or '~' applied to the following term.
            op = self.tokenizer.symbol()
            self.tokenizer.advance()
            self.CompileTerm()
            if op == "-":
                self.vmWriter.writeArithmetic("neg")
            elif op == "~":
                self.vmWriter.writeArithmetic("not")

    def compileSubRoutineCall(self):
        """Compile a subroutine call and emit the VM 'call'.

        Handles three shapes: bare name (method on this object),
        var.method (push the receiver first), and Class.function.
        """
        # subroutineName | (className | varName)
        identifier = self.tokenizer.identifier()
        self.tokenizer.advance()
        # no "." only name
        if self.tokenizer.symbol() == '(':
            # ( expressionList ) -- subroutine of type method
            self.tokenizer.advance()
            # Implicit receiver: the current object.
            self.vmWriter.writePush("pointer", "0")
            argnum = self.CompileExpressionList()
            # +1 accounts for the pushed receiver.
            self.vmWriter.writeCall(self.classname + "." + identifier,
                                    str(argnum + 1))
            self.tokenizer.advance()
        else:
            # . -- class.function or var.method
            self.tokenizer.advance()
            # subroutineName
            subname = self.tokenizer.identifier()
            self.tokenizer.advance()
            self.tokenizer.advance()
            if identifier in self.symbolTable.classtable or \
                    identifier in self.symbolTable.subroutinetable:
                # varname!!! — a known variable: method call on that object.
                if identifier in self.symbolTable.subroutinetable:
                    if self.symbolTable.kindOf(identifier) == "var":
                        self.vmWriter.writePush(
                            "local", self.symbolTable.indexOf(identifier))
                    else:
                        self.vmWriter.writePush(
                            "argument", self.symbolTable.indexOf(identifier))
                else:
                    if self.symbolTable.kindOf(identifier) == "static":
                        self.vmWriter.writePush(
                            "static", self.symbolTable.indexOf(identifier))
                    else:
                        self.vmWriter.writePush(
                            "this", self.symbolTable.indexOf(identifier))
                argnum = self.CompileExpressionList()
                # Dispatch on the variable's declared class type.
                identifierclass = self.symbolTable.typeOf(identifier)
                self.vmWriter.writeCall(identifierclass + "." + subname,
                                        str(argnum + 1))
            else:
                # Unknown identifier: treat as a class-level function call.
                argnum = self.CompileExpressionList()
                self.vmWriter.writeCall(identifier + "." + subname,
                                        str(argnum))
            self.tokenizer.advance()

    def CompileExpressionList(self):
        """Compile (expression (',' expression)*)?; return the count."""
        # (expression
        i = 0
        if self.isTerm():
            i += 1
            # (, expression)
            self.CompileExpression()
            while self.tokenizer.symbol() == ',':
                i += 1
                self.tokenizer.advance()
                self.CompileExpression()
        return i

    def isTerm(self):
        """Return True when the current token can start a term."""
        # Integer or string constant.
        if self.tokenizer.tokenType() == 3 or self.tokenizer.tokenType() == 4:
            return True
        # Keyword constant (true/false/null/this).
        if self.tokenizer.tokenType() == 0 and \
                self.tokenizer.keyWord() in keyword_const:
            return True
        # Parenthesized expression.
        if self.tokenizer.tokenType() == 1 and self.tokenizer.symbol() == '(':
            return True
        # Unary operator.
        if self.tokenizer.tokenType() == 1 and \
                (self.tokenizer.symbol() == '-' or
                 self.tokenizer.symbol() == '~'):
            return True
        # Identifier.
        if self.tokenizer.tokenType() == 2:
            return True
        return False

    def compileType(self):
        """Consume a type token (keyword or class name) and return it."""
        if self.tokenizer.tokenType() == 0:
            typen = self.tokenizer.keyWord()
        else:
            typen = self.tokenizer.identifier()
        self.tokenizer.advance()
        return typen
class CompilationEngine():
    """Recursive-descent parser for the Jack language that emits an XML
    parse tree (project 10 style: structural tags such as <class>,
    <expression>, plus terminal tags such as <keyword>).

    Output indentation is tracked in ``self.num_spaces``; speculative
    output for expressions/terms is staged in ``self.buffer`` and only
    flushed to the file when a term actually parses (see ``write`` with
    ``use_buffer=True`` and ``cleanbuffer``).
    """

    def __init__(self, input_file, output_file):
        """Create a compilation engine and immediately compile the input.

        NOTE(review): unusually, __init__ drives the whole compilation —
        it opens the output, reads tokens, and calls compile_class().

        :param input_file: path/stream handed to JackTokenizer.
        :param output_file: path of the XML file to write.
        :raises KeyError: if the first token is not the 'class' keyword.
        """
        self.tokenizer = JackTokenizer(input_file)
        # Current output indentation depth (in SPACE units).
        self.num_spaces = 0
        # Staging buffer for speculative expression/term output.
        self.buffer = ""
        with open(output_file, 'w') as self.output:
            while self.tokenizer.has_more_tokens():
                self.tokenizer.advance()
                # A Jack file must start with a keyword token ('class').
                assert self.tokenizer.token_type() == Token_Types.keyword
                if self.tokenizer.keyWord() == 'class':
                    self.compile_class()
                else:
                    raise KeyError(
                        "Received a token that does not fit the beginning of a "
                        "module. " + self.tokenizer.keyWord() + " in " +
                        input_file)

    def compile_class(self):
        """Compiles a complete class: 'class' className '{' classVarDec*
        subroutineDec* '}'.
        :return:
        """
        self.write('class', delim=True)
        self.num_spaces += 1
        self.write_terminal(self.tokenizer.token_type().value,
                            self.tokenizer.keyWord())
        self.eat('class')
        # className identifier.
        t_type, class_name = self.tokenizer.token_type(
        ), self.tokenizer.identifier()
        self.write_terminal(t_type.value, class_name)
        self.tokenizer.advance()
        t_type, symbol = self.tokenizer.token_type(), self.tokenizer.symbol()
        self.write_terminal(t_type.value, symbol)
        self.eat('{')
        t_type = self.tokenizer.token_type()
        # Class body: var declarations and subroutines until the closing '}'.
        while t_type != Token_Types.symbol:
            operation = self.tokenizer.keyWord()
            if operation in ['static', 'field']:
                self.compile_class_var_dec()
            elif operation in ROUTINES:
                self.compile_subroutine()
            else:
                raise KeyError(
                    "Found statement that does not fit class declaration. ",
                    operation)
            # self.tokenizer.advance()
            t_type = self.tokenizer.token_type()
        # The closing '}' of the class.
        self.write_terminal(t_type.value, self.tokenizer.symbol())
        self.num_spaces -= 1
        self.write('class', delim=True, end=True)

    def eat(self, string):
        """If the given string equals the current token (only for keyword or
        symbol tokens) the tokenizer is advanced, otherwise an exception is
        raised.

        :param string: the expected string.
        :raise: the current token is not the expected string.
        """
        type = self.tokenizer.token_type()
        # NOTE(review): 'type' shadows the builtin; kept as-is here.
        value = "not keyword and not symbol"
        if type == Token_Types.keyword:
            value = self.tokenizer.keyWord()
        elif type == Token_Types.symbol:
            value = self.tokenizer.symbol()
        if value != string:
            raise Exception("Received '" + value +
                            "' which is not the expected string: '" + string +
                            "'")
        # assert value == string
        self.tokenizer.advance()

    def compile_class_var_dec(self):
        """Compiles a static declaration or a field declaration:
        ('static'|'field') type varName (',' varName)* ';'.
        """
        self.write("classVarDec", True)
        self.num_spaces += 1
        # First word is static or field.
        # if self.tokenizer.token_type() != Token_Types.keyword:
        #     raise Exception("Cant compile class variable declaration without keyword token.")
        # should i check before if i can get a keyword?
        var_sort = self.tokenizer.keyWord()
        if var_sort not in ["static", "field"]:
            raise Exception(
                "Cant compile class variable declaration without static of field."
            )
        self.write("<keyword> " + var_sort + " </keyword>")
        self.tokenizer.advance()
        # Second word is type: a builtin keyword type or a class identifier.
        if self.tokenizer.token_type() == Token_Types.keyword:
            var_type = self.tokenizer.keyWord()
            if var_type not in ["int", "char", "boolean"]:
                raise Exception(
                    "Cant compile class variable declaration with invalid keyword type."
                )
            self.write("<keyword> " + var_type + " </keyword>")
            self.tokenizer.advance()
        elif self.tokenizer.token_type() == Token_Types.identifier:
            self.write("<identifier> " + self.tokenizer.identifier() +
                       " </identifier>")
            self.tokenizer.advance()
        else:
            raise Exception(
                "Cant compile class variable declaration with invalid identifier type."
            )
        # Third and so on, are variables names.
        # if self.tokenizer.token_type() != Token_Types.identifier:
        #     raise Exception("Cant compile class variable declaration without varName identifier.")
        # assert self.tokenizer.token_type() == Token_Types.identifier
        self.write("<identifier> " + self.tokenizer.identifier() +
                   " </identifier>")
        self.tokenizer.advance()
        self.possible_varName()
        # It will always end with ';'
        self.eat(';')
        self.write("<symbol> ; </symbol>")
        self.num_spaces -= 1
        self.write("classVarDec", True, True)

    def possible_varName(self):
        """Compile 0 or more additional variable names (',' varName)* after
        an existing variable name. Uses eat(',') failure as the stop signal.
        """
        try:
            self.eat(',')
        except:
            # There is no varName
            return
        # There is an varName
        self.write("<symbol> , </symbol>")
        # if self.tokenizer.token_type() != Token_Types.identifier:
        #     raise Exception("Cant compile (class or not) variable declaration without varName" +
        #                     " identifier after ',' .")
        self.write("<identifier> " + self.tokenizer.identifier() +
                   " </identifier>")
        self.tokenizer.advance()
        self.possible_varName()

    def compile_subroutine(self):
        """Compiles a complete method, function or constructor.
        :return:
        """
        self.write('subroutineDec', delim=True)
        self.num_spaces += 1
        self.write_terminal(self.tokenizer.token_type().value,
                            self.tokenizer.keyWord())
        # self.eat('function' | 'method' | 'constructor')
        self.tokenizer.advance()
        # Return type: 'void'/builtin keyword or a class-name identifier.
        t_type = self.tokenizer.token_type()
        if t_type == Token_Types.keyword:
            func_type = self.tokenizer.keyWord()
        else:
            func_type = self.tokenizer.identifier()
        self.write_terminal(t_type.value, func_type)
        # self.eat('void' | some other type)
        self.tokenizer.advance()
        t_type, func_name = self.tokenizer.token_type(
        ), self.tokenizer.identifier()
        self.write_terminal(t_type.value, func_name)
        self.tokenizer.advance()
        t_type, symbol = self.tokenizer.token_type(), self.tokenizer.symbol()
        self.write_terminal(t_type.value, symbol)
        self.eat('(')
        self.compile_param_list()
        t_type, symbol = self.tokenizer.token_type(), self.tokenizer.symbol()
        self.write_terminal(t_type.value, symbol)
        self.eat(')')
        self.write("subroutineBody", delim=True)
        self.num_spaces += 1
        t_type, symbol = self.tokenizer.token_type(), self.tokenizer.symbol()
        self.write_terminal(t_type.value, symbol)
        self.eat('{')
        t_type = self.tokenizer.token_type()
        # Body: 'var' declarations, then statements, until the closing '}'.
        while t_type != Token_Types.symbol:
            token = self.tokenizer.keyWord()
            if token == 'var':
                self.compile_var_dec()
            elif token in STATEMENTS:
                self.compile_statements()
            else:
                raise KeyError("an unknown step inside a subroutine")
            # self.tokenizer.advance()
            t_type = self.tokenizer.token_type()
        self.write_terminal(t_type.value, self.tokenizer.symbol())
        self.eat('}')
        self.num_spaces -= 1
        self.write("subroutineBody", delim=True, end=True)
        self.num_spaces -= 1
        self.write('subroutineDec', delim=True, end=True)

    def compile_param_list(self):
        """Compiles a parameter list, which may be empty, not including the
        "()".
        :return:
        """
        self.write('parameterList', delim=True)
        self.num_spaces += 1
        t_type = self.tokenizer.token_type()
        # An immediate ')' means an empty parameter list.
        finished = t_type == Token_Types.symbol and self.tokenizer.symbol(
        ) == ")"
        while not finished:
            # Recognized type
            if t_type == Token_Types.keyword:
                token = self.tokenizer.keyWord()
            elif t_type == Token_Types.identifier:
                token = self.tokenizer.identifier()
            else:
                raise KeyError("Got some weird type in paramlist: " +
                               t_type.value)
            # Write var type
            self.write_terminal(t_type.value, token)
            self.tokenizer.advance()
            # Write var name
            t_type, token = self.tokenizer.token_type(
            ), self.tokenizer.identifier()
            self.write_terminal(t_type.value, token)
            self.tokenizer.advance()
            t_type, symbol = self.tokenizer.token_type(
            ), self.tokenizer.symbol()
            if symbol == ')':
                finished = True
            else:
                # More parameters follow: consume and emit the ','.
                self.eat(',')
                self.write_terminal(t_type.value, symbol)
                t_type = self.tokenizer.token_type()
        self.num_spaces -= 1
        self.write('parameterList', delim=True, end=True)

    def compile_var_dec(self):
        """Compiles a var declaration: 'var' type varName (',' varName)* ';'.
        :return:
        """
        self.write("varDec", True)
        self.num_spaces += 1
        # First word is valid.
        self.eat('var')
        self.write("<keyword> var </keyword>")
        # Second word is type.
        if self.tokenizer.token_type() == Token_Types.keyword:
            var_type = self.tokenizer.keyWord()
            if var_type not in ["int", "char", "boolean"]:
                raise Exception(
                    "Cant compile variable declaration with invalid keyword type."
                )
            self.write("<keyword> " + var_type + " </keyword>")
            self.tokenizer.advance()
        elif self.tokenizer.token_type() == Token_Types.identifier:
            self.write("<identifier> " + self.tokenizer.identifier() +
                       " </identifier>")
            self.tokenizer.advance()
        else:
            raise Exception(
                "Cant compile variable declaration with invalid identifier type."
            )
        # Third and so on, are variables names.
        # if self.tokenizer.token_type() != Token_Types.identifier:
        #     raise Exception("Cant compile variable declaration without varName identifier.")
        self.write("<identifier> " + self.tokenizer.identifier() +
                   " </identifier>")
        self.tokenizer.advance()
        self.possible_varName()
        # It will always end with ';'
        self.eat(';')
        self.write("<symbol> ; </symbol>")
        self.num_spaces -= 1
        self.write("varDec", True, True)

    def compile_statements(self):
        """Compile a sequence of 0 or more statements, not including the
        "{}".
        """
        # if self.tokenizer.token_type() != Token_Types.keyword:
        #     return
        #     # raise Exception("Can't use compile_statement if the current token isn't a keyword.")
        # statement = self.tokenizer.keyWord()
        # if statement not in ['let', 'if', 'while', 'do', 'return']:
        #     return
        self.write("statements", True)
        self.num_spaces += 1
        self.possible_single_statement()
        self.num_spaces -= 1
        self.write("statements", True, True)

    def possible_single_statement(self):
        """Compile 0 or more single statements, recursing until the current
        token is no longer a statement keyword.
        """
        if (self.tokenizer.token_type() == Token_Types.keyword
                and self.tokenizer.keyWord() in STATEMENTS):
            # if self.tokenizer.keyWord() in STATEMENTS:
            statement = self.tokenizer.keyWord()
            self.write(statement + "Statement", True)
            if statement == 'let':
                self.compile_let()
            elif statement == 'if':
                self.compile_if()
            elif statement == 'while':
                self.compile_while()
            elif statement == 'do':
                self.compile_do()
            elif statement == 'return':
                self.compile_return()
            # else:
            #     raise Exception("Invalid statement.")
            self.write(statement + "Statement", True, True)
            self.possible_single_statement()

    def compile_do(self):
        """Compile do statement: 'do' subroutineCall ';'.
        :return:
        """
        self.eat('do')
        self.num_spaces += 1
        self.write("<keyword> do </keyword>")
        # is the check is necessary? probably not..
        # if type != Token_Types.identifier:
        #     raise Exception()
        self.write("<identifier> " + self.tokenizer.identifier() +
                   " </identifier>")
        self.tokenizer.advance()
        self.subroutineCall_continue()
        self.eat(';')
        self.write("<symbol> ; </symbol>")
        self.num_spaces -= 1

    def compile_let(self):
        """Compile let statement: 'let' varName ('[' expr ']')? '=' expr ';'.
        """
        self.eat('let')
        self.num_spaces += 1
        self.write("<keyword> let </keyword>")
        # self.compile_var_dec()
        # self.write("<identifier> " + self.tokenizer.identifier() + " </identifier>")
        self.write_terminal("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()
        self.possible_array()
        self.eat('=')
        self.write("<symbol> = </symbol>")
        self.compile_expression()
        self.eat(';')
        self.write("<symbol> ; </symbol>")
        self.num_spaces -= 1
        # self.write("</letStatement>")

    def possible_array(self):
        """Compile 0 or 1 array index: '[' expression ']'."""
        try:
            self.eat('[')
        except:
            # There is no array
            return
        # There is an array
        self.write("<symbol> [ </symbol>")
        self.compile_expression()
        self.eat(']')
        self.write("<symbol> ] </symbol>")

    def compile_while(self):
        """Compile while statement: 'while' '(' expr ')' '{' statements '}'.
        """
        self.eat('while')
        # self.write("<whileStatement>")
        self.num_spaces += 1
        self.write("<keyword> while </keyword>")
        self.eat('(')
        self.write("<symbol> ( </symbol>")
        self.compile_expression()
        self.eat(')')
        self.write("<symbol> ) </symbol>")
        self.eat('{')
        self.write("<symbol> { </symbol>")
        self.compile_statements()
        self.eat('}')
        self.write("<symbol> } </symbol>")
        self.num_spaces -= 1
        # self.write("</whileStatement>")

    def compile_return(self):
        """Compile return statement: 'return' expression? ';'."""
        self.eat('return')
        self.num_spaces += 1
        self.write("<keyword> return </keyword>")
        try:
            self.eat(';')
        except:
            # would it work?
            # Not a bare return: parse the returned expression first.
            self.compile_expression()
            self.eat(';')
        self.write("<symbol> ; </symbol>")
        self.num_spaces -= 1

    def compile_if(self):
        """Compile if statement: 'if' '(' expr ')' '{' statements '}'
        ('else' '{' statements '}')?.
        """
        self.eat('if')
        # self.write("<ifStatement>")
        self.num_spaces += 1
        self.write("<keyword> if </keyword>")
        self.eat('(')
        self.write("<symbol> ( </symbol>")
        self.compile_expression()
        self.eat(')')
        self.write("<symbol> ) </symbol>")
        self.eat('{')
        self.write("<symbol> { </symbol>")
        self.compile_statements()
        self.eat('}')
        self.write("<symbol> } </symbol>")
        self.possible_else()
        self.num_spaces -= 1
        # self.write("</ifStatement>" + END_LINE)

    def possible_else(self):
        """Compile 0 or 1 else sections."""
        try:
            self.eat('else')
        except:
            # There is no else so we can return
            return
        # There is an else, so we handle it properly
        self.write("<keyword> else </keyword>")
        self.eat('{')
        self.write("<symbol> { </symbol>")
        self.compile_statements()
        self.eat('}')
        self.write("<symbol> } </symbol>")

    def compile_expression(self):
        """Compile an expression: term (op term)*.

        The opening tag is staged in self.buffer; if parsing the term
        fails the buffer is discarded (cleanbuffer) so nothing is emitted.
        :return:
        """
        self.buffer += self.num_spaces * SPACE + "<expression>\n"
        self.num_spaces += 1
        try:
            self.compile_term()
            self.possible_op_term()
            self.num_spaces -= 1
            self.write("expression", True, True)
        except:
            # NOTE(review): bare except also swallows unrelated errors.
            self.cleanbuffer()

    def subroutineCall_continue(self):
        """After an identifier there can be a '.' or '(', otherwise it is not
        a function call (subroutineCall).
        :return:
        """
        # should i check every time if it's type symbol?
        symbol = self.tokenizer.symbol()
        if symbol == '(':
            self.eat('(')
            self.write("<symbol> ( </symbol>")
            self.compile_expression_list()
            self.eat(')')
            self.write("<symbol> ) </symbol>")
        elif symbol == '.':
            self.eat('.')
            self.write("<symbol> . </symbol>")
            self.write("<identifier> " + self.tokenizer.identifier() +
                       " </identifier>")
            self.tokenizer.advance()
            self.eat('(')
            self.write("<symbol> ( </symbol>")
            self.compile_expression_list()
            self.eat(')')
            self.write("<symbol> ) </symbol>")
        else:
            raise Exception(
                "If there is a symbol in the subroutineCall it have to be . or (."
            )

    def compile_term(self):
        """Compiles a term.

        This routine is faced with a slight difficulty when trying to decide
        between some of the alternative parsing rules. Specifically, if the
        current token is an identifier, the routine must distinguish between
        a variable, an array entry, and a subroutine call. A single
        look-ahead token, which may be one of "[", "(", or "." suffices to
        distinguish between the three possibilities. Any other token is not
        part of this term and should not be advanced over.
        :return:
        """
        self.buffer += SPACE * self.num_spaces + "<term>\n"
        self.num_spaces += 1
        type = self.tokenizer.token_type()
        # maybe i should divide it for int and string
        # If the token is a string_const or int_const
        if type in [Token_Types.string_const, Token_Types.int_const]:
            value = self.tokenizer.intVal(
            ) if type == Token_Types.int_const else self.tokenizer.stringVal()
            self.write("<" + type.value + "> " + value + " </" + type.value +
                       ">",
                       use_buffer=True)
            self.tokenizer.advance()
        # If the token is a keyword
        elif type == Token_Types.keyword:
            if self.tokenizer.keyWord() in KEY_TERMS:
                self.write("<" + type.value + "> " + self.tokenizer.keyWord() +
                           " </" + type.value + ">",
                           use_buffer=True)
                self.tokenizer.advance()
            else:
                self.cleanbuffer()
                raise Exception()
        # If the token is an identifier
        elif type == Token_Types.identifier:
            # value = self.tokenizer.identifier()
            self.write("<identifier> " + self.tokenizer.identifier() +
                       " </identifier>",
                       use_buffer=True)
            self.tokenizer.advance()
            self.possible_identifier_continue()
        # If the token is an symbol
        elif type == Token_Types.symbol:
            if self.tokenizer.symbol() == '(':
                self.eat('(')
                self.write("<symbol> ( </symbol>", use_buffer=True)
                self.compile_expression()
                self.eat(')')
                self.write("<symbol> ) </symbol>")
            elif self.tokenizer.symbol() in ["-", "~"]:
                # Unary operator followed by a nested term.
                self.write("<symbol> " + self.tokenizer.symbol() +
                           " </symbol>",
                           use_buffer=True)
                self.eat(self.tokenizer.symbol())
                # self.write("<symbol> " + self.tokenizer.symbol() + " </symbol>")
                self.compile_term()
            else:
                self.cleanbuffer()
                raise Exception()
        else:
            raise Exception("Invalid token for creating term.")
        self.num_spaces -= 1
        self.write("term", True, True)

    def possible_identifier_continue(self):
        """In a term, if an identifier continues with
        - '[' - it's a call of an array
        - '.' or '(' - it's part of subroutineCall (function call)
        - nothing - it's a variable
        This function handles each of these situations after the original
        identifier was handled.
        """
        # try:
        #     self.eat("[")
        # except:
        #     if not self.tokenizer.has_more_tokens():  # already doing it by itself
        #         raise Exception()
        if self.tokenizer.token_type() == Token_Types.symbol:
            if self.tokenizer.symbol() == '[':
                self.eat('[')
                self.write("<symbol> [ </symbol>")
                self.compile_expression()
                self.eat(']')
                self.write("<symbol> ] </symbol>")
                return
            try:
                self.subroutineCall_continue()
            except Exception:
                # raise Exception("If there is a symbol in the token it have to be . or [ or (.")
                # Plain variable: the symbol belongs to the caller.
                return

    def possible_op_term(self):
        """If the next token is a suitable operation symbol then compile more
        terms, otherwise return nothing.
        """
        # There is no op term
        if self.tokenizer.token_type() != Token_Types.symbol:
            # raise Exception("After term can be only nothing or (op term)*.")
            return
        op = self.tokenizer.symbol()
        if op not in OPERANDS:
            # raise Exception("Invalid operator use in term.")
            return
        # should it be like this?
        try:
            # if op in SPECIAL_SYMBOL.keys():
            #     op = SPECIAL_SYMBOL[op]
            self.eat(op)
        except Exception:
            return
        # There is op term
        self.write("<symbol> " + op + " </symbol>")
        self.compile_term()
        self.possible_op_term()

    def compile_expression_list(self):
        """Compile a comma-separated list of expressions, which may be
        empty. Emptiness is detected by compile_expression raising.
        """
        self.write("expressionList", True)
        self.num_spaces += 1
        try:
            self.compile_expression()
        except Exception:
            # Empty list: close the tag and stop.
            self.num_spaces -= 1
            self.write("expressionList", True, True)
            return
        self.possible_more_expression()
        self.num_spaces -= 1
        self.write("expressionList", True, True)

    def possible_more_expression(self):
        """If the next token is a ',' then compile more expressions,
        otherwise return nothing.
        """
        try:
            self.eat(',')
        except Exception:
            return
        self.write("<symbol> , </symbol>")
        self.compile_expression()
        self.possible_more_expression()

    def write(self,
              statement,
              delim=False,
              end=False,
              new_line=True,
              no_space=False,
              use_buffer=False):
        """Write a statement to the output file, optionally flushing the
        staging buffer first.

        :param statement: text (or tag name when delim=True) to write.
        :param delim: wrap statement in angle brackets.
        :param end: emit a closing tag ("/" prefix).
        :param new_line: append END_LINE.
        :param no_space: skip indentation.
        :param use_buffer: flush self.buffer before writing.
        :return:
        """
        if use_buffer:
            self.output.write(self.buffer)
            self.buffer = ""
        if end:
            statement = "/" + statement
        if delim:
            statement = "<" + statement + ">"
        if not no_space:
            statement = SPACE * self.num_spaces + statement
        if new_line:
            statement += END_LINE
        self.output.write(statement)
        # if delim:
        #     self.output.write(TAB * self.num_spaces + "<" + statement + ">")
        # else:
        #     if new_line:
        #         self.output.write(END_LINE)

    def write_terminal(self, t_type, arg):
        """Write one terminal as "<t_type> arg </t_type>" on a single line.

        :param t_type: tag name (token type value).
        :param arg: token text.
        :return:
        """
        self.write(t_type, delim=True, new_line=False, no_space=False)
        self.write(" " + arg + " ", delim=False, new_line=False, no_space=True)
        self.write(t_type, delim=True, new_line=True, end=True, no_space=True)

    def cleanbuffer(self):
        """Discard staged output and undo the indentation increment made by
        the failed expression/term.
        """
        self.num_spaces -= 1
        self.buffer = ""
class CompilationEngine(object):
    """Recursive-descent Jack parser that builds an element tree (``et``)
    and serializes it to the output file.

    NOTE(review): relies on Python-2-only names ``unicode`` and ``file``
    (in create_buffer and compile_class); under Python 3 these raise
    NameError on the paths that reach them — confirm target interpreter.
    """

    def __init__(self, inputfile, outputfile):
        """Store input/output (path or file object) and open both buffers.

        :param inputfile: source .jack file path or readable stream.
        :param outputfile: XML destination path or writable stream.
        """
        self._inputfile = inputfile
        self._outputfile = outputfile
        self._tokenizer: JackTokenizer = None
        # Stack of currently-open parent elements.
        self._cur_root = []
        # Root element of the tree (set by compile_class).
        self._root = None
        self._init()

    def _init(self):
        """Open the input/output buffers and create the tokenizer."""
        self._inputbuf = self.create_buffer(self._inputfile)
        self._outputbuf = self.create_buffer(self._outputfile, mode="w+")
        self._tokenizer = JackTokenizer(self._inputbuf)

    def create_buffer(self, fn, mode='r'):
        """Return an open file object for *fn* (path or already-open file).

        :raises ValueError: if fn is neither a path nor a file object.
        """
        # NOTE(review): `unicode` and `file` are Python-2 names only.
        if isinstance(fn, str) or isinstance(fn, unicode):
            return open(fn, mode)
        elif isinstance(fn, file) or isinstance(fn, IOBase):
            return fn
        else:
            raise ValueError("file object show file or readable")

    def compile_class(self):
        """Parse a full class and serialize the tree to the output.

        The finally block writes out whatever tree was built even if
        parsing failed partway, then closes the output buffer.
        """
        parent = self._set_parent("class")
        self._root = parent
        self._advance()
        self._pop_required(parent, TokenType.keyword, KeywordType.CLASS)
        self._pop_required(parent, TokenType.identifier)
        self._pop_required(parent, TokenType.symbol, "{")
        try:
            while self._is_class_var():
                self.compile_class_var_desc()
                self._advance()
            while self._is_subroutine():
                self.compile_subroutine()
                self._advance()
            self._pop_required(parent, TokenType.symbol, "}")
        finally:
            self._outputbuf.write(
                unicode(
                    et.tostring(self._root, pretty_print=True,
                                method="c14n2").decode("utf-8")))
            self._outputbuf.close()

    def _required_type(self, token_type, val):
        """Raise ValueError unless the current token matches *token_type*
        (and, for keywords/symbols, equals *val*).
        """
        tp, tv = self._token()
        if token_type != tp or (
            (tp == TokenType.keyword or tp == TokenType.symbol) and
            (val != tv)):
            raise ValueError("token must be %s,%s" % (token_type, val))

    def compile_class_var_desc(self):
        """Parse one classVarDec: copy tokens verbatim up to and including
        the terminating ';'.
        """
        parent = self._set_parent("classVarDec")
        # Could be refined: validate the variable type and identifier here.
        while not self.is_token(TokenType.symbol, ";"):
            parent.append(self._build_element())
            self._advance()
        parent.append(self._build_element())
        self._remove_parent()

    def compile_subroutine(self):
        """Parse one subroutineDec: header tokens, parameter list, body."""
        parent = self._set_parent("subroutineDec")
        # Header (kind, return type, name) copied verbatim up to '('.
        while not self.is_token(TokenType.symbol, "("):
            parent.append(self._build_element())
            self._advance()
        parent.append(self.required(TokenType.symbol, "("))
        self._advance()
        self.compile_parameter_list()
        parent.append(self.required(TokenType.symbol, ")"))
        self._advance()
        self._compile_body()
        self._remove_parent()
        # if self._tokenizer.token_type()==TokenType.KEY_WORD:

    def _compile_body(self):
        """Parse a subroutineBody: '{' varDec* statements '}'."""
        parent = self._set_parent("subroutineBody")
        parent.append(self.required(TokenType.symbol, "{"))
        self._advance()
        while self._is_var_desc():
            self.compile_var_desc()
            self._advance()
        self.compile_statements()
        parent.append(self.required(TokenType.symbol, "}"))
        self._remove_parent()

    def _remove_parent(self):
        """Close the innermost open element."""
        self._cur_root.pop()

    def compile_parameter_list(self):
        """Parse a (possibly empty) parameterList: tokens verbatim up to the
        closing ')' (which is left for the caller).
        """
        parent = self._set_parent("parameterList")
        while not self.is_token(TokenType.symbol, ")"):
            parent.append(self._build_element())
            self._advance()
        self._remove_parent()

    def compile_var_desc(self):
        """Parse one varDec: tokens verbatim up to and including ';'."""
        parent = self._set_parent("varDec")
        while not self.is_token(TokenType.symbol, ";"):
            parent.append(self._build_element())
            self._advance()
        parent.append(self.required(TokenType.symbol, ";"))
        self._remove_parent()

    def compile_statements(self):
        """Parse a statements block: dispatch on the statement keyword until
        the current token is no longer a statement start.
        """
        self._set_parent("statements")
        while self._is_statement():
            if self.is_let_statement():
                self.compile_let()
            if self.is_do_statement():
                self.compile_do()
            if self.is_return_statement():
                self.compile_return()
            if self.is_if_statement():
                self.compile_if()
                # if/while consume their trailing token themselves.
                continue
            if self.is_while_statement():
                self.compile_while()
                continue
            self._advance()
        self._remove_parent()

    def compile_do(self):
        """Parse a doStatement: 'do' subroutineCall ';'."""
        parent = self._set_parent("doStatement")
        parent.append(self.required(TokenType.keyword, KeywordType.DO))
        self._advance()
        # Call target (name / name.name) copied verbatim up to '('.
        while not self.is_token(TokenType.symbol, "("):
            parent.append(self._build_element())
            self._advance()
        parent.append(self.required(TokenType.symbol, "("))
        self._advance()
        self.compile_expression_list()
        parent.append(self.required(TokenType.symbol, ")"))
        self._advance()
        parent.append(self.required(TokenType.symbol, ";"))
        self._remove_parent()

    def compile_let(self):
        """Parse a letStatement: 'let' varName ('[' expr ']')? '=' expr ';'."""
        parent = self._set_parent("letStatement")
        parent.append(self.required(TokenType.keyword, KeywordType.LET))
        self._advance()
        parent.append(self.required(TokenType.identifier))
        self._advance()
        if self.is_token(TokenType.symbol, "["):
            parent.append(self._build_element())
            self._advance()
            self.compile_expression()
            parent.append(self.required(TokenType.symbol, "]"))
            self._advance()
        # may be an array access
        parent.append(self.required(TokenType.symbol, "="))
        self._advance()
        self.compile_expression()
        parent.append(self.required(TokenType.symbol, ";"))
        self._remove_parent()

    def compile_while(self):
        """Parse a whileStatement: 'while' '(' expr ')' '{' statements '}'."""
        parent = self._set_parent("whileStatement")
        self._pop_required(parent, TokenType.keyword, KeywordType.WHILE)
        self._pop_required(parent, TokenType.symbol, "(")
        self.compile_expression()
        self._pop_required(parent, TokenType.symbol, ")")
        self._pop_required(parent, TokenType.symbol, "{")
        self.compile_statements()
        self._pop_required(parent, TokenType.symbol, "}")
        self._remove_parent()

    def compile_return(self):
        """Parse a returnStatement: 'return' expression? ';'."""
        parent = self._set_parent("returnStatement")
        parent.append(self.required(TokenType.keyword, KeywordType.RETURN))
        self._advance()
        if not self.is_token(TokenType.symbol, ";"):
            self.compile_expression()
        parent.append(self.required(TokenType.symbol, ";"))
        self._remove_parent()

    def compile_if(self):
        """Parse an ifStatement with an optional else clause."""
        parent = self._set_parent("ifStatement")
        parent.append(self.required(TokenType.keyword, KeywordType.IF))
        self._advance()
        self._pop_required(parent, TokenType.symbol, "(")
        self.compile_expression()
        self._pop_required(parent, TokenType.symbol, ")")
        self._pop_required(parent, TokenType.symbol, "{")
        self.compile_statements()
        self._pop_required(parent, TokenType.symbol, "}")
        if self.is_token(TokenType.keyword, KeywordType.ELSE):
            self._pop_required(parent, TokenType.keyword, KeywordType.ELSE)
            self._pop_required(parent, TokenType.symbol, "{")
            self.compile_statements()
            parent.append(self.required(TokenType.symbol, "}"))
            self._advance()
        self._remove_parent()

    def compile_expression(self):
        """Parse an expression: term (op term)* until an end token."""
        parent = self._set_parent("expression")
        while not self._is_end():
            self.compile_term()
            if self._is_op(False):
                parent.append(self._build_element())
                self._advance()
        # parent.append(self._build_element())
        # self._advance()
        self._remove_parent()

    def compile_term(self):
        """Parse one term; *first* tracks whether a leading '-' should be
        treated as a unary operator rather than a binary op.
        """
        parent = self._set_parent("term")
        first = True
        while not self._is_op(first) and not self._is_end():
            first = False
            if self.is_token(TokenType.symbol, "("):
                # Parenthesized sub-expression.
                parent.append(self._build_element())
                self._advance()
                self.compile_expression()
                parent.append(self.required(TokenType.symbol, ")"))
            elif self.is_token(TokenType.symbol, "["):
                # Array index.
                parent.append(self._build_element())
                self._advance()
                self.compile_expression()
                parent.append(self.required(TokenType.symbol, "]"))
            elif self._is_unary_op():
                parent.append(self._build_element())
                self._advance()
                self.compile_term()
                continue
            elif self.is_token(TokenType.identifier):
                parent.append(self._build_element())
                self._advance()
                if self.is_token(TokenType.symbol, "("):
                    # Direct call: name(expressionList).
                    self.compile_expression_list()
                    parent.append(self.required(TokenType.symbol, ")"))
                if self.is_token(TokenType.symbol, "."):
                    # Qualified call: name.subName(expressionList).
                    parent.append(self._build_element())
                    self._advance()
                    self._pop_required(parent, TokenType.identifier)
                    self._pop_required(parent, TokenType.symbol, "(")
                    self.compile_expression_list()
                    parent.append(self.required(TokenType.symbol, ")"))
                    self._advance()
                continue
            else:
                # Constant or keyword term: copy verbatim.
                parent.append(self._build_element())
            self._advance()
        self._remove_parent()

    def _pop_required(self, parent, tk, val=None):
        """Append the required token to *parent* and advance."""
        parent.append(self.required(tk, val))
        self._advance()

    def _is_op(self, first):
        """True when the current token is a binary operator; '-' counts only
        when it is not the first token of a term (*first* False).

        NOTE(review): due to and/or precedence, the trailing
        `or (val == '-' and not first)` is not guarded by the
        `tk == TokenType.symbol` check — verify intent.
        """
        tk, val = self._token()
        return tk == TokenType.symbol and val in '+*/&|<>=' or (val == '-' and
                                                                not first)

    def _is_unary_op(self):
        """True when the current token is a unary '-' or '~' symbol."""
        tk, val = self._token()
        return tk == TokenType.symbol and val in '-~'

    def compile_expression_list(self):
        """Parse a comma-separated (possibly empty) expressionList up to the
        closing ')' (left for the caller).
        """
        parent = self._set_parent("expressionList")
        while not self.is_token(TokenType.symbol, ")"):
            self.compile_expression()
            if self.is_token(TokenType.symbol, ","):
                parent.append(self._build_element())
                self._advance()
        self._remove_parent()

    def build_identifier(self):
        """Build an <identifier> element from the current token."""
        e = et.Element("identifier")
        e.text = self._tokenizer.identifier()
        return e

    def build_keyword(self):
        """Build a <keyword> element from the current token."""
        e = et.Element("keyword")
        e.text = self._tokenizer.keyword().name.lower()
        return e

    def build_symbol(self):
        """Build a <symbol> element from the current token."""
        e = et.Element("symbol")
        e.text = self._tokenizer.symbol()
        return e

    def _token(self):
        """Return (token_type, value) for the current token, or (None, None)
        for unrecognized types.

        NOTE(review): the print() below looks like leftover debug output.
        """
        token_type = self._tokenizer.token_type()
        if self._tokenizer.token_type() == TokenType.keyword:
            a, b = token_type, self._tokenizer.keyword()
        elif self._tokenizer.token_type() == TokenType.symbol:
            a, b = token_type, self._tokenizer.symbol()
        elif self._tokenizer.token_type() == TokenType.identifier:
            a, b = token_type, self._tokenizer.identifier()
        elif self._tokenizer.token_type() == TokenType.integerConstant:
            a, b = token_type, self._tokenizer.intVal()
        elif self._tokenizer.token_type() == TokenType.stringConstant:
            a, b = token_type, self._tokenizer.stringVal()
        else:
            a, b = None, None
        print(a, b, self._tokenizer.line)
        return a, b

    def _advance(self):
        """Advance the tokenizer if tokens remain (no-op at EOF)."""
        if self._tokenizer.has_more_tokens():
            self._tokenizer.advance()

    def required(self, token, val=None):
        """Validate the current token then return it as an element."""
        self._required_type(token, val)
        return self._build_element()

    def _build_element(self):
        """Build an element named after the token type; keyword values are
        lower-cased enum names, others are used as-is.
        """
        a, b = self._token()
        e = et.Element(a.name)
        if isinstance(b, KeywordType):
            e.text = b.name.lower()
        else:
            e.text = b
        return e

    def _is_class_var(self):
        """True when the current token starts a classVarDec."""
        return self.is_token(TokenType.keyword,
                             KeywordType.FIELD) or self.is_token(
                                 TokenType.keyword, KeywordType.STATIC)

    def is_token(self, token, val=None):
        """True when the current token matches *token* (and *val*, if given)."""
        t, v = self._token()
        if val is not None:
            return t == token and v == val
        else:
            return t == token

    def _get_parent(self):
        """Return the innermost open element, or None at the root."""
        if len(self._cur_root) > 0:
            return self._cur_root[-1]
        else:
            return None

    def _set_parent(self, name):
        """Open a new element *name* under the current parent and push it on
        the stack; return it.
        """
        parent = self._get_parent()
        ele2 = et.Element(name)
        if parent is not None:
            parent.append(ele2)
        self._cur_root.append(ele2)
        return ele2

    def _is_subroutine(self):
        """True when the current token starts a subroutineDec."""
        return self.is_token(TokenType.keyword, KeywordType.FUNCTION) \
            or self.is_token(TokenType.keyword, KeywordType.CONSTRUCTOR) \
            or self.is_token(TokenType.keyword, KeywordType.METHOD)

    def _is_statement(self):
        """True when the current token starts any statement.

        NOTE(review): falls through to an implicit None (falsy) instead of
        an explicit `return False`.
        """
        if self.is_let_statement():
            return True
        if self.is_do_statement():
            return True
        if self.is_return_statement():
            return True
        if self.is_if_statement():
            return True
        if self.is_while_statement():
            return True

    def is_while_statement(self):
        """True when the current token is the 'while' keyword."""
        return self.is_token(TokenType.keyword, KeywordType.WHILE)

    def is_let_statement(self):
        """True when the current token is the 'let' keyword."""
        return self.is_token(TokenType.keyword, KeywordType.LET)

    def is_do_statement(self):
        """True when the current token is the 'do' keyword."""
        return self.is_token(TokenType.keyword, KeywordType.DO)

    def is_return_statement(self):
        """True when the current token is the 'return' keyword."""
        return self.is_token(TokenType.keyword, KeywordType.RETURN)

    def is_if_statement(self):
        """True when the current token is the 'if' keyword."""
        return self.is_token(TokenType.keyword, KeywordType.IF)

    def _is_var_desc(self):
        """True when the current token is the 'var' keyword."""
        return self.is_token(TokenType.keyword, KeywordType.VAR)

    def _is_end(self):
        """True when the current token terminates an expression.

        NOTE(review): the ";" test is duplicated — harmless but redundant.
        """
        return self.is_token(TokenType.symbol, ";") or \
            self.is_token(TokenType.symbol, ";") \
            or self.is_token(TokenType.symbol, ")") \
            or self.is_token(TokenType.symbol, ",") \
            or self.is_token(TokenType.symbol, "]")
class CompilationEngine:
    """Compiles a tokenized .jack input straight to VM code.

    Drives a JackTokenizer over the input, records identifiers in a
    SymbolTable, and emits VM commands through a VMWriter.

    Fix: the membership test in compile_subroutine_call used the Python 2
    ``<>`` operator (``kind <> NONE``), which is a syntax error on Python 3;
    it is now ``!=``. All other behavior is unchanged.
    """

    def __init__(self, input_file, output_file):
        """Wire up the tokenizer, symbol table and VM writer for one class file."""
        self.jack_tokenizer = JackTokenizer(input_file)
        self.symbol_table = SymbolTable()
        self.writer = VMWriter(output_file)
        self.class_name = ""
        self.subroutine_name = ""
        self.return_type = ""
        # Per-subroutine label counters (reset in compile_subroutine).
        self.label_counter_if = 0
        self.label_counter_while = 0
        self.num_args_called_function = 0
        self.is_unary = False
        # Jack binary operator -> VM command ( * and / become OS calls).
        self.dic_arithmetic = {"+": "add", "-": "sub",
                               "*": "call Math.multiply 2",
                               "/": "call Math.divide 2",
                               "&": "and", "|": "or",
                               "<": "lt", ">": "gt", "=": "eq"}

    def compile_class(self):
        """Compile a complete class: header, classVarDecs and subroutineDecs."""
        # "class className {" -- NUM_TOKENS_CLASS_DEC is a module constant
        # (presumably 3, one per header token -- TODO confirm at its definition).
        for i in range(NUM_TOKENS_CLASS_DEC):
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # saves the className
            if self.jack_tokenizer.token_type() == IDENTIFIER:
                self.class_name = self.jack_tokenizer.identifier()
        # classVarDec* or SubroutineDec*
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == KEYWORD and (self.jack_tokenizer.key_word() == "static"
                                          or self.jack_tokenizer.key_word() == "field"):
                self.compile_class_var_dec()
            if token_type == KEYWORD and (self.jack_tokenizer.key_word() == "function"
                                          or self.jack_tokenizer.key_word() == "method"
                                          or self.jack_tokenizer.key_word() == "constructor"):
                self.compile_subroutine()
            if token_type == SYMBOL and self.jack_tokenizer.symbol() == "}":
                break

    def compile_class_var_dec(self):
        """Compile one classVarDec, defining each declared name in the symbol table."""
        # "static" or "field"
        kind = self.jack_tokenizer.key_word()
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # type: built-in keyword or a class name identifier
        if self.jack_tokenizer.token_type() == KEYWORD:
            type = self.jack_tokenizer.key_word()
        else:
            type = self.jack_tokenizer.identifier()
        # varName (',' varName)* ';'
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == IDENTIFIER:
                name = self.jack_tokenizer.identifier()
                self.symbol_table.define(name, type, kind)
            elif token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == ";":
                    break

    def compile_subroutine(self):
        """Compile one subroutineDec: signature, locals, prologue and body."""
        self.symbol_table.start_subroutine()
        self.subroutine_name = ""
        self.return_type = ""
        self.label_counter_if = 0
        self.label_counter_while = 0
        # the curr token: "constructor" or "function" or "method"
        type_of_subroutine = self.jack_tokenizer.key_word()
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # the curr token: return type of the subroutine
        if self.jack_tokenizer.token_type() == KEYWORD:
            self.return_type = self.jack_tokenizer.key_word()
        else:
            self.return_type = self.jack_tokenizer.identifier()
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        self.subroutine_name = self.jack_tokenizer.identifier()
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            if self.jack_tokenizer.symbol() == "(":
                # A method's implicit first argument is `this`.
                if type_of_subroutine == "method":
                    self.symbol_table.define(THIS, self.class_name, ARG)
                self.compile_parameter_list()
                # the curr token should be - ")"
            if self.jack_tokenizer.symbol() == '{':
                while self.jack_tokenizer.has_more_tokens():
                    self.jack_tokenizer.advance()
                    token_type = self.jack_tokenizer.token_type()
                    if token_type == KEYWORD:
                        if self.jack_tokenizer.key_word() == "var":
                            self.compile_var_dec()
                            continue
                        else:
                            # First non-var keyword: emit the function header
                            # (local count is known now) plus the kind-specific
                            # prologue, then compile the statement list.
                            self.writer.write_function(
                                self.class_name + "." + self.subroutine_name,
                                self.symbol_table.var_count(VAR))
                            if type_of_subroutine == "constructor":
                                # Allocate one word per field and anchor `this`.
                                self.writer.write_push(CONST, self.symbol_table.var_count(FIELD))
                                self.writer.write_call("Memory.alloc", 1)
                                self.writer.write_pop("pointer", 0)
                            elif type_of_subroutine == "method":
                                # `this` arrives as argument 0.
                                self.writer.write_push(ARGUMENT, 0)
                                self.writer.write_pop("pointer", 0)
                            self.compile_statements()
                            # the curr token should be - "}"
                            break
                break

    def compile_parameter_list(self):
        """Compile a (possibly empty) parameter list, defining each param as ARG."""
        kind = ARG
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            # built-in type: int, boolean, char...
            if token_type == KEYWORD:
                type = self.jack_tokenizer.key_word()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                name = self.jack_tokenizer.identifier()
                self.symbol_table.define(name, type, kind)
            # className type
            elif token_type == IDENTIFIER:
                type = self.jack_tokenizer.identifier()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                name = self.jack_tokenizer.identifier()
                self.symbol_table.define(name, type, kind)
            # end of parameter list
            if token_type == SYMBOL and self.jack_tokenizer.symbol() == ")":
                break

    def compile_var_dec(self):
        """Compile one 'var' declaration, defining each name in the symbol table."""
        # should be "var"
        kind = self.jack_tokenizer.key_word()
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # type: built-in keyword or class name
        if self.jack_tokenizer.token_type() == KEYWORD:
            type = self.jack_tokenizer.key_word()
        else:
            type = self.jack_tokenizer.identifier()
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == IDENTIFIER:
                name = self.jack_tokenizer.identifier()
                self.symbol_table.define(name, type, kind)
            if token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == ";":
                    break

    def compile_statements(self):
        """Compile statements until the closing '}' of the enclosing block."""
        while True:
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "do":
                self.compile_do()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "let":
                self.compile_let()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "while":
                self.compile_while()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "return":
                self.compile_return()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            # compile_if returns already advanced past its statement
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "if":
                self.compile_if()
            if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "}":
                break

    def compile_do(self):
        """Compile a do statement; the call's return value is discarded into temp 0."""
        self.num_args_called_function = 0
        self.compile_subroutine_call()
        self.writer.write_pop(TEMP, 0)
        # return from compile_subroutine_call with ";"
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()

    def compile_let(self):
        """Compile a let statement (plain variable or array-element assignment)."""
        init = True  # stays True for a plain (non-array) assignment
        # the curr token - "let"
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == IDENTIFIER:
                name = self.jack_tokenizer.identifier()
                type = self.symbol_table.type_of(name)
                kind = self.symbol_table.kind_of(name)
                index = self.symbol_table.index_of(name)
            if token_type == SYMBOL:
                # there is an assignment to an array element
                if self.jack_tokenizer.symbol() == "[":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # handle - [expression]
                    self.compile_expression()
                    # the curr token - "]"
                    self.writer.write_push(self.find_segment(kind), index)
                    self.writer.write_arithmetic("add")
                    # NOTE(review): `that` is anchored before the RHS is
                    # compiled; an RHS containing another array access would
                    # clobber it. Preserved as-is -- confirm against tests.
                    self.writer.write_pop("pointer", 1)
                    init = False
                # should return from compile_expression only with ";" or "]"
                if self.jack_tokenizer.symbol() == "=":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # handle the "= expression" part
                    self.compile_expression()
                    if not init:
                        # array element: store through the `that` pointer
                        self.writer.write_pop(THAT, 0)
                    else:
                        self.writer.write_pop(self.find_segment(kind), index)
                # end of let statement
                if self.jack_tokenizer.symbol() == ";":
                    break

    def compile_while(self):
        """Compile a while statement using WHILE_EXP/WHILE_END labels."""
        while_counter = self.label_counter_while
        self.label_counter_while += 1
        # the curr token - "while"
        self.writer.write_label("WHILE_EXP" + str(while_counter))
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == "(":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                    # the curr token - ")"; jump out when condition is false
                    self.writer.write_arithmetic("not")
                    self.writer.write_if("WHILE_END" + str(while_counter))
                if self.jack_tokenizer.symbol() == "{":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_statements()
                    # the curr token - "}"; loop back then place the exit label
                    self.writer.write_go_to("WHILE_EXP" + str(while_counter))
                    self.writer.write_label("WHILE_END" + str(while_counter))
            if token_type == SYMBOL and self.jack_tokenizer.symbol() == "}":
                break

    def compile_return(self):
        """Compile a return statement; void returns push constant 0."""
        # the curr token - "return"
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ";":
            self.writer.write_push(CONST, "0")
        else:
            self.compile_expression()
            # should return from "compile_expression" only with ";"
        self.writer.write_return()

    def compile_if(self):
        """Compile an if (and optional else) using IF_TRUE/IF_FALSE/IF_END labels."""
        if_counter = self.label_counter_if
        self.label_counter_if += 1
        # the curr token - "if"
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == "(":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                    # the curr token - ")"
                    self.writer.write_if("IF_TRUE" + str(if_counter))
                    self.writer.write_go_to("IF_FALSE" + str(if_counter))
            if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "{":
                self.writer.write_label("IF_TRUE" + str(if_counter))
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                self.compile_statements()
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "}":
                    break
        # Peek past the "}" to see whether an else clause follows.
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "else":
            self.writer.write_go_to("IF_END" + str(if_counter))
            self.writer.write_label("IF_FALSE" + str(if_counter))
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # skip "{"
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            self.compile_statements()
            # skip "}"
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            self.writer.write_label("IF_END" + str(if_counter))
        else:
            self.writer.write_label("IF_FALSE" + str(if_counter))

    def compile_subroutine_call(self):
        """Compile subroutineName(...) or (className|varName).subroutineName(...)."""
        to_add = False  # True when an implicit `this`/object argument is pushed
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # "subRoutineName" or ("className" | "varName", as part of className.subRoutineName)
        called_statement = self.jack_tokenizer.identifier()
        type = self.symbol_table.type_of(called_statement)
        kind = self.symbol_table.kind_of(called_statement)
        index = self.symbol_table.index_of(called_statement)
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # case of "subRoutineCall(expressionList)" -- method on the current object
        if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "(":
            to_add = True
            called_statement = self.class_name + "." + called_statement
            self.writer.write_push(POINTER, 0)
            self.compile_expression_list()
            # the curr token - ")"
        # (className | varName).subroutineName(expressionList)
        elif self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ".":
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # subroutineName; fix: was the Python 2 `<>` operator
            if kind != NONE:
                # varName.method(...) -- push the object, dispatch on its type
                to_add = True
                self.writer.write_push(self.find_segment(kind), index)
                called_statement = type + "." + self.jack_tokenizer.identifier()
            else:
                # className.function(...) -- static call, no object argument
                called_statement = called_statement + "." + self.jack_tokenizer.identifier()
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # "(" then expressionList then ")"
            self.compile_expression_list()
        if to_add:
            self.writer.write_call(called_statement, self.num_args_called_function + 1)
        else:
            self.writer.write_call(called_statement, self.num_args_called_function)

    def compile_expression(self):
        """Compile term (op term)*, emitting each operator after its right term."""
        is_print_unary = False
        if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "-":
            self.is_unary = True
        self.compile_term()
        while self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() in \
                ["+", "-", "*", "/", "&", "|", "<", ">", "="]:
            arit_symbol = self.jack_tokenizer.symbol()
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "-":
                # a "-" right after a binary op is unary negation
                self.is_unary = True
                is_print_unary = True
            self.compile_term()
            self.writer.write_arithmetic(self.dic_arithmetic[arit_symbol])

    def compile_term(self):
        """Compile one term and push its value onto the VM stack."""
        while True:
            token_type = self.jack_tokenizer.token_type()
            # A terminator/operator symbol ends the term -- unless we are
            # compiling the operand of a unary "-".
            if token_type == SYMBOL and not self.is_unary and self.jack_tokenizer.symbol() in \
                    [",", ";", ")", "}", "]", "+", "-", "*", "/", "&", "|", "<", ">", "="]:
                break
            if token_type == INT_CONST:
                self.writer.write_push(CONST, self.jack_tokenizer.int_val())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == STRING_CONST:
                self.compile_string()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == KEYWORD and self.jack_tokenizer.key_word() in ["true", "false", "null"]:
                # false/null are 0; true is ~0 (i.e. -1)
                self.writer.write_push(CONST, 0)
                if self.jack_tokenizer.key_word() == "true":
                    self.writer.write_arithmetic("not")
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            # like in "return this"
            if token_type == KEYWORD and self.jack_tokenizer.key_word() == "this":
                self.writer.write_push(POINTER, 0)
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            # unary operators
            if token_type == SYMBOL and self.jack_tokenizer.symbol() in ["~", "-"]:
                symbol = self.jack_tokenizer.symbol()
                self.is_unary = False
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                self.compile_term()
                if symbol == "~":
                    self.writer.write_arithmetic("not")
                else:
                    self.writer.write_arithmetic("neg")
                break
            # parenthesized sub-expression
            if token_type == SYMBOL and self.jack_tokenizer.symbol() == "(":
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                self.compile_expression()
                # should return from compile_expression only with ")"
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == IDENTIFIER:
                is_add = True
                name = self.jack_tokenizer.identifier()
                kind = self.symbol_table.kind_of(name)
                index = self.symbol_table.index_of(name)
                # Uppercase first letter => class name, no object argument.
                if name[0].isupper():
                    is_add = False
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                # plain variable reference (next token ends the term)
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() in \
                        [",", ";", ")", "}", "]", "+", "-", "*", "/", "&", "|", "<", ">", "=", "&", "<", ">"]:
                    # in case of "a > ... or b;"
                    self.writer.write_push(self.find_segment(kind), self.symbol_table.index_of(name))
                    break
                # array access: name[expression]
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "[":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                    # should return only with "]"
                    self.writer.write_push(self.find_segment(kind), self.symbol_table.index_of(name))
                    self.writer.write_arithmetic("add")
                    self.writer.write_pop(POINTER, 1)
                    self.writer.write_push(THAT, 0)
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    break
                # bare call: method on the current object, e.g. "a = ... bar()"
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "(":
                    self.writer.write_push(POINTER, 0)
                    self.compile_expression_list()
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.writer.write_call(self.class_name + "." + name,
                                           self.num_args_called_function + 1)
                    break
                # (className | varName).subroutineName(expressionList)
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ".":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # subroutineName
                    if is_add:
                        type = self.symbol_table.type_of(name)
                        name = type + "." + self.jack_tokenizer.identifier()
                    else:
                        name = name + "." + self.jack_tokenizer.identifier()
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # "(" then expressionList
                    if is_add:
                        self.writer.write_push(self.find_segment(kind), index)
                    self.compile_expression_list()
                    # ")"
                    if is_add:
                        self.writer.write_call(name, self.num_args_called_function + 1)
                    else:
                        self.writer.write_call(name, self.num_args_called_function)
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # no break here: the loop re-checks and exits on the next
                    # terminator/operator symbol (preserved from the original)

    def compile_expression_list(self):
        """Compile a comma-separated expression list; records the argument count."""
        num_args = 0
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ")":
                break
            else:
                num_args += 1
                self.compile_expression()
                if self.jack_tokenizer.symbol() == ")":
                    break
                # otherwise the current token is "," and the loop continues
        self.num_args_called_function = num_args

    def find_segment(self, kind):
        """Map a symbol-table kind to its VM memory segment."""
        if kind == ARG:
            return ARGUMENT
        if kind == VAR:
            return LCL
        if kind == FIELD:
            return THIS
        if kind == STATIC:
            return STATIC

    def compile_string(self):
        """Compile a string constant via String.new + String.appendChar calls."""
        length = len(self.jack_tokenizer.string_val())
        self.writer.write_push(CONST, length)
        self.writer.write_call("String.new", 1)
        for i in range(len(self.jack_tokenizer.string_val())):
            uni = ord(self.jack_tokenizer.string_val()[i])
            self.writer.write_push(CONST, uni)
            self.writer.write_call("String.appendChar", 2)
class CompilationEngine(object): """This class recursively compiles a .jack file into vm code.""" def __init__(self, inFile): super(CompilationEngine, self).__init__() # create an internal tokenizer to iterate through self.tokenizer = JackTokenizer(inFile) # setup the output file self.outputPath = inFile.name.replace(".jack", ".vm") self.outputFile = open(self.outputPath, 'w') self.outputFile.close() self.outputFile = open(self.outputPath, 'a') # create a VMWriter with the output file self.vmWriter = VMWriter(self.outputFile) # create a symbol table self.symbolTable = SymbolTable() # stuff we need to keep track of for the symbol table self.className = "" self.currentName = "" self.currentKind = "" self.currentType = "" self.ifCounter = 0 self.whileCounter = 0 def start(self): """Starts the compilation by creating the token XML file and then calling __compileClass()""" # start the tokenizer self.tokenizer.advance() # start the compilation self.__compileClass() def __checkIdentifier(self): """Makes sure that the current token is an identifier and saves that identifier as the current name for the symbol table""" if self.tokenizer.tokenType() == "IDENTIFIER": self.currentName = self.tokenizer.identifier() return True return False def __checkType(self): """Checks for a valid type and saves that type for the symbol table""" if self.tokenizer.tokenType() == "KEYWORD" and \ self.tokenizer.keyWord() in ["int", "char", "boolean", "void"]: self.currentType = self.tokenizer.keyWord() return True elif self.tokenizer.tokenType() == "IDENTIFIER": self.currentType = self.tokenizer.identifier() return True else: return False def __compileType(self): """Compiles a complete jack type grammar. 
Returns false if there is an error""" # check for valid keyword if self.tokenizer.tokenType() == "KEYWORD": k = self.tokenizer.keyWord() if k not in ["int", "char", "boolean"]: print("Error: type keyword must be int, char, or boolean") return False # self.__writeFullTag("keyword", k) self.tokenizer.advance() return True # check for className else: res = self.__compileClassName() # if __compileClassName() errors, this is not a valid type if not res: print("Error: type not a valid className") return res def __compileClassName(self): """Compiles a complete jack className grammar. Returns false if there is an error""" if self.tokenizer.tokenType() != "IDENTIFIER": return False # self.__writeFullTag("identifier", self.tokenizer.identifier()) self.tokenizer.advance() return True def __compileSubroutineName(self): """Compiles a complete jack subroutineName. Returns false if there is an error""" if self.tokenizer.tokenType() != "IDENTIFIER": return False # self.__writeFullTag("identifier", self.tokenizer.identifier()) self.tokenizer.advance() return True def __compileVarName(self): """Compiles a complete jack varName. 
Returns false if there is an error""" if self.tokenizer.tokenType() != "IDENTIFIER": return False # self.__writeFullTag("identifier", self.tokenizer.identifier()) self.tokenizer.advance() return True def __compileClass(self): """Compiles a complete jack class grammar""" # find the class keyword if self.tokenizer.tokenType() != "KEYWORD" or \ self.tokenizer.keyWord() != "class": print("Error: no class declaration found") sys.exit(1) self.tokenizer.advance() # find the className if not self.__checkIdentifier(): print("Error: no class name found in class declaration") sys.exit(1) # save the class name self.className = self.tokenizer.identifier() self.tokenizer.advance() # find the open curly brace if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "{": print("Error: no opening brace found after class") sys.exit(0) self.tokenizer.advance() # compile the classVarDecs while(self.tokenizer.tokenType() == "KEYWORD" and (self.tokenizer.keyWord() == "static" or self.tokenizer.keyWord() == "field")): self.__compileClassVarDec() # compile the subroutines while(self.tokenizer.tokenType() == "KEYWORD" and (self.tokenizer.keyWord() == "constructor" or self.tokenizer.keyWord() == "function" or self.tokenizer.keyWord() == "method")): self.__compileSubroutineDec() # find last curly brace if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "}": print("Error: no closing brace found after class definition") sys.exit(1) self.tokenizer.advance() def __compileClassVarDec(self): """Compiles a complete jack class variable declaration. 
This advances the tokenizer completely through the variable declaration""" # we already checked to make sure that the keyword is valid self.currentKind = self.tokenizer.keyWord() self.tokenizer.advance() # look for a valid type if not self.__checkType(): print("Error: invalid type in classVarDec") sys.exit(1) self.tokenizer.advance() # check for varName if self.__checkIdentifier(): self.symbolTable.define( self.currentName, self.currentType, self.currentKind) self.tokenizer.advance() else: print("Error: missing varName identifier in classVarDec") sys.exit(1) # check for comma then more varNames (possible not existing) while self.tokenizer.tokenType() == "SYMBOL" and \ self.tokenizer.symbol() == ",": self.tokenizer.advance() # check for varName again if self.__checkIdentifier(): self.symbolTable.define( self.currentName, self.currentType, self.currentKind) self.tokenizer.advance() else: print("Error: missing varName identifier in classVarDec") sys.exit(1) # check for closing semicolon if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ";": print("Error: missing semicolon after classVarDec") sys.exit(1) self.tokenizer.advance() def __compileSubroutineDec(self): """Compiles a complete jack subroutine description. This advances the tokenizer completely through the subroutine declaration""" # clear the subroutine symbol table self.symbolTable.startSubroutine() # since we already checked for the subroutine kind, grab it subroutineKind = self.tokenizer.keyWord() self.tokenizer.advance() # look for return type if not self.__checkType(): print("Error: illegal return type for subroutine") sys.exit(1) self.tokenizer.advance() # check for subroutineName and save it with the specified format if self.__checkIdentifier(): currentSubroutineName = self.className + "." 
+ self.currentName self.tokenizer.advance() else: print("Error: missing subroutineName in subroutineDec") sys.exit(1) # if the subroutine is a method, the first arg needs to be this if subroutineKind == "method": self.symbolTable.define("this", self.className, "arg") # check for open parentheses if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "(": print("Error: missing ( for parameter list") sys.exit(1) self.tokenizer.advance() # do parameter list (this could add nothing) self.__compileParameterList() # check for closing parentheses if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ")": print("Error: missing ) for parameter list") sys.exit(1) self.tokenizer.advance() # compile subroutine body self.__compileSubroutineBody(subroutineKind, currentSubroutineName) def __compileParameterList(self): """Compiles a complete jack parameter list grammar""" # we know all parameter lists are arguments, so set the current kind self.currentKind = "arg" # if the next symbol is a ), then there is no parameter list, so just return # the rest of compileSubroutine will handle writing that if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == ")": return # look for a valid type else: if not self.__checkType(): print("Error: invalid type in parameter list") sys.exit(1) self.tokenizer.advance() # check for varName if self.__checkIdentifier(): self.symbolTable.define( self.currentName, self.currentType, self.currentKind) self.tokenizer.advance() else: print("Error: missing varName identifier in parameterList") sys.exit(1) # check for comma separated list of type and varName while self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == ",": # write the comma self.tokenizer.advance() # look for a valid type if not self.__checkType(): print("Error: invalid type in parameter list") sys.exit(1) self.tokenizer.advance() # check for varName if self.__checkIdentifier(): self.symbolTable.define( self.currentName, 
self.currentType, self.currentKind) self.tokenizer.advance() else: print("Error: missing varName identifier in parameterList") sys.exit(1) def __compileSubroutineBody(self, currentSubKind, currentSubName): """Compile a complete jack subroutine body grammar""" # check for { if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "{": print("Error: missing { for subroutine body") sys.exit(1) self.tokenizer.advance() # check to see if we need to compile varDec while self.tokenizer.tokenType() == "KEYWORD" and \ self.tokenizer.keyWord() == "var": self.__compileVarDec() # write the function self.vmWriter.writeFunction( currentSubName, self.symbolTable.varCount("var")) # write stuff for constructor if currentSubKind == "constructor": # get number of class fields to allocate space for them numFields = self.symbolTable.varCount("field") if numFields > 0: self.vmWriter.writePush("constant", numFields) self.vmWriter.writeCall("Memory.alloc", 1) self.vmWriter.writePop("pointer", 0) # write stuff for method elif currentSubKind == "method": # get the this pointer self.vmWriter.writePush("argument", 0) self.vmWriter.writePop("pointer", 0) # compile statements self.__compileStatements() # check for closing } if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "}": print("Error: missing closing } for subroutine body") sys.exit(1) self.tokenizer.advance() return def __compileVarDec(self): """Compiles a complete jack varDec grammar""" # all var decs are of type var, so set it self.currentKind = "var" self.tokenizer.advance() # check for type if not self.__checkType(): print("Error: invalid type in var dec") sys.exit(1) self.tokenizer.advance() # check for varName if self.__checkIdentifier(): self.symbolTable.define( self.currentName, self.currentType, self.currentKind) self.tokenizer.advance() else: print("Error: missing varName identifier in varDec") sys.exit(1) # check for comma separated list of type and varName while self.tokenizer.tokenType() 
== "SYMBOL" and self.tokenizer.symbol() == ",": # write the comma self.tokenizer.advance() # check for varName if self.__checkIdentifier(): self.symbolTable.define( self.currentName, self.currentType, self.currentKind) self.tokenizer.advance() else: print("Error: missing varName identifier in varDec") sys.exit(1) # check for semicolon if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ";": print("Error: missing ; after varDec") sys.exit(1) self.tokenizer.advance() return def __compileStatements(self): """Compiles a complete jack statements grammar""" # check for the keywords for all the statements while self.tokenizer.tokenType() == "KEYWORD": k = self.tokenizer.keyWord() if k == "let": self.__compileLetStatement() elif k == "if": self.__compileIfStatement() elif k == "while": self.__compileWhileStatement() elif k == "do": self.__compileDoStatement() elif k == "return": self.__compileReturnStatement() else: print("Error: invalid statment " + k) sys.exit(1) def __compileLetStatement(self): """Compiles a complete jack let statment grammar""" self.tokenizer.advance() # look for varName if not self.__checkIdentifier(): print("Error: missing varName for let statement") self.tokenizer.advance() # get values from symbol table varName = self.currentName kind = self.symbolTable.kindOf(varName) varType = self.symbolTable.typeOf(varName) index = self.symbolTable.indexOf(varName) isArray = False # check for [ if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == "[": isArray = True self.tokenizer.advance() # compile expression self.__compileExpression() # get the index from the top of the stack from compileExpression self.vmWriter.writePush(kind, index) self.vmWriter.writeArithmetic("add") self.vmWriter.writePop("temp", 2) # write the closing bracket if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "]": print("Error: missing closing ] in let statement") sys.exit(1) self.tokenizer.advance() # check for = if 
self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "=": print("Error: missing = in let expression") sys.exit(1) self.tokenizer.advance() # compile expression self.__compileExpression() # write code to pop since expression puts result on top of stack if isArray: self.vmWriter.writePush("temp", 2) self.vmWriter.writePop("pointer", 1) self.vmWriter.writePop("that", 0) else: self.vmWriter.writePop(kind, index) # look for ; if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ";": print("Error: missing ; after let statement") sys.exit(1) self.tokenizer.advance() def __compileIfStatement(self): """Compiles a complete jack if statement grammar""" # setup local counter localIfCounter = self.ifCounter self.ifCounter += 1 self.tokenizer.advance() # check for ( if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "(": print("Error: missing ( in if statement") sys.exit(1) self.tokenizer.advance() # compile expression self.__compileExpression() # get the ~ if part from the stack self.vmWriter.writeArithmetic("not") # check for ) if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ")": print("Error: missing ) in if statement") sys.exit(1) self.tokenizer.advance() # check for { if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "{": print("Error: missing { for if statement") sys.exit(1) self.tokenizer.advance() # write the if for L1 self.vmWriter.writeIf("if-false", localIfCounter) # compile more statements self.__compileStatements() # write the goto for L2 self.vmWriter.writeGoto("if-true", localIfCounter) # check for } if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "}": print("Error: missing } after if statement") sys.exit(1) self.tokenizer.advance() # write label for L1 self.vmWriter.writeLabel("if-false", localIfCounter) # check for else if self.tokenizer.tokenType() == "KEYWORD" and self.tokenizer.keyWord() == "else": self.tokenizer.advance() # check for 
{ if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "{": print("Error: missing { for if statement") sys.exit(1) self.tokenizer.advance() # compile more statements self.__compileStatements() # check for } if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "}": print("Error: missing } after if statement") sys.exit(1) self.tokenizer.advance() # write label for L2 self.vmWriter.writeLabel("if-true", localIfCounter) def __compileWhileStatement(self): """Compiles a complete jack while statement grammar""" # get counter and write label for L1 localWhileCounter = self.whileCounter self.whileCounter += 1 self.vmWriter.writeLabel("whileStart", localWhileCounter) self.tokenizer.advance() # check for ( if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "(": print("Error: missing ( in if statement") sys.exit(1) self.tokenizer.advance() # compile expression self.__compileExpression() # get ~cond from stack self.vmWriter.writeArithmetic("not") # check for ) if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ")": print("Error: missing ) in if statement") sys.exit(1) self.tokenizer.advance() # check for { if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "{": print("Error: missing { for if statement") sys.exit(1) self.tokenizer.advance() # write the if for L2 self.vmWriter.writeIf("whileEnd", localWhileCounter) # compile more statements self.__compileStatements() # write the goto for L1 self.vmWriter.writeGoto("whileStart", localWhileCounter) # check for } if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "}": print("Error: missing } after if statement") sys.exit(1) self.tokenizer.advance() # write the label for L2 self.vmWriter.writeLabel("whileEnd", localWhileCounter) def __compileDoStatement(self): """Compiles a complete jack do statement grammar""" self.tokenizer.advance() # compile subroutine call if self.__checkIdentifier(): firstHalf = 
self.currentName self.tokenizer.advance() if self.tokenizer.tokenType() == "SYMBOL" and (self.tokenizer.symbol() == "." or self.tokenizer.symbol() == "("): self.__compileSubroutineCall(firstHalf) # check for semicolon if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ";": print("Error: missing ; after do statement") sys.exit(1) self.tokenizer.advance() # pop the 0 from the return self.vmWriter.writePop("temp", 0) def __compileReturnStatement(self): """Compiles a complete jack return statement grammar""" self.tokenizer.advance() # if the next symbol isn't a symbol, it must be an expression if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ";": self.__compileExpression() # write ;, checking again to make sure after calling compile expression # that the next symbol is still a valid ; if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ";": print("Error: missing ; after return statement") sys.exit(1) else: # write the return of 0 self.vmWriter.writePush("constant", 0) self.tokenizer.advance() # write the return self.vmWriter.writeReturn() def __convertOp(self, op): """Converts the operators that interfere with xml tags to their properly escaped versions""" op = op.replace("&", "&") op = op.replace("<", "<") op = op.replace(">", ">") op = op.replace("\"", """) return op def __compileExpression(self): """Compiles a complete jack expression grammar""" # compile term self.__compileTerm() # check for op while self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() in op: s = self.tokenizer.symbol() self.tokenizer.advance() # compile another term self.__compileTerm() # write op vm code if s == "+": self.vmWriter.writeArithmetic("add") elif s == "-": self.vmWriter.writeArithmetic("sub") elif s == "*": self.vmWriter.writeCall("Math.multiply", 2) elif s == "/": self.vmWriter.writeCall("Math.divide", 2) elif s == "&": self.vmWriter.writeArithmetic("and") elif s == "|": self.vmWriter.writeArithmetic("or") 
elif s == "<": self.vmWriter.writeArithmetic("lt") elif s == ">": self.vmWriter.writeArithmetic("gt") elif s == "=": self.vmWriter.writeArithmetic("eq") def __compileTerm(self): """Compiles a complete jack term grammar""" # term logic # check for integerConstant if self.tokenizer.tokenType() == "INT_CONST": self.vmWriter.writePush("constant", self.tokenizer.intVal()) self.tokenizer.advance() # check for string constant elif self.tokenizer.tokenType() == "STRING_CONST": # need to make a string constant string = self.tokenizer.stringVal() # push the length of the string self.vmWriter.writePush("constant", len(string)) # call String.new 1 self.vmWriter.writeCall("String.new", 1) # append to create the string for letter in string: self.vmWriter.writePush("constant", ord(letter)) self.vmWriter.writeCall("String.appendChar", 2) self.tokenizer.advance() # check for keyword for KeywordConstant elif self.tokenizer.tokenType() == "KEYWORD": k = self.tokenizer.keyWord() if k not in KeyWordConstant: print("Error: invalid KeyWordConstant" + k + " in term") sys.exit(1) # write the outputs for the keyword constants if k == "null" or k == "false": self.vmWriter.writePush("constant", 0) elif k == "true": self.vmWriter.writePush("constant", 1) self.vmWriter.writeArithmetic("neg") elif k == "this": self.vmWriter.writePush("pointer", 0) self.tokenizer.advance() # check for symbol for either ( expression ) or unary op elif self.tokenizer.tokenType() == "SYMBOL": s = self.tokenizer.symbol() # ( expression ) if s == "(": self.tokenizer.advance() # compile expression self.__compileExpression() # check for ) if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ")": print("Error: missing ) after expression in term") sys.exit(1) self.tokenizer.advance() # unaryOp term elif s in unaryOp: self.tokenizer.advance() # compile term self.__compileTerm() # write the unary output if s == "-": self.vmWriter.writeArithmetic("neg") else: self.vmWriter.writeArithmetic("not") else: 
print("Error: invalid symbol " + s + " in term") sys.exit(1) # check for varName | varName [ expression ] | subroutineCall elif self.__checkIdentifier(): # advance the tokenizer one more step to check for [, (, or other self.tokenizer.advance() firstHalf = self.currentName if self.tokenizer.tokenType() == "SYMBOL": s = self.tokenizer.symbol() # varName[expression] if s == "[": # push the array address self.vmWriter.writePush(self.symbolTable.kindOf(firstHalf), self.symbolTable.indexOf(firstHalf)) # write [ self.tokenizer.advance() # compile expression self.__compileExpression() # write vm code for array expression self.vmWriter.writeArithmetic("add") self.vmWriter.writePop("pointer", 1) self.vmWriter.writePush("that", 0) # write ] if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "]": print("Error: missing ] after varName[expression]") sys.exit(1) self.tokenizer.advance() # subroutineCall elif s == "(" or s == ".": # compile subroutineCall self.__compileSubroutineCall(firstHalf) else: self.vmWriter.writePush(self.symbolTable.kindOf(self.currentName), self.symbolTable.indexOf(self.currentName)) else: self.vmWriter.writePush(self.symbolTable.kindOf(self.currentName), self.symbolTable.indexOf(self.currentName)) else: print("Error: invalid term") sys.exit(1) def __compileSubroutineCall(self, firstHalf): """Compiles a complete jack subroutine call grammar""" # look ahead one token to see if it is a ( or a . isClass = firstHalf[0].isupper() fullSubroutineName = "" nArgs = 0 # subroutineName if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == "(": fullSubroutineName = self.className + "." 
+ firstHalf # since this a self method, we need to push pointer self.vmWriter.writePush("pointer", 0) self.tokenizer.advance() # compile expression list nArgs = self.__compileExpressionList(isClass) # check for ) if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ")": print("Error: missing ) after expressionList in subroutineCall") sys.exit(1) self.tokenizer.advance() # className | varName elif self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == ".": self.tokenizer.advance() if self.__checkIdentifier(): if isClass: fullSubroutineName = firstHalf + "." + self.currentName else: fullSubroutineName = self.symbolTable.typeOf( firstHalf) + "." + self.currentName # push the address of firstHalf self.vmWriter.writePush(self.symbolTable.kindOf( firstHalf), self.symbolTable.indexOf(firstHalf)) else: print("Error: missing varName|className in subroutineCall") # check for ( self.tokenizer.advance() if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "(": print("Error: missing ( in subroutineCall before expressionList") sys.exit(1) self.tokenizer.advance() # compile expression list nArgs = self.__compileExpressionList(isClass) # check for ) if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ")": print("Error: missing ) after expressionList in subroutineCall") sys.exit(1) self.tokenizer.advance() else: print("Error: invalid subroutineCall") sys.exit(1) if fullSubroutineName != "": self.vmWriter.writeCall(fullSubroutineName, nArgs) def __compileExpressionList(self, isClass): """Compiles a complete jack expression list grammar""" # if the symbol is ), there is no expression list if isClass: argCounter = 0 else: argCounter = 1 if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == ")": return argCounter else: # compile expression self.__compileExpression() argCounter += 1 # loop until you dont see a comma while self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == ",": 
self.tokenizer.advance() # compile expression self.__compileExpression() argCounter += 1 return argCounter
class CompilationEngine(object):
    """Compiles a tokenized Jack class, emitting both an XML parse tree
    (via lxml etree) and VM code (via VMWriter).

    XML elements are managed through a stack of open parents
    (_cur_root); call-argument counts for nested calls are tracked on
    the _n_args stack.
    """

    def __init__(self, inputfile, outputfile):
        """inputfile/outputfile may be paths or open file objects."""
        self._inputfile = inputfile
        self._outputfile = outputfile
        self._tokenizer: JackTokenizer = None
        self._cur_root = []  # stack of currently open XML parent elements
        self._n_args = []    # stack of argument counts for nested subroutine calls
        self._root = None
        self.class_name = None
        self.return_type = None
        self._label_cnt = 0  # counter for unique VM labels
        self.vm_writer = None  # type:VMWriter
        self._init()
        self.symbol = SymbolTable()
        self.vm_writer.set_engine(self)
        self.method_type = None

    def line_num(self):
        """Current source line number from the tokenizer (for diagnostics)."""
        return self._tokenizer.line

    def _init(self):
        """Open input/output buffers and create the tokenizer and VM writer."""
        self._inputbuf = self.create_buffer(self._inputfile)
        self._outputbuf = self.create_buffer(self._outputfile, mode="w+")
        # VM output goes next to the XML output, swapping the extension
        self.vm_writer = VMWriter(self._outputfile[:-4] + ".vm")
        self._tokenizer = JackTokenizer(self._inputbuf)

    def create_buffer(self, fn, mode='r'):
        """Return an open file object for fn (path or already-open file).

        NOTE(review): uses `unicode` and `file`, i.e. Python 2 names —
        this method presumably targets a Py2/Py3 hybrid environment.
        """
        if isinstance(fn, str) or isinstance(fn, unicode):
            return open(fn, mode)
        elif isinstance(fn, file) or isinstance(fn, IOBase):
            return fn
        else:
            raise ValueError("file object show file or readable")

    def compile_class(self):
        """Compile `class className { classVarDec* subroutineDec* }`.

        The XML tree and VM output are flushed in the finally block even
        if compilation raises.
        """
        parent = self._set_parent("class")
        self._root = parent
        self._advance()
        self._pop_required(parent, TokenType.keyword, KeywordType.CLASS)
        self.class_name = self._token()[1]
        self._pop_required(parent, TokenType.identifier)
        self._pop_required(parent, TokenType.symbol, "{")
        try:
            while self._is_class_var():
                self.compile_class_var_desc()
            while self._is_subroutine():
                self.compile_subroutine()
            self._pop_required(parent, TokenType.symbol, "}")
            print(self.symbol)  # debug: dump the symbol table
        finally:
            self._outputbuf.write(
                unicode(
                    et.tostring(self._root, pretty_print=True,
                                method="c14n2").decode("utf-8")))
            self.vm_writer.close()
            self._outputbuf.close()

    def _required_type(self, token_type, val=None):
        """Assert the current token matches (token_type, val); return it.

        val is only compared for keywords and symbols.
        """
        tp, tv = self._token()
        if token_type != tp or (
                (tp == TokenType.keyword or tp == TokenType.symbol) and
                (val != tv)):
            raise ValueError("token must be %s,%s" % (token_type, val))
        return tp, tv

    def compile_class_var_desc(self):
        """Compile `(static|field) type varName (, varName)* ;` and
        define each name in the class-level symbol table."""
        parent = self._set_parent("classVarDec")
        # could be refined further: validate variable types and identifiers
        parent.append(self._build_element())
        kind = self.get_kind()
        self._advance()
        itype = self.get_type()
        parent.append(self._build_element())
        self._advance()
        while not self.is_token(TokenType.symbol, ";"):
            parent.append(self._build_element())
            # skip separators; everything else is a variable name
            if self._token()[1] != "," and self._token()[1] != ";":
                self.symbol.define(self._token()[1], itype, kind)
            self._advance()
        parent.append(self._build_element())
        self._advance()
        self._remove_parent()

    def get_kind(self):
        """Current token as a symbol-table kind string (e.g. 'static')."""
        kind = self._token()[1]
        if isinstance(kind, KeywordType):
            kind = kind.name.lower()
        return kind

    def get_type(self):
        """Current token as a type string (keyword types lowercased)."""
        itype = self._token()[1]
        if isinstance(itype, KeywordType):
            return itype.name.lower()
        return itype

    def compile_subroutine(self):
        """Compile one `(constructor|function|method) retType name (params) body`."""
        print(self.symbol)  # debug: dump the symbol table
        self.symbol.start_subroutine()
        parent = self._set_parent("subroutineDec")
        method_type = self._token()[1]
        self.method_type = method_type
        self._advance()
        self.return_type = self._token()[1]
        self._advance()
        function_name = self._token()[1]
        self._advance()
        self._pop_required(parent, TokenType.symbol, "(")
        self.compile_parameter_list()
        full_name = "{}.{}".format(self.class_name, function_name)
        self._pop_required(parent, TokenType.symbol, ")")
        self._compile_body(full_name, method_type)
        self._remove_parent()
        self.vm_writer.write_comment("end function %s" % function_name)
        self.vm_writer.write_comment("")
        # if self._tokenizer.token_type()==TokenType.KEY_WORD:

    def _compile_body(self, full_name, method_type):
        """Compile `{ varDec* statements }` and emit the function header.

        The `function` line must be written after the varDecs so the
        local-variable count is known.
        """
        parent = self._set_parent("subroutineBody")
        self._pop_required(parent, TokenType.symbol, "{")
        while self._is_var_desc():
            self.compile_var_desc()
        var_cnt = self.symbol.var_count("var")
        field_cnt = self.symbol.var_count("field")
        self.vm_writer.write_function(full_name, var_cnt)
        if method_type == KeywordType.CONSTRUCTOR:
            # constructor: allocate memory for the new object and set this
            self.vm_writer.write_push(SEG_CONSTANT, field_cnt)
            self.vm_writer.write_call("Memory.alloc", "1")
            self.vm_writer.write_pop(SEG_POINTER, "0")
        elif method_type == KeywordType.METHOD:
            # instance method: set this = argument 0
            self.vm_writer.write_push(SEG_ARG, "0")
            self.vm_writer.write_pop(SEG_POINTER, "0")
        self.compile_statements()
        self._pop_required(parent, TokenType.symbol, "}")
        self._remove_parent()

    def _remove_parent(self):
        """Close the innermost open XML element."""
        self._cur_root.pop()

    def compile_parameter_list(self):
        """Compile `(type varName (, type varName)*)?`, defining each
        parameter with kind 'arg'. Stops at (but does not consume) ')'."""
        kind = "arg"
        while not self.is_token(TokenType.symbol, ")"):
            itype = self.get_type()
            self._advance()
            name = self._token()[1]
            self.symbol.define(name, itype, kind)
            self._advance()
            # parent.append(self._build_element())
            if self.is_token(TokenType.symbol, ","):
                self._advance()

    def compile_var_desc(self):
        """Compile `var type varName (, varName)* ;` into the subroutine
        symbol table with kind 'var'."""
        parent = self._set_parent("varDec")
        self._pop_required(parent, TokenType.keyword, KeywordType.VAR)
        kind = "var"
        itype = self.get_type()
        parent.append(self._build_element())
        self._advance()
        while not self.is_token(TokenType.symbol, ";"):
            # parent.append(self._build_element())
            if not self.is_token(TokenType.symbol, ",") and not self.is_token(
                    TokenType.symbol, ";"):
                self.symbol.define(self._token()[1], itype, kind)
            self._advance()
        self._pop_required(parent, TokenType.symbol, ";")
        self._remove_parent()

    def compile_statements(self):
        """Compile statement* until a non-statement token is reached."""
        self._set_parent("statements")
        while self._is_statement():
            if self.is_let_statement():
                self.compile_let()
            if self.is_do_statement():
                self.compile_do()
            if self.is_return_statement():
                self.compile_return()
            if self.is_if_statement():
                self.compile_if()
                continue
            if self.is_while_statement():
                self.compile_while()
                continue
        self._remove_parent()

    def compile_do(self):
        """Compile `do subroutineCall ;` and discard the return value."""
        parent = self._set_parent("doStatement")
        self._pop_required(parent, TokenType.keyword, KeywordType.DO)
        type1, id1 = self._pop_required(parent, TokenType.identifier)
        self.compile_call(type1, id1)
        # do-calls ignore the returned value: pop it into temp 0
        self.vm_writer.write_pop(SEG_TEMP, 0)
        self._pop_required(parent, TokenType.symbol, ";")
        self._remove_parent()

    def compile_call(self, typ1, id1):
        """Compile a subroutine call whose first identifier (id1) has
        already been consumed.

        Three shapes: var.method(...) (push the object as this),
        Class.function(...), or bare method(...) on the current object.
        """
        parent = None
        symbol_kind = self.symbol.kind_of(id1)
        # calling through a variable?
        n_args = 0
        typ2, id2 = self._token()
        if id2 == ".":
            if symbol_kind:
                # id1 is a variable: method call on that object
                function_type = self.symbol.type_of(id1)
                # push the this pointer (the object itself)
                if symbol_kind == "arg":
                    self.vm_writer.write_push("argument",
                                              self.symbol.index_of(id1))
                elif symbol_kind == "static":
                    self.vm_writer.write_push("static",
                                              self.symbol.index_of(id1))
                elif symbol_kind == "var":
                    self.vm_writer.write_push("local",
                                              self.symbol.index_of(id1))
                elif symbol_kind == "field":
                    self.vm_writer.write_push("this",
                                              self.symbol.index_of(id1))
                n_args += 1
            else:
                # id1 is a class name: static function call, no this
                function_type = id1
            self._advance()
            _, method_name = self._pop_required(parent, TokenType.identifier)
            full_name = "%s.%s" % (function_type, method_name)
        else:
            # bare name: method call on the current object
            n_args += 1
            self.vm_writer.write_push("pointer", 0)
            function_type = self.class_name
            full_name = "%s.%s" % (function_type, id1)
        self._n_args.append(n_args)
        self._pop_required(parent, TokenType.symbol, "(")
        self.compile_expression_list()
        self._pop_required(parent, TokenType.symbol, ")")
        n_args = self._n_args.pop(-1)
        self.vm_writer.write_call(full_name, n_args=n_args)

    def compile_let(self):
        """Compile `let varName ([expr])? = expr ;`.

        Array targets compute base+index first, then route the assigned
        value through temp 0 / pointer 1 / that 0.
        """
        parent = self._set_parent("letStatement")
        self._pop_required(parent, TokenType.keyword, KeywordType.LET)
        tk, val = self._pop_required(parent, TokenType.identifier)
        seg, idx = self.get_var_seg_idx(val)
        is_arr = False
        if self.is_token(TokenType.symbol, "["):
            is_arr = True
            self._advance()
            self.compile_expression()
            self.vm_writer.write_push(seg, idx)
            self.vm_writer.write_arithmetic("+")
            self._pop_required(parent, TokenType.symbol, "]")
        # may be an array target
        # (original note: "replace regex" — intent unclear, left as-is)
        self._pop_required(parent, TokenType.symbol, "=")
        self.compile_expression()
        if is_arr:
            self.vm_writer.write_pop(SEG_TEMP, "0")
            self.vm_writer.write_pop(SEG_POINTER, "1")
            self.vm_writer.write_push(SEG_TEMP, "0")
            self.vm_writer.write_pop(SEG_THAT, "0")
        else:
            self.vm_writer.write_pop(seg, idx)
        self._pop_required(parent, TokenType.symbol, ";")
        self._remove_parent()

    def compile_while(self):
        """Compile `while (expr) { statements }`.

        Emits: label1; <cond>; not; if-goto label2; <body>;
        goto label1; label2.
        """
        self.vm_writer.write_comment("start while")
        parent = self._set_parent("whileStatement")
        self._pop_required(parent, TokenType.keyword, KeywordType.WHILE)
        label1 = self._get_label()
        self.vm_writer.write_label(label1)
        label2 = self._get_label()
        self._pop_required(parent, TokenType.symbol, "(")
        self.compile_expression()
        self.vm_writer.write_arithmetic("~")
        self._pop_required(parent, TokenType.symbol, ")")
        self.vm_writer.write_if(label2)
        self._pop_required(parent, TokenType.symbol, "{")
        self.compile_statements()
        self._pop_required(parent, TokenType.symbol, "}")
        self.vm_writer.write_goto(label1)
        self.vm_writer.write_label(label2)
        self._remove_parent()
        self.vm_writer.write_comment("end while")

    def compile_return(self):
        """Compile `return expr? ;`; void subroutines push constant 0."""
        parent = self._set_parent("returnStatement")
        self._pop_required(parent, TokenType.keyword, KeywordType.RETURN)
        if not self.is_token(TokenType.symbol, ";"):
            self.compile_expression()
        self._pop_required(parent, TokenType.symbol, ";")
        if self.return_type == KeywordType.VOID:
            self.vm_writer.write_push(SEG_CONSTANT, 0)
        self.vm_writer.write_return()
        self._remove_parent()

    def compile_if(self):
        """Compile `if (expr) { statements } (else { statements })?`.

        Emits: <cond>; not; if-goto label1; <then>; goto label2;
        label1; [<else>]; label2.
        """
        parent = self._set_parent("ifStatement")
        self.vm_writer.write_comment("compile if")
        self._pop_required(parent, TokenType.keyword, KeywordType.IF)
        self._pop_required(parent, TokenType.symbol, "(")
        label1 = self._get_label()
        label2 = self._get_label()
        self.compile_expression()
        self.vm_writer.write_arithmetic("~")
        self.vm_writer.write_if(label1)
        self._pop_required(parent, TokenType.symbol, ")")
        self._pop_required(parent, TokenType.symbol, "{")
        self.compile_statements()
        self._pop_required(parent, TokenType.symbol, "}")
        self.vm_writer.write_goto(label2)
        self.vm_writer.write_label(label1)
        if self.is_token(TokenType.keyword, KeywordType.ELSE):
            self._pop_required(parent, TokenType.keyword, KeywordType.ELSE)
            self._pop_required(parent, TokenType.symbol, "{")
            self.compile_statements()
            self._pop_required(parent, TokenType.symbol, "}")
        self.vm_writer.write_label(label2)
        self._remove_parent()
        self.vm_writer.write_comment(" if end")

    def compile_expression(self):
        """Compile term (op term)* left to right.

        Operators are queued and flushed once two operands are on the
        stack. NOTE(review): ops remaining in the queue when the loop
        exits are never emitted here — verify against the op_count
        bookkeeping.
        """
        parent = self._set_parent("expression")
        op_count = 0
        ops = []
        while not self._is_end():
            self.compile_term()
            if self._is_op(False):
                _, op = self._token()
                self._advance()
                ops.append(op)
                op_count += 1
            if op_count >= 2:
                print(ops)  # debug: pending operator queue
                self.vm_writer.write_arithmetic(ops.pop(0))
        # parent.append(self._build_element())
        # self._advance()
        self._remove_parent()

    def compile_term(self):
        """Compile one term: constant, keyword constant, (expr),
        unary op, variable, array access, or subroutine call."""
        parent = self._set_parent("term")
        first = True
        while not self._is_op(first) and not self._is_end():
            first = False
            if self.is_token(TokenType.symbol, "("):
                self._advance()
                self.compile_expression()
                self._pop_required(parent, TokenType.symbol, ")")
            elif self._is_unary_op():
                token, op = self._token()
                self._advance()
                # unary minus maps to VM "neg"; "~" passes through as not
                op = "neg" if op == "-" else op
                self.compile_term()
                self.vm_writer.write_arithmetic(op)
                continue
            elif self.is_token(TokenType.identifier):
                tk, val = self._pop_required(parent, TokenType.identifier)
                if self.is_token(TokenType.symbol, "(") or self.is_token(
                        TokenType.symbol, "."):
                    self.compile_call(tk, val)
                elif self.is_token(TokenType.symbol, "["):
                    self._advance()
                    self.compile_expression()
                    seg, idx = self.get_var_seg_idx(val)
                    self.vm_writer.write_push(seg, idx)
                    # arrays: compute base+offset directly, access via that[0]
                    # fixme: constant-index access like a[0]
                    self.vm_writer.write_arithmetic("+")
                    self.vm_writer.write_pop(SEG_POINTER, "1")
                    self.vm_writer.write_push(SEG_THAT, "0")
                    self._pop_required(parent, TokenType.symbol, "]")
                else:
                    # plain variable
                    seg, idx = self.get_var_seg_idx(val)
                    self.vm_writer.write_push(seg, idx)
            else:
                tk, val = self._token()
                if self.is_token(TokenType.integerConstant):
                    self.vm_writer.write_push(SEG_CONSTANT, val)
                elif self.is_token(TokenType.keyword, KeywordType.TRUE):
                    # true is -1: push 0 then bitwise not
                    self.vm_writer.write_push(SEG_CONSTANT, "0")
                    self.vm_writer.write_arithmetic("~")
                elif self.is_token(TokenType.keyword, KeywordType.FALSE):
                    self.vm_writer.write_push(SEG_CONSTANT, "0")
                elif self.is_token(TokenType.keyword, KeywordType.NULL):
                    self.vm_writer.write_push(SEG_CONSTANT, "0")
                elif self.is_token(TokenType.keyword, KeywordType.THIS):
                    self.vm_writer.write_push(SEG_POINTER, "0")
                elif self.is_token(TokenType.stringConstant):
                    # build the string at runtime, one appendChar per char
                    str_len = len(val)
                    self.vm_writer.write_push(SEG_CONSTANT, str(str_len))
                    self.vm_writer.write_call("String.new", "1")
                    for idx, x in enumerate(val):
                        self.vm_writer.write_push(SEG_CONSTANT, str(ord(x)))
                        self.vm_writer.write_call("String.appendChar", '2')
                self._advance()
        self._remove_parent()

    def _pop_required(self, parent, tk, val=None):
        """Assert the current token, consume it, and return (type, value)."""
        tk, val = self.required(tk, val)
        self._advance()
        return tk, val

    def _is_op(self, first):
        """True if the current token is a binary operator.

        '-' only counts as binary when it is not the first token of a
        term (otherwise it is unary minus).
        """
        tk, val = self._token()
        return tk == TokenType.symbol and val in '+*/&|<>=' or (
            val == '-' and not first)

    def _is_unary_op(self):
        """True if the current token is unary '-' or '~'."""
        tk, val = self._token()
        return tk == TokenType.symbol and val in '-~'

    def compile_expression_list(self):
        """Compile `(expr (, expr)*)?` and add the count to the top of
        the _n_args stack."""
        parent = self._set_parent("expressionList")
        n_args = self._n_args[-1]
        while not self.is_token(TokenType.symbol, ")"):
            n_args += 1
            self.compile_expression()
            if self.is_token(TokenType.symbol, ","):
                self._pop_required(parent, TokenType.symbol, ",")
        self._n_args[-1] = n_args
        self._remove_parent()

    def build_identifier(self):
        """XML <identifier> element for the current token."""
        e = et.Element("identifier")
        e.text = self._tokenizer.identifier()
        return e

    def build_keyword(self):
        """XML <keyword> element for the current token."""
        e = et.Element("keyword")
        e.text = self._tokenizer.keyword().name.lower()
        return e

    def build_symbol(self):
        """XML <symbol> element for the current token."""
        e = et.Element("symbol")
        e.text = self._tokenizer.symbol()
        return e

    def _token(self):
        """Return the current token as (TokenType, value); (None, None)
        for unrecognized token types."""
        # if self._tokenizer.line > 44:
        #     raise ValueError("debug guard: stop translating here")
        token_type = self._tokenizer.token_type()
        if self._tokenizer.token_type() == TokenType.keyword:
            a, b = token_type, self._tokenizer.keyword()
        elif self._tokenizer.token_type() == TokenType.symbol:
            a, b = token_type, self._tokenizer.symbol()
        elif self._tokenizer.token_type() == TokenType.identifier:
            a, b = token_type, self._tokenizer.identifier()
        elif self._tokenizer.token_type() == TokenType.integerConstant:
            a, b = token_type, self._tokenizer.intVal()
        elif self._tokenizer.token_type() == TokenType.stringConstant:
            a, b = token_type, self._tokenizer.stringVal()
        else:
            a, b = None, None
        print(a, b, self._tokenizer.line)  # debug: trace every token
        return a, b

    def _advance(self):
        """Advance the tokenizer if more tokens remain."""
        if self._tokenizer.has_more_tokens():
            self._tokenizer.advance()

    def required(self, token, val=None):
        """Public wrapper around _required_type."""
        return self._required_type(token, val)

    def _build_element(self):
        """XML element named after the current token's type, with the
        token's value as text (keywords lowercased)."""
        a, b = self._token()
        e = et.Element(a.name)
        if isinstance(b, KeywordType):
            e.text = b.name.lower()
        else:
            e.text = b
        return e

    def _is_class_var(self):
        """True if the current token starts a classVarDec."""
        return self.is_token(TokenType.keyword,
                             KeywordType.FIELD) or self.is_token(
                                 TokenType.keyword, KeywordType.STATIC)

    def is_token(self, token, val=None):
        """Check current token type, and value when val is given."""
        t, v = self._token()
        if val is not None:
            return t == token and v == val
        else:
            return t == token

    def _get_parent(self):
        """Innermost open XML element, or None at the top level."""
        if len(self._cur_root) > 0:
            return self._cur_root[-1]
        else:
            return None

    def _set_parent(self, name):
        """Open a new XML element under the current parent and push it."""
        parent = self._get_parent()
        ele2 = et.Element(name)
        if parent is not None:
            parent.append(ele2)
        self._cur_root.append(ele2)
        return ele2

    def _is_subroutine(self):
        """True if the current token starts a subroutineDec."""
        return self.is_token(TokenType.keyword, KeywordType.FUNCTION) \
            or self.is_token(TokenType.keyword, KeywordType.CONSTRUCTOR) \
            or self.is_token(TokenType.keyword, KeywordType.METHOD)

    def _is_statement(self):
        """True if the current token starts any statement."""
        if self.is_let_statement():
            return True
        if self.is_do_statement():
            return True
        if self.is_return_statement():
            return True
        if self.is_if_statement():
            return True
        if self.is_while_statement():
            return True

    def is_while_statement(self):
        return self.is_token(TokenType.keyword, KeywordType.WHILE)

    def is_let_statement(self):
        return self.is_token(TokenType.keyword, KeywordType.LET)

    def is_do_statement(self):
        return self.is_token(TokenType.keyword, KeywordType.DO)

    def is_return_statement(self):
        return self.is_token(TokenType.keyword, KeywordType.RETURN)

    def is_if_statement(self):
        return self.is_token(TokenType.keyword, KeywordType.IF)

    def _is_var_desc(self):
        """True if the current token starts a varDec."""
        return self.is_token(TokenType.keyword, KeywordType.VAR)

    def _is_end(self):
        """True at an expression terminator: ; ) , ].

        NOTE(review): the ';' check appears twice — redundant but harmless.
        """
        return self.is_token(TokenType.symbol, ";") or \
            self.is_token(TokenType.symbol, ";") \
            or self.is_token(TokenType.symbol, ")") \
            or self.is_token(TokenType.symbol, ",") \
            or self.is_token(TokenType.symbol, "]")

    def get_var_seg_idx(self, val):
        """Map a variable name to its (VM segment, index).

        In a method, argument indices shift by 1 because argument 0 is
        the implicit this.
        """
        kind = self.symbol.kind_of(val)
        idx = self.symbol.index_of(val)
        if kind == "static":
            return SEG_STATIC, idx
        elif kind == "var":
            return SEG_LOCAL, idx
        elif kind == "field":
            return SEG_THIS, idx
        elif kind == "arg":
            if self.method_type == KeywordType.METHOD:
                idx += 1
            return SEG_ARG, idx

    def _get_label(self):
        """Return a fresh unique VM label."""
        label = "label_%s" % self._label_cnt
        self._label_cnt += 1
        return label
class CompilationEngine:
    """XML-only compilation engine: walks the token stream and writes an
    indented parse-tree XML file (no VM code generation).

    NOTE: this class's definition continues beyond the visible chunk
    (compile_term is truncated here).
    """

    def __init__(self, input_file, output_file):
        self.jack_tokenizer = JackTokenizer(input_file)
        self.output = open(output_file, "w")
        self.level = 0        # current XML indentation depth
        self.is_unary = False  # flag: next '-' is unary, not binary

    def compile_class(self):
        """Compile `class className { classVarDec* subroutineDec* }`."""
        self.print_title("class", True)
        self.level += 1
        # "class className {"
        for i in range(NUM_TOKENS_CLASS_DEC):
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == KEYWORD:
                self.print_tag(token_type, self.jack_tokenizer.key_word())
            if token_type == IDENTIFIER:
                self.print_tag(token_type, self.jack_tokenizer.identifier())
            if token_type == SYMBOL:
                self.print_tag(token_type, self.jack_tokenizer.symbol())
        # classVarDec* or SubroutineDec*
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            curr_keyword = self.jack_tokenizer.key_word()  # NOTE(review): unused
            if token_type == KEYWORD and (self.jack_tokenizer.key_word() == "static" or
                                          self.jack_tokenizer.key_word() == "field"):
                self.compile_class_var_dec()
            if token_type == KEYWORD and (self.jack_tokenizer.key_word() == "function" or
                                          self.jack_tokenizer.key_word() == "method" or
                                          self.jack_tokenizer.key_word() == "constructor"):
                self.compile_subroutine()
                # self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                # break
            if token_type == SYMBOL and self.jack_tokenizer.symbol() == "}":
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                break
        self.level -= 1
        self.print_title("class", False)

    def compile_class_var_dec(self):
        """Compile `(static|field) type varName (, varName)* ;`."""
        self.print_title("classVarDec", True)
        self.level += 1
        # "static" or "field"
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == KEYWORD:
                self.print_tag(token_type, self.jack_tokenizer.key_word())
            elif token_type == IDENTIFIER:
                self.print_tag(token_type, self.jack_tokenizer.identifier())
            elif token_type == SYMBOL:
                self.print_tag(token_type, self.jack_tokenizer.symbol())
                if self.jack_tokenizer.symbol() == ";":
                    break
        self.level -= 1
        self.print_title("classVarDec", False)

    def compile_subroutine(self):
        """Compile `(constructor|function|method) retType name (params) body`."""
        self.print_title("subroutineDec", True)
        self.level += 1
        # "constructor" or "function" or "method"
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == KEYWORD:
                self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
            elif token_type == IDENTIFIER:
                self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
            elif token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == "(":
                    self.print_tag(token_type, self.jack_tokenizer.symbol())
                    self.compile_parameter_list()
                    # should print ")"
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                # if self.jack_tokenizer.symbol() == "}":
                #     break
                if self.jack_tokenizer.symbol() == '{':
                    self.compile_subroutine_body()
                    break
        self.level -= 1
        self.print_title("subroutineDec", False)

    def compile_subroutine_body(self):
        """Compile `{ varDec* statements }`."""
        self.print_title("subroutineBody", True)
        self.level += 1
        self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == SYMBOL:
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            if token_type == KEYWORD:
                if self.jack_tokenizer.key_word() == "var":
                    self.compile_var_dec()
                    continue
                else:
                    self.compile_statements()
                    # print "}"
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    break
        self.level -= 1
        self.print_title("subroutineBody", False)

    def compile_parameter_list(self):
        """Compile `(type varName (, type varName)*)?` up to ')'."""
        self.print_title("parameterList", True)
        self.level += 1
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == KEYWORD:
                self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
            elif token_type == IDENTIFIER:
                self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
            else:
                if self.jack_tokenizer.symbol() == ")":
                    break
                else:
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
        self.level -= 1
        self.print_title("parameterList", False)

    def compile_var_dec(self):
        """Compile `var type varName (, varName)* ;`."""
        self.print_title("varDec", True)
        self.level += 1
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == KEYWORD:
                self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
            if token_type == IDENTIFIER:
                self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
            if token_type == SYMBOL:
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                if self.jack_tokenizer.symbol() == ";":
                    break
        self.level -= 1
        self.print_title("varDec", False)

    def compile_statements(self):
        """Compile statement* until the closing '}' of the current block."""
        self.print_title("statements", True)
        self.level += 1
        while True:
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "do":
                self.compile_do()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "let":
                self.compile_let()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "while":
                self.compile_while()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "return":
                self.compile_return()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "if":
                self.compile_if()
            if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "}":
                break
        self.level -= 1
        self.print_title("statements", False)

    def compile_do(self):
        """Compile `do subroutineCall ;`."""
        self.print_title("doStatement", True)
        self.level += 1
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        self.compile_subroutine_call()
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # return from compile_subroutine_call with ";"
        self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
        self.level -= 1
        self.print_title("doStatement", False)

    def compile_let(self):
        """Compile `let varName ([expr])? = expr ;`."""
        self.print_title("letStatement", True)
        self.level += 1
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == IDENTIFIER:
                self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
                #continue
            if token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == "[":  # or self.jack_tokenizer.symbol() == "=":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                    # print "]"
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                # should return from the compile_expression only with ";" or "]"
                if self.jack_tokenizer.symbol() == "=":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                if self.jack_tokenizer.symbol() == ";":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    break
        self.level -= 1
        self.print_title("letStatement", False)

    def compile_while(self):
        """Compile `while (expr) { statements }`."""
        self.print_title("whileStatement", True)
        self.level += 1
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == "(":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                if self.jack_tokenizer.symbol() == "{":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_statements()
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            if token_type == SYMBOL and self.jack_tokenizer.symbol() == "}":
                break
        self.level -= 1
        self.print_title("whileStatement", False)

    def compile_return(self):
        """Compile `return expr? ;`."""
        self.print_title("returnStatement", True)
        self.level += 1
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ";":
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
        else:
            self.compile_expression()
            # should return from "compile_expression" only with ";"
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
        self.level -= 1
        self.print_title("returnStatement", False)

    def compile_if(self):
        """Compile `if (expr) { statements } (else { statements })?`."""
        self.print_title("ifStatement", True)
        self.level += 1
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == "(":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                if self.jack_tokenizer.symbol() == "{":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_statements()
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            if token_type == SYMBOL and self.jack_tokenizer.symbol() == "}":
                break
        # look ahead for an optional else clause
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "else":
            # print "else"
            self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # print "{"
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            self.compile_statements()
            # print "}"
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
        self.level -= 1
        self.print_title("ifStatement", False)

    def compile_subroutine_call(self):
        """Compile `name(args)` or `(className|varName).name(args)`."""
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "(":
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            self.compile_expression_list()
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
        # (className | varName).subroutineName(expressionList)
        elif self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ".":
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # subroutineName
            self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # "("
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            # expressionList
            self.compile_expression_list()
            # ")"
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())

    def compile_expression(self):
        """Compile term (op term)*.

        NOTE(review): the operator list repeats "&", "<", ">" — these
        duplicates look like HTML-decoded &amp;/&lt;/&gt; entities from
        the original source; verify against the intended escape handling.
        """
        self.print_title("expression", True)
        self.level += 1
        if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "-":
            self.is_unary = True
        self.compile_term()
        while self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() in\
                ["+", "-", "*", "/", "&", "|", "<", ">", "=", "&", "<", ">"]:
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "-":
                self.is_unary = True
            self.compile_term()
        self.level -= 1
        self.print_title("expression", False)

    def compile_term(self):
        """Compile one term (definition continues beyond this chunk).

        NOTE(review): the terminator lists repeat "&", "<", ">" — likely
        decoded &amp;/&lt;/&gt; entities; verify.
        """
        keywords_list = ["true", "false", "null", "this"]
        self.print_title("term", True)
        self.level += 1
        while True:
            token_type = self.jack_tokenizer.token_type()
            if token_type == SYMBOL and not self.is_unary and self.jack_tokenizer.symbol() in\
                    [",", ";", ")", "}", "]", "+", "-", "*", "/", "&", "|", "<", ">", "=", "&", "<", ">"]:
                break
            if token_type == INT_CONST:
                self.print_tag(INT_CONST, self.jack_tokenizer.int_val())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == STRING_CONST:
                self.print_tag(STRING_CONST, self.jack_tokenizer.string_val())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == KEYWORD and self.jack_tokenizer.key_word() in keywords_list:
                self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == SYMBOL and self.jack_tokenizer.symbol() in ["~", "-"]:
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                self.is_unary = False
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                self.compile_term()
                break
            if token_type == SYMBOL and self.jack_tokenizer.symbol() == "(":
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                self.compile_expression()
                # should return from compile_expression only with ")"
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == IDENTIFIER:
                self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() in\
                        [",", ";", ")", "}", "]", "+", "-", "*", "/", "&", "|", "<", ">", "=", "&", "<", ">"]:
                    break
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "[":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                    # should print only "]"
self.print_tag(SYMBOL, self.jack_tokenizer.symbol()) self.jack_tokenizer.has_more_tokens() self.jack_tokenizer.advance() break if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "(": self.print_tag(SYMBOL, self.jack_tokenizer.symbol()) self.compile_expression_list() self.print_tag(SYMBOL, self.jack_tokenizer.symbol()) self.jack_tokenizer.has_more_tokens() self.jack_tokenizer.advance() break # (className | varName).subroutineName(expressionList) if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ".": self.print_tag(SYMBOL, self.jack_tokenizer.symbol()) self.jack_tokenizer.has_more_tokens() self.jack_tokenizer.advance() # subroutineName self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier()) self.jack_tokenizer.has_more_tokens() self.jack_tokenizer.advance() # "(" self.print_tag(SYMBOL, self.jack_tokenizer.symbol()) # expressionList self.compile_expression_list() # ")" self.print_tag(SYMBOL, self.jack_tokenizer.symbol()) self.jack_tokenizer.has_more_tokens() self.jack_tokenizer.advance() self.level -= 1 self.print_title("term", False) def compile_expression_list(self): self.print_title("expressionList", True) self.level += 1 while self.jack_tokenizer.has_more_tokens(): self.jack_tokenizer.advance() if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ")": break else: self.compile_expression() if self.jack_tokenizer.symbol() == ")": break # print "," self.print_tag(SYMBOL, self.jack_tokenizer.symbol()) self.level -= 1 self.print_title("expressionList", False) def print_tag(self, token_type, value): tabs = "" tag = "" for i in range(self.level): tabs += "\t" if token_type == KEYWORD: tag = "<keyword> " + value + " </keyword>\n" elif token_type == SYMBOL: tag = "<symbol> " + value + " </symbol>\n" elif token_type == IDENTIFIER: tag = "<identifier> " + value + " </identifier>\n" elif token_type == INT_CONST: tag = "<integerConstant> " + value + " </integerConstant>\n" elif 
token_type == STRING_CONST: tag = "<stringConstant> " + value + " </stringConstant>\n" else: tag = "<" + value + ">" + " </" + value + ">\n" self.output.write(tabs + tag) def print_title(self, title, is_title): tabs = "" for i in range(self.level): tabs += "\t" if is_title: self.output.write(tabs + "<" + title + ">\n") # print closer else: self.output.write(tabs + "</" + title + ">\n")
class CompilationEngine():
    """Compile a tokenized .jack class directly to VM code.

    Drives a JackTokenizer over the input, tracks identifiers in a
    SymbolTable and emits code through a VMWriter.  Compilation starts
    immediately from the constructor and runs to completion.
    """

    # Binary operators of the Jack expression grammar.
    op = ['+', '-', '*', '/', '&', '|', '<', '>', '=']

    def __init__(self, input, output):
        print('Opened ' + input + ' for compiling.')
        self.input = input
        # Instantiate different modules
        self.tokenizer = JackTokenizer(input)
        self.symbolTable = SymbolTable()
        self.vmWriter = VMWriter(output)
        # Unique number - used for labels
        self.uniqueNo = -1
        # Load up the first token
        self.tokenizer.advance()
        # Call compileClass to start the compilation
        self.compileClass()

    def subTag(self, _tag):
        # Leftover hook from the XML-emitting engine; must never be reached.
        # (An unreachable sys.exit() after the raise was removed.)
        print('Subtag encountered - fix this')
        raise NameError

    def subTagIdentifier(self, name, category, new, kind, index):
        # Leftover hook from the XML-emitting engine; must never be reached.
        print('Subtag encountered - fix this')
        raise NameError

    def getUniqueNo(self):
        """Return a fresh number (as a string) for building unique labels."""
        self.uniqueNo += 1
        return str(self.uniqueNo)

    def compileClass(self):
        """Compile a complete class; current token is the CLASS keyword."""
        # Keyword: class
        self.tokenizer.advance()
        # Identifier: class name (classes are not entered into symboltable)
        self.className = self.tokenizer.identifier()
        self.tokenizer.advance()
        # Symbol: {
        self.tokenizer.advance()
        # classVarDec or Subroutine
        # Access token directly to circumvent error checking
        while self.tokenizer.rawToken() != '}':
            if self.tokenizer.keyWord() in ['STATIC', 'FIELD']:
                self.compileClassVarDec()
            elif self.tokenizer.keyWord() in ['CONSTRUCTOR', 'FUNCTION', 'METHOD']:
                self.compileSubroutine()
        # Symbol: } - do not advance, we are done
        self.close()

    def close(self):
        """Close the writer and tokenizer once compilation is finished."""
        self.vmWriter.close()
        self.tokenizer.close()
        print('Finished compiling ' + self.input + '.')

    def compileClassVarDec(self):
        """Compile a static/field declaration; current token is the keyword."""
        if self.tokenizer.keyWord() == 'FIELD':
            _kind = 'FIELD'
        elif self.tokenizer.keyWord() == 'STATIC':
            _kind = 'STATIC'
            # NOTE(review): static variables are not implemented yet
            raise NotImplementedError
        self.tokenizer.advance()
        # Keyword: type | identifier (if class)
        try:
            _type = self.tokenizer.keyWord()
        except TokenTypeError:
            _type = self.tokenizer.identifier()
        self.tokenizer.advance()
        # Identifier: varName - declare in symboltable
        self.symbolTable.define(self.tokenizer.identifier(), _type, _kind)
        self.tokenizer.advance()
        # Compile any other varDecs on the same line (of the same type)
        while self.tokenizer.symbol() == ',':
            self.tokenizer.advance()
            # Identifier: varName - declare in symboltable
            self.symbolTable.define(self.tokenizer.identifier(), _type, _kind)
            self.tokenizer.advance()
        # Symbol: ;
        self.tokenizer.advance()

    def compileSubroutine(self):
        """Compile a constructor/function/method, including its body."""
        # Create new subroutine scoped symbol table
        self.symbolTable.startSubroutine()
        # Keyword: constructor | function | method
        subroutineKind = self.tokenizer.keyWord()
        self.tokenizer.advance()
        # Keyword: void | type | identifier (if class)
        self.tokenizer.advance()
        # Identifier: subroutineName
        subroutineName = self.tokenizer.identifier()
        self.tokenizer.advance()
        # Symbol: (
        self.tokenizer.advance()
        self.compileParameterList()
        # Symbol: )
        self.tokenizer.advance()
        ### START SUBROUTINE BODY ###
        # Symbol: {
        self.tokenizer.advance()
        # varDecs first - the function declaration is written 'late' so that
        # nLocals is known (varDec does not itself write VM code)
        while self.tokenizer.keyWord() == 'VAR':
            self.compileVarDec()
        self.vmWriter.writeFunction(self.className + '.' + subroutineName,
                                    self.symbolTable.varCount('LOCAL'))
        if subroutineKind == 'CONSTRUCTOR':
            # alloc() space for the field variables, then point THIS at it
            self.vmWriter.writePush('constant', self.symbolTable.varCount('FIELD'))
            self.vmWriter.writeCall('Memory.alloc', 1)
            self.vmWriter.writePop('pointer', 0)
        elif subroutineKind == 'METHOD':
            # Set 'this' pointer from the implicit first argument
            self.vmWriter.writePush('argument', 0)
            self.vmWriter.writePop('pointer', 0)
        self.compileStatements()
        # Symbol: }
        self.tokenizer.advance()
        ### END SUBROUTINE BODY ###

    def compileParameterList(self):
        """Compile a (possibly empty) parameter list into the symbol table."""
        # assume pointer is on the type of the first parameter, or on ')'
        # if there are no parameters.
        # FIX: was `is not ')'` - identity test on a string literal
        if self.tokenizer.rawToken() != ')':
            run_once = True
            while self.tokenizer.rawToken() == ',' or run_once:
                if not run_once:
                    # Symbol: ,
                    self.tokenizer.advance()
                # Keyword: type
                _type = self.tokenizer.keyWord()
                self.tokenizer.advance()
                # Identifier: varName - declare in symboltable
                self.symbolTable.define(self.tokenizer.identifier(), _type,
                                        'ARGUMENT')
                self.tokenizer.advance()
                run_once = False

    def compileVarDec(self):
        """Compile a var declaration; current token is the VAR keyword."""
        # Keyword: var
        self.tokenizer.advance()
        # Keyword: type | identifier (if class)
        try:
            _type = self.tokenizer.keyWord()
        except TokenTypeError:
            _type = self.tokenizer.identifier()
        finally:
            self.tokenizer.advance()
        # Identifier: varName - symbol table only, no VM code required here
        self.symbolTable.define(self.tokenizer.identifier(), _type, 'LOCAL')
        self.tokenizer.advance()
        # Further varNames
        while self.tokenizer.symbol() == ',':
            # Symbol: ,
            self.tokenizer.advance()
            # Identifier: varName
            self.symbolTable.define(self.tokenizer.identifier(), _type, 'LOCAL')
            self.tokenizer.advance()
        # Symbol: ;
        self.tokenizer.advance()

    def compileStatements(self):
        """Compile statements until the enclosing '}' is reached.

        Each nested compile method advances the tokenizer past its own
        statement, so no advance is needed here.
        """
        # FIX: was `is not '}'` - identity test on a string literal
        while self.tokenizer.rawToken() != '}':
            if self.tokenizer.keyWord() == 'LET':
                self.compileLet()
            elif self.tokenizer.keyWord() == 'IF':
                self.compileIf()
            elif self.tokenizer.keyWord() == 'WHILE':
                self.compileWhile()
            elif self.tokenizer.keyWord() == 'DO':
                self.compileDo()
            elif self.tokenizer.keyWord() == 'RETURN':
                self.compileReturn()
            else:
                raise TokenTypeError('Statement keyword',
                                     self.tokenizer.tokenType(),
                                     self.tokenizer.rawToken(),
                                     self.tokenizer.lineNo)

    def compileSubroutineCall(self):
        """Compile subroutineName(...) or (className|varName).name(...)."""
        # Check symboltable: an instantiated object's type is needed to
        # call a method on it
        if self.symbolTable.typeOf(self.tokenizer.identifier()):
            # Declared variable, so assume instantiated class object
            targetObject = self.tokenizer.identifier()
            subroutineName = self.symbolTable.typeOf(targetObject)
        else:
            # Not declared, assume we are calling it on the class directly
            subroutineName = self.tokenizer.identifier()
            targetObject = None
        self.tokenizer.advance()
        thisArg = 0
        # Symbol: '.' (className.subroutineName) or '(' (subroutineName)
        if self.tokenizer.symbol() == ".":
            subroutineName += self.tokenizer.symbol()
            self.tokenizer.advance()
            # Identifier: subroutineName
            subroutineName += self.tokenizer.identifier()
            # Push the object pointer (if any) so it becomes the method's
            # implicit first argument
            if targetObject is not None and self.symbolTable.kindOf(targetObject):
                if self.symbolTable.kindOf(targetObject) == 'field':
                    self.vmWriter.writePush('this',
                                            self.symbolTable.indexOf(targetObject))
                else:
                    self.vmWriter.writePush(self.symbolTable.kindOf(targetObject),
                                            self.symbolTable.indexOf(targetObject))
                thisArg = 1
            self.tokenizer.advance()
        elif self.tokenizer.symbol() == '(':
            # Method call from within the same class: push 'this' as first arg
            self.vmWriter.writePush('pointer', 0)
            thisArg = 1
            # Prepend className so we have a complete vm function name
            subroutineName = self.className + '.' + subroutineName
        # Symbol: (
        self.tokenizer.advance()
        nArgs = self.compileExpressionList()
        # Symbol: )
        self.tokenizer.advance()
        # Write function call
        self.vmWriter.writeCall(subroutineName, nArgs + thisArg)

    def compileDo(self):
        """Compile a do statement; current token is the DO keyword."""
        # Keyword: do
        self.tokenizer.advance()
        self.compileSubroutineCall()
        # FIX: a do statement must discard the callee's return value,
        # otherwise the stack grows with every call
        self.vmWriter.writePop('temp', 0)
        # Symbol: ;
        self.tokenizer.advance()

    def compileLet(self):
        """Compile a let statement; current token is the LET keyword."""
        # Keyword: let
        self.tokenizer.advance()
        # Identifier: varName
        varName = self.tokenizer.identifier()
        self.tokenizer.advance()
        # index if applicable
        if self.tokenizer.symbol() == '[':
            # NOTE(review): the index expression is compiled but its value is
            # never used - array element assignment looks unimplemented
            # Symbol: [
            self.tokenizer.advance()
            self.compileExpression()
            # Symbol: ]
            self.tokenizer.advance()
        # Symbol: =
        self.tokenizer.advance()
        self.compileExpression()
        # Symbol: ;
        self.tokenizer.advance()
        # Pop from top of stack to the variable
        if self.symbolTable.kindOf(varName) == 'field':
            self.vmWriter.writePop('this', self.symbolTable.indexOf(varName))
        else:
            self.vmWriter.writePop(self.symbolTable.kindOf(varName),
                                   self.symbolTable.indexOf(varName))

    def compileWhile(self):
        """Compile a while statement; current token is the WHILE keyword."""
        uniqueNo = self.getUniqueNo()
        # Keyword: while
        self.tokenizer.advance()
        # Symbol: (
        self.tokenizer.advance()
        self.vmWriter.writeLabel('startWhile' + uniqueNo)
        self.compileExpression()
        # Jump if expression is FALSE: pushing constant 1 and adding inverts
        # the truthiness of a 0/-1 test value
        self.vmWriter.writePush('constant', 1)
        self.vmWriter.writeArithmetic('ADD')
        self.vmWriter.writeIf('endWhile' + uniqueNo)
        # Symbol: )
        self.tokenizer.advance()
        # Symbol: {
        self.tokenizer.advance()
        self.compileStatements()
        self.vmWriter.writeGoto('startWhile' + uniqueNo)
        self.vmWriter.writeLabel('endWhile' + uniqueNo)
        # Symbol: }
        self.tokenizer.advance()

    def compileReturn(self):
        """Compile a return statement; current token is the RETURN keyword."""
        # Keyword: return
        self.tokenizer.advance()
        # Symbol: ; or expression then ;
        # FIX: was `is not ';'` - identity test on a string literal
        if self.tokenizer.rawToken() != ';':
            self.compileExpression()
        else:
            # No return value - push constant 0
            self.vmWriter.writePush('constant', 0)
        # Symbol: ;
        self.tokenizer.advance()
        self.vmWriter.writeReturn()

    def compileIf(self):
        """Compile an if / if-else statement; current token is IF."""
        uniqueNo = self.getUniqueNo()
        # Keyword: if
        self.tokenizer.advance()
        # Symbol: (
        self.tokenizer.advance()
        self.compileExpression()
        # Jump if expression is FALSE (same +1 truthiness trick as while)
        self.vmWriter.writePush('constant', 1)
        self.vmWriter.writeArithmetic('ADD')
        self.vmWriter.writeIf('startElse' + uniqueNo)
        # Symbol: )
        self.tokenizer.advance()
        # Symbol: {
        self.tokenizer.advance()
        self.compileStatements()
        # Symbol: }
        self.tokenizer.advance()
        self.vmWriter.writeGoto('endIf' + uniqueNo)
        self.vmWriter.writeLabel('startElse' + uniqueNo)
        try:
            if self.tokenizer.keyWord() == 'ELSE':
                # keyword: else
                self.tokenizer.advance()
                # symbol: {
                self.tokenizer.advance()
                self.compileStatements()
                # symbol: }
                self.tokenizer.advance()
        except TokenTypeError:
            # Next token is not a keyword at all - there is no else clause
            pass
        self.vmWriter.writeLabel('endIf' + uniqueNo)

    def compileExpression(self):
        """Compile an expression: term (op term)*."""
        self.compileTerm()
        while self.tokenizer.symbol() in CompilationEngine.op:
            # Save op for writing after its right-hand term
            op = self.tokenizer.symbol()
            self.tokenizer.advance()
            self.compileTerm()
            if op == '+':
                self.vmWriter.writeArithmetic('ADD')
            elif op == '-':
                self.vmWriter.writeArithmetic('SUB')
            elif op == '=':
                self.vmWriter.writeArithmetic('EQ')
            elif op == '>':
                self.vmWriter.writeArithmetic('GT')
            elif op == '<':
                self.vmWriter.writeArithmetic('LT')
            elif op == '&':
                self.vmWriter.writeArithmetic('AND')
            elif op == '|':
                self.vmWriter.writeArithmetic('OR')
            elif op == '~':
                self.vmWriter.writeArithmetic('NOT')
            elif op == '*':
                self.vmWriter.writeCall('Math.multiply', 2)

    def compileTerm(self):
        """Compile one term of an expression and leave its value on the stack."""
        tokenType = self.tokenizer.tokenType()
        if tokenType == 'INT_CONST':
            # Integer constant
            self.vmWriter.writePush('constant', self.tokenizer.intVal())
            self.tokenizer.advance()
        elif tokenType == 'STRING_CONST':
            string = self.tokenizer.stringVal()
            # Build an empty string object and keep it in pointer 1 (that)
            length = len(string)
            self.vmWriter.writePush('constant', length)
            self.vmWriter.writeCall('String.new', 1)
            self.vmWriter.writePop('pointer', 1)
            # Append each char in the string.
            # FIX: was range(0, length - 1), which dropped the last character
            for i in range(length):
                ascii_value = ord(string[i])
                self.vmWriter.writePush('pointer', 1)
                self.vmWriter.writePush('constant', ascii_value)
                self.vmWriter.writeCall('String.appendChar', 2)
            # No need to return the pointer: it is already stored in pointer 1
            self.tokenizer.advance()
        elif tokenType == 'KEYWORD':
            # Keyword constant (true | false | null | this)
            if self.tokenizer.keyWord() == 'TRUE':
                # true is -1
                self.vmWriter.writePush('constant', 1)
                self.vmWriter.writeArithmetic('NEG')
            elif self.tokenizer.keyWord() == 'FALSE' or \
                    self.tokenizer.keyWord() == 'NULL':
                self.vmWriter.writePush('constant', 0)
            elif self.tokenizer.keyWord() == 'THIS':
                self.vmWriter.writePush('pointer', 0)
            self.tokenizer.advance()
        elif tokenType == 'IDENTIFIER':
            # varName | varName[expression] | subroutineCall
            if self.tokenizer.lookAhead() == '[':
                # varName[expression]
                self.subTagIdentifier(
                    self.tokenizer.identifier(), 'VAR', 'FALSE',
                    self.symbolTable.kindOf(self.tokenizer.identifier()),
                    self.symbolTable.indexOf(self.tokenizer.identifier()))
                self.tokenizer.advance()
                # Symbol: [
                self.subTag('symbol')
                self.tokenizer.advance()
                self.compileExpression()
                # Symbol: ]
                self.subTag('symbol')
                self.tokenizer.advance()
            elif self.tokenizer.lookAhead() == '(' or \
                    self.tokenizer.lookAhead() == '.':
                # subroutine call
                self.compileSubroutineCall()
            else:
                # Plain varName: push its segment/index from the symboltable
                varName = self.tokenizer.identifier()
                if self.symbolTable.kindOf(varName) == 'field':
                    self.vmWriter.writePush('this',
                                            self.symbolTable.indexOf(varName))
                else:
                    self.vmWriter.writePush(self.symbolTable.kindOf(varName),
                                            self.symbolTable.indexOf(varName))
                self.tokenizer.advance()
        elif self.tokenizer.symbol() == '(':
            # ( expression )
            self.tokenizer.advance()
            self.compileExpression()
            # Symbol: )
            self.tokenizer.advance()
        elif self.tokenizer.symbol() in ['-', '~']:
            # unaryop term
            op = self.tokenizer.symbol()
            self.tokenizer.advance()
            self.compileTerm()
            if op == '-':
                self.vmWriter.writeArithmetic('NEG')
            elif op == '~':
                self.vmWriter.writeArithmetic('NOT')

    def compileExpressionList(self):
        """Compile a (possibly empty) expression list; return its length."""
        nArgs = 0
        # Expression list may be empty, check.
        # FIX: was `is not ')'` - identity test on a string literal
        if self.tokenizer.rawToken() != ')':
            self.compileExpression()
            nArgs += 1
            # Further comma delimited expressions
            while self.tokenizer.rawToken() == ',':
                # Symbol: ,
                self.tokenizer.advance()
                self.compileExpression()
                nArgs += 1
        return nArgs
class CompilationEngine(object): """This class recursively compiles a .jack file into (eventually) vm code. For now, this just outputs a grammar xml file""" def __init__(self, inFile): super(CompilationEngine, self).__init__() # create an internal tokenizer to iterate through self.tokenizer = JackTokenizer(inFile) # spacing so I can make nicely formatted xml, this will increase by # 4 spaces every time I recurse self.spacing = "" # setup the output file self.outputPath = inFile.name.replace(".jack", ".xml") self.outputFile = open(self.outputPath, 'w') self.outputFile.close() self.outputFile = open(self.outputPath, 'a') def __increaseSpacing(self): """Adds 2 spaces to self.spacing""" self.spacing += " " def __decreaseSpacing(self): """Removes 2 spaces from self.spacing""" self.spacing = self.spacing[:-2] def __writeFullTag(self, tag, value): """Writes the spacing, then <tag> value </tag> to the output file""" self.outputFile.write(self.spacing + "<" + tag + "> " + value + " </" + tag + ">\n") def __writeOpenTag(self, tag): """Writes spacing, then <tag>, then increases the spacing""" self.outputFile.write(self.spacing + "<" + tag + ">\n") self.__increaseSpacing() def __writeCloseTag(self, tag): """Decreases spacing, then writes spacing, then </tag>""" self.__decreaseSpacing() self.outputFile.write(self.spacing + "</" + tag + ">\n") def start(self): """Starts the compilation by creating the token XML file and then calling __compileClass()""" # start the tokenizer self.tokenizer.advance() # make token xml file self.__createTokenXML() # reset tokenizer and compile self.tokenizer.reset() self.tokenizer.advance() self.__compileClass() def __createTokenXML(self): """Creates the token XML file for a .jack file""" outputPath = self.outputPath.replace(".xml", "T.xml") f = open(outputPath, 'w') f.close() f = open(outputPath, 'a') f.write("<tokens>\n") # make an output file that is filename but with testXML.xml at end while self.tokenizer.hasMoreTokens(): # output to xml to 
check tokenType = self.tokenizer.tokenType() if tokenType == "KEYWORD": f.write("<keyword>" + self.tokenizer.keyWord() + "</keyword>\n") elif tokenType == "SYMBOL": symbol = self.tokenizer.symbol() symbol = symbol.replace("&", "&") symbol = symbol.replace("<", "<") symbol = symbol.replace(">", ">") symbol = symbol.replace("\"", """) f.write("<symbol>" + symbol + "</symbol>\n") elif tokenType == "IDENTIFIER": f.write("<identifier>" + self.tokenizer.identifier() + "</identifier>\n") elif tokenType == "INT_CONST": f.write("<integerConstant>" + self.tokenizer.intVal() + "</integerConstant>\n") elif tokenType == "STRING_CONST": f.write("<stringConstant>" + self.tokenizer.stringVal() + "</stringConstant>\n") self.tokenizer.advance() # close the xml tag f.write("</tokens>") def __compileType(self): """Compiles a complete jack type grammar. Returns false if there is an error""" # check for valid keyword if self.tokenizer.tokenType() == "KEYWORD": k = self.tokenizer.keyWord() if k not in ["int", "char", "boolean"]: print("Error: type keyword must be int, char, or boolean") return False self.__writeFullTag("keyword", k) self.tokenizer.advance() return True # check for className else: res = self.__compileClassName() # if __compileClassName() errors, this is not a valid type if not res: print("Error: type not a valid className") return res def __compileClassName(self): """Compiles a complete jack className grammar. Returns false if there is an error""" if self.tokenizer.tokenType() != "IDENTIFIER": return False self.__writeFullTag("identifier", self.tokenizer.identifier()) self.tokenizer.advance() return True def __compileSubroutineName(self): """Compiles a complete jack subroutineName. Returns false if there is an error""" if self.tokenizer.tokenType() != "IDENTIFIER": return False self.__writeFullTag("identifier", self.tokenizer.identifier()) self.tokenizer.advance() return True def __compileVarName(self): """Compiles a complete jack varName. 
Returns false if there is an error""" if self.tokenizer.tokenType() != "IDENTIFIER": return False self.__writeFullTag("identifier", self.tokenizer.identifier()) self.tokenizer.advance() return True def __compileClass(self): """Compiles a complete jack class grammar""" # find the class keyword if self.tokenizer.tokenType() != "KEYWORD" or \ self.tokenizer.keyWord() != "class": print("Error: no class declaration found") sys.exit(1) # write both the class tag and the keyword tag for class self.__writeOpenTag("class") self.__writeFullTag("keyword", self.tokenizer.keyWord()) self.tokenizer.advance() # find the className if not self.__compileClassName(): print("Error: no class name found in class declaration") sys.exit(1) # find the open curly brace if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "{": print("Error: no opening brace found after class") sys.exit(0) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile the classVarDecs while (self.tokenizer.tokenType() == "KEYWORD" and (self.tokenizer.keyWord() == "static" or self.tokenizer.keyWord() == "field")): self.__compileClassVarDec() # compile the subroutines while (self.tokenizer.tokenType() == "KEYWORD" and (self.tokenizer.keyWord() == "constructor" or self.tokenizer.keyWord() == "function" or self.tokenizer.keyWord() == "method")): self.__compileSubroutineDec() # find last curly brace if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "}": print("Error: no closing brace found after class definition") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # close class tag self.__writeCloseTag("class") self.tokenizer.advance() def __compileClassVarDec(self): """Compiles a complete jack class variable declaration. 
This advances the tokenizer completely through the variable declaration""" # since we already checked to make sure this is valid, we can write # the tag here and either static or filed self.__writeOpenTag("classVarDec") self.__writeFullTag("keyword", self.tokenizer.keyWord()) self.tokenizer.advance() # look for a valid type if not self.__compileType(): sys.exit(1) # check for varName if not self.__compileVarName(): print("Error: missing varName identifier in classVarDec") sys.exit(1) # check for comma then more varNames (possible not existing) while self.tokenizer.tokenType() == "SYMBOL" and \ self.tokenizer.symbol() == ",": # write the comma self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # check for varName again if not self.__compileVarName(): print("Error: missing varName identifier in classVarDec") sys.exit(1) # check for closing semicolon if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != ";": print("Error: missing semicolon after classVarDec") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # close classVarDec tag self.__writeCloseTag("classVarDec") def __compileSubroutineDec(self): """Compiles a complete jack subroutine description. 
This advances the tokenizer completely through the subroutine declaration""" # write the opening tag self.__writeOpenTag("subroutineDec") # since we already checked for constructor/function/method, write it self.__writeFullTag("keyword", self.tokenizer.keyWord()) self.tokenizer.advance() # look for void or type if self.tokenizer.tokenType() == "KEYWORD" and \ self.tokenizer.keyWord() == "void": # if void, write it self.__writeFullTag("keyword", self.tokenizer.keyWord()) self.tokenizer.advance() elif not self.__compileType(): print("Error: subroutine return type not void or valid type") sys.exit(1) # check for subroutineName if not self.__compileSubroutineName(): print("Error: missing subroutineName in subroutineDec") sys.exit(1) # check for open parentheses if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "(": print("Error: missing ( for parameter list") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # do parameter list (this could add nothing) self.__compileParameterList() # check for closing parentheses if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != ")": print("Error: missing ) for parameter list") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile subroutine body self.__compileSubroutineBody() # close subroutineDec tag self.__writeCloseTag("subroutineDec") def __compileParameterList(self): """Compiles a complete jack parameter list grammar""" # write opening tag self.__writeOpenTag("parameterList") # if the next symbol is a ), then there is no parameter list, so just return # the rest of compileSubroutine will handle writing that if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol( ) == ")": # close tag self.__writeCloseTag("parameterList") return # look for a valid type else: res = self.__compileType() if not res: sys.exit(1) # check for varName if not self.__compileVarName(): print("Error: missing 
varName identifier in parameterList") sys.exit(1) # check for comma separated list of type and varName while self.tokenizer.tokenType( ) == "SYMBOL" and self.tokenizer.symbol() == ",": # write the comma self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # look for a valid type if not self.__compileType(): sys.exit(1) # check for varName if not self.__compileVarName(): print("Error: missing varName identifier in parameterList") sys.exit(1) # write closing tag self.__writeCloseTag("parameterList") def __compileSubroutineBody(self): """Compile a complete jack subroutine body grammar""" # write opening tag self.__writeOpenTag("subroutineBody") # check for { if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "{": print("Error: missing { for subroutine body") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # check to see if we need to compile varDec while self.tokenizer.tokenType() == "KEYWORD" and \ self.tokenizer.keyWord() == "var": self.__compileVarDec() # compile statements self.__compileStatements() # check for closing } if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "}": print("Error: missing closing } for subroutine body") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # close tag self.__writeCloseTag("subroutineBody") return def __compileVarDec(self): """Compiles a complete jack varDec grammar""" # write open tag self.__writeOpenTag("varDec") # since we already checked to make sure there is a var, write it self.__writeFullTag("keyword", self.tokenizer.keyWord()) self.tokenizer.advance() # check for type if not self.__compileType(): sys.exit(1) # check for varName if not self.__compileVarName(): print("Error: missing varName identifier in varDec") sys.exit(1) # check for comma separated list of type and varName while self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol( ) == ",": # write 
the comma self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # check for varName if not self.__compileVarName(): print("Error: missing varName identifier in varDec") sys.exit(1) # check for semicolon if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != ";": print("Error: missing ; after varDec") sys.exit(1) # write ; self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # close tag self.__writeCloseTag("varDec") return def __compileStatements(self): """Compiles a complete jack statements grammar""" # write statements tag self.__writeOpenTag("statements") # check for the keywords for all the statements while self.tokenizer.tokenType() == "KEYWORD": k = self.tokenizer.keyWord() if k == "let": self.__compileLetStatement() elif k == "if": self.__compileIfStatement() elif k == "while": self.__compileWhileStatement() elif k == "do": self.__compileDoStatement() elif k == "return": self.__compileReturnStatement() else: print("Error: invalid statment " + k) sys.exit(1) # close statements tag self.__writeCloseTag("statements") def __compileLetStatement(self): """Compiles a complete jack let statment grammar""" # write opening tag self.__writeOpenTag("letStatement") # since we already checked for the keyword let, write it self.__writeFullTag("keyword", self.tokenizer.keyWord()) self.tokenizer.advance() # look for varName if not self.__compileVarName(): print("Error: missing varName for let statement") # check for [ if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol( ) == "[": # write the bracket self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile expression self.__compileExpression() # write the closing bracket if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "]": print("Error: missing closing ] in let statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # check for = if 
self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "=": print("Error: missing = in let expression") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile expression self.__compileExpression() # look for ; if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != ";": print("Error: missing ; after let statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # write closing tag self.__writeCloseTag("letStatement") def __compileIfStatement(self): """Compiles a complete jack if statement grammar""" # write opening tag self.__writeOpenTag("ifStatement") # since we already checked for if, write it self.__writeFullTag("keyword", self.tokenizer.keyWord()) self.tokenizer.advance() # check for ( if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "(": print("Error: missing ( in if statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile expression self.__compileExpression() # check for ) if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != ")": print("Error: missing ) in if statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # check for { if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "{": print("Error: missing { for if statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile more statements self.__compileStatements() # check for } if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "}": print("Error: missing } after if statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # check for else if self.tokenizer.tokenType() == "KEYWORD" and self.tokenizer.keyWord( ) == "else": # write else self.__writeFullTag("keyword", self.tokenizer.keyWord()) 
self.tokenizer.advance() # check for { if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "{": print("Error: missing { for if statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile more statements self.__compileStatements() # check for } if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "}": print("Error: missing } after if statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # close tag self.__writeCloseTag("ifStatement") def __compileWhileStatement(self): """Compiles a complete jack while statement grammar""" # write opening tag self.__writeOpenTag("whileStatement") # since we checked for while already, write it self.__writeFullTag("keyword", self.tokenizer.keyWord()) self.tokenizer.advance() # check for ( if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "(": print("Error: missing ( in if statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile expression self.__compileExpression() # check for ) if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != ")": print("Error: missing ) in if statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # check for { if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "{": print("Error: missing { for if statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile more statements self.__compileStatements() # check for } if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != "}": print("Error: missing } after if statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # write closing tag self.__writeCloseTag("whileStatement") def __compileDoStatement(self): """Compiles a complete jack do statement 
grammar""" # write opening tag self.__writeOpenTag("doStatement") # since we already checked for do, write it self.__writeFullTag("keyword", self.tokenizer.keyWord()) self.tokenizer.advance() # compile subroutine call self.__compileSubroutineCall() # check for semicolon if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != ";": print("Error: missing ; after do statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # write closing tag self.__writeCloseTag("doStatement") def __compileReturnStatement(self): """Compiles a complete jack return statement grammar""" # write opening tag self.__writeOpenTag("returnStatement") # since we checked for return already, write it self.__writeFullTag("keyword", self.tokenizer.keyWord()) self.tokenizer.advance() # if the next symbol isn't a symbol, it must be an expression if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != ";": self.__compileExpression() # write ;, checking again to make sure after calling compile expression # that the next symbol is still a valid ; if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != ";": print("Error: missing ; after return statement") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # write closing tag self.__writeCloseTag("returnStatement") def __convertOp(self, op): """Converts the operators that interfere with xml tags to their properly escaped versions""" op = op.replace("&", "&") op = op.replace("<", "<") op = op.replace(">", ">") op = op.replace("\"", """) return op def __compileExpression(self): """Compiles a complete jack expression grammar""" # write opening tag self.__writeOpenTag("expression") # compile term self.__compileTerm() # check for op while self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol( ) in op: s = self.tokenizer.symbol() # write op self.__writeFullTag("symbol", self.__convertOp(s)) self.tokenizer.advance() 
# compile another term self.__compileTerm() # close tag self.__writeCloseTag("expression") def __compileTerm(self): """Compiles a complete jack term grammar""" # write opening tag self.__writeOpenTag("term") # term logic # check for integerConstant if self.tokenizer.tokenType() == "INT_CONST": self.__writeFullTag("integerConstant", self.tokenizer.intVal()) self.tokenizer.advance() # check for string constant elif self.tokenizer.tokenType() == "STRING_CONST": self.__writeFullTag("stringConstant", self.tokenizer.stringVal()) self.tokenizer.advance() # check for keyword for KeywordConstant elif self.tokenizer.tokenType() == "KEYWORD": k = self.tokenizer.keyWord() if k not in KeyWordConstant: print("Error: invalid KeyWordConstant" + k + " in term") sys.exit(1) # write the keywordconstant self.__writeFullTag("keyword", k) self.tokenizer.advance() # check for symbol for either ( expression ) or unary op elif self.tokenizer.tokenType() == "SYMBOL": s = self.tokenizer.symbol() # ( expression ) if s == "(": self.__writeFullTag("symbol", s) self.tokenizer.advance() # compile expression self.__compileExpression() # check for ) if self.tokenizer.tokenType( ) != "SYMBOL" or self.tokenizer.symbol() != ")": print("Error: missing ) after expression in term") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # unaryOp term elif s in unaryOp: self.__writeFullTag("symbol", s) self.tokenizer.advance() # compile term self.__compileTerm() else: print("Error: invalid symbol " + s + " in term") sys.exit(1) # check for varName | varName [ expression ] | subroutineCall elif self.tokenizer.tokenType() == "IDENTIFIER": # advance the tokenizer one more step to check for [, (, or other self.tokenizer.advance() if self.tokenizer.tokenType() == "SYMBOL": s = self.tokenizer.symbol() # varName[expression] if s == "[": # go back to varName self.tokenizer.retreat() # compile varName if not self.__compileVarName(): print("Error: invalid varName in term") 
sys.exit(1) # write [ self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile expression self.__compileExpression() # write ] if self.tokenizer.tokenType( ) != "SYMBOL" or self.tokenizer.symbol() != "]": print("Error: missing ] after varName[expression]") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # subroutineCall elif s == "(" or s == ".": # go back to subroutineName self.tokenizer.retreat() # compile subroutineCall self.__compileSubroutineCall() else: # go back to varName self.tokenizer.retreat() # compile varName if not self.__compileVarName(): print("Error: invalid varName in term") sys.exit(1) else: # go back to varName self.tokenizer.retreat() # compile varName if not self.__compileVarName(): print("Error: invalid varName in term") sys.exit(1) else: print("Error: invalid term") sys.exit(1) # close tag self.__writeCloseTag("term") def __compileSubroutineCall(self): """Compiles a complete jack subroutine call grammar""" # look ahead one token to see if it is a ( or a . 
self.tokenizer.advance() # subroutineName if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol( ) == "(": # go back to subroutine name self.tokenizer.retreat() # compile subroutine name if not self.__compileSubroutineName(): print("Error: invalid subroutineName in subroutineCall") sys.exit(1) # check for ( if self.tokenizer.tokenType( ) != "SYMBOL" and self.tokenizer.symbol() != "(": print( "Error: missing ( in subroutineCall before expressionList") sys.exit(1) # write ( self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile expression list self.__compileExpressionList() # check for ) if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != ")": print( "Error: missing ) after expressionList in subroutineCall") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # className | varName elif self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol( ) == ".": # go back to varName/className self.tokenizer.retreat() if self.tokenizer.tokenType() != "IDENTIFIER": print("Error: missing varName|className in subroutineCall") # Hacky, but className and varName both correspond to just an # identitifer, so I just call compileVarName to handle both if not self.__compileVarName(): print("Error: invalid className or varName in subroutineCall") sys.exit(1) # check for . if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != ".": print("Error: missing . in subroutineCall") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile subroutineName if not self.__compileSubroutineName(): print( "Error: missing subroutineName after . 
in subroutineCall") sys.exit(1) # check for ( if self.tokenizer.tokenType( ) != "SYMBOL" and self.tokenizer.symbol() != "(": print( "Error: missing ( in subroutineCall before expressionList") sys.exit(1) # write ( self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile expression list self.__compileExpressionList() # check for ) if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol( ) != ")": print( "Error: missing ) after expressionList in subroutineCall") sys.exit(1) self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() else: print("Error: invalid subroutineCall") sys.exit(1) def __compileExpressionList(self): """Compiles a complete jack expression list grammar""" # write open tag self.__writeOpenTag("expressionList") # if the symbol is ), there is no expression list if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol( ) == ")": # close tag self.__writeCloseTag("expressionList") return else: # compile expression self.__compileExpression() # loop until you dont see a comma while self.tokenizer.tokenType( ) == "SYMBOL" and self.tokenizer.symbol() == ",": # write , self.__writeFullTag("symbol", self.tokenizer.symbol()) self.tokenizer.advance() # compile expression self.__compileExpression() # write closing tag self.__writeCloseTag("expressionList")
class CompilationEngine:
    """Recursive-descent parser for the Jack language: consumes tokens from
    a JackTokenizer and writes the parse tree as XML to the output file.

    NOTE(review): the integer token-type codes compared against below are
    whatever JackTokenizer.tokenType() returns; from their pairing with
    keyWord()/symbol()/identifier()/intVal()/stringVal() in this class they
    appear to be 0=keyword, 1=symbol, 2=identifier, 3=integer constant,
    4=string constant — confirm against the tokenizer implementation.
    """

    def __init__(self, inputFile, outputFile):
        # Constructing the engine immediately parses the whole class and
        # writes the XML output.
        self.XMLFile = open(outputFile, 'w')
        self.tokenizer = JackTokenizer(inputFile)
        self.CompileClass()

    def __writeToken(self, token, value):
        """Write one XML leaf element: <token> value </token>.

        NOTE(review): values are written unescaped; if output must be valid
        XML (e.g. '<' as '&lt;'), escaping would have to happen in the
        tokenizer or upstream of this call — confirm.
        """
        self.XMLFile.write("<" + token + "> " + value + " </" + token + ">\n")

    def CompileClass(self):
        """Compile: 'class' className '{' classVarDec* subroutineDec* '}'."""
        self.XMLFile.write("<class>\n")
        # 'class' keyword
        self.__writeToken("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()
        # className
        self.__writeToken("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()
        # {
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        while self.tokenizer.keyWord() == "static" or \
                self.tokenizer.keyWord() == "field":
            self.CompileClassVarDec()
        while self.tokenizer.keyWord() == "constructor" or \
                self.tokenizer.keyWord() == "function" or \
                self.tokenizer.keyWord() == "method":
            self.CompileSubroutine()
        # }
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        self.XMLFile.write("</class>\n")

    def CompileClassVarDec(self):
        """Compile: ('static'|'field') type varName (',' varName)* ';'."""
        self.XMLFile.write("<classVarDec>\n")
        # static | field
        self.__writeToken("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()
        self.compileType()
        # first varName
        self.__writeToken("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()
        # add the rest of var names, if there are
        while self.tokenizer.symbol() == ",":
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            self.__writeToken("identifier", self.tokenizer.identifier())
            self.tokenizer.advance()
        # ;
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        self.XMLFile.write("</classVarDec>\n")

    def CompileSubroutine(self):
        """Compile one subroutineDec: ('constructor'|'function'|'method')
        ('void'|type) subroutineName '(' parameterList ')' subroutineBody.
        """
        self.XMLFile.write("<subroutineDec>\n")
        # constructor | function | method
        self.__writeToken("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()
        # void | type
        self.compileType()
        # subrotineName
        self.__writeToken("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()
        # ( parameterList )
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        self.compileParameterList()
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # subrotineBody
        self.compileSubroutineBody()
        self.XMLFile.write("</subroutineDec>\n")

    def compileParameterList(self):
        """Compile: ((type varName) (',' type varName)*)?.

        An immediately following symbol token (type code 1, i.e. the
        closing ')') means the list is empty.
        """
        self.XMLFile.write("<parameterList>\n")
        if self.tokenizer.tokenType() != 1:
            # type varName
            self.compileType()
            self.__writeToken("identifier", self.tokenizer.identifier())
            self.tokenizer.advance()
            # (, type varName)*
            while self.tokenizer.symbol() == ",":
                self.__writeToken("symbol", self.tokenizer.symbol())
                self.tokenizer.advance()
                self.compileType()
                self.__writeToken("identifier", self.tokenizer.identifier())
                self.tokenizer.advance()
        self.XMLFile.write("</parameterList>\n")

    def compileSubroutineBody(self):
        """Compile: '{' varDec* statements '}'."""
        self.XMLFile.write("<subroutineBody>\n")
        # {
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # varDec*
        while self.tokenizer.keyWord() == "var":
            self.compileVarDec()
        # statements
        self.compileStatements()
        # }
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        self.XMLFile.write("</subroutineBody>\n")

    def compileVarDec(self):
        """Compile: 'var' type varName (',' varName)* ';'."""
        self.XMLFile.write("<varDec>\n")
        # var
        self.__writeToken("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()
        # type
        self.compileType()
        # varName
        self.__writeToken("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()
        # (, varName)*
        while self.tokenizer.symbol() == ",":
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            self.__writeToken("identifier", self.tokenizer.identifier())
            self.tokenizer.advance()
        # ;
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        self.XMLFile.write("</varDec>\n")

    def compileStatements(self):
        """Compile statement* by dispatching on the leading keyword.

        NOTE(review): a keyword that is none of let/if/while/do/return
        would loop forever here (no branch advances the tokenizer) —
        confirm callers guarantee only statement keywords reach this point.
        """
        self.XMLFile.write("<statements>\n")
        while self.tokenizer.tokenType() == 0:
            if self.tokenizer.keyWord() == "let":
                self.compileLet()
            elif self.tokenizer.keyWord() == "if":
                self.compileIf()
            elif self.tokenizer.keyWord() == "while":
                self.compileWhile()
            elif self.tokenizer.keyWord() == "do":
                self.compileDo()
            elif self.tokenizer.keyWord() == "return":
                self.compileReturn()
        self.XMLFile.write("</statements>\n")

    def compileDo(self):
        """Compile: 'do' subroutineCall ';'."""
        self.XMLFile.write("<doStatement>\n")
        # do
        self.__writeToken("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()
        self.compileSubRoutineCall()
        # ;
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        self.XMLFile.write("</doStatement>\n")

    def compileLet(self):
        """Compile: 'let' varName ('[' expression ']')? '=' expression ';'."""
        self.XMLFile.write("<letStatement>\n")
        # let
        self.__writeToken("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()
        # varName
        self.__writeToken("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()
        # ([ expression ])?
        if self.tokenizer.symbol() == "[":
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            self.CompileExpression()
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
        # =
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # expression
        self.CompileExpression()
        # ;
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        self.XMLFile.write("</letStatement>\n")

    def compileWhile(self):
        """Compile: 'while' '(' expression ')' '{' statements '}'."""
        self.XMLFile.write("<whileStatement>\n")
        # while
        self.__writeToken("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()
        # ( expression )
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        self.CompileExpression()
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # {
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # statements
        self.compileStatements()
        # }
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        self.XMLFile.write("</whileStatement>\n")

    def compileReturn(self):
        """Compile: 'return' expression? ';'."""
        self.XMLFile.write("<returnStatement>\n")
        # return
        self.__writeToken("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()
        # expression? -- isTerm() decides whether an expression follows
        if self.isTerm():
            self.CompileExpression()
        # ;
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        self.XMLFile.write("</returnStatement>\n")

    def compileIf(self):
        """Compile: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?.
        """
        self.XMLFile.write("<ifStatement>\n")
        # if
        self.__writeToken("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()
        # ( expression )
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        self.CompileExpression()
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        # { statements }
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        self.compileStatements()
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        if self.tokenizer.tokenType() == 0 and \
                self.tokenizer.keyWord() == "else":
            # else
            self.__writeToken("keyword", self.tokenizer.keyWord())
            self.tokenizer.advance()
            # { statements }
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            self.compileStatements()
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
        self.XMLFile.write("</ifStatement>\n")

    def CompileExpression(self):
        """Compile: term (op term)*.

        `operators` is a module-level collection of binary operator symbols.
        """
        self.XMLFile.write("<expression>\n")
        # term
        self.CompileTerm()
        # (op term)*
        while self.tokenizer.tokenType() == 1 and \
                self.tokenizer.symbol() in operators:
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            self.CompileTerm()
        self.XMLFile.write("</expression>\n")

    def CompileTerm(self):
        """Compile one term. For identifiers, peeks at the raw token list
        (tokens[currentToken + 1]) to distinguish varName, array access and
        subroutine calls without advancing the tokenizer.
        """
        self.XMLFile.write("<term>\n")
        if self.tokenizer.tokenType() == 3:
            # integer constant
            self.__writeToken("integerConstant", self.tokenizer.intVal())
            self.tokenizer.advance()
        elif self.tokenizer.tokenType() == 4:
            # string constant
            self.__writeToken("stringConstant", self.tokenizer.stringVal())
            self.tokenizer.advance()
        elif self.tokenizer.tokenType() == 0:
            # keyword constant (true/false/null/this)
            self.__writeToken("keyword", self.tokenizer.keyWord())
            self.tokenizer.advance()
        elif self.tokenizer.tokenType() == 2:
            if self.tokenizer.tokens[self.tokenizer.currentToken + 1] == '[':
                # varName [ expression ]
                self.__writeToken("identifier", self.tokenizer.identifier())
                self.tokenizer.advance()
                # [ expression ]
                self.__writeToken("symbol", self.tokenizer.symbol())
                self.tokenizer.advance()
                self.CompileExpression()
                self.__writeToken("symbol", self.tokenizer.symbol())
                self.tokenizer.advance()
            elif self.tokenizer.tokens[self.tokenizer.currentToken + 1] == '(' \
                    or self.tokenizer.tokens[
                        self.tokenizer.currentToken + 1] == '.':
                # subroutine call
                self.compileSubRoutineCall()
            else:
                # plain varName
                self.__writeToken("identifier", self.tokenizer.identifier())
                self.tokenizer.advance()
        elif self.tokenizer.tokenType() == 1 and \
                self.tokenizer.symbol() == '(':
            # ( expression )
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            self.CompileExpression()
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
        else:
            # unaryOp term (any remaining symbol is treated as a unary op)
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            self.CompileTerm()
        self.XMLFile.write("</term>\n")

    def compileSubRoutineCall(self):
        """Compile: subroutineName '(' expressionList ')' |
        (className|varName) '.' subroutineName '(' expressionList ')'.
        """
        # subroutineName | (className | varName)
        self.__writeToken("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()
        if self.tokenizer.symbol() == '(':
            # ( expressionList )
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            self.CompileExpressionList()
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
        else:
            # .
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            # subroutineName
            self.__writeToken("identifier", self.tokenizer.identifier())
            self.tokenizer.advance()
            # ( expressionList )
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            self.CompileExpressionList()
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()

    def CompileExpressionList(self):
        """Compile: (expression (',' expression)*)?.

        Emits an empty list when the next token cannot start a term.
        """
        self.XMLFile.write("<expressionList>\n")
        if self.isTerm():
            # expression (',' expression)*
            self.CompileExpression()
            while self.tokenizer.symbol() == ',':
                self.__writeToken("symbol", self.tokenizer.symbol())
                self.tokenizer.advance()
                self.CompileExpression()
        self.XMLFile.write("</expressionList>\n")

    def isTerm(self):
        """Return True when the current token can start a term:
        int/string constant, keyword constant, '(' group, unary '-'/'~',
        or an identifier.
        """
        if self.tokenizer.tokenType() == 3 or self.tokenizer.tokenType() == 4:
            return True
        if self.tokenizer.tokenType() == 0 and \
                self.tokenizer.keyWord() in keyword_const:
            return True
        if self.tokenizer.tokenType() == 1 and self.tokenizer.symbol() == '(':
            return True
        if self.tokenizer.tokenType() == 1 and (self.tokenizer.symbol() == '-'
                                                or self.tokenizer.symbol()
                                                == '~'):
            return True
        if self.tokenizer.tokenType() == 2:
            return True
        return False

    def compileType(self):
        """Write the current type token (built-in keyword or class-name
        identifier) and advance past it.
        """
        if self.tokenizer.tokenType() == 0:
            self.__writeToken("keyword", self.tokenizer.keyWord())
        else:
            self.__writeToken("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()
class CompilationEngine:
    """Recursive-descent parser for Jack that writes an indented XML parse
    tree (one space of indentation per nesting level) to the output file.

    Call compileClass() after construction to run the parse.
    """

    def __init__(self, input_file_path, output_path):
        """Create an engine.

        :param input_file_path: path of the .jack source handed to the
            JackTokenizer.
        :param output_path: path of the XML file to create/overwrite.
        """
        self._indentation = 0
        self._tokenizer = JackTokenizer(input_file_path)
        self._output = open(output_path, "w+")

    def compileClass(self):
        """Compile 'class' className '{' classVarDec* subroutineDec* '}'
        and close the output file.
        """
        if self._tokenizer.hasMoreTokens():
            self._tokenizer.advance()
            self._output.write("<class>\n")
            self._indentation += 1
            self._write_keyword()
            self._tokenizer.advance()
            self._write_identifier()
            self._tokenizer.advance()
            self._write_symbol()
            self._tokenizer.advance()
            while self._tokenizer.keyWord() == "static" or \
                    self._tokenizer.keyWord() == "field":
                self.compileClassVarDec()
            while self._tokenizer.keyWord() == "constructor" or \
                    self._tokenizer.keyWord() == "function" \
                    or self._tokenizer.keyWord() == "method":
                self.compileSubroutine()
            self._write_symbol()
            self._indentation -= 1
            self._output.write("</class>\n")
            self._output.close()

    def compileClassVarDec(self):
        """Compile one classVarDec; the caller has already verified the
        current token is 'static' or 'field'.
        """
        self._output.write(" " * self._indentation + "<classVarDec>\n")
        self._indentation += 1
        self._write_keyword()
        self._tokenizer.advance()
        self._compile_type_and_varName()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</classVarDec>\n")

    def compileSubroutine(self):
        """Compile one subroutineDec including its subroutineBody."""
        self._output.write(" " * self._indentation + "<subroutineDec>\n")
        self._indentation += 1
        # constructor | function | method
        self._write_keyword()
        self._tokenizer.advance()
        # return type: built-in keyword or class-name identifier
        if self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
            self._write_keyword()
        elif self._tokenizer.tokenType() == self._tokenizer.IDENTIFIER:
            self._write_identifier()
        self._tokenizer.advance()
        # subroutineName
        self._write_identifier()
        self._tokenizer.advance()
        # ( parameterList )
        self._write_symbol()
        self._tokenizer.advance()
        self.compileParameterList()
        self._write_symbol()
        self._tokenizer.advance()
        # compile subroutineBody:
        self._output.write(" " * self._indentation + "<subroutineBody>\n")
        self._indentation += 1
        self._write_symbol()
        self._tokenizer.advance()
        while self._tokenizer.keyWord() == "var":
            self.compileVarDec()
        self.compileStatements()
        self._write_symbol()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</subroutineBody>\n")
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</subroutineDec>\n")
        self._tokenizer.advance()

    def compileParameterList(self):
        """Compile ((type varName) (',' type varName)*)? — loops until the
        closing ')' symbol is reached.
        """
        self._output.write(" " * self._indentation + "<parameterList>\n")
        self._indentation += 1
        while self._tokenizer.tokenType() != self._tokenizer.SYMBOL:
            # type: built-in keyword or class-name identifier
            if self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
                self._write_keyword()
            elif self._tokenizer.tokenType() == self._tokenizer.IDENTIFIER:
                self._write_identifier()
            self._tokenizer.advance()
            self._write_identifier()
            self._tokenizer.advance()
            if self._tokenizer.symbol() == ",":
                self._write_symbol()
                self._tokenizer.advance()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</parameterList>\n")

    def compileVarDec(self):
        """Compile 'var' type varName (',' varName)* ';'."""
        self._output.write(" " * self._indentation + "<varDec>\n")
        self._indentation += 1
        self._write_keyword()
        self._tokenizer.advance()
        self._compile_type_and_varName()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</varDec>\n")

    def compileStatements(self):
        """Compile statement* by dispatching on the leading keyword.

        NOTE(review): a keyword other than let/if/while/do/return would
        spin forever here — confirm callers only reach this with statement
        keywords.
        """
        self._output.write(" " * self._indentation + "<statements>\n")
        self._indentation += 1
        while self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
            if self._tokenizer.keyWord() == "let":
                self.compileLet()
            elif self._tokenizer.keyWord() == "if":
                self.compileIf()
            elif self._tokenizer.keyWord() == "while":
                self.compileWhile()
            elif self._tokenizer.keyWord() == "do":
                self.compileDo()
            elif self._tokenizer.keyWord() == "return":
                self.compileReturn()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</statements>\n")

    def compileDo(self):
        """Compile 'do' subroutineCall ';'."""
        self._output.write(" " * self._indentation + "<doStatement>\n")
        self._indentation += 1
        self._write_keyword()
        self._tokenizer.advance()
        # subroutineCall: name, optional '.name', then '(' expressionList ')'
        self._write_identifier()
        self._tokenizer.advance()
        if self._tokenizer.symbol() == ".":
            self._write_symbol()
            self._tokenizer.advance()
            self._write_identifier()
            self._tokenizer.advance()
        self._write_symbol()
        self._tokenizer.advance()
        self.compileExpressionList()
        self._write_symbol()
        self._tokenizer.advance()
        # ;
        self._write_symbol()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</doStatement>\n")
        self._tokenizer.advance()

    def compileLet(self):
        """Compile 'let' varName ('[' expression ']')? '=' expression ';'."""
        self._output.write(" " * self._indentation + "<letStatement>\n")
        self._indentation += 1
        self._write_keyword()
        self._tokenizer.advance()
        self._write_identifier()
        self._tokenizer.advance()
        if self._tokenizer.symbol() == "[":
            self._write_symbol()
            self._tokenizer.advance()
            self.compileExpression()
            self._write_symbol()
            self._tokenizer.advance()
        # =
        self._write_symbol()
        self._tokenizer.advance()
        self.compileExpression()
        # ;
        self._write_symbol()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</letStatement>\n")
        self._tokenizer.advance()

    def compileWhile(self):
        """Compile 'while' '(' expression ')' '{' statements '}'."""
        self._output.write(" " * self._indentation + "<whileStatement>\n")
        self._indentation += 1
        self._write_keyword()
        self._tokenizer.advance()
        self._write_symbol()
        self._tokenizer.advance()
        self.compileExpression()
        self._write_symbol()
        self._tokenizer.advance()
        self._write_symbol()
        self._tokenizer.advance()
        self.compileStatements()
        self._write_symbol()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</whileStatement>\n")
        self._tokenizer.advance()

    def compileReturn(self):
        """Compile 'return' expression? ';'."""
        self._output.write(" " * self._indentation + "<returnStatement>\n")
        self._indentation += 1
        self._write_keyword()
        self._tokenizer.advance()
        # BUG FIX: this used 'and', which skipped expressions beginning
        # with a symbol ('return -x;', 'return (e);'); with 'or' only a
        # bare ';' skips the expression.
        if self._tokenizer.tokenType() != self._tokenizer.SYMBOL or \
                self._tokenizer.symbol() != ";":
            self.compileExpression()
        self._write_symbol()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</returnStatement>\n")
        self._tokenizer.advance()

    def compileIf(self):
        """Compile 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?.
        """
        self._output.write(" " * self._indentation + "<ifStatement>\n")
        self._indentation += 1
        self._write_keyword()
        self._tokenizer.advance()
        self._write_symbol()
        self._tokenizer.advance()
        self.compileExpression()
        self._write_symbol()
        self._tokenizer.advance()
        self._write_symbol()
        self._tokenizer.advance()
        self.compileStatements()
        self._write_symbol()
        self._tokenizer.advance()
        if self._tokenizer.tokenType() == self._tokenizer.KEYWORD and \
                self._tokenizer.keyWord() == "else":
            self._write_keyword()
            self._tokenizer.advance()
            self._write_symbol()
            self._tokenizer.advance()
            self.compileStatements()
            self._write_symbol()
            self._tokenizer.advance()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</ifStatement>\n")

    def compileExpression(self):
        """Compile term (op term)*.

        Note that tokenizer must be advanced before this is called!!!
        `OP_LIST` is the module-level collection of binary operators.
        """
        self._output.write(" " * self._indentation + "<expression>\n")
        self._indentation += 1
        self.compileTerm()
        while self._tokenizer.tokenType() == self._tokenizer.SYMBOL and \
                self._tokenizer.symbol() in OP_LIST:
            self._write_symbol()
            self._tokenizer.advance()
            self.compileTerm()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</expression>\n")

    def compileTerm(self):
        """Compile one term.

        sanity_check tracks whether the current token still needs one final
        advance() when the branch is done (False when a sub-call has
        already advanced past the term).
        """
        sanity_check = True
        self._output.write(" " * self._indentation + "<term>\n")
        self._indentation += 1
        if self._tokenizer.tokenType() == self._tokenizer.INT_CONST:
            self._write_int_const()
        elif self._tokenizer.tokenType() == self._tokenizer.STRING_CONST:
            self._write_str_const()
        elif self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
            self._write_keyword()
        elif self._tokenizer.tokenType() == self._tokenizer.IDENTIFIER:
            self._write_identifier()
            self._tokenizer.advance()
            sanity_check = False
            if self._tokenizer.symbol() == "[":
                # varName [ expression ]
                sanity_check = True
                self._write_symbol()
                self._tokenizer.advance()
                self.compileExpression()
                self._write_symbol()
            elif self._tokenizer.symbol() == ".":
                # subroutine case: name '.' name '(' expressionList ')'
                sanity_check = True
                self._write_symbol()
                self._tokenizer.advance()
                self._write_identifier()
                self._tokenizer.advance()
                self._write_symbol()
                self._tokenizer.advance()
                self.compileExpressionList()
                self._write_symbol()
            elif self._tokenizer.symbol() == "(":
                # direct call: name '(' expressionList ')'
                sanity_check = True
                self._write_symbol()
                self._tokenizer.advance()
                self.compileExpressionList()
                self._write_symbol()
        elif self._tokenizer.symbol() == "(":
            # ( expression )
            self._write_symbol()
            self._tokenizer.advance()
            self.compileExpression()
            self._write_symbol()
        elif self._tokenizer.symbol() == "~" or self._tokenizer.symbol() == \
                "-":
            # unaryOp term
            self._write_symbol()
            self._tokenizer.advance()
            self.compileTerm()
            sanity_check = False
        if sanity_check:
            self._tokenizer.advance()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</term>\n")

    def compileExpressionList(self):
        """Compile (expression (',' expression)*)?.

        NOTE(review): the first guard uses 'and', so an expression that
        starts with a symbol falls through it; the second 'if' is a patch
        that recovers the '(' case. Unary '-'/'~' first tokens would still
        be missed — confirm against the tokenizer/grammar before changing,
        as rewriting the guard to 'or' would make the patch redundant.
        """
        self._output.write(" " * self._indentation + "<expressionList>\n")
        self._indentation += 1
        if self._tokenizer.tokenType() != self._tokenizer.SYMBOL and \
                self._tokenizer.symbol() != ")":
            self.compileExpression()
            while self._tokenizer.tokenType() == self._tokenizer.SYMBOL and \
                    self._tokenizer.symbol() == ",":
                self._write_symbol()
                self._tokenizer.advance()
                self.compileExpression()
        if self._tokenizer.symbol() == "(":
            self.compileExpression()
            while self._tokenizer.tokenType() == self._tokenizer.SYMBOL and \
                    self._tokenizer.symbol() == ",":
                self._write_symbol()
                self._tokenizer.advance()
                self.compileExpression()
        self._indentation -= 1
        self._output.write(" " * self._indentation + "</expressionList>\n")

    def _compile_type_and_varName(self):
        """Shared tail of classVarDec/varDec:
        type varName (',' varName)* ';'.
        """
        if self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
            self._write_keyword()
        elif self._tokenizer.tokenType() == self._tokenizer.IDENTIFIER:
            self._write_identifier()
        self._tokenizer.advance()
        self._write_identifier()
        self._tokenizer.advance()
        while self._tokenizer.symbol() == ",":
            self._write_symbol()
            self._tokenizer.advance()
            self._write_identifier()
            self._tokenizer.advance()
        # ;
        self._write_symbol()
        self._tokenizer.advance()

    def _write_identifier(self):
        self._output.write(" " * self._indentation + "<identifier> " +
                           self._tokenizer.identifier() + " </identifier>\n")

    def _write_keyword(self):
        self._output.write(" " * self._indentation + "<keyword> " +
                           self._tokenizer.keyWord() + " </keyword>\n")

    def _write_symbol(self):
        """Write the current symbol, escaping the characters that are not
        legal in XML element content.

        BUG FIX: the escape strings had been de-escaped into no-ops
        ('<' -> '<'); restore the XML entities so the output is valid XML.
        """
        string_to_write = self._tokenizer.symbol()
        if self._tokenizer.symbol() == "<":
            string_to_write = "&lt;"
        elif self._tokenizer.symbol() == ">":
            string_to_write = "&gt;"
        elif self._tokenizer.symbol() == "&":
            string_to_write = "&amp;"
        self._output.write(" " * self._indentation + "<symbol> " +
                           string_to_write + " </symbol>\n")

    def _write_int_const(self):
        # NOTE(review): uses identifier() rather than an intVal() accessor —
        # presumably identifier() returns the current token's text
        # regardless of type; confirm against JackTokenizer.
        self._output.write(" " * self._indentation + "<integerConstant> " +
                           self._tokenizer.identifier() +
                           " </integerConstant>\n")

    def _write_str_const(self):
        # NOTE(review): same caveat as _write_int_const — relies on
        # identifier() returning the raw token text.
        self._output.write(" " * self._indentation + "<stringConstant> " +
                           self._tokenizer.identifier() +
                           " </stringConstant>\n")