class CompilationEngine(object):
    """Compile tokenized input into an XML parse tree and VM commands.

    The engine is a recursive-descent parser: each ``compile_*`` method
    handles one grammar rule (quoted in the comment above it), appends the
    matching XML lines to ``self.contents`` and emits VM commands through
    ``self.VMwriter``.

    The tokenizer is expected to provide ``advance()``, ``look_ahead()``
    (returning an indexable ``(type, value)`` pair), ``get_token_type()``,
    ``get_token_value()``, ``has_more_tokens()`` and a ``tokens`` sequence.
    """

    # Statement keyword -> name of the method that compiles that statement.
    _STATEMENT_COMPILERS = {
        'do': 'compile_do',
        'let': 'compile_let',
        'while': 'compile_while',
        'if': 'compile_if',
        'return': 'compile_return',
    }

    def __init__(self, tokenizer, filename):
        """Store the tokenizer and file name and reset all output state."""
        self.filename = filename
        self.tokenizer = tokenizer
        self.VMwriter = VMWriter()
        self.contents = []  # XML output lines
        self.indent = 0  # current XML nesting depth (in tabs)
        self.label_index = 0  # counter used to make if/while labels unique

    def compile(self):
        """Compile the whole input, starting at the class rule."""
        self.compile_class()

    def write_next_token(self, op_replace=None):
        """Advance the tokenizer, record the token as XML and return it.

        When ``op_replace`` is truthy it is written (and returned) instead
        of the token's own value.
        """
        self.tokenizer.advance()
        token_type = self.tokenizer.get_token_type()
        if op_replace:
            token = op_replace
        else:
            token = self.tokenizer.get_token_value()
        line = "<{token_type}> {token} </{token_type}>\n".format(
            token_type=token_type, token=token)
        self.contents.append("\t" * self.indent + line)
        return token

    def _lookup_symbol(self, name):
        """Return the symbol entry for ``name``, or None if undeclared.

        Subroutine-level symbols take precedence over class-level ones.
        """
        symbol = self.symbol_table.sub_symbols.get(name, None)
        if not symbol:
            symbol = self.symbol_table.class_symbols.get(name, None)
        return symbol

    def _next_is(self, *values):
        """Return True if the look-ahead token is syntax equal to a value.

        String constants never match: their text is data, so a literal such
        as ")" or ";" must not be mistaken for the punctuation itself.
        """
        token = self.tokenizer.look_ahead()
        return token[0] != 'stringConstant' and token[1] in values

    # 'class' className '{' classVarDec* subroutineDec* '}'
    def compile_class(self):
        """Compile a complete class and create its symbol table.

        Raises:
            Exception: if a token that cannot start a class member appears
                (previously this looped forever without advancing).
        """
        # create symbol table for class
        self.symbol_table = SymbolTable()
        self.add_opening_tag('class')
        self.increase_indent()
        self.write_next_token()  # 'class'
        self.class_name = self.write_next_token()  # className
        self.write_next_token()  # {
        while self.tokenizer.has_more_tokens():
            upcoming = self.tokenizer.look_ahead()[1]
            if upcoming in {'static', 'field'}:
                self.compile_class_var_dec()
            elif upcoming in {'function', 'constructor', 'method'}:
                self.compile_subroutine()
            elif upcoming == '}':
                self.write_next_token()  # }
            else:
                raise Exception(
                    'Unexpected token in class body: {}'.format(upcoming))
        self.decrease_indent()
        self.add_closing_tag('class')

    # ('static' | 'field' ) type varName (',' varName)* ';'
    def compile_class_var_dec(self):
        """Compile one class-level declaration and register its variables."""
        tokens = []
        self.add_opening_tag('classVarDec')
        self.increase_indent()
        # Collect every token up to (not including) the terminating ';'.
        while self.tokenizer.look_ahead()[1] != ';':
            tokens.append(self.write_next_token())
        self.symbol_table.add_class_var(tokens)
        self.write_next_token()  # ;
        self.decrease_indent()
        self.add_closing_tag('classVarDec')

    # ('constructor' | 'function' | 'method') ('void' | type) subroutineName
    # '(' parameterList ')' subroutineBody
    def compile_subroutine(self):
        """Compile one subroutine declaration, including its body."""
        self.symbol_table.reset_subroutine()
        self.add_opening_tag('subroutineDec')
        self.increase_indent()
        f_type = self.write_next_token()  # constructor|function|method
        if f_type == 'method':
            # Methods receive the object as an implicit first argument.
            self.symbol_table.add_sub_var('arg', self.class_name, 'this')
        self.write_next_token()  # 'void'|type
        f_name = self.write_next_token()  # subroutineName
        self.compile_param_list()  # also consumes the opening '('
        self.write_next_token()  # )
        self.write_next_token()  # {
        # Locals must be counted before the function command is written.
        num_locals = 0
        while self.tokenizer.look_ahead()[1] == 'var':
            num_locals += self.compile_var_dec()
        self.VMwriter.write_function(
            '{}.{}'.format(self.class_name, f_name), num_locals)
        if f_type == 'method':
            # Point `this` at the object passed as argument 0.
            self.VMwriter.write_push('arg', 0)
            self.VMwriter.write_pop('pointer', 0)
        elif f_type == 'constructor':
            # Allocate one word per field and point `this` at the block.
            num_fields = self.symbol_table.num_field_vars()
            self.VMwriter.write_push('constant', num_fields)
            self.VMwriter.write_call('Memory.alloc', 1)
            self.VMwriter.write_pop('pointer', 0)
        self.compile_subroutine_body()
        self.decrease_indent()
        self.add_closing_tag('subroutineDec')

    # statements '}'
    def compile_subroutine_body(self):
        """Compile the statements of a subroutine and its closing brace."""
        self.add_opening_tag('subroutineBody')
        self.increase_indent()
        while self.tokenizer.look_ahead()[1] != '}':
            self.compile_statements()
        self.write_next_token()  # }
        self.decrease_indent()
        self.add_closing_tag('subroutineBody')

    # ( (type varName) (',' type varName)*)?
    def compile_param_list(self):
        """Compile a parameter list and register each parameter as 'arg'.

        The opening '(' is consumed here; the closing ')' is left for the
        caller.
        """
        self.add_opening_tag('parameterList')
        self.increase_indent()
        token_type = None  # type of the parameter currently being read
        while self.tokenizer.look_ahead()[1] != ')':
            token = self.tokenizer.look_ahead()
            if token[0] == 'symbol':
                # '(' or ',' starts a new parameter
                token_type = None
            elif token[0] == 'keyword':
                # built-in type keyword
                token_type = token[1]
            elif token_type:
                # the type is already known, so this is the parameter name
                self.symbol_table.add_sub_var('arg', token_type, token[1])
            else:
                # identifier used as a (class) type
                token_type = token[1]
            self.write_next_token()
        self.decrease_indent()
        self.add_closing_tag('parameterList')

    # 'var' type varName (',' varName)* ';'
    def compile_var_dec(self):
        """Compile one local declaration.

        Returns:
            The number of variables declared by this statement.
        """
        self.add_opening_tag('varDec')
        self.increase_indent()
        self.write_next_token()  # var
        token_type = self.write_next_token()  # type
        num_vars = 0
        while self.tokenizer.look_ahead()[1] != ';':
            token = self.tokenizer.look_ahead()
            if token[0] == 'identifier':
                num_vars += 1
                self.symbol_table.add_sub_var('var', token_type, token[1])
            self.write_next_token()  # varName or ','
        self.write_next_token()  # ;
        self.decrease_indent()
        self.add_closing_tag('varDec')
        return num_vars

    # statement*
    # letStatement | ifStatement | whileStatement | doStatement | returnStatement
    def compile_statements(self):
        """Compile statements until the enclosing '}' is the next token.

        Raises:
            Exception: if the next token does not start a statement
                (previously this looped forever without advancing).
        """
        self.add_opening_tag('statements')
        self.increase_indent()
        while self.tokenizer.look_ahead()[1] != '}':
            keyword = self.tokenizer.look_ahead()[1]
            handler = self._STATEMENT_COMPILERS.get(keyword)
            if handler is None:
                raise Exception(
                    'Unexpected token in statements: {}'.format(keyword))
            getattr(self, handler)()
        self.decrease_indent()
        self.add_closing_tag('statements')

    # subroutineCall: subroutineName '(' expressionList ')' |
    # ( className | varName) '.' subroutineName '(' expressionList ')'
    def compile_subroutine_call(self):
        """Compile a subroutine call and emit the matching VM call."""
        call_name = self.write_next_token()  # subroutineName|className|varName
        symbol = self._lookup_symbol(call_name)
        if self.tokenizer.look_ahead()[1] == '(':
            # show(x, y, z): method call on the current object
            self.write_next_token()  # (
            # the current object is passed as the implicit first argument
            self.VMwriter.write_push('pointer', 0)
            num_exp = self.compile_expression_list() + 1
            self.write_next_token()  # )
            self.VMwriter.write_call(
                '{}.{}'.format(self.class_name, call_name), num_exp)
        elif self.tokenizer.look_ahead()[1] == '.':
            self.write_next_token()  # .
            method_name = self.write_next_token()  # subroutineName
            self.write_next_token()  # (
            if symbol:
                # game.run(): the receiver is a variable, so push it and
                # call through its declared type
                self.VMwriter.write_push(symbol['kind'], symbol['index'])
                call_name = symbol['type']
            # Math.multiply(x, y): no receiver is pushed
            num_exp = self.compile_expression_list()
            if symbol:
                num_exp += 1  # count the receiver pushed above
            self.VMwriter.write_call(
                '{}.{}'.format(call_name, method_name), num_exp)
            self.write_next_token()  # )

    # 'do' subroutineCall ';'
    def compile_do(self):
        """Compile a do statement, discarding the call's return value."""
        self.add_opening_tag('doStatement')
        self.increase_indent()
        self.write_next_token()  # do
        self.compile_subroutine_call()
        # throw away top stack item
        self.VMwriter.write_pop('temp', 0)
        self.write_next_token()  # ;
        self.decrease_indent()
        self.add_closing_tag('doStatement')

    # 'let' varName ('[' expression ']')? '=' expression ';'
    def compile_let(self):
        """Compile an assignment to a variable or to an array element.

        Raises:
            Exception: if the assigned variable is not in the symbol table.
        """
        self.add_opening_tag('letStatement')
        self.increase_indent()
        self.write_next_token()  # let
        token = self.write_next_token()  # varName
        symbol = self._lookup_symbol(token)
        if not symbol:
            raise Exception('Undefined variable: {}'.format(token))
        did_index = False
        if self.tokenizer.look_ahead()[1] == '[':
            # only push the base address if we're indexing an array
            self.VMwriter.write_push(symbol['kind'], symbol['index'])
            did_index = True
            self.write_next_token()  # [
            self.compile_expression()
            self.write_next_token()  # ]
            # base address + index (from the compiled expression)
            self.VMwriter.write_arithmetic('+')
        self.write_next_token()  # =
        # evaluate the right-hand side onto the stack
        self.compile_expression()
        if did_index:
            # Park the value in temp 0 so the target address underneath it
            # can be popped into pointer 1, then store through `that`.
            self.VMwriter.write_pop('temp', 0)
            self.VMwriter.write_pop('pointer', 1)
            self.VMwriter.write_push('temp', 0)
            self.VMwriter.write_pop('that', 0)
        else:
            self.VMwriter.write_pop(symbol['kind'], symbol['index'])
        self.write_next_token()  # ;
        self.decrease_indent()
        self.add_closing_tag('letStatement')

    # 'while' '(' expression ')' '{' statements '}'
    def compile_while(self):
        """Compile a while loop using a uniquely numbered label pair."""
        self.label_index += 1
        label = self.label_index
        cond_label = 'WHILE_COND{}'.format(label)
        end_label = 'END_WHILE{}'.format(label)
        self.add_opening_tag('whileStatement')
        self.increase_indent()
        self.write_next_token()  # while
        self.VMwriter.write_label(cond_label)
        self.write_next_token()  # (
        self.compile_expression()
        self.write_next_token()  # )
        # leave the loop when the negated condition is true
        self.VMwriter.write_arithmetic('~', unary=True)
        self.VMwriter.write_if(end_label)
        self.write_next_token()  # {
        while self.tokenizer.look_ahead()[1] != '}':
            self.compile_statements()
        self.write_next_token()  # }
        self.VMwriter.write_goto(cond_label)
        self.VMwriter.write_label(end_label)
        self.decrease_indent()
        self.add_closing_tag('whileStatement')

    # 'return' expression? ';'
    def compile_return(self):
        """Compile a return statement; a bare return pushes constant 0."""
        self.add_opening_tag('returnStatement')
        self.increase_indent()
        self.write_next_token()  # return
        if self._next_is(';'):
            # returns nothing
            self.VMwriter.write_push('constant', 0)
        else:
            while not self._next_is(';'):
                self.compile_expression()
        self.VMwriter.write_return()
        self.write_next_token()  # ;
        self.decrease_indent()
        self.add_closing_tag('returnStatement')

    # 'if' '(' expression ')' '{' statements '}' ( 'else' '{' statements '}' )?
    def compile_if(self):
        """Compile an if statement with an optional else branch.

        Emitted shape: if-goto TRUE; goto FALSE; TRUE: s1; goto END;
        FALSE: [s2]; END.
        """
        self.label_index += 1
        label = self.label_index
        true_label = 'IF_TRUE{}'.format(label)
        false_label = 'IF_FALSE{}'.format(label)
        end_label = 'IF_END{}'.format(label)
        self.add_opening_tag('ifStatement')
        self.increase_indent()
        self.write_next_token()  # if
        self.write_next_token()  # (
        self.compile_expression()
        self.write_next_token()  # )
        self.VMwriter.write_if(true_label)
        self.VMwriter.write_goto(false_label)
        self.VMwriter.write_label(true_label)
        self.write_next_token()  # {
        self.compile_statements()  # s1
        self.write_next_token()  # }
        # skip over the else branch once s1 has run
        self.VMwriter.write_goto(end_label)
        self.VMwriter.write_label(false_label)
        if self.tokenizer.look_ahead()[1] == 'else':
            self.write_next_token()  # else
            self.write_next_token()  # {
            self.compile_statements()  # s2
            self.write_next_token()  # }
        self.VMwriter.write_label(end_label)
        self.decrease_indent()
        self.add_closing_tag('ifStatement')

    # term (op term)*
    def compile_expression(self):
        """Compile an expression, emitting each operator after its operands."""
        self.add_opening_tag('expression')
        self.increase_indent()
        self.compile_term()
        while self.tokenizer.look_ahead()[1] in ops:
            operation = self.write_next_token()  # op
            self.compile_term()
            self.VMwriter.write_arithmetic(operation)
        self.decrease_indent()
        self.add_closing_tag('expression')

    # integerConstant | stringConstant | keywordConstant | varName |
    # varName '[' expression ']' | subroutineCall | '(' expression ')' |
    # unaryOp term
    def compile_term(self):
        """Compile a single term and leave its value on the stack.

        Raises:
            Exception: on an unknown keyword or an unrecognised term.
        """
        self.add_opening_tag('term')
        self.increase_indent()
        next_token = self.tokenizer.look_ahead()
        # A string literal such as "-" or "(" is data, not syntax, so it
        # must bypass every syntax-driven branch below.
        is_string = next_token[0] == 'stringConstant'
        if not is_string and next_token[1] in unary_ops:
            operation = self.write_next_token()  # unaryOp
            self.compile_term()
            self.VMwriter.write_arithmetic(operation, unary=True)
        elif not is_string and next_token[1] == '(':
            self.write_next_token()  # (
            self.compile_expression()
            self.write_next_token()  # )
        elif not is_string and self.tokenizer.tokens[1][1] in {'.', '('}:
            # the token after the upcoming one marks a subroutine call
            self.compile_subroutine_call()
        else:
            self._compile_simple_term()
        self.decrease_indent()
        self.add_closing_tag('term')

    def _compile_simple_term(self):
        """Compile a variable, array access or constant term."""
        token = self.write_next_token()
        token_type = self.tokenizer.get_token_type()
        symbol = None
        if token_type != 'stringConstant':
            # A string's text is never a variable reference, even when it
            # happens to spell a declared name.
            symbol = self._lookup_symbol(token)
        if symbol and token != 'this':
            # evaluating a term, so the variable's value goes on the stack
            self.VMwriter.write_push(symbol['kind'], symbol['index'])
            if self.tokenizer.look_ahead()[1] == '[':
                self.write_next_token()  # [
                self.compile_expression()
                self.write_next_token()  # ]
                # base address + index (from the compiled expression)
                self.VMwriter.write_arithmetic('+')
                # aim `that` at the element, then push its value
                self.VMwriter.write_pop('pointer', 1)
                self.VMwriter.write_push('that', 0)
        elif token_type == 'integerConstant':
            self.VMwriter.write_push('constant', token)
        elif token_type == 'stringConstant':
            # build the string at run time, one character at a time
            self.VMwriter.write_push('constant', len(token))
            self.VMwriter.write_call('String.new', 1)
            for char in token:
                self.VMwriter.write_push('constant', ord(char))
                self.VMwriter.write_call('String.appendChar', 2)
        elif token_type == 'keyword':
            if token in {'null', 'false'}:
                self.VMwriter.write_push('constant', 0)
            elif token == 'true':
                # true is emitted as the bitwise negation of 0
                self.VMwriter.write_push('constant', 0)
                self.VMwriter.write_arithmetic('~', unary=True)
            elif token == 'this':
                self.VMwriter.write_push('pointer', 0)
            else:
                raise Exception('Unkown keyword!')
        else:
            raise Exception('Unkown term!')

    # (expression (',' expression)* )?
    def compile_expression_list(self):
        """Compile a comma-separated expression list.

        The closing bracket is left for the caller.

        Returns:
            The number of expressions compiled.
        """
        self.add_opening_tag('expressionList')
        self.increase_indent()
        num_exp = 0
        while not self._next_is(')', ']'):
            if self._next_is(','):
                self.write_next_token()  # ,
            else:
                self.compile_expression()
                num_exp += 1
        self.decrease_indent()
        self.add_closing_tag('expressionList')
        return num_exp

    def increase_indent(self):
        """Indent subsequent XML output one level deeper."""
        self.indent += 1

    def decrease_indent(self):
        """Indent subsequent XML output one level shallower."""
        self.indent -= 1

    def write_token(self, token):
        """Append ``token`` plus a newline to the output, unindented."""
        token_str = token + "\n"
        self.contents.append(token_str)

    def add_opening_tag(self, tagname):
        """Append an opening XML tag at the current indentation."""
        tag_str = "\t" * self.indent + "<{}>\n".format(tagname)
        self.contents.append(tag_str)

    def add_closing_tag(self, tagname):
        """Append a closing XML tag at the current indentation."""
        tag_str = "\t" * self.indent + "</{}>\n".format(tagname)
        self.contents.append(tag_str)