class CompilationEngine(object): # the destination file for writing destination_file = None # the tokenizer for the input file tokenizer = None # symbol table symbol_table = None # vm writer vm_writer = None # the class name class_name = "" # indicies for if and while loops # start at -1 because we increment before use while_index = -1 if_index = -1 # the constructor for compiling a single class # the next method to be called after construction must be compile_class # source_filename must be a single file, not a directory def __init__(self, source_filename): # destination filename # if the original extension was .jack, then make the extension .vm # if the original extension was not .jack, then append .vm if source_filename.lower().endswith(".jack"): destination_filename = source_filename[:-5] + ".vm" else: destination_filename = source_filename + ".vm" # open the destination filename for writing self.destination_file = open(destination_filename, 'w') # create a tokenizer for the input file self.tokenizer = JackTokenizer(source_filename) # create the symbol table self.symbol_table = SymbolTable() # create the vm writer self.vm_writer = VMWriter(self.destination_file) # compiles a complete class and closes the output file def compile_class(self): # class keyword tt, t = self._token_next(True, "KEYWORD", "class") # name of class tt, t = self._token_next(True, "IDENTIFIER") self.class_name = t # open brace tt, t = self._token_next(True, "SYMBOL", "{") # one or more variable declarations self.tokenizer.advance() while True: tt, t = self._token_next(False) if tt == "KEYWORD" and t in ["field", "static"]: self.compile_class_var_dec() else: # stop trying to process variable declarations break # one or more subroutine declarations while True: tt, t = self._token_next(False) if tt == "KEYWORD" and t in ["constructor", "function", "method"]: self.compile_subroutine() else: # stop trying to process functions break # close brace # do not advance because we already advanced upon 
exiting the last loop tt, t = self._token_next(False, "SYMBOL", "}") # done with compilation; close the output file self.destination_file.close() # compiles a static declaration or field declaration def compile_class_var_dec(self): # compile the variable declaration # False means this is a class (not a subroutine) self.compile_var_dec(False) # compiles a complete method, function, or constructor def compile_subroutine(self): # start of subroutine self.symbol_table.start_subroutine() # constructor, function, or method keyword tt, type = self._token_next(False, "KEYWORD") # type of the return value # can be either keyword (void) or an identifier (any type) tt, t = self._token_next(True) # name of the method/function/constructor tt, name = self._token_next(True) name = self.class_name + "." + name # if the type is a method, "define" this as an argument, so the other # argument indexes work correctly if type == "method": self.symbol_table.define("this", self.class_name, SymbolTable.ARG) # opening parenthesis tt, t = self._token_next(True, "SYMBOL", "(") # arguments self.tokenizer.advance() self.compile_parameter_list() # closing parenthesis tt, t = self._token_next(False, "SYMBOL", ")") # opening brace tt, t = self._token_next(True, "SYMBOL", "{") # variable declarations self.tokenizer.advance() while True: tt, t = self._token_next(False) if tt == "KEYWORD" and t == "var": self.compile_var_dec() else: # stop trying to process variable declarations break # write the function num_locals = self.symbol_table.var_count(self.symbol_table.VAR) self.vm_writer.write_function(name, num_locals) # write any special code at the top of the function if type == "constructor": # code to allocate memory and set "this" size = self.symbol_table.var_count(self.symbol_table.FIELD) self.vm_writer.write_push(self.vm_writer.CONST, size) self.vm_writer.write_call("Memory.alloc", 1) self.vm_writer.write_pop(self.vm_writer.POINTER, 0) elif type == "function": # nothing special pass elif type == 
"method": # put argument 0 into pointer 0 (this) self.vm_writer.write_push(self.vm_writer.ARG, 0) self.vm_writer.write_pop(self.vm_writer.POINTER, 0) else: print "WARNING: Expected constructor, function, or name; got", type # statements self.compile_statements() # closing brace tt, t = self._token_next(False, "SYMBOL", "}") self.tokenizer.advance() # compiles a (possibly empty) parameter list, not including the enclosing # parentheses def compile_parameter_list(self): # check for empty list tt, t = self._token_next(False) if tt == "SYMBOL" and t == ")": # the parameter list was empty; do not process any more pass else: # there are things in the parameter list while True: # keyword (variable type) tt, type = self._token_next(False) # identifier (variable name) tt, name = self._token_next(True) # the kind is always an arg, since these are all parameters to the # function kind = SymbolTable.ARG # define the variable in the symbol table self.symbol_table.define(name, type, kind) # possible comma tt, t = self._token_next(True) if tt != "SYMBOL" or t != ",": # not a comma; stop processing parameters break self.tokenizer.advance() # compiles a var declaration # if subroutine is true, only the var keyword can be used # if subroutine is false, only the static and field keywords can be used def compile_var_dec(self, subroutine=True): # the keyword to start the declaration tt, kind = self._token_next(False, "KEYWORD") # check for required types if subroutine: if kind == "var": kind = SymbolTable.VAR else: print "WARNING: expecting var, but received %s" % (str(kind)) else: if kind == "static": kind = SymbolTable.STATIC elif kind == "field": kind = SymbolTable.FIELD else: print "WARNING: expecting static or field, but received %s" % (str(kind)) # type of the declaration # could be an identifier or a keyword (int, etc) tt, type = self._token_next(True) # name of the declaration tt, name = self._token_next(True, "IDENTIFIER") # define the variable in the symbol table 
self.symbol_table.define(name, type, kind) # can support more than one identifier name, to declare more than one # variable, separated by commas; process the 2nd-infinite variables self.tokenizer.advance() while True: tt, t = self._token_next(False) if tt == "SYMBOL" and t == ",": # another variable name follows tt, name = self._token_next(True, "IDENTIFIER") # define the variable in the symbol table self.symbol_table.define(name, type, kind) self.tokenizer.advance() else: # no more variable names break # should be on the semicolon at the end of the line tt, t = self._token_next(False, "SYMBOL", ";") self.tokenizer.advance() # compiles a sequence of statements, not including the enclosing {} def compile_statements(self): while True: tt, t = self._token_next(False) if tt == "KEYWORD" and t in ["do", "let", "while", "return", "if"]: # call compile_t, where t is the type of compilation we want token = getattr(self, "compile_" + t)() else: # not a statement; stop processing statements break # compiles a do statement def compile_do(self): # do keyword tt, t = self._token_next(False, "KEYWORD", "do") # subroutine call self.tokenizer.advance() self.compile_subroutine_call() # do statements do not have a return value, so eliminate the return # off of the stack self.vm_writer.write_pop(self.vm_writer.TEMP, 0) # semicolon tt, t = self._token_next(False, "SYMBOL", ";") self.tokenizer.advance() # compiles a let statement def compile_let(self): # let keyword tt, t = self._token_next(False, "KEYWORD", "let") # variable name tt, name = self._token_next(True, "IDENTIFIER") # possible brackets for array tt, t = self._token_next(True) if tt == "SYMBOL" and t == "[": # array - write operation array = True # compile the offset expression self.tokenizer.advance() self.compile_expression() # write the base address onto the stack segment, index = self._resolve_symbol(name) self.vm_writer.write_push(segment, index) # add base and offset self.vm_writer.write_arithmetic("add") # we cannot 
yet put the result into pointer 1, since the read # operation (which hasn't been parsed/computed yet) may use pointer 1 # to read from an arrya value # closing bracket tt, t = self._token_next(False, "SYMBOL", "]") # advance to the next token, since we are expected to be on the = for # the next line self.tokenizer.advance() else: array = False # equals sign tt, t = self._token_next(False, "SYMBOL", "=") # expression self.tokenizer.advance() self.compile_expression() if array: # our stack now looks like this: # TOP OF STACK # computed result to store # address in which value should be stored # ... previous stuff ... # pop the computed value to temp 0 self.vm_writer.write_pop(self.vm_writer.TEMP, 0) # pop the array address to pointer 1 (that) self.vm_writer.write_pop(self.vm_writer.POINTER, 1) # put the computed value back onto the stack self.vm_writer.write_push(self.vm_writer.TEMP, 0) # pop to the variable name or the array reference self.vm_writer.write_pop(self.vm_writer.THAT, 0) else: # not an array - pop the expression to the variable segment, index = self._resolve_symbol(name) self.vm_writer.write_pop(segment, index) # semicolon tt, t = self._token_next(False, "SYMBOL", ";") self.tokenizer.advance() # compiles a while statement def compile_while(self): # labels for this while loop self.while_index += 1 while_start = "WHILE_START_%d" % (self.while_index) while_end = "WHILE_END_%d" % (self.while_index) # while keyword tt, t = self._token_next(False, "KEYWORD", "while") # opening parenthesis tt, t = self._token_next(True, "SYMBOL", "(") # label for the start of the while statement self.vm_writer.write_label(while_start) # the expression that is the condition of the while statement self.tokenizer.advance() self.compile_expression() # the closing parenthesis tt, t = self._token_next(False, "SYMBOL", ")") # the result of the evaluation is now on the stack # if false, then goto to the end of the loop # to do this, negate and then call if-goto 
self.vm_writer.write_arithmetic("not") self.vm_writer.write_if(while_end) # the opening brace tt, t = self._token_next(True, "SYMBOL", "{") # the statments that is the body of the while loop self.tokenizer.advance() self.compile_statements() # the closing brace tt, t = self._token_next(False, "SYMBOL", "}") # after the last statement of the while loop # need to jump back up to the top of the loop to evaluate again self.vm_writer.write_goto(while_start) # label at the end of the loop self.vm_writer.write_label(while_end) self.tokenizer.advance() # compiles a return statement def compile_return(self): # return keyword tt, t = self._token_next(False, "KEYWORD", "return") # possible expression to return tt, t = self._token_next(True) if tt != "SYMBOL" and t != ";": self.compile_expression() else: # no return expression; return 0 self.vm_writer.write_push(self.vm_writer.CONST, 0) # ending semicolon tt, t = self._token_next(False, "SYMBOL", ";") self.vm_writer.write_return() self.tokenizer.advance() # compiles a if statement, including a possible trailing else clause def compile_if(self): # it is more efficient in an if-else case to have the else portion first # in the code when testing, but we use the less-efficient but # easier-to-write true-false pattern here # labels for this if statement self.if_index += 1 if_false = "IF_FALSE_%d" % (self.if_index) if_end = "IF_END_%d" % (self.if_index) # if keyword tt, t = self._token_next(False, "KEYWORD", "if") # opening parenthesis tt, t = self._token_next(True, "SYMBOL", "(") # expression of if statement self.tokenizer.advance() self.compile_expression() # closing parenthesis tt, t = self._token_next(False, "SYMBOL", ")") # the result of the evaluation is now on the stack # if false, then goto the false label # if true, fall through to executing code # if there is no else, then false and end are the same, but having two # labels does not increase code size self.vm_writer.write_arithmetic("not") self.vm_writer.write_if(if_false) 
# opening brace tt, t = self._token_next(True, "SYMBOL", "{") # statements for true portion self.tokenizer.advance() self.compile_statements() # closing brace tt, t = self._token_next(False, "SYMBOL", "}") tt, t = self._token_next(True) if tt == "KEYWORD" and t == "else": # else statement exists # goto the end of the if statement at the end of the true portion self.vm_writer.write_goto(if_end) # label for the start of the false portion self.vm_writer.write_label(if_false) # opening brace tt, t = self._token_next(True, "SYMBOL", "{") # statements self.tokenizer.advance() self.compile_statements() # closing brace tt, t = self._token_next(False, "SYMBOL", "}") # end label self.vm_writer.write_label(if_end) # advance tokenizer only if we are in the else, since otherwise the # token was advanced by the else check self.tokenizer.advance() else: # no else portion; only put in a label for false, since end is not # used self.vm_writer.write_label(if_false) # compiles an expression (one or more terms connected by operators) def compile_expression(self): # the first term self.compile_term() # finish any number of operators followed by terms while True: tt, t = self._token_next(False) if tt == "SYMBOL" and t in "+-*/&|<>=": # found an operator # postfix order - add the next term and then do the operator # the next term self.tokenizer.advance() self.compile_term() # the operator if t == "+": self.vm_writer.write_arithmetic("add") if t == "-": self.vm_writer.write_arithmetic("sub") if t == "=": self.vm_writer.write_arithmetic("eq") if t == ">": self.vm_writer.write_arithmetic("gt") if t == "<": self.vm_writer.write_arithmetic("lt") if t == "&": self.vm_writer.write_arithmetic("and") if t == "|": self.vm_writer.write_arithmetic("or") if t == "*": self.vm_writer.write_call("Math.multiply", 2) if t == "/": self.vm_writer.write_call("Math.divide", 2) else: # no term found; done parsing the expression break # compiles a term # this routine is faced with a slight difficulty when 
trying to decide # between some of the alternative parsing rules. specifically, if the # current token is an identifier, the routine must distinguish between a # variable, an array entry, and a subroutine call. a single lookahead token, # which may be one of [, (, or ., suffices to distinguish between the three # possibilities. any other token is not part of this term and should not # be advanced over. def compile_term(self): # a term: integer_constant | string_constant | keyword_constant | # varname | varname[expression] | subroutine_call | (expression) | # unary_op term tt, t = self._token_next(False) if tt == "INT_CONST": self.vm_writer.write_push(self.vm_writer.CONST, t) # advance for the next statement self.tokenizer.advance() elif tt == "STRING_CONST": # after this portion is run, a pointer to a string should be on the # stack # we create a new string of a certain size and then append characters # one by one; each append operation returns the pointer to the same # string # create the string # string is a len, data tuple; not null-terminated size = len(t) self.vm_writer.write_push(self.vm_writer.CONST, size) self.vm_writer.write_call("String.new", 1) # append each character for char in t: self.vm_writer.write_push(self.vm_writer.CONST, ord(char)) self.vm_writer.write_call("String.appendChar", 2) # advance for the next statement self.tokenizer.advance() elif tt == "KEYWORD": if t == "true": # true is -1, which is 0 negated self.vm_writer.write_push(self.vm_writer.CONST, 0) self.vm_writer.write_arithmetic("not") elif t == "false" or t == "null": self.vm_writer.write_push(self.vm_writer.CONST, 0) elif t == "this": self.vm_writer.write_push(self.vm_writer.POINTER, 0) # advance for the next statement self.tokenizer.advance() elif tt == "SYMBOL" and t == "(": # ( expression ) # parse the expression self.tokenizer.advance() self.compile_expression() # closing parenthesis tt, t = self._token_next(False, "SYMBOL", ")") # advance for the next statement 
self.tokenizer.advance() elif tt == "SYMBOL" and t in "-~": # unary_op term # postfix order - add the next term and then do the operator # parse the rest of the term self.tokenizer.advance() self.compile_term() # write the unary operation if t == "-": self.vm_writer.write_arithmetic("neg") elif t == "~": self.vm_writer.write_arithmetic("not") elif tt == "IDENTIFIER": # varname, varname[expression], subroutine_call # do not write the identifer yet # get the next bit of the expression # if it is a [, then array; if it is a ( or ., then subroutine call # if none of above, then pass over tt2, t2 = self._token_next(True) if tt2 == "SYMBOL" and t2 in "(.": # subroutine call # back up and then compile the subroutine call self.tokenizer.retreat() self.compile_subroutine_call() elif tt2 == "SYMBOL" and t2 == "[": # array - read operation # write the base address onto the stack segment, index = self._resolve_symbol(t) self.vm_writer.write_push(segment, index) # compile the offset expression self.tokenizer.advance() self.compile_expression() # add base and offset self.vm_writer.write_arithmetic("add") # put the resulting address into pointer 1 (that) self.vm_writer.write_pop(self.vm_writer.POINTER, 1) # read from that 0 onto the stack self.vm_writer.write_push(self.vm_writer.THAT, 0) # closing bracket tt, t = self._token_next(False, "SYMBOL", "]") # advance for the next statement self.tokenizer.advance() else: # none of above - just a single identifier segment, index = self._resolve_symbol(t) self.vm_writer.write_push(segment, index) else: # unknown print "WARNING: Unknown term expression object:", tt, t # compiles a (possible empty) comma-separated list of expressions def compile_expression_list(self): num_args = 0 # check for empty list tt, t = self._token_next(False) if tt == "SYMBOL" and t == ")": # the parameter list was empty; do not process any more pass else: # there are things in the parameter list while True: # expression to pass self.compile_expression() num_args 
+= 1 # possible comma tt, t = self._token_next(False) if tt == "SYMBOL" and t == ",": self.tokenizer.advance() else: # not a comma; stop processing parameters break return num_args # compiles a subroutine call # two cases: # - subroutineName(expressionList) # - (class|var).subroutineName(expressionList) def compile_subroutine_call(self): # first part of name tt, name1 = self._token_next(False, "IDENTIFIER") # a dot and another name may exist, or it could be a parenthesis name2 = None tt, t = self._token_next(True) if tt == "SYMBOL" and t == ".": # the name after the dot tt, name2 = self._token_next(True, "IDENTIFIER") # advance so that we are on the parenthesis self.tokenizer.advance() # determine if this is a method call # three possibilities # - class.func() - function call # - var.func() - method call # - func() - method call on current object if self.symbol_table.contains(name1): method_call = True local_call = False elif name2 == None: method_call = True local_call = True else: method_call = False # if a method call, push variable name1 # this a method call if the symbol table contains name1 and name2 exists # OR name1 is a method in the current object if method_call and local_call: # push the current object onto the stack as a hidden argument self.vm_writer.write_push(self.vm_writer.POINTER, 0) elif method_call and not local_call: # push the variable onto the stack as a hidden argument segment, index = self._resolve_symbol(name1) self.vm_writer.write_push(segment, index) # opening parenthesis tt, t = self._token_next(False, "SYMBOL", "(") # expression list self.tokenizer.advance() num_args = self.compile_expression_list() # closing parenthesis tt, t = self._token_next(False, "SYMBOL", ")") # write the call if method_call and local_call: # methd + <blank> # get the name of the vm function to call classname = self.class_name vm_function_name = classname + "." 
+ name1 # increase arguments by 1, since there is the hidden "this" num_args += 1 # make the call self.vm_writer.write_call(vm_function_name, num_args) elif method_call and not local_call: # variable name + method # get the name of the vm function to call classname = self.symbol_table.get(name1)[1] vm_function_name = classname + "." + name2 # increase arguments by 1, since there is the hidden "this" num_args += 1 # make the call self.vm_writer.write_call(vm_function_name, num_args) else: # get the name of the vm function to call vm_function_name = name1 + "." + name2 # make the call self.vm_writer.write_call(vm_function_name, num_args) self.tokenizer.advance() # returns the token_type and token of the next token after advancing the # tokenizer before reading if advance is True def _token_next(self, advance=False, expected_type=None, expected_value=None): # advance the tokenizer, if requested if advance: self.tokenizer.advance() # get the token type and the token itself token_type = self.tokenizer.token_type() token = str(getattr(self.tokenizer, token_type.lower())()) if expected_type and token_type != expected_type: print "WARNING: Type", token_type, "found; expected", expected_type import traceback, sys traceback.print_stack() sys.exit(1) if expected_value and token != expected_value: print "WARNING: Value", token, "found; expected", expected_value import traceback, sys traceback.print_stack() sys.exit(1) return token_type, token # convets a symbol table type into a segment type def _type_to_segment(self, type): if type == self.symbol_table.STATIC: return self.vm_writer.STATIC elif type == self.symbol_table.FIELD: return self.vm_writer.THIS elif type == self.symbol_table.ARG: return self.vm_writer.ARG elif type == self.symbol_table.VAR: return self.vm_writer.LOCAL else: print "ERROR: Bad type %s" % (str(type)) # resolves the symbol from the symbol table # the segment and index is returned as a 2-tuple def _resolve_symbol(self, name): kind, type, index = 
self.symbol_table.get(name) return self._type_to_segment(kind), index
class CompilationEngine():
    '''Parses a stream of jack tokens recursively and emits VM code.

    Construction runs the whole compilation: it consumes the tokenizer,
    writes <name>.vm via VMWriter, and closes the output.
    NOTE(review): this class is truncated at the end of this chunk; the
    body of compile_subroutine_call continues beyond this view.
    '''

    def __init__(self, tokenizer):
        # output base name: source filename with the .jack extension removed
        self._name = tokenizer.get_filename().replace('.jack','')
        # tokenizer for input
        self._tokenizer = tokenizer
        # symbol table
        self._symbols = SymbolTable()
        # vm output file
        self._writer = VMWriter(self._name + '.vm')
        # Input should be a tokenized .jack file containing one class
        assert self._tokenizer.has_more_tokens()
        self._tokenizer.advance()
        # name of the class being compiled (set in compile_class)
        self._class = None
        # kind of the subroutine being compiled: constructor/method/function
        self._subroutine = None
        # monotonically increasing counter used to generate unique labels
        self._counter = 0
        # drive the full compilation immediately, then close the output
        self.compile_class()
        self.close()

    def change_name(self, name):
        # override the output base name
        self._name = name

    def get_name(self, name):
        # NOTE(review): the 'name' parameter is unused
        return self._name

    def get_token(self):
        # current raw token text (reads the tokenizer's private field)
        return self._tokenizer._token

    def get_type(self):
        # current token type (reads the tokenizer's private field)
        return self._tokenizer._type

    def close(self):
        # close the output file at the end
        self._writer.close()

    def compile_class(self):
        # 'class' className '{' classVarDec* subroutineDec* '}'
        # keyword - class
        assert self._tokenizer.keyword() == 'class'
        self._tokenizer.advance()
        # identifier - className
        assert self._tokenizer.identifier()
        self._class = self._tokenizer.identifier()
        self._tokenizer.advance()
        # symbol - '{'
        assert self._tokenizer.symbol() == '{', "expected '{' but got " + self.get_token()
        self._tokenizer.advance()
        # classVarDec*
        while self._tokenizer.is_valid_class_variable():
            self.compile_class_var()
        # subroutineDec*
        while self._tokenizer.is_valid_subroutine():
            self.compile_subroutine()
        # symbol - '}'
        assert self._tokenizer.symbol() == '}', "expected '}' but got " + self.get_token()
        self._tokenizer.advance()
        # assuming .jack file is properly formatted, there should be no more tokens
        assert not self._tokenizer.has_more_tokens()

    def compile_class_var(self):
        # ('static'|'field') type varName (',' varName)* ';'
        # declarations write NO vm code; they only populate the symbol table
        assert self._tokenizer.is_valid_class_variable()
        # keyword - 'static' or 'field'
        temp_kind = self._tokenizer.get_token()
        self._tokenizer.advance()
        # type - 'int' or 'char' or 'boolean' or className
        assert self._tokenizer.is_valid_type()
        temp_type = self._tokenizer.get_token()
        self._tokenizer.advance()
        # identifier - varName
        assert self._tokenizer.identifier()
        temp_name = self._tokenizer.get_token()
        self._symbols.define(temp_name, temp_type, temp_kind)
        self._tokenizer.advance()
        # check for (',' varName)* structure
        while self._tokenizer.symbol() == ',':
            self._tokenizer.advance()
            # identifier - varName
            assert self._tokenizer.identifier()
            temp_name = self._tokenizer.get_token()
            self._symbols.define(temp_name, temp_type, temp_kind)
            # symbol - ',' or ';'
            self._tokenizer.advance()
        # next token should be a ';'
        assert self._tokenizer.symbol() == ';', "expected ';' but got " + self.get_token()
        self._tokenizer.advance()

    def compile_subroutine(self):
        # ('constructor'|'method'|'function') ('void'| type) subroutineName
        # '(' parameterList ')' subroutineBody
        assert self._tokenizer.is_valid_subroutine()
        self._symbols.start_subroutine()
        # keyword - constructor or method or function
        self._subroutine = self._tokenizer.get_token()
        if self._subroutine == 'method':
            # in the case of method, add 'this' to symbol table so the
            # real arguments start at index 1
            self._symbols.define('this', self._class, 'argument')
        self._tokenizer.advance()
        # keyword - type or void
        assert self._tokenizer.is_valid_subroutine_type()
        self._tokenizer.advance()
        # identifier - subroutineName
        assert self._tokenizer.identifier()
        temp_name = self._tokenizer.identifier()
        self._tokenizer.advance()
        # symbol - '('
        assert self._tokenizer.symbol() == '('
        self._tokenizer.advance()
        # parameterList (only compiled when non-empty)
        if self._tokenizer.is_valid_type():
            self.compile_parameter_list()
        # symbol - ')'
        assert self._tokenizer.symbol() == ')'
        self._tokenizer.advance()
        # fully qualified VM function name: ClassName.subroutineName
        temp_name = self._class + '.' + temp_name
        # symbol - '{'
        assert self._tokenizer.symbol() == '{'
        # subroutineBody
        self.compile_subroutine_body(temp_name)
        self._writer.write_comment('end subroutine ' + temp_name)

    def compile_parameter_list(self):
        # ( (type varName) (',' type varName)* )?
        # only called if non-empty parameter list; writes NO vm code
        assert self._tokenizer.is_valid_type()
        # type - int or char or boolean or className
        temp_type = self._tokenizer.get_token()
        self._tokenizer.advance()
        # identifier - varName
        assert self._tokenizer.identifier()
        temp_name = self._tokenizer.get_token()
        self._symbols.define(temp_name, temp_type, 'argument')
        self._tokenizer.advance()
        while self._tokenizer.symbol() == ',':
            # symbol - ','
            self._tokenizer.advance()
            assert self._tokenizer.is_valid_type()
            # type - int or char or boolean or className
            temp_type = self._tokenizer.get_token()
            self._tokenizer.advance()
            # identifier - varName
            assert self._tokenizer.identifier()
            temp_name = self._tokenizer.get_token()
            self._symbols.define(temp_name, temp_type, 'argument')
            self._tokenizer.advance()
        # symbol - ')'
        assert self._tokenizer.symbol() == ')'

    def compile_subroutine_body(self, name):
        # '{' varDec* statements '}'
        # symbol - '{'
        assert self._tokenizer.symbol() == '{'
        self._tokenizer.advance()
        # varDec* - must all be counted before the function declaration is
        # written, since the declaration includes the local count
        num_locals = 0
        while self._tokenizer.keyword() == 'var':
            # remember that compiling variables writes NO vm code
            num_locals += self.compile_var()
        self._writer.write_function(name, num_locals)
        if self._subroutine == 'method':
            # set this, in the case of a method (hidden argument 0)
            self._writer.write_push('argument',0)
            self._writer.write_pop('pointer',0)
        elif self._subroutine == 'constructor':
            # allocate the object (one word per field) and set this
            self._writer.write_object_alloc(self._symbols.var_count('field'))
        # statements
        self.compile_statements()
        # symbol - '}'
        assert self._tokenizer.symbol() == '}'
        self._tokenizer.advance()

    def compile_var(self):
        # 'var' type varName (',' varName)* ';'
        # returns the number of locals declared; writes NO vm code
        assert self._tokenizer.is_valid_variable()
        # keyword - 'var'
        self._tokenizer.advance()
        # type - int or char or boolean or className
        assert self._tokenizer.is_valid_type()
        temp_type = self._tokenizer.get_token()
        self._tokenizer.advance()
        # identifier - varName
        assert self._tokenizer.identifier()
        temp_name = self._tokenizer.get_token()
        self._symbols.define(temp_name, temp_type, 'local')
        num_locals = 1
        self._tokenizer.advance()
        while self._tokenizer.symbol() == ',':
            # symbol - ','
            self._tokenizer.advance()
            # identifier - varName
            assert self._tokenizer.identifier()
            temp_name = self._tokenizer.get_token()
            self._symbols.define(temp_name, temp_type, 'local')
            num_locals += 1
            self._tokenizer.advance()
        # symbol - ';'
        assert self._tokenizer.symbol() == ';'
        self._tokenizer.advance()
        return num_locals

    def compile_statements(self):
        # statement*
        while self._tokenizer.is_valid_statement():
            if self._tokenizer.keyword() == 'let':
                # letStatement
                self.compile_let()
            elif self._tokenizer.keyword() == 'if':
                # ifStatement
                self.compile_if()
            elif self._tokenizer.keyword() == 'while':
                # whileStatement
                self.compile_while()
            elif self._tokenizer.keyword() == 'do':
                # doStatement
                self.compile_do()
            elif self._tokenizer.keyword() == 'return':
                # returnStatement
                self.compile_return()
        # symbol - '}' (statements always end at the enclosing close brace)
        assert self._tokenizer.symbol() == '}'

    def compile_let(self):
        # 'let' varName ('[' expression ']')? '=' expression ';'
        # keyword - 'let'
        assert self._tokenizer.keyword() == 'let'
        self._tokenizer.advance()
        # identifier - varName
        assert self._tokenizer.identifier()
        # NOTE(review): if the lookahead is neither '=' nor '[', this method
        # silently does nothing - confirm that is intended
        if self._tokenizer.peek() == '=':
            # varName '=' expression ';'
            var_kind = self._symbols.kind_of(self._tokenizer.identifier())
            var_index = self._symbols.index_of(self._tokenizer.identifier())
            self._tokenizer.advance()
            # next token is '='
            self._tokenizer.advance()
            # evaluate RHS expression, pop into variable
            self.compile_expression()
            if var_kind == 'field':
                # fields live in the 'this' segment
                self._writer.write_pop('this', var_index)
            else:
                self._writer.write_pop(var_kind, var_index)
            # expression ends with a ';'
            assert self._tokenizer.symbol() == ';', "expected ';' but got " + self.get_token()
            self._tokenizer.advance()
        elif self._tokenizer.peek() == '[':
            # varName '[' expression ']' '=' expression ';'
            # write base address to stack
            self._writer.write_push(self._symbols.kind_of(self._tokenizer.identifier()), self._symbols.index_of(self._tokenizer.identifier()))
            self._tokenizer.advance()
            # symbol - '['
            self._tokenizer.advance()
            # expression - represents array index
            self.compile_expression()
            # base address + array index
            self._writer.write_arithmetic('add')
            # symbol - ']'
            assert self._tokenizer.symbol() == ']'
            self._tokenizer.advance()
            # symbol - '='
            assert self._tokenizer.symbol() == '='
            self._tokenizer.advance()
            # expression
            self.compile_expression()
            # pop RHS value into temp segment
            self._writer.write_pop('temp', 1)
            # align that with array[i]
            self._writer.write_pop('pointer', 1)
            # push value of RHS expression onto stack
            self._writer.write_push('temp', 1)
            # pop value into correct array index
            self._writer.write_pop('that', 0)
            # symbol - ';'
            assert self._tokenizer.symbol() == ';', "expected ';' but got " + self.get_token()
            self._tokenizer.advance()

    def compile_if(self):
        # 'if' '(' expression ')' '{' statements '}'
        # ('else' '{' statements '}')?
        # keyword - if
        assert self._tokenizer.keyword() == 'if'
        self._writer.write_comment('if statement')
        self._tokenizer.advance()
        # symbol - (
        assert self._tokenizer.symbol() == '(', "expected '(' but got " + self.get_token()
        self._tokenizer.advance()
        # expression; negate it so a false condition jumps to the else label
        self.compile_expression()
        self._writer.write_arithmetic('not')
        # unique label suffix for this statement
        label_num = str(self._counter)
        self._counter += 1
        self._writer.write_if('ELSE'+label_num)
        # symbol - )
        # NOTE(review): the assertion message says '(' but this checks ')'
        assert self._tokenizer.symbol() == ')', "expected '(' but got " + self.get_token()
        self._tokenizer.advance()
        # symbol - '{'
        assert self._tokenizer.symbol() == '{'
        self._tokenizer.advance()
        # statements (true branch)
        self.compile_statements()
        # symbol - '}'
        assert self._tokenizer.symbol() == '}'
        self._tokenizer.advance()
        # skip the else branch after the true branch, then mark where the
        # false case lands
        self._writer.write_goto('IF'+label_num)
        self._writer.write_label('ELSE'+label_num)
        # check for else
        if self._tokenizer.keyword() == 'else':
            # 'else' '{' statements '}'
            # keyword - 'else'
            self._tokenizer.advance()
            # symbol - '{'
            assert self._tokenizer.symbol() == '{', "expected '{' but got " + self.get_token()
            self._tokenizer.advance()
            # statements (false branch)
            self.compile_statements()
            # symbol - '}'
            assert self._tokenizer.symbol() == '}', "expected '}' but got " + self.get_token()
            self._tokenizer.advance()
        # end-of-if label (emitted whether or not an else exists)
        self._writer.write_label('IF'+label_num)

    def compile_while(self):
        # 'while' '(' expression ')' '{' statements '}'
        # keyword - 'while'
        assert self._tokenizer.keyword() == 'while'
        # labels for if-goto and goto vm commands
        label_num = str(self._counter)
        self._counter += 1
        self._tokenizer.advance()
        # symbol - '('
        assert self._tokenizer.symbol() == '('
        self._tokenizer.advance()
        # loop re-entry point: condition is re-evaluated here each iteration
        self._writer.write_label('WHILE'+label_num)
        # expression; negate it so a false condition exits the loop
        self.compile_expression()
        self._writer.write_arithmetic('not')
        self._writer.write_if('ELSE'+label_num)
        # symbol - ')'
        assert self._tokenizer.symbol() == ')'
        self._tokenizer.advance()
        # symbol - '{'
        assert self._tokenizer.symbol() == '{'
        self._tokenizer.advance()
        # statements (loop body)
        self.compile_statements()
        # jump back to re-evaluate the condition; exit label follows
        self._writer.write_goto('WHILE'+label_num)
        self._writer.write_label('ELSE'+label_num)
        # symbol - '}'
        assert self._tokenizer.symbol() == '}'
        self._tokenizer.advance()

    def compile_do(self):
        # 'do' subroutineCall ';'
        assert self._tokenizer.keyword() == 'do'
        # keyword - 'do'
        self._tokenizer.advance()
        # identifier - subroutineCall
        assert self._tokenizer.identifier()
        # outer subroutine must be void function
        self.compile_subroutine_call()
        # symbol - ';'
        assert self._tokenizer.symbol() == ';'
        # discard void function default return value
        self._writer.write_pop('temp',0)
        self._tokenizer.advance()

    def compile_return(self):
        # 'return' expression? ';'
        # keyword - 'return'
        assert self._tokenizer.keyword() == 'return'
        self._writer.write_comment('return statement')
        self._tokenizer.advance()
        # expression?
        if self._tokenizer.symbol() == ';':
            # symbol - ';' (void function) - push the mandatory dummy 0
            self._writer.write_push('constant', 0)
            self._tokenizer.advance()
        else:
            # expression (not void)
            self.compile_expression()
            # symbol - ';'
            assert self._tokenizer.symbol() == ';'
            self._tokenizer.advance()
        self._writer.write_return()

    def compile_expression(self):
        # term (op term)*
        # term
        self.compile_term()
        # check for op
        while self._tokenizer.is_valid_operator():
            # op
            temp_op = self._tokenizer.symbol()
            self._tokenizer.advance()
            # term
            self.compile_term()
            # write operator vm command, postfix order
            self._writer.write_operator(temp_op)

    def compile_term(self):
        # integerConstant | stringConstant | keywordConstant | varName |
        # varName '[' expression ']' | subroutineCall | '(' expression ')' |
        # unaryOp term
        if self._tokenizer.int_value() is not None:
            # integerConstant
            self._writer.write_push('constant', self._tokenizer.int_value())
            self._tokenizer.advance()
        elif self._tokenizer.string_value() is not None:
            # stringConstant
            self._writer.write_string_constant(self._tokenizer.string_value())
            self._tokenizer.advance()
        elif self._tokenizer.keyword() is not None:
            # keywordConstant (true/false/null/this)
            self._writer.write_keyword_constant(self._tokenizer.keyword())
            self._tokenizer.advance()
        elif self._tokenizer.symbol() == '(':
            # '(' expression ')'
            self._tokenizer.advance()
            self.compile_expression()
            assert self._tokenizer.symbol() == ')'
            self._tokenizer.advance()
        elif self._tokenizer.is_valid_unary():
            # unaryOp term
            temp_op = self._tokenizer.symbol()
            self._tokenizer.advance()
            # term
            self.compile_term()
            # write operator vm command, postfix order
            self._writer.write_unary(temp_op)
        elif self._tokenizer.identifier() and self._tokenizer.peek() == '[':
            # varName '[' expression ']'
            # process array name, push associated value onto stack
            self._writer.write_push(self._symbols.kind_of(self._tokenizer.identifier()), self._symbols.index_of(self._tokenizer.identifier()))
            self._tokenizer.advance()
            # process [ symbol
            self._tokenizer.advance()
            # expects expression, value is pushed onto the stack
            self.compile_expression()
            # setup pointer to array element
            self._writer.write_operator('+')
            self._writer.write_pop('pointer', 1)
            # push array value onto stack
            self._writer.write_push('that', 0)
            # expects closing square bracket
            assert self._tokenizer.symbol() == ']'
            self._tokenizer.advance()
        elif self._tokenizer.identifier() and self._tokenizer.peek() in ['(','.']:
            # subroutineCall
            self.compile_subroutine_call()
        elif self._symbols.exists(self._tokenizer.identifier()):
            # varName
            var_name = self._tokenizer.identifier()
            var_kind = self._symbols.kind_of(var_name)
            var_index = self._symbols.index_of(var_name)
            if var_kind == 'field':
                # push field var onto stack (fields live in 'this')
                self._writer.write_push('this', var_index)
            else:
                self._writer.write_push(var_kind, var_index)
            self._tokenizer.advance()
        else:
            assert False, "unknown token: " + self.get_token() + " with type " + self.get_type()

    def compile_subroutine_call(self):
        # subroutineName '(' expressionList ')'| (className | varName) '.'
subroutineName '(' expressionList ')' assert self._tokenizer.identifier() and self._tokenizer.peek() in ['(','.'] if self._tokenizer.identifier() and self._tokenizer.peek() == '(': # subroutineName '(' expressionList ')' # method (in current class) temp_name = self._class + '.' + self._tokenizer.identifier() self._tokenizer.advance() # symbol - '(' self._tokenizer.advance() # push this onto the stack self._writer.write_push('pointer',0) temp_nargs = 1 # expressionList temp_nargs += self.compile_expression_list() self._writer.write_call(temp_name, temp_nargs) # symbol - ')' assert self._tokenizer.symbol() == ')' self._tokenizer.advance() elif self._symbols.exists(self._tokenizer.identifier()) and self._tokenizer.peek() == '.': # varName '.' subroutineName '(' expressionList ')' # varName (object) temp_name = self._tokenizer.identifier() # push object address onto stack, this is an implicit argument if self._symbols.kind_of(temp_name) == 'field': self._writer.write_push('this', self._symbols.index_of(temp_name)) else: self._writer.write_push(self._symbols.kind_of(temp_name), self._symbols.index_of(temp_name)) # change name to class name temp_name = self._symbols.type_of(temp_name) temp_nargs = 1 self._tokenizer.advance() # symbol - '.' temp_name += self._tokenizer.get_token() self._tokenizer.advance() # subroutineName assert self._tokenizer.identifier() temp_name += self._tokenizer.identifier() self._tokenizer.advance() # symbol - '(' assert self._tokenizer.symbol() == '(' self._tokenizer.advance() # expressionList temp_nargs += self.compile_expression_list() self._writer.write_call(temp_name, temp_nargs) # symbol - '(' assert self._tokenizer.symbol() == ')' self._tokenizer.advance() elif self._tokenizer.identifier() and self._tokenizer.peek() == '.': # className . subroutineName '(' expressionList ')' # className temp_name = self._tokenizer.identifier() self._tokenizer.advance() # symbol - '.' 
temp_name += self._tokenizer.get_token() self._tokenizer.advance() # subroutineName assert self._tokenizer.identifier(), print(self._tokenizer._tokens) temp_name += self._tokenizer.identifier() self._tokenizer.advance() # symbol - '(' assert self._tokenizer.symbol() == '(' self._tokenizer.advance() # expressionList temp_nargs = self.compile_expression_list() self._writer.write_call(temp_name, temp_nargs) # symbol - ')' assert self._tokenizer.symbol() == ')' self._tokenizer.advance() def compile_expression_list(self): # (expression ( ',' expression)* )? temp_nargs = 0 while self._tokenizer.symbol() != ')': self.compile_expression() temp_nargs += 1 if self._tokenizer.symbol() == ',': # there is another expression in the list self._tokenizer.advance() return temp_nargs