class CompilationEngine(object): def __init__(self, jack_fname): self._jack_fname = jack_fname self._s_table = SymbolTable() self._writer = None self._class_name = None self._is_writing_void_func = None self._current_func_name = None self._n_labels = 0 def compile(self, out_fname: str) -> None: tknizer = Tokenizer(self._jack_fname) with VMWriter(out_fname) as writer: self._writer = writer token = self._compile_class(tknizer, tknizer.next_token()) if token: raise CompilationException( f"Expected end of file, found {token}") def _compile_class(self, tknizer, token): _assert(token, CLASS) token = tknizer.next_token() _assert_identifier(token) self._class_name = token.value _assert(tknizer.next_token(), "{") token = tknizer.next_token() while token.value in [STATIC, FIELD]: token = self._compile_class_var_dec(tknizer, token) while token.value in [CONSTRUCTOR, FUNCTION, METHOD]: token = self._compile_subroutine_dec(tknizer, token) _assert(token, "}") return tknizer.next_token() def _compile_class_var_dec(self, tknizer, token): _assert(token, [STATIC, FIELD]) if token.value == STATIC: kind = SymbolTable.STATIC else: kind = SymbolTable.FIELD token = tknizer.next_token() _assert_type(token) var_type = token.value self._record_symbol(tknizer.next_token(), var_type, kind) token = tknizer.next_token() while token.value == ",": self._record_symbol(tknizer.next_token(), var_type, kind) token = tknizer.next_token() _assert(token, ";") return tknizer.next_token() def _compile_subroutine_dec(self, tknizer, token): _assert(token, [CONSTRUCTOR, FUNCTION, METHOD]) subroutine_type = token.value token = tknizer.next_token() _assert_type(token, allow_void=True) self._is_writing_void_func = token.value == VOID self._s_table.start_subroutine(is_method=subroutine_type == METHOD) token = tknizer.next_token() _assert_identifier(token) subroutine_name = token.value self._current_func_name = subroutine_name _assert(tknizer.next_token(), "(") # populates symbol table with arguments token = self._compile_parameter_list(tknizer, tknizer.next_token()) _assert(token, ")") _assert(tknizer.next_token(), "{") token = tknizer.next_token() while token.value == VAR: # populates symbol table with local variabls token = self._compile_var_dec(tknizer, token) n_locals = self._s_table.var_count(SymbolTable.VAR) qualified_name = ".".join([self._class_name, subroutine_name]) self._writer.write_function(qualified_name, n_locals) if subroutine_type == CONSTRUCTOR: size = self._s_table.var_count(SymbolTable.FIELD) self._writer.write_push("constant", size) self._writer.write_call("Memory.alloc", 1) self._writer.write_pop("pointer", 0) elif subroutine_type == METHOD: self._writer.write_push("argument", 0) self._writer.write_pop("pointer", 0) token = self._compile_statements(tknizer, token) _assert(token, "}") self._is_writing_void_func = None self._s_table.complete_subroutine() return tknizer.next_token() def _compile_parameter_list(self, tknizer, token): if not (token.value in [INT, CHAR, BOOLEAN] or token.type == IDENTIFIER): return token while True: var_type = token.value self._record_symbol(tknizer.next_token(), var_type, SymbolTable.ARG) token = tknizer.next_token() if token.value == ",": token = tknizer.next_token() else: return token def _compile_var_dec(self, tknizer, token): _assert(token, VAR) token = tknizer.next_token() _assert_type(token) var_type = token.value token = tknizer.next_token() self._record_symbol(token, var_type, SymbolTable.VAR) token = tknizer.next_token() while token.value == ",": self._record_symbol(tknizer.next_token(), var_type, SymbolTable.VAR) token = tknizer.next_token() _assert(token, ";") return tknizer.next_token() def _compile_statements(self, tknizer, token): while token.value in [LET, IF, WHILE, DO, RETURN]: method = getattr(self, f"_compile_{token.value}") token = method(tknizer, token) return token def _compile_let(self, tknizer, token): _assert(token, LET) token = tknizer.next_token() _assert_identifier(token) var_name = token.value token = tknizer.next_token() if token.value == "[": token = self._compile_expression(tknizer, tknizer.next_token()) _assert(token, "]") self._push_variable(var_name) self._writer.write_add() _assert(tknizer.next_token(), "=") token = self._compile_expression(tknizer, tknizer.next_token()) self._writer.write_pop("temp", 0) self._writer.write_pop("pointer", 1) self._writer.write_push("temp", 0) self._writer.write_pop("that", 0) else: _assert(token, "=") token = self._compile_expression(tknizer, tknizer.next_token()) self._pop_variable(var_name) _assert(token, ";") return tknizer.next_token() def _compile_if(self, tknizer, token): _assert(token, IF) _assert(tknizer.next_token(), "(") token = self._compile_expression(tknizer, tknizer.next_token()) _assert(token, ")") _assert(tknizer.next_token(), "{") self._writer.write_push("constant", 0) self._writer.write_equals() false_label = self._allocate_label("IF_FALSE") self._writer.write_if(false_label) token = self._compile_statements(tknizer, tknizer.next_token()) _assert(token, "}") token = tknizer.next_token() if token.value == ELSE: skip_else_label = self._allocate_label("SKIP_ELSE") self._writer.write_goto(skip_else_label) self._writer.write_label(false_label) _assert(tknizer.next_token(), "{") token = self._compile_statements(tknizer, tknizer.next_token()) _assert(token, "}") token = tknizer.next_token() self._writer.write_label(skip_else_label) else: self._writer.write_label(false_label) return token def _compile_while(self, tknizer, token): _assert(token, WHILE) _assert(tknizer.next_token(), "(") true_label = self._allocate_label("WHILE_TRUE") self._writer.write_label(true_label) token = self._compile_expression(tknizer, tknizer.next_token()) _assert(token, ")") _assert(tknizer.next_token(), "{") self._writer.write_push("constant", 0) self._writer.write_equals() false_label = self._allocate_label("WHILE_FALSE") self._writer.write_if(false_label) token = self._compile_statements(tknizer, tknizer.next_token()) _assert(token, "}") self._writer.write_goto(true_label) self._writer.write_label(false_label) return tknizer.next_token() def _compile_do(self, tknizer, token): _assert(token, DO) token = self._compile_subroutine_call(tknizer, tknizer.next_token()) _assert(token, ";") self._writer.write_pop("temp", 0) return tknizer.next_token() def _compile_return(self, tknizer, token): _assert(token, RETURN) token = tknizer.next_token() if self._is_writing_void_func is True: _assert(token, ";") self._writer.write_push("constant", 0) elif self._is_writing_void_func is False: if token.value == THIS: self._writer.write_push("pointer", 0) token = tknizer.next_token() else: token = self._compile_expression(tknizer, token) _assert(token, ";") else: raise CompilationEngine( "Encountered return statement outside function") self._writer.write_return() return tknizer.next_token() def _compile_subroutine_call(self, tknizer, first_token): _assert_identifier(first_token) is_method = False second_token = tknizer.next_token() if second_token.value == ".": token = tknizer.next_token() _assert_identifier(token) if self._s_table.has(first_token.value): # method call on another object is_method = True class_name = self._s_table.type_of(first_token.value) subroutine_name = ".".join([class_name, token.value]) self._push_variable(first_token.value) else: # constructor or class function subroutine_name = ".".join([first_token.value, token.value]) token = tknizer.next_token() else: # method call on this object is_method = True subroutine_name = ".".join([self._class_name, first_token.value]) self._writer.write_push("pointer", 0) token = second_token _assert(token, "(") n_args = 1 if is_method else 0 token = tknizer.next_token() if token.value != ")": token = self._compile_expression(tknizer, token) n_args += 1 while token.value == ",": token = self._compile_expression(tknizer, tknizer.next_token()) n_args += 1 _assert(token, ")") self._writer.write_call(subroutine_name, n_args) return tknizer.next_token() def _compile_expression(self, tknizer, token): token = self._compile_term(tknizer, token) ops = ["+", "-", "*", "/", "&", "|", "<", ">", "="] while token.value in ops: op = token.value token = self._compile_term(tknizer, tknizer.next_token()) if op == "+": self._writer.write_add() elif op == "-": self._writer.write_sub() elif op == "*": self._writer.write_call("Math.multiply", 2) elif op == "/": self._writer.write_call("Math.divide", 2) elif op == "&": self._writer.write_and() elif op == "|": self._writer.write_or() elif op == "<": self._writer.write_less_than() elif op == ">": self._writer.write_greater_than() elif op == "=": self._writer.write_equals() else: raise Exception(f"Bug: no case for op {token.value}") return token def _compile_term(self, tknizer, token): if token.type == INT_CONSTANT: self._writer.write_push("constant", token.value) return tknizer.next_token() elif token.type == STRING_CONSTANT: str_val = token.value[1:-1] self._writer.write_push("constant", len(str_val)) self._writer.write_call("String.new", 1) for char in str_val: self._writer.write_push("constant", ord(char)) self._writer.write_call("String.appendChar", 2) return tknizer.next_token() elif token.type == KEYWORD and token.value in [ TRUE, FALSE, NULL, THIS ]: if token.value == TRUE: self._writer.write_push("constant", 1) self._writer.write_neg() elif token.value in [FALSE, NULL]: self._writer.write_push("constant", 0) elif token.value == THIS: self._writer.write_push("argument", 0) else: raise Exception(f"Bug: unexpected keyword {token.value}") return tknizer.next_token() elif token.value == "(": token = self._compile_expression(tknizer, tknizer.next_token()) _assert(token, ")") return tknizer.next_token() elif token.value in ["-", "~"]: next_token = self._compile_term(tknizer, tknizer.next_token()) if token.value == "-": self._writer.write_neg() elif token.value == "~": self._writer.write_not() else: raise CompilationException( f"Bug: Unexpected unary op {token.value}") return next_token else: next_token = tknizer.next_token() if next_token.value == "[": _assert_identifier(token) array_var_name = token.value token = self._compile_expression(tknizer, tknizer.next_token()) _assert(token, "]") self._push_variable(array_var_name) self._writer.write_add() self._writer.write_pop("pointer", 1) self._writer.write_push("that", 0) return tknizer.next_token() elif next_token.value in ["(", "."]: tknizer.rewind() return self._compile_subroutine_call(tknizer, token) else: _assert_identifier(token) if not self._s_table.has(token.value): raise CompilationException( f"Unknown variable {token.value}") self._push_variable(token.value) return next_token def _push_variable(self, var_name): idx = self._s_table.index_of(var_name) kind = self._s_table.kind_of(var_name) if kind == SymbolTable.STATIC: self._writer.write_push(STATIC, idx) elif kind == SymbolTable.FIELD: self._writer.write_push(THIS, idx) elif kind == SymbolTable.ARG: self._writer.write_push("argument", idx) elif kind == SymbolTable.VAR: self._writer.write_push("local", idx) else: raise Exception(f"Bug: unexpected variable kind {kind}") def _pop_variable(self, var_name): idx = self._s_table.index_of(var_name) kind = self._s_table.kind_of(var_name) if kind == SymbolTable.STATIC: self._writer.write_pop(STATIC, idx) elif kind == SymbolTable.FIELD: self._writer.write_pop(THIS, idx) elif kind == SymbolTable.ARG: self._writer.write_pop("argument", idx) elif kind == SymbolTable.VAR: self._writer.write_pop("local", idx) else: raise Exception(f"Bug: unexpected variable kind {kind}") def _allocate_label(self, label_name): label = "{cls}.{func}${name}${id}".format( cls=self._class_name, func=self._current_func_name, name=label_name, id=self._n_labels, ) self._n_labels += 1 return label def _record_symbol(self, token, typ, kind): if token.type != IDENTIFIER: raise CompilationException(f"Expected an {IDENTIFIER}, " f'found {token.type}: "{token.value}"') self._s_table.define(token.value, typ, kind)