class Engine(object):
    """Recursive-descent Jack compiler that emits VM code while parsing.

    Constructing an Engine compiles one whole class: tokens are read from
    ``tokens`` (an object offering ``peek()`` and ``advance()``, each yielding
    a ``(token_type, value)`` pair, plus a ``tokens`` attribute used in error
    messages), identifiers are recorded in a SymbolTable, and VM commands are
    written through a VMWriter created from ``filepath``.
    """

    # Counter behind new_label().  The first ``+=`` on an instance creates an
    # instance attribute, so numbering restarts at 1 for every Engine.
    label_num = 0

    def __init__(self, tokens, filepath):
        """Compile the class found in ``tokens`` and write VM code to ``filepath``."""
        self.lex = tokens
        self.symbols = SymbolTable()
        self.vm = VMWriter(filepath)
        try:
            self.compile_class()
        finally:
            # Finish the writer even when compilation fails, so the output
            # is not left dangling.
            self.vm.closeout()

    # ------------- token helpers ----------------

    def _require(self, tok, val=None):
        """Consume the next token and return its value.

        Raises Exception when the token type differs from ``tok`` or, for
        keywords and symbols, when ``val`` is given and differs from the
        token's value.  ``val=None`` means "any value of that type".
        """
        lextok, lexval = self._advance()
        wrong_type = tok != lextok
        wrong_value = (val is not None
                       and tok in (T_KEYWORD, T_SYM)
                       and val != lexval)
        if wrong_type or wrong_value:
            raise Exception(self._require_failed_msg(tok, val))
        return lexval

    def _require_failed_msg(self, tok, val):
        """Build the error text for a failed expectation."""
        if val is None:
            val = token_list[tok]
        return 'Expected: {0}, {1} \ntoken is: {2}'.format(tok, val, self.lex.tokens)

    def _advance(self):
        """Consume and return the next ``(token_type, value)`` pair."""
        return self.lex.advance()

    # ------------- VM emission helpers ----------------

    def vm_function_name(self):
        """Return ``ClassName.subroutineName`` for the subroutine being compiled."""
        return self._cur_class + '.' + self._cur_subroutine

    def vm_push_variable(self, name):
        """Emit a push of variable ``name`` from the segment matching its kind."""
        (_var_type, kind, index) = self.symbols.lookup(name)
        self.vm.write_push(segments[kind], index)

    def vm_pop_variable(self, name):
        """Emit a pop into variable ``name`` in the segment matching its kind."""
        (_var_type, kind, index) = self.symbols.lookup(name)
        self.vm.write_pop(segments[kind], index)

    def load_this_ptr(self, kwd):
        """Emit the prologue that sets up 'this' for a method or constructor."""
        if kwd == KW_METHOD:
            # A method receives its object as argument 0.
            self.vm.push_arg(0)
            self.vm.pop_this_ptr()
        elif kwd == KW_CONSTRUCTOR:
            # A constructor allocates one word per field, then points
            # 'this' at the new object.
            self.vm.push_const(self.symbols.var_count(SK_FIELD))
            self.vm.write_call('Memory.alloc', 1)
            self.vm.pop_this_ptr()

    def write_func_decl(self, kwd):
        """Emit the ``function`` command followed by the 'this' prologue."""
        self.vm.write_function(self.vm_function_name(),
                               self.symbols.var_count(SK_VAR))
        self.load_this_ptr(kwd)

    def write_string_const_init(self, val):
        """Emit code that builds a String object holding ``val``."""
        self.vm.push_const(len(val))
        self.vm.write_call('String.new', 1)             # String.new(len(str))
        for char in val:
            self.vm.push_const(ord(char))
            self.vm.write_call('String.appendChar', 2)  # String.appendChar(nextchar)

    def new_label(self):
        """Return a fresh label name: 'label1', 'label2', ..."""
        self.label_num += 1
        return 'label' + str(self.label_num)

    # ------------- lookahead predicates ----------------

    def _is_token(self, tok, val=None):
        """Tell whether the next token has type ``tok`` (and value ``val`` if given)."""
        lextok, lexval = self.lex.peek()
        if val is None:
            return lextok == tok
        return (lextok, lexval) == (tok, val)

    def _is_keyword(self, *keywords):
        """Tell whether the next token is one of the given keywords."""
        lextok, lexval = self.lex.peek()
        return lextok == T_KEYWORD and lexval in keywords

    def _is_sym(self, symbols):
        """Tell whether the next token is a symbol contained in ``symbols``."""
        lextok, lexval = self.lex.peek()
        return lextok == T_SYM and lexval in symbols

    # Variable declarations
    def _is_class_var_dec(self):
        return self._is_keyword(KW_STATIC, KW_FIELD)

    def _is_type(self):
        return self._is_token(T_ID) or self._is_keyword(KW_INT, KW_CHAR, KW_BOOLEAN)

    # Subroutine declarations
    def _is_subroutine(self):
        return self._is_keyword(KW_CONSTRUCTOR, KW_FUNCTION, KW_METHOD)

    def _is_var_dec(self):
        return self._is_keyword(KW_VAR)

    def _is_let(self):
        return self._is_keyword(KW_LET)

    def _is_if(self):
        return self._is_keyword(KW_IF)

    def _is_while(self):
        return self._is_keyword(KW_WHILE)

    def _is_do(self):
        return self._is_keyword(KW_DO)

    def _is_return(self):
        return self._is_keyword(KW_RETURN)

    def _is_statement(self):
        return (self._is_let() or self._is_if() or self._is_while()
                or self._is_do() or self._is_return())

    def _is_const(self):
        return (self._is_token(T_NUM) or self._is_token(T_STR)
                or self._is_keyword_constant())

    def _is_keyword_constant(self):
        return self._is_keyword(KW_TRUE, KW_FALSE, KW_NULL, KW_THIS)

    def _is_op(self):
        return self._is_sym('+-*/&|<>=')

    def _is_unary_op(self):
        return self._is_sym('-~')

    def _is_var_name(self):
        return self._is_token(T_ID)

    def _is_builtin_type(self, type):
        return type in [KW_INT, KW_CHAR, KW_BOOLEAN, KW_VOID]

    def _is_term(self):
        return (self._is_const() or self._is_var_name()
                or self._is_sym('(') or self._is_unary_op())

    # --------------- compile part -----------------

    # class: 'class' className '{' classVarDec* subroutineDec* '}'
    def compile_class(self):
        """Compile a complete class declaration."""
        self._require(T_KEYWORD, KW_CLASS)
        self.compile_class_name()
        self._require(T_SYM, '{')
        while self._is_class_var_dec():
            self.compile_class_var_dec()
        while self._is_subroutine():
            self.compile_subroutine()
        self._require(T_SYM, '}')

    # className: identifier
    def compile_class_name(self):
        """Read the class name; it is remembered but not put in the symbol table."""
        self._cur_class = self.compile_var_name()

    # type varName (',' varName)* ';'
    def _compile_dec(self, kind):
        """Compile a declaration list and define every name with ``kind``."""
        var_type = self.compile_type()
        self.symbols.define(self.compile_var_name(), var_type, kind)
        while self._is_sym(','):
            self._advance()
            self.symbols.define(self.compile_var_name(), var_type, kind)
        self._require(T_SYM, ';')

    def compile_type(self):
        """type: 'int' | 'char' | 'boolean' | className

        Returns the type name; raises ValueError when the next token is not
        a type.
        """
        if not self._is_type():
            raise ValueError(self._require_failed_msg(*self.lex.peek()))
        return self._advance()[1]

    # classVarDec: {'static'|'field'} type varName (',' varName)* ';'
    def compile_class_var_dec(self):
        """Compile one static/field declaration line."""
        _tok, kwd = self._advance()     # static | field
        self._compile_dec(kwd_to_kind[kwd])

    # varName: identifier
    def compile_var_name(self):
        """Consume an identifier and return its text."""
        return self._require(T_ID)

    # subroutineDec: ('constructor'|'function'|'method') ('void'|type)
    #                subroutineName '(' parameterList ')' subroutineBody
    def compile_subroutine(self):
        """Compile one constructor, function or method."""
        _tok, kwd = self._advance()
        self.compile_void_or_type()
        self.compile_subroutine_name()
        self.symbols.start_subroutine()
        if kwd == KW_METHOD:
            # Defined first so that 'this' is the first argument of a method.
            self.symbols.define('this', self._cur_class, SK_ARG)
        self._require(T_SYM, '(')
        self.compile_parameter_list()
        self._require(T_SYM, ')')
        self.compile_subroutine_body(kwd)

    # 'void' | type
    def compile_void_or_type(self):
        """Consume and return a return type, which may be 'void'."""
        if self._is_keyword(KW_VOID):
            return self._advance()[1]
        return self.compile_type()

    # subroutineName: identifier
    def compile_subroutine_name(self):
        """Read the subroutine name; it is remembered but not put in the symbol table."""
        self._cur_subroutine = self.compile_var_name()

    # parameterList: (parameter (',' parameter)*)?
    def compile_parameter_list(self):
        """Compile a possibly empty parameter list."""
        if self._is_type():
            self.compile_parameter()
            while self._is_sym(','):
                self._advance()
                self.compile_parameter()

    # parameter: type varName
    def compile_parameter(self):
        """Compile one parameter and define it as an argument."""
        if self._is_type():
            var_type = self.compile_type()
            name = self.compile_var_name()
            self.symbols.define(name, var_type, SK_ARG)

    # subroutineBody: '{' varDec* statements '}'
    def compile_subroutine_body(self, kwd):
        """Compile a subroutine body, including its closing brace."""
        self._require(T_SYM, '{')
        while self._is_var_dec():
            self.compile_var_dec()
        # The function command needs the local count, so it is emitted only
        # after all 'var' declarations have been read.
        self.write_func_decl(kwd)
        self.compile_statements()
        # Consume the body's own '}'.  Without this the class-level '}' check
        # swallows it and every subroutine after the first is dropped.
        self._require(T_SYM, '}')

    # varDec: 'var' type varName (',' varName)* ';'
    def compile_var_dec(self):
        """Compile one local variable declaration line."""
        self._require(T_KEYWORD, KW_VAR)
        return self._compile_dec(SK_VAR)

    # statements: statement*
    def compile_statements(self):
        """Compile statements until the next token does not start one."""
        while self._is_statement():
            self._compile_statement()

    # statement: letStatement | ifStatement | whileStatement | doStatement | returnStatement
    def _compile_statement(self):
        """Dispatch on the statement keyword that comes next."""
        if self._is_let():
            self.compile_let()
        elif self._is_if():
            self.compile_if()
        elif self._is_while():
            self.compile_while()
        elif self._is_do():
            self.compile_do()
        elif self._is_return():
            self.compile_return()

    # letStatement: 'let' varName ('[' expression ']')? '=' expression ';'
    def compile_let(self):
        """Compile an assignment to a variable or to an array element."""
        self._require(T_KEYWORD, KW_LET)
        name = self.compile_var_name()
        subscript = self._is_sym('[')
        if subscript:
            self.compile_base_plus_index(name)  # leaves base+index on the stack
        self._require(T_SYM, '=')
        self.compile_expression()               # value to assign
        self._require(T_SYM, ';')
        if subscript:
            self.pop_array_element()            # *(base+index) = expr
        else:
            self.vm_pop_variable(name)

    def pop_array_element(self):
        """Store the value on top of the stack at the address beneath it."""
        self.vm.pop_temp(TEMP_ARRAY)    # park the expression value
        self.vm.pop_that_ptr()          # 'that' = base+index
        self.vm.push_temp(TEMP_ARRAY)   # bring the value back
        self.vm.pop_that()              # *(base+index) = value

    # '[' expression ']'
    def compile_base_plus_index(self, name):
        """Emit code leaving ``name + index`` on the stack for a later store."""
        self.vm_push_variable(name)     # array base
        self._advance()                 # '[' (already checked by the caller)
        self.compile_expression()       # index
        self._require(T_SYM, ']')
        self.vm.write_vm_cmd('add')

    # expression: term (op term)*
    def compile_expression(self):
        """Compile an expression; operators are applied strictly left to right."""
        self.compile_term()
        while self._is_op():
            _tok, op = self._advance()
            self.compile_term()
            self.vm.write_vm_cmd(vm_cmds[op])

    # term: integerConstant | stringConstant | keywordConstant | varName
    #     | varName '[' expression ']' | subroutineCall | '(' expression ')'
    #     | unaryOp term
    def compile_term(self):
        """Compile one term; emits nothing when the next token starts no term."""
        if self._is_const():
            self.compile_const()
        elif self._is_sym('('):
            self._advance()
            self.compile_expression()
            self._require(T_SYM, ')')
        elif self._is_unary_op():
            _tok, op = self._advance()
            self.compile_term()
            self.vm.write_vm_cmd(vm_unary_cmds[op])
        elif self._is_var_name():
            _tok, name = self._advance()
            if self._is_sym('['):
                self.compile_array_subscript(name)
            elif self._is_sym('(.'):
                self.compile_subroutine_call(name)
            else:
                self.vm_push_variable(name)

    # integerConstant | stringConstant | keywordConstant
    def compile_const(self):
        """Compile a numeric, string or keyword constant."""
        tok, val = self._advance()
        if tok == T_NUM:
            self.vm.push_const(val)
        elif tok == T_STR:
            self.write_string_const_init(val)
        elif tok == T_KEYWORD:
            self.compile_kwd_const(val)

    # '[' expression ']'
    def compile_array_subscript(self, name):
        """Emit code that pushes the value of ``name[expression]``."""
        self.vm_push_variable(name)     # array base
        self._require(T_SYM, '[')
        self.compile_expression()       # index
        self._require(T_SYM, ']')
        self.vm.write_vm_cmd('add')     # base+index
        self.vm.pop_that_ptr()
        self.vm.push_that()             # *(base+index)

    # subroutineCall: subroutineName '(' expressionList ')'
    #               | (className | varName) '.' subroutineName '(' expressionList ')'
    def compile_subroutine_call(self, name):
        """Compile a call whose leading identifier ``name`` was already consumed."""
        (var_type, _kind, _index) = self.symbols.lookup(name)
        if self._is_sym('.'):
            num_args, name = self.compile_dotted_subroutine_call(name, var_type)
        else:
            # An unqualified call is a method call on the current object,
            # so 'this' is passed as the hidden first argument.
            num_args = 1
            self.vm.push_this_ptr()
            name = self._cur_class + '.' + name
        self._require(T_SYM, '(')
        num_args += self.compile_expr_list()
        self._require(T_SYM, ')')
        self.vm.write_call(name, num_args)

    # keywordConstant: 'true' | 'false' | 'null' | 'this'
    def compile_kwd_const(self, kwd):
        """Emit the push for a keyword constant."""
        if kwd == KW_THIS:
            self.vm.push_this_ptr()
        elif kwd == KW_TRUE:
            # true is emitted as the negation of 1.
            self.vm.push_const(1)
            self.vm.write_vm_cmd('neg')
        else:   # KW_FALSE or KW_NULL
            self.vm.push_const(0)

    def compile_dotted_subroutine_call(self, name, type):
        """Compile the ``.subroutineName`` part of a qualified call.

        ``name`` is the identifier before the dot and ``type`` its type from
        the symbol table (None when it is not a known variable, i.e. a class
        name).  Returns ``(num_args, qualified_name)`` where ``num_args`` is
        1 when an object pointer was pushed as hidden argument, else 0.
        Raises ValueError when ``name`` is a variable of a builtin type.
        """
        num_args = 0
        obj_name = name
        self._advance()                     # '.'
        name = self.compile_var_name()
        if self._is_builtin_type(type):     # e.g. int.func(123) not allowed
            raise ValueError('Cannot use "." operator on builtin type')
        elif type is None:                  # calling through a class name
            name = obj_name + '.' + name
        else:                               # calling through an object variable
            num_args = 1
            self.vm_push_variable(obj_name)
            name = self.symbols.type_of(obj_name) + '.' + name
        return num_args, name

    # expressionList: (expression (',' expression)*)?
    def compile_expr_list(self):
        """Compile the call arguments and return how many were pushed."""
        num_args = 0
        if self._is_term():
            self.compile_expression()
            num_args = 1
            while self._is_sym(','):
                self._advance()
                self.compile_expression()
                num_args += 1
        return num_args

    # ifStatement: 'if' '(' expression ')' '{' statements '}'
    #              ('else' '{' statements '}')?
    def compile_if(self):
        """Compile an if statement with an optional else branch."""
        self._require(T_KEYWORD, KW_IF)
        end_label = self.new_label()
        self._compile_cond_expression_statements(end_label)
        if self._is_keyword(KW_ELSE):
            self._advance()
            self._require(T_SYM, '{')
            self.compile_statements()
            self._require(T_SYM, "}")
        self.vm.write_label(end_label)

    # '(' expression ')' '{' statements '}'
    def _compile_cond_expression_statements(self, label):
        """Compile a condition and its block, then jump to ``label``.

        When the condition is false the block is skipped: control continues
        right after the emitted ``goto label``.
        """
        self._require(T_SYM, '(')
        self.compile_expression()
        self._require(T_SYM, ')')
        self.vm.write_vm_cmd('not')         # ~(cond)
        notif_label = self.new_label()
        self.vm.write_if(notif_label)       # if-goto notif_label
        self._require(T_SYM, '{')
        self.compile_statements()
        self._require(T_SYM, '}')
        self.vm.write_goto(label)
        self.vm.write_label(notif_label)

    # whileStatement: 'while' '(' expression ')' '{' statements '}'
    def compile_while(self):
        """Compile a while loop; the body jumps back to the loop head."""
        self._require(T_KEYWORD, KW_WHILE)
        top_label = self.new_label()
        self.vm.write_label(top_label)
        self._compile_cond_expression_statements(top_label)

    # doStatement: 'do' subroutineCall ';'
    def compile_do(self):
        """Compile a do statement and discard the call's return value."""
        self._require(T_KEYWORD, KW_DO)
        name = self._require(T_ID)
        self.compile_subroutine_call(name)
        self.vm.pop_temp(TEMP_RETURN)
        self._require(T_SYM, ';')

    # returnStatement: 'return' expression? ';'
    def compile_return(self):
        """Compile a return statement; a bare 'return;' returns 0."""
        self._require(T_KEYWORD, KW_RETURN)
        if not self._is_sym(';'):
            self.compile_expression()
        else:
            self.vm.push_const(0)
        self._require(T_SYM, ';')
        self.vm.write_return()
class CompilationEngine(object):
    """Compile one Jack source file into VM code.

    Parsing is token-threaded: every ``_compile_*`` method receives the
    tokenizer together with the first token of its construct and returns the
    first token that follows the construct.
    """

    # Binary operators accepted between two terms of an expression.
    _BINARY_OPS = ("+", "-", "*", "/", "&", "|", "<", ">", "=")

    def __init__(self, jack_fname):
        """Remember the source file name; nothing is read until compile()."""
        self._jack_fname = jack_fname
        self._s_table = SymbolTable()
        self._writer = None
        self._class_name = None
        # True/False while inside a subroutine, None outside of one.
        self._is_writing_void_func = None
        self._current_func_name = None
        self._n_labels = 0

    def compile(self, out_fname: str) -> None:
        """Compile the source file and write the VM code to ``out_fname``.

        Raises CompilationException when tokens remain after the class.
        """
        tknizer = Tokenizer(self._jack_fname)
        with VMWriter(out_fname) as writer:
            self._writer = writer
            token = self._compile_class(tknizer, tknizer.next_token())
            if token:
                raise CompilationException(
                    f"Expected end of file, found {token}")

    def _compile_class(self, tknizer, token):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        _assert(token, CLASS)
        token = tknizer.next_token()
        _assert_identifier(token)
        self._class_name = token.value
        _assert(tknizer.next_token(), "{")
        token = tknizer.next_token()
        while token.value in [STATIC, FIELD]:
            token = self._compile_class_var_dec(tknizer, token)
        while token.value in [CONSTRUCTOR, FUNCTION, METHOD]:
            token = self._compile_subroutine_dec(tknizer, token)
        _assert(token, "}")
        return tknizer.next_token()

    def _compile_class_var_dec(self, tknizer, token):
        """classVarDec: ('static'|'field') type varName (',' varName)* ';'"""
        _assert(token, [STATIC, FIELD])
        if token.value == STATIC:
            kind = SymbolTable.STATIC
        else:
            kind = SymbolTable.FIELD
        token = tknizer.next_token()
        _assert_type(token)
        var_type = token.value
        self._record_symbol(tknizer.next_token(), var_type, kind)
        token = tknizer.next_token()
        while token.value == ",":
            self._record_symbol(tknizer.next_token(), var_type, kind)
            token = tknizer.next_token()
        _assert(token, ";")
        return tknizer.next_token()

    def _compile_subroutine_dec(self, tknizer, token):
        """Compile one constructor, function or method, body included."""
        _assert(token, [CONSTRUCTOR, FUNCTION, METHOD])
        subroutine_type = token.value
        token = tknizer.next_token()
        _assert_type(token, allow_void=True)
        self._is_writing_void_func = token.value == VOID
        self._s_table.start_subroutine(is_method=subroutine_type == METHOD)
        token = tknizer.next_token()
        _assert_identifier(token)
        subroutine_name = token.value
        self._current_func_name = subroutine_name
        _assert(tknizer.next_token(), "(")
        # populates symbol table with arguments
        token = self._compile_parameter_list(tknizer, tknizer.next_token())
        _assert(token, ")")
        _assert(tknizer.next_token(), "{")
        token = tknizer.next_token()
        while token.value == VAR:
            # populates symbol table with local variables
            token = self._compile_var_dec(tknizer, token)
        # The function command needs the local count, so it is written only
        # after every 'var' declaration has been read.
        n_locals = self._s_table.var_count(SymbolTable.VAR)
        qualified_name = ".".join([self._class_name, subroutine_name])
        self._writer.write_function(qualified_name, n_locals)
        if subroutine_type == CONSTRUCTOR:
            # Allocate one word per field and point 'this' at the new block.
            size = self._s_table.var_count(SymbolTable.FIELD)
            self._writer.write_push("constant", size)
            self._writer.write_call("Memory.alloc", 1)
            self._writer.write_pop("pointer", 0)
        elif subroutine_type == METHOD:
            # The receiving object arrives as argument 0.
            self._writer.write_push("argument", 0)
            self._writer.write_pop("pointer", 0)
        token = self._compile_statements(tknizer, token)
        _assert(token, "}")
        self._is_writing_void_func = None
        self._s_table.complete_subroutine()
        return tknizer.next_token()

    def _compile_parameter_list(self, tknizer, token):
        """parameterList: ((type varName) (',' type varName)*)?

        Records each parameter as an argument and returns the token after
        the list.
        """
        is_type = (token.value in [INT, CHAR, BOOLEAN]
                   or token.type == IDENTIFIER)
        if not is_type:
            return token
        while True:
            var_type = token.value
            self._record_symbol(tknizer.next_token(), var_type,
                                SymbolTable.ARG)
            token = tknizer.next_token()
            if token.value != ",":
                return token
            token = tknizer.next_token()
            # Later parameters must start with a type as well; previously any
            # token was silently accepted as the type here.
            _assert_type(token)

    def _compile_var_dec(self, tknizer, token):
        """varDec: 'var' type varName (',' varName)* ';'"""
        _assert(token, VAR)
        token = tknizer.next_token()
        _assert_type(token)
        var_type = token.value
        token = tknizer.next_token()
        self._record_symbol(token, var_type, SymbolTable.VAR)
        token = tknizer.next_token()
        while token.value == ",":
            self._record_symbol(tknizer.next_token(), var_type,
                                SymbolTable.VAR)
            token = tknizer.next_token()
        _assert(token, ";")
        return tknizer.next_token()

    def _compile_statements(self, tknizer, token):
        """Compile statements for as long as the token is a statement keyword."""
        while token.value in [LET, IF, WHILE, DO, RETURN]:
            # Dispatch by name: 'let' -> _compile_let, 'if' -> _compile_if, ...
            method = getattr(self, f"_compile_{token.value}")
            token = method(tknizer, token)
        return token

    def _compile_let(self, tknizer, token):
        """letStatement: 'let' varName ('[' expression ']')? '=' expression ';'"""
        _assert(token, LET)
        token = tknizer.next_token()
        _assert_identifier(token)
        var_name = token.value
        token = tknizer.next_token()
        if token.value == "[":
            token = self._compile_expression(tknizer, tknizer.next_token())
            _assert(token, "]")
            self._push_variable(var_name)
            self._writer.write_add()            # index + base
            _assert(tknizer.next_token(), "=")
            token = self._compile_expression(tknizer, tknizer.next_token())
            # Park the value in temp 0 so the target address can be popped
            # into 'that' first.
            self._writer.write_pop("temp", 0)
            self._writer.write_pop("pointer", 1)
            self._writer.write_push("temp", 0)
            self._writer.write_pop("that", 0)
        else:
            _assert(token, "=")
            token = self._compile_expression(tknizer, tknizer.next_token())
            self._pop_variable(var_name)
        _assert(token, ";")
        return tknizer.next_token()

    def _compile_if(self, tknizer, token):
        """ifStatement: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?
        """
        _assert(token, IF)
        _assert(tknizer.next_token(), "(")
        token = self._compile_expression(tknizer, tknizer.next_token())
        _assert(token, ")")
        _assert(tknizer.next_token(), "{")
        # Jump to the false label when the condition equals 0.
        self._writer.write_push("constant", 0)
        self._writer.write_equals()
        false_label = self._allocate_label("IF_FALSE")
        self._writer.write_if(false_label)
        token = self._compile_statements(tknizer, tknizer.next_token())
        _assert(token, "}")
        token = tknizer.next_token()
        if token.value == ELSE:
            skip_else_label = self._allocate_label("SKIP_ELSE")
            self._writer.write_goto(skip_else_label)
            self._writer.write_label(false_label)
            _assert(tknizer.next_token(), "{")
            token = self._compile_statements(tknizer, tknizer.next_token())
            _assert(token, "}")
            token = tknizer.next_token()
            self._writer.write_label(skip_else_label)
        else:
            self._writer.write_label(false_label)
        return token

    def _compile_while(self, tknizer, token):
        """whileStatement: 'while' '(' expression ')' '{' statements '}'"""
        _assert(token, WHILE)
        _assert(tknizer.next_token(), "(")
        true_label = self._allocate_label("WHILE_TRUE")
        self._writer.write_label(true_label)
        token = self._compile_expression(tknizer, tknizer.next_token())
        _assert(token, ")")
        _assert(tknizer.next_token(), "{")
        # Leave the loop when the condition equals 0.
        self._writer.write_push("constant", 0)
        self._writer.write_equals()
        false_label = self._allocate_label("WHILE_FALSE")
        self._writer.write_if(false_label)
        token = self._compile_statements(tknizer, tknizer.next_token())
        _assert(token, "}")
        self._writer.write_goto(true_label)
        self._writer.write_label(false_label)
        return tknizer.next_token()

    def _compile_do(self, tknizer, token):
        """doStatement: 'do' subroutineCall ';' (the return value is discarded)."""
        _assert(token, DO)
        token = self._compile_subroutine_call(tknizer, tknizer.next_token())
        _assert(token, ";")
        self._writer.write_pop("temp", 0)
        return tknizer.next_token()

    def _compile_return(self, tknizer, token):
        """returnStatement: 'return' expression? ';'

        A void subroutine must use a bare 'return;' and returns 0; any other
        subroutine must return an expression.  Raises CompilationException
        when no subroutine is being compiled.
        """
        _assert(token, RETURN)
        token = tknizer.next_token()
        if self._is_writing_void_func is True:
            _assert(token, ";")
            self._writer.write_push("constant", 0)
        elif self._is_writing_void_func is False:
            # 'return this' needs no special case: the term compiler pushes
            # pointer 0 for the 'this' keyword.
            token = self._compile_expression(tknizer, token)
            _assert(token, ";")
        else:
            raise CompilationException(
                "Encountered return statement outside function")
        self._writer.write_return()
        return tknizer.next_token()

    def _compile_subroutine_call(self, tknizer, first_token):
        """Compile ``name(args)`` or ``prefix.name(args)``.

        ``first_token`` is the leading identifier.  For method calls the
        object pointer is pushed as a hidden first argument.
        """
        _assert_identifier(first_token)
        is_method = False
        second_token = tknizer.next_token()
        if second_token.value == ".":
            token = tknizer.next_token()
            _assert_identifier(token)
            if self._s_table.has(first_token.value):
                # method call on another object
                is_method = True
                class_name = self._s_table.type_of(first_token.value)
                subroutine_name = ".".join([class_name, token.value])
                self._push_variable(first_token.value)
            else:
                # constructor or class function
                subroutine_name = ".".join([first_token.value, token.value])
            token = tknizer.next_token()
        else:
            # method call on this object
            is_method = True
            subroutine_name = ".".join([self._class_name, first_token.value])
            self._writer.write_push("pointer", 0)
            token = second_token
        _assert(token, "(")
        n_args = 1 if is_method else 0
        token = tknizer.next_token()
        if token.value != ")":
            token = self._compile_expression(tknizer, token)
            n_args += 1
            while token.value == ",":
                token = self._compile_expression(tknizer,
                                                 tknizer.next_token())
                n_args += 1
        _assert(token, ")")
        self._writer.write_call(subroutine_name, n_args)
        return tknizer.next_token()

    def _compile_expression(self, tknizer, token):
        """expression: term (op term)*, evaluated strictly left to right."""
        token = self._compile_term(tknizer, token)
        while token.value in self._BINARY_OPS:
            op = token.value
            token = self._compile_term(tknizer, tknizer.next_token())
            self._write_binary_op(op)
        return token

    def _write_binary_op(self, op):
        """Write the VM command (or OS call) implementing binary operator ``op``."""
        writer = self._writer
        if op == "+":
            writer.write_add()
        elif op == "-":
            writer.write_sub()
        elif op == "*":
            writer.write_call("Math.multiply", 2)
        elif op == "/":
            writer.write_call("Math.divide", 2)
        elif op == "&":
            writer.write_and()
        elif op == "|":
            writer.write_or()
        elif op == "<":
            writer.write_less_than()
        elif op == ">":
            writer.write_greater_than()
        elif op == "=":
            writer.write_equals()
        else:
            raise Exception(f"Bug: no case for op {op}")

    def _compile_term(self, tknizer, token):
        """Compile one term and return the token that follows it."""
        if token.type == INT_CONSTANT:
            self._writer.write_push("constant", token.value)
            return tknizer.next_token()
        elif token.type == STRING_CONSTANT:
            str_val = token.value[1:-1]     # drop the surrounding quotes
            self._writer.write_push("constant", len(str_val))
            self._writer.write_call("String.new", 1)
            for char in str_val:
                self._writer.write_push("constant", ord(char))
                self._writer.write_call("String.appendChar", 2)
            return tknizer.next_token()
        elif token.type == KEYWORD and token.value in [
                TRUE, FALSE, NULL, THIS]:
            if token.value == TRUE:
                # true is written as the negation of 1
                self._writer.write_push("constant", 1)
                self._writer.write_neg()
            elif token.value in [FALSE, NULL]:
                self._writer.write_push("constant", 0)
            elif token.value == THIS:
                # The current object lives in pointer 0.  'argument 0' is
                # only the same thing inside methods; in a constructor it is
                # the first declared parameter.
                self._writer.write_push("pointer", 0)
            else:
                raise Exception(f"Bug: unexpected keyword {token.value}")
            return tknizer.next_token()
        elif token.value == "(":
            token = self._compile_expression(tknizer, tknizer.next_token())
            _assert(token, ")")
            return tknizer.next_token()
        elif token.value in ["-", "~"]:
            next_token = self._compile_term(tknizer, tknizer.next_token())
            if token.value == "-":
                self._writer.write_neg()
            elif token.value == "~":
                self._writer.write_not()
            else:
                raise CompilationException(
                    f"Bug: Unexpected unary op {token.value}")
            return next_token
        else:
            next_token = tknizer.next_token()
            if next_token.value == "[":
                _assert_identifier(token)
                array_var_name = token.value
                token = self._compile_expression(tknizer,
                                                 tknizer.next_token())
                _assert(token, "]")
                self._push_variable(array_var_name)
                self._writer.write_add()
                self._writer.write_pop("pointer", 1)
                self._writer.write_push("that", 0)
                return tknizer.next_token()
            elif next_token.value in ["(", "."]:
                # Hand the lookahead back so the call compiler can read it.
                tknizer.rewind()
                return self._compile_subroutine_call(tknizer, token)
            else:
                _assert_identifier(token)
                if not self._s_table.has(token.value):
                    raise CompilationException(
                        f"Unknown variable {token.value}")
                self._push_variable(token.value)
                return next_token

    def _variable_location(self, var_name):
        """Return ``(segment, index)`` of ``var_name`` in the VM memory model."""
        idx = self._s_table.index_of(var_name)
        kind = self._s_table.kind_of(var_name)
        if kind == SymbolTable.STATIC:
            segment = STATIC
        elif kind == SymbolTable.FIELD:
            segment = THIS
        elif kind == SymbolTable.ARG:
            segment = "argument"
        elif kind == SymbolTable.VAR:
            segment = "local"
        else:
            raise Exception(f"Bug: unexpected variable kind {kind}")
        return segment, idx

    def _push_variable(self, var_name):
        """Write a push of ``var_name`` from its segment."""
        segment, idx = self._variable_location(var_name)
        self._writer.write_push(segment, idx)

    def _pop_variable(self, var_name):
        """Write a pop into ``var_name``'s segment."""
        segment, idx = self._variable_location(var_name)
        self._writer.write_pop(segment, idx)

    def _allocate_label(self, label_name):
        """Return a unique label of the form ``Class.func$name$id``."""
        label = "{cls}.{func}${name}${id}".format(
            cls=self._class_name,
            func=self._current_func_name,
            name=label_name,
            id=self._n_labels,
        )
        self._n_labels += 1
        return label

    def _record_symbol(self, token, typ, kind):
        """Define ``token`` in the symbol table.

        Raises CompilationException when ``token`` is not an identifier.
        """
        if token.type != IDENTIFIER:
            raise CompilationException(f"Expected an {IDENTIFIER}, "
                                       f'found {token.type}: "{token.value}"')
        self._s_table.define(token.value, typ, kind)
class Parser:
    """Recursive-descent Jack parser that writes VM code while parsing.

    Constructing a Parser runs the whole compilation: ``f`` is an
    ``(input name, output name)`` pair that is also handed to the Tokeniser
    and the VMCodeWriter.
    """

    def __init__(self, f):
        """Set up tokeniser, symbol table and writer, then parse."""
        self.in_name, self.out_name = f
        self.output = []
        self.tokeniser = Tokeniser(f)
        self.st_handler = SymbolTable()
        self.writer = VMCodeWriter(f)
        self.local_state = {'labeler': labeler()}
        try:
            self.parse()
        finally:
            # Close the writer even when parsing raises, so the output file
            # is not leaked.
            self.writer.close()

    def parse(self):
        """Compile the class if any token is available.

        Returns ``(out_name, output)``.
        """
        if self.tokeniser.has_next():
            self.compileClass()
        return self.out_name, self.output

    def compileClass(self):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        self.expect(TokenType.KEYWORD, 'class')
        self.local_state['class'] = self.expect(TokenType.IDENTIFIER)
        self.expect(TokenType.SYMBOL, '{')
        while self.peek(TokenType.KEYWORD, CLASS_VAR_KEYWORDS):
            self.compileClassVarDec()
        while self.peek(TokenType.KEYWORD, FXN_KEYWORDS):
            self.st_handler.start_subroutine()
            self.compileSubroutine()
        self.expect(TokenType.SYMBOL, '}')
        del self.local_state['class']

    def compileClassVarDec(self):
        """classVarDec: ('static'|'field') type varName (',' varName)* ';'"""
        kind = keyword_to_kind[self.expect(TokenType.KEYWORD)]
        taipu = self.compileType()
        name = self.expect(TokenType.IDENTIFIER)
        self.st_handler.define(name, taipu, kind)
        self.tryCompileVarList(taipu=taipu, kind=kind)
        self.expect(TokenType.SYMBOL, ";")

    def compileType(self):
        """Consume and return a type: a keyword in BI_TYPES or an identifier.

        Raises SyntaxError for any other token.
        """
        ttype, token = self.tokeniser.peek()
        if ttype == TokenType.KEYWORD and token in BI_TYPES:
            return self.expect(TokenType.KEYWORD, token)
        elif ttype == TokenType.IDENTIFIER:
            return self.expect(TokenType.IDENTIFIER)
        else:
            raise SyntaxError(
                "Expected type in {} or identifier, got: {} of type {}".format(
                    BI_TYPES, token, ttype))

    def tryCompileVarList(self, exp_type=False, taipu=None, kind=None):
        """Compile the ``(',' [type] varName)*`` tail of a declaration.

        With ``exp_type`` set, every name is preceded by its own type (as in
        parameter lists); otherwise all names share ``taipu``.  The names are
        defined only after the whole tail has been read.
        """
        varlist = []
        while self.peek(TokenType.SYMBOL, ","):
            self.expect(TokenType.SYMBOL, ",")
            if exp_type:
                taipu = self.compileType()
            name = self.expect(TokenType.IDENTIFIER)
            varlist.append((name, taipu, kind))
        for entry in varlist:
            self.st_handler.define(*entry)

    def compileSubroutine(self):
        """Compile one subroutine declaration, body included."""
        fxn_kind = self.expect(TokenType.KEYWORD, FXN_KEYWORDS)
        if fxn_kind == 'method':
            # Defined before the parameters, so 'this' is the first argument.
            self.st_handler.define('this', self.local_state['class'],
                                   IdentifierKind.ARGUMENT)
        self.compileType()      # return type
        fxn_name = self.expect(TokenType.IDENTIFIER)
        self.expect(TokenType.SYMBOL, "(")
        self.compileParameterList()
        self.expect(TokenType.SYMBOL, ")")
        self.compileSubroutineBody(fxn_name, fxn_kind)

    def compileParameterList(self):
        """parameterList: ((type varName) (',' type varName)*)?"""
        if not self.peek(TokenType.SYMBOL, ")"):
            kind = IdentifierKind.ARGUMENT
            taipu = self.compileType()
            name = self.expect(TokenType.IDENTIFIER)
            self.st_handler.define(name, taipu, kind)
            self.tryCompileVarList(exp_type=True, kind=kind)

    def compileSubroutineBody(self, fxn_name, fxn_kind):
        """subroutineBody: '{' varDec* statements '}'"""
        self.expect(TokenType.SYMBOL, "{")
        while self.peek(TokenType.KEYWORD, "var"):
            self.compileVarDec()
        # The declaration needs the local count, so it is written only after
        # every 'var' line has been read.
        self.writer.fun_dec(fxn_name,
                            self.st_handler.var_count(IdentifierKind.VAR))
        self.compileFxnKind(fxn_kind)
        self.compileStatements()
        self.expect(TokenType.SYMBOL, "}")

    def compileFxnKind(self, kind):
        """Write the prologue that sets up 'this' for constructors and methods."""
        if kind == 'constructor':
            num_fields = self.st_handler.var_count(IdentifierKind.FIELD)
            self.writer.alloc(num_fields)
            self.writer.pop_this_ptr()
        elif kind == 'method':
            self.writer.push_variable('this', self.st_handler)
            self.writer.pop_this_ptr()

    def compileVarDec(self):
        """varDec: 'var' type varName (',' varName)* ';'"""
        self.expect(TokenType.KEYWORD, "var")
        kind = IdentifierKind.VAR
        taipu = self.compileType()
        name = self.expect(TokenType.IDENTIFIER)
        self.st_handler.define(name, taipu, kind)
        self.tryCompileVarList(taipu=taipu, kind=kind)
        self.expect(TokenType.SYMBOL, ";")

    def compileStatements(self):
        """Compile statements while the next token is a statement keyword."""
        while self.peek(TokenType.KEYWORD, STMT_KEYWORDS):
            self.compileStatement()

    def compileStatement(self):
        """Dispatch on the statement keyword that comes next."""
        if self.peek(TokenType.KEYWORD, "let"):
            self.compileLet()
        elif self.peek(TokenType.KEYWORD, "if"):
            self.compileIf()
        elif self.peek(TokenType.KEYWORD, "while"):
            self.compileWhile()
        elif self.peek(TokenType.KEYWORD, "do"):
            self.compileDo()
        elif self.peek(TokenType.KEYWORD, "return"):
            self.compileReturn()

    def compileLet(self):
        """letStatement: 'let' varName ('[' expression ']')? '=' expression ';'"""
        self.expect(TokenType.KEYWORD, "let")
        var = self.expect(TokenType.IDENTIFIER)
        array_assignment = False
        if self.peek(TokenType.SYMBOL, "["):
            array_assignment = True
            self.compileBasePlusOffset(var)
        self.expect(TokenType.SYMBOL, "=")
        self.compileExpression()
        self.expect(TokenType.SYMBOL, ";")
        if array_assignment:
            self.saveToTemp()
            self.popToArray()
        else:
            self.writer.pop_variable(var, self.st_handler)

    def compileBasePlusOffset(self, base):
        """Compile ``'[' expression ']'`` and leave ``base + index`` on the stack."""
        self.writer.push_variable(base, self.st_handler)
        self.expect(TokenType.SYMBOL, "[")
        self.compileExpression()
        self.expect(TokenType.SYMBOL, "]")
        self.writer.binary_op("+")

    def saveToTemp(self):
        """Pop the top of the stack into temp 0."""
        self.writer.pop('temp', 0)

    def popToArray(self):
        """Store the value saved in temp 0 at the address on top of the stack."""
        self.writer.pop_that_ptr()
        self.writer.push('temp', 0)
        self.writer.pop_that()

    def compileIf(self):
        """ifStatement: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?
        """
        self.expect(TokenType.KEYWORD, "if")
        endif = next(self.local_state['labeler'])
        self.compileCond(endif)
        if self.peek(TokenType.KEYWORD, "else"):
            self.expect(TokenType.KEYWORD, "else")
            self.expectBracedStatements()
        self.writer.label(endif)

    def expectBracedStatements(self):
        """Compile ``'{' statements '}'``."""
        self.expect(TokenType.SYMBOL, "{")
        self.compileStatements()
        self.expect(TokenType.SYMBOL, "}")

    def compileWhile(self):
        """whileStatement: 'while' '(' expression ')' '{' statements '}'"""
        self.expect(TokenType.KEYWORD, "while")
        loop = next(self.local_state['labeler'])
        self.writer.label(loop)
        self.compileCond(loop)

    def compileCond(self, ret):
        """Compile ``'(' cond ')' '{' body '}'``; the body ends with a jump to ``ret``.

        When the condition is false the body is skipped and control
        continues right after that jump.
        """
        self.expectGroupedExpression()
        self.writer.unary_op('~')
        not_cond = next(self.local_state['labeler'])
        self.writer.ifgoto(not_cond)
        self.expectBracedStatements()
        self.writer.goto(ret)
        self.writer.label(not_cond)

    def expectGroupedExpression(self):
        """Compile ``'(' expression ')'``."""
        self.expect(TokenType.SYMBOL, "(")
        self.compileExpression()
        self.expect(TokenType.SYMBOL, ")")

    def compileDo(self):
        """doStatement: 'do' subroutineCall ';' (the return value is discarded)."""
        self.expect(TokenType.KEYWORD, "do")
        caller = self.expect(TokenType.IDENTIFIER)
        self.compileSubroutineCall(caller)
        # Integer index, matching saveToTemp() and popToArray().
        self.writer.pop("temp", 0)
        self.expect(TokenType.SYMBOL, ";")

    def compileReturn(self):
        """returnStatement: 'return' expression? ';' (a bare return yields 0)."""
        self.expect(TokenType.KEYWORD, "return")
        if not self.peek(TokenType.SYMBOL, ";"):
            self.compileExpression()
        else:
            self.writer.int_const(0)
        self.writer.ret()
        self.expect(TokenType.SYMBOL, ";")

    def compileExpression(self):
        """expression: term (op term)*, evaluated strictly left to right."""
        self.compileTerm()
        while self.peek(TokenType.SYMBOL, EXP_SYMBOLS):
            op = self.compileOp()
            self.compileTerm()
            self.writer.binary_op(op)

    def compileOp(self):
        """Consume and return the next symbol."""
        return self.expect(TokenType.SYMBOL)

    def compileTerm(self):
        """Compile one term; does nothing when no token is left."""
        if self.peek(TokenType.INT_CONST):
            number = self.expect(TokenType.INT_CONST)
            self.writer.int_const(number)
        elif self.peek(TokenType.STR_CONST):
            text = self.compileStrConst()
            self.writer.str_const(text)
        elif self.peek(TokenType.KEYWORD, KEYWORD_CONSTANTS):
            kw = self.expect(TokenType.KEYWORD, KEYWORD_CONSTANTS)
            self.writer.kw_const(kw)
        elif self.peek(TokenType.SYMBOL, UNARY_OPS):
            self.compileUnaryOp()
        elif self.peek(TokenType.SYMBOL, "("):
            self.expectGroupedExpression()
        elif self.tokeniser.has_next():
            # Whatever is left is taken as a name; the token after it decides
            # between array access, subroutine call and plain variable.
            _ttype, name = self.tokeniser.next()
            if self.peek(TokenType.SYMBOL, "["):
                self.compileArrayAccess(name)
            elif self.peek(TokenType.SYMBOL, ["(", "."]):
                self.compileSubroutineCall(name)
            else:
                self.writer.push_variable(name, self.st_handler)

    def compileStrConst(self):
        """Consume a string constant and return it without its first and last character."""
        _ttype, token = self.tokeniser.next()
        return token[1:-1]

    def compileUnaryOp(self):
        """Compile ``unaryOp term``."""
        op = self.expect(TokenType.SYMBOL, ["-", "~"])
        self.compileTerm()
        self.writer.unary_op(op)

    def compileArrayAccess(self, arr):
        """Compile ``arr[expression]`` and push the element's value."""
        self.compileBasePlusOffset(arr)
        self.writer.pop_that_ptr()
        self.writer.push_that()

    def compileSubroutineCall(self, caller):
        """Compile a call whose leading identifier ``caller`` was already consumed.

        Raises SyntaxError when the next token is neither '(' nor '.'.
        """
        if self.peek(TokenType.SYMBOL, "("):
            method, nargs = self.compileSelfFunctionCall(caller)
            qualified_name = self.local_state['class'] + '.' + method
        elif self.peek(TokenType.SYMBOL, "."):
            method, nargs = self.compileMethodCall(caller)
            qualified_name = self.st_handler.qualify(caller, method)
        else:
            # Without this branch the call below failed with an
            # UnboundLocalError instead of a syntax error.
            raise SyntaxError(
                "Expected '(' or '.' after {} in subroutine call".format(
                    caller))
        self.writer.call(qualified_name, nargs)

    def compileSelfFunctionCall(self, method):
        """Compile ``'(' expressionList ')'`` of a call on the current object.

        'this' is pushed as a hidden first argument.  Returns
        ``(method, nargs)``.
        """
        self.writer.push_this_ptr()
        nargs = self.expectExpressionList() + 1
        return method, nargs

    def compileMethodCall(self, caller):
        """Compile ``'.' subroutineName '(' expressionList ')'``.

        When the symbol table reports ``caller`` as an object, it is pushed
        as a hidden first argument.  Returns ``(method, nargs)``.
        """
        nargs = 0
        if self.st_handler.is_object(caller):
            nargs += 1
            self.writer.push_variable(caller, self.st_handler)
        self.expect(TokenType.SYMBOL, ".")
        method = self.expect(TokenType.IDENTIFIER)
        nargs += self.expectExpressionList()
        return method, nargs

    def expectExpressionList(self):
        """Compile ``'(' expressionList ')'`` and return the number of expressions."""
        self.expect(TokenType.SYMBOL, "(")
        nexps = self.compileExpressionList()
        self.expect(TokenType.SYMBOL, ")")
        return nexps

    def compileExpressionList(self):
        """expressionList: (expression (',' expression)*)?  Returns the count."""
        nexps = 0
        if not self.peek(TokenType.SYMBOL, ")"):
            self.compileExpression()
            nexps += 1
            while self.peek(TokenType.SYMBOL, ","):
                self.expect(TokenType.SYMBOL, ",")
                self.compileExpression()
                nexps += 1
        return nexps

    def peek(self, e_type, e_token=None):
        """Tell whether the next token matches, without consuming it.

        Returns False when no token is left.
        """
        if not self.tokeniser.has_next():
            return False
        a_type, a_token = self.tokeniser.peek()
        return self.token_match(e_type, e_token, a_type, a_token)

    def expect(self, e_type, e_token=None):
        """Consume the next token and return its text.

        Raises SyntaxError when the token does not match the expectation.
        """
        a_type, a_token = self.tokeniser.next()
        if self.token_match(e_type, e_token, a_type, a_token):
            return a_token
        raise SyntaxError(
            "Expected {} of type {}, got {} of type {}".format(
                e_token, e_type, a_token, a_type))

    def token_match(self, e_type, e_token, a_type, a_token):
        """Compare an expected (type, token) pair against the actual one.

        Either expectation may be a single value or a list, tuple, set or
        frozenset of acceptable values; ``e_token=None`` accepts any token
        text.  Strings are always compared whole, never as containers.
        """
        collections = (list, tuple, set, frozenset)
        type_ok = e_type == a_type or (
            isinstance(e_type, collections) and a_type in e_type)
        if not type_ok:
            return False
        return e_token is None or e_token == a_token or (
            isinstance(e_token, collections) and a_token in e_token)
class CompilationEngine:
    """Recursive-descent compiler for a single Jack class.

    Tokens are consumed through a one-token lookahead iterator.  While
    parsing, the engine builds an element tree that mirrors the parse and
    emits VM commands through ``self.writer``.
    """

    def __init__(self, source, destination):
        """Set up the tokenizer, VM writer and symbol table.

        :param source: input handed to ``tokenizor.newTokenizor``.
        :param destination: output target handed to ``VMWriter``.
        """
        self.src = source
        self.dst = destination
        self.writer = VMWriter(destination)
        self.iter = Lookahead(tokenizor.newTokenizor(self.src))
        self._symbol_table = SymbolTable()
        # Parser state; filled in as soon as parsing starts.
        self.tok = None        # token consumed last
        self.nextTok = None    # lookahead token
        self.className = None
        self.methodName = None
        self._uid = -1         # per-subroutine label counter

    def compile(self):
        """Compile the whole class and return the root element of the tree."""
        return self._compileClass()

    # ------------------------------------------------------------------
    # Program structure
    # ------------------------------------------------------------------

    def _compileClass(self):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        classE = Element(ELEMENTS.CLASS)
        self._readKeyword(classE, ELEMENTS.CLASS)
        self.className = self._readIdentifier(classE)
        self._readSymbol(classE, _SYMBOLS.BRACKET_CURLY_OPEN)
        self._compileClassVarDec(classE)
        self._compileSubroutine(classE)
        self._readSymbol(classE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        return classE

    def _compileClassVarDec(self, parent):
        """Compile every consecutive class-level variable declaration."""
        while (self.nextTok.type == tokenizor.KEYWORD
               and self.nextTok.value in _CLASSVARDEC.FIELD_TYPES):
            classVarDecE = Element(ELEMENTS.CLASSVARDEC)
            self._readKeyword(classVarDecE)
            self._readType(classVarDecE)
            self._readIdentifier(classVarDecE)
            while self._readSymbolOptional(classVarDecE, _SYMBOLS.COMMA):
                self._readIdentifier(classVarDecE)
            self._readSymbol(classVarDecE, _SYMBOLS.SEMI_COLON)
            parent.append(classVarDecE)

    def _compileSubroutine(self, parent):
        """Compile every consecutive subroutine declaration."""
        while (self.nextTok
               and self.nextTok.type == tokenizor.KEYWORD
               and self.nextTok.value in _SUBROUTINEDEC.TYPES):
            subroutineDecE = Element(ELEMENTS.SUBROUTINEDEC)
            function_type = self._readKeyword(subroutineDecE)
            self._readReturnType(subroutineDecE)
            self.methodName = self._readIdentifier(subroutineDecE)
            self._symbol_table.startSubroutine(self.className, self.methodName)
            if function_type == _SUBROUTINEDEC.METHOD:
                # A method receives the target object as its first argument.
                self._symbol_table.define("this", self.className, SYM_KINDS.ARG)
            # Label numbering restarts for every subroutine.
            self._uid = -1
            self._readSymbol(subroutineDecE, _SYMBOLS.PARENTHESES_OPEN)
            self._compileParameters(subroutineDecE)
            self._readSymbol(subroutineDecE, _SYMBOLS.PARENTHESES_CLOSE)
            self._compileSubroutineBody(subroutineDecE, function_type)
            parent.append(subroutineDecE)

    def _gen_label(self, type_):
        """Return a fresh label ``Class.method.<type_>.<n>``."""
        self._uid += 1
        return "%s.%s.%s.%d" % (self.className, self.methodName, type_, self._uid)

    def _gen_labels(self, *parts):
        """Return one label per entry of ``parts``, all sharing one fresh number."""
        self._uid += 1
        return ["%s.%s.%s.%d" % (self.className, self.methodName, part, self._uid)
                for part in parts]

    def _compileSubroutineBody(self, parent, function_type):
        """subroutineBody: '{' varDec* statements '}'

        The ``function`` command is written only after the local variable
        declarations have been read, because it needs their count.
        """
        bodyE = Element(ELEMENTS.SUBROUTINEBODY)
        self._readSymbol(bodyE, _SYMBOLS.BRACKET_CURLY_OPEN)
        nLocals = self._compileVarDec(bodyE)
        # parent[2] is the subroutine-name element appended by _compileSubroutine.
        function_name = parent[2].text
        function_full_name = "%s.%s" % (self.className, function_name)
        self.writer.writeFunction(function_full_name, nLocals)
        if function_type == _SUBROUTINEDEC.CONSTRUCTOR:
            # Allocate one word per field and point ``this`` at the new block.
            field_count = self._symbol_table.varCount(SYM_KINDS.FIELD)
            self.writer.writePush(SEGMENT.CONST, field_count)
            self.writer.writeCall("Memory.alloc", 1)
            self.writer.writePop(SEGMENT.POINTER, 0)
        elif function_type == _SUBROUTINEDEC.METHOD:
            # Point ``this`` at the object passed as argument 0.
            self.writer.writePush(SEGMENT.ARG, 0)
            self.writer.writePop(SEGMENT.POINTER, 0)
        self._compileStatements(bodyE)
        self._readSymbol(bodyE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        parent.append(bodyE)

    # ------------------------------------------------------------------
    # Statements
    # ------------------------------------------------------------------

    def _compileStatements(self, parent):
        """Compile a (possibly empty) sequence of statements."""
        statementsE = Element(ELEMENTS.STATEMENTS)
        handlers = {
            _STATEMENTS.LET: self._compileLet,
            _STATEMENTS.IF: self._compileIf,
            _STATEMENTS.WHILE: self._compileWhile,
            _STATEMENTS.DO: self._compileDo,
            _STATEMENTS.RETURN: self._compileReturn,
        }
        while (self.nextTok.type == tokenizor.KEYWORD
               and self.nextTok.value in _STATEMENTS.TYPE_NAMES):
            handler = handlers.get(self.nextTok.value)
            if handler is None:
                # Nothing would consume the token; fail instead of spinning.
                raise self._syntaxError(
                    "Unexpected %s." % self.nextTok.value, self.nextTok)
            handler(statementsE)
        if len(statementsE) == 0:
            statementsE.text = "\n"
        parent.append(statementsE)

    def _compileLet(self, parent):
        """letStatement: 'let' varName ('[' expression ']')? '=' expression ';'"""
        statementE = Element(ELEMENTS.STATEMENT_LET)
        self._readKeyword(statementE)
        identifier = self._readIdentifier(statementE)
        is_array = False
        if self._readSymbolOptional(statementE, _SYMBOLS.BRACKET_OPEN):
            is_array = True
            # Leave the target address (base + index) on the stack.
            self._compileExpression(statementE)
            self.writer.writePush(*self._identifier_data(identifier))
            self.writer.writeArithmetic("add")
            self._readSymbol(statementE, _SYMBOLS.BRACKET_CLOSE)
        self._readSymbol(statementE, _SYMBOLS.EQUAL)
        self._compileExpression(statementE)
        self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
        if is_array:
            # Park the value in temp 0 so the address below it can be loaded
            # into pointer 1 before the value is stored through ``that``.
            self.writer.writePop(SEGMENT.TEMP, 0)
            self.writer.writePop(SEGMENT.POINTER, 1)
            self.writer.writePush(SEGMENT.TEMP, 0)
            self.writer.writePop(SEGMENT.THAT, 0)
        else:
            self.writer.writePop(*self._identifier_data(identifier))
        parent.append(statementE)

    def _compileIf(self, parent):
        """ifStatement: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?
        """
        label_else, label_end = self._gen_labels("if.else", "if.end")
        statementE = Element(ELEMENTS.STATEMENT_IF)
        self._readKeyword(statementE)
        self._readSymbol(statementE, _SYMBOLS.PARENTHESES_OPEN)
        self._compileExpression(statementE)
        self._readSymbol(statementE, _SYMBOLS.PARENTHESES_CLOSE)
        # Jump to the else part when the condition is false.
        self.writer.writeArithmetic("not")
        self.writer.writeIf(label_else)
        self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
        self._compileStatements(statementE)
        self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        self.writer.writeGoto(label_end)
        self.writer.writeLabel(label_else)
        if self._readKeywordOptional(statementE, _KEYWORDS.ELSE):
            self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
            self._compileStatements(statementE)
            self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        self.writer.writeLabel(label_end)
        parent.append(statementE)

    def _compileWhile(self, parent):
        """whileStatement: 'while' '(' expression ')' '{' statements '}'"""
        label_start, label_end = self._gen_labels("while.start", "while.end")
        self.writer.writeLabel(label_start)
        statementE = Element(ELEMENTS.STATEMENT_WHILE)
        self._readKeyword(statementE)
        self._readSymbol(statementE, _SYMBOLS.PARENTHESES_OPEN)
        self._compileExpression(statementE)
        self._readSymbol(statementE, _SYMBOLS.PARENTHESES_CLOSE)
        # Leave the loop when the condition is false.
        self.writer.writeArithmetic("not")
        self.writer.writeIf(label_end)
        self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
        self._compileStatements(statementE)
        self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        parent.append(statementE)
        self.writer.writeGoto(label_start)
        self.writer.writeLabel(label_end)

    def _compileReturn(self, parent):
        """returnStatement: 'return' expression? ';'

        A bare ``return;`` pushes constant 0 so that a value is always
        returned.
        """
        statementE = Element(ELEMENTS.STATEMENT_RETURN)
        self._readKeyword(statementE)
        at_semicolon = (self.nextTok.type == tokenizor.SYMBOL
                        and self.nextTok.value == _SYMBOLS.SEMI_COLON)
        if at_semicolon:
            self.writer.writePush(SEGMENT.CONST, 0)
        else:
            self._compileExpression(statementE)
        self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
        self.writer.writeReturn()
        parent.append(statementE)

    def _compileDo(self, parent):
        """doStatement: 'do' subroutineCall ';' -- the result is discarded."""
        statementE = Element(ELEMENTS.STATEMENT_DO)
        self._readKeyword(statementE, _STATEMENTS.DO)
        identifier = self._readIdentifier(statementE)
        self._compileSubroutineCall(statementE, identifier, self._readDoTerminator)
        # Drop the value every call leaves on the stack.
        self.writer.writePop(SEGMENT.TEMP, 0)
        parent.append(statementE)

    def _readDoTerminator(self, parent):
        """Consume the ';' that ends a do statement (read before the call is written)."""
        self._readSymbol(parent, _SYMBOLS.SEMI_COLON)

    def _compileSubroutineCall(self, parent, identifier, before_call=None):
        """Compile the remainder of a call whose first identifier is consumed.

        Handles both ``name(args)`` (a method call on the current object)
        and ``prefix.name(args)``, where ``prefix`` is either a variable
        (method call on that object) or a class name.

        :param parent: element receiving the parsed tokens.
        :param identifier: the identifier already read by the caller.
        :param before_call: optional callable invoked with ``parent`` after
            the argument list is parsed and before the call is emitted.
        """
        nArgs = 0
        if self._readSymbolOptional(parent, _SYMBOLS.DOT):
            type_ = self._symbol_table.typeOf(identifier)
            if type_:
                # Known variable: pass the object as the hidden first argument.
                segment, index = self._identifier_data(identifier)
                self.writer.writePush(segment, index)
                nArgs += 1
                owner = type_
            else:
                owner = identifier
            target = "%s.%s" % (owner, self._readIdentifier(parent))
        else:
            # Unqualified call: a method of this class invoked on ``this``.
            target = "%s.%s" % (self.className, identifier)
            self.writer.writePush(SEGMENT.POINTER, 0)
            nArgs += 1
        # _compileExpressionList consumes both parentheses itself.
        nArgs += self._compileExpressionList(parent)
        if before_call is not None:
            before_call(parent)
        self.writer.writeCall(target, nArgs)

    # ------------------------------------------------------------------
    # Expressions
    # ------------------------------------------------------------------

    def _compileExpression(self, parent):
        """expression: term (op term)* -- evaluated strictly left to right."""
        expressionE = Element(ELEMENTS.EXPRESSION)
        self._readTerm(expressionE)
        while (self.nextTok.type == tokenizor.SYMBOL
               and self.nextTok.value in _SYMBOLS.OPERATORS):
            symbol = self._readSymbol(expressionE)
            self._readTerm(expressionE)
            # Operator goes after both operands (postfix order).
            self.writer.writeArithmetic(symbol)
        parent.append(expressionE)

    def _compileExpressionList(self, parent):
        """Compile '(' expressionList ')' and return the number of expressions."""
        self._readSymbol(parent, _SYMBOLS.PARENTHESES_OPEN)
        expListE = Element(ELEMENTS.EXPRESSION_LIST)
        nArgs = 0
        while not (self.nextTok.type == tokenizor.SYMBOL
                   and self.nextTok.value == _SYMBOLS.PARENTHESES_CLOSE):
            self._compileExpression(expListE)
            self._readSymbolOptional(expListE, _SYMBOLS.COMMA)
            nArgs += 1
        # hack for TextComparer
        if len(expListE) == 0:
            expListE.text = "\n"
        parent.append(expListE)
        self._readSymbol(parent, _SYMBOLS.PARENTHESES_CLOSE)
        return nArgs

    def _compileVarDec(self, parent):
        """Compile every local ``var`` declaration; return how many names were declared."""
        nLocals = 0
        while (self.nextTok.type == tokenizor.KEYWORD
               and self.nextTok.value == _KEYWORDS.VAR):
            varDecE = Element(ELEMENTS.VAR_DEC)
            self._readKeyword(varDecE, _KEYWORDS.VAR)
            self._readType(varDecE)
            self._readIdentifier(varDecE)
            nLocals += 1
            while self._readSymbolOptional(varDecE, _SYMBOLS.COMMA):
                self._readIdentifier(varDecE)
                nLocals += 1
            self._readSymbol(varDecE, _SYMBOLS.SEMI_COLON)
            parent.append(varDecE)
        return nLocals

    def _compileParameters(self, parent):
        """parameterList: ((type varName) (',' type varName)*)?"""
        paramListE = Element(ELEMENTS.PARAM_LIST)
        while ((self.nextTok.type == tokenizor.KEYWORD
                and self.nextTok.value in _CLASSVARDEC.VAR_TYPES)
               or self.nextTok.type == tokenizor.IDENTIFIER):
            self._readType(paramListE)
            self._readIdentifier(paramListE)
            self._readSymbolOptional(paramListE, _SYMBOLS.COMMA)
        if len(paramListE) == 0:
            paramListE.text = "\n"
        parent.append(paramListE)

    ##############################
    ########## READ ##############
    ##############################

    def _readTerm(self, parent):
        """Parse one term and emit the code that leaves its value on the stack.

        :raises SyntaxError: when the lookahead token cannot start a term.
        """
        termE = Element(ELEMENTS.TERM)
        if self.nextTok.type == tokenizor.INTEGER:
            self.next()
            termE.append(_leafElement(ELEMENTS.INTEGER_CONSTANT, self.tok.value))
            self.writer.writePush(SEGMENT.CONST, self.tok.value)
        elif self.nextTok.type == tokenizor.STRING:
            self.next()
            termE.append(_leafElement(ELEMENTS.STRING_CONSTANT, self.tok.value))
            string_value = self.tok.value
            # Build the string at run time, one character at a time.
            self.writer.writePush(SEGMENT.CONST, len(string_value))
            self.writer.writeCall("String.new", 1)
            for char in string_value:
                self.writer.writePush(SEGMENT.CONST, ord(char))
                self.writer.writeCall("String.appendChar", 2)
        elif (self.nextTok.type == tokenizor.KEYWORD
              and self.nextTok.value in _KEYWORDS.CONSTANTS):
            self.next()
            termE.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
            _KW_CONT_WRITE[self.tok.value](self.writer)
        elif self.nextTok.type == tokenizor.IDENTIFIER:
            identifier = self._readIdentifier(termE)
            if self._readSymbolOptional(termE, _SYMBOLS.BRACKET_OPEN):
                # Array read: compute base + index, then load through ``that``.
                self._compileExpression(termE)
                self.writer.writePush(*self._identifier_data(identifier))
                self.writer.writeArithmetic("add")
                self.writer.writePop(SEGMENT.POINTER, 1)
                self.writer.writePush(SEGMENT.THAT, 0)
                self._readSymbol(termE, _SYMBOLS.BRACKET_CLOSE)
            elif (self.nextTok.type == tokenizor.SYMBOL
                  and self.nextTok.value in (_SYMBOLS.PARENTHESES_OPEN,
                                             _SYMBOLS.DOT)):
                self._compileSubroutineCall(termE, identifier)
            else:
                self.writer.writePush(*self._identifier_data(identifier))
        elif (self.nextTok.type == tokenizor.SYMBOL
              and self.nextTok.value == _SYMBOLS.PARENTHESES_OPEN):
            self.next()
            termE.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
            self._compileExpression(termE)
            self._readSymbol(termE, _SYMBOLS.PARENTHESES_CLOSE)
        elif (self.nextTok.type == tokenizor.SYMBOL
              and self.nextTok.value in _SYMBOLS.UNARY_OPARTORS):
            self.next()
            sym = self.tok.value
            termE.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
            self._readTerm(termE)
            self.writer.writeArithmeticUnary(sym)
        else:
            # The offending token is the lookahead, not the one consumed last.
            raise self._syntaxError(
                "Unexpected %s." % self.nextTok.value, self.nextTok)
        parent.append(termE)

    def _identifier_data(self, identifier):
        """Return the ``(segment, index)`` pair under which ``identifier`` is stored."""
        kind = self._symbol_table.kindOf(identifier)
        return _SEG_TRANSLATE[kind], self._symbol_table.indexOf(identifier)

    def _readIdentifier(self, parent, declare=True):
        """Consume an identifier, append it to ``parent`` and return its name.

        Inside a declaration (class/local variable declaration or parameter
        list) the name is defined in the symbol table, even when an
        identically named symbol is already visible, so that a subroutine
        scope name can shadow a class-level one.  Anywhere else the name is
        only looked up.

        :param parent: element receiving the identifier leaf.
        :param declare: pass ``False`` when the identifier is in *type*
            position, so a class name is never registered as a variable.
        :raises SyntaxError: when the next token is not an identifier.
        """
        self.next()
        self._assertToken(self.tok, ELEMENTS.IDENTIFIER, type_=tokenizor.IDENTIFIER)
        name = self.tok.value
        element = _leafElement(ELEMENTS.IDENTIFIER, name)
        type_ = None
        kind = None
        index = None
        if declare:
            if (parent.tag in (ELEMENTS.CLASSVARDEC, ELEMENTS.VAR_DEC)
                    and len(parent) > 1):
                # parent[0] is the declaration keyword, parent[1] the type.
                type_ = parent[1].text
                kind = _SYM_KIND_MAP[parent[0].text]
            elif parent.tag == ELEMENTS.PARAM_LIST and len(parent) > 0:
                # The element appended last is this parameter's type.
                type_ = parent[-1].text
                kind = SYM_KINDS.ARG
        if kind is not None:
            index = self._symbol_table.define(name, type_, kind)
        else:
            type_ = self._symbol_table.typeOf(name)
            if type_ is not None:
                kind = self._symbol_table.kindOf(name)
                index = self._symbol_table.indexOf(name)
        if kind is not None:
            element.set("type", type_)
            element.set("kind", str(kind))
            element.set("index", str(index))
        parent.append(element)
        return name

    def _readType(self, parent):
        """Consume a variable type: a built-in type keyword or a class name."""
        if (self.nextTok.type == tokenizor.KEYWORD
                and self.nextTok.value in _CLASSVARDEC.VAR_TYPES):
            self.next()
            parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
        else:
            self._readIdentifier(parent, declare=False)

    def _readReturnType(self, parent):
        """Consume a subroutine return type: a return-type keyword or a class name."""
        if (self.nextTok.type == tokenizor.KEYWORD
                and self.nextTok.value in _SUBROUTINEDEC.RETURN_TYPES):
            self.next()
            parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
        else:
            self._readIdentifier(parent, declare=False)

    def _readSymbol(self, parent, expected=None):
        """Consume a symbol (optionally a specific one) and return its value.

        :raises SyntaxError: on a non-symbol token or an unexpected symbol.
        """
        self.next()
        expectedStr = expected if expected is not None else ELEMENTS.SYMBOL
        self._assertToken(self.tok, expectedStr,
                          type_=tokenizor.SYMBOL, value_=expected)
        parent.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
        return self.tok.value

    def _readKeyword(self, parent, expected=None):
        """Consume a keyword (optionally a specific one) and return its value.

        :raises SyntaxError: on a non-keyword token or an unexpected keyword.
        """
        self.next()
        expectedStr = expected if expected is not None else ELEMENTS.KEYWORD
        self._assertToken(self.tok, expectedStr,
                          type_=tokenizor.KEYWORD, value_=expected)
        parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
        return self.tok.value

    def _readSymbolOptional(self, parent, expected):
        """Consume ``expected`` if it is the next symbol; report whether it was."""
        if self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == expected:
            self.next()
            parent.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
            return True
        return False

    def _readKeywordOptional(self, parent, expected):
        """Consume ``expected`` if it is the next keyword; report whether it was."""
        if self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value == expected:
            self.next()
            parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
            return True
        return False

    def next(self):
        """Advance one token and refresh the lookahead."""
        self.tok = self.iter.next()
        self.nextTok = self.iter.lookahead()

    def _assertToken(self, tok, expected_str, type_=None, value_=None):
        """Raise ``SyntaxError`` unless ``tok`` has the requested type and value.

        A ``None`` criterion is not checked.
        """
        wrong_type = type_ is not None and tok.type != type_
        wrong_value = value_ is not None and tok.value != value_
        if wrong_type or wrong_value:
            raise self._syntaxError(
                "Expected %s but found %s" % (expected_str, tok.value), tok)

    def _syntaxError(self, msg, tok=None):
        """Build (not raise) a ``SyntaxError`` located at ``tok``.

        Defaults to the token consumed last when ``tok`` is omitted.
        """
        if tok is None:
            tok = self.tok
        return SyntaxError(msg, (None, tok.srow, tok.scol, tok.line))
class jackVisitor(jackGrammarVisitor):
    """Parse-tree visitor that emits virtual-machine code for a Jack class."""

    def __init__(self):
        """Create the symbol table, the code writer and the bookkeeping state."""
        self.symbolTable = SymbolTable()
        self.contWhile = -1      # number of the last ``while`` label set
        self.contIf = -1         # number of the last ``if`` label set
        self.nombreClase = ""    # class being compiled
        self.kindMetodo = ""     # 'constructor', 'function' or 'method'
        self.nombreMetodo = ""   # subroutine being compiled
        self.vmWriter = CodeWriter()
        self.vmWriter.vm = ""
        self.nArgs = 0           # size of the expression list visited last

    def visitClasses(self, ctx):
        """Remember the name of the class being compiled."""
        self.nombreClase = ctx.children[1].children[0].getText()
        return self.visitChildren(ctx)

    def visitClassVarDec(self, ctx):
        """Define every declared field / static variable in the symbol table."""
        kind = ctx.children[0].getText()
        var_type = ctx.children[1].children[0].getText()
        i = 2
        while ctx.children[i].getText() != ';':
            name = ctx.children[i].getText()
            if name != ',':
                self.symbolTable.define(name, var_type, kind)
            i += 1
        return self.visitChildren(ctx)

    def visitTypes(self, ctx):
        return self.visitChildren(ctx)

    def visitSubroutineDec(self, ctx):
        """Open a new subroutine scope; a method gets ``this`` as first argument."""
        self.kindMetodo = ctx.children[0].getText()
        self.nombreMetodo = ctx.children[2].children[0].getText()
        self.symbolTable.startSubroutine()
        if self.kindMetodo == 'method':
            # ``this`` is an instance of the enclosing class.
            self.symbolTable.define('this', self.nombreClase, 'argument')
        return self.visitChildren(ctx)

    def visitParameterList(self, ctx):
        """Define every parameter of the subroutine as an argument.

        Children are laid out as ``type name (',' type name)*``.
        """
        count = ctx.getChildCount()
        i = 0
        while i + 1 < count:
            var_type = ctx.children[i].getText()
            name = ctx.children[i + 1].getText()
            self.symbolTable.define(name, var_type, 'argument')
            i += 3  # skip over the separating comma
        return self.visitChildren(ctx)

    def visitSubroutineBody(self, ctx):
        """Write the function header and prologue, then compile the body.

        Local variable declarations are visited first because the function
        header needs the number of locals.
        """
        count = ctx.getChildCount()
        i = 1
        while i < count:
            # Terminals and empty rule nodes have no children to inspect.
            kids = getattr(ctx.children[i], 'children', None)
            if not kids or kids[0].getText() != "var":
                break
            self.visit(ctx.children[i])
            i += 1
        function_name = self.nombreClase + '.' + self.nombreMetodo
        num_locals = self.symbolTable.varCount('local')
        self.vmWriter.writeFunction(function_name, num_locals)
        if self.kindMetodo == 'constructor':
            # Allocate one word per field and point ``this`` at the block.
            num_fields = self.symbolTable.varCount('field')
            self.vmWriter.writePush('constant', num_fields)
            self.vmWriter.writeCall('Memory.alloc', 1)
            self.vmWriter.writePop('pointer', 0)
        elif self.kindMetodo == 'method':
            # Point ``this`` at the object received as argument 0.
            self.vmWriter.writePush('argument', 0)
            self.vmWriter.writePop('pointer', 0)
        while i < count:
            self.visit(ctx.children[i])
            i += 1

    def visitVarDec(self, ctx):
        """Define every local variable of the declaration in the symbol table."""
        var_type = ctx.children[1].children[0].getText()
        i = 2
        while ctx.children[i].getText() != ';':
            name = ctx.children[i].getText()
            if name != ',':
                self.symbolTable.define(name, var_type, 'local')
            i += 1
        return self.visitChildren(ctx)

    # Rules below need no VM code of their own.

    def visitClassName(self, ctx):
        return self.visitChildren(ctx)

    def visitSubroutineName(self, ctx):
        return self.visitChildren(ctx)

    def visitVarName(self, ctx):
        return self.visitChildren(ctx)

    def visitStatements(self, ctx):
        return self.visitChildren(ctx)

    def visitStatement(self, ctx):
        return self.visitChildren(ctx)

    def visitLetStatement(self, ctx):
        """Evaluate the right-hand side and store it in the variable or array cell."""
        name = ctx.children[1].getText()
        kind = self.symbolTable.kindOf(name)
        index = self.symbolTable.indexOf(name)
        if ctx.children[2].getText() == '[':
            # let name [ index ] = value ;  -> children 3 and 6 are expressions
            self.visit(ctx.children[3])
            self.vmWriter.writePush(kind, index)
            self.vmWriter.writeArithmetic('add')
            self.visit(ctx.children[6])
            # Park the value so the address under it can go into pointer 1.
            self.vmWriter.writePop('temp', 0)
            self.vmWriter.writePop('pointer', 1)
            self.vmWriter.writePush('temp', 0)
            self.vmWriter.writePop('that', 0)
        else:
            self.visit(ctx.children[3])
            self.vmWriter.writePop(kind, index)

    def visitIfStatement(self, ctx):
        """Emit the labels and jumps that route control through if / else."""
        self.contIf += 1
        # Capture the number now: nested statements advance the counter.
        tag = str(self.contIf)
        self.visit(ctx.children[2])
        self.vmWriter.writeIf('IF_TRUE' + tag)
        self.vmWriter.writeGoto('IF_FALSE' + tag)
        self.vmWriter.writeLabel('IF_TRUE' + tag)
        self.visit(ctx.children[5])
        has_else = (ctx.getChildCount() > 7
                    and ctx.children[7].getText() == 'else')
        if has_else:
            self.vmWriter.writeGoto('IF_END' + tag)
            self.vmWriter.writeLabel('IF_FALSE' + tag)
            self.visit(ctx.children[9])
            self.vmWriter.writeLabel('IF_END' + tag)
        else:
            self.vmWriter.writeLabel('IF_FALSE' + tag)

    def visitWhileStatement(self, ctx):
        """Emit a loop that repeats the body until the condition is false."""
        self.contWhile += 1
        # Capture the number now: nested statements advance the counter.
        tag = str(self.contWhile)
        self.vmWriter.writeLabel('WHILE_EXP' + tag)
        self.visit(ctx.children[2])
        self.vmWriter.writeArithmetic('not')
        self.vmWriter.writeIf('WHILE_END' + tag)
        self.visit(ctx.children[5])
        self.vmWriter.writeGoto('WHILE_EXP' + tag)
        self.vmWriter.writeLabel('WHILE_END' + tag)

    def visitDoStatement(self, ctx):
        """Compile the call and discard the value it leaves on the stack."""
        self.visitChildren(ctx)
        self.vmWriter.writePop('temp', 0)

    def visitReturnStatement(self, ctx):
        """Compile the return value; a bare ``return;`` returns constant 0."""
        if ctx.children[1].getText() != ';':
            self.visit(ctx.children[1])
        else:
            self.vmWriter.writePush('constant', 0)
        self.vmWriter.writeReturn()

    def visitExpression(self, ctx):
        """Compile ``term (op term)*`` in postfix order: both operands, then the operator."""
        self.visit(ctx.children[0])
        for i in range(2, ctx.getChildCount(), 2):
            self.visit(ctx.children[i])       # right operand
            self.visit(ctx.children[i - 1])   # operator

    def visitTerm(self, ctx):
        """Emit the code that leaves the value of a term on the stack.

        Handles integer and string constants, keyword constants, variables,
        array cells, parenthesised expressions, unary operations and (by
        delegating to the child rule) subroutine calls.
        """
        term = ctx.children[0].getText()
        if ctx.getChildCount() == 1:
            if term.isdigit():
                self.vmWriter.writePush('constant', term)
            elif term.startswith('"'):
                # Build the string at run time, one character at a time.
                text = term.strip('"')
                self.vmWriter.writePush('constant', len(text))
                self.vmWriter.writeCall('String.new', 1)
                for char in text:
                    self.vmWriter.writePush('constant', ord(char))
                    self.vmWriter.writeCall('String.appendChar', 2)
            elif term in ('true', 'false', 'null', 'this'):
                self.visitChildren(ctx)
            elif (term in self.symbolTable.subrutina
                  or term in self.symbolTable.clase):
                kind = self.symbolTable.kindOf(term)
                index = self.symbolTable.indexOf(term)
                self.vmWriter.writePush(kind, index)
            else:
                self.visitChildren(ctx)
        elif ctx.children[1].getText() == '[':
            # Array read: compute base + index, then load through ``that``.
            index = self.symbolTable.indexOf(term)
            segment = self.symbolTable.kindOf(term)
            self.visit(ctx.children[2])
            self.vmWriter.writePush(segment, index)
            self.vmWriter.writeArithmetic('add')
            self.vmWriter.writePop('pointer', 1)
            self.vmWriter.writePush('that', 0)
        elif term == '(':
            self.visitChildren(ctx)
        elif term in ('-', '~'):
            # Operand first, then the unary operator.
            self.visit(ctx.children[1])
            self.visit(ctx.children[0])

    def visitSubroutineCall(self, ctx):
        """Resolve the call target, push the hidden object argument and emit the call."""
        name = ctx.children[0].children[0].getText()
        target = name
        args = 0
        separator = ctx.children[1].getText()
        if separator == '.':
            subroutine = ctx.children[2].children[0].getText()
            var_type = self.symbolTable.typeOf(name)
            if var_type is not None:
                # Method call on a variable: the object is the first argument.
                kind = self.symbolTable.kindOf(name)
                index = self.symbolTable.indexOf(name)
                self.vmWriter.writePush(kind, index)
                target = var_type + '.' + subroutine
                args += 1
            else:
                target = name + '.' + subroutine
        elif separator == '(':
            # Unqualified call: a method of this class invoked on ``this``.
            target = self.nombreClase + '.' + name
            args += 1
            self.vmWriter.writePush('pointer', 0)
        # Reset so a call without an expression-list node counts zero arguments.
        self.nArgs = 0
        self.visitChildren(ctx)
        args += self.nArgs
        self.vmWriter.writeCall(target, args)

    def visitExpressionList(self, ctx):
        """Compile every expression of the list and record how many there were.

        The count is kept in a local and published to ``self.nArgs`` only at
        the end, so calls nested inside an argument cannot overwrite it.
        """
        count = 0
        for i in range(0, ctx.getChildCount(), 2):  # odd positions are commas
            self.visit(ctx.children[i])
            count += 1
        self.nArgs = count
        return count

    def visitOp(self, ctx):
        """Emit the VM command for a binary operator."""
        commands = {
            "+": 'add',
            "-": 'sub',
            # Multiplication and division are emitted as raw call commands.
            "*": 'call Math.multiply 2',
            "/": 'call Math.divide 2',
            "&": 'and',
            "|": 'or',
            ">": 'gt',
            "<": 'lt',
            "=": 'eq',
        }
        command = commands.get(ctx.children[0].getText())
        if command is not None:
            self.vmWriter.writeArithmetic(command)
        return self.visitChildren(ctx)

    def visitUnaryop(self, ctx):
        """Emit the VM command for a unary operator."""
        op = ctx.children[0].getText()
        if op == "~":
            self.vmWriter.writeArithmetic('not')
        elif op == "-":
            self.vmWriter.writeArithmetic('neg')

    def visitKeywordconstant(self, ctx):
        """Emit the code that pushes the value of a keyword constant."""
        keyword = ctx.children[0].getText()
        if keyword == 'this':
            self.vmWriter.writePush('pointer', 0)
        elif keyword in ('false', 'null'):
            self.vmWriter.writePush('constant', 0)
        elif keyword == 'true':
            # true is produced as ``not 0``.
            self.vmWriter.writePush('constant', 0)
            self.vmWriter.writeArithmetic('not')
        return self.visitChildren(ctx)

    def crearArchivo(self, path):
        """Write the generated VM code next to ``path``, with a ``.vm`` suffix.

        Everything from the first ``.jack`` in ``path`` onwards is replaced by
        ``.vm``; a path without ``.jack`` simply gets ``.vm`` appended.  The
        process exits with status 1 when the file cannot be created because
        the path does not exist.
        """
        target = path.split('.jack')[0] + '.vm'
        vm_code = self.vmWriter.vm
        try:
            # The context manager guarantees the file is flushed and closed.
            with open(target, 'w') as out:
                out.write(vm_code)
        except FileNotFoundError:
            print('ERROR:No hay directorio existente para escribir')
            raise SystemExit(1)
class CompilationEnginge(object): """ lalala """ def __init__(self, input_file, output_file): self.tokenizer = JackTokenizer(input_file) self.out = open(output_file, 'w') self.token = None self.class_name = None ####################### ### PROJECT 11 CODE ### ####################### self.symbol_table = SymbolTable() self.vm_writer = VMWriter(output_file) ####################### def analyze(self): self.token = self.tokenizer.advance() self.compile_class() self.close() print('CLASS TABLE:') print(self.symbol_table.class_table) def close(self): if self.out: self.out.close() self.out = None def advance(self): self.token = self.tokenizer.advance() def write_to_out(self): pass def format_line(self, defined_or_used=''): token_type = self.tokenizer.token_type() running_index = '' if token_type == self.tokenizer.keyword_token: meat = self.tokenizer.keyword() defined_or_used='' elif token_type == self.tokenizer.symbol_token: meat = self.tokenizer.symbol() defined_or_used='' elif token_type == self.tokenizer.identifier_token: meat = self.tokenizer.identifier() ####################### ### PROJECT 11 CODE ### ####################### # Extending compilaiton engine to output <var/argument/static/field...> instead of <indentifier> name = self.tokenizer.token if self.symbol_table.kind_of(name): token_type = self.symbol_table.kind_of(name) running_index = str(self.symbol_table.index_of(name)) elif name[0].islower(): token_type = 'subroutine' else: token_type = 'class' ####################### elif token_type == self.tokenizer.int_const: meat = self.tokenizer.int_val() defined_or_used='' elif token_type == self.tokenizer.string_const: meat = self.tokenizer.string_val() defined_or_used='' else: raise ValueError('Something went wrong with token: {}'.format(self.token)) if defined_or_used != '': defined_or_used += ' ' if running_index != '': running_index = ' ' + running_index formated_line = '<{2}{0}{3}> {1} </{2}{0}{3}>\n'.format(token_type, meat, defined_or_used, running_index) return 
formated_line ######################### ### PROGARM STRUCTURE ### ######################### def compile_class(self): """ #################################################################### ### class: 'class' className '{' classVarDec* subroutineDec* '}' ### #################################################################### """ self.out.write('<class>\n') # 'class' keyword_line = self.format_line() self.out.write(keyword_line) # className self.advance() ####################### ### PROJECT 11 CODE ### ####################### self.class_name = self.tokenizer.token ####################### identifier_line = self.format_line('defined') self.out.write(identifier_line) # '{' self.advance() symbol_line = self.format_line() self.out.write(symbol_line) ### classVarDec* subroutineDec* ### self.advance() # classVarDec* while self.tokenizer.token_type() == self.tokenizer.keyword_token and self.tokenizer.keyword() in [self.tokenizer.key_field, self.tokenizer.key_static]: self.compile_class_var_dec() # subroutineDec* while self.tokenizer.token_type() == self.tokenizer.keyword_token and self.tokenizer.keyword() in [self.tokenizer.key_function, self.tokenizer.key_method, self.tokenizer.key_constructor]: self.compile_subroutine() # '}' if self.tokenizer.token_type() == self.tokenizer.symbol_token: # Class compilation is done symbol_line = self.format_line() self.out.write(symbol_line) else: raise ValueError('Something went wrong') # Closing with </class> self.out.write('</class>\n') is_sucessfull = not(self.advance()) if is_sucessfull: print('Compilation enginge succesfully finished') else: print('Something went wrong!') def compile_class_var_dec(self): """ ####################################################################### ### classVarDec: ('static'|'field') type varName (',' varName)* ';' ### ####################################################################### """ self.out.write('<classVarDec>\n') ####################### ### PROJECT 11 CODE ### ####################### # 
Extract field or static # field_or_static = re.match('<[a-z]*>', field_or_static_line)[0][1:-1] field_or_static = self.tokenizer.token ####################### # ('static' | 'field') field_or_static_line = self.format_line() self.out.write(field_or_static_line) # type self.advance() ####################### ### PROJECT 11 CODE ### ####################### # Extract token type type_ = self.tokenizer.token ####################### type_line = self.format_line() self.out.write(type_line) # varName self.advance() ####################### ### PROJECT 11 CODE ### ####################### self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind=field_or_static) ####################### varname_line = self.format_line('defined') self.out.write(varname_line) # (',' varName)* self.advance() symbol = self.tokenizer.symbol() while symbol == ',': colon_line = self.format_line() self.out.write(colon_line) self.advance() ####################### ### PROJECT 11 CODE ### ####################### self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind=field_or_static) ####################### varname_line = self.format_line('defined') self.out.write(varname_line) self.advance() symbol = self.tokenizer.symbol() # symbol == ';' semicolon_line = self.format_line() self.out.write(semicolon_line) self.advance() self.out.write('</classVarDec>\n') def compile_subroutine(self): """ ########################################################################### ### subroutineDec: ('constructor'|'function'|'method') ### ### ('void' | type) subroutineName '(' parameterList ')' ### ### subroutineBody ### ########################################################################### """ ####################### ### PROJECT 11 CODE ### ####################### print() print('SUBROUTINE TABLE:') print(self.symbol_table.subroutine_table) print() self.symbol_table.start_subroutine() self.symbol_table.define(name='this', type_=self.class_name, kind='argument') ####################### 
self.out.write('<subroutineDec>\n') # ('constructor'|'function'|'method') constructor_function_method_line = self.format_line() self.out.write(constructor_function_method_line) # ('void' | type) self.advance() void_or_type_line = self.format_line() self.out.write(void_or_type_line) # subroutineName self.advance() subroutine_name_line = self.format_line('defined') self.out.write(subroutine_name_line) # '(' self.advance() symbol_line = self.format_line() self.out.write(symbol_line) # parameterList self.advance() self.compile_parameter_list() # ')' symbol_line = self.format_line() self.out.write(symbol_line) ################################################## ### subroutineBody: '{' varDec* statements '}' ### ################################################## self.out.write('<subroutineBody>\n') # '{' self.advance() symbol_line = self.format_line() self.out.write(symbol_line) ############### ### varDec* ### ############### self.advance() while self.tokenizer.token == self.tokenizer.key_var: self.compile_var_dec() ################## ### statements ### ################## self.compile_statements() # '}' symbol_line = self.format_line() self.out.write(symbol_line) self.advance() self.out.write('</subroutineBody>\n') self.out.write('</subroutineDec>\n') ####################### ### PROJECT 11 CODE ### ####################### print() print('SUBROUTINE TABLE:') print(self.symbol_table.subroutine_table) print() ####################### def compile_parameter_list(self): """ ############################################################ ### parameterList: ((type varName) (',' type varName)*)? 
### ############################################################ """ self.out.write('<parameterList>\n') # If token type is symbol then we have empty parameter list # If we have symbol token then it means our parameter list is fully processed if self.tokenizer.token_type() != self.tokenizer.symbol_token: # type ####################### ### PROJECT 11 CODE ### ####################### type_ = self.tokenizer.token ####################### type_line = self.format_line() self.out.write(type_line) # varName self.advance() ####################### ### PROJECT 11 CODE ### ####################### self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='argument') ####################### var_name_line = self.format_line('defined') self.out.write(var_name_line) # If next token is ',' we have more then one parameter self.advance() while self.tokenizer.token_type() == self.tokenizer.symbol_token and self.tokenizer.symbol() == ',': # ',' comma_line = self.format_line() self.out.write(comma_line) # type self.advance() ####################### ### PROJECT 11 CODE ### ####################### type_ = self.tokenizer.token ####################### type_line = self.format_line() self.out.write(type_line) # varName self.advance() ####################### ### PROJECT 11 CODE ### ####################### self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='argument') # We are in new subroutine so add next nested scope # self.symbol_table.start_subroutine() ####################### var_name_line = self.format_line('defined') self.out.write(var_name_line) self.advance() self.out.write('</parameterList>\n') def compile_var_dec(self): """ ##################################################### ### varDec: 'var' type varName (',' varName)* ';' ### ##################################################### """ self.out.write('<varDec>\n') # var var_line = self.format_line() self.out.write(var_line) # type self.advance() ####################### ### PROJECT 11 CODE ### 
####################### type_ = self.tokenizer.token ####################### type_line = self.format_line() self.out.write(type_line) # varName self.advance() ####################### ### PROJECT 11 CODE ### ####################### self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='local') ####################### var_name_line = self.format_line('defined') self.out.write(var_name_line) # (',' varName)* self.advance() while self.tokenizer.symbol() == ',': # ',' comma_line = self.format_line() self.out.write(comma_line) # varName self.advance() ####################### ### PROJECT 11 CODE ### ####################### self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='local') ####################### var_name_line = self.format_line('defined') self.out.write(var_name_line) self.advance() # ';' semicolon_line = self.format_line() self.out.write(semicolon_line) self.advance() self.out.write('</varDec>\n') ################## ### STATEMENTS ### ################## def compile_statements(self): """ ############################## ### statements: statement* ### ############################## """ self.out.write('<statements>\n') while self.tokenizer.token_type() != self.tokenizer.symbol_token: keyword = self.tokenizer.keyword() # letStatement if keyword == self.tokenizer.key_let: self.compile_let() # ifStatement elif keyword == self.tokenizer.key_if: self.compile_if() # whileStatement elif keyword == self.tokenizer.key_while: self.compile_while() # doStatement elif keyword == self.tokenizer.key_do: self.compile_do() # returnStatement elif keyword == self.tokenizer.key_return: self.compile_return() else: raise ValueError('Wrong statement: {}'.format(keyword)) self.out.write('</statements>\n') def compile_do(self): """ ############################################ ### doStatement: 'do' subroutineCall ';' ### ############################################ """ self.out.write('<doStatement>\n') # 'do' do_line = self.format_line() 
self.out.write(do_line) # subroutineCall self.advance() self.compile_subroutine_call() # ';' symbol_line = self.format_line() self.out.write(symbol_line) self.advance() self.out.write('</doStatement>\n') def compile_let(self): """ ############################################################################ ### letStatement: 'let' varName ('[' expression ']')? '=' expression ';' ### ############################################################################ """ self.out.write('<letStatement>\n') # let let_line = self.format_line() self.out.write(let_line) # varName self.advance() var_name_line = self.format_line('used') self.out.write(var_name_line) # Check if '[' or '=' self.advance() if self.tokenizer.token == '[': # '[' symbol_line = self.format_line() self.out.write(symbol_line) # expression self.advance() self.compile_expression() # ']' symbol_line = self.format_line() self.out.write(symbol_line) self.advance() # '=' symbol_line = self.format_line() self.out.write(symbol_line) # expression self.advance() self.compile_expression() # ';' symbol_line = self.format_line() self.out.write(symbol_line) self.advance() self.out.write('</letStatement>\n') def compile_while(self): """ ##################################################################### ### whileStatement: 'while' '(' expression ')' '{' statements '}' ### ##################################################################### """ self.out.write('<whileStatement>\n') # 'while' while_line = self.format_line() self.out.write(while_line) # '(' self.advance() symbol_line = self.format_line() self.out.write(symbol_line) # expression self.advance() self.compile_expression() # ')' symbol_line = self.format_line() self.out.write(symbol_line) # '{' self.advance() symbol_line = self.format_line() self.out.write(symbol_line) # statements self.advance() self.compile_statements() # '}' symbol_line = self.format_line() self.out.write(symbol_line) self.advance() self.out.write('</whileStatement>\n') def 
compile_return(self): """ ################################################ ### ReturnStatement 'return' expression? ';' ### ################################################ """ self.out.write('<returnStatement>\n') # 'return' return_line = self.format_line() self.out.write(return_line) # Ceck if expression self.advance() if self.tokenizer.token != ';': # 'expression' self.compile_expression() # ';' symbol_line = self.format_line() self.out.write(symbol_line) self.advance() self.out.write('</returnStatement>\n') def compile_if(self): """ ############################################################### ### ifStatement: 'if' '(' expression ')' '{' statements '}' ### ### ('else' '{' statements '}')? ### ############################################################### """ self.out.write('<ifStatement>\n') # 'if' if_line = self.format_line() self.out.write(if_line) # '(' self.advance() symbol_line = self.format_line() self.out.write(symbol_line) # expression self.advance() self.compile_expression() # ')' symbol_line = self.format_line() self.out.write(symbol_line) # '{' self.advance() symbol_line = self.format_line() self.out.write(symbol_line) # statements self.advance() self.compile_statements() # '}' symbol_line = self.format_line() self.out.write(symbol_line) # Check if there is 'else' part of ifStatement self.advance() if self.tokenizer.token_type() == self.tokenizer.keyword_token and self.tokenizer.keyword() == 'else': # 'else' else_line = self.format_line() self.out.write(else_line) # '{' self.advance() symbol_line = self.format_line() self.out.write(symbol_line) # statements self.advance() self.compile_statements() # '}' symbol_line = self.format_line() self.out.write(symbol_line) self.advance() self.out.write('</ifStatement>\n') ################### ### EXPRESSIONS ### ################### def compile_subroutine_call(self, skip_subroutine_name=False): """ ############################################################################ ### subroutineCall: subroutineName 
'(' expressionList ')' | (className | ### ### varName) '.' subroutineName '(' expressionList ')' ### ############################################################################ """ if not skip_subroutine_name: # subroutineName or className or varName subroutine_class_var_name_line = self.format_line('used') self.out.write(subroutine_class_var_name_line) self.advance() # Check '(' or '.' if self.tokenizer.token == '.': # '.' symbol_line = self.format_line() self.out.write(symbol_line) # subroutineName self.advance() subroutine_name_line = self.format_line('used') self.out.write(subroutine_name_line) self.advance() # '(' symbol_line = self.format_line() self.out.write(symbol_line) # expressionList self.advance() self.compile_expression_list() # ')' symbol_line = self.format_line() self.out.write(symbol_line) self.advance() def compile_expression(self): """ ################################### ### expression: term (op term)* ### ################################### """ self.out.write('<expression>\n') ops = ['+', '-', '*', '/', '&', '|', '<', '>', '='] # 'term' self.compile_term() # Check if there is (op term)* part while self.tokenizer.token in ops: # op op_line = self.format_line() self.out.write(op_line) # term self.advance() self.compile_term() self.out.write('</expression>\n') def compile_term(self): """ ################################################################ ### integerConstant | stringConstant | keywordConstant | ### ### varName | varName '[' expression ']' | subroutineCall | ### ### '(' expression ')' | unaryOp term ### ################################################################ """ self.out.write('<term>\n') unary_ops = ['-', '~'] ############################################# ### constant, name, expression or unaryOp ### ############################################# # '(' expression ')' if self.tokenizer.token == '(': # '(' symbol_line = self.format_line() self.out.write(symbol_line) # expression self.advance() self.compile_expression() # ')' 
symbol_line = self.format_line() self.out.write(symbol_line) self.advance() # unaryOp term elif self.tokenizer.token in unary_ops: # unaryOp unary_op_line = self.format_line() self.out.write(unary_op_line) # term self.advance() self.compile_term() # integerConstant | stringConstant | keywordConstant | # varName | varName '[' expression ']' | subroutineCall else: # constant or name constant_or_name = self.format_line('used') self.out.write(constant_or_name) # varName '[' expression ']' | subroutineCall or end of compile_term function # Check if expression: '[', subroutineCall: '(' with parameter skip_subroutine_name = True, # otherwise end of compile_term function self.advance() # '[' expression ']' if self.tokenizer.token == '[': # '[' symbol_line = self.format_line() self.out.write(symbol_line) # expression self.advance() self.compile_expression() # ']' symbol_line = self.format_line() self.out.write(symbol_line) self.advance() # subroutineCall with skip_subroutine_name=True elif self.tokenizer.token in ['(', '.']: self.compile_subroutine_call(skip_subroutine_name=True) self.out.write('</term>\n') def compile_expression_list(self): """ ######################################################## ### expressionList: (expression (',' expression)* )? ### ######################################################## """ self.out.write('<expressionList>\n') # Check if token is ')', if so we got empty expression list if self.tokenizer.token != ')': # 'expression' self.compile_expression() # Check if token is ',', if so we got more expressions while self.tokenizer.token == ',': # ',' comma_line = self.format_line() self.out.write(comma_line) # expression self.advance() self.compile_expression() self.out.write('</expressionList>\n')
class CompilationEngine:
    """Compile one Jack class to VM code in a single pass over the tokens.

    The engine reads tokens from the supplied tokenizer, records declared
    variables in a class-level and a subroutine-level symbol table, and emits
    VM commands through the supplied VM writer.

    Token-position convention: every ``compile_*`` method is entered with the
    tokenizer on the first token of its construct.  Declaration, statement,
    term and subroutine-call methods return with the tokenizer on the LAST
    token of the construct.  ``compile_expression``,
    ``compile_expression_list``, ``compile_parameter_list`` and
    ``compile_statements`` return with the tokenizer on the first token
    AFTER what they consumed.

    Known gaps: array element assignment/access and string constants are not
    compiled yet (see the TODO notes in ``compile_let`` and ``compile_term``).
    """

    def __init__(self, tokenizer, vm_writer):
        """Store the collaborators and initialise per-compilation state.

        Args:
            tokenizer: token source for the Jack file being compiled.
            vm_writer: sink for the generated VM commands.
        """
        self.tokenizer = tokenizer
        self.vm_writer = vm_writer

        # One table for the class scope, one for the subroutine scope.  The
        # subroutine table is reset for every subroutine, so a single
        # instance is enough.
        self.class_symbol_table = SymbolTable()
        self.subroutine_symbol_table = SymbolTable()

        # Counters used to build distinct labels for each if/while statement.
        self.if_counter = 0
        self.while_counter = 0

        # Names needed to build "Class.subroutine" VM function names.
        self.current_class_name = None
        self.current_subroutine_name = None

        # One of "function", "method", "constructor" while inside a subroutine.
        self.subroutine_type = None

    def run(self):
        """Advance to the first token and compile the class that follows."""
        self.tokenizer.advance()
        return self.compile_class()

    ###################################################
    # ASSERTIONS
    ###################################################
    # These helpers raise AssertionError explicitly instead of using the
    # ``assert`` statement, so the syntax checks are not stripped when Python
    # runs with -O.

    def assert_identifier(self):
        """Raise AssertionError unless the current token is an identifier."""
        if not self.tokenizer.identifier():
            raise AssertionError(
                f"Expected an identifier but found: {self.tokenizer.current_token}"
            )

    def assert_keyword(self, keyword=None):
        """Raise AssertionError unless the current token is an expected keyword.

        Args:
            keyword: ``None`` to accept any keyword, a string to require that
                exact keyword, or a list/tuple of acceptable keywords.
        """
        if keyword and isinstance(keyword, (list, tuple)):
            if not (self.tokenizer.keyword()
                    and self.tokenizer.current_token in keyword):
                raise AssertionError(
                    f"Expected one of keywords {keyword} but found: {self.tokenizer.current_token}"
                )
        elif keyword:
            if not (self.tokenizer.keyword()
                    and self.tokenizer.current_token == keyword):
                raise AssertionError(
                    f"Expected keyword {keyword} but found: {self.tokenizer.current_token}"
                )
        elif not self.tokenizer.keyword():
            raise AssertionError(
                f"Expected a keyword but found: {self.tokenizer.current_token}"
            )

    def assert_return_type(self):
        """Raise AssertionError unless the current token can be a return type."""
        if not (self.tokenizer.keyword() or self.tokenizer.identifier()):
            raise AssertionError(
                f"Expected a keyword or identifier as the return type but found: {self.tokenizer.current_token}"
            )

    def assert_symbol(self, symbol=None):
        """Raise AssertionError unless the current token is an expected symbol.

        Args:
            symbol: ``None`` to accept any symbol, a string to require that
                exact symbol, or a list/tuple of acceptable symbols.
        """
        if symbol and isinstance(symbol, (list, tuple)):
            if not (self.tokenizer.symbol()
                    and self.tokenizer.current_token in symbol):
                raise AssertionError(
                    f"Expected one of symbols {symbol} but found: {self.tokenizer.current_token}"
                )
        elif symbol:
            if not (self.tokenizer.symbol()
                    and self.tokenizer.current_token == symbol):
                raise AssertionError(
                    f"Expected symbol \"{symbol}\" but found: {self.tokenizer.current_token}"
                )
        elif not self.tokenizer.symbol():
            raise AssertionError(
                f"Expected a symbol but found: {self.tokenizer.current_token}"
            )

    ###################################################
    # HELPERS
    ###################################################

    def _lookup_variable(self, name):
        """Return ``(segment, index, type)`` for *name*, or ``None``.

        The subroutine-level table is consulted first, then the class-level
        table.  Subroutine-level kinds are used as the VM segment unchanged;
        the class-level kind "field" is mapped to the "this" segment.
        """
        table = self.subroutine_symbol_table
        if table.has_name(name):
            return (table.kind_of(name), table.index_of(name),
                    table.type_of(name))

        table = self.class_symbol_table
        if table.has_name(name):
            kind = table.kind_of(name)
            segment = "this" if kind == "field" else kind
            return segment, table.index_of(name), table.type_of(name)

        return None

    ###################################################
    # COMPILER METHODS
    ###################################################

    def compile_class(self):
        """Compile ``'class' className '{' classVarDec* subroutineDec* '}'``."""
        self.assert_keyword('class')

        self.tokenizer.advance()
        self.assert_identifier()
        self.current_class_name = self.tokenizer.current_token

        self.tokenizer.advance()
        self.assert_symbol('{')

        # Reset in case more than one class is compiled with this engine.
        self.class_symbol_table.reset()

        self.tokenizer.advance()
        while self.tokenizer.keyword() and self.tokenizer.current_token in [
                'field', 'static'
        ]:
            self.compile_class_var_dec()
            self.tokenizer.advance()

        while self.tokenizer.keyword() and self.tokenizer.current_token in [
                'constructor', 'function', 'method'
        ]:
            # Each subroutine starts with an empty subroutine-level scope.
            self.subroutine_symbol_table.reset()
            self.compile_subroutine_dec()
            self.tokenizer.advance()

        self.assert_symbol('}')

    def compile_class_var_dec(self):
        """Record ``('static'|'field') type varName (',' varName)* ';'``.

        Only the class-level symbol table is updated; no VM code is written.
        """
        self.assert_keyword(['field', 'static'])
        kind = self.tokenizer.current_token
        self.tokenizer.advance()

        typ = self.tokenizer.current_token
        self.tokenizer.advance()

        name = self.tokenizer.current_token
        self.tokenizer.advance()
        self.class_symbol_table.define(name, typ, kind)

        # Comma-separated declarations, e.g. ``field int x, y, z;``.
        while self.tokenizer.current_token != ';':
            self.assert_symbol(',')
            self.tokenizer.advance()
            name = self.tokenizer.current_token
            self.tokenizer.advance()
            self.class_symbol_table.define(name, typ, kind)

    def compile_do(self):
        """Compile ``'do' subroutineCall ';'``."""
        self.assert_keyword('do')

        self.tokenizer.advance()
        self.compile_subroutine_call()

        self.tokenizer.advance()
        self.assert_symbol(';')

        # A do-statement ignores the call's return value, so discard it.
        self.vm_writer.write_pop('temp', 0)

    def compile_expression(self):
        """Compile ``term (op term)*``.

        Operators are applied strictly left to right (no precedence).  On
        return the tokenizer is on the first token after the expression.
        """
        self.compile_term()
        self.tokenizer.advance()

        # Loop rather than test once, so chains such as ``a + b + c`` are
        # fully consumed.
        while self.tokenizer.binary_op():
            binary_op = self.tokenizer.current_token
            self.tokenizer.advance()
            self.compile_term()
            self.vm_writer.write_binary_op(binary_op)
            self.tokenizer.advance()

    def compile_expression_list(self):
        """Compile ``(expression (',' expression)*)?`` and return the count.

        Returns:
            The number of expressions compiled.
        """
        expression_count = 0
        while self.tokenizer.current_token not in [')', '}']:
            if self.tokenizer.current_token == ',':
                self.tokenizer.advance()
            else:
                expression_count += 1
                self.compile_expression()
        return expression_count

    def compile_if_statement(self):
        """Compile ``'if' '(' expression ')' '{' statements '}'`` with an
        optional ``'else' '{' statements '}'``."""
        self.assert_keyword('if')

        # Labels are captured in locals up front so nested if-statements,
        # which bump the counter again, cannot change them.
        self.if_counter += 1
        label_1 = f"IF_STATEMENT_{self.if_counter}_A"
        label_2 = f"IF_STATEMENT_{self.if_counter}_B"

        self.tokenizer.advance()
        self.assert_symbol('(')

        self.tokenizer.advance()
        self.compile_expression()
        self.assert_symbol(')')

        # Jump to label 1 (the else part / end) when the condition is false.
        self.vm_writer.write_command('not')
        self.vm_writer.write_if(label_1)

        self.tokenizer.advance()
        self.assert_symbol('{')

        self.tokenizer.advance()
        self.compile_statements()
        self.assert_symbol('}')

        self.vm_writer.write_goto(label_2)
        self.vm_writer.write_label(label_1)

        if self.tokenizer.peek() == 'else':
            self.tokenizer.advance()  # now on 'else'

            self.tokenizer.advance()
            self.assert_symbol('{')

            self.tokenizer.advance()
            self.compile_statements()
            self.assert_symbol('}')

        self.vm_writer.write_label(label_2)

    def compile_let(self):
        """Compile ``'let' varName '=' expression ';'``.

        Raises:
            AssertionError: on a syntax error or an undeclared variable.
        """
        self.assert_keyword('let')

        self.tokenizer.advance()
        self.assert_identifier()
        name = self.tokenizer.current_token

        self.tokenizer.advance()
        self.assert_symbol()

        # TODO: handle array targets (``let a[i] = ...``); for now only '='
        # is accepted after the variable name.
        self.assert_symbol('=')

        self.tokenizer.advance()
        self.compile_expression()
        self.assert_symbol(';')

        symbol = self._lookup_variable(name)
        if symbol is None:
            raise AssertionError(f"Undeclared variable found: {name}")
        segment, index, _ = symbol
        self.vm_writer.write_pop(segment, index)

    def compile_parameter_list(self):
        """Record ``((type varName) (',' type varName)*)?`` as arguments.

        Consumes tokens up to (not including) the closing ')'.
        """
        # Collect everything except the commas; the result alternates
        # type, name, type, name, ...  e.g. ["int", "x", "char", "y"].
        param_content = []
        while self.tokenizer.current_token != ')':
            if self.tokenizer.current_token != ',':
                param_content.append(self.tokenizer.current_token)
            self.tokenizer.advance()

        # A trailing unpaired token (malformed input) is ignored by zip.
        for typ, name in zip(param_content[0::2], param_content[1::2]):
            self.subroutine_symbol_table.define(name, typ, 'argument')

    def compile_return(self):
        """Compile ``'return' expression? ';'``."""
        self.assert_keyword('return')
        self.tokenizer.advance()

        if self.tokenizer.keyword() and self.tokenizer.current_token == "this":
            self.vm_writer.write_push('pointer', 0)
            self.tokenizer.advance()
        elif self.tokenizer.current_token != ';':
            self.compile_expression()
        else:
            # A subroutine without a return value still returns something:
            # push a dummy constant 0.
            self.vm_writer.write_push('constant', 0)

        self.assert_symbol(';')
        self.vm_writer.write_return()

    def compile_statement(self):
        """Dispatch on the current token to the matching statement compiler.

        Raises:
            AssertionError: if the current token does not start a statement.
        """
        handlers = {
            'do': self.compile_do,
            'let': self.compile_let,
            'if': self.compile_if_statement,
            'while': self.compile_while_statement,
            'return': self.compile_return,
        }
        handler = handlers.get(self.tokenizer.current_token)
        if handler is None:
            raise AssertionError(
                f"Unrecognized token in compile_statement(): {self.tokenizer.current_token}"
            )
        return handler()

    def compile_statements(self):
        """Compile statements until the enclosing block's '}' is reached."""
        while self.tokenizer.current_token != '}':
            # Stray symbols are stepped over; anything else must start a
            # statement.
            if not self.tokenizer.symbol():
                self.compile_statement()
            self.tokenizer.advance()

    def compile_subroutine_body(self):
        """Compile ``'{' varDec* statements '}'`` and emit the function header.

        Entered with the tokenizer on the ')' closing the parameter list.
        """
        self.tokenizer.advance()
        self.assert_symbol('{')

        # The VM function declaration needs the number of local variables,
        # so all varDecs are processed before it is written.
        local_count = 0
        self.tokenizer.advance()
        while self.tokenizer.current_token == 'var':
            local_count += self.compile_var_dec()
            self.tokenizer.advance()

        self.vm_writer.write_function(
            f"{self.current_class_name}.{self.current_subroutine_name}",
            local_count)

        if self.subroutine_type == 'constructor':
            # Allocate one word per field and anchor THIS to the new object.
            field_count = self.class_symbol_table.var_count('field')
            self.vm_writer.write_push("constant", field_count)
            self.vm_writer.write_call("Memory.alloc", 1)
            self.vm_writer.write_pop("pointer", 0)
        elif self.subroutine_type == "method":
            # Argument 0 is the receiver; anchor THIS to it.
            self.vm_writer.write_push("argument", 0)
            self.vm_writer.write_pop("pointer", 0)

        self.compile_statements()
        self.assert_symbol('}')

    def compile_subroutine_call(self):
        """Compile ``name '(' expressionList ')'`` or
        ``(className | varName) '.' name '(' expressionList ')'``.

        A call through a declared variable or without any prefix is treated
        as a method call: the receiver is pushed first and counted as an
        argument.
        """
        arg_count = 0

        self.assert_identifier()
        # Subroutine name, class name, or object variable name.
        name = self.tokenizer.current_token

        self.tokenizer.advance()
        self.assert_symbol(['(', '.'])

        if self.tokenizer.current_token == '.':
            symbol = self._lookup_variable(name)
            if symbol is not None:
                # ``obj.method(a, b)`` becomes ``Type.method(obj, a, b)``.
                segment, index, typ = symbol
                self.vm_writer.write_push(segment, index)
                arg_count += 1
                name = typ

            name += "."

            self.tokenizer.advance()
            self.assert_identifier()
            name += self.tokenizer.current_token

            self.tokenizer.advance()
        else:
            # No prefix: assumed to be a method call on the current object.
            # The receiver has to be pushed BEFORE the explicit arguments so
            # that it ends up as argument 0 of the callee.
            self.vm_writer.write_push("pointer", 0)
            arg_count += 1
            name = f"{self.current_class_name}.{name}"

        self.assert_symbol('(')

        self.tokenizer.advance()
        arg_count += self.compile_expression_list()
        self.assert_symbol(')')

        self.vm_writer.write_call(name, arg_count)

    def compile_subroutine_dec(self):
        """Compile ``('constructor'|'function'|'method') ('void'|type)
        subroutineName '(' parameterList ')' subroutineBody``."""
        self.assert_keyword(['constructor', 'method', 'function'])
        self.subroutine_type = self.tokenizer.current_token

        self.tokenizer.advance()
        self.assert_return_type()

        # A method receives the object itself as an implicit first argument.
        # Its type is the enclosing class, not the method's return type.
        if self.subroutine_type == 'method':
            self.subroutine_symbol_table.define(
                'this', self.current_class_name, 'argument')

        self.tokenizer.advance()
        self.assert_identifier()
        self.current_subroutine_name = self.tokenizer.current_token

        self.tokenizer.advance()
        self.assert_symbol('(')

        self.tokenizer.advance()
        self.compile_parameter_list()
        # compile_parameter_list() stops on the closing ')'.
        self.assert_symbol(')')

        self.compile_subroutine_body()

    def compile_term(self):
        """Compile a single term and leave the tokenizer on its last token.

        Raises:
            AssertionError: on an unknown identifier or an unsupported token.
        """
        # An identifier followed by '.', '(' or '[' is a subroutine call or
        # an array access rather than a plain variable.
        if self.tokenizer.identifier() and self.tokenizer.peek() in [
                '.', '(', '['
        ]:
            next_token = self.tokenizer.peek()
            if next_token in ['.', '(']:
                self.compile_subroutine_call()
            elif next_token == '[':
                # TODO: handle the array identifier itself; only the index
                # expression is compiled for now.
                self.tokenizer.advance()
                self.assert_symbol('[')

                self.tokenizer.advance()
                self.compile_expression()
                self.assert_symbol(']')

        # unaryOp term, e.g. -3 or ~(~(x))
        elif self.tokenizer.unary_op():
            unary_op = self.tokenizer.current_token
            self.tokenizer.advance()
            self.compile_term()
            self.vm_writer.write_unary_op(unary_op)

        # '(' expression ')'
        elif self.tokenizer.current_token == '(':
            self.tokenizer.advance()
            self.compile_expression()
            self.assert_symbol(')')

        # Integer constant; the explicit == 0 test keeps a zero value from
        # being treated as "not an integer".
        elif self.tokenizer.int_val() or self.tokenizer.int_val() == 0:
            self.vm_writer.write_push("constant", self.tokenizer.current_token)

        elif self.tokenizer.keyword():
            if self.tokenizer.current_token in ["null", "false"]:
                self.vm_writer.write_push("constant", 0)
            elif self.tokenizer.current_token == "true":
                # true is -1: push 1 and negate it.
                self.vm_writer.write_push("constant", 1)
                self.vm_writer.write_command("neg")
            elif self.tokenizer.current_token == "this":
                self.vm_writer.write_push("pointer", 0)

        # A standalone variable.
        elif self.tokenizer.identifier():
            name = self.tokenizer.current_token
            symbol = self._lookup_variable(name)
            if symbol is None:
                raise AssertionError(f"Unknown identifier: {name}")
            segment, index, _ = symbol
            self.vm_writer.write_push(segment, index)

        # TODO: string constants need String.new() and String.appendChar();
        # nothing is emitted for them yet.
        elif self.tokenizer.string_val():
            pass

        else:
            raise AssertionError(
                f"Unsure how to handle parse the current token as a term: {self.tokenizer.current_token}"
            )

    def compile_var_dec(self):
        """Record ``'var' type varName (',' varName)* ';'`` as locals.

        Returns:
            The number of variables declared by this statement.
        """
        var_count = 1

        self.assert_keyword('var')
        kind = 'local'
        self.tokenizer.advance()

        typ = self.tokenizer.current_token
        self.tokenizer.advance()

        name = self.tokenizer.current_token
        self.tokenizer.advance()
        self.subroutine_symbol_table.define(name, typ, kind)

        # Comma-separated declarations, e.g. ``var int x, y;``.
        while self.tokenizer.current_token != ';':
            self.assert_symbol(',')
            var_count += 1
            self.tokenizer.advance()

            name = self.tokenizer.current_token
            self.tokenizer.advance()
            self.subroutine_symbol_table.define(name, typ, kind)

        return var_count

    def compile_while_statement(self):
        """Compile ``'while' '(' expression ')' '{' statements '}'``."""
        self.assert_keyword('while')

        # Labels are captured in locals so nested loops cannot change them.
        self.while_counter += 1
        label_1 = f"WHILE_STATEMENT_{self.while_counter}_A"
        label_2 = f"WHILE_STATEMENT_{self.while_counter}_B"

        # Loop head: the condition is re-evaluated on every iteration.
        self.vm_writer.write_label(label_1)

        self.tokenizer.advance()
        self.assert_symbol('(')

        self.tokenizer.advance()
        self.compile_expression()
        self.assert_symbol(')')

        # Leave the loop when the condition is false.
        self.vm_writer.write_command("not")
        self.vm_writer.write_if(label_2)

        self.tokenizer.advance()
        self.assert_symbol('{')

        self.tokenizer.advance()
        self.compile_statements()
        self.assert_symbol('}')

        self.vm_writer.write_goto(label_1)
        self.vm_writer.write_label(label_2)
from symbol_table import SymbolTable, SymbolKind

# Smoke test for SymbolTable: define a handful of symbols, then check the
# index, kind and type the table reports for each of them.

s1 = SymbolTable()

# (name, type, kind) triples, defined in this order.
for sym_name, sym_type, sym_kind in (
    ("a", "int", SymbolKind.ARG),
    ("b", "int", SymbolKind.ARG),
    ("c", "String", SymbolKind.FEILD),
    ("d", "Point", SymbolKind.ARG),
):
    s1.define(sym_name, sym_type, sym_kind)

# Indexes are expected to run independently per kind.
for sym_name, expected_index in (("a", 0), ("b", 1), ("c", 0), ("d", 2)):
    assert s1.get_index_of(sym_name) == expected_index

# Kinds of defined symbols, then of names that were never defined.
for sym_name, expected_kind in (
    ("d", SymbolKind.ARG),
    ("c", SymbolKind.FEILD),
    ("e", SymbolKind.NONE),
    ("spam", SymbolKind.NONE),
):
    assert s1.get_kind_of(sym_name) == expected_kind

# Declared types.
for sym_name, expected_type in (("a", "int"), ("c", "String"), ("d", "Point")):
    assert s1.get_type_of(sym_name) == expected_type

print("All assertions are True!")
class CompilationEngine:
    """Recursive-descent compiler for a single Jack class.

    The engine walks the token stream with one token of lookahead
    (``self.tok`` is the token most recently consumed, ``self.nextTok`` the
    one that will be consumed next).  Every ``_compile*`` / ``_read*`` method
    appends the nodes it parses to the parse-tree element it is given and
    emits the corresponding VM commands through ``self.writer``.
    """

    def __init__(self, source, destination):
        """Set up the tokenizer, VM writer and symbol table.

        Args:
            source: Jack source handed to ``tokenizor.newTokenizor``.
            destination: Output target handed to ``VMWriter``.
        """
        self.src = source
        self.dst = destination
        self.writer = VMWriter(destination)
        self.iter = Lookahead(tokenizor.newTokenizor(self.src))
        self._symbol_table = SymbolTable()

    def compile(self):
        """Compile the whole class and return the root parse-tree element."""
        return self._compileClass()

    # ------------------------------------------------------------------
    # Program structure
    # ------------------------------------------------------------------

    def _compileClass(self):
        """Parse ``class Name { classVarDec* subroutineDec* }``."""
        classE = Element(ELEMENTS.CLASS)
        self._readKeyword(classE, ELEMENTS.CLASS)
        self.className = self._readIdentifier(classE)
        self._readSymbol(classE, _SYMBOLS.BRACKET_CURLY_OPEN)
        self._compileClassVarDec(classE)
        self._compileSubroutine(classE)
        self._readSymbol(classE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        return classE

    def _compileClassVarDec(self, parent):
        """Parse every consecutive class-level variable declaration.

        The declared names are entered into the symbol table by
        ``_readIdentifier``; no VM code is emitted here.
        """
        while (self.nextTok.type == tokenizor.KEYWORD
               and self.nextTok.value in _CLASSVARDEC.FIELD_TYPES):
            classVarDecE = Element(ELEMENTS.CLASSVARDEC)
            self._readKeyword(classVarDecE)
            self._readType(classVarDecE)
            self._readIdentifier(classVarDecE)
            while self._readSymbolOptional(classVarDecE, _SYMBOLS.COMMA):
                self._readIdentifier(classVarDecE)
            self._readSymbol(classVarDecE, _SYMBOLS.SEMI_COLON)
            parent.append(classVarDecE)

    def _compileSubroutine(self, parent):
        """Parse and compile every consecutive subroutine declaration."""
        while (self.nextTok
               and self.nextTok.type == tokenizor.KEYWORD
               and self.nextTok.value in _SUBROUTINEDEC.TYPES):
            subroutineDecE = Element(ELEMENTS.SUBROUTINEDEC)
            function_type = self._readKeyword(subroutineDecE)
            self._readReturnType(subroutineDecE)
            self.methodName = self._readIdentifier(subroutineDecE)

            self._symbol_table.startSubroutine(self.className, self.methodName)
            if function_type == _SUBROUTINEDEC.METHOD:
                # A method receives the object it operates on as its first argument.
                self._symbol_table.define("this", self.className, SYM_KINDS.ARG)

            # Label numbering restarts for every subroutine.
            self._uid = -1

            self._readSymbol(subroutineDecE, _SYMBOLS.PARENTHESES_OPEN)
            self._compileParameters(subroutineDecE)
            self._readSymbol(subroutineDecE, _SYMBOLS.PARENTHESES_CLOSE)
            self._compileSubroutineBody(subroutineDecE, function_type)
            parent.append(subroutineDecE)

    def _gen_label(self, type_):
        """Return a fresh label ``Class.method.<type_>.<n>``."""
        self._uid += 1
        return "%s.%s.%s.%d" % (self.className, self.methodName, type_, self._uid)

    def _gen_labels(self, *parts):
        """Return one label per entry of ``parts``, all sharing one fresh number."""
        self._uid += 1
        return [
            "%s.%s.%s.%d" % (self.className, self.methodName, part, self._uid)
            for part in parts
        ]

    def _compileSubroutineBody(self, parent, function_type):
        """Parse ``{ varDec* statements }`` and emit the function prologue.

        Args:
            parent: The subroutine-declaration element; its third child is
                the subroutine name.
            function_type: The keyword that introduced the subroutine.
        """
        bodyE = Element(ELEMENTS.SUBROUTINEBODY)
        self._readSymbol(bodyE, _SYMBOLS.BRACKET_CURLY_OPEN)

        # The VM ``function`` command needs the local count, so the variable
        # declarations are parsed before anything is emitted.
        local_count = self._compileVarDec(bodyE)
        function_name = parent[2].text
        function_full_name = "%s.%s" % (self.className, function_name)
        self.writer.writeFunction(function_full_name, local_count)

        if function_type == _SUBROUTINEDEC.CONSTRUCTOR:
            # Allocate one word per field and make the new block ``this``.
            field_count = self._symbol_table.varCount(SYM_KINDS.FIELD)
            self.writer.writePush(SEGMENT.CONST, field_count)
            self.writer.writeCall("Memory.alloc", 1)
            self.writer.writePop(SEGMENT.POINTER, 0)
        elif function_type == _SUBROUTINEDEC.METHOD:
            # Argument 0 is the receiver: install it as ``this``.
            self.writer.writePush(SEGMENT.ARG, 0)
            self.writer.writePop(SEGMENT.POINTER, 0)

        self._compileStatements(bodyE)
        self._readSymbol(bodyE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        parent.append(bodyE)

    # ------------------------------------------------------------------
    # Statements
    # ------------------------------------------------------------------

    def _compileStatements(self, parent):
        """Parse and compile a (possibly empty) sequence of statements."""
        statementsE = Element(ELEMENTS.STATEMENTS)
        while (self.nextTok.type == tokenizor.KEYWORD
               and self.nextTok.value in _STATEMENTS.TYPE_NAMES):
            keyword = self.nextTok.value
            if keyword == _STATEMENTS.LET:
                self._compileLet(statementsE)
            elif keyword == _STATEMENTS.IF:
                self._compileIf(statementsE)
            elif keyword == _STATEMENTS.WHILE:
                self._compileWhile(statementsE)
            elif keyword == _STATEMENTS.DO:
                self._compileDo(statementsE)
            elif keyword == _STATEMENTS.RETURN:
                self._compileReturn(statementsE)
            else:
                # Without this the loop would spin forever on a keyword that
                # is listed as a statement but has no handler above.
                raise self._syntaxError("Unexpected %s." % keyword, self.nextTok)

        # An empty element gets a newline as text so it is serialised with
        # separate open/close tags.
        if len(statementsE) == 0:
            statementsE.text = "\n"
        parent.append(statementsE)

    def _compileLet(self, parent):
        """Compile ``let name = expr;`` or ``let name[index] = expr;``."""
        statementE = Element(ELEMENTS.STATEMENT_LET)
        self._readKeyword(statementE)
        identifier = self._readIdentifier(statementE)

        is_array = self._readSymbolOptional(statementE, _SYMBOLS.BRACKET_OPEN)
        if is_array:
            # Leave the target address (index + base) on the stack.
            self._compileExpression(statementE)
            self.writer.writePush(*self._identifier_data(identifier))
            self.writer.writeArithmetic("add")
            self._readSymbol(statementE, _SYMBOLS.BRACKET_CLOSE)

        self._readSymbol(statementE, _SYMBOLS.EQUAL)
        self._compileExpression(statementE)
        self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)

        if is_array:
            # Park the value in temp 0 while the address is moved into
            # pointer 1; evaluating the right-hand side may itself have
            # changed pointer 1.
            self.writer.writePop(SEGMENT.TEMP, 0)
            self.writer.writePop(SEGMENT.POINTER, 1)
            self.writer.writePush(SEGMENT.TEMP, 0)
            self.writer.writePop(SEGMENT.THAT, 0)
        else:
            self.writer.writePop(*self._identifier_data(identifier))
        parent.append(statementE)

    def _compileIf(self, parent):
        """Compile ``if (expr) { ... }`` with an optional ``else { ... }``."""
        label_else, label_end = self._gen_labels("if.else", "if.end")
        statementE = Element(ELEMENTS.STATEMENT_IF)
        self._readKeyword(statementE)
        self._readSymbol(statementE, _SYMBOLS.PARENTHESES_OPEN)
        self._compileExpression(statementE)
        self._readSymbol(statementE, _SYMBOLS.PARENTHESES_CLOSE)

        # Jump to the else part when the condition is false.
        self.writer.writeArithmetic("not")
        self.writer.writeIf(label_else)

        self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
        self._compileStatements(statementE)
        self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        self.writer.writeGoto(label_end)
        self.writer.writeLabel(label_else)

        if self._readKeywordOptional(statementE, _KEYWORDS.ELSE):
            self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
            self._compileStatements(statementE)
            self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)

        self.writer.writeLabel(label_end)
        parent.append(statementE)

    def _compileWhile(self, parent):
        """Compile ``while (expr) { ... }``."""
        label_start, label_end = self._gen_labels("while.start", "while.end")
        self.writer.writeLabel(label_start)

        statementE = Element(ELEMENTS.STATEMENT_WHILE)
        self._readKeyword(statementE)
        self._readSymbol(statementE, _SYMBOLS.PARENTHESES_OPEN)
        self._compileExpression(statementE)
        self._readSymbol(statementE, _SYMBOLS.PARENTHESES_CLOSE)

        # Leave the loop when the condition is false.
        self.writer.writeArithmetic("not")
        self.writer.writeIf(label_end)

        self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
        self._compileStatements(statementE)
        self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        parent.append(statementE)

        self.writer.writeGoto(label_start)
        self.writer.writeLabel(label_end)

    def _compileReturn(self, parent):
        """Compile ``return;`` or ``return expr;``."""
        statementE = Element(ELEMENTS.STATEMENT_RETURN)
        self._readKeyword(statementE)
        if self._nextIsSymbol(_SYMBOLS.SEMI_COLON):
            # A value is always returned; use constant 0 when none is given.
            self.writer.writePush(SEGMENT.CONST, 0)
        else:
            self._compileExpression(statementE)
        self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
        self.writer.writeReturn()
        parent.append(statementE)

    def _compileDo(self, parent):
        """Compile ``do subroutineCall;`` and discard the returned value."""
        statementE = Element(ELEMENTS.STATEMENT_DO)
        self._readKeyword(statementE, _STATEMENTS.DO)
        identifier = self._readIdentifier(statementE)
        target, nArgs = self._compileCallTarget(statementE, identifier)
        nArgs += self._compileExpressionList(statementE)
        self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
        self.writer.writeCall(target, nArgs)
        self.writer.writePop(SEGMENT.TEMP, 0)
        parent.append(statementE)

    def _compileCallTarget(self, parent, identifier):
        """Resolve what a subroutine call refers to and push its receiver.

        ``identifier`` has already been consumed.  Three shapes are handled:

        * ``var.name(...)``  -- ``identifier`` is a known variable: push it
          and call ``<type of var>.name``.
        * ``Other.name(...)`` -- ``identifier`` is not in the symbol table:
          call ``Other.name`` with no receiver.
        * ``name(...)``      -- no dot follows: push ``this`` (pointer 0)
          and call ``<current class>.name``.

        Returns:
            ``(qualified_name, receiver_count)`` where ``receiver_count`` is
            1 when a receiver was pushed and 0 otherwise.
        """
        if self._readSymbolOptional(parent, _SYMBOLS.DOT):
            type_ = self._symbol_table.typeOf(identifier)
            if type_:
                self.writer.writePush(*self._identifier_data(identifier))
                return "%s.%s" % (type_, self._readIdentifier(parent)), 1
            return "%s.%s" % (identifier, self._readIdentifier(parent)), 0

        self.writer.writePush(SEGMENT.POINTER, 0)
        return "%s.%s" % (self.className, identifier), 1

    # ------------------------------------------------------------------
    # Expressions
    # ------------------------------------------------------------------

    def _compileExpression(self, parent):
        """Compile ``term (op term)*``; each operator is emitted after its operands."""
        expressionE = Element(ELEMENTS.EXPRESSION)
        self._readTerm(expressionE)
        while (self.nextTok.type == tokenizor.SYMBOL
               and self.nextTok.value in _SYMBOLS.OPERATORS):
            symbol = self._readSymbol(expressionE)
            self._readTerm(expressionE)
            self.writer.writeArithmetic(symbol)
        parent.append(expressionE)

    def _compileExpressionList(self, parent):
        """Compile ``( expr, expr, ... )`` including both parentheses.

        Returns:
            The number of expressions compiled.
        """
        self._readSymbol(parent, _SYMBOLS.PARENTHESES_OPEN)
        expListE = Element(ELEMENTS.EXPRESSION_LIST)
        nArgs = 0
        while not self._nextIsSymbol(_SYMBOLS.PARENTHESES_CLOSE):
            self._compileExpression(expListE)
            self._readSymbolOptional(expListE, _SYMBOLS.COMMA)
            nArgs += 1
        # hack for TextComparer
        if len(expListE) == 0:
            expListE.text = "\n"
        parent.append(expListE)
        self._readSymbol(parent, _SYMBOLS.PARENTHESES_CLOSE)
        return nArgs

    def _compileVarDec(self, parent):
        """Parse every consecutive ``var`` declaration.

        Returns:
            The total number of local variables declared.
        """
        local_count = 0
        while (self.nextTok.type == tokenizor.KEYWORD
               and self.nextTok.value == _KEYWORDS.VAR):
            varDecE = Element(ELEMENTS.VAR_DEC)
            self._readKeyword(varDecE, _KEYWORDS.VAR)
            self._readType(varDecE)
            self._readIdentifier(varDecE)
            local_count += 1
            while self._readSymbolOptional(varDecE, _SYMBOLS.COMMA):
                self._readIdentifier(varDecE)
                local_count += 1
            self._readSymbol(varDecE, _SYMBOLS.SEMI_COLON)
            parent.append(varDecE)
        return local_count

    def _compileParameters(self, parent):
        """Parse a (possibly empty) parameter list, without its parentheses."""
        paramListE = Element(ELEMENTS.PARAM_LIST)
        while ((self.nextTok.type == tokenizor.KEYWORD
                and self.nextTok.value in _CLASSVARDEC.VAR_TYPES)
               or self.nextTok.type == tokenizor.IDENTIFIER):
            self._readType(paramListE)
            self._readIdentifier(paramListE)
            self._readSymbolOptional(paramListE, _SYMBOLS.COMMA)
        if len(paramListE) == 0:
            paramListE.text = "\n"
        parent.append(paramListE)

    ##############################
    ########## READ ##############
    ##############################

    def _readTerm(self, parent):
        """Parse one term and emit the code that leaves its value on the stack.

        Raises:
            SyntaxError: If the next token cannot start a term.
        """
        termE = Element(ELEMENTS.TERM)
        if self.nextTok.type == tokenizor.INTEGER:
            self.next()
            termE.append(_leafElement(ELEMENTS.INTEGER_CONSTANT, self.tok.value))
            self.writer.writePush(SEGMENT.CONST, self.tok.value)
        elif self.nextTok.type == tokenizor.STRING:
            self.next()
            termE.append(_leafElement(ELEMENTS.STRING_CONSTANT, self.tok.value))
            # Build the string at run time: String.new(length), then one
            # appendChar call per character.
            string_value = self.tok.value
            self.writer.writePush(SEGMENT.CONST, len(string_value))
            self.writer.writeCall("String.new", 1)
            for char in string_value:
                self.writer.writePush(SEGMENT.CONST, ord(char))
                self.writer.writeCall("String.appendChar", 2)
        elif (self.nextTok.type == tokenizor.KEYWORD
              and self.nextTok.value in _KEYWORDS.CONSTANTS):
            self.next()
            termE.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
            _KW_CONT_WRITE[self.tok.value](self.writer)
        elif self.nextTok.type == tokenizor.IDENTIFIER:
            identifier = self._readIdentifier(termE)
            if self._readSymbolOptional(termE, _SYMBOLS.BRACKET_OPEN):
                # Array read: that[0] after pointing ``that`` at base + index.
                self._compileExpression(termE)
                self.writer.writePush(*self._identifier_data(identifier))
                self.writer.writeArithmetic("add")
                self.writer.writePop(SEGMENT.POINTER, 1)
                self.writer.writePush(SEGMENT.THAT, 0)
                self._readSymbol(termE, _SYMBOLS.BRACKET_CLOSE)
            elif (self._nextIsSymbol(_SYMBOLS.PARENTHESES_OPEN)
                  or self._nextIsSymbol(_SYMBOLS.DOT)):
                # Subroutine call, compiled exactly like the call in a ``do``
                # statement.  ``_compileExpressionList`` consumes both
                # parentheses, so no further ')' is read here.
                target, nArgs = self._compileCallTarget(termE, identifier)
                nArgs += self._compileExpressionList(termE)
                self.writer.writeCall(target, nArgs)
            else:
                self.writer.writePush(*self._identifier_data(identifier))
        elif self._nextIsSymbol(_SYMBOLS.PARENTHESES_OPEN):
            self.next()
            termE.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
            self._compileExpression(termE)
            self._readSymbol(termE, _SYMBOLS.PARENTHESES_CLOSE)
        elif (self.nextTok.type == tokenizor.SYMBOL
              and self.nextTok.value in _SYMBOLS.UNARY_OPARTORS):
            self.next()
            sym = self.tok.value
            termE.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
            self._readTerm(termE)
            self.writer.writeArithmeticUnary(sym)
        else:
            # The offending token is the lookahead, not the one already consumed.
            raise self._syntaxError("Unexpected %s." % self.nextTok.value,
                                    self.nextTok)
        parent.append(termE)

    def _identifier_data(self, identifier):
        """Return ``(segment, index)`` for a variable known to the symbol table."""
        return (_SEG_TRANSLATE[self._symbol_table.kindOf(identifier)],
                self._symbol_table.indexOf(identifier))

    def _readIdentifier(self, parent):
        """Consume an identifier, declaring or resolving it as appropriate.

        The identifier is a *declaration* when it is a name inside a class
        variable / ``var`` declaration (something follows the keyword, i.e.
        the type has been read) or a name inside a parameter list (the
        previous node is the parameter's type, not a comma).  Declarations
        are always entered into the symbol table, so a local or parameter
        can shadow a class-level variable of the same name.  Any other
        identifier is looked up; when found, its type/kind/index are attached
        to the parse-tree node.

        Returns:
            The identifier's text.
        """
        self.next()
        self._assertToken(self.tok, ELEMENTS.IDENTIFIER, type_=tokenizor.IDENTIFIER)
        name = self.tok.value
        element = _leafElement(ELEMENTS.IDENTIFIER, name)

        type_ = None
        kind = None
        index = None
        if (parent.tag in (ELEMENTS.CLASSVARDEC, ELEMENTS.VAR_DEC)
                and len(parent) > 1):
            type_ = parent[1].text
            kind = _SYM_KIND_MAP[parent[0].text]
        elif (parent.tag == ELEMENTS.PARAM_LIST
              and len(parent) > 0
              and parent[-1].text != _SYMBOLS.COMMA):
            # Directly after a comma the identifier is the *type* of the next
            # parameter (a class name) and must not be declared itself.
            type_ = parent[-1].text
            kind = SYM_KINDS.ARG

        if kind is not None:
            index = self._symbol_table.define(name, type_, kind)
        else:
            type_ = self._symbol_table.typeOf(name)
            if type_ is not None:
                kind = self._symbol_table.kindOf(name)
                index = self._symbol_table.indexOf(name)

        if kind is not None:
            element.set("type", type_)
            element.set("kind", str(kind))
            element.set("index", str(index))
        parent.append(element)
        return name

    def _readType(self, parent):
        """Consume a variable type: a built-in type keyword or a class name."""
        if (self.nextTok.type == tokenizor.KEYWORD
                and self.nextTok.value in _CLASSVARDEC.VAR_TYPES):
            self.next()
            parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
        else:
            self._readIdentifier(parent)

    def _readReturnType(self, parent):
        """Consume a return type: a return-type keyword or a class name."""
        if (self.nextTok.type == tokenizor.KEYWORD
                and self.nextTok.value in _SUBROUTINEDEC.RETURN_TYPES):
            self.next()
            parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
        else:
            self._readIdentifier(parent)

    def _readSymbol(self, parent, expected=None):
        """Consume a symbol (optionally a specific one) and return its text.

        Raises:
            SyntaxError: If the token is not a symbol, or not ``expected``.
        """
        self.next()
        expectedStr = expected if expected is not None else ELEMENTS.SYMBOL
        self._assertToken(self.tok, expectedStr,
                          type_=tokenizor.SYMBOL, value_=expected)
        parent.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
        return self.tok.value

    def _readKeyword(self, parent, expected=None):
        """Consume a keyword (optionally a specific one) and return its text.

        Raises:
            SyntaxError: If the token is not a keyword, or not ``expected``.
        """
        self.next()
        expectedStr = expected if expected is not None else ELEMENTS.KEYWORD
        self._assertToken(self.tok, expectedStr,
                          type_=tokenizor.KEYWORD, value_=expected)
        parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
        return self.tok.value

    def _readSymbolOptional(self, parent, expected):
        """Consume the symbol ``expected`` if it is next; report whether it was."""
        if self._nextIsSymbol(expected):
            self.next()
            parent.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
            return True
        return False

    def _readKeywordOptional(self, parent, expected):
        """Consume the keyword ``expected`` if it is next; report whether it was."""
        if self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value == expected:
            self.next()
            parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
            return True
        return False

    def _nextIsSymbol(self, value):
        """Tell whether the lookahead token is the symbol ``value``."""
        return self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == value

    def next(self):
        """Advance by one token, refreshing ``self.tok`` and ``self.nextTok``."""
        self.tok = self.iter.next()
        self.nextTok = self.iter.lookahead()

    def _assertToken(self, tok, expected_str, type_=None, value_=None):
        """Raise a syntax error unless ``tok`` has the given type and/or value.

        A criterion left as ``None`` is not checked.
        """
        if ((type_ is not None and tok.type != type_)
                or (value_ is not None and tok.value != value_)):
            raise self._syntaxError(
                "Expected %s but found %s" % (expected_str, tok.value), tok)

    def _syntaxError(self, msg, tok=None):
        """Build (not raise) a ``SyntaxError`` located at ``tok``.

        ``tok`` defaults to the most recently consumed token.
        """
        if tok is None:
            tok = self.tok
        return SyntaxError(msg, (None, tok.srow, tok.scol, tok.line))
class CompilationEngine:
    """Single-pass Jack-to-VM compiler driven directly by the tokenizer.

    The engine never builds a parse tree: each ``compile_*`` method assumes
    the tokenizer is positioned on the first token of its construct, steps
    over punctuation with ``advance()``, records declarations in the symbol
    table and emits VM commands through ``vm_writer``.
    """

    CLASS_VAR_DEC_KEYWORDS = ['static', 'field']
    SUBROUTINE_DEC_KEYWORDS = ['constructor', 'function', 'method']
    BINARY_OPERATOR_SYMBOLS = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
    UNARY_OPERATOR_SYMBOLS = ['-', '~']
    # '*' and '/' are absent on purpose: they compile to Math library calls.
    BINARY_OPERATORS_TO_COMMAND = {
        '+': 'add',
        '-': 'sub',
        '=': 'eq',
        '>': 'gt',
        '<': 'lt',
        '&': 'and',
        '|': 'or'
    }
    UNARY_OPERATORS_TO_COMMAND = {'-': 'neg', '~': 'not'}
    TYPE_TO_TAG = {
        'STRING_CONST': 'stringConstant',
        'INT_CONST': 'integerConstant',
        'KEYWORD': 'keyword',
        'IDENTIFIER': 'identifier',
        'SYMBOL': 'symbol'
    }
    # NOTE(review): as written this maps every symbol to itself; presumably
    # the values were meant to be the XML entity forms -- confirm before use.
    SYMBOLS_TO_XML_CONVENTION = {
        '<': '<',
        '>': '>',
        '&': '&',
        '"': '"'
    }
    # Symbol-table kinds that name a variable, i.e. something a method can be
    # called on.
    _VARIABLE_KINDS = ('VAR', 'FIELD', 'ARG', 'STATIC')

    def __init__(self, input_file_path, vm_writer: "VMWriter"):
        """Tokenize ``input_file_path`` and compile it immediately.

        Args:
            input_file_path: Path handed to ``JackTokenizer``.
            vm_writer: Writer that receives every emitted VM command; it is
                closed at the end of ``compile_class``.
        """
        self.jack_tokenizer = JackTokenizer(input_file_path)
        self.symbol_table = SymbolTable()
        self.vm_writer = vm_writer
        if self.jack_tokenizer.has_more_tokens():
            self.compile_class()

    def compile_class(self):
        """Compile ``class Name { classVarDec* subroutineDec* }`` and close the writer."""
        tokenizer = self.jack_tokenizer
        tokenizer.advance()
        tokenizer.advance()
        self.symbol_table.define(tokenizer.identifier(), 'NONE', 'CLASS')
        tokenizer.advance()
        tokenizer.advance()
        # Each callee loops over all consecutive declarations of its kind.
        if tokenizer.key_word() in self.CLASS_VAR_DEC_KEYWORDS:
            self.compile_class_var_dec()
        if tokenizer.key_word() in self.SUBROUTINE_DEC_KEYWORDS:
            self.compile_subroutine()
        tokenizer.advance()
        self.vm_writer.close()

    def write_token(self, token_name):
        """Write the current token as an XML leaf and advance.

        NOTE(review): relies on ``self.output_file``, which ``__init__`` does
        not set; a caller has to assign it before using this method.
        """
        type_tag = self.TYPE_TO_TAG[self.jack_tokenizer.token_type()]
        self.output_file.write('<{0}> {1} </{0}>\n'.format(
            type_tag, token_name))
        self.jack_tokenizer.advance()

    def compile_class_var_dec(self):
        """Record every consecutive ``static`` / ``field`` declaration."""
        tokenizer = self.jack_tokenizer
        while tokenizer.key_word() in self.CLASS_VAR_DEC_KEYWORDS:
            # The symbol-table kind is the upper-cased declaration keyword.
            kind = 'FIELD' if tokenizer.key_word() == 'field' else 'STATIC'
            tokenizer.advance()
            field_type = self.get_type()
            tokenizer.advance()
            self.symbol_table.define(tokenizer.identifier(), field_type, kind)
            tokenizer.advance()
            while tokenizer.symbol() != ';':
                tokenizer.advance()  # step over ','
                self.symbol_table.define(tokenizer.identifier(), field_type,
                                         kind)
                tokenizer.advance()
            tokenizer.advance()  # step over ';'

    def write_type(self):
        """Write the current type token (keyword or identifier) via ``write_token``."""
        token_type = self.jack_tokenizer.token_type()
        if token_type == 'KEYWORD':
            self.write_token(self.jack_tokenizer.key_word())
        elif token_type == 'IDENTIFIER':
            self.write_token(self.jack_tokenizer.identifier())

    def compile_subroutine(self):
        """Compile every consecutive constructor / function / method."""
        tokenizer = self.jack_tokenizer
        self.vm_writer.zero_branching_indexes()
        while tokenizer.key_word() in self.SUBROUTINE_DEC_KEYWORDS:
            self.symbol_table.start_subroutine()
            constructor = tokenizer.key_word() == 'constructor'
            method = tokenizer.key_word() == 'method'
            if method:
                # A method receives its object as the first argument.
                self.symbol_table.define('this',
                                         self.symbol_table.get_class_name(),
                                         'ARG')
            tokenizer.advance()
            tokenizer.advance()
            self.symbol_table.define(tokenizer.identifier(), 'NONE',
                                     'SUBROUTINE')
            name = (self.symbol_table.get_class_name() + '.' +
                    tokenizer.identifier())
            tokenizer.advance()
            tokenizer.advance()
            self.compile_parameter_list()
            tokenizer.advance()
            tokenizer.advance()

            # The VM ``function`` command needs the local count, so all
            # ``var`` declarations are consumed before it is written.
            var_num = 0
            while tokenizer.key_word() == 'var':
                var_num += self.compile_var_dec()
            self.vm_writer.write_function(name, var_num)

            if method:
                # Install argument 0 (the receiver) as ``this``.
                self.vm_writer.write_push('ARG', 0)
                self.vm_writer.write_pop('POINTER', 0)
            elif constructor:
                # Allocate one word per field and make the block ``this``.
                field_count = self.symbol_table.var_count('FIELD')
                self.vm_writer.write_push('CONST', field_count)
                self.vm_writer.write_call('Memory.alloc', 1)
                self.vm_writer.write_pop('POINTER', 0)

            self.compile_statements()
            tokenizer.advance()

    def compile_parameter_list(self):
        """Record the parameters (if any) as ``ARG`` symbols; stops on ``)``."""
        tokenizer = self.jack_tokenizer
        if tokenizer.symbol() == ')':
            return
        while True:
            parameter_type = self.get_type()
            tokenizer.advance()
            self.symbol_table.define(tokenizer.identifier(), parameter_type,
                                     'ARG')
            tokenizer.advance()
            if tokenizer.symbol() != ",":
                break
            tokenizer.advance()

    def get_type(self):
        """Return the type named by the current token without advancing.

        Raises:
            ValueError: If the current token is neither a keyword nor an
                identifier.
        """
        token_type = self.jack_tokenizer.token_type()
        if token_type == 'KEYWORD':
            return self.jack_tokenizer.key_word()
        if token_type == 'IDENTIFIER':
            return self.jack_tokenizer.identifier()
        raise ValueError(
            'Expected a type but found a {0} token'.format(token_type))

    def compile_var_dec(self):
        """Record one ``var`` declaration and return how many names it declares."""
        tokenizer = self.jack_tokenizer
        tokenizer.advance()
        var_type = self.get_type()
        tokenizer.advance()
        self.symbol_table.define(tokenizer.identifier(), var_type, 'VAR')
        var_num = 1
        tokenizer.advance()
        while tokenizer.symbol() == ",":
            var_num += 1
            tokenizer.advance()
            self.symbol_table.define(tokenizer.identifier(), var_type, 'VAR')
            tokenizer.advance()
        tokenizer.advance()  # step over ';'
        return var_num

    def compile_statements(self):
        """Compile statements until the current token is no longer a keyword.

        Raises:
            ValueError: If a keyword that cannot start a statement is found
                (no handler would consume it, so the loop could not progress).
        """
        handlers = {
            'let': self.compile_let,
            'if': self.compile_if,
            'while': self.compile_while,
            'do': self.compile_do,
            'return': self.compile_return,
        }
        while self.jack_tokenizer.token_type() == 'KEYWORD':
            keyword = self.jack_tokenizer.key_word()
            handler = handlers.get(keyword)
            if handler is None:
                raise ValueError(
                    'Expected a statement but found keyword {0!r}'.format(
                        keyword))
            handler()

    def compile_do(self):
        """Compile ``do subroutineCall;`` and discard the returned value."""
        self.jack_tokenizer.advance()
        name = self.jack_tokenizer.identifier()
        self.jack_tokenizer.advance()
        self.compile_subroutine_call(name)
        # must dispose of void function return value
        self.vm_writer.write_pop('TEMP', 0)
        self.jack_tokenizer.advance()

    def compile_subroutine_call(self, prefix_call=''):
        """Compile a call whose first identifier, ``prefix_call``, is already consumed.

        The current token must be ``(`` (call without a dot) or ``.``
        (``prefix.name(...)``).  When ``prefix_call`` is a variable of any
        kind, the variable is pushed as the receiver and the call targets
        ``<its type>.name``; otherwise ``prefix_call`` is used as the class
        name and no receiver is pushed.  On exit the closing ``)`` has been
        consumed.
        """
        tokenizer = self.jack_tokenizer
        symbols = self.symbol_table
        symbol = tokenizer.symbol()

        if symbol == '(':
            # If not in symbol table - then subroutine
            kind = symbols.kind_of(prefix_call)
            own_subroutine = not kind or kind == 'SUBROUTINE'
            tokenizer.advance()
            args_count = 0
            if own_subroutine:
                # Call on the current object: ``this`` is the receiver.
                self.vm_writer.write_push('POINTER', 0)
                args_count += 1
            args_count += self.compile_expression_list()
            if own_subroutine:
                self.vm_writer.write_call(
                    symbols.get_class_name() + '.' + prefix_call, args_count)
            else:
                self.vm_writer.write_call(prefix_call, args_count)
            tokenizer.advance()
        elif symbol == '.':
            tokenizer.advance()
            variable_name = None
            # Arguments and statics hold object references just like locals
            # and fields do, so all four kinds are method receivers.
            if symbols.kind_of(prefix_call) in self._VARIABLE_KINDS:
                variable_name = prefix_call
                prefix_call = symbols.type_of(prefix_call)
            prefix_call += '.{0}'.format(tokenizer.identifier())
            tokenizer.advance()
            tokenizer.advance()
            args_count = 0
            if variable_name is not None:
                self.vm_writer.write_push(symbols.kind_of(variable_name),
                                          symbols.index_of(variable_name))
                args_count += 1
            args_count += self.compile_expression_list()
            self.vm_writer.write_call(prefix_call, args_count)
            tokenizer.advance()

    def compile_let(self):
        """Compile ``let name = expr;`` or ``let name[index] = expr;``."""
        tokenizer = self.jack_tokenizer
        tokenizer.advance()
        var_name = tokenizer.identifier()
        tokenizer.advance()
        if tokenizer.symbol() == '[':
            # Leave the target address (base + index) on the stack.
            self.vm_writer.write_push(self.symbol_table.kind_of(var_name),
                                      self.symbol_table.index_of(var_name))
            tokenizer.advance()
            self.compile_expression()
            self.vm_writer.write_arithmetic("add")
            tokenizer.advance()
            tokenizer.advance()
            self.compile_expression()
            # Park the value in temp 0 while the address goes to pointer 1;
            # evaluating the right-hand side may have changed pointer 1.
            self.vm_writer.write_pop('TEMP', 0)
            self.vm_writer.write_pop('POINTER', 1)
            self.vm_writer.write_push('TEMP', 0)
            self.vm_writer.write_pop('THAT', 0)
        else:
            tokenizer.advance()
            self.compile_expression()
            self.vm_writer.write_pop(self.symbol_table.kind_of(var_name),
                                     self.symbol_table.index_of(var_name))
        tokenizer.advance()

    def compile_while(self):
        """Compile ``while (expr) { statements }``."""
        while_idx = self.vm_writer.get_next_label_index('while')
        if_label = 'WHILE_IF_{0}'.format(while_idx)
        end_label = 'WHILE_END_{0}'.format(while_idx)
        self.vm_writer.write_label(if_label)
        self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()
        self.compile_expression()
        # Leave the loop when the condition is false.
        self.vm_writer.write_arithmetic('not')
        self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()
        self.vm_writer.write_if(end_label)
        self.compile_statements()
        self.vm_writer.write_goto(if_label)
        self.jack_tokenizer.advance()
        self.vm_writer.write_label(end_label)

    def compile_return(self):
        """Compile ``return;`` (pushes constant 0) or ``return expr;``."""
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.symbol() != ';':
            self.compile_expression()
        else:
            self.vm_writer.write_push('CONST', 0)
        self.vm_writer.write_return()
        self.jack_tokenizer.advance()

    def compile_if(self):
        """Compile ``if (expr) { ... }`` with an optional ``else { ... }``."""
        if_idx = self.vm_writer.get_next_label_index('if')
        else_label = 'IF_ELSE_{0}'.format(if_idx)
        end_label = 'IF_END_{0}'.format(if_idx)
        self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()
        self.compile_expression()
        # Jump to the else part when the condition is false.
        self.vm_writer.write_arithmetic('not')
        self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()
        self.vm_writer.write_if(else_label)
        self.compile_statements()
        self.jack_tokenizer.advance()
        self.vm_writer.write_goto(end_label)
        self.vm_writer.write_label(else_label)
        if self.jack_tokenizer.key_word() == 'else':
            self.jack_tokenizer.advance()
            self.jack_tokenizer.advance()
            self.compile_statements()
            self.jack_tokenizer.advance()
        self.vm_writer.write_label(end_label)

    def compile_expression(self):
        """Compile ``term (op term)*``; each operator is emitted after its operands."""
        self.compile_term()
        while self.jack_tokenizer.symbol() in self.BINARY_OPERATOR_SYMBOLS:
            symbol = self.jack_tokenizer.symbol()
            self.jack_tokenizer.advance()
            self.compile_term()
            if symbol in self.BINARY_OPERATORS_TO_COMMAND:
                self.vm_writer.write_arithmetic(
                    self.BINARY_OPERATORS_TO_COMMAND[symbol])
            elif symbol == '*':
                self.vm_writer.write_call('Math.multiply', 2)
            elif symbol == '/':
                self.vm_writer.write_call('Math.divide', 2)

    def compile_term(self):
        """Compile one term, leaving its value on the stack."""
        tokenizer = self.jack_tokenizer
        token_type = tokenizer.token_type()
        if token_type == 'IDENTIFIER':
            name = tokenizer.identifier()
            tokenizer.advance()
            if tokenizer.symbol() in ('(', '.'):
                self.compile_subroutine_call(name)
            elif tokenizer.symbol() == '[':
                # Array read: that[0] after pointing ``that`` at base + index.
                self.vm_writer.write_push(self.symbol_table.kind_of(name),
                                          self.symbol_table.index_of(name))
                tokenizer.advance()
                self.compile_expression()
                tokenizer.advance()
                self.vm_writer.write_arithmetic('add')
                self.vm_writer.write_pop('POINTER', 1)
                self.vm_writer.write_push('THAT', 0)
            else:
                kind = self.symbol_table.kind_of(name)
                idx = self.symbol_table.index_of(name)
                self.vm_writer.write_push(kind, idx)
        elif token_type == 'STRING_CONST':
            # Build the string at run time: String.new(length), then one
            # appendChar call per character.
            string_const = tokenizer.string_val()
            self.vm_writer.write_push("CONST", len(string_const))
            self.vm_writer.write_call("String.new", 1)
            for char in string_const:
                self.vm_writer.write_push('CONST', ord(char))
                self.vm_writer.write_call("String.appendChar", 2)
            tokenizer.advance()
        elif token_type == 'KEYWORD':
            keyword = tokenizer.key_word()
            if keyword == 'true':
                # true is emitted as the negation of 1.
                self.vm_writer.write_push('CONST', 1)
                self.vm_writer.write_arithmetic('neg')
            elif keyword in ('false', 'null'):
                self.vm_writer.write_push('CONST', 0)
            elif keyword == 'this':
                self.vm_writer.write_push('POINTER', 0)
            tokenizer.advance()
        elif token_type == 'SYMBOL':
            if tokenizer.symbol() == '(':
                tokenizer.advance()
                self.compile_expression()
                tokenizer.advance()
            elif tokenizer.symbol() in self.UNARY_OPERATOR_SYMBOLS:
                command = self.UNARY_OPERATORS_TO_COMMAND[tokenizer.symbol()]
                tokenizer.advance()
                self.compile_term()
                self.vm_writer.write_arithmetic(command)
        elif token_type == 'INT_CONST':
            self.vm_writer.write_push('CONST', tokenizer.int_val())
            tokenizer.advance()

    def compile_expression_list(self):
        """Compile a comma-separated expression list and return its length.

        Stops on the closing ``)`` without consuming it.
        """
        expression_count = 0
        if self.jack_tokenizer.symbol() != ')':
            self.compile_expression()
            expression_count += 1
            while self.jack_tokenizer.symbol() == ',':
                self.jack_tokenizer.advance()
                self.compile_expression()
                expression_count += 1
        return expression_count
class Compiler(object):
    """Recursive-descent Jack compiler driven by a token XML file.

    The input is read one line at a time; each line is expected to hold one
    token in the form ``<tag> value </tag>``. Depending on ``vm`` the
    compiler writes either an annotated parse tree (XML mode) or VM commands
    (VM mode) to the output file.

    Matching is substring based (see ``words_exist``), so it is only reliable
    for well-formed token files.

    NOTE(review): code generation covers scalar variables only. Array
    indexing, method receivers and class-qualified names for unqualified
    calls are parsed but not translated.
    """

    # Symbol-table kind -> VM memory segment.
    _SEGMENTS = {'static': 'static', 'field': 'this',
                 'arg': 'argument', 'var': 'local'}

    # Keywords that start a statement, in dispatch order.
    _STATEMENT_KEYWORDS = ('let', 'if', 'while', 'do', 'return')

    def __init__(self, file_address, compile_address, vm=False):
        """Open the token file and the output file.

        file_address    -- path of the token XML file to read
        compile_address -- path of the file to write
        vm              -- True to emit VM commands, False to emit XML
        """
        self.here = False
        # Text mode: every line is compared with, and formatted into, str.
        self.file_object = open(file_address, 'r')
        self.compiled = open(compile_address, 'w')
        self.current_line = None
        self.nest_level = 0
        self.vm = vm
        self.label_count = 0
        self.SYMBOL_TABLE = SymbolTable()
        # Consume the first line (compileClass advances again before matching).
        self.advance()

    def close(self):
        """Close the input and output files."""
        self.file_object.close()
        self.compiled.close()

    # ------------------------------------------------------------ helpers

    def get_xml_value(self):
        """Return the text between the opening and closing tag of the line."""
        line = self.current_line
        start = line.find('>')
        end = line.find('</')
        if end == -1:
            end = len(line)
        # strip() removes the padding; no manual off-by-one trimming, which
        # would eat a character of an unpadded value.
        return line[start + 1:end].strip()

    def format_and_write_line(self, dict_=None):
        """Write the current line, indented, in XML mode; no-op in VM mode.

        dict_ -- optional attribute dict appended to the line verbatim.
        """
        if not self.vm:
            indent = " " * self.nest_level * 2
            if dict_:
                return self.compiled.write(
                    "{0}{1}{2}\n".format(indent, self.current_line, dict_))
            else:
                return self.compiled.write(
                    "{0}{1}\n".format(indent, self.current_line))

    def words_exist(self, words):
        """Return True when every word occurs as a substring of the line.

        The '</' of the closing tag is removed first so that '/' does not
        match on every line.
        """
        haystack = self.current_line.replace('</', '')
        return all(word in haystack for word in words)

    def open_tag(self, tag_name):
        """Write ``<tag_name>`` and indent one level (XML mode only)."""
        if not self.vm:
            self.compiled.write("{0}{1}\n".format(
                " " * self.nest_level * 2, "<{}>".format(tag_name)))
            self.nest_level += 1

    def close_tag(self, tag_name):
        """Dedent one level and write ``</tag_name>`` (XML mode only)."""
        if not self.vm:
            self.nest_level -= 1
            self.compiled.write("{0}{1}\n".format(
                " " * self.nest_level * 2, "</{}>".format(tag_name)))

    def advance(self):
        """Load the next input line; keep the current one at end of file."""
        new_line = self.file_object.readline()
        if not new_line:
            return
        self.current_line = new_line.strip()

    def _fail(self, words):
        """Raise a RuntimeError describing the token that was expected."""
        raise RuntimeError('expected a token matching {0!r}, got {1!r}'.format(
            list(words), self.current_line))

    def _accept(self, words, attrs=None):
        """Write and consume the current token if it matches; report whether it did."""
        if not self.words_exist(words):
            return False
        self.format_and_write_line(attrs)
        self.advance()
        return True

    def _expect(self, words, attrs=None):
        """Like _accept, but a mismatch is an error."""
        if not self._accept(words, attrs):
            self._fail(words)

    def _matches_any(self, *word_lists):
        """Return True when the current line matches any of the word lists."""
        return any(self.words_exist(words) for words in word_lists)

    def _at_statement(self):
        """Return True when the current token is a statement keyword."""
        return any(self.words_exist(['keyword', kw])
                   for kw in self._STATEMENT_KEYWORDS)

    def _emit(self, make_code, *args):
        """Write one VM command in VM mode; the command is not even built otherwise."""
        if self.vm:
            self.compiled.write(make_code(*args))

    def _new_label(self, prefix):
        """Return a label that is unique within this compiler instance."""
        self.label_count += 1
        return '{0}_{1}'.format(prefix, self.label_count)

    # ------------------------------------------------------- declarations

    def compileClass(self):
        """Compile ``'class' className '{' classVarDec* subroutineDec* '}'``."""
        self.open_tag("class")
        self.advance()
        self._expect(['keyword', 'class'])
        if not self.words_exist(['identifier']):  # gotta regex for the name too
            self._fail(['identifier'])
        self.format_and_write_line({'category': 'class', 'defined': True,
                                    'kind': None, 'index': None})
        self.SYMBOL_TABLE.class_name = self.get_xml_value()
        self.advance()
        self._expect(['symbol', '{'])
        while self._matches_any(['keyword', 'static'], ['keyword', 'field']):
            self.compileClassVarDec()
        while self._matches_any(['keyword', 'function'],
                                ['keyword', 'constructor'],
                                ['keyword', 'method']):
            self.compileSubroutine()
        self._expect(['symbol', '}'])
        self.close_tag("class")

    def compileClassVarDec(self):
        """Compile a static or field declaration and record its names."""
        self.open_tag("classVarDec")
        if not self._matches_any(['keyword', 'static'], ['keyword', 'field']):
            self._fail(['keyword', 'static|field'])
        self.format_and_write_line()
        kind = self.get_xml_value()
        self.advance()

        if not self._matches_any(['int'], ['char'], ['boolean'], ['identifier']):
            self._fail(['type'])
        type_ = self.get_xml_value()
        self.format_and_write_line()
        self.advance()

        while True:
            if not self.words_exist(['identifier']):
                self._fail(['identifier'])
            name = self.get_xml_value()
            self.SYMBOL_TABLE.define(name, type_, kind)
            self.format_and_write_line({
                'category': kind, 'defined': True, 'kind': kind,
                'index': self.SYMBOL_TABLE.index_of(name)})
            self.advance()
            if not self._accept(['symbol', ',']):
                break

        self._accept(['symbol', ';'])
        self.close_tag('classVarDec')

    def compileSubroutine(self):
        """Compile one constructor, function or method declaration."""
        self.open_tag("subroutineDec")
        self.SYMBOL_TABLE.start_subroutine()
        if not self._matches_any(['keyword', 'constructor'],
                                 ['keyword', 'function'],
                                 ['keyword', 'method']):
            self._fail(['keyword', 'constructor|function|method'])
        self.format_and_write_line()
        self.advance()

        # Return type: void, int etc are keywords, class names are identifiers.
        if not self._matches_any(['keyword'], ['identifier']):
            self._fail(['return type'])
        self.format_and_write_line()
        self.advance()

        if not self.words_exist(['identifier']):
            self._fail(['identifier'])
        self.format_and_write_line({'category': 'subroutine', 'defined': True,
                                    'kind': None, 'index': None})
        self.SYMBOL_TABLE.subroutine_name = self.get_xml_value()
        self.advance()

        self._expect(['symbol', '('])
        # A parameter list starts with a type: a keyword for built-in types,
        # an identifier for class types.
        if self._matches_any(['keyword'], ['identifier']):
            self.compileParameterList()
        else:
            self.open_tag('parameterList')
            self.close_tag('parameterList')
        self._accept(['symbol', ')'])

        if self.words_exist(['{']):
            self.compileSubroutineBody()
        self.close_tag("subroutineDec")

    def compileParameterList(self):
        """Compile a non-empty parameter list and return its length."""
        self.open_tag('parameterList')
        n_params = 0
        while True:
            if not self._matches_any(['identifier'], ['keyword']):
                self._fail(['type'])
            type_ = self.get_xml_value()
            self.format_and_write_line()
            self.advance()

            if not self.words_exist(['identifier']):
                self._fail(['identifier'])
            name = self.get_xml_value()
            self.SYMBOL_TABLE.define(name, type_, 'arg', n_params)
            self.format_and_write_line({
                'category': 'ARG', 'defined': True,
                'kind': self.SYMBOL_TABLE.kind_of(name),
                'index': self.SYMBOL_TABLE.index_of(name)})
            self.advance()
            n_params += 1

            if not self._accept([',']):
                break
        self.close_tag('parameterList')
        return n_params

    def compileSubroutineBody(self):
        """Compile ``'{' varDec* statements '}'``."""
        self.open_tag('subroutineBody')
        self._expect(['{'])
        while self.words_exist(['keyword', 'var']):
            self.compileVarDec()

        # The function header needs the local count, so it is generated only
        # after all var declarations have been seen.
        self._emit(lambda: VMWriter.write_function(
            '{0}.{1}'.format(self.SYMBOL_TABLE.class_name,
                             self.SYMBOL_TABLE.subroutine_name),
            self.SYMBOL_TABLE.var_count('var')))

        # compileStatements consumes the whole statement sequence, so a single
        # call suffices; looping on a loose substring test could spin forever.
        if self._at_statement():
            self.compileStatements()

        self._accept(['}'])
        self.close_tag('subroutineBody')

    def compileVarDec(self):
        """Compile ``'var' type varName (',' varName)* ';'``."""
        self.open_tag('varDec')
        self._expect(['var'])
        if not self._matches_any(['keyword'], ['identifier']):
            self._fail(['type'])
        type_ = self.get_xml_value()
        self.format_and_write_line()
        self.advance()

        while True:
            if not self.words_exist(['identifier']):
                self._fail(['identifier'])
            name = self.get_xml_value()
            self.SYMBOL_TABLE.define(name, type_, 'var')
            self.format_and_write_line({
                'category': 'VAR', 'defined': True,
                'kind': self.SYMBOL_TABLE.kind_of(name),
                'index': self.SYMBOL_TABLE.index_of(name)})
            self.advance()
            if not self._accept(['symbol', ',']):
                break

        self._expect(['symbol', ';'])
        self.close_tag('varDec')

    # --------------------------------------------------------- statements

    def compileStatements(self):
        """Compile consecutive statements until a non-statement token."""
        self.open_tag('statements')
        handlers = (('let', self.compileLet), ('if', self.compileIf),
                    ('while', self.compileWhile), ('do', self.compileDo),
                    ('return', self.compileReturn))
        while True:
            for keyword, handler in handlers:
                if self.words_exist(['keyword', keyword]):
                    handler()
                    break
            else:
                break
        self.close_tag('statements')

    def compileDo(self):
        """Compile a do statement; the call's return value is discarded."""
        self.open_tag('doStatement')
        self._expect(['keyword', 'do'])
        if self.words_exist(['identifier']):
            self.compileSubroutineCall()
        self._accept(['symbol', ';'])
        self.close_tag('doStatement')
        self._emit(VMWriter.write_pop, 'temp', 0)

    def compileSubroutineCall(self, identifier_compiled=False, identifier=None):
        """Compile ``name(args)`` or ``prefix.name(args)`` (no enclosing tag).

        identifier_compiled -- True when the caller already consumed the
                               first identifier
        identifier          -- that identifier's text, used as the call
                               name (or its prefix)
        """
        if self.words_exist(['identifier']) and not identifier_compiled:
            self.format_and_write_line({'category': 'subroutine',
                                        'defined': False, 'kind': None,
                                        'index': None})
            subroutine_name = self.get_xml_value()
            self.advance()
        else:
            subroutine_name = identifier

        n_args = None  # stays None when no argument list was parsed
        if self._accept(['symbol', '(']):
            # The count must be kept here too; it is needed for the call.
            n_args = self.compileExpressionList()
            self._accept(['symbol', ')'])
        elif self.words_exist(['symbol', '.']):
            self.format_and_write_line()
            subroutine_name += '.'
            self.advance()
            if self.words_exist(['identifier']):
                self.format_and_write_line({'category': 'subroutine',
                                            'defined': False, 'kind': None,
                                            'index': None})
                subroutine_name += self.get_xml_value()
                self.advance()
            self._expect(['symbol', '('])
            # always compile expressionLists cause "nothing" is also one
            n_args = self.compileExpressionList()
            self._accept(['symbol', ')'])

        if n_args is not None:
            self._emit(VMWriter.write_call, subroutine_name, n_args)

    def compileLet(self):
        """Compile ``'let' varName ('[' expression ']')? '=' expression ';'``.

        NOTE(review): for an indexed target the index expression is compiled
        but the value is still stored into the variable itself.
        """
        self.open_tag('letStatement')
        self._accept(['keyword', 'let'])

        if not self.words_exist(['identifier']):
            self._fail(['identifier'])
        name = self.get_xml_value()
        kind = self.SYMBOL_TABLE.kind_of(name)
        if not kind:
            # Unknown name: register it as an int local. A declared variable
            # is left alone so its type, kind and index are not overwritten.
            kind = 'var'
            self.SYMBOL_TABLE.define(name, 'int', kind)
        self.format_and_write_line({
            'category': 'VAR', 'defined': True,
            'kind': self.SYMBOL_TABLE.kind_of(name),
            'index': self.SYMBOL_TABLE.index_of(name)})
        self.advance()

        if self._accept(['symbol', '[']):
            self.compileExpression()
            self._expect(['symbol', ']'])

        self._expect(['symbol', '='])
        self.compileExpression()
        self._expect(['symbol', ';'])

        self._emit(lambda: VMWriter.write_pop(
            self._SEGMENTS[kind], self.SYMBOL_TABLE.index_of(name)))
        self.close_tag('letStatement')

    def compileWhile(self):
        """Compile ``'while' '(' expression ')' '{' statements '}'``."""
        self.open_tag('whileStatement')
        condition_label = self._new_label('WHILE_EXP')
        body_label = self._new_label('WHILE_BODY')
        end_label = self._new_label('WHILE_END')

        self._emit(VMWriter.write_label, condition_label)
        self._expect(['while', 'keyword'])
        self._expect(['symbol', '('])
        self.compileExpression()
        self._expect(['symbol', ')'])

        # Condition result is on the stack: enter the body when true,
        # otherwise fall through to the jump past the loop.
        self._emit(VMWriter.write_if, body_label)
        self._emit(VMWriter.write_go_to, end_label)
        self._emit(VMWriter.write_label, body_label)

        self._expect(['symbol', '{'])
        self.compileStatements()
        self._expect(['symbol', '}'])

        self._emit(VMWriter.write_go_to, condition_label)
        self._emit(VMWriter.write_label, end_label)
        self.close_tag('whileStatement')

    def compileReturn(self):
        """Compile ``'return' expression? ';'``.

        The value is pushed before the return command; a bare return pushes
        constant 0 so that the caller always finds a value to pop.
        """
        self.open_tag('returnStatement')
        self._expect(['return', 'keyword'])

        if self.words_exist(['symbol', ';']):
            self._emit(VMWriter.write_push, 'constant', 0)
        else:
            self.compileExpression()
        self._emit(VMWriter.write_return)

        self._accept([';', 'symbol'])
        self.close_tag('returnStatement')

    def compileIf(self):
        """Compile an if statement with an optional else clause."""
        self.open_tag('ifStatement')
        self._expect(['if', 'keyword'])
        self._expect(['symbol', '('])
        self.compileExpression()
        self._expect(['symbol', ')'])

        # The condition is on the stack; negate it so that if-goto skips the
        # "then" block when the condition is false.
        else_label = self._new_label('IF_ELSE')
        exit_label = self._new_label('IF_END')
        self._emit(VMWriter.write_arithmetic, '~')
        self._emit(VMWriter.write_if, else_label)

        self._expect(['symbol', '{'])
        self.compileStatements()
        self._expect(['symbol', '}'])

        self._emit(VMWriter.write_go_to, exit_label)
        # Always define the else label: the if-goto above targets it even
        # when the statement has no else clause.
        self._emit(VMWriter.write_label, else_label)

        if self.words_exist(['else', 'keyword']):
            self.here = True  # legacy flag; nothing in this class reads it
            self.format_and_write_line()
            self.advance()
            self._expect(['symbol', '{'])
            self.compileStatements()
            self._expect(['symbol', '}'])

        self._emit(VMWriter.write_label, exit_label)
        self.close_tag('ifStatement')

    # -------------------------------------------------------- expressions

    def compileExpression(self):
        """Compile ``term (op term)*``, emitting each operator after its operand."""
        def at_operator():
            return any(self.words_exist([op]) for op in OPERATIONS)

        self.open_tag('expression')
        self.compileTerm()
        while at_operator():
            self.format_and_write_line()
            symbol = self.get_xml_value()
            self.advance()
            self.compileTerm()
            self._emit(VMWriter.write_arithmetic, symbol)
        self.close_tag('expression')

    def compileTerm(self, operation=None):
        """Compile one term.

        operation -- optional unary VM operation applied to the term's value
                     once the whole term has been compiled.
        """
        def at_keyword_constant():
            return any(self.words_exist([k]) for k in KEYWORD_CONSTANTS)

        self.open_tag('term')
        if self.words_exist(['integerConstant']):
            self.format_and_write_line()
            self._emit(lambda: VMWriter.write_push('constant',
                                                   self.get_xml_value()))
            self.advance()

        elif self.words_exist(['stringConstant']):
            self.format_and_write_line()
            if self.vm:
                # Build the string at run time, one character at a time.
                # NOTE(review): the text is used as it appears in the token
                # file; XML entities, if any, are not decoded.
                text = self.get_xml_value()
                self._emit(VMWriter.write_push, 'constant', len(text))
                self._emit(VMWriter.write_call, 'String.new', 1)
                for char in text:
                    self._emit(VMWriter.write_push, 'constant', ord(char))
                    self._emit(VMWriter.write_call, 'String.appendChar', 2)
            self.advance()

        elif at_keyword_constant():
            self.format_and_write_line()
            if self.vm:
                value = self.get_xml_value()
                if value == 'true':
                    self._emit(VMWriter.write_push, 'constant', '1')
                    self._emit(VMWriter.write_arithmetic, 'neg')
                elif value == 'false' or value == 'null':
                    self._emit(VMWriter.write_push, 'constant', 0)
                elif value == 'this':
                    self._emit(VMWriter.write_push, 'pointer', 0)
                else:
                    self._emit(VMWriter.write_push, 'constant', value)
            self.advance()

        elif self.words_exist(['identifier']):
            name = self.get_xml_value()
            kind = self.SYMBOL_TABLE.kind_of(name)
            index = self.SYMBOL_TABLE.index_of(name)
            self.format_and_write_line({'category': None, 'defined': False,
                                        'kind': kind, 'index': index})
            self.advance()
            # THIS ONLY WORKS FOR SIMPLE IDENTIFIERS, should refactor for
            # indexing arrays
            if kind is not None:
                self._emit(lambda: VMWriter.write_push(self._SEGMENTS[kind],
                                                       index))

            if self._accept(['symbol', '[']):
                self.compileExpression()
                self._expect(['symbol', ']'])
            elif self._matches_any(['('], ['.']):
                # A call leaves its return value on the stack.
                self.compileSubroutineCall(identifier_compiled=True,
                                           identifier=name)

        elif self._accept(['(', 'symbol']):
            self.compileExpression()
            self._expect([')', 'symbol'])

        elif self._matches_any(['-'], ['~']):
            unary = 'neg' if self.words_exist(['-']) else '~'
            self.format_and_write_line()
            self.advance()
            self.compileTerm(operation=unary)

        else:
            self._fail(['term'])

        # Applied here, after any kind of term, so a unary operator is not
        # lost when its operand is a variable, call or parenthesised expression.
        if operation:
            self._emit(VMWriter.write_arithmetic, operation)
        self.close_tag('term')

    def compileExpressionList(self):
        """Compile a possibly empty, comma-separated expression list.

        Returns the number of expressions compiled.
        """
        self.open_tag('expressionList')
        n_expressions = 0
        if not self.words_exist(['symbol', ')']):
            while True:
                self.compileExpression()
                n_expressions += 1
                if not self._accept([',']):
                    break
        self.close_tag('expressionList')
        return n_expressions
class CompilationEngine:
    '''
    Compiles one Jack class from a token stream.

    Two outputs are produced side by side: VM commands through a VMWriter,
    and an ElementTree parse tree that write() can dump as XML.

    NOTE(review): subroutine calls are emitted without a receiver object, so
    method calls (unqualified or on a variable) are not fully supported.
    '''

    # Binary operators that map onto a single VM arithmetic command.
    _ARITHMETIC_COMMANDS = {
        '+': 'add', '-': 'sub', '&': 'and', '|': 'or',
        '<': 'lt', '>': 'gt', '=': 'eq',
    }
    # Binary operators implemented as two-argument library calls.
    _MATH_CALLS = {'*': 'Math.multiply', '/': 'Math.divide'}

    def __init__(self, token_stream, out_file, xml_name):
        '''
        creates a new compilation engine with the given input and output.
        The next method called must be compile_class().

        token_stream -- tokenizer positioned before the first token
        out_file     -- destination handed to VMWriter
        xml_name     -- path for the XML parse tree, or a falsy value to skip it

        Raises AssertionError when the first token is not 'class'.
        '''
        self.stream = token_stream
        self.writer = VMWriter(out_file)
        self.symbols = SymbolTable()
        self.xml_name = xml_name
        self.root = ET.Element('class')
        self.stream.advance()
        # Raised explicitly so the check also runs under "python -O".
        if self.stream.keyword() != 'class':
            raise AssertionError("expected 'class' as the first token")

    def add_terminal(self, root, text):
        '''
        appends the current token to the parse tree under root, using text
        as its content, then advances the stream if tokens remain.
        '''
        terminal = ET.SubElement(root, self.stream.token_type())
        terminal.text = ' {text} '.format(text=text)
        if self.stream.has_more_tokens():
            self.stream.advance()

    def _current_type_name(self):
        ''' returns the type named by the current token (keyword or class name) '''
        if self.stream.token_type() == tokenizer.KEYWORD:
            return self.stream.keyword()
        return self.stream.identifier()

    def compile_class(self):
        ''' compiles a complete class '''
        self.add_terminal(self.root, self.stream.keyword())
        self.class_name = self.stream.identifier()
        self.add_terminal(self.root, self.class_name)
        self.add_terminal(self.root, self.stream.symbol())
        while (self.stream.token_type() == tokenizer.KEYWORD
               and self.stream.keyword() in CLASS_VARS):
            self.compile_class_var_dec()
        while (self.stream.token_type() == tokenizer.KEYWORD
               and self.stream.keyword() in SUBROUTINE_TYPES):
            self.compile_subroutine()
        self.add_terminal(self.root, self.stream.symbol())

    def compile_class_var_dec(self):
        ''' compiles a static declaration or a field declaration. '''
        class_var_root = ET.SubElement(self.root, CLASS_VAR_DEC)
        kind = self.stream.keyword()
        self.add_terminal(class_var_root, kind)
        type_name = self._current_type_name()
        self.add_terminal(class_var_root, type_name)
        name = self.stream.identifier()
        self.add_terminal(class_var_root, name)
        self.symbols.define(name, type_name, kind)
        while self.stream.symbol() == COMMA:
            self.add_terminal(class_var_root, self.stream.symbol())
            name = self.stream.identifier()
            self.add_terminal(class_var_root, name)
            self.symbols.define(name, type_name, kind)
        self.add_terminal(class_var_root, self.stream.symbol())

    def compile_subroutine(self):
        ''' compiles a complete method, function, or constructor. '''
        subroutine_dec = ET.SubElement(self.root, SUBROUTINE_DEC)
        self.symbols.start_subroutine()
        subroutine_type = self.stream.keyword()
        if subroutine_type == 'method':
            # Only methods receive the object as a hidden first argument;
            # defining it for constructors would shift their real arguments.
            self.symbols.define('this', self.class_name, 'argument')
        self.add_terminal(subroutine_dec, subroutine_type)
        self.add_terminal(subroutine_dec, self._current_type_name())
        name = self.stream.identifier()
        self.add_terminal(subroutine_dec, name)
        self.add_terminal(subroutine_dec, self.stream.symbol())
        self.compile_parameter_list(subroutine_dec)
        self.add_terminal(subroutine_dec, self.stream.symbol())

        subroutine_body = ET.SubElement(subroutine_dec, SUBROUTINE_BODY)
        self.add_terminal(subroutine_body, self.stream.symbol())
        while (self.stream.token_type() == tokenizer.KEYWORD
               and self.stream.keyword() == VAR):
            self.compile_var_dec(subroutine_body)

        # The function header needs the local count, hence after the varDecs.
        func_name = '{cls}.{sub}'.format(cls=self.class_name, sub=name)
        self.writer.write_function(func_name, self.symbols.var_count('var'))
        if subroutine_type == 'constructor':
            # Allocate one word per field and make it the current object.
            self.writer.write_push('constant', self.symbols.var_count('field'))
            self.writer.write_call('Memory.alloc', 1)
            self.writer.write_pop('pointer', 0)
        elif subroutine_type == 'method':
            # Bind the hidden first argument as the current object.
            self.writer.write_push('argument', 0)
            self.writer.write_pop('pointer', 0)

        self.compile_statements(subroutine_body)
        self.add_terminal(subroutine_body, self.stream.symbol())

    def compile_parameter_list(self, root):
        '''
        compiles a (possibly empty) parameter list, not including the
        enclosing "()".
        '''
        parameter_list_root = ET.SubElement(root, PARAMETER_LIST)
        if self.stream.token_type() != tokenizer.SYMBOL:
            self._compile_parameter(parameter_list_root)
        while (self.stream.token_type() == tokenizer.SYMBOL
               and self.stream.symbol() == COMMA):
            self.add_terminal(parameter_list_root, self.stream.symbol())
            self._compile_parameter(parameter_list_root)

    def _compile_parameter(self, parameter_list_root):
        ''' compiles one "type name" pair and defines it as an argument '''
        # The type may be a class name, which is an identifier token.
        type_name = self._current_type_name()
        self.add_terminal(parameter_list_root, type_name)
        name = self.stream.identifier()
        self.add_terminal(parameter_list_root, name)
        self.symbols.define(name, type_name, 'argument')

    def compile_var_dec(self, root):
        ''' compiles a var declaration '''
        var_dec_root = ET.SubElement(root, VAR_DEC)
        self.add_terminal(var_dec_root, self.stream.keyword())
        type_name = self._current_type_name()
        self.add_terminal(var_dec_root, type_name)
        name = self.stream.identifier()
        self.add_terminal(var_dec_root, name)
        self.symbols.define(name, type_name, 'var')
        while self.stream.symbol() == COMMA:
            self.add_terminal(var_dec_root, self.stream.symbol())
            name = self.stream.identifier()
            self.add_terminal(var_dec_root, name)
            self.symbols.define(name, type_name, 'var')
        self.add_terminal(var_dec_root, self.stream.symbol())

    def compile_statements(self, root):
        '''
        compiles a sequence of statements, not including the enclosing "{}".

        Raises AssertionError on a keyword that does not start a statement.
        '''
        statements_root = ET.SubElement(root, STATEMENTS)
        handlers = {
            'let': self.compile_let,
            'if': self.compile_if,
            'while': self.compile_while,
            'do': self.compile_do,
            'return': self.compile_return,
        }
        while self.stream.token_type() == tokenizer.KEYWORD:
            keyword = self.stream.keyword()
            handler = handlers.get(keyword)
            if handler is None:
                # Raised explicitly: a stripped assert would loop forever here.
                raise AssertionError(
                    'unsupported keyword {keyword}'.format(keyword=keyword))
            handler(statements_root)

    def compile_do(self, root):
        ''' compiles a do statement; the call's return value is discarded '''
        do_root = ET.SubElement(root, DO)
        self.add_terminal(do_root, self.stream.keyword())
        self.compile_subroutine_call(do_root)
        self.writer.write_pop('temp', 0)
        self.add_terminal(do_root, self.stream.symbol())

    def compile_let(self, root):
        ''' compiles a let statement, for a plain variable or an array element '''
        let_root = ET.SubElement(root, LET)
        self.add_terminal(let_root, self.stream.keyword())
        lhs = self.stream.identifier()
        self.add_terminal(let_root, lhs)
        is_array = (self.stream.token_type() == tokenizer.SYMBOL
                    and self.stream.symbol() == OPEN_BRACKET)
        if is_array:
            self.add_terminal(let_root, self.stream.symbol())
            # Target address = array base + index, left on the stack.
            self.writer.write_push(self.symbols.kind_of(lhs),
                                   self.symbols.index_of(lhs))
            self.compile_expression(let_root)
            self.writer.write_arithmetic('add')
            self.add_terminal(let_root, self.stream.symbol())
        self.add_terminal(let_root, self.stream.symbol())
        self.compile_expression(let_root)
        self.add_terminal(let_root, self.stream.symbol())
        if is_array:
            # Park the value, point 'that' at the target, then store. The
            # detour through temp keeps pointer 1 untouched while the
            # right-hand side (which may read arrays itself) is evaluated.
            self.writer.write_pop('temp', 0)
            self.writer.write_pop('pointer', 1)
            self.writer.write_push('temp', 0)
            self.writer.write_pop('that', 0)
        else:
            self.writer.write_pop(self.symbols.kind_of(lhs),
                                  self.symbols.index_of(lhs))

    def compile_while(self, root):
        ''' compiles a while statement '''
        while_root = ET.SubElement(root, WHILE)
        while_expression = self.symbols.generate_label('WHILE_EXP')
        while_end = self.symbols.generate_label('WHILE_END')
        self.add_terminal(while_root, self.stream.keyword())
        self.add_terminal(while_root, self.stream.symbol())
        self.writer.write_label(while_expression)
        self.compile_expression(while_root)
        # Leave the loop when the condition is false.
        self.writer.write_arithmetic('not')
        self.writer.write_if(while_end)
        self.add_terminal(while_root, self.stream.symbol())
        self.add_terminal(while_root, self.stream.symbol())
        self.compile_statements(while_root)
        self.writer.write_goto(while_expression)
        self.writer.write_label(while_end)
        self.add_terminal(while_root, self.stream.symbol())

    def compile_return(self, root):
        ''' compiles a return statement; a bare return pushes constant 0 '''
        return_root = ET.SubElement(root, RETURN)
        self.add_terminal(return_root, self.stream.keyword())
        # Only ';' means "no value": an expression may itself start with a
        # symbol such as '-', '~' or '('.
        is_bare = (self.stream.token_type() == tokenizer.SYMBOL
                   and self.stream.symbol() == ';')
        if is_bare:
            self.writer.write_push('constant', 0)
        else:
            self.compile_expression(return_root)
        self.writer.write_return()
        self.add_terminal(return_root, self.stream.symbol())

    def compile_if(self, root):
        ''' compiles an if statement with an optional else clause '''
        if_root = ET.SubElement(root, IF)
        if_label = self.symbols.generate_label('IF_TRUE')
        else_label = self.symbols.generate_label('IF_FALSE')
        end_label = self.symbols.generate_label('IF_END')
        self.add_terminal(if_root, self.stream.keyword())
        self.add_terminal(if_root, self.stream.symbol())
        self.compile_expression(if_root)
        self.writer.write_if(if_label)
        self.writer.write_goto(else_label)
        self.writer.write_label(if_label)
        self.add_terminal(if_root, self.stream.symbol())
        self.add_terminal(if_root, self.stream.symbol())
        self.compile_statements(if_root)
        self.writer.write_goto(end_label)
        self.add_terminal(if_root, self.stream.symbol())
        # Written even without an else clause: the goto above targets it.
        self.writer.write_label(else_label)
        if (self.stream.token_type() == tokenizer.KEYWORD
                and self.stream.keyword() == 'else'):
            self.add_terminal(if_root, self.stream.keyword())
            self.add_terminal(if_root, self.stream.symbol())
            self.compile_statements(if_root)
            self.add_terminal(if_root, self.stream.symbol())
        self.writer.write_label(end_label)

    def compile_expression(self, root):
        '''
        compiles an expression: term (op term)*. Each operator is emitted
        after its right-hand term, so evaluation is strictly left to right.
        '''
        expression_root = ET.SubElement(root, EXPRESSION)
        self.compile_term(expression_root)
        while (self.stream.token_type() == tokenizer.SYMBOL
               and self.stream.symbol() in OPS):
            operator = self.stream.symbol()
            self.add_terminal(expression_root, operator)
            self.compile_term(expression_root)
            if operator in self._ARITHMETIC_COMMANDS:
                self.writer.write_arithmetic(
                    self._ARITHMETIC_COMMANDS[operator])
            elif operator in self._MATH_CALLS:
                self.writer.write_call(self._MATH_CALLS[operator], 2)

    def compile_term(self, root):
        '''
        compiles a term. If the current token is an identifier, one token of
        look-ahead ("[", "(" or ".") distinguishes a variable, an array
        entry, and a subroutine call. Any other token is not part of this
        term and is not advanced over.

        Raises AssertionError on a token that cannot start a term.
        '''
        term_root = ET.SubElement(root, TERM)
        token_type = self.stream.token_type()
        if token_type == tokenizer.INT:
            val = self.stream.int_val()
            self.add_terminal(term_root, val)
            self.writer.write_push('constant', val)
        elif token_type == tokenizer.STRING:
            val = self.stream.string_val()
            self.add_terminal(term_root, val)
            # Build the string at run time, one character at a time.
            self.writer.write_push('constant', len(val))
            self.writer.write_call('String.new', 1)
            for char in val:
                self.writer.write_push('constant', ord(char))
                self.writer.write_call('String.appendChar', 2)
        elif (token_type == tokenizer.KEYWORD
              and self.stream.keyword() in KEYWORD_CONSTANTS):
            keyword = self.stream.keyword()
            self.add_terminal(term_root, keyword)
            if keyword == 'true':
                self.writer.write_push('constant', 0)
                self.writer.write_arithmetic('not')
            elif keyword in ['false', 'null']:
                self.writer.write_push('constant', 0)
            else:
                # 'this' is the object's address, held in pointer 0
                # ('this 0' would be its first field instead).
                self.writer.write_push('pointer', 0)
        elif token_type == tokenizer.IDENTIFIER:
            if self.stream.peek() == OPEN_BRACKET:
                name = self.stream.identifier()
                self.writer.write_push(self.symbols.kind_of(name),
                                       self.symbols.index_of(name))
                self.add_terminal(term_root, name)
                self.add_terminal(term_root, self.stream.symbol())
                self.compile_expression(term_root)
                self.add_terminal(term_root, self.stream.symbol())
                # base + index -> address; read the element through 'that'.
                self.writer.write_arithmetic('add')
                self.writer.write_pop('pointer', 1)
                self.writer.write_push('that', 0)
            elif (self.stream.peek() == OPEN_PAREN
                  or self.stream.peek() == PERIOD):
                self.compile_subroutine_call(term_root)
            else:
                name = self.stream.identifier()
                self.add_terminal(term_root, name)
                self.writer.write_push(self.symbols.kind_of(name),
                                       self.symbols.index_of(name))
        elif (token_type == tokenizer.SYMBOL
              and self.stream.symbol() == OPEN_PAREN):
            self.add_terminal(term_root, self.stream.symbol())
            self.compile_expression(term_root)
            self.add_terminal(term_root, self.stream.symbol())
        elif (token_type == tokenizer.SYMBOL
              and self.stream.symbol() in UNARY_OPS):
            operator = self.stream.symbol()
            self.add_terminal(term_root, operator)
            self.compile_term(term_root)
            self.writer.write_arithmetic('neg' if operator == '-' else 'not')
        else:
            raise AssertionError('unsupported token {token}'.format(
                token=self.stream.current_token))

    def compile_expression_list(self, root):
        '''
        compiles a (possibly empty) comma-separated list of expressions and
        returns how many expressions it contained.
        '''
        expression_list_root = ET.SubElement(root, EXPRESSION_LIST)
        if (self.stream.token_type() == tokenizer.SYMBOL
                and self.stream.symbol() == CLOSE_PAREN):
            return 0
        self.compile_expression(expression_list_root)
        num_vars = 1
        while self.stream.symbol() == COMMA:
            self.add_terminal(expression_list_root, self.stream.symbol())
            self.compile_expression(expression_list_root)
            num_vars += 1
        return num_vars

    def compile_subroutine_call(self, root):
        '''
        compiles name(args) or prefix.name(args) and emits the call. An
        unqualified name is qualified with the class being compiled.
        '''
        class_name = self.class_name
        subroutine_name = self.stream.identifier()
        # The terminal's text is the identifier actually read, not the
        # enclosing class name.
        self.add_terminal(root, subroutine_name)
        if self.stream.symbol() == PERIOD:
            self.add_terminal(root, self.stream.symbol())
            class_name = subroutine_name
            subroutine_name = self.stream.identifier()
            self.add_terminal(root, subroutine_name)
        self.add_terminal(root, self.stream.symbol())
        num_vars = self.compile_expression_list(root)
        self.add_terminal(root, self.stream.symbol())
        self.writer.write_call('{cls}.{sub}'.format(
            cls=class_name, sub=subroutine_name), num_vars)

    def write(self):
        '''
        writes the XML parse tree (when xml_name is set) and closes the VM
        writer.
        '''
        if self.xml_name:
            # Drop the first line of the pretty-printed document (the XML
            # declaration emitted by minidom).
            lines = self._write(self.root).split('\n')[1:]
            with open(self.xml_name, 'w') as xml_file:
                xml_file.write('\n'.join(lines))
        self.writer.close()

    def _write(self, root):
        ''' returns the parse tree under root as pretty-printed XML text '''
        return minidom.parseString(ET.tostring(root)).toprettyxml()
class CompilationEngine():
    """Recursive-descent compiler for a single Jack class.

    Tokens are pulled from a ``JackTokenizer``. While parsing, the engine
    writes an annotated XML parse tree to ``<filepath minus its last five
    characters>.myImpl.xml`` and emits VM commands through ``vm_writer``.
    The instance is a context manager; leaving the ``with`` block closes the
    XML file.
    """

    def __init__(self, filepath, vm_writer):
        """Prepare to compile ``filepath``, emitting VM code via ``vm_writer``."""
        # Build the tokenizer first: if the input cannot be read we neither
        # create a stray XML file nor leave an open handle behind.
        self.tokenizer = JackTokenizer(filepath)
        self.wf = open(filepath[:-5] + ".myImpl.xml", 'w')
        self.symbol_table = SymbolTable()
        self.vmw = vm_writer
        self.compiled_class_name = None
        self.label_num = 0

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.wf.close()

    def get_new_label(self):
        """Return a fresh label name of the form ``LABEL_<n>``."""
        self.label_num += 1
        return 'LABEL_%d' % self.label_num

    def compile(self):
        """Compile the whole input (a single class)."""
        self.compile_class()

    # ------------------------------------------------------------------
    # Variable access helpers
    # ------------------------------------------------------------------

    def _segment_of(self, name):
        """Return the VM segment holding variable ``name``, or None.

        None is returned when the symbol table reports a kind that is not
        one of ARG / VAR / FIELD / STATIC.
        """
        kind = self.symbol_table.kind_of(name)
        if kind == IdentifierKind.ARG:
            return Segment.ARG
        if kind == IdentifierKind.VAR:
            return Segment.LOCAL
        if kind == IdentifierKind.FIELD:
            return Segment.THIS
        if kind == IdentifierKind.STATIC:
            return Segment.STATIC
        return None

    def _push_variable(self, name):
        """Emit ``push <segment> <index>`` for ``name``.

        Nothing is emitted when the name has no known segment.
        """
        segment = self._segment_of(name)
        if segment is not None:
            self.vmw.write_push(segment, self.symbol_table.index_of(name))

    def _pop_variable(self, name):
        """Emit ``pop <segment> <index>`` for ``name``.

        Nothing is emitted when the name has no known segment.
        """
        segment = self._segment_of(name)
        if segment is not None:
            self.vmw.write_pop(segment, self.symbol_table.index_of(name))

    # ------------------------------------------------------------------
    # Program structure
    # ------------------------------------------------------------------

    def compile_class(self):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        self.write_element_start('class')
        self.compile_keyword([Tokens.CLASS])
        self.compiled_class_name = self.compile_class_name().token
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        while self.next_is_class_var_dec():
            self.compile_class_var_dec()
        while self.next_is_subroutine_dec():
            self.compile_subroutine_dec()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.write_element_end('class')

    def compile_class_var_dec(self):
        """classVarDec: ('static'|'field') type varName (',' varName)* ';'"""
        self.write_element_start('classVarDec')
        token = self.compile_keyword([Tokens.STATIC, Tokens.FIELD])
        kind = None
        if token == Tokens.STATIC:
            kind = IdentifierKind.STATIC
        elif token == Tokens.FIELD:
            kind = IdentifierKind.FIELD
        else:
            self.raise_syntax_error('Unexpected token')
        type_token = self.compile_type()
        self.compile_var_name(declaration=True, type=type_token.token, kind=kind)
        while self.next_is(Tokens.COMMA):
            self.compile_symbol(Tokens.COMMA)
            self.compile_var_name(declaration=True, type=type_token.token, kind=kind)
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('classVarDec')

    def compile_var_dec(self):
        """varDec: 'var' type varName (',' varName)* ';'

        Returns the number of local variables declared by this statement.
        """
        self.write_element_start('varDec')
        self.compile_keyword(Tokens.VAR)
        type_token = self.compile_type()
        self.compile_var_name(declaration=True, type=type_token.token,
                              kind=IdentifierKind.VAR)
        var_num = 1
        while self.next_is(Tokens.COMMA):
            self.compile_symbol(Tokens.COMMA)
            self.compile_var_name(declaration=True, type=type_token.token,
                                  kind=IdentifierKind.VAR)
            var_num += 1
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('varDec')
        return var_num

    def compile_subroutine_dec(self):
        """subroutineDec: kind ('void'|type) name '(' parameterList ')' body"""
        self.symbol_table.start_subroutine()
        self.write_element_start('subroutineDec')
        token = self.compile_keyword(
            [Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD])
        if self.tokenizer.see_next() == Tokens.VOID:
            self.compile_keyword(Tokens.VOID)
        else:
            self.compile_type()
        subroutine_name = self.compile_subroutine_name().token
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
        if token == Tokens.METHOD:
            # Reserve argument 0 for the receiver so declared parameters
            # are numbered after it.
            self.symbol_table.define('$this', self.compiled_class_name,
                                     IdentifierKind.ARG)
        self.compile_parameter_list()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        self.compile_subroutine_body(subroutine_name, token)
        self.write_element_end('subroutineDec')

    def compile_subroutine_name(self):
        """Consume an identifier, tagging it as a subroutine in the XML."""
        self.write_identifier_info('category: subroutine')
        return self.compile_identifier()

    def compile_class_name(self):
        """Consume an identifier, tagging it as a class in the XML."""
        self.write_identifier_info('category: class')
        return self.compile_identifier()

    def compile_var_name(self, declaration=False, type=None, kind=None,
                         let=False, call=False):
        """Consume a variable identifier and return its token.

        With ``declaration`` the name is added to the symbol table using
        ``type`` and ``kind``. With ``let`` or ``call`` no VM code is
        emitted (the caller handles it). Otherwise the variable's value is
        pushed onto the stack.
        """
        name = self.tokenizer.see_next().token
        if declaration:
            self.symbol_table.define(name, type, kind)
        elif not (let or call):
            self._push_variable(name)
        self.write_identifier_info(
            'declaration: %s, kind: %s, index: %d' % (
                declaration,
                self.symbol_table.kind_of(name),
                self.symbol_table.index_of(name)))
        return self.compile_identifier()

    def write_identifier_info(self, value):
        """Write an ``IdentifierInfo`` annotation element."""
        self.write_element('IdentifierInfo', value)

    def compile_parameter_list(self):
        """parameterList: ((type varName) (',' type varName)*)?"""
        self.write_element_start('parameterList')
        upcoming = self.tokenizer.see_next()
        if (upcoming in [Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN]
                or isinstance(upcoming, Identifier)):
            type_token = self.compile_type()
            self.compile_var_name(declaration=True, type=type_token.token,
                                  kind=IdentifierKind.ARG)
            while self.next_is(Tokens.COMMA):
                self.compile_symbol(Tokens.COMMA)
                type_token = self.compile_type()
                self.compile_var_name(declaration=True, type=type_token.token,
                                      kind=IdentifierKind.ARG)
        self.write_element_end('parameterList')

    def compile_subroutine_body(self, subroutine_name, subroutine_dec_token):
        """subroutineBody: '{' varDec* statements '}'

        Emits the ``function`` command once all locals are counted, then
        the prologue that sets up ``this`` for methods and constructors.
        Returns the number of local variables.
        """
        self.write_element_start('subroutineBody')
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        local_num = 0
        while self.next_is(Tokens.VAR):
            local_num += self.compile_var_dec()
        self.vmw.write_function(
            "%s.%s" % (self.compiled_class_name, subroutine_name), local_num)
        if subroutine_dec_token == Tokens.METHOD:
            # The receiver arrives as argument 0; anchor THIS to it.
            self.vmw.write_push(Segment.ARG, 0)
            self.vmw.write_pop(Segment.POINTER, 0)
        elif subroutine_dec_token == Tokens.CONSTRUCTOR:
            # Allocate one word per field and anchor THIS to the new block.
            self.vmw.write_push(Segment.CONST,
                                self.symbol_table.var_count(IdentifierKind.FIELD))
            self.vmw.write_call('Memory.alloc', 1)
            self.vmw.write_pop(Segment.POINTER, 0)
        elif subroutine_dec_token == Tokens.FUNCTION:
            pass
        else:
            self.raise_syntax_error('Invalid token')
        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.write_element_end('subroutineBody')
        return local_num

    # ------------------------------------------------------------------
    # Statements
    # ------------------------------------------------------------------

    def compile_statements(self):
        """statements: statement*"""
        self.write_element_start('statements')
        while self.next_is_statement():
            self.compile_statement()
        self.write_element_end('statements')

    def compile_statement(self):
        """Compile one let / if / while / do / return statement.

        Does nothing when the next token does not start a statement.
        """
        if self.next_is(Tokens.LET):
            self._compile_let()
        elif self.next_is(Tokens.IF):
            self._compile_if()
        elif self.next_is(Tokens.WHILE):
            self._compile_while()
        elif self.next_is(Tokens.DO):
            self._compile_do()
        elif self.next_is(Tokens.RETURN):
            self._compile_return()

    def _compile_let(self):
        """letStatement: 'let' varName ('[' expression ']')? '=' expression ';'"""
        self.write_element_start('letStatement')
        self.compile_keyword(Tokens.LET)
        let_var = self.compile_var_name(let=True).token
        if self.next_is(Tokens.LEFT_BOX_BRACKET):
            self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
            self.compile_expression()  # index
            self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
            self.compile_symbol(Tokens.EQUAL)
            # Leave base + index on the stack while the right-hand side is
            # evaluated. The temp segment is shared by every subroutine, so
            # parking the address there would let a call made by the
            # right-hand side overwrite it.
            self._push_variable(let_var)
            self.vmw.write_arithmetic(Command.ADD)
            self.compile_expression()  # value
            # No call can happen between these four commands, so temp 0 is
            # safe as a short-lived scratch slot.
            self.vmw.write_pop(Segment.TEMP, 0)
            self.vmw.write_pop(Segment.POINTER, 1)
            self.vmw.write_push(Segment.TEMP, 0)
            self.vmw.write_pop(Segment.THAT, 0)
            self.compile_symbol(Tokens.SEMI_COLON)
        else:
            self.compile_symbol(Tokens.EQUAL)
            self.compile_expression()
            self.compile_symbol(Tokens.SEMI_COLON)
            self._pop_variable(let_var)
        self.write_element_end('letStatement')

    def _compile_if(self):
        """ifStatement: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?"""
        self.write_element_start('ifStatement')
        self.compile_keyword(Tokens.IF)
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
        self.compile_expression()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        # Jump to the else part when the condition is false.
        self.vmw.write_arithmetic(Command.NOT)
        else_label = self.get_new_label()
        end_label = self.get_new_label()
        self.vmw.write_if(else_label)
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.vmw.write_goto(end_label)
        self.vmw.write_label(else_label)
        if self.next_is(Tokens.ELSE):
            self.compile_keyword(Tokens.ELSE)
            self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
            self.compile_statements()
            self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.vmw.write_label(end_label)
        self.write_element_end('ifStatement')

    def _compile_while(self):
        """whileStatement: 'while' '(' expression ')' '{' statements '}'"""
        self.write_element_start('whileStatement')
        top_label = self.get_new_label()
        exit_label = self.get_new_label()
        self.compile_keyword(Tokens.WHILE)
        self.vmw.write_label(top_label)
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
        self.compile_expression()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        # Leave the loop when the condition is false.
        self.vmw.write_arithmetic(Command.NOT)
        self.vmw.write_if(exit_label)
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.vmw.write_goto(top_label)
        self.vmw.write_label(exit_label)
        self.write_element_end('whileStatement')

    def _compile_do(self):
        """doStatement: 'do' subroutineCall ';'"""
        self.write_element_start('doStatement')
        self.compile_keyword(Tokens.DO)
        self.compile_subroutine_call()
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('doStatement')
        # Discard the value the call left on the stack.
        self.vmw.write_pop(Segment.TEMP, 0)

    def _compile_return(self):
        """returnStatement: 'return' expression? ';'"""
        self.write_element_start('returnStatement')
        self.compile_keyword(Tokens.RETURN)
        if not self.next_is(Tokens.SEMI_COLON):
            self.compile_expression()
        else:
            # A bare return still pushes a value: constant 0.
            self.vmw.write_push(Segment.CONST, 0)
        self.compile_symbol(Tokens.SEMI_COLON)
        self.vmw.write_return()
        self.write_element_end('returnStatement')

    # ------------------------------------------------------------------
    # Expressions
    # ------------------------------------------------------------------

    def compile_subroutine_call(self):
        """Compile ``name(args)``, ``var.name(args)`` or ``Class.name(args)``.

        The first two forms pass the receiver as an extra leading argument.
        """
        if self.next_is(Tokens.LEFT_ROUND_BRACKET, idx=1):
            # Unqualified call: a method on the current object.
            subroutinename = self.compile_subroutine_name().token
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.vmw.write_push(Segment.POINTER, 0)
            argnum = self.compile_expression_list()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.vmw.write_call(
                "%s.%s" % (self.compiled_class_name, subroutinename),
                argnum + 1)
            return

        identifier_str = self.tokenizer.see_next().token
        if self.symbol_table.kind_of(identifier_str):
            # Qualifier is a known variable: method call on that object.
            instance_name = self.compile_var_name(call=True).token
            self.compile_symbol(Tokens.DOT)
            subroutinename = self.compile_subroutine_name().token
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self._push_variable(instance_name)
            argnum = self.compile_expression_list()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.vmw.write_call(
                "%s.%s" % (self.symbol_table.type_of(instance_name),
                           subroutinename),
                argnum + 1)
        else:
            # Qualifier is treated as a class name: no receiver is pushed.
            classname = self.compile_class_name().token
            self.compile_symbol(Tokens.DOT)
            subroutinename = self.compile_subroutine_name().token
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            argnum = self.compile_expression_list()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.vmw.write_call("%s.%s" % (classname, subroutinename), argnum)

    def compile_expression_list(self):
        """expressionList: (expression (',' expression)*)?

        Returns the number of expressions compiled.
        """
        self.write_element_start('expressionList')
        argnum = 0
        if not self.next_is(Tokens.RIGHT_ROUND_BRACKET):
            self.compile_expression()
            argnum += 1
            while self.next_is(Tokens.COMMA):
                self.compile_symbol(Tokens.COMMA)
                self.compile_expression()
                argnum += 1
        self.write_element_end('expressionList')
        return argnum

    def compile_expression(self):
        """expression: term (op term)*

        Operators are applied strictly left to right, with no precedence.
        """
        binary_ops = [
            Tokens.PLUS, Tokens.MINUS, Tokens.MULTI, Tokens.DIV, Tokens.AND,
            Tokens.PIPE, Tokens.LESS_THAN, Tokens.GREATER_THAN, Tokens.EQUAL]
        self.write_element_start('expression')
        self.compile_term()
        while self.next_is(binary_ops):
            op_token = self.compile_symbol(binary_ops)
            self.compile_term()
            if op_token == Tokens.PLUS:
                self.vmw.write_arithmetic(Command.ADD)
            elif op_token == Tokens.MINUS:
                self.vmw.write_arithmetic(Command.SUB)
            elif op_token == Tokens.MULTI:
                self.vmw.write_call('Math.multiply', 2)
            elif op_token == Tokens.DIV:
                self.vmw.write_call('Math.divide', 2)
            elif op_token == Tokens.AND:
                self.vmw.write_arithmetic(Command.AND)
            elif op_token == Tokens.PIPE:
                self.vmw.write_arithmetic(Command.OR)
            elif op_token == Tokens.LESS_THAN:
                self.vmw.write_arithmetic(Command.LT)
            elif op_token == Tokens.GREATER_THAN:
                self.vmw.write_arithmetic(Command.GT)
            elif op_token == Tokens.EQUAL:
                self.vmw.write_arithmetic(Command.EQ)
        self.write_element_end('expression')

    def compile_term(self):
        """Compile a single term and push its value.

        Raises a syntax error when the next token cannot start a term.
        """
        self.write_element_start('term')
        if self.next_type_is(TokenType.INT_CONST):
            value_str = self.compile_integer_constant()
            self.vmw.write_push(Segment.CONST, value_str)
        elif self.next_type_is(TokenType.STRING_CONST):
            self.compile_string_constant()
        elif self.next_is(Tokens.NULL):
            self.compile_keyword(Tokens.NULL)
            self.vmw.write_push(Segment.CONST, 0)
        elif self.next_is(Tokens.THIS):
            self.compile_keyword(Tokens.THIS)
            self.vmw.write_push(Segment.POINTER, 0)
        elif self.next_is(Tokens.TRUE):
            # true is emitted as not(0).
            self.compile_keyword(Tokens.TRUE)
            self.vmw.write_push(Segment.CONST, 0)
            self.vmw.write_arithmetic(Command.NOT)
        elif self.next_is(Tokens.FALSE):
            self.compile_keyword(Tokens.FALSE)
            self.vmw.write_push(Segment.CONST, 0)
        elif self.next_type_is(TokenType.IDENTIFIER):
            if self.next_is(Tokens.LEFT_BOX_BRACKET, idx=1):
                # Array read: push base, push index, add, then fetch
                # through THAT.
                self.compile_var_name()
                self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
                self.compile_expression()
                self.vmw.write_arithmetic(Command.ADD)
                self.vmw.write_pop(Segment.POINTER, 1)
                self.vmw.write_push(Segment.THAT, 0)
                self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
            elif self.next_is([Tokens.LEFT_ROUND_BRACKET, Tokens.DOT], idx=1):
                self.compile_subroutine_call()
            else:
                self.compile_var_name()
        elif self.next_is(Tokens.LEFT_ROUND_BRACKET):
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        elif self.next_is(Tokens.TILDE):
            self.compile_symbol(Tokens.TILDE)
            self.compile_term()
            self.vmw.write_arithmetic(Command.NOT)
        elif self.next_is(Tokens.MINUS):
            self.compile_symbol(Tokens.MINUS)
            self.compile_term()
            self.vmw.write_arithmetic(Command.NEG)
        else:
            self.raise_syntax_error(
                'Expected a term but found %r' % (self.tokenizer.see_next(),))
        self.write_element_end('term')

    # ------------------------------------------------------------------
    # Lookahead helpers
    # ------------------------------------------------------------------

    def next_type_is(self, token_type):
        """Return True when the upcoming token has type ``token_type``."""
        return self.tokenizer.see_next().type == token_type

    def compile_type(self):
        """Consume a type (int / char / boolean / class name); return its token."""
        type_token = self.tokenizer.see_next()
        builtin_types = [Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN]
        if self.next_is(builtin_types):
            self.compile_keyword(builtin_types)
        else:
            self.compile_class_name()
        return type_token

    def next_is_statement(self):
        """Return True when the upcoming token starts a statement."""
        return self.next_is([Tokens.LET, Tokens.IF, Tokens.WHILE, Tokens.DO,
                             Tokens.RETURN])

    def next_is(self, tokens, idx=0):
        """Return True when the token ``idx`` positions ahead matches.

        ``tokens`` is either a single token or a list of acceptable tokens.
        """
        upcoming = self.tokenizer.see_next(idx=idx)
        if isinstance(tokens, list):
            return upcoming in tokens
        return upcoming == tokens

    def next_is_class_var_dec(self):
        """Return True when the upcoming token is 'static' or 'field'."""
        return self.next_is([Tokens.STATIC, Tokens.FIELD])

    def next_is_subroutine_dec(self):
        """Return True when the upcoming token starts a subroutine."""
        return self.next_is([Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD])

    # ------------------------------------------------------------------
    # Terminals
    # ------------------------------------------------------------------

    def _compile_expected(self, element, tokens):
        """Advance, require the new token to match ``tokens``, and log it.

        ``tokens`` is a single token or a list of alternatives. The matched
        token is written as an ``element`` XML node and returned; a
        mismatch raises a syntax error naming both sides.
        """
        self.tokenizer.advance()
        current = self.tokenizer.current_token
        if isinstance(tokens, list):
            matched = current in tokens
        else:
            matched = current == tokens
        if not matched:
            self.raise_syntax_error(
                'Expected %s %r but found %r' % (element, tokens, current))
        self.write_element(element, current.token_escaped)
        return current

    def compile_symbol(self, tokens):
        """Consume the next token, which must be one of the given symbols."""
        return self._compile_expected('symbol', tokens)

    def compile_keyword(self, tokens):
        """Consume the next token, which must be one of the given keywords."""
        return self._compile_expected('keyword', tokens)

    def compile_identifier(self):
        """Consume the next token, which must be an identifier; return it."""
        self.tokenizer.advance()
        current = self.tokenizer.current_token
        if not isinstance(current, Identifier):
            self.raise_syntax_error(
                'Expected an identifier but found %r' % (current,))
        self.write_element('identifier', current.token_escaped)
        return current

    def compile_integer_constant(self):
        """Consume an integer constant and return its escaped text."""
        self.tokenizer.advance()
        current = self.tokenizer.current_token
        if not isinstance(current, IntegerConstant):
            self.raise_syntax_error(
                'Expected an integer constant but found %r' % (current,))
        self.write_element('integerConstant', current.token_escaped)
        return current.token_escaped

    def compile_string_constant(self):
        """Consume a string constant and emit code that builds it.

        The string is created with ``String.new`` and filled one character
        at a time with ``String.appendChar``.
        """
        self.tokenizer.advance()
        current = self.tokenizer.current_token
        if not isinstance(current, StringConstant):
            self.raise_syntax_error(
                'Expected a string constant but found %r' % (current,))
        string = current.token
        self.write_element('stringConstant', current.token_escaped)
        self.vmw.write_push(Segment.CONST, len(string))
        self.vmw.write_call('String.new', 1)
        for c in string:
            self.vmw.write_push(Segment.CONST, ord(c))
            self.vmw.write_call('String.appendChar', 2)

    # ------------------------------------------------------------------
    # XML output
    # ------------------------------------------------------------------

    def write_element(self, elem_name, value):
        """Write a one-line ``<name> value </name>`` element."""
        self.wf.write('<%s> %s </%s>\n' % (elem_name, value, elem_name))

    def write_element_start(self, elem_name):
        """Write an opening tag on its own line."""
        self.wf.write('<%s>\n' % elem_name)

    def write_element_end(self, elem_name):
        """Write a closing tag on its own line."""
        self.wf.write('</%s>\n' % elem_name)

    def raise_syntax_error(self, msg):
        """Abort compilation by raising ``Exception(msg)``."""
        raise Exception('%s' % msg)
class CompilationEngine():
    """Recursive-descent compiler for a single Jack class.

    Tokens come from a ``JackTokenizer``. VM commands are emitted through a
    ``VmWriter`` and an annotated XML parse tree is accumulated in
    ``self._xml_text``. Call ``compile_class()`` to compile and ``save()``
    to have the VM writer save its output.

    Tokens are consumed positionally: the terminal helpers do not check
    that the token they consume is the one the grammar expects.
    """

    def __init__(self, jack_file, vm_file):
        """Prepare to compile ``jack_file`` into ``vm_file``."""
        self._jack_tokenizer = JackTokenizer(jack_file)
        self._vm_file = vm_file
        self._vm_text = ''
        self._xml_text = ''
        self._symbol_table = SymbolTable()
        self._vm_writer = VmWriter(self._vm_file)
        self._class_name = None
        self._label_count = 0
        self._compiled_class_name = ''

    # ------------------------------------------------------------------
    # Program structure
    # ------------------------------------------------------------------

    def compile_class(self):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        self._write_start('class')
        self._compile_keyword()
        self._write('IdentifierInfo', 'category: class')
        self._compiled_class_name = self._compile_identifier()
        self._compile_symbol()
        while self._what_next_token([Keyword.STATIC, Keyword.FIELD]):
            self.compile_class_var_dec()
        while self._what_next_token(
                [Keyword.CONSTRUCTOR, Keyword.FUNCTION, Keyword.METHOD]):
            self.compile_subroutine_dec()
        self._compile_symbol()
        self._write_end('class')

    def compile_class_var_dec(self):
        """classVarDec: ('static'|'field') type varName (',' varName)* ';'"""
        self._write_start('classVarDec')
        token = self._compile_keyword()
        kind = None
        if token == Keyword.STATIC:
            kind = Kind.STATIC
        elif token == Keyword.FIELD:
            kind = Kind.FIELD
        type_token = self._compile_type()
        self._compile_var_name(declaration=True, type=type_token, kind=kind)
        while self._what_next_token([Symbol.COMMA]):
            self._compile_symbol()
            self._compile_var_name(declaration=True, type=type_token, kind=kind)
        self._compile_symbol()
        self._write_end('classVarDec')

    def compile_subroutine_dec(self):
        """subroutineDec: kind ('void'|type) name '(' parameterList ')' body"""
        self._symbol_table.start_subroutine()
        self._write_start('subroutineDec')
        token = self._compile_keyword()
        if self._jack_tokenizer.next_token() == Keyword.VOID:
            self._compile_keyword()
        else:
            self._compile_type()
        self._write('IdentifierInfo', 'category: subroutine')
        subroutine_name = self._compile_identifier()
        self._compile_symbol()
        if token == Keyword.METHOD:
            # Reserve argument 0 for the receiver so declared parameters
            # are numbered after it.
            self._symbol_table.define('$this', self._compiled_class_name,
                                      Kind.ARG)
        self.compile_parameter_list()
        self._compile_symbol()
        self.compile_subroutine_body(subroutine_name, token)
        self._write_end('subroutineDec')

    def compile_parameter_list(self):
        """parameterList: ((type varName) (',' type varName)*)?"""
        self._write_start('parameterList')
        if (self._jack_tokenizer.next_token()
                in [Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]
                or self._jack_tokenizer.next_token_type() == Type.IDENTIFIER):
            type_token = self._compile_type()
            self._compile_var_name(declaration=True, type=type_token,
                                   kind=Kind.ARG)
            while self._what_next_token([Symbol.COMMA]):
                self._compile_symbol()
                type_token = self._compile_type()
                self._compile_var_name(declaration=True, type=type_token,
                                       kind=Kind.ARG)
        self._write_end('parameterList')

    def compile_subroutine_body(self, subroutine_name, subroutine_token):
        """subroutineBody: '{' varDec* statements '}'

        Emits the ``function`` command once all locals are counted, then
        the prologue that sets up ``this`` for methods and constructors.
        Returns the number of local variables.
        """
        self._write_start('subroutineBody')
        self._compile_symbol()
        local_num = 0
        while self._what_next_token([Keyword.VAR]):
            local_num += self.compile_var_dec()
        self._vm_writer.write_function(
            '%s.%s' % (self._compiled_class_name, subroutine_name), local_num)
        if subroutine_token == Keyword.METHOD:
            # The receiver arrives as argument 0; anchor THIS to it.
            self._vm_writer.write_push(Segment.ARG, 0)
            self._vm_writer.write_pop(Segment.POINTER, 0)
        elif subroutine_token == Keyword.CONSTRUCTOR:
            # Allocate one word per field and anchor THIS to the new block.
            self._vm_writer.write_push(
                Segment.CONST, self._symbol_table.var_count(Kind.FIELD))
            self._vm_writer.write_call('Memory.alloc', 1)
            self._vm_writer.write_pop(Segment.POINTER, 0)
        self.compile_statements()
        self._compile_symbol()
        self._write_end('subroutineBody')
        return local_num

    def compile_var_dec(self):
        """varDec: 'var' type varName (',' varName)* ';'

        Returns the number of local variables declared by this statement.
        """
        self._write_start('varDec')
        self._compile_keyword()
        type_token = self._compile_type()
        self._compile_var_name(declaration=True, type=type_token, kind=Kind.VAR)
        var_num = 1
        while self._what_next_token([Symbol.COMMA]):
            self._compile_symbol()
            self._compile_var_name(declaration=True, type=type_token,
                                   kind=Kind.VAR)
            var_num += 1
        self._compile_symbol()
        self._write_end('varDec')
        return var_num

    # ------------------------------------------------------------------
    # Statements
    # ------------------------------------------------------------------

    def compile_statements(self):
        """statements: (let | if | while | do | return)*"""
        self._write_start('statements')
        while self._what_next_token([
                Keyword.LET, Keyword.IF, Keyword.WHILE, Keyword.DO,
                Keyword.RETURN]):
            if self._what_next_token([Keyword.LET]):
                self.compile_let()
            elif self._what_next_token([Keyword.IF]):
                self.compile_if()
            elif self._what_next_token([Keyword.WHILE]):
                self.compile_while()
            elif self._what_next_token([Keyword.DO]):
                self.compile_do()
            elif self._what_next_token([Keyword.RETURN]):
                self.compile_return()
        self._write_end('statements')

    def compile_let(self):
        """letStatement: 'let' varName ('[' expression ']')? '=' expression ';'"""
        self._write_start('letStatement')
        self._compile_keyword()
        let_var = self._compile_var_name(let=True)
        if self._what_next_token([Symbol.LEFT_BOX_BRACKET]):
            self._compile_symbol()      # '['
            self.compile_expression()   # index
            self._compile_symbol()      # ']'
            self._compile_symbol()      # '='
            # Leave base + index on the stack while the right-hand side is
            # evaluated. The temp segment is shared by every subroutine, so
            # parking the address there would let a call made by the
            # right-hand side overwrite it.
            self._push_variable(let_var)
            self._vm_writer.write_arithmetic(Command.ADD)
            self.compile_expression()   # value
            # No call can happen between these four commands, so temp 0 is
            # safe as a short-lived scratch slot.
            self._vm_writer.write_pop(Segment.TEMP, 0)
            self._vm_writer.write_pop(Segment.POINTER, 1)
            self._vm_writer.write_push(Segment.TEMP, 0)
            self._vm_writer.write_pop(Segment.THAT, 0)
            self._compile_symbol()      # ';'
        else:
            self._compile_symbol()      # '='
            self.compile_expression()
            self._compile_symbol()      # ';'
            self._pop_variable(let_var)
        self._write_end('letStatement')

    def compile_if(self):
        """ifStatement: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?"""
        self._write_start('ifStatement')
        self._compile_keyword()
        self._compile_symbol()
        self.compile_expression()
        self._compile_symbol()
        # Jump to the else part when the condition is false.
        self._vm_writer.write_arithmetic(Command.NOT)
        else_label = self._new_label()
        end_label = self._new_label()
        self._vm_writer.write_if(else_label)
        self._compile_symbol()
        self.compile_statements()
        self._compile_symbol()
        self._vm_writer.write_goto(end_label)
        self._vm_writer.write_label(else_label)
        if self._what_next_token([Keyword.ELSE]):
            self._compile_keyword()
            self._compile_symbol()
            self.compile_statements()
            self._compile_symbol()
        self._vm_writer.write_label(end_label)
        self._write_end('ifStatement')

    def compile_while(self):
        """whileStatement: 'while' '(' expression ')' '{' statements '}'"""
        self._write_start('whileStatement')
        top_label = self._new_label()
        exit_label = self._new_label()
        self._compile_keyword()
        self._vm_writer.write_label(top_label)
        self._compile_symbol()
        self.compile_expression()
        self._compile_symbol()
        # Leave the loop when the condition is false.
        self._vm_writer.write_arithmetic(Command.NOT)
        self._vm_writer.write_if(exit_label)
        self._compile_symbol()
        self.compile_statements()
        self._compile_symbol()
        self._vm_writer.write_goto(top_label)
        self._vm_writer.write_label(exit_label)
        self._write_end('whileStatement')

    def compile_do(self):
        """doStatement: 'do' subroutineCall ';'"""
        self._write_start('doStatement')
        self._compile_keyword()
        self._compile_subroutine_call()
        self._compile_symbol()
        self._write_end('doStatement')
        # Discard the value the call left on the stack.
        self._vm_writer.write_pop(Segment.TEMP, 0)

    def compile_return(self):
        """returnStatement: 'return' expression? ';'"""
        self._write_start('returnStatement')
        self._compile_keyword()
        if not self._what_next_token([Symbol.SEMI_COLON]):
            self.compile_expression()
        else:
            # A bare return still pushes a value: constant 0.
            self._vm_writer.write_push(Segment.CONST, 0)
        self._compile_symbol()
        self._vm_writer.write_return()
        self._write_end('returnStatement')

    # ------------------------------------------------------------------
    # Expressions
    # ------------------------------------------------------------------

    def compile_expression(self):
        """expression: term (op term)*

        Operators are applied strictly left to right, with no precedence.
        """
        self._write_start('expression')
        self.compile_term()
        while self._what_next_token([
                Symbol.PLUS, Symbol.MINUS, Symbol.MULTI, Symbol.DIV,
                Symbol.AND, Symbol.PIPE, Symbol.LESS_THAN,
                Symbol.GREATER_THAN, Symbol.EQUAL]):
            token = self._compile_symbol()
            self.compile_term()
            if token == Symbol.PLUS:
                self._vm_writer.write_arithmetic(Command.ADD)
            elif token == Symbol.MINUS:
                self._vm_writer.write_arithmetic(Command.SUB)
            elif token == Symbol.MULTI:
                self._vm_writer.write_call('Math.multiply', 2)
            elif token == Symbol.DIV:
                self._vm_writer.write_call('Math.divide', 2)
            elif token == Symbol.AND:
                self._vm_writer.write_arithmetic(Command.AND)
            elif token == Symbol.PIPE:
                self._vm_writer.write_arithmetic(Command.OR)
            elif token == Symbol.LESS_THAN:
                self._vm_writer.write_arithmetic(Command.LT)
            elif token == Symbol.GREATER_THAN:
                self._vm_writer.write_arithmetic(Command.GT)
            elif token == Symbol.EQUAL:
                self._vm_writer.write_arithmetic(Command.EQ)
        self._write_end('expression')

    def compile_term(self):
        """Compile a single term and push its value.

        When the next token cannot start a term, an empty ``term`` element
        is written and no token is consumed.
        """
        self._write_start('term')
        if self._what_next_token_type([Type.INT_CONST]):
            value = self._compile_integer_constant()
            self._vm_writer.write_push(Segment.CONST, value)
        elif self._what_next_token_type([Type.STRING_CONST]):
            # Build the string with String.new, then append each character.
            value = self._compile_string_constant()
            self._vm_writer.write_push(Segment.CONST, len(value))
            self._vm_writer.write_call('String.new', 1)
            for v in value:
                self._vm_writer.write_push(Segment.CONST, ord(v))
                self._vm_writer.write_call('String.appendChar', 2)
        elif self._what_next_token([Keyword.NULL]):
            self._compile_keyword()
            self._vm_writer.write_push(Segment.CONST, 0)
        elif self._what_next_token([Keyword.THIS]):
            self._compile_keyword()
            self._vm_writer.write_push(Segment.POINTER, 0)
        elif self._what_next_token([Keyword.TRUE]):
            # true is emitted as not(0).
            self._compile_keyword()
            self._vm_writer.write_push(Segment.CONST, 0)
            self._vm_writer.write_arithmetic(Command.NOT)
        elif self._what_next_token([Keyword.FALSE]):
            self._compile_keyword()
            self._vm_writer.write_push(Segment.CONST, 0)
        elif self._what_next_token_type([Type.IDENTIFIER]):
            if self._what_next_token([Symbol.LEFT_BOX_BRACKET], 1):
                # Array read: push base, push index, add, then fetch
                # through THAT.
                self._compile_var_name()
                self._compile_symbol()
                self.compile_expression()
                self._vm_writer.write_arithmetic(Command.ADD)
                self._vm_writer.write_pop(Segment.POINTER, 1)
                self._vm_writer.write_push(Segment.THAT, 0)
                self._compile_symbol()
            elif self._what_next_token(
                    [Symbol.LEFT_ROUND_BRACKET, Symbol.DOT], 1):
                self._compile_subroutine_call()
            else:
                self._compile_var_name()
        elif self._what_next_token([Symbol.LEFT_ROUND_BRACKET]):
            self._compile_symbol()
            self.compile_expression()
            self._compile_symbol()
        elif self._what_next_token([Symbol.TILDE]):
            self._compile_symbol()
            self.compile_term()
            self._vm_writer.write_arithmetic(Command.NOT)
        elif self._what_next_token([Symbol.MINUS]):
            self._compile_symbol()
            self.compile_term()
            self._vm_writer.write_arithmetic(Command.NEG)
        self._write_end('term')

    def compile_expression_list(self):
        """expressionList: (expression (',' expression)*)?

        Returns the number of expressions compiled.
        """
        self._write_start('expressionList')
        arg_num = 0
        if not self._what_next_token([Symbol.RIGHT_ROUND_BRACKET]):
            self.compile_expression()
            arg_num += 1
            while self._what_next_token([Symbol.COMMA]):
                self._compile_symbol()
                self.compile_expression()
                arg_num += 1
        self._write_end('expressionList')
        return arg_num

    def save(self):
        """Ask the VM writer to save its output."""
        self._vm_writer.save()

    # ------------------------------------------------------------------
    # Shared private helpers
    # ------------------------------------------------------------------

    def _compile_type(self):
        """Consume a type (int / char / boolean / class name); return it."""
        type_token = self._jack_tokenizer.next_token()
        if self._what_next_token([Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
            self._compile_keyword()
        else:
            self._write('IdentifierInfo', 'category: class')
            self._compile_identifier()
        return type_token

    def _compile_subroutine_call(self):
        """Compile ``name(args)``, ``var.name(args)`` or ``Class.name(args)``.

        The first two forms pass the receiver as an extra leading argument.
        """
        if self._what_next_token([Symbol.LEFT_ROUND_BRACKET], 1):
            # Unqualified call: a method on the current object.
            self._write('IdentifierInfo', 'category: subroutine')
            subroutine_name = self._compile_identifier()
            self._compile_symbol()
            self._vm_writer.write_push(Segment.POINTER, 0)
            arg_num = self.compile_expression_list()
            self._compile_symbol()
            self._vm_writer.write_call(
                '%s.%s' % (self._compiled_class_name, subroutine_name),
                arg_num + 1)
            return

        identifier_str = self._jack_tokenizer.next_token()
        if self._symbol_table.kind_of(identifier_str):
            # Qualifier is a known variable: method call on that object.
            instance_name = self._compile_var_name(call=True)
            self._compile_symbol()
            self._write('IdentifierInfo', 'category: subroutine')
            subroutine_name = self._compile_identifier()
            self._compile_symbol()
            self._push_variable(instance_name)
            arg_num = self.compile_expression_list()
            self._compile_symbol()
            self._vm_writer.write_call(
                '%s.%s' % (self._symbol_table.type_of(instance_name),
                           subroutine_name),
                arg_num + 1)
        else:
            # Qualifier is treated as a class name: no receiver is pushed.
            self._write('IdentifierInfo', 'category: class')
            class_name = self._compile_identifier()
            self._compile_symbol()
            self._write('IdentifierInfo', 'category: subroutine')
            subroutine_name = self._compile_identifier()
            self._compile_symbol()
            arg_num = self.compile_expression_list()
            self._compile_symbol()
            self._vm_writer.write_call(
                '%s.%s' % (class_name, subroutine_name), arg_num)

    def _segment_of(self, name):
        """Return the VM segment holding variable ``name``, or None.

        None is returned when the symbol table reports a kind that is not
        one of ARG / VAR / FIELD / STATIC.
        """
        kind = self._symbol_table.kind_of(name)
        if kind == Kind.ARG:
            return Segment.ARG
        if kind == Kind.VAR:
            return Segment.LOCAL
        if kind == Kind.FIELD:
            return Segment.THIS
        if kind == Kind.STATIC:
            return Segment.STATIC
        return None

    def _push_variable(self, name):
        """Emit a push of ``name``; emit nothing if it has no known segment."""
        segment = self._segment_of(name)
        if segment is not None:
            self._vm_writer.write_push(segment,
                                       self._symbol_table.index_of(name))

    def _pop_variable(self, name):
        """Emit a pop into ``name``; emit nothing if it has no known segment."""
        segment = self._segment_of(name)
        if segment is not None:
            self._vm_writer.write_pop(segment,
                                      self._symbol_table.index_of(name))

    def _what_next_token(self, values, index=0):
        """Return True when the token ``index`` ahead is in ``values``."""
        return self._jack_tokenizer.next_token(index) in values

    def _what_next_token_type(self, values, index=0):
        """Return True when the type of the token ``index`` ahead is in ``values``."""
        return self._jack_tokenizer.next_token_type(index) in values

    def _consume(self, element):
        """Advance one token, log it as an ``element`` node, and return it."""
        self._jack_tokenizer.advance()
        value = self._jack_tokenizer.token()
        self._write(element, value)
        return value

    def _compile_symbol(self):
        return self._consume('symbol')

    def _compile_keyword(self):
        return self._consume('keyword')

    def _compile_identifier(self):
        return self._consume('identifier')

    def _compile_integer_constant(self):
        return self._consume('integerConstant')

    def _compile_string_constant(self):
        return self._consume('stringConstant')

    def _compile_var_name(self, declaration=False, type=None, kind=None,
                          let=False, call=False):
        """Consume a variable identifier and return it.

        With ``declaration`` the name is added to the symbol table using
        ``type`` and ``kind``. With ``let`` or ``call`` no VM code is
        emitted (the caller handles it). Otherwise the variable's value is
        pushed onto the stack.
        """
        name = self._jack_tokenizer.next_token()
        if declaration:
            self._symbol_table.define(name, type, kind)
        elif not (let or call):
            self._push_variable(name)
        self._write(
            'IdentifierInfo',
            'declaration: %s, kind: %s, index: %d' % (
                declaration,
                self._symbol_table.kind_of(name),
                self._symbol_table.index_of(name)))
        return self._compile_identifier()

    # ------------------------------------------------------------------
    # XML output
    # ------------------------------------------------------------------

    def _write(self, element, value):
        """Append a one-line ``<element> value </element>`` node."""
        self._xml_text += '<{}> {} </{}>\n'.format(element, value, element)

    def _write_start(self, element):
        """Append an opening tag on its own line."""
        self._xml_text += '<%s>\n' % element

    def _write_end(self, element):
        """Append a closing tag on its own line."""
        self._xml_text += '</%s>\n' % element

    def _new_label(self):
        """Return a fresh label name of the form ``LABEL_<n>``."""
        self._label_count += 1
        return 'LABEL_%d' % self._label_count
class CompilationEngine():
    """Recursive-descent compiler for a single Jack class.

    Tokens are pulled from a ``JackTokenizer``.  For every grammar construct
    the engine writes an XML parse-tree element to ``self.wf`` and emits the
    matching VM commands through the supplied VM writer.  The engine is a
    context manager; leaving the ``with`` block closes the XML file.
    """

    def __init__(self, filepath, vm_writer):
        """Prepare tokenizer, symbol table and XML output for *filepath*.

        Args:
            filepath: Path of the source file.  The XML output path is built
                by dropping the last five characters (presumably the
                ``.jack`` suffix) and appending ``.myImpl.xml``.
            vm_writer: Object used to emit VM commands (``write_push``,
                ``write_pop``, ``write_call``, ...).
        """
        # Build everything that can fail before opening the output file so a
        # tokenizer error does not leave an unclosed file handle behind.
        self.tokenizer = JackTokenizer(filepath)
        self.symbol_table = SymbolTable()
        self.vmw = vm_writer
        self.compiled_class_name = None
        self.label_num = 0
        self.wf = open(filepath[:-5] + ".myImpl.xml", 'w')

    def __enter__(self):
        """Return the engine itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the XML output file; exceptions are not suppressed."""
        self.wf.close()

    def get_new_label(self):
        """Return a fresh ``LABEL_<n>`` name, unique within this engine."""
        self.label_num += 1
        return 'LABEL_%d' % self.label_num

    def compile(self):
        """Compile the whole input, which must consist of one class."""
        self.compile_class()

    # ------------------------------------------------------------------
    # Program structure
    # ------------------------------------------------------------------

    def compile_class(self):
        """Compile: CLASS className '{' classVarDec* subroutineDec* '}'."""
        self.write_element_start('class')
        self.compile_keyword([Tokens.CLASS])
        self.compiled_class_name = self.compile_class_name().token
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        while self.next_is_class_var_dec():
            self.compile_class_var_dec()
        while self.next_is_subroutine_dec():
            self.compile_subroutine_dec()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.write_element_end('class')

    def compile_class_var_dec(self):
        """Compile: (STATIC | FIELD) type varName (',' varName)* ';'.

        Every declared name is registered in the symbol table.
        """
        self.write_element_start('classVarDec')
        token = self.compile_keyword([Tokens.STATIC, Tokens.FIELD])
        kind = None
        if token == Tokens.STATIC:
            kind = IdentifierKind.STATIC
        elif token == Tokens.FIELD:
            kind = IdentifierKind.FIELD
        else:
            self.raise_syntax_error('Unexpected token')
        type_name = self.compile_type().token
        self.compile_var_name(declaration=True, type=type_name, kind=kind)
        while self.next_is(Tokens.COMMA):
            self.compile_symbol(Tokens.COMMA)
            self.compile_var_name(declaration=True, type=type_name, kind=kind)
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('classVarDec')

    def compile_var_dec(self):
        """Compile: VAR type varName (',' varName)* ';'.

        Returns:
            The number of local variables declared by this statement.
        """
        self.write_element_start('varDec')
        self.compile_keyword(Tokens.VAR)
        type_name = self.compile_type().token
        self.compile_var_name(declaration=True, type=type_name,
                              kind=IdentifierKind.VAR)
        var_num = 1
        while self.next_is(Tokens.COMMA):
            self.compile_symbol(Tokens.COMMA)
            self.compile_var_name(declaration=True, type=type_name,
                                  kind=IdentifierKind.VAR)
            var_num += 1
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('varDec')
        return var_num

    def compile_subroutine_dec(self):
        """Compile one constructor, function or method declaration.

        The subroutine-level symbol table is reset first.  For a method, a
        ``$this`` entry is defined as the first argument so the declared
        parameters are numbered after it.
        """
        self.symbol_table.start_subroutine()
        self.write_element_start('subroutineDec')
        token = self.compile_keyword(
            [Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD])
        if self.tokenizer.see_next() == Tokens.VOID:
            self.compile_keyword(Tokens.VOID)
        else:
            self.compile_type()
        subroutine_name = self.compile_subroutine_name().token
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
        if token == Tokens.METHOD:
            self.symbol_table.define('$this', self.compiled_class_name,
                                     IdentifierKind.ARG)
        self.compile_parameter_list()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        self.compile_subroutine_body(subroutine_name, token)
        self.write_element_end('subroutineDec')

    def compile_subroutine_name(self):
        """Consume an identifier used as a subroutine name; return its token."""
        self.write_identifier_info('category: subroutine')
        return self.compile_identifier()

    def compile_class_name(self):
        """Consume an identifier used as a class name; return its token."""
        self.write_identifier_info('category: class')
        return self.compile_identifier()

    def compile_var_name(self, declaration=False, type=None, kind=None,
                         let=False, call=False):
        """Consume a variable-name identifier and return its token.

        Args:
            declaration: When true, define the name in the symbol table with
                the given *type* and *kind*.
            type: Declared type, used only when *declaration* is true.
            kind: Identifier kind, used only when *declaration* is true.
            let: The name is an assignment target; no VM code is emitted
                here because the caller emits it.
            call: The name is the receiver of a method call; no VM code is
                emitted here because the caller emits it.

        When none of the flags is set the variable is being read, so its
        value is pushed onto the VM stack.
        """
        name = self.tokenizer.see_next().token
        if declaration:
            self.symbol_table.define(name, type, kind)
        elif not let and not call:
            self._push_variable(name)
        self.write_identifier_info(
            'declaration: %s, kind: %s, index: %d' %
            (declaration, self.symbol_table.kind_of(name),
             self.symbol_table.index_of(name)))
        return self.compile_identifier()

    def write_identifier_info(self, value):
        """Write an ``IdentifierInfo`` annotation element to the XML."""
        self.write_element('IdentifierInfo', value)

    def compile_parameter_list(self):
        """Compile a possibly empty parameter list (without the brackets).

        Every parameter is defined in the symbol table as an argument.
        """
        self.write_element_start('parameterList')
        upcoming = self.tokenizer.see_next()
        if (upcoming in [Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN]
                or isinstance(upcoming, Identifier)):
            type_token = self.compile_type()
            self.compile_var_name(declaration=True, type=type_token.token,
                                  kind=IdentifierKind.ARG)
            while self.next_is(Tokens.COMMA):
                self.compile_symbol(Tokens.COMMA)
                type_token = self.compile_type()
                self.compile_var_name(declaration=True,
                                      type=type_token.token,
                                      kind=IdentifierKind.ARG)
        self.write_element_end('parameterList')

    def compile_subroutine_body(self, subroutine_name, subroutine_dec_token):
        """Compile: '{' varDec* statements '}' and emit the function header.

        Args:
            subroutine_name: Name used for the VM function
                (``<class>.<subroutine_name>``).
            subroutine_dec_token: The CONSTRUCTOR / FUNCTION / METHOD keyword
                token; it selects the prologue that sets up ``pointer 0``.

        Returns:
            The number of local variables declared in the body.
        """
        self.write_element_start('subroutineBody')
        # Debug trace to stdout, kept from the original implementation.
        print(subroutine_name, subroutine_dec_token)
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        local_num = 0
        while self.next_is(Tokens.VAR):
            local_num += self.compile_var_dec()
        # The VM function header needs the local count, so it can only be
        # written after all varDecs have been consumed.
        self.vmw.write_function(
            "%s.%s" % (self.compiled_class_name, subroutine_name), local_num)
        if subroutine_dec_token == Tokens.METHOD:
            # A method receives its object as argument 0.
            self.vmw.write_push(Segment.ARG, 0)
            self.vmw.write_pop(Segment.POINTER, 0)
        elif subroutine_dec_token == Tokens.CONSTRUCTOR:
            # Allocate one word per field and point 'this' at the block.
            self.vmw.write_push(
                Segment.CONST,
                self.symbol_table.var_count(IdentifierKind.FIELD))
            self.vmw.write_call('Memory.alloc', 1)
            self.vmw.write_pop(Segment.POINTER, 0)
        elif subroutine_dec_token == Tokens.FUNCTION:
            pass
        else:
            self.raise_syntax_error('Invalid token')
        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.write_element_end('subroutineBody')
        # Debug dump of the argument table, kept from the original.
        print("=========")
        arg_table = self.symbol_table.arg_table
        for key in arg_table:
            print(arg_table[key].type, key, "kind:", arg_table[key].kind,
                  "index:", arg_table[key].index)
        return local_num

    # ------------------------------------------------------------------
    # Statements
    # ------------------------------------------------------------------

    def compile_statements(self):
        """Compile a sequence of zero or more statements."""
        self.write_element_start('statements')
        while self.next_is_statement():
            self.compile_statement()
        self.write_element_end('statements')

    def compile_statement(self):
        """Compile one let / if / while / do / return statement.

        Does nothing when the next token does not start a statement.
        """
        if self.next_is(Tokens.LET):
            self._compile_let()
        elif self.next_is(Tokens.IF):
            self._compile_if()
        elif self.next_is(Tokens.WHILE):
            self._compile_while()
        elif self.next_is(Tokens.DO):
            self._compile_do()
        elif self.next_is(Tokens.RETURN):
            self._compile_return()

    def _compile_let(self):
        """Compile: LET varName ('[' expression ']')? '=' expression ';'."""
        self.write_element_start('letStatement')
        self.compile_keyword(Tokens.LET)
        let_var = self.compile_var_name(let=True).token
        if self.next_is(Tokens.LEFT_BOX_BRACKET):
            self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
            self.compile_expression()  # index
            self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
            self.compile_symbol(Tokens.EQUAL)
            # Target address = base + index, parked in temp 2 while the
            # right-hand side is evaluated.
            self._push_variable(let_var)
            self.vmw.write_arithmetic(Command.ADD)
            self.vmw.write_pop(Segment.TEMP, 2)
            self.compile_expression()  # value
            # Point THAT at the target address and store the value.
            self.vmw.write_push(Segment.TEMP, 2)
            self.vmw.write_pop(Segment.POINTER, 1)
            self.vmw.write_pop(Segment.THAT, 0)
            self.compile_symbol(Tokens.SEMI_COLON)
        else:
            self.compile_symbol(Tokens.EQUAL)
            self.compile_expression()
            self.compile_symbol(Tokens.SEMI_COLON)
            self._pop_variable(let_var)
        self.write_element_end('letStatement')

    def _compile_if(self):
        """Compile: IF '(' expression ')' '{' statements '}' (ELSE '{' ... '}')?."""
        self.write_element_start('ifStatement')
        self.compile_keyword(Tokens.IF)
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
        self.compile_expression()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        # Negate the condition so a false condition jumps past the if-body.
        self.vmw.write_arithmetic(Command.NOT)
        else_label = self.get_new_label()
        end_label = self.get_new_label()
        self.vmw.write_if(else_label)
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.vmw.write_goto(end_label)
        self.vmw.write_label(else_label)
        if self.next_is(Tokens.ELSE):
            self.compile_keyword(Tokens.ELSE)
            self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
            self.compile_statements()
            self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.vmw.write_label(end_label)
        self.write_element_end('ifStatement')

    def _compile_while(self):
        """Compile: WHILE '(' expression ')' '{' statements '}'."""
        self.write_element_start('whileStatement')
        loop_label = self.get_new_label()
        exit_label = self.get_new_label()
        self.compile_keyword(Tokens.WHILE)
        self.vmw.write_label(loop_label)
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
        self.compile_expression()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        # Leave the loop as soon as the condition is false.
        self.vmw.write_arithmetic(Command.NOT)
        self.vmw.write_if(exit_label)
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.vmw.write_goto(loop_label)
        self.vmw.write_label(exit_label)
        self.write_element_end('whileStatement')

    def _compile_do(self):
        """Compile: DO subroutineCall ';' and discard the returned value."""
        self.write_element_start('doStatement')
        self.compile_keyword(Tokens.DO)
        self.compile_subroutine_call()
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('doStatement')
        # The call leaves a value on the stack; drop it into temp 0.
        self.vmw.write_pop(Segment.TEMP, 0)

    def _compile_return(self):
        """Compile: RETURN expression? ';' (constant 0 when no expression)."""
        self.write_element_start('returnStatement')
        self.compile_keyword(Tokens.RETURN)
        if not self.next_is(Tokens.SEMI_COLON):
            self.compile_expression()
        else:
            self.vmw.write_push(Segment.CONST, 0)
        self.compile_symbol(Tokens.SEMI_COLON)
        self.vmw.write_return()
        self.write_element_end('returnStatement')

    # ------------------------------------------------------------------
    # Expressions
    # ------------------------------------------------------------------

    def compile_subroutine_call(self):
        """Compile a subroutine call and emit the VM ``call`` command.

        Three forms are handled:
          * ``name(args)``      - method of the class being compiled; the
            current object (``pointer 0``) is passed as a hidden argument.
          * ``var.name(args)``  - method on the object held by a known
            variable; the variable is passed as a hidden argument.
          * ``Class.name(args)`` - call without a hidden argument.
        """
        if self.next_is(Tokens.LEFT_ROUND_BRACKET, idx=1):
            subroutinename = self.compile_subroutine_name().token
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.vmw.write_push(Segment.POINTER, 0)
            argnum = self.compile_expression_list()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.vmw.write_call(
                "%s.%s" % (self.compiled_class_name, subroutinename),
                argnum + 1)
            return

        identifier_str = self.tokenizer.see_next().token
        # NOTE(review): relies on kind_of() being falsy for unknown names and
        # truthy for every real kind - confirm against SymbolTable.
        if self.symbol_table.kind_of(identifier_str):
            instance_name = self.compile_var_name(call=True).token
            self.compile_symbol(Tokens.DOT)
            subroutinename = self.compile_subroutine_name().token
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self._push_variable(instance_name)
            argnum = self.compile_expression_list()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.vmw.write_call(
                "%s.%s" % (self.symbol_table.type_of(instance_name),
                           subroutinename),
                argnum + 1)
        else:
            classname = self.compile_class_name().token
            self.compile_symbol(Tokens.DOT)
            subroutinename = self.compile_subroutine_name().token
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            argnum = self.compile_expression_list()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.vmw.write_call("%s.%s" % (classname, subroutinename),
                                argnum)

    def compile_expression_list(self):
        """Compile a comma-separated expression list; return its length."""
        self.write_element_start('expressionList')
        argnum = 0
        if not self.next_is(Tokens.RIGHT_ROUND_BRACKET):
            self.compile_expression()
            argnum += 1
            while self.next_is(Tokens.COMMA):
                self.compile_symbol(Tokens.COMMA)
                self.compile_expression()
                argnum += 1
        self.write_element_end('expressionList')
        return argnum

    def compile_expression(self):
        """Compile: term (op term)*.

        Operators are applied strictly left to right; each operator's VM
        command is emitted after its right-hand term.
        """
        self.write_element_start('expression')
        self.compile_term()
        operators = [
            Tokens.PLUS, Tokens.MINUS, Tokens.MULTI, Tokens.DIV, Tokens.AND,
            Tokens.PIPE, Tokens.LESS_THAN, Tokens.GREATER_THAN, Tokens.EQUAL
        ]
        while self.next_is(operators):
            op_token = self.compile_symbol(operators)
            self.compile_term()
            self._write_binary_op(op_token)
        self.write_element_end('expression')

    def _write_binary_op(self, op_token):
        """Emit the VM command (or OS call) implementing a binary operator."""
        if op_token == Tokens.PLUS:
            self.vmw.write_arithmetic(Command.ADD)
        elif op_token == Tokens.MINUS:
            self.vmw.write_arithmetic(Command.SUB)
        elif op_token == Tokens.MULTI:
            self.vmw.write_call('Math.multiply', 2)
        elif op_token == Tokens.DIV:
            self.vmw.write_call('Math.divide', 2)
        elif op_token == Tokens.AND:
            self.vmw.write_arithmetic(Command.AND)
        elif op_token == Tokens.PIPE:
            self.vmw.write_arithmetic(Command.OR)
        elif op_token == Tokens.LESS_THAN:
            self.vmw.write_arithmetic(Command.LT)
        elif op_token == Tokens.GREATER_THAN:
            self.vmw.write_arithmetic(Command.GT)
        elif op_token == Tokens.EQUAL:
            self.vmw.write_arithmetic(Command.EQ)

    def compile_term(self):
        """Compile a single term and push its value onto the VM stack.

        Raises:
            Exception: If the next token cannot start a term.
        """
        self.write_element_start('term')
        if self.next_type_is(TokenType.INT_CONST):
            value_str = self.compile_integer_constant()
            self.vmw.write_push(Segment.CONST, value_str)
        elif self.next_type_is(TokenType.STRING_CONST):
            self.compile_string_constant()
        elif self.next_is(Tokens.NULL):
            self.compile_keyword(Tokens.NULL)
            self.vmw.write_push(Segment.CONST, 0)
        elif self.next_is(Tokens.THIS):
            self.compile_keyword(Tokens.THIS)
            self.vmw.write_push(Segment.POINTER, 0)
        elif self.next_is(Tokens.TRUE):
            self.compile_keyword(Tokens.TRUE)
            # true is emitted as "not 0".
            self.vmw.write_push(Segment.CONST, 0)
            self.vmw.write_arithmetic(Command.NOT)
        elif self.next_is(Tokens.FALSE):
            self.compile_keyword(Tokens.FALSE)
            self.vmw.write_push(Segment.CONST, 0)
        elif self.next_type_is(TokenType.IDENTIFIER):
            if self.next_is(Tokens.LEFT_BOX_BRACKET, idx=1):
                # Array read: push base, add index, then read through THAT.
                self.compile_var_name()
                self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
                self.compile_expression()
                self.vmw.write_arithmetic(Command.ADD)
                self.vmw.write_pop(Segment.POINTER, 1)
                self.vmw.write_push(Segment.THAT, 0)
                self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
            elif self.next_is([Tokens.LEFT_ROUND_BRACKET, Tokens.DOT],
                              idx=1):
                self.compile_subroutine_call()
            else:
                self.compile_var_name()
        elif self.next_is(Tokens.LEFT_ROUND_BRACKET):
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        elif self.next_is(Tokens.TILDE):
            self.compile_symbol(Tokens.TILDE)
            self.compile_term()
            self.vmw.write_arithmetic(Command.NOT)
        elif self.next_is(Tokens.MINUS):
            self.compile_symbol(Tokens.MINUS)
            self.compile_term()
            self.vmw.write_arithmetic(Command.NEG)
        else:
            self.raise_syntax_error(
                'Unexpected token at start of term: %r'
                % (self.tokenizer.see_next(),))
        self.write_element_end('term')

    # ------------------------------------------------------------------
    # Look-ahead helpers
    # ------------------------------------------------------------------

    def next_type_is(self, token_type):
        """Return True when the next token's ``type`` equals *token_type*."""
        return self.tokenizer.see_next().type == token_type

    def compile_type(self):
        """Consume a type (INT, CHAR, BOOLEAN or a class name).

        Returns:
            The token that named the type.
        """
        type_token = self.tokenizer.see_next()
        builtin_types = [Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN]
        if self.next_is(builtin_types):
            self.compile_keyword(builtin_types)
        else:
            self.compile_class_name()
        return type_token

    def next_is_statement(self):
        """Return True when the next token starts a statement."""
        return self.next_is(
            [Tokens.LET, Tokens.IF, Tokens.WHILE, Tokens.DO, Tokens.RETURN])

    def next_is(self, tokens, idx=0):
        """Test the token *idx* positions ahead without consuming it.

        Args:
            tokens: A single token, or a list of acceptable tokens.
            idx: Look-ahead offset handed to the tokenizer (0 = next token).
        """
        upcoming = self.tokenizer.see_next(idx=idx)
        if isinstance(tokens, list):
            return upcoming in tokens
        return upcoming == tokens

    def next_is_class_var_dec(self):
        """Return True when the next token starts a class variable decl."""
        return self.next_is([Tokens.STATIC, Tokens.FIELD])

    def next_is_subroutine_dec(self):
        """Return True when the next token starts a subroutine declaration."""
        return self.next_is(
            [Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD])

    # ------------------------------------------------------------------
    # Terminal consumers
    # ------------------------------------------------------------------

    def compile_symbol(self, tokens):
        """Consume the next token, which must be one of the given symbols.

        Args:
            tokens: A single token, or a list of acceptable tokens.

        Returns:
            The consumed token.

        Raises:
            Exception: If the consumed token is not among *tokens*.
        """
        return self._consume(tokens, 'symbol')

    def compile_keyword(self, tokens):
        """Consume the next token, which must be one of the given keywords.

        Args:
            tokens: A single token, or a list of acceptable tokens.

        Returns:
            The consumed token.

        Raises:
            Exception: If the consumed token is not among *tokens*.
        """
        return self._consume(tokens, 'keyword')

    def compile_identifier(self):
        """Consume an identifier token, write it to the XML and return it.

        Raises:
            Exception: If the consumed token is not an ``Identifier``.
        """
        self.tokenizer.advance()
        current = self.tokenizer.current_token
        if not isinstance(current, Identifier):
            self.raise_syntax_error(
                'Expected identifier but found %r' % (current,))
        self.write_element('identifier', current.token_escaped)
        return current

    def compile_integer_constant(self):
        """Consume an integer constant; return its escaped text.

        Raises:
            Exception: If the consumed token is not an ``IntegerConstant``.
        """
        self.tokenizer.advance()
        current = self.tokenizer.current_token
        if not isinstance(current, IntegerConstant):
            self.raise_syntax_error(
                'Expected integerConstant but found %r' % (current,))
        self.write_element('integerConstant', current.token_escaped)
        return current.token_escaped

    def compile_string_constant(self):
        """Consume a string constant and emit code that builds the string.

        The VM code calls ``String.new`` with the length and then
        ``String.appendChar`` once per character.

        Raises:
            Exception: If the consumed token is not a ``StringConstant``.
        """
        self.tokenizer.advance()
        current = self.tokenizer.current_token
        if not isinstance(current, StringConstant):
            self.raise_syntax_error(
                'Expected stringConstant but found %r' % (current,))
        string = current.token
        self.write_element('stringConstant', current.token_escaped)
        self.vmw.write_push(Segment.CONST, len(string))
        self.vmw.write_call('String.new', 1)
        for c in string:
            self.vmw.write_push(Segment.CONST, ord(c))
            self.vmw.write_call('String.appendChar', 2)

    # ------------------------------------------------------------------
    # Output helpers
    # ------------------------------------------------------------------

    def write_element(self, elem_name, value):
        """Write ``<elem_name> value </elem_name>`` on its own XML line."""
        self.wf.write('<%s> %s </%s>\n' % (elem_name, value, elem_name))

    def write_element_start(self, elem_name):
        """Write the opening tag of a non-terminal element."""
        self.wf.write('<%s>\n' % elem_name)

    def write_element_end(self, elem_name):
        """Write the closing tag of a non-terminal element."""
        self.wf.write('</%s>\n' % elem_name)

    def raise_syntax_error(self, msg):
        """Abort compilation by raising ``Exception`` with *msg*."""
        raise Exception('%s' % msg)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _consume(self, tokens, elem_name):
        """Advance and check the new token against *tokens*.

        On a match the token is written as an *elem_name* XML element and
        returned; otherwise a syntax error naming the expected and actual
        tokens is raised.
        """
        self.tokenizer.advance()
        current = self.tokenizer.current_token
        if isinstance(tokens, list):
            matched = current in tokens
        else:
            matched = current == tokens
        if not matched:
            self.raise_syntax_error(
                'Expected %s %r but found %r' % (elem_name, tokens, current))
        self.write_element(elem_name, current.token_escaped)
        return current

    def _segment_for_kind(self, kind):
        """Return the VM segment storing variables of *kind*, else None."""
        if kind == IdentifierKind.ARG:
            return Segment.ARG
        if kind == IdentifierKind.VAR:
            return Segment.LOCAL
        if kind == IdentifierKind.FIELD:
            return Segment.THIS
        if kind == IdentifierKind.STATIC:
            return Segment.STATIC
        return None

    def _push_variable(self, name):
        """Push variable *name*; emits nothing if its kind has no segment."""
        segment = self._segment_for_kind(self.symbol_table.kind_of(name))
        if segment is not None:
            self.vmw.write_push(segment, self.symbol_table.index_of(name))

    def _pop_variable(self, name):
        """Pop into variable *name*; emits nothing if its kind is unknown."""
        segment = self._segment_for_kind(self.symbol_table.kind_of(name))
        if segment is not None:
            self.vmw.write_pop(segment, self.symbol_table.index_of(name))
class CompilationEngine: def __init__(self, input_texts, output_file_path, vmw): self.tokenizer = Tokenizer(input_texts) self.wf = open(output_file_path, 'w') self.vmw = vmw self.elements = [] self.symbol_table = SymbolTable() self.symbol_table.show_tables() # tokens self.op_tokens = [ Tokens.PLUS, Tokens.MINUS, Tokens.MULTI, Tokens.DIV, Tokens.AND, Tokens.OR, Tokens.LESS_THAN, Tokens.GREATER_THAN, Tokens.EQUAL, ] self.unary_op_tokens = [ Tokens.MINUS, Tokens.TILDE, ] self.statement_tokens = [ Tokens.LET, Tokens.IF, Tokens.WHILE, Tokens.DO, Tokens.RETURN, ] self.keyword_constant_tokens = [ Tokens.TRUE, Tokens.FALSE, Tokens.NULL, Tokens.THIS, ] # SymbolTable を作成するのに必要な変数 self.class_name = None self.kind = None self.var_type = None self.var_name = None # VM self.subroutine_class_name = None self.subroutine_name = None self.label_number = 0 def __enter__(self): return self def __exit__(self, exc_type, exc_value, traceback): return self.wf.close() def compile(self): self.compile_class() def compile_class(self): self.write_element_start('class') # class self.compile_keyword([Tokens.CLASS]) # self.class_name = self.tokenizer.see_next() # className self.compile_class_name() # { self.compile_keyword([Tokens.LEFT_CURLY_BRACKET]) # classVarDec* while self.tokenizer.next_is([Tokens.STATIC, Tokens.FIELD]): self.compile_class_var_dec() # subroutineDec* while self.tokenizer.next_is( [Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD, Tokens.VOID]): self.compile_subroutine_dec() # } self.compile_keyword([Tokens.RIGHT_CURLY_BRACKET]) self.write_element_end('class') def compile_class_var_dec(self): self.write_element_start('classVarDec') # static or field self.compile_keyword([Tokens.STATIC, Tokens.FIELD]) self.kind = self.get_kind(self.tokenizer.current_token) # type self.compile_type() self.var_type = self.tokenizer.current_token # varName self.compile_var_name(define=True, var_type=self.var_type, kind=self.kind) # (, varName)* while self.tokenizer.next_is([Tokens.COMMA]): 
self.compile_symbol([Tokens.COMMA]) self.compile_var_name(define=True, var_type=self.var_type, kind=self.kind) # ; self.compile_symbol([Tokens.SEMICOLON]) self.write_element_end('classVarDec') def compile_subroutine_dec(self): # Symbol Table の初期化 self.symbol_table.start_subroutine() self.write_element_start('subroutineDec') # constructor or function or method or void self.compile_keyword( [Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD, Tokens.VOID]) # VM subroutine_type = self.tokenizer.current_token # Symbol Table の作成 if self.tokenizer.current_token == Tokens.METHOD: self.symbol_table.define('$this', self.class_name, SymbolKind.ARG) # void or type if self.tokenizer.next_is([Tokens.VOID]): self.compile_keyword([Tokens.VOID]) else: self.compile_type() # subroutineName self.compile_subroutine_name() # VM subroutine_name = self.tokenizer.current_token # ( self.compile_symbol([Tokens.LEFT_ROUND_BRACKET]) # parameterList self.compile_parameter_list() # ) self.compile_symbol([Tokens.RIGHT_ROUND_BRACKET]) # subroutineBody self.compile_subroutine_body(subroutine_type, subroutine_name) self.write_element_end('subroutineDec') def compile_subroutine_body(self, subroutine_type, subroutine_name): self.write_element_start('subroutineBody') # { self.compile_keyword([Tokens.LEFT_CURLY_BRACKET]) # varDec* local_var_counts = 0 while not self.tokenizer.next_is(self.statement_tokens): counts = self.compile_var_dec() local_var_counts += counts # VM function_name = '{}.{}'.format(self.class_name, subroutine_name) self.vmw.write_function(function_name, local_var_counts) if subroutine_type == Tokens.CONSTRUCTOR: self.vmw.write_push(SegmentType.CONST, self.symbol_table.var_count(SymbolKind.FIELD)) self.vmw.write_call('Memory.alloc', 1) self.vmw.write_pop(SegmentType.POINTER, 0) elif subroutine_type == Tokens.METHOD: self.vmw.write_push(SegmentType.ARG, 0) self.vmw.write_pop(SegmentType.POINTER, 0) elif subroutine_type == Tokens.FUNCTION: pass else: self.raise_syntax_error('Invalid 
subroutine type.') # statements self.compile_statements() # } self.compile_keyword([Tokens.RIGHT_CURLY_BRACKET]) self.write_element_end('subroutineBody') def compile_var_dec(self): self.write_element_start('varDec') local_var_counts = 0 self.kind = SymbolKind.VAR # var self.compile_keyword([Tokens.VAR]) # type self.compile_type() self.var_type = self.tokenizer.current_token # varName self.compile_var_name(define=True, var_type=self.var_type, kind=self.kind) local_var_counts += 1 # (',' varName)* while self.tokenizer.next_is([Tokens.COMMA]): self.compile_symbol([Tokens.COMMA]) self.compile_var_name(define=True, var_type=self.var_type, kind=self.kind) local_var_counts += 1 # ; self.compile_symbol([Tokens.SEMICOLON]) self.write_element_end('varDec') return local_var_counts def compile_term(self): self.write_element_start('term') if isinstance(self.tokenizer.see_next(), IntegerToken): self.compile_integer_constant() self.vmw.write_push(SegmentType.CONST, self.tokenizer.current_token) elif isinstance(self.tokenizer.see_next(), StringToken): self.compile_string_constant() elif isinstance(self.tokenizer.see_next(), KeywordToken): if self.tokenizer.see_next() == Tokens.TRUE: self.vmw.write_push(SegmentType.CONST, 1) self.vmw.write_arithmetic(ArithmeticType.NEG) elif self.tokenizer.see_next() in [Tokens.FALSE, Tokens.NULL]: self.vmw.write_push(SegmentType.CONST, 0) elif self.tokenizer.see_next() == Tokens.THIS: self.vmw.write_push(SegmentType.POINTER, 0) self.compile_keyword(self.keyword_constant_tokens) elif isinstance(self.tokenizer.see_next(), IdentifierToken): # varName[expression] if self.tokenizer.next_is([Tokens.LEFT_SQUARE_BRACKET], index=1): # varName self.compile_var_name(is_other=True) # VM var_name = self.tokenizer.current_token.token kind = self.symbol_table.kind_of(var_name) index = self.symbol_table.index_of(var_name) segment_type = self.get_segment_type(kind) self.vmw.write_push(segment_type, index) # [ self.compile_symbol([Tokens.LEFT_SQUARE_BRACKET]) # 
expression self.compile_expression() # ] self.compile_symbol([Tokens.RIGHT_SQUARE_BRACKET]) # VM a[i] のケースのみを考慮 self.vmw.write_arithmetic(ArithmeticType.ADD) self.vmw.write_pop(SegmentType.POINTER, 1) self.vmw.write_push(SegmentType.THAT, 0) # subroutineCall elif self.tokenizer.next_is( [Tokens.LEFT_ROUND_BRACKET, Tokens.DOT], index=1): self.compile_subroutine_call() # varName else: self.compile_var_name() # ( expression ) elif self.tokenizer.next_is([Tokens.LEFT_ROUND_BRACKET]): # ( self.compile_symbol([Tokens.LEFT_ROUND_BRACKET]) # expression self.compile_expression() # ) self.compile_symbol([Tokens.RIGHT_ROUND_BRACKET]) # unaryOp - elif self.tokenizer.see_next() == Tokens.MINUS: # unaryOp self.compile_symbol([Tokens.MINUS]) # term self.compile_term() # VM self.vmw.write_arithmetic(ArithmeticType.NEG) # unaryOp ~ elif self.tokenizer.see_next() == Tokens.TILDE: # unaryOp self.compile_symbol([Tokens.TILDE]) # term self.compile_term() # VM self.vmw.write_arithmetic(ArithmeticType.NOT) else: self.raise_syntax_error(self.tokenizer.see_next()) self.write_element_end('term') def compile_expression(self): self.write_element_start('expression') self.compile_term() # (op term)* while self.tokenizer.next_is(self.op_tokens): self.compile_op() op_token = self.tokenizer.current_token self.compile_term() if op_token == Tokens.PLUS: self.vmw.write_arithmetic(ArithmeticType.ADD) elif op_token == Tokens.MINUS: self.vmw.write_arithmetic(ArithmeticType.SUB) elif op_token == Tokens.MULTI: self.vmw.write_call('Math.multiply', 2) elif op_token == Tokens.DIV: self.vmw.write_call('Math.divide', 2) elif op_token == Tokens.GREATER_THAN: self.vmw.write_arithmetic(ArithmeticType.GT) elif op_token == Tokens.LESS_THAN: self.vmw.write_arithmetic(ArithmeticType.LT) elif op_token == Tokens.AND: self.vmw.write_arithmetic(ArithmeticType.AND) elif op_token == Tokens.OR: self.vmw.write_arithmetic(ArithmeticType.OR) elif op_token == Tokens.TILDE: self.vmw.write_arithmetic(ArithmeticType.NOT) elif 
op_token == Tokens.EQUAL: self.vmw.write_arithmetic(ArithmeticType.EQ) else: self.raise_syntax_error('Invalid op token.') self.write_element_end('expression') def compile_expression_list(self): self.write_element_start('expressionList') # VM argument_counts = 0 # (expression (',' expression)* )? if not self.tokenizer.next_is([Tokens.RIGHT_ROUND_BRACKET]): # expression self.compile_expression() argument_counts += 1 # (',' expression)* while self.tokenizer.next_is([Tokens.COMMA]): self.compile_symbol([Tokens.COMMA]) self.compile_expression() argument_counts += 1 self.write_element_end('expressionList') return argument_counts def compile_subroutine_call(self): # ( のケース if self.tokenizer.next_is([Tokens.LEFT_ROUND_BRACKET], index=1): # subroutinename self.compile_subroutine_name() # VM subroutine_name = self.tokenizer.current_token.token self.vmw.write_push(SegmentType.POINTER, 0) # ( self.compile_symbol([Tokens.LEFT_ROUND_BRACKET]) # expressionList argument_counts = self.compile_expression_list() argument_counts += 1 # ) self.compile_symbol([Tokens.RIGHT_ROUND_BRACKET]) function_name = '{}.{}'.format(self.class_name, subroutine_name) self.vmw.write_call(function_name, argument_counts) # . のケース elif self.tokenizer.next_is([Tokens.DOT], index=1): # className | varName self.compile_class_name() # varName (クラスのインスタンスのメソッドを使用するケース) if self.symbol_table.kind_of( self.tokenizer.current_token.token) is not None: # VM instance_name = self.tokenizer.current_token.token # . 
self.compile_symbol([Tokens.DOT]) # subroutineName self.compile_subroutine_name() # VM subroutine_name = self.tokenizer.current_token.token kind = self.symbol_table.kind_of(instance_name) index = self.symbol_table.index_of(instance_name) segment_type = self.get_segment_type(kind) self.vmw.write_push(segment_type, index) # ( self.compile_symbol([Tokens.LEFT_ROUND_BRACKET]) # expressionList argument_counts = self.compile_expression_list() argument_counts += 1 # ) self.compile_symbol([Tokens.RIGHT_ROUND_BRACKET]) # VM function_name = '{}.{}'.format( self.symbol_table.type_of(instance_name), subroutine_name, ) self.vmw.write_call(function_name, argument_counts) # className (例えば Output.printInt 関数を使用するケース) else: # VM class_name = self.tokenizer.current_token # . self.compile_symbol([Tokens.DOT]) # subroutineName self.compile_subroutine_name() # VM subroutine_name = self.tokenizer.current_token # ( self.compile_symbol([Tokens.LEFT_ROUND_BRACKET]) # expressionList argument_counts = self.compile_expression_list() # ) self.compile_symbol([Tokens.RIGHT_ROUND_BRACKET]) # VM function_name = '{}.{}'.format( class_name, subroutine_name, ) self.vmw.write_call(function_name, argument_counts) else: self.raise_syntax_error(self.tokenizer.see_next(index=1)) def compile_let_statement(self): self.write_element_start('letStatement') # let self.compile_keyword([Tokens.LET]) # varName self.compile_var_name(is_other=True) # VM let_var_name = self.tokenizer.current_token.token kind = self.symbol_table.kind_of(let_var_name) index = self.symbol_table.index_of(let_var_name) segment_type = self.get_segment_type(kind) # ('[' expression ']')? 
if self.tokenizer.next_is([Tokens.LEFT_SQUARE_BRACKET]): # [ self.compile_symbol([Tokens.LEFT_SQUARE_BRACKET]) # expression self.compile_expression() # ] self.compile_symbol([Tokens.RIGHT_SQUARE_BRACKET]) # VM a[i] のケースのみを考慮 self.vmw.write_push(segment_type, index) self.vmw.write_arithmetic(ArithmeticType.ADD) self.vmw.write_pop(SegmentType.TEMP, 1) # = self.compile_symbol([Tokens.EQUAL]) # expression self.compile_expression() # VM self.vmw.write_push(SegmentType.TEMP, 1) self.vmw.write_pop(SegmentType.POINTER, 1) self.vmw.write_pop(SegmentType.THAT, 0) # ; self.compile_symbol([Tokens.SEMICOLON]) else: # = self.compile_symbol([Tokens.EQUAL]) # expression self.compile_expression() # ; self.compile_symbol([Tokens.SEMICOLON]) # VM self.vmw.write_pop(segment_type, index) self.write_element_end('letStatement') def compile_if_statement(self): self.write_element_start('ifStatement') label_first = self.get_label() label_last = self.get_label() # if self.compile_keyword([Tokens.IF]) # ( self.compile_symbol([Tokens.LEFT_ROUND_BRACKET]) # expression self.compile_expression() # ) self.compile_symbol([Tokens.RIGHT_ROUND_BRACKET]) # VM self.vmw.write_arithmetic(ArithmeticType.NOT) self.vmw.write_if(label_first) # { self.compile_symbol([Tokens.LEFT_CURLY_BRACKET]) # statements self.compile_statements() # } self.compile_symbol([Tokens.RIGHT_CURLY_BRACKET]) # VM self.vmw.write_goto(label_last) self.vmw.write_label(label_first) # (else { statemens }) if self.tokenizer.next_is([Tokens.ELSE]): # else self.compile_keyword([Tokens.ELSE]) # { self.compile_symbol([Tokens.LEFT_CURLY_BRACKET]) # statements self.compile_statements() # } self.compile_symbol([Tokens.RIGHT_CURLY_BRACKET]) # VM self.vmw.write_label(label_last) self.write_element_end('ifStatement') def compile_while_statement(self): self.write_element_start('whileStatement') label_first = self.get_label() label_last = self.get_label() self.vmw.write_label(label_first) # while self.compile_keyword([Tokens.WHILE]) # ( 
self.compile_symbol([Tokens.LEFT_ROUND_BRACKET]) # expression self.compile_expression() # ) self.compile_symbol([Tokens.RIGHT_ROUND_BRACKET]) # VM self.vmw.write_arithmetic(ArithmeticType.NOT) self.vmw.write_if(label_last) # { self.compile_symbol([Tokens.LEFT_CURLY_BRACKET]) # statements self.compile_statements() # } self.compile_symbol([Tokens.RIGHT_CURLY_BRACKET]) self.vmw.write_goto(label_first) self.vmw.write_label(label_last) self.write_element_end('whileStatement') def compile_do_statement(self): self.write_element_start('doStatement') # do self.compile_keyword([Tokens.DO]) # subroutineCall self.compile_subroutine_call() # ; self.compile_symbol([Tokens.SEMICOLON]) # VM self.vmw.write_pop(SegmentType.TEMP, 0) self.write_element_end('doStatement') def compile_return_statement(self): self.write_element_start('returnStatement') # return self.compile_keyword([Tokens.RETURN]) # expression? if not self.tokenizer.next_is([Tokens.SEMICOLON]): self.compile_expression() else: self.vmw.write_push(SegmentType.CONST, 0) # ; self.compile_symbol([Tokens.SEMICOLON]) # VM self.vmw.write_return() self.write_element_end('returnStatement') def compile_statement(self): if self.tokenizer.next_is([Tokens.LET]): self.compile_let_statement() elif self.tokenizer.next_is([Tokens.IF]): self.compile_if_statement() elif self.tokenizer.next_is([Tokens.WHILE]): self.compile_while_statement() elif self.tokenizer.next_is([Tokens.DO]): self.compile_do_statement() elif self.tokenizer.next_is([Tokens.RETURN]): self.compile_return_statement() else: self.raise_syntax_error(self.tokenizer.see_next()) def compile_statements(self): self.write_element_start('statements') while self.tokenizer.next_is( [Tokens.LET, Tokens.IF, Tokens.WHILE, Tokens.DO, Tokens.RETURN]): self.compile_statement() self.write_element_end('statements') def compile_parameter_list(self): self.write_element_start('parameterList') if self.tokenizer.see_next() in [Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN] \ or 
isinstance(self.tokenizer.see_next(), StringToken) \ or isinstance(self.tokenizer.see_next(), IdentifierToken): self.kind = SymbolKind.ARG # type self.compile_type() self.var_type = self.tokenizer.current_token # varName self.compile_var_name(define=True, var_type=self.var_type, kind=self.kind) # (, type varName)* while self.tokenizer.next_is([Tokens.COMMA]): self.compile_symbol([Tokens.COMMA]) self.compile_type() self.var_type = self.tokenizer.current_token self.compile_var_name(define=True, var_type=self.var_type, kind=self.kind) self.write_element_end('parameterList') def compile_op(self): self.compile_symbol(self.op_tokens) def compile_type(self): self.tokenizer.advance() if self.tokenizer.current_token in [Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN] \ or isinstance(self.tokenizer.current_token, IdentifierToken): self.write_element(self.tokenizer.current_token) else: self.raise_syntax_error(self.tokenizer.current_token) def compile_class_name(self): self.compile_identifier() self.subroutine_class_name = self.tokenizer.current_token def compile_subroutine_name(self): self.compile_identifier() self.subroutine_name = self.tokenizer.current_token def compile_var_name(self, define=False, var_type=None, kind=None, is_other=False): if define: self.symbol_table.define(self.tokenizer.see_next().token, var_type, kind) elif is_other: pass else: # VM kind = self.symbol_table.kind_of(self.tokenizer.see_next().token) index = self.symbol_table.index_of(self.tokenizer.see_next().token) segment_type = self.get_segment_type(kind) self.vmw.write_push(segment_type, index) self.compile_identifier() def compile_keyword(self, keyword_tokens): self.tokenizer.advance() if self.tokenizer.current_token in keyword_tokens: self.write_element(self.tokenizer.current_token) else: self.raise_syntax_error(self.tokenizer.current_token) def compile_symbol(self, keyword_tokens): self.tokenizer.advance() if self.tokenizer.current_token in keyword_tokens: 
self.write_element(self.tokenizer.current_token) else: self.raise_syntax_error(self.tokenizer.current_token) def compile_integer_constant(self): self.tokenizer.advance() if isinstance(self.tokenizer.current_token, IntegerToken): self.write_element(self.tokenizer.current_token) else: self.raise_syntax_error(self.tokenizer.current_token) def compile_string_constant(self): self.tokenizer.advance() if isinstance(self.tokenizer.current_token, StringToken): self.write_element(self.tokenizer.current_token) # VM string = str(self.tokenizer.current_token) length = len(string) self.vmw.write_push(SegmentType.CONST, length) self.vmw.write_call('String.new', 1) for s in string: self.vmw.write_push(SegmentType.CONST, ord(s)) self.vmw.write_call('String.appendChar', 2) else: self.raise_syntax_error(self.tokenizer.current_token) def compile_identifier(self): self.tokenizer.advance() if isinstance(self.tokenizer.current_token, IdentifierToken): self.write_element(self.tokenizer.current_token) else: self.raise_syntax_error(self.tokenizer.current_token) def write_element_start(self, element_name): self.wf.write('<{}> \n'.format(element_name)) def write_element(self, token): element_name = get_element(token.type) self.wf.write('<{}> {} </{}>\n'.format(element_name, token, element_name)) def write_element_end(self, element_name): self.wf.write('</{}> \n'.format(element_name)) def raise_syntax_error(self, token): raise ValueError('Invalid syntax of {}'.format(token)) def get_kind(self, token): if token == Tokens.STATIC: return SymbolKind.STATIC elif token == Tokens.FIELD: return SymbolKind.FIELD else: return ValueError('Invalid token in get_kind.') def get_segment_type(self, kind): if kind == SymbolKind.STATIC: return SegmentType.STATIC elif kind == SymbolKind.FIELD: return SegmentType.THIS elif kind == SymbolKind.ARG: return SegmentType.ARG elif kind == SymbolKind.VAR: return SegmentType.LOCAL else: self.raise_syntax_error('Invalid kind and index error.') def get_label(self): 
self.label_number += 1 return 'LABEL_{}'.format(self.label_number)
class CompilationEngine(object):
    """Recursive-descent compiler that turns one Jack class into VM code.

    Token-position convention used by almost every ``compile_*`` method:
    on entry the tokenizer is positioned ON the first token of the
    construct, and on exit it is positioned ON the first token after the
    construct.  ``compile_class`` is the exception: it advances before
    reading, so it presumably expects a freshly created tokenizer
    (TODO confirm against JackTokenizer).

    Usage: construct with the path of a single source file, then call
    ``compile_class()`` exactly once; that call also closes the output file.
    """

    # Placeholders for the per-instance collaborators assigned in __init__.
    destination_file = None  # open file object receiving the VM code
    tokenizer = None         # token source for the input file
    symbol_table = None      # class- and subroutine-level symbols
    vm_writer = None         # emits VM commands into destination_file
    class_name = ""          # name of the class being compiled

    # Label counters for loops and conditionals.  They start at -1 because
    # they are incremented before use.
    while_index = -1
    if_index = -1

    # Binary operators that map directly onto one VM arithmetic command.
    _BINARY_ARITHMETIC = {
        "+": "add",
        "-": "sub",
        "=": "eq",
        ">": "gt",
        "<": "lt",
        "&": "and",
        "|": "or",
    }
    # Binary operators implemented as two-argument OS calls.
    _BINARY_CALLS = {
        "*": "Math.multiply",
        "/": "Math.divide",
    }
    # Unary operators and their VM arithmetic command.
    _UNARY_OPS = {
        "-": "neg",
        "~": "not",
    }
    # Keywords that start a statement; each has a matching compile_<keyword>.
    _STATEMENT_KEYWORDS = ("do", "let", "while", "return", "if")

    def __init__(self, source_filename):
        """Open the output file and build the tokenizer, symbol table and writer.

        ``source_filename`` must name a single file, not a directory.  The
        output name replaces a trailing ``.jack`` (case-insensitive) with
        ``.vm``; any other name simply gets ``.vm`` appended.
        """
        if source_filename.lower().endswith(".jack"):
            destination_filename = source_filename[:-5] + ".vm"
        else:
            destination_filename = source_filename + ".vm"

        self.destination_file = open(destination_filename, 'w')
        try:
            self.tokenizer = JackTokenizer(source_filename)
            self.symbol_table = SymbolTable()
            self.vm_writer = VMWriter(self.destination_file)
        except Exception:
            # Do not leak the freshly opened output file when a collaborator
            # cannot be constructed (e.g. unreadable source file).
            self.destination_file.close()
            raise

    # ------------------------------------------------------------------
    # program structure
    # ------------------------------------------------------------------

    def compile_class(self):
        """Compile ``'class' className '{' classVarDec* subroutineDec* '}'``.

        The output file is closed when this method finishes, including when
        compilation aborts part-way through.
        """
        try:
            self._token_next(True, "KEYWORD", "class")
            _, self.class_name = self._token_next(True, "IDENTIFIER")
            self._token_next(True, "SYMBOL", "{")

            self.tokenizer.advance()
            while self._at_keyword("field", "static"):
                self.compile_class_var_dec()
            while self._at_keyword("constructor", "function", "method"):
                self.compile_subroutine()

            # Already positioned on the closing brace after the last loop.
            self._token_next(False, "SYMBOL", "}")
        finally:
            self.destination_file.close()

    def compile_class_var_dec(self):
        """Compile one ``static`` or ``field`` declaration."""
        # False selects the class-level keyword set in compile_var_dec.
        self.compile_var_dec(False)

    def compile_subroutine(self):
        """Compile a complete constructor, function or method."""
        self.symbol_table.start_subroutine()

        _, subroutine_kind = self._token_next(False, "KEYWORD")
        self._token_next(True)  # return type ('void' or a type); not needed
        _, short_name = self._token_next(True)
        vm_name = self.class_name + "." + short_name

        if subroutine_kind == "method":
            # Reserve argument 0 for the receiver so that the declared
            # parameters get the correct indexes.
            self.symbol_table.define("this", self.class_name,
                                     self.symbol_table.ARG)

        self._token_next(True, "SYMBOL", "(")
        self.tokenizer.advance()
        self.compile_parameter_list()
        self._token_next(False, "SYMBOL", ")")

        self._token_next(True, "SYMBOL", "{")
        self.tokenizer.advance()
        while self._at_keyword("var"):
            self.compile_var_dec()

        # The function header needs the local count, so it can only be
        # written after every 'var' declaration has been seen.
        num_locals = self.symbol_table.var_count(self.symbol_table.VAR)
        self.vm_writer.write_function(vm_name, num_locals)

        if subroutine_kind == "constructor":
            # Allocate one word per field and point 'this' at the block.
            size = self.symbol_table.var_count(self.symbol_table.FIELD)
            self.vm_writer.write_push(self.vm_writer.CONST, size)
            self.vm_writer.write_call("Memory.alloc", 1)
            self.vm_writer.write_pop(self.vm_writer.POINTER, 0)
        elif subroutine_kind == "method":
            # The receiver arrives as argument 0; make it 'this'.
            self.vm_writer.write_push(self.vm_writer.ARG, 0)
            self.vm_writer.write_pop(self.vm_writer.POINTER, 0)
        elif subroutine_kind != "function":
            print("WARNING: Expected constructor, function, or method; got %s"
                  % subroutine_kind)

        self.compile_statements()

        self._token_next(False, "SYMBOL", "}")
        self.tokenizer.advance()

    def compile_parameter_list(self):
        """Compile a possibly empty parameter list, excluding the parentheses.

        Every parameter is defined in the symbol table as an argument.  On
        exit the tokenizer is on the closing parenthesis.
        """
        if self._at_symbol(")"):
            return

        while True:
            _, var_type = self._token_next(False)
            _, name = self._token_next(True)
            self.symbol_table.define(name, var_type, self.symbol_table.ARG)

            tt, t = self._token_next(True)
            if tt != "SYMBOL" or t != ",":
                break
            self.tokenizer.advance()

    def compile_var_dec(self, subroutine=True):
        """Compile ``keyword type name (',' name)* ';'``.

        With ``subroutine`` true only ``var`` is expected; otherwise only
        ``static`` or ``field``.  An unexpected keyword is reported with a
        warning and then used unchanged as the symbol kind.
        """
        _, kind = self._token_next(False, "KEYWORD")

        if subroutine:
            if kind == "var":
                kind = self.symbol_table.VAR
            else:
                print("WARNING: expecting var, but received %s" % (str(kind)))
        else:
            if kind == "static":
                kind = self.symbol_table.STATIC
            elif kind == "field":
                kind = self.symbol_table.FIELD
            else:
                print("WARNING: expecting static or field, but received %s"
                      % (str(kind)))

        _, var_type = self._token_next(True)
        _, name = self._token_next(True, "IDENTIFIER")
        self.symbol_table.define(name, var_type, kind)

        # Any further names in the same declaration share type and kind.
        self.tokenizer.advance()
        while self._at_symbol(","):
            _, name = self._token_next(True, "IDENTIFIER")
            self.symbol_table.define(name, var_type, kind)
            self.tokenizer.advance()

        self._token_next(False, "SYMBOL", ";")
        self.tokenizer.advance()

    # ------------------------------------------------------------------
    # statements
    # ------------------------------------------------------------------

    def compile_statements(self):
        """Compile a sequence of statements, excluding the enclosing braces."""
        while True:
            tt, t = self._token_next(False)
            if tt != "KEYWORD" or t not in self._STATEMENT_KEYWORDS:
                break
            # Dispatch to compile_do / compile_let / compile_while / ...
            getattr(self, "compile_" + t)()

    def compile_do(self):
        """Compile ``'do' subroutineCall ';'``."""
        self._token_next(False, "KEYWORD", "do")

        self.tokenizer.advance()
        self.compile_subroutine_call()

        # The value returned by the call is unused; drop it from the stack.
        self.vm_writer.write_pop(self.vm_writer.TEMP, 0)

        self._token_next(False, "SYMBOL", ";")
        self.tokenizer.advance()

    def compile_let(self):
        """Compile ``'let' varName ('[' expression ']')? '=' expression ';'``."""
        self._token_next(False, "KEYWORD", "let")
        _, name = self._token_next(True, "IDENTIFIER")

        tt, t = self._token_next(True)
        is_array = tt == "SYMBOL" and t == "["
        if is_array:
            # Leave (base + offset) on the stack.  It cannot go into
            # pointer 1 yet because the right-hand side may itself read an
            # array element through pointer 1.
            self.tokenizer.advance()
            self.compile_expression()
            segment, index = self._resolve_symbol(name)
            self.vm_writer.write_push(segment, index)
            self.vm_writer.write_arithmetic("add")

            self._token_next(False, "SYMBOL", "]")
            self.tokenizer.advance()

        self._token_next(False, "SYMBOL", "=")
        self.tokenizer.advance()
        self.compile_expression()

        if is_array:
            # Stack (top first): value to store, target address.
            self.vm_writer.write_pop(self.vm_writer.TEMP, 0)
            self.vm_writer.write_pop(self.vm_writer.POINTER, 1)
            self.vm_writer.write_push(self.vm_writer.TEMP, 0)
            self.vm_writer.write_pop(self.vm_writer.THAT, 0)
        else:
            segment, index = self._resolve_symbol(name)
            self.vm_writer.write_pop(segment, index)

        self._token_next(False, "SYMBOL", ";")
        self.tokenizer.advance()

    def compile_while(self):
        """Compile ``'while' '(' expression ')' '{' statements '}'``."""
        self.while_index += 1
        while_start = "WHILE_START_%d" % (self.while_index)
        while_end = "WHILE_END_%d" % (self.while_index)

        self._token_next(False, "KEYWORD", "while")
        self._token_next(True, "SYMBOL", "(")

        self.vm_writer.write_label(while_start)

        self.tokenizer.advance()
        self.compile_expression()
        self._token_next(False, "SYMBOL", ")")

        # Leave the loop when the condition is false: negate, then if-goto.
        self.vm_writer.write_arithmetic("not")
        self.vm_writer.write_if(while_end)

        self._token_next(True, "SYMBOL", "{")
        self.tokenizer.advance()
        self.compile_statements()
        self._token_next(False, "SYMBOL", "}")

        # Jump back to re-evaluate the condition.
        self.vm_writer.write_goto(while_start)
        self.vm_writer.write_label(while_end)

        self.tokenizer.advance()

    def compile_return(self):
        """Compile ``'return' expression? ';'``.

        A bare ``return;`` pushes constant 0 so that every subroutine leaves
        a value on the stack.
        """
        self._token_next(False, "KEYWORD", "return")

        tt, t = self._token_next(True)
        # Only a ';' symbol means "no expression".  Any other token,
        # including symbols such as '-', '~' and '(', starts the expression.
        if tt == "SYMBOL" and t == ";":
            self.vm_writer.write_push(self.vm_writer.CONST, 0)
        else:
            self.compile_expression()

        self._token_next(False, "SYMBOL", ";")
        self.vm_writer.write_return()
        self.tokenizer.advance()

    def compile_if(self):
        """Compile an ``if`` statement with an optional ``else`` clause."""
        self.if_index += 1
        if_false = "IF_FALSE_%d" % (self.if_index)
        if_end = "IF_END_%d" % (self.if_index)

        self._token_next(False, "KEYWORD", "if")
        self._token_next(True, "SYMBOL", "(")
        self.tokenizer.advance()
        self.compile_expression()
        self._token_next(False, "SYMBOL", ")")

        # Skip the true branch when the condition is false.
        self.vm_writer.write_arithmetic("not")
        self.vm_writer.write_if(if_false)

        self._token_next(True, "SYMBOL", "{")
        self.tokenizer.advance()
        self.compile_statements()
        self._token_next(False, "SYMBOL", "}")

        # Step past '}' to see whether an 'else' follows.
        tt, t = self._token_next(True)
        if tt == "KEYWORD" and t == "else":
            self.vm_writer.write_goto(if_end)
            self.vm_writer.write_label(if_false)

            self._token_next(True, "SYMBOL", "{")
            self.tokenizer.advance()
            self.compile_statements()
            self._token_next(False, "SYMBOL", "}")

            self.vm_writer.write_label(if_end)
            # Without an 'else' the look-ahead above already moved us on,
            # so the extra advance is needed only in this branch.
            self.tokenizer.advance()
        else:
            # No else clause: the end label is unused.
            self.vm_writer.write_label(if_false)

    # ------------------------------------------------------------------
    # expressions
    # ------------------------------------------------------------------

    def compile_expression(self):
        """Compile ``term (op term)*``, emitting operators in postfix order.

        Operators are applied strictly left to right; no precedence is
        implemented.
        """
        self.compile_term()

        while True:
            tt, op = self._token_next(False)
            is_operator = tt == "SYMBOL" and (
                op in self._BINARY_ARITHMETIC or op in self._BINARY_CALLS)
            if not is_operator:
                break

            # Postfix: compile the right operand first, then the operator.
            self.tokenizer.advance()
            self.compile_term()

            if op in self._BINARY_ARITHMETIC:
                self.vm_writer.write_arithmetic(self._BINARY_ARITHMETIC[op])
            else:
                self.vm_writer.write_call(self._BINARY_CALLS[op], 2)

    def compile_term(self):
        """Compile a single term.

        term: integerConstant | stringConstant | keywordConstant | varName |
        varName '[' expression ']' | subroutineCall | '(' expression ')' |
        unaryOp term

        For an identifier one token of look-ahead ('[', '(' or '.') decides
        between a variable, an array element and a subroutine call.
        """
        tt, t = self._token_next(False)

        if tt == "INT_CONST":
            self.vm_writer.write_push(self.vm_writer.CONST, t)
            self.tokenizer.advance()

        elif tt == "STRING_CONST":
            # Build the string at run time: String.new(length), then one
            # String.appendChar call per character.  The original author
            # notes each append returns the same string pointer, which
            # therefore ends up on the stack.
            self.vm_writer.write_push(self.vm_writer.CONST, len(t))
            self.vm_writer.write_call("String.new", 1)
            for char in t:
                self.vm_writer.write_push(self.vm_writer.CONST, ord(char))
                self.vm_writer.write_call("String.appendChar", 2)
            self.tokenizer.advance()

        elif tt == "KEYWORD":
            if t == "true":
                # true is -1, i.e. 0 with every bit flipped.
                self.vm_writer.write_push(self.vm_writer.CONST, 0)
                self.vm_writer.write_arithmetic("not")
            elif t == "false" or t == "null":
                self.vm_writer.write_push(self.vm_writer.CONST, 0)
            elif t == "this":
                self.vm_writer.write_push(self.vm_writer.POINTER, 0)
            else:
                # Previously skipped silently, leaving the stack one value
                # short with no diagnostic.
                print("WARNING: Unexpected keyword in term: %s" % t)
            self.tokenizer.advance()

        elif tt == "SYMBOL" and t == "(":
            self.tokenizer.advance()
            self.compile_expression()
            self._token_next(False, "SYMBOL", ")")
            self.tokenizer.advance()

        elif tt == "SYMBOL" and t in self._UNARY_OPS:
            # Postfix: operand first, then the unary command.
            self.tokenizer.advance()
            self.compile_term()
            self.vm_writer.write_arithmetic(self._UNARY_OPS[t])

        elif tt == "IDENTIFIER":
            tt2, t2 = self._token_next(True)
            if tt2 == "SYMBOL" and t2 in ("(", "."):
                # Subroutine call: step back onto the name and delegate.
                self.tokenizer.retreat()
                self.compile_subroutine_call()
            elif tt2 == "SYMBOL" and t2 == "[":
                # Array read: that 0 after pointer 1 = base + offset.
                segment, index = self._resolve_symbol(t)
                self.vm_writer.write_push(segment, index)
                self.tokenizer.advance()
                self.compile_expression()
                self.vm_writer.write_arithmetic("add")
                self.vm_writer.write_pop(self.vm_writer.POINTER, 1)
                self.vm_writer.write_push(self.vm_writer.THAT, 0)
                self._token_next(False, "SYMBOL", "]")
                self.tokenizer.advance()
            else:
                # Plain variable; the look-ahead already moved past it.
                segment, index = self._resolve_symbol(t)
                self.vm_writer.write_push(segment, index)

        else:
            print("WARNING: Unknown term expression object: %s %s" % (tt, t))

    def compile_expression_list(self):
        """Compile a possibly empty comma-separated list of expressions.

        Returns the number of expressions compiled.  On exit the tokenizer
        is on the token following the list (normally the closing paren).
        """
        num_args = 0
        if self._at_symbol(")"):
            return num_args

        while True:
            self.compile_expression()
            num_args += 1
            if not self._at_symbol(","):
                break
            self.tokenizer.advance()
        return num_args

    def compile_subroutine_call(self):
        """Compile a subroutine call and emit the VM ``call``.

        Three forms are recognised:

        * ``name(args)``       - method call on the current object
        * ``var.name(args)``   - method call on the object held in ``var``
        * ``Class.name(args)`` - function or constructor call
        """
        _, name1 = self._token_next(False, "IDENTIFIER")

        name2 = None
        tt, t = self._token_next(True)
        if tt == "SYMBOL" and t == ".":
            _, name2 = self._token_next(True, "IDENTIFIER")
            # Move onto the opening parenthesis.
            self.tokenizer.advance()

        # An unqualified name is always a call on the current object, even
        # when a variable happens to share the name, so test that first.
        if name2 is None:
            method_call, local_call = True, True
        elif self.symbol_table.contains(name1):
            method_call, local_call = True, False
        else:
            method_call, local_call = False, False

        # Method calls receive the target object as a hidden first argument.
        if method_call and local_call:
            self.vm_writer.write_push(self.vm_writer.POINTER, 0)
        elif method_call:
            segment, index = self._resolve_symbol(name1)
            self.vm_writer.write_push(segment, index)

        self._token_next(False, "SYMBOL", "(")
        self.tokenizer.advance()
        num_args = self.compile_expression_list()
        self._token_next(False, "SYMBOL", ")")

        if method_call and local_call:
            vm_function_name = self.class_name + "." + name1
            num_args += 1  # hidden receiver argument
        elif method_call:
            # Element 1 of the symbol record is the declared type, i.e. the
            # class that owns the method.
            classname = self.symbol_table.get(name1)[1]
            vm_function_name = classname + "." + name2
            num_args += 1  # hidden receiver argument
        else:
            vm_function_name = name1 + "." + name2
        self.vm_writer.write_call(vm_function_name, num_args)

        self.tokenizer.advance()

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------

    def _at_keyword(self, *keywords):
        """Return True if the current token is one of the given keywords."""
        tt, t = self._token_next(False)
        return tt == "KEYWORD" and t in keywords

    def _at_symbol(self, symbol):
        """Return True if the current token is exactly the given symbol."""
        tt, t = self._token_next(False)
        return tt == "SYMBOL" and t == symbol

    @staticmethod
    def _abort(message):
        """Print ``message`` and a stack trace, then exit with status 1."""
        import sys
        import traceback
        print(message)
        traceback.print_stack()
        sys.exit(1)

    def _token_next(self, advance=False, expected_type=None,
                    expected_value=None):
        """Return ``(token_type, token)`` for the current token.

        If ``advance`` is true the tokenizer is advanced before reading.
        The token value is always returned as a string.  If
        ``expected_type`` or ``expected_value`` is given and does not match,
        a diagnostic and stack trace are printed and the process exits with
        status 1 (``SystemExit``).
        """
        if advance:
            self.tokenizer.advance()

        token_type = self.tokenizer.token_type()
        # The accessor is named after the lower-cased token type,
        # e.g. "INT_CONST" -> tokenizer.int_const().
        token = str(getattr(self.tokenizer, token_type.lower())())

        if expected_type and token_type != expected_type:
            self._abort("WARNING: Type %s found; expected %s"
                        % (token_type, expected_type))
        if expected_value and token != expected_value:
            self._abort("WARNING: Value %s found; expected %s"
                        % (token, expected_value))

        return token_type, token

    def _type_to_segment(self, type):
        """Map a symbol-table kind onto the VM writer's segment constant.

        The parameter keeps its original name for compatibility although it
        shadows the builtin.  An unknown kind is reported and ``None`` is
        returned.
        """
        if type == self.symbol_table.STATIC:
            return self.vm_writer.STATIC
        if type == self.symbol_table.FIELD:
            return self.vm_writer.THIS
        if type == self.symbol_table.ARG:
            return self.vm_writer.ARG
        if type == self.symbol_table.VAR:
            return self.vm_writer.LOCAL
        print("ERROR: Bad type %s" % (str(type)))
        return None

    def _resolve_symbol(self, name):
        """Return the ``(segment, index)`` pair for a declared variable."""
        kind, _, index = self.symbol_table.get(name)
        return self._type_to_segment(kind), index
class CompilationEngine:
    """Compile one Jack class into VM code.

    NOTE: the ``is_xxx()`` helpers and ``check_next_token()`` look at the
    *next* token without consuming it, while ``load_next_token()`` consumes
    the next token and returns its value.  Sub-methods are entered with the
    token that introduces them still unread unless a comment says otherwise.
    """

    def __init__(self, jack_file):
        self.vm_writer = VMWriter(jack_file)
        self.tokenizer = JackTokenizer(jack_file)
        self.symbol_table = SymbolTable()
        # Label counters; start at -1 because they are incremented before use.
        self.if_index = -1
        self.while_index = -1

    # 'class' className '{' classVarDec* subroutineDec* '}'
    def compile_class(self):
        """Compile the whole class, then close the VM writer.

        The writer is closed even when compilation raises, so the output
        handle is never left open.
        """
        try:
            self.load_next_token()  # 'class'
            self.class_name = self.load_next_token()  # className
            self.load_next_token()  # '{'

            # next token is 'static' | 'field'
            while self.is_class_var_dec():
                self.compile_class_var_dec()  # classVarDec*

            # next token is 'constructor' | 'function' | 'method'
            while self.is_subroutine_dec():
                self.compile_subroutine()  # subroutineDec*
        finally:
            self.vm_writer.close()

    # ('static' | 'field' ) type varName (',' varName)* ';'
    def compile_class_var_dec(self):
        """Define every name of one class-level declaration."""
        # 'static' | 'field' is upper-cased to form the symbol-table kind.
        kind = self.load_next_token().upper()
        self._define_declared_names(kind)
        # next token is 'constructor' | 'function' | 'method' (or another
        # class-level declaration)

    def _define_declared_names(self, kind):
        """Consume ``type varName (',' varName)* ';'`` and define each name."""
        var_type = self.load_next_token()  # type
        self.symbol_table.define(self.load_next_token(), var_type, kind)
        while self.check_next_token() != ";":  # (',' varName)*
            self.load_next_token()  # ','
            self.symbol_table.define(self.load_next_token(), var_type, kind)
        self.load_next_token()  # ';'

    # subroutineDec: ('constructor' | 'function' | 'method') ('void' | type) subroutineName '(' parameterList ')' subroutineBody
    # subroutineBody: '{' varDec* statements '}'
    def compile_subroutine(self):
        """Compile one constructor, function or method."""
        subroutine_kind = self.load_next_token()  # constructor|function|method
        self.load_next_token()  # ('void' | type)
        subroutine_name = self.load_next_token()  # subroutineName

        self.symbol_table.start_subroutine()  # fresh subroutine-level table
        if subroutine_kind == "method":
            # Occupies the first ARG slot so declared parameters follow it.
            self.symbol_table.define("instance", self.class_name, "ARG")

        self.load_next_token()  # '('
        self.compile_parameter_list()  # stops with ')' as the next token
        self.load_next_token()  # ')'
        self.load_next_token()  # '{'

        while self.check_next_token() == "var":
            self.compile_var_dec()  # varDec*

        # The function header needs the local count, so it is written only
        # after all 'var' declarations have been processed.
        func_name = f"{self.class_name}.{subroutine_name}"  # e.g. Main.main
        num_locals = self.symbol_table.counts["VAR"]
        self.vm_writer.write_function(func_name, num_locals)

        if subroutine_kind == "constructor":
            # Allocate one word per field and point 'this' at the block.
            num_fields = self.symbol_table.counts["FIELD"]
            self.vm_writer.write_push("CONST", num_fields)
            self.vm_writer.write_call("Memory.alloc", 1)
            self.vm_writer.write_pop("POINTER", 0)
        elif subroutine_kind == "method":
            # The receiver arrives as argument 0; make it 'this'.
            self.vm_writer.write_push("ARG", 0)
            self.vm_writer.write_pop("POINTER", 0)

        self.compile_statements()  # statements
        self.load_next_token()  # '}'

    # ( (type varName) (',' type varName)*)?
    def compile_parameter_list(self):
        """Define each declared parameter as an ARG symbol.

        Entered after '(' has been consumed; returns with ')' still unread.
        """
        first = True
        while self.check_next_token() != ")":
            if not first:
                self.load_next_token()  # ','
            var_type = self.load_next_token()  # type
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, var_type, "ARG")
            first = False

    # 'var' type varName (',' varName)* ';'
    def compile_var_dec(self):
        """Define every name of one local ``var`` declaration."""
        self.load_next_token()  # 'var'
        self._define_declared_names("VAR")

    # statement*
    # letStatement | ifStatement | whileStatement | doStatement | returnStatement
    def compile_statements(self):
        """Compile statements for as long as the next token starts one."""
        while self.is_statement():
            # The keyword is consumed here; each compile_<keyword> method
            # starts with the token that follows it.
            statement = self.load_next_token()
            if statement in ("let", "if", "while", "do", "return"):
                getattr(self, "compile_" + statement)()

    # 'let' varName ('[' expression ']')?
'=' expression ';' def compile_let(self): var_name = self.load_next_token() # curr_token == varName var_kind = CONVERT_KIND[self.symbol_table.kind_of(var_name)] var_index = self.symbol_table.index_of(var_name) # if next_token == "[" if self.is_array(): # array assignment self.load_next_token() # curr_token == '[' self.compile_expression() # expression self.load_next_token() # curr_token == ']' self.vm_writer.write_push(var_kind, var_index) self.vm_writer.write_arithmetic("ADD") self.load_next_token() # curr_token == '=' self.compile_expression() # expression self.load_next_token() # curr_token == ';' #! POP TEMP and PUSH TEMP location changed self.vm_writer.write_pop("TEMP", 0) self.vm_writer.write_pop("POINTER", 1) self.vm_writer.write_push("TEMP", 0) self.vm_writer.write_pop("THAT", 0) else: # regular assignment self.load_next_token() # curr_token == '=' self.compile_expression() # expression self.load_next_token() # ';' self.vm_writer.write_pop(var_kind, var_index) # 'if' '(' expression ')' '{' statements '}' ( 'else' '{' statements '}' )? def compile_if(self): # curr_token == if self.if_index += 1 if_index = self.if_index # TODO IF indexes count separately self.load_next_token() # curr_token == '(' self.compile_expression() # expression self.load_next_token() # ')' self.load_next_token() # '{' # S = statement, L = label self.vm_writer.write_if(f"IF_TRUE{if_index}") #! if-goto L1 self.vm_writer.write_goto(f"IF_FALSE{if_index}") #! goto L2 self.vm_writer.write_label(f"IF_TRUE{if_index}") #! label L1 self.compile_statements() # statements #! executing S1 self.vm_writer.write_goto(f"IF_END{if_index}") #! goto END self.load_next_token() # '}' self.vm_writer.write_label(f"IF_FALSE{if_index}") #! label L2 if self.check_next_token() == "else": # ( 'else' '{' statements '}' )? self.load_next_token() # 'else' self.load_next_token() # '{' self.compile_statements() # statements #! 
executing S2 self.load_next_token() # '}' self.vm_writer.write_label(f"IF_END{if_index}") # 'while' '(' expression ')' '{' statements '}' def compile_while(self): # curr_token == while self.while_index += 1 while_index = self.while_index self.vm_writer.write_label(f"WHILE{while_index}") self.load_next_token() # '(' self.compile_expression() # expression self.vm_writer.write_arithmetic("NOT") # eval false condition first self.load_next_token() # ')' self.load_next_token() # '{' self.vm_writer.write_if(f"WHILE_END{while_index}") self.compile_statements() # statements self.vm_writer.write_goto(f"WHILE{while_index}") self.vm_writer.write_label(f"WHILE_END{while_index}") self.load_next_token() # '}' # 'do' subroutineCall ';' def compile_do(self): # curr_token == do self.load_next_token() #! to sync with compile_term() self.compile_subroutine_call() self.vm_writer.write_pop("TEMP", 0) self.load_next_token() # ';' # 'return' expression? ';' def compile_return(self): # curr_token == return if self.check_next_token() != ";": self.compile_expression() else: self.vm_writer.write_push("CONST", 0) self.vm_writer.write_return() self.load_next_token() # ';' # term (op term)* def compile_expression(self): self.compile_term() # term while self.is_op(): # (op term)* op: str = self.load_next_token() # op self.compile_term() # term if op in ARITHMETIC.keys(): self.vm_writer.write_arithmetic(ARITHMETIC[op]) elif op == "*": self.vm_writer.write_call("Math.multiply", 2) elif op == "/": self.vm_writer.write_call("Math.divide", 2) # integerConstant | stringConstant | keywordConstant | varName | # varName '[' expression ']' | subroutineCall | '(' expression ')' | unaryOp term def compile_term(self): # if next_token == '~' | '-' if self.is_unary_op_term(): unary_op = self.load_next_token() # curr_token == '~' | '-' self.compile_term() # term (recursive) self.vm_writer.write_arithmetic(ARITHMETIC_UNARY[unary_op]) # if next_token == '(' => '(' expression ')' elif self.check_next_token() == 
"(": self.load_next_token() # '(' self.compile_expression() # expression self.load_next_token() # ')' # if next_token == INTEGER(const) elif self.check_next_type() == "INT_CONST": # integerConstant self.vm_writer.write_push("CONST", self.load_next_token()) # ) # if next_token == STRING(const) elif self.check_next_type() == "STRING_CONST": # stringConstant self.compile_string() # if next_token == KEYWORD(const) elif self.check_next_type() == "KEYWORD": # keywordConstant self.compile_keyword() # varName | varName '[' expression ']' | subroutineCall else: #! (varName | varName for expression | subroutine)'s base var_name = self.load_next_token( ) # curr_token = varName | subroutineCall # (e.g. Screen.setColor | show() ) #! next_token == '[' | '(' or '.' | just varName # varName '[' expression ']' if self.is_array(): # if next_token == '[' self.load_next_token() # '[' self.compile_expression() # expression self.load_next_token() # ']' array_kind = self.symbol_table.kind_of(var_name) array_index = self.symbol_table.index_of(var_name) self.vm_writer.write_push(CONVERT_KIND[array_kind], array_index) self.vm_writer.write_arithmetic("ADD") self.vm_writer.write_pop("POINTER", 1) self.vm_writer.write_push("THAT", 0) # if next_token == "(" | "." => curr_token == subroutineCall #! if varName is not found, assume class or function name elif self.is_subroutine_call(): # NOTE curr_token == subroutineName | className | varName self.compile_subroutine_call() # varName else: # curr_token == varName # FIXME cannot catch subroutine call and pass it to 'else' below # TODO error caught on Math.abs() part on Ball.vm var_kind = CONVERT_KIND[self.symbol_table.kind_of(var_name)] var_index = self.symbol_table.index_of(var_name) self.vm_writer.write_push(var_kind, var_index) # subroutineCall: subroutineName '(' expressionList ')' | # ( className | varName) '.' subroutineName '(' expressionList ')' # e.g.) (do) game.run() # ! 
in case of 'do' order is different from 'let game = Class.new()' def compile_subroutine_call(self): # NOTE curr_token == subroutineName | className | varName subroutine_caller = self.get_curr_token() function_name = subroutine_caller # _next_token() # FIXME now it loads '.' or '(' # func_name = identifier number_args = 0 #! '.' or '(' 2 cases if self.check_next_token() == ".": self.load_next_token() # curr_token == '.' subroutine_name = self.load_next_token( ) # curr_token == subroutineName type = self.symbol_table.type_of(subroutine_caller) if type != "NONE": # it's an instance kind = self.symbol_table.kind_of(subroutine_caller) index = self.symbol_table.index_of(subroutine_caller) self.vm_writer.write_push(CONVERT_KIND[kind], index) function_name = f"{type}.{subroutine_name}" number_args += 1 else: # it's a class class_name = subroutine_caller function_name = f"{class_name}.{subroutine_name}" elif self.check_next_token() == "(": subroutine_name = subroutine_caller function_name = f"{self.class_name}.{subroutine_name}" number_args += 1 self.vm_writer.write_push("POINTER", 0) self.load_next_token() # '(' number_args += self.compile_expression_list() # expressionList self.load_next_token() # ')' self.vm_writer.write_call(function_name, number_args) # (expression (',' expression)* )? 
def compile_expression_list(self): number_args = 0 if self.check_next_token() != ")": number_args += 1 self.compile_expression() while self.check_next_token() != ")": number_args += 1 self.load_next_token() # curr_token == ',' self.compile_expression() return number_args def compile_string(self): string = self.load_next_token() # curr_token == stringConstant self.vm_writer.write_push("CONST", len(string)) self.vm_writer.write_call("String.new", 1) for char in string: self.vm_writer.write_push("CONST", ord(char)) self.vm_writer.write_call("String.appendChar", 2) def compile_keyword(self): keyword = self.load_next_token() # curr_token == keywordConstant if keyword == "this": self.vm_writer.write_push("POINTER", 0) else: self.vm_writer.write_push("CONST", 0) if keyword == "true": self.vm_writer.write_arithmetic("NOT") def is_subroutine_call(self): return self.check_next_token() in [".", "("] def is_array(self): return self.check_next_token() == "[" def is_class_var_dec(self): return self.check_next_token() in ["static", "field"] def is_subroutine_dec(self): return self.check_next_token() in ["constructor", "function", "method"] def is_statement(self): return self.check_next_token() in [ "let", "if", "while", "do", "return" ] def is_op(self): return self.check_next_token() in [ "+", "-", "*", "/", "&", "|", "<", ">", "=" ] def is_unary_op_term(self): return self.check_next_token() in ["~", "-"] def check_next_token(self): return self.tokenizer.next_token[1] def check_next_type(self): return self.tokenizer.next_token[0] def get_curr_token(self): return self.tokenizer.curr_token[1] def load_next_token(self): if self.tokenizer.has_more_tokens(): self.tokenizer.advance() # curr_token = next_token return self.tokenizer.curr_token[1] else: return ""
class CompilationEngine:
    """Recursive-descent compiler from Jack tokens to VM commands.

    The tokenizer exposes the token under the cursor as ``current_token``.
    ``eat(token)`` verifies that token and advances past it; every
    ``compile_*`` method starts with its first token as the current token and
    leaves the cursor on the first token that follows its construct.
    """

    def __init__(self, tokenizer, vm_writer):
        self.tokenizer = tokenizer
        self.st = SymbolTable()
        self.vm_writer = vm_writer
        self.class_name = ""
        # Serial number embedded in generated labels to keep them distinct.
        self._label_serial = 0

    def compile(self):
        """Compile the class provided by the tokenizer."""
        self.compile_class()

    # ----- private helpers -----

    def _take(self):
        """Return the current token's text and advance past it unchecked."""
        token = self.tokenizer.current_token
        self.tokenizer.advance()
        return token

    def _unique_labels(self, *stems):
        """Return one label per stem, all sharing a fresh serial number.

        Every label starts with its alphabetic stem, so it never begins with
        a digit, and the output is the same on every run.
        """
        serial = self._label_serial
        self._label_serial += 1
        return tuple("{0}_{1}".format(stem, serial) for stem in stems)

    def _define_name_list(self, var_type, kind):
        """Consume varName (',' varName)* ';' and define each name."""
        self.st.define(self._take(), var_type, kind)
        while self.tokenizer.current_token == ",":
            self.eat(",")
            self.st.define(self._take(), var_type, kind)
        self.eat(";")

    def _compile_argument_list(self):
        """Compile '(' expressionList ')' and return the expression count."""
        count = 0
        self.eat("(")
        if self.tokenizer.is_valid_term(self.tokenizer.current_token):
            self.compile_expression()
            count += 1
            while self.tokenizer.current_token == ",":
                self.eat(",")
                self.compile_expression()
                count += 1
        self.eat(")")
        return count

    # ----- program structure -----

    def compile_class(self):
        """Compile: 'class' className '{' classVarDec* subroutineDec* '}'."""
        self.tokenizer.advance()  # load the first token
        self.eat(CLASS)
        self.class_name = self._take()  # className
        self.eat("{")
        self.compile_class_var_dec()
        self.compile_subroutine_dec()
        self.eat("}")

    def compile_class_var_dec(self):
        """Define every static/field variable declared by the class."""
        while self.tokenizer.current_token in [STATIC, FIELD]:
            kind = self._take()  # 'static' | 'field'
            var_type = self._take()  # type
            self._define_name_list(var_type, kind)

    def compile_subroutine_dec(self):
        """Compile every constructor, function and method of the class."""
        while self.tokenizer.current_token in [CONSTRUCTOR, FUNCTION, METHOD]:
            self.st.start_subroutine()
            subroutine_type = self._take()  # constructor | function | method
            self._take()  # return type (not needed for code generation)
            subroutine_name = self.class_name + "." + self._take()
            self.compile_parameter_list(subroutine_type)
            self.eat("{")
            self.compile_subroutine_header(subroutine_name, subroutine_type)
            self.compile_statements()
            self.eat("}")

    def compile_subroutine_header(self, subroutine_name, subroutine_type):
        """Consume the local 'var' declarations, then emit the function header.

        The header is written after the declarations because it carries the
        number of locals. Constructors then allocate the object and methods
        bind ``this`` to argument 0.
        """
        while self.tokenizer.current_token == VAR:
            self.eat(VAR)
            var_type = self._take()  # type
            self._define_name_list(var_type, VAR)

        self.vm_writer.write_function(subroutine_name, self.st.var_count(VAR))

        if subroutine_type == CONSTRUCTOR:
            # Allocate one word per field and anchor THIS to the new block.
            self.vm_writer.write_push(VM_CONST, self.st.var_count(FIELD))
            self.vm_writer.write_call("Memory.alloc", "1")
            self.vm_writer.write_pop(VM_POINTER, "0")
        elif subroutine_type == METHOD:
            # Anchor THIS to the object passed as argument 0.
            self.vm_writer.write_push(VM_ARGUMENT, "0")
            self.vm_writer.write_pop(VM_POINTER, "0")

    def compile_parameter_list(self, subroutine_type):
        """Compile '(' ((type varName) (',' type varName)*)? ')'."""
        if subroutine_type == METHOD:
            # Dummy entry occupying argument 0 (the receiving object), so the
            # declared parameters are indexed from 1.
            self.st.define("thisObject", "type", ARG)

        self.eat("(")
        if self.tokenizer.current_token != ")":
            param_type = self._take()  # type
            self.st.define(self._take(), param_type, ARG)
            while self.tokenizer.current_token == ",":
                self.eat(",")
                param_type = self._take()  # type
                self.st.define(self._take(), param_type, ARG)
        self.eat(")")

    # ----- statements -----

    def compile_statements(self):
        """Compile statements while the current token starts one."""
        statement_map = {
            LET: self.compile_let_statement,
            IF: self.compile_if_statement,
            WHILE: self.compile_while_statement,
            DO: self.compile_do_statement,
            RETURN: self.compile_return_statement,
        }
        while self.tokenizer.current_token in statement_map:
            statement_map[self.tokenizer.current_token]()

    def compile_let_statement(self):
        """Compile: 'let' varName ('[' expression ']')? '=' expression ';'."""
        self.eat(LET)
        var_name = self._take()

        if self.tokenizer.current_token == "[":
            self.eat("[")
            self.compile_expression()
            self.eat("]")
            # Leave (base + index) on the stack as the target address.
            self.vm_writer.write_push(segment_map[self.st.kind_of(var_name)],
                                      self.st.index_of(var_name))
            self.vm_writer.write_arithmetic(VM_ADD)
            self.eat("=")
            self.compile_expression()
            self.eat(";")
            # Park the value in temp 0 while THAT is aimed at the target; the
            # right-hand side may itself have moved THAT.
            self.vm_writer.write_pop(VM_TEMP, "0")
            self.vm_writer.write_pop(VM_POINTER, "1")
            self.vm_writer.write_push(VM_TEMP, "0")
            self.vm_writer.write_pop(VM_THAT, "0")
        else:
            self.eat("=")
            self.compile_expression()
            self.eat(";")
            self.vm_writer.write_pop(segment_map[self.st.kind_of(var_name)],
                                     self.st.index_of(var_name))

    def compile_do_statement(self):
        """Compile: 'do' subroutineCall ';' and discard the returned value."""
        self.eat(DO)
        self.compile_subroutine_call()
        self.vm_writer.write_pop(VM_TEMP, 0)
        self.eat(";")

    def compile_subroutine_call(self):
        """Compile a subroutine call starting at its leading identifier.

        Raises:
            CompilationError: if the identifier is followed by neither '('
                nor '.'.
        """
        identifier = self._take()

        if self.tokenizer.current_token == "(":
            # 'name(...)' is always a method call on the current object.
            self.vm_writer.write_push(VM_POINTER, "0")
            n_args = self._compile_argument_list()
            self.vm_writer.write_call(self.class_name + "." + identifier,
                                      n_args + 1)
        elif self.tokenizer.current_token == ".":
            # A name known to the symbol table is an object (method call);
            # anything else is taken to be a class name.
            is_method_call = self.st.type_of(identifier) is not None
            if is_method_call:
                self.vm_writer.write_push(
                    segment_map[self.st.kind_of(identifier)],
                    self.st.index_of(identifier))
            self.eat(".")
            subroutine_name = self._take()
            n_args = self._compile_argument_list()

            prefix = identifier
            if is_method_call:
                n_args += 1  # the object itself is the hidden first argument
                prefix = self.st.type_of(identifier)
            self.vm_writer.write_call(prefix + "." + subroutine_name, n_args)
        else:
            raise CompilationError(
                "Expected to find token '(' or '.' but found '{0:}'".format(
                    self.tokenizer.current_token))

    def compile_if_statement(self):
        """Compile an if statement with an optional else branch."""
        else_label, end_label = self._unique_labels("IF_ELSE", "IF_END")
        self.eat(IF)
        self.eat("(")
        self.compile_expression()
        # Negate the condition so that a false test skips the 'then' branch.
        self.vm_writer.write_arithmetic(VM_NOT)
        self.vm_writer.write_if(else_label)
        self.eat(")")
        self.eat("{")
        self.compile_statements()
        self.eat("}")
        self.vm_writer.write_goto(end_label)
        self.vm_writer.write_label(else_label)
        if self.tokenizer.current_token == ELSE:
            self.eat(ELSE)
            self.eat("{")
            self.compile_statements()
            self.eat("}")
        self.vm_writer.write_label(end_label)

    def compile_while_statement(self):
        """Compile: 'while' '(' expression ')' '{' statements '}'."""
        loop_label, exit_label = self._unique_labels("WHILE_LOOP", "WHILE_END")
        self.vm_writer.write_label(loop_label)
        self.eat(WHILE)
        self.eat("(")
        self.compile_expression()
        # Negate the condition so that a false test leaves the loop.
        self.vm_writer.write_arithmetic(VM_NOT)
        self.vm_writer.write_if(exit_label)
        self.eat(")")
        self.eat("{")
        self.compile_statements()
        self.vm_writer.write_goto(loop_label)
        self.eat("}")
        self.vm_writer.write_label(exit_label)

    def compile_return_statement(self):
        """Compile: 'return' expression? ';' (a bare return pushes 0)."""
        self.eat(RETURN)
        if self.tokenizer.current_token != ";":
            self.compile_expression()
        else:
            self.vm_writer.write_push(VM_CONST, 0)
        self.eat(";")
        self.vm_writer.write_return()

    # ----- expressions -----

    def compile_expression(self):
        """Compile: term (op term)*, applying operators left to right."""
        self.compile_term()
        while self.tokenizer.current_token in operation_map:
            op = operation_map[self._take()]
            self.compile_term()
            self.vm_writer.write_arithmetic(op)

    def compile_term(self):
        """Compile a single term.

        Raises:
            CompilationError: if the current token cannot start a term.
        """
        token = self.tokenizer.current_token
        if lexical_elements.is_int_constant(token):
            self.compile_int_constant()
        elif lexical_elements.is_string_constant(token):
            self.compile_string_constant()
        elif self.tokenizer.is_keyword_constant(token):
            self.compile_keyword_constant()
        elif token == "(":
            self.eat("(")
            self.compile_expression()
            self.eat(")")
        elif token == "-" or token == "~":
            self.compile_unary()
        elif lexical_elements.is_identifier(token):
            upcoming = self.tokenizer.peek()
            if upcoming == "[":
                self.compile_array_expression()
            elif upcoming == "(" or upcoming == ".":
                self.compile_subroutine_call()
            else:
                var_name = self._take()
                self.vm_writer.write_push(
                    segment_map[self.st.kind_of(var_name)],
                    self.st.index_of(var_name))
        else:
            # Emitting nothing here would leave the VM stack one value short.
            raise CompilationError(
                "Expected to find a term but found '{0:}'".format(token))

    def compile_int_constant(self):
        """Push the integer constant under the cursor."""
        self.vm_writer.write_push(VM_CONST, self.tokenizer.current_token)
        self.tokenizer.advance()

    def compile_string_constant(self):
        """Build a String object from the string constant under the cursor."""
        constant = self._take().replace('"', '')  # drop the quote characters
        self.vm_writer.write_push(VM_CONST, len(constant))
        self.vm_writer.write_call("String.new", 1)
        for c in constant:
            self.vm_writer.write_push(VM_CONST, ord(c))
            self.vm_writer.write_call("String.appendChar", "2")

    def compile_keyword_constant(self):
        """Push true (constant 1 negated), this (pointer 0) or 0 otherwise."""
        if self.tokenizer.current_token == TRUE:
            self.vm_writer.write_push(VM_CONST, "1")
            self.vm_writer.write_arithmetic(VM_NEG)
        elif self.tokenizer.current_token == THIS:
            self.vm_writer.write_push(VM_POINTER, 0)
        else:
            # handles both FALSE and NULL
            self.vm_writer.write_push(VM_CONST, "0")
        self.tokenizer.advance()

    def compile_unary(self):
        """Compile: unaryOp term, where '~' maps to NOT and '-' to NEG."""
        op = VM_NOT if self.tokenizer.current_token == "~" else VM_NEG
        self.tokenizer.advance()
        self.compile_term()
        self.vm_writer.write_arithmetic(op)

    def compile_array_expression(self):
        """Compile: varName '[' expression ']' and push the element's value."""
        array = self._take()
        self.eat("[")
        self.compile_expression()
        self.eat("]")
        self.vm_writer.write_push(segment_map[self.st.kind_of(array)],
                                  self.st.index_of(array))
        self.vm_writer.write_arithmetic(VM_ADD)
        self.vm_writer.write_pop(VM_POINTER, "1")  # anchor THAT to the entry
        self.vm_writer.write_push(VM_THAT, "0")  # push the entry's value

    def eat(self, token):
        """Check that the current token equals *token*, then advance.

        Raises:
            CompilationError: if the current token differs from *token*.
        """
        current_token = self.tokenizer.current_token
        if current_token != token:
            raise CompilationError(
                "Expected to find token '{0:}' but found '{1:}'".format(
                    token, current_token))
        self.tokenizer.advance()
class CompilationEngine: '''The brain of the Jack syntax analyzer''' # Constructor def __init__(self, tokenizer: JackTokenizer, out_path: Path): self.tokenizer = tokenizer # Create symbol tables self.class_level_st = SymbolTable() self.subroutine_level_st = SymbolTable() # class's name self.class_name = None self.func_name = None self.sub_type = None # Open the output file for writing self.out_stream = out_path.open('w') # Create a new VM writer for writing self.vm_writer = VMWriter(out_path.with_suffix(".vm")) # For generating labels self.label_count = {"if": 0, "while": 0} def get_if_labels(self): self.label_count["if"] += 1 return (f"LABEL_IF_{self.label_count['if'] - 1}_1", f"LABEL_IF_{self.label_count['if'] - 1}_2") def get_while_labels(self): self.label_count["while"] += 1 return (f"LABEL_WHILE_{self.label_count['while'] - 1}_1", f"LABEL_WHILE_{self.label_count['while'] - 1}_2") def start_compilation(self): # Read the first token into memory self.tokenizer.has_more_tokens() # Start analyzing syntax if self.tokenizer.get_token_type() == TokenType.KEYWORD: if self.tokenizer.get_keyword_type() == KeywordType.CLASS: self.compile_class() else: raise AttributeError("Not starting with a class") # Helper method to write terminal XML tags def write_terminal_tag(self, t, v): if t == TokenType.KEYWORD: self.out_stream.write(f"<keyword> {v} </keyword>\n") elif t == TokenType.IDENTIFIER: self.out_stream.write(f"<identifier> {v} </identifier>\n") elif t == TokenType.SYMBOL: self.out_stream.write(f"<symbol> {v} </symbol>\n") elif t == TokenType.INT_CONST: self.out_stream.write( f"<integerConstant> {v} </integerConstant>\n") elif t == TokenType.STRING_CONST: self.out_stream.write(f"<stringConstant> {v} </stringConstant>\n") # 'class' className '{' classVarDec* subroutineDec* '}' def compile_class(self): # Write opening tag self.out_stream.write("<class>\n") self.write_terminal_tag(self.tokenizer.get_token_type(), 'class') # Read the next token 
self.tokenizer.has_more_tokens() if self.tokenizer.get_token_type() == TokenType.IDENTIFIER: self.class_name = self.tokenizer.get_cur_ident() self.write_terminal_tag(self.tokenizer.get_token_type(), self.class_name) self.out_stream.write("\n===DECLARED===\nclass name\n=======") else: raise AttributeError("Not a valid class name!") # Read the next token self.tokenizer.has_more_tokens() self.eat('{') self.write_terminal_tag(self.tokenizer.get_token_type(), self.tokenizer.get_symbol()) # Handle class variable declaration (classVarDec*) # Proceed to next token self.tokenizer.has_more_tokens() # While there are field/static declarations while \ (self.tokenizer.get_token_type() == TokenType.KEYWORD) and\ ( self.tokenizer.get_keyword_type() in (KeywordType.FIELD, KeywordType.STATIC) ): self.compile_class_var_dec() while \ (self.tokenizer.get_token_type() == TokenType.KEYWORD) and\ ( self.tokenizer.get_keyword_type() in (KeywordType.CONSTRUCTOR, KeywordType.FUNCTION, KeywordType.METHOD) ): self.compile_subroutine_dec() # Class ending curly brackets self.eat("}") self.write_terminal_tag(TokenType.SYMBOL, "}") # At the end of function call self.out_stream.write("</class>\n") # ('static'|'field') type varName (',' varName)* ';' def compile_class_var_dec(self): # Write opening tag self.out_stream.write("<classVarDec>\n") # Write static/field self.write_terminal_tag(TokenType.KEYWORD, self.tokenizer.get_cur_ident()) # To store variable properties var_kind = None var_type = None var_index = None var_name = None if self.tokenizer.get_cur_ident() == "static": var_kind = SymbolKind.STATIC elif self.tokenizer.get_cur_ident() == "field": var_kind = SymbolKind.FEILD else: raise Exception("Other than static or feild:" + self.tokenizer.get_cur_ident()) # Read the next token self.tokenizer.has_more_tokens() if self.is_valid_type(): self.write_terminal_tag(self.tokenizer.get_token_type(), self.tokenizer.get_cur_ident()) var_type = self.tokenizer.get_cur_ident() else: raise 
AssertionError("Invalid class variable type!") # Move to next token self.tokenizer.has_more_tokens() if self.tokenizer.get_token_type() == TokenType.IDENTIFIER: var_name = self.tokenizer.get_cur_ident() # Write varible tag to XML file self.write_terminal_tag(self.tokenizer.get_token_type(), var_name) # Define new class level variable self.class_level_st.define(var_name, var_type, var_kind) var_index = self.class_level_st.get_index_of(var_name) # Write variable properties self.out_stream.write( f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n=======" ) else: raise AssertionError("Invalid class variable name!") # Move to the next token self.tokenizer.has_more_tokens() # If has more than one varibles: E.g. field int x, y, z; while self.tokenizer.get_token_type() == TokenType.SYMBOL \ and self.tokenizer.get_symbol() == ",": self.write_terminal_tag(TokenType.SYMBOL, ",") # Move to next token self.tokenizer.has_more_tokens() if self.tokenizer.get_token_type() == TokenType.IDENTIFIER: var_name = self.tokenizer.get_cur_ident() # Write varible tag to XML file self.write_terminal_tag(self.tokenizer.get_token_type(), var_name) # Define new class level variable self.class_level_st.define(var_name, var_type, var_kind) var_index = self.class_level_st.get_index_of(var_name) # Write variable properties self.out_stream.write( f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n=======" ) else: raise AssertionError( "Invalid Syntax for class varible declaration!") # Move to next token self.tokenizer.has_more_tokens() # Must end with ";" self.eat(";") self.write_terminal_tag(TokenType.SYMBOL, ";") # Move to next token self.tokenizer.has_more_tokens() # Write closing tag self.out_stream.write("</classVarDec>\n") # ('constructor' | 'function' | 'method') ('void' | 'type') subroutineName def compile_subroutine_dec(self): # Opening tag self.out_stream.write("<subroutineDec>\n") # To store function parameters func_params = {} # Write 
subroutine type self.sub_type = self.tokenizer.get_cur_ident() self.write_terminal_tag(TokenType.KEYWORD, self.sub_type) # Reset subroutine level symbol table self.subroutine_level_st.reset_table() # Insert `this`, if method if self.sub_type == "method": self.subroutine_level_st.define("this", self.class_name, SymbolKind.ARG) # Move to next token self.tokenizer.has_more_tokens() if self.is_valid_type() or \ (self.tokenizer.get_token_type() == TokenType.KEYWORD \ and self.tokenizer.get_keyword_type() == KeywordType.VOID): self.write_terminal_tag(self.tokenizer.get_token_type(), self.tokenizer.get_cur_ident()) else: raise AssertionError("Not a valid subroutine return type!") # Move to next token self.tokenizer.has_more_tokens() if self.tokenizer.get_token_type() == TokenType.IDENTIFIER: func_params["name"] = self.tokenizer.get_cur_ident() self.write_terminal_tag(TokenType.IDENTIFIER, func_params["name"]) else: raise AssertionError("Invalid Syntax for function name!") # Move to next token self.tokenizer.has_more_tokens() self.eat('(') self.write_terminal_tag(TokenType.SYMBOL, "(") # Move to next token self.tokenizer.has_more_tokens() # If there are some parameters self.out_stream.write("<parameterList>\n") if not (self.tokenizer.get_token_type() == TokenType.SYMBOL): self.compile_parameter_list() self.out_stream.write("</parameterList>\n") # Move to next token self.eat(')') self.write_terminal_tag(TokenType.SYMBOL, ")") # Write function VM command self.func_name = func_params['name'] # Move to the next token self.tokenizer.has_more_tokens() self.compile_subroutine_body() # Closing tag self.out_stream.write("</subroutineDec>\n") # ((type varName) (',' type varName)*)? 
def compile_parameter_list(self):
    """Compile a non-empty parameter list: type varName (',' type varName)*.

    Each parameter is defined in the subroutine-level symbol table with
    kind ``SymbolKind.ARG`` and a DECLARED record is written to the XML
    stream. On return the current token is the first one after the list.

    Raises:
        AssertionError: if a parameter type or name is malformed.
    """
    var_name = None
    var_type = None
    var_kind = SymbolKind.ARG
    var_index = None

    if self.is_valid_type():
        var_type = self.tokenizer.get_cur_ident()
        self.write_terminal_tag(self.tokenizer.get_token_type(), var_type)
    else:
        raise AssertionError("Invalid syntax in parameter list!")

    # Move to next token
    self.tokenizer.has_more_tokens()

    if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
        var_name = self.tokenizer.get_cur_ident()
        self.write_terminal_tag(TokenType.IDENTIFIER, var_name)
    else:
        raise AssertionError(
            "Invalid Syntax for function parameter name name!")

    # Define the argument variable and report its properties
    self.subroutine_level_st.define(var_name, var_type, var_kind)
    var_index = self.subroutine_level_st.get_index_of(var_name)
    self.out_stream.write(
        f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n======="
    )

    # Move to next token
    self.tokenizer.has_more_tokens()

    # Handle more than one parameter
    while self.tokenizer.get_token_type() == TokenType.SYMBOL \
            and self.tokenizer.get_symbol() == ",":
        self.write_terminal_tag(TokenType.SYMBOL, ",")

        # Read the next token: must be a type
        self.tokenizer.has_more_tokens()
        if self.is_valid_type():
            var_type = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(self.tokenizer.get_token_type(),
                                    var_type)
        else:
            raise AssertionError("Invalid variable type in parameter list")

        # Read the next token: must be the parameter name
        self.tokenizer.has_more_tokens()
        if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
            var_name = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(TokenType.IDENTIFIER, var_name)
        else:
            raise AssertionError(
                "Invalid variable name in parameter list!!")

        self.subroutine_level_st.define(var_name, var_type, var_kind)
        var_index = self.subroutine_level_st.get_index_of(var_name)
        self.out_stream.write(
            f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n======="
        )

        # Read the next token
        self.tokenizer.has_more_tokens()


# '{' varDec* statements '}'
def compile_subroutine_body(self):
    """Compile a subroutine body and emit its VM function prologue.

    The VM ``function`` command is written only after all ``var``
    declarations have been compiled, because it needs the number of
    locals. A constructor then allocates the object with ``Memory.alloc``
    and a method loads ``argument 0`` into ``pointer 0``.
    """
    self.out_stream.write("<subroutineBody>\n")

    self.eat("{")
    self.write_terminal_tag(TokenType.SYMBOL, "{")
    self.tokenizer.has_more_tokens()

    # Handle variable declarations (current token is the 'var' keyword)
    while self.tokenizer.get_token_type() == TokenType.KEYWORD \
            and self.tokenizer.get_keyword_type() == KeywordType.VAR:
        self.compile_var_dec()

    # Number of local variables of the subroutine being compiled
    nVars = self.subroutine_level_st.get_var_count(SymbolKind.VAR)
    self.vm_writer.write_function(f"{self.class_name}.{self.func_name}",
                                  nVars)

    if self.sub_type == "constructor":
        # push constant nFields; call Memory.alloc 1; pop pointer 0
        nFeilds = self.class_level_st.get_var_count(SymbolKind.FEILD)
        self.vm_writer.write_push(SegmentType.CONST, nFeilds)
        self.vm_writer.write_call("Memory.alloc", 1)
        self.vm_writer.write_pop(SegmentType.POINTER, 0)
    elif self.sub_type == "method":
        # push argument 0; pop pointer 0
        self.vm_writer.write_push(SegmentType.ARG, 0)
        self.vm_writer.write_pop(SegmentType.POINTER, 0)

    self.compile_statements()

    self.eat("}")
    self.write_terminal_tag(TokenType.SYMBOL, "}")
    self.tokenizer.has_more_tokens()

    self.out_stream.write("</subroutineBody>\n")


# 'var' type varName (',' varName)* ';'
def compile_var_dec(self):
    """Compile one ``var`` declaration and define its names as locals.

    On entry the current token is the ``var`` keyword. Every declared
    name is added to the subroutine-level symbol table with kind
    ``SymbolKind.VAR`` and reported in the XML stream.

    Raises:
        AssertionError: on an invalid type or name, or a missing ';'.
    """
    self.out_stream.write("<varDec>\n")
    self.write_terminal_tag(TokenType.KEYWORD, "var")

    var_name = None
    var_type = None
    var_kind = SymbolKind.VAR
    var_index = None

    # Move to next token: the type shared by all names
    self.tokenizer.has_more_tokens()
    if self.is_valid_type():
        var_type = self.tokenizer.get_cur_ident()
        self.write_terminal_tag(self.tokenizer.get_token_type(), var_type)
    else:
        raise AssertionError("Not a valid var type!")

    # Move to next token: the first name
    self.tokenizer.has_more_tokens()
    if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
        var_name = self.tokenizer.get_cur_ident()
        self.write_terminal_tag(TokenType.IDENTIFIER, var_name)
    else:
        raise AssertionError("Invalid Syntax for var name!")

    # Move to next token
    self.tokenizer.has_more_tokens()

    self.subroutine_level_st.define(var_name, var_type, var_kind)
    var_index = self.subroutine_level_st.get_index_of(var_name)
    self.out_stream.write(
        f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n======="
    )

    while self.tokenizer.get_token_type() == TokenType.SYMBOL \
            and self.tokenizer.get_symbol() == ",":
        self.write_terminal_tag(TokenType.SYMBOL, ",")

        # Move to the next token
        self.tokenizer.has_more_tokens()
        if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
            var_name = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(TokenType.IDENTIFIER, var_name)
        else:
            raise AssertionError("Invalid Syntax for var name!")

        self.subroutine_level_st.define(var_name, var_type, var_kind)
        var_index = self.subroutine_level_st.get_index_of(var_name)
        self.out_stream.write(
            f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n======="
        )

        # Move to the next token
        self.tokenizer.has_more_tokens()

    self.eat(";")
    self.write_terminal_tag(TokenType.SYMBOL, ";")
    self.tokenizer.has_more_tokens()

    self.out_stream.write("</varDec>\n")


# statement*
def compile_statements(self):
    """Compile consecutive statements until a non-statement token.

    The statement kind is chosen from the leading keyword; each
    ``compile_*`` handler leaves the tokenizer on the token that follows
    its statement.
    """
    self.out_stream.write("<statements>\n")

    while self.tokenizer.get_token_type() == TokenType.KEYWORD \
            and self.tokenizer.get_keyword_type() in statement_types:
        statement_type = self.tokenizer.get_keyword_type()

        if statement_type == KeywordType.LET:
            self.compile_let()
        elif statement_type == KeywordType.IF:
            self.compile_if()
        elif statement_type == KeywordType.WHILE:
            self.compile_while_statement()
        elif statement_type == KeywordType.DO:
            self.compile_do()
        elif statement_type == KeywordType.RETURN:
            self.compile_return()

    self.out_stream.write("</statements>\n")


# 'let' varName ('[' expression ']')? '=' expression ';'
def compile_let(self):
    """Compile a ``let`` statement, for a plain variable or array element.

    For ``let a[i] = e`` the target address (a + i) is computed first and
    left on the stack; after ``e`` is evaluated its value is parked in
    ``temp 0`` so the address can be popped into ``pointer 1``.

    Raises:
        AssertionError: on malformed syntax or an undeclared target.
    """
    self.out_stream.write("<letStatement>\n")
    self.write_terminal_tag(TokenType.KEYWORD, "let")

    is_array_access = False

    # Move to next token
    self.tokenizer.has_more_tokens()

    if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
        var_name = self.tokenizer.get_cur_ident()
        self.write_terminal_tag(TokenType.IDENTIFIER, var_name)

        var_props = self.lookup_st(var_name)
        if not var_props:
            # lookup_st returns False for unknown names; fail with a
            # clear message instead of a TypeError on subscripting it.
            raise AssertionError(f"Undeclared variable: {var_name}")

        self.out_stream.write(
            f"\n===USED===\nkind: {var_props['kind']}, type: {var_props['type']}, index: {var_props['index']}\n======="
        )
        segment = self.var_t_to_segment_t(var_props["kind"])
    else:
        raise AssertionError("Invalid Syntax for varName!")

    # Move to next token
    self.tokenizer.has_more_tokens()

    # Optional bracket syntax
    if self.tokenizer.get_token_type() == TokenType.SYMBOL \
            and self.tokenizer.get_symbol() == "[":
        is_array_access = True

        # push arr
        self.vm_writer.write_push(segment, var_props["index"])

        self.write_terminal_tag(TokenType.SYMBOL, "[")
        self.tokenizer.has_more_tokens()

        self.compile_expression()

        self.eat("]")
        self.write_terminal_tag(TokenType.SYMBOL, "]")

        # add: base address + index
        self.vm_writer.write_arithmetic(ArithmeticCType.ADD)

        self.tokenizer.has_more_tokens()

    self.eat("=")
    self.write_terminal_tag(TokenType.SYMBOL, "=")
    self.tokenizer.has_more_tokens()

    self.compile_expression()

    self.eat(";")
    self.write_terminal_tag(TokenType.SYMBOL, ";")
    self.tokenizer.has_more_tokens()

    if not is_array_access:
        self.vm_writer.write_pop(segment, var_props["index"])
    else:
        # pop temp 0; pop pointer 1; push temp 0; pop that 0
        self.vm_writer.write_pop(SegmentType.TEMP, 0)
        self.vm_writer.write_pop(SegmentType.POINTER, 1)
        self.vm_writer.write_push(SegmentType.TEMP, 0)
        self.vm_writer.write_pop(SegmentType.THAT, 0)

    self.out_stream.write("</letStatement>\n")


# 'if' '(' expression ')' '{' statements '}' ('else' '{' statements '}')?
def compile_if(self):
    """Compile an ``if`` statement with an optional ``else`` block.

    Emitted shape: cond, not, if-goto L1, <if body>, goto L2, label L1,
    <else body>, label L2.
    """
    self.out_stream.write("<ifStatement>\n")
    self.vm_writer.write_comment("if statement")
    self.write_terminal_tag(TokenType.KEYWORD, "if")

    L1, L2 = self.get_if_labels()

    self.tokenizer.has_more_tokens()
    self.eat("(")
    self.write_terminal_tag(TokenType.SYMBOL, "(")
    self.tokenizer.has_more_tokens()

    self.compile_expression()

    self.eat(")")
    self.write_terminal_tag(TokenType.SYMBOL, ")")
    self.tokenizer.has_more_tokens()

    # Negate the condition so a false condition skips the if body
    self.vm_writer.write_arithmetic(ArithmeticCType.NOT)
    self.vm_writer.write_if(L1)

    self.eat("{")
    self.write_terminal_tag(TokenType.SYMBOL, "{")
    self.tokenizer.has_more_tokens()

    self.compile_statements()

    self.vm_writer.write_goto(L2)
    self.vm_writer.write_label(L1)

    self.eat("}")
    self.write_terminal_tag(TokenType.SYMBOL, "}")
    self.tokenizer.has_more_tokens()

    # Handle else block
    if self.tokenizer.get_token_type() == TokenType.KEYWORD \
            and self.tokenizer.get_keyword_type() == KeywordType.ELSE:
        self.write_terminal_tag(TokenType.KEYWORD, "else")
        self.tokenizer.has_more_tokens()

        self.eat("{")
        self.write_terminal_tag(TokenType.SYMBOL, "{")
        self.tokenizer.has_more_tokens()

        self.compile_statements()

        self.eat("}")
        self.write_terminal_tag(TokenType.SYMBOL, "}")
        self.tokenizer.has_more_tokens()

    self.vm_writer.write_label(L2)

    self.out_stream.write("</ifStatement>\n")


# 'while' '(' expression ')' '{' statements '}'
def compile_while_statement(self):
    """Compile a ``while`` loop.

    Emitted shape: label L1, cond, not, if-goto L2, <body>, goto L1,
    label L2.
    """
    self.out_stream.write("<whileStatement>\n")
    self.write_terminal_tag(TokenType.KEYWORD, "while")

    L1, L2 = self.get_while_labels()
    self.vm_writer.write_label(L1)

    self.tokenizer.has_more_tokens()
    self.eat("(")
    self.write_terminal_tag(TokenType.SYMBOL, "(")
    self.tokenizer.has_more_tokens()

    self.compile_expression()

    self.eat(")")
    self.write_terminal_tag(TokenType.SYMBOL, ")")

    self.vm_writer.write_arithmetic(ArithmeticCType.NOT)
    self.vm_writer.write_if(L2)

    self.tokenizer.has_more_tokens()
    self.eat("{")
    self.write_terminal_tag(TokenType.SYMBOL, "{")
    self.tokenizer.has_more_tokens()

    self.compile_statements()

    self.eat("}")
    self.write_terminal_tag(TokenType.SYMBOL, "}")
    self.tokenizer.has_more_tokens()

    self.vm_writer.write_goto(L1)
    self.vm_writer.write_label(L2)

    self.out_stream.write("</whileStatement>\n")


# 'do' subroutineCall ';'
def compile_do(self):
    """Compile ``'do' subroutineCall ';'`` and discard the return value.

    Three call shapes are handled:

    * ``name(args)``       - method of the current object: ``this``
      (pointer 0) is pushed as the hidden first argument and
      ``<class>.name`` is called with ``nArgs + 1``.
    * ``var.name(args)``   - method of the object held in ``var``: the
      variable is pushed first and ``<type of var>.name`` is called with
      ``nArgs + 1``.
    * ``Class.name(args)`` - function or constructor: called with
      ``nArgs``.

    Raises:
        AssertionError: on malformed call syntax.
    """
    second_part = None
    var_props = False
    nArgs = 0

    self.out_stream.write("<doStatement>\n")
    self.write_terminal_tag(TokenType.KEYWORD, "do")

    # Move to next token
    self.tokenizer.has_more_tokens()

    if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
        first_part = self.tokenizer.get_cur_ident()
        self.write_terminal_tag(TokenType.IDENTIFIER, first_part)
    else:
        raise AssertionError("Not a valid subroutine/class name!!!")

    # Move to next token: '.' means a qualified call
    self.tokenizer.has_more_tokens()

    is_qualified = self.tokenizer.get_token_type() == TokenType.SYMBOL \
        and self.tokenizer.get_symbol() == "."

    if is_qualified:
        # A known variable before the dot is the receiver object and
        # must be on the stack before the explicit arguments.
        var_props = self.lookup_st(first_part)
        if var_props:
            self.vm_writer.write_push(
                self.var_t_to_segment_t(var_props["kind"]),
                var_props["index"])

        self.write_terminal_tag(TokenType.SYMBOL, ".")
        self.tokenizer.has_more_tokens()

        if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
            second_part = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(TokenType.IDENTIFIER, second_part)
        else:
            raise AssertionError("Not a valid subroutine/class name!!!")

        # Move to next token
        self.tokenizer.has_more_tokens()
    else:
        # Unqualified call: a method on the current object, so 'this'
        # is the hidden first argument.
        self.vm_writer.write_push(SegmentType.POINTER, 0)

    self.eat("(")
    self.write_terminal_tag(TokenType.SYMBOL, "(")
    self.tokenizer.has_more_tokens()

    self.out_stream.write("<expressionList>\n")
    if not (self.tokenizer.get_token_type() == TokenType.SYMBOL
            and self.tokenizer.get_symbol() == ")"):
        nArgs = self.compile_expression_list()
    self.out_stream.write("</expressionList>\n")

    self.eat(")")
    self.write_terminal_tag(TokenType.SYMBOL, ")")
    self.tokenizer.has_more_tokens()

    self.eat(";")
    self.write_terminal_tag(TokenType.SYMBOL, ";")
    self.tokenizer.has_more_tokens()

    if not is_qualified:
        # Method of this class
        self.vm_writer.write_call(f"{self.class_name}.{first_part}",
                                  nArgs + 1)
    elif var_props:
        # Method of the object stored in a variable
        self.vm_writer.write_call(f"{var_props['type']}.{second_part}",
                                  nArgs + 1)
    else:
        # Function / constructor of some class
        self.vm_writer.write_call(f"{first_part}.{second_part}", nArgs)

    # call-and-return contract: a do statement discards the result
    self.vm_writer.write_pop(SegmentType.TEMP, 0)

    self.out_stream.write("</doStatement>\n")

# 'return' expression?
# ';'   (end of the grammar rule for return: 'return' expression? ';')
def compile_return(self):
    """Compile a ``return`` statement.

    A bare ``return;`` pushes constant 0 so that the caller always finds
    a value on the stack.
    """
    self.out_stream.write("<returnStatement>\n")
    self.write_terminal_tag(TokenType.KEYWORD, "return")

    # Move to next token
    self.tokenizer.has_more_tokens()

    if self.tokenizer.get_token_type() == TokenType.SYMBOL \
            and self.tokenizer.get_symbol() == ";":
        # void return: push the dummy value
        self.vm_writer.write_push(SegmentType.CONST, 0)
    else:
        self.compile_expression()

    self.eat(";")
    self.write_terminal_tag(TokenType.SYMBOL, ";")

    # Move to next token
    self.tokenizer.has_more_tokens()

    self.vm_writer.write_return()

    self.out_stream.write("</returnStatement>\n")


# term (op term)*
def compile_expression(self):
    """Compile ``term (op term)*``, applying operators left to right.

    Each operator is emitted after its right-hand term, through the
    ``allowed_op`` mapping.
    """
    self.out_stream.write("<expression>\n")

    self.compile_term()

    while self.tokenizer.get_token_type() == TokenType.SYMBOL \
            and self.tokenizer.get_symbol() in allowed_op:
        symbol = self.tokenizer.get_symbol()
        self.write_terminal_tag(TokenType.SYMBOL, symbol)

        # Move to next token
        self.tokenizer.has_more_tokens()

        self.compile_term()

        # Apply operation
        self.vm_writer.write_arithmetic(allowed_op[symbol])

    self.out_stream.write("</expression>\n")


# integerConstant | stringConstant | keywordConstant | varName |
# varName '[' expression ']' | subroutineCall | '(' expression ')'
# | unaryOp term
def compile_term(self):
    """Compile a single term and leave its value on the VM stack.

    Raises:
        AssertionError: if the current token cannot start a term, or on
            malformed syntax inside the term.
    """
    self.out_stream.write("<term>\n")

    token_type = self.tokenizer.get_token_type()

    if token_type == TokenType.INT_CONST:
        value = self.tokenizer.get_int_val()
        self.write_terminal_tag(TokenType.INT_CONST, value)
        self.vm_writer.write_push(SegmentType.CONST, value)
        self.tokenizer.has_more_tokens()

    elif token_type == TokenType.STRING_CONST:
        text = self.tokenizer.get_string_val()
        self.write_terminal_tag(TokenType.STRING_CONST, text)
        # Build the string at run time: String.new(length), then one
        # String.appendChar per character (appendChar returns the string,
        # so it stays on the stack as the receiver of the next call).
        # NOTE(review): assumes get_string_val() returns the text without
        # the surrounding double quotes - confirm against the tokenizer.
        self.vm_writer.write_push(SegmentType.CONST, len(text))
        self.vm_writer.write_call("String.new", 1)
        for char in text:
            self.vm_writer.write_push(SegmentType.CONST, ord(char))
            self.vm_writer.write_call("String.appendChar", 2)
        self.tokenizer.has_more_tokens()

    elif token_type == TokenType.KEYWORD \
            and self.tokenizer.get_keyword_type() in keyword_constants:
        kc = self.tokenizer.get_cur_ident()
        self.write_terminal_tag(TokenType.KEYWORD, kc)

        if kc == "null" or kc == "false":
            # push const 0
            self.vm_writer.write_push(SegmentType.CONST, 0)
        elif kc == "true":
            # push const 1; neg  ->  -1
            self.vm_writer.write_push(SegmentType.CONST, 1)
            self.vm_writer.write_arithmetic(ArithmeticCType.NEG)
        elif kc == "this":
            # push pointer 0
            self.vm_writer.write_push(SegmentType.POINTER, 0)

        self.tokenizer.has_more_tokens()

    elif token_type == TokenType.IDENTIFIER:
        self._compile_identifier_term()

    elif token_type == TokenType.SYMBOL:
        # Handle '(' expression ')'
        if self.tokenizer.get_symbol() == '(':
            self.eat("(")
            self.write_terminal_tag(TokenType.SYMBOL, "(")
            self.tokenizer.has_more_tokens()

            self.compile_expression()

            self.eat(")")
            self.write_terminal_tag(TokenType.SYMBOL, ")")
            self.tokenizer.has_more_tokens()

        # Handle unaryOp term
        elif self.tokenizer.get_symbol() in allowed_unary_op:
            unary_op = self.tokenizer.get_symbol()
            self.write_terminal_tag(TokenType.SYMBOL, unary_op)
            self.tokenizer.has_more_tokens()

            self.compile_term()
            self.vm_writer.write_arithmetic(allowed_unary_op[unary_op])
        else:
            raise AssertionError("( or unary Op expected!!")

    else:
        # e.g. a keyword that is not a keyword constant
        raise AssertionError("Invalid term!")

    self.out_stream.write("</term>\n")


def _compile_identifier_term(self):
    """Compile a term that starts with an identifier.

    Covers ``varName``, ``varName '[' expression ']'`` and the three
    subroutine call shapes: ``name(...)`` (method on the current object),
    ``var.name(...)`` (method on the object in ``var``) and
    ``Class.name(...)`` (function or constructor).

    Raises:
        AssertionError: on malformed syntax, or when an identifier used
            as a value is not found in either symbol table.
    """
    second_part = None
    nArgs = 0

    first_part = self.tokenizer.get_cur_ident()
    var_props = self.lookup_st(first_part)
    self.write_terminal_tag(TokenType.IDENTIFIER, first_part)

    # Move to next token; it decides what the identifier means
    self.tokenizer.has_more_tokens()

    symbol = None
    if self.tokenizer.get_token_type() == TokenType.SYMBOL:
        symbol = self.tokenizer.get_symbol()

    if symbol == "(":
        # Unqualified call: a method on the current object, so 'this'
        # is pushed as the hidden first argument.
        self.vm_writer.write_push(SegmentType.POINTER, 0)
    elif symbol == ".":
        # A known variable before the dot is the receiver object;
        # otherwise the identifier is a class name and nothing is pushed.
        if var_props:
            self.vm_writer.write_push(
                self.var_t_to_segment_t(var_props["kind"]),
                var_props["index"])
    else:
        # Plain variable or array base: it must be declared.
        if not var_props:
            raise AssertionError(f"Undeclared variable: {first_part}")
        self.vm_writer.write_push(
            self.var_t_to_segment_t(var_props["kind"]),
            var_props["index"])

    # Handle varName '[' expression ']'
    if symbol == "[":
        self.eat("[")
        self.write_terminal_tag(TokenType.SYMBOL, "[")
        self.tokenizer.has_more_tokens()

        self.compile_expression()

        self.eat(']')
        self.write_terminal_tag(TokenType.SYMBOL, "]")

        # add; pop pointer 1; push that 0
        self.vm_writer.write_arithmetic(ArithmeticCType.ADD)
        self.vm_writer.write_pop(SegmentType.POINTER, 1)
        self.vm_writer.write_push(SegmentType.THAT, 0)

        # Move to next token
        self.tokenizer.has_more_tokens()

    # Handle subroutineCall
    elif symbol == "(" or symbol == ".":
        if symbol == ".":
            self.write_terminal_tag(TokenType.SYMBOL, ".")
            self.tokenizer.has_more_tokens()

            if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
                second_part = self.tokenizer.get_cur_ident()
                self.write_terminal_tag(TokenType.IDENTIFIER, second_part)
            else:
                raise AssertionError(
                    "Not a valid subroutine/class name!!!")

            # Move to next token
            self.tokenizer.has_more_tokens()

        self.eat("(")
        self.write_terminal_tag(TokenType.SYMBOL, "(")
        self.tokenizer.has_more_tokens()

        self.out_stream.write("<expressionList>\n")
        if not (self.tokenizer.get_token_type() == TokenType.SYMBOL
                and self.tokenizer.get_symbol() == ")"):
            nArgs = self.compile_expression_list()
        self.out_stream.write("</expressionList>\n")

        self.eat(")")
        self.write_terminal_tag(TokenType.SYMBOL, ")")

        # Move to next token
        self.tokenizer.has_more_tokens()

        if symbol == "(":
            # Method of this class
            self.vm_writer.write_call(
                f"{self.class_name}.{first_part}", nArgs + 1)
        elif var_props:
            # Method of the object stored in a variable
            self.vm_writer.write_call(
                f"{var_props['type']}.{second_part}", nArgs + 1)
        else:
            # Function / constructor of some class
            self.vm_writer.write_call(f"{first_part}.{second_part}",
                                      nArgs)


# expression (',' expression)*
def compile_expression_list(self):
    """Compile a non-empty, comma-separated expression list.

    Returns:
        The number of expressions compiled (at least 1). Callers handle
        the empty list themselves by not calling this method.
    """
    self.compile_expression()
    arg_count = 1

    while (self.tokenizer.get_token_type() == TokenType.SYMBOL) \
            and (self.tokenizer.get_symbol() == ","):
        self.write_terminal_tag(TokenType.SYMBOL, ",")
        self.tokenizer.has_more_tokens()
        self.compile_expression()
        arg_count += 1

    return arg_count


# eat the given string, else raise error
def eat(self, string):
    """Assert that the current token is the symbol ``string``.

    The tokenizer is not advanced; callers do that themselves.

    Raises:
        AssertionError: if the current token is not a symbol, or is a
            different symbol.
    """
    if self.tokenizer.get_token_type() == TokenType.SYMBOL:
        if not (self.tokenizer.get_symbol() == string):
            raise AssertionError(
                f"Expected symbol {string}, found: {self.tokenizer.get_symbol()}"
            )
    else:
        raise AssertionError("Symbol not found!!")


# Utility method to check whether
# the current token is a valid data type
def is_valid_type(self):
    """Return True if the current token can be used as a type.

    A keyword counts only when its keyword type is in ``data_types``;
    any identifier is accepted as a class name.
    """
    # If built-in data type
    if self.tokenizer.get_token_type() == TokenType.KEYWORD:
        if self.tokenizer.get_keyword_type() in data_types:
            return True
    # If custom data type
    elif self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
        return True
    # Invalid data type
    return False


# Lookup variable in symbol table
def lookup_st(self, v_name):
    """Return the properties of ``v_name``, innermost scope first.

    The subroutine-level table is searched before the class-level one.

    Returns:
        A dict with keys ``"kind"``, ``"type"`` and ``"index"``, or
        ``False`` when the name is in neither table.
    """
    for table in (self.subroutine_level_st, self.class_level_st):
        v_kind = table.get_kind_of(v_name)
        if v_kind != SymbolKind.NONE:
            return {
                "kind": v_kind,
                "type": table.get_type_of(v_name),
                "index": table.get_index_of(v_name),
            }
    return False


def var_t_to_segment_t(self, v_kind: SymbolKind) -> SegmentType:
    """Map a symbol kind to the VM memory segment that stores it.

    Raises:
        AssertionError: if ``v_kind`` has no corresponding segment.
    """
    if v_kind == SymbolKind.STATIC:
        return SegmentType.STATIC
    elif v_kind == SymbolKind.ARG:
        return SegmentType.ARG
    elif v_kind == SymbolKind.VAR:
        return SegmentType.LOCAL
    elif v_kind == SymbolKind.FEILD:
        return SegmentType.THIS
    else:
        raise AssertionError("No segment kind for given v_kind!!")