def analyze(src_jack_file):
    """Print every token of a Jack source file, one token per line.

    Args:
        src_jack_file: Value handed unchanged to ``JackTokenizer``.
    """
    token_stream = JackTokenizer(src_jack_file)
    while True:
        if not token_stream.has_more_tokens():
            break
        # Load the next token, then echo it.
        token_stream.advance()
        print(token_stream.get_current_token())
class CompilationEngine:
    """Compile one Jack class into VM commands by recursive descent.

    Tokens come from a ``JackTokenizer``, identifiers are recorded in a
    ``SymbolTable`` and VM commands are emitted through the supplied VM
    writer.  Every ``compile_*`` method expects the tokenizer to be
    positioned on the first token of its construct and leaves it on the
    token that follows the construct.  Token-position comments below assume
    well-formed Jack input.
    """

    CLASS_VAR_DEC_KEYWORDS = ['static', 'field']
    SUBROUTINE_DEC_KEYWORDS = ['constructor', 'function', 'method']
    BINARY_OPERATOR_SYMBOLS = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
    UNARY_OPERATOR_SYMBOLS = ['-', '~']
    # Binary operators with a direct VM command; '*' and '/' are compiled
    # as calls to Math.multiply / Math.divide instead.
    BINARY_OPERATORS_TO_COMMAND = {
        '+': 'add',
        '-': 'sub',
        '=': 'eq',
        '>': 'gt',
        '<': 'lt',
        '&': 'and',
        '|': 'or'
    }
    UNARY_OPERATORS_TO_COMMAND = {'-': 'neg', '~': 'not'}
    TYPE_TO_TAG = {
        'STRING_CONST': 'stringConstant',
        'INT_CONST': 'integerConstant',
        'KEYWORD': 'keyword',
        'IDENTIFIER': 'identifier',
        'SYMBOL': 'symbol'
    }
    # Not used by the VM code generation in this class; kept as XML entity
    # escapes (an identity mapping would escape nothing).
    SYMBOLS_TO_XML_CONVENTION = {
        '<': '&lt;',
        '>': '&gt;',
        '&': '&amp;',
        '"': '&quot;'
    }
    # Symbol kinds that name a variable holding an object reference, i.e.
    # ``name.method(...)`` is a method call on that object.
    OBJECT_VARIABLE_KINDS = ('VAR', 'FIELD', 'ARG', 'STATIC')

    def __init__(self, input_file_path, vm_writer: 'VMWriter'):
        """Create the tokenizer and symbol table, then compile immediately.

        Args:
            input_file_path: Value handed to ``JackTokenizer``.
            vm_writer: Receives the generated VM commands; it is closed at
                the end of ``compile_class``.
        """
        self.jack_tokenizer = JackTokenizer(input_file_path)
        self.symbol_table = SymbolTable()
        self.vm_writer = vm_writer
        if self.jack_tokenizer.has_more_tokens():
            self.compile_class()

    def compile_class(self):
        """Compile a whole class declaration and close the VM writer."""
        self.jack_tokenizer.advance()  # load 'class'
        self.jack_tokenizer.advance()  # -> className
        self.symbol_table.define(self.jack_tokenizer.identifier(), 'NONE',
                                 'CLASS')
        self.jack_tokenizer.advance()  # -> '{'
        self.jack_tokenizer.advance()  # -> first member
        if (self.jack_tokenizer.key_word()
                in CompilationEngine.CLASS_VAR_DEC_KEYWORDS):
            self.compile_class_var_dec()
        if (self.jack_tokenizer.key_word()
                in CompilationEngine.SUBROUTINE_DEC_KEYWORDS):
            self.compile_subroutine()
        self.jack_tokenizer.advance()  # past the closing '}'
        self.vm_writer.close()

    def write_token(self, token_name):
        """Write the current token as an XML element and advance.

        NOTE(review): this writes to ``self.output_file``, which
        ``__init__`` of this class never assigns, so the call raises
        ``AttributeError`` unless the attribute is set externally.  It looks
        like a leftover from an XML-emitting engine.
        """
        type_tag = CompilationEngine.TYPE_TO_TAG[
            self.jack_tokenizer.token_type()]
        self.output_file.write('<{0}> {1} </{0}>\n'.format(
            type_tag, token_name))
        self.jack_tokenizer.advance()

    def compile_class_var_dec(self):
        """Register every consecutive ``static``/``field`` declaration."""
        while (self.jack_tokenizer.key_word()
               in CompilationEngine.CLASS_VAR_DEC_KEYWORDS):
            kind = ''
            if self.jack_tokenizer.key_word() == 'field':
                kind = 'FIELD'
            elif self.jack_tokenizer.key_word() == 'static':
                kind = 'STATIC'
            self.jack_tokenizer.advance()  # -> type
            field_type = self.get_type()
            self.jack_tokenizer.advance()  # -> first varName
            self.symbol_table.define(self.jack_tokenizer.identifier(),
                                     field_type, kind)
            self.jack_tokenizer.advance()  # -> ',' or ';'
            while self.jack_tokenizer.symbol() != ';':
                self.jack_tokenizer.advance()  # ',' -> varName
                self.symbol_table.define(self.jack_tokenizer.identifier(),
                                         field_type, kind)
                self.jack_tokenizer.advance()  # -> ',' or ';'
            self.jack_tokenizer.advance()  # past ';'

    def write_type(self):
        """Write the current type token through ``write_token``.

        NOTE(review): relies on ``write_token`` and therefore on
        ``self.output_file``; see the note on ``write_token``.
        """
        if self.jack_tokenizer.token_type() == 'KEYWORD':
            self.write_token(self.jack_tokenizer.key_word())
        elif self.jack_tokenizer.token_type() == 'IDENTIFIER':
            self.write_token(self.jack_tokenizer.identifier())

    def compile_subroutine(self):
        """Compile every consecutive constructor/function/method."""
        self.vm_writer.zero_branching_indexes()
        while (self.jack_tokenizer.key_word()
               in CompilationEngine.SUBROUTINE_DEC_KEYWORDS):
            self.symbol_table.start_subroutine()
            constructor = self.jack_tokenizer.key_word() == 'constructor'
            method = False
            if self.jack_tokenizer.key_word() == 'method':
                method = True
                # The receiver occupies argument 0 of every method.
                self.symbol_table.define(
                    'this', self.symbol_table.get_class_name(), 'ARG')
            self.jack_tokenizer.advance()  # -> return type
            self.jack_tokenizer.advance()  # -> subroutineName
            self.symbol_table.define(self.jack_tokenizer.identifier(), 'NONE',
                                     'SUBROUTINE')
            name = (self.symbol_table.get_class_name() + '.' +
                    self.jack_tokenizer.identifier())
            self.jack_tokenizer.advance()  # -> '('
            self.jack_tokenizer.advance()  # -> first parameter or ')'
            self.compile_parameter_list()
            self.jack_tokenizer.advance()  # ')' -> '{'
            self.jack_tokenizer.advance()  # '{' -> first body token
            var_num = 0
            while self.jack_tokenizer.key_word() == 'var':
                var_num += self.compile_var_dec()
            # The function header needs the local count, so it is written
            # only after all 'var' declarations have been read.
            self.vm_writer.write_function(name, var_num)
            if method:
                # Anchor 'this' to the receiver passed as argument 0.
                self.vm_writer.write_push('ARG', 0)
                self.vm_writer.write_pop('POINTER', 0)
            elif constructor:
                # Allocate one word per field and anchor 'this' to it.
                field_count = self.symbol_table.var_count('FIELD')
                self.vm_writer.write_push('CONST', field_count)
                self.vm_writer.write_call('Memory.alloc', 1)
                self.vm_writer.write_pop('POINTER', 0)
            self.compile_statements()
            self.jack_tokenizer.advance()  # past the closing '}'

    def compile_parameter_list(self):
        """Register the (possibly empty) parameter list as ``ARG`` symbols.

        Stops with the tokenizer on the closing ``)``.
        """
        if self.jack_tokenizer.symbol() != ')':
            parameter_type = self.get_type()
            self.jack_tokenizer.advance()  # -> parameter name
            self.symbol_table.define(self.jack_tokenizer.identifier(),
                                     parameter_type, 'ARG')
            self.jack_tokenizer.advance()  # -> ',' or ')'
            while self.jack_tokenizer.symbol() == ",":
                self.jack_tokenizer.advance()  # ',' -> type
                parameter_type = self.get_type()
                self.jack_tokenizer.advance()  # -> parameter name
                self.symbol_table.define(self.jack_tokenizer.identifier(),
                                         parameter_type, 'ARG')
                self.jack_tokenizer.advance()  # -> ',' or ')'

    def get_type(self):
        """Return the current token read as a type name.

        Keyword tokens are read with ``key_word()`` and identifier tokens
        with ``identifier()``.  Any other token type leaves the result
        unbound and raises ``UnboundLocalError``.
        """
        if self.jack_tokenizer.token_type() == 'KEYWORD':
            parameter_type = self.jack_tokenizer.key_word()
        elif self.jack_tokenizer.token_type() == 'IDENTIFIER':
            parameter_type = self.jack_tokenizer.identifier()
        return parameter_type

    def compile_var_dec(self):
        """Register one ``var`` declaration.

        Returns:
            The number of local variables the declaration introduced.
        """
        var_num = 1
        self.jack_tokenizer.advance()  # 'var' -> type
        var_type = self.get_type()
        self.jack_tokenizer.advance()  # -> first varName
        self.symbol_table.define(self.jack_tokenizer.identifier(), var_type,
                                 'VAR')
        self.jack_tokenizer.advance()  # -> ',' or ';'
        while self.jack_tokenizer.symbol() == ",":
            var_num += 1
            self.jack_tokenizer.advance()  # ',' -> varName
            self.symbol_table.define(self.jack_tokenizer.identifier(),
                                     var_type, 'VAR')
            self.jack_tokenizer.advance()  # -> ',' or ';'
        self.jack_tokenizer.advance()  # past ';'
        return var_num

    def compile_statements(self):
        """Compile statements for as long as a statement keyword is current."""
        while self.jack_tokenizer.token_type() == 'KEYWORD':
            keyword = self.jack_tokenizer.key_word()
            if keyword == 'let':
                self.compile_let()
            elif keyword == 'if':
                self.compile_if()
            elif keyword == 'while':
                self.compile_while()
            elif keyword == 'do':
                self.compile_do()
            elif keyword == 'return':
                self.compile_return()
            else:
                # No handler consumes this keyword; looping again would
                # never terminate.
                break

    def compile_do(self):
        """Compile ``do subroutineCall ;`` and drop the returned value."""
        self.jack_tokenizer.advance()  # 'do' -> first identifier
        name = self.jack_tokenizer.identifier()
        self.jack_tokenizer.advance()  # -> '(' or '.'
        self.compile_subroutine_call(name)
        # must dispose of void function return value
        self.vm_writer.write_pop('TEMP', 0)
        self.jack_tokenizer.advance()  # past ';'

    def compile_subroutine_call(self, prefix_call=''):
        """Compile a call whose leading identifier was already consumed.

        Args:
            prefix_call: The identifier preceding the current ``(`` or
                ``.`` token: a subroutine, class or variable name.
        """
        if self.jack_tokenizer.symbol() == '(':
            # If not in symbol table - then subroutine
            kind = self.symbol_table.kind_of(prefix_call)
            subroutine = not kind or kind == 'SUBROUTINE'
            self.jack_tokenizer.advance()  # '(' -> first argument or ')'
            args_count = 0
            if subroutine:
                # Unqualified call: pass the current object as receiver.
                self.vm_writer.write_push('POINTER', 0)
                args_count += 1
            args_count += self.compile_expression_list()
            if subroutine:
                self.vm_writer.write_call(
                    self.symbol_table.get_class_name() + '.' + prefix_call,
                    args_count)
            else:
                self.vm_writer.write_call(prefix_call, args_count)
            self.jack_tokenizer.advance()  # past ')'
        elif self.jack_tokenizer.symbol() == '.':
            variable = False
            self.jack_tokenizer.advance()  # '.' -> subroutineName
            # Any variable kind can hold the receiver object, including
            # subroutine arguments and statics.
            if (self.symbol_table.kind_of(prefix_call)
                    in CompilationEngine.OBJECT_VARIABLE_KINDS):
                variable = True
                variable_name = prefix_call
                prefix_call = self.symbol_table.type_of(prefix_call)
            prefix_call += '.{0}'.format(self.jack_tokenizer.identifier())
            self.jack_tokenizer.advance()  # -> '('
            self.jack_tokenizer.advance()  # -> first argument or ')'
            args_count = 0
            if variable:
                # Method call on an object: push the receiver first.
                self.vm_writer.write_push(
                    self.symbol_table.kind_of(variable_name),
                    self.symbol_table.index_of(variable_name))
                args_count += 1
            args_count += self.compile_expression_list()
            self.vm_writer.write_call(prefix_call, args_count)
            self.jack_tokenizer.advance()  # past ')'

    def compile_let(self):
        """Compile ``let varName ([expression])? = expression ;``."""
        self.jack_tokenizer.advance()  # 'let' -> varName
        var_name = self.jack_tokenizer.identifier()
        self.jack_tokenizer.advance()  # -> '[' or '='
        if self.jack_tokenizer.symbol() == '[':
            self.vm_writer.write_push(self.symbol_table.kind_of(var_name),
                                      self.symbol_table.index_of(var_name))
            self.jack_tokenizer.advance()  # '[' -> index expression
            self.compile_expression()
            self.vm_writer.write_arithmetic("add")
            self.jack_tokenizer.advance()  # ']' -> '='
            self.jack_tokenizer.advance()  # '=' -> value expression
            self.compile_expression()
            # Park the value in TEMP 0 so the target address can be moved
            # into POINTER 1 before the store through THAT 0.
            self.vm_writer.write_pop('TEMP', 0)
            self.vm_writer.write_pop('POINTER', 1)
            self.vm_writer.write_push('TEMP', 0)
            self.vm_writer.write_pop('THAT', 0)
        else:
            self.jack_tokenizer.advance()  # '=' -> value expression
            self.compile_expression()
            self.vm_writer.write_pop(self.symbol_table.kind_of(var_name),
                                     self.symbol_table.index_of(var_name))
        self.jack_tokenizer.advance()  # past ';'

    def compile_while(self):
        """Compile ``while (expression) { statements }``."""
        while_idx = self.vm_writer.get_next_label_index('while')
        if_label = 'WHILE_IF_{0}'.format(while_idx)
        end_label = 'WHILE_END_{0}'.format(while_idx)
        self.vm_writer.write_label(if_label)
        self.jack_tokenizer.advance()  # 'while' -> '('
        self.jack_tokenizer.advance()  # '(' -> condition
        self.compile_expression()
        # Negate so that if-goto leaves the loop when the condition fails.
        self.vm_writer.write_arithmetic('not')
        self.jack_tokenizer.advance()  # ')' -> '{'
        self.jack_tokenizer.advance()  # '{' -> first statement
        self.vm_writer.write_if(end_label)
        self.compile_statements()
        self.vm_writer.write_goto(if_label)
        self.jack_tokenizer.advance()  # past '}'
        self.vm_writer.write_label(end_label)

    def compile_return(self):
        """Compile ``return expression? ;``; a bare return pushes 0."""
        self.jack_tokenizer.advance()  # 'return' -> expression or ';'
        if self.jack_tokenizer.symbol() != ';':
            self.compile_expression()
        else:
            self.vm_writer.write_push('CONST', 0)
        self.vm_writer.write_return()
        self.jack_tokenizer.advance()  # past ';'

    def compile_if(self):
        """Compile ``if (expression) { statements }`` with optional else."""
        if_idx = self.vm_writer.get_next_label_index('if')
        else_label = 'IF_ELSE_{0}'.format(if_idx)
        end_label = 'IF_END_{0}'.format(if_idx)
        self.jack_tokenizer.advance()  # 'if' -> '('
        self.jack_tokenizer.advance()  # '(' -> condition
        self.compile_expression()
        # Negate so that if-goto jumps to the else part on a false test.
        self.vm_writer.write_arithmetic('not')
        self.jack_tokenizer.advance()  # ')' -> '{'
        self.jack_tokenizer.advance()  # '{' -> first statement
        self.vm_writer.write_if(else_label)
        self.compile_statements()
        self.jack_tokenizer.advance()  # past '}'
        self.vm_writer.write_goto(end_label)
        self.vm_writer.write_label(else_label)
        if self.jack_tokenizer.key_word() == 'else':
            self.jack_tokenizer.advance()  # 'else' -> '{'
            self.jack_tokenizer.advance()  # '{' -> first statement
            self.compile_statements()
            self.jack_tokenizer.advance()  # past '}'
        self.vm_writer.write_label(end_label)

    def compile_expression(self):
        """Compile ``term (op term)*``, applying operators left to right."""
        self.compile_term()
        while (self.jack_tokenizer.symbol()
               in CompilationEngine.BINARY_OPERATOR_SYMBOLS):
            symbol = self.jack_tokenizer.symbol()
            self.jack_tokenizer.advance()  # operator -> right-hand term
            self.compile_term()
            if symbol in self.BINARY_OPERATORS_TO_COMMAND:
                self.vm_writer.write_arithmetic(
                    self.BINARY_OPERATORS_TO_COMMAND[symbol])
            elif symbol == '*':
                self.vm_writer.write_call('Math.multiply', 2)
            elif symbol == '/':
                self.vm_writer.write_call('Math.divide', 2)

    def compile_term(self):
        """Compile a single term and push its value."""
        token_type = self.jack_tokenizer.token_type()
        if token_type == 'IDENTIFIER':
            name = self.jack_tokenizer.identifier()
            self.jack_tokenizer.advance()  # look at what follows the name
            if self.jack_tokenizer.symbol() in ('(', '.'):
                self.compile_subroutine_call(name)
            elif self.jack_tokenizer.symbol() == '[':
                # Array read: base + index, then fetch through THAT 0.
                self.vm_writer.write_push(self.symbol_table.kind_of(name),
                                          self.symbol_table.index_of(name))
                self.jack_tokenizer.advance()  # '[' -> index expression
                self.compile_expression()
                self.jack_tokenizer.advance()  # past ']'
                self.vm_writer.write_arithmetic('add')
                self.vm_writer.write_pop('POINTER', 1)
                self.vm_writer.write_push('THAT', 0)
            else:
                kind = self.symbol_table.kind_of(name)
                idx = self.symbol_table.index_of(name)
                self.vm_writer.write_push(kind, idx)
        elif token_type == 'STRING_CONST':
            # Build the string at run time, one character at a time.
            string_const = self.jack_tokenizer.string_val()
            self.vm_writer.write_push("CONST", len(string_const))
            self.vm_writer.write_call("String.new", 1)
            for char in string_const:
                self.vm_writer.write_push('CONST', ord(char))
                self.vm_writer.write_call("String.appendChar", 2)
            self.jack_tokenizer.advance()
        elif token_type == 'KEYWORD':
            keyword = self.jack_tokenizer.key_word()
            if keyword == 'true':
                # true is emitted as the negation of constant 1.
                self.vm_writer.write_push('CONST', 1)
                self.vm_writer.write_arithmetic('neg')
            elif keyword == 'false' or keyword == 'null':
                self.vm_writer.write_push('CONST', 0)
            elif keyword == 'this':
                self.vm_writer.write_push('POINTER', 0)
            self.jack_tokenizer.advance()
        elif token_type == 'SYMBOL':
            if self.jack_tokenizer.symbol() == '(':
                self.jack_tokenizer.advance()  # '(' -> inner expression
                self.compile_expression()
                self.jack_tokenizer.advance()  # past ')'
            elif (self.jack_tokenizer.symbol()
                  in CompilationEngine.UNARY_OPERATOR_SYMBOLS):
                command = CompilationEngine.UNARY_OPERATORS_TO_COMMAND[
                    self.jack_tokenizer.symbol()]
                self.jack_tokenizer.advance()  # operator -> operand
                self.compile_term()
                self.vm_writer.write_arithmetic(command)
        elif token_type == 'INT_CONST':
            self.vm_writer.write_push('CONST', self.jack_tokenizer.int_val())
            self.jack_tokenizer.advance()

    def compile_expression_list(self):
        """Compile a comma-separated argument list.

        Stops with the tokenizer on the closing ``)``.

        Returns:
            The number of expressions compiled.
        """
        expression_count = 0
        if self.jack_tokenizer.symbol() != ')':
            self.compile_expression()
            expression_count += 1
            while self.jack_tokenizer.symbol() == ',':
                self.jack_tokenizer.advance()  # ',' -> next expression
                self.compile_expression()
                expression_count += 1
        return expression_count
class CompilationEngine:
    """Jack-to-VM compiler driven by one token of lookahead.

    The ``is_*`` and ``check_next_*`` helpers only inspect the tokenizer's
    upcoming token; ``load_next_token()`` makes that token current and
    returns its text.  Sub-methods are entered after the token that selects
    them has been loaded.
    """

    def __init__(self, jack_file):
        """Create the VM writer, tokenizer and symbol table for a file."""
        self.vm_writer = VMWriter(jack_file)
        self.tokenizer = JackTokenizer(jack_file)
        self.symbol_table = SymbolTable()
        # Label counters; both are incremented before use, so they start
        # one below the first index.
        self.if_index = -1
        self.while_index = -1

    def compile_class(self):
        """Compile 'class' className '{' classVarDec* subroutineDec* '}'."""
        self.load_next_token()  # 'class'
        self.class_name = self.load_next_token()  # className
        self.load_next_token()  # '{'

        while self.is_class_var_dec():
            self.compile_class_var_dec()

        while self.is_subroutine_dec():
            self.compile_subroutine()

        self.vm_writer.close()

    def compile_class_var_dec(self):
        """Compile ('static' | 'field') type varName (',' varName)* ';'."""
        scope = self.load_next_token()  # 'static' | 'field'
        var_type = self.load_next_token()
        first_name = self.load_next_token()
        self.symbol_table.define(first_name, var_type, scope.upper())

        while self.check_next_token() != ";":
            self.load_next_token()  # ','
            extra_name = self.load_next_token()
            self.symbol_table.define(extra_name, var_type, scope.upper())

        self.load_next_token()  # ';'

    def compile_subroutine(self):
        """Compile one subroutine declaration together with its body."""
        flavour = self.load_next_token()  # constructor | function | method
        self.load_next_token()  # 'void' | type
        short_name = self.load_next_token()  # subroutineName

        self.symbol_table.start_subroutine()
        if flavour == "method":
            # Reserve argument 0 for the receiver object.
            self.symbol_table.define("instance", self.class_name, "ARG")

        self.load_next_token()  # '('
        self.compile_parameter_list()
        self.load_next_token()  # ')'
        self.load_next_token()  # '{'

        while self.check_next_token() == "var":
            self.compile_var_dec()

        # The local count is only known once every 'var' line was read.
        qualified_name = f"{self.class_name}.{short_name}"
        local_count = self.symbol_table.counts["VAR"]
        self.vm_writer.write_function(qualified_name, local_count)

        if flavour == "constructor":
            field_count = self.symbol_table.counts["FIELD"]
            self.vm_writer.write_push("CONST", field_count)
            self.vm_writer.write_call("Memory.alloc", 1)
            self.vm_writer.write_pop("POINTER", 0)
        elif flavour == "method":
            self.vm_writer.write_push("ARG", 0)
            self.vm_writer.write_pop("POINTER", 0)

        self.compile_statements()
        self.load_next_token()  # '}'

    def compile_parameter_list(self):
        """Compile ((type varName) (',' type varName)*)? up to the ')'."""
        if self.check_next_token() != ")":
            param_type = self.load_next_token()
            param_name = self.load_next_token()
            self.symbol_table.define(param_name, param_type, "ARG")

        while self.check_next_token() != ")":
            self.load_next_token()  # ','
            param_type = self.load_next_token()
            param_name = self.load_next_token()
            self.symbol_table.define(param_name, param_type, "ARG")

    def compile_var_dec(self):
        """Compile 'var' type varName (',' varName)* ';'."""
        self.load_next_token()  # 'var'
        local_type = self.load_next_token()
        local_name = self.load_next_token()
        self.symbol_table.define(local_name, local_type, "VAR")

        while self.check_next_token() != ";":
            self.load_next_token()  # ','
            local_name = self.load_next_token()
            self.symbol_table.define(local_name, local_type, "VAR")

        self.load_next_token()  # ';'

    def compile_statements(self):
        """Compile statements while the upcoming token starts one."""
        handlers = {
            "let": self.compile_let,
            "if": self.compile_if,
            "while": self.compile_while,
            "do": self.compile_do,
            "return": self.compile_return,
        }
        while self.is_statement():
            keyword = self.load_next_token()
            handler = handlers.get(keyword)
            if handler is not None:
                handler()

    def compile_let(self):
        """Compile 'let' varName ('[' expression ']')? '=' expression ';'."""
        target = self.load_next_token()  # varName
        segment = CONVERT_KIND[self.symbol_table.kind_of(target)]
        slot = self.symbol_table.index_of(target)

        if not self.is_array():
            # Plain assignment.
            self.load_next_token()  # '='
            self.compile_expression()
            self.load_next_token()  # ';'
            self.vm_writer.write_pop(segment, slot)
            return

        # Array assignment: compute base + index first.
        self.load_next_token()  # '['
        self.compile_expression()
        self.load_next_token()  # ']'
        self.vm_writer.write_push(segment, slot)
        self.vm_writer.write_arithmetic("ADD")

        self.load_next_token()  # '='
        self.compile_expression()
        self.load_next_token()  # ';'

        # Stash the value, aim THAT at the element, then store.
        self.vm_writer.write_pop("TEMP", 0)
        self.vm_writer.write_pop("POINTER", 1)
        self.vm_writer.write_push("TEMP", 0)
        self.vm_writer.write_pop("THAT", 0)

    def compile_if(self):
        """Compile an if statement with its optional else branch."""
        self.if_index += 1
        label_no = self.if_index  # keep a copy; nested ifs bump the counter

        self.load_next_token()  # '('
        self.compile_expression()
        self.load_next_token()  # ')'
        self.load_next_token()  # '{'

        writer = self.vm_writer
        writer.write_if(f"IF_TRUE{label_no}")
        writer.write_goto(f"IF_FALSE{label_no}")
        writer.write_label(f"IF_TRUE{label_no}")
        self.compile_statements()
        writer.write_goto(f"IF_END{label_no}")
        self.load_next_token()  # '}'
        writer.write_label(f"IF_FALSE{label_no}")

        if self.check_next_token() == "else":
            self.load_next_token()  # 'else'
            self.load_next_token()  # '{'
            self.compile_statements()
            self.load_next_token()  # '}'

        writer.write_label(f"IF_END{label_no}")

    def compile_while(self):
        """Compile 'while' '(' expression ')' '{' statements '}'."""
        self.while_index += 1
        label_no = self.while_index
        loop_label = f"WHILE{label_no}"
        exit_label = f"WHILE_END{label_no}"

        self.vm_writer.write_label(loop_label)
        self.load_next_token()  # '('
        self.compile_expression()
        # Negated so the if-goto leaves the loop on a false condition.
        self.vm_writer.write_arithmetic("NOT")
        self.load_next_token()  # ')'
        self.load_next_token()  # '{'
        self.vm_writer.write_if(exit_label)

        self.compile_statements()

        self.vm_writer.write_goto(loop_label)
        self.vm_writer.write_label(exit_label)
        self.load_next_token()  # '}'

    def compile_do(self):
        """Compile 'do' subroutineCall ';' and discard the result."""
        # Load the call's leading identifier, as compile_term() would.
        self.load_next_token()
        self.compile_subroutine_call()
        self.vm_writer.write_pop("TEMP", 0)
        self.load_next_token()  # ';'

    def compile_return(self):
        """Compile 'return' expression? ';' (a bare return pushes 0)."""
        if self.check_next_token() == ";":
            self.vm_writer.write_push("CONST", 0)
        else:
            self.compile_expression()
        self.vm_writer.write_return()
        self.load_next_token()  # ';'

    def compile_expression(self):
        """Compile term (op term)*, emitting each operator after its term."""
        self.compile_term()
        while self.is_op():
            operator = self.load_next_token()
            self.compile_term()
            if operator in ARITHMETIC:
                self.vm_writer.write_arithmetic(ARITHMETIC[operator])
            elif operator == "*":
                self.vm_writer.write_call("Math.multiply", 2)
            elif operator == "/":
                self.vm_writer.write_call("Math.divide", 2)

    def compile_term(self):
        """Compile one term, chosen by looking at the upcoming token."""
        # unaryOp term
        if self.is_unary_op_term():
            operator = self.load_next_token()
            self.compile_term()
            self.vm_writer.write_arithmetic(ARITHMETIC_UNARY[operator])
            return

        # '(' expression ')'
        if self.check_next_token() == "(":
            self.load_next_token()  # '('
            self.compile_expression()
            self.load_next_token()  # ')'
            return

        upcoming_type = self.check_next_type()
        if upcoming_type == "INT_CONST":
            self.vm_writer.write_push("CONST", self.load_next_token())
            return
        if upcoming_type == "STRING_CONST":
            self.compile_string()
            return
        if upcoming_type == "KEYWORD":
            self.compile_keyword()
            return

        # varName | varName '[' expression ']' | subroutineCall
        identifier = self.load_next_token()

        if self.is_array():
            self.load_next_token()  # '['
            self.compile_expression()
            self.load_next_token()  # ']'
            base_kind = self.symbol_table.kind_of(identifier)
            base_slot = self.symbol_table.index_of(identifier)
            self.vm_writer.write_push(CONVERT_KIND[base_kind], base_slot)
            self.vm_writer.write_arithmetic("ADD")
            self.vm_writer.write_pop("POINTER", 1)
            self.vm_writer.write_push("THAT", 0)
        elif self.is_subroutine_call():
            # The identifier just loaded is the call's leading name.
            self.compile_subroutine_call()
        else:
            segment = CONVERT_KIND[self.symbol_table.kind_of(identifier)]
            slot = self.symbol_table.index_of(identifier)
            self.vm_writer.write_push(segment, slot)

    def compile_subroutine_call(self):
        """Compile a call whose leading identifier is the current token.

        Handles both ``name(args)`` and ``qualifier.name(args)``.
        """
        caller = self.get_curr_token()
        target = caller
        arg_total = 0

        separator = self.check_next_token()
        if separator == ".":
            self.load_next_token()  # '.'
            member = self.load_next_token()  # subroutineName
            caller_type = self.symbol_table.type_of(caller)
            if caller_type != "NONE":
                # Known variable: call on its type and pass it as receiver.
                caller_kind = self.symbol_table.kind_of(caller)
                caller_slot = self.symbol_table.index_of(caller)
                self.vm_writer.write_push(CONVERT_KIND[caller_kind],
                                          caller_slot)
                target = f"{caller_type}.{member}"
                arg_total += 1
            else:
                # Not a variable: the qualifier is used as a class name.
                target = f"{caller}.{member}"
        elif separator == "(":
            # Unqualified call: pass the current object as receiver.
            target = f"{self.class_name}.{caller}"
            arg_total += 1
            self.vm_writer.write_push("POINTER", 0)

        self.load_next_token()  # '('
        arg_total += self.compile_expression_list()
        self.load_next_token()  # ')'
        self.vm_writer.write_call(target, arg_total)

    def compile_expression_list(self):
        """Compile (expression (',' expression)*)? and return the count."""
        count = 0
        if self.check_next_token() != ")":
            count += 1
            self.compile_expression()
        while self.check_next_token() != ")":
            count += 1
            self.load_next_token()  # ','
            self.compile_expression()
        return count

    def compile_string(self):
        """Emit code that builds a string constant character by character."""
        text = self.load_next_token()
        self.vm_writer.write_push("CONST", len(text))
        self.vm_writer.write_call("String.new", 1)
        for code_point in map(ord, text):
            self.vm_writer.write_push("CONST", code_point)
            self.vm_writer.write_call("String.appendChar", 2)

    def compile_keyword(self):
        """Push the value of a keyword constant."""
        word = self.load_next_token()
        if word == "this":
            self.vm_writer.write_push("POINTER", 0)
            return
        # Every other keyword starts from 0; 'true' is then NOT-ed.
        self.vm_writer.write_push("CONST", 0)
        if word == "true":
            self.vm_writer.write_arithmetic("NOT")

    def is_subroutine_call(self):
        """Return True when the upcoming token is '.' or '('."""
        return self.check_next_token() in [".", "("]

    def is_array(self):
        """Return True when the upcoming token is '['."""
        return self.check_next_token() == "["

    def is_class_var_dec(self):
        """Return True when the upcoming token is 'static' or 'field'."""
        return self.check_next_token() in ["static", "field"]

    def is_subroutine_dec(self):
        """Return True when the upcoming token opens a subroutine."""
        return self.check_next_token() in ["constructor", "function", "method"]

    def is_statement(self):
        """Return True when the upcoming token opens a statement."""
        return self.check_next_token() in [
            "let", "if", "while", "do", "return"
        ]

    def is_op(self):
        """Return True when the upcoming token is a binary operator."""
        return self.check_next_token() in [
            "+", "-", "*", "/", "&", "|", "<", ">", "="
        ]

    def is_unary_op_term(self):
        """Return True when the upcoming token is '~' or '-'."""
        return self.check_next_token() in ["~", "-"]

    def check_next_token(self):
        """Return element 1 of the tokenizer's upcoming token."""
        return self.tokenizer.next_token[1]

    def check_next_type(self):
        """Return element 0 of the tokenizer's upcoming token."""
        return self.tokenizer.next_token[0]

    def get_curr_token(self):
        """Return element 1 of the tokenizer's current token."""
        return self.tokenizer.curr_token[1]

    def load_next_token(self):
        """Advance the tokenizer and return the new current token's text.

        Returns an empty string, without advancing, when no tokens remain.
        """
        if not self.tokenizer.has_more_tokens():
            return ""
        self.tokenizer.advance()
        return self.tokenizer.curr_token[1]
class CompilationEngine:
    """Parse one Jack class and write its parse tree as XML.

    Tokens come from a ``JackTokenizer``.  Each ``compile_*`` method writes
    the opening tag of its construct, emits the tokens and nested constructs
    it contains, and writes the closing tag.  ``write_token`` both emits the
    current token and advances the tokenizer, so every method expects the
    first token of its construct to be current on entry.
    """

    CLASS_VAR_DEC_KEYWORDS = ['static', 'field']
    SUBROUTINE_DEC_KEYWORDS = ['constructor', 'function', 'method']
    BINARY_OPERATOR_SYMBOLS = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
    UNARY_OPERATOR_SYMBOLS = ['-', '~']
    TYPE_TO_TAG = {'STRING_CONST': 'stringConstant',
                   'INT_CONST': 'integerConstant',
                   'KEYWORD': 'keyword',
                   'IDENTIFIER': 'identifier',
                   'SYMBOL': 'symbol'}
    # Characters that are markup in XML and must be written as entities.
    SYMBOLS_TO_XML_CONVENTION = {'<': '&lt;',
                                 '>': '&gt;',
                                 '&': '&amp;',
                                 '"': '&quot;'}

    def __init__(self, input_file_path, output_file_path):
        """Open the output file and compile the input immediately.

        Args:
            input_file_path: Value handed to ``JackTokenizer``.
            output_file_path: Path of the XML file to (over)write.
        """
        self.output_file = open(output_file_path, 'w')
        try:
            self.jack_tokenizer = JackTokenizer(input_file_path)
            if self.jack_tokenizer.has_more_tokens():
                self.compile_class()
        finally:
            # compile_class() closes the file itself; this also covers an
            # empty input and a tokenizer that fails to construct.
            self.output_file.close()

    def compile_class(self):
        """Write the ``<class>`` element and close the output file."""
        try:
            self.output_file.write('<class>\n')
            # get first token
            self.jack_tokenizer.advance()
            self.write_token(self.jack_tokenizer.key_word())    # 'class'
            self.write_token(self.jack_tokenizer.identifier())  # className
            self.write_token(self.jack_tokenizer.symbol())      # '{'
            if (self.jack_tokenizer.key_word()
                    in CompilationEngine.CLASS_VAR_DEC_KEYWORDS):
                self.compile_class_var_dec()
            if (self.jack_tokenizer.key_word()
                    in CompilationEngine.SUBROUTINE_DEC_KEYWORDS):
                self.compile_subroutine()
            self.write_token(self.jack_tokenizer.symbol())      # '}'
            self.output_file.write('</class>')
        finally:
            # Release the handle even when parsing fails part-way.
            self.output_file.close()

    def write_token(self, token_name):
        """Write ``token_name`` as an XML element and advance the tokenizer.

        The tag is chosen from the *current* token's type, so this must be
        called while the token being written is still current.

        Args:
            token_name: Text placed between the opening and closing tag.
        """
        type_tag = CompilationEngine.TYPE_TO_TAG[
            self.jack_tokenizer.token_type()]
        self.output_file.write('<{0}> {1} </{0}>\n'.format(
            type_tag, token_name))
        self.jack_tokenizer.advance()

    def compile_class_var_dec(self):
        """Write every consecutive ``static``/``field`` declaration."""
        while (self.jack_tokenizer.key_word()
               in CompilationEngine.CLASS_VAR_DEC_KEYWORDS):
            self.output_file.write('<classVarDec>\n')
            self.write_token(self.jack_tokenizer.key_word())
            self.write_type()
            self.write_token(self.jack_tokenizer.identifier())
            while self.jack_tokenizer.symbol() != ';':
                self.write_token(self.jack_tokenizer.symbol())      # ','
                self.write_token(self.jack_tokenizer.identifier())
            self.write_token(self.jack_tokenizer.symbol())          # ';'
            self.output_file.write('</classVarDec>\n')

    def write_type(self):
        """Write the current type token (keyword or identifier)."""
        if self.jack_tokenizer.token_type() == 'KEYWORD':
            self.write_token(self.jack_tokenizer.key_word())
        elif self.jack_tokenizer.token_type() == 'IDENTIFIER':
            self.write_token(self.jack_tokenizer.identifier())

    def compile_subroutine(self):
        """Write every consecutive subroutine declaration with its body."""
        while (self.jack_tokenizer.key_word()
               in CompilationEngine.SUBROUTINE_DEC_KEYWORDS):
            self.output_file.write('<subroutineDec>\n')
            self.write_token(self.jack_tokenizer.key_word())
            self.write_type()                                       # return type
            self.write_token(self.jack_tokenizer.identifier())
            self.write_token(self.jack_tokenizer.symbol())          # '('
            self.compile_parameter_list()
            self.write_token(self.jack_tokenizer.symbol())          # ')'
            self.output_file.write('<subroutineBody>\n')
            self.write_token(self.jack_tokenizer.symbol())          # '{'
            while self.jack_tokenizer.key_word() == 'var':
                self.compile_var_dec()
            self.compile_statements()
            self.write_token(self.jack_tokenizer.symbol())          # '}'
            self.output_file.write('</subroutineBody>\n')
            self.output_file.write('</subroutineDec>\n')

    def compile_parameter_list(self):
        """Write the (possibly empty) parameter list, stopping at ``)``."""
        self.output_file.write('<parameterList>\n')
        if self.jack_tokenizer.symbol() != ')':
            self.write_type()
            self.write_token(self.jack_tokenizer.identifier())
            while self.jack_tokenizer.symbol() == ",":
                self.write_token(self.jack_tokenizer.symbol())
                self.write_type()
                self.write_token(self.jack_tokenizer.identifier())
        self.output_file.write('</parameterList>\n')

    def compile_var_dec(self):
        """Write one ``var`` declaration."""
        self.output_file.write('<varDec>\n')
        self.write_token(self.jack_tokenizer.key_word())
        self.write_type()
        self.write_token(self.jack_tokenizer.identifier())
        while self.jack_tokenizer.symbol() == ",":
            self.write_token(self.jack_tokenizer.symbol())
            self.write_token(self.jack_tokenizer.identifier())
        self.write_token(self.jack_tokenizer.symbol())              # ';'
        self.output_file.write('</varDec>\n')

    def compile_statements(self):
        """Write statements for as long as a statement keyword is current."""
        self.output_file.write('<statements>\n')
        while self.jack_tokenizer.token_type() == 'KEYWORD':
            keyword = self.jack_tokenizer.key_word()
            if keyword == 'let':
                self.compile_let()
            elif keyword == 'if':
                self.compile_if()
            elif keyword == 'while':
                self.compile_while()
            elif keyword == 'do':
                self.compile_do()
            elif keyword == 'return':
                self.compile_return()
            else:
                # No handler consumes this keyword; looping again would
                # never terminate.
                break
        self.output_file.write('</statements>\n')

    def compile_do(self):
        """Write ``do subroutineCall ;``."""
        self.output_file.write('<doStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())
        self.write_token(self.jack_tokenizer.identifier())
        self.compile_subroutine_call()
        self.write_token(self.jack_tokenizer.symbol())              # ';'
        self.output_file.write('</doStatement>\n')

    def compile_subroutine_call(self):
        """Write the rest of a call after its leading identifier.

        Expects the current token to be ``(`` or ``.``.
        """
        if self.jack_tokenizer.symbol() == '(':
            self.write_token(self.jack_tokenizer.symbol())
            self.compile_expression_list()
            self.write_token(self.jack_tokenizer.symbol())          # ')'
        elif self.jack_tokenizer.symbol() == '.':
            self.write_token(self.jack_tokenizer.symbol())
            self.write_token(self.jack_tokenizer.identifier())
            self.write_token(self.jack_tokenizer.symbol())          # '('
            self.compile_expression_list()
            self.write_token(self.jack_tokenizer.symbol())          # ')'

    def compile_let(self):
        """Write ``let varName ([expression])? = expression ;``."""
        self.output_file.write('<letStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())
        self.write_token(self.jack_tokenizer.identifier())
        if self.jack_tokenizer.symbol() == '[':
            self.write_token(self.jack_tokenizer.symbol())
            self.compile_expression()
            self.write_token(self.jack_tokenizer.symbol())          # ']'
        self.write_token(self.jack_tokenizer.symbol())              # '='
        self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())              # ';'
        self.output_file.write('</letStatement>\n')

    def compile_while(self):
        """Write ``while (expression) { statements }``."""
        self.output_file.write('<whileStatement>\n')
        # 'while' is a keyword token, so read it as one.
        self.write_token(self.jack_tokenizer.key_word())
        self.write_token(self.jack_tokenizer.symbol())              # '('
        self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())              # ')'
        self.write_token(self.jack_tokenizer.symbol())              # '{'
        self.compile_statements()
        self.write_token(self.jack_tokenizer.symbol())              # '}'
        self.output_file.write('</whileStatement>\n')

    def compile_return(self):
        """Write ``return expression? ;``."""
        self.output_file.write('<returnStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())
        if self.jack_tokenizer.symbol() != ';':
            self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())              # ';'
        self.output_file.write('</returnStatement>\n')

    def compile_if(self):
        """Write ``if (expression) { statements }`` with optional else."""
        self.output_file.write('<ifStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())
        self.write_token(self.jack_tokenizer.symbol())              # '('
        self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())              # ')'
        self.write_token(self.jack_tokenizer.symbol())              # '{'
        self.compile_statements()
        self.write_token(self.jack_tokenizer.symbol())              # '}'
        if self.jack_tokenizer.key_word() == 'else':
            self.write_token(self.jack_tokenizer.key_word())
            self.write_token(self.jack_tokenizer.symbol())          # '{'
            self.compile_statements()
            self.write_token(self.jack_tokenizer.symbol())          # '}'
        self.output_file.write('</ifStatement>\n')

    def compile_expression(self):
        """Write ``term (op term)*``, XML-escaping the operators."""
        self.output_file.write('<expression>\n')
        self.compile_term()
        while (self.jack_tokenizer.symbol()
               in CompilationEngine.BINARY_OPERATOR_SYMBOLS):
            symbol = self.jack_tokenizer.symbol()
            # '<', '>' and '&' would otherwise be read as XML markup.
            symbol = CompilationEngine.SYMBOLS_TO_XML_CONVENTION.get(
                symbol, symbol)
            self.write_token(symbol)
            self.compile_term()
        self.output_file.write('</expression>\n')

    def compile_term(self):
        """Write a single ``<term>`` element."""
        self.output_file.write('<term>\n')
        token_type = self.jack_tokenizer.token_type()
        if token_type == 'IDENTIFIER':
            self.write_token(self.jack_tokenizer.identifier())
            if self.jack_tokenizer.symbol() == '[':
                self.write_token(self.jack_tokenizer.symbol())
                self.compile_expression()
                self.write_token(self.jack_tokenizer.symbol())      # ']'
            elif self.jack_tokenizer.symbol() in ('(', '.'):
                self.compile_subroutine_call()
        elif token_type == 'STRING_CONST':
            self.write_token(self.jack_tokenizer.string_val())
        elif token_type == 'INT_CONST':
            self.write_token(self.jack_tokenizer.int_val())
        elif token_type == 'KEYWORD':
            self.write_token(self.jack_tokenizer.key_word())
        elif token_type == 'SYMBOL':
            if self.jack_tokenizer.symbol() == '(':
                self.write_token(self.jack_tokenizer.symbol())
                self.compile_expression()
                self.write_token(self.jack_tokenizer.symbol())      # ')'
            elif (self.jack_tokenizer.symbol()
                  in CompilationEngine.UNARY_OPERATOR_SYMBOLS):
                self.write_token(self.jack_tokenizer.symbol())
                self.compile_term()
        self.output_file.write('</term>\n')

    def compile_expression_list(self):
        """Write a comma-separated expression list, stopping at ``)``."""
        self.output_file.write('<expressionList>\n')
        if self.jack_tokenizer.symbol() != ')':
            self.compile_expression()
            while self.jack_tokenizer.symbol() == ',':
                self.write_token(self.jack_tokenizer.symbol())
                self.compile_expression()
        self.output_file.write('</expressionList>\n')