def main(): """Drives the Jack-to-VM translation process""" file_name = sys.argv[1] tokenizers = [] output_files = [] abs_path = os.path.abspath(file_name) if '.jack' in file_name and file_name[-5:] == '.jack': tokenizer = JackTokenizer(abs_path) tokenizers.append(tokenizer) output_path = os.path.splitext(abs_path)[0] + '.xml' output_files.append(output_path) else: for walk_obj in os.walk(abs_path): for jack_file in walk_obj[2]: if '.jack' in jack_file and jack_file[-5:] == '.jack': tokenizer = JackTokenizer(abs_path + '/' + jack_file) tokenizers.append(tokenizer) output_path = abs_path + '/' + jack_file[:-5] + '.xml' output_files.append(output_path) for tokenizer in tokenizers: while tokenizer.has_more_tokens(): tokenizer.advance() token_type = tokenizer.token_type() if token_type == 'KEYWORD': keyword = tokenizer.keyword() elif token_type == 'SYMBOL': symbol = tokenizer.symbol() elif token_type == 'IDENTIFIER': identifier = tokenizer.identifier() elif token_type == 'INT_CONST': int_val = tokenizer.int_val() elif token_type == 'STRING_CONST': string_val = tokenizer.string_val()
def test_advance(self): """Tests all parts of the tokenizer using this Jack code: /** Multi-line comment for some class. */ class A{ // Single-line comment let x = -4; do Output.printString("Ring Constants!"); } """ tokenizer = JackTokenizer("test.jack") tokenizer.advance() self.assertEqual(tokenizer.keyword(), CLASS) self.assertEqual(tokenizer.token_type(), KEYWORD) tokenizer.advance() self.assertEqual(tokenizer.identifier(), 'A') self.assertEqual(tokenizer.token_type(), IDENTIFIER) tokenizer.advance() self.assertEqual(tokenizer.symbol(), '{') self.assertEqual(tokenizer.token_type(), SYMBOL) tokenizer.advance() self.assertEqual(tokenizer.keyword(), LET) self.assertEqual(tokenizer.token_type(), KEYWORD) tokenizer.advance() self.assertEqual(tokenizer.identifier(), 'x') self.assertEqual(tokenizer.token_type(), IDENTIFIER) tokenizer.advance() self.assertEqual(tokenizer.symbol(), '=') self.assertEqual(tokenizer.token_type(), SYMBOL) tokenizer.advance() self.assertEqual(tokenizer.symbol(), '-') self.assertEqual(tokenizer.token_type(), SYMBOL) tokenizer.advance() self.assertEqual(tokenizer.int_val(), 4) self.assertEqual(tokenizer.token_type(), INT_CONST) tokenizer.advance() self.assertEqual(tokenizer.symbol(), ';') self.assertEqual(tokenizer.token_type(), SYMBOL) tokenizer.advance() self.assertEqual(tokenizer.keyword(), DO) self.assertEqual(tokenizer.token_type(), KEYWORD) tokenizer.advance() self.assertEqual(tokenizer.identifier(), 'Output') self.assertEqual(tokenizer.token_type(), IDENTIFIER) tokenizer.advance() self.assertEqual(tokenizer.symbol(), '.') self.assertEqual(tokenizer.token_type(), SYMBOL) tokenizer.advance() self.assertEqual(tokenizer.identifier(), 'printString') self.assertEqual(tokenizer.token_type(), IDENTIFIER) tokenizer.advance() self.assertEqual(tokenizer.symbol(), '(') self.assertEqual(tokenizer.token_type(), SYMBOL) tokenizer.advance() self.assertEqual(tokenizer.string_val(), 'Ring Constants!') self.assertEqual(tokenizer.token_type(), STRING_CONST) tokenizer.advance() self.assertEqual(tokenizer.symbol(), ')') self.assertEqual(tokenizer.token_type(), SYMBOL) tokenizer.advance() self.assertEqual(tokenizer.symbol(), ';') self.assertEqual(tokenizer.token_type(), SYMBOL) tokenizer.advance() self.assertEqual(tokenizer.symbol(), '}') self.assertEqual(tokenizer.token_type(), SYMBOL)
class CompilationEngine: def __init__(self, input_file, output_file): self.tokenizer = JackTokenizer(input_file) self.symbol_table = SymbolTable() self.vm_writer = VMWriter(output_file) self.current_sub_name = None self.class_name = None self.func_counter = 0 self.while_counter = 0 self.if_counter = 0 # starts the process self.tokenizer.advance() self.compile_class() self.vm_writer.close() def compile_class(self): """ compiles the class function :return: none """ # advances a single step to get the class name self.tokenizer.advance() # set class's name self.class_name = self.tokenizer.current_token # moves to the symbol { self.tokenizer.advance() # move to the next symbol and check what it is self.tokenizer.advance() # compiles class variable while KEY_WORDS.get(self.tokenizer.current_token) == STATIC or \ KEY_WORDS.get(self.tokenizer.current_token) == FIELD: self.compile_class_var_dec() # compiles subroutine while KEY_WORDS.get(self.tokenizer.current_token) == CONSTRUCTOR or \ KEY_WORDS.get(self.tokenizer.current_token) == METHOD or \ KEY_WORDS.get(self.tokenizer.current_token) == FUNCTION: self.compile_sub_routine() # we are now at the <symbol> } <symbol> which closes the class def compile_class_var_dec(self): """ compiles a var dec :return: none """ var_kind = self.tokenizer.key_word() # advances the token to the var's type self.tokenizer.advance() if self.tokenizer.token_type() == KEY_WORD: var_type = self.tokenizer.key_word() else: var_type = self.tokenizer.identifier() # advances the token to the var's identifier self.tokenizer.advance() if self.tokenizer.token_type() == KEY_WORD: var_name = self.tokenizer.key_word() else: var_name = self.tokenizer.identifier() # update symbol table self.symbol_table.define(var_name, var_type, var_kind) # advance to next token, and check if there are more var_names self.tokenizer.advance() while self.tokenizer.current_token != ";": # token is <symbol> , <symbol> # advance to var's identifier self.tokenizer.advance() var_name = self.tokenizer.current_token # update symbol table self.symbol_table.define(var_name, var_type, var_kind) self.tokenizer.advance() # the current token is <symbol> ; <symbol>, advance to next self.tokenizer.advance() def compile_sub_routine(self): """ compiles a single sub routine :return: none """ # start new subroutine symbol table self.symbol_table.start_subroutine() # get subroutine type (method/construction/function) sub_type = self.tokenizer.key_word() # advances the token to what the subroutine returns self.tokenizer.advance() # updates the return type if self.tokenizer.token_type() == KEY_WORD: return_type = self.tokenizer.key_word() else: return_type = self.tokenizer.identifier() # advances the token to <identifier> sub_name <identifier> self.tokenizer.advance() # update the subroutine name subroutine_name = self.tokenizer.identifier() self.current_sub_name = subroutine_name # advance to <symbol> ( <symbol> self.tokenizer.advance() # if subroutine is a method, add 'this' to the symbol table as argument 0 if sub_type == METHOD: self.symbol_table.define("this", self.class_name, "ARG") # compiles the parameter list self.compile_parameter_list() # we are at <symbol> ) <symbol> # advance to subroutine body, and compile it self.tokenizer.advance() self.compile_subroutine_body(sub_type) def compile_subroutine_body(self, sub_type): """ the method compiles the subroutine body :return: none """ # we are at bracket {, advance self.tokenizer.advance() # compile var dec while KEY_WORDS.get(self.tokenizer.current_token) == VAR: self.compile_var_dec() # write function label self.vm_writer.write_function( self.class_name + '.' + self.current_sub_name, self.symbol_table.var_count("VAR")) # if is method, update THIS to the object if sub_type == METHOD: self.vm_writer.write_push(ARG, 0) self.vm_writer.write_pop("POINTER", 0) # if is constructor, allocate memory, and put in this if sub_type == CONSTRUCTOR: self.vm_writer.write_push("CONST", self.symbol_table.var_count("FIELD")) self.vm_writer.write_call("Memory.alloc", 1) self.vm_writer.write_pop("POINTER", 0) if self.tokenizer.current_token != "}": self.compile_statements() # we are at bracket }, advance self.tokenizer.advance() def compile_parameter_list(self): """ compiles a parameter list :return: none """ # advance to first parameter self.tokenizer.advance() # while there are more parameters while self.tokenizer.current_token != ')': # tests what to put as the type of the object if self.tokenizer.token_type() == KEY_WORD: var_type = self.tokenizer.key_word() else: var_type = self.tokenizer.identifier() # advance to variables name <identifier> var_name <identifier> self.tokenizer.advance() var_name = self.tokenizer.identifier() # define new variable self.symbol_table.define(var_name, var_type, "ARG") # gets the next token self.tokenizer.advance() # advance to next token if we are at ',' if self.tokenizer.current_token == ",": self.tokenizer.advance() def compile_var_dec(self): """ compiles a declaration of a variable :return: none """ # we are at <keyword> var <keyword> # advance to variable type self.tokenizer.advance() if self.tokenizer.token_type() == KEY_WORD: var_type = self.tokenizer.key_word() else: var_type = self.tokenizer.identifier() # advance to the variables name self.tokenizer.advance() while self.tokenizer.current_token != ';': # we are at <identifier> var_name <identifier> var_name = self.tokenizer.identifier() # define variable in symbol table self.symbol_table.define(var_name, var_type, "VAR") # advance to next token self.tokenizer.advance() # tests what to put as the type of the object if self.tokenizer.current_token == ",": self.tokenizer.advance() # we are at <symbol> ; <symbol> # advance to next token self.tokenizer.advance() def compile_statements(self): """ the method compiles statements :return: none """ # while there are more statements, deal with each one while self.tokenizer.current_token != '}': statement_type = self.tokenizer.key_word() if statement_type == LET: self.compile_let() elif statement_type == IF: self.compile_if() elif statement_type == WHILE: self.compile_while() elif statement_type == DO: self.compile_do() elif statement_type == RETURN: self.compile_return() def compile_do(self): """ the method compiles a do command :return: none """ # we are at <keyword> do <keyword> # advance to next token <identifier> name_of_func <identifier> self.tokenizer.advance() func_name = self.tokenizer.identifier() self.tokenizer.advance() # compile the subroutine call self.compile_subroutine_call(func_name) # pop the result from the function into temp self.vm_writer.write_pop("TEMP", 0) # we are at <symbol> ; <symbol>, advance to next token self.tokenizer.advance() def compile_let(self): """ the method compiles a let statement :return: none """ # we are at <keyword> let <keyword> # advance to next token (var_name) self.tokenizer.advance() # we are at <identifier> var_name <identifier> var_name = self.tokenizer.identifier() # get variable data var_index = self.symbol_table.index_of(var_name) var_kind = TYPE_DICT.get(self.symbol_table.kind_of(var_name)) # advance to next token ('[' | '=') self.tokenizer.advance() is_array = False if self.tokenizer.current_token == '[': is_array = True # push arr self.vm_writer.write_push(var_kind, var_index) # advance to expression and compile it self.tokenizer.advance() self.compile_expression() # we are at <symbol> ] <symbol>, advance to next token self.tokenizer.advance() # add the index of array and the expression to get the correct location self.vm_writer.write_arithmetic("ADD") # we are at <symbol> = <symbol> # advance to expression and compile it self.tokenizer.advance() self.compile_expression() # if var is an array if is_array: self.vm_writer.write_pop("TEMP", 0) self.vm_writer.write_pop("POINTER", 1) self.vm_writer.write_push("TEMP", 0) self.vm_writer.write_pop("THAT", 0) # if var is not an array else: self.vm_writer.write_pop(var_kind, var_index) # we are at <symbol> ; <symbol>, advance to next self.tokenizer.advance() return def compile_while(self): """ the method compiles a while statement :return: none """ while_counter = str(self.while_counter) # update the while counter self.while_counter += 1 # create new label for the start of the while self.vm_writer.write_label("While_" + while_counter) # we are at <keyword> while <keyword>, advance to next token self.tokenizer.advance() # we are at <symbol> ( <symbol>, advance to next token self.tokenizer.advance() self.compile_expression() # we are at <symbol> ) <symbol>, advance to next token self.tokenizer.advance() # negate expression self.vm_writer.write_arithmetic("NOT") # if condition is not met, go to the end of the while self.vm_writer.write_if("End_While_" + while_counter) # we are at <symbol> { <symbol>, advance to next token self.tokenizer.advance() # compile statements self.compile_statements() # go back to the start of the while self.vm_writer.write_goto("While_" + while_counter) # create new label for the end of the while self.vm_writer.write_label("End_While_" + while_counter) # we are at <symbol> } <symbol>, advance to next token self.tokenizer.advance() return def compile_return(self): """ the method compiles a return statement :return: none """ # we are at <keyword> return <keyword>, advance to next token self.tokenizer.advance() if self.tokenizer.current_token != ';': self.compile_expression() else: # if function is void, push const 0 to the stack self.vm_writer.write_push("CONST", 0) # we are at <symbol> ; <symbol>, advance to next token self.tokenizer.advance() self.vm_writer.write_return() return def compile_if(self): """ the method compiles an if statement :return: none """ if_count = str(self.if_counter) # update if counter self.if_counter += 1 # we are at <keyword> if <keyword>, advance to next token self.tokenizer.advance() # we are at <symbol> ( <symbol>, advance to next token self.tokenizer.advance() # compile expression self.compile_expression() # negate the expression self.vm_writer.write_arithmetic("NOT") # check if condition is met self.vm_writer.write_if("ELSE_" + if_count) # we are at <symbol> ) <symbol>, advance to next token self.tokenizer.advance() # we are at <symbol> { <symbol>, advance to next token self.tokenizer.advance() self.compile_statements() # jump to the end of the if self.vm_writer.write_goto("END_IF_" + if_count) # we are at <symbol> } <symbol>, advance to next token self.tokenizer.advance() # create else label (which may be empty) self.vm_writer.write_label("ELSE_" + if_count) if self.tokenizer.current_token == 'else': # we are at <keyword> else <keyword>, advance self.tokenizer.advance() # we are at <symbol> { <symbol>, advance self.tokenizer.advance() self.compile_statements() # we are at <symbol> } <symbol>, advance self.tokenizer.advance() # create new label self.vm_writer.write_label("END_IF_" + if_count) return def compile_expression(self): """ the method compiles an expression :return: """ # compile the term self.compile_term() while self.tokenizer.current_token in OP_LST: call_math = False # we are at <symbol> op <symbol> op = OP_DICT.get(self.tokenizer.current_token) # check if operator needs to call math if self.tokenizer.current_token == '*' or self.tokenizer.current_token == '/': call_math = True # advance to next term and compile term self.tokenizer.advance() self.compile_term() # output the operator if call_math: self.vm_writer.write_call(op[0], op[1]) else: self.vm_writer.write_arithmetic(op) return def compile_term(self): """ the method compiles a term :return: none """ token_type = self.tokenizer.token_type() if token_type == INT_CONST: # push the const int self.vm_writer.write_push("CONST", self.tokenizer.int_val()) self.tokenizer.advance() elif token_type == STRING_CONST: # write without the "" string_val = self.tokenizer.string_val() # push the len of the string and call the string constructor self.vm_writer.write_push("CONST", len(string_val)) self.vm_writer.write_call("String.new", 1) # update new string for char in string_val: self.vm_writer.write_push("CONST", ord(char)) self.vm_writer.write_call("String.appendChar", 2) self.tokenizer.advance() elif self.tokenizer.current_token in KEY_WORD_CONST: segment, idx = KEY_WORD_CONST.get(self.tokenizer.current_token) self.vm_writer.write_push(segment, idx) if self.tokenizer.current_token == 'true': self.vm_writer.write_arithmetic('NOT') self.tokenizer.advance() elif self.tokenizer.current_token == '(': # we are at <symbol> ( <symbol>, advance to next token self.tokenizer.advance() self.compile_expression() # we are at <symbol> ) <symbol>, advance to next token self.tokenizer.advance() elif self.tokenizer.current_token in UNARY_OP: op_command = UNARY_OP.get(self.tokenizer.current_token) self.tokenizer.advance() self.compile_term() self.vm_writer.write_arithmetic(op_command) # var/var[expression]/subroutine_call else: # we are at <identifier> var_name <identifier> var_name = self.tokenizer.identifier() self.tokenizer.advance() # if is var_name[expression] if self.tokenizer.current_token == '[': var_kind = TYPE_DICT.get(self.symbol_table.kind_of(var_name)) var_index = self.symbol_table.index_of(var_name) # push arr self.vm_writer.write_push(var_kind, var_index) # we are at <symbol> [ <symbol>, advance to expression and compile it self.tokenizer.advance() self.compile_expression() # add the index of array and the expression to get the correct location self.vm_writer.write_arithmetic("ADD") # set the that pointer self.vm_writer.write_pop("POINTER", 1) # push to the stack what is in the arr[i] self.vm_writer.write_push("THAT", 0) # we are at <symbol> ] <symbol>, advance self.tokenizer.advance() # if is a subroutine call elif self.tokenizer.current_token == '(' or self.tokenizer.current_token == '.': self.compile_subroutine_call(var_name) else: # if is just 'var' var_kind = TYPE_DICT.get(self.symbol_table.kind_of(var_name)) var_index = self.symbol_table.index_of(var_name) self.vm_writer.write_push(var_kind, var_index) return def compile_expression_list(self): """ the method compiles a list of expressions :return: amount of arguments in the expression list """ expression_counter = 0 # check that list is not empty if self.tokenizer.current_token != ')': expression_counter += 1 # compile first expression self.compile_expression() # if there are more expressions, compile them while self.tokenizer.current_token == ',': expression_counter += 1 # we are at <symbol> , <symbol>, advance self.tokenizer.advance() # compile expression self.compile_expression() return expression_counter def compile_subroutine_call(self, identifier): """ the method compiles a subroutine call (not including the subroutine first varName :return: none """ func_name = self.class_name + "." + identifier num_of_arguments = 0 if self.tokenizer.current_token == '.': # change func name to its class name if self.symbol_table.type_of(identifier) is not None: func_name = self.symbol_table.type_of(identifier) # we are at <symbol> . <symbol>, advance self.tokenizer.advance() # we are at <identifier> sub_name <identifier> func_name = func_name + "." + self.tokenizer.identifier() self.tokenizer.advance() # push the object to the stack segment = TYPE_DICT.get(self.symbol_table.kind_of(identifier)) idx = self.symbol_table.index_of(identifier) self.vm_writer.write_push(segment, idx) num_of_arguments += 1 else: # we are at <symbol> . <symbol>, advance self.tokenizer.advance() # we are at <identifier> sub_name <identifier> func_name = identifier + "." + self.tokenizer.identifier() self.tokenizer.advance() else: self.vm_writer.write_push("POINTER", 0) num_of_arguments += 1 # we are at <symbol> ( <symbol>, advance self.tokenizer.advance() num_of_arguments += self.compile_expression_list() # we are at <symbol> ) <symbol>, advance self.tokenizer.advance() self.vm_writer.write_call(func_name, num_of_arguments) return