def __init__(self, tokens, filepath):
    """Compilation engine init: wire up the tokenizer, symbol table and
    VM writer, then drive the whole compilation of one class and close
    the output file.

    tokens   -- token stream for one .jack source (project-defined type)
    filepath -- path the VMWriter emits .vm code to
    """
    self.lex = tokens
    self.symbols = SymbolTable()
    self.vm = VMWriter(filepath)
    # Compilation happens eagerly in the constructor; by the time
    # __init__ returns, the .vm file is fully written and closed.
    self.compile_class()
    self.vm.closeout()
def __init__(self, jack_file):
    """Set up the collaborators for compiling a single .jack file.

    jack_file -- path to the Jack source; the same path is handed to
                 both the VMWriter (output) and the JackTokenizer (input).
    """
    self.vm_writer = VMWriter(jack_file)
    self.tokenizer = JackTokenizer(jack_file)
    self.symbol_table = SymbolTable()
    # Label counters start at -1 so the first pre-increment yields 0.
    self.if_index = -1
    self.while_index = -1
def __init__(self, token_stream, out_file, xml_name):
    '''
    creates a new compilation engine with the given input and output.
    The next method called must be compileClass().

    token_stream -- tokenizer positioned before the first token
    out_file     -- destination for the generated VM code
    xml_name     -- name used for the XML parse-tree output
    '''
    self.stream = token_stream
    self.writer = VMWriter(out_file)
    self.symbols = SymbolTable()
    self.xml_name = xml_name
    # Root of the XML parse tree; every Jack file is a single class.
    self.root = ET.Element('class')
    # Prime the stream and confirm the file opens with 'class'.
    self.stream.advance()
    # NOTE(review): assert is stripped under `python -O`; consider
    # raising an explicit error here instead — verify project policy.
    assert self.stream.keyword() == 'class'
def __init__(self, jack_tokenizer: JackTokenizer, output_path: str):
    """Build the compiler front-end and immediately compile the class.

    jack_tokenizer -- tokenizer for one .jack source file
    output_path    -- path the VMWriter emits .vm code to
    """
    super().__init__()
    self.tokenizer = jack_tokenizer
    self.table = SymbolTable()
    self.writer = VMWriter(output_path)
    # Load the first token so compile_class() can start consuming.
    if self.tokenizer.has_more_tokens():
        self.tokenizer.advance()
    self.class_name = ''
    self.curr_func_name = ''
    # Counters for generating unique if/while labels.
    self._if_count = 0
    self._while_count = 0
    # Entire compilation runs eagerly in the constructor.
    self.compile_class()
def compile(self, out_fname: str) -> None:
    """Compile the stored .jack source into VM code written to *out_fname*.

    Raises CompilationException if any tokens remain after the top-level
    class has been fully parsed (i.e. trailing junk in the source file).
    """
    tknizer = Tokenizer(self._jack_fname)
    # VMWriter is a context manager; the output file is closed on exit.
    with VMWriter(out_fname) as writer:
        self._writer = writer
        # _compile_class returns the first unconsumed token, or a falsy
        # value when the whole file was consumed.
        token = self._compile_class(tknizer, tknizer.next_token())
        if token:
            raise CompilationException(
                f"Expected end of file, found {token}")
def __init__(self, tokenizer: JackTokenizer, out_path: Path): self.tokenizer = tokenizer # Create symbol tables self.class_level_st = SymbolTable() self.subroutine_level_st = SymbolTable() # class's name self.class_name = None self.func_name = None self.sub_type = None # Open the output file for writing self.out_stream = out_path.open('w') # Create a new VM writer for writing self.vm_writer = VMWriter(out_path.with_suffix(".vm")) # For generating labels self.label_count = {"if": 0, "while": 0}
def __init__(self, input_file, output_file):
    """Wire up tokenizer, symbol table and VM writer for one source file.

    input_file  -- path to the .jack source
    output_file -- path for the generated output
    """
    self.tokenizer = JackTokenizer(input_file)
    # NOTE(review): output_file is opened here AND passed to VMWriter
    # below — two writers on the same path will clobber each other;
    # verify which one is actually used.
    self.out = open(output_file, 'w')
    self.token = None
    self.class_name = None
    #######################
    ### PROJECT 11 CODE ###
    #######################
    self.symbol_table = SymbolTable()
    self.vm_writer = VMWriter(output_file)
def main(argv):
    """
    Main flow of program dealing with extracting files for reading and
    initializing files to translate into.

    argv[1] is either a single .jack file or a directory of .jack files;
    each source is compiled to a sibling .vm file with the same base name.
    """
    if not check_args(argv):
        return
    # extracting jack file (or directory of jack files) to be processed
    jack_files_path = argv[1]
    # creating a .vm file to contain each jack file's translation to VM language
    if os.path.isdir(jack_files_path):
        for file in os.listdir(jack_files_path):
            if file.endswith(".jack"):
                # os.path.join instead of hand-built "dir/name" strings so
                # the separator is correct on every platform.
                base = os.path.splitext(os.path.basename(file))[0]
                vm_file_name = os.path.join(jack_files_path, base + ".vm")
                vm_writer = VMWriter(vm_file_name)
                CompilationEngine(os.path.join(jack_files_path, file),
                                  vm_writer)
    else:
        # Single-file mode: swap the .jack extension for .vm in place.
        vm_file_name = os.path.splitext(jack_files_path)[0] + ".vm"
        vm_writer = VMWriter(vm_file_name)
        CompilationEngine(jack_files_path, vm_writer)
def main():
    """Entry point: clean up the Jack source text, then tokenize and
    compile it, emitting <name>.xml (token tree) and <name>.vm (VM code)
    next to the input file."""
    # Input
    if len(sys.argv) != 2:
        raise ValueError('Invalid file name.')
    input_file_path = sys.argv[1]
    input_texts = get_file_text(input_file_path)
    splited_input_file_path = input_file_path.split('/')
    input_file_name = splited_input_file_path[-1]
    # Output: derive the .xml and .vm paths from the input file name.
    output_tokenizer_file_name = '{}.xml'.format(input_file_name.split('.')[0])
    output_tokenizer_file_path = '/'.join([*splited_input_file_path[:-1],
                                           output_tokenizer_file_name])
    output_vm_file_name = '{}.vm'.format(input_file_name.split('.')[0])
    output_vm_file_path = '/'.join([*splited_input_file_path[:-1],
                                    output_vm_file_name])
    # Text Processing
    del_blank_content = lambda value: value != ''
    del_new_line_in_text = lambda value: value.replace('\n', '')
    # Strip "// ..." comments inside a line and trim surrounding whitespace.
    del_comment_in_line = lambda string: re.sub(r'//\s.*', '', string).strip()
    # Pipeline: drop newlines -> drop comment-only lines -> strip inline
    # comments -> drop lines that became empty.
    input_texts = list(
        filter(
            del_blank_content,
            map(
                del_comment_in_line,
                filter(
                    remove_comments,
                    map(
                        del_new_line_in_text,
                        input_texts
                    )
                )
            )
        )
    )
    update_input_texts = []
    for input_text in input_texts:
        # Block comments (/** */) survive the text processing above, so
        # filter them out with a second remove_comments pass.
        if remove_comments(input_text):
            update_input_texts.append(input_text)
    print('output_tokenizer_file_name: {}'.format(output_tokenizer_file_name))
    print('output_vm_file_name: {}'.format(output_vm_file_name))
    # Both writer and engine are context managers; files close on exit.
    with VMWriter(output_vm_file_path) as vmw:
        with CompilationEngine(update_input_texts,
                               output_tokenizer_file_path, vmw) as engine:
            engine.compile()
class CompilationEngine:
    """Recursive-descent compiler from Jack tokens to VM code.

    NOTE remember that "is_xxx()" checks on the next token, and load the
    next token to curr_token before starting sub-methods using
    "load_next_token()" and you can use values with it.
    """

    def __init__(self, jack_file):
        # Output writer and token source share the same file path.
        self.vm_writer = VMWriter(jack_file)
        self.tokenizer = JackTokenizer(jack_file)
        self.symbol_table = SymbolTable()
        # Label counters start at -1 so the first pre-increment yields 0.
        self.if_index = -1
        self.while_index = -1

    # 'class' className '{' classVarDec* subroutineDec* '}'
    def compile_class(self):  #! Beginning of all
        # * save name of the class and move on
        self.load_next_token()  # 'class'
        self.class_name = self.load_next_token()  # className
        self.load_next_token()  # curr_token = '{'
        # while next token == 'static' | 'field'
        while self.is_class_var_dec():  # check next token
            self.compile_class_var_dec()  # classVarDec*
        # while next_token == constructor | function | method
        while self.is_subroutine_dec():
            self.compile_subroutine()  # subroutineDec*
        self.vm_writer.close()

    # ('static' | 'field' ) type varName (',' varName)* ';'
    def compile_class_var_dec(self):
        kind = self.load_next_token()  # curr_token = static | field
        type = self.load_next_token()  # curr_token = type
        name = self.load_next_token()  # curr_token = varName
        self.symbol_table.define(name, type, kind.upper())
        while self.check_next_token() != ";":  # (',' varName)*
            self.load_next_token()  # ','
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, type, kind.upper())
        self.load_next_token()  # ';'

    # next_token = 'constructor' | 'function' | 'method'
    # subroutineDec: ('constructor' | 'function' | 'method') ('void' | type)
    #     subroutineName '(' parameterList ')' subroutineBody
    # subroutineBody: '{' varDec* statements '}'
    def compile_subroutine(self):
        subroutine_kind = (self.load_next_token()
                           )  # ('constructor' | 'function' | 'method')
        self.load_next_token()  # ('void' | type)
        subroutine_name = self.load_next_token()  # subroutineName
        self.symbol_table.start_subroutine()  # init subroutine table
        if subroutine_kind == "method":
            # Methods receive the object as hidden argument 0.
            self.symbol_table.define("instance", self.class_name, "ARG")
        self.load_next_token()  # curr_token '('
        self.compile_parameter_list()  # parameterList
        # next_token == ')' when escaped
        self.load_next_token()  # ')'
        self.load_next_token()  # '{'
        while self.check_next_token() == "var":
            self.compile_var_dec()  # varDec*
        # NOTE next_token is neither 'var' or ';'
        # NOTE next_token is statements* (zero or more)
        # ANCHOR actual writing
        func_name = f"{self.class_name}.{subroutine_name}"  # e.g. Main.main
        num_locals = self.symbol_table.counts["VAR"]  # get 'var' count
        self.vm_writer.write_function(func_name, num_locals)
        if subroutine_kind == "constructor":
            # Allocate one word per field and point 'this' at the new block.
            num_fields = self.symbol_table.counts["FIELD"]
            self.vm_writer.write_push("CONST", num_fields)
            self.vm_writer.write_call("Memory.alloc", 1)
            self.vm_writer.write_pop("POINTER", 0)
        elif subroutine_kind == "method":
            # Methods: set 'this' from hidden argument 0.
            self.vm_writer.write_push("ARG", 0)
            self.vm_writer.write_pop("POINTER", 0)
        # NOTE statement starts here
        self.compile_statements()  # statements
        self.load_next_token()  # '}'

    # ( (type varName) (',' type varName)*)?
    def compile_parameter_list(self):
        # curr_token == '('
        if self.check_next_token() != ")":
            type = self.load_next_token()  # type
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, type, "ARG")
        while self.check_next_token() != ")":
            self.load_next_token()  # ','
            type = self.load_next_token()  # type
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, type, "ARG")
        # NOTE param compilation finishes when next_token == ')'

    # 'var' type varName (',' varName)* ';'
    def compile_var_dec(self):
        self.load_next_token()  # 'var'
        type = self.load_next_token()  # type
        name = self.load_next_token()  # varName
        self.symbol_table.define(name, type, "VAR")
        while self.check_next_token() != ";":  # (',' varName)*
            self.load_next_token()  # ','
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, type, "VAR")
        self.load_next_token()  # ';'

    # statement*
    # letStatement | ifStatement | whileStatement | doStatement | returnStatement
    def compile_statements(self):
        # if next_token == let | if | while | do | return
        while self.is_statement():
            statement = (self.load_next_token()
                         )  # curr_token == let | if | while | do | return
            if statement == "let":
                self.compile_let()
            elif statement == "if":
                self.compile_if()
            elif statement == "while":
                self.compile_while()
            elif statement == "do":
                self.compile_do()
            elif statement == "return":
                self.compile_return()

    # 'let' varName ('[' expression ']')? '=' expression ';'
    def compile_let(self):
        var_name = self.load_next_token()  # curr_token == varName
        var_kind = CONVERT_KIND[self.symbol_table.kind_of(var_name)]
        var_index = self.symbol_table.index_of(var_name)
        # if next_token == "["
        if self.is_array():  # array assignment
            self.load_next_token()  # curr_token == '['
            self.compile_expression()  # expression (index)
            self.load_next_token()  # curr_token == ']'
            self.vm_writer.write_push(var_kind, var_index)
            self.vm_writer.write_arithmetic("ADD")  # base + index
            self.load_next_token()  # curr_token == '='
            self.compile_expression()  # expression (rhs value)
            self.load_next_token()  # curr_token == ';'
            #! POP TEMP and PUSH TEMP location changed
            # Save rhs to temp, set THAT to base+index, store rhs in *(base+index).
            self.vm_writer.write_pop("TEMP", 0)
            self.vm_writer.write_pop("POINTER", 1)
            self.vm_writer.write_push("TEMP", 0)
            self.vm_writer.write_pop("THAT", 0)
        else:  # regular assignment
            self.load_next_token()  # curr_token == '='
            self.compile_expression()  # expression
            self.load_next_token()  # ';'
            self.vm_writer.write_pop(var_kind, var_index)

    # 'if' '(' expression ')' '{' statements '}' ( 'else' '{' statements '}' )?
    def compile_if(self):
        # curr_token == if
        self.if_index += 1
        if_index = self.if_index  # TODO IF indexes count separately
        self.load_next_token()  # curr_token == '('
        self.compile_expression()  # expression
        self.load_next_token()  # ')'
        self.load_next_token()  # '{'
        # S = statement, L = label
        self.vm_writer.write_if(f"IF_TRUE{if_index}")  #! if-goto L1
        self.vm_writer.write_goto(f"IF_FALSE{if_index}")  #! goto L2
        self.vm_writer.write_label(f"IF_TRUE{if_index}")  #! label L1
        self.compile_statements()  # statements #! executing S1
        self.vm_writer.write_goto(f"IF_END{if_index}")  #! goto END
        self.load_next_token()  # '}'
        self.vm_writer.write_label(f"IF_FALSE{if_index}")  #! label L2
        if self.check_next_token() == "else":  # ( 'else' '{' statements '}' )?
            self.load_next_token()  # 'else'
            self.load_next_token()  # '{'
            self.compile_statements()  # statements #! executing S2
            self.load_next_token()  # '}'
        # END label is emitted whether or not an else clause exists.
        self.vm_writer.write_label(f"IF_END{if_index}")

    # 'while' '(' expression ')' '{' statements '}'
    def compile_while(self):
        # curr_token == while
        self.while_index += 1
        while_index = self.while_index
        self.vm_writer.write_label(f"WHILE{while_index}")
        self.load_next_token()  # '('
        self.compile_expression()  # expression
        self.vm_writer.write_arithmetic("NOT")  # eval false condition first
        self.load_next_token()  # ')'
        self.load_next_token()  # '{'
        self.vm_writer.write_if(f"WHILE_END{while_index}")
        self.compile_statements()  # statements
        self.vm_writer.write_goto(f"WHILE{while_index}")
        self.vm_writer.write_label(f"WHILE_END{while_index}")
        self.load_next_token()  # '}'

    # 'do' subroutineCall ';'
    def compile_do(self):
        # curr_token == do
        self.load_next_token()  #! to sync with compile_term()
        self.compile_subroutine_call()
        # A do-statement discards the callee's return value.
        self.vm_writer.write_pop("TEMP", 0)
        self.load_next_token()  # ';'

    # 'return' expression? ';'
    def compile_return(self):
        # curr_token == return
        if self.check_next_token() != ";":
            self.compile_expression()
        else:
            # void subroutines still push a dummy 0 before returning
            self.vm_writer.write_push("CONST", 0)
        self.vm_writer.write_return()
        self.load_next_token()  # ';'

    # term (op term)*
    def compile_expression(self):
        self.compile_term()  # term
        while self.is_op():  # (op term)*
            op: str = self.load_next_token()  # op
            self.compile_term()  # term
            # Operators map to VM arithmetic; * and / call the OS Math class.
            if op in ARITHMETIC.keys():
                self.vm_writer.write_arithmetic(ARITHMETIC[op])
            elif op == "*":
                self.vm_writer.write_call("Math.multiply", 2)
            elif op == "/":
                self.vm_writer.write_call("Math.divide", 2)

    # integerConstant | stringConstant | keywordConstant | varName |
    # varName '[' expression ']' | subroutineCall | '(' expression ')' | unaryOp term
    def compile_term(self):
        # if next_token == '~' | '-'
        if self.is_unary_op_term():
            unary_op = self.load_next_token()  # curr_token == '~' | '-'
            self.compile_term()  # term (recursive)
            self.vm_writer.write_arithmetic(ARITHMETIC_UNARY[unary_op])
        # if next_token == '(' => '(' expression ')'
        elif self.check_next_token() == "(":
            self.load_next_token()  # '('
            self.compile_expression()  # expression
            self.load_next_token()  # ')'
        # if next_token == INTEGER(const)
        elif self.check_next_type() == "INT_CONST":  # integerConstant
            self.vm_writer.write_push("CONST", self.load_next_token())
        # if next_token == STRING(const)
        elif self.check_next_type() == "STRING_CONST":  # stringConstant
            self.compile_string()
        # if next_token == KEYWORD(const)
        elif self.check_next_type() == "KEYWORD":  # keywordConstant
            self.compile_keyword()
        # varName | varName '[' expression ']' | subroutineCall
        else:
            #! (varName | varName for expression | subroutine)'s base
            var_name = self.load_next_token(
            )  # curr_token = varName | subroutineCall
            # (e.g. Screen.setColor | show() )
            #! next_token == '[' | '(' or '.' | just varName
            # varName '[' expression ']'
            if self.is_array():  # if next_token == '['
                self.load_next_token()  # '['
                self.compile_expression()  # expression
                self.load_next_token()  # ']'
                array_kind = self.symbol_table.kind_of(var_name)
                array_index = self.symbol_table.index_of(var_name)
                self.vm_writer.write_push(CONVERT_KIND[array_kind], array_index)
                self.vm_writer.write_arithmetic("ADD")
                self.vm_writer.write_pop("POINTER", 1)
                self.vm_writer.write_push("THAT", 0)
            # if next_token == "(" | "." => curr_token == subroutineCall
            #! if varName is not found, assume class or function name
            elif self.is_subroutine_call():
                # NOTE curr_token == subroutineName | className | varName
                self.compile_subroutine_call()
            # varName
            else:  # curr_token == varName
                # FIXME cannot catch subroutine call and pass it to 'else' below
                # TODO error caught on Math.abs() part on Ball.vm
                var_kind = CONVERT_KIND[self.symbol_table.kind_of(var_name)]
                var_index = self.symbol_table.index_of(var_name)
                self.vm_writer.write_push(var_kind, var_index)

    # subroutineCall: subroutineName '(' expressionList ')' |
    # ( className | varName) '.' subroutineName '(' expressionList ')'
    # e.g.) (do) game.run()
    # ! in case of 'do' order is different from 'let game = Class.new()'
    def compile_subroutine_call(self):
        # NOTE curr_token == subroutineName | className | varName
        subroutine_caller = self.get_curr_token()
        function_name = subroutine_caller
        # _next_token()  # FIXME now it loads '.' or '('
        # func_name = identifier
        number_args = 0
        #! '.' or '(' 2 cases
        if self.check_next_token() == ".":
            self.load_next_token()  # curr_token == '.'
            subroutine_name = self.load_next_token(
            )  # curr_token == subroutineName
            type = self.symbol_table.type_of(subroutine_caller)
            if type != "NONE":  # it's an instance: push it as argument 0
                kind = self.symbol_table.kind_of(subroutine_caller)
                index = self.symbol_table.index_of(subroutine_caller)
                self.vm_writer.write_push(CONVERT_KIND[kind], index)
                function_name = f"{type}.{subroutine_name}"
                number_args += 1
            else:  # it's a class (static call, no receiver pushed)
                class_name = subroutine_caller
                function_name = f"{class_name}.{subroutine_name}"
        elif self.check_next_token() == "(":
            # Bare call: method on the current object; push 'this'.
            subroutine_name = subroutine_caller
            function_name = f"{self.class_name}.{subroutine_name}"
            number_args += 1
            self.vm_writer.write_push("POINTER", 0)
        self.load_next_token()  # '('
        number_args += self.compile_expression_list()  # expressionList
        self.load_next_token()  # ')'
        self.vm_writer.write_call(function_name, number_args)

    # (expression (',' expression)* )?
    def compile_expression_list(self):
        # Returns the number of arguments compiled.
        number_args = 0
        if self.check_next_token() != ")":
            number_args += 1
            self.compile_expression()
        while self.check_next_token() != ")":
            number_args += 1
            self.load_next_token()  # curr_token == ','
            self.compile_expression()
        return number_args

    def compile_string(self):
        # Build a String object one character at a time via the OS.
        string = self.load_next_token()  # curr_token == stringConstant
        self.vm_writer.write_push("CONST", len(string))
        self.vm_writer.write_call("String.new", 1)
        for char in string:
            self.vm_writer.write_push("CONST", ord(char))
            self.vm_writer.write_call("String.appendChar", 2)

    def compile_keyword(self):
        # true -> -1 (not 0), false/null -> 0, this -> pointer 0.
        keyword = self.load_next_token()  # curr_token == keywordConstant
        if keyword == "this":
            self.vm_writer.write_push("POINTER", 0)
        else:
            self.vm_writer.write_push("CONST", 0)
            if keyword == "true":
                self.vm_writer.write_arithmetic("NOT")

    # --- lookahead predicates: all inspect next_token without consuming ---

    def is_subroutine_call(self):
        return self.check_next_token() in [".", "("]

    def is_array(self):
        return self.check_next_token() == "["

    def is_class_var_dec(self):
        return self.check_next_token() in ["static", "field"]

    def is_subroutine_dec(self):
        return self.check_next_token() in ["constructor", "function", "method"]

    def is_statement(self):
        return self.check_next_token() in [
            "let", "if", "while", "do", "return"
        ]

    def is_op(self):
        return self.check_next_token() in [
            "+", "-", "*", "/", "&", "|", "<", ">", "="
        ]

    def is_unary_op_term(self):
        return self.check_next_token() in ["~", "-"]

    # --- token-stream helpers; tokens are (type, value) pairs ---

    def check_next_token(self):
        return self.tokenizer.next_token[1]

    def check_next_type(self):
        return self.tokenizer.next_token[0]

    def get_curr_token(self):
        return self.tokenizer.curr_token[1]

    def load_next_token(self):
        # Consume one token and return its value; '' at end of stream.
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()  # curr_token = next_token
            return self.tokenizer.curr_token[1]
        else:
            return ""
class Engine(object):
    """Recursive-descent Jack-to-VM compilation engine.

    Pulls (token, value) pairs from a lexer (`self.lex`), tracks variables
    in a SymbolTable, and emits VM commands through a VMWriter.
    """

    def __init__(self, tokens, filepath):
        # compilation engine init; compiles eagerly in the constructor
        self.lex = tokens
        self.symbols = SymbolTable()
        self.vm = VMWriter(filepath)
        self.compile_class()
        self.vm.closeout()

    # Routines to advance the token
    def _require(self, tok, val=None):
        """Consume one token; raise unless it matches (tok[, val])."""
        lextok, lexval = self._advance()
        # For keywords/symbols the value must match too; for other token
        # types only the token type is checked.
        if tok != lextok or tok in (T_KEYWORD, T_SYM) and val != lexval:
            raise Exception(self._require_failed_msg(tok, val))
        else:
            return lexval

    def _require_failed_msg(self, tok, val):
        if val is None:
            val = token_list[tok]
        return 'Expected: {0}, {1} \ntoken is: {2}'.format(tok, val, self.lex.tokens)

    def _advance(self):
        # Returns the next (token, value) pair from the lexer.
        return self.lex.advance()

    def vm_function_name(self):
        # Fully qualified name, e.g. "Main.main".
        return self._cur_class + '.' + self._cur_subroutine

    def vm_push_variable(self, name):
        (type, kind, index) = self.symbols.lookup(name)
        self.vm.write_push(segments[kind], index)

    def vm_pop_variable(self, name):
        (type, kind, index) = self.symbols.lookup(name)
        self.vm.write_pop(segments[kind], index)

    def load_this_ptr(self, kwd):
        """Emit the prologue that sets the 'this' pointer for a method or
        constructor (no-op for plain functions)."""
        if kwd == KW_METHOD:
            self.vm.push_arg(0)
            self.vm.pop_this_ptr()  # set up 'this' pointer to point to new object
        elif kwd == KW_CONSTRUCTOR:
            self.vm.push_const(self.symbols.var_count(SK_FIELD))  # object size
            self.vm.write_call('Memory.alloc', 1)
            self.vm.pop_this_ptr()  # set up 'this' pointer to point to new object

    def write_func_decl(self, kwd):
        # function <name> <n_locals>, then the this-pointer prologue.
        self.vm.write_function(self.vm_function_name(),
                               self.symbols.var_count(SK_VAR))
        self.load_this_ptr(kwd)

    def write_string_const_init(self, val):
        # Build a String object character by character via the OS.
        self.vm.push_const(len(val))
        self.vm.write_call('String.new', 1)  # String.new(len(str))
        for c in val:
            self.vm.push_const(ord(c))
            self.vm.write_call('String.appendChar', 2)  # String.appendChar(nextchar)

    # Class attribute: shared starting value; each instance gets its own
    # counter on first `self.label_num += 1`.
    label_num = 0

    def new_label(self):
        self.label_num += 1
        return 'label' + str(self.label_num)

    # ------------- verify part ----------------
    def _is_token(self, tok, val=None):
        """Peek: does the next token match (tok[, val]) without consuming?"""
        lextok, lexval = self.lex.peek()
        return val == None and lextok == tok or (lextok, lexval) == (tok, val)

    def _is_keyword(self, *keywords):
        lextok, lexval = self.lex.peek()
        return lextok == T_KEYWORD and lexval in keywords

    def _is_sym(self, symbols):
        # `symbols` is a string treated as a set of single-char symbols.
        lextok, lexval = self.lex.peek()
        return lextok == T_SYM and lexval in symbols

    # Variable declarations
    def _is_class_var_dec(self):
        return self._is_keyword(KW_STATIC, KW_FIELD)

    def _is_type(self):
        return self._is_token(T_ID) or self._is_keyword(KW_INT, KW_CHAR, KW_BOOLEAN)

    # Subroutine declarations
    def _is_subroutine(self):
        return self._is_keyword(KW_CONSTRUCTOR, KW_FUNCTION, KW_METHOD)

    def _is_var_dec(self):
        return self._is_keyword(KW_VAR)

    def _is_let(self):
        return self._is_keyword(KW_LET)

    def _is_if(self):
        return self._is_keyword(KW_IF)

    def _is_while(self):
        return self._is_keyword(KW_WHILE)

    def _is_do(self):
        return self._is_keyword(KW_DO)

    def _is_return(self):
        return self._is_keyword(KW_RETURN)

    def _is_statement(self):
        return self._is_let() or self._is_if() or self._is_while() or self._is_do() or self._is_return()

    def _is_const(self):
        return self._is_token(T_NUM) or self._is_token(T_STR) or self._is_keyword_constant()

    def _is_keyword_constant(self):
        return self._is_keyword(KW_TRUE, KW_FALSE, KW_NULL, KW_THIS)

    def _is_op(self):
        return self._is_sym('+-*/&|<>=')

    def _is_unary_op(self):
        return self._is_sym('-~')

    def _is_var_name(self):
        return self._is_token(T_ID)

    def _is_builtin_type(self, type):
        return type in [KW_INT, KW_CHAR, KW_BOOLEAN, KW_VOID]

    def _is_term(self):
        return self._is_const() or self._is_var_name() or self._is_sym('(') or self._is_unary_op()

    # --------------- compile part -----------------
    # Parser and compile Jack code
    # class: 'class' className '{' classVarDec* subroutineDec* '}'
    def compile_class(self):
        self._require(T_KEYWORD, KW_CLASS)
        self.compile_class_name()
        self._require(T_SYM, '{')
        while self._is_class_var_dec():
            self.compile_class_var_dec()
        while self._is_subroutine():
            self.compile_subroutine()
        self._require(T_SYM, '}')

    # className: identifier
    def compile_class_name(self):
        self._cur_class = self.compile_var_name()
        # Class names don't have to go into the symbol table

    # type varName (',' varName)* ';'
    def _compile_dec(self, kind):
        """Shared tail of class-var and local-var declarations."""
        type = self.compile_type()
        name = self.compile_var_name()
        self.symbols.define(name, type, kind)
        while self._is_sym(','):
            self._advance()
            name = self.compile_var_name()
            self.symbols.define(name, type, kind)
        self._require(T_SYM, ';')

    def compile_type(self):
        """ type: 'int' | 'char' | 'boolean' | className """
        if self._is_type():
            return self._advance()[1]
        else:
            raise ValueError(self._require_failed_msg(*self.lex.peek()))

    # classVarDec: {'static'|'field'} type varName (',' varName)* ';'
    def compile_class_var_dec(self):
        tok, kwd = self._advance()  # static | field
        self._compile_dec(kwd_to_kind[kwd])

    # varName: identifier
    def compile_var_name(self):
        return self._require(T_ID)

    # subroutineDec: ('constructor'|'function'|'method') ('void'|type)
    # subroutineName '(' parameterList ')' subroutineBody
    def compile_subroutine(self):
        tok, kwd = self._advance()
        type = self.compile_void_or_type()
        self.compile_subroutine_name()
        self.symbols.start_subroutine()
        if kwd == KW_METHOD:
            # Methods receive the object as hidden argument 0 ('this').
            self.symbols.define('this', self._cur_class, SK_ARG)
        self._require(T_SYM, '(')
        self.compile_parameter_list()
        self._require(T_SYM, ')')
        self.compile_subroutine_body(kwd)

    # 'void' | type
    def compile_void_or_type(self):
        if self._is_keyword(KW_VOID):
            return self._advance()[1]
        else:
            return self.compile_type()

    # subroutineName: identifier
    def compile_subroutine_name(self):
        self._cur_subroutine = self.compile_var_name()
        # subroutine names don't have to go in the symbol table

    # parameterList: (parameter (',' parameter)*)?
    def compile_parameter_list(self):
        if self._is_type():
            self.compile_parameter()
            while self._is_sym(','):
                self._advance()
                self.compile_parameter()

    # parameter: type varName
    def compile_parameter(self):
        if self._is_type():
            type = self.compile_type()
            name = self.compile_var_name()
            self.symbols.define(name, type, SK_ARG)

    # subroutineBody: '{' varDec* statements '}'
    def compile_subroutine_body(self, kwd):
        self._require(T_SYM, '{')
        while self._is_var_dec():
            self.compile_var_dec()
        # function decl is written only after locals are counted
        self.write_func_decl(kwd)
        self.compile_statements()
        # NOTE(review): the subroutine's closing '}' is never consumed here
        # (no _require(T_SYM, '}')) — verify the lexer/body handles it.

    # varDec: 'var' type varName (',' varName)* ';'
    def compile_var_dec(self):
        self._require(T_KEYWORD, KW_VAR)
        return self._compile_dec(SK_VAR)

    # statement: statement*
    def compile_statements(self):
        while self._is_statement():
            self._compile_statement()

    # statement: letStatement | ifStatement | whileStatement | doStatement | returnStatement
    def _compile_statement(self):
        if self._is_let():
            self.compile_let()
        elif self._is_if():
            self.compile_if()
        elif self._is_while():
            self.compile_while()
        elif self._is_do():
            self.compile_do()
        elif self._is_return():
            self.compile_return()

    # letStatement: 'let' varName ('[' expression ']')? '=' expression ';'
    def compile_let(self):
        self._require(T_KEYWORD, KW_LET)
        name = self.compile_var_name()
        subscript = self._is_sym('[')
        if subscript:
            self.compile_base_plus_index(name)  # calculate base+index
        self._require(T_SYM, '=')
        self.compile_expression()  # calculate expression to assign
        self._require(T_SYM, ';')
        if subscript:
            self.pop_array_element()  # *(base+index) = expr
        else:
            self.vm_pop_variable(name)  # pop value directly into variable

    def pop_array_element(self):
        self.vm.pop_temp(TEMP_ARRAY)   # Pop expr value to temp register
        self.vm.pop_that_ptr()         # Pop base+index into 'that' register
        self.vm.push_temp(TEMP_ARRAY)  # Push expr back onto stack
        self.vm.pop_that()             # Pop value into *(base+index)

    # ('[' expression ']')?
    def compile_base_plus_index(self, name):
        self.vm_push_variable(name)  # push array ptr onto stack
        self._advance()
        self.compile_expression()  # push index onto stack
        self._require(T_SYM, ']')
        self.vm.write_vm_cmd('add')  # base+index - leave on the stack for later

    def compile_expression(self):
        self.compile_term()
        # Doesn't handle normal order of operations - just left to right for now
        while self._is_op():
            op = self._advance()
            self.compile_term()
            self.vm.write_vm_cmd(vm_cmds[op[1]])  # op

    # term: integerConstant | stringConstant | keywordConstant | varName
    # | varName '[' expression ']' | subroutineCall | '(' expression ')'
    # | unaryOp term
    def compile_term(self):
        if self._is_const():
            self.compile_const()
        elif self._is_sym('('):
            self._advance()
            self.compile_expression()  # VM code to evaluate expression
            self._require(T_SYM, ')')
        elif self._is_unary_op():
            tok, op = self._advance()
            self.compile_term()
            self.vm.write_vm_cmd(vm_unary_cmds[op])  # op
        elif self._is_var_name():
            tok, name = self._advance()
            if self._is_sym('['):
                self.compile_array_subscript(name)  # VM code for array subscript
            elif self._is_sym('(.'):
                self.compile_subroutine_call(name)  # VM code for subroutine call
            else:
                self.vm_push_variable(name)  # push variable on stack

    # integerConstant | stringConstant | keywordConstant
    def compile_const(self):
        tok, val = self._advance()
        if tok == T_NUM:
            self.vm.push_const(val)  # push constant val
        elif tok == T_STR:
            self.write_string_const_init(val)  # initialize string & push str addr
        elif tok == T_KEYWORD:
            self.compile_kwd_const(val)  # push TRUE, FALSE, NULL etc.

    # '[' expression ']'
    def compile_array_subscript(self, name):
        self.vm_push_variable(name)  # push array ptr onto stack
        self._require(T_SYM, '[')
        self.compile_expression()  # push index onto stack
        self._require(T_SYM, ']')
        self.vm.write_vm_cmd('add')  # base+index
        self.vm.pop_that_ptr()  # pop into 'that' ptr
        self.vm.push_that()  # push *(base+index) onto stack

    # subroutineCall: subroutineName '(' expressionList ')'
    # | (className | varName) '.' subroutineName '(' expressionList ')'
    def compile_subroutine_call(self, name):
        (type, kind, index) = self.symbols.lookup(name)
        if self._is_sym('.'):
            num_args, name = self.compile_dotted_subroutine_call(name, type)
        else:
            # Bare call: method on the current object; 'this' is argument 0.
            num_args = 1
            self.vm.push_this_ptr()
            name = self._cur_class+'.'+name
        self._require(T_SYM, '(')
        num_args += self.compile_expr_list()  # VM code to push arguments
        self._require(T_SYM, ')')
        self.vm.write_call(name, num_args)  # call name num_args

    # keywordConstant: 'true' | 'false' | 'null' | 'this'
    def compile_kwd_const(self, kwd):
        if kwd == KW_THIS:
            self.vm.push_this_ptr()
        elif kwd == KW_TRUE:
            # true is represented as -1 (push 1; neg)
            self.vm.push_const(1)
            self.vm.write_vm_cmd('neg')
        else:  # KW_FALSE or KW_NULL
            self.vm.push_const(0)

    def compile_dotted_subroutine_call(self, name, type):
        """Handle `x.sub(...)`: returns (hidden-arg count, qualified name)."""
        num_args = 0
        obj_name = name
        self._advance()
        name = self.compile_var_name()
        if self._is_builtin_type(type):  # e.g. int.func(123) not allowed
            # NOTE(review): this ValueError is constructed but never raised —
            # likely a missing `raise`; verify intended behavior.
            ValueError('Cannot use "." operator on builtin type')
        elif type == None:  # Calling using class name
            name = obj_name+'.'+name
        else:  # Calling using object variable name
            num_args = 1
            self.vm_push_variable(obj_name)  # push object ptr onto stack
            name = self.symbols.type_of(obj_name)+'.'+name
        return num_args, name

    # expressionList: (expression (',' expression)*)?
    def compile_expr_list(self):
        # Returns the number of expressions compiled.
        num_args = 0
        if self._is_term():
            self.compile_expression()
            num_args = 1
            while self._is_sym(','):
                self._advance()
                self.compile_expression()
                num_args += 1
        return num_args

    # ifStatement: 'if' '(' expression ')' '{' statements '}'
    # ('else' '{' statements '}')?
    def compile_if(self):
        self._require(T_KEYWORD, KW_IF)
        end_label = self.new_label()
        self._compile_cond_expression_statements(end_label)  # VM code for condition and if statements
        if self._is_keyword(KW_ELSE):
            self._advance()
            self._require(T_SYM, '{')
            self.compile_statements()  # VM code for else statements
            self._require(T_SYM, "}")
        self.vm.write_label(end_label)  # label end_label (with or without else)

    # '(' expression ')' '{' statements '}'
    def _compile_cond_expression_statements(self, label):
        self._require(T_SYM, '(')
        self.compile_expression()
        self._require(T_SYM, ')')
        self.vm.write_vm_cmd('not')  # ~(cond)
        notif_label = self.new_label()
        self.vm.write_if(notif_label)  # if-goto notif_label
        self._require(T_SYM, '{')
        self.compile_statements()  # VM code for if statements
        self._require(T_SYM, '}')
        self.vm.write_goto(label)  # goto label
        self.vm.write_label(notif_label)  # label notif_label

    # whileStatement: 'while' '(' expression ')' '{' statements '}'
    def compile_while(self):
        self._require(T_KEYWORD, KW_WHILE)
        top_label = self.new_label()
        self.vm.write_label(top_label)  # label top_label
        self._compile_cond_expression_statements(top_label)  # VM code for condition and while statements

    # do_statement: 'do' subroutineCall ';'
    def compile_do(self):
        self._require(T_KEYWORD, KW_DO)
        name = self._require(T_ID)
        self.compile_subroutine_call(name)  # VM code for subroutine call
        self.vm.pop_temp(TEMP_RETURN)  # Pop return value and discard
        self._require(T_SYM, ';')

    # returnStatement: 'return' expression? ';'
    def compile_return(self):
        self._require(T_KEYWORD, KW_RETURN)
        if not self._is_sym(';'):
            self.compile_expression()  # VM code for return expression if any
        else:
            self.vm.push_const(0)  # push 0 if not returning a value
        self._require(T_SYM, ';')
        self.vm.write_return()  # return
class CompilationEngine:
    """Generates the compiler's output.

    Recursive-descent compiler for a single Jack class: walks the token
    stream, resolves identifiers through a SymbolTable, and emits VM
    commands through a VMWriter.  Compilation runs to completion inside
    __init__.
    """

    CLASS_VAR_DEC_TOKENS = ["static", "field"]
    SUBROUTINE_TOKENS = ["function", "method", "constructor"]
    VARIABLE_TYPES = ['int', 'char', 'boolean']
    STATEMENT_TOKENS = ['do', 'let', 'while', 'return', 'if']
    # Binary operators.  Most map to a VM arithmetic command; '*' and
    # '/' have no VM opcode and are compiled as OS calls instead (see
    # compile_expression).
    OP = {'+': 'ADD', '-': 'SUB', '&': 'AND', '|': 'OR', '<': 'LT', '>': 'GT',
          '=': 'EQ', '*': 'Math.multiply', '/': 'Math.divide'}

    def __init__(self, jack_tokenizer: JackTokenizer, output_path: str):
        super().__init__()
        self.tokenizer = jack_tokenizer
        self.table = SymbolTable()
        self.writer = VMWriter(output_path)
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()  # prime the first token
        self.class_name = ''
        self.curr_func_name = ''  # '<Class>.<subroutine>' being compiled
        self._if_count = 0        # makes if labels unique
        self._while_count = 0     # makes while labels unique
        self.compile_class()

    def compile_class(self) -> None:
        """
        Compiles a complete class.
        :return: None
        """
        self._consume('class')
        if self.tokenizer.token_type() != TokenTypes.IDENTIFIER:
            raise CompilationEngineError(f"{self._get_current_token()} is an invalid token at this point. Expected a "
                                         f"class name.")
        self.class_name = self._get_current_token()
        self._consume(TokenTypes.IDENTIFIER)
        self._consume('{')
        while self._get_current_token() != '}':
            if self._get_current_token() in CompilationEngine.CLASS_VAR_DEC_TOKENS:
                self.compile_class_var_dec()
            elif self._get_current_token() in CompilationEngine.SUBROUTINE_TOKENS:
                self.compile_subroutine_dec()
            else:
                raise CompilationEngineError(f"{self._get_current_token()} is an unexpected token at this point")
        self._consume('}')

    def compile_class_var_dec(self) -> None:
        """
        Compiles a static variable declaration, or a field declaration.
        Only updates the symbol table; declarations emit no VM code.
        :return: None.
        """
        kind = str_to_kind(self._get_current_token())
        self._consume(self.CLASS_VAR_DEC_TOKENS)
        var_type = self._get_current_token()
        self._consume_type()
        self.table.define(self._get_current_token(), var_type, kind)
        self._consume(TokenTypes.IDENTIFIER)
        while self._get_current_token() != ';':  # 'type a, b, c;' form
            self._consume(',')
            self.table.define(self._get_current_token(), var_type, kind)
            self._consume(TokenTypes.IDENTIFIER)
        self._consume(';')

    def compile_subroutine_dec(self) -> None:
        """
        Compiles a complete method, function or constructor.
        :return: None
        """
        self.table.reset()  # fresh subroutine-level symbol table
        subroutine_type = self._get_current_token()
        if subroutine_type == 'method':
            # 'this' is the implicit first argument of every method.
            self.table.define('this', self.class_name, Kind.ARG)
        self._consume(self.SUBROUTINE_TOKENS)
        try:
            self._consume_type()
        except CompilationEngineError:
            self._consume('void')  # return type may also be 'void'
        self.curr_func_name = f'{self.class_name}.{self._get_current_token()}'
        self._consume(TokenTypes.IDENTIFIER)
        self._consume('(')
        self.compile_parameter_list()
        self._consume(')')
        self.compile_subroutine_body(subroutine_type)

    def compile_parameter_list(self) -> None:
        """
        Compiles a (possibly empty) parameter list. Doesn't handle the
        enclosing "()".  Parameters are registered as ARG symbols.
        :return: None
        """
        if self._get_current_token() != ')':
            var_type = self._get_current_token()
            self._consume_type()
            self.table.define(self._get_current_token(), var_type, Kind.ARG)
            self._consume(TokenTypes.IDENTIFIER)
            while self._get_current_token() != ')':
                self._consume(',')
                var_type = self._get_current_token()
                self._consume_type()
                self.table.define(self._get_current_token(), var_type, Kind.ARG)
                self._consume(TokenTypes.IDENTIFIER)

    def compile_subroutine_body(self, subroutine_type: str) -> None:
        """
        Compiles a subroutine's body, including the entry code that
        depends on the subroutine kind (constructor allocates the
        object, method anchors `this` from argument 0).
        :param subroutine_type: 'constructor', 'method' or 'function'.
        :return: None
        """
        self._consume('{')
        while self._get_current_token() == 'var':
            self.compile_var_dec()
        # The local count must be known before 'function' is written,
        # hence var declarations are compiled first.
        var_count = self.table.var_count(Kind.VAR)
        self.writer.write_function(self.curr_func_name, var_count)
        if subroutine_type == 'constructor':
            n_fields = self.table.var_count(Kind.FIELD)
            self.writer.write_push('CONST', n_fields)
            self.writer.write_call('Memory.alloc', 1)
            self.writer.write_pop('POINTER', 0)  # this = alloc'd block
        elif subroutine_type == 'method':
            self.writer.write_push('ARG', 0)
            self.writer.write_pop('POINTER', 0)  # this = argument 0
        while self._get_current_token() != '}':
            self.compile_statements()
        self._consume('}')

    def compile_var_dec(self) -> None:
        """
        Compiles a var declaration.  Only updates the symbol table.
        :return: None.
        """
        self._consume('var')
        var_type = self._get_current_token()
        self._consume_type()
        self.table.define(self._get_current_token(), var_type, Kind.VAR)
        self._consume(TokenTypes.IDENTIFIER)
        while self._get_current_token() != ';':
            self._consume(',')
            self.table.define(self._get_current_token(), var_type, Kind.VAR)
            self._consume(TokenTypes.IDENTIFIER)
        self._consume(';')

    def compile_statements(self) -> None:
        """
        Compiles a sequence of statements. Doesn't handle the enclosing
        "{}".  Dispatches to compile_<keyword> by statement keyword.
        :return: None.
        """
        while self._get_current_token() != '}':
            if self._get_current_token() in self.STATEMENT_TOKENS:
                getattr(self, 'compile_' + self._get_current_token())()
            else:
                raise CompilationEngineError(f"{self._get_current_token()} is an unexpected token at this point")

    def compile_do(self) -> None:
        """
        Compiles a do statement.  The (ignored) return value is popped
        to temp 0, as every Jack call leaves a value on the stack.
        :return: None.
        """
        self._consume('do')
        self.compile_subroutine_call()
        self.writer.write_pop('TEMP', 0)  # discard return value
        self._consume(';')

    def compile_let(self) -> None:
        """
        Compiles a let statement, including the arr[expr] = expr form,
        which goes through the THAT segment.
        :return: None.
        """
        self._consume('let')
        name = self._get_current_token()
        kind = convert_kind(self.table.kind_of(name))
        index = self.table.index_of(name)
        self._consume(TokenTypes.IDENTIFIER)
        if self._get_current_token() == '[':
            self._consume('[')
            self.compile_expression()  # index on stack
            self._consume(']')
            self.writer.write_push(kind, index)
            self.writer.write_arithmetic('ADD')  # target address
            # Save the address in temp 0 so the RHS expression can use
            # POINTER/THAT freely.
            self.writer.write_pop('TEMP', 0)
            self._consume('=')
            self.compile_expression()  # value on stack
            self.writer.write_push('TEMP', 0)
            self.writer.write_pop('POINTER', 1)  # THAT = address
            self.writer.write_pop('THAT', 0)     # *address = value
        else:
            self._consume('=')
            self.compile_expression()
            self.writer.write_pop(kind, index)
        self._consume(';')

    def compile_while(self) -> None:
        """
        Compiles a while statement.
        :return: None.
        """
        self._consume('while')
        self._consume('(')
        while_lbl = f"WHILE_{self._while_count}"
        while_false_lbl = f"WHILE_FALSE{self._while_count}"
        self._while_count += 1
        self.writer.write_label(while_lbl)
        self.compile_expression()
        self._consume(')')
        self._consume('{')
        # NOTE(review): no negation is emitted before write_if here (nor
        # in compile_if) — presumably VMWriter.write_if emits 'not'
        # itself; verify against the VMWriter implementation.
        self.writer.write_if(while_false_lbl)
        self.compile_statements()
        self.writer.write_goto(while_lbl)
        self.writer.write_label(while_false_lbl)
        self._consume('}')

    def compile_return(self) -> None:
        """
        Compiles a return statement.  A void return pushes constant 0,
        since every subroutine must return a value.
        :return: None.
        """
        self._consume('return')
        if self._get_current_token() != ';':
            self.compile_expression()
        else:
            self.writer.write_push('CONST', 0)
        self.writer.write_return()
        self._consume(';')

    def compile_if(self) -> None:
        """
        Compiles an if statement, possibly with a trailing else clause.
        :return: None.
        """
        self._consume('if')
        self._consume('(')
        self.compile_expression()
        self._consume(')')
        end_lbl = f'IF_END_{self._if_count}'
        false_lbl = f'IF_FALSE_{self._if_count}'
        self._if_count += 1
        self._consume('{')
        self.writer.write_if(false_lbl)
        self.compile_statements()
        self.writer.write_goto(end_lbl)
        self.writer.write_label(false_lbl)
        self._consume('}')
        if self._get_current_token() == 'else':
            self._consume('else')
            self._consume('{')
            self.compile_statements()
            self._consume('}')
        self.writer.write_label(end_lbl)

    def compile_expression(self) -> None:
        """
        Compiles an expression.  Operators are applied left to right
        (Jack defines no precedence); '*' and '/' become OS calls.
        :return: None
        """
        self.compile_term()
        while self._get_current_token() in self.OP:
            op = self._get_current_token()
            self._consume(op)
            self.compile_term()
            if op == '*':
                self.writer.write_call('Math.multiply', 2)
            elif op == '/':
                self.writer.write_call('Math.divide', 2)
            else:
                self.writer.write_arithmetic(self.OP[op])

    def compile_term(self) -> None:
        """
        Compiles a term. If the current token is an identifier, the
        routine distinguishes between a variable, an array entry, and a
        subroutine call.
        :return: None.
        """
        token_type = self.tokenizer.token_type()
        if token_type == TokenTypes.IDENTIFIER:
            curr_token = self._get_current_token()
            # One-token lookahead decides between call / array / var.
            self.tokenizer.advance()
            if self._get_current_token() in ('(', '.'):
                self.compile_subroutine_call(curr_token)
            elif self._get_current_token() == '[':
                self._consume('[')
                self.compile_expression()
                self._consume(']')
                kind = convert_kind(self.table.kind_of(curr_token))
                index = self.table.index_of(curr_token)
                self.writer.write_push(kind, index)
                self.writer.write_arithmetic('ADD')
                self.writer.write_pop('POINTER', 1)  # THAT = base + index
                self.writer.write_push('THAT', 0)
            else:
                kind = convert_kind(self.table.kind_of(curr_token))
                index = self.table.index_of(curr_token)
                self.writer.write_push(kind, index)
        elif token_type == TokenTypes.INT_CONST:
            self.writer.write_push('CONST', int(self._get_current_token()))
            self._consume(token_type)
        elif token_type == TokenTypes.KEYWORD:
            curr_token = self._get_current_token()
            if curr_token in ['true', 'false', 'null']:
                self.writer.write_push('CONST', 0)
                if curr_token == 'true':
                    self.writer.write_arithmetic('NOT')  # true == -1
            if curr_token == 'this':
                self.writer.write_push('POINTER', 0)
            self._consume(token_type)
        elif token_type == TokenTypes.STRING_CONST:
            # Reassemble the literal: the tokenizer apparently splits
            # string constants on whitespace, quotes included — TODO
            # confirm against the JackTokenizer implementation.
            const_str = ''
            first = True
            while const_str.count('"') < 2:
                if first:
                    const_str += self._get_current_token()
                    first = False
                else:
                    const_str += ' ' + self._get_current_token()
                if self.tokenizer.has_more_tokens():
                    self.tokenizer.advance()
            const_str = const_str.replace('"', '')
            # Build the string object at run time, one char at a time.
            self.writer.write_push('CONST', len(const_str))
            self.writer.write_call('String.new', 1)
            for char in const_str:
                self.writer.write_push('CONST', ord(char))
                self.writer.write_call('String.appendChar', 2)
        else:
            if self._get_current_token() == '(':
                self._consume('(')
                self.compile_expression()
                self._consume(')')
            else:
                op = self._get_current_token()
                self._consume(['-', '~'])  # unaryOp term
                self.compile_term()
                if op == '-':
                    self.writer.write_arithmetic('NEG')
                else:
                    self.writer.write_arithmetic('NOT')

    def compile_subroutine_call(self, subroutine_name=None) -> None:
        """
        Compiles a subroutine call, pushing the receiver (for method
        calls) and the arguments before emitting the call.
        :param subroutine_name: first identifier of the call when the
            caller already consumed it (see compile_term); otherwise it
            is read from the token stream here.
        :return: None
        """
        n_args = 0
        if not subroutine_name:
            subroutine_name = self._get_current_token()
            self._consume(TokenTypes.IDENTIFIER)
        if self._get_current_token() == '.':
            self._consume('.')
            sub_name = self._get_current_token()
            self._consume(TokenTypes.IDENTIFIER)
            try:  # Instance: a known variable -> method call on it
                var_type = self.table.type_of(subroutine_name)
                kind = convert_kind(self.table.kind_of(subroutine_name))
                index = self.table.index_of(subroutine_name)
                self.writer.write_push(kind, index)  # receiver is arg 0
                # BUGFIX: the receiver counts as an argument; without
                # this, every obj.method(...) call passed one arg too few.
                n_args += 1
                func_name = f'{var_type}.{sub_name}'
            except KeyError:  # Class: static function/constructor call
                func_name = f'{subroutine_name}.{sub_name}'
        else:
            # Undotted call: a method of the current class invoked on
            # `this`.
            func_name = f'{self.class_name}.{subroutine_name}'
            n_args += 1
            # BUGFIX: was write_pop('POINTER', 0), which popped a
            # garbage value into `this` instead of pushing the receiver.
            self.writer.write_push('POINTER', 0)
        self._consume('(')
        n_args += self.compile_expression_list()
        self._consume(')')
        self.writer.write_call(func_name, n_args)

    def compile_expression_list(self) -> int:
        """
        Compiles a (possibly empty) comma-separated list of expressions.
        :return: Int. Number of arguments.
        """
        n_args = 0
        if self._get_current_token() != ')':
            self.compile_expression()
            n_args += 1
            while self._get_current_token() == ',':
                self._consume(',')
                self.compile_expression()
                n_args += 1
        return n_args

    @singledispatchmethod
    def _consume(self, expected) -> None:
        """
        Check if the current token matches what it's expected to be,
        either by value or by type. On a match, advance to the next
        token; otherwise raise CompilationEngineError.
        :return: None
        """
        raise TypeError("Unsupported type: ", type(expected))

    @_consume.register(str)
    @_consume.register(list)
    def _(self, expected_tokens) -> None:
        """Consume by value"""
        if not isinstance(expected_tokens, list):
            expected_tokens = [expected_tokens]
        curr_token = self._get_current_token()
        if curr_token not in expected_tokens:
            raise CompilationEngineError(f"Expected {expected_tokens} but current token is {curr_token}. "
                                         f"Compilation failed.")
        else:
            if self.tokenizer.has_more_tokens():
                self.tokenizer.advance()

    @_consume.register
    def _(self, expected_types: TokenTypes):
        """Consume by type"""
        if not isinstance(expected_types, list):
            expected_types = [expected_types]
        curr_type = self.tokenizer.token_type()
        if curr_type not in expected_types:
            raise CompilationEngineError(f"Expected {expected_types} but current token type is {curr_type}. "
                                         f"Compilation failed.")
        else:
            if self.tokenizer.has_more_tokens():
                self.tokenizer.advance()

    def _consume_type(self):
        """
        Int / char / boolean / class name
        :return: None.
        """
        try:
            self._consume(self.VARIABLE_TYPES)
        except CompilationEngineError:
            self._consume(TokenTypes.IDENTIFIER)  # Class name

    def _get_current_token(self) -> str:
        """Return the current token normalized to a string."""
        token_type = self.tokenizer.token_type()
        if token_type is TokenTypes.INT_CONST:
            curr_token = str(self.tokenizer.int_val())
        elif token_type is TokenTypes.KEYWORD:
            curr_token = self.tokenizer.key_word()
        else:
            curr_token = self.tokenizer.current_token
        return curr_token
def __init__(self, source, destination): self.src = source self.dst = destination self.writer = VMWriter(destination) self.iter = Lookahead(tokenizor.newTokenizor(self.src)) self._symbol_table = SymbolTable()
class CompilationEngine:
    """Single-pass Jack compiler front end.

    Parses the token stream into an XML parse tree (returned by
    ``compile``) while simultaneously emitting VM code through a
    VMWriter.  Identifiers are resolved/declared through a SymbolTable
    as they are read (see _readIdentifier); flow-control labels are
    made unique per subroutine via ``_uid``.
    """

    def __init__(self, source, destination):
        self.src = source
        self.dst = destination
        self.writer = VMWriter(destination)
        self.iter = Lookahead(tokenizor.newTokenizor(self.src))
        self._symbol_table = SymbolTable()

    def compile(self):
        """Compile the whole source file; return the XML root element."""
        root = self._compileClass()
        return root

    def _compileClass(self):
        # class: 'class' className '{' classVarDec* subroutineDec* '}'
        classE = Element(ELEMENTS.CLASS)
        self._readKeyword(classE, ELEMENTS.CLASS)
        self.className = self._readIdentifier(classE)
        self._readSymbol(classE, _SYMBOLS.BRACKET_CURLY_OPEN)
        self._compileClassVarDec(classE)
        self._compileSubroutine(classE)
        self._readSymbol(classE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        return classE

    def _compileClassVarDec(self, parent):
        # classVarDec: ('static' | 'field') type varName (',' varName)* ';'
        # Symbol-table entries are created inside _readIdentifier.
        while self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _CLASSVARDEC.FIELD_TYPES:
            classVarDecE = Element(ELEMENTS.CLASSVARDEC)
            self._readKeyword(classVarDecE)
            self._readType(classVarDecE)
            self._readIdentifier(classVarDecE)
            while self._readSymbolOptional(classVarDecE, _SYMBOLS.COMMA):
                self._readIdentifier(classVarDecE)
            self._readSymbol(classVarDecE, _SYMBOLS.SEMI_COLON)
            parent.append(classVarDecE)

    def _compileSubroutine(self, parent):
        # subroutineDec: ('constructor'|'function'|'method') ('void'|type)
        #                subroutineName '(' parameterList ')' subroutineBody
        while self.nextTok and self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _SUBROUTINEDEC.TYPES:
            subroutineDecE = Element(ELEMENTS.SUBROUTINEDEC)
            function_type = self._readKeyword(subroutineDecE)
            self._readReturnType(subroutineDecE)
            self.methodName = self._readIdentifier(subroutineDecE)
            self._symbol_table.startSubroutine(self.className, self.methodName)
            if function_type == _SUBROUTINEDEC.METHOD:
                # 'this' is the implicit first argument of a method.
                self._symbol_table.define("this", self.className, SYM_KINDS.ARG)
            self._uid = -1  # restart label numbering per subroutine
            self._readSymbol(subroutineDecE, _SYMBOLS.PARENTHESES_OPEN)
            self._compileParameters(subroutineDecE)
            self._readSymbol(subroutineDecE, _SYMBOLS.PARENTHESES_CLOSE)
            self._compileSubroutineBody(subroutineDecE, function_type)
            parent.append(subroutineDecE)

    def _gen_label(self, type_):
        """Return one unique VM label for the current subroutine."""
        self._uid += 1
        return "%s.%s.%s.%d" % (self.className, self.methodName, type_, self._uid)

    def _gen_labels(self, *parts):
        """Return one unique label per part, all sharing the same uid."""
        self._uid += 1
        return ["%s.%s.%s.%d" % (self.className, self.methodName, part, self._uid)
                for part in parts]

    def _compileSubroutineBody(self, parent, function_type):
        bodyE = Element(ELEMENTS.SUBROUTINEBODY)
        self._readSymbol(bodyE, _SYMBOLS.BRACKET_CURLY_OPEN)
        # Local-variable count must precede statement compilation: the
        # 'function' command needs it up front.
        nArgs = self._compileVarDec(bodyE)  # actually the locals count
        # subroutineDec children: [kind keyword, return type, name] -> index 2.
        function_name = parent[2].text
        function_full_name = "%s.%s" % (self.className, function_name)
        self.writer.writeFunction(function_full_name, nArgs)
        if function_type == _SUBROUTINEDEC.CONSTRUCTOR:
            # Allocate the object and anchor `this` at it.
            field_count = self._symbol_table.varCount(SYM_KINDS.FIELD)
            self.writer.writePush(SEGMENT.CONST, field_count)
            self.writer.writeCall("Memory.alloc", 1)
            self.writer.writePop(SEGMENT.POINTER, 0)
        elif function_type == _SUBROUTINEDEC.METHOD:
            # Anchor `this` at argument 0.
            self.writer.writePush(SEGMENT.ARG, 0)
            self.writer.writePop(SEGMENT.POINTER, 0)
        self._compileStatements(bodyE)
        self._readSymbol(bodyE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        parent.append(bodyE)

    def _compileStatements(self, parent):
        statementsE = Element(ELEMENTS.STATEMENTS)
        while self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _STATEMENTS.TYPE_NAMES:
            if self.nextTok.value == _STATEMENTS.LET:
                # letStatement: 'let' varName ('[' expression ']')? '=' expression ';'
                statementE = Element(ELEMENTS.STATEMENT_LET)
                self._readKeyword(statementE)
                identifier = self._readIdentifier(statementE)
                is_array = False
                if self._readSymbolOptional(statementE, _SYMBOLS.BRACKET_OPEN):
                    is_array = True
                    self._compileExpression(statementE)  # index on stack
                    self.writer.writePush(*self._identifier_data(identifier))
                    self.writer.writeArithmetic("add")   # target address
                    self._readSymbol(statementE, _SYMBOLS.BRACKET_CLOSE)
                self._readSymbol(statementE, _SYMBOLS.EQUAL)
                self._compileExpression(statementE)      # value on stack
                self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
                if is_array:
                    # Stack is [address, value]: park the value, aim
                    # THAT at the address, store through it.
                    self.writer.writePop(SEGMENT.TEMP, 0)
                    self.writer.writePop(SEGMENT.POINTER, 1)
                    self.writer.writePush(SEGMENT.TEMP, 0)
                    self.writer.writePop(SEGMENT.THAT, 0)
                else:
                    self.writer.writePop(*self._identifier_data(identifier))
                statementsE.append(statementE)
            elif self.nextTok.value == _STATEMENTS.IF:
                # ifStatement: skip to else-label when condition is false.
                label_else, label_end = self._gen_labels("if.else", "if.end")
                statementE = Element(ELEMENTS.STATEMENT_IF)
                self._readKeyword(statementE)
                self._readSymbol(statementE, _SYMBOLS.PARENTHESES_OPEN)
                self._compileExpression(statementE)
                self._readSymbol(statementE, _SYMBOLS.PARENTHESES_CLOSE)
                self.writer.writeArithmetic("not")
                self.writer.writeIf(label_else)
                self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
                self._compileStatements(statementE)
                self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
                self.writer.writeGoto(label_end)
                self.writer.writeLabel(label_else)
                if self._readKeywordOptional(statementE, _KEYWORDS.ELSE):
                    self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
                    self._compileStatements(statementE)
                    self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
                self.writer.writeLabel(label_end)
                statementsE.append(statementE)
            elif self.nextTok.value == _STATEMENTS.WHILE:
                # whileStatement: test at the top, exit when false.
                label_start, label_end = self._gen_labels("while.start", "while.end")
                self.writer.writeLabel(label_start)
                statementE = Element(ELEMENTS.STATEMENT_WHILE)
                self._readKeyword(statementE)
                self._readSymbol(statementE, _SYMBOLS.PARENTHESES_OPEN)
                self._compileExpression(statementE)
                self._readSymbol(statementE, _SYMBOLS.PARENTHESES_CLOSE)
                self.writer.writeArithmetic("not")
                self.writer.writeIf(label_end)
                self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
                self._compileStatements(statementE)
                self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
                statementsE.append(statementE)
                self.writer.writeGoto(label_start)
                self.writer.writeLabel(label_end)
            elif self.nextTok.value == _STATEMENTS.DO:
                self._compileDo(statementsE)
            elif self.nextTok.value == _STATEMENTS.RETURN:
                # returnStatement: a void return still pushes constant 0.
                statementE = Element(ELEMENTS.STATEMENT_RETURN)
                self._readKeyword(statementE)
                if not (self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == _SYMBOLS.SEMI_COLON):
                    self._compileExpression(statementE)
                else:
                    self.writer.writePush(SEGMENT.CONST, 0)
                self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
                self.writer.writeReturn()
                statementsE.append(statementE)
        if len(statementsE) == 0:
            statementsE.text = "\n"
        parent.append(statementsE)

    def _compileExpression(self, parent):
        # Operators apply left to right (Jack defines no precedence):
        # each operator is emitted right after its second operand.
        expressionE = Element(ELEMENTS.EXPRESSION)
        self._readTerm(expressionE)
        while self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value in _SYMBOLS.OPERATORS:
            symbol = self._readSymbol(expressionE)
            self._readTerm(expressionE)
            self.writer.writeArithmetic(symbol)
        parent.append(expressionE)

    def _compileExpressionList(self, parent):
        """Read '(' expression* ')' — both parentheses included — and
        return the number of expressions compiled."""
        self._readSymbol(parent, _SYMBOLS.PARENTHESES_OPEN)
        expListE = Element(ELEMENTS.EXPRESSION_LIST)
        nArgs = 0
        while not (self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == _SYMBOLS.PARENTHESES_CLOSE):
            self._compileExpression(expListE)
            self._readSymbolOptional(expListE, _SYMBOLS.COMMA)
            nArgs += 1
        # hack for TextComparer
        if len(expListE) == 0:
            expListE.text = "\n"
        parent.append(expListE)
        self._readSymbol(parent, _SYMBOLS.PARENTHESES_CLOSE)
        return nArgs

    def _compileDo(self, parent):
        # doStatement: 'do' subroutineCall ';'
        statementE = Element(ELEMENTS.STATEMENT_DO)
        self._readKeyword(statementE, _STATEMENTS.DO)
        identifier = self._readIdentifier(statementE)
        nArgs = 0
        if self._readSymbolOptional(statementE, _SYMBOLS.DOT):
            type_ = self._symbol_table.typeOf(identifier)
            if type_:
                # Known variable: method call on that object.
                segment, index = self._identifier_data(identifier)
                self.writer.writePush(segment, index)
                nArgs += 1
                identifier = "%s.%s" % (type_, self._readIdentifier(statementE))
            else:
                # Unknown name: a class, i.e. a static function call.
                identifier = "%s.%s" % (identifier, self._readIdentifier(statementE))
        else:
            # Undotted call: method of the current class on `this`.
            identifier = "%s.%s" % (self.className, identifier)
            self.writer.writePush(SEGMENT.POINTER, 0)
            nArgs += 1
        nArgs += self._compileExpressionList(statementE)
        self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
        self.writer.writeCall(identifier, nArgs)
        self.writer.writePop(SEGMENT.TEMP, 0)  # discard return value
        parent.append(statementE)

    def _compileVarDec(self, parent):
        """Compile 'var' declarations; return the number of locals."""
        nArgs = 0
        while self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value == _KEYWORDS.VAR:
            varDecE = Element(ELEMENTS.VAR_DEC)
            self._readKeyword(varDecE, _KEYWORDS.VAR)
            self._readType(varDecE)
            self._readIdentifier(varDecE)
            nArgs += 1
            while self._readSymbolOptional(varDecE, _SYMBOLS.COMMA):
                self._readIdentifier(varDecE)
                nArgs += 1
            self._readSymbol(varDecE, _SYMBOLS.SEMI_COLON)
            parent.append(varDecE)
        return nArgs

    def _compileParameters(self, parent):
        # parameterList: ((type varName) (',' type varName)*)?
        paramListE = Element(ELEMENTS.PARAM_LIST)
        while (self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _CLASSVARDEC.VAR_TYPES) or self.nextTok.type == tokenizor.IDENTIFIER:
            self._readType(paramListE)
            self._readIdentifier(paramListE)
            self._readSymbolOptional(paramListE, _SYMBOLS.COMMA)
        if len(paramListE) == 0:
            paramListE.text = "\n"
        parent.append(paramListE)

    ##############################
    ########## READ ##############
    ##############################

    def _readTerm(self, parent):
        termE = Element(ELEMENTS.TERM)
        if self.nextTok.type == tokenizor.INTEGER:
            self.next()
            termE.append(_leafElement(ELEMENTS.INTEGER_CONSTANT, self.tok.value))
            self.writer.writePush(SEGMENT.CONST, self.tok.value)
        elif self.nextTok.type == tokenizor.STRING:
            # Build the string object at run time, one char at a time.
            self.next()
            termE.append(_leafElement(ELEMENTS.STRING_CONSTANT, self.tok.value))
            string_value = self.tok.value
            self.writer.writePush(SEGMENT.CONST, len(string_value))
            self.writer.writeCall("String.new", 1)
            for char in string_value:
                self.writer.writePush(SEGMENT.CONST, ord(char))
                self.writer.writeCall("String.appendChar", 2)
        elif self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _KEYWORDS.CONSTANTS:
            self.next()
            termE.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
            _KW_CONT_WRITE[self.tok.value](self.writer)
        elif self.nextTok.type == tokenizor.IDENTIFIER:
            identifier = self._readIdentifier(termE)
            nArgs = 0
            if self._readSymbolOptional(termE, _SYMBOLS.BRACKET_OPEN):
                # Array entry: compute base + index, read through THAT.
                self._compileExpression(termE)
                self.writer.writePush(*self._identifier_data(identifier))
                self.writer.writeArithmetic("add")
                self.writer.writePop(SEGMENT.POINTER, 1)
                self.writer.writePush(SEGMENT.THAT, 0)
                self._readSymbol(termE, _SYMBOLS.BRACKET_CLOSE)
            elif self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == _SYMBOLS.PARENTHESES_OPEN:
                # Undotted call: a method of the current class on `this`
                # — push the receiver and qualify the name, mirroring
                # _compileDo's no-dot case.
                # BUGFIX: _compileExpressionList consumes both
                # parentheses itself; the extra PARENTHESES_CLOSE read
                # here made every undotted call term a syntax error, and
                # the call went out unqualified with no receiver.
                self.writer.writePush(SEGMENT.POINTER, 0)
                nArgs += 1
                nArgs += self._compileExpressionList(termE)
                self.writer.writeCall("%s.%s" % (self.className, identifier), nArgs)
            elif self._readSymbolOptional(termE, _SYMBOLS.DOT):
                type_ = self._symbol_table.typeOf(identifier)
                if type_:
                    # Known variable: method call on that object.
                    segment, index = self._identifier_data(identifier)
                    self.writer.writePush(segment, index)
                    nArgs += 1
                    identifier = "%s.%s" % (type_, self._readIdentifier(termE))
                else:
                    # Unknown name: a class, i.e. a static function call.
                    identifier = "%s.%s" % (identifier, self._readIdentifier(termE))
                nArgs += self._compileExpressionList(termE)
                self.writer.writeCall(identifier, nArgs)
            else:
                # Plain variable reference.
                self.writer.writePush(*self._identifier_data(identifier))
        elif self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == _SYMBOLS.PARENTHESES_OPEN:
            # Parenthesized sub-expression.
            self.next()
            termE.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
            self._compileExpression(termE)
            self._readSymbol(termE, _SYMBOLS.PARENTHESES_CLOSE)
        elif self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value in _SYMBOLS.UNARY_OPARTORS:
            self.next()
            sym = self.tok.value
            termE.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
            self._readTerm(termE)
            self.writer.writeArithmeticUnary(sym)
        else:
            raise self._syntaxError("Unexpected %s." % self.tok.value)
        parent.append(termE)

    def _identifier_data(self, identifier):
        """Map a symbol-table entry to its (VM segment, index) pair."""
        return _SEG_TRANSLATE[self._symbol_table.kindOf(identifier)], self._symbol_table.indexOf(identifier)

    def _readIdentifier(self, parent):
        """Read an identifier token, declaring it in the symbol table
        when it appears in a declaration context (classVarDec, varDec,
        parameterList), and annotate the XML element with its symbol
        data.  Returns the identifier's name."""
        self.next()
        self._assertToken(self.tok, ELEMENTS.IDENTIFIER, type_=tokenizor.IDENTIFIER)
        name = self.tok.value
        element = _leafElement(ELEMENTS.IDENTIFIER, name)
        type_ = self._symbol_table.typeOf(name)
        kind = None
        index = None
        if type_ is None:
            # Unknown name: declare it if we are inside a declaration.
            if parent.tag in (ELEMENTS.CLASSVARDEC, ELEMENTS.VAR_DEC) and len(parent) > 1:
                type_ = parent[1].text          # declared type
                kind = _SYM_KIND_MAP[parent[0].text]
            elif parent.tag == ELEMENTS.PARAM_LIST and len(parent) > 0:
                type_ = parent[-1].text         # type read just before
                kind = SYM_KINDS.ARG
            if kind is not None:
                index = self._symbol_table.define(name, type_, kind)
        else:
            type_ = self._symbol_table.typeOf(name)
            kind = self._symbol_table.kindOf(name)
            index = self._symbol_table.indexOf(name)
        if kind is not None:
            element.set("type", type_)
            element.set("kind", str(kind))
            element.set("index", str(index))
        parent.append(element)
        return name

    def _readType(self, parent):
        # type: 'int' | 'char' | 'boolean' | className
        if self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _CLASSVARDEC.VAR_TYPES:
            self.next()
            parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
        else:
            self._readIdentifier(parent)

    def _readReturnType(self, parent):
        # returnType: 'void' | type
        if self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _SUBROUTINEDEC.RETURN_TYPES:
            self.next()
            parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
        else:
            self._readIdentifier(parent)

    def _readSymbol(self, parent, expected=None):
        """Read a symbol token (optionally a specific one); return its value."""
        self.next()
        expectedStr = expected if expected is not None else ELEMENTS.SYMBOL
        self._assertToken(self.tok, expectedStr, type_=tokenizor.SYMBOL)
        if expected is not None:
            self._assertToken(self.tok, expected, value_=expected)
        parent.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
        return self.tok.value

    def _readKeyword(self, parent, expected=None):
        """Read a keyword token (optionally a specific one); return its value."""
        self.next()
        expectedStr = expected if expected is not None else ELEMENTS.KEYWORD
        self._assertToken(self.tok, expectedStr, type_=tokenizor.KEYWORD)
        if expected is not None:
            self._assertToken(self.tok, expected, value_=expected)
        parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
        return self.tok.value

    def _readSymbolOptional(self, parent, expected):
        """Consume the symbol if it is next; return whether it was."""
        if self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == expected:
            self.next()
            parent.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
            return True
        return False

    def _readKeywordOptional(self, parent, expected):
        """Consume the keyword if it is next; return whether it was."""
        if self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value == expected:
            self.next()
            parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
            return True
        return False

    def next(self):
        """Advance: tok is the current token, nextTok the lookahead."""
        self.tok = self.iter.next()
        self.nextTok = self.iter.lookahead()

    def _assertToken(self, tok, expected_str, type_=None, value_=None):
        if (type_ != None and tok.type != type_) or (value_ != None and tok.value != value_):
            raise self._syntaxError("Expected %s but found %s" % (expected_str, tok.value), tok)

    def _syntaxError(self, msg, tok=None):
        """Build (not raise) a SyntaxError pointing at the offending token."""
        if tok is None:
            tok = self.tok
        return SyntaxError(msg, (None, tok.srow, tok.scol, tok.line))
class CompilationEngine:
    """Recursive-descent Jack compiler: builds an XML parse tree (ElementTree)
    while emitting VM code through a VMWriter.

    Token access is via a one-token lookahead cursor: `self.tok` is the token
    just consumed, `self.nextTok` the upcoming one (see next()).
    """

    def __init__(self, source, destination):
        # source: Jack input; destination: VM output path handed to VMWriter.
        self.src = source
        self.dst = destination
        self.writer = VMWriter(destination)
        # Lookahead wrapper over the tokenizer gives tok/nextTok access.
        self.iter = Lookahead(tokenizor.newTokenizor(self.src))
        self._symbol_table = SymbolTable()

    def compile(self):
        """Compile the whole input; returns the XML root element of the class."""
        root = self._compileClass()
        return root

    def _compileClass(self):
        # class: 'class' className '{' classVarDec* subroutineDec* '}'
        classE = Element(ELEMENTS.CLASS)
        self._readKeyword(classE, ELEMENTS.CLASS)
        self.className = self._readIdentifier(classE)
        self._readSymbol(classE, _SYMBOLS.BRACKET_CURLY_OPEN)
        self._compileClassVarDec(classE)
        self._compileSubroutine(classE)
        self._readSymbol(classE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        return classE

    def _compileClassVarDec(self, parent):
        # classVarDec: ('static'|'field') type varName (',' varName)* ';'
        # Symbol-table entries are created as a side effect of _readIdentifier.
        while self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _CLASSVARDEC.FIELD_TYPES:
            classVarDecE = Element(ELEMENTS.CLASSVARDEC)
            self._readKeyword(classVarDecE)
            self._readType(classVarDecE)
            self._readIdentifier(classVarDecE)
            while self._readSymbolOptional(classVarDecE, _SYMBOLS.COMMA):
                self._readIdentifier(classVarDecE)
            self._readSymbol(classVarDecE, _SYMBOLS.SEMI_COLON)
            parent.append(classVarDecE)

    def _compileSubroutine(self, parent):
        # subroutineDec: ('constructor'|'function'|'method') returnType name
        #                '(' parameterList ')' subroutineBody
        while self.nextTok and self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _SUBROUTINEDEC.TYPES:
            subroutineDecE = Element(ELEMENTS.SUBROUTINEDEC)
            function_type = self._readKeyword(subroutineDecE)
            self._readReturnType(subroutineDecE)
            self.methodName = self._readIdentifier(subroutineDecE)
            self._symbol_table.startSubroutine(self.className, self.methodName)
            if function_type == _SUBROUTINEDEC.METHOD:
                # Methods receive the object as hidden argument 0.
                self._symbol_table.define("this", self.className, SYM_KINDS.ARG)
            # Label-uniqueness counter, reset per subroutine (see _gen_labels).
            self._uid = -1
            self._readSymbol(subroutineDecE, _SYMBOLS.PARENTHESES_OPEN)
            self._compileParameters(subroutineDecE)
            self._readSymbol(subroutineDecE, _SYMBOLS.PARENTHESES_CLOSE)
            self._compileSubroutineBody(subroutineDecE, function_type)
            parent.append(subroutineDecE)

    def _gen_label(self, type_):
        """Return one unique VM label for the current subroutine.

        NOTE(review): appears unused within this class (_gen_labels is used
        instead) — confirm no external callers before removing.
        """
        self._uid += 1
        return "%s.%s.%s.%d" % (self.className, self.methodName, type_, self._uid)

    def _gen_labels(self, *parts):
        """Return one unique label per given part, all sharing the same uid."""
        self._uid += 1
        return [
            "%s.%s.%s.%d" % (self.className, self.methodName, part, self._uid)
            for part in parts
        ]

    def _compileSubroutineBody(self, parent, function_type):
        # subroutineBody: '{' varDec* statements '}'
        bodyE = Element(ELEMENTS.SUBROUTINEBODY)
        self._readSymbol(bodyE, _SYMBOLS.BRACKET_CURLY_OPEN)
        # nArgs here is actually the LOCAL variable count (function's nVars).
        nArgs = self._compileVarDec(bodyE)
        # parent[2] is the subroutine-name identifier element
        # (children so far: keyword, returnType, name).
        function_name = parent[2].text
        function_full_name = "%s.%s" % (self.className, function_name)
        self.writer.writeFunction(function_full_name, nArgs)
        if function_type == _SUBROUTINEDEC.CONSTRUCTOR:
            # Allocate one word per field and anchor `this` (pointer 0).
            field_count = self._symbol_table.varCount(SYM_KINDS.FIELD)
            self.writer.writePush(SEGMENT.CONST, field_count)
            self.writer.writeCall("Memory.alloc", 1)
            self.writer.writePop(SEGMENT.POINTER, 0)
        elif function_type == _SUBROUTINEDEC.METHOD:
            # Methods: argument 0 is the receiver; anchor `this` to it.
            self.writer.writePush(SEGMENT.ARG, 0)
            self.writer.writePop(SEGMENT.POINTER, 0)
        self._compileStatements(bodyE)
        self._readSymbol(bodyE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        parent.append(bodyE)

    def _compileStatements(self, parent):
        """Compile statement* — let / if / while / do / return."""
        statementsE = Element(ELEMENTS.STATEMENTS)
        while self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _STATEMENTS.TYPE_NAMES:
            if self.nextTok.value == _STATEMENTS.LET:
                # let varName ('[' expr ']')? '=' expr ';'
                statementE = Element(ELEMENTS.STATEMENT_LET)
                self._readKeyword(statementE)
                identifier = self._readIdentifier(statementE)
                is_array = False
                if self._readSymbolOptional(statementE, _SYMBOLS.BRACKET_OPEN):
                    is_array = True
                    # Leave (base + index) on the stack for the store below.
                    self._compileExpression(statementE)
                    self.writer.writePush(*self._identifier_data(identifier))
                    self.writer.writeArithmetic("add")
                    self._readSymbol(statementE, _SYMBOLS.BRACKET_CLOSE)
                self._readSymbol(statementE, _SYMBOLS.EQUAL)
                self._compileExpression(statementE)
                self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
                if is_array:
                    # Stack: addr, value -> stash value in temp 0, point THAT
                    # at addr, then store.
                    self.writer.writePop(SEGMENT.TEMP, 0)
                    self.writer.writePop(SEGMENT.POINTER, 1)
                    self.writer.writePush(SEGMENT.TEMP, 0)
                    self.writer.writePop(SEGMENT.THAT, 0)
                else:
                    self.writer.writePop(*self._identifier_data(identifier))
                statementsE.append(statementE)
            elif self.nextTok.value == _STATEMENTS.IF:
                # if '(' expr ')' '{' stmts '}' ('else' '{' stmts '}')?
                label_else, label_end = self._gen_labels("if.else", "if.end")
                statementE = Element(ELEMENTS.STATEMENT_IF)
                self._readKeyword(statementE)
                self._readSymbol(statementE, _SYMBOLS.PARENTHESES_OPEN)
                self._compileExpression(statementE)
                self._readSymbol(statementE, _SYMBOLS.PARENTHESES_CLOSE)
                # Negate so a false condition jumps over the then-branch.
                self.writer.writeArithmetic("not")
                self.writer.writeIf(label_else)
                self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
                self._compileStatements(statementE)
                self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
                self.writer.writeGoto(label_end)
                self.writer.writeLabel(label_else)
                if self._readKeywordOptional(statementE, _KEYWORDS.ELSE):
                    self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
                    self._compileStatements(statementE)
                    self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
                self.writer.writeLabel(label_end)
                statementsE.append(statementE)
            elif self.nextTok.value == _STATEMENTS.WHILE:
                # while '(' expr ')' '{' stmts '}'
                label_start, label_end = self._gen_labels(
                    "while.start", "while.end")
                self.writer.writeLabel(label_start)
                statementE = Element(ELEMENTS.STATEMENT_WHILE)
                self._readKeyword(statementE)
                self._readSymbol(statementE, _SYMBOLS.PARENTHESES_OPEN)
                self._compileExpression(statementE)
                self._readSymbol(statementE, _SYMBOLS.PARENTHESES_CLOSE)
                self.writer.writeArithmetic("not")
                self.writer.writeIf(label_end)
                self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
                self._compileStatements(statementE)
                self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
                statementsE.append(statementE)
                self.writer.writeGoto(label_start)
                self.writer.writeLabel(label_end)
            elif self.nextTok.value == _STATEMENTS.DO:
                self._compileDo(statementsE)
            elif self.nextTok.value == _STATEMENTS.RETURN:
                # return expr? ';' — void subroutines still return constant 0.
                statementE = Element(ELEMENTS.STATEMENT_RETURN)
                self._readKeyword(statementE)
                if not (self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == _SYMBOLS.SEMI_COLON):
                    self._compileExpression(statementE)
                else:
                    self.writer.writePush(SEGMENT.CONST, 0)
                self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
                self.writer.writeReturn()
                statementsE.append(statementE)
        if len(statementsE) == 0:
            # Keep the empty element expanded in the XML output.
            statementsE.text = "\n"
        parent.append(statementsE)

    def _compileExpression(self, parent):
        # expression: term (op term)* — strictly left-to-right, no precedence
        # (per the Jack grammar).
        expressionE = Element(ELEMENTS.EXPRESSION)
        self._readTerm(expressionE)
        while self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value in _SYMBOLS.OPERATORS:
            symbol = self._readSymbol(expressionE)
            self._readTerm(expressionE)
            # Emit the operator after both operands (postfix/stack order);
            # writeArithmetic is given the raw Jack symbol (e.g. "+").
            self.writer.writeArithmetic(symbol)
        parent.append(expressionE)

    def _compileExpressionList(self, parent):
        """Compile '(' (expr (',' expr)*)? ')' and return the argument count."""
        self._readSymbol(parent, _SYMBOLS.PARENTHESES_OPEN)
        expListE = Element(ELEMENTS.EXPRESSION_LIST)
        nArgs = 0
        while not (self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == _SYMBOLS.PARENTHESES_CLOSE):
            self._compileExpression(expListE)
            self._readSymbolOptional(expListE, _SYMBOLS.COMMA)
            nArgs += 1
        # hack for TextComparer: force the empty element to expand.
        if len(expListE) == 0:
            expListE.text = "\n"
        parent.append(expListE)
        self._readSymbol(parent, _SYMBOLS.PARENTHESES_CLOSE)
        return nArgs

    def _compileDo(self, parent):
        # do subroutineCall ';' — the (ignored) return value is popped to temp 0.
        statementE = Element(ELEMENTS.STATEMENT_DO)
        self._readKeyword(statementE, _STATEMENTS.DO)
        identifier = self._readIdentifier(statementE)
        nArgs = 0
        if self._readSymbolOptional(statementE, _SYMBOLS.DOT):
            type_ = self._symbol_table.typeOf(identifier)
            if type_:
                # var.method(...): push the object as argument 0 and call
                # on the variable's declared type.
                segment, index = self._identifier_data(identifier)
                self.writer.writePush(segment, index)
                nArgs += 1
                identifier = "%s.%s" % (type_, self._readIdentifier(statementE))
            else:
                # Class.function(...): no receiver pushed.
                identifier = "%s.%s" % (identifier, self._readIdentifier(statementE))
        else:
            # Bare name: method call on the current object; push `this`.
            identifier = "%s.%s" % (self.className, identifier)
            self.writer.writePush(SEGMENT.POINTER, 0)
            nArgs += 1
        nArgs += self._compileExpressionList(statementE)
        self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
        self.writer.writeCall(identifier, nArgs)
        self.writer.writePop(SEGMENT.TEMP, 0)
        parent.append(statementE)

    def _compileVarDec(self, parent):
        """Compile ('var' type varName (',' varName)* ';')* and return the
        number of locals declared (despite the name, nArgs counts locals)."""
        nArgs = 0
        while self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value == _KEYWORDS.VAR:
            varDecE = Element(ELEMENTS.VAR_DEC)
            self._readKeyword(varDecE, _KEYWORDS.VAR)
            self._readType(varDecE)
            self._readIdentifier(varDecE)
            nArgs += 1
            while self._readSymbolOptional(varDecE, _SYMBOLS.COMMA):
                self._readIdentifier(varDecE)
                nArgs += 1
            self._readSymbol(varDecE, _SYMBOLS.SEMI_COLON)
            parent.append(varDecE)
        return nArgs

    def _compileParameters(self, parent):
        # parameterList: ((type varName) (',' type varName)*)?
        # _readIdentifier registers each parameter as an ARG symbol.
        paramListE = Element(ELEMENTS.PARAM_LIST)
        while (self.nextTok.type == tokenizor.KEYWORD
               and self.nextTok.value in _CLASSVARDEC.VAR_TYPES
               ) or self.nextTok.type == tokenizor.IDENTIFIER:
            self._readType(paramListE)
            self._readIdentifier(paramListE)
            self._readSymbolOptional(paramListE, _SYMBOLS.COMMA)
        if len(paramListE) == 0:
            paramListE.text = "\n"
        parent.append(paramListE)

    ##############################
    ##########  READ  ############
    ##############################

    def _readTerm(self, parent):
        """Compile a single term, dispatching on the lookahead token."""
        termE = Element(ELEMENTS.TERM)
        if self.nextTok.type == tokenizor.INTEGER:
            self.next()
            termE.append(
                _leafElement(ELEMENTS.INTEGER_CONSTANT, self.tok.value))
            self.writer.writePush(SEGMENT.CONST, self.tok.value)
        elif self.nextTok.type == tokenizor.STRING:
            # String constants are built at runtime:
            # String.new(len) followed by appendChar per character.
            self.next()
            termE.append(_leafElement(ELEMENTS.STRING_CONSTANT, self.tok.value))
            string_value = self.tok.value
            self.writer.writePush(SEGMENT.CONST, len(string_value))
            self.writer.writeCall("String.new", 1)
            for char in string_value:
                self.writer.writePush(SEGMENT.CONST, ord(char))
                self.writer.writeCall("String.appendChar", 2)
        elif self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _KEYWORDS.CONSTANTS:
            # true/false/null/this — each has its own emit routine.
            self.next()
            termE.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
            _KW_CONT_WRITE[self.tok.value](self.writer)
        elif self.nextTok.type == tokenizor.IDENTIFIER:
            identifier = self._readIdentifier(termE)
            nArgs = 0
            if self._readSymbolOptional(termE, _SYMBOLS.BRACKET_OPEN):
                # arr[expr]: compute base+index, dereference via THAT.
                self._compileExpression(termE)
                self.writer.writePush(*self._identifier_data(identifier))
                self.writer.writeArithmetic("add")
                self.writer.writePop(SEGMENT.POINTER, 1)
                self.writer.writePush(SEGMENT.THAT, 0)
                self._readSymbol(termE, _SYMBOLS.BRACKET_CLOSE)
            elif self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == _SYMBOLS.PARENTHESES_OPEN:
                # f(args) — call within the current class.
                nArgs = self._compileExpressionList(termE)
                # NOTE(review): _compileExpressionList already consumes the
                # closing ')'; this extra read looks like a double-consume —
                # verify against the tokenizer/tests before changing.
                self._readSymbol(termE, _SYMBOLS.PARENTHESES_CLOSE)
                self.writer.writeCall(identifier, nArgs)
            elif self._readSymbolOptional(termE, _SYMBOLS.DOT):
                # var.method(args) or Class.function(args) — same logic as
                # _compileDo.
                type_ = self._symbol_table.typeOf(identifier)
                if type_:
                    segment, index = self._identifier_data(identifier)
                    self.writer.writePush(segment, index)
                    nArgs += 1
                    identifier = "%s.%s" % (type_, self._readIdentifier(termE))
                else:
                    identifier = "%s.%s" % (identifier, self._readIdentifier(termE))
                nArgs += self._compileExpressionList(termE)
                self.writer.writeCall(identifier, nArgs)
            else:
                # Plain variable reference.
                self.writer.writePush(*self._identifier_data(identifier))
        elif self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == _SYMBOLS.PARENTHESES_OPEN:
            # Parenthesized sub-expression.
            self.next()
            termE.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
            self._compileExpression(termE)
            self._readSymbol(termE, _SYMBOLS.PARENTHESES_CLOSE)
        elif self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value in _SYMBOLS.UNARY_OPARTORS:
            # Unary op: emit operand first, then the unary operator.
            self.next()
            sym = self.tok.value
            termE.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
            self._readTerm(termE)
            self.writer.writeArithmeticUnary(sym)
        else:
            raise self._syntaxError("Unexpected %s." % self.tok.value)
        parent.append(termE)

    def _identifier_data(self, identifier):
        """Map a symbol name to its (VM segment, index) pair."""
        return _SEG_TRANSLATE[self._symbol_table.kindOf(
            identifier)], self._symbol_table.indexOf(identifier)

    def _readIdentifier(self, parent):
        """Consume an identifier; declare it in the symbol table when it
        appears inside a declaration context (classVarDec / varDec /
        parameterList), otherwise annotate the XML node from the table.
        Returns the identifier's name."""
        self.next()
        self._assertToken(self.tok, ELEMENTS.IDENTIFIER,
                          type_=tokenizor.IDENTIFIER)
        name = self.tok.value
        element = _leafElement(ELEMENTS.IDENTIFIER, name)
        type_ = self._symbol_table.typeOf(name)
        kind = None
        index = None
        if type_ is None:
            # Not yet in the table: declaration context?  Infer type/kind
            # from sibling XML nodes already read for this declaration.
            if parent.tag in (ELEMENTS.CLASSVARDEC, ELEMENTS.VAR_DEC) and len(parent) > 1:
                type_ = parent[1].text
                kind = _SYM_KIND_MAP[parent[0].text]
            elif parent.tag == ELEMENTS.PARAM_LIST and len(parent) > 0:
                type_ = parent[-1].text
                kind = SYM_KINDS.ARG
            if kind is not None:
                index = self._symbol_table.define(name, type_, kind)
        else:
            # Known symbol: a use, not a declaration.
            type_ = self._symbol_table.typeOf(name)
            kind = self._symbol_table.kindOf(name)
            index = self._symbol_table.indexOf(name)
        if kind is not None:
            # Decorate the XML node for debugging/inspection.
            element.set("type", type_)
            element.set("kind", str(kind))
            element.set("index", str(index))
        parent.append(element)
        return name

    def _readType(self, parent):
        # type: 'int'|'char'|'boolean' | className
        if self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _CLASSVARDEC.VAR_TYPES:
            self.next()
            parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
        else:
            self._readIdentifier(parent)

    def _readReturnType(self, parent):
        # returnType: 'void' | type
        if self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _SUBROUTINEDEC.RETURN_TYPES:
            self.next()
            parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
        else:
            self._readIdentifier(parent)

    def _readSymbol(self, parent, expected=None):
        """Consume a symbol token (optionally a specific one); returns it."""
        self.next()
        expectedStr = expected if expected is not None else ELEMENTS.SYMBOL
        self._assertToken(self.tok, expectedStr, type_=tokenizor.SYMBOL)
        if expected is not None:
            self._assertToken(self.tok, expected, value_=expected)
        parent.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
        return self.tok.value

    def _readKeyword(self, parent, expected=None):
        """Consume a keyword token (optionally a specific one); returns it."""
        self.next()
        expectedStr = expected if expected is not None else ELEMENTS.KEYWORD
        self._assertToken(self.tok, expectedStr, type_=tokenizor.KEYWORD)
        if expected is not None:
            self._assertToken(self.tok, expected, value_=expected)
        parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
        return self.tok.value

    def _readSymbolOptional(self, parent, expected):
        """Consume the symbol only if it is next; returns whether it was."""
        if self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == expected:
            self.next()
            parent.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
            return True
        return False

    def _readKeywordOptional(self, parent, expected):
        """Consume the keyword only if it is next; returns whether it was."""
        if self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value == expected:
            self.next()
            parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
            return True
        return False

    def next(self):
        """Advance the cursor: tok <- next token, nextTok <- lookahead."""
        self.tok = self.iter.next()
        self.nextTok = self.iter.lookahead()

    def _assertToken(self, tok, expected_str, type_=None, value_=None):
        # Raise (via the returned-and-raised _syntaxError) on type or value
        # mismatch; either check may be skipped by passing None.
        if (type_ != None and tok.type != type_) or (value_ != None and tok.value != value_):
            raise self._syntaxError(
                "Expected %s but found %s" % (expected_str, tok.value), tok)

    def _syntaxError(self, msg, tok=None):
        """Build (not raise) a SyntaxError pointing at the offending token."""
        if tok is None:
            tok = self.tok
        return SyntaxError(msg, (None, tok.srow, tok.scol, tok.line))
class CompilationEngine:
    """Compiles one Jack class from a token stream: emits VM code through a
    VMWriter while also building an XML parse tree (ElementTree).

    Fixes applied in review:
    - string constants are now built via String.new/appendChar (was: pushed
      the raw Python string as a constant);
    - array reads and array `let` targets now go through pointer 1 / THAT
      (was: the index expression was compiled but never used);
    - constructors allocate their object with Memory.alloc and methods anchor
      `this` from argument 0 (was: neither was emitted, and constructors
      wrongly registered `this` as argument 0, shifting parameter indices);
    - class-typed parameters are accepted in parameter lists;
    - `return -x;` / `return (...);` no longer compile as a void return;
    - the subroutine-call XML node carries the identifier actually read.
    """

    def __init__(self, token_stream, out_file, xml_name):
        ''' creates a new compilation engine with
        the given input and output.
        The next method called must be compileClass(). '''
        self.stream = token_stream
        self.writer = VMWriter(out_file)
        self.symbols = SymbolTable()
        self.xml_name = xml_name
        self.root = ET.Element('class')
        self.stream.advance()
        assert self.stream.keyword() == 'class'

    def add_terminal(self, root, text):
        """Append a terminal XML node for the current token and advance."""
        terminal = ET.SubElement(root, self.stream.token_type())
        terminal.text = ' {text} '.format(text=text)
        if self.stream.has_more_tokens():
            self.stream.advance()

    def compile_class(self):
        ''' compiles a complete class '''
        self.add_terminal(self.root, self.stream.keyword())      # 'class'
        self.class_name = self.stream.identifier()
        self.add_terminal(self.root, self.class_name)
        self.add_terminal(self.root, self.stream.symbol())       # '{'
        while self.stream.token_type() == tokenizer.KEYWORD and self.stream.keyword() in CLASS_VARS:
            self.compile_class_var_dec()
        while self.stream.token_type() == tokenizer.KEYWORD and self.stream.keyword() in SUBROUTINE_TYPES:
            self.compile_subroutine()
        self.add_terminal(self.root, self.stream.symbol())       # '}'

    def compile_class_var_dec(self):
        ''' compiles a static declaration or a field declaration. '''
        class_var_root = ET.SubElement(self.root, CLASS_VAR_DEC)
        kind = self.stream.keyword()                             # static|field
        self.add_terminal(class_var_root, kind)
        if self.stream.token_type() == tokenizer.KEYWORD:
            type_name = self.stream.keyword()                    # int|char|boolean
        else:
            type_name = self.stream.identifier()                 # class type
        self.add_terminal(class_var_root, type_name)
        name = self.stream.identifier()
        self.add_terminal(class_var_root, name)
        self.symbols.define(name, type_name, kind)
        while self.stream.symbol() == COMMA:
            self.add_terminal(class_var_root, self.stream.symbol())
            name = self.stream.identifier()
            self.add_terminal(class_var_root, name)
            self.symbols.define(name, type_name, kind)
        self.add_terminal(class_var_root, self.stream.symbol())  # ';'

    def compile_subroutine(self):
        ''' compiles a complete method, function, or constructor. '''
        subroutine_dec = ET.SubElement(self.root, SUBROUTINE_DEC)
        self.symbols.start_subroutine()
        subroutine_type = self.stream.keyword()
        if subroutine_type == 'method':
            # Only methods receive the object as hidden argument 0.
            # (Fix: previously constructors also defined 'this' here, which
            # shifted every real parameter's index by one.)
            self.symbols.define('this', self.class_name, 'argument')
        self.add_terminal(subroutine_dec, subroutine_type)
        if self.stream.token_type() == tokenizer.KEYWORD:
            self.add_terminal(subroutine_dec, self.stream.keyword())
        else:
            self.add_terminal(subroutine_dec, self.stream.identifier())
        name = self.stream.identifier()
        self.add_terminal(subroutine_dec, name)
        self.add_terminal(subroutine_dec, self.stream.symbol())  # '('
        self.compile_parameter_list(subroutine_dec)
        self.add_terminal(subroutine_dec, self.stream.symbol())  # ')'
        subroutine_body = ET.SubElement(subroutine_dec, SUBROUTINE_BODY)
        self.add_terminal(subroutine_body, self.stream.symbol())  # '{'
        while self.stream.token_type() == tokenizer.KEYWORD and self.stream.keyword() == VAR:
            self.compile_var_dec(subroutine_body)
        func_name = '{cls}.{sub}'.format(cls=self.class_name, sub=name)
        self.writer.write_function(func_name, self.symbols.var_count('var'))
        if subroutine_type == 'constructor':
            # Allocate one word per field and anchor `this` (pointer 0).
            # (assumes var_count('field') reports class-scope fields —
            # consistent with the kind strings passed to define above)
            self.writer.write_push('constant', self.symbols.var_count('field'))
            self.writer.write_call('Memory.alloc', 1)
            self.writer.write_pop('pointer', 0)
        elif subroutine_type == 'method':
            # Methods: argument 0 is the receiver; anchor `this` to it.
            self.writer.write_push('argument', 0)
            self.writer.write_pop('pointer', 0)
        self.compile_statements(subroutine_body)
        self.add_terminal(subroutine_body, self.stream.symbol())  # '}'

    def compile_parameter_list(self, root):
        ''' compiles a (possibly empty) parameter list,
        not including the enclosing "()". '''
        parameter_list_root = ET.SubElement(root, PARAMETER_LIST)
        if self.stream.token_type() != tokenizer.SYMBOL:
            type_name = self._parameter_type()
            self.add_terminal(parameter_list_root, type_name)
            name = self.stream.identifier()
            self.add_terminal(parameter_list_root, name)
            self.symbols.define(name, type_name, 'argument')
            while self.stream.token_type() == tokenizer.SYMBOL and self.stream.symbol() == COMMA:
                self.add_terminal(parameter_list_root, self.stream.symbol())
                type_name = self._parameter_type()
                self.add_terminal(parameter_list_root, type_name)
                name = self.stream.identifier()
                self.add_terminal(parameter_list_root, name)
                self.symbols.define(name, type_name, 'argument')

    def _parameter_type(self):
        """Current token as a parameter type: keyword primitive or class
        identifier. (Fix: previously only keyword() was consulted, so
        class-typed parameters broke.)"""
        if self.stream.token_type() == tokenizer.IDENTIFIER:
            return self.stream.identifier()
        return self.stream.keyword()

    def compile_var_dec(self, root):
        ''' compiles a var declaration '''
        var_dec_root = ET.SubElement(root, VAR_DEC)
        self.add_terminal(var_dec_root, self.stream.keyword())   # 'var'
        if self.stream.token_type() == tokenizer.IDENTIFIER:
            type_name = self.stream.identifier()
        else:
            type_name = self.stream.keyword()
        self.add_terminal(var_dec_root, type_name)
        name = self.stream.identifier()
        self.add_terminal(var_dec_root, name)
        self.symbols.define(name, type_name, 'var')
        while self.stream.symbol() == COMMA:
            self.add_terminal(var_dec_root, self.stream.symbol())
            name = self.stream.identifier()
            self.add_terminal(var_dec_root, name)
            self.symbols.define(name, type_name, 'var')
        self.add_terminal(var_dec_root, self.stream.symbol())    # ';'

    def compile_statements(self, root):
        ''' compiles a sequence of statements,
        not including the enclosing "{}". '''
        statements_root = ET.SubElement(root, STATEMENTS)
        while self.stream.token_type() == tokenizer.KEYWORD:
            keyword = self.stream.keyword()
            if keyword == 'let':
                self.compile_let(statements_root)
            elif keyword == 'if':
                self.compile_if(statements_root)
            elif keyword == 'while':
                self.compile_while(statements_root)
            elif keyword == 'do':
                self.compile_do(statements_root)
            elif keyword == 'return':
                self.compile_return(statements_root)
            else:
                assert False, 'unsupported keyword {keyword}'.format(keyword=keyword)

    def compile_do(self, root):
        ''' compiles a do statement '''
        do_root = ET.SubElement(root, DO)
        self.add_terminal(do_root, self.stream.keyword())
        self.compile_subroutine_call(do_root)
        # Discard the (unused) return value of the called subroutine.
        self.writer.write_pop('temp', 0)
        self.add_terminal(do_root, self.stream.symbol())         # ';'

    def compile_let(self, root):
        ''' compiles a let statement '''
        let_root = ET.SubElement(root, LET)
        self.add_terminal(let_root, self.stream.keyword())
        lhs = self.stream.identifier()
        self.add_terminal(let_root, lhs)
        is_array = False
        if self.stream.token_type() == tokenizer.SYMBOL and self.stream.symbol() == OPEN_BRACKET:
            is_array = True
            self.add_terminal(let_root, self.stream.symbol())    # '['
            self.compile_expression(let_root)                    # index
            self.add_terminal(let_root, self.stream.symbol())    # ']'
            # Leave (base + index) on the stack for the THAT store below.
            # (Fix: previously the index was compiled but never used and the
            # rhs was popped straight into the variable.)
            self.writer.write_push(self.symbols.kind_of(lhs),
                                   self.symbols.index_of(lhs))
            self.writer.write_arithmetic('add')
        self.add_terminal(let_root, self.stream.symbol())        # '='
        self.compile_expression(let_root)                        # rhs value
        self.add_terminal(let_root, self.stream.symbol())        # ';'
        if is_array:
            # Stack: addr, value -> stash value, point THAT at addr, store.
            self.writer.write_pop('temp', 0)
            self.writer.write_pop('pointer', 1)
            self.writer.write_push('temp', 0)
            self.writer.write_pop('that', 0)
        else:
            self.writer.write_pop(self.symbols.kind_of(lhs),
                                  self.symbols.index_of(lhs))

    def compile_while(self, root):
        ''' compiles a while statement '''
        while_root = ET.SubElement(root, WHILE)
        while_expression = self.symbols.generate_label('WHILE_EXP')
        while_end = self.symbols.generate_label('WHILE_END')
        self.add_terminal(while_root, self.stream.keyword())
        self.add_terminal(while_root, self.stream.symbol())      # '('
        self.writer.write_label(while_expression)
        self.compile_expression(while_root)
        # Negated condition jumps out of the loop.
        self.writer.write_arithmetic('not')
        self.writer.write_if(while_end)
        self.add_terminal(while_root, self.stream.symbol())      # ')'
        self.add_terminal(while_root, self.stream.symbol())      # '{'
        self.compile_statements(while_root)
        self.writer.write_goto(while_expression)
        self.writer.write_label(while_end)
        self.add_terminal(while_root, self.stream.symbol())      # '}'

    def compile_return(self, root):
        ''' compiles a return statement '''
        return_root = ET.SubElement(root, RETURN)
        self.add_terminal(return_root, self.stream.keyword())
        # Void return only when the very next token is ';'.
        # (Fix: testing for any SYMBOL broke `return -x;` / `return (..);`.)
        if not (self.stream.token_type() == tokenizer.SYMBOL
                and self.stream.symbol() == ';'):
            self.compile_expression(return_root)
        else:
            # Void subroutines still return a value on the stack.
            self.writer.write_push('constant', 0)
        self.writer.write_return()
        self.add_terminal(return_root, self.stream.symbol())     # ';'

    def compile_if(self, root):
        ''' compiles an if statement '''
        if_root = ET.SubElement(root, IF)
        if_label = self.symbols.generate_label('IF_TRUE')
        else_label = self.symbols.generate_label('IF_FALSE')
        end_label = self.symbols.generate_label('IF_END')
        self.add_terminal(if_root, self.stream.keyword())
        self.add_terminal(if_root, self.stream.symbol())         # '('
        self.compile_expression(if_root)
        # true -> then-branch; false falls through to the goto else.
        self.writer.write_if(if_label)
        self.writer.write_goto(else_label)
        self.writer.write_label(if_label)
        self.add_terminal(if_root, self.stream.symbol())         # ')'
        self.add_terminal(if_root, self.stream.symbol())         # '{'
        self.compile_statements(if_root)
        self.writer.write_goto(end_label)
        self.add_terminal(if_root, self.stream.symbol())         # '}'
        self.writer.write_label(else_label)
        if self.stream.token_type() == tokenizer.KEYWORD and self.stream.keyword() == 'else':
            self.add_terminal(if_root, self.stream.keyword())
            self.add_terminal(if_root, self.stream.symbol())     # '{'
            self.compile_statements(if_root)
            self.add_terminal(if_root, self.stream.symbol())     # '}'
        self.writer.write_label(end_label)

    def compile_expression(self, root):
        ''' compiles an expression: term (op term)*, strictly left-to-right
        (the Jack grammar defines no operator precedence). '''
        expression_root = ET.SubElement(root, EXPRESSION)
        self.compile_term(expression_root)
        while self.stream.token_type() == tokenizer.SYMBOL and self.stream.symbol() in OPS:
            operator = self.stream.symbol()
            self.add_terminal(expression_root, operator)
            self.compile_term(expression_root)
            # Emit the operator after both operands (stack order).
            if operator == '*':
                self.writer.write_call('Math.multiply', 2)
            elif operator == '/':
                self.writer.write_call('Math.divide', 2)
            else:
                self.writer.write_arithmetic(
                    {'+': 'add', '-': 'sub', '&': 'and', '|': 'or',
                     '<': 'lt', '>': 'gt', '=': 'eq'}[operator])

    def compile_term(self, root):
        ''' compiles a term. When the current token is an identifier, a
        single look-ahead token ("[", "(" or ".") distinguishes a variable,
        an array entry, and a subroutine call; any other token is not part
        of this term and is not advanced over. '''
        term_root = ET.SubElement(root, TERM)
        token_type = self.stream.token_type()
        if token_type == tokenizer.INT:
            val = self.stream.int_val()
            self.add_terminal(term_root, val)
            self.writer.write_push('constant', val)
        elif token_type == tokenizer.STRING:
            val = self.stream.string_val()
            self.add_terminal(term_root, val)
            # Build the string at runtime: String.new(len), then one
            # appendChar per character. (Fix: previously pushed the raw
            # string as a constant, which is not valid VM code.)
            self.writer.write_push('constant', len(val))
            self.writer.write_call('String.new', 1)
            for character in val:
                self.writer.write_push('constant', ord(character))
                self.writer.write_call('String.appendChar', 2)
        elif token_type == tokenizer.KEYWORD and self.stream.keyword() in KEYWORD_CONSTANTS:
            keyword = self.stream.keyword()
            self.add_terminal(term_root, keyword)
            if keyword == 'true':
                # true is -1 (all bits set): not(0).
                self.writer.write_push('constant', 0)
                self.writer.write_arithmetic('not')
            elif keyword in ['false', 'null']:
                self.writer.write_push('constant', 0)
            else:                                               # 'this'
                self.writer.write_push('this', 0)
        elif token_type == tokenizer.IDENTIFIER:
            if self.stream.peek() == OPEN_BRACKET:
                # arr[expr]: base + index, then dereference through THAT.
                name = self.stream.identifier()
                self.writer.write_push(self.symbols.kind_of(name),
                                       self.symbols.index_of(name))
                self.add_terminal(term_root, name)
                self.add_terminal(term_root, self.stream.symbol())  # '['
                self.compile_expression(term_root)
                self.add_terminal(term_root, self.stream.symbol())  # ']'
                # (Fix: the dereference below was missing entirely.)
                self.writer.write_arithmetic('add')
                self.writer.write_pop('pointer', 1)
                self.writer.write_push('that', 0)
            elif self.stream.peek() == OPEN_PAREN or self.stream.peek() == PERIOD:
                self.compile_subroutine_call(term_root)
            else:
                name = self.stream.identifier()
                self.add_terminal(term_root, self.stream.identifier())
                self.writer.write_push(self.symbols.kind_of(name),
                                       self.symbols.index_of(name))
        elif token_type == tokenizer.SYMBOL and self.stream.symbol() == OPEN_PAREN:
            self.add_terminal(term_root, self.stream.symbol())   # '('
            self.compile_expression(term_root)
            self.add_terminal(term_root, self.stream.symbol())   # ')'
        elif token_type == tokenizer.SYMBOL and self.stream.symbol() in UNARY_OPS:
            operator = self.stream.symbol()
            self.add_terminal(term_root, operator)
            self.compile_term(term_root)
            self.writer.write_arithmetic('neg' if operator == '-' else 'not')
        else:
            assert False, 'unsupported token {token}'.format(token=self.stream.current_token)

    def compile_expression_list(self, root):
        ''' compiles a (possibly empty) comma-separated list of expressions;
        returns the number of expressions compiled. '''
        expression_list_root = ET.SubElement(root, EXPRESSION_LIST)
        if self.stream.token_type() == tokenizer.SYMBOL and self.stream.symbol() == CLOSE_PAREN:
            return 0
        self.compile_expression(expression_list_root)
        num_vars = 1
        while self.stream.symbol() == COMMA:
            self.add_terminal(expression_list_root, self.stream.symbol())
            self.compile_expression(expression_list_root)
            num_vars += 1
        return num_vars

    def compile_subroutine_call(self, root):
        """Compile (className|subroutineName) ('.' subroutineName)? '(' … ')'
        and emit the call."""
        class_name = self.class_name
        subroutine_name = self.stream.identifier()
        # (Fix: the XML node previously carried self.class_name's text
        # instead of the identifier actually consumed here.)
        self.add_terminal(root, subroutine_name)
        if self.stream.symbol() == PERIOD:
            self.add_terminal(root, self.stream.symbol())
            class_name = subroutine_name
            subroutine_name = self.stream.identifier()
            self.add_terminal(root, self.stream.identifier())
        # NOTE(review): object receivers are never pushed — `var.method()`
        # and same-class method calls compile without an argument 0. Fixing
        # that requires mapping the prefix through the symbol table (see the
        # sibling engine's _compileDo); confirm intended scope first.
        self.add_terminal(root, self.stream.symbol())            # '('
        num_vars = self.compile_expression_list(root)
        self.add_terminal(root, self.stream.symbol())            # ')'
        self.writer.write_call('{cls}.{sub}'.format(
            cls=class_name, sub=subroutine_name), num_vars)

    def write(self):
        """Optionally dump the XML tree (minus the XML declaration line) and
        close the VM writer."""
        if self.xml_name:
            lines = self._write(self.root).split('\n')
            lines = lines[1:]
            file = open(self.xml_name, 'w')
            file.write('\n'.join(lines))
            file.close()
        self.writer.close()

    def _write(self, root):
        return minidom.parseString(ET.tostring(root)).toprettyxml()
def build_vm_writer(self, jack_file):
    """(Re)create this engine's VMWriter for the given Jack file.

    Replaces any existing `vm_writer`; the output path handling is the
    VMWriter's responsibility.
    """
    self.vm_writer = VMWriter(jack_file)
class CompilationEngine:
    '''The brain of the Jack syntax analyzer'''

    # Constructor
    def __init__(self, tokenizer: JackTokenizer, out_path: Path):
        self.tokenizer = tokenizer
        # Create symbol tables: one per scope (class / subroutine).
        self.class_level_st = SymbolTable()
        self.subroutine_level_st = SymbolTable()
        # class's name, plus the subroutine currently being compiled.
        self.class_name = None
        self.func_name = None
        self.sub_type = None
        # Open the output file for writing (XML parse-tree stream).
        self.out_stream = out_path.open('w')
        # Create a new VM writer for writing (same stem, .vm suffix).
        self.vm_writer = VMWriter(out_path.with_suffix(".vm"))
        # For generating labels (monotonic counters per statement kind).
        self.label_count = {"if": 0, "while": 0}

    def get_if_labels(self):
        """Return a fresh pair of unique labels for one if statement."""
        self.label_count["if"] += 1
        return (f"LABEL_IF_{self.label_count['if'] - 1}_1",
                f"LABEL_IF_{self.label_count['if'] - 1}_2")

    def get_while_labels(self):
        """Return a fresh pair of unique labels for one while statement."""
        self.label_count["while"] += 1
        return (f"LABEL_WHILE_{self.label_count['while'] - 1}_1",
                f"LABEL_WHILE_{self.label_count['while'] - 1}_2")

    def start_compilation(self):
        """Entry point: load the first token and compile the class."""
        # Read the first token into memory
        self.tokenizer.has_more_tokens()
        # Start analyzing syntax; a Jack file must begin with 'class'.
        if self.tokenizer.get_token_type() == TokenType.KEYWORD:
            if self.tokenizer.get_keyword_type() == KeywordType.CLASS:
                self.compile_class()
            else:
                raise AttributeError("Not starting with a class")

    # Helper method to write terminal XML tags
    def write_terminal_tag(self, t, v):
        # t: TokenType of the token; v: its text. Unknown types are ignored.
        if t == TokenType.KEYWORD:
            self.out_stream.write(f"<keyword> {v} </keyword>\n")
        elif t == TokenType.IDENTIFIER:
            self.out_stream.write(f"<identifier> {v} </identifier>\n")
        elif t == TokenType.SYMBOL:
            self.out_stream.write(f"<symbol> {v} </symbol>\n")
        elif t == TokenType.INT_CONST:
            self.out_stream.write(
                f"<integerConstant> {v} </integerConstant>\n")
        elif t == TokenType.STRING_CONST:
            self.out_stream.write(f"<stringConstant> {v} </stringConstant>\n")

    # 'class' className '{' classVarDec* subroutineDec* '}'
    def compile_class(self):
        # Write opening tag
        self.out_stream.write("<class>\n")
        self.write_terminal_tag(self.tokenizer.get_token_type(), 'class')
        # Read the next token
        self.tokenizer.has_more_tokens()
        if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
            self.class_name = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(self.tokenizer.get_token_type(),
                                    self.class_name)
            # NOTE(review): this ===DECLARED=== write is a debug trace going
            # into the XML stream (breaks well-formedness) — presumably
            # leftover instrumentation; confirm before removing.
            self.out_stream.write("\n===DECLARED===\nclass name\n=======")
        else:
            raise AttributeError("Not a valid class name!")
        # Read the next token
        self.tokenizer.has_more_tokens()
        self.eat('{')
        self.write_terminal_tag(self.tokenizer.get_token_type(),
                                self.tokenizer.get_symbol())
        # Handle class variable declaration (classVarDec*)
        # Proceed to next token
        self.tokenizer.has_more_tokens()
        # While there are field/static declarations
        while \
            (self.tokenizer.get_token_type() == TokenType.KEYWORD) and\
            (
                self.tokenizer.get_keyword_type() in
                (KeywordType.FIELD, KeywordType.STATIC)
            ):
            self.compile_class_var_dec()
        # Then zero or more subroutine declarations.
        while \
            (self.tokenizer.get_token_type() == TokenType.KEYWORD) and\
            (
                self.tokenizer.get_keyword_type() in
                (KeywordType.CONSTRUCTOR, KeywordType.FUNCTION,
                 KeywordType.METHOD)
            ):
            self.compile_subroutine_dec()
        # Class ending curly brackets
        self.eat("}")
        self.write_terminal_tag(TokenType.SYMBOL, "}")
        # At the end of function call
        self.out_stream.write("</class>\n")

    # ('static'|'field') type varName (',' varName)* ';'
    def compile_class_var_dec(self):
        # Write opening tag
        self.out_stream.write("<classVarDec>\n")
        # Write static/field
        self.write_terminal_tag(TokenType.KEYWORD,
                                self.tokenizer.get_cur_ident())
        # To store variable properties
        var_kind = None
        var_type = None
        var_index = None
        var_name = None
        if self.tokenizer.get_cur_ident() == "static":
            var_kind = SymbolKind.STATIC
        elif self.tokenizer.get_cur_ident() == "field":
            # (FEILD is the project enum's actual spelling.)
            var_kind = SymbolKind.FEILD
        else:
            raise Exception("Other than static or feild:" +
                            self.tokenizer.get_cur_ident())
        # Read the next token
        self.tokenizer.has_more_tokens()
        if self.is_valid_type():
            self.write_terminal_tag(self.tokenizer.get_token_type(),
                                    self.tokenizer.get_cur_ident())
            var_type = self.tokenizer.get_cur_ident()
        else:
            raise AssertionError("Invalid class variable type!")
        # Move to next token
        self.tokenizer.has_more_tokens()
        if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
            var_name = self.tokenizer.get_cur_ident()
            # Write varible tag to XML file
            self.write_terminal_tag(self.tokenizer.get_token_type(), var_name)
            # Define new class level variable
            self.class_level_st.define(var_name, var_type, var_kind)
            var_index = self.class_level_st.get_index_of(var_name)
            # Write variable properties
            # NOTE(review): debug trace written into the XML stream — see
            # compile_class; confirm before removing.
            self.out_stream.write(
                f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n======="
            )
        else:
            raise AssertionError("Invalid class variable name!")
        # Move to the next token
        self.tokenizer.has_more_tokens()
        # If has more than one varibles: E.g. field int x, y, z;
        while self.tokenizer.get_token_type() == TokenType.SYMBOL \
                and self.tokenizer.get_symbol() == ",":
            self.write_terminal_tag(TokenType.SYMBOL, ",")
            # Move to next token
            self.tokenizer.has_more_tokens()
            if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
                var_name = self.tokenizer.get_cur_ident()
                # Write varible tag to XML file
                self.write_terminal_tag(self.tokenizer.get_token_type(),
                                        var_name)
                # Define new class level variable
                self.class_level_st.define(var_name, var_type, var_kind)
                var_index = self.class_level_st.get_index_of(var_name)
                # Write variable properties
                self.out_stream.write(
                    f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n======="
                )
            else:
                raise AssertionError(
                    "Invalid Syntax for class varible declaration!")
            # Move to next token
            self.tokenizer.has_more_tokens()
        # Must end with ";"
        self.eat(";")
        self.write_terminal_tag(TokenType.SYMBOL, ";")
        # Move to next token
        self.tokenizer.has_more_tokens()
        # Write closing tag
        self.out_stream.write("</classVarDec>\n")

    # ('constructor' | 'function' | 'method') ('void' | 'type') subroutineName
    def compile_subroutine_dec(self):
        # Opening tag
        self.out_stream.write("<subroutineDec>\n")
        # To store function parameters
        func_params = {}
        # Write subroutine type
        self.sub_type = self.tokenizer.get_cur_ident()
        self.write_terminal_tag(TokenType.KEYWORD, self.sub_type)
        # Reset subroutine level symbol table (new scope per subroutine).
        self.subroutine_level_st.reset_table()
        # Insert `this`, if method — methods take the receiver as argument 0.
        if self.sub_type == "method":
            self.subroutine_level_st.define("this", self.class_name,
                                            SymbolKind.ARG)
        # Move to next token
        self.tokenizer.has_more_tokens()
        # Return type: any valid type, or void.
        if self.is_valid_type() or \
                (self.tokenizer.get_token_type() == TokenType.KEYWORD \
                 and self.tokenizer.get_keyword_type() == KeywordType.VOID):
            self.write_terminal_tag(self.tokenizer.get_token_type(),
                                    self.tokenizer.get_cur_ident())
        else:
            raise AssertionError("Not a valid subroutine return type!")
        # Move to next token
        self.tokenizer.has_more_tokens()
        if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
            func_params["name"] = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(TokenType.IDENTIFIER,
                                    func_params["name"])
        else:
            raise AssertionError("Invalid Syntax for function name!")
        # Move to next token
        self.tokenizer.has_more_tokens()
        self.eat('(')
        self.write_terminal_tag(TokenType.SYMBOL, "(")
        # Move to next token
        self.tokenizer.has_more_tokens()
        # If there are some parameters (next token not a symbol => not ')').
        self.out_stream.write("<parameterList>\n")
        if not (self.tokenizer.get_token_type() == TokenType.SYMBOL):
            self.compile_parameter_list()
        self.out_stream.write("</parameterList>\n")
        # Move to next token
        self.eat(')')
        self.write_terminal_tag(TokenType.SYMBOL, ")")
        # Write function VM command — the actual `function` line is emitted
        # later by compile_subroutine_body, once locals are counted.
        self.func_name = func_params['name']
        # Move to the next token
        self.tokenizer.has_more_tokens()
        self.compile_subroutine_body()
        # Closing tag
        self.out_stream.write("</subroutineDec>\n")

    # ((type varName) (',' type varName)*)?
def compile_parameter_list(self):
    """Compile a non-empty parameter list: (type varName) (',' type varName)*.

    The caller (compile_subroutine_dec) only invokes this when the list is
    non-empty.  Each parameter is defined in the subroutine-level symbol
    table with kind ARG, and a DECLARED trace is written to the XML stream.
    Leaves the tokenizer on the token following the last parameter.
    """
    # For storing varible params
    var_name = None
    var_type = None
    var_kind = SymbolKind.ARG  # Argument list
    var_index = None
    if self.is_valid_type():
        var_type = self.tokenizer.get_cur_ident()
        self.write_terminal_tag(self.tokenizer.get_token_type(), var_type)
    else:
        raise AssertionError("Invalid syntax in parameter list!")
    # Move to next token
    self.tokenizer.has_more_tokens()
    if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
        var_name = self.tokenizer.get_cur_ident()
        self.write_terminal_tag(TokenType.IDENTIFIER, var_name)
    else:
        # FIX: removed the doubled word in the original error message
        # ("parameter name name!").
        raise AssertionError(
            "Invalid Syntax for function parameter name!")
    # Define the argument variable
    self.subroutine_level_st.define(var_name, var_type, var_kind)
    # Get the index of the newly created variable
    var_index = self.subroutine_level_st.get_index_of(var_name)
    # Write variable properties
    self.out_stream.write(
        f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n======="
    )
    # Move to next token
    self.tokenizer.has_more_tokens()
    # Handle more than one parameters
    while self.tokenizer.get_token_type() == TokenType.SYMBOL \
            and self.tokenizer.get_symbol() == ",":
        self.write_terminal_tag(TokenType.SYMBOL, ",")
        # Read the next token
        self.tokenizer.has_more_tokens()
        # If the current token is a valid type name
        if self.is_valid_type():
            var_type = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(self.tokenizer.get_token_type(),
                                    var_type)
        else:
            raise AssertionError("Invalid variable type in parameter list")
        # Read the next token
        self.tokenizer.has_more_tokens()
        # If current token is a valid identifier
        if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
            var_name = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(TokenType.IDENTIFIER, var_name)
        else:
            raise AssertionError(
                "Invalid variable name in parameter list!!")
        self.subroutine_level_st.define(var_name, var_type, var_kind)
        var_index = self.subroutine_level_st.get_index_of(var_name)
        # Write variable properties
        self.out_stream.write(
            f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n======="
        )
        # Read the next token
        self.tokenizer.has_more_tokens()

# '{' varDec* statements '}'
def compile_subroutine_body(self):
    """Compile a subroutine body and emit its VM `function` preamble.

    All local `var` declarations are compiled first so the local count is
    known before `function Class.name nVars` is written.  Constructors then
    allocate the object (Memory.alloc) and set `pointer 0`; methods set
    `pointer 0` from `argument 0` (the receiver).
    """
    # Write opening tag
    self.out_stream.write("<subroutineBody>\n")
    # Eat opening curly bracket
    self.eat("{")
    self.write_terminal_tag(TokenType.SYMBOL, "{")
    # Move to next token
    self.tokenizer.has_more_tokens()
    # Handle variable declarations
    while self.tokenizer.get_token_type() == TokenType.KEYWORD \
            and self.tokenizer.get_keyword_type() == KeywordType.VAR:
        # Current token is the 'var' keyword
        self.compile_var_dec()
    # Get number of local variables
    # for the current compiling function
    nVars = self.subroutine_level_st.get_var_count(SymbolKind.VAR)
    # Write function
    self.vm_writer.write_function(f"{self.class_name}.{self.func_name}",
                                  nVars)
    if self.sub_type == "constructor":
        # NOTE: FEILD (sic) matches the SymbolKind enum spelling used
        # elsewhere in this project.
        nFeilds = self.class_level_st.get_var_count(SymbolKind.FEILD)
        # write "push constant nFeilds"
        self.vm_writer.write_push(SegmentType.CONST, nFeilds)
        self.vm_writer.write_call("Memory.alloc", 1)
        self.vm_writer.write_pop(SegmentType.POINTER, 0)
    elif self.sub_type == "method":
        # push argument 0
        self.vm_writer.write_push(SegmentType.ARG, 0)
        # pop pointer 0
        self.vm_writer.write_pop(SegmentType.POINTER, 0)
    # Handle statements
    self.compile_statements()
    # Eat closing curly bracker
    self.eat("}")
    self.write_terminal_tag(TokenType.SYMBOL, "}")
    # Move to next token
    self.tokenizer.has_more_tokens()
    # Write closing tag
    self.out_stream.write("</subroutineBody>\n")

# 'var' type varName (',' varName)* ';'
def compile_var_dec(self):
    """Compile one local variable declaration line.

    Defines each declared name in the subroutine-level symbol table with
    kind VAR; no VM code is emitted here (locals are counted later by
    compile_subroutine_body).
    """
    # Write opening tag
    self.out_stream.write("<varDec>\n")
    # Write var keyword tag
    self.write_terminal_tag(TokenType.KEYWORD, "var")
    # For storing variable params
    var_name = None
    var_type = None
    var_kind = SymbolKind.VAR
    var_index = None
    # Move to next token
    self.tokenizer.has_more_tokens()
    # Write the type of variables
    if self.is_valid_type():
        var_type = self.tokenizer.get_cur_ident()
        self.write_terminal_tag(self.tokenizer.get_token_type(), var_type)
    else:
        raise AssertionError("Not a valid var type!")
    # Move to next token
    self.tokenizer.has_more_tokens()
    if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
        var_name = self.tokenizer.get_cur_ident()
        self.write_terminal_tag(TokenType.IDENTIFIER, var_name)
    else:
        raise AssertionError("Invalid Syntax for var name!")
    # Move to next token
    self.tokenizer.has_more_tokens()
    self.subroutine_level_st.define(var_name, var_type, var_kind)
    var_index = self.subroutine_level_st.get_index_of(var_name)
    # Write variable properties
    self.out_stream.write(
        f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n======="
    )
    # Additional names declared with the same type: var int x, y, z;
    while self.tokenizer.get_token_type(
    ) == TokenType.SYMBOL and self.tokenizer.get_symbol() == ",":
        # Write this symbol
        self.write_terminal_tag(TokenType.SYMBOL, ",")
        # Move to the next token
        self.tokenizer.has_more_tokens()
        if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
            var_name = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(TokenType.IDENTIFIER, var_name)
        else:
            raise AssertionError("Invalid Syntax for var name!")
        self.subroutine_level_st.define(var_name, var_type, var_kind)
        var_index = self.subroutine_level_st.get_index_of(var_name)
        # Write variable properties
        self.out_stream.write(
            f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n======="
        )
        # Move to the next token
        self.tokenizer.has_more_tokens()
    self.eat(";")
    self.write_terminal_tag(TokenType.SYMBOL, ";")
    # Move to the next token
    self.tokenizer.has_more_tokens()
    # Write closing tag
    self.out_stream.write("</varDec>\n")

# statement*
def compile_statements(self):
    """Compile zero or more statements, dispatching on the leading keyword."""
    # Write open tag
    self.out_stream.write("<statements>\n")
    # Process statements
    while self.tokenizer.get_token_type(
    ) == TokenType.KEYWORD and self.tokenizer.get_keyword_type(
    ) in statement_types:
        # Statment type is based on the starting keyword
        statement_type = self.tokenizer.get_keyword_type()
        # Call compile method based on type
        if statement_type == KeywordType.LET:
            self.compile_let()
        elif statement_type == KeywordType.IF:
            self.compile_if()
        elif statement_type == KeywordType.WHILE:
            self.compile_while_statement()
        elif statement_type == KeywordType.DO:
            self.compile_do()
        elif statement_type == KeywordType.RETURN:
            self.compile_return()
    self.out_stream.write("</statements>\n")

# 'let' varName ('[' expression ']')? '=' expression ';'
def compile_let(self):
    """Compile a let statement.

    Scalar targets pop the expression result straight into the variable's
    segment/index.  Array targets push base + index, evaluate the RHS, then
    use the standard temp-0 / pointer-1 / that-0 shuffle so nested array
    expressions on the RHS cannot clobber `that`.
    """
    self.out_stream.write("<letStatement>\n")
    self.write_terminal_tag(TokenType.KEYWORD, "let")
    # Is Array?
    is_array_access = False
    # Move to next token
    self.tokenizer.has_more_tokens()
    if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
        var_name = self.tokenizer.get_cur_ident()
        self.write_terminal_tag(TokenType.IDENTIFIER, var_name)
        var_props = self.lookup_st(var_name)
        # Write variable properties
        self.out_stream.write(
            f"\n===USED===\nkind: {var_props['kind']}, type: {var_props['type']}, index: {var_props['index']}\n======="
        )
        # Finding segment type
        var_props["seg_type"] = self.var_t_to_segment_t(var_props["kind"])
    else:
        raise AssertionError("Invalid Syntax for varName!")
    # Move to next token
    self.tokenizer.has_more_tokens()
    # Optional bracket syntax
    if self.tokenizer.get_token_type() == TokenType.SYMBOL \
            and self.tokenizer.get_symbol() == "[":
        is_array_access = True
        # push arr
        self.vm_writer.write_push(
            self.var_t_to_segment_t(var_props["kind"]), var_props["index"])
        self.write_terminal_tag(TokenType.SYMBOL, "[")
        # Move to next token
        self.tokenizer.has_more_tokens()
        # Compile the expression
        self.compile_expression()
        self.eat("]")
        self.write_terminal_tag(TokenType.SYMBOL, "]")
        # add  (base address + index)
        self.vm_writer.write_arithmetic(ArithmeticCType.ADD)
        # Move to the next token
        self.tokenizer.has_more_tokens()
    # Eat assignment operator
    self.eat("=")
    self.write_terminal_tag(TokenType.SYMBOL, "=")
    # Move to next token
    self.tokenizer.has_more_tokens()
    self.compile_expression()
    self.eat(";")
    self.write_terminal_tag(TokenType.SYMBOL, ";")
    # Move to next token
    self.tokenizer.has_more_tokens()
    if not is_array_access:
        self.vm_writer.write_pop(var_props["seg_type"], var_props["index"])
    else:
        # pop temp 0
        self.vm_writer.write_pop(SegmentType.TEMP, 0)
        # pop pointer 1
        self.vm_writer.write_pop(SegmentType.POINTER, 1)
        # push temp 0
        self.vm_writer.write_push(SegmentType.TEMP, 0)
        # pop that 0
        self.vm_writer.write_pop(SegmentType.THAT, 0)
    self.out_stream.write("</letStatement>\n")

# 'if' '(' expression ')' '{' statements '}' ('else' '{' statements '}')?
def compile_if(self):
    """Compile an if/else statement.

    Uses the negate-and-branch scheme: `not` the condition, if-goto L1
    (skip the then-block), fall through the then-block to goto L2, L1
    starts the (optional) else-block, L2 is the join point.
    """
    self.out_stream.write("<ifStatement>\n")
    self.vm_writer.write_comment("if statement")
    self.write_terminal_tag(TokenType.KEYWORD, "if")
    # get the next labels
    L1, L2 = self.get_if_labels()
    # Move to next token
    self.tokenizer.has_more_tokens()
    self.eat("(")
    self.write_terminal_tag(TokenType.SYMBOL, "(")
    # Move to next token
    self.tokenizer.has_more_tokens()
    # write code for the expression
    self.compile_expression()
    self.eat(")")
    self.write_terminal_tag(TokenType.SYMBOL, ")")
    # Move to next token
    self.tokenizer.has_more_tokens()
    # not, the condition inside if
    self.vm_writer.write_arithmetic(ArithmeticCType.NOT)
    self.vm_writer.write_if(L1)
    self.eat("{")
    self.write_terminal_tag(TokenType.SYMBOL, "{")
    # Move to next token
    self.tokenizer.has_more_tokens()
    # Compile if-block body
    self.compile_statements()
    self.vm_writer.write_goto(L2)
    self.vm_writer.write_label(L1)
    self.eat("}")
    self.write_terminal_tag(TokenType.SYMBOL, "}")
    # Move to next token
    self.tokenizer.has_more_tokens()
    # If there is an else statement
    # Handle else block
    if self.tokenizer.get_token_type() == TokenType.KEYWORD \
            and self.tokenizer.get_keyword_type() == KeywordType.ELSE:
        self.write_terminal_tag(TokenType.KEYWORD, "else")
        # Move to next token
        self.tokenizer.has_more_tokens()
        self.eat("{")
        self.write_terminal_tag(TokenType.SYMBOL, "{")
        # Move to next token
        self.tokenizer.has_more_tokens()
        self.compile_statements()
        self.eat("}")
        self.write_terminal_tag(TokenType.SYMBOL, "}")
        # Move to next token
        self.tokenizer.has_more_tokens()
    self.vm_writer.write_label(L2)
    # Write closing tag
    self.out_stream.write("</ifStatement>\n")

# 'while' '(' expression ')' '{' statements '}'
def compile_while_statement(self):
    """Compile a while statement.

    L1 labels the condition re-test at the top; the negated condition
    if-gotos L2 (exit); the body ends with goto L1.
    """
    self.out_stream.write("<whileStatement>\n")
    self.write_terminal_tag(TokenType.KEYWORD, "while")
    L1, L2 = self.get_while_labels()
    self.vm_writer.write_label(L1)
    # Move to next token
    self.tokenizer.has_more_tokens()
    self.eat("(")
    self.write_terminal_tag(TokenType.SYMBOL, "(")
    # Move to next token
    self.tokenizer.has_more_tokens()
    self.compile_expression()
    self.eat(")")
    self.write_terminal_tag(TokenType.SYMBOL, ")")
    self.vm_writer.write_arithmetic(ArithmeticCType.NOT)
    self.vm_writer.write_if(L2)
    # Move to next token
    self.tokenizer.has_more_tokens()
    self.eat("{")
    self.write_terminal_tag(TokenType.SYMBOL, "{")
    # Move to next token
    self.tokenizer.has_more_tokens()
    # Compile block body
    self.compile_statements()
    self.eat("}")
    self.write_terminal_tag(TokenType.SYMBOL, "}")
    # Move to next token
    self.tokenizer.has_more_tokens()
    self.vm_writer.write_goto(L1)
    self.vm_writer.write_label(L2)
    # Write closing tag
    self.out_stream.write("</whileStatement>\n")

# 'do' subroutineCall ';'
def compile_do(self):
    """Compile a do statement and discard the callee's return value.

    Call shapes handled:
      * do var.method(...)   -> push var, call Type.method nArgs+1
      * do Class.func(...)   -> call Class.func nArgs
      * do method(...)       -> push this, call ThisClass.method nArgs+1
    The ignored return value is popped to temp 0 (call-and-return
    contract).
    """
    # To store first and second parts of subroutine call
    first_part, second_part = None, None
    # To store nArgs passed to the subroutine
    nArgs = 0
    # Write opening tag
    self.out_stream.write("<doStatement>\n")
    # Write do keyword tag
    self.write_terminal_tag(TokenType.KEYWORD, "do")
    # Move to next token
    self.tokenizer.has_more_tokens()
    # Handle subroutineCall
    if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
        first_part = self.tokenizer.get_cur_ident()
        self.write_terminal_tag(TokenType.IDENTIFIER, first_part)
    else:
        raise AssertionError("Not a valid subroutine/class name!!!")
    var_props = self.lookup_st(first_part)
    if var_props:
        # Receiver object becomes hidden argument 0
        self.vm_writer.write_push(
            self.var_t_to_segment_t(var_props["kind"]), var_props["index"])
    # Move to next token
    self.tokenizer.has_more_tokens()
    # Is is a method call
    if self.tokenizer.get_token_type() == TokenType.SYMBOL \
            and self.tokenizer.get_symbol() == ".":
        self.write_terminal_tag(TokenType.SYMBOL, ".")
        # Move to next token
        self.tokenizer.has_more_tokens()
        # Handle subroutineCall
        if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
            second_part = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(TokenType.IDENTIFIER, second_part)
        else:
            raise AssertionError("Not a valid subroutine/class name!!!")
        # Move to next token
        self.tokenizer.has_more_tokens()
    if not var_props and not second_part:
        # FIX: `do foo(...)` is an implicit method call on the current
        # object; `this` must be pushed as argument 0 BEFORE the explicit
        # arguments are evaluated.
        self.vm_writer.write_push(SegmentType.POINTER, 0)
    self.eat("(")
    self.write_terminal_tag(TokenType.SYMBOL, "(")
    # Move to next token
    self.tokenizer.has_more_tokens()
    self.out_stream.write("<expressionList>\n")
    if not (self.tokenizer.get_token_type() == TokenType.SYMBOL \
            and self.tokenizer.get_symbol() == ")"):
        nArgs = self.compile_expression_list()
    self.out_stream.write("</expressionList>\n")
    self.eat(")")
    self.write_terminal_tag(TokenType.SYMBOL, ")")
    # Move to next token
    self.tokenizer.has_more_tokens()
    self.eat(";")
    self.write_terminal_tag(TokenType.SYMBOL, ";")
    # Move to next token
    self.tokenizer.has_more_tokens()
    if var_props:
        if second_part:
            # Method call on a variable: receiver already pushed above
            self.vm_writer.write_call(f"{var_props['type']}.{second_part}",
                                      nArgs + 1)
    else:
        # Write method call
        if second_part:
            # Of some other class
            self.vm_writer.write_call(f"{first_part}.{second_part}",
                                      nArgs)
        else:
            # Of this class — FIX: count the implicit `this` pushed above
            self.vm_writer.write_call(f"{self.class_name}.{first_part}",
                                      nArgs + 1)
    # call-and-return contract
    self.vm_writer.write_pop(SegmentType.TEMP, 0)
    # Write closing tag
    self.out_stream.write("</doStatement>\n")

# 'return' expression? ';'
def compile_return(self):
    """Compile a return statement.

    Void subroutines (bare `return;`) push constant 0 so every call site
    can uniformly pop a return value.
    """
    # Write opening tag
    self.out_stream.write("<returnStatement>\n")
    # Write do keyword tag
    self.write_terminal_tag(TokenType.KEYWORD, "return")
    # Move to next token
    self.tokenizer.has_more_tokens()
    if self.tokenizer.get_token_type() == TokenType.SYMBOL \
            and self.tokenizer.get_symbol() == ";":
        self.write_terminal_tag(TokenType.SYMBOL, ";")
        # the subroutine void return type
        self.vm_writer.write_push(SegmentType.CONST, 0)
    else:
        self.compile_expression()
        self.eat(";")
        self.write_terminal_tag(TokenType.SYMBOL, ";")
    # Move to next token
    self.tokenizer.has_more_tokens()
    # Write return command
    self.vm_writer.write_return()
    # Write closing tag
    self.out_stream.write("</returnStatement>\n")

# term (op term)*
def compile_expression(self):
    """Compile an expression: term (op term)*.

    Operators are applied left-to-right after both operands are on the
    stack (Jack has no operator precedence).
    """
    self.out_stream.write("<expression>\n")
    # Compile term
    self.compile_term()
    # Handle (op term)*
    while self.tokenizer.get_token_type() == TokenType.SYMBOL \
            and self.tokenizer.get_symbol() in allowed_op:
        symbol = self.tokenizer.get_symbol()
        # Write tag for operation symbol
        self.write_terminal_tag(TokenType.SYMBOL,
                                self.tokenizer.get_symbol())
        # Move to next token
        self.tokenizer.has_more_tokens()
        # Compile term
        self.compile_term()
        # Apply operation (after both operands are pushed)
        self.vm_writer.write_arithmetic(allowed_op[symbol])
    # Write closing tag
    self.out_stream.write("</expression>\n")

# integerConstant | stringConstant | keywordConstant | varName |
# varName '[' expression ']' | subroutineCall | '(' expression ')'
# | unaryOp term
def compile_term(self):
    """Compile a single term and leave its value on the stack."""
    self.out_stream.write("<term>\n")
    if self.tokenizer.get_token_type() == TokenType.INT_CONST:
        self.write_terminal_tag(TokenType.INT_CONST,
                                self.tokenizer.get_int_val())
        self.vm_writer.write_push(SegmentType.CONST,
                                  self.tokenizer.get_int_val())
        self.tokenizer.has_more_tokens()
    elif self.tokenizer.get_token_type() == TokenType.STRING_CONST:
        # FIX: string constants previously emitted no VM code.  Per the
        # standard Jack mapping, build the string at run time with the OS
        # String class: push length, String.new, then append each char.
        string_val = self.tokenizer.get_string_val()
        self.write_terminal_tag(TokenType.STRING_CONST, string_val)
        self.vm_writer.write_push(SegmentType.CONST, len(string_val))
        self.vm_writer.write_call("String.new", 1)
        for char in string_val:
            self.vm_writer.write_push(SegmentType.CONST, ord(char))
            self.vm_writer.write_call("String.appendChar", 2)
        self.tokenizer.has_more_tokens()
    elif self.tokenizer.get_token_type() == TokenType.KEYWORD \
            and self.tokenizer.get_keyword_type() in keyword_constants:
        # keyword constant
        kc = self.tokenizer.get_cur_ident()
        self.write_terminal_tag(TokenType.KEYWORD, kc)
        if kc == "null" or kc == "false":
            # push const 0
            self.vm_writer.write_push(SegmentType.CONST, 0)
        elif kc == "true":
            # push const -1  (push 1, negate)
            self.vm_writer.write_push(SegmentType.CONST, 1)
            self.vm_writer.write_arithmetic(ArithmeticCType.NEG)
        elif kc == "this":
            # push pointer 0
            self.vm_writer.write_push(SegmentType.POINTER, 0)
        self.tokenizer.has_more_tokens()
    elif self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
        first_part, second_part = None, None
        nArgs = 0
        var_name = self.tokenizer.get_cur_ident()
        first_part = var_name
        var_props = self.lookup_st(var_name)
        self.write_terminal_tag(TokenType.IDENTIFIER, var_name)
        if var_props:
            # Push the variable's value: serves as the term value itself,
            # the array base for `[`, or the receiver for `.method(`.
            self.vm_writer.write_push(
                self.var_t_to_segment_t(var_props["kind"]),
                var_props["index"])
        # Move to next token
        self.tokenizer.has_more_tokens()
        if self.tokenizer.get_token_type() == TokenType.SYMBOL:
            # Handle varName '[' expression ']'
            if self.tokenizer.get_symbol() == "[":
                self.eat("[")
                self.write_terminal_tag(TokenType.SYMBOL, "[")
                self.tokenizer.has_more_tokens()
                self.compile_expression()
                self.eat(']')
                self.write_terminal_tag(TokenType.SYMBOL, "]")
                # add
                self.vm_writer.write_arithmetic(ArithmeticCType.ADD)
                # pop pointer 1
                self.vm_writer.write_pop(SegmentType.POINTER, 1)
                # push that 0
                self.vm_writer.write_push(SegmentType.THAT, 0)
                # Move to next token
                self.tokenizer.has_more_tokens()
            # Handle subroutineCall
            elif self.tokenizer.get_symbol() == "(" \
                    or self.tokenizer.get_symbol() == ".":
                # Is a method call
                if self.tokenizer.get_symbol() == ".":
                    self.write_terminal_tag(TokenType.SYMBOL, ".")
                    # Move to next token
                    self.tokenizer.has_more_tokens()
                    # Handle subroutineCall
                    if self.tokenizer.get_token_type(
                    ) == TokenType.IDENTIFIER:
                        second_part = self.tokenizer.get_cur_ident()
                        self.write_terminal_tag(TokenType.IDENTIFIER,
                                                second_part)
                    else:
                        raise AssertionError(
                            "Not a valid subroutine/class name!!!")
                    # Move to next token
                    self.tokenizer.has_more_tokens()
                if not var_props and not second_part:
                    # FIX: `foo(...)` with no receiver is an implicit
                    # method call on the current object; push `this`
                    # before the explicit arguments.
                    self.vm_writer.write_push(SegmentType.POINTER, 0)
                self.eat("(")
                self.write_terminal_tag(TokenType.SYMBOL, "(")
                # Move to next token
                self.tokenizer.has_more_tokens()
                self.out_stream.write("<expressionList>\n")
                if not (self.tokenizer.get_token_type() == TokenType.SYMBOL \
                        and self.tokenizer.get_symbol() == ")"):
                    nArgs = self.compile_expression_list()
                self.out_stream.write("</expressionList>\n")
                self.eat(")")
                self.write_terminal_tag(TokenType.SYMBOL, ")")
                # Move to next token
                self.tokenizer.has_more_tokens()
                if var_props:
                    # FIX: removed leftover debug print of the looked-up
                    # variable name.
                    # Is it a method call?
                    if second_part:
                        # Method on an object variable (receiver already
                        # pushed above counts as one extra argument)
                        self.vm_writer.write_call(
                            f"{var_props['type']}.{second_part}",
                            nArgs + 1)
                # This is no variable with given name
                else:
                    if second_part:
                        # Of some other class
                        self.vm_writer.write_call(
                            f"{first_part}.{second_part}", nArgs)
                    else:
                        # Of this class — FIX: count the implicit `this`
                        # pushed above
                        self.vm_writer.write_call(
                            f"{self.class_name}.{first_part}", nArgs + 1)
    elif self.tokenizer.get_token_type() == TokenType.SYMBOL:
        # Handle '(' expression ')'
        if self.tokenizer.get_symbol() == '(':
            self.eat("(")
            self.write_terminal_tag(TokenType.SYMBOL, "(")
            self.tokenizer.has_more_tokens()
            self.compile_expression()
            self.eat(")")
            self.write_terminal_tag(TokenType.SYMBOL, ")")
            self.tokenizer.has_more_tokens()
        # Handle unaryOp term
        elif self.tokenizer.get_symbol() in allowed_unary_op:
            unary_op = self.tokenizer.get_symbol()
            self.write_terminal_tag(TokenType.SYMBOL,
                                    self.tokenizer.get_symbol())
            self.tokenizer.has_more_tokens()
            self.compile_term()
            # Apply the unary op after its operand is on the stack
            self.vm_writer.write_arithmetic(allowed_unary_op[unary_op])
        else:
            raise AssertionError("( or unary Op expected!!")
    self.out_stream.write("</term>\n")

# expression (',' expression)*
def compile_expression_list(self):
    """Compile a non-empty argument list; return the argument count.

    Callers only invoke this after checking the list is non-empty.
    """
    self.compile_expression()
    arg_count = 1
    while (self.tokenizer.get_token_type() == TokenType.SYMBOL) \
            and (self.tokenizer.get_symbol() == ","):
        self.write_terminal_tag(TokenType.SYMBOL, ",")
        self.tokenizer.has_more_tokens()
        self.compile_expression()
        arg_count += 1
    return arg_count

# eat the given string, else raise error
def eat(self, string):
    """Assert that the current token is the expected symbol `string`.

    Raises AssertionError on mismatch; does NOT advance the tokenizer —
    callers advance explicitly.
    """
    if self.tokenizer.get_token_type() == TokenType.SYMBOL:
        if not (self.tokenizer.get_symbol() == string):
            raise AssertionError(
                f"Expected symbol {string}, found: {self.tokenizer.get_symbol()}"
            )
    else:
        raise AssertionError("Symbol not found!!")

# Utility method to check weather
# the current token is a valid data type
def is_valid_type(self):
    """Return True if the current token is a built-in type keyword
    (int/char/boolean) or an identifier (a class type)."""
    # If built-in data type
    if self.tokenizer.get_token_type() == TokenType.KEYWORD:
        # if int, char, boolean
        if self.tokenizer.get_keyword_type() in data_types:
            return True
    # If custom data type
    elif self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
        return True
    # Invalid data type
    return False

# Lookup variable in symbol table
def lookup_st(self, v_name):
    """Return the properties of variable `v_name` as a dict with keys
    'kind', 'type' and 'index', checking the subroutine-level table first
    and then the class-level table; return False if undefined in both.

    FIX: removed leftover debug code that imported pprint and dumped both
    symbol tables on every lookup.
    """
    # To store looked up props
    v_props = {}
    # lookup subroutine level table
    v_kind = self.subroutine_level_st.get_kind_of(v_name)
    # var not found in subroutine level st
    if v_kind == SymbolKind.NONE:
        # lookup class level table
        v_kind = self.class_level_st.get_kind_of(v_name)
        if v_kind == SymbolKind.NONE:
            return False
        v_props["kind"] = v_kind
        v_props["type"] = self.class_level_st.get_type_of(v_name)
        v_props["index"] = self.class_level_st.get_index_of(v_name)
        # return class level variable data
        return v_props
    # Data found for subroutine level table
    v_props["kind"] = v_kind
    v_props["type"] = self.subroutine_level_st.get_type_of(v_name)
    v_props["index"] = self.subroutine_level_st.get_index_of(v_name)
    return v_props

def var_t_to_segment_t(self, v_kind: SymbolKind) -> SegmentType:
    """Map a symbol-table kind to the VM memory segment it lives in
    (static->static, arg->argument, var->local, field->this)."""
    if v_kind == SymbolKind.STATIC:
        return SegmentType.STATIC
    elif v_kind == SymbolKind.ARG:
        return SegmentType.ARG
    elif v_kind == SymbolKind.VAR:
        return SegmentType.LOCAL
    elif v_kind == SymbolKind.FEILD:  # (sic: enum spelling)
        return SegmentType.THIS
    else:
        raise AssertionError("No segment kind for given v_kind!!")