def tokens_to_xml(path):
    """Write the tokens of every source file under *path* to XML files.

    Each token is written with its token type as the tag. One output file
    is produced per input file, named after the input with ``.jack``
    replaced by ``T.xml``, inside an ``Xmlresult`` directory joined onto
    *path* (a separate directory, to avoid name clashes).

    :param path: location handed to ``retrive_files`` to find the inputs;
        also the parent of the ``Xmlresult`` output directory.
    """
    paths = retrive_files(path)
    out_dirpath = os.path.join(path, 'Xmlresult')
    # A distinct loop name keeps the ``path`` parameter from being shadowed.
    for jack_path in paths:
        outfile = os.path.basename(jack_path).replace('.jack', 'T.xml')
        outpath = os.path.join(out_dirpath, outfile)
        tokenizer = Tokenizer(jack_path)
        analyzer = TokenAnalyzer(outpath)
        try:
            while tokenizer.has_more_tokens():
                tokenizer.advance()
                t_type = tokenizer.token_type
                tag = token_tags[t_type]
                # Only the accessor matching the current token type is
                # read; an unrecognised type writes nothing.
                if t_type == T_KEYWORD:
                    analyzer.write_info(tokenizer.keyword, tag)
                elif t_type == T_SYMBOL:
                    analyzer.write_info(tokenizer.symbol, tag)
                elif t_type == T_ID:
                    analyzer.write_info(tokenizer.identifier, tag)
                elif t_type == T_INTEGER:
                    analyzer.write_info(tokenizer.intval, tag)
                elif t_type == T_STRING:
                    analyzer.write_info(tokenizer.stringval, tag)
        finally:
            # Close the output even when tokenizing fails part-way, so the
            # analyzer is never left open.
            analyzer.close()
class CompilationEngine:
    """Recursive-descent compiler for a single Jack class.

    Tokens are pulled from a ``Tokenizer``, identifiers are recorded in a
    ``SymbolTable`` and VM commands are emitted through a ``VMWriter``.
    Constructing the engine compiles the whole class immediately; call
    :meth:`write_file` afterwards to flush the VM writer.

    Token matching is done with ``re.match`` against the text of the
    current token, so every "token" argument below is a regex.
    """

    # Regexes for tokens that are regex metacharacters. Raw strings keep the
    # backslashes from being parsed as (invalid) string escape sequences;
    # the resulting pattern text is unchanged.
    _OPEN_PARENTHESIS = r"\("
    _CLOSE_PARENTHESIS = r"\)"
    _OPEN_BRACKET = r"\["
    _CLOSE_BRACKET = r"\]"
    _DOT = r"\."
    # Binary operators of the Jack expression grammar.
    _OPS = r"\+|-|\*|\/|&|\||<|>|="
    # Unary operators that may start a term.
    _UNARY_OPS = "-|~"

    def __init__(self, in_address):
        """
        Sets up the tokenizer, symbol table and VM writer, then compiles
        the class found in the input file.
        :param in_address: Path of the .jack file to compile. The VM output
        path is the same path with ".jack" replaced by ".vm".
        """
        self.tokenizer = Tokenizer(in_address)
        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(in_address.replace(".jack", ".vm"))
        self.curr_token = self.tokenizer.get_current_token()
        self.out_address = in_address.replace(".jack", ".xml")
        self.output = ""
        self.indent = 0
        # Incremented before use, so the first label gets suffix 0.
        self.label_count = -1
        self.class_name = ""
        self.compile_class()

    def write_file(self):
        """
        Flushes the generated VM code through the VM writer. The XML text
        accumulated in self.output is not written to disk.
        """
        self.vm_writer.write_file()

    def write(self, to_write):
        """
        Writes to the output, with indentation.
        :param to_write: The string to write
        """
        self.output += (self.indent * " ") + to_write + "\n"

    # ========== Compilation Methods ========== #

    def compile_class(self):
        """
        Compiles a complete class:
        'class' className '{' classVarDec* subroutineDec* '}'
        """

        def comp_class():
            self.eat("class")
            self.class_name = self.eat(NAME_REG)
            self.eat("{")
            self.compile_class_var_dec()
            self.compile_subroutine()
            self.eat("}")

        self.wrap("class", comp_class)

    def compile_class_var_dec(self):
        """
        Compiles every consecutive static or field declaration.
        :return:
        """
        var_type_reg = "static|field"
        if self.peek_token(var_type_reg):
            self.wrap("classVarDec", self.__class_var_dec)
            self.compile_class_var_dec()

    def compile_subroutine(self):
        """
        Compiles every consecutive method, function or constructor.
        Does nothing when the current token does not start a subroutine,
        so a class without subroutines compiles cleanly.
        :return:
        """
        sub_regex = "(constructor|function|method)"
        if not self.peek_token(sub_regex):
            return

        self.symbol_table.start_subroutine()
        kind = self.eat(sub_regex)
        self.__compile_type(True)
        # subroutine name
        name = self.__compile_name()
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        self.compile_parameter_list(kind)
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)

        self.eat("{")
        if self.peek_token("var"):
            self.compile_var_dec()
        # The function command is emitted only after all var declarations
        # are known, because it carries the number of locals.
        num_locals = self.symbol_table.var_count("local")
        self.vm_writer.write_function(
            "{}.{}".format(self.class_name, name), num_locals)
        self.__set_pointer(kind)
        self.compile_statements()
        self.eat("}")

        # Handle the next subroutine if there is one (the guard at the top
        # ends the recursion).
        self.compile_subroutine()

    def compile_parameter_list(self, kind):
        """
        Compiles a possibly empty parameter list, not including the
        enclosing ()
        :param kind: The subroutine kind; for "method" the implicit "this"
        is defined as the first argument.
        :return:
        """
        if kind == "method":
            self.symbol_table.define("this", self.class_name, "argument")
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        while self.peek_token(type_reg):
            self.__params()

    def compile_var_dec(self):
        """
        Compiles every consecutive var declaration.
        :return:
        """
        self.eat("var")
        var_type = self.__compile_type(False)
        self.__var_declare(var_type, "var")
        self.eat(";")
        if self.peek_token("var"):
            self.compile_var_dec()

    def compile_statements(self):
        """
        Compiles a sequence of statements, not including the enclosing {}
        :return:
        """
        statement_reg = "let|if|while|do|return"
        if self.peek_token(statement_reg):
            if self.peek_token("let"):
                self.compile_let()
            elif self.peek_token("if"):
                self.compile_if()
            elif self.peek_token("while"):
                self.compile_while()
            elif self.peek_token("do"):
                self.compile_do()
            elif self.peek_token("return"):
                self.compile_return()
            self.compile_statements()

    def compile_do(self):
        """
        Compiles a do statement
        """
        self.eat("do")
        self.__subroutine_call()
        # Since we don't use the return value, we pop it to temp
        self.vm_writer.write_pop("temp", 0)
        self.eat(";")

    def compile_let(self):
        """
        Compiles a let statement
        """
        self.eat("let")
        name = self.__compile_name()
        is_array = False
        # Determine [expression]
        if self.peek_token(CompilationEngine._OPEN_BRACKET):
            is_array = True
            self.__handle_array(name)
        self.eat("=")
        self.compile_expression()
        # Pop the value to the spot in the memory
        if is_array:
            # The value is parked in temp 0 so the target address, which
            # lies beneath it on the stack, can be loaded into pointer 1.
            self.vm_writer.write_pop("temp", 0)
            self.vm_writer.write_pop("pointer", 1)
            self.vm_writer.write_push("temp", 0)
            self.vm_writer.write_pop("that", 0)
        else:
            self.__write_pop(name)
        self.eat(";")

    def compile_while(self):
        """
        Compiles a while statement.
        :return:
        """
        self.eat("while")
        loop_label = self.__get_label("WHILE_START")
        exit_label = self.__get_label("WHILE_END")
        self.vm_writer.write_label(loop_label)
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        # Compute ~condition
        self.compile_expression()
        self.vm_writer.write_arithmetic("~")
        # if ~condition exit loop
        self.vm_writer.write_if(exit_label)
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.eat("{")
        self.compile_statements()
        self.vm_writer.write_goto(loop_label)
        self.vm_writer.write_label(exit_label)
        self.eat("}")

    def compile_return(self):
        """
        Compiles a return statement.
        """
        self.eat("return")
        if self.__is_term():
            self.compile_expression()
        else:
            # Void function - push 0
            self.vm_writer.write_push(CONSTANT, 0)
        self.vm_writer.write_return()
        self.eat(";")

    def compile_if(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        :return:
        """
        self.eat("if")
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        self.compile_expression()
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.eat("{")
        # The condition is not negated: jump to the true branch when it
        # holds, otherwise fall through to the goto for the false branch.
        if_true = self.__get_label("IF_TRUE")
        self.vm_writer.write_if(if_true)
        if_false = self.__get_label("IF_FALSE")
        self.vm_writer.write_goto(if_false)
        self.vm_writer.write_label(if_true)
        self.compile_statements()
        self.eat("}")
        # Handle else:
        if self.peek_token("else"):
            if_end = self.__get_label("IF_END")
            self.vm_writer.write_goto(if_end)
            self.vm_writer.write_label(if_false)
            self.eat("else")
            self.eat("{")
            self.compile_statements()
            self.eat("}")
            self.vm_writer.write_label(if_end)
        else:
            self.vm_writer.write_label(if_false)

    def compile_expression(self):
        """
        Compiles an expression: term (op term)*
        Operators are applied left to right in the order they appear.
        :return:
        """

        def comp_expression():
            self.compile_term()
            # Loop, not a single check: an expression may chain any number
            # of "op term" pairs.
            while self.peek_token(CompilationEngine._OPS):
                operation = self.eat(CompilationEngine._OPS)
                self.compile_term()
                self.vm_writer.write_arithmetic(operation)

        self.wrap("expression", comp_expression)

    def compile_term(self):
        """
        Compiles a term. Prints an error and exits if the current token
        cannot start a term.
        :return:
        """
        curr_type = self.peek_type()
        val = self.curr_token.get_token()
        # Handle integer constant
        if curr_type == INT_CONST:
            self.vm_writer.write_push(CONSTANT, int(val))
            self.__advance_token()
        # Handle String constant
        elif curr_type == STRING_CONST:
            self.__handle_string_constant(val)
            self.__advance_token()
        # Handle Keyword constant
        elif curr_type == KEYWORD:
            self.__handle_keyword_constant(val)
            self.__advance_token()
        # Case: token is a varName or a subroutineName
        elif curr_type == IDENTIFIER:
            self.__handle_identifier()
        # Case: ( expression )
        elif self.peek_token(CompilationEngine._OPEN_PARENTHESIS):
            self.eat(CompilationEngine._OPEN_PARENTHESIS)
            self.compile_expression()
            self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        # Case: unaryOp term
        elif self.peek_token(CompilationEngine._UNARY_OPS):
            self.__handle_unary_op()
        else:
            print("Error: Incorrect Term")
            exit(-1)

    def compile_expression_list(self):
        """
        Compiles a possibly empty list of comma separated expressions
        :return: The number of expressions compiled
        """
        count = 0
        if self.__is_term():
            self.compile_expression()
            count += 1
            while self.peek_token(","):
                self.eat(",")
                self.compile_expression()
                count += 1
        return count

    # ========== Compilation Helper ========== #

    def __class_var_dec(self):
        """
        Compiles a single class var declaration.
        """
        var_type_reg = "static|field"
        # (static|field)
        kind = self.eat(var_type_reg)
        # type
        var_type = self.__compile_type(False)
        # Compile varName combo until no more ","
        self.__var_declare(var_type, kind)
        self.eat(";")

    def __var_declare(self, var_type, kind):
        """
        Defines one or more comma separated variable names in the symbol
        table, all with the same type and kind.
        """
        name = self.eat(NAME_REG)
        self.symbol_table.define(name, var_type, kind)
        if self.peek_token(","):
            self.eat(",")
            self.__var_declare(var_type, kind)

    def __compile_type(self, for_function):
        """
        Compiles a type for a function or variable, determined by a
        received boolean value.
        :param for_function: True if is type of function (so "void" is
        accepted as well), false otherwise.
        :return: The type token that was consumed
        """
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        if for_function:
            type_reg += "|void"
        return self.eat(type_reg)

    def __set_pointer(self, kind):
        """
        Emits the code that sets up "this" at the start of a method or a
        constructor. Functions need no setup.
        """
        if kind == "method":
            self.vm_writer.write_push("argument", 0)
            self.vm_writer.write_pop("pointer", 0)
        elif kind == "constructor":
            self.__handle_constructor()

    def __handle_constructor(self):
        """
        Emits the allocation of the new object and anchors it to "this".
        """
        # Allocate memory for the new object
        var_num = self.symbol_table.var_count("this")
        self.vm_writer.write_push(CONSTANT, var_num)
        self.vm_writer.write_call("Memory.alloc", 1)
        # Set the new memory spot to this
        self.vm_writer.write_pop("pointer", 0)

    def __compile_name(self):
        """
        Consumes an identifier and returns it. Prints an error and exits
        if the current token is not an identifier.
        """
        if self.peek_type() == IDENTIFIER:
            return self.eat(NAME_REG)
        else:
            print("ERROR: Identifier Expected")
            exit(-1)

    def __params(self):
        """
        Compiles one "type varName" parameter and a trailing comma, if any.
        """
        var_type = self.__compile_type(False)
        name = self.eat(NAME_REG)
        self.symbol_table.define(name, var_type, "argument")
        if self.peek_token(","):
            self.eat(",")

    def __handle_unary_op(self):
        """
        Compiles "unaryOp term": the term first, then the operator.
        """
        command = self.eat(CompilationEngine._UNARY_OPS)
        self.compile_term()
        if command == "-":
            # Unary minus is written as "neg"; "-" on its own is passed to
            # the writer only for the binary operator.
            self.vm_writer.write_arithmetic("neg")
        else:
            self.vm_writer.write_arithmetic(command)

    def __handle_identifier(self):
        """
        Handles the case of an identifier given as a term
        """
        # Case: varName [ expression ]
        if self.peek_next(CompilationEngine._OPEN_BRACKET):
            name = self.__compile_name()
            self.__handle_array(name)
            self.vm_writer.write_pop("pointer", 1)
            self.vm_writer.write_push("that", 0)
        # Case: subroutineCall:
        elif self.peek_next(CompilationEngine._OPEN_PARENTHESIS) or \
                self.peek_next(CompilationEngine._DOT):
            self.__subroutine_call()
        else:
            name = self.eat(NAME_REG)
            self.__write_push(name)

    def __handle_string_constant(self, string):
        """
        Handles the case of a string constant in a term
        :param string: the constant
        """
        self.vm_writer.write_push(CONSTANT, len(string))
        self.vm_writer.write_call("String.new", 1)
        for char in string:
            self.vm_writer.write_push(CONSTANT, ord(char))
            self.vm_writer.write_call("String.appendChar", 2)

    def __handle_keyword_constant(self, word):
        """
        Handles the case of a keyword constant given in a term.
        "this" pushes pointer 0, "true" pushes 0 followed by "~", and any
        other keyword pushes constant 0. The keyword is not validated.
        :param word: The keyword
        """
        if word == "this":
            self.vm_writer.write_push("pointer", 0)
        else:
            self.vm_writer.write_push(CONSTANT, 0)
            if word == "true":
                self.vm_writer.write_arithmetic("~")

    def __is_term(self):
        """
        :return: A truthy value if the current token can start a term.
        """
        curr_type = self.peek_type()
        # _OPS is kept so a stray binary operator still reaches the error
        # report in compile_term; _UNARY_OPS adds "~", which _OPS lacks and
        # which legitimately starts a term.
        return curr_type == STRING_CONST or curr_type == INT_CONST or \
            curr_type == KEYWORD or curr_type == IDENTIFIER or \
            self.peek_token(CompilationEngine._OPEN_PARENTHESIS) or \
            self.peek_token(CompilationEngine._OPS) or \
            self.peek_token(CompilationEngine._UNARY_OPS)

    def __subroutine_call(self):
        """
        Compiles a subroutine call that starts at the current identifier.
        """
        if self.curr_token.get_type() == IDENTIFIER:
            if self.peek_next(CompilationEngine._OPEN_PARENTHESIS):
                # Bare name(...): a call on the current object, so "this"
                # is pushed as the first argument.
                self.vm_writer.write_push("pointer", 0)
                self.__subroutine_name(self.class_name, 1)
            elif self.peek_next(CompilationEngine._DOT):
                self.__object_subroutine_call()
            else:
                print("Error: ( or . expected")
                exit(-1)

    def __object_subroutine_call(self):
        """
        Compiles "(className | varName) . subroutineName ( expressionList )"
        """
        name = self.eat(NAME_REG)
        n_args = 0
        # Push the object reference to the stack
        if self.symbol_table.kind_of(name):
            # A known variable: call on its type and pass it as argument 0.
            self.__write_push(name)
            name = self.symbol_table.type_of(name)
            n_args = 1
        self.eat(CompilationEngine._DOT)
        self.__subroutine_name(name, n_args)

    def __subroutine_name(self, type_name, n_args):
        """
        Handles the case of subroutineName(expressionList)
        :param type_name: The class the subroutine belongs to
        :param n_args: Arguments already pushed before the expression list
        :return:
        """
        name = self.eat(NAME_REG)
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        nargs = self.compile_expression_list()
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.vm_writer.write_call("{}.{}".format(type_name, name),
                                  nargs + n_args)

    def __handle_array(self, name):
        """
        Compiles "[ expression ]" and emits index + base of the array.
        """
        self.eat(CompilationEngine._OPEN_BRACKET)
        self.compile_expression()
        self.eat(CompilationEngine._CLOSE_BRACKET)
        self.__write_push(name)
        self.vm_writer.write_arithmetic("+")

    # ========== XML Handling ========== #

    def wrap(self, section_name, func):
        """
        Wraps a program structure block with the section_name, and
        executes its function
        :param section_name: The name of the section
        :param func: The function to perform
        :return:
        """
        self.write("<{}>".format(section_name))
        self.indent += 2
        func()
        self.indent -= 2
        self.write("</{}>".format(section_name))

    # ========== Token Handling ========== #

    def eat(self, token):
        """
        Advances past the current token if it matches.
        NOTE: a mismatch is not reported; nothing is consumed and None is
        returned.
        :param token: The regex of the token to compare
        :return: The text of the consumed token, or None on a mismatch
        """
        ctoken = self.curr_token.get_token()
        if re.match(token, ctoken):
            self.__advance_token()
            return ctoken

    def peek_token(self, compare_next):
        """
        :param compare_next: The regex to compare.
        :return: A truthy match if the current token matches the regex,
        a falsy value otherwise.
        """
        if self.curr_token:
            return re.match(compare_next, self.curr_token.get_token())
        return False

    def peek_type(self):
        """
        :return: the type of the current token
        """
        return self.curr_token.get_type()

    def peek_next(self, comp):
        """
        :param comp: The regex to compare.
        :return: A truthy match if the token after the current one matches
        the regex, a falsy value otherwise.
        """
        next_token = self.tokenizer.get_next_token()
        # Case: There actually is a next token
        if next_token:
            return re.match(comp, next_token.get_token())
        return False

    def __advance_token(self):
        """
        Moves the tokenizer forward. The current token is only replaced
        while the tokenizer reports more tokens.
        """
        self.tokenizer.advance()
        if self.tokenizer.has_more_tokens():
            self.curr_token = self.tokenizer.get_current_token()

    # ========== VM Helper ========== #

    def __get_label(self, label):
        """
        :return: The label followed by a counter that is unique within
        this engine.
        """
        self.label_count += 1
        return "{}{}".format(label, str(self.label_count))

    def __write_pop(self, name):
        """
        Emits a pop into the variable called name.
        """
        self.vm_writer.write_pop(self.symbol_table.kind_of(name),
                                 self.symbol_table.index_of(name))

    def __write_push(self, name):
        """
        Emits a push of the variable called name.
        """
        self.vm_writer.write_push(self.symbol_table.kind_of(name),
                                  self.symbol_table.index_of(name))
class CompilationEngine:
    """Compile one Jack class from ``inpath`` into VM code at ``outpath``.

    The engine drives a ``Tokenizer`` with one token of lookahead
    (``next_token``), records identifiers in a ``SymbolTable`` and emits
    commands through a ``VMWriter``. Constructing the engine performs the
    whole compilation. Syntax and semantic problems are reported by
    raising ``CompileError`` (see ``_traceback``).
    """

    def __init__(self, inpath, outpath):
        self.tokenizer = Tokenizer(inpath)
        self.symboltable = SymbolTable()
        self.vmwriter = VMWriter(outpath)
        self._class_name = None
        try:
            if self.tokenizer.has_more_tokens():
                self.compile_class()
        finally:
            # Release the output even when compilation raises.
            self.vmwriter.close()
        print("{0} completed.".format(outpath))

    def _subroutine_init(self):
        """Reset the per-subroutine state (kind, name, return type)."""
        self._sub_kind = None
        self._sub_name = None
        self._ret_type = None

    def _advance(self):
        """Advance the tokenizer, reporting an error at end of input."""
        self._check_EOF()
        self.tokenizer.advance()

    @property
    def _current_token(self):
        """Value of the current token, read from the accessor matching its
        token type (anything that is not keyword, symbol, identifier or
        integer is read as a string value)."""
        t_type = self.tokenizer.token_type
        return (self.tokenizer.keyword if t_type == T_KEYWORD else
                self.tokenizer.symbol if t_type == T_SYMBOL else
                self.tokenizer.identifier if t_type == T_ID else
                self.tokenizer.intval if t_type == T_INTEGER else
                self.tokenizer.stringval)

    @property
    def _current_tok_type(self):
        return self.tokenizer.token_type

    @property
    def _current_tok_tag(self):
        return token_tags[self._current_tok_type]

    @property
    def _next_token(self):
        """return raw next_token in the tokenizer"""
        return str(self.tokenizer.next_token)

    def _require_token(self, tok_type, token=None):
        """Advance and check that the new current token meets the
        requirement: the specific ``token`` when one is given, otherwise
        just the token type ``tok_type``. Report an error if it does not.
        """
        self._advance()
        if token and self._current_token != token:
            return self._error(expect_toks=(token, ))
        elif self._current_tok_type != tok_type:
            return self._error(expect_types=(tok_type, ))

    def _require_id(self):
        self._require_token(T_ID)

    def _require_kw(self, token):
        return self._require_token(T_KEYWORD, token=token)

    def _require_sym(self, token):
        return self._require_token(T_SYMBOL, token=token)

    def _require_brackets(self, brackets, procedure):
        """Run ``procedure`` between the opening and closing symbols given
        as the two-character string ``brackets``."""
        front, back = brackets
        self._require_sym(front)
        procedure()
        self._require_sym(back)

    def _fol_by_class_vardec(self):
        return self._next_token in (KW_STATIC, KW_FIELD)

    def _fol_by_subroutine(self):
        return self._next_token in (KW_CONSTRUCTOR, KW_FUNCTION, KW_METHOD)

    def _fol_by_vardec(self):
        return self._next_token == KW_VAR

    #########################
    # structure compilation #
    #########################

    def compile_class_name(self):
        self._require_id()
        self._class_name = self._current_token

    def compile_subroutine_name(self):
        self._require_id()
        self._sub_name = self._current_token

    def compile_var_name(self, kind=None, type=None, declare=False):
        """Consume an identifier as a variable name.

        With ``declare=True`` the name is defined in the symbol table with
        the given ``kind`` and ``type`` (both expected to be set);
        otherwise the name must already be declared, and must have type
        ``type`` when one is given.
        """
        self._require_id()
        name = self._current_token
        if declare is True:  # kind and type are not None
            self.symboltable.define(name, type, kind)
        else:
            self.check_var_name(name, type)

    def check_var_name(self, name, type=None):
        """Report an error if ``name`` is undeclared, or if ``type`` is
        given and differs from the recorded type of ``name``."""
        recorded_kind = self.symboltable.kindof(name)
        if recorded_kind is None:
            self._traceback('name used before declared: {0}'.format(name))
        elif type is not None:
            recorded_type = self.symboltable.typeof(name)
            if recorded_type != type:
                get = '{0} "{1}"'.format(recorded_type, name)
                # ``type`` is a Jack type name, not a token type, so it is
                # passed as a ready-made description rather than through
                # ``expect_types`` (which is looked up in token_tags).
                self._error(expect='type {0}'.format(type), get=get)

    def compile_type(self, advanced=False, expect='type'):
        # int, string, boolean or identifier(className)
        # ``advanced`` is True when the caller already moved onto the token.
        if advanced is False:
            self._advance()
        if (self._current_token not in SymbolTable.builtIn_types
                and self._current_tok_type != T_ID):
            return self._error(expect=expect)

    def compile_return_type(self):
        # void or type
        self._advance()
        if self._current_token != KW_VOID:
            self.compile_type(True, '"void" or type')
        self._ret_type = self._current_token
        if (self._sub_kind == KW_CONSTRUCTOR
                and self._ret_type != self._class_name):
            me = 'constructor expect current class as return type'
            self._traceback(me)

    @record_non_terminal('class')
    def compile_class(self):
        # 'class' className '{' classVarDec* subroutineDec* '}'
        self._require_kw(KW_CLASS)
        self.compile_class_name()
        self._require_sym('{')
        while self._fol_by_class_vardec():
            self.compile_class_vardec()
        while self._fol_by_subroutine():
            self.compile_subroutine()
        self._advance()
        if self._current_token != '}':
            self._traceback("Except classVarDec first, subroutineDec second.")
        if self.tokenizer.has_more_tokens():
            if self._next_token == KW_CLASS:
                self._traceback('Only expect one classDec.')
            self._traceback('Unexpected extra tokens.')

    def compile_declare(self):
        """Compile a declaration shared by classVarDec and varDec."""
        self._advance()
        id_kind = self._current_token  # ('static | field | var')
        # type varName (',' varName)* ';'
        self.compile_type()
        id_type = self._current_token
        self.compile_var_name(id_kind, id_type, declare=True)
        # compile ',' or ';'
        self._advance()
        while self._current_token == ',':
            self.compile_var_name(id_kind, id_type, declare=True)
            self._advance()
        if self._current_token != ';':
            return self._error((',', ';'))

    @record_non_terminal('classVarDec')
    def compile_class_vardec(self):
        # ('static|field') type varName (',' varName)* ';'
        self.compile_declare()

    @record_non_terminal('subroutineDec')
    def compile_subroutine(self):
        # ('constructor'|'function'|'method')
        # ('void'|type) subroutineName '(' parameterList ')' subroutineBody
        self._subroutine_init()
        self.symboltable.start_subroutine()
        self._advance()
        self._sub_kind = self._current_token
        if self._sub_kind == KW_METHOD:
            # A method receives the object as its first argument.
            self.symboltable.define('this', self._class_name, 'argument')
        self.compile_return_type()
        self.compile_subroutine_name()
        self._require_brackets('()', self.compile_parameter_list)
        self.compile_subroutine_body()

    @record_non_terminal('parameterList')
    def compile_parameter_list(self):
        # ((type varName) (',' type varName)*)?
        if self._next_token == ')':
            return
        self.compile_type()
        self.compile_var_name('argument', self._current_token, True)
        while self._next_token != ')':
            self._require_sym(',')
            self.compile_type()
            self.compile_var_name('argument', self._current_token, True)

    @record_non_terminal('subroutineBody')
    def compile_subroutine_body(self):
        # '{' varDec* statements '}'
        self._require_sym('{')
        while self._fol_by_vardec():
            self.compile_vardec()
        # The function header needs the local count, so it is emitted only
        # after every varDec has been compiled.
        self.compile_function()
        self.compile_statements()
        self._require_sym('}')

    def compile_function(self):
        """Emit the function command and the set-up of pointer ``this``."""
        fn_name = '.'.join((self._class_name, self._sub_name))
        num_locals = self.symboltable.varcount(KW_VAR)
        self.vmwriter.write_function(fn_name, num_locals)
        # set up pointer this
        if self._sub_kind == KW_CONSTRUCTOR:
            num_fields = self.symboltable.varcount(KW_FIELD)
            self.vmwriter.write_push('constant', num_fields)
            self.vmwriter.write_call('Memory.alloc', 1)
            self.vmwriter.write_pop('pointer', 0)
        elif self._sub_kind == KW_METHOD:
            self.vmwriter.write_push('argument', 0)
            self.vmwriter.write_pop('pointer', 0)

    @record_non_terminal('varDec')
    def compile_vardec(self):
        # 'var' type varName (',' varName)* ';'
        self.compile_declare()

    #########################
    # statement compilation #
    #########################

    @record_non_terminal('statements')
    def compile_statements(self):
        # (letStatement | ifStatement | whileStatement | doStatement |
        # returnStatement)*
        last_statement = None
        while self._next_token != '}':
            self._advance()
            last_statement = self._current_token
            if last_statement == 'do':
                self.compile_do()
            elif last_statement == 'let':
                self.compile_let()
            elif last_statement == 'while':
                self.compile_while()
            elif last_statement == 'return':
                self.compile_return()
            elif last_statement == 'if':
                self.compile_if()
            else:
                return self._error(expect='statement expression')

    @record_non_terminal('doStatement')
    def compile_do(self):
        # 'do' subroutineCall ';'
        self._advance()
        self.compile_subroutine_call()
        self.vmwriter.write_pop('temp', 0)  # temp[0] store useless value
        self._require_sym(';')

    @record_non_terminal('letStatement')
    def compile_let(self):
        # 'let' varName ('[' expression ']')? '=' expression ';'
        self.compile_var_name()
        var_name = self._current_token
        array = (self._next_token == '[')
        if array:
            # push (array base + subscript)
            self.compile_array_subscript(var_name)
        self._require_sym('=')
        self.compile_expression()  # push expression value
        self._require_sym(';')
        if array:
            self.vmwriter.write_pop('temp', 1)  # pop exp value to temp[1]
            # that = array base + subscript
            self.vmwriter.write_pop('pointer', 1)
            self.vmwriter.write_push('temp', 1)
            self.vmwriter.write_pop('that', 0)
        else:
            self.assign_variable(var_name)

    # Symbol-table kind -> VM memory segment.
    kind_segment = {
        'static': 'static',
        'field': 'this',
        'argument': 'argument',
        'var': 'local'
    }

    def assign_variable(self, name):
        """Emit a pop into the segment slot of variable ``name``."""
        kind = self.symboltable.kindof(name)
        index = self.symboltable.indexof(name)
        self.vmwriter.write_pop(self.kind_segment[kind], index)

    def load_variable(self, name):
        """Emit a push from the segment slot of variable ``name``."""
        kind = self.symboltable.kindof(name)
        index = self.symboltable.indexof(name)
        self.vmwriter.write_push(self.kind_segment[kind], index)

    # Counter used to number labels; the first increment through ``self``
    # creates a per-instance copy.
    label_num = 0

    @record_non_terminal('whileStatement')
    def compile_while(self):
        # 'while' '(' expression ')' '{' statements '}'
        start_label = 'WHILE_START_' + str(self.label_num)
        end_label = 'WHILE_END_' + str(self.label_num)
        self.label_num += 1
        self.vmwriter.write_label(start_label)
        self.compile_cond_expression(start_label, end_label)

    @record_non_terminal('ifStatement')
    def compile_if(self):
        # 'if' '(' expression ')' '{' statements '}'
        # ('else' '{' statements '}')?
        else_label = 'IF_ELSE_' + str(self.label_num)
        end_label = 'IF_END_' + str(self.label_num)
        self.label_num += 1
        # After the true branch control jumps to end_label; a false
        # condition lands on else_label.
        self.compile_cond_expression(end_label, else_label)
        # else clause
        if self._next_token == KW_ELSE:
            self._require_kw(KW_ELSE)
            self._require_brackets('{}', self.compile_statements)
        self.vmwriter.write_label(end_label)

    def compile_cond_expression(self, goto_label, end_label):
        """Compile '(' expression ')' '{' statements '}'.

        Control jumps to ``end_label`` when the condition is false and to
        ``goto_label`` after the statements; ``end_label`` is placed right
        after that jump.
        """
        self._require_brackets('()', self.compile_expression)
        self.vmwriter.write_arithmetic('not')
        self.vmwriter.write_if(end_label)
        self._require_brackets('{}', self.compile_statements)
        self.vmwriter.write_goto(goto_label)  # meet
        self.vmwriter.write_label(end_label)

    @record_non_terminal('returnStatement')
    def compile_return(self):
        # 'return' expression? ';'
        if self._sub_kind == KW_CONSTRUCTOR:
            self._require_kw(KW_THIS)  # constructor must return 'this'
            self.vmwriter.write_push('pointer', 0)
        elif self._next_token != ';':
            self.compile_expression()
        else:
            if self._ret_type != KW_VOID:
                self._traceback('expect return ' + self._ret_type)
            # A bare return still pushes a value for the caller to drop.
            self.vmwriter.write_push('constant', 0)
        self._require_sym(';')
        self.vmwriter.write_return()

    ##########################
    # expression compilation #
    ##########################

    unary_ops = {'-': 'neg', '~': 'not'}
    # '*' and '/' have no arithmetic command; they are compiled as calls
    # in compile_binaryop.
    binary_ops = {
        '+': 'add',
        '-': 'sub',
        '*': None,
        '/': None,
        '&': 'and',
        '|': 'or',
        '<': 'lt',
        '>': 'gt',
        '=': 'eq'
    }

    @record_non_terminal('expression')
    def compile_expression(self):
        # term (op term)*
        self.compile_term()
        while self._next_token in self.binary_ops:
            self._advance()
            if self._current_tok_type != T_SYMBOL:
                self._error(expect_types=(T_SYMBOL, ))
            op = self._current_token
            self.compile_term()
            self.compile_binaryop(op)

    def compile_binaryop(self, op):
        """Emit the command (or Math call) for binary operator ``op``."""
        if op == '*':
            self.vmwriter.write_call('Math.multiply', 2)
        elif op == '/':
            self.vmwriter.write_call('Math.divide', 2)
        else:
            self.vmwriter.write_arithmetic(self.binary_ops[op])

    kw_consts = (KW_TRUE, KW_FALSE, KW_NULL, KW_THIS)

    @record_non_terminal('term')
    def compile_term(self):
        # integerConstant | stringConstant | keywordConstant |
        # varName | varName '[' expression ']' | subroutineCall |
        # '(' expression ')' | unaryOp term
        if self._next_token == '(':
            self._require_brackets('()', self.compile_expression)
        else:
            self._advance()
            tok = self._current_token
            tok_type = self._current_tok_type
            if tok_type == T_KEYWORD and tok in self.kw_consts:
                self.compile_kw_consts(tok)
            elif tok_type == T_INTEGER:
                self.vmwriter.write_push('constant', tok)
            elif tok_type == T_STRING:
                self.compile_string(tok)
            elif tok_type == T_ID:
                # Tuple membership, not a substring test against '(.',
                # which would also accept '' and '(.'.
                if self._next_token in ('(', '.'):
                    self.compile_subroutine_call()
                elif self._next_token == '[':
                    self.check_var_name(tok)
                    self.compile_array_subscript(tok)
                    self.vmwriter.write_pop('pointer', 1)
                    self.vmwriter.write_push('that', 0)
                else:
                    self.check_var_name(tok)
                    self.load_variable(tok)
            elif tok_type == T_SYMBOL and tok in self.unary_ops:
                self.compile_term()
                self.vmwriter.write_arithmetic(self.unary_ops[tok])
            else:
                self._error(expect='term')

    # keywordConstant: 'true' | 'false' | 'null' | 'this'
    def compile_kw_consts(self, kw):
        if kw == KW_THIS:
            self.vmwriter.write_push('pointer', 0)
        elif kw == KW_TRUE:
            # true is emitted as the negation of constant 1
            self.vmwriter.write_push('constant', 1)
            self.vmwriter.write_arithmetic('neg')
        else:
            self.vmwriter.write_push('constant', 0)

    def compile_string(self, string):
        """Emit the construction of a String holding ``string``."""
        self.vmwriter.write_push('constant', len(string))
        self.vmwriter.write_call('String.new', 1)
        for char in string:
            self.vmwriter.write_push('constant', ord(char))
            self.vmwriter.write_call('String.appendChar', 2)

    def compile_subroutine_call(self):
        # subroutineName '(' expressionList ')' |
        # (className | varName) '.' subroutineName '(' expressionList ')'
        ## the first element of structure has already been compiled.
        fn_name, num_args = self.compile_call_name()
        self._require_sym('(')
        num_args = self.compile_expressionlist(num_args)
        self._require_sym(')')
        self.vmwriter.write_call(fn_name, num_args)

    def compile_call_name(self):
        # the first name of subroutine call could be (className or varName)
        # if it is followed by '.', or subroutineName if followed by '('.
        # return name of function call and num_args (1: means pushing this,
        # 0: means don't)
        if self._current_tok_type != T_ID:
            self._error(expect_types=(T_ID, ))
        name = self._current_token
        if self._next_token == '.':
            self._require_sym('.')
            self.compile_subroutine_name()
            sub_name = self._current_token
            if (name in self.symboltable.all_class_types()
                    or name in SymbolTable.builtIn_class
                    or name == self._class_name):
                return '.'.join((name, sub_name)), 0  # className
            else:
                self.check_var_name(name)  # varName with class type
                type = self.symboltable.typeof(name)
                if type in SymbolTable.builtIn_types:
                    return self._error(expect='class instance or class',
                                       get=type)
                self.load_variable(name)
                return '.'.join((type, sub_name)), 1
        elif self._next_token == '(':
            self.vmwriter.write_push('pointer', 0)  # push this to be 1st arg
            return '.'.join((self._class_name, name)), 1  # subroutineName
        # Neither form of call follows: report it here instead of
        # returning None, which the caller cannot unpack.
        return self._error(expect='"." or "("', get=self._next_token)

    @record_non_terminal('expressionList')
    def compile_expressionlist(self, num_args):
        # (expression (',' expression)*)?
        # ``num_args`` is the count already pushed; the total is returned.
        if self._next_token != ')':
            self.compile_expression()
            num_args += 1
        while self._next_token != ')':
            self._require_sym(',')
            self.compile_expression()
            num_args += 1
        return num_args

    def compile_array_subscript(self, var_name):
        # varName '[' expression ']'
        self.check_var_name(var_name, 'Array')
        # push expression value
        self._require_brackets('[]', self.compile_expression)
        self.load_variable(var_name)
        self.vmwriter.write_arithmetic('add')  # base + subscript

    def _check_EOF(self):
        if not self.tokenizer.has_more_tokens():
            self._traceback("Unexpected EOF.")

    def _error(self, expect_toks=(), expect_types=(), expect=None, get=None):
        """Report an 'Expect ... but get ...' error through ``_traceback``.

        ``expect`` is used verbatim when given; otherwise the description
        is built from the quoted ``expect_toks`` and from the tags of the
        token types in ``expect_types``. ``get`` defaults to the current
        token.
        """
        if expect is None:
            exp_tok = ' or '.join('"{0}"'.format(t) for t in expect_toks)
            exp_type = ' or '.join(
                'type {0}'.format(token_tags[t]) for t in expect_types)
            if exp_tok and exp_type:
                # str.join takes a single iterable of parts.
                expect = ' or '.join((exp_tok, exp_type))
            else:
                expect = exp_tok + exp_type
        if get is None:
            get = self._current_token
        me = 'Expect {0} but get "{1}"'.format(expect, get)
        return self._traceback(me)

    def _traceback(self, message):
        """Raise ``CompileError`` with the file name and line count taken
        from the tokenizer; dump the symbol table first when DEBUG is set.
        """
        if DEBUG:
            print('--------------------------------------------')
            print(self.symboltable)
            print(self.symboltable.all_class_types())
            print('--------------------------------------------')
        file_info = 'file: "{0}"'.format(self.tokenizer.filename)
        line_info = 'line {0}'.format(self.tokenizer.line_count)
        raise CompileError("{0}, {1}: {2}".format(file_info, line_info,
                                                  message))
class CompilationEngine:
    """
    Recursive-descent syntax analyzer for a single .jack file.

    Tokens are pulled from a Tokenizer and the parse tree is accumulated as
    indented XML text in ``self.output``. Each ``compile_*`` method handles
    one grammar rule and wraps what it emits in the matching non-terminal
    tag through ``wrap``. Syntax errors print a message and exit with -1.
    """

    # Regex fragments for symbols that are regex metacharacters. Raw strings
    # keep the values identical while avoiding invalid-escape warnings.
    _OPEN_PARENTHESIS = r"\("
    _CLOSE_PARENTHESIS = r"\)"
    _OPEN_BRACKET = r"\["
    _CLOSE_BRACKET = r"\]"
    _DOT = r"\."
    _OPS = r"\+|-|\*|\/|&|\||<|>|="
    _UNARY_OPS = r"-|~"

    def __init__(self, in_address):
        """
        Tokenizes the given file and immediately compiles its class.
        :param in_address: Path of the .jack file; the output path is the
        same path with ".jack" replaced by ".xml".
        """
        self.tokenizer = Tokenizer(in_address)
        self.curr_token = self.tokenizer.get_current_token()
        self.out_address = in_address.replace(".jack", ".xml")
        self.output = ""
        self.indent = 0
        self.compile_class()

    def write_file(self):
        """
        Writes the accumulated output to self.out_address.
        """
        with open(self.out_address, 'w') as f:
            f.write(self.output)

    def write(self, to_write):
        """
        Writes to the output, with indentation.
        :param to_write: The string to write
        """
        self.output += (self.indent * " ") + to_write + "\n"

    # ========== Compilation Methods ========== #

    def compile_class(self):
        """
        Compiles a complete class.
        """

        def comp_class():
            self.eat("class")
            self.eat(NAME_REG)
            self.eat("{")
            self.compile_class_var_dec()
            self.compile_subroutine()
            self.eat("}")

        self.wrap("class", comp_class)

    def compile_class_var_dec(self):
        """
        Compiles every consecutive static or field declaration.
        :return:
        """
        var_type_reg = "static|field"
        while self.peek_token(var_type_reg):
            self.wrap("classVarDec", self.__class_var_dec)

    def __class_var_dec(self):
        """
        Compiles a single class var declaration.
        """
        # (static|field)
        self.eat("static|field")
        # type
        self.__compile_type(False)
        # Compile varName combo until no more ","
        self.__single_var()
        self.eat(";")

    def __single_var(self):
        """
        Compiles a single set of variables separated by commas.
        """
        # varName
        self.eat(NAME_REG)
        while self.peek_token(","):
            self.eat(",")
            self.eat(NAME_REG)

    def __compile_type(self, for_function):
        """
        Compiles a type for a function or variable, determined by a received
        boolean value.
        :param for_function: True if is type of function, false otherwise.
        :return:
        """
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        if for_function:
            type_reg += "|void"
        self.eat(type_reg)

    def compile_subroutine(self):
        """
        Compiles every consecutive method, function or constructor.
        :return:
        """
        sub_regex = "(constructor|function|method)"

        def subroutine_body():
            self.eat("{")
            if self.peek_token("var"):
                self.compile_var_dec()
            self.compile_statements()
            self.eat("}")

        def subroutine_dec():
            self.eat(sub_regex)
            self.__compile_type(True)
            # subroutine name
            self.__compile_name()
            self.eat(CompilationEngine._OPEN_PARENTHESIS)
            self.compile_parameter_list()
            self.eat(CompilationEngine._CLOSE_PARENTHESIS)
            # The body is nested inside the subroutineDec element.
            self.wrap("subroutineBody", subroutine_body)

        # Keep going while another subroutine follows.
        while self.peek_token(sub_regex):
            self.wrap("subroutineDec", subroutine_dec)

    def __compile_name(self):
        """
        Consumes an identifier; prints an error and exits if the current
        token is not one.
        """
        if self.peek_type() == IDENTIFIER:
            self.eat(NAME_REG)
        else:
            print("ERROR: Identifier Expected")
            exit(-1)

    def compile_parameter_list(self):
        """
        Compiles a possibly empty parameter list, not including the
        enclosing ()
        :return:
        """
        self.wrap("parameterList", self.__params)

    def __params(self):
        """
        Compiles "type varName" pairs separated by commas, stopping as soon
        as the current token does not look like a type.
        """
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        while self.peek_token(type_reg):
            self.__compile_type(False)
            self.eat(NAME_REG)
            if not self.peek_token(","):
                break
            self.eat(",")

    def compile_var_dec(self):
        """
        Compiles a var declaration and any var declarations directly
        following it.
        :return:
        """
        self.wrap("varDec", self.__comp_var_dec)
        while self.peek_token("var"):
            self.wrap("varDec", self.__comp_var_dec)

    def __comp_var_dec(self):
        """
        Compiles a single "var type name (, name)* ;" declaration.
        """
        self.eat("var")
        self.__compile_type(False)
        self.__single_var()
        self.eat(";")

    def compile_statements(self):
        """
        Compiles a sequence of statements, not including the enclosing {}
        :return:
        """
        # Checked in this order for every statement.
        handlers = (
            ("let", self.compile_let),
            ("if", self.compile_if),
            ("while", self.compile_while),
            ("do", self.compile_do),
            ("return", self.compile_return),
        )

        def statement():
            """
            Determines the type of each statement and compiles it, until the
            current token no longer starts a statement.
            :return:
            """
            while True:
                for keyword, handler in handlers:
                    if self.peek_token(keyword):
                        handler()
                        break
                else:
                    return

        self.wrap("statements", statement)

    def compile_do(self):
        """
        Compiles a do statement
        :return:
        """
        self.wrap("doStatement", self.__comp_do)

    def __comp_do(self):
        """
        Compiles the contents of a do statement: do subroutineCall ;
        """
        self.eat("do")
        self.__subroutine_call()
        self.eat(";")

    def compile_let(self):
        """
        Compiles a let statement
        :return:
        """
        self.wrap("letStatement", self.__comp_let)

    def __comp_let(self):
        """
        Compiles the contents of a let statement:
        let varName ([expression])? = expression ;
        """
        self.eat("let")
        self.__compile_name()
        # Determine [expression]
        if self.peek_token(CompilationEngine._OPEN_BRACKET):
            self.eat(CompilationEngine._OPEN_BRACKET)
            self.compile_expression()
            self.eat(CompilationEngine._CLOSE_BRACKET)
        self.eat("=")
        self.compile_expression()
        self.eat(";")

    def compile_while(self):
        """
        Compiles a while statement.
        :return:
        """

        def comp_while():
            self.eat("while")
            self.eat(CompilationEngine._OPEN_PARENTHESIS)
            self.compile_expression()
            self.eat(CompilationEngine._CLOSE_PARENTHESIS)
            self.eat("{")
            self.compile_statements()
            self.eat("}")

        self.wrap("whileStatement", comp_while)

    def compile_return(self):
        """
        Compiles a return statement.
        :return:
        """

        def comp_return():
            self.eat("return")
            # if next is expression:
            if self.is_term():
                self.compile_expression()
            self.eat(";")

        self.wrap("returnStatement", comp_return)

    def compile_if(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        :return:
        """

        def comp_if():
            self.eat("if")
            self.eat(CompilationEngine._OPEN_PARENTHESIS)
            self.compile_expression()
            self.eat(CompilationEngine._CLOSE_PARENTHESIS)
            self.eat("{")
            self.compile_statements()
            self.eat("}")
            # Handle else:
            if self.peek_token("else"):
                self.eat("else")
                self.eat("{")
                self.compile_statements()
                self.eat("}")

        self.wrap("ifStatement", comp_if)

    def compile_expression(self):
        """
        Compiles an expression: term (op term)*
        :return:
        """

        def comp_expression():
            self.compile_term()
            # Any number of "op term" pairs may follow the first term, so
            # keep consuming while the current token is a binary operator.
            while self.peek_token(CompilationEngine._OPS):
                self.eat(CompilationEngine._OPS)
                self.compile_term()

        self.wrap("expression", comp_expression)

    def compile_term(self):
        """
        Compiles a term.
        :return:
        """

        def term():
            curr_type = self.peek_type()
            is_const = curr_type == STRING_CONST or \
                curr_type == INT_CONST or \
                curr_type == KEYWORD
            # Case: term is integerConstant or stringConstant or
            # keywordConstant
            if is_const:
                self.write(self.tokenizer.get_current_token().get_xml_wrap())
                self.__advance_token()
            # Case: token is a varName or a subroutineName
            elif curr_type == IDENTIFIER:
                # Case: varName [ expression ]
                if self.peek_next(CompilationEngine._OPEN_BRACKET):
                    self.__var_name_array()
                # Case: subroutineCall:
                elif self.peek_next(
                        CompilationEngine._OPEN_PARENTHESIS) or \
                        self.peek_next(CompilationEngine._DOT):
                    self.__subroutine_call()
                else:
                    self.eat(NAME_REG)
            # Case: ( expression )
            elif self.peek_token(CompilationEngine._OPEN_PARENTHESIS):
                self.eat(CompilationEngine._OPEN_PARENTHESIS)
                self.compile_expression()
                self.eat(CompilationEngine._CLOSE_PARENTHESIS)
            # Case: unaryOp term
            elif self.peek_token(CompilationEngine._UNARY_OPS):
                self.eat(CompilationEngine._UNARY_OPS)
                self.compile_term()
            else:
                print("Error: Incorrect Term")
                exit(-1)

        self.wrap("term", term)

    def __var_name_array(self):
        """
        Handles the case of varName[expression]
        :return:
        """
        self.eat(NAME_REG)
        self.eat(CompilationEngine._OPEN_BRACKET)
        self.compile_expression()
        self.eat(CompilationEngine._CLOSE_BRACKET)

    def is_term(self):
        """
        :return: A truthy value if the current token can begin a term
        (constant, keyword, identifier, "(" or a unary operator).
        """
        curr_type = self.peek_type()
        # Binary operators are still accepted here, as before, so that a
        # stray operator is reported by compile_term; "~" is checked
        # explicitly because it is a unary operator that _OPS lacks.
        return curr_type == STRING_CONST or curr_type == INT_CONST or \
            curr_type == KEYWORD or curr_type == IDENTIFIER or \
            self.peek_token(CompilationEngine._OPEN_PARENTHESIS) or \
            self.peek_token(CompilationEngine._OPS) or \
            self.peek_token(CompilationEngine._UNARY_OPS)

    def __subroutine_call(self):
        """
        Compiles name(expressionList) or name.name(expressionList),
        chosen by the token after the current identifier. Does nothing when
        the current token is not an identifier.
        """
        if self.curr_token.get_type() == IDENTIFIER:
            if self.peek_next(CompilationEngine._OPEN_PARENTHESIS):
                self.__subroutine_name()
            elif self.peek_next(CompilationEngine._DOT):
                self.__object_subroutine_call()
            else:
                print("Error: ( or . expected")
                exit(-1)

    def __object_subroutine_call(self):
        """
        Handles the case of name.subroutineName(expressionList)
        """
        self.eat(NAME_REG)
        self.eat(CompilationEngine._DOT)
        self.__subroutine_name()

    def __subroutine_name(self):
        """
        Handles the case of subroutineName(expressionList)
        :return:
        """
        if self.curr_token.get_type() == IDENTIFIER:
            self.eat(NAME_REG)
            self.eat(CompilationEngine._OPEN_PARENTHESIS)
            self.compile_expression_list()
            self.eat(CompilationEngine._CLOSE_PARENTHESIS)

    def compile_expression_list(self):
        """
        Compiles a possibly empty list of comma separated expressions
        :return:
        """

        def exp_list():
            if self.is_term():
                self.compile_expression()
                while self.peek_token(","):
                    self.eat(",")
                    self.compile_expression()

        self.wrap("expressionList", exp_list)

    def wrap(self, section_name, func):
        """
        Wraps a program structure block with the section_name, and executes
        its function
        :param section_name: The name of the section
        :param func: The function to perform
        :return:
        """
        self.write("<{}>".format(section_name))
        self.indent += 2
        func()
        self.indent -= 2
        self.write("</{}>".format(section_name))

    def eat(self, token):
        """
        Handles advancing and writing terminal tokens.
        Will exit the program if an error occurs.
        :param token: The regex of the token to compare
        :return:
        """
        if re.match(token, self.curr_token.get_token()):
            self.write(self.curr_token.get_xml_wrap())
            self.__advance_token()
        else:
            print("Error: Expected " + token)
            exit(-1)

    def peek_token(self, compare_next):
        """
        :param compare_next: The regex to compare.
        :return: A truthy match object if the current token matches the
        regex (anchored at its start), False otherwise.
        """
        if self.curr_token:
            return re.match(compare_next, self.curr_token.get_token())
        return False

    def peek_type(self):
        """
        :return: the type of the current token
        """
        return self.curr_token.get_type()

    def peek_next(self, comp):
        """
        :param comp: The regex to compare.
        :return: A truthy match object if the token after the current one
        matches the regex, False if it does not or there is no next token.
        """
        next_token = self.tokenizer.get_next_token()
        # Case: There actually is a next token
        if next_token:
            return re.match(comp, next_token.get_token())
        return False

    def __advance_token(self):
        """
        Advances the tokenizer; curr_token is refreshed only while the
        tokenizer reports more tokens, otherwise it keeps its last value.
        """
        self.tokenizer.advance()
        if self.tokenizer.has_more_tokens():
            self.curr_token = self.tokenizer.get_current_token()
class Compiler:
    """Syntax analyzer that parses one Jack file and emits it through XMLWriter.

    The tokenizer exposes a current token and a look-ahead ``next_token``.
    Most ``compile_*`` methods decide what to do from the look-ahead and
    then advance. Syntax errors are raised as CompileError via
    ``_traceback``.
    """

    def __init__(self, inpath, outpath):
        """Tokenize *inpath* and write its parse to *outpath*."""
        self.tokenizer = Tokenizer(inpath)
        XMLWriter.set_filepath(outpath)
        if self.tokenizer.has_more_tokens():
            self.compile_class()
        XMLWriter.close()

    def _write_current_terminal(self):
        """Write the current token together with its tag."""
        XMLWriter.write_terminal(self._current_token, self._current_tok_tag)

    def _advance(self):
        """Advance the tokenizer, raising on unexpected end of input."""
        self._check_EOF()
        self.tokenizer.advance()

    # Keywords accepted as primitive types / as keyword constants.
    type_kws = (KW_INT, KW_CHAR, KW_BOOLEAN)
    kw_consts = (KW_TRUE, KW_FALSE, KW_NULL, KW_THIS)

    @property
    def _current_token(self):
        """Return the current token's value, read from the tokenizer
        attribute that matches its type (string value for anything that is
        not a keyword, symbol, identifier or integer)."""
        t_type = self.tokenizer.token_type
        if t_type == T_KEYWORD:
            return self.tokenizer.keyword
        if t_type == T_SYMBOL:
            return self.tokenizer.symbol
        if t_type == T_ID:
            return self.tokenizer.identifier
        if t_type == T_INTEGER:
            return self.tokenizer.intval
        return self.tokenizer.stringval

    @property
    def _current_tok_type(self):
        """Return the tokenizer's current token type."""
        return self.tokenizer.token_type

    @property
    def _current_tok_tag(self):
        """Return the tag registered in token_tags for the current type."""
        return token_tags[self._current_tok_type]

    @property
    def _next_token(self):
        """return raw next_token in the tokenizer"""
        return str(self.tokenizer.next_token)

    def _require_token(self, tok_type, token=None):
        """Check whether the next_token(terminal) in the tokenizer meets the
        requirement (specific token or just token type). If meets, tokenizer
        advances (update current_token and next_token) and terminal will be
        written into outfile; If not, report an error."""
        self._advance()
        if token and self._current_token != token:
            return self._error(expect_toks=(token,))
        elif self._current_tok_type != tok_type:
            return self._error(expect_types=(tok_type,))
        self._write_current_terminal()

    def _require_id(self):
        """Require the next token to be an identifier."""
        return self._require_token(T_ID)

    def _require_kw(self, token):
        """Require the next token to be the keyword *token*."""
        return self._require_token(T_KEYWORD, token=token)

    def _require_sym(self, token):
        """Require the next token to be the symbol *token*."""
        return self._require_token(T_SYMBOL, token=token)

    def _require_brackets(self, brackets, procedure):
        """Require the opening bracket, run *procedure*, then require the
        closing bracket. *brackets* is a two-character pair such as '()'."""
        front, back = brackets
        self._require_sym(front)
        procedure()
        self._require_sym(back)

    def _fol_by_class_vardec(self):
        """Return True if the look-ahead starts a class variable declaration."""
        return self._next_token in (KW_STATIC, KW_FIELD)

    def _fol_by_subroutine(self):
        """Return True if the look-ahead starts a subroutine declaration."""
        return self._next_token in (KW_CONSTRUCTOR, KW_FUNCTION, KW_METHOD)

    def _fol_by_vardec(self):
        """Return True if the look-ahead starts a local variable declaration."""
        return self._next_token == KW_VAR

    #########################
    # structure compilation #
    #########################

    # the compilation of three types of name might seem redundant here, but
    # it was for abstraction and later code generation in project 11.
    def compile_class_name(self):
        self._require_id()

    def compile_subroutine_name(self):
        self._require_id()

    def compile_var_name(self):
        self._require_id()

    def compile_type(self, advanced=False, expect='type'):
        """Compile a type: int, char, boolean or an identifier (className).

        advanced: True when the caller has already advanced onto the token.
        expect: description used in the error message on mismatch.
        """
        if advanced is False:
            self._advance()
        if self._current_token in self.type_kws:
            return self._write_current_terminal()
        elif self._current_tok_type == T_ID:
            return self._write_current_terminal()
        else:
            return self._error(expect=expect)

    def compile_void_or_type(self):
        # void or type
        self._advance()
        if self._current_token == KW_VOID:
            self._write_current_terminal()
        else:
            self.compile_type(True, '"void" or type')

    @record_non_terminal('class')
    def compile_class(self):
        # 'class' className '{' classVarDec* subroutineDec* '}'
        self._require_kw(KW_CLASS)
        self.compile_class_name()
        self._require_sym('{')
        while self._fol_by_class_vardec():
            self.compile_class_vardec()
        while self._fol_by_subroutine():
            self.compile_subroutine()
        self._advance()
        if self._current_token != '}':
            self._traceback("Except classVarDec or subroutineDec.")
        self._write_current_terminal()

    def compile_declare(self):
        """Compile a declaration whose leading keyword is the look-ahead:
        keyword type varName (',' varName)* ';'"""
        self._advance()
        self._write_current_terminal()
        # type varName (',' varName)* ';'
        self.compile_type()
        self.compile_var_name()
        # compile ',' or ';'
        self._advance()
        while self._current_token == ',':
            self._write_current_terminal()
            self.compile_var_name()
            self._advance()
        if self._current_token != ';':
            return self._error((',', ';'))
        self._write_current_terminal()

    @record_non_terminal('classVarDec')
    def compile_class_vardec(self):
        # ('static|field') type varName (',' varName)* ';'
        self.compile_declare()

    @record_non_terminal('subroutineDec')
    def compile_subroutine(self):
        # ('constructor'|'function'|'method')
        # ('void'|type) subroutineName '(' parameterList ')' subroutineBody
        self._advance()
        self._write_current_terminal()  # ('constructor'|'function'|'method')
        self.compile_void_or_type()
        self.compile_subroutine_name()
        self._require_brackets('()', self.compile_parameter_list)
        self.compile_subroutine_body()

    @record_non_terminal('parameterList')
    def compile_parameter_list(self):
        # ((type varName) (',' type varName)*)?
        if self._next_token == ')':
            return
        self.compile_type()
        self.compile_var_name()
        while self._next_token != ')':
            self._require_sym(',')
            self.compile_type()
            self.compile_var_name()

    @record_non_terminal('subroutineBody')
    def compile_subroutine_body(self):
        # '{' varDec* statements '}'
        self._require_sym('{')
        while self._fol_by_vardec():
            self.compile_vardec()
        self.compile_statements()
        self._require_sym('}')

    @record_non_terminal('varDec')
    def compile_vardec(self):
        # 'var' type varName (',' varName)* ';'
        self.compile_declare()

    #########################
    # statement compilation #
    #########################

    @record_non_terminal('statements')
    def compile_statements(self):
        # (letStatement | ifStatement | whileStatement | doStatement |
        # returnStatement)*
        last_statement = None
        while self._next_token != '}':
            self._advance()
            last_statement = self._current_token
            if last_statement == 'do':
                self.compile_do()
            elif last_statement == 'let':
                self.compile_let()
            elif last_statement == 'while':
                self.compile_while()
            elif last_statement == 'return':
                self.compile_return()
            elif last_statement == 'if':
                self.compile_if()
            else:
                return self._error(expect='statement expression')
        # A subroutine body must finish with a return statement.
        # NOTE(review): relies on STACK[-2] naming the enclosing
        # non-terminal; STACK is maintained outside this class - confirm.
        if STACK[-2] == 'subroutineBody' and last_statement != 'return':
            self._error(expect='return statement', get=last_statement)

    @record_non_terminal('doStatement')
    def compile_do(self):
        # 'do' subroutineCall ';'
        self._write_current_terminal()
        # compile identifier first
        self._advance()
        self.compile_subroutine_call()
        self._require_sym(';')

    @record_non_terminal('letStatement')
    def compile_let(self):
        # 'let' varName ('[' expression ']')? '=' expression ';'
        self._write_current_terminal()
        self.compile_var_name()
        if self._next_token == '[':
            self._compile_array_subscript()
        self._require_sym('=')
        self.compile_expression()
        self._require_sym(';')

    @record_non_terminal('whileStatement')
    def compile_while(self):
        # 'while' '(' expression ')' '{' statements '}'
        self._write_current_terminal()
        self._require_brackets('()', self.compile_expression)
        self._require_brackets('{}', self.compile_statements)

    @record_non_terminal('returnStatement')
    def compile_return(self):
        # 'return' expression? ';'
        self._write_current_terminal()
        if self._next_token != ';':
            self.compile_expression()
        self._require_sym(';')

    @record_non_terminal('ifStatement')
    def compile_if(self):
        # 'if' '(' expression ')' '{' statements '}'
        # ('else' '{' statements '}')?
        self._write_current_terminal()
        self._require_brackets('()', self.compile_expression)
        self._require_brackets('{}', self.compile_statements)
        # else clause
        if self._next_token == KW_ELSE:
            self._require_kw(KW_ELSE)
            self._require_brackets('{}', self.compile_statements)

    ##########################
    # expression compilation #
    ##########################

    @record_non_terminal('expression')
    def compile_expression(self):
        # term (op term)*
        self.compile_term()
        while is_op(self._next_token):
            self.compile_op()
            self.compile_term()

    @record_non_terminal('term')
    def compile_term(self):
        # integerConstant | stringConstant | keywordConstant |
        # varName | varName '[' expression ']' | subroutineCall |
        # '(' expression ')' | unaryOp term
        if self._next_token == '(':
            self._require_brackets('()', self.compile_expression)
        elif self._next_token in set('-~'):
            self.compile_unaryop()
        else:
            self._advance()
            tok = self._current_token
            tok_type = self._current_tok_type
            if tok in self.kw_consts or tok_type in (T_INTEGER, T_STRING):
                self._write_current_terminal()
            elif tok_type == T_ID:
                # Token membership, not a substring test: only an exact
                # '(' or '.' look-ahead marks a subroutine call.
                if self._next_token in ('(', '.'):
                    self.compile_subroutine_call()
                else:
                    self._write_current_terminal()
                    if self._next_token == '[':
                        self._compile_array_subscript()
            else:
                self._error(expect='term')

    def compile_call_name(self):
        # the first name of subroutine call could be (className or varName) if
        # it is followed by '.', or subroutineName if followed by '('.
        if self._current_tok_type != T_ID:
            self._error(expect_types=(T_ID,))
        self._write_current_terminal()  # just write it without analysis.
        # this method will be extended to decide which kind the name is.

    def compile_subroutine_call(self):
        # subroutineName '(' expressionList ')' | (className |
        # varName) '.' subroutineName '(' expressionList ')'
        ## the first element of structure has already been compiled.
        self.compile_call_name()
        if self._next_token == '.':
            self._require_sym('.')
            self.compile_subroutine_name()
        self._require_brackets('()', self.compile_expressionlist)

    @record_non_terminal('expressionList')
    def compile_expressionlist(self):
        # (expression (',' expression)*)?
        if self._next_token != ')':
            self.compile_expression()
        while self._next_token != ')':
            self._require_sym(',')
            self.compile_expression()

    def compile_op(self):
        # exclude '~'
        self._advance()
        if self._current_token == '~':
            self._traceback('Unexpected operator: ~')
        self._write_current_terminal()

    def compile_unaryop(self):
        self._advance()
        self._write_current_terminal()  # symbol: - or ~
        self.compile_term()

    def _compile_array_subscript(self):
        # '[' expression ']'
        self._require_brackets('[]', self.compile_expression)

    def _check_EOF(self):
        """Raise CompileError if the tokenizer has no more tokens."""
        if not self.tokenizer.has_more_tokens():
            self._traceback("Unexpected EOF.")

    def _error(self, expect_toks=(), expect_types=(), expect=None, get=None):
        """Raise CompileError with an 'Expect ... but get ...' message.

        expect_toks: literal tokens that would have been accepted.
        expect_types: token types that would have been accepted.
        expect: ready-made description; when given, the two tuples above
            are ignored.
        get: the offending text; defaults to the current token.
        """
        if expect is None:
            exp_tok = ' or '.join('"{0}"'.format(t) for t in expect_toks)
            exp_type = ' or '.join(
                'type {0}'.format(token_tags[t]) for t in expect_types)
            if exp_tok and exp_type:
                # str.join takes one iterable, hence the tuple.
                expect = ' or '.join((exp_tok, exp_type))
            else:
                # At most one of the two is non-empty here.
                expect = exp_tok + exp_type
        if get is None:
            get = self._current_token
        me = 'Expect {0} but get "{1}"'.format(expect, get)
        return self._traceback(me)

    def _traceback(self, message):
        """Raise CompileError carrying the file name, line and *message*."""
        file_info = 'file: "{0}"'.format(self.tokenizer.filename)
        line_info = 'line {0}'.format(self.tokenizer.line_count)
        raise CompileError("{0}, {1}: {2}".format(file_info, line_info,
                                                  message))