def __init__(self, jack_file):
    """Set up the collaborators needed to compile one .jack file."""
    self.vm_writer = VMWriter(jack_file)
    self.tokenizer = JackTokenizer(jack_file)
    self.symbol_table = SymbolTable()
    # Label counters start at -1 so the first pre-incremented index is 0.
    self.if_index = -1
    self.while_index = -1
def __init__(self, filepath, vm_writer):
    """Prepare the XML output file, tokenizer and symbol table for one source."""
    # Strip the ".jack" suffix and write the parse tree next to the source.
    xml_path = filepath[:-5] + ".myImpl.xml"
    self.wf = open(xml_path, 'w')
    self.tokenizer = JackTokenizer(filepath)
    self.symbol_table = SymbolTable()
    self.vmw = vm_writer
    # Filled in once compile_class() sees the class name.
    self.compiled_class_name = None
    # Counter for generating unique control-flow labels.
    self.label_num = 0
def __init__(self, jack_file, vm_file):
    """Wire up tokenizer, symbol table and VM writer for one compilation run."""
    # Input side: token stream over the .jack source.
    self._jack_tokenizer = JackTokenizer(jack_file)
    # Output side: the VM writer plus accumulated text buffers.
    self._vm_file = vm_file
    self._vm_writer = VmWriter(self._vm_file)
    self._vm_text = ''
    self._xml_text = ''
    # Identifier bookkeeping.
    self._symbol_table = SymbolTable()
    self._class_name = None
    self._compiled_class_name = ''
    # Counter used to generate unique flow-control labels.
    self._label_count = 0
def __init__(self, input_file, output_file):
    # Tokenizer over the Jack source and a raw handle for the output file.
    self.tokenizer = JackTokenizer(input_file)
    self.out = open(output_file, 'w')
    self.token = None
    self.class_name = None
    # --- Project 11 additions (symbol table + VM code generation) ---
    # NOTE(review): output_file is opened twice, once above and once inside
    # VMWriter — confirm the two writers do not clobber each other's output.
    self.symbol_table = SymbolTable()
    self.vm_writer = VMWriter(output_file)
def compile(self, input_file, output_file):
    """Top-level entry point: tokenize *input_file* and compile the single
    'class' declaration it must contain, writing results to *output_file*.

    Raises:
        Exception: if the first meaningful token is not 'class'.
    """
    self._tokenizer = JackTokenizer(input_file)
    self._output_file = output_file
    self._offset = 0  # current XML indentation depth
    # NOTE(review): has_more_tokens is referenced without call parentheses —
    # if it is a method rather than a property this condition is always
    # truthy; confirm against the JackTokenizer implementation.
    while self._tokenizer.has_more_tokens:
        if self._tokenizer.advance() == VALID_TOKEN:
            if self._tokenizer.current_token == 'class':
                tag = 'class'
                self._open_tag(tag)
                self._compile_class()
                self._close_tag(tag)
            else:
                line_n = self._tokenizer.line_number
                raise Exception(
                    f"Class declaration expected. Line {line_n}")
def __init__(self, source_filename):
    """Open the XML destination for *source_filename* and build a tokenizer.

    A trailing ".jack" (case-insensitive) is replaced with ".xml"; any
    other name simply gets ".xml" appended.
    """
    base = (source_filename[:-5]
            if source_filename.lower().endswith(".jack")
            else source_filename)
    destination_filename = base + ".xml"
    # Destination for the generated XML parse tree.
    self.destination_file = open(destination_filename, 'w')
    # Token stream over the input file.
    self.tokenizer = JackTokenizer(source_filename)
def __init__(self, source_filename):
    """Create the .vm output file and the compiler's collaborators.

    A trailing ".jack" (case-insensitive) is replaced with ".vm"; any
    other name simply gets ".vm" appended.
    """
    base = (source_filename[:-5]
            if source_filename.lower().endswith(".jack")
            else source_filename)
    destination_filename = base + ".vm"
    # Destination for the generated VM code.
    self.destination_file = open(destination_filename, 'w')
    # Token stream over the input file.
    self.tokenizer = JackTokenizer(source_filename)
    # Identifier bookkeeping for the class being compiled.
    self.symbol_table = SymbolTable()
    # VM writer targets the already-open destination handle.
    self.vm_writer = VMWriter(self.destination_file)
def main():
    """CLI entry point: compile one .jack file, or every .jack file in a directory."""
    if len(sys.argv) != 2:
        print(
            "Expected 1 argument (either the .jack file or a directory containing .jack files). Exiting!"
        )
        return
    target = sys.argv[1]
    if target.endswith(".jack"):
        # Single-file mode.
        jack_files = [target]
    else:
        # Directory mode: pick up every .jack file inside it.
        jack_files = [
            join(target, f) for f in listdir(target) if f.endswith(".jack")
        ]
    for jack_file in jack_files:
        output_name = jack_file.split(".jack")[0] + "Nisarg.xml"
        engine = CompilationEngine(JackTokenizer(jack_file), output_name)
        engine.compile()
class CompilationEngine():
    """Compiles a tokenized .jack source into VM code plus an XML parse tree.

    Tokens are consumed from a JackTokenizer, identifiers are tracked in a
    SymbolTable, VM instructions are emitted through a VmWriter, and the
    parse structure is mirrored into ``self._xml_text``.
    """

    def __init__(self, jack_file, vm_file):
        self._jack_tokenizer = JackTokenizer(jack_file)
        self._vm_file = vm_file
        self._vm_text = ''
        self._xml_text = ''
        self._symbol_table = SymbolTable()
        self._vm_writer = VmWriter(self._vm_file)
        self._class_name = None
        self._label_count = 0
        # Name of the class being compiled; used to qualify subroutine names.
        self._compiled_class_name = ''

    def compile_class(self):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        self._write_start('class')
        self._compile_keyword()
        self._write('IdentifierInfo', 'category: class')
        self._compiled_class_name = self._compile_identifier()
        self._compile_symbol()
        while self._what_next_token([Keyword.STATIC, Keyword.FIELD]):
            self.compile_class_var_dec()
        while self._what_next_token(
                [Keyword.CONSTRUCTOR, Keyword.FUNCTION, Keyword.METHOD]):
            self.compile_subroutine_dec()
        self._compile_symbol()
        self._write_end('class')

    def compile_class_var_dec(self):
        """classVarDec: ('static' | 'field') type varName (',' varName)* ';'"""
        self._write_start('classVarDec')
        token = self._compile_keyword()
        kind = None
        if token == Keyword.STATIC:
            kind = Kind.STATIC
        elif token == Keyword.FIELD:
            kind = Kind.FIELD
        # Peek at the type so every declared name is recorded with it.
        type_token = self._jack_tokenizer.next_token()
        if self._what_next_token([Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
            self._compile_keyword()
        else:
            # The type is a class name (identifier), not a built-in keyword.
            self._write('IdentifierInfo', 'category: class')
            self._compile_identifier()
        self._compile_var_name(declaration=True, type=type_token, kind=kind)
        while self._what_next_token([Symbol.COMMA]):
            self._compile_symbol()
            self._compile_var_name(declaration=True, type=type_token, kind=kind)
        self._compile_symbol()
        self._write_end('classVarDec')

    def compile_subroutine_dec(self):
        """subroutineDec: ('constructor' | 'function' | 'method')
        ('void' | type) subroutineName '(' parameterList ')' subroutineBody
        """
        # Each subroutine gets a fresh scope for args/locals.
        self._symbol_table.start_subroutine()
        self._write_start('subroutineDec')
        token = self._compile_keyword()
        if self._jack_tokenizer.next_token() == Keyword.VOID:
            self._compile_keyword()
        else:
            self._jack_tokenizer.next_token()
            if self._what_next_token(
                    [Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
                self._compile_keyword()
            else:
                self._write('IdentifierInfo', 'category: class')
                self._compile_identifier()
        self._write('IdentifierInfo', 'category: subroutine')
        subroutine_name = self._compile_identifier()
        self._compile_symbol()
        if token == Keyword.METHOD:
            # Methods receive the instance as hidden argument 0.
            self._symbol_table.define('$this', self._compiled_class_name,
                                      Kind.ARG)
        self.compile_parameter_list()
        self._compile_symbol()
        self.compile_subroutine_body(subroutine_name, token)
        self._write_end('subroutineDec')

    def compile_parameter_list(self):
        """parameterList: ((type varName) (',' type varName)*)?"""
        self._write_start('parameterList')
        if (self._jack_tokenizer.next_token()
                in [Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]
                or self._jack_tokenizer.next_token_type() == Type.IDENTIFIER):
            type_token = self._jack_tokenizer.next_token()
            if self._what_next_token(
                    [Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
                self._compile_keyword()
            else:
                self._write('IdentifierInfo', 'category: class')
                self._compile_identifier()
            self._compile_var_name(declaration=True, type=type_token,
                                   kind=Kind.ARG)
            while self._what_next_token([Symbol.COMMA]):
                self._compile_symbol()
                type_token = self._jack_tokenizer.next_token()
                if self._what_next_token(
                        [Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
                    self._compile_keyword()
                else:
                    self._write('IdentifierInfo', 'category: class')
                    self._compile_identifier()
                self._compile_var_name(declaration=True, type=type_token,
                                       kind=Kind.ARG)
        self._write_end('parameterList')

    def compile_subroutine_body(self, subroutine_name, subroutine_token):
        """subroutineBody: '{' varDec* statements '}'

        Emits the VM 'function' header (after counting locals), then the
        constructor/method preamble, then the statements.  Returns the
        number of locals declared.
        """
        self._write_start('subroutineBody')
        self._compile_symbol()
        local_num = 0
        while self._what_next_token([Keyword.VAR]):
            var_num = self.compile_var_dec()
            local_num += var_num
        self._vm_writer.write_function(
            '%s.%s' % (self._compiled_class_name, subroutine_name), local_num)
        if subroutine_token == Keyword.METHOD:
            # Methods: anchor 'this' to the object passed as argument 0.
            self._vm_writer.write_push(Segment.ARG, 0)
            self._vm_writer.write_pop(Segment.POINTER, 0)
        elif subroutine_token == Keyword.CONSTRUCTOR:
            # Constructors: allocate one word per field and anchor 'this'.
            self._vm_writer.write_push(
                Segment.CONST, self._symbol_table.var_count(Kind.FIELD))
            self._vm_writer.write_call('Memory.alloc', 1)
            self._vm_writer.write_pop(Segment.POINTER, 0)
        elif subroutine_token == Keyword.FUNCTION:
            pass  # plain functions need no preamble
        self.compile_statements()
        self._compile_symbol()
        self._write_end('subroutineBody')
        return local_num

    def compile_var_dec(self):
        """varDec: 'var' type varName (',' varName)* ';'

        Returns the number of variables declared in this statement.
        """
        self._write_start('varDec')
        self._compile_keyword()
        type_token = self._jack_tokenizer.next_token()
        if self._what_next_token([Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
            self._compile_keyword()
        else:
            self._write('IdentifierInfo', 'category: class')
            self._compile_identifier()
        self._compile_var_name(declaration=True, type=type_token, kind=Kind.VAR)
        var_num = 1  # TODO
        while self._what_next_token([Symbol.COMMA]):
            self._compile_symbol()
            self._compile_var_name(declaration=True, type=type_token,
                                   kind=Kind.VAR)
            var_num += 1
        self._compile_symbol()
        self._write_end('varDec')
        return var_num

    def compile_statements(self):
        """statements: (letStatement | ifStatement | whileStatement |
        doStatement | returnStatement)*"""
        self._write_start('statements')
        while self._what_next_token([
                Keyword.LET, Keyword.IF, Keyword.WHILE, Keyword.DO,
                Keyword.RETURN
        ]):
            if self._what_next_token([Keyword.LET]):
                self.compile_let()
            elif self._what_next_token([Keyword.IF]):
                self.compile_if()
            elif self._what_next_token([Keyword.WHILE]):
                self.compile_while()
            elif self._what_next_token([Keyword.DO]):
                self.compile_do()
            elif self._what_next_token([Keyword.RETURN]):
                self.compile_return()
        self._write_end('statements')

    def compile_let(self):
        """letStatement: 'let' varName ('[' expression ']')? '=' expression ';'"""
        self._write_start('letStatement')
        self._compile_keyword()
        let_var = self._compile_var_name(let=True)
        if self._what_next_token([Symbol.LEFT_BOX_BRACKET]):
            # Array target: compute (base + index), park it in temp 2 while
            # the right-hand side is evaluated, then store through THAT.
            self._compile_symbol()
            self.compile_expression()
            self._compile_symbol()
            self._compile_symbol()
            kind = self._symbol_table.kind_of(let_var)
            if kind == Kind.ARG:
                self._vm_writer.write_push(
                    Segment.ARG, self._symbol_table.index_of(let_var))
            elif kind == Kind.VAR:
                self._vm_writer.write_push(
                    Segment.LOCAL, self._symbol_table.index_of(let_var))
            elif kind == Kind.FIELD:
                self._vm_writer.write_push(
                    Segment.THIS, self._symbol_table.index_of(let_var))
            elif kind == Kind.STATIC:
                self._vm_writer.write_push(
                    Segment.STATIC, self._symbol_table.index_of(let_var))
            self._vm_writer.write_arithmetic(Command.ADD)
            self._vm_writer.write_pop(Segment.TEMP, 2)
            self.compile_expression()
            self._vm_writer.write_push(Segment.TEMP, 2)
            self._vm_writer.write_pop(Segment.POINTER, 1)
            self._vm_writer.write_pop(Segment.THAT, 0)
            self._compile_symbol()
        else:
            # Plain variable target: evaluate RHS, pop into its segment slot.
            self._compile_symbol()
            self.compile_expression()
            self._compile_symbol()
            kind = self._symbol_table.kind_of(let_var)
            if kind == Kind.VAR:
                self._vm_writer.write_pop(Segment.LOCAL,
                                          self._symbol_table.index_of(let_var))
            elif kind == Kind.ARG:
                self._vm_writer.write_pop(Segment.ARG,
                                          self._symbol_table.index_of(let_var))
            elif kind == Kind.FIELD:
                self._vm_writer.write_pop(Segment.THIS,
                                          self._symbol_table.index_of(let_var))
            elif kind == Kind.STATIC:
                self._vm_writer.write_pop(Segment.STATIC,
                                          self._symbol_table.index_of(let_var))
        self._write_end('letStatement')

    def compile_if(self):
        """ifStatement: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?

        Uses the negated-condition pattern: if-goto l1 skips the then-branch.
        """
        self._write_start('ifStatement')
        self._compile_keyword()
        self._compile_symbol()
        self.compile_expression()
        self._compile_symbol()
        self._vm_writer.write_arithmetic(Command.NOT)
        l1 = self._new_label()
        l2 = self._new_label()
        self._vm_writer.write_if(l1)
        self._compile_symbol()
        self.compile_statements()
        self._compile_symbol()
        self._vm_writer.write_goto(l2)
        self._vm_writer.write_label(l1)
        if self._what_next_token([Keyword.ELSE]):
            self._compile_keyword()
            self._compile_symbol()
            self.compile_statements()
            self._compile_symbol()
        self._vm_writer.write_label(l2)
        self._write_end('ifStatement')

    def compile_while(self):
        """whileStatement: 'while' '(' expression ')' '{' statements '}'"""
        self._write_start('whileStatement')
        l1 = self._new_label()
        l2 = self._new_label()
        self._compile_keyword()
        self._vm_writer.write_label(l1)
        self._compile_symbol()
        self.compile_expression()
        self._compile_symbol()
        # Negate the condition so a single if-goto exits the loop.
        self._vm_writer.write_arithmetic(Command.NOT)
        self._vm_writer.write_if(l2)
        self._compile_symbol()
        self.compile_statements()
        self._compile_symbol()
        self._vm_writer.write_goto(l1)
        self._vm_writer.write_label(l2)
        self._write_end('whileStatement')

    def compile_do(self):
        """doStatement: 'do' subroutineCall ';'

        The returned value is always discarded into temp 0.
        """
        self._write_start('doStatement')
        self._compile_keyword()
        if self._what_next_token([Symbol.LEFT_ROUND_BRACKET], 1):
            # subroutineName '(' ... ')': implicit method call on 'this'.
            self._write('IdentifierInfo', 'category: subroutine')
            subroutine_name = self._compile_identifier()
            self._compile_symbol()
            self._vm_writer.write_push(Segment.POINTER, 0)
            arg_num = self.compile_expression_list()
            self._compile_symbol()
            self._vm_writer.write_call(
                '%s.%s' % (self._compiled_class_name, subroutine_name),
                arg_num + 1)
        else:
            identifier_str = self._jack_tokenizer.next_token()
            if self._symbol_table.kind_of(identifier_str):
                # varName '.' subroutineName: method call on an instance —
                # push the instance as the hidden first argument.
                instance_name = self._compile_var_name(call=True)
                self._compile_symbol()
                self._write('IdentifierInfo', 'category: subroutine')
                subroutine_name = self._compile_identifier()
                self._compile_symbol()
                kind = self._symbol_table.kind_of(instance_name)
                if kind == Kind.ARG:
                    self._vm_writer.write_push(
                        Segment.ARG,
                        self._symbol_table.index_of(instance_name))
                elif kind == Kind.VAR:
                    self._vm_writer.write_push(
                        Segment.LOCAL,
                        self._symbol_table.index_of(instance_name))
                elif kind == Kind.FIELD:
                    self._vm_writer.write_push(
                        Segment.THIS,
                        self._symbol_table.index_of(instance_name))
                elif kind == Kind.STATIC:
                    self._vm_writer.write_push(
                        Segment.STATIC,
                        self._symbol_table.index_of(instance_name))
                arg_num = self.compile_expression_list()
                self._compile_symbol()
                self._vm_writer.write_call(
                    '%s.%s' % (self._symbol_table.type_of(instance_name),
                               subroutine_name), arg_num + 1)
            else:
                # className '.' subroutineName: function/constructor call.
                self._write('IdentifierInfo', 'category: class')
                class_name = self._compile_identifier()
                self._compile_symbol()
                self._write('IdentifierInfo', 'category: subroutine')
                subroutine_name = self._compile_identifier()
                self._compile_symbol()
                arg_num = self.compile_expression_list()
                self._compile_symbol()
                self._vm_writer.write_call(
                    '%s.%s' % (class_name, subroutine_name), arg_num)
        self._compile_symbol()
        self._write_end('doStatement')
        self._vm_writer.write_pop(Segment.TEMP, 0)

    def compile_return(self):
        """returnStatement: 'return' expression? ';'

        Void returns still push constant 0, per the VM calling convention.
        """
        self._write_start('returnStatement')
        self._compile_keyword()
        if not self._what_next_token([Symbol.SEMI_COLON]):
            self.compile_expression()
        else:
            self._vm_writer.write_push(Segment.CONST, 0)
        self._compile_symbol()
        self._vm_writer.write_return()
        self._write_end('returnStatement')

    def compile_expression(self):
        """expression: term (op term)*

        Operators are applied left-to-right after both operands are pushed.
        """
        self._write_start('expression')
        self.compile_term()
        while self._what_next_token([
                Symbol.PLUS, Symbol.MINUS, Symbol.MULTI, Symbol.DIV,
                Symbol.AND, Symbol.PIPE, Symbol.LESS_THAN,
                Symbol.GREATER_THAN, Symbol.EQUAL
        ]):
            token = self._compile_symbol()
            self.compile_term()
            if token == Symbol.PLUS:
                self._vm_writer.write_arithmetic(Command.ADD)
            elif token == Symbol.MINUS:
                self._vm_writer.write_arithmetic(Command.SUB)
            elif token == Symbol.MULTI:
                # Multiplication/division are OS calls, not VM primitives.
                self._vm_writer.write_call('Math.multiply', 2)
            elif token == Symbol.DIV:
                self._vm_writer.write_call('Math.divide', 2)
            elif token == Symbol.AND:
                self._vm_writer.write_arithmetic(Command.AND)
            elif token == Symbol.PIPE:
                self._vm_writer.write_arithmetic(Command.OR)
            elif token == Symbol.LESS_THAN:
                self._vm_writer.write_arithmetic(Command.LT)
            elif token == Symbol.GREATER_THAN:
                self._vm_writer.write_arithmetic(Command.GT)
            elif token == Symbol.EQUAL:
                self._vm_writer.write_arithmetic(Command.EQ)
        self._write_end('expression')

    def compile_term(self):
        """term: integerConstant | stringConstant | keywordConstant | varName |
        varName '[' expression ']' | subroutineCall | '(' expression ')' |
        unaryOp term
        """
        self._write_start('term')
        if self._what_next_token_type([Type.INT_CONST]):
            value = self._compile_integer_constant()
            self._vm_writer.write_push(Segment.CONST, value)
        elif self._what_next_token_type([Type.STRING_CONST]):
            # Build the string at runtime, one appendChar per character.
            value = self._compile_string_constant()
            self._vm_writer.write_push(Segment.CONST, len(value))
            self._vm_writer.write_call('String.new', 1)
            for v in value:
                self._vm_writer.write_push(Segment.CONST, ord(v))
                self._vm_writer.write_call('String.appendChar', 2)
        elif self._what_next_token([Keyword.NULL]):
            self._compile_keyword()
            self._vm_writer.write_push(Segment.CONST, 0)
        elif self._what_next_token([Keyword.THIS]):
            self._compile_keyword()
            self._vm_writer.write_push(Segment.POINTER, 0)
        elif self._what_next_token([Keyword.TRUE]):
            # true == -1 (all bits set): push 0 then NOT.
            self._compile_keyword()
            self._vm_writer.write_push(Segment.CONST, 0)
            self._vm_writer.write_arithmetic(Command.NOT)
        elif self._what_next_token([Keyword.FALSE]):
            self._compile_keyword()
            self._vm_writer.write_push(Segment.CONST, 0)
        elif self._what_next_token_type([Type.IDENTIFIER]):
            if self._what_next_token([Symbol.LEFT_BOX_BRACKET], 1):
                # Array read: push base, push index, ADD, read through THAT.
                self._compile_var_name()
                self._compile_symbol()
                self.compile_expression()
                self._vm_writer.write_arithmetic(Command.ADD)
                self._vm_writer.write_pop(Segment.POINTER, 1)
                self._vm_writer.write_push(Segment.THAT, 0)
                self._compile_symbol()
            elif self._what_next_token(
                    [Symbol.LEFT_ROUND_BRACKET, Symbol.DOT], 1):
                if self._what_next_token([Symbol.LEFT_ROUND_BRACKET], 1):
                    # Implicit method call on 'this'.
                    self._write('IdentifierInfo', 'category: subroutine')
                    subroutine_name = self._compile_identifier()
                    self._compile_symbol()
                    self._vm_writer.write_push(Segment.POINTER, 0)
                    arg_num = self.compile_expression_list()
                    self._compile_symbol()
                    self._vm_writer.write_call(
                        '%s.%s' % (self._compiled_class_name,
                                   subroutine_name), arg_num + 1)
                else:
                    identifier_str = self._jack_tokenizer.next_token()
                    if self._symbol_table.kind_of(identifier_str):
                        # Method call on a known instance variable.
                        instance_name = self._compile_var_name(call=True)
                        self._compile_symbol()
                        self._write('IdentifierInfo', 'category: subroutine')
                        subroutine_name = self._compile_identifier()
                        self._compile_symbol()
                        kind = self._symbol_table.kind_of(instance_name)
                        if kind == Kind.ARG:
                            self._vm_writer.write_push(
                                Segment.ARG,
                                self._symbol_table.index_of(instance_name))
                        elif kind == Kind.VAR:
                            self._vm_writer.write_push(
                                Segment.LOCAL,
                                self._symbol_table.index_of(instance_name))
                        elif kind == Kind.FIELD:
                            self._vm_writer.write_push(
                                Segment.THIS,
                                self._symbol_table.index_of(instance_name))
                        elif kind == Kind.STATIC:
                            self._vm_writer.write_push(
                                Segment.STATIC,
                                self._symbol_table.index_of(instance_name))
                        arg_num = self.compile_expression_list()
                        self._compile_symbol()
                        self._vm_writer.write_call(
                            '%s.%s' %
                            (self._symbol_table.type_of(instance_name),
                             subroutine_name), arg_num + 1)
                    else:
                        # Function/constructor call on a class name.
                        self._write('IdentifierInfo', 'category: class')
                        class_name = self._compile_identifier()
                        self._compile_symbol()
                        self._write('IdentifierInfo', 'category: subroutine')
                        subroutine_name = self._compile_identifier()
                        self._compile_symbol()
                        arg_num = self.compile_expression_list()
                        self._compile_symbol()
                        self._vm_writer.write_call(
                            '%s.%s' % (class_name, subroutine_name), arg_num)
            else:
                # Bare variable reference.
                self._compile_var_name()
        elif self._what_next_token([Symbol.LEFT_ROUND_BRACKET]):
            self._compile_symbol()
            self.compile_expression()
            self._compile_symbol()
        elif self._what_next_token([Symbol.TILDE]):
            self._compile_symbol()
            self.compile_term()
            self._vm_writer.write_arithmetic(Command.NOT)
        elif self._what_next_token([Symbol.MINUS]):
            self._compile_symbol()
            self.compile_term()
            self._vm_writer.write_arithmetic(Command.NEG)
        self._write_end('term')

    def compile_expression_list(self):
        """expressionList: (expression (',' expression)*)?

        Returns the number of expressions compiled (the call's arg count).
        """
        self._write_start('expressionList')
        arg_num = 0
        if not self._what_next_token([Symbol.RIGHT_ROUND_BRACKET]):
            self.compile_expression()
            arg_num += 1
            while self._what_next_token([Symbol.COMMA]):
                self._compile_symbol()
                self.compile_expression()
                arg_num += 1
        self._write_end('expressionList')
        return arg_num

    def save(self):
        # Flush the generated VM code to disk.
        self._vm_writer.save()

    def _what_next_token(self, values, index=0):
        # Peek: is the token at lookahead *index* one of *values*?
        return self._jack_tokenizer.next_token(index) in values

    def _what_next_token_type(self, values, index=0):
        # Peek: is the token type at lookahead *index* one of *values*?
        return self._jack_tokenizer.next_token_type(index) in values

    def _compile_symbol(self):
        # Consume one token, record it as <symbol>, and return its value.
        self._jack_tokenizer.advance()
        value = self._jack_tokenizer.token()
        self._write('symbol', value)
        return value

    def _compile_keyword(self):
        # Consume one token, record it as <keyword>, and return its value.
        self._jack_tokenizer.advance()
        value = self._jack_tokenizer.token()
        self._write('keyword', value)
        return value

    def _compile_identifier(self):
        # Consume one token, record it as <identifier>, and return its value.
        self._jack_tokenizer.advance()
        value = self._jack_tokenizer.token()
        self._write('identifier', value)
        return value

    def _compile_integer_constant(self):
        # Consume one token, record it as <integerConstant>, return its value.
        self._jack_tokenizer.advance()
        value = self._jack_tokenizer.token()
        self._write('integerConstant', value)
        return value

    def _compile_string_constant(self):
        # Consume one token, record it as <stringConstant>, return its value.
        self._jack_tokenizer.advance()
        value = self._jack_tokenizer.token()
        self._write('stringConstant', value)
        return value

    def _compile_var_name(self, declaration=False, type=None, kind=None,
                          let=False, call=False):
        """Consume a variable-name identifier.

        declaration: define the name in the symbol table with *type*/*kind*.
        let / call:  no code is emitted here (the caller handles push/pop).
        otherwise:   push the variable's value from its segment.
        Returns the identifier text.
        """
        if declaration:
            self._symbol_table.define(self._jack_tokenizer.next_token(), type,
                                      kind)
        elif let:
            pass
        elif call:
            pass
        else:
            kind = self._symbol_table.kind_of(
                self._jack_tokenizer.next_token())
            if kind == Kind.ARG:
                self._vm_writer.write_push(
                    Segment.ARG,
                    self._symbol_table.index_of(
                        self._jack_tokenizer.next_token()))
            elif kind == Kind.VAR:
                self._vm_writer.write_push(
                    Segment.LOCAL,
                    self._symbol_table.index_of(
                        self._jack_tokenizer.next_token()))
            elif kind == Kind.FIELD:
                self._vm_writer.write_push(
                    Segment.THIS,
                    self._symbol_table.index_of(
                        self._jack_tokenizer.next_token()))
            elif kind == Kind.STATIC:
                self._vm_writer.write_push(
                    Segment.STATIC,
                    self._symbol_table.index_of(
                        self._jack_tokenizer.next_token()))
        self._write(
            'IdentifierInfo', 'declaration: %s, kind: %s, index: %d' %
            (declaration,
             self._symbol_table.kind_of(self._jack_tokenizer.next_token()),
             self._symbol_table.index_of(self._jack_tokenizer.next_token())))
        return self._compile_identifier()

    def _write(self, element, value):
        # Append one leaf XML element.
        self._xml_text += '<{}> {} </{}>\n'.format(element, value, element)

    def _write_start(self, element):
        # Open a nested XML element.
        self._xml_text += '<%s>\n' % element

    def _write_end(self, element):
        # Close a nested XML element.
        self._xml_text += '</%s>\n' % element

    def _new_label(self):
        # Generate a fresh, unique VM label.
        self._label_count += 1
        return 'LABEL_%d' % self._label_count
def analyze(self):
    """Tokenize and compile every queued source file, writing out the XML."""
    for filename in self.files_to_translate:
        token_stream = JackTokenizer(filename).tokens
        engine = CompilationEngine(token_stream)
        self.__write_out(filename, engine.xml_output)
class CompilationEngine: """NOTE remember that "is_xxx()" checks on the next token, and load the next token to curr_token before starting sub-methods using "load_next_token()" and you can use values with it """ def __init__(self, jack_file): self.vm_writer = VMWriter(jack_file) self.tokenizer = JackTokenizer(jack_file) self.symbol_table = SymbolTable() self.if_index = -1 self.while_index = -1 # 'class' className '{' classVarDec* subroutineDec* '}' def compile_class(self): #! Beginning of all # * save name of the class and move on self.load_next_token() # 'class' self.class_name = self.load_next_token() # className self.load_next_token() # curr_token = '{' # while next token == 'static' | 'field', while self.is_class_var_dec(): # check next token self.compile_class_var_dec() # classVarDec* # while next_token == constructor | function | method while self.is_subroutine_dec(): self.compile_subroutine() # subroutineDec* self.vm_writer.close() # ('static' | 'field' ) type varName (',' varName)* ';' def compile_class_var_dec(self): kind = self.load_next_token() # curr_token = static | field type = self.load_next_token() # curr_token = type name = self.load_next_token() # curr_token = varName self.symbol_table.define(name, type, kind.upper()) while self.check_next_token() != ";": # (',' varName)* self.load_next_token() # ',' name = self.load_next_token() # varName self.symbol_table.define(name, type, kind.upper()) self.load_next_token() # ';' # next_token = 'constructor' | 'function' | 'method' # subroutineDec: ('constructor' | 'function' | 'method') ('void' | type) subroutineName '(' parameterList ')' subroutineBody # subroutineBody: '{' varDec* statements '}' def compile_subroutine(self): subroutine_kind = (self.load_next_token() ) # ('constructor' | 'function' | 'method') self.load_next_token() # ('void' | type) subroutine_name = self.load_next_token() # subroutineName self.symbol_table.start_subroutine() # init subroutine table if subroutine_kind == "method": 
self.symbol_table.define("instance", self.class_name, "ARG") self.load_next_token() # curr_token '(' self.compile_parameter_list() # parameterList # next_token == ')' when escaped self.load_next_token() # ')' self.load_next_token() # '{' while self.check_next_token() == "var": self.compile_var_dec() # varDec* # NOTE next_token is neither 'var' or ';' # NOTE next_token is statements* (zero or more) # ANCHOR actual writing func_name = f"{self.class_name}.{subroutine_name}" # Main.main num_locals = self.symbol_table.counts["VAR"] # get 'var' count self.vm_writer.write_function(func_name, num_locals) if subroutine_kind == "constructor": num_fields = self.symbol_table.counts["FIELD"] self.vm_writer.write_push("CONST", num_fields) self.vm_writer.write_call("Memory.alloc", 1) self.vm_writer.write_pop("POINTER", 0) elif subroutine_kind == "method": self.vm_writer.write_push("ARG", 0) self.vm_writer.write_pop("POINTER", 0) # NOTE statement starts here self.compile_statements() # statements self.load_next_token() # '} # ( (type varName) (',' type varName)*)? 
def compile_parameter_list(self): # curr_token == '(' if self.check_next_token() != ")": type = self.load_next_token() # type name = self.load_next_token() # varName self.symbol_table.define(name, type, "ARG") while self.check_next_token() != ")": self.load_next_token() # ',' type = self.load_next_token() # type name = self.load_next_token() # varName self.symbol_table.define(name, type, "ARG") # NOTE param compilation finishes when next_token == ')' # 'var' type varName (',' varName)* ';' def compile_var_dec(self): self.load_next_token() # 'var' type = self.load_next_token() # type name = self.load_next_token() # # varName self.symbol_table.define(name, type, "VAR") while self.check_next_token() != ";": # (',' varName)* self.load_next_token() # ',' name = self.load_next_token() # varName self.symbol_table.define(name, type, "VAR") self.load_next_token() # ';' # statement* # letStatement | ifStatement | whileStatement | doStatement | returnStatement def compile_statements(self): # if next_token == let | if | while | do | return while self.is_statement(): statement = (self.load_next_token() ) # curr_token == let | if | while | do | return if statement == "let": self.compile_let() elif statement == "if": self.compile_if() elif statement == "while": self.compile_while() elif statement == "do": self.compile_do() elif statement == "return": self.compile_return() # 'let' varName ('[' expression ']')? 
'=' expression ';' def compile_let(self): var_name = self.load_next_token() # curr_token == varName var_kind = CONVERT_KIND[self.symbol_table.kind_of(var_name)] var_index = self.symbol_table.index_of(var_name) # if next_token == "[" if self.is_array(): # array assignment self.load_next_token() # curr_token == '[' self.compile_expression() # expression self.load_next_token() # curr_token == ']' self.vm_writer.write_push(var_kind, var_index) self.vm_writer.write_arithmetic("ADD") self.load_next_token() # curr_token == '=' self.compile_expression() # expression self.load_next_token() # curr_token == ';' #! POP TEMP and PUSH TEMP location changed self.vm_writer.write_pop("TEMP", 0) self.vm_writer.write_pop("POINTER", 1) self.vm_writer.write_push("TEMP", 0) self.vm_writer.write_pop("THAT", 0) else: # regular assignment self.load_next_token() # curr_token == '=' self.compile_expression() # expression self.load_next_token() # ';' self.vm_writer.write_pop(var_kind, var_index) # 'if' '(' expression ')' '{' statements '}' ( 'else' '{' statements '}' )? def compile_if(self): # curr_token == if self.if_index += 1 if_index = self.if_index # TODO IF indexes count separately self.load_next_token() # curr_token == '(' self.compile_expression() # expression self.load_next_token() # ')' self.load_next_token() # '{' # S = statement, L = label self.vm_writer.write_if(f"IF_TRUE{if_index}") #! if-goto L1 self.vm_writer.write_goto(f"IF_FALSE{if_index}") #! goto L2 self.vm_writer.write_label(f"IF_TRUE{if_index}") #! label L1 self.compile_statements() # statements #! executing S1 self.vm_writer.write_goto(f"IF_END{if_index}") #! goto END self.load_next_token() # '}' self.vm_writer.write_label(f"IF_FALSE{if_index}") #! label L2 if self.check_next_token() == "else": # ( 'else' '{' statements '}' )? self.load_next_token() # 'else' self.load_next_token() # '{' self.compile_statements() # statements #! 
executing S2 self.load_next_token() # '}' self.vm_writer.write_label(f"IF_END{if_index}") # 'while' '(' expression ')' '{' statements '}' def compile_while(self): # curr_token == while self.while_index += 1 while_index = self.while_index self.vm_writer.write_label(f"WHILE{while_index}") self.load_next_token() # '(' self.compile_expression() # expression self.vm_writer.write_arithmetic("NOT") # eval false condition first self.load_next_token() # ')' self.load_next_token() # '{' self.vm_writer.write_if(f"WHILE_END{while_index}") self.compile_statements() # statements self.vm_writer.write_goto(f"WHILE{while_index}") self.vm_writer.write_label(f"WHILE_END{while_index}") self.load_next_token() # '}' # 'do' subroutineCall ';' def compile_do(self): # curr_token == do self.load_next_token() #! to sync with compile_term() self.compile_subroutine_call() self.vm_writer.write_pop("TEMP", 0) self.load_next_token() # ';' # 'return' expression? ';' def compile_return(self): # curr_token == return if self.check_next_token() != ";": self.compile_expression() else: self.vm_writer.write_push("CONST", 0) self.vm_writer.write_return() self.load_next_token() # ';' # term (op term)* def compile_expression(self): self.compile_term() # term while self.is_op(): # (op term)* op: str = self.load_next_token() # op self.compile_term() # term if op in ARITHMETIC.keys(): self.vm_writer.write_arithmetic(ARITHMETIC[op]) elif op == "*": self.vm_writer.write_call("Math.multiply", 2) elif op == "/": self.vm_writer.write_call("Math.divide", 2) # integerConstant | stringConstant | keywordConstant | varName | # varName '[' expression ']' | subroutineCall | '(' expression ')' | unaryOp term def compile_term(self): # if next_token == '~' | '-' if self.is_unary_op_term(): unary_op = self.load_next_token() # curr_token == '~' | '-' self.compile_term() # term (recursive) self.vm_writer.write_arithmetic(ARITHMETIC_UNARY[unary_op]) # if next_token == '(' => '(' expression ')' elif self.check_next_token() == 
"(": self.load_next_token() # '(' self.compile_expression() # expression self.load_next_token() # ')' # if next_token == INTEGER(const) elif self.check_next_type() == "INT_CONST": # integerConstant self.vm_writer.write_push("CONST", self.load_next_token()) # ) # if next_token == STRING(const) elif self.check_next_type() == "STRING_CONST": # stringConstant self.compile_string() # if next_token == KEYWORD(const) elif self.check_next_type() == "KEYWORD": # keywordConstant self.compile_keyword() # varName | varName '[' expression ']' | subroutineCall else: #! (varName | varName for expression | subroutine)'s base var_name = self.load_next_token( ) # curr_token = varName | subroutineCall # (e.g. Screen.setColor | show() ) #! next_token == '[' | '(' or '.' | just varName # varName '[' expression ']' if self.is_array(): # if next_token == '[' self.load_next_token() # '[' self.compile_expression() # expression self.load_next_token() # ']' array_kind = self.symbol_table.kind_of(var_name) array_index = self.symbol_table.index_of(var_name) self.vm_writer.write_push(CONVERT_KIND[array_kind], array_index) self.vm_writer.write_arithmetic("ADD") self.vm_writer.write_pop("POINTER", 1) self.vm_writer.write_push("THAT", 0) # if next_token == "(" | "." => curr_token == subroutineCall #! if varName is not found, assume class or function name elif self.is_subroutine_call(): # NOTE curr_token == subroutineName | className | varName self.compile_subroutine_call() # varName else: # curr_token == varName # FIXME cannot catch subroutine call and pass it to 'else' below # TODO error caught on Math.abs() part on Ball.vm var_kind = CONVERT_KIND[self.symbol_table.kind_of(var_name)] var_index = self.symbol_table.index_of(var_name) self.vm_writer.write_push(var_kind, var_index) # subroutineCall: subroutineName '(' expressionList ')' | # ( className | varName) '.' subroutineName '(' expressionList ')' # e.g.) (do) game.run() # ! 
in case of 'do' order is different from 'let game = Class.new()' def compile_subroutine_call(self): # NOTE curr_token == subroutineName | className | varName subroutine_caller = self.get_curr_token() function_name = subroutine_caller # _next_token() # FIXME now it loads '.' or '(' # func_name = identifier number_args = 0 #! '.' or '(' 2 cases if self.check_next_token() == ".": self.load_next_token() # curr_token == '.' subroutine_name = self.load_next_token( ) # curr_token == subroutineName type = self.symbol_table.type_of(subroutine_caller) if type != "NONE": # it's an instance kind = self.symbol_table.kind_of(subroutine_caller) index = self.symbol_table.index_of(subroutine_caller) self.vm_writer.write_push(CONVERT_KIND[kind], index) function_name = f"{type}.{subroutine_name}" number_args += 1 else: # it's a class class_name = subroutine_caller function_name = f"{class_name}.{subroutine_name}" elif self.check_next_token() == "(": subroutine_name = subroutine_caller function_name = f"{self.class_name}.{subroutine_name}" number_args += 1 self.vm_writer.write_push("POINTER", 0) self.load_next_token() # '(' number_args += self.compile_expression_list() # expressionList self.load_next_token() # ')' self.vm_writer.write_call(function_name, number_args) # (expression (',' expression)* )? 
def compile_expression_list(self):
    """Compile (expression (',' expression)*)? and return how many were seen."""
    count = 0
    if self.check_next_token() != ")":
        self.compile_expression()
        count = 1
        while self.check_next_token() != ")":
            self.load_next_token()  # consume ','
            self.compile_expression()
            count += 1
    return count

def compile_string(self):
    """Compile a stringConstant: build a String object char by char."""
    text = self.load_next_token()  # curr_token == stringConstant
    self.vm_writer.write_push("CONST", len(text))
    self.vm_writer.write_call("String.new", 1)
    for ch in text:
        self.vm_writer.write_push("CONST", ord(ch))
        self.vm_writer.write_call("String.appendChar", 2)

def compile_keyword(self):
    """Compile a keywordConstant (this/true/false/null)."""
    keyword = self.load_next_token()
    if keyword == "this":
        self.vm_writer.write_push("POINTER", 0)
        return
    # false and null are 0; true is ~0 (-1)
    self.vm_writer.write_push("CONST", 0)
    if keyword == "true":
        self.vm_writer.write_arithmetic("NOT")

def is_subroutine_call(self):
    return self.check_next_token() in (".", "(")

def is_array(self):
    return self.check_next_token() == "["

def is_class_var_dec(self):
    return self.check_next_token() in ("static", "field")

def is_subroutine_dec(self):
    return self.check_next_token() in ("constructor", "function", "method")

def is_statement(self):
    return self.check_next_token() in ("let", "if", "while", "do", "return")

def is_op(self):
    return self.check_next_token() in ("+", "-", "*", "/", "&", "|", "<", ">", "=")

def is_unary_op_term(self):
    return self.check_next_token() in ("~", "-")

def check_next_token(self):
    """Peek at the next token's text without consuming it."""
    return self.tokenizer.next_token[1]

def check_next_type(self):
    """Peek at the next token's type without consuming it."""
    return self.tokenizer.next_token[0]

def get_curr_token(self):
    return self.tokenizer.curr_token[1]

def load_next_token(self):
    """Advance the tokenizer and return the new current token ('' at EOF)."""
    if not self.tokenizer.has_more_tokens():
        return ""
    self.tokenizer.advance()
    return self.tokenizer.curr_token[1]
class CompilationEngine():
    """Compile one .jack file, emitting both an XML parse tree
    (<source>.myImpl.xml) and VM code through the injected VMWriter.

    Fixes applied: the three Python-2 ``print`` statements (debug traces in
    compile_subroutine_body) were syntax errors under Python 3 and are now
    ``print()`` calls.
    """

    def __init__(self, filepath, vm_writer):
        # XML parse-tree output; assumes `filepath` ends in ".jack"
        self.wf = open(filepath[:-5] + ".myImpl.xml", 'w')
        self.tokenizer = JackTokenizer(filepath)
        self.symbol_table = SymbolTable()
        self.vmw = vm_writer
        self.compiled_class_name = None  # set while compiling the class header
        self.label_num = 0               # monotonically increasing label suffix

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.wf.close()

    def get_new_label(self):
        """Return a fresh, unique VM label name."""
        self.label_num += 1
        return 'LABEL_%d' % self.label_num

    def compile(self):
        """Entry point: compile the single class in the source file."""
        self.compile_class()

    def compile_class(self):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        self.write_element_start('class')
        self.compile_keyword([Tokens.CLASS])
        self.compiled_class_name = self.compile_class_name().token
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        while self.next_is_class_var_dec():
            self.compile_class_var_dec()
        while self.next_is_subroutine_dec():
            self.compile_subroutine_dec()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.write_element_end('class')

    def compile_class_var_dec(self):
        """classVarDec: ('static'|'field') type varName (',' varName)* ';'"""
        self.write_element_start('classVarDec')
        token = self.compile_keyword([Tokens.STATIC, Tokens.FIELD])
        kind = None
        if token == Tokens.STATIC:
            kind = IdentifierKind.STATIC
        elif token == Tokens.FIELD:
            kind = IdentifierKind.FIELD
        else:
            self.raise_syntax_error('Unexpected token')
        type_token = self.compile_type()
        self.compile_var_name(declaration=True, type=type_token.token, kind=kind)
        while self.next_is(Tokens.COMMA):
            self.compile_symbol(Tokens.COMMA)
            self.compile_var_name(declaration=True, type=type_token.token, kind=kind)
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('classVarDec')

    def compile_var_dec(self):
        """varDec: 'var' type varName (',' varName)* ';'
        Returns the number of locals declared."""
        self.write_element_start('varDec')
        self.compile_keyword(Tokens.VAR)
        type_token = self.compile_type()
        var_num = 0
        self.compile_var_name(declaration=True, type=type_token.token,
                              kind=IdentifierKind.VAR)
        var_num += 1
        while self.next_is(Tokens.COMMA):
            self.compile_symbol(Tokens.COMMA)
            self.compile_var_name(declaration=True, type=type_token.token,
                                  kind=IdentifierKind.VAR)
            var_num += 1
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('varDec')
        return var_num

    def compile_subroutine_dec(self):
        """subroutineDec: ('constructor'|'function'|'method') ('void'|type)
        subroutineName '(' parameterList ')' subroutineBody"""
        self.symbol_table.start_subroutine()
        self.write_element_start('subroutineDec')
        token = self.compile_keyword(
            [Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD])
        if self.tokenizer.see_next() == Tokens.VOID:
            self.compile_keyword(Tokens.VOID)
        else:
            self.compile_type()
        subroutine_name = self.compile_subroutine_name().token
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
        if token == Tokens.METHOD:
            # the implicit 'this' occupies argument slot 0 of every method
            self.symbol_table.define('$this', self.compiled_class_name,
                                     IdentifierKind.ARG)
        self.compile_parameter_list()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        self.compile_subroutine_body(subroutine_name, token)
        self.write_element_end('subroutineDec')

    def compile_subroutine_name(self):
        self.write_identifier_info('category: subroutine')
        return self.compile_identifier()

    def compile_class_name(self):
        self.write_identifier_info('category: class')
        return self.compile_identifier()

    def compile_var_name(self, declaration=False, type=None, kind=None,
                         let=False, call=False):
        """Consume an identifier used as a variable name.

        declaration -- define it in the symbol table (using `type`/`kind`)
        let / call  -- consume only; the caller emits the push/pop itself
        otherwise   -- rvalue use: push the variable's current value
        """
        if declaration:
            self.symbol_table.define(self.tokenizer.see_next().token, type, kind)
        elif let:
            pass
        elif call:
            pass
        else:
            kind = self.symbol_table.kind_of(self.tokenizer.see_next().token)
            if kind == IdentifierKind.ARG:
                self.vmw.write_push(
                    Segment.ARG,
                    self.symbol_table.index_of(self.tokenizer.see_next().token))
            elif kind == IdentifierKind.VAR:
                self.vmw.write_push(
                    Segment.LOCAL,
                    self.symbol_table.index_of(self.tokenizer.see_next().token))
            elif kind == IdentifierKind.FIELD:
                self.vmw.write_push(
                    Segment.THIS,
                    self.symbol_table.index_of(self.tokenizer.see_next().token))
            elif kind == IdentifierKind.STATIC:
                self.vmw.write_push(
                    Segment.STATIC,
                    self.symbol_table.index_of(self.tokenizer.see_next().token))
        self.write_identifier_info(
            'declaration: %s, kind: %s, index: %d' %
            (declaration,
             self.symbol_table.kind_of(self.tokenizer.see_next().token),
             self.symbol_table.index_of(self.tokenizer.see_next().token)))
        return self.compile_identifier()

    def write_identifier_info(self, value):
        self.write_element('IdentifierInfo', value)

    def compile_parameter_list(self):
        """parameterList: ((type varName) (',' type varName)*)?"""
        self.write_element_start('parameterList')
        if self.tokenizer.see_next() in [
                Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN
        ] or isinstance(self.tokenizer.see_next(), Identifier):
            type_token = self.compile_type()
            self.compile_var_name(declaration=True, type=type_token.token,
                                  kind=IdentifierKind.ARG)
            while self.next_is(Tokens.COMMA):
                self.compile_symbol(Tokens.COMMA)
                type_token = self.compile_type()
                self.compile_var_name(declaration=True, type=type_token.token,
                                      kind=IdentifierKind.ARG)
        self.write_element_end('parameterList')

    def compile_subroutine_body(self, subroutine_name, subroutine_dec_token):
        """subroutineBody: '{' varDec* statements '}'
        Emits the VM `function` header plus constructor/method preamble;
        returns the number of locals."""
        self.write_element_start('subroutineBody')
        # debug trace (was a Python-2 print statement -- now valid Python 3)
        print(subroutine_name, subroutine_dec_token)
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        local_num = 0
        while self.next_is(Tokens.VAR):
            var_num = self.compile_var_dec()
            local_num += var_num
        self.vmw.write_function(
            "%s.%s" % (self.compiled_class_name, subroutine_name), local_num)
        if subroutine_dec_token == Tokens.METHOD:
            # THIS <- argument 0
            self.vmw.write_push(Segment.ARG, 0)
            self.vmw.write_pop(Segment.POINTER, 0)
        elif subroutine_dec_token == Tokens.CONSTRUCTOR:
            # allocate one word per field and anchor THIS at the new object
            self.vmw.write_push(
                Segment.CONST, self.symbol_table.var_count(IdentifierKind.FIELD))
            self.vmw.write_call('Memory.alloc', 1)
            self.vmw.write_pop(Segment.POINTER, 0)
        elif subroutine_dec_token == Tokens.FUNCTION:
            pass
        else:
            self.raise_syntax_error('Invalid token')
        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.write_element_end('subroutineBody')
        # debug dump of the argument table (was Python-2 print statements)
        print("=========")
        for key in self.symbol_table.arg_table:
            entry = self.symbol_table.arg_table[key]
            print(entry.type, key, "kind:", entry.kind, "index:", entry.index)
        return local_num

    def compile_statements(self):
        self.write_element_start('statements')
        while self.next_is_statement():
            self.compile_statement()
        self.write_element_end('statements')

    def compile_statement(self):
        """Compile one let / if / while / do / return statement."""
        if self.next_is(Tokens.LET):
            self.write_element_start('letStatement')
            self.compile_keyword(Tokens.LET)
            let_var = self.compile_var_name(let=True).token
            if self.next_is(Tokens.LEFT_BOX_BRACKET):
                # let arr[i] = value;
                self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
                self.compile_expression()  # index i
                self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
                self.compile_symbol(Tokens.EQUAL)
                # push the array's base address
                kind = self.symbol_table.kind_of(let_var)
                if kind == IdentifierKind.ARG:
                    self.vmw.write_push(Segment.ARG,
                                        self.symbol_table.index_of(let_var))
                elif kind == IdentifierKind.VAR:
                    self.vmw.write_push(Segment.LOCAL,
                                        self.symbol_table.index_of(let_var))
                elif kind == IdentifierKind.FIELD:
                    self.vmw.write_push(Segment.THIS,
                                        self.symbol_table.index_of(let_var))
                elif kind == IdentifierKind.STATIC:
                    self.vmw.write_push(Segment.STATIC,
                                        self.symbol_table.index_of(let_var))
                # temp 2 <- base + i
                self.vmw.write_arithmetic(Command.ADD)
                self.vmw.write_pop(Segment.TEMP, 2)
                # right-hand-side value
                self.compile_expression()
                # THAT <- base + i, then store the value
                self.vmw.write_push(Segment.TEMP, 2)
                self.vmw.write_pop(Segment.POINTER, 1)
                self.vmw.write_pop(Segment.THAT, 0)
                self.compile_symbol(Tokens.SEMI_COLON)
            else:
                self.compile_symbol(Tokens.EQUAL)
                self.compile_expression()
                self.compile_symbol(Tokens.SEMI_COLON)
                kind = self.symbol_table.kind_of(let_var)
                if kind == IdentifierKind.VAR:
                    self.vmw.write_pop(Segment.LOCAL,
                                       self.symbol_table.index_of(let_var))
                elif kind == IdentifierKind.ARG:
                    self.vmw.write_pop(Segment.ARG,
                                       self.symbol_table.index_of(let_var))
                elif kind == IdentifierKind.FIELD:
                    self.vmw.write_pop(Segment.THIS,
                                       self.symbol_table.index_of(let_var))
                elif kind == IdentifierKind.STATIC:
                    self.vmw.write_pop(Segment.STATIC,
                                       self.symbol_table.index_of(let_var))
            self.write_element_end('letStatement')
        elif self.next_is(Tokens.IF):
            self.write_element_start('ifStatement')
            self.compile_keyword(Tokens.IF)
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            # if-goto jumps on NOT(cond) to the else/end label
            self.vmw.write_arithmetic(Command.NOT)
            l1 = self.get_new_label()
            l2 = self.get_new_label()
            self.vmw.write_if(l1)
            self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
            self.compile_statements()
            self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
            self.vmw.write_goto(l2)
            self.vmw.write_label(l1)
            if self.next_is(Tokens.ELSE):
                self.compile_keyword(Tokens.ELSE)
                self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
                self.compile_statements()
                self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
            self.vmw.write_label(l2)
            self.write_element_end('ifStatement')
        elif self.next_is(Tokens.WHILE):
            self.write_element_start('whileStatement')
            l1 = self.get_new_label()
            l2 = self.get_new_label()
            self.compile_keyword(Tokens.WHILE)
            self.vmw.write_label(l1)
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.vmw.write_arithmetic(Command.NOT)
            self.vmw.write_if(l2)
            self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
            self.compile_statements()
            self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
            self.vmw.write_goto(l1)
            self.vmw.write_label(l2)
            self.write_element_end('whileStatement')
        elif self.next_is(Tokens.DO):
            self.write_element_start('doStatement')
            self.compile_keyword(Tokens.DO)
            self.compile_subroutine_call()
            self.compile_symbol(Tokens.SEMI_COLON)
            self.write_element_end('doStatement')
            # a do-statement discards the callee's return value
            self.vmw.write_pop(Segment.TEMP, 0)
        elif self.next_is(Tokens.RETURN):
            self.write_element_start('returnStatement')
            self.compile_keyword(Tokens.RETURN)
            if not self.next_is(Tokens.SEMI_COLON):
                self.compile_expression()
            else:
                # void subroutines still return a value: constant 0
                self.vmw.write_push(Segment.CONST, 0)
            self.compile_symbol(Tokens.SEMI_COLON)
            self.vmw.write_return()
            self.write_element_end('returnStatement')

    def compile_subroutine_call(self):
        """subroutineCall: subroutineName '(' expressionList ')' |
        (className | varName) '.' subroutineName '(' expressionList ')'"""
        if self.next_is(Tokens.LEFT_ROUND_BRACKET, idx=1):
            # unqualified call -> method on the current object
            subroutinename = self.compile_subroutine_name().token
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.vmw.write_push(Segment.POINTER, 0)
            argnum = self.compile_expression_list()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.vmw.write_call(
                "%s.%s" % (self.compiled_class_name, subroutinename),
                argnum + 1)
        else:
            identifier_str = self.tokenizer.see_next().token
            if self.symbol_table.kind_of(identifier_str):
                # method call on a known instance: push the object as arg 0
                instance_name = self.compile_var_name(call=True).token
                self.compile_symbol(Tokens.DOT)
                subroutinename = self.compile_subroutine_name().token
                self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
                kind = self.symbol_table.kind_of(instance_name)
                if kind == IdentifierKind.ARG:
                    self.vmw.write_push(
                        Segment.ARG, self.symbol_table.index_of(instance_name))
                elif kind == IdentifierKind.VAR:
                    self.vmw.write_push(
                        Segment.LOCAL, self.symbol_table.index_of(instance_name))
                elif kind == IdentifierKind.FIELD:
                    self.vmw.write_push(
                        Segment.THIS, self.symbol_table.index_of(instance_name))
                elif kind == IdentifierKind.STATIC:
                    self.vmw.write_push(
                        Segment.STATIC, self.symbol_table.index_of(instance_name))
                argnum = self.compile_expression_list()
                self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
                self.vmw.write_call(
                    "%s.%s" % (self.symbol_table.type_of(instance_name),
                               subroutinename), argnum + 1)
            else:
                # static call on a class name: no implicit argument
                classname = self.compile_class_name().token
                self.compile_symbol(Tokens.DOT)
                subroutinename = self.compile_subroutine_name().token
                self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
                argnum = self.compile_expression_list()
                self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
                self.vmw.write_call("%s.%s" % (classname, subroutinename),
                                    argnum)

    def compile_expression_list(self):
        """expressionList: (expression (',' expression)*)? ; returns count."""
        self.write_element_start('expressionList')
        argnum = 0
        if not self.next_is(Tokens.RIGHT_ROUND_BRACKET):
            self.compile_expression()
            argnum += 1
            while self.next_is(Tokens.COMMA):
                self.compile_symbol(Tokens.COMMA)
                self.compile_expression()
                argnum += 1
        self.write_element_end('expressionList')
        return argnum

    def compile_expression(self):
        """expression: term (op term)* ; VM code is emitted in postfix order."""
        self.write_element_start('expression')
        self.compile_term()
        ops = [
            Tokens.PLUS, Tokens.MINUS, Tokens.MULTI, Tokens.DIV, Tokens.AND,
            Tokens.PIPE, Tokens.LESS_THAN, Tokens.GREATER_THAN, Tokens.EQUAL
        ]
        while self.next_is(ops):
            op_token = self.compile_symbol(ops)
            self.compile_term()
            if op_token == Tokens.PLUS:
                self.vmw.write_arithmetic(Command.ADD)
            elif op_token == Tokens.MINUS:
                self.vmw.write_arithmetic(Command.SUB)
            elif op_token == Tokens.MULTI:
                # no native multiply/divide in the VM: call the OS
                self.vmw.write_call('Math.multiply', 2)
            elif op_token == Tokens.DIV:
                self.vmw.write_call('Math.divide', 2)
            elif op_token == Tokens.AND:
                self.vmw.write_arithmetic(Command.AND)
            elif op_token == Tokens.PIPE:
                self.vmw.write_arithmetic(Command.OR)
            elif op_token == Tokens.LESS_THAN:
                self.vmw.write_arithmetic(Command.LT)
            elif op_token == Tokens.GREATER_THAN:
                self.vmw.write_arithmetic(Command.GT)
            elif op_token == Tokens.EQUAL:
                self.vmw.write_arithmetic(Command.EQ)
        self.write_element_end('expression')

    def compile_term(self):
        """term: constant | keywordConstant | varName | varName '[' expr ']' |
        subroutineCall | '(' expr ')' | unaryOp term"""
        self.write_element_start('term')
        if self.next_type_is(TokenType.INT_CONST):
            value_str = self.compile_integer_constant()
            self.vmw.write_push(Segment.CONST, value_str)
        elif self.next_type_is(TokenType.STRING_CONST):
            self.compile_string_constant()
        elif self.next_is(Tokens.NULL):
            self.compile_keyword(Tokens.NULL)
            self.vmw.write_push(Segment.CONST, 0)
        elif self.next_is(Tokens.THIS):
            self.compile_keyword(Tokens.THIS)
            self.vmw.write_push(Segment.POINTER, 0)
        elif self.next_is(Tokens.TRUE):
            self.compile_keyword(Tokens.TRUE)
            # true == ~0 == -1
            self.vmw.write_push(Segment.CONST, 0)
            self.vmw.write_arithmetic(Command.NOT)
        elif self.next_is(Tokens.FALSE):
            self.compile_keyword(Tokens.FALSE)
            self.vmw.write_push(Segment.CONST, 0)
        elif self.next_type_is(TokenType.IDENTIFIER):
            if self.next_is(Tokens.LEFT_BOX_BRACKET, idx=1):
                # arr[i]: compile_var_name pushes the base address
                self.compile_var_name()
                self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
                self.compile_expression()
                self.vmw.write_arithmetic(Command.ADD)
                self.vmw.write_pop(Segment.POINTER, 1)
                self.vmw.write_push(Segment.THAT, 0)
                self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
            elif self.next_is([Tokens.LEFT_ROUND_BRACKET, Tokens.DOT], idx=1):
                self.compile_subroutine_call()
            else:
                self.compile_var_name()
        elif self.next_is(Tokens.LEFT_ROUND_BRACKET):
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        elif self.next_is(Tokens.TILDE):
            self.compile_symbol(Tokens.TILDE)
            self.compile_term()
            self.vmw.write_arithmetic(Command.NOT)
        elif self.next_is(Tokens.MINUS):
            self.compile_symbol(Tokens.MINUS)
            self.compile_term()
            self.vmw.write_arithmetic(Command.NEG)
        else:
            self.raise_syntax_error('')
        self.write_element_end('term')

    def next_type_is(self, token_type):
        return self.tokenizer.see_next().type == token_type

    def compile_type(self):
        """type: 'int' | 'char' | 'boolean' | className ; returns the token."""
        type_token = self.tokenizer.see_next()
        if self.next_is([Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN]):
            self.compile_keyword([Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN])
        else:
            self.compile_class_name()
        return type_token

    def next_is_statement(self):
        return self.next_is(
            [Tokens.LET, Tokens.IF, Tokens.WHILE, Tokens.DO, Tokens.RETURN])

    def next_is(self, tokens, idx=0):
        """True if the token `idx` ahead equals `tokens` (or is in it)."""
        if isinstance(tokens, list):
            return self.tokenizer.see_next(idx=idx) in tokens
        return self.tokenizer.see_next(idx=idx) == tokens

    def next_is_class_var_dec(self):
        return self.next_is([Tokens.STATIC, Tokens.FIELD])

    def next_is_subroutine_dec(self):
        return self.next_is(
            [Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD])

    def compile_symbol(self, tokens):
        """Consume the next token, require it to match `tokens`, write XML."""
        self.tokenizer.advance()
        expected = tokens if isinstance(tokens, list) else [tokens]
        if self.tokenizer.current_token in expected:
            self.write_element('symbol',
                               self.tokenizer.current_token.token_escaped)
            return self.tokenizer.current_token
        self.raise_syntax_error('')

    def compile_keyword(self, tokens):
        """Consume the next token, require it to match `tokens`, write XML."""
        self.tokenizer.advance()
        expected = tokens if isinstance(tokens, list) else [tokens]
        if self.tokenizer.current_token in expected:
            self.write_element('keyword',
                               self.tokenizer.current_token.token_escaped)
            return self.tokenizer.current_token
        self.raise_syntax_error('')

    def compile_identifier(self):
        self.tokenizer.advance()
        if isinstance(self.tokenizer.current_token, Identifier):
            identifier_str = self.tokenizer.current_token.token_escaped
            self.write_element('identifier', identifier_str)
            return self.tokenizer.current_token
        self.raise_syntax_error('')

    def compile_integer_constant(self):
        self.tokenizer.advance()
        if isinstance(self.tokenizer.current_token, IntegerConstant):
            self.write_element('integerConstant',
                               self.tokenizer.current_token.token_escaped)
            return self.tokenizer.current_token.token_escaped
        self.raise_syntax_error('')

    def compile_string_constant(self):
        """Consume a string constant and emit VM code building the String."""
        self.tokenizer.advance()
        if isinstance(self.tokenizer.current_token, StringConstant):
            string = self.tokenizer.current_token.token
            self.write_element('stringConstant',
                               self.tokenizer.current_token.token_escaped)
            self.vmw.write_push(Segment.CONST, len(string))
            self.vmw.write_call('String.new', 1)
            for c in string:
                self.vmw.write_push(Segment.CONST, ord(c))
                self.vmw.write_call('String.appendChar', 2)
        else:
            self.raise_syntax_error('')

    def write_element(self, elem_name, value):
        self.wf.write('<%s> %s </%s>\n' % (elem_name, value, elem_name))

    def write_element_start(self, elem_name):
        self.wf.write('<%s>\n' % elem_name)

    def write_element_end(self, elem_name):
        self.wf.write('</%s>\n' % elem_name)

    def raise_syntax_error(self, msg):
        raise Exception('%s' % msg)
class CompilationEngine:
    """Syntax analyzer: drives a JackTokenizer over one .jack file and writes
    the parse tree as indented XML to an open output file.

    Fixes applied:
    - _compile_parameter_list compared the token *string* against
      TTypes.IDENTIFIER (never true), rejecting class-typed first parameters;
      it now checks the token type like every other method.
    - _compile_expression_list looped forever on a token that was neither
      ',' nor ')'; it now raises the usual "expected" error.
    """

    def __init__(self):
        self._tokenizer = None    # set by compile()
        self._output_file = None  # set by compile()
        self._offset = None       # current XML indentation, in spaces

    @property
    def _token_n_type(self):
        """Current (token, token_type) pair without advancing."""
        return self._tokenizer.current_token, self._tokenizer.token_type

    @property
    def _valid_types(self):
        return 'int', 'char', 'boolean'

    @property
    def _valid_statements(self):
        return 'let', 'if', 'while', 'do', 'return'

    @property
    def _valid_operators(self):
        return '+', '-', '*', '/', '&', '|', '<', '>', '='

    def _is_valid_term_start(self, token, ttype):
        """True if `token`/`ttype` may begin a term.

        NOTE(review): this also accepts every binary operator as a term
        start, which is laxer than the Jack grammar -- confirm intent
        before tightening.
        """
        if ttype in (TTypes.INT_CONST, TTypes.STRING_CONST, TTypes.IDENTIFIER):
            return True
        if token in ('true', 'false', 'null', 'this', '(', '-', '~',
                     *self._valid_operators):
            return True
        return False

    def _raise_missing(self, symbol):
        """Raise a syntax error naming the expected symbol and source line."""
        line_n = self._tokenizer.line_number
        raise Exception(f"{symbol} expected. Line {line_n}")

    def _open_tag(self, tag, new_line=True):
        nl = '\n' if new_line else ''
        pad = ' ' * self._offset
        self._output_file.write(f"{pad}<{tag}>{nl}")
        # block tags (new_line=True) increase the nesting level
        self._offset = self._offset + 2 if new_line else self._offset

    def _write_token(self, tag, token):
        """Write a leaf element: <tag> token </tag> on one line."""
        if tag == 'symbol':
            # the tokenizer supplies the XML-escaped form of symbols
            token = self._tokenizer.symbol
        self._open_tag(tag, new_line=False)
        self._output_file.write(f" {token} ")
        self._close_tag(tag, new_line=False)

    def _close_tag(self, tag, new_line=True):
        self._offset = self._offset - 2 if new_line else self._offset
        pad = ' ' * self._offset if new_line else ''
        self._output_file.write(f"{pad}</{tag}>\n")

    def _advance(self):
        """Advance the tokenizer; return the new (token, token_type)."""
        self._tokenizer.advance()
        return self._tokenizer.current_token, self._tokenizer.token_type

    def compile(self, input_file, output_file):
        """Compile `input_file` (a .jack source path) into XML on `output_file`."""
        self._tokenizer = JackTokenizer(input_file)
        self._output_file = output_file
        self._offset = 0
        while self._tokenizer.has_more_tokens:
            if self._tokenizer.advance() == VALID_TOKEN:
                if self._tokenizer.current_token == 'class':
                    tag = 'class'
                    self._open_tag(tag)
                    self._compile_class()
                    self._close_tag(tag)
                else:
                    line_n = self._tokenizer.line_number
                    raise Exception(
                        f"Class declaration expected. Line {line_n}")

    def _compile_class(self):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        self._write_token(tag='keyword', token='class')
        token, ttype = self._advance()
        if ttype == TTypes.IDENTIFIER:
            tag = TTypes.IDENTIFIER.value
            self._write_token(tag, token)
            token, _ = self._advance()
            if token == '{':
                self._write_token(tag='symbol', token=token)
                token, ttype = self._advance()
                while token in ('static', 'field'):
                    tag = 'classVarDec'
                    self._open_tag(tag)
                    self._compile_class_var_dec()
                    self._close_tag(tag)
                    token, ttype = self._advance()
                while token in ('constructor', 'function', 'method'):
                    tag = 'subroutineDec'
                    self._open_tag(tag)
                    self._compile_subroutine()
                    self._close_tag(tag)
                    token, ttype = self._advance()
                if token == '}':
                    self._write_token(tag='symbol', token=token)
                else:
                    self._raise_missing('"}"')
            else:
                self._raise_missing('"{"')
        else:
            line_n = self._tokenizer.line_number
            raise Exception(f"Invalid class name declaration. Line {line_n}")

    def _compile_class_var_dec(self):
        """classVarDec: ('static'|'field') type varName (',' varName)* ';'"""
        token = self._tokenizer.current_token
        self._write_token(tag='keyword', token=token)
        token, ttype = self._advance()
        if token in self._valid_types or ttype == TTypes.IDENTIFIER:
            tag = ttype.value
            self._write_token(tag=tag, token=token)
            token, ttype = self._advance()
            v = False  # saw at least one variable name
            while ttype == TTypes.IDENTIFIER:
                v = True
                self._write_token(tag='identifier', token=token)
                token, ttype = self._advance()
                if token == ',':
                    self._write_token(tag='symbol', token=token)
                    token, ttype = self._advance()
                    continue
                else:
                    break
            if not v:
                self._raise_missing('Valid variable name')
            if token == ';':
                self._write_token(tag='symbol', token=token)
            else:
                self._raise_missing('";"')
        else:
            self._raise_missing('Valid variable type')

    def _compile_subroutine(self):
        """subroutineDec: kind ('void'|type) name '(' parameterList ')' body"""
        token = self._tokenizer.current_token
        self._write_token(tag='keyword', token=token)
        token, ttype = self._advance()
        if token in ('void', *self._valid_types) or ttype == TTypes.IDENTIFIER:
            tag = ttype.value
            self._write_token(tag=tag, token=token)
            token, ttype = self._advance()
            if ttype == TTypes.IDENTIFIER:
                self._write_token(tag='identifier', token=token)
                token, ttype = self._advance()
                if token == '(':
                    self._write_token(tag='symbol', token=token)
                    tag = 'parameterList'
                    self._open_tag(tag)
                    self._compile_parameter_list()
                    self._close_tag(tag)
                    token, _ = self._token_n_type
                    if token == ')':
                        self._write_token(tag='symbol', token=token)
                        self._open_tag('subroutineBody')
                        token, ttype = self._advance()
                        if token == '{':
                            self._write_token(tag='symbol', token=token)
                            tag = 'varDec'
                            while True:
                                token, _ = self._advance()
                                if token == 'var':
                                    self._open_tag(tag)
                                    self._compile_var_dec()
                                    self._close_tag(tag)
                                else:
                                    break
                            tag = 'statements'
                            if token in self._valid_statements:
                                self._open_tag(tag)
                                self._compile_statements()
                                self._close_tag(tag)
                            token, _ = self._token_n_type
                            if token == '}':
                                self._write_token(tag='symbol', token=token)
                                self._close_tag('subroutineBody')
                            else:
                                self._raise_missing('"}"')
                        else:
                            self._raise_missing('"{"')
                    else:
                        self._raise_missing('")"')
                else:
                    self._raise_missing('"("')
            else:
                self._raise_missing('Valid subroutine name')
        else:
            self._raise_missing('Valid subroutine type')

    def _compile_parameter_list(self):
        """parameterList: ((type varName) (',' type varName)*)?"""
        token, ttype = self._advance()
        if token == ')':
            return
        # FIX: was `token == TTypes.IDENTIFIER` (a string compared to an enum
        # member, never true), which rejected class-typed parameters.
        elif token in self._valid_types or ttype == TTypes.IDENTIFIER:
            tag = ttype.value
            self._write_token(tag=tag, token=token)
            token, ttype = self._advance()
            v = False  # saw at least one parameter name
            while ttype == TTypes.IDENTIFIER:
                v = True
                self._write_token(tag='identifier', token=token)
                token, ttype = self._advance()
                if token == ',':
                    self._write_token(tag='symbol', token=token)
                    token, ttype = self._advance()
                    if token in self._valid_types or ttype == TTypes.IDENTIFIER:
                        tag = ttype.value
                        self._write_token(tag=tag, token=token)
                        token, ttype = self._advance()
                        continue
                    elif token == ')':
                        return
                    else:
                        break
            if not v:
                self._raise_missing('Valid variable name')
        else:
            self._raise_missing('Valid variable type')

    def _compile_var_dec(self):
        """varDec: 'var' type varName (',' varName)* ';'"""
        token, _ = self._token_n_type
        self._write_token(tag='keyword', token=token)  # token == 'var'
        token, ttype = self._advance()
        if token in self._valid_types or ttype == TTypes.IDENTIFIER:
            tag = ttype.value
            self._write_token(tag=tag, token=token)
            token, ttype = self._advance()
            v = False  # saw at least one variable name
            while ttype == TTypes.IDENTIFIER:
                v = True
                self._write_token(tag='identifier', token=token)
                token, _ = self._advance()
                if token == ',':
                    self._write_token(tag='symbol', token=token)
                    token, ttype = self._advance()
                    continue
                elif token == ';':
                    self._write_token(tag='symbol', token=token)
                    break
                else:
                    self._raise_missing('Valid variable declaration')
            if not v:
                self._raise_missing('Valid variable name')
        else:
            self._raise_missing('Valid variable type')

    def _compile_statements(self):
        """statements: statement* ; dispatches on the leading keyword."""
        dispatch = {
            'let': ('letStatement', self._compile_let),
            'if': ('ifStatement', self._compile_if),
            'while': ('whileStatement', self._compile_while),
            'do': ('doStatement', self._compile_do),
            'return': ('returnStatement', self._compile_return),
        }
        token, _ = self._token_n_type
        while token in self._valid_statements:
            tag, comp_call = dispatch[token]
            self._open_tag(tag)
            self._write_token(tag='keyword', token=token)
            comp_call()
            self._close_tag(tag)
            token, _ = self._token_n_type
            if token in self._valid_statements:
                continue
            else:
                token, _ = self._advance()

    def _compile_let(self):
        """letStatement: 'let' varName ('[' expression ']')? '=' expression ';'"""
        token, ttype = self._advance()
        if ttype == TTypes.IDENTIFIER:
            self._write_token(tag='identifier', token=token)
            token, _ = self._advance()
            if token == '[':
                self._write_token(tag='symbol', token=token)
                tag = 'expression'
                self._advance()
                self._open_tag(tag)
                self._compile_expression()
                self._close_tag(tag)
                token, _ = self._token_n_type
                if token == ']':
                    self._write_token(tag='symbol', token=token)
                    token, _ = self._advance()
                else:
                    self._raise_missing('"]"')
            if token == '=':
                self._write_token(tag='symbol', token=token)
                self._advance()
                tag = 'expression'
                self._open_tag(tag)
                self._compile_expression()
                self._close_tag(tag)
                token, _ = self._token_n_type
                if token == ';':
                    self._write_token(tag='symbol', token=token)
                    return
                else:
                    self._raise_missing('";"')
            else:
                self._raise_missing('"="')
        else:
            self._raise_missing('Valid variable name')

    def _compile_if(self):
        """ifStatement: 'if' '(' expr ')' '{' stmts '}' ('else' '{' stmts '}')?"""
        token, _ = self._advance()
        if token == '(':
            self._write_token(tag='symbol', token=token)
            self._advance()
            tag = 'expression'
            self._open_tag(tag)
            self._compile_expression()
            self._close_tag(tag)
            token, _ = self._token_n_type
            if token == ')':
                self._write_token(tag='symbol', token=token)
                token, _ = self._advance()
                if token == '{':
                    self._write_token(tag='symbol', token=token)
                    token, _ = self._advance()
                    tag = 'statements'
                    if token in self._valid_statements:
                        self._open_tag(tag)
                        self._compile_statements()
                        self._close_tag(tag)
                    token, _ = self._token_n_type
                    if token == '}':
                        self._write_token(tag='symbol', token=token)
                        token, _ = self._advance()
                        if token == 'else':
                            self._write_token(tag='keyword', token=token)
                            token, _ = self._advance()
                            if token == '{':
                                self._write_token(tag='symbol', token=token)
                                token, _ = self._advance()
                                tag = 'statements'
                                if token in self._valid_statements:
                                    self._open_tag(tag)
                                    self._compile_statements()
                                    self._close_tag(tag)
                                token, _ = self._token_n_type
                                if token == '}':
                                    self._write_token(tag='symbol', token=token)
                                else:
                                    self._raise_missing('"}"')
                            else:
                                self._raise_missing('"{"')
                    else:
                        self._raise_missing('"}"')
                else:
                    self._raise_missing('"{"')
            else:
                self._raise_missing('")"')
        else:
            self._raise_missing('"("')

    def _compile_while(self):
        """whileStatement: 'while' '(' expression ')' '{' statements '}'"""
        token, _ = self._advance()
        if token == '(':
            self._write_token(tag='symbol', token=token)
            self._advance()
            tag = 'expression'
            self._open_tag(tag)
            self._compile_expression()
            self._close_tag(tag)
            token, _ = self._token_n_type
            if token == ')':
                self._write_token(tag='symbol', token=token)
                token, _ = self._advance()
                if token == '{':
                    self._write_token(tag='symbol', token=token)
                    token, _ = self._advance()
                    tag = 'statements'
                    if token in self._valid_statements:
                        self._open_tag(tag)
                        self._compile_statements()
                        self._close_tag(tag)
                    token, _ = self._token_n_type
                    if token == '}':
                        self._write_token(tag='symbol', token=token)
                    else:
                        self._raise_missing('"}"')
                else:
                    self._raise_missing('"{"')
            else:
                self._raise_missing('")"')
        else:
            self._raise_missing('"("')

    def _compile_do(self):
        """doStatement body: subroutineCall ';' (leading 'do' already written)."""
        token, ttype = self._advance()
        if ttype == TTypes.IDENTIFIER:
            self._write_token(tag='identifier', token=token)
            token, _ = self._advance()
            if token == '.':
                self._write_token(tag='symbol', token=token)
                token, ttype = self._advance()
                if ttype == TTypes.IDENTIFIER:
                    self._write_token(tag='identifier', token=token)
                    token, _ = self._advance()
                else:
                    self._raise_missing('Valid class or variable name')
            if token == '(':
                self._write_token(tag='symbol', token=token)
                tag = 'expressionList'
                self._open_tag(tag)
                self._compile_expression_list()
                self._close_tag(tag)
                token, _ = self._token_n_type
                if token == ')':
                    self._write_token(tag='symbol', token=token)
                    token, _ = self._advance()
                    if token == ';':
                        self._write_token(tag='symbol', token=token)
                        return
                    else:
                        self._raise_missing('";"')
                else:
                    self._raise_missing('")"')
            else:
                self._raise_missing('"("')
        else:
            self._raise_missing('Subroutine call')

    def _compile_return(self):
        """returnStatement body: expression? ';'"""
        token, ttype = self._advance()
        tag = 'expression'
        if self._is_valid_term_start(token, ttype):
            self._open_tag(tag)
            self._compile_expression()
            self._close_tag(tag)
            token, _ = self._token_n_type
        if token == ';':
            self._write_token(tag='symbol', token=token)
        else:
            self._raise_missing('";"')

    def _compile_expression(self):
        """expression: term (op term)*"""
        token, ttype = self._token_n_type
        if self._is_valid_term_start(token, ttype):
            tag = 'term'
            self._open_tag(tag)
            self._compile_term()
            self._close_tag(tag)
            token, ttype = self._token_n_type
            while token in self._valid_operators:
                self._write_token(tag='symbol', token=token)
                token, ttype = self._advance()
                if self._is_valid_term_start(token, ttype):
                    tag = 'term'
                    self._open_tag(tag)
                    self._compile_term()
                    self._close_tag(tag)
                    token, ttype = self._token_n_type
                else:
                    self._raise_missing('Valid term')

    def _compile_term(self):
        """term: constant | keywordConstant | varName('[' expr ']')? |
        subroutineCall | '(' expr ')' | unaryOp term"""
        token, ttype = self._token_n_type
        self._write_token(tag=ttype.value, token=token)
        if token in ('-', '~'):
            # unary operator followed by a term
            self._advance()
            tag = 'term'
            self._open_tag(tag)
            self._compile_term()
            self._close_tag(tag)
            return
        elif ttype in (TTypes.INT_CONST, TTypes.STRING_CONST) or token in (
                'true', 'false', 'null', 'this'):
            self._advance()
            return
        if token == '(':
            tag = 'expression'
            self._advance()
            self._open_tag(tag)
            self._compile_expression()
            self._close_tag(tag)
            token, _ = self._token_n_type
            if token == ')':
                self._write_token(tag='symbol', token=token)
                token, _ = self._advance()
            else:
                self._raise_missing('")"')
        elif ttype == TTypes.IDENTIFIER:
            token, ttype = self._advance()
            if token == '[':
                self._write_token(tag='symbol', token=token)
                tag = 'expression'
                self._advance()
                self._open_tag(tag)
                self._compile_expression()
                self._close_tag(tag)
                token, _ = self._token_n_type
                if token == ']':
                    self._write_token(tag='symbol', token=token)
                    token, _ = self._advance()
                else:
                    self._raise_missing('"]"')
                return
            if token == '.':
                self._write_token(tag='symbol', token=token)
                token, ttype = self._advance()
                if ttype == TTypes.IDENTIFIER:
                    self._write_token(tag='identifier', token=token)
                    token, _ = self._advance()
                else:
                    self._raise_missing('Valid class or variable name')
            if token == '(':
                self._write_token(tag='symbol', token=token)
                tag = 'expressionList'
                self._open_tag(tag)
                self._compile_expression_list()
                self._close_tag(tag)
                token, _ = self._token_n_type
                if token == ')':
                    self._write_token(tag='symbol', token=token)
                    token, _ = self._advance()
                else:
                    self._raise_missing('")"')

    def _compile_expression_list(self):
        """expressionList: (expression (',' expression)*)?"""
        token, ttype = self._advance()
        if self._is_valid_term_start(token, ttype):
            tag = 'expression'
            self._open_tag(tag)
            self._compile_expression()
            self._close_tag(tag)
            token, _ = self._token_n_type
            while True:
                if token == ')':
                    break
                if token == ',':
                    self._write_token(tag='symbol', token=token)
                    token, ttype = self._advance()
                    if self._is_valid_term_start(token, ttype):
                        tag = 'expression'
                        self._open_tag(tag)
                        self._compile_expression()
                        self._close_tag(tag)
                        token, _ = self._token_n_type
                    else:
                        self._raise_missing('Valid expression')
                else:
                    # FIX: previously looped forever on a token that was
                    # neither ',' nor ')'
                    self._raise_missing('")"')
from sys import argv

# For handling file/dir paths
from pathlib import Path

# Import Analyzer components
from compilation_engine import CompilationEngine
from jack_tokenizer import JackTokenizer

# Driver for the Jack analyzer: accepts either a single .jack file or a
# directory containing .jack files, and writes an .xml file next to each
# compiled source.

# Get input path
in_path = Path(argv[1])

if in_path.is_file():
    # Path points to a single file
    tokenizer = JackTokenizer(in_path)
    compilationEngine = CompilationEngine(tokenizer, in_path.with_suffix(".xml"))
    compilationEngine.start_compilation()
elif in_path.is_dir():
    # Path points to a directory: compile every .jack file inside it
    for item in in_path.iterdir():
        if item.is_file() and item.suffix == ".jack":
            tokenizer = JackTokenizer(item)
            ci = CompilationEngine(tokenizer, item.with_suffix(".xml"))
            # BUG FIX: the engine was constructed but compilation was never
            # started for directory inputs, so no output was produced;
            # mirror the single-file branch above.
            ci.start_compilation()
def __init__(self, jack_file, xml_file):
    """Set up an engine that tokenizes *jack_file* and buffers XML for *xml_file*."""
    # Output target and the string buffer the XML is accumulated into.
    self._xml_file = xml_file
    self._xml_text = ''
    # Token stream over the input .jack source.
    self._jack_tokenizer = JackTokenizer(jack_file)
def build_tokenizer(self):
    """Create a fresh tokenizer over this object's Jack input and store it."""
    source = self.jack_input
    self.tokenizer = JackTokenizer(source)
def __init__(self, filepath):
    """Open the output XML file derived from *filepath* and build a tokenizer."""
    # Drop the five-character ".jack" suffix and append the XML suffix.
    # NOTE(review): assumes *filepath* ends in ".jack" -- confirm at call sites.
    out_name = filepath[:-5] + ".myImpl.xml"
    self.wf = open(out_name, 'w')
    self.tokenizer = JackTokenizer(filepath)
class CompilationEngine(object): # the destination file for writing destination_file = None # the tokenizer for the input file tokenizer = None # symbol table symbol_table = None # vm writer vm_writer = None # the class name class_name = "" # indicies for if and while loops # start at -1 because we increment before use while_index = -1 if_index = -1 # the constructor for compiling a single class # the next method to be called after construction must be compile_class # source_filename must be a single file, not a directory def __init__(self, source_filename): # destination filename # if the original extension was .jack, then make the extension .vm # if the original extension was not .jack, then append .vm if source_filename.lower().endswith(".jack"): destination_filename = source_filename[:-5] + ".vm" else: destination_filename = source_filename + ".vm" # open the destination filename for writing self.destination_file = open(destination_filename, 'w') # create a tokenizer for the input file self.tokenizer = JackTokenizer(source_filename) # create the symbol table self.symbol_table = SymbolTable() # create the vm writer self.vm_writer = VMWriter(self.destination_file) # compiles a complete class and closes the output file def compile_class(self): # class keyword tt, t = self._token_next(True, "KEYWORD", "class") # name of class tt, t = self._token_next(True, "IDENTIFIER") self.class_name = t # open brace tt, t = self._token_next(True, "SYMBOL", "{") # one or more variable declarations self.tokenizer.advance() while True: tt, t = self._token_next(False) if tt == "KEYWORD" and t in ["field", "static"]: self.compile_class_var_dec() else: # stop trying to process variable declarations break # one or more subroutine declarations while True: tt, t = self._token_next(False) if tt == "KEYWORD" and t in ["constructor", "function", "method"]: self.compile_subroutine() else: # stop trying to process functions break # close brace # do not advance because we already advanced upon 
exiting the last loop tt, t = self._token_next(False, "SYMBOL", "}") # done with compilation; close the output file self.destination_file.close() # compiles a static declaration or field declaration def compile_class_var_dec(self): # compile the variable declaration # False means this is a class (not a subroutine) self.compile_var_dec(False) # compiles a complete method, function, or constructor def compile_subroutine(self): # start of subroutine self.symbol_table.start_subroutine() # constructor, function, or method keyword tt, type = self._token_next(False, "KEYWORD") # type of the return value # can be either keyword (void) or an identifier (any type) tt, t = self._token_next(True) # name of the method/function/constructor tt, name = self._token_next(True) name = self.class_name + "." + name # if the type is a method, "define" this as an argument, so the other # argument indexes work correctly if type == "method": self.symbol_table.define("this", self.class_name, SymbolTable.ARG) # opening parenthesis tt, t = self._token_next(True, "SYMBOL", "(") # arguments self.tokenizer.advance() self.compile_parameter_list() # closing parenthesis tt, t = self._token_next(False, "SYMBOL", ")") # opening brace tt, t = self._token_next(True, "SYMBOL", "{") # variable declarations self.tokenizer.advance() while True: tt, t = self._token_next(False) if tt == "KEYWORD" and t == "var": self.compile_var_dec() else: # stop trying to process variable declarations break # write the function num_locals = self.symbol_table.var_count(self.symbol_table.VAR) self.vm_writer.write_function(name, num_locals) # write any special code at the top of the function if type == "constructor": # code to allocate memory and set "this" size = self.symbol_table.var_count(self.symbol_table.FIELD) self.vm_writer.write_push(self.vm_writer.CONST, size) self.vm_writer.write_call("Memory.alloc", 1) self.vm_writer.write_pop(self.vm_writer.POINTER, 0) elif type == "function": # nothing special pass elif type == 
"method": # put argument 0 into pointer 0 (this) self.vm_writer.write_push(self.vm_writer.ARG, 0) self.vm_writer.write_pop(self.vm_writer.POINTER, 0) else: print "WARNING: Expected constructor, function, or name; got", type # statements self.compile_statements() # closing brace tt, t = self._token_next(False, "SYMBOL", "}") self.tokenizer.advance() # compiles a (possibly empty) parameter list, not including the enclosing # parentheses def compile_parameter_list(self): # check for empty list tt, t = self._token_next(False) if tt == "SYMBOL" and t == ")": # the parameter list was empty; do not process any more pass else: # there are things in the parameter list while True: # keyword (variable type) tt, type = self._token_next(False) # identifier (variable name) tt, name = self._token_next(True) # the kind is always an arg, since these are all parameters to the # function kind = SymbolTable.ARG # define the variable in the symbol table self.symbol_table.define(name, type, kind) # possible comma tt, t = self._token_next(True) if tt != "SYMBOL" or t != ",": # not a comma; stop processing parameters break self.tokenizer.advance() # compiles a var declaration # if subroutine is true, only the var keyword can be used # if subroutine is false, only the static and field keywords can be used def compile_var_dec(self, subroutine=True): # the keyword to start the declaration tt, kind = self._token_next(False, "KEYWORD") # check for required types if subroutine: if kind == "var": kind = SymbolTable.VAR else: print "WARNING: expecting var, but received %s" % (str(kind)) else: if kind == "static": kind = SymbolTable.STATIC elif kind == "field": kind = SymbolTable.FIELD else: print "WARNING: expecting static or field, but received %s" % (str(kind)) # type of the declaration # could be an identifier or a keyword (int, etc) tt, type = self._token_next(True) # name of the declaration tt, name = self._token_next(True, "IDENTIFIER") # define the variable in the symbol table 
self.symbol_table.define(name, type, kind) # can support more than one identifier name, to declare more than one # variable, separated by commas; process the 2nd-infinite variables self.tokenizer.advance() while True: tt, t = self._token_next(False) if tt == "SYMBOL" and t == ",": # another variable name follows tt, name = self._token_next(True, "IDENTIFIER") # define the variable in the symbol table self.symbol_table.define(name, type, kind) self.tokenizer.advance() else: # no more variable names break # should be on the semicolon at the end of the line tt, t = self._token_next(False, "SYMBOL", ";") self.tokenizer.advance() # compiles a sequence of statements, not including the enclosing {} def compile_statements(self): while True: tt, t = self._token_next(False) if tt == "KEYWORD" and t in ["do", "let", "while", "return", "if"]: # call compile_t, where t is the type of compilation we want token = getattr(self, "compile_" + t)() else: # not a statement; stop processing statements break # compiles a do statement def compile_do(self): # do keyword tt, t = self._token_next(False, "KEYWORD", "do") # subroutine call self.tokenizer.advance() self.compile_subroutine_call() # do statements do not have a return value, so eliminate the return # off of the stack self.vm_writer.write_pop(self.vm_writer.TEMP, 0) # semicolon tt, t = self._token_next(False, "SYMBOL", ";") self.tokenizer.advance() # compiles a let statement def compile_let(self): # let keyword tt, t = self._token_next(False, "KEYWORD", "let") # variable name tt, name = self._token_next(True, "IDENTIFIER") # possible brackets for array tt, t = self._token_next(True) if tt == "SYMBOL" and t == "[": # array - write operation array = True # compile the offset expression self.tokenizer.advance() self.compile_expression() # write the base address onto the stack segment, index = self._resolve_symbol(name) self.vm_writer.write_push(segment, index) # add base and offset self.vm_writer.write_arithmetic("add") # we cannot 
yet put the result into pointer 1, since the read # operation (which hasn't been parsed/computed yet) may use pointer 1 # to read from an arrya value # closing bracket tt, t = self._token_next(False, "SYMBOL", "]") # advance to the next token, since we are expected to be on the = for # the next line self.tokenizer.advance() else: array = False # equals sign tt, t = self._token_next(False, "SYMBOL", "=") # expression self.tokenizer.advance() self.compile_expression() if array: # our stack now looks like this: # TOP OF STACK # computed result to store # address in which value should be stored # ... previous stuff ... # pop the computed value to temp 0 self.vm_writer.write_pop(self.vm_writer.TEMP, 0) # pop the array address to pointer 1 (that) self.vm_writer.write_pop(self.vm_writer.POINTER, 1) # put the computed value back onto the stack self.vm_writer.write_push(self.vm_writer.TEMP, 0) # pop to the variable name or the array reference self.vm_writer.write_pop(self.vm_writer.THAT, 0) else: # not an array - pop the expression to the variable segment, index = self._resolve_symbol(name) self.vm_writer.write_pop(segment, index) # semicolon tt, t = self._token_next(False, "SYMBOL", ";") self.tokenizer.advance() # compiles a while statement def compile_while(self): # labels for this while loop self.while_index += 1 while_start = "WHILE_START_%d" % (self.while_index) while_end = "WHILE_END_%d" % (self.while_index) # while keyword tt, t = self._token_next(False, "KEYWORD", "while") # opening parenthesis tt, t = self._token_next(True, "SYMBOL", "(") # label for the start of the while statement self.vm_writer.write_label(while_start) # the expression that is the condition of the while statement self.tokenizer.advance() self.compile_expression() # the closing parenthesis tt, t = self._token_next(False, "SYMBOL", ")") # the result of the evaluation is now on the stack # if false, then goto to the end of the loop # to do this, negate and then call if-goto 
self.vm_writer.write_arithmetic("not") self.vm_writer.write_if(while_end) # the opening brace tt, t = self._token_next(True, "SYMBOL", "{") # the statments that is the body of the while loop self.tokenizer.advance() self.compile_statements() # the closing brace tt, t = self._token_next(False, "SYMBOL", "}") # after the last statement of the while loop # need to jump back up to the top of the loop to evaluate again self.vm_writer.write_goto(while_start) # label at the end of the loop self.vm_writer.write_label(while_end) self.tokenizer.advance() # compiles a return statement def compile_return(self): # return keyword tt, t = self._token_next(False, "KEYWORD", "return") # possible expression to return tt, t = self._token_next(True) if tt != "SYMBOL" and t != ";": self.compile_expression() else: # no return expression; return 0 self.vm_writer.write_push(self.vm_writer.CONST, 0) # ending semicolon tt, t = self._token_next(False, "SYMBOL", ";") self.vm_writer.write_return() self.tokenizer.advance() # compiles a if statement, including a possible trailing else clause def compile_if(self): # it is more efficient in an if-else case to have the else portion first # in the code when testing, but we use the less-efficient but # easier-to-write true-false pattern here # labels for this if statement self.if_index += 1 if_false = "IF_FALSE_%d" % (self.if_index) if_end = "IF_END_%d" % (self.if_index) # if keyword tt, t = self._token_next(False, "KEYWORD", "if") # opening parenthesis tt, t = self._token_next(True, "SYMBOL", "(") # expression of if statement self.tokenizer.advance() self.compile_expression() # closing parenthesis tt, t = self._token_next(False, "SYMBOL", ")") # the result of the evaluation is now on the stack # if false, then goto the false label # if true, fall through to executing code # if there is no else, then false and end are the same, but having two # labels does not increase code size self.vm_writer.write_arithmetic("not") self.vm_writer.write_if(if_false) 
# opening brace tt, t = self._token_next(True, "SYMBOL", "{") # statements for true portion self.tokenizer.advance() self.compile_statements() # closing brace tt, t = self._token_next(False, "SYMBOL", "}") tt, t = self._token_next(True) if tt == "KEYWORD" and t == "else": # else statement exists # goto the end of the if statement at the end of the true portion self.vm_writer.write_goto(if_end) # label for the start of the false portion self.vm_writer.write_label(if_false) # opening brace tt, t = self._token_next(True, "SYMBOL", "{") # statements self.tokenizer.advance() self.compile_statements() # closing brace tt, t = self._token_next(False, "SYMBOL", "}") # end label self.vm_writer.write_label(if_end) # advance tokenizer only if we are in the else, since otherwise the # token was advanced by the else check self.tokenizer.advance() else: # no else portion; only put in a label for false, since end is not # used self.vm_writer.write_label(if_false) # compiles an expression (one or more terms connected by operators) def compile_expression(self): # the first term self.compile_term() # finish any number of operators followed by terms while True: tt, t = self._token_next(False) if tt == "SYMBOL" and t in "+-*/&|<>=": # found an operator # postfix order - add the next term and then do the operator # the next term self.tokenizer.advance() self.compile_term() # the operator if t == "+": self.vm_writer.write_arithmetic("add") if t == "-": self.vm_writer.write_arithmetic("sub") if t == "=": self.vm_writer.write_arithmetic("eq") if t == ">": self.vm_writer.write_arithmetic("gt") if t == "<": self.vm_writer.write_arithmetic("lt") if t == "&": self.vm_writer.write_arithmetic("and") if t == "|": self.vm_writer.write_arithmetic("or") if t == "*": self.vm_writer.write_call("Math.multiply", 2) if t == "/": self.vm_writer.write_call("Math.divide", 2) else: # no term found; done parsing the expression break # compiles a term # this routine is faced with a slight difficulty when 
trying to decide # between some of the alternative parsing rules. specifically, if the # current token is an identifier, the routine must distinguish between a # variable, an array entry, and a subroutine call. a single lookahead token, # which may be one of [, (, or ., suffices to distinguish between the three # possibilities. any other token is not part of this term and should not # be advanced over. def compile_term(self): # a term: integer_constant | string_constant | keyword_constant | # varname | varname[expression] | subroutine_call | (expression) | # unary_op term tt, t = self._token_next(False) if tt == "INT_CONST": self.vm_writer.write_push(self.vm_writer.CONST, t) # advance for the next statement self.tokenizer.advance() elif tt == "STRING_CONST": # after this portion is run, a pointer to a string should be on the # stack # we create a new string of a certain size and then append characters # one by one; each append operation returns the pointer to the same # string # create the string # string is a len, data tuple; not null-terminated size = len(t) self.vm_writer.write_push(self.vm_writer.CONST, size) self.vm_writer.write_call("String.new", 1) # append each character for char in t: self.vm_writer.write_push(self.vm_writer.CONST, ord(char)) self.vm_writer.write_call("String.appendChar", 2) # advance for the next statement self.tokenizer.advance() elif tt == "KEYWORD": if t == "true": # true is -1, which is 0 negated self.vm_writer.write_push(self.vm_writer.CONST, 0) self.vm_writer.write_arithmetic("not") elif t == "false" or t == "null": self.vm_writer.write_push(self.vm_writer.CONST, 0) elif t == "this": self.vm_writer.write_push(self.vm_writer.POINTER, 0) # advance for the next statement self.tokenizer.advance() elif tt == "SYMBOL" and t == "(": # ( expression ) # parse the expression self.tokenizer.advance() self.compile_expression() # closing parenthesis tt, t = self._token_next(False, "SYMBOL", ")") # advance for the next statement 
self.tokenizer.advance() elif tt == "SYMBOL" and t in "-~": # unary_op term # postfix order - add the next term and then do the operator # parse the rest of the term self.tokenizer.advance() self.compile_term() # write the unary operation if t == "-": self.vm_writer.write_arithmetic("neg") elif t == "~": self.vm_writer.write_arithmetic("not") elif tt == "IDENTIFIER": # varname, varname[expression], subroutine_call # do not write the identifer yet # get the next bit of the expression # if it is a [, then array; if it is a ( or ., then subroutine call # if none of above, then pass over tt2, t2 = self._token_next(True) if tt2 == "SYMBOL" and t2 in "(.": # subroutine call # back up and then compile the subroutine call self.tokenizer.retreat() self.compile_subroutine_call() elif tt2 == "SYMBOL" and t2 == "[": # array - read operation # write the base address onto the stack segment, index = self._resolve_symbol(t) self.vm_writer.write_push(segment, index) # compile the offset expression self.tokenizer.advance() self.compile_expression() # add base and offset self.vm_writer.write_arithmetic("add") # put the resulting address into pointer 1 (that) self.vm_writer.write_pop(self.vm_writer.POINTER, 1) # read from that 0 onto the stack self.vm_writer.write_push(self.vm_writer.THAT, 0) # closing bracket tt, t = self._token_next(False, "SYMBOL", "]") # advance for the next statement self.tokenizer.advance() else: # none of above - just a single identifier segment, index = self._resolve_symbol(t) self.vm_writer.write_push(segment, index) else: # unknown print "WARNING: Unknown term expression object:", tt, t # compiles a (possible empty) comma-separated list of expressions def compile_expression_list(self): num_args = 0 # check for empty list tt, t = self._token_next(False) if tt == "SYMBOL" and t == ")": # the parameter list was empty; do not process any more pass else: # there are things in the parameter list while True: # expression to pass self.compile_expression() num_args 
+= 1 # possible comma tt, t = self._token_next(False) if tt == "SYMBOL" and t == ",": self.tokenizer.advance() else: # not a comma; stop processing parameters break return num_args # compiles a subroutine call # two cases: # - subroutineName(expressionList) # - (class|var).subroutineName(expressionList) def compile_subroutine_call(self): # first part of name tt, name1 = self._token_next(False, "IDENTIFIER") # a dot and another name may exist, or it could be a parenthesis name2 = None tt, t = self._token_next(True) if tt == "SYMBOL" and t == ".": # the name after the dot tt, name2 = self._token_next(True, "IDENTIFIER") # advance so that we are on the parenthesis self.tokenizer.advance() # determine if this is a method call # three possibilities # - class.func() - function call # - var.func() - method call # - func() - method call on current object if self.symbol_table.contains(name1): method_call = True local_call = False elif name2 == None: method_call = True local_call = True else: method_call = False # if a method call, push variable name1 # this a method call if the symbol table contains name1 and name2 exists # OR name1 is a method in the current object if method_call and local_call: # push the current object onto the stack as a hidden argument self.vm_writer.write_push(self.vm_writer.POINTER, 0) elif method_call and not local_call: # push the variable onto the stack as a hidden argument segment, index = self._resolve_symbol(name1) self.vm_writer.write_push(segment, index) # opening parenthesis tt, t = self._token_next(False, "SYMBOL", "(") # expression list self.tokenizer.advance() num_args = self.compile_expression_list() # closing parenthesis tt, t = self._token_next(False, "SYMBOL", ")") # write the call if method_call and local_call: # methd + <blank> # get the name of the vm function to call classname = self.class_name vm_function_name = classname + "." 
+ name1 # increase arguments by 1, since there is the hidden "this" num_args += 1 # make the call self.vm_writer.write_call(vm_function_name, num_args) elif method_call and not local_call: # variable name + method # get the name of the vm function to call classname = self.symbol_table.get(name1)[1] vm_function_name = classname + "." + name2 # increase arguments by 1, since there is the hidden "this" num_args += 1 # make the call self.vm_writer.write_call(vm_function_name, num_args) else: # get the name of the vm function to call vm_function_name = name1 + "." + name2 # make the call self.vm_writer.write_call(vm_function_name, num_args) self.tokenizer.advance() # returns the token_type and token of the next token after advancing the # tokenizer before reading if advance is True def _token_next(self, advance=False, expected_type=None, expected_value=None): # advance the tokenizer, if requested if advance: self.tokenizer.advance() # get the token type and the token itself token_type = self.tokenizer.token_type() token = str(getattr(self.tokenizer, token_type.lower())()) if expected_type and token_type != expected_type: print "WARNING: Type", token_type, "found; expected", expected_type import traceback, sys traceback.print_stack() sys.exit(1) if expected_value and token != expected_value: print "WARNING: Value", token, "found; expected", expected_value import traceback, sys traceback.print_stack() sys.exit(1) return token_type, token # convets a symbol table type into a segment type def _type_to_segment(self, type): if type == self.symbol_table.STATIC: return self.vm_writer.STATIC elif type == self.symbol_table.FIELD: return self.vm_writer.THIS elif type == self.symbol_table.ARG: return self.vm_writer.ARG elif type == self.symbol_table.VAR: return self.vm_writer.LOCAL else: print "ERROR: Bad type %s" % (str(type)) # resolves the symbol from the symbol table # the segment and index is returned as a 2-tuple def _resolve_symbol(self, name): kind, type, index = 
self.symbol_table.get(name) return self._type_to_segment(kind), index
class CompilationEngine(): def __init__(self, filepath, vm_writer): self.wf = open(filepath[:-5] + ".myImpl.xml", 'w') self.tokenizer = JackTokenizer(filepath) self.symbol_table = SymbolTable() self.vmw = vm_writer self.compiled_class_name = None self.label_num = 0 def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): self.wf.close() def get_new_label(self): self.label_num += 1 return 'LABEL_%d' % self.label_num def compile(self): self.compile_class() def compile_class(self): self.write_element_start('class') self.compile_keyword([Tokens.CLASS]) self.compiled_class_name = self.compile_class_name().token self.compile_symbol(Tokens.LEFT_CURLY_BRACKET) while self.next_is_class_var_dec(): self.compile_class_var_dec() while self.next_is_subroutine_dec(): self.compile_subroutine_dec() self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET) self.write_element_end('class') def compile_class_var_dec(self): self.write_element_start('classVarDec') token = self.compile_keyword([Tokens.STATIC, Tokens.FIELD]) kind = None if token == Tokens.STATIC: kind = IdentifierKind.STATIC elif token == Tokens.FIELD: kind = IdentifierKind.FIELD else: self.raise_syntax_error('Unexpected token') type_token = self.compile_type() self.compile_var_name(declaration=True, type=type_token.token, kind=kind) while self.next_is(Tokens.COMMA): self.compile_symbol(Tokens.COMMA) self.compile_var_name(declaration=True, type=type_token.token, kind=kind) self.compile_symbol(Tokens.SEMI_COLON) self.write_element_end('classVarDec') def compile_var_dec(self): self.write_element_start('varDec') self.compile_keyword(Tokens.VAR) type_token = self.compile_type() var_num = 0 self.compile_var_name(declaration=True, type=type_token.token, kind=IdentifierKind.VAR) var_num += 1 while self.next_is(Tokens.COMMA): self.compile_symbol(Tokens.COMMA) self.compile_var_name(declaration=True, type=type_token.token, kind=IdentifierKind.VAR) var_num += 1 self.compile_symbol(Tokens.SEMI_COLON) 
self.write_element_end('varDec') return var_num def compile_subroutine_dec(self): self.symbol_table.start_subroutine() self.write_element_start('subroutineDec') token = self.compile_keyword([Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD]) if self.tokenizer.see_next() == Tokens.VOID: self.compile_keyword(Tokens.VOID) else: self.compile_type() subroutine_name = self.compile_subroutine_name().token self.compile_symbol(Tokens.LEFT_ROUND_BRACKET) if token == Tokens.METHOD: self.symbol_table.define('$this',self.compiled_class_name,IdentifierKind.ARG) self.compile_parameter_list() self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET) self.compile_subroutine_body(subroutine_name, token) self.write_element_end('subroutineDec') def compile_subroutine_name(self): self.write_identifier_info('category: subroutine') return self.compile_identifier() def compile_class_name(self): self.write_identifier_info('category: class') return self.compile_identifier() def compile_var_name(self, declaration=False, type=None, kind=None, let=False, call=False): if declaration: self.symbol_table.define(self.tokenizer.see_next().token, type, kind) elif let: pass elif call: pass else: kind = self.symbol_table.kind_of(self.tokenizer.see_next().token) if kind == IdentifierKind.ARG: self.vmw.write_push(Segment.ARG, self.symbol_table.index_of(self.tokenizer.see_next().token)) elif kind == IdentifierKind.VAR: self.vmw.write_push(Segment.LOCAL, self.symbol_table.index_of(self.tokenizer.see_next().token)) elif kind == IdentifierKind.FIELD: self.vmw.write_push(Segment.THIS, self.symbol_table.index_of(self.tokenizer.see_next().token)) elif kind == IdentifierKind.STATIC: self.vmw.write_push(Segment.STATIC, self.symbol_table.index_of(self.tokenizer.see_next().token)) self.write_identifier_info('declaration: %s, kind: %s, index: %d' % ( declaration, self.symbol_table.kind_of(self.tokenizer.see_next().token), self.symbol_table.index_of(self.tokenizer.see_next().token))) return self.compile_identifier() def 
write_identifier_info(self, value): self.write_element('IdentifierInfo', value) def compile_parameter_list(self): self.write_element_start('parameterList') if self.tokenizer.see_next() in [Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN] or isinstance( self.tokenizer.see_next(), Identifier): type_token = self.compile_type() self.compile_var_name(declaration=True, type=type_token.token, kind=IdentifierKind.ARG) while self.next_is(Tokens.COMMA): self.compile_symbol(Tokens.COMMA) type_token = self.compile_type() self.compile_var_name(declaration=True, type=type_token.token, kind=IdentifierKind.ARG) self.write_element_end('parameterList') def compile_subroutine_body(self, subroutine_name, subroutine_dec_token): self.write_element_start('subroutineBody') print subroutine_name,subroutine_dec_token self.compile_symbol(Tokens.LEFT_CURLY_BRACKET) local_num = 0 while self.next_is(Tokens.VAR): var_num = self.compile_var_dec() local_num += var_num self.vmw.write_function("%s.%s" % (self.compiled_class_name, subroutine_name), local_num) if subroutine_dec_token == Tokens.METHOD: self.vmw.write_push(Segment.ARG, 0) self.vmw.write_pop(Segment.POINTER, 0) elif subroutine_dec_token == Tokens.CONSTRUCTOR: self.vmw.write_push(Segment.CONST, self.symbol_table.var_count(IdentifierKind.FIELD)) self.vmw.write_call('Memory.alloc', 1) self.vmw.write_pop(Segment.POINTER, 0) elif subroutine_dec_token == Tokens.FUNCTION: pass else: self.raise_syntax_error('Invalid token') self.compile_statements() self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET) self.write_element_end('subroutineBody') print "=========" for key in self.symbol_table.arg_table: print self.symbol_table.arg_table[key].type,key,"kind:",self.symbol_table.arg_table[key].kind,"index:",self.symbol_table.arg_table[key].index return local_num def compile_statements(self): self.write_element_start('statements') while self.next_is_statement(): self.compile_statement() self.write_element_end('statements') def compile_statement(self): if 
self.next_is(Tokens.LET): self.write_element_start('letStatement') self.compile_keyword(Tokens.LET) let_var = self.compile_var_name(let=True).token if self.next_is(Tokens.LEFT_BOX_BRACKET): self.compile_symbol(Tokens.LEFT_BOX_BRACKET) self.compile_expression() # i self.compile_symbol(Tokens.RIGHT_BOX_BRACKET) self.compile_symbol(Tokens.EQUAL) # base address kind = self.symbol_table.kind_of(let_var) if kind == IdentifierKind.ARG: self.vmw.write_push(Segment.ARG, self.symbol_table.index_of(let_var)) elif kind == IdentifierKind.VAR: self.vmw.write_push(Segment.LOCAL, self.symbol_table.index_of(let_var)) elif kind == IdentifierKind.FIELD: self.vmw.write_push(Segment.THIS, self.symbol_table.index_of(let_var)) elif kind == IdentifierKind.STATIC: self.vmw.write_push(Segment.STATIC, self.symbol_table.index_of(let_var)) # temp_2 <- base + i self.vmw.write_arithmetic(Command.ADD) self.vmw.write_pop(Segment.TEMP, 2) # value self.compile_expression() # set THAT <- base+i self.vmw.write_push(Segment.TEMP, 2) self.vmw.write_pop(Segment.POINTER, 1) self.vmw.write_pop(Segment.THAT, 0) self.compile_symbol(Tokens.SEMI_COLON) else: self.compile_symbol(Tokens.EQUAL) self.compile_expression() self.compile_symbol(Tokens.SEMI_COLON) kind = self.symbol_table.kind_of(let_var) if kind == IdentifierKind.VAR: self.vmw.write_pop(Segment.LOCAL, self.symbol_table.index_of(let_var)) elif kind == IdentifierKind.ARG: self.vmw.write_pop(Segment.ARG, self.symbol_table.index_of(let_var)) elif kind == IdentifierKind.FIELD: self.vmw.write_pop(Segment.THIS, self.symbol_table.index_of(let_var)) elif kind == IdentifierKind.STATIC: self.vmw.write_pop(Segment.STATIC, self.symbol_table.index_of(let_var)) self.write_element_end('letStatement') elif self.next_is(Tokens.IF): self.write_element_start('ifStatement') self.compile_keyword(Tokens.IF) self.compile_symbol(Tokens.LEFT_ROUND_BRACKET) self.compile_expression() self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET) self.vmw.write_arithmetic(Command.NOT) l1 = 
self.get_new_label() l2 = self.get_new_label() self.vmw.write_if(l1) self.compile_symbol(Tokens.LEFT_CURLY_BRACKET) self.compile_statements() self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET) self.vmw.write_goto(l2) self.vmw.write_label(l1) if self.next_is(Tokens.ELSE): self.compile_keyword(Tokens.ELSE) self.compile_symbol(Tokens.LEFT_CURLY_BRACKET) self.compile_statements() self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET) self.vmw.write_label(l2) self.write_element_end('ifStatement') elif self.next_is(Tokens.WHILE): self.write_element_start('whileStatement') l1 = self.get_new_label() l2 = self.get_new_label() self.compile_keyword(Tokens.WHILE) self.vmw.write_label(l1) self.compile_symbol(Tokens.LEFT_ROUND_BRACKET) self.compile_expression() self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET) self.vmw.write_arithmetic(Command.NOT) self.vmw.write_if(l2) self.compile_symbol(Tokens.LEFT_CURLY_BRACKET) self.compile_statements() self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET) self.vmw.write_goto(l1) self.vmw.write_label(l2) self.write_element_end('whileStatement') elif self.next_is(Tokens.DO): self.write_element_start('doStatement') self.compile_keyword(Tokens.DO) self.compile_subroutine_call() self.compile_symbol(Tokens.SEMI_COLON) self.write_element_end('doStatement') self.vmw.write_pop(Segment.TEMP, 0) elif self.next_is(Tokens.RETURN): self.write_element_start('returnStatement') self.compile_keyword(Tokens.RETURN) if not self.next_is(Tokens.SEMI_COLON): self.compile_expression() else: self.vmw.write_push(Segment.CONST, 0) self.compile_symbol(Tokens.SEMI_COLON) self.vmw.write_return() self.write_element_end('returnStatement') def compile_subroutine_call(self): if self.next_is(Tokens.LEFT_ROUND_BRACKET, idx=1): subroutinename = self.compile_subroutine_name().token self.compile_symbol(Tokens.LEFT_ROUND_BRACKET) self.vmw.write_push(Segment.POINTER, 0) argnum = self.compile_expression_list() self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET) self.vmw.write_call("%s.%s" % 
(self.compiled_class_name, subroutinename), argnum + 1) else: identifier_str = self.tokenizer.see_next().token if self.symbol_table.kind_of(identifier_str): instance_name = self.compile_var_name(call=True).token self.compile_symbol(Tokens.DOT) subroutinename = self.compile_subroutine_name().token self.compile_symbol(Tokens.LEFT_ROUND_BRACKET) kind = self.symbol_table.kind_of(instance_name) if kind == IdentifierKind.ARG: self.vmw.write_push(Segment.ARG, self.symbol_table.index_of(instance_name)) elif kind == IdentifierKind.VAR: self.vmw.write_push(Segment.LOCAL, self.symbol_table.index_of(instance_name)) elif kind == IdentifierKind.FIELD: self.vmw.write_push(Segment.THIS, self.symbol_table.index_of(instance_name)) elif kind == IdentifierKind.STATIC: self.vmw.write_push(Segment.STATIC, self.symbol_table.index_of(instance_name)) argnum = self.compile_expression_list() self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET) self.vmw.write_call("%s.%s" % (self.symbol_table.type_of(instance_name), subroutinename), argnum + 1) else: classname = self.compile_class_name().token self.compile_symbol(Tokens.DOT) subroutinename = self.compile_subroutine_name().token self.compile_symbol(Tokens.LEFT_ROUND_BRACKET) argnum = self.compile_expression_list() self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET) self.vmw.write_call("%s.%s" % (classname, subroutinename), argnum) def compile_expression_list(self): self.write_element_start('expressionList') argnum = 0 if not self.next_is(Tokens.RIGHT_ROUND_BRACKET): self.compile_expression() argnum += 1 while self.next_is(Tokens.COMMA): self.compile_symbol(Tokens.COMMA) self.compile_expression() argnum += 1 self.write_element_end('expressionList') return argnum def compile_expression(self): self.write_element_start('expression') self.compile_term() while self.next_is([ Tokens.PLUS, Tokens.MINUS, Tokens.MULTI, Tokens.DIV, Tokens.AND, Tokens.PIPE, Tokens.LESS_THAN, Tokens.GREATER_THAN, Tokens.EQUAL]): op_token = self.compile_symbol([ Tokens.PLUS, 
Tokens.MINUS, Tokens.MULTI, Tokens.DIV, Tokens.AND, Tokens.PIPE, Tokens.LESS_THAN, Tokens.GREATER_THAN, Tokens.EQUAL]) self.compile_term() if op_token == Tokens.PLUS: self.vmw.write_arithmetic(Command.ADD) elif op_token == Tokens.MINUS: self.vmw.write_arithmetic(Command.SUB) elif op_token == Tokens.MULTI: self.vmw.write_call('Math.multiply', 2) elif op_token == Tokens.DIV: self.vmw.write_call('Math.divide', 2) elif op_token == Tokens.AND: self.vmw.write_arithmetic(Command.AND) elif op_token == Tokens.PIPE: self.vmw.write_arithmetic(Command.OR) elif op_token == Tokens.LESS_THAN: self.vmw.write_arithmetic(Command.LT) elif op_token == Tokens.GREATER_THAN: self.vmw.write_arithmetic(Command.GT) elif op_token == Tokens.EQUAL: self.vmw.write_arithmetic(Command.EQ) self.write_element_end('expression') def compile_term(self): self.write_element_start('term') if self.next_type_is(TokenType.INT_CONST): value_str = self.compile_integer_constant() self.vmw.write_push(Segment.CONST, value_str) elif self.next_type_is(TokenType.STRING_CONST): self.compile_string_constant() elif self.next_is(Tokens.NULL): self.compile_keyword(Tokens.NULL) self.vmw.write_push(Segment.CONST, 0) elif self.next_is(Tokens.THIS): self.compile_keyword(Tokens.THIS) self.vmw.write_push(Segment.POINTER, 0) elif self.next_is(Tokens.TRUE): self.compile_keyword(Tokens.TRUE) self.vmw.write_push(Segment.CONST, 0) self.vmw.write_arithmetic(Command.NOT) elif self.next_is(Tokens.FALSE): self.compile_keyword(Tokens.FALSE) self.vmw.write_push(Segment.CONST, 0) elif self.next_type_is(TokenType.IDENTIFIER): if self.next_is(Tokens.LEFT_BOX_BRACKET, idx=1): var_name = self.compile_var_name().token self.compile_symbol(Tokens.LEFT_BOX_BRACKET) self.compile_expression() self.vmw.write_arithmetic(Command.ADD) self.vmw.write_pop(Segment.POINTER, 1) self.vmw.write_push(Segment.THAT, 0) self.compile_symbol(Tokens.RIGHT_BOX_BRACKET) elif self.next_is([Tokens.LEFT_ROUND_BRACKET, Tokens.DOT], idx=1): self.compile_subroutine_call() 
else: self.compile_var_name() elif self.next_is(Tokens.LEFT_ROUND_BRACKET): self.compile_symbol(Tokens.LEFT_ROUND_BRACKET) self.compile_expression() self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET) elif self.next_is(Tokens.TILDE): self.compile_symbol(Tokens.TILDE) self.compile_term() self.vmw.write_arithmetic(Command.NOT) elif self.next_is(Tokens.MINUS): self.compile_symbol(Tokens.MINUS) self.compile_term() self.vmw.write_arithmetic(Command.NEG) else: self.raise_syntax_error('') self.write_element_end('term') def next_type_is(self, token_type): return self.tokenizer.see_next().type == token_type def compile_type(self): type_token = self.tokenizer.see_next() if self.next_is([Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN]): self.compile_keyword([Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN]) else: self.compile_class_name() return type_token def next_is_statement(self): return self.next_is([Tokens.LET, Tokens.IF, Tokens.WHILE, Tokens.DO, Tokens.RETURN]) def next_is(self, tokens, idx=0): if type(tokens) == list: return self.tokenizer.see_next(idx=idx) in tokens else: return self.tokenizer.see_next(idx=idx) == tokens def next_is_class_var_dec(self): return self.next_is([Tokens.STATIC, Tokens.FIELD]) def next_is_subroutine_dec(self): return self.next_is([Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD]) def compile_symbol(self, tokens): self.tokenizer.advance() if type(tokens) == list: if self.tokenizer.current_token in tokens: self.write_element('symbol', self.tokenizer.current_token.token_escaped) return self.tokenizer.current_token else: self.raise_syntax_error('') else: if self.tokenizer.current_token == tokens: self.write_element('symbol', self.tokenizer.current_token.token_escaped) return self.tokenizer.current_token else: self.raise_syntax_error('') def compile_keyword(self, tokens): self.tokenizer.advance() if type(tokens) == list: if self.tokenizer.current_token in tokens: self.write_element('keyword', self.tokenizer.current_token.token_escaped) return 
self.tokenizer.current_token else: self.raise_syntax_error('') else: if self.tokenizer.current_token == tokens: self.write_element('keyword', self.tokenizer.current_token.token_escaped) return self.tokenizer.current_token else: self.raise_syntax_error('') def compile_identifier(self): self.tokenizer.advance() if isinstance(self.tokenizer.current_token, Identifier): identifier_str = self.tokenizer.current_token.token_escaped self.write_element( 'identifier', identifier_str ) return self.tokenizer.current_token else: self.raise_syntax_error('') def compile_integer_constant(self): self.tokenizer.advance() if isinstance(self.tokenizer.current_token, IntegerConstant): self.write_element('integerConstant', self.tokenizer.current_token.token_escaped) return self.tokenizer.current_token.token_escaped else: self.raise_syntax_error('') def compile_string_constant(self): self.tokenizer.advance() if isinstance(self.tokenizer.current_token, StringConstant): string = self.tokenizer.current_token.token self.write_element('stringConstant', self.tokenizer.current_token.token_escaped) self.vmw.write_push(Segment.CONST, len(string)) self.vmw.write_call('String.new', 1) for c in string: self.vmw.write_push(Segment.CONST, ord(c)) self.vmw.write_call('String.appendChar', 2) else: self.raise_syntax_error('') def write_element(self, elem_name, value): self.wf.write('<%s> %s </%s>\n' % (elem_name, value, elem_name)) def write_element_start(self, elem_name): self.wf.write('<%s>\n' % elem_name) def write_element_end(self, elem_name): self.wf.write('</%s>\n' % elem_name) def raise_syntax_error(self, msg): raise Exception('%s' % msg)
import glob
import sys
from pathlib import Path, PurePath
from os.path import isfile, isdir, join
from jack_tokenizer import JackTokenizer
from compilation_engine import CompilationEngine

if __name__ == '__main__':
    # Driver: compile a single .jack file, or every .jack file in a directory,
    # writing each <Name>.vm next to its source.
    if len(sys.argv) <= 1:
        raise TypeError("1 argument is required: program path, 0 arguments entered")
    program_path = sys.argv[1]
    if isfile(program_path):
        jack_sources = [program_path]
        out_dir = Path(program_path).parent
    elif isdir(program_path):
        jack_sources = glob.glob(join(program_path, '*.jack'))
        out_dir = program_path
    else:
        raise FileNotFoundError("[Errno 2] No such file or directory: ", program_path)
    for source in jack_sources:
        # <Name>.jack -> <Name>.vm (everything after the first '.' is dropped).
        vm_name = PurePath(source).name.split('.')[0] + '.vm'
        vm_path = Path(out_dir, vm_name)
        tokenizer = JackTokenizer(source)
        CompilationEngine(tokenizer, vm_path)
class CompilationEngine:
    """Recursive-descent syntax analyzer for the Jack language.

    Pulls tokens from a JackTokenizer and writes the parse tree of one
    class to ``output_file_path`` as XML. Each ``compile_*`` method
    handles one grammar rule and leaves the tokenizer positioned on the
    first token after that rule.
    """

    CLASS_VAR_DEC_KEYWORDS = ['static', 'field']
    SUBROUTINE_DEC_KEYWORDS = ['constructor', 'function', 'method']
    BINARY_OPERATOR_SYMBOLS = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
    UNARY_OPERATOR_SYMBOLS = ['-', '~']
    TYPE_TO_TAG = {'STRING_CONST': 'stringConstant', 'INT_CONST': 'integerConstant',
                   'KEYWORD': 'keyword', 'IDENTIFIER': 'identifier', 'SYMBOL': 'symbol'}
    # Characters that are markup in XML must be emitted as predefined
    # entities; writing them raw would produce ill-formed XML.
    SYMBOLS_TO_XML_CONVENTION = {'<': '&lt;', '>': '&gt;', '&': '&amp;', '"': '&quot;'}

    def __init__(self, input_file_path, output_file_path):
        """Open the output stream, tokenize the input, and compile it."""
        self.output_file = open(output_file_path, 'w')
        self.jack_tokenizer = JackTokenizer(input_file_path)
        if self.jack_tokenizer.has_more_tokens():
            self.compile_class()

    def compile_class(self):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        self.output_file.write('<class>\n')
        # get first token
        self.jack_tokenizer.advance()
        self.write_token(self.jack_tokenizer.key_word())    # 'class'
        self.write_token(self.jack_tokenizer.identifier())  # className
        self.write_token(self.jack_tokenizer.symbol())      # '{'
        # the called compilers loop internally over all declarations
        if self.jack_tokenizer.key_word() in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            self.compile_class_var_dec()
        if self.jack_tokenizer.key_word() in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.compile_subroutine()
        self.write_token(self.jack_tokenizer.symbol())      # '}'
        self.output_file.write('</class>')
        self.output_file.close()

    def write_token(self, token_name):
        """Write the current token as one XML element, then advance."""
        type_tag = CompilationEngine.TYPE_TO_TAG[self.jack_tokenizer.token_type()]
        self.output_file.write('<{0}> {1} </{0}>\n'.format(
            type_tag, token_name))
        self.jack_tokenizer.advance()

    def compile_class_var_dec(self):
        """classVarDec: ('static' | 'field') type varName (',' varName)* ';'"""
        while self.jack_tokenizer.key_word() in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            self.output_file.write('<classVarDec>\n')
            self.write_token(self.jack_tokenizer.key_word())
            self.write_type()
            self.write_token(self.jack_tokenizer.identifier())
            while self.jack_tokenizer.symbol() != ';':
                self.write_token(self.jack_tokenizer.symbol())      # ','
                self.write_token(self.jack_tokenizer.identifier())  # varName
            self.write_token(self.jack_tokenizer.symbol())          # ';'
            self.output_file.write('</classVarDec>\n')

    def write_type(self):
        """type: 'int' | 'char' | 'boolean' (keyword) or className (identifier)."""
        if self.jack_tokenizer.token_type() == 'KEYWORD':
            self.write_token(self.jack_tokenizer.key_word())
        elif self.jack_tokenizer.token_type() == 'IDENTIFIER':
            self.write_token(self.jack_tokenizer.identifier())

    def compile_subroutine(self):
        """subroutineDec: kind ('void'|type) name '(' parameterList ')' body"""
        while self.jack_tokenizer.key_word() in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.output_file.write('<subroutineDec>\n')
            self.write_token(self.jack_tokenizer.key_word())    # constructor/function/method
            self.write_type()                                   # return type or 'void'
            self.write_token(self.jack_tokenizer.identifier())  # subroutineName
            self.write_token(self.jack_tokenizer.symbol())      # '('
            self.compile_parameter_list()
            self.write_token(self.jack_tokenizer.symbol())      # ')'
            self.output_file.write('<subroutineBody>\n')
            self.write_token(self.jack_tokenizer.symbol())      # '{'
            while self.jack_tokenizer.key_word() == 'var':
                self.compile_var_dec()
            self.compile_statements()
            self.write_token(self.jack_tokenizer.symbol())      # '}'
            self.output_file.write('</subroutineBody>\n')
            self.output_file.write('</subroutineDec>\n')

    def compile_parameter_list(self):
        """parameterList: ((type varName) (',' type varName)*)?"""
        self.output_file.write('<parameterList>\n')
        if self.jack_tokenizer.symbol() != ')':
            self.write_type()
            self.write_token(self.jack_tokenizer.identifier())
            while self.jack_tokenizer.symbol() == ",":
                self.write_token(self.jack_tokenizer.symbol())
                self.write_type()
                self.write_token(self.jack_tokenizer.identifier())
        self.output_file.write('</parameterList>\n')

    def compile_var_dec(self):
        """varDec: 'var' type varName (',' varName)* ';'"""
        self.output_file.write('<varDec>\n')
        self.write_token(self.jack_tokenizer.key_word())
        self.write_type()
        self.write_token(self.jack_tokenizer.identifier())
        while self.jack_tokenizer.symbol() == ",":
            self.write_token(self.jack_tokenizer.symbol())
            self.write_token(self.jack_tokenizer.identifier())
        self.write_token(self.jack_tokenizer.symbol())  # ';'
        self.output_file.write('</varDec>\n')

    def compile_statements(self):
        """statements: (letStatement | ifStatement | whileStatement
        | doStatement | returnStatement)*"""
        self.output_file.write('<statements>\n')
        while self.jack_tokenizer.token_type() == 'KEYWORD':
            if self.jack_tokenizer.key_word() == 'let':
                self.compile_let()
            elif self.jack_tokenizer.key_word() == 'if':
                self.compile_if()
            elif self.jack_tokenizer.key_word() == 'while':
                self.compile_while()
            elif self.jack_tokenizer.key_word() == 'do':
                self.compile_do()
            elif self.jack_tokenizer.key_word() == 'return':
                self.compile_return()
        self.output_file.write('</statements>\n')

    def compile_do(self):
        """doStatement: 'do' subroutineCall ';'"""
        self.output_file.write('<doStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())    # 'do'
        self.write_token(self.jack_tokenizer.identifier())  # subroutine or receiver name
        self.compile_subroutine_call()
        self.write_token(self.jack_tokenizer.symbol())      # ';'
        self.output_file.write('</doStatement>\n')

    def compile_subroutine_call(self):
        """Remainder of a call after its leading identifier:
        '(' expressionList ')'  or  '.' subroutineName '(' expressionList ')'"""
        if self.jack_tokenizer.symbol() == '(':
            self.write_token(self.jack_tokenizer.symbol())      # '('
            self.compile_expression_list()
            self.write_token(self.jack_tokenizer.symbol())      # ')'
        elif self.jack_tokenizer.symbol() == '.':
            self.write_token(self.jack_tokenizer.symbol())      # '.'
            self.write_token(self.jack_tokenizer.identifier())  # subroutineName
            self.write_token(self.jack_tokenizer.symbol())      # '('
            self.compile_expression_list()
            self.write_token(self.jack_tokenizer.symbol())      # ')'

    def compile_let(self):
        """letStatement: 'let' varName ('[' expression ']')? '=' expression ';'"""
        self.output_file.write('<letStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())    # 'let'
        self.write_token(self.jack_tokenizer.identifier())  # varName
        if self.jack_tokenizer.symbol() == '[':
            self.write_token(self.jack_tokenizer.symbol())  # '['
            self.compile_expression()
            self.write_token(self.jack_tokenizer.symbol())  # ']'
        self.write_token(self.jack_tokenizer.symbol())      # '='
        self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())      # ';'
        self.output_file.write('</letStatement>\n')

    def compile_while(self):
        """whileStatement: 'while' '(' expression ')' '{' statements '}'"""
        self.output_file.write('<whileStatement>\n')
        # the leading token is the 'while' keyword, so fetch it via
        # key_word() like every other statement compiler does
        self.write_token(self.jack_tokenizer.key_word())
        self.write_token(self.jack_tokenizer.symbol())  # '('
        self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())  # ')'
        self.write_token(self.jack_tokenizer.symbol())  # '{'
        self.compile_statements()
        self.write_token(self.jack_tokenizer.symbol())  # '}'
        self.output_file.write('</whileStatement>\n')

    def compile_return(self):
        """returnStatement: 'return' expression? ';'"""
        self.output_file.write('<returnStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())
        if self.jack_tokenizer.symbol() != ';':
            self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())  # ';'
        self.output_file.write('</returnStatement>\n')

    def compile_if(self):
        """ifStatement: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?"""
        self.output_file.write('<ifStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())    # 'if'
        self.write_token(self.jack_tokenizer.symbol())      # '('
        self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())      # ')'
        self.write_token(self.jack_tokenizer.symbol())      # '{'
        self.compile_statements()
        self.write_token(self.jack_tokenizer.symbol())      # '}'
        if self.jack_tokenizer.key_word() == 'else':
            self.write_token(self.jack_tokenizer.key_word())
            self.write_token(self.jack_tokenizer.symbol())  # '{'
            self.compile_statements()
            self.write_token(self.jack_tokenizer.symbol())  # '}'
        self.output_file.write('</ifStatement>\n')

    def compile_expression(self):
        """expression: term (op term)*"""
        self.output_file.write('<expression>\n')
        self.compile_term()
        while self.jack_tokenizer.symbol() in CompilationEngine.BINARY_OPERATOR_SYMBOLS:
            symbol = self.jack_tokenizer.symbol()
            # escape XML-markup characters before writing the operator
            if symbol in CompilationEngine.SYMBOLS_TO_XML_CONVENTION:
                symbol = CompilationEngine.SYMBOLS_TO_XML_CONVENTION[symbol]
            self.write_token(symbol)
            self.compile_term()
        self.output_file.write('</expression>\n')

    def compile_term(self):
        """term: integerConstant | stringConstant | keywordConstant | varName
        | varName '[' expression ']' | subroutineCall | '(' expression ')'
        | unaryOp term"""
        self.output_file.write('<term>\n')
        token_type = self.jack_tokenizer.token_type()
        if token_type == 'IDENTIFIER':
            self.write_token(self.jack_tokenizer.identifier())
            if self.jack_tokenizer.symbol() == '[':          # array access
                self.write_token(self.jack_tokenizer.symbol())
                self.compile_expression()
                self.write_token(self.jack_tokenizer.symbol())
            elif self.jack_tokenizer.symbol() == '(' or self.jack_tokenizer.symbol() == '.':
                self.compile_subroutine_call()
        elif token_type == 'STRING_CONST':
            self.write_token(self.jack_tokenizer.string_val())
        elif token_type == 'INT_CONST':
            self.write_token(self.jack_tokenizer.int_val())
        elif token_type == 'KEYWORD':
            self.write_token(self.jack_tokenizer.key_word())
        elif token_type == 'SYMBOL':
            if self.jack_tokenizer.symbol() == '(':          # parenthesized expression
                self.write_token(self.jack_tokenizer.symbol())
                self.compile_expression()
                self.write_token(self.jack_tokenizer.symbol())
            elif self.jack_tokenizer.symbol() in CompilationEngine.UNARY_OPERATOR_SYMBOLS:
                self.write_token(self.jack_tokenizer.symbol())
                self.compile_term()
        self.output_file.write('</term>\n')

    def compile_expression_list(self):
        """expressionList: (expression (',' expression)*)?"""
        self.output_file.write('<expressionList>\n')
        if self.jack_tokenizer.symbol() != ')':
            self.compile_expression()
            while self.jack_tokenizer.symbol() == ',':
                self.write_token(self.jack_tokenizer.symbol())
                self.compile_expression()
        self.output_file.write('</expressionList>\n')
def __init__(self, input_file_path, output_file_path):
    """Open the XML output stream, tokenize the input file, and — when the
    source contains any tokens — compile the whole class immediately."""
    self.output_file = open(output_file_path, 'w')
    self.jack_tokenizer = JackTokenizer(input_file_path)
    if not self.jack_tokenizer.has_more_tokens():
        return
    self.compile_class()
class SyntaxParser:
    """Recursive-descent parser for Jack source files.

    Consumes tokens from a JackTokenizer and builds a SyntaxTreeNode tree
    mirroring the Jack grammar. Each ``compile_*`` method returns the
    subtree for one grammar rule, or ``None`` when the lookahead token does
    not start that rule; ``__expect_*`` helpers consume exactly one token
    on success and leave the stream untouched on failure.
    """

    def __init__(self, source_filepath):
        self.source_path = source_filepath
        self.tokenizer = JackTokenizer(self.source_path)
        self.class_name = None  # set by compile_class
        self.syntax_tree_root = self.compile_class()

    def get_syntax_tree(self) -> SyntaxTreeNode:
        """Return the root of the parsed syntax tree."""
        return self.syntax_tree_root

    def __expect_keyword(self, keywords):
        """Consume the next token if it is one of *keywords*; else None."""
        if isinstance(keywords, str):
            # normalize a single keyword to a tuple so the `in` test below
            # is an exact match, never a substring match on a plain str
            keywords = (keywords,)
        next_token = self.tokenizer.peek_next()
        if next_token and next_token.get_type() == TokenType.KEYWORD:
            tk_value = next_token.get_keyword()
            if tk_value in keywords:
                self.tokenizer.advance()
                return SyntaxTreeNode('keyword', tk_value)

    def __expect_identifier(self):
        """Consume the next token if it is an identifier; else None."""
        next_token = self.tokenizer.peek_next()
        if next_token and next_token.get_type() == TokenType.IDENTIFIER:
            self.tokenizer.advance()
            return SyntaxTreeNode('identifier', next_token.get_identifier())

    def __expect_symbol(self, symbols):
        """Consume the next token if it is one of *symbols*; else None."""
        if isinstance(symbols, str):
            symbols = (symbols,)  # exact-match normalization, as above
        next_token = self.tokenizer.peek_next()
        if next_token and next_token.get_type() == TokenType.SYMBOL:
            tk_value = next_token.get_symbol()
            if tk_value in symbols:
                self.tokenizer.advance()
                return SyntaxTreeNode('symbol', tk_value)

    def __expect_type(self):
        """type: 'int' | 'char' | 'boolean' | className"""
        node = self.__expect_keyword(('int', 'char', 'boolean'))
        if not node:
            node = self.__expect_identifier()
        return node

    def __expect_void_or_type(self):
        """'void' | type — the return type of a subroutine."""
        node = self.__expect_keyword('void')
        if not node:
            node = self.__expect_type()
        return node

    def __expect_op(self):
        # '+'|'-'|'*'|'/'|'&'|'|'|'<'|'>'|'='
        return self.__expect_symbol(('+', '-', '*', '/', '&', '|', '<', '>', '='))

    def __expect_unary_op(self):
        return self.__expect_symbol(('-', '~'))

    def compile_class(self):
        # class: 'class' className '{' classVarDec* subroutineDec* '}'
        local_root = SyntaxTreeNode('class')
        # 'class'
        local_root.add_child(self.__expect_keyword('class'), 'expect keyword class')
        # className
        class_name_node = self.__expect_identifier()
        local_root.add_child(class_name_node, 'expect identifier className')
        self.class_name = class_name_node.value
        # '{'
        local_root.add_child(self.__expect_symbol('{'), 'expect {')
        # classVarDec*
        local_root.add_many(self.compile_class_var_dec)
        # subroutineDec*
        local_root.add_many(self.compile_subroutine)
        # '}'
        local_root.add_child(self.__expect_symbol('}'), 'expect } in class')
        return local_root

    def compile_class_var_dec(self):
        # classVarDec: ('static' | 'field') type varName (',' varName)* ';'
        node = self.__expect_keyword(('static', 'field'))
        if not node:
            return None
        local_root = SyntaxTreeNode('classVarDec')
        local_root.add_child(node)
        # type: 'int' | 'char' | 'boolean' | className (identifier)
        local_root.add_child(self.__expect_type(), 'expect type')
        # varName
        local_root.add_child(self.__expect_identifier(), 'expect varName')
        # (',' varName)*
        while True:
            node = self.__expect_symbol(',')
            if node:
                local_root.add_child(node)
                local_root.add_child(self.__expect_identifier(),
                                     'expect varName after ,')
            else:
                break
        # ';'
        local_root.add_child(self.__expect_symbol(';'), 'expect ; in varDec')
        return local_root

    def compile_subroutine(self):
        # ('constructor' | 'function' | 'method') ('void' | type)
        # subroutineName '(' parameterList ')' subroutineBody
        node = self.__expect_keyword(('constructor', 'function', 'method'))
        if not node:
            return None
        local_root = SyntaxTreeNode('subroutineDec')
        local_root.add_child(node)
        local_root.add_child(self.__expect_void_or_type(), 'expect void or type')
        local_root.add_child(self.__expect_identifier(), 'expect subroutineName')
        local_root.add_child(self.__expect_symbol('('), 'expect (')
        local_root.add_child(self.compile_parameter_list(), 'expect parameterList')
        local_root.add_child(self.__expect_symbol(')'), 'expect )')
        local_root.add_child(self.compile_subroutine_body(), 'expect subroutineBody')
        return local_root

    def compile_parameter_list(self):
        # ((type varName) (',' type varName)*)?
        local_root = SyntaxTreeNode('parameterList')
        node = self.__expect_type()
        if not node:
            # empty parameter list is legal
            return local_root
        local_root.add_child(node)
        local_root.add_child(self.__expect_identifier(), 'expect varName')
        while True:
            node = self.__expect_symbol(',')
            if node:
                local_root.add_child(node)
                local_root.add_child(self.__expect_type(), 'expect type')
                local_root.add_child(self.__expect_identifier(), 'expect identifier')
            else:
                break
        return local_root

    def compile_subroutine_body(self):
        # subroutineBody: '{' varDec* statements '}'
        local_root = SyntaxTreeNode('subroutineBody')
        node = self.__expect_symbol('{')
        if not node:
            return None
        local_root.add_child(node)
        local_root.add_many(self.compile_var_dec)
        local_root.add_child(self.compile_statements(), 'expect statements')
        local_root.add_child(self.__expect_symbol('}'), 'expect }')
        return local_root

    def compile_var_dec(self):
        # 'var' type varName (',' varName)* ';'
        node = self.__expect_keyword('var')
        if not node:
            return None
        local_node = SyntaxTreeNode('varDec')
        local_node.add_child(node)
        local_node.add_child(self.__expect_type(), 'expect type in varDec')
        local_node.add_child(self.__expect_identifier(), 'expect identifier in varDec')
        while True:
            node = self.__expect_symbol(',')
            if node:
                local_node.add_child(node)
                local_node.add_child(self.__expect_identifier(),
                                     'expect identifier in varDec')
            else:
                break
        local_node.add_child(self.__expect_symbol(';'))
        return local_node

    def compile_statements(self):
        # statements: statement* — try each statement kind in turn
        local_root = SyntaxTreeNode('statements')
        local_root.add_many(lambda: or_compile((
            self.compile_do,
            self.compile_let,
            self.compile_while,
            self.compile_return,
            self.compile_if,
        )))
        return local_root

    def compile_do(self):
        # 'do' subroutineCall ';'
        node = self.__expect_keyword('do')
        if not node:
            return None
        local_root = SyntaxTreeNode('doStatement')
        local_root.add_child(node)
        sub_call = self.compile_subroutine_call()
        if not sub_call:
            sys.exit('missing subroutine call in do statement')
        local_root.add_child(sub_call)
        local_root.add_child(self.__expect_symbol(';'), 'expect ; in do')
        return local_root

    def compile_subroutine_call(self) -> SyntaxTreeNode:
        # subroutineCall: subroutineName '(' expressionList ')'
        #     | (className | varName) '.' subroutineName '(' expressionList ')'
        local_root = None
        next_token = self.tokenizer.peek_next()
        if next_token.get_type() == TokenType.IDENTIFIER:
            # two-token lookahead decides between a bare varName and a call
            # NOTE(review): assumes get_symbol() is safe on non-symbol
            # tokens — confirm against the Token API
            next2_token = self.tokenizer.peek_next(2)
            n2_value = next2_token.get_symbol()
            if n2_value in ('(', '.'):
                local_root = SyntaxTreeNode('subroutineCall')
                node = self.__expect_identifier()
                local_root.add_child(node)
                next_token = self.tokenizer.peek_next()
                if next_token.get_symbol() == '.':
                    local_root.add_child(self.__expect_symbol('.'))
                    local_root.add_child(self.__expect_identifier())
                local_root.add_child(self.__expect_symbol('('))
                local_root.add_child(self.compile_expression_list())
                local_root.add_child(self.__expect_symbol(')'))
        return local_root

    def __expect_keyword_constant(self):
        return self.__expect_keyword(('true', 'false', 'null', 'this'))

    def __expect_integer_constant(self):
        next_token = self.tokenizer.peek_next()
        if next_token and next_token.get_type() == TokenType.INT_CONSTANT:
            self.tokenizer.advance()
            return SyntaxTreeNode('integerConstant', next_token.get_integer_constant())

    def __expect_string_constant(self):
        next_token = self.tokenizer.peek_next()
        if next_token and next_token.get_type() == TokenType.STRING_CONSTANT:
            self.tokenizer.advance()
            return SyntaxTreeNode('stringConstant', next_token.get_string_constant())

    def compile_let(self):
        # 'let' varName ('[' expression ']')? '=' expression ';'
        node = self.__expect_keyword('let')
        if not node:
            return None
        local_root = SyntaxTreeNode('letStatement')
        local_root.add_child(node)
        local_root.add_child(self.__expect_identifier(), 'expect varName in let')
        next_token = self.tokenizer.peek_next()
        if next_token and next_token.get_symbol() == '[':
            local_root.add_child(self.compile_array_access())
        local_root.add_child(self.__expect_symbol('='), 'expect = in let')
        local_root.add_child(self.compile_expression(),
                             'expect expression in let statement')
        local_root.add_child(self.__expect_symbol(';'), 'expect ; in let')
        return local_root

    def compile_array_access(self):
        # '[' expression ']'
        local_root = SyntaxTreeNode('arrayAccess')
        local_root.add_child(self.__expect_symbol('['), 'expect [')
        local_root.add_child(self.compile_expression(),
                             'expect expression in array access')
        local_root.add_child(self.__expect_symbol(']'), 'expect ]')
        return local_root

    def compile_while(self):
        # 'while' '(' expression ')' '{' statements '}'
        node = self.__expect_keyword('while')
        if not node:
            return None
        local_root = SyntaxTreeNode('whileStatement')
        local_root.add_child(node)
        local_root.add_child(self.__expect_symbol('('), 'expect ( in while')
        local_root.add_child(self.compile_expression(), 'expect expression in while')
        local_root.add_child(self.__expect_symbol(')'), 'expect ) in while')
        local_root.add_child(self.__expect_symbol('{'), 'expect { in while')
        local_root.add_child(self.compile_statements(), 'expect statements in while')
        local_root.add_child(self.__expect_symbol('}'), 'expect } in while')
        return local_root

    def compile_return(self):
        # 'return' expression? ';'
        node = self.__expect_keyword('return')
        if not node:
            return None
        local_root = SyntaxTreeNode('returnStatement')
        local_root.add_child(node)
        # expression is optional — no error message on a missing one
        local_root.add_child(self.compile_expression())
        local_root.add_child(self.__expect_symbol(';'), 'expect ; in return')
        return local_root

    def compile_if(self):
        # 'if' '(' expression ')' '{' statements '}'
        # ('else' '{' statements '}')?
        node = self.__expect_keyword('if')
        if not node:
            return None
        local_root = SyntaxTreeNode('ifStatement')
        local_root.add_child(node)
        local_root.add_child(self.__expect_symbol('('), 'expect ( in if')
        local_root.add_child(self.compile_expression(), 'expect expression in if')
        local_root.add_child(self.__expect_symbol(')'), 'expect ) in if')
        local_root.add_child(self.__expect_symbol('{'), 'expect { in if')
        local_root.add_child(self.compile_statements(), 'expect statements in if')
        local_root.add_child(self.__expect_symbol('}'), 'expect } in if')
        next_token = self.tokenizer.peek_next()
        # NOTE(review): assumes get_keyword() is safe on non-keyword
        # tokens — confirm against the Token API
        if next_token and next_token.get_keyword() == 'else':
            local_root.add_child(self.__expect_keyword('else'))
            local_root.add_child(self.__expect_symbol('{'), 'expect { in else')
            local_root.add_child(self.compile_statements(),
                                 'expect statements in else')
            local_root.add_child(self.__expect_symbol('}'), 'expect } in else')
        return local_root

    def compile_expression(self):
        # term (op term)*
        node = self.compile_term()
        if not node:
            return None
        local_root = SyntaxTreeNode('expression')
        local_root.add_child(node)
        while True:
            node = self.__expect_op()
            if node:
                local_root.add_child(node)
                local_root.add_child(self.compile_term(), 'expect term after op')
            else:
                break
        return local_root

    def compile_term(self):
        # integerConstant | stringConstant | keywordConstant |
        # varName | varName '[' expression ']' |
        # subroutineCall | '(' expression ')' |
        # unaryOp term
        local_root = SyntaxTreeNode('term')
        # integerConstant | stringConstant | keywordConstant
        node = or_compile((
            self.__expect_integer_constant,
            self.__expect_string_constant,
            self.__expect_keyword_constant,
        ))
        if node:
            local_root.add_child(node)
            return local_root
        # subroutineCall (must be tried before bare varName — it needs
        # the two-token lookahead inside compile_subroutine_call)
        sub_call = self.compile_subroutine_call()
        if sub_call:
            local_root.add_child(sub_call)
            return local_root
        next_token = self.tokenizer.peek_next()
        # varName | varName '[' expression ']'
        if next_token and next_token.get_type() == TokenType.IDENTIFIER:
            local_root.add_child(self.__expect_identifier())
            next2 = self.tokenizer.peek_next()
            if next2 and next2.get_symbol() == '[':
                local_root.add_child(self.compile_array_access())
            return local_root
        # '(' expression ')'
        if next_token and next_token.get_symbol() == '(':
            local_root.add_child(self.__expect_symbol('('))
            local_root.add_child(self.compile_expression(),
                                 'expect expression after ( in term')
            local_root.add_child(self.__expect_symbol(')'), 'expect ) in term')
            return local_root
        # unaryOp term
        node = self.__expect_unary_op()
        if node:
            local_root.add_child(node)
            local_root.add_child(self.compile_term(),
                                 'expect term after unary op in term')
            return local_root
        return None

    def compile_expression_list(self):
        # (expression (',' expression)*)?
        local_root = SyntaxTreeNode('expressionList')
        node = self.compile_expression()
        if node:
            local_root.add_child(node)
            while True:
                node = self.__expect_symbol(',')
                if node:
                    local_root.add_child(node)
                    local_root.add_child(self.compile_expression(),
                                         'expect expression in exp list')
                else:
                    break
        return local_root

    def save_as_xml(self, xml_path):
        """Serialize the parsed tree to *xml_path* as XML."""
        with open(xml_path, 'w') as writer:
            xml = self.syntax_tree_root.to_xml(indent_num=0)
            print(xml, file=writer, end='')
def tokenize(self, code, outfile):
    """Tokenize *code* and run a full compilation pass, writing to *outfile*."""
    engine = CompilationEngine(JackTokenizer(code), outfile)
    engine.compile()
def __init__(self, source_filepath):
    """Tokenize *source_filepath* and eagerly parse it into a syntax tree."""
    self.source_path = source_filepath
    self.class_name = None  # filled in while compiling the class
    self.tokenizer = JackTokenizer(self.source_path)
    self.syntax_tree_root = self.compile_class()
class CompilationEngine(object):
    """Recursive-descent parser for a single Jack class.

    Reads tokens from a JackTokenizer and writes the parse tree as XML
    to <source>.xml.  Call compile_class() exactly once after
    construction; it consumes the whole input and closes the output
    file.  Terminal tokens are emitted via _write, non-terminals open
    and close indented blocks via _start_block/_end_block.
    """

    # the destination file for writing
    destination_file = None
    # the tokenizer for the input file
    tokenizer = None
    # current indentation level of the emitted XML
    indent = 0

    # the constructor for compiling a single class
    # the next method to be called after construction must be compile_class
    # source_filename must be a single file, not a directory
    def __init__(self, source_filename):
        # destination filename:
        # if the original extension was .jack, make the extension .xml
        # if the original extension was not .jack, append .xml
        if source_filename.lower().endswith(".jack"):
            destination_filename = source_filename[:-5] + ".xml"
        else:
            destination_filename = source_filename + ".xml"
        # open the destination filename for writing
        self.destination_file = open(destination_filename, 'w')
        # create a tokenizer for the input file
        self.tokenizer = JackTokenizer(source_filename)

    # compiles a complete class and closes the output file
    def compile_class(self):
        self._start_block("class")

        # 'class' keyword
        tt, t = self._token_next(True, "KEYWORD", "class")
        self._write(tt, t)

        # name of class
        tt, t = self._token_next(True, "IDENTIFIER")
        self._write(tt, t)

        # open brace
        tt, t = self._token_next(True, "SYMBOL", "{")
        self._write(tt, t)

        # zero or more class-level variable declarations
        self.tokenizer.advance()
        while True:
            tt, t = self._token_next(False)
            if tt == "KEYWORD" and t in ["field", "static"]:
                self.compile_class_var_dec()
            else:
                # stop trying to process variable declarations
                break

        # zero or more subroutine declarations
        while True:
            tt, t = self._token_next(False)
            if tt == "KEYWORD" and t in ["constructor", "function", "method"]:
                self.compile_subroutine()
            else:
                # stop trying to process functions
                break

        # close brace; do not advance because we already advanced upon
        # exiting the last loop
        tt, t = self._token_next(False, "SYMBOL", "}")
        self._write(tt, t)

        self._end_block("class")
        # close the output file
        self.destination_file.close()

    # compiles a static declaration or field declaration
    def compile_class_var_dec(self):
        self._start_block("classVarDec")
        # same grammar as a local var declaration; False suppresses the
        # inner <varDec> tags
        self.compile_var_dec(False)
        self._end_block("classVarDec")

    # compiles a complete method, function, or constructor
    def compile_subroutine(self):
        self._start_block("subroutineDec")

        # constructor / function / method keyword
        tt, t = self._token_next(False, "KEYWORD")
        self._write(tt, t)

        # return type: either keyword (void) or an identifier (any type)
        tt, t = self._token_next(True)
        self._write(tt, t)

        # name of the method/function/constructor
        tt, t = self._token_next(True)
        self._write(tt, t)

        # opening parenthesis
        tt, t = self._token_next(True, "SYMBOL", "(")
        self._write(tt, t)

        # arguments
        self.tokenizer.advance()
        self.compile_parameter_list()

        # closing parenthesis
        tt, t = self._token_next(False, "SYMBOL", ")")
        self._write(tt, t)

        self._start_block("subroutineBody")

        # opening brace
        tt, t = self._token_next(True, "SYMBOL", "{")
        self._write(tt, t)

        # local variable declarations
        self.tokenizer.advance()
        while True:
            tt, t = self._token_next(False)
            if tt == "KEYWORD" and t == "var":
                self.compile_var_dec()
            else:
                # stop trying to process variable declarations
                break

        # statements
        self.compile_statements()

        # closing brace
        tt, t = self._token_next(False, "SYMBOL", "}")
        self._write(tt, t)

        self._end_block("subroutineBody")
        self._end_block("subroutineDec")
        self.tokenizer.advance()

    # compiles a (possibly empty) parameter list, not including the
    # enclosing parentheses
    def compile_parameter_list(self):
        self._start_block("parameterList")
        # check for empty list
        tt, t = self._token_next(False)
        if tt == "SYMBOL" and t == ")":
            # the parameter list was empty; do not process any more
            pass
        else:
            # there are things in the parameter list
            while True:
                # keyword (variable type)
                tt, t = self._token_next(False)
                self._write(tt, t)
                # identifier (variable name)
                tt, t = self._token_next(True)
                self._write(tt, t)
                # possible comma
                tt, t = self._token_next(True)
                if tt == "SYMBOL" and t == ",":
                    self._write(tt, t)
                else:
                    # not a comma; stop processing parameters
                    break
                # move past the comma onto the next type keyword
                self.tokenizer.advance()
        self._end_block("parameterList")

    # compiles a var declaration; print_tags=False is used by
    # compile_class_var_dec to reuse the body without <varDec> tags
    def compile_var_dec(self, print_tags=True):
        if print_tags:
            self._start_block("varDec")

        # the keyword to start the declaration
        tt, t = self._token_next(False, "KEYWORD")
        self._write(tt, t)

        # type of the declaration: an identifier or a keyword (int, etc)
        tt, t = self._token_next(True)
        self._write(tt, t)

        # name of the declaration
        tt, t = self._token_next(True, "IDENTIFIER")
        self._write(tt, t)

        # more than one variable may be declared, separated by commas;
        # process the 2nd..nth names
        self.tokenizer.advance()
        while True:
            tt, t = self._token_next(False)
            if tt == "SYMBOL" and t == ",":
                # write the comma
                self._write(tt, t)
                # another variable name follows
                tt, t = self._token_next(True, "IDENTIFIER")
                self._write(tt, t)
                self.tokenizer.advance()
            else:
                # no more variable names
                break

        # should be on the semicolon at the end of the line
        tt, t = self._token_next(False, "SYMBOL", ";")
        self._write(tt, t)
        self.tokenizer.advance()

        if print_tags:
            self._end_block("varDec")

    # compiles a sequence of statements, not including the enclosing {}
    def compile_statements(self):
        self._start_block("statements")
        while True:
            tt, t = self._token_next(False)
            if tt == "KEYWORD" and t in ["do", "let", "while", "return", "if"]:
                # dispatch to compile_<t> for the statement type
                getattr(self, "compile_" + t)()
            else:
                # not a statement; stop processing statements
                break
        self._end_block("statements")

    # compiles a do statement
    def compile_do(self):
        self._start_block("doStatement")

        # do keyword
        tt, t = self._token_next(False, "KEYWORD", "do")
        self._write(tt, t)

        # subroutine call
        self.tokenizer.advance()
        self.compile_subroutine_call()

        # semicolon
        tt, t = self._token_next(False, "SYMBOL", ";")
        self._write(tt, t)

        self._end_block("doStatement")
        self.tokenizer.advance()

    # compiles a let statement
    def compile_let(self):
        self._start_block("letStatement")

        # let keyword
        tt, t = self._token_next(False, "KEYWORD", "let")
        self._write(tt, t)

        # variable name
        tt, t = self._token_next(True, "IDENTIFIER")
        self._write(tt, t)

        # possible brackets for array
        tt, t = self._token_next(True)
        if tt == "SYMBOL" and t == "[":
            # write bracket
            self._write(tt, t)
            # compile the index expression
            self.tokenizer.advance()
            self.compile_expression()
            # closing bracket
            tt, t = self._token_next(False, "SYMBOL", "]")
            self._write(tt, t)
            # advance so we are on the = for the next line
            self.tokenizer.advance()

        # equals sign
        tt, t = self._token_next(False, "SYMBOL", "=")
        self._write(tt, t)

        # right-hand-side expression
        self.tokenizer.advance()
        self.compile_expression()

        # semicolon
        tt, t = self._token_next(False, "SYMBOL", ";")
        self._write(tt, t)

        self._end_block("letStatement")
        self.tokenizer.advance()

    # compiles a while statement
    def compile_while(self):
        self._start_block("whileStatement")

        # while keyword
        tt, t = self._token_next(False, "KEYWORD", "while")
        self._write(tt, t)

        # opening parenthesis
        tt, t = self._token_next(True, "SYMBOL", "(")
        self._write(tt, t)

        # condition expression
        self.tokenizer.advance()
        self.compile_expression()

        # closing parenthesis
        tt, t = self._token_next(False, "SYMBOL", ")")
        self._write(tt, t)

        # opening brace
        tt, t = self._token_next(True, "SYMBOL", "{")
        self._write(tt, t)

        # loop body
        self.tokenizer.advance()
        self.compile_statements()

        # closing brace
        tt, t = self._token_next(False, "SYMBOL", "}")
        self._write(tt, t)

        self._end_block("whileStatement")
        self.tokenizer.advance()

    # compiles a return statement
    def compile_return(self):
        self._start_block("returnStatement")

        # return keyword
        tt, t = self._token_next(False, "KEYWORD", "return")
        self._write(tt, t)

        # possible expression to return: anything except a bare ';'.
        # BUGFIX: the old test `tt != "SYMBOL" and t != ";"` skipped
        # expressions that *start* with a symbol, e.g. `return -x;` or
        # `return (a + b);`
        tt, t = self._token_next(True)
        if not (tt == "SYMBOL" and t == ";"):
            self.compile_expression()

        # ending semicolon
        tt, t = self._token_next(False, "SYMBOL", ";")
        self._write(tt, t)

        self._end_block("returnStatement")
        self.tokenizer.advance()

    # compiles an if statement, including a possible trailing else clause
    def compile_if(self):
        self._start_block("ifStatement")

        # if keyword
        tt, t = self._token_next(False, "KEYWORD", "if")
        self._write(tt, t)

        # opening parenthesis
        tt, t = self._token_next(True, "SYMBOL", "(")
        self._write(tt, t)

        # condition expression
        self.tokenizer.advance()
        self.compile_expression()

        # closing parenthesis
        tt, t = self._token_next(False, "SYMBOL", ")")
        self._write(tt, t)

        # opening brace
        tt, t = self._token_next(True, "SYMBOL", "{")
        self._write(tt, t)

        # then-branch statements
        self.tokenizer.advance()
        self.compile_statements()

        # closing brace
        tt, t = self._token_next(False, "SYMBOL", "}")
        self._write(tt, t)

        # lookahead for an else clause
        tt, t = self._token_next(True)
        if tt == "KEYWORD" and t == "else":
            # write else (BUGFIX: was `seld._write`, a NameError)
            self._write(tt, t)
            # opening brace (BUGFIX: must advance past 'else' first)
            tt, t = self._token_next(True, "SYMBOL", "{")
            self._write(tt, t)
            # else-branch statements (BUGFIX: advance past '{' so
            # compile_statements starts on the first statement, matching
            # every other statement-block call site in this class)
            self.tokenizer.advance()
            self.compile_statements()
            # closing brace
            tt, t = self._token_next(False, "SYMBOL", "}")
            self._write(tt, t)
            # advance tokenizer only if we are in the else, since
            # otherwise the token was already advanced by the else check
            self.tokenizer.advance()

        self._end_block("ifStatement")

    # compiles an expression (one or more terms connected by operators)
    def compile_expression(self):
        self._start_block("expression")

        # the first term
        self.compile_term()

        # finish any number of operators followed by terms
        while True:
            tt, t = self._token_next(False)
            if tt == "SYMBOL" and t in "+-*/&|<>=":
                # found an operator
                self._write(tt, t)
                # the next term
                self.tokenizer.advance()
                self.compile_term()
            else:
                # no operator found; done parsing the expression
                break

        self._end_block("expression")

    # compiles a term
    # when the current token is an identifier, the routine must
    # distinguish between a variable, an array entry, and a subroutine
    # call; a single lookahead token ([, ( or .) suffices
    def compile_term(self):
        self._start_block("term")

        # a term: integer_constant | string_constant | keyword_constant |
        # varname | varname[expression] | subroutine_call | (expression) |
        # unary_op term
        tt, t = self._token_next(False)

        if tt in ["INT_CONST", "STRING_CONST", "KEYWORD"]:
            self._write(tt, t)
            # advance for the next statement
            self.tokenizer.advance()
        elif tt == "SYMBOL" and t == "(":
            # ( expression )
            self._write(tt, t)
            self.tokenizer.advance()
            self.compile_expression()
            # closing parenthesis
            tt, t = self._token_next(False, "SYMBOL", ")")
            self._write(tt, t)
            # advance for the next statement
            self.tokenizer.advance()
        elif tt == "SYMBOL" and t in "-~":
            # unary_op term
            self._write(tt, t)
            # parse the rest of the term
            self.tokenizer.advance()
            self.compile_term()
        elif tt == "IDENTIFIER":
            # varname, varname[expression], subroutine_call
            # do not write the identifier yet; peek at the next token:
            # [ means array, ( or . means subroutine call
            tt2, t2 = self._token_next(True)
            if tt2 == "SYMBOL" and t2 in "(.":
                # subroutine call: back up and compile it as a whole
                self.tokenizer.retreat()
                self.compile_subroutine_call()
            elif tt2 == "SYMBOL" and t2 == "[":
                # array access
                self._write(tt, t)
                self._write(tt2, t2)
                # index expression
                self.tokenizer.advance()
                self.compile_expression()
                # closing bracket
                tt, t = self._token_next(False, "SYMBOL", "]")
                self._write(tt, t)
                # advance for the next statement
                self.tokenizer.advance()
            else:
                # none of the above - just a single identifier
                self._write(tt, t)
        else:
            # unknown (print-as-function works on both Python 2 and 3)
            print("WARNING: Unknown term expression object: %s %s" % (tt, t))

        self._end_block("term")

    # compiles a (possibly empty) comma-separated list of expressions
    def compile_expression_list(self):
        self._start_block("expressionList")

        # check for empty list
        tt, t = self._token_next(False)
        if tt == "SYMBOL" and t == ")":
            # the expression list was empty; do not process any more
            pass
        else:
            # there are things in the expression list
            while True:
                # expression to pass
                self.compile_expression()
                # possible comma
                tt, t = self._token_next(False)
                if tt == "SYMBOL" and t == ",":
                    self._write(tt, t)
                    self.tokenizer.advance()
                else:
                    # not a comma; stop processing expressions
                    break

        self._end_block("expressionList")

    # compiles a subroutine call; two cases:
    # - subroutineName(expressionList)
    # - (class|var).subroutineName(expressionList)
    def compile_subroutine_call(self):
        # first part of the name
        tt, t = self._token_next(False, "IDENTIFIER")
        self._write(tt, t)

        # a dot and another name may exist, or it could be a parenthesis
        tt, t = self._token_next(True)
        if tt == "SYMBOL" and t == ".":
            self._write(tt, t)
            # the name after the dot
            tt, t = self._token_next(True, "IDENTIFIER")
            self._write(tt, t)
            # advance so that we are on the parenthesis
            self.tokenizer.advance()

        # opening parenthesis
        tt, t = self._token_next(False, "SYMBOL", "(")
        self._write(tt, t)

        # expression list
        self.tokenizer.advance()
        self.compile_expression_list()

        # closing parenthesis
        tt, t = self._token_next(False, "SYMBOL", ")")
        self._write(tt, t)

        self.tokenizer.advance()

    # returns (token_type, token) for the current token, advancing the
    # tokenizer first when advance is True; warns and exits when an
    # expected type/value does not match
    def _token_next(self, advance=False, expected_type=None, expected_value=None):
        # advance the tokenizer, if requested
        if advance:
            self.tokenizer.advance()

        # get the token type and the token itself
        token_type = self.tokenizer.token_type()
        token = str(getattr(self.tokenizer, token_type.lower())())

        if expected_type and token_type != expected_type:
            print("WARNING: Type %s found; expected %s" % (token_type, expected_type))
            import traceback, sys
            traceback.print_stack()
            sys.exit(1)
        if expected_value and token != expected_value:
            print("WARNING: Value %s found; expected %s" % (token, expected_value))
            import traceback, sys
            traceback.print_stack()
            sys.exit(1)

        return token_type, token

    # writes the given token to the output file as an XML element
    def _write(self, token_type, token):
        # lowercase for tag name
        token_type = token_type.lower()

        # special types use camelCase tag names
        token_type = token_type.replace("int_const", "integerConstant")
        token_type = token_type.replace("string_const", "stringConstant")

        # escape characters that are special in XML.
        # BUGFIX: the replacement table had lost its entities (each char
        # mapped to itself) and iterated in arbitrary dict order; "&"
        # must be escaped first so the other entities are not
        # double-escaped.
        for raw, escaped in (("&", "&amp;"), ("<", "&lt;"),
                             (">", "&gt;"), ('"', "&quot;")):
            token = token.replace(raw, escaped)

        # print the token type and token to the file
        output = ['<', token_type, '>', ' ', token, ' ',
                  '</', token_type, '>', '\n']
        self.destination_file.write(self._indent("".join(output)))

    # starts an XML block
    def _start_block(self, block_name):
        self.destination_file.write(self._indent("<" + block_name + ">\n"))
        self.indent += 2

    # ends an XML block
    def _end_block(self, block_name):
        self.indent -= 2
        self.destination_file.write(self._indent("</" + block_name + ">\n"))

    # indents a single line of text at the current indentation level
    def _indent(self, text):
        return " " * self.indent + text
class CompilationEngine():
    """Recursive-descent parser for Jack that emits the parse tree as XML.

    Tokens come from a JackTokenizer (token objects such as Identifier,
    IntegerConstant, StringConstant, and the Tokens constants); output
    goes to <source>.my.xml.  One compile_* method per grammar
    non-terminal; lookahead is done with tokenizer.see_next(idx).
    Usable as a context manager so the output file is always closed.
    """

    def __init__(self, filepath):
        # Foo.jack -> Foo.my.xml, written next to the source
        self.wf = open(filepath[:-5] + ".my.xml", 'w')
        self.tokenizer = JackTokenizer(filepath)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # always release the output file handle
        self.wf.close()

    def compile(self):
        """Entry point: a Jack file is exactly one class."""
        self.compile_class()

    def compile_class(self):
        # 'class' className '{' classVarDec* subroutineDec* '}'
        self.write_element_start('class')
        self.compile_keyword([Tokens.CLASS])
        self.compile_class_name()
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        while self.next_is_class_var_dec():
            self.compile_class_var_dec()
        while self.next_is_subroutine_dec():
            self.compile_subroutine_dec()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.write_element_end('class')

    def compile_class_var_dec(self):
        # ('static'|'field') type varName (',' varName)* ';'
        self.write_element_start('classVarDec')
        self.compile_keyword([Tokens.STATIC, Tokens.FIELD])
        self.compile_type()
        self.compile_var_name()
        while self.next_is(Tokens.COMMA):
            self.compile_symbol(Tokens.COMMA)
            self.compile_var_name()
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('classVarDec')

    def compile_subroutine_dec(self):
        # ('constructor'|'function'|'method') ('void'|type)
        # subroutineName '(' parameterList ')' subroutineBody
        self.write_element_start('subroutineDec')
        self.compile_keyword(
            [Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD])
        if self.next_is(Tokens.VOID):
            self.compile_keyword(Tokens.VOID)
        else:
            self.compile_type()
        self.compile_subroutine_name()
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
        self.compile_parameter_list()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        self.compile_subroutine_body()
        self.write_element_end('subroutineDec')

    # names are all plain identifiers; kept as separate methods so the
    # grammar reads one-to-one
    def compile_subroutine_name(self):
        self.compile_identifier()

    def compile_class_name(self):
        self.compile_identifier()

    def compile_var_name(self):
        self.compile_identifier()

    def compile_parameter_list(self):
        # ((type varName) (',' type varName)*)? — possibly empty
        self.write_element_start('parameterList')
        if self.tokenizer.see_next() in [
                Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN
        ] or isinstance(self.tokenizer.see_next(), Identifier):
            self.compile_type()
            self.compile_var_name()
            while self.next_is(Tokens.COMMA):
                self.compile_symbol(Tokens.COMMA)
                self.compile_type()
                self.compile_var_name()
        self.write_element_end('parameterList')

    def compile_subroutine_body(self):
        # '{' varDec* statements '}'
        self.write_element_start('subroutineBody')
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        while self.next_is(Tokens.VAR):
            self.compile_var_dec()
        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.write_element_end('subroutineBody')

    def compile_var_dec(self):
        # 'var' type varName (',' varName)* ';'
        self.write_element_start('varDec')
        self.compile_keyword(Tokens.VAR)
        self.compile_type()
        self.compile_var_name()
        while self.next_is(Tokens.COMMA):
            self.compile_symbol(Tokens.COMMA)
            self.compile_var_name()
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('varDec')

    def compile_statements(self):
        self.write_element_start('statements')
        while self.next_is_statement():
            self.compile_statement()
        self.write_element_end('statements')

    def compile_statement(self):
        # dispatch on the leading keyword; callers guarantee (via
        # next_is_statement) that one of the five branches matches
        if self.next_is(Tokens.LET):
            # 'let' varName ('[' expression ']')? '=' expression ';'
            self.write_element_start('letStatement')
            self.compile_keyword(Tokens.LET)
            self.compile_var_name()
            if self.next_is(Tokens.LEFT_BOX_BRACKET):
                self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
                self.compile_expression()
                self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
            self.compile_symbol(Tokens.EQUAL)
            self.compile_expression()
            self.compile_symbol(Tokens.SEMI_COLON)
            self.write_element_end('letStatement')
        elif self.next_is(Tokens.IF):
            # 'if' '(' expression ')' '{' statements '}'
            # ('else' '{' statements '}')?
            self.write_element_start('ifStatement')
            self.compile_keyword(Tokens.IF)
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
            self.compile_statements()
            self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
            if self.next_is(Tokens.ELSE):
                self.compile_keyword(Tokens.ELSE)
                self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
                self.compile_statements()
                self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
            self.write_element_end('ifStatement')
        elif self.next_is(Tokens.WHILE):
            # 'while' '(' expression ')' '{' statements '}'
            self.write_element_start('whileStatement')
            self.compile_keyword(Tokens.WHILE)
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
            self.compile_statements()
            self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
            self.write_element_end('whileStatement')
        elif self.next_is(Tokens.DO):
            # 'do' subroutineCall ';'
            self.write_element_start('doStatement')
            self.compile_keyword(Tokens.DO)
            self.compile_subroutine_call()
            self.compile_symbol(Tokens.SEMI_COLON)
            self.write_element_end('doStatement')
        elif self.next_is(Tokens.RETURN):
            # 'return' expression? ';'
            self.write_element_start('returnStatement')
            self.compile_keyword(Tokens.RETURN)
            if not self.next_is(Tokens.SEMI_COLON):
                self.compile_expression()
            self.compile_symbol(Tokens.SEMI_COLON)
            self.write_element_end('returnStatement')

    def compile_subroutine_call(self):
        # subroutineName '(' expressionList ')'
        # | (className|varName) '.' subroutineName '(' expressionList ')'
        # one token of lookahead (idx=1) distinguishes the two forms
        if self.next_is(Tokens.LEFT_ROUND_BRACKET, idx=1):
            self.compile_subroutine_name()
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression_list()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        else:
            self.compile_identifier()
            self.compile_symbol(Tokens.DOT)
            self.compile_subroutine_name()
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression_list()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)

    def compile_expression_list(self):
        # (expression (',' expression)*)? — empty iff next token is ')'
        self.write_element_start('expressionList')
        if not self.next_is(Tokens.RIGHT_ROUND_BRACKET):
            self.compile_expression()
            while self.next_is(Tokens.COMMA):
                self.compile_symbol(Tokens.COMMA)
                self.compile_expression()
        self.write_element_end('expressionList')

    def compile_expression(self):
        # term (op term)*
        self.write_element_start('expression')
        self.compile_term()
        while self.next_is([
                Tokens.PLUS, Tokens.MINUS, Tokens.MULTI, Tokens.DIV,
                Tokens.AND, Tokens.PIPE, Tokens.LESS_THAN,
                Tokens.GREATER_THAN, Tokens.EQUAL
        ]):
            self.compile_symbol([
                Tokens.PLUS, Tokens.MINUS, Tokens.MULTI, Tokens.DIV,
                Tokens.AND, Tokens.PIPE, Tokens.LESS_THAN,
                Tokens.GREATER_THAN, Tokens.EQUAL
            ])
            self.compile_term()
        self.write_element_end('expression')

    def compile_term(self):
        # integerConstant | stringConstant | keywordConstant | varName |
        # varName '[' expression ']' | subroutineCall |
        # '(' expression ')' | unaryOp term
        self.write_element_start('term')
        if self.next_type_is(TokenType.INT_CONST):
            self.compile_integer_constant()
        elif self.next_type_is(TokenType.STRING_CONST):
            self.compile_string_constant()
        elif self.next_is(
                [Tokens.NULL, Tokens.THIS, Tokens.TRUE, Tokens.FALSE]):
            self.compile_keyword(
                [Tokens.NULL, Tokens.THIS, Tokens.TRUE, Tokens.FALSE])
        elif self.next_type_is(TokenType.IDENTIFIER):
            # lookahead token decides: [ -> array, ( or . -> call,
            # anything else -> plain variable
            if self.next_is(Tokens.LEFT_BOX_BRACKET, idx=1):
                self.compile_var_name()
                self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
                self.compile_expression()
                self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
            elif self.next_is([Tokens.LEFT_ROUND_BRACKET, Tokens.DOT], idx=1):
                self.compile_subroutine_call()
            else:
                self.compile_var_name()
        elif self.next_is(Tokens.LEFT_ROUND_BRACKET):
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        elif self.next_is([Tokens.TILDE, Tokens.MINUS]):
            self.compile_symbol([Tokens.TILDE, Tokens.MINUS])
            self.compile_term()
        else:
            self.raise_syntax_error('')
        self.write_element_end('term')

    def next_type_is(self, token_type):
        """True when the upcoming token has the given TokenType."""
        return self.tokenizer.see_next().type == token_type

    def compile_type(self):
        # 'int' | 'char' | 'boolean' | className
        # NOTE(review): silently emits nothing when neither matches —
        # preserved as-is; callers gate on the same checks
        if self.next_is([Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN]):
            self.compile_keyword([Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN])
        elif isinstance(self.tokenizer.see_next(), Identifier):
            self.compile_identifier()

    def next_is_statement(self):
        return self.next_is(
            [Tokens.LET, Tokens.IF, Tokens.WHILE, Tokens.DO, Tokens.RETURN])

    def next_is(self, tokens, idx=0):
        """True when the token `idx` positions ahead matches `tokens`
        (a single token or any member of a list of tokens)."""
        if isinstance(tokens, list):
            return self.tokenizer.see_next(idx=idx) in tokens
        return self.tokenizer.see_next(idx=idx) == tokens

    def next_is_class_var_dec(self):
        return self.next_is([Tokens.STATIC, Tokens.FIELD])

    def next_is_subroutine_dec(self):
        return self.next_is(
            [Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD])

    def _compile_token(self, elem_name, tokens):
        # shared body for compile_symbol/compile_keyword: advance, then
        # either emit the current token as <elem_name> or fail
        expected = tokens if isinstance(tokens, list) else [tokens]
        self.tokenizer.advance()
        if self.tokenizer.current_token in expected:
            self.write_element(elem_name,
                               self.tokenizer.current_token.token_escaped)
        else:
            self.raise_syntax_error('')

    def _compile_constant(self, elem_name, token_class):
        # shared body for identifier/integer/string terminals: advance,
        # then type-check the current token before emitting it
        self.tokenizer.advance()
        if isinstance(self.tokenizer.current_token, token_class):
            self.write_element(elem_name,
                               self.tokenizer.current_token.token_escaped)
        else:
            self.raise_syntax_error('')

    def compile_symbol(self, tokens):
        self._compile_token('symbol', tokens)

    def compile_keyword(self, tokens):
        self._compile_token('keyword', tokens)

    def compile_identifier(self):
        self._compile_constant('identifier', Identifier)

    def compile_integer_constant(self):
        self._compile_constant('integerConstant', IntegerConstant)

    def compile_string_constant(self):
        self._compile_constant('stringConstant', StringConstant)

    def write_element(self, elem_name, value):
        self.wf.write('<%s> %s </%s>\n' % (elem_name, value, elem_name))

    def write_element_start(self, elem_name):
        self.wf.write('<%s>\n' % elem_name)

    def write_element_end(self, elem_name):
        self.wf.write('</%s>\n' % elem_name)

    def raise_syntax_error(self, msg):
        raise Exception(msg)
class CompilationEngine():
    """Recursive-descent Jack parser that accumulates XML in memory.

    Tokens come from a JackTokenizer; the XML parse tree is built up in
    self._xml_text and only written out when save() is called.  The
    private _compile_* helpers each advance the tokenizer by exactly one
    token and emit it as a terminal element, so the compile_* methods
    read one-to-one against the Jack grammar; lookahead uses
    _what_next_token/_what_next_token_type without consuming tokens.
    """

    def __init__(self, jack_file, xml_file):
        # tokenizer over the source; xml_file is an already-open
        # writable handle (not a path) used by save()
        self._jack_tokenizer = JackTokenizer(jack_file)
        self._xml_file = xml_file
        self._xml_text = ''

    def compile_class(self):
        # 'class' className '{' classVarDec* subroutineDec* '}'
        self._write_start('class')
        self._compile_keyword()
        self._compile_identifier()
        self._compile_symbol()
        while self._what_next_token([Keyword.STATIC, Keyword.FIELD]):
            self.compile_class_var_dec()
        while self._what_next_token(
                [Keyword.CONSTRUCTOR, Keyword.FUNCTION, Keyword.METHOD]):
            self.compile_subroutine_dec()
        self._compile_symbol()
        self._write_end('class')

    def compile_class_var_dec(self):
        # ('static'|'field') type varName (',' varName)* ';'
        self._write_start('classVarDec')
        self._compile_keyword()
        # type: primitive keyword or class-name identifier
        if self._what_next_token([Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
            self._compile_keyword()
        elif self._what_next_token_type([Type.IDENTIFIER]):
            self._compile_identifier()
        self._compile_identifier()
        while self._what_next_token([Symbol.COMMA]):
            self._compile_symbol()
            self._compile_identifier()
        self._compile_symbol()
        self._write_end('classVarDec')

    def compile_subroutine_dec(self):
        # ('constructor'|'function'|'method') ('void'|type)
        # subroutineName '(' parameterList ')' subroutineBody
        self._write_start('subroutineDec')
        self._compile_keyword()
        if self._what_next_token([Keyword.VOID]):
            self._compile_keyword()
        else:
            # return type: primitive keyword or class-name identifier
            if self._what_next_token(
                    [Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
                self._compile_keyword()
            elif self._what_next_token_type([Type.IDENTIFIER]):
                self._compile_identifier()
        self._compile_identifier()
        self._compile_symbol()
        self.compile_parameter_list()
        self._compile_symbol()
        self.compile_subroutine_body()
        self._write_end('subroutineDec')

    def compile_parameter_list(self):
        # ((type varName) (',' type varName)*)? — possibly empty
        self._write_start('parameterList')
        if (self._what_next_token([Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN])
                or self._what_next_token_type([Type.IDENTIFIER])):
            if self._what_next_token(
                    [Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
                self._compile_keyword()
            elif self._what_next_token_type([Type.IDENTIFIER]):
                self._compile_identifier()
            self._compile_identifier()
            while self._what_next_token([Symbol.COMMA]):
                self._compile_symbol()
                if self._what_next_token(
                        [Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
                    self._compile_keyword()
                elif self._what_next_token_type([Type.IDENTIFIER]):
                    self._compile_identifier()
                self._compile_identifier()
        self._write_end('parameterList')

    def compile_subroutine_body(self):
        # '{' varDec* statements '}'
        self._write_start('subroutineBody')
        self._compile_symbol()
        while self._what_next_token([Keyword.VAR]):
            self.compile_var_dec()
        self.compile_statements()
        self._compile_symbol()
        self._write_end('subroutineBody')

    def compile_var_dec(self):
        # 'var' type varName (',' varName)* ';'
        self._write_start('varDec')
        self._compile_keyword()
        if self._what_next_token([Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
            self._compile_keyword()
        elif self._what_next_token_type([Type.IDENTIFIER]):
            self._compile_identifier()
        self._compile_identifier()
        while self._what_next_token([Symbol.COMMA]):
            self._compile_symbol()
            self._compile_identifier()
        self._compile_symbol()
        self._write_end('varDec')

    def compile_statements(self):
        # statement* — dispatch on the leading keyword
        self._write_start('statements')
        while self._what_next_token([
                Keyword.LET, Keyword.IF, Keyword.WHILE, Keyword.DO,
                Keyword.RETURN
        ]):
            if self._what_next_token([Keyword.LET]):
                self.compile_let()
            elif self._what_next_token([Keyword.IF]):
                self.compile_if()
            elif self._what_next_token([Keyword.WHILE]):
                self.compile_while()
            elif self._what_next_token([Keyword.DO]):
                self.compile_do()
            elif self._what_next_token([Keyword.RETURN]):
                self.compile_return()
        self._write_end('statements')

    def compile_let(self):
        # 'let' varName ('[' expression ']')? '=' expression ';'
        self._write_start('letStatement')
        self._compile_keyword()
        self._compile_identifier()
        if self._what_next_token([Symbol.LEFT_BOX_BRACKET]):
            self._compile_symbol()
            self.compile_expression()
            self._compile_symbol()
        self._compile_symbol()
        self.compile_expression()
        self._compile_symbol()
        self._write_end('letStatement')

    def compile_if(self):
        # 'if' '(' expression ')' '{' statements '}'
        # ('else' '{' statements '}')?
        self._write_start('ifStatement')
        self._compile_keyword()
        self._compile_symbol()
        self.compile_expression()
        self._compile_symbol()
        self._compile_symbol()
        self.compile_statements()
        self._compile_symbol()
        if self._what_next_token([Keyword.ELSE]):
            self._compile_keyword()
            self._compile_symbol()
            self.compile_statements()
            self._compile_symbol()
        self._write_end('ifStatement')

    def compile_while(self):
        # 'while' '(' expression ')' '{' statements '}'
        self._write_start('whileStatement')
        self._compile_keyword()
        self._compile_symbol()
        self.compile_expression()
        self._compile_symbol()
        self._compile_symbol()
        self.compile_statements()
        self._compile_symbol()
        self._write_end('whileStatement')

    def compile_do(self):
        # 'do' subroutineCall ';' — one token of lookahead (index 1)
        # distinguishes `name(...)` from `name.name(...)`
        self._write_start('doStatement')
        self._compile_keyword()
        if self._what_next_token([Symbol.LEFT_ROUND_BRACKET], 1):
            self._compile_identifier()
            self._compile_symbol()
            self.compile_expression_list()
            self._compile_symbol()
        else:
            self._compile_identifier()
            self._compile_symbol()
            self._compile_identifier()
            self._compile_symbol()
            self.compile_expression_list()
            self._compile_symbol()
        self._compile_symbol()
        self._write_end('doStatement')

    def compile_return(self):
        # 'return' expression? ';'
        self._write_start('returnStatement')
        self._compile_keyword()
        if not self._what_next_token([Symbol.SEMI_COLON]):
            self.compile_expression()
        self._compile_symbol()
        self._write_end('returnStatement')

    def compile_expression(self):
        # term (op term)*
        self._write_start('expression')
        self.compile_term()
        while self._what_next_token([
                Symbol.PLUS, Symbol.MINUS, Symbol.MULTI, Symbol.DIV,
                Symbol.AND, Symbol.PIPE, Symbol.LESS_THAN,
                Symbol.GREATER_THAN, Symbol.EQUAL
        ]):
            self._compile_symbol()
            self.compile_term()
        self._write_end('expression')

    def compile_term(self):
        # integerConstant | stringConstant | keywordConstant | varName |
        # varName '[' expression ']' | subroutineCall |
        # '(' expression ')' | unaryOp term
        self._write_start('term')
        if self._what_next_token_type([Type.INT_CONST]):
            self._compile_integer_constant()
        elif self._what_next_token_type([Type.STRING_CONST]):
            self._compile_string_constant()
        elif self._what_next_token(
                [Keyword.NULL, Keyword.THIS, Keyword.TRUE, Keyword.FALSE]):
            self._compile_keyword()
        elif self._what_next_token_type([Type.IDENTIFIER]):
            # lookahead decides: [ -> array, ( or . -> call, else var
            if self._what_next_token([Symbol.LEFT_BOX_BRACKET], 1):
                self._compile_identifier()
                self._compile_symbol()
                self.compile_expression()
                self._compile_symbol()
            elif self._what_next_token([Symbol.LEFT_ROUND_BRACKET,
                                        Symbol.DOT], 1):
                if self._what_next_token([Symbol.LEFT_ROUND_BRACKET], 1):
                    self._compile_identifier()
                    self._compile_symbol()
                    self.compile_expression_list()
                    self._compile_symbol()
                else:
                    self._compile_identifier()
                    self._compile_symbol()
                    self._compile_identifier()
                    self._compile_symbol()
                    self.compile_expression_list()
                    self._compile_symbol()
            else:
                self._compile_identifier()
        elif self._what_next_token([Symbol.LEFT_ROUND_BRACKET]):
            self._compile_symbol()
            self.compile_expression()
            self._compile_symbol()
        elif self._what_next_token([Symbol.TILDE, Symbol.MINUS]):
            self._compile_symbol()
            self.compile_term()
        # NOTE(review): no final else — an unrecognized token silently
        # yields an empty <term></term>; confirm this is intended
        self._write_end('term')

    def compile_expression_list(self):
        # (expression (',' expression)*)? — empty iff next token is ')'
        self._write_start('expressionList')
        if not self._what_next_token([Symbol.RIGHT_ROUND_BRACKET]):
            self.compile_expression()
            while self._what_next_token([Symbol.COMMA]):
                self._compile_symbol()
                self.compile_expression()
        self._write_end('expressionList')

    def save(self):
        # flush the accumulated XML to the output handle
        self._xml_file.write(self._xml_text)

    def _what_next_token(self, values, index=0):
        # non-consuming lookahead: token `index` positions ahead
        return self._jack_tokenizer.next_token(index) in values

    def _what_next_token_type(self, values, index=0):
        # non-consuming lookahead on the token's type
        return self._jack_tokenizer.next_token_type(index) in values

    # each _compile_* helper consumes exactly one token and emits it as
    # a terminal XML element of the corresponding kind
    def _compile_symbol(self):
        self._jack_tokenizer.advance()
        self._write('symbol', self._jack_tokenizer.token())

    def _compile_keyword(self):
        self._jack_tokenizer.advance()
        self._write('keyword', self._jack_tokenizer.token())

    def _compile_identifier(self):
        self._jack_tokenizer.advance()
        self._write('identifier', self._jack_tokenizer.token())

    def _compile_integer_constant(self):
        self._jack_tokenizer.advance()
        self._write('integerConstant', self._jack_tokenizer.token())

    def _compile_string_constant(self):
        self._jack_tokenizer.advance()
        self._write('stringConstant', self._jack_tokenizer.token())

    def _write(self, element, value):
        # terminal: <element> value </element>
        self._xml_text += '<{}> {} </{}>\n'.format(element, value, element)

    def _write_start(self, element):
        self._xml_text += '<%s>\n' % element

    def _write_end(self, element):
        self._xml_text += '</%s>\n' % element
def __init__(self, filepath):
    """Open the XML output file for *filepath* and build its tokenizer.

    The output path is derived by dropping the 5-character ".jack"
    extension and appending ".my.xml".
    """
    destination = filepath[:-5] + ".my.xml"
    self.wf = open(destination, 'w')
    self.tokenizer = JackTokenizer(filepath)
def __init__(self, input_file_path, vm_writer: VMWriter):
    """Set up tokenizer, symbol table and VM writer, then compile at once.

    Compilation is a no-op when the input has no tokens.
    """
    tokenizer = JackTokenizer(input_file_path)
    self.jack_tokenizer = tokenizer
    self.symbol_table = SymbolTable()
    self.vm_writer = vm_writer
    if not tokenizer.has_more_tokens():
        return
    self.compile_class()
class CompilationEnginge(object):
    """Recursive-descent parser for the Jack language.

    Emits an XML parse tree to `output_file`; the "PROJECT 11" sections
    additionally record identifiers in a SymbolTable and tag them with
    their kind and running index in the emitted XML.

    NOTE(review): the class name is misspelled ("Enginge") but is part of
    the public interface, so it is left unchanged here.
    """

    def __init__(self, input_file, output_file):
        self.tokenizer = JackTokenizer(input_file)
        self.out = open(output_file, 'w')
        self.token = None        # most recently fetched token
        self.class_name = None   # set once compile_class sees the className
        #######################
        ### PROJECT 11 CODE ###
        #######################
        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(output_file)
        #######################

    def analyze(self):
        """Drive a full compilation of the input, then close the output."""
        self.token = self.tokenizer.advance()
        self.compile_class()
        self.close()
        print('CLASS TABLE:')
        print(self.symbol_table.class_table)

    def close(self):
        """Close the output file exactly once (idempotent)."""
        if self.out:
            self.out.close()
            self.out = None

    def advance(self):
        """Fetch the next token from the tokenizer into self.token."""
        self.token = self.tokenizer.advance()

    def write_to_out(self):
        # NOTE(review): intentionally empty — appears to be a stub.
        pass

    def format_line(self, defined_or_used=''):
        """Format the current token as one XML line.

        `defined_or_used` ('defined'/'used') is only kept for identifiers;
        for every other token type it is cleared.
        """
        token_type = self.tokenizer.token_type()
        running_index = ''
        if token_type == self.tokenizer.keyword_token:
            meat = self.tokenizer.keyword()
            defined_or_used = ''
        elif token_type == self.tokenizer.symbol_token:
            meat = self.tokenizer.symbol()
            defined_or_used = ''
        elif token_type == self.tokenizer.identifier_token:
            meat = self.tokenizer.identifier()
            #######################
            ### PROJECT 11 CODE ###
            #######################
            # Output <var/argument/static/field...> instead of <identifier>:
            # known symbols get their kind + running index; unknown names are
            # classified by capitalization (lowercase => subroutine, else class).
            name = self.tokenizer.token
            if self.symbol_table.kind_of(name):
                token_type = self.symbol_table.kind_of(name)
                running_index = str(self.symbol_table.index_of(name))
            elif name[0].islower():
                token_type = 'subroutine'
            else:
                token_type = 'class'
            #######################
        elif token_type == self.tokenizer.int_const:
            meat = self.tokenizer.int_val()
            defined_or_used = ''
        elif token_type == self.tokenizer.string_const:
            meat = self.tokenizer.string_val()
            defined_or_used = ''
        else:
            raise ValueError('Something went wrong with token: {}'.format(self.token))
        if defined_or_used != '':
            defined_or_used += ' '
        if running_index != '':
            running_index = ' ' + running_index
        formated_line = '<{2}{0}{3}> {1} </{2}{0}{3}>\n'.format(token_type, meat, defined_or_used, running_index)
        return formated_line

    #########################
    ### PROGRAM STRUCTURE ###
    #########################

    def compile_class(self):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        self.out.write('<class>\n')
        # 'class'
        keyword_line = self.format_line()
        self.out.write(keyword_line)
        # className
        self.advance()
        self.class_name = self.tokenizer.token  # remembered for 'this' typing
        identifier_line = self.format_line('defined')
        self.out.write(identifier_line)
        # '{'
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        ### classVarDec* subroutineDec* ###
        self.advance()
        # classVarDec*
        while self.tokenizer.token_type() == self.tokenizer.keyword_token and self.tokenizer.keyword() in [self.tokenizer.key_field, self.tokenizer.key_static]:
            self.compile_class_var_dec()
        # subroutineDec*
        while self.tokenizer.token_type() == self.tokenizer.keyword_token and self.tokenizer.keyword() in [self.tokenizer.key_function, self.tokenizer.key_method, self.tokenizer.key_constructor]:
            self.compile_subroutine()
        # '}'
        if self.tokenizer.token_type() == self.tokenizer.symbol_token:
            # Class compilation is done
            symbol_line = self.format_line()
            self.out.write(symbol_line)
        else:
            raise ValueError('Something went wrong')
        # Closing with </class>
        self.out.write('</class>\n')
        # NOTE(review): advance() returns None (it only assigns self.token),
        # so not(...) is always True and the success message always prints —
        # confirm whether end-of-input detection was intended here.
        is_sucessfull = not(self.advance())
        if is_sucessfull:
            print('Compilation enginge succesfully finished')
        else:
            print('Something went wrong!')

    def compile_class_var_dec(self):
        """classVarDec: ('static'|'field') type varName (',' varName)* ';'"""
        self.out.write('<classVarDec>\n')
        #######################
        ### PROJECT 11 CODE ###
        #######################
        # Remember the kind ('static'/'field') for every name in this dec.
        field_or_static = self.tokenizer.token
        #######################
        # ('static' | 'field')
        field_or_static_line = self.format_line()
        self.out.write(field_or_static_line)
        # type
        self.advance()
        type_ = self.tokenizer.token
        type_line = self.format_line()
        self.out.write(type_line)
        # varName
        self.advance()
        self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind=field_or_static)
        varname_line = self.format_line('defined')
        self.out.write(varname_line)
        # (',' varName)*
        self.advance()
        symbol = self.tokenizer.symbol()
        while symbol == ',':
            colon_line = self.format_line()
            self.out.write(colon_line)
            self.advance()
            self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind=field_or_static)
            varname_line = self.format_line('defined')
            self.out.write(varname_line)
            self.advance()
            symbol = self.tokenizer.symbol()
        # symbol == ';'
        semicolon_line = self.format_line()
        self.out.write(semicolon_line)
        self.advance()
        self.out.write('</classVarDec>\n')

    def compile_subroutine(self):
        """subroutineDec: ('constructor'|'function'|'method')
        ('void' | type) subroutineName '(' parameterList ')' subroutineBody
        """
        #######################
        ### PROJECT 11 CODE ###
        #######################
        # Debug dump of the previous subroutine scope, then reset the scope.
        print()
        print('SUBROUTINE TABLE:')
        print(self.symbol_table.subroutine_table)
        print()
        self.symbol_table.start_subroutine()
        # NOTE(review): 'this' is registered unconditionally, including for
        # functions/constructors — confirm that is intended.
        self.symbol_table.define(name='this', type_=self.class_name, kind='argument')
        #######################
        self.out.write('<subroutineDec>\n')
        # ('constructor'|'function'|'method')
        constructor_function_method_line = self.format_line()
        self.out.write(constructor_function_method_line)
        # ('void' | type)
        self.advance()
        void_or_type_line = self.format_line()
        self.out.write(void_or_type_line)
        # subroutineName
        self.advance()
        subroutine_name_line = self.format_line('defined')
        self.out.write(subroutine_name_line)
        # '('
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # parameterList
        self.advance()
        self.compile_parameter_list()
        # ')'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        ##################################################
        ### subroutineBody: '{' varDec* statements '}' ###
        ##################################################
        self.out.write('<subroutineBody>\n')
        # '{'
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # varDec*
        self.advance()
        while self.tokenizer.token == self.tokenizer.key_var:
            self.compile_var_dec()
        # statements
        self.compile_statements()
        # '}'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        self.advance()
        self.out.write('</subroutineBody>\n')
        self.out.write('</subroutineDec>\n')
        #######################
        ### PROJECT 11 CODE ###
        #######################
        print()
        print('SUBROUTINE TABLE:')
        print(self.symbol_table.subroutine_table)
        print()
        #######################

    def compile_parameter_list(self):
        """parameterList: ((type varName) (',' type varName)*)?"""
        self.out.write('<parameterList>\n')
        # A symbol token here (the closing ')') means an empty parameter list.
        if self.tokenizer.token_type() != self.tokenizer.symbol_token:
            # type
            type_ = self.tokenizer.token
            type_line = self.format_line()
            self.out.write(type_line)
            # varName
            self.advance()
            self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='argument')
            var_name_line = self.format_line('defined')
            self.out.write(var_name_line)
            # If next token is ',' we have more than one parameter
            self.advance()
            while self.tokenizer.token_type() == self.tokenizer.symbol_token and self.tokenizer.symbol() == ',':
                # ','
                comma_line = self.format_line()
                self.out.write(comma_line)
                # type
                self.advance()
                type_ = self.tokenizer.token
                type_line = self.format_line()
                self.out.write(type_line)
                # varName
                self.advance()
                self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='argument')
                var_name_line = self.format_line('defined')
                self.out.write(var_name_line)
                self.advance()
        self.out.write('</parameterList>\n')

    def compile_var_dec(self):
        """varDec: 'var' type varName (',' varName)* ';'"""
        self.out.write('<varDec>\n')
        # var
        var_line = self.format_line()
        self.out.write(var_line)
        # type
        self.advance()
        type_ = self.tokenizer.token
        type_line = self.format_line()
        self.out.write(type_line)
        # varName
        self.advance()
        self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='local')
        var_name_line = self.format_line('defined')
        self.out.write(var_name_line)
        # (',' varName)*
        self.advance()
        while self.tokenizer.symbol() == ',':
            # ','
            comma_line = self.format_line()
            self.out.write(comma_line)
            # varName
            self.advance()
            self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='local')
            var_name_line = self.format_line('defined')
            self.out.write(var_name_line)
            self.advance()
        # ';'
        semicolon_line = self.format_line()
        self.out.write(semicolon_line)
        self.advance()
        self.out.write('</varDec>\n')

    ##################
    ### STATEMENTS ###
    ##################

    def compile_statements(self):
        """statements: statement*  (loops until a symbol token, i.e. '}')"""
        self.out.write('<statements>\n')
        while self.tokenizer.token_type() != self.tokenizer.symbol_token:
            keyword = self.tokenizer.keyword()
            # letStatement
            if keyword == self.tokenizer.key_let:
                self.compile_let()
            # ifStatement
            elif keyword == self.tokenizer.key_if:
                self.compile_if()
            # whileStatement
            elif keyword == self.tokenizer.key_while:
                self.compile_while()
            # doStatement
            elif keyword == self.tokenizer.key_do:
                self.compile_do()
            # returnStatement
            elif keyword == self.tokenizer.key_return:
                self.compile_return()
            else:
                raise ValueError('Wrong statement: {}'.format(keyword))
        self.out.write('</statements>\n')

    def compile_do(self):
        """doStatement: 'do' subroutineCall ';'"""
        self.out.write('<doStatement>\n')
        # 'do'
        do_line = self.format_line()
        self.out.write(do_line)
        # subroutineCall
        self.advance()
        self.compile_subroutine_call()
        # ';'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        self.advance()
        self.out.write('</doStatement>\n')

    def compile_let(self):
        """letStatement: 'let' varName ('[' expression ']')? '=' expression ';'"""
        self.out.write('<letStatement>\n')
        # let
        let_line = self.format_line()
        self.out.write(let_line)
        # varName
        self.advance()
        var_name_line = self.format_line('used')
        self.out.write(var_name_line)
        # Check if '[' or '='
        self.advance()
        if self.tokenizer.token == '[':
            # '['
            symbol_line = self.format_line()
            self.out.write(symbol_line)
            # expression
            self.advance()
            self.compile_expression()
            # ']'
            symbol_line = self.format_line()
            self.out.write(symbol_line)
            self.advance()
        # '='
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # expression
        self.advance()
        self.compile_expression()
        # ';'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        self.advance()
        self.out.write('</letStatement>\n')

    def compile_while(self):
        """whileStatement: 'while' '(' expression ')' '{' statements '}'"""
        self.out.write('<whileStatement>\n')
        # 'while'
        while_line = self.format_line()
        self.out.write(while_line)
        # '('
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # expression
        self.advance()
        self.compile_expression()
        # ')'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # '{'
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # statements
        self.advance()
        self.compile_statements()
        # '}'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        self.advance()
        self.out.write('</whileStatement>\n')

    def compile_return(self):
        """returnStatement: 'return' expression? ';'"""
        self.out.write('<returnStatement>\n')
        # 'return'
        return_line = self.format_line()
        self.out.write(return_line)
        # Check if expression follows (a ';' means a bare return)
        self.advance()
        if self.tokenizer.token != ';':
            # 'expression'
            self.compile_expression()
        # ';'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        self.advance()
        self.out.write('</returnStatement>\n')

    def compile_if(self):
        """ifStatement: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?
        """
        self.out.write('<ifStatement>\n')
        # 'if'
        if_line = self.format_line()
        self.out.write(if_line)
        # '('
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # expression
        self.advance()
        self.compile_expression()
        # ')'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # '{'
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # statements
        self.advance()
        self.compile_statements()
        # '}'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # Check if there is 'else' part of ifStatement
        self.advance()
        if self.tokenizer.token_type() == self.tokenizer.keyword_token and self.tokenizer.keyword() == 'else':
            # 'else'
            else_line = self.format_line()
            self.out.write(else_line)
            # '{'
            self.advance()
            symbol_line = self.format_line()
            self.out.write(symbol_line)
            # statements
            self.advance()
            self.compile_statements()
            # '}'
            symbol_line = self.format_line()
            self.out.write(symbol_line)
            self.advance()
        self.out.write('</ifStatement>\n')

    ###################
    ### EXPRESSIONS ###
    ###################

    def compile_subroutine_call(self, skip_subroutine_name=False):
        """subroutineCall: subroutineName '(' expressionList ')' |
        (className | varName) '.' subroutineName '(' expressionList ')'

        With skip_subroutine_name=True the caller (compile_term) has
        already emitted the leading name and advanced to '(' or '.'.
        """
        if not skip_subroutine_name:
            # subroutineName or className or varName
            subroutine_class_var_name_line = self.format_line('used')
            self.out.write(subroutine_class_var_name_line)
            self.advance()
        # Check '(' or '.'
        if self.tokenizer.token == '.':
            # '.'
            symbol_line = self.format_line()
            self.out.write(symbol_line)
            # subroutineName
            self.advance()
            subroutine_name_line = self.format_line('used')
            self.out.write(subroutine_name_line)
            self.advance()
        # '('
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # expressionList
        self.advance()
        self.compile_expression_list()
        # ')'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        self.advance()

    def compile_expression(self):
        """expression: term (op term)*"""
        self.out.write('<expression>\n')
        ops = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
        # 'term'
        self.compile_term()
        # Check if there is (op term)* part
        while self.tokenizer.token in ops:
            # op
            op_line = self.format_line()
            self.out.write(op_line)
            # term
            self.advance()
            self.compile_term()
        self.out.write('</expression>\n')

    def compile_term(self):
        """term: integerConstant | stringConstant | keywordConstant |
        varName | varName '[' expression ']' | subroutineCall |
        '(' expression ')' | unaryOp term
        """
        self.out.write('<term>\n')
        unary_ops = ['-', '~']
        #############################################
        ### constant, name, expression or unaryOp ###
        #############################################
        # '(' expression ')'
        if self.tokenizer.token == '(':
            # '('
            symbol_line = self.format_line()
            self.out.write(symbol_line)
            # expression
            self.advance()
            self.compile_expression()
            # ')'
            symbol_line = self.format_line()
            self.out.write(symbol_line)
            self.advance()
        # unaryOp term
        elif self.tokenizer.token in unary_ops:
            # unaryOp
            unary_op_line = self.format_line()
            self.out.write(unary_op_line)
            # term
            self.advance()
            self.compile_term()
        # integerConstant | stringConstant | keywordConstant |
        # varName | varName '[' expression ']' | subroutineCall
        else:
            # constant or name
            constant_or_name = self.format_line('used')
            self.out.write(constant_or_name)
            # Look at the following token: '[' means array indexing,
            # '(' or '.' means a subroutine call (name already emitted),
            # anything else ends this term.
            self.advance()
            # '[' expression ']'
            if self.tokenizer.token == '[':
                # '['
                symbol_line = self.format_line()
                self.out.write(symbol_line)
                # expression
                self.advance()
                self.compile_expression()
                # ']'
                symbol_line = self.format_line()
                self.out.write(symbol_line)
                self.advance()
            # subroutineCall with skip_subroutine_name=True
            elif self.tokenizer.token in ['(', '.']:
                self.compile_subroutine_call(skip_subroutine_name=True)
        self.out.write('</term>\n')

    def compile_expression_list(self):
        """expressionList: (expression (',' expression)*)?"""
        self.out.write('<expressionList>\n')
        # Check if token is ')', if so we got empty expression list
        if self.tokenizer.token != ')':
            # 'expression'
            self.compile_expression()
            # Check if token is ',', if so we got more expressions
            while self.tokenizer.token == ',':
                # ','
                comma_line = self.format_line()
                self.out.write(comma_line)
                # expression
                self.advance()
                self.compile_expression()
        self.out.write('</expressionList>\n')
class CompilationEngine:
    """Single-pass Jack-to-VM compiler.

    Consumes tokens from a JackTokenizer, records identifiers in a
    SymbolTable, and emits VM commands through the supplied VMWriter.
    """

    CLASS_VAR_DEC_KEYWORDS = ['static', 'field']
    SUBROUTINE_DEC_KEYWORDS = ['constructor', 'function', 'method']
    BINARY_OPERATOR_SYMBOLS = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
    UNARY_OPERATOR_SYMBOLS = ['-', '~']
    # '*' and '/' are absent here: they compile to Math.multiply/Math.divide
    # calls in compile_expression instead of a single VM command.
    BINARY_OPERATORS_TO_COMMAND = {
        '+': 'add',
        '-': 'sub',
        '=': 'eq',
        '>': 'gt',
        '<': 'lt',
        '&': 'and',
        '|': 'or'
    }
    UNARY_OPERATORS_TO_COMMAND = {'-': 'neg', '~': 'not'}
    TYPE_TO_TAG = {
        'STRING_CONST': 'stringConstant',
        'INT_CONST': 'integerConstant',
        'KEYWORD': 'keyword',
        'IDENTIFIER': 'identifier',
        'SYMBOL': 'symbol'
    }
    # NOTE(review): every entry maps a symbol to itself; the values were
    # presumably XML entities ('&lt;', '&gt;', '&amp;', '&quot;') before an
    # encoding round-trip mangled them — confirm against the original repo.
    SYMBOLS_TO_XML_CONVENTION = {
        '<': '<',
        '>': '>',
        '&': '&',
        '"': '"'
    }

    def __init__(self, input_file_path, vm_writer: VMWriter):
        self.jack_tokenizer = JackTokenizer(input_file_path)
        self.symbol_table = SymbolTable()
        self.vm_writer = vm_writer
        # Compilation is kicked off directly from the constructor.
        if self.jack_tokenizer.has_more_tokens():
            self.compile_class()

    def compile_class(self):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        self.jack_tokenizer.advance()   # 'class'
        self.jack_tokenizer.advance()   # className
        self.symbol_table.define(self.jack_tokenizer.identifier(), 'NONE', 'CLASS')
        self.jack_tokenizer.advance()   # '{'
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.key_word(
        ) in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            self.compile_class_var_dec()
        if self.jack_tokenizer.key_word(
        ) in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.compile_subroutine()
        self.jack_tokenizer.advance()   # '}'
        self.vm_writer.close()

    def write_token(self, token_name):
        """Write one XML token line (project-10 leftover).

        NOTE(review): self.output_file is never assigned in __init__, so
        calling this would raise AttributeError — looks like dead code from
        the XML-output stage; confirm before relying on it.
        """
        type_tag = CompilationEngine.TYPE_TO_TAG[
            self.jack_tokenizer.token_type()]
        self.output_file.write('<{0}> {1} </{0}>\n'.format(
            type_tag, token_name))
        self.jack_tokenizer.advance()

    def compile_class_var_dec(self):
        """classVarDec: ('static'|'field') type varName (',' varName)* ';'
        — loops over consecutive declarations."""
        while self.jack_tokenizer.key_word(
        ) in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            kind = ''
            if self.jack_tokenizer.key_word() == 'field':
                kind = 'FIELD'
            elif self.jack_tokenizer.key_word() == 'static':
                kind = 'STATIC'
            self.jack_tokenizer.advance()
            field_type = self.get_type()
            self.jack_tokenizer.advance()
            self.symbol_table.define(self.jack_tokenizer.identifier(),
                                     field_type, kind)
            self.jack_tokenizer.advance()
            # Additional ',' varName pairs until the terminating ';'.
            while self.jack_tokenizer.symbol() != ';':
                self.jack_tokenizer.advance()
                self.symbol_table.define(self.jack_tokenizer.identifier(),
                                         field_type, kind)
                self.jack_tokenizer.advance()
            self.jack_tokenizer.advance()

    def write_type(self):
        """Emit the current type token (keyword or class identifier) as XML."""
        if self.jack_tokenizer.token_type() == 'KEYWORD':
            self.write_token(self.jack_tokenizer.key_word())
        elif self.jack_tokenizer.token_type() == 'IDENTIFIER':
            self.write_token(self.jack_tokenizer.identifier())

    def compile_subroutine(self):
        """Compile consecutive subroutineDecs into VM functions."""
        self.vm_writer.zero_branching_indexes()
        while self.jack_tokenizer.key_word(
        ) in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.symbol_table.start_subroutine()
            constructor = True if self.jack_tokenizer.key_word(
            ) == 'constructor' else False
            method = False
            if self.jack_tokenizer.key_word() == 'method':
                method = True
                # Methods receive the object as implicit argument 0.
                self.symbol_table.define('this',
                                         self.symbol_table.get_class_name(),
                                         'ARG')
            self.jack_tokenizer.advance()   # return type
            self.jack_tokenizer.advance()   # subroutineName
            self.symbol_table.define(self.jack_tokenizer.identifier(), 'NONE',
                                     'SUBROUTINE')
            name = self.symbol_table.get_class_name(
            ) + '.' + self.jack_tokenizer.identifier()
            self.jack_tokenizer.advance()   # '('
            self.jack_tokenizer.advance()
            self.compile_parameter_list()
            self.jack_tokenizer.advance()   # ')'
            self.jack_tokenizer.advance()   # '{'
            # Local-variable count must be known before writing 'function'.
            var_num = 0
            while self.jack_tokenizer.key_word() == 'var':
                var_num += self.compile_var_dec()
            self.vm_writer.write_function(name, var_num)
            if method:
                # Set 'this' from argument 0.
                self.vm_writer.write_push('ARG', 0)
                self.vm_writer.write_pop('POINTER', 0)
            elif constructor:
                # Allocate one word per field and anchor 'this' to it.
                field_count = self.symbol_table.var_count('FIELD')
                self.vm_writer.write_push('CONST', field_count)
                self.vm_writer.write_call('Memory.alloc', 1)
                self.vm_writer.write_pop('POINTER', 0)
            self.compile_statements()
            self.jack_tokenizer.advance()   # '}'

    def compile_parameter_list(self):
        """parameterList: ((type varName) (',' type varName)*)? — records
        each parameter in the symbol table as an ARG."""
        if self.jack_tokenizer.symbol() != ')':
            parameter_type = self.get_type()
            self.jack_tokenizer.advance()
            self.symbol_table.define(self.jack_tokenizer.identifier(),
                                     parameter_type, 'ARG')
            self.jack_tokenizer.advance()
            while self.jack_tokenizer.symbol() == ",":
                self.jack_tokenizer.advance()
                parameter_type = self.get_type()
                self.jack_tokenizer.advance()
                self.symbol_table.define(self.jack_tokenizer.identifier(),
                                         parameter_type, 'ARG')
                self.jack_tokenizer.advance()

    def get_type(self):
        """Return the current type token: a keyword (int/char/boolean/...)
        or a class-name identifier."""
        if self.jack_tokenizer.token_type() == 'KEYWORD':
            parameter_type = self.jack_tokenizer.key_word()
        elif self.jack_tokenizer.token_type() == 'IDENTIFIER':
            parameter_type = self.jack_tokenizer.identifier()
        return parameter_type

    def compile_var_dec(self):
        """varDec: 'var' type varName (',' varName)* ';' — returns the
        number of variables declared."""
        var_num = 1
        self.jack_tokenizer.advance()
        var_type = self.get_type()
        self.jack_tokenizer.advance()
        self.symbol_table.define(self.jack_tokenizer.identifier(), var_type,
                                 'VAR')
        self.jack_tokenizer.advance()
        while self.jack_tokenizer.symbol() == ",":
            var_num += 1
            self.jack_tokenizer.advance()
            self.symbol_table.define(self.jack_tokenizer.identifier(),
                                     var_type, 'VAR')
            self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()   # ';'
        return var_num

    def compile_statements(self):
        """statements: statement* — dispatches on the leading keyword;
        stops at the first non-keyword token ('}')."""
        while self.jack_tokenizer.token_type() == 'KEYWORD':
            if self.jack_tokenizer.key_word() == 'let':
                self.compile_let()
            elif self.jack_tokenizer.key_word() == 'if':
                self.compile_if()
            elif self.jack_tokenizer.key_word() == 'while':
                self.compile_while()
            elif self.jack_tokenizer.key_word() == 'do':
                self.compile_do()
            elif self.jack_tokenizer.key_word() == 'return':
                self.compile_return()

    def compile_do(self):
        """doStatement: 'do' subroutineCall ';'"""
        self.jack_tokenizer.advance()
        name = self.jack_tokenizer.identifier()
        self.jack_tokenizer.advance()
        self.compile_subroutine_call(name)
        # must dispose of void function return value
        self.vm_writer.write_pop('TEMP', 0)
        self.jack_tokenizer.advance()

    def compile_subroutine_call(self, prefix_call=''):
        """Compile a subroutine call whose leading identifier
        (`prefix_call`) has already been consumed; current token is
        '(' or '.'."""
        if self.jack_tokenizer.symbol() == '(':
            subroutine = False
            # If not in symbol table - then subroutine (a method of this
            # class, called on the current object).
            if not self.symbol_table.kind_of(
                    prefix_call) or self.symbol_table.kind_of(
                        prefix_call) == 'SUBROUTINE':
                subroutine = True
            self.jack_tokenizer.advance()
            args_count = 0
            if subroutine:
                # Pass the current object as implicit first argument.
                self.vm_writer.write_push('POINTER', 0)
                args_count += 1
            args_count += self.compile_expression_list()
            if subroutine:
                self.vm_writer.write_call(
                    self.symbol_table.get_class_name() + '.' + prefix_call,
                    args_count)
            else:
                self.vm_writer.write_call(prefix_call, args_count)
            self.jack_tokenizer.advance()
        elif self.jack_tokenizer.symbol() == '.':
            variable = False
            self.jack_tokenizer.advance()
            # varName.method(): replace the variable with its class type and
            # push the object as implicit first argument.
            if self.symbol_table.kind_of(prefix_call) in ['VAR', 'FIELD']:
                variable = True
                variable_name = prefix_call
                prefix_call = self.symbol_table.type_of(prefix_call)
            prefix_call += '.{0}'.format(self.jack_tokenizer.identifier())
            self.jack_tokenizer.advance()
            self.jack_tokenizer.advance()
            args_count = 0
            if variable:
                self.vm_writer.write_push(
                    self.symbol_table.kind_of(variable_name),
                    self.symbol_table.index_of(variable_name))
                args_count += 1
            args_count += self.compile_expression_list()
            self.vm_writer.write_call(prefix_call, args_count)
            self.jack_tokenizer.advance()

    def compile_let(self):
        """letStatement: 'let' varName ('[' expression ']')? '=' expression ';'"""
        self.jack_tokenizer.advance()
        var_name = self.jack_tokenizer.identifier()
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.symbol() == '[':
            # Array element target: compute base + index, evaluate the RHS,
            # then store through THAT (RHS parked in TEMP 0 while POINTER 1
            # is set, in case the RHS itself used THAT).
            self.vm_writer.write_push(self.symbol_table.kind_of(var_name),
                                      self.symbol_table.index_of(var_name))
            self.jack_tokenizer.advance()
            self.compile_expression()
            self.vm_writer.write_arithmetic("add")
            self.jack_tokenizer.advance()   # ']'
            self.jack_tokenizer.advance()   # '='
            self.compile_expression()
            self.vm_writer.write_pop('TEMP', 0)
            self.vm_writer.write_pop('POINTER', 1)
            self.vm_writer.write_push('TEMP', 0)
            self.vm_writer.write_pop('THAT', 0)
        else:
            self.jack_tokenizer.advance()   # '='
            self.compile_expression()
            self.vm_writer.write_pop(self.symbol_table.kind_of(var_name),
                                     self.symbol_table.index_of(var_name))
        self.jack_tokenizer.advance()   # ';'

    def compile_while(self):
        """whileStatement: 'while' '(' expression ')' '{' statements '}'
        — standard negated-condition / if-goto pattern."""
        while_idx = self.vm_writer.get_next_label_index('while')
        if_label = 'WHILE_IF_{0}'.format(while_idx)
        end_label = 'WHILE_END_{0}'.format(while_idx)
        self.vm_writer.write_label(if_label)
        self.jack_tokenizer.advance()   # 'while'
        self.jack_tokenizer.advance()   # '('
        self.compile_expression()
        # Negate so a false condition jumps out of the loop.
        self.vm_writer.write_arithmetic('not')
        self.jack_tokenizer.advance()   # ')'
        self.jack_tokenizer.advance()   # '{'
        self.vm_writer.write_if(end_label)
        self.compile_statements()
        self.vm_writer.write_goto(if_label)
        self.jack_tokenizer.advance()   # '}'
        self.vm_writer.write_label(end_label)

    def compile_return(self):
        """returnStatement: 'return' expression? ';' — void returns push
        constant 0 as the mandatory return value."""
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.symbol() != ';':
            self.compile_expression()
        else:
            self.vm_writer.write_push('CONST', 0)
        self.vm_writer.write_return()
        self.jack_tokenizer.advance()

    def compile_if(self):
        """ifStatement: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?"""
        if_idx = self.vm_writer.get_next_label_index('if')
        else_label = 'IF_ELSE_{0}'.format(if_idx)
        end_label = 'IF_END_{0}'.format(if_idx)
        self.jack_tokenizer.advance()   # 'if'
        self.jack_tokenizer.advance()   # '('
        self.compile_expression()
        # Negated condition: jump to the else branch when false.
        self.vm_writer.write_arithmetic('not')
        self.jack_tokenizer.advance()   # ')'
        self.jack_tokenizer.advance()   # '{'
        self.vm_writer.write_if(else_label)
        self.compile_statements()
        self.jack_tokenizer.advance()   # '}'
        self.vm_writer.write_goto(end_label)
        self.vm_writer.write_label(else_label)
        if self.jack_tokenizer.key_word() == 'else':
            self.jack_tokenizer.advance()   # 'else'
            self.jack_tokenizer.advance()   # '{'
            self.compile_statements()
            self.jack_tokenizer.advance()   # '}'
        self.vm_writer.write_label(end_label)

    def compile_expression(self):
        """expression: term (op term)* — left-to-right, no precedence;
        '*' and '/' become OS calls (Math.multiply / Math.divide)."""
        self.compile_term()
        while self.jack_tokenizer.symbol(
        ) in CompilationEngine.BINARY_OPERATOR_SYMBOLS:
            symbol = self.jack_tokenizer.symbol()
            self.jack_tokenizer.advance()
            self.compile_term()
            if symbol in self.BINARY_OPERATORS_TO_COMMAND:
                self.vm_writer.write_arithmetic(
                    self.BINARY_OPERATORS_TO_COMMAND[symbol])
            elif symbol == '*':
                self.vm_writer.write_call('Math.multiply', 2)
            elif symbol == '/':
                self.vm_writer.write_call('Math.divide', 2)

    def compile_term(self):
        """term: int/string/keyword constant | varName | varName'['expr']'
        | subroutineCall | '('expr')' | unaryOp term"""
        token_type = self.jack_tokenizer.token_type()
        if token_type == 'IDENTIFIER':
            name = self.jack_tokenizer.identifier()
            self.jack_tokenizer.advance()
            if self.jack_tokenizer.symbol(
            ) == '(' or self.jack_tokenizer.symbol() == '.':
                self.compile_subroutine_call(name)
            elif self.jack_tokenizer.symbol() == '[':
                # Array read: base + index, then read through THAT.
                self.vm_writer.write_push(self.symbol_table.kind_of(name),
                                          self.symbol_table.index_of(name))
                self.jack_tokenizer.advance()
                self.compile_expression()
                self.jack_tokenizer.advance()
                self.vm_writer.write_arithmetic('add')
                self.vm_writer.write_pop('POINTER', 1)
                self.vm_writer.write_push('THAT', 0)
            else:
                # Plain variable reference.
                kind = self.symbol_table.kind_of(name)
                idx = self.symbol_table.index_of(name)
                self.vm_writer.write_push(kind, idx)
        elif token_type == 'STRING_CONST':
            # Build the string at runtime: String.new + appendChar per char.
            string_const = self.jack_tokenizer.string_val()
            self.vm_writer.write_push("CONST", len(string_const))
            self.vm_writer.write_call("String.new", 1)
            for char in string_const:
                self.vm_writer.write_push('CONST', ord(char))
                self.vm_writer.write_call("String.appendChar", 2)
            self.jack_tokenizer.advance()
        elif token_type == 'KEYWORD':
            keyword = self.jack_tokenizer.key_word()
            if keyword == 'true':
                # true is -1 (push 1, negate).
                self.vm_writer.write_push('CONST', 1)
                self.vm_writer.write_arithmetic('neg')
            elif keyword == 'false' or keyword == 'null':
                self.vm_writer.write_push('CONST', 0)
            elif keyword == 'this':
                self.vm_writer.write_push('POINTER', 0)
            self.jack_tokenizer.advance()
        elif token_type == 'SYMBOL':
            if self.jack_tokenizer.symbol() == '(':
                self.jack_tokenizer.advance()
                self.compile_expression()
                self.jack_tokenizer.advance()
            elif self.jack_tokenizer.symbol(
            ) in CompilationEngine.UNARY_OPERATOR_SYMBOLS:
                command = CompilationEngine.UNARY_OPERATORS_TO_COMMAND[
                    self.jack_tokenizer.symbol()]
                self.jack_tokenizer.advance()
                self.compile_term()
                self.vm_writer.write_arithmetic(command)
        elif token_type == 'INT_CONST':
            self.vm_writer.write_push('CONST', self.jack_tokenizer.int_val())
            self.jack_tokenizer.advance()

    def compile_expression_list(self):
        """expressionList: (expression (',' expression)*)? — returns the
        number of expressions compiled (the call's argument count)."""
        expression_count = 0
        if self.jack_tokenizer.symbol() != ')':
            self.compile_expression()
            expression_count += 1
            while self.jack_tokenizer.symbol() == ',':
                self.jack_tokenizer.advance()
                self.compile_expression()
                expression_count += 1
        return expression_count
def analyze(src_jack_file):
    """Tokenize *src_jack_file* and print every token, one per line."""
    jt = JackTokenizer(src_jack_file)
    # The tokenizer is a cursor: advance() moves it, get_current_token()
    # reads the token it now points at.
    while jt.has_more_tokens():
        jt.advance()
        print(jt.get_current_token())
class CompilationEngine():
    """Recursive-descent syntax analyzer for the Jack language.

    Consumes tokens from a JackTokenizer and writes the parse tree as XML
    to "<input path minus .jack>.myImpl.xml".  Intended for use as a
    context manager so the output file is closed deterministically.
    """

    # Binary operators of the Jack grammar: term (op term)*.
    _BINARY_OPS = [
        Tokens.PLUS, Tokens.MINUS, Tokens.MULTI, Tokens.DIV, Tokens.AND,
        Tokens.PIPE, Tokens.LESS_THAN, Tokens.GREATER_THAN, Tokens.EQUAL]

    def __init__(self, filepath):
        # NOTE(review): assumes filepath ends with ".jack" (strips 5 chars);
        # confirm against callers.
        self.wf = open(filepath[:-5] + ".myImpl.xml", 'w')
        self.tokenizer = JackTokenizer(filepath)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returning None lets any in-flight exception propagate.
        self.wf.close()

    def compile(self):
        """Entry point: a Jack file contains exactly one class."""
        self.compile_class()

    def compile_class(self):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        self.write_element_start('class')
        self.compile_keyword([Tokens.CLASS])
        self.compile_class_name()
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        while self.next_is_class_var_dec():
            self.compile_class_var_dec()
        while self.next_is_subroutine_dec():
            self.compile_subroutine_dec()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.write_element_end('class')

    def compile_class_var_dec(self):
        """classVarDec: ('static'|'field') type varName (',' varName)* ';'"""
        self.write_element_start('classVarDec')
        self.compile_keyword([Tokens.STATIC, Tokens.FIELD])
        self.compile_type()
        self.compile_var_name()
        while self.next_is(Tokens.COMMA):
            self.compile_symbol(Tokens.COMMA)
            self.compile_var_name()
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('classVarDec')

    def compile_subroutine_dec(self):
        """subroutineDec: ('constructor'|'function'|'method') ('void'|type)
        subroutineName '(' parameterList ')' subroutineBody
        """
        self.write_element_start('subroutineDec')
        self.compile_keyword(
            [Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD])
        if self.tokenizer.see_next() == Tokens.VOID:
            self.compile_keyword(Tokens.VOID)
        else:
            self.compile_type()
        self.compile_subroutine_name()
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
        self.compile_parameter_list()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        self.compile_subroutine_body()
        self.write_element_end('subroutineDec')

    def compile_subroutine_name(self):
        """subroutineName: identifier"""
        self.compile_identifier()

    def compile_class_name(self):
        """className: identifier"""
        self.compile_identifier()

    def compile_var_name(self):
        """varName: identifier"""
        self.compile_identifier()

    def compile_parameter_list(self):
        """parameterList: ((type varName) (',' type varName)*)? — may be empty."""
        self.write_element_start('parameterList')
        if self.tokenizer.see_next() in [Tokens.INT, Tokens.CHAR,
                                         Tokens.BOOLEAN] or isinstance(
                self.tokenizer.see_next(), Identifier):
            self.compile_type()
            self.compile_var_name()
            while self.next_is(Tokens.COMMA):
                self.compile_symbol(Tokens.COMMA)
                self.compile_type()
                self.compile_var_name()
        self.write_element_end('parameterList')

    def compile_subroutine_body(self):
        """subroutineBody: '{' varDec* statements '}'"""
        self.write_element_start('subroutineBody')
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        while self.next_is(Tokens.VAR):
            self.compile_var_dec()
        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.write_element_end('subroutineBody')

    def compile_var_dec(self):
        """varDec: 'var' type varName (',' varName)* ';'"""
        self.write_element_start('varDec')
        self.compile_keyword(Tokens.VAR)
        self.compile_type()
        self.compile_var_name()
        while self.next_is(Tokens.COMMA):
            self.compile_symbol(Tokens.COMMA)
            self.compile_var_name()
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('varDec')

    def compile_statements(self):
        """statements: statement*"""
        self.write_element_start('statements')
        while self.next_is_statement():
            self.compile_statement()
        self.write_element_end('statements')

    def compile_statement(self):
        """Dispatch on the leading keyword to the matching statement form."""
        if self.next_is(Tokens.LET):
            self._compile_let_statement()
        elif self.next_is(Tokens.IF):
            self._compile_if_statement()
        elif self.next_is(Tokens.WHILE):
            self._compile_while_statement()
        elif self.next_is(Tokens.DO):
            self._compile_do_statement()
        elif self.next_is(Tokens.RETURN):
            self._compile_return_statement()

    def _compile_let_statement(self):
        """letStatement: 'let' varName ('[' expression ']')? '=' expression ';'"""
        self.write_element_start('letStatement')
        self.compile_keyword(Tokens.LET)
        self.compile_var_name()
        if self.next_is(Tokens.LEFT_BOX_BRACKET):
            self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
        self.compile_symbol(Tokens.EQUAL)
        self.compile_expression()
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('letStatement')

    def _compile_if_statement(self):
        """ifStatement: 'if' '(' expr ')' '{' stmts '}' ('else' '{' stmts '}')?"""
        self.write_element_start('ifStatement')
        self.compile_keyword(Tokens.IF)
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
        self.compile_expression()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        if self.next_is(Tokens.ELSE):
            self.compile_keyword(Tokens.ELSE)
            self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
            self.compile_statements()
            self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.write_element_end('ifStatement')

    def _compile_while_statement(self):
        """whileStatement: 'while' '(' expression ')' '{' statements '}'"""
        self.write_element_start('whileStatement')
        self.compile_keyword(Tokens.WHILE)
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
        self.compile_expression()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.write_element_end('whileStatement')

    def _compile_do_statement(self):
        """doStatement: 'do' subroutineCall ';'"""
        self.write_element_start('doStatement')
        self.compile_keyword(Tokens.DO)
        self.compile_subroutine_call()
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('doStatement')

    def _compile_return_statement(self):
        """returnStatement: 'return' expression? ';'"""
        self.write_element_start('returnStatement')
        self.compile_keyword(Tokens.RETURN)
        if not self.next_is(Tokens.SEMI_COLON):
            self.compile_expression()
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('returnStatement')

    def compile_subroutine_call(self):
        """subroutineCall: name '(' exprList ')' | name '.' name '(' exprList ')'

        One-token lookahead (idx=1) distinguishes the two forms.
        """
        if self.next_is(Tokens.LEFT_ROUND_BRACKET, idx=1):
            self.compile_subroutine_name()
        else:
            # Qualified call: (className | varName) '.' subroutineName
            self.compile_identifier()
            self.compile_symbol(Tokens.DOT)
            self.compile_subroutine_name()
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
        self.compile_expression_list()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)

    def compile_expression_list(self):
        """expressionList: (expression (',' expression)*)? — may be empty."""
        self.write_element_start('expressionList')
        if not self.next_is(Tokens.RIGHT_ROUND_BRACKET):
            self.compile_expression()
            while self.next_is(Tokens.COMMA):
                self.compile_symbol(Tokens.COMMA)
                self.compile_expression()
        self.write_element_end('expressionList')

    def compile_expression(self):
        """expression: term (op term)*"""
        self.write_element_start('expression')
        self.compile_term()
        while self.next_is(self._BINARY_OPS):
            self.compile_symbol(self._BINARY_OPS)
            self.compile_term()
        self.write_element_end('expression')

    def compile_term(self):
        """term: integerConstant | stringConstant | keywordConstant | varName |
        varName '[' expression ']' | subroutineCall | '(' expression ')' |
        unaryOp term
        """
        self.write_element_start('term')
        if self.next_type_is(TokenType.INT_CONST):
            self.compile_integer_constant()
        elif self.next_type_is(TokenType.STRING_CONST):
            self.compile_string_constant()
        elif self.next_is([Tokens.NULL, Tokens.THIS, Tokens.TRUE,
                           Tokens.FALSE]):
            self.compile_keyword([Tokens.NULL, Tokens.THIS, Tokens.TRUE,
                                  Tokens.FALSE])
        elif self.next_type_is(TokenType.IDENTIFIER):
            # One-token lookahead disambiguates array access, call, or plain
            # variable.
            if self.next_is(Tokens.LEFT_BOX_BRACKET, idx=1):
                self.compile_var_name()
                self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
                self.compile_expression()
                self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
            elif self.next_is([Tokens.LEFT_ROUND_BRACKET, Tokens.DOT], idx=1):
                self.compile_subroutine_call()
            else:
                self.compile_var_name()
        elif self.next_is(Tokens.LEFT_ROUND_BRACKET):
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        elif self.next_is([Tokens.TILDE, Tokens.MINUS]):
            self.compile_symbol([Tokens.TILDE, Tokens.MINUS])
            self.compile_term()
        else:
            self.raise_syntax_error('term expected')
        self.write_element_end('term')

    def next_type_is(self, token_type):
        """Return True if the next token's type equals *token_type*."""
        return self.tokenizer.see_next().type == token_type

    def compile_type(self):
        """type: 'int' | 'char' | 'boolean' | className"""
        if self.next_is([Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN]):
            self.compile_keyword([Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN])
        elif isinstance(self.tokenizer.see_next(), Identifier):
            self.compile_identifier()
        else:
            # BUG FIX: the original fell through silently here, leaving an
            # invalid type token unconsumed and corrupting the rest of the
            # parse; fail loudly instead.
            self.raise_syntax_error('type expected')

    def next_is_statement(self):
        """Return True if the next token starts a statement."""
        return self.next_is([Tokens.LET, Tokens.IF, Tokens.WHILE, Tokens.DO,
                             Tokens.RETURN])

    def next_is(self, tokens, idx=0):
        """Return True if the token *idx* positions ahead matches *tokens*
        (a single token or a list of alternatives).
        """
        if isinstance(tokens, list):
            return self.tokenizer.see_next(idx=idx) in tokens
        return self.tokenizer.see_next(idx=idx) == tokens

    def next_is_class_var_dec(self):
        """Return True if the next token starts a classVarDec."""
        return self.next_is([Tokens.STATIC, Tokens.FIELD])

    def next_is_subroutine_dec(self):
        """Return True if the next token starts a subroutineDec."""
        return self.next_is([Tokens.CONSTRUCTOR, Tokens.FUNCTION,
                             Tokens.METHOD])

    def _compile_terminal(self, elem_name, tokens, type_check=None):
        """Advance one token and emit it as <elem_name> if it matches.

        *tokens* is a single expected token or a list of alternatives;
        alternatively *type_check* is a class the token must be an instance
        of.  Raises a syntax error on mismatch.
        """
        self.tokenizer.advance()
        current = self.tokenizer.current_token
        if type_check is not None:
            ok = isinstance(current, type_check)
        elif isinstance(tokens, list):
            ok = current in tokens
        else:
            ok = current == tokens
        if ok:
            self.write_element(elem_name, current.token_escaped)
        else:
            self.raise_syntax_error('%s expected' % elem_name)

    def compile_symbol(self, tokens):
        """Consume and emit one expected symbol token."""
        self._compile_terminal('symbol', tokens)

    def compile_keyword(self, tokens):
        """Consume and emit one expected keyword token."""
        self._compile_terminal('keyword', tokens)

    def compile_identifier(self):
        """Consume and emit one identifier token."""
        self._compile_terminal('identifier', None, type_check=Identifier)

    def compile_integer_constant(self):
        """Consume and emit one integer-constant token."""
        self._compile_terminal('integerConstant', None,
                               type_check=IntegerConstant)

    def compile_string_constant(self):
        """Consume and emit one string-constant token."""
        self._compile_terminal('stringConstant', None,
                               type_check=StringConstant)

    def write_element(self, elem_name, value):
        """Write a one-line terminal element: <name> value </name>."""
        self.wf.write('<%s> %s </%s>\n' % (elem_name, value, elem_name))

    def write_element_start(self, elem_name):
        """Open a non-terminal element."""
        self.wf.write('<%s>\n' % elem_name)

    def write_element_end(self, elem_name):
        """Close a non-terminal element."""
        self.wf.write('</%s>\n' % elem_name)

    def raise_syntax_error(self, msg):
        """Abort compilation with a syntax error carrying *msg*."""
        raise Exception('%s' % msg)