class CompilationEngine:
    """Recursive-descent compiler for the Jack language (nand2tetris, project 11).

    Consumes tokens from a ``JackTokenizer`` and emits VM commands through a
    ``VMWriter``, tracking identifiers in a ``SymbolTable``.  Each
    ``compile_*`` method assumes the tokenizer is positioned on the first
    token of its grammar construct and leaves it positioned on the first
    token AFTER the construct — the exact sequence of ``advance()`` calls is
    load-bearing.
    """

    # Jack grammar keyword/symbol tables.
    CLASS_VAR_DEC_KEYWORDS = ['static', 'field']
    SUBROUTINE_DEC_KEYWORDS = ['constructor', 'function', 'method']
    BINARY_OPERATOR_SYMBOLS = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
    UNARY_OPERATOR_SYMBOLS = ['-', '~']
    # Binary operators with a direct VM command; '*' and '/' have no VM
    # opcode and are compiled to Math.multiply / Math.divide OS calls instead.
    BINARY_OPERATORS_TO_COMMAND = {
        '+': 'add',
        '-': 'sub',
        '=': 'eq',
        '>': 'gt',
        '<': 'lt',
        '&': 'and',
        '|': 'or'
    }
    UNARY_OPERATORS_TO_COMMAND = {'-': 'neg', '~': 'not'}
    # Tokenizer token-type constant -> XML tag name (used only by the
    # leftover XML helpers below, not by the VM-code path).
    TYPE_TO_TAG = {
        'STRING_CONST': 'stringConstant',
        'INT_CONST': 'integerConstant',
        'KEYWORD': 'keyword',
        'IDENTIFIER': 'identifier',
        'SYMBOL': 'symbol'
    }
    # NOTE(review): every key maps to itself here; this table presumably
    # held XML entity escapes ('&lt;', '&gt;', '&amp;', '&quot;') and was
    # HTML-unescaped at some point — confirm before using.  It is not
    # referenced anywhere in this class.
    SYMBOLS_TO_XML_CONVENTION = {
        '<': '<',
        '>': '>',
        '&': '&',
        '"': '"'
    }

    def __init__(self, input_file_path, vm_writer: VMWriter):
        """Build tokenizer/symbol table and immediately compile the file.

        Compilation is driven entirely from the constructor: if the input
        has any tokens, ``compile_class`` runs to completion (and closes the
        VM writer) before ``__init__`` returns.
        """
        self.jack_tokenizer = JackTokenizer(input_file_path)
        self.symbol_table = SymbolTable()
        self.vm_writer = vm_writer
        if self.jack_tokenizer.has_more_tokens():
            self.compile_class()

    def compile_class(self):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        self.jack_tokenizer.advance()  # load 'class' keyword
        self.jack_tokenizer.advance()  # load className identifier
        # Record the class name; kind 'CLASS' has no segment/index semantics.
        self.symbol_table.define(self.jack_tokenizer.identifier(), 'NONE',
                                 'CLASS')
        self.jack_tokenizer.advance()  # load '{'
        self.jack_tokenizer.advance()  # load first token inside class body
        if self.jack_tokenizer.key_word(
        ) in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            self.compile_class_var_dec()
        if self.jack_tokenizer.key_word(
        ) in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.compile_subroutine()
        self.jack_tokenizer.advance()  # past closing '}'
        self.vm_writer.close()

    def write_token(self, token_name):
        """Write one XML-tagged token and advance the tokenizer.

        NOTE(review): ``self.output_file`` is never assigned in ``__init__``
        — this looks like dead leftover from the project-10 XML engine and
        would raise AttributeError if called.
        """
        type_tag = CompilationEngine.TYPE_TO_TAG[
            self.jack_tokenizer.token_type()]
        self.output_file.write('<{0}> {1} </{0}>\n'.format(
            type_tag, token_name))
        self.jack_tokenizer.advance()

    def compile_class_var_dec(self):
        """classVarDec: ('static'|'field') type varName (',' varName)* ';'

        Defines every declared name in the class-level symbol table; emits
        no VM code.
        """
        while self.jack_tokenizer.key_word(
        ) in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            kind = ''
            if self.jack_tokenizer.key_word() == 'field':
                kind = 'FIELD'
            elif self.jack_tokenizer.key_word() == 'static':
                kind = 'STATIC'
            self.jack_tokenizer.advance()  # past 'static'/'field'
            field_type = self.get_type()
            self.jack_tokenizer.advance()  # past type, onto first varName
            self.symbol_table.define(self.jack_tokenizer.identifier(),
                                     field_type, kind)
            self.jack_tokenizer.advance()  # onto ',' or ';'
            while self.jack_tokenizer.symbol() != ';':
                self.jack_tokenizer.advance()  # past ',', onto varName
                self.symbol_table.define(self.jack_tokenizer.identifier(),
                                         field_type, kind)
                self.jack_tokenizer.advance()  # onto ',' or ';'
            self.jack_tokenizer.advance()  # past ';'

    def write_type(self):
        """XML helper: emit a type token (keyword or class identifier).

        NOTE(review): depends on ``write_token`` and therefore on the
        undefined ``self.output_file`` — apparently dead code here.
        """
        if self.jack_tokenizer.token_type() == 'KEYWORD':
            self.write_token(self.jack_tokenizer.key_word())
        elif self.jack_tokenizer.token_type() == 'IDENTIFIER':
            self.write_token(self.jack_tokenizer.identifier())

    def compile_subroutine(self):
        """subroutineDec: ('constructor'|'function'|'method')
        ('void'|type) subroutineName '(' parameterList ')' subroutineBody

        Emits the ``function`` header plus the method/constructor prologue
        (setting up ``pointer 0`` / THIS) before compiling the body.
        """
        # Label counters restart per class file so labels stay unique.
        self.vm_writer.zero_branching_indexes()
        while self.jack_tokenizer.key_word(
        ) in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.symbol_table.start_subroutine()
            constructor = True if self.jack_tokenizer.key_word(
            ) == 'constructor' else False
            method = False
            if self.jack_tokenizer.key_word() == 'method':
                method = True
                # 'this' occupies argument 0 of every method.
                self.symbol_table.define('this',
                                         self.symbol_table.get_class_name(),
                                         'ARG')
            self.jack_tokenizer.advance()  # past subroutine kind
            self.jack_tokenizer.advance()  # past return type, onto name
            self.symbol_table.define(self.jack_tokenizer.identifier(), 'NONE',
                                     'SUBROUTINE')
            # Fully qualified VM function name: ClassName.subroutineName
            name = self.symbol_table.get_class_name(
            ) + '.' + self.jack_tokenizer.identifier()
            self.jack_tokenizer.advance()  # past subroutineName
            self.jack_tokenizer.advance()  # past '('
            self.compile_parameter_list()
            self.jack_tokenizer.advance()  # past ')'
            self.jack_tokenizer.advance()  # past '{'
            var_num = 0
            # Local-variable count must be known before the function header.
            while self.jack_tokenizer.key_word() == 'var':
                var_num += self.compile_var_dec()
            self.vm_writer.write_function(name, var_num)
            if method:
                # Prologue: THIS = argument 0.
                self.vm_writer.write_push('ARG', 0)
                self.vm_writer.write_pop('POINTER', 0)
            elif constructor:
                # Prologue: allocate one word per field, THIS = new block.
                field_count = self.symbol_table.var_count('FIELD')
                self.vm_writer.write_push('CONST', field_count)
                self.vm_writer.write_call('Memory.alloc', 1)
                self.vm_writer.write_pop('POINTER', 0)
            self.compile_statements()
            self.jack_tokenizer.advance()  # past closing '}'

    def compile_parameter_list(self):
        """parameterList: ((type varName) (',' type varName)*)?

        Defines each parameter as kind 'ARG'; emits no VM code.
        """
        # ')' immediately means an empty parameter list.
        if self.jack_tokenizer.symbol() != ')':
            parameter_type = self.get_type()
            self.jack_tokenizer.advance()  # past type, onto varName
            self.symbol_table.define(self.jack_tokenizer.identifier(),
                                     parameter_type, 'ARG')
            self.jack_tokenizer.advance()  # onto ',' or ')'
            while self.jack_tokenizer.symbol() == ",":
                self.jack_tokenizer.advance()  # past ','
                parameter_type = self.get_type()
                self.jack_tokenizer.advance()  # past type, onto varName
                self.symbol_table.define(self.jack_tokenizer.identifier(),
                                         parameter_type, 'ARG')
                self.jack_tokenizer.advance()  # onto ',' or ')'

    def get_type(self):
        """Return the current token as a Jack type name.

        Built-in types ('int', 'char', 'boolean') arrive as keywords;
        user-defined class types arrive as identifiers.  Does not advance.
        """
        if self.jack_tokenizer.token_type() == 'KEYWORD':
            parameter_type = self.jack_tokenizer.key_word()
        elif self.jack_tokenizer.token_type() == 'IDENTIFIER':
            parameter_type = self.jack_tokenizer.identifier()
        return parameter_type

    def compile_var_dec(self):
        """varDec: 'var' type varName (',' varName)* ';'

        Defines each name as kind 'VAR' and returns how many variables this
        declaration introduced (the caller sums these for the ``function``
        header's local count).
        """
        var_num = 1
        self.jack_tokenizer.advance()  # past 'var'
        var_type = self.get_type()
        self.jack_tokenizer.advance()  # past type, onto varName
        self.symbol_table.define(self.jack_tokenizer.identifier(), var_type,
                                 'VAR')
        self.jack_tokenizer.advance()  # onto ',' or ';'
        while self.jack_tokenizer.symbol() == ",":
            var_num += 1
            self.jack_tokenizer.advance()  # past ','
            self.symbol_table.define(self.jack_tokenizer.identifier(),
                                     var_type, 'VAR')
            self.jack_tokenizer.advance()  # onto ',' or ';'
        self.jack_tokenizer.advance()  # past ';'
        return var_num

    def compile_statements(self):
        """statements: (let|if|while|do|return)*

        Loops while the current token is a keyword; the closing '}' of the
        enclosing block terminates the loop.
        """
        while self.jack_tokenizer.token_type() == 'KEYWORD':
            if self.jack_tokenizer.key_word() == 'let':
                self.compile_let()
            elif self.jack_tokenizer.key_word() == 'if':
                self.compile_if()
            elif self.jack_tokenizer.key_word() == 'while':
                self.compile_while()
            elif self.jack_tokenizer.key_word() == 'do':
                self.compile_do()
            elif self.jack_tokenizer.key_word() == 'return':
                self.compile_return()

    def compile_do(self):
        """doStatement: 'do' subroutineCall ';'"""
        self.jack_tokenizer.advance()  # past 'do', onto first identifier
        name = self.jack_tokenizer.identifier()
        self.jack_tokenizer.advance()  # onto '(' or '.'
        self.compile_subroutine_call(name)
        # must dispose of void function return value
        self.vm_writer.write_pop('TEMP', 0)
        self.jack_tokenizer.advance()  # past ';'

    def compile_subroutine_call(self, prefix_call=''):
        """subroutineCall: subroutineName '(' expressionList ')' |
        (className|varName) '.' subroutineName '(' expressionList ')'

        ``prefix_call`` is the identifier already consumed by the caller.
        Pushes the receiver (``this`` or the object variable) before the
        arguments when the call is a method call.
        """
        if self.jack_tokenizer.symbol() == '(':
            # Bare call: a method on the current object.
            subroutine = False
            # If not in symbol table - then subroutine
            if not self.symbol_table.kind_of(
                    prefix_call) or self.symbol_table.kind_of(
                        prefix_call) == 'SUBROUTINE':
                subroutine = True
            self.jack_tokenizer.advance()  # past '('
            args_count = 0
            if subroutine:
                # Implicit receiver: push current THIS as argument 0.
                self.vm_writer.write_push('POINTER', 0)
                args_count += 1
            args_count += self.compile_expression_list()
            if subroutine:
                self.vm_writer.write_call(
                    self.symbol_table.get_class_name() + '.' + prefix_call,
                    args_count)
            else:
                self.vm_writer.write_call(prefix_call, args_count)
            self.jack_tokenizer.advance()  # past ')'
        elif self.jack_tokenizer.symbol() == '.':
            # Qualified call: either obj.method(...) or Class.function(...).
            variable = False
            self.jack_tokenizer.advance()  # past '.', onto subroutineName
            # NOTE(review): only VAR/FIELD receivers are recognised here;
            # an ARG or STATIC object variable would fall through to the
            # class-call path — confirm whether that is intended.
            if self.symbol_table.kind_of(prefix_call) in ['VAR', 'FIELD']:
                variable = True
                variable_name = prefix_call
                # Dispatch on the variable's declared class type.
                prefix_call = self.symbol_table.type_of(prefix_call)
            prefix_call += '.{0}'.format(self.jack_tokenizer.identifier())
            self.jack_tokenizer.advance()  # past subroutineName
            self.jack_tokenizer.advance()  # past '('
            args_count = 0
            if variable:
                # Push the receiver object as argument 0.
                self.vm_writer.write_push(
                    self.symbol_table.kind_of(variable_name),
                    self.symbol_table.index_of(variable_name))
                args_count += 1
            args_count += self.compile_expression_list()
            self.vm_writer.write_call(prefix_call, args_count)
            self.jack_tokenizer.advance()  # past ')'

    def compile_let(self):
        """letStatement: 'let' varName ('[' expression ']')? '=' expression ';'"""
        self.jack_tokenizer.advance()  # past 'let', onto varName
        var_name = self.jack_tokenizer.identifier()
        self.jack_tokenizer.advance()  # onto '[' or '='
        if self.jack_tokenizer.symbol() == '[':
            # Array assignment: compute base + index, then store via THAT.
            self.vm_writer.write_push(self.symbol_table.kind_of(var_name),
                                      self.symbol_table.index_of(var_name))
            self.jack_tokenizer.advance()  # past '['
            self.compile_expression()
            self.vm_writer.write_arithmetic("add")
            self.jack_tokenizer.advance()  # past ']'
            self.jack_tokenizer.advance()  # past '='
            self.compile_expression()
            # Stash RHS in temp 0 so the address can be popped into THAT
            # (protects against the RHS itself using pointer 1).
            self.vm_writer.write_pop('TEMP', 0)
            self.vm_writer.write_pop('POINTER', 1)
            self.vm_writer.write_push('TEMP', 0)
            self.vm_writer.write_pop('THAT', 0)
        else:
            self.jack_tokenizer.advance()  # past '='
            self.compile_expression()
            self.vm_writer.write_pop(self.symbol_table.kind_of(var_name),
                                     self.symbol_table.index_of(var_name))
        self.jack_tokenizer.advance()  # past ';'

    def compile_while(self):
        """whileStatement: 'while' '(' expression ')' '{' statements '}'

        Shape: IF-label, ~cond, if-goto END, body, goto IF-label, END-label.
        """
        while_idx = self.vm_writer.get_next_label_index('while')
        if_label = 'WHILE_IF_{0}'.format(while_idx)
        end_label = 'WHILE_END_{0}'.format(while_idx)
        self.vm_writer.write_label(if_label)
        self.jack_tokenizer.advance()  # past 'while'
        self.jack_tokenizer.advance()  # past '('
        self.compile_expression()
        # Negate the condition so a single if-goto can exit the loop.
        self.vm_writer.write_arithmetic('not')
        self.jack_tokenizer.advance()  # past ')'
        self.jack_tokenizer.advance()  # past '{'
        self.vm_writer.write_if(end_label)
        self.compile_statements()
        self.vm_writer.write_goto(if_label)
        self.jack_tokenizer.advance()  # past '}'
        self.vm_writer.write_label(end_label)

    def compile_return(self):
        """returnStatement: 'return' expression? ';'

        A void return still pushes constant 0 — every VM function must
        return a value.
        """
        self.jack_tokenizer.advance()  # past 'return'
        if self.jack_tokenizer.symbol() != ';':
            self.compile_expression()
        else:
            self.vm_writer.write_push('CONST', 0)
        self.vm_writer.write_return()
        self.jack_tokenizer.advance()  # past ';'

    def compile_if(self):
        """ifStatement: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?

        Shape: ~cond, if-goto ELSE, then-branch, goto END, ELSE-label,
        optional else-branch, END-label.
        """
        if_idx = self.vm_writer.get_next_label_index('if')
        else_label = 'IF_ELSE_{0}'.format(if_idx)
        end_label = 'IF_END_{0}'.format(if_idx)
        self.jack_tokenizer.advance()  # past 'if'
        self.jack_tokenizer.advance()  # past '('
        self.compile_expression()
        self.vm_writer.write_arithmetic('not')
        self.jack_tokenizer.advance()  # past ')'
        self.jack_tokenizer.advance()  # past '{'
        self.vm_writer.write_if(else_label)
        self.compile_statements()
        self.jack_tokenizer.advance()  # past '}'
        self.vm_writer.write_goto(end_label)
        self.vm_writer.write_label(else_label)
        if self.jack_tokenizer.key_word() == 'else':
            self.jack_tokenizer.advance()  # past 'else'
            self.jack_tokenizer.advance()  # past '{'
            self.compile_statements()
            self.jack_tokenizer.advance()  # past '}'
        self.vm_writer.write_label(end_label)

    def compile_expression(self):
        """expression: term (op term)*

        Operators are emitted left-to-right with no precedence, per the
        Jack specification.
        """
        self.compile_term()
        while self.jack_tokenizer.symbol(
        ) in CompilationEngine.BINARY_OPERATOR_SYMBOLS:
            symbol = self.jack_tokenizer.symbol()
            self.jack_tokenizer.advance()  # past the operator
            self.compile_term()
            # Emit the operator AFTER both operands are on the stack.
            if symbol in self.BINARY_OPERATORS_TO_COMMAND:
                self.vm_writer.write_arithmetic(
                    self.BINARY_OPERATORS_TO_COMMAND[symbol])
            elif symbol == '*':
                self.vm_writer.write_call('Math.multiply', 2)
            elif symbol == '/':
                self.vm_writer.write_call('Math.divide', 2)

    def compile_term(self):
        """term: integerConstant | stringConstant | keywordConstant |
        varName | varName '[' expression ']' | subroutineCall |
        '(' expression ')' | unaryOp term
        """
        token_type = self.jack_tokenizer.token_type()
        if token_type == 'IDENTIFIER':
            # One token of lookahead decides between plain variable,
            # array access and subroutine call.
            name = self.jack_tokenizer.identifier()
            self.jack_tokenizer.advance()
            if self.jack_tokenizer.symbol(
            ) == '(' or self.jack_tokenizer.symbol() == '.':
                self.compile_subroutine_call(name)
            elif self.jack_tokenizer.symbol() == '[':
                # Array read: base + index, THAT = address, push THAT 0.
                self.vm_writer.write_push(self.symbol_table.kind_of(name),
                                          self.symbol_table.index_of(name))
                self.jack_tokenizer.advance()  # past '['
                self.compile_expression()
                self.jack_tokenizer.advance()  # past ']'
                self.vm_writer.write_arithmetic('add')
                self.vm_writer.write_pop('POINTER', 1)
                self.vm_writer.write_push('THAT', 0)
            else:
                kind = self.symbol_table.kind_of(name)
                idx = self.symbol_table.index_of(name)
                self.vm_writer.write_push(kind, idx)
        elif token_type == 'STRING_CONST':
            # Build the string at runtime: String.new then appendChar per char.
            string_const = self.jack_tokenizer.string_val()
            self.vm_writer.write_push("CONST", len(string_const))
            self.vm_writer.write_call("String.new", 1)
            for char in string_const:
                self.vm_writer.write_push('CONST', ord(char))
                self.vm_writer.write_call("String.appendChar", 2)
            self.jack_tokenizer.advance()
        elif token_type == 'KEYWORD':
            keyword = self.jack_tokenizer.key_word()
            if keyword == 'true':
                # true is -1 (all bits set): push 1, negate.
                self.vm_writer.write_push('CONST', 1)
                self.vm_writer.write_arithmetic('neg')
            elif keyword == 'false' or keyword == 'null':
                self.vm_writer.write_push('CONST', 0)
            elif keyword == 'this':
                self.vm_writer.write_push('POINTER', 0)
            self.jack_tokenizer.advance()
        elif token_type == 'SYMBOL':
            if self.jack_tokenizer.symbol() == '(':
                self.jack_tokenizer.advance()  # past '('
                self.compile_expression()
                self.jack_tokenizer.advance()  # past ')'
            elif self.jack_tokenizer.symbol(
            ) in CompilationEngine.UNARY_OPERATOR_SYMBOLS:
                command = CompilationEngine.UNARY_OPERATORS_TO_COMMAND[
                    self.jack_tokenizer.symbol()]
                self.jack_tokenizer.advance()
                self.compile_term()
                self.vm_writer.write_arithmetic(command)
        elif token_type == 'INT_CONST':
            self.vm_writer.write_push('CONST', self.jack_tokenizer.int_val())
            self.jack_tokenizer.advance()

    def compile_expression_list(self):
        """expressionList: (expression (',' expression)*)?

        Returns the number of expressions compiled (the call's arg count).
        """
        expression_count = 0
        if self.jack_tokenizer.symbol() != ')':
            self.compile_expression()
            expression_count += 1
            while self.jack_tokenizer.symbol() == ',':
                self.jack_tokenizer.advance()  # past ','
                self.compile_expression()
                expression_count += 1
        return expression_count
class CompilationEnginge(object):
    """XML-emitting compilation engine for the Jack language (project 10),
    extended with project-11 symbol-table bookkeeping.

    Parses the token stream from a ``JackTokenizer`` and writes an XML
    parse tree to ``output_file``; identifier tags are enriched with the
    identifier's kind (var/argument/static/field/class/subroutine) and its
    running index from the ``SymbolTable``.  Each ``compile_*`` method
    assumes the current token is the first token of its construct and
    leaves the tokenizer on the first token after it.
    """

    def __init__(self, input_file, output_file):
        self.tokenizer = JackTokenizer(input_file)
        self.out = open(output_file, 'w')
        self.token = None
        self.class_name = None
        #######################
        ### PROJECT 11 CODE ###
        #######################
        self.symbol_table = SymbolTable()
        # NOTE(review): VMWriter is given the same path as self.out — two
        # writers on one file; confirm this is intended (vm_writer appears
        # unused in this class).
        self.vm_writer = VMWriter(output_file)
        #######################

    def analyze(self):
        """Drive the whole compilation: load the first token, compile the
        class, close the output, and dump the class symbol table."""
        self.token = self.tokenizer.advance()
        self.compile_class()
        self.close()
        print('CLASS TABLE:')
        print(self.symbol_table.class_table)

    def close(self):
        """Close the output file exactly once (idempotent)."""
        if self.out:
            self.out.close()
            self.out = None

    def advance(self):
        """Fetch the next token and remember it on self.token."""
        self.token = self.tokenizer.advance()

    def write_to_out(self):
        # Unused placeholder.
        pass

    def format_line(self, defined_or_used=''):
        """Return one XML line '<tag> value </tag>\\n' for the current token.

        ``defined_or_used`` ('defined'/'used') is kept only for identifier
        tokens and prefixes the tag; for identifiers found in the symbol
        table the tag becomes the identifier's kind plus its running index,
        otherwise 'subroutine' (lowercase first letter) or 'class'.
        Raises ValueError on an unrecognised token type.
        """
        token_type = self.tokenizer.token_type()
        running_index = ''
        if token_type == self.tokenizer.keyword_token:
            meat = self.tokenizer.keyword()
            defined_or_used = ''
        elif token_type == self.tokenizer.symbol_token:
            meat = self.tokenizer.symbol()
            defined_or_used = ''
        elif token_type == self.tokenizer.identifier_token:
            meat = self.tokenizer.identifier()
            #######################
            ### PROJECT 11 CODE ###
            #######################
            # Extending the compilation engine to output
            # <var/argument/static/field...> instead of <identifier>.
            name = self.tokenizer.token
            if self.symbol_table.kind_of(name):
                token_type = self.symbol_table.kind_of(name)
                running_index = str(self.symbol_table.index_of(name))
            elif name[0].islower():
                # Jack convention: subroutine names start lowercase.
                token_type = 'subroutine'
            else:
                token_type = 'class'
            #######################
        elif token_type == self.tokenizer.int_const:
            meat = self.tokenizer.int_val()
            defined_or_used = ''
        elif token_type == self.tokenizer.string_const:
            meat = self.tokenizer.string_val()
            defined_or_used = ''
        else:
            raise ValueError('Something went wrong with token: {}'.format(self.token))
        # Pad the optional decorations so the tag reads naturally.
        if defined_or_used != '':
            defined_or_used += ' '
        if running_index != '':
            running_index = ' ' + running_index
        formated_line = '<{2}{0}{3}> {1} </{2}{0}{3}>\n'.format(token_type, meat, defined_or_used, running_index)
        return formated_line

    #########################
    ### PROGRAM STRUCTURE ###
    #########################

    def compile_class(self):
        """
        ####################################################################
        ### class: 'class' className '{' classVarDec* subroutineDec* '}' ###
        ####################################################################
        """
        self.out.write('<class>\n')
        # 'class'
        keyword_line = self.format_line()
        self.out.write(keyword_line)
        # className
        self.advance()
        #######################
        ### PROJECT 11 CODE ###
        #######################
        # Remember the class name for the implicit 'this' argument.
        self.class_name = self.tokenizer.token
        #######################
        identifier_line = self.format_line('defined')
        self.out.write(identifier_line)
        # '{'
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        ### classVarDec* subroutineDec* ###
        self.advance()
        # classVarDec*
        while self.tokenizer.token_type() == self.tokenizer.keyword_token and self.tokenizer.keyword() in [self.tokenizer.key_field, self.tokenizer.key_static]:
            self.compile_class_var_dec()
        # subroutineDec*
        while self.tokenizer.token_type() == self.tokenizer.keyword_token and self.tokenizer.keyword() in [self.tokenizer.key_function, self.tokenizer.key_method, self.tokenizer.key_constructor]:
            self.compile_subroutine()
        # '}'
        if self.tokenizer.token_type() == self.tokenizer.symbol_token:
            # Class compilation is done
            symbol_line = self.format_line()
            self.out.write(symbol_line)
        else:
            raise ValueError('Something went wrong')
        # Closing with </class>
        self.out.write('</class>\n')
        # A falsy advance() result means the token stream is exhausted,
        # i.e. the whole file was consumed by this class.
        is_sucessfull = not(self.advance())
        if is_sucessfull:
            print('Compilation enginge succesfully finished')
        else:
            print('Something went wrong!')

    def compile_class_var_dec(self):
        """
        #######################################################################
        ### classVarDec: ('static'|'field') type varName (',' varName)* ';' ###
        #######################################################################
        """
        self.out.write('<classVarDec>\n')
        #######################
        ### PROJECT 11 CODE ###
        #######################
        # Extract field or static
        # field_or_static = re.match('<[a-z]*>', field_or_static_line)[0][1:-1]
        field_or_static = self.tokenizer.token
        #######################
        # ('static' | 'field')
        field_or_static_line = self.format_line()
        self.out.write(field_or_static_line)
        # type
        self.advance()
        #######################
        ### PROJECT 11 CODE ###
        #######################
        # Extract token type
        type_ = self.tokenizer.token
        #######################
        type_line = self.format_line()
        self.out.write(type_line)
        # varName
        self.advance()
        #######################
        ### PROJECT 11 CODE ###
        #######################
        self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind=field_or_static)
        #######################
        varname_line = self.format_line('defined')
        self.out.write(varname_line)
        # (',' varName)*
        self.advance()
        symbol = self.tokenizer.symbol()
        while symbol == ',':
            colon_line = self.format_line()
            self.out.write(colon_line)
            self.advance()
            #######################
            ### PROJECT 11 CODE ###
            #######################
            self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind=field_or_static)
            #######################
            varname_line = self.format_line('defined')
            self.out.write(varname_line)
            self.advance()
            symbol = self.tokenizer.symbol()
        # symbol == ';'
        semicolon_line = self.format_line()
        self.out.write(semicolon_line)
        self.advance()
        self.out.write('</classVarDec>\n')

    def compile_subroutine(self):
        """
        ###########################################################################
        ### subroutineDec: ('constructor'|'function'|'method')                  ###
        ###                ('void' | type) subroutineName '(' parameterList ')' ###
        ###                subroutineBody                                       ###
        ###########################################################################
        """
        #######################
        ### PROJECT 11 CODE ###
        #######################
        # Debug dump of the previous subroutine's table before resetting.
        print()
        print('SUBROUTINE TABLE:')
        print(self.symbol_table.subroutine_table)
        print()
        self.symbol_table.start_subroutine()
        # NOTE(review): 'this' is defined as argument 0 for EVERY subroutine
        # kind, including functions and constructors — confirm intended.
        self.symbol_table.define(name='this', type_=self.class_name, kind='argument')
        #######################
        self.out.write('<subroutineDec>\n')
        # ('constructor'|'function'|'method')
        constructor_function_method_line = self.format_line()
        self.out.write(constructor_function_method_line)
        # ('void' | type)
        self.advance()
        void_or_type_line = self.format_line()
        self.out.write(void_or_type_line)
        # subroutineName
        self.advance()
        subroutine_name_line = self.format_line('defined')
        self.out.write(subroutine_name_line)
        # '('
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # parameterList
        self.advance()
        self.compile_parameter_list()
        # ')'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        ##################################################
        ### subroutineBody: '{' varDec* statements '}' ###
        ##################################################
        self.out.write('<subroutineBody>\n')
        # '{'
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        ###############
        ### varDec* ###
        ###############
        self.advance()
        while self.tokenizer.token == self.tokenizer.key_var:
            self.compile_var_dec()
        ##################
        ### statements ###
        ##################
        self.compile_statements()
        # '}'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        self.advance()
        self.out.write('</subroutineBody>\n')
        self.out.write('</subroutineDec>\n')
        #######################
        ### PROJECT 11 CODE ###
        #######################
        print()
        print('SUBROUTINE TABLE:')
        print(self.symbol_table.subroutine_table)
        print()
        #######################

    def compile_parameter_list(self):
        """
        ############################################################
        ### parameterList: ((type varName) (',' type varName)*)? ###
        ############################################################
        """
        self.out.write('<parameterList>\n')
        # If token type is symbol then we have an empty parameter list:
        # a symbol token here means the list is fully processed.
        if self.tokenizer.token_type() != self.tokenizer.symbol_token:
            # type
            #######################
            ### PROJECT 11 CODE ###
            #######################
            type_ = self.tokenizer.token
            #######################
            type_line = self.format_line()
            self.out.write(type_line)
            # varName
            self.advance()
            #######################
            ### PROJECT 11 CODE ###
            #######################
            self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='argument')
            #######################
            var_name_line = self.format_line('defined')
            self.out.write(var_name_line)
            # If next token is ',' we have more than one parameter
            self.advance()
            while self.tokenizer.token_type() == self.tokenizer.symbol_token and self.tokenizer.symbol() == ',':
                # ','
                comma_line = self.format_line()
                self.out.write(comma_line)
                # type
                self.advance()
                #######################
                ### PROJECT 11 CODE ###
                #######################
                type_ = self.tokenizer.token
                #######################
                type_line = self.format_line()
                self.out.write(type_line)
                # varName
                self.advance()
                #######################
                ### PROJECT 11 CODE ###
                #######################
                self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='argument')
                # We are in new subroutine so add next nested scope
                # self.symbol_table.start_subroutine()
                #######################
                var_name_line = self.format_line('defined')
                self.out.write(var_name_line)
                self.advance()
        self.out.write('</parameterList>\n')

    def compile_var_dec(self):
        """
        #####################################################
        ### varDec: 'var' type varName (',' varName)* ';' ###
        #####################################################
        """
        self.out.write('<varDec>\n')
        # var
        var_line = self.format_line()
        self.out.write(var_line)
        # type
        self.advance()
        #######################
        ### PROJECT 11 CODE ###
        #######################
        type_ = self.tokenizer.token
        #######################
        type_line = self.format_line()
        self.out.write(type_line)
        # varName
        self.advance()
        #######################
        ### PROJECT 11 CODE ###
        #######################
        self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='local')
        #######################
        var_name_line = self.format_line('defined')
        self.out.write(var_name_line)
        # (',' varName)*
        self.advance()
        while self.tokenizer.symbol() == ',':
            # ','
            comma_line = self.format_line()
            self.out.write(comma_line)
            # varName
            self.advance()
            #######################
            ### PROJECT 11 CODE ###
            #######################
            self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='local')
            #######################
            var_name_line = self.format_line('defined')
            self.out.write(var_name_line)
            self.advance()
        # ';'
        semicolon_line = self.format_line()
        self.out.write(semicolon_line)
        self.advance()
        self.out.write('</varDec>\n')

    ##################
    ### STATEMENTS ###
    ##################

    def compile_statements(self):
        """
        ##############################
        ### statements: statement* ###
        ##############################
        """
        self.out.write('<statements>\n')
        # A symbol token (the enclosing '}') terminates the statement run.
        while self.tokenizer.token_type() != self.tokenizer.symbol_token:
            keyword = self.tokenizer.keyword()
            # letStatement
            if keyword == self.tokenizer.key_let:
                self.compile_let()
            # ifStatement
            elif keyword == self.tokenizer.key_if:
                self.compile_if()
            # whileStatement
            elif keyword == self.tokenizer.key_while:
                self.compile_while()
            # doStatement
            elif keyword == self.tokenizer.key_do:
                self.compile_do()
            # returnStatement
            elif keyword == self.tokenizer.key_return:
                self.compile_return()
            else:
                raise ValueError('Wrong statement: {}'.format(keyword))
        self.out.write('</statements>\n')

    def compile_do(self):
        """
        ############################################
        ### doStatement: 'do' subroutineCall ';' ###
        ############################################
        """
        self.out.write('<doStatement>\n')
        # 'do'
        do_line = self.format_line()
        self.out.write(do_line)
        # subroutineCall
        self.advance()
        self.compile_subroutine_call()
        # ';'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        self.advance()
        self.out.write('</doStatement>\n')

    def compile_let(self):
        """
        ############################################################################
        ### letStatement: 'let' varName ('[' expression ']')? '=' expression ';' ###
        ############################################################################
        """
        self.out.write('<letStatement>\n')
        # let
        let_line = self.format_line()
        self.out.write(let_line)
        # varName
        self.advance()
        var_name_line = self.format_line('used')
        self.out.write(var_name_line)
        # Check if '[' or '='
        self.advance()
        if self.tokenizer.token == '[':
            # '['
            symbol_line = self.format_line()
            self.out.write(symbol_line)
            # expression
            self.advance()
            self.compile_expression()
            # ']'
            symbol_line = self.format_line()
            self.out.write(symbol_line)
            self.advance()
        # '='
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # expression
        self.advance()
        self.compile_expression()
        # ';'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        self.advance()
        self.out.write('</letStatement>\n')

    def compile_while(self):
        """
        #####################################################################
        ### whileStatement: 'while' '(' expression ')' '{' statements '}' ###
        #####################################################################
        """
        self.out.write('<whileStatement>\n')
        # 'while'
        while_line = self.format_line()
        self.out.write(while_line)
        # '('
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # expression
        self.advance()
        self.compile_expression()
        # ')'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # '{'
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # statements
        self.advance()
        self.compile_statements()
        # '}'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        self.advance()
        self.out.write('</whileStatement>\n')

    def compile_return(self):
        """
        ################################################
        ### ReturnStatement 'return' expression? ';' ###
        ################################################
        """
        self.out.write('<returnStatement>\n')
        # 'return'
        return_line = self.format_line()
        self.out.write(return_line)
        # Check if expression
        self.advance()
        if self.tokenizer.token != ';':
            # 'expression'
            self.compile_expression()
        # ';'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        self.advance()
        self.out.write('</returnStatement>\n')

    def compile_if(self):
        """
        ###############################################################
        ### ifStatement: 'if' '(' expression ')' '{' statements '}' ###
        ###              ('else' '{' statements '}')?               ###
        ###############################################################
        """
        self.out.write('<ifStatement>\n')
        # 'if'
        if_line = self.format_line()
        self.out.write(if_line)
        # '('
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # expression
        self.advance()
        self.compile_expression()
        # ')'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # '{'
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # statements
        self.advance()
        self.compile_statements()
        # '}'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # Check if there is 'else' part of ifStatement
        self.advance()
        if self.tokenizer.token_type() == self.tokenizer.keyword_token and self.tokenizer.keyword() == 'else':
            # 'else'
            else_line = self.format_line()
            self.out.write(else_line)
            # '{'
            self.advance()
            symbol_line = self.format_line()
            self.out.write(symbol_line)
            # statements
            self.advance()
            self.compile_statements()
            # '}'
            symbol_line = self.format_line()
            self.out.write(symbol_line)
            self.advance()
        self.out.write('</ifStatement>\n')

    ###################
    ### EXPRESSIONS ###
    ###################

    def compile_subroutine_call(self, skip_subroutine_name=False):
        """
        ############################################################################
        ### subroutineCall: subroutineName '(' expressionList ')' | (className | ###
        ###                 varName) '.' subroutineName '(' expressionList ')'   ###
        ############################################################################

        ``skip_subroutine_name`` is True when compile_term already emitted
        the leading identifier (term lookahead).
        """
        if not skip_subroutine_name:
            # subroutineName or className or varName
            subroutine_class_var_name_line = self.format_line('used')
            self.out.write(subroutine_class_var_name_line)
            self.advance()
        # Check '(' or '.'
        if self.tokenizer.token == '.':
            # '.'
            symbol_line = self.format_line()
            self.out.write(symbol_line)
            # subroutineName
            self.advance()
            subroutine_name_line = self.format_line('used')
            self.out.write(subroutine_name_line)
            self.advance()
        # '('
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        # expressionList
        self.advance()
        self.compile_expression_list()
        # ')'
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        self.advance()

    def compile_expression(self):
        """
        ###################################
        ### expression: term (op term)* ###
        ###################################
        """
        self.out.write('<expression>\n')
        ops = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
        # 'term'
        self.compile_term()
        # Check if there is (op term)* part
        while self.tokenizer.token in ops:
            # op
            op_line = self.format_line()
            self.out.write(op_line)
            # term
            self.advance()
            self.compile_term()
        self.out.write('</expression>\n')

    def compile_term(self):
        """
        ################################################################
        ### integerConstant | stringConstant | keywordConstant |     ###
        ### varName | varName '[' expression ']' | subroutineCall |  ###
        ### '(' expression ')' | unaryOp term                        ###
        ################################################################
        """
        self.out.write('<term>\n')
        unary_ops = ['-', '~']
        #############################################
        ### constant, name, expression or unaryOp ###
        #############################################
        # '(' expression ')'
        if self.tokenizer.token == '(':
            # '('
            symbol_line = self.format_line()
            self.out.write(symbol_line)
            # expression
            self.advance()
            self.compile_expression()
            # ')'
            symbol_line = self.format_line()
            self.out.write(symbol_line)
            self.advance()
        # unaryOp term
        elif self.tokenizer.token in unary_ops:
            # unaryOp
            unary_op_line = self.format_line()
            self.out.write(unary_op_line)
            # term
            self.advance()
            self.compile_term()
        # integerConstant | stringConstant | keywordConstant |
        # varName | varName '[' expression ']' | subroutineCall
        else:
            # constant or name
            constant_or_name = self.format_line('used')
            self.out.write(constant_or_name)
            # varName '[' expression ']' | subroutineCall or end of compile_term function.
            # Check if expression: '[', subroutineCall: '(' with parameter
            # skip_subroutine_name=True, otherwise end of compile_term function.
            self.advance()
            # '[' expression ']'
            if self.tokenizer.token == '[':
                # '['
                symbol_line = self.format_line()
                self.out.write(symbol_line)
                # expression
                self.advance()
                self.compile_expression()
                # ']'
                symbol_line = self.format_line()
                self.out.write(symbol_line)
                self.advance()
            # subroutineCall with skip_subroutine_name=True
            elif self.tokenizer.token in ['(', '.']:
                self.compile_subroutine_call(skip_subroutine_name=True)
        self.out.write('</term>\n')

    def compile_expression_list(self):
        """
        ########################################################
        ### expressionList: (expression (',' expression)* )? ###
        ########################################################
        """
        self.out.write('<expressionList>\n')
        # Check if token is ')', if so we got empty expression list
        if self.tokenizer.token != ')':
            # 'expression'
            self.compile_expression()
            # Check if token is ',', if so we got more expressions
            while self.tokenizer.token == ',':
                # ','
                comma_line = self.format_line()
                self.out.write(comma_line)
                # expression
                self.advance()
                self.compile_expression()
        self.out.write('</expressionList>\n')
class CompilationEngine:
    """Recursive-descent syntax analyzer for the Jack language.

    Consumes tokens from a JackTokenizer and writes the parse tree as
    XML to the output file, following the Nand2Tetris project 10 format.
    """

    CLASS_VAR_DEC_KEYWORDS = ['static', 'field']
    SUBROUTINE_DEC_KEYWORDS = ['constructor', 'function', 'method']
    BINARY_OPERATOR_SYMBOLS = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
    UNARY_OPERATOR_SYMBOLS = ['-', '~']
    # Maps a token type reported by the tokenizer to its XML tag name.
    TYPE_TO_TAG = {'STRING_CONST': 'stringConstant',
                   'INT_CONST': 'integerConstant',
                   'KEYWORD': 'keyword',
                   'IDENTIFIER': 'identifier',
                   'SYMBOL': 'symbol'}
    # Symbols that must be escaped so the emitted XML stays well-formed.
    # NOTE(fix): this table previously mapped each symbol to itself, which
    # wrote raw '<', '>', '&', '"' into the XML output.
    SYMBOLS_TO_XML_CONVENTION = {'<': '&lt;',
                                 '>': '&gt;',
                                 '&': '&amp;',
                                 '"': '&quot;'}

    def __init__(self, input_file_path, output_file_path):
        """Open the output file, tokenize the input, and compile it.

        input_file_path: path to a .jack source file.
        output_file_path: path of the XML file to create/overwrite.
        """
        self.output_file = open(output_file_path, 'w')
        self.jack_tokenizer = JackTokenizer(input_file_path)
        if self.jack_tokenizer.has_more_tokens():
            self.compile_class()

    def compile_class(self):
        """Compile: class: 'class' className '{' classVarDec* subroutineDec* '}'

        Entry point of the parse; closes the output file when done.
        """
        self.output_file.write('<class>\n')
        # Load the first token before anything can be written.
        self.jack_tokenizer.advance()
        self.write_token(self.jack_tokenizer.key_word())      # 'class'
        self.write_token(self.jack_tokenizer.identifier())    # className
        self.write_token(self.jack_tokenizer.symbol())        # '{'
        if self.jack_tokenizer.key_word() in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            self.compile_class_var_dec()
        if self.jack_tokenizer.key_word() in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.compile_subroutine()
        self.write_token(self.jack_tokenizer.symbol())        # '}'
        self.output_file.write('</class>')
        self.output_file.close()

    def write_token(self, token_name):
        """Write one <tag> token </tag> line for the current token and advance.

        token_name: the (already XML-escaped, if needed) text of the token.
        """
        type_tag = CompilationEngine.TYPE_TO_TAG[self.jack_tokenizer.token_type()]
        self.output_file.write('<{0}> {1} </{0}>\n'.format(type_tag, token_name))
        self.jack_tokenizer.advance()

    def compile_class_var_dec(self):
        """Compile: classVarDec: ('static' | 'field') type varName (',' varName)* ';'

        Loops while the current keyword still starts a class-variable declaration.
        """
        while self.jack_tokenizer.key_word() in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            self.output_file.write('<classVarDec>\n')
            self.write_token(self.jack_tokenizer.key_word())      # 'static' | 'field'
            self.write_type()                                     # type
            self.write_token(self.jack_tokenizer.identifier())    # varName
            # Additional ', varName' pairs until the terminating ';'.
            while self.jack_tokenizer.symbol() != ';':
                self.write_token(self.jack_tokenizer.symbol())    # ','
                self.write_token(self.jack_tokenizer.identifier())
            self.write_token(self.jack_tokenizer.symbol())        # ';'
            self.output_file.write('</classVarDec>\n')

    def write_type(self):
        """Write a type token: a built-in keyword type or a class name."""
        if self.jack_tokenizer.token_type() == 'KEYWORD':
            self.write_token(self.jack_tokenizer.key_word())
        elif self.jack_tokenizer.token_type() == 'IDENTIFIER':
            self.write_token(self.jack_tokenizer.identifier())

    def compile_subroutine(self):
        """Compile: subroutineDec:
        ('constructor' | 'function' | 'method') ('void' | type)
        subroutineName '(' parameterList ')' subroutineBody
        """
        while self.jack_tokenizer.key_word() in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.output_file.write('<subroutineDec>\n')
            self.write_token(self.jack_tokenizer.key_word())      # kind keyword
            self.write_type()                                     # return type / 'void'
            self.write_token(self.jack_tokenizer.identifier())    # subroutineName
            self.write_token(self.jack_tokenizer.symbol())        # '('
            self.compile_parameter_list()
            self.write_token(self.jack_tokenizer.symbol())        # ')'
            self.output_file.write('<subroutineBody>\n')
            self.write_token(self.jack_tokenizer.symbol())        # '{'
            while self.jack_tokenizer.key_word() == 'var':
                self.compile_var_dec()
            self.compile_statements()
            self.write_token(self.jack_tokenizer.symbol())        # '}'
            self.output_file.write('</subroutineBody>\n')
            self.output_file.write('</subroutineDec>\n')

    def compile_parameter_list(self):
        """Compile: parameterList: ((type varName) (',' type varName)*)?"""
        self.output_file.write('<parameterList>\n')
        # ')' immediately means an empty parameter list.
        if self.jack_tokenizer.symbol() != ')':
            self.write_type()
            self.write_token(self.jack_tokenizer.identifier())
            while self.jack_tokenizer.symbol() == ",":
                self.write_token(self.jack_tokenizer.symbol())    # ','
                self.write_type()
                self.write_token(self.jack_tokenizer.identifier())
        self.output_file.write('</parameterList>\n')

    def compile_var_dec(self):
        """Compile: varDec: 'var' type varName (',' varName)* ';'"""
        self.output_file.write('<varDec>\n')
        self.write_token(self.jack_tokenizer.key_word())          # 'var'
        self.write_type()
        self.write_token(self.jack_tokenizer.identifier())        # varName
        while self.jack_tokenizer.symbol() == ",":
            self.write_token(self.jack_tokenizer.symbol())        # ','
            self.write_token(self.jack_tokenizer.identifier())
        self.write_token(self.jack_tokenizer.symbol())            # ';'
        self.output_file.write('</varDec>\n')

    def compile_statements(self):
        """Compile: statements: statement* — dispatch on the leading keyword."""
        self.output_file.write('<statements>\n')
        while self.jack_tokenizer.token_type() == 'KEYWORD':
            if self.jack_tokenizer.key_word() == 'let':
                self.compile_let()
            elif self.jack_tokenizer.key_word() == 'if':
                self.compile_if()
            elif self.jack_tokenizer.key_word() == 'while':
                self.compile_while()
            elif self.jack_tokenizer.key_word() == 'do':
                self.compile_do()
            elif self.jack_tokenizer.key_word() == 'return':
                self.compile_return()
        self.output_file.write('</statements>\n')

    def compile_do(self):
        """Compile: doStatement: 'do' subroutineCall ';'"""
        self.output_file.write('<doStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())          # 'do'
        self.write_token(self.jack_tokenizer.identifier())        # leading name
        self.compile_subroutine_call()
        self.write_token(self.jack_tokenizer.symbol())            # ';'
        self.output_file.write('</doStatement>\n')

    def compile_subroutine_call(self):
        """Compile the remainder of a subroutine call.

        The leading identifier has already been written by the caller; the
        current token is either '(' (plain call) or '.' (qualified call).
        """
        if self.jack_tokenizer.symbol() == '(':
            self.write_token(self.jack_tokenizer.symbol())        # '('
            self.compile_expression_list()
            self.write_token(self.jack_tokenizer.symbol())        # ')'
        elif self.jack_tokenizer.symbol() == '.':
            self.write_token(self.jack_tokenizer.symbol())        # '.'
            self.write_token(self.jack_tokenizer.identifier())    # subroutineName
            self.write_token(self.jack_tokenizer.symbol())        # '('
            self.compile_expression_list()
            self.write_token(self.jack_tokenizer.symbol())        # ')'

    def compile_let(self):
        """Compile: letStatement: 'let' varName ('[' expression ']')? '=' expression ';'"""
        self.output_file.write('<letStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())          # 'let'
        self.write_token(self.jack_tokenizer.identifier())        # varName
        # Optional array-index subscript.
        if self.jack_tokenizer.symbol() == '[':
            self.write_token(self.jack_tokenizer.symbol())        # '['
            self.compile_expression()
            self.write_token(self.jack_tokenizer.symbol())        # ']'
        self.write_token(self.jack_tokenizer.symbol())            # '='
        self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())            # ';'
        self.output_file.write('</letStatement>\n')

    def compile_while(self):
        """Compile: whileStatement: 'while' '(' expression ')' '{' statements '}'"""
        self.output_file.write('<whileStatement>\n')
        # FIX: the 'while' token is a keyword, so fetch it with key_word()
        # (previously used identifier(), unlike every other statement).
        self.write_token(self.jack_tokenizer.key_word())          # 'while'
        self.write_token(self.jack_tokenizer.symbol())            # '('
        self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())            # ')'
        self.write_token(self.jack_tokenizer.symbol())            # '{'
        self.compile_statements()
        self.write_token(self.jack_tokenizer.symbol())            # '}'
        self.output_file.write('</whileStatement>\n')

    def compile_return(self):
        """Compile: returnStatement: 'return' expression? ';'"""
        self.output_file.write('<returnStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())          # 'return'
        # An immediate ';' means a bare 'return;' with no expression.
        if self.jack_tokenizer.symbol() != ';':
            self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())            # ';'
        self.output_file.write('</returnStatement>\n')

    def compile_if(self):
        """Compile: ifStatement:
        'if' '(' expression ')' '{' statements '}' ('else' '{' statements '}')?
        """
        self.output_file.write('<ifStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())          # 'if'
        self.write_token(self.jack_tokenizer.symbol())            # '('
        self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())            # ')'
        self.write_token(self.jack_tokenizer.symbol())            # '{'
        self.compile_statements()
        self.write_token(self.jack_tokenizer.symbol())            # '}'
        if self.jack_tokenizer.key_word() == 'else':
            self.write_token(self.jack_tokenizer.key_word())      # 'else'
            self.write_token(self.jack_tokenizer.symbol())        # '{'
            self.compile_statements()
            self.write_token(self.jack_tokenizer.symbol())        # '}'
        self.output_file.write('</ifStatement>\n')

    def compile_expression(self):
        """Compile: expression: term (op term)*

        Binary-operator symbols that clash with XML syntax are escaped
        via SYMBOLS_TO_XML_CONVENTION before being written.
        """
        self.output_file.write('<expression>\n')
        self.compile_term()
        while self.jack_tokenizer.symbol() in CompilationEngine.BINARY_OPERATOR_SYMBOLS:
            symbol = self.jack_tokenizer.symbol()
            if symbol in CompilationEngine.SYMBOLS_TO_XML_CONVENTION:
                symbol = CompilationEngine.SYMBOLS_TO_XML_CONVENTION[symbol]
            self.write_token(symbol)
            self.compile_term()
        self.output_file.write('</expression>\n')

    def compile_term(self):
        """Compile one term:

        integerConstant | stringConstant | keywordConstant |
        varName | varName '[' expression ']' | subroutineCall |
        '(' expression ')' | unaryOp term
        """
        self.output_file.write('<term>\n')
        token_type = self.jack_tokenizer.token_type()
        if token_type == 'IDENTIFIER':
            self.write_token(self.jack_tokenizer.identifier())
            # Disambiguate by the NEXT token: '[' -> array access,
            # '(' or '.' -> subroutine call, otherwise a plain varName.
            if self.jack_tokenizer.symbol() == '[':
                self.write_token(self.jack_tokenizer.symbol())    # '['
                self.compile_expression()
                self.write_token(self.jack_tokenizer.symbol())    # ']'
            elif self.jack_tokenizer.symbol() == '(' or self.jack_tokenizer.symbol() == '.':
                self.compile_subroutine_call()
        elif token_type == 'STRING_CONST':
            self.write_token(self.jack_tokenizer.string_val())
        elif token_type == 'INT_CONST':
            self.write_token(self.jack_tokenizer.int_val())
        elif token_type == 'KEYWORD':
            self.write_token(self.jack_tokenizer.key_word())
        elif token_type == 'SYMBOL':
            # (A duplicate, unreachable INT_CONST branch was removed here.)
            if self.jack_tokenizer.symbol() == '(':
                self.write_token(self.jack_tokenizer.symbol())    # '('
                self.compile_expression()
                self.write_token(self.jack_tokenizer.symbol())    # ')'
            elif self.jack_tokenizer.symbol() in CompilationEngine.UNARY_OPERATOR_SYMBOLS:
                self.write_token(self.jack_tokenizer.symbol())    # unaryOp
                self.compile_term()
        self.output_file.write('</term>\n')

    def compile_expression_list(self):
        """Compile: expressionList: (expression (',' expression)* )?"""
        self.output_file.write('<expressionList>\n')
        # ')' immediately means an empty expression list.
        if self.jack_tokenizer.symbol() != ')':
            self.compile_expression()
            while self.jack_tokenizer.symbol() == ',':
                self.write_token(self.jack_tokenizer.symbol())    # ','
                self.compile_expression()
        self.output_file.write('</expressionList>\n')