class CompilationEngine:
    """
    Recursive top-down compilation engine for the Jack language.

    Using a Tokenizer object, this module processes Jack tokens and compiles
    them to VM code through a VMWriter, recording identifiers in a
    SymbolTable. Invalid Jack syntax raises SyntaxError.
    """

    def __init__(self, tokenizer):
        """
        Creates a new compilation engine with the given tokenizer.

        The output file name is derived from ``tokenizer.filename`` by
        replacing a trailing ``.jack`` extension with ``.vm``.

        Raises:
            TypeError: if the tokenizer is falsy or has no filename.
        """
        if not tokenizer or not tokenizer.filename:
            raise TypeError('Tokenizer not valid.')

        # The dot is escaped so that only a literal '.jack' suffix matches.
        filename = re.sub(r'\.jack$', '.vm', tokenizer.filename)

        self.tokenizer = tokenizer
        self.vm_writer = VMWriter(filename)
        self.symbol_table = SymbolTable(filename)
        self.classname = self.get_classname(filename)
        self.tokenizer.seperate_all()

        # Different keywords and operators partition to digest
        # the structure of program.
        self.class_var_dec = ['static', 'field']
        self.subroutines = ['constructor', 'function', 'method']
        self.statements = ['let', 'do', 'if', 'while', 'return']
        self.ops = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
        self.unary_ops = ['~', '-']

        # Determines the current subroutine in use.
        self.current_fn_type = None
        self.current_fn_return = None
        self.current_fn_name = None

        # Running counters used to build distinct if/while labels.
        self.if_idx = 0
        self.while_idx = 0

        # Operator symbol -> VM arithmetic command name.
        # '*' and '/' are absent on purpose: they compile to Math calls.
        self.verbal_arithemtic = {
            '>': 'GT',
            '<': 'LT',
            '=': 'EQ',
            '|': 'OR',
            '-': 'SUB',
            '+': 'ADD',
            '&': 'AND'
        }
        self.verbal_unary = {'~': 'NOT', '-': 'NEG'}

    def compile_class(self):
        """
        Compiles a complete class and closes the VM writer when done.

        Raises:
            SyntaxError: if the tokenizer has no tokens, or a mandatory
                token ('class', '{', '}') is missing.
        """
        if not self.tokenizer.has_more_tokens():
            raise SyntaxError('No tokens available to compile.')

        # Advance to the first token.
        self.tokenizer.advance()

        self.process('class')
        self.get_token()  # class name
        self.process('{')

        # Reached a variable declaration or a subroutine,
        # there might be more than one.
        while self.tokenizer.current_token in self.class_var_dec:
            self.compile_class_var_dec()

        # Keep on writing subroutines until end of class.
        while self.tokenizer.current_token in self.subroutines:
            self.compile_subroutine_dec()

        # Validates the closing bracket of a class.
        self.process('}')
        self.vm_writer.close()

    def compile_class_var_dec(self):
        """
        Compiles a static variable declaration, or a field declaration.

        Every declared name is registered in the symbol table with the
        declaration's kind and type.
        """
        kind = self.get_token()
        var_type = self.get_token()

        # Iterate tokens until reaching a command break (';').
        while self.tokenizer.current_token != ';':
            name = self.get_token()
            self.symbol_table.define(name, var_type, kind)
            if self.tokenizer.current_token == ',':
                self.process()

        self.process(';')

    def compile_subroutine_dec(self):
        """
        Compiles a complete method, function, or constructor.
        """
        # static function, method or constructor.
        self.current_fn_type = self.get_token()
        self.current_fn_return = self.get_token()  # void or type.
        self.current_fn_name = self.get_token()  # name of the subroutine.

        # Reset symbol table for current scope.
        self.symbol_table.start_subroutine()
        if self.current_fn_type == 'method':
            # The type of 'this' is the class name (for example, 'Point').
            self.symbol_table.define('this', self.classname, 'arg')

        # Parameters list, e.g, (int Ax, int Ay, int Asize)
        self.process('(')
        self.compile_parameter_list()
        self.process(')')

        # Subroutine body
        self.compile_subroutine_body()

    def compile_parameter_list(self):
        """
        Compiles a (possibly empty) parameter list.

        Each parameter is registered in the symbol table with kind 'arg'.
        The enclosing parentheses are handled by the caller.
        """
        while self.tokenizer.current_token != ')':
            param_type = self.get_token()
            name = self.get_token()
            self.symbol_table.define(name, param_type, 'arg')
            if self.tokenizer.current_token == ',':
                self.process()

    def compile_subroutine_body(self):
        """
        Compiles a subroutine's body: local variable declarations, the VM
        function header, the constructor/method prologue and the statements.
        """
        self.process('{')

        # Before proceeding to the routine's statements, consume the local
        # variable declarations. Only a 'var' keyword starts one; anything
        # else (a statement, or the closing '}' of an empty body) is left
        # for compile_statements / process('}') to handle or reject.
        while self.tokenizer.current_token == 'var':
            self.compile_var_dec()

        # Output the subroutine's declaration VM code.
        subroutine_name = '{}.{}'.format(self.classname, self.current_fn_name)
        nlocals = self.symbol_table.var_count('var')
        self.vm_writer.write_function(subroutine_name, nlocals)

        # Constructors require allocating memory to object fields.
        if self.current_fn_type == 'constructor':
            nfields = self.symbol_table.var_count('field')
            self.vm_writer.write_push('constant', nfields)
            self.vm_writer.write_call('Memory.alloc', 1)
            self.vm_writer.write_pop('pointer', 0)

        # THIS = argument 0 for class methods.
        if self.current_fn_type == 'method':
            self.vm_writer.write_push('argument', 0)
            self.vm_writer.write_pop('pointer', 0)

        # The subroutine body contains statements. For example,
        # let x = Ax;   let statement
        # do draw();    do statement
        # return x;     return statement
        self.compile_statements()
        self.process('}')

    def compile_var_dec(self):
        """
        Compiles a var declaration, registering every declared name in the
        symbol table.
        """
        kind = self.get_token()
        var_type = self.get_token()

        while self.tokenizer.current_token != ';':
            name = self.get_token()
            self.symbol_table.define(name, var_type, kind)
            if self.tokenizer.current_token == ',':
                self.process(',')

        self.process(';')

    def compile_statements(self):
        """
        Compiles a sequence of statements, up to (not including) the
        closing '}' of the enclosing block.

        Raises:
            SyntaxError: if a statement does not start with one of the
                known statement keywords.
        """
        # Write statements until ending closing bracket of parent subroutine.
        while self.tokenizer.current_token != '}':
            # Explicitly validate statement
            statement = self.get_token()
            if statement not in self.statements:
                s = ', '.join(self.statements)
                raise SyntaxError('Statement should start with one of ' + s)

            # Compile full statement: dispatch to compile_let, compile_do, ...
            method = getattr(self, 'compile_' + statement)
            method()

    def compile_let(self):
        """
        Compiles a let statement (the 'let' keyword is already consumed).

        Raises:
            SyntaxError: if the assignment target is not an identifier.
        """
        if self.tokenizer.current_type != 'IDENTIFIER':
            raise SyntaxError('Let statement must proceed with an identifier.')

        identifier = self.get_token()
        index = self.get_index(identifier)
        segment = self.get_kind(identifier)

        # Placement might be an array entry.
        if self.tokenizer.current_token == '[':
            # Compute the target address (index + base) and leave it on the
            # stack while the right-hand side is evaluated. It must not be
            # parked in temp 0 across the evaluation: the temp segment is
            # shared by all functions, so a subroutine called from the
            # right-hand side could overwrite it.
            self.compile_array_entry()
            self.vm_writer.write_push(segment, index)
            self.vm_writer.write_arithmetic('ADD')

            self.process('=')
            self.compile_expression()

            # Stack: address, value. Stash the value, aim THAT at the
            # address, then store the value.
            self.vm_writer.write_pop('TEMP', 0)
            self.vm_writer.write_pop('POINTER', 1)
            self.vm_writer.write_push('TEMP', 0)
            self.vm_writer.write_pop('THAT', 0)
        else:
            # Regular assignment.
            self.process('=')
            self.compile_expression()
            self.vm_writer.write_pop(segment, index)

        self.process(';')

    def compile_do(self):
        """
        Compiles a do statement. The call's return value is discarded by
        popping it into temp 0.
        """
        self.compile_subroutine_invoke()
        self.vm_writer.write_pop('TEMP', 0)
        self.process(';')  # end of do statement.

    def compile_subroutine_invoke(self):
        """
        Compiles a subroutine invocation: ``name(...)``, ``obj.name(...)``
        or ``Class.name(...)``.
        """
        identifier = self.get_token()
        args_count = 0

        # Either a static (outer) class function or an instance function call.
        if self.tokenizer.current_token == '.':
            self.process('.')
            subroutine_name = self.get_token()

            inst_type = self.symbol_table.type_of(identifier)
            if inst_type:
                # It's an instance: push it as the implicit first argument.
                inst_kind = self.get_kind(identifier)
                inst_indx = self.get_index(identifier)
                self.vm_writer.write_push(inst_kind, inst_indx)
                fn_name = '{}.{}'.format(inst_type, subroutine_name)
                args_count += 1  # Pass 'this' as an argument.
            else:
                # Static function of a class.
                fn_name = '{}.{}'.format(identifier, subroutine_name)
        else:
            # Local method call.
            fn_name = '{}.{}'.format(self.classname, identifier)
            args_count += 1  # Pass 'this' as an argument.
            self.vm_writer.write_push('POINTER', 0)

        self.process('(')
        args_count += self.compile_expression_list()
        self.process(')')
        self.vm_writer.write_call(fn_name, args_count)

    def compile_if(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        """
        self.process('(')
        self.compile_expression()  # E.g., x > 2
        # Negate the condition so that if-goto jumps when it is false.
        self.vm_writer.write_arithmetic('NOT')
        self.process(')')  # End of if condition statement.

        # if statement body
        self.process('{')
        idx = self.if_idx
        self.if_idx += 1
        label_false = '{}.if_false.{}'.format(self.current_fn_name, idx)
        label_proceed = '{}.{}'.format(self.current_fn_name, idx)

        self.vm_writer.write_if(label_false)
        self.compile_statements()
        self.vm_writer.write_goto(label_proceed)
        self.process('}')

        # Labels statements.
        self.vm_writer.write_label(label_false)
        if self.tokenizer.current_token == 'else':
            # We have a proceeding else.
            self.process('else')
            self.process('{')
            self.compile_statements()
            self.process('}')

        self.vm_writer.write_label(label_proceed)

    def compile_while(self):
        """
        Compiles a while statement.
        """
        self.process('(')

        fn_name = self.current_fn_name
        idx = self.while_idx
        self.while_idx += 1
        while_start_label = '{}.while_start.{}'.format(fn_name, idx)
        while_end_label = '{}.while_end.{}'.format(fn_name, idx)

        self.vm_writer.write_label(while_start_label)
        self.compile_expression()
        # Negate the condition so that if-goto leaves the loop when false.
        self.vm_writer.write_arithmetic('NOT')
        self.process(')')

        # while's body.
        self.process('{')
        self.vm_writer.write_if(while_end_label)
        self.compile_statements()
        self.vm_writer.write_goto(while_start_label)
        self.vm_writer.write_label(while_end_label)
        self.process('}')  # We're done

    def compile_return(self):
        """
        Compiles a return statement. A bare ``return;`` pushes constant 0
        before returning.
        """
        if self.tokenizer.current_token != ';':
            self.compile_expression()
        else:
            # Return VOID.
            self.vm_writer.write_push('CONSTANT', 0)

        self.vm_writer.write_return()
        self.process(';')

    def compile_expression(self):
        """
        Compiles an expression: a term followed by any number of
        ``op term`` pairs, applied in the order they appear.
        """
        self.compile_term()

        while self.tokenizer.current_token in self.ops:
            op = self.get_token()
            self.compile_term()  # Push is done by compile_term

            # Explicitly use Math.multiply or Math.divide.
            if op == '*':
                self.vm_writer.write_call('Math.multiply', 2)
            elif op == '/':
                self.vm_writer.write_call('Math.divide', 2)
            else:
                name = self.verbal_arithemtic.get(op)
                self.vm_writer.write_arithmetic(name)

    def compile_term(self):
        """
        Compiles a term. If the current token is an identifier, the routine
        must resolve it into a variable, an array entry, or a subroutine
        call. A single lookahead token, which may be [, (, or ., suffices
        to distinguish between the possibilities. Any other token is not
        part of this term and should not be advanced over.
        """
        current_token = self.tokenizer.current_token
        token_type = self.get_current_type()

        if current_token == '(':
            # Parenthesised sub-expression.
            self.process('(')
            self.compile_expression()
            self.process(')')
        elif self.peek() == '[':
            # Array entry: push (index + base), aim THAT at it, read it.
            arr_identifier = self.get_token()
            self.compile_array_entry()

            index = self.get_index(arr_identifier)
            segment = self.get_kind(arr_identifier)
            self.vm_writer.write_push(segment, index)
            self.vm_writer.write_arithmetic('ADD')
            self.vm_writer.write_pop('POINTER', 1)
            self.vm_writer.write_push('THAT', 0)
        elif current_token in self.unary_ops:
            unary_op = self.get_token()
            self.compile_term()
            name = self.verbal_unary.get(unary_op)
            self.vm_writer.write_arithmetic(name)
        elif self.peek() in ['.', '(']:
            self.compile_subroutine_invoke()
        elif token_type == 'INT_CONST':
            self._compile_integer()
        elif token_type == 'STRING_CONST':
            self._compile_string()
        elif token_type == 'KEYWORD':
            self._compile_keyword()
        else:
            self._compile_identifier()

    def _compile_integer(self):
        """Compiles the current token as an integer."""
        # Coerce first so that both an int token and its string lexeme work.
        value = int(self.get_token())
        self.vm_writer.write_push('CONSTANT', abs(value))
        if value < 0:
            self.vm_writer.write_arithmetic('NEG')

    def _compile_string(self):
        """Compiles the current token as a string."""
        current_token = self.tokenizer.current_token
        self.vm_writer.write_push('CONSTANT', len(current_token))
        self.vm_writer.write_call('String.new', 1)

        # String assignments are handled using a series of calls
        # to String.appendChar(c), when c is the integer representing
        # unicode code point.
        for c in current_token:
            self.vm_writer.write_push('CONSTANT', ord(c))
            self.vm_writer.write_call('String.appendChar', 2)

        self.process()  # Finished compiling string.

    def _compile_keyword(self):
        """Compiles the current token as a keyword constant."""
        current_token = self.get_token()

        if current_token == 'this':
            self.vm_writer.write_push('POINTER', 0)
            return

        if current_token == 'true':
            # true is emitted as the negation of constant 1.
            self.vm_writer.write_push('CONSTANT', 1)
            self.vm_writer.write_arithmetic('NEG')
            return

        # null or false.
        self.vm_writer.write_push('CONSTANT', 0)

    def _compile_identifier(self):
        """Compiles the current token as an identifier (variable read)."""
        current_token = self.get_token()
        index = self.get_index(current_token)
        segment = self.get_kind(current_token)
        self.vm_writer.write_push(segment, index)

    def get_current_type(self):
        """Returns the type of the current token."""
        return self.tokenizer.current_type

    def compile_expression_list(self):
        """
        Compiles a (possibly empty) comma-separated list of expressions and
        returns the number of arguments in this expression list.
        """
        args_count = 0

        while self.tokenizer.current_token != ')':
            args_count += 1
            self.compile_expression()
            if self.tokenizer.current_token == ',':
                self.process(',')

        return args_count

    def process(self, string=None):
        """
        A helper routine that validates the current token, and advances
        to get the next token.

        Args:
            string: the token expected at the current position; when
                omitted (or falsy) the current token is skipped unchecked.

        Raises:
            SyntaxError: if the current token differs from ``string``.
        """
        t = self.tokenizer.current_token
        if string and t != string:
            # Name of the calling method, to locate the failing rule.
            caller = inspect.stack()[1][3]
            # "Got" reports the actual token, "expected" the required one.
            msg = 'Invalid token rasied from {}. Got {} when expected: {}'.format(
                caller, t, string)
            raise SyntaxError(msg)

        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()

    def get_token(self):
        """
        Helper method to get the current token and advance to the next one.
        On the last token no advance takes place.
        """
        token = self.tokenizer.current_token
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()
        return token

    def peek(self):
        """Returns the tokenizer's lookahead token without consuming it."""
        return self.tokenizer.peek()

    def compile_array_entry(self):
        """
        A helper routine to compile an array entry: ``[ expression ]``.
        """
        self.process('[')
        self.compile_expression()
        self.process(']')

    def is_int(self, input):
        """
        Returns True when ``input`` can be converted with int(); returns
        None when the conversion raises ValueError.
        """
        try:
            int(input)
            return True
        except ValueError:
            return None

    def get_kind(self, name):
        """
        Returns the VM segment for a symbol table entry.

        The kinds 'field', 'var' and 'arg' map to the segments 'this',
        'local' and 'argument'; any other kind is returned lower-cased.

        Raises:
            SyntaxError: if the symbol table reports no kind for ``name``.
        """
        kind = self.symbol_table.kind_of(name)
        if kind is None:
            raise SyntaxError('Undefined identifier: {}'.format(name))

        kind = kind.lower()
        segments = {'field': 'this', 'var': 'local', 'arg': 'argument'}
        return segments.get(kind, kind)

    def get_index(self, name):
        """Returns the index value of a symbol table value."""
        return self.symbol_table.index_of(name)

    def get_classname(self, filename):
        """Returns the clean class name: the file's base name up to its first dot."""
        return filename.split('/')[-1].split('.')[0]

    def close(self):
        """
        Closes the vm stream.
        """
        self.vm_writer.close()