Example #1
0
class Engine(object):
    def __init__(self, tokens, filepath):
        # compilation engine init
        self.lex = tokens
        self.symbols = SymbolTable()
        self.vm = VMWriter(filepath)
        self.compile_class()
        self.vm.closeout()

    # Routines to advance the token
    def _require(self, tok, val=None):
        lextok, lexval = self._advance()
        if tok != lextok or tok in (T_KEYWORD, T_SYM) and val != lexval:
            raise Exception(self._require_failed_msg(tok, val))
        else:
            return lexval

    def _require_failed_msg(self, tok, val):
        if val is None:
            val = token_list[tok]
        return 'Expected: {0}, {1} \ntoken is: {2}'.format(tok, val, self.lex.tokens)

    def _advance(self):
        return self.lex.advance()

    def vm_function_name(self):
        return self._cur_class + '.' + self._cur_subroutine

    def vm_push_variable(self, name):
        (type, kind, index) = self.symbols.lookup(name)
        self.vm.write_push(segments[kind], index)

    def vm_pop_variable(self, name):
        (type, kind, index) = self.symbols.lookup(name)
        self.vm.write_pop(segments[kind], index)

    def load_this_ptr(self, kwd):
        if kwd == KW_METHOD:
            self.vm.push_arg(0)
            self.vm.pop_this_ptr()  # set up 'this' pointer to point to new object
        elif kwd == KW_CONSTRUCTOR:
            self.vm.push_const(self.symbols.var_count(SK_FIELD))  # object size
            self.vm.write_call('Memory.alloc', 1)
            self.vm.pop_this_ptr()  # set up 'this' pointer to point to new object

    def write_func_decl(self, kwd):
        self.vm.write_function(self.vm_function_name(), self.symbols.var_count(SK_VAR))
        self.load_this_ptr(kwd)

    def write_string_const_init(self, val):
        self.vm.push_const(len(val))
        self.vm.write_call('String.new', 1)         # String.new(len(str))
        for c in val:
            self.vm.push_const(ord(c))
            self.vm.write_call('String.appendChar', 2)  # String.appendChar(nextchar)

    label_num = 0

    def new_label(self):
        self.label_num += 1
        return 'label' + str(self.label_num)

    # ------------- verify part ----------------

    def _is_token(self, tok, val=None):
        lextok, lexval = self.lex.peek()
        return val == None and lextok == tok or (lextok, lexval) == (tok, val)

    def _is_keyword(self, *keywords):
        lextok, lexval = self.lex.peek()
        return lextok == T_KEYWORD and lexval in keywords

    def _is_sym(self, symbols):
        lextok, lexval = self.lex.peek()
        return lextok == T_SYM and lexval in symbols

    # Variable declarations
    def _is_class_var_dec(self):
        return self._is_keyword(KW_STATIC, KW_FIELD)

    def _is_type(self):
        return self._is_token(T_ID) or self._is_keyword(KW_INT, KW_CHAR, KW_BOOLEAN)

    # Subroutine declarations
    def _is_subroutine(self):
        return self._is_keyword(KW_CONSTRUCTOR, KW_FUNCTION, KW_METHOD)

    def _is_var_dec(self):
        return self._is_keyword(KW_VAR)

    def _is_let(self):
        return self._is_keyword(KW_LET)

    def _is_if(self):
        return self._is_keyword(KW_IF)

    def _is_while(self):
        return self._is_keyword(KW_WHILE)

    def _is_do(self):
        return self._is_keyword(KW_DO)

    def _is_return(self):
        return self._is_keyword(KW_RETURN)

    def _is_statement(self):
        return self._is_let() or self._is_if() or self._is_while() or self._is_do() or self._is_return()

    def _is_const(self):
        return self._is_token(T_NUM) or self._is_token(T_STR) or self._is_keyword_constant()

    def _is_keyword_constant(self):
        return self._is_keyword(KW_TRUE, KW_FALSE, KW_NULL, KW_THIS)

    def _is_op(self):
        return self._is_sym('+-*/&|<>=')

    def _is_unary_op(self):
        return self._is_sym('-~')

    def _is_var_name(self):
        return self._is_token(T_ID)

    def _is_builtin_type(self, type):
        return type in [KW_INT, KW_CHAR, KW_BOOLEAN, KW_VOID]

    def _is_term(self):
        return self._is_const() or self._is_var_name() or self._is_sym('(') or self._is_unary_op()

    # --------------- compile part -----------------
    # Parser and compile Jack code
    # class: 'class' className '{' classVarDec* subroutineDec* '}'
    def compile_class(self):
        self._require(T_KEYWORD, KW_CLASS)
        self.compile_class_name()
        self._require(T_SYM, '{')
        while self._is_class_var_dec():
            self.compile_class_var_dec()
        while self._is_subroutine():
            self.compile_subroutine()
        self._require(T_SYM, '}')

    # className: identifier
    def compile_class_name(self):
        self._cur_class = self.compile_var_name()  # Class names don't have to go into the symbol table

    # type varName (',' varName)* ';'
    def _compile_dec(self, kind):
        type = self.compile_type()
        name = self.compile_var_name()
        self.symbols.define(name, type, kind)
        while self._is_sym(','):
            self._advance()
            name = self.compile_var_name()
            self.symbols.define(name, type, kind)
        self._require(T_SYM, ';')

    def compile_type(self):
        """
        type: 'int' | 'char' | 'boolean' | className
        """
        if self._is_type():
            return self._advance()[1]
        else:
            raise ValueError(self._require_failed_msg(*self.lex.peek()))

    # classVarDec: {'static'|'field'} type varName (',' varName)* ';'
    def compile_class_var_dec(self):
        tok, kwd = self._advance()  # static | field
        self._compile_dec(kwd_to_kind[kwd])

    # varName: identifier
    def compile_var_name(self):
        return self._require(T_ID)

    # subroutineDec: ('constructor'|'function'|'method') ('void'|type)
    #                subroutineName '(' parameterList ')' subroutineBody
    def compile_subroutine(self):
        tok, kwd = self._advance()
        type = self.compile_void_or_type()
        self.compile_subroutine_name()
        self.symbols.start_subroutine()
        if kwd == KW_METHOD:
            self.symbols.define('this', self._cur_class, SK_ARG)
        self._require(T_SYM, '(')
        self.compile_parameter_list()
        self._require(T_SYM, ')')
        self.compile_subroutine_body(kwd)

    # 'void' | type
    def compile_void_or_type(self):
        if self._is_keyword(KW_VOID):
            return self._advance()[1]
        else:
            return self.compile_type()

    # subroutineName: identifier
    def compile_subroutine_name(self):
        self._cur_subroutine = self.compile_var_name()  # subroutine names don't have to go in the symbol table

    # parameterList: (parameter (',' parameter)*)?
    def compile_parameter_list(self):
        if self._is_type():
            self.compile_parameter()
            while self._is_sym(','):
                self._advance()
                self.compile_parameter()

    # parameter: type varName
    def compile_parameter(self):
        if self._is_type():
            type = self.compile_type()
            name = self.compile_var_name()
            self.symbols.define(name, type, SK_ARG)

    # subroutineBody: '{' varDec* statements '}'
    def compile_subroutine_body(self, kwd):
        self._require(T_SYM, '{')
        while self._is_var_dec():
            self.compile_var_dec()
        self.write_func_decl(kwd)
        self.compile_statements()

    # varDec: 'var' type varName (',' varName)* ';'
    def compile_var_dec(self):
        self._require(T_KEYWORD, KW_VAR)
        return self._compile_dec(SK_VAR)

    # statement: statement*
    def compile_statements(self):
        while self._is_statement():
            self._compile_statement()

    # statement: letStatement | ifStatement | whileStatement | doStatement | returnStatement
    def _compile_statement(self):
        if self._is_let():
            self.compile_let()
        elif self._is_if():
            self.compile_if()
        elif self._is_while():
            self.compile_while()
        elif self._is_do():
            self.compile_do()
        elif self._is_return():
            self.compile_return()

    # letStatement: 'let' varName ('[' expression ']')? '=' expression ';'
    def compile_let(self):
        self._require(T_KEYWORD, KW_LET)
        name = self.compile_var_name()
        subscript = self._is_sym('[')
        if subscript:
            self.compile_base_plus_index(name)  # calculate base+index
        self._require(T_SYM, '=')
        self.compile_expression()  # calculate expression to assign
        self._require(T_SYM, ';')
        if subscript:
            self.pop_array_element()  # *(base+index) = expr
        else:
            self.vm_pop_variable(name)  # pop value directly into variable

    def pop_array_element(self):
        self.vm.pop_temp(TEMP_ARRAY)        # Pop expr value to temp register
        self.vm.pop_that_ptr()              # Pop base+index into 'that' register
        self.vm.push_temp(TEMP_ARRAY)       # Push expr back onto stack
        self.vm.pop_that()                  # Pop value into *(base+index)

    # ('[' expression ']')?
    def compile_base_plus_index(self, name):
        self.vm_push_variable(name)  # push array ptr onto stack
        self._advance()
        self.compile_expression()  # push index onto stack
        self._require(T_SYM, ']')
        self.vm.write_vm_cmd('add')  # base+index - leave on the stack for later

    def compile_expression(self):
        self.compile_term()
        # Doesn't handle normal order of operations - just left to right for now
        while self._is_op():
            op = self._advance()
            self.compile_term()
            self.vm.write_vm_cmd(vm_cmds[op[1]])  # op

    # term: integerConstant | stringConstant | keywordConstant | varName
    #     | varName '[' expression ']' | subroutineCall | '(' expression ')'
    #     | unaryOp term
    def compile_term(self):
        if self._is_const():
            self.compile_const()
        elif self._is_sym('('):
            self._advance()
            self.compile_expression()  # VM code to evaluate expression
            self._require(T_SYM, ')')
        elif self._is_unary_op():
            tok, op = self._advance()
            self.compile_term()
            self.vm.write_vm_cmd(vm_unary_cmds[op])  # op
        elif self._is_var_name():
            tok, name = self._advance()
            if self._is_sym('['):
                self.compile_array_subscript(name)  # VM code for array subscript
            elif self._is_sym('(.'):
                self.compile_subroutine_call(name)  # VM code for subroutine call
            else:
                self.vm_push_variable(name)  # push variable on stack

    # integerConstant | stringConstant | keywordConstant
    def compile_const(self):
        tok, val = self._advance()
        if tok == T_NUM:
            self.vm.push_const(val)                 # push constant val
        elif tok == T_STR:
            self.write_string_const_init(val)       # initialize string & push str addr
        elif tok == T_KEYWORD:
            self.compile_kwd_const(val)             # push TRUE, FALSE, NULL etc.

    # '[' expression ']'
    def compile_array_subscript(self, name):
        self.vm_push_variable(name)     # push array ptr onto stack
        self._require(T_SYM, '[')
        self.compile_expression()       # push index onto stack
        self._require(T_SYM, ']')
        self.vm.write_vm_cmd('add')     # base+index
        self.vm.pop_that_ptr()          # pop into 'that' ptr
        self.vm.push_that()             # push *(base+index) onto stack

    # subroutineCall: subroutineName '(' expressionList ')'
    #               | (className | varName) '.' subroutineName '(' expressionList ')'
    def compile_subroutine_call(self, name):
        (type, kind, index) = self.symbols.lookup(name)
        if self._is_sym('.'):
            num_args, name = self.compile_dotted_subroutine_call(name, type)
        else:
            num_args = 1
            self.vm.push_this_ptr()
            name = self._cur_class+'.'+name
        self._require(T_SYM, '(')
        num_args += self.compile_expr_list() # VM code to push arguments
        self._require(T_SYM, ')')
        self.vm.write_call(name, num_args)  # call name num_args

    # keywordConstant: 'true' | 'false' | 'null' | 'this'
    def compile_kwd_const(self, kwd):
        if kwd == KW_THIS:
            self.vm.push_this_ptr()
        elif kwd == KW_TRUE:
            self.vm.push_const(1)
            self.vm.write_vm_cmd('neg')
        else:   # KW_FALSE or KW_NULL
            self.vm.push_const(0)

    def compile_dotted_subroutine_call(self, name, type):
        num_args = 0
        obj_name = name
        self._advance()
        name = self.compile_var_name()
        if self._is_builtin_type(type):     # e.g. int.func(123) not allowed
            ValueError('Cannot use "." operator on builtin type')
        elif type == None:                  # Calling using class name
            name = obj_name+'.'+name
        else:                               # Calling using object variable name
            num_args = 1
            self.vm_push_variable(obj_name) # push object ptr onto stack
            name = self.symbols.type_of(obj_name)+'.'+name
        return num_args, name

    # expressionList: (expression (',' expression)*)?
    def compile_expr_list(self):
        num_args = 0
        if self._is_term():
            self.compile_expression()
            num_args = 1
            while self._is_sym(','):
                self._advance()
                self.compile_expression()
                num_args += 1
        return num_args

    # ifStatement: 'if' '(' expression ')' '{' statements '}'
    #              ('else' '{' statements '}')?
    def compile_if(self):
        self._require(T_KEYWORD, KW_IF)
        end_label = self.new_label()
        self._compile_cond_expression_statements(end_label) # VM code for condition and if statements
        if self._is_keyword(KW_ELSE):
            self._advance()
            self._require(T_SYM, '{')
            self.compile_statements()   # VM code for else statements
            self._require(T_SYM, "}")
        self.vm.write_label(end_label)  # label end_label

    # '(' expression ')' '{' statements '}'
    def _compile_cond_expression_statements(self, label):
        self._require(T_SYM, '(')
        self.compile_expression()
        self._require(T_SYM, ')')
        self.vm.write_vm_cmd('not')     # ~(cond)
        notif_label = self.new_label()
        self.vm.write_if(notif_label)   # if-goto notif_label
        self._require(T_SYM, '{')
        self.compile_statements()       # VM code for if statements
        self._require(T_SYM, '}')
        self.vm.write_goto(label)       # goto label
        self.vm.write_label(notif_label)# label notif_label

    # whileStatement: 'while' '(' expression ')' '{' statements '}'
    def compile_while(self):
        self._require(T_KEYWORD, KW_WHILE)
        top_label = self.new_label()
        self.vm.write_label(top_label)                      # label top_label
        self._compile_cond_expression_statements(top_label) # VM code for condition and while statements

    # do_statement: 'do' subroutineCall ';'
    def compile_do(self):
        self._require(T_KEYWORD, KW_DO)
        name = self._require(T_ID)
        self.compile_subroutine_call(name)  # VM code for subroutine call
        self.vm.pop_temp(TEMP_RETURN)       # Pop return value and discard
        self._require(T_SYM, ';')

    # returnStatement: 'return' expression? ';'
    def compile_return(self):
        self._require(T_KEYWORD, KW_RETURN)
        if not self._is_sym(';'):
            self.compile_expression()   # VM code for return expression if any
        else:
            self.vm.push_const(0)       # push 0 if not returning a value
        self._require(T_SYM, ';')
        self.vm.write_return()          # return