Exemple #1
0
class Engine(object):
    def __init__(self, tokens, filepath):
        """Compile one Jack class as a side effect of construction.

        Args:
            tokens: lexer object; this class calls its ``advance()`` and
                ``peek()`` methods and unpacks each result as a
                ``(token_type, value)`` pair.
            filepath: handed unchanged to ``VMWriter``.

        Raises:
            Exception: whatever ``compile_class`` raises on a syntax error.
        """
        self.lex = tokens
        self.symbols = SymbolTable()
        self.vm = VMWriter(filepath)
        try:
            self.compile_class()
        finally:
            # Release the writer even when compilation aborts midway.
            self.vm.closeout()

    # Routines to advance the token
    def _require(self, tok, val=None):
        """Consume the next token and insist that it is the expected one.

        The token type must equal *tok*; for keyword and symbol tokens the
        value must additionally equal *val*.  Returns the consumed value,
        raises ``Exception`` on any mismatch.
        """
        lextok, lexval = self._advance()
        if tok != lextok:
            raise Exception(self._require_failed_msg(tok, val))
        if tok in (T_KEYWORD, T_SYM) and val != lexval:
            raise Exception(self._require_failed_msg(tok, val))
        return lexval

    def _require_failed_msg(self, tok, val):
        """Format the text used when an expected token was not found.

        When *val* is ``None`` the description of *tok* from ``token_list``
        stands in for it.
        """
        expected = token_list[tok] if val is None else val
        return 'Expected: {0}, {1} \ntoken is: {2}'.format(tok, expected, self.lex.tokens)

    def _advance(self):
        """Consume the lexer's next token and return whatever ``advance()`` yields."""
        consumed = self.lex.advance()
        return consumed

    def vm_function_name(self):
        """Return ``<class>.<subroutine>`` for the subroutine being compiled."""
        return '.'.join((self._cur_class, self._cur_subroutine))

    def vm_push_variable(self, name):
        """Emit a push of variable *name* from the segment mapped to its kind."""
        _var_type, kind, index = self.symbols.lookup(name)
        segment = segments[kind]
        self.vm.write_push(segment, index)

    def vm_pop_variable(self, name):
        """Emit a pop into variable *name* in the segment mapped to its kind."""
        _var_type, kind, index = self.symbols.lookup(name)
        segment = segments[kind]
        self.vm.write_pop(segment, index)

    def load_this_ptr(self, kwd):
        """Emit the prologue that sets the 'this' pointer.

        Methods take it from argument 0; constructors allocate a block whose
        size is the number of fields.  Any other *kwd* emits nothing.
        """
        if kwd == KW_METHOD:
            self.vm.push_arg(0)
        elif kwd == KW_CONSTRUCTOR:
            object_size = self.symbols.var_count(SK_FIELD)
            self.vm.push_const(object_size)
            self.vm.write_call('Memory.alloc', 1)
        else:
            return
        # Both paths left the object address on the stack.
        self.vm.pop_this_ptr()

    def write_func_decl(self, kwd):
        """Emit the ``function`` header followed by the 'this' prologue for *kwd*."""
        qualified_name = self.vm_function_name()
        local_count = self.symbols.var_count(SK_VAR)
        self.vm.write_function(qualified_name, local_count)
        self.load_this_ptr(kwd)

    def write_string_const_init(self, val):
        """Emit code that builds string *val* through ``String.new`` / ``String.appendChar``."""
        vm = self.vm
        vm.push_const(len(val))
        vm.write_call('String.new', 1)  # String.new(len(str))
        for char_code in map(ord, val):
            vm.push_const(char_code)
            vm.write_call('String.appendChar', 2)  # appends the next character

    # Class-level starting value of the label counter; new_label() rebinds it
    # on the instance the first time it increments.
    label_num = 0

    def new_label(self):
        """Increment the label counter and return a fresh ``label<N>`` name."""
        self.label_num = self.label_num + 1
        return 'label{}'.format(self.label_num)

    # ------------- verify part ----------------

    def _is_token(self, tok, val=None):
        """Report whether the upcoming token matches, without consuming it.

        Args:
            tok: required token type.
            val: required token value; ``None`` means "any value".

        Returns:
            bool: True when the lookahead token satisfies both requirements.
        """
        lextok, lexval = self.lex.peek()
        if val is None:  # identity test: None is the "don't care" sentinel
            return lextok == tok
        return (lextok, lexval) == (tok, val)

    def _is_keyword(self, *keywords):
        """Report whether the lookahead token is a keyword listed in *keywords*."""
        kind, text = self.lex.peek()
        if kind != T_KEYWORD:
            return False
        return text in keywords

    def _is_sym(self, symbols):
        """Report whether the lookahead token is a symbol contained in *symbols*.

        Callers in this class pass a string of candidate characters, so the
        check is a substring test on that string.
        """
        kind, text = self.lex.peek()
        if kind != T_SYM:
            return False
        return text in symbols

    # Variable declarations
    def _is_class_var_dec(self):
        """True when the lookahead keyword is ``KW_STATIC`` or ``KW_FIELD``."""
        starts_class_var = self._is_keyword(KW_STATIC, KW_FIELD)
        return starts_class_var

    def _is_type(self):
        """True when the lookahead is an identifier or one of the int/char/boolean keywords."""
        if self._is_token(T_ID):
            return True
        return self._is_keyword(KW_INT, KW_CHAR, KW_BOOLEAN)

    # Subroutine declarations
    def _is_subroutine(self):
        """True when the lookahead keyword is constructor, function or method."""
        starts_subroutine = self._is_keyword(KW_CONSTRUCTOR, KW_FUNCTION, KW_METHOD)
        return starts_subroutine

    def _is_var_dec(self):
        """True when the lookahead keyword is ``KW_VAR``."""
        starts_var_dec = self._is_keyword(KW_VAR)
        return starts_var_dec

    def _is_let(self):
        """True when the lookahead keyword is ``KW_LET``."""
        starts_let = self._is_keyword(KW_LET)
        return starts_let

    def _is_if(self):
        """True when the lookahead keyword is ``KW_IF``."""
        starts_if = self._is_keyword(KW_IF)
        return starts_if

    def _is_while(self):
        """True when the lookahead keyword is ``KW_WHILE``."""
        starts_while = self._is_keyword(KW_WHILE)
        return starts_while

    def _is_do(self):
        """True when the lookahead keyword is ``KW_DO``."""
        starts_do = self._is_keyword(KW_DO)
        return starts_do

    def _is_return(self):
        """True when the lookahead keyword is ``KW_RETURN``."""
        starts_return = self._is_keyword(KW_RETURN)
        return starts_return

    def _is_statement(self):
        """True when the lookahead starts a let, if, while, do or return statement."""
        starters = (self._is_let, self._is_if, self._is_while, self._is_do, self._is_return)
        # any() stops at the first predicate that matches, like the or-chain it replaces.
        return any(starts() for starts in starters)

    def _is_const(self):
        """True when the lookahead is a number, a string or a keyword constant."""
        if self._is_token(T_NUM):
            return True
        if self._is_token(T_STR):
            return True
        return self._is_keyword_constant()

    def _is_keyword_constant(self):
        """True when the lookahead keyword is true, false, null or this."""
        is_constant_keyword = self._is_keyword(KW_TRUE, KW_FALSE, KW_NULL, KW_THIS)
        return is_constant_keyword

    def _is_op(self):
        """True when the lookahead symbol is one of the binary operators ``+-*/&|<>=``."""
        binary_ops = '+-*/&|<>='
        return self._is_sym(binary_ops)

    def _is_unary_op(self):
        """True when the lookahead symbol is one of the unary operators ``-`` or ``~``."""
        unary_ops = '-~'
        return self._is_sym(unary_ops)

    def _is_var_name(self):
        """True when the lookahead token is an identifier."""
        is_identifier = self._is_token(T_ID)
        return is_identifier

    def _is_builtin_type(self, type):
        """True when *type* is one of the int, char, boolean or void keywords."""
        builtin_types = (KW_INT, KW_CHAR, KW_BOOLEAN, KW_VOID)
        return type in builtin_types

    def _is_term(self):
        """True when the lookahead can begin a term: constant, identifier, '(' or unary operator."""
        if self._is_const():
            return True
        if self._is_var_name():
            return True
        if self._is_sym('('):
            return True
        return self._is_unary_op()

    # --------------- compile part -----------------
    # Parser and compile Jack code
    # class: 'class' className '{' classVarDec* subroutineDec* '}'
    def compile_class(self):
        """Parse ``'class' className '{' classVarDec* subroutineDec* '}'``."""
        self._require(T_KEYWORD, KW_CLASS)
        self.compile_class_name()
        self._require(T_SYM, '{')
        # Grammar order: every class variable declaration precedes every subroutine.
        sections = (
            (self._is_class_var_dec, self.compile_class_var_dec),
            (self._is_subroutine, self.compile_subroutine),
        )
        for has_more, compile_one in sections:
            while has_more():
                compile_one()
        self._require(T_SYM, '}')

    # className: identifier
    def compile_class_name(self):
        """Consume the class identifier and remember it; it is not entered in the symbol table."""
        class_name = self.compile_var_name()
        self._cur_class = class_name

    # type varName (',' varName)* ';'
    def _compile_dec(self, kind):
        """Parse ``type varName (',' varName)* ';'`` and define each name with *kind*."""
        declared_type = self.compile_type()
        while True:
            var_name = self.compile_var_name()
            self.symbols.define(var_name, declared_type, kind)
            if not self._is_sym(','):
                break
            self._advance()  # skip the comma
        self._require(T_SYM, ';')

    def compile_type(self):
        """
        type: 'int' | 'char' | 'boolean' | className

        Consumes the type token and returns its value; raises ValueError when
        the lookahead cannot be a type.
        """
        if not self._is_type():
            raise ValueError(self._require_failed_msg(*self.lex.peek()))
        _tok, type_name = self._advance()
        return type_name

    # classVarDec: ('static'|'field') type varName (',' varName)* ';'
    def compile_class_var_dec(self):
        """Consume the leading static/field keyword, then parse the declaration that follows."""
        _tok, kwd = self._advance()  # static | field
        kind = kwd_to_kind[kwd]
        self._compile_dec(kind)

    # varName: identifier
    def compile_var_name(self):
        """Consume an identifier token and return its text."""
        identifier = self._require(T_ID)
        return identifier

    # subroutineDec: ('constructor'|'function'|'method') ('void'|type)
    #                subroutineName '(' parameterList ')' subroutineBody
    def compile_subroutine(self):
        """Parse one constructor, function or method declaration including its body."""
        _tok, kwd = self._advance()
        self.compile_void_or_type()  # return type is parsed but not used
        self.compile_subroutine_name()
        self.symbols.start_subroutine()
        if kwd == KW_METHOD:
            # Methods get 'this' defined as their first argument.
            self.symbols.define('this', self._cur_class, SK_ARG)
        self._require(T_SYM, '(')
        self.compile_parameter_list()
        self._require(T_SYM, ')')
        self.compile_subroutine_body(kwd)

    # 'void' | type
    def compile_void_or_type(self):
        """Consume ``'void'`` or a type and return the consumed token's value."""
        if not self._is_keyword(KW_VOID):
            return self.compile_type()
        _tok, void_kwd = self._advance()
        return void_kwd

    # subroutineName: identifier
    def compile_subroutine_name(self):
        """Consume the subroutine identifier and remember it; it is not entered in the symbol table."""
        subroutine_name = self.compile_var_name()
        self._cur_subroutine = subroutine_name

    # parameterList: (parameter (',' parameter)*)?
    def compile_parameter_list(self):
        """Parse ``(parameter (',' parameter)*)?``; an empty list consumes nothing."""
        if not self._is_type():
            return
        self.compile_parameter()
        while self._is_sym(','):
            self._advance()  # skip the comma
            self.compile_parameter()

    # parameter: type varName
    def compile_parameter(self):
        """Parse ``type varName`` and define it as an argument; does nothing if no type follows."""
        if not self._is_type():
            return
        param_type = self.compile_type()
        param_name = self.compile_var_name()
        self.symbols.define(param_name, param_type, SK_ARG)

    # subroutineBody: '{' varDec* statements '}'
    def compile_subroutine_body(self, kwd):
        """Parse ``'{' varDec* statements '}'`` and emit the subroutine's code.

        Args:
            kwd: the subroutine keyword (constructor / function / method);
                forwarded to ``write_func_decl`` to pick the prologue.

        Raises:
            Exception: from ``_require`` when either brace is missing.
        """
        self._require(T_SYM, '{')
        while self._is_var_dec():
            self.compile_var_dec()
        # The function header needs the local count, so it is written only
        # after all 'var' declarations have been seen.
        self.write_func_decl(kwd)
        self.compile_statements()
        # Consume the body's closing brace.  Without this, compile_class would
        # take it for the end of the class and skip all later subroutines.
        self._require(T_SYM, '}')

    # varDec: 'var' type varName (',' varName)* ';'
    def compile_var_dec(self):
        """Parse ``'var' type varName (',' varName)* ';'``, defining each name as a local."""
        self._require(T_KEYWORD, KW_VAR)
        result = self._compile_dec(SK_VAR)
        return result

    # statements: statement*
    def compile_statements(self):
        """Compile consecutive statements until the lookahead no longer starts one."""
        while True:
            if not self._is_statement():
                break
            self._compile_statement()

    # statement: letStatement | ifStatement | whileStatement | doStatement | returnStatement
    def _compile_statement(self):
        """Dispatch to the compiler for the statement kind at the lookahead.

        Does nothing when the lookahead starts none of the five kinds.
        """
        handlers = (
            (self._is_let, self.compile_let),
            (self._is_if, self.compile_if),
            (self._is_while, self.compile_while),
            (self._is_do, self.compile_do),
            (self._is_return, self.compile_return),
        )
        for matches, compile_it in handlers:
            if matches():
                compile_it()
                return

    # letStatement: 'let' varName ('[' expression ']')? '=' expression ';'
    def compile_let(self):
        """Parse ``'let' varName ('[' expression ']')? '=' expression ';'`` and emit the store."""
        self._require(T_KEYWORD, KW_LET)
        target = self.compile_var_name()
        is_array_store = self._is_sym('[')
        if is_array_store:
            self.compile_base_plus_index(target)  # leaves base+index on the stack
        self._require(T_SYM, '=')
        self.compile_expression()  # value to assign
        self._require(T_SYM, ';')
        if not is_array_store:
            self.vm_pop_variable(target)  # plain variable: pop straight into it
            return
        self.pop_array_element()  # *(base+index) = value

    def pop_array_element(self):
        """Emit the store of the stack-top value into the address just beneath it.

        The value is parked in a temp slot while the address is popped into
        the 'that' pointer, then pushed back and popped through 'that'.
        """
        vm = self.vm
        vm.pop_temp(TEMP_ARRAY)    # park the value
        vm.pop_that_ptr()          # address (base+index) -> 'that' pointer
        vm.push_temp(TEMP_ARRAY)   # bring the value back
        vm.pop_that()              # write it through 'that'

    # ('[' expression ']')?
    def compile_base_plus_index(self, name):
        """Emit ``base + index`` for ``name[expression]`` and leave the sum on the stack.

        The caller has already checked that the lookahead is '['.
        """
        self.vm_push_variable(name)  # array base
        self._advance()              # skip '['
        self.compile_expression()    # index
        self._require(T_SYM, ']')
        self.vm.write_vm_cmd('add')

    def compile_expression(self):
        """Compile ``term (op term)*`` strictly left to right, with no operator precedence."""
        self.compile_term()
        while self._is_op():
            _tok, symbol = self._advance()
            self.compile_term()
            # Operator is emitted after both operands are on the stack.
            self.vm.write_vm_cmd(vm_cmds[symbol])

    # term: integerConstant | stringConstant | keywordConstant | varName
    #     | varName '[' expression ']' | subroutineCall | '(' expression ')'
    #     | unaryOp term
    def compile_term(self):
        """Compile a single term.

        Handles constants, parenthesised expressions, unary operators and
        identifiers (plain variable, array element or subroutine call).
        Emits nothing when the lookahead starts none of these.
        """
        if self._is_const():
            self.compile_const()
            return
        if self._is_sym('('):
            self._advance()
            self.compile_expression()
            self._require(T_SYM, ')')
            return
        if self._is_unary_op():
            _tok, op = self._advance()
            self.compile_term()
            self.vm.write_vm_cmd(vm_unary_cmds[op])  # operator follows its operand
            return
        if not self._is_var_name():
            return
        _tok, name = self._advance()
        if self._is_sym('['):
            self.compile_array_subscript(name)
        elif self._is_sym('(.'):
            self.compile_subroutine_call(name)
        else:
            self.vm_push_variable(name)

    # integerConstant | stringConstant | keywordConstant
    def compile_const(self):
        """Consume a number, string or keyword constant and emit code that pushes it."""
        kind, value = self._advance()
        if kind == T_NUM:
            self.vm.push_const(value)
            return
        if kind == T_STR:
            self.write_string_const_init(value)  # builds the string, leaves its address
            return
        if kind == T_KEYWORD:
            self.compile_kwd_const(value)

    # '[' expression ']'
    def compile_array_subscript(self, name):
        """Parse ``'[' expression ']'`` after *name* and emit a push of that element."""
        self.vm_push_variable(name)   # array base
        self._require(T_SYM, '[')
        self.compile_expression()     # index
        self._require(T_SYM, ']')
        vm = self.vm
        vm.write_vm_cmd('add')        # base+index
        vm.pop_that_ptr()             # address -> 'that' pointer
        vm.push_that()                # push *(base+index)

    # subroutineCall: subroutineName '(' expressionList ')'
    #               | (className | varName) '.' subroutineName '(' expressionList ')'
    def compile_subroutine_call(self, name):
        """Compile a call whose leading identifier *name* was already consumed.

        A dotted call is delegated to ``compile_dotted_subroutine_call``; a
        bare call is treated as a method of the current class and gets the
        'this' pointer as an extra first argument.
        """
        var_type, _kind, _index = self.symbols.lookup(name)
        if not self._is_sym('.'):
            self.vm.push_this_ptr()
            target = self._cur_class + '.' + name
            num_args = 1
        else:
            num_args, target = self.compile_dotted_subroutine_call(name, var_type)
        self._require(T_SYM, '(')
        num_args += self.compile_expr_list()  # pushes the explicit arguments
        self._require(T_SYM, ')')
        self.vm.write_call(target, num_args)

    # keywordConstant: 'true' | 'false' | 'null' | 'this'
    def compile_kwd_const(self, kwd):
        """Emit the push for a keyword constant.

        'this' pushes the this pointer, 'true' pushes 1 followed by ``neg``,
        and anything else (false / null) pushes 0.
        """
        if kwd == KW_THIS:
            self.vm.push_this_ptr()
            return
        is_true = kwd == KW_TRUE
        self.vm.push_const(1 if is_true else 0)
        if is_true:
            self.vm.write_vm_cmd('neg')

    def compile_dotted_subroutine_call(self, name, type):
        """Parse the ``'.' subroutineName`` part of a qualified call.

        Args:
            name: identifier before the dot (a class name or a variable name).
            type: that identifier's symbol-table type; ``None`` when it is not
                a known variable, in which case it is taken as a class name.

        Returns:
            tuple: ``(num_args, qualified_name)``; ``num_args`` is 1 when the
            object pointer was pushed as an implicit argument, else 0.

        Raises:
            ValueError: when the variable before the dot has a builtin type.
        """
        num_args = 0
        obj_name = name
        self._advance()  # skip '.'
        name = self.compile_var_name()
        if self._is_builtin_type(type):     # e.g. int.func(123) not allowed
            # The error used to be constructed without being raised.
            raise ValueError('Cannot use "." operator on builtin type')
        elif type is None:                  # Calling using class name
            name = obj_name+'.'+name
        else:                               # Calling using object variable name
            num_args = 1
            self.vm_push_variable(obj_name) # push object ptr onto stack
            name = self.symbols.type_of(obj_name)+'.'+name
        return num_args, name

    # expressionList: (expression (',' expression)*)?
    def compile_expr_list(self):
        """Compile ``(expression (',' expression)*)?`` and return how many expressions were compiled."""
        if not self._is_term():
            return 0
        self.compile_expression()
        count = 1
        while self._is_sym(','):
            self._advance()  # skip the comma
            self.compile_expression()
            count += 1
        return count

    # ifStatement: 'if' '(' expression ')' '{' statements '}'
    #              ('else' '{' statements '}')?
    def compile_if(self):
        """Compile an if statement with its optional else branch."""
        self._require(T_KEYWORD, KW_IF)
        end_label = self.new_label()
        # Condition and 'then' block; the block ends with a jump to end_label.
        self._compile_cond_expression_statements(end_label)
        has_else = self._is_keyword(KW_ELSE)
        if has_else:
            self._advance()
            self._require(T_SYM, '{')
            self.compile_statements()
            self._require(T_SYM, "}")
        self.vm.write_label(end_label)

    # '(' expression ')' '{' statements '}'
    def _compile_cond_expression_statements(self, label):
        """Compile ``'(' expression ')' '{' statements '}'`` as a guarded block.

        Emits the condition, ``not``, a conditional jump past the block, the
        block itself, an unconditional jump to *label*, and finally the
        skip label.
        """
        self._require(T_SYM, '(')
        self.compile_expression()
        self._require(T_SYM, ')')
        self.vm.write_vm_cmd('not')  # jump is taken when the condition is false
        skip_label = self.new_label()
        self.vm.write_if(skip_label)
        self._require(T_SYM, '{')
        self.compile_statements()
        self._require(T_SYM, '}')
        self.vm.write_goto(label)
        self.vm.write_label(skip_label)

    # whileStatement: 'while' '(' expression ')' '{' statements '}'
    def compile_while(self):
        """Compile ``'while' '(' expression ')' '{' statements '}'``."""
        self._require(T_KEYWORD, KW_WHILE)
        loop_start = self.new_label()
        self.vm.write_label(loop_start)
        # The body's trailing jump goes back to loop_start, forming the loop.
        self._compile_cond_expression_statements(loop_start)

    # do_statement: 'do' subroutineCall ';'
    def compile_do(self):
        """Compile ``'do' subroutineCall ';'`` and discard the call's return value."""
        self._require(T_KEYWORD, KW_DO)
        callee = self._require(T_ID)
        self.compile_subroutine_call(callee)
        self.vm.pop_temp(TEMP_RETURN)  # drop the returned value
        self._require(T_SYM, ';')

    # returnStatement: 'return' expression? ';'
    def compile_return(self):
        """Compile ``'return' expression? ';'``; a bare return pushes constant 0 first."""
        self._require(T_KEYWORD, KW_RETURN)
        if self._is_sym(';'):
            self.vm.push_const(0)
        else:
            self.compile_expression()
        self._require(T_SYM, ';')
        self.vm.write_return()
Exemple #2
0
class CompilationEngine(object):
    def __init__(self, jack_fname):
        """Set up an engine for one Jack source file; compiling starts in ``compile``."""
        self._jack_fname = jack_fname
        self._s_table = SymbolTable()
        # Filled in while compiling.
        self._writer = None
        self._class_name = None
        self._current_func_name = None
        # True / False inside a subroutine, None outside of one.
        self._is_writing_void_func = None
        # Counter that keeps generated labels unique.
        self._n_labels = 0

    def compile(self, out_fname: str) -> None:
        """Compile the Jack file into *out_fname*.

        Raises ``CompilationException`` when a token follows the class.
        """
        tokens = Tokenizer(self._jack_fname)
        with VMWriter(out_fname) as writer:
            self._writer = writer
            first = tokens.next_token()
            trailing = self._compile_class(tokens, first)
            if not trailing:
                return
            raise CompilationException(
                f"Expected end of file, found {trailing}")

    def _compile_class(self, tknizer, token):
        """Compile a class starting at *token*; return the token after its closing brace."""
        _assert(token, CLASS)
        name_token = tknizer.next_token()
        _assert_identifier(name_token)
        self._class_name = name_token.value
        _assert(tknizer.next_token(), "{")

        current = tknizer.next_token()
        # Class variable declarations come first ...
        while current.value in [STATIC, FIELD]:
            current = self._compile_class_var_dec(tknizer, current)
        # ... followed by the subroutines.
        while current.value in [CONSTRUCTOR, FUNCTION, METHOD]:
            current = self._compile_subroutine_dec(tknizer, current)

        _assert(current, "}")
        return tknizer.next_token()

    def _compile_class_var_dec(self, tknizer, token):
        """Record one static/field declaration; return the token after its ';'."""
        _assert(token, [STATIC, FIELD])
        kind = SymbolTable.STATIC if token.value == STATIC else SymbolTable.FIELD

        type_token = tknizer.next_token()
        _assert_type(type_token)
        var_type = type_token.value

        # One name, then optionally more names separated by commas.
        while True:
            self._record_symbol(tknizer.next_token(), var_type, kind)
            separator = tknizer.next_token()
            if separator.value != ",":
                break

        _assert(separator, ";")
        return tknizer.next_token()

    def _compile_subroutine_dec(self, tknizer, token):
        """Compile one constructor, function or method declaration.

        *token* must be the subroutine keyword.  Returns the token that
        follows the subroutine's closing brace.
        """
        _assert(token, [CONSTRUCTOR, FUNCTION, METHOD])
        subroutine_type = token.value

        token = tknizer.next_token()
        _assert_type(token, allow_void=True)
        # Remembered so _compile_return knows which form of return to expect.
        self._is_writing_void_func = token.value == VOID
        self._s_table.start_subroutine(is_method=subroutine_type == METHOD)

        token = tknizer.next_token()
        _assert_identifier(token)
        subroutine_name = token.value
        self._current_func_name = subroutine_name

        _assert(tknizer.next_token(), "(")
        # populates symbol table with arguments
        token = self._compile_parameter_list(tknizer, tknizer.next_token())
        _assert(token, ")")

        _assert(tknizer.next_token(), "{")
        token = tknizer.next_token()
        while token.value == VAR:
            # populates symbol table with local variables
            token = self._compile_var_dec(tknizer, token)

        # The function header needs the local count, so it is written only
        # after every 'var' declaration has been processed.
        n_locals = self._s_table.var_count(SymbolTable.VAR)
        qualified_name = ".".join([self._class_name, subroutine_name])
        self._writer.write_function(qualified_name, n_locals)

        if subroutine_type == CONSTRUCTOR:
            # Allocate a block sized by the field count and store its address in pointer 0.
            size = self._s_table.var_count(SymbolTable.FIELD)
            self._writer.write_push("constant", size)
            self._writer.write_call("Memory.alloc", 1)
            self._writer.write_pop("pointer", 0)
        elif subroutine_type == METHOD:
            # Copy argument 0 into pointer 0.
            self._writer.write_push("argument", 0)
            self._writer.write_pop("pointer", 0)

        token = self._compile_statements(tknizer, token)
        _assert(token, "}")
        # Back outside any subroutine.
        self._is_writing_void_func = None
        self._s_table.complete_subroutine()

        return tknizer.next_token()

    def _compile_parameter_list(self, tknizer, token):
        """Record the parameters of a subroutine as ARG symbols.

        Args:
            tknizer: token source providing ``next_token()``.
            token: first token after the opening parenthesis.

        Returns:
            The first token that is not part of the list.  For an empty list
            this is *token* itself.

        Raises:
            CompilationException: via ``_assert_type`` / ``_record_symbol``
                when a parameter's type or name is malformed.
        """
        if not (token.value in [INT, CHAR, BOOLEAN]
                or token.type == IDENTIFIER):
            return token

        while True:
            # Validate every parameter's type; previously only the first one
            # was checked and any token after a comma was accepted as a type.
            _assert_type(token)
            var_type = token.value
            self._record_symbol(tknizer.next_token(), var_type,
                                SymbolTable.ARG)
            token = tknizer.next_token()
            if token.value == ",":
                token = tknizer.next_token()
            else:
                return token

    def _compile_var_dec(self, tknizer, token):
        """Record one ``var`` declaration as VAR symbols; return the token after its ';'."""
        _assert(token, VAR)
        type_token = tknizer.next_token()
        _assert_type(type_token)
        var_type = type_token.value

        # One name, then optionally more names separated by commas.
        while True:
            self._record_symbol(tknizer.next_token(), var_type,
                                SymbolTable.VAR)
            separator = tknizer.next_token()
            if separator.value != ",":
                break

        _assert(separator, ";")
        return tknizer.next_token()

    def _compile_statements(self, tknizer, token):
        """Compile statements while *token* is a statement keyword; return the first other token.

        The handler is looked up by name: keyword ``x`` is compiled by ``_compile_x``.
        """
        statement_keywords = [LET, IF, WHILE, DO, RETURN]
        while token.value in statement_keywords:
            handler = getattr(self, "_compile_" + token.value)
            token = handler(tknizer, token)
        return token

    def _compile_let(self, tknizer, token):
        """Compile a ``let`` statement, with or without an array subscript.

        Returns the token that follows the terminating ';'.
        """
        _assert(token, LET)
        token = tknizer.next_token()
        _assert_identifier(token)
        var_name = token.value

        token = tknizer.next_token()
        if token.value == "[":
            # Index expression first, then the array variable, then add.
            token = self._compile_expression(tknizer, tknizer.next_token())
            _assert(token, "]")
            self._push_variable(var_name)
            self._writer.write_add()

            _assert(tknizer.next_token(), "=")
            token = self._compile_expression(tknizer, tknizer.next_token())
            # Park the value in temp 0, pop the address into pointer 1,
            # then push the value back and pop it into that 0.
            self._writer.write_pop("temp", 0)
            self._writer.write_pop("pointer", 1)
            self._writer.write_push("temp", 0)
            self._writer.write_pop("that", 0)
        else:
            _assert(token, "=")
            token = self._compile_expression(tknizer, tknizer.next_token())
            self._pop_variable(var_name)

        _assert(token, ";")
        return tknizer.next_token()

    def _compile_if(self, tknizer, token):
        """Compile an ``if`` statement with its optional ``else`` branch.

        Returns the first token after the statement.
        """
        _assert(token, IF)
        _assert(tknizer.next_token(), "(")
        token = self._compile_expression(tknizer, tknizer.next_token())
        _assert(token, ")")
        _assert(tknizer.next_token(), "{")

        # Compare the condition with 0 and jump to false_label when it is equal.
        self._writer.write_push("constant", 0)
        self._writer.write_equals()
        false_label = self._allocate_label("IF_FALSE")
        self._writer.write_if(false_label)

        token = self._compile_statements(tknizer, tknizer.next_token())
        _assert(token, "}")

        token = tknizer.next_token()
        if token.value == ELSE:
            # The 'then' block jumps over the else block.
            skip_else_label = self._allocate_label("SKIP_ELSE")
            self._writer.write_goto(skip_else_label)
            self._writer.write_label(false_label)
            _assert(tknizer.next_token(), "{")
            token = self._compile_statements(tknizer, tknizer.next_token())
            _assert(token, "}")
            token = tknizer.next_token()
            self._writer.write_label(skip_else_label)
        else:
            self._writer.write_label(false_label)

        return token

    def _compile_while(self, tknizer, token):
        """Compile a ``while`` loop; return the token after its closing brace."""
        _assert(token, WHILE)
        _assert(tknizer.next_token(), "(")
        # Label placed before the condition so each iteration re-evaluates it.
        true_label = self._allocate_label("WHILE_TRUE")
        self._writer.write_label(true_label)

        token = self._compile_expression(tknizer, tknizer.next_token())
        _assert(token, ")")
        _assert(tknizer.next_token(), "{")

        # Compare the condition with 0 and leave the loop when it is equal.
        self._writer.write_push("constant", 0)
        self._writer.write_equals()
        false_label = self._allocate_label("WHILE_FALSE")
        self._writer.write_if(false_label)

        token = self._compile_statements(tknizer, tknizer.next_token())
        _assert(token, "}")
        self._writer.write_goto(true_label)
        self._writer.write_label(false_label)
        return tknizer.next_token()

    def _compile_do(self, tknizer, token):
        """Compile ``do subroutineCall ;``; return the token after the ';'."""
        _assert(token, DO)
        call_start = tknizer.next_token()
        after_call = self._compile_subroutine_call(tknizer, call_start)
        _assert(after_call, ";")
        # The call's result is unused: pop it into temp 0.
        self._writer.write_pop("temp", 0)
        return tknizer.next_token()

    def _compile_return(self, tknizer, token):
        """Compile a ``return`` statement.

        In a void subroutine the statement must be ``return;`` and constant 0
        is pushed as the return value.  Otherwise ``this`` or an expression
        provides the value.

        Returns:
            The token after the terminating ';'.

        Raises:
            CompilationException: when no subroutine is being compiled.
        """
        _assert(token, RETURN)
        token = tknizer.next_token()
        if self._is_writing_void_func is True:
            _assert(token, ";")
            self._writer.write_push("constant", 0)
        elif self._is_writing_void_func is False:
            if token.value == THIS:
                self._writer.write_push("pointer", 0)
                token = tknizer.next_token()
            else:
                token = self._compile_expression(tknizer, token)
            _assert(token, ";")
        else:
            # This used to raise the engine class itself, which is not an exception.
            raise CompilationException(
                "Encountered return statement outside function")
        self._writer.write_return()

        return tknizer.next_token()

    def _compile_subroutine_call(self, tknizer, first_token):
        """Compile a subroutine call that begins with identifier *first_token*.

        Three forms are told apart: ``var.name(...)`` where ``var`` is in the
        symbol table (method call on that object), ``Name.name(...)``
        otherwise (constructor or class function), and a bare ``name(...)``
        (method call on the current object).  Method calls pass the object
        as an extra first argument.

        Returns the token after the closing parenthesis.
        """
        _assert_identifier(first_token)

        is_method = False
        second_token = tknizer.next_token()
        if second_token.value == ".":
            token = tknizer.next_token()
            _assert_identifier(token)
            if self._s_table.has(first_token.value):
                # method call on another object
                is_method = True
                class_name = self._s_table.type_of(first_token.value)
                subroutine_name = ".".join([class_name, token.value])
                self._push_variable(first_token.value)
            else:
                # constructor or class function
                subroutine_name = ".".join([first_token.value, token.value])
            token = tknizer.next_token()
        else:
            # method call on this object
            is_method = True
            subroutine_name = ".".join([self._class_name, first_token.value])
            self._writer.write_push("pointer", 0)
            token = second_token

        _assert(token, "(")
        # The object pushed above counts as the first argument.
        n_args = 1 if is_method else 0
        token = tknizer.next_token()
        if token.value != ")":
            token = self._compile_expression(tknizer, token)
            n_args += 1
            while token.value == ",":
                token = self._compile_expression(tknizer, tknizer.next_token())
                n_args += 1

        _assert(token, ")")
        self._writer.write_call(subroutine_name, n_args)

        return tknizer.next_token()

    def _compile_expression(self, tknizer, token):
        """Compile ``term (op term)*`` left to right; return the first token after the expression.

        Each operator is emitted after its right-hand term.  ``*`` and ``/``
        become calls to ``Math.multiply`` / ``Math.divide``; the others map to
        a single writer method.
        """
        writer_methods = {
            "+": "write_add",
            "-": "write_sub",
            "&": "write_and",
            "|": "write_or",
            "<": "write_less_than",
            ">": "write_greater_than",
            "=": "write_equals",
        }
        math_calls = {"*": "Math.multiply", "/": "Math.divide"}

        token = self._compile_term(tknizer, token)
        while token.value in writer_methods or token.value in math_calls:
            op = token.value
            token = self._compile_term(tknizer, tknizer.next_token())
            if op in math_calls:
                self._writer.write_call(math_calls[op], 2)
            else:
                getattr(self._writer, writer_methods[op])()

        return token

    def _compile_term(self, tknizer, token):
        """Compile one term starting at *token*.

        Handles integer and string constants, the keyword constants, a
        parenthesised expression, a unary ``-`` / ``~``, an array element, a
        subroutine call and a plain variable.

        Returns:
            The first token after the term.

        Raises:
            CompilationException: for an unknown plain variable.
        """
        if token.type == INT_CONSTANT:
            self._writer.write_push("constant", token.value)
            return tknizer.next_token()
        elif token.type == STRING_CONSTANT:
            # The token value carries its surrounding quote characters.
            str_val = token.value[1:-1]
            self._writer.write_push("constant", len(str_val))
            self._writer.write_call("String.new", 1)
            for char in str_val:
                self._writer.write_push("constant", ord(char))
                self._writer.write_call("String.appendChar", 2)
            return tknizer.next_token()
        elif token.type == KEYWORD and token.value in [
                TRUE, FALSE, NULL, THIS
        ]:
            if token.value == TRUE:
                self._writer.write_push("constant", 1)
                self._writer.write_neg()
            elif token.value in [FALSE, NULL]:
                self._writer.write_push("constant", 0)
            elif token.value == THIS:
                # The current object lives in pointer 0.  "argument 0" is only
                # the object inside methods; in a constructor it is the first
                # declared parameter, so it gave the wrong value there.
                self._writer.write_push("pointer", 0)
            else:
                raise Exception(f"Bug: unexpected keyword {token.value}")
            return tknizer.next_token()
        elif token.value == "(":
            token = self._compile_expression(tknizer, tknizer.next_token())
            _assert(token, ")")
            return tknizer.next_token()
        elif token.value in ["-", "~"]:
            # Operand first, then the operator.
            next_token = self._compile_term(tknizer, tknizer.next_token())
            if token.value == "-":
                self._writer.write_neg()
            elif token.value == "~":
                self._writer.write_not()
            else:
                raise CompilationException(
                    f"Bug: Unexpected unary op {token.value}")
            return next_token
        else:
            # One token of lookahead decides between the identifier forms.
            next_token = tknizer.next_token()
            if next_token.value == "[":
                _assert_identifier(token)
                array_var_name = token.value
                token = self._compile_expression(tknizer, tknizer.next_token())
                _assert(token, "]")
                self._push_variable(array_var_name)
                self._writer.write_add()
                self._writer.write_pop("pointer", 1)
                self._writer.write_push("that", 0)
                return tknizer.next_token()
            elif next_token.value in ["(", "."]:
                # Undo the lookahead; the call compiler reads that token itself.
                tknizer.rewind()
                return self._compile_subroutine_call(tknizer, token)
            else:
                _assert_identifier(token)
                if not self._s_table.has(token.value):
                    raise CompilationException(
                        f"Unknown variable {token.value}")
                self._push_variable(token.value)
                return next_token

    def _push_variable(self, var_name):
        """Emit a push of *var_name* from the segment chosen by its symbol kind."""
        idx = self._s_table.index_of(var_name)
        kind = self._s_table.kind_of(var_name)
        if kind == SymbolTable.STATIC:
            segment = STATIC
        elif kind == SymbolTable.FIELD:
            segment = THIS
        elif kind == SymbolTable.ARG:
            segment = "argument"
        elif kind == SymbolTable.VAR:
            segment = "local"
        else:
            raise Exception(f"Bug: unexpected variable kind {kind}")
        self._writer.write_push(segment, idx)

    def _pop_variable(self, var_name):
        """Emit a pop into *var_name* in the segment chosen by its symbol kind."""
        idx = self._s_table.index_of(var_name)
        kind = self._s_table.kind_of(var_name)
        if kind == SymbolTable.STATIC:
            segment = STATIC
        elif kind == SymbolTable.FIELD:
            segment = THIS
        elif kind == SymbolTable.ARG:
            segment = "argument"
        elif kind == SymbolTable.VAR:
            segment = "local"
        else:
            raise Exception(f"Bug: unexpected variable kind {kind}")
        self._writer.write_pop(segment, idx)

    def _allocate_label(self, label_name):
        """Return a new label ``<class>.<function>$<label_name>$<counter>`` and advance the counter."""
        serial = self._n_labels
        self._n_labels = serial + 1
        return f"{self._class_name}.{self._current_func_name}${label_name}${serial}"

    def _record_symbol(self, token, typ, kind):
        """Define *token*'s value in the symbol table; reject tokens that are not identifiers."""
        if token.type == IDENTIFIER:
            self._s_table.define(token.value, typ, kind)
            return
        raise CompilationException(f"Expected an {IDENTIFIER}, "
                                   f'found {token.type}: "{token.value}"')
Exemple #3
0
class Parser:
    def __init__(self, f):
        """Build the collaborators for ``f`` and compile it immediately.

        ``f`` is unpacked into an input name and an output name and is also
        handed as-is to the tokeniser and the VM code writer. Construction
        runs the whole parse and then closes the writer.
        """
        in_name, out_name = f
        self.in_name = in_name
        self.out_name = out_name
        self.output = []
        self.tokeniser = Tokeniser(f)
        self.st_handler = SymbolTable()
        self.writer = VMCodeWriter(f)
        # Per-compilation state: the label generator, plus the current class
        # name while a class is being compiled.
        self.local_state = {'labeler': labeler()}
        self.parse()
        self.writer.close()

    def parse(self):
        if self.tokeniser.has_next():
            self.compileClass()
        return self.out_name, self.output

    def compileClass(self):
        """Compile ``class <name> { classVarDec* subroutine* }``.

        The class name is kept in ``self.local_state['class']`` while the
        class is being compiled and removed afterwards.
        """
        self.expect(TokenType.KEYWORD, 'class')
        class_name = self.expect(TokenType.IDENTIFIER)
        self.local_state['class'] = class_name
        self.expect(TokenType.SYMBOL, '{')

        # All class-level variable declarations come first ...
        while self.peek(TokenType.KEYWORD, CLASS_VAR_KEYWORDS):
            self.compileClassVarDec()

        # ... followed by the subroutines, each with a fresh subroutine scope.
        while self.peek(TokenType.KEYWORD, FXN_KEYWORDS):
            self.st_handler.start_subroutine()
            self.compileSubroutine()

        self.expect(TokenType.SYMBOL, '}')
        del self.local_state['class']

    def compileClassVarDec(self):
        """Compile one class-level variable declaration.

        Reads the declaring keyword, the type and the first name, defines that
        name in the symbol table, then defines any further comma-separated
        names with the same type and kind, and finally expects ``;``.
        """
        kind = keyword_to_kind[self.expect(TokenType.KEYWORD)]
        taipu = self.compileType()
        name = self.expect(TokenType.IDENTIFIER)
        self.st_handler.define(name, taipu, kind)
        # tryCompileVarList defines the remaining names itself and returns
        # nothing, so its result is not bound to a local.
        self.tryCompileVarList(taipu=taipu, kind=kind)
        self.expect(TokenType.SYMBOL, ";")

    def compileType(self):
        """Consume and return a type token.

        A type is either a keyword listed in ``BI_TYPES`` or any identifier.

        Raises:
            SyntaxError: if the next token is neither.
        """
        ttype, token = self.tokeniser.peek()
        if ttype == TokenType.KEYWORD and token in BI_TYPES:
            return self.expect(TokenType.KEYWORD, token)
        if ttype == TokenType.IDENTIFIER:
            return self.expect(TokenType.IDENTIFIER)
        message = "Expected type in {} or identifier, got: {} of type {}".format(
            BI_TYPES, token, ttype)
        raise SyntaxError(message)

    def tryCompileVarList(self, exp_type=False, taipu=None, kind=None):
        """Consume zero or more ``, [type] name`` continuations and define them.

        Args:
            exp_type: when true, each continuation carries its own type
                (parameter lists); otherwise ``taipu`` is reused for all.
            taipu: type applied to the names when ``exp_type`` is false.
            kind: identifier kind applied to every name.

        The names are collected first and only defined once the whole list
        has been read.
        """
        pending = []
        while self.peek(TokenType.SYMBOL, ","):
            self.expect(TokenType.SYMBOL, ",")
            if exp_type:
                taipu = self.compileType()
            pending.append((self.expect(TokenType.IDENTIFIER), taipu, kind))
        for var_name, var_type, var_kind in pending:
            self.st_handler.define(var_name, var_type, var_kind)

    def compileSubroutine(self):
        """Compile one subroutine: kind, return type, name, parameters, body.

        For a ``method`` the symbol ``this`` (typed as the current class) is
        defined as an argument before the parameters are read.
        """
        fxn_kind = self.expect(TokenType.KEYWORD, FXN_KEYWORDS)
        is_method = fxn_kind == 'method'
        if is_method:
            owner = self.local_state['class']
            self.st_handler.define('this', owner, IdentifierKind.ARGUMENT)
        # The return type is consumed but not used for code generation here.
        self.compileType()
        fxn_name = self.expect(TokenType.IDENTIFIER)
        self.expect(TokenType.SYMBOL, "(")
        self.compileParameterList()
        self.expect(TokenType.SYMBOL, ")")
        self.compileSubroutineBody(fxn_name, fxn_kind)

    def compileParameterList(self):
        """Define every parameter of a (possibly empty) parameter list.

        Does not consume the surrounding parentheses.
        """
        if self.peek(TokenType.SYMBOL, ")"):
            return  # empty parameter list
        kind = IdentifierKind.ARGUMENT
        first_type = self.compileType()
        first_name = self.expect(TokenType.IDENTIFIER)
        self.st_handler.define(first_name, first_type, kind)
        # Every further parameter carries its own type.
        self.tryCompileVarList(exp_type=True, kind=kind)

    def compileSubroutineBody(self, fxn_name, fxn_kind):
        """Compile ``{ varDec* statements }`` for the subroutine ``fxn_name``.

        The function declaration is written only after all ``var``
        declarations have been read, because it needs the local count.
        """
        self.expect(TokenType.SYMBOL, "{")
        while self.peek(TokenType.KEYWORD, "var"):
            self.compileVarDec()
        n_locals = self.st_handler.var_count(IdentifierKind.VAR)
        self.writer.fun_dec(fxn_name, n_locals)
        self.compileFxnKind(fxn_kind)
        self.compileStatements()
        self.expect(TokenType.SYMBOL, "}")

    def compileFxnKind(self, kind):
        if kind == 'constructor':
            num_fields = self.st_handler.var_count(IdentifierKind.FIELD)
            self.writer.alloc(num_fields)
            self.writer.pop_this_ptr()
        elif kind == 'method':
            self.writer.push_variable('this', self.st_handler)
            self.writer.pop_this_ptr()

    def compileVarDec(self):
        """Compile ``var <type> <name> (, <name>)* ;`` and define each local."""
        self.expect(TokenType.KEYWORD, "var")
        kind = IdentifierKind.VAR
        taipu = self.compileType()
        first_name = self.expect(TokenType.IDENTIFIER)
        self.st_handler.define(first_name, taipu, kind)
        # Further names share the declared type.
        self.tryCompileVarList(taipu=taipu, kind=kind)
        self.expect(TokenType.SYMBOL, ";")

    def compileStatements(self):
        """Compile statements for as long as the next token starts one."""
        while True:
            if not self.peek(TokenType.KEYWORD, STMT_KEYWORDS):
                break
            self.compileStatement()

    def compileStatement(self):
        """Dispatch on the next keyword to the matching statement compiler.

        Does nothing when the next token is not one of the five statement
        keywords.
        """
        handlers = (
            ("let", self.compileLet),
            ("if", self.compileIf),
            ("while", self.compileWhile),
            ("do", self.compileDo),
            ("return", self.compileReturn),
        )
        for keyword, handler in handlers:
            if self.peek(TokenType.KEYWORD, keyword):
                handler()
                return

    def compileLet(self):
        """Compile ``let <var> [ '[' expr ']' ] = expr ;``.

        For an array target the address (base + offset) is computed before
        the right-hand side; afterwards the value is parked in temp and
        stored through the that-pointer. A plain target is popped directly.
        """
        self.expect(TokenType.KEYWORD, "let")
        target = self.expect(TokenType.IDENTIFIER)
        array_assignment = False
        if self.peek(TokenType.SYMBOL, "["):
            array_assignment = True
            self.compileBasePlusOffset(target)
        self.expect(TokenType.SYMBOL, "=")
        self.compileExpression()
        self.expect(TokenType.SYMBOL, ";")
        if not array_assignment:
            self.writer.pop_variable(target, self.st_handler)
            return
        self.saveToTemp()
        self.popToArray()

    def compileBasePlusOffset(self, base):
        """Compile ``[ expr ]`` after ``base`` and emit ``base + expr``.

        The base variable is pushed first, then the bracketed index
        expression is compiled, then the ``+`` operation is written.
        """
        writer = self.writer
        writer.push_variable(base, self.st_handler)
        self.expect(TokenType.SYMBOL, "[")
        self.compileExpression()
        self.expect(TokenType.SYMBOL, "]")
        writer.binary_op("+")

    def saveToTemp(self):
        self.writer.pop('temp', 0)

    def popToArray(self):
        self.writer.pop_that_ptr()
        self.writer.push('temp', 0)
        self.writer.pop_that()

    def compileIf(self):
        """Compile ``if (cond) { ... } [else { ... }]``.

        The end label is drawn before the condition is compiled; the true
        branch jumps to it, and it is written after the optional else block.
        """
        self.expect(TokenType.KEYWORD, "if")
        end_label = next(self.local_state['labeler'])
        self.compileCond(end_label)
        has_else = self.peek(TokenType.KEYWORD, "else")
        if has_else:
            self.expect(TokenType.KEYWORD, "else")
            self.expectBracedStatements()
        self.writer.label(end_label)

    def expectBracedStatements(self):
        """Compile a statement sequence enclosed in ``{`` and ``}``."""
        opener, closer = "{", "}"
        self.expect(TokenType.SYMBOL, opener)
        self.compileStatements()
        self.expect(TokenType.SYMBOL, closer)

    def compileWhile(self):
        """Compile ``while (cond) { ... }``.

        A loop label is written before the condition; the body ends with a
        jump back to it.
        """
        self.expect(TokenType.KEYWORD, "while")
        loop_label = next(self.local_state['labeler'])
        self.writer.label(loop_label)
        self.compileCond(loop_label)

    def compileCond(self, ret):
        """Compile ``(cond) { body }`` shared by ``if`` and ``while``.

        Emitted order: condition, ``~``, if-goto a fresh skip label, the
        body, goto ``ret``, then the skip label.

        Args:
            ret: label to jump to once the body has run.
        """
        self.expectGroupedExpression()
        self.writer.unary_op('~')
        # The skip label is drawn only after the condition has been compiled.
        skip_label = next(self.local_state['labeler'])
        self.writer.ifgoto(skip_label)
        self.expectBracedStatements()
        self.writer.goto(ret)
        self.writer.label(skip_label)

    def expectGroupedExpression(self):
        """Compile an expression enclosed in ``(`` and ``)``."""
        opener, closer = "(", ")"
        self.expect(TokenType.SYMBOL, opener)
        self.compileExpression()
        self.expect(TokenType.SYMBOL, closer)

    def compileDo(self):
        """Compile ``do <subroutineCall> ;``.

        The call's result is popped into temp before the terminating ``;``
        is consumed.
        """
        self.expect(TokenType.KEYWORD, "do")
        first_name = self.expect(TokenType.IDENTIFIER)
        self.compileSubroutineCall(first_name)
        # A do-statement ignores the returned value.
        self.writer.pop("temp", "0")
        self.expect(TokenType.SYMBOL, ";")

    def compileReturn(self):
        """Compile ``return [expr] ;``.

        When no expression is given, the integer constant 0 is written in
        its place before the return.
        """
        self.expect(TokenType.KEYWORD, "return")
        if self.peek(TokenType.SYMBOL, ";"):
            self.writer.int_const(0)
        else:
            self.compileExpression()
        self.writer.ret()
        self.expect(TokenType.SYMBOL, ";")

    def compileExpression(self):
        """Compile ``term (op term)*``.

        Each operator is written after its right-hand term, so operators
        are applied strictly left to right.
        """
        self.compileTerm()
        while True:
            if not self.peek(TokenType.SYMBOL, EXP_SYMBOLS):
                break
            operator = self.compileOp()
            self.compileTerm()
            self.writer.binary_op(operator)

    def compileOp(self):
        """Consume the next token as a symbol and return it."""
        operator = self.expect(TokenType.SYMBOL)
        return operator

    def compileTerm(self):
        """Compile a single term of an expression.

        Handles, in this order: integer constants, string constants, keyword
        constants, unary operators, parenthesised expressions, and finally a
        name, which is an array access, a subroutine call or a plain
        variable depending on the token that follows it.

        Does nothing when no token is left.
        """
        if self.peek(TokenType.INT_CONST):
            value = self.expect(TokenType.INT_CONST)
            self.writer.int_const(value)
        elif self.peek(TokenType.STR_CONST):
            text = self.compileStrConst()
            self.writer.str_const(text)
        elif self.peek(TokenType.KEYWORD, KEYWORD_CONSTANTS):
            kw = self.expect(TokenType.KEYWORD, KEYWORD_CONSTANTS)
            self.writer.kw_const(kw)
        elif self.peek(TokenType.SYMBOL, UNARY_OPS):
            self.compileUnaryOp()
        elif self.peek(TokenType.SYMBOL, "("):
            self.expectGroupedExpression()
        elif self.tokeniser.has_next():
            _, name = self.tokeniser.next()
            # peek() is False when no token follows, so a name that is the
            # very last token is still pushed as a variable rather than
            # being consumed and silently dropped.
            if self.peek(TokenType.SYMBOL, "["):
                self.compileArrayAccess(name)
            elif self.peek(TokenType.SYMBOL, ["(", "."]):
                self.compileSubroutineCall(name)
            else:
                self.writer.push_variable(name, self.st_handler)

    def compileStrConst(self):
        ttype, token = self.tokeniser.next()
        return token[1:-1]

    def compileUnaryOp(self):
        """Compile ``-term`` or ``~term``; the operator is written after the
        operand."""
        unary_symbols = ["-", "~"]
        operator = self.expect(TokenType.SYMBOL, unary_symbols)
        self.compileTerm()
        self.writer.unary_op(operator)

    def compileArrayAccess(self, arr):
        """Compile ``arr[expr]`` used as a value.

        Computes base + offset, pops it into the that-pointer and pushes the
        element read through ``that``.
        """
        self.compileBasePlusOffset(arr)
        writer = self.writer
        writer.pop_that_ptr()
        writer.push_that()

    def compileSubroutineCall(self, caller):
        """Compile the remainder of a subroutine call whose first name,
        ``caller``, has already been consumed.

        * ``caller(...)`` is a call on the current object, qualified with the
          current class name.
        * ``caller.name(...)`` is qualified through the symbol table.

        Raises:
            SyntaxError: if the next token is neither ``(`` nor ``.``.
        """
        if self.peek(TokenType.SYMBOL, "("):
            method, nargs = self.compileSelfFunctionCall(caller)
            qualified_name = self.local_state['class'] + '.' + method
        elif self.peek(TokenType.SYMBOL, "."):
            method, nargs = self.compileMethodCall(caller)
            qualified_name = self.st_handler.qualify(caller, method)
        else:
            # Without this branch the names below would be unbound and the
            # caller would see an UnboundLocalError instead of a parse error.
            raise SyntaxError(
                "Expected ( or . after {} in subroutine call".format(caller))
        self.writer.call(qualified_name, nargs)

    def compileSelfFunctionCall(self, method):
        """Compile the argument list of ``method(...)`` called on the current
        object.

        The this-pointer is pushed before the arguments.

        Returns:
            ``(method, nargs)`` where ``nargs`` counts the pushed this-pointer
            plus the explicit arguments.
        """
        self.writer.push_this_ptr()
        explicit_args = self.expectExpressionList()
        return method, explicit_args + 1

    def compileMethodCall(self, caller):
        """Compile ``.name(args)`` following ``caller``.

        When the symbol table reports ``caller`` as an object, it is pushed
        as an extra leading argument.

        Returns:
            ``(method, nargs)``: the name after the dot and the total number
            of pushed arguments.
        """
        receiver_is_object = self.st_handler.is_object(caller)
        if receiver_is_object:
            self.writer.push_variable(caller, self.st_handler)
        self.expect(TokenType.SYMBOL, ".")
        method = self.expect(TokenType.IDENTIFIER)
        nargs = 1 if receiver_is_object else 0
        nargs += self.expectExpressionList()
        return method, nargs

    def expectExpressionList(self):
        """Compile ``( expressionList )`` and return the expression count."""
        self.expect(TokenType.SYMBOL, "(")
        count = self.compileExpressionList()
        self.expect(TokenType.SYMBOL, ")")
        return count

    def compileExpressionList(self):
        """Compile a comma-separated (possibly empty) list of expressions.

        Does not consume the closing ``)``.

        Returns:
            The number of expressions compiled.
        """
        if self.peek(TokenType.SYMBOL, ")"):
            return 0  # empty list
        self.compileExpression()
        count = 1
        while self.peek(TokenType.SYMBOL, ","):
            self.expect(TokenType.SYMBOL, ",")
            self.compileExpression()
            count += 1
        return count

    def peek(self, e_type, e_token=None):
        if not self.tokeniser.has_next():
            return False
        a_type, a_token = self.tokeniser.peek()
        return self.token_match(e_type, e_token, a_type, a_token)

    def expect(self, e_type, e_token=None):
        a_type, a_token = self.tokeniser.next()
        if self.token_match(e_type, e_token, a_type, a_token):
            return a_token
        else:
            raise SyntaxError(
                "Expected {} of type {}, got {} of type {}".format(
                    e_token, e_type, a_token, a_type))

    def token_match(self, e_type, e_token, a_type, a_token):
        return (e_type == a_type or (type(e_type) == list and a_type in e_type)) and \
                    (e_token is None or e_token == a_token or (type(e_token) == list and a_token in e_token))
Exemple #4
0
class CompilationEngine:
	def __init__(self, source, destination):
		"""Set up the VM writer, the look-ahead token stream and the symbol table.

		Nothing is read from ``source`` here beyond whatever the tokenizer
		and ``Lookahead`` constructors do; compilation starts in ``compile``.
		"""
		self.src, self.dst = source, destination
		self.writer = VMWriter(destination)
		token_stream = tokenizor.newTokenizor(self.src)
		self.iter = Lookahead(token_stream)
		self._symbol_table = SymbolTable()

	def compile(self):
		"""Compile the class in the source and return its root element."""
		return self._compileClass()

	def _compileClass(self):
		"""Compile ``class <name> { classVarDec* subroutineDec* }``.

		Stores the class name in ``self.className`` and returns the element
		built for the class.
		"""
		root = Element(ELEMENTS.CLASS)
		self._readKeyword(root, ELEMENTS.CLASS)
		self.className = self._readIdentifier(root)
		self._readSymbol(root, _SYMBOLS.BRACKET_CURLY_OPEN)
		# Both helpers loop internally over all declarations of their kind.
		self._compileClassVarDec(root)
		self._compileSubroutine(root)
		self._readSymbol(root, _SYMBOLS.BRACKET_CURLY_CLOSE)
		return root

	def _compileClassVarDec(self, parent):
		"""Compile every consecutive class-level variable declaration.

		Each declaration becomes one element appended to ``parent``.
		"""
		while True:
			upcoming = self.nextTok
			if not (upcoming.type == tokenizor.KEYWORD and upcoming.value in _CLASSVARDEC.FIELD_TYPES):
				break
			declaration = Element(ELEMENTS.CLASSVARDEC)
			self._readKeyword(declaration)
			self._readType(declaration)
			self._readIdentifier(declaration)
			# Further names separated by commas.
			while self._readSymbolOptional(declaration, _SYMBOLS.COMMA):
				self._readIdentifier(declaration)
			self._readSymbol(declaration, _SYMBOLS.SEMI_COLON)
			parent.append(declaration)

	def _compileSubroutine(self, parent):
		"""Compile every consecutive subroutine declaration.

		For each one: starts a new subroutine scope, defines ``this`` as an
		argument when it is a method, resets the label counter, then compiles
		the parameter list and the body. The element is appended to ``parent``.
		"""
		while True:
			upcoming = self.nextTok
			if not (upcoming and upcoming.type == tokenizor.KEYWORD and upcoming.value in _SUBROUTINEDEC.TYPES):
				break
			declaration = Element(ELEMENTS.SUBROUTINEDEC)
			function_type = self._readKeyword(declaration)
			self._readReturnType(declaration)
			self.methodName = self._readIdentifier(declaration)
			self._symbol_table.startSubroutine(self.className, self.methodName)
			if function_type == _SUBROUTINEDEC.METHOD:
				self._symbol_table.define("this", self.className, SYM_KINDS.ARG)
			# Label ids restart for every subroutine (incremented before use).
			self._uid = -1
			self._readSymbol(declaration, _SYMBOLS.PARENTHESES_OPEN)
			self._compileParameters(declaration)
			self._readSymbol(declaration, _SYMBOLS.PARENTHESES_CLOSE)
			self._compileSubroutineBody(declaration, function_type)
			parent.append(declaration)

	def _gen_label(self, type_):
		self._uid += 1
		return "%s.%s.%s.%d" % (self.className, self.methodName, type_, self._uid)

	def _gen_labels(self, *parts):
		self._uid += 1
		return ["%s.%s.%s.%d" % (self.className, self.methodName, part, self._uid) for part in parts]

	def _compileSubroutineBody(self, parent, function_type):
		"""Compile ``{ varDec* statements }`` and emit the function prologue.

		The function name is taken from the third child of ``parent``. A
		constructor allocates one word per field and stores the result in
		pointer 0; a method copies argument 0 into pointer 0.
		"""
		bodyE = Element(ELEMENTS.SUBROUTINEBODY)
		self._readSymbol(bodyE, _SYMBOLS.BRACKET_CURLY_OPEN)
		# _compileVarDec returns the number of declared local variables.
		local_count = self._compileVarDec(bodyE)
		qualified = "%s.%s" % (self.className, parent[2].text)
		self.writer.writeFunction(qualified, local_count)
		if function_type == _SUBROUTINEDEC.CONSTRUCTOR:
			self.writer.writePush(SEGMENT.CONST, self._symbol_table.varCount(SYM_KINDS.FIELD))
			self.writer.writeCall("Memory.alloc", 1)
			self.writer.writePop(SEGMENT.POINTER, 0)
		elif function_type == _SUBROUTINEDEC.METHOD:
			self.writer.writePush(SEGMENT.ARG, 0)
			self.writer.writePop(SEGMENT.POINTER, 0)
		self._compileStatements(bodyE)
		self._readSymbol(bodyE, _SYMBOLS.BRACKET_CURLY_CLOSE)
		parent.append(bodyE)

	def _compileStatements(self, parent):
		"""Compile consecutive let/if/while/do/return statements.

		Each statement is turned into an element under a new statements
		element (appended to ``parent``) while the matching VM commands are
		written through ``self.writer``. Stops at the first token that is not
		a statement keyword.
		"""
		statementsE = Element(ELEMENTS.STATEMENTS)
		while self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _STATEMENTS.TYPE_NAMES:
			if self.nextTok.value == _STATEMENTS.LET:
				# let <name> [ '[' expr ']' ] = expr ;
				statementE = Element(ELEMENTS.STATEMENT_LET)
				self._readKeyword(statementE)
				identifier = self._readIdentifier(statementE)
				is_array = False
				if self._readSymbolOptional(statementE, _SYMBOLS.BRACKET_OPEN):
					is_array = True
					# Target address = index expression + array base.
					self._compileExpression(statementE)
					self.writer.writePush(*self._identifier_data(identifier))
					self.writer.writeArithmetic("add")
					self._readSymbol(statementE, _SYMBOLS.BRACKET_CLOSE)
				self._readSymbol(statementE, _SYMBOLS.EQUAL)
				self._compileExpression(statementE)
				self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
				if is_array:
					# Park the value in temp 0, load the address into
					# pointer 1, then store the value through that 0.
					self.writer.writePop(SEGMENT.TEMP, 0)
					self.writer.writePop(SEGMENT.POINTER, 1)
					self.writer.writePush(SEGMENT.TEMP, 0)
					self.writer.writePop(SEGMENT.THAT, 0)
				else:
					self.writer.writePop(*self._identifier_data(identifier))
				statementsE.append(statementE)
			elif self.nextTok.value == _STATEMENTS.IF:
				# if (cond) { ... } [else { ... }]
				label_else, label_end = self._gen_labels("if.else", "if.end")
				statementE = Element(ELEMENTS.STATEMENT_IF)
				self._readKeyword(statementE)
				self._readSymbol(statementE, _SYMBOLS.PARENTHESES_OPEN)
				self._compileExpression(statementE)
				self._readSymbol(statementE, _SYMBOLS.PARENTHESES_CLOSE)
				# Negate the condition and jump to the else label on it.
				self.writer.writeArithmetic("not")
				self.writer.writeIf(label_else)
				self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
				self._compileStatements(statementE)
				self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
				self.writer.writeGoto(label_end)
				# The else label is written even when no else block follows.
				self.writer.writeLabel(label_else)
				if self._readKeywordOptional(statementE, _KEYWORDS.ELSE):
					self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
					self._compileStatements(statementE)
					self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
				self.writer.writeLabel(label_end)
				statementsE.append(statementE)
			elif self.nextTok.value == _STATEMENTS.WHILE:
				# while (cond) { ... }
				label_start, label_end = self._gen_labels("while.start", "while.end")
				self.writer.writeLabel(label_start)
				statementE = Element(ELEMENTS.STATEMENT_WHILE)
				self._readKeyword(statementE)
				self._readSymbol(statementE, _SYMBOLS.PARENTHESES_OPEN)
				self._compileExpression(statementE)
				self._readSymbol(statementE, _SYMBOLS.PARENTHESES_CLOSE)
				# Negate the condition and jump to the end label on it.
				self.writer.writeArithmetic("not")
				self.writer.writeIf(label_end)
				self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
				self._compileStatements(statementE)
				self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
				statementsE.append(statementE)
				self.writer.writeGoto(label_start)
				self.writer.writeLabel(label_end)
			elif self.nextTok.value == _STATEMENTS.DO:
				# _compileDo appends its own element to statementsE.
				self._compileDo(statementsE)
			elif self.nextTok.value == _STATEMENTS.RETURN:
				# return [expr] ;
				statementE = Element(ELEMENTS.STATEMENT_RETURN)
				self._readKeyword(statementE)
				if not (self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == _SYMBOLS.SEMI_COLON):
					self._compileExpression(statementE)
				else:
					# A bare return pushes constant 0 in place of a value.
					self.writer.writePush(SEGMENT.CONST, 0)
				self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
				self.writer.writeReturn()
				statementsE.append(statementE)
		# An empty statements element gets a newline as its text.
		if len(statementsE) == 0:
			statementsE.text = "\n"
		parent.append(statementsE)

	def _compileExpression(self, parent):
		"""Compile ``term (op term)*`` and append the expression to ``parent``.

		Each operator symbol is passed to ``writeArithmetic`` after its
		right-hand term has been compiled.
		"""
		expressionE = Element(ELEMENTS.EXPRESSION)
		self._readTerm(expressionE)
		while True:
			upcoming = self.nextTok
			if not (upcoming.type == tokenizor.SYMBOL and upcoming.value in _SYMBOLS.OPERATORS):
				break
			operator = self._readSymbol(expressionE)
			self._readTerm(expressionE)
			self.writer.writeArithmetic(operator)
		parent.append(expressionE)

	def _compileExpressionList(self, parent):
		"""Compile ``( expr (, expr)* )`` including both parentheses.

		Returns:
			The number of expressions compiled.
		"""
		self._readSymbol(parent, _SYMBOLS.PARENTHESES_OPEN)
		expListE = Element(ELEMENTS.EXPRESSION_LIST)
		count = 0
		while True:
			upcoming = self.nextTok
			if upcoming.type == tokenizor.SYMBOL and upcoming.value == _SYMBOLS.PARENTHESES_CLOSE:
				break
			self._compileExpression(expListE)
			self._readSymbolOptional(expListE, _SYMBOLS.COMMA)
			count += 1
		# hack for TextComparer: an empty list gets a newline as its text
		if len(expListE) == 0:
			expListE.text = "\n"
		parent.append(expListE)
		self._readSymbol(parent, _SYMBOLS.PARENTHESES_CLOSE)
		return count

	def _compileDo(self, parent):
		"""Compile ``do <call> ;`` and discard the call's result into temp 0.

		* ``name(...)``: call on the current object; pointer 0 is pushed and
		  the name is qualified with the current class.
		* ``var.name(...)`` where ``var`` has a type in the symbol table: the
		  variable is pushed and the name is qualified with that type.
		* ``Other.name(...)``: called as written, with no implicit argument.
		"""
		statementE = Element(ELEMENTS.STATEMENT_DO)
		self._readKeyword(statementE, _STATEMENTS.DO)
		identifier = self._readIdentifier(statementE)
		nArgs = 0
		if not self._readSymbolOptional(statementE, _SYMBOLS.DOT):
			identifier = "%s.%s" % (self.className, identifier)
			self.writer.writePush(SEGMENT.POINTER, 0)
			nArgs = 1
		else:
			type_ = self._symbol_table.typeOf(identifier)
			if type_:
				self.writer.writePush(*self._identifier_data(identifier))
				nArgs = 1
				owner = type_
			else:
				owner = identifier
			identifier = "%s.%s" % (owner, self._readIdentifier(statementE))
		nArgs += self._compileExpressionList(statementE)
		self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
		self.writer.writeCall(identifier, nArgs)
		self.writer.writePop(SEGMENT.TEMP, 0)
		parent.append(statementE)

	def _compileVarDec(self, parent):
		"""Compile every consecutive ``var`` declaration.

		Returns:
			The total number of variable names declared.
		"""
		declared = 0
		while True:
			upcoming = self.nextTok
			if not (upcoming.type == tokenizor.KEYWORD and upcoming.value == _KEYWORDS.VAR):
				break
			varDecE = Element(ELEMENTS.VAR_DEC)
			self._readKeyword(varDecE, _KEYWORDS.VAR)
			self._readType(varDecE)
			self._readIdentifier(varDecE)
			declared += 1
			while self._readSymbolOptional(varDecE, _SYMBOLS.COMMA):
				self._readIdentifier(varDecE)
				declared += 1
			self._readSymbol(varDecE, _SYMBOLS.SEMI_COLON)
			parent.append(varDecE)
		return declared

	def _compileParameters(self, parent):
		"""Compile a (possibly empty) parameter list, without the parentheses.

		Keeps reading ``type name [,]`` groups while the next token can start
		a type. An empty list gets a newline as its text.
		"""
		paramListE = Element(ELEMENTS.PARAM_LIST)
		while True:
			upcoming = self.nextTok
			starts_builtin = upcoming.type == tokenizor.KEYWORD and upcoming.value in _CLASSVARDEC.VAR_TYPES
			if not (starts_builtin or upcoming.type == tokenizor.IDENTIFIER):
				break
			self._readType(paramListE)
			self._readIdentifier(paramListE)
			self._readSymbolOptional(paramListE, _SYMBOLS.COMMA)
		if len(paramListE) == 0:
			paramListE.text = "\n"
		parent.append(paramListE)

##############################
########## READ ##############
##############################

	def _readTerm(self, parent):
		"""Compile one term and append its element to ``parent``.

		Handles integer and string constants, keyword constants, names
		(plain variable, array element, call on the current object, or
		qualified call), parenthesised expressions and unary operators.

		Raises:
			SyntaxError: if the next token cannot start a term.
		"""
		termE = Element(ELEMENTS.TERM)
		if self.nextTok.type == tokenizor.INTEGER:
			self.next()
			termE.append(_leafElement(ELEMENTS.INTEGER_CONSTANT, self.tok.value))
			self.writer.writePush(SEGMENT.CONST, self.tok.value)
		elif self.nextTok.type == tokenizor.STRING:
			self.next()
			termE.append(_leafElement(ELEMENTS.STRING_CONSTANT, self.tok.value))
			string_value = self.tok.value
			# Build the string at run time: String.new(len), then one
			# appendChar call per character.
			self.writer.writePush(SEGMENT.CONST, len(string_value))
			self.writer.writeCall("String.new", 1)
			for char in string_value:
				self.writer.writePush(SEGMENT.CONST, ord(char))
				self.writer.writeCall("String.appendChar", 2)
		elif self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _KEYWORDS.CONSTANTS:
			self.next()
			termE.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
			_KW_CONT_WRITE[self.tok.value](self.writer)
		elif self.nextTok.type == tokenizor.IDENTIFIER:
			identifier = self._readIdentifier(termE)
			nArgs = 0
			if self._readSymbolOptional(termE, _SYMBOLS.BRACKET_OPEN):
				# Array element: index + base -> pointer 1, then read that 0.
				self._compileExpression(termE)
				self.writer.writePush(*self._identifier_data(identifier))
				self.writer.writeArithmetic("add")
				self.writer.writePop(SEGMENT.POINTER, 1)
				self.writer.writePush(SEGMENT.THAT, 0)
				self._readSymbol(termE, _SYMBOLS.BRACKET_CLOSE)
			elif self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == _SYMBOLS.PARENTHESES_OPEN:
				# name(...): a call on the current object, handled the same
				# way as in _compileDo -- qualify with the class, push
				# pointer 0 and count it as an argument.
				identifier = "%s.%s" % (self.className, identifier)
				self.writer.writePush(SEGMENT.POINTER, 0)
				nArgs += 1
				# _compileExpressionList consumes both parentheses itself, so
				# the closing one must not be read again here.
				nArgs += self._compileExpressionList(termE)
				self.writer.writeCall(identifier, nArgs)
			elif self._readSymbolOptional(termE, _SYMBOLS.DOT):
				type_ = self._symbol_table.typeOf(identifier)
				if type_:
					# Call on a variable: push it and qualify with its type.
					segment, index = self._identifier_data(identifier)
					self.writer.writePush(segment, index)
					nArgs += 1
					identifier = "%s.%s" % (type_, self._readIdentifier(termE))
				else:
					# Name unknown to the symbol table: call it as written.
					identifier = "%s.%s" % (identifier, self._readIdentifier(termE))
				nArgs += self._compileExpressionList(termE)
				self.writer.writeCall(identifier, nArgs)
			else:
				self.writer.writePush(*self._identifier_data(identifier))
		elif self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == _SYMBOLS.PARENTHESES_OPEN:
			self.next()
			termE.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
			self._compileExpression(termE)
			self._readSymbol(termE, _SYMBOLS.PARENTHESES_CLOSE)
		elif self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value in _SYMBOLS.UNARY_OPARTORS:
			self.next()
			sym = self.tok.value
			termE.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
			self._readTerm(termE)
			self.writer.writeArithmeticUnary(sym)
		else:
			# The offending token is the look-ahead, not the token that was
			# consumed last.
			raise self._syntaxError("Unexpected %s." % self.nextTok.value, self.nextTok)
		parent.append(termE)

	def _identifier_data(self, identifier):
		"""Return ``(segment, index)`` for ``identifier``.

		The segment is looked up in ``_SEG_TRANSLATE`` by the symbol's kind.
		"""
		table = self._symbol_table
		segment = _SEG_TRANSLATE[table.kindOf(identifier)]
		index = table.indexOf(identifier)
		return segment, index

	def _readIdentifier(self, parent):
		"""Consume an identifier, record it in the symbol table when it is
		being declared, append its element to ``parent`` and return its name.

		An identifier counts as a declaration when it is read
		* inside a class-var or var declaration after the type, or
		* inside a parameter list directly after a type.
		Any other identifier is looked up; when the symbol table knows it,
		the element is annotated with its type, kind and index.

		Raises:
			SyntaxError: if the consumed token is not an identifier.
		"""
		self.next()
		self._assertToken(self.tok, ELEMENTS.IDENTIFIER, type_=tokenizor.IDENTIFIER)
		name = self.tok.value
		element = _leafElement(ELEMENTS.IDENTIFIER, name)
		type_ = None
		kind = None
		index = None
		# Declarations are recognised before any lookup, so a name that
		# already resolves in another scope (a local with the same name as a
		# field) is still defined rather than treated as a use.
		if parent.tag in (ELEMENTS.CLASSVARDEC, ELEMENTS.VAR_DEC) and len(parent) > 1:
			# Children so far: declaring keyword, type, [earlier names ...].
			type_ = parent[1].text
			kind = _SYM_KIND_MAP[parent[0].text]
		elif parent.tag == ELEMENTS.PARAM_LIST and len(parent) > 0 and parent[-1].text != _SYMBOLS.COMMA:
			# Directly after a comma the identifier is the *type* of the next
			# parameter (e.g. ``int a, Foo b``), not a parameter name, and
			# must not be defined as an argument.
			type_ = parent[-1].text
			kind = SYM_KINDS.ARG
		if kind is not None:
			index = self._symbol_table.define(name, type_, kind)
		else:
			type_ = self._symbol_table.typeOf(name)
			if type_ is not None:
				kind = self._symbol_table.kindOf(name)
				index = self._symbol_table.indexOf(name)
		if kind is not None:
			element.set("type", type_)
			element.set("kind", str(kind))
			element.set("index", str(index))
		parent.append(element)
		return name

	def _readType(self, parent):
		"""Consume a variable type: a keyword from ``_CLASSVARDEC.VAR_TYPES``
		or otherwise an identifier."""
		upcoming = self.nextTok
		is_builtin = upcoming.type == tokenizor.KEYWORD and upcoming.value in _CLASSVARDEC.VAR_TYPES
		if not is_builtin:
			self._readIdentifier(parent)
			return
		self.next()
		parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))

	def _readReturnType(self, parent):
		"""Consume a return type: a keyword from
		``_SUBROUTINEDEC.RETURN_TYPES`` or otherwise an identifier."""
		upcoming = self.nextTok
		is_builtin = upcoming.type == tokenizor.KEYWORD and upcoming.value in _SUBROUTINEDEC.RETURN_TYPES
		if not is_builtin:
			self._readIdentifier(parent)
			return
		self.next()
		parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))

	def _readSymbol(self, parent, expected = None):
		"""Consume a symbol token, append it to ``parent`` and return its value.

		Args:
			expected: when given, the symbol must have exactly this value.

		Raises:
			SyntaxError: if the token is not a symbol or has the wrong value.
		"""
		self.next()
		if expected is None:
			self._assertToken(self.tok, ELEMENTS.SYMBOL, type_=tokenizor.SYMBOL)
		else:
			self._assertToken(self.tok, expected, type_=tokenizor.SYMBOL)
			self._assertToken(self.tok, expected, value_=expected)
		value = self.tok.value
		parent.append(_leafElement(ELEMENTS.SYMBOL, value))
		return value

	def _readKeyword(self, parent, expected = None):
		"""Consume a keyword token, append it to ``parent`` and return its value.

		Args:
			expected: when given, the keyword must have exactly this value.

		Raises:
			SyntaxError: if the token is not a keyword or has the wrong value.
		"""
		self.next()
		if expected is None:
			self._assertToken(self.tok, ELEMENTS.KEYWORD, type_=tokenizor.KEYWORD)
		else:
			self._assertToken(self.tok, expected, type_=tokenizor.KEYWORD)
			self._assertToken(self.tok, expected, value_=expected)
		value = self.tok.value
		parent.append(_leafElement(ELEMENTS.KEYWORD, value))
		return value

	def _readSymbolOptional(self, parent, expected):
		"""Consume the symbol ``expected`` if it is next.

		Returns:
			True when the symbol was consumed and appended to ``parent``,
			False when the look-ahead token was left untouched.
		"""
		upcoming = self.nextTok
		if not (upcoming.type == tokenizor.SYMBOL and upcoming.value == expected):
			return False
		self.next()
		parent.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
		return True

	def _readKeywordOptional(self, parent, expected):
		"""Consume the keyword ``expected`` if it is next.

		Returns:
			True when the keyword was consumed and appended to ``parent``,
			False when the look-ahead token was left untouched.
		"""
		upcoming = self.nextTok
		if not (upcoming.type == tokenizor.KEYWORD and upcoming.value == expected):
			return False
		self.next()
		parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
		return True

	def next(self):
		self.tok = self.iter.next()
		self.nextTok = self.iter.lookahead()

	def _assertToken(self, tok, expected_str, type_ = None, value_ = None):
		if (type_ != None and tok.type != type_) or (value_ != None and tok.value != value_):
			raise self._syntaxError("Expected %s but found %s" % (expected_str, tok.value), tok)

	def _syntaxError(self, msg, tok = None):
		if tok is None:
			tok = self.tok
		return SyntaxError(msg, (None, tok.srow, tok.scol, tok.line))
class jackVisitor(jackGrammarVisitor):
    """Clase que hereda del visitor para ir escribiendo en lenguaje de maquina virtual"""

    def __init__(self):
        """Create the symbol table, the code writer and the bookkeeping fields."""
        self.symbolTable = SymbolTable()
        # Name of the class and of the subroutine (plus its kind) being compiled.
        self.nombreClase = ""
        self.nombreMetodo = ""
        self.kindMetodo = ""
        # Label counters are incremented before use, so the first label is 0.
        self.contIf = -1
        self.contWhile = -1
        # Argument count reported by the most recently visited expression list.
        self.nArgs = 0
        escritor = CodeWriter()
        escritor.vm = ""
        self.vmWriter = escritor

    def visitClasses(self, ctx):
        """Remember the name of the class being compiled, then visit its children."""
        nodoNombre = ctx.children[1]
        self.nombreClase = nodoNombre.children[0].getText()
        return self.visitChildren(ctx)

    def visitClassVarDec(self, ctx):
        """Register every field / static variable of the declaration in the symbol table."""
        kind = ctx.children[0].getText()
        tipo = ctx.children[1].children[0].getText()
        # From child 2 onwards the texts are names separated by ',' and
        # terminated by ';'.
        pos = 2
        texto = ctx.children[pos].getText()
        while texto != ';':
            if texto != ',':
                self.symbolTable.define(texto, tipo, kind)
            pos += 1
            texto = ctx.children[pos].getText()
        return self.visitChildren(ctx)

    def visitTypes(self, ctx):
        """Visit the children of a type node; nothing is written for the node itself."""
        return self.visitChildren(ctx)

    def visitSubroutineDec(self, ctx):
        """Start a new subroutine scope in the symbol table.

        Records the subroutine kind and name, resets the subroutine-level
        table and, for a method, registers ``this`` as an argument before
        any declared parameter is defined.
        """
        self.kindMetodo = ctx.children[0].getText()
        self.nombreMetodo = ctx.children[2].children[0].getText()
        self.symbolTable.startSubroutine()
        if self.kindMetodo == 'method':
            # The second argument of define() is the variable's type: 'this'
            # is an instance of the enclosing class, not of the method name.
            self.symbolTable.define('this', self.nombreClase, 'argument')
        return self.visitChildren(ctx)

    def visitParameterList(self, ctx):
        """Register each declared parameter as an ``argument`` in the symbol table."""
        if ctx.getChildCount() > 0:
            nodos = ctx.children
            primerTipo = nodos[0].children[0].getText()
            primerNombre = nodos[1].children[0].getText()
            self.symbolTable.define(primerNombre, primerTipo, 'argument')
            # Remaining parameters come in groups of three children:
            # separator, type, name.
            pos = 2
            while pos < len(nodos) - 1 and nodos[pos].getText() != ')':
                tipo = nodos[pos + 1].getText()
                nombre = nodos[pos + 2].getText()
                self.symbolTable.define(nombre, tipo, 'argument')
                pos += 3
        return self.visitChildren(ctx)

    def visitSubroutineBody(self, ctx):
        """Emit the function header and prologue, then compile the body.

        The leading ``var`` declarations are visited first so that the number
        of locals is known when the ``function`` command is written. A
        constructor then allocates its object and a method binds ``this`` to
        argument 0 before the remaining children are visited.
        """
        escritor = self.vmWriter
        pos = 1
        while ctx.children[pos].children[0].getText() == "var":
            self.visit(ctx.children[pos])
            pos += 1
        escritor.writeFunction(
            self.nombreClase + '.' + self.nombreMetodo,
            self.symbolTable.varCount('local'),
        )
        if self.kindMetodo == 'method':
            escritor.writePush('argument', 0)
            escritor.writePop('pointer', 0)
        elif self.kindMetodo == 'constructor':
            # Allocate one word per field and point 'this' at the new block.
            escritor.writePush('constant', self.symbolTable.varCount('field'))
            escritor.writeCall('Memory.alloc', 1)
            escritor.writePop('pointer', 0)
        for resto in ctx.children[pos:]:
            self.visit(resto)

    def visitVarDec(self, ctx):
        """Register every variable of a ``var`` declaration as ``local`` in the symbol table."""
        tipo = ctx.children[1].children[0].getText()
        self.symbolTable.define(ctx.children[2].getText(), tipo, 'local')
        # Anything after the first name is either ',' or another name, up to ';'.
        pos = 3
        texto = ctx.children[pos].getText()
        while texto != ';':
            if texto != ',':
                self.symbolTable.define(texto, tipo, 'local')
            pos += 1
            texto = ctx.children[pos].getText()
        return self.visitChildren(ctx)

    """Llamados en los que no es necesario  escribir codigo de VM"""
    def visitClassName(self, ctx):
        """Visit the children of a class-name node; no VM code is written here."""
        return self.visitChildren(ctx)

    def visitSubroutineName(self, ctx):
        """Visit the children of a subroutine-name node; no VM code is written here."""
        return self.visitChildren(ctx)

    def visitVarName(self, ctx):
        """Visit the children of a variable-name node; no VM code is written here."""
        return self.visitChildren(ctx)

    def visitStatements(self, ctx):
        """Visit each child of a statements node; no VM code is written here."""
        return self.visitChildren(ctx)

    def visitStatement(self, ctx):
        """Visit the children of a statement node; no VM code is written here."""
        return self.visitChildren(ctx)

    def visitLetStatement(self, ctx):
        """Emit the VM code for a ``let`` statement.

        For a plain variable the right-hand side is evaluated and popped into
        the variable. For an array element the target address (index plus
        base) is computed first, the right-hand side is evaluated and parked
        in ``temp 0`` while ``pointer 1`` is aimed at the target, and the
        value is finally stored through ``that 0``.
        """
        nombre = ctx.children[1].getText()
        # One lookup is enough: repeating the identical query when it returned
        # None cannot produce a different answer.
        kind = self.symbolTable.kindOf(nombre)
        index = self.symbolTable.indexOf(nombre)
        if ctx.children[2].getText() == '[':
            self.visit(ctx.children[3])             # index expression
            self.vmWriter.writePush(kind, index)    # array base
            self.vmWriter.writeArithmetic('add')
            self.visit(ctx.children[6])             # right-hand side
            self.vmWriter.writePop('temp', 0)
            self.vmWriter.writePop('pointer', 1)
            self.vmWriter.writePush('temp', 0)
            self.vmWriter.writePop('that', 0)
        else:
            self.visit(ctx.children[3])             # right-hand side
            self.vmWriter.writePop(kind, index)

    def visitIfStatement(self, ctx):
        """Emit the labels and jumps that implement an ``if`` / ``if-else`` statement.

        The condition (child 2) is evaluated, control jumps to ``IF_TRUE<n>``
        when it holds and to ``IF_FALSE<n>`` otherwise. With an ``else``
        branch (child 7 is ``else``, body is child 9) the true branch skips
        over it via ``IF_END<n>``.
        """
        self.contIf += 1
        # Keep this statement's number locally: nested ifs advance the counter.
        sufijo = str(self.contIf)
        escritor = self.vmWriter
        self.visit(ctx.children[2])
        escritor.writeIf('IF_TRUE' + sufijo)
        escritor.writeGoto('IF_FALSE' + sufijo)
        escritor.writeLabel('IF_TRUE' + sufijo)
        self.visit(ctx.children[5])
        tieneElse = ctx.getChildCount() > 7 and str(ctx.children[7]) == 'else'
        if tieneElse:
            escritor.writeGoto('IF_END' + sufijo)
            escritor.writeLabel('IF_FALSE' + sufijo)
            self.visit(ctx.children[9])
            escritor.writeLabel('IF_END' + sufijo)
        else:
            # Always define IF_FALSE: the goto above targets it on every path.
            escritor.writeLabel('IF_FALSE' + sufijo)

    def visitWhileStatement(self, ctx):
        """Emit the labels and jumps that repeat the body while the condition holds."""
        self.contWhile += 1
        # Keep this loop's number locally: nested loops advance the counter.
        sufijo = str(self.contWhile)
        inicio = 'WHILE_EXP' + sufijo
        fin = 'WHILE_END' + sufijo
        escritor = self.vmWriter
        escritor.writeLabel(inicio)
        self.visit(ctx.children[2])       # condition
        escritor.writeArithmetic('not')   # leave the loop when it is false
        escritor.writeIf(fin)
        self.visit(ctx.children[5])       # body
        escritor.writeGoto(inicio)
        escritor.writeLabel(fin)

    def visitDoStatement(self, ctx):
        """Compile the call of a ``do`` statement, then pop the top of the stack into ``temp 0``."""
        self.visitChildren(ctx)
        # The value left by the call is not used by a 'do' statement.
        self.vmWriter.writePop('temp', 0)

    def visitReturnStatement(self, ctx):
        """Emit a ``return``; when no expression is given the constant 0 is returned."""
        valor = ctx.children[1]
        if valor.getText() == ';':
            self.vmWriter.writePush('constant', 0)
        else:
            self.visit(valor)
        self.vmWriter.writeReturn()

    def visitExpression(self, ctx):
        """Compile ``term (op term)*`` in postfix order: each operand before its operator."""
        nodos = ctx.children
        self.visit(nodos[0])
        for pos in range(2, ctx.getChildCount(), 2):
            self.visit(nodos[pos])        # right operand
            self.visit(nodos[pos - 1])    # operator applied to both operands

    def visitTerm(self, ctx):
        """Emit the VM code for a single term.

        A one-child term is an integer, a string literal, a keyword constant,
        a variable found in the symbol table, or something else whose children
        are simply visited. A multi-child term is an array access, a
        parenthesised expression, or a unary operation.
        """
        cabeza = ctx.children[0].getText()
        escritor = self.vmWriter
        tabla = self.symbolTable

        if ctx.getChildCount() == 1:
            if cabeza.isdigit():
                escritor.writePush('constant', cabeza)
                return
            if cabeza.startswith('"'):
                # Build the string at run time, one character at a time.
                texto = cabeza.strip('"')
                escritor.writePush('constant', len(texto))
                escritor.writeCall('String.new', 1)
                for caracter in texto:
                    escritor.writePush('constant', ord(caracter))
                    escritor.writeCall('String.appendChar', 2)
                return
            # Keyword constants are tested before the symbol table lookup.
            if cabeza in ['true', 'false', 'null', 'this']:
                self.visitChildren(ctx)
                return
            if cabeza in tabla.subrutina.keys() or cabeza in tabla.clase.keys():
                escritor.writePush(tabla.kindOf(cabeza), tabla.indexOf(cabeza))
                return
            self.visitChildren(ctx)
            return

        if ctx.children[1].getText() == '[':
            # Array access: address = index expression + base, read via 'that'.
            index = tabla.indexOf(cabeza)
            segment = tabla.kindOf(cabeza)
            self.visit(ctx.children[2])
            escritor.writePush(segment, index)
            escritor.writeArithmetic('add')
            escritor.writePop('pointer', '1')
            escritor.writePush('that', '0')
        elif cabeza == '(':
            self.visitChildren(ctx)
        elif cabeza in ('-', '~'):
            # Unary operation: operand first, operator second.
            self.visit(ctx.children[1])
            self.visit(ctx.children[0])

    def visitSubroutineCall(self, ctx):
        """Emit a subroutine call together with its arguments.

        Three shapes are handled:

        * ``var.name(...)`` where ``var`` is in the symbol table: the object
          is pushed as the first argument and ``Type.name`` is called;
        * ``Name.name(...)`` where ``Name`` is not in the symbol table: the
          call target is ``Name.name`` with no implicit argument;
        * ``name(...)``: a call on the current object, so ``pointer 0`` is
          pushed as the first argument and the current class name is used.
        """
        nombre = ctx.children[0].children[0].getText()
        funcion = nombre
        args = 0
        separador = ctx.children[1].getText()
        if separador == '.':
            nombreSubrutina = ctx.children[2].children[0].getText()
            tipo = self.symbolTable.typeOf(nombre)
            if tipo is not None:
                kind = self.symbolTable.kindOf(nombre)
                index = self.symbolTable.indexOf(nombre)
                self.vmWriter.writePush(kind, index)
                funcion = tipo + '.' + nombreSubrutina
                args += 1
            else:
                funcion = nombre + '.' + nombreSubrutina
        elif separador == '(':
            funcion = self.nombreClase + '.' + nombre
            args += 1
            self.vmWriter.writePush('pointer', 0)
        # Start from zero so a count left over from an earlier call is never
        # reused if no expression list is visited for this one.
        self.nArgs = 0
        self.visitChildren(ctx)
        self.vmWriter.writeCall(funcion, args + self.nArgs)

    def visitExpressionList(self, ctx):
        """Compile each argument expression and publish their number in ``self.nArgs``.

        The arguments are counted in a local variable and ``self.nArgs`` is
        assigned only once every argument has been compiled: an argument may
        itself contain a call whose own expression list overwrites
        ``self.nArgs``, which would otherwise corrupt the count of this list.
        """
        cuenta = 0
        if ctx.getChildCount() > 0:
            cuenta = 1
            self.visit(ctx.children[0])
            i = 2
            while i < ctx.getChildCount():
                self.visit(ctx.children[i])
                # Child i-1 is the separator preceding this argument; it is
                # still visited, as before.
                self.visit(ctx.children[i-1])
                cuenta += 1
                i += 2
        self.nArgs = cuenta

    def visitOp(self, ctx):
        """Write the VM command for a binary operator, then visit the node's children.

        An operator that is not in the table writes nothing.
        """
        comandos = {
            "+": 'add',
            "-": 'sub',
            "*": 'call Math.multiply 2',
            "/": 'call Math.divide 2',
            "&": 'and',
            "|": 'or',
            ">": 'gt',
            "<": 'lt',
            "=": 'eq',
        }
        comando = comandos.get(ctx.children[0].getText())
        if comando is not None:
            self.vmWriter.writeArithmetic(comando)
        return self.visitChildren(ctx)

    def visitUnaryop(self, ctx):
        """Write the VM command for a unary operator: ``not`` for ``~`` and ``neg`` for ``-``."""
        comandos = {"~": 'not', "-": 'neg'}
        comando = comandos.get(ctx.children[0].getText())
        if comando is not None:
            self.vmWriter.writeArithmetic(comando)

    def visitKeywordconstant(self, ctx):
        """Push the value of a keyword constant, then visit the node's children.

        ``this`` pushes ``pointer 0``; ``false`` and ``null`` push constant 0;
        ``true`` pushes constant 0 followed by ``not``.
        """
        keyword = ctx.children[0].getText()
        escritor = self.vmWriter
        if keyword == 'this':
            escritor.writePush('pointer', 0)
        elif keyword in ('false', 'null', 'true'):
            escritor.writePush('constant', 0)
            if keyword == 'true':
                escritor.writeArithmetic('not')
        return self.visitChildren(ctx)

    def crearArchivo(self,path):
        """Write the accumulated VM code to the ``.vm`` file matching *path*.

        A trailing ``.jack`` extension is replaced by ``.vm``; any other path
        simply gets ``.vm`` appended. If the target directory does not exist
        an error is printed and the program exits with status 1.
        """
        extension = '.jack'
        # Only strip the extension at the very end, so a '.jack' appearing
        # earlier in the path (e.g. in a directory name) is left intact.
        base = path[:-len(extension)] if path.endswith(extension) else path
        destino = base + '.vm'
        try:
            archivo = open(destino, 'w')
        except FileNotFoundError:
            print('ERROR:No hay directorio existente para escribir')
            exit(1)
        # The context manager closes (and therefore flushes) the file.
        with archivo:
            archivo.write(self.vmWriter.vm)
Exemple #6
0
class CompilationEnginge(object):
    """
    lalala
    """

    def __init__(self, input_file, output_file):
        """Set up the tokenizer, the output stream, the symbol table and the VM writer.

        input_file is handed to ``JackTokenizer``; output_file is opened here
        for writing and is also handed to ``VMWriter``.
        """
        self.tokenizer = JackTokenizer(input_file)
        self.out = open(output_file, 'w')
        # Current token and the name of the class being compiled.
        self.token = None
        self.class_name = None

        #######################
        ### PROJECT 11 CODE ###
        #######################

        self.symbol_table = SymbolTable()
        # NOTE(review): VMWriter receives the same path that was just opened
        # above -- confirm that the two writers do not clash.
        self.vm_writer = VMWriter(output_file)

        #######################


    def analyze(self):
        self.token = self.tokenizer.advance()
        self.compile_class()
        self.close()
        print('CLASS TABLE:')
        print(self.symbol_table.class_table)


    def close(self):
        if self.out:
            self.out.close()
            self.out = None


    def advance(self):
        """Advance the tokenizer and store what it returns in ``self.token``; returns nothing."""
        self.token = self.tokenizer.advance()


    def write_to_out(self):
        """Placeholder; currently does nothing."""
        pass


    def format_line(self, defined_or_used=''):
        token_type = self.tokenizer.token_type()
        running_index = ''
        if token_type == self.tokenizer.keyword_token:
            meat = self.tokenizer.keyword()
            defined_or_used=''
        elif token_type == self.tokenizer.symbol_token:
            meat = self.tokenizer.symbol()
            defined_or_used=''
        elif token_type == self.tokenizer.identifier_token:
            meat = self.tokenizer.identifier()

            #######################
            ### PROJECT 11 CODE ###
            #######################

            # Extending compilaiton engine to output <var/argument/static/field...> instead of <indentifier>
            name = self.tokenizer.token
            if self.symbol_table.kind_of(name):
                token_type = self.symbol_table.kind_of(name)
                running_index = str(self.symbol_table.index_of(name))
            elif name[0].islower():
                token_type = 'subroutine'
            else:
                token_type = 'class'

            #######################  

        elif token_type == self.tokenizer.int_const:
            meat = self.tokenizer.int_val()
            defined_or_used=''
        elif token_type == self.tokenizer.string_const:
            meat = self.tokenizer.string_val()
            defined_or_used=''
        else:
            raise ValueError('Something went wrong with token: {}'.format(self.token))
        
        if defined_or_used != '':
            defined_or_used += ' '
        if running_index != '':
            running_index = ' ' + running_index
        formated_line = '<{2}{0}{3}> {1} </{2}{0}{3}>\n'.format(token_type, meat, defined_or_used, running_index)
        return formated_line


    #########################
    ### PROGRAM STRUCTURE ###
    #########################

    def compile_class(self):
        """
        ####################################################################
        ### class: 'class' className '{' classVarDec* subroutineDec* '}' ###
        ####################################################################
        """

        self.out.write('<class>\n')

        # 'class'
        keyword_line = self.format_line()
        self.out.write(keyword_line)
        
        # className
        self.advance()

        #######################
        ### PROJECT 11 CODE ###
        #######################

        self.class_name = self.tokenizer.token

        ####################### 

        identifier_line = self.format_line('defined')
        self.out.write(identifier_line)

        # '{'
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        ### classVarDec* subroutineDec* ###
        self.advance()
        # classVarDec*
        while self.tokenizer.token_type() == self.tokenizer.keyword_token and self.tokenizer.keyword() in [self.tokenizer.key_field, self.tokenizer.key_static]:
            self.compile_class_var_dec()
        
        # subroutineDec*
        while  self.tokenizer.token_type() == self.tokenizer.keyword_token and self.tokenizer.keyword() in [self.tokenizer.key_function, self.tokenizer.key_method, self.tokenizer.key_constructor]:
            self.compile_subroutine()

        # '}'
        if  self.tokenizer.token_type() == self.tokenizer.symbol_token:
            # Class compilation is done
            symbol_line = self.format_line()
            self.out.write(symbol_line)
        else:
            raise ValueError('Something went wrong')

        # Closing with </class>
        self.out.write('</class>\n')
        is_sucessfull = not(self.advance())
        if is_sucessfull:
            print('Compilation enginge succesfully finished')
        else:
            print('Something went wrong!')


    def compile_class_var_dec(self):
        """
        #######################################################################
        ### classVarDec: ('static'|'field') type varName (',' varName)* ';' ###
        #######################################################################
        """

        self.out.write('<classVarDec>\n')

        #######################
        ### PROJECT 11 CODE ###
        #######################

        # Extract field or static
        # field_or_static = re.match('<[a-z]*>', field_or_static_line)[0][1:-1]
        field_or_static = self.tokenizer.token

        #######################

        #  ('static' | 'field')
        field_or_static_line = self.format_line()
        self.out.write(field_or_static_line)

        # type
        self.advance()

        #######################
        ### PROJECT 11 CODE ###
        #######################

        # Extract token type
        type_ = self.tokenizer.token

        #######################

        type_line = self.format_line()
        self.out.write(type_line)

        # varName
        self.advance()

        #######################
        ### PROJECT 11 CODE ###
        #######################

        self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind=field_or_static)

        #######################

        varname_line = self.format_line('defined')
        self.out.write(varname_line)

        # (',' varName)*
        self.advance()
        symbol = self.tokenizer.symbol()
        while symbol == ',':
            colon_line = self.format_line()
            self.out.write(colon_line)
            self.advance()

            #######################
            ### PROJECT 11 CODE ###
            #######################

            self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind=field_or_static)

            #######################

            varname_line = self.format_line('defined')
            self.out.write(varname_line)
            self.advance()
            symbol = self.tokenizer.symbol()
        # symbol == ';'
        semicolon_line = self.format_line()
        self.out.write(semicolon_line)
        self.advance()

        self.out.write('</classVarDec>\n')


    def compile_subroutine(self):
        """
        ###########################################################################
        ### subroutineDec: ('constructor'|'function'|'method')                  ###             
        ###                ('void' | type) subroutineName '(' parameterList ')' ###
        ###                subroutineBody                                       ###
        ###########################################################################
        """

        #######################
        ### PROJECT 11 CODE ###
        #######################

        print()
        print('SUBROUTINE TABLE:')
        print(self.symbol_table.subroutine_table)
        print()
        self.symbol_table.start_subroutine()
        self.symbol_table.define(name='this', type_=self.class_name, kind='argument')

        #######################

        self.out.write('<subroutineDec>\n')

        # ('constructor'|'function'|'method')
        constructor_function_method_line = self.format_line()
        self.out.write(constructor_function_method_line)

        # ('void' | type)
        self.advance()
        void_or_type_line = self.format_line()
        self.out.write(void_or_type_line)

        # subroutineName 
        self.advance()
        subroutine_name_line = self.format_line('defined')
        self.out.write(subroutine_name_line)

        # '(' 
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        # parameterList
        self.advance()
        self.compile_parameter_list()

        # ')' 
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        
        ##################################################
        ### subroutineBody: '{' varDec* statements '}' ###
        ##################################################
        
        self.out.write('<subroutineBody>\n')

        # '{'
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        ###############
        ### varDec* ###
        ###############

        self.advance()
        while self.tokenizer.token == self.tokenizer.key_var:
            self.compile_var_dec()

        ##################
        ### statements ###
        ##################

        self.compile_statements()
        
        # '}'
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        self.advance()

        self.out.write('</subroutineBody>\n')
        self.out.write('</subroutineDec>\n')

        #######################
        ### PROJECT 11 CODE ###
        #######################

        print()
        print('SUBROUTINE TABLE:')
        print(self.symbol_table.subroutine_table)
        print()

        #######################


    def compile_parameter_list(self):
        """
        ############################################################
        ### parameterList: ((type varName) (',' type varName)*)? ###
        ############################################################
        """

        self.out.write('<parameterList>\n')

        # If token type is symbol then we have empty parameter list
        # If we have symbol token then it means our parameter list is fully processed
        if self.tokenizer.token_type() != self.tokenizer.symbol_token:
            
            # type

            #######################
            ### PROJECT 11 CODE ###
            #######################

            type_ = self.tokenizer.token

            #######################

            type_line = self.format_line()
            self.out.write(type_line)
            
            # varName
            self.advance()

            #######################
            ### PROJECT 11 CODE ###
            #######################

            self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='argument')

            #######################

            var_name_line = self.format_line('defined')
            self.out.write(var_name_line)

            # If next token is ',' we have more then one parameter
            self.advance()
            while self.tokenizer.token_type() == self.tokenizer.symbol_token and self.tokenizer.symbol() == ',':
                # ','
                comma_line = self.format_line()
                self.out.write(comma_line)

                # type
                self.advance()

                #######################
                ### PROJECT 11 CODE ###
                #######################

                type_ = self.tokenizer.token

                #######################

                type_line = self.format_line()
                self.out.write(type_line)

                # varName
                self.advance()

                #######################
                ### PROJECT 11 CODE ###
                #######################

                self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='argument')

                # We are in new subroutine so add next nested scope
                # self.symbol_table.start_subroutine()

                #######################

                var_name_line = self.format_line('defined')
                self.out.write(var_name_line)

                self.advance()

        self.out.write('</parameterList>\n')



    def compile_var_dec(self):
        """
        #####################################################
        ### varDec: 'var' type varName (',' varName)* ';' ###
        #####################################################
        """

        self.out.write('<varDec>\n')

        # var
        var_line = self.format_line()
        self.out.write(var_line)

        # type
        self.advance()

        #######################
        ### PROJECT 11 CODE ###
        #######################

        type_ = self.tokenizer.token

        #######################

        type_line = self.format_line()
        self.out.write(type_line)

        # varName
        self.advance()

        #######################
        ### PROJECT 11 CODE ###
        #######################

        self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='local')

        #######################

        var_name_line = self.format_line('defined')
        self.out.write(var_name_line)

        # (',' varName)*
        self.advance()
        while self.tokenizer.symbol() == ',':
            # ','
            comma_line = self.format_line()
            self.out.write(comma_line)

            # varName
            self.advance()

            #######################
            ### PROJECT 11 CODE ###
            #######################

            self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='local')

            #######################

            var_name_line = self.format_line('defined')
            self.out.write(var_name_line)

            self.advance()

        # ';'
        semicolon_line = self.format_line()
        self.out.write(semicolon_line)

        self.advance()

        self.out.write('</varDec>\n')


    ##################
    ### STATEMENTS ###
    ##################

    def compile_statements(self):
        """
        ##############################
        ### statements: statement* ###
        ##############################
        """
        
        self.out.write('<statements>\n')

        while self.tokenizer.token_type() != self.tokenizer.symbol_token:
            
            keyword = self.tokenizer.keyword()
            # letStatement
            if keyword == self.tokenizer.key_let:
                self.compile_let()

            # ifStatement
            elif keyword == self.tokenizer.key_if:
                self.compile_if()

            # whileStatement
            elif keyword == self.tokenizer.key_while:
                self.compile_while()

            # doStatement
            elif keyword == self.tokenizer.key_do:
                self.compile_do()

            # returnStatement
            elif keyword == self.tokenizer.key_return:
                self.compile_return()

            else:
                raise ValueError('Wrong statement: {}'.format(keyword))

        self.out.write('</statements>\n')


    def compile_do(self):
        """Compile a ``do`` statement.

        Grammar::

            doStatement: 'do' subroutineCall ';'

        Entered with the current token on ``do``; one more token is read
        after the closing ``;`` has been written.
        """
        emit = self.out.write
        emit('<doStatement>\n')

        emit(self.format_line())          # 'do'

        self.advance()
        self.compile_subroutine_call()    # subroutineCall

        emit(self.format_line())          # ';'
        self.advance()

        emit('</doStatement>\n')


    def compile_let(self):
        """Compile a ``let`` statement.

        Grammar::

            letStatement: 'let' varName ('[' expression ']')? '=' expression ';'

        Entered with the current token on ``let``; one more token is read
        after the closing ``;`` has been written.
        """
        emit = self.out.write
        emit('<letStatement>\n')

        emit(self.format_line())              # 'let'

        self.advance()
        emit(self.format_line('used'))        # varName

        # Optional array subscript: '[' expression ']'
        self.advance()
        if self.tokenizer.token == '[':
            emit(self.format_line())          # '['
            self.advance()
            self.compile_expression()
            emit(self.format_line())          # ']'
            self.advance()

        emit(self.format_line())              # '='

        self.advance()
        self.compile_expression()

        emit(self.format_line())              # ';'
        self.advance()

        emit('</letStatement>\n')


    def compile_while(self):
        """Compile a ``while`` statement.

        Grammar::

            whileStatement: 'while' '(' expression ')' '{' statements '}'

        Entered with the current token on ``while``; one more token is read
        after the closing ``}`` has been written.
        """
        emit = self.out.write
        emit('<whileStatement>\n')

        emit(self.format_line())      # 'while'

        self.advance()
        emit(self.format_line())      # '('

        self.advance()
        self.compile_expression()

        emit(self.format_line())      # ')'

        self.advance()
        emit(self.format_line())      # '{'

        self.advance()
        self.compile_statements()

        emit(self.format_line())      # '}'
        self.advance()

        emit('</whileStatement>\n')


    def compile_return(self):
        """
        Write the XML subtree for a return statement.

        Grammar:
            returnStatement: 'return' expression? ';'

        The optional expression is compiled only when the token following
        'return' is not ';'. One advance() is performed after ';' has been
        written.
        """

        def emit_terminal():
            # Append the line produced by format_line() to the output.
            self.out.write(self.format_line())

        self.out.write('<returnStatement>\n')

        emit_terminal()                 # 'return'
        self.advance()

        returns_value = self.tokenizer.token != ';'
        if returns_value:
            self.compile_expression()

        emit_terminal()                 # ';'
        self.advance()

        self.out.write('</returnStatement>\n')


    def compile_if(self):
        """
        Write the XML subtree for an if statement.

        Grammar:
            ifStatement: 'if' '(' expression ')' '{' statements '}'
                         ('else' '{' statements '}')?

        The else part is compiled only when, after the first '}', the
        tokenizer reports a keyword token whose value is 'else'.
        """

        def emit_terminal():
            # Append the line produced by format_line() to the output.
            self.out.write(self.format_line())

        def emit_braced_statements():
            # '{' statements '}': steps onto '{', writes it, compiles the
            # body and writes the closing '}' without stepping past it.
            self.advance()
            emit_terminal()
            self.advance()
            self.compile_statements()
            emit_terminal()

        self.out.write('<ifStatement>\n')

        emit_terminal()                 # 'if'
        self.advance()
        emit_terminal()                 # '('

        self.advance()
        self.compile_expression()       # condition
        emit_terminal()                 # ')'

        emit_braced_statements()        # if-body

        # Look at the token after '}' to decide whether an else part follows.
        self.advance()
        tokenizer = self.tokenizer
        if tokenizer.token_type() == tokenizer.keyword_token and tokenizer.keyword() == 'else':
            emit_terminal()             # 'else'
            emit_braced_statements()    # else-body
            self.advance()

        self.out.write('</ifStatement>\n')

    ###################
    ### EXPRESSIONS ###
    ###################

    def compile_subroutine_call(self, skip_subroutine_name=False):
        """
        Write the XML for a subroutine call (no enclosing tag is emitted).

        Grammar:
            subroutineCall: subroutineName '(' expressionList ')'
                          | (className | varName) '.' subroutineName
                            '(' expressionList ')'

        skip_subroutine_name: when True the leading identifier is not written
        and no advance() is made for it; compile_term() passes True after it
        has already written that identifier itself.
        """

        def emit(*tag):
            # Forward optional tag arguments (e.g. 'used') to format_line().
            self.out.write(self.format_line(*tag))

        if not skip_subroutine_name:
            emit('used')                # subroutineName | className | varName
            self.advance()

        qualified = self.tokenizer.token == '.'
        if qualified:
            emit()                      # '.'
            self.advance()
            emit('used')                # subroutineName
            self.advance()

        emit()                          # '('
        self.advance()
        self.compile_expression_list()
        emit()                          # ')'

        self.advance()


    def compile_expression(self):
        """
        Write the XML subtree for an expression.

        Grammar:
            expression: term (op term)*

        After the first term, every following binary operator is written and
        the term after it compiled, until the current token is no longer one
        of the operators.
        """

        binary_ops = ('+', '-', '*', '/', '&', '|', '<', '>', '=')

        self.out.write('<expression>\n')

        self.compile_term()

        while True:
            if self.tokenizer.token not in binary_ops:
                break
            self.out.write(self.format_line())      # op
            self.advance()
            self.compile_term()

        self.out.write('</expression>\n')

    
    def compile_term(self):
        """
        Write the XML subtree for a term.

        Grammar:
            term: integerConstant | stringConstant | keywordConstant
                | varName | varName '[' expression ']' | subroutineCall
                | '(' expression ')' | unaryOp term

        The first token selects the alternative: '(' starts a parenthesised
        expression, '-' or '~' a unary operation; anything else is written as
        a constant or name, after which the following token decides between
        an array index ('['), a subroutine call ('(' or '.') or nothing more.
        """

        def emit(*tag):
            # Forward optional tag arguments (e.g. 'used') to format_line().
            self.out.write(self.format_line(*tag))

        def emit_enclosed_expression():
            # opener expression closer, then step past the closer.
            emit()
            self.advance()
            self.compile_expression()
            emit()
            self.advance()

        self.out.write('<term>\n')

        if self.tokenizer.token == '(':
            # '(' expression ')'
            emit_enclosed_expression()

        elif self.tokenizer.token in ('-', '~'):
            # unaryOp term
            emit()
            self.advance()
            self.compile_term()

        else:
            # Constant or name; the next token tells whether more follows.
            emit('used')
            self.advance()

            if self.tokenizer.token == '[':
                # varName '[' expression ']'
                emit_enclosed_expression()
            elif self.tokenizer.token in ('(', '.'):
                # The identifier was already written above, so the call
                # compiler must not write it again.
                self.compile_subroutine_call(skip_subroutine_name=True)

        self.out.write('</term>\n')


    def compile_expression_list(self):
        """
        Write the XML subtree for a (possibly empty) expression list.

        Grammar:
            expressionList: (expression (',' expression)* )?

        The list is treated as empty when the current token is already ')'.
        Otherwise expressions are compiled for as long as each one is
        followed by ','.
        """

        self.out.write('<expressionList>\n')

        another = self.tokenizer.token != ')'
        while another:
            self.compile_expression()

            another = self.tokenizer.token == ','
            if another:
                self.out.write(self.format_line())      # ','
                self.advance()

        self.out.write('</expressionList>\n')
Exemple #7
0
class CompilationEngine:
    def __init__(self, tokenizer, vm_writer):
        """Store the collaborators and start with empty compilation state.

        Args:
            tokenizer: source of tokens for the Jack code being compiled.
            vm_writer: receives the VM commands this engine emits
                (presumably already bound to its output .vm file).
        """
        self.tokenizer, self.vm_writer = tokenizer, vm_writer

        # Two scopes of variable declarations: one for the class and one for
        # the subroutine being compiled. A single subroutine table is enough
        # because compile_class() resets it before each subroutine.
        self.class_symbol_table, self.subroutine_symbol_table = SymbolTable(), SymbolTable()

        # Running counters used to build distinct labels for if/while
        # statements in the generated VM code.
        self.if_counter = self.while_counter = 0

        # Names of the class and subroutine currently being compiled; they
        # are combined to form VM function names.
        self.current_class_name = self.current_subroutine_name = None

        # Kind of the subroutine being compiled: "function", "method" or
        # "constructor" once compile_subroutine_dec() has run.
        self.subroutine_type = None

    def run(self):
        """Compile the whole token stream and return compile_class()'s result.

        The tokenizer is advanced once first so that the initial token (which
        should be the keyword "class") is current when compilation starts.
        """
        first_step = self.tokenizer.advance
        first_step()

        return self.compile_class()

    ###################################################
    # ASSERTIONS
    ###################################################

    def assert_identifier(self):
        """Raise AssertionError unless the current token is an identifier.

        Raises:
            AssertionError: if the tokenizer does not report the current
                token as an identifier.
        """
        # Raised explicitly rather than through an `assert` statement so the
        # syntax check is not stripped when Python runs with -O.
        if not self.tokenizer.identifier():
            raise AssertionError(
                f"Expected an identifier but found: {self.tokenizer.current_token}"
            )

    def assert_keyword(self, keyword=None):
        """Raise AssertionError unless the current token is an accepted keyword.

        Args:
            keyword: what to accept. A falsy value (the default ``None``)
                accepts any keyword; a list or tuple accepts any keyword it
                contains; any other value must equal the current token.

        Raises:
            AssertionError: if the tokenizer does not report a keyword, or
                the current token is not among the accepted values.
        """
        tok = self.tokenizer

        # Failures are raised explicitly rather than through `assert`
        # statements so the checks are not stripped when Python runs with -O.
        if not keyword:
            if not tok.keyword():
                raise AssertionError(
                    f"Expected a keyword but found: {tok.current_token}")
        elif isinstance(keyword, (list, tuple)):
            if not (tok.keyword() and tok.current_token in keyword):
                raise AssertionError(
                    f"Expected one of keywords {keyword} but found: {tok.current_token}")
        elif not (tok.keyword() and tok.current_token == keyword):
            raise AssertionError(
                f"Expected keyword {keyword} but found: {tok.current_token}")

    def assert_return_type(self):
        """Raise AssertionError unless the current token can be a return type.

        A return type is accepted when the tokenizer reports the current
        token as either a keyword or an identifier.

        Raises:
            AssertionError: if the current token is neither.
        """
        tok = self.tokenizer

        # Raised explicitly rather than through an `assert` statement so the
        # syntax check is not stripped when Python runs with -O.
        if not (tok.keyword() or tok.identifier()):
            raise AssertionError(
                "Expected a keyword or identifier as the return type "
                f"but found: {tok.current_token}"
            )

    def assert_symbol(self, symbol=None):
        """Raise AssertionError unless the current token is an accepted symbol.

        Args:
            symbol: what to accept. A falsy value (the default ``None``)
                accepts any symbol; a list or tuple accepts any symbol it
                contains; any other value must equal the current token.

        Raises:
            AssertionError: if the tokenizer does not report a symbol, or the
                current token is not among the accepted values.
        """
        tok = self.tokenizer

        # Failures are raised explicitly rather than through `assert`
        # statements so the checks are not stripped when Python runs with -O.
        if not symbol:
            if not tok.symbol():
                raise AssertionError(
                    f"Expected a symbol but found: {tok.current_token}")
        elif isinstance(symbol, (list, tuple)):
            if not (tok.symbol() and tok.current_token in symbol):
                raise AssertionError(
                    f"Expected one of symbols {symbol} but found: {tok.current_token}")
        elif not (tok.symbol() and tok.current_token == symbol):
            raise AssertionError(
                f'Expected symbol "{symbol}" but found: {tok.current_token}')

    ###################################################
    # COMPILER METHODS
    ###################################################

    def compile_class(self):
        """Compile one class: 'class' name '{' var-decs subroutines '}'.

        Records the class name in ``current_class_name``, resets the class
        symbol table, then compiles every field/static declaration followed
        by every constructor/function/method. The subroutine symbol table is
        reset before each subroutine.

        Raises:
            AssertionError: from the assert_* helpers when the tokens do not
                follow the expected shape.
        """
        tok = self.tokenizer

        def at_keyword(options):
            # True while the current token is a keyword from `options`.
            return tok.keyword() and tok.current_token in options

        self.assert_keyword('class')

        # Class name.
        tok.advance()
        self.assert_identifier()
        self.current_class_name = tok.current_token

        tok.advance()
        self.assert_symbol('{')

        # Start from a clean class scope in case this engine instance
        # compiles more than one class.
        self.class_symbol_table.reset()

        # Class-level variable declarations come first.
        tok.advance()
        while at_keyword(('field', 'static')):
            self.compile_class_var_dec()
            tok.advance()

        # Then the subroutines, each with a fresh subroutine scope.
        while at_keyword(('constructor', 'function', 'method')):
            self.subroutine_symbol_table.reset()
            self.compile_subroutine_dec()
            tok.advance()

        self.assert_symbol('}')

    def compile_class_var_dec(self):
        """Record one field/static declaration in the class symbol table.

        Handles comma-separated names, e.g. ``field int x, y, z;``: every
        name is defined with the same type and kind. Returns with ';' as the
        current token.

        Raises:
            AssertionError: from the assert_* helpers when the declaration
                does not start with 'field'/'static' or names are not
                separated by ','.
        """
        tok = self.tokenizer

        # Kind: 'field' or 'static'.
        self.assert_keyword(['field', 'static'])
        kind = tok.current_token
        tok.advance()

        # Type shared by every name in this declaration.
        typ = tok.current_token
        tok.advance()

        while True:
            name = tok.current_token
            tok.advance()
            self.class_symbol_table.define(name, typ, kind)

            if tok.current_token == ';':
                break

            # Anything other than ';' after a name must be a ',' that
            # introduces the next name.
            self.assert_symbol(',')
            tok.advance()

    def compile_do(self):
        """Compile ``do subroutineCall ;`` and discard the call's result.

        Returns with ';' as the current token.

        Raises:
            AssertionError: from the assert_* helpers when 'do' or ';' is
                missing.
        """
        tok = self.tokenizer

        self.assert_keyword('do')
        tok.advance()

        self.compile_subroutine_call()
        tok.advance()

        self.assert_symbol(';')

        # A do-statement ignores the value the call leaves on the stack, so
        # it is popped into temp 0 and never read.
        self.vm_writer.write_pop('temp', 0)

    def compile_expression(self):
        """Compile ``term (op term)*`` and emit VM code for it.

        Operators are applied strictly left to right (Jack defines no
        operator precedence): each operator is written right after its
        right-hand term has been compiled.

        compile_term() leaves the last token of the term current, so the
        tokenizer is advanced after each term; on return the current token is
        the first token that follows the expression.
        """
        self.compile_term()
        self.tokenizer.advance()

        # Loop rather than test once: an expression may chain any number of
        # (op term) pairs, e.g. ``a + b + c``.
        while self.tokenizer.binary_op():
            operator = self.tokenizer.current_token

            self.tokenizer.advance()
            self.compile_term()

            self.vm_writer.write_binary_op(operator)

            self.tokenizer.advance()

    def compile_expression_list(self):
        """Compile a comma-separated list of expressions and count them.

        Scanning stops when the current token is ')' or '}'. Commas are
        skipped; anything else starts an expression that is handed to
        compile_expression().

        Returns:
            The number of expressions compiled.
        """
        tok = self.tokenizer
        compiled = 0

        while tok.current_token not in (')', '}'):
            if tok.current_token != ',':
                compiled += 1
                self.compile_expression()
            else:
                tok.advance()

        return compiled

    def compile_if_statement(self):
        """Compile an if statement, with optional else, to VM code.

        Emitted shape (N is the incremented ``if_counter``)::

            <condition>
            not
            if-goto IF_STATEMENT_N_A
            <if statements>
            goto IF_STATEMENT_N_B
            label IF_STATEMENT_N_A
            <else statements, if any>
            label IF_STATEMENT_N_B

        Returns with the closing '}' of the last compiled block as the
        current token.

        Raises:
            AssertionError: from the assert_* helpers on malformed input.
        """
        tok = self.tokenizer
        vm = self.vm_writer

        def compile_block():
            # '{' statements '}' — steps onto '{' first and leaves '}' current.
            tok.advance()
            self.assert_symbol('{')
            tok.advance()
            self.compile_statements()
            self.assert_symbol('}')

        self.assert_keyword('if')

        # Both labels are fixed before any nested statement can bump the
        # counter again.
        self.if_counter += 1
        prefix = f"IF_STATEMENT_{self.if_counter}"
        else_label = f"{prefix}_A"
        end_label = f"{prefix}_B"

        tok.advance()
        self.assert_symbol('(')

        # Condition.
        tok.advance()
        self.compile_expression()
        self.assert_symbol(')')

        # Jump over the if-block when the condition is false.
        vm.write_command('not')
        vm.write_if(else_label)

        compile_block()

        # Skip the else part after the if-block has run.
        vm.write_goto(end_label)
        vm.write_label(else_label)

        if tok.peek() == 'else':
            tok.advance()       # onto 'else'
            compile_block()

        vm.write_label(end_label)

    def compile_let(self):
        """Compile ``let name = expression ;`` into a VM pop to the variable.

        The target is looked up in the subroutine symbol table first, then in
        the class symbol table, where a 'field' kind is mapped to the 'this'
        segment. Returns with ';' as the current token.

        Raises:
            AssertionError: from the assert_* helpers on malformed input, or
                when the target name is in neither symbol table.
        """
        tok = self.tokenizer

        self.assert_keyword('let')

        tok.advance()
        self.assert_identifier()
        name = tok.current_token

        tok.advance()
        self.assert_symbol()

        # TODO: Handle arrays (``let a[i] = ...``); only plain variables are
        # supported as assignment targets for now.
        self.assert_symbol('=')

        # Right-hand side; its value ends up on top of the stack.
        tok.advance()
        self.compile_expression()

        self.assert_symbol(';')

        locals_table = self.subroutine_symbol_table
        class_table = self.class_symbol_table

        if locals_table.has_name(name):
            segment = locals_table.kind_of(name)
            index = locals_table.index_of(name)
        elif class_table.has_name(name):
            segment = class_table.kind_of(name)
            if segment == "field":
                # Fields live in the segment addressed through 'this'.
                segment = "this"
            index = class_table.index_of(name)
        else:
            raise AssertionError(f"Undeclared variable found: {name}")

        self.vm_writer.write_pop(segment, index)

    def compile_parameter_list(self):
        """Register a subroutine's parameters as 'argument' symbols.

        Consumes tokens up to (not including) the closing ')', ignoring
        commas. The remaining tokens are read as alternating type/name pairs,
        e.g. ``int x, char y`` -> ("int", "x"), ("char", "y"), and each pair
        is defined in the subroutine symbol table in source order. A trailing
        type with no name is ignored.
        """
        tok = self.tokenizer

        # Flat list of everything between the parentheses except commas,
        # e.g. ["int", "x", "char", "y"].
        param_tokens = []
        while tok.current_token != ')':
            if tok.current_token != ',':
                param_tokens.append(tok.current_token)
            tok.advance()

        # Even positions hold types, odd positions the matching names; zip()
        # stops at the shorter slice, so an unpaired trailing type is dropped.
        for typ, name in zip(param_tokens[::2], param_tokens[1::2]):
            self.subroutine_symbol_table.define(name, typ, 'argument')

    def compile_return(self):
        """Compile ``return expression? ;`` and emit the VM return.

        Three shapes are handled: ``return this;`` pushes pointer 0,
        ``return <expression>;`` compiles the expression, and a bare
        ``return;`` pushes constant 0 so that a value is always on the stack
        when the VM ``return`` executes. Returns with ';' as the current
        token.

        Raises:
            AssertionError: from the assert_* helpers on malformed input.
        """
        tok = self.tokenizer
        vm = self.vm_writer

        self.assert_keyword('return')
        tok.advance()

        returning_this = tok.keyword() and tok.current_token == "this"
        if returning_this:
            # The current object's address is held in pointer 0.
            vm.write_push('pointer', 0)
            tok.advance()
        elif tok.current_token != ';':
            self.compile_expression()
        else:
            # Nothing to return: push a dummy 0 for the caller to discard.
            vm.write_push('constant', 0)

        self.assert_symbol(';')

        vm.write_return()

    def compile_statement(self):
        """Dispatch on the current token to the matching statement compiler.

        Returns:
            Whatever the selected compile_* method returns.

        Raises:
            AssertionError: if the current token does not start a do, let,
                if, while or return statement.
        """
        # Handlers are stored by name and resolved only once one is selected.
        handlers = {
            'do': 'compile_do',
            'let': 'compile_let',
            'if': 'compile_if_statement',
            'while': 'compile_while_statement',
            'return': 'compile_return',
        }

        token = self.tokenizer.current_token
        handler_name = handlers.get(token)

        if handler_name is None:
            raise AssertionError(
                f"Unrecognized token in compile_statement(): {token}"
            )

        return getattr(self, handler_name)()

    def compile_statements(self):
        """Compile statements until the current token is '}'.

        Tokens the tokenizer reports as symbols are stepped over without
        being compiled; every other token is handed to compile_statement().
        The tokenizer is advanced once after each token or statement handled.
        """
        tok = self.tokenizer

        while tok.current_token != '}':
            if not tok.symbol():
                self.compile_statement()

            tok.advance()

    def compile_subroutine_body(self):
        """Compile ``{ varDec* statements }`` and emit the VM function header.

        The header ``function Class.subroutine nLocals`` can only be written
        once every ``var`` declaration has been counted, so declarations are
        compiled first. Constructors then allocate the object and methods
        bind ``this`` before any statement is compiled. Returns with the
        closing '}' as the current token.

        Raises:
            AssertionError: from assert_symbol() when the braces are missing.
        """
        tok = self.tokenizer
        vm = self.vm_writer

        # Step onto the opening brace of the body.
        tok.advance()
        self.assert_symbol('{')
        tok.advance()

        # Tally the locals; compile_var_dec() reports how many names each
        # declaration introduced.
        local_count = 0
        while tok.current_token == 'var':
            local_count += self.compile_var_dec()
            tok.advance()

        vm.write_function(
            f"{self.current_class_name}.{self.current_subroutine_name}",
            local_count)

        kind = self.subroutine_type
        if kind == 'constructor':
            # Allocate one word per field; Memory.alloc leaves the new
            # object's base address on the stack.
            vm.write_push("constant", self.class_symbol_table.var_count('field'))
            vm.write_call("Memory.alloc", 1)
        elif kind == "method":
            # The receiver arrives as argument 0.
            vm.write_push("argument", 0)

        if kind in ('constructor', "method"):
            # Anchor THIS at the address just pushed so the object's fields
            # become addressable.
            vm.write_pop("pointer", 0)

        self.compile_statements()

        self.assert_symbol('}')

    def compile_subroutine_call(self):
        """Compile a subroutine call and emit ``call Class.subroutine nArgs``.

        Three call shapes are handled, starting on the leading identifier:

        * ``obj.method(args)``: ``obj`` is found in a symbol table; the
          object is pushed as the hidden first argument and the call targets
          ``<type of obj>.method``.
        * ``Class.subroutine(args)``: the prefix is in no symbol table, so
          it is used as a class name and nothing extra is pushed.
        * ``method(args)``: no prefix; treated as a method call on the
          current object, so pointer 0 is pushed as the hidden first argument
          and the call targets ``<current class>.method``.

        In every case the receiver, when there is one, is pushed BEFORE the
        explicit arguments so that it ends up as argument 0 of the callee.
        Returns with the closing ')' as the current token.

        Raises:
            AssertionError: from the assert_* helpers on malformed input.
        """
        tok = self.tokenizer
        vm = self.vm_writer

        # Number of values pushed for the callee: receiver (if any) plus the
        # explicit arguments.
        arg_count = 0

        # Subroutine name, class name or object name, depending on what
        # follows it.
        self.assert_identifier()
        name = tok.current_token

        tok.advance()
        self.assert_symbol(['(', '.'])

        if tok.current_token == '.':
            locals_table = self.subroutine_symbol_table
            class_table = self.class_symbol_table

            # A prefix known to a symbol table is an object: push it as the
            # receiver and call through its declared type.
            #   myObj.doAThing(a, b) -> MyClass.doAThing(myObj, a, b)
            if locals_table.has_name(name):
                vm.write_push(locals_table.kind_of(name),
                              locals_table.index_of(name))
                name = locals_table.type_of(name)
                arg_count += 1
            elif class_table.has_name(name):
                kind = class_table.kind_of(name)
                kind = "this" if kind == "field" else kind
                vm.write_push(kind, class_table.index_of(name))
                name = class_table.type_of(name)
                arg_count += 1
            # Otherwise the prefix is taken to be a class name and there is
            # no receiver to push.

            # Subroutine name after the period.
            tok.advance()
            self.assert_identifier()
            name = f"{name}.{tok.current_token}"

            tok.advance()
        else:
            # Unprefixed call, e.g. doAThing(): assumed to be a method of the
            # current object. The receiver must be pushed now, ahead of the
            # explicit arguments, to land in argument 0.
            vm.write_push("pointer", 0)
            arg_count += 1

            # VM function names always have the form Class.subroutine.
            name = f"{self.current_class_name}.{name}"

        self.assert_symbol('(')

        # Each compiled expression leaves one more argument on the stack.
        tok.advance()
        arg_count += self.compile_expression_list()

        self.assert_symbol(')')

        vm.write_call(name, arg_count)

    def compile_subroutine_dec(self):
        """Compile one constructor/function/method declaration.

        Records the subroutine's kind and name on the engine, registers the
        parameters (plus the implicit ``this`` for methods) in the subroutine
        symbol table, then hands over to compile_subroutine_body().

        Raises:
            AssertionError: from the assert_* helpers on malformed input.
        """
        tok = self.tokenizer

        self.assert_keyword(['constructor', 'method', 'function'])
        self.subroutine_type = tok.current_token

        # Return type: only validated here.
        tok.advance()
        self.assert_return_type()

        # A method receives the object it operates on as a hidden first
        # argument. It is registered before the declared parameters so it is
        # the first 'argument' entry, and its type is the enclosing class
        # (not the method's return type).
        if self.subroutine_type == 'method':
            self.subroutine_symbol_table.define('this', self.current_class_name,
                                                'argument')

        tok.advance()
        self.assert_identifier()
        self.current_subroutine_name = tok.current_token

        tok.advance()
        self.assert_symbol('(')

        tok.advance()
        self.compile_parameter_list()

        # compile_parameter_list() stops on the closing parenthesis.
        self.assert_symbol(')')

        self.compile_subroutine_body()

    def compile_term(self):
        # We need to compile each individual term to VM code as needed.
        #
        # The definition for "term" in this context is quite broad,
        # so bear with me as we go through each possible term!

        # First, if we have an identifier on our hands, we'll need to peek one token ahead.
        #
        # The token ahead could be one of the following:
        # - a period, indicating that the identifier is a class name or object
        # - a left parens, indicating that the identifier is a subroutine
        # - a left bracket, indiciating that the identifier is an array
        if self.tokenizer.identifier() and self.tokenizer.peek() in [
                '.', '(', '['
        ]:
            next_token = self.tokenizer.peek()

            # The next token is either a period or left parens,
            # which means we're in a subroutine call!
            #
            # Examples: Memory.alloc(), myObj.doAThing(), doSomethingElse()
            if next_token in ['.', '(']:
                self.compile_subroutine_call()

            # The next token is a left bracket, which means
            # we're trying to access an array.
            #
            # Examples: myArray[3], myArray[x + (y - 2)]
            elif next_token == '[':
                # TODO: Handle identifier.

                self.tokenizer.advance()
                self.assert_symbol('[')

                self.tokenizer.advance()
                self.compile_expression()

                self.assert_symbol(']')

        # Let's check if the current token is a unary operation,
        # such as "-" (negate, or neg) or "~" (not).
        #
        # Examples: -3, ~(~(x))
        elif self.tokenizer.unary_op():
            unary_op = self.tokenizer.current_token

            self.tokenizer.advance()
            self.compile_term()

            self.vm_writer.write_unary_op(unary_op)

        # We can always have expressions inside of parentheses.
        # We can treat this like its own term.
        # Examples: (x + 3), ((x + 2) > 9)
        elif self.tokenizer.current_token == '(':
            self.tokenizer.advance()
            self.compile_expression()

            self.assert_symbol(')')

        # Now we've reached some simpler terms!
        # If we encounter a number, we simply write "push constant {number}".
        elif self.tokenizer.int_val() or self.tokenizer.int_val() == 0:
            self.vm_writer.write_push("constant", self.tokenizer.current_token)

        # We need to consider some special keyword expressions.
        # Most of keywords ultimately resolve to simple "push constant" VM commands.
        elif self.tokenizer.keyword():
            # null and false keywords map to constant 0.
            if self.tokenizer.current_token in ["null", "false"]:
                self.vm_writer.write_push("constant", 0)
            # The true keyword maps to constant -1.
            elif self.tokenizer.current_token == "true":
                self.vm_writer.write_push("constant", 1)
                self.vm_writer.write_command("neg")
            # The this keyword indicates a reference to the current object in the THIS address.
            elif self.tokenizer.current_token == "this":
                self.vm_writer.write_push("pointer", 0)

        # If we have an identifier at this point, we can safely assume that
        # it's a standalone variable, not part of a subroutine call or array access.
        #
        # We will leverage our symbol tables to write the VM code here.
        elif self.tokenizer.identifier():
            name = self.tokenizer.current_token

            # First, let's check the subroutine symbol table for the identifier.
            if self.subroutine_symbol_table.has_name(name):
                # If we find it, we can now write the VM push code.
                self.vm_writer.write_push(
                    self.subroutine_symbol_table.kind_of(name),
                    self.subroutine_symbol_table.index_of(name))

            # Next, we'll check the class symbol table for the identifier.
            elif self.class_symbol_table.has_name(name):
                # We need to make a small tweak to address _field_s.
                kind = self.class_symbol_table.kind_of(name)
                kind = "this" if kind == "field" else kind

                # We can now write the VM push code.
                self.vm_writer.write_push(
                    kind, self.class_symbol_table.index_of(name))
            else:
                raise AssertionError(f"Unknown identifier: {name}")

        # TODO
        # For strings, we'll need to call String.new() and String.appendChar().
        elif self.tokenizer.string_val():
            pass

        else:
            raise AssertionError(
                f"Unsure how to handle parse the current token as a term: {self.tokenizer.current_token}"
            )

    def compile_var_dec(self):
        """Compile a single ``var`` declaration into the subroutine symbol table.

        Expects the current token to be ``var``. On return the current token
        is the closing ``;``, which is not consumed here.

        Returns:
            The number of variables this declaration introduced; the caller
            needs it when declaring the function in VM code.
        """
        self.assert_keyword('var')
        segment = 'local'
        self.tokenizer.advance()

        # One type applies to every name in the declaration.
        declared_type = self.tokenizer.current_token
        self.tokenizer.advance()

        declared = 0
        while True:
            identifier = self.tokenizer.current_token
            self.tokenizer.advance()
            self.subroutine_symbol_table.define(
                identifier, declared_type, segment)
            declared += 1

            if self.tokenizer.current_token == ';':
                break

            # Anything other than ';' must be a ',' introducing another name.
            self.assert_symbol(',')
            self.tokenizer.advance()

        return declared

    def compile_while_statement(self):
        """Compile ``while (expression) { statements }`` into VM code.

        Emitted shape::

            label A
              <expression>
              not
              if-goto B
              <statements>
              goto A
            label B

        ``self.while_counter`` is bumped first so every loop gets its own
        pair of labels.
        """
        self.assert_keyword('while')

        self.while_counter += 1
        prefix = f"WHILE_STATEMENT_{self.while_counter}"
        loop_top = f"{prefix}_A"
        loop_exit = f"{prefix}_B"

        tokens = self.tokenizer
        writer = self.vm_writer

        # Loop entry point: the condition is re-evaluated from here.
        writer.write_label(loop_top)

        # '(' expression ')'
        tokens.advance()
        self.assert_symbol('(')
        tokens.advance()
        self.compile_expression()
        self.assert_symbol(')')

        # Leave the loop when the condition is false.
        writer.write_command("not")
        writer.write_if(loop_exit)

        # '{' statements '}'
        tokens.advance()
        self.assert_symbol('{')
        tokens.advance()
        self.compile_statements()
        self.assert_symbol('}')

        # Jump back to re-test the condition, then mark the exit.
        writer.write_goto(loop_top)
        writer.write_label(loop_exit)
Exemple #8
0
from symbol_table import SymbolTable, SymbolKind

# Smoke test for SymbolTable: define a few symbols, then check the index,
# kind and type lookups against hand-written expectations.
s1 = SymbolTable()

# NOTE(review): `FEILD` is the member name used throughout this script;
# presumably that is how the enum spells it -- confirm in symbol_table.
for name, type_, kind in (
    ("a", "int", SymbolKind.ARG),
    ("b", "int", SymbolKind.ARG),
    ("c", "String", SymbolKind.FEILD),
    ("d", "Point", SymbolKind.ARG),
):
    s1.define(name, type_, kind)

# Index lookups: the expectations below number each kind separately
# ("c" is the first field, "d" the third argument).
for name, index in (("a", 0), ("b", 1), ("c", 0), ("d", 2)):
    assert s1.get_index_of(name) == index

# Kind lookups, including names that were never defined.
for name, kind in (
    ("d", SymbolKind.ARG),
    ("c", SymbolKind.FEILD),
    ("e", SymbolKind.NONE),
    ("spam", SymbolKind.NONE),
):
    assert s1.get_kind_of(name) == kind

# Type lookups.
for name, type_ in (("a", "int"), ("c", "String"), ("d", "Point")):
    assert s1.get_type_of(name) == type_

print("All assertions are True!")
Exemple #9
0
class CompilationEngine:
    def __init__(self, source, destination):
        """Set up the VM writer, token stream and symbol table for one run.

        Nothing is compiled here; call ``compile()`` to do the work.

        Args:
            source: Jack input handed to ``tokenizor.newTokenizor``.
            destination: Output target handed to ``VMWriter``.
        """
        self.src = source
        self.dst = destination
        self.writer = VMWriter(destination)
        # Wrapped so the parser can look at the upcoming token (see next()).
        self.iter = Lookahead(tokenizor.newTokenizor(self.src))
        self._symbol_table = SymbolTable()

    def compile(self):
        root = self._compileClass()
        return root

    def _compileClass(self):
        """Parse ``class Name { classVarDec* subroutineDec* }``.

        Stores the class name on ``self.className`` and returns the CLASS
        element that collects everything read along the way.
        """
        class_node = Element(ELEMENTS.CLASS)

        # Header: 'class' keyword, the class name, opening brace.
        self._readKeyword(class_node, ELEMENTS.CLASS)
        self.className = self._readIdentifier(class_node)
        self._readSymbol(class_node, _SYMBOLS.BRACKET_CURLY_OPEN)

        # Both helpers loop internally, so each is called exactly once.
        self._compileClassVarDec(class_node)
        self._compileSubroutine(class_node)

        self._readSymbol(class_node, _SYMBOLS.BRACKET_CURLY_CLOSE)
        return class_node

    def _compileClassVarDec(self, parent):
        """Parse every class-level variable declaration that comes next.

        Each declaration becomes a CLASSVARDEC element appended to ``parent``.
        Symbol-table registration happens inside ``_readIdentifier``.
        """
        while (self.nextTok.type == tokenizor.KEYWORD
               and self.nextTok.value in _CLASSVARDEC.FIELD_TYPES):
            declaration = Element(ELEMENTS.CLASSVARDEC)
            self._readKeyword(declaration)
            self._readType(declaration)

            # The first name is mandatory; each comma introduces another.
            self._readIdentifier(declaration)
            while self._readSymbolOptional(declaration, _SYMBOLS.COMMA):
                self._readIdentifier(declaration)

            self._readSymbol(declaration, _SYMBOLS.SEMI_COLON)
            parent.append(declaration)

    def _compileSubroutine(self, parent):
        """Compile every subroutine declaration that comes next.

        For each subroutine the symbol table's subroutine scope and the
        label counter are reset before the parameter list and body are
        compiled. Each SUBROUTINEDEC element is appended to ``parent``.
        """
        while (self.nextTok
               and self.nextTok.type == tokenizor.KEYWORD
               and self.nextTok.value in _SUBROUTINEDEC.TYPES):
            declaration = Element(ELEMENTS.SUBROUTINEDEC)
            subroutine_kind = self._readKeyword(declaration)
            self._readReturnType(declaration)
            self.methodName = self._readIdentifier(declaration)

            self._symbol_table.startSubroutine(self.className, self.methodName)
            if subroutine_kind == _SUBROUTINEDEC.METHOD:
                # Registered before the declared parameters are read.
                self._symbol_table.define(
                    "this", self.className, SYM_KINDS.ARG)

            # _gen_label/_gen_labels pre-increment, so the first id is 0.
            self._uid = -1

            self._readSymbol(declaration, _SYMBOLS.PARENTHESES_OPEN)
            self._compileParameters(declaration)
            self._readSymbol(declaration, _SYMBOLS.PARENTHESES_CLOSE)
            self._compileSubroutineBody(declaration, subroutine_kind)
            parent.append(declaration)

    def _gen_label(self, type_):
        self._uid += 1
        return "%s.%s.%s.%d" % (self.className, self.methodName, type_,
                                self._uid)

    def _gen_labels(self, *parts):
        self._uid += 1
        return [
            "%s.%s.%s.%d" % (self.className, self.methodName, part, self._uid)
            for part in parts
        ]

    def _compileSubroutineBody(self, parent, function_type):
        """Compile ``{ varDec* statements }`` and emit the function prologue.

        Args:
            parent: The SUBROUTINEDEC element; its third child is read as the
                subroutine name.
            function_type: Keyword value that introduced the subroutine,
                compared against ``_SUBROUTINEDEC.CONSTRUCTOR`` / ``METHOD``.
        """
        body = Element(ELEMENTS.SUBROUTINEBODY)
        self._readSymbol(body, _SYMBOLS.BRACKET_CURLY_OPEN)

        # The VM `function` line needs the local count, so the var
        # declarations are read before anything is written.
        local_count = self._compileVarDec(body)
        qualified_name = "%s.%s" % (self.className, parent[2].text)
        self.writer.writeFunction(qualified_name, local_count)

        if function_type == _SUBROUTINEDEC.CONSTRUCTOR:
            # Allocate one word per field and point `this` at the new block.
            size = self._symbol_table.varCount(SYM_KINDS.FIELD)
            self.writer.writePush(SEGMENT.CONST, size)
            self.writer.writeCall("Memory.alloc", 1)
            self.writer.writePop(SEGMENT.POINTER, 0)
        elif function_type == _SUBROUTINEDEC.METHOD:
            # Argument 0 is copied into pointer 0 to anchor `this`.
            self.writer.writePush(SEGMENT.ARG, 0)
            self.writer.writePop(SEGMENT.POINTER, 0)

        self._compileStatements(body)
        self._readSymbol(body, _SYMBOLS.BRACKET_CURLY_CLOSE)
        parent.append(body)

    def _compileStatements(self, parent):
        """Compile consecutive statements and append a STATEMENTS element.

        Each statement kind is handled by its own helper (``_compileLet``,
        ``_compileIf``, ``_compileWhile``, ``_compileDo``, ``_compileReturn``),
        which appends the statement's element to the STATEMENTS element and
        emits the matching VM code.

        Raises:
            SyntaxError: if a keyword listed in ``_STATEMENTS.TYPE_NAMES`` has
                no handler here; without this check the loop would never
                consume the token and spin forever.
        """
        statementsE = Element(ELEMENTS.STATEMENTS)
        handlers = {
            _STATEMENTS.LET: self._compileLet,
            _STATEMENTS.IF: self._compileIf,
            _STATEMENTS.WHILE: self._compileWhile,
            _STATEMENTS.DO: self._compileDo,
            _STATEMENTS.RETURN: self._compileReturn,
        }
        while (self.nextTok.type == tokenizor.KEYWORD
               and self.nextTok.value in _STATEMENTS.TYPE_NAMES):
            handler = handlers.get(self.nextTok.value)
            if handler is None:
                raise self._syntaxError(
                    "Unexpected %s." % self.nextTok.value, self.nextTok)
            handler(statementsE)
        if len(statementsE) == 0:
            # Empty element gets a newline as text, as done for the other
            # possibly-empty elements in this class.
            statementsE.text = "\n"
        parent.append(statementsE)

    def _compileLet(self, parent):
        """Compile ``let name[expr]? = expr;`` and append it to ``parent``."""
        statementE = Element(ELEMENTS.STATEMENT_LET)
        self._readKeyword(statementE)
        identifier = self._readIdentifier(statementE)
        is_array = self._readSymbolOptional(statementE, _SYMBOLS.BRACKET_OPEN)
        if is_array:
            # Leave (index + base) on the stack as the target address.
            self._compileExpression(statementE)
            self.writer.writePush(*self._identifier_data(identifier))
            self.writer.writeArithmetic("add")
            self._readSymbol(statementE, _SYMBOLS.BRACKET_CLOSE)
        self._readSymbol(statementE, _SYMBOLS.EQUAL)
        self._compileExpression(statementE)
        self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
        if is_array:
            # Park the value in temp 0 so the address below it can be
            # popped into pointer 1, then store the value through `that`.
            self.writer.writePop(SEGMENT.TEMP, 0)
            self.writer.writePop(SEGMENT.POINTER, 1)
            self.writer.writePush(SEGMENT.TEMP, 0)
            self.writer.writePop(SEGMENT.THAT, 0)
        else:
            self.writer.writePop(*self._identifier_data(identifier))
        parent.append(statementE)

    def _compileIf(self, parent):
        """Compile ``if (expr) {...} [else {...}]`` and append it to ``parent``."""
        label_else, label_end = self._gen_labels("if.else", "if.end")
        statementE = Element(ELEMENTS.STATEMENT_IF)
        self._readKeyword(statementE)
        self._readSymbol(statementE, _SYMBOLS.PARENTHESES_OPEN)
        self._compileExpression(statementE)
        self._readSymbol(statementE, _SYMBOLS.PARENTHESES_CLOSE)
        # Negate the condition so a false condition jumps to the else part.
        self.writer.writeArithmetic("not")
        self.writer.writeIf(label_else)
        self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
        self._compileStatements(statementE)
        self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        self.writer.writeGoto(label_end)
        self.writer.writeLabel(label_else)
        if self._readKeywordOptional(statementE, _KEYWORDS.ELSE):
            self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
            self._compileStatements(statementE)
            self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        self.writer.writeLabel(label_end)
        parent.append(statementE)

    def _compileWhile(self, parent):
        """Compile ``while (expr) {...}`` and append it to ``parent``."""
        label_start, label_end = self._gen_labels("while.start", "while.end")
        self.writer.writeLabel(label_start)
        statementE = Element(ELEMENTS.STATEMENT_WHILE)
        self._readKeyword(statementE)
        self._readSymbol(statementE, _SYMBOLS.PARENTHESES_OPEN)
        self._compileExpression(statementE)
        self._readSymbol(statementE, _SYMBOLS.PARENTHESES_CLOSE)
        # Negate the condition so a false condition leaves the loop.
        self.writer.writeArithmetic("not")
        self.writer.writeIf(label_end)
        self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
        self._compileStatements(statementE)
        self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        parent.append(statementE)
        self.writer.writeGoto(label_start)
        self.writer.writeLabel(label_end)

    def _compileReturn(self, parent):
        """Compile ``return expr?;`` and append it to ``parent``."""
        statementE = Element(ELEMENTS.STATEMENT_RETURN)
        self._readKeyword(statementE)
        if not (self.nextTok.type == tokenizor.SYMBOL
                and self.nextTok.value == _SYMBOLS.SEMI_COLON):
            self._compileExpression(statementE)
        else:
            # A bare `return;` still pushes a value (constant 0).
            self.writer.writePush(SEGMENT.CONST, 0)
        self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
        self.writer.writeReturn()
        parent.append(statementE)

    def _compileExpression(self, parent):
        """Compile ``term (op term)*`` and append an EXPRESSION element.

        Operators are applied strictly left to right: each one is written
        right after its right-hand term has been compiled.
        """
        expression = Element(ELEMENTS.EXPRESSION)
        self._readTerm(expression)
        while True:
            upcoming = self.nextTok
            if not (upcoming.type == tokenizor.SYMBOL
                    and upcoming.value in _SYMBOLS.OPERATORS):
                break
            operator = self._readSymbol(expression)
            self._readTerm(expression)
            # Both operands are on the stack now, so the operator goes last.
            self.writer.writeArithmetic(operator)
        parent.append(expression)

    def _compileExpressionList(self, parent):
        """Compile a parenthesised argument list, both parentheses included.

        The '(' and ')' symbols are appended to ``parent`` directly; the
        expressions go into an EXPRESSION_LIST element between them.

        Returns:
            The number of expressions compiled.
        """
        self._readSymbol(parent, _SYMBOLS.PARENTHESES_OPEN)
        arguments = Element(ELEMENTS.EXPRESSION_LIST)
        count = 0
        while True:
            at_close = (self.nextTok.type == tokenizor.SYMBOL
                        and self.nextTok.value == _SYMBOLS.PARENTHESES_CLOSE)
            if at_close:
                break
            self._compileExpression(arguments)
            # The separator is consumed when present but is not enforced.
            self._readSymbolOptional(arguments, _SYMBOLS.COMMA)
            count += 1
        # hack for TextComparer
        if len(arguments) == 0:
            arguments.text = "\n"
        parent.append(arguments)
        self._readSymbol(parent, _SYMBOLS.PARENTHESES_CLOSE)
        return count

    def _compileDo(self, parent):
        """Compile ``do call;`` and discard the call's return value.

        Three call shapes are handled:

        * ``var.name(...)``: ``var`` is in the symbol table, so it is pushed
          as the first argument and the call targets ``<type of var>.name``.
        * ``Class.name(...)``: the prefix is unknown to the symbol table and
          is used verbatim as the class name.
        * ``name(...)``: a call on the current object; pointer 0 is pushed as
          the first argument and the call targets ``<this class>.name``.
        """
        statement = Element(ELEMENTS.STATEMENT_DO)
        self._readKeyword(statement, _STATEMENTS.DO)
        first_name = self._readIdentifier(statement)
        arg_count = 0

        if self._readSymbolOptional(statement, _SYMBOLS.DOT):
            receiver_type = self._symbol_table.typeOf(first_name)
            if receiver_type:
                segment, index = self._identifier_data(first_name)
                self.writer.writePush(segment, index)
                arg_count += 1
                owner = receiver_type
            else:
                owner = first_name
            target = "%s.%s" % (owner, self._readIdentifier(statement))
        else:
            target = "%s.%s" % (self.className, first_name)
            self.writer.writePush(SEGMENT.POINTER, 0)
            arg_count += 1

        arg_count += self._compileExpressionList(statement)
        self._readSymbol(statement, _SYMBOLS.SEMI_COLON)
        self.writer.writeCall(target, arg_count)
        # The statement has no use for the returned value.
        self.writer.writePop(SEGMENT.TEMP, 0)
        parent.append(statement)

    def _compileVarDec(self, parent):
        """Parse all leading ``var`` declarations of a subroutine body.

        Each declaration becomes a VAR_DEC element appended to ``parent``.

        Returns:
            The total number of variables declared.
        """
        local_total = 0
        while True:
            upcoming = self.nextTok
            if not (upcoming.type == tokenizor.KEYWORD
                    and upcoming.value == _KEYWORDS.VAR):
                break
            declaration = Element(ELEMENTS.VAR_DEC)
            self._readKeyword(declaration, _KEYWORDS.VAR)
            self._readType(declaration)

            # The first name is mandatory; each comma introduces another.
            self._readIdentifier(declaration)
            local_total += 1
            while self._readSymbolOptional(declaration, _SYMBOLS.COMMA):
                self._readIdentifier(declaration)
                local_total += 1

            self._readSymbol(declaration, _SYMBOLS.SEMI_COLON)
            parent.append(declaration)
        return local_total

    def _compileParameters(self, parent):
        """Parse a possibly empty parameter list (without the parentheses).

        Every ``type name`` pair, plus the comma after it when present, is
        appended to a PARAM_LIST element, which is then appended to
        ``parent``.
        """
        parameters = Element(ELEMENTS.PARAM_LIST)
        while True:
            upcoming = self.nextTok
            builtin_type = (upcoming.type == tokenizor.KEYWORD
                            and upcoming.value in _CLASSVARDEC.VAR_TYPES)
            if not (builtin_type or upcoming.type == tokenizor.IDENTIFIER):
                break
            self._readType(parameters)
            self._readIdentifier(parameters)
            self._readSymbolOptional(parameters, _SYMBOLS.COMMA)
        if len(parameters) == 0:
            # Same newline text as the other possibly-empty elements get.
            parameters.text = "\n"
        parent.append(parameters)

##############################
########## READ ##############
##############################

    def _readTerm(self, parent):
        """Parse one term, emit its VM code and append a TERM element.

        Handled forms: integer constant, string constant, keyword constant,
        ``name``, ``name[expr]``, ``name(args)``, ``prefix.name(args)``,
        ``(expr)`` and ``unaryOp term``.

        Fixes relative to the previous version:

        * ``name(args)`` no longer tries to read a second ``)``;
          ``_compileExpressionList`` already consumes both parentheses.
        * ``name(args)`` is compiled like the same form in ``_compileDo``:
          pointer 0 is passed as the first argument and the call target is
          qualified with the current class name.
        * The "Unexpected ..." error reports the offending upcoming token
          rather than the last consumed one.

        Raises:
            SyntaxError: if the upcoming token cannot start a term.
        """
        termE = Element(ELEMENTS.TERM)
        if self.nextTok.type == tokenizor.INTEGER:
            self.next()
            termE.append(
                _leafElement(ELEMENTS.INTEGER_CONSTANT, self.tok.value))
            self.writer.writePush(SEGMENT.CONST, self.tok.value)
        elif self.nextTok.type == tokenizor.STRING:
            self.next()
            termE.append(_leafElement(ELEMENTS.STRING_CONSTANT,
                                      self.tok.value))
            string_value = self.tok.value
            # Build the string at run time: String.new(len), then append
            # one character code at a time.
            self.writer.writePush(SEGMENT.CONST, len(string_value))
            self.writer.writeCall("String.new", 1)
            for char in string_value:
                self.writer.writePush(SEGMENT.CONST, ord(char))
                self.writer.writeCall("String.appendChar", 2)
        elif (self.nextTok.type == tokenizor.KEYWORD
              and self.nextTok.value in _KEYWORDS.CONSTANTS):
            self.next()
            termE.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
            _KW_CONT_WRITE[self.tok.value](self.writer)
        elif self.nextTok.type == tokenizor.IDENTIFIER:
            identifier = self._readIdentifier(termE)
            nArgs = 0
            if self._readSymbolOptional(termE, _SYMBOLS.BRACKET_OPEN):
                # Array read: compute (index + base), aim `that` at it via
                # pointer 1, then push the addressed word.
                self._compileExpression(termE)
                self.writer.writePush(*self._identifier_data(identifier))
                self.writer.writeArithmetic("add")
                self.writer.writePop(SEGMENT.POINTER, 1)
                self.writer.writePush(SEGMENT.THAT, 0)
                self._readSymbol(termE, _SYMBOLS.BRACKET_CLOSE)
            elif (self.nextTok.type == tokenizor.SYMBOL
                  and self.nextTok.value == _SYMBOLS.PARENTHESES_OPEN):
                # Bare call `name(...)`: a call on the current object, so
                # `this` goes first and the target is Class.name, exactly
                # as _compileDo does for the same form.
                self.writer.writePush(SEGMENT.POINTER, 0)
                nArgs = 1 + self._compileExpressionList(termE)
                self.writer.writeCall(
                    "%s.%s" % (self.className, identifier), nArgs)
            elif self._readSymbolOptional(termE, _SYMBOLS.DOT):
                type_ = self._symbol_table.typeOf(identifier)
                if type_:
                    # Known variable: method call on that object.
                    segment, index = self._identifier_data(identifier)
                    self.writer.writePush(segment, index)
                    nArgs += 1
                    identifier = "%s.%s" % (type_, self._readIdentifier(termE))
                else:
                    # Unknown prefix: used verbatim as a class name.
                    identifier = "%s.%s" % (identifier,
                                            self._readIdentifier(termE))
                nArgs += self._compileExpressionList(termE)
                self.writer.writeCall(identifier, nArgs)
            else:
                self.writer.writePush(*self._identifier_data(identifier))
        elif (self.nextTok.type == tokenizor.SYMBOL
              and self.nextTok.value == _SYMBOLS.PARENTHESES_OPEN):
            self.next()
            termE.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
            self._compileExpression(termE)
            self._readSymbol(termE, _SYMBOLS.PARENTHESES_CLOSE)
        elif (self.nextTok.type == tokenizor.SYMBOL
              and self.nextTok.value in _SYMBOLS.UNARY_OPARTORS):
            self.next()
            sym = self.tok.value
            termE.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
            self._readTerm(termE)
            # The operand is compiled first; the operator applies to it.
            self.writer.writeArithmeticUnary(sym)
        else:
            # Nothing was consumed, so the culprit is the upcoming token.
            raise self._syntaxError(
                "Unexpected %s." % self.nextTok.value, self.nextTok)
        parent.append(termE)

    def _identifier_data(self, identifier):
        """Return the ``(segment, index)`` pair used to push or pop a variable.

        The symbol's kind is translated to a VM segment via
        ``_SEG_TRANSLATE``; the index comes straight from the symbol table.
        """
        kind = self._symbol_table.kindOf(identifier)
        segment = _SEG_TRANSLATE[kind]
        index = self._symbol_table.indexOf(identifier)
        return segment, index

    def _readIdentifier(self, parent):
        """Consume an identifier, append it to ``parent`` and return its name.

        When the name is not yet in the symbol table and ``parent`` shows
        that a variable is being declared, the name is defined on the spot:

        * in a CLASSVARDEC / VAR_DEC the kind comes from the leading keyword
          (child 0) and the type from child 1;
        * in a PARAM_LIST the type is the element read just before the name
          and the kind is ARG.

        An identifier that follows a comma in a PARAM_LIST is the *type* of
        the next parameter (e.g. ``Point`` in ``int a, Point p``), not a
        parameter itself. It used to be defined as an ARG of type ``,``,
        which shifted the index of every later argument; it is now skipped.

        Known or freshly defined variables get ``type``/``kind``/``index``
        attributes on their element.

        Raises:
            SyntaxError: if the next token is not an identifier.
        """
        self.next()
        self._assertToken(self.tok,
                          ELEMENTS.IDENTIFIER,
                          type_=tokenizor.IDENTIFIER)
        name = self.tok.value
        element = _leafElement(ELEMENTS.IDENTIFIER, name)
        type_ = self._symbol_table.typeOf(name)
        kind = None
        index = None
        if type_ is None:
            if parent.tag in (ELEMENTS.CLASSVARDEC,
                              ELEMENTS.VAR_DEC) and len(parent) > 1:
                # len > 1 means the type was already read, so this is a
                # variable name and not the (class-name) type itself.
                type_ = parent[1].text
                kind = _SYM_KIND_MAP[parent[0].text]
            elif (parent.tag == ELEMENTS.PARAM_LIST and len(parent) > 0
                  and parent[-1].text != _SYMBOLS.COMMA):
                # Previous element is the parameter's type. After a comma
                # the identifier is itself a type and must not be defined.
                type_ = parent[-1].text
                kind = SYM_KINDS.ARG
            if kind is not None:
                index = self._symbol_table.define(name, type_, kind)
        else:
            # Already known: annotate the element with the stored data.
            kind = self._symbol_table.kindOf(name)
            index = self._symbol_table.indexOf(name)
        if kind is not None:
            element.set("type", type_)
            element.set("kind", str(kind))
            element.set("index", str(index))
        parent.append(element)
        return name

    def _readType(self, parent):
        """Consume a variable type and append it to ``parent``.

        Built-in types (keywords in ``_CLASSVARDEC.VAR_TYPES``) become
        KEYWORD leaves; anything else is read as an identifier.
        """
        upcoming = self.nextTok
        builtin = (upcoming.type == tokenizor.KEYWORD
                   and upcoming.value in _CLASSVARDEC.VAR_TYPES)
        if not builtin:
            self._readIdentifier(parent)
            return
        self.next()
        parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))

    def _readReturnType(self, parent):
        """Consume a subroutine return type and append it to ``parent``.

        Keywords in ``_SUBROUTINEDEC.RETURN_TYPES`` become KEYWORD leaves;
        anything else is read as an identifier.
        """
        upcoming = self.nextTok
        builtin = (upcoming.type == tokenizor.KEYWORD
                   and upcoming.value in _SUBROUTINEDEC.RETURN_TYPES)
        if not builtin:
            self._readIdentifier(parent)
            return
        self.next()
        parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))

    def _readSymbol(self, parent, expected=None):
        """Consume a symbol token, append it to ``parent``, return its value.

        Args:
            parent: Element that receives the SYMBOL leaf.
            expected: When given, the symbol must have exactly this value.

        Raises:
            SyntaxError: if the token is not a symbol, or not ``expected``.
        """
        self.next()
        current = self.tok
        if expected is None:
            self._assertToken(current, ELEMENTS.SYMBOL, type_=tokenizor.SYMBOL)
        else:
            # Type first, then value, so the more basic mismatch is
            # reported first.
            self._assertToken(current, expected, type_=tokenizor.SYMBOL)
            self._assertToken(current, expected, value_=expected)
        parent.append(_leafElement(ELEMENTS.SYMBOL, current.value))
        return current.value

    def _readKeyword(self, parent, expected=None):
        """Consume a keyword token, append it to ``parent``, return its value.

        Args:
            parent: Element that receives the KEYWORD leaf.
            expected: When given, the keyword must have exactly this value.

        Raises:
            SyntaxError: if the token is not a keyword, or not ``expected``.
        """
        self.next()
        current = self.tok
        if expected is None:
            self._assertToken(current, ELEMENTS.KEYWORD,
                              type_=tokenizor.KEYWORD)
        else:
            # Type first, then value, so the more basic mismatch is
            # reported first.
            self._assertToken(current, expected, type_=tokenizor.KEYWORD)
            self._assertToken(current, expected, value_=expected)
        parent.append(_leafElement(ELEMENTS.KEYWORD, current.value))
        return current.value

    def _readSymbolOptional(self, parent, expected):
        """Consume the symbol ``expected`` if it is the upcoming token.

        Returns:
            True when the symbol was consumed and appended to ``parent``,
            False (leaving the token stream untouched) otherwise.
        """
        upcoming = self.nextTok
        if upcoming.type != tokenizor.SYMBOL or upcoming.value != expected:
            return False
        self.next()
        parent.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
        return True

    def _readKeywordOptional(self, parent, expected):
        """Consume the keyword ``expected`` if it is the upcoming token.

        Returns:
            True when the keyword was consumed and appended to ``parent``,
            False (leaving the token stream untouched) otherwise.
        """
        upcoming = self.nextTok
        if upcoming.type != tokenizor.KEYWORD or upcoming.value != expected:
            return False
        self.next()
        parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
        return True

    def next(self):
        """Advance the token stream by one token.

        Afterwards ``self.tok`` is the token just consumed and
        ``self.nextTok`` is whatever the lookahead iterator reports as
        upcoming.
        """
        self.tok = self.iter.next()
        self.nextTok = self.iter.lookahead()

    def _assertToken(self, tok, expected_str, type_=None, value_=None):
        if (type_ != None and tok.type != type_) or (value_ != None
                                                     and tok.value != value_):
            raise self._syntaxError(
                "Expected %s but found %s" % (expected_str, tok.value), tok)

    def _syntaxError(self, msg, tok=None):
        if tok is None:
            tok = self.tok
        return SyntaxError(msg, (None, tok.srow, tok.scol, tok.line))
class CompilationEngine:
    CLASS_VAR_DEC_KEYWORDS = ['static', 'field']
    SUBROUTINE_DEC_KEYWORDS = ['constructor', 'function', 'method']
    BINARY_OPERATOR_SYMBOLS = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
    UNARY_OPERATOR_SYMBOLS = ['-', '~']
    BINARY_OPERATORS_TO_COMMAND = {
        '+': 'add',
        '-': 'sub',
        '=': 'eq',
        '>': 'gt',
        '<': 'lt',
        '&': 'and',
        '|': 'or'
    }
    UNARY_OPERATORS_TO_COMMAND = {'-': 'neg', '~': 'not'}
    TYPE_TO_TAG = {
        'STRING_CONST': 'stringConstant',
        'INT_CONST': 'integerConstant',
        'KEYWORD': 'keyword',
        'IDENTIFIER': 'identifier',
        'SYMBOL': 'symbol'
    }

    SYMBOLS_TO_XML_CONVENTION = {
        '<': '&lt;',
        '>': '&gt;',
        '&': '&amp;',
        '"': '&quot;'
    }

    def __init__(self, input_file_path, vm_writer: VMWriter):
        """Create the tokenizer and symbol table, then compile right away.

        Args:
            input_file_path: Value handed to ``JackTokenizer``.
            vm_writer: Writer that receives the generated VM commands.
        """
        self.jack_tokenizer = JackTokenizer(input_file_path)
        self.symbol_table = SymbolTable()
        self.vm_writer = vm_writer
        # Compilation is driven from the constructor; input that yields no
        # tokens is skipped entirely.
        if self.jack_tokenizer.has_more_tokens():
            self.compile_class()

    def compile_class(self):
        """Compile one class: header, variable declarations, subroutines.

        The class name is recorded in the symbol table with kind ``'CLASS'``.
        The VM writer is closed once the class has been processed.
        """
        tokenizer = self.jack_tokenizer

        # Two advances bring the class name up as the current token
        # (presumably the first lands on the `class` keyword).
        tokenizer.advance()
        tokenizer.advance()
        self.symbol_table.define(tokenizer.identifier(), 'NONE', 'CLASS')

        # Two more advances reach the first token of the class body
        # (presumably stepping over the opening brace).
        tokenizer.advance()
        tokenizer.advance()

        # Each compiler loops over all consecutive declarations itself.
        if tokenizer.key_word() in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            self.compile_class_var_dec()
        if tokenizer.key_word() in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.compile_subroutine()

        tokenizer.advance()
        self.vm_writer.close()

    def write_token(self, token_name):
        """Write the current token as an XML line and advance the tokenizer.

        The tag is chosen from ``TYPE_TO_TAG`` by the current token type;
        ``token_name`` is the text placed inside the tag.

        NOTE(review): ``self.output_file`` is not assigned in the visible
        ``__init__``; this looks like a leftover from an XML-emitting
        version. Confirm it is set elsewhere before relying on this method.
        """
        tag = CompilationEngine.TYPE_TO_TAG[self.jack_tokenizer.token_type()]
        line = '<{0}> {1} </{0}>\n'.format(tag, token_name)
        self.output_file.write(line)
        self.jack_tokenizer.advance()

    def compile_class_var_dec(self):
        """Register every consecutive ``static``/``field`` declaration.

        Nothing is written to the VM output; each declared name is added to
        the symbol table with kind ``'STATIC'`` or ``'FIELD'``.
        """
        tokenizer = self.jack_tokenizer
        kind_for_keyword = {'field': 'FIELD', 'static': 'STATIC'}

        while tokenizer.key_word() in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            kind = kind_for_keyword.get(tokenizer.key_word(), '')

            # Step to the type, remember it, then step to the first name.
            tokenizer.advance()
            field_type = self.get_type()
            tokenizer.advance()
            self.symbol_table.define(tokenizer.identifier(), field_type, kind)
            tokenizer.advance()

            # Remaining names: step over the separator, define, step on.
            while tokenizer.symbol() != ';':
                tokenizer.advance()
                self.symbol_table.define(
                    tokenizer.identifier(), field_type, kind)
                tokenizer.advance()

            # Step past the terminating ';'.
            tokenizer.advance()

    def write_type(self):
        """Pass the current type token (keyword or identifier) to ``write_token``.

        Tokens of any other type are ignored.
        """
        tokenizer = self.jack_tokenizer
        token_type = tokenizer.token_type()
        if token_type == 'KEYWORD':
            self.write_token(tokenizer.key_word())
        elif token_type == 'IDENTIFIER':
            self.write_token(tokenizer.identifier())

    def compile_subroutine(self):
        """Compile every consecutive constructor/function/method declaration.

        For each subroutine: reset the subroutine scope, register the
        parameters and locals, emit the ``function`` line (which needs the
        local count), set up ``this`` for methods and constructors, then
        compile the body statements.
        """
        tokenizer = self.jack_tokenizer
        symbols = self.symbol_table
        writer = self.vm_writer

        writer.zero_branching_indexes()
        while tokenizer.key_word() in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            symbols.start_subroutine()
            constructor = tokenizer.key_word() == 'constructor'
            method = tokenizer.key_word() == 'method'
            if method:
                # Registered before the declared parameters.
                symbols.define('this', symbols.get_class_name(), 'ARG')

            # Step over the subroutine keyword and the return type.
            tokenizer.advance()
            tokenizer.advance()
            symbols.define(tokenizer.identifier(), 'NONE', 'SUBROUTINE')
            name = symbols.get_class_name() + '.' + tokenizer.identifier()

            # Step over the name and '(' to reach the parameter list.
            tokenizer.advance()
            tokenizer.advance()
            self.compile_parameter_list()
            # Step over ')' and then '{'.
            tokenizer.advance()
            tokenizer.advance()

            var_num = 0
            while tokenizer.key_word() == 'var':
                var_num += self.compile_var_dec()
            writer.write_function(name, var_num)

            if method:
                # Argument 0 is copied into pointer 0 to anchor `this`.
                writer.write_push('ARG', 0)
                writer.write_pop('POINTER', 0)
            elif constructor:
                # Allocate one word per field; the result becomes `this`.
                field_count = symbols.var_count('FIELD')
                writer.write_push('CONST', field_count)
                writer.write_call('Memory.alloc', 1)
                writer.write_pop('POINTER', 0)

            self.compile_statements()
            # Step past the closing '}' of the body.
            tokenizer.advance()

    def compile_parameter_list(self):
        """Register each declared parameter as an ``'ARG'`` symbol.

        Starts on the first token after ``(`` and leaves the tokenizer on
        the token that ends the list. An empty list (current symbol is
        ``)``) registers nothing and does not advance.
        """
        tokenizer = self.jack_tokenizer
        if tokenizer.symbol() == ')':
            return

        while True:
            parameter_type = self.get_type()
            tokenizer.advance()
            self.symbol_table.define(
                tokenizer.identifier(), parameter_type, 'ARG')
            tokenizer.advance()
            if tokenizer.symbol() != ",":
                break
            # Step over the comma to the next parameter's type.
            tokenizer.advance()

    def get_type(self):
        """Return the type named by the current token.

        Returns:
            The keyword for a KEYWORD token or the identifier for an
            IDENTIFIER token.

        Raises:
            SyntaxError: if the current token is neither. Previously this
                case fell through to an ``UnboundLocalError``.
        """
        token_type = self.jack_tokenizer.token_type()
        if token_type == 'KEYWORD':
            return self.jack_tokenizer.key_word()
        if token_type == 'IDENTIFIER':
            return self.jack_tokenizer.identifier()
        raise SyntaxError(
            'Expected a type (keyword or identifier) but found a token of '
            'type {0}'.format(token_type))

    def compile_var_dec(self):
        """Register the variables of one ``var`` declaration as ``'VAR'``.

        Starts on the ``var`` keyword and finishes one token past the
        terminating ``;``.

        Returns:
            The number of variables the declaration introduced.
        """
        tokenizer = self.jack_tokenizer

        # Step to the type, remember it, then step to the first name.
        tokenizer.advance()
        var_type = self.get_type()
        tokenizer.advance()

        declared = 0
        while True:
            self.symbol_table.define(tokenizer.identifier(), var_type, 'VAR')
            declared += 1
            tokenizer.advance()
            if tokenizer.symbol() != ",":
                break
            # Step over the comma to the next name.
            tokenizer.advance()

        # Step past the terminating ';'.
        tokenizer.advance()
        return declared

    def compile_statements(self):
        """Compile consecutive statements until a non-statement token.

        Each statement keyword is dispatched to its ``compile_*`` method.
        The loop ends at the first token that is not one of ``let``, ``if``,
        ``while``, ``do`` or ``return``. Previously any other keyword left
        the token unconsumed and the loop never terminated.
        """
        statement_compilers = {
            'let': 'compile_let',
            'if': 'compile_if',
            'while': 'compile_while',
            'do': 'compile_do',
            'return': 'compile_return',
        }
        while self.jack_tokenizer.token_type() == 'KEYWORD':
            method_name = statement_compilers.get(
                self.jack_tokenizer.key_word())
            if method_name is None:
                # Not a statement keyword: the statement list ends here and
                # the caller decides what to do with the token.
                break
            getattr(self, method_name)()

    def compile_do(self):
        """Compile ``do subroutineCall;`` and drop the call's return value."""
        tokenizer = self.jack_tokenizer

        # Step over `do`; the current token is then the first name of the call.
        tokenizer.advance()
        callee = tokenizer.identifier()
        tokenizer.advance()
        self.compile_subroutine_call(callee)

        # A `do` statement ignores the result, so pop it into temp 0.
        self.vm_writer.write_pop('TEMP', 0)
        tokenizer.advance()

    def compile_subroutine_call(self, prefix_call=''):
        """Compile a subroutine call whose first name was already consumed.

        Args:
            prefix_call: The identifier that precedes the current ``(`` or
                ``.`` token.

        Two shapes are handled, chosen by the current symbol:

        * ``name(args)``: when ``name`` is unknown to the symbol table or is
          a ``'SUBROUTINE'``, pointer 0 is pushed as the first argument and
          the call targets ``<class name>.name``.
        * ``prefix.name(args)``: when ``prefix`` is a variable, it is pushed
          as the first argument and the call targets ``<its type>.name``;
          otherwise ``prefix`` is used verbatim as the class name.

        Variables of every kind count as receivers. Previously only
        ``'VAR'`` and ``'FIELD'`` were recognised, so a method call on a
        parameter (``'ARG'``) or a static was compiled as a class-function
        call without the object argument.
        """
        tokenizer = self.jack_tokenizer
        symbols = self.symbol_table

        if tokenizer.symbol() == '(':
            kind = symbols.kind_of(prefix_call)
            # If not in symbol table - then subroutine
            subroutine = not kind or kind == 'SUBROUTINE'
            tokenizer.advance()

            args_count = 0
            if subroutine:
                self.vm_writer.write_push('POINTER', 0)
                args_count += 1
            args_count += self.compile_expression_list()

            if subroutine:
                self.vm_writer.write_call(
                    symbols.get_class_name() + '.' + prefix_call, args_count)
            else:
                self.vm_writer.write_call(prefix_call, args_count)
            tokenizer.advance()
        elif tokenizer.symbol() == '.':
            tokenizer.advance()

            receiver = None
            # NOTE(review): assumes write_push accepts 'STATIC' the same way
            # it accepts the 'VAR'/'FIELD'/'ARG' kinds -- confirm in VMWriter.
            if symbols.kind_of(prefix_call) in ('VAR', 'FIELD', 'ARG', 'STATIC'):
                receiver = prefix_call
                prefix_call = symbols.type_of(prefix_call)
            prefix_call += '.{0}'.format(tokenizer.identifier())
            tokenizer.advance()
            tokenizer.advance()

            args_count = 0
            if receiver is not None:
                # The object goes first, ahead of the explicit arguments.
                self.vm_writer.write_push(
                    symbols.kind_of(receiver), symbols.index_of(receiver))
                args_count += 1
            args_count += self.compile_expression_list()

            self.vm_writer.write_call(prefix_call, args_count)
            tokenizer.advance()

    def compile_let(self):
        """Compile a ``let`` statement, for a plain or an indexed target.

        Plain target: the expression is compiled and popped into the
        variable. Indexed target: the variable and the index expression are
        added, the right-hand side is compiled, and the value is stored
        through ``POINTER 1`` / ``THAT 0`` using ``TEMP 0`` as scratch.
        """
        tokens = self.jack_tokenizer
        writer = self.vm_writer
        table = self.symbol_table

        tokens.advance()
        var_name = tokens.identifier()
        tokens.advance()

        if tokens.symbol() != '[':
            tokens.advance()
            self.compile_expression()
            writer.write_pop(table.kind_of(var_name),
                             table.index_of(var_name))
        else:
            writer.write_push(table.kind_of(var_name),
                              table.index_of(var_name))
            tokens.advance()
            self.compile_expression()
            writer.write_arithmetic("add")
            tokens.advance()
            tokens.advance()
            self.compile_expression()
            # Park the value, aim THAT at the element, then store the value.
            store_sequence = (
                (writer.write_pop, 'TEMP', 0),
                (writer.write_pop, 'POINTER', 1),
                (writer.write_push, 'TEMP', 0),
                (writer.write_pop, 'THAT', 0),
            )
            for emit, segment, index in store_sequence:
                emit(segment, index)

        tokens.advance()

    def compile_while(self):
        """Compile a ``while`` loop.

        Emitted shape: loop label, condition, ``not``, conditional jump to
        the end label, body, jump back to the loop label, end label.
        """
        tokens = self.jack_tokenizer
        emit = self.vm_writer
        number = emit.get_next_label_index('while')
        loop_top = 'WHILE_IF_{n}'.format(n=number)
        loop_exit = 'WHILE_END_{n}'.format(n=number)

        emit.write_label(loop_top)
        for _ in range(2):
            tokens.advance()
        self.compile_expression()
        emit.write_arithmetic('not')  # leave the loop when the test fails
        for _ in range(2):
            tokens.advance()
        emit.write_if(loop_exit)
        self.compile_statements()
        emit.write_goto(loop_top)
        tokens.advance()
        emit.write_label(loop_exit)

    def compile_return(self):
        """Compile a ``return`` statement.

        A bare ``return;`` pushes constant 0 so that a value is always left
        for the caller; otherwise the returned expression is compiled.
        """
        tokens = self.jack_tokenizer
        tokens.advance()
        if tokens.symbol() == ';':
            self.vm_writer.write_push('CONST', 0)
        else:
            self.compile_expression()
        self.vm_writer.write_return()
        tokens.advance()

    def compile_if(self):
        """Compile an ``if`` statement with an optional ``else`` clause.

        Emitted shape: condition, ``not``, conditional jump to the else
        label, then-branch, jump to the end label, else label, optional
        else-branch, end label.
        """
        tokens = self.jack_tokenizer
        emit = self.vm_writer
        number = emit.get_next_label_index('if')
        otherwise = 'IF_ELSE_{n}'.format(n=number)
        finished = 'IF_END_{n}'.format(n=number)

        for _ in range(2):
            tokens.advance()
        self.compile_expression()
        emit.write_arithmetic('not')  # skip the then-branch on a false test
        tokens.advance()

        tokens.advance()
        emit.write_if(otherwise)
        self.compile_statements()
        tokens.advance()
        emit.write_goto(finished)

        emit.write_label(otherwise)
        has_else = tokens.key_word() == 'else'
        if has_else:
            for _ in range(2):
                tokens.advance()
            self.compile_statements()
            tokens.advance()

        emit.write_label(finished)

    def compile_expression(self):
        """Compile ``term (op term)*``, emitting each operator after its
        right-hand term.

        Operators found in ``BINARY_OPERATORS_TO_COMMAND`` become arithmetic
        commands; ``*`` and ``/`` become two-argument calls to
        ``Math.multiply`` and ``Math.divide``.
        """
        library_calls = {'*': 'Math.multiply', '/': 'Math.divide'}

        self.compile_term()
        while True:
            operator = self.jack_tokenizer.symbol()
            if operator not in CompilationEngine.BINARY_OPERATOR_SYMBOLS:
                break
            self.jack_tokenizer.advance()

            self.compile_term()

            if operator in self.BINARY_OPERATORS_TO_COMMAND:
                command = self.BINARY_OPERATORS_TO_COMMAND[operator]
                self.vm_writer.write_arithmetic(command)
            elif operator in library_calls:
                self.vm_writer.write_call(library_calls[operator], 2)

    def compile_term(self):
        """Compile a single term, dispatching on the current token type.

        Handles integer and string constants, the keyword constants
        ``true`` / ``false`` / ``null`` / ``this``, identifiers (plain
        variable, indexed variable, or subroutine call), parenthesised
        expressions and unary operators. Other tokens emit nothing and are
        not consumed.
        """
        tokens = self.jack_tokenizer
        writer = self.vm_writer
        token_type = tokens.token_type()

        if token_type == 'INT_CONST':
            writer.write_push('CONST', tokens.int_val())
            tokens.advance()
            return

        if token_type == 'STRING_CONST':
            text = tokens.string_val()
            # Allocate the string, then append it one character at a time.
            writer.write_push("CONST", len(text))
            writer.write_call("String.new", 1)
            for char in text:
                writer.write_push('CONST', ord(char))
                writer.write_call("String.appendChar", 2)
            tokens.advance()
            return

        if token_type == 'KEYWORD':
            keyword = tokens.key_word()
            if keyword == 'true':
                # true is emitted as the negation of 1
                writer.write_push('CONST', 1)
                writer.write_arithmetic('neg')
            elif keyword in ('false', 'null'):
                writer.write_push('CONST', 0)
            elif keyword == 'this':
                writer.write_push('POINTER', 0)
            tokens.advance()
            return

        if token_type == 'IDENTIFIER':
            name = tokens.identifier()
            tokens.advance()
            following = tokens.symbol()
            if following in ('(', '.'):
                self.compile_subroutine_call(name)
            elif following == '[':
                writer.write_push(self.symbol_table.kind_of(name),
                                  self.symbol_table.index_of(name))
                tokens.advance()
                self.compile_expression()
                tokens.advance()
                # variable + index -> THAT, then read the element
                writer.write_arithmetic('add')
                writer.write_pop('POINTER', 1)
                writer.write_push('THAT', 0)
            else:
                kind = self.symbol_table.kind_of(name)
                idx = self.symbol_table.index_of(name)
                writer.write_push(kind, idx)
            return

        if token_type == 'SYMBOL':
            current = tokens.symbol()
            if current == '(':
                tokens.advance()
                self.compile_expression()
                tokens.advance()
            elif current in CompilationEngine.UNARY_OPERATOR_SYMBOLS:
                command = CompilationEngine.UNARY_OPERATORS_TO_COMMAND[current]
                tokens.advance()
                self.compile_term()
                writer.write_arithmetic(command)

    def compile_expression_list(self):
        """Compile a comma-separated (possibly empty) list of expressions.

        Returns:
            The number of expressions compiled; 0 when the current symbol is
            already ``)``.
        """
        tokens = self.jack_tokenizer
        if tokens.symbol() == ')':
            return 0

        self.compile_expression()
        count = 1
        while tokens.symbol() == ',':
            tokens.advance()
            self.compile_expression()
            count += 1
        return count
Exemple #11
0
class Compiler(object):
    def __init__(self, file_address, compile_address, vm=False):
        """Open the token file and the output file and read the first line.

        Args:
            file_address: path of the input file, read one line per token.
            compile_address: path of the output file.
            vm: when true, the tag/line writers are silenced and methods
                write VM code instead.
        """
        self.here = False
        self.file_object = open(file_address, 'rb')
        self.compiled = open(compile_address, 'wb')
        # NOTE(review): advance() always returns None, so although it reads
        # the first line, current_line ends up as None here.
        self.current_line = self.advance()
        self.nest_level = 0
        self.vm = vm
        self.SYMBOL_TABLE = SymbolTable()

    def get_xml_value(self):
       line = self.current_line
       start = line.find('>')
       end = line.find('</')
       return line[start+1:end-1].strip() # + and - for spaces wrapping the value

    def format_and_write_line(self, dict_=None):
        if not self.vm:
            if dict_:
                return self.compiled.write("{0}{1}{2}\n".format(" "*self.nest_level*2, self.current_line, dict_))
            else:
                return self.compiled.write("{0}{1}\n".format(" "*self.nest_level*2, self.current_line))

    def words_exist(self, words):
        for word in words:
            if self.current_line.replace('</', '').find(word) != -1:
                continue
            else:
                return False
        return True

    def open_tag(self, tag_name):
        if not self.vm:
            self.compiled.write("{0}{1}\n".format(" "*self.nest_level*2,"<{}>".format(tag_name)))
            self.nest_level += 1

    def close_tag(self, tag_name):
        if not self.vm:
            self.nest_level -= 1
            self.compiled.write("{0}{1}\n".format(" "*self.nest_level*2,"</{}>".format(tag_name)))

    def advance(self):
        new_line = self.file_object.readline()
        if new_line == '':
            return
        else:
            self.current_line = new_line.strip()
            return
    def compileClass(self):
        self.open_tag("class")
        self.advance()

        if self.words_exist(['keyword','class']):
            self.format_and_write_line()
            self.advance()
        else:
            raise
        if self.words_exist(['identifier']): # gotta regex for the name too
            self.format_and_write_line({'category': 'class', 'defined': True, 'kind':None, 'index': None})
            self.SYMBOL_TABLE.class_name = self.get_xml_value()
            self.advance()
        else:
            raise
        if self.words_exist(['symbol', '{']):
            self.format_and_write_line()
            self.advance()
        else:
            raise

        while self.words_exist(['keyword', 'static']) or self.words_exist(['keyword', 'field']):
            self.compileClassVarDec()

        while self.words_exist(['keyword', 'function']) or self.words_exist(['keyword', 'constructor']) or self.words_exist(['keyword', 'method']):
            self.compileSubroutine()

        if self.words_exist(['symbol', '}']):
            self.format_and_write_line()
            self.advance()
        else:
            raise

        self.close_tag("class")

    def compileClassVarDec(self):
        self.open_tag("classVarDec")

        if self.words_exist(['keyword', 'static']) or self.words_exist(['keyword', 'field']):
            self.format_and_write_line()
            kind = self.get_xml_value()
            self.advance()
        else:
            raise
        if self.words_exist(['int']) or self.words_exist(['char']) or self.words_exist(['boolean']) or self.words_exist(['identifier']):
            type_ = self.get_xml_value()
            self.format_and_write_line()
            self.advance()
        else:
            raise
        if self.words_exist(['identifier']):
            name = self.get_xml_value()
            self.SYMBOL_TABLE.define(name, type_, kind)
            self.format_and_write_line({'category': kind, 'defined': True, 'kind':kind, 'index': self.SYMBOL_TABLE.index_of(name)})
            self.advance()
        else:
            raise
        has_next = lambda: self.current_line.find(',') != -1
        while has_next():
            if self.words_exist(['symbol', ',']):
                self.format_and_write_line()
                self.advance()
            if self.words_exist(['identifier']):
                name = self.get_xml_value()
                self.SYMBOL_TABLE.define(name, type_, kind)
                self.format_and_write_line({'category': kind, 'defined': True, 'kind':kind, 'index': self.SYMBOL_TABLE.index_of(name)})
                self.advance()
        if self.words_exist(['symbol', ';']):
            self.format_and_write_line()
            self.advance()

        self.close_tag('classVarDec')


    def compileSubroutine(self):
        self.open_tag("subroutineDec")
        n_params = 0
        self.SYMBOL_TABLE.start_subroutine()
        if self.words_exist(['keyword', 'constructor']) or self.words_exist(['keyword', 'function']) or self.words_exist(['keyword', 'method']):
            self.format_and_write_line()
            self.advance()
        else:
            raise
        # void, int etc are keywords, class names are identifiers
        # Here is where we should set a flag if we need to return 0 on void functions
        if self.words_exist(['keyword']) or self.words_exist(['identifier']):
            self.format_and_write_line()
            self.advance()
        else:
            raise
        if self.words_exist(['identifier']):
            self.format_and_write_line({'category': 'subroutine', 'defined': True, 'kind':None, 'index': None})
            self.SYMBOL_TABLE.subroutine_name = self.get_xml_value()
            self.advance()
        else:
            raise
        if self.words_exist(['symbol', '(' ]):
            self.format_and_write_line()
            self.advance()
        else:
            raise
        # no raise needed here cause its optional
        if self.words_exist(['keyword']): # we have parameters
            n_params = self.compileParameterList()

        else:
            self.open_tag('parameterList')
            self.close_tag('parameterList')
        if self.words_exist(['symbol',')']):
            self.format_and_write_line()
            self.advance()

        if self.words_exist(['{']):
            self.compileSubroutineBody()

        self.close_tag("subroutineDec")

    def compileParameterList(self):
        self.open_tag('parameterList')
        n_params = 0
        has_next = True
        while has_next:
            if self.words_exist(['identifier']) or self.words_exist(['keyword']):
                type_ = self.get_xml_value()
                self.format_and_write_line()
                self.advance()
            else:
                raise
            if self.words_exist(['identifier']):
                name = self.get_xml_value()
                self.SYMBOL_TABLE.define(name, type_, 'arg', n_params)
                self.format_and_write_line({'category': 'ARG', 'defined':True, 'kind': self.SYMBOL_TABLE.kind_of(name), 'index':self.SYMBOL_TABLE.index_of(name)})
                self.advance()
            else:
                raise
            has_next = False
            n_params += 1
            if self.words_exist([',']):
                self.format_and_write_line()
                self.advance()
                has_next = True

        self.close_tag('parameterList')
        return n_params

    def compileSubroutineBody(self):
        self.open_tag('subroutineBody')

        if self.words_exist(['{']):
            self.format_and_write_line()
            self.advance()
        else:
            raise
        while self.words_exist(['keyword','var']):
            self.compileVarDec()

        # generate code for function definition after counting the number of local vars
        if self.vm:
            self.compiled.write(
                VMWriter.write_function(
                    '{0}.{1}'.format(self.SYMBOL_TABLE.class_name, self.SYMBOL_TABLE.subroutine_name), 
                    self.SYMBOL_TABLE.var_count('var')) # local variables
            )    

            # no need to pop the args, the args are already on the stack, and 
            # the arg address is altered by the Assembler to point to the corret
            # base

        while  self.words_exist(['if']) or self.words_exist(['let']) or self.words_exist(['while']) or self.words_exist(['do']) or self.words_exist(['return']):
            self.compileStatements()
        if self.words_exist(['}']):
            self.format_and_write_line()
            self.advance()

        self.close_tag('subroutineBody')

    def compileVarDec(self):
        self.open_tag('varDec')

        if self.words_exist(['var']):
            self.format_and_write_line()
            self.advance()
        else:
            raise
        if self.words_exist(['keyword']) or self.words_exist(['identifier']):
            type_=self.get_xml_value()
            self.format_and_write_line()
            self.advance()
        else:
            raise
        has_next = True
        while has_next:
            if self.words_exist(['identifier']):
                name = self.get_xml_value()
                self.SYMBOL_TABLE.define(name, type_, 'var')
                self.format_and_write_line({'category': 'VAR', 'defined': True, 'kind': self.SYMBOL_TABLE.kind_of(name), 'index': self.SYMBOL_TABLE.index_of(name)})
                self.advance()
                has_next = False
            if self.words_exist(['symbol', ',']):
                self.format_and_write_line()
                self.advance()
                has_next = True
        if self.words_exist(['symbol', ';']):
            self.format_and_write_line()
            self.advance()
        else:
            raise
        self.close_tag('varDec')

    def compileStatements(self):
        self.open_tag('statements')

        while self.words_exist(['keyword', 'let']) or self.words_exist(['keyword', 'if']) or self.words_exist(['keyword', 'while']) or self.words_exist(['keyword', 'do']) or self.words_exist(['keyword', 'return']):
            if self.words_exist(['keyword', 'let']):
                self.compileLet()
            if self.words_exist(['keyword', 'if']):
                self.compileIf()
            if self.words_exist(['keyword', 'while']):
                self.compileWhile()
            if self.words_exist(['keyword', 'do']):
                self.compileDo()
            if self.words_exist(['keyword', 'return']):
                self.compileReturn()
        self.close_tag('statements')

    def compileDo(self):
        self.open_tag('doStatement')

        if self.words_exist(['keyword', 'do']):
            self.format_and_write_line()
            self.advance()
        else:
            raise
        if self.words_exist(['identifier']):
            self.compileSubroutineCall()
        if self.words_exist(['symbol',';']):
            self.format_and_write_line()
            self.advance()

        self.close_tag('doStatement')
        if self.vm:
            self.compiled.write(
                VMWriter.write_pop('temp', 0)
            )


    def compileSubroutineCall(self, identifier_compiled = False, identifier=None):
        # no tags
        # subroutineName, varName|className
        if self.words_exist(['identifier']) and not identifier_compiled:
            self.format_and_write_line({'category': 'subroutine', 'defined': False, 'kind': None, 'index':None})
            subroutine_name = self.get_xml_value()
            self.advance()
        else:
            subroutine_name = identifier
        if self.words_exist(['symbol','(']):
            # subroutine call
            self.format_and_write_line()
            self.advance()
            self.compileExpressionList()
            if self.words_exist(['symbol', ')']):
                self.format_and_write_line()
                self.advance()
        elif self.words_exist(['symbol', '.']):
            self.format_and_write_line()
            subroutine_name += '.'
            self.advance()
            if self.words_exist(['identifier']):
                self.format_and_write_line({'category': 'subroutine', 'defined':False, 'kind':None, 'index':None})
                subroutine_name += self.get_xml_value()
                self.advance()
            if self.words_exist(['symbol','(']):
                # subroutine call
                self.format_and_write_line()
                self.advance()
            # always compile expresionLists cause "nothing" is also an expressionList
            n_args = self.compileExpressionList()
            if self.words_exist(['symbol', ')']):
                self.format_and_write_line()
                self.advance()
        else:
            raise

        if self.vm:
            self.compiled.write(
                VMWriter.write_call(subroutine_name, n_args)
            )


    def compileLet(self):
        self.open_tag('letStatement')
        if self.words_exist(['keyword', 'let']):
            self.format_and_write_line()
            self.advance()
        if self.words_exist(['identifier']):
            name = self.get_xml_value()
            type_ = 'int' # for lack of a better way to get this; the type will be whatever the expression returns
            kind = self.SYMBOL_TABLE.kind_of(name) or 'var'
            # always defined after a let
            self.SYMBOL_TABLE.define(name, type_, kind)
            self.format_and_write_line({'category': 'VAR', 'defined':True, 'kind': self.SYMBOL_TABLE.kind_of(name), 'index': self.SYMBOL_TABLE.index_of(name)})
            self.advance()
        else:
            raise
        if self.words_exist(['symbol', '[']):
            self.format_and_write_line()
            self.advance()
            self.compileExpression()
            if self.words_exist(['symbol', ']']):
                self.format_and_write_line()
                self.advance()
            else:
                raise
        if self.words_exist(['symbol', '=']):
            self.format_and_write_line()
            self.advance()
        else:
            raise
        self.compileExpression()
        if self.words_exist(['symbol', ';']):
            self.format_and_write_line()
            self.advance()
        else:
            raise
        if self.vm:
            # might need extending
            segment = {'var': 'local', 'arg': 'argument'}
            self.compiled.write(
                VMWriter.write_pop(segment[kind], self.SYMBOL_TABLE.index_of(name))
            )
        self.close_tag('letStatement')

    def compileWhile(self):
        self.open_tag('whileStatement')
        while_condition_address = str(randint(200, 500))
        while_start_address = str(randint(200, 500))
        while_end_address = str(randint(200, 500))

        # this is the start address 
        self.compiled.write(
            VMWriter.write_label(while_condition_address)
        )
        if self.words_exist(['while', 'keyword']):
            self.format_and_write_line()
            self.advance()
        else:
            raise
        if self.words_exist(['symbol', '(']):
            self.format_and_write_line()
            self.advance()
        else:
            raise
        self.compileExpression()
        if self.words_exist(['symbol', ')']):
            self.format_and_write_line()
            self.advance()
        else:
            raise

        # Stack: true or false
        # if true go to start
        self.compiled.write(
            VMWriter.write_if(while_start_address)
        )
        # if not true then do this one, go to the end
        self.compiled.write(
            VMWriter.write_go_to(while_end_address)
        )
        # this is the start address of the while block
        self.compiled.write(
            VMWriter.write_label(while_start_address)
        )

        if self.words_exist(['symbol', '{']):
            self.format_and_write_line()
            self.advance()
        else:
            raise
        self.compileStatements()
        if self.words_exist(['symbol', '}']):
            self.format_and_write_line()
            self.advance()
        else:
            raise

        # return to beginning
        self.compiled.write(
            VMWriter.write_go_to(while_condition_address)
        )
        # this is the end address
        self.compiled.write(
            VMWriter.write_label(while_end_address)
        )
        self.close_tag('whileStatement')

    def compileReturn(self):
        self.open_tag('returnStatement')
        if self.words_exist(['return', 'keyword']):
            self.format_and_write_line()
            if self.vm:
                self.compiled.write(
                    VMWriter.write_return()
                )
            self.advance()
        else:
            raise
        if not self.words_exist([';']):
            self.compileExpression()
        if self.words_exist([';', 'symbol']):
            self.format_and_write_line()
            self.advance()
        self.close_tag('returnStatement')

    def compileIf(self):
        self.open_tag('ifStatement')
        if self.words_exist(['if', 'keyword']):
            self.format_and_write_line()
            self.advance()
        else:
            raise
        if self.words_exist(['symbol', '(']):
            self.format_and_write_line()
            self.advance()
        else:
            raise
        self.compileExpression()
        if self.words_exist(['symbol', ')']):
            self.format_and_write_line()
            self.advance()
        else:
            raise
        # Now the condition result is on the stack
        # we need to negate it to use it with if-goto else
        else_block = str(randint(500, 700))
        exit_address = str(randint(500, 700))

        self.compiled.write(
            VMWriter.write_arithmetic('~')
        )
        self.compiled.write(
            VMWriter.write_if(else_block)
        )
        if self.words_exist(['symbol', '{']):
            self.format_and_write_line()
            self.advance()
        else:
            raise
        self.compileStatements()
        if self.words_exist(['symbol', '}']):
            self.format_and_write_line()
            self.advance()
        else:
            raise

        self.compiled.write(
            VMWriter.write_go_to(exit_address)
        )

        if self.words_exist(['else', 'keyword']):
            self.here = True  # omg what is this hack? i have no recollection
            self.format_and_write_line()
            self.advance()

            self.compiled.write(
                VMWriter.write_label(else_block)
            )

            if self.words_exist(['symbol', '{']):
                self.format_and_write_line()
                self.advance()
            else:
                raise
            self.compileStatements()
            if self.words_exist(['symbol', '}']):
                self.format_and_write_line()
                self.advance()
            else:
                raise
        
        self.compiled.write(
            VMWriter.write_label(exit_address)
        )

        self.close_tag('ifStatement')

    def compileExpression(self):
        """Compile ``term (op term)*`` inside an ``expression`` element.

        In VM mode each operator is written after its right-hand term,
        using the operator text exactly as it appears in the input line.
        """
        def at_operator():
            # True when the current line contains any entry of OPERATIONS.
            return any([self.words_exist([op]) for op in OPERATIONS])

        self.open_tag('expression')
        self.compileTerm()

        while at_operator():
            self.format_and_write_line()
            operator = self.get_xml_value()
            self.advance()
            self.compileTerm()
            if not self.vm:
                continue
            self.compiled.write(
                VMWriter.write_arithmetic(operator)
            )
        self.close_tag('expression')

    def compileTerm(self, operation=None):
        def get_condition():
            res_list = []
            for k in KEYWORD_CONSTANTS:
                res_list.append(self.words_exist([k]))
            res = False
            for r in res_list:
                res = res or r
            return res
        
        self.open_tag('term')
        if self.words_exist(['integerConstant']) or self.words_exist(['stringConstant']) or get_condition():
            self.format_and_write_line()
            if self.vm:
                value = self.get_xml_value()
                if value == 'true':
                    value = '1'
                    # this might have consequence. PLUM
                    operation = 'neg'
                elif value == 'false' or value == 'null':
                    value = 0
                self.compiled.write(
                    VMWriter.write_push('constant', value)
                )
                if operation:
                    self.compiled.write(
                        VMWriter.write_arithmetic(operation)
                    )
            self.advance()
        elif self.words_exist(['identifier']):
            name = self.get_xml_value()
            kind = self.SYMBOL_TABLE.kind_of(name)
            index = self.SYMBOL_TABLE.index_of(name)
            self.format_and_write_line({'category': None, 'defined':False, 'kind':kind, 'index':index})
            self.advance()
            # THIS ONLY WORKS FOR SIMPLE IDENTIFIERS, should refactor for indexing arrays
            KIND_LOOKUP = {'static': 'static', 'field': 'this', 'arg': 'argument', 'var': 'local'}
            if kind is not None:
                self.compiled.write(
                    VMWriter.write_push(KIND_LOOKUP[kind], index)
                )
            # if there is a [ next
            if self.words_exist(['symbol', '[']):
                self.format_and_write_line()
                self.advance()
                self.compileExpression()
                if self.words_exist(['symbol', ']']):
                    self.format_and_write_line()
                    self.advance()
                else:
                    raise
            # if there is a ( next subroutine call, it will leave its value on the stack
            elif self.words_exist(['(']) or self.words_exist(['.']):
                self.compileSubroutineCall(identifier_compiled=True, identifier=name)

        elif self.words_exist(['(', 'symbol']):
            self.format_and_write_line()
            self.advance()
            self.compileExpression()
            if self.words_exist([')', 'symbol']):
                self.format_and_write_line()
                self.advance()
            else:
                raise
        elif self.words_exist(['-']) or self.words_exist(['~']):
            if self.words_exist(['-']):
                operation = 'neg'
            else:
                operation = '~'
            self.format_and_write_line()
            self.advance()
            self.compileTerm(operation=operation)
        else:
            raise
        self.close_tag('term')

    def compileExpressionList(self):
        self.open_tag('expressionList')
        n_expressions = 0
        has_next = (self.current_line.find(')') == -1)
        while has_next:
            self.compileExpression()
            n_expressions += 1
            has_next = False
            if self.words_exist([',']):
                self.format_and_write_line()
                self.advance()
                has_next = True

        self.close_tag('expressionList')
        return n_expressions
Exemple #12
0
class CompilationEngine:
    def __init__(self, token_stream, out_file, xml_name):
        '''
        creates a new compilation engine with the given input and output.
        The next method called must be compile_class().

        token_stream: tokenizer object; it is advanced once here so that its
            current token is the first token of the input.
        out_file: handed unchanged to VMWriter, which receives the VM code.
        xml_name: path used by write() for the XML parse-tree dump; a falsy
            value disables the dump.

        Raises AssertionError when the first token is not the keyword 'class'.
        '''
        self.stream = token_stream
        self.writer = VMWriter(out_file)
        self.symbols = SymbolTable()
        self.xml_name = xml_name
        self.root = ET.Element('class')

        self.stream.advance()
        # Raised explicitly instead of via `assert` so the check is still
        # performed when Python runs with -O (which strips assert statements).
        first_keyword = self.stream.keyword()
        if first_keyword != 'class':
            raise AssertionError(
                "expected keyword 'class', got {token!r}".format(token=first_keyword))

    def add_terminal(self, root, text):
        terminal = ET.SubElement(root, self.stream.token_type())
        terminal.text = ' {text} '.format(text=text)
        if self.stream.has_more_tokens():
            self.stream.advance()

    def compile_class(self):
        '''
        compiles a complete class: the leading keyword, the class name, a
        symbol, every class-variable declaration, every subroutine, and the
        final symbol. The class name is remembered in self.class_name.
        '''
        stream, root = self.stream, self.root

        def at_keyword_in(group):
            # True when the current token is a keyword belonging to `group`.
            return stream.token_type() == tokenizer.KEYWORD and stream.keyword() in group

        self.add_terminal(root, stream.keyword())
        self.class_name = stream.identifier()
        self.add_terminal(root, self.class_name)
        self.add_terminal(root, stream.symbol())

        while at_keyword_in(CLASS_VARS):
            self.compile_class_var_dec()

        while at_keyword_in(SUBROUTINE_TYPES):
            self.compile_subroutine()

        self.add_terminal(root, stream.symbol())

    def compile_class_var_dec(self):
        '''
        compiles a static declaration or a field declaration and records
        every declared name in the symbol table with the declaration's
        keyword as its kind.
        '''
        node = ET.SubElement(self.root, CLASS_VAR_DEC)
        stream = self.stream

        kind = stream.keyword()
        self.add_terminal(node, kind)

        # the type is either a keyword or an identifier
        on_keyword = stream.token_type() == tokenizer.KEYWORD
        type_name = stream.keyword() if on_keyword else stream.identifier()
        self.add_terminal(node, type_name)

        # one or more names separated by commas
        while True:
            name = stream.identifier()
            self.add_terminal(node, name)
            self.symbols.define(name, type_name, kind)
            if stream.symbol() != COMMA:
                break
            self.add_terminal(node, stream.symbol())

        # the symbol that ended the name list
        self.add_terminal(node, stream.symbol())

    def compile_subroutine(self):
        '''
        compiles a complete method, function, or constructor.

        Resets the subroutine scope of the symbol table, records the XML for
        the header, parameter list and body, and emits the VM `function`
        command followed by the code that anchors the `this` segment:

        * method: argument 0 (the receiving object) is popped into pointer 0.
        * constructor: Memory.alloc is called with the number of fields and
          the returned address is popped into pointer 0.
        '''
        subroutine_dec = ET.SubElement(self.root, SUBROUTINE_DEC)
        self.symbols.start_subroutine()
        subroutine_type = self.stream.keyword()
        if subroutine_type == 'method':
            # Only a method receives the object as an implicit first argument.
            # Registering 'this' for a constructor too would shift every
            # declared parameter one slot away from where the caller put it.
            self.symbols.define('this', self.class_name, 'argument')
        self.add_terminal(subroutine_dec, subroutine_type)
        # return type: a keyword or an identifier
        if self.stream.token_type() == tokenizer.KEYWORD:
            self.add_terminal(subroutine_dec, self.stream.keyword())
        else:
            self.add_terminal(subroutine_dec, self.stream.identifier())
        name = self.stream.identifier()
        self.add_terminal(subroutine_dec, name)

        self.add_terminal(subroutine_dec, self.stream.symbol())
        self.compile_parameter_list(subroutine_dec)
        self.add_terminal(subroutine_dec, self.stream.symbol())

        subroutine_body = ET.SubElement(subroutine_dec, SUBROUTINE_BODY)
        self.add_terminal(subroutine_body, self.stream.symbol())
        # all var declarations must be seen before the local count is known
        while self.stream.token_type() == tokenizer.KEYWORD and self.stream.keyword() == VAR:
            self.compile_var_dec(subroutine_body)
        func_name = '{cls}.{sub}'.format(
            cls=self.class_name,
            sub=name)
        self.writer.write_function(func_name, self.symbols.var_count('var'))
        if subroutine_type == 'method':
            self.writer.write_push('argument', 0)
            self.writer.write_pop('pointer', 0)
        elif subroutine_type == 'constructor':
            # NOTE(review): assumes fields are counted under the kind 'field',
            # i.e. the keyword compile_class_var_dec stored them with.
            self.writer.write_push('constant', self.symbols.var_count('field'))
            self.writer.write_call('Memory.alloc', 1)
            self.writer.write_pop('pointer', 0)
        self.compile_statements(subroutine_body)
        self.add_terminal(subroutine_body, self.stream.symbol())

    def compile_parameter_list(self, root):
        '''
        compiles a (possibly empty) parameter list, not including the
        enclosing "()". Every parameter is defined in the symbol table with
        kind 'argument'.
        '''
        parameter_list_root = ET.SubElement(root, PARAMETER_LIST)
        # a symbol here means the list is empty
        if self.stream.token_type() != tokenizer.SYMBOL:
            self._compile_parameter(parameter_list_root)

        while self.stream.token_type() == tokenizer.SYMBOL and self.stream.symbol() == COMMA:
            self.add_terminal(parameter_list_root, self.stream.symbol())
            self._compile_parameter(parameter_list_root)

    def _compile_parameter(self, parameter_list_root):
        '''
        compiles one "type name" pair and registers the name as an argument.
        '''
        # Read the type the same way the class-variable and var declarations
        # do: a keyword for built-in types, an identifier for class types.
        if self.stream.token_type() == tokenizer.KEYWORD:
            type_name = self.stream.keyword()
        else:
            type_name = self.stream.identifier()
        self.add_terminal(parameter_list_root, type_name)
        name = self.stream.identifier()
        self.add_terminal(parameter_list_root, name)
        self.symbols.define(name, type_name, 'argument')

    def compile_var_dec(self, root):
        '''
        compiles a var declaration and defines each declared name in the
        symbol table with kind 'var'.
        '''
        node = ET.SubElement(root, VAR_DEC)
        stream = self.stream
        self.add_terminal(node, stream.keyword())

        # the type is an identifier or, failing that, a keyword
        on_identifier = stream.token_type() == tokenizer.IDENTIFIER
        type_name = stream.identifier() if on_identifier else stream.keyword()
        self.add_terminal(node, type_name)

        # one or more names separated by commas
        while True:
            name = stream.identifier()
            self.add_terminal(node, name)
            self.symbols.define(name, type_name, 'var')
            if stream.symbol() != COMMA:
                break
            self.add_terminal(node, stream.symbol())

        # the symbol that ended the name list
        self.add_terminal(node, stream.symbol())

    def compile_statements(self, root):
        '''
        compiles a sequence of statements, not including the enclosing "{}".

        Keeps dispatching on the current keyword until the current token is
        no longer a keyword. Raises AssertionError for a keyword that does
        not start a statement.
        '''
        statements_root = ET.SubElement(root, STATEMENTS)
        handlers = {
            'let': self.compile_let,
            'if': self.compile_if,
            'while': self.compile_while,
            'do': self.compile_do,
            'return': self.compile_return,
        }
        while self.stream.token_type() == tokenizer.KEYWORD:
            keyword = self.stream.keyword()
            handler = handlers.get(keyword)
            if handler is None:
                # Raised explicitly: an `assert` would be stripped under -O and,
                # since nothing advances the stream here, the loop would spin
                # forever on the offending token.
                raise AssertionError('unsupported keyword {keyword}'.format(keyword=keyword))
            handler(statements_root)

    def compile_do(self, root):
        '''
        compiles a do statement: the keyword, the subroutine call, and the
        closing symbol. The value the call leaves behind is popped into
        temp 0.
        '''
        node = ET.SubElement(root, DO)
        stream = self.stream

        leading_keyword = stream.keyword()
        self.add_terminal(node, leading_keyword)

        self.compile_subroutine_call(node)
        self.writer.write_pop('temp', 0)

        closing_symbol = stream.symbol()
        self.add_terminal(node, closing_symbol)

    def compile_let(self, root):
        '''
        compiles a let statement.

        For a plain variable the value of the right-hand expression is popped
        straight into the variable. For an array entry (name[index]) the
        target address (array base + index) is computed first and kept on the
        stack while the right-hand side is evaluated; the value is then
        stored through the `that` segment.
        '''
        let_root = ET.SubElement(root, LET)
        self.add_terminal(let_root, self.stream.keyword())
        lhs = self.stream.identifier()
        self.add_terminal(let_root, lhs)
        is_array_entry = (self.stream.token_type() == tokenizer.SYMBOL
                          and self.stream.symbol() == OPEN_BRACKET)
        if is_array_entry:
            self.add_terminal(let_root, self.stream.symbol())
            self.compile_expression(let_root)  # pushes the index
            self.add_terminal(let_root, self.stream.symbol())
            # index + base address -> address of the entry, left on the stack
            self.writer.write_push(self.symbols.kind_of(lhs), self.symbols.index_of(lhs))
            self.writer.write_arithmetic('add')
        self.add_terminal(let_root, self.stream.symbol())
        self.compile_expression(let_root)  # pushes the value to store
        self.add_terminal(let_root, self.stream.symbol())
        if is_array_entry:
            # Park the value in temp 0 only now, after the right-hand side has
            # been fully evaluated, then aim `that` at the entry and store.
            self.writer.write_pop('temp', 0)
            self.writer.write_pop('pointer', 1)
            self.writer.write_push('temp', 0)
            self.writer.write_pop('that', 0)
        else:
            self.writer.write_pop(self.symbols.kind_of(lhs), self.symbols.index_of(lhs))

    def compile_while(self, root):
        '''
        compiles a while statement.

        Emitted VM shape: label EXP, <condition>, not, if-goto END,
        <body>, goto EXP, label END.
        '''
        node = ET.SubElement(root, WHILE)
        loop_label, exit_label = [
            self.symbols.generate_label(prefix) for prefix in ('WHILE_EXP', 'WHILE_END')
        ]

        def consume_symbol():
            # record the current symbol in the XML tree and step past it
            self.add_terminal(node, self.stream.symbol())

        self.add_terminal(node, self.stream.keyword())
        consume_symbol()

        self.writer.write_label(loop_label)
        self.compile_expression(node)
        # leave the loop when the condition does not hold
        self.writer.write_arithmetic('not')
        self.writer.write_if(exit_label)

        consume_symbol()
        consume_symbol()
        self.compile_statements(node)
        self.writer.write_goto(loop_label)
        self.writer.write_label(exit_label)
        consume_symbol()

    def compile_return(self, root):
        '''
        compiles a return statement.

        A bare "return;" pushes constant 0 as the return value; otherwise the
        expression is compiled and its value is returned.
        '''
        return_root = ET.SubElement(root, RETURN)
        self.add_terminal(return_root, self.stream.keyword())
        # Only an immediate ';' means "no value". Testing for *any* symbol
        # would also swallow expressions that begin with one, such as
        # "return -x;" or "return (a + b);".
        returns_nothing = (self.stream.token_type() == tokenizer.SYMBOL
                           and self.stream.symbol() == ';')
        if returns_nothing:
            self.writer.write_push('constant', 0)
        else:
            self.compile_expression(return_root)
        self.writer.write_return()
        self.add_terminal(return_root, self.stream.symbol())

    def compile_if(self, root):
        '''
        compiles an if statement, with an optional else clause.

        Emitted VM shape: <condition>, if-goto TRUE, goto FALSE, label TRUE,
        <if body>, goto END, label FALSE, <else body, if any>, label END.
        '''
        node = ET.SubElement(root, IF)
        true_label, false_label, end_label = [
            self.symbols.generate_label(prefix)
            for prefix in ('IF_TRUE', 'IF_FALSE', 'IF_END')
        ]

        def consume_keyword():
            self.add_terminal(node, self.stream.keyword())

        def consume_symbol():
            self.add_terminal(node, self.stream.symbol())

        consume_keyword()
        consume_symbol()
        self.compile_expression(node)
        self.writer.write_if(true_label)
        self.writer.write_goto(false_label)
        self.writer.write_label(true_label)

        consume_symbol()
        consume_symbol()
        self.compile_statements(node)
        self.writer.write_goto(end_label)
        consume_symbol()

        self.writer.write_label(false_label)
        has_else = (self.stream.token_type() == tokenizer.KEYWORD
                    and self.stream.keyword() == 'else')
        if has_else:
            consume_keyword()
            consume_symbol()
            self.compile_statements(node)
            consume_symbol()
        self.writer.write_label(end_label)

    def compile_expression(self, root):
        '''
        compiles an expression: a term followed by any number of
        "operator term" pairs.

        Operators are applied strictly left to right. Each operator's VM
        code is emitted after its right-hand term so both operands are
        already on the stack.
        '''
        # operators that map to a single VM arithmetic/logic command
        arithmetic = {
            '+': 'add',
            '-': 'sub',
            '&': 'and',
            '|': 'or',
            '<': 'lt',
            '>': 'gt',
            '=': 'eq',
        }
        # operators implemented as two-argument calls
        calls = {
            '*': 'Math.multiply',
            '/': 'Math.divide',
        }

        expression_root = ET.SubElement(root, EXPRESSION)
        self.compile_term(expression_root)
        while self.stream.token_type() == tokenizer.SYMBOL and self.stream.symbol() in OPS:
            operator = self.stream.symbol()
            self.add_terminal(expression_root, operator)
            self.compile_term(expression_root)
            if operator in arithmetic:
                self.writer.write_arithmetic(arithmetic[operator])
            elif operator in calls:
                self.writer.write_call(calls[operator], 2)

    def compile_term(self, root):
        '''
        compiles a term. This method is faced with a slight difficulty when trying to
        decide between some of the alternative rules. Specifically, if the current token
        is an identifier, it must still distinguish between a variable, an array entry, and
        a subroutine call. The distinction can be made by looking ahead one extra token.
        A single look-ahead token, which may be one of "[", "(", ".", suffices to
        distinguish between the three possibilities. Any other token is not
        part of this term and should not be advanced over.

        Code emitted per alternative:
          * integer constant  - push constant n
          * string constant   - String.new(length), then one String.appendChar
                                call per character
          * true              - push constant 0, not
          * false / null      - push constant 0
          * other keyword constant (this) - push pointer 0
          * variable          - push of its segment/index
          * array entry       - base + index popped into pointer 1, push that 0
          * subroutine call   - delegated to compile_subroutine_call
          * ( expression )    - the expression's code
          * unary operator    - the operand's code followed by neg / not

        Raises AssertionError when the current token cannot start a term.
        '''
        term_root = ET.SubElement(root, TERM)
        token_type = self.stream.token_type()
        if token_type == tokenizer.INT:
            val = self.stream.int_val()
            self.add_terminal(term_root, val)
            self.writer.write_push('constant', val)
        elif token_type == tokenizer.STRING:
            val = self.stream.string_val()
            self.add_terminal(term_root, val)
            # Build the string object at run time: allocate it, then append
            # the characters one at a time. appendChar hands back the string,
            # so it is on top of the stack for the next call.
            self.writer.write_push('constant', len(val))
            self.writer.write_call('String.new', 1)
            for char in val:
                self.writer.write_push('constant', ord(char))
                self.writer.write_call('String.appendChar', 2)
        elif token_type == tokenizer.KEYWORD and self.stream.keyword() in KEYWORD_CONSTANTS:
            keyword = self.stream.keyword()
            self.add_terminal(term_root, keyword)
            if keyword == 'true':
                self.writer.write_push('constant', 0)
                self.writer.write_arithmetic('not')
            elif keyword in ['false', 'null']:
                self.writer.write_push('constant', 0)
            else:
                # `this` is the object's address, which lives in pointer 0;
                # "this 0" would push the object's first field instead.
                self.writer.write_push('pointer', 0)
        elif token_type == tokenizer.IDENTIFIER:
            if self.stream.peek() == OPEN_BRACKET:
                name = self.stream.identifier()
                self.writer.write_push(self.symbols.kind_of(name), self.symbols.index_of(name))
                self.add_terminal(term_root, name)
                self.add_terminal(term_root, self.stream.symbol())
                self.compile_expression(term_root)
                self.add_terminal(term_root, self.stream.symbol())
                # base + index -> entry address; read the entry through `that`
                self.writer.write_arithmetic('add')
                self.writer.write_pop('pointer', 1)
                self.writer.write_push('that', 0)
            elif self.stream.peek() == OPEN_PAREN or self.stream.peek() == PERIOD:
                self.compile_subroutine_call(term_root)
            else:
                name = self.stream.identifier()
                self.add_terminal(term_root, self.stream.identifier())
                self.writer.write_push(self.symbols.kind_of(name), self.symbols.index_of(name))
        elif token_type == tokenizer.SYMBOL and self.stream.symbol() == OPEN_PAREN:
            self.add_terminal(term_root, self.stream.symbol())
            self.compile_expression(term_root)
            self.add_terminal(term_root, self.stream.symbol())
        elif token_type == tokenizer.SYMBOL and self.stream.symbol() in UNARY_OPS:
            operator = self.stream.symbol()
            self.add_terminal(term_root, operator)
            self.compile_term(term_root)
            self.writer.write_arithmetic('neg' if operator == '-' else 'not')
        else:
            # explicit raise: an `assert` would silently vanish under -O
            raise AssertionError(
                'unsupported token {token}'.format(token=self.stream.current_token))

    def compile_expression_list(self, root):
        '''
        compiles a (possibly empty) comma-separated list of expressions and
        returns how many expressions it contained.
        '''
        node = ET.SubElement(root, EXPRESSION_LIST)
        stream = self.stream

        # a closing parenthesis straight away means there are no expressions
        list_is_empty = (stream.token_type() == tokenizer.SYMBOL
                         and stream.symbol() == CLOSE_PAREN)
        count = 0
        if not list_is_empty:
            self.compile_expression(node)
            count = 1
            while stream.symbol() == COMMA:
                self.add_terminal(node, stream.symbol())
                self.compile_expression(node)
                count += 1
        return count

    def compile_subroutine_call(self, root):
        '''
        compiles a subroutine call of the form name(args) or
        qualifier.name(args) and emits "call <Class>.<name> <nArgs>".

        An unqualified call targets the class being compiled; a qualified
        call uses the qualifier text as the class part of the VM name.
        '''
        # NOTE(review): no receiver object is pushed for method calls and a
        # qualifier that names a variable is not translated to its type;
        # confirm whether method-call support is handled elsewhere.
        class_name = self.class_name
        subroutine_name = self.stream.identifier()
        # Record the identifier that was actually read. Writing class_name
        # here put the current class's name into the parse tree in place of
        # the token that appears in the source.
        self.add_terminal(root, subroutine_name)
        if self.stream.symbol() == PERIOD:
            self.add_terminal(root, self.stream.symbol())
            # the identifier read first was the qualifier, not the subroutine
            class_name = subroutine_name
            subroutine_name = self.stream.identifier()
            self.add_terminal(root, self.stream.identifier())
        self.add_terminal(root, self.stream.symbol())
        num_vars = self.compile_expression_list(root)
        self.add_terminal(root, self.stream.symbol())
        self.writer.write_call('{cls}.{sub}'.format(
            cls=class_name,
            sub=subroutine_name),
            num_vars)

    def write(self):
        if self.xml_name:
            lines = self._write(self.root).split('\n')
            lines = lines[1:]
            file = open(self.xml_name, 'w')
            file.write('\n'.join(lines))
            file.close()
        self.writer.close()

    def _write(self, root):
        return minidom.parseString(ET.tostring(root)).toprettyxml()
class CompilationEngine():
    def __init__(self, filepath, vm_writer):
        """Set up the engine for one source file.

        filepath: path of the source file; handed to JackTokenizer and also
            used to derive the name of the XML dump.
        vm_writer: object that receives the generated VM commands.
        """
        # The XML dump goes next to the source: the last five characters of
        # the path are replaced by ".myImpl.xml" (presumably dropping a
        # ".jack" extension - TODO confirm). The file stays open until
        # __exit__ closes it.
        self.wf = open(filepath[:-5] + ".myImpl.xml", 'w')
        self.tokenizer = JackTokenizer(filepath)
        self.symbol_table = SymbolTable()
        self.vmw = vm_writer
        # filled in by compile_class once the class name has been read
        self.compiled_class_name = None
        # counter behind get_new_label
        self.label_num = 0

    def __enter__(self):
        """Context-manager entry: return the engine itself."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context-manager exit: close the XML output file.

        Returns None, so an exception raised inside the ``with`` body is not
        suppressed.
        """
        self.wf.close()

    def get_new_label(self):
        self.label_num += 1
        return 'LABEL_%d' % self.label_num

    def compile(self):
        """Entry point: compile the input by delegating to compile_class."""
        self.compile_class()

    def compile_class(self):
        """Compile ``class Name { classVarDec* subroutineDec* }``.

        The class name is stored in self.compiled_class_name so that later
        code can build ``Class.subroutine`` names from it.
        """
        self.write_element_start('class')

        self.compile_keyword([Tokens.CLASS])
        name_token = self.compile_class_name()
        self.compiled_class_name = name_token.token
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)

        # class-level variable declarations first, then the subroutines
        while self.next_is_class_var_dec():
            self.compile_class_var_dec()
        while self.next_is_subroutine_dec():
            self.compile_subroutine_dec()

        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.write_element_end('class')

    def compile_class_var_dec(self):
        """Compile ``(static | field) type name (, name)* ;``.

        Every declared name is defined in the symbol table with the
        declaration's type and a kind of STATIC or FIELD.
        """
        self.write_element_start('classVarDec')

        leading = self.compile_keyword([Tokens.STATIC, Tokens.FIELD])
        if leading == Tokens.STATIC:
            kind = IdentifierKind.STATIC
        elif leading == Tokens.FIELD:
            kind = IdentifierKind.FIELD
        else:
            kind = None
            self.raise_syntax_error('Unexpected token')

        declared_type = self.compile_type().token

        # one or more names, separated by commas
        while True:
            self.compile_var_name(declaration=True, type=declared_type, kind=kind)
            if not self.next_is(Tokens.COMMA):
                break
            self.compile_symbol(Tokens.COMMA)

        self.compile_symbol(Tokens.SEMI_COLON)

        self.write_element_end('classVarDec')

    def compile_var_dec(self):
        """Compile ``var type name (, name)* ;``.

        Each name is defined in the symbol table with kind VAR.

        Returns the number of variables declared by this statement.
        """
        self.write_element_start('varDec')
        self.compile_keyword(Tokens.VAR)
        declared_type = self.compile_type().token

        declared = 0
        while True:
            self.compile_var_name(declaration=True, type=declared_type, kind=IdentifierKind.VAR)
            declared += 1
            if not self.next_is(Tokens.COMMA):
                break
            self.compile_symbol(Tokens.COMMA)

        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('varDec')

        return declared

    def compile_subroutine_dec(self):
        """Compile one constructor, function or method declaration.

        Starts a fresh subroutine scope in the symbol table, compiles the
        header (kind, return type, name, parameter list) and hands the body
        to compile_subroutine_body. For a method, '$this' is defined as an
        ARG entry before the declared parameters.
        """
        self.symbol_table.start_subroutine()

        self.write_element_start('subroutineDec')

        routine_kind = self.compile_keyword(
            [Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD])

        # return type: 'void' or an ordinary type
        returns_void = self.tokenizer.see_next() == Tokens.VOID
        if returns_void:
            self.compile_keyword(Tokens.VOID)
        else:
            self.compile_type()

        name_token = self.compile_subroutine_name()
        subroutine_name = name_token.token
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)

        if routine_kind == Tokens.METHOD:
            self.symbol_table.define('$this', self.compiled_class_name, IdentifierKind.ARG)

        self.compile_parameter_list()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        self.compile_subroutine_body(subroutine_name, routine_kind)

        self.write_element_end('subroutineDec')

    def compile_subroutine_name(self):
        """Write a 'category: subroutine' info element, then compile the
        identifier and return whatever compile_identifier returns."""
        self.write_identifier_info('category: subroutine')
        return self.compile_identifier()

    def compile_class_name(self):
        """Write a 'category: class' info element, then compile the
        identifier and return whatever compile_identifier returns."""
        self.write_identifier_info('category: class')
        return self.compile_identifier()

    def compile_var_name(self, declaration=False, type=None, kind=None, let=False, call=False):
        """Compile a variable-name identifier.

        declaration: define the upcoming name in the symbol table using
            ``type`` and ``kind``; no VM code is emitted.
        let / call: the name is an assignment target or a call receiver; no
            VM code is emitted here.
        Otherwise the name is being read and a push of its segment and index
        is emitted (nothing is pushed for a kind that is none of ARG, VAR,
        FIELD, STATIC).

        An IdentifierInfo element describing the name is always written.
        Returns whatever compile_identifier returns.
        """
        name = self.tokenizer.see_next().token
        table = self.symbol_table

        if declaration:
            table.define(name, type, kind)
        elif not (let or call):
            kind = table.kind_of(name)
            if kind == IdentifierKind.ARG:
                segment = Segment.ARG
            elif kind == IdentifierKind.VAR:
                segment = Segment.LOCAL
            elif kind == IdentifierKind.FIELD:
                segment = Segment.THIS
            elif kind == IdentifierKind.STATIC:
                segment = Segment.STATIC
            else:
                segment = None
            if segment is not None:
                self.vmw.write_push(segment, table.index_of(name))

        info = 'declaration: %s, kind: %s, index: %d' % (
            declaration, table.kind_of(name), table.index_of(name))
        self.write_identifier_info(info)
        return self.compile_identifier()

    def write_identifier_info(self, value):
        """Write ``value`` as an 'IdentifierInfo' element via write_element."""
        self.write_element('IdentifierInfo', value)

    def compile_parameter_list(self):
        """Compile a possibly empty ``type name (, type name)*`` list.

        The list is considered non-empty when the upcoming token is one of
        the int / char / boolean tokens or an Identifier instance. Every
        parameter is defined in the symbol table with kind ARG.
        """
        self.write_element_start('parameterList')

        upcoming = self.tokenizer.see_next()
        starts_with_type = (upcoming in [Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN]
                            or isinstance(upcoming, Identifier))
        if starts_with_type:
            while True:
                param_type = self.compile_type().token
                self.compile_var_name(declaration=True, type=param_type, kind=IdentifierKind.ARG)
                if not self.next_is(Tokens.COMMA):
                    break
                self.compile_symbol(Tokens.COMMA)

        self.write_element_end('parameterList')

    def compile_subroutine_body(self, subroutine_name, subroutine_dec_token):
        """Compile ``{ varDec* statements }`` and emit the function prologue.

        subroutine_name: name used to build the VM function name
            ``<class>.<subroutine_name>``.
        subroutine_dec_token: the constructor / function / method keyword
            token; it selects how ``pointer 0`` is initialised:
              * method      - argument 0 is popped into pointer 0
              * constructor - Memory.alloc(<field count>) result is popped
                              into pointer 0
              * function    - nothing extra
            Any other token is reported through raise_syntax_error.

        Debug information (the subroutine being compiled and the contents of
        the symbol table's arg_table) is printed to standard output.

        Returns the number of local variables declared in the body.
        """
        self.write_element_start('subroutineBody')

        # Debug trace. Each print takes one parenthesised expression so the
        # line is valid, and prints the same text, on Python 2 and Python 3.
        print("%s %s" % (subroutine_name, subroutine_dec_token))

        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        # the local count must be known before `function` can be emitted
        local_num = 0
        while self.next_is(Tokens.VAR):
            local_num += self.compile_var_dec()

        self.vmw.write_function("%s.%s" % (self.compiled_class_name, subroutine_name), local_num)

        if subroutine_dec_token == Tokens.METHOD:
            self.vmw.write_push(Segment.ARG, 0)
            self.vmw.write_pop(Segment.POINTER, 0)
        elif subroutine_dec_token == Tokens.CONSTRUCTOR:
            self.vmw.write_push(Segment.CONST, self.symbol_table.var_count(IdentifierKind.FIELD))
            self.vmw.write_call('Memory.alloc', 1)
            self.vmw.write_pop(Segment.POINTER, 0)
        elif subroutine_dec_token == Tokens.FUNCTION:
            pass
        else:
            self.raise_syntax_error('Invalid token')

        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)

        self.write_element_end('subroutineBody')

        print("=========")
        arg_table = self.symbol_table.arg_table
        for key in arg_table:
            entry = arg_table[key]
            print("%s %s kind: %s index: %s" % (entry.type, key, entry.kind, entry.index))

        return local_num

    def compile_statements(self):
        """Compile statements for as long as next_is_statement() holds,
        wrapped in a 'statements' element."""
        self.write_element_start('statements')

        more_statements = self.next_is_statement()
        while more_statements:
            self.compile_statement()
            more_statements = self.next_is_statement()

        self.write_element_end('statements')

    def compile_statement(self):
        """Compile one statement, chosen by the upcoming keyword.

        Handles let, if, while, do and return; does nothing when the
        upcoming token starts none of them.
        """
        if self.next_is(Tokens.LET):
            self._compile_let_statement()
        elif self.next_is(Tokens.IF):
            self._compile_if_statement()
        elif self.next_is(Tokens.WHILE):
            self._compile_while_statement()
        elif self.next_is(Tokens.DO):
            self._compile_do_statement()
        elif self.next_is(Tokens.RETURN):
            self._compile_return_statement()

    def _segment_for_variable(self, name):
        """Return the VM segment holding variable ``name``, or None when its
        kind is none of ARG, VAR, FIELD, STATIC."""
        kind = self.symbol_table.kind_of(name)
        if kind == IdentifierKind.ARG:
            return Segment.ARG
        if kind == IdentifierKind.VAR:
            return Segment.LOCAL
        if kind == IdentifierKind.FIELD:
            return Segment.THIS
        if kind == IdentifierKind.STATIC:
            return Segment.STATIC
        return None

    def _compile_let_statement(self):
        """Compile ``let name = expr;`` or ``let name[index] = expr;``."""
        self.write_element_start('letStatement')
        self.compile_keyword(Tokens.LET)
        let_var = self.compile_var_name(let=True).token

        if self.next_is(Tokens.LEFT_BOX_BRACKET):
            self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
            self.compile_expression()  # pushes the index
            self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
            self.compile_symbol(Tokens.EQUAL)

            # base address + index -> address of the entry, kept on the stack
            segment = self._segment_for_variable(let_var)
            if segment is not None:
                self.vmw.write_push(segment, self.symbol_table.index_of(let_var))
            self.vmw.write_arithmetic(Command.ADD)

            # value to store
            self.compile_expression()

            # The address stays on the stack while the right-hand side runs.
            # Parking it in a temp slot beforehand is unsafe: the temp segment
            # is shared by every function, so a call inside the expression
            # that performs its own array assignment would overwrite it.
            self.vmw.write_pop(Segment.TEMP, 0)      # value
            self.vmw.write_pop(Segment.POINTER, 1)   # THAT <- entry address
            self.vmw.write_push(Segment.TEMP, 0)
            self.vmw.write_pop(Segment.THAT, 0)
            self.compile_symbol(Tokens.SEMI_COLON)
        else:
            self.compile_symbol(Tokens.EQUAL)
            self.compile_expression()
            self.compile_symbol(Tokens.SEMI_COLON)
            segment = self._segment_for_variable(let_var)
            if segment is not None:
                self.vmw.write_pop(segment, self.symbol_table.index_of(let_var))

        self.write_element_end('letStatement')

    def _compile_if_statement(self):
        """Compile ``if (expr) { ... }`` with an optional ``else { ... }``."""
        self.write_element_start('ifStatement')
        self.compile_keyword(Tokens.IF)
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
        self.compile_expression()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        # jump over the if-body when the condition does not hold
        self.vmw.write_arithmetic(Command.NOT)
        else_label = self.get_new_label()
        end_label = self.get_new_label()
        self.vmw.write_if(else_label)
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.vmw.write_goto(end_label)
        self.vmw.write_label(else_label)
        if self.next_is(Tokens.ELSE):
            self.compile_keyword(Tokens.ELSE)
            self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
            self.compile_statements()
            self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.vmw.write_label(end_label)
        self.write_element_end('ifStatement')

    def _compile_while_statement(self):
        """Compile ``while (expr) { ... }``."""
        self.write_element_start('whileStatement')
        loop_label = self.get_new_label()
        exit_label = self.get_new_label()
        self.compile_keyword(Tokens.WHILE)
        self.vmw.write_label(loop_label)
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
        self.compile_expression()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        # leave the loop when the condition does not hold
        self.vmw.write_arithmetic(Command.NOT)
        self.vmw.write_if(exit_label)
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.vmw.write_goto(loop_label)
        self.vmw.write_label(exit_label)
        self.write_element_end('whileStatement')

    def _compile_do_statement(self):
        """Compile ``do call;`` and pop the call's result into temp 0."""
        self.write_element_start('doStatement')
        self.compile_keyword(Tokens.DO)
        self.compile_subroutine_call()
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('doStatement')
        self.vmw.write_pop(Segment.TEMP, 0)

    def _compile_return_statement(self):
        """Compile ``return;`` (pushes constant 0) or ``return expr;``."""
        self.write_element_start('returnStatement')
        self.compile_keyword(Tokens.RETURN)
        if not self.next_is(Tokens.SEMI_COLON):
            self.compile_expression()
        else:
            self.vmw.write_push(Segment.CONST, 0)

        self.compile_symbol(Tokens.SEMI_COLON)
        self.vmw.write_return()

        self.write_element_end('returnStatement')

    def compile_subroutine_call(self):
        """Compile a subroutine call and emit the matching VM ``call``.

        Three shapes are recognised:
          * ``name(args)`` - a call on the current object: pointer 0 is
            pushed as the extra first argument and the target is
            ``<compiled class>.name``.
          * ``var.name(args)`` where ``var`` has a kind in the symbol table -
            the variable is pushed as the extra first argument and the
            target is ``<type of var>.name``.
          * ``Class.name(args)`` otherwise - a call with exactly the listed
            arguments.
        """
        if self.next_is(Tokens.LEFT_ROUND_BRACKET, idx=1):
            # name(args): call on the current object
            routine = self.compile_subroutine_name().token
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.vmw.write_push(Segment.POINTER, 0)
            explicit_args = self.compile_expression_list()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            target = "%s.%s" % (self.compiled_class_name, routine)
            self.vmw.write_call(target, explicit_args + 1)
            return

        qualifier = self.tokenizer.see_next().token
        if not self.symbol_table.kind_of(qualifier):
            # Class.name(args): the qualifier is not a known variable
            classname = self.compile_class_name().token
            self.compile_symbol(Tokens.DOT)
            routine = self.compile_subroutine_name().token
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            explicit_args = self.compile_expression_list()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.vmw.write_call("%s.%s" % (classname, routine), explicit_args)
            return

        # var.name(args): the variable becomes the extra first argument
        instance_name = self.compile_var_name(call=True).token
        self.compile_symbol(Tokens.DOT)
        routine = self.compile_subroutine_name().token
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)

        kind = self.symbol_table.kind_of(instance_name)
        if kind == IdentifierKind.ARG:
            segment = Segment.ARG
        elif kind == IdentifierKind.VAR:
            segment = Segment.LOCAL
        elif kind == IdentifierKind.FIELD:
            segment = Segment.THIS
        elif kind == IdentifierKind.STATIC:
            segment = Segment.STATIC
        else:
            segment = None
        if segment is not None:
            self.vmw.write_push(segment, self.symbol_table.index_of(instance_name))

        explicit_args = self.compile_expression_list()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        target = "%s.%s" % (self.symbol_table.type_of(instance_name), routine)
        self.vmw.write_call(target, explicit_args + 1)

    def compile_expression_list(self):
        """Compile a possibly empty, comma-separated list of expressions.

        The list is empty when the upcoming token is the closing round
        bracket. Returns the number of expressions compiled.
        """
        self.write_element_start('expressionList')

        count = 0
        if not self.next_is(Tokens.RIGHT_ROUND_BRACKET):
            while True:
                self.compile_expression()
                count += 1
                if not self.next_is(Tokens.COMMA):
                    break
                self.compile_symbol(Tokens.COMMA)

        self.write_element_end('expressionList')

        return count

    def compile_expression(self):
        """Compile ``term (op term)*``.

        Operators are applied strictly left to right; each operator's VM
        code is emitted after its right-hand term. ``*`` and ``/`` become
        calls to Math.multiply and Math.divide, the rest map to a single
        arithmetic command.
        """
        binary_ops = [
            Tokens.PLUS,
            Tokens.MINUS,
            Tokens.MULTI,
            Tokens.DIV,
            Tokens.AND,
            Tokens.PIPE,
            Tokens.LESS_THAN,
            Tokens.GREATER_THAN,
            Tokens.EQUAL]
        # operators that translate to one arithmetic/logic command
        direct_commands = (
            (Tokens.PLUS, Command.ADD),
            (Tokens.MINUS, Command.SUB),
            (Tokens.AND, Command.AND),
            (Tokens.PIPE, Command.OR),
            (Tokens.LESS_THAN, Command.LT),
            (Tokens.GREATER_THAN, Command.GT),
            (Tokens.EQUAL, Command.EQ),
        )

        self.write_element_start('expression')
        self.compile_term()
        while self.next_is(binary_ops):
            op_token = self.compile_symbol(binary_ops)
            self.compile_term()
            if op_token == Tokens.MULTI:
                self.vmw.write_call('Math.multiply', 2)
            elif op_token == Tokens.DIV:
                self.vmw.write_call('Math.divide', 2)
            else:
                for symbol, command in direct_commands:
                    if op_token == symbol:
                        self.vmw.write_arithmetic(command)
                        break

        self.write_element_end('expression')

    def compile_term(self):
        """Compile a single term and emit the VM code that pushes its value.

        The branch is chosen by looking at the next token (and, for
        identifiers, the token after it).  Raises via raise_syntax_error when
        the next token cannot start a term.
        """
        self.write_element_start('term')

        if self.next_type_is(TokenType.INT_CONST):
            value_str = self.compile_integer_constant()
            self.vmw.write_push(Segment.CONST, value_str)
        elif self.next_type_is(TokenType.STRING_CONST):
            # compile_string_constant emits the String construction code itself.
            self.compile_string_constant()
        elif self.next_is(Tokens.NULL):
            # null is pushed as constant 0.
            self.compile_keyword(Tokens.NULL)
            self.vmw.write_push(Segment.CONST, 0)
        elif self.next_is(Tokens.THIS):
            # this is read from pointer 0.
            self.compile_keyword(Tokens.THIS)
            self.vmw.write_push(Segment.POINTER, 0)
        elif self.next_is(Tokens.TRUE):
            # true is emitted as NOT applied to constant 0.
            self.compile_keyword(Tokens.TRUE)
            self.vmw.write_push(Segment.CONST, 0)
            self.vmw.write_arithmetic(Command.NOT)
        elif self.next_is(Tokens.FALSE):
            # false is pushed as constant 0.
            self.compile_keyword(Tokens.FALSE)
            self.vmw.write_push(Segment.CONST, 0)
        elif self.next_type_is(TokenType.IDENTIFIER):
            # The token after the identifier decides between array access,
            # subroutine call and plain variable.
            if self.next_is(Tokens.LEFT_BOX_BRACKET, idx=1):

                # NOTE(review): compile_var_name is not visible here; it
                # presumably pushes the array variable's value - confirm.
                # var_name itself is not used afterwards.
                var_name = self.compile_var_name().token
                self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
                self.compile_expression()

                # Add the two values on the stack, point `that` at the result
                # through pointer 1, and read the element via that 0.
                self.vmw.write_arithmetic(Command.ADD)
                self.vmw.write_pop(Segment.POINTER, 1)
                self.vmw.write_push(Segment.THAT, 0)
                self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
            elif self.next_is([Tokens.LEFT_ROUND_BRACKET, Tokens.DOT], idx=1):
                self.compile_subroutine_call()
            else:
                self.compile_var_name()

        elif self.next_is(Tokens.LEFT_ROUND_BRACKET):
            # Parenthesised sub-expression.
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        elif self.next_is(Tokens.TILDE):
            # Unary '~': compile the operand first, then emit NOT.
            self.compile_symbol(Tokens.TILDE)
            self.compile_term()
            self.vmw.write_arithmetic(Command.NOT)
        elif self.next_is(Tokens.MINUS):
            # Unary '-': compile the operand first, then emit NEG.
            self.compile_symbol(Tokens.MINUS)
            self.compile_term()
            self.vmw.write_arithmetic(Command.NEG)
        else:
            self.raise_syntax_error('')
        self.write_element_end('term')

    def next_type_is(self, token_type):
        """Report whether the tokenizer's next token has type `token_type`."""
        upcoming = self.tokenizer.see_next()
        return upcoming.type == token_type

    def compile_type(self):
        """Compile a type: `int`, `char`, `boolean` or a class name.

        Returns:
            The type token, as peeked from the tokenizer before consuming it.
        """
        primitive_types = [Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN]
        peeked = self.tokenizer.see_next()
        if not self.next_is(primitive_types):
            self.compile_class_name()
        else:
            self.compile_keyword(primitive_types)
        return peeked

    def next_is_statement(self):
        """Report whether the next token is let, if, while, do or return."""
        statement_keywords = [
            Tokens.LET,
            Tokens.IF,
            Tokens.WHILE,
            Tokens.DO,
            Tokens.RETURN,
        ]
        return self.next_is(statement_keywords)

    def next_is(self, tokens, idx=0):
        """Check a look-ahead token against one token or a list of tokens.

        Args:
            tokens: A single token, or a list (or list subclass) of acceptable
                tokens.
            idx: Look-ahead offset forwarded to the tokenizer's `see_next`.

        Returns:
            The result of the membership test when `tokens` is a list,
            otherwise the result of comparing the look-ahead token with
            `tokens` using `==`.
        """
        upcoming = self.tokenizer.see_next(idx=idx)
        # isinstance (rather than an exact type comparison) also accepts list
        # subclasses, which are still collections of candidate tokens.
        if isinstance(tokens, list):
            return upcoming in tokens
        return upcoming == tokens

    def next_is_class_var_dec(self):
        """Report whether the next token is `static` or `field`."""
        class_var_keywords = [Tokens.STATIC, Tokens.FIELD]
        return self.next_is(class_var_keywords)

    def next_is_subroutine_dec(self):
        """Report whether the next token is constructor, function or method."""
        subroutine_keywords = [
            Tokens.CONSTRUCTOR,
            Tokens.FUNCTION,
            Tokens.METHOD,
        ]
        return self.next_is(subroutine_keywords)

    def compile_symbol(self, tokens):
        """Consume the next token, which must be an expected symbol.

        Args:
            tokens: The expected symbol token, or a list of acceptable ones.

        Returns:
            The consumed token, after writing it as a `symbol` element.

        Raises:
            Exception: Through `raise_syntax_error` when the consumed token is
                not one of the expected symbols.
        """
        self.tokenizer.advance()
        current = self.tokenizer.current_token
        if isinstance(tokens, list):
            accepted = current in tokens
        else:
            accepted = current == tokens
        if accepted:
            self.write_element('symbol', current.token_escaped)
            return current
        # Say what was expected and what was found instead of an empty message.
        self.raise_syntax_error(
            'Expected symbol %r but found %r' % (tokens, current))

    def compile_keyword(self, tokens):
        """Consume the next token, which must be an expected keyword.

        Args:
            tokens: The expected keyword token, or a list of acceptable ones.

        Returns:
            The consumed token, after writing it as a `keyword` element.

        Raises:
            Exception: Through `raise_syntax_error` when the consumed token is
                not one of the expected keywords.
        """
        self.tokenizer.advance()
        current = self.tokenizer.current_token
        if isinstance(tokens, list):
            accepted = current in tokens
        else:
            accepted = current == tokens
        if accepted:
            self.write_element('keyword', current.token_escaped)
            return current
        # Say what was expected and what was found instead of an empty message.
        self.raise_syntax_error(
            'Expected keyword %r but found %r' % (tokens, current))

    def compile_identifier(self):
        """Consume the next token, which must be an Identifier.

        Writes it as an `identifier` element and returns the token; otherwise
        reports a syntax error through raise_syntax_error.
        """
        self.tokenizer.advance()
        if not isinstance(self.tokenizer.current_token, Identifier):
            self.raise_syntax_error('')
        else:
            escaped_name = self.tokenizer.current_token.token_escaped
            self.write_element('identifier', escaped_name)
            return self.tokenizer.current_token

    def compile_integer_constant(self):
        """Consume the next token, which must be an IntegerConstant.

        Writes it as an `integerConstant` element and returns the token's
        `token_escaped` value; otherwise reports a syntax error.
        """
        self.tokenizer.advance()
        if not isinstance(self.tokenizer.current_token, IntegerConstant):
            self.raise_syntax_error('')
        else:
            self.write_element(
                'integerConstant',
                self.tokenizer.current_token.token_escaped
            )
            return self.tokenizer.current_token.token_escaped

    def compile_string_constant(self):
        """Consume a StringConstant token and emit code that builds the string.

        The emitted code calls String.new with the text length and then
        String.appendChar once per character.  A token of any other kind is
        reported through raise_syntax_error.
        """
        self.tokenizer.advance()
        if not isinstance(self.tokenizer.current_token, StringConstant):
            self.raise_syntax_error('')
        else:
            text = self.tokenizer.current_token.token
            self.write_element(
                'stringConstant',
                self.tokenizer.current_token.token_escaped
            )
            self.vmw.write_push(Segment.CONST, len(text))
            self.vmw.write_call('String.new', 1)
            for char_code in map(ord, text):
                self.vmw.write_push(Segment.CONST, char_code)
                self.vmw.write_call('String.appendChar', 2)

    def write_element(self, elem_name, value):
        """Write a one-line `<name> value </name>` element to the XML file."""
        line = '<%s> %s </%s>\n' % (elem_name, value, elem_name)
        self.wf.write(line)

    def write_element_start(self, elem_name):
        """Write the opening tag `<name>` on its own line to the XML file."""
        opening_tag = '<%s>\n' % elem_name
        self.wf.write(opening_tag)

    def write_element_end(self, elem_name):
        """Write the closing tag `</name>` on its own line to the XML file."""
        closing_tag = '</%s>\n' % elem_name
        self.wf.write(closing_tag)

    def raise_syntax_error(self, msg):
        """Raise a plain Exception whose text is `msg`."""
        text = '%s' % msg
        raise Exception(text)
Exemple #14
0
class CompilationEngine():
    def __init__(self, jack_file, vm_file):
        """Create the engine's collaborators and reset its bookkeeping.

        Args:
            jack_file: Passed to JackTokenizer.
            vm_file: Passed to VmWriter and kept on the instance.
        """
        # Collaborators, constructed in tokenizer / table / writer order.
        self._jack_tokenizer = JackTokenizer(jack_file)
        self._symbol_table = SymbolTable()
        self._vm_file = vm_file
        self._vm_writer = VmWriter(vm_file)

        # Accumulated text and per-compilation state.
        self._xml_text = ''
        self._vm_text = ''
        self._compiled_class_name = ''
        self._class_name = None
        self._label_count = 0

    def compile_class(self):
        """Compile a class: header, variable declarations, then subroutines.

        The class name is remembered in `_compiled_class_name`.
        """
        class_var_keywords = [Keyword.STATIC, Keyword.FIELD]
        subroutine_keywords = [
            Keyword.CONSTRUCTOR,
            Keyword.FUNCTION,
            Keyword.METHOD,
        ]

        self._write_start('class')
        self._compile_keyword()  # 'class'
        self._write('IdentifierInfo', 'category: class')
        self._compiled_class_name = self._compile_identifier()
        self._compile_symbol()  # '{'

        while self._what_next_token(class_var_keywords):
            self.compile_class_var_dec()
        while self._what_next_token(subroutine_keywords):
            self.compile_subroutine_dec()

        self._compile_symbol()  # '}'
        self._write_end('class')

    def compile_class_var_dec(self):
        """Compile a `static` / `field` declaration and define its variables.

        Every declared name is entered in the symbol table with the declared
        type token and the kind derived from the leading keyword.
        """
        self._write_start('classVarDec')

        keyword = self._compile_keyword()
        if keyword == Keyword.STATIC:
            kind = Kind.STATIC
        elif keyword == Keyword.FIELD:
            kind = Kind.FIELD
        else:
            kind = None

        # Peek the type before consuming it; it is stored with each variable.
        type_token = self._jack_tokenizer.next_token()
        if self._what_next_token([Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
            self._compile_keyword()
        else:
            self._write('IdentifierInfo', 'category: class')
            self._compile_identifier()

        # One or more comma-separated variable names.
        while True:
            self._compile_var_name(declaration=True, type=type_token, kind=kind)
            if not self._what_next_token([Symbol.COMMA]):
                break
            self._compile_symbol()  # ','

        self._compile_symbol()  # ';'
        self._write_end('classVarDec')

    def compile_subroutine_dec(self):
        """Compile one constructor, function or method declaration.

        Calls the symbol table's `start_subroutine`, consumes the subroutine
        keyword, return type, name and parameter list, then hands the body to
        `compile_subroutine_body`.  For a method, a '$this' entry typed with
        the current class is defined as an argument before the declared
        parameters are compiled.
        """
        self._symbol_table.start_subroutine()
        self._write_start('subroutineDec')
        token = self._compile_keyword()  # constructor / function / method

        # Return type: 'void', a primitive keyword, or a class name.
        # (The original peeked at the token once more here and discarded the
        # result; that dead statement has been dropped.)
        if self._jack_tokenizer.next_token() == Keyword.VOID:
            self._compile_keyword()
        elif self._what_next_token(
                [Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
            self._compile_keyword()
        else:
            self._write('IdentifierInfo', 'category: class')
            self._compile_identifier()

        self._write('IdentifierInfo', 'category: subroutine')
        subroutine_name = self._compile_identifier()
        self._compile_symbol()  # '('
        if token == Keyword.METHOD:
            self._symbol_table.define('$this', self._compiled_class_name,
                                      Kind.ARG)
        self.compile_parameter_list()
        self._compile_symbol()  # ')'
        self.compile_subroutine_body(subroutine_name, token)
        self._write_end('subroutineDec')

    def compile_parameter_list(self):
        """Compile a possibly empty parameter list.

        Each parameter is defined in the symbol table as an argument with its
        declared type token.
        """
        primitive_types = [Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]
        self._write_start('parameterList')

        starts_with_type = (
            self._jack_tokenizer.next_token() in primitive_types
            or self._jack_tokenizer.next_token_type() == Type.IDENTIFIER)
        if starts_with_type:
            while True:
                # Peek the type so it can be recorded with the parameter.
                type_token = self._jack_tokenizer.next_token()
                if self._what_next_token(primitive_types):
                    self._compile_keyword()
                else:
                    self._write('IdentifierInfo', 'category: class')
                    self._compile_identifier()
                self._compile_var_name(declaration=True,
                                       type=type_token,
                                       kind=Kind.ARG)
                if not self._what_next_token([Symbol.COMMA]):
                    break
                self._compile_symbol()  # ','

        self._write_end('parameterList')

    def compile_subroutine_body(self, subroutine_name, subroutine_token):
        """Compile a subroutine body and emit its function header.

        Args:
            subroutine_name: Name used, with the class name, for the VM
                function.
            subroutine_token: The keyword that introduced the subroutine; it
                selects the prologue that is emitted.

        Returns:
            The number of local variables declared in the body.
        """
        self._write_start('subroutineBody')
        self._compile_symbol()  # '{'

        # All var declarations come first; count the locals they introduce.
        local_num = 0
        while self._what_next_token([Keyword.VAR]):
            local_num += self.compile_var_dec()

        function_name = '%s.%s' % (self._compiled_class_name, subroutine_name)
        self._vm_writer.write_function(function_name, local_num)

        if subroutine_token == Keyword.METHOD:
            # Argument 0 is popped into pointer 0.
            self._vm_writer.write_push(Segment.ARG, 0)
            self._vm_writer.write_pop(Segment.POINTER, 0)
        elif subroutine_token == Keyword.CONSTRUCTOR:
            # Memory.alloc is called with the field count; its result is
            # popped into pointer 0.
            field_count = self._symbol_table.var_count(Kind.FIELD)
            self._vm_writer.write_push(Segment.CONST, field_count)
            self._vm_writer.write_call('Memory.alloc', 1)
            self._vm_writer.write_pop(Segment.POINTER, 0)
        # Any other subroutine kind needs no prologue.

        self.compile_statements()
        self._compile_symbol()  # '}'
        self._write_end('subroutineBody')
        return local_num

    def compile_var_dec(self):
        """Compile one `var` declaration and define its local variables.

        Returns:
            The number of variables declared by this statement.
        """
        self._write_start('varDec')
        self._compile_keyword()  # 'var'

        # Peek the type before consuming it; it is stored with each variable.
        type_token = self._jack_tokenizer.next_token()
        if not self._what_next_token(
                [Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
            self._write('IdentifierInfo', 'category: class')
            self._compile_identifier()
        else:
            self._compile_keyword()

        declared = 0
        while True:
            self._compile_var_name(declaration=True,
                                   type=type_token,
                                   kind=Kind.VAR)
            declared += 1
            if not self._what_next_token([Symbol.COMMA]):
                break
            self._compile_symbol()  # ','

        self._compile_symbol()  # ';'
        self._write_end('varDec')
        return declared

    def compile_statements(self):
        """Compile statements until the next token does not start one."""
        # Statement keyword -> compiler, tried in this order.
        handlers = (
            (Keyword.LET, self.compile_let),
            (Keyword.IF, self.compile_if),
            (Keyword.WHILE, self.compile_while),
            (Keyword.DO, self.compile_do),
            (Keyword.RETURN, self.compile_return),
        )
        self._write_start('statements')
        while True:
            for keyword, handler in handlers:
                if self._what_next_token([keyword]):
                    handler()
                    break
            else:
                # No statement keyword ahead: the sequence is finished.
                break
        self._write_end('statements')

    def compile_let(self):
        """Compile `let name = expr;` or `let name[index] = expr;`.

        For the array form the target address (index + base) is left on the
        stack while the right-hand side is evaluated.  The result is then
        moved through temp 0 so the address can be popped into pointer 1 and
        the value stored via that 0.  The address is deliberately not parked
        in a temp slot during right-hand-side evaluation: the temp segment is
        shared by all VM functions, so a subroutine called from the
        right-hand side could overwrite it.
        """
        # Symbol-table kind -> VM segment holding variables of that kind.
        segment_by_kind = (
            (Kind.ARG, Segment.ARG),
            (Kind.VAR, Segment.LOCAL),
            (Kind.FIELD, Segment.THIS),
            (Kind.STATIC, Segment.STATIC),
        )

        self._write_start('letStatement')
        self._compile_keyword()  # 'let'
        let_var = self._compile_var_name(let=True)
        if self._what_next_token([Symbol.LEFT_BOX_BRACKET]):
            self._compile_symbol()  # '['
            self.compile_expression()  # index value is now on the stack
            self._compile_symbol()  # ']'
            self._compile_symbol()  # '='
            kind = self._symbol_table.kind_of(let_var)
            for known_kind, segment in segment_by_kind:
                if kind == known_kind:
                    self._vm_writer.write_push(
                        segment, self._symbol_table.index_of(let_var))
                    break
            # Target address = index + base; it stays on the stack.
            self._vm_writer.write_arithmetic(Command.ADD)
            self.compile_expression()  # right-hand side value on top
            self._vm_writer.write_pop(Segment.TEMP, 0)     # save the value
            self._vm_writer.write_pop(Segment.POINTER, 1)  # that = address
            self._vm_writer.write_push(Segment.TEMP, 0)    # restore the value
            self._vm_writer.write_pop(Segment.THAT, 0)     # store it
            self._compile_symbol()  # ';'
        else:
            self._compile_symbol()  # '='
            self.compile_expression()
            self._compile_symbol()  # ';'
            kind = self._symbol_table.kind_of(let_var)
            for known_kind, segment in segment_by_kind:
                if kind == known_kind:
                    self._vm_writer.write_pop(
                        segment, self._symbol_table.index_of(let_var))
                    break
        self._write_end('letStatement')

    def compile_if(self):
        """Compile an if statement with an optional else clause.

        The condition is negated; when the negated value is true the code
        jumps to the first label, which sits just before the else part.  The
        second label marks the end of the whole statement.
        """
        writer = self._vm_writer
        self._write_start('ifStatement')
        self._compile_keyword()  # 'if'
        self._compile_symbol()  # '('
        self.compile_expression()
        self._compile_symbol()  # ')'

        writer.write_arithmetic(Command.NOT)
        else_label = self._new_label()
        end_label = self._new_label()
        writer.write_if(else_label)

        self._compile_symbol()  # '{'
        self.compile_statements()
        self._compile_symbol()  # '}'
        writer.write_goto(end_label)

        writer.write_label(else_label)
        has_else = self._what_next_token([Keyword.ELSE])
        if has_else:
            self._compile_keyword()  # 'else'
            self._compile_symbol()  # '{'
            self.compile_statements()
            self._compile_symbol()  # '}'
        writer.write_label(end_label)
        self._write_end('ifStatement')

    def compile_while(self):
        """Compile a while loop.

        The first label marks the condition check; the negated condition jumps
        to the second label, which follows the loop body.
        """
        writer = self._vm_writer
        self._write_start('whileStatement')
        loop_label = self._new_label()
        exit_label = self._new_label()

        self._compile_keyword()  # 'while'
        writer.write_label(loop_label)
        self._compile_symbol()  # '('
        self.compile_expression()
        self._compile_symbol()  # ')'
        writer.write_arithmetic(Command.NOT)
        writer.write_if(exit_label)

        self._compile_symbol()  # '{'
        self.compile_statements()
        self._compile_symbol()  # '}'
        writer.write_goto(loop_label)
        writer.write_label(exit_label)
        self._write_end('whileStatement')

    def compile_do(self):
        """Compile a do statement and discard the call's return value.

        Three call forms are handled:
          * `name(...)`      - call on the current object (pointer 0 is pushed
                               as the extra first argument);
          * `var.name(...)`  - `var` is known to the symbol table: its value
                               is pushed as the extra first argument and the
                               call targets the variable's type;
          * `Class.name(...)` - anything else: call without an extra argument.
        """
        table = self._symbol_table
        writer = self._vm_writer

        self._write_start('doStatement')
        self._compile_keyword()  # 'do'

        # Decide the call form from the look-ahead tokens.
        if self._what_next_token([Symbol.LEFT_ROUND_BRACKET], 1):
            call_form = 'self'
        elif table.kind_of(self._jack_tokenizer.next_token()):
            call_form = 'instance'
        else:
            call_form = 'class'

        # Optional `qualifier .` prefix.
        qualifier = None
        if call_form == 'instance':
            qualifier = self._compile_var_name(call=True)
            self._compile_symbol()  # '.'
        elif call_form == 'class':
            self._write('IdentifierInfo', 'category: class')
            qualifier = self._compile_identifier()
            self._compile_symbol()  # '.'

        self._write('IdentifierInfo', 'category: subroutine')
        subroutine_name = self._compile_identifier()
        self._compile_symbol()  # '('

        # Push the receiver (if any) before the explicit arguments.
        if call_form == 'self':
            writer.write_push(Segment.POINTER, 0)
        elif call_form == 'instance':
            kind = table.kind_of(qualifier)
            for known_kind, segment in ((Kind.ARG, Segment.ARG),
                                        (Kind.VAR, Segment.LOCAL),
                                        (Kind.FIELD, Segment.THIS),
                                        (Kind.STATIC, Segment.STATIC)):
                if kind == known_kind:
                    writer.write_push(segment, table.index_of(qualifier))
                    break

        arg_num = self.compile_expression_list()
        self._compile_symbol()  # ')'

        if call_form == 'self':
            writer.write_call(
                '%s.%s' % (self._compiled_class_name, subroutine_name),
                arg_num + 1)
        elif call_form == 'instance':
            writer.write_call(
                '%s.%s' % (table.type_of(qualifier), subroutine_name),
                arg_num + 1)
        else:
            writer.write_call(
                '%s.%s' % (qualifier, subroutine_name), arg_num)

        self._compile_symbol()  # ';'
        self._write_end('doStatement')
        # The returned value is not used by a do statement.
        writer.write_pop(Segment.TEMP, 0)

    def compile_return(self):
        """Compile a return statement.

        When no expression follows `return`, constant 0 is pushed so that a
        value is always on the stack before the return command.
        """
        self._write_start('returnStatement')
        self._compile_keyword()  # 'return'
        if self._what_next_token([Symbol.SEMI_COLON]):
            self._vm_writer.write_push(Segment.CONST, 0)
        else:
            self.compile_expression()
        self._compile_symbol()  # ';'
        self._vm_writer.write_return()
        self._write_end('returnStatement')

    def compile_expression(self):
        """Compile `term (op term)*`.

        Each binary operator is emitted after its right-hand term has been
        compiled; '*' and '/' are emitted as calls to Math.multiply and
        Math.divide, the other operators as arithmetic commands.
        """
        writer = self._vm_writer
        # (operator symbol, emitter, emitter arguments), in matching order.
        emitters = (
            (Symbol.PLUS, writer.write_arithmetic, (Command.ADD,)),
            (Symbol.MINUS, writer.write_arithmetic, (Command.SUB,)),
            (Symbol.MULTI, writer.write_call, ('Math.multiply', 2)),
            (Symbol.DIV, writer.write_call, ('Math.divide', 2)),
            (Symbol.AND, writer.write_arithmetic, (Command.AND,)),
            (Symbol.PIPE, writer.write_arithmetic, (Command.OR,)),
            (Symbol.LESS_THAN, writer.write_arithmetic, (Command.LT,)),
            (Symbol.GREATER_THAN, writer.write_arithmetic, (Command.GT,)),
            (Symbol.EQUAL, writer.write_arithmetic, (Command.EQ,)),
        )
        binary_ops = [entry[0] for entry in emitters]

        self._write_start('expression')
        self.compile_term()
        while self._what_next_token(binary_ops):
            operator = self._compile_symbol()
            self.compile_term()
            for symbol, emit, args in emitters:
                if operator == symbol:
                    emit(*args)
                    break
        self._write_end('expression')

    def compile_term(self):
        """Compile a single term and emit the VM code that pushes its value.

        The branch is chosen from the next token (and, for identifiers, the
        token after it).  When no branch matches, nothing is consumed and only
        the enclosing <term> element is written.
        """
        self._write_start('term')
        if self._what_next_token_type([Type.INT_CONST]):
            value = self._compile_integer_constant()
            self._vm_writer.write_push(Segment.CONST, value)
        elif self._what_next_token_type([Type.STRING_CONST]):
            # Build the string: String.new(length), then one
            # String.appendChar call per character.
            value = self._compile_string_constant()
            self._vm_writer.write_push(Segment.CONST, len(value))
            self._vm_writer.write_call('String.new', 1)
            for v in value:
                self._vm_writer.write_push(Segment.CONST, ord(v))
                self._vm_writer.write_call('String.appendChar', 2)
        elif self._what_next_token([Keyword.NULL]):
            # null is pushed as constant 0.
            self._compile_keyword()
            self._vm_writer.write_push(Segment.CONST, 0)
        elif self._what_next_token([Keyword.THIS]):
            # this is read from pointer 0.
            self._compile_keyword()
            self._vm_writer.write_push(Segment.POINTER, 0)
        elif self._what_next_token([Keyword.TRUE]):
            # true is emitted as NOT applied to constant 0.
            self._compile_keyword()
            self._vm_writer.write_push(Segment.CONST, 0)
            self._vm_writer.write_arithmetic(Command.NOT)
        elif self._what_next_token([Keyword.FALSE]):
            # false is pushed as constant 0.
            self._compile_keyword()
            self._vm_writer.write_push(Segment.CONST, 0)
        elif self._what_next_token_type([Type.IDENTIFIER]):
            # The token after the identifier decides between array access,
            # subroutine call and plain variable.
            if self._what_next_token([Symbol.LEFT_BOX_BRACKET], 1):
                # Array read: _compile_var_name pushes the variable, the index
                # expression follows, and their sum is used as the address.
                self._compile_var_name()
                self._compile_symbol()
                self.compile_expression()
                self._vm_writer.write_arithmetic(Command.ADD)
                self._vm_writer.write_pop(Segment.POINTER, 1)
                self._vm_writer.write_push(Segment.THAT, 0)
                self._compile_symbol()
            elif self._what_next_token([Symbol.LEFT_ROUND_BRACKET, Symbol.DOT],
                                       1):
                if self._what_next_token([Symbol.LEFT_ROUND_BRACKET], 1):
                    # name(...): call on the current object; pointer 0 is
                    # pushed as the extra first argument.
                    self._write('IdentifierInfo', 'category: subroutine')
                    subroutine_name = self._compile_identifier()
                    self._compile_symbol()
                    self._vm_writer.write_push(Segment.POINTER, 0)
                    arg_num = self.compile_expression_list()
                    self._compile_symbol()
                    self._vm_writer.write_call(
                        '%s.%s' % (self._compiled_class_name, subroutine_name),
                        arg_num + 1)
                else:
                    identifier_str = self._jack_tokenizer.next_token()
                    # An identifier known to the symbol table is treated as a
                    # variable; anything else as a class name.
                    if self._symbol_table.kind_of(identifier_str):
                        # var.name(...): push the variable as the extra first
                        # argument and call into the variable's type.
                        instance_name = self._compile_var_name(call=True)
                        self._compile_symbol()
                        self._write('IdentifierInfo', 'category: subroutine')
                        subroutine_name = self._compile_identifier()
                        self._compile_symbol()
                        kind = self._symbol_table.kind_of(instance_name)
                        if kind == Kind.ARG:
                            self._vm_writer.write_push(
                                Segment.ARG,
                                self._symbol_table.index_of(instance_name))
                        elif kind == Kind.VAR:
                            self._vm_writer.write_push(
                                Segment.LOCAL,
                                self._symbol_table.index_of(instance_name))
                        elif kind == Kind.FIELD:
                            self._vm_writer.write_push(
                                Segment.THIS,
                                self._symbol_table.index_of(instance_name))
                        elif kind == Kind.STATIC:
                            self._vm_writer.write_push(
                                Segment.STATIC,
                                self._symbol_table.index_of(instance_name))
                        arg_num = self.compile_expression_list()
                        self._compile_symbol()
                        self._vm_writer.write_call(
                            '%s.%s' %
                            (self._symbol_table.type_of(instance_name),
                             subroutine_name), arg_num + 1)
                    else:
                        # Class.name(...): no extra argument is pushed.
                        self._write('IdentifierInfo', 'category: class')
                        class_name = self._compile_identifier()
                        self._compile_symbol()
                        self._write('IdentifierInfo', 'category: subroutine')
                        subroutine_name = self._compile_identifier()
                        self._compile_symbol()
                        arg_num = self.compile_expression_list()
                        self._compile_symbol()
                        self._vm_writer.write_call(
                            '%s.%s' % (class_name, subroutine_name), arg_num)
            else:
                # Plain variable: _compile_var_name emits the push.
                self._compile_var_name()
        elif self._what_next_token([Symbol.LEFT_ROUND_BRACKET]):
            # Parenthesised sub-expression.
            self._compile_symbol()
            self.compile_expression()
            self._compile_symbol()
        elif self._what_next_token([Symbol.TILDE]):
            # Unary '~': compile the operand first, then emit NOT.
            self._compile_symbol()
            self.compile_term()
            self._vm_writer.write_arithmetic(Command.NOT)
        elif self._what_next_token([Symbol.MINUS]):
            # Unary '-': compile the operand first, then emit NEG.
            self._compile_symbol()
            self.compile_term()
            self._vm_writer.write_arithmetic(Command.NEG)
        self._write_end('term')

    def compile_expression_list(self):
        """Compile a possibly empty, comma-separated list of expressions.

        Returns:
            The number of expressions that were compiled.
        """
        self._write_start('expressionList')
        count = 0
        # A ')' right away means there are no expressions at all.
        has_more = not self._what_next_token([Symbol.RIGHT_ROUND_BRACKET])
        while has_more:
            self.compile_expression()
            count += 1
            has_more = self._what_next_token([Symbol.COMMA])
            if has_more:
                self._compile_symbol()  # ','
        self._write_end('expressionList')
        return count

    def save(self):
        self._vm_writer.save()

    def _what_next_token(self, values, index=0):
        return self._jack_tokenizer.next_token(index) in values

    def _what_next_token_type(self, values, index=0):
        return self._jack_tokenizer.next_token_type(index) in values

    def _compile_symbol(self):
        self._jack_tokenizer.advance()
        value = self._jack_tokenizer.token()
        self._write('symbol', value)
        return value

    def _compile_keyword(self):
        self._jack_tokenizer.advance()
        value = self._jack_tokenizer.token()
        self._write('keyword', value)
        return value

    def _compile_identifier(self):
        self._jack_tokenizer.advance()
        value = self._jack_tokenizer.token()
        self._write('identifier', value)
        return value

    def _compile_integer_constant(self):
        self._jack_tokenizer.advance()
        value = self._jack_tokenizer.token()
        self._write('integerConstant', value)
        return value

    def _compile_string_constant(self):
        self._jack_tokenizer.advance()
        value = self._jack_tokenizer.token()
        self._write('stringConstant', value)
        return value

    def _compile_var_name(self,
                          declaration=False,
                          type=None,
                          kind=None,
                          let=False,
                          call=False):
        if declaration:
            self._symbol_table.define(self._jack_tokenizer.next_token(), type,
                                      kind)
        elif let:
            pass
        elif call:
            pass
        else:
            kind = self._symbol_table.kind_of(
                self._jack_tokenizer.next_token())
            if kind == Kind.ARG:
                self._vm_writer.write_push(
                    Segment.ARG,
                    self._symbol_table.index_of(
                        self._jack_tokenizer.next_token()))
            elif kind == Kind.VAR:
                self._vm_writer.write_push(
                    Segment.LOCAL,
                    self._symbol_table.index_of(
                        self._jack_tokenizer.next_token()))
            elif kind == Kind.FIELD:
                self._vm_writer.write_push(
                    Segment.THIS,
                    self._symbol_table.index_of(
                        self._jack_tokenizer.next_token()))
            elif kind == Kind.STATIC:
                self._vm_writer.write_push(
                    Segment.STATIC,
                    self._symbol_table.index_of(
                        self._jack_tokenizer.next_token()))

        self._write(
            'IdentifierInfo', 'declaration: %s, kind: %s, index: %d' %
            (declaration,
             self._symbol_table.kind_of(self._jack_tokenizer.next_token()),
             self._symbol_table.index_of(self._jack_tokenizer.next_token())))
        return self._compile_identifier()

    def _write(self, element, value):
        self._xml_text += '<{}> {} </{}>\n'.format(element, value, element)

    def _write_start(self, element):
        self._xml_text += '<%s>\n' % element

    def _write_end(self, element):
        self._xml_text += '</%s>\n' % element

    def _new_label(self):
        self._label_count += 1
        return 'LABEL_%d' % self._label_count
class CompilationEngine():
    def __init__(self, filepath, vm_writer):
        """Set up the tokenizer, symbol table and XML trace file for one source file.

        Args:
            filepath: Path of the source file to compile.  The XML trace is
                written next to it: the last five characters of the path
                (presumably a ``.jack`` suffix) are replaced by
                ``.myImpl.xml``.
            vm_writer: Object used to emit VM commands (``write_push``,
                ``write_pop``, ``write_call``, ...).
        """
        # Build everything that can fail on bad input before touching the
        # output file: opening it first would create/truncate the trace file
        # and leave its handle open if the tokenizer then raised.
        self.tokenizer = JackTokenizer(filepath)
        self.symbol_table = SymbolTable()
        self.vmw = vm_writer
        self.compiled_class_name = None  # set by compile_class()
        self.label_num = 0  # counter behind get_new_label()
        self.wf = open(filepath[:-5] + ".myImpl.xml", 'w')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.wf.close()

    def get_new_label(self):
        self.label_num += 1
        return 'LABEL_%d' % self.label_num

    def compile(self):
        self.compile_class()

    def compile_class(self):
        """Compile ``class <name> { classVarDec* subroutineDec* }``.

        The class name is stored in ``self.compiled_class_name`` and the
        whole construct is wrapped in a ``<class>`` XML element.
        """
        self.write_element_start('class')

        self.compile_keyword([Tokens.CLASS])
        name_token = self.compile_class_name()
        self.compiled_class_name = name_token.token
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)

        # All field/static declarations are consumed first, then all
        # subroutine declarations.
        for has_more, compile_one in (
                (self.next_is_class_var_dec, self.compile_class_var_dec),
                (self.next_is_subroutine_dec, self.compile_subroutine_dec)):
            while has_more():
                compile_one()

        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)

        self.write_element_end('class')

    def compile_class_var_dec(self):
        """Compile ``(static | field) type varName (, varName)* ;``.

        Every declared name is registered in the symbol table with the
        declared type and the kind that matches the leading keyword.
        """
        self.write_element_start('classVarDec')

        keyword = self.compile_keyword([Tokens.STATIC, Tokens.FIELD])
        kind = None
        if keyword == Tokens.STATIC:
            kind = IdentifierKind.STATIC
        elif keyword == Tokens.FIELD:
            kind = IdentifierKind.FIELD
        else:
            self.raise_syntax_error('Unexpected token')

        declared_type = self.compile_type().token

        # One name is mandatory; further names follow while commas keep coming.
        while True:
            self.compile_var_name(declaration=True,
                                  type=declared_type,
                                  kind=kind)
            if not self.next_is(Tokens.COMMA):
                break
            self.compile_symbol(Tokens.COMMA)

        self.compile_symbol(Tokens.SEMI_COLON)

        self.write_element_end('classVarDec')

    def compile_var_dec(self):
        """Compile ``var type varName (, varName)* ;``.

        Each name is defined in the symbol table as a ``VAR``.

        Returns:
            The number of variables declared by this statement.
        """
        self.write_element_start('varDec')
        self.compile_keyword(Tokens.VAR)
        declared_type = self.compile_type().token

        declared = 0
        # One name is mandatory; further names follow while commas keep coming.
        while True:
            self.compile_var_name(declaration=True,
                                  type=declared_type,
                                  kind=IdentifierKind.VAR)
            declared += 1
            if not self.next_is(Tokens.COMMA):
                break
            self.compile_symbol(Tokens.COMMA)

        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('varDec')

        return declared

    def compile_subroutine_dec(self):
        """Compile one ``constructor``/``function``/``method`` declaration.

        Resets the subroutine-level symbol table, consumes the header
        (kind, return type, name, parameter list) and hands the name and the
        kind keyword on to ``compile_subroutine_body``.
        """
        self.symbol_table.start_subroutine()

        self.write_element_start('subroutineDec')

        flavour = self.compile_keyword(
            [Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD])

        # Return type: either the keyword ``void`` or a regular type.
        returns_void = self.tokenizer.see_next() == Tokens.VOID
        if returns_void:
            self.compile_keyword(Tokens.VOID)
        else:
            self.compile_type()

        name = self.compile_subroutine_name().token
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)

        if flavour == Tokens.METHOD:
            # Register the implicit receiver as an argument before the
            # declared parameters (presumably so that they are numbered
            # after it -- the numbering itself lives in the symbol table).
            self.symbol_table.define('$this', self.compiled_class_name,
                                     IdentifierKind.ARG)

        self.compile_parameter_list()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        self.compile_subroutine_body(name, flavour)

        self.write_element_end('subroutineDec')

    def compile_subroutine_name(self):
        self.write_identifier_info('category: subroutine')
        return self.compile_identifier()

    def compile_class_name(self):
        self.write_identifier_info('category: class')
        return self.compile_identifier()

    def compile_var_name(self,
                         declaration=False,
                         type=None,
                         kind=None,
                         let=False,
                         call=False):
        if declaration:
            self.symbol_table.define(self.tokenizer.see_next().token, type,
                                     kind)
        elif let:
            pass
        elif call:
            pass
        else:
            kind = self.symbol_table.kind_of(self.tokenizer.see_next().token)
            if kind == IdentifierKind.ARG:
                self.vmw.write_push(
                    Segment.ARG,
                    self.symbol_table.index_of(
                        self.tokenizer.see_next().token))
            elif kind == IdentifierKind.VAR:
                self.vmw.write_push(
                    Segment.LOCAL,
                    self.symbol_table.index_of(
                        self.tokenizer.see_next().token))
            elif kind == IdentifierKind.FIELD:
                self.vmw.write_push(
                    Segment.THIS,
                    self.symbol_table.index_of(
                        self.tokenizer.see_next().token))
            elif kind == IdentifierKind.STATIC:
                self.vmw.write_push(
                    Segment.STATIC,
                    self.symbol_table.index_of(
                        self.tokenizer.see_next().token))

        self.write_identifier_info(
            'declaration: %s, kind: %s, index: %d' %
            (declaration,
             self.symbol_table.kind_of(self.tokenizer.see_next().token),
             self.symbol_table.index_of(self.tokenizer.see_next().token)))
        return self.compile_identifier()

    def write_identifier_info(self, value):
        self.write_element('IdentifierInfo', value)

    def compile_parameter_list(self):
        """Compile a possibly empty ``type varName (, type varName)*`` list.

        Each parameter is defined in the symbol table as an ``ARG``.  The
        surrounding parentheses are handled by the caller.
        """
        self.write_element_start('parameterList')

        # A parameter list is present when the upcoming token can start a
        # type: a primitive type keyword or an identifier.
        has_parameters = self.tokenizer.see_next() in [
            Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN
        ] or isinstance(self.tokenizer.see_next(), Identifier)

        if has_parameters:
            while True:
                param_type = self.compile_type()
                self.compile_var_name(declaration=True,
                                      type=param_type.token,
                                      kind=IdentifierKind.ARG)
                if not self.next_is(Tokens.COMMA):
                    break
                self.compile_symbol(Tokens.COMMA)

        self.write_element_end('parameterList')

    def compile_subroutine_body(self, subroutine_name, subroutine_dec_token):
        """Compile ``{ varDec* statements }`` and emit the VM function prologue.

        Args:
            subroutine_name: Name of the subroutine; combined with the class
                name to form the VM function name.
            subroutine_dec_token: The ``constructor``/``function``/``method``
                keyword token; selects how ``pointer 0`` is initialised.

        Returns:
            The number of local variables declared in the body.

        Also prints the subroutine name/kind and a dump of the argument
        table to stdout (debugging output).
        """
        self.write_element_start('subroutineBody')

        print(subroutine_name, subroutine_dec_token)

        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        local_num = 0
        while self.next_is(Tokens.VAR):
            local_num += self.compile_var_dec()

        # The function command needs the local count, so it can only be
        # written once every var declaration has been seen.
        qualified_name = "%s.%s" % (self.compiled_class_name, subroutine_name)
        self.vmw.write_function(qualified_name, local_num)

        if subroutine_dec_token == Tokens.METHOD:
            # pointer 0 <- argument 0 (the receiver)
            self.vmw.write_push(Segment.ARG, 0)
            self.vmw.write_pop(Segment.POINTER, 0)
        elif subroutine_dec_token == Tokens.CONSTRUCTOR:
            # pointer 0 <- Memory.alloc(<number of fields>)
            field_count = self.symbol_table.var_count(IdentifierKind.FIELD)
            self.vmw.write_push(Segment.CONST, field_count)
            self.vmw.write_call('Memory.alloc', 1)
            self.vmw.write_pop(Segment.POINTER, 0)
        elif subroutine_dec_token == Tokens.FUNCTION:
            pass
        else:
            self.raise_syntax_error('Invalid token')

        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)

        self.write_element_end('subroutineBody')

        print("=========")
        arg_table = self.symbol_table.arg_table
        for key in arg_table:
            entry = arg_table[key]
            print(entry.type, key, "kind:", entry.kind, "index:", entry.index)

        return local_num

    def compile_statements(self):
        self.write_element_start('statements')

        while self.next_is_statement():
            self.compile_statement()

        self.write_element_end('statements')

    def compile_statement(self):
        """Compile the single statement that starts at the upcoming token.

        Dispatches on the upcoming keyword (``let``, ``if``, ``while``,
        ``do`` or ``return``).  If the upcoming token is none of these,
        nothing is consumed and nothing is emitted.

        Raises:
            Exception: via ``raise_syntax_error`` on malformed input or when
                a ``let`` target is not a known variable.
        """
        if self.next_is(Tokens.LET):
            self._compile_let_statement()
        elif self.next_is(Tokens.IF):
            self._compile_if_statement()
        elif self.next_is(Tokens.WHILE):
            self._compile_while_statement()
        elif self.next_is(Tokens.DO):
            self._compile_do_statement()
        elif self.next_is(Tokens.RETURN):
            self._compile_return_statement()

    def _segment_of_variable(self, name):
        """Return the VM segment that holds variable ``name``.

        Raises a syntax error when the symbol table does not classify
        ``name`` as an argument, local, field or static.
        """
        kind = self.symbol_table.kind_of(name)
        if kind == IdentifierKind.ARG:
            return Segment.ARG
        if kind == IdentifierKind.VAR:
            return Segment.LOCAL
        if kind == IdentifierKind.FIELD:
            return Segment.THIS
        if kind == IdentifierKind.STATIC:
            return Segment.STATIC
        self.raise_syntax_error('Undeclared variable: {}'.format(name))

    def _compile_let_statement(self):
        """Compile ``let varName ([ expression ])? = expression ;``."""
        self.write_element_start('letStatement')
        self.compile_keyword(Tokens.LET)
        target = self.compile_var_name(let=True).token

        if self.next_is(Tokens.LEFT_BOX_BRACKET):
            # let target[index] = value;
            self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
            self.compile_expression()  # pushes index
            self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
            self.compile_symbol(Tokens.EQUAL)

            # Push the base address and leave (base + index) on the stack
            # while the right-hand side is evaluated.  Parking it in a temp
            # slot instead is unsafe: the temp segment is shared by all
            # functions, so a call inside the right-hand side that itself
            # performs an array assignment would overwrite the saved address.
            self.vmw.write_push(self._segment_of_variable(target),
                                self.symbol_table.index_of(target))
            self.vmw.write_arithmetic(Command.ADD)

            self.compile_expression()  # pushes value

            # No code runs between these four commands, so temp 0 is safe.
            self.vmw.write_pop(Segment.TEMP, 0)     # temp 0 <- value
            self.vmw.write_pop(Segment.POINTER, 1)  # THAT   <- base + index
            self.vmw.write_push(Segment.TEMP, 0)
            self.vmw.write_pop(Segment.THAT, 0)     # target[index] <- value
            self.compile_symbol(Tokens.SEMI_COLON)
        else:
            # let target = value;
            self.compile_symbol(Tokens.EQUAL)
            self.compile_expression()
            self.compile_symbol(Tokens.SEMI_COLON)
            self.vmw.write_pop(self._segment_of_variable(target),
                               self.symbol_table.index_of(target))

        self.write_element_end('letStatement')

    def _compile_if_statement(self):
        """Compile ``if ( expression ) { statements } (else { statements })?``."""
        self.write_element_start('ifStatement')
        self.compile_keyword(Tokens.IF)
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
        self.compile_expression()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        # Negate the condition so that if-goto jumps over the "then" part.
        self.vmw.write_arithmetic(Command.NOT)
        else_label = self.get_new_label()
        end_label = self.get_new_label()
        self.vmw.write_if(else_label)
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.vmw.write_goto(end_label)
        self.vmw.write_label(else_label)
        if self.next_is(Tokens.ELSE):
            self.compile_keyword(Tokens.ELSE)
            self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
            self.compile_statements()
            self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.vmw.write_label(end_label)
        self.write_element_end('ifStatement')

    def _compile_while_statement(self):
        """Compile ``while ( expression ) { statements }``."""
        self.write_element_start('whileStatement')
        loop_label = self.get_new_label()
        exit_label = self.get_new_label()
        self.compile_keyword(Tokens.WHILE)
        self.vmw.write_label(loop_label)
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
        self.compile_expression()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        # Leave the loop as soon as the condition is false.
        self.vmw.write_arithmetic(Command.NOT)
        self.vmw.write_if(exit_label)
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
        self.vmw.write_goto(loop_label)
        self.vmw.write_label(exit_label)
        self.write_element_end('whileStatement')

    def _compile_do_statement(self):
        """Compile ``do subroutineCall ;`` and discard the call's result."""
        self.write_element_start('doStatement')
        self.compile_keyword(Tokens.DO)
        self.compile_subroutine_call()
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('doStatement')
        # The returned value is unused: pop it off the stack.
        self.vmw.write_pop(Segment.TEMP, 0)

    def _compile_return_statement(self):
        """Compile ``return expression? ;``."""
        self.write_element_start('returnStatement')
        self.compile_keyword(Tokens.RETURN)
        if not self.next_is(Tokens.SEMI_COLON):
            self.compile_expression()
        else:
            # A bare ``return;`` still pushes a value (constant 0) before
            # the return command.
            self.vmw.write_push(Segment.CONST, 0)

        self.compile_symbol(Tokens.SEMI_COLON)
        self.vmw.write_return()

        self.write_element_end('returnStatement')

    def compile_subroutine_call(self):
        """Compile a subroutine call and emit the matching VM ``call``.

        Three shapes are recognised:

        * ``name(args)`` -- ``pointer 0`` is pushed as an extra first
          argument and ``<current class>.name`` is called.
        * ``var.name(args)`` where ``var`` is known to the symbol table --
          the variable's value is pushed as an extra first argument and
          ``<type of var>.name`` is called.
        * ``Class.name(args)`` otherwise -- called with just the listed
          arguments.
        """
        if self.next_is(Tokens.LEFT_ROUND_BRACKET, idx=1):
            callee = self.compile_subroutine_name().token
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.vmw.write_push(Segment.POINTER, 0)
            arg_count = self.compile_expression_list()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.vmw.write_call(
                "%s.%s" % (self.compiled_class_name, callee),
                arg_count + 1)
            return

        qualifier = self.tokenizer.see_next().token
        if not self.symbol_table.kind_of(qualifier):
            # Not a variable: treat the qualifier as a class name.
            owner = self.compile_class_name().token
            self.compile_symbol(Tokens.DOT)
            callee = self.compile_subroutine_name().token
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            arg_count = self.compile_expression_list()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.vmw.write_call("%s.%s" % (owner, callee), arg_count)
            return

        receiver = self.compile_var_name(call=True).token
        self.compile_symbol(Tokens.DOT)
        callee = self.compile_subroutine_name().token
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)

        # Push the receiver object before the explicit arguments.
        receiver_kind = self.symbol_table.kind_of(receiver)
        for candidate, segment in (
                (IdentifierKind.ARG, Segment.ARG),
                (IdentifierKind.VAR, Segment.LOCAL),
                (IdentifierKind.FIELD, Segment.THIS),
                (IdentifierKind.STATIC, Segment.STATIC)):
            if receiver_kind == candidate:
                self.vmw.write_push(segment,
                                    self.symbol_table.index_of(receiver))
                break

        arg_count = self.compile_expression_list()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        self.vmw.write_call(
            "%s.%s" % (self.symbol_table.type_of(receiver), callee),
            arg_count + 1)

    def compile_expression_list(self):
        """Compile a possibly empty, comma-separated list of expressions.

        The list is considered empty when the upcoming token is ``)``.

        Returns:
            The number of expressions compiled.
        """
        self.write_element_start('expressionList')
        compiled = 0
        if not self.next_is(Tokens.RIGHT_ROUND_BRACKET):
            while True:
                self.compile_expression()
                compiled += 1
                if not self.next_is(Tokens.COMMA):
                    break
                self.compile_symbol(Tokens.COMMA)
        self.write_element_end('expressionList')

        return compiled

    def compile_expression(self):
        """Compile ``term (op term)*`` and emit VM code for every operator.

        Each operator is emitted right after its right-hand term, so
        operators are applied in source order with no precedence.  ``*`` and
        ``/`` become calls to ``Math.multiply`` / ``Math.divide``; the other
        operators map to VM arithmetic commands.
        """
        self.write_element_start('expression')
        self.compile_term()

        # (operator token, VM arithmetic command, OS routine); exactly one
        # of the last two entries is set in each row.
        operator_table = (
            (Tokens.PLUS, Command.ADD, None),
            (Tokens.MINUS, Command.SUB, None),
            (Tokens.MULTI, None, 'Math.multiply'),
            (Tokens.DIV, None, 'Math.divide'),
            (Tokens.AND, Command.AND, None),
            (Tokens.PIPE, Command.OR, None),
            (Tokens.LESS_THAN, Command.LT, None),
            (Tokens.GREATER_THAN, Command.GT, None),
            (Tokens.EQUAL, Command.EQ, None),
        )
        # Must be a list: next_is/compile_symbol treat lists as "any of".
        operator_tokens = [row[0] for row in operator_table]

        while self.next_is(operator_tokens):
            op_token = self.compile_symbol(operator_tokens)
            self.compile_term()
            for symbol, command, os_routine in operator_table:
                if op_token == symbol:
                    if os_routine is None:
                        self.vmw.write_arithmetic(command)
                    else:
                        self.vmw.write_call(os_routine, 2)
                    break

        self.write_element_end('expression')

    def compile_term(self):
        """Compile one term and emit the VM code that pushes its value.

        Supported terms: integer and string constants, the keyword constants
        ``null``/``this``/``true``/``false``, ``varName``,
        ``varName[expression]``, subroutine calls, ``( expression )`` and
        the unary operators ``~`` and ``-``.

        Raises:
            Exception: via ``raise_syntax_error`` when the upcoming token
                cannot start a term.
        """
        self.write_element_start('term')

        if self.next_type_is(TokenType.INT_CONST):
            value_str = self.compile_integer_constant()
            self.vmw.write_push(Segment.CONST, value_str)
        elif self.next_type_is(TokenType.STRING_CONST):
            self.compile_string_constant()
        elif self.next_is(Tokens.NULL):
            self.compile_keyword(Tokens.NULL)
            self.vmw.write_push(Segment.CONST, 0)
        elif self.next_is(Tokens.THIS):
            self.compile_keyword(Tokens.THIS)
            self.vmw.write_push(Segment.POINTER, 0)
        elif self.next_is(Tokens.TRUE):
            self.compile_keyword(Tokens.TRUE)
            # true is emitted as "not 0".
            self.vmw.write_push(Segment.CONST, 0)
            self.vmw.write_arithmetic(Command.NOT)
        elif self.next_is(Tokens.FALSE):
            self.compile_keyword(Tokens.FALSE)
            self.vmw.write_push(Segment.CONST, 0)
        elif self.next_type_is(TokenType.IDENTIFIER):
            if self.next_is(Tokens.LEFT_BOX_BRACKET, idx=1):
                # varName[expression]: compile_var_name pushes the base
                # address, the expression pushes the index.
                self.compile_var_name()
                self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
                self.compile_expression()

                # THAT <- base + index, then push the addressed element.
                self.vmw.write_arithmetic(Command.ADD)
                self.vmw.write_pop(Segment.POINTER, 1)
                self.vmw.write_push(Segment.THAT, 0)
                self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
            elif self.next_is([Tokens.LEFT_ROUND_BRACKET, Tokens.DOT], idx=1):
                self.compile_subroutine_call()
            else:
                self.compile_var_name()

        elif self.next_is(Tokens.LEFT_ROUND_BRACKET):
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        elif self.next_is(Tokens.TILDE):
            self.compile_symbol(Tokens.TILDE)
            self.compile_term()
            self.vmw.write_arithmetic(Command.NOT)
        elif self.next_is(Tokens.MINUS):
            self.compile_symbol(Tokens.MINUS)
            self.compile_term()
            self.vmw.write_arithmetic(Command.NEG)
        else:
            self.raise_syntax_error(
                'Unexpected token at start of term: {!r}'.format(
                    self.tokenizer.see_next()))
        self.write_element_end('term')

    def next_type_is(self, token_type):
        return self.tokenizer.see_next().type == token_type

    def compile_type(self):
        """Consume a type: ``int``, ``char``, ``boolean`` or a class name.

        Returns:
            The type token, captured by peeking before it is consumed.
        """
        type_token = self.tokenizer.see_next()

        primitive_types = [Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN]
        if not self.next_is(primitive_types):
            self.compile_class_name()
        else:
            self.compile_keyword(primitive_types)
        return type_token

    def next_is_statement(self):
        """Tell whether the upcoming token is a keyword that starts a statement."""
        statement_keywords = [
            Tokens.LET, Tokens.IF, Tokens.WHILE, Tokens.DO, Tokens.RETURN
        ]
        return self.next_is(statement_keywords)

    def next_is(self, tokens, idx=0):
        if type(tokens) == list:
            return self.tokenizer.see_next(idx=idx) in tokens
        else:
            return self.tokenizer.see_next(idx=idx) == tokens

    def next_is_class_var_dec(self):
        """Tell whether the upcoming token is ``static`` or ``field``."""
        class_var_keywords = [Tokens.STATIC, Tokens.FIELD]
        return self.next_is(class_var_keywords)

    def next_is_subroutine_dec(self):
        """Tell whether the upcoming token is ``constructor``, ``function`` or ``method``."""
        subroutine_keywords = [
            Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD
        ]
        return self.next_is(subroutine_keywords)

    def compile_symbol(self, tokens):
        self.tokenizer.advance()
        if type(tokens) == list:
            if self.tokenizer.current_token in tokens:
                self.write_element('symbol',
                                   self.tokenizer.current_token.token_escaped)
                return self.tokenizer.current_token
            else:
                self.raise_syntax_error('')
        else:
            if self.tokenizer.current_token == tokens:
                self.write_element('symbol',
                                   self.tokenizer.current_token.token_escaped)
                return self.tokenizer.current_token
            else:
                self.raise_syntax_error('')

    def compile_keyword(self, tokens):
        self.tokenizer.advance()
        if type(tokens) == list:
            if self.tokenizer.current_token in tokens:
                self.write_element('keyword',
                                   self.tokenizer.current_token.token_escaped)
                return self.tokenizer.current_token
            else:
                self.raise_syntax_error('')
        else:
            if self.tokenizer.current_token == tokens:
                self.write_element('keyword',
                                   self.tokenizer.current_token.token_escaped)
                return self.tokenizer.current_token
            else:
                self.raise_syntax_error('')

    def compile_identifier(self):
        """Advance to the next token and require it to be an identifier.

        Returns:
            The consumed token, after writing it as an ``<identifier>``
            element.

        Raises:
            Exception: via ``raise_syntax_error`` when the consumed token is
                not an ``Identifier``; the message shows the offending token.
        """
        self.tokenizer.advance()
        current = self.tokenizer.current_token
        if isinstance(current, Identifier):
            self.write_element('identifier', current.token_escaped)
            return current
        self.raise_syntax_error(
            'Expected an identifier but found {!r}'.format(current))

    def compile_integer_constant(self):
        """Advance to the next token and require it to be an integer constant.

        Returns:
            The token's ``token_escaped`` text, after writing it as an
            ``<integerConstant>`` element.

        Raises:
            Exception: via ``raise_syntax_error`` when the consumed token is
                not an ``IntegerConstant``; the message shows the offending
                token.
        """
        self.tokenizer.advance()
        current = self.tokenizer.current_token
        if isinstance(current, IntegerConstant):
            self.write_element('integerConstant', current.token_escaped)
            return current.token_escaped
        self.raise_syntax_error(
            'Expected an integer constant but found {!r}'.format(current))

    def compile_string_constant(self):
        """Consume a string constant and emit VM code that builds it.

        The emitted code calls ``String.new`` with the string's length and
        then ``String.appendChar`` once per character.

        Raises:
            Exception: via ``raise_syntax_error`` when the consumed token is
                not a ``StringConstant``; the message shows the offending
                token.
        """
        self.tokenizer.advance()
        current = self.tokenizer.current_token
        if not isinstance(current, StringConstant):
            self.raise_syntax_error(
                'Expected a string constant but found {!r}'.format(current))
        else:
            string = current.token
            self.write_element('stringConstant', current.token_escaped)
            self.vmw.write_push(Segment.CONST, len(string))
            self.vmw.write_call('String.new', 1)
            for c in string:
                # Two arguments: the string under construction (already on
                # the stack) and the character code pushed here.
                self.vmw.write_push(Segment.CONST, ord(c))
                self.vmw.write_call('String.appendChar', 2)

    def write_element(self, elem_name, value):
        self.wf.write('<%s> %s </%s>\n' % (elem_name, value, elem_name))

    def write_element_start(self, elem_name):
        self.wf.write('<%s>\n' % elem_name)

    def write_element_end(self, elem_name):
        self.wf.write('</%s>\n' % elem_name)

    def raise_syntax_error(self, msg):
        raise Exception('%s' % msg)
Exemple #16
0
class CompilationEngine:
    def __init__(self, input_texts, output_file_path, vmw):
        """Create the tokenizer, output file, symbol table and token groups.

        Args:
            input_texts: Source text handed to ``Tokenizer``.
            output_file_path: Path of the file opened for writing as
                ``self.wf``.
            vmw: VM writer used to emit VM commands.
        """
        self.tokenizer = Tokenizer(input_texts)
        self.wf = open(output_file_path, 'w')
        self.vmw = vmw
        self.elements = []

        table = SymbolTable()
        self.symbol_table = table
        table.show_tables()

        # Token groups used when deciding which grammar rule applies.
        self.op_tokens = [
            Tokens.PLUS, Tokens.MINUS, Tokens.MULTI, Tokens.DIV, Tokens.AND,
            Tokens.OR, Tokens.LESS_THAN, Tokens.GREATER_THAN, Tokens.EQUAL,
        ]
        self.unary_op_tokens = [Tokens.MINUS, Tokens.TILDE]
        self.statement_tokens = [
            Tokens.LET, Tokens.IF, Tokens.WHILE, Tokens.DO, Tokens.RETURN,
        ]
        self.keyword_constant_tokens = [
            Tokens.TRUE, Tokens.FALSE, Tokens.NULL, Tokens.THIS,
        ]

        # State needed while building the symbol table.
        self.class_name = self.kind = self.var_type = self.var_name = None

        # State used for VM code generation.
        self.subroutine_class_name = self.subroutine_name = None
        self.label_number = 0

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        return self.wf.close()

    def compile(self):
        self.compile_class()

    def compile_class(self):
        """Compile ``class className { classVarDec* subroutineDec* }``.

        The class-name token is remembered in ``self.class_name`` and the
        whole construct is wrapped in a ``<class>`` XML element.
        """
        self.write_element_start('class')

        # 'class'
        self.compile_keyword([Tokens.CLASS])
        # Peek at the class-name token and keep it before consuming it.
        # NOTE(review): this stores the token object itself, not its text.
        self.class_name = self.tokenizer.see_next()
        # className
        self.compile_class_name()
        # '{'
        # NOTE(review): the braces go through compile_keyword although this
        # class also has compile_symbol -- confirm this is intended.
        self.compile_keyword([Tokens.LEFT_CURLY_BRACKET])

        # classVarDec*
        class_var_starters = [Tokens.STATIC, Tokens.FIELD]
        while self.tokenizer.next_is(class_var_starters):
            self.compile_class_var_dec()

        # subroutineDec*
        # NOTE(review): VOID is accepted as a starter here as well; verify
        # against compile_subroutine_body, which rejects it.
        subroutine_starters = [
            Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD, Tokens.VOID
        ]
        while self.tokenizer.next_is(subroutine_starters):
            self.compile_subroutine_dec()

        # '}'
        self.compile_keyword([Tokens.RIGHT_CURLY_BRACKET])

        self.write_element_end('class')

    def compile_class_var_dec(self):
        """Compile ``(static | field) type varName (, varName)* ;``.

        The kind and the type of the declaration are kept in ``self.kind``
        and ``self.var_type`` and passed to every ``compile_var_name`` call.
        """
        self.write_element_start('classVarDec')

        # static | field
        self.compile_keyword([Tokens.STATIC, Tokens.FIELD])
        self.kind = self.get_kind(self.tokenizer.current_token)
        # type
        self.compile_type()
        self.var_type = self.tokenizer.current_token

        # varName (',' varName)*
        while True:
            self.compile_var_name(define=True,
                                  var_type=self.var_type,
                                  kind=self.kind)
            if not self.tokenizer.next_is([Tokens.COMMA]):
                break
            self.compile_symbol([Tokens.COMMA])

        # ;
        self.compile_symbol([Tokens.SEMICOLON])

        self.write_element_end('classVarDec')

    def compile_subroutine_dec(self):
        """Compile one subroutine declaration, header and body.

        Resets the subroutine-level symbol table, consumes the kind keyword,
        return type, name and parameter list, and passes the kind and name
        tokens on to ``compile_subroutine_body``.
        """
        # Reset the subroutine scope of the symbol table.
        self.symbol_table.start_subroutine()
        self.write_element_start('subroutineDec')

        # constructor | function | method | void
        self.compile_keyword(
            [Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD, Tokens.VOID])

        # Kept for VM generation in the body.
        subroutine_type = self.tokenizer.current_token

        # A method receives its object as an implicit argument, registered
        # before the declared parameters.
        if subroutine_type == Tokens.METHOD:
            self.symbol_table.define('$this', self.class_name, SymbolKind.ARG)

        # void | type
        returns_void = self.tokenizer.next_is([Tokens.VOID])
        if returns_void:
            self.compile_keyword([Tokens.VOID])
        else:
            self.compile_type()

        # subroutineName (kept for VM generation in the body)
        self.compile_subroutine_name()
        subroutine_name = self.tokenizer.current_token

        # '(' parameterList ')'
        self.compile_symbol([Tokens.LEFT_ROUND_BRACKET])
        self.compile_parameter_list()
        self.compile_symbol([Tokens.RIGHT_ROUND_BRACKET])

        # subroutineBody
        self.compile_subroutine_body(subroutine_type, subroutine_name)

        self.write_element_end('subroutineDec')

    def compile_subroutine_body(self, subroutine_type, subroutine_name):
        """Compile ``{ varDec* statements }`` and emit the VM function prologue.

        Args:
            subroutine_type: The ``constructor``/``function``/``method``
                keyword token; selects how ``pointer 0`` is initialised.
            subroutine_name: Name token of the subroutine; combined with
                ``self.class_name`` to form the VM function name.

        Raises:
            Whatever ``raise_syntax_error`` raises when ``subroutine_type``
            is not one of the three subroutine kinds.
        """
        self.write_element_start('subroutineBody')

        # {
        self.compile_keyword([Tokens.LEFT_CURLY_BRACKET])
        # varDec*: consume declarations only while the next token is 'var'.
        # Looping until a statement keyword shows up would push the closing
        # brace of a body without statements into compile_var_dec.
        local_var_counts = 0
        while self.tokenizer.next_is([Tokens.VAR]):
            local_var_counts += self.compile_var_dec()

        # VM: the function command needs the local count, so it is written
        # only after all var declarations have been seen.
        function_name = '{}.{}'.format(self.class_name, subroutine_name)
        self.vmw.write_function(function_name, local_var_counts)
        if subroutine_type == Tokens.CONSTRUCTOR:
            # pointer 0 <- Memory.alloc(<number of fields>)
            self.vmw.write_push(SegmentType.CONST,
                                self.symbol_table.var_count(SymbolKind.FIELD))
            self.vmw.write_call('Memory.alloc', 1)
            self.vmw.write_pop(SegmentType.POINTER, 0)
        elif subroutine_type == Tokens.METHOD:
            # pointer 0 <- argument 0 (the receiver)
            self.vmw.write_push(SegmentType.ARG, 0)
            self.vmw.write_pop(SegmentType.POINTER, 0)
        elif subroutine_type == Tokens.FUNCTION:
            pass
        else:
            self.raise_syntax_error('Invalid subroutine type.')

        # statements
        self.compile_statements()
        # }
        self.compile_keyword([Tokens.RIGHT_CURLY_BRACKET])

        self.write_element_end('subroutineBody')

    def compile_var_dec(self):
        """Compile ``var type varName (, varName)* ;``.

        ``self.kind`` is set to ``SymbolKind.VAR`` and ``self.var_type`` to
        the type token; both are passed to every ``compile_var_name`` call.

        Returns:
            The number of variables declared by this statement.
        """
        self.write_element_start('varDec')

        declared = 0

        self.kind = SymbolKind.VAR
        # var
        self.compile_keyword([Tokens.VAR])
        # type
        self.compile_type()
        self.var_type = self.tokenizer.current_token

        # varName (',' varName)*
        while True:
            self.compile_var_name(define=True,
                                  var_type=self.var_type,
                                  kind=self.kind)
            declared += 1
            if not self.tokenizer.next_is([Tokens.COMMA]):
                break
            self.compile_symbol([Tokens.COMMA])

        # ;
        self.compile_symbol([Tokens.SEMICOLON])

        self.write_element_end('varDec')

        return declared

    def compile_term(self):
        """Compile one term and emit the VM code for it.

        The branch is chosen by peeking at the upcoming token(s): integer
        constant, string constant, keyword constant, identifier (array
        element, subroutine call or plain variable), parenthesised
        expression, or a unary ``-`` / ``~`` applied to a nested term.
        Any other token is reported through ``raise_syntax_error``.
        """
        self.write_element_start('term')

        if isinstance(self.tokenizer.see_next(), IntegerToken):
            self.compile_integer_constant()
            # the literal was just consumed, so it is the current token
            self.vmw.write_push(SegmentType.CONST,
                                self.tokenizer.current_token)
        elif isinstance(self.tokenizer.see_next(), StringToken):
            # compile_string_constant() emits the string-building VM code
            self.compile_string_constant()
        elif isinstance(self.tokenizer.see_next(), KeywordToken):
            # VM code is emitted while the keyword is still only peeked at
            if self.tokenizer.see_next() == Tokens.TRUE:
                # true is emitted as "push constant 1" followed by NEG
                self.vmw.write_push(SegmentType.CONST, 1)
                self.vmw.write_arithmetic(ArithmeticType.NEG)
            elif self.tokenizer.see_next() in [Tokens.FALSE, Tokens.NULL]:
                # false and null are both emitted as constant 0
                self.vmw.write_push(SegmentType.CONST, 0)
            elif self.tokenizer.see_next() == Tokens.THIS:
                # this is emitted as pointer 0
                self.vmw.write_push(SegmentType.POINTER, 0)
            # consume the keyword; compile_keyword() reports a syntax error
            # for anything outside keyword_constant_tokens
            self.compile_keyword(self.keyword_constant_tokens)
        elif isinstance(self.tokenizer.see_next(), IdentifierToken):
            # varName[expression]
            if self.tokenizer.next_is([Tokens.LEFT_SQUARE_BRACKET], index=1):
                # varName (is_other=True: consume the name without pushing it)
                self.compile_var_name(is_other=True)

                # VM: push the value of the array variable (the base address)
                var_name = self.tokenizer.current_token.token
                kind = self.symbol_table.kind_of(var_name)
                index = self.symbol_table.index_of(var_name)
                segment_type = self.get_segment_type(kind)
                self.vmw.write_push(segment_type, index)

                # [
                self.compile_symbol([Tokens.LEFT_SQUARE_BRACKET])
                # expression
                self.compile_expression()
                # ]
                self.compile_symbol([Tokens.RIGHT_SQUARE_BRACKET])

                # VM: only the a[i] case is considered here.
                # base + index -> pointer 1, then read the element via that 0
                self.vmw.write_arithmetic(ArithmeticType.ADD)
                self.vmw.write_pop(SegmentType.POINTER, 1)
                self.vmw.write_push(SegmentType.THAT, 0)

            # subroutineCall
            elif self.tokenizer.next_is(
                [Tokens.LEFT_ROUND_BRACKET, Tokens.DOT], index=1):
                self.compile_subroutine_call()
            # varName
            else:
                # with default flags compile_var_name() also pushes the variable
                self.compile_var_name()
        # ( expression )
        elif self.tokenizer.next_is([Tokens.LEFT_ROUND_BRACKET]):
            # (
            self.compile_symbol([Tokens.LEFT_ROUND_BRACKET])
            # expression
            self.compile_expression()
            # )
            self.compile_symbol([Tokens.RIGHT_ROUND_BRACKET])
        # unaryOp -
        elif self.tokenizer.see_next() == Tokens.MINUS:
            # unaryOp
            self.compile_symbol([Tokens.MINUS])
            # term
            self.compile_term()
            # VM: the operand is compiled first, then negated
            self.vmw.write_arithmetic(ArithmeticType.NEG)
        # unaryOp ~
        elif self.tokenizer.see_next() == Tokens.TILDE:
            # unaryOp
            self.compile_symbol([Tokens.TILDE])
            # term
            self.compile_term()
            # VM: the operand is compiled first, then NOT is applied
            self.vmw.write_arithmetic(ArithmeticType.NOT)
        else:
            self.raise_syntax_error(self.tokenizer.see_next())

        self.write_element_end('term')

    def compile_expression(self):
        """Compile ``term (op term)*`` and emit VM code for every operator.

        Each operator's VM command is written after its right-hand term has
        been compiled, so operators are applied strictly left to right.
        An operator token with no known VM translation is reported through
        ``raise_syntax_error``.
        """
        self.write_element_start('expression')

        # operator token -> action that writes the matching VM command;
        # kept as an ordered sequence so tokens are compared with == in turn
        emitters = (
            (Tokens.PLUS,
             lambda: self.vmw.write_arithmetic(ArithmeticType.ADD)),
            (Tokens.MINUS,
             lambda: self.vmw.write_arithmetic(ArithmeticType.SUB)),
            (Tokens.MULTI,
             lambda: self.vmw.write_call('Math.multiply', 2)),
            (Tokens.DIV,
             lambda: self.vmw.write_call('Math.divide', 2)),
            (Tokens.GREATER_THAN,
             lambda: self.vmw.write_arithmetic(ArithmeticType.GT)),
            (Tokens.LESS_THAN,
             lambda: self.vmw.write_arithmetic(ArithmeticType.LT)),
            (Tokens.AND,
             lambda: self.vmw.write_arithmetic(ArithmeticType.AND)),
            (Tokens.OR,
             lambda: self.vmw.write_arithmetic(ArithmeticType.OR)),
            (Tokens.TILDE,
             lambda: self.vmw.write_arithmetic(ArithmeticType.NOT)),
            (Tokens.EQUAL,
             lambda: self.vmw.write_arithmetic(ArithmeticType.EQ)),
        )

        # term
        self.compile_term()
        # (op term)*
        while self.tokenizer.next_is(self.op_tokens):
            self.compile_op()
            operator = self.tokenizer.current_token
            self.compile_term()
            for candidate, emit in emitters:
                if operator == candidate:
                    emit()
                    break
            else:
                self.raise_syntax_error('Invalid op token.')

        self.write_element_end('expression')

    def compile_expression_list(self):
        """Compile ``(expression (',' expression)*)?`` inside a call.

        The list is considered empty when the next token is the closing
        round bracket; the bracket itself is not consumed here.

        Returns:
            The number of expressions compiled.
        """
        self.write_element_start('expressionList')

        compiled = 0
        list_is_empty = self.tokenizer.next_is([Tokens.RIGHT_ROUND_BRACKET])
        if not list_is_empty:
            # first expression
            self.compile_expression()
            compiled = 1
            # every further expression is introduced by a comma
            while self.tokenizer.next_is([Tokens.COMMA]):
                self.compile_symbol([Tokens.COMMA])
                self.compile_expression()
                compiled += 1

        self.write_element_end('expressionList')

        return compiled

    def compile_subroutine_call(self):
        """Compile a subroutine call and emit the matching VM ``call``.

        Three forms are distinguished by the token after the first
        identifier and by a symbol-table lookup:

        * ``name(args)`` -- pointer 0 is pushed as an extra first argument
          and ``<self.class_name>.name`` is called with ``n + 1`` arguments.
        * ``var.name(args)`` where ``var`` is in the symbol table -- the
          variable is pushed as an extra first argument and
          ``<type of var>.name`` is called with ``n + 1`` arguments.
        * ``Class.name(args)`` otherwise -- ``Class.name`` is called with
          ``n`` arguments.

        Any other token after the identifier is reported through
        ``raise_syntax_error``.
        """
        # '(' case: call on the current object
        if self.tokenizer.next_is([Tokens.LEFT_ROUND_BRACKET], index=1):
            # subroutinename
            self.compile_subroutine_name()

            # VM: push pointer 0 before the explicit arguments
            subroutine_name = self.tokenizer.current_token.token
            self.vmw.write_push(SegmentType.POINTER, 0)

            # (
            self.compile_symbol([Tokens.LEFT_ROUND_BRACKET])
            # expressionList
            argument_counts = self.compile_expression_list()
            # account for the pointer 0 pushed above
            argument_counts += 1
            # )
            self.compile_symbol([Tokens.RIGHT_ROUND_BRACKET])

            function_name = '{}.{}'.format(self.class_name, subroutine_name)
            self.vmw.write_call(function_name, argument_counts)
        # '.' case
        elif self.tokenizer.next_is([Tokens.DOT], index=1):
            # className | varName
            self.compile_class_name()
            # varName (calling a method on a class instance): the identifier
            # is known to the symbol table
            if self.symbol_table.kind_of(
                    self.tokenizer.current_token.token) is not None:
                # VM
                instance_name = self.tokenizer.current_token.token

                # .
                self.compile_symbol([Tokens.DOT])
                # subroutineName
                self.compile_subroutine_name()

                # VM: push the instance variable before the explicit arguments
                subroutine_name = self.tokenizer.current_token.token
                kind = self.symbol_table.kind_of(instance_name)
                index = self.symbol_table.index_of(instance_name)
                segment_type = self.get_segment_type(kind)
                self.vmw.write_push(segment_type, index)

                # (
                self.compile_symbol([Tokens.LEFT_ROUND_BRACKET])
                # expressionList
                argument_counts = self.compile_expression_list()
                # account for the instance pushed above
                argument_counts += 1
                # )
                self.compile_symbol([Tokens.RIGHT_ROUND_BRACKET])

                # VM: the call target is named after the variable's declared type
                function_name = '{}.{}'.format(
                    self.symbol_table.type_of(instance_name),
                    subroutine_name,
                )
                self.vmw.write_call(function_name, argument_counts)
            # className (e.g. calling the Output.printInt function)
            else:
                # VM: the token objects themselves are kept here and are
                # turned into text by format() below
                class_name = self.tokenizer.current_token
                # .
                self.compile_symbol([Tokens.DOT])
                # subroutineName
                self.compile_subroutine_name()
                # VM
                subroutine_name = self.tokenizer.current_token
                # (
                self.compile_symbol([Tokens.LEFT_ROUND_BRACKET])
                # expressionList
                argument_counts = self.compile_expression_list()
                # )
                self.compile_symbol([Tokens.RIGHT_ROUND_BRACKET])

                # VM: no extra argument is pushed in this form
                function_name = '{}.{}'.format(
                    class_name,
                    subroutine_name,
                )
                self.vmw.write_call(function_name, argument_counts)
        else:
            self.raise_syntax_error(self.tokenizer.see_next(index=1))

    def compile_let_statement(self):
        """Compile ``let varName ('[' expression ']')? = expression ;``.

        For a plain variable the value of the right-hand side is popped
        straight into the variable's segment.

        For an array element the element address (index + base) is computed
        first and left on the stack while the right-hand side is compiled.
        It is deliberately not parked in a ``temp`` slot: the temp segment is
        shared by all functions, so a subroutine called from the right-hand
        side could overwrite it. Once the value is on top of the stack it is
        moved aside to ``temp 0``, the address is popped into ``pointer 1``,
        and the value is stored through ``that 0``; no call can happen
        between those four commands.
        """
        self.write_element_start('letStatement')

        # let
        self.compile_keyword([Tokens.LET])
        # varName (is_other=True: consume the name without pushing it)
        self.compile_var_name(is_other=True)

        # VM: resolve where the target variable lives
        let_var_name = self.tokenizer.current_token.token
        kind = self.symbol_table.kind_of(let_var_name)
        index = self.symbol_table.index_of(let_var_name)
        segment_type = self.get_segment_type(kind)

        # ('[' expression ']')?
        is_array_target = self.tokenizer.next_is([Tokens.LEFT_SQUARE_BRACKET])
        if is_array_target:
            # [
            self.compile_symbol([Tokens.LEFT_SQUARE_BRACKET])
            # expression
            self.compile_expression()
            # ]
            self.compile_symbol([Tokens.RIGHT_SQUARE_BRACKET])

            # VM: only the a[i] case is considered here.
            # index + base; the resulting address stays on the stack
            self.vmw.write_push(segment_type, index)
            self.vmw.write_arithmetic(ArithmeticType.ADD)

        # =
        self.compile_symbol([Tokens.EQUAL])
        # expression
        self.compile_expression()
        # ;
        self.compile_symbol([Tokens.SEMICOLON])

        # VM
        if is_array_target:
            # stack (top first): value to store, element address
            self.vmw.write_pop(SegmentType.TEMP, 0)
            self.vmw.write_pop(SegmentType.POINTER, 1)
            self.vmw.write_push(SegmentType.TEMP, 0)
            self.vmw.write_pop(SegmentType.THAT, 0)
        else:
            self.vmw.write_pop(segment_type, index)

        self.write_element_end('letStatement')

    def compile_if_statement(self):
        """Compile ``if (expression) { statements } (else { statements })?``.

        The condition is negated and an ``if-goto`` jumps to the else label;
        the true branch ends with a ``goto`` to the end label. Both labels
        are always written, whether or not an ``else`` clause is present.
        """
        self.write_element_start('ifStatement')

        # reserve both labels before any nested statement is compiled
        else_label = self.get_label()
        end_label = self.get_label()

        # if ( expression )
        self.compile_keyword([Tokens.IF])
        self.compile_symbol([Tokens.LEFT_ROUND_BRACKET])
        self.compile_expression()
        self.compile_symbol([Tokens.RIGHT_ROUND_BRACKET])

        # VM: jump over the true branch when the condition does not hold
        self.vmw.write_arithmetic(ArithmeticType.NOT)
        self.vmw.write_if(else_label)

        # { statements }
        self.compile_symbol([Tokens.LEFT_CURLY_BRACKET])
        self.compile_statements()
        self.compile_symbol([Tokens.RIGHT_CURLY_BRACKET])

        # VM: leave the true branch, then mark where the false branch starts
        self.vmw.write_goto(end_label)
        self.vmw.write_label(else_label)

        # (else { statements })?
        has_else_clause = self.tokenizer.next_is([Tokens.ELSE])
        if has_else_clause:
            self.compile_keyword([Tokens.ELSE])
            self.compile_symbol([Tokens.LEFT_CURLY_BRACKET])
            self.compile_statements()
            self.compile_symbol([Tokens.RIGHT_CURLY_BRACKET])

        # VM: common exit point
        self.vmw.write_label(end_label)

        self.write_element_end('ifStatement')

    def compile_while_statement(self):
        """Compile ``while (expression) { statements }``.

        A label is written before the condition; the negated condition jumps
        to the exit label, and the body ends with a ``goto`` back to the top.
        """
        self.write_element_start('whileStatement')

        loop_top = self.get_label()
        loop_exit = self.get_label()

        # VM: the condition is re-evaluated on every pass, so the label
        # goes in front of it
        self.vmw.write_label(loop_top)

        # while ( expression )
        self.compile_keyword([Tokens.WHILE])
        self.compile_symbol([Tokens.LEFT_ROUND_BRACKET])
        self.compile_expression()
        self.compile_symbol([Tokens.RIGHT_ROUND_BRACKET])

        # VM: leave the loop when the condition does not hold
        self.vmw.write_arithmetic(ArithmeticType.NOT)
        self.vmw.write_if(loop_exit)

        # { statements }
        self.compile_symbol([Tokens.LEFT_CURLY_BRACKET])
        self.compile_statements()
        self.compile_symbol([Tokens.RIGHT_CURLY_BRACKET])

        # VM: back to the condition, then the exit point
        self.vmw.write_goto(loop_top)
        self.vmw.write_label(loop_exit)

        self.write_element_end('whileStatement')

    def compile_do_statement(self):
        """Compile ``do subroutineCall ;``.

        After the call one value is popped into ``temp 0``, which removes
        the call's result from the stack.
        """
        self.write_element_start('doStatement')

        # do subroutineCall ;
        self.compile_keyword([Tokens.DO])
        self.compile_subroutine_call()
        self.compile_symbol([Tokens.SEMICOLON])

        # VM: throw away the value left by the call
        self.vmw.write_pop(SegmentType.TEMP, 0)

        self.write_element_end('doStatement')

    def compile_return_statement(self):
        """Compile ``return expression? ;``.

        When no expression is given, constant 0 is pushed before the VM
        ``return`` so that a value is always on the stack.
        """
        self.write_element_start('returnStatement')

        # return
        self.compile_keyword([Tokens.RETURN])
        # expression?
        if self.tokenizer.next_is([Tokens.SEMICOLON]):
            # bare "return;" -- supply constant 0 as the value
            self.vmw.write_push(SegmentType.CONST, 0)
        else:
            self.compile_expression()
        # ;
        self.compile_symbol([Tokens.SEMICOLON])

        # VM
        self.vmw.write_return()

        self.write_element_end('returnStatement')

    def compile_statement(self):
        """Dispatch to the compiler for the statement that starts next.

        The upcoming keyword is tested against let, if, while, do and return
        in that order; any other token is reported through
        ``raise_syntax_error``.
        """
        handlers = (
            (Tokens.LET, self.compile_let_statement),
            (Tokens.IF, self.compile_if_statement),
            (Tokens.WHILE, self.compile_while_statement),
            (Tokens.DO, self.compile_do_statement),
            (Tokens.RETURN, self.compile_return_statement),
        )
        for keyword, handler in handlers:
            if self.tokenizer.next_is([keyword]):
                handler()
                return
        self.raise_syntax_error(self.tokenizer.see_next())

    def compile_statements(self):
        """Compile statements for as long as the next token starts one."""
        self.write_element_start('statements')

        statement_starters = [
            Tokens.LET, Tokens.IF, Tokens.WHILE, Tokens.DO, Tokens.RETURN]
        while self.tokenizer.next_is(statement_starters):
            self.compile_statement()

        self.write_element_end('statements')

    def compile_parameter_list(self):
        """Compile ``((type varName) (',' type varName)*)?``.

        Every parameter is handed to ``compile_var_name`` with
        ``define=True`` and the ARG kind. Nothing is consumed when the next
        token cannot start a type.
        """
        self.write_element_start('parameterList')

        upcoming = self.tokenizer.see_next()
        starts_with_type = (
            upcoming in [Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN]
            or isinstance(upcoming, (StringToken, IdentifierToken)))

        if starts_with_type:
            self.kind = SymbolKind.ARG
            # type varName (',' type varName)*
            while True:
                # type -- compile_type() consumes it, so it is now current
                self.compile_type()
                self.var_type = self.tokenizer.current_token
                # varName
                self.compile_var_name(define=True,
                                      var_type=self.var_type,
                                      kind=self.kind)
                if not self.tokenizer.next_is([Tokens.COMMA]):
                    break
                self.compile_symbol([Tokens.COMMA])

        self.write_element_end('parameterList')

    def compile_op(self):
        """Consume one operator symbol taken from ``self.op_tokens``."""
        accepted_operators = self.op_tokens
        self.compile_symbol(accepted_operators)

    def compile_type(self):
        """Consume a type token: ``int``, ``char``, ``boolean`` or an identifier.

        An accepted token is written out as an element; anything else is
        reported through ``raise_syntax_error``.
        """
        self.tokenizer.advance()
        token = self.tokenizer.current_token
        is_builtin_type = token in (Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN)
        if not (is_builtin_type or isinstance(token, IdentifierToken)):
            self.raise_syntax_error(token)
            return
        self.write_element(token)

    def compile_class_name(self):
        """Consume an identifier and remember it in ``subroutine_class_name``."""
        self.compile_identifier()
        consumed = self.tokenizer.current_token
        self.subroutine_class_name = consumed

    def compile_subroutine_name(self):
        """Consume an identifier and remember it in ``subroutine_name``."""
        self.compile_identifier()
        consumed = self.tokenizer.current_token
        self.subroutine_name = consumed

    def compile_var_name(self,
                         define=False,
                         var_type=None,
                         kind=None,
                         is_other=False):
        """Consume a variable-name identifier, optionally acting on it first.

        Args:
            define: when true, enter the upcoming name into the symbol table
                with ``var_type`` and ``kind``.
            var_type: type recorded for a newly defined name.
            kind: kind recorded for a newly defined name.
            is_other: when true (and ``define`` is false), only consume the
                identifier and emit no VM code.

        With both flags false the variable is looked up in the symbol table
        and pushed onto the stack.
        """
        if define:
            new_name = self.tokenizer.see_next().token
            self.symbol_table.define(new_name, var_type, kind)
        elif not is_other:
            # VM: push the current value of the named variable
            name = self.tokenizer.see_next().token
            kind = self.symbol_table.kind_of(name)
            index = self.symbol_table.index_of(name)
            self.vmw.write_push(self.get_segment_type(kind), index)

        self.compile_identifier()

    def compile_keyword(self, keyword_tokens):
        """Consume the next token, which must be one of ``keyword_tokens``.

        An accepted token is written out as an element; anything else is
        reported through ``raise_syntax_error``.
        """
        self.tokenizer.advance()
        token = self.tokenizer.current_token
        if token not in keyword_tokens:
            self.raise_syntax_error(token)
            return
        self.write_element(token)

    def compile_symbol(self, keyword_tokens):
        """Consume the next token, which must be one of ``keyword_tokens``.

        An accepted token is written out as an element; anything else is
        reported through ``raise_syntax_error``.
        """
        self.tokenizer.advance()
        token = self.tokenizer.current_token
        if token not in keyword_tokens:
            self.raise_syntax_error(token)
            return
        self.write_element(token)

    def compile_integer_constant(self):
        """Consume the next token, which must be an ``IntegerToken``.

        An accepted token is written out as an element; anything else is
        reported through ``raise_syntax_error``.
        """
        self.tokenizer.advance()
        token = self.tokenizer.current_token
        if not isinstance(token, IntegerToken):
            self.raise_syntax_error(token)
            return
        self.write_element(token)

    def compile_string_constant(self):
        """Consume a ``StringToken`` and emit the VM code that builds it.

        The emitted code calls ``String.new`` with the text length and then
        ``String.appendChar`` once per character. A token of any other type
        is reported through ``raise_syntax_error``.
        """
        self.tokenizer.advance()
        token = self.tokenizer.current_token
        if not isinstance(token, StringToken):
            self.raise_syntax_error(token)
            return

        self.write_element(token)

        # VM: allocate the string, then append its characters in order
        text = str(token)
        self.vmw.write_push(SegmentType.CONST, len(text))
        self.vmw.write_call('String.new', 1)
        for char in text:
            self.vmw.write_push(SegmentType.CONST, ord(char))
            self.vmw.write_call('String.appendChar', 2)

    def compile_identifier(self):
        """Consume the next token, which must be an ``IdentifierToken``.

        An accepted token is written out as an element; anything else is
        reported through ``raise_syntax_error``.
        """
        self.tokenizer.advance()
        token = self.tokenizer.current_token
        if not isinstance(token, IdentifierToken):
            self.raise_syntax_error(token)
            return
        self.write_element(token)

    def write_element_start(self, element_name):
        """Write the opening tag for ``element_name`` to ``self.wf``."""
        opening_tag = '<{}> \n'.format(element_name)
        self.wf.write(opening_tag)

    def write_element(self, token):
        """Write ``token`` to ``self.wf`` wrapped in a tag for its type.

        The tag name comes from ``get_element(token.type)``.
        """
        tag = get_element(token.type)
        line = '<{}> {} </{}>\n'.format(tag, token, tag)
        self.wf.write(line)

    def write_element_end(self, element_name):
        """Write the closing tag for ``element_name`` to ``self.wf``."""
        closing_tag = '</{}> \n'.format(element_name)
        self.wf.write(closing_tag)

    def raise_syntax_error(self, token):
        """Raise ``ValueError`` naming the offending ``token``."""
        message = 'Invalid syntax of {}'.format(token)
        raise ValueError(message)

    def get_kind(self, token):
        """Map a ``static`` / ``field`` keyword token to its symbol kind.

        Args:
            token: the keyword token to translate.

        Returns:
            ``SymbolKind.STATIC`` or ``SymbolKind.FIELD``.

        Raises:
            ValueError: if ``token`` is neither ``Tokens.STATIC`` nor
                ``Tokens.FIELD``.
        """
        if token == Tokens.STATIC:
            return SymbolKind.STATIC
        if token == Tokens.FIELD:
            return SymbolKind.FIELD
        # raise rather than return: a returned exception object would be
        # mistaken by the caller for a symbol kind
        raise ValueError('Invalid token in get_kind.')

    def get_segment_type(self, kind):
        """Translate a symbol kind into the VM segment that stores it.

        STATIC maps to STATIC, FIELD to THIS, ARG to ARG and VAR to LOCAL.
        Any other kind is reported through ``raise_syntax_error``.
        """
        kind_to_segment = (
            (SymbolKind.STATIC, SegmentType.STATIC),
            (SymbolKind.FIELD, SegmentType.THIS),
            (SymbolKind.ARG, SegmentType.ARG),
            (SymbolKind.VAR, SegmentType.LOCAL),
        )
        for known_kind, segment in kind_to_segment:
            if kind == known_kind:
                return segment
        self.raise_syntax_error('Invalid kind and index error.')

    def get_label(self):
        """Advance ``self.label_number`` and return a label built from it."""
        next_number = self.label_number + 1
        self.label_number = next_number
        return 'LABEL_{}'.format(next_number)
Exemple #17
0
class CompilationEngine(object):
   # the destination file for writing
   destination_file = None

   # the tokenizer for the input file
   tokenizer = None

   # symbol table
   symbol_table = None

   # vm writer
   vm_writer = None

   # the class name
   class_name = ""

   # indices for if and while loops
   # start at -1 because we increment before use
   while_index = -1
   if_index = -1

   # the constructor for compiling a single class
   # the next method to be called after construction must be compile_class
   # source_filename must be a single file, not a directory
   def __init__(self, source_filename):
      """Open the output file and create the helpers for one source file.

      The output name is the source name with a trailing ``.jack``
      (matched case-insensitively) replaced by ``.vm``; for any other name
      ``.vm`` is appended.
      """
      # work out the destination filename
      has_jack_suffix = source_filename.lower().endswith(".jack")
      stem = source_filename[:-5] if has_jack_suffix else source_filename
      destination_filename = stem + ".vm"

      # output file, opened for writing
      self.destination_file = open(destination_filename, 'w')

      # tokenizer reading the input file
      self.tokenizer = JackTokenizer(source_filename)

      # fresh symbol table
      self.symbol_table = SymbolTable()

      # vm writer targeting the output file
      self.vm_writer = VMWriter(self.destination_file)

   # compiles a complete class and closes the output file
   def compile_class(self):
      """Compile a complete class and close the output file.

      Parses ``class Name {``, then any number of field/static
      declarations, then any number of subroutines, then ``}``. The
      destination file is closed in a ``finally`` clause so the handle is
      released even when compilation stops with an exception.
      """
      try:
         # class keyword
         tt, t = self._token_next(True, "KEYWORD", "class")

         # name of class
         tt, t = self._token_next(True, "IDENTIFIER")
         self.class_name = t

         # open brace
         tt, t = self._token_next(True, "SYMBOL", "{")

         # zero or more variable declarations
         self.tokenizer.advance()
         while True:
            tt, t = self._token_next(False)
            if tt == "KEYWORD" and t in ["field", "static"]:
               self.compile_class_var_dec()
            else:
               # stop trying to process variable declarations
               break

         # zero or more subroutine declarations
         while True:
            tt, t = self._token_next(False)
            if tt == "KEYWORD" and t in ["constructor", "function", "method"]:
               self.compile_subroutine()
            else:
               # stop trying to process functions
               break

         # close brace
         # do not advance because we already advanced upon exiting the last
         # loop
         tt, t = self._token_next(False, "SYMBOL", "}")
      finally:
         # done with compilation (or it failed); close the output file
         self.destination_file.close()

   # compiles a static declaration or field declaration
   def compile_class_var_dec(self):
      """Compile a class-level (static or field) variable declaration."""
      # subroutine=False selects the class-level rules of compile_var_dec
      self.compile_var_dec(subroutine=False)

   # compiles a complete method, function, or constructor
   def compile_subroutine(self):
      """Compile one constructor, function or method.

      Expects the tokenizer to be positioned on the subroutine keyword.
      The VM ``function`` line is written only after all ``var``
      declarations have been read, because it needs the local count.
      On return the tokenizer has been advanced past the closing brace.
      """
      # start of subroutine
      self.symbol_table.start_subroutine()

      # constructor, function, or method keyword
      tt, type = self._token_next(False, "KEYWORD")

      # type of the return value
      # can be either keyword (void) or an identifier (any type)
      tt, t = self._token_next(True)

      # name of the method/function/constructor
      tt, name = self._token_next(True)
      # the VM function name is qualified with the class name
      name = self.class_name + "." + name

      # if the type is a method, "define" this as an argument, so the other
      # argument indexes work correctly
      if type == "method":
         self.symbol_table.define("this", self.class_name, SymbolTable.ARG)

      # opening parenthesis
      tt, t = self._token_next(True, "SYMBOL", "(")

      # arguments
      self.tokenizer.advance()
      self.compile_parameter_list()

      # closing parenthesis
      tt, t = self._token_next(False, "SYMBOL", ")")

      # opening brace
      tt, t = self._token_next(True, "SYMBOL", "{")

      # variable declarations
      self.tokenizer.advance()
      while True:
         tt, t = self._token_next(False)
         if tt == "KEYWORD" and t == "var":
            self.compile_var_dec()
         else:
            # stop trying to process variable declarations
            break

      # write the function
      num_locals = self.symbol_table.var_count(self.symbol_table.VAR)
      self.vm_writer.write_function(name, num_locals)

      # write any special code at the top of the function
      if type == "constructor":
         # code to allocate memory and set "this"
         # (the requested size is the number of fields in the symbol table)
         size = self.symbol_table.var_count(self.symbol_table.FIELD)
         self.vm_writer.write_push(self.vm_writer.CONST, size)
         self.vm_writer.write_call("Memory.alloc", 1)
         self.vm_writer.write_pop(self.vm_writer.POINTER, 0)
      elif type == "function":
         # nothing special
         pass
      elif type == "method":
         # put argument 0 into pointer 0 (this)
         self.vm_writer.write_push(self.vm_writer.ARG, 0)
         self.vm_writer.write_pop(self.vm_writer.POINTER, 0)
      else:
         # unknown subroutine kind: warn only, compilation continues
         print "WARNING: Expected constructor, function, or name; got", type

      # statements
      self.compile_statements()

      # closing brace
      tt, t = self._token_next(False, "SYMBOL", "}")

      # step past the closing brace
      self.tokenizer.advance()

   # compiles a (possibly empty) parameter list, not including the enclosing
   # parentheses
   def compile_parameter_list(self):
      """Compile a possibly empty parameter list (parentheses excluded).

      Every parameter is defined in the symbol table with kind
      ``SymbolTable.ARG``. The list is empty when the current token is the
      closing parenthesis.
      """
      # an immediate closing parenthesis means there is nothing to do
      token_type, token = self._token_next(False)
      if token_type == "SYMBOL" and token == ")":
         return

      while True:
         # current token is the parameter type, the next one its name
         _, param_type = self._token_next(False)
         _, param_name = self._token_next(True)

         # parameters are always arguments of the subroutine
         self.symbol_table.define(param_name, param_type, SymbolTable.ARG)

         # anything but a comma ends the list
         token_type, token = self._token_next(True)
         if not (token_type == "SYMBOL" and token == ","):
            break

         # step over the comma onto the next parameter's type
         self.tokenizer.advance()

   # compiles a var declaration
   # if subroutine is true, only the var keyword can be used
   # if subroutine is false, only the static and field keywords can be used
   def compile_var_dec(self, subroutine=True):
      """Compile ``keyword type name (',' name)* ;`` into symbol-table entries.

      Args:
         subroutine: True for a subroutine-level ``var`` declaration,
            False for a class-level ``static`` / ``field`` declaration.

      Expects the tokenizer to be positioned on the declaration keyword and
      advances past the terminating semicolon. No VM code is written here.
      """
      # the keyword to start the declaration
      tt, kind = self._token_next(False, "KEYWORD")

      # check for required types
      # (a keyword that does not fit only produces a warning; kind then
      # keeps the raw keyword text)
      if subroutine:
         if kind == "var":
            kind = SymbolTable.VAR
         else:
            print "WARNING: expecting var, but received %s" % (str(kind))
      else:
         if kind == "static":
            kind = SymbolTable.STATIC
         elif kind == "field":
            kind = SymbolTable.FIELD
         else:
            print "WARNING: expecting static or field, but received %s" % (str(kind))

      # type of the declaration
      # could be an identifier or a keyword (int, etc)
      tt, type = self._token_next(True)

      # name of the declaration
      tt, name = self._token_next(True, "IDENTIFIER")

      # define the variable in the symbol table
      self.symbol_table.define(name, type, kind)

      # can support more than one identifier name, to declare more than one
      # variable, separated by commas; process the second and any further
      # variables
      self.tokenizer.advance()
      while True:
         tt, t = self._token_next(False)
         if tt == "SYMBOL" and t == ",":
            # another variable name follows
            tt, name = self._token_next(True, "IDENTIFIER")

            # define the variable in the symbol table
            # (same type and kind as the first name)
            self.symbol_table.define(name, type, kind)

            self.tokenizer.advance()
         else:
            # no more variable names
            break

      # should be on the semicolon at the end of the line
      tt, t = self._token_next(False, "SYMBOL", ";")

      # step past the semicolon
      self.tokenizer.advance()

   # compiles a sequence of statements, not including the enclosing {}
   def compile_statements(self):
      """Compile statements until the current token no longer starts one.

      Each statement keyword ``kw`` is dispatched to the method named
      ``compile_<kw>``.
      """
      statement_keywords = ["do", "let", "while", "return", "if"]
      while True:
         token_type, keyword = self._token_next(False)
         if token_type != "KEYWORD" or keyword not in statement_keywords:
            # not a statement; stop processing statements
            break
         # look up and run the compiler for this kind of statement
         getattr(self, "compile_" + keyword)()

   # compiles a do statement
   def compile_do(self):
      """Compile ``do subroutineCall ;`` and discard the call's result."""
      writer = self.vm_writer

      # do keyword
      self._token_next(False, "KEYWORD", "do")

      # step onto the call and compile it
      self.tokenizer.advance()
      self.compile_subroutine_call()

      # a do statement ignores the returned value, so take it off the stack
      writer.write_pop(writer.TEMP, 0)

      # semicolon, then step past it
      self._token_next(False, "SYMBOL", ";")
      self.tokenizer.advance()

   # compiles a let statement
   def compile_let(self):
      """Compile ``let name ('[' expression ']')? = expression ;``.

      A plain variable receives the value with a single pop. For an array
      element the element address is computed first and kept on the stack
      while the right-hand side is compiled; the value is then routed
      through ``temp 0`` so the address can be loaded into ``pointer 1``.
      """
      writer = self.vm_writer

      # let keyword, then the name of the target variable
      self._token_next(False, "KEYWORD", "let")
      _, target = self._token_next(True, "IDENTIFIER")

      # an opening bracket right after the name means an array write
      token_type, token = self._token_next(True)
      is_array_store = token_type == "SYMBOL" and token == "["

      if is_array_store:
         # compile the offset expression
         self.tokenizer.advance()
         self.compile_expression()

         # base address of the array, then base + offset
         base_segment, base_index = self._resolve_symbol(target)
         writer.write_push(base_segment, base_index)
         writer.write_arithmetic("add")

         # the address cannot go into pointer 1 yet: the right-hand side
         # (not parsed so far) may itself use pointer 1 to read from an
         # array value

         # closing bracket, then step onto the = expected below
         self._token_next(False, "SYMBOL", "]")
         self.tokenizer.advance()

      # equals sign
      self._token_next(False, "SYMBOL", "=")

      # right-hand side expression
      self.tokenizer.advance()
      self.compile_expression()

      if is_array_store:
         # stack, top first: computed value, element address
         writer.write_pop(writer.TEMP, 0)       # set the value aside
         writer.write_pop(writer.POINTER, 1)    # address into pointer 1 (that)
         writer.write_push(writer.TEMP, 0)      # bring the value back
         writer.write_pop(writer.THAT, 0)       # store it in the element
      else:
         # plain variable: pop the value straight into it
         segment, index = self._resolve_symbol(target)
         writer.write_pop(segment, index)

      # semicolon, then step past it
      self._token_next(False, "SYMBOL", ";")
      self.tokenizer.advance()

   # compiles a while statement
   def compile_while(self):
      """Compile ``while (expression) { statements }``.

      Labels are numbered with ``self.while_index``, which is incremented
      on entry so each loop gets its own pair.
      """
      writer = self.vm_writer

      # fresh label pair for this loop
      self.while_index += 1
      loop_top = "WHILE_START_%d" % (self.while_index)
      loop_exit = "WHILE_END_%d" % (self.while_index)

      # while keyword and opening parenthesis
      self._token_next(False, "KEYWORD", "while")
      self._token_next(True, "SYMBOL", "(")

      # the condition is re-evaluated from this label on every pass
      writer.write_label(loop_top)

      # condition expression
      self.tokenizer.advance()
      self.compile_expression()

      # closing parenthesis
      self._token_next(False, "SYMBOL", ")")

      # the condition value is on the stack; negate it so that if-goto
      # leaves the loop when the condition is false
      writer.write_arithmetic("not")
      writer.write_if(loop_exit)

      # opening brace
      self._token_next(True, "SYMBOL", "{")

      # loop body
      self.tokenizer.advance()
      self.compile_statements()

      # closing brace
      self._token_next(False, "SYMBOL", "}")

      # jump back to re-test the condition, then mark the exit point
      writer.write_goto(loop_top)
      writer.write_label(loop_exit)

      self.tokenizer.advance()

   # compiles a return statement
   def compile_return(self):
      """Compile ``return expression? ;``.

      The statement has no expression only when the token right after
      ``return`` is the ``;`` symbol; in that case constant 0 is pushed as
      the return value. Every other token starts an expression, including
      symbols such as ``-``, ``~`` and ``(``.
      """
      # return keyword
      tt, t = self._token_next(False, "KEYWORD", "return")

      # possible expression to return
      tt, t = self._token_next(True)
      if tt == "SYMBOL" and t == ";":
         # no return expression; return 0
         self.vm_writer.write_push(self.vm_writer.CONST, 0)
      else:
         self.compile_expression()

      # ending semicolon
      tt, t = self._token_next(False, "SYMBOL", ";")

      self.vm_writer.write_return()

      # step past the semicolon
      self.tokenizer.advance()

   # compiles a if statement, including a possible trailing else clause
   def compile_if(self):
      """Compile ``if ( expression ) { statements }`` with an optional
      ``else { statements }``.

      The tokenizer must be on the ``if`` keyword.  It is left on the first
      token after the whole statement: without an else clause the look-ahead
      for ``else`` has already moved it there, with an else clause one
      explicit advance past the closing brace is made.

      The true branch is emitted first and the else branch second (the
      simple-to-write layout rather than the most compact one).
      """
      # fresh number so nested / sibling ifs never share labels
      self.if_index += 1
      branch_number = self.if_index
      false_label = "IF_FALSE_%d" % branch_number
      end_label = "IF_END_%d" % branch_number

      # if ( expression )
      self._token_next(False, "KEYWORD", "if")
      self._token_next(True, "SYMBOL", "(")
      self.tokenizer.advance()
      self.compile_expression()
      self._token_next(False, "SYMBOL", ")")

      # condition value is on the stack: negate it and jump over the true
      # branch when the condition did not hold
      self.vm_writer.write_arithmetic("not")
      self.vm_writer.write_if(false_label)

      # { true statements }
      self._token_next(True, "SYMBOL", "{")
      self.tokenizer.advance()
      self.compile_statements()
      self._token_next(False, "SYMBOL", "}")

      # look one token past the brace for an else clause
      token_type, token = self._token_next(True)
      if not (token_type == "KEYWORD" and token == "else"):
         # no else: the false label doubles as the end of the statement,
         # and the tokenizer is already past the brace
         self.vm_writer.write_label(false_label)
         return

      # the true branch must skip the else branch that follows it
      self.vm_writer.write_goto(end_label)
      self.vm_writer.write_label(false_label)

      # { else statements }
      self._token_next(True, "SYMBOL", "{")
      self.tokenizer.advance()
      self.compile_statements()
      self._token_next(False, "SYMBOL", "}")

      self.vm_writer.write_label(end_label)

      # step past the closing brace of the else block
      self.tokenizer.advance()

   # compiles an expression (one or more terms connected by operators)
   def compile_expression(self):
      """Compile ``term (op term)*``.

      Code is emitted in postfix order: both operands first, then the
      operator.  Operators are applied strictly left to right in the order
      they appear.  The tokenizer is left on the first token that is not a
      binary operator.
      """
      # operators that map onto a single arithmetic/logical VM command
      vm_commands = {
         "+": "add", "-": "sub", "=": "eq", ">": "gt",
         "<": "lt", "&": "and", "|": "or",
      }
      # operators that are implemented as two-argument library calls
      library_calls = {"*": "Math.multiply", "/": "Math.divide"}

      # left-most operand
      self.compile_term()

      token_type, token = self._token_next(False)
      while token_type == "SYMBOL" and token in "+-*/&|<>=":
         # right operand goes onto the stack before the operator is applied
         self.tokenizer.advance()
         self.compile_term()

         if token in vm_commands:
            self.vm_writer.write_arithmetic(vm_commands[token])
         elif token in library_calls:
            self.vm_writer.write_call(library_calls[token], 2)

         # is there another operator?
         token_type, token = self._token_next(False)

   # compiles a term
   # this routine is faced with a slight difficulty when trying to decide
   # between some of the alternative parsing rules. specifically, if the
   # current token is an identifier, the routine must distinguish between a
   # variable, an array entry, and a subroutine call. a single lookahead token,
   # which may be one of [, (, or ., suffices to distinguish between the three
   # possibilities. any other token is not part of this term and should not
   # be advanced over.
   def compile_term(self):
      """Compile a single term of an expression.

      A term is one of: integer constant, string constant, keyword constant
      (true / false / null / this), varName, varName[expression], a
      subroutine call, (expression), or unaryOp term.

      The tokenizer must be on the first token of the term and is left on
      the first token after it.  For an identifier one token of look-ahead
      ('[', '(' or '.') decides between variable, array entry and
      subroutine call.

      A token that cannot start a term only produces a warning on standard
      output; nothing is emitted and the tokenizer is not moved.
      """
      tt, t = self._token_next(False)
      if tt == "INT_CONST":
         self.vm_writer.write_push(self.vm_writer.CONST, t)

         # advance for the next statement
         self.tokenizer.advance()
      elif tt == "STRING_CONST":
         # Build the string at run time: String.new(length) followed by one
         # String.appendChar per character.  The code relies on each call
         # leaving the string pointer on the stack for the next one.
         self.vm_writer.write_push(self.vm_writer.CONST, len(t))
         self.vm_writer.write_call("String.new", 1)

         for char in t:
            self.vm_writer.write_push(self.vm_writer.CONST, ord(char))
            self.vm_writer.write_call("String.appendChar", 2)

         # advance for the next statement
         self.tokenizer.advance()
      elif tt == "KEYWORD":
         if t == "true":
            # true is -1, which is 0 negated
            self.vm_writer.write_push(self.vm_writer.CONST, 0)
            self.vm_writer.write_arithmetic("not")
         elif t == "false" or t == "null":
            self.vm_writer.write_push(self.vm_writer.CONST, 0)
         elif t == "this":
            self.vm_writer.write_push(self.vm_writer.POINTER, 0)
         # NOTE: any other keyword is skipped without emitting code

         # advance for the next statement
         self.tokenizer.advance()
      elif tt == "SYMBOL" and t == "(":
         # ( expression )
         self.tokenizer.advance()
         self.compile_expression()

         # closing parenthesis
         self._token_next(False, "SYMBOL", ")")

         # advance for the next statement
         self.tokenizer.advance()

      elif tt == "SYMBOL" and t in "-~":
         # unary_op term
         # postfix order - compile the operand first, then the operator
         self.tokenizer.advance()
         self.compile_term()

         if t == "-":
            self.vm_writer.write_arithmetic("neg")
         elif t == "~":
            self.vm_writer.write_arithmetic("not")

      elif tt == "IDENTIFIER":
         # varname, varname[expression], subroutine_call
         # nothing is written for the identifier until the look-ahead token
         # tells which of the three it is
         tt2, t2 = self._token_next(True)

         if tt2 == "SYMBOL" and t2 in "(.":
            # subroutine call: step back onto the identifier, because the
            # subroutine-call routine expects to start there
            self.tokenizer.retreat()

            self.compile_subroutine_call()
         elif tt2 == "SYMBOL" and t2 == "[":
            # array - read operation

            # base address onto the stack
            segment, index = self._resolve_symbol(t)
            self.vm_writer.write_push(segment, index)

            # offset expression
            self.tokenizer.advance()
            self.compile_expression()

            # base + offset
            self.vm_writer.write_arithmetic("add")

            # put the resulting address into pointer 1 (that)
            self.vm_writer.write_pop(self.vm_writer.POINTER, 1)

            # read from that 0 onto the stack
            self.vm_writer.write_push(self.vm_writer.THAT, 0)

            # closing bracket
            self._token_next(False, "SYMBOL", "]")

            # advance for the next statement
            self.tokenizer.advance()
         else:
            # plain variable; the look-ahead already moved the tokenizer
            # past the identifier
            segment, index = self._resolve_symbol(t)
            self.vm_writer.write_push(segment, index)

      else:
         # unknown
         # A single pre-formatted argument prints the same text whether
         # print is a statement (Python 2) or a function (Python 3).
         print("WARNING: Unknown term expression object: %s %s" % (tt, t))

   # compiles a (possible empty) comma-separated list of expressions
   def compile_expression_list(self):
      """Compile a possibly empty, comma-separated list of expressions.

      The tokenizer must be on the first token after the opening
      parenthesis.  Each expression is compiled in source order, and the
      tokenizer is left on the first token that is neither part of an
      expression nor a separating comma.

      Returns the number of expressions compiled.
      """
      count = 0

      # a ')' straight away means there is nothing to compile
      kind, value = self._token_next(False)
      if kind == "SYMBOL" and value == ")":
         return count

      while True:
         self.compile_expression()
         count += 1

         # only a comma keeps the list going
         kind, value = self._token_next(False)
         if not (kind == "SYMBOL" and value == ","):
            return count
         self.tokenizer.advance()

   # compiles a subroutine call
   # two cases:
   # - subroutineName(expressionList)
   # - (class|var).subroutineName(expressionList)
   def compile_subroutine_call(self):
      """Compile a subroutine call.

      Two source forms are accepted:

      - ``name(expressionList)``            method call on the current object
      - ``prefix.name(expressionList)``     method call on a variable when
        ``prefix`` is in the symbol table, otherwise a call to a function
        or constructor of class ``prefix``

      For both kinds of method call the receiving object is pushed before
      the explicit arguments and counted as one extra argument.

      The tokenizer must be on the first identifier and is left one token
      past the closing parenthesis.
      """
      # first part of the name
      tt, name1 = self._token_next(False, "IDENTIFIER")

      # a dot and a second name may follow; otherwise it is the parenthesis
      name2 = None
      tt, t = self._token_next(True)
      if tt == "SYMBOL" and t == ".":
         # the name after the dot
         tt, name2 = self._token_next(True, "IDENTIFIER")

         # advance so that we are on the parenthesis
         self.tokenizer.advance()

      # Work out the call target.  The bare form is tested first: it is
      # always a method of the current class, even if a variable happens
      # to have the same name (consulting the symbol table first would
      # treat the name as "variable.None").
      if name2 is None:
         # push the current object as the hidden first argument
         self.vm_writer.write_push(self.vm_writer.POINTER, 0)
         vm_function_name = self.class_name + "." + name1
         hidden_args = 1
      elif self.symbol_table.contains(name1):
         # push the variable as the hidden first argument
         segment, index = self._resolve_symbol(name1)
         self.vm_writer.write_push(segment, index)
         # element 1 of the symbol-table entry is the variable's declared
         # type, i.e. the class that owns the method
         vm_function_name = self.symbol_table.get(name1)[1] + "." + name2
         hidden_args = 1
      else:
         # class.function(): no hidden argument
         vm_function_name = name1 + "." + name2
         hidden_args = 0

      # opening parenthesis
      self._token_next(False, "SYMBOL", "(")

      # expression list
      self.tokenizer.advance()
      num_args = self.compile_expression_list() + hidden_args

      # closing parenthesis
      self._token_next(False, "SYMBOL", ")")

      # make the call
      self.vm_writer.write_call(vm_function_name, num_args)

      # advance for the next statement
      self.tokenizer.advance()

   # returns the token_type and token of the next token after advancing the
   # tokenizer before reading if advance is True
   def _token_next(self, advance=False, expected_type=None, expected_value=None):
      # advance the tokenizer, if requested
      if advance:
         self.tokenizer.advance()

      # get the token type and the token itself
      token_type = self.tokenizer.token_type()
      token = str(getattr(self.tokenizer, token_type.lower())())

      if expected_type and token_type != expected_type:
         print "WARNING: Type", token_type, "found; expected", expected_type
         import traceback, sys
         traceback.print_stack()
         sys.exit(1)
      if expected_value and token != expected_value:
         print "WARNING: Value", token, "found; expected", expected_value
         import traceback, sys
         traceback.print_stack()
         sys.exit(1)

      return token_type, token

   # convets a symbol table type into a segment type
   def _type_to_segment(self, type):
      if type == self.symbol_table.STATIC:
         return self.vm_writer.STATIC
      elif type == self.symbol_table.FIELD:
         return self.vm_writer.THIS
      elif type == self.symbol_table.ARG:
         return self.vm_writer.ARG
      elif type == self.symbol_table.VAR:
         return self.vm_writer.LOCAL
      else:
         print "ERROR: Bad type %s" % (str(type))
 
   # resolves the symbol from the symbol table
   # the segment and index is returned as a 2-tuple
   def _resolve_symbol(self, name):
      kind, type, index = self.symbol_table.get(name)
      return self._type_to_segment(kind), index
class CompilationEngine:
    """Recursive-descent compiler from Jack tokens to VM commands.

    Conventions shared by every ``compile_*`` method:

    * the ``is_xxx()`` and ``check_next_*()`` helpers only *peek* at the
      tokenizer's next token;
    * ``load_next_token()`` advances the tokenizer and returns the value of
      the token that has just become current.

    Several peeks compare only the token's text.  Where a string constant
    may legally appear, its type is checked as well, so that a constant
    such as "-" or ")" is not mistaken for the symbol of the same text.
    """

    def __init__(self, jack_file):
        self.vm_writer = VMWriter(jack_file)
        self.tokenizer = JackTokenizer(jack_file)
        self.symbol_table = SymbolTable()

        # running counters that keep the labels of every if / while unique
        self.if_index = -1
        self.while_index = -1

    # 'class' className '{' classVarDec* subroutineDec* '}'
    def compile_class(self):
        """Compile a whole class and close the VM writer afterwards."""
        self.load_next_token()  # 'class'
        self.class_name = self.load_next_token()  # className
        self.load_next_token()  # curr_token = '{'

        # while next token == 'static' | 'field'
        while self.is_class_var_dec():
            self.compile_class_var_dec()  # classVarDec*
        # while next token == 'constructor' | 'function' | 'method'
        while self.is_subroutine_dec():
            self.compile_subroutine()  # subroutineDec*
        self.vm_writer.close()

    # ('static' | 'field' ) type varName (',' varName)* ';'
    def compile_class_var_dec(self):
        """Record one static/field declaration line in the symbol table."""
        kind = self.load_next_token()  # curr_token = static | field
        var_type = self.load_next_token()  # curr_token = type
        name = self.load_next_token()  # curr_token = varName
        self.symbol_table.define(name, var_type, kind.upper())
        while self.check_next_token() != ";":  # (',' varName)*
            self.load_next_token()  # ','
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, var_type, kind.upper())
        self.load_next_token()  # ';'
        # next_token = 'constructor' | 'function' | 'method'

    # subroutineDec: ('constructor' | 'function' | 'method') ('void' | type)
    #                subroutineName '(' parameterList ')' subroutineBody
    # subroutineBody: '{' varDec* statements '}'
    def compile_subroutine(self):
        """Compile one constructor, function or method.

        The function header is written only after all ``var`` declarations
        have been read, because it needs the number of local variables.
        """
        subroutine_kind = self.load_next_token()  # constructor|function|method
        self.load_next_token()  # ('void' | type)
        subroutine_name = self.load_next_token()  # subroutineName

        self.symbol_table.start_subroutine()  # init subroutine table
        if subroutine_kind == "method":
            # defined before the declared parameters; presumably this makes
            # the receiving object occupy the first ARG slot - confirm
            # against SymbolTable.define
            self.symbol_table.define("instance", self.class_name, "ARG")

        self.load_next_token()  # curr_token '('
        self.compile_parameter_list()  # parameterList
        # next_token == ')' when the parameter list returns
        self.load_next_token()  # ')'
        self.load_next_token()  # '{'
        while self.check_next_token() == "var":
            self.compile_var_dec()  # varDec*
        # next_token now starts the statements (zero or more)

        func_name = f"{self.class_name}.{subroutine_name}"  # e.g. Main.main
        num_locals = self.symbol_table.counts["VAR"]  # get 'var' count
        self.vm_writer.write_function(func_name, num_locals)
        if subroutine_kind == "constructor":
            # allocate one word per field and make 'this' point at the block
            num_fields = self.symbol_table.counts["FIELD"]
            self.vm_writer.write_push("CONST", num_fields)
            self.vm_writer.write_call("Memory.alloc", 1)
            self.vm_writer.write_pop("POINTER", 0)
        elif subroutine_kind == "method":
            # 'this' is the object passed as the first argument
            self.vm_writer.write_push("ARG", 0)
            self.vm_writer.write_pop("POINTER", 0)

        self.compile_statements()  # statements
        self.load_next_token()  # '}'

    # ( (type varName) (',' type varName)*)?
    def compile_parameter_list(self):
        """Record the declared parameters as ARG symbols.

        Entered with curr_token == '('; returns with next_token == ')'.
        """
        if self.check_next_token() != ")":
            param_type = self.load_next_token()  # type
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, param_type, "ARG")
        while self.check_next_token() != ")":
            self.load_next_token()  # ','
            param_type = self.load_next_token()  # type
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, param_type, "ARG")

    # 'var' type varName (',' varName)* ';'
    def compile_var_dec(self):
        """Record one ``var`` declaration line as VAR symbols."""
        self.load_next_token()  # 'var'
        var_type = self.load_next_token()  # type
        name = self.load_next_token()  # varName
        self.symbol_table.define(name, var_type, "VAR")
        while self.check_next_token() != ";":  # (',' varName)*
            self.load_next_token()  # ','
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, var_type, "VAR")
        self.load_next_token()  # ';'

    # statement*
    # letStatement | ifStatement | whileStatement | doStatement | returnStatement
    def compile_statements(self):
        """Compile statements for as long as the next token starts one."""
        while self.is_statement():
            # curr_token == let | if | while | do | return
            statement = self.load_next_token()
            if statement == "let":
                self.compile_let()
            elif statement == "if":
                self.compile_if()
            elif statement == "while":
                self.compile_while()
            elif statement == "do":
                self.compile_do()
            elif statement == "return":
                self.compile_return()

    # 'let' varName ('[' expression ']')? '=' expression ';'
    def compile_let(self):
        """Compile an assignment to a variable or to an array element."""
        var_name = self.load_next_token()  # curr_token == varName
        var_kind = CONVERT_KIND[self.symbol_table.kind_of(var_name)]
        var_index = self.symbol_table.index_of(var_name)
        if self.is_array():  # array assignment
            self.load_next_token()  # curr_token == '['
            self.compile_expression()  # expression
            self.load_next_token()  # curr_token == ']'
            self.vm_writer.write_push(var_kind, var_index)
            self.vm_writer.write_arithmetic("ADD")

            self.load_next_token()  # curr_token == '='
            self.compile_expression()  # expression
            self.load_next_token()  # curr_token == ';'
            # The right-hand side is parked in TEMP 0 while the target
            # address is moved into POINTER 1, because evaluating the
            # right-hand side may itself have used POINTER 1.
            self.vm_writer.write_pop("TEMP", 0)
            self.vm_writer.write_pop("POINTER", 1)
            self.vm_writer.write_push("TEMP", 0)
            self.vm_writer.write_pop("THAT", 0)
        else:  # regular assignment
            self.load_next_token()  # curr_token == '='
            self.compile_expression()  # expression
            self.load_next_token()  # ';'
            self.vm_writer.write_pop(var_kind, var_index)

    # 'if' '(' expression ')' '{' statements '}' ( 'else' '{' statements '}' )?
    def compile_if(self):
        """Compile an if statement with an optional else clause.

        Entered with curr_token == 'if'.  The IF_END label and the jump to
        it are written whether or not an else clause is present.
        """
        self.if_index += 1
        if_index = self.if_index  # captured: nested ifs bump the counter
        self.load_next_token()  # curr_token == '('
        self.compile_expression()  # expression
        self.load_next_token()  # ')'
        self.load_next_token()  # '{'
        self.vm_writer.write_if(f"IF_TRUE{if_index}")
        self.vm_writer.write_goto(f"IF_FALSE{if_index}")
        self.vm_writer.write_label(f"IF_TRUE{if_index}")
        self.compile_statements()  # true branch
        self.vm_writer.write_goto(f"IF_END{if_index}")
        self.load_next_token()  # '}'
        self.vm_writer.write_label(f"IF_FALSE{if_index}")
        if self.check_next_token() == "else":  # ( 'else' '{' statements '}' )?
            self.load_next_token()  # 'else'
            self.load_next_token()  # '{'
            self.compile_statements()  # else branch
            self.load_next_token()  # '}'
        self.vm_writer.write_label(f"IF_END{if_index}")

    # 'while' '(' expression ')' '{' statements '}'
    def compile_while(self):
        """Compile a while loop.  Entered with curr_token == 'while'."""
        self.while_index += 1
        while_index = self.while_index  # captured: nested loops bump it
        self.vm_writer.write_label(f"WHILE{while_index}")
        self.load_next_token()  # '('
        self.compile_expression()  # expression
        self.vm_writer.write_arithmetic("NOT")  # leave the loop when false
        self.load_next_token()  # ')'
        self.load_next_token()  # '{'
        self.vm_writer.write_if(f"WHILE_END{while_index}")
        self.compile_statements()  # statements
        self.vm_writer.write_goto(f"WHILE{while_index}")
        self.vm_writer.write_label(f"WHILE_END{while_index}")
        self.load_next_token()  # '}'

    # 'do' subroutineCall ';'
    def compile_do(self):
        """Compile a do statement and discard the call's return value."""
        # curr_token == do
        self.load_next_token()  # first identifier, as compile_term leaves it
        self.compile_subroutine_call()
        self.vm_writer.write_pop("TEMP", 0)  # drop the unused return value
        self.load_next_token()  # ';'

    # 'return' expression? ';'
    def compile_return(self):
        """Compile a return statement; a bare ``return;`` returns 0."""
        # curr_token == return
        # a string constant whose text is ";" is an expression, not the
        # statement terminator
        if self.check_next_token() != ";" or self._next_is_string_constant():
            self.compile_expression()
        else:
            self.vm_writer.write_push("CONST", 0)
        self.vm_writer.write_return()
        self.load_next_token()  # ';'

    # term (op term)*
    def compile_expression(self):
        """Compile an expression, emitting each operator after its operands."""
        self.compile_term()  # term
        while self.is_op():  # (op term)*
            op: str = self.load_next_token()  # op
            self.compile_term()  # term
            if op in ARITHMETIC.keys():
                self.vm_writer.write_arithmetic(ARITHMETIC[op])
            elif op == "*":
                self.vm_writer.write_call("Math.multiply", 2)
            elif op == "/":
                self.vm_writer.write_call("Math.divide", 2)

    # integerConstant | stringConstant | keywordConstant | varName |
    # varName '[' expression ']' | subroutineCall | '(' expression ')' | unaryOp term
    def compile_term(self):
        """Compile one term.

        The literal token types are tested first.  The unary-operator and
        parenthesis tests look at the token text only, so testing them
        first would treat the string constants "-", "~" and "(" as
        symbols.
        """
        if self.check_next_type() == "INT_CONST":  # integerConstant
            self.vm_writer.write_push("CONST", self.load_next_token())
        elif self.check_next_type() == "STRING_CONST":  # stringConstant
            self.compile_string()
        elif self.is_unary_op_term():  # next_token == '~' | '-'
            unary_op = self.load_next_token()  # curr_token == '~' | '-'
            self.compile_term()  # term (recursive)
            self.vm_writer.write_arithmetic(ARITHMETIC_UNARY[unary_op])
        elif self.check_next_token() == "(":  # '(' expression ')'
            self.load_next_token()  # '('
            self.compile_expression()  # expression
            self.load_next_token()  # ')'
        elif self.check_next_type() == "KEYWORD":  # keywordConstant
            self.compile_keyword()
        else:
            # varName | varName '[' expression ']' | subroutineCall
            # curr_token becomes the identifier; the next token
            # ('[' | '(' | '.' | anything else) selects the form
            var_name = self.load_next_token()
            if self.is_array():  # varName '[' expression ']'
                self.load_next_token()  # '['
                self.compile_expression()  # expression
                self.load_next_token()  # ']'
                array_kind = self.symbol_table.kind_of(var_name)
                array_index = self.symbol_table.index_of(var_name)
                self.vm_writer.write_push(CONVERT_KIND[array_kind],
                                          array_index)
                self.vm_writer.write_arithmetic("ADD")
                self.vm_writer.write_pop("POINTER", 1)
                self.vm_writer.write_push("THAT", 0)
            elif self.is_subroutine_call():  # next_token == '(' | '.'
                # curr_token == subroutineName | className | varName
                self.compile_subroutine_call()
            else:
                # plain variable
                var_kind = CONVERT_KIND[self.symbol_table.kind_of(var_name)]
                var_index = self.symbol_table.index_of(var_name)
                self.vm_writer.write_push(var_kind, var_index)

    # subroutineCall: subroutineName '(' expressionList ')' |
    # ( className | varName) '.' subroutineName '(' expressionList ')'
    def compile_subroutine_call(self):
        """Compile a call; curr_token is the first identifier of the call.

        ``x.f(...)`` is a method call on ``x`` when the symbol table knows a
        type for ``x`` (anything other than "NONE"), otherwise a call into
        class ``x``.  A bare ``f(...)`` is a method call on the current
        object.  Method calls push the receiving object first and count it
        as one extra argument.
        """
        subroutine_caller = self.get_curr_token()
        function_name = subroutine_caller
        number_args = 0
        if self.check_next_token() == ".":
            self.load_next_token()  # curr_token == '.'
            subroutine_name = self.load_next_token()  # subroutineName
            caller_type = self.symbol_table.type_of(subroutine_caller)
            if caller_type != "NONE":  # it's an instance
                kind = self.symbol_table.kind_of(subroutine_caller)
                index = self.symbol_table.index_of(subroutine_caller)
                self.vm_writer.write_push(CONVERT_KIND[kind], index)
                function_name = f"{caller_type}.{subroutine_name}"
                number_args += 1
            else:  # it's a class
                class_name = subroutine_caller
                function_name = f"{class_name}.{subroutine_name}"
        elif self.check_next_token() == "(":
            subroutine_name = subroutine_caller
            function_name = f"{self.class_name}.{subroutine_name}"
            number_args += 1
            self.vm_writer.write_push("POINTER", 0)
        self.load_next_token()  # '('
        number_args += self.compile_expression_list()  # expressionList
        self.load_next_token()  # ')'
        self.vm_writer.write_call(function_name, number_args)

    # (expression (',' expression)* )?
    def compile_expression_list(self):
        """Compile the arguments of a call and return how many there were.

        Entered with curr_token == '('; returns with next_token == ')'.
        """
        number_args = 0
        # a first argument that is the string constant ")" must not be
        # mistaken for the closing parenthesis of an empty list
        if self.check_next_token() != ")" or self._next_is_string_constant():
            number_args += 1
            self.compile_expression()
        while self.check_next_token() != ")":
            number_args += 1
            self.load_next_token()  # curr_token == ','
            self.compile_expression()
        return number_args

    def compile_string(self):
        """Emit code that builds a string constant one character at a time."""
        string = self.load_next_token()  # curr_token == stringConstant
        self.vm_writer.write_push("CONST", len(string))
        self.vm_writer.write_call("String.new", 1)
        for char in string:
            self.vm_writer.write_push("CONST", ord(char))
            self.vm_writer.write_call("String.appendChar", 2)

    def compile_keyword(self):
        """Emit a keyword constant.

        ``this`` pushes POINTER 0; every other keyword pushes constant 0,
        and ``true`` additionally negates it.
        """
        keyword = self.load_next_token()  # curr_token == keywordConstant
        if keyword == "this":
            self.vm_writer.write_push("POINTER", 0)
        else:
            self.vm_writer.write_push("CONST", 0)
            if keyword == "true":
                self.vm_writer.write_arithmetic("NOT")

    # ---- look-ahead predicates (none of them moves the tokenizer) ----

    def is_subroutine_call(self):
        return self.check_next_token() in [".", "("]

    def is_array(self):
        return self.check_next_token() == "["

    def is_class_var_dec(self):
        return self.check_next_token() in ["static", "field"]

    def is_subroutine_dec(self):
        return self.check_next_token() in ["constructor", "function", "method"]

    def is_statement(self):
        return self.check_next_token() in [
            "let", "if", "while", "do", "return"
        ]

    def is_op(self):
        return self.check_next_token() in [
            "+", "-", "*", "/", "&", "|", "<", ">", "="
        ]

    def is_unary_op_term(self):
        return self.check_next_token() in ["~", "-"]

    def _next_is_string_constant(self):
        """Tell whether the next token is a string constant."""
        return self.check_next_type() == "STRING_CONST"

    # ---- thin wrappers around the tokenizer ----

    def check_next_token(self):
        """Return the value of the next token without consuming it."""
        return self.tokenizer.next_token[1]

    def check_next_type(self):
        """Return the type of the next token without consuming it."""
        return self.tokenizer.next_token[0]

    def get_curr_token(self):
        """Return the value of the current token."""
        return self.tokenizer.curr_token[1]

    def load_next_token(self):
        """Advance the tokenizer and return the new current token's value.

        Returns "" without advancing when the tokenizer has no more tokens.
        """
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()  # curr_token = next_token
            return self.tokenizer.curr_token[1]
        else:
            return ""
Exemple #19
0
class CompilationEngine:
    """Recursive-descent compiler that turns one Jack class into VM commands.

    Tokens are read from ``tokenizer`` (``current_token``, ``advance``,
    ``peek``), identifiers are recorded in a ``SymbolTable`` and the generated
    code is emitted through ``vm_writer``.  Syntax errors are reported by
    raising ``CompilationError``.
    """

    def __init__(self, tokenizer, vm_writer):
        """Store the collaborators and start with an empty symbol table.

        Args:
            tokenizer: token source for the class being compiled.
            vm_writer: sink exposing the ``write_*`` VM emission methods.
        """
        self.tokenizer = tokenizer
        self.st = SymbolTable()
        self.vm_writer = vm_writer
        self.class_name = ""
        # Monotonic counter used to build unique flow-control labels.
        self._label_count = 0

    def compile(self):
        """Compile the whole token stream (a single class)."""
        self.compile_class()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _consume(self):
        """Eat whatever the current token is and return it."""
        token = self.tokenizer.current_token
        self.eat(token)
        return token

    def _next_label_id(self):
        """Return a number that is unique among this engine's labels."""
        # getattr keeps instances that bypassed __init__ working.
        label_id = getattr(self, "_label_count", 0)
        self._label_count = label_id + 1
        return label_id

    def _push_variable(self, name):
        """Push the value of the declared variable ``name`` onto the stack."""
        self.vm_writer.write_push(segment_map[self.st.kind_of(name)],
                                  self.st.index_of(name))

    def _compile_var_sequence(self, kind):
        """Compile ``type varName (',' varName)* ';'`` and define each name."""
        var_type = self._consume()
        while True:
            self.st.define(self.tokenizer.current_token, var_type, kind)
            self._consume()  # eat varName
            if self.tokenizer.current_token != ",":
                break
            self.eat(",")
        self.eat(";")

    def _compile_argument_list(self):
        """Compile ``'(' expressionList ')'`` and return the argument count."""
        n_args = 0
        self.eat("(")
        if self.tokenizer.is_valid_term(self.tokenizer.current_token):
            self.compile_expression()
            n_args += 1
        while self.tokenizer.current_token == ",":
            self.eat(",")
            self.compile_expression()
            n_args += 1
        self.eat(")")
        return n_args

    # ------------------------------------------------------------------
    # Program structure
    # ------------------------------------------------------------------
    def compile_class(self):
        """Compile ``'class' className '{' classVarDec* subroutineDec* '}'``."""
        self.tokenizer.advance()

        self.eat(CLASS)

        # The class name prefixes every VM function emitted for this class.
        self.class_name = self.tokenizer.current_token
        self.eat(self.tokenizer.current_token)

        self.eat("{")
        self.compile_class_var_dec()
        self.compile_subroutine_dec()
        self.eat("}")

    def compile_class_var_dec(self):
        """Compile every ``static``/``field`` declaration of the class."""
        while self.tokenizer.current_token in (STATIC, FIELD):
            # The keyword itself doubles as the symbol kind.
            kind = self._consume()
            self._compile_var_sequence(kind)

    def compile_subroutine_dec(self):
        """Compile every constructor, function and method of the class."""
        while self.tokenizer.current_token in (CONSTRUCTOR, FUNCTION, METHOD):
            self.st.start_subroutine()

            # subroutine type: method, function or constructor
            subroutine_type = self._consume()

            # return type (not needed for code generation)
            self._consume()

            subroutine_name = self.class_name + "." + self._consume()

            # compile parameter list and add args to symbol table
            self.compile_parameter_list(subroutine_type)

            self.eat("{")

            # Locals must be counted before the 'function' command is
            # written, so the header handles the var declarations too.
            self.compile_subroutine_header(subroutine_name, subroutine_type)

            self.compile_statements()

            self.eat("}")

    def compile_subroutine_header(self, subroutine_name, subroutine_type):
        """Compile local ``var`` declarations and emit the function prologue.

        Args:
            subroutine_name: fully qualified ``Class.name`` of the subroutine.
            subroutine_type: the CONSTRUCTOR, FUNCTION or METHOD keyword.
        """
        while self.tokenizer.current_token == VAR:
            self.eat(VAR)
            self._compile_var_sequence(VAR)

        self.vm_writer.write_function(subroutine_name, self.st.var_count(VAR))

        if subroutine_type == CONSTRUCTOR:
            # allocate memory for the object being constructed
            self.vm_writer.write_push(VM_CONST, self.st.var_count(FIELD))
            self.vm_writer.write_call("Memory.alloc", "1")
            self.vm_writer.write_pop(VM_POINTER, "0")
        elif subroutine_type == METHOD:
            # anchor THIS to the current object
            self.vm_writer.write_push(VM_ARGUMENT, "0")
            self.vm_writer.write_pop(VM_POINTER, "0")

    def compile_parameter_list(self, subroutine_type):
        """Compile ``'(' ((type varName) (',' type varName)*)? ')'``.

        Args:
            subroutine_type: the CONSTRUCTOR, FUNCTION or METHOD keyword.
        """
        if subroutine_type == METHOD:
            # arg0 of a method is always the object itself; the dummy entry
            # makes the declared arguments start indexing from 1.
            self.st.define("thisObject", "type", ARG)

        self.eat("(")
        if self.tokenizer.current_token != ")":
            while True:
                var_type = self._consume()
                name = self.tokenizer.current_token
                self.st.define(name, var_type, ARG)
                self._consume()  # eat varName
                if self.tokenizer.current_token != ",":
                    break
                self.eat(",")
        self.eat(")")

    # ------------------------------------------------------------------
    # Statements
    # ------------------------------------------------------------------
    def compile_statements(self):
        """Compile statements until the current token no longer starts one."""
        statement_map = {
            LET: self.compile_let_statement,
            IF: self.compile_if_statement,
            WHILE: self.compile_while_statement,
            DO: self.compile_do_statement,
            RETURN: self.compile_return_statement,
        }
        while self.tokenizer.current_token in (LET, IF, WHILE, DO, RETURN):
            statement_map[self.tokenizer.current_token]()

    def compile_let_statement(self):
        """Compile ``'let' varName ('[' expression ']')? '=' expression ';'``."""
        self.eat(LET)

        var_name = self.tokenizer.current_token
        self.eat(var_name)

        if self.tokenizer.current_token == "[":
            # Array target: leave (base + index) on the stack first.
            self.eat("[")
            self.compile_expression()
            self.eat("]")
            self._push_variable(var_name)
            self.vm_writer.write_arithmetic(VM_ADD)
            self.eat("=")
            self.compile_expression()
            self.eat(";")
            # The right-hand side may itself have used THAT, so park the
            # value in temp 0 before anchoring THAT to the target entry.
            self.vm_writer.write_pop(VM_TEMP, "0")
            self.vm_writer.write_pop(VM_POINTER, "1")
            self.vm_writer.write_push(VM_TEMP, "0")
            self.vm_writer.write_pop(VM_THAT, "0")
        else:
            self.eat("=")
            self.compile_expression()
            self.eat(";")
            self.vm_writer.write_pop(segment_map[self.st.kind_of(var_name)],
                                     self.st.index_of(var_name))

    def compile_do_statement(self):
        """Compile ``'do' subroutineCall ';'`` and discard the return value."""
        self.eat(DO)
        self.compile_subroutine_call()
        # pop and ignore the returned value
        self.vm_writer.write_pop(VM_TEMP, 0)
        self.eat(";")

    def compile_subroutine_call(self):
        """Compile ``name(args)``, ``var.name(args)`` or ``Class.name(args)``.

        Raises:
            CompilationError: if the identifier is followed by neither ``(``
                nor ``.``.
        """
        identifier = self._consume()
        follower = self.tokenizer.current_token

        if follower == "(":
            # 'subroutine_name()' is always a method call on this object
            self.vm_writer.write_push(VM_POINTER, "0")
            n_args = self._compile_argument_list()
            self.vm_writer.write_call(self.class_name + "." + identifier,
                                      n_args + 1)
        elif follower == ".":
            # A declared variable before the dot means a method call on that
            # object; otherwise the identifier is taken as a class name.
            receiver_type = self.st.type_of(identifier)
            is_method_call = receiver_type is not None

            if is_method_call:
                self._push_variable(identifier)

            self.eat(".")
            subroutine_name = self._consume()
            n_args = self._compile_argument_list()

            prefix = identifier
            if is_method_call:
                n_args += 1
                prefix = receiver_type

            self.vm_writer.write_call(prefix + "." + subroutine_name, n_args)
        else:
            # Previously this fell through silently and produced VM code
            # with no call in it.
            raise CompilationError(
                "Expected to find token '(' or '.' but found '{0:}'".format(
                    follower))

    def compile_if_statement(self):
        """Compile ``'if' '(' expr ')' '{' stmts '}' ('else' '{' stmts '}')?``."""
        # Counter-based names keep the output reproducible and never start
        # with a digit, unlike raw uuid hex strings.
        label_id = self._next_label_id()
        else_label = "IF_FALSE_{0}".format(label_id)
        end_label = "IF_END_{0}".format(label_id)

        self.eat(IF)
        self.eat("(")
        self.compile_expression()
        self.vm_writer.write_arithmetic(VM_NOT)
        self.vm_writer.write_if(else_label)
        self.eat(")")
        self.eat("{")
        self.compile_statements()
        self.eat("}")
        self.vm_writer.write_goto(end_label)
        self.vm_writer.write_label(else_label)
        if self.tokenizer.current_token == ELSE:
            self.eat(ELSE)
            self.eat("{")
            self.compile_statements()
            self.eat("}")
        self.vm_writer.write_label(end_label)

    def compile_while_statement(self):
        """Compile ``'while' '(' expression ')' '{' statements '}'``."""
        label_id = self._next_label_id()
        loop_label = "WHILE_EXP_{0}".format(label_id)
        exit_label = "WHILE_END_{0}".format(label_id)

        self.vm_writer.write_label(loop_label)
        self.eat(WHILE)
        self.eat("(")
        self.compile_expression()
        self.vm_writer.write_arithmetic(VM_NOT)
        self.vm_writer.write_if(exit_label)
        self.eat(")")
        self.eat("{")
        self.compile_statements()
        self.vm_writer.write_goto(loop_label)
        self.eat("}")
        self.vm_writer.write_label(exit_label)

    def compile_return_statement(self):
        """Compile ``'return' expression? ';'`` (a bare return pushes 0)."""
        self.eat(RETURN)
        if self.tokenizer.current_token != ";":
            self.compile_expression()
        else:
            self.vm_writer.write_push(VM_CONST, 0)
        self.eat(";")
        self.vm_writer.write_return()

    # ------------------------------------------------------------------
    # Expressions
    # ------------------------------------------------------------------
    def compile_expression(self):
        """Compile ``term (op term)*``, applying operators left to right."""
        self.compile_term()
        while self.tokenizer.current_token in operation_map:
            op = operation_map[self.tokenizer.current_token]
            self.eat(self.tokenizer.current_token)
            self.compile_term()
            self.vm_writer.write_arithmetic(op)

    def compile_term(self):
        """Compile a single term of an expression.

        Raises:
            CompilationError: if the current token cannot start a term.
        """
        token = self.tokenizer.current_token
        if lexical_elements.is_int_constant(token):
            self.compile_int_constant()
        elif lexical_elements.is_string_constant(token):
            self.compile_string_constant()
        elif self.tokenizer.is_keyword_constant(token):
            self.compile_keyword_constant()
        elif token == "(":
            self.eat("(")
            self.compile_expression()
            self.eat(")")
        elif token == "-" or token == "~":
            self.compile_unary()
        elif lexical_elements.is_identifier(token):
            lookahead = self.tokenizer.peek()
            if lookahead == "[":
                self.compile_array_expression()
            elif lookahead == "(" or lookahead == ".":
                self.compile_subroutine_call()
            else:
                self.eat(token)
                self._push_variable(token)
        else:
            # Previously nothing was emitted here, so e.g. 'let x = ;'
            # compiled into a pop with no matching push.
            raise CompilationError(
                "Expected to find a term but found '{0:}'".format(token))

    def compile_int_constant(self):
        """Push the integer constant held by the current token."""
        self.vm_writer.write_push(VM_CONST, self.tokenizer.current_token)
        self.eat(self.tokenizer.current_token)

    def compile_string_constant(self):
        """Build a String object holding the current string constant."""
        constant = self.tokenizer.current_token.replace('"', '')
        self.eat(self.tokenizer.current_token)
        self.vm_writer.write_push(VM_CONST, len(constant))
        self.vm_writer.write_call("String.new", 1)
        for c in constant:
            self.vm_writer.write_push(VM_CONST, ord(c))
            self.vm_writer.write_call("String.appendChar", "2")

    def compile_keyword_constant(self):
        """Push the value of the current keyword constant."""
        if self.tokenizer.current_token == TRUE:
            # true is represented as -1
            self.vm_writer.write_push(VM_CONST, "1")
            self.vm_writer.write_arithmetic(VM_NEG)
        elif self.tokenizer.current_token == THIS:
            self.vm_writer.write_push(VM_POINTER, 0)
        else:
            # handles both FALSE and NULL
            self.vm_writer.write_push(VM_CONST, "0")
        self.eat(self.tokenizer.current_token)

    def compile_unary(self):
        """Compile ``('-' | '~') term``; the operator is emitted after the term."""
        op = VM_NOT if self.tokenizer.current_token == "~" else VM_NEG
        self.eat(self.tokenizer.current_token)
        self.compile_term()
        self.vm_writer.write_arithmetic(op)

    def compile_array_expression(self):
        """Compile ``varName '[' expression ']'`` and push the entry's value."""
        array = self._consume()
        self.eat("[")
        self.compile_expression()
        self.eat("]")
        self._push_variable(array)
        self.vm_writer.write_arithmetic(VM_ADD)
        self.vm_writer.write_pop(VM_POINTER, "1")  # anchor THAT to array entry
        self.vm_writer.write_push(
            VM_THAT, "0")  # push result of array evaluation to stack

    def eat(self, token):
        """Consume the current token, which must equal ``token``.

        Raises:
            CompilationError: if the current token differs from ``token``.
        """
        current_token = self.tokenizer.current_token
        if current_token != token:
            raise CompilationError(
                "Expected to find token '{0:}' but found '{1:}'".format(
                    token, current_token))
        self.tokenizer.advance()
Exemple #20
0
class CompilationEngine:
    '''The brain of the Jack syntax analyzer'''

    # Constructor
    def __init__(self, tokenizer: JackTokenizer, out_path: Path):
        """Set up symbol tables, output sinks and label counters.

        Args:
            tokenizer: token source for the class being compiled.
            out_path: file opened for writing the tagged (XML-style) output;
                the VM writer is given the same path with a ``.vm`` suffix.
        """
        self.tokenizer = tokenizer

        # One table for class-scope names, one for the current subroutine.
        self.class_level_st, self.subroutine_level_st = SymbolTable(), SymbolTable()

        # Names and kind of what is currently being compiled; filled in later.
        self.class_name = self.func_name = self.sub_type = None

        # Tagged output goes to out_path itself ...
        self.out_stream = out_path.open('w')

        # ... and VM commands to its ".vm" sibling.
        self.vm_writer = VMWriter(out_path.with_suffix(".vm"))

        # Per-statement-kind counters for generating labels.
        self.label_count = dict.fromkeys(("if", "while"), 0)

    def get_if_labels(self):
        """Return a fresh pair of labels for an ``if`` statement.

        The ``"if"`` counter in ``label_count`` is incremented; the labels
        carry the counter's value from before the increment.

        Returns:
            A 2-tuple ``(LABEL_IF_<n>_1, LABEL_IF_<n>_2)``.
        """
        serial = self.label_count["if"]
        self.label_count["if"] = serial + 1
        stem = f"LABEL_IF_{serial}"
        return (stem + "_1", stem + "_2")

    def get_while_labels(self):
        """Return a fresh pair of labels for a ``while`` statement.

        The ``"while"`` counter in ``label_count`` is incremented; the labels
        carry the counter's value from before the increment.

        Returns:
            A 2-tuple ``(LABEL_WHILE_<n>_1, LABEL_WHILE_<n>_2)``.
        """
        serial = self.label_count["while"]
        self.label_count["while"] = serial + 1
        stem = f"LABEL_WHILE_{serial}"
        return (stem + "_1", stem + "_2")

    def start_compilation(self):
        """Load the first token and compile the class it must introduce.

        Raises:
            AttributeError: if the first token is not the ``class`` keyword.
        """
        # Read the first token into memory
        self.tokenizer.has_more_tokens()

        starts_with_class = (
            self.tokenizer.get_token_type() == TokenType.KEYWORD
            and self.tokenizer.get_keyword_type() == KeywordType.CLASS
        )
        if not starts_with_class:
            # This also covers a keyword other than 'class', which used to
            # return silently without compiling anything.
            raise AttributeError("Not starting with a class")

        # Start analyzing syntax
        self.compile_class()

    # Helper method to write terminal XML tags
    def write_terminal_tag(self, t, v):
        """Write one terminal element ``<tag> v </tag>`` to the output stream.

        Args:
            t: token type selecting the tag name.
            v: value placed between the opening and closing tags.

        Nothing is written when ``t`` is none of the five handled types.
        """
        if t == TokenType.KEYWORD:
            tag = "keyword"
        elif t == TokenType.IDENTIFIER:
            tag = "identifier"
        elif t == TokenType.SYMBOL:
            tag = "symbol"
        elif t == TokenType.INT_CONST:
            tag = "integerConstant"
        elif t == TokenType.STRING_CONST:
            tag = "stringConstant"
        else:
            return
        self.out_stream.write(f"<{tag}> {v} </{tag}>\n")

    # 'class' className '{' classVarDec* subroutineDec* '}'
    def compile_class(self):
        """Compile a whole class, the current token being ``class``.

        Writes the ``<class>`` element to the output stream, records the
        class name, then compiles the field/static declarations followed by
        the subroutine declarations.

        Raises:
            AttributeError: if the token after ``class`` is not an identifier.
        """
        tokens = self.tokenizer
        xml = self.out_stream

        def at_keyword(*kinds):
            # True when the current token is a keyword of one of ``kinds``.
            return (tokens.get_token_type() == TokenType.KEYWORD
                    and tokens.get_keyword_type() in kinds)

        # Opening tag and the 'class' keyword
        xml.write("<class>\n")
        self.write_terminal_tag(tokens.get_token_type(), 'class')

        # Class name
        tokens.has_more_tokens()
        if not (tokens.get_token_type() == TokenType.IDENTIFIER):
            raise AttributeError("Not a valid class name!")
        self.class_name = tokens.get_cur_ident()
        self.write_terminal_tag(tokens.get_token_type(), self.class_name)
        xml.write("\n===DECLARED===\nclass name\n=======")

        # Opening curly bracket
        tokens.has_more_tokens()
        self.eat('{')
        self.write_terminal_tag(tokens.get_token_type(), tokens.get_symbol())

        # classVarDec*
        tokens.has_more_tokens()
        while at_keyword(KeywordType.FIELD, KeywordType.STATIC):
            self.compile_class_var_dec()

        # subroutineDec*
        while at_keyword(KeywordType.CONSTRUCTOR, KeywordType.FUNCTION,
                         KeywordType.METHOD):
            self.compile_subroutine_dec()

        # Closing curly bracket and closing tag
        self.eat("}")
        self.write_terminal_tag(TokenType.SYMBOL, "}")
        xml.write("</class>\n")

    # ('static'|'field') type varName (',' varName)* ';'
    def compile_class_var_dec(self):
        """Compile one ``static``/``field`` declaration, current token first.

        Each declared name is written to the output stream, defined in the
        class-level symbol table and followed by a ``===DECLARED===`` note
        giving its kind, type and index.

        Raises:
            Exception: if the current token is neither ``static`` nor ``field``.
            AssertionError: if the type or one of the variable names is invalid.
        """
        tokens = self.tokenizer
        xml = self.out_stream
        table = self.class_level_st

        xml.write("<classVarDec>\n")

        # static / field keyword decides the symbol kind
        storage = tokens.get_cur_ident()
        self.write_terminal_tag(TokenType.KEYWORD, storage)
        if storage == "static":
            var_kind = SymbolKind.STATIC
        elif storage == "field":
            var_kind = SymbolKind.FEILD
        else:
            raise Exception("Other than static or feild:" + storage)

        # Shared type of every variable in this declaration
        tokens.has_more_tokens()
        if not self.is_valid_type():
            raise AssertionError("Invalid class variable type!")
        self.write_terminal_tag(tokens.get_token_type(), tokens.get_cur_ident())
        var_type = tokens.get_cur_ident()

        def declare_current_name(error_message):
            # Tag, define and report the variable named by the current token.
            if not (tokens.get_token_type() == TokenType.IDENTIFIER):
                raise AssertionError(error_message)
            var_name = tokens.get_cur_ident()
            self.write_terminal_tag(tokens.get_token_type(), var_name)
            table.define(var_name, var_type, var_kind)
            var_index = table.get_index_of(var_name)
            xml.write(
                f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n======="
            )

        # First variable name
        tokens.has_more_tokens()
        declare_current_name("Invalid class variable name!")
        tokens.has_more_tokens()

        # Further names, e.g. field int x, y, z;
        while (tokens.get_token_type() == TokenType.SYMBOL
               and tokens.get_symbol() == ","):
            self.write_terminal_tag(TokenType.SYMBOL, ",")
            tokens.has_more_tokens()
            declare_current_name("Invalid Syntax for class varible declaration!")
            tokens.has_more_tokens()

        # Terminating ";"
        self.eat(";")
        self.write_terminal_tag(TokenType.SYMBOL, ";")
        tokens.has_more_tokens()

        xml.write("</classVarDec>\n")

    # ('constructor' | 'function' | 'method') ('void' | type) subroutineName '(' parameterList ')' subroutineBody
    def compile_subroutine_dec(self):
        """Compile one subroutine declaration, current token being its kind.

        Resets the subroutine-level symbol table (adding ``this`` as the first
        argument for methods), writes the declaration's tags, records the
        subroutine kind and name on the engine and compiles the body.

        Raises:
            AssertionError: if the return type or subroutine name is invalid.
        """
        tokens = self.tokenizer
        xml = self.out_stream
        scope = self.subroutine_level_st

        xml.write("<subroutineDec>\n")

        # constructor / function / method
        self.sub_type = tokens.get_cur_ident()
        self.write_terminal_tag(TokenType.KEYWORD, self.sub_type)

        # Fresh scope; methods receive the object as their first argument.
        scope.reset_table()
        if self.sub_type == "method":
            scope.define("this", self.class_name, SymbolKind.ARG)

        # Return type: a valid type or 'void'
        tokens.has_more_tokens()
        if not (self.is_valid_type() or (
                tokens.get_token_type() == TokenType.KEYWORD
                and tokens.get_keyword_type() == KeywordType.VOID)):
            raise AssertionError("Not a valid subroutine return type!")
        self.write_terminal_tag(tokens.get_token_type(), tokens.get_cur_ident())

        # Subroutine name
        tokens.has_more_tokens()
        if not (tokens.get_token_type() == TokenType.IDENTIFIER):
            raise AssertionError("Invalid Syntax for function name!")
        subroutine_name = tokens.get_cur_ident()
        self.write_terminal_tag(TokenType.IDENTIFIER, subroutine_name)

        # '(' parameterList ')'
        tokens.has_more_tokens()
        self.eat('(')
        self.write_terminal_tag(TokenType.SYMBOL, "(")

        tokens.has_more_tokens()
        xml.write("<parameterList>\n")
        # A symbol here means there are no parameters to compile.
        if not (tokens.get_token_type() == TokenType.SYMBOL):
            self.compile_parameter_list()
        xml.write("</parameterList>\n")

        self.eat(')')
        self.write_terminal_tag(TokenType.SYMBOL, ")")

        # Remembered for the 'function' command written by the body.
        self.func_name = subroutine_name

        tokens.has_more_tokens()
        self.compile_subroutine_body()

        xml.write("</subroutineDec>\n")

    # ((type varName) (',' type varName)*)?
    def compile_parameter_list(self):
        """Compile a non-empty parameter list, current token being a type.

        Every parameter is tagged in the output stream, defined as an
        argument in the subroutine-level symbol table and followed by a
        ``===DECLARED===`` note giving its kind, type and index.

        Raises:
            AssertionError: if a parameter type or name is invalid.
        """
        tokens = self.tokenizer
        xml = self.out_stream
        scope = self.subroutine_level_st
        var_kind = SymbolKind.ARG  # every entry is an argument

        def read_parameter(type_error, name_error):
            # Consume one 'type varName' pair and register it.
            if not self.is_valid_type():
                raise AssertionError(type_error)
            var_type = tokens.get_cur_ident()
            self.write_terminal_tag(tokens.get_token_type(), var_type)

            tokens.has_more_tokens()
            if not (tokens.get_token_type() == TokenType.IDENTIFIER):
                raise AssertionError(name_error)
            var_name = tokens.get_cur_ident()
            self.write_terminal_tag(TokenType.IDENTIFIER, var_name)

            scope.define(var_name, var_type, var_kind)
            var_index = scope.get_index_of(var_name)
            xml.write(
                f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n======="
            )
            tokens.has_more_tokens()

        # First parameter
        read_parameter("Invalid syntax in parameter list!",
                       "Invalid Syntax for function parameter name name!")

        # Remaining parameters, each introduced by a comma
        while (tokens.get_token_type() == TokenType.SYMBOL
               and tokens.get_symbol() == ","):
            self.write_terminal_tag(TokenType.SYMBOL, ",")
            tokens.has_more_tokens()
            read_parameter("Invalid variable type in parameter list",
                           "Invalid variable name in parameter list!!")

    # '{' varDec* statements '}'
    def compile_subroutine_body(self):
        """Compile a subroutine body, current token being its ``{``.

        After the ``var`` declarations the VM ``function`` command is
        written with the number of locals, followed by the constructor or
        method prologue, and then the statements are compiled.
        """
        tokens = self.tokenizer
        xml = self.out_stream
        vm = self.vm_writer

        xml.write("<subroutineBody>\n")

        # Opening curly bracket
        self.eat("{")
        self.write_terminal_tag(TokenType.SYMBOL, "{")
        tokens.has_more_tokens()

        # varDec* -- each call starts on a 'var' keyword
        while (tokens.get_token_type() == TokenType.KEYWORD
               and tokens.get_keyword_type() == KeywordType.VAR):
            self.compile_var_dec()

        # The function command needs the number of locals declared above.
        local_count = self.subroutine_level_st.get_var_count(SymbolKind.VAR)
        vm.write_function(f"{self.class_name}.{self.func_name}", local_count)

        kind = self.sub_type
        if kind == "constructor":
            # Allocate one word per field; the address becomes 'this'.
            field_count = self.class_level_st.get_var_count(SymbolKind.FEILD)
            vm.write_push(SegmentType.CONST, field_count)
            vm.write_call("Memory.alloc", 1)
        elif kind == "method":
            # The object arrives as argument 0.
            vm.write_push(SegmentType.ARG, 0)
        if kind in ("constructor", "method"):
            # pop pointer 0
            vm.write_pop(SegmentType.POINTER, 0)

        # Handle statements
        self.compile_statements()

        # Closing curly bracket
        self.eat("}")
        self.write_terminal_tag(TokenType.SYMBOL, "}")
        tokens.has_more_tokens()

        xml.write("</subroutineBody>\n")

    # 'var' type varName (',' varName)* ';'
    def compile_var_dec(self):
        """Compile one ``var`` declaration, current token being ``var``.

        Each declared name is tagged in the output stream, defined as a
        local in the subroutine-level symbol table and followed by a
        ``===DECLARED===`` note giving its kind, type and index.

        Raises:
            AssertionError: if the type or one of the variable names is invalid.
        """
        tokens = self.tokenizer
        xml = self.out_stream
        scope = self.subroutine_level_st
        var_kind = SymbolKind.VAR

        def read_name():
            # Tag and return the variable name held by the current token.
            if not (tokens.get_token_type() == TokenType.IDENTIFIER):
                raise AssertionError("Invalid Syntax for var name!")
            name = tokens.get_cur_ident()
            self.write_terminal_tag(TokenType.IDENTIFIER, name)
            return name

        def record(name):
            # Define the local and report its properties.
            scope.define(name, var_type, var_kind)
            var_index = scope.get_index_of(name)
            xml.write(
                f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n======="
            )

        xml.write("<varDec>\n")
        self.write_terminal_tag(TokenType.KEYWORD, "var")

        # Shared type of every variable in this declaration
        tokens.has_more_tokens()
        if not self.is_valid_type():
            raise AssertionError("Not a valid var type!")
        var_type = tokens.get_cur_ident()
        self.write_terminal_tag(tokens.get_token_type(), var_type)

        # First variable: the tokenizer moves on before the name is recorded
        tokens.has_more_tokens()
        first_name = read_name()
        tokens.has_more_tokens()
        record(first_name)

        # Further names, each introduced by a comma
        while (tokens.get_token_type() == TokenType.SYMBOL
               and tokens.get_symbol() == ","):
            self.write_terminal_tag(TokenType.SYMBOL, ",")
            tokens.has_more_tokens()
            record(read_name())
            tokens.has_more_tokens()

        # Terminating ";"
        self.eat(";")
        self.write_terminal_tag(TokenType.SYMBOL, ";")
        tokens.has_more_tokens()

        xml.write("</varDec>\n")

    # statement*
    def compile_statements(self):
        """Compile consecutive statements inside a ``<statements>`` element.

        Loops while the current token is a keyword whose type is in
        ``statement_types`` and dispatches on that keyword to the matching
        compile method.
        """
        tokens = self.tokenizer
        xml = self.out_stream

        # Starting keyword -> name of the method compiling that statement.
        # Methods are looked up by name only when their keyword is met.
        handler_names = {
            KeywordType.LET: "compile_let",
            KeywordType.IF: "compile_if",
            KeywordType.WHILE: "compile_while_statement",
            KeywordType.DO: "compile_do",
            KeywordType.RETURN: "compile_return",
        }

        xml.write("<statements>\n")
        while (tokens.get_token_type() == TokenType.KEYWORD
               and tokens.get_keyword_type() in statement_types):
            handler_name = handler_names.get(tokens.get_keyword_type())
            if handler_name is not None:
                getattr(self, handler_name)()
        xml.write("</statements>\n")

    # 'let' varName ('[' expression ']')? '=' expression ';'
    def compile_let(self):
        """Compile a let statement.

        Grammar: 'let' varName ('[' expression ']')? '=' expression ';'

        Writes the <letStatement> markup to ``out_stream`` and emits VM code
        that stores the value of the right-hand expression either directly
        into the variable's segment slot or, for ``varName[expr]``, into the
        computed address via ``pointer 1`` / ``that 0``.

        Raises:
            AssertionError: if the token after 'let' is not an identifier,
                if ``lookup_st`` does not know the identifier, or if an
                expected symbol is missing (raised by ``eat``).
        """
        self.out_stream.write("<letStatement>\n")

        self.write_terminal_tag(TokenType.KEYWORD, "let")

        # Move to next token
        self.tokenizer.has_more_tokens()

        if self.tokenizer.get_token_type() != TokenType.IDENTIFIER:
            raise AssertionError("Invalid Syntax for varName!")

        var_name = self.tokenizer.get_cur_ident()
        self.write_terminal_tag(TokenType.IDENTIFIER, var_name)

        var_props = self.lookup_st(var_name)
        if not var_props:
            # lookup_st returns a falsy value for unknown names; fail with a
            # readable message instead of a TypeError when subscripting it.
            raise AssertionError(f"Undeclared variable: {var_name}")

        # Write variable properties
        self.out_stream.write(
            f"\n===USED===\nkind: {var_props['kind']}, type: {var_props['type']}, index: {var_props['index']}\n======="
        )

        # VM segment that backs this variable
        segment = self.var_t_to_segment_t(var_props["kind"])

        # Move to next token
        self.tokenizer.has_more_tokens()

        # Optional bracket syntax: varName '[' expression ']'
        is_array_access = (
            self.tokenizer.get_token_type() == TokenType.SYMBOL
            and self.tokenizer.get_symbol() == "["
        )

        if is_array_access:
            # push arr (base address)
            self.vm_writer.write_push(segment, var_props["index"])

            self.write_terminal_tag(TokenType.SYMBOL, "[")

            # Move to next token
            self.tokenizer.has_more_tokens()

            # Compile the index expression
            self.compile_expression()

            self.eat("]")
            self.write_terminal_tag(TokenType.SYMBOL, "]")

            # add: base + index stays on the stack as the target address
            self.vm_writer.write_arithmetic(ArithmeticCType.ADD)

            # Move to the next token
            self.tokenizer.has_more_tokens()

        # Eat assignment operator
        self.eat("=")
        self.write_terminal_tag(TokenType.SYMBOL, "=")

        # Move to next token
        self.tokenizer.has_more_tokens()

        # Right-hand side value
        self.compile_expression()

        self.eat(";")
        self.write_terminal_tag(TokenType.SYMBOL, ";")

        # Move to next token
        self.tokenizer.has_more_tokens()

        if is_array_access:
            # The value is parked in temp 0 while the target address (still
            # on the stack below it) is loaded into pointer 1.
            self.vm_writer.write_pop(SegmentType.TEMP, 0)
            self.vm_writer.write_pop(SegmentType.POINTER, 1)
            self.vm_writer.write_push(SegmentType.TEMP, 0)
            self.vm_writer.write_pop(SegmentType.THAT, 0)
        else:
            self.vm_writer.write_pop(segment, var_props["index"])

        self.out_stream.write("</letStatement>\n")

    # 'if' '(' expression ')' '{' statements '}' ('else' '{' statements '}')?
    def compile_if(self):
        """Compile an if statement with an optional else block.

        Grammar:
            'if' '(' expression ')' '{' statements '}'
            ('else' '{' statements '}')?

        The condition is negated and an if-goto to the first label skips the
        if-body; the second label marks the end of the whole statement.
        """

        def expect(symbol):
            # Validate the current symbol, echo its tag, then step past it
            self.eat(symbol)
            self.write_terminal_tag(TokenType.SYMBOL, symbol)
            self.tokenizer.has_more_tokens()

        self.out_stream.write("<ifStatement>\n")
        self.vm_writer.write_comment("if statement")

        self.write_terminal_tag(TokenType.KEYWORD, "if")

        # Fresh pair of labels for this statement
        false_label, end_label = self.get_if_labels()

        # Move to next token
        self.tokenizer.has_more_tokens()

        expect("(")

        # Code for the condition
        self.compile_expression()

        expect(")")

        # Jump over the if-body when the condition does not hold
        self.vm_writer.write_arithmetic(ArithmeticCType.NOT)
        self.vm_writer.write_if(false_label)

        expect("{")

        # If-block body
        self.compile_statements()

        # Skip whatever follows the if-body (the else block, when present)
        self.vm_writer.write_goto(end_label)
        self.vm_writer.write_label(false_label)

        expect("}")

        # Optional else block
        has_else = (
            self.tokenizer.get_token_type() == TokenType.KEYWORD
            and self.tokenizer.get_keyword_type() == KeywordType.ELSE
        )
        if has_else:
            self.write_terminal_tag(TokenType.KEYWORD, "else")

            # Move to next token
            self.tokenizer.has_more_tokens()

            expect("{")
            self.compile_statements()
            expect("}")

        self.vm_writer.write_label(end_label)

        # Closing tag
        self.out_stream.write("</ifStatement>\n")

    # 'while' '(' expression ')' '{' statements '}'
    def compile_while_statement(self):
        """Compile a while statement.

        Grammar: 'while' '(' expression ')' '{' statements '}'

        The first label marks the top of the loop (condition re-evaluation);
        the negated condition jumps to the second label, placed after the
        loop body, to leave the loop.
        """
        tokenizer = self.tokenizer
        vm = self.vm_writer

        def echo_symbol(symbol):
            # Validate the current symbol and write its terminal tag
            self.eat(symbol)
            self.write_terminal_tag(TokenType.SYMBOL, symbol)

        self.out_stream.write("<whileStatement>\n")

        self.write_terminal_tag(TokenType.KEYWORD, "while")
        loop_label, exit_label = self.get_while_labels()

        vm.write_label(loop_label)

        # Step onto '('
        tokenizer.has_more_tokens()
        echo_symbol("(")

        # Step onto the condition
        tokenizer.has_more_tokens()
        self.compile_expression()

        echo_symbol(")")

        # Leave the loop when the condition is false
        vm.write_arithmetic(ArithmeticCType.NOT)
        vm.write_if(exit_label)

        # Step onto '{'
        tokenizer.has_more_tokens()
        echo_symbol("{")

        # Step onto the loop body
        tokenizer.has_more_tokens()
        self.compile_statements()

        echo_symbol("}")

        # Step past '}'
        tokenizer.has_more_tokens()

        # Back to the condition, then the exit point
        vm.write_goto(loop_label)
        vm.write_label(exit_label)

        # Closing tag
        self.out_stream.write("</whileStatement>\n")

    # 'do' subroutineCall ';'
    def compile_do(self):
        """Compile a do statement.

        Grammar: 'do' subroutineCall ';'
        where subroutineCall is either ``name(args)`` or
        ``qualifier.name(args)``.

        Call forms and the VM code emitted for them:
          * ``name(args)``: a method of the current class invoked on the
            current object: ``pointer 0`` is pushed as the receiver and the
            call uses ``nArgs + 1``.
          * ``var.name(args)``: ``var`` is found by ``lookup_st``: the
            variable is pushed as the receiver and the call targets
            ``<type of var>.name`` with ``nArgs + 1``.
          * ``Class.name(args)``: the qualifier is not a known variable:
            plain call to ``Class.name`` with ``nArgs``.

        The value returned by the call is discarded into ``temp 0``.

        Raises:
            AssertionError: if a subroutine/class name is not an identifier
                or an expected symbol is missing (raised by ``eat``).
        """
        # Second part of a qualified call (None for an unqualified call)
        second_part = None
        # Number of explicit arguments passed to the subroutine
        n_args = 0

        # Write opening tag
        self.out_stream.write("<doStatement>\n")

        # Write do keyword tag
        self.write_terminal_tag(TokenType.KEYWORD, "do")

        # Move to next token
        self.tokenizer.has_more_tokens()

        # First identifier of the subroutine call
        if self.tokenizer.get_token_type() != TokenType.IDENTIFIER:
            raise AssertionError("Not a valid subroutine/class name!!!")
        first_part = self.tokenizer.get_cur_ident()
        self.write_terminal_tag(TokenType.IDENTIFIER, first_part)

        var_props = self.lookup_st(first_part)

        # Move to next token
        self.tokenizer.has_more_tokens()

        # Is it a qualified call?
        if self.tokenizer.get_token_type() == TokenType.SYMBOL \
            and self.tokenizer.get_symbol() == ".":
            if var_props:
                # The qualifier is a variable: it is the receiver (argument 0)
                self.vm_writer.write_push(
                    self.var_t_to_segment_t(var_props["kind"]),
                    var_props["index"])

            self.write_terminal_tag(TokenType.SYMBOL, ".")

            # Move to next token
            self.tokenizer.has_more_tokens()

            # Subroutine name after the dot
            if self.tokenizer.get_token_type() != TokenType.IDENTIFIER:
                raise AssertionError("Not a valid subroutine/class name!!!")
            second_part = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(TokenType.IDENTIFIER, second_part)

            # Move to next token
            self.tokenizer.has_more_tokens()
        else:
            # Unqualified call: the receiver is the current object. It must be
            # pushed before the explicit arguments so it becomes argument 0.
            self.vm_writer.write_push(SegmentType.POINTER, 0)

        self.eat("(")
        self.write_terminal_tag(TokenType.SYMBOL, "(")

        # Move to next token
        self.tokenizer.has_more_tokens()

        self.out_stream.write("<expressionList>\n")
        if not (self.tokenizer.get_token_type() == TokenType.SYMBOL
                and self.tokenizer.get_symbol() == ")"):
            n_args = self.compile_expression_list()
        self.out_stream.write("</expressionList>\n")

        self.eat(")")
        self.write_terminal_tag(TokenType.SYMBOL, ")")

        # Move to next token
        self.tokenizer.has_more_tokens()

        self.eat(";")
        self.write_terminal_tag(TokenType.SYMBOL, ";")

        # Move to next token
        self.tokenizer.has_more_tokens()

        if second_part is None:
            # Method of this class; +1 accounts for the pushed receiver
            self.vm_writer.write_call(f"{self.class_name}.{first_part}",
                                      n_args + 1)
        elif var_props:
            # Method on the object held by the variable; +1 for the receiver
            self.vm_writer.write_call(f"{var_props['type']}.{second_part}",
                                      n_args + 1)
        else:
            # Subroutine of some other class, called through the class name
            self.vm_writer.write_call(f"{first_part}.{second_part}", n_args)

        # call-and-return contract: discard the returned value
        self.vm_writer.write_pop(SegmentType.TEMP, 0)

        # Write closing tag
        self.out_stream.write("</doStatement>\n")

    # 'return' expression? ';'
    def compile_return(self):
        """Compile a return statement (grammar: 'return' expression? ';').

        When no expression is given, constant 0 is pushed so that a value is
        still on the stack when the VM return command is written.
        """
        self.out_stream.write("<returnStatement>\n")

        # 'return' keyword tag
        self.write_terminal_tag(TokenType.KEYWORD, "return")

        # Move to next token
        self.tokenizer.has_more_tokens()

        has_value = not (
            self.tokenizer.get_token_type() == TokenType.SYMBOL
            and self.tokenizer.get_symbol() == ";"
        )

        if has_value:
            self.compile_expression()
            self.eat(";")

        self.write_terminal_tag(TokenType.SYMBOL, ";")

        if not has_value:
            # Subroutine with a void return type
            self.vm_writer.write_push(SegmentType.CONST, 0)

        # Move to next token
        self.tokenizer.has_more_tokens()

        # VM return command, then the closing tag
        self.vm_writer.write_return()
        self.out_stream.write("</returnStatement>\n")

    # term (op term)*
    def compile_expression(self):
        """Compile an expression (grammar: term (op term)*).

        Terms are compiled left to right; after each right-hand term the VM
        command mapped to the operator in ``allowed_op`` is written.
        """
        self.out_stream.write("<expression>\n")

        # Leading term
        self.compile_term()

        # Zero or more (op term) pairs
        while True:
            if self.tokenizer.get_token_type() != TokenType.SYMBOL:
                break
            operator = self.tokenizer.get_symbol()
            if operator not in allowed_op:
                break

            # Tag for the operator symbol
            self.write_terminal_tag(TokenType.SYMBOL, operator)

            # Move to next token
            self.tokenizer.has_more_tokens()

            # Right-hand term first, then the operation itself
            self.compile_term()
            self.vm_writer.write_arithmetic(allowed_op[operator])

        # Closing tag
        self.out_stream.write("</expression>\n")

    # integerConstant | stringConstant | keywordConstant | varName |
    # varName '[' expression ']' | subroutineCall | '(' expression ')'
    # | unaryOp term
    def compile_term(self):
        """Compile a single term.

        Grammar:
            integerConstant | stringConstant | keywordConstant | varName |
            varName '[' expression ']' | subroutineCall |
            '(' expression ')' | unaryOp term

        Writes the <term> markup to ``out_stream`` and emits VM code that
        leaves the value of the term on the stack.

        Raises:
            AssertionError: on a symbol that is neither '(' nor a unary
                operator, on an undeclared variable, on a malformed
                subroutine call, or on a missing symbol (raised by ``eat``).
        """
        self.out_stream.write("<term>\n")

        token_type = self.tokenizer.get_token_type()

        if token_type == TokenType.INT_CONST:
            value = self.tokenizer.get_int_val()
            self.write_terminal_tag(TokenType.INT_CONST, value)
            self.vm_writer.write_push(SegmentType.CONST, value)
            self.tokenizer.has_more_tokens()

        elif token_type == TokenType.STRING_CONST:
            text = self.tokenizer.get_string_val()
            self.write_terminal_tag(TokenType.STRING_CONST, text)

            # Build the string at run time: String.new(length), then one
            # String.appendChar call per character. appendChar returns the
            # string, so it stays on the stack as the value of the term.
            # NOTE(review): assumes get_string_val() returns the text without
            # the surrounding double quotes -- confirm against the tokenizer.
            self.vm_writer.write_push(SegmentType.CONST, len(text))
            self.vm_writer.write_call("String.new", 1)
            for char in text:
                self.vm_writer.write_push(SegmentType.CONST, ord(char))
                self.vm_writer.write_call("String.appendChar", 2)

            self.tokenizer.has_more_tokens()

        elif token_type == TokenType.KEYWORD \
            and self.tokenizer.get_keyword_type() in keyword_constants:
            # keyword constant
            kc = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(TokenType.KEYWORD, kc)

            if kc == "null" or kc == "false":
                # push const 0
                self.vm_writer.write_push(SegmentType.CONST, 0)

            elif kc == "true":
                # push const 1, then negate -> -1
                self.vm_writer.write_push(SegmentType.CONST, 1)
                self.vm_writer.write_arithmetic(ArithmeticCType.NEG)

            elif kc == "this":
                # push pointer 0
                self.vm_writer.write_push(SegmentType.POINTER, 0)

            self.tokenizer.has_more_tokens()

        elif token_type == TokenType.IDENTIFIER:
            self._compile_identifier_term()

        elif token_type == TokenType.SYMBOL:
            # Handle '(' expression ')'
            if self.tokenizer.get_symbol() == '(':
                self.eat("(")
                self.write_terminal_tag(TokenType.SYMBOL, "(")
                self.tokenizer.has_more_tokens()

                self.compile_expression()

                self.eat(")")
                self.write_terminal_tag(TokenType.SYMBOL, ")")
                self.tokenizer.has_more_tokens()
            # Handle unaryOp term
            elif self.tokenizer.get_symbol() in allowed_unary_op:
                unary_op = self.tokenizer.get_symbol()
                self.write_terminal_tag(TokenType.SYMBOL, unary_op)

                self.tokenizer.has_more_tokens()
                self.compile_term()

                # The operator applies to the operand compiled just above
                self.vm_writer.write_arithmetic(allowed_unary_op[unary_op])
            else:
                raise AssertionError("( or unary Op expected!!")

        self.out_stream.write("</term>\n")

    def _compile_identifier_term(self):
        """Compile a term that starts with an identifier.

        Handles a plain variable, an array entry ``varName[expression]`` and
        the subroutine call forms ``name(args)``, ``var.name(args)`` and
        ``Class.name(args)``. The current token must be the identifier.
        """
        name = self.tokenizer.get_cur_ident()
        props = self.lookup_st(name)

        self.write_terminal_tag(TokenType.IDENTIFIER, name)

        # Move to next token
        self.tokenizer.has_more_tokens()

        # The symbol after the identifier (if any) selects the term form
        next_symbol = None
        if self.tokenizer.get_token_type() == TokenType.SYMBOL:
            next_symbol = self.tokenizer.get_symbol()

        if next_symbol == "(" or next_symbol == ".":
            if next_symbol == ".":
                if props:
                    # Qualifier is a variable: push it as the receiver
                    self.vm_writer.write_push(
                        self.var_t_to_segment_t(props["kind"]),
                        props["index"])

                self.write_terminal_tag(TokenType.SYMBOL, ".")
                # Move to next token
                self.tokenizer.has_more_tokens()

                if self.tokenizer.get_token_type() != TokenType.IDENTIFIER:
                    raise AssertionError(
                        "Not a valid subroutine/class name!!!")
                member = self.tokenizer.get_cur_ident()
                self.write_terminal_tag(TokenType.IDENTIFIER, member)

                # Move to next token
                self.tokenizer.has_more_tokens()

                if props:
                    # Method on the object held by the variable
                    target = f"{props['type']}.{member}"
                    implicit_args = 1
                else:
                    # Subroutine of some other class
                    target = f"{name}.{member}"
                    implicit_args = 0
            else:
                # Unqualified call: a method of this class invoked on the
                # current object, which must be pushed as argument 0 before
                # the explicit arguments.
                self.vm_writer.write_push(SegmentType.POINTER, 0)
                target = f"{self.class_name}.{name}"
                implicit_args = 1

            self.eat("(")
            self.write_terminal_tag(TokenType.SYMBOL, "(")

            # Move to next token
            self.tokenizer.has_more_tokens()

            n_args = 0
            self.out_stream.write("<expressionList>\n")
            if not (self.tokenizer.get_token_type() == TokenType.SYMBOL
                    and self.tokenizer.get_symbol() == ")"):
                n_args = self.compile_expression_list()
            self.out_stream.write("</expressionList>\n")

            self.eat(")")
            self.write_terminal_tag(TokenType.SYMBOL, ")")

            # Move to next token
            self.tokenizer.has_more_tokens()

            self.vm_writer.write_call(target, n_args + implicit_args)
            return

        # Anything else is a variable reference and must resolve
        if not props:
            raise AssertionError(f"Undeclared variable: {name}")

        # Variable value (the base address in the array case)
        self.vm_writer.write_push(
            self.var_t_to_segment_t(props["kind"]), props["index"])

        # Handle varName '[' expression ']'
        if next_symbol == "[":
            self.eat("[")
            self.write_terminal_tag(TokenType.SYMBOL, "[")
            self.tokenizer.has_more_tokens()

            self.compile_expression()

            self.eat("]")
            self.write_terminal_tag(TokenType.SYMBOL, "]")

            # base + index, then read the entry through pointer 1 / that 0
            self.vm_writer.write_arithmetic(ArithmeticCType.ADD)
            self.vm_writer.write_pop(SegmentType.POINTER, 1)
            self.vm_writer.write_push(SegmentType.THAT, 0)

            # Move to next token
            self.tokenizer.has_more_tokens()

    # expression (',' expression)*
    def compile_expression_list(self):
        """Compile ``expression (',' expression)*``.

        The caller is expected to invoke this only when at least one
        expression is present, since the first one is compiled
        unconditionally.

        Returns:
            The number of expressions compiled.
        """
        # First expression is mandatory here
        self.compile_expression()
        compiled = 1

        while True:
            at_comma = (self.tokenizer.get_token_type() == TokenType.SYMBOL) \
                and (self.tokenizer.get_symbol() == ",")
            if not at_comma:
                break

            # Echo the separator, step past it, compile the next expression
            self.write_terminal_tag(TokenType.SYMBOL, ",")
            self.tokenizer.has_more_tokens()
            self.compile_expression()
            compiled += 1

        return compiled

    # eat the given string, else raise error
    def eat(self, string):
        """Assert that the current token is the symbol ``string``.

        Only checks the token; it does not advance the tokenizer.

        Raises:
            AssertionError: if the current token is not a symbol, or is a
                symbol different from ``string``.
        """
        # Guard: anything other than a symbol can never match
        if self.tokenizer.get_token_type() != TokenType.SYMBOL:
            raise AssertionError("Symbol not found!!")

        if self.tokenizer.get_symbol() != string:
            found = self.tokenizer.get_symbol()
            raise AssertionError(
                f"Expected symbol {string}, found: {found}"
            )

    # Utility method to check whether
    # the current token is a valid data type
    def is_valid_type(self):
        """Return True when the current token can serve as a data type.

        A keyword qualifies only if its keyword type is listed in
        ``data_types``; any identifier qualifies (custom/class type).
        Every other token is rejected.
        """
        current = self.tokenizer.get_token_type()

        # Built-in data type: the keyword must be one of data_types
        if current == TokenType.KEYWORD:
            return self.tokenizer.get_keyword_type() in data_types

        # Custom data type: any identifier; everything else is invalid
        return current == TokenType.IDENTIFIER

    # Lookup variable in symbol table
    def lookup_st(self, v_name):
        """Look up a variable in the symbol tables.

        The subroutine-level table is searched first; the class-level table
        is consulted only when the name is not found there.

        Args:
            v_name: name of the variable to resolve.

        Returns:
            A dict with the keys ``"kind"``, ``"type"`` and ``"index"`` for
            the first table that knows the name, or ``False`` when neither
            table does (callers test the result for truthiness).
        """
        # Inner scope first, so a subroutine-level name shadows a class-level one
        for table in (self.subroutine_level_st, self.class_level_st):
            v_kind = table.get_kind_of(v_name)
            if v_kind != SymbolKind.NONE:
                return {
                    "kind": v_kind,
                    "type": table.get_type_of(v_name),
                    "index": table.get_index_of(v_name),
                }

        # Not declared in either scope
        return False

    def var_t_to_segment_t(self, v_kind: SymbolKind) -> SegmentType:
        """Map a symbol kind to the VM memory segment that stores it.

        Raises:
            AssertionError: if ``v_kind`` has no corresponding segment.
        """
        # (symbol kind, segment) pairs, checked in order
        kind_to_segment = (
            (SymbolKind.STATIC, SegmentType.STATIC),
            (SymbolKind.ARG, SegmentType.ARG),
            (SymbolKind.VAR, SegmentType.LOCAL),
            (SymbolKind.FEILD, SegmentType.THIS),
        )

        for kind, segment in kind_to_segment:
            if v_kind == kind:
                return segment

        raise AssertionError("No segment kind for given v_kind!!")