Esempio n. 1
0
def tokens_to_xml(path):
    """Dump the tokens of every source file found under `path` as XML.

    For each file returned by ``retrive_files(path)`` a ``<name>T.xml`` file
    is produced inside the ``Xmlresult`` directory below `path` (a separate
    directory avoids name clashes with other output files).  Every token is
    written through ``TokenAnalyzer.write_info`` together with the tag that
    ``token_tags`` associates with its type.

    :param path: location handed to ``retrive_files``; also the parent of
        the ``Xmlresult`` output directory.
    """
    # Tokenizer attribute holding the token's value, keyed by token type.
    value_attr = {
        T_KEYWORD: 'keyword',
        T_SYMBOL: 'symbol',
        T_ID: 'identifier',
        T_INTEGER: 'intval',
        T_STRING: 'stringval',
    }
    out_dirpath = os.path.join(path, 'Xmlresult')
    # A distinct loop name keeps the `path` parameter intact.
    for src_path in retrive_files(path):
        outfile = os.path.basename(src_path).replace('.jack', 'T.xml')
        outpath = os.path.join(out_dirpath, outfile)
        tokenizer = Tokenizer(src_path)
        analyzer = TokenAnalyzer(outpath)
        try:
            while tokenizer.has_more_tokens():
                tokenizer.advance()
                t_type = tokenizer.token_type
                tag = token_tags[t_type]
                attr = value_attr.get(t_type)
                # Token types without a value attribute are skipped.
                if attr is not None:
                    analyzer.write_info(getattr(tokenizer, attr), tag)
        finally:
            # Close the output even if tokenizing fails part-way.
            analyzer.close()
Esempio n. 2
0
class CompilationEngine:
    """
    Recursive-descent compiler for a single Jack class.

    Tokens come from a Tokenizer, identifiers are recorded in a SymbolTable
    and VM commands are emitted through a VMWriter.  The whole class is
    compiled from the constructor; call write_file() afterwards to flush the
    VM output.
    """
    # Regex fragments handed to eat() / peek_token() / peek_next().  Raw
    # strings keep the backslashes literal without invalid-escape warnings.
    _OPEN_PARENTHESIS = r"\("
    _CLOSE_PARENTHESIS = r"\)"
    _OPEN_BRACKET = r"\["
    _CLOSE_BRACKET = r"\]"
    _DOT = r"\."
    _OPS = r"\+|-|\*|\/|&|\||<|>|="
    _UNARY_OPS = "-|~"
    _SUBROUTINE_KINDS = "(constructor|function|method)"

    def __init__(self, in_address):
        """
        Sets up the tokenizer, symbol table and VM writer and compiles the
        class found in the input file.
        :param in_address: Path of the .jack source; the VM writer receives
        the same path with ".jack" replaced by ".vm".
        """
        self.tokenizer = Tokenizer(in_address)
        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(in_address.replace(".jack", ".vm"))
        self.curr_token = self.tokenizer.get_current_token()
        self.out_address = in_address.replace(".jack", ".xml")
        self.output = ""
        self.indent = 0
        # Incremented before use, so the first generated label ends in 0.
        self.label_count = -1
        self.class_name = ""
        self.compile_class()

    def write_file(self):
        """
        Writes the generated VM code through the VM writer.  The XML trace
        collected in self.output is not written anywhere.
        """
        self.vm_writer.write_file()

    def write(self, to_write):
        """
        Writes to the output, with indentation.
        :param to_write: The string to write
        """
        self.output += (self.indent * " ") + to_write + "\n"

    # ========== Compilation Methods ========== #

    def compile_class(self):
        """
        Compiles a complete class:
        'class' className '{' classVarDec* subroutineDec* '}'
        """
        def comp_class():
            self.eat("class")
            self.class_name = self.eat(NAME_REG)
            self.eat("{")
            self.compile_class_var_dec()
            self.compile_subroutine()
            self.eat("}")

        self.wrap("class", comp_class)

    def compile_class_var_dec(self):
        """
        Compiles every consecutive static or field declaration (possibly
        none).
        """
        while self.peek_token("static|field"):
            self.wrap("classVarDec", self.__class_var_dec)

    def compile_subroutine(self):
        """
        Compiles every consecutive method, function or constructor starting
        at the current token.  A class without subroutines is accepted.
        """
        while self.peek_token(CompilationEngine._SUBROUTINE_KINDS):
            self.__subroutine_dec()

    def compile_parameter_list(self, kind):
        """
        Compiles a possibly empty parameter list, not including the
        enclosing ()
        :param kind: The subroutine kind; for "method" the implicit "this"
        is defined as the first argument.
        """
        if kind == "method":
            self.symbol_table.define("this", self.class_name, "argument")
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        while self.peek_token(type_reg):
            self.__params()

    def compile_var_dec(self):
        """
        Compiles one var declaration and any var declarations that directly
        follow it.
        """
        while True:
            self.eat("var")
            var_type = self.__compile_type(False)
            self.__var_declare(var_type, "var")
            self.eat(";")
            if not self.peek_token("var"):
                break

    def compile_statements(self):
        """
        Compiles a sequence of statements, not including the enclosing {}
        """
        statement_reg = "let|if|while|do|return"
        # Iterating (instead of recursing once per statement) keeps long
        # statement lists clear of the interpreter's recursion limit.
        while self.peek_token(statement_reg):
            if self.peek_token("let"):
                self.compile_let()
            elif self.peek_token("if"):
                self.compile_if()
            elif self.peek_token("while"):
                self.compile_while()
            elif self.peek_token("do"):
                self.compile_do()
            elif self.peek_token("return"):
                self.compile_return()

    def compile_do(self):
        """
        Compiles a do statement
        """
        self.eat("do")
        self.__subroutine_call()
        # Since we don't use the return value, we pop it to temp
        self.vm_writer.write_pop("temp", 0)
        self.eat(";")

    def compile_let(self):
        """
        Compiles a let statement
        """
        self.eat("let")
        name = self.__compile_name()
        is_array = False
        # Determine [expression]
        if self.peek_token(CompilationEngine._OPEN_BRACKET):
            is_array = True
            self.__handle_array(name)
        self.eat("=")
        self.compile_expression()
        # Pop the value to the spot in the memory
        if is_array:
            # Park the value in temp 0 so the target address (pushed by
            # __handle_array) can be moved into pointer 1 first.
            self.vm_writer.write_pop("temp", 0)
            self.vm_writer.write_pop("pointer", 1)
            self.vm_writer.write_push("temp", 0)
            self.vm_writer.write_pop("that", 0)
        else:
            self.__write_pop(name)
        self.eat(";")

    def compile_while(self):
        """
        Compiles a while statement.
        """
        self.eat("while")
        loop_label = self.__get_label("WHILE_START")
        exit_label = self.__get_label("WHILE_END")
        self.vm_writer.write_label(loop_label)
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        # Compute ~condition
        self.compile_expression()
        self.vm_writer.write_arithmetic("~")
        # if ~condition exit loop
        self.vm_writer.write_if(exit_label)
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.eat("{")
        self.compile_statements()
        self.vm_writer.write_goto(loop_label)
        self.vm_writer.write_label(exit_label)
        self.eat("}")

    def compile_return(self):
        """
        Compiles a return statement.
        """
        self.eat("return")
        # if next is expression:
        if self.__is_term():
            self.compile_expression()
        else:
            # Void function - push 0
            self.vm_writer.write_push(CONSTANT, 0)
        self.vm_writer.write_return()
        self.eat(";")

    def compile_if(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        """
        self.eat("if")
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        self.compile_expression()
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.eat("{")
        # The condition is used as-is: jump to the true branch when it
        # holds, otherwise fall through to the jump to the false label.
        if_true = self.__get_label("IF_TRUE")
        self.vm_writer.write_if(if_true)
        if_false = self.__get_label("IF_FALSE")
        self.vm_writer.write_goto(if_false)
        self.vm_writer.write_label(if_true)
        self.compile_statements()
        self.eat("}")
        # Handle else:
        if self.peek_token("else"):
            if_end = self.__get_label("IF_END")
            self.vm_writer.write_goto(if_end)
            self.vm_writer.write_label(if_false)
            self.eat("else")
            self.eat("{")
            self.compile_statements()
            self.eat("}")
            self.vm_writer.write_label(if_end)
        else:
            self.vm_writer.write_label(if_false)

    def compile_expression(self):
        """
        Compiles an expression: term (op term)*.  Operators are applied
        left to right, each one emitted after its right-hand term.
        """
        def comp_expression():
            self.compile_term()
            # Case: term op term op term ...
            while self.peek_token(CompilationEngine._OPS):
                operation = self.eat(CompilationEngine._OPS)
                self.compile_term()
                self.vm_writer.write_arithmetic(operation)

        self.wrap("expression", comp_expression)

    def compile_term(self):
        """
        Compiles a term.  Prints an error and exits when the current token
        cannot start a term.
        """
        curr_type = self.peek_type()
        val = self.curr_token.get_token()
        # Handle integer constant
        if curr_type == INT_CONST:
            self.vm_writer.write_push(CONSTANT, int(val))
            self.__advance_token()
        # Handle String constant
        elif curr_type == STRING_CONST:
            self.__handle_string_constant(val)
            self.__advance_token()
        # Handle Keyword constant
        elif curr_type == KEYWORD:
            self.__handle_keyword_constant(val)
            self.__advance_token()
        # Case: token is a varName or a subroutineName
        elif curr_type == IDENTIFIER:
            self.__handle_identifier()
        # Case: ( expression )
        elif self.peek_token(CompilationEngine._OPEN_PARENTHESIS):
            self.eat(CompilationEngine._OPEN_PARENTHESIS)
            self.compile_expression()
            self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        # Case: unaryOp term
        elif self.peek_token(CompilationEngine._UNARY_OPS):
            self.__handle_unary_op()
        else:
            print("Error: Incorrect Term")
            raise SystemExit(-1)

    def compile_expression_list(self):
        """
        Compiles a possibly empty list of comma separated expressions
        :return: The number of expressions compiled.
        """
        count = 0
        if self.__is_term():
            self.compile_expression()
            count += 1
            while self.peek_token(","):
                self.eat(",")
                self.compile_expression()
                count += 1
        return count

    # ========== Compilation Helper ========== #

    def __subroutine_dec(self):
        """
        Compiles a single subroutine: its header, parameter list, local
        declarations and statements.
        """
        self.symbol_table.start_subroutine()
        kind = self.eat(CompilationEngine._SUBROUTINE_KINDS)
        self.__compile_type(True)
        # subroutine name
        name = self.__compile_name()
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        self.compile_parameter_list(kind)
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.eat("{")
        if self.peek_token("var"):
            self.compile_var_dec()
        # The function command needs the local count, so it is written
        # only after all var declarations have been read.
        num_locals = self.symbol_table.var_count("local")
        self.vm_writer.write_function("{}.{}".format(self.class_name, name),
                                      num_locals)
        self.__set_pointer(kind)
        self.compile_statements()
        self.eat("}")

    def __class_var_dec(self):
        """
        Compiles a single class var declaration.
        """
        # (static|field)
        kind = self.eat("static|field")
        # type
        var_type = self.__compile_type(False)
        # Compile varName combo until no more ","
        self.__var_declare(var_type, kind)
        self.eat(";")

    def __var_declare(self, var_type, kind):
        """
        Defines one or more comma separated variable names in the symbol
        table.
        :param var_type: The declared type shared by all the names
        :param kind: The kind passed on to the symbol table
        """
        while True:
            name = self.eat(NAME_REG)
            self.symbol_table.define(name, var_type, kind)
            if not self.peek_token(","):
                break
            self.eat(",")

    def __compile_type(self, for_function):
        """
        Compiles a type for a function or variable, determined by
        a received boolean value.
        :param for_function: True if is type of function, false otherwise.
        :return: The type token that was consumed.
        """
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        if for_function:
            type_reg += "|void"
        return self.eat(type_reg)

    def __set_pointer(self, kind):
        """
        Emits the code that sets up "this" at the start of a subroutine.
        :param kind: The subroutine kind (constructor, function or method)
        """
        if kind == "method":
            # argument 0 holds the object the method was called on
            self.vm_writer.write_push("argument", 0)
            self.vm_writer.write_pop("pointer", 0)
        elif kind == "constructor":
            self.__handle_constructor()

    def __handle_constructor(self):
        """
        Emits the allocation of a new object and anchors "this" on it.
        """
        # Allocate memory for the new object
        var_num = self.symbol_table.var_count("this")
        self.vm_writer.write_push(CONSTANT, var_num)
        self.vm_writer.write_call("Memory.alloc", 1)
        # Set the new memory spot to this
        self.vm_writer.write_pop("pointer", 0)

    def __compile_name(self):
        """
        Consumes an identifier; prints an error and exits when the current
        token is not one.
        :return: The identifier
        """
        if self.peek_type() == IDENTIFIER:
            return self.eat(NAME_REG)
        print("ERROR: Identifier Expected")
        raise SystemExit(-1)

    def __params(self):
        """
        Compiles one "type name" parameter and an optional trailing comma.
        """
        var_type = self.__compile_type(False)
        name = self.eat(NAME_REG)
        self.symbol_table.define(name, var_type, "argument")
        if self.peek_token(","):
            self.eat(",")

    def __handle_unary_op(self):
        """
        Compiles unaryOp term; the operator is emitted after its operand.
        """
        command = self.eat(CompilationEngine._UNARY_OPS)
        self.compile_term()
        if command == "-":
            # "-" alone would be read as binary subtraction by the writer
            self.vm_writer.write_arithmetic("neg")
        else:
            self.vm_writer.write_arithmetic(command)

    def __handle_identifier(self):
        """
        Handles the case of an identifier given as a term
        """
        # Case: varName [ expression ]
        if self.peek_next(CompilationEngine._OPEN_BRACKET):
            name = self.__compile_name()
            self.__handle_array(name)
            self.vm_writer.write_pop("pointer", 1)
            self.vm_writer.write_push("that", 0)
        # Case: subroutineCall:
        elif self.peek_next(CompilationEngine._OPEN_PARENTHESIS) or \
                self.peek_next(CompilationEngine._DOT):
            self.__subroutine_call()
        else:
            name = self.eat(NAME_REG)
            self.__write_push(name)

    def __handle_string_constant(self, string):
        """
        Handles the case of a string constant in a term
        :param string: the constant
        """
        self.vm_writer.write_push(CONSTANT, len(string))
        self.vm_writer.write_call("String.new", 1)
        for char in string:
            self.vm_writer.write_push(CONSTANT, ord(char))
            self.vm_writer.write_call("String.appendChar", 2)

    def __handle_keyword_constant(self, word):
        """
        Handles the case of a keyword constant given in a term.
        "this" pushes pointer 0; every other keyword pushes constant 0,
        which is additionally negated with "~" for "true".  The keyword is
        not validated.
        :param word: The keyword
        """
        if word == "this":
            self.vm_writer.write_push("pointer", 0)
        else:
            self.vm_writer.write_push(CONSTANT, 0)
            if word == "true":
                self.vm_writer.write_arithmetic("~")

    def __is_term(self):
        """
        :return: True if the current token may start a term (constants,
        identifiers, "(", or an operator - including the unary "~").
        """
        curr_type = self.peek_type()
        if curr_type in (STRING_CONST, INT_CONST, KEYWORD, IDENTIFIER):
            return True
        return bool(self.peek_token(CompilationEngine._OPEN_PARENTHESIS)
                    or self.peek_token(CompilationEngine._OPS)
                    or self.peek_token(CompilationEngine._UNARY_OPS))

    def __subroutine_call(self):
        """
        Compiles a subroutine call: either name(...) on the current object
        or prefix.name(...).  Prints an error and exits on anything else.
        """
        if self.curr_token.get_type() != IDENTIFIER:
            print("ERROR: Identifier Expected")
            raise SystemExit(-1)
        if self.peek_next(CompilationEngine._OPEN_PARENTHESIS):
            # Implicit call on the current object: pass it as argument 0
            self.vm_writer.write_push("pointer", 0)
            self.__subroutine_name(self.class_name, 1)
        elif self.peek_next(CompilationEngine._DOT):
            self.__object_subroutine_call()
        else:
            print("Error: ( or . expected")
            raise SystemExit(-1)

    def __object_subroutine_call(self):
        """
        Compiles prefix.name(...).  When the prefix is a known variable it
        is pushed as the hidden first argument and its type is used as the
        class name; otherwise the prefix itself is used as the class name.
        """
        name = self.eat(NAME_REG)

        n_args = 0
        # Push the object reference to the stack
        if self.symbol_table.kind_of(name):
            self.__write_push(name)
            name = self.symbol_table.type_of(name)
            n_args = 1
        self.eat(CompilationEngine._DOT)
        self.__subroutine_name(name, n_args)

    def __subroutine_name(self, type_name, n_args):
        """
        Handles the case of subroutineName(expressionList)
        :param type_name: The class name used to qualify the call
        :param n_args: Arguments already pushed by the caller
        """
        name = self.eat(NAME_REG)
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        nargs = self.compile_expression_list()
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.vm_writer.write_call("{}.{}".format(type_name, name),
                                  nargs + n_args)

    def __handle_array(self, name):
        """
        Compiles [ expression ] and leaves base + index on the stack.
        :param name: The array variable
        """
        self.eat(CompilationEngine._OPEN_BRACKET)
        self.compile_expression()
        self.eat(CompilationEngine._CLOSE_BRACKET)
        self.__write_push(name)
        self.vm_writer.write_arithmetic("+")

    # ========== XML Handling ========== #

    def wrap(self, section_name, func):
        """
        Wraps a program structure block with the section_name, and executes
        its function
        :param section_name: The name of the section
        :param func: The function to perform
        """
        self.write("<{}>".format(section_name))
        self.indent += 2
        func()
        self.indent -= 2
        self.write("</{}>".format(section_name))

    # ========== Token Handling ========== #

    def eat(self, token):
        """
        Consumes the current token if it matches the given regex.
        Prints an error and exits the program if it does not.
        :param token: The regex of the token to compare
        :return: The text of the consumed token
        """
        ctoken = self.curr_token.get_token()
        if not re.match(token, ctoken):
            print("Error: Expected " + token)
            raise SystemExit(-1)
        self.__advance_token()
        return ctoken

    def peek_token(self, compare_next):
        """
        :param compare_next: The regex to compare.
        :return: A truthy match object if the current token matches the
        regex, a falsy value otherwise.
        """
        if self.curr_token:
            return re.match(compare_next, self.curr_token.get_token())
        return False

    def peek_type(self):
        """
        :return: the type of the current token
        """
        return self.curr_token.get_type()

    def peek_next(self, comp):
        """
        :param comp: The regex to compare.
        :return: A truthy match object if there is a next token and it
        matches the regex, a falsy value otherwise.
        """
        next_token = self.tokenizer.get_next_token()
        # Case: There actually is a next token
        if next_token:
            return re.match(comp, next_token.get_token())
        return False

    def __advance_token(self):
        """
        Advances the tokenizer; the current token is only replaced while
        the tokenizer reports more tokens.
        """
        self.tokenizer.advance()
        if self.tokenizer.has_more_tokens():
            self.curr_token = self.tokenizer.get_current_token()

    # ========== VM Helper ========== #

    def __get_label(self, label):
        """
        :param label: The label prefix
        :return: The prefix followed by a fresh running number
        """
        self.label_count += 1
        return "{}{}".format(label, self.label_count)

    def __write_pop(self, name):
        """
        Pops the stack top into the variable with the given name.
        """
        self.vm_writer.write_pop(self.symbol_table.kind_of(name),
                                 self.symbol_table.index_of(name))

    def __write_push(self, name):
        """
        Pushes the variable with the given name onto the stack.
        """
        self.vm_writer.write_push(self.symbol_table.kind_of(name),
                                  self.symbol_table.index_of(name))
Esempio n. 3
0
class CompilationEngine:
    def __init__(self, inpath, outpath):
        """Compile the source at `inpath`, writing VM code to `outpath`.

        Builds the tokenizer, symbol table and VM writer, compiles the class
        when the tokenizer reports at least one token, and always closes the
        writer afterwards.  The completion message is printed only when
        compilation finished without raising.
        """
        self.tokenizer = Tokenizer(inpath)
        self.symboltable = SymbolTable()
        self.vmwriter = VMWriter(outpath)
        self._class_name = None
        try:
            if self.tokenizer.has_more_tokens():
                self.compile_class()
        finally:
            # Release the writer even when compilation aborts part-way.
            self.vmwriter.close()
        print("{0} completed.".format(outpath))

    def _subroutine_init(self):
        """Reset the per-subroutine state (kind, name, return type) to None."""
        self._sub_kind = self._sub_name = self._ret_type = None

    def _advance(self):
        """Run the end-of-input check, then move the tokenizer forward."""
        self._check_EOF()
        tokenizer = self.tokenizer
        tokenizer.advance()

    @property
    def _current_token(self):
        """Value of the current token, read from the tokenizer attribute
        that corresponds to the token's type."""
        tokenizer = self.tokenizer
        t_type = tokenizer.token_type
        if t_type == T_KEYWORD:
            return tokenizer.keyword
        if t_type == T_SYMBOL:
            return tokenizer.symbol
        if t_type == T_ID:
            return tokenizer.identifier
        if t_type == T_INTEGER:
            return tokenizer.intval
        # Every remaining type falls back to the string value.
        return tokenizer.stringval

    @property
    def _current_tok_type(self):
        """Type of the tokenizer's current token."""
        tokenizer = self.tokenizer
        return tokenizer.token_type

    @property
    def _current_tok_tag(self):
        """Entry of `token_tags` for the current token's type."""
        tok_type = self._current_tok_type
        return token_tags[tok_type]

    @property
    def _next_token(self):
        """The tokenizer's raw `next_token`, converted with str()."""
        upcoming = self.tokenizer.next_token
        return str(upcoming)

    def _require_token(self, tok_type, token=None):
        """Advance to the next token and validate it.

        When `token` is given (truthy) the new current token must equal it;
        in every other case the current token's type must equal `tok_type`.
        A failed check is reported through `self._error` and that call's
        result is returned; on success nothing is returned.
        """
        self._advance()
        text_mismatch = bool(token) and self._current_token != token
        if text_mismatch:
            return self._error(expect_toks=(token, ))
        if self._current_tok_type != tok_type:
            return self._error(expect_types=(tok_type, ))
        return None

    def _require_id(self):
        """Advance and require the new current token to be an identifier."""
        self._require_token(tok_type=T_ID)

    def _require_kw(self, token):
        """Advance and require the keyword `token`; returns whatever
        `_require_token` returns."""
        outcome = self._require_token(T_KEYWORD, token)
        return outcome

    def _require_sym(self, token):
        """Advance and require the symbol `token`; returns whatever
        `_require_token` returns."""
        outcome = self._require_token(T_SYMBOL, token)
        return outcome

    def _require_brackets(self, brackets, procedure):
        """Run `procedure` between a required opening and closing symbol.

        `brackets` must unpack into exactly two symbols, e.g. '()' or '{}'.
        """
        opening, closing = brackets
        self._require_sym(opening)
        procedure()
        self._require_sym(closing)

    def _fol_by_class_vardec(self):
        """True when the upcoming token is the static or field keyword."""
        upcoming = self._next_token
        return upcoming == KW_STATIC or upcoming == KW_FIELD

    def _fol_by_subroutine(self):
        """True when the upcoming token is the constructor, function or
        method keyword."""
        upcoming = self._next_token
        return (upcoming == KW_CONSTRUCTOR or upcoming == KW_FUNCTION
                or upcoming == KW_METHOD)

    def _fol_by_vardec(self):
        """True when the upcoming token is the var keyword."""
        upcoming = self._next_token
        return upcoming == KW_VAR

    #########################
    # structure compilation #
    #########################

    def compile_class_name(self):
        """Consume an identifier and remember it as the class name."""
        self._require_id()
        class_name = self._current_token
        self._class_name = class_name

    def compile_subroutine_name(self):
        """Consume an identifier and remember it as the subroutine name."""
        self._require_id()
        sub_name = self._current_token
        self._sub_name = sub_name

    def compile_var_name(self, kind=None, type=None, declare=False):
        """Consume an identifier that names a variable.

        With `declare` set to True the name is defined in the symbol table
        using `type` and `kind`; otherwise the name is validated through
        `check_var_name`.
        """
        self._require_id()
        name = self._current_token
        if declare is not True:
            self.check_var_name(name, type)
            return
        # Declaration: callers are expected to supply kind and type here.
        self.symboltable.define(name, type, kind)

    def check_var_name(self, name, type=None):
        """Check that `name` is declared and, optionally, has type `type`.

        An undeclared name is reported through `_traceback`; a declared
        name whose recorded type differs from a given `type` is reported
        through `_error`.
        """
        if self.symboltable.kindof(name) is None:
            self._traceback('name used before declared: {0}'.format(name))
            return
        if type is None:
            return
        recorded_type = self.symboltable.typeof(name)
        if recorded_type != type:
            found = '{0} "{1}"'.format(recorded_type, name)
            self._error(expect_types=(type, ), get=found)

    def compile_type(self, advanced=False, expect='type'):
        """Validate that the current token is a type.

        A type is either a member of `SymbolTable.builtIn_types` or an
        identifier (a class name).  Unless `advanced` is False-by-identity
        the tokenizer is assumed to be positioned already; otherwise it is
        advanced first.  A failed check is reported through `_error` with
        `expect` as the description.
        """
        if advanced is False:
            self._advance()
        is_builtin = self._current_token in SymbolTable.builtIn_types
        if is_builtin or self._current_tok_type == T_ID:
            return None
        return self._error(expect=expect)

    def compile_return_type(self):
        """Consume the subroutine's return type ('void' or a type).

        The token is stored as the return type; a constructor whose return
        type is not the current class is reported through `_traceback`.
        """
        self._advance()
        is_void = self._current_token == KW_VOID
        if not is_void:
            # Token already consumed, so validate it in place.
            self.compile_type(True, '"void" or type')
        self._ret_type = self._current_token
        in_constructor = self._sub_kind == KW_CONSTRUCTOR
        if in_constructor and self._ret_type != self._class_name:
            self._traceback('constructor expect current class as return type')

    @record_non_terminal('class')
    def compile_class(self):
        """Compile a whole class declaration.

        Grammar: 'class' className '{' classVarDec* subroutineDec* '}'

        Anything other than '}' after the declarations, or any token after
        the closing '}', is reported through `_traceback`.
        """
        self._require_kw(KW_CLASS)
        self.compile_class_name()
        self._require_sym('{')
        while self._fol_by_class_vardec():
            self.compile_class_vardec()
        while self._fol_by_subroutine():
            self.compile_subroutine()
        self._advance()
        if self._current_token != '}':
            # Typically a classVarDec placed after a subroutineDec.
            self._traceback("Expect classVarDec first, subroutineDec second.")
        if self.tokenizer.has_more_tokens():
            if self._next_token == KW_CLASS:
                self._traceback('Only expect one classDec.')
            self._traceback('Unexpected extra tokens.')

    def compile_declare(self):
        """Compile a declaration: kind type varName (',' varName)* ';'

        The kind keyword ('static', 'field' or 'var') and the type are read
        first; every following name is defined with that kind and type.
        """
        self._advance()
        id_kind = self._current_token
        self.compile_type()
        id_type = self._current_token
        while True:
            self.compile_var_name(id_kind, id_type, declare=True)
            # The token after a name is either ',' (more names) or ';'.
            self._advance()
            if self._current_token != ',':
                break
        if self._current_token == ';':
            return None
        # NOTE(review): the tuple is passed positionally while other call
        # sites use keywords - confirm which parameter of _error it binds to.
        return self._error((',', ';'))

    @record_non_terminal('classVarDec')
    def compile_class_vardec(self):
        """Compile one class-level declaration:
        ('static'|'field') type varName (',' varName)* ';'
        """
        self.compile_declare()

    @record_non_terminal('subroutineDec')
    def compile_subroutine(self):
        """Compile one subroutine declaration.

        Grammar: ('constructor'|'function'|'method') ('void'|type)
                 subroutineName '(' parameterList ')' subroutineBody
        """
        # Fresh per-subroutine state and symbol scope.
        self._subroutine_init()
        self.symboltable.start_subroutine()
        self._advance()
        kind = self._current_token
        self._sub_kind = kind
        if kind == KW_METHOD:
            # A method gets the receiver as an implicit 'this' argument.
            self.symboltable.define('this', self._class_name, 'argument')
        self.compile_return_type()
        self.compile_subroutine_name()
        self._require_brackets('()', self.compile_parameter_list)
        self.compile_subroutine_body()

    @record_non_terminal('parameterList')
    def compile_parameter_list(self):
        """Compile ((type varName) (',' type varName)*)? and define every
        parameter as an 'argument' in the symbol table."""
        if self._next_token == ')':
            # Empty parameter list.
            return
        while True:
            self.compile_type()
            self.compile_var_name('argument', self._current_token, True)
            if self._next_token == ')':
                break
            self._require_sym(',')

    @record_non_terminal('subroutineBody')
    def compile_subroutine_body(self):
        """Compile '{' varDec* statements '}'.

        The function header is emitted after the var declarations, once
        the number of locals can be read from the symbol table.
        """
        self._require_sym('{')
        more_vardecs = self._fol_by_vardec()
        while more_vardecs:
            self.compile_vardec()
            more_vardecs = self._fol_by_vardec()
        self.compile_function()
        self.compile_statements()
        self._require_sym('}')

    def compile_function(self):
        """Emit the VM function header and the code that sets up `this`.

        A constructor allocates one word per field through Memory.alloc and
        stores the result in pointer 0; a method copies argument 0 into
        pointer 0.  Other subroutine kinds emit only the header.
        """
        qualified_name = '.'.join([self._class_name, self._sub_name])
        local_count = self.symboltable.varcount(KW_VAR)
        # function <Class.name> <local_count>
        self.vmwriter.write_function(qualified_name, local_count)
        kind = self._sub_kind
        if kind == KW_CONSTRUCTOR:
            field_count = self.symboltable.varcount(KW_FIELD)
            self.vmwriter.write_push('constant', field_count)
            self.vmwriter.write_call('Memory.alloc', 1)
            self.vmwriter.write_pop('pointer', 0)
            return
        if kind == KW_METHOD:
            self.vmwriter.write_push('argument', 0)
            self.vmwriter.write_pop('pointer', 0)

    @record_non_terminal('varDec')
    def compile_vardec(self):
        """Compile one local declaration:
        'var' type varName (',' varName)* ';'
        """
        self.compile_declare()

    #########################
    # statement compilation #
    #########################

    @record_non_terminal('statements')
    def compile_statements(self):
        """Compile statements until the upcoming token is '}'.

        Grammar: (letStatement | ifStatement | whileStatement |
                  doStatement | returnStatement)*

        The leading keyword of each statement is consumed here; a token
        that starts no statement is reported through `_error`.
        """
        handlers = {
            'do': self.compile_do,
            'let': self.compile_let,
            'while': self.compile_while,
            'return': self.compile_return,
            'if': self.compile_if,
        }
        while self._next_token != '}':
            self._advance()
            handler = handlers.get(self._current_token)
            if handler is None:
                return self._error(expect='statement expression')
            handler()

    @record_non_terminal('doStatement')
    def compile_do(self):
        """Compile 'do' subroutineCall ';' (the 'do' keyword has already
        been consumed by the caller)."""
        self._advance()
        self.compile_subroutine_call()
        # The call's result is not used: drop it into temp 0.
        self.vmwriter.write_pop('temp', 0)
        self._require_sym(';')

    @record_non_terminal('letStatement')
    def compile_let(self):
        """Compile 'let' varName ('[' expression ']')? '=' expression ';'
        (the 'let' keyword has already been consumed by the caller)."""
        self.compile_var_name()
        target = self._current_token
        is_array = self._next_token == '['
        if is_array:
            # Leaves (array base + subscript) on the stack.
            self.compile_array_subscript(target)
        self._require_sym('=')
        # Leaves the value to assign on the stack.
        self.compile_expression()
        self._require_sym(';')
        if not is_array:
            self.assign_variable(target)
            return
        # Park the value in temp 1, point `that` at the element, then
        # store the value through that 0.
        self.vmwriter.write_pop('temp', 1)
        self.vmwriter.write_pop('pointer', 1)
        self.vmwriter.write_push('temp', 1)
        self.vmwriter.write_pop('that', 0)

    # Maps a symbol-table kind to the VM memory segment that assign_variable
    # and load_variable pass to the VM writer for that kind.
    kind_segment = {
        'static': 'static',
        'field': 'this',
        'argument': 'argument',
        'var': 'local'
    }

    def assign_variable(self, name):
        """Pop the stack top into the variable `name`, using the segment
        that `kind_segment` gives for the variable's kind."""
        table = self.symboltable
        segment = self.kind_segment[table.kindof(name)]
        self.vmwriter.write_pop(segment, table.indexof(name))

    def load_variable(self, name):
        """Push the variable `name` onto the stack, using the segment that
        `kind_segment` gives for the variable's kind."""
        table = self.symboltable
        segment = self.kind_segment[table.kindof(name)]
        self.vmwriter.write_push(segment, table.indexof(name))

    # Starting value of the label counter. compile_while and compile_if read
    # it for their label suffix and then do `self.label_num += 1`, which
    # rebinds the counter on the instance.
    label_num = 0

    @record_non_terminal('whileStatement')
    def compile_while(self):
        """Compile 'while' '(' expression ')' '{' statements '}'
        (the 'while' keyword has already been consumed by the caller)."""
        suffix = str(self.label_num)
        start_label = 'WHILE_START_' + suffix
        end_label = 'WHILE_END_' + suffix
        self.label_num += 1
        self.vmwriter.write_label(start_label)
        # The body jumps back to start_label; a false condition leaves
        # through end_label.
        self.compile_cond_expression(start_label, end_label)

    @record_non_terminal('ifStatement')
    def compile_if(self):
        """Compile 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?
        (the 'if' keyword has already been consumed by the caller)."""
        suffix = str(self.label_num)
        else_label = 'IF_ELSE_' + suffix
        end_label = 'IF_END_' + suffix
        self.label_num += 1
        # The true branch ends with a jump to end_label; a false condition
        # lands on else_label.
        self.compile_cond_expression(end_label, else_label)
        has_else = self._next_token == KW_ELSE
        if has_else:
            self._require_kw(KW_ELSE)
            self._require_brackets('{}', self.compile_statements)
        self.vmwriter.write_label(end_label)

    def compile_cond_expression(self, goto_label, end_label):
        """Compile the '(' expression ')' '{' statements '}' part shared by
        ``while`` and ``if`` statements.

        :param goto_label: label jumped to once the statements have run.
        :param end_label: target of the conditional jump taken on the negated
            condition; it is also emitted right after the statements.
        """
        self._require_brackets('()', self.compile_expression)
        # negate the condition so the conditional jump skips the statements
        self.vmwriter.write_arithmetic('not')
        self.vmwriter.write_if(end_label)
        self._require_brackets('{}', self.compile_statements)
        self.vmwriter.write_goto(goto_label)  # meet
        self.vmwriter.write_label(end_label)

    @record_non_terminal('returnStatement')
    def compile_return(self):
        """Emit VM code for a return statement.

        Grammar: 'return' expression? ';'

        A constructor must return ``this``.  A bare ``return;`` pushes the
        constant 0 and is reported through ``_traceback`` when the subroutine
        return type is not void.
        """
        if self._sub_kind == KW_CONSTRUCTOR:
            self._require_kw(KW_THIS)  # constructor must return 'this'
            self.vmwriter.write_push('pointer', 0)
        elif self._next_token == ';':
            if self._ret_type != KW_VOID:
                self._traceback('expect return ' + self._ret_type)
            self.vmwriter.write_push('constant', 0)
        else:
            self.compile_expression()
        self._require_sym(';')
        self.vmwriter.write_return()

    ##########################
    # expression compilation #
    ##########################

    # Operator symbol -> VM arithmetic command.
    unary_ops = {'-': 'neg', '~': 'not'}
    # '*' and '/' map to None: compile_binaryop emits calls to
    # Math.multiply / Math.divide for them instead of an arithmetic command.
    binary_ops = {
        '+': 'add',
        '-': 'sub',
        '*': None,
        '/': None,
        '&': 'and',
        '|': 'or',
        '<': 'lt',
        '>': 'gt',
        '=': 'eq'
    }

    @record_non_terminal('expression')
    def compile_expression(self):
        """Compile ``term (op term)*``.

        Each operator is applied as soon as its right-hand term has been
        compiled, so operators are processed left to right in source order.
        """
        # term (op term)*
        self.compile_term()
        while self._next_token in self.binary_ops:
            self._advance()
            if self._current_tok_type != T_SYMBOL:
                self._error(expect_types=(T_SYMBOL, ))
            # remember the operator: compiling the term moves the tokenizer on
            op = self._current_token
            self.compile_term()
            self.compile_binaryop(op)

    def compile_binaryop(self, op):
        """Emit the VM code that applies binary operator ``op``.

        '*' and '/' become two-argument calls to ``Math.multiply`` and
        ``Math.divide``; every other operator is looked up in ``binary_ops``
        and written as an arithmetic command.
        """
        math_routines = {'*': 'Math.multiply', '/': 'Math.divide'}
        if op in math_routines:
            self.vmwriter.write_call(math_routines[op], 2)
        else:
            self.vmwriter.write_arithmetic(self.binary_ops[op])

    # Keywords that compile_term accepts as constant terms.
    kw_consts = (KW_TRUE, KW_FALSE, KW_NULL, KW_THIS)

    @record_non_terminal('term')
    def compile_term(self):
        """Compile a single term and leave its value on the stack.

        Grammar: integerConstant | stringConstant | keywordConstant |
                 varName | varName '[' expression ']' | subroutineCall |
                 '(' expression ')' | unaryOp term
        """
        if self._next_token == '(':
            self._require_brackets('()', self.compile_expression)
            return
        self._advance()
        tok = self._current_token
        tok_type = self._current_tok_type
        if tok_type == T_KEYWORD and tok in self.kw_consts:
            self.compile_kw_consts(tok)
        elif tok_type == T_INTEGER:
            self.vmwriter.write_push('constant', tok)
        elif tok_type == T_STRING:
            self.compile_string(tok)
        elif tok_type == T_ID:
            # Compare against whole tokens: the former substring test
            # (`in '(.'`) was also true for an empty next token.
            if self._next_token in ('(', '.'):
                self.compile_subroutine_call()
            elif self._next_token == '[':
                self.check_var_name(tok)
                self.compile_array_subscript(tok)
                # that = base + subscript, then read the element
                self.vmwriter.write_pop('pointer', 1)
                self.vmwriter.write_push('that', 0)
            else:
                self.check_var_name(tok)
                self.load_variable(tok)
        elif tok_type == T_SYMBOL and tok in self.unary_ops:
            # the operand is compiled first, then the operator is applied
            self.compile_term()
            self.vmwriter.write_arithmetic(self.unary_ops[tok])
        else:
            self._error(expect='term')

    # keywordConstant: 'true' | 'false' | 'null' | 'this'
    def compile_kw_consts(self, kw):
        """Push the value of keyword constant ``kw``.

        ``this`` pushes pointer 0, ``true`` pushes constant 1 followed by
        ``neg``, and anything else pushes constant 0.
        """
        push = self.vmwriter.write_push
        if kw == KW_THIS:
            push('pointer', 0)
            return
        if kw != KW_TRUE:
            push('constant', 0)
            return
        push('constant', 1)
        self.vmwriter.write_arithmetic('neg')

    def compile_string(self, string):
        """Emit VM code that builds the string constant ``string``.

        The length is pushed and ``String.new`` called with one argument;
        then, for every character, its code point is pushed and
        ``String.appendChar`` called with two arguments.
        """
        writer = self.vmwriter
        writer.write_push('constant', len(string))
        writer.write_call('String.new', 1)
        for code_point in map(ord, string):
            writer.write_push('constant', code_point)
            writer.write_call('String.appendChar', 2)

    def compile_subroutine_call(self):
        """Compile a subroutine call whose first identifier is already the
        current token, and emit the ``call`` instruction.

        Grammar: subroutineName '(' expressionList ')' |
                 (className | varName) '.' subroutineName
                 '(' expressionList ')'
        """
        # implicit_args is the count returned by compile_call_name (1 when a
        # receiver was pushed, else 0)
        callee, implicit_args = self.compile_call_name()
        self._require_sym('(')
        total_args = self.compile_expressionlist(implicit_args)
        self._require_sym(')')
        self.vmwriter.write_call(callee, total_args)

    def compile_call_name(self):
        """Resolve the callee of a subroutine call and push its receiver.

        The first name of a call is a className or varName when followed by
        '.', or a subroutineName of the current class when followed by '('.

        :return: ``(function_name, num_args)`` where ``num_args`` is 1 when
            an object reference was pushed as the first argument, else 0.
        """
        if self._current_tok_type != T_ID:
            self._error(expect_types=(T_ID, ))
        name = self._current_token
        if self._next_token == '.':
            self._require_sym('.')
            self.compile_subroutine_name()
            sub_name = self._current_token
            if (name in self.symboltable.all_class_types()
                    or name in SymbolTable.builtIn_class
                    or name == self._class_name):
                return '.'.join((name, sub_name)), 0  # className
            self.check_var_name(name)  # varName with class type
            var_type = self.symboltable.typeof(name)
            if var_type in SymbolTable.builtIn_types:
                return self._error(expect='class instance or class',
                                   get=var_type)
            self.load_variable(name)  # the object is the first argument
            return '.'.join((var_type, sub_name)), 1
        if self._next_token == '(':
            self.vmwriter.write_push('pointer', 0)  # push this to be 1st arg
            return '.'.join((self._class_name, name)), 1  # subroutineName
        # Neither '.' nor '(' follows the name (e.g. "do foo;"): report it
        # instead of returning None, which the caller cannot unpack.
        return self._error(expect_toks=('.', '('), get=self._next_token)

    @record_non_terminal('expressionList')
    def compile_expressionlist(self, num_args):
        """Compile ``(expression (',' expression)*)?`` up to the closing ')'.

        :param num_args: number of arguments already pushed by the caller.
        :return: ``num_args`` plus the number of expressions compiled here.
        """
        if self._next_token == ')':
            return num_args
        self.compile_expression()
        count = num_args + 1
        while self._next_token != ')':
            self._require_sym(',')
            self.compile_expression()
            count += 1
        return count

    def compile_array_subscript(self, var_name):
        """Compile ``'[' expression ']'`` for array ``var_name`` and leave
        (array base + subscript) on the stack.

        :param var_name: name of the variable being indexed; it is checked
            with ``check_var_name(var_name, 'Array')`` first.
        """
        # varName '[' expression ']'
        self.check_var_name(var_name, 'Array')
        self._require_brackets(
            '[]', self.compile_expression)  # push expression value
        self.load_variable(var_name)  # push the array variable's value
        self.vmwriter.write_arithmetic('add')  # base + subscript

    def _check_EOF(self):
        """Report "Unexpected EOF." through ``_traceback`` when the
        tokenizer has no more tokens."""
        if self.tokenizer.has_more_tokens():
            return
        self._traceback("Unexpected EOF.")

    def _error(self, expect_toks=(), expect_types=(), expect=None, get=None):
        """Build an 'Expect ... but get ...' message and pass it to
        ``_traceback``.

        :param expect_toks: acceptable token texts, each shown in quotes.
        :param expect_types: acceptable token types, shown through
            ``token_tags`` as ``type <tag>``.
        :param expect: ready-made description of what was expected; when
            given, ``expect_toks`` and ``expect_types`` are ignored.
        :param get: what was found instead; defaults to the current token.
        :return: whatever ``_traceback`` returns.
        """
        if expect is None:
            # One flat list of alternatives.  The previous code called
            # str.join with two positional arguments, which raises TypeError
            # whenever both tokens and types were supplied.
            alternatives = ['"{0}"'.format(t) for t in expect_toks]
            alternatives.extend('type {0}'.format(token_tags[t])
                                for t in expect_types)
            expect = ' or '.join(alternatives)
        if get is None:
            get = self._current_token
        me = 'Expect {0} but get "{1}"'.format(expect, get)
        return self._traceback(me)

    def _traceback(self, message):
        """Raise ``CompileError`` with ``message`` prefixed by the
        tokenizer's file name and line count.

        When ``DEBUG`` is set, the symbol table and the known class types
        are printed first.
        """
        if DEBUG:
            rule = '--------------------------------------------'
            print(rule)
            print(self.symboltable)
            print(self.symboltable.all_class_types())
            print(rule)
        location = 'file: "{0}", line {1}'.format(self.tokenizer.filename,
                                                  self.tokenizer.line_count)
        raise CompileError('{0}: {1}'.format(location, message))
Esempio n. 4
0
class CompilationEngine:
    """Recursive-descent parser that renders one Jack class as nested XML.

    Every ``compile_*`` method consumes the tokens of one grammar element
    and appends the matching XML to ``self.output``; ``write_file`` stores
    the result in ``self.out_address``.  Patterns handed to ``eat`` and the
    ``peek_*`` helpers are regular expressions applied with ``re.match``,
    i.e. they are anchored at the start of the token text only.
    """
    # Patterns for symbols that are special characters in a regex.  Raw
    # strings keep the backslashes without relying on invalid escapes.
    _OPEN_PARENTHESIS = r"\("
    _CLOSE_PARENTHESIS = r"\)"
    _OPEN_BRACKET = r"\["
    _CLOSE_BRACKET = r"\]"
    _DOT = r"\."
    _OPS = r"\+|-|\*|\/|&|\||<|>|="
    _UNARY_OPS = r"-|~"

    def __init__(self, in_address):
        """
        Tokenizes the given file and immediately compiles the class in it.
        :param in_address: Path of the .jack source; the output path is the
        same path with the extension replaced by .xml
        """
        self.tokenizer = Tokenizer(in_address)
        self.curr_token = self.tokenizer.get_current_token()
        self.out_address = in_address.replace(".jack", ".xml")
        self.output = ""
        self.indent = 0
        self.compile_class()

    def write_file(self):
        """
        Writes the accumulated output to the output file.
        """
        with open(self.out_address, 'w') as f:
            f.write(self.output)

    def write(self, to_write):
        """
        Writes to the output, with indentation.
        :param to_write: The string to write
        """
        self.output += (self.indent * " ") + to_write + "\n"

    def __abort(self, message):
        """
        Prints the message and terminates the process with exit status -1.
        :param message: The error text to print
        """
        import sys  # imported here so the helper is self-contained
        print(message)
        sys.exit(-1)

    def compile_class(self):
        """
        Compiles a complete class.
        """
        def comp_class():
            self.eat("class")
            self.eat(NAME_REG)
            self.eat("{")
            self.compile_class_var_dec()
            self.compile_subroutine()
            self.eat("}")

        self.wrap("class", comp_class)

    def compile_class_var_dec(self):
        """
        Compiles every consecutive static or field declaration.
        :return:
        """
        var_type_reg = "static|field"
        while self.peek_token(var_type_reg):
            self.wrap("classVarDec", self.__class_var_dec)

    def __class_var_dec(self):
        """
        Compiles a single class var declaration.
        """
        # (static|field)
        self.eat("static|field")
        # type
        self.__compile_type(False)
        # Compile varName combo until no more ","
        self.__single_var()
        self.eat(";")

    def __single_var(self):
        """
        Compiles a single set of variables separated by commas.
        """
        # varName (',' varName)*
        self.eat(NAME_REG)
        while self.peek_token(","):
            self.eat(",")
            self.eat(NAME_REG)

    def __compile_type(self, for_function):
        """
        Compiles a type for a function or variable, determined by
        a received boolean value.
        :param for_function: True if is type of function, false otherwise.
        :return:
        """
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        if for_function:
            type_reg += "|void"
        self.eat(type_reg)

    def compile_subroutine(self):
        """
        Compiles every consecutive method, function or constructor.
        :return:
        """
        sub_regex = "(constructor|function|method)"

        def subroutine_dec():
            self.eat(sub_regex)
            self.__compile_type(True)
            # subroutine name
            self.__compile_name()
            self.eat(CompilationEngine._OPEN_PARENTHESIS)
            self.compile_parameter_list()
            self.eat(CompilationEngine._CLOSE_PARENTHESIS)
            self.wrap("subroutineBody", subroutine_body)

        def subroutine_body():
            self.eat("{")
            if self.peek_token("var"):
                self.compile_var_dec()
            self.compile_statements()
            self.eat("}")

        # A loop instead of self-recursion: one iteration per subroutine.
        while self.peek_token(sub_regex):
            self.wrap("subroutineDec", subroutine_dec)

    def __compile_name(self):
        """
        Compiles an identifier; aborts if the current token is not one.
        """
        if self.peek_type() == IDENTIFIER:
            self.eat(NAME_REG)
        else:
            self.__abort("ERROR: Identifier Expected")

    def compile_parameter_list(self):
        """
        Compiles a possibly empty parameter list, not including the
        enclosing ()
        :return:
        """
        self.wrap("parameterList", self.__params)

    def __params(self):
        """
        Compiles (type varName (',' type varName)*)?
        """
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        while self.peek_token(type_reg):
            self.__compile_type(False)
            self.eat(NAME_REG)
            if not self.peek_token(","):
                break
            self.eat(",")

    def compile_var_dec(self):
        """
        Compiles a var declaration and every var declaration following it.
        :return:
        """
        self.wrap("varDec", self.__comp_var_dec)
        while self.peek_token("var"):
            self.wrap("varDec", self.__comp_var_dec)

    def __comp_var_dec(self):
        """
        Compiles a single var declaration.
        """
        self.eat("var")
        self.__compile_type(False)
        self.__single_var()
        self.eat(";")

    def compile_statements(self):
        """
        Compiles a sequence of statements, not including the enclosing {}
        :return:
        """
        # Checked in this order on every pass, as before.
        dispatch = (
            ("let", self.compile_let),
            ("if", self.compile_if),
            ("while", self.compile_while),
            ("do", self.compile_do),
            ("return", self.compile_return),
        )

        def statements():
            """
            Compiles statements until the current token starts none.
            Iterative, so long statement lists do not deepen the call stack.
            :return:
            """
            found = True
            while found:
                found = False
                for keyword, compile_statement in dispatch:
                    if self.peek_token(keyword):
                        compile_statement()
                        found = True
                        break

        self.wrap("statements", statements)

    def compile_do(self):
        """
        Compiles a do statement
        :return:
        """
        self.wrap("doStatement", self.__comp_do)

    def __comp_do(self):
        """
        Compiles the content of a do statement.
        """
        self.eat("do")
        self.__subroutine_call()
        self.eat(";")

    def compile_let(self):
        """
        Compiles a let statement
        :return:
        """
        self.wrap("letStatement", self.__comp_let)

    def __comp_let(self):
        """
        Compiles the content of a let statement.
        """
        self.eat("let")
        self.__compile_name()
        # Determine [expression]
        if self.peek_token(CompilationEngine._OPEN_BRACKET):
            self.eat(CompilationEngine._OPEN_BRACKET)
            self.compile_expression()
            self.eat(CompilationEngine._CLOSE_BRACKET)
        self.eat("=")
        self.compile_expression()
        self.eat(";")

    def compile_while(self):
        """
        Compiles a while statement.
        :return:
        """
        def comp_while():
            self.eat("while")
            self.eat(CompilationEngine._OPEN_PARENTHESIS)
            self.compile_expression()
            self.eat(CompilationEngine._CLOSE_PARENTHESIS)
            self.eat("{")
            self.compile_statements()
            self.eat("}")

        self.wrap("whileStatement", comp_while)

    def compile_return(self):
        """
        Compiles a return statement.
        :return:
        """
        def comp_return():
            self.eat("return")
            # if next is expression:
            if self.is_term():
                self.compile_expression()
            self.eat(";")

        self.wrap("returnStatement", comp_return)

    def compile_if(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        :return:
        """
        def comp_if():
            self.eat("if")
            self.eat(CompilationEngine._OPEN_PARENTHESIS)
            self.compile_expression()
            self.eat(CompilationEngine._CLOSE_PARENTHESIS)
            self.eat("{")
            self.compile_statements()
            self.eat("}")
            # Handle else:
            if self.peek_token("else"):
                self.eat("else")
                self.eat("{")
                self.compile_statements()
                self.eat("}")

        self.wrap("ifStatement", comp_if)

    def compile_expression(self):
        """
        Compiles an expression: term (op term)*
        :return:
        """
        def comp_expression():
            self.compile_term()
            # Any number of "op term" pairs may follow the first term.
            while self.peek_token(CompilationEngine._OPS):
                self.eat(CompilationEngine._OPS)
                self.compile_term()

        self.wrap("expression", comp_expression)

    def compile_term(self):
        """
        Compiles a term.
        :return:
        """
        def term():
            curr_type = self.peek_type()
            # Case: term is integerConstant or stringConstant or
            # keywordConstant
            if curr_type in (STRING_CONST, INT_CONST, KEYWORD):
                self.write(self.tokenizer.get_current_token().get_xml_wrap())
                self.__advance_token()
            # Case: token is a varName or a subroutineName
            elif curr_type == IDENTIFIER:
                # Case: varName [ expression ]
                if self.peek_next(CompilationEngine._OPEN_BRACKET):
                    self.__var_name_array()
                # Case: subroutineCall:
                elif self.peek_next(
                        CompilationEngine._OPEN_PARENTHESIS) or self.peek_next(
                            CompilationEngine._DOT):
                    self.__subroutine_call()
                else:
                    self.eat(NAME_REG)
            # Case: ( expression )
            elif self.peek_token(CompilationEngine._OPEN_PARENTHESIS):
                self.eat(CompilationEngine._OPEN_PARENTHESIS)
                self.compile_expression()
                self.eat(CompilationEngine._CLOSE_PARENTHESIS)
            # Case: unaryOp term
            elif self.peek_token(CompilationEngine._UNARY_OPS):
                self.eat(CompilationEngine._UNARY_OPS)
                self.compile_term()
            else:
                self.__abort("Error: Incorrect Term")

        self.wrap("term", term)

    def __var_name_array(self):
        """
        Handles the case of varName[expression]
        :return:
        """
        self.eat(NAME_REG)
        self.eat(CompilationEngine._OPEN_BRACKET)
        self.compile_expression()
        self.eat(CompilationEngine._CLOSE_BRACKET)

    def is_term(self):
        """
        :return: a truthy value if the current token can start a term:
        a constant, keyword or identifier, "(" or a unary operator.
        """
        curr_type = self.peek_type()
        # Only the unary operators can start a term; testing the binary
        # operator set here used to reject terms beginning with "~".
        return curr_type in (STRING_CONST, INT_CONST, KEYWORD, IDENTIFIER) or \
            self.peek_token(CompilationEngine._OPEN_PARENTHESIS) or \
            self.peek_token(CompilationEngine._UNARY_OPS)

    def __subroutine_call(self):
        """
        Compiles a subroutine call starting at the current identifier.
        Nothing is consumed when the current token is not an identifier.
        """
        if self.curr_token.get_type() == IDENTIFIER:
            if self.peek_next(CompilationEngine._OPEN_PARENTHESIS):
                self.__subroutine_name()
            elif self.peek_next(CompilationEngine._DOT):
                self.__object_subroutine_call()
            else:
                self.__abort("Error: ( or . expected")

    def __object_subroutine_call(self):
        """
        Handles the case of name.subroutineName(expressionList)
        """
        self.eat(NAME_REG)
        self.eat(CompilationEngine._DOT)
        self.__subroutine_name()

    def __subroutine_name(self):
        """
        Handles the case of subroutineName(expressionList)
        :return:
        """
        if self.curr_token.get_type() == IDENTIFIER:
            self.eat(NAME_REG)
            self.eat(CompilationEngine._OPEN_PARENTHESIS)
            self.compile_expression_list()
            self.eat(CompilationEngine._CLOSE_PARENTHESIS)

    def compile_expression_list(self):
        """
        Compiles a possibly empty list of comma separated expressions
        :return:
        """
        def exp_list():
            if self.is_term():
                self.compile_expression()
                while self.peek_token(","):
                    self.eat(",")
                    self.compile_expression()

        self.wrap("expressionList", exp_list)

    def wrap(self, section_name, func):
        """
        Wraps a program structure block with the section_name, and executes
        its function
        :param section_name: The name of the section
        :param func: The function to perform
        :return:
        """
        self.write("<{}>".format(section_name))
        self.indent += 2
        func()
        self.indent -= 2
        self.write("</{}>".format(section_name))

    def eat(self, token):
        """
        Handles advancing and writing terminal tokens.
        Will exit the program if an error occurs.
        :param token: The regex of the token to compare
        :return:
        """
        if re.match(token, self.curr_token.get_token()):
            self.write(self.curr_token.get_xml_wrap())
            self.__advance_token()
        else:
            self.__abort("Error: Expected " + token)

    def peek_token(self, compare_next):
        """
        :param compare_next: The regex to compare.
        :return: A truthy match object if the current token matches the
        regex, a falsy value otherwise.
        """
        if self.curr_token:
            return re.match(compare_next, self.curr_token.get_token())
        return False

    def peek_type(self):
        """
        :return: the type of the current token
        """
        return self.curr_token.get_type()

    def peek_next(self, comp):
        """
        :param comp: The regex to compare.
        :return: A truthy match object if there is a next token and it
        matches the regex, a falsy value otherwise.
        """
        next_token = self.tokenizer.get_next_token()
        # Case: There actually is a next token
        if next_token:
            return re.match(comp, next_token.get_token())
        return False

    def __advance_token(self):
        """
        Advances the tokenizer; curr_token is only refreshed while the
        tokenizer reports more tokens, so it keeps its last value at the end.
        """
        self.tokenizer.advance()
        if self.tokenizer.has_more_tokens():
            self.curr_token = self.tokenizer.get_current_token()
Esempio n. 5
0
class Compiler:
    def __init__(self, inpath, outpath):
        """Tokenize ``inpath`` and compile it, writing through ``XMLWriter``.

        :param inpath: path handed to ``Tokenizer``.
        :param outpath: path handed to ``XMLWriter.set_filepath``.

        Nothing is compiled when the tokenizer has no tokens.  The writer is
        closed even when compilation raises, so a failed run does not leave
        it open.
        """
        self.tokenizer = Tokenizer(inpath)
        XMLWriter.set_filepath(outpath)
        try:
            if self.tokenizer.has_more_tokens():
                self.compile_class()
        finally:
            XMLWriter.close()

    def _write_current_terminal(self):
        """Write the current token as a terminal, tagged by its type."""
        token, tag = self._current_token, self._current_tok_tag
        XMLWriter.write_terminal(token, tag)

    def _advance(self):
        """Move the tokenizer to its next token, calling ``_check_EOF``
        first."""
        self._check_EOF()
        self.tokenizer.advance()

    # Keywords accepted as a type by compile_type.
    type_kws = (KW_INT, KW_CHAR, KW_BOOLEAN)
    # Keywords accepted as a constant term by compile_term.
    kw_consts = (KW_TRUE, KW_FALSE, KW_NULL, KW_THIS)

    @property
    def _current_token(self):
        """Value of the tokenizer's current token, read from the accessor
        that matches its type (the string value for any other type)."""
        tokenizer = self.tokenizer
        kind = tokenizer.token_type
        if kind == T_KEYWORD:
            return tokenizer.keyword
        if kind == T_SYMBOL:
            return tokenizer.symbol
        if kind == T_ID:
            return tokenizer.identifier
        if kind == T_INTEGER:
            return tokenizer.intval
        return tokenizer.stringval

    @property
    def _current_tok_type(self):
        """Type of the tokenizer's current token."""
        return self.tokenizer.token_type

    @property
    def _current_tok_tag(self):
        """Entry of ``token_tags`` for the current token's type."""
        return token_tags[self._current_tok_type]

    @property
    def _next_token(self):
        """Return the tokenizer's raw ``next_token`` converted with
        ``str``."""
        return str(self.tokenizer.next_token)

    def _require_token(self, tok_type, token=None):
        """Advance to the next token and require it to be the expected one.

        The token must equal ``token`` when one is given, and must have type
        ``tok_type``.  If it does, the terminal is written to the output;
        otherwise the mismatch is reported through ``_error``.
        """
        self._advance()
        if token and self._current_token != token:
            return self._error(expect_toks=(token,))
        if self._current_tok_type != tok_type:
            return self._error(expect_types=(tok_type,))
        self._write_current_terminal()

    def _require_id(self):
        """Require the next token to be an identifier."""
        return self._require_token(T_ID)

    def _require_kw(self, token):
        """Require the next token to be the keyword ``token``."""
        return self._require_token(T_KEYWORD, token=token)

    def _require_sym(self, token):
        """Require the next token to be the symbol ``token``."""
        return self._require_token(T_SYMBOL, token=token)

    def _require_brackets(self, brackets, procedure):
        """Require an opening symbol, run ``procedure``, then require the
        closing symbol.

        :param brackets: two-item sequence (e.g. ``'()'``) holding the
            opening and the closing symbol.
        :param procedure: zero-argument callable that compiles the content.
        """
        front, back = brackets
        self._require_sym(front)
        procedure()
        self._require_sym(back)

    def _fol_by_class_vardec(self):
        """Return True if the next token is ``static`` or ``field``."""
        return self._next_token in (KW_STATIC, KW_FIELD)

    def _fol_by_subroutine(self):
        """Return True if the next token is ``constructor``, ``function``
        or ``method``."""
        return self._next_token in (KW_CONSTRUCTOR, KW_FUNCTION, KW_METHOD)

    def _fol_by_vardec(self):
        """Return True if the next token is ``var``."""
        return self._next_token == KW_VAR

    #########################
    # structure compilation #
    #########################

    # The three name-compiling methods below are identical here; they are
    # kept separate for abstraction and for the code generation added later
    # in project 11.
    def compile_class_name(self):
        """Require an identifier used as a class name."""
        self._require_id()

    def compile_subroutine_name(self):
        """Require an identifier used as a subroutine name."""
        self._require_id()

    def compile_var_name(self):
        """Require an identifier used as a variable name."""
        self._require_id()

    def compile_type(self, advanced=False, expect='type'):
        """Write a type: one of the ``type_kws`` keywords or an identifier
        (className).

        :param advanced: pass True when the tokenizer already stands on the
            type token; with the default the tokenizer is advanced first.
        :param expect: description used in the error report on a mismatch.
        """
        if advanced is False:
            self._advance()
        is_builtin = self._current_token in self.type_kws
        if is_builtin or self._current_tok_type == T_ID:
            return self._write_current_terminal()
        return self._error(expect=expect)

    def compile_void_or_type(self):
        """Advance and write either ``void`` or a type."""
        self._advance()
        if self._current_token != KW_VOID:
            # already advanced, so compile_type must not advance again
            self.compile_type(True, '"void" or type')
            return
        self._write_current_terminal()

    @record_non_terminal('class')
    def compile_class(self):
        """Compile a whole class.

        Grammar: 'class' className '{' classVarDec* subroutineDec* '}'
        """
        self._require_kw(KW_CLASS)
        self.compile_class_name()
        self._require_sym('{')
        while self._fol_by_class_vardec():
            self.compile_class_vardec()
        while self._fol_by_subroutine():
            self.compile_subroutine()
        self._advance()
        if self._current_token != '}':
            # message typo fixed: it used to read "Except ..."
            self._traceback("Expect classVarDec or subroutineDec.")
        self._write_current_terminal()

    def compile_declare(self):
        """Compile a declaration: a leading token followed by
        ``type varName (',' varName)* ';'``.

        Shared by ``compile_class_vardec`` and ``compile_vardec``.  The
        leading token is written without being checked here.
        """
        self._advance()
        self._write_current_terminal()
        # type varName (',' varName)* ';'
        self.compile_type()
        self.compile_var_name()
        # compile ',' or ';'
        self._advance()
        while self._current_token == ',':
            self._write_current_terminal()
            self.compile_var_name()
            self._advance()
        if self._current_token != ';':
            return self._error((',', ';'))
        self._write_current_terminal()

    @record_non_terminal('classVarDec')
    def compile_class_vardec(self):
        """Compile one class variable declaration via ``compile_declare``."""
        # ('static|field') type varName (',' varName)* ';'
        self.compile_declare()

    @record_non_terminal('subroutineDec')
    def compile_subroutine(self):
        """Compile one subroutine declaration including its body."""
        # ('constructor'|'function'|'method')
        # ('void'|type) subroutineName '(' parameterList ')' subroutineBody
        self._advance()
        # ('constructor'|'function'|'method'), written without a check here
        self._write_current_terminal()
        self.compile_void_or_type()
        self.compile_subroutine_name()
        self._require_brackets('()', self.compile_parameter_list)
        self.compile_subroutine_body()

    @record_non_terminal('parameterList')
    def compile_parameter_list(self):
        """Compile ``((type varName) (',' type varName)*)?`` up to, but not
        including, the closing ')'."""
        if self._next_token == ')':
            return
        while True:
            self.compile_type()
            self.compile_var_name()
            if self._next_token == ')':
                break
            self._require_sym(',')

    @record_non_terminal('subroutineBody')
    def compile_subroutine_body(self):
        """Compile a subroutine body: local declarations, then statements."""
        # '{' varDec* statements '}'
        self._require_sym('{')
        while self._fol_by_vardec():
            self.compile_vardec()
        self.compile_statements()
        self._require_sym('}')

    @record_non_terminal('varDec')
    def compile_vardec(self):
        """Compile one local variable declaration via ``compile_declare``."""
        # 'var' type varName (',' varName)* ';'
        self.compile_declare()

    #########################
    # statement compilation #
    #########################

    @record_non_terminal('statements')
    def compile_statements(self):
        """Compile statements until the next token is '}'.

        Grammar: (letStatement | ifStatement | whileStatement |
                  doStatement | returnStatement)*

        When the enclosing non-terminal (``STACK[-2]``) is a subroutine
        body, the last statement must be a ``return``.
        """
        handlers = {
            'do': self.compile_do,
            'let': self.compile_let,
            'while': self.compile_while,
            'return': self.compile_return,
            'if': self.compile_if,
        }
        last_statement = None
        while self._next_token != '}':
            self._advance()
            last_statement = self._current_token
            handler = handlers.get(last_statement)
            if handler is None:
                return self._error(expect='statement expression')
            handler()
        if STACK[-2] == 'subroutineBody' and last_statement != 'return':
            self._error(expect='return statement', get=last_statement)

    @record_non_terminal('doStatement')
    def compile_do(self):
        """Compile a do statement; the current token is written first as
        its keyword."""
        # 'do' subroutineCall ';'
        self._write_current_terminal()
        # compile identifier first
        self._advance()
        self.compile_subroutine_call()
        self._require_sym(';')

    @record_non_terminal('letStatement')
    def compile_let(self):
        """Compile a let statement; the current token is written first as
        its keyword."""
        # 'let' varName ('[' expression ']')? '=' expression ';'
        self._write_current_terminal()
        self.compile_var_name()
        if self._next_token == '[':
            self._compile_array_subscript()
        self._require_sym('=')
        self.compile_expression()
        self._require_sym(';')

    @record_non_terminal('whileStatement')
    def compile_while(self):
        """Compile a while statement; the current token is written first as
        its keyword."""
        # 'while' '(' expression ')' '{' statements '}'
        self._write_current_terminal()
        self._require_brackets('()', self.compile_expression)
        self._require_brackets('{}', self.compile_statements)

    @record_non_terminal('returnStatement')
    def compile_return(self):
        """Compile a return statement; the current token is written first
        as its keyword."""
        # 'return' expression? ';'
        self._write_current_terminal()
        if self._next_token != ';':
            self.compile_expression()
        self._require_sym(';')

    @record_non_terminal('ifStatement')
    def compile_if(self):
        """Compile an if statement with an optional else clause; the current
        token is written first as its keyword."""
        # 'if' '(' expression ')' '{' statements '}'
        # ('else' '{' statements '}')?
        self._write_current_terminal()
        self._require_brackets('()', self.compile_expression)
        self._require_brackets('{}', self.compile_statements)
        # else clause
        if self._next_token == KW_ELSE:
            self._require_kw(KW_ELSE)
            self._require_brackets('{}', self.compile_statements)

    ##########################
    # expression compilation #
    ##########################

    @record_non_terminal('expression')
    def compile_expression(self):
        """Compile an expression: a term followed by any number of
        ``op term`` pairs."""
        # term (op term)*
        self.compile_term()
        while is_op(self._next_token):
            self.compile_op()
            self.compile_term()

    @record_non_terminal('term')
    def compile_term(self):
        """Compile a single term.

        Grammar: integerConstant | stringConstant | keywordConstant |
                 varName | varName '[' expression ']' | subroutineCall |
                 '(' expression ')' | unaryOp term
        """
        if self._next_token == '(':
            self._require_brackets('()', self.compile_expression)
        elif self._next_token in ('-', '~'):
            self.compile_unaryop()
        else:
            self._advance()
            tok = self._current_token
            tok_type = self._current_tok_type
            if tok in self.kw_consts or tok_type in (T_INTEGER, T_STRING):
                self._write_current_terminal()
            elif tok_type == T_ID:
                # Compare against whole tokens: the former substring test
                # (`in '(.'`) was also true for an empty next token.
                if self._next_token in ('(', '.'):
                    self.compile_subroutine_call()
                else:
                    self._write_current_terminal()
                    if self._next_token == '[':
                        self._compile_array_subscript()
            else:
                self._error(expect='term')

    def compile_call_name(self):
        """Emit the leading name of a subroutine call.

        That name is a className or varName when a '.' follows it, or a
        subroutineName when a '(' follows it.  For now it is written out
        without deciding which of those it is; this method is the place to
        extend once that analysis is needed.
        """
        is_identifier = self._current_tok_type == T_ID
        if not is_identifier:
            self._error(expect_types=(T_ID,))
        self._write_current_terminal()

    def compile_subroutine_call(self):
        """Compile a subroutine call whose first name is the current token.

        Grammar:
            subroutineName '(' expressionList ')' |
            (className | varName) '.' subroutineName '(' expressionList ')'

        The caller has already advanced onto the first name, so it is
        emitted here via ``compile_call_name`` rather than consumed again.
        """
        self.compile_call_name()
        has_qualifier = self._next_token == '.'
        if has_qualifier:
            # Qualified form: '.' followed by the subroutine name proper.
            self._require_sym('.')
            self.compile_subroutine_name()
        argument_list = self.compile_expressionlist
        self._require_brackets('()', argument_list)

    @record_non_terminal('expressionList')
    def compile_expressionlist(self):
        """Compile a possibly empty, comma-separated list of expressions.

        Grammar: (expression (',' expression)*)?

        The list is considered finished as soon as ')' is the next token.
        """
        has_first_item = self._next_token != ')'
        if has_first_item:
            self.compile_expression()
        # Every further item must be introduced by a ','.
        while self._next_token != ')':
            self._require_sym(',')
            self.compile_expression()

    def compile_op(self):
        """Advance onto a binary operator and emit it.

        '~' is not accepted in this position and is reported through
        ``_traceback``.
        """
        self._advance()
        found_tilde = self._current_token == '~'
        if found_tilde:
            self._traceback('Unexpected operator: ~')
        self._write_current_terminal()

    def compile_unaryop(self):
        """Compile the ``unaryOp term`` form of a term.

        Advances onto the operator symbol (- or ~), emits it, then compiles
        the term it applies to.
        """
        steps = (
            self._advance,
            self._write_current_terminal,      # symbol: - or ~
            self.compile_term,
        )
        for step in steps:
            step()

    def _compile_array_subscript(self):
        """Compile an array subscript.

        Grammar: '[' expression ']'
        """
        index_expression = self.compile_expression
        self._require_brackets('[]', index_expression)

    def _check_EOF(self):
        """Report an error through ``_traceback`` if the tokenizer has no
        more tokens."""
        if self.tokenizer.has_more_tokens():
            return
        self._traceback("Unexpected EOF.")

    def _error(self, expect_toks=(), expect_types=(), expect=None, get=None):
        """Build an "Expect ... but get ..." message and report it.

        Parameters:
            expect_toks: literal tokens that would have been accepted; each
                is shown in double quotes.
            expect_types: token types that would have been accepted; each is
                shown as ``type <tag>`` using ``token_tags``.
            expect: ready-made description of what was expected.  When given,
                ``expect_toks`` and ``expect_types`` are ignored.
            get: the offending token; defaults to the current token.

        Returns whatever ``_traceback`` returns; ``_traceback`` raises
        ``CompileError``, so in practice this call does not return.
        """
        if expect is None:
            # Collect every acceptable alternative in one list and join it
            # once.  The previous code referenced undefined names and passed
            # two arguments to str.join, so it crashed on this path.
            alternatives = ['"{0}"'.format(t) for t in expect_toks]
            alternatives.extend(
                'type {0}'.format(token_tags[t]) for t in expect_types)
            expect = ' or '.join(alternatives)
        if get is None:
            get = self._current_token
        me = 'Expect {0} but get "{1}"'.format(expect, get)
        return self._traceback(me)

    def _traceback(self, message):
        """Raise ``CompileError`` for *message*, prefixed with the
        tokenizer's file name and line count."""
        source = self.tokenizer
        where = 'file: "{0}"'.format(source.filename)
        position = 'line {0}'.format(source.line_count)
        detail = "{0}, {1}: {2}".format(where, position, message)
        raise CompileError(detail)