Code Example #1
def __init__(self, tokens, filepath):
    # compilation engine init
    self.lex = tokens
    self.symbols = SymbolTable()
    self.vm = VMWriter(filepath)
    self.compile_class()
    self.vm.closeout()
Code Example #2
    def __init__(self, jack_file):
        self.vm_writer = VMWriter(jack_file)
        self.tokenizer = JackTokenizer(jack_file)
        self.symbol_table = SymbolTable()

        self.if_index = -1
        self.while_index = -1
Code Example #3
    def __init__(self, token_stream, out_file, xml_name):
        '''
        creates a new compilation engine with the given input and output.
        The next method called must be compileClass().
        '''
        self.stream = token_stream
        self.writer = VMWriter(out_file)
        self.symbols = SymbolTable()
        self.xml_name = xml_name
        self.root = ET.Element('class')

        self.stream.advance()
        assert self.stream.keyword() == 'class'
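
Example #3 also builds an ElementTree (self.root) alongside the VM output, but the snippet does not show how that tree reaches the xml_name file. A hypothetical finishing step, assuming ET is xml.etree.ElementTree as in the constructor, might look like this (the method name write_xml is an assumption, not the project's code):

    def write_xml(self):
        # Hypothetical: serialize the parse tree held in self.root to the
        # xml_name path that was passed to __init__.
        ET.ElementTree(self.root).write(self.xml_name)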
Code Example #4
    def __init__(self, jack_tokenizer: JackTokenizer, output_path: str):
        super().__init__()
        self.tokenizer = jack_tokenizer
        self.table = SymbolTable()
        self.writer = VMWriter(output_path)
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()

        self.class_name = ''
        self.curr_func_name = ''
        self._if_count = 0
        self._while_count = 0

        self.compile_class()
Code Example #5
def compile(self, out_fname: str) -> None:
    tknizer = Tokenizer(self._jack_fname)
    with VMWriter(out_fname) as writer:
        self._writer = writer
        token = self._compile_class(tknizer, tknizer.next_token())
        if token:
            raise CompilationException(
                f"Expected end of file, found {token}")
Code Example #6
    def __init__(self, tokenizer: JackTokenizer, out_path: Path):
        self.tokenizer = tokenizer

        # Create symbol tables
        self.class_level_st = SymbolTable()
        self.subroutine_level_st = SymbolTable()

        # class's name
        self.class_name = None
        self.func_name = None
        self.sub_type = None

        # Open the output file for writing
        self.out_stream = out_path.open('w')

        # Create a new VM writer for writing
        self.vm_writer = VMWriter(out_path.with_suffix(".vm"))

        # For generating labels
        self.label_count = {"if": 0, "while": 0}
Code Example #7
    def __init__(self, input_file, output_file):
        self.tokenizer = JackTokenizer(input_file)
        self.out = open(output_file, 'w')
        self.token = None
        self.class_name = None

        #######################
        ### PROJECT 11 CODE ###
        #######################

        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(output_file)
Code Example #8
def main(argv):
    """
    Main program flow: extract the .jack files to read and initialize the .vm files to translate them into.
    """
    if not check_args(argv):
        return

    # Extract the .jack file(s) to be processed
    jack_files_path = argv[1]

    # Create a .vm file to hold each Jack file's translation into the VM language
    if os.path.isdir(jack_files_path):
        for file in os.listdir(jack_files_path):
            if file.endswith(".jack"):
                vm_file_name = "{0}/{1}.vm".format(
                    jack_files_path,
                    os.path.splitext(os.path.basename(file))[0])
                vm_writer = VMWriter(vm_file_name)
                CompilationEngine('{0}/{1}'.format(jack_files_path, file),
                                  vm_writer)
    else:
        vm_file_name = "{0}.vm".format(os.path.splitext(jack_files_path)[0])
        vm_writer = VMWriter(vm_file_name)
        CompilationEngine(jack_files_path, vm_writer)
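
main() in example #8 expects an argument vector in the style of sys.argv, with argv[1] holding the .jack file or directory. A minimal entry point that would drive it, assuming it lives in the same module:

import sys

if __name__ == '__main__':
    # Forward the full argument vector; main() reads the path from argv[1]
    main(sys.argv)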
Code Example #9
def main():
    # Input
    if len(sys.argv) != 2:
        raise ValueError('Invalid file name.')
    input_file_path = sys.argv[1]
    input_texts = get_file_text(input_file_path)
    splited_input_file_path = input_file_path.split('/')
    input_file_name = splited_input_file_path[-1]
    # Output
    output_tokenizer_file_name = '{}.xml'.format(input_file_name.split('.')[0])
    output_tokenizer_file_path = '/'.join([*splited_input_file_path[:-1], output_tokenizer_file_name])
    output_vm_file_name = '{}.vm'.format(input_file_name.split('.')[0])
    output_vm_file_path = '/'.join([*splited_input_file_path[:-1], output_vm_file_name])
    # Text Processing
    del_blank_content = lambda value: value != ''
    del_new_line_in_text = lambda value: value.replace('\n', '')
    # Remove // comments within the line and strip leading/trailing whitespace
    del_comment_in_line = lambda string: re.sub(r'//\s.*', '', string).strip()
    input_texts = list(
        filter(
            del_blank_content, map(
                del_comment_in_line, filter(
                    remove_comments, map(
                        del_new_line_in_text, input_texts
                    )
                )
            )
        )
    )
    update_input_texts = []
    for input_text in input_texts:
        # Block comments (/** */) cannot be removed by the text processing above, so this extra pass handles them
        if remove_comments(input_text):
            update_input_texts.append(input_text)

    print('output_tokenizer_file_name: {}'.format(output_tokenizer_file_name))
    print('output_vm_file_name: {}'.format(output_vm_file_name))
    with VMWriter(output_vm_file_path) as vmw:
        with CompilationEngine(update_input_texts, output_tokenizer_file_path, vmw) as engine:
            engine.compile()
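
The nested map/filter calls in example #9 apply inside-out: newlines are stripped first, lines rejected by remove_comments are dropped next, // comments are then cut, and finally blank lines are removed. The stand-alone sketch below replays that order on sample input; remove_comments is not shown in the example, so the stand-in predicate here is purely hypothetical.

import re

# Hypothetical stand-in for the example's remove_comments: treat lines that
# belong to a /** ... */ block comment as removable.
remove_comments = lambda line: not line.strip().startswith(('/**', '*', '*/'))

del_blank_content = lambda value: value != ''
del_new_line_in_text = lambda value: value.replace('\n', '')
del_comment_in_line = lambda string: re.sub(r'//\s.*', '', string).strip()

sample = ['/** Computes 1 + 2 */\n', 'let x = 1 + 2; // sum\n', '\n']
processed = list(
    filter(del_blank_content,
           map(del_comment_in_line,
               filter(remove_comments,
                      map(del_new_line_in_text, sample)))))
print(processed)  # ['let x = 1 + 2;']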
Code Example #10
class CompilationEngine:
    """NOTE remember that "is_xxx()" checks on the next token,
    and load the next token to curr_token before starting sub-methods
    using "load_next_token()" and you can use values with it
    """
    def __init__(self, jack_file):
        self.vm_writer = VMWriter(jack_file)
        self.tokenizer = JackTokenizer(jack_file)
        self.symbol_table = SymbolTable()

        self.if_index = -1
        self.while_index = -1

    # 'class' className '{' classVarDec* subroutineDec* '}'
    def compile_class(self):
        #! Beginning of all
        # * save name of the class and move on
        self.load_next_token()  # 'class'
        self.class_name = self.load_next_token()  # className
        self.load_next_token()  # curr_token = '{'

        # while next token == 'static' | 'field',
        while self.is_class_var_dec():  # check next token
            self.compile_class_var_dec()  # classVarDec*
        # while next_token == constructor | function | method
        while self.is_subroutine_dec():
            self.compile_subroutine()  # subroutineDec*
        self.vm_writer.close()

    # ('static' | 'field' ) type varName (',' varName)* ';'
    def compile_class_var_dec(self):
        kind = self.load_next_token()  # curr_token = static | field
        type = self.load_next_token()  # curr_token = type
        name = self.load_next_token()  # curr_token = varName
        self.symbol_table.define(name, type, kind.upper())
        while self.check_next_token() != ";":  # (',' varName)*
            self.load_next_token()  # ','
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, type, kind.upper())
        self.load_next_token()  # ';'
        # next_token = 'constructor' | 'function' | 'method'

    # subroutineDec: ('constructor' | 'function' | 'method') ('void' | type) subroutineName '(' parameterList ')' subroutineBody
    # subroutineBody: '{' varDec* statements '}'
    def compile_subroutine(self):
        subroutine_kind = self.load_next_token()  # ('constructor' | 'function' | 'method')
        self.load_next_token()  # ('void' | type)
        subroutine_name = self.load_next_token()  # subroutineName

        self.symbol_table.start_subroutine()  # init subroutine table
        if subroutine_kind == "method":
            self.symbol_table.define("instance", self.class_name, "ARG")

        self.load_next_token()  # curr_token '('
        self.compile_parameter_list()  # parameterList
        # next_token == ')' when escaped
        self.load_next_token()  # ')'
        self.load_next_token()  # '{'
        while self.check_next_token() == "var":
            self.compile_var_dec()  # varDec*
        # NOTE next_token is neither 'var' nor ';'
        # NOTE next_token is statements* (zero or more)

        # ANCHOR actual writing
        func_name = f"{self.class_name}.{subroutine_name}"  # Main.main
        num_locals = self.symbol_table.counts["VAR"]  # get 'var' count
        self.vm_writer.write_function(func_name, num_locals)
        if subroutine_kind == "constructor":
            num_fields = self.symbol_table.counts["FIELD"]
            self.vm_writer.write_push("CONST", num_fields)
            self.vm_writer.write_call("Memory.alloc", 1)
            self.vm_writer.write_pop("POINTER", 0)
        elif subroutine_kind == "method":
            self.vm_writer.write_push("ARG", 0)
            self.vm_writer.write_pop("POINTER", 0)

        # NOTE statement starts here
        self.compile_statements()  # statements
        self.load_next_token()  # '}'

    # ( (type varName) (',' type varName)*)?
    def compile_parameter_list(self):
        # curr_token == '('
        if self.check_next_token() != ")":
            type = self.load_next_token()  # type
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, type, "ARG")
        while self.check_next_token() != ")":
            self.load_next_token()  # ','
            type = self.load_next_token()  # type
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, type, "ARG")
        # NOTE param compilation finishes when next_token == ')'

    # 'var' type varName (',' varName)* ';'
    def compile_var_dec(self):
        self.load_next_token()  # 'var'
        type = self.load_next_token()  # type
        name = self.load_next_token()  # varName
        self.symbol_table.define(name, type, "VAR")
        while self.check_next_token() != ";":  # (',' varName)*
            self.load_next_token()  # ','
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, type, "VAR")
        self.load_next_token()  # ';'

    # statement*
    # letStatement | ifStatement | whileStatement | doStatement | returnStatement
    def compile_statements(self):
        # if next_token == let | if | while | do | return
        while self.is_statement():
            statement = self.load_next_token()  # curr_token == let | if | while | do | return
            if statement == "let":
                self.compile_let()
            elif statement == "if":
                self.compile_if()
            elif statement == "while":
                self.compile_while()
            elif statement == "do":
                self.compile_do()
            elif statement == "return":
                self.compile_return()

    # 'let' varName ('[' expression ']')? '=' expression ';'
    def compile_let(self):
        var_name = self.load_next_token()  # curr_token == varName
        var_kind = CONVERT_KIND[self.symbol_table.kind_of(var_name)]
        var_index = self.symbol_table.index_of(var_name)
        # if next_token == "["
        if self.is_array():  # array assignment
            self.load_next_token()  # curr_token == '['
            self.compile_expression()  # expression
            self.load_next_token()  # curr_token == ']'
            self.vm_writer.write_push(var_kind, var_index)
            self.vm_writer.write_arithmetic("ADD")

            self.load_next_token()  # curr_token == '='
            self.compile_expression()  # expression
            self.load_next_token()  # curr_token == ';'
            #! POP TEMP and PUSH TEMP location changed
            self.vm_writer.write_pop("TEMP", 0)
            self.vm_writer.write_pop("POINTER", 1)
            self.vm_writer.write_push("TEMP", 0)
            self.vm_writer.write_pop("THAT", 0)
        else:  # regular assignment
            self.load_next_token()  # curr_token == '='
            self.compile_expression()  # expression
            self.load_next_token()  # ';'
            self.vm_writer.write_pop(var_kind, var_index)

    # 'if' '(' expression ')' '{' statements '}' ( 'else' '{' statements '}' )?
    def compile_if(self):
        # curr_token == if
        self.if_index += 1
        if_index = self.if_index
        # TODO IF indexes count separately
        self.load_next_token()  # curr_token == '('
        self.compile_expression()  # expression
        self.load_next_token()  # ')'
        self.load_next_token()  # '{'
        # S = statement, L = label
        self.vm_writer.write_if(f"IF_TRUE{if_index}")  #! if-goto L1
        self.vm_writer.write_goto(f"IF_FALSE{if_index}")  #! goto L2
        self.vm_writer.write_label(f"IF_TRUE{if_index}")  #! label L1
        self.compile_statements()  # statements #! executing S1
        self.vm_writer.write_goto(f"IF_END{if_index}")  #! goto END
        self.load_next_token()  # '}'
        self.vm_writer.write_label(f"IF_FALSE{if_index}")  #! label L2
        if self.check_next_token() == "else":  # ( 'else' '{' statements '}' )?
            self.load_next_token()  # 'else'
            self.load_next_token()  # '{'
            self.compile_statements()  # statements #! executing S2
            self.load_next_token()  # '}'
        self.vm_writer.write_label(f"IF_END{if_index}")

    # 'while' '(' expression ')' '{' statements '}'
    def compile_while(self):
        # curr_token == while
        self.while_index += 1
        while_index = self.while_index
        self.vm_writer.write_label(f"WHILE{while_index}")
        self.load_next_token()  # '('
        self.compile_expression()  # expression
        self.vm_writer.write_arithmetic("NOT")  # eval false condition first
        self.load_next_token()  # ')'
        self.load_next_token()  # '{'
        self.vm_writer.write_if(f"WHILE_END{while_index}")
        self.compile_statements()  # statements
        self.vm_writer.write_goto(f"WHILE{while_index}")
        self.vm_writer.write_label(f"WHILE_END{while_index}")
        self.load_next_token()  # '}'

    # 'do' subroutineCall ';'
    def compile_do(self):
        # curr_token == do
        self.load_next_token()  #! to sync with compile_term()
        self.compile_subroutine_call()
        self.vm_writer.write_pop("TEMP", 0)
        self.load_next_token()  # ';'

    # 'return' expression? ';'
    def compile_return(self):
        # curr_token == return
        if self.check_next_token() != ";":
            self.compile_expression()
        else:
            self.vm_writer.write_push("CONST", 0)
        self.vm_writer.write_return()
        self.load_next_token()  # ';'

    # term (op term)*
    def compile_expression(self):
        self.compile_term()  # term
        while self.is_op():  # (op term)*
            op: str = self.load_next_token()  # op
            self.compile_term()  # term
            if op in ARITHMETIC.keys():
                self.vm_writer.write_arithmetic(ARITHMETIC[op])
            elif op == "*":
                self.vm_writer.write_call("Math.multiply", 2)
            elif op == "/":
                self.vm_writer.write_call("Math.divide", 2)

    # integerConstant | stringConstant | keywordConstant | varName |
    # varName '[' expression ']' | subroutineCall | '(' expression ')' | unaryOp term
    def compile_term(self):
        # if next_token == '~' | '-'
        if self.is_unary_op_term():
            unary_op = self.load_next_token()  # curr_token == '~' | '-'
            self.compile_term()  # term (recursive)
            self.vm_writer.write_arithmetic(ARITHMETIC_UNARY[unary_op])
        # if next_token == '(' => '(' expression ')'
        elif self.check_next_token() == "(":
            self.load_next_token()  # '('
            self.compile_expression()  # expression
            self.load_next_token()  # ')'
        # if next_token == INTEGER(const)
        elif self.check_next_type() == "INT_CONST":  # integerConstant
            self.vm_writer.write_push("CONST", self.load_next_token())  # )
        # if next_token == STRING(const)
        elif self.check_next_type() == "STRING_CONST":  # stringConstant
            self.compile_string()
        # if next_token == KEYWORD(const)
        elif self.check_next_type() == "KEYWORD":  # keywordConstant
            self.compile_keyword()
        # varName | varName '[' expression ']' | subroutineCall
        else:
            #! (varName | varName for expression | subroutine)'s base
            var_name = self.load_next_token()  # curr_token = varName | subroutineCall
            # (e.g. Screen.setColor | show() )
            #! next_token == '[' | '(' or '.' | just varName
            # varName '[' expression ']'
            if self.is_array():  # if next_token == '['
                self.load_next_token()  # '['
                self.compile_expression()  # expression
                self.load_next_token()  # ']'
                array_kind = self.symbol_table.kind_of(var_name)
                array_index = self.symbol_table.index_of(var_name)
                self.vm_writer.write_push(CONVERT_KIND[array_kind],
                                          array_index)
                self.vm_writer.write_arithmetic("ADD")
                self.vm_writer.write_pop("POINTER", 1)
                self.vm_writer.write_push("THAT", 0)
            # if next_token == "(" | "." => curr_token == subroutineCall

            #! if varName is not found, assume class or function name
            elif self.is_subroutine_call():
                # NOTE curr_token == subroutineName | className | varName
                self.compile_subroutine_call()
            # varName
            else:
                # curr_token == varName
                # FIXME cannot catch subroutine call and pass it to 'else' below
                # TODO error caught on Math.abs() part on Ball.vm
                var_kind = CONVERT_KIND[self.symbol_table.kind_of(var_name)]
                var_index = self.symbol_table.index_of(var_name)
                self.vm_writer.write_push(var_kind, var_index)

    # subroutineCall: subroutineName '(' expressionList ')' |
    # ( className | varName) '.' subroutineName '(' expressionList ')'

    # e.g. (do) game.run()
    #! in the 'do' case the call order differs from 'let game = Class.new()'
    def compile_subroutine_call(self):
        # NOTE curr_token == subroutineName | className | varName
        subroutine_caller = self.get_curr_token()
        function_name = subroutine_caller
        # _next_token()  # FIXME now it loads '.' or '('
        # func_name = identifier
        number_args = 0
        #! '.' or '(' 2 cases
        if self.check_next_token() == ".":
            self.load_next_token()  # curr_token == '.'
            subroutine_name = self.load_next_token()  # curr_token == subroutineName
            type = self.symbol_table.type_of(subroutine_caller)
            if type != "NONE":  # it's an instance
                kind = self.symbol_table.kind_of(subroutine_caller)
                index = self.symbol_table.index_of(subroutine_caller)
                self.vm_writer.write_push(CONVERT_KIND[kind], index)
                function_name = f"{type}.{subroutine_name}"
                number_args += 1
            else:  # it's a class
                class_name = subroutine_caller
                function_name = f"{class_name}.{subroutine_name}"
        elif self.check_next_token() == "(":
            subroutine_name = subroutine_caller
            function_name = f"{self.class_name}.{subroutine_name}"
            number_args += 1
            self.vm_writer.write_push("POINTER", 0)
        self.load_next_token()  # '('
        number_args += self.compile_expression_list()  # expressionList
        self.load_next_token()  # ')'
        self.vm_writer.write_call(function_name, number_args)

    # (expression (',' expression)* )?
    def compile_expression_list(self):
        number_args = 0
        if self.check_next_token() != ")":
            number_args += 1
            self.compile_expression()
        while self.check_next_token() != ")":
            number_args += 1
            self.load_next_token()  # curr_token == ','
            self.compile_expression()
        return number_args

    def compile_string(self):
        string = self.load_next_token()  # curr_token == stringConstant
        self.vm_writer.write_push("CONST", len(string))
        self.vm_writer.write_call("String.new", 1)
        for char in string:
            self.vm_writer.write_push("CONST", ord(char))
            self.vm_writer.write_call("String.appendChar", 2)

    def compile_keyword(self):
        keyword = self.load_next_token()  # curr_token == keywordConstant
        if keyword == "this":
            self.vm_writer.write_push("POINTER", 0)
        else:
            self.vm_writer.write_push("CONST", 0)
            if keyword == "true":
                self.vm_writer.write_arithmetic("NOT")

    def is_subroutine_call(self):
        return self.check_next_token() in [".", "("]

    def is_array(self):
        return self.check_next_token() == "["

    def is_class_var_dec(self):
        return self.check_next_token() in ["static", "field"]

    def is_subroutine_dec(self):
        return self.check_next_token() in ["constructor", "function", "method"]

    def is_statement(self):
        return self.check_next_token() in [
            "let", "if", "while", "do", "return"
        ]

    def is_op(self):
        return self.check_next_token() in [
            "+", "-", "*", "/", "&", "|", "<", ">", "="
        ]

    def is_unary_op_term(self):
        return self.check_next_token() in ["~", "-"]

    def check_next_token(self):
        return self.tokenizer.next_token[1]

    def check_next_type(self):
        return self.tokenizer.next_token[0]

    def get_curr_token(self):
        return self.tokenizer.curr_token[1]

    def load_next_token(self):
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()  # curr_token = next_token
            return self.tokenizer.curr_token[1]
        else:
            return ""
Code Example #11
class Engine(object):
    def __init__(self, tokens, filepath):
        # compilation engine init
        self.lex = tokens
        self.symbols = SymbolTable()
        self.vm = VMWriter(filepath)
        self.compile_class()
        self.vm.closeout()

    # Routines to advance the token
    def _require(self, tok, val=None):
        lextok, lexval = self._advance()
        if tok != lextok or tok in (T_KEYWORD, T_SYM) and val != lexval:
            raise Exception(self._require_failed_msg(tok, val))
        else:
            return lexval

    def _require_failed_msg(self, tok, val):
        if val is None:
            val = token_list[tok]
        return 'Expected: {0}, {1} \ntoken is: {2}'.format(tok, val, self.lex.tokens)

    def _advance(self):
        return self.lex.advance()

    def vm_function_name(self):
        return self._cur_class + '.' + self._cur_subroutine

    def vm_push_variable(self, name):
        (type, kind, index) = self.symbols.lookup(name)
        self.vm.write_push(segments[kind], index)

    def vm_pop_variable(self, name):
        (type, kind, index) = self.symbols.lookup(name)
        self.vm.write_pop(segments[kind], index)

    def load_this_ptr(self, kwd):
        if kwd == KW_METHOD:
            self.vm.push_arg(0)
            self.vm.pop_this_ptr()  # set up 'this' pointer to point to new object
        elif kwd == KW_CONSTRUCTOR:
            self.vm.push_const(self.symbols.var_count(SK_FIELD))  # object size
            self.vm.write_call('Memory.alloc', 1)
            self.vm.pop_this_ptr()  # set up 'this' pointer to point to new object

    def write_func_decl(self, kwd):
        self.vm.write_function(self.vm_function_name(), self.symbols.var_count(SK_VAR))
        self.load_this_ptr(kwd)

    def write_string_const_init(self, val):
        self.vm.push_const(len(val))
        self.vm.write_call('String.new', 1)         # String.new(len(str))
        for c in val:
            self.vm.push_const(ord(c))
            self.vm.write_call('String.appendChar', 2)  # String.appendChar(nextchar)

    label_num = 0

    def new_label(self):
        self.label_num += 1
        return 'label' + str(self.label_num)

    # ------------- verify part ----------------

    def _is_token(self, tok, val=None):
        lextok, lexval = self.lex.peek()
        return (val is None and lextok == tok) or (lextok, lexval) == (tok, val)

    def _is_keyword(self, *keywords):
        lextok, lexval = self.lex.peek()
        return lextok == T_KEYWORD and lexval in keywords

    def _is_sym(self, symbols):
        lextok, lexval = self.lex.peek()
        return lextok == T_SYM and lexval in symbols

    # Variable declarations
    def _is_class_var_dec(self):
        return self._is_keyword(KW_STATIC, KW_FIELD)

    def _is_type(self):
        return self._is_token(T_ID) or self._is_keyword(KW_INT, KW_CHAR, KW_BOOLEAN)

    # Subroutine declarations
    def _is_subroutine(self):
        return self._is_keyword(KW_CONSTRUCTOR, KW_FUNCTION, KW_METHOD)

    def _is_var_dec(self):
        return self._is_keyword(KW_VAR)

    def _is_let(self):
        return self._is_keyword(KW_LET)

    def _is_if(self):
        return self._is_keyword(KW_IF)

    def _is_while(self):
        return self._is_keyword(KW_WHILE)

    def _is_do(self):
        return self._is_keyword(KW_DO)

    def _is_return(self):
        return self._is_keyword(KW_RETURN)

    def _is_statement(self):
        return self._is_let() or self._is_if() or self._is_while() or self._is_do() or self._is_return()

    def _is_const(self):
        return self._is_token(T_NUM) or self._is_token(T_STR) or self._is_keyword_constant()

    def _is_keyword_constant(self):
        return self._is_keyword(KW_TRUE, KW_FALSE, KW_NULL, KW_THIS)

    def _is_op(self):
        return self._is_sym('+-*/&|<>=')

    def _is_unary_op(self):
        return self._is_sym('-~')

    def _is_var_name(self):
        return self._is_token(T_ID)

    def _is_builtin_type(self, type):
        return type in [KW_INT, KW_CHAR, KW_BOOLEAN, KW_VOID]

    def _is_term(self):
        return self._is_const() or self._is_var_name() or self._is_sym('(') or self._is_unary_op()

    # --------------- compile part -----------------
    # Parser and compile Jack code
    # class: 'class' className '{' classVarDec* subroutineDec* '}'
    def compile_class(self):
        self._require(T_KEYWORD, KW_CLASS)
        self.compile_class_name()
        self._require(T_SYM, '{')
        while self._is_class_var_dec():
            self.compile_class_var_dec()
        while self._is_subroutine():
            self.compile_subroutine()
        self._require(T_SYM, '}')

    # className: identifier
    def compile_class_name(self):
        self._cur_class = self.compile_var_name()  # Class names don't have to go into the symbol table

    # type varName (',' varName)* ';'
    def _compile_dec(self, kind):
        type = self.compile_type()
        name = self.compile_var_name()
        self.symbols.define(name, type, kind)
        while self._is_sym(','):
            self._advance()
            name = self.compile_var_name()
            self.symbols.define(name, type, kind)
        self._require(T_SYM, ';')

    def compile_type(self):
        """
        type: 'int' | 'char' | 'boolean' | className
        """
        if self._is_type():
            return self._advance()[1]
        else:
            raise ValueError(self._require_failed_msg(*self.lex.peek()))

    # classVarDec: {'static'|'field'} type varName (',' varName)* ';'
    def compile_class_var_dec(self):
        tok, kwd = self._advance()  # static | field
        self._compile_dec(kwd_to_kind[kwd])

    # varName: identifier
    def compile_var_name(self):
        return self._require(T_ID)

    # subroutineDec: ('constructor'|'function'|'method') ('void'|type)
    #                subroutineName '(' parameterList ')' subroutineBody
    def compile_subroutine(self):
        tok, kwd = self._advance()
        type = self.compile_void_or_type()
        self.compile_subroutine_name()
        self.symbols.start_subroutine()
        if kwd == KW_METHOD:
            self.symbols.define('this', self._cur_class, SK_ARG)
        self._require(T_SYM, '(')
        self.compile_parameter_list()
        self._require(T_SYM, ')')
        self.compile_subroutine_body(kwd)

    # 'void' | type
    def compile_void_or_type(self):
        if self._is_keyword(KW_VOID):
            return self._advance()[1]
        else:
            return self.compile_type()

    # subroutineName: identifier
    def compile_subroutine_name(self):
        self._cur_subroutine = self.compile_var_name()  # subroutine names don't have to go in the symbol table

    # parameterList: (parameter (',' parameter)*)?
    def compile_parameter_list(self):
        if self._is_type():
            self.compile_parameter()
            while self._is_sym(','):
                self._advance()
                self.compile_parameter()

    # parameter: type varName
    def compile_parameter(self):
        if self._is_type():
            type = self.compile_type()
            name = self.compile_var_name()
            self.symbols.define(name, type, SK_ARG)

    # subroutineBody: '{' varDec* statements '}'
    def compile_subroutine_body(self, kwd):
        self._require(T_SYM, '{')
        while self._is_var_dec():
            self.compile_var_dec()
        self.write_func_decl(kwd)
        self.compile_statements()

    # varDec: 'var' type varName (',' varName)* ';'
    def compile_var_dec(self):
        self._require(T_KEYWORD, KW_VAR)
        return self._compile_dec(SK_VAR)

    # statement: statement*
    def compile_statements(self):
        while self._is_statement():
            self._compile_statement()

    # statement: letStatement | ifStatement | whileStatement | doStatement | returnStatement
    def _compile_statement(self):
        if self._is_let():
            self.compile_let()
        elif self._is_if():
            self.compile_if()
        elif self._is_while():
            self.compile_while()
        elif self._is_do():
            self.compile_do()
        elif self._is_return():
            self.compile_return()

    # letStatement: 'let' varName ('[' expression ']')? '=' expression ';'
    def compile_let(self):
        self._require(T_KEYWORD, KW_LET)
        name = self.compile_var_name()
        subscript = self._is_sym('[')
        if subscript:
            self.compile_base_plus_index(name)  # calculate base+index
        self._require(T_SYM, '=')
        self.compile_expression()  # calculate expression to assign
        self._require(T_SYM, ';')
        if subscript:
            self.pop_array_element()  # *(base+index) = expr
        else:
            self.vm_pop_variable(name)  # pop value directly into variable

    def pop_array_element(self):
        self.vm.pop_temp(TEMP_ARRAY)        # Pop expr value to temp register
        self.vm.pop_that_ptr()              # Pop base+index into 'that' register
        self.vm.push_temp(TEMP_ARRAY)       # Push expr back onto stack
        self.vm.pop_that()                  # Pop value into *(base+index)

    # ('[' expression ']')?
    def compile_base_plus_index(self, name):
        self.vm_push_variable(name)  # push array ptr onto stack
        self._advance()
        self.compile_expression()  # push index onto stack
        self._require(T_SYM, ']')
        self.vm.write_vm_cmd('add')  # base+index - leave on the stack for later

    def compile_expression(self):
        self.compile_term()
        # Doesn't handle normal order of operations - just left to right for now
        while self._is_op():
            op = self._advance()
            self.compile_term()
            self.vm.write_vm_cmd(vm_cmds[op[1]])  # op

    # term: integerConstant | stringConstant | keywordConstant | varName
    #     | varName '[' expression ']' | subroutineCall | '(' expression ')'
    #     | unaryOp term
    def compile_term(self):
        if self._is_const():
            self.compile_const()
        elif self._is_sym('('):
            self._advance()
            self.compile_expression()  # VM code to evaluate expression
            self._require(T_SYM, ')')
        elif self._is_unary_op():
            tok, op = self._advance()
            self.compile_term()
            self.vm.write_vm_cmd(vm_unary_cmds[op])  # op
        elif self._is_var_name():
            tok, name = self._advance()
            if self._is_sym('['):
                self.compile_array_subscript(name)  # VM code for array subscript
            elif self._is_sym('(.'):
                self.compile_subroutine_call(name)  # VM code for subroutine call
            else:
                self.vm_push_variable(name)  # push variable on stack

    # integerConstant | stringConstant | keywordConstant
    def compile_const(self):
        tok, val = self._advance()
        if tok == T_NUM:
            self.vm.push_const(val)                 # push constant val
        elif tok == T_STR:
            self.write_string_const_init(val)       # initialize string & push str addr
        elif tok == T_KEYWORD:
            self.compile_kwd_const(val)             # push TRUE, FALSE, NULL etc.

    # '[' expression ']'
    def compile_array_subscript(self, name):
        self.vm_push_variable(name)     # push array ptr onto stack
        self._require(T_SYM, '[')
        self.compile_expression()       # push index onto stack
        self._require(T_SYM, ']')
        self.vm.write_vm_cmd('add')     # base+index
        self.vm.pop_that_ptr()          # pop into 'that' ptr
        self.vm.push_that()             # push *(base+index) onto stack

    # subroutineCall: subroutineName '(' expressionList ')'
    #               | (className | varName) '.' subroutineName '(' expressionList ')'
    def compile_subroutine_call(self, name):
        (type, kind, index) = self.symbols.lookup(name)
        if self._is_sym('.'):
            num_args, name = self.compile_dotted_subroutine_call(name, type)
        else:
            num_args = 1
            self.vm.push_this_ptr()
            name = self._cur_class+'.'+name
        self._require(T_SYM, '(')
        num_args += self.compile_expr_list() # VM code to push arguments
        self._require(T_SYM, ')')
        self.vm.write_call(name, num_args)  # call name num_args

    # keywordConstant: 'true' | 'false' | 'null' | 'this'
    def compile_kwd_const(self, kwd):
        if kwd == KW_THIS:
            self.vm.push_this_ptr()
        elif kwd == KW_TRUE:
            self.vm.push_const(1)
            self.vm.write_vm_cmd('neg')
        else:   # KW_FALSE or KW_NULL
            self.vm.push_const(0)

    def compile_dotted_subroutine_call(self, name, type):
        num_args = 0
        obj_name = name
        self._advance()
        name = self.compile_var_name()
        if self._is_builtin_type(type):     # e.g. int.func(123) not allowed
            raise ValueError('Cannot use "." operator on builtin type')
        elif type == None:                  # Calling using class name
            name = obj_name+'.'+name
        else:                               # Calling using object variable name
            num_args = 1
            self.vm_push_variable(obj_name) # push object ptr onto stack
            name = self.symbols.type_of(obj_name)+'.'+name
        return num_args, name

    # expressionList: (expression (',' expression)*)?
    def compile_expr_list(self):
        num_args = 0
        if self._is_term():
            self.compile_expression()
            num_args = 1
            while self._is_sym(','):
                self._advance()
                self.compile_expression()
                num_args += 1
        return num_args

    # ifStatement: 'if' '(' expression ')' '{' statements '}'
    #              ('else' '{' statements '}')?
    def compile_if(self):
        self._require(T_KEYWORD, KW_IF)
        end_label = self.new_label()
        self._compile_cond_expression_statements(end_label) # VM code for condition and if statements
        if self._is_keyword(KW_ELSE):
            self._advance()
            self._require(T_SYM, '{')
            self.compile_statements()   # VM code for else statements
            self._require(T_SYM, "}")
        self.vm.write_label(end_label)  # label end_label

    # '(' expression ')' '{' statements '}'
    def _compile_cond_expression_statements(self, label):
        self._require(T_SYM, '(')
        self.compile_expression()
        self._require(T_SYM, ')')
        self.vm.write_vm_cmd('not')     # ~(cond)
        notif_label = self.new_label()
        self.vm.write_if(notif_label)   # if-goto notif_label
        self._require(T_SYM, '{')
        self.compile_statements()       # VM code for if statements
        self._require(T_SYM, '}')
        self.vm.write_goto(label)       # goto label
        self.vm.write_label(notif_label)  # label notif_label

    # whileStatement: 'while' '(' expression ')' '{' statements '}'
    def compile_while(self):
        self._require(T_KEYWORD, KW_WHILE)
        top_label = self.new_label()
        self.vm.write_label(top_label)                      # label top_label
        self._compile_cond_expression_statements(top_label) # VM code for condition and while statements

    # do_statement: 'do' subroutineCall ';'
    def compile_do(self):
        self._require(T_KEYWORD, KW_DO)
        name = self._require(T_ID)
        self.compile_subroutine_call(name)  # VM code for subroutine call
        self.vm.pop_temp(TEMP_RETURN)       # Pop return value and discard
        self._require(T_SYM, ';')

    # returnStatement: 'return' expression? ';'
    def compile_return(self):
        self._require(T_KEYWORD, KW_RETURN)
        if not self._is_sym(';'):
            self.compile_expression()   # VM code for return expression if any
        else:
            self.vm.push_const(0)       # push 0 if not returning a value
        self._require(T_SYM, ';')
        self.vm.write_return()          # return
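
Example #11 layers convenience helpers (push_const, push_arg, push_temp, pop_temp, push_this_ptr, pop_this_ptr, pop_that_ptr, push_that, pop_that, write_vm_cmd, ...) on top of generic push/pop primitives. None of those helpers are shown; the sketch below is one way they could be expressed, and the bodies are assumptions rather than the project's actual VMWriter (the write_call / write_function / write_label / write_if / write_goto / write_return methods sketched after example #10 would also be needed).

class VMWriter:
    """Sketch of the helper layer example #11 assumes on its VMWriter."""

    def __init__(self, filepath):
        self._out = open(filepath, 'w')

    # Primitives
    def write_vm_cmd(self, cmd, arg1='', arg2=''):
        self._out.write(f"{cmd} {arg1} {arg2}".rstrip() + "\n")

    def write_push(self, segment, index):
        self.write_vm_cmd('push', segment, index)

    def write_pop(self, segment, index):
        self.write_vm_cmd('pop', segment, index)

    # Convenience wrappers used by the engine above
    def push_const(self, val):
        self.write_push('constant', val)

    def push_arg(self, index):
        self.write_push('argument', index)

    def push_temp(self, index):
        self.write_push('temp', index)

    def pop_temp(self, index):
        self.write_pop('temp', index)

    def push_this_ptr(self):
        self.write_push('pointer', 0)

    def pop_this_ptr(self):
        self.write_pop('pointer', 0)

    def pop_that_ptr(self):
        self.write_pop('pointer', 1)

    def push_that(self):
        self.write_push('that', 0)

    def pop_that(self):
        self.write_pop('that', 0)

    def closeout(self):
        self._out.close()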
Code Example #12
class CompilationEngine:
    """Generates the compiler's output"""
    CLASS_VAR_DEC_TOKENS = ["static", "field"]
    SUBROUTINE_TOKENS = ["function", "method", "constructor"]
    VARIABLE_TYPES = ['int', 'char', 'boolean']
    STATEMENT_TOKENS = ['do', 'let', 'while', 'return', 'if']
    OP = {'+': 'ADD', '-': 'SUB', '&': 'AND', '|': 'OR', '<': 'LT', '>': 'GT', '=': 'EQ', '*': 'Math.multiply',
          '/': 'Math.divide'}

    def __init__(self, jack_tokenizer: JackTokenizer, output_path: str):
        super().__init__()
        self.tokenizer = jack_tokenizer
        self.table = SymbolTable()
        self.writer = VMWriter(output_path)
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()

        self.class_name = ''
        self.curr_func_name = ''
        self._if_count = 0
        self._while_count = 0

        self.compile_class()

    def compile_class(self) -> None:
        """
        Compiles a complete class
        :return: None
        """
        self._consume('class')
        if self.tokenizer.token_type() != TokenTypes.IDENTIFIER:
            raise CompilationEngineError(f"{self._get_current_token()} is an invalid token at this point. Expected a "
                                         f"class name.")

        self.class_name = self._get_current_token()
        self._consume(TokenTypes.IDENTIFIER)
        self._consume('{')

        while self._get_current_token() != '}':
            if self._get_current_token() in CompilationEngine.CLASS_VAR_DEC_TOKENS:
                self.compile_class_var_dec()
            elif self._get_current_token() in CompilationEngine.SUBROUTINE_TOKENS:
                self.compile_subroutine_dec()
            else:
                raise CompilationEngineError(f"{self._get_current_token()} is an expected token at this point")

        self._consume('}')

    def compile_class_var_dec(self) -> None:
        """
        Compiles static variable declaration, or a field declaration
        :return: None.
        """
        kind = str_to_kind(self._get_current_token())
        self._consume(self.CLASS_VAR_DEC_TOKENS)
        var_type = self._get_current_token()
        self._consume_type()

        self.table.define(self._get_current_token(), var_type, kind)
        self._consume(TokenTypes.IDENTIFIER)

        while self._get_current_token() != ';':
            self._consume(',')
            self.table.define(self._get_current_token(), var_type, kind)
            self._consume(TokenTypes.IDENTIFIER)

        self._consume(';')

    def compile_subroutine_dec(self) -> None:
        """
        Compiles a complete method, function or constructor.
        :return: None
        """
        self.table.reset()
        subroutine_type = self._get_current_token()
        if subroutine_type == 'method':
            self.table.define('this', self.class_name, Kind.ARG)  # Put this as the first arg in case it's a
            # class method
        self._consume(self.SUBROUTINE_TOKENS)
        try:
            self._consume_type()
        except CompilationEngineError:
            self._consume('void')

        self.curr_func_name = f'{self.class_name}.{self._get_current_token()}'
        self._consume(TokenTypes.IDENTIFIER)
        self._consume('(')
        self.compile_parameter_list()
        self._consume(')')
        self.compile_subroutine_body(subroutine_type)

    def compile_parameter_list(self) -> None:
        """
        Compiles a (possibly empty) parameter list. Doesn't handle the enclosing "()".
        :return:
        """
        if self._get_current_token() != ')':
            var_type = self._get_current_token()
            self._consume_type()

            self.table.define(self._get_current_token(), var_type, Kind.ARG)
            self._consume(TokenTypes.IDENTIFIER)
            while self._get_current_token() != ')':
                self._consume(',')
                var_type = self._get_current_token()
                self._consume_type()

                self.table.define(self._get_current_token(), var_type, Kind.ARG)
                self._consume(TokenTypes.IDENTIFIER)

    def compile_subroutine_body(self, subroutine_type: str) -> None:
        """
        Compiles a subroutine's body.
        :return: None
        """
        self._consume('{')
        while self._get_current_token() == 'var':
            self.compile_var_dec()
        var_count = self.table.var_count(Kind.VAR)
        self.writer.write_function(self.curr_func_name, var_count)

        if subroutine_type == 'constructor':
            n_fields = self.table.var_count(Kind.FIELD)
            self.writer.write_push('CONST', n_fields)
            self.writer.write_call('Memory.alloc', 1)
            self.writer.write_pop('POINTER', 0)

        elif subroutine_type == 'method':
            self.writer.write_push('ARG', 0)
            self.writer.write_pop('POINTER', 0)

        while self._get_current_token() != '}':
            self.compile_statements()

        self._consume('}')

    def compile_var_dec(self) -> None:
        """
        Compiles a var declaration.
        :return: None.
        """
        self._consume('var')
        var_type = self._get_current_token()
        self._consume_type()
        self.table.define(self._get_current_token(), var_type, Kind.VAR)
        self._consume(TokenTypes.IDENTIFIER)

        while self._get_current_token() != ';':
            self._consume(',')
            self.table.define(self._get_current_token(), var_type, Kind.VAR)
            self._consume(TokenTypes.IDENTIFIER)

        self._consume(';')

    def compile_statements(self) -> None:
        """
        Compiles a sequence of statements. Doesn't handle the enclosing "{}".
        :return: None.
        """
        while self._get_current_token() != '}':
            if self._get_current_token() in self.STATEMENT_TOKENS:
                getattr(self, 'compile_' + self._get_current_token())()
            else:
                raise CompilationEngineError(f"{self._get_current_token()} is an expected token at this point")

    def compile_do(self) -> None:
        """
        Compiles a do statement.
        :return: None.
        """
        self._consume('do')
        self.compile_subroutine_call()
        self.writer.write_pop('TEMP', 0)  # void method
        self._consume(';')

    def compile_let(self) -> None:
        """
        Compiles a let statement.
        :return: None.
        """
        self._consume('let')
        name = self._get_current_token()
        kind = convert_kind(self.table.kind_of(name))
        index = self.table.index_of(name)

        self._consume(TokenTypes.IDENTIFIER)
        if self._get_current_token() == '[':
            self._consume('[')
            self.compile_expression()
            self._consume(']')

            self.writer.write_push(kind, index)
            self.writer.write_arithmetic('ADD')
            self.writer.write_pop('TEMP', 0)

            self._consume('=')
            self.compile_expression()
            self.writer.write_push('TEMP', 0)
            self.writer.write_pop('POINTER', 1)
            self.writer.write_pop('THAT', 0)

        else:
            self._consume('=')
            self.compile_expression()
            self.writer.write_pop(kind, index)

        self._consume(';')

    def compile_while(self) -> None:
        """
        Compiles a while statement.
        :return: None.
        """
        self._consume('while')
        self._consume('(')

        while_lbl = f"WHILE_{self._while_count}"
        while_false_lbl = f"WHILE_FALSE{self._while_count}"
        self._while_count += 1
        self.writer.write_label(while_lbl)

        self.compile_expression()
        self._consume(')')

        self._consume('{')
        self.writer.write_if(while_false_lbl)

        self.compile_statements()
        self.writer.write_goto(while_lbl)
        self.writer.write_label(while_false_lbl)

        self._consume('}')

    def compile_return(self) -> None:
        """
        Compiles a return statement.
        :return: None.
        """
        self._consume('return')
        if self._get_current_token() != ';':
            self.compile_expression()
        else:
            self.writer.write_push('CONST', 0)
        self.writer.write_return()
        self._consume(';')

    def compile_if(self) -> None:
        """
        Compiles an if statement, possibly with a trailing else clause.
        :return: None.
        """
        self._consume('if')
        self._consume('(')
        self.compile_expression()
        self._consume(')')

        end_lbl = f'IF_END_{self._if_count}'
        false_lbl = f'IF_FALSE_{self._if_count}'
        self._if_count += 1

        self._consume('{')
        self.writer.write_if(false_lbl)

        self.compile_statements()
        self.writer.write_goto(end_lbl)
        self.writer.write_label(false_lbl)

        self._consume('}')

        if self._get_current_token() == 'else':
            self._consume('else')
            self._consume('{')
            self.compile_statements()
            self._consume('}')

        self.writer.write_label(end_lbl)

    def compile_expression(self) -> None:
        """
        Compiles an expression.
        :return: None
        """
        self.compile_term()
        while self._get_current_token() in self.OP:
            op = self._get_current_token()
            self._consume(op)
            self.compile_term()
            if op == '*':
                self.writer.write_call('Math.multiply', 2)
            elif op == '/':
                self.writer.write_call('Math.divide', 2)
            else:
                self.writer.write_arithmetic(self.OP[op])

    def compile_term(self) -> None:
        """
        Compiles a term. If the current token is an identifier, the routine must distinguish between a variable,
        an array entry, or a subroutine call.
        :return: None.
        """
        token_type = self.tokenizer.token_type()

        if token_type == TokenTypes.IDENTIFIER:
            curr_token = self._get_current_token()
            self.tokenizer.advance()
            if self._get_current_token() in ('(', '.'):
                self.compile_subroutine_call(curr_token)
            elif self._get_current_token() == '[':
                self._consume('[')
                self.compile_expression()
                self._consume(']')

                kind = convert_kind(self.table.kind_of(curr_token))
                index = self.table.index_of(curr_token)

                self.writer.write_push(kind, index)
                self.writer.write_arithmetic('ADD')
                self.writer.write_pop('POINTER', 1)
                self.writer.write_push('THAT', 0)

            else:
                kind = convert_kind(self.table.kind_of(curr_token))
                index = self.table.index_of(curr_token)
                self.writer.write_push(kind, index)

        elif token_type == TokenTypes.INT_CONST:
            self.writer.write_push('CONST', int(self._get_current_token()))
            self._consume(token_type)

        elif token_type == TokenTypes.KEYWORD:
            curr_token = self._get_current_token()
            if curr_token in ['true', 'false', 'null']:
                self.writer.write_push('CONST', 0)
                if curr_token == 'true':
                    self.writer.write_arithmetic('NOT')
            if curr_token == 'this':
                self.writer.write_push('POINTER', 0)
            self._consume(token_type)

        elif token_type == TokenTypes.STRING_CONST:
            const_str = ''
            first = True
            while const_str.count('"') < 2:
                if first:
                    const_str += self._get_current_token()
                    first = False
                else:
                    const_str += ' ' + self._get_current_token()
                if self.tokenizer.has_more_tokens():
                    self.tokenizer.advance()
            const_str = const_str.replace('"', '')

            self.writer.write_push('CONST', len(const_str))
            self.writer.write_call('String.new', 1)

            for char in const_str:
                self.writer.write_push('CONST', ord(char))
                self.writer.write_call('String.appendChar', 2)

        else:
            if self._get_current_token() == '(':
                self._consume('(')
                self.compile_expression()
                self._consume(')')
            else:
                op = self._get_current_token()
                self._consume(['-', '~'])  # unaryOp term
                self.compile_term()
                if op == '-':
                    self.writer.write_arithmetic('NEG')
                else:
                    self.writer.write_arithmetic('NOT')

    def compile_subroutine_call(self, subroutine_name=None) -> None:
        n_args = 0
        if not subroutine_name:
            subroutine_name = self._get_current_token()
            self._consume(TokenTypes.IDENTIFIER)

        if self._get_current_token() == '.':
            self._consume('.')
            sub_name = self._get_current_token()
            self._consume(TokenTypes.IDENTIFIER)
            try:  # Instance
                var_type = self.table.type_of(subroutine_name)
                kind = convert_kind(self.table.kind_of(subroutine_name))
                index = self.table.index_of(subroutine_name)
                self.writer.write_push(kind, index)
                func_name = f'{var_type}.{sub_name}'
            except KeyError:  # Class
                func_name = f'{subroutine_name}.{sub_name}'

        else:
            func_name = f'{self.class_name}.{subroutine_name}'
            n_args += 1
            self.writer.write_push('POINTER', 0)  # push 'this' as the implicit first argument

        self._consume('(')
        n_args += self.compile_expression_list()
        self._consume(')')

        self.writer.write_call(func_name, n_args)

    def compile_expression_list(self) -> int:
        """
        Compiles a (possibly empty) comma-separated list of expressions.
        :return: Int. Number of arguments.
        """
        n_args = 0

        if self._get_current_token() != ')':
            self.compile_expression()
            n_args += 1
            while self._get_current_token() == ',':
                self._consume(',')
                self.compile_expression()
                n_args += 1
        return n_args

    @singledispatchmethod
    def _consume(self, expected) -> None:
        """
        Check if the current token matches what it's expected to be. Either by value or by type.
        In case of a match, the function will advance to the next token.
        Otherwise the function will raise CompilationEngineError.
        :return: None
        """
        raise TypeError("Unsupported type: ", type(expected))

    @_consume.register(str)
    @_consume.register(list)
    def _(self, expected_tokens) -> None:
        """Consume by value"""
        if not isinstance(expected_tokens, list):
            expected_tokens = [expected_tokens]

        curr_token = self._get_current_token()
        if curr_token not in expected_tokens:
            raise CompilationEngineError(f"Expected {expected_tokens} but current token is {curr_token}. "
                                         f"Compilation failed.")
        else:
            if self.tokenizer.has_more_tokens():
                self.tokenizer.advance()

    @_consume.register
    def _(self, expected_types: TokenTypes):
        """Consume by type"""
        if not isinstance(expected_types, list):
            expected_types = [expected_types]
        curr_type = self.tokenizer.token_type()
        if curr_type not in expected_types:
            raise CompilationEngineError(f"Expected {expected_types} but current token type is {curr_type}. "
                                         f"Compilation failed.")
        else:
            if self.tokenizer.has_more_tokens():
                self.tokenizer.advance()

    def _consume_type(self):
        """
        Int / char / boolean / class name
        :return: None.
        """
        try:
            self._consume(self.VARIABLE_TYPES)
        except CompilationEngineError:
            self._consume(TokenTypes.IDENTIFIER)  # Class name

    def _get_current_token(self) -> str:
        token_type = self.tokenizer.token_type()
        if token_type is TokenTypes.INT_CONST:
            curr_token = str(self.tokenizer.int_val())
        elif token_type is TokenTypes.KEYWORD:
            curr_token = self.tokenizer.key_word()
        else:
            curr_token = self.tokenizer.current_token

        return curr_token
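
The constructor in example #12 takes an already-constructed JackTokenizer plus an output path and compiles the whole class as a side effect of construction. Assuming JackTokenizer is built from a .jack file path, as in examples #2 and #7, a driver could be as small as:

# Hypothetical driver: compile Main.jack into Main.vm with example #12's engine
tokenizer = JackTokenizer('Main.jack')
CompilationEngine(tokenizer, 'Main.vm')  # __init__ itself runs compile_class()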
Code Example #13
File: compilation.py Project: dankor91/kj-nand
	def __init__(self, source, destination):
		self.src = source
		self.dst = destination
		self.writer = VMWriter(destination)
		self.iter = Lookahead(tokenizor.newTokenizor(self.src))
		self._symbol_table = SymbolTable()
Code Example #14
File: compilation.py Project: dankor91/kj-nand
class CompilationEngine:
	def __init__(self, source, destination):
		self.src = source
		self.dst = destination
		self.writer = VMWriter(destination)
		self.iter = Lookahead(tokenizor.newTokenizor(self.src))
		self._symbol_table = SymbolTable()

	def compile(self):
		root = self._compileClass()
		return root

	def _compileClass(self):
		classE = Element(ELEMENTS.CLASS)
		self._readKeyword(classE, ELEMENTS.CLASS)
		self.className = self._readIdentifier(classE)
		self._readSymbol(classE, _SYMBOLS.BRACKET_CURLY_OPEN)
		self._compileClassVarDec(classE)
		self._compileSubroutine(classE)
		self._readSymbol(classE, _SYMBOLS.BRACKET_CURLY_CLOSE)
		return classE

	def _compileClassVarDec(self, parent):
		while self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _CLASSVARDEC.FIELD_TYPES:
			classVarDecE = Element(ELEMENTS.CLASSVARDEC)
			self._readKeyword(classVarDecE)
			self._readType(classVarDecE)
			self._readIdentifier(classVarDecE)
			while self._readSymbolOptional(classVarDecE, _SYMBOLS.COMMA):
				self._readIdentifier(classVarDecE)
			self._readSymbol(classVarDecE, _SYMBOLS.SEMI_COLON)
			parent.append(classVarDecE)

	def _compileSubroutine(self, parent):
		while self.nextTok and self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _SUBROUTINEDEC.TYPES:
			subroutineDecE = Element(ELEMENTS.SUBROUTINEDEC)
			function_type = self._readKeyword(subroutineDecE)
			self._readReturnType(subroutineDecE)
			self.methodName = self._readIdentifier(subroutineDecE)
			self._symbol_table.startSubroutine(self.className, self.methodName)
			if function_type == _SUBROUTINEDEC.METHOD:
				self._symbol_table.define("this", self.className, SYM_KINDS.ARG)
			self._uid = -1
			self._readSymbol(subroutineDecE, _SYMBOLS.PARENTHESES_OPEN)
			self._compileParameters(subroutineDecE)
			self._readSymbol(subroutineDecE, _SYMBOLS.PARENTHESES_CLOSE)
			self._compileSubroutineBody(subroutineDecE, function_type)
			parent.append(subroutineDecE)

	def _gen_label(self, type_):
		self._uid += 1
		return "%s.%s.%s.%d" % (self.className, self.methodName, type_, self._uid)

	def _gen_labels(self, *parts):
		self._uid += 1
		return ["%s.%s.%s.%d" % (self.className, self.methodName, part, self._uid) for part in parts]

	def _compileSubroutineBody(self, parent, function_type):
		bodyE = Element(ELEMENTS.SUBROUTINEBODY)
		self._readSymbol(bodyE, _SYMBOLS.BRACKET_CURLY_OPEN)
		nArgs = self._compileVarDec(bodyE)
		function_name = parent[2].text
		function_full_name = "%s.%s" % (self.className, function_name)
		self.writer.writeFunction(function_full_name, nArgs)
		if function_type == _SUBROUTINEDEC.CONSTRUCTOR:
			field_count = self._symbol_table.varCount(SYM_KINDS.FIELD)
			self.writer.writePush(SEGMENT.CONST, field_count)
			self.writer.writeCall("Memory.alloc", 1)
			self.writer.writePop(SEGMENT.POINTER, 0)
		elif function_type == _SUBROUTINEDEC.METHOD:
			self.writer.writePush(SEGMENT.ARG, 0)
			self.writer.writePop(SEGMENT.POINTER, 0)
		self._compileStatements(bodyE)
		self._readSymbol(bodyE, _SYMBOLS.BRACKET_CURLY_CLOSE)
		parent.append(bodyE)

	def _compileStatements(self, parent):
		statementsE = Element(ELEMENTS.STATEMENTS)
		while self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _STATEMENTS.TYPE_NAMES:
			if self.nextTok.value == _STATEMENTS.LET:
				statementE = Element(ELEMENTS.STATEMENT_LET)
				self._readKeyword(statementE)
				identifier = self._readIdentifier(statementE)
				is_array = False
				if self._readSymbolOptional(statementE, _SYMBOLS.BRACKET_OPEN):
					is_array = True
					self._compileExpression(statementE)
					self.writer.writePush(*self._identifier_data(identifier))
					self.writer.writeArithmetic("add")
					self._readSymbol(statementE, _SYMBOLS.BRACKET_CLOSE)
				self._readSymbol(statementE, _SYMBOLS.EQUAL)
				self._compileExpression(statementE)
				self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
				if is_array:
					self.writer.writePop(SEGMENT.TEMP, 0)
					self.writer.writePop(SEGMENT.POINTER, 1)
					self.writer.writePush(SEGMENT.TEMP, 0)
					self.writer.writePop(SEGMENT.THAT, 0)
				else:
					self.writer.writePop(*self._identifier_data(identifier))
				statementsE.append(statementE)
			elif self.nextTok.value == _STATEMENTS.IF:
				label_else, label_end = self._gen_labels("if.else", "if.end")
				statementE = Element(ELEMENTS.STATEMENT_IF)
				self._readKeyword(statementE)
				self._readSymbol(statementE, _SYMBOLS.PARENTHESES_OPEN)
				self._compileExpression(statementE)
				self._readSymbol(statementE, _SYMBOLS.PARENTHESES_CLOSE)
				self.writer.writeArithmetic("not")
				self.writer.writeIf(label_else)
				self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
				self._compileStatements(statementE)
				self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
				self.writer.writeGoto(label_end)
				self.writer.writeLabel(label_else)
				if self._readKeywordOptional(statementE, _KEYWORDS.ELSE):
					self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
					self._compileStatements(statementE)
					self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
				self.writer.writeLabel(label_end)
				statementsE.append(statementE)
			elif self.nextTok.value == _STATEMENTS.WHILE:
				label_start, label_end = self._gen_labels("while.start", "while.end")
				self.writer.writeLabel(label_start)
				statementE = Element(ELEMENTS.STATEMENT_WHILE)
				self._readKeyword(statementE)
				self._readSymbol(statementE, _SYMBOLS.PARENTHESES_OPEN)
				self._compileExpression(statementE)
				self._readSymbol(statementE, _SYMBOLS.PARENTHESES_CLOSE)
				self.writer.writeArithmetic("not")
				self.writer.writeIf(label_end)
				self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
				self._compileStatements(statementE)
				self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
				statementsE.append(statementE)
				self.writer.writeGoto(label_start)
				self.writer.writeLabel(label_end)
			elif self.nextTok.value == _STATEMENTS.DO:
				self._compileDo(statementsE)
			elif self.nextTok.value == _STATEMENTS.RETURN:
				statementE = Element(ELEMENTS.STATEMENT_RETURN)
				self._readKeyword(statementE)
				if not (self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == _SYMBOLS.SEMI_COLON):
					self._compileExpression(statementE)
				else:
					self.writer.writePush(SEGMENT.CONST, 0)
				self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
				self.writer.writeReturn()
				statementsE.append(statementE)
		if len(statementsE) == 0:
			statementsE.text = "\n"
		parent.append(statementsE)

	def _compileExpression(self, parent):
		expressionE = Element(ELEMENTS.EXPRESSION)
		self._readTerm(expressionE)
		while self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value in _SYMBOLS.OPERATORS:
			symbol = self._readSymbol(expressionE)
			self._readTerm(expressionE)
			self.writer.writeArithmetic(symbol)
		parent.append(expressionE)

	def _compileExpressionList(self, parent):
		self._readSymbol(parent, _SYMBOLS.PARENTHESES_OPEN)
		expListE = Element(ELEMENTS.EXPRESSION_LIST)
		nArgs = 0
		while not (self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == _SYMBOLS.PARENTHESES_CLOSE):
			self._compileExpression(expListE)
			self._readSymbolOptional(expListE, _SYMBOLS.COMMA)
			nArgs += 1
		# hack for TextComparer
		if len(expListE) == 0:
			expListE.text = "\n"
		parent.append(expListE)
		self._readSymbol(parent, _SYMBOLS.PARENTHESES_CLOSE)
		return nArgs

	def _compileDo(self, parent):
		statementE = Element(ELEMENTS.STATEMENT_DO)
		self._readKeyword(statementE, _STATEMENTS.DO)
		identifier = self._readIdentifier(statementE)
		nArgs = 0
		if self._readSymbolOptional(statementE, _SYMBOLS.DOT):
			type_ = self._symbol_table.typeOf(identifier)
			if type_:
				segment, index = self._identifier_data(identifier)
				self.writer.writePush(segment, index)
				nArgs += 1
				identifier = "%s.%s" % (type_, self._readIdentifier(statementE))
			else:
				identifier = "%s.%s" % (identifier, self._readIdentifier(statementE))
		else:
			identifier = "%s.%s" % (self.className, identifier)
			self.writer.writePush(SEGMENT.POINTER, 0)
			nArgs += 1
		nArgs += self._compileExpressionList(statementE)
		self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
		self.writer.writeCall(identifier, nArgs)
		self.writer.writePop(SEGMENT.TEMP, 0)
		parent.append(statementE)

	def _compileVarDec(self, parent):
		nArgs = 0
		while self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value == _KEYWORDS.VAR:
			varDecE = Element(ELEMENTS.VAR_DEC)
			self._readKeyword(varDecE, _KEYWORDS.VAR)
			self._readType(varDecE)
			self._readIdentifier(varDecE)
			nArgs += 1
			while self._readSymbolOptional(varDecE, _SYMBOLS.COMMA):
				self._readIdentifier(varDecE)
				nArgs += 1
			self._readSymbol(varDecE, _SYMBOLS.SEMI_COLON)
			parent.append(varDecE)
		return nArgs

	def _compileParameters(self, parent):
		paramListE = Element(ELEMENTS.PARAM_LIST)
		while (self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _CLASSVARDEC.VAR_TYPES) or self.nextTok.type == tokenizor.IDENTIFIER:
			self._readType(paramListE)
			self._readIdentifier(paramListE)
			self._readSymbolOptional(paramListE, _SYMBOLS.COMMA)
		if len(paramListE) == 0:
			paramListE.text = "\n"
		parent.append(paramListE)

##############################
########## READ ##############
##############################

	def _readTerm(self, parent):
		termE = Element(ELEMENTS.TERM)
		if self.nextTok.type == tokenizor.INTEGER:
			self.next()
			termE.append(_leafElement(ELEMENTS.INTEGER_CONSTANT, self.tok.value))
			self.writer.writePush(SEGMENT.CONST, self.tok.value)
		elif self.nextTok.type == tokenizor.STRING:
			self.next()
			termE.append(_leafElement(ELEMENTS.STRING_CONSTANT, self.tok.value))
			string_value = self.tok.value
			self.writer.writePush(SEGMENT.CONST, len(string_value))
			self.writer.writeCall("String.new", 1)
			for char in string_value:
				self.writer.writePush(SEGMENT.CONST, ord(char))
				self.writer.writeCall("String.appendChar", 2)
		elif self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _KEYWORDS.CONSTANTS:
			self.next()
			termE.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
			_KW_CONT_WRITE[self.tok.value](self.writer)
		elif self.nextTok.type == tokenizor.IDENTIFIER:
			identifier = self._readIdentifier(termE)
			nArgs = 0
			if self._readSymbolOptional(termE, _SYMBOLS.BRACKET_OPEN):
				self._compileExpression(termE)
				self.writer.writePush(*self._identifier_data(identifier))
				self.writer.writeArithmetic("add")
				self.writer.writePop(SEGMENT.POINTER, 1)
				self.writer.writePush(SEGMENT.THAT, 0)
				self._readSymbol(termE, _SYMBOLS.BRACKET_CLOSE)
			elif self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == _SYMBOLS.PARENTHESES_OPEN:
				nArgs = self._compileExpressionList(termE)
				self._readSymbol(termE, _SYMBOLS.PARENTHESES_CLOSE)
				self.writer.writeCall(identifier, nArgs)
			elif self._readSymbolOptional(termE, _SYMBOLS.DOT):
				type_ = self._symbol_table.typeOf(identifier)
				if type_:
					segment, index = self._identifier_data(identifier)
					self.writer.writePush(segment, index)
					nArgs += 1
					identifier = "%s.%s" % (type_, self._readIdentifier(termE))
				else:
					identifier = "%s.%s" % (identifier, self._readIdentifier(termE))
				nArgs += self._compileExpressionList(termE)
				self.writer.writeCall(identifier, nArgs)
			else:
				self.writer.writePush(*self._identifier_data(identifier))
		elif self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == _SYMBOLS.PARENTHESES_OPEN:
			self.next()
			termE.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
			self._compileExpression(termE)
			self._readSymbol(termE, _SYMBOLS.PARENTHESES_CLOSE)
		elif self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value in _SYMBOLS.UNARY_OPARTORS:
			self.next()
			sym = self.tok.value
			termE.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
			self._readTerm(termE)
			self.writer.writeArithmeticUnary(sym)
		else:
			raise self._syntaxError("Unexpected %s." % self.tok.value)
		parent.append(termE)

	def _identifier_data(self, identifier):
		return _SEG_TRANSLATE[self._symbol_table.kindOf(identifier)], self._symbol_table.indexOf(identifier)

	def _readIdentifier(self, parent):
		self.next()
		self._assertToken(self.tok, ELEMENTS.IDENTIFIER, type_=tokenizor.IDENTIFIER)
		name = self.tok.value
		element = _leafElement(ELEMENTS.IDENTIFIER, name)
		type_ = self._symbol_table.typeOf(name)
		kind = None
		index = None
		if type_ is None:
			if parent.tag in (ELEMENTS.CLASSVARDEC, ELEMENTS.VAR_DEC) and len(parent) > 1:
				type_ = parent[1].text
				kind = _SYM_KIND_MAP[parent[0].text]
			elif parent.tag == ELEMENTS.PARAM_LIST and len(parent) > 0:
				type_ = parent[-1].text
				kind = SYM_KINDS.ARG
			if kind is not None:
				index = self._symbol_table.define(name, type_, kind)
		else:
			type_ = self._symbol_table.typeOf(name)
			kind = self._symbol_table.kindOf(name)
			index = self._symbol_table.indexOf(name)
		if kind is not None:
			element.set("type", type_)
			element.set("kind", str(kind))
			element.set("index", str(index))
		parent.append(element)
		return name

	def _readType(self, parent):
		if self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _CLASSVARDEC.VAR_TYPES:
			self.next()
			parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
		else:
			self._readIdentifier(parent)

	def _readReturnType(self, parent):
		if self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value in _SUBROUTINEDEC.RETURN_TYPES:
			self.next()
			parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
		else:
			self._readIdentifier(parent)

	def _readSymbol(self, parent, expected = None):
		self.next()
		expectedStr = expected if expected is not None else ELEMENTS.SYMBOL
		self._assertToken(self.tok, expectedStr, type_=tokenizor.SYMBOL)
		if expected is not None:
			self._assertToken(self.tok, expected, value_=expected)
		parent.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
		return self.tok.value

	def _readKeyword(self, parent, expected = None):
		self.next()
		expectedStr = expected if expected is not None else ELEMENTS.KEYWORD
		self._assertToken(self.tok, expectedStr, type_=tokenizor.KEYWORD)
		if expected is not None:
			self._assertToken(self.tok, expected, value_=expected)
		parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
		return self.tok.value

	def _readSymbolOptional(self, parent, expected):
		if self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == expected:
			self.next()
			parent.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
			return True
		return False

	def _readKeywordOptional(self, parent, expected):
		if self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value == expected:
			self.next()
			parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
			return True
		return False

	def next(self):
		self.tok = self.iter.next()
		self.nextTok = self.iter.lookahead()

	def _assertToken(self, tok, expected_str, type_ = None, value_ = None):
		if (type_ != None and tok.type != type_) or (value_ != None and tok.value != value_):
			raise self._syntaxError("Expected %s but found %s" % (expected_str, tok.value), tok)

	def _syntaxError(self, msg, tok = None):
		if tok is None:
			tok = self.tok
		return SyntaxError(msg, (None, tok.srow, tok.scol, tok.line))
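
The tokenizor.STRING branch of `_readTerm` above compiles a Jack string literal by allocating a `String` object and then appending one character code per call; the same strategy appears again further down. As a rough stand-alone sketch of that translation, using a hypothetical `emit_string_literal` helper that simply returns the VM commands as text (standard Hack VM mnemonics assumed):

# Hypothetical helper mirroring the string-constant branch above:
# push the length, call String.new, then append each character code.
def emit_string_literal(value: str) -> list:
    commands = ["push constant %d" % len(value), "call String.new 1"]
    for char in value:
        commands.append("push constant %d" % ord(char))
        commands.append("call String.appendChar 2")
    return commands


print("\n".join(emit_string_literal("Hi")))
# push constant 2
# call String.new 1
# push constant 72
# call String.appendChar 2
# push constant 105
# call String.appendChar 2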
Code example #17
class CompilationEngine:
    def __init__(self, token_stream, out_file, xml_name):
        '''
        creates a new compilation engine with the given input and output.
        The next method called must be compileClass().
        '''
        self.stream = token_stream
        self.writer = VMWriter(out_file)
        self.symbols = SymbolTable()
        self.xml_name = xml_name
        self.root = ET.Element('class')

        self.stream.advance()
        assert self.stream.keyword() == 'class'

    def add_terminal(self, root, text):
        terminal = ET.SubElement(root, self.stream.token_type())
        terminal.text = ' {text} '.format(text=text)
        if self.stream.has_more_tokens():
            self.stream.advance()

    def compile_class(self):
        '''
        compiles a complete class
        '''
        self.add_terminal(self.root, self.stream.keyword())
        self.class_name = self.stream.identifier()
        self.add_terminal(self.root, self.class_name)
        self.add_terminal(self.root, self.stream.symbol())

        while self.stream.token_type() == tokenizer.KEYWORD and self.stream.keyword() in CLASS_VARS:
            self.compile_class_var_dec()

        while self.stream.token_type() == tokenizer.KEYWORD and self.stream.keyword() in SUBROUTINE_TYPES:
            self.compile_subroutine()

        self.add_terminal(self.root, self.stream.symbol())

    def compile_class_var_dec(self):
        '''
        compiles a static declaration or a field declaration.
        '''
        class_var_root = ET.SubElement(self.root, CLASS_VAR_DEC)
        kind = self.stream.keyword()
        self.add_terminal(class_var_root, kind)
        if self.stream.token_type() == tokenizer.KEYWORD:
            type_name = self.stream.keyword()
        else:
            type_name = self.stream.identifier()
        self.add_terminal(class_var_root, type_name)
        name = self.stream.identifier()
        self.add_terminal(class_var_root, name)
        self.symbols.define(name, type_name, kind)

        while self.stream.symbol() == COMMA:
            self.add_terminal(class_var_root, self.stream.symbol())
            name = self.stream.identifier()
            self.add_terminal(class_var_root, name)
            self.symbols.define(name, type_name, kind)

        self.add_terminal(class_var_root, self.stream.symbol())

    def compile_subroutine(self):
        '''
        compiles a complete method, function, or constructor.
        '''
        subroutine_dec = ET.SubElement(self.root, SUBROUTINE_DEC)
        self.symbols.start_subroutine()
        subroutine_type = self.stream.keyword()
        if subroutine_type in ['method', 'constructor']:
            self.symbols.define('this', self.class_name, 'argument')
        self.add_terminal(subroutine_dec, subroutine_type)
        if self.stream.token_type() == tokenizer.KEYWORD:
            self.add_terminal(subroutine_dec, self.stream.keyword())
        else:
            self.add_terminal(subroutine_dec, self.stream.identifier())
        name = self.stream.identifier()
        self.add_terminal(subroutine_dec, name)

        self.add_terminal(subroutine_dec, self.stream.symbol())
        self.compile_parameter_list(subroutine_dec)
        self.add_terminal(subroutine_dec, self.stream.symbol())

        subroutine_body = ET.SubElement(subroutine_dec, SUBROUTINE_BODY)
        self.add_terminal(subroutine_body, self.stream.symbol())
        while self.stream.token_type() == tokenizer.KEYWORD and self.stream.keyword() == VAR:
            self.compile_var_dec(subroutine_body)
        func_name = '{cls}.{sub}'.format(
            cls=self.class_name,
            sub=name)
        self.writer.write_function(func_name, self.symbols.var_count('var'))
        if subroutine_type == 'constructor':
            # allocate one word per field and anchor `this` to the new object
            self.writer.write_push('constant', self.symbols.var_count('field'))
            self.writer.write_call('Memory.alloc', 1)
            self.writer.write_pop('pointer', 0)
        elif subroutine_type == 'method':
            # a method receives the object as argument 0; anchor `this` to it
            self.writer.write_push('argument', 0)
            self.writer.write_pop('pointer', 0)
        self.compile_statements(subroutine_body)
        self.add_terminal(subroutine_body, self.stream.symbol())

    def compile_parameter_list(self, root):
        '''
        compiles a (possibly empty) parameter list, not including the enclosing “()”.
        '''
        parameter_list_root = ET.SubElement(root, PARAMETER_LIST)
        if self.stream.token_type() != tokenizer.SYMBOL:
            type_name = self.stream.keyword()
            self.add_terminal(parameter_list_root, type_name)
            name = self.stream.identifier()
            self.add_terminal(parameter_list_root, name)
            self.symbols.define(name, type_name, 'argument')

        while self.stream.token_type() == tokenizer.SYMBOL and self.stream.symbol() == COMMA:
            self.add_terminal(parameter_list_root, self.stream.symbol())
            type_name = self.stream.keyword()
            self.add_terminal(parameter_list_root, type_name)
            name = self.stream.identifier()
            self.add_terminal(parameter_list_root, name)
            self.symbols.define(name, type_name, 'argument')

    def compile_var_dec(self, root):
        '''
        compiles a var declaration
        '''
        var_dec_root = ET.SubElement(root, VAR_DEC)
        self.add_terminal(var_dec_root, self.stream.keyword())
        type_name = None
        if self.stream.token_type() == tokenizer.IDENTIFIER:
            type_name = self.stream.identifier()
        else:
            type_name = self.stream.keyword()
        self.add_terminal(var_dec_root, type_name)
        name = self.stream.identifier()
        self.add_terminal(var_dec_root, name)
        self.symbols.define(name, type_name, 'var')

        while self.stream.symbol() == COMMA:
            self.add_terminal(var_dec_root, self.stream.symbol())
            name = self.stream.identifier()
            self.add_terminal(var_dec_root, name)
            self.symbols.define(name, type_name, 'var')

        self.add_terminal(var_dec_root, self.stream.symbol())

    def compile_statements(self, root):
        '''
        compiles a sequence of statements, not including the enclosing “{}”.
        '''
        statements_root = ET.SubElement(root, STATEMENTS)
        while self.stream.token_type() == tokenizer.KEYWORD:
            keyword = self.stream.keyword()
            if keyword == 'let':
                self.compile_let(statements_root)
            elif keyword == 'if':
                self.compile_if(statements_root)
            elif keyword == 'while':
                self.compile_while(statements_root)
            elif keyword == 'do':
                self.compile_do(statements_root)
            elif keyword == 'return':
                self.compile_return(statements_root)
            else:
                assert False, 'unsupported keyword {keyword}'.format(keyword=keyword)

    def compile_do(self, root):
        '''
        compiles a do statement
        '''
        do_root = ET.SubElement(root, DO)
        self.add_terminal(do_root, self.stream.keyword())
        self.compile_subroutine_call(do_root)
        self.writer.write_pop('temp', 0)
        self.add_terminal(do_root, self.stream.symbol())

    def compile_let(self, root):
        '''
        compiles a let statement
        '''
        let_root = ET.SubElement(root, LET)
        self.add_terminal(let_root, self.stream.keyword())
        lhs = self.stream.identifier()
        self.add_terminal(let_root, lhs)
        is_array = False
        if self.stream.token_type() == tokenizer.SYMBOL and self.stream.symbol() == OPEN_BRACKET:
            is_array = True
            self.add_terminal(let_root, self.stream.symbol())
            self.compile_expression(let_root)
            self.add_terminal(let_root, self.stream.symbol())
            # compute the target cell's address: base + index
            self.writer.write_push(self.symbols.kind_of(lhs), self.symbols.index_of(lhs))
            self.writer.write_arithmetic('add')
        self.add_terminal(let_root, self.stream.symbol())
        self.compile_expression(let_root)
        self.add_terminal(let_root, self.stream.symbol())
        if is_array:
            # store the value into the addressed cell through the THAT segment
            self.writer.write_pop('temp', 0)
            self.writer.write_pop('pointer', 1)
            self.writer.write_push('temp', 0)
            self.writer.write_pop('that', 0)
        else:
            self.writer.write_pop(self.symbols.kind_of(lhs), self.symbols.index_of(lhs))

    def compile_while(self, root):
        '''
        compiles a while statement
        '''
        while_root = ET.SubElement(root, WHILE)
        while_expression = self.symbols.generate_label('WHILE_EXP')
        while_end = self.symbols.generate_label('WHILE_END')
        self.add_terminal(while_root, self.stream.keyword())
        self.add_terminal(while_root, self.stream.symbol())
        self.writer.write_label(while_expression)
        self.compile_expression(while_root)
        self.writer.write_arithmetic('not')
        self.writer.write_if(while_end)
        self.add_terminal(while_root, self.stream.symbol())
        self.add_terminal(while_root, self.stream.symbol())
        self.compile_statements(while_root)
        self.writer.write_goto(while_expression)
        self.writer.write_label(while_end)
        self.add_terminal(while_root, self.stream.symbol())

    def compile_return(self, root):
        '''
        compiles a return statement
        '''
        return_root = ET.SubElement(root, RETURN)
        self.add_terminal(return_root, self.stream.keyword())
        if self.stream.token_type() != tokenizer.SYMBOL:
            self.compile_expression(return_root)
        else:
            self.writer.write_push('constant', 0)
        self.writer.write_return()
        self.add_terminal(return_root, self.stream.symbol())

    def compile_if(self, root):
        '''
        compiles an if statement
        '''
        if_root = ET.SubElement(root, IF)
        if_label = self.symbols.generate_label('IF_TRUE')
        else_label = self.symbols.generate_label('IF_FALSE')
        end_label = self.symbols.generate_label('IF_END')
        self.add_terminal(if_root, self.stream.keyword())
        self.add_terminal(if_root, self.stream.symbol())
        self.compile_expression(if_root)
        self.writer.write_if(if_label)
        self.writer.write_goto(else_label)
        self.writer.write_label(if_label)
        self.add_terminal(if_root, self.stream.symbol())
        self.add_terminal(if_root, self.stream.symbol())
        self.compile_statements(if_root)
        self.writer.write_goto(end_label)
        self.add_terminal(if_root, self.stream.symbol())
        self.writer.write_label(else_label)
        if self.stream.token_type() == tokenizer.KEYWORD and self.stream.keyword() == 'else':
            self.add_terminal(if_root, self.stream.keyword())
            self.add_terminal(if_root, self.stream.symbol())
            self.compile_statements(if_root)
            self.add_terminal(if_root, self.stream.symbol())
        self.writer.write_label(end_label)

    def compile_expression(self, root):
        '''
        compiles an expression
        '''
        expression_root = ET.SubElement(root, EXPRESSION)
        self.compile_term(expression_root)
        while self.stream.token_type() == tokenizer.SYMBOL and self.stream.symbol() in OPS:
            operator = self.stream.symbol()
            self.add_terminal(expression_root, operator)
            self.compile_term(expression_root)
            if operator == '+':
                self.writer.write_arithmetic('add')
            elif operator == '-':
                self.writer.write_arithmetic('sub')
            elif operator == '*':
                self.writer.write_call('Math.multiply', 2)
            elif operator == '/':
                self.writer.write_call('Math.divide', 2)
            elif operator == '&':
                self.writer.write_arithmetic('and')
            elif operator == '|':
                self.writer.write_arithmetic('or')
            elif operator == '<':
                self.writer.write_arithmetic('lt')
            elif operator == '>':
                self.writer.write_arithmetic('gt')
            elif operator == '=':
                self.writer.write_arithmetic('eq')

    def compile_term(self, root):
        '''
        compiles a term. This method is faced with a slight difficulty when trying to
        decide between some of the alternative rules. Specifically, if the current token
        is an identifier, it must still distinguish between a variable, an array entry, and
        a subroutine call. The distinction can be made by looking ahead one extra token.
        A single look-ahead token, which may be one of “[“, “(“, “.”, suffices to
        distinguish between the three possibilities. Any other token is not
        part of this term and should not be advanced over.
        '''
        term_root = ET.SubElement(root, TERM)
        token_type = self.stream.token_type()
        if token_type == tokenizer.INT:
            val = self.stream.int_val()
            self.add_terminal(term_root, val)
            self.writer.write_push('constant', val)
        elif token_type == tokenizer.STRING:
            val = self.stream.string_val()
            self.add_terminal(term_root, val)
            # Build the string at run time: allocate it, then append each character.
            self.writer.write_push('constant', len(val))
            self.writer.write_call('String.new', 1)
            for char in val:
                self.writer.write_push('constant', ord(char))
                self.writer.write_call('String.appendChar', 2)
        elif token_type == tokenizer.KEYWORD and self.stream.keyword() in KEYWORD_CONSTANTS:
            keyword = self.stream.keyword()
            self.add_terminal(term_root, keyword)
            if keyword == 'true':
                self.writer.write_push('constant', 0)
                self.writer.write_arithmetic('not')
            elif keyword in ['false', 'null']:
                self.writer.write_push('constant', 0)
            else:
                self.writer.write_push('this', 0)
        elif token_type == tokenizer.IDENTIFIER:
            if self.stream.peek() == OPEN_BRACKET:
                name = self.stream.identifier()
                self.writer.write_push(self.symbols.kind_of(name), self.symbols.index_of(name))
                self.add_terminal(term_root, name)
                self.add_terminal(term_root, self.stream.symbol())
                self.compile_expression(term_root)
                self.add_terminal(term_root, self.stream.symbol())
                # array entry: add the index to the base, then read through THAT
                self.writer.write_arithmetic('add')
                self.writer.write_pop('pointer', 1)
                self.writer.write_push('that', 0)
            elif self.stream.peek() == OPEN_PAREN or self.stream.peek() == PERIOD:
                self.compile_subroutine_call(term_root)
            else:
                name = self.stream.identifier()
                self.add_terminal(term_root, self.stream.identifier())
                self.writer.write_push(self.symbols.kind_of(name), self.symbols.index_of(name))
        elif token_type == tokenizer.SYMBOL and self.stream.symbol() == OPEN_PAREN:
            self.add_terminal(term_root, self.stream.symbol())
            self.compile_expression(term_root)
            self.add_terminal(term_root, self.stream.symbol())
        elif token_type == tokenizer.SYMBOL and self.stream.symbol() in UNARY_OPS:
            operator = self.stream.symbol()
            self.add_terminal(term_root, operator)
            self.compile_term(term_root)
            self.writer.write_arithmetic('neg' if operator == '-' else 'not')
        else:
            assert False, 'unsupported token {token}'.format(token=self.stream.current_token)

    def compile_expression_list(self, root):
        '''
        compiles a (possibly empty) comma-separated list of expressions.
        '''
        expression_list_root = ET.SubElement(root, EXPRESSION_LIST)
        if self.stream.token_type() == tokenizer.SYMBOL and self.stream.symbol() == CLOSE_PAREN:
            return 0
        self.compile_expression(expression_list_root)
        num_vars = 1
        while self.stream.symbol() == COMMA:
            self.add_terminal(expression_list_root, self.stream.symbol())
            self.compile_expression(expression_list_root)
            num_vars += 1
        return num_vars

    def compile_subroutine_call(self, root):
        class_name = self.class_name
        subroutine_name = self.stream.identifier()
        self.add_terminal(root, subroutine_name)
        if self.stream.symbol() == PERIOD:
            self.add_terminal(root, self.stream.symbol())
            class_name = subroutine_name
            subroutine_name = self.stream.identifier()
            self.add_terminal(root, self.stream.identifier())
        self.add_terminal(root, self.stream.symbol())
        num_vars = self.compile_expression_list(root)
        self.add_terminal(root, self.stream.symbol())
        self.writer.write_call('{cls}.{sub}'.format(
            cls=class_name,
            sub=subroutine_name),
            num_vars)

    def write(self):
        if self.xml_name:
            lines = self._write(self.root).split('\n')
            lines = lines[1:]
            file = open(self.xml_name, 'w')
            file.write('\n'.join(lines))
            file.close()
        self.writer.close()

    def _write(self, root):
        return minidom.parseString(ET.tostring(root)).toprettyxml()
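
`compile_expression` above maps each binary operator to its VM translation with a chain of comparisons. The same mapping can also be table-driven; here is a small sketch that reuses the `write_arithmetic`/`write_call` method names this example already calls. `BINARY_OPS`, `write_binary_op`, and the `_PrintWriter` stand-in are illustrative names, not part of the project. A table keeps the operator set in one place and makes it easy to see that only `*` and `/` require OS calls.

# BINARY_OPS maps a Jack operator to the VM-writer call that implements it.
BINARY_OPS = {
    '+': ('write_arithmetic', 'add'),
    '-': ('write_arithmetic', 'sub'),
    '*': ('write_call', 'Math.multiply', 2),
    '/': ('write_call', 'Math.divide', 2),
    '&': ('write_arithmetic', 'and'),
    '|': ('write_arithmetic', 'or'),
    '<': ('write_arithmetic', 'lt'),
    '>': ('write_arithmetic', 'gt'),
    '=': ('write_arithmetic', 'eq'),
}


def write_binary_op(writer, operator):
    """Emit the VM code for one binary operator via the table above."""
    method, *args = BINARY_OPS[operator]
    getattr(writer, method)(*args)


class _PrintWriter:
    """Throwaway stand-in for a VM writer; prints instead of writing a file."""
    def write_arithmetic(self, command):
        print(command)

    def write_call(self, name, n_args):
        print("call %s %d" % (name, n_args))


write_binary_op(_PrintWriter(), '*')  # -> call Math.multiply 2
write_binary_op(_PrintWriter(), '+')  # -> add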
Code example #18
 def build_vm_writer(self, jack_file):
     self.vm_writer = VMWriter(jack_file)
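
Across these examples the compilation engines only ever call a small, fixed set of VM-writer methods. Below is a minimal sketch of a writer with that interface, assuming it simply appends one VM command per line to a `.vm` file created next to the input; the exact constructor argument and the camelCase/snake_case naming vary between the examples above, and `SimpleVMWriter` is an illustrative name.

import os


class SimpleVMWriter:
    """Minimal sketch of a VM writer with the interface used above."""

    def __init__(self, jack_file):
        # Write <name>.vm next to the input <name>.jack (assumed convention).
        self.out = open(os.path.splitext(jack_file)[0] + '.vm', 'w')

    def _emit(self, line):
        self.out.write(line + '\n')

    def write_push(self, segment, index):
        self._emit("push %s %d" % (segment, index))

    def write_pop(self, segment, index):
        self._emit("pop %s %d" % (segment, index))

    def write_arithmetic(self, command):
        self._emit(command)

    def write_label(self, label):
        self._emit("label %s" % label)

    def write_goto(self, label):
        self._emit("goto %s" % label)

    def write_if(self, label):
        self._emit("if-goto %s" % label)

    def write_call(self, name, n_args):
        self._emit("call %s %d" % (name, n_args))

    def write_function(self, name, n_locals):
        self._emit("function %s %d" % (name, n_locals))

    def write_return(self):
        self._emit("return")

    def close(self):
        self.out.close()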
Code example #19
class CompilationEngine:
    '''The brain of the Jack syntax analyzer'''

    # Constructor
    def __init__(self, tokenizer: JackTokenizer, out_path: Path):
        self.tokenizer = tokenizer

        # Create symbol tables
        self.class_level_st = SymbolTable()
        self.subroutine_level_st = SymbolTable()

        # class's name
        self.class_name = None
        self.func_name = None
        self.sub_type = None

        # Open the output file for writing
        self.out_stream = out_path.open('w')

        # Create a new VM writer for writing
        self.vm_writer = VMWriter(out_path.with_suffix(".vm"))

        # For generating labels
        self.label_count = {"if": 0, "while": 0}

    def get_if_labels(self):
        self.label_count["if"] += 1
        return (f"LABEL_IF_{self.label_count['if'] - 1}_1",
                f"LABEL_IF_{self.label_count['if'] - 1}_2")

    def get_while_labels(self):
        self.label_count["while"] += 1
        return (f"LABEL_WHILE_{self.label_count['while'] - 1}_1",
                f"LABEL_WHILE_{self.label_count['while'] - 1}_2")

    def start_compilation(self):
        # Read the first token into memory
        self.tokenizer.has_more_tokens()

        # Start analyzing syntax
        if self.tokenizer.get_token_type() == TokenType.KEYWORD:
            if self.tokenizer.get_keyword_type() == KeywordType.CLASS:
                self.compile_class()
        else:
            raise AttributeError("Not starting with a class")

    # Helper method to write terminal XML tags
    def write_terminal_tag(self, t, v):
        if t == TokenType.KEYWORD:
            self.out_stream.write(f"<keyword> {v} </keyword>\n")
        elif t == TokenType.IDENTIFIER:
            self.out_stream.write(f"<identifier> {v} </identifier>\n")
        elif t == TokenType.SYMBOL:
            self.out_stream.write(f"<symbol> {v} </symbol>\n")
        elif t == TokenType.INT_CONST:
            self.out_stream.write(
                f"<integerConstant> {v} </integerConstant>\n")
        elif t == TokenType.STRING_CONST:
            self.out_stream.write(f"<stringConstant> {v} </stringConstant>\n")

    # 'class' className '{' classVarDec* subroutineDec* '}'
    def compile_class(self):
        # Write opening tag
        self.out_stream.write("<class>\n")
        self.write_terminal_tag(self.tokenizer.get_token_type(), 'class')

        # Read the next token
        self.tokenizer.has_more_tokens()

        if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
            self.class_name = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(self.tokenizer.get_token_type(),
                                    self.class_name)
            self.out_stream.write("\n===DECLARED===\nclass name\n=======")
        else:
            raise AttributeError("Not a valid class name!")

        # Read the next token
        self.tokenizer.has_more_tokens()

        self.eat('{')
        self.write_terminal_tag(self.tokenizer.get_token_type(),
                                self.tokenizer.get_symbol())

        # Handle class variable declaration (classVarDec*)
        # Proceed to next token
        self.tokenizer.has_more_tokens()

        # While there are field/static declarations
        while (self.tokenizer.get_token_type() == TokenType.KEYWORD
               and self.tokenizer.get_keyword_type() in (KeywordType.FIELD,
                                                         KeywordType.STATIC)):
            self.compile_class_var_dec()

        while (self.tokenizer.get_token_type() == TokenType.KEYWORD
               and self.tokenizer.get_keyword_type() in (KeywordType.CONSTRUCTOR,
                                                         KeywordType.FUNCTION,
                                                         KeywordType.METHOD)):
            self.compile_subroutine_dec()

        # Class ending curly brackets
        self.eat("}")
        self.write_terminal_tag(TokenType.SYMBOL, "}")

        # At the end of function call
        self.out_stream.write("</class>\n")

    # ('static'|'field') type varName (',' varName)* ';'
    def compile_class_var_dec(self):
        # Write opening tag
        self.out_stream.write("<classVarDec>\n")

        # Write static/field
        self.write_terminal_tag(TokenType.KEYWORD,
                                self.tokenizer.get_cur_ident())

        # To store variable properties
        var_kind = None
        var_type = None
        var_index = None
        var_name = None

        if self.tokenizer.get_cur_ident() == "static":
            var_kind = SymbolKind.STATIC
        elif self.tokenizer.get_cur_ident() == "field":
            var_kind = SymbolKind.FEILD
        else:
            raise Exception("Other than static or feild:" +
                            self.tokenizer.get_cur_ident())

        # Read the next token
        self.tokenizer.has_more_tokens()

        if self.is_valid_type():
            self.write_terminal_tag(self.tokenizer.get_token_type(),
                                    self.tokenizer.get_cur_ident())

            var_type = self.tokenizer.get_cur_ident()
        else:
            raise AssertionError("Invalid class variable type!")

        # Move to next token
        self.tokenizer.has_more_tokens()

        if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
            var_name = self.tokenizer.get_cur_ident()

            # Write variable tag to XML file
            self.write_terminal_tag(self.tokenizer.get_token_type(), var_name)

            # Define new class level variable
            self.class_level_st.define(var_name, var_type, var_kind)
            var_index = self.class_level_st.get_index_of(var_name)

            # Write variable properties
            self.out_stream.write(
                f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n======="
            )
        else:
            raise AssertionError("Invalid class variable name!")

        # Move to the next token
        self.tokenizer.has_more_tokens()

        # If more than one variable is declared, e.g. field int x, y, z;
        while self.tokenizer.get_token_type() == TokenType.SYMBOL \
            and self.tokenizer.get_symbol() == ",":
            self.write_terminal_tag(TokenType.SYMBOL, ",")

            # Move to next token
            self.tokenizer.has_more_tokens()

            if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
                var_name = self.tokenizer.get_cur_ident()

                # Write variable tag to XML file
                self.write_terminal_tag(self.tokenizer.get_token_type(),
                                        var_name)

                # Define new class level variable
                self.class_level_st.define(var_name, var_type, var_kind)
                var_index = self.class_level_st.get_index_of(var_name)

                # Write variable properties
                self.out_stream.write(
                    f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n======="
                )
            else:
                raise AssertionError(
                    "Invalid syntax for class variable declaration!")

            # Move to next token
            self.tokenizer.has_more_tokens()

        # Must end with ";"
        self.eat(";")
        self.write_terminal_tag(TokenType.SYMBOL, ";")

        # Move to next token
        self.tokenizer.has_more_tokens()

        # Write closing tag
        self.out_stream.write("</classVarDec>\n")

    # ('constructor' | 'function' | 'method') ('void' | type) subroutineName '(' parameterList ')' subroutineBody
    def compile_subroutine_dec(self):
        # Opening tag
        self.out_stream.write("<subroutineDec>\n")

        # To store function parameters
        func_params = {}

        # Write subroutine type
        self.sub_type = self.tokenizer.get_cur_ident()
        self.write_terminal_tag(TokenType.KEYWORD, self.sub_type)

        # Reset subroutine level symbol table
        self.subroutine_level_st.reset_table()

        # Insert `this`, if method
        if self.sub_type == "method":
            self.subroutine_level_st.define("this", self.class_name,
                                            SymbolKind.ARG)

        # Move to next token
        self.tokenizer.has_more_tokens()

        if self.is_valid_type() or \
            (self.tokenizer.get_token_type() == TokenType.KEYWORD \
            and self.tokenizer.get_keyword_type() == KeywordType.VOID):
            self.write_terminal_tag(self.tokenizer.get_token_type(),
                                    self.tokenizer.get_cur_ident())
        else:
            raise AssertionError("Not a valid subroutine return type!")

        # Move to next token
        self.tokenizer.has_more_tokens()

        if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
            func_params["name"] = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(TokenType.IDENTIFIER, func_params["name"])

        else:
            raise AssertionError("Invalid Syntax for function name!")

        # Move to next token
        self.tokenizer.has_more_tokens()

        self.eat('(')
        self.write_terminal_tag(TokenType.SYMBOL, "(")

        # Move to next token
        self.tokenizer.has_more_tokens()

        # If there are some parameters
        self.out_stream.write("<parameterList>\n")
        if not (self.tokenizer.get_token_type() == TokenType.SYMBOL):
            self.compile_parameter_list()
        self.out_stream.write("</parameterList>\n")

        # Move to next token
        self.eat(')')
        self.write_terminal_tag(TokenType.SYMBOL, ")")

        # Remember the subroutine name; the VM "function" command
        # is written later, in compile_subroutine_body
        self.func_name = func_params['name']

        # Move to the next token
        self.tokenizer.has_more_tokens()
        self.compile_subroutine_body()

        # Closing tag
        self.out_stream.write("</subroutineDec>\n")

    # ((type varName) (',' type varName)*)?
    def compile_parameter_list(self):
        # For storing variable params
        var_name = None
        var_type = None
        var_kind = SymbolKind.ARG  # Argument list
        var_index = None

        if self.is_valid_type():
            var_type = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(self.tokenizer.get_token_type(), var_type)
        else:
            raise AssertionError("Invalid syntax in parameter list!")

        # Move to next token
        self.tokenizer.has_more_tokens()

        if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
            var_name = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(TokenType.IDENTIFIER, var_name)
        else:
            raise AssertionError(
                "Invalid Syntax for function parameter name!")

        # Define the argument variable
        self.subroutine_level_st.define(var_name, var_type, var_kind)

        # Get the index of the newly created variable
        var_index = self.subroutine_level_st.get_index_of(var_name)

        # Write variable properties
        self.out_stream.write(
            f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n======="
        )
        # Move to next token
        self.tokenizer.has_more_tokens()

        # Handle any additional parameters
        while self.tokenizer.get_token_type() == TokenType.SYMBOL \
            and self.tokenizer.get_symbol() == ",":
            self.write_terminal_tag(TokenType.SYMBOL, ",")

            # Read the next token
            self.tokenizer.has_more_tokens()

            # If the current token is a valid type name
            if self.is_valid_type():
                var_type = self.tokenizer.get_cur_ident()
                self.write_terminal_tag(self.tokenizer.get_token_type(),
                                        var_type)
            else:
                raise AssertionError("Invalid variable type in parameter list")

            # Read the next token
            self.tokenizer.has_more_tokens()

            # If current token is a valid identifier
            if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
                var_name = self.tokenizer.get_cur_ident()
                self.write_terminal_tag(TokenType.IDENTIFIER, var_name)
            else:
                raise AssertionError(
                    "Invalid variable name in parameter list!!")

            self.subroutine_level_st.define(var_name, var_type, var_kind)

            var_index = self.subroutine_level_st.get_index_of(var_name)

            # Write variable properties
            self.out_stream.write(
                f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n======="
            )
            # Read the next token
            self.tokenizer.has_more_tokens()
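
    # Illustrative note (not part of the original listing): a parameter list
    # such as `(int size, boolean flag)` in a function or constructor yields
    #   size -> kind arg, index 0
    #   flag -> kind arg, index 1
    # In a method the same parameters get indices 1 and 2, because `this`
    # already occupies argument 0 (see compile_subroutine_dec above).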

    # '{' varDec* statements '}'
    def compile_subroutine_body(self):
        # Write opening tag
        self.out_stream.write("<subroutineBody>\n")

        # Eat opening curly bracket
        self.eat("{")
        self.write_terminal_tag(TokenType.SYMBOL, "{")

        # Move to next token
        self.tokenizer.has_more_tokens()

        # Handle variable declarations
        while self.tokenizer.get_token_type() == TokenType.KEYWORD  \
        and self.tokenizer.get_keyword_type() == KeywordType.VAR:

            # Current token is the 'var' keyword
            self.compile_var_dec()

        # Get the number of local variables
        # for the function currently being compiled
        nVars = self.subroutine_level_st.get_var_count(SymbolKind.VAR)

        # Write function
        self.vm_writer.write_function(f"{self.class_name}.{self.func_name}",
                                      nVars)

        if self.sub_type == "constructor":
            nFeilds = self.class_level_st.get_var_count(SymbolKind.FEILD)

            # write "push constant nFeilds"
            self.vm_writer.write_push(SegmentType.CONST, nFeilds)

            self.vm_writer.write_call("Memory.alloc", 1)

            self.vm_writer.write_pop(SegmentType.POINTER, 0)

        elif self.sub_type == "method":
            # push argument 0
            self.vm_writer.write_push(SegmentType.ARG, 0)

            # pop pointer 0
            self.vm_writer.write_pop(SegmentType.POINTER, 0)

        # Handle statements
        self.compile_statements()

        # Eat closing curly bracket
        self.eat("}")
        self.write_terminal_tag(TokenType.SYMBOL, "}")

        # Move to next token
        self.tokenizer.has_more_tokens()

        # Write closing tag
        self.out_stream.write("</subroutineBody>\n")

    # 'var' type varName (',' varName)* ';'
    def compile_var_dec(self):
        # Write opening tag
        self.out_stream.write("<varDec>\n")

        # Write var keyword tag
        self.write_terminal_tag(TokenType.KEYWORD, "var")

        # For storing variable params
        var_name = None
        var_type = None
        var_kind = SymbolKind.VAR
        var_index = None

        # Move to next token
        self.tokenizer.has_more_tokens()

        # Write the type of variables
        if self.is_valid_type():
            var_type = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(self.tokenizer.get_token_type(), var_type)
        else:
            raise AssertionError("Not a valid var type!")

        # Move to next token
        self.tokenizer.has_more_tokens()

        if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
            var_name = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(TokenType.IDENTIFIER, var_name)
        else:
            raise AssertionError("Invalid Syntax for var name!")

        # Move to next token
        self.tokenizer.has_more_tokens()

        self.subroutine_level_st.define(var_name, var_type, var_kind)

        var_index = self.subroutine_level_st.get_index_of(var_name)

        # Write variable properties
        self.out_stream.write(
            f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n======="
        )

        while self.tokenizer.get_token_type(
        ) == TokenType.SYMBOL and self.tokenizer.get_symbol() == ",":
            # Write this symbol
            self.write_terminal_tag(TokenType.SYMBOL, ",")

            # Move to the next token
            self.tokenizer.has_more_tokens()

            if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
                var_name = self.tokenizer.get_cur_ident()
                self.write_terminal_tag(TokenType.IDENTIFIER, var_name)
            else:
                raise AssertionError("Invalid Syntax for var name!")

            self.subroutine_level_st.define(var_name, var_type, var_kind)
            var_index = self.subroutine_level_st.get_index_of(var_name)

            # Write variable properties
            self.out_stream.write(
                f"\n===DECLARED===\nkind: {var_kind}, type: {var_type}, index: {var_index}\n======="
            )

            # Move to the next token
            self.tokenizer.has_more_tokens()

        self.eat(";")
        self.write_terminal_tag(TokenType.SYMBOL, ";")

        # Move to the next token
        self.tokenizer.has_more_tokens()

        # Write closing tag
        self.out_stream.write("</varDec>\n")

    # statement*
    def compile_statements(self):
        # Write open tag
        self.out_stream.write("<statements>\n")
        # Process statements
        while self.tokenizer.get_token_type(
        ) == TokenType.KEYWORD and self.tokenizer.get_keyword_type(
        ) in statement_types:
            # Statement type is based on the starting keyword
            statement_type = self.tokenizer.get_keyword_type()

            # Call compile method based on type
            if statement_type == KeywordType.LET:
                self.compile_let()
            elif statement_type == KeywordType.IF:
                self.compile_if()
            elif statement_type == KeywordType.WHILE:
                self.compile_while_statement()
            elif statement_type == KeywordType.DO:
                self.compile_do()
            elif statement_type == KeywordType.RETURN:
                self.compile_return()

        self.out_stream.write("</statements>\n")

    # 'let' varName ('[' expression ']')? '=' expression ';'
    def compile_let(self):
        self.out_stream.write("<letStatement>\n")

        self.write_terminal_tag(TokenType.KEYWORD, "let")

        # Is Array?
        is_array_access = False

        # Move to next token
        self.tokenizer.has_more_tokens()

        if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
            var_name = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(TokenType.IDENTIFIER, var_name)

            var_props = self.lookup_st(var_name)
            # Write variable properties
            self.out_stream.write(
                f"\n===USED===\nkind: {var_props['kind']}, type: {var_props['type']}, index: {var_props['index']}\n======="
            )

            # Finding segment type
            var_props["seg_type"] = self.var_t_to_segment_t(var_props["kind"])

        else:
            raise AssertionError("Invalid Syntax for varName!")

        # Move to next token
        self.tokenizer.has_more_tokens()

        # Optional bracket syntax
        if self.tokenizer.get_token_type() == TokenType.SYMBOL \
        and self.tokenizer.get_symbol() == "[":
            is_array_access = True

            # push arr
            self.vm_writer.write_push(
                self.var_t_to_segment_t(var_props["kind"]), var_props["index"])

            self.write_terminal_tag(TokenType.SYMBOL, "[")

            # Move to next token
            self.tokenizer.has_more_tokens()

            # Compile the expression
            self.compile_expression()

            self.eat("]")
            self.write_terminal_tag(TokenType.SYMBOL, "]")

            # add
            self.vm_writer.write_arithmetic(ArithmeticCType.ADD)

            # Move to the next token
            self.tokenizer.has_more_tokens()

        # Eat assignment operator
        self.eat("=")
        self.write_terminal_tag(TokenType.SYMBOL, "=")

        # Move to next token
        self.tokenizer.has_more_tokens()

        self.compile_expression()

        self.eat(";")
        self.write_terminal_tag(TokenType.SYMBOL, ";")

        # Move to next token
        self.tokenizer.has_more_tokens()

        if not is_array_access:
            self.vm_writer.write_pop(var_props["seg_type"], var_props["index"])
        else:
            # pop temp 0
            self.vm_writer.write_pop(SegmentType.TEMP, 0)

            # pop pointer 1
            self.vm_writer.write_pop(SegmentType.POINTER, 1)

            # push temp 0
            self.vm_writer.write_push(SegmentType.TEMP, 0)

            # pop that 0
            self.vm_writer.write_pop(SegmentType.THAT, 0)

        self.out_stream.write("</letStatement>\n")

    # 'if' '(' expression ')' '{' statements '}' ('else' '{' statements '}')?
    def compile_if(self):
        self.out_stream.write("<ifStatement>\n")
        self.vm_writer.write_comment("if statement")

        self.write_terminal_tag(TokenType.KEYWORD, "if")

        # get the next labels
        L1, L2 = self.get_if_labels()

        # Move to next token
        self.tokenizer.has_more_tokens()

        self.eat("(")
        self.write_terminal_tag(TokenType.SYMBOL, "(")

        # Move to next token
        self.tokenizer.has_more_tokens()

        # write code for the expression
        self.compile_expression()

        self.eat(")")
        self.write_terminal_tag(TokenType.SYMBOL, ")")

        # Move to next token
        self.tokenizer.has_more_tokens()

        # Negate the condition: if it evaluated to false, jump to L1
        self.vm_writer.write_arithmetic(ArithmeticCType.NOT)

        self.vm_writer.write_if(L1)

        self.eat("{")
        self.write_terminal_tag(TokenType.SYMBOL, "{")

        # Move to next token
        self.tokenizer.has_more_tokens()

        # Compile if-block body
        self.compile_statements()

        self.vm_writer.write_goto(L2)

        self.vm_writer.write_label(L1)

        self.eat("}")
        self.write_terminal_tag(TokenType.SYMBOL, "}")

        # Move to next token
        self.tokenizer.has_more_tokens()

        # Handle the optional else block
        if self.tokenizer.get_token_type() == TokenType.KEYWORD \
        and self.tokenizer.get_keyword_type() == KeywordType.ELSE:
            self.write_terminal_tag(TokenType.KEYWORD, "else")

            # Move to next token
            self.tokenizer.has_more_tokens()

            self.eat("{")
            self.write_terminal_tag(TokenType.SYMBOL, "{")

            # Move to next token
            self.tokenizer.has_more_tokens()

            self.compile_statements()

            self.eat("}")
            self.write_terminal_tag(TokenType.SYMBOL, "}")

            # Move to next token
            self.tokenizer.has_more_tokens()

        self.vm_writer.write_label(L2)

        # Write closing tag
        self.out_stream.write("</ifStatement>\n")

    # 'while' '(' expression ')' '{' statements '}'
    def compile_while_statement(self):
        self.out_stream.write("<whileStatement>\n")

        self.write_terminal_tag(TokenType.KEYWORD, "while")
        L1, L2 = self.get_while_labels()

        self.vm_writer.write_label(L1)

        # Move to next token
        self.tokenizer.has_more_tokens()

        self.eat("(")
        self.write_terminal_tag(TokenType.SYMBOL, "(")

        # Move to next token
        self.tokenizer.has_more_tokens()

        self.compile_expression()

        self.eat(")")
        self.write_terminal_tag(TokenType.SYMBOL, ")")

        self.vm_writer.write_arithmetic(ArithmeticCType.NOT)
        self.vm_writer.write_if(L2)
        # Move to next token
        self.tokenizer.has_more_tokens()

        self.eat("{")
        self.write_terminal_tag(TokenType.SYMBOL, "{")

        # Move to next token
        self.tokenizer.has_more_tokens()

        # Compile block body
        self.compile_statements()

        self.eat("}")
        self.write_terminal_tag(TokenType.SYMBOL, "}")

        # Move to next token
        self.tokenizer.has_more_tokens()
        self.vm_writer.write_goto(L1)
        self.vm_writer.write_label(L2)
        # Write closing tag
        self.out_stream.write("</whileStatement>\n")

    # 'do' subroutineCall ';'
    def compile_do(self):
        # To store first and second parts of subroutine call
        first_part, second_part = None, None
        # To store nArgs passed to the subroutine
        nArgs = 0

        # Write opening tag
        self.out_stream.write("<doStatement>\n")

        # Write do keyword tag
        self.write_terminal_tag(TokenType.KEYWORD, "do")

        # Move to next token
        self.tokenizer.has_more_tokens()

        # Handle subroutineCall
        if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
            first_part = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(TokenType.IDENTIFIER, first_part)
        else:
            raise AssertionError("Not a valid subroutine/class name!!!")

        var_props = self.lookup_st(first_part)

        if var_props:
            self.vm_writer.write_push(
                self.var_t_to_segment_t(var_props["kind"]), var_props["index"])

        # Move to next token
        self.tokenizer.has_more_tokens()

        # Dotted call: obj.method(...) or ClassName.subroutine(...)
        if self.tokenizer.get_token_type() == TokenType.SYMBOL \
            and self.tokenizer.get_symbol() == ".":
            self.write_terminal_tag(TokenType.SYMBOL, ".")

            # Move to next token
            self.tokenizer.has_more_tokens()

            # Handle subroutineCall
            if self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
                second_part = self.tokenizer.get_cur_ident()
                self.write_terminal_tag(TokenType.IDENTIFIER, second_part)
            else:
                raise AssertionError("Not a valid subroutine/class name!!!")

            # Move to next token
            self.tokenizer.has_more_tokens()

        self.eat("(")
        self.write_terminal_tag(TokenType.SYMBOL, "(")

        # Move to next token
        self.tokenizer.has_more_tokens()

        self.out_stream.write("<expressionList>\n")
        if not (self.tokenizer.get_token_type() == TokenType.SYMBOL \
            and self.tokenizer.get_symbol() == ")"):
            nArgs = self.compile_expression_list()
        self.out_stream.write("</expressionList>\n")

        self.eat(")")
        self.write_terminal_tag(TokenType.SYMBOL, ")")

        # Move to next token
        self.tokenizer.has_more_tokens()

        self.eat(";")
        self.write_terminal_tag(TokenType.SYMBOL, ";")

        # Move to next token
        self.tokenizer.has_more_tokens()

        if var_props:
            if second_part:
                self.vm_writer.write_call(f"{var_props['type']}.{second_part}",
                                          nArgs + 1)
        else:
            # Write method call
            if second_part:
                # Of some other class
                self.vm_writer.write_call(f"{first_part}.{second_part}", nArgs)
            else:
                # Of this class
                self.vm_writer.write_call(f"{self.class_name}.{first_part}",
                                          nArgs)

        # Discard the returned value (call-and-return contract)
        self.vm_writer.write_pop(SegmentType.TEMP, 0)

        # Write closing tag
        self.out_stream.write("</doStatement>\n")

    # 'return' expression? ';'
    def compile_return(self):
        # Write opening tag
        self.out_stream.write("<returnStatement>\n")

        # Write do keyword tag
        self.write_terminal_tag(TokenType.KEYWORD, "return")

        # Move to next token
        self.tokenizer.has_more_tokens()

        if self.tokenizer.get_token_type() == TokenType.SYMBOL \
            and self.tokenizer.get_symbol() == ";":
            self.write_terminal_tag(TokenType.SYMBOL, ";")
            # Void return: push a dummy value per the calling convention
            self.vm_writer.write_push(SegmentType.CONST, 0)
        else:
            self.compile_expression()
            self.eat(";")
            self.write_terminal_tag(TokenType.SYMBOL, ";")

        # Move to next token
        self.tokenizer.has_more_tokens()

        # Write return command
        self.vm_writer.write_return()
        # Write closing tag
        self.out_stream.write("</returnStatement>\n")

    # term (op term)*
    def compile_expression(self):
        self.out_stream.write("<expression>\n")

        # Compile term
        self.compile_term()

        # Handle (op term)*
        while self.tokenizer.get_token_type() == TokenType.SYMBOL \
            and self.tokenizer.get_symbol() in allowed_op:
            symbol = self.tokenizer.get_symbol()
            # Write tag for operation symbol
            self.write_terminal_tag(TokenType.SYMBOL,
                                    self.tokenizer.get_symbol())

            # Move to next token
            self.tokenizer.has_more_tokens()

            # Compile term
            self.compile_term()

            # Apply operation
            self.vm_writer.write_arithmetic(allowed_op[symbol])

        # Write closing tag
        self.out_stream.write("</expression>\n")

    # integerConstant | stringConstant | keywordConstant | varName |
    # varName '[' expression ']' | subroutineCall | '(' expression ')'
    # | unaryOp term
    def compile_term(self):
        self.out_stream.write("<term>\n")

        if self.tokenizer.get_token_type() == TokenType.INT_CONST:
            self.write_terminal_tag(TokenType.INT_CONST,
                                    self.tokenizer.get_int_val())
            self.vm_writer.write_push(SegmentType.CONST,
                                      self.tokenizer.get_int_val())
            self.tokenizer.has_more_tokens()

        elif self.tokenizer.get_token_type() == TokenType.STRING_CONST:
            string_val = self.tokenizer.get_string_val()
            self.write_terminal_tag(TokenType.STRING_CONST, string_val)

            # Build the string object at run time: String.new(length),
            # then appendChar for each character
            self.vm_writer.write_push(SegmentType.CONST, len(string_val))
            self.vm_writer.write_call("String.new", 1)
            for char in string_val:
                self.vm_writer.write_push(SegmentType.CONST, ord(char))
                self.vm_writer.write_call("String.appendChar", 2)

            self.tokenizer.has_more_tokens()

        elif self.tokenizer.get_token_type() == TokenType.KEYWORD \
            and self.tokenizer.get_keyword_type() in keyword_constants:
            # keyword constant
            kc = self.tokenizer.get_cur_ident()
            self.write_terminal_tag(TokenType.KEYWORD, kc)

            if kc == "null" or kc == "false":
                # push const 0
                self.vm_writer.write_push(SegmentType.CONST, 0)

            elif kc == "true":
                # push constant 1 and negate (true == -1)
                self.vm_writer.write_push(SegmentType.CONST, 1)
                self.vm_writer.write_arithmetic(ArithmeticCType.NEG)

            elif kc == "this":
                # push pointer 0
                self.vm_writer.write_push(SegmentType.POINTER, 0)

            self.tokenizer.has_more_tokens()

        elif self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
            first_part, second_part = None, None
            nArgs = 0
            var_name = self.tokenizer.get_cur_ident()
            first_part = var_name
            var_props = self.lookup_st(var_name)

            self.write_terminal_tag(TokenType.IDENTIFIER, var_name)

            if var_props:
                self.vm_writer.write_push(
                    self.var_t_to_segment_t(var_props["kind"]),
                    var_props["index"])

            # Move to next token
            self.tokenizer.has_more_tokens()

            if self.tokenizer.get_token_type() == TokenType.SYMBOL:
                # Handle varName '[' expression ']'
                if self.tokenizer.get_symbol() == "[":
                    self.eat("[")
                    self.write_terminal_tag(TokenType.SYMBOL, "[")
                    self.tokenizer.has_more_tokens()

                    self.compile_expression()

                    self.eat(']')
                    self.write_terminal_tag(TokenType.SYMBOL, "]")

                    # add
                    self.vm_writer.write_arithmetic(ArithmeticCType.ADD)

                    # pop pointer 1
                    self.vm_writer.write_pop(SegmentType.POINTER, 1)

                    # push that 0
                    self.vm_writer.write_push(SegmentType.THAT, 0)

                    # Move to next token
                    self.tokenizer.has_more_tokens()

                # Handle subroutineCall
                elif self.tokenizer.get_symbol() == "(" \
                    or self.tokenizer.get_symbol() == ".":
                    # Is a method call
                    if self.tokenizer.get_symbol() == ".":
                        self.write_terminal_tag(TokenType.SYMBOL, ".")
                        # Move to next token
                        self.tokenizer.has_more_tokens()

                        # Handle subroutineCall
                        if self.tokenizer.get_token_type(
                        ) == TokenType.IDENTIFIER:
                            second_part = self.tokenizer.get_cur_ident()
                            self.write_terminal_tag(TokenType.IDENTIFIER,
                                                    second_part)
                        else:
                            raise AssertionError(
                                "Not a valid subroutine/class name!!!")

                        # Move to next token
                        self.tokenizer.has_more_tokens()

                    self.eat("(")
                    self.write_terminal_tag(TokenType.SYMBOL, "(")

                    # Move to next token
                    self.tokenizer.has_more_tokens()
                    self.out_stream.write("<expressionList>\n")
                    if not (self.tokenizer.get_token_type() == TokenType.SYMBOL \
                        and self.tokenizer.get_symbol() == ")"):
                        nArgs = self.compile_expression_list()
                    self.out_stream.write("</expressionList>\n")

                    self.eat(")")
                    self.write_terminal_tag(TokenType.SYMBOL, ")")

                    # Move to next token
                    self.tokenizer.has_more_tokens()

            if var_props:
                # Is it a method call?
                if second_part:
                    # Of some other class
                    self.vm_writer.write_call(
                        f"{var_props['type']}.{second_part}", nArgs + 1)
            # There is no variable with the given name
            else:
                if second_part:
                    # Of some other class
                    self.vm_writer.write_call(f"{first_part}.{second_part}",
                                              nArgs)
                else:
                    # Of this class
                    self.vm_writer.write_call(
                        f"{self.class_name}.{first_part}", nArgs)

        elif self.tokenizer.get_token_type() == TokenType.SYMBOL:
            # Handle '(' expression ')'
            if self.tokenizer.get_symbol() == '(':
                self.eat("(")
                self.write_terminal_tag(TokenType.SYMBOL, "(")
                self.tokenizer.has_more_tokens()

                self.compile_expression()

                self.eat(")")
                self.write_terminal_tag(TokenType.SYMBOL, ")")
                self.tokenizer.has_more_tokens()
            # Handle unaryOp term
            elif self.tokenizer.get_symbol() in allowed_unary_op:
                unary_op = self.tokenizer.get_symbol()
                self.write_terminal_tag(TokenType.SYMBOL,
                                        self.tokenizer.get_symbol())

                self.tokenizer.has_more_tokens()
                self.compile_term()

                self.vm_writer.write_arithmetic(allowed_unary_op[unary_op])
            else:
                raise AssertionError("( or unary Op expected!!")

        self.out_stream.write("</term>\n")

    # expression (',' expression)*
    def compile_expression_list(self):
        self.compile_expression()
        arg_count = 1

        while (self.tokenizer.get_token_type() == TokenType.SYMBOL) \
            and (self.tokenizer.get_symbol() == ","):
            self.write_terminal_tag(TokenType.SYMBOL, ",")
            self.tokenizer.has_more_tokens()
            self.compile_expression()
            arg_count += 1

        return arg_count

    # eat the given string, else raise error
    def eat(self, string):
        if self.tokenizer.get_token_type() == TokenType.SYMBOL:
            if not (self.tokenizer.get_symbol() == string):
                raise AssertionError(
                    f"Expected symbol {string}, found: {self.tokenizer.get_symbol()}"
                )
        else:
            raise AssertionError(
                f"Expected symbol {string}, found a non-symbol token")

    # Utility method to check whether
    # the current token is a valid data type
    def is_valid_type(self):
        # If built-in data type
        if self.tokenizer.get_token_type() == TokenType.KEYWORD:
            # if int, char, boolean
            if self.tokenizer.get_keyword_type() in data_types:
                return True

        # If custom data type
        elif self.tokenizer.get_token_type() == TokenType.IDENTIFIER:
            return True

        # Invalid data type
        return False

    # Lookup variable in symbol table
    def lookup_st(self, v_name):
        '''return variable properties'''

        # To store looked up props
        v_props = {}

        # lookup subroutine level table
        v_kind = self.subroutine_level_st.get_kind_of(v_name)

        # var not found in subroutine level st
        if v_kind == SymbolKind.NONE:
            # lookup class level table
            v_kind = self.class_level_st.get_kind_of(v_name)

            if v_kind == SymbolKind.NONE:
                return False

            v_props["kind"] = v_kind
            v_props["type"] = self.class_level_st.get_type_of(v_name)
            v_props["index"] = self.class_level_st.get_index_of(v_name)

            # return class level variable data
            return v_props

        # Data found for subroutine level table
        v_props["kind"] = v_kind
        v_props["type"] = self.subroutine_level_st.get_type_of(v_name)
        v_props["index"] = self.subroutine_level_st.get_index_of(v_name)

        return v_props

    def var_t_to_segment_t(self, v_kind: SymbolKind) -> SegmentType:
        if v_kind == SymbolKind.STATIC:
            return SegmentType.STATIC
        elif v_kind == SymbolKind.ARG:
            return SegmentType.ARG
        elif v_kind == SymbolKind.VAR:
            return SegmentType.LOCAL
        elif v_kind == SymbolKind.FEILD:
            return SegmentType.THIS
        else:
            raise AssertionError("No segment kind for given v_kind!!")