class CompilationEngine:
    CLASS_VAR_DEC_KEYWORDS = ['static', 'field']
    SUBROUTINE_DEC_KEYWORDS = ['constructor', 'function', 'method']
    BINARY_OPERATOR_SYMBOLS = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
    UNARY_OPERATOR_SYMBOLS = ['-', '~']
    BINARY_OPERATORS_TO_COMMAND = {
        '+': 'add',
        '-': 'sub',
        '=': 'eq',
        '>': 'gt',
        '<': 'lt',
        '&': 'and',
        '|': 'or'
    }
    UNARY_OPERATORS_TO_COMMAND = {'-': 'neg', '~': 'not'}
    TYPE_TO_TAG = {
        'STRING_CONST': 'stringConstant',
        'INT_CONST': 'integerConstant',
        'KEYWORD': 'keyword',
        'IDENTIFIER': 'identifier',
        'SYMBOL': 'symbol'
    }

    SYMBOLS_TO_XML_CONVENTION = {
        '<': '&lt;',
        '>': '&gt;',
        '&': '&amp;',
        '"': '&quot;'
    }

    def __init__(self, input_file_path, vm_writer: VMWriter):
        self.jack_tokenizer = JackTokenizer(input_file_path)
        self.symbol_table = SymbolTable()
        self.vm_writer = vm_writer
        if self.jack_tokenizer.has_more_tokens():
            self.compile_class()

    def compile_class(self):
        self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()
        self.symbol_table.define(self.jack_tokenizer.identifier(), 'NONE',
                                 'CLASS')
        self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()

        if self.jack_tokenizer.key_word(
        ) in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            self.compile_class_var_dec()
        if self.jack_tokenizer.key_word(
        ) in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.compile_subroutine()

        self.jack_tokenizer.advance()
        self.vm_writer.close()
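
    # NOTE: write_token and write_type below are leftovers from the XML-emitting
    # variant of this engine (cf. the last example). They reference
    # self.output_file, which this class never opens, and none of the compile_*
    # methods use them.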

    def write_token(self, token_name):
        type_tag = CompilationEngine.TYPE_TO_TAG[
            self.jack_tokenizer.token_type()]
        self.output_file.write('<{0}> {1} </{0}>\n'.format(
            type_tag, token_name))
        self.jack_tokenizer.advance()

    def compile_class_var_dec(self):
        while self.jack_tokenizer.key_word(
        ) in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            kind = ''
            if self.jack_tokenizer.key_word() == 'field':
                kind = 'FIELD'
            elif self.jack_tokenizer.key_word() == 'static':
                kind = 'STATIC'
            self.jack_tokenizer.advance()
            field_type = self.get_type()
            self.jack_tokenizer.advance()
            self.symbol_table.define(self.jack_tokenizer.identifier(),
                                     field_type, kind)

            self.jack_tokenizer.advance()

            while self.jack_tokenizer.symbol() != ';':
                self.jack_tokenizer.advance()
                self.symbol_table.define(self.jack_tokenizer.identifier(),
                                         field_type, kind)
                self.jack_tokenizer.advance()

            self.jack_tokenizer.advance()

    def write_type(self):
        if self.jack_tokenizer.token_type() == 'KEYWORD':
            self.write_token(self.jack_tokenizer.key_word())
        elif self.jack_tokenizer.token_type() == 'IDENTIFIER':
            self.write_token(self.jack_tokenizer.identifier())

    def compile_subroutine(self):
        self.vm_writer.zero_branching_indexes()
        while self.jack_tokenizer.key_word(
        ) in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.symbol_table.start_subroutine()
            constructor = self.jack_tokenizer.key_word() == 'constructor'

            method = False
            if self.jack_tokenizer.key_word() == 'method':
                method = True
                self.symbol_table.define('this',
                                         self.symbol_table.get_class_name(),
                                         'ARG')

            self.jack_tokenizer.advance()
            self.jack_tokenizer.advance()
            self.symbol_table.define(self.jack_tokenizer.identifier(), 'NONE',
                                     'SUBROUTINE')
            name = self.symbol_table.get_class_name(
            ) + '.' + self.jack_tokenizer.identifier()
            self.jack_tokenizer.advance()
            self.jack_tokenizer.advance()
            self.compile_parameter_list()
            self.jack_tokenizer.advance()

            self.jack_tokenizer.advance()
            var_num = 0
            while self.jack_tokenizer.key_word() == 'var':
                var_num += self.compile_var_dec()
            self.vm_writer.write_function(name, var_num)
            if method:
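                # A method receives the target object as argument 0: anchor
                # THIS (pointer 0) on it before compiling the body.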
                self.vm_writer.write_push('ARG', 0)
                self.vm_writer.write_pop('POINTER', 0)
            elif constructor:
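                # Constructor prologue: allocate one word per field with
                # Memory.alloc and anchor THIS at the returned base address.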
                field_count = self.symbol_table.var_count('FIELD')
                self.vm_writer.write_push('CONST', field_count)
                self.vm_writer.write_call('Memory.alloc', 1)
                self.vm_writer.write_pop('POINTER', 0)
            self.compile_statements()
            self.jack_tokenizer.advance()

    def compile_parameter_list(self):
        if self.jack_tokenizer.symbol() != ')':
            parameter_type = self.get_type()
            self.jack_tokenizer.advance()
            self.symbol_table.define(self.jack_tokenizer.identifier(),
                                     parameter_type, 'ARG')
            self.jack_tokenizer.advance()
            while self.jack_tokenizer.symbol() == ",":
                self.jack_tokenizer.advance()
                parameter_type = self.get_type()
                self.jack_tokenizer.advance()
                self.symbol_table.define(self.jack_tokenizer.identifier(),
                                         parameter_type, 'ARG')
                self.jack_tokenizer.advance()

    def get_type(self):
        if self.jack_tokenizer.token_type() == 'KEYWORD':
            parameter_type = self.jack_tokenizer.key_word()
        elif self.jack_tokenizer.token_type() == 'IDENTIFIER':
            parameter_type = self.jack_tokenizer.identifier()
        return parameter_type

    def compile_var_dec(self):
        var_num = 1
        self.jack_tokenizer.advance()
        var_type = self.get_type()
        self.jack_tokenizer.advance()
        self.symbol_table.define(self.jack_tokenizer.identifier(), var_type,
                                 'VAR')
        self.jack_tokenizer.advance()

        while self.jack_tokenizer.symbol() == ",":
            var_num += 1
            self.jack_tokenizer.advance()
            self.symbol_table.define(self.jack_tokenizer.identifier(),
                                     var_type, 'VAR')
            self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()
        return var_num

    def compile_statements(self):
        while self.jack_tokenizer.token_type() == 'KEYWORD':
            if self.jack_tokenizer.key_word() == 'let':
                self.compile_let()
            elif self.jack_tokenizer.key_word() == 'if':
                self.compile_if()
            elif self.jack_tokenizer.key_word() == 'while':
                self.compile_while()
            elif self.jack_tokenizer.key_word() == 'do':
                self.compile_do()
            elif self.jack_tokenizer.key_word() == 'return':
                self.compile_return()

    def compile_do(self):
        self.jack_tokenizer.advance()

        name = self.jack_tokenizer.identifier()
        self.jack_tokenizer.advance()
        self.compile_subroutine_call(name)

        # must dispose of void function return value
        self.vm_writer.write_pop('TEMP', 0)
        self.jack_tokenizer.advance()

    def compile_subroutine_call(self, prefix_call=''):
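        # Three call forms are handled here: name(...) is a method call on the
        # current object (push pointer 0 as the implicit argument),
        # var.name(...) is a method call on the object held by var (push the
        # variable as argument 0), and Class.name(...) is a plain function or
        # constructor call with no implicit receiver.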
        if self.jack_tokenizer.symbol() == '(':
            # If the name isn't in the symbol table (or is recorded as a
            # subroutine), this is a call on a subroutine of the current class.
            kind = self.symbol_table.kind_of(prefix_call)
            subroutine = not kind or kind == 'SUBROUTINE'
            self.jack_tokenizer.advance()

            args_count = 0
            if subroutine:
                self.vm_writer.write_push('POINTER', 0)
                args_count += 1
            args_count += self.compile_expression_list()

            if subroutine:
                self.vm_writer.write_call(
                    self.symbol_table.get_class_name() + '.' + prefix_call,
                    args_count)
            else:
                self.vm_writer.write_call(prefix_call, args_count)
            self.jack_tokenizer.advance()
        elif self.jack_tokenizer.symbol() == '.':
            variable = False
            self.jack_tokenizer.advance()
            if self.symbol_table.kind_of(prefix_call) in ['VAR', 'FIELD']:
                variable = True
                variable_name = prefix_call
                prefix_call = self.symbol_table.type_of(prefix_call)
            prefix_call += '.{0}'.format(self.jack_tokenizer.identifier())
            self.jack_tokenizer.advance()
            self.jack_tokenizer.advance()

            args_count = 0
            if variable:
                self.vm_writer.write_push(
                    self.symbol_table.kind_of(variable_name),
                    self.symbol_table.index_of(variable_name))
                args_count += 1
            args_count += self.compile_expression_list()

            self.vm_writer.write_call(prefix_call, args_count)
            self.jack_tokenizer.advance()

    def compile_let(self):
        self.jack_tokenizer.advance()
        var_name = self.jack_tokenizer.identifier()
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.symbol() == '[':
            self.vm_writer.write_push(self.symbol_table.kind_of(var_name),
                                      self.symbol_table.index_of(var_name))
            self.jack_tokenizer.advance()
            self.compile_expression()
            self.vm_writer.write_arithmetic("add")
            self.jack_tokenizer.advance()
            self.jack_tokenizer.advance()
            self.compile_expression()
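            # Standard array-store idiom: stash the assigned value in temp 0,
            # point THAT (pointer 1) at the target address left on the stack,
            # then push the value back and store it through that 0.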
            self.vm_writer.write_pop('TEMP', 0)
            self.vm_writer.write_pop('POINTER', 1)
            self.vm_writer.write_push('TEMP', 0)
            self.vm_writer.write_pop('THAT', 0)
        else:
            self.jack_tokenizer.advance()
            self.compile_expression()
            self.vm_writer.write_pop(self.symbol_table.kind_of(var_name),
                                     self.symbol_table.index_of(var_name))

        self.jack_tokenizer.advance()

    def compile_while(self):
        while_idx = self.vm_writer.get_next_label_index('while')
        if_label = 'WHILE_IF_{0}'.format(while_idx)
        end_label = 'WHILE_END_{0}'.format(while_idx)

        self.vm_writer.write_label(if_label)
        self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()
        self.compile_expression()
        self.vm_writer.write_arithmetic('not')
        self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()
        self.vm_writer.write_if(end_label)
        self.compile_statements()
        self.vm_writer.write_goto(if_label)
        self.jack_tokenizer.advance()
        self.vm_writer.write_label(end_label)

    def compile_return(self):
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.symbol() != ';':
            self.compile_expression()
        else:
            self.vm_writer.write_push('CONST', 0)
        self.vm_writer.write_return()
        self.jack_tokenizer.advance()

    def compile_if(self):
        if_idx = self.vm_writer.get_next_label_index('if')
        else_label = 'IF_ELSE_{0}'.format(if_idx)
        end_label = 'IF_END_{0}'.format(if_idx)

        self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()
        self.compile_expression()
        self.vm_writer.write_arithmetic('not')
        self.jack_tokenizer.advance()

        self.jack_tokenizer.advance()
        self.vm_writer.write_if(else_label)
        self.compile_statements()
        self.jack_tokenizer.advance()
        self.vm_writer.write_goto(end_label)

        self.vm_writer.write_label(else_label)
        if self.jack_tokenizer.key_word() == 'else':
            self.jack_tokenizer.advance()
            self.jack_tokenizer.advance()
            self.compile_statements()
            self.jack_tokenizer.advance()

        self.vm_writer.write_label(end_label)

    def compile_expression(self):
        self.compile_term()

        while self.jack_tokenizer.symbol(
        ) in CompilationEngine.BINARY_OPERATOR_SYMBOLS:
            symbol = self.jack_tokenizer.symbol()
            self.jack_tokenizer.advance()

            self.compile_term()

            if symbol in self.BINARY_OPERATORS_TO_COMMAND:
                self.vm_writer.write_arithmetic(
                    self.BINARY_OPERATORS_TO_COMMAND[symbol])
            elif symbol == '*':
                self.vm_writer.write_call('Math.multiply', 2)
            elif symbol == '/':
                self.vm_writer.write_call('Math.divide', 2)

    def compile_term(self):
        token_type = self.jack_tokenizer.token_type()

        if token_type == 'IDENTIFIER':
            name = self.jack_tokenizer.identifier()
            self.jack_tokenizer.advance()
            if self.jack_tokenizer.symbol(
            ) == '(' or self.jack_tokenizer.symbol() == '.':
                self.compile_subroutine_call(name)
            elif self.jack_tokenizer.symbol() == '[':
                self.vm_writer.write_push(self.symbol_table.kind_of(name),
                                          self.symbol_table.index_of(name))
                self.jack_tokenizer.advance()
                self.compile_expression()
                self.jack_tokenizer.advance()
                self.vm_writer.write_arithmetic('add')
                self.vm_writer.write_pop('POINTER', 1)
                self.vm_writer.write_push('THAT', 0)
            else:
                kind = self.symbol_table.kind_of(name)
                idx = self.symbol_table.index_of(name)
                self.vm_writer.write_push(kind, idx)
        elif token_type == 'STRING_CONST':
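            # Build the string constant at run time: String.new(length), then
            # one String.appendChar call per character (appendChar returns the
            # string, so it stays on the stack for the next call).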
            string_const = self.jack_tokenizer.string_val()

            self.vm_writer.write_push("CONST", len(string_const))
            self.vm_writer.write_call("String.new", 1)
            for char in string_const:
                self.vm_writer.write_push('CONST', ord(char))
                self.vm_writer.write_call("String.appendChar", 2)
            self.jack_tokenizer.advance()
        elif token_type == 'KEYWORD':
            keyword = self.jack_tokenizer.key_word()
            if keyword == 'true':
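                # Jack 'true' is -1 (all bits set): push 1 and negate it.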
                self.vm_writer.write_push('CONST', 1)
                self.vm_writer.write_arithmetic('neg')
            elif keyword == 'false' or keyword == 'null':
                self.vm_writer.write_push('CONST', 0)
            elif keyword == 'this':
                self.vm_writer.write_push('POINTER', 0)
            self.jack_tokenizer.advance()
        elif token_type == 'SYMBOL':
            if self.jack_tokenizer.symbol() == '(':
                self.jack_tokenizer.advance()
                self.compile_expression()
                self.jack_tokenizer.advance()
            elif self.jack_tokenizer.symbol(
            ) in CompilationEngine.UNARY_OPERATOR_SYMBOLS:
                command = CompilationEngine.UNARY_OPERATORS_TO_COMMAND[
                    self.jack_tokenizer.symbol()]
                self.jack_tokenizer.advance()
                self.compile_term()
                self.vm_writer.write_arithmetic(command)
        elif token_type == 'INT_CONST':
            self.vm_writer.write_push('CONST', self.jack_tokenizer.int_val())
            self.jack_tokenizer.advance()

    def compile_expression_list(self):
        expression_count = 0
        if self.jack_tokenizer.symbol() != ')':
            self.compile_expression()
            expression_count += 1
            while self.jack_tokenizer.symbol() == ',':
                self.jack_tokenizer.advance()
                self.compile_expression()
                expression_count += 1
        return expression_count
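

# A minimal driver sketch for the VM-emitting engine above, not part of the
# original example. "VMWriter" is assumed to live in a sibling module (the
# module name is a guess); the engine compiles the whole class as a side
# effect of construction and closes the writer itself in compile_class.
from VMWriter import VMWriter  # assumed module name


def translate(jack_path):
    vm_writer = VMWriter(jack_path.replace('.jack', '.vm'))
    CompilationEngine(jack_path, vm_writer)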

Example #2
class CompilationEngine(object):
    """
    Syntax analyzer for project 11: writes the XML parse tree and records
    class- and subroutine-level identifiers in a symbol table.
    """

    def __init__(self, input_file, output_file):
        self.tokenizer = JackTokenizer(input_file)
        self.out = open(output_file, 'w')
        self.token = None
        self.class_name = None

        #######################
        ### PROJECT 11 CODE ###
        #######################

        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(output_file)

        #######################


    def analyze(self):
        self.token = self.tokenizer.advance()
        self.compile_class()
        self.close()
        print('CLASS TABLE:')
        print(self.symbol_table.class_table)


    def close(self):
        if self.out:
            self.out.close()
            self.out = None


    def advance(self):
        self.token = self.tokenizer.advance()


    def write_to_out(self):
        pass


    def format_line(self, defined_or_used=''):
        token_type = self.tokenizer.token_type()
        running_index = ''
        if token_type == self.tokenizer.keyword_token:
            meat = self.tokenizer.keyword()
            defined_or_used=''
        elif token_type == self.tokenizer.symbol_token:
            meat = self.tokenizer.symbol()
            defined_or_used=''
        elif token_type == self.tokenizer.identifier_token:
            meat = self.tokenizer.identifier()

            #######################
            ### PROJECT 11 CODE ###
            #######################

            # Extending the compilation engine to output <var/argument/static/field...> instead of <identifier>
            name = self.tokenizer.token
            if self.symbol_table.kind_of(name):
                token_type = self.symbol_table.kind_of(name)
                running_index = str(self.symbol_table.index_of(name))
            elif name[0].islower():
                token_type = 'subroutine'
            else:
                token_type = 'class'

            #######################  

        elif token_type == self.tokenizer.int_const:
            meat = self.tokenizer.int_val()
            defined_or_used=''
        elif token_type == self.tokenizer.string_const:
            meat = self.tokenizer.string_val()
            defined_or_used=''
        else:
            raise ValueError('Something went wrong with token: {}'.format(self.token))
        
        if defined_or_used != '':
            defined_or_used += ' '
        if running_index != '':
            running_index = ' ' + running_index
        formatted_line = '<{2}{0}{3}> {1} </{2}{0}{3}>\n'.format(token_type, meat, defined_or_used, running_index)
        return formatted_line


    #########################
    ### PROGRAM STRUCTURE ###
    #########################

    def compile_class(self):
        """
        ####################################################################
        ### class: 'class' className '{' classVarDec* subroutineDec* '}' ###
        ####################################################################
        """

        self.out.write('<class>\n')

        # 'class'
        keyword_line = self.format_line()
        self.out.write(keyword_line)
        
        # className
        self.advance()

        #######################
        ### PROJECT 11 CODE ###
        #######################

        self.class_name = self.tokenizer.token

        ####################### 

        identifier_line = self.format_line('defined')
        self.out.write(identifier_line)

        # '{'
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        ### classVarDec* subroutineDec* ###
        self.advance()
        # classVarDec*
        while self.tokenizer.token_type() == self.tokenizer.keyword_token and self.tokenizer.keyword() in [self.tokenizer.key_field, self.tokenizer.key_static]:
            self.compile_class_var_dec()
        
        # subroutineDec*
        while  self.tokenizer.token_type() == self.tokenizer.keyword_token and self.tokenizer.keyword() in [self.tokenizer.key_function, self.tokenizer.key_method, self.tokenizer.key_constructor]:
            self.compile_subroutine()

        # '}'
        if  self.tokenizer.token_type() == self.tokenizer.symbol_token:
            # Class compilation is done
            symbol_line = self.format_line()
            self.out.write(symbol_line)
        else:
            raise ValueError('Something went wrong')

        # Closing with </class>
        self.out.write('</class>\n')
        is_successful = not self.advance()
        if is_successful:
            print('Compilation engine successfully finished')
        else:
            print('Something went wrong!')


    def compile_class_var_dec(self):
        """
        #######################################################################
        ### classVarDec: ('static'|'field') type varName (',' varName)* ';' ###
        #######################################################################
        """

        self.out.write('<classVarDec>\n')

        #######################
        ### PROJECT 11 CODE ###
        #######################

        # Extract field or static
        # field_or_static = re.match('<[a-z]*>', field_or_static_line)[0][1:-1]
        field_or_static = self.tokenizer.token

        #######################

        #  ('static' | 'field')
        field_or_static_line = self.format_line()
        self.out.write(field_or_static_line)

        # type
        self.advance()

        #######################
        ### PROJECT 11 CODE ###
        #######################

        # Extract token type
        type_ = self.tokenizer.token

        #######################

        type_line = self.format_line()
        self.out.write(type_line)

        # varName
        self.advance()

        #######################
        ### PROJECT 11 CODE ###
        #######################

        self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind=field_or_static)

        #######################

        varname_line = self.format_line('defined')
        self.out.write(varname_line)

        # (',' varName)*
        self.advance()
        symbol = self.tokenizer.symbol()
        while symbol == ',':
            colon_line = self.format_line()
            self.out.write(colon_line)
            self.advance()

            #######################
            ### PROJECT 11 CODE ###
            #######################

            self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind=field_or_static)

            #######################

            varname_line = self.format_line('defined')
            self.out.write(varname_line)
            self.advance()
            symbol = self.tokenizer.symbol()
        # symbol == ';'
        semicolon_line = self.format_line()
        self.out.write(semicolon_line)
        self.advance()

        self.out.write('</classVarDec>\n')


    def compile_subroutine(self):
        """
        ###########################################################################
        ### subroutineDec: ('constructor'|'function'|'method')                  ###             
        ###                ('void' | type) subroutineName '(' parameterList ')' ###
        ###                subroutineBody                                       ###
        ###########################################################################
        """

        #######################
        ### PROJECT 11 CODE ###
        #######################

        print()
        print('SUBROUTINE TABLE:')
        print(self.symbol_table.subroutine_table)
        print()
        self.symbol_table.start_subroutine()
        self.symbol_table.define(name='this', type_=self.class_name, kind='argument')

        #######################

        self.out.write('<subroutineDec>\n')

        # ('constructor'|'function'|'method')
        constructor_function_method_line = self.format_line()
        self.out.write(constructor_function_method_line)

        # ('void' | type)
        self.advance()
        void_or_type_line = self.format_line()
        self.out.write(void_or_type_line)

        # subroutineName 
        self.advance()
        subroutine_name_line = self.format_line('defined')
        self.out.write(subroutine_name_line)

        # '(' 
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        # parameterList
        self.advance()
        self.compile_parameter_list()

        # ')' 
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        
        ##################################################
        ### subroutineBody: '{' varDec* statements '}' ###
        ##################################################
        
        self.out.write('<subroutineBody>\n')

        # '{'
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        ###############
        ### varDec* ###
        ###############

        self.advance()
        while self.tokenizer.token == self.tokenizer.key_var:
            self.compile_var_dec()

        ##################
        ### statements ###
        ##################

        self.compile_statements()
        
        # '}'
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        self.advance()

        self.out.write('</subroutineBody>\n')
        self.out.write('</subroutineDec>\n')

        #######################
        ### PROJECT 11 CODE ###
        #######################

        print()
        print('SUBROUTINE TABLE:')
        print(self.symbol_table.subroutine_table)
        print()

        #######################


    def compile_parameter_list(self):
        """
        ############################################################
        ### parameterList: ((type varName) (',' type varName)*)? ###
        ############################################################
        """

        self.out.write('<parameterList>\n')

        # A symbol token here means the next token is ')', i.e. the parameter list is empty
        if self.tokenizer.token_type() != self.tokenizer.symbol_token:
            
            # type

            #######################
            ### PROJECT 11 CODE ###
            #######################

            type_ = self.tokenizer.token

            #######################

            type_line = self.format_line()
            self.out.write(type_line)
            
            # varName
            self.advance()

            #######################
            ### PROJECT 11 CODE ###
            #######################

            self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='argument')

            #######################

            var_name_line = self.format_line('defined')
            self.out.write(var_name_line)

            # If the next token is ',' we have more than one parameter
            self.advance()
            while self.tokenizer.token_type() == self.tokenizer.symbol_token and self.tokenizer.symbol() == ',':
                # ','
                comma_line = self.format_line()
                self.out.write(comma_line)

                # type
                self.advance()

                #######################
                ### PROJECT 11 CODE ###
                #######################

                type_ = self.tokenizer.token

                #######################

                type_line = self.format_line()
                self.out.write(type_line)

                # varName
                self.advance()

                #######################
                ### PROJECT 11 CODE ###
                #######################

                self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='argument')

                #######################

                var_name_line = self.format_line('defined')
                self.out.write(var_name_line)

                self.advance()

        self.out.write('</parameterList>\n')



    def compile_var_dec(self):
        """
        #####################################################
        ### varDec: 'var' type varName (',' varName)* ';' ###
        #####################################################
        """

        self.out.write('<varDec>\n')

        # var
        var_line = self.format_line()
        self.out.write(var_line)

        # type
        self.advance()

        #######################
        ### PROJECT 11 CODE ###
        #######################

        type_ = self.tokenizer.token

        #######################

        type_line = self.format_line()
        self.out.write(type_line)

        # varName
        self.advance()

        #######################
        ### PROJECT 11 CODE ###
        #######################

        self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='local')

        #######################

        var_name_line = self.format_line('defined')
        self.out.write(var_name_line)

        # (',' varName)*
        self.advance()
        while self.tokenizer.symbol() == ',':
            # ','
            comma_line = self.format_line()
            self.out.write(comma_line)

            # varName
            self.advance()

            #######################
            ### PROJECT 11 CODE ###
            #######################

            self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='local')

            #######################

            var_name_line = self.format_line('defined')
            self.out.write(var_name_line)

            self.advance()

        # ';'
        semicolon_line = self.format_line()
        self.out.write(semicolon_line)

        self.advance()

        self.out.write('</varDec>\n')


    ##################
    ### STATEMENTS ###
    ##################

    def compile_statements(self):
        """
        ##############################
        ### statements: statement* ###
        ##############################
        """
        
        self.out.write('<statements>\n')

        while self.tokenizer.token_type() != self.tokenizer.symbol_token:
            
            keyword = self.tokenizer.keyword()
            # letStatement
            if keyword == self.tokenizer.key_let:
                self.compile_let()

            # ifStatement
            elif keyword == self.tokenizer.key_if:
                self.compile_if()

            # whileStatement
            elif keyword == self.tokenizer.key_while:
                self.compile_while()

            # doStatement
            elif keyword == self.tokenizer.key_do:
                self.compile_do()

            # returnStatement
            elif keyword == self.tokenizer.key_return:
                self.compile_return()

            else:
                raise ValueError('Wrong statement: {}'.format(keyword))

        self.out.write('</statements>\n')


    def compile_do(self):
        """
        ############################################
        ### doStatement: 'do' subroutineCall ';' ###
        ############################################
        """

        self.out.write('<doStatement>\n')

        # 'do'
        do_line = self.format_line()
        self.out.write(do_line)

        # subroutineCall
        self.advance()
        self.compile_subroutine_call()

        # ';'
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        self.advance()

        self.out.write('</doStatement>\n')


    def compile_let(self):
        """
        ############################################################################
        ### letStatement: 'let' varName ('[' expression ']')? '=' expression ';' ###
        ############################################################################
        """

        self.out.write('<letStatement>\n')

        # let 
        let_line = self.format_line()
        self.out.write(let_line)

        # varName
        self.advance()

        var_name_line = self.format_line('used')
        self.out.write(var_name_line)

        # Check if '[' or '='
        self.advance()
        if self.tokenizer.token == '[':
            # '['
            symbol_line = self.format_line()
            self.out.write(symbol_line)

            # expression
            self.advance()
            self.compile_expression()

            # ']'
            symbol_line = self.format_line()
            self.out.write(symbol_line)
            self.advance()

        # '='
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        
        # expression
        self.advance()
        self.compile_expression()

        # ';'
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        self.advance()

        self.out.write('</letStatement>\n')


    def compile_while(self):
        """
        #####################################################################
        ### whileStatement: 'while' '(' expression ')' '{' statements '}' ###
        #####################################################################
        """

        self.out.write('<whileStatement>\n')

        # 'while'
        while_line = self.format_line()
        self.out.write(while_line)

        # '('
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        # expression
        self.advance()
        self.compile_expression()

        # ')'
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        # '{'
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        # statements
        self.advance()
        self.compile_statements()

        # '}'
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        self.advance()

        self.out.write('</whileStatement>\n')


    def compile_return(self):
        """
        ################################################
        ### ReturnStatement 'return' expression? ';' ###
        ################################################
        """

        self.out.write('<returnStatement>\n')

        # 'return'
        return_line = self.format_line()
        self.out.write(return_line)

        # Check for an optional expression
        self.advance()
        if self.tokenizer.token != ';':
            # 'expression'
            self.compile_expression()

        # ';'
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        self.advance()

        self.out.write('</returnStatement>\n')


    def compile_if(self):
        """
        ###############################################################
        ### ifStatement: 'if' '(' expression ')' '{' statements '}' ###
        ###              ('else' '{' statements '}')?               ###
        ###############################################################
        """

        self.out.write('<ifStatement>\n')

        # 'if'
        if_line = self.format_line()
        self.out.write(if_line)

        # '('
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        # expression
        self.advance()
        self.compile_expression()

        # ')'
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        # '{'
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        # statements
        self.advance()
        self.compile_statements()

        # '}'
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        # Check if there is 'else' part of ifStatement
        self.advance()
        if self.tokenizer.token_type() == self.tokenizer.keyword_token and self.tokenizer.keyword() == 'else':
            # 'else'
            else_line = self.format_line()
            self.out.write(else_line)

            # '{'
            self.advance()
            symbol_line = self.format_line()
            self.out.write(symbol_line)

            # statements
            self.advance()
            self.compile_statements()

            # '}'
            symbol_line = self.format_line()
            self.out.write(symbol_line)

            self.advance()

        self.out.write('</ifStatement>\n')

    ###################
    ### EXPRESSIONS ###
    ###################

    def compile_subroutine_call(self, skip_subroutine_name=False):
        """
        ############################################################################
        ### subroutineCall: subroutineName '(' expressionList ')' | (className | ###
        ### varName) '.' subroutineName '(' expressionList ')'                   ###
        ############################################################################
        """

        if not skip_subroutine_name:
            # subroutineName or className or varName
            subroutine_class_var_name_line = self.format_line('used')
            self.out.write(subroutine_class_var_name_line)
            self.advance()

        # Check '(' or '.'
        if self.tokenizer.token == '.':
            # '.'
            symbol_line = self.format_line()
            self.out.write(symbol_line)
            
            # subroutineName
            self.advance()
            subroutine_name_line = self.format_line('used')
            self.out.write(subroutine_name_line)

            self.advance()

        # '('
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        # expressionList
        self.advance()
        self.compile_expression_list()

        # ')'
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        self.advance()


    def compile_expression(self):
        """
        ###################################
        ### expression: term (op term)* ###
        ###################################
        """

        self.out.write('<expression>\n')

        ops = ['+', '-', '*', '/', '&', '|', '<', '>', '=']

        # 'term'
        self.compile_term()

        # Check if there is (op term)* part
        while self.tokenizer.token in ops:
            # op
            op_line = self.format_line()
            self.out.write(op_line)

            # term
            self.advance()
            self.compile_term()

        self.out.write('</expression>\n')

    
    def compile_term(self):
        """
        ################################################################
        ###  integerConstant | stringConstant | keywordConstant |    ###
        ###  varName | varName '[' expression ']' | subroutineCall | ###
        ###  '(' expression ')' | unaryOp term                       ###
        ################################################################
        """

        self.out.write('<term>\n')

        unary_ops = ['-', '~']

        #############################################
        ### constant, name, expression or unaryOp ###
        #############################################

        # '(' expression ')'
        if self.tokenizer.token == '(':
            # '('
            symbol_line = self.format_line()
            self.out.write(symbol_line)

            # expression
            self.advance()
            self.compile_expression()

            # ')'
            symbol_line = self.format_line()
            self.out.write(symbol_line)

            self.advance()
        
        # unaryOp term
        elif self.tokenizer.token in unary_ops:
            # unaryOp
            unary_op_line = self.format_line()
            self.out.write(unary_op_line)

            # term
            self.advance()
            self.compile_term()

        # integerConstant | stringConstant | keywordConstant |
        # varName | varName '[' expression ']' | subroutineCall
        else:
            # constant or name
            constant_or_name = self.format_line('used')
            self.out.write(constant_or_name)

            # varName '[' expression ']' | subroutineCall or end of compile_term function
            # Check if expression: '[', subroutineCall: '(' with parameter skip_subroutine_name = True,
            # otherwise end of compile_term function
            self.advance()
            # '[' expression ']'
            if self.tokenizer.token == '[':
                # '['
                symbol_line = self.format_line()
                self.out.write(symbol_line)

                # expression
                self.advance()
                self.compile_expression()

                # ']'
                symbol_line = self.format_line()
                self.out.write(symbol_line)

                self.advance()
            
            # subroutineCall with skip_subroutine_name=True
            elif self.tokenizer.token in ['(', '.']:
                self.compile_subroutine_call(skip_subroutine_name=True)

        self.out.write('</term>\n')


    def compile_expression_list(self):
        """
        ########################################################
        ### expressionList: (expression (',' expression)* )? ###
        ########################################################
        """

        self.out.write('<expressionList>\n')

        # Check if token is ')', if so we got empty expression list
        if self.tokenizer.token != ')':
            # 'expression'
            self.compile_expression()

            # Check if token is ',', if so we got more expressions
            while self.tokenizer.token == ',':
                # ','
                comma_line = self.format_line()
                self.out.write(comma_line)

                # expression
                self.advance()
                self.compile_expression()

        self.out.write('</expressionList>\n')
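

# A minimal driver sketch for the analyzer above, not part of the original
# example; it assumes the JackTokenizer, SymbolTable and VMWriter collaborators
# used in __init__ are importable (their exact module names are a guess).
def analyze_file(jack_path, xml_path):
    engine = CompilationEngine(jack_path, xml_path)
    engine.analyze()  # tokenizes the input, compiles the class, closes the output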

Example #3
class CompilationEngine:
    CLASS_VAR_DEC_KEYWORDS = ['static', 'field']
    SUBROUTINE_DEC_KEYWORDS = ['constructor', 'function', 'method']
    BINARY_OPERATOR_SYMBOLS = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
    UNARY_OPERATOR_SYMBOLS = ['-', '~']
    TYPE_TO_TAG = {'STRING_CONST': 'stringConstant', 'INT_CONST': 'integerConstant', 'KEYWORD': 'keyword',
                   'IDENTIFIER': 'identifier', 'SYMBOL': 'symbol'}

    SYMBOLS_TO_XML_CONVENTION = {'<': '&lt;', '>': '&gt;', '&': '&amp;', '"': '&quot;'}

    def __init__(self, input_file_path, output_file_path):
        self.output_file = open(output_file_path, 'w')
        self.jack_tokenizer = JackTokenizer(input_file_path)
        if self.jack_tokenizer.has_more_tokens():
            self.compile_class()

    def compile_class(self):
        self.output_file.write('<class>\n')
        self.jack_tokenizer.advance()  # get first token
        self.write_token(self.jack_tokenizer.key_word())
        self.write_token(self.jack_tokenizer.identifier())
        self.write_token(self.jack_tokenizer.symbol())

        if self.jack_tokenizer.key_word() in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            self.compile_class_var_dec()
        if self.jack_tokenizer.key_word() in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.compile_subroutine()

        self.write_token(self.jack_tokenizer.symbol())
        self.output_file.write('</class>')
        self.output_file.close()

    def write_token(self, token_name):
        type_tag = CompilationEngine.TYPE_TO_TAG[self.jack_tokenizer.token_type()]
        self.output_file.write('<{0}> {1} </{0}>\n'.format(
            type_tag, token_name))
        self.jack_tokenizer.advance()

    def compile_class_var_dec(self):
        while self.jack_tokenizer.key_word() in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            self.output_file.write('<classVarDec>\n')

            self.write_token(self.jack_tokenizer.key_word())
            self.write_type()
            self.write_token(self.jack_tokenizer.identifier())

            while self.jack_tokenizer.symbol() != ';':
                self.write_token(self.jack_tokenizer.symbol())
                self.write_token(self.jack_tokenizer.identifier())
            self.write_token(self.jack_tokenizer.symbol())

            self.output_file.write('</classVarDec>\n')

    def write_type(self):
        if self.jack_tokenizer.token_type() == 'KEYWORD':
            self.write_token(self.jack_tokenizer.key_word())
        elif self.jack_tokenizer.token_type() == 'IDENTIFIER':
            self.write_token(self.jack_tokenizer.identifier())

    def compile_subroutine(self):
        while self.jack_tokenizer.key_word() in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.output_file.write('<subroutineDec>\n')
            self.write_token(self.jack_tokenizer.key_word())
            self.write_type()
            self.write_token(self.jack_tokenizer.identifier())
            self.write_token(self.jack_tokenizer.symbol())
            self.compile_parameter_list()
            self.write_token(self.jack_tokenizer.symbol())


            self.output_file.write('<subroutineBody>\n')
            self.write_token(self.jack_tokenizer.symbol())
            while self.jack_tokenizer.key_word() == 'var':
                self.compile_var_dec()
            self.compile_statements()
            self.write_token(self.jack_tokenizer.symbol())
            self.output_file.write('</subroutineBody>\n')

            self.output_file.write('</subroutineDec>\n')

    def compile_parameter_list(self):
        self.output_file.write('<parameterList>\n')
        if self.jack_tokenizer.symbol() != ')':
            self.write_type()
            self.write_token(self.jack_tokenizer.identifier())
            while self.jack_tokenizer.symbol() == ",":
                self.write_token(self.jack_tokenizer.symbol())
                self.write_type()
                self.write_token(self.jack_tokenizer.identifier())
        self.output_file.write('</parameterList>\n')


    def compile_var_dec(self):
        self.output_file.write('<varDec>\n')
        self.write_token(self.jack_tokenizer.key_word())
        self.write_type()
        self.write_token(self.jack_tokenizer.identifier())
        while self.jack_tokenizer.symbol() == ",":
            self.write_token(self.jack_tokenizer.symbol())
            self.write_token(self.jack_tokenizer.identifier())
        self.write_token(self.jack_tokenizer.symbol())
        self.output_file.write('</varDec>\n')


    def compile_statements(self):
        self.output_file.write('<statements>\n')

        while self.jack_tokenizer.token_type() == 'KEYWORD':
            if self.jack_tokenizer.key_word() == 'let':
                self.compile_let()
            elif self.jack_tokenizer.key_word() == 'if':
                self.compile_if()
            elif self.jack_tokenizer.key_word() == 'while':
                self.compile_while()
            elif self.jack_tokenizer.key_word() == 'do':
                self.compile_do()
            elif self.jack_tokenizer.key_word() == 'return':
                self.compile_return()

        self.output_file.write('</statements>\n')

    def compile_do(self):
        self.output_file.write('<doStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())

        self.write_token(self.jack_tokenizer.identifier())
        self.compile_subroutine_call()

        self.write_token(self.jack_tokenizer.symbol())
        self.output_file.write('</doStatement>\n')

    def compile_subroutine_call(self):
        if self.jack_tokenizer.symbol() == '(':
            self.write_token(self.jack_tokenizer.symbol())
            self.compile_expression_list()
            self.write_token(self.jack_tokenizer.symbol())
        elif self.jack_tokenizer.symbol() == '.':
            self.write_token(self.jack_tokenizer.symbol())
            self.write_token(self.jack_tokenizer.identifier())
            self.write_token(self.jack_tokenizer.symbol())
            self.compile_expression_list()
            self.write_token(self.jack_tokenizer.symbol())

    def compile_let(self):
        self.output_file.write('<letStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())
        self.write_token(self.jack_tokenizer.identifier())
        if self.jack_tokenizer.symbol() == '[':
            self.write_token(self.jack_tokenizer.symbol())
            self.compile_expression()
            self.write_token(self.jack_tokenizer.symbol())
        self.write_token(self.jack_tokenizer.symbol())
        self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())
        self.output_file.write('</letStatement>\n')

    def compile_while(self):
        self.output_file.write('<whileStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())
        self.write_token(self.jack_tokenizer.symbol())
        self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())
        self.write_token(self.jack_tokenizer.symbol())
        self.compile_statements()
        self.write_token(self.jack_tokenizer.symbol())
        self.output_file.write('</whileStatement>\n')


    def compile_return(self):
        self.output_file.write('<returnStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())
        if self.jack_tokenizer.symbol() != ';':
            self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())
        self.output_file.write('</returnStatement>\n')


    def compile_if(self):
        self.output_file.write('<ifStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())
        self.write_token(self.jack_tokenizer.symbol())
        self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())

        self.write_token(self.jack_tokenizer.symbol())
        self.compile_statements()
        self.write_token(self.jack_tokenizer.symbol())
        if self.jack_tokenizer.key_word() == 'else':
            self.write_token(self.jack_tokenizer.key_word())
            self.write_token(self.jack_tokenizer.symbol())
            self.compile_statements()
            self.write_token(self.jack_tokenizer.symbol())

        self.output_file.write('</ifStatement>\n')


    def compile_expression(self):
        self.output_file.write('<expression>\n')
        self.compile_term()
        while self.jack_tokenizer.symbol() in CompilationEngine.BINARY_OPERATOR_SYMBOLS:
            symbol = self.jack_tokenizer.symbol()
            if symbol in CompilationEngine.SYMBOLS_TO_XML_CONVENTION:
                symbol = CompilationEngine.SYMBOLS_TO_XML_CONVENTION[symbol]
            self.write_token(symbol)
            self.compile_term()
        self.output_file.write('</expression>\n')


    def compile_term(self):
        self.output_file.write('<term>\n')
        token_type = self.jack_tokenizer.token_type()
        if token_type == 'IDENTIFIER':
            self.write_token(self.jack_tokenizer.identifier())
            if self.jack_tokenizer.symbol() == '[':
                self.write_token(self.jack_tokenizer.symbol())
                self.compile_expression()
                self.write_token(self.jack_tokenizer.symbol())
            elif self.jack_tokenizer.symbol() == '(' or self.jack_tokenizer.symbol() == '.':
                self.compile_subroutine_call()
        elif token_type == 'STRING_CONST':
            self.write_token(self.jack_tokenizer.string_val())
        elif token_type == 'INT_CONST':
            self.write_token(self.jack_tokenizer.int_val())
        elif token_type == 'KEYWORD':
            self.write_token(self.jack_tokenizer.key_word())
        elif token_type == 'SYMBOL':
            if self.jack_tokenizer.symbol() == '(':
                self.write_token(self.jack_tokenizer.symbol())
                self.compile_expression()
                self.write_token(self.jack_tokenizer.symbol())
            elif self.jack_tokenizer.symbol() in CompilationEngine.UNARY_OPERATOR_SYMBOLS:
                self.write_token(self.jack_tokenizer.symbol())
                self.compile_term()
        self.output_file.write('</term>\n')

    def compile_expression_list(self):
        self.output_file.write('<expressionList>\n')
        if self.jack_tokenizer.symbol() != ')':
            self.compile_expression()
            while self.jack_tokenizer.symbol() == ',':
                self.write_token(self.jack_tokenizer.symbol())
                self.compile_expression()
        self.output_file.write('</expressionList>\n')
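

# A minimal usage sketch for the XML-only analyzer above, not part of the
# original example: constructing the engine tokenizes the .jack source and
# writes the full parse tree to the given output file in one pass.
import sys

if __name__ == '__main__':
    jack_file = sys.argv[1]
    CompilationEngine(jack_file, jack_file.replace('.jack', '.xml'))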