Example #1
import xml.etree.ElementTree as ET
from xml.dom import minidom

# VMWriter, SymbolTable, the tokenizer module, and the grammar constants used
# below (CLASS_VARS, SUBROUTINE_TYPES, CLASS_VAR_DEC, COMMA, OPS, ...) are
# assumed to be provided by the other modules of this Jack compiler project.
class CompilationEngine:
    def __init__(self, token_stream, out_file, xml_name):
        '''
        creates a new compilation engine with the given input and output.
        The next method called must be compile_class().
        '''
        self.stream = token_stream
        self.writer = VMWriter(out_file)
        self.symbols = SymbolTable()
        self.xml_name = xml_name
        self.root = ET.Element('class')

        self.stream.advance()
        assert self.stream.keyword() == 'class'

    def add_terminal(self, root, text):
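        '''
        writes the current token as a terminal XML node under root,
        then advances the token stream.
        '''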
        terminal = ET.SubElement(root, self.stream.token_type())
        terminal.text = ' {text} '.format(text=text)
        if self.stream.has_more_tokens():
            self.stream.advance()

    def compile_class(self):
        '''
        compiles a complete class
        '''
        self.add_terminal(self.root, self.stream.keyword())
        self.class_name = self.stream.identifier()
        self.add_terminal(self.root, self.class_name)
        self.add_terminal(self.root, self.stream.symbol())

        while self.stream.token_type() == tokenizer.KEYWORD and self.stream.keyword() in CLASS_VARS:
            self.compile_class_var_dec()

        while self.stream.token_type() == tokenizer.KEYWORD and self.stream.keyword() in SUBROUTINE_TYPES:
            self.compile_subroutine()

        self.add_terminal(self.root, self.stream.symbol())

    def compile_class_var_dec(self):
        '''
        compiles a static declaration or a field declaration.
        '''
        class_var_root = ET.SubElement(self.root, CLASS_VAR_DEC)
        kind = self.stream.keyword()
        self.add_terminal(class_var_root, kind)
        if self.stream.token_type() == tokenizer.KEYWORD:
            type_name = self.stream.keyword()
        else:
            type_name = self.stream.identifier()
        self.add_terminal(class_var_root, type_name)
        name = self.stream.identifier()
        self.add_terminal(class_var_root, name)
        self.symbols.define(name, type_name, kind)

        while self.stream.symbol() == COMMA:
            self.add_terminal(class_var_root, self.stream.symbol())
            name = self.stream.identifier()
            self.add_terminal(class_var_root, name)
            self.symbols.define(name, type_name, kind)

        self.add_terminal(class_var_root, self.stream.symbol())

    def compile_subroutine(self):
        '''
        compiles a complete method, function, or constructor.
        '''
        subroutine_dec = ET.SubElement(self.root, SUBROUTINE_DEC)
        self.symbols.start_subroutine()
        subroutine_type = self.stream.keyword()
        if subroutine_type in ['method', 'constructor']:
            self.symbols.define('this', self.class_name, 'argument')
        self.add_terminal(subroutine_dec, subroutine_type)
        if self.stream.token_type() == tokenizer.KEYWORD:
            self.add_terminal(subroutine_dec, self.stream.keyword())
        else:
            self.add_terminal(subroutine_dec, self.stream.identifier())
        name = self.stream.identifier()
        self.add_terminal(subroutine_dec, name)

        self.add_terminal(subroutine_dec, self.stream.symbol())
        self.compile_parameter_list(subroutine_dec)
        self.add_terminal(subroutine_dec, self.stream.symbol())

        subroutine_body = ET.SubElement(subroutine_dec, SUBROUTINE_BODY)
        self.add_terminal(subroutine_body, self.stream.symbol())
        while self.stream.token_type() == tokenizer.KEYWORD and self.stream.keyword() == VAR:
            self.compile_var_dec(subroutine_body)
        func_name = '{cls}.{sub}'.format(
            cls=self.class_name,
            sub=name)
        self.writer.write_function(func_name, self.symbols.var_count('var'))
        if subroutine_type == 'constructor':
            # allocate space for the object's fields and anchor `this` at it
            self.writer.write_push('constant', self.symbols.var_count('field'))
            self.writer.write_call('Memory.alloc', 1)
            self.writer.write_pop('pointer', 0)
        elif subroutine_type == 'method':
            # the receiver arrives as argument 0; point `this` at it
            self.writer.write_push('argument', 0)
            self.writer.write_pop('pointer', 0)
        self.compile_statements(subroutine_body)
        self.add_terminal(subroutine_body, self.stream.symbol())

    def compile_parameter_list(self, root):
        '''
        compiles a (possibly empty) parameter list, not including the enclosing “()”.
        '''
        parameter_list_root = ET.SubElement(root, PARAMETER_LIST)
        if self.stream.token_type() != tokenizer.SYMBOL:
            # a parameter type is either a primitive keyword (int, char, boolean) or a class name
            if self.stream.token_type() == tokenizer.KEYWORD:
                type_name = self.stream.keyword()
            else:
                type_name = self.stream.identifier()
            self.add_terminal(parameter_list_root, type_name)
            name = self.stream.identifier()
            self.add_terminal(parameter_list_root, name)
            self.symbols.define(name, type_name, 'argument')

        while self.stream.token_type() == tokenizer.SYMBOL and self.stream.symbol() == COMMA:
            self.add_terminal(parameter_list_root, self.stream.symbol())
            if self.stream.token_type() == tokenizer.KEYWORD:
                type_name = self.stream.keyword()
            else:
                type_name = self.stream.identifier()
            self.add_terminal(parameter_list_root, type_name)
            name = self.stream.identifier()
            self.add_terminal(parameter_list_root, name)
            self.symbols.define(name, type_name, 'argument')

    def compile_var_dec(self, root):
        '''
        compiles a var declaration
        '''
        var_dec_root = ET.SubElement(root, VAR_DEC)
        self.add_terminal(var_dec_root, self.stream.keyword())
        type_name = None
        if self.stream.token_type() == tokenizer.IDENTIFIER:
            type_name = self.stream.identifier()
        else:
            type_name = self.stream.keyword()
        self.add_terminal(var_dec_root, type_name)
        name = self.stream.identifier()
        self.add_terminal(var_dec_root, name)
        self.symbols.define(name, type_name, 'var')

        while self.stream.symbol() == COMMA:
            self.add_terminal(var_dec_root, self.stream.symbol())
            name = self.stream.identifier()
            self.add_terminal(var_dec_root, name)
            self.symbols.define(name, type_name, 'var')

        self.add_terminal(var_dec_root, self.stream.symbol())

    def compile_statements(self, root):
        '''
        compiles a sequence of statements, not including the enclosing “{}”.
        '''
        statements_root = ET.SubElement(root, STATEMENTS)
        while self.stream.token_type() == tokenizer.KEYWORD:
            keyword = self.stream.keyword()
            if keyword == 'let':
                self.compile_let(statements_root)
            elif keyword == 'if':
                self.compile_if(statements_root)
            elif keyword == 'while':
                self.compile_while(statements_root)
            elif keyword == 'do':
                self.compile_do(statements_root)
            elif keyword == 'return':
                self.compile_return(statements_root)
            else:
                assert False, 'unsupported keyword {keyword}'.format(keyword=keyword)

    def compile_do(self, root):
        '''
        compiles a do statement
        '''
        do_root = ET.SubElement(root, DO)
        self.add_terminal(do_root, self.stream.keyword())
        self.compile_subroutine_call(do_root)
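        # a do statement discards the called subroutine's return value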
        self.writer.write_pop('temp', 0)
        self.add_terminal(do_root, self.stream.symbol())

    def compile_let(self, root):
        '''
        compiles a let statement
        '''
        let_root = ET.SubElement(root, LET)
        self.add_terminal(let_root, self.stream.keyword())
        lhs = self.stream.identifier()
        self.add_terminal(let_root, lhs)
        is_array = False
        if self.stream.token_type() == tokenizer.SYMBOL and self.stream.symbol() == OPEN_BRACKET:
            # array assignment: compute the target address (base + index) first
            is_array = True
            self.writer.write_push(self.symbols.kind_of(lhs), self.symbols.index_of(lhs))
            self.add_terminal(let_root, self.stream.symbol())
            self.compile_expression(let_root)
            self.writer.write_arithmetic('add')
            self.add_terminal(let_root, self.stream.symbol())
        self.add_terminal(let_root, self.stream.symbol())
        self.compile_expression(let_root)
        self.add_terminal(let_root, self.stream.symbol())
        if is_array:
            # stash the value, point `that` at the target cell, then store
            self.writer.write_pop('temp', 0)
            self.writer.write_pop('pointer', 1)
            self.writer.write_push('temp', 0)
            self.writer.write_pop('that', 0)
        else:
            self.writer.write_pop(self.symbols.kind_of(lhs), self.symbols.index_of(lhs))

    def compile_while(self, root):
        '''
        compiles a while statement
        '''
        while_root = ET.SubElement(root, WHILE)
        while_expression = self.symbols.generate_label('WHILE_EXP')
        while_end = self.symbols.generate_label('WHILE_END')
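        # WHILE_EXP: evaluate the condition, negate it, and jump to WHILE_END
        # when it fails; the body ends with a jump back to WHILE_EXP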
        self.add_terminal(while_root, self.stream.keyword())
        self.add_terminal(while_root, self.stream.symbol())
        self.writer.write_label(while_expression)
        self.compile_expression(while_root)
        self.writer.write_arithmetic('not')
        self.writer.write_if(while_end)
        self.add_terminal(while_root, self.stream.symbol())
        self.add_terminal(while_root, self.stream.symbol())
        self.compile_statements(while_root)
        self.writer.write_goto(while_expression)
        self.writer.write_label(while_end)
        self.add_terminal(while_root, self.stream.symbol())

    def compile_return(self, root):
        '''
        compiles a return statement
        '''
        return_root = ET.SubElement(root, RETURN)
        self.add_terminal(return_root, self.stream.keyword())
        if self.stream.token_type() != tokenizer.SYMBOL:
            self.compile_expression(return_root)
        else:
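            # a void subroutine still has to leave a return value on the stack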
            self.writer.write_push('constant', 0)
        self.writer.write_return()
        self.add_terminal(return_root, self.stream.symbol())

    def compile_if(self, root):
        '''
        compiles an if statement
        '''
        if_root = ET.SubElement(root, IF)
        if_label = self.symbols.generate_label('IF_TRUE')
        else_label = self.symbols.generate_label('IF_FALSE')
        end_label = self.symbols.generate_label('IF_END')
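        # if-goto IF_TRUE on the condition, else fall through to goto IF_FALSE;
        # the then-branch ends with goto IF_END so any else-branch is skipped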
        self.add_terminal(if_root, self.stream.keyword())
        self.add_terminal(if_root, self.stream.symbol())
        self.compile_expression(if_root)
        self.writer.write_if(if_label)
        self.writer.write_goto(else_label)
        self.writer.write_label(if_label)
        self.add_terminal(if_root, self.stream.symbol())
        self.add_terminal(if_root, self.stream.symbol())
        self.compile_statements(if_root)
        self.writer.write_goto(end_label)
        self.add_terminal(if_root, self.stream.symbol())
        self.writer.write_label(else_label)
        if self.stream.token_type() == tokenizer.KEYWORD and self.stream.keyword() == 'else':
            self.add_terminal(if_root, self.stream.keyword())
            self.add_terminal(if_root, self.stream.symbol())
            self.compile_statements(if_root)
            self.add_terminal(if_root, self.stream.symbol())
        self.writer.write_label(end_label)

    def compile_expression(self, root):
        '''
        compiles an expression
        '''
        expression_root = ET.SubElement(root, EXPRESSION)
        self.compile_term(expression_root)
        while self.stream.token_type() == tokenizer.SYMBOL and self.stream.symbol() in OPS:
            operator = self.stream.symbol()
            self.add_terminal(expression_root, operator)
            self.compile_term(expression_root)
            if operator == '+':
                self.writer.write_arithmetic('add')
            elif operator == '-':
                self.writer.write_arithmetic('sub')
            elif operator == '*':
                self.writer.write_call('Math.multiply', 2)
            elif operator == '/':
                self.writer.write_call('Math.divide', 2)
            elif operator == '&':
                self.writer.write_arithmetic('and')
            elif operator == '|':
                self.writer.write_arithmetic('or')
            elif operator == '<':
                self.writer.write_arithmetic('lt')
            elif operator == '>':
                self.writer.write_arithmetic('gt')
            elif operator == '=':
                self.writer.write_arithmetic('eq')

    def compile_term(self, root):
        '''
        compiles a term. This method is faced with a slight difficulty when trying to
        decide between some of the alternative rules. Specifically, if the current token
        is an identifier, it must still distinguish between a variable, an array entry, and
        a subroutine call. The distinction can be made by looking ahead one extra token.
        A single look-ahead token, which may be one of “[“, “(“, “.”, suffices to
        distinguish between the three possibilities. Any other token is not
        part of this term and should not be advanced over.
        '''
        term_root = ET.SubElement(root, TERM)
        token_type = self.stream.token_type()
        if token_type == tokenizer.INT:
            val = self.stream.int_val()
            self.add_terminal(term_root, val)
            self.writer.write_push('constant', val)
        elif token_type == tokenizer.STRING:
            val = self.stream.string_val()
            self.add_terminal(term_root, val)
            # string constants are built character by character via the OS String class
            self.writer.write_push('constant', len(val))
            self.writer.write_call('String.new', 1)
            for char in val:
                self.writer.write_push('constant', ord(char))
                self.writer.write_call('String.appendChar', 2)
        elif token_type == tokenizer.KEYWORD and self.stream.keyword() in KEYWORD_CONSTANTS:
            keyword = self.stream.keyword()
            self.add_terminal(term_root, keyword)
            if keyword == 'true':
                self.writer.write_push('constant', 0)
                self.writer.write_arithmetic('not')
            elif keyword in ['false', 'null']:
                self.writer.write_push('constant', 0)
            else:
                self.writer.write_push('this', 0)
        elif token_type == tokenizer.IDENTIFIER:
            if self.stream.peek() == OPEN_BRACKET:
                name = self.stream.identifier()
                self.writer.write_push(self.symbols.kind_of(name), self.symbols.index_of(name))
                self.add_terminal(term_root, name)
                self.add_terminal(term_root, self.stream.symbol())
                self.compile_expression(term_root)
                self.add_terminal(term_root, self.stream.symbol())
                # base + index, then read the addressed cell through pointer/that
                self.writer.write_arithmetic('add')
                self.writer.write_pop('pointer', 1)
                self.writer.write_push('that', 0)
            elif self.stream.peek() == OPEN_PAREN or self.stream.peek() == PERIOD:
                self.compile_subroutine_call(term_root)
            else:
                name = self.stream.identifier()
                self.add_terminal(term_root, name)
                self.writer.write_push(self.symbols.kind_of(name), self.symbols.index_of(name))
        elif token_type == tokenizer.SYMBOL and self.stream.symbol() == OPEN_PAREN:
            self.add_terminal(term_root, self.stream.symbol())
            self.compile_expression(term_root)
            self.add_terminal(term_root, self.stream.symbol())
        elif token_type == tokenizer.SYMBOL and self.stream.symbol() in UNARY_OPS:
            operator = self.stream.symbol()
            self.add_terminal(term_root, operator)
            self.compile_term(term_root)
            self.writer.write_arithmetic('neg' if operator == '-' else 'not')
        else:
            assert False, 'unsupported token {token}'.format(token=self.stream.current_token)

    def compile_expression_list(self, root):
        '''
        compiles a (possibly empty) comma-separated list of expressions.
        '''
        expression_list_root = ET.SubElement(root, EXPRESSION_LIST)
        if self.stream.token_type() == tokenizer.SYMBOL and self.stream.symbol() == CLOSE_PAREN:
            return 0
        self.compile_expression(expression_list_root)
        num_vars = 1
        while self.stream.symbol() == COMMA:
            self.add_terminal(expression_list_root, self.stream.symbol())
            self.compile_expression(expression_list_root)
            num_vars += 1
        return num_vars

    def compile_subroutine_call(self, root):
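        '''
        compiles a subroutine call and emits the matching VM call.
        The call is written as ClassName.subroutine; calls made through an
        object variable are not resolved to the variable's type here, and no
        receiver is pushed.
        '''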
        class_name = self.class_name
        subroutine_name = self.stream.identifier()
        self.add_terminal(root, subroutine_name)
        if self.stream.symbol() == PERIOD:
            self.add_terminal(root, self.stream.symbol())
            class_name = subroutine_name
            subroutine_name = self.stream.identifier()
            self.add_terminal(root, subroutine_name)
        self.add_terminal(root, self.stream.symbol())
        num_vars = self.compile_expression_list(root)
        self.add_terminal(root, self.stream.symbol())
        self.writer.write_call('{cls}.{sub}'.format(
            cls=class_name,
            sub=subroutine_name),
            num_vars)

    def write(self):
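        '''
        writes the pretty-printed XML tree (without the XML declaration line)
        if an xml_name was given, then closes the VM writer.
        '''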
        if self.xml_name:
            lines = self._write(self.root).split('\n')
            lines = lines[1:]
            with open(self.xml_name, 'w') as xml_file:
                xml_file.write('\n'.join(lines))
        self.writer.close()

    def _write(self, root):
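        # pretty-print the element tree; write() strips the XML declaration line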
        return minidom.parseString(ET.tostring(root)).toprettyxml()
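
# A minimal driver sketch, assuming the project's tokenizer module exposes a
# Tokenizer class with the token-stream interface used above (advance, peek,
# keyword, symbol, identifier, ...); the file names are placeholders.
if __name__ == '__main__':
    stream = tokenizer.Tokenizer('Main.jack')   # hypothetical constructor
    engine = CompilationEngine(stream, 'Main.vm', 'Main.xml')
    engine.compile_class()
    engine.write()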