def __init__(self, jack_file):
        self.vm_writer = VMWriter(jack_file)
        self.tokenizer = JackTokenizer(jack_file)
        self.symbol_table = SymbolTable()

        self.if_index = -1
        self.while_index = -1
Example #2
 def __init__(self, filepath, vm_writer):
     self.wf = open(filepath[:-5] + ".myImpl.xml", 'w')
     self.tokenizer = JackTokenizer(filepath)
     self.symbol_table = SymbolTable()
     self.vmw = vm_writer
     self.compiled_class_name = None
     self.label_num = 0
Example #3
 def __init__(self, jack_file, vm_file):
     self._jack_tokenizer = JackTokenizer(jack_file)
     self._vm_file = vm_file
     self._vm_text = ''
     self._xml_text = ''
     self._symbol_table = SymbolTable()
     self._vm_writer = VmWriter(self._vm_file)
     self._class_name = None
     self._label_count = 0
     self._compiled_class_name = ''
Example #4
    def __init__(self, input_file, output_file):
        self.tokenizer = JackTokenizer(input_file)
        self.out = open(output_file, 'w')
        self.token = None
        self.class_name = None

        #######################
        ### PROJECT 11 CODE ###
        #######################

        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(output_file)
Example #5
    def compile(self, input_file, output_file):
        self._tokenizer = JackTokenizer(input_file)
        self._output_file = output_file
        self._offset = 0

        while self._tokenizer.has_more_tokens:
            if self._tokenizer.advance() == VALID_TOKEN:
                if self._tokenizer.current_token == 'class':
                    tag = 'class'
                    self._open_tag(tag)
                    self._compile_class()
                    self._close_tag(tag)
                else:
                    line_n = self._tokenizer.line_number
                    raise Exception(
                        f"Class declaration expected. Line {line_n}")
Example #6
 def __init__(self, filepath, vm_writer):
     self.wf = open(filepath[:-5] + ".myImpl.xml", 'w')
     self.tokenizer = JackTokenizer(filepath)
     self.symbol_table = SymbolTable()
     self.vmw = vm_writer
     self.compiled_class_name = None
     self.label_num = 0
Example #7
   def __init__(self, source_filename):
      # destination filename
      # if the original extension was .jack, then replace the extension with .xml
      # if the original extension was not .jack, then append .xml
      if source_filename.lower().endswith(".jack"):
         destination_filename = source_filename[:-5] + ".xml"
      else:
         destination_filename = source_filename + ".xml"

      # open the destination filename for writing
      self.destination_file = open(destination_filename, 'w')

      # create a tokenizer for the input file
      self.tokenizer = JackTokenizer(source_filename)
Example #8
   def __init__(self, source_filename):
      # destination filename
      # if the original extension was .jack, then make the extension .vm
      # if the original extension was not .jack, then append .vm
      if source_filename.lower().endswith(".jack"):
         destination_filename = source_filename[:-5] + ".vm"
      else:
         destination_filename = source_filename + ".vm"

      # open the destination filename for writing
      self.destination_file = open(destination_filename, 'w')

      # create a tokenizer for the input file
      self.tokenizer = JackTokenizer(source_filename)

      # create the symbol table
      self.symbol_table = SymbolTable()

      # create the vm writer
      self.vm_writer = VMWriter(self.destination_file)
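
Examples #7 and #8 set up the same pipeline (tokenizer in, writer out); #8 additionally wires in the symbol table and a VMWriter over the opened .vm file. A minimal sketch of how such a class might be driven, assuming a compile_class() entry point (entry-point names vary between the implementations in this collection):

    # Hypothetical driver; the compile_class() entry-point name is an assumption.
    engine = CompilationEngine("Main.jack")   # opens Main.vm for writing
    engine.compile_class()                    # compile the whole class
    engine.destination_file.close()           # flush the generated VM code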
Example #9
def main():
    if len(sys.argv) != 2:
        print(
            "Expected 1 argument (either the .jack file or a directory containing .jack files). Exiting!"
        )
        return

    is_file_arg = sys.argv[1].endswith(".jack")

    if is_file_arg:
        jack_files = [sys.argv[1]]
    else:
        jack_files = [
            join(sys.argv[1], f) for f in listdir(sys.argv[1])
            if f.endswith(".jack")
        ]

    for jack_file in jack_files:
        ce = CompilationEngine(JackTokenizer(jack_file),
                               jack_file.split(".jack")[0] + "Nisarg.xml")
        ce.compile()
Example #10
class CompilationEngine():
    def __init__(self, jack_file, vm_file):
        self._jack_tokenizer = JackTokenizer(jack_file)
        self._vm_file = vm_file
        self._vm_text = ''
        self._xml_text = ''
        self._symbol_table = SymbolTable()
        self._vm_writer = VmWriter(self._vm_file)
        self._class_name = None
        self._label_count = 0
        self._compiled_class_name = ''

    def compile_class(self):
        self._write_start('class')
        self._compile_keyword()
        self._write('IdentifierInfo', 'category: class')
        self._compiled_class_name = self._compile_identifier()
        self._compile_symbol()
        while self._what_next_token([Keyword.STATIC, Keyword.FIELD]):
            self.compile_class_var_dec()
        while self._what_next_token(
            [Keyword.CONSTRUCTOR, Keyword.FUNCTION, Keyword.METHOD]):
            self.compile_subroutine_dec()
        self._compile_symbol()
        self._write_end('class')

    def compile_class_var_dec(self):
        self._write_start('classVarDec')
        token = self._compile_keyword()
        kind = None
        if token == Keyword.STATIC:
            kind = Kind.STATIC
        elif token == Keyword.FIELD:
            kind = Kind.FIELD
        type_token = self._jack_tokenizer.next_token()
        if self._what_next_token([Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
            self._compile_keyword()
        else:
            self._write('IdentifierInfo', 'category: class')
            self._compile_identifier()
        self._compile_var_name(declaration=True, type=type_token, kind=kind)
        while self._what_next_token([Symbol.COMMA]):
            self._compile_symbol()
            self._compile_var_name(declaration=True,
                                   type=type_token,
                                   kind=kind)
        self._compile_symbol()
        self._write_end('classVarDec')

    def compile_subroutine_dec(self):
        self._symbol_table.start_subroutine()
        self._write_start('subroutineDec')
        token = self._compile_keyword()
        if self._jack_tokenizer.next_token() == Keyword.VOID:
            self._compile_keyword()
        else:
            self._jack_tokenizer.next_token()
            if self._what_next_token(
                [Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
                self._compile_keyword()
            else:
                self._write('IdentifierInfo', 'category: class')
                self._compile_identifier()
        self._write('IdentifierInfo', 'category: subroutine')
        subroutine_name = self._compile_identifier()
        self._compile_symbol()
        if token == Keyword.METHOD:
            self._symbol_table.define('$this', self._compiled_class_name,
                                      Kind.ARG)
        self.compile_parameter_list()
        self._compile_symbol()
        self.compile_subroutine_body(subroutine_name, token)
        self._write_end('subroutineDec')

    def compile_parameter_list(self):
        self._write_start('parameterList')
        if (self._jack_tokenizer.next_token()
                in [Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]
                or self._jack_tokenizer.next_token_type() == Type.IDENTIFIER):
            type_token = self._jack_tokenizer.next_token()
            if self._what_next_token(
                [Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
                self._compile_keyword()
            else:
                self._write('IdentifierInfo', 'category: class')
                self._compile_identifier()
            self._compile_var_name(declaration=True,
                                   type=type_token,
                                   kind=Kind.ARG)
            while self._what_next_token([Symbol.COMMA]):
                self._compile_symbol()
                type_token = self._jack_tokenizer.next_token()
                if self._what_next_token(
                    [Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
                    self._compile_keyword()
                else:
                    self._write('IdentifierInfo', 'category: class')
                    self._compile_identifier()
                self._compile_var_name(declaration=True,
                                       type=type_token,
                                       kind=Kind.ARG)
        self._write_end('parameterList')

    def compile_subroutine_body(self, subroutine_name, subroutine_token):
        self._write_start('subroutineBody')
        self._compile_symbol()
        local_num = 0
        while self._what_next_token([Keyword.VAR]):
            var_num = self.compile_var_dec()
            local_num += var_num
        self._vm_writer.write_function(
            '%s.%s' % (self._compiled_class_name, subroutine_name), local_num)
        if subroutine_token == Keyword.METHOD:
            self._vm_writer.write_push(Segment.ARG, 0)
            self._vm_writer.write_pop(Segment.POINTER, 0)
        elif subroutine_token == Keyword.CONSTRUCTOR:
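            # A constructor allocates the object: push the field count, call
            # Memory.alloc, and anchor pointer 0 (this) at the returned base address.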
            self._vm_writer.write_push(
                Segment.CONST, self._symbol_table.var_count(Kind.FIELD))
            self._vm_writer.write_call('Memory.alloc', 1)
            self._vm_writer.write_pop(Segment.POINTER, 0)
        elif subroutine_token == Keyword.FUNCTION:
            pass
        self.compile_statements()
        self._compile_symbol()
        self._write_end('subroutineBody')
        return local_num

    def compile_var_dec(self):
        self._write_start('varDec')
        self._compile_keyword()
        type_token = self._jack_tokenizer.next_token()
        if self._what_next_token([Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
            self._compile_keyword()
        else:
            self._write('IdentifierInfo', 'category: class')
            self._compile_identifier()
        self._compile_var_name(declaration=True,
                               type=type_token,
                               kind=Kind.VAR)
        var_num = 1  # TODO
        while self._what_next_token([Symbol.COMMA]):
            self._compile_symbol()
            self._compile_var_name(declaration=True,
                                   type=type_token,
                                   kind=Kind.VAR)
            var_num += 1
        self._compile_symbol()
        self._write_end('varDec')
        return var_num

    def compile_statements(self):
        self._write_start('statements')
        while self._what_next_token([
                Keyword.LET, Keyword.IF, Keyword.WHILE, Keyword.DO,
                Keyword.RETURN
        ]):
            if self._what_next_token([Keyword.LET]):
                self.compile_let()
            elif self._what_next_token([Keyword.IF]):
                self.compile_if()
            elif self._what_next_token([Keyword.WHILE]):
                self.compile_while()
            elif self._what_next_token([Keyword.DO]):
                self.compile_do()
            elif self._what_next_token([Keyword.RETURN]):
                self.compile_return()
        self._write_end('statements')

    def compile_let(self):
        self._write_start('letStatement')
        self._compile_keyword()
        let_var = self._compile_var_name(let=True)
        if self._what_next_token([Symbol.LEFT_BOX_BRACKET]):
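            # Array assignment a[i] = expr: compute base + i, stash the target
            # address in temp 2, evaluate expr, then store through pointer 1 / that 0.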
            self._compile_symbol()
            self.compile_expression()
            self._compile_symbol()
            self._compile_symbol()
            kind = self._symbol_table.kind_of(let_var)
            if kind == Kind.ARG:
                self._vm_writer.write_push(
                    Segment.ARG, self._symbol_table.index_of(let_var))
            elif kind == Kind.VAR:
                self._vm_writer.write_push(
                    Segment.LOCAL, self._symbol_table.index_of(let_var))
            elif kind == Kind.FIELD:
                self._vm_writer.write_push(
                    Segment.THIS, self._symbol_table.index_of(let_var))
            elif kind == Kind.STATIC:
                self._vm_writer.write_push(
                    Segment.STATIC, self._symbol_table.index_of(let_var))
            self._vm_writer.write_arithmetic(Command.ADD)
            self._vm_writer.write_pop(Segment.TEMP, 2)
            self.compile_expression()
            self._vm_writer.write_push(Segment.TEMP, 2)
            self._vm_writer.write_pop(Segment.POINTER, 1)
            self._vm_writer.write_pop(Segment.THAT, 0)
            self._compile_symbol()
        else:
            self._compile_symbol()
            self.compile_expression()
            self._compile_symbol()
            kind = self._symbol_table.kind_of(let_var)
            if kind == Kind.VAR:
                self._vm_writer.write_pop(Segment.LOCAL,
                                          self._symbol_table.index_of(let_var))
            elif kind == Kind.ARG:
                self._vm_writer.write_pop(Segment.ARG,
                                          self._symbol_table.index_of(let_var))
            elif kind == Kind.FIELD:
                self._vm_writer.write_pop(Segment.THIS,
                                          self._symbol_table.index_of(let_var))
            elif kind == Kind.STATIC:
                self._vm_writer.write_pop(Segment.STATIC,
                                          self._symbol_table.index_of(let_var))
        self._write_end('letStatement')

    def compile_if(self):
        self._write_start('ifStatement')
        self._compile_keyword()
        self._compile_symbol()
        self.compile_expression()
        self._compile_symbol()
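        # Negate the condition so if-goto l1 branches to the else part (or past
        # the whole if) when the expression is false.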
        self._vm_writer.write_arithmetic(Command.NOT)
        l1 = self._new_label()
        l2 = self._new_label()
        self._vm_writer.write_if(l1)
        self._compile_symbol()
        self.compile_statements()
        self._compile_symbol()
        self._vm_writer.write_goto(l2)
        self._vm_writer.write_label(l1)
        if self._what_next_token([Keyword.ELSE]):
            self._compile_keyword()
            self._compile_symbol()
            self.compile_statements()
            self._compile_symbol()
        self._vm_writer.write_label(l2)
        self._write_end('ifStatement')

    def compile_while(self):
        self._write_start('whileStatement')
        l1 = self._new_label()
        l2 = self._new_label()
        self._compile_keyword()
        self._vm_writer.write_label(l1)
        self._compile_symbol()
        self.compile_expression()
        self._compile_symbol()
        self._vm_writer.write_arithmetic(Command.NOT)
        self._vm_writer.write_if(l2)
        self._compile_symbol()
        self.compile_statements()
        self._compile_symbol()
        self._vm_writer.write_goto(l1)
        self._vm_writer.write_label(l2)
        self._write_end('whileStatement')

    def compile_do(self):
        self._write_start('doStatement')
        self._compile_keyword()
        if self._what_next_token([Symbol.LEFT_ROUND_BRACKET], 1):
            self._write('IdentifierInfo', 'category: subroutine')
            subroutine_name = self._compile_identifier()
            self._compile_symbol()
            self._vm_writer.write_push(Segment.POINTER, 0)
            arg_num = self.compile_expression_list()
            self._compile_symbol()
            self._vm_writer.write_call(
                '%s.%s' % (self._compiled_class_name, subroutine_name),
                arg_num + 1)
        else:
            identifier_str = self._jack_tokenizer.next_token()
            if self._symbol_table.kind_of(identifier_str):
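                # The identifier names a known variable, so this is a method call:
                # push the instance as the implicit first argument and dispatch on
                # its declared type.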
                instance_name = self._compile_var_name(call=True)
                self._compile_symbol()
                self._write('IdentifierInfo', 'category: subroutine')
                subroutine_name = self._compile_identifier()
                self._compile_symbol()
                kind = self._symbol_table.kind_of(instance_name)
                if kind == Kind.ARG:
                    self._vm_writer.write_push(
                        Segment.ARG,
                        self._symbol_table.index_of(instance_name))
                elif kind == Kind.VAR:
                    self._vm_writer.write_push(
                        Segment.LOCAL,
                        self._symbol_table.index_of(instance_name))
                elif kind == Kind.FIELD:
                    self._vm_writer.write_push(
                        Segment.THIS,
                        self._symbol_table.index_of(instance_name))
                elif kind == Kind.STATIC:
                    self._vm_writer.write_push(
                        Segment.STATIC,
                        self._symbol_table.index_of(instance_name))
                arg_num = self.compile_expression_list()
                self._compile_symbol()
                self._vm_writer.write_call(
                    '%s.%s' % (self._symbol_table.type_of(instance_name),
                               subroutine_name), arg_num + 1)
            else:
                self._write('IdentifierInfo', 'category: class')
                class_name = self._compile_identifier()
                self._compile_symbol()
                self._write('IdentifierInfo', 'category: subroutine')
                subroutine_name = self._compile_identifier()
                self._compile_symbol()
                arg_num = self.compile_expression_list()
                self._compile_symbol()
                self._vm_writer.write_call(
                    '%s.%s' % (class_name, subroutine_name), arg_num)
        self._compile_symbol()
        self._write_end('doStatement')
        self._vm_writer.write_pop(Segment.TEMP, 0)

    def compile_return(self):
        self._write_start('returnStatement')
        self._compile_keyword()
        if not self._what_next_token([Symbol.SEMI_COLON]):
            self.compile_expression()
        else:
            self._vm_writer.write_push(Segment.CONST, 0)
        self._compile_symbol()
        self._vm_writer.write_return()
        self._write_end('returnStatement')

    def compile_expression(self):
        self._write_start('expression')
        self.compile_term()
        while self._what_next_token([
                Symbol.PLUS, Symbol.MINUS, Symbol.MULTI, Symbol.DIV,
                Symbol.AND, Symbol.PIPE, Symbol.LESS_THAN, Symbol.GREATER_THAN,
                Symbol.EQUAL
        ]):
            token = self._compile_symbol()
            self.compile_term()
            if token == Symbol.PLUS:
                self._vm_writer.write_arithmetic(Command.ADD)
            elif token == Symbol.MINUS:
                self._vm_writer.write_arithmetic(Command.SUB)
            elif token == Symbol.MULTI:
                self._vm_writer.write_call('Math.multiply', 2)
            elif token == Symbol.DIV:
                self._vm_writer.write_call('Math.divide', 2)
            elif token == Symbol.AND:
                self._vm_writer.write_arithmetic(Command.AND)
            elif token == Symbol.PIPE:
                self._vm_writer.write_arithmetic(Command.OR)
            elif token == Symbol.LESS_THAN:
                self._vm_writer.write_arithmetic(Command.LT)
            elif token == Symbol.GREATER_THAN:
                self._vm_writer.write_arithmetic(Command.GT)
            elif token == Symbol.EQUAL:
                self._vm_writer.write_arithmetic(Command.EQ)
        self._write_end('expression')

    def compile_term(self):
        self._write_start('term')
        if self._what_next_token_type([Type.INT_CONST]):
            value = self._compile_integer_constant()
            self._vm_writer.write_push(Segment.CONST, value)
        elif self._what_next_token_type([Type.STRING_CONST]):
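            # A string literal becomes String.new(length) followed by one
            # String.appendChar call per character.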
            value = self._compile_string_constant()
            self._vm_writer.write_push(Segment.CONST, len(value))
            self._vm_writer.write_call('String.new', 1)
            for v in value:
                self._vm_writer.write_push(Segment.CONST, ord(v))
                self._vm_writer.write_call('String.appendChar', 2)
        elif self._what_next_token([Keyword.NULL]):
            self._compile_keyword()
            self._vm_writer.write_push(Segment.CONST, 0)
        elif self._what_next_token([Keyword.THIS]):
            self._compile_keyword()
            self._vm_writer.write_push(Segment.POINTER, 0)
        elif self._what_next_token([Keyword.TRUE]):
            self._compile_keyword()
            self._vm_writer.write_push(Segment.CONST, 0)
            self._vm_writer.write_arithmetic(Command.NOT)
        elif self._what_next_token([Keyword.FALSE]):
            self._compile_keyword()
            self._vm_writer.write_push(Segment.CONST, 0)
        elif self._what_next_token_type([Type.IDENTIFIER]):
            if self._what_next_token([Symbol.LEFT_BOX_BRACKET], 1):
                self._compile_var_name()
                self._compile_symbol()
                self.compile_expression()
                self._vm_writer.write_arithmetic(Command.ADD)
                self._vm_writer.write_pop(Segment.POINTER, 1)
                self._vm_writer.write_push(Segment.THAT, 0)
                self._compile_symbol()
            elif self._what_next_token([Symbol.LEFT_ROUND_BRACKET, Symbol.DOT],
                                       1):
                if self._what_next_token([Symbol.LEFT_ROUND_BRACKET], 1):
                    self._write('IdentifierInfo', 'category: subroutine')
                    subroutine_name = self._compile_identifier()
                    self._compile_symbol()
                    self._vm_writer.write_push(Segment.POINTER, 0)
                    arg_num = self.compile_expression_list()
                    self._compile_symbol()
                    self._vm_writer.write_call(
                        '%s.%s' % (self._compiled_class_name, subroutine_name),
                        arg_num + 1)
                else:
                    identifier_str = self._jack_tokenizer.next_token()
                    if self._symbol_table.kind_of(identifier_str):
                        instance_name = self._compile_var_name(call=True)
                        self._compile_symbol()
                        self._write('IdentifierInfo', 'category: subroutine')
                        subroutine_name = self._compile_identifier()
                        self._compile_symbol()
                        kind = self._symbol_table.kind_of(instance_name)
                        if kind == Kind.ARG:
                            self._vm_writer.write_push(
                                Segment.ARG,
                                self._symbol_table.index_of(instance_name))
                        elif kind == Kind.VAR:
                            self._vm_writer.write_push(
                                Segment.LOCAL,
                                self._symbol_table.index_of(instance_name))
                        elif kind == Kind.FIELD:
                            self._vm_writer.write_push(
                                Segment.THIS,
                                self._symbol_table.index_of(instance_name))
                        elif kind == Kind.STATIC:
                            self._vm_writer.write_push(
                                Segment.STATIC,
                                self._symbol_table.index_of(instance_name))
                        arg_num = self.compile_expression_list()
                        self._compile_symbol()
                        self._vm_writer.write_call(
                            '%s.%s' %
                            (self._symbol_table.type_of(instance_name),
                             subroutine_name), arg_num + 1)
                    else:
                        self._write('IdentifierInfo', 'category: class')
                        class_name = self._compile_identifier()
                        self._compile_symbol()
                        self._write('IdentifierInfo', 'category: subroutine')
                        subroutine_name = self._compile_identifier()
                        self._compile_symbol()
                        arg_num = self.compile_expression_list()
                        self._compile_symbol()
                        self._vm_writer.write_call(
                            '%s.%s' % (class_name, subroutine_name), arg_num)
            else:
                self._compile_var_name()
        elif self._what_next_token([Symbol.LEFT_ROUND_BRACKET]):
            self._compile_symbol()
            self.compile_expression()
            self._compile_symbol()
        elif self._what_next_token([Symbol.TILDE]):
            self._compile_symbol()
            self.compile_term()
            self._vm_writer.write_arithmetic(Command.NOT)
        elif self._what_next_token([Symbol.MINUS]):
            self._compile_symbol()
            self.compile_term()
            self._vm_writer.write_arithmetic(Command.NEG)
        self._write_end('term')

    def compile_expression_list(self):
        self._write_start('expressionList')
        arg_num = 0
        if not self._what_next_token([Symbol.RIGHT_ROUND_BRACKET]):
            self.compile_expression()
            arg_num += 1
            while self._what_next_token([Symbol.COMMA]):
                self._compile_symbol()
                self.compile_expression()
                arg_num += 1
        self._write_end('expressionList')
        return arg_num

    def save(self):
        self._vm_writer.save()

    def _what_next_token(self, values, index=0):
        return self._jack_tokenizer.next_token(index) in values

    def _what_next_token_type(self, values, index=0):
        return self._jack_tokenizer.next_token_type(index) in values

    def _compile_symbol(self):
        self._jack_tokenizer.advance()
        value = self._jack_tokenizer.token()
        self._write('symbol', value)
        return value

    def _compile_keyword(self):
        self._jack_tokenizer.advance()
        value = self._jack_tokenizer.token()
        self._write('keyword', value)
        return value

    def _compile_identifier(self):
        self._jack_tokenizer.advance()
        value = self._jack_tokenizer.token()
        self._write('identifier', value)
        return value

    def _compile_integer_constant(self):
        self._jack_tokenizer.advance()
        value = self._jack_tokenizer.token()
        self._write('integerConstant', value)
        return value

    def _compile_string_constant(self):
        self._jack_tokenizer.advance()
        value = self._jack_tokenizer.token()
        self._write('stringConstant', value)
        return value

    def _compile_var_name(self,
                          declaration=False,
                          type=None,
                          kind=None,
                          let=False,
                          call=False):
        if declaration:
            self._symbol_table.define(self._jack_tokenizer.next_token(), type,
                                      kind)
        elif let:
            pass
        elif call:
            pass
        else:
            kind = self._symbol_table.kind_of(
                self._jack_tokenizer.next_token())
            if kind == Kind.ARG:
                self._vm_writer.write_push(
                    Segment.ARG,
                    self._symbol_table.index_of(
                        self._jack_tokenizer.next_token()))
            elif kind == Kind.VAR:
                self._vm_writer.write_push(
                    Segment.LOCAL,
                    self._symbol_table.index_of(
                        self._jack_tokenizer.next_token()))
            elif kind == Kind.FIELD:
                self._vm_writer.write_push(
                    Segment.THIS,
                    self._symbol_table.index_of(
                        self._jack_tokenizer.next_token()))
            elif kind == Kind.STATIC:
                self._vm_writer.write_push(
                    Segment.STATIC,
                    self._symbol_table.index_of(
                        self._jack_tokenizer.next_token()))

        self._write(
            'IdentifierInfo', 'declaration: %s, kind: %s, index: %d' %
            (declaration,
             self._symbol_table.kind_of(self._jack_tokenizer.next_token()),
             self._symbol_table.index_of(self._jack_tokenizer.next_token())))
        return self._compile_identifier()

    def _write(self, element, value):
        self._xml_text += '<{}> {} </{}>\n'.format(element, value, element)

    def _write_start(self, element):
        self._xml_text += '<%s>\n' % element

    def _write_end(self, element):
        self._xml_text += '</%s>\n' % element

    def _new_label(self):
        self._label_count += 1
        return 'LABEL_%d' % self._label_count
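
Example #10 keeps the XML bookkeeping (_write_start/_write/_write_end) separate from VM emission and only flushes output via save(). A minimal driver sketch, assuming VmWriter.save() writes the accumulated VM commands to the file passed to the constructor:

    # Hypothetical driver for the engine shown in Example #10.
    engine = CompilationEngine('Main.jack', 'Main.vm')
    engine.compile_class()   # walk the token stream, building XML text and VM output
    engine.save()            # assumed to flush the buffered VM commands to Main.vm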
Example #11
    def analyze(self):
        for filename in self.files_to_translate:
            tokenizer = JackTokenizer(filename)
            compiler = CompilationEngine(tokenizer.tokens)

            self.__write_out(filename, compiler.xml_output)
class CompilationEngine:
    """NOTE remember that "is_xxx()" checks on the next token,
    and load the next token to curr_token before starting sub-methods
    using "load_next_token()" and you can use values with it
    """
    def __init__(self, jack_file):
        self.vm_writer = VMWriter(jack_file)
        self.tokenizer = JackTokenizer(jack_file)
        self.symbol_table = SymbolTable()

        self.if_index = -1
        self.while_index = -1

    # 'class' className '{' classVarDec* subroutineDec* '}'
    def compile_class(self):
        #! Beginning of all
        # * save name of the class and move on
        self.load_next_token()  # 'class'
        self.class_name = self.load_next_token()  # className
        self.load_next_token()  # curr_token = '{'

        # while next token == 'static' | 'field',
        while self.is_class_var_dec():  # check next token
            self.compile_class_var_dec()  # classVarDec*
        # while next_token == constructor | function | method
        while self.is_subroutine_dec():
            self.compile_subroutine()  # subroutineDec*
        self.vm_writer.close()

    # ('static' | 'field' ) type varName (',' varName)* ';'
    def compile_class_var_dec(self):
        kind = self.load_next_token()  # curr_token = static | field
        type = self.load_next_token()  # curr_token = type
        name = self.load_next_token()  # curr_token = varName
        self.symbol_table.define(name, type, kind.upper())
        while self.check_next_token() != ";":  # (',' varName)*
            self.load_next_token()  # ','
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, type, kind.upper())
        self.load_next_token()  # ';'
        # next_token = 'constructor' | 'function' | 'method'

    # subroutineDec: ('constructor' | 'function' | 'method') ('void' | type) subroutineName '(' parameterList ')' subroutineBody
    # subroutineBody: '{' varDec* statements '}'
    def compile_subroutine(self):
        subroutine_kind = self.load_next_token()  # 'constructor' | 'function' | 'method'
        self.load_next_token()  # ('void' | type)
        subroutine_name = self.load_next_token()  # subroutineName

        self.symbol_table.start_subroutine()  # init subroutine table
        if subroutine_kind == "method":
            self.symbol_table.define("instance", self.class_name, "ARG")

        self.load_next_token()  # curr_token '('
        self.compile_parameter_list()  # parameterList
        # next_token == ')' when escaped
        self.load_next_token()  # ')'
        self.load_next_token()  # '{'
        while self.check_next_token() == "var":
            self.compile_var_dec()  # varDec*
        # NOTE next_token is neither 'var' nor ';'
        # NOTE next_token is statements* (zero or more)

        # ANCHOR actual writing
        func_name = f"{self.class_name}.{subroutine_name}"  # Main.main
        num_locals = self.symbol_table.counts["VAR"]  # get 'var' count
        self.vm_writer.write_function(func_name, num_locals)
        if subroutine_kind == "constructor":
            num_fields = self.symbol_table.counts["FIELD"]
            self.vm_writer.write_push("CONST", num_fields)
            self.vm_writer.write_call("Memory.alloc", 1)
            self.vm_writer.write_pop("POINTER", 0)
        elif subroutine_kind == "method":
            self.vm_writer.write_push("ARG", 0)
            self.vm_writer.write_pop("POINTER", 0)

        # NOTE statement starts here
        self.compile_statements()  # statements
        self.load_next_token()  # '}'

    # ( (type varName) (',' type varName)* )?
    def compile_parameter_list(self):
        # curr_token == '('
        if self.check_next_token() != ")":
            type = self.load_next_token()  # type
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, type, "ARG")
        while self.check_next_token() != ")":
            self.load_next_token()  # ','
            type = self.load_next_token()  # type
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, type, "ARG")
        # NOTE param compilation finishes when next_token == ')'

    # 'var' type varName (',' varName)* ';'
    def compile_var_dec(self):
        self.load_next_token()  # 'var'
        type = self.load_next_token()  # type
        name = self.load_next_token()  # varName
        self.symbol_table.define(name, type, "VAR")
        while self.check_next_token() != ";":  # (',' varName)*
            self.load_next_token()  # ','
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, type, "VAR")
        self.load_next_token()  # ';'

    # statement*
    # letStatement | ifStatement | whileStatement | doStatement | returnStatement
    def compile_statements(self):
        # if next_token == let | if | while | do | return
        while self.is_statement():
            statement = self.load_next_token()  # curr_token == let | if | while | do | return
            if statement == "let":
                self.compile_let()
            elif statement == "if":
                self.compile_if()
            elif statement == "while":
                self.compile_while()
            elif statement == "do":
                self.compile_do()
            elif statement == "return":
                self.compile_return()

    # 'let' varName ('[' expression ']')? '=' expression ';'
    def compile_let(self):
        var_name = self.load_next_token()  # curr_token == varName
        var_kind = CONVERT_KIND[self.symbol_table.kind_of(var_name)]
        var_index = self.symbol_table.index_of(var_name)
        # if next_token == "["
        if self.is_array():  # array assignment
            self.load_next_token()  # curr_token == '['
            self.compile_expression()  # expression
            self.load_next_token()  # curr_token == ']'
            self.vm_writer.write_push(var_kind, var_index)
            self.vm_writer.write_arithmetic("ADD")

            self.load_next_token()  # curr_token == '='
            self.compile_expression()  # expression
            self.load_next_token()  # curr_token == ';'
            #! POP TEMP and PUSH TEMP location changed
            self.vm_writer.write_pop("TEMP", 0)
            self.vm_writer.write_pop("POINTER", 1)
            self.vm_writer.write_push("TEMP", 0)
            self.vm_writer.write_pop("THAT", 0)
        else:  # regular assignment
            self.load_next_token()  # curr_token == '='
            self.compile_expression()  # expression
            self.load_next_token()  # ';'
            self.vm_writer.write_pop(var_kind, var_index)

    # 'if' '(' expression ')' '{' statements '}' ( 'else' '{' statements '}' )?
    def compile_if(self):
        # curr_token == if
        self.if_index += 1
        if_index = self.if_index
        # TODO IF indexes count separately
        self.load_next_token()  # curr_token == '('
        self.compile_expression()  # expression
        self.load_next_token()  # ')'
        self.load_next_token()  # '{'
        # S = statement, L = label
        self.vm_writer.write_if(f"IF_TRUE{if_index}")  #! if-goto L1
        self.vm_writer.write_goto(f"IF_FALSE{if_index}")  #! goto L2
        self.vm_writer.write_label(f"IF_TRUE{if_index}")  #! label L1
        self.compile_statements()  # statements #! executing S1
        self.vm_writer.write_goto(f"IF_END{if_index}")  #! goto END
        self.load_next_token()  # '}'
        self.vm_writer.write_label(f"IF_FALSE{if_index}")  #! label L2
        if self.check_next_token() == "else":  # ( 'else' '{' statements '}' )?
            self.load_next_token()  # 'else'
            self.load_next_token()  # '{'
            self.compile_statements()  # statements #! executing S2
            self.load_next_token()  # '}'
        self.vm_writer.write_label(f"IF_END{if_index}")

    # 'while' '(' expression ')' '{' statements '}'
    def compile_while(self):
        # curr_token == while
        self.while_index += 1
        while_index = self.while_index
        self.vm_writer.write_label(f"WHILE{while_index}")
        self.load_next_token()  # '('
        self.compile_expression()  # expression
        self.vm_writer.write_arithmetic("NOT")  # eval false condition first
        self.load_next_token()  # ')'
        self.load_next_token()  # '{'
        self.vm_writer.write_if(f"WHILE_END{while_index}")
        self.compile_statements()  # statements
        self.vm_writer.write_goto(f"WHILE{while_index}")
        self.vm_writer.write_label(f"WHILE_END{while_index}")
        self.load_next_token()  # '}'

    # 'do' subroutineCall ';'
    def compile_do(self):
        # curr_token == do
        self.load_next_token()  #! to sync with compile_term()
        self.compile_subroutine_call()
        self.vm_writer.write_pop("TEMP", 0)
        self.load_next_token()  # ';'

    # 'return' expression? ';'
    def compile_return(self):
        # curr_token == return
        if self.check_next_token() != ";":
            self.compile_expression()
        else:
            self.vm_writer.write_push("CONST", 0)
        self.vm_writer.write_return()
        self.load_next_token()  # ';'

    # term (op term)*
    def compile_expression(self):
        self.compile_term()  # term
        while self.is_op():  # (op term)*
            op: str = self.load_next_token()  # op
            self.compile_term()  # term
            if op in ARITHMETIC.keys():
                self.vm_writer.write_arithmetic(ARITHMETIC[op])
            elif op == "*":
                self.vm_writer.write_call("Math.multiply", 2)
            elif op == "/":
                self.vm_writer.write_call("Math.divide", 2)

    # integerConstant | stringConstant | keywordConstant | varName |
    # varName '[' expression ']' | subroutineCall | '(' expression ')' | unaryOp term
    def compile_term(self):
        # if next_token == '~' | '-'
        if self.is_unary_op_term():
            unary_op = self.load_next_token()  # curr_token == '~' | '-'
            self.compile_term()  # term (recursive)
            self.vm_writer.write_arithmetic(ARITHMETIC_UNARY[unary_op])
        # if next_token == '(' => '(' expression ')'
        elif self.check_next_token() == "(":
            self.load_next_token()  # '('
            self.compile_expression()  # expression
            self.load_next_token()  # ')'
        # if next_token == INTEGER(const)
        elif self.check_next_type() == "INT_CONST":  # integerConstant
            self.vm_writer.write_push("CONST", self.load_next_token())  # )
        # if next_token == STRING(const)
        elif self.check_next_type() == "STRING_CONST":  # stringConstant
            self.compile_string()
        # if next_token == KEYWORD(const)
        elif self.check_next_type() == "KEYWORD":  # keywordConstant
            self.compile_keyword()
        # varName | varName '[' expression ']' | subroutineCall
        else:
            #! (varName | varName for expression | subroutine)'s base
            var_name = self.load_next_token()  # curr_token == varName | subroutineCall
            # (e.g. Screen.setColor | show() )
            #! next_token == '[' | '(' or '.' | just varName
            # varName '[' expression ']'
            if self.is_array():  # if next_token == '['
                self.load_next_token()  # '['
                self.compile_expression()  # expression
                self.load_next_token()  # ']'
                array_kind = self.symbol_table.kind_of(var_name)
                array_index = self.symbol_table.index_of(var_name)
                self.vm_writer.write_push(CONVERT_KIND[array_kind],
                                          array_index)
                self.vm_writer.write_arithmetic("ADD")
                self.vm_writer.write_pop("POINTER", 1)
                self.vm_writer.write_push("THAT", 0)
            # if next_token == "(" | "." => curr_token == subroutineCall

            #! if varName is not found, assume class or function name
            elif self.is_subroutine_call():
                # NOTE curr_token == subroutineName | className | varName
                self.compile_subroutine_call()
            # varName
            else:
                # curr_token == varName
                # FIXME cannot catch subroutine call and pass it to 'else' below
                # TODO error caught on Math.abs() part on Ball.vm
                var_kind = CONVERT_KIND[self.symbol_table.kind_of(var_name)]
                var_index = self.symbol_table.index_of(var_name)
                self.vm_writer.write_push(var_kind, var_index)

    # subroutineCall: subroutineName '(' expressionList ')' |
    # ( className | varName) '.' subroutineName '(' expressionList ')'

    # e.g.) (do) game.run()
    # ! in case of 'do' order is different from 'let game = Class.new()'
    def compile_subroutine_call(self):
        # NOTE curr_token == subroutineName | className | varName
        subroutine_caller = self.get_curr_token()
        function_name = subroutine_caller
        # _next_token()  # FIXME now it loads '.' or '('
        # func_name = identifier
        number_args = 0
        #! '.' or '(' 2 cases
        if self.check_next_token() == ".":
            self.load_next_token()  # curr_token == '.'
            subroutine_name = self.load_next_token()  # curr_token == subroutineName
            type = self.symbol_table.type_of(subroutine_caller)
            if type != "NONE":  # it's an instance
                kind = self.symbol_table.kind_of(subroutine_caller)
                index = self.symbol_table.index_of(subroutine_caller)
                self.vm_writer.write_push(CONVERT_KIND[kind], index)
                function_name = f"{type}.{subroutine_name}"
                number_args += 1
            else:  # it's a class
                class_name = subroutine_caller
                function_name = f"{class_name}.{subroutine_name}"
        elif self.check_next_token() == "(":
            subroutine_name = subroutine_caller
            function_name = f"{self.class_name}.{subroutine_name}"
            number_args += 1
            self.vm_writer.write_push("POINTER", 0)
        self.load_next_token()  # '('
        number_args += self.compile_expression_list()  # expressionList
        self.load_next_token()  # ')'
        self.vm_writer.write_call(function_name, number_args)

    # (expression (',' expression)* )?
    def compile_expression_list(self):
        number_args = 0
        if self.check_next_token() != ")":
            number_args += 1
            self.compile_expression()
        while self.check_next_token() != ")":
            number_args += 1
            self.load_next_token()  # curr_token == ','
            self.compile_expression()
        return number_args

    def compile_string(self):
        string = self.load_next_token()  # curr_token == stringConstant
        self.vm_writer.write_push("CONST", len(string))
        self.vm_writer.write_call("String.new", 1)
        for char in string:
            self.vm_writer.write_push("CONST", ord(char))
            self.vm_writer.write_call("String.appendChar", 2)

    def compile_keyword(self):
        keyword = self.load_next_token()  # curr_token == keywordConstant
        if keyword == "this":
            self.vm_writer.write_push("POINTER", 0)
        else:
            self.vm_writer.write_push("CONST", 0)
            if keyword == "true":
                self.vm_writer.write_arithmetic("NOT")

    def is_subroutine_call(self):
        return self.check_next_token() in [".", "("]

    def is_array(self):
        return self.check_next_token() == "["

    def is_class_var_dec(self):
        return self.check_next_token() in ["static", "field"]

    def is_subroutine_dec(self):
        return self.check_next_token() in ["constructor", "function", "method"]

    def is_statement(self):
        return self.check_next_token() in [
            "let", "if", "while", "do", "return"
        ]

    def is_op(self):
        return self.check_next_token() in [
            "+", "-", "*", "/", "&", "|", "<", ">", "="
        ]

    def is_unary_op_term(self):
        return self.check_next_token() in ["~", "-"]

    def check_next_token(self):
        return self.tokenizer.next_token[1]

    def check_next_type(self):
        return self.tokenizer.next_token[0]

    def get_curr_token(self):
        return self.tokenizer.curr_token[1]

    def load_next_token(self):
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()  # curr_token = next_token
            return self.tokenizer.curr_token[1]
        else:
            return ""
Example #13
class CompilationEngine():
    def __init__(self, filepath, vm_writer):
        self.wf = open(filepath[:-5] + ".myImpl.xml", 'w')
        self.tokenizer = JackTokenizer(filepath)
        self.symbol_table = SymbolTable()
        self.vmw = vm_writer
        self.compiled_class_name = None
        self.label_num = 0

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.wf.close()
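
    # Because __enter__/__exit__ are defined, the engine can be used as a
    # context manager so the generated XML file is closed automatically, e.g.:
    #     with CompilationEngine('Main.jack', vm_writer) as ce:
    #         ce.compile()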

    def get_new_label(self):
        self.label_num += 1
        return 'LABEL_%d' % self.label_num

    def compile(self):
        self.compile_class()

    def compile_class(self):

        self.write_element_start('class')

        self.compile_keyword([Tokens.CLASS])
        self.compiled_class_name = self.compile_class_name().token
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)

        while self.next_is_class_var_dec():
            self.compile_class_var_dec()

        while self.next_is_subroutine_dec():
            self.compile_subroutine_dec()

        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)

        self.write_element_end('class')

    def compile_class_var_dec(self):
        self.write_element_start('classVarDec')

        token = self.compile_keyword([Tokens.STATIC, Tokens.FIELD])
        kind = None
        if token == Tokens.STATIC:
            kind = IdentifierKind.STATIC
        elif token == Tokens.FIELD:
            kind = IdentifierKind.FIELD
        else:
            self.raise_syntax_error('Unexpected token')

        type_token = self.compile_type()
        self.compile_var_name(declaration=True,
                              type=type_token.token,
                              kind=kind)

        while self.next_is(Tokens.COMMA):
            self.compile_symbol(Tokens.COMMA)
            self.compile_var_name(declaration=True,
                                  type=type_token.token,
                                  kind=kind)

        self.compile_symbol(Tokens.SEMI_COLON)

        self.write_element_end('classVarDec')

    def compile_var_dec(self):

        self.write_element_start('varDec')
        self.compile_keyword(Tokens.VAR)
        type_token = self.compile_type()
        var_num = 0
        self.compile_var_name(declaration=True,
                              type=type_token.token,
                              kind=IdentifierKind.VAR)
        var_num += 1
        while self.next_is(Tokens.COMMA):
            self.compile_symbol(Tokens.COMMA)
            self.compile_var_name(declaration=True,
                                  type=type_token.token,
                                  kind=IdentifierKind.VAR)
            var_num += 1
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('varDec')

        return var_num

    def compile_subroutine_dec(self):
        self.symbol_table.start_subroutine()

        self.write_element_start('subroutineDec')

        token = self.compile_keyword(
            [Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD])
        if self.tokenizer.see_next() == Tokens.VOID:
            self.compile_keyword(Tokens.VOID)
        else:
            self.compile_type()
        subroutine_name = self.compile_subroutine_name().token
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)

        if token == Tokens.METHOD:
            self.symbol_table.define('$this', self.compiled_class_name,
                                     IdentifierKind.ARG)

        self.compile_parameter_list()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        self.compile_subroutine_body(subroutine_name, token)

        self.write_element_end('subroutineDec')

    def compile_subroutine_name(self):
        self.write_identifier_info('category: subroutine')
        return self.compile_identifier()

    def compile_class_name(self):
        self.write_identifier_info('category: class')
        return self.compile_identifier()

    def compile_var_name(self,
                         declaration=False,
                         type=None,
                         kind=None,
                         let=False,
                         call=False):
        if declaration:
            self.symbol_table.define(self.tokenizer.see_next().token, type,
                                     kind)
        elif let:
            pass
        elif call:
            pass
        else:
            kind = self.symbol_table.kind_of(self.tokenizer.see_next().token)
            if kind == IdentifierKind.ARG:
                self.vmw.write_push(
                    Segment.ARG,
                    self.symbol_table.index_of(
                        self.tokenizer.see_next().token))
            elif kind == IdentifierKind.VAR:
                self.vmw.write_push(
                    Segment.LOCAL,
                    self.symbol_table.index_of(
                        self.tokenizer.see_next().token))
            elif kind == IdentifierKind.FIELD:
                self.vmw.write_push(
                    Segment.THIS,
                    self.symbol_table.index_of(
                        self.tokenizer.see_next().token))
            elif kind == IdentifierKind.STATIC:
                self.vmw.write_push(
                    Segment.STATIC,
                    self.symbol_table.index_of(
                        self.tokenizer.see_next().token))

        self.write_identifier_info(
            'declaration: %s, kind: %s, index: %d' %
            (declaration,
             self.symbol_table.kind_of(self.tokenizer.see_next().token),
             self.symbol_table.index_of(self.tokenizer.see_next().token)))
        return self.compile_identifier()

    def write_identifier_info(self, value):
        self.write_element('IdentifierInfo', value)

    def compile_parameter_list(self):
        self.write_element_start('parameterList')

        if self.tokenizer.see_next() in [
                Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN
        ] or isinstance(self.tokenizer.see_next(), Identifier):
            type_token = self.compile_type()
            self.compile_var_name(declaration=True,
                                  type=type_token.token,
                                  kind=IdentifierKind.ARG)

            while self.next_is(Tokens.COMMA):
                self.compile_symbol(Tokens.COMMA)
                type_token = self.compile_type()
                self.compile_var_name(declaration=True,
                                      type=type_token.token,
                                      kind=IdentifierKind.ARG)

        self.write_element_end('parameterList')

    def compile_subroutine_body(self, subroutine_name, subroutine_dec_token):
        self.write_element_start('subroutineBody')

        print(subroutine_name, subroutine_dec_token)

        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        local_num = 0
        while self.next_is(Tokens.VAR):
            var_num = self.compile_var_dec()
            local_num += var_num

        self.vmw.write_function(
            "%s.%s" % (self.compiled_class_name, subroutine_name), local_num)

        if subroutine_dec_token == Tokens.METHOD:
            self.vmw.write_push(Segment.ARG, 0)
            self.vmw.write_pop(Segment.POINTER, 0)
        elif subroutine_dec_token == Tokens.CONSTRUCTOR:
            self.vmw.write_push(
                Segment.CONST,
                self.symbol_table.var_count(IdentifierKind.FIELD))
            self.vmw.write_call('Memory.alloc', 1)
            self.vmw.write_pop(Segment.POINTER, 0)
        elif subroutine_dec_token == Tokens.FUNCTION:
            pass
        else:
            self.raise_syntax_error('Invalid token')

        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)

        self.write_element_end('subroutineBody')

        print "========="
        for key in self.symbol_table.arg_table:
            print self.symbol_table.arg_table[
                key].type, key, "kind:", self.symbol_table.arg_table[
                    key].kind, "index:", self.symbol_table.arg_table[key].index

        return local_num

    def compile_statements(self):
        self.write_element_start('statements')

        while self.next_is_statement():
            self.compile_statement()

        self.write_element_end('statements')

    def compile_statement(self):
        if self.next_is(Tokens.LET):
            self.write_element_start('letStatement')
            self.compile_keyword(Tokens.LET)
            let_var = self.compile_var_name(let=True).token

            if self.next_is(Tokens.LEFT_BOX_BRACKET):
                self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
                self.compile_expression()  # i
                self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
                self.compile_symbol(Tokens.EQUAL)

                # base address
                kind = self.symbol_table.kind_of(let_var)
                if kind == IdentifierKind.ARG:
                    self.vmw.write_push(Segment.ARG,
                                        self.symbol_table.index_of(let_var))
                elif kind == IdentifierKind.VAR:
                    self.vmw.write_push(Segment.LOCAL,
                                        self.symbol_table.index_of(let_var))
                elif kind == IdentifierKind.FIELD:
                    self.vmw.write_push(Segment.THIS,
                                        self.symbol_table.index_of(let_var))
                elif kind == IdentifierKind.STATIC:
                    self.vmw.write_push(Segment.STATIC,
                                        self.symbol_table.index_of(let_var))

                # temp_2 <- base + i
                self.vmw.write_arithmetic(Command.ADD)
                self.vmw.write_pop(Segment.TEMP, 2)

                # value
                self.compile_expression()

                # set THAT <- base+i
                self.vmw.write_push(Segment.TEMP, 2)
                self.vmw.write_pop(Segment.POINTER, 1)

                self.vmw.write_pop(Segment.THAT, 0)
                self.compile_symbol(Tokens.SEMI_COLON)

            else:
                self.compile_symbol(Tokens.EQUAL)
                self.compile_expression()
                self.compile_symbol(Tokens.SEMI_COLON)
                kind = self.symbol_table.kind_of(let_var)
                if kind == IdentifierKind.VAR:
                    self.vmw.write_pop(Segment.LOCAL,
                                       self.symbol_table.index_of(let_var))
                elif kind == IdentifierKind.ARG:
                    self.vmw.write_pop(Segment.ARG,
                                       self.symbol_table.index_of(let_var))
                elif kind == IdentifierKind.FIELD:
                    self.vmw.write_pop(Segment.THIS,
                                       self.symbol_table.index_of(let_var))
                elif kind == IdentifierKind.STATIC:
                    self.vmw.write_pop(Segment.STATIC,
                                       self.symbol_table.index_of(let_var))

            self.write_element_end('letStatement')

        elif self.next_is(Tokens.IF):
            self.write_element_start('ifStatement')
            self.compile_keyword(Tokens.IF)
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.vmw.write_arithmetic(Command.NOT)
            l1 = self.get_new_label()
            l2 = self.get_new_label()
            self.vmw.write_if(l1)
            self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
            self.compile_statements()
            self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
            self.vmw.write_goto(l2)
            self.vmw.write_label(l1)
            if self.next_is(Tokens.ELSE):
                self.compile_keyword(Tokens.ELSE)
                self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
                self.compile_statements()
                self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
            self.vmw.write_label(l2)
            self.write_element_end('ifStatement')
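            # Control-flow sketch of the VM emitted for `if (cond) {...} else {...}`
            # (l1/l2 are the labels returned by get_new_label):
            #     <code for cond>
            #     not
            #     if-goto l1        // condition false -> skip the then-block
            #     <then statements>
            #     goto l2
            #     label l1
            #     <else statements, if present>
            #     label l2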

        elif self.next_is(Tokens.WHILE):
            self.write_element_start('whileStatement')
            l1 = self.get_new_label()
            l2 = self.get_new_label()
            self.compile_keyword(Tokens.WHILE)
            self.vmw.write_label(l1)
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.vmw.write_arithmetic(Command.NOT)
            self.vmw.write_if(l2)
            self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
            self.compile_statements()
            self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
            self.vmw.write_goto(l1)
            self.vmw.write_label(l2)
            self.write_element_end('whileStatement')
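            # Control-flow sketch of the VM emitted for `while (cond) {...}`:
            #     label l1
            #     <code for cond>
            #     not
            #     if-goto l2        // condition false -> exit the loop
            #     <body statements>
            #     goto l1
            #     label l2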

        elif self.next_is(Tokens.DO):
            self.write_element_start('doStatement')
            self.compile_keyword(Tokens.DO)
            self.compile_subroutine_call()
            self.compile_symbol(Tokens.SEMI_COLON)
            self.write_element_end('doStatement')
            self.vmw.write_pop(Segment.TEMP, 0)

        elif self.next_is(Tokens.RETURN):
            self.write_element_start('returnStatement')
            self.compile_keyword(Tokens.RETURN)
            if not self.next_is(Tokens.SEMI_COLON):
                self.compile_expression()
            else:
                self.vmw.write_push(Segment.CONST, 0)

            self.compile_symbol(Tokens.SEMI_COLON)
            self.vmw.write_return()

            self.write_element_end('returnStatement')

    def compile_subroutine_call(self):
        if self.next_is(Tokens.LEFT_ROUND_BRACKET, idx=1):
            subroutinename = self.compile_subroutine_name().token
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.vmw.write_push(Segment.POINTER, 0)
            argnum = self.compile_expression_list()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.vmw.write_call(
                "%s.%s" % (self.compiled_class_name, subroutinename),
                argnum + 1)
        else:
            identifier_str = self.tokenizer.see_next().token
            if self.symbol_table.kind_of(identifier_str):
                instance_name = self.compile_var_name(call=True).token
                self.compile_symbol(Tokens.DOT)
                subroutinename = self.compile_subroutine_name().token
                self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
                kind = self.symbol_table.kind_of(instance_name)
                if kind == IdentifierKind.ARG:
                    self.vmw.write_push(
                        Segment.ARG, self.symbol_table.index_of(instance_name))
                elif kind == IdentifierKind.VAR:
                    self.vmw.write_push(
                        Segment.LOCAL,
                        self.symbol_table.index_of(instance_name))
                elif kind == IdentifierKind.FIELD:
                    self.vmw.write_push(
                        Segment.THIS,
                        self.symbol_table.index_of(instance_name))
                elif kind == IdentifierKind.STATIC:
                    self.vmw.write_push(
                        Segment.STATIC,
                        self.symbol_table.index_of(instance_name))
                argnum = self.compile_expression_list()
                self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
                self.vmw.write_call(
                    "%s.%s" %
                    (self.symbol_table.type_of(instance_name), subroutinename),
                    argnum + 1)
            else:
                classname = self.compile_class_name().token
                self.compile_symbol(Tokens.DOT)
                subroutinename = self.compile_subroutine_name().token
                self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
                argnum = self.compile_expression_list()
                self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
                self.vmw.write_call("%s.%s" % (classname, subroutinename),
                                    argnum)
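        # Summary of the three call forms handled above:
        #   foo(args)       -> push pointer 0; call <current class>.foo nArgs+1
        #   obj.foo(args)   -> push obj (its segment/index); call <type of obj>.foo nArgs+1
        #   Class.foo(args) -> call Class.foo nArgs   (no receiver pushed)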

    def compile_expression_list(self):
        self.write_element_start('expressionList')
        argnum = 0
        if not self.next_is(Tokens.RIGHT_ROUND_BRACKET):
            self.compile_expression()
            argnum += 1
            while self.next_is(Tokens.COMMA):
                self.compile_symbol(Tokens.COMMA)
                self.compile_expression()
                argnum += 1
        self.write_element_end('expressionList')

        return argnum

    def compile_expression(self):
        self.write_element_start('expression')
        self.compile_term()
        while self.next_is([
                Tokens.PLUS, Tokens.MINUS, Tokens.MULTI, Tokens.DIV,
                Tokens.AND, Tokens.PIPE, Tokens.LESS_THAN, Tokens.GREATER_THAN,
                Tokens.EQUAL
        ]):
            op_token = self.compile_symbol([
                Tokens.PLUS, Tokens.MINUS, Tokens.MULTI, Tokens.DIV,
                Tokens.AND, Tokens.PIPE, Tokens.LESS_THAN, Tokens.GREATER_THAN,
                Tokens.EQUAL
            ])
            self.compile_term()
            if op_token == Tokens.PLUS:
                self.vmw.write_arithmetic(Command.ADD)
            elif op_token == Tokens.MINUS:
                self.vmw.write_arithmetic(Command.SUB)
            elif op_token == Tokens.MULTI:
                self.vmw.write_call('Math.multiply', 2)
            elif op_token == Tokens.DIV:
                self.vmw.write_call('Math.divide', 2)
            elif op_token == Tokens.AND:
                self.vmw.write_arithmetic(Command.AND)
            elif op_token == Tokens.PIPE:
                self.vmw.write_arithmetic(Command.OR)
            elif op_token == Tokens.LESS_THAN:
                self.vmw.write_arithmetic(Command.LT)
            elif op_token == Tokens.GREATER_THAN:
                self.vmw.write_arithmetic(Command.GT)
            elif op_token == Tokens.EQUAL:
                self.vmw.write_arithmetic(Command.EQ)

        self.write_element_end('expression')

    def compile_term(self):
        self.write_element_start('term')

        if self.next_type_is(TokenType.INT_CONST):
            value_str = self.compile_integer_constant()
            self.vmw.write_push(Segment.CONST, value_str)
        elif self.next_type_is(TokenType.STRING_CONST):
            self.compile_string_constant()
        elif self.next_is(Tokens.NULL):
            self.compile_keyword(Tokens.NULL)
            self.vmw.write_push(Segment.CONST, 0)
        elif self.next_is(Tokens.THIS):
            self.compile_keyword(Tokens.THIS)
            self.vmw.write_push(Segment.POINTER, 0)
        elif self.next_is(Tokens.TRUE):
            self.compile_keyword(Tokens.TRUE)
            self.vmw.write_push(Segment.CONST, 0)
            self.vmw.write_arithmetic(Command.NOT)
        elif self.next_is(Tokens.FALSE):
            self.compile_keyword(Tokens.FALSE)
            self.vmw.write_push(Segment.CONST, 0)
        elif self.next_type_is(TokenType.IDENTIFIER):
            if self.next_is(Tokens.LEFT_BOX_BRACKET, idx=1):

                var_name = self.compile_var_name().token
                self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
                self.compile_expression()

                self.vmw.write_arithmetic(Command.ADD)
                self.vmw.write_pop(Segment.POINTER, 1)
                self.vmw.write_push(Segment.THAT, 0)
                self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
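                # Array read `a[i]`: compile_var_name() is assumed to push the
                # base address of `a` (its body is not shown here), so the net
                # VM code is roughly:
                #     push <base of a>
                #     <code for i>
                #     add
                #     pop pointer 1   // THAT = &a[i]
                #     push that 0     // value of a[i]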
            elif self.next_is([Tokens.LEFT_ROUND_BRACKET, Tokens.DOT], idx=1):
                self.compile_subroutine_call()
            else:
                self.compile_var_name()

        elif self.next_is(Tokens.LEFT_ROUND_BRACKET):
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        elif self.next_is(Tokens.TILDE):
            self.compile_symbol(Tokens.TILDE)
            self.compile_term()
            self.vmw.write_arithmetic(Command.NOT)
        elif self.next_is(Tokens.MINUS):
            self.compile_symbol(Tokens.MINUS)
            self.compile_term()
            self.vmw.write_arithmetic(Command.NEG)
        else:
            self.raise_syntax_error('')
        self.write_element_end('term')

    def next_type_is(self, token_type):
        return self.tokenizer.see_next().type == token_type

    def compile_type(self):
        type_token = self.tokenizer.see_next()

        if self.next_is([Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN]):
            self.compile_keyword([Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN])
        else:
            self.compile_class_name()
        return type_token

    def next_is_statement(self):
        return self.next_is(
            [Tokens.LET, Tokens.IF, Tokens.WHILE, Tokens.DO, Tokens.RETURN])

    def next_is(self, tokens, idx=0):
        if type(tokens) == list:
            return self.tokenizer.see_next(idx=idx) in tokens
        else:
            return self.tokenizer.see_next(idx=idx) == tokens

    def next_is_class_var_dec(self):
        return self.next_is([Tokens.STATIC, Tokens.FIELD])

    def next_is_subroutine_dec(self):
        return self.next_is(
            [Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD])

    def compile_symbol(self, tokens):
        self.tokenizer.advance()
        if type(tokens) == list:
            if self.tokenizer.current_token in tokens:
                self.write_element('symbol',
                                   self.tokenizer.current_token.token_escaped)
                return self.tokenizer.current_token
            else:
                self.raise_syntax_error('')
        else:
            if self.tokenizer.current_token == tokens:
                self.write_element('symbol',
                                   self.tokenizer.current_token.token_escaped)
                return self.tokenizer.current_token
            else:
                self.raise_syntax_error('')

    def compile_keyword(self, tokens):
        self.tokenizer.advance()
        if type(tokens) == list:
            if self.tokenizer.current_token in tokens:
                self.write_element('keyword',
                                   self.tokenizer.current_token.token_escaped)
                return self.tokenizer.current_token
            else:
                self.raise_syntax_error('')
        else:
            if self.tokenizer.current_token == tokens:
                self.write_element('keyword',
                                   self.tokenizer.current_token.token_escaped)
                return self.tokenizer.current_token
            else:
                self.raise_syntax_error('')

    def compile_identifier(self):
        self.tokenizer.advance()
        if isinstance(self.tokenizer.current_token, Identifier):
            identifier_str = self.tokenizer.current_token.token_escaped
            self.write_element('identifier', identifier_str)
            return self.tokenizer.current_token
        else:
            self.raise_syntax_error('')

    def compile_integer_constant(self):
        self.tokenizer.advance()
        if isinstance(self.tokenizer.current_token, IntegerConstant):
            self.write_element('integerConstant',
                               self.tokenizer.current_token.token_escaped)
            return self.tokenizer.current_token.token_escaped
        else:
            self.raise_syntax_error('')

    def compile_string_constant(self):
        self.tokenizer.advance()
        if isinstance(self.tokenizer.current_token, StringConstant):
            string = self.tokenizer.current_token.token
            self.write_element('stringConstant',
                               self.tokenizer.current_token.token_escaped)
            self.vmw.write_push(Segment.CONST, len(string))
            self.vmw.write_call('String.new', 1)
            for c in string:
                self.vmw.write_push(Segment.CONST, ord(c))
                self.vmw.write_call('String.appendChar', 2)
        else:
            self.raise_syntax_error('')
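        # A string literal such as "ab" therefore compiles to roughly:
        #     push constant 2
        #     call String.new 1
        #     push constant 97
        #     call String.appendChar 2
        #     push constant 98
        #     call String.appendChar 2
        # String.new and String.appendChar both leave the string object on the stack.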

    def write_element(self, elem_name, value):
        self.wf.write('<%s> %s </%s>\n' % (elem_name, value, elem_name))

    def write_element_start(self, elem_name):
        self.wf.write('<%s>\n' % elem_name)

    def write_element_end(self, elem_name):
        self.wf.write('</%s>\n' % elem_name)

    def raise_syntax_error(self, msg):
        raise Exception('%s' % msg)
Example #14
0
class CompilationEngine:
    def __init__(self):
        self._tokenizer = None
        self._output_file = None
        self._offset = None

    @property
    def _token_n_type(self):
        return self._tokenizer.current_token, self._tokenizer.token_type

    @property
    def _valid_types(self):
        return 'int', 'char', 'boolean'

    @property
    def _valid_statements(self):
        return 'let', 'if', 'while', 'do', 'return'

    @property
    def _valid_operators(self):
        return '+', '-', '*', '/', '&', '|', '<', '>', '='

    def _is_valid_term_start(self, token, ttype):
        if ttype in (TTypes.INT_CONST, TTypes.STRING_CONST, TTypes.IDENTIFIER):
            return True
        elif token in ('true', 'false', 'null', 'this', '(', '-', '~',
                       *self._valid_operators):
            return True
        else:
            return False

    def _raise_missing(self, symbol):
        line_n = self._tokenizer.line_number
        raise Exception(f"{symbol} expected. Line {line_n}")

    def _open_tag(self, tag, new_line=True):
        nl = '\n' if new_line else ''
        pad = ' ' * self._offset
        self._output_file.write(f"{pad}<{tag}>{nl}")

        self._offset = self._offset + 2 if new_line else self._offset

    def _write_token(self, tag, token):
        if tag == 'symbol':
            token = self._tokenizer.symbol

        self._open_tag(tag, new_line=False)
        self._output_file.write(f" {token} ")
        self._close_tag(tag, new_line=False)

    def _close_tag(self, tag, new_line=True):
        self._offset = self._offset - 2 if new_line else self._offset

        pad = ' ' * self._offset if new_line else ''
        self._output_file.write(f"{pad}</{tag}>\n")

    def _advance(self):
        self._tokenizer.advance()
        return self._tokenizer.current_token, self._tokenizer.token_type

    def compile(self, input_file, output_file):
        self._tokenizer = JackTokenizer(input_file)
        self._output_file = output_file
        self._offset = 0

        while self._tokenizer.has_more_tokens:
            if self._tokenizer.advance() == VALID_TOKEN:
                if self._tokenizer.current_token == 'class':
                    tag = 'class'
                    self._open_tag(tag)
                    self._compile_class()
                    self._close_tag(tag)
                else:
                    line_n = self._tokenizer.line_number
                    raise Exception(
                        f"Class declaration expected. Line {line_n}")

    def _compile_class(self):
        self._write_token(tag='keyword', token='class')

        token, ttype = self._advance()

        if ttype == TTypes.IDENTIFIER:
            tag = TTypes.IDENTIFIER.value
            self._write_token(tag, token)

            token, _ = self._advance()
            if token == '{':
                self._write_token(tag='symbol', token=token)
                token, ttype = self._advance()
                while token in ('static', 'field'):
                    tag = 'classVarDec'
                    self._open_tag(tag)
                    self._compile_class_var_dec()
                    self._close_tag(tag)

                    token, ttype = self._advance()

                while token in ('constructor', 'function', 'method'):
                    tag = 'subroutineDec'
                    self._open_tag(tag)
                    self._compile_subroutine()
                    self._close_tag(tag)

                    token, ttype = self._advance()

                if token == '}':
                    self._write_token(tag='symbol', token=token)
                else:
                    self._raise_missing('"}"')

            else:
                self._raise_missing('"{"')

        else:
            line_n = self._tokenizer.line_number
            raise Exception(f"Invalid class name declaration. Line {line_n}")

    def _compile_class_var_dec(self):
        token = self._tokenizer.current_token
        self._write_token(tag='keyword', token=token)

        token, ttype = self._advance()

        if token in self._valid_types or ttype == TTypes.IDENTIFIER:
            tag = ttype.value
            self._write_token(tag=tag, token=token)

            token, ttype = self._advance()

            v = False
            while ttype == TTypes.IDENTIFIER:
                v = True
                self._write_token(tag='identifier', token=token)
                token, ttype = self._advance()
                if token == ',':
                    self._write_token(tag='symbol', token=token)
                    token, ttype = self._advance()
                    continue
                else:
                    break
            if not v:
                self._raise_missing('Valid variable name')

            if token == ';':
                self._write_token(tag='symbol', token=token)
            else:
                self._raise_missing('";"')

        else:
            self._raise_missing('Valid variable type')

    def _compile_subroutine(self):
        token = self._tokenizer.current_token
        self._write_token(tag='keyword', token=token)

        token, ttype = self._advance()

        if token in ('void', *self._valid_types) or ttype == TTypes.IDENTIFIER:
            tag = ttype.value
            self._write_token(tag=tag, token=token)

            token, ttype = self._advance()

            if ttype == TTypes.IDENTIFIER:
                self._write_token(tag='identifier', token=token)

                token, ttype = self._advance()

                if token == '(':
                    self._write_token(tag='symbol', token=token)
                    tag = 'parameterList'
                    self._open_tag(tag)
                    self._compile_parameter_list()
                    self._close_tag(tag)

                    token, _ = self._token_n_type

                    if token == ')':
                        self._write_token(tag='symbol', token=token)
                        self._open_tag('subroutineBody')

                        token, ttype = self._advance()

                        if token == '{':
                            self._write_token(tag='symbol', token=token)
                            tag = 'varDec'
                            while True:
                                token, _ = self._advance()
                                if token == 'var':
                                    self._open_tag(tag)
                                    self._compile_var_dec()
                                    self._close_tag(tag)
                                else:
                                    break

                            tag = 'statements'
                            if token in self._valid_statements:
                                self._open_tag(tag)
                                self._compile_statements()
                                self._close_tag(tag)

                            token, _ = self._token_n_type
                            if token == '}':
                                self._write_token(tag='symbol', token=token)
                                self._close_tag('subroutineBody')
                            else:
                                self._raise_missing('"}"')
                        else:
                            self._raise_missing('"{"')
                    else:
                        self._raise_missing('")"')
                else:
                    self._raise_missing('"("')
            else:
                self._raise_missing('Valid subroutine name')
        else:
            self._raise_missing('Valid subroutine type')

    def _compile_parameter_list(self):
        token, ttype = self._advance()
        if token == ')':
            return

        elif token in self._valid_types or ttype == TTypes.IDENTIFIER:
            tag = ttype.value
            self._write_token(tag=tag, token=token)

            token, ttype = self._advance()
            v = False
            while ttype == TTypes.IDENTIFIER:
                v = True
                self._write_token(tag='identifier', token=token)
                token, ttype = self._advance()
                if token == ',':
                    self._write_token(tag='symbol', token=token)
                    token, ttype = self._advance()
                    if token in self._valid_types or ttype == TTypes.IDENTIFIER:
                        tag = ttype.value
                        self._write_token(tag=tag, token=token)
                        token, ttype = self._advance()
                    continue
                elif token == ')':
                    return
                else:
                    break
            if not v:
                self._raise_missing('Valid variable name')
        else:
            self._raise_missing('Valid variable type')

    def _compile_var_dec(self):
        token, _ = self._token_n_type
        self._write_token(tag='keyword', token=token)  # token == 'var'

        token, ttype = self._advance()

        if token in self._valid_types or ttype == TTypes.IDENTIFIER:
            tag = ttype.value
            self._write_token(tag=tag, token=token)

            token, ttype = self._advance()

            v = False
            while ttype == TTypes.IDENTIFIER:
                v = True
                self._write_token(tag='identifier', token=token)
                token, _ = self._advance()

                if token == ',':
                    self._write_token(tag='symbol', token=token)
                    token, ttype = self._advance()
                    continue
                elif token == ';':
                    self._write_token(tag='symbol', token=token)
                    break
                else:
                    self._raise_missing('Valid variable declaration')
            if not v:
                self._raise_missing('Valid variable name')
        else:
            self._raise_missing('Valid variable type')

    def _compile_statements(self):
        token, _ = self._token_n_type

        while token in self._valid_statements:
            tag = None
            comp_call = None
            if token == 'let':
                tag = 'letStatement'
                comp_call = self._compile_let

            if token == 'if':
                tag = 'ifStatement'
                comp_call = self._compile_if

            if token == 'while':
                tag = 'whileStatement'
                comp_call = self._compile_while

            if token == 'do':
                tag = 'doStatement'
                comp_call = self._compile_do

            if token == 'return':
                tag = 'returnStatement'
                comp_call = self._compile_return

            self._open_tag(tag)
            self._write_token(tag='keyword', token=token)
            comp_call()
            self._close_tag(tag)

            token, _ = self._token_n_type

            if token in self._valid_statements:
                continue
            else:
                token, _ = self._advance()

    def _compile_let(self):
        token, ttype = self._advance()

        if ttype == TTypes.IDENTIFIER:
            self._write_token(tag='identifier', token=token)
            token, _ = self._advance()
            if token == '[':
                self._write_token(tag='symbol', token=token)
                tag = 'expression'
                self._advance()
                self._open_tag(tag)
                self._compile_expression()
                self._close_tag(tag)

                token, _ = self._token_n_type

                if token == ']':
                    self._write_token(tag='symbol', token=token)
                    token, _ = self._advance()
                else:
                    self._raise_missing('"]"')

            if token == '=':
                self._write_token(tag='symbol', token=token)
                self._advance()

                tag = 'expression'
                self._open_tag(tag)
                self._compile_expression()
                self._close_tag(tag)

                token, _ = self._token_n_type
                if token == ';':
                    self._write_token(tag='symbol', token=token)
                    return
                else:
                    self._raise_missing('";"')
            else:
                self._raise_missing('"="')
        else:
            self._raise_missing('Valid variable name')

    def _compile_if(self):
        token, _ = self._advance()

        if token == '(':
            self._write_token(tag='symbol', token=token)
            self._advance()
            tag = 'expression'
            self._open_tag(tag)
            self._compile_expression()
            self._close_tag(tag)

            token, _ = self._token_n_type
            if token == ')':
                self._write_token(tag='symbol', token=token)
                token, _ = self._advance()
                if token == '{':
                    self._write_token(tag='symbol', token=token)

                    token, _ = self._advance()

                    tag = 'statements'
                    if token in self._valid_statements:
                        self._open_tag(tag)
                        self._compile_statements()
                        self._close_tag(tag)

                    token, _ = self._token_n_type
                    if token == '}':
                        self._write_token(tag='symbol', token=token)
                        token, _ = self._advance()
                        if token == 'else':
                            self._write_token(tag='keyword', token=token)
                            token, _ = self._advance()
                            if token == '{':
                                self._write_token(tag='symbol', token=token)
                                token, _ = self._advance()
                                tag = 'statements'
                                if token in self._valid_statements:
                                    self._open_tag(tag)
                                    self._compile_statements()
                                    self._close_tag(tag)

                                token, _ = self._token_n_type
                                if token == '}':
                                    self._write_token(tag='symbol',
                                                      token=token)
                                else:
                                    self._raise_missing('"}"')
                            else:
                                self._raise_missing('"{"')
                    else:
                        self._raise_missing('"}"')
                else:
                    self._raise_missing('"{"')
            else:
                self._raise_missing('")"')
        else:
            self._raise_missing('"("')

    def _compile_while(self):
        token, _ = self._advance()
        if token == '(':
            self._write_token(tag='symbol', token=token)
            self._advance()
            tag = 'expression'
            self._open_tag(tag)
            self._compile_expression()
            self._close_tag(tag)

            token, _ = self._token_n_type
            if token == ')':
                self._write_token(tag='symbol', token=token)
                token, _ = self._advance()
                if token == '{':
                    self._write_token(tag='symbol', token=token)
                    token, ttype = self._advance()
                    tag = 'statements'
                    if token in self._valid_statements:
                        self._open_tag(tag)
                        self._compile_statements()
                        self._close_tag(tag)

                    token, _ = self._token_n_type
                    if token == '}':
                        self._write_token(tag='symbol', token=token)
                    else:
                        self._raise_missing('"}"')
                else:
                    self._raise_missing('"{"')
            else:
                self._raise_missing('")"')
        else:
            self._raise_missing('"("')

    def _compile_do(self):
        token, ttype = self._advance()

        if ttype == TTypes.IDENTIFIER:
            self._write_token(tag='identifier', token=token)
            token, _ = self._advance()
            if token == '.':
                self._write_token(tag='symbol', token=token)
                token, ttype = self._advance()
                if ttype == TTypes.IDENTIFIER:
                    self._write_token(tag='identifier', token=token)
                    token, _ = self._advance()
                else:
                    self._raise_missing('Valid class or variable name')
            if token == '(':
                self._write_token(tag='symbol', token=token)

                tag = 'expressionList'
                self._open_tag(tag)
                self._compile_expression_list()
                self._close_tag(tag)

                token, _ = self._token_n_type
                if token == ')':
                    self._write_token(tag='symbol', token=token)

                    token, _ = self._advance()
                    if token == ';':
                        self._write_token(tag='symbol', token=token)
                        return
                    else:
                        self._raise_missing('";"')
                else:
                    self._raise_missing('")"')
            else:
                self._raise_missing('"("')
        else:
            self._raise_missing('Subroutine call')

    def _compile_return(self):
        token, ttype = self._advance()
        tag = 'expression'

        if self._is_valid_term_start(token, ttype):
            self._open_tag(tag)
            self._compile_expression()
            self._close_tag(tag)

        token, _ = self._token_n_type
        if token == ';':
            self._write_token(tag='symbol', token=token)
        else:
            self._raise_missing('";"')

    def _compile_expression(self):
        token, ttype = self._token_n_type
        if self._is_valid_term_start(token, ttype):
            tag = 'term'
            self._open_tag(tag)
            self._compile_term()
            self._close_tag(tag)

            token, ttype = self._token_n_type

            while token in self._valid_operators:
                self._write_token(tag='symbol', token=token)
                token, ttype = self._advance()
                if self._is_valid_term_start(token, ttype):
                    tag = 'term'
                    self._open_tag(tag)
                    self._compile_term()
                    self._close_tag(tag)

                    token, ttype = self._token_n_type
                else:
                    self._raise_missing('Valid term')

    def _compile_term(self):
        token, ttype = self._token_n_type
        self._write_token(tag=ttype.value, token=token)

        if token in ('-', '~'):
            self._advance()
            tag = 'term'
            self._open_tag(tag)
            self._compile_term()
            self._close_tag(tag)

            return

        elif ttype in (TTypes.INT_CONST,
                       TTypes.STRING_CONST) or token in ('true', 'false',
                                                         'null', 'this'):
            self._advance()
            return

        if token == '(':
            tag = 'expression'
            self._advance()
            self._open_tag(tag)
            self._compile_expression()
            self._close_tag(tag)

            token, _ = self._token_n_type

            if token == ')':
                self._write_token(tag='symbol', token=token)
                token, _ = self._advance()
            else:
                self._raise_missing('")"')

        elif ttype == TTypes.IDENTIFIER:
            token, ttype = self._advance()

            if token == '[':
                self._write_token(tag='symbol', token=token)
                tag = 'expression'
                self._advance()
                self._open_tag(tag)
                self._compile_expression()
                self._close_tag(tag)

                token, _ = self._token_n_type

                if token == ']':
                    self._write_token(tag='symbol', token=token)
                    token, _ = self._advance()
                else:
                    self._raise_missing('"]"')

                return

            if token == '.':
                self._write_token(tag='symbol', token=token)
                token, ttype = self._advance()
                if ttype == TTypes.IDENTIFIER:
                    self._write_token(tag='identifier', token=token)
                    token, _ = self._advance()
                else:
                    self._raise_missing('Valid class or variable name')

            if token == '(':
                self._write_token(tag='symbol', token=token)

                tag = 'expressionList'
                self._open_tag(tag)
                self._compile_expression_list()
                self._close_tag(tag)

                token, _ = self._token_n_type
                if token == ')':
                    self._write_token(tag='symbol', token=token)

                    token, _ = self._advance()
                else:
                    self._raise_missing('")"')

    def _compile_expression_list(self):
        token, ttype = self._advance()
        if self._is_valid_term_start(token, ttype):
            tag = 'expression'
            self._open_tag(tag)
            self._compile_expression()
            self._close_tag(tag)

            token, _ = self._token_n_type

        while True:
            if token == ',':
                self._write_token(tag='symbol', token=token)
                token, ttype = self._advance()

                if self._is_valid_term_start(token, ttype):
                    tag = 'expression'
                    self._open_tag(tag)
                    self._compile_expression()
                    self._close_tag(tag)

                    token, _ = self._token_n_type
                else:
                    self._raise_missing('Valid expression')
            if token == ')':
                break
Example #15
0
from sys import argv

# For handling file/dir paths
from pathlib import Path

# Import Analyzer components
from compilation_engine import CompilationEngine
from jack_tokenizer import JackTokenizer

# Get input path
in_path = Path(argv[1])

if in_path.is_file():
    # Path points to a file
    # Initialize tokenizer
    tokenizer = JackTokenizer(in_path)
    # Initialize compilation engine
    compilationEngine = CompilationEngine(tokenizer,
                                          in_path.with_suffix(".xml"))

    # Start compilation
    compilationEngine.start_compilation()

elif in_path.is_dir():
    # Path points to a directory
    for item in in_path.iterdir():
        if item.is_file():
            # Compile every jack file
            if item.suffix == ".jack":
                tokenizer = JackTokenizer(item)
                ci = CompilationEngine(tokenizer, item.with_suffix(".xml"))
                ci.start_compilation()
Example #16
0
 def __init__(self, jack_file, xml_file):
     self._jack_tokenizer = JackTokenizer(jack_file)
     self._xml_file = xml_file
     self._xml_text = ''
 def build_tokenizer(self):
     self.tokenizer = JackTokenizer(self.jack_input)
Example #18
0
 def __init__(self, filepath):
     self.wf = open(filepath[:-5] + ".myImpl.xml", 'w')
     self.tokenizer = JackTokenizer(filepath)
Example #19
0
class CompilationEngine(object):
   # the destination file for writing
   destination_file = None

   # the tokenizer for the input file
   tokenizer = None

   # symbol table
   symbol_table = None

   # vm writer
   vm_writer = None

   # the class name
   class_name = ""

   # indices for if and while loops
   # start at -1 because we increment before use
   while_index = -1
   if_index = -1

   # the constructor for compiling a single class
   # the next method to be called after construction must be compile_class
   # source_filename must be a single file, not a directory
   def __init__(self, source_filename):
      # destination filename
      # if the original extension was .jack, then make the extension .vm
      # if the original extension was not .jack, then append .vm
      if source_filename.lower().endswith(".jack"):
         destination_filename = source_filename[:-5] + ".vm"
      else:
         destination_filename = source_filename + ".vm"

      # open the destination filename for writing
      self.destination_file = open(destination_filename, 'w')

      # create a tokenizer for the input file
      self.tokenizer = JackTokenizer(source_filename)

      # create the symbol table
      self.symbol_table = SymbolTable()

      # create the vm writer
      self.vm_writer = VMWriter(self.destination_file)

   # compiles a complete class and closes the output file
   def compile_class(self):
      # class keyword
      tt, t = self._token_next(True, "KEYWORD", "class")

      # name of class
      tt, t = self._token_next(True, "IDENTIFIER")
      self.class_name = t

      # open brace
      tt, t = self._token_next(True, "SYMBOL", "{")

      # one or more variable declarations
      self.tokenizer.advance()
      while True:
         tt, t = self._token_next(False)
         if tt == "KEYWORD" and t in ["field", "static"]:
            self.compile_class_var_dec()
         else:
            # stop trying to process variable declarations
            break

      # one or more subroutine declarations
      while True:
         tt, t = self._token_next(False)
         if tt == "KEYWORD" and t in ["constructor", "function", "method"]:
            self.compile_subroutine()
         else:
            # stop trying to process functions
            break

      # close brace
      # do not advance because we already advanced upon exiting the last loop
      tt, t = self._token_next(False, "SYMBOL", "}")

      # done with compilation; close the output file
      self.destination_file.close()

   # compiles a static declaration or field declaration
   def compile_class_var_dec(self):
      # compile the variable declaration
      # False means this is a class (not a subroutine)
      self.compile_var_dec(False)

   # compiles a complete method, function, or constructor
   def compile_subroutine(self):
      # start of subroutine
      self.symbol_table.start_subroutine()

      # constructor, function, or method keyword
      tt, type = self._token_next(False, "KEYWORD")

      # type of the return value
      # can be either keyword (void) or an identifier (any type)
      tt, t = self._token_next(True)

      # name of the method/function/constructor
      tt, name = self._token_next(True)
      name = self.class_name + "." + name

      # if the type is a method, "define" this as an argument, so the other
      # argument indexes work correctly
      if type == "method":
         self.symbol_table.define("this", self.class_name, SymbolTable.ARG)

      # opening parenthesis
      tt, t = self._token_next(True, "SYMBOL", "(")

      # arguments
      self.tokenizer.advance()
      self.compile_parameter_list()

      # closing parenthesis
      tt, t = self._token_next(False, "SYMBOL", ")")

      # opening brace
      tt, t = self._token_next(True, "SYMBOL", "{")

      # variable declarations
      self.tokenizer.advance()
      while True:
         tt, t = self._token_next(False)
         if tt == "KEYWORD" and t == "var":
            self.compile_var_dec()
         else:
            # stop trying to process variable declarations
            break

      # write the function
      num_locals = self.symbol_table.var_count(self.symbol_table.VAR)
      self.vm_writer.write_function(name, num_locals)

      # write any special code at the top of the function
      if type == "constructor":
         # code to allocate memory and set "this"
         size = self.symbol_table.var_count(self.symbol_table.FIELD)
         self.vm_writer.write_push(self.vm_writer.CONST, size)
         self.vm_writer.write_call("Memory.alloc", 1)
         self.vm_writer.write_pop(self.vm_writer.POINTER, 0)
      elif type == "function":
         # nothing special
         pass
      elif type == "method":
         # put argument 0 into pointer 0 (this)
         self.vm_writer.write_push(self.vm_writer.ARG, 0)
         self.vm_writer.write_pop(self.vm_writer.POINTER, 0)
      else:
         print "WARNING: Expected constructor, function, or name; got", type

      # statements
      self.compile_statements()

      # closing brace
      tt, t = self._token_next(False, "SYMBOL", "}")

      self.tokenizer.advance()

   # compiles a (possibly empty) parameter list, not including the enclosing
   # parentheses
   def compile_parameter_list(self):
      # check for empty list
      tt, t = self._token_next(False)
      if tt == "SYMBOL" and t == ")":
         # the parameter list was empty; do not process any more
         pass
      else:
         # there are things in the parameter list
         while True:
            # keyword (variable type)
            tt, type = self._token_next(False)

            # identifier (variable name)
            tt, name = self._token_next(True)

            # the kind is always an arg, since these are all parameters to the
            # function
            kind = SymbolTable.ARG

            # define the variable in the symbol table
            self.symbol_table.define(name, type, kind)

            # possible comma
            tt, t = self._token_next(True)
            if tt != "SYMBOL" or t != ",":
               # not a comma; stop processing parameters
               break

            self.tokenizer.advance()

   # compiles a var declaration
   # if subroutine is true, only the var keyword can be used
   # if subroutine is false, only the static and field keywords can be used
   def compile_var_dec(self, subroutine=True):
      # the keyword to start the declaration
      tt, kind = self._token_next(False, "KEYWORD")

      # check for required types
      if subroutine:
         if kind == "var":
            kind = SymbolTable.VAR
         else:
            print "WARNING: expecting var, but received %s" % (str(kind))
      else:
         if kind == "static":
            kind = SymbolTable.STATIC
         elif kind == "field":
            kind = SymbolTable.FIELD
         else:
            print "WARNING: expecting static or field, but received %s" % (str(kind))

      # type of the declaration
      # could be an identifier or a keyword (int, etc)
      tt, type = self._token_next(True)

      # name of the declaration
      tt, name = self._token_next(True, "IDENTIFIER")

      # define the variable in the symbol table
      self.symbol_table.define(name, type, kind)

      # a declaration can list several comma-separated variable names;
      # process the second and any subsequent names here
      self.tokenizer.advance()
      while True:
         tt, t = self._token_next(False)
         if tt == "SYMBOL" and t == ",":
            # another variable name follows
            tt, name = self._token_next(True, "IDENTIFIER")

            # define the variable in the symbol table
            self.symbol_table.define(name, type, kind)

            self.tokenizer.advance()
         else:
            # no more variable names
            break

      # should be on the semicolon at the end of the line
      tt, t = self._token_next(False, "SYMBOL", ";")

      self.tokenizer.advance()

   # compiles a sequence of statements, not including the enclosing {}
   def compile_statements(self):
      while True:
         tt, t = self._token_next(False)
         if tt == "KEYWORD" and t in ["do", "let", "while", "return", "if"]:
            # call compile_t, where t is the type of compilation we want
            token = getattr(self, "compile_" + t)()
         else:
            # not a statement; stop processing statements
            break

   # compiles a do statement
   def compile_do(self):
      # do keyword
      tt, t = self._token_next(False, "KEYWORD", "do")

      # subroutine call
      self.tokenizer.advance()
      self.compile_subroutine_call()

      # do statements do not have a return value, so eliminate the return
      # off of the stack
      self.vm_writer.write_pop(self.vm_writer.TEMP, 0)

      # semicolon
      tt, t = self._token_next(False, "SYMBOL", ";")

      self.tokenizer.advance()

   # compiles a let statement
   def compile_let(self):
      # let keyword
      tt, t = self._token_next(False, "KEYWORD", "let")

      # variable name
      tt, name = self._token_next(True, "IDENTIFIER")

      # possible brackets for array
      tt, t = self._token_next(True)
      if tt == "SYMBOL" and t == "[":
         # array - write operation
         array = True

         # compile the offset expression
         self.tokenizer.advance()
         self.compile_expression()

         # write the base address onto the stack
         segment, index = self._resolve_symbol(name)
         self.vm_writer.write_push(segment, index)

         # add base and offset
         self.vm_writer.write_arithmetic("add")

         # we cannot yet put the result into pointer 1, since the read
         # operation (which hasn't been parsed/computed yet) may use pointer 1
         # to read from an array value

         # closing bracket
         tt, t = self._token_next(False, "SYMBOL", "]")

         # advance to the next token, since we are expected to be on the = for
         # the next line
         self.tokenizer.advance()
      else:
         array = False

      # equals sign
      tt, t = self._token_next(False, "SYMBOL", "=")

      # expression
      self.tokenizer.advance()
      self.compile_expression()

      if array:
         # our stack now looks like this:
         #    TOP OF STACK
         #    computed result to store
         #    address in which value should be stored
         #    ... previous stuff ...

         # pop the computed value to temp 0
         self.vm_writer.write_pop(self.vm_writer.TEMP, 0)

         # pop the array address to pointer 1 (that)
         self.vm_writer.write_pop(self.vm_writer.POINTER, 1)

         # put the computed value back onto the stack
         self.vm_writer.write_push(self.vm_writer.TEMP, 0)

         # pop to the variable name or the array reference
         self.vm_writer.write_pop(self.vm_writer.THAT, 0)
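         # net effect in VM code for the array case:
         #    pop temp 0        // expression value
         #    pop pointer 1     // that = base + offset
         #    push temp 0
         #    pop that 0        // store into the array cell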
      else:
         # not an array - pop the expression to the variable
         segment, index = self._resolve_symbol(name)
         self.vm_writer.write_pop(segment, index)

      # semicolon
      tt, t = self._token_next(False, "SYMBOL", ";")

      self.tokenizer.advance()

   # compiles a while statement
   def compile_while(self):
      # labels for this while loop
      self.while_index += 1
      while_start = "WHILE_START_%d" % (self.while_index)
      while_end = "WHILE_END_%d" % (self.while_index)

      # while keyword
      tt, t = self._token_next(False, "KEYWORD", "while")

      # opening parenthesis
      tt, t = self._token_next(True, "SYMBOL", "(")

      # label for the start of the while statement
      self.vm_writer.write_label(while_start)

      # the expression that is the condition of the while statement
      self.tokenizer.advance()
      self.compile_expression()

      # the closing parenthesis
      tt, t = self._token_next(False, "SYMBOL", ")")

      # the result of the evaluation is now on the stack
      # if false, then goto to the end of the loop
      # to do this, negate and then call if-goto
      self.vm_writer.write_arithmetic("not")
      self.vm_writer.write_if(while_end)

      # the opening brace
      tt, t = self._token_next(True, "SYMBOL", "{")

      # the statements that form the body of the while loop
      self.tokenizer.advance()
      self.compile_statements()

      # the closing brace
      tt, t = self._token_next(False, "SYMBOL", "}")

      # after the last statement of the while loop
      # need to jump back up to the top of the loop to evaluate again
      self.vm_writer.write_goto(while_start)

      # label at the end of the loop
      self.vm_writer.write_label(while_end)

      self.tokenizer.advance()
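      # resulting label layout (n = self.while_index):
      #    label WHILE_START_n
      #    <cond> / not / if-goto WHILE_END_n
      #    <body statements>
      #    goto WHILE_START_n
      #    label WHILE_END_n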

   # compiles a return statement
   def compile_return(self):
      # return keyword
      tt, t = self._token_next(False, "KEYWORD", "return")

      # possible expression to return
      tt, t = self._token_next(True)
      if tt != "SYMBOL" and t != ";":
         self.compile_expression()
      else:
         # no return expression; return 0
         self.vm_writer.write_push(self.vm_writer.CONST, 0)

      # ending semicolon
      tt, t = self._token_next(False, "SYMBOL", ";")

      self.vm_writer.write_return()

      self.tokenizer.advance()

   # compiles an if statement, including a possible trailing else clause
   def compile_if(self):
      # it is more efficient in an if-else case to have the else portion first
      # in the code when testing, but we use the less-efficient but
      # easier-to-write true-false pattern here

      # labels for this if statement
      self.if_index += 1
      if_false = "IF_FALSE_%d" % (self.if_index)
      if_end = "IF_END_%d" % (self.if_index)

      # if keyword
      tt, t = self._token_next(False, "KEYWORD", "if")

      # opening parenthesis
      tt, t = self._token_next(True, "SYMBOL", "(")

      # expression of if statement
      self.tokenizer.advance()
      self.compile_expression()

      # closing parenthesis
      tt, t = self._token_next(False, "SYMBOL", ")")

      # the result of the evaluation is now on the stack
      # if false, then goto the false label
      # if true, fall through to executing code
      # if there is no else, only the false label is needed; labels generate
      # no code, so the extra label costs nothing
      self.vm_writer.write_arithmetic("not")
      self.vm_writer.write_if(if_false)

      # opening brace
      tt, t = self._token_next(True, "SYMBOL", "{")

      # statements for true portion
      self.tokenizer.advance()
      self.compile_statements()

      # closing brace
      tt, t = self._token_next(False, "SYMBOL", "}")

      tt, t = self._token_next(True)
      if tt == "KEYWORD" and t == "else":
         # else statement exists

         # goto the end of the if statement at the end of the true portion
         self.vm_writer.write_goto(if_end)

         # label for the start of the false portion
         self.vm_writer.write_label(if_false)

         # opening brace
         tt, t = self._token_next(True, "SYMBOL", "{")

         # statements
         self.tokenizer.advance()
         self.compile_statements()

         # closing brace
         tt, t = self._token_next(False, "SYMBOL", "}")

         # end label
         self.vm_writer.write_label(if_end)

         # advance tokenizer only if we are in the else, since otherwise the
         # token was advanced by the else check
         self.tokenizer.advance()
      else:
         # no else portion; only put in a label for false, since end is not
         # used
         self.vm_writer.write_label(if_false)
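      # resulting label layout (n = self.if_index):
      #    <cond> / not / if-goto IF_FALSE_n
      #    <true statements>
      #    goto IF_END_n        (emitted only when an else clause exists)
      #    label IF_FALSE_n
      #    <else statements>
      #    label IF_END_n       (emitted only when an else clause exists)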

   # compiles an expression (one or more terms connected by operators)
   def compile_expression(self):
      # the first term
      self.compile_term()

      # finish any number of operators followed by terms
      while True:
         tt, t = self._token_next(False)
         if tt == "SYMBOL" and t in "+-*/&|<>=":
            # found an operator
            # postfix order - add the next term and then do the operator

            # the next term
            self.tokenizer.advance()
            self.compile_term()

            # the operator
            if t == "+":
               self.vm_writer.write_arithmetic("add")
            if t == "-":
               self.vm_writer.write_arithmetic("sub")
            if t == "=":
               self.vm_writer.write_arithmetic("eq")
            if t == ">":
               self.vm_writer.write_arithmetic("gt")
            if t == "<":
               self.vm_writer.write_arithmetic("lt")
            if t == "&":
               self.vm_writer.write_arithmetic("and")
            if t == "|":
               self.vm_writer.write_arithmetic("or")
            if t == "*":
               self.vm_writer.write_call("Math.multiply", 2)
            if t == "/":
               self.vm_writer.write_call("Math.divide", 2)
         else:
            # no term found; done parsing the expression
            break

   # compiles a term
   # this routine is faced with a slight difficulty when trying to decide
   # between some of the alternative parsing rules. specifically, if the
   # current token is an identifier, the routine must distinguish between a
   # variable, an array entry, and a subroutine call. a single lookahead token,
   # which may be one of [, (, or ., suffices to distinguish between the three
   # possibilities. any other token is not part of this term and should not
   # be advanced over.
   def compile_term(self):
      # a term: integer_constant | string_constant | keyword_constant |
      # varname | varname[expression] | subroutine_call | (expression) |
      # unary_op term
      tt, t = self._token_next(False)
      if tt == "INT_CONST":
         self.vm_writer.write_push(self.vm_writer.CONST, t)

         # advance for the next statement
         self.tokenizer.advance()
      elif tt == "STRING_CONST":
         # after this portion is run, a pointer to a string should be on the
         # stack
         # we create a new string of a certain size and then append characters
         # one by one; each append operation returns the pointer to the same
         # string

         # create the string
         # string is a len, data tuple; not null-terminated
         size = len(t)
         self.vm_writer.write_push(self.vm_writer.CONST, size)
         self.vm_writer.write_call("String.new", 1)

         # append each character
         for char in t:
            self.vm_writer.write_push(self.vm_writer.CONST, ord(char))
            self.vm_writer.write_call("String.appendChar", 2)

         # advance for the next statement
         self.tokenizer.advance()
      elif tt == "KEYWORD":
         if t == "true":
            # true is -1, which is 0 negated
            self.vm_writer.write_push(self.vm_writer.CONST, 0)
            self.vm_writer.write_arithmetic("not")
         elif t == "false" or t == "null":
            self.vm_writer.write_push(self.vm_writer.CONST, 0)
         elif t == "this":
            self.vm_writer.write_push(self.vm_writer.POINTER, 0)

         # advance for the next statement
         self.tokenizer.advance()
      elif tt == "SYMBOL" and t == "(":
         # ( expression )

         # parse the expression
         self.tokenizer.advance()
         self.compile_expression()

         # closing parenthesis
         tt, t = self._token_next(False, "SYMBOL", ")")

         # advance for the next statement
         self.tokenizer.advance()

      elif tt == "SYMBOL" and t in "-~":
         # unary_op term
         # postfix order - add the next term and then do the operator

         # parse the rest of the term
         self.tokenizer.advance()
         self.compile_term()

         # write the unary operation
         if t == "-":
            self.vm_writer.write_arithmetic("neg")
         elif t == "~":
            self.vm_writer.write_arithmetic("not")

      elif tt == "IDENTIFIER":
         # varname, varname[expression], subroutine_call

         # do not write the identifier yet

         # get the next bit of the expression
         # if it is a [, then array; if it is a ( or ., then subroutine call
         # if none of above, then pass over
         tt2, t2 = self._token_next(True)

         if tt2 == "SYMBOL" and t2 in "(.":
            # subroutine call
            # back up and then compile the subroutine call
            self.tokenizer.retreat()

            self.compile_subroutine_call()
         elif tt2 == "SYMBOL" and t2 == "[":
            # array - read operation

            # write the base address onto the stack
            segment, index = self._resolve_symbol(t)
            self.vm_writer.write_push(segment, index)

            # compile the offset expression
            self.tokenizer.advance()
            self.compile_expression()

            # add base and offset
            self.vm_writer.write_arithmetic("add")

            # put the resulting address into pointer 1 (that)
            self.vm_writer.write_pop(self.vm_writer.POINTER, 1)

            # read from that 0 onto the stack
            self.vm_writer.write_push(self.vm_writer.THAT, 0)
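            # as an illustration (not from the original source), reading a[i]
            # with a in local 0 and i in local 1 emits roughly:
            #    push local 0     (base address of a)
            #    push local 1     (i)
            #    add
            #    pop pointer 1    (THAT = a + i)
            #    push that 0      (a[i] onto the stack)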

            # closing bracket
            tt, t = self._token_next(False, "SYMBOL", "]")

            # advance for the next statement
            self.tokenizer.advance()
         else:
            # none of above - just a single identifier
            segment, index = self._resolve_symbol(t)
            self.vm_writer.write_push(segment, index)

      else:
         # unknown
         print "WARNING: Unknown term expression object:", tt, t

   # compiles a (possibly empty) comma-separated list of expressions
   def compile_expression_list(self):
      num_args = 0

      # check for empty list
      tt, t = self._token_next(False)
      if tt == "SYMBOL" and t == ")":
         # the parameter list was empty; do not process any more
         pass
      else:
         # there are things in the parameter list
         while True:
            # expression to pass
            self.compile_expression()
            num_args += 1

            # possible comma
            tt, t = self._token_next(False)
            if tt == "SYMBOL" and t == ",":
               self.tokenizer.advance()
            else:
               # not a comma; stop processing parameters
               break

      return num_args

   # compiles a subroutine call
   # two cases:
   # - subroutineName(expressionList)
   # - (class|var).subroutineName(expressionList)
   def compile_subroutine_call(self):
      # first part of name
      tt, name1 = self._token_next(False, "IDENTIFIER")

      # a dot and another name may exist, or it could be a parenthesis
      name2 = None
      tt, t = self._token_next(True)
      if tt == "SYMBOL" and t == ".":
         # the name after the dot
         tt, name2 = self._token_next(True, "IDENTIFIER")

         # advance so that we are on the parenthesis
         self.tokenizer.advance()

      # determine if this is a method call
      # three possibilities
      # - class.func() - function call
      # - var.func()   - method call
      # - func()       - method call on current object
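      # as an illustration (not from the original source), assuming a field
      # variable "game" of type Game and a function class "Output":
      #   game.run()         -> push game's segment/index, call Game.run 1
      #   Output.printInt(x) -> no hidden argument,        call Output.printInt 1
      #   draw()             -> push pointer 0,            call <this class>.draw 1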
      if self.symbol_table.contains(name1):
         method_call = True
         local_call = False
      elif name2 is None:
         method_call = True
         local_call = True
      else:
         method_call = False

      # if this is a method call, push the hidden receiver argument first:
      # it is a method call if the symbol table contains name1 (and name2
      # exists), OR if name1 is a method in the current object
      if method_call and local_call:
         # push the current object onto the stack as a hidden argument
         self.vm_writer.write_push(self.vm_writer.POINTER, 0)
      elif method_call and not local_call:
         # push the variable onto the stack as a hidden argument
         segment, index = self._resolve_symbol(name1)
         self.vm_writer.write_push(segment, index)

      # opening parenthesis
      tt, t = self._token_next(False, "SYMBOL", "(")

      # expression list
      self.tokenizer.advance()
      num_args = self.compile_expression_list()

      # closing parenthesis
      tt, t = self._token_next(False, "SYMBOL", ")")

      # write the call
      if method_call and local_call:
         # method call on the current object (no explicit receiver)

         # get the name of the vm function to call
         classname = self.class_name
         vm_function_name = classname + "." + name1

         # increase arguments by 1, since there is the hidden "this"
         num_args += 1

         # make the call
         self.vm_writer.write_call(vm_function_name, num_args)

      elif method_call and not local_call:
         # variable name + method

         # get the name of the vm function to call
         classname = self.symbol_table.get(name1)[1]
         vm_function_name = classname + "." + name2

         # increase arguments by 1, since there is the hidden "this"
         num_args += 1

         # make the call
         self.vm_writer.write_call(vm_function_name, num_args)
      else:
         # get the name of the vm function to call
         vm_function_name = name1 + "." + name2

         # make the call
         self.vm_writer.write_call(vm_function_name, num_args)

      self.tokenizer.advance()

   # returns the token_type and token of the current token, advancing the
   # tokenizer first when advance is True; exits with a warning if the token
   # does not match expected_type or expected_value
   def _token_next(self, advance=False, expected_type=None, expected_value=None):
      # advance the tokenizer, if requested
      if advance:
         self.tokenizer.advance()

      # get the token type and the token itself
      token_type = self.tokenizer.token_type()
      token = str(getattr(self.tokenizer, token_type.lower())())

      if expected_type and token_type != expected_type:
         print "WARNING: Type", token_type, "found; expected", expected_type
         import traceback, sys
         traceback.print_stack()
         sys.exit(1)
      if expected_value and token != expected_value:
         print "WARNING: Value", token, "found; expected", expected_value
         import traceback, sys
         traceback.print_stack()
         sys.exit(1)

      return token_type, token

   # converts a symbol table kind into a vm memory segment
   def _type_to_segment(self, type):
      if type == self.symbol_table.STATIC:
         return self.vm_writer.STATIC
      elif type == self.symbol_table.FIELD:
         return self.vm_writer.THIS
      elif type == self.symbol_table.ARG:
         return self.vm_writer.ARG
      elif type == self.symbol_table.VAR:
         return self.vm_writer.LOCAL
      else:
         print "ERROR: Bad type %s" % (str(type))
 
   # resolves the symbol from the symbol table
   # the segment and index are returned as a 2-tuple
   def _resolve_symbol(self, name):
      kind, type, index = self.symbol_table.get(name)
      return self._type_to_segment(kind), index
Example #20
0
class CompilationEngine():
    def __init__(self, filepath, vm_writer):
        self.wf = open(filepath[:-5] + ".myImpl.xml", 'w')
        self.tokenizer = JackTokenizer(filepath)
        self.symbol_table = SymbolTable()
        self.vmw = vm_writer
        self.compiled_class_name = None
        self.label_num = 0

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.wf.close()

    def get_new_label(self):
        self.label_num += 1
        return 'LABEL_%d' % self.label_num

    def compile(self):
        self.compile_class()

    def compile_class(self):

        self.write_element_start('class')

        self.compile_keyword([Tokens.CLASS])
        self.compiled_class_name = self.compile_class_name().token
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)

        while self.next_is_class_var_dec():
            self.compile_class_var_dec()

        while self.next_is_subroutine_dec():
            self.compile_subroutine_dec()

        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)

        self.write_element_end('class')

    def compile_class_var_dec(self):
        self.write_element_start('classVarDec')

        token = self.compile_keyword([Tokens.STATIC, Tokens.FIELD])
        kind = None
        if token == Tokens.STATIC:
            kind = IdentifierKind.STATIC
        elif token == Tokens.FIELD:
            kind = IdentifierKind.FIELD
        else:
            self.raise_syntax_error('Unexpected token')

        type_token = self.compile_type()
        self.compile_var_name(declaration=True, type=type_token.token, kind=kind)

        while self.next_is(Tokens.COMMA):
            self.compile_symbol(Tokens.COMMA)
            self.compile_var_name(declaration=True, type=type_token.token, kind=kind)

        self.compile_symbol(Tokens.SEMI_COLON)

        self.write_element_end('classVarDec')

    def compile_var_dec(self):

        self.write_element_start('varDec')
        self.compile_keyword(Tokens.VAR)
        type_token = self.compile_type()
        var_num = 0
        self.compile_var_name(declaration=True, type=type_token.token, kind=IdentifierKind.VAR)
        var_num += 1
        while self.next_is(Tokens.COMMA):
            self.compile_symbol(Tokens.COMMA)
            self.compile_var_name(declaration=True, type=type_token.token, kind=IdentifierKind.VAR)
            var_num += 1
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('varDec')

        return var_num

    def compile_subroutine_dec(self):
        self.symbol_table.start_subroutine()

        self.write_element_start('subroutineDec')

        token = self.compile_keyword([Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD])
        if self.tokenizer.see_next() == Tokens.VOID:
            self.compile_keyword(Tokens.VOID)
        else:
            self.compile_type()
        subroutine_name = self.compile_subroutine_name().token
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)

        if token == Tokens.METHOD:
            self.symbol_table.define('$this',self.compiled_class_name,IdentifierKind.ARG)

        self.compile_parameter_list()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        self.compile_subroutine_body(subroutine_name, token)

        self.write_element_end('subroutineDec')

    def compile_subroutine_name(self):
        self.write_identifier_info('category: subroutine')
        return self.compile_identifier()

    def compile_class_name(self):
        self.write_identifier_info('category: class')
        return self.compile_identifier()

    def compile_var_name(self, declaration=False, type=None, kind=None, let=False, call=False):
        if declaration:
            self.symbol_table.define(self.tokenizer.see_next().token, type, kind)
        elif let:
            pass
        elif call:
            pass
        else:
            kind = self.symbol_table.kind_of(self.tokenizer.see_next().token)
            if kind == IdentifierKind.ARG:
                self.vmw.write_push(Segment.ARG, self.symbol_table.index_of(self.tokenizer.see_next().token))
            elif kind == IdentifierKind.VAR:
                self.vmw.write_push(Segment.LOCAL, self.symbol_table.index_of(self.tokenizer.see_next().token))
            elif kind == IdentifierKind.FIELD:
                self.vmw.write_push(Segment.THIS, self.symbol_table.index_of(self.tokenizer.see_next().token))
            elif kind == IdentifierKind.STATIC:
                self.vmw.write_push(Segment.STATIC, self.symbol_table.index_of(self.tokenizer.see_next().token))

        self.write_identifier_info('declaration: %s, kind: %s, index: %d' % (
            declaration, self.symbol_table.kind_of(self.tokenizer.see_next().token),
            self.symbol_table.index_of(self.tokenizer.see_next().token)))
        return self.compile_identifier()

    def write_identifier_info(self, value):
        self.write_element('IdentifierInfo', value)

    def compile_parameter_list(self):
        self.write_element_start('parameterList')

        if self.tokenizer.see_next() in [Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN] or isinstance(
                self.tokenizer.see_next(), Identifier):
            type_token = self.compile_type()
            self.compile_var_name(declaration=True, type=type_token.token, kind=IdentifierKind.ARG)

            while self.next_is(Tokens.COMMA):
                self.compile_symbol(Tokens.COMMA)
                type_token = self.compile_type()
                self.compile_var_name(declaration=True, type=type_token.token, kind=IdentifierKind.ARG)

        self.write_element_end('parameterList')

    def compile_subroutine_body(self, subroutine_name, subroutine_dec_token):
        self.write_element_start('subroutineBody')

        # debug output
        print(subroutine_name, subroutine_dec_token)

        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        local_num = 0
        while self.next_is(Tokens.VAR):
            var_num = self.compile_var_dec()
            local_num += var_num

        self.vmw.write_function("%s.%s" % (self.compiled_class_name, subroutine_name), local_num)

        if subroutine_dec_token == Tokens.METHOD:
            self.vmw.write_push(Segment.ARG, 0)
            self.vmw.write_pop(Segment.POINTER, 0)
        elif subroutine_dec_token == Tokens.CONSTRUCTOR:
            self.vmw.write_push(Segment.CONST, self.symbol_table.var_count(IdentifierKind.FIELD))
            self.vmw.write_call('Memory.alloc', 1)
            self.vmw.write_pop(Segment.POINTER, 0)
        elif subroutine_dec_token == Tokens.FUNCTION:
            pass
        else:
            self.raise_syntax_error('Invalid token')
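        # as an illustration (not part of the original example): a method body
        # starts with "push argument 0 / pop pointer 0" so THIS points at the
        # receiver, while a constructor of a class with, say, 3 fields starts
        # with "push constant 3 / call Memory.alloc 1 / pop pointer 0"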

        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)

        self.write_element_end('subroutineBody')

        print "========="
        for key in self.symbol_table.arg_table:
            print self.symbol_table.arg_table[key].type,key,"kind:",self.symbol_table.arg_table[key].kind,"index:",self.symbol_table.arg_table[key].index

        return local_num

    def compile_statements(self):
        self.write_element_start('statements')

        while self.next_is_statement():
            self.compile_statement()

        self.write_element_end('statements')

    def compile_statement(self):
        if self.next_is(Tokens.LET):
            self.write_element_start('letStatement')
            self.compile_keyword(Tokens.LET)
            let_var = self.compile_var_name(let=True).token

            if self.next_is(Tokens.LEFT_BOX_BRACKET):
                self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
                self.compile_expression()  # i
                self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
                self.compile_symbol(Tokens.EQUAL)

                # base address
                kind = self.symbol_table.kind_of(let_var)
                if kind == IdentifierKind.ARG:
                    self.vmw.write_push(Segment.ARG, self.symbol_table.index_of(let_var))
                elif kind == IdentifierKind.VAR:
                    self.vmw.write_push(Segment.LOCAL, self.symbol_table.index_of(let_var))
                elif kind == IdentifierKind.FIELD:
                    self.vmw.write_push(Segment.THIS, self.symbol_table.index_of(let_var))
                elif kind == IdentifierKind.STATIC:
                    self.vmw.write_push(Segment.STATIC, self.symbol_table.index_of(let_var))

                # temp_2 <- base + i
                self.vmw.write_arithmetic(Command.ADD)
                self.vmw.write_pop(Segment.TEMP, 2)

                # value
                self.compile_expression()

                # set THAT <- base+i
                self.vmw.write_push(Segment.TEMP, 2)
                self.vmw.write_pop(Segment.POINTER, 1)

                self.vmw.write_pop(Segment.THAT, 0)
                self.compile_symbol(Tokens.SEMI_COLON)
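                # as an illustration (not part of the original example),
                # "let a[i] = x + 1;" with a, i, x in local 0, 1, 2 emits roughly:
                #    push local 1 / push local 0 / add / pop temp 2   (stash a+i)
                #    push local 2 / push constant 1 / add             (x + 1)
                #    push temp 2 / pop pointer 1 / pop that 0         (store)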

            else:
                self.compile_symbol(Tokens.EQUAL)
                self.compile_expression()
                self.compile_symbol(Tokens.SEMI_COLON)
                kind = self.symbol_table.kind_of(let_var)
                if kind == IdentifierKind.VAR:
                    self.vmw.write_pop(Segment.LOCAL, self.symbol_table.index_of(let_var))
                elif kind == IdentifierKind.ARG:
                    self.vmw.write_pop(Segment.ARG, self.symbol_table.index_of(let_var))
                elif kind == IdentifierKind.FIELD:
                    self.vmw.write_pop(Segment.THIS, self.symbol_table.index_of(let_var))
                elif kind == IdentifierKind.STATIC:
                    self.vmw.write_pop(Segment.STATIC, self.symbol_table.index_of(let_var))

            self.write_element_end('letStatement')

        elif self.next_is(Tokens.IF):
            self.write_element_start('ifStatement')
            self.compile_keyword(Tokens.IF)
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.vmw.write_arithmetic(Command.NOT)
            l1 = self.get_new_label()
            l2 = self.get_new_label()
            self.vmw.write_if(l1)
            self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
            self.compile_statements()
            self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
            self.vmw.write_goto(l2)
            self.vmw.write_label(l1)
            if self.next_is(Tokens.ELSE):
                self.compile_keyword(Tokens.ELSE)
                self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
                self.compile_statements()
                self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
            self.vmw.write_label(l2)
            self.write_element_end('ifStatement')
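            # as an illustration (not part of the original example), an if/else
            # statement compiles to:
            #    <condition> / not / if-goto L1
            #    <then statements> / goto L2
            #    label L1 / <else statements>
            #    label L2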

        elif self.next_is(Tokens.WHILE):
            self.write_element_start('whileStatement')
            l1 = self.get_new_label()
            l2 = self.get_new_label()
            self.compile_keyword(Tokens.WHILE)
            self.vmw.write_label(l1)
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.vmw.write_arithmetic(Command.NOT)
            self.vmw.write_if(l2)
            self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
            self.compile_statements()
            self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
            self.vmw.write_goto(l1)
            self.vmw.write_label(l2)
            self.write_element_end('whileStatement')
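            # as an illustration (not part of the original example), a while
            # loop compiles to:
            #    label L1 / <condition> / not / if-goto L2
            #    <body statements> / goto L1
            #    label L2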

        elif self.next_is(Tokens.DO):
            self.write_element_start('doStatement')
            self.compile_keyword(Tokens.DO)
            self.compile_subroutine_call()
            self.compile_symbol(Tokens.SEMI_COLON)
            self.write_element_end('doStatement')
            self.vmw.write_pop(Segment.TEMP, 0)

        elif self.next_is(Tokens.RETURN):
            self.write_element_start('returnStatement')
            self.compile_keyword(Tokens.RETURN)
            if not self.next_is(Tokens.SEMI_COLON):
                self.compile_expression()
            else:
                self.vmw.write_push(Segment.CONST, 0)

            self.compile_symbol(Tokens.SEMI_COLON)
            self.vmw.write_return()

            self.write_element_end('returnStatement')

    def compile_subroutine_call(self):
        if self.next_is(Tokens.LEFT_ROUND_BRACKET, idx=1):
            subroutinename = self.compile_subroutine_name().token
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.vmw.write_push(Segment.POINTER, 0)
            argnum = self.compile_expression_list()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.vmw.write_call("%s.%s" % (self.compiled_class_name, subroutinename), argnum + 1)
        else:
            identifier_str = self.tokenizer.see_next().token
            if self.symbol_table.kind_of(identifier_str):
                instance_name = self.compile_var_name(call=True).token
                self.compile_symbol(Tokens.DOT)
                subroutinename = self.compile_subroutine_name().token
                self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
                kind = self.symbol_table.kind_of(instance_name)
                if kind == IdentifierKind.ARG:
                    self.vmw.write_push(Segment.ARG, self.symbol_table.index_of(instance_name))
                elif kind == IdentifierKind.VAR:
                    self.vmw.write_push(Segment.LOCAL, self.symbol_table.index_of(instance_name))
                elif kind == IdentifierKind.FIELD:
                    self.vmw.write_push(Segment.THIS, self.symbol_table.index_of(instance_name))
                elif kind == IdentifierKind.STATIC:
                    self.vmw.write_push(Segment.STATIC, self.symbol_table.index_of(instance_name))
                argnum = self.compile_expression_list()
                self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
                self.vmw.write_call("%s.%s" % (self.symbol_table.type_of(instance_name), subroutinename), argnum + 1)
            else:
                classname = self.compile_class_name().token
                self.compile_symbol(Tokens.DOT)
                subroutinename = self.compile_subroutine_name().token
                self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
                argnum = self.compile_expression_list()
                self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
                self.vmw.write_call("%s.%s" % (classname, subroutinename), argnum)

    def compile_expression_list(self):
        self.write_element_start('expressionList')
        argnum = 0
        if not self.next_is(Tokens.RIGHT_ROUND_BRACKET):
            self.compile_expression()
            argnum += 1
            while self.next_is(Tokens.COMMA):
                self.compile_symbol(Tokens.COMMA)
                self.compile_expression()
                argnum += 1
        self.write_element_end('expressionList')

        return argnum

    def compile_expression(self):
        self.write_element_start('expression')
        self.compile_term()
        while self.next_is([
            Tokens.PLUS,
            Tokens.MINUS,
            Tokens.MULTI,
            Tokens.DIV,
            Tokens.AND,
            Tokens.PIPE,
            Tokens.LESS_THAN,
            Tokens.GREATER_THAN,
            Tokens.EQUAL]):
            op_token = self.compile_symbol([
                Tokens.PLUS,
                Tokens.MINUS,
                Tokens.MULTI,
                Tokens.DIV,
                Tokens.AND,
                Tokens.PIPE,
                Tokens.LESS_THAN,
                Tokens.GREATER_THAN,
                Tokens.EQUAL])
            self.compile_term()
            if op_token == Tokens.PLUS:
                self.vmw.write_arithmetic(Command.ADD)
            elif op_token == Tokens.MINUS:
                self.vmw.write_arithmetic(Command.SUB)
            elif op_token == Tokens.MULTI:
                self.vmw.write_call('Math.multiply', 2)
            elif op_token == Tokens.DIV:
                self.vmw.write_call('Math.divide', 2)
            elif op_token == Tokens.AND:
                self.vmw.write_arithmetic(Command.AND)
            elif op_token == Tokens.PIPE:
                self.vmw.write_arithmetic(Command.OR)
            elif op_token == Tokens.LESS_THAN:
                self.vmw.write_arithmetic(Command.LT)
            elif op_token == Tokens.GREATER_THAN:
                self.vmw.write_arithmetic(Command.GT)
            elif op_token == Tokens.EQUAL:
                self.vmw.write_arithmetic(Command.EQ)

        self.write_element_end('expression')

    def compile_term(self):
        self.write_element_start('term')

        if self.next_type_is(TokenType.INT_CONST):
            value_str = self.compile_integer_constant()
            self.vmw.write_push(Segment.CONST, value_str)
        elif self.next_type_is(TokenType.STRING_CONST):
            self.compile_string_constant()
        elif self.next_is(Tokens.NULL):
            self.compile_keyword(Tokens.NULL)
            self.vmw.write_push(Segment.CONST, 0)
        elif self.next_is(Tokens.THIS):
            self.compile_keyword(Tokens.THIS)
            self.vmw.write_push(Segment.POINTER, 0)
        elif self.next_is(Tokens.TRUE):
            self.compile_keyword(Tokens.TRUE)
            self.vmw.write_push(Segment.CONST, 0)
            self.vmw.write_arithmetic(Command.NOT)
        elif self.next_is(Tokens.FALSE):
            self.compile_keyword(Tokens.FALSE)
            self.vmw.write_push(Segment.CONST, 0)
        elif self.next_type_is(TokenType.IDENTIFIER):
            if self.next_is(Tokens.LEFT_BOX_BRACKET, idx=1):

                var_name = self.compile_var_name().token
                self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
                self.compile_expression()

                self.vmw.write_arithmetic(Command.ADD)
                self.vmw.write_pop(Segment.POINTER, 1)
                self.vmw.write_push(Segment.THAT, 0)
                self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
            elif self.next_is([Tokens.LEFT_ROUND_BRACKET, Tokens.DOT], idx=1):
                self.compile_subroutine_call()
            else:
                self.compile_var_name()

        elif self.next_is(Tokens.LEFT_ROUND_BRACKET):
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        elif self.next_is(Tokens.TILDE):
            self.compile_symbol(Tokens.TILDE)
            self.compile_term()
            self.vmw.write_arithmetic(Command.NOT)
        elif self.next_is(Tokens.MINUS):
            self.compile_symbol(Tokens.MINUS)
            self.compile_term()
            self.vmw.write_arithmetic(Command.NEG)
        else:
            self.raise_syntax_error('')
        self.write_element_end('term')

    def next_type_is(self, token_type):
        return self.tokenizer.see_next().type == token_type

    def compile_type(self):
        type_token = self.tokenizer.see_next()

        if self.next_is([Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN]):
            self.compile_keyword([Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN])
        else:
            self.compile_class_name()
        return type_token

    def next_is_statement(self):
        return self.next_is([Tokens.LET, Tokens.IF, Tokens.WHILE, Tokens.DO, Tokens.RETURN])

    def next_is(self, tokens, idx=0):
        if type(tokens) == list:
            return self.tokenizer.see_next(idx=idx) in tokens
        else:
            return self.tokenizer.see_next(idx=idx) == tokens

    def next_is_class_var_dec(self):
        return self.next_is([Tokens.STATIC, Tokens.FIELD])

    def next_is_subroutine_dec(self):
        return self.next_is([Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD])

    def compile_symbol(self, tokens):
        self.tokenizer.advance()
        if type(tokens) == list:
            if self.tokenizer.current_token in tokens:
                self.write_element('symbol', self.tokenizer.current_token.token_escaped)
                return self.tokenizer.current_token
            else:
                self.raise_syntax_error('')
        else:
            if self.tokenizer.current_token == tokens:
                self.write_element('symbol', self.tokenizer.current_token.token_escaped)
                return self.tokenizer.current_token
            else:
                self.raise_syntax_error('')

    def compile_keyword(self, tokens):
        self.tokenizer.advance()
        if type(tokens) == list:
            if self.tokenizer.current_token in tokens:
                self.write_element('keyword', self.tokenizer.current_token.token_escaped)
                return self.tokenizer.current_token
            else:
                self.raise_syntax_error('')
        else:
            if self.tokenizer.current_token == tokens:
                self.write_element('keyword', self.tokenizer.current_token.token_escaped)
                return self.tokenizer.current_token
            else:
                self.raise_syntax_error('')

    def compile_identifier(self):
        self.tokenizer.advance()
        if isinstance(self.tokenizer.current_token, Identifier):
            identifier_str = self.tokenizer.current_token.token_escaped
            self.write_element(
                'identifier',
                identifier_str
            )
            return self.tokenizer.current_token
        else:
            self.raise_syntax_error('')

    def compile_integer_constant(self):
        self.tokenizer.advance()
        if isinstance(self.tokenizer.current_token, IntegerConstant):
            self.write_element('integerConstant', self.tokenizer.current_token.token_escaped)
            return self.tokenizer.current_token.token_escaped
        else:
            self.raise_syntax_error('')

    def compile_string_constant(self):
        self.tokenizer.advance()
        if isinstance(self.tokenizer.current_token, StringConstant):
            string = self.tokenizer.current_token.token
            self.write_element('stringConstant', self.tokenizer.current_token.token_escaped)
            self.vmw.write_push(Segment.CONST, len(string))
            self.vmw.write_call('String.new', 1)
            for c in string:
                self.vmw.write_push(Segment.CONST, ord(c))
                self.vmw.write_call('String.appendChar', 2)
        else:
            self.raise_syntax_error('')

    def write_element(self, elem_name, value):
        self.wf.write('<%s> %s </%s>\n' % (elem_name, value, elem_name))

    def write_element_start(self, elem_name):
        self.wf.write('<%s>\n' % elem_name)

    def write_element_end(self, elem_name):
        self.wf.write('</%s>\n' % elem_name)

    def raise_syntax_error(self, msg):
        raise Exception('%s' % msg)
Example #21
0
import glob
import sys
from pathlib import Path, PurePath
from os.path import isfile, isdir, join

from jack_tokenizer import JackTokenizer
from compilation_engine import CompilationEngine


if __name__ == '__main__':
    if len(sys.argv) > 1:
        program_path = sys.argv[1]
        if isfile(program_path):
            files = [program_path]
            output_path = Path(program_path).parent
        elif isdir(program_path):
            files = glob.glob(join(program_path, '*.jack'))
            output_path = program_path
        else:
            raise FileNotFoundError("[Errno 2] No such file or directory: ", program_path)

        for file in files:
            output_file_name = PurePath(file).name.split('.')[0] + '.vm'
            output_file = Path(output_path, output_file_name)
            file_tokenizer = JackTokenizer(file)
            CompilationEngine(file_tokenizer, output_file)

    else:
        raise TypeError("1 argument is required: program path, 0 arguments entered")
class CompilationEngine:
    CLASS_VAR_DEC_KEYWORDS = ['static', 'field']
    SUBROUTINE_DEC_KEYWORDS = ['constructor', 'function', 'method']
    BINARY_OPERATOR_SYMBOLS = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
    UNARY_OPERATOR_SYMBOLS = ['-', '~']
    TYPE_TO_TAG = {'STRING_CONST': 'stringConstant', 'INT_CONST': 'integerConstant', 'KEYWORD': 'keyword',
                   'IDENTIFIER': 'identifier', 'SYMBOL': 'symbol'}

    SYMBOLS_TO_XML_CONVENTION = {'<': '&lt;', '>': '&gt;', '&': '&amp;', '"': '&quot;'}

    def __init__(self, input_file_path, output_file_path):
        self.output_file = open(output_file_path, 'w')
        self.jack_tokenizer = JackTokenizer(input_file_path)
        if self.jack_tokenizer.has_more_tokens():
            self.compile_class()

    def compile_class(self):
        self.output_file.write('<class>\n')  # get first token
        self.jack_tokenizer.advance()
        self.write_token(self.jack_tokenizer.key_word())
        self.write_token(self.jack_tokenizer.identifier())
        self.write_token(self.jack_tokenizer.symbol())

        if self.jack_tokenizer.key_word() in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            self.compile_class_var_dec()
        if self.jack_tokenizer.key_word() in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.compile_subroutine()

        self.write_token(self.jack_tokenizer.symbol())
        self.output_file.write('</class>')
        self.output_file.close()

    def write_token(self, token_name):
        type_tag = CompilationEngine.TYPE_TO_TAG[self.jack_tokenizer.token_type()]
        self.output_file.write('<{0}> {1} </{0}>\n'.format(
            type_tag, token_name))
        self.jack_tokenizer.advance()
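        # for example, when the current token is the keyword "class", this
        # writes the line "<keyword> class </keyword>" and advances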

    def compile_class_var_dec(self):
        while self.jack_tokenizer.key_word() in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            self.output_file.write('<classVarDec>\n')

            self.write_token(self.jack_tokenizer.key_word())
            self.write_type()
            self.write_token(self.jack_tokenizer.identifier())

            while self.jack_tokenizer.symbol() != ';':
                self.write_token(self.jack_tokenizer.symbol())
                self.write_token(self.jack_tokenizer.identifier())
            self.write_token(self.jack_tokenizer.symbol())

            self.output_file.write('</classVarDec>\n')

    def write_type(self):
        if self.jack_tokenizer.token_type() == 'KEYWORD':
            self.write_token(self.jack_tokenizer.key_word())
        elif self.jack_tokenizer.token_type() == 'IDENTIFIER':
            self.write_token(self.jack_tokenizer.identifier())

    def compile_subroutine(self):
        while self.jack_tokenizer.key_word() in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.output_file.write('<subroutineDec>\n')
            self.write_token(self.jack_tokenizer.key_word())
            self.write_type()
            self.write_token(self.jack_tokenizer.identifier())
            self.write_token(self.jack_tokenizer.symbol())
            self.compile_parameter_list()
            self.write_token(self.jack_tokenizer.symbol())


            self.output_file.write('<subroutineBody>\n')
            self.write_token(self.jack_tokenizer.symbol())
            while self.jack_tokenizer.key_word() == 'var':
                self.compile_var_dec()
            self.compile_statements()
            self.write_token(self.jack_tokenizer.symbol())
            self.output_file.write('</subroutineBody>\n')

            self.output_file.write('</subroutineDec>\n')

    def compile_parameter_list(self):
        self.output_file.write('<parameterList>\n')
        if self.jack_tokenizer.symbol() != ')':
            self.write_type()
            self.write_token(self.jack_tokenizer.identifier())
            while self.jack_tokenizer.symbol() == ",":
                self.write_token(self.jack_tokenizer.symbol())
                self.write_type()
                self.write_token(self.jack_tokenizer.identifier())
        self.output_file.write('</parameterList>\n')


    def compile_var_dec(self):
        self.output_file.write('<varDec>\n')
        self.write_token(self.jack_tokenizer.key_word())
        self.write_type()
        self.write_token(self.jack_tokenizer.identifier())
        while self.jack_tokenizer.symbol() == ",":
            self.write_token(self.jack_tokenizer.symbol())
            self.write_token(self.jack_tokenizer.identifier())
        self.write_token(self.jack_tokenizer.symbol())
        self.output_file.write('</varDec>\n')


    def compile_statements(self):
        self.output_file.write('<statements>\n')

        while self.jack_tokenizer.token_type() == 'KEYWORD':
            if self.jack_tokenizer.key_word() == 'let':
                self.compile_let()
            elif self.jack_tokenizer.key_word() == 'if':
                self.compile_if()
            elif self.jack_tokenizer.key_word() == 'while':
                self.compile_while()
            elif self.jack_tokenizer.key_word() == 'do':
                self.compile_do()
            elif self.jack_tokenizer.key_word() == 'return':
                self.compile_return()

        self.output_file.write('</statements>\n')

    def compile_do(self):
        self.output_file.write('<doStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())

        self.write_token(self.jack_tokenizer.identifier())
        self.compile_subroutine_call()

        self.write_token(self.jack_tokenizer.symbol())
        self.output_file.write('</doStatement>\n')

    def compile_subroutine_call(self):
        if self.jack_tokenizer.symbol() == '(':
            self.write_token(self.jack_tokenizer.symbol())
            self.compile_expression_list()
            self.write_token(self.jack_tokenizer.symbol())
        elif self.jack_tokenizer.symbol() == '.':
            self.write_token(self.jack_tokenizer.symbol())
            self.write_token(self.jack_tokenizer.identifier())
            self.write_token(self.jack_tokenizer.symbol())
            self.compile_expression_list()
            self.write_token(self.jack_tokenizer.symbol())

    def compile_let(self):
        self.output_file.write('<letStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())
        self.write_token(self.jack_tokenizer.identifier())
        if self.jack_tokenizer.symbol() == '[':
            self.write_token(self.jack_tokenizer.symbol())
            self.compile_expression()
            self.write_token(self.jack_tokenizer.symbol())
        self.write_token(self.jack_tokenizer.symbol())
        self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())
        self.output_file.write('</letStatement>\n')

    def compile_while(self):
        self.output_file.write('<whileStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())
        self.write_token(self.jack_tokenizer.symbol())
        self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())
        self.write_token(self.jack_tokenizer.symbol())
        self.compile_statements()
        self.write_token(self.jack_tokenizer.symbol())
        self.output_file.write('</whileStatement>\n')


    def compile_return(self):
        self.output_file.write('<returnStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())
        if self.jack_tokenizer.symbol() != ';':
            self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())
        self.output_file.write('</returnStatement>\n')


    def compile_if(self):
        self.output_file.write('<ifStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())
        self.write_token(self.jack_tokenizer.symbol())
        self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())

        self.write_token(self.jack_tokenizer.symbol())
        self.compile_statements()
        self.write_token(self.jack_tokenizer.symbol())
        if self.jack_tokenizer.key_word() == 'else':
            self.write_token(self.jack_tokenizer.key_word())
            self.write_token(self.jack_tokenizer.symbol())
            self.compile_statements()
            self.write_token(self.jack_tokenizer.symbol())

        self.output_file.write('</ifStatement>\n')


    def compile_expression(self):
        self.output_file.write('<expression>\n')
        self.compile_term()
        while self.jack_tokenizer.symbol() in CompilationEngine.BINARY_OPERATOR_SYMBOLS:
            symbol = self.jack_tokenizer.symbol()
            if symbol in CompilationEngine.SYMBOLS_TO_XML_CONVENTION:
                symbol = CompilationEngine.SYMBOLS_TO_XML_CONVENTION[symbol]
            self.write_token(symbol)
            self.compile_term()
        self.output_file.write('</expression>\n')


    def compile_term(self):
        self.output_file.write('<term>\n')
        token_type = self.jack_tokenizer.token_type()
        if token_type == 'IDENTIFIER':
            self.write_token(self.jack_tokenizer.identifier())
            if self.jack_tokenizer.symbol() == '[':
                self.write_token(self.jack_tokenizer.symbol())
                self.compile_expression()
                self.write_token(self.jack_tokenizer.symbol())
            elif self.jack_tokenizer.symbol() == '(' or self.jack_tokenizer.symbol() == '.':
                self.compile_subroutine_call()
        elif token_type == 'STRING_CONST':
            self.write_token(self.jack_tokenizer.string_val())
        elif token_type == 'INT_CONST':
            self.write_token(self.jack_tokenizer.int_val())
        elif token_type == 'KEYWORD':
            self.write_token(self.jack_tokenizer.key_word())
        elif token_type == 'SYMBOL':
            if self.jack_tokenizer.symbol() == '(':
                self.write_token(self.jack_tokenizer.symbol())
                self.compile_expression()
                self.write_token(self.jack_tokenizer.symbol())
            elif self.jack_tokenizer.symbol() in CompilationEngine.UNARY_OPERATOR_SYMBOLS:
                self.write_token(self.jack_tokenizer.symbol())
                self.compile_term()
        self.output_file.write('</term>\n')

    def compile_expression_list(self):
        self.output_file.write('<expressionList>\n')
        if self.jack_tokenizer.symbol() != ')':
            self.compile_expression()
            while self.jack_tokenizer.symbol() == ',':
                self.write_token(self.jack_tokenizer.symbol())
                self.compile_expression()
        self.output_file.write('</expressionList>\n')
 def __init__(self, input_file_path, output_file_path):
     self.output_file = open(output_file_path, 'w')
     self.jack_tokenizer = JackTokenizer(input_file_path)
     if self.jack_tokenizer.has_more_tokens():
         self.compile_class()
Example #24
0
class SyntaxParser:
    def __init__(self, source_filepath):
        self.source_path = source_filepath
        self.tokenizer = JackTokenizer(self.source_path)
        self.class_name = None
        self.syntax_tree_root = self.compile_class()

    def get_syntax_tree(self) -> SyntaxTreeNode:
        return self.syntax_tree_root

    def __expect_keyword(self, keywords):
        next_token = self.tokenizer.peek_next()
        if next_token and next_token.get_type() == TokenType.KEYWORD:
            tk_value = next_token.get_keyword()
            if tk_value in keywords:
                self.tokenizer.advance()
                return SyntaxTreeNode('keyword', tk_value)

    def __expect_identifier(self):
        next_token = self.tokenizer.peek_next()
        if next_token and next_token.get_type() == TokenType.IDENTIFIER:
            self.tokenizer.advance()
            return SyntaxTreeNode('identifier', next_token.get_identifier())

    def __expect_symbol(self, symbols):
        next_token = self.tokenizer.peek_next()
        if next_token and next_token.get_type() == TokenType.SYMBOL:
            tk_value = next_token.get_symbol()
            if tk_value in symbols:
                self.tokenizer.advance()
                return SyntaxTreeNode('symbol', tk_value)

    def __expect_type(self):
        node = self.__expect_keyword(('int', 'char', 'boolean'))
        if not node:
            node = self.__expect_identifier()
        return node

    def __expect_void_or_type(self):
        node = self.__expect_keyword('void')
        if not node:
            node = self.__expect_type()
        return node

    def __expect_op(self):
        # '+'|'-'|'*'|'/'|'&'|'|'|'<'|'>'|'='
        return self.__expect_symbol(('+', '-', '*', '/', '&', '|', '<', '>', '='))

    def __expect_unary_op(self):
        return self.__expect_symbol(('-', '~'))

    def compile_class(self):
        # class: 'class' className '{' classVarDec* subroutineDec* '}'
        local_root = SyntaxTreeNode('class')
        # 'class'
        local_root.add_child(self.__expect_keyword('class'), 'expect keyword class')
        # className
        class_name_node = self.__expect_identifier()
        local_root.add_child(class_name_node, 'expect identifier className')
        self.class_name = class_name_node.value
        # '{'
        local_root.add_child(self.__expect_symbol('{'), 'expect {')
        # classVarDec*
        local_root.add_many(self.compile_class_var_dec)
        # subroutineDec*
        local_root.add_many(self.compile_subroutine)
        # '}'
        local_root.add_child(self.__expect_symbol('}'), 'expect } in class')
        return local_root

    def compile_class_var_dec(self):
        # classVarDec: ('static' | 'field') type varName (',' varName)* ';'
        # ('static' | 'field')
        node = self.__expect_keyword(('static', 'field'))
        if not node:
            return None
        local_root = SyntaxTreeNode('classVarDec')
        local_root.add_child(node)
        # re.type: 'int' | 'char' | 'boolean' | className
        # re.className: identifier
        local_root.add_child(self.__expect_type(), 'expect type')
        # varName: identifier
        local_root.add_child(self.__expect_identifier(), 'expect varName')
        # (',' varName)*
        while True:
            node = self.__expect_symbol(',')
            if node:
                local_root.add_child(node)
                local_root.add_child(self.__expect_identifier(), 'expect varName after ,')
            else:
                break
        # ';'
        local_root.add_child(self.__expect_symbol(';'), 'expect ; in varDec')
        return local_root

    def compile_subroutine(self):
        # ('constructor' | 'function' | 'method')
        node = self.__expect_keyword(('constructor', 'function', 'method'))
        if not node:
            return None
        local_root = SyntaxTreeNode('subroutineDec')
        local_root.add_child(node)
        # ('void' | type)
        local_root.add_child(self.__expect_void_or_type(), 'expect void or type')
        # subroutineName
        local_root.add_child(self.__expect_identifier(), 'expect subroutineName')
        # '('
        local_root.add_child(self.__expect_symbol('('), 'expect (')
        # parameterList
        local_root.add_child(self.compile_parameter_list(), 'expect parameterList')
        # ')'
        local_root.add_child(self.__expect_symbol(')'), 'expect )')
        # subroutineBody
        local_root.add_child(self.compile_subroutine_body(), 'expect subroutineBody')
        return local_root

    def compile_parameter_list(self):
        # ((type varName) (',' type varName)*)?
        local_root = SyntaxTreeNode('parameterList')
        # (type varName)
        node = self.__expect_type()
        if not node:
            return local_root
        local_root.add_child(node)
        local_root.add_child(self.__expect_identifier(), 'expect varName')
        # (',' type varName) *
        while True:
            node = self.__expect_symbol(',')
            if node:
                local_root.add_child(node)
                local_root.add_child(self.__expect_type(), 'expect type')
                local_root.add_child(self.__expect_identifier(), 'expect identifier')
            else:
                break
        return local_root

    def compile_subroutine_body(self):
        # subroutineBody
        # '{' varDec* statements '}'
        local_root = SyntaxTreeNode('subroutineBody')
        # '{'
        node = self.__expect_symbol('{')
        if not node:
            return None
        local_root.add_child(node)
        # varDec*
        local_root.add_many(self.compile_var_dec)
        # statements
        local_root.add_child(self.compile_statements(), 'expect statements')
        # '}'
        local_root.add_child(self.__expect_symbol('}'), 'expect }')
        return local_root

    def compile_var_dec(self):
        # 'var' type varName (',' varName)* ';'
        node = self.__expect_keyword('var')
        if not node:
            return None
        # 'var' type varName
        local_node = SyntaxTreeNode('varDec')
        local_node.add_child(node)
        local_node.add_child(self.__expect_type(), 'expect type in varDec')
        local_node.add_child(self.__expect_identifier(), 'expect identifier in varDec')
        # (',' varName)*
        while True:
            node = self.__expect_symbol(',')
            if node:
                local_node.add_child(node)
                local_node.add_child(self.__expect_identifier(), 'expect identifier in varDec')
            else:
                break
        # ;
        local_node.add_child(self.__expect_symbol(';'))
        return local_node

    def compile_statements(self):
        local_root = SyntaxTreeNode('statements')
        local_root.add_many(lambda: or_compile((
            self.compile_do,
            self.compile_let,
            self.compile_while,
            self.compile_return,
            self.compile_if,
        )))
        return local_root

    def compile_do(self):
        # 'do' subroutineCall ';'
        node = self.__expect_keyword('do')
        if not node:
            return None
        local_root = SyntaxTreeNode('doStatement')
        local_root.add_child(node)
        sub_call = self.compile_subroutine_call()
        if not sub_call:
            sys.exit('missing subroutine call in do statement')
        local_root.add_child(sub_call)
        # ;
        local_root.add_child(self.__expect_symbol(';'), 'expect ; in do')
        return local_root

    def compile_subroutine_call(self) -> SyntaxTreeNode:
        # subroutineCall: subroutineName '(' expressionList ')'
        # | (className | varName) '.' subroutineName '(' expressionList ')'
        local_root = None
        next_token = self.tokenizer.peek_next()
        if next_token.get_type() == TokenType.IDENTIFIER:
            next2_token = self.tokenizer.peek_next(2)
            n2_value = next2_token.get_symbol()
            if n2_value in ('(', '.'):
                # ok, this is a subroutine call
                local_root = SyntaxTreeNode('subroutineCall')
                node = self.__expect_identifier()
                local_root.add_child(node)
                next_token = self.tokenizer.peek_next()
                if next_token.get_symbol() == '.':
                    local_root.add_child(self.__expect_symbol('.'))
                    local_root.add_child(self.__expect_identifier())
                local_root.add_child(self.__expect_symbol('('))
                local_root.add_child(self.compile_expression_list())
                local_root.add_child(self.__expect_symbol(')'))
        return local_root

    def __expect_keyword_constant(self):
        return self.__expect_keyword(('true', 'false', 'null', 'this'))

    def __expect_integer_constant(self):
        next_token = self.tokenizer.peek_next()
        if next_token and next_token.get_type() == TokenType.INT_CONSTANT:
            self.tokenizer.advance()
            return SyntaxTreeNode('integerConstant', next_token.get_integer_constant())

    def __expect_string_constant(self):
        next_token = self.tokenizer.peek_next()
        if next_token and next_token.get_type() == TokenType.STRING_CONSTANT:
            self.tokenizer.advance()
            return SyntaxTreeNode('stringConstant', next_token.get_string_constant())

    def compile_let(self):
        # 'let' varName ('[' expression ']')? '=' expression ';'
        node = self.__expect_keyword('let')
        if not node:
            return None
        local_root = SyntaxTreeNode('letStatement')
        local_root.add_child(node)
        local_root.add_child(self.__expect_identifier(), 'expect varName in let')
        next_token = self.tokenizer.peek_next()
        if next_token and next_token.get_symbol() == '[':
            local_root.add_child(self.compile_array_access())
        local_root.add_child(self.__expect_symbol('='), 'expect = in let')
        local_root.add_child(self.compile_expression(), 'expect expression in let statement')
        # ;
        local_root.add_child(self.__expect_symbol(';'), 'expect ; in let')
        return local_root

    def compile_array_access(self):
        local_root = SyntaxTreeNode('arrayAccess')
        local_root.add_child(self.__expect_symbol('['), 'expect [')
        local_root.add_child(self.compile_expression(), 'expect expression in array access')
        local_root.add_child(self.__expect_symbol(']'), 'expect ]')
        return local_root



    def compile_while(self):
        # 'while' '(' expression ')' '{' statements '}'
        node = self.__expect_keyword('while')
        if not node:
            return None
        local_root = SyntaxTreeNode('whileStatement')
        local_root.add_child(node)
        local_root.add_child(self.__expect_symbol('('), 'expect ( in while')
        local_root.add_child(self.compile_expression(), 'expect expression in while')
        local_root.add_child(self.__expect_symbol(')'), 'expect ) in while')
        local_root.add_child(self.__expect_symbol('{'), 'expect { in while')
        local_root.add_child(self.compile_statements(), 'expect statements in while')
        local_root.add_child(self.__expect_symbol('}'), 'expect } in while')
        return local_root

    def compile_return(self):
        # 'return' expression? ';'
        node = self.__expect_keyword('return')
        if not node:
            return None
        local_root = SyntaxTreeNode('returnStatement')
        local_root.add_child(node)
        local_root.add_child(self.compile_expression())
        local_root.add_child(self.__expect_symbol(';'), 'expect ; in return')
        return local_root

    def compile_if(self):
        # 'if' '(' expression ')' '{' statements '}'
        # ('else' '{' statements '}')?
        node = self.__expect_keyword('if')
        if not node:
            return None
        local_root = SyntaxTreeNode('ifStatement')
        local_root.add_child(node)
        local_root.add_child(self.__expect_symbol('('), 'expect ( in if')
        local_root.add_child(self.compile_expression(), 'expect expression in if')
        local_root.add_child(self.__expect_symbol(')'), 'expect ) in if')
        local_root.add_child(self.__expect_symbol('{'), 'expect { in if')
        local_root.add_child(self.compile_statements(), 'expect statements in if')
        local_root.add_child(self.__expect_symbol('}'), 'expect } in if')
        next_token = self.tokenizer.peek_next()
        if next_token and next_token.get_keyword() == 'else':
            local_root.add_child(self.__expect_keyword('else'))
            local_root.add_child(self.__expect_symbol('{'), 'expect { in else')
            local_root.add_child(self.compile_statements(), 'expect statements in else')
            local_root.add_child(self.__expect_symbol('}'), 'expect } in else')
        return local_root

    def compile_expression(self):
        # term (op term)*
        node = self.compile_term()
        if not node:
            return None
        local_root = SyntaxTreeNode('expression')
        local_root.add_child(node)
        while True:
            node = self.__expect_op()
            if node:
                local_root.add_child(node)
                local_root.add_child(self.compile_term(), 'expect term after op')
            else:
                break
        return local_root

    def compile_term(self):
        # integerConstant | stringConstant | keywordConstant |
        # varName | varName '[' expression ']' |
        # subroutineCall | '(' expression ')' |
        # unaryOp term
        local_root = SyntaxTreeNode('term')

        # integerConstant | stringConstant | keywordConstant
        node = or_compile((
            self.__expect_integer_constant,
            self.__expect_string_constant,
            self.__expect_keyword_constant,
        ))
        if node:
            local_root.add_child(node)
            return local_root

        # subroutineCall
        sub_call = self.compile_subroutine_call()
        if sub_call:
            local_root.add_child(sub_call)
            return local_root

        next_token = self.tokenizer.peek_next()
        # varName | varName '[' expression ']'
        if next_token and next_token.get_type() == TokenType.IDENTIFIER:
            local_root.add_child(self.__expect_identifier())
            next2 = self.tokenizer.peek_next()
            if next2 and next2.get_symbol() == '[':
                local_root.add_child(self.compile_array_access())
            return local_root

        # '(' expression ')'
        if next_token and next_token.get_symbol() == '(':
            local_root.add_child(self.__expect_symbol('('))
            local_root.add_child(self.compile_expression(), 'expect expression after ( in term')
            local_root.add_child(self.__expect_symbol(')'), 'expect ) in term')
            return local_root

        # unaryOp term
        node = self.__expect_unary_op()
        if node:
            local_root.add_child(node)
            local_root.add_child(self.compile_term(), 'expect term after unary op in term')
            return local_root

        return None

    def compile_expression_list(self):
        # (expression (',' expression)* )?
        local_root = SyntaxTreeNode('expressionList')
        node = self.compile_expression()
        if node:
            local_root.add_child(node)
            # (',' expression)*
            while True:
                node = self.__expect_symbol(',')
                if node:
                    local_root.add_child(node)
                    local_root.add_child(self.compile_expression(), 'expect expression in exp list')
                else:
                    break
        return local_root

    def save_as_xml(self, xml_path):
        with open(xml_path, 'w') as writer:
            xml = self.syntax_tree_root.to_xml(indent_num=0)
            print(xml, file=writer, end='')
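The engine above assembles its parse tree from SyntaxTreeNode objects whose definition does not appear in this listing. The following is only a minimal sketch of what such a node class might look like, inferred from how it is called above (the field names and behaviour are assumptions, not the original implementation):

class SyntaxTreeNode:
    # name is a non-terminal tag such as 'term'; value holds the token text for leaves
    def __init__(self, name, value=None):
        self.name = name
        self.value = value
        self.children = []

    def add_child(self, node, error_msg=None):
        # optional children may be None; required children raise using the
        # caller-supplied message (e.g. 'expect ; in return')
        if node is None:
            if error_msg:
                raise SyntaxError(error_msg)
            return
        self.children.append(node)

    def to_xml(self, indent_num=0):
        # leaves print '<tag> value </tag>'; non-terminals recurse over their children
        pad = '  ' * indent_num
        if self.value is not None:
            return '%s<%s> %s </%s>\n' % (pad, self.name, self.value, self.name)
        body = ''.join(c.to_xml(indent_num + 1) for c in self.children)
        return '%s<%s>\n%s%s</%s>\n' % (pad, self.name, body, pad, self.name)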
Example #25
0
    def tokenize(self, code, outfile):

        tokenizer = JackTokenizer(code)
        CompilationEngine(tokenizer, outfile).compile()
Example #26
0
 def __init__(self, source_filepath):
     self.source_path = source_filepath
     self.tokenizer = JackTokenizer(self.source_path)
     self.class_name = None
     self.syntax_tree_root = self.compile_class()
Example #27
0
class CompilationEngine(object):
   # the destination file for writing
   destination_file = None

   # the tokenizer for the input file
   tokenizer = None

   # current indentation level
   indent = 0

   # the constructor for compiling a single class
   # the next method to be called after construction must be compile_class
   # source_filename must be a single file, not a directory
   def __init__(self, source_filename):
      # destination filename
      # if the original extension was .jack, then replace the extension with .xml
      # if the original extension was not .jack, then append .xml
      if source_filename.lower().endswith(".jack"):
         destination_filename = source_filename[:-5] + ".xml"
      else:
         destination_filename = source_filename + ".xml"

      # open the destination filename for writing
      self.destination_file = open(destination_filename, 'w')

      # create a tokenizer for the input file
      self.tokenizer = JackTokenizer(source_filename)

   # compiles a complete class and closes the output file
   def compile_class(self):
      # start the class
      self._start_block("class")

      # class
      tt, t = self._token_next(True, "KEYWORD", "class")
      self._write(tt, t)

      # name of class
      tt, t = self._token_next(True, "IDENTIFIER")
      self._write(tt, t)

      # open brace
      tt, t = self._token_next(True, "SYMBOL", "{")
      self._write(tt, t)

      # one or more variable declarations
      self.tokenizer.advance()
      while True:
         tt, t = self._token_next(False)
         if tt == "KEYWORD" and t in ["field", "static"]:
            self.compile_class_var_dec()
         else:
            # stop trying to process variable declarations
            break

      # one or more subroutine declarations
      while True:
         tt, t = self._token_next(False)
         if tt == "KEYWORD" and t in ["constructor", "function", "method"]:
            self.compile_subroutine()
         else:
            # stop trying to process functions
            break

      # close brace
      # do not advance because we already advanced upon exiting the last loop
      tt, t = self._token_next(False, "SYMBOL", "}")
      self._write(tt, t)

      # end the class
      self._end_block("class")

      # close the output file
      self.destination_file.close()

   # compiles a static declaration or field declaration
   def compile_class_var_dec(self):
      # start variable declaration
      self._start_block("classVarDec")

      # compile the variable declaration
      # False means don't print the tags
      self.compile_var_dec(False)

      # end variable declaration
      self._end_block("classVarDec")

   # compiles a complete method, function, or constructor
   def compile_subroutine(self):
      # start subroutine declaration
      self._start_block("subroutineDec")

      # constructor, function, or method keyword
      tt, t = self._token_next(False, "KEYWORD")
      self._write(tt, t)

      # type of the return value
      # can be either a keyword (void, int, etc.) or an identifier (a class type)
      tt, t = self._token_next(True)
      self._write(tt, t)

      # name of the method/function/constructor
      tt, t = self._token_next(True)
      self._write(tt, t)

      # opening parenthesis
      tt, t = self._token_next(True, "SYMBOL", "(")
      self._write(tt, t)

      # arguments
      self.tokenizer.advance()
      self.compile_parameter_list()

      # closing parenthesis
      tt, t = self._token_next(False, "SYMBOL", ")")
      self._write(tt, t)

      # start body of subroutine
      self._start_block("subroutineBody")

      # opening brace
      tt, t = self._token_next(True, "SYMBOL", "{")
      self._write(tt, t)

      # variable declarations
      self.tokenizer.advance()
      while True:
         tt, t = self._token_next(False)
         if tt == "KEYWORD" and t == "var":
            self.compile_var_dec()
         else:
            # stop trying to process variable declarations
            break

      # statements
      self.compile_statements()

      # closing brace
      tt, t = self._token_next(False, "SYMBOL", "}")
      self._write(tt, t)

      # end body of subroutine
      self._end_block("subroutineBody")

      # finish subroutine declaration
      self._end_block("subroutineDec")

      self.tokenizer.advance()

   # compiles a (possibly empty) parameter list, not including the enclosing
   # parentheses
   def compile_parameter_list(self):
      self._start_block("parameterList")

      # check for empty list
      tt, t = self._token_next(False)
      if tt == "SYMBOL" and t == ")":
         # the parameter list was empty; do not process any more
         pass
      else:
         # there are things in the parameter list
         while True:
            # keyword (variable type)
            tt, t = self._token_next(False)
            self._write(tt, t)

            # identifier (variable name)
            tt, t = self._token_next(True)
            self._write(tt, t)

            # possible comma
            tt, t = self._token_next(True)
            if tt == "SYMBOL" and t == ",":
               self._write(tt, t)
            else:
               # not a comma; stop processing parameters
               break

            self.tokenizer.advance()

      self._end_block("parameterList")

   # compiles a var declaration
   def compile_var_dec(self, print_tags=True):
      if print_tags:
         self._start_block("varDec")

      # the keyword to start the declaration
      tt, t = self._token_next(False, "KEYWORD")
      self._write(tt, t)

      # type of the declaration
      # could be an identifier or a keyword (int, etc)
      tt, t = self._token_next(True)
      self._write(tt, t)

      # name of the declaration
      tt, t = self._token_next(True, "IDENTIFIER")
      self._write(tt, t)

      # more than one comma-separated variable name may be declared on one
      # line; process the second and subsequent variable names here
      self.tokenizer.advance()
      while True:
         tt, t = self._token_next(False)
         if tt == "SYMBOL" and t == ",":
            # write the comma
            self._write(tt, t)

            # another variable name follows
            tt, t = self._token_next(True, "IDENTIFIER")
            self._write(tt, t)

            self.tokenizer.advance()
         else:
            # no more variable names
            break

      # should be on the semicolon at the end of the line
      tt, t = self._token_next(False, "SYMBOL", ";")
      self._write(tt, t)

      self.tokenizer.advance()

      if print_tags:
         self._end_block("varDec")

   # compiles a sequence of statements, not including the enclosing {}
   def compile_statements(self):
      self._start_block("statements")

      while True:
         tt, t = self._token_next(False)
         if tt == "KEYWORD" and t in ["do", "let", "while", "return", "if"]:
            # call compile_t, where t is the type of compilation we want
            token = getattr(self, "compile_" + t)()
         else:
            # not a statement; stop processing statements
            break

      self._end_block("statements")

   # compiles a do statement
   def compile_do(self):
      self._start_block("doStatement")

      # do keyword
      tt, t = self._token_next(False, "KEYWORD", "do")
      self._write(tt, t)

      # subroutine call
      self.tokenizer.advance()
      self.compile_subroutine_call()

      # semicolon
      tt, t = self._token_next(False, "SYMBOL", ";")
      self._write(tt, t)

      self._end_block("doStatement")
      self.tokenizer.advance()

   # compiles a let statement
   def compile_let(self):
      self._start_block("letStatement")

      # let keyword
      tt, t = self._token_next(False, "KEYWORD", "let")
      self._write(tt, t)

      # variable name
      tt, t = self._token_next(True, "IDENTIFIER")
      self._write(tt, t)

      # possible brackets for array
      tt, t = self._token_next(True)
      if tt == "SYMBOL" and t == "[":
         # write bracket
         self._write(tt, t)

         # compile the expression
         self.tokenizer.advance()
         self.compile_expression()

         # closing bracket
         tt, t = self._token_next(False, "SYMBOL", "]")
         self._write(tt, t)

         # advance to the next token, since we are expected to be on the = for
         # the next line
         self.tokenizer.advance()

      # equals sign
      tt, t = self._token_next(False, "SYMBOL", "=")
      self._write(tt, t)

      # expression
      self.tokenizer.advance()
      self.compile_expression()

      # semicolon
      tt, t = self._token_next(False, "SYMBOL", ";")
      self._write(tt, t)

      self._end_block("letStatement")
      self.tokenizer.advance()

   # compiles a while statement
   def compile_while(self):
      self._start_block("whileStatement")

      # while keyword
      tt, t = self._token_next(False, "KEYWORD", "while")
      self._write(tt, t)

      # opening parenthesis
      tt, t = self._token_next(True, "SYMBOL", "(")
      self._write(tt, t)

      # the expression that is the condition of the while statement
      self.tokenizer.advance()
      self.compile_expression()

      # the closing parenthesis
      tt, t = self._token_next(False, "SYMBOL", ")")
      self._write(tt, t)

      # the opening brace
      tt, t = self._token_next(True, "SYMBOL", "{")
      self._write(tt, t)

      # the statements that form the body of the while loop
      self.tokenizer.advance()
      self.compile_statements()

      # the closing brace
      tt, t = self._token_next(False, "SYMBOL", "}")
      self._write(tt, t)

      self._end_block("whileStatement")
      self.tokenizer.advance()

   # compiles a return statement
   def compile_return(self):
      self._start_block("returnStatement")

      # return keyword
      tt, t = self._token_next(False, "KEYWORD", "return")
      self._write(tt, t)

      # possible expression to return
      tt, t = self._token_next(True)
      if tt != "SYMBOL" and t != ";":
         self.compile_expression()

      # ending semicolon
      tt, t = self._token_next(False, "SYMBOL", ";")
      self._write(tt, t)

      self._end_block("returnStatement")
      self.tokenizer.advance()

   # compiles an if statement, including a possible trailing else clause
   def compile_if(self):
      self._start_block("ifStatement")

      # if keyword
      tt, t = self._token_next(False, "KEYWORD", "if")
      self._write(tt, t)

      # opening parenthesis
      tt, t = self._token_next(True, "SYMBOL", "(")
      self._write(tt, t)

      # expression of if statement
      self.tokenizer.advance()
      self.compile_expression()

      # closing parenthesis
      tt, t = self._token_next(False, "SYMBOL", ")")
      self._write(tt, t)

      # opening brace
      tt, t = self._token_next(True, "SYMBOL", "{")
      self._write(tt, t)

      # statements
      self.tokenizer.advance()
      self.compile_statements()

      # closing brace
      tt, t = self._token_next(False, "SYMBOL", "}")
      self._write(tt, t)

      tt, t = self._token_next(True)
      if tt == "KEYWORD" and t == "else":
         # else statement exists
         # write else
         self._write(tt, t)

         # opening brace
         tt, t = self._token_next(False, "SYMBOL", "{")
         self._write(tt, t)

         # statements
         self.compile_statements()

         # closing brace
         tt, t = self._token_next(False, "SYMBOL", "}")
         self._write(tt, t)

         # advance tokenizer only if we are in the else, since otherwise the
         # token was advanced by the else check
         self.tokenizer.advance()

      self._end_block("ifStatement")

   # compiles an expression (one or more terms connected by operators)
   def compile_expression(self):
      self._start_block("expression")

      # the first term
      self.compile_term()

      # finish any number of operators followed by terms
      while True:
         tt, t = self._token_next(False)
         if tt == "SYMBOL" and t in "+-*/&|<>=":
            # found an operator
            self._write(tt, t)

            # the next term
            self.tokenizer.advance()
            self.compile_term()
         else:
            # no term found; done parsing the expression
            break

      self._end_block("expression")

   # compiles a term
   # this routine is faced with a slight difficulty when trying to decide
   # between some of the alternative parsing rules. specifically, if the
   # current token is an identifier, the routine must distinguish between a
   # variable, an array entry, and a subroutine call. a single lookahead token,
   # which may be one of [, (, or ., suffices to distinguish between the three
   # possibilities. any other token is not part of this term and should not
   # be advanced over.
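   # for example, with the current token on the identifier `foo`:
   #    foo[i]        -> lookahead '['  -> array entry
   #    foo(x)        -> lookahead '('  -> subroutine call
   #    Foo.bar(x)    -> lookahead '.'  -> qualified subroutine call
   #    anything else -> plain variable reference (the lookahead is not part of the term)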
   def compile_term(self):
      self._start_block("term")

      # a term: integer_constant | string_constant | keyword_constant |
      # varname | varname[expression] | subroutine_call | (expression) |
      # unary_op term
      tt, t = self._token_next(False)
      if tt in ["INT_CONST", "STRING_CONST", "KEYWORD"]:
         self._write(tt, t)

         # advance for the next statement
         self.tokenizer.advance()
      elif tt == "SYMBOL" and t == "(":
         # ( expression )

         # write the opening parenthesis
         self._write(tt, t)

         # parse the expression
         self.tokenizer.advance()
         self.compile_expression()

         # closing parenthesis
         tt, t = self._token_next(False, "SYMBOL", ")")
         self._write(tt, t)

         # advance for the next statement
         self.tokenizer.advance()

      elif tt == "SYMBOL" and t in "-~":
         # unary_op term

         # write the unary operation
         self._write(tt, t)

         # parse the rest of the term
         self.tokenizer.advance()
         self.compile_term()

      elif tt == "IDENTIFIER":
         # varname, varname[expression], subroutine_call

         # do not write the identifier yet

         # get the next bit of the expression
         # if it is a [, then array; if it is a ( or ., then subroutine call
         # if none of above, then pass over
         tt2, t2 = self._token_next(True)

         if tt2 == "SYMBOL" and t2 in "(.":
            # subroutine call
            # back up and then compile the subroutine call
            self.tokenizer.retreat()

            self.compile_subroutine_call()
         elif tt2 == "SYMBOL" and t2 == "[":
            # array
            # write identifier
            self._write(tt, t)

            # write bracket
            self._write(tt2, t2)

            # compile the expression
            self.tokenizer.advance()
            self.compile_expression()

            # closing bracket
            tt, t = self._token_next(False, "SYMBOL", "]")
            self._write(tt, t)

            # advance for the next statement
            self.tokenizer.advance()
         else:
            # none of above - just a single identifier
            self._write(tt, t)

      else:
         # unknown
         print "WARNING: Unknown term expression object:", tt, t

      self._end_block("term")

   # compiles a (possibly empty) comma-separated list of expressions
   def compile_expression_list(self):
      self._start_block("expressionList")

      # check for empty list
      tt, t = self._token_next(False)
      if tt == "SYMBOL" and t == ")":
         # the parameter list was empty; do not process any more
         pass
      else:
         # there are things in the parameter list
         while True:
            # expression to pass
            self.compile_expression()

            # possible comma
            tt, t = self._token_next(False)
            if tt == "SYMBOL" and t == ",":
               self._write(tt, t)
               self.tokenizer.advance()
            else:
               # not a comma; stop processing parameters
               break

      self._end_block("expressionList")

   # compiles a subroutine call
   # two cases:
   # - subroutineName(expressionList)
   # - (class|var).subroutineName(expressionList)
   def compile_subroutine_call(self):
      # first part of name
      tt, t = self._token_next(False, "IDENTIFIER")
      self._write(tt, t)

      # a dot and another name may exist, or it could be a parenthesis
      tt, t = self._token_next(True)
      if tt == "SYMBOL" and t == ".":
         self._write(tt, t)

         # the name after the dot
         tt, t = self._token_next(True, "IDENTIFIER")
         self._write(tt, t)

         # advance so that we are on the parenthesis
         self.tokenizer.advance()

      # opening parenthesis
      tt, t = self._token_next(False, "SYMBOL", "(")
      self._write(tt, t)

      # expression list
      self.tokenizer.advance()
      self.compile_expression_list()

      # closing parenthesis
      tt, t = self._token_next(False, "SYMBOL", ")")
      self._write(tt, t)

      self.tokenizer.advance()

   # returns the token_type and token of the current token, advancing the
   # tokenizer first if advance is True
   def _token_next(self, advance=False, expected_type=None, expected_value=None):
      # advance the tokenizer, if requested
      if advance:
         self.tokenizer.advance()

      # get the token type and the token itself
      token_type = self.tokenizer.token_type()
      token = str(getattr(self.tokenizer, token_type.lower())())

      if expected_type and token_type != expected_type:
         print "WARNING: Type", token_type, "found; expected", expected_type
         import traceback, sys
         traceback.print_stack()
         sys.exit(1)
      if expected_value and token != expected_value:
         print "WARNING: Value", token, "found; expected", expected_value
         import traceback, sys
         traceback.print_stack()
         sys.exit(1)

      return token_type, token

   # writes the given token to the output file
   def _write(self, token_type, token):
      # lowercase for tag name
      token_type = token_type.lower()

      # special types
      token_type = token_type.replace("int_const", "integerConstant")
      token_type = token_type.replace("string_const", "stringConstant")

      # special values to replace for output
      # escape & first so the other replacements are not double-escaped
      token = token.replace("&", "&amp;")
      for sym, rep in {"<": "&lt;", ">": "&gt;", '"': "&quot;"}.items():
         token = token.replace(sym, rep)

      # print the token type and token to the file
      output = ['<', token_type, '>', ' ', token, ' ', '</', token_type,
            '>', '\n']
      self.destination_file.write(self._indent("".join(output)))

   # starts an XML block
   def _start_block(self, block_name):
      self.destination_file.write(self._indent("<" + block_name + ">\n"))
      self.indent += 2

   # ends an XML block
   def _end_block(self, block_name):
      self.indent -= 2
      self.destination_file.write(self._indent("</" + block_name + ">\n"))

   # indents a single line of text at the current indentation level
   def _indent(self, text):
      return " " * self.indent + text
Example #28
0
class CompilationEngine():
    def __init__(self, filepath):
        self.wf = open(filepath[:-5] + ".my.xml", 'w')
        self.tokenizer = JackTokenizer(filepath)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.wf.close()

    def compile(self):
        self.compile_class()

    def compile_class(self):
        self.write_element_start('class')

        self.compile_keyword([Tokens.CLASS])
        self.compile_class_name()
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)

        while self.next_is_class_var_dec():
            self.compile_class_var_dec()

        while self.next_is_subroutine_dec():
            self.compile_subroutine_dec()

        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)

        self.write_element_end('class')

    def compile_class_var_dec(self):
        self.write_element_start('classVarDec')

        self.compile_keyword([Tokens.STATIC, Tokens.FIELD])
        self.compile_type()
        self.compile_var_name()

        while self.next_is(Tokens.COMMA):
            self.compile_symbol(Tokens.COMMA)
            self.compile_var_name()

        self.compile_symbol(Tokens.SEMI_COLON)

        self.write_element_end('classVarDec')

    def compile_subroutine_dec(self):
        self.write_element_start('subroutineDec')

        self.compile_keyword(
            [Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD])
        if self.tokenizer.see_next() == Tokens.VOID:
            self.compile_keyword(Tokens.VOID)
        else:
            self.compile_type()
        self.compile_subroutine_name()
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
        self.compile_parameter_list()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        self.compile_subroutine_body()

        self.write_element_end('subroutineDec')

    def compile_subroutine_name(self):
        self.compile_identifier()

    def compile_class_name(self):
        self.compile_identifier()

    def compile_var_name(self):
        self.compile_identifier()

    def compile_parameter_list(self):
        self.write_element_start('parameterList')

        if self.tokenizer.see_next() in [
                Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN
        ] or isinstance(self.tokenizer.see_next(), Identifier):
            self.compile_type()
            self.compile_var_name()

            while self.next_is(Tokens.COMMA):
                self.compile_symbol(Tokens.COMMA)
                self.compile_type()
                self.compile_var_name()

        self.write_element_end('parameterList')

    def compile_subroutine_body(self):
        self.write_element_start('subroutineBody')

        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        while self.next_is(Tokens.VAR):
            self.compile_var_dec()

        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)

        self.write_element_end('subroutineBody')

    def compile_var_dec(self):
        self.write_element_start('varDec')
        self.compile_keyword(Tokens.VAR)
        self.compile_type()
        self.compile_var_name()
        while self.next_is(Tokens.COMMA):
            self.compile_symbol(Tokens.COMMA)
            self.compile_var_name()
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('varDec')

    def compile_statements(self):
        self.write_element_start('statements')

        while self.next_is_statement():
            self.compile_statement()

        self.write_element_end('statements')

    def compile_statement(self):
        if self.next_is(Tokens.LET):
            self.write_element_start('letStatement')
            self.compile_keyword(Tokens.LET)
            self.compile_var_name()
            if self.next_is(Tokens.LEFT_BOX_BRACKET):
                self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
                self.compile_expression()
                self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
            self.compile_symbol(Tokens.EQUAL)
            self.compile_expression()
            self.compile_symbol(Tokens.SEMI_COLON)
            self.write_element_end('letStatement')

        elif self.next_is(Tokens.IF):
            self.write_element_start('ifStatement')
            self.compile_keyword(Tokens.IF)
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
            self.compile_statements()
            self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
            if self.next_is(Tokens.ELSE):
                self.compile_keyword(Tokens.ELSE)
                self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
                self.compile_statements()
                self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
            self.write_element_end('ifStatement')

        elif self.next_is(Tokens.WHILE):
            self.write_element_start('whileStatement')
            self.compile_keyword(Tokens.WHILE)
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
            self.compile_statements()
            self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
            self.write_element_end('whileStatement')

        elif self.next_is(Tokens.DO):
            self.write_element_start('doStatement')
            self.compile_keyword(Tokens.DO)
            self.compile_subroutine_call()
            self.compile_symbol(Tokens.SEMI_COLON)
            self.write_element_end('doStatement')

        elif self.next_is(Tokens.RETURN):
            self.write_element_start('returnStatement')
            self.compile_keyword(Tokens.RETURN)
            if not self.next_is(Tokens.SEMI_COLON):
                self.compile_expression()
            self.compile_symbol(Tokens.SEMI_COLON)

            self.write_element_end('returnStatement')

    def compile_subroutine_call(self):
        if self.next_is(Tokens.LEFT_ROUND_BRACKET, idx=1):
            self.compile_subroutine_name()
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression_list()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        else:
            self.compile_identifier()
            self.compile_symbol(Tokens.DOT)
            self.compile_subroutine_name()
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression_list()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)

    def compile_expression_list(self):
        self.write_element_start('expressionList')
        if not self.next_is(Tokens.RIGHT_ROUND_BRACKET):
            self.compile_expression()
            while self.next_is(Tokens.COMMA):
                self.compile_symbol(Tokens.COMMA)
                self.compile_expression()
        self.write_element_end('expressionList')

    def compile_expression(self):
        self.write_element_start('expression')
        self.compile_term()
        while self.next_is([
                Tokens.PLUS, Tokens.MINUS, Tokens.MULTI, Tokens.DIV,
                Tokens.AND, Tokens.PIPE, Tokens.LESS_THAN, Tokens.GREATER_THAN,
                Tokens.EQUAL
        ]):
            self.compile_symbol([
                Tokens.PLUS, Tokens.MINUS, Tokens.MULTI, Tokens.DIV,
                Tokens.AND, Tokens.PIPE, Tokens.LESS_THAN, Tokens.GREATER_THAN,
                Tokens.EQUAL
            ])
            self.compile_term()
        self.write_element_end('expression')

    def compile_term(self):
        self.write_element_start('term')

        if self.next_type_is(TokenType.INT_CONST):
            self.compile_integer_constant()
        elif self.next_type_is(TokenType.STRING_CONST):
            self.compile_string_constant()
        elif self.next_is(
            [Tokens.NULL, Tokens.THIS, Tokens.TRUE, Tokens.FALSE]):
            self.compile_keyword(
                [Tokens.NULL, Tokens.THIS, Tokens.TRUE, Tokens.FALSE])
        elif self.next_type_is(TokenType.IDENTIFIER):

            if self.next_is(Tokens.LEFT_BOX_BRACKET, idx=1):
                self.compile_var_name()
                self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
                self.compile_expression()
                self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
            elif self.next_is([Tokens.LEFT_ROUND_BRACKET, Tokens.DOT], idx=1):
                self.compile_subroutine_call()
            else:
                self.compile_var_name()

        elif self.next_is(Tokens.LEFT_ROUND_BRACKET):
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        elif self.next_is([Tokens.TILDE, Tokens.MINUS]):
            self.compile_symbol([Tokens.TILDE, Tokens.MINUS])
            self.compile_term()
        else:
            self.raise_syntax_error('')
        self.write_element_end('term')

    def next_type_is(self, token_type):
        return self.tokenizer.see_next().type == token_type

    def compile_type(self):
        if self.next_is([Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN]):
            self.compile_keyword([Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN])
        elif isinstance(self.tokenizer.see_next(), Identifier):
            self.compile_identifier()

    def next_is_statement(self):
        return self.next_is(
            [Tokens.LET, Tokens.IF, Tokens.WHILE, Tokens.DO, Tokens.RETURN])

    def next_is(self, tokens, idx=0):
        if type(tokens) == list:
            return self.tokenizer.see_next(idx=idx) in tokens
        else:
            return self.tokenizer.see_next(idx=idx) == tokens

    def next_is_class_var_dec(self):
        return self.next_is([Tokens.STATIC, Tokens.FIELD])

    def next_is_subroutine_dec(self):
        return self.next_is(
            [Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD])

    def compile_symbol(self, tokens):
        self.tokenizer.advance()
        if type(tokens) == list:
            if self.tokenizer.current_token in tokens:
                self.write_element('symbol',
                                   self.tokenizer.current_token.token_escaped)
            else:
                self.raise_syntax_error('')
        else:
            if self.tokenizer.current_token == tokens:
                self.write_element('symbol',
                                   self.tokenizer.current_token.token_escaped)
            else:
                self.raise_syntax_error('')

    def compile_keyword(self, tokens):
        self.tokenizer.advance()
        if type(tokens) == list:
            if self.tokenizer.current_token in tokens:
                self.write_element('keyword',
                                   self.tokenizer.current_token.token_escaped)
            else:
                self.raise_syntax_error('')
        else:
            if self.tokenizer.current_token == tokens:
                self.write_element('keyword',
                                   self.tokenizer.current_token.token_escaped)
            else:
                self.raise_syntax_error('')

    def compile_identifier(self):
        self.tokenizer.advance()
        if isinstance(self.tokenizer.current_token, Identifier):
            self.write_element('identifier',
                               self.tokenizer.current_token.token_escaped)
        else:
            self.raise_syntax_error('')

    def compile_integer_constant(self):
        self.tokenizer.advance()
        if isinstance(self.tokenizer.current_token, IntegerConstant):
            self.write_element('integerConstant',
                               self.tokenizer.current_token.token_escaped)
        else:
            self.raise_syntax_error('')

    def compile_string_constant(self):
        self.tokenizer.advance()
        if isinstance(self.tokenizer.current_token, StringConstant):
            self.write_element('stringConstant',
                               self.tokenizer.current_token.token_escaped)
        else:
            self.raise_syntax_error('')

    def write_element(self, elem_name, value):
        self.wf.write('<%s> %s </%s>\n' % (elem_name, value, elem_name))

    def write_element_start(self, elem_name):
        self.wf.write('<%s>\n' % elem_name)

    def write_element_end(self, elem_name):
        self.wf.write('</%s>\n' % elem_name)

    def raise_syntax_error(self, msg):
        raise Exception('%s' % msg)
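Since the engine above defines __enter__ and __exit__, it can also be used as a context manager so that the generated .my.xml file is closed even if compilation fails; a minimal usage sketch with a hypothetical source path:

with CompilationEngine('Square/Main.jack') as engine:   # hypothetical path; writes Square/Main.my.xml
    engine.compile()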
Example #29
0
class CompilationEngine():
    def __init__(self, jack_file, xml_file):
        self._jack_tokenizer = JackTokenizer(jack_file)
        self._xml_file = xml_file
        self._xml_text = ''

    def compile_class(self):
        self._write_start('class')
        self._compile_keyword()
        self._compile_identifier()
        self._compile_symbol()
        while self._what_next_token([Keyword.STATIC, Keyword.FIELD]):
            self.compile_class_var_dec()
        while self._what_next_token(
            [Keyword.CONSTRUCTOR, Keyword.FUNCTION, Keyword.METHOD]):
            self.compile_subroutine_dec()
        self._compile_symbol()
        self._write_end('class')

    def compile_class_var_dec(self):
        self._write_start('classVarDec')
        self._compile_keyword()
        if self._what_next_token([Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
            self._compile_keyword()
        elif self._what_next_token_type([Type.IDENTIFIER]):
            self._compile_identifier()
        self._compile_identifier()
        while self._what_next_token([Symbol.COMMA]):
            self._compile_symbol()
            self._compile_identifier()
        self._compile_symbol()
        self._write_end('classVarDec')

    def compile_subroutine_dec(self):
        self._write_start('subroutineDec')
        self._compile_keyword()
        if self._what_next_token([Keyword.VOID]):
            self._compile_keyword()
        else:
            if self._what_next_token(
                [Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
                self._compile_keyword()
            elif self._what_next_token_type([Type.IDENTIFIER]):
                self._compile_identifier()
        self._compile_identifier()
        self._compile_symbol()
        self.compile_parameter_list()
        self._compile_symbol()
        self.compile_subroutine_body()
        self._write_end('subroutineDec')

    def compile_parameter_list(self):
        self._write_start('parameterList')
        if (self._what_next_token([Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN])
                or self._what_next_token_type([Type.IDENTIFIER])):
            if self._what_next_token(
                [Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
                self._compile_keyword()
            elif self._what_next_token_type([Type.IDENTIFIER]):
                self._compile_identifier()
            self._compile_identifier()
            while self._what_next_token([Symbol.COMMA]):
                self._compile_symbol()
                if self._what_next_token(
                    [Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
                    self._compile_keyword()
                elif self._what_next_token_type([Type.IDENTIFIER]):
                    self._compile_identifier()
                self._compile_identifier()
        self._write_end('parameterList')

    def compile_subroutine_body(self):
        self._write_start('subroutineBody')
        self._compile_symbol()
        while self._what_next_token([Keyword.VAR]):
            self.compile_var_dec()
        self.compile_statements()
        self._compile_symbol()
        self._write_end('subroutineBody')

    def compile_var_dec(self):
        self._write_start('varDec')
        self._compile_keyword()
        if self._what_next_token([Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN]):
            self._compile_keyword()
        elif self._what_next_token_type([Type.IDENTIFIER]):
            self._compile_identifier()
        self._compile_identifier()
        while self._what_next_token([Symbol.COMMA]):
            self._compile_symbol()
            self._compile_identifier()
        self._compile_symbol()
        self._write_end('varDec')

    def compile_statements(self):
        self._write_start('statements')
        while self._what_next_token([
                Keyword.LET, Keyword.IF, Keyword.WHILE, Keyword.DO,
                Keyword.RETURN
        ]):
            if self._what_next_token([Keyword.LET]):
                self.compile_let()
            elif self._what_next_token([Keyword.IF]):
                self.compile_if()
            elif self._what_next_token([Keyword.WHILE]):
                self.compile_while()
            elif self._what_next_token([Keyword.DO]):
                self.compile_do()
            elif self._what_next_token([Keyword.RETURN]):
                self.compile_return()
        self._write_end('statements')

    def compile_let(self):
        self._write_start('letStatement')
        self._compile_keyword()
        self._compile_identifier()
        if self._what_next_token([Symbol.LEFT_BOX_BRACKET]):
            self._compile_symbol()
            self.compile_expression()
            self._compile_symbol()
        self._compile_symbol()
        self.compile_expression()
        self._compile_symbol()
        self._write_end('letStatement')

    def compile_if(self):
        self._write_start('ifStatement')
        self._compile_keyword()
        self._compile_symbol()
        self.compile_expression()
        self._compile_symbol()
        self._compile_symbol()
        self.compile_statements()
        self._compile_symbol()
        if self._what_next_token([Keyword.ELSE]):
            self._compile_keyword()
            self._compile_symbol()
            self.compile_statements()
            self._compile_symbol()
        self._write_end('ifStatement')

    def compile_while(self):
        self._write_start('whileStatement')
        self._compile_keyword()
        self._compile_symbol()
        self.compile_expression()
        self._compile_symbol()
        self._compile_symbol()
        self.compile_statements()
        self._compile_symbol()
        self._write_end('whileStatement')

    def compile_do(self):
        self._write_start('doStatement')
        self._compile_keyword()
        if self._what_next_token([Symbol.LEFT_ROUND_BRACKET], 1):
            self._compile_identifier()
            self._compile_symbol()
            self.compile_expression_list()
            self._compile_symbol()
        else:
            self._compile_identifier()
            self._compile_symbol()
            self._compile_identifier()
            self._compile_symbol()
            self.compile_expression_list()
            self._compile_symbol()
        self._compile_symbol()
        self._write_end('doStatement')

    def compile_return(self):
        self._write_start('returnStatement')
        self._compile_keyword()
        if not self._what_next_token([Symbol.SEMI_COLON]):
            self.compile_expression()
        self._compile_symbol()
        self._write_end('returnStatement')

    def compile_expression(self):
        self._write_start('expression')
        self.compile_term()
        while self._what_next_token([
                Symbol.PLUS, Symbol.MINUS, Symbol.MULTI, Symbol.DIV,
                Symbol.AND, Symbol.PIPE, Symbol.LESS_THAN, Symbol.GREATER_THAN,
                Symbol.EQUAL
        ]):
            self._compile_symbol()
            self.compile_term()
        self._write_end('expression')

    def compile_term(self):
        self._write_start('term')
        if self._what_next_token_type([Type.INT_CONST]):
            self._compile_integer_constant()
        elif self._what_next_token_type([Type.STRING_CONST]):
            self._compile_string_constant()
        elif self._what_next_token(
            [Keyword.NULL, Keyword.THIS, Keyword.TRUE, Keyword.FALSE]):
            self._compile_keyword()
        elif self._what_next_token_type([Type.IDENTIFIER]):
            if self._what_next_token([Symbol.LEFT_BOX_BRACKET], 1):
                self._compile_identifier()
                self._compile_symbol()
                self.compile_expression()
                self._compile_symbol()
            elif self._what_next_token([Symbol.LEFT_ROUND_BRACKET, Symbol.DOT],
                                       1):
                if self._what_next_token([Symbol.LEFT_ROUND_BRACKET], 1):
                    self._compile_identifier()
                    self._compile_symbol()
                    self.compile_expression_list()
                    self._compile_symbol()
                else:
                    self._compile_identifier()
                    self._compile_symbol()
                    self._compile_identifier()
                    self._compile_symbol()
                    self.compile_expression_list()
                    self._compile_symbol()
            else:
                self._compile_identifier()
        elif self._what_next_token([Symbol.LEFT_ROUND_BRACKET]):
            self._compile_symbol()
            self.compile_expression()
            self._compile_symbol()
        elif self._what_next_token([Symbol.TILDE, Symbol.MINUS]):
            self._compile_symbol()
            self.compile_term()
        self._write_end('term')

    def compile_expression_list(self):
        self._write_start('expressionList')
        if not self._what_next_token([Symbol.RIGHT_ROUND_BRACKET]):
            self.compile_expression()
            while self._what_next_token([Symbol.COMMA]):
                self._compile_symbol()
                self.compile_expression()
        self._write_end('expressionList')

    def save(self):
        self._xml_file.write(self._xml_text)

    def _what_next_token(self, values, index=0):
        return self._jack_tokenizer.next_token(index) in values

    def _what_next_token_type(self, values, index=0):
        return self._jack_tokenizer.next_token_type(index) in values

    def _compile_symbol(self):
        self._jack_tokenizer.advance()
        self._write('symbol', self._jack_tokenizer.token())

    def _compile_keyword(self):
        self._jack_tokenizer.advance()
        self._write('keyword', self._jack_tokenizer.token())

    def _compile_identifier(self):
        self._jack_tokenizer.advance()
        self._write('identifier', self._jack_tokenizer.token())

    def _compile_integer_constant(self):
        self._jack_tokenizer.advance()
        self._write('integerConstant', self._jack_tokenizer.token())

    def _compile_string_constant(self):
        self._jack_tokenizer.advance()
        self._write('stringConstant', self._jack_tokenizer.token())

    def _write(self, element, value):
        self._xml_text += '<{}> {} </{}>\n'.format(element, value, element)

    def _write_start(self, element):
        self._xml_text += '<%s>\n' % element

    def _write_end(self, element):
        self._xml_text += '</%s>\n' % element
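Unlike the engines above that open their own output files, this one is handed an already-open file object and buffers its XML in memory until save() is called; a minimal usage sketch with hypothetical file names:

with open('Main.xml', 'w') as xml_file:                  # hypothetical output file
    engine = CompilationEngine('Main.jack', xml_file)    # hypothetical source file
    engine.compile_class()
    engine.save()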
Example #30
0
 def __init__(self, filepath):
     self.wf = open(filepath[:-5] + ".my.xml", 'w')
     self.tokenizer = JackTokenizer(filepath)
Example #31
0
 def __init__(self, input_file_path, vm_writer: VMWriter):
     self.jack_tokenizer = JackTokenizer(input_file_path)
     self.symbol_table = SymbolTable()
     self.vm_writer = vm_writer
     if self.jack_tokenizer.has_more_tokens():
         self.compile_class()
Example #32
0
class CompilationEngine(object):
    """
    Compilation engine that parses a single Jack class, writing an XML parse
    tree and maintaining the symbol table used in project 11.
    """

    def __init__(self, input_file, output_file):
        self.tokenizer = JackTokenizer(input_file)
        self.out = open(output_file, 'w')
        self.token = None
        self.class_name = None

        #######################
        ### PROJECT 11 CODE ###
        #######################

        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(output_file)

        #######################


    def analyze(self):
        self.token = self.tokenizer.advance()
        self.compile_class()
        self.close()
        print('CLASS TABLE:')
        print(self.symbol_table.class_table)


    def close(self):
        if self.out:
            self.out.close()
            self.out = None


    def advance(self):
        self.token = self.tokenizer.advance()


    def write_to_out(self):
        pass


    def format_line(self, defined_or_used=''):
        token_type = self.tokenizer.token_type()
        running_index = ''
        if token_type == self.tokenizer.keyword_token:
            meat = self.tokenizer.keyword()
            defined_or_used=''
        elif token_type == self.tokenizer.symbol_token:
            meat = self.tokenizer.symbol()
            defined_or_used=''
        elif token_type == self.tokenizer.identifier_token:
            meat = self.tokenizer.identifier()

            #######################
            ### PROJECT 11 CODE ###
            #######################

            # Extending the compilation engine to output <var/argument/static/field...> instead of <identifier>
            name = self.tokenizer.token
            if self.symbol_table.kind_of(name):
                token_type = self.symbol_table.kind_of(name)
                running_index = str(self.symbol_table.index_of(name))
            elif name[0].islower():
                token_type = 'subroutine'
            else:
                token_type = 'class'

            #######################  

        elif token_type == self.tokenizer.int_const:
            meat = self.tokenizer.int_val()
            defined_or_used=''
        elif token_type == self.tokenizer.string_const:
            meat = self.tokenizer.string_val()
            defined_or_used=''
        else:
            raise ValueError('Something went wrong with token: {}'.format(self.token))
        
        if defined_or_used != '':
            defined_or_used += ' '
        if running_index != '':
            running_index = ' ' + running_index
        formatted_line = '<{2}{0}{3}> {1} </{2}{0}{3}>\n'.format(token_type, meat, defined_or_used, running_index)
        return formatted_line


    #########################
    ### PROGRAM STRUCTURE ###
    #########################

    def compile_class(self):
        """
        ####################################################################
        ### class: 'class' className '{' classVarDec* subroutineDec* '}' ###
        ####################################################################
        """

        self.out.write('<class>\n')

        # 'class'
        keyword_line = self.format_line()
        self.out.write(keyword_line)
        
        # className
        self.advance()

        #######################
        ### PROJECT 11 CODE ###
        #######################

        self.class_name = self.tokenizer.token

        ####################### 

        identifier_line = self.format_line('defined')
        self.out.write(identifier_line)

        # '{'
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        ### classVarDec* subroutineDec* ###
        self.advance()
        # classVarDec*
        while self.tokenizer.token_type() == self.tokenizer.keyword_token and self.tokenizer.keyword() in [self.tokenizer.key_field, self.tokenizer.key_static]:
            self.compile_class_var_dec()
        
        # subroutineDec*
        while  self.tokenizer.token_type() == self.tokenizer.keyword_token and self.tokenizer.keyword() in [self.tokenizer.key_function, self.tokenizer.key_method, self.tokenizer.key_constructor]:
            self.compile_subroutine()

        # '}'
        if  self.tokenizer.token_type() == self.tokenizer.symbol_token:
            # Class compilation is done
            symbol_line = self.format_line()
            self.out.write(symbol_line)
        else:
            raise ValueError('Something went wrong')

        # Closing with </class>
        self.out.write('</class>\n')
        is_successful = not self.advance()
        if is_successful:
            print('Compilation engine successfully finished')
        else:
            print('Something went wrong!')


    def compile_class_var_dec(self):
        """
        #######################################################################
        ### classVarDec: ('static'|'field') type varName (',' varName)* ';' ###
        #######################################################################
        """

        self.out.write('<classVarDec>\n')

        #######################
        ### PROJECT 11 CODE ###
        #######################

        # Extract field or static
        # field_or_static = re.match('<[a-z]*>', field_or_static_line)[0][1:-1]
        field_or_static = self.tokenizer.token

        #######################

        #  ('static' | 'field')
        field_or_static_line = self.format_line()
        self.out.write(field_or_static_line)

        # type
        self.advance()

        #######################
        ### PROJECT 11 CODE ###
        #######################

        # Extract token type
        type_ = self.tokenizer.token

        #######################

        type_line = self.format_line()
        self.out.write(type_line)

        # varName
        self.advance()

        #######################
        ### PROJECT 11 CODE ###
        #######################

        self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind=field_or_static)

        #######################

        varname_line = self.format_line('defined')
        self.out.write(varname_line)

        # (',' varName)*
        self.advance()
        symbol = self.tokenizer.symbol()
        while symbol == ',':
            colon_line = self.format_line()
            self.out.write(colon_line)
            self.advance()

            #######################
            ### PROJECT 11 CODE ###
            #######################

            self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind=field_or_static)

            #######################

            varname_line = self.format_line('defined')
            self.out.write(varname_line)
            self.advance()
            symbol = self.tokenizer.symbol()
        # symbol == ';'
        semicolon_line = self.format_line()
        self.out.write(semicolon_line)
        self.advance()

        self.out.write('</classVarDec>\n')


    def compile_subroutine(self):
        """
        ###########################################################################
        ### subroutineDec: ('constructor'|'function'|'method')                  ###             
        ###                ('void' | type) subroutineName '(' parameterList ')' ###
        ###                subroutineBody                                       ###
        ###########################################################################
        """

        #######################
        ### PROJECT 11 CODE ###
        #######################

        print()
        print('SUBROUTINE TABLE:')
        print(self.symbol_table.subroutine_table)
        print()
        self.symbol_table.start_subroutine()
        self.symbol_table.define(name='this', type_=self.class_name, kind='argument')

        #######################

        self.out.write('<subroutineDec>\n')

        # ('constructor'|'function'|'method')
        constructor_function_method_line = self.format_line()
        self.out.write(constructor_function_method_line)

        # ('void' | type)
        self.advance()
        void_or_type_line = self.format_line()
        self.out.write(void_or_type_line)

        # subroutineName 
        self.advance()
        subroutine_name_line = self.format_line('defined')
        self.out.write(subroutine_name_line)

        # '(' 
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        # parameterList
        self.advance()
        self.compile_parameter_list()

        # ')' 
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        
        ##################################################
        ### subroutineBody: '{' varDec* statements '}' ###
        ##################################################
        
        self.out.write('<subroutineBody>\n')

        # '{'
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        ###############
        ### varDec* ###
        ###############

        self.advance()
        while self.tokenizer.token == self.tokenizer.key_var:
            self.compile_var_dec()

        ##################
        ### statements ###
        ##################

        self.compile_statements()
        
        # '}'
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        self.advance()

        self.out.write('</subroutineBody>\n')
        self.out.write('</subroutineDec>\n')

        #######################
        ### PROJECT 11 CODE ###
        #######################

        print()
        print('SUBROUTINE TABLE:')
        print(self.symbol_table.subroutine_table)
        print()

        #######################


    def compile_parameter_list(self):
        """
        ############################################################
        ### parameterList: ((type varName) (',' type varName)*)? ###
        ############################################################
        """

        self.out.write('<parameterList>\n')

        # If the current token is a symbol (the closing ')'), the parameter
        # list is empty and there is nothing more to process
        if self.tokenizer.token_type() != self.tokenizer.symbol_token:
            
            # type

            #######################
            ### PROJECT 11 CODE ###
            #######################

            type_ = self.tokenizer.token

            #######################

            type_line = self.format_line()
            self.out.write(type_line)
            
            # varName
            self.advance()

            #######################
            ### PROJECT 11 CODE ###
            #######################

            self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='argument')

            #######################

            var_name_line = self.format_line('defined')
            self.out.write(var_name_line)

            # If the next token is ',' there is more than one parameter
            self.advance()
            while self.tokenizer.token_type() == self.tokenizer.symbol_token and self.tokenizer.symbol() == ',':
                # ','
                comma_line = self.format_line()
                self.out.write(comma_line)

                # type
                self.advance()

                #######################
                ### PROJECT 11 CODE ###
                #######################

                type_ = self.tokenizer.token

                #######################

                type_line = self.format_line()
                self.out.write(type_line)

                # varName
                self.advance()

                #######################
                ### PROJECT 11 CODE ###
                #######################

                self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='argument')

                # We are in new subroutine so add next nested scope
                # self.symbol_table.start_subroutine()

                #######################

                var_name_line = self.format_line('defined')
                self.out.write(var_name_line)

                self.advance()

        self.out.write('</parameterList>\n')



    def compile_var_dec(self):
        """
        #####################################################
        ### varDec: 'var' type varName (',' varName)* ';' ###
        #####################################################
        """

        self.out.write('<varDec>\n')

        # var
        var_line = self.format_line()
        self.out.write(var_line)

        # type
        self.advance()

        #######################
        ### PROJECT 11 CODE ###
        #######################

        type_ = self.tokenizer.token

        #######################

        type_line = self.format_line()
        self.out.write(type_line)

        # varName
        self.advance()

        #######################
        ### PROJECT 11 CODE ###
        #######################

        self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='local')

        #######################

        var_name_line = self.format_line('defined')
        self.out.write(var_name_line)

        # (',' varName)*
        self.advance()
        while self.tokenizer.symbol() == ',':
            # ','
            comma_line = self.format_line()
            self.out.write(comma_line)

            # varName
            self.advance()

            #######################
            ### PROJECT 11 CODE ###
            #######################

            self.symbol_table.define(name=self.tokenizer.token, type_=type_, kind='local')

            #######################

            var_name_line = self.format_line('defined')
            self.out.write(var_name_line)

            self.advance()

        # ';'
        semicolon_line = self.format_line()
        self.out.write(semicolon_line)

        self.advance()

        self.out.write('</varDec>\n')


    ##################
    ### STATEMENTS ###
    ##################

    def compile_statements(self):
        """
        ##############################
        ### statements: statement* ###
        ##############################
        """
        
        self.out.write('<statements>\n')

        while self.tokenizer.token_type() != self.tokenizer.symbol_token:
            
            keyword = self.tokenizer.keyword()
            # letStatement
            if keyword == self.tokenizer.key_let:
                self.compile_let()

            # ifStatement
            elif keyword == self.tokenizer.key_if:
                self.compile_if()

            # whileStatement
            elif keyword == self.tokenizer.key_while:
                self.compile_while()

            # doStatement
            elif keyword == self.tokenizer.key_do:
                self.compile_do()

            # returnStatement
            elif keyword == self.tokenizer.key_return:
                self.compile_return()

            else:
                raise ValueError('Unexpected statement keyword: {}'.format(keyword))

        self.out.write('</statements>\n')


    def compile_do(self):
        """
        ############################################
        ### doStatement: 'do' subroutineCall ';' ###
        ############################################
        """

        self.out.write('<doStatement>\n')

        # 'do'
        do_line = self.format_line()
        self.out.write(do_line)

        # subroutineCall
        self.advance()
        self.compile_subroutine_call()

        # ';'
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        self.advance()

        self.out.write('</doStatement>\n')


    def compile_let(self):
        """
        ############################################################################
        ### letStatement: 'let' varName ('[' expression ']')? '=' expression ';' ###
        ############################################################################
        """

        self.out.write('<letStatement>\n')

        # let 
        let_line = self.format_line()
        self.out.write(let_line)

        # varName
        self.advance()

        var_name_line = self.format_line('used')
        self.out.write(var_name_line)

        # Check if '[' or '='
        self.advance()
        if self.tokenizer.token == '[':
            # '['
            symbol_line = self.format_line()
            self.out.write(symbol_line)

            # expression
            self.advance()
            self.compile_expression()

            # ']'
            symbol_line = self.format_line()
            self.out.write(symbol_line)
            self.advance()

        # '='
        symbol_line = self.format_line()
        self.out.write(symbol_line)
        
        # expression
        self.advance()
        self.compile_expression()

        # ';'
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        self.advance()

        self.out.write('</letStatement>\n')


    def compile_while(self):
        """
        #####################################################################
        ### whileStatement: 'while' '(' expression ')' '{' statements '}' ###
        #####################################################################
        """

        self.out.write('<whileStatement>\n')

        # 'while'
        while_line = self.format_line()
        self.out.write(while_line)

        # '('
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        # expression
        self.advance()
        self.compile_expression()

        # ')'
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        # '{'
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        # statements
        self.advance()
        self.compile_statements()

        # '}'
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        self.advance()

        self.out.write('</whileStatement>\n')


    def compile_return(self):
        """
        #################################################
        ### returnStatement: 'return' expression? ';' ###
        #################################################
        """

        self.out.write('<returnStatement>\n')

        # 'return'
        return_line = self.format_line()
        self.out.write(return_line)

        # Check whether there is a return expression
        self.advance()
        if self.tokenizer.token != ';':
            # 'expression'
            self.compile_expression()

        # ';'
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        self.advance()

        self.out.write('</returnStatement>\n')


    def compile_if(self):
        """
        ###############################################################
        ### ifStatement: 'if' '(' expression ')' '{' statements '}' ###
        ###              ('else' '{' statements '}')?               ###
        ###############################################################
        """

        self.out.write('<ifStatement>\n')

        # 'if'
        if_line = self.format_line()
        self.out.write(if_line)

        # '('
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        # expression
        self.advance()
        self.compile_expression()

        # ')'
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        # '{'
        self.advance()
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        # statements
        self.advance()
        self.compile_statements()

        # '}'
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        # Check if there is 'else' part of ifStatement
        self.advance()
        if self.tokenizer.token_type() == self.tokenizer.keyword_token and self.tokenizer.keyword() == 'else':
            # 'else'
            else_line = self.format_line()
            self.out.write(else_line)

            # '{'
            self.advance()
            symbol_line = self.format_line()
            self.out.write(symbol_line)

            # statements
            self.advance()
            self.compile_statements()

            # '}'
            symbol_line = self.format_line()
            self.out.write(symbol_line)

            self.advance()

        self.out.write('</ifStatement>\n')

    ###################
    ### EXPRESSIONS ###
    ###################

    def compile_subroutine_call(self, skip_subroutine_name=False):
        """
        ############################################################################
        ### subroutineCall: subroutineName '(' expressionList ')' | (className | ###
        ### varName) '.' subroutineName '(' expressionList ')'                   ###
        ############################################################################
        """

        if not skip_subroutine_name:
            # subroutineName or className or varName
            subroutine_class_var_name_line = self.format_line('used')
            self.out.write(subroutine_class_var_name_line)
            self.advance()

        # Check '(' or '.'
        if self.tokenizer.token == '.':
            # '.'
            symbol_line = self.format_line()
            self.out.write(symbol_line)
            
            # subroutineName
            self.advance()
            subroutine_name_line = self.format_line('used')
            self.out.write(subroutine_name_line)

            self.advance()

        # '('
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        # expressionList
        self.advance()
        self.compile_expression_list()

        # ')'
        symbol_line = self.format_line()
        self.out.write(symbol_line)

        self.advance()


    def compile_expression(self):
        """
        ###################################
        ### expression: term (op term)* ###
        ###################################
        """

        self.out.write('<expression>\n')

        ops = ['+', '-', '*', '/', '&', '|', '<', '>', '=']

        # 'term'
        self.compile_term()

        # Check if there is (op term)* part
        while self.tokenizer.token in ops:
            # op
            op_line = self.format_line()
            self.out.write(op_line)

            # term
            self.advance()
            self.compile_term()

        self.out.write('</expression>\n')

    
    def compile_term(self):
        """
        ################################################################
        ###  integerConstant | stringConstant | keywordConstant |    ###
        ###  varName | varName '[' expression ']' | subroutineCall | ###
        ###  '(' expression ')' | unaryOp term                       ###
        ################################################################
        """

        self.out.write('<term>\n')

        unary_ops = ['-', '~']

        #############################################
        ### constant, name, expression or unaryOp ###
        #############################################

        # '(' expression ')'
        if self.tokenizer.token == '(':
            # '('
            symbol_line = self.format_line()
            self.out.write(symbol_line)

            # expression
            self.advance()
            self.compile_expression()

            # ')'
            symbol_line = self.format_line()
            self.out.write(symbol_line)

            self.advance()
        
        # unaryOp term
        elif self.tokenizer.token in unary_ops:
            # unaryOp
            unary_op_line = self.format_line()
            self.out.write(unary_op_line)

            # term
            self.advance()
            self.compile_term()

        # integerConstant | stringConstant | keywordConstant |
        # varName | varName '[' expression ']' | subroutineCall
        else:
            # constant or name
            constant_or_name = self.format_line('used')
            self.out.write(constant_or_name)

            # The identifier may be followed by '[' (array indexing) or by '(' / '.'
            # (a subroutineCall, compiled with skip_subroutine_name=True because the
            # name has already been written); otherwise the term is complete.
            self.advance()
            # '[' expression ']'
            if self.tokenizer.token == '[':
                # '['
                symbol_line = self.format_line()
                self.out.write(symbol_line)

                # expression
                self.advance()
                self.compile_expression()

                # ']'
                symbol_line = self.format_line()
                self.out.write(symbol_line)

                self.advance()
            
            # subroutineCall with skip_subroutine_name=True
            elif self.tokenizer.token in ['(', '.']:
                self.compile_subroutine_call(skip_subroutine_name=True)

        self.out.write('</term>\n')


    def compile_expression_list(self):
        """
        ########################################################
        ### expressionList: (expression (',' expression)* )? ###
        ########################################################
        """

        self.out.write('<expressionList>\n')

        # If the token is ')', the expression list is empty
        if self.tokenizer.token != ')':
            # 'expression'
            self.compile_expression()

            # A ',' token means there are more expressions to compile
            while self.tokenizer.token == ',':
                # ','
                comma_line = self.format_line()
                self.out.write(comma_line)

                # expression
                self.advance()
                self.compile_expression()

        self.out.write('</expressionList>\n')
class CompilationEngine:
    CLASS_VAR_DEC_KEYWORDS = ['static', 'field']
    SUBROUTINE_DEC_KEYWORDS = ['constructor', 'function', 'method']
    BINARY_OPERATOR_SYMBOLS = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
    UNARY_OPERATOR_SYMBOLS = ['-', '~']
    BINARY_OPERATORS_TO_COMMAND = {
        '+': 'add',
        '-': 'sub',
        '=': 'eq',
        '>': 'gt',
        '<': 'lt',
        '&': 'and',
        '|': 'or'
    }
    UNARY_OPERATORS_TO_COMMAND = {'-': 'neg', '~': 'not'}
    TYPE_TO_TAG = {
        'STRING_CONST': 'stringConstant',
        'INT_CONST': 'integerConstant',
        'KEYWORD': 'keyword',
        'IDENTIFIER': 'identifier',
        'SYMBOL': 'symbol'
    }

    SYMBOLS_TO_XML_CONVENTION = {
        '<': '&lt;',
        '>': '&gt;',
        '&': '&amp;',
        '"': '&quot;'
    }

    def __init__(self, input_file_path, vm_writer: VMWriter):
        self.jack_tokenizer = JackTokenizer(input_file_path)
        self.symbol_table = SymbolTable()
        self.vm_writer = vm_writer
        if self.jack_tokenizer.has_more_tokens():
            self.compile_class()

    def compile_class(self):
        self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()
        self.symbol_table.define(self.jack_tokenizer.identifier(), 'NONE',
                                 'CLASS')
        self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()

        if self.jack_tokenizer.key_word(
        ) in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            self.compile_class_var_dec()
        if self.jack_tokenizer.key_word(
        ) in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.compile_subroutine()

        self.jack_tokenizer.advance()
        self.vm_writer.close()

    def write_token(self, token_name):
        type_tag = CompilationEngine.TYPE_TO_TAG[
            self.jack_tokenizer.token_type()]
        self.output_file.write('<{0}> {1} </{0}>\n'.format(
            type_tag, token_name))
        self.jack_tokenizer.advance()

    def compile_class_var_dec(self):
        while self.jack_tokenizer.key_word(
        ) in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            kind = ''
            if self.jack_tokenizer.key_word() == 'field':
                kind = 'FIELD'
            elif self.jack_tokenizer.key_word() == 'static':
                kind = 'STATIC'
            self.jack_tokenizer.advance()
            field_type = self.get_type()
            self.jack_tokenizer.advance()
            self.symbol_table.define(self.jack_tokenizer.identifier(),
                                     field_type, kind)

            self.jack_tokenizer.advance()

            while self.jack_tokenizer.symbol() != ';':
                self.jack_tokenizer.advance()
                self.symbol_table.define(self.jack_tokenizer.identifier(),
                                         field_type, kind)
                self.jack_tokenizer.advance()

            self.jack_tokenizer.advance()

    def write_type(self):
        if self.jack_tokenizer.token_type() == 'KEYWORD':
            self.write_token(self.jack_tokenizer.key_word())
        elif self.jack_tokenizer.token_type() == 'IDENTIFIER':
            self.write_token(self.jack_tokenizer.identifier())

    def compile_subroutine(self):
        self.vm_writer.zero_branching_indexes()
        while self.jack_tokenizer.key_word(
        ) in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.symbol_table.start_subroutine()
            constructor = self.jack_tokenizer.key_word() == 'constructor'

            method = False
            if self.jack_tokenizer.key_word() == 'method':
                method = True
                self.symbol_table.define('this',
                                         self.symbol_table.get_class_name(),
                                         'ARG')

            self.jack_tokenizer.advance()
            self.jack_tokenizer.advance()
            self.symbol_table.define(self.jack_tokenizer.identifier(), 'NONE',
                                     'SUBROUTINE')
            name = self.symbol_table.get_class_name(
            ) + '.' + self.jack_tokenizer.identifier()
            self.jack_tokenizer.advance()
            self.jack_tokenizer.advance()
            self.compile_parameter_list()
            self.jack_tokenizer.advance()

            self.jack_tokenizer.advance()
            var_num = 0
            while self.jack_tokenizer.key_word() == 'var':
                var_num += self.compile_var_dec()
            self.vm_writer.write_function(name, var_num)
            if method:
                self.vm_writer.write_push('ARG', 0)
                self.vm_writer.write_pop('POINTER', 0)
            elif constructor:
                field_count = self.symbol_table.var_count('FIELD')
                self.vm_writer.write_push('CONST', field_count)
                self.vm_writer.write_call('Memory.alloc', 1)
                self.vm_writer.write_pop('POINTER', 0)
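            # Subroutine preamble, as generated above: a method receives the target
            # object as argument 0 and anchors 'this' with
            #     push argument 0 / pop pointer 0
            # while a constructor allocates space for all fields and anchors 'this'
            # at the freshly allocated block with
            #     push constant <fieldCount> / call Memory.alloc 1 / pop pointer 0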
            self.compile_statements()
            self.jack_tokenizer.advance()

    def compile_parameter_list(self):
        if self.jack_tokenizer.symbol() != ')':
            parameter_type = self.get_type()
            self.jack_tokenizer.advance()
            self.symbol_table.define(self.jack_tokenizer.identifier(),
                                     parameter_type, 'ARG')
            self.jack_tokenizer.advance()
            while self.jack_tokenizer.symbol() == ",":
                self.jack_tokenizer.advance()
                parameter_type = self.get_type()
                self.jack_tokenizer.advance()
                self.symbol_table.define(self.jack_tokenizer.identifier(),
                                         parameter_type, 'ARG')
                self.jack_tokenizer.advance()

    def get_type(self):
        if self.jack_tokenizer.token_type() == 'KEYWORD':
            parameter_type = self.jack_tokenizer.key_word()
        elif self.jack_tokenizer.token_type() == 'IDENTIFIER':
            parameter_type = self.jack_tokenizer.identifier()
        else:
            raise ValueError('Expected a type keyword or class name, got token type: {}'.format(
                self.jack_tokenizer.token_type()))
        return parameter_type

    def compile_var_dec(self):
        var_num = 1
        self.jack_tokenizer.advance()
        var_type = self.get_type()
        self.jack_tokenizer.advance()
        self.symbol_table.define(self.jack_tokenizer.identifier(), var_type,
                                 'VAR')
        self.jack_tokenizer.advance()

        while self.jack_tokenizer.symbol() == ",":
            var_num += 1
            self.jack_tokenizer.advance()
            self.symbol_table.define(self.jack_tokenizer.identifier(),
                                     var_type, 'VAR')
            self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()
        return var_num

    def compile_statements(self):
        while self.jack_tokenizer.token_type() == 'KEYWORD':
            if self.jack_tokenizer.key_word() == 'let':
                self.compile_let()
            elif self.jack_tokenizer.key_word() == 'if':
                self.compile_if()
            elif self.jack_tokenizer.key_word() == 'while':
                self.compile_while()
            elif self.jack_tokenizer.key_word() == 'do':
                self.compile_do()
            elif self.jack_tokenizer.key_word() == 'return':
                self.compile_return()

    def compile_do(self):
        self.jack_tokenizer.advance()

        name = self.jack_tokenizer.identifier()
        self.jack_tokenizer.advance()
        self.compile_subroutine_call(name)

        # must dispose of void function return value
        self.vm_writer.write_pop('TEMP', 0)
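        # Illustrative output (assumed Jack statement "do Output.printInt(x);"):
        #     push <segment-of-x> <index-of-x>
        #     call Output.printInt 1
        #     pop temp 0        (discard the returned value)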
        self.jack_tokenizer.advance()

    def compile_subroutine_call(self, prefix_call=''):
        if self.jack_tokenizer.symbol() == '(':
            subroutine = False
            # If not in symbol table - then subroutine
            if not self.symbol_table.kind_of(
                    prefix_call) or self.symbol_table.kind_of(
                        prefix_call) == 'SUBROUTINE':
                subroutine = True
            self.jack_tokenizer.advance()

            args_count = 0
            if subroutine:
                self.vm_writer.write_push('POINTER', 0)
                args_count += 1
            args_count += self.compile_expression_list()

            if subroutine:
                self.vm_writer.write_call(
                    self.symbol_table.get_class_name() + '.' + prefix_call,
                    args_count)
            else:
                self.vm_writer.write_call(prefix_call, args_count)
            self.jack_tokenizer.advance()
        elif self.jack_tokenizer.symbol() == '.':
            variable = False
            self.jack_tokenizer.advance()
            if self.symbol_table.kind_of(prefix_call) in ['VAR', 'FIELD']:
                variable = True
                variable_name = prefix_call
                prefix_call = self.symbol_table.type_of(prefix_call)
            prefix_call += '.{0}'.format(self.jack_tokenizer.identifier())
            self.jack_tokenizer.advance()
            self.jack_tokenizer.advance()

            args_count = 0
            if variable:
                self.vm_writer.write_push(
                    self.symbol_table.kind_of(variable_name),
                    self.symbol_table.index_of(variable_name))
                args_count += 1
            args_count += self.compile_expression_list()

            self.vm_writer.write_call(prefix_call, args_count)
            self.jack_tokenizer.advance()

    def compile_let(self):
        self.jack_tokenizer.advance()
        var_name = self.jack_tokenizer.identifier()
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.symbol() == '[':
            self.vm_writer.write_push(self.symbol_table.kind_of(var_name),
                                      self.symbol_table.index_of(var_name))
            self.jack_tokenizer.advance()
            self.compile_expression()
            self.vm_writer.write_arithmetic("add")
            self.jack_tokenizer.advance()
            self.jack_tokenizer.advance()
            self.compile_expression()
            self.vm_writer.write_pop('TEMP', 0)
            self.vm_writer.write_pop('POINTER', 1)
            self.vm_writer.write_push('TEMP', 0)
            self.vm_writer.write_pop('THAT', 0)
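            # Array assignment sequence, as generated above: the target address
            # (base + index) is computed first and the right-hand side is evaluated;
            # then pop temp 0 saves the value, pop pointer 1 points THAT at the
            # target cell, and push temp 0 / pop that 0 stores the value into it.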
        else:
            self.jack_tokenizer.advance()
            self.compile_expression()
            self.vm_writer.write_pop(self.symbol_table.kind_of(var_name),
                                     self.symbol_table.index_of(var_name))

        self.jack_tokenizer.advance()

    def compile_while(self):
        while_idx = self.vm_writer.get_next_label_index('while')
        if_label = 'WHILE_IF_{0}'.format(while_idx)
        end_label = 'WHILE_END_{0}'.format(while_idx)

        self.vm_writer.write_label(if_label)
        self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()
        self.compile_expression()
        self.vm_writer.write_arithmetic('not')
        self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()
        self.vm_writer.write_if(end_label)
        self.compile_statements()
        self.vm_writer.write_goto(if_label)
        self.jack_tokenizer.advance()
        self.vm_writer.write_label(end_label)
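        # Illustrative label layout for one while statement (index 0):
        #     label WHILE_IF_0
        #     ...condition...
        #     not
        #     if-goto WHILE_END_0
        #     ...body statements...
        #     goto WHILE_IF_0
        #     label WHILE_END_0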

    def compile_return(self):
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.symbol() != ';':
            self.compile_expression()
        else:
            self.vm_writer.write_push('CONST', 0)
        self.vm_writer.write_return()
        self.jack_tokenizer.advance()

    def compile_if(self):
        if_idx = self.vm_writer.get_next_label_index('if')
        else_label = 'IF_ELSE_{0}'.format(if_idx)
        end_label = 'IF_END_{0}'.format(if_idx)

        self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()
        self.compile_expression()
        self.vm_writer.write_arithmetic('not')
        self.jack_tokenizer.advance()

        self.jack_tokenizer.advance()
        self.vm_writer.write_if(else_label)
        self.compile_statements()
        self.jack_tokenizer.advance()
        self.vm_writer.write_goto(end_label)

        self.vm_writer.write_label(else_label)
        if self.jack_tokenizer.key_word() == 'else':
            self.jack_tokenizer.advance()
            self.jack_tokenizer.advance()
            self.compile_statements()
            self.jack_tokenizer.advance()

        self.vm_writer.write_label(end_label)
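        # Illustrative label layout for one if/else statement (index 0):
        #     ...condition...
        #     not
        #     if-goto IF_ELSE_0
        #     ...then statements...
        #     goto IF_END_0
        #     label IF_ELSE_0
        #     ...else statements, if any...
        #     label IF_END_0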

    def compile_expression(self):
        self.compile_term()

        while self.jack_tokenizer.symbol(
        ) in CompilationEngine.BINARY_OPERATOR_SYMBOLS:
            symbol = self.jack_tokenizer.symbol()
            self.jack_tokenizer.advance()

            self.compile_term()

            if symbol in self.BINARY_OPERATORS_TO_COMMAND:
                self.vm_writer.write_arithmetic(
                    self.BINARY_OPERATORS_TO_COMMAND[symbol])
            elif symbol == '*':
                self.vm_writer.write_call('Math.multiply', 2)
            elif symbol == '/':
                self.vm_writer.write_call('Math.divide', 2)
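            # '*' and '/' have no VM arithmetic command, so they are compiled as
            # calls to the OS routines Math.multiply and Math.divide.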

    def compile_term(self):
        token_type = self.jack_tokenizer.token_type()

        if token_type == 'IDENTIFIER':
            name = self.jack_tokenizer.identifier()
            self.jack_tokenizer.advance()
            if self.jack_tokenizer.symbol(
            ) == '(' or self.jack_tokenizer.symbol() == '.':
                self.compile_subroutine_call(name)
            elif self.jack_tokenizer.symbol() == '[':
                self.vm_writer.write_push(self.symbol_table.kind_of(name),
                                          self.symbol_table.index_of(name))
                self.jack_tokenizer.advance()
                self.compile_expression()
                self.jack_tokenizer.advance()
                self.vm_writer.write_arithmetic('add')
                self.vm_writer.write_pop('POINTER', 1)
                self.vm_writer.write_push('THAT', 0)
            else:
                kind = self.symbol_table.kind_of(name)
                idx = self.symbol_table.index_of(name)
                self.vm_writer.write_push(kind, idx)
        elif token_type == 'STRING_CONST':
            string_const = self.jack_tokenizer.string_val()

            self.vm_writer.write_push("CONST", len(string_const))
            self.vm_writer.write_call("String.new", 1)
            for char in string_const:
                self.vm_writer.write_push('CONST', ord(char))
                self.vm_writer.write_call("String.appendChar", 2)
            self.jack_tokenizer.advance()
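            # Illustrative output for the assumed Jack literal "hi":
            #     push constant 2          (length)
            #     call String.new 1
            #     push constant 104        ('h')
            #     call String.appendChar 2
            #     push constant 105        ('i')
            #     call String.appendChar 2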
        elif token_type == 'KEYWORD':
            keyword = self.jack_tokenizer.key_word()
            if keyword == 'true':
                self.vm_writer.write_push('CONST', 1)
                self.vm_writer.write_arithmetic('neg')
            elif keyword == 'false' or keyword == 'null':
                self.vm_writer.write_push('CONST', 0)
            elif keyword == 'this':
                self.vm_writer.write_push('POINTER', 0)
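            # Keyword constant mapping used above: true -> push constant 1, neg
            # (i.e. -1, all bits set); false and null -> push constant 0;
            # this -> push pointer 0.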
            self.jack_tokenizer.advance()
        elif token_type == 'SYMBOL':
            if self.jack_tokenizer.symbol() == '(':
                self.jack_tokenizer.advance()
                self.compile_expression()
                self.jack_tokenizer.advance()
            elif self.jack_tokenizer.symbol(
            ) in CompilationEngine.UNARY_OPERATOR_SYMBOLS:
                command = CompilationEngine.UNARY_OPERATORS_TO_COMMAND[
                    self.jack_tokenizer.symbol()]
                self.jack_tokenizer.advance()
                self.compile_term()
                self.vm_writer.write_arithmetic(command)
        elif token_type == 'INT_CONST':
            self.vm_writer.write_push('CONST', self.jack_tokenizer.int_val())
            self.jack_tokenizer.advance()

    def compile_expression_list(self):
        expression_count = 0
        if self.jack_tokenizer.symbol() != ')':
            self.compile_expression()
            expression_count += 1
            while self.jack_tokenizer.symbol() == ',':
                self.jack_tokenizer.advance()
                self.compile_expression()
                expression_count += 1
        return expression_count
def analyze(src_jack_file):
    tokenizer = JackTokenizer(src_jack_file)
    while tokenizer.has_more_tokens():
        tokenizer.advance()
        print(tokenizer.get_current_token())
Example #35
class CompilationEngine():
    def __init__(self, filepath):
        self.wf = open(filepath[:-5] + ".myImpl.xml", 'w')
        self.tokenizer = JackTokenizer(filepath)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.wf.close()

    def compile(self):
        self.compile_class()

    def compile_class(self):
        self.write_element_start('class')

        self.compile_keyword([Tokens.CLASS])
        self.compile_class_name()
        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)

        while self.next_is_class_var_dec():
            self.compile_class_var_dec()

        while self.next_is_subroutine_dec():
            self.compile_subroutine_dec()

        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)

        self.write_element_end('class')

    def compile_class_var_dec(self):
        self.write_element_start('classVarDec')

        self.compile_keyword([Tokens.STATIC, Tokens.FIELD])
        self.compile_type()
        self.compile_var_name()

        while self.next_is(Tokens.COMMA):
            self.compile_symbol(Tokens.COMMA)
            self.compile_var_name()

        self.compile_symbol(Tokens.SEMI_COLON)

        self.write_element_end('classVarDec')

    def compile_subroutine_dec(self):
        self.write_element_start('subroutineDec')

        self.compile_keyword([Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD])
        if self.tokenizer.see_next() == Tokens.VOID:
            self.compile_keyword(Tokens.VOID)
        else:
            self.compile_type()
        self.compile_subroutine_name()
        self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
        self.compile_parameter_list()
        self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        self.compile_subroutine_body()

        self.write_element_end('subroutineDec')

    def compile_subroutine_name(self):
        self.compile_identifier()

    def compile_class_name(self):
        self.compile_identifier()

    def compile_var_name(self):
        self.compile_identifier()

    def compile_parameter_list(self):
        self.write_element_start('parameterList')

        if self.tokenizer.see_next() in [Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN] or isinstance(
                self.tokenizer.see_next(), Identifier):
            self.compile_type()
            self.compile_var_name()

            while self.next_is(Tokens.COMMA):
                self.compile_symbol(Tokens.COMMA)
                self.compile_type()
                self.compile_var_name()

        self.write_element_end('parameterList')

    def compile_subroutine_body(self):
        self.write_element_start('subroutineBody')

        self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
        while self.next_is(Tokens.VAR):
            self.compile_var_dec()

        self.compile_statements()
        self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)

        self.write_element_end('subroutineBody')

    def compile_var_dec(self):
        self.write_element_start('varDec')
        self.compile_keyword(Tokens.VAR)
        self.compile_type()
        self.compile_var_name()
        while self.next_is(Tokens.COMMA):
            self.compile_symbol(Tokens.COMMA)
            self.compile_var_name()
        self.compile_symbol(Tokens.SEMI_COLON)
        self.write_element_end('varDec')

    def compile_statements(self):
        self.write_element_start('statements')

        while self.next_is_statement():
            self.compile_statement()

        self.write_element_end('statements')

    def compile_statement(self):
        if self.next_is(Tokens.LET):
            self.write_element_start('letStatement')
            self.compile_keyword(Tokens.LET)
            self.compile_var_name()
            if self.next_is(Tokens.LEFT_BOX_BRACKET):
                self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
                self.compile_expression()
                self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
            self.compile_symbol(Tokens.EQUAL)
            self.compile_expression()
            self.compile_symbol(Tokens.SEMI_COLON)
            self.write_element_end('letStatement')

        elif self.next_is(Tokens.IF):
            self.write_element_start('ifStatement')
            self.compile_keyword(Tokens.IF)
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
            self.compile_statements()
            self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
            if self.next_is(Tokens.ELSE):
                self.compile_keyword(Tokens.ELSE)
                self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
                self.compile_statements()
                self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
            self.write_element_end('ifStatement')

        elif self.next_is(Tokens.WHILE):
            self.write_element_start('whileStatement')
            self.compile_keyword(Tokens.WHILE)
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
            self.compile_symbol(Tokens.LEFT_CURLY_BRACKET)
            self.compile_statements()
            self.compile_symbol(Tokens.RIGHT_CURLY_BRACKET)
            self.write_element_end('whileStatement')

        elif self.next_is(Tokens.DO):
            self.write_element_start('doStatement')
            self.compile_keyword(Tokens.DO)
            self.compile_subroutine_call()
            self.compile_symbol(Tokens.SEMI_COLON)
            self.write_element_end('doStatement')

        elif self.next_is(Tokens.RETURN):
            self.write_element_start('returnStatement')
            self.compile_keyword(Tokens.RETURN)
            if not self.next_is(Tokens.SEMI_COLON):
                self.compile_expression()
            self.compile_symbol(Tokens.SEMI_COLON)

            self.write_element_end('returnStatement')

    def compile_subroutine_call(self):
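        # One token of lookahead (see_next(idx=1)) distinguishes the two call forms:
        # "name(" is a plain subroutineName call, while "name." is a qualified
        # (className | varName) '.' subroutineName call.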
        if self.next_is(Tokens.LEFT_ROUND_BRACKET, idx=1):
            self.compile_subroutine_name()
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression_list()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        else:
            self.compile_identifier()
            self.compile_symbol(Tokens.DOT)
            self.compile_subroutine_name()
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression_list()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)

    def compile_expression_list(self):
        self.write_element_start('expressionList')
        if not self.next_is(Tokens.RIGHT_ROUND_BRACKET):
            self.compile_expression()
            while self.next_is(Tokens.COMMA):
                self.compile_symbol(Tokens.COMMA)
                self.compile_expression()
        self.write_element_end('expressionList')

    def compile_expression(self):
        self.write_element_start('expression')
        self.compile_term()
        while self.next_is([
            Tokens.PLUS,
            Tokens.MINUS,
            Tokens.MULTI,
            Tokens.DIV,
            Tokens.AND,
            Tokens.PIPE,
            Tokens.LESS_THAN,
            Tokens.GREATER_THAN,
            Tokens.EQUAL]):
            self.compile_symbol([
                Tokens.PLUS,
                Tokens.MINUS,
                Tokens.MULTI,
                Tokens.DIV,
                Tokens.AND,
                Tokens.PIPE,
                Tokens.LESS_THAN,
                Tokens.GREATER_THAN,
                Tokens.EQUAL])
            self.compile_term()
        self.write_element_end('expression')

    def compile_term(self):
        self.write_element_start('term')

        if self.next_type_is(TokenType.INT_CONST):
            self.compile_integer_constant()
        elif self.next_type_is(TokenType.STRING_CONST):
            self.compile_string_constant()
        elif self.next_is([Tokens.NULL, Tokens.THIS, Tokens.TRUE, Tokens.FALSE]):
            self.compile_keyword([Tokens.NULL, Tokens.THIS, Tokens.TRUE, Tokens.FALSE])
        elif self.next_type_is(TokenType.IDENTIFIER):

            if self.next_is(Tokens.LEFT_BOX_BRACKET, idx=1):
                self.compile_var_name()
                self.compile_symbol(Tokens.LEFT_BOX_BRACKET)
                self.compile_expression()
                self.compile_symbol(Tokens.RIGHT_BOX_BRACKET)
            elif self.next_is([Tokens.LEFT_ROUND_BRACKET, Tokens.DOT], idx=1):
                self.compile_subroutine_call()
            else:
                self.compile_var_name()

        elif self.next_is(Tokens.LEFT_ROUND_BRACKET):
            self.compile_symbol(Tokens.LEFT_ROUND_BRACKET)
            self.compile_expression()
            self.compile_symbol(Tokens.RIGHT_ROUND_BRACKET)
        elif self.next_is([Tokens.TILDE, Tokens.MINUS]):
            self.compile_symbol([Tokens.TILDE, Tokens.MINUS])
            self.compile_term()
        else:
            self.raise_syntax_error('')
        self.write_element_end('term')

    def next_type_is(self, token_type):
        return self.tokenizer.see_next().type == token_type

    def compile_type(self):
        if self.next_is([Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN]):
            self.compile_keyword([Tokens.INT, Tokens.CHAR, Tokens.BOOLEAN])
        elif isinstance(self.tokenizer.see_next(), Identifier):
            self.compile_identifier()

    def next_is_statement(self):
        return self.next_is([Tokens.LET, Tokens.IF, Tokens.WHILE, Tokens.DO, Tokens.RETURN])

    def next_is(self, tokens, idx=0):
        if type(tokens) == list:
            return self.tokenizer.see_next(idx=idx) in tokens
        else:
            return self.tokenizer.see_next(idx=idx) == tokens

    def next_is_class_var_dec(self):
        return self.next_is([Tokens.STATIC, Tokens.FIELD])

    def next_is_subroutine_dec(self):
        return self.next_is([Tokens.CONSTRUCTOR, Tokens.FUNCTION, Tokens.METHOD])

    def compile_symbol(self, tokens):
        self.tokenizer.advance()
        if type(tokens) == list:
            if self.tokenizer.current_token in tokens:
                self.write_element('symbol', self.tokenizer.current_token.token_escaped)
            else:
                self.raise_syntax_error('')
        else:
            if self.tokenizer.current_token == tokens:
                self.write_element('symbol', self.tokenizer.current_token.token_escaped)
            else:
                self.raise_syntax_error('')

    def compile_keyword(self, tokens):
        self.tokenizer.advance()
        if type(tokens) == list:
            if self.tokenizer.current_token in tokens:
                self.write_element('keyword', self.tokenizer.current_token.token_escaped)
            else:
                self.raise_syntax_error('')
        else:
            if self.tokenizer.current_token == tokens:
                self.write_element('keyword', self.tokenizer.current_token.token_escaped)
            else:
                self.raise_syntax_error('')

    def compile_identifier(self):
        self.tokenizer.advance()
        if isinstance(self.tokenizer.current_token, Identifier):
            self.write_element('identifier', self.tokenizer.current_token.token_escaped)
        else:
            self.raise_syntax_error('')

    def compile_integer_constant(self):
        self.tokenizer.advance()
        if isinstance(self.tokenizer.current_token, IntegerConstant):
            self.write_element('integerConstant', self.tokenizer.current_token.token_escaped)
        else:
            self.raise_syntax_error('')

    def compile_string_constant(self):
        self.tokenizer.advance()
        if isinstance(self.tokenizer.current_token, StringConstant):
            self.write_element('stringConstant', self.tokenizer.current_token.token_escaped)
        else:
            self.raise_syntax_error('')

    def write_element(self, elem_name, value):
        self.wf.write('<%s> %s </%s>\n' % (elem_name, value, elem_name))

    def write_element_start(self, elem_name):
        self.wf.write('<%s>\n' % elem_name)

    def write_element_end(self, elem_name):
        self.wf.write('</%s>\n' % elem_name)

    def raise_syntax_error(self, msg):
        raise Exception('%s' % msg)