Example #1
0
  def create_token_file(jack_file_name):
    """Tokenize a .jack source file and write its tokens to a <name>T.xml file.

    The output file name is derived from the input name by replacing the
    '.jack' suffix with 'T.xml'. Returns the name of the XML file written.
    """
    token_file_name = jack_file_name.replace('.jack', 'T.xml')

    # 'with' guarantees both handles are closed (the original leaked them),
    # and plain 'r' replaces the 'rU' mode that was removed in Python 3.11.
    with open(jack_file_name, 'r') as jack_file, \
         open(token_file_name, 'w') as token_file:
      tokenizer = JackTokenizer(jack_file)

      token_file.write('<tokens>\n')

      while tokenizer.hasMoreTokens():
        tokenizer.advance()
        # Compare with '==': 'is' tests object identity and only appeared to
        # work because CPython happens to intern short string literals.
        token_type = tokenizer.tokenType()

        if token_type == 'KEYWORD':
          token_file.write('<keyword> {} </keyword>\n'.format(tokenizer.keyWord().lower()))
        elif token_type == 'SYMBOL':
          symbol = tokenizer.symbol()

          # Escape the characters that XML reserves.
          if symbol in ['<', '>', '&']:
            symbol = Main.XML_CONVSERSIONS[symbol]

          token_file.write('<symbol> {} </symbol>\n'.format(symbol))
        elif token_type == 'IDENTIFIER':
          token_file.write('<identifier> {} </identifier>\n'.format(tokenizer.identifier()))
        elif token_type == 'INT_CONST':
          token_file.write('<integerConstant> {} </integerConstant>\n'.format(tokenizer.intVal()))
        elif token_type == 'STRING_CONST':
          token_file.write('<stringConstant> {} </stringConstant>\n'.format(tokenizer.stringVal()))

      token_file.write('</tokens>\n')

    return token_file_name
    def test_token_type(self):
        """The tokenizer classifies the tokens of ArrayTest/Main.jack correctly."""
        tokenizer = JackTokenizer("ArrayTest/Main.jack")

        # The file opens with the 'class' keyword ...
        tokenizer.advance()
        self.assertEqual(TokenType.keyword, tokenizer.token_type())
        self.assertEqual(tokenizer.keyword(), KeywordType.CLASS)

        # ... followed by the class name ...
        tokenizer.advance()
        self.assertEqual(TokenType.identifier, tokenizer.token_type())
        self.assertEqual(tokenizer.identifier(), "Main")

        # ... and the opening brace.
        tokenizer.advance()
        self.assertEqual(TokenType.symbol, tokenizer.token_type())
        self.assertEqual(tokenizer.symbol(), "{")

        # Spot-check two known positions deeper in the token stream:
        # position -> (expected type, accessor name, expected value).
        spot_checks = {
            43: (TokenType.integerConstant, "intVal", '0'),
            28: (TokenType.stringConstant, "stringVal", "HOW MANY NUMBERS? "),
        }
        position = 0
        while tokenizer.has_more_tokens():
            position += 1
            tokenizer.advance()
            if position in spot_checks:
                expected_type, accessor, expected_value = spot_checks[position]
                self.assertEqual(expected_type, tokenizer.token_type())
                self.assertEqual(getattr(tokenizer, accessor)(), expected_value)
Example #3
0
    def create_token_file(jack_file_name):
        """Tokenize a .jack source file and write its tokens to a <name>T.xml file.

        The output file name is derived from the input name by replacing the
        '.jack' suffix with 'T.xml'. Returns the name of the XML file written.
        """
        token_file_name = jack_file_name.replace('.jack', 'T.xml')

        # 'with' guarantees both handles are closed (the original leaked
        # them), and plain 'r' replaces the 'rU' mode removed in Python 3.11.
        with open(jack_file_name, 'r') as jack_file, \
             open(token_file_name, 'w') as token_file:
            tokenizer = JackTokenizer(jack_file)

            token_file.write('<tokens>\n')

            while tokenizer.hasMoreTokens():
                tokenizer.advance()
                # Compare with '==': 'is' tests object identity and only
                # appeared to work because CPython interns short literals.
                token_type = tokenizer.tokenType()

                if token_type == 'KEYWORD':
                    token_file.write('<keyword> {} </keyword>\n'.format(
                        tokenizer.keyWord().lower()))
                elif token_type == 'SYMBOL':
                    symbol = tokenizer.symbol()

                    # Escape the characters that XML reserves.
                    if symbol in ['<', '>', '&']:
                        symbol = Main.XML_CONVSERSIONS[symbol]

                    token_file.write('<symbol> {} </symbol>\n'.format(symbol))
                elif token_type == 'IDENTIFIER':
                    token_file.write('<identifier> {} </identifier>\n'.format(
                        tokenizer.identifier()))
                elif token_type == 'INT_CONST':
                    token_file.write(
                        '<integerConstant> {} </integerConstant>\n'.format(
                            tokenizer.intVal()))
                elif token_type == 'STRING_CONST':
                    token_file.write(
                        '<stringConstant> {} </stringConstant>\n'.format(
                            tokenizer.stringVal()))

            token_file.write('</tokens>\n')

        return token_file_name
Example #4
0
class CompilationEngine(object):
    """CompilationEngine: generates the compiler's output.

    Recursive-descent parser over a JackTokenizer; emits an XML parse tree
    into self.output (a list of lines) which save_output_file() writes out.
    """

    def __init__(self, input_file, output_file, use_text_as_input=False):
        """Creates a new compilation engine with the
        given input and output.

        The next routine called must be compile_class
        """
        self.tokenizer = JackTokenizer(input_file, use_text_as_input)
        self.output_file = output_file
        self.output = []

    def save_output_file(self):
        """Writes the accumulated XML lines to self.output_file.

        Uses a context manager so the handle is closed (and the data
        flushed) even on error; the original never closed the file.
        """
        with open(self.output_file, 'w') as out:
            for line in self.output:
                out.write(line + '\n')

    def compile_class(self):
        """Compiles a complete class.
        class: 'class' className '{' classVarDec* subroutineDec* '}'
        """
        self.output.append('<class>') # output <class>
        self._handle_keyword() # 'class'
        self._handle_identifier() # className
        self._handle_symbol() # '{'

        # classVarDec*
        while self.tokenizer.peek_at_next_token() in [STATIC, FIELD]:
            self.compile_class_var_dec()

        # subroutineDec*
        while self.tokenizer.peek_at_next_token() in [CONSTRUCTOR, FUNCTION, METHOD]:
            self.compile_subroutine_dec()

        self._handle_symbol() # '}'
        self.output.append('</class>') # output </class>

    def compile_class_var_dec(self):
        """Compiles a static variable declaration,
        or a field declaration.
        classVarDec: ('static'|'field') type varName(',' varName)* ';'
        """
        self.output.append('<classVarDec>') # output <classVarDec>
        self._handle_keyword() # ('static'|'field')
        self._handle_type() # type
        self._handle_identifier() # varName

        while (self.tokenizer.peek_at_next_token() == ','):
            self._handle_symbol() # ','
            self._handle_identifier() # varName

        self._handle_symbol() # ';'
        self.output.append('</classVarDec>') # output </classVarDec>

    def compile_subroutine_dec(self):
        """Compiles a complete method, function, or constructor.
        subroutineDec: ('constructor'|'function'|'method') ('void'|type)
            subroutineName '(' parameterList ')' subroutineBody
        """
        self.output.append('<subroutineDec>')
        self._handle_keyword() # ('constructor'|'function'|'method')

        if self.tokenizer.peek_at_next_token() == VOID:
            self._handle_keyword() # 'void'
        else:
            self._handle_type() # type

        self._handle_identifier() # subroutineName
        self._handle_symbol() # '('
        self.compile_parameter_list()
        self._handle_symbol() # ')'
        self.compile_subroutine_body()
        self.output.append('</subroutineDec>')

    def compile_parameter_list(self):
        """Compiles a (possibly empty) parameter list.
        Does not handle the enclosing "()".
        parameterList: ((type varName) (',' type varName)*)?
        """
        self.output.append('<parameterList>')

        # ((type varName) (',' type varName)*)?
        if self.tokenizer.peek_at_next_token() != ')':
            self._handle_type() # type
            self._handle_identifier() # varName
            while self.tokenizer.peek_at_next_token() != ')':
                self._handle_symbol() # ','
                self._handle_type() # type
                self._handle_identifier() # varName

        self.output.append('</parameterList>')

    def compile_subroutine_body(self):
        """Compiles a subroutine's body.
        subroutineBody: '{' varDec* statements '}'
        """
        self.output.append('<subroutineBody>')
        self._handle_symbol() # '{'

        while self.tokenizer.peek_at_next_token() == VAR:
            self.compile_var_dec()

        self.compile_statements()
        self._handle_symbol() # '}'
        self.output.append('</subroutineBody>')

    def compile_var_dec(self):
        """Compiles a var declaration.
        varDec: 'var' type varName (',' varName)* ';'
        """
        self.output.append('<varDec>') # output <varDec>
        self._handle_keyword() # 'var'
        self._handle_type() # type
        self._handle_identifier() # varName

        while (self.tokenizer.peek_at_next_token() == ','):
            self._handle_symbol() # ','
            self._handle_identifier() # varName

        self._handle_symbol() # ';'
        self.output.append('</varDec>') # output </varDec>

    def compile_statements(self):
        """Compiles a sequence of statements.
        Does not handle the enclosing "{}".
        statements: statement*
        """
        self.output.append('<statements>') # output <statements>
        next_token = self.tokenizer.peek_at_next_token()
        while next_token in [LET, IF, WHILE, DO, RETURN]:
            if next_token == LET:
                self.compile_let()
            elif next_token == IF:
                self.compile_if()
            elif next_token == WHILE:
                self.compile_while()
            elif next_token == DO:
                self.compile_do()
            elif next_token == RETURN:
                self.compile_return()
            next_token = self.tokenizer.peek_at_next_token()
        self.output.append('</statements>') # output </statements>

    def compile_let(self):
        """Compiles a let statement.
        letStatement: 'let' varName('[' expression ']')? '=' expression ';'
        """
        self.output.append('<letStatement>') # output <letStatement>
        self._handle_keyword() # 'let'
        self._handle_identifier() # varName

        if self.tokenizer.peek_at_next_token() == '[':
            self._handle_symbol() # '['
            self.compile_expression() # expression
            self._handle_symbol() # ']'

        self._handle_symbol() # '='
        self.compile_expression() # expression
        self._handle_symbol() # ';'
        self.output.append('</letStatement>') # output </letStatement>

    def compile_if(self):
        """Compiles a if statement.
        ifStatement: 'if' '(' expression ')' '{' statements '}'
            ('else' '{' statements '}')?
        """
        self.output.append('<ifStatement>') # output <ifStatement>
        self._handle_keyword() # 'if'
        self._handle_symbol() # '('
        self.compile_expression() # expression
        self._handle_symbol() # ')'
        self._handle_symbol() # '{'
        self.compile_statements() # statements
        self._handle_symbol() # '}'

        if self.tokenizer.peek_at_next_token() == ELSE:
            self._handle_keyword() # 'else'
            self._handle_symbol() # '{'
            self.compile_statements() # statements
            self._handle_symbol() # '}'

        self.output.append('</ifStatement>') # output </ifStatement>

    def compile_while(self):
        """Compiles a while statement.
        whileStatement: 'while' '(' expression ')' '{' statements '}'
        """
        self.output.append('<whileStatement>') # output <whileStatement>
        self._handle_keyword() # 'while'
        self._handle_symbol() # '('
        self.compile_expression() # expression
        self._handle_symbol() # ')'
        self._handle_symbol() # '{'
        self.compile_statements() # statements
        self._handle_symbol() # '}'
        self.output.append('</whileStatement>') # output </whileStatement>

    def compile_do(self):
        """Compiles a do statement.
        doStatement: 'do' subroutineCall ';'
        """
        self.output.append('<doStatement>') # output <doStatement>
        self._handle_keyword() # 'do'
        self.compile_subroutine_call() # subroutineCall
        self._handle_symbol() # ';'
        self.output.append('</doStatement>') # output </doStatement>

    def compile_subroutine_call(self):
        """subroutineCall: subroutineName'('expressionList')'|
            (className|varName)'.'subroutineName'('expressionList')'
        """
        self._handle_identifier() # subroutineName or (className|varName)
        if self.tokenizer.peek_at_next_token() == '.':
            self._handle_symbol() # '.'
            self._handle_identifier() # subroutineName
        self._handle_symbol() # '('
        self.compile_expression_list() # expressionList
        self._handle_symbol() # ')'

    def compile_expression_list(self):
        """Compiles a (possibly empty) comma-separated list of expressions.
        expressionList: (expression (','expression)* )?
        """
        self.output.append('<expressionList>') # output <expressionList>
        if self.tokenizer.peek_at_next_token() != ')':
            self.compile_expression() # expression
            while self.tokenizer.peek_at_next_token() != ')':
                self._handle_symbol() # ','
                self.compile_expression() # expression
        self.output.append('</expressionList>') # output </expressionList>

    def compile_return(self):
        """Compiles a return statement.
        returnStatement: 'return' expression? ';'
        """
        self.output.append('<returnStatement>') # output <returnStatement>
        self._handle_keyword() # 'return'

        if (self.tokenizer.peek_at_next_token() != ';'):
            self.compile_expression()

        self._handle_symbol() # ';'
        self.output.append('</returnStatement>') # output </returnStatement>

    def compile_expression(self):
        """Compiles an expression.
        expression: term (op term)*
        """
        self.output.append('<expression>') # output <expression>
        self.compile_term()

        while (self.tokenizer.peek_at_next_token() in OPS):
            self._handle_symbol() # op
            self.compile_term() # term

        self.output.append('</expression>') # output </expression>

    def compile_term(self):
        """Compiles a term. If the current token is an identifier,
        the routine must distinguish between a variable, an array entry,
        or a subroutine call. A single look-ahead token, which may
        be one of "[", "(", or ".", suffices to distinguish between
        the possibilities. Any other token is not part of this term
        and should not be advanced over.

        term: integerConstant|stringConstant|keywordConstant|varName|
            varName'['expression']'|subroutineCall|'('expression')'|unaryOp term
        """
        self.output.append('<term>') # output <term>
        self.tokenizer.advance()
        token_type = self.tokenizer.token_type()
        if token_type == INT_CONST:
            self.output.append("<integerConstant> {} </integerConstant>".format(self.tokenizer.int_val()))
        elif token_type == STRING_CONST:
            self.output.append("<stringConstant> {} </stringConstant>".format(self.tokenizer.string_val()))
        elif token_type == KEYWORD:
            self.output.append("<keyword> {} </keyword>".format(self.tokenizer.keyword()))
        elif token_type == IDENTIFIER: # varName|varName'['expression']'|subroutineCall
            self.output.append("<identifier> {} </identifier>".format(self.tokenizer.identifier()))
            next_token = self.tokenizer.peek_at_next_token()
            if next_token == '[': # varName'['expression']'
                self._handle_symbol() # '['
                self.compile_expression() # expression
                self._handle_symbol() # ']'
            elif next_token == '(': # subroutineCall
                self._handle_symbol() # '('
                self.compile_expression_list() # expressionList
                self._handle_symbol() # ')'
            elif next_token == '.': # subroutineCall
                self._handle_symbol() # '.'
                self._handle_identifier() # subroutineName
                self._handle_symbol() # '('
                self.compile_expression_list() # expressionList
                self._handle_symbol() # ')'
        elif self.tokenizer.current_token == '(': # '('expression')'
            self.output.append("<symbol> {} </symbol>".format(self.tokenizer.symbol())) # '('
            self.compile_expression() # expression
            self._handle_symbol() # ')'
        elif self.tokenizer.current_token in ['-', '~']: # unaryOp term
            self.output.append("<symbol> {} </symbol>".format(self.tokenizer.symbol()))
            self.compile_term()
        else:
            raise Exception("Token '{}' not matched to any term".format(self.tokenizer.current_token))

        self.output.append('</term>') # output </term>

    def _handle_type(self):
        """ type: 'int'|'char'|'boolean'|className"""
        self.tokenizer.advance()
        if self.tokenizer.current_token in [INT, CHAR, BOOLEAN]:
            self.output.append("<keyword> {} </keyword>".format(self.tokenizer.keyword()))
        else:
            self.output.append("<identifier> {} </identifier>".format(self.tokenizer.identifier()))

    def _handle_keyword(self):
        """Advances one token and emits it as a <keyword> element."""
        self.tokenizer.advance()
        self.output.append("<keyword> {} </keyword>".format(self.tokenizer.keyword()))

    def _handle_identifier(self):
        """Advances one token and emits it as an <identifier> element."""
        self.tokenizer.advance()
        self.output.append("<identifier> {} </identifier>".format(self.tokenizer.identifier()))

    def _handle_symbol(self):
        """Advances one token and emits it as a <symbol> element."""
        self.tokenizer.advance()
        self.output.append("<symbol> {} </symbol>".format(self.tokenizer.symbol()))

    def _handle_int_const(self):
        """Advances one token and emits it as an <integerConstant> element."""
        self.tokenizer.advance()
        self.output.append("<integerConstant> {} </integerConstant>".format(self.tokenizer.int_val()))

    def _handle_string_const(self):
        """Advances one token and emits it as a <stringConstant> element."""
        self.tokenizer.advance()
        self.output.append("<stringConstant> {} </stringConstant>".format(self.tokenizer.string_val()))
class CompilationEngine:
    """Compiles tokenized Jack source straight to VM code.

    Drives a JackTokenizer, tracks identifiers in a SymbolTable, and emits
    VM commands through a VMWriter. The whole compilation runs from
    __init__ via CompileClass().
    """

    # Maps a symbol-table kind to the VM memory segment that stores it.
    _KIND_TO_SEGMENT = {
        "field": "this",
        "var": "local",
        "arg": "argument",
        "static": "static",
    }

    def __init__(self, inputFile, outputFile):
        self.tokenizer = JackTokenizer(inputFile)
        self.vmWriter = VMWriter(outputFile)
        self.symbolTable = SymbolTable()
        self.classname = ""
        # Initialize the label counters BEFORE compiling: CompileClass()
        # below runs the whole compilation, and compileWhile/compileIf read
        # these attributes. (The original assigned them after CompileClass(),
        # which only worked because CompileSubroutine re-zeroed them first.)
        self.whilecounter = 0
        self.ifcounter = 0
        self.CompileClass()

    def _push_variable(self, varname):
        """Pushes varname's value from the segment its kind maps to."""
        segment = self._KIND_TO_SEGMENT.get(self.symbolTable.kindOf(varname))
        if segment is not None:
            self.vmWriter.writePush(segment, self.symbolTable.indexOf(varname))

    def _pop_variable(self, varname):
        """Pops the top of the stack into varname's segment."""
        segment = self._KIND_TO_SEGMENT.get(self.symbolTable.kindOf(varname))
        if segment is not None:
            self.vmWriter.writePop(segment, self.symbolTable.indexOf(varname))

    def CompileClass(self):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        # classname
        self.tokenizer.advance()
        self.classname = self.tokenizer.identifier()
        self.tokenizer.advance()
        # ignore {
        self.tokenizer.advance()

        while self.tokenizer.keyWord() == "static" or self.tokenizer.keyWord() == "field":
            self.CompileClassVarDec()

        while self.tokenizer.keyWord() == "constructor" or self.tokenizer.keyWord() == "function" or self.tokenizer.keyWord() == "method":
            self.CompileSubroutine()

        # ignore }
        self.tokenizer.advance()

    def CompileClassVarDec(self):
        """classVarDec: ('static'|'field') type varName (',' varName)* ';'"""
        kind = self.tokenizer.keyWord()
        self.tokenizer.advance()
        var_type = self.compileType()
        name = self.tokenizer.identifier()
        self.symbolTable.define(name, var_type, kind)
        self.tokenizer.advance()

        # add the rest of var names, if there are
        while self.tokenizer.symbol() == ",":
            self.tokenizer.advance()
            name = self.tokenizer.identifier()
            self.symbolTable.define(name, var_type, kind)
            self.tokenizer.advance()

        # ignore ;
        self.tokenizer.advance()

    def CompileSubroutine(self):
        """Compiles a constructor, function, or method and emits its VM code."""
        self.symbolTable.startSubroutine()
        self.ifcounter = 0
        self.whilecounter = 0
        # constructor | function | method
        functype = self.tokenizer.keyWord()
        self.tokenizer.advance()

        # A method receives the object it operates on as a hidden argument 0.
        if functype == "method":
            self.symbolTable.define("this", self.classname, "arg")

        # skip the return type
        self.tokenizer.advance()

        subroutine_name = self.classname + "." + self.tokenizer.identifier()
        self.tokenizer.advance()

        # ( parameterList )
        self.tokenizer.advance()
        self.compileParameterList()
        self.tokenizer.advance()

        # subroutineBody
        # ignore {
        self.tokenizer.advance()
        # varDec* -- all locals must be declared before writeFunction,
        # since the function header needs the local-variable count.
        while self.tokenizer.keyWord() == "var":
            self.compileVarDec()

        self.vmWriter.writeFunction(subroutine_name, self.symbolTable.varCount("var"))

        if functype == "constructor" or functype == "method":
            if functype == "constructor":
                # Allocate memory for the new object's fields.
                self.vmWriter.writePush("constant", self.symbolTable.varCount("field"))
                self.vmWriter.writeCall("Memory.alloc", "1")
            else:
                # Methods receive 'this' as argument 0.
                self.vmWriter.writePush("argument", "0")
            self.vmWriter.writePop("pointer", "0")

        # statements
        self.compileStatements()

        # ignore }
        self.tokenizer.advance()

    def compileParameterList(self):
        """parameterList: ((type varName) (',' type varName)*)?"""
        # tokenType() == 1 means the current token is a symbol, i.e. the
        # closing ')', so the list is empty.
        if self.tokenizer.tokenType() != 1:

            # type varName
            argtype = self.compileType()
            argname = self.tokenizer.identifier()
            self.symbolTable.define(argname, argtype, "arg")
            self.tokenizer.advance()

            # (, type varName)*
            while self.tokenizer.symbol() == ",":
                self.tokenizer.advance()
                argtype = self.compileType()
                argname = self.tokenizer.identifier()
                self.symbolTable.define(argname, argtype, "arg")
                self.tokenizer.advance()

    def compileVarDec(self):
        """varDec: 'var' type varName (',' varName)* ';'"""
        # var
        self.tokenizer.advance()

        # type
        var_type = self.compileType()

        # varName
        varname = self.tokenizer.identifier()
        self.symbolTable.define(varname, var_type, "var")
        self.tokenizer.advance()

        # (, varName)*
        while self.tokenizer.symbol() == ",":
            self.tokenizer.advance()
            varname = self.tokenizer.identifier()
            self.symbolTable.define(varname, var_type, "var")
            self.tokenizer.advance()

        # ignore ;
        self.tokenizer.advance()

    def compileStatements(self):
        """statements: (let|if|while|do|return)* -- loops while the current
        token is a keyword (tokenType() == 0)."""
        while self.tokenizer.tokenType() == 0:
            if self.tokenizer.keyWord() == "let":
                self.compileLet()
            elif self.tokenizer.keyWord() == "if":
                self.compileIf()
            elif self.tokenizer.keyWord() == "while":
                self.compileWhile()
            elif self.tokenizer.keyWord() == "do":
                self.compileDo()
            elif self.tokenizer.keyWord() == "return":
                self.compileReturn()

    def compileDo(self):
        """doStatement: 'do' subroutineCall ';' -- discards the return value."""
        self.tokenizer.advance()
        self.compileSubRoutineCall()
        # A 'do' call's return value is unused; pop it into temp 0.
        self.vmWriter.writePop("temp", "0")

        # ignore ;
        self.tokenizer.advance()

    def compileLet(self):
        """letStatement: 'let' varName ('[' expression ']')? '=' expression ';'"""
        # let
        self.tokenizer.advance()
        # varName
        varname = self.tokenizer.identifier()

        self.tokenizer.advance()

        # ([ expression ])?
        if self.tokenizer.symbol() == "[":
            self.tokenizer.advance()
            # Compute base + index on the stack.
            self.CompileExpression()
            self._push_variable(varname)
            self.vmWriter.writeArithmetic("add")

            # ignore ]
            self.tokenizer.advance()
            # ignore =
            self.tokenizer.advance()
            self.CompileExpression()
            # Stash the assigned value so we can set 'that' to the target
            # address, then store the value through that 0.
            self.vmWriter.writePop("temp", "0")
            self.vmWriter.writePop("pointer", "1")
            self.vmWriter.writePush("temp", "0")
            self.vmWriter.writePop("that", "0")
            self.tokenizer.advance()

        else:
            # ignore =
            self.tokenizer.advance()

            # expression
            self.CompileExpression()
            self._pop_variable(varname)

            # ignore ;
            self.tokenizer.advance()

    def compileWhile(self):
        """whileStatement: 'while' '(' expression ')' '{' statements '}'"""
        # while
        self.tokenizer.advance()

        # ( expression )
        self.tokenizer.advance()
        whileindex = self.whilecounter
        self.whilecounter += 1
        self.vmWriter.writeLabel("WHILE_EXP" + str(whileindex))
        self.CompileExpression()
        # Jump out when the (negated) condition holds.
        self.vmWriter.writeArithmetic("not")
        self.vmWriter.writeIf("WHILE_END" + str(whileindex))
        self.tokenizer.advance()

        # ignore {
        self.tokenizer.advance()

        # statements
        self.compileStatements()

        # ignore }
        self.tokenizer.advance()
        self.vmWriter.writeGoto("WHILE_EXP" + str(whileindex))
        self.vmWriter.writeLabel("WHILE_END" + str(whileindex))

    def compileReturn(self):
        """returnStatement: 'return' expression? ';'
        Void subroutines still push constant 0 per the VM calling convention."""
        # return
        self.tokenizer.advance()

        # expression?
        if self.isTerm():
            self.CompileExpression()
            self.vmWriter.writeReturn()
        else:
            self.vmWriter.writePush("constant", "0")
            self.vmWriter.writeReturn()

        # ignore ;
        self.tokenizer.advance()

    def compileIf(self):
        """ifStatement: 'if' '(' expression ')' '{' statements '}'
            ('else' '{' statements '}')?"""
        # if
        self.tokenizer.advance()
        # ( expression )
        self.tokenizer.advance()
        self.CompileExpression()
        ifindex = self.ifcounter
        self.ifcounter += 1
        self.vmWriter.writeIf("IF_TRUE" + str(ifindex))
        self.vmWriter.writeGoto("IF_FALSE" + str(ifindex))
        self.vmWriter.writeLabel("IF_TRUE" + str(ifindex))
        self.tokenizer.advance()

        # { statements }
        self.tokenizer.advance()
        self.compileStatements()
        self.tokenizer.advance()

        if self.tokenizer.tokenType() == 0 and self.tokenizer.keyWord() == "else":
            # else
            self.vmWriter.writeGoto("IF_END" + str(ifindex))
            self.vmWriter.writeLabel("IF_FALSE" + str(ifindex))

            self.tokenizer.advance()

            # { statements }
            self.tokenizer.advance()
            self.compileStatements()
            self.tokenizer.advance()

            self.vmWriter.writeLabel("IF_END" + str(ifindex))

        else:
            self.vmWriter.writeLabel("IF_FALSE" + str(ifindex))

    def CompileExpression(self):
        """expression: term (op term)* -- emits operands first, operator last."""
        # term
        self.CompileTerm()
        # (op term)*
        op = self.tokenizer.symbol()
        while self.tokenizer.tokenType() == 1 and op in operators:
            self.tokenizer.advance()
            self.CompileTerm()
            # NOTE: the tokenizer yields XML-escaped symbols (&amp;, &lt;, &gt;).
            if op == "=":
                self.vmWriter.writeArithmetic("eq")
            elif op == "+":
                self.vmWriter.writeArithmetic("add")
            elif op == "-":
                self.vmWriter.writeArithmetic("sub")
            elif op == "*":
                self.vmWriter.writeCall("Math.multiply", "2")
            elif op == "/":
                self.vmWriter.writeCall("Math.divide", "2")
            elif op == "&amp;":
                self.vmWriter.writeArithmetic("and")
            elif op == "|":
                self.vmWriter.writeArithmetic("or")
            elif op == "&lt;":
                self.vmWriter.writeArithmetic("lt")
            elif op == "&gt;":
                self.vmWriter.writeArithmetic("gt")
            op = self.tokenizer.symbol()

    def CompileTerm(self):
        """term: integerConstant | stringConstant | keywordConstant | varName |
        varName'['expression']' | subroutineCall | '('expression')' | unaryOp term"""
        if self.tokenizer.tokenType() == 3:
            # integerConstant
            self.vmWriter.writePush("constant", self.tokenizer.intVal())
            self.tokenizer.advance()

        elif self.tokenizer.tokenType() == 4:
            # stringConstant: build a String object character by character.
            conststring = self.tokenizer.stringVal()
            self.vmWriter.writePush("constant", str(len(conststring)))
            self.vmWriter.writeCall("String.new", "1")
            for i in range(len(conststring)):
                self.vmWriter.writePush("constant", str(ord(conststring[i])))
                self.vmWriter.writeCall("String.appendChar", "2")

            self.tokenizer.advance()

        elif self.tokenizer.tokenType() == 0:
            # keywordConstant: true | false | null | this
            keywordconst = self.tokenizer.keyWord()
            if keywordconst == "true":
                # true is -1, i.e. not(0).
                self.vmWriter.writePush("constant", "0")
                self.vmWriter.writeArithmetic("not")
            elif keywordconst == "false" or keywordconst == "null":
                self.vmWriter.writePush("constant", "0")
            elif keywordconst == "this":
                self.vmWriter.writePush("pointer", "0")
            self.tokenizer.advance()

        elif self.tokenizer.tokenType() == 2:
            # varName [ expression ]
            if self.tokenizer.tokens[self.tokenizer.currentToken +1] == '[':
                varname = self.tokenizer.identifier()
                self.tokenizer.advance()
                # [ expression ]
                self.tokenizer.advance()
                self.CompileExpression()
                self._push_variable(varname)
                self.vmWriter.writeArithmetic("add")
                # Dereference through the 'that' segment.
                self.vmWriter.writePop("pointer", "1")
                self.vmWriter.writePush("that", "0")
                self.tokenizer.advance()
            # subroutine call
            elif self.tokenizer.tokens[self.tokenizer.currentToken +1] == '(' or self.tokenizer.tokens[self.tokenizer.currentToken +1] == '.':
                self.compileSubRoutineCall()
            # varName
            else:
                self._push_variable(self.tokenizer.identifier())
                self.tokenizer.advance()

        elif self.tokenizer.tokenType() == 1 and self.tokenizer.symbol() == '(':
            # ( expression )
            self.tokenizer.advance()
            self.CompileExpression()
            self.tokenizer.advance()
        else:
            # unaryOp term
            op = self.tokenizer.symbol()
            self.tokenizer.advance()
            self.CompileTerm()
            if op == "-":
                self.vmWriter.writeArithmetic("neg")
            elif op == "~":
                self.vmWriter.writeArithmetic("not")

    def compileSubRoutineCall(self):
        """subroutineCall: subroutineName '(' expressionList ')' |
        (className|varName) '.' subroutineName '(' expressionList ')'"""
        # subroutineName | (className | varName)
        identifier = self.tokenizer.identifier()
        self.tokenizer.advance()
        # no "." -- bare name: a method call on the current object.
        if self.tokenizer.symbol() == '(':
            # ( expressionList )
            self.tokenizer.advance()
            self.vmWriter.writePush("pointer", "0")
            argnum = self.CompileExpressionList()
            # +1 accounts for the hidden 'this' argument.
            self.vmWriter.writeCall(self.classname + "." + identifier, str(argnum +1))

            self.tokenizer.advance()
        else:
            # . -- class.function or var.method
            self.tokenizer.advance()
            # subroutineName
            subname = self.tokenizer.identifier()
            self.tokenizer.advance()

            self.tokenizer.advance()
            if identifier in self.symbolTable.classtable or identifier in self.symbolTable.subroutinetable:
                # The identifier is a variable: this is a method call, so
                # push the object as the hidden first argument.
                self._push_variable(identifier)

                argnum = self.CompileExpressionList()
                identifierclass = self.symbolTable.typeOf(identifier)
                self.vmWriter.writeCall(identifierclass + "." + subname, str(argnum +1))
            else:
                # The identifier is a class name: a function/constructor call.
                argnum = self.CompileExpressionList()
                self.vmWriter.writeCall(identifier + "." + subname, str(argnum))
            self.tokenizer.advance()

    def CompileExpressionList(self):
        """expressionList: (expression (',' expression)*)?
        Returns the number of expressions compiled."""
        i = 0
        if self.isTerm():
            i += 1
            # (, expression)*
            self.CompileExpression()
            while self.tokenizer.symbol() == ',':
                i += 1
                self.tokenizer.advance()
                self.CompileExpression()
        return i

    def isTerm(self):
        """Returns True when the current token can start a term."""
        if self.tokenizer.tokenType() == 3 or self.tokenizer.tokenType() == 4:
            return True
        if self.tokenizer.tokenType() == 0 and self.tokenizer.keyWord() in keyword_const:
            return True
        if self.tokenizer.tokenType() == 1 and self.tokenizer.symbol() == '(' :
            return True
        if self.tokenizer.tokenType() == 1 and (self.tokenizer.symbol() == '-' or self.tokenizer.symbol() == '~'):
            return True
        if self.tokenizer.tokenType() == 2:
            return True
        return False

    def compileType(self):
        """type: 'int'|'char'|'boolean'|className -- returns the type name
        and advances past it."""
        if self.tokenizer.tokenType() == 0:
            typen = self.tokenizer.keyWord()
        else:
            typen = self.tokenizer.identifier()
        self.tokenizer.advance()
        return typen
class CompilationEngine():
    """
    Recursive-descent parser that turns a .jack source file into an XML
    parse tree (nand2tetris project 10 style).

    The engine drives a JackTokenizer over the input and writes nested
    XML elements to the output file.  Each compile_* method assumes the
    current token is the first token of the construct it parses and
    leaves the tokenizer positioned on the first token *after* it.

    Two mechanisms are worth knowing before reading the methods:

    * Speculative output: expression/term parsing first stages opening
      tags in ``self.buffer`` and only flushes them to the file
      (``write(..., use_buffer=True)``) once a term is actually
      recognised; ``cleanbuffer()`` discards the staged text when the
      parse attempt fails.
    * Exception-driven lookahead: optional tokens are probed by calling
      ``eat(tok)`` inside ``try``/``except`` — a failed ``eat`` raises
      without advancing, which callers use as backtracking.
    """
    def __init__(self, input_file, output_file):
        """
        Creates a new compilation engine with the given input and output,
        and immediately runs the full compilation: the token stream must
        begin with the 'class' keyword, which dispatches compile_class().

        :param input_file: path of the .jack source handed to JackTokenizer.
        :param output_file: path of the XML file to produce.
        :raise KeyError: if the first token is not the 'class' keyword.
        """
        self.tokenizer = JackTokenizer(input_file)
        # Current XML indentation depth (multiplied by SPACE when writing).
        self.num_spaces = 0
        # Staging area for speculative expression/term output; see class doc.
        self.buffer = ""
        # NOTE: the whole compilation happens inside this `with`, so
        # self.output is closed (and unusable) once __init__ returns.
        with open(output_file, 'w') as self.output:
            while self.tokenizer.has_more_tokens():
                self.tokenizer.advance()
                assert self.tokenizer.token_type() == Token_Types.keyword
                if self.tokenizer.keyWord() == 'class':
                    self.compile_class()
                else:
                    raise KeyError(
                        "Received a token that does not fit the beginning of a "
                        "module. " + self.tokenizer.keyWord() + " in " +
                        input_file)

    def compile_class(self):
        """
        Compiles a complete class: the 'class' keyword, class name, '{',
        any number of classVarDec / subroutineDec constructs, and '}'.
        Emits the surrounding <class>...</class> element.
        :return: None
        """
        self.write('class', delim=True)
        self.num_spaces += 1
        self.write_terminal(self.tokenizer.token_type().value,
                            self.tokenizer.keyWord())
        self.eat('class')

        t_type, class_name = self.tokenizer.token_type(
        ), self.tokenizer.identifier()
        self.write_terminal(t_type.value, class_name)

        self.tokenizer.advance()

        t_type, symbol = self.tokenizer.token_type(), self.tokenizer.symbol()
        self.write_terminal(t_type.value, symbol)
        self.eat('{')

        # Class body: keep compiling declarations until the next symbol,
        # which (in well-formed input) is the closing '}'.
        t_type = self.tokenizer.token_type()
        while t_type != Token_Types.symbol:
            operation = self.tokenizer.keyWord()
            if operation in ['static', 'field']:
                self.compile_class_var_dec()
            elif operation in ROUTINES:
                self.compile_subroutine()
            else:
                raise KeyError(
                    "Found statement that does not fit class declaration. ",
                    operation)

            # self.tokenizer.advance()

            t_type = self.tokenizer.token_type()

        self.write_terminal(t_type.value, self.tokenizer.symbol())
        self.num_spaces -= 1
        self.write('class', delim=True, end=True)

    def eat(self, string):
        """
        If the given string is the same as current token (only if it keyword or symbol) the
        tokenizer of the object will be advanced, otherwise an exception will be raised.

        Also used as a lookahead probe: callers wrap eat() in try/except
        to test for optional tokens, relying on the fact that a failed
        eat raises *before* advancing the tokenizer.
        :param string: the expected string.
        :raise: the current token is not the expected string.
        """
        type = self.tokenizer.token_type()
        value = "not keyword and not symbol"
        if type == Token_Types.keyword:
            value = self.tokenizer.keyWord()
        elif type == Token_Types.symbol:
            value = self.tokenizer.symbol()

        if value != string:
            raise Exception("Received '" + value +
                            "' which is not the expected string: '" + string +
                            "'")
        # assert value == string
        self.tokenizer.advance()

    def compile_class_var_dec(self):
        """
        Compiles a static declaration or a field declaration:
        ('static'|'field') type varName (',' varName)* ';'
        Emits the surrounding <classVarDec>...</classVarDec> element.
        """
        self.write("classVarDec", True)
        self.num_spaces += 1

        # First word is static or field.

        # if self.tokenizer.token_type() != Token_Types.keyword:
        #     raise Exception("Cant compile class variable declaration without keyword token.")

        # should i check before if i can get a keyword?
        var_sort = self.tokenizer.keyWord()
        if var_sort not in ["static", "field"]:
            raise Exception(
                "Cant compile class variable declaration without static of field."
            )
        self.write("<keyword> " + var_sort + " </keyword>")
        self.tokenizer.advance()

        # Second word is type: either a built-in keyword type or a class
        # name identifier.
        if self.tokenizer.token_type() == Token_Types.keyword:
            var_type = self.tokenizer.keyWord()
            if var_type not in ["int", "char", "boolean"]:
                raise Exception(
                    "Cant compile class variable declaration with invalid keyword type."
                )
            self.write("<keyword> " + var_type + " </keyword>")
            self.tokenizer.advance()
        elif self.tokenizer.token_type() == Token_Types.identifier:
            self.write("<identifier> " + self.tokenizer.identifier() +
                       " </identifier>")
            self.tokenizer.advance()
        else:
            raise Exception(
                "Cant compile class variable declaration with invalid identifier type."
            )

        # Third and so on, are variables names.
        # if self.tokenizer.token_type() != Token_Types.identifier:
        #     raise Exception("Cant compile class variable declaration without varName identifier.")
        # assert self.tokenizer.token_type() == Token_Types.identifier
        self.write("<identifier> " + self.tokenizer.identifier() +
                   " </identifier>")
        self.tokenizer.advance()
        self.possible_varName()

        # It will always end with ';'
        self.eat(';')
        self.write("<symbol> ; </symbol>")

        self.num_spaces -= 1
        self.write("classVarDec", True, True)

    def possible_varName(self):
        """
        Compile 0 or more variable names, after an existing variable name.
        Probes for ',' with eat(); a failed eat means the list is done.
        Recurses to pick up any further names.
        """
        try:
            self.eat(',')
        except:
            # There is no varName
            return
        # There is an varName
        self.write("<symbol> , </symbol>")
        # if self.tokenizer.token_type() != Token_Types.identifier:
        #     raise Exception("Cant compile (class or not) variable declaration without varName" +
        #                     " identifier after ',' .")
        self.write("<identifier> " + self.tokenizer.identifier() +
                   " </identifier>")
        self.tokenizer.advance()
        self.possible_varName()

    def compile_subroutine(self):
        """
        Compiles a complete method, function or constructor:
        ('constructor'|'function'|'method') ('void'|type) subroutineName
        '(' parameterList ')' subroutineBody
        Emits <subroutineDec> and the nested <subroutineBody> elements.
        :return: None
        """
        self.write('subroutineDec', delim=True)
        self.num_spaces += 1
        self.write_terminal(self.tokenizer.token_type().value,
                            self.tokenizer.keyWord())

        # self.eat('function' | 'method' | 'constructor')
        self.tokenizer.advance()

        # Return type: keyword ('void', 'int', ...) or a class identifier.
        t_type = self.tokenizer.token_type()
        if t_type == Token_Types.keyword:
            func_type = self.tokenizer.keyWord()
        else:
            func_type = self.tokenizer.identifier()
        self.write_terminal(t_type.value, func_type)

        # self.eat('void' | some other type)
        self.tokenizer.advance()

        t_type, func_name = self.tokenizer.token_type(
        ), self.tokenizer.identifier()
        self.write_terminal(t_type.value, func_name)

        self.tokenizer.advance()

        t_type, symbol = self.tokenizer.token_type(), self.tokenizer.symbol()
        self.write_terminal(t_type.value, symbol)
        self.eat('(')

        self.compile_param_list()

        t_type, symbol = self.tokenizer.token_type(), self.tokenizer.symbol()
        self.write_terminal(t_type.value, symbol)
        self.eat(')')

        self.write("subroutineBody", delim=True)

        self.num_spaces += 1

        t_type, symbol = self.tokenizer.token_type(), self.tokenizer.symbol()
        self.write_terminal(t_type.value, symbol)
        self.eat('{')

        # Body: var declarations and statements until the next symbol,
        # which (in well-formed input) is the closing '}'.
        t_type = self.tokenizer.token_type()
        while t_type != Token_Types.symbol:
            token = self.tokenizer.keyWord()
            if token == 'var':
                self.compile_var_dec()
            elif token in STATEMENTS:
                self.compile_statements()
            else:
                raise KeyError("an unknown step inside a subroutine")
            # self.tokenizer.advance()
            t_type = self.tokenizer.token_type()

        self.write_terminal(t_type.value, self.tokenizer.symbol())
        self.eat('}')

        self.num_spaces -= 1

        self.write("subroutineBody", delim=True, end=True)

        self.num_spaces -= 1
        self.write('subroutineDec', delim=True, end=True)

    def compile_param_list(self):
        """
        Compiles a parameter list, which may be empty, not including the "()".
        Each parameter is a (type, name) pair; pairs are separated by ','.
        Leaves the tokenizer on the closing ')'.
        :return: None
        """
        self.write('parameterList', delim=True)
        self.num_spaces += 1

        t_type = self.tokenizer.token_type()
        finished = t_type == Token_Types.symbol and self.tokenizer.symbol(
        ) == ")"
        while not finished:
            # Recognized type
            if t_type == Token_Types.keyword:
                token = self.tokenizer.keyWord()
            elif t_type == Token_Types.identifier:
                token = self.tokenizer.identifier()
            else:
                raise KeyError("Got some weird type in paramlist: " +
                               t_type.value)

            # Write var type
            self.write_terminal(t_type.value, token)

            self.tokenizer.advance()

            # Write var name
            t_type, token = self.tokenizer.token_type(
            ), self.tokenizer.identifier()
            self.write_terminal(t_type.value, token)

            self.tokenizer.advance()

            t_type, symbol = self.tokenizer.token_type(
            ), self.tokenizer.symbol()
            if symbol == ')':
                finished = True
            else:
                self.eat(',')
                self.write_terminal(t_type.value, symbol)
                t_type = self.tokenizer.token_type()

        self.num_spaces -= 1
        self.write('parameterList', delim=True, end=True)

    def compile_var_dec(self):
        """
        Compiles a var declaration: 'var' type varName (',' varName)* ';'
        Emits the surrounding <varDec>...</varDec> element.
        :return: None
        """
        self.write("varDec", True)
        self.num_spaces += 1

        # First word is valid.
        self.eat('var')
        self.write("<keyword> var </keyword>")

        # Second word is type: built-in keyword type or class identifier.
        if self.tokenizer.token_type() == Token_Types.keyword:
            var_type = self.tokenizer.keyWord()
            if var_type not in ["int", "char", "boolean"]:
                raise Exception(
                    "Cant compile variable declaration with invalid keyword type."
                )
            self.write("<keyword> " + var_type + " </keyword>")
            self.tokenizer.advance()
        elif self.tokenizer.token_type() == Token_Types.identifier:
            self.write("<identifier> " + self.tokenizer.identifier() +
                       " </identifier>")
            self.tokenizer.advance()
        else:
            raise Exception(
                "Cant compile variable declaration with invalid identifier type."
            )

        # Third and so on, are variables names.
        # if self.tokenizer.token_type() != Token_Types.identifier:
        #     raise Exception("Cant compile variable declaration without varName identifier.")
        self.write("<identifier> " + self.tokenizer.identifier() +
                   " </identifier>")
        self.tokenizer.advance()
        self.possible_varName()

        # It will always end with ';'
        self.eat(';')
        self.write("<symbol> ; </symbol>")

        self.num_spaces -= 1
        self.write("varDec", True, True)

    def compile_statements(self):
        """
        Compile a sequence of 0 or more statements, not including the "{}".
        Emits the surrounding <statements>...</statements> element; the
        actual per-statement work is delegated to possible_single_statement.
        """
        # if self.tokenizer.token_type() != Token_Types.keyword:
        #     return
        #     # raise Exception("Can't use compile_statement if the current token isn't a keyword.")
        # statement = self.tokenizer.keyWord()
        # if statement not in ['let', 'if', 'while', 'do', 'return']:
        #     return
        self.write("statements", True)
        self.num_spaces += 1

        self.possible_single_statement()

        self.num_spaces -= 1
        self.write("statements", True, True)

    def possible_single_statement(self):
        """
        Compile 0 or more single statements..
        Dispatches on the statement keyword (let/if/while/do/return) and
        recurses until the current token no longer starts a statement.
        """
        if (self.tokenizer.token_type() == Token_Types.keyword
                and self.tokenizer.keyWord() in STATEMENTS):

            # if self.tokenizer.keyWord() in STATEMENTS:
            statement = self.tokenizer.keyWord()
            self.write(statement + "Statement", True)
            if statement == 'let':
                self.compile_let()
            elif statement == 'if':
                self.compile_if()
            elif statement == 'while':
                self.compile_while()
            elif statement == 'do':
                self.compile_do()
            elif statement == 'return':
                self.compile_return()
            # else:
            #     raise Exception("Invalid statement.")
            self.write(statement + "Statement", True, True)
            self.possible_single_statement()

    def compile_do(self):
        """
        Compile do statement: 'do' subroutineCall ';'
        The leading identifier is written here; the '.'/'(' continuation
        is handled by subroutineCall_continue().
        :return: None
        """
        self.eat('do')
        self.num_spaces += 1
        self.write("<keyword> do </keyword>")

        # is the check is necessary?  probably not..
        # if type != Token_Types.identifier:
        #     raise Exception()
        self.write("<identifier> " + self.tokenizer.identifier() +
                   " </identifier>")
        self.tokenizer.advance()
        self.subroutineCall_continue()

        self.eat(';')
        self.write("<symbol> ; </symbol>")
        self.num_spaces -= 1

    def compile_let(self):
        """
        Compile let statement: 'let' varName ('[' expression ']')? '=' expression ';'
        """
        self.eat('let')
        self.num_spaces += 1
        self.write("<keyword> let </keyword>")

        # self.compile_var_dec()
        # self.write("<identifier> " + self.tokenizer.identifier() + " </identifier>")
        self.write_terminal("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()
        self.possible_array()

        self.eat('=')
        self.write("<symbol> = </symbol>")

        self.compile_expression()

        self.eat(';')
        self.write("<symbol> ; </symbol>")
        self.num_spaces -= 1
        # self.write("</letStatement>")

    def possible_array(self):
        """
        Compile 0 or 1 array.
        Probes for '[' with eat(); a failed eat means there is no index
        expression and the tokenizer is left untouched.
        """
        try:
            self.eat('[')
        except:
            # There is no array
            return
        # There is an array
        self.write("<symbol> [ </symbol>")
        self.compile_expression()
        self.eat(']')
        self.write("<symbol> ] </symbol>")

    def compile_while(self):
        """
        Compile while statement: 'while' '(' expression ')' '{' statements '}'
        """
        self.eat('while')
        # self.write("<whileStatement>")
        self.num_spaces += 1
        self.write("<keyword> while </keyword>")

        self.eat('(')
        self.write("<symbol> ( </symbol>")
        self.compile_expression()
        self.eat(')')
        self.write("<symbol> ) </symbol>")

        self.eat('{')
        self.write("<symbol> { </symbol>")
        self.compile_statements()
        self.eat('}')
        self.write("<symbol> } </symbol>")

        self.num_spaces -= 1
        # self.write("</whileStatement>")

    def compile_return(self):
        """
        Compile return statement: 'return' expression? ';'
        A failed eat(';') means there is a return value expression.
        """
        self.eat('return')
        self.num_spaces += 1
        self.write("<keyword> return </keyword>")

        try:
            self.eat(';')
        except:  # would it work?
            self.compile_expression()
            self.eat(';')

        self.write("<symbol> ; </symbol>")
        self.num_spaces -= 1

    def compile_if(self):
        """
        Compile if statement:
        'if' '(' expression ')' '{' statements '}' ('else' '{' statements '}')?
        """
        self.eat('if')
        # self.write("<ifStatement>")
        self.num_spaces += 1
        self.write("<keyword> if </keyword>")

        self.eat('(')
        self.write("<symbol> ( </symbol>")
        self.compile_expression()
        self.eat(')')
        self.write("<symbol> ) </symbol>")

        self.eat('{')
        self.write("<symbol> { </symbol>")
        self.compile_statements()
        self.eat('}')
        self.write("<symbol> } </symbol>")
        self.possible_else()

        self.num_spaces -= 1
        # self.write("</ifStatement>" + END_LINE)

    def possible_else(self):
        """
        Compile 0 or 1 else sections.
        Probes for 'else' with eat(); a failed eat means no else clause.
        """
        try:
            self.eat('else')
        except:
            # There is no else so we can return
            return

        # There is an else, so we handle it properly
        self.write("<keyword> else </keyword>")

        self.eat('{')
        self.write("<symbol> { </symbol>")
        self.compile_statements()
        self.eat('}')
        self.write("<symbol> } </symbol>")

    def compile_expression(self):
        """
        Compile an expression: term (op term)*
        The opening <expression> tag is staged in self.buffer and only
        reaches the file once compile_term() flushes it; if no term can
        be parsed the staged text is discarded via cleanbuffer() and the
        exception is swallowed.
        :return: None
        """
        self.buffer += self.num_spaces * SPACE + "<expression>\n"
        self.num_spaces += 1
        try:
            self.compile_term()
            self.possible_op_term()
            self.num_spaces -= 1
            self.write("expression", True, True)
        except:
            self.cleanbuffer()

    def subroutineCall_continue(self):
        """
        After an identifier there can be a '.' or '(', otherwise it not function call
        (subroutineCall).
        Handles '(' expressionList ')' directly, or
        '.' subroutineName '(' expressionList ')' for a qualified call.
        :return: None
        :raise Exception: if the current symbol is neither '.' nor '('.
        """
        # should i check every time if it's type symbol?
        symbol = self.tokenizer.symbol()
        if symbol == '(':
            self.eat('(')
            self.write("<symbol> ( </symbol>")
            self.compile_expression_list()
            self.eat(')')
            self.write("<symbol> ) </symbol>")

        elif symbol == '.':
            self.eat('.')
            self.write("<symbol> . </symbol>")

            self.write("<identifier> " + self.tokenizer.identifier() +
                       " </identifier>")
            self.tokenizer.advance()

            self.eat('(')
            self.write("<symbol> ( </symbol>")
            self.compile_expression_list()
            self.eat(')')
            self.write("<symbol> ) </symbol>")

        else:
            raise Exception(
                "If there is a symbol in the subroutineCall it have to be . or (."
            )

    def compile_term(self):
        """
        Compiles a temp. This routine is faced with a slight difficulty when trying to
        decide between some of the alternative parsing rules. Specifically,
        if the current token is an identifier, the routine must distinguish between a
        variable, an array entry, and a subroutine call. A single look-ahead token,
        which may be one of "[", "(", or "." suffices to distiguish between the three
        possibilities. Any other token is not part of this term and should not be
        advanced over.

        The opening <term> tag is staged in self.buffer; the first
        successful terminal write (use_buffer=True) flushes it.  On an
        unparseable token the buffer is discarded and the exception
        propagates to compile_expression().
        :return: None
        """
        self.buffer += SPACE * self.num_spaces + "<term>\n"
        self.num_spaces += 1

        # NOTE: shadows the builtin `type` for the rest of this method.
        type = self.tokenizer.token_type()
        # maybe i should divide it for int and string
        # If the token is a string_const or int_const
        if type in [Token_Types.string_const, Token_Types.int_const]:
            value = self.tokenizer.intVal(
            ) if type == Token_Types.int_const else self.tokenizer.stringVal()
            self.write("<" + type.value + "> " + value + " </" + type.value +
                       ">",
                       use_buffer=True)
            self.tokenizer.advance()

        # If the token is a keyword
        elif type == Token_Types.keyword:
            if self.tokenizer.keyWord() in KEY_TERMS:
                self.write("<" + type.value + "> " + self.tokenizer.keyWord() +
                           " </" + type.value + ">",
                           use_buffer=True)
                self.tokenizer.advance()
            else:
                self.cleanbuffer()
                raise Exception()

        # If the token is an identifier
        elif type == Token_Types.identifier:
            # value = self.tokenizer.identifier()
            self.write("<identifier> " + self.tokenizer.identifier() +
                       " </identifier>",
                       use_buffer=True)
            self.tokenizer.advance()
            self.possible_identifier_continue()

        # If the token is an symbol
        elif type == Token_Types.symbol:

            if self.tokenizer.symbol() == '(':
                self.eat('(')
                self.write("<symbol> ( </symbol>", use_buffer=True)
                self.compile_expression()
                self.eat(')')
                self.write("<symbol> ) </symbol>")
            elif self.tokenizer.symbol() in ["-", "~"]:
                self.write("<symbol> " + self.tokenizer.symbol() +
                           " </symbol>",
                           use_buffer=True)
                self.eat(self.tokenizer.symbol())
                # self.write("<symbol> " + self.tokenizer.symbol() + " </symbol>")
                self.compile_term()
            else:
                self.cleanbuffer()
                raise Exception()

        else:
            raise Exception("Invalid token for creating term.")

        self.num_spaces -= 1
        self.write("term", True, True)

    def possible_identifier_continue(self):
        """
        In a term if identifier continues with
        - '[' - it's a call of an array
        - '.' or '('  - it's part of subroutineCall (function call)
        - nothing - it's a variable
        This functions handle every one of this situations after the original identifier was
        handled.
        """
        # try:
        #     self.eat("[")
        # except:
        # if not self.tokenizer.has_more_tokens(): # already doing it by itself
        #     raise Exception()
        if self.tokenizer.token_type() == Token_Types.symbol:
            if self.tokenizer.symbol() == '[':
                self.eat('[')
                self.write("<symbol> [ </symbol>")
                self.compile_expression()
                self.eat(']')
                self.write("<symbol> ] </symbol>")
                return

            # Not an array access — try a subroutine-call continuation;
            # any other symbol means the term ended at the bare variable.
            try:
                self.subroutineCall_continue()
            except Exception:
                # raise Exception("If there is a symbol in the token it have to be . or [ or (.")
                return

    def possible_op_term(self):
        """
        If the next token is a suitable operation symbol than compile more terms,
        otherwise return nothing.
        Recurses to consume an arbitrarily long (op term)* chain.
        """
        # There is no op term
        if self.tokenizer.token_type() != Token_Types.symbol:
            # raise Exception("After term can be only nothing or (op term)*.")
            return
        op = self.tokenizer.symbol()

        if op not in OPERANDS:
            # raise Exception("Invalid operator use in term.")
            return  # should it be like this?

        try:
            # if op in SPECIAL_SYMBOL.keys():
            #     op = SPECIAL_SYMBOL[op]
            self.eat(op)
        except Exception:
            return
        # There is op term
        self.write("<symbol> " + op + " </symbol>")
        self.compile_term()

        self.possible_op_term()

    def compile_expression_list(self):
        """
        Compile a comma-separated list of expressions, which may be empty.
        A failed first compile_expression() is treated as an empty list
        (its internal buffering already discarded any partial output).
        """
        self.write("expressionList", True)
        self.num_spaces += 1

        try:
            self.compile_expression()
        except Exception:
            self.num_spaces -= 1
            self.write("expressionList", True, True)
            return

        self.possible_more_expression()
        self.num_spaces -= 1
        self.write("expressionList", True, True)

    def possible_more_expression(self):
        """
        If the next token is a ',' than compile more expressions,
        otherwise return nothing.
        Probes for ',' with eat() and recurses for each extra expression.
        """
        try:
            self.eat(',')
        except Exception:
            return
        self.write("<symbol> , </symbol>")
        self.compile_expression()

        self.possible_more_expression()

    def write(self,
              statement,
              delim=False,
              end=False,
              new_line=True,
              no_space=False,
              use_buffer=False):
        """
        Write one piece of XML output to the open output file.

        :param statement: the text to write; with delim=True it is the
            bare tag name, otherwise it is written as-is.
        :param delim: wrap statement in angle brackets ("<statement>").
        :param end: prefix the tag name with '/' to emit a closing tag
            (only meaningful together with delim=True).
        :param new_line: append END_LINE after the statement.
        :param no_space: skip the indentation prefix (SPACE * num_spaces).
        :param use_buffer: first flush (and clear) the speculative
            self.buffer, so staged opening tags precede this write.
        :return: None
        """
        if use_buffer:
            self.output.write(self.buffer)
            self.buffer = ""
        if end:
            statement = "/" + statement
        if delim:
            statement = "<" + statement + ">"
        if not no_space:
            statement = SPACE * self.num_spaces + statement
        if new_line:
            statement += END_LINE

        self.output.write(statement)

        # if delim:
        #     self.output.write(TAB * self.num_spaces + "<" + statement + ">")
        # else:

        # if new_line:
        #     self.output.write(END_LINE)

    def write_terminal(self, t_type, arg):
        """
        Write a complete one-line terminal element:
        "<t_type> arg </t_type>" followed by a newline.

        :param t_type: the tag name (token-type string, e.g. "keyword").
        :param arg: the token text to place between the tags.
        :return: None
        """
        self.write(t_type, delim=True, new_line=False, no_space=False)
        self.write(" " + arg + " ", delim=False, new_line=False, no_space=True)
        self.write(t_type, delim=True, new_line=True, end=True, no_space=True)

    def cleanbuffer(self):
        """Discard staged speculative output and undo its indent level."""
        self.num_spaces -= 1
        self.buffer = ""
class CompilationEngine(object):
    def __init__(self, inputfile, outputfile):
        self._inputfile = inputfile
        self._outputfile = outputfile
        self._tokenizer: JackTokenizer = None
        self._cur_root = []
        self._root = None
        self._init()

    def _init(self):
        self._inputbuf = self.create_buffer(self._inputfile)
        self._outputbuf = self.create_buffer(self._outputfile, mode="w+")
        self._tokenizer = JackTokenizer(self._inputbuf)

    def create_buffer(self, fn, mode='r'):

        if isinstance(fn, str) or isinstance(fn, unicode):
            return open(fn, mode)
        elif isinstance(fn, file) or isinstance(fn, IOBase):
            return fn
        else:
            raise ValueError("file object show file or readable")

    def compile_class(self):
        parent = self._set_parent("class")
        self._root = parent
        self._advance()
        self._pop_required(parent, TokenType.keyword, KeywordType.CLASS)
        self._pop_required(parent, TokenType.identifier)
        self._pop_required(parent, TokenType.symbol, "{")

        try:
            while self._is_class_var():
                self.compile_class_var_desc()
                self._advance()

            while self._is_subroutine():
                self.compile_subroutine()
                self._advance()
            self._pop_required(parent, TokenType.symbol, "}")
        finally:
            self._outputbuf.write(
                unicode(
                    et.tostring(self._root, pretty_print=True,
                                method="c14n2").decode("utf-8")))

        self._outputbuf.close()

    def _required_type(self, token_type, val):
        tp, tv = self._token()
        if token_type != tp or (
            (tp == TokenType.keyword or tp == TokenType.symbol) and
            (val != tv)):
            raise ValueError("token must be %s,%s" % (token_type, val))

    def compile_class_var_desc(self):
        parent = self._set_parent("classVarDec")
        # 具体可以细分变量类型检查,标识符正确检查
        while not self.is_token(TokenType.symbol, ";"):
            parent.append(self._build_element())
            self._advance()
        parent.append(self._build_element())
        self._remove_parent()

    def compile_subroutine(self):
        parent = self._set_parent("subroutineDec")
        while not self.is_token(TokenType.symbol, "("):
            parent.append(self._build_element())
            self._advance()
        parent.append(self.required(TokenType.symbol, "("))
        self._advance()
        self.compile_parameter_list()

        parent.append(self.required(TokenType.symbol, ")"))
        self._advance()

        self._compile_body()
        self._remove_parent()

        # if self._tokenizer.token_type()==TokenType.KEY_WORD:

    def _compile_body(self):
        parent = self._set_parent("subroutineBody")
        parent.append(self.required(TokenType.symbol, "{"))
        self._advance()
        while self._is_var_desc():
            self.compile_var_desc()
            self._advance()
        self.compile_statements()
        parent.append(self.required(TokenType.symbol, "}"))
        self._remove_parent()

    def _remove_parent(self):
        self._cur_root.pop()

    def compile_parameter_list(self):
        parent = self._set_parent("parameterList")
        while not self.is_token(TokenType.symbol, ")"):
            parent.append(self._build_element())
            self._advance()
        self._remove_parent()

    def compile_var_desc(self):
        parent = self._set_parent("varDec")
        while not self.is_token(TokenType.symbol, ";"):
            parent.append(self._build_element())
            self._advance()
        parent.append(self.required(TokenType.symbol, ";"))
        self._remove_parent()

    def compile_statements(self):

        self._set_parent("statements")

        while self._is_statement():
            if self.is_let_statement():
                self.compile_let()
            if self.is_do_statement():
                self.compile_do()
            if self.is_return_statement():
                self.compile_return()
            if self.is_if_statement():
                self.compile_if()
                continue
            if self.is_while_statement():
                self.compile_while()
                continue
            self._advance()

        self._remove_parent()

    def compile_do(self):
        parent = self._set_parent("doStatement")
        parent.append(self.required(TokenType.keyword, KeywordType.DO))
        self._advance()
        while not self.is_token(TokenType.symbol, "("):
            parent.append(self._build_element())
            self._advance()
        parent.append(self.required(TokenType.symbol, "("))
        self._advance()
        self.compile_expression_list()
        parent.append(self.required(TokenType.symbol, ")"))
        self._advance()
        parent.append(self.required(TokenType.symbol, ";"))
        self._remove_parent()

    def compile_let(self):
        parent = self._set_parent("letStatement")
        parent.append(self.required(TokenType.keyword, KeywordType.LET))
        self._advance()
        parent.append(self.required(TokenType.identifier))
        self._advance()
        if self.is_token(TokenType.symbol, "["):
            parent.append(self._build_element())
            self._advance()
            self.compile_expression()
            parent.append(self.required(TokenType.symbol, "]"))
            self._advance()
        # 有可能是数组
        parent.append(self.required(TokenType.symbol, "="))
        self._advance()
        self.compile_expression()
        parent.append(self.required(TokenType.symbol, ";"))
        self._remove_parent()

    def compile_while(self):
        parent = self._set_parent("whileStatement")
        self._pop_required(parent, TokenType.keyword, KeywordType.WHILE)
        self._pop_required(parent, TokenType.symbol, "(")
        self.compile_expression()
        self._pop_required(parent, TokenType.symbol, ")")
        self._pop_required(parent, TokenType.symbol, "{")
        self.compile_statements()
        self._pop_required(parent, TokenType.symbol, "}")
        self._remove_parent()

    def compile_return(self):
        """Compile a return statement: 'return' expr? ';'."""
        parent = self._set_parent("returnStatement")
        self._pop_required(parent, TokenType.keyword, KeywordType.RETURN)
        # An expression is present unless ';' follows immediately.
        if not self.is_token(TokenType.symbol, ";"):
            self.compile_expression()
        parent.append(self.required(TokenType.symbol, ";"))
        self._remove_parent()

    def compile_if(self):
        """Compile an if statement with an optional trailing else clause."""
        parent = self._set_parent("ifStatement")
        self._pop_required(parent, TokenType.keyword, KeywordType.IF)
        self._pop_required(parent, TokenType.symbol, "(")
        self.compile_expression()
        self._pop_required(parent, TokenType.symbol, ")")
        self._pop_required(parent, TokenType.symbol, "{")
        self.compile_statements()
        self._pop_required(parent, TokenType.symbol, "}")
        # Only consume an else block when one actually follows.
        if self.is_token(TokenType.keyword, KeywordType.ELSE):
            self._pop_required(parent, TokenType.keyword, KeywordType.ELSE)
            self._pop_required(parent, TokenType.symbol, "{")
            self.compile_statements()
            self._pop_required(parent, TokenType.symbol, "}")
        self._remove_parent()

    def compile_expression(self):
        """Compile an expression: term (op term)*."""
        parent = self._set_parent("expression")
        # Keep consuming terms until an expression terminator is reached;
        # each binary operator between terms becomes a sibling element.
        while not self._is_end():
            self.compile_term()
            if self._is_op(False):
                parent.append(self._build_element())
                self._advance()
        self._remove_parent()

    def compile_term(self):
        """Compile a single term of an expression into a <term> element.

        Handles parenthesized sub-expressions, array indexing, unary
        operators, identifiers (plain variables, calls, and dotted
        method/function calls), and constants. Loops until it reaches a
        binary operator or an expression terminator (see _is_op/_is_end).
        """
        parent = self._set_parent("term")
        # A '-' at the very start of a term is unary, not a binary op;
        # `first` makes _is_op tolerate it on the first iteration only.
        first = True
        while not self._is_op(first) and not self._is_end():
            first = False
            if self.is_token(TokenType.symbol, "("):
                # '(' expression ')'
                parent.append(self._build_element())
                self._advance()
                self.compile_expression()
                parent.append(self.required(TokenType.symbol, ")"))
            elif self.is_token(TokenType.symbol, "["):
                # '[' expression ']' — array subscript
                parent.append(self._build_element())
                self._advance()
                self.compile_expression()
                parent.append(self.required(TokenType.symbol, "]"))
            elif self._is_unary_op():
                # unaryOp term — recurse for the operand
                parent.append(self._build_element())
                self._advance()
                self.compile_term()
                continue
            elif self.is_token(TokenType.identifier):
                # varName | subroutineName '(' ... ')' | name '.' subroutine '(' ... ')'
                parent.append(self._build_element())
                self._advance()
                if self.is_token(TokenType.symbol, "("):
                    self.compile_expression_list()
                    parent.append(self.required(TokenType.symbol, ")"))
                if self.is_token(TokenType.symbol, "."):
                    parent.append(self._build_element())
                    self._advance()
                    self._pop_required(parent, TokenType.identifier)
                    self._pop_required(parent, TokenType.symbol, "(")
                    self.compile_expression_list()
                    parent.append(self.required(TokenType.symbol, ")"))
                    self._advance()
                continue

            else:
                # constant (integer/string/keyword) — emit as-is
                parent.append(self._build_element())
            self._advance()
        self._remove_parent()

    def _pop_required(self, parent, tk, val=None):
        """Append the required token's element to `parent`, then advance."""
        element = self.required(tk, val)
        parent.append(element)
        self._advance()

    def _is_op(self, first):
        """Return True when the current token is a binary operator.

        `first` is True when the token begins a term; in that position a
        '-' is a unary minus, not a binary operator.

        Bug fix: the old expression grouped as
        `(tk == symbol and val in '+*/&|<>=') or (val == '-' and not first)`,
        so the '-' clause never checked the token type and a non-symbol
        token whose value is "-" (e.g. a string constant) was treated as
        an operator. The type check now guards both clauses.
        """
        tk, val = self._token()
        if tk != TokenType.symbol:
            return False
        return val in '+*/&|<>=' or (val == '-' and not first)

    def _is_unary_op(self):
        """Return True when the current token is a unary '-' or '~'."""
        token_type, value = self._token()
        if token_type != TokenType.symbol:
            return False
        return value in '-~'

    def compile_expression_list(self):
        """Compile a (possibly empty) comma-separated argument list."""
        parent = self._set_parent("expressionList")
        # The list ends at the call's closing ')'.
        while True:
            if self.is_token(TokenType.symbol, ")"):
                break
            self.compile_expression()
            if self.is_token(TokenType.symbol, ","):
                parent.append(self._build_element())
                self._advance()
        self._remove_parent()

    def build_identifier(self):
        """Build an <identifier> element from the current token."""
        element = et.Element("identifier")
        element.text = self._tokenizer.identifier()
        return element

    def build_keyword(self):
        """Build a <keyword> element; the text is the lowercased keyword name."""
        element = et.Element("keyword")
        element.text = self._tokenizer.keyword().name.lower()
        return element

    def build_symbol(self):
        """Build a <symbol> element from the current token."""
        element = et.Element("symbol")
        element.text = self._tokenizer.symbol()
        return element

    def _token(self):
        """Return the current token as a (TokenType, value) pair.

        The value is a KeywordType for keywords, the token text for
        symbols/identifiers/string constants, the integer for integer
        constants, and (None, None) for anything unrecognized.

        Fixes: removed a leftover debug print that wrote every token to
        stdout, and reused the already-fetched token type instead of
        querying the tokenizer again in every branch.
        """
        token_type = self._tokenizer.token_type()
        if token_type == TokenType.keyword:
            value = self._tokenizer.keyword()
        elif token_type == TokenType.symbol:
            value = self._tokenizer.symbol()
        elif token_type == TokenType.identifier:
            value = self._tokenizer.identifier()
        elif token_type == TokenType.integerConstant:
            value = self._tokenizer.intVal()
        elif token_type == TokenType.stringConstant:
            value = self._tokenizer.stringVal()
        else:
            # Unknown token type: signal with a (None, None) pair.
            return None, None
        return token_type, value

    def _advance(self):
        """Advance the tokenizer if tokens remain; otherwise do nothing."""
        if not self._tokenizer.has_more_tokens():
            return
        self._tokenizer.advance()

    def required(self, token, val=None):
        """Assert the current token matches and return its XML element."""
        self._required_type(token, val)
        element = self._build_element()
        return element

    def _build_element(self):
        """Build an element named after the token type, text = token value.

        Keyword values are KeywordType members and are rendered as their
        lowercased names; everything else is used verbatim.
        """
        token_type, value = self._token()
        element = et.Element(token_type.name)
        if isinstance(value, KeywordType):
            value = value.name.lower()
        element.text = value
        return element

    def _is_class_var(self):
        """Return True at the start of a class variable declaration."""
        return (self.is_token(TokenType.keyword, KeywordType.FIELD)
                or self.is_token(TokenType.keyword, KeywordType.STATIC))

    def is_token(self, token, val=None):
        """Check the current token's type, and its value when `val` is given."""
        token_type, value = self._token()
        if token_type != token:
            return False
        # With no expected value, the type match alone decides.
        return val is None or value == val

    def _get_parent(self):
        """Return the innermost open element, or None at the top level."""
        return self._cur_root[-1] if self._cur_root else None

    def _set_parent(self, name):
        """Open a new element named `name`, attach it, and push it on the stack."""
        enclosing = self._get_parent()
        element = et.Element(name)
        # The root element has no enclosing parent to attach to.
        if enclosing is not None:
            enclosing.append(element)
        self._cur_root.append(element)
        return element

    def _is_subroutine(self):
        """Return True at the start of a subroutine declaration."""
        for keyword in (KeywordType.FUNCTION, KeywordType.CONSTRUCTOR,
                        KeywordType.METHOD):
            if self.is_token(TokenType.keyword, keyword):
                return True
        return False

    def _is_statement(self):
        """Return True when the current token begins any Jack statement.

        Fix: the old cascade of `if ...: return True` fell off the end
        and implicitly returned None for non-statement tokens; this now
        always returns a bool (backward compatible — both are falsy).
        """
        return (self.is_let_statement()
                or self.is_do_statement()
                or self.is_return_statement()
                or self.is_if_statement()
                or self.is_while_statement())

    def is_while_statement(self):
        """Return True when the current token is the `while` keyword."""
        return self.is_token(TokenType.keyword, KeywordType.WHILE)

    def is_let_statement(self):
        """Return True when the current token is the `let` keyword."""
        return self.is_token(TokenType.keyword, KeywordType.LET)

    def is_do_statement(self):
        """Return True when the current token is the `do` keyword."""
        return self.is_token(TokenType.keyword, KeywordType.DO)

    def is_return_statement(self):
        """Return True when the current token is the `return` keyword."""
        return self.is_token(TokenType.keyword, KeywordType.RETURN)

    def is_if_statement(self):
        """Return True when the current token is the `if` keyword."""
        return self.is_token(TokenType.keyword, KeywordType.IF)

    def _is_var_desc(self):
        """Return True when the current token is the `var` keyword."""
        return self.is_token(TokenType.keyword, KeywordType.VAR)

    def _is_end(self):
        """Return True when the current token terminates an expression.

        Terminators are ';', ')', ',' and ']'. Fix: the original or-chain
        tested ';' twice; the duplicate has been removed.
        """
        for terminator in (";", ")", ",", "]"):
            if self.is_token(TokenType.symbol, terminator):
                return True
        return False
class CompilationEngine:
    """Compiles a tokenized .jack class directly to VM code.

    Drives a JackTokenizer over the input, records declarations in a
    SymbolTable, and emits VM commands through a VMWriter.

    Fixes relative to the original: the Python-2-only `<>` operator in
    compile_subroutine_call is replaced with `!=` (same semantics on
    Python 2, valid on Python 3), and the unused `is_print_unary` local
    in compile_expression is removed.
    """

    def __init__(self, input_file, output_file):
        self.jack_tokenizer = JackTokenizer(input_file)
        self.symbol_table = SymbolTable()
        self.writer = VMWriter(output_file)
        self.class_name = ""
        self.subroutine_name = ""
        self.return_type = ""
        self.label_counter_if = 0
        self.label_counter_while = 0
        self.num_args_called_function = 0
        self.is_unary = False
        # Jack binary operator -> VM command ('*' and '/' become OS calls).
        self.dic_arithmetic = {"+" : "add" , "-" : "sub", "*" : "call Math.multiply 2",
                               "/" : "call Math.divide 2", "&" : "and", "|" : "or", "<" : "lt", ">" : "gt", "=" : "eq"}

    def compile_class(self):
        """Compile a whole class: 'class' className '{' classVarDec* subroutineDec* '}'."""
        # "class className {
        for i in range(NUM_TOKENS_CLASS_DEC):
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # saves the className
            if self.jack_tokenizer.token_type() == IDENTIFIER:
                self.class_name = self.jack_tokenizer.identifier()
        # classVarDec* or SubroutineDec*
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == KEYWORD and (self.jack_tokenizer.key_word() == "static" or
                                          self.jack_tokenizer.key_word() == "field"):
                self.compile_class_var_dec()
            if token_type == KEYWORD and (self.jack_tokenizer.key_word() == "function" or
                                          self.jack_tokenizer.key_word() == "method" or
                                          self.jack_tokenizer.key_word() == "constructor"):
                self.compile_subroutine()
            if token_type == SYMBOL and self.jack_tokenizer.symbol() == "}":
                break

    def compile_class_var_dec(self):
        """Record a 'static'/'field' declaration in the symbol table (no code emitted)."""
        # "static" or "field"
        kind = self.jack_tokenizer.key_word()
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # type
        if self.jack_tokenizer.token_type() == KEYWORD:
            type = self.jack_tokenizer.key_word()
        else:
            type = self.jack_tokenizer.identifier()
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == IDENTIFIER:
                name = self.jack_tokenizer.identifier()
                self.symbol_table.define(name, type, kind)
            elif token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == ";":
                    break

    def compile_subroutine(self):
        """Compile one constructor/function/method declaration and its body."""
        self.symbol_table.start_subroutine()
        self.subroutine_name = ""
        self.return_type = ""
        self.label_counter_if = 0
        self.label_counter_while = 0
        #  the curr token : "constructor" or "function" or "method"
        type_of_subroutine = self.jack_tokenizer.key_word()
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # the curr token : return type of the subroutine
        if self.jack_tokenizer.token_type() == KEYWORD:
            self.return_type = self.jack_tokenizer.key_word()
        else:
            self.return_type = self.jack_tokenizer.identifier()
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        self.subroutine_name = self.jack_tokenizer.identifier()
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            if self.jack_tokenizer.symbol() == "(":
                # A method implicitly receives `this` as argument 0.
                if type_of_subroutine == "method":
                    self.symbol_table.define(THIS, self.class_name, ARG)
                self.compile_parameter_list()
                # the curr token should be -  ")"
            if self.jack_tokenizer.symbol() == '{':
                while self.jack_tokenizer.has_more_tokens():
                    self.jack_tokenizer.advance()
                    token_type = self.jack_tokenizer.token_type()
                    if token_type == KEYWORD:
                        if self.jack_tokenizer.key_word() == "var":
                            self.compile_var_dec()
                            continue
                        else:
                            # All var decs are seen; now the local count is known
                            # and the `function` command can be emitted.
                            self.writer.write_function(self.class_name +
                                                       "." + self.subroutine_name, self.symbol_table.var_count(VAR))
                            if type_of_subroutine == "constructor":
                                # Allocate memory for the fields and anchor `this`.
                                self.writer.write_push(CONST, self.symbol_table.var_count(FIELD))
                                self.writer.write_call("Memory.alloc", 1)
                                self.writer.write_pop("pointer", 0)
                            elif type_of_subroutine == "method":
                                # `this` arrives as argument 0.
                                self.writer.write_push(ARGUMENT, 0)
                                self.writer.write_pop("pointer", 0)
                            self.compile_statements()
                            # the curr token should be -  "}"
                            break
                break

    def compile_parameter_list(self):
        """Record the subroutine's parameters in the symbol table as ARG entries."""
        kind = ARG
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            # int, bool....
            if token_type == KEYWORD:
                type = self.jack_tokenizer.key_word()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                name = self.jack_tokenizer.identifier()
                self.symbol_table.define(name, type, kind)
            # className
            elif token_type == IDENTIFIER:
                type = self.jack_tokenizer.identifier()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                name = self.jack_tokenizer.identifier()
                self.symbol_table.define(name, type, kind)
            # end of parameter list
            if token_type == SYMBOL and self.jack_tokenizer.symbol() == ")":
                break

    def compile_var_dec(self):
        """Record a 'var' declaration's names in the symbol table (no code emitted)."""
        # should be "var"
        kind = self.jack_tokenizer.key_word()
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # type
        if self.jack_tokenizer.token_type() == KEYWORD:
            type = self.jack_tokenizer.key_word()
        else:
            type = self.jack_tokenizer.identifier()
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == IDENTIFIER:
                name = self.jack_tokenizer.identifier()
                self.symbol_table.define(name, type, kind)
            if token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == ";":
                    break

    def compile_statements(self):
        """Dispatch on the statement keyword until the enclosing '}' is reached."""
        while True:
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "do":
                self.compile_do()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "let":
                self.compile_let()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "while":
                self.compile_while()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "return":
                self.compile_return()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            # compile_if returns advanced
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "if":
                self.compile_if()
            if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "}":
                break

    def compile_do(self):
        """Compile a do statement; the call's return value is discarded into temp 0."""
        self.num_args_called_function = 0
        self.compile_subroutine_call()
        self.writer.write_pop(TEMP, 0)
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # return from compile_subroutine_call with ";"

    def compile_let(self):
        """Compile a let statement, including array-element targets via `that`."""
        # `init` stays True for plain variables; it turns False once a '['
        # shows the target is an array element (assigned through `that 0`).
        init = True
        # the curr token - "let"
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == IDENTIFIER:
                name = self.jack_tokenizer.identifier()
                type = self.symbol_table.type_of(name)
                kind = self.symbol_table.kind_of(name)
                index = self.symbol_table.index_of(name)
            if token_type == SYMBOL:
                # there is an assignment to an array
                if self.jack_tokenizer.symbol() == "[":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    #  handle  - [expression]
                    self.compile_expression()
                    # the curr token -  "]"
                    self.writer.write_push(self.find_segment(kind), index)
                    self.writer.write_arithmetic("add")
                    self.writer.write_pop("pointer", 1)
                    init = False
                # should return from the compile_expression only with ";" or "]"
                if self.jack_tokenizer.symbol() == "=":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # handle the = expression
                    self.compile_expression()
                    # that is only for array
                    if init == False: # was also if type == "Array"
                        self.writer.write_pop(THAT, 0)
                    else:
                        self.writer.write_pop(self.find_segment(kind), index)
                # end of let statement
                if self.jack_tokenizer.symbol() == ";":
                    break

    def compile_while(self):
        """Compile a while statement using WHILE_EXP/WHILE_END label pairs."""
        while_counter = self.label_counter_while
        self.label_counter_while += 1
        # the curr token - "while"
        self.writer.write_label("WHILE_EXP" + str(while_counter))
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == "(":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                    # the curr token - ")"
                    # Negate the condition: if-goto jumps out when it fails.
                    self.writer.write_arithmetic("not")
                    self.writer.write_if("WHILE_END" + str(while_counter))
                if self.jack_tokenizer.symbol() == "{":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_statements()
                    # the curr token - "}"
                    self.writer.write_go_to("WHILE_EXP" + str(while_counter))
                    self.writer.write_label("WHILE_END" + str(while_counter))
                # NOTE(review): `token_type` here is stale — it was captured
                # before compile_statements advanced the tokenizer. It is
                # always SYMBOL inside this branch, so the check degenerates
                # to symbol() == "}" and behaves as intended; confirm before
                # restructuring.
                if token_type == SYMBOL and self.jack_tokenizer.symbol() == "}":
                    break

    def compile_return(self):
        """Compile a return statement; void subroutines push constant 0."""
        # the curr token - "return"
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ";":
            self.writer.write_push(CONST, "0")
        else:
            self.compile_expression()
            # should return from "compile_expression" only with ";"
        self.writer.write_return()

    def compile_if(self):
        """Compile an if/else statement using IF_TRUE/IF_FALSE/IF_END labels.

        Returns with the tokenizer already advanced past the statement,
        unlike the other statement compilers (see compile_statements).
        """
        if_counter = self.label_counter_if
        self.label_counter_if += 1
        # the curr token - "if"
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == "(":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                    # the curr token - ")"
                    self.writer.write_if("IF_TRUE" + str(if_counter))
                    self.writer.write_go_to("IF_FALSE" + str(if_counter))
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "{":
                    self.writer.write_label("IF_TRUE" + str(if_counter))
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_statements()
                # ~~~~~~~~~~ change : was token_type ~~~~~~~~~~~~~~
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "}":
                    break
        # Peek one token ahead to see whether an else clause follows.
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "else":
            # print "else"
            self.writer.write_go_to("IF_END" + str(if_counter))
            self.writer.write_label("IF_FALSE" + str(if_counter))
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # print "{"
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            self.compile_statements()
            # print "}"
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            self.writer.write_label("IF_END" + str(if_counter))
        else:
            self.writer.write_label("IF_FALSE" + str(if_counter))

    def compile_subroutine_call(self):
        """Compile name / name.method(...) and emit the VM `call`.

        `to_add` is True when an implicit `this` argument was pushed (a
        call on the current object or on a variable), so the argument
        count sent to write_call is incremented by one.
        """
        to_add = False
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # "subRoutineName" or ("className" | "varName", as part of className.subRoutineName)
        called_statement = self.jack_tokenizer.identifier()
        type = self.symbol_table.type_of(called_statement)
        kind = self.symbol_table.kind_of(called_statement)
        index = self.symbol_table.index_of(called_statement)

        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # case of "subRoutineCall(expressionList)
        if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "(":
            to_add = True
            called_statement = self.class_name + "." + called_statement
            self.writer.write_push(POINTER, 0)
            self.compile_expression_list()
            # the curr token - ")"
        # (className | varName).subroutineName(expressionList)
        elif self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ".":
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # subroutineName
            # (was `kind <> NONE` — Python-2-only operator)
            if kind != NONE:
                # Method call on a variable: push the object, dispatch on its type.
                to_add = True
                self.writer.write_push(self.find_segment(kind), index)
                called_statement = type + "." + self.jack_tokenizer.identifier()
            else:
                # Static call on a class name.
                called_statement = called_statement + "." + self.jack_tokenizer.identifier()
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # "("
            # expressionList
            self.compile_expression_list()
            # ")"
        if to_add:
            self.writer.write_call(called_statement, self.num_args_called_function + 1)
        else:
            self.writer.write_call(called_statement, self.num_args_called_function)

    def compile_expression(self):
        """Compile term (op term)*: operands are pushed, then the operator is emitted."""
        if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "-":
            self.is_unary = True
        self.compile_term()
        while self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() in\
                ["+", "-", "*", "/", "&", "|", "<", ">", "="]:
            arit_symbol = self.jack_tokenizer.symbol()
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # A '-' right after a binary operator is a unary minus.
            if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "-":
                self.is_unary = True
            self.compile_term()
            self.writer.write_arithmetic(self.dic_arithmetic[arit_symbol])

    def compile_term(self):
        """Compile a single term and push its value onto the VM stack."""
        while True:
            token_type = self.jack_tokenizer.token_type()
            # A binary operator or terminator ends the term — unless the
            # pending token was flagged as a unary '-'.
            if token_type == SYMBOL and not self.is_unary and self.jack_tokenizer.symbol() in\
                    [",", ";", ")", "}","]", "+", "-", "*", "/", "&", "|", "<", ">", "="]:
                break
            if token_type == INT_CONST:
                self.writer.write_push(CONST, self.jack_tokenizer.int_val())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == STRING_CONST:
                self.compile_string()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            # true is -1 (not 0); false and null are 0.
            if token_type == KEYWORD and self.jack_tokenizer.key_word() in ["true", "false", "null"]:
                self.writer.write_push(CONST, 0)
                if self.jack_tokenizer.key_word() == "true":
                    self.writer.write_arithmetic("not")
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            # like in return this
            if token_type == KEYWORD and self.jack_tokenizer.key_word() == "this":
                self.writer.write_push(POINTER, 0)
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == SYMBOL and self.jack_tokenizer.symbol() in ["~", "-"]:
                symbol = self.jack_tokenizer.symbol()
                self.is_unary = False
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                self.compile_term()
                if symbol == "~":
                    self.writer.write_arithmetic("not")
                else:
                    self.writer.write_arithmetic("neg")
                break
            if token_type == SYMBOL and self.jack_tokenizer.symbol() == "(":
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                self.compile_expression()
                # should return from compile_expression only with ")"
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == IDENTIFIER:
                # `is_add` means "treat as a variable" (push it / dispatch on
                # its type); names starting with an uppercase letter are
                # assumed to be class names.
                is_add = True
                name = self.jack_tokenizer.identifier()
                kind = self.symbol_table.kind_of(name)
                index = self.symbol_table.index_of(name)
                if name[0].isupper():
                    is_add = False
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() in\
                        [",", ";", ")", "}","]", "+", "-", "*", "/", "&", "|", "<", ">", "=", "&amp;", "&lt;","&gt;"]:
                    # in case of a > ...or b;
                    self.writer.write_push(self.find_segment(kind), self.symbol_table.index_of(name))
                    break
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "[":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                    # should return only "]"
                    self.writer.write_push(self.find_segment(kind), self.symbol_table.index_of(name))
                    self.writer.write_arithmetic("add")
                    self.writer.write_pop(POINTER, 1)
                    self.writer.write_push(THAT, 0)
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    break
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "(":
                    self.writer.write_push(POINTER, 0)
                    self.compile_expression_list()
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # case of a = ... bar()
                    self.writer.write_call(self.class_name + "." + name, self.num_args_called_function + 1)
                    break
                # (className | varName).subroutineName(expressionList)
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ".":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # subroutineName
                    if is_add:
                        type = self.symbol_table.type_of(name)
                        name = type + "." + self.jack_tokenizer.identifier()
                    else:
                        name = name + "." + self.jack_tokenizer.identifier()
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # "("
                    # expressionList
                    if is_add:
                        self.writer.write_push(self.find_segment(kind), index)
                    self.compile_expression_list()
                    # ")"
                    if is_add:
                        self.writer.write_call(name, self.num_args_called_function + 1)
                    else:
                        self.writer.write_call(name, self.num_args_called_function)
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()

    def compile_expression_list(self):
        """Compile a call's argument list; stores the count for write_call."""
        num_args = 0
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ")":
                break
            else:
                num_args += 1
                self.compile_expression()
                if self.jack_tokenizer.symbol() == ")":
                    break
                # print ","
        self.num_args_called_function = num_args

    def find_segment(self, kind):
        """Map a symbol-table kind to its VM memory segment."""
        if kind == ARG:
            return ARGUMENT
        if kind == VAR:
            return LCL
        if kind == FIELD:
            return THIS
        if kind == STATIC:
            return STATIC

    def compile_string(self):
        """Build a String object char-by-char via String.new/appendChar."""
        length = len(self.jack_tokenizer.string_val())
        self.writer.write_push(CONST, length)
        self.writer.write_call("String.new", 1)
        for i in range(len(self.jack_tokenizer.string_val())):
            uni = ord(self.jack_tokenizer.string_val()[i])
            self.writer.write_push(CONST, uni)
            self.writer.write_call("String.appendChar", 2)
class CompilationEngine(object):
    """Recursively compiles a .jack file into Hack VM code.

    Drives a JackTokenizer over the input, tracks identifiers in a
    SymbolTable, and emits VM commands through a VMWriter.  All fatal
    syntax errors print a message and exit with status 1.
    """

    def __init__(self, inFile):
        super(CompilationEngine, self).__init__()
        # create an internal tokenizer to iterate through
        self.tokenizer = JackTokenizer(inFile)

        # setup the output file: truncate it, then reopen for appending
        self.outputPath = inFile.name.replace(".jack", ".vm")
        self.outputFile = open(self.outputPath, 'w')
        self.outputFile.close()
        self.outputFile = open(self.outputPath, 'a')

        # create a VMWriter with the output file
        self.vmWriter = VMWriter(self.outputFile)

        # create a symbol table
        self.symbolTable = SymbolTable()

        # state shared between the compile methods
        self.className = ""
        self.currentName = ""
        self.currentKind = ""
        self.currentType = ""
        # counters that keep generated if/while labels unique per class
        self.ifCounter = 0
        self.whileCounter = 0

    def start(self):
        """Starts the compilation by advancing to the first token
        and then calling __compileClass()"""

        # start the tokenizer
        self.tokenizer.advance()

        # start the compilation
        self.__compileClass()

    def __checkIdentifier(self):
        """Makes sure that the current token is an identifier and saves that
        identifier as the current name for the symbol table"""
        if self.tokenizer.tokenType() == "IDENTIFIER":
            self.currentName = self.tokenizer.identifier()
            return True
        return False

    def __checkType(self):
        """Checks for a valid type (including void) and saves that type
        for the symbol table"""
        if self.tokenizer.tokenType() == "KEYWORD" and \
                self.tokenizer.keyWord() in ["int", "char", "boolean", "void"]:
            self.currentType = self.tokenizer.keyWord()
            return True
        elif self.tokenizer.tokenType() == "IDENTIFIER":
            self.currentType = self.tokenizer.identifier()
            return True
        else:
            return False

    def __compileType(self):
        """Compiles a complete jack type grammar. Returns false if there is an error"""
        # check for valid keyword
        if self.tokenizer.tokenType() == "KEYWORD":
            k = self.tokenizer.keyWord()
            if k not in ["int", "char", "boolean"]:
                print("Error: type keyword must be int, char, or boolean")
                return False

            self.tokenizer.advance()
            return True
        # check for className
        else:
            res = self.__compileClassName()
            # if __compileClassName() errors, this is not a valid type
            if not res:
                print("Error: type not a valid className")
            return res

    def __compileClassName(self):
        """Compiles a complete jack className grammar. Returns false if there is
        an error"""
        if self.tokenizer.tokenType() != "IDENTIFIER":
            return False

        self.tokenizer.advance()
        return True

    def __compileSubroutineName(self):
        """Compiles a complete jack subroutineName. Returns false if there is an
        error"""
        if self.tokenizer.tokenType() != "IDENTIFIER":
            return False

        self.tokenizer.advance()
        return True

    def __compileVarName(self):
        """Compiles a complete jack varName. Returns false if there is an
        error"""
        if self.tokenizer.tokenType() != "IDENTIFIER":
            return False

        self.tokenizer.advance()
        return True

    def __compileClass(self):
        """Compiles a complete jack class grammar"""
        # find the class keyword
        if self.tokenizer.tokenType() != "KEYWORD" or \
                self.tokenizer.keyWord() != "class":
            print("Error: no class declaration found")
            sys.exit(1)

        self.tokenizer.advance()

        # find the className
        if not self.__checkIdentifier():
            print("Error: no class name found in class declaration")
            sys.exit(1)
        # save the class name
        self.className = self.tokenizer.identifier()
        self.tokenizer.advance()

        # find the open curly brace
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "{":
            print("Error: no opening brace found after class")
            # was sys.exit(0): a fatal parse error must exit non-zero like
            # every other error path in this class
            sys.exit(1)
        self.tokenizer.advance()

        # compile the classVarDecs
        while(self.tokenizer.tokenType() == "KEYWORD" and
              (self.tokenizer.keyWord() == "static" or
               self.tokenizer.keyWord() == "field")):
            self.__compileClassVarDec()

        # compile the subroutines
        while(self.tokenizer.tokenType() == "KEYWORD" and
              (self.tokenizer.keyWord() == "constructor" or
               self.tokenizer.keyWord() == "function" or
               self.tokenizer.keyWord() == "method")):
            self.__compileSubroutineDec()

        # find last curly brace
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "}":
            print("Error: no closing brace found after class definition")
            sys.exit(1)
        self.tokenizer.advance()

    def __compileClassVarDec(self):
        """Compiles a complete jack class variable declaration. This advances the
        tokenizer completely through the variable declaration"""
        # we already checked to make sure that the keyword is valid
        self.currentKind = self.tokenizer.keyWord()
        self.tokenizer.advance()

        # look for a valid type
        if not self.__checkType():
            print("Error: invalid type in classVarDec")
            sys.exit(1)
        self.tokenizer.advance()

        # check for varName
        if self.__checkIdentifier():
            self.symbolTable.define(
                self.currentName, self.currentType, self.currentKind)
            self.tokenizer.advance()
        else:
            print("Error: missing varName identifier in classVarDec")
            sys.exit(1)

        # check for comma then more varNames (possibly not existing)
        while self.tokenizer.tokenType() == "SYMBOL" and \
                self.tokenizer.symbol() == ",":
            self.tokenizer.advance()

            # check for varName again
            if self.__checkIdentifier():
                self.symbolTable.define(
                    self.currentName, self.currentType, self.currentKind)
                self.tokenizer.advance()
            else:
                print("Error: missing varName identifier in classVarDec")
                sys.exit(1)

        # check for closing semicolon
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ";":
            print("Error: missing semicolon after classVarDec")
            sys.exit(1)
        self.tokenizer.advance()

    def __compileSubroutineDec(self):
        """Compiles a complete jack subroutine description. This advances the
        tokenizer completely through the subroutine declaration"""
        # clear the subroutine symbol table
        self.symbolTable.startSubroutine()

        # since we already checked for the subroutine kind, grab it
        subroutineKind = self.tokenizer.keyWord()
        self.tokenizer.advance()

        # look for return type
        if not self.__checkType():
            print("Error: illegal return type for subroutine")
            sys.exit(1)
        self.tokenizer.advance()

        # check for subroutineName and save it with the specified format
        if self.__checkIdentifier():
            currentSubroutineName = self.className + "." + self.currentName
            self.tokenizer.advance()
        else:
            print("Error: missing subroutineName in subroutineDec")
            sys.exit(1)

        # if the subroutine is a method, the first arg needs to be this
        if subroutineKind == "method":
            self.symbolTable.define("this", self.className, "arg")

        # check for open parentheses
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "(":
            print("Error: missing ( for parameter list")
            sys.exit(1)
        self.tokenizer.advance()

        # do parameter list (this could add nothing)
        self.__compileParameterList()

        # check for closing parentheses
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ")":
            print("Error: missing ) for parameter list")
            sys.exit(1)
        self.tokenizer.advance()

        # compile subroutine body
        self.__compileSubroutineBody(subroutineKind, currentSubroutineName)

    def __compileParameterList(self):
        """Compiles a complete jack parameter list grammar"""
        # we know all parameter lists are arguments, so set the current kind
        self.currentKind = "arg"

        # if the next symbol is a ), then there is no parameter list, so just return
        # the rest of compileSubroutine will handle writing that
        if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == ")":
            return
        # look for a valid type
        else:
            if not self.__checkType():
                print("Error: invalid type in parameter list")
                sys.exit(1)
            self.tokenizer.advance()

            # check for varName
            if self.__checkIdentifier():
                self.symbolTable.define(
                    self.currentName, self.currentType, self.currentKind)
                self.tokenizer.advance()
            else:
                print("Error: missing varName identifier in parameterList")
                sys.exit(1)

            # check for comma separated list of type and varName
            while self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == ",":
                # skip the comma
                self.tokenizer.advance()

                # look for a valid type
                if not self.__checkType():
                    print("Error: invalid type in parameter list")
                    sys.exit(1)
                self.tokenizer.advance()

                # check for varName
                if self.__checkIdentifier():
                    self.symbolTable.define(
                        self.currentName, self.currentType, self.currentKind)
                    self.tokenizer.advance()
                else:
                    print("Error: missing varName identifier in parameterList")
                    sys.exit(1)

    def __compileSubroutineBody(self, currentSubKind, currentSubName):
        """Compile a complete jack subroutine body grammar"""
        # check for {
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "{":
            print("Error: missing { for subroutine body")
            sys.exit(1)
        self.tokenizer.advance()

        # check to see if we need to compile varDec
        while self.tokenizer.tokenType() == "KEYWORD" and \
                self.tokenizer.keyWord() == "var":
            self.__compileVarDec()

        # write the function (varDecs must be counted first)
        self.vmWriter.writeFunction(
            currentSubName, self.symbolTable.varCount("var"))

        # write stuff for constructor
        if currentSubKind == "constructor":
            # allocate space for the object's fields. The field count must
            # be pushed unconditionally: the original skipped the push when
            # numFields == 0, leaving Memory.alloc without its argument and
            # corrupting the stack.
            numFields = self.symbolTable.varCount("field")
            self.vmWriter.writePush("constant", numFields)
            self.vmWriter.writeCall("Memory.alloc", 1)
            self.vmWriter.writePop("pointer", 0)
        # write stuff for method
        elif currentSubKind == "method":
            # get the this pointer
            self.vmWriter.writePush("argument", 0)
            self.vmWriter.writePop("pointer", 0)

        # compile statements
        self.__compileStatements()

        # check for closing }
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "}":
            print("Error: missing closing } for subroutine body")
            sys.exit(1)
        self.tokenizer.advance()

        return

    def __compileVarDec(self):
        """Compiles a complete jack varDec grammar"""
        # all var decs are of type var, so set it
        self.currentKind = "var"

        self.tokenizer.advance()

        # check for type
        if not self.__checkType():
            print("Error: invalid type in var dec")
            sys.exit(1)
        self.tokenizer.advance()

        # check for varName
        if self.__checkIdentifier():
            self.symbolTable.define(
                self.currentName, self.currentType, self.currentKind)
            self.tokenizer.advance()
        else:
            print("Error: missing varName identifier in varDec")
            sys.exit(1)

        # check for comma separated list of varNames
        while self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == ",":
            # skip the comma
            self.tokenizer.advance()

            # check for varName
            if self.__checkIdentifier():
                self.symbolTable.define(
                    self.currentName, self.currentType, self.currentKind)
                self.tokenizer.advance()
            else:
                print("Error: missing varName identifier in varDec")
                sys.exit(1)

        # check for semicolon
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ";":
            print("Error: missing ; after varDec")
            sys.exit(1)
        self.tokenizer.advance()

        return

    def __compileStatements(self):
        """Compiles a complete jack statements grammar"""
        # check for the keywords for all the statements
        while self.tokenizer.tokenType() == "KEYWORD":
            k = self.tokenizer.keyWord()

            if k == "let":
                self.__compileLetStatement()
            elif k == "if":
                self.__compileIfStatement()
            elif k == "while":
                self.__compileWhileStatement()
            elif k == "do":
                self.__compileDoStatement()
            elif k == "return":
                self.__compileReturnStatement()
            else:
                print("Error: invalid statement " + k)
                sys.exit(1)

    def __compileLetStatement(self):
        """Compiles a complete jack let statement grammar"""
        self.tokenizer.advance()

        # look for varName
        if not self.__checkIdentifier():
            print("Error: missing varName for let statement")
            # exit on error, consistent with every other syntax check
            sys.exit(1)
        self.tokenizer.advance()

        # get values from symbol table
        # NOTE(review): kindOf() is passed straight to writePush/writePop,
        # so SymbolTable is assumed to return VM segment names — confirm.
        varName = self.currentName
        kind = self.symbolTable.kindOf(varName)
        varType = self.symbolTable.typeOf(varName)
        index = self.symbolTable.indexOf(varName)
        isArray = False

        # check for [
        if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == "[":
            isArray = True
            self.tokenizer.advance()

            # compile expression
            self.__compileExpression()

            # compute base + index and park it in temp 2 (temp 0 is used by
            # do statements to discard return values)
            self.vmWriter.writePush(kind, index)
            self.vmWriter.writeArithmetic("add")
            self.vmWriter.writePop("temp", 2)

            # write the closing bracket
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "]":
                print("Error: missing closing ] in let statement")
                sys.exit(1)
            self.tokenizer.advance()

        # check for =
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "=":
            print("Error: missing = in let expression")
            sys.exit(1)
        self.tokenizer.advance()

        # compile expression
        self.__compileExpression()

        # write code to pop since expression puts result on top of stack
        if isArray:
            self.vmWriter.writePush("temp", 2)
            self.vmWriter.writePop("pointer", 1)
            self.vmWriter.writePop("that", 0)
        else:
            self.vmWriter.writePop(kind, index)

        # look for ;
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ";":
            print("Error: missing ; after let statement")
            sys.exit(1)
        self.tokenizer.advance()

    def __compileIfStatement(self):
        """Compiles a complete jack if statement grammar"""
        # setup local counter
        localIfCounter = self.ifCounter
        self.ifCounter += 1

        self.tokenizer.advance()

        # check for (
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "(":
            print("Error: missing ( in if statement")
            sys.exit(1)
        self.tokenizer.advance()

        # compile expression
        self.__compileExpression()

        # negate the condition so if-goto jumps to the false branch
        self.vmWriter.writeArithmetic("not")

        # check for )
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ")":
            print("Error: missing ) in if statement")
            sys.exit(1)
        self.tokenizer.advance()

        # check for {
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "{":
            print("Error: missing { for if statement")
            sys.exit(1)
        self.tokenizer.advance()

        # write the if for L1
        self.vmWriter.writeIf("if-false", localIfCounter)

        # compile more statements
        self.__compileStatements()

        # write the goto for L2
        self.vmWriter.writeGoto("if-true", localIfCounter)

        # check for }
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "}":
            print("Error: missing } after if statement")
            sys.exit(1)
        self.tokenizer.advance()

        # write label for L1
        self.vmWriter.writeLabel("if-false", localIfCounter)

        # check for else
        if self.tokenizer.tokenType() == "KEYWORD" and self.tokenizer.keyWord() == "else":
            self.tokenizer.advance()

            # check for {
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "{":
                print("Error: missing { for if statement")
                sys.exit(1)
            self.tokenizer.advance()

            # compile more statements
            self.__compileStatements()

            # check for }
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "}":
                print("Error: missing } after if statement")
                sys.exit(1)
            self.tokenizer.advance()

        # write label for L2
        self.vmWriter.writeLabel("if-true", localIfCounter)

    def __compileWhileStatement(self):
        """Compiles a complete jack while statement grammar"""
        # get counter and write label for L1
        localWhileCounter = self.whileCounter
        self.whileCounter += 1
        self.vmWriter.writeLabel("whileStart", localWhileCounter)

        self.tokenizer.advance()

        # check for (
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "(":
            print("Error: missing ( in while statement")
            sys.exit(1)
        self.tokenizer.advance()

        # compile expression
        self.__compileExpression()

        # negate the condition so if-goto jumps out of the loop
        self.vmWriter.writeArithmetic("not")

        # check for )
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ")":
            print("Error: missing ) in while statement")
            sys.exit(1)
        self.tokenizer.advance()

        # check for {
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "{":
            print("Error: missing { for while statement")
            sys.exit(1)
        self.tokenizer.advance()

        # write the if for L2
        self.vmWriter.writeIf("whileEnd", localWhileCounter)

        # compile more statements
        self.__compileStatements()

        # write the goto for L1
        self.vmWriter.writeGoto("whileStart", localWhileCounter)

        # check for }
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "}":
            print("Error: missing } after while statement")
            sys.exit(1)
        self.tokenizer.advance()

        # write the label for L2
        self.vmWriter.writeLabel("whileEnd", localWhileCounter)

    def __compileDoStatement(self):
        """Compiles a complete jack do statement grammar"""
        self.tokenizer.advance()

        # compile subroutine call
        if self.__checkIdentifier():
            firstHalf = self.currentName
            self.tokenizer.advance()
            if self.tokenizer.tokenType() == "SYMBOL" and (self.tokenizer.symbol() == "."
                                                           or self.tokenizer.symbol() == "("):
                self.__compileSubroutineCall(firstHalf)

        # check for semicolon
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ";":
            print("Error: missing ; after do statement")
            sys.exit(1)
        self.tokenizer.advance()

        # discard the called subroutine's return value
        self.vmWriter.writePop("temp", 0)

    def __compileReturnStatement(self):
        """Compiles a complete jack return statement grammar"""
        self.tokenizer.advance()

        # if the next symbol isn't a ;, it must be an expression
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ";":
            self.__compileExpression()

            # check again after compiling the expression that the next
            # token is the closing ;
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ";":
                print("Error: missing ; after return statement")
                sys.exit(1)
        else:
            # void subroutines still return 0 by convention
            self.vmWriter.writePush("constant", 0)

        self.tokenizer.advance()
        # write the return
        self.vmWriter.writeReturn()

    def __convertOp(self, op):
        """Converts the operators that interfere with xml tags to their properly
        escaped versions"""
        op = op.replace("&", "&amp;")
        op = op.replace("<", "&lt;")
        op = op.replace(">", "&gt;")
        op = op.replace("\"", "&quot;")

        return op

    def __compileExpression(self):
        """Compiles a complete jack expression grammar"""
        # compile term
        self.__compileTerm()

        # check for op (binary operators are left-associative here)
        while self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() in op:
            s = self.tokenizer.symbol()

            self.tokenizer.advance()

            # compile another term
            self.__compileTerm()

            # write op vm code; * and / have no VM primitive and call the OS
            if s == "+":
                self.vmWriter.writeArithmetic("add")
            elif s == "-":
                self.vmWriter.writeArithmetic("sub")
            elif s == "*":
                self.vmWriter.writeCall("Math.multiply", 2)
            elif s == "/":
                self.vmWriter.writeCall("Math.divide", 2)
            elif s == "&":
                self.vmWriter.writeArithmetic("and")
            elif s == "|":
                self.vmWriter.writeArithmetic("or")
            elif s == "<":
                self.vmWriter.writeArithmetic("lt")
            elif s == ">":
                self.vmWriter.writeArithmetic("gt")
            elif s == "=":
                self.vmWriter.writeArithmetic("eq")

    def __compileTerm(self):
        """Compiles a complete jack term grammar"""
        # term logic
        # check for integerConstant
        if self.tokenizer.tokenType() == "INT_CONST":
            self.vmWriter.writePush("constant", self.tokenizer.intVal())
            self.tokenizer.advance()
        # check for string constant
        elif self.tokenizer.tokenType() == "STRING_CONST":
            # need to make a string constant
            string = self.tokenizer.stringVal()

            # push the length of the string
            self.vmWriter.writePush("constant", len(string))

            # call String.new 1
            self.vmWriter.writeCall("String.new", 1)

            # append to create the string
            for letter in string:
                self.vmWriter.writePush("constant", ord(letter))
                self.vmWriter.writeCall("String.appendChar", 2)

            self.tokenizer.advance()
        # check for keyword for KeywordConstant
        elif self.tokenizer.tokenType() == "KEYWORD":
            k = self.tokenizer.keyWord()

            if k not in KeyWordConstant:
                print("Error: invalid KeyWordConstant " + k + " in term")
                sys.exit(1)

            # write the outputs for the keyword constants
            # (true is -1 on the Hack platform, hence push 1 then neg)
            if k == "null" or k == "false":
                self.vmWriter.writePush("constant", 0)
            elif k == "true":
                self.vmWriter.writePush("constant", 1)
                self.vmWriter.writeArithmetic("neg")
            elif k == "this":
                self.vmWriter.writePush("pointer", 0)

            self.tokenizer.advance()
        # check for symbol for either ( expression ) or unary op
        elif self.tokenizer.tokenType() == "SYMBOL":
            s = self.tokenizer.symbol()

            # ( expression )
            if s == "(":
                self.tokenizer.advance()

                # compile expression
                self.__compileExpression()

                # check for )
                if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ")":
                    print("Error: missing ) after expression in term")
                    sys.exit(1)
                self.tokenizer.advance()
            # unaryOp term
            elif s in unaryOp:
                self.tokenizer.advance()

                # compile term
                self.__compileTerm()

                # write the unary output
                if s == "-":
                    self.vmWriter.writeArithmetic("neg")
                else:
                    self.vmWriter.writeArithmetic("not")
            else:
                print("Error: invalid symbol " + s + " in term")
                sys.exit(1)
        # check for varName | varName [ expression ] | subroutineCall
        elif self.__checkIdentifier():
            # advance the tokenizer one more step to check for [, (, or other
            self.tokenizer.advance()
            firstHalf = self.currentName

            if self.tokenizer.tokenType() == "SYMBOL":
                s = self.tokenizer.symbol()

                # varName[expression]
                if s == "[":
                    # push the array address
                    self.vmWriter.writePush(self.symbolTable.kindOf(firstHalf),
                                            self.symbolTable.indexOf(firstHalf))

                    # skip [
                    self.tokenizer.advance()

                    # compile expression
                    self.__compileExpression()

                    # write vm code for array expression
                    self.vmWriter.writeArithmetic("add")
                    self.vmWriter.writePop("pointer", 1)
                    self.vmWriter.writePush("that", 0)

                    # check for ]
                    if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "]":
                        print("Error: missing ] after varName[expression]")
                        sys.exit(1)
                    self.tokenizer.advance()
                # subroutineCall
                elif s == "(" or s == ".":
                    # compile subroutineCall
                    self.__compileSubroutineCall(firstHalf)
                else:
                    self.vmWriter.writePush(self.symbolTable.kindOf(self.currentName),
                                            self.symbolTable.indexOf(self.currentName))
            else:
                self.vmWriter.writePush(self.symbolTable.kindOf(self.currentName),
                                        self.symbolTable.indexOf(self.currentName))
        else:
            print("Error: invalid term")
            sys.exit(1)

    def __compileSubroutineCall(self, firstHalf):
        """Compiles a complete jack subroutine call grammar"""
        # an uppercase first letter is taken to mean a class (function/
        # constructor) call rather than a method call on a variable
        isClass = firstHalf[0].isupper()
        fullSubroutineName = ""
        nArgs = 0

        # subroutineName
        if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == "(":
            fullSubroutineName = self.className + "." + firstHalf
            # since this a self method, we need to push pointer
            self.vmWriter.writePush("pointer", 0)
            self.tokenizer.advance()

            # compile expression list
            nArgs = self.__compileExpressionList(isClass)

            # check for )
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ")":
                print("Error: missing ) after expressionList in subroutineCall")
                sys.exit(1)
            self.tokenizer.advance()
        # className | varName
        elif self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == ".":
            self.tokenizer.advance()
            if self.__checkIdentifier():
                if isClass:
                    fullSubroutineName = firstHalf + "." + self.currentName
                else:
                    fullSubroutineName = self.symbolTable.typeOf(
                        firstHalf) + "." + self.currentName
                    # push the address of firstHalf
                    self.vmWriter.writePush(self.symbolTable.kindOf(
                        firstHalf), self.symbolTable.indexOf(firstHalf))
            else:
                print("Error: missing varName|className in subroutineCall")
                # exit on error, consistent with every other syntax check
                sys.exit(1)

            # check for (
            self.tokenizer.advance()
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "(":
                print("Error: missing ( in subroutineCall before expressionList")
                sys.exit(1)
            self.tokenizer.advance()

            # compile expression list
            nArgs = self.__compileExpressionList(isClass)

            # check for )
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ")":
                print("Error: missing ) after expressionList in subroutineCall")
                sys.exit(1)
            self.tokenizer.advance()
        else:
            print("Error: invalid subroutineCall")
            sys.exit(1)

        if fullSubroutineName != "":
            self.vmWriter.writeCall(fullSubroutineName, nArgs)

    def __compileExpressionList(self, isClass):
        """Compiles a complete jack expression list grammar and returns the
        argument count (method calls start at 1 for the implicit this)"""
        if isClass:
            argCounter = 0
        else:
            argCounter = 1

        # if the symbol is ), there is no expression list
        if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == ")":
            return argCounter
        else:
            # compile expression
            self.__compileExpression()

            argCounter += 1

            # loop until you dont see a comma
            while self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == ",":
                self.tokenizer.advance()

                # compile expression
                self.__compileExpression()

                argCounter += 1

            return argCounter
Example #10
0
class CompilationEngine(object):
    """Compiles a Jack class into VM code while also emitting an XML parse
    tree of the program structure.

    The XML tree is built with `et.Element` and serialized with
    `et.tostring(..., pretty_print=True, method="c14n2")` -- presumably
    lxml.etree; confirm against this module's imports.

    NOTE(review): mixes Python-2-only builtins (`unicode`, `file`) with
    Python-3 APIs (`IOBase`, variable annotations); it cannot run
    unmodified on both interpreters -- confirm the intended runtime.
    """

    def __init__(self, inputfile, outputfile):
        # inputfile/outputfile may be path strings or already-open file
        # objects (see create_buffer).
        self._inputfile = inputfile
        self._outputfile = outputfile
        self._tokenizer: JackTokenizer = None
        self._cur_root = []  # stack of XML elements; top is the current parent
        self._n_args = []  # stack of argument counts for (possibly nested) calls
        self._root = None  # root XML element, set by compile_class
        self.class_name = None  # name of the class being compiled
        self.return_type = None  # return type of the current subroutine
        self._label_cnt = 0  # counter used to generate unique VM labels
        self.vm_writer = None  # type:VMWriter
        self._init()
        self.symbol = SymbolTable()
        self.vm_writer.set_engine(self)
        self.method_type = None  # kind keyword of the current subroutine

    def line_num(self):
        """Return the tokenizer's current source line number."""
        return self._tokenizer.line

    def _init(self):
        """Open input/output buffers and create the VM writer and tokenizer."""
        self._inputbuf = self.create_buffer(self._inputfile)
        self._outputbuf = self.create_buffer(self._outputfile, mode="w+")
        # VM output goes next to the XML output, with a .vm extension.
        self.vm_writer = VMWriter(self._outputfile[:-4] + ".vm")
        self._tokenizer = JackTokenizer(self._inputbuf)

    def create_buffer(self, fn, mode='r'):
        """Return an open file object for *fn*, which may be a path string
        or an already-open file object.

        NOTE(review): `unicode` and `file` are Python-2 builtins and raise
        NameError on Python 3 -- confirm the intended interpreter.
        """
        if isinstance(fn, str) or isinstance(fn, unicode):
            return open(fn, mode)
        elif isinstance(fn, file) or isinstance(fn, IOBase):
            return fn
        else:
            raise ValueError("file object show file or readable")

    def compile_class(self):
        """Compile a complete class: 'class' className '{' classVarDec*
        subroutineDec* '}'.

        The finally block guarantees the XML tree built so far is written
        and the VM writer is closed even if compilation raises.
        """
        parent = self._set_parent("class")
        self._root = parent
        self._advance()
        self._pop_required(parent, TokenType.keyword, KeywordType.CLASS)
        self.class_name = self._token()[1]
        self._pop_required(parent, TokenType.identifier)
        self._pop_required(parent, TokenType.symbol, "{")

        try:
            while self._is_class_var():
                self.compile_class_var_desc()

            while self._is_subroutine():
                self.compile_subroutine()
            self._pop_required(parent, TokenType.symbol, "}")
            print(self.symbol)  # debug: dump the symbol table
        finally:
            # c14n2 is canonical XML serialization (lxml-style).
            self._outputbuf.write(
                unicode(
                    et.tostring(self._root, pretty_print=True,
                                method="c14n2").decode("utf-8")))
            self.vm_writer.close()
        self._outputbuf.close()

    def _required_type(self, token_type, val=None):
        """Assert the current token has *token_type* (and, for keywords and
        symbols, the value *val*); return the (type, value) pair.

        Raises ValueError when the token does not match.
        """
        tp, tv = self._token()
        if token_type != tp or (
            (tp == TokenType.keyword or tp == TokenType.symbol) and
            (val != tv)):
            raise ValueError("token must be %s,%s" % (token_type, val))
        return tp, tv

    def compile_class_var_desc(self):
        """Compile a classVarDec: ('static'|'field') type varName
        (',' varName)* ';' and define each name in the symbol table."""
        parent = self._set_parent("classVarDec")
        # Could be refined with stricter type checks and identifier validation.
        parent.append(self._build_element())
        kind = self.get_kind()
        self._advance()
        itype = self.get_type()
        parent.append(self._build_element())
        self._advance()

        while not self.is_token(TokenType.symbol, ";"):
            parent.append(self._build_element())
            # Skip list separators; every other token here is a varName.
            if self._token()[1] != "," and self._token()[1] != ";":
                self.symbol.define(self._token()[1], itype, kind)
            self._advance()
        parent.append(self._build_element())
        self._advance()
        self._remove_parent()

    def get_kind(self):
        """Return the current token's kind as a lowercase string
        (e.g. 'static', 'field')."""
        kind = self._token()[1]
        if isinstance(kind, KeywordType):
            kind = kind.name.lower()
        return kind

    def get_type(self):
        """Return the current token as a type name: lowercase keyword for
        built-in types, the identifier itself for class types."""
        itype = self._token()[1]
        if isinstance(itype, KeywordType):
            return itype.name.lower()
        return itype

    def compile_subroutine(self):
        """Compile a subroutineDec: ('constructor'|'function'|'method')
        ('void'|type) subroutineName '(' parameterList ')' subroutineBody."""
        print(self.symbol)  # debug: dump the table before resetting it
        self.symbol.start_subroutine()
        parent = self._set_parent("subroutineDec")
        method_type = self._token()[1]
        self.method_type = method_type
        self._advance()
        self.return_type = self._token()[1]
        self._advance()
        function_name = self._token()[1]
        self._advance()
        self._pop_required(parent, TokenType.symbol, "(")
        self.compile_parameter_list()
        # VM functions are named ClassName.subroutineName.
        full_name = "{}.{}".format(self.class_name, function_name)

        self._pop_required(parent, TokenType.symbol, ")")
        self._compile_body(full_name, method_type)
        self._remove_parent()
        self.vm_writer.write_comment("end function %s" % function_name)
        self.vm_writer.write_comment("")
        # if self._tokenizer.token_type()==TokenType.KEY_WORD:

    def _compile_body(self, full_name, method_type):
        """Compile a subroutineBody: '{' varDec* statements '}'; emits the
        VM 'function' declaration plus constructor/method prologue."""
        parent = self._set_parent("subroutineBody")
        self._pop_required(parent, TokenType.symbol, "{")
        while self._is_var_desc():
            self.compile_var_desc()

        var_cnt = self.symbol.var_count("var")
        field_cnt = self.symbol.var_count("field")
        self.vm_writer.write_function(full_name, var_cnt)
        if method_type == KeywordType.CONSTRUCTOR:
            # Constructor: allocate memory for the new object and set
            # `this` (pointer 0) to the returned base address.
            self.vm_writer.write_push(SEG_CONSTANT, field_cnt)
            self.vm_writer.write_call("Memory.alloc", "1")
            self.vm_writer.write_pop(SEG_POINTER, "0")
        elif method_type == KeywordType.METHOD:
            # Instance method: this = argument 0.
            self.vm_writer.write_push(SEG_ARG, "0")
            self.vm_writer.write_pop(SEG_POINTER, "0")
        self.compile_statements()
        self._pop_required(parent, TokenType.symbol, "}")
        self._remove_parent()

    def _remove_parent(self):
        """Pop the current XML parent element off the stack."""
        self._cur_root.pop()

    def compile_parameter_list(self):
        """Compile a (possibly empty) parameterList, defining each
        parameter as an 'arg' in the symbol table. Stops at ')'."""
        kind = "arg"
        while not self.is_token(TokenType.symbol, ")"):
            itype = self.get_type()
            self._advance()
            name = self._token()[1]
            self.symbol.define(name, itype, kind)
            self._advance()
            # parent.append(self._build_element())
            if self.is_token(TokenType.symbol, ","):
                self._advance()

    def compile_var_desc(self):
        """Compile a varDec: 'var' type varName (',' varName)* ';' and
        define each name as a 'var' in the symbol table."""
        parent = self._set_parent("varDec")
        self._pop_required(parent, TokenType.keyword, KeywordType.VAR)
        kind = "var"
        itype = self.get_type()
        parent.append(self._build_element())
        self._advance()

        while not self.is_token(TokenType.symbol, ";"):
            # parent.append(self._build_element())
            if not self.is_token(TokenType.symbol, ",") and not self.is_token(
                    TokenType.symbol, ";"):
                self.symbol.define(self._token()[1], itype, kind)
            self._advance()
        self._pop_required(parent, TokenType.symbol, ";")
        self._remove_parent()

    def compile_statements(self):
        """Compile a run of statements until a non-statement token.

        if/while `continue` back to the loop condition because they
        consume their trailing '}' themselves.
        """
        self._set_parent("statements")

        while self._is_statement():
            if self.is_let_statement():
                self.compile_let()
            if self.is_do_statement():
                self.compile_do()
            if self.is_return_statement():
                self.compile_return()
            if self.is_if_statement():
                self.compile_if()
                continue
            if self.is_while_statement():
                self.compile_while()
                continue
        self._remove_parent()

    def compile_do(self):
        """Compile a doStatement: 'do' subroutineCall ';'; the call's
        return value is discarded into temp 0."""
        parent = self._set_parent("doStatement")
        self._pop_required(parent, TokenType.keyword, KeywordType.DO)
        type1, id1 = self._pop_required(parent, TokenType.identifier)
        self.compile_call(type1, id1)
        self.vm_writer.write_pop(SEG_TEMP, 0)  # discard the return value
        self._pop_required(parent, TokenType.symbol, ";")
        self._remove_parent()

    def compile_call(self, typ1, id1):
        """Compile a subroutineCall whose leading identifier *id1* has
        already been consumed.

        Handles three forms: var.method(...) (push the receiver, call
        Type.method), ClassName.function(...), and a bare method(...) on
        the current object (push pointer 0, call ClassName.method).
        """
        parent = None
        symbol_kind = self.symbol.kind_of(id1)
        # Count of implicit arguments (the receiver, when there is one).
        n_args = 0
        typ2, id2 = self._token()
        if id2 == ".":
            if symbol_kind:
                # id1 is a variable: resolve its declared class type and
                # push the object reference (becomes `this` in the callee).
                function_type = self.symbol.type_of(id1)
                if symbol_kind == "arg":
                    self.vm_writer.write_push("argument",
                                              self.symbol.index_of(id1))
                elif symbol_kind == "static":
                    self.vm_writer.write_push("static",
                                              self.symbol.index_of(id1))
                elif symbol_kind == "var":
                    self.vm_writer.write_push("local",
                                              self.symbol.index_of(id1))
                elif symbol_kind == "field":
                    self.vm_writer.write_push("this",
                                              self.symbol.index_of(id1))
                n_args += 1
            else:
                # Static (class-level) function: id1 is a class name.
                function_type = id1
            self._advance()
            _, method_name = self._pop_required(parent, TokenType.identifier)
            full_name = "%s.%s" % (function_type, method_name)
        else:
            # Bare call: a method on the current object.
            n_args += 1
            self.vm_writer.write_push("pointer", 0)
            function_type = self.class_name
            full_name = "%s.%s" % (function_type, id1)
        # Stack the count so calls nested inside argument expressions do
        # not clobber this call's running argument count.
        self._n_args.append(n_args)
        self._pop_required(parent, TokenType.symbol, "(")
        self.compile_expression_list()
        self._pop_required(parent, TokenType.symbol, ")")
        n_args = self._n_args.pop(-1)
        self.vm_writer.write_call(full_name, n_args=n_args)

    def compile_let(self):
        """Compile a letStatement: 'let' varName ('[' expression ']')?
        '=' expression ';'.

        For array targets the RHS value is parked in temp 0 before
        setting pointer 1, so evaluating the RHS cannot clobber `that`.
        """
        parent = self._set_parent("letStatement")
        self._pop_required(parent, TokenType.keyword, KeywordType.LET)
        tk, val = self._pop_required(parent, TokenType.identifier)
        seg, idx = self.get_var_seg_idx(val)
        is_arr = False
        if self.is_token(TokenType.symbol, "["):
            is_arr = True
            self._advance()
            self.compile_expression()
            # index + base -> element address left on the stack
            self.vm_writer.write_push(seg, idx)
            self.vm_writer.write_arithmetic("+")
            self._pop_required(parent, TokenType.symbol, "]")

        # The target may be an array element (is_arr set above).
        self._pop_required(parent, TokenType.symbol, "=")
        self.compile_expression()
        if is_arr:
            self.vm_writer.write_pop(SEG_TEMP, "0")
            self.vm_writer.write_pop(SEG_POINTER, "1")
            self.vm_writer.write_push(SEG_TEMP, "0")
            self.vm_writer.write_pop(SEG_THAT, "0")
        else:
            self.vm_writer.write_pop(seg, idx)
        self._pop_required(parent, TokenType.symbol, ";")
        self._remove_parent()

    def compile_while(self):
        """Compile a whileStatement using negate-and-branch: label1 /
        ~cond if-goto label2 / body / goto label1 / label2."""
        self.vm_writer.write_comment("start while")
        parent = self._set_parent("whileStatement")
        self._pop_required(parent, TokenType.keyword, KeywordType.WHILE)
        label1 = self._get_label()
        self.vm_writer.write_label(label1)
        label2 = self._get_label()
        self._pop_required(parent, TokenType.symbol, "(")
        self.compile_expression()
        self.vm_writer.write_arithmetic("~")  # negate the loop condition
        self._pop_required(parent, TokenType.symbol, ")")
        self.vm_writer.write_if(label2)
        self._pop_required(parent, TokenType.symbol, "{")
        self.compile_statements()
        self._pop_required(parent, TokenType.symbol, "}")
        self.vm_writer.write_goto(label1)
        self.vm_writer.write_label(label2)
        self._remove_parent()

        self.vm_writer.write_comment("end while")

    def compile_return(self):
        """Compile a returnStatement; void subroutines push constant 0
        before 'return' per the VM calling convention."""
        parent = self._set_parent("returnStatement")
        self._pop_required(parent, TokenType.keyword, KeywordType.RETURN)
        if not self.is_token(TokenType.symbol, ";"):
            self.compile_expression()
        self._pop_required(parent, TokenType.symbol, ";")
        if self.return_type == KeywordType.VOID:
            self.vm_writer.write_push(SEG_CONSTANT, 0)
        self.vm_writer.write_return()
        self._remove_parent()

    def compile_if(self):
        """Compile an ifStatement (optional else): ~cond if-goto label1 /
        then-body / goto label2 / label1: else-body / label2."""
        parent = self._set_parent("ifStatement")
        self.vm_writer.write_comment("compile if")
        self._pop_required(parent, TokenType.keyword, KeywordType.IF)
        self._pop_required(parent, TokenType.symbol, "(")
        label1 = self._get_label()
        label2 = self._get_label()
        self.compile_expression()
        self.vm_writer.write_arithmetic("~")  # negate the condition
        self.vm_writer.write_if(label1)
        self._pop_required(parent, TokenType.symbol, ")")
        self._pop_required(parent, TokenType.symbol, "{")
        self.compile_statements()
        self._pop_required(parent, TokenType.symbol, "}")
        self.vm_writer.write_goto(label2)
        self.vm_writer.write_label(label1)
        if self.is_token(TokenType.keyword, KeywordType.ELSE):
            self._pop_required(parent, TokenType.keyword, KeywordType.ELSE)
            self._pop_required(parent, TokenType.symbol, "{")
            self.compile_statements()
            self._pop_required(parent, TokenType.symbol, "}")
        self.vm_writer.write_label(label2)
        self._remove_parent()

        self.vm_writer.write_comment(" if end")

    def compile_expression(self):
        """Compile an expression (term (op term)*), emitting operators
        strictly left-to-right (no precedence, per the Jack language)."""
        parent = self._set_parent("expression")
        op_count = 0
        ops = []
        while not self._is_end():
            self.compile_term()
            if self._is_op(False):
                _, op = self._token()
                self._advance()
                ops.append(op)
            op_count += 1
            # Once two terms are on the stack, emit the pending operator.
            if op_count >= 2:
                print(ops)  # debug: pending operator queue
                self.vm_writer.write_arithmetic(ops.pop(0))
            # parent.append(self._build_element())
            # self._advance()

        self._remove_parent()

    def compile_term(self):
        """Compile a term: parenthesized expression, unary op, call,
        array access, variable reference, or constant.

        *first* is True only for the leading token, so a leading '-' is
        treated as unary minus rather than a binary operator.
        """
        parent = self._set_parent("term")
        first = True
        while not self._is_op(first) and not self._is_end():
            first = False
            if self.is_token(TokenType.symbol, "("):
                self._advance()
                self.compile_expression()
                self._pop_required(parent, TokenType.symbol, ")")

            elif self._is_unary_op():
                token, op = self._token()
                self._advance()
                op = "neg" if op == "-" else op  # unary minus -> VM 'neg'
                self.compile_term()
                self.vm_writer.write_arithmetic(op)
                continue
            elif self.is_token(TokenType.identifier):
                tk, val = self._pop_required(parent, TokenType.identifier)
                if self.is_token(TokenType.symbol, "(") or self.is_token(
                        TokenType.symbol, "."):
                    self.compile_call(tk, val)
                elif self.is_token(TokenType.symbol, "["):
                    self._advance()
                    self.compile_expression()
                    seg, idx = self.get_var_seg_idx(val)
                    self.vm_writer.write_push(seg, idx)
                    # Compute the element address directly and read it
                    # through that[0].
                    # fixme: constant-index access like a[0]
                    self.vm_writer.write_arithmetic("+")
                    self.vm_writer.write_pop(SEG_POINTER, "1")
                    self.vm_writer.write_push(SEG_THAT, "0")
                    self._pop_required(parent, TokenType.symbol, "]")
                else:
                    # Plain variable reference.
                    seg, idx = self.get_var_seg_idx(val)
                    self.vm_writer.write_push(seg, idx)
            else:
                tk, val = self._token()
                if self.is_token(TokenType.integerConstant):
                    self.vm_writer.write_push(SEG_CONSTANT, val)
                elif self.is_token(TokenType.keyword, KeywordType.TRUE):
                    # true is represented as ~0 (all bits set)
                    self.vm_writer.write_push(SEG_CONSTANT, "0")
                    self.vm_writer.write_arithmetic("~")
                elif self.is_token(TokenType.keyword, KeywordType.FALSE):
                    self.vm_writer.write_push(SEG_CONSTANT, "0")
                elif self.is_token(TokenType.keyword, KeywordType.NULL):
                    self.vm_writer.write_push(SEG_CONSTANT, "0")
                elif self.is_token(TokenType.keyword, KeywordType.THIS):
                    self.vm_writer.write_push(SEG_POINTER, "0")
                elif self.is_token(TokenType.stringConstant):
                    # String literal: String.new(len), then one
                    # String.appendChar call per character.
                    str_len = len(val)
                    self.vm_writer.write_push(SEG_CONSTANT, str(str_len))
                    self.vm_writer.write_call("String.new", "1")

                    for idx, x in enumerate(val):
                        self.vm_writer.write_push(SEG_CONSTANT, str(ord(x)))
                        self.vm_writer.write_call("String.appendChar", '2')

                self._advance()
        self._remove_parent()

    def _pop_required(self, parent, tk, val=None):
        """Assert the current token matches, advance past it, and return
        the (type, value) pair. *parent* is unused here."""
        tk, val = self.required(tk, val)
        self._advance()
        return tk, val

    def _is_op(self, first):
        """True if the current token is a binary operator; '-' counts only
        when it is not the first token of a term (there it is unary).

        NOTE(review): `and` binds before `or`, so the '-' branch never
        re-checks that the token is a symbol -- works only because no
        other token type carries the value '-'; fragile.
        """
        tk, val = self._token()
        return tk == TokenType.symbol and val in '+*/&|<>=' or (val == '-'
                                                                and not first)

    def _is_unary_op(self):
        """True if the current token is a unary operator ('-' or '~')."""
        tk, val = self._token()
        return tk == TokenType.symbol and val in '-~'

    def compile_expression_list(self):
        """Compile a (possibly empty) expressionList, accumulating the
        argument count into the top entry of the _n_args stack."""
        parent = self._set_parent("expressionList")
        n_args = self._n_args[-1]
        while not self.is_token(TokenType.symbol, ")"):
            n_args += 1
            self.compile_expression()
            if self.is_token(TokenType.symbol, ","):
                self._pop_required(parent, TokenType.symbol, ",")
        self._n_args[-1] = n_args
        self._remove_parent()

    def build_identifier(self):
        """Build an <identifier> XML element for the current token."""
        e = et.Element("identifier")
        e.text = self._tokenizer.identifier()
        return e

    def build_keyword(self):
        """Build a <keyword> XML element for the current token."""
        e = et.Element("keyword")
        e.text = self._tokenizer.keyword().name.lower()
        return e

    def build_symbol(self):
        """Build a <symbol> XML element for the current token."""
        e = et.Element("symbol")
        e.text = self._tokenizer.symbol()
        return e

    def _token(self):
        """Return the current token as a (TokenType, value) pair, or
        (None, None) for an unrecognized type. Also prints a debug trace."""
        # if self._tokenizer.line > 44:
        #     raise ValueError("test code: stop translating here")
        token_type = self._tokenizer.token_type()
        if self._tokenizer.token_type() == TokenType.keyword:
            a, b = token_type, self._tokenizer.keyword()
        elif self._tokenizer.token_type() == TokenType.symbol:
            a, b = token_type, self._tokenizer.symbol()
        elif self._tokenizer.token_type() == TokenType.identifier:
            a, b = token_type, self._tokenizer.identifier()
        elif self._tokenizer.token_type() == TokenType.integerConstant:
            a, b = token_type, self._tokenizer.intVal()
        elif self._tokenizer.token_type() == TokenType.stringConstant:
            a, b = token_type, self._tokenizer.stringVal()
        else:
            a, b = None, None
        print(a, b, self._tokenizer.line)  # debug trace
        return a, b

    def _advance(self):
        """Advance the tokenizer if any tokens remain."""
        if self._tokenizer.has_more_tokens():
            self._tokenizer.advance()

    def required(self, token, val=None):
        """Public wrapper around _required_type."""
        return self._required_type(token, val)

    def _build_element(self):
        """Build an XML element named after the current token's type, with
        the token value (lowercased for keywords) as its text."""
        a, b = self._token()
        e = et.Element(a.name)
        if isinstance(b, KeywordType):
            e.text = b.name.lower()
        else:
            e.text = b
        return e

    def _is_class_var(self):
        """True if the current token starts a classVarDec ('field'/'static')."""
        return self.is_token(TokenType.keyword,
                             KeywordType.FIELD) or self.is_token(
                                 TokenType.keyword, KeywordType.STATIC)

    def is_token(self, token, val=None):
        """True if the current token has type *token* (and value *val*,
        when one is given)."""
        t, v = self._token()
        if val is not None:
            return t == token and v == val
        else:
            return t == token

    def _get_parent(self):
        """Return the current XML parent element, or None at the root."""
        if len(self._cur_root) > 0:
            return self._cur_root[-1]
        else:
            return None

    def _set_parent(self, name):
        """Create an element *name*, attach it to the current parent (if
        any), push it as the new parent, and return it."""
        parent = self._get_parent()
        ele2 = et.Element(name)
        if parent is not None:
            parent.append(ele2)
        self._cur_root.append(ele2)
        return ele2

    def _is_subroutine(self):
        """True if the current token starts a subroutineDec."""
        return self.is_token(TokenType.keyword, KeywordType.FUNCTION) \
               or self.is_token(TokenType.keyword, KeywordType.CONSTRUCTOR) \
               or self.is_token(TokenType.keyword, KeywordType.METHOD)

    def _is_statement(self):
        """True if the current token starts a statement.

        NOTE(review): falls through to an implicit None (falsy) when
        nothing matches -- works in boolean context but is untidy.
        """
        if self.is_let_statement():
            return True
        if self.is_do_statement():
            return True
        if self.is_return_statement():
            return True
        if self.is_if_statement():
            return True
        if self.is_while_statement():
            return True

    def is_while_statement(self):
        """True if the current token is the 'while' keyword."""
        return self.is_token(TokenType.keyword, KeywordType.WHILE)

    def is_let_statement(self):
        """True if the current token is the 'let' keyword."""
        return self.is_token(TokenType.keyword, KeywordType.LET)

    def is_do_statement(self):
        """True if the current token is the 'do' keyword."""
        return self.is_token(TokenType.keyword, KeywordType.DO)

    def is_return_statement(self):
        """True if the current token is the 'return' keyword."""
        return self.is_token(TokenType.keyword, KeywordType.RETURN)

    def is_if_statement(self):
        """True if the current token is the 'if' keyword."""
        return self.is_token(TokenType.keyword, KeywordType.IF)

    def _is_var_desc(self):
        """True if the current token is the 'var' keyword."""
        return self.is_token(TokenType.keyword, KeywordType.VAR)

    def _is_end(self):
        """True if the current token terminates an expression.

        NOTE(review): the ';' check appears twice; the second clause is
        redundant.
        """
        return self.is_token(TokenType.symbol, ";") or \
               self.is_token(TokenType.symbol, ";") \
               or self.is_token(TokenType.symbol, ")") \
               or self.is_token(TokenType.symbol, ",") \
               or self.is_token(TokenType.symbol, "]")

    def get_var_seg_idx(self, val):
        """Map a variable name to its (VM segment, index); a method's
        'arg' indices shift by one to skip the implicit `this` argument.

        Returns None implicitly for names with an unknown kind.
        """
        kind = self.symbol.kind_of(val)
        idx = self.symbol.index_of(val)
        if kind == "static":
            return SEG_STATIC, idx
        elif kind == "var":
            return SEG_LOCAL, idx
        elif kind == "field":
            return SEG_THIS, idx
        elif kind == "arg":
            if self.method_type == KeywordType.METHOD:
                idx += 1
            return SEG_ARG, idx

    def _get_label(self):
        """Return a fresh, unique VM label ('label_0', 'label_1', ...)."""
        label = "label_%s" % self._label_cnt
        self._label_cnt += 1
        return label
class CompilationEngine:

    def __init__(self, input_file, output_file):
        self.jack_tokenizer = JackTokenizer(input_file)
        self.output = open(output_file, "w")
        self.level = 0
        self.is_unary = False

    def compile_class(self):
        self.print_title("class", True)
        self.level += 1
        # "class className {
        for i in range(NUM_TOKENS_CLASS_DEC):
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == KEYWORD:
                self.print_tag(token_type, self.jack_tokenizer.key_word())
            if token_type == IDENTIFIER:
                self.print_tag(token_type, self.jack_tokenizer.identifier())
            if token_type == SYMBOL:
                self.print_tag(token_type, self.jack_tokenizer.symbol())
        # classVarDec* or SubroutineDec*
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            curr_keyword = self.jack_tokenizer.key_word()
            if token_type == KEYWORD and (self.jack_tokenizer.key_word() == "static" or
                                                  self.jack_tokenizer.key_word() == "field"):
                self.compile_class_var_dec()
            if token_type == KEYWORD and (self.jack_tokenizer.key_word() == "function" or
                                                  self.jack_tokenizer.key_word() == "method" or
                                                  self.jack_tokenizer.key_word() == "constructor"):
                self.compile_subroutine()
                # self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                # break
            if token_type == SYMBOL and self.jack_tokenizer.symbol() == "}":
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                break
        self.level -= 1
        self.print_title("class", False)

    def compile_class_var_dec(self):
        self.print_title("classVarDec", True)
        self.level += 1
        # "static" or "field"
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == KEYWORD:
                self.print_tag(token_type, self.jack_tokenizer.key_word())
            elif token_type == IDENTIFIER:
                self.print_tag(token_type, self.jack_tokenizer.identifier())
            elif token_type == SYMBOL:
                self.print_tag(token_type, self.jack_tokenizer.symbol())
                if self.jack_tokenizer.symbol() == ";":
                    break
        self.level -= 1
        self.print_title("classVarDec", False)


    def compile_subroutine(self):
        self.print_title("subroutineDec", True)
        self.level += 1
        # "constructor" or "function" or "method"
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == KEYWORD:
                self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
            elif token_type == IDENTIFIER:
                self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
            elif token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == "(":
                    self.print_tag(token_type, self.jack_tokenizer.symbol())
                    self.compile_parameter_list()
                    # should print ")"
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                # if self.jack_tokenizer.symbol() == "}":
                #     break
                if self.jack_tokenizer.symbol() == '{':
                    self.compile_subroutine_body()
                    break
        self.level -= 1
        self.print_title("subroutineDec", False)

    def compile_subroutine_body(self):
        self.print_title("subroutineBody", True)
        self.level += 1
        self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == SYMBOL:
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            if token_type == KEYWORD:
                if self.jack_tokenizer.key_word() == "var":
                    self.compile_var_dec()
                    continue
                else:
                    self.compile_statements()
                    # print "}"
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    break
        self.level -= 1
        self.print_title("subroutineBody", False)


    def compile_parameter_list(self):
        self.print_title("parameterList", True)
        self.level += 1
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == KEYWORD:
                self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
            elif token_type == IDENTIFIER:
                self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
            else:
                if self.jack_tokenizer.symbol() == ")":
                    break
                else:
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
        self.level -= 1
        self.print_title("parameterList", False)


    def compile_var_dec(self):
        self.print_title("varDec", True)
        self.level += 1
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == KEYWORD:
                self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
            if token_type == IDENTIFIER:
                self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
            if token_type == SYMBOL:
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                if self.jack_tokenizer.symbol() == ";":
                    break
        self.level -= 1
        self.print_title("varDec", False)


    def compile_statements(self):
        self.print_title("statements", True)
        self.level += 1
        while True:
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "do":
                self.compile_do()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "let":
                self.compile_let()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "while":
                self.compile_while()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "return":
                self.compile_return()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "if":
                self.compile_if()
            if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "}":
                break
        self.level -= 1
        self.print_title("statements", False)


    def compile_do(self):
        self.print_title("doStatement", True)
        self.level += 1
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        self.compile_subroutine_call()
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # return from compile_subroutine_call with ";"
        self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
        self.level -= 1
        self.print_title("doStatement", False)

    def compile_let(self):
        self.print_title("letStatement", True)
        self.level += 1
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == IDENTIFIER:
                self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
                #continue
            if token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == "[":# or self.jack_tokenizer.symbol() == "=":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                    # print "]"
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                # should return from the compile_expression only with ";" or "]"
                if self.jack_tokenizer.symbol() == "=":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                if self.jack_tokenizer.symbol() == ";":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    break
        self.level -= 1
        self.print_title("letStatement", False)

    def compile_while(self):
        """Compile a while statement: 'while' '(' expression ')' '{' statements '}'.

        Emits the <whileStatement> XML element. Assumes the tokenizer is
        currently positioned on the 'while' keyword.
        """
        self.print_title("whileStatement", True)
        self.level += 1
        # 'while' keyword
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == "(":
                    # '(' condition; compile_expression leaves the tokenizer
                    # positioned on the matching ')'
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                if self.jack_tokenizer.symbol() == "{":
                    # '{' body; compile_statements leaves the tokenizer
                    # positioned on the matching '}'
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_statements()
                # Emit the closing symbol left over by the branch above
                # (')' after the condition, or '}' after the body).
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                if token_type == SYMBOL and self.jack_tokenizer.symbol() == "}":
                    break
        self.level -= 1
        self.print_title("whileStatement", False)


    def compile_return(self):
        """Compile a return statement: 'return' expression? ';'.

        Emits the <returnStatement> XML element. Assumes the tokenizer is
        currently positioned on the 'return' keyword.
        """
        self.print_title("returnStatement", True)
        self.level += 1
        tk = self.jack_tokenizer
        # 'return' keyword
        self.print_tag(KEYWORD, tk.key_word())
        tk.has_more_tokens()
        tk.advance()
        # Optional return-value expression; compile_expression is expected to
        # return with the tokenizer positioned on the terminating ';'.
        if not (tk.token_type() == SYMBOL and tk.symbol() == ";"):
            self.compile_expression()
        # ';'
        self.print_tag(SYMBOL, tk.symbol())
        self.level -= 1
        self.print_title("returnStatement", False)

    def compile_if(self):
        """Compile an if statement:
        'if' '(' expression ')' '{' statements '}' ('else' '{' statements '}')?.

        Emits the <ifStatement> XML element. Assumes the tokenizer is
        currently positioned on the 'if' keyword.
        """
        self.print_title("ifStatement", True)
        self.level += 1
        # 'if' keyword
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == "(":
                    # '(' condition; compile_expression returns on ')'
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                if self.jack_tokenizer.symbol() == "{":
                    # '{' then-body; compile_statements returns on '}'
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_statements()
                # Emit the closing symbol left over by the branch above
                # (')' or '}').
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                if token_type == SYMBOL and self.jack_tokenizer.symbol() == "}":
                    break
        # Look one token ahead for an optional 'else' clause.
        # NOTE(review): when there is no 'else', this token is consumed here
        # without being emitted - presumably the caller depends on that; verify.
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "else":
            # 'else' keyword
            self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # '{'
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            self.compile_statements()
            # '}'
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
        self.level -= 1
        self.print_title("ifStatement", False)

    def compile_subroutine_call(self):
        """Compile a subroutine call of either form:
        subroutineName '(' expressionList ')'
        or (className | varName) '.' subroutineName '(' expressionList ')'.

        Advances past the call; no surrounding XML element is opened here.
        """
        tk = self.jack_tokenizer
        tk.has_more_tokens()
        tk.advance()
        # First identifier: subroutineName, className or varName.
        self.print_tag(IDENTIFIER, tk.identifier())
        tk.has_more_tokens()
        tk.advance()
        if tk.token_type() == SYMBOL and tk.symbol() == "(":
            # Direct call: subroutineName(expressionList)
            self.print_tag(SYMBOL, tk.symbol())
            self.compile_expression_list()
            self.print_tag(SYMBOL, tk.symbol())
        elif tk.token_type() == SYMBOL and tk.symbol() == ".":
            # Qualified call: (className | varName).subroutineName(expressionList)
            self.print_tag(SYMBOL, tk.symbol())
            tk.has_more_tokens()
            tk.advance()
            # subroutineName
            self.print_tag(IDENTIFIER, tk.identifier())
            tk.has_more_tokens()
            tk.advance()
            # '('
            self.print_tag(SYMBOL, tk.symbol())
            self.compile_expression_list()
            # ')'
            self.print_tag(SYMBOL, tk.symbol())


    def compile_expression(self):
        """Compile an expression: term (op term)*.

        Emits the <expression> XML element. A '-' seen before a term is
        flagged via self.is_unary so compile_term treats it as unary minus.
        """
        binary_ops = ["+", "-", "*", "/", "&", "|", "<", ">", "=",
                      "&amp;", "&lt;", "&gt;"]
        self.print_title("expression", True)
        self.level += 1
        tk = self.jack_tokenizer
        if tk.token_type() == SYMBOL and tk.symbol() == "-":
            # Leading '-' is unary, not a binary operator.
            self.is_unary = True
        self.compile_term()
        # Remaining (op term) pairs, if any.
        while tk.token_type() == SYMBOL and tk.symbol() in binary_ops:
            self.print_tag(SYMBOL, tk.symbol())
            tk.has_more_tokens()
            tk.advance()
            if tk.token_type() == SYMBOL and tk.symbol() == "-":
                # '-' directly after an operator is unary minus.
                self.is_unary = True
            self.compile_term()
        self.level -= 1
        self.print_title("expression", False)

    def compile_term(self):
        """Compile a single term of an expression and emit its <term> element.

        Handles integer/string/keyword constants, unary operators, a
        parenthesised sub-expression, and identifier-based terms (plain
        variable, array entry, or subroutine call). Leaves the tokenizer
        positioned on the token that follows the term.
        """
        # Keyword constants that may appear as a term.
        keywords_list = ["true", "false", "null", "this"]
        self.print_title("term", True)
        self.level += 1
        while True:
            token_type = self.jack_tokenizer.token_type()
            # A delimiter or binary operator ends the term - unless the
            # pending symbol is a unary '-' (tracked via self.is_unary).
            if token_type == SYMBOL and not self.is_unary and self.jack_tokenizer.symbol() in\
                    [",", ";", ")", "}","]", "+", "-", "*", "/", "&", "|", "<", ">", "=", "&amp;", "&lt;","&gt;"]:
                break
            if token_type == INT_CONST:
                # integerConstant
                self.print_tag(INT_CONST, self.jack_tokenizer.int_val())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == STRING_CONST:
                # stringConstant
                self.print_tag(STRING_CONST, self.jack_tokenizer.string_val())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == KEYWORD and self.jack_tokenizer.key_word() in keywords_list:
                # keywordConstant: true | false | null | this
                self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == SYMBOL and self.jack_tokenizer.symbol() in ["~", "-"]:
                # unaryOp term; clear the unary flag before recursing
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                self.is_unary = False
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                self.compile_term()
                break
            if token_type == SYMBOL and self.jack_tokenizer.symbol() == "(":
                # '(' expression ')'
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                self.compile_expression()
                # compile_expression returns positioned on ')'
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == IDENTIFIER:
                self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                # Plain varName: next token is a delimiter or binary operator.
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() in\
                        [",", ";", ")", "}","]", "+", "-", "*", "/", "&", "|", "<", ">", "=", "&amp;", "&lt;","&gt;"]:
                    break
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "[":
                    # varName '[' expression ']'
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                    # compile_expression returns positioned on ']'
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    break
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "(":
                    # subroutineName '(' expressionList ')'
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.compile_expression_list()
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    break
                # (className | varName).subroutineName(expressionList)
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ".":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # subroutineName
                    self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # "("
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    # expressionList
                    self.compile_expression_list()
                    # ")"
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            # Fall through: consume the current token and keep scanning.
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
        self.level -= 1
        self.print_title("term", False)

    def compile_expression_list(self):
        """Compile a (possibly empty) comma-separated list of expressions.

        Emits the <expressionList> XML element; returns with the tokenizer
        positioned on the closing ')'.
        """
        self.print_title("expressionList", True)
        self.level += 1
        tk = self.jack_tokenizer
        while tk.has_more_tokens():
            tk.advance()
            # An immediate ')' means the list is empty.
            if tk.token_type() == SYMBOL and tk.symbol() == ")":
                break
            self.compile_expression()
            # compile_expression returns positioned on ',' or ')'.
            if tk.symbol() == ")":
                break
            # ',' separator
            self.print_tag(SYMBOL, tk.symbol())
        self.level -= 1
        self.print_title("expressionList", False)


    def print_tag(self, token_type, value):
        """Write one indented XML leaf tag for a token.

        token_type selects the tag name (keyword/symbol/identifier/
        integerConstant/stringConstant); value is the token text, already
        XML-escaped by the caller where required. Unknown token types fall
        back to using the value itself as the tag name, matching the
        original behaviour.
        """
        # Token-type -> XML tag name lookup replaces the old if/elif chain.
        tag_names = {
            KEYWORD: "keyword",
            SYMBOL: "symbol",
            IDENTIFIER: "identifier",
            INT_CONST: "integerConstant",
            STRING_CONST: "stringConstant",
        }
        tabs = "\t" * self.level
        name = tag_names.get(token_type)
        if name is not None:
            tag = "<" + name + "> " + value + " </" + name + ">\n"
        else:
            # Fallback: the value doubles as the tag name.
            tag = "<" + value + ">" + " </" + value + ">\n"
        self.output.write(tabs + tag)

    def print_title(self, title, is_title):
        """Write an indented XML structural tag.

        Writes <title> when is_title is True, otherwise the matching
        closing tag </title>, indented by self.level tab characters.
        """
        # String repetition replaces the old character-by-character loop.
        tabs = "\t" * self.level
        if is_title:
            self.output.write(tabs + "<" + title + ">\n")
        else:
            # print closer
            self.output.write(tabs + "</" + title + ">\n")
class CompilationEngine():
    """Compiles a .jack source file straight to VM code.

    Drives a JackTokenizer over the input, tracks identifiers in a
    SymbolTable, and emits VM commands through a VMWriter. The whole
    compilation runs to completion from the constructor.
    """

    # Binary operators legal between the terms of an expression.
    op = ['+', '-', '*', '/', '&', '|', '<', '>', '=']

    def __init__(self, input, output):
        """Compile the .jack file at `input`, writing VM code to `output`."""
        print('Opened ' + input + ' for compiling.')

        self.input = input

        # Instantiate different modules
        self.tokenizer = JackTokenizer(input)
        self.symbolTable = SymbolTable()
        self.vmWriter = VMWriter(output)

        # Unique number - used for labels
        self.uniqueNo = -1

        # Load up the first token
        self.tokenizer.advance()

        # Call compileClass to start the compilation
        self.compileClass()

    def subTag(self, _tag):
        """Dead XML-emission hook from an earlier stage; must not be reached."""
        print('Subtag encountered - fix this')
        raise NameError

    def subTagIdentifier(self, name, category, new, kind, index):
        """Dead XML-emission hook from an earlier stage; must not be reached."""
        print('Subtag encountered - fix this')
        raise NameError

    def getUniqueNo(self):
        """Return the next unique label suffix as a string."""
        self.uniqueNo += 1
        return str(self.uniqueNo)

    def compileClass(self):
        """Compile: 'class' className '{' classVarDec* subroutineDec* '}'."""
        # Current token assumed to be the CLASS keyword

        # Keyword: class
        self.tokenizer.advance()

        # Identifier: class name
        # Classes are not entered into symboltable
        self.className = self.tokenizer.identifier()
        self.tokenizer.advance()

        # Symbol: {
        self.tokenizer.advance()

        # classVarDec or Subroutine
        # Access token directly to circumvent error checking.
        # (was `is not '}'`: identity comparison on str literals is unreliable)
        while self.tokenizer.rawToken() != '}':
            if self.tokenizer.keyWord() in ['STATIC', 'FIELD']:
                self.compileClassVarDec()
            elif self.tokenizer.keyWord() in [
                    'CONSTRUCTOR', 'FUNCTION', 'METHOD'
            ]:
                self.compileSubroutine()

        # Symbol: }
        # Do not advance, we are done

        self.close()

    def close(self):
        """Close the writer and tokenizer once compilation is finished."""
        self.vmWriter.close()
        self.tokenizer.close()

        print('Finished compiling ' + self.input + '.')

    def compileClassVarDec(self):
        """Compile one static/field declaration line into the symbol table."""
        # Current token assumed to be the STATIC or FIELD keyword

        # Keyword: STATIC or FIELD
        if self.tokenizer.keyWord() == 'FIELD':
            _kind = 'FIELD'
        elif self.tokenizer.keyWord() == 'STATIC':
            _kind = 'STATIC'
            # Static variables are not supported by this compiler yet.
            raise NotImplementedError
        self.tokenizer.advance()

        # Keyword: type | identifier (if class)
        try:
            _type = self.tokenizer.keyWord()
        except TokenTypeError:
            _type = self.tokenizer.identifier()
        self.tokenizer.advance()

        # Identifier: varName
        # Declare in symboltable
        self.symbolTable.define(self.tokenizer.identifier(), _type, _kind)
        self.tokenizer.advance()

        # Compile any other varDecs on the same line (of the same type)
        while self.tokenizer.symbol() == ',':
            self.tokenizer.advance()

            # Identifier: varName
            # Declare in symboltable
            self.symbolTable.define(self.tokenizer.identifier(), _type, _kind)
            self.tokenizer.advance()

        # Symbol: ;
        self.tokenizer.advance()

    def compileSubroutine(self):
        """Compile one constructor/function/method declaration and its body."""
        # Current token assumed to be keyword: constructor | function | method

        # Create new subroutine scoped symbol table
        self.symbolTable.startSubroutine()

        # Keyword: constructor | function | method
        subroutineKind = self.tokenizer.keyWord()
        self.tokenizer.advance()

        # Keyword: void | type | identifier (if class)
        self.tokenizer.advance()

        # Identifier: subroutineName
        subroutineName = self.tokenizer.identifier()
        self.tokenizer.advance()

        # Symbol: (
        self.tokenizer.advance()

        # Program structure: ParameterList
        self.compileParameterList()

        # Symbol: )
        self.tokenizer.advance()

        ### START SUBROUTINE BODY ###

        # Symbol: {
        self.tokenizer.advance()

        # subroutineBody: varDecs
        while self.tokenizer.keyWord() == 'VAR':
            self.compileVarDec()

        # Write vm code function declaration
        # This is done 'late' so that we can get nLocals (noting that
        # varDec() does not actually write vm code)
        self.vmWriter.writeFunction(self.className + '.' + subroutineName,
                                    self.symbolTable.varCount('LOCAL'))

        if subroutineKind == 'CONSTRUCTOR':
            # Alloc() required space (as determined by number of class variables)
            self.vmWriter.writePush('constant',
                                    self.symbolTable.varCount('FIELD'))
            self.vmWriter.writeCall('Memory.alloc', 1)

            # pop return value of alloc() to THIS (effectively pointing it to
            # start of allocated object memory)
            self.vmWriter.writePop('pointer', 0)

        elif subroutineKind == 'METHOD':
            # Set 'this' pointer by pushing first argument and popping to pointer 0
            self.vmWriter.writePush('argument', 0)
            self.vmWriter.writePop('pointer', 0)

        # subroutineBody: Statements
        self.compileStatements()

        # Symbol: }
        self.tokenizer.advance()

        ### END SUBROUTINE BODY ###

    def compileParameterList(self):
        """Compile a (possibly empty) parameter list, defining ARGUMENTs."""
        # assume pointer is on the type of the first parameter,
        # or on symbol ')' if there are no parameters

        # (was `is not ')'`: identity comparison on str literals is unreliable)
        if self.tokenizer.rawToken() != ')':
            run_once = True
            while self.tokenizer.rawToken() == ',' or run_once:

                if not run_once:
                    # Symbol: ,
                    self.tokenizer.advance()

                # Keyword: type | identifier (if class type)
                # Fall back to identifier() for class-typed parameters,
                # consistent with compileClassVarDec/compileVarDec.
                try:
                    _type = self.tokenizer.keyWord()
                except TokenTypeError:
                    _type = self.tokenizer.identifier()
                self.tokenizer.advance()

                # Identifier: varName
                # Declare in symboltable
                self.symbolTable.define(self.tokenizer.identifier(), _type,
                                        'ARGUMENT')
                self.tokenizer.advance()

                run_once = False

    def compileVarDec(self):
        """Compile one 'var' declaration line, defining LOCALs."""
        # assume pointer is on keyword: var

        # Keyword: var
        self.tokenizer.advance()

        # Keyword: type | identifier (if class)
        try:
            _type = self.tokenizer.keyWord()
        except TokenTypeError:
            _type = self.tokenizer.identifier()
        finally:
            self.tokenizer.advance()

        # Identifier: varName
        # Define in symboltable - note that no actual VM code is required here
        self.symbolTable.define(self.tokenizer.identifier(), _type, 'LOCAL')
        self.tokenizer.advance()

        # Further varNames
        while self.tokenizer.symbol() == ',':
            # Symbol: ,
            self.tokenizer.advance()

            # Identifier: varName
            self.symbolTable.define(self.tokenizer.identifier(), _type,
                                    'LOCAL')
            self.tokenizer.advance()

        # Symbol: ;
        self.tokenizer.advance()

    def compileStatements(self):
        """Compile a sequence of statements until the closing '}'."""
        # assume token is keyword: let | if | while | do | return

        # note: each of the nested compile methods call tokenizer.advance() at
        # the end, so no need to call it here

        # (was `is not '}'`: identity comparison on str literals is unreliable)
        while self.tokenizer.rawToken() != '}':
            if self.tokenizer.keyWord() == 'LET':
                self.compileLet()
            elif self.tokenizer.keyWord() == 'IF':
                self.compileIf()
            elif self.tokenizer.keyWord() == 'WHILE':
                self.compileWhile()
            elif self.tokenizer.keyWord() == 'DO':
                self.compileDo()
            elif self.tokenizer.keyWord() == 'RETURN':
                self.compileReturn()
            else:
                raise TokenTypeError('Statement keyword',
                                     self.tokenizer.tokenType(),
                                     self.tokenizer.rawToken(),
                                     self.tokenizer.lineNo)

    def compileSubroutineCall(self):
        """Compile a subroutine call and write the VM 'call' command."""

        # Identifier: subroutineName or (className | varName)

        # Check symboltable to see if this is an instantiated class object.
        # If so, we need to retrieve the object type to be able to call the
        # method.
        if self.symbolTable.typeOf(self.tokenizer.identifier()):
            # This is a declared variable, so assume instantiated class object
            targetObject = self.tokenizer.identifier()
            subroutineName = self.symbolTable.typeOf(targetObject)
        else:
            # Not declared, assume we are calling it on the class directly
            subroutineName = self.tokenizer.identifier()
            targetObject = None
        self.tokenizer.advance()

        # Extra implicit argument when a 'this' pointer is pushed below.
        thisArg = 0

        # Symbol: . (format className.subroutineName) or ( (format subroutineName)
        if self.tokenizer.symbol() == ".":
            subroutineName += self.tokenizer.symbol()
            self.tokenizer.advance()

            # Identifier: subroutineName
            subroutineName += self.tokenizer.identifier()

            # Push object pointer (if it exists) to top of stack so that it is
            # available for methods
            if targetObject is not None and self.symbolTable.kindOf(
                    targetObject):
                if self.symbolTable.kindOf(targetObject) == 'field':
                    self.vmWriter.writePush(
                        'this', self.symbolTable.indexOf(targetObject))
                else:
                    self.vmWriter.writePush(
                        self.symbolTable.kindOf(targetObject),
                        self.symbolTable.indexOf(targetObject))
                thisArg = 1
            self.tokenizer.advance()

        elif self.tokenizer.symbol() == '(':
            # We are calling a method from a method within the same class, so
            # push the class pointer to stack for first arg
            self.vmWriter.writePush('pointer', 0)
            thisArg = 1

            # Also append className to start so that we have a complete vm
            # function name
            subroutineName = self.className + '.' + subroutineName

        # Symbol: (
        self.tokenizer.advance()

        nArgs = self.compileExpressionList()

        # Symbol: )
        self.tokenizer.advance()

        # Write function call
        self.vmWriter.writeCall(subroutineName, nArgs + thisArg)

    def compileDo(self):
        """Compile: 'do' subroutineCall ';'."""

        # Keyword: Do
        self.tokenizer.advance()

        self.compileSubroutineCall()

        # Symbol: ;
        self.tokenizer.advance()

    def compileLet(self):
        """Compile: 'let' varName ('[' expression ']')? '=' expression ';'."""

        # Keyword: let
        self.tokenizer.advance()

        # identifier: varName
        varName = self.tokenizer.identifier()
        self.tokenizer.advance()

        # index if applicable
        # NOTE(review): the index expression is evaluated but its value is
        # never combined with the array base - array assignment is effectively
        # unsupported here (array reads raise via subTag hooks too).
        if self.tokenizer.symbol() == '[':

            # Symbol: [
            self.tokenizer.advance()

            # Expression
            self.compileExpression()

            # Symbol: ]
            self.tokenizer.advance()

        # Symbol: =
        self.tokenizer.advance()

        # Expression
        self.compileExpression()

        # Symbol: ;
        self.tokenizer.advance()

        # Write VM code - pop from top of stack to variable
        if self.symbolTable.kindOf(varName) == 'field':
            self.vmWriter.writePop('this', self.symbolTable.indexOf(varName))
        else:
            self.vmWriter.writePop(self.symbolTable.kindOf(varName),
                                   self.symbolTable.indexOf(varName))

    def compileWhile(self):
        """Compile: 'while' '(' expression ')' '{' statements '}'."""

        # Get a new unique number
        uniqueNo = self.getUniqueNo()

        # Keyword: while
        self.tokenizer.advance()

        # Symbol: (
        self.tokenizer.advance()

        # startWhile label
        self.vmWriter.writeLabel('startWhile' + uniqueNo)

        # Expression
        self.compileExpression()

        # Jump if expression is FALSE
        # (Pushing constant 1 and adding has the effect of inverting the
        # truthiness of the test value: true (-1) + 1 = 0, false (0) + 1 = 1)
        self.vmWriter.writePush('constant', 1)
        self.vmWriter.writeArithmetic('ADD')
        self.vmWriter.writeIf('endWhile' + uniqueNo)

        # Symbol: )
        self.tokenizer.advance()

        # Symbol: {
        self.tokenizer.advance()

        # Statements
        self.compileStatements()

        # Jump to startWhile
        self.vmWriter.writeGoto('startWhile' + uniqueNo)

        # endWhile label
        self.vmWriter.writeLabel('endWhile' + uniqueNo)

        # Symbol: }
        self.tokenizer.advance()

    def compileReturn(self):
        """Compile: 'return' expression? ';' and write the VM return."""

        # Keyword: return
        self.tokenizer.advance()

        # Symbol: ; or expression then ;
        # (was `is not ';'`: identity comparison on str literals is unreliable)
        if self.tokenizer.rawToken() != ';':
            self.compileExpression()
        else:
            # No return value - push constant 0
            self.vmWriter.writePush('constant', 0)

        self.tokenizer.advance()

        # Write return
        self.vmWriter.writeReturn()

    def compileIf(self):
        """Compile: 'if' '(' expr ')' '{' stmts '}' ('else' '{' stmts '}')?."""

        # Get new unique no
        uniqueNo = self.getUniqueNo()

        # Keyword: if
        self.tokenizer.advance()

        # Symbol: (
        self.tokenizer.advance()

        # Expression
        self.compileExpression()

        # Jump if expression is FALSE
        # (Pushing constant 1 and adding has the effect of inverting the
        # truthiness of the test value)
        self.vmWriter.writePush('constant', 1)
        self.vmWriter.writeArithmetic('ADD')
        self.vmWriter.writeIf('startElse' + uniqueNo)

        # Symbol: )
        self.tokenizer.advance()

        # Symbol: {
        self.tokenizer.advance()

        # Statements
        self.compileStatements()

        # Symbol: }
        self.tokenizer.advance()

        self.vmWriter.writeGoto('endIf' + uniqueNo)

        self.vmWriter.writeLabel('startElse' + uniqueNo)

        # keyWord() raises TokenTypeError when the next token is not a
        # keyword, i.e. when there is no else clause.
        try:
            if self.tokenizer.keyWord() == 'ELSE':

                # keyword: else
                self.tokenizer.advance()

                # symbol: {
                self.tokenizer.advance()

                # Compile statements
                self.compileStatements()

                # symbol: }
                self.tokenizer.advance()
        except TokenTypeError:
            pass

        self.vmWriter.writeLabel('endIf' + uniqueNo)

    def compileExpression(self):
        """Compile: term (op term)*, writing ops postfix (VM order)."""
        # Term
        self.compileTerm()

        while self.tokenizer.symbol() in CompilationEngine.op:

            # Symbol: op
            # Save for writing later (VM code is postfix: term term op)
            op = self.tokenizer.symbol()
            self.tokenizer.advance()

            # Term
            self.compileTerm()

            # Write op ('~' is unary and handled in compileTerm)
            if op == '+':
                self.vmWriter.writeArithmetic('ADD')
            elif op == '-':
                self.vmWriter.writeArithmetic('SUB')
            elif op == '=':
                self.vmWriter.writeArithmetic('EQ')
            elif op == '>':
                self.vmWriter.writeArithmetic('GT')
            elif op == '<':
                self.vmWriter.writeArithmetic('LT')
            elif op == '&':
                self.vmWriter.writeArithmetic('AND')
            elif op == '|':
                self.vmWriter.writeArithmetic('OR')
            elif op == '*':
                self.vmWriter.writeCall('Math.multiply', 2)

    def compileTerm(self):
        """Compile a single term, leaving its value on the VM stack."""

        tokenType = self.tokenizer.tokenType()

        if tokenType == 'INT_CONST':

            # Integer constant
            self.vmWriter.writePush('constant', self.tokenizer.intVal())
            self.tokenizer.advance()

        elif tokenType == 'STRING_CONST':

            # String constant
            string = self.tokenizer.stringVal()

            # Create an empty string object of the required capacity;
            # String.new leaves the new string's pointer on the stack.
            self.vmWriter.writePush('constant', len(string))
            self.vmWriter.writeCall('String.new', 1)

            # Append each char. String.appendChar(this, c) returns the string,
            # so its pointer stays on top of the stack for the next call and
            # ends up there as this term's value.
            # (Fixes the old range(length - 1) loop, which dropped the last
            # character, and the pointer-1 shuffle, which clobbered THAT and
            # leaked intermediate values on the stack.)
            for char in string:
                self.vmWriter.writePush('constant', ord(char))
                self.vmWriter.writeCall('String.appendChar', 2)

            # Next token
            self.tokenizer.advance()

        elif tokenType == 'KEYWORD':

            # Keyword constant (true | false | null | this)
            if self.tokenizer.keyWord() == 'TRUE':
                # true is -1
                self.vmWriter.writePush('constant', 1)
                self.vmWriter.writeArithmetic('NEG')

            elif self.tokenizer.keyWord() == 'FALSE' or self.tokenizer.keyWord(
            ) == 'NULL':
                self.vmWriter.writePush('constant', 0)

            elif self.tokenizer.keyWord() == 'THIS':
                self.vmWriter.writePush('pointer', 0)

            self.tokenizer.advance()

        elif tokenType == 'IDENTIFIER':
            # varName | varName[expression] | subroutineCall

            # Symbol: [ | ( | .
            if self.tokenizer.lookAhead() == '[':
                # varName[expression]
                # NOTE(review): this path still calls the dead subTag hooks,
                # which raise - array reads are not actually implemented.

                # Identifier: varName
                self.subTagIdentifier(
                    self.tokenizer.identifier(), 'VAR', 'FALSE',
                    self.symbolTable.kindOf(self.tokenizer.identifier()),
                    self.symbolTable.indexOf(self.tokenizer.identifier()))
                self.tokenizer.advance()

                # Symbol: [
                self.subTag('symbol')
                self.tokenizer.advance()

                # Expression
                self.compileExpression()

                # Symbol: ]
                self.subTag('symbol')
                self.tokenizer.advance()

            elif self.tokenizer.lookAhead() == '(' or self.tokenizer.lookAhead(
            ) == '.':
                # subroutine call
                self.compileSubroutineCall()

            else:
                # Identifier: varName
                # Retrieve segment and index from symboltable and push to top
                # of stack
                varName = self.tokenizer.identifier()
                if self.symbolTable.kindOf(varName) == 'field':
                    self.vmWriter.writePush('this',
                                            self.symbolTable.indexOf(varName))
                else:
                    self.vmWriter.writePush(self.symbolTable.kindOf(varName),
                                            self.symbolTable.indexOf(varName))
                self.tokenizer.advance()

        elif self.tokenizer.symbol() == '(':

            # ( Expression )

            # Symbol: (
            self.tokenizer.advance()

            # Expression
            self.compileExpression()

            # Symbol: )
            self.tokenizer.advance()

        elif self.tokenizer.symbol() in ['-', '~']:

            # Symbol: unaryop
            op = self.tokenizer.symbol()
            self.tokenizer.advance()

            # Term
            self.compileTerm()

            # Write op
            if op == '-':
                self.vmWriter.writeArithmetic('NEG')
            elif op == '~':
                self.vmWriter.writeArithmetic('NOT')

    def compileExpressionList(self):
        """Compile a (possibly empty) expression list; return the count."""

        nArgs = 0

        # Expression list may be empty, check
        # (was `is not ')'`: identity comparison on str literals is unreliable)
        if self.tokenizer.rawToken() != ')':

            # Expression
            self.compileExpression()
            nArgs += 1

            # Further comma delimited expressions
            while self.tokenizer.rawToken() == ',':
                # Symbol: ,
                self.tokenizer.advance()

                # Expression
                self.compileExpression()
                nArgs += 1

        return nArgs
class CompilationEngine(object):
    """This class recursively compiles a .jack file into (eventually) vm code.
    For now, this just outputs a grammar xml file.

    NOTE(review): relies on the module-level collections `op`, `unaryOp`
    and `KeyWordConstant` defined elsewhere in this file, and on the
    JackTokenizer advance/retreat/reset interface.
    """
    def __init__(self, inFile):
        super(CompilationEngine, self).__init__()
        # create an internal tokenizer to iterate through
        self.tokenizer = JackTokenizer(inFile)

        # spacing so I can make nicely formatted xml; this grows/shrinks
        # by 2 spaces as we recurse in and out of grammar elements
        self.spacing = ""

        # setup the output file. A single open in 'w' mode truncates any
        # previous output; the old open('w')/close()/open('a') sequence
        # was equivalent but created a throwaway file object.
        self.outputPath = inFile.name.replace(".jack", ".xml")
        self.outputFile = open(self.outputPath, 'w')

    # ------------------------------------------------------------------ #
    # low-level xml writing helpers
    # ------------------------------------------------------------------ #

    def __increaseSpacing(self):
        """Adds 2 spaces to self.spacing"""
        self.spacing += "  "

    def __decreaseSpacing(self):
        """Removes 2 spaces from self.spacing"""
        self.spacing = self.spacing[:-2]

    def __writeFullTag(self, tag, value):
        """Writes the spacing, then <tag> value </tag> to the output file"""
        self.outputFile.write(self.spacing + "<" + tag + "> " + value + " </" +
                              tag + ">\n")

    def __writeOpenTag(self, tag):
        """Writes spacing, then <tag>, then increases the spacing"""
        self.outputFile.write(self.spacing + "<" + tag + ">\n")
        self.__increaseSpacing()

    def __writeCloseTag(self, tag):
        """Decreases spacing, then writes spacing, then </tag>"""
        self.__decreaseSpacing()
        self.outputFile.write(self.spacing + "</" + tag + ">\n")

    # ------------------------------------------------------------------ #
    # token inspection helpers (factored out of the repeated
    # tokenType()/symbol()/keyWord() checks below)
    # ------------------------------------------------------------------ #

    def __symbolIs(self, sym):
        """True when the current token is exactly the symbol *sym*."""
        return self.tokenizer.tokenType() == "SYMBOL" and \
            self.tokenizer.symbol() == sym

    def __keywordIn(self, *keywords):
        """True when the current token is a keyword among *keywords*."""
        return self.tokenizer.tokenType() == "KEYWORD" and \
            self.tokenizer.keyWord() in keywords

    def __expectSymbol(self, sym, errMsg):
        """Writes the current token, which must be the symbol *sym*, then
        advances. On a mismatch prints 'Error: ' + errMsg and exits."""
        if not self.__symbolIs(sym):
            print("Error: " + errMsg)
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

    def start(self):
        """Starts the compilation by creating the token XML file
        and then calling __compileClass()"""

        # start the tokenizer
        self.tokenizer.advance()

        # make token xml file (this consumes every token)
        self.__createTokenXML()

        # reset tokenizer and compile
        self.tokenizer.reset()
        self.tokenizer.advance()
        self.__compileClass()

    def __createTokenXML(self):
        """Creates the token XML file (<file>T.xml) for a .jack file"""
        outputPath = self.outputPath.replace(".xml", "T.xml")
        # BUGFIX: the file handle was previously never closed; a context
        # manager guarantees the data is flushed to disk.
        with open(outputPath, 'w') as f:
            f.write("<tokens>\n")
            while self.tokenizer.hasMoreTokens():
                # emit one tag per token, keyed on its type
                tokenType = self.tokenizer.tokenType()
                if tokenType == "KEYWORD":
                    f.write("<keyword>" + self.tokenizer.keyWord() +
                            "</keyword>\n")
                elif tokenType == "SYMBOL":
                    # escape &, <, > and " exactly as __convertOp does
                    f.write("<symbol>" +
                            self.__convertOp(self.tokenizer.symbol()) +
                            "</symbol>\n")
                elif tokenType == "IDENTIFIER":
                    f.write("<identifier>" + self.tokenizer.identifier() +
                            "</identifier>\n")
                elif tokenType == "INT_CONST":
                    f.write("<integerConstant>" + self.tokenizer.intVal() +
                            "</integerConstant>\n")
                elif tokenType == "STRING_CONST":
                    f.write("<stringConstant>" + self.tokenizer.stringVal() +
                            "</stringConstant>\n")

                self.tokenizer.advance()

            # close the xml tag (newline added so the file ends cleanly)
            f.write("</tokens>\n")

    def __compileType(self):
        """Compiles a complete jack type grammar. Returns False on error."""
        # a type is either one of three builtin keywords ...
        if self.tokenizer.tokenType() == "KEYWORD":
            k = self.tokenizer.keyWord()
            if k not in ["int", "char", "boolean"]:
                print("Error: type keyword must be int, char, or boolean")
                return False
            self.__writeFullTag("keyword", k)
            self.tokenizer.advance()
            return True
        # ... or a className identifier
        else:
            res = self.__compileClassName()
            if not res:
                print("Error: type not a valid className")
            return res

    def __compileIdentifier(self):
        """Writes the current IDENTIFIER token and advances. Returns False
        (without advancing) when the current token is not an identifier."""
        if self.tokenizer.tokenType() != "IDENTIFIER":
            return False
        self.__writeFullTag("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()
        return True

    def __compileClassName(self):
        """Compiles a jack className (an identifier). False on error."""
        return self.__compileIdentifier()

    def __compileSubroutineName(self):
        """Compiles a jack subroutineName (an identifier). False on error."""
        return self.__compileIdentifier()

    def __compileVarName(self):
        """Compiles a jack varName (an identifier). False on error."""
        return self.__compileIdentifier()

    def __compileClass(self):
        """Compiles a complete jack class grammar"""
        # find the class keyword
        if not self.__keywordIn("class"):
            print("Error: no class declaration found")
            sys.exit(1)
        # write both the class tag and the keyword tag for class
        self.__writeOpenTag("class")
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # find the className
        if not self.__compileClassName():
            print("Error: no class name found in class declaration")
            sys.exit(1)

        # find the open curly brace
        # BUGFIX: this error path used to call sys.exit(0) (success);
        # __expectSymbol exits with status 1 like every other error path
        self.__expectSymbol("{", "no opening brace found after class")

        # compile the classVarDecs
        while self.__keywordIn("static", "field"):
            self.__compileClassVarDec()

        # compile the subroutines
        while self.__keywordIn("constructor", "function", "method"):
            self.__compileSubroutineDec()

        # find last curly brace
        self.__expectSymbol("}", "no closing brace found after class definition")

        # close class tag
        self.__writeCloseTag("class")
        # BUGFIX: a second, redundant tokenizer.advance() used to follow
        # here; __expectSymbol already advanced past the final '}'

    def __compileClassVarDec(self):
        """Compiles a complete jack class variable declaration. This advances
        the tokenizer completely through the variable declaration."""
        # the caller already verified static/field, so write it directly
        self.__writeOpenTag("classVarDec")
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # look for a valid type
        if not self.__compileType():
            sys.exit(1)

        # check for varName
        if not self.__compileVarName():
            print("Error: missing varName identifier in classVarDec")
            sys.exit(1)

        # check for comma then more varNames (possibly not existing)
        while self.__symbolIs(","):
            self.__writeFullTag("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            if not self.__compileVarName():
                print("Error: missing varName identifier in classVarDec")
                sys.exit(1)

        # check for closing semicolon
        self.__expectSymbol(";", "missing semicolon after classVarDec")

        # close classVarDec tag
        self.__writeCloseTag("classVarDec")

    def __compileSubroutineDec(self):
        """Compiles a complete jack subroutine declaration. This advances the
        tokenizer completely through the subroutine declaration."""
        self.__writeOpenTag("subroutineDec")
        # the caller already verified constructor/function/method
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # return type is either void or a valid type
        if self.__keywordIn("void"):
            self.__writeFullTag("keyword", self.tokenizer.keyWord())
            self.tokenizer.advance()
        elif not self.__compileType():
            print("Error: subroutine return type not void or valid type")
            sys.exit(1)

        # check for subroutineName
        if not self.__compileSubroutineName():
            print("Error: missing subroutineName in subroutineDec")
            sys.exit(1)

        # ( parameterList ) -- the list itself may be empty
        self.__expectSymbol("(", "missing ( for parameter list")
        self.__compileParameterList()
        self.__expectSymbol(")", "missing ) for parameter list")

        # compile subroutine body
        self.__compileSubroutineBody()

        # close subroutineDec tag
        self.__writeCloseTag("subroutineDec")

    def __compileParameterList(self):
        """Compiles a (possibly empty) jack parameter list grammar"""
        self.__writeOpenTag("parameterList")

        # a ')' right away means the list is empty; the caller writes it
        if not self.__symbolIs(")"):
            # look for a valid type
            if not self.__compileType():
                sys.exit(1)

            # check for varName
            if not self.__compileVarName():
                print("Error: missing varName identifier in parameterList")
                sys.exit(1)

            # further ', type varName' groups
            while self.__symbolIs(","):
                self.__writeFullTag("symbol", self.tokenizer.symbol())
                self.tokenizer.advance()

                if not self.__compileType():
                    sys.exit(1)
                if not self.__compileVarName():
                    print("Error: missing varName identifier in parameterList")
                    sys.exit(1)

        self.__writeCloseTag("parameterList")

    def __compileSubroutineBody(self):
        """Compile a complete jack subroutine body grammar"""
        self.__writeOpenTag("subroutineBody")

        self.__expectSymbol("{", "missing { for subroutine body")

        # local variable declarations come first
        while self.__keywordIn("var"):
            self.__compileVarDec()

        # compile statements
        self.__compileStatements()

        self.__expectSymbol("}", "missing closing } for subroutine body")

        self.__writeCloseTag("subroutineBody")

    def __compileVarDec(self):
        """Compiles a complete jack varDec grammar"""
        self.__writeOpenTag("varDec")
        # the caller already verified the 'var' keyword, write it
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # check for type
        if not self.__compileType():
            sys.exit(1)

        # check for varName
        if not self.__compileVarName():
            print("Error: missing varName identifier in varDec")
            sys.exit(1)

        # further ', varName' pairs
        while self.__symbolIs(","):
            self.__writeFullTag("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            if not self.__compileVarName():
                print("Error: missing varName identifier in varDec")
                sys.exit(1)

        # check for semicolon
        self.__expectSymbol(";", "missing ; after varDec")

        self.__writeCloseTag("varDec")

    def __compileStatements(self):
        """Compiles a complete jack statements grammar"""
        self.__writeOpenTag("statements")

        # every statement begins with one of five keywords
        while self.tokenizer.tokenType() == "KEYWORD":
            k = self.tokenizer.keyWord()

            if k == "let":
                self.__compileLetStatement()
            elif k == "if":
                self.__compileIfStatement()
            elif k == "while":
                self.__compileWhileStatement()
            elif k == "do":
                self.__compileDoStatement()
            elif k == "return":
                self.__compileReturnStatement()
            else:
                # BUGFIX: message previously read "statment"
                print("Error: invalid statement " + k)
                sys.exit(1)

        self.__writeCloseTag("statements")

    def __compileLetStatement(self):
        """Compiles a complete jack let statement grammar"""
        self.__writeOpenTag("letStatement")
        # the caller already verified the keyword let, write it
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # look for varName
        if not self.__compileVarName():
            print("Error: missing varName for let statement")
            # BUGFIX: every other error path exits; this one fell through
            sys.exit(1)

        # optional array index: [ expression ]
        if self.__symbolIs("["):
            self.__writeFullTag("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            self.__compileExpression()
            self.__expectSymbol("]", "missing closing ] in let statement")

        # = expression ;
        self.__expectSymbol("=", "missing = in let expression")
        self.__compileExpression()
        self.__expectSymbol(";", "missing ; after let statement")

        self.__writeCloseTag("letStatement")

    def __compileIfStatement(self):
        """Compiles a complete jack if statement grammar"""
        self.__writeOpenTag("ifStatement")
        # the caller already verified the keyword if, write it
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # ( expression )
        self.__expectSymbol("(", "missing ( in if statement")
        self.__compileExpression()
        self.__expectSymbol(")", "missing ) in if statement")

        # { statements }
        self.__expectSymbol("{", "missing { for if statement")
        self.__compileStatements()
        self.__expectSymbol("}", "missing } after if statement")

        # optional else clause
        if self.__keywordIn("else"):
            self.__writeFullTag("keyword", self.tokenizer.keyWord())
            self.tokenizer.advance()

            self.__expectSymbol("{", "missing { for if statement")
            self.__compileStatements()
            self.__expectSymbol("}", "missing } after if statement")

        self.__writeCloseTag("ifStatement")

    def __compileWhileStatement(self):
        """Compiles a complete jack while statement grammar"""
        self.__writeOpenTag("whileStatement")
        # the caller already verified the keyword while, write it
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # ( expression )
        # BUGFIX: these error messages used to say "if statement"
        self.__expectSymbol("(", "missing ( in while statement")
        self.__compileExpression()
        self.__expectSymbol(")", "missing ) in while statement")

        # { statements }
        self.__expectSymbol("{", "missing { for while statement")
        self.__compileStatements()
        self.__expectSymbol("}", "missing } after while statement")

        self.__writeCloseTag("whileStatement")

    def __compileDoStatement(self):
        """Compiles a complete jack do statement grammar"""
        self.__writeOpenTag("doStatement")
        # the caller already verified the keyword do, write it
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # subroutineCall ;
        self.__compileSubroutineCall()
        self.__expectSymbol(";", "missing ; after do statement")

        self.__writeCloseTag("doStatement")

    def __compileReturnStatement(self):
        """Compiles a complete jack return statement grammar"""
        self.__writeOpenTag("returnStatement")
        # the caller already verified the keyword return, write it
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # anything other than an immediate ';' must be an expression
        if not self.__symbolIs(";"):
            self.__compileExpression()

        self.__expectSymbol(";", "missing ; after return statement")

        self.__writeCloseTag("returnStatement")

    def __convertOp(self, op):
        """Converts the operators that interfere with xml tags to their
        properly escaped versions"""
        op = op.replace("&", "&amp;")
        op = op.replace("<", "&lt;")
        op = op.replace(">", "&gt;")
        op = op.replace("\"", "&quot;")

        return op

    def __compileExpression(self):
        """Compiles a complete jack expression grammar"""
        self.__writeOpenTag("expression")

        # term (op term)* -- `op` is the module-level binary operator set
        self.__compileTerm()

        while self.tokenizer.tokenType() == "SYMBOL" and \
                self.tokenizer.symbol() in op:
            self.__writeFullTag("symbol",
                                self.__convertOp(self.tokenizer.symbol()))
            self.tokenizer.advance()
            self.__compileTerm()

        self.__writeCloseTag("expression")

    def __compileTerm(self):
        """Compiles a complete jack term grammar"""
        self.__writeOpenTag("term")

        tokenType = self.tokenizer.tokenType()

        # integer constant
        if tokenType == "INT_CONST":
            self.__writeFullTag("integerConstant", self.tokenizer.intVal())
            self.tokenizer.advance()
        # string constant
        elif tokenType == "STRING_CONST":
            self.__writeFullTag("stringConstant", self.tokenizer.stringVal())
            self.tokenizer.advance()
        # keyword constant (true/false/null/this per KeyWordConstant)
        elif tokenType == "KEYWORD":
            k = self.tokenizer.keyWord()
            if k not in KeyWordConstant:
                # BUGFIX: message previously lacked the space before the name
                print("Error: invalid KeyWordConstant " + k + " in term")
                sys.exit(1)
            self.__writeFullTag("keyword", k)
            self.tokenizer.advance()
        # symbol: either ( expression ) or unaryOp term
        elif tokenType == "SYMBOL":
            s = self.tokenizer.symbol()
            if s == "(":
                self.__writeFullTag("symbol", s)
                self.tokenizer.advance()
                self.__compileExpression()
                self.__expectSymbol(")", "missing ) after expression in term")
            elif s in unaryOp:
                self.__writeFullTag("symbol", s)
                self.tokenizer.advance()
                self.__compileTerm()
            else:
                print("Error: invalid symbol " + s + " in term")
                sys.exit(1)
        # identifier: varName | varName[expression] | subroutineCall
        elif tokenType == "IDENTIFIER":
            # one token of lookahead distinguishes the three forms
            self.tokenizer.advance()
            lookahead = None
            if self.tokenizer.tokenType() == "SYMBOL":
                lookahead = self.tokenizer.symbol()
            # always return to the identifier before compiling
            self.tokenizer.retreat()

            if lookahead == "[":
                # varName [ expression ]
                if not self.__compileVarName():
                    print("Error: invalid varName in term")
                    sys.exit(1)
                self.__writeFullTag("symbol", self.tokenizer.symbol())
                self.tokenizer.advance()
                self.__compileExpression()
                self.__expectSymbol("]", "missing ] after varName[expression]")
            elif lookahead == "(" or lookahead == ".":
                # subroutineCall
                self.__compileSubroutineCall()
            else:
                # bare varName
                if not self.__compileVarName():
                    print("Error: invalid varName in term")
                    sys.exit(1)
        else:
            print("Error: invalid term")
            sys.exit(1)

        self.__writeCloseTag("term")

    def __compileSubroutineCall(self):
        """Compiles a complete jack subroutine call grammar"""
        # look ahead one token to see if it is a ( or a .
        self.tokenizer.advance()
        plainCall = self.__symbolIs("(")
        dottedCall = self.__symbolIs(".")
        self.tokenizer.retreat()

        if plainCall:
            # subroutineName ( expressionList )
            if not self.__compileSubroutineName():
                print("Error: invalid subroutineName in subroutineCall")
                sys.exit(1)
        elif dottedCall:
            # (className | varName) . subroutineName ( expressionList )
            if self.tokenizer.tokenType() != "IDENTIFIER":
                print("Error: missing varName|className in subroutineCall")
            # Hacky, but className and varName both correspond to just an
            # identifier, so __compileVarName handles both
            if not self.__compileVarName():
                print("Error: invalid className or varName in subroutineCall")
                sys.exit(1)

            self.__expectSymbol(".", "missing . in subroutineCall")

            if not self.__compileSubroutineName():
                print("Error: missing subroutineName after . in subroutineCall")
                sys.exit(1)
        else:
            print("Error: invalid subroutineCall")
            sys.exit(1)

        # BUGFIX: the original '(' checks used `and` instead of `or`
        # (`tokenType() != "SYMBOL" and symbol() != "("`), so the guard
        # could never fire correctly; __expectSymbol performs the
        # corrected check in both call forms.
        self.__expectSymbol("(", "missing ( in subroutineCall before expressionList")
        self.__compileExpressionList()
        self.__expectSymbol(")", "missing ) after expressionList in subroutineCall")

    def __compileExpressionList(self):
        """Compiles a (possibly empty) jack expression list grammar"""
        self.__writeOpenTag("expressionList")

        # a ')' right away means the expression list is empty
        if not self.__symbolIs(")"):
            self.__compileExpression()

            # further ', expression' groups
            while self.__symbolIs(","):
                self.__writeFullTag("symbol", self.tokenizer.symbol())
                self.tokenizer.advance()
                self.__compileExpression()

        self.__writeCloseTag("expressionList")
class CompilationEngine:
    """Recursive-descent parser for the Jack language (syntax-analyzer stage).

    Consumes tokens from a JackTokenizer and writes the parse tree as XML
    to ``outputFile``.  The numeric token-type codes used throughout are
    the tokenizer's (inferred from usage in this file):
    0 = KEYWORD, 1 = SYMBOL, 2 = IDENTIFIER, 3 = INT_CONST, 4 = STRING_CONST.

    ``operators`` and ``keyword_const`` referenced below are module-level
    constants defined elsewhere in this file.
    """

    # XML markup characters that must be escaped inside <symbol> elements.
    _XML_ESCAPES = {"<": "&lt;", ">": "&gt;", "&": "&amp;"}

    def __init__(self, inputFile, outputFile):
        """Open the XML output, tokenize ``inputFile`` and compile it."""
        self.XMLFile = open(outputFile, 'w')
        self.tokenizer = JackTokenizer(inputFile)
        self.CompileClass()

    def __writeToken(self, token, value):
        """Emit one terminal element: ``<token> value </token>``.

        Bug fixes: symbols <, > and & are now escaped (&lt; &gt; &amp;) so
        the output stays well-formed XML, and ``value`` is coerced to str
        so an integer from intVal() no longer raises TypeError on
        concatenation.
        """
        value = str(value)
        if token == "symbol":
            value = self._XML_ESCAPES.get(value, value)
        self.XMLFile.write("<" + token + "> " + value + " </" + token + ">\n")

    def CompileClass(self):
        """Compile: 'class' className '{' classVarDec* subroutineDec* '}'"""
        self.XMLFile.write("<class>\n")
        self.__writeToken("keyword", self.tokenizer.keyWord())        # class
        self.tokenizer.advance()
        self.__writeToken("identifier", self.tokenizer.identifier())  # name
        self.tokenizer.advance()
        self.__writeToken("symbol", self.tokenizer.symbol())          # {
        self.tokenizer.advance()

        # zero or more class-level variable declarations
        while self.tokenizer.keyWord() in ("static", "field"):
            self.CompileClassVarDec()

        # zero or more subroutine declarations
        while self.tokenizer.keyWord() in ("constructor", "function",
                                           "method"):
            self.CompileSubroutine()

        self.__writeToken("symbol", self.tokenizer.symbol())          # }
        self.tokenizer.advance()
        self.XMLFile.write("</class>\n")

    def CompileClassVarDec(self):
        """Compile: ('static' | 'field') type varName (',' varName)* ';'"""
        self.XMLFile.write("<classVarDec>\n")

        self.__writeToken("keyword", self.tokenizer.keyWord())  # static|field
        self.tokenizer.advance()
        self.compileType()
        self.__writeToken("identifier", self.tokenizer.identifier())  # name
        self.tokenizer.advance()

        # additional comma-separated var names, if any
        while self.tokenizer.symbol() == ",":
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            self.__writeToken("identifier", self.tokenizer.identifier())
            self.tokenizer.advance()

        self.__writeToken("symbol", self.tokenizer.symbol())          # ;
        self.tokenizer.advance()
        self.XMLFile.write("</classVarDec>\n")

    def CompileSubroutine(self):
        """Compile a complete subroutineDec including its body."""
        self.XMLFile.write("<subroutineDec>\n")

        # constructor | function | method
        self.__writeToken("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # void | type
        self.compileType()

        # subroutineName
        self.__writeToken("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()

        # ( parameterList )
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        self.compileParameterList()
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # subroutineBody
        self.compileSubroutineBody()

        self.XMLFile.write("</subroutineDec>\n")

    def compileParameterList(self):
        """Compile a (possibly empty) parameter list, excluding the ()."""
        self.XMLFile.write("<parameterList>\n")
        # a SYMBOL here is the closing ")" => the list is empty
        if self.tokenizer.tokenType() != 1:

            # type varName
            self.compileType()
            self.__writeToken("identifier", self.tokenizer.identifier())
            self.tokenizer.advance()

            # (, type varName)*
            while self.tokenizer.symbol() == ",":
                self.__writeToken("symbol", self.tokenizer.symbol())
                self.tokenizer.advance()
                self.compileType()
                self.__writeToken("identifier", self.tokenizer.identifier())
                self.tokenizer.advance()

        self.XMLFile.write("</parameterList>\n")

    def compileSubroutineBody(self):
        """Compile: '{' varDec* statements '}'"""
        self.XMLFile.write("<subroutineBody>\n")
        # {
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # varDec*
        while self.tokenizer.keyWord() == "var":
            self.compileVarDec()

        # statements
        self.compileStatements()

        # }
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        self.XMLFile.write("</subroutineBody>\n")

    def compileVarDec(self):
        """Compile: 'var' type varName (',' varName)* ';'"""
        self.XMLFile.write("<varDec>\n")

        # var
        self.__writeToken("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # type
        self.compileType()

        # varName
        self.__writeToken("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()

        # (, varName)*
        while self.tokenizer.symbol() == ",":
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            self.__writeToken("identifier", self.tokenizer.identifier())
            self.tokenizer.advance()

        # ;
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        self.XMLFile.write("</varDec>\n")

    def compileStatements(self):
        """Compile a sequence of statements (loops while tokens are keywords)."""
        self.XMLFile.write("<statements>\n")
        while self.tokenizer.tokenType() == 0:  # 0 == KEYWORD
            if self.tokenizer.keyWord() == "let":
                self.compileLet()
            elif self.tokenizer.keyWord() == "if":
                self.compileIf()
            elif self.tokenizer.keyWord() == "while":
                self.compileWhile()
            elif self.tokenizer.keyWord() == "do":
                self.compileDo()
            elif self.tokenizer.keyWord() == "return":
                self.compileReturn()
        self.XMLFile.write("</statements>\n")

    def compileDo(self):
        """Compile: 'do' subroutineCall ';'"""
        self.XMLFile.write("<doStatement>\n")

        # do
        self.__writeToken("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        self.compileSubRoutineCall()

        # ;
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        self.XMLFile.write("</doStatement>\n")

    def compileLet(self):
        """Compile: 'let' varName ('[' expression ']')? '=' expression ';'"""
        self.XMLFile.write("<letStatement>\n")

        # let
        self.__writeToken("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # varName
        self.__writeToken("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()

        # ([ expression ])?  — array indexing target
        if self.tokenizer.symbol() == "[":
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            self.CompileExpression()
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()

        # =
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # expression
        self.CompileExpression()

        # ;
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        self.XMLFile.write("</letStatement>\n")

    def compileWhile(self):
        """Compile: 'while' '(' expression ')' '{' statements '}'"""
        self.XMLFile.write("<whileStatement>\n")

        # while
        self.__writeToken("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # ( expression )
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        self.CompileExpression()
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # {
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # statements
        self.compileStatements()

        # }
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        self.XMLFile.write("</whileStatement>\n")

    def compileReturn(self):
        """Compile: 'return' expression? ';'"""
        self.XMLFile.write("<returnStatement>\n")

        # return
        self.__writeToken("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # expression? — present iff the current token can start a term
        if self.isTerm():
            self.CompileExpression()

        # ;
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        self.XMLFile.write("</returnStatement>\n")

    def compileIf(self):
        """Compile: 'if' '(' expr ')' '{' stmts '}' ('else' '{' stmts '}')?"""
        self.XMLFile.write("<ifStatement>\n")
        # if
        self.__writeToken("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()
        # ( expression )
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        self.CompileExpression()
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # { statements }
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()
        self.compileStatements()
        self.__writeToken("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # optional else clause
        if self.tokenizer.tokenType() == 0 and \
                self.tokenizer.keyWord() == "else":
            # else
            self.__writeToken("keyword", self.tokenizer.keyWord())
            self.tokenizer.advance()

            # { statements }
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            self.compileStatements()
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()

        self.XMLFile.write("</ifStatement>\n")

    def CompileExpression(self):
        """Compile: term (op term)*"""
        self.XMLFile.write("<expression>\n")
        # term
        self.CompileTerm()
        # (op term)*
        while self.tokenizer.tokenType() == 1 and \
                self.tokenizer.symbol() in operators:
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            self.CompileTerm()

        self.XMLFile.write("</expression>\n")

    def CompileTerm(self):
        """Compile a single term; uses one token of lookahead to
        distinguish varName, varName[expr] and subroutine calls."""
        self.XMLFile.write("<term>\n")
        if self.tokenizer.tokenType() == 3:    # integerConstant
            self.__writeToken("integerConstant", self.tokenizer.intVal())
            self.tokenizer.advance()

        elif self.tokenizer.tokenType() == 4:  # stringConstant
            self.__writeToken("stringConstant", self.tokenizer.stringVal())
            self.tokenizer.advance()

        elif self.tokenizer.tokenType() == 0:  # keywordConstant
            self.__writeToken("keyword", self.tokenizer.keyWord())
            self.tokenizer.advance()

        elif self.tokenizer.tokenType() == 2:  # identifier: peek next token

            if self.tokenizer.tokens[self.tokenizer.currentToken + 1] == '[':
                self.__writeToken("identifier", self.tokenizer.identifier())
                self.tokenizer.advance()
                # [ expression ]
                self.__writeToken("symbol", self.tokenizer.symbol())
                self.tokenizer.advance()
                self.CompileExpression()
                self.__writeToken("symbol", self.tokenizer.symbol())
                self.tokenizer.advance()

            elif self.tokenizer.tokens[self.tokenizer.currentToken + 1] \
                    in ('(', '.'):
                # name( ... ) or name.sub( ... )
                self.compileSubRoutineCall()

            else:
                # plain varName
                self.__writeToken("identifier", self.tokenizer.identifier())
                self.tokenizer.advance()
        elif self.tokenizer.tokenType() == 1 and \
                self.tokenizer.symbol() == '(':
            # ( expression )
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            self.CompileExpression()
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
        else:
            # unaryOp term
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            self.CompileTerm()

        self.XMLFile.write("</term>\n")

    def compileSubRoutineCall(self):
        """Compile: name '(' exprList ')'  |  name '.' name '(' exprList ')'"""
        # subroutineName | (className | varName)
        self.__writeToken("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()

        if self.tokenizer.symbol() == '(':
            # ( expressionList )
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            self.CompileExpressionList()
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
        else:
            # .
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()

            # subroutineName
            self.__writeToken("identifier", self.tokenizer.identifier())
            self.tokenizer.advance()

            # ( expressionList )
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
            self.CompileExpressionList()
            self.__writeToken("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()

    def CompileExpressionList(self):
        """Compile a (possibly empty) comma-separated expression list."""
        self.XMLFile.write("<expressionList>\n")
        # non-empty iff the current token can start a term
        if self.isTerm():
            self.CompileExpression()
            # (, expression)*
            while self.tokenizer.symbol() == ',':
                self.__writeToken("symbol", self.tokenizer.symbol())
                self.tokenizer.advance()
                self.CompileExpression()
        self.XMLFile.write("</expressionList>\n")

    def isTerm(self):
        """Return True iff the current token can start a term."""
        # integerConstant or stringConstant
        if self.tokenizer.tokenType() == 3 or self.tokenizer.tokenType() == 4:
            return True
        # keyword constant (true/false/null/this)
        if self.tokenizer.tokenType() == 0 and \
                self.tokenizer.keyWord() in keyword_const:
            return True
        # parenthesised expression
        if self.tokenizer.tokenType() == 1 and self.tokenizer.symbol() == '(':
            return True
        # unary operator
        if self.tokenizer.tokenType() == 1 and \
                self.tokenizer.symbol() in ('-', '~'):
            return True
        # identifier (varName / subroutine call)
        if self.tokenizer.tokenType() == 2:
            return True
        return False

    def compileType(self):
        """Compile a type: a keyword (int/char/boolean/void) or a className."""
        if self.tokenizer.tokenType() == 0:
            self.__writeToken("keyword", self.tokenizer.keyWord())
        else:
            self.__writeToken("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()
class CompilationEngine:
    """
    Recursive-descent parser for the Jack language.

    Reads tokens from a JackTokenizer and writes an indented XML parse
    tree to ``output_path``.  ``OP_LIST`` referenced below is a
    module-level constant defined elsewhere in this file.
    """
    def __init__(self, input_file_path, output_path):
        """Set up the tokenizer and open the XML output file.

        :param input_file_path: path of the .jack source to parse
        :param output_path: path of the XML file to write
        """
        self._indentation = 0                  # current XML nesting depth
        self._tokenizer = JackTokenizer(input_file_path)
        self._output = open(output_path, "w+")

    def compileClass(self):
        """Compile: 'class' className '{' classVarDec* subroutineDec* '}'
        and close the output file."""
        if self._tokenizer.hasMoreTokens():
            self._tokenizer.advance()
            self._output.write("<class>\n")
            self._indentation += 1

            self._write_keyword()              # class

            self._tokenizer.advance()
            self._write_identifier()           # className

            self._tokenizer.advance()
            self._write_symbol()               # {

            self._tokenizer.advance()
            while self._tokenizer.keyWord() == "static" or \
                    self._tokenizer.keyWord() == "field":
                self.compileClassVarDec()
            while self._tokenizer.keyWord() == "constructor" or \
                    self._tokenizer.keyWord() == "function" \
                    or self._tokenizer.keyWord() == "method":
                self.compileSubroutine()

            self._write_symbol()               # }

            self._indentation -= 1
            self._output.write("</class>\n")
            self._output.close()

    def compileClassVarDec(self):
        """Compile one ('static'|'field') type varName (',' varName)* ';'
        declaration.  Callers loop while such declarations remain."""
        self._output.write("  " * self._indentation + "<classVarDec>\n")
        self._indentation += 1
        self._write_keyword()                  # static | field

        self._tokenizer.advance()
        self._compile_type_and_varName()

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</classVarDec>\n")

    def compileSubroutine(self):
        """Compile a complete subroutineDec, including its body."""
        self._output.write("  " * self._indentation + "<subroutineDec>\n")
        self._indentation += 1
        self._write_keyword()                  # constructor|function|method

        self._tokenizer.advance()
        # return type: 'void'/primitive keyword or a className identifier
        if self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
            self._write_keyword()
        elif self._tokenizer.tokenType() == self._tokenizer.IDENTIFIER:
            self._write_identifier()

        self._tokenizer.advance()
        self._write_identifier()               # subroutineName

        self._tokenizer.advance()
        self._write_symbol()                   # (

        self._tokenizer.advance()
        self.compileParameterList()

        self._write_symbol()                   # )

        self._tokenizer.advance()
        # compile subroutineBody: '{' varDec* statements '}'
        self._output.write("  " * self._indentation + "<subroutineBody>\n")
        self._indentation += 1
        self._write_symbol()                   # {

        self._tokenizer.advance()
        while self._tokenizer.keyWord() == "var":
            self.compileVarDec()

        self.compileStatements()

        self._write_symbol()                   # }
        self._indentation -= 1
        self._output.write("  " * self._indentation + "</subroutineBody>\n")
        self._indentation -= 1
        self._output.write("  " * self._indentation + "</subroutineDec>\n")
        self._tokenizer.advance()

    def compileParameterList(self):
        """Compile a (possibly empty) parameter list, excluding the ().
        Loops until the closing ')' symbol is the current token."""
        self._output.write("  " * self._indentation + "<parameterList>\n")
        self._indentation += 1
        while self._tokenizer.tokenType() != self._tokenizer.SYMBOL:
            # type: keyword (int/char/boolean) or className identifier
            if self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
                self._write_keyword()
            elif self._tokenizer.tokenType() == self._tokenizer.IDENTIFIER:
                self._write_identifier()
            self._tokenizer.advance()
            self._write_identifier()           # varName
            self._tokenizer.advance()
            if self._tokenizer.symbol() == ",":
                self._write_symbol()
                self._tokenizer.advance()

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</parameterList>\n")

    def compileVarDec(self):
        """Compile: 'var' type varName (',' varName)* ';'"""
        self._output.write("  " * self._indentation + "<varDec>\n")
        self._indentation += 1

        self._write_keyword()                  # var
        self._tokenizer.advance()
        self._compile_type_and_varName()

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</varDec>\n")

    def compileStatements(self):
        """Compile a sequence of statements (loops while tokens are
        keywords)."""
        self._output.write("  " * self._indentation + "<statements>\n")
        self._indentation += 1
        while self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
            if self._tokenizer.keyWord() == "let":
                self.compileLet()
            elif self._tokenizer.keyWord() == "if":
                self.compileIf()
            elif self._tokenizer.keyWord() == "while":
                self.compileWhile()
            elif self._tokenizer.keyWord() == "do":
                self.compileDo()
            elif self._tokenizer.keyWord() == "return":
                self.compileReturn()
        self._indentation -= 1
        self._output.write("  " * self._indentation + "</statements>\n")

    def compileDo(self):
        """Compile: 'do' subroutineCall ';'"""
        self._output.write("  " * self._indentation + "<doStatement>\n")
        self._indentation += 1
        self._write_keyword()                  # do

        self._tokenizer.advance()
        # subroutineCall: name[.name]( expressionList )
        self._write_identifier()
        self._tokenizer.advance()
        if self._tokenizer.symbol() == ".":
            self._write_symbol()               # .
            self._tokenizer.advance()
            self._write_identifier()           # subroutineName
            self._tokenizer.advance()

        self._write_symbol()                   # (

        self._tokenizer.advance()
        self.compileExpressionList()

        self._write_symbol()                   # )

        self._tokenizer.advance()
        self._write_symbol()                   # ;

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</doStatement>\n")
        self._tokenizer.advance()

    def compileLet(self):
        """Compile: 'let' varName ('[' expression ']')? '=' expression ';'"""
        self._output.write("  " * self._indentation + "<letStatement>\n")
        self._indentation += 1
        self._write_keyword()                  # let

        self._tokenizer.advance()
        self._write_identifier()               # varName

        self._tokenizer.advance()
        if self._tokenizer.symbol() == "[":    # optional array index
            self._write_symbol()
            self._tokenizer.advance()
            self.compileExpression()
            self._write_symbol()
            self._tokenizer.advance()

        self._write_symbol()                   # =

        self._tokenizer.advance()
        self.compileExpression()
        self._write_symbol()                   # ;

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</letStatement>\n")
        self._tokenizer.advance()

    def compileWhile(self):
        """Compile: 'while' '(' expression ')' '{' statements '}'"""
        self._output.write("  " * self._indentation + "<whileStatement>\n")
        self._indentation += 1
        self._write_keyword()                  # while

        self._tokenizer.advance()
        self._write_symbol()                   # (

        self._tokenizer.advance()
        self.compileExpression()

        self._write_symbol()                   # )

        self._tokenizer.advance()
        self._write_symbol()                   # {

        self._tokenizer.advance()
        self.compileStatements()

        self._write_symbol()                   # }

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</whileStatement>\n")
        self._tokenizer.advance()

    def compileReturn(self):
        """Compile: 'return' expression? ';'

        Bug fix: the expression test used ``and`` where ``or`` was
        needed, so a return expression beginning with a symbol
        (unary '-'/'~' or '(') was silently skipped.
        """
        self._output.write("  " * self._indentation + "<returnStatement>\n")
        self._indentation += 1
        self._write_keyword()                  # return

        self._tokenizer.advance()
        # expression present unless the current token is the closing ';'
        if self._tokenizer.tokenType() != self._tokenizer.SYMBOL or \
                self._tokenizer.symbol() != ";":
            self.compileExpression()

        self._write_symbol()                   # ;

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</returnStatement>\n")
        self._tokenizer.advance()

    def compileIf(self):
        """Compile: 'if' '(' expr ')' '{' stmts '}' ('else' '{' stmts '}')?"""
        self._output.write("  " * self._indentation + "<ifStatement>\n")
        self._indentation += 1
        self._write_keyword()                  # if

        self._tokenizer.advance()
        self._write_symbol()                   # (

        self._tokenizer.advance()
        self.compileExpression()

        self._write_symbol()                   # )

        self._tokenizer.advance()
        self._write_symbol()                   # {

        self._tokenizer.advance()
        self.compileStatements()

        self._write_symbol()                   # }

        self._tokenizer.advance()
        if self._tokenizer.tokenType() == self._tokenizer.KEYWORD and \
                self._tokenizer.keyWord() == "else":
            self._write_keyword()              # else

            self._tokenizer.advance()
            self._write_symbol()               # {

            self._tokenizer.advance()
            self.compileStatements()

            self._write_symbol()               # }
            self._tokenizer.advance()

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</ifStatement>\n")

    def compileExpression(self):
        """Compile: term (op term)*

        Note that tokenizer must be advanced before this is called!!!
        """
        self._output.write("  " * self._indentation + "<expression>\n")
        self._indentation += 1

        self.compileTerm()
        while self._tokenizer.tokenType() == self._tokenizer.SYMBOL and \
                self._tokenizer.symbol() in OP_LIST:
            self._write_symbol()
            self._tokenizer.advance()
            self.compileTerm()

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</expression>\n")

    def compileTerm(self):
        """Compile a single term.

        ``needs_final_advance`` tracks whether the branch taken has left
        the tokenizer on its last consumed token (and so needs one more
        advance at the end) or has already advanced past the term.
        """
        needs_final_advance = True
        self._output.write("  " * self._indentation + "<term>\n")
        self._indentation += 1
        if self._tokenizer.tokenType() == self._tokenizer.INT_CONST:
            self._write_int_const()
        elif self._tokenizer.tokenType() == self._tokenizer.STRING_CONST:
            self._write_str_const()
        elif self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
            self._write_keyword()              # keyword constant
        elif self._tokenizer.tokenType() == self._tokenizer.IDENTIFIER:
            self._write_identifier()

            self._tokenizer.advance()
            needs_final_advance = False        # plain varName: already past it
            if self._tokenizer.symbol() == "[":
                # varName [ expression ]
                needs_final_advance = True
                self._write_symbol()
                self._tokenizer.advance()
                self.compileExpression()
                self._write_symbol()
            elif self._tokenizer.symbol() == ".":  # method/function call
                needs_final_advance = True
                self._write_symbol()
                self._tokenizer.advance()
                self._write_identifier()
                self._tokenizer.advance()
                self._write_symbol()
                self._tokenizer.advance()
                self.compileExpressionList()
                self._write_symbol()
            elif self._tokenizer.symbol() == "(":
                # direct subroutine call
                needs_final_advance = True
                self._write_symbol()
                self._tokenizer.advance()
                self.compileExpressionList()
                self._write_symbol()

        elif self._tokenizer.symbol() == "(":
            # ( expression )
            self._write_symbol()
            self._tokenizer.advance()
            self.compileExpression()
            self._write_symbol()
        elif self._tokenizer.symbol() == "~" or self._tokenizer.symbol() == \
                "-":
            # unaryOp term — recursion already advances past the term
            self._write_symbol()
            self._tokenizer.advance()
            self.compileTerm()
            needs_final_advance = False

        if needs_final_advance:
            self._tokenizer.advance()

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</term>\n")

    def compileExpressionList(self):
        """Compile a (possibly empty) comma-separated expression list.

        Bug fix: the emptiness test used ``and`` where ``or`` was needed,
        so any list whose first token was a symbol was skipped; a
        duplicated special-case for "(" papered over part of that and
        would double-compile once the condition is fixed, so it was
        removed.  The list is empty only when the current token is the
        closing ")".
        """
        self._output.write("  " * self._indentation + "<expressionList>\n")
        self._indentation += 1

        if self._tokenizer.tokenType() != self._tokenizer.SYMBOL or \
                self._tokenizer.symbol() != ")":
            self.compileExpression()
            while self._tokenizer.tokenType() == self._tokenizer.SYMBOL and \
                    self._tokenizer.symbol() == ",":
                self._write_symbol()
                self._tokenizer.advance()
                self.compileExpression()

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</expressionList>\n")

    def _compile_type_and_varName(self):
        """Compile the shared tail of var declarations:
        type varName (',' varName)* ';'"""
        if self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
            self._write_keyword()
        elif self._tokenizer.tokenType() == self._tokenizer.IDENTIFIER:
            self._write_identifier()
        self._tokenizer.advance()
        self._write_identifier()
        self._tokenizer.advance()
        while self._tokenizer.symbol() == ",":
            self._write_symbol()
            self._tokenizer.advance()
            self._write_identifier()
            self._tokenizer.advance()
        self._write_symbol()                   # ;
        self._tokenizer.advance()

    def _write_identifier(self):
        """Write the current identifier token as an <identifier> element."""
        self._output.write("  " * self._indentation + "<identifier> " +
                           self._tokenizer.identifier() + " </identifier>\n")

    def _write_keyword(self):
        """Write the current keyword token as a <keyword> element."""
        self._output.write("  " * self._indentation + "<keyword> " +
                           self._tokenizer.keyWord() + " </keyword>\n")

    def _write_symbol(self):
        """Write the current symbol, escaping XML markup characters.

        Bug fix: the entities were missing their trailing ';'
        ("&lt" instead of "&lt;" etc.), producing malformed XML.
        """
        string_to_write = self._tokenizer.symbol()
        if string_to_write == "<":
            string_to_write = "&lt;"
        elif string_to_write == ">":
            string_to_write = "&gt;"
        elif string_to_write == "&":
            string_to_write = "&amp;"
        self._output.write("  " * self._indentation + "<symbol> " +
                           string_to_write + " </symbol>\n")

    def _write_int_const(self):
        """Write the current integer constant as <integerConstant>.

        NOTE(review): this calls identifier() rather than an intVal()-style
        accessor — presumably the tokenizer returns the raw current token
        for any type; confirm against JackTokenizer.
        """
        self._output.write("  " * self._indentation + "<integerConstant> " +
                           self._tokenizer.identifier() +
                           " </integerConstant>\n")

    def _write_str_const(self):
        """Write the current string constant as <stringConstant>.

        NOTE(review): uses identifier() like _write_int_const — TODO
        confirm the tokenizer returns the raw token here.
        """
        self._output.write("  " * self._indentation + "<stringConstant> " +
                           self._tokenizer.identifier() +
                           " </stringConstant>\n")