Example #1

# Assumed imports for this driver (JackTokenizer is a project-local
# module; the test method further below additionally relies on unittest
# and on token-type constants defined elsewhere):
import os
import sys

from JackTokenizer import JackTokenizer

def main():
    """Drives the Jack-to-VM translation process"""
    file_name = sys.argv[1]
    tokenizers = []
    output_files = []

    abs_path = os.path.abspath(file_name)

    if file_name.endswith('.jack'):
        tokenizer = JackTokenizer(abs_path)
        tokenizers.append(tokenizer)
        output_path = os.path.splitext(abs_path)[0] + '.xml'
        output_files.append(output_path)
    else:
        for dirpath, _, filenames in os.walk(abs_path):
            for jack_file in filenames:
                if jack_file.endswith('.jack'):
                    tokenizer = JackTokenizer(os.path.join(dirpath, jack_file))
                    tokenizers.append(tokenizer)
                    output_path = os.path.join(
                        dirpath, jack_file[:-5] + '.xml')
                    output_files.append(output_path)
    
    for tokenizer in tokenizers:
        while tokenizer.has_more_tokens():
            tokenizer.advance()
            token_type = tokenizer.token_type()

            if token_type == 'KEYWORD':
                keyword = tokenizer.keyword()
            elif token_type == 'SYMBOL':
                symbol = tokenizer.symbol()
            elif token_type == 'IDENTIFIER':
                identifier = tokenizer.identifier()
            elif token_type == 'INT_CONST':
                int_val = tokenizer.int_val()
            elif token_type == 'STRING_CONST':
                string_val = tokenizer.string_val()
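
# A minimal entry-point sketch (assumption: this driver is invoked as
# "python <driver>.py <path-to-.jack-file-or-directory>"; main() reads
# sys.argv itself, so the guard only needs to invoke it):
if __name__ == '__main__':
    main()
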
    def test_advance(self):
        """Tests all parts of the tokenizer using this Jack code:

        /** Multi-line comment for
        some class. */
        class A{
          // Single-line comment
          let x = -4;
          do Output.printString("Ring Constants!");
        }

        """
        tokenizer = JackTokenizer("test.jack")
        tokenizer.advance()
        self.assertEqual(tokenizer.keyword(), CLASS)
        self.assertEqual(tokenizer.token_type(), KEYWORD)
        tokenizer.advance()
        self.assertEqual(tokenizer.identifier(), 'A')
        self.assertEqual(tokenizer.token_type(), IDENTIFIER)
        tokenizer.advance()
        self.assertEqual(tokenizer.symbol(), '{')
        self.assertEqual(tokenizer.token_type(), SYMBOL)

        tokenizer.advance()
        self.assertEqual(tokenizer.keyword(), LET)
        self.assertEqual(tokenizer.token_type(), KEYWORD)
        tokenizer.advance()
        self.assertEqual(tokenizer.identifier(), 'x')
        self.assertEqual(tokenizer.token_type(), IDENTIFIER)
        tokenizer.advance()
        self.assertEqual(tokenizer.symbol(), '=')
        self.assertEqual(tokenizer.token_type(), SYMBOL)
        tokenizer.advance()
        self.assertEqual(tokenizer.symbol(), '-')
        self.assertEqual(tokenizer.token_type(), SYMBOL)
        tokenizer.advance()
        self.assertEqual(tokenizer.int_val(), 4)
        self.assertEqual(tokenizer.token_type(), INT_CONST)
        tokenizer.advance()
        self.assertEqual(tokenizer.symbol(), ';')
        self.assertEqual(tokenizer.token_type(), SYMBOL)

        tokenizer.advance()
        self.assertEqual(tokenizer.keyword(), DO)
        self.assertEqual(tokenizer.token_type(), KEYWORD)
        tokenizer.advance()
        self.assertEqual(tokenizer.identifier(), 'Output')
        self.assertEqual(tokenizer.token_type(), IDENTIFIER)
        tokenizer.advance()
        self.assertEqual(tokenizer.symbol(), '.')
        self.assertEqual(tokenizer.token_type(), SYMBOL)
        tokenizer.advance()
        self.assertEqual(tokenizer.identifier(), 'printString')
        self.assertEqual(tokenizer.token_type(), IDENTIFIER)
        tokenizer.advance()
        self.assertEqual(tokenizer.symbol(), '(')
        self.assertEqual(tokenizer.token_type(), SYMBOL)
        tokenizer.advance()
        self.assertEqual(tokenizer.string_val(), 'Ring Constants!')
        self.assertEqual(tokenizer.token_type(), STRING_CONST)
        tokenizer.advance()
        self.assertEqual(tokenizer.symbol(), ')')
        self.assertEqual(tokenizer.token_type(), SYMBOL)
        tokenizer.advance()
        self.assertEqual(tokenizer.symbol(), ';')
        self.assertEqual(tokenizer.token_type(), SYMBOL)

        tokenizer.advance()
        self.assertEqual(tokenizer.symbol(), '}')
        self.assertEqual(tokenizer.token_type(), SYMBOL)
class CompilationEngine:
    ###############
    # CONSTRUCTOR #
    ###############

    def __init__(self, in_filename, in_file, out_xml, out_vm):
        """
        Creates a new compilation engine with the given input and output.
        The next routine called must be compileClass().
        :param in_filename: Name of the source Jack file.
        :param in_file: Open source Jack file.
        :param out_xml: Open XML file.
        :param out_vm: Open VM file.
        """
        self.__in_filename = in_filename
        self.__in_file, self.__out_xml = in_file, out_xml
        self.__tokenizer = JackTokenizer(in_file)
        self.__symbolTable = SymbolTable()
        self.__vmWriter = VMWriter(in_filename, out_vm)
        self.__stack = list()
        self.__tokenizer.advance()
        self.__resetUniqueLabels()

    ###################
    # PRIVATE METHODS #
    ###################

    def __resetUniqueLabels(self):
        self.__unique_id_if = 0
        self.__unique_id_while = 0

    def __uniqueWhileLabels(self):
        """
        Return (IF_TRUE, IF_FALSE, IF_END) labels carrying a unique id to
        prevent collisions with other labels carrying the same name.
        Example:
            while_exp, while_end = __uniqueWhileLabels()
            -->
            while_exp = "WHILE_EXP123"
            while_end = "WHILE_END123"
        """
        unique_labels = []
        for label in [WHILE_EXP, WHILE_END]:
            unique_labels.append("{}{}{}".format(label, UNIQUE_DELIMITER,
                                                 self.__unique_id_while))
        self.__unique_id_while += 1
        return unique_labels

    def __uniqueIfLabels(self):
        """
        Return (IF_TRUE, IF_FALSE, IF_END) labels carrying a unique id to
        prevent collisions with other labels carrying the same name.
        Example:
            if_true, if_false, if_end = __uniqueIfLabels()
            -->
            if_true = "IF_TRUE123"
            if_false = "IF_FALSE123"
            if_end = "IF_END123"
        """
        unique_labels = []
        for label in [IF_TRUE, IF_FALSE, IF_END]:
            unique_labels.append("{}{}{}".format(label, UNIQUE_DELIMITER,
                                                 self.__unique_id_if))
        self.__unique_id_if += 1
        return unique_labels

    def __writeToken(self, token, token_type):
        """
        Writes the given token as an xml tag to the output.
        :param token:
        :param token_type:
        :return:
        """
        tag = self.__getIndentedTag("<{0}>{1}{2}{1}</{0}>\n".format(
            token_type, XML_DELIM_TERMINAL, token))
        self.__out_xml.write(tag)

    def __writeTokenAndAdvance(self, token, token_type):
        """
        Writes the given token as an xml tag to the output and extracts the
        next token from the code.
        :param token: token tag value
        :param token_type: token tag type
        """
        # Build XML tag
        self.__writeToken(token, token_type)
        self.__tokenizer.advance()

    def __getIndentedTag(self, tag):
        """
        Return the given tag with trailing tabs according to current
        indentation level.
        :param tag: tag to indent
        :return: tag indented with trailing tabs.
        """
        return XML_INDENT_CHAR * len(self.__stack) + tag

    def __openTag(self, tagName):
        """
        Open an XML tag with the given name.
        All following tags will be written as inner tags until __closeTag()
        is called.
        :param tagName: name of the tag to open
        """
        tag = self.__getIndentedTag("<{}>\n".format(tagName))
        self.__out_xml.write(tag)
        self.__stack.append(tagName)

    def __closeTag(self):
        """
        Close the current open XML tag.
        All following tags will be written as outer tags in the previous
        indentation level.
        """
        tagName = self.__stack.pop()
        tag = self.__getIndentedTag("</{}>\n".format(tagName))
        self.__out_xml.write(tag)

    def __compileKeyWord(self):
        """
        Compile a keyword token
        """
        keyword = self.__tokenizer.keyWord()
        self.__writeTokenAndAdvance(keyword, TOKEN_TYPE_KEYWORD)
        return keyword

    def __compileSymbol(self):
        """
        Compile a symbol token
        """
        symbol = self.__tokenizer.symbol()
        self.__writeTokenAndAdvance(symbol, TOKEN_TYPE_SYMBOL)
        return symbol

    def __compileIdentifier(self,
                            category,
                            status,
                            kind=KIND_NONE,
                            index=INDEX_NONE):
        """
        Compile an identifier token
        """

        info = "{} {}".format(category, status)
        if kind != KIND_NONE:
            info += " " + KIND_2_SEGMENT[kind]
        if index != INDEX_NONE:
            info += " " + str(index)
        info = "[{}] ".format(info)
        identifier = self.__tokenizer.identifier()
        self.__writeTokenAndAdvance(info + identifier, TOKEN_TYPE_IDENTIFIER)
        return identifier

    def __compileIntVal(self):
        """
        Compile an intVal token
        """
        intval = self.__tokenizer.intVal()
        self.__writeTokenAndAdvance(intval, TOKEN_TYPE_INTEGER)
        self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, intval)
        return intval

    def __compileStringVal(self):
        """
        Compile a stringVal token
        """
        string = self.__tokenizer.stringVal()
        self.__writeTokenAndAdvance(string, TOKEN_TYPE_STRING)

        corrected = self.__correctString(string)
        self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, len(corrected))
        self.__vmWriter.writeCall(OS_STRING_NEW, 1)
        for char in corrected:
            self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, ord(char))
            self.__vmWriter.writeCall(OS_STRING_APPEND_CHAR, 2)

    def __compileClassName(self, status):
        """
        Compiles a variable name.
        """
        return self.__compileIdentifier(CATEGORY_CLASS, status)

    def __compileSubroutineName(self, status):
        """
        Compiles a variable name.
        """
        return self.__compileIdentifier(CATEGORY_SUBROUTINE, status)

    def __compileSubroutineCall(self):
        """
        Compiles a subroutine call.
        Syntax:
        ( className | varName) '.' subroutineName '(' expressionList ')' |
        subroutineName '(' expressionList ')'
        """
        # Compile XML
        callName = ""
        exp_count = 0
        if self.__tokenizer.lookahead() == RE_DOT:  # className | varName
            # extract var/class name
            callName = self.__tokenizer.peek()
            # className or varName?
            kind = self.__symbolTable.kindOf(callName)
            if kind != KIND_NONE:  # varName
                # Use class name instead of object name
                varName = callName
                callName = self.__symbolTable.typeOf(callName)
                # Push variable (this) and call class method
                index = self.__symbolTable.indexOf(varName)
                segment = self.__symbolTable.segmentOf(varName)
                self.__vmWriter.writePush(segment, index)
                # Include self as argument 0
                exp_count += 1
                self.__compileIdentifier(kind, STATUS_USE, kind, index)
            else:  # className
                self.__compileIdentifier(CATEGORY_CLASS, STATUS_USE)

            callName += self.__compileSymbol()  # '.'
        else:  # subroutineName
            # Subroutine -> className.Subroutine
            self.__vmWriter.writePush(VM_SEGMENT_POINTER, 0)
            callName += self.__className + FUNC_NAME_DELIMITER
            exp_count += 1

        callName += self.__compileSubroutineName(STATUS_USE)
        self.__compileSymbol()  # '('
        exp_count += self.CompileExpressionList()  # expressionList
        self.__compileSymbol()  # ')'

        # Compile VM
        self.__vmWriter.writeCall(callName, exp_count)
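
    # Illustration (assumed names, not part of the engine): for a Jack call
    # "obj.draw(x)" where obj is a local Square at index 0, the code above
    # emits roughly:
    #     push local 0          // obj becomes argument 0
    #     push <x>              // from CompileExpressionList
    #     call Square.draw 2
    # while a bare "draw(x)" inside a method of class Foo becomes:
    #     push pointer 0        // this becomes argument 0
    #     push <x>
    #     call Foo.draw 2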

    def __compileVarName(self, status):
        """
        Compiles a variable name.
        """
        name = self.__tokenizer.peek()
        index = INDEX_NONE
        if status != STATUS_DEFINE:
            index = self.__symbolTable.indexOf(name)
        varName = self.__compileIdentifier(CATEGORY_VAR, status, KIND_VAR,
                                           index)
        return varName

    def __compileType(self):
        """
        Compiles a type.
        Syntax:
        'int' | 'char' | 'boolean' | className
        """
        # 'int' | 'char' | 'boolean'
        if self.__tokenizer.peek() in {RE_INT, RE_CHAR, RE_BOOLEAN}:
            type = self.__compileKeyWord()
        # className
        else:
            type = self.__compileClassName(STATUS_USE)
        return type

    def __compileSubroutineBody(self, funcType, name):
        """
        Compiles a subroutine body.
        Syntax:
        '{' varDec* statements '}'
        """
        self.__openTag('subroutineBody')  # <subroutineBody>
        self.__compileSymbol()  #   '{'
        # varDec*
        while self.__tokenizer.peek() == RE_VAR:
            self.compileVarDec()  #   varDec*
        vars = self.__symbolTable.varCount(KIND_VAR)
        self.__vmWriter.writeFunction(name, vars)
        if funcType == RE_METHOD:
            # Hold self at pointer
            self.__vmWriter.writePush(VM_SEGMENT_ARGUMENT, 0)
            self.__vmWriter.writePop(VM_SEGMENT_POINTER, 0)
        if funcType == RE_CONSTRUCTOR:
            # Allocate memory for all fields
            fields = self.__symbolTable.varCount(KIND_FIELD)
            self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, fields)
            self.__vmWriter.writeCall(OS_MEMORY_ALLOC, 1)
            # Hold allocated memory at pointer
            self.__vmWriter.writePop(VM_SEGMENT_POINTER, 0)
        self.compileStatements()  #   statements
        self.__compileSymbol()  #   '}'
        self.__closeTag()  # </subroutineBody>
        return vars
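
    # Illustration (assuming OS_MEMORY_ALLOC names Memory.alloc): a
    # constructor of a class with two fields starts with
    #     push constant 2
    #     call Memory.alloc 1
    #     pop pointer 0
    # so that 'this' points at the freshly allocated object, while a method
    # starts with "push argument 0 / pop pointer 0" to anchor 'this'.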

    ##################
    # PUBLIC METHODS #
    ##################

    def compileClass(self):
        """
        Compiles a complete class.
        Syntax:
        'class' className '{' classVarDec* subroutineDec* '}'
        """
        self.__openTag('class')  # <class>
        self.__compileKeyWord()  #   'class'
        className = self.__compileClassName(  #   className
            STATUS_DEFINE)
        self.__className = className
        self.__compileSymbol()  #   '{'

        # classVarDec*
        while self.__tokenizer.peek() in {RE_STATIC, RE_FIELD}:
            self.CompileClassVarDec()

        # subroutineDec*
        while self.__tokenizer.peek() in {
                RE_CONSTRUCTOR, RE_FUNCTION, RE_METHOD
        }:
            self.CompileSubroutine()

        self.__compileSymbol()  #   '}'
        self.__closeTag()  # </class>

    def CompileClassVarDec(self):
        """
        Compiles a static declaration or a field declaration.
        Syntax:
        ('static' | 'field') type varName (',' varName)* ';'
        """
        self.__openTag('classVarDec')  # <classVarDec>
        kind = self.__compileKeyWord()  #   ('static' | 'field')
        type = self.__compileType()  #   type
        moreVars = True
        while moreVars:  #   (',' varName)*
            name = self.__compileVarName(  #   varName
                STATUS_DEFINE)
            self.__symbolTable.define(name, type, kind)
            if self.__tokenizer.peek() != RE_COMMA:
                moreVars = False
            else:
                self.__compileSymbol()  #   ','

        self.__compileSymbol()  #   ';'
        self.__closeTag()  # </classVarDec>

    def CompileSubroutine(self):
        """
        Compiles a complete method, function, or constructor.
        Syntax:
        ('constructor' | 'function' | 'method') ('void' | type)
        subroutineName '(' parameterList ')' subroutineBody
        """
        # Start subroutine in symbol table
        self.__resetUniqueLabels()
        self.__symbolTable.startSubroutine()

        # Compile XML
        self.__openTag('subroutineDec')  # <subroutineDec>
        funcType = self.__compileKeyWord()  #   ('constructor' |
        #   'function' | 'method')
        if funcType == RE_METHOD:
            # Methods receive 'this' as implicit argument 0
            self.__symbolTable.define(VM_SELF, self.__className, KIND_ARG)
        if self.__tokenizer.peek() == RE_VOID:
            type = self.__compileKeyWord()  #   'void'
        else:
            type = self.__compileType()  #   type
        subName = self.__compileSubroutineName(  #   subroutineName
            STATUS_DEFINE)
        name = self.__className + FUNC_NAME_DELIMITER + subName
        self.__compileSymbol()  #   '('
        self.compileParameterList()  #   parameterList
        self.__compileSymbol()  #   ')'
        self.__compileSubroutineBody(funcType, name)  #   subroutineBody
        self.__closeTag()  # </subroutineDec>

    def compileParameterList(self):
        """
        Compiles a (possibly empty) parameter list, not including the
        enclosing "()".
        Syntax:
        ( (type varName) (',' type varName)*)?
        """
        parameters = 0  # parameter count
        self.__openTag('parameterList')  # <parameterList>
        if self.__tokenizer.peek() != RE_BRACKETS_RIGHT:
            moreVars = True
            while moreVars:
                parameters += 1
                type = self.__compileType()  #   type
                name = self.__compileVarName(  #   varName
                    STATUS_DEFINE)
                self.__symbolTable.define(name, type, KIND_ARG)
                if self.__tokenizer.peek() == RE_COMMA:
                    self.__compileSymbol()  # ','
                else:
                    moreVars = False
        self.__closeTag()  # </parameterList>
        return parameters

    def compileVarDec(self):
        """
        Compiles a var declaration.
        Syntax:
        'var' type varName (',' varName)* ';'
        """
        self.__openTag('varDec')  # <varDec>
        moreVars = True
        self.__compileKeyWord()  #   'var'
        type = self.__compileType()  #   type
        while moreVars:
            name = self.__tokenizer.peek()  #   varName
            self.__symbolTable.define(name, type, KIND_VAR)
            self.__compileVarName(STATUS_DEFINE)
            if self.__tokenizer.peek() == RE_COMMA:
                self.__compileSymbol()  #   ','
            else:
                moreVars = False
        self.__compileSymbol()  #   ';'
        self.__closeTag()  # </varDec>

    def compileStatements(self):
        """
        Compiles a sequence of statements, not including the enclosing "{}".
        Syntax:
        statement*
        where statement is in:
        letStatement | ifStatement | whileStatement | doStatement | returnStatement
        """
        self.__openTag('statements')  # <statements>
        statement = self.__tokenizer.peek()
        while statement in {
                RE_LET, RE_IF, RE_WHILE, RE_DO, RE_RETURN_NOTHING,
                RE_RETURN_SOMETHING
        }:
            if statement == RE_LET:
                self.compileLet()
            elif statement == RE_IF:
                self.compileIf()
            elif statement == RE_WHILE:
                self.compileWhile()
            elif statement == RE_DO:
                self.compileDo()
            elif statement == RE_RETURN_NOTHING:
                self.compileReturnNothing()
            elif statement == RE_RETURN_SOMETHING:
                self.compileReturnSomething()
            statement = self.__tokenizer.peek()
        self.__closeTag()  # </statements>

    def compileDo(self):
        """
        Compiles a do statement.
        Syntax:
        'do' subroutineCall ';'
        """
        self.__openTag('doStatement')  # <doStatement>
        self.__compileKeyWord()  #   'do'
        self.__compileSubroutineCall()  #   subroutineCall
        self.__vmWriter.writePop(VM_SEGMENT_TEMP, 0)
        self.__compileSymbol()  #   ';'
        self.__closeTag()  # </doStatement>

    def compileLet(self):
        """
        Compiles a let statement.
        Syntax:
        'let' varName ('[' expression ']')? '=' expression ';'
        """
        isArray = False
        self.__openTag('letStatement')  # <letStatement>
        self.__compileKeyWord()  #   'let'
        varName = self.__tokenizer.peek()
        index = self.__symbolTable.indexOf(varName)
        segment = self.__symbolTable.segmentOf(varName)
        self.__compileVarName(STATUS_USE)  #   varName
        if self.__tokenizer.peek() == RE_BRACKETS_SQUARE_LEFT:
            isArray = True
            self.__compileSymbol()  #   '['
            self.CompileExpression()  # expression
            self.__compileSymbol()  #   ']'
            # Add the offset to the variable address
            self.__vmWriter.writePush(segment, index)
            self.__vmWriter.writeArithmetic(RE_PLUS, True)
            # Address of array element is at stack top
        self.__compileSymbol()  #   '='
        self.CompileExpression()  # expression
        self.__compileSymbol()  #   ';'
        self.__closeTag()  # </letStatement>

        if isArray:
            # Pop rh-expression to temp
            self.__vmWriter.writePop(VM_SEGMENT_TEMP, 0)
            # Get address of array element
            self.__vmWriter.writePop(VM_SEGMENT_POINTER, 1)
            # Push rh-expression to stack
            self.__vmWriter.writePush(VM_SEGMENT_TEMP, 0)
            # Pop rh-expression to address of element
            self.__vmWriter.writePop(VM_SEGMENT_THAT, 0)
        else:
            # varName is a defined variable (not a class or subroutine name)
            index = self.__symbolTable.indexOf(varName)
            self.__vmWriter.writePop(segment, index)
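
        # Illustration (assumed local indexes a@0, i@1, x@2):
        # "let a[i] = x + 1;" compiles roughly to
        #     push local 1      // i
        #     push local 0      // a
        #     add               // address of a[i]
        #     push local 2
        #     push constant 1
        #     add               // right-hand expression
        #     pop temp 0
        #     pop pointer 1
        #     push temp 0
        #     pop that 0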

    def compileWhile(self):
        """
        Compiles a while statement.
        Syntax:
        'while' '(' expression ')' '{' statements '}'
        """
        LABEL_EXP, LABEL_END = self.__uniqueWhileLabels()

        self.__openTag('whileStatement')  # <whileStatement>
        self.__compileKeyWord()  #   'while'
        self.__compileSymbol()  #   '('
        self.__vmWriter.writeLabel(  # label WHILE_EXP
            LABEL_EXP)
        self.CompileExpression()  #   expression
        # Negate the expression
        # (jump out of while if *NOT* expression)
        self.__vmWriter.writeArithmetic(RE_TILDA, False)
        self.__compileSymbol()  #   ')'
        self.__vmWriter.writeIf(LABEL_END)  # if-goto WHILE_END
        self.__compileSymbol()  #   '{'
        self.compileStatements()  #   statements
        self.__compileSymbol()  #   '}'
        self.__vmWriter.writeGoto(LABEL_EXP)  # goto WHILE_EXP
        self.__vmWriter.writeLabel(LABEL_END)  # label WHILE_END
        self.__closeTag()  # </whileStatement>
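
    # Illustration: "while (cond) { ... }" produces the control skeleton
    #     label WHILE_EXP$n
    #     <cond>
    #     not
    #     if-goto WHILE_END$n
    #     <statements>
    #     goto WHILE_EXP$n
    #     label WHILE_END$n
    # where '$' stands in for whatever UNIQUE_DELIMITER is defined as.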

    def compileReturnNothing(self):
        """
        Compiles a 'return;' statement.
        Syntax:
        'return;'
        """
        # Compile XML
        self.__openTag('returnStatement')  # <returnStatement>
        self.__writeToken(
            'return',  #   'return'
            TOKEN_TYPE_KEYWORD)
        self.__writeTokenAndAdvance(
            ';',  #   ';'
            TOKEN_TYPE_SYMBOL)
        self.__vmWriter.writeReturn(True)
        self.__closeTag()  # </returnStatement>

    def compileReturnSomething(self):
        """
        Compiles a return statement.
        Syntax:
        'return' expression? ';'
        """
        # Compile XML
        self.__openTag('returnStatement')  # <returnStatement>
        self.__writeTokenAndAdvance(
            'return',  #   'return'
            TOKEN_TYPE_KEYWORD)
        self.CompileExpression()  #   expression
        self.__compileSymbol()  #   ';'
        self.__vmWriter.writeReturn()
        self.__closeTag()  # </returnStatement>

    def compileIf(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        Syntax:
        'if' '(' expression ')' '{' statements '}' ( 'else' '{' statements
        '}' )?
        """
        LABEL_TRUE, LABEL_FALSE, LABEL_END = self.__uniqueIfLabels()

        self.__openTag('ifStatement')  # <ifStatement>
        self.__compileKeyWord()  #   'if'
        self.__compileSymbol()  #   '('
        # VM Code for computing ~(cond)
        self.CompileExpression()  #   expression
        self.__compileSymbol()  #   ')'
        self.__vmWriter.writeIf(LABEL_TRUE)  # if-goto LABEL_TRUE
        self.__vmWriter.writeGoto(LABEL_FALSE)  # goto LABEL_FALSE
        self.__vmWriter.writeLabel(LABEL_TRUE)  # label LABEL_TRUE
        self.__compileSymbol()  #   '{'
        # VM Code for executing TRUE
        self.compileStatements()  #   statements
        self.__compileSymbol()  #   '}'
        if self.__tokenizer.peek() == RE_ELSE:  #
            self.__vmWriter.writeGoto(LABEL_END)  # goto LABEL_END
            self.__vmWriter.writeLabel(  # label LABEL_FALSE
                LABEL_FALSE)
            self.__compileKeyWord()  #   'else'
            self.__compileSymbol()  #   '{'
            # VM Code for executing ELSE
            self.compileStatements()  #   statements
            self.__compileSymbol()  #   '}'
            self.__vmWriter.writeLabel(  # label END
                LABEL_END)
        else:
            self.__vmWriter.writeLabel(  # label FALSE
                LABEL_FALSE)
        self.__closeTag()  # </ifStatement>
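
    # Illustration: "if (cond) {A} else {B}" produces
    #     <cond>
    #     if-goto IF_TRUE$n
    #     goto IF_FALSE$n
    #     label IF_TRUE$n
    #     <A>
    #     goto IF_END$n
    #     label IF_FALSE$n
    #     <B>
    #     label IF_END$n
    # (with no else clause, only IF_TRUE and IF_FALSE are emitted).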

    def CompileExpression(self):
        """
        Compiles an expression.
        Syntax:
        term (op term)*
        """
        self.__openTag('expression')  # <expression>
        self.CompileTerm()  # term
        while self.__tokenizer.peek() in {
                RE_PLUS, RE_BAR, RE_ASTERISK, RE_SLASH, RE_AMPERSAND, RE_VBAR,
                RE_LT, RE_GT, RE_EQ
        }:
            symbol = self.__compileSymbol()  # op
            self.CompileTerm()  # term
            self.__vmWriter.writeSymbol(symbol)
        self.__closeTag()  # </expression>

    def __correctString(self, string):
        """
        Convert escape characters in a string to valid chars
        :param string: string to correct
        :return: corrected strings with escaped characters corrected
        """
        correct = string.replace('\t', '\\t')
        correct = correct.replace('\n', '\\n')
        correct = correct.replace('\r', '\\r')
        return correct

    def CompileTerm(self):
        """
        Compiles a term.
        This routine is faced with a slight difficulty when trying to decide
        between some of the alternative parsing rules.
        Specifically, if the current token is an identifier, the routine
        must distinguish between a variable, an array entry, and a subroutine
        call. A single look-ahead token, which may be one
        of "[", "(", or "." suffices to distinguish between the three
        possibilities. Any other token is not part of this term and should
        not be advanced over.
        Syntax:
        integerConstant | stringConstant | keywordConstant | varName |
        varName '[' expression ']' | subroutineCall | '(' expression ')' |
        unaryOp term
        """
        self.__openTag('term')  # <term>
        lookahead = self.__tokenizer.lookahead()
        if self.__tokenizer.peek() == RE_BRACKETS_LEFT:
            self.__compileSymbol()  #   '('
            self.CompileExpression()  #   expression
            self.__compileSymbol()  #   ')'
        elif self.__tokenizer.peek() in {RE_TILDA, RE_BAR}:
            symbol = self.__compileSymbol()  #   unaryOp
            self.CompileTerm()  #   term
            self.__vmWriter.writeArithmetic(symbol, False)
        elif lookahead == RE_BRACKETS_SQUARE_LEFT:
            varName = self.__tokenizer.peek()
            self.__compileVarName(STATUS_USE)  #   varName
            self.__compileSymbol()  #   '['
            self.CompileExpression()  #   expression
            self.__compileSymbol()  #   ']'
            # Compile array indexing
            kind = self.__symbolTable.kindOf(varName)
            index = self.__symbolTable.indexOf(varName)
            segment = KIND_2_SEGMENT[kind]
            self.__vmWriter.writePush(segment, index)
            self.__vmWriter.writeArithmetic(RE_PLUS, True)
            self.__vmWriter.writePop(VM_SEGMENT_POINTER, 1)
            self.__vmWriter.writePush(VM_SEGMENT_THAT, 0)
        elif lookahead in {RE_BRACKETS_LEFT, RE_DOT}:
            self.__compileSubroutineCall()  #   subroutineCall |
            # (varName | className) '.' subroutineCall
        else:
            if self.__tokenizer.tokenType() == TOKEN_TYPE_INTEGER:
                self.__compileIntVal()  #   integerConstant
            elif self.__tokenizer.tokenType() == TOKEN_TYPE_STRING:
                self.__compileStringVal()  #   stringConstant
            elif self.__tokenizer.tokenType() == TOKEN_TYPE_KEYWORD:
                # true | false | null | this
                # true | false | null - pushed to stack as constants
                keyword = self.__tokenizer.peek()
                if keyword in {RE_FALSE, RE_NULL, RE_TRUE}:
                    self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, 0)
                    if keyword == RE_TRUE:
                        self.__vmWriter.writeArithmetic(RE_TILDA, False)
                # this - pushes pointer
                elif keyword == RE_THIS:
                    self.__vmWriter.writePush(VM_SEGMENT_POINTER, 0)
                self.__compileKeyWord()  #   keywordConstant
            elif self.__tokenizer.tokenType() == TOKEN_TYPE_IDENTIFIER:
                name = self.__tokenizer.peek()
                kind = self.__symbolTable.kindOf(name)
                index = self.__symbolTable.indexOf(name)
                segment = self.__symbolTable.segmentOf(name)
                self.__compileIdentifier(kind, STATUS_USE, kind, index)
                self.__vmWriter.writePush(segment, index)
        self.__closeTag()  # </term>

    def CompileExpressionList(self):
        """
        Compiles a (possibly empty) comma-separated list of expressions.
        Syntax:
        (expression (',' expression)* )?
        """
        exp_count = 0
        self.__openTag('expressionList')  # <expressionList>
        if self.__tokenizer.peek() != RE_BRACKETS_RIGHT:
            self.CompileExpression()
            exp_count += 1  #   expression
            while self.__tokenizer.peek() == RE_COMMA:
                self.__compileSymbol()  #   ','
                self.CompileExpression()
                exp_count += 1
        self.__closeTag()  # </expressionList>
        return exp_count
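
# A minimal driving sketch for the engine above (hypothetical file names;
# the project's own main module may wire this up differently):
if __name__ == '__main__':
    with open('Main.jack') as jack_file, \
            open('Main.xml', 'w') as xml_file, \
            open('Main.vm', 'w') as vm_file:
        engine = CompilationEngine('Main.jack', jack_file, xml_file, vm_file)
        engine.compileClass()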
Example #4

# Assumed imports for this engine: it relies on lxml (remove_blank_text,
# pretty_print), which the standard-library ElementTree lacks; TYPES, OPS,
# UNARY_OP and SYMBOLS are project constants defined elsewhere.
from lxml import etree as ET
from JackTokenizer import JackTokenizer

class CompilationEngine:
    def __init__(self, input_path, output_path):
        """
        creates a new compilation engine with the given input and output. the next routine called must be compileClass()
        :param input_path: input stream/file
        :param output_path: output stream/file
        """
        self._root = None
        self._current_node = None
        self.tokenizer = JackTokenizer(input_path)
        self.CompileClass()
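        # Keep empty expressionList/parameterList elements as open+close tag
        # pairs; lxml would otherwise serialize them as self-closing ("/>")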
        for elem in self._root.iter():
            if elem.tag == 'expressionList' or elem.tag == 'parameterList':
                if "/>" in str(ET.tostring(elem)):
                    elem.text = '\n'
        p = ET.XMLParser(remove_blank_text=True)
        tree = ET.ElementTree(self._root, parser=p)
        tree.write(output_path, method='xml', pretty_print=True)

    def CompileClass(self):
        """
        Compiles a complete class.
        """
        self._root = ET.Element('class')
        self.tokenizer.advance()
        self._write_line(self._root, self.tokenizer.keyWord())
        self.tokenizer.advance()
        self._write_line(self._root, self.tokenizer.identifier())
        self.tokenizer.advance()
        self._write_line(self._root, self.tokenizer.symbol())
        self.CompileClassVarDec()
        self.CompileSubroutine()
        self.tokenizer.advance()
        self._write_line(self._root, self.tokenizer.symbol())

    def _write_line(self, node, name):
        """
        writes the current token as a child element of the given node
        :param node: parent XML node
        :param name: the text of the token
        """
        token_el = ET.SubElement(node, TYPES[self.tokenizer.tokenType()])
        token_el.text = ' ' + name + ' '

    def CompileClassVarDec(self):
        """
        Compiles a static declaration or a field declaration.
        """
        peek = self.tokenizer.peek()
        if 'static' in peek or 'field' in peek:
            _classVarNode = ET.SubElement(self._root, 'classVarDec')
        while 'static' in peek or 'field' in peek:
            self.tokenizer.advance()
            self._write_line(_classVarNode,
                             self.tokenizer.keyWord())  # field/static
            self.tokenizer.advance()
            self._write_line(_classVarNode, self.tokenizer.keyWord())  # type
            self.tokenizer.advance()
            self._write_line(_classVarNode,
                             self.tokenizer.identifier())  # name
            self.tokenizer.advance()
            while self.tokenizer.symbol() == ',':
                self._write_line(_classVarNode, self.tokenizer.symbol())  # ,
                self.tokenizer.advance()
                self._write_line(_classVarNode,
                                 self.tokenizer.identifier())  # name
                self.tokenizer.advance()
            self._write_line(_classVarNode, self.tokenizer.symbol())  # ;
            peek = self.tokenizer.peek()
            if 'static' in peek or 'field' in peek:
                _classVarNode = ET.SubElement(self._root, 'classVarDec')

    def CompileSubroutine(self):
        """
        Compiles a complete method, function, or constructor.
        """
        _last_node = self._current_node
        _subroutineNode = ET.SubElement(self._root, 'subroutineDec')
        self._current_node = _subroutineNode
        peek = self.tokenizer.peek()
        while 'function' in peek or 'constructor' in peek or 'method' in peek:
            self.tokenizer.advance()
            self._write_line(_subroutineNode,
                             self.tokenizer.keyWord())  # const/func/method
            self.tokenizer.advance()
            self._write_line(_subroutineNode,
                             self.tokenizer.current_token)  # void/type
            self.tokenizer.advance()
            self._write_line(_subroutineNode,
                             self.tokenizer.identifier())  # name
            self.tokenizer.advance()
            self._write_line(_subroutineNode, self.tokenizer.symbol())  # '('
            self.CompileParameterList()
            self.tokenizer.advance()
            self._write_line(_subroutineNode, self.tokenizer.symbol())  # ')'
            self.tokenizer.advance()
            self._current_node = ET.SubElement(_subroutineNode,
                                               'subroutineBody')
            self._write_line(self._current_node,
                             self.tokenizer.symbol())  # '{'
            peek = self.tokenizer.peek()
            if 'var' in peek:
                self.CompileVarDec()
            self.CompileStatements()
            self.tokenizer.advance()
            self._write_line(self._current_node,
                             self.tokenizer.symbol())  # '}'
            peek = self.tokenizer.peek()
            if 'function' in peek or 'constructor' in peek or 'method' in peek:
                _subroutineNode = ET.SubElement(self._root, 'subroutineDec')
                self._current_node = _subroutineNode

    def CompileParameterList(self):
        """
        Compiles a (possibly empty) parameter list, not including the enclosing ()
        """
        param_list = ET.SubElement(self._current_node, 'parameterList')
        peek = self.tokenizer.peek()
        if peek != ')':
            self.tokenizer.advance()
            self._write_line(param_list, self.tokenizer.keyWord())  # type
            self.tokenizer.advance()
            self._write_line(param_list, self.tokenizer.identifier())  # name
            peek = self.tokenizer.peek()
        while peek == ',':
            self.tokenizer.advance()
            self._write_line(param_list, self.tokenizer.symbol())  # ','
            self.tokenizer.advance()
            self._write_line(param_list, self.tokenizer.keyWord())  # type
            self.tokenizer.advance()
            self._write_line(param_list, self.tokenizer.identifier())  # name
            peek = self.tokenizer.peek()
        # if not param_list.text:
        #     param_list.text = '\n'

    def CompileVarDec(self):
        """
        Compiles a var declaration.
        """
        _varDecNode = ET.SubElement(self._current_node, 'varDec')
        peek = self.tokenizer.peek()
        while 'var' in peek:
            self.tokenizer.advance()
            self._write_line(_varDecNode, self.tokenizer.keyWord())
            self.tokenizer.advance()
            self._write_line(_varDecNode, self.tokenizer.keyWord())
            self.tokenizer.advance()
            self._write_line(_varDecNode, self.tokenizer.identifier())
            self.tokenizer.advance()
            while self.tokenizer.symbol() == ',':
                self._write_line(_varDecNode, self.tokenizer.symbol())  # ,
                self.tokenizer.advance()
                self._write_line(_varDecNode,
                                 self.tokenizer.identifier())  # name
                self.tokenizer.advance()
            self._write_line(_varDecNode, self.tokenizer.symbol())  # ;
            peek = self.tokenizer.peek()
            if peek == 'var':
                _varDecNode = ET.SubElement(self._current_node, 'varDec')

    def CompileStatements(self):
        """
        Compiles a sequence of statements, not including the enclosing "{}"
        """
        peek = self.tokenizer.peek()
        _parent = self._current_node
        self._current_node = ET.SubElement(self._current_node, 'statements')
        while ('let' in peek or 'if' in peek or 'while' in peek
               or 'do' in peek or 'return' in peek):
            if 'let' in peek:
                self.CompileLet()
            elif 'if' in peek:
                self.CompileIf()
            elif 'while' in peek:
                self.CompileWhile()
            elif 'do' in peek:
                self.CompileDo()
            elif 'return' in peek:
                self.CompileReturn()
            peek = self.tokenizer.peek()
        self._current_node = _parent

    def CompileDo(self):
        """
        Compiles a do statement.
        """
        _last_node = self._current_node
        _statement = ET.SubElement(self._current_node, 'doStatement')
        self._current_node = _statement
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.keyWord())
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.identifier())
        peek = self.tokenizer.peek()
        while peek == '.':
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.symbol())
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.identifier())
            peek = self.tokenizer.peek()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # '('
        self.CompileExpressionList()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # ')'
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # ';'
        self._current_node = _last_node

    def CompileLet(self):
        """
        Compiles a let statement.
        """
        _last_node = self._current_node
        _statement = ET.SubElement(self._current_node, 'letStatement')
        self._current_node = _statement
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.keyWord())
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.identifier())
        peek = self.tokenizer.peek()
        if peek == '[':
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.symbol())  # '['
            self.tokenizer.advance()
            self.CompileExpression()
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.symbol())  # ']'
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # '='
        self.tokenizer.advance()
        self.CompileExpression()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # ';'
        self._current_node = _last_node

    def CompileWhile(self):
        """
        Compiles a while statement.
        """
        _last_node = self._current_node
        _statement = ET.SubElement(self._current_node, 'whileStatement')
        self._current_node = _statement
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.keyWord())  # while
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # '('
        self.tokenizer.advance()
        self.CompileExpression()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # ')'
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # '{'
        self.CompileStatements()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # '}'
        self._current_node = _last_node

    def CompileReturn(self):
        """
        Compiles a return statement.
        """
        _last_node = self._current_node
        _statement = ET.SubElement(self._current_node, 'returnStatement')
        self._current_node = _statement
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.keyWord())  # return
        peek = self.tokenizer.peek()
        if peek != ';':
            self.tokenizer.advance()
            self.CompileExpression()
            self.tokenizer.advance()
        else:
            self.tokenizer.advance()
        self._write_line(self._current_node, self.tokenizer.symbol())  # ';'
        self._current_node = _last_node

    def CompileIf(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        """
        _last_node = self._current_node
        _statement = ET.SubElement(self._current_node, 'ifStatement')
        self._current_node = _statement
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.keyWord())  # if
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # '('
        self.tokenizer.advance()
        self.CompileExpression()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # ')'
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # '{'
        self.CompileStatements()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # '}'
        peek = self.tokenizer.peek()
        if peek == 'else':
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.keyWord())  # else
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.symbol())  # '{'
            self.CompileStatements()
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.symbol())  # '}'
        self._current_node = _last_node

    def CompileExpression(self):
        """
        Compiles an expression.
        """
        _last_node = self._current_node
        self._current_node = ET.SubElement(self._current_node, 'expression')
        self.CompileTerm()
        peek = self.tokenizer.peek()
        while peek in OPS:
            self.tokenizer.advance()
            self._write_line(self._current_node, self.tokenizer.symbol())
            self.tokenizer.advance()
            self.CompileTerm()
            peek = self.tokenizer.peek()
        self._current_node = _last_node

    def CompileTerm(self):
        """
        Compiles a term. This routine is faced with a slight difficulty when trying to decide between some of the
        alternative parsing rules. Specifically, if the current token is an identifier, the routine must distinguish
        between a variable, an array entry, and a subroutine call. A single look-ahead token, which may be one
        of [, (, or . suffices to distinguish between the three possibilities. Any other token is not
        part of this term and should not be advanced over.
        """
        term_branch = ET.SubElement(self._current_node, 'term')
        # self.tokenizer.advance()
        if self.tokenizer.tokenType() in ('INT_CONST', 'KEYWORD'):
            self._write_line(term_branch, self.tokenizer.current_token)
        elif self.tokenizer.tokenType() == 'STRING_CONST':
            self._write_line(term_branch, self.tokenizer.stringVal())
        elif self.tokenizer.current_token in UNARY_OP:
            self._write_line(term_branch, self.tokenizer.symbol())
            last_node = self._current_node
            self._current_node = term_branch
            self.tokenizer.advance()
            self.CompileTerm()
            self._current_node = last_node
        elif self.tokenizer.current_token in SYMBOLS:
            self._write_line(term_branch, self.tokenizer.symbol())
            self.tokenizer.advance()
            last_node = self._current_node
            self._current_node = term_branch
            self.CompileExpression()
            self._current_node = last_node
            self.tokenizer.advance()
            self._write_line(term_branch, self.tokenizer.symbol())
        else:
            self._write_line(term_branch, self.tokenizer.identifier())
            peek = self.tokenizer.peek()
            if '[' in peek:  # varName '[' expression ']'
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.symbol())  # '['
                self.tokenizer.advance()
                last_node = self._current_node
                self._current_node = term_branch
                self.CompileExpression()
                self._current_node = last_node
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.symbol())  # ']'
            elif '(' in peek:  # subroutineName '(' expressionList ')'
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.symbol())  # '('
                last_node = self._current_node
                self._current_node = term_branch
                self.CompileExpressionList()
                self._current_node = last_node
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.symbol())  # ')'
            elif '.' in peek:
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.symbol())
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.identifier())
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.symbol())
                last_node = self._current_node
                self._current_node = term_branch
                self.CompileExpressionList()
                self._current_node = last_node
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.symbol())

    def CompileExpressionList(self):
        """
        Compiles a (possibly empty) comma-separated list of expressions.
        """
        last_node = self._current_node
        self._current_node = ET.SubElement(self._current_node,
                                           'expressionList')
        peek = self.tokenizer.peek()
        while peek != ')':
            self.tokenizer.advance()
            if peek == ',':
                self._write_line(self._current_node, self.tokenizer.symbol())
                self.tokenizer.advance()
            self.CompileExpression()
            peek = self.tokenizer.peek()
        self._current_node = last_node
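
# Usage sketch (hypothetical paths): the engine tokenizes, compiles, and
# writes the pretty-printed XML as a side effect of construction.
# CompilationEngine('Main.jack', 'MainOut.xml')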
Example #5

# Assumed imports (the xmlprint helper and the *_TAG constants used
# further below are defined elsewhere in this project):
import sys

from JackTokenizer import JackTokenizer

class CompilationEngine:
    def __init__(self, filename):
        self.tokenizer = JackTokenizer(filename)

    def compile(self):
        self.compileClass()

    def xml_print_el(self):
        xmlprint(self.tokenizer.token_type, self.tokenizer.token)

    def advanceSymbol(self, symbol):
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('Symbol expected:' + symbol +
                              ', found end of stream')
        if self.tokenizer.symbol() != symbol:
            raise SyntaxError('Symbol expected:' + symbol)

    def advanceKeyword(self, keyword):
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('Keyword expected:' + keyword +
                              ', found end of stream')
        if self.tokenizer.keyword() != keyword:
            raise SyntaxError('Keyword expected:' + keyword)

    def advanceTokenType(self, tokenType):
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError(tokenType + ' expected, found end of stream')
        if self.tokenizer.token_type != tokenType:
            raise SyntaxError(tokenType + ' expected')

    def advanceKeywords(self, *args):
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('Keywords expected: ' + ', '.join(args) +
                              ', found end of stream')
        if self.tokenizer.keyword() not in args:
            raise SyntaxError('Keywords expected: ' + ', '.join(args))

    def advanceAndGetType(self):
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('type expected, found end of stream')
        if self.is_type():
            return self.tokenizer.token
        else:
            raise SyntaxError('type expected')

    def is_type(self):
        return self.tokenizer.keyword() in [
            'int', 'char', 'boolean'
        ] or self.tokenizer.token_type == 'identifier'

    def advanceAndGetReturnType(self):
        self.advance()
        if self.is_type() or self.tokenizer.keyword() == 'void':
            return self.tokenizer.token
        else:
            raise SyntaxError('type expected')

    def advanceToClassName(self):
        self.advanceTokenType('identifier')
        return self.tokenizer.identifier()

    def advanceToVarName(self):
        self.advanceTokenType('identifier')
        return self.tokenizer.identifier()

    def advanceToSubroutineName(self):
        self.advanceTokenType('identifier')
        return self.tokenizer.identifier()

    def hasClassVarDec(self):
        pass

    def advance(self):
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('found end of stream!')

    def compileClass(self):
        # 'class' className '{' classVarDec* subroutineDec* '}'
        print('<class>')
        self.advanceKeyword('class')
        self.xml_print_el()
        # classname
        self.advanceToClassName()
        className = self.tokenizer.identifier()
        self.xml_print_el()
        # {
        self.advanceSymbol('{')
        self.xml_print_el()

        self.advance()
        # classVarDec*
        while (self.tokenizer.keyword() in ['static', 'field']):
            self.compileClassVarDec()

        # subroutineDec*
        while (self.tokenizer.keyword()
               in ['constructor', 'function', 'method']):
            self.compileSubroutine()

        # }
        self.advanceSymbol('}')
        self.xml_print_el()
        print('</class>')

    def compileClassVarDec(self):
        # ('static'|'field') type varName (',' varName)* ';'
        print('<classVarDec>')
        # ('static'|'field')
        self.xml_print_el()
        # type
        type = self.advanceAndGetType()
        self.xml_print_el()
        # varName
        varName = self.advanceToVarName()
        self.xml_print_el()
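        # TODO: handle (',' varName)* before the terminating ';'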
        # ;
        self.advanceSymbol(';')
        self.xml_print_el()
        print('</classVarDec>')
        self.advance()

    def compileSubroutine(self):
        print('<subroutineDec>')
        kind = self.tokenizer.keyword()
        self.xml_print_el()

        # ( 'void' | type )

        return_type = self.advanceAndGetReturnType()
        self.xml_print_el()

        # subroutineName
        name = self.advanceToSubroutineName()
        self.xml_print_el()

        # (
        self.advanceSymbol('(')
        self.xml_print_el()

        # TODO parameterList
        self.compileParameterList()

        # )
        self.advanceSymbol(')')
        self.xml_print_el()

        # subroutineBody
        self.compileSubroutineBody()

        print('</subroutineDec>')
        self.advance()

    def compileSubroutineBody(self):
        print('<subroutineBody>')
        # {
        self.advanceSymbol('{')
        self.xml_print_el()

        # varDec*
        # TODO: a structure to represent the *
        self.compileVarDec()

        # statements
        self.compileStatements()

        # }
        self.advanceSymbol('}')
        self.xml_print_el()
        print('</subroutineBody>')

    def compileParameterList(self):
        print('<parameterList>')

        print('</parameterList>')

    def compileVarDec(self):
        pass

    def compileStatements(self):
        pass

    def compileDo(self):
        pass

    def compileLet(self):
        pass

    def compileWhile(self):
        pass

    def compileReturn(self):
        pass

    def compileIf(self):
        pass

    def compileExpression(self):
        pass

    # if identifier: variable, array entry, subroutine call
    def compileTerm(self):
        # single lookahead token - can be [ ( or .
        pass

    # comma separated list of expressions
    def compileExpressionList(self):
        pass
class CompilationEngine:
    """
    The compilation engine compile the jack code given in the input file
    into an xml code saved in the out_file
    """
    def __init__(self, in_file, out_file):
        """
        A compilation engine constructor
        :param in_file: the file we are currently compiling
        :param out_file: the file where we save the output
        """
        self.tokenizer = JackTokenizer(in_file)
        self.out_file = open(out_file, 'w')
        self._indent_count = 0

    def compile_class(self):
        """
        compiles a class according to the grammar
        """
        self._write_outer_tag(CLASS_TAG)
        self.tokenizer.advance()
        if self.tokenizer.key_word() != CLASS_TAG:
            print(COMPILE_CLASS_ERROR)
            sys.exit()
        self._write_token(self.tokenizer.token_type())
        self._check_write_name()
        self._check_write_symbol("{")
        # there may be multiple variable declarations
        while self._check_if_var_dec():
            self.compile_class_var_dec()
        # there may be multiple subroutine declarations
        while self._check_subroutine_dec():
            self.compile_subroutine_dec()
        self._check_write_symbol("}")
        self._write_outer_tag(CLASS_TAG, IS_ENDING_TAG)

    def compile_class_var_dec(self):
        """
        compiles the class's variables declarations
        """
        self._write_outer_tag(CLASS_VAR_DEC_TAG)
        # we only come in the function if the current token is correct so we
        # can just write it
        self._write_token(self.tokenizer.token_type())
        self._check_write_type()
        self._check_write_name()
        while self._check_if_comma():  # there are more variables
            self._check_write_symbol(",")
            self._check_write_name()
        self._check_write_symbol(";")
        self._write_outer_tag(CLASS_VAR_DEC_TAG, IS_ENDING_TAG)

    def compile_subroutine_dec(self):
        """
        compiles the class's subroutine (methods and functions) declarations
        """
        self._write_outer_tag(SUBROUTINE_DEC_TAG)
        # we only come in the function if the current token is correct so we
        # can just write it
        self._write_token(self.tokenizer.token_type())
        # the function is either void or has a type
        if self.tokenizer.key_word() == 'void':
            self._write_token(self.tokenizer.token_type())
        else:
            self._check_write_type()
        self._check_write_name()
        self._check_write_symbol("(")
        self.compile_parameter_list()
        self._check_write_symbol(")")
        self.compile_subroutine_body()
        self._write_outer_tag(SUBROUTINE_DEC_TAG, IS_ENDING_TAG)

    def compile_parameter_list(self):
        """
        compiles the parameter list for the subroutines
        """
        self._write_outer_tag(PARAMETER_LIST_TAG)
        # if curr_token is ')' it means the param list is empty
        if self.tokenizer.symbol() != ')':
            self._check_write_type()
            self._check_write_name()
            while self._check_if_comma():  # there are more params
                self._check_write_symbol(",")
                self._check_write_type()
                self._check_write_name()
        self._write_outer_tag(PARAMETER_LIST_TAG, IS_ENDING_TAG)

    def compile_subroutine_body(self):
        """
        compiles the body of the subroutine
        """
        self._write_outer_tag(SUBROUTINE_BODY_TAG)
        self._check_write_symbol("{")
        # there may be multiple variable declarations at the beginning of
        # the subroutine
        while self.tokenizer.key_word() == 'var':
            self.compile_var_dec()
        self.compile_statements()
        self._check_write_symbol("}")
        self._write_outer_tag(SUBROUTINE_BODY_TAG, IS_ENDING_TAG)

    def compile_var_dec(self):
        """
        compiles the variable declarations
        """
        self._write_outer_tag(VAR_DEC_TAG)
        self._write_token(self.tokenizer.token_type())
        self._check_write_type()
        self._check_write_name()
        # there may be multiple variable names in the dec
        while self._check_if_comma():
            self._check_write_symbol(",")
            self._check_write_name()
        self._check_write_symbol(";")
        self._write_outer_tag(VAR_DEC_TAG, IS_ENDING_TAG)

    def compile_statements(self):
        """
        compiles the statements (0 or more statements)
        """
        self._write_outer_tag(STATEMENTS_TAG)
        while self._check_if_statement():
            if self.tokenizer.key_word() == 'let':
                self.compile_let()
            elif self.tokenizer.key_word() == 'if':
                self.compile_if()
            elif self.tokenizer.key_word() == 'while':
                self.compile_while()
            elif self.tokenizer.key_word() == 'do':
                self.compile_do()
            elif self.tokenizer.key_word() == 'return':
                self.compile_return()
        self._write_outer_tag(STATEMENTS_TAG, IS_ENDING_TAG)

    def compile_do(self):
        """
        compiles the do statement
        """
        self._write_outer_tag(DO_STATEMENT_TAG)
        self._write_token(self.tokenizer.token_type())
        self.compile_subroutine_call()
        self._check_write_symbol(";")
        self._write_outer_tag(DO_STATEMENT_TAG, IS_ENDING_TAG)

    def compile_let(self):
        """
        compiles the let statement
        """
        self._write_outer_tag(LET_STATEMENT_TAG)
        self._write_token(self.tokenizer.token_type())
        self._check_write_name()
        if self.tokenizer.symbol() == '[':  # if there is an array
            self._check_write_symbol("[")
            self.compile_expression()
            self._check_write_symbol("]")
        self._check_write_symbol("=")
        self.compile_expression()
        self._check_write_symbol(";")
        self._write_outer_tag(LET_STATEMENT_TAG, IS_ENDING_TAG)

    def compile_if(self):
        """
        compiles the if statements
        """
        self._write_outer_tag(IF_STATEMENT_TAG)
        self._write_token(self.tokenizer.token_type())
        self._check_write_symbol("(")
        self.compile_expression()
        self._check_write_symbol(")")
        self._check_write_symbol("{")
        self.compile_statements()
        self._check_write_symbol("}")
        # there can also be an if else scenario
        if self.tokenizer.key_word() == 'else':
            self._write_token(self.tokenizer.token_type())
            self._check_write_symbol("{")
            self.compile_statements()
            self._check_write_symbol("}")
        self._write_outer_tag(IF_STATEMENT_TAG, IS_ENDING_TAG)

    def compile_while(self):
        """
        compiles the while statements
        """
        self._write_outer_tag("whileStatement")
        self._write_token(self.tokenizer.token_type())
        self._check_write_symbol("(")
        self.compile_expression()
        self._check_write_symbol(")")
        self._check_write_symbol("{")
        self.compile_statements()
        self._check_write_symbol("}")
        self._write_outer_tag("whileStatement", IS_ENDING_TAG)

    def compile_return(self):
        """
        compiles the return statements
        """
        self._write_outer_tag(RETURN_STATEMENT_TAG)
        self._write_token(self.tokenizer.token_type())
        # if cur token is ; we return nothing, otherwise we return something
        if not self.tokenizer.symbol() == ';':
            self.compile_expression()
        self._check_write_symbol(";")
        self._write_outer_tag(RETURN_STATEMENT_TAG, IS_ENDING_TAG)

    def compile_subroutine_call(self):
        """
        compiles the subroutine calls (when we actually call a subroutine,
        as opposed to declaring it)
        """
        self._check_write_name()
        # there may be a '.' if it is a foo.bar() scenario (or Foo.bar())
        if self.tokenizer.symbol() == ".":
            self._check_write_symbol(".")
            self._check_write_name()
        self._check_write_symbol("(")
        self.compile_expression_list()
        self._check_write_symbol(")")

    def compile_expression(self):
        """
        compiles expressions which are terms and possibly operators and more
        terms
        """
        self._write_outer_tag(EXPRESSION_TAG)
        self.compile_term()
        # there may be a few operators in one expression
        while self.tokenizer.symbol() in OPERATIONS:
            self._write_op()
            self.compile_term()
        self._write_outer_tag(EXPRESSION_TAG, IS_ENDING_TAG)

    def compile_term(self):
        """
        compiles terms according to the grammar
        """
        self._write_outer_tag(TERM_TAG)
        cur_type = self.tokenizer.token_type()
        # either a string/int constant
        if self.tokenizer.token_type() in ["INT_CONST", "STRING_CONST"]:
            self._write_token(cur_type)
        # or a constant keyword (true, false, null, this)
        elif self.tokenizer.key_word() in KEYWORD_CONST:
            self._write_token(cur_type)
        # or an expression within round brackets
        elif self.tokenizer.symbol() == '(':
            self._write_token(cur_type)
            self.compile_expression()
            self._check_write_symbol(")")
        # or a unary op and then a term
        elif self.tokenizer.symbol() in UNARY_OPS:
            self._write_op()
            self.compile_term()
        # or it is an identifier which could be:
        elif self.tokenizer.identifier():
            self._compile_term_identifier()
        else:
            print(COMPILE_TERM_ERROR)
            sys.exit()
        self._write_outer_tag(TERM_TAG, IS_ENDING_TAG)

    def _compile_term_identifier(self):
        """
         compiles terms in case of identifier token
        """
        # an array
        if self.tokenizer.get_next_token() == '[':
            self._check_write_name()
            self._check_write_symbol("[")
            self.compile_expression()
            self._check_write_symbol("]")
        # or a subroutine call
        elif self.tokenizer.get_next_token() in [".", "("]:
            self.compile_subroutine_call()
        else:
            self._check_write_name()  # or just a variable name

    def compile_expression_list(self):
        """
        compiles the expression lists
        """
        self._write_outer_tag(EXPRESSION_LIST_TAG)
        # if it is ')' then the expression list is empty
        if self.tokenizer.symbol() != ')':
            self.compile_expression()
            while self._check_if_comma():  # while there are more expressions
                self._write_token(self.tokenizer.token_type())
                self.compile_expression()
        self._write_outer_tag(EXPRESSION_LIST_TAG, IS_ENDING_TAG)

    def _check_if_var_dec(self):
        """
        check if we are currently compiling a variable declaration
        :return: true iff the current token is either 'static' or 'field'
        """
        return self.tokenizer.key_word() in CLASS_VAR_KEYWORDS

    def _check_subroutine_dec(self):
        """
        checks if we are currently compiling a subroutine declaration
        :return: true iff the current token is either 'constructor' or
        'function' or 'method'
        """
        return self.tokenizer.key_word() in SUBROUTINE

    def _check_if_comma(self):
        """
        checks if current token is a comma
        :return: true iff the current token is a ','
        """
        return self.tokenizer.symbol() == ','

    def _check_if_statement(self):
        """
        checks if we are currently compiling a statement
        :return: true iff the current token
        is in ['let', 'if', 'while', 'do', 'return']
        """
        return self.tokenizer.key_word() in STATEMENTS

    def _check_write_type(self):
        """
        checks if the current token is a valid type and, if so, writes it
        to the output file
        """
        if self.tokenizer.key_word() in TYPE_KEYWORDS:
            self._write_token(self.tokenizer.token_type())
        else:
            self._check_write_name()

    def _check_write_symbol(self, expected_symbol):
        """
        checks if the current token is the expected symbol and, if so, writes
        it to the output file
        :param expected_symbol: the symbol we are validating is the current
        token
        :return: prints illegal statement error if it is not the expected
        symbol and exits the program
        """
        if self.tokenizer.symbol() != expected_symbol:
            print(ILLEGAL_STATEMENT_ERROR)
            sys.exit()
        self._write_token(self.tokenizer.token_type())

    def _check_write_name(self):
        """
        checks that the current token is a name (identifier) and, if so,
        writes it to the output file
        :return: prints illegal statement error if it is not a name and
        exits the program
        """
        if self.tokenizer.identifier():
            self._write_token("IDENTIFIER")
        else:
            print(ILLEGAL_STATEMENT_ERROR)
            sys.exit()

    def _write_outer_tag(self, tag_str, end=False):
        """
        writes the outer tags of the different sections we are compiling
        :param tag_str: the string of the current section we are compiling
        :param end: true iff it is an end tag
        """
        if end:  # we decrease the indent count before the closing tag
            self._indent_count -= 1
            self.out_file.write("\t" * self._indent_count)
            self.out_file.write("</" + tag_str + ">\n")
        else:  # we increase the indent count after the opening tag
            self.out_file.write("\t" * self._indent_count)
            self.out_file.write("<" + tag_str + ">\n")
            self._indent_count += 1

    def _write_op(self):
        """
        writes an op symbol to the out file
        """
        self.out_file.write("\t" * self._indent_count)
        self.out_file.write("<symbol> ")
        if self.tokenizer.symbol() == '<':
            self.out_file.write("&lt;")
        elif self.tokenizer.symbol() == '>':
            self.out_file.write("&gt;")
        elif self.tokenizer.symbol() == '&':
            self.out_file.write("&amp;")
        elif self.tokenizer.symbol() == '\"':
            self.out_file.write("&quot;")
        else:
            self.out_file.write(self.tokenizer.symbol())
        self.out_file.write(" </symbol>\n")
        self.tokenizer.advance()

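    # For example, when the current token is '<', the method above emits the
    # escaped element:  <symbol> &lt; </symbol>
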
    def _write_token(self, cur_type):
        """
        writes the current token to the output file
        :param cur_type: the type of the current token
        """
        self.out_file.write("\t" * self._indent_count)
        self.out_file.write("<" + TOKEN_TYPE_STR[cur_type] + "> ")
        self.out_file.write(str(self.tokenizer.get_token_str()))
        self.out_file.write(" </" + TOKEN_TYPE_STR[cur_type] + ">\n")
        self.tokenizer.advance()
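
A minimal driver for the XML-emitting engine above might look as follows; the file names are placeholders, and compile_class is the engine's only entry point:

if __name__ == '__main__':
    # hypothetical driver - 'Main.jack' and 'Main.xml' are placeholder paths
    engine = CompilationEngine('Main.jack', 'Main.xml')
    engine.compile_class()
    engine.out_file.close()  # the engine never closes its output file itself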
Example #7
class CompilationEngine:
    def __init__(self, input_file, output_file):
        self.tokenizer = JackTokenizer(input_file)
        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(output_file)
        self.current_sub_name = None
        self.class_name = None
        self.func_counter = 0
        self.while_counter = 0
        self.if_counter = 0

        # starts the process
        self.tokenizer.advance()
        self.compile_class()
        self.vm_writer.close()

    def compile_class(self):
        """
        compiles a complete class
        :return: none
        """
        # advances a single step to get the class name
        self.tokenizer.advance()
        # set class's name
        self.class_name = self.tokenizer.current_token
        # moves to the symbol {
        self.tokenizer.advance()

        # move to the next symbol and check what it is
        self.tokenizer.advance()

        # compiles class variable
        while KEY_WORDS.get(self.tokenizer.current_token) == STATIC or \
                KEY_WORDS.get(self.tokenizer.current_token) == FIELD:
            self.compile_class_var_dec()
        # compiles subroutine
        while KEY_WORDS.get(self.tokenizer.current_token) == CONSTRUCTOR or \
                KEY_WORDS.get(self.tokenizer.current_token) == METHOD or \
                KEY_WORDS.get(self.tokenizer.current_token) == FUNCTION:
            self.compile_sub_routine()
        # we are now at the <symbol> } <symbol> which closes the class

    def compile_class_var_dec(self):
        """
        compiles a var dec
        :return: none
        """
        var_kind = self.tokenizer.key_word()
        # advances the token to the var's type
        self.tokenizer.advance()
        if self.tokenizer.token_type() == KEY_WORD:
            var_type = self.tokenizer.key_word()
        else:
            var_type = self.tokenizer.identifier()
        # advances the token to the var's identifier
        self.tokenizer.advance()
        if self.tokenizer.token_type() == KEY_WORD:
            var_name = self.tokenizer.key_word()
        else:
            var_name = self.tokenizer.identifier()

        # update symbol table
        self.symbol_table.define(var_name, var_type, var_kind)

        # advance to next token, and check if there are more var_names
        self.tokenizer.advance()
        while self.tokenizer.current_token != ";":
            # token is <symbol> , <symbol>
            # advance to var's identifier
            self.tokenizer.advance()
            var_name = self.tokenizer.current_token
            # update symbol table
            self.symbol_table.define(var_name, var_type, var_kind)
            self.tokenizer.advance()

        # the current token is <symbol> ; <symbol>, advance to next
        self.tokenizer.advance()

    def compile_sub_routine(self):
        """
        compiles a single sub routine
        :return: none
        """
        # start new subroutine symbol table
        self.symbol_table.start_subroutine()
        # get subroutine type (method/constructor/function)
        sub_type = self.tokenizer.key_word()

        # advances the token to what the subroutine returns
        self.tokenizer.advance()
        # updates the return type
        if self.tokenizer.token_type() == KEY_WORD:
            return_type = self.tokenizer.key_word()
        else:
            return_type = self.tokenizer.identifier()

        # advances the token to <identifier> sub_name <identifier>
        self.tokenizer.advance()
        # update the subroutine name
        subroutine_name = self.tokenizer.identifier()
        self.current_sub_name = subroutine_name

        # advance to <symbol> ( <symbol>
        self.tokenizer.advance()
        # if subroutine is a method, add 'this' to the symbol table as argument 0
        if sub_type == METHOD:
            self.symbol_table.define("this", self.class_name, "ARG")
        # compiles the parameter list
        self.compile_parameter_list()
        # we are at <symbol> ) <symbol>
        # advance to subroutine body, and compile it
        self.tokenizer.advance()
        self.compile_subroutine_body(sub_type)

    def compile_subroutine_body(self, sub_type):
        """
        the method compiles the subroutine body
        :return: none
        """
        # we are at bracket {, advance
        self.tokenizer.advance()

        # compile var dec
        while KEY_WORDS.get(self.tokenizer.current_token) == VAR:
            self.compile_var_dec()

        # write function label
        self.vm_writer.write_function(
            self.class_name + '.' + self.current_sub_name,
            self.symbol_table.var_count("VAR"))

        # if is method, update THIS to the object
        if sub_type == METHOD:
            self.vm_writer.write_push(ARG, 0)
            self.vm_writer.write_pop("POINTER", 0)

        # if is constructor, allocate memory, and put in this
        if sub_type == CONSTRUCTOR:
            self.vm_writer.write_push("CONST",
                                      self.symbol_table.var_count("FIELD"))
            self.vm_writer.write_call("Memory.alloc", 1)
            self.vm_writer.write_pop("POINTER", 0)

        if self.tokenizer.current_token != "}":
            self.compile_statements()

        # we are at bracket }, advance
        self.tokenizer.advance()

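    # For illustration (hypothetical class 'Point' with two fields and no
    # locals, assuming the VMWriter maps CONST/POINTER to the constant and
    # pointer segments), a constructor's preamble above emits roughly:
    #   function Point.new 0
    #   push constant 2
    #   call Memory.alloc 1
    #   pop pointer 0
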
    def compile_parameter_list(self):
        """
        compiles a parameter list
        :return: none
        """
        # advance to first parameter
        self.tokenizer.advance()
        # while there are more parameters
        while self.tokenizer.current_token != ')':
            # tests what to put as the type of the object
            if self.tokenizer.token_type() == KEY_WORD:
                var_type = self.tokenizer.key_word()
            else:
                var_type = self.tokenizer.identifier()

            # advance to variables name <identifier> var_name <identifier>
            self.tokenizer.advance()
            var_name = self.tokenizer.identifier()

            # define new variable
            self.symbol_table.define(var_name, var_type, "ARG")

            # gets the next token
            self.tokenizer.advance()

            # advance to next token if we are at ','
            if self.tokenizer.current_token == ",":
                self.tokenizer.advance()

    def compile_var_dec(self):
        """
        compiles a declaration of a variable
        :return: none
        """
        # we are at <keyword> var <keyword>
        # advance to variable type
        self.tokenizer.advance()
        if self.tokenizer.token_type() == KEY_WORD:
            var_type = self.tokenizer.key_word()
        else:
            var_type = self.tokenizer.identifier()

        # advance to the variables name
        self.tokenizer.advance()
        while self.tokenizer.current_token != ';':
            # we are at <identifier> var_name <identifier>
            var_name = self.tokenizer.identifier()
            # define variable in symbol table
            self.symbol_table.define(var_name, var_type, "VAR")
            # advance to next token
            self.tokenizer.advance()
            # tests what to put as the type of the object
            if self.tokenizer.current_token == ",":
                self.tokenizer.advance()
        # we are at <symbol> ; <symbol>
        # advance to next token
        self.tokenizer.advance()

    def compile_statements(self):
        """
        the method compiles statements
        :return: none
        """
        # while there are more statements, deal with each one
        while self.tokenizer.current_token != '}':
            statement_type = self.tokenizer.key_word()
            if statement_type == LET:
                self.compile_let()
            elif statement_type == IF:
                self.compile_if()
            elif statement_type == WHILE:
                self.compile_while()
            elif statement_type == DO:
                self.compile_do()
            elif statement_type == RETURN:
                self.compile_return()

    def compile_do(self):
        """
        the method compiles a do command
        :return: none
        """
        # we are at <keyword> do <keyword>
        # advance to next token <identifier> name_of_func <identifier>
        self.tokenizer.advance()
        func_name = self.tokenizer.identifier()
        self.tokenizer.advance()
        # compile the subroutine call
        self.compile_subroutine_call(func_name)
        # pop the result from the function into temp
        self.vm_writer.write_pop("TEMP", 0)
        # we are at <symbol> ; <symbol>, advance to next token
        self.tokenizer.advance()

    def compile_let(self):
        """
        the method compiles a let statement
        :return: none
        """
        # we are at <keyword> let <keyword>
        # advance to next token (var_name)
        self.tokenizer.advance()
        # we are at <identifier> var_name <identifier>
        var_name = self.tokenizer.identifier()
        # get variable data
        var_index = self.symbol_table.index_of(var_name)
        var_kind = TYPE_DICT.get(self.symbol_table.kind_of(var_name))
        # advance to next token ('[' | '=')
        self.tokenizer.advance()
        is_array = False
        if self.tokenizer.current_token == '[':
            is_array = True
            # push arr
            self.vm_writer.write_push(var_kind, var_index)
            # advance to expression and compile it
            self.tokenizer.advance()
            self.compile_expression()
            # we are at <symbol> ] <symbol>, advance to next token
            self.tokenizer.advance()
            # add the index of array and the expression to get the correct location
            self.vm_writer.write_arithmetic("ADD")
        # we are at <symbol> = <symbol>
        # advance to expression and compile it
        self.tokenizer.advance()
        self.compile_expression()

        # if var is an array
        if is_array:
            self.vm_writer.write_pop("TEMP", 0)
            self.vm_writer.write_pop("POINTER", 1)
            self.vm_writer.write_push("TEMP", 0)
            self.vm_writer.write_pop("THAT", 0)
        # if var is not an array
        else:
            self.vm_writer.write_pop(var_kind, var_index)

        # we are at <symbol> ; <symbol>, advance to next
        self.tokenizer.advance()
        return

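    # Sketch of the code emitted for 'let a[i] = x;' (hypothetical names;
    # assumes a, i and x live in the local segment at indices 0, 1 and 2):
    #   push local 0    // a (base address)
    #   push local 1    // i
    #   add
    #   push local 2    // x
    #   pop temp 0      // save the assigned value
    #   pop pointer 1   // THAT = address of a[i]
    #   push temp 0
    #   pop that 0      // a[i] = x
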
    def compile_while(self):
        """
        the method compiles a while statement
        :return: none
        """
        while_counter = str(self.while_counter)
        # update the while counter
        self.while_counter += 1
        # create new label for the start of the while
        self.vm_writer.write_label("While_" + while_counter)
        # we are at <keyword> while <keyword>, advance to next token
        self.tokenizer.advance()
        # we are at <symbol> ( <symbol>, advance to next token
        self.tokenizer.advance()
        self.compile_expression()
        # we are at <symbol> ) <symbol>, advance to next token
        self.tokenizer.advance()
        # negate expression
        self.vm_writer.write_arithmetic("NOT")
        # if condition is not met, go to the end of the while
        self.vm_writer.write_if("End_While_" + while_counter)
        # we are at <symbol> { <symbol>, advance to next token
        self.tokenizer.advance()
        # compile statements
        self.compile_statements()
        # go back to the start of the while
        self.vm_writer.write_goto("While_" + while_counter)
        # create new label for the end of the while
        self.vm_writer.write_label("End_While_" + while_counter)
        # we are at <symbol> } <symbol>, advance to next token
        self.tokenizer.advance()
        return

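    # Skeleton emitted for a while statement (counter value 0 assumed;
    # write_arithmetic/write_if assumed to emit 'not' and 'if-goto'):
    #   label While_0
    #   ... condition expression ...
    #   not
    #   if-goto End_While_0
    #   ... body statements ...
    #   goto While_0
    #   label End_While_0
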
    def compile_return(self):
        """
        the method compiles a return statement
        :return: none
        """
        # we are at <keyword> return <keyword>, advance to next token
        self.tokenizer.advance()
        if self.tokenizer.current_token != ';':
            self.compile_expression()
        else:
            # if function is void, push const 0 to the stack
            self.vm_writer.write_push("CONST", 0)
        # we are at <symbol> ; <symbol>, advance to next token
        self.tokenizer.advance()
        self.vm_writer.write_return()
        return

    def compile_if(self):
        """
        the method compiles an if statement
        :return: none
        """
        if_count = str(self.if_counter)
        # update if counter
        self.if_counter += 1
        # we are at <keyword> if <keyword>, advance to next token
        self.tokenizer.advance()
        # we are at <symbol> ( <symbol>, advance to next token
        self.tokenizer.advance()
        # compile expression
        self.compile_expression()
        # negate the expression
        self.vm_writer.write_arithmetic("NOT")
        # check if condition is met
        self.vm_writer.write_if("ELSE_" + if_count)
        # we are at <symbol> ) <symbol>, advance to next token
        self.tokenizer.advance()
        # we are at <symbol> { <symbol>, advance to next token
        self.tokenizer.advance()
        self.compile_statements()
        # jump to the end of the if
        self.vm_writer.write_goto("END_IF_" + if_count)
        # we are at <symbol> } <symbol>, advance to next token
        self.tokenizer.advance()
        # create else label (which may be empty)
        self.vm_writer.write_label("ELSE_" + if_count)
        if self.tokenizer.current_token == 'else':
            # we are at <keyword> else <keyword>, advance
            self.tokenizer.advance()
            # we are at <symbol> { <symbol>, advance
            self.tokenizer.advance()
            self.compile_statements()
            # we are at <symbol> } <symbol>, advance
            self.tokenizer.advance()
        # create new label
        self.vm_writer.write_label("END_IF_" + if_count)
        return

    def compile_expression(self):
        """
        the method compiles an expression
        :return:
        """
        # compile the term
        self.compile_term()
        while self.tokenizer.current_token in OP_LST:
            call_math = False
            # we are at <symbol> op <symbol>
            op = OP_DICT.get(self.tokenizer.current_token)
            # check if operator needs to call math
            if self.tokenizer.current_token == '*' or self.tokenizer.current_token == '/':
                call_math = True
            # advance to next term and compile term
            self.tokenizer.advance()
            self.compile_term()
            # output the operator
            if call_math:
                self.vm_writer.write_call(op[0], op[1])
            else:
                self.vm_writer.write_arithmetic(op)
        return

    def compile_term(self):
        """
        the method compiles a term
        :return: none
        """
        token_type = self.tokenizer.token_type()
        if token_type == INT_CONST:
            # push the const int
            self.vm_writer.write_push("CONST", self.tokenizer.int_val())
            self.tokenizer.advance()
        elif token_type == STRING_CONST:
            # get the string value without the surrounding quotes
            string_val = self.tokenizer.string_val()
            # push the len of the string and call the string constructor
            self.vm_writer.write_push("CONST", len(string_val))
            self.vm_writer.write_call("String.new", 1)
            # update new string
            for char in string_val:
                self.vm_writer.write_push("CONST", ord(char))
                self.vm_writer.write_call("String.appendChar", 2)
            self.tokenizer.advance()
        elif self.tokenizer.current_token in KEY_WORD_CONST:
            segment, idx = KEY_WORD_CONST.get(self.tokenizer.current_token)
            self.vm_writer.write_push(segment, idx)
            if self.tokenizer.current_token == 'true':
                self.vm_writer.write_arithmetic('NOT')
            self.tokenizer.advance()
        elif self.tokenizer.current_token == '(':
            # we are at <symbol> ( <symbol>, advance to next token
            self.tokenizer.advance()
            self.compile_expression()
            # we are at <symbol> ) <symbol>, advance to next token
            self.tokenizer.advance()
        elif self.tokenizer.current_token in UNARY_OP:
            op_command = UNARY_OP.get(self.tokenizer.current_token)
            self.tokenizer.advance()
            self.compile_term()
            self.vm_writer.write_arithmetic(op_command)
        # var/var[expression]/subroutine_call
        else:
            # we are at <identifier> var_name <identifier>
            var_name = self.tokenizer.identifier()
            self.tokenizer.advance()
            # if is var_name[expression]
            if self.tokenizer.current_token == '[':
                var_kind = TYPE_DICT.get(self.symbol_table.kind_of(var_name))
                var_index = self.symbol_table.index_of(var_name)
                # push arr
                self.vm_writer.write_push(var_kind, var_index)
                # we are at <symbol> [ <symbol>, advance to expression and compile it
                self.tokenizer.advance()
                self.compile_expression()
                # add the index of array and the expression to get the correct location
                self.vm_writer.write_arithmetic("ADD")
                # set the that pointer
                self.vm_writer.write_pop("POINTER", 1)
                # push to the stack what is in the arr[i]
                self.vm_writer.write_push("THAT", 0)
                # we are at <symbol> ] <symbol>, advance
                self.tokenizer.advance()
            # if is a subroutine call
            elif self.tokenizer.current_token == '(' or self.tokenizer.current_token == '.':
                self.compile_subroutine_call(var_name)
            else:
                # if is just 'var'
                var_kind = TYPE_DICT.get(self.symbol_table.kind_of(var_name))
                var_index = self.symbol_table.index_of(var_name)
                self.vm_writer.write_push(var_kind, var_index)
        return

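    # For illustration, the STRING_CONST branch above turns "Hi" into:
    #   push constant 2           // string length
    #   call String.new 1
    #   push constant 72          // ord('H')
    #   call String.appendChar 2
    #   push constant 105         // ord('i')
    #   call String.appendChar 2
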
    def compile_expression_list(self):
        """
        the method compiles a list of expressions
        :return: the number of arguments in the expression list
        """
        expression_counter = 0
        # check that list is not empty
        if self.tokenizer.current_token != ')':
            expression_counter += 1
            # compile first expression
            self.compile_expression()
            # if there are more expressions, compile them
            while self.tokenizer.current_token == ',':
                expression_counter += 1
                # we are at <symbol> , <symbol>, advance
                self.tokenizer.advance()
                # compile expression
                self.compile_expression()
        return expression_counter

    def compile_subroutine_call(self, identifier):
        """
        the method compiles a subroutine call (not including the
        subroutine's leading varName)
        :return: none
        """
        func_name = self.class_name + "." + identifier
        num_of_arguments = 0
        if self.tokenizer.current_token == '.':
            # change func name to its class name
            if self.symbol_table.type_of(identifier) is not None:
                func_name = self.symbol_table.type_of(identifier)
                # we are at <symbol> . <symbol>, advance
                self.tokenizer.advance()
                # we are at <identifier> sub_name <identifier>
                func_name = func_name + "." + self.tokenizer.identifier()
                self.tokenizer.advance()
                # push the object to the stack
                segment = TYPE_DICT.get(self.symbol_table.kind_of(identifier))
                idx = self.symbol_table.index_of(identifier)
                self.vm_writer.write_push(segment, idx)
                num_of_arguments += 1
            else:
                # we are at <symbol> . <symbol>, advance
                self.tokenizer.advance()
                # we are at <identifier> sub_name <identifier>
                func_name = identifier + "." + self.tokenizer.identifier()
                self.tokenizer.advance()
        else:
            self.vm_writer.write_push("POINTER", 0)
            num_of_arguments += 1
        # we are at <symbol> ( <symbol>, advance
        self.tokenizer.advance()
        num_of_arguments += self.compile_expression_list()
        # we are at <symbol> ) <symbol>, advance
        self.tokenizer.advance()
        self.vm_writer.write_call(func_name, num_of_arguments)
        return
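
Unlike the XML engine, this engine runs the whole pipeline from its constructor (advance, compile_class, close), so a driver only needs to instantiate it once per file. A minimal sketch with placeholder paths:

import glob

# hypothetical driver - compiles every .jack file in a placeholder directory
for jack_path in glob.glob('MyProgram/*.jack'):
    CompilationEngine(jack_path, jack_path[:-5] + '.vm')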
Example #8
class CompilationEngine:
    def __init__(self, input_path, output_path):
        """
        creates a new compilation engine with the given input and output.
        The next routine called must be CompileClass()
        :param input_path: input stream/file
        :param output_path: output stream/file
        """
        self.labels = 0
        self.jack_class = None
        self.class_subroutine = None
        self.tokenizer = JackTokenizer(input_path)
        self._writer = VMWriter(output_path)
        self.CompileClass()

    def CompileClass(self):
        """
        Compiles a complete class.
        """
        self.tokenizer.advance()
        self.tokenizer.advance()
        self.jack_class = JackClass(self.tokenizer.current_token)
        self.tokenizer.advance()
        self.CompileClassVarDec()
        self.CompileSubroutine()
        self.tokenizer.advance()

    def CompileClassVarDec(self):
        """
        Compiles a static declaration or a field declaration.
        """
        peek = self.tokenizer.peek()
        while 'static' in peek or 'field' in peek:
            self.tokenizer.advance()
            kind = self.tokenizer.keyWord()  # field/static
            self.tokenizer.advance()
            type = self.tokenizer.keyWord()  # type
            self.tokenizer.advance()
            name = self.tokenizer.identifier()  # name
            self.tokenizer.advance()
            self.jack_class.add_var(name, type, kind)
            while self.tokenizer.symbol() == ',':
                self.tokenizer.advance()
                name = self.tokenizer.identifier()
                self.tokenizer.advance()
                self.jack_class.add_var(name, type, kind)
            peek = self.tokenizer.peek()

    def CompileSubroutine(self):
        """
        Compiles a complete method, function, or constructor.
        """
        peek = self.tokenizer.peek()
        while 'function' in peek or 'constructor' in peek or 'method' in peek:
            self.tokenizer.advance()
            kind = self.tokenizer.keyWord()  # const/func/method
            self.tokenizer.advance()
            type = self.tokenizer.current_token  # void/type
            self.tokenizer.advance()
            name = self.tokenizer.identifier()  # name
            self.tokenizer.advance()
            self.class_subroutine = JackSubroutine(name, kind, type,
                                                   self.jack_class)
            self.CompileParameterList()
            self.tokenizer.advance()
            self.tokenizer.advance()
            peek = self.tokenizer.peek()
            if 'var' in peek:
                self.CompileVarDec()
            full_name = '{}.{}'.format(self.jack_class.class_name,
                                       self.class_subroutine.name)
            self._writer.write_function(full_name, self.class_subroutine.var_c)
            if kind == 'constructor':
                fields = self.jack_class.counters[0]
                self._writer.push('constant', str(fields))
                self._writer.write_call('Memory.alloc', '1')
                self._writer.pop('pointer', '0')
            elif kind == 'method':
                self._writer.push('argument', '0')
                self._writer.pop('pointer', '0')
            self.CompileStatements()
            self.tokenizer.advance()
            peek = self.tokenizer.peek()

    def CompileParameterList(self):
        """
        Compiles a (possibly empty) parameter list, not including the enclosing ()
        """
        peek = self.tokenizer.peek()
        if peek != ')':
            self.tokenizer.advance()
            type = self.tokenizer.keyWord()  # type
            self.tokenizer.advance()
            name = self.tokenizer.identifier()  # name
            peek = self.tokenizer.peek()
            self.class_subroutine.add_arg(name, type)
        while peek == ',':
            self.tokenizer.advance()
            self.tokenizer.advance()
            type = self.tokenizer.keyWord()  # type
            self.tokenizer.advance()
            name = self.tokenizer.identifier()  # name
            self.class_subroutine.add_arg(name, type)
            peek = self.tokenizer.peek()

    def CompileVarDec(self):
        """
        Compiles a var declaration.
        """
        peek = self.tokenizer.peek()
        while 'var' in peek:
            self.tokenizer.advance()
            self.tokenizer.advance()
            type = self.tokenizer.keyWord()
            self.tokenizer.advance()
            name = self.tokenizer.identifier()
            self.class_subroutine.add_var(name, type)
            self.tokenizer.advance()
            while self.tokenizer.symbol() == ',':
                self.tokenizer.advance()
                name = self.tokenizer.identifier()  # name
                self.class_subroutine.add_var(name, type)
                self.tokenizer.advance()
            peek = self.tokenizer.peek()

    def CompileStatements(self):
        """
        Compiles a sequence of statements, not including the enclosing "{}"
        """
        peek = self.tokenizer.peek()
        while 'let' in peek or 'if' in peek or 'while' in peek or 'do' in peek or 'return' in peek:
            if 'let' in peek:
                self.CompileLet()
            elif 'if' in peek:
                self.CompileIf()
            elif 'while' in peek:
                self.CompileWhile()
            elif 'do' in peek:
                self.CompileDo()
            elif 'return' in peek:
                self.CompileReturn()
            peek = self.tokenizer.peek()

    def CompileDo(self):
        """
        Compiles a do statement.
        """
        self.tokenizer.advance()  # do
        self.tokenizer.advance()  # move past 'do' to the start of the call
        self.CompileTerm()
        self._writer.pop('temp', '0')
        self.tokenizer.advance()  # ;
        if self.tokenizer.current_token != ';':
            self.tokenizer.advance()

    def CompileLet(self):
        """
        Compiles a let statement.
        """
        self.tokenizer.advance()  # let
        self.tokenizer.advance()
        name = self.tokenizer.identifier()
        symbol = self.class_subroutine.get_symbol(name)
        peek = self.tokenizer.peek()
        if peek == '[':
            self.tokenizer.advance()  # [
            self.tokenizer.advance()
            self.CompileExpression()
            self.tokenizer.advance()  # ]
            self.tokenizer.advance()  # =
            self._writer.push(symbol)
            self._writer.write_cmd('add')
            self.tokenizer.advance()
            self.CompileExpression()
            self._writer.pop('temp', '0')
            self._writer.pop('pointer', '1')
            self._writer.push('temp', '0')
            self._writer.pop('that', '0')
        else:
            self.tokenizer.advance()  # =
            self.tokenizer.advance()
            self.CompileExpression()
            self._writer.pop(symbol)
        self.tokenizer.advance()  # ;

    def CompileWhile(self):
        """
        Compiles a while statement.
        """
        label_c = self.labels
        self.labels += 1
        self.tokenizer.advance()  # while
        self.tokenizer.advance()  # (
        self.tokenizer.advance()
        self._writer.write_label(LABEL_FORMAT.format('WHILE_EXP', label_c))
        self.CompileExpression()
        self.tokenizer.advance()  # )
        self.tokenizer.advance()  # {
        # negate the condition so the loop exits when it is false
        self._writer.write_cmd('not')
        self._writer.write_if(LABEL_FORMAT.format('WHILE_END', label_c))
        self.CompileStatements()
        # loop back to re-test the condition, then mark the exit point
        self._writer.write_goto(LABEL_FORMAT.format('WHILE_EXP', label_c))
        self._writer.write_label(LABEL_FORMAT.format('WHILE_END', label_c))
        self.tokenizer.advance()  # }

    def CompileReturn(self):
        """
        Compiles a return statement.
        """
        self.tokenizer.advance()  # return
        peek = self.tokenizer.peek()
        if peek != ';':
            self.tokenizer.advance()
            self.CompileExpression()
            self.tokenizer.advance()
        else:
            self._writer.push('constant', '0')
            self.tokenizer.advance()
        self._writer.write_return()

    def CompileIf(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        """
        label_c = self.labels
        self.tokenizer.advance()  # if
        self.tokenizer.advance()
        self.tokenizer.advance()  # (
        self.CompileExpression()
        self.tokenizer.advance()  # )
        self.tokenizer.advance()  # {
        self._writer.write_if(LABEL_FORMAT.format('IF_TRUE', label_c))
        self._writer.write_goto(LABEL_FORMAT.format('IF_FALSE', label_c))
        self._writer.write_label(LABEL_FORMAT.format('IF_TRUE', label_c))
        self.labels += 1
        self.CompileStatements()
        self.tokenizer.advance()  # }
        peek = self.tokenizer.peek()
        if peek == 'else':
            self._writer.write_goto(LABEL_FORMAT.format('IF_END', label_c))
            self._writer.write_label(LABEL_FORMAT.format('IF_FALSE', label_c))
            self.tokenizer.advance()  # else
            self.tokenizer.advance()  # {
            self.CompileStatements()
            self.tokenizer.advance()  # }
            self._writer.write_label(LABEL_FORMAT.format('IF_END', label_c))
        else:
            self._writer.write_label(LABEL_FORMAT.format('IF_FALSE', label_c))

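    # Skeleton emitted for if/else (counter 0; LABEL_FORMAT assumed to join
    # the name and the counter, e.g. 'IF_TRUE0'):
    #   ... condition expression ...
    #   if-goto IF_TRUE0
    #   goto IF_FALSE0
    #   label IF_TRUE0
    #   ... then statements ...
    #   goto IF_END0      (only when an else clause follows)
    #   label IF_FALSE0
    #   ... else statements ...
    #   label IF_END0
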
    def CompileExpression(self):
        """
        Compiles an expression.
        """
        self.CompileTerm()
        peek = self.tokenizer.peek()
        while peek in OPS:
            self.tokenizer.advance()
            op = self.tokenizer.symbol()
            self.tokenizer.advance()
            self.CompileTerm()
            self._writer.write_cmd(OP_DIC[op])
            peek = self.tokenizer.peek()

    def CompileTerm(self):
        """
        Compiles a term. This routine is faced with a slight difficulty when trying to decide between some of the
        alternative parsing rules. Specifically, if the current token is an identifier, the routine must distinguish
        between a variable, an array entry, and a subroutine call. A single look-ahead token, which may be one
        of [, (, or . suffices to distinguish between the three possibilities. Any other token is not
        part of this term and should not be advanced over.
        """
        if self.tokenizer.current_token in UNARY_OP:
            self._writer.write_cmd(UNARY_DIC[self.tokenizer.current_token])
            self.tokenizer.advance()
            self.CompileTerm()
        elif self.tokenizer.current_token == '(':
            self.tokenizer.advance()
            self.CompileExpression()
            self.tokenizer.advance()  # )
        elif self.tokenizer.tokenType() == 'INT_CONST':
            self._writer.push('constant', self.tokenizer.current_token)
        elif self.tokenizer.tokenType() == 'STRING_CONST':
            string_val = self.tokenizer.stringVal()
            self._writer.push('constant', len(string_val))
            self._writer.write_call('String.new', '1')
            for char in string_val:
                self._writer.push('constant', ord(char))
                self._writer.write_call('String.appendChar', '2')
        elif self.tokenizer.tokenType() == 'KEYWORD':
            if self.tokenizer.current_token == 'this':
                self._writer.push('pointer', '0')
            else:
                self._writer.push('constant', '0')
                if self.tokenizer.current_token == 'true':
                    self._writer.write_cmd('not')
        elif self.tokenizer.tokenType() == 'IDENTIFIER':
            value = self.tokenizer.identifier()
            var = self.class_subroutine.get_symbol(value)
            peek = self.tokenizer.peek()
            if peek == '[':
                self.tokenizer.advance()
                self.tokenizer.advance()  # [
                self.CompileExpression()
                self._writer.push(var)
                self._writer.write_cmd('add')
                self._writer.pop('pointer', '1')
                self._writer.push('that', '0')
                self.tokenizer.advance()  # ]
            else:
                function_name = value
                functions_class = self.class_subroutine.jack_class
                is_default = True
                args = 0
                if peek == '.':
                    is_default = False
                    self.tokenizer.advance()
                    self.tokenizer.advance()
                    function_object = self.class_subroutine.get_symbol(
                        function_name)
                    function_name = self.tokenizer.current_token
                    if function_object:
                        functions_class = var.type
                        args = 1
                        self._writer.push(var)
                    else:
                        functions_class = value
                    peek = self.tokenizer.peek()
                if peek == '(':
                    if is_default:
                        args = 1
                        self._writer.push('pointer', '0')
                    self.tokenizer.advance()  # (
                    args += self.CompileExpressionList()
                    if not isinstance(functions_class, str):
                        functions_class = functions_class.class_name
                    full_name = '{}.{}'.format(functions_class, function_name)
                    self._writer.write_call(full_name, args)
                    if self.tokenizer.current_token != ')':
                        self.tokenizer.advance()  # ')'
                elif var:
                    self._writer.push(var)

    def CompileExpressionList(self):
        """
        Compiles a (possibly empty) comma-separated list of expressions.
        """
        expressions_counter = 0
        peek = self.tokenizer.peek()
        while peek != ')' and peek != ';':
            self.tokenizer.advance()
            expressions_counter += 1
            if self.tokenizer.current_token == ',':
                self.tokenizer.advance()
            self.CompileExpression()
            peek = self.tokenizer.peek()
        return expressions_counter
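
Example #8 drives everything off a one-token peek instead of re-reading the current token. A minimal sketch of such a lookahead, assuming the tokenizer keeps a token list and a cursor (names hypothetical, not from the original code):

def peek(tokens, cursor):
    """Hypothetical lookahead: the token after position cursor, '' at the end."""
    if cursor + 1 < len(tokens):
        return tokens[cursor + 1]
    return ''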
class CompilationEngine:
    """
    The compilation engine compiles the Jack code given in the input file
    into VM code saved in the out_file
    """
    def __init__(self, in_file, out_file):
        """
        A compilation engine constructor
        :param in_file: the file we are currently compiling
        :param out_file: the file where we save the output
        """
        self._tokenizer = JackTokenizer(in_file)
        self._class_table = SymbolTable()
        self._method_table = SymbolTable()
        self._cur_class_name = ""
        self._vm_writer = VMWriter(out_file)
        self._label_count_while = 0
        self._label_count_if = 0

    def compile_class(self):
        """
        compiles a class according to the grammar
        """
        self._class_table.start_subroutine()
        self._tokenizer.advance()
        # check if the current keyword is the right class tag
        if self._tokenizer.key_word() != CLASS_TAG:
            print(COMPILE_CLASS_ERROR)
            sys.exit()
        self._tokenizer.advance()
        self._cur_class_name = self.get_cur_token()
        self._tokenizer.advance()
        self._check_symbol("{")

        # there may be multiple variable declarations
        while self._check_if_var_dec():
            self.compile_class_var_dec()
        # there may be multiple subroutine declarations
        while self._check_subroutine_dec():
            self.compile_subroutine_dec()
        self._check_symbol("}")

    def compile_class_var_dec(self):
        """
        compiles the class's variables declarations
        """

        cur_kind = self.get_cur_token()
        self._tokenizer.advance()
        cur_type = self.get_cur_token()
        self._check_type()
        cur_name = self.get_cur_token()
        self._check_name()
        self._class_table.define(cur_name, cur_type, cur_kind)
        while self._check_if_comma():  # there are more variables
            self._tokenizer.advance()
            cur_name = self.get_cur_token()
            self._check_name()
            self._class_table.define(cur_name, cur_type, cur_kind)

        self._check_symbol(";")

    def get_cur_token(self):
        return self._tokenizer.get_token_str()

    def compile_subroutine_dec(self):
        """
        compiles the class's subroutine (methods and functions) declarations
        """
        # re-initialize the method symbol table
        self._method_table.start_subroutine()
        key_word = self._tokenizer.key_word()
        self._tokenizer.advance()
        self._tokenizer.advance()
        cur_name = self.get_cur_token()
        self._tokenizer.advance()

        # a method gets as an argument the base address of the current object
        if key_word == "method":
            self._method_table.define("this", self._cur_class_name, "argument")

        self._check_symbol("(")
        self.compile_parameter_list()
        self._check_symbol(")")

        subroutine_path = self._cur_class_name + '.' + cur_name
        self.compile_subroutine_body(subroutine_path, key_word)

    def compile_parameter_list(self):
        """
        compiles the parameter list for the subroutines
        """

        # if curr_token is ')' it means the param list is empty
        if self._tokenizer.symbol() == ')':
            return
        cur_type = self.get_cur_token()
        self._check_type()
        cur_name = self.get_cur_token()
        self._check_name()
        self._method_table.define(cur_name, cur_type, "argument")
        while self._check_if_comma():  # there are more params
            self._tokenizer.advance()
            cur_type = self.get_cur_token()
            self._check_type()
            cur_name = self.get_cur_token()
            self._check_name()
            self._method_table.define(cur_name, cur_type, "argument")

    def compile_subroutine_body(self, subroutine_name, subroutine_kind):
        """
        compiles the body of the subroutine
        """
        self._check_symbol("{")
        # there may be multiple variable declarations at the beginning of
        # the subroutine
        while self._tokenizer.key_word() == 'var':
            self.compile_var_dec()
        # define the subroutine
        n_locals = self._method_table.var_count("local")
        self._vm_writer.write_function(subroutine_name, n_locals)

        if subroutine_kind == "constructor":
            # allocating memory for the object's fields
            num_of_fields = self._class_table.var_count("field")
            self._vm_writer.write_push("constant", num_of_fields)
            self._vm_writer.write_call("Memory.alloc", 1)
            # make 'this' to point to address returned by Memory.alloc
            self._vm_writer.write_pop("pointer", 0)

        if subroutine_kind == "method":
            # assign pointer[0] to the object's base address in order to
            # get access to 'this' segment
            self._vm_writer.write_push("argument", 0)
            self._vm_writer.write_pop("pointer", 0)

        self.compile_statements()
        self._check_symbol("}")

    def compile_var_dec(self):
        """
        compiles the variable declarations
        """
        self._tokenizer.advance()
        cur_type = self.get_cur_token()
        self._check_type()
        cur_name = self.get_cur_token()
        self._check_name()
        self._method_table.define(cur_name, cur_type, "local")
        # there may be multiple variable names in the dec
        while self._check_if_comma():
            self._tokenizer.advance()
            self._method_table.define(self.get_cur_token(), cur_type, "local")
            self._check_name()
        self._check_symbol(";")

    def compile_statements(self):
        """
        compiles the statements (0 or more statements)
        """
        while self._check_if_statement():
            if self._tokenizer.key_word() == 'let':
                self.compile_let()
            elif self._tokenizer.key_word() == 'if':
                self.compile_if()
            elif self._tokenizer.key_word() == 'while':
                self.compile_while()
            elif self._tokenizer.key_word() == 'do':
                self.compile_do()
            elif self._tokenizer.key_word() == 'return':
                self.compile_return()

    def compile_do(self):
        """
        compiles the do statement
        """
        self._tokenizer.advance()
        self.compile_subroutine_call()
        self._check_symbol(";")
        self._vm_writer.write_pop("temp", 0)

    def compile_let(self):
        """
        compiles the let statement
        """
        self._tokenizer.advance()
        name = self.get_cur_token()
        info = self._get_symbol_info(name)
        self._check_if_declared(info)
        s_type, s_kind, s_id = info
        seg = self._get_segment(s_kind)
        is_array = False

        if self._tokenizer.get_next_token() == '[':  # if there is an array
            is_array = True
            self.compile_term()
        else:
            self._tokenizer.advance()
        self._check_symbol("=")
        self.compile_expression()

        if is_array:
            # save the value created after compiling the expression which
            # appears right after '=' in temp[0]
            self._vm_writer.write_pop("temp", 0)
            # now the top of the stack should be the address of the right cell
            # in the array so we assign it to pointer[1]
            self._vm_writer.write_pop("pointer", 1)
            # re-pushing the value we saved in temp[0]
            self._vm_writer.write_push("temp", 0)
            # the value of the array is located in that[0]
            seg = "that"
            s_id = 0
        # execute the assignment
        self._vm_writer.write_pop(seg, s_id)
        self._check_symbol(";")

    @staticmethod
    def _check_if_declared(info):
        if info is None:
            print("Unknown Symbol")
            sys.exit()

    def compile_if(self):
        """
        compiles the if statements
        """
        false_label = self._get_if_label()
        end_label = self._get_if_label()

        self._tokenizer.advance()
        self._check_symbol("(")
        self.compile_expression()
        self._check_symbol(")")
        self._check_symbol("{")
        self._vm_writer.write_arithmetic("not")
        self._vm_writer.write_if_goto(false_label)
        self.compile_statements()
        self._check_symbol("}")
        # there can also be an if else scenario
        self._vm_writer.write_goto(end_label)
        self._vm_writer.write_label(false_label)

        if self._tokenizer.key_word() == 'else':
            self._tokenizer.advance()
            self._check_symbol("{")
            self.compile_statements()
            self._check_symbol("}")

        self._vm_writer.write_label(end_label)

    def compile_while(self):
        """
        compiles the while statements
        """
        self._tokenizer.advance()
        first_label = self._get_while_label()
        second_label = self._get_while_label(END_WHILE)
        self._check_symbol("(")
        self._vm_writer.write_label(first_label)
        self.compile_expression()
        self._vm_writer.write_arithmetic("not")
        self._vm_writer.write_if_goto(second_label)
        self._check_symbol(")")
        self._check_symbol("{")
        self.compile_statements()
        self._vm_writer.write_goto(first_label)
        self._vm_writer.write_label(second_label)
        self._check_symbol("}")
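
    # The label layout produced for the first while statement in a
    # subroutine (per _get_while_label's numbering):
    #     label WHILE0
    #     <compiled condition>
    #     not
    #     if-goto WHILE_END0  // condition is false: exit the loop
    #     <compiled body>
    #     goto WHILE0
    #     label WHILE_END0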

    def compile_return(self):
        """
        compiles the return statements
        """
        self._tokenizer.advance()
        # if cur token is ; we return nothing, otherwise we return something
        if not self._tokenizer.symbol() == ';':
            self.compile_expression()
        else:
            self._vm_writer.write_push("constant", 0)
        self._check_symbol(";")
        self._vm_writer.write_return()
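
    # For example, 'return;' in a void subroutine compiles to:
    #     push constant 0     // a dummy value the caller will discard
    #     return
    # while 'return expr;' compiles the expression and then emits 'return'.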

    def compile_subroutine_call(self):
        """
        compiles the subroutine calls ( when we actually call a subroutine
        as  opposed to declaring it)
        """
        method_name = self.get_cur_token()
        self._check_name()
        num_of_args = 0
        # there may be a '.' if it is a foo.bar() scenario (or Foo.bar())
        if self._tokenizer.symbol() == ".":
            self._tokenizer.advance()
            class_name = method_name
            method_name = self.get_cur_token()
            self._check_name()
            symbol_info = self._get_symbol_info(class_name)

            if symbol_info is None:
                # calling a function/constructor on a class name
                cur_name = class_name + '.' + method_name
            else:
                # calling a method on an object: push the object itself as
                # the implicit first argument
                type_of, kind_of, id_of = symbol_info
                num_of_args += 1
                self._vm_writer.write_push(self._get_segment(kind_of), id_of)
                cur_name = type_of + '.' + method_name
        else:
            # calling a method on the current object, so 'this' is the
            # implicit first argument
            cur_name = self._cur_class_name + '.' + method_name
            num_of_args += 1
            self._vm_writer.write_push("pointer", 0)

        self._check_symbol("(")
        num_of_args += self.compile_expression_list()
        self._check_symbol(")")
        self._vm_writer.write_call(cur_name, num_of_args)
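
    # A sketch of the three call shapes, assuming the current class is
    # Point and p is a local variable of type Point at index 0:
    #     draw()                ->  push pointer 0   // this as implicit arg
    #                               call Point.draw 1
    #     p.draw()              ->  push local 0     // p as implicit arg
    #                               call Point.draw 1
    #     Screen.clearScreen()  ->  call Screen.clearScreen 0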

    def compile_expression(self):
        """
        compiles an expression: a term, possibly followed by operators
        and further terms
        """
        # a leading unary operator ('-' or '~') is handled inside
        # compile_term
        self.compile_term()

        # there may be a few operators in one expression; Jack defines no
        # operator precedence, so they are applied left to right
        while self._tokenizer.symbol() in OPERATIONS:
            symbol = self._tokenizer.symbol()
            self._tokenizer.advance()
            self.compile_term()
            # operators are executed after their operands are handled
            # in order to evaluate the expression as a postfix expression
            op = self._get_op(symbol)
            self._vm_writer.write_arithmetic(op)
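
    # Jack defines no operator precedence: operators are applied strictly
    # left to right. For example, '2 + 3 * 4' compiles to:
    #     push constant 2
    #     push constant 3
    #     add
    #     push constant 4
    #     call Math.multiply 2    // i.e. (2 + 3) * 4, not 2 + (3 * 4)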

    def compile_term(self):
        """
        compiles terms according to the grammar
        """
        cur_type = self._tokenizer.token_type()
        key_word = self._tokenizer.key_word()
        cur_token = self.get_cur_token()

        # either a string/int constant
        if cur_type in ["INT_CONST", "STRING_CONST"]:
            self._compile_string_int_term(cur_token, cur_type)

        # or a constant keyword (true, false, null, this)
        elif key_word in KEYWORD_CONST:
            self._compile_const_keyword_term(key_word)

        # or an expression within round brackets
        elif self._tokenizer.symbol() == '(':
            self._tokenizer.advance()
            self.compile_expression()
            self._check_symbol(")")

        # or a unary op ('-' or '~') followed by a term
        elif self._tokenizer.symbol() in OPERATIONS:
            unary_op = self._tokenizer.symbol()
            self._tokenizer.advance()
            self.compile_term()
            # the unary operator is applied after its operand is pushed
            if unary_op == '-':
                self._vm_writer.write_arithmetic("neg")
            elif unary_op == '~':
                self._vm_writer.write_arithmetic("not")

        # or it is an identifier which could be:
        elif self._tokenizer.identifier():
            self._compile_term_identifier()
        else:
            print(COMPILE_TERM_ERROR)
            sys.exit()

    def _compile_const_keyword_term(self, key_word):
        """
        compiles a term in case the current token is a keyword constant
        :param key_word: string from {'true', 'false', 'null', 'this'}
        """
        if key_word == "this":
            self._vm_writer.write_push("pointer", 0)
        else:
            # false and null are simply 0; true starts as 0 and is
            # negated below
            self._vm_writer.write_push("constant", 0)
        if key_word == "true":
            # true is the all-ones word: not(0) == -1
            self._vm_writer.write_arithmetic("not")
        self._tokenizer.advance()
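
    # The keyword constants map to VM code as follows:
    #     this   ->  push pointer 0
    #     false  ->  push constant 0
    #     null   ->  push constant 0
    #     true   ->  push constant 0
    #                not             // the all-ones word, i.e. -1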

    def _compile_string_int_term(self, cur_token, cur_type):
        """
        compile term in case the given token type is constant string
        or constant integer
        :param cur_token: the current token as a string
        :param cur_type:  the type of the current token
        """
        if cur_type == "INT_CONST":
            self._vm_writer.write_push("constant", cur_token)

        else:  # is string
            n = len(cur_token)
            self._vm_writer.write_push("constant", n)
            self._vm_writer.write_call("String.new", 1)
            for c in cur_token:
                self._vm_writer.write_push("constant", ord(c))
                self._vm_writer.write_call("String.appendChar", 2)
        self._tokenizer.advance()
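
    # For example, the string constant "Hi" compiles to a String object
    # built one character code at a time:
    #     push constant 2             // the string length
    #     call String.new 1
    #     push constant 72            // 'H'
    #     call String.appendChar 2
    #     push constant 105           // 'i'
    #     call String.appendChar 2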

    def _compile_term_identifier(self):
        """
         compiles terms in case of identifier token
        """
        cur_token = self.get_cur_token()
        info = self._get_symbol_info(cur_token)
        next_token = self._tokenizer.get_next_token()
        if info is not None and next_token not in [".", "("]:
            type_of, kind_of, id_of = info
            seg = self._get_segment(kind_of)
            self._vm_writer.write_push(seg, id_of)

        # an array access
        if next_token == '[':
            self._check_name()
            self._check_symbol("[")
            self.compile_expression()
            self._check_symbol("]")
            # base address + index = the address of the accessed cell
            self._vm_writer.write_arithmetic("add")
            if self._tokenizer.symbol() != '=':
                # a read access: point THAT at the cell and push its value
                # (a write access is completed later by compile_let)
                self._vm_writer.write_pop("pointer", 1)
                self._vm_writer.write_push("that", 0)
        # or a subroutine call
        elif next_token in [".", "("]:
            self.compile_subroutine_call()
        else:
            self._tokenizer.advance()
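
    # For example, assuming a is local 0 and i is local 1, the term a[i]
    # inside an expression (a read access) compiles to:
    #     push local 0        // base address of a
    #     push local 1        // index i
    #     add
    #     pop pointer 1       // THAT = address of a[i]
    #     push that 0         // push the value of a[i]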

    def compile_expression_list(self):
        """
        compiles a (possibly empty) comma-separated list of expressions
        :return: the number of expressions compiled
        """
        # if it is ')' then the expression list is empty
        if self._tokenizer.symbol() == ')':
            return 0
        num_of_args = 1  # at least one argument
        self.compile_expression()
        # while there are more expressions; a leading '-' on an argument
        # is handled as a unary operator inside compile_term
        while self._check_if_comma():
            self._tokenizer.advance()
            self.compile_expression()
            num_of_args += 1
        return num_of_args
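
    # For example, in 'Math.max(x, y)' the two arguments are compiled in
    # order and 2 is returned, letting the caller emit the right call:
    #     push <x>
    #     push <y>
    #     call Math.max 2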

    def _check_if_var_dec(self):
        """
        check if we are currently compiling a variable declaration
        :return: true iff the current token is either 'static' or 'field'
        """
        return self._tokenizer.key_word() in CLASS_VAR_KEYWORDS

    def _check_subroutine_dec(self):
        """
        checks if we are currently compiling a subroutine declaration
        :return: true iff the current token is either 'constructor' or
        'function' or 'method'
        """
        return self._tokenizer.key_word() in SUBROUTINE

    def _check_if_comma(self):
        """
        checks if current token is a comma
        :return: true iff the current token is a ','
        """
        return self._tokenizer.symbol() == ','

    def _check_if_statement(self):
        """
        checks if we are currently compiling a statement
        :return: true iff the current token
        is in ['let', 'if', 'while', 'do', 'return']
        """
        return self._tokenizer.key_word() in STATEMENTS

    def _check_type(self):
        """
        checks if the current token is a valid type and if so, it writes it
        to  the output file
        """
        if not self._tokenizer.key_word() in TYPE_KEYWORDS:
            self._check_name()
        else:
            self._tokenizer.advance()

    def _check_symbol(self, expected_symbol):
        """
        checks that the current token is the expected symbol and, if so,
        advances the tokenizer past it
        :param expected_symbol: the symbol the current token is validated
        against
        :return: prints an illegal statement error and exits the program
        if it is not the expected symbol
        """
        if self._tokenizer.symbol() != expected_symbol:
            print(ILLEGAL_STATEMENT_ERROR)
            sys.exit()
        self._tokenizer.advance()

    def _check_name(self):
        """
        checks that the current token is a name (identifier) and, if so,
        advances the tokenizer past it
        :return: prints an illegal statement error and exits the program
        if the current token is not a name
        """
        if not self._tokenizer.identifier():
            print(ILLEGAL_STATEMENT_ERROR)
            sys.exit()
        self._tokenizer.advance()

    @staticmethod
    def _get_op(symbol):
        """
       writes an op symbol to the out file
       """
        if symbol == '<':
            return "lt"
        elif symbol == '>':
            return "gt"
        elif symbol == '=':
            return "eq"
        elif symbol == '&':
            return "and"
        elif symbol == '|':
            return "or"
        elif symbol == '+':
            return "add"
        elif symbol == '-':
            return "sub"
        elif symbol == '~':
            return "not"
        elif symbol == "*":
            return "call Math.multiply 2"
        elif symbol == "/":
            return "call Math.divide 2"

    def _get_symbol_info(self, symbol_name):
        """
        first checks if the given symbol is in the method symbol table;
        if the method table contains the symbol, returns its information
        as (type, kind, id); otherwise checks the class symbol table and
        returns the symbol's information from there, or None if neither
        table contains it
        :param symbol_name: string
        """
        info = self._method_table.get_info(symbol_name)
        if info is None:
            info = self._class_table.get_info(symbol_name)
        return info

    @staticmethod
    def _get_segment(cur_kind):
        """
        :param cur_kind: Jack kind - from the list:
         ["var", "argument", "field", "class", "subroutine", "local", "static"]
        :return: 'this' if the given kind is "field" (fields live in the
        current object), otherwise the given kind unchanged
        """
        if cur_kind == "field":
            return "this"
        else:
            return cur_kind

    def _get_if_label(self):
        """
        creates a new if label and increments the if label counter
        :return: an unused if label
        """
        curr_counter = str(self._label_count_if)
        self._label_count_if += 1
        return "IF" + curr_counter

    def _get_while_label(self, is_end_while=False):
        """
        creates a label according to the given flag; when creating an
        end-while label it also increments the while label counter, so
        each WHILE/WHILE_END pair shares the same number
        :param is_end_while: if true, creates an end-while label,
        otherwise creates a while label
        :return: an unused while label or end-while label, according to
        the flag
        """
        curr_counter = str(self._label_count_while)
        if is_end_while:
            self._label_count_while += 1
            return "WHILE_END" + curr_counter
        return "WHILE" + curr_counter