コード例 #1
0
class CompilationEngine(object):
    """docstring for CompilationEngine"""
    def __init__(self, inputFilePath):
        """Collect the source files to compile and reset all compiler state.

        inputFilePath -- path to a single source file or to a directory.

        The path is normalized with os.path.normpath. When it names a
        directory, every directory below it is visited and handed to the
        module-level getFilePathsToCompile callback together with the
        self.jackFilesToCompile list (presumably the callback appends the
        files it selects to that list -- it is defined outside this block).
        Any other path is queued as the only file to compile.
        """
        super(CompilationEngine, self).__init__()
        normalized = os.path.normpath(inputFilePath)

        self.compileDir = os.path.isdir(normalized)
        self.jackFilesToCompile = []
        self.inputFilePath = normalized

        if self.compileDir:
            # os.path.walk only exists on Python 2. os.walk is available on
            # both Python 2 and 3, so drive the callback from it and pass the
            # same (arg, dirname, names) triple os.path.walk used to pass.
            for dirname, subdirs, filenames in os.walk(normalized):
                getFilePathsToCompile(self.jackFilesToCompile, dirname, subdirs + filenames)
        else:
            self.jackFilesToCompile.append(normalized)

        # Per-file state; populated by loadFile() and the compile pass.
        self.tokenizer = None
        self.symbolTable = SymbolTable()
        self.treeRoot = None
        self.currentClassName = None
        self.currentSubroutineContext = None
        self.writer = None

    """ Load File """
    def loadFile(self, filepath):
        """Tokenize and parse one source file and open its .vm output writer.

        filepath -- path of the source file to load.

        Builds the parse tree, records the class name and class-level
        variables, then creates a VMWriter for a sibling file that has the
        same name with its extension replaced by ".vm".
        """
        self.tokenizer = JackTokenizer(filepath)
        self._generateTree()
        self._initializeClassDetails(self.treeRoot)

        # splitext strips whatever extension is present instead of blindly
        # cutting five characters, so a path without ".jack" is not mangled.
        outputFilePath = os.path.splitext(filepath)[0] + ".vm"
        self.writer = VMWriter(outputFilePath)

    def setSubroutineContext(self, ctx):
        """Remember ctx as the context of the subroutine being compiled."""
        self.currentSubroutineContext = ctx

    def compile(self):
        """Compile every queued source file into its own .vm file.

        For each path in self.jackFilesToCompile the file is loaded (which
        also opens self.writer), its subroutines are compiled, and the
        writer is closed.
        """
        for filepath in self.jackFilesToCompile:
            self.loadFile(filepath)

            # Close the writer even when code generation fails so the
            # output file handle is not leaked.
            try:
                self._compileSubroutines()
            finally:
                self.writer.close()

    """ Tree Generation """

    def _generateTree(self):
        """Parse the tokenizer's stream into a tree stored in self.treeRoot.

        Raises ValueError when there are no tokens at all or when the first
        token is not the 'class' keyword.
        """
        # Peek at the first token to decide which rule to start with; a
        # program is expected to open with a class declaration.
        nextToken = self.tokenizer.peekNextToken()
        if nextToken is None:
            raise ValueError("No tokens to parse.")
        if nextToken.getToken() != "class":
            raise ValueError("Program does not begin with a class declaration.")

        root = Tree.Node("class")
        self.compileClass(root)
        self.treeRoot = root  # root of the tree once it is fully built

    def _initializeClassDetails(self, treeRoot):
        """Record the class name and define the class-level variables.

        Clears the class scope of the symbol table, stores the value of the
        tree's second child in self.currentClassName, and defines every
        variable named in a "classVarDec" child with its declared type and
        the upper-cased declaration keyword as its kind.
        """
        self.symbolTable.clearClassSymbols()
        self.currentClassName = treeRoot.children[1].elementVal

        declarations = [child for child in treeRoot.children if child.elementName == "classVarDec"]
        for declaration in declarations:
            # Dropping symbols leaves: kind keyword, type, then the names.
            values = [part.elementVal for part in declaration.children if part.elementName != "symbol"]
            kind = values[0].upper()
            type_ = values[1]
            names = values[2:]

            for name in names:
                self.symbolTable.define(name, type_, kind)
        

    def _compileSubroutines(self):
        """Generate code for every "subroutineDec" child of the class tree.

        For each declaration, in order, the subroutine context and argument
        symbols are initialized and then the subroutine itself is handled.
        """
        declarations = tuple(
            child for child in self.treeRoot.children
            if child.elementName == "subroutineDec"
        )
        for declaration in declarations:
            self._initializeSubroutineVars(declaration)  # context + argument symbols
            self._handleSubroutine(declaration)

    def _initializeSubroutineVars(self, node):
        """Set the subroutine context and define its argument symbols.

        node -- a "subroutineDec" tree node. Its first three children supply
        the subroutine kind, return type and name; its fifth child is the
        parameter list.

        Raises ValueError when the parameter list does not consist of
        complete (type, name) pairs.
        """
        self.symbolTable.clearSubroutineSymbols()

        header = node.children
        context = {
            "subroutineKind": header[0].elementVal,  # method, function, constructor
            "voidReturn": header[1].elementVal == "void",
            "returnType": header[1].elementVal,
            "subroutineName": header[2].elementVal
        }
        self.setSubroutineContext(context)

        # Dropping the comma symbols leaves alternating type / name nodes.
        parameters = [child for child in node.children[4].children if child.elementName != "symbol"]
        if len(parameters) % 2 != 0:
            raise ValueError("REMOVE LATER: Odd number of (type,varname) combos.")

        # A method receives the object it is invoked on as argument 0.
        if context["subroutineKind"] == "method":
            self.symbolTable.define("this", self.currentClassName, "ARG")

        for position in range(0, len(parameters), 2):
            type_ = parameters[position].elementVal
            varname = parameters[position + 1].elementVal
            self.symbolTable.define(varname, type_, "ARG")


    def _handleSubroutine(self, node):
        """Emit the VM code for one subroutine declaration.

        1. Define the subroutine's local variables in the symbol table.
        2. Write the VM function header.
        3. For a constructor allocate the object, for a method anchor
           ``this`` to argument 0.
        4. Compile the statements of the subroutine body.
        """
        kind = self.currentSubroutineContext["subroutineKind"]

        body = next((child for child in node.children if child.elementName == "subroutineBody"), None)

        # Register every local declared by the body's varDec nodes.
        for declaration in [child for child in body.children if child.elementName == "varDec"]:
            parts = [child for child in declaration.children if child.elementName != "symbol"]
            [_varKeyword, typeNode] = parts[:2]  # 'var' keyword, declared type
            type_ = typeNode.elementVal
            for nameNode in parts[2:]:
                self.symbolTable.define(nameNode.elementVal, type_, "VAR")

        localCount = self.symbolTable.varCount("VAR")
        self.writer.writeFunction(self.currentClassName, self.currentSubroutineContext["subroutineName"], localCount)

        if kind == "constructor":
            # Allocate one word per field and point `this` at the new block.
            fieldCount = self.symbolTable.varCount(k_FIELD)
            self.writer.writePush("constant", fieldCount)
            self.writer.writeCall("Memory", "alloc", 1)
            self.writer.writePop("pointer", 0)
        elif kind == "method":
            # Argument 0 of a method is the base address of the object.
            self.writer.writePush("argument", 0)
            self.writer.writePop("pointer", 0)

        statements = next((child for child in body.children if child.elementName == "statements"), None)
        self._handleSubroutineStatements(statements)

    def _handleSubroutineStatements(self, statementsNode):
        """Dispatch each child statement to the handler for its kind.

        Children whose element name is not one of the five statement kinds
        are skipped.
        """
        handlerNames = {
            "letStatement": "_handleLetStatement",
            "returnStatement": "_handleReturnStatement",
            "doStatement": "_handleDoStatement",
            "ifStatement": "_handleIfStatement",
            "whileStatement": "_handleWhileStatement",
        }
        for statement in statementsNode.children:
            handlerName = handlerNames.get(statement.elementName)
            if handlerName is None:
                continue
            getattr(self, handlerName)(statement)

    def _handleLetStatement(self, letStatement):
        """Emit VM code for ``let name = expr`` or ``let name[expr1] = expr2``.

        The third child of the statement decides the form: "=" for a plain
        assignment, "[" for an indexed assignment. Any other value emits
        nothing.
        """
        parts = letStatement.children
        form = parts[2].elementVal

        if form == "=":
            # Evaluate the right-hand side, then pop it into the variable.
            self._handleExpression(parts[3])

            target = parts[1].elementVal
            segment = kindToSegmentMap[self.symbolTable.kindOf(target)]
            index = self.symbolTable.indexOf(target)
            self.writer.writePop(segment, index)
            return

        if form != "[":
            return

        target = parts[1].elementVal
        segment = kindToSegmentMap[self.symbolTable.kindOf(target)]
        index = self.symbolTable.indexOf(target)

        # Index expression, then the variable's value, then add them.
        self._handleExpression(parts[3])
        self.writer.writePush(segment, index)
        self.writer.writeOp("+")

        # Evaluate the right-hand side and park it in temp 0 so the computed
        # address can be popped into pointer 1 (anchoring the `that` segment).
        self._handleExpression(parts[6])
        self.writer.writePop("temp", 0)
        self.writer.writePop("pointer", 1)
        self.writer.writePush("temp", 0)
        self.writer.writePop("that", 0)


    def _handleWhileStatement(self, whileStatement):
        """Emit VM code for a while loop.

        Layout: loop label, condition, negate, if-goto exit label, body,
        goto loop label, exit label. Both labels carry the symbol table's
        current while-label index, which is incremented before any code is
        written.
        """
        condition = whileStatement.children[2]
        body = whileStatement.children[5]

        loopLabel = whileExpLabel + str(self.symbolTable.whileLabelInx)
        exitLabel = whileEndLabel + str(self.symbolTable.whileLabelInx)
        self.symbolTable.incrementwhileLabelInx()

        self.writer.writeLabel(loopLabel)
        self._handleExpression(condition)

        # Negate the condition so the jump leaves the loop when it is false.
        self.writer.writeUnaryOp("~")
        self.writer.writeIfGoto(exitLabel)
        self._handleSubroutineStatements(body)
        self.writer.writeGoto(loopLabel)
        self.writer.writeLabel(exitLabel)

    def _handleIfStatement(self, ifStatement):
        """Emit VM code for ``if`` with or without an ``else`` clause.

        A statement with more than seven children is treated as if/else.
        Labels carry the symbol table's current if-label index, which is
        incremented before any code is written.
        """
        hasElse = len(ifStatement.children) > 7

        trueLabel = ifTrueLabel + str(self.symbolTable.ifLabelInx)
        falseLabel = ifFalseLabel + str(self.symbolTable.ifLabelInx)
        if hasElse:
            endLabel = ifEndLabel + str(self.symbolTable.ifLabelInx)

        self.symbolTable.incrementifLabelInx()

        condition = ifStatement.children[2]
        thenStatements = ifStatement.children[5]
        if hasElse:
            elseStatements = ifStatement.children[9]

        self._handleExpression(condition)  # leaves the condition on the stack

        self.writer.writeIfGoto(trueLabel)
        self.writer.writeGoto(falseLabel)
        self.writer.writeLabel(trueLabel)

        self._handleSubroutineStatements(thenStatements)

        if hasElse:
            # Skip over the else block once the then block has run.
            self.writer.writeGoto(endLabel)

        self.writer.writeLabel(falseLabel)

        if hasElse:
            self._handleSubroutineStatements(elseStatements)
            self.writer.writeLabel(endLabel)

    def _handleDoStatement(self, doStatement):
        """Emit VM code for a ``do`` statement and discard the return value.

        Three call shapes are recognised from the statement's third child:

        * ``name(...)``     -- treated as a method call on the current
          object: pointer 0 is pushed as the first argument.
        * ``var.name(...)`` -- ``var`` is known to the symbol table: its
          value is pushed as the first argument and the call targets the
          variable's declared type.
        * ``Cls.name(...)`` -- ``Cls`` is not a known variable: the call is
          made on ``Cls`` with only the listed arguments.

        Any other shape emits nothing.
        """
        parts = doStatement.children
        symbol = parts[2].elementVal

        if symbol == "(":
            subroutineName = parts[1].elementVal
            # Assume a method of the current class, which needs the base
            # address of `this` as its first argument.
            self.writer.writePush("pointer", 0)
            expressions = [c for c in parts[3].children if c.elementName == "expression"]
            for e in expressions:
                self._handleExpression(e)

            # +1 accounts for the implicit `this` argument.
            self.writer.writeCall(self.currentClassName, subroutineName, len(expressions) + 1)
        elif symbol == ".":
            name = parts[1].elementVal
            # A single lookup both answers "is this a variable?" and yields
            # the kind used to pick the segment below.
            varKind = self.symbolTable.kindOf(name)

            subroutineName = parts[3].elementVal
            expressions = [c for c in parts[5].children if c.elementName == "expression"]

            if varKind is not None:
                # Method invocation: push the object, then the arguments.
                className = self.symbolTable.typeOf(name)
                segment = kindToSegmentMap[varKind]
                index = self.symbolTable.indexOf(name)
                self.writer.writePush(segment, index)
                for e in expressions:
                    self._handleExpression(e)

                self.writer.writeCall(className, subroutineName, len(expressions) + 1)
            else:
                # Call on a class name: only the explicit arguments.
                for e in expressions:
                    self._handleExpression(e)

                self.writer.writeCall(name, subroutineName, len(expressions))
        else:
            return

        # A do statement ignores the callee's return value.
        self.writer.writePop("temp", 0)

    def _handleReturnStatement(self, returnStatement):
        """Emit VM code for a return statement.

        When the statement's second child is an expression it is evaluated;
        otherwise constant 0 is pushed. A return is written either way.
        """
        value = returnStatement.children[1]
        if value.elementName != "expression":
            self.writer.writePush("constant", 0)
        else:
            self._handleExpression(value)
        self.writer.writeReturn()

    def _handleExpression(self, exprTree):
        """Emit VM code for an expression of the form ``term (op term)*``.

        The first term is compiled, then for every following (op, term)
        pair the term is compiled before its operator is written, so the
        operators are applied strictly left to right.

        Raises ValueError when the expression node has no children.
        """
        children = exprTree.children
        if len(children) == 0:
            # The original code had a truncated debugging statement here
            # that failed with an AttributeError; report the malformed tree
            # explicitly instead.
            raise ValueError("Cannot compile an empty expression.")

        self._handleTerm(children[0])

        opTermCombos = children[1:]
        for i in range(0, len(opTermCombos), 2):
            operation = opTermCombos[i].elementVal
            self._handleTerm(opTermCombos[i + 1])
            self.writer.writeOp(operation)

    def _handleTerm(self, termTree):
        """Emit VM code for a single term.

        Multi-child terms:
          * ``name[expr]``        -- array element read through ``that 0``
          * ``name(args)``        -- method call on the current object
          * ``name.sub(args)``    -- method call on a variable, or a call on
                                     a class name when ``name`` is unknown
                                     to the symbol table
          * ``unaryOp term``      -- operand first, then the operator
          * ``( expression )``    -- the inner expression

        Single-child terms: integer constant, string constant, keyword
        constant (null / false / true / this) or a plain variable.

        Shapes that match none of the above emit nothing.
        """
        parts = termTree.children
        firstToken = parts[0]

        if len(parts) > 1:
            secondToken = parts[1]

            if firstToken.elementName == "identifier":
                if secondToken.elementVal == "[":
                    # name[expr]
                    varname = firstToken.elementVal
                    segment = kindToSegmentMap[self.symbolTable.kindOf(varname)]
                    index = self.symbolTable.indexOf(varname)

                    self._handleExpression(parts[2])
                    self.writer.writePush(segment, index)  # the variable's value (array base)

                    # base + index -> pointer 1, then read that[0]
                    self.writer.writeOp("+")
                    self.writer.writePop("pointer", 1)
                    self.writer.writePush("that", 0)

                elif secondToken.elementVal == "(":
                    # Assume a method of the current class, which needs the
                    # base address of `this` as its first argument.
                    self.writer.writePush("pointer", 0)
                    expressions = [c for c in parts[2].children if c.elementName == "expression"]
                    for e in expressions:
                        self._handleExpression(e)

                    self.writer.writeCall(self.currentClassName, firstToken.elementVal, len(expressions) + 1)

                elif secondToken.elementVal == ".":
                    name = firstToken.elementVal
                    # One lookup answers "is this a variable?" and supplies
                    # the kind used to pick the segment.
                    varKind = self.symbolTable.kindOf(name)

                    functionName = parts[2].elementVal
                    expressions = [c for c in parts[4].children if c.elementName == "expression"]

                    if varKind is not None:
                        # Method call: push the object, then the arguments.
                        className = self.symbolTable.typeOf(name)
                        segment = kindToSegmentMap[varKind]
                        index = self.symbolTable.indexOf(name)
                        self.writer.writePush(segment, index)

                        for e in expressions:
                            self._handleExpression(e)

                        self.writer.writeCall(className, functionName, len(expressions) + 1)
                    else:
                        # Call on a class name: only the explicit arguments.
                        for e in expressions:
                            self._handleExpression(e)

                        self.writer.writeCall(name, functionName, len(expressions))

            elif firstToken.elementName == "symbol":
                if firstToken.elementVal in unaryOps:
                    # unaryOp term
                    self._handleTerm(secondToken)
                    self.writer.writeUnaryOp(firstToken.elementVal)
                else:
                    # ( expression )
                    self._handleExpression(secondToken)
            return

        if firstToken.elementName == t_integerConstant:
            self.writer.writePush("constant", firstToken.elementVal)
        elif firstToken.elementName == t_stringConstant:
            # Build the string at run time: String.new(length), then one
            # String.appendChar call per character code.
            self.writer.writePush("constant", len(firstToken.elementVal))
            self.writer.writeCall("String", "new", 1)
            for char in firstToken.elementVal:
                self.writer.writePush("constant", ord(char))
                self.writer.writeCall("String", "appendChar", 2)
        elif firstToken.elementName == t_keyword:
            if firstToken.elementVal == "null":
                self.writer.writePush("constant", 0)
            elif firstToken.elementVal == "false":
                self.writer.writePush("constant", 0)
            elif firstToken.elementVal == "true":
                # true is emitted as the bitwise negation of constant 0
                self.writer.writePush("constant", 0)
                self.writer.writeUnaryOp("~")
            elif firstToken.elementVal == "this":
                self.writer.writePush("pointer", 0)
        elif firstToken.elementName == t_identifier:
            segment = kindToSegmentMap[self.symbolTable.kindOf(firstToken.elementVal)]
            index = self.symbolTable.indexOf(firstToken.elementVal)
            self.writer.writePush(segment, index)


    """ JACK Program Structure """

    def compileClass(self, subTreeNode):
        """Parse ``class Name { classVarDec* subroutineDec* }`` into subTreeNode.

        Every consumed token is added as a child; each class variable
        declaration and each subroutine declaration is added as a child
        tree. Returns subTreeNode. Raises ValueError on an unexpected token.
        """
        def gather(upcoming, keywords, elementName, compileRule):
            # Parse consecutive declarations that start with one of
            # `keywords`; returns the first peeked token that does not.
            while upcoming.getToken() in keywords:
                child = Tree.Node(elementName, subTreeNode.depth + 1)
                compileRule(child)
                subTreeNode.addChildTree(child)
                upcoming = self.tokenizer.peekNextToken()
            return upcoming

        keyword = self.tokenizer.getNextToken()
        if keyword.getToken() != 'class':
            raise ValueError("Program does not begin with a class declaration.")
        subTreeNode.addChild(keyword)

        className = self.tokenizer.getNextToken()
        self.validateClassName(className)
        subTreeNode.addChild(className)

        opening = self.tokenizer.getNextToken()
        if opening.getToken() != "{":
            raise ValueError("Invalid symbol.")
        subTreeNode.addChild(opening)

        upcoming = self.tokenizer.peekNextToken()
        upcoming = gather(upcoming, ["static", "field"], "classVarDec", self.compileClassVarDec)
        gather(upcoming, ["constructor", "function", "method"], "subroutineDec", self.compileSubroutineDec)

        closing = self.tokenizer.getNextToken()
        if closing.getToken() != "}":
            raise ValueError("Invalid symbol. " + closing.getToken())
        subTreeNode.addChild(closing)

        return subTreeNode

    def compileClassVarDec(self, subTreeNode):
        """Parse ``(static | field) type name (, name)* ;`` into subTreeNode.

        Every consumed token is added as a child. Returns subTreeNode.
        Raises ValueError on an unexpected token.
        """
        modifier = self.tokenizer.getNextToken()
        if modifier.getToken() not in ['static', 'field']:
            raise ValueError("Class var declaration does not begin with \'static\' or \'field\'.")
        subTreeNode.addChild(modifier)

        declaredType = self.tokenizer.getNextToken()
        self.validateType(declaredType)  # raises on an invalid type
        subTreeNode.addChild(declaredType)

        firstName = self.tokenizer.getNextToken()
        self.validateVarName(firstName)
        subTreeNode.addChild(firstName)

        # Zero or more ", name" pairs; the loop exits holding the token
        # that must be the terminating ';'.
        while True:
            terminator = self.tokenizer.getNextToken()
            if terminator.getToken() != ",":
                break
            subTreeNode.addChild(terminator)

            extraName = self.tokenizer.getNextToken()
            self.validateVarName(extraName)
            subTreeNode.addChild(extraName)

        if terminator.getToken() != ";":
            raise ValueError("Class var declaration does not end with a \';\' .")
        subTreeNode.addChild(terminator)

        return subTreeNode

    def compileSubroutineDec(self, subTreeNode):
        """Parse a subroutine declaration into subTreeNode.

        Grammar: ``(constructor | function | method) (void | type) name
        ( parameterList ) subroutineBody``. Tokens are added as children;
        the parameter list and the body are added as child trees. Returns
        subTreeNode. Raises ValueError on an unexpected token.
        """
        kindToken = self.tokenizer.getNextToken()
        if kindToken.getToken() not in ['constructor', 'function', 'method']:
            raise ValueError("Subroutine declaration must begin with \'constructor\', \'function\', \'method\'.")
        subTreeNode.addChild(kindToken)

        # 'void' or a type; validateType raises when the token is neither.
        returnToken = self.tokenizer.getNextToken()
        if returnToken.getToken() == "void" or self.validateType(returnToken):
            subTreeNode.addChild(returnToken)

        nameToken = self.tokenizer.getNextToken()
        self.validateSubroutineName(nameToken)
        subTreeNode.addChild(nameToken)

        opening = self.tokenizer.getNextToken()
        if opening.getToken() != '(':
            raise ValueError("Expected \'(\' before parameter list.")
        subTreeNode.addChild(opening)

        # The parameterList node is attached even when it has no children.
        parameters = Tree.Node("parameterList", subTreeNode.depth + 1)
        self.compileParameterList(parameters)
        subTreeNode.addChildTree(parameters)

        closing = self.tokenizer.getNextToken()
        if closing.getToken() != ')':
            raise ValueError("Expected \')\' after parameter list, but got " + closing.getToken() + " instead.")
        subTreeNode.addChild(closing)

        upcoming = self.tokenizer.peekNextToken()
        if upcoming.getToken() != '{':
            raise ValueError("Expeced \'{\' at start of subroutine body, but received: " + upcoming.getToken())

        body = Tree.Node("subroutineBody", subTreeNode.depth + 1)
        self.compileSubroutineBody(body)
        subTreeNode.addChildTree(body)

        return subTreeNode

    def compileParameterList(self, subTreeNode):
        """Parse an optional ``type name (, type name)*`` list into subTreeNode.

        When the next token is not a type the list is empty and nothing is
        consumed. Returns subTreeNode.
        """
        if not self.isType(self.tokenizer.peekNextToken()):
            return subTreeNode

        # First parameter: its type was already vetted by isType above.
        subTreeNode.addChild(self.tokenizer.getNextToken())

        firstName = self.tokenizer.getNextToken()
        self.validateVarName(firstName)
        subTreeNode.addChild(firstName)

        # Zero or more ", type name" groups.
        while self.tokenizer.peekNextToken().getToken() == ",":
            subTreeNode.addChild(self.tokenizer.getNextToken())  # the ','

            paramType = self.tokenizer.getNextToken()
            self.validateType(paramType)
            subTreeNode.addChild(paramType)

            paramName = self.tokenizer.getNextToken()
            self.validateVarName(paramName)
            subTreeNode.addChild(paramName)

        return subTreeNode

    def compileSubroutineBody(self, subTreeNode):
        """Parse ``{ varDec* statements }`` into subTreeNode.

        Braces are added as children; every variable declaration and the
        statements block are added as child trees. Returns subTreeNode.
        Raises ValueError when either brace is missing.
        """
        opening = self.tokenizer.getNextToken()
        if opening.getToken() != "{":
            raise ValueError("Expeced \'{\' at start of subroutine body, but received: " + opening.getToken())
        subTreeNode.addChild(opening)

        # Zero or more local variable declarations.
        while self.tokenizer.peekNextToken().getToken() == 'var':
            declaration = Tree.Node("varDec", subTreeNode.depth + 1)
            self.compileVarDec(declaration)
            subTreeNode.addChildTree(declaration)

        # The statements node is attached even when it ends up empty.
        statements = Tree.Node("statements", subTreeNode.depth + 1)
        self.compileStatements(statements)
        subTreeNode.addChildTree(statements)

        closing = self.tokenizer.getNextToken()
        if closing.getToken() != '}':
            raise ValueError("Expeced \'}\' at end of subroutine body, but got " + closing.getToken() + " instead.")
        subTreeNode.addChild(closing)

        return subTreeNode

    def compileVarDec(self, subTreeNode):
        """Parse ``var type name (, name)* ;`` into subTreeNode.

        Every consumed token is added as a child. Returns subTreeNode.
        Raises ValueError on an unexpected token.
        """
        keyword = self.tokenizer.getNextToken()
        if keyword.getToken() != 'var':
            raise ValueError("Variable declaration must begin with \'var\'.")
        subTreeNode.addChild(keyword)

        declaredType = self.tokenizer.getNextToken()
        self.validateType(declaredType)
        subTreeNode.addChild(declaredType)

        firstName = self.tokenizer.getNextToken()
        self.validateVarName(firstName)
        subTreeNode.addChild(firstName)

        # Zero or more ", name" pairs; the loop exits holding the token
        # that must be the terminating ';'.
        while True:
            terminator = self.tokenizer.getNextToken()
            if terminator.getToken() != ",":
                break
            subTreeNode.addChild(terminator)

            extraName = self.tokenizer.getNextToken()
            self.validateVarName(extraName)
            subTreeNode.addChild(extraName)

        if terminator.getToken() != ';':
            raise ValueError("Variable declaration must end with \';\'.")
        subTreeNode.addChild(terminator)

        return subTreeNode

    def isType(self, token):
        """Return True when token is a primitive type name or an identifier."""
        # The token type itself is not needed here (the original fetched it
        # and never used it); the identifier check inspects it on its own.
        return token.getToken() in primitive_types or self._isIdentifier(token)

    def isClassName(self, token):
        """Return whether token can serve as a class name (any identifier)."""
        return self._isIdentifier(token)

    def isSubroutineName(self, token):
        """Return whether token can serve as a subroutine name (any identifier)."""
        return self._isIdentifier(token)

    def isVarName(self, token):
        """Return whether token can serve as a variable name (any identifier)."""
        return self._isIdentifier(token)

    def validateType(self, token):
        """Return True when token is a valid type, otherwise raise ValueError.

        A primitive type name is accepted directly; anything else must pass
        validateClassName, which raises for a non-identifier.
        """
        # The unused token-type lookup from the original was dropped.
        if token.getToken() not in primitive_types:
            return self.validateClassName(token)

        return True

    def validateClassName(self, token):
        """Return True when token is an identifier, otherwise raise ValueError."""
        if not self._isIdentifier(token):
            # The message must be built from the token itself; the original
            # referenced an undefined name here and failed with NameError.
            raise ValueError("class name \'" + token.getToken() + "\' is not a valid identifier.")

        return True

    def validateSubroutineName(self, token):
        """Return True when token is an identifier, otherwise raise ValueError."""
        if not self._isIdentifier(token):
            # The message must be built from the token itself; the original
            # referenced an undefined name here and failed with NameError.
            raise ValueError("subroutine name \'" + token.getToken() + "\' is not a valid identifier.")

        return True

    def validateVarName(self, token):
        """Return True when token is an identifier, otherwise raise ValueError."""
        if not self._isIdentifier(token):
            # The message must be built from the token itself; the original
            # referenced an undefined name here and failed with NameError.
            raise ValueError("var name \'" + token.getToken() + "\' is not a valid identifier.")

        return True

    def _isIdentifier(self, token):
        """Return True when the token's type equals t_identifier."""
        # Only the token type matters; the unused token-text lookup from the
        # original was dropped.
        return token.getTokenType() == t_identifier


    """ JACK Statements """

    def compileStatements(self, subTreeNode):
        """Parse consecutive statements into child trees of subTreeNode.

        Parsing continues while the next token is one of let, if, while, do
        or return; the first other token ends the sequence and is left
        unconsumed. Returns subTreeNode.
        """
        rules = {
            "let": ("letStatement", self.compileLetStatement),
            "if": ("ifStatement", self.compileIfStatement),
            "while": ("whileStatement", self.compileWhileStatement),
            "do": ("doStatement", self.compileDoStatement),
            "return": ("returnStatement", self.compileReturnStatement),
        }

        upcoming = self.tokenizer.peekNextToken()
        while upcoming.getToken() in rules:
            elementName, compileRule = rules[upcoming.getToken()]

            # The node is created under a placeholder name and renamed to
            # the concrete statement kind before it is filled in.
            statementNode = Tree.Node("CHANGEME", subTreeNode.depth + 1)
            statementNode.setElementName(elementName)
            compileRule(statementNode)

            subTreeNode.addChildTree(statementNode)
            upcoming = self.tokenizer.peekNextToken()

        return subTreeNode

    def compileLetStatement(self, subTreeNode):
        """Parse ``let name ([ expression ])? = expression ;`` into subTreeNode.

        Tokens are added as children and each expression as a child tree.
        Returns subTreeNode. Raises ValueError on an unexpected token.
        """
        def addExpression():
            # Parse one expression and attach it below this statement.
            expression = Tree.Node("expression", subTreeNode.depth + 1)
            self.compileExpression(expression)
            subTreeNode.addChildTree(expression)

        keyword = self.tokenizer.getNextToken()
        if keyword.getToken() != "let":
            raise ValueError("let statement must begin with let.")
        subTreeNode.addChild(keyword)

        target = self.tokenizer.getNextToken()
        self.validateVarName(target)
        subTreeNode.addChild(target)

        following = self.tokenizer.getNextToken()
        if following.getToken() == "[":
            # Indexed assignment: '[' expression ']' precedes the '='.
            subTreeNode.addChild(following)
            addExpression()

            closing = self.tokenizer.getNextToken()
            if closing.getToken() != "]":
                raise ValueError("Expected ].")
            subTreeNode.addChild(closing)

            following = self.tokenizer.getNextToken()

        if following.getToken() != "=":
            raise ValueError("Expected =.")
        subTreeNode.addChild(following)

        addExpression()

        terminator = self.tokenizer.getNextToken()
        if terminator.getToken() != ";":
            raise ValueError("Expected ;")
        subTreeNode.addChild(terminator)

        return subTreeNode

    def compileIfStatement(self, subTreeNode):
        """Parse ``if ( expression ) { statements } (else { statements })?``.

        Tokens are added to subTreeNode as children; the condition and each
        statements block are added as child trees. Returns subTreeNode.
        Raises ValueError on an unexpected token.
        """
        def expect(text, message):
            # Consume one token, require it to equal `text`, attach it.
            token = self.tokenizer.getNextToken()
            if token.getToken() != text:
                raise ValueError(message)
            subTreeNode.addChild(token)

        def nested(elementName, compileRule):
            # Parse a sub-rule into a fresh node one level deeper.
            child = Tree.Node(elementName, subTreeNode.depth + 1)
            compileRule(child)
            subTreeNode.addChildTree(child)

        expect("if", "Expected if.")
        expect("(", "Expected \'(\'.")
        nested("expression", self.compileExpression)
        expect(")", "Expected \')\'.")
        expect("{", "Expected \'{\'.")
        nested("statements", self.compileStatements)
        expect("}", "Expected \'}\'.")

        # Optional else clause.
        if self.tokenizer.peekNextToken().getToken() == "else":
            subTreeNode.addChild(self.tokenizer.getNextToken())
            expect("{", "Expected \'{\'.")
            nested("statements", self.compileStatements)
            expect("}", "Expected \'}\'.")

        return subTreeNode

    def compileWhileStatement(self, subTreeNode):
        """ Node: Parses 'while' '(' expression ')' '{' statements '}'.

        Consumed tokens and the nested expression/statements trees are appended to
        subTreeNode in source order; subTreeNode is returned. Raises ValueError when
        an expected keyword or symbol is missing. """
        # Grammar laid out as data: (literal, error message) for a terminal,
        # (element name, None) for a nested tree.
        sequence = [
            ("while", "Expected 'while'."),
            ("(", "Expected '('."),
            ("expression", None),
            (")", "Expected ')'."),
            ("{", "Expected '{'."),
            ("statements", None),
            ("}", "Expected '}'."),
        ]

        for item, complaint in sequence:
            if complaint is None:
                child = Tree.Node(item, subTreeNode.depth + 1)
                if item == "expression":
                    self.compileExpression(child)
                else:
                    self.compileStatements(child)
                subTreeNode.addChildTree(child)
                continue

            tok = self.tokenizer.getNextToken()
            if tok.getToken() != item:
                raise ValueError(complaint)
            subTreeNode.addChild(tok)

        return subTreeNode

    def compileDoStatement(self, subTreeNode):
        """ Node: Parses 'do' subroutineCall ';'.

        The 'do' keyword, whatever compileSubroutineCall appends, and the closing ';'
        are added to subTreeNode, which is returned. Raises ValueError when 'do' or
        ';' is not where it is expected. """
        doKeyword = self.tokenizer.getNextToken()
        if doKeyword.getToken() == "do":
            subTreeNode.addChild(doKeyword)
        else:
            raise ValueError("Expected 'do'.")

        # The call's tokens go straight into this statement's node (no wrapper node).
        self.compileSubroutineCall(subTreeNode)

        terminator = self.tokenizer.getNextToken()
        if terminator.getToken() == ";":
            subTreeNode.addChild(terminator)
        else:
            raise ValueError("Expected ';'.")

        return subTreeNode

    def compileReturnStatement(self, subTreeNode):
        """ Node: Parses 'return' expression? ';'.

        Appends the 'return' keyword, the optional expression tree and the ';' to
        subTreeNode and returns it. Raises ValueError when 'return' or ';' is missing. """
        returnKeyword = self.tokenizer.getNextToken()
        if returnKeyword.getToken() != "return":
            raise ValueError("Expected 'return'.")
        subTreeNode.addChild(returnKeyword)

        # Anything other than an immediate ';' is treated as the start of a return value.
        returnsValue = self.tokenizer.peekNextToken().getToken() != ";"
        if returnsValue:
            valueTree = Tree.Node("expression", subTreeNode.depth + 1)
            self.compileExpression(valueTree)
            subTreeNode.addChildTree(valueTree)

        semicolon = self.tokenizer.getNextToken()
        if semicolon.getToken() != ";":
            raise ValueError("Expected ';'.")
        subTreeNode.addChild(semicolon)

        return subTreeNode

    """ JACK Expressions """

    def compileExpression(self, subTreeNode):
        """ Node: Parses term (op term)*.

        Each term becomes a "term" child tree of subTreeNode and each operator token
        (any token found in ops) is added between them. Returns subTreeNode. """

        def appendTerm():
            # Parse one term into its own child tree, one level below this expression.
            termTree = Tree.Node("term", subTreeNode.depth + 1)
            self.compileTerm(termTree)
            subTreeNode.addChildTree(termTree)

        appendTerm()

        # Keep going for as long as the upcoming token is a binary operator.
        while self.tokenizer.peekNextToken().getToken() in ops:
            subTreeNode.addChild(self.tokenizer.getNextToken())
            appendTerm()

        return subTreeNode

    def compileTerm(self, subTreeNode):
        """ Node: Parses one term, chosen from the next two tokens.

        Alternatives, tried in this order: varName '[' expression ']', a subroutine
        call, a plain varName, an integer/string/keyword constant, '(' expression ')',
        and unaryOp term. Consumed tokens and nested trees are appended to
        subTreeNode, which is returned. Raises ValueError for an unrecognised term or
        a missing closing bracket. """
        first, second = self.tokenizer.peekNextNTokens(2)

        def consume():
            # Move the next token from the tokenizer into the tree.
            tok = self.tokenizer.getNextToken()
            subTreeNode.addChild(tok)

        def nest(elementName, compileFn):
            # Parse a nested construct into a child tree one level deeper.
            child = Tree.Node(elementName, subTreeNode.depth + 1)
            compileFn(child)
            subTreeNode.addChildTree(child)

        # Identifiers first: a varName is also a valid subroutine or class name.
        if self.isVarName(first):
            following = second.getToken()
            if following == "[":
                consume()  # varName
                consume()  # [
                nest("expression", self.compileExpression)

                closing = self.tokenizer.getNextToken()
                if closing.getToken() != "]":
                    raise ValueError("Expected ']', but received " + closing.getToken())
                subTreeNode.addChild(closing)
            elif following in [".", "("]:
                self.compileSubroutineCall(subTreeNode)
            else:
                consume()  # plain varName
            return subTreeNode

        # Integer, string and keyword constants are all a single token.
        tokenKind = first.getTokenType()
        if (tokenKind == t_integerConstant
                or tokenKind == t_stringConstant
                or first.getToken() in keywordConstants):
            consume()
            return subTreeNode

        leading = first.getToken()
        if leading == "(":
            consume()
            nest("expression", self.compileExpression)

            closing = self.tokenizer.getNextToken()
            if closing.getToken() != ")":
                raise ValueError("Expected ')'")
            subTreeNode.addChild(closing)
        elif leading in unaryOps:
            consume()
            nest("term", self.compileTerm)
        else:
            raise ValueError("Invalid Term: " + first.getToken() + ", " + second.getToken())

        return subTreeNode

    def compileSubroutineCall(self, subTreeNode):
        """ Node: Parses subroutineName '(' expressionList ')'
        or (className | varName) '.' subroutineName '(' expressionList ')'.

        All tokens and the "expressionList" child tree are appended directly to
        subTreeNode (no wrapper node is created); subTreeNode is returned.

        Raises ValueError when the token after the leading identifier is neither '('
        nor '.', or when an expected '(' or ')' is missing. Whatever
        validateSubroutineName raises for an invalid name propagates unchanged. """
        [t1, t2] = self.tokenizer.peekNextNTokens(2)

        # Also covers (className | varName): all three are plain identifiers.
        self.validateSubroutineName(t1)

        def take(expected=None):
            # Consume one token, optionally insisting on its text, and add it to the tree.
            tok = self.tokenizer.getNextToken()
            if expected is not None and tok.getToken() != expected:
                raise ValueError("Expected '" + expected + "'.")
            subTreeNode.addChild(tok)

        def takeArguments():
            # '(' expressionList ')' -- shared by both call forms so that both
            # validate the closing parenthesis.
            take("(")
            exprListTreeNode = Tree.Node("expressionList", subTreeNode.depth + 1)
            self.compileExpressionList(exprListTreeNode)
            subTreeNode.addChildTree(exprListTreeNode)
            take(")")

        separator = t2.getToken()
        if separator == "(":
            take()  # subroutineName
            takeArguments()
        elif separator == ".":
            take()  # className | varName
            take()  # .

            # Validate the name before it is added to the tree.
            nameToken = self.tokenizer.getNextToken()
            self.validateSubroutineName(nameToken)
            subTreeNode.addChild(nameToken)

            takeArguments()
        else:
            raise ValueError("Expected '<subroutineName>(' or '<className|varName>.'.")

        return subTreeNode

    def compileExpressionList(self, subTreeNode):
        """ Node: Parses (expression (',' expression)*)?

        An upcoming ')' means the list is empty and subTreeNode is returned untouched.
        Otherwise each expression becomes an "expression" child tree and each ','
        separator token is added between them. Returns subTreeNode. """
        if self.tokenizer.peekNextToken().getToken() == ")":
            return subTreeNode

        moreToParse = True
        while moreToParse:
            exprTree = Tree.Node("expression", subTreeNode.depth + 1)
            self.compileExpression(exprTree)
            subTreeNode.addChildTree(exprTree)

            # A comma announces another expression; anything else ends the list.
            moreToParse = self.tokenizer.peekNextToken().getToken() == ","
            if moreToParse:
                subTreeNode.addChild(self.tokenizer.getNextToken())

        return subTreeNode

    """ Writing XML """
    
    def writeXmlOutput(self, treeNode=None):
        """ void: Recursively writes treeNode (default: self.treeRoot) as XML to self.fileObject.

        A leaf is written on one line as <name> value </name>, the value passing
        through _encodeXmlToken. Any other node is written as an opening tag, its
        children in order, then a closing tag. Each line is indented two spaces per
        level of treeNode.depth. """
        if treeNode is None:
            treeNode = self.treeRoot
        padding = "  " * treeNode.depth

        if not treeNode.isLeaf():
            openingTag = "".join([padding, "<", treeNode.elementName, ">\n"])
            closingTag = "".join([padding, "</", treeNode.elementName, ">\n"])

            self.fileObject.write(openingTag)
            for childNode in treeNode.children:
                self.writeXmlOutput(childNode)
            self.fileObject.write(closingTag)
            return

        leafLine = "".join([
            padding,
            "<", treeNode.elementName, "> ",
            _encodeXmlToken(treeNode.elementVal),
            " </", treeNode.elementName, ">\n",
        ])
        self.fileObject.write(leafLine)


    """ Managing Resources """

    def close(self):
        """ void: Closes self.fileObject, the file the XML output is written to. """
        outputFile = self.fileObject
        outputFile.close()
コード例 #2
0
class CompilationEngine:
    """
    Compiles one Jack class to VM code in a single pass over the token stream.

    The engine pulls tokens from the tokenizer it is given, records identifiers in
    a SymbolTable and emits VM commands through a VMWriter. Methods follow a
    "current token" convention: each Compile* method documents which token must be
    current on entry and which token it leaves current on exit.
    """

    def __init__(self, tokenizer, output):
        """
        c'tor
        :param tokenizer: tokenizer object
        :param output: output file/stream, handed to VMWriter
        """
        self.tokenizer = tokenizer
        self.vmWriter = VMWriter(output)
        self.symbols = SymbolTable()
        self.className = None
        self.labelC = 0  # running counter used to build label names "L<n>"

    def CompileClass(self):
        """
        this method is called for each file right after the c'tor. it calls all
        other methods, and compiles the class. Every keyword token found in
        consts.CLASS_VARS starts a class variable declaration, every keyword token
        found in consts.SUB_ROUTINES starts a subroutine; all other tokens are
        skipped.
        """
        self.tokenizer.advance()  # class
        self.tokenizer.advance()  # class name
        self.className = self.tokenizer.identifier()
        self.tokenizer.advance()  # {
        while self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
            if self.tokenizer.tokenType() == "keyword":
                if self.tokenizer.keyWord() in consts.CLASS_VARS:
                    self.CompileClassVarDec()
                elif self.tokenizer.keyWord() in consts.SUB_ROUTINES:
                    self.CompileSubroutine()

    def CompileClassVarDec(self):
        """
        compiles class variable declarations. Entry: the current token is the
        declaration keyword, which is used as the symbol kind. Exit: the current
        token is the first token after the last variable name that is not ','.
        """
        varKind = self.tokenizer.keyWord()
        self.tokenizer.advance()  # type
        varType = self._getType()
        self.tokenizer.advance()  # varName
        varName = self.tokenizer.identifier()
        self.symbols.define(varName, varType, varKind)
        self.tokenizer.advance()  # , or ;
        while (self.tokenizer.tokenType() == "symbol"
               and self.tokenizer.symbol() == ','):
            self.tokenizer.advance()  # varName
            varName = self.tokenizer.identifier()
            self.symbols.define(varName, varType, varKind)
            self.tokenizer.advance()  # , or ;

    def CompileSubroutine(self):
        """
        compiles subroutine declarations. Entry: the current token is the
        subroutine keyword. Emits the function header (with the number of
        declared locals), the 'this' set-up for methods and constructors, and
        then the body statements.
        """
        subroutine = self.tokenizer.keyWord()
        self.tokenizer.advance()  # retType
        self.tokenizer.advance()  # subRoutine name
        name = self.tokenizer.identifier()
        self.tokenizer.advance()  # (
        self.labelC = 0  # label numbering restarts for every subroutine
        self.symbols.startSubroutine()
        if subroutine == "method":
            # the receiver is defined as an argument before the declared parameters
            self.symbols.define("this", self.className, "arg")
        self.CompileParameterList()
        self.tokenizer.advance()  # {
        self.tokenizer.advance()  # var / statement
        localN = 0
        while (self.tokenizer.tokenType() == "keyword"
               and self.tokenizer.keyWord() == "var"):
            localN += self.CompileVarDec()
        self.vmWriter.writeFunction(self.className + "." + name, localN)
        if subroutine == "method":
            # point 'this' at the receiver object passed as argument 0
            self.vmWriter.writePush("argument", 0)
            self.vmWriter.writePop("pointer", 0)
        elif subroutine == "constructor":
            # allocate one word per field and point 'this' at the new block
            fields = self.symbols.varCount("field")
            self.vmWriter.writePush("constant", fields)
            self.vmWriter.writeCall("Memory.alloc", 1)
            self.vmWriter.writePop("pointer", 0)
        self.CompileStatements()

    def CompileVarDec(self):
        """
        compiles variable declarations. Entry: the current token is 'var'.
        Exit: the current token is the one after the token that ended the list.
        :return: number of vars
        """
        self.tokenizer.advance()  # type
        varType = self._getType()
        self.tokenizer.advance()  # var name
        varName = self.tokenizer.identifier()
        self.symbols.define(varName, varType, "var")
        self.tokenizer.advance()  # , or ;
        counter = 1
        while self.tokenizer.symbol() == ',':
            counter += 1
            self.tokenizer.advance()  # varName
            varName = self.tokenizer.identifier()
            self.symbols.define(varName, varType, "var")
            self.tokenizer.advance()  # , or ;
        self.tokenizer.advance()
        return counter

    def _getType(self):
        """
        helper method to get type, either keyword or identifier
        :return: the current token read as a keyword if it is one, else as an
                 identifier
        """
        if self.tokenizer.tokenType() == "keyword":
            return self.tokenizer.keyWord()
        else:
            return self.tokenizer.identifier()

    def CompileParameterList(self):
        """
        compiles parameter list, defining every parameter as an "arg" symbol.
        Entry: the current token is '('. Exit: the current token is the first
        token after the last parameter that is not ','.
        """
        self.tokenizer.advance()  # type / statement
        if self.tokenizer.tokenType() == "identifier" or (
                self.tokenizer.tokenType() == "keyword"
                and self.tokenizer.keyWord() in consts.VAR_TYPES):
            paramType = self._getType()
            self.tokenizer.advance()  # varName
            paramName = self.tokenizer.identifier()
            self.symbols.define(paramName, paramType, "arg")
            self.tokenizer.advance()  # , or statement
            while (self.tokenizer.tokenType() == "symbol"
                   and self.tokenizer.symbol() == ','):
                self.tokenizer.advance()  # type
                # _getType (not keyWord) so that class-typed parameters after
                # the first one are read as identifiers, like the first one is
                paramType = self._getType()
                self.tokenizer.advance()  # varName
                paramName = self.tokenizer.identifier()
                self.symbols.define(paramName, paramType, "arg")
                self.tokenizer.advance()  # , or statement

    def CompileStatements(self):
        """
        compiles statements for as long as the current token is a keyword found
        in consts.STATEMENTS
        """
        while (self.tokenizer.tokenType() == "keyword"
               and self.tokenizer.keyWord() in consts.STATEMENTS):
            if self.tokenizer.keyWord() == "let":
                self.CompileLet()
            elif self.tokenizer.keyWord() == "do":
                self.CompileDo()
            elif self.tokenizer.keyWord() == "while":
                self.CompileWhile()
            elif self.tokenizer.keyWord() == "return":
                self.CompileReturn()
            elif self.tokenizer.keyWord() == "if":
                self.CompileIf()

    def CompileLet(self):
        """
        compiles let statement, either a plain variable assignment or an
        assignment to an array cell. Entry: the current token is 'let'.
        """
        self.tokenizer.advance()  # var name
        name = self.tokenizer.identifier()
        kind = self.symbols.kindOf(name)
        self.tokenizer.advance()  # [ or =
        if self.tokenizer.symbol() == '[':  # access a specific cell in array
            self.tokenizer.advance()  # exp
            self.CompileExpression()
            position = self.symbols.indexOf(name)
            self.vmWriter.writePush(KIND_VM[kind], position)
            self.vmWriter.writeArithmetic(
                "add")  # start point + requested position
            self.tokenizer.advance()  # =
            self.tokenizer.advance()
            self.CompileExpression()
            # the value is parked in temp 0 so the target address, which sits
            # below it on the stack, can be popped into pointer 1 first
            self.vmWriter.writePop("temp",
                                   0)  # insert result to requested array cell
            self.vmWriter.writePop("pointer", 1)
            self.vmWriter.writePush("temp", 0)
            self.vmWriter.writePop("that", 0)
        else:
            self.tokenizer.advance()
            self.CompileExpression()
            position = self.symbols.indexOf(name)
            self.vmWriter.writePop(KIND_VM[kind],
                                   position)  # insert result to requested var
        self.tokenizer.advance()

    def CompileIf(self):
        """
        compiles if statement, with an optional else section. Entry: the
        current token is 'if'.
        """
        elseLabel = "L" + str(self.labelC)  # create labels
        self.labelC += 1
        after = "L" + str(self.labelC)
        self.labelC += 1
        self.tokenizer.advance()  # (
        self.tokenizer.advance()  # exp
        self.CompileExpression()
        self.vmWriter.writeArithmetic("not")  # negate cond.
        self.vmWriter.writeIf(elseLabel)
        self.tokenizer.advance()  # {
        self.tokenizer.advance()  # statements
        self.CompileStatements()  # if true
        self.vmWriter.writeGoto(after)
        self.vmWriter.writeLabel(elseLabel)
        self.tokenizer.advance()
        if (self.tokenizer.tokenType() == "keyword"
                and self.tokenizer.keyWord() == "else"):  # else section exists
            self.tokenizer.advance()  # {
            self.tokenizer.advance()  # statements
            self.CompileStatements()
            self.tokenizer.advance()
        self.vmWriter.writeLabel(after)  # after the scope

    def CompileWhile(self):
        """
        compiles while statement. Entry: the current token is 'while'.
        """
        loop = "L" + str(self.labelC)  # create labels
        self.labelC += 1
        after = "L" + str(self.labelC)
        self.labelC += 1
        self.tokenizer.advance()  # (
        self.tokenizer.advance()  # exp
        self.vmWriter.writeLabel(loop)
        self.CompileExpression()  # loop cond.
        self.vmWriter.writeArithmetic("not")
        self.vmWriter.writeIf(after)
        self.tokenizer.advance()  # {
        self.tokenizer.advance()  # statements
        self.CompileStatements()
        self.vmWriter.writeGoto(loop)
        self.vmWriter.writeLabel(after)
        self.tokenizer.advance()

    def CompileDo(self):
        """
        compiles do statement. The call is compiled as a term and its result is
        discarded. Entry: the current token is 'do'.
        """
        self.tokenizer.advance()  # subroutine name / class name/var name
        self.CompileTerm()
        self.vmWriter.writePop("temp", 0)  # pop dummy-value
        self.tokenizer.advance()

    def CompileReturn(self):
        """
        compiles return statement. Entry: the current token is 'return'.
        """
        self.tokenizer.advance()  # statement or ;
        if (self.tokenizer.tokenType() != "symbol"
                or self.tokenizer.symbol() != ';'):
            self.CompileExpression()
        else:  # void
            self.vmWriter.writePush("constant", 0)  # dummy value
        self.vmWriter.writeReturn()
        self.tokenizer.advance()

    def CompileExpression(self):
        """
        compiles expression: a term followed by any number of (op term) pairs.
        Each operator is emitted after its right-hand term. Entry: the current
        token is the first token of the expression.
        """
        self.CompileTerm()
        while (self.tokenizer.tokenType() == "symbol"
               and self.tokenizer.symbol() in consts.OP):
            op = self.tokenizer.symbol()
            self.tokenizer.advance()  # term
            self.CompileTerm()
            if op in ("*", "/"):
                # these two are emitted as two-argument calls, not arithmetic
                self.vmWriter.writeCall(OP_VM[op], 2)
            else:
                self.vmWriter.writeArithmetic(OP_VM[op])

    def CompileTerm(self):
        """
        compiles term: a constant, a parenthesised expression, a unary
        operation, a variable, an array cell or a subroutine call. Entry: the
        current token is the first token of the term.
        """
        if self.tokenizer.tokenType() == "integerConstant":
            self.vmWriter.writePush("constant", self.tokenizer.intVal())
            self.tokenizer.advance()
        elif self.tokenizer.tokenType() == "stringConstant":
            stringLen = len(self.tokenizer.stringVal())
            self.vmWriter.writePush("constant", stringLen)  # new string
            self.vmWriter.writeCall("String.new", 1)
            for i in range(stringLen):  # push each char
                char = ord(self.tokenizer.stringVal()[i])
                self.vmWriter.writePush("constant", char)
                self.vmWriter.writeCall("String.appendChar", 2)
            self.tokenizer.advance()
        elif self.tokenizer.tokenType() == "keyword":
            kw = self.tokenizer.keyWord()
            if kw == "this":
                self.vmWriter.writePush("pointer", 0)
            else:  # null, true, false
                self.vmWriter.writePush("constant", 0)
                if kw == "true":
                    self.vmWriter.writeArithmetic("not")
            self.tokenizer.advance()
        elif self.tokenizer.tokenType() == "symbol":
            if self.tokenizer.symbol() == '(':
                self.tokenizer.advance()
                self.CompileExpression()
                self.tokenizer.advance()  # next thing
            elif self.tokenizer.symbol() in UNI_VM:
                op = self.tokenizer.symbol()
                self.tokenizer.advance()
                self.CompileTerm()
                self.vmWriter.writeArithmetic(UNI_VM[op])
        else:  # var or subroutine call
            name = self.tokenizer.identifier()
            kind = self.symbols.kindOf(name)
            self.tokenizer.advance()  # ( [ . or next thing (if var name)
            if self.tokenizer.symbol() == '[':  # change that to array position
                self.tokenizer.advance()
                self.CompileExpression()
                position = self.symbols.indexOf(name)
                self.vmWriter.writePush(KIND_VM[kind], position)
                self.vmWriter.writeArithmetic("add")
                self.vmWriter.writePop("pointer", 1)
                self.vmWriter.writePush("that", 0)
                self.tokenizer.advance()  # next thing
            else:
                fullName = self.className + "." + name
                dot = False  # function being called without class
                argsN = 0
                if self.tokenizer.symbol() == '.':
                    dot = True
                    self.tokenizer.advance()  # subroutine name
                    if kind != "NONE":
                        # name is a known variable: call on its type and pass
                        # the variable itself as the first argument
                        fullName = self.symbols.typeOf(
                            name) + "." + self.tokenizer.identifier()
                        argsN += 1
                        position = self.symbols.indexOf(name)
                        self.vmWriter.writePush(KIND_VM[kind], position)
                    else:
                        # name is not in the symbol table: used as a class name
                        fullName = name + "." + self.tokenizer.identifier()
                    self.tokenizer.advance()  # (
                if (self.tokenizer.tokenType() == "symbol"
                        and self.tokenizer.symbol() == "("):
                    if not dot:
                        # unqualified call: the current object is passed first
                        argsN += 1
                        self.vmWriter.writePush("pointer", 0)
                    self.tokenizer.advance()  # exp or )
                    argsN += self.CompileExpressionList()
                    self.vmWriter.writeCall(fullName, argsN)
                    self.tokenizer.advance()
                elif kind != "NONE":
                    position = self.symbols.indexOf(name)
                    self.vmWriter.writePush(KIND_VM[kind], position)
                else:
                    self.tokenizer.advance()

    def CompileExpressionList(self):
        """
        compiles expression list. Entry: the current token is the first token
        of the list, or ')' when the list is empty.
        :return: number of args
        """
        counter = 0
        if not (self.tokenizer.tokenType() == "symbol"
                and self.tokenizer.symbol() == ')'):
            counter += 1
            self.CompileExpression()
            while self.tokenizer.symbol() == ',':
                counter += 1
                self.tokenizer.advance()  # expression
                self.CompileExpression()
        return counter
コード例 #3
0
class CompilationEngine(object):
    """Recursive-descent compiler that turns one Jack class into VM commands.

    Tokens come from a JackTokenizer built on src, identifiers are tracked in a
    SymbolTable and VM commands are emitted through a VMWriter built on output.
    Parsing uses two primitives: _tryNextToken peeks at the upcoming token and
    _acceptNextToken consumes it, raising SyntaxError when it is not expected.
    """

    def __init__(self, src, output):
        self.tokenizer = JackTokenizer(src)
        self.writer = VMWriter(output)
        self.symbolTable = SymbolTable()
        self.labelIndex = 0  # suffix that keeps generated labels unique per subroutine

    def _acceptNextToken(self, token):
        """Consume the next token and return its value.

        token is a single string or a list of strings; the consumed token is
        accepted when either its type or its value appears among them.
        Raises SyntaxError when the input is exhausted or the token is not
        accepted (the token has already been consumed in the latter case).
        """
        expected = token if isinstance(token, list) else [token]
        if not self.tokenizer.hasMoreToken():
            raise SyntaxError(
                'Parse Error: expected one of %r but reached end of input' % (expected,))
        self.tokenizer.advance()
        typ = self.tokenizer.tokenType()
        tok = self.tokenizer.tokenValue()
        if typ in expected or tok in expected:
            return tok
        raise SyntaxError(
            'Parse Error: expected one of %r but got %s %r' % (expected, typ, tok))

    def _tryNextToken(self, token):
        """Return True when the upcoming token matches, without consuming it.

        token is a single string or a list of strings, matched against the
        upcoming token's type and value as in _acceptNextToken. Returns False
        when there is no upcoming token.
        """
        if self.tokenizer.hasMoreToken():
            typ, tok = self.tokenizer.next()
            expected = token if isinstance(token, list) else [token]
            if typ in expected or tok in expected:
                return True
        return False

    def compileClass(self):
        """'class' className '{' classVarDec* subroutineDec* '}'

        Compiles the whole class and closes the writer afterwards.
        """
        self._acceptNextToken('class')
        self.classname = self._acceptNextToken('identifier')
        self._acceptNextToken('{')

        while self._tryNextToken(['static', 'field']):
            self.compileClassVarDec()
        while self._tryNextToken(['constructor', 'function', 'method']):
            self.compileSubroutine()
        self._acceptNextToken('}')

        self.writer.close()

    def compileClassVarDec(self):
        """('static'|'field') type varName (',' varName)* ';'

        Defines every declared name in the symbol table with the declared
        type and kind.
        """
        kind = self._acceptNextToken(['static', 'field'])
        varType = self._acceptNextToken(['int', 'char', 'boolean', 'identifier'])
        self.symbolTable.define(self._acceptNextToken('identifier'), varType, kind)

        while self._tryNextToken(','):
            self._acceptNextToken(',')
            self.symbolTable.define(self._acceptNextToken('identifier'), varType, kind)
        self._acceptNextToken(';')

    def compileSubroutine(self):
        """('constructor'|'function'|'method') ('void'|type) subroutineName
        '(' parameterList ')' subroutineBody

        Emits the function header with the number of declared locals, the
        'this' set-up for constructors and methods, then the body statements.
        """
        self.labelIndex = 0  # label numbering restarts for every subroutine

        self.symbolTable.startSubroutine()
        subroutine = self._acceptNextToken(['constructor', 'function', 'method'])
        self._acceptNextToken(['void', 'int', 'char', 'boolean', 'identifier'])
        functionname = self._acceptNextToken('identifier')

        if subroutine == 'method':
            # the receiver is defined as an argument before the declared parameters
            self.symbolTable.define('this', self.classname, 'argument')

        self._acceptNextToken('(')
        self.compileParameterList()
        self._acceptNextToken(')')
        self._acceptNextToken('{')

        argc = 0
        while self._tryNextToken('var'):
            argc += self.compileVarDec()
        self.writer.writeFunction(self.classname + '.' + functionname, argc)

        if subroutine == 'constructor':
            # allocate one word per field and point 'this' at the new block
            self.writer.writePush('constant', self.symbolTable.varCount('field'))
            self.writer.writeCall('Memory.alloc', 1)
            self.writer.writePop('pointer', 0)
        elif subroutine == 'method':
            # point 'this' at the receiver passed as argument 0
            self.writer.writePush('argument', 0)
            self.writer.writePop('pointer', 0)
        # compileStatements loops over every statement itself
        self.compileStatements()
        self._acceptNextToken('}')

    def compileParameterList(self):
        """((type varName) (',' type varName)*)?

        Defines every parameter in the symbol table as an 'argument'.
        """
        if self._tryNextToken(TYPE):
            paramType = self._acceptNextToken(TYPE)
            self.symbolTable.define(self._acceptNextToken('identifier'), paramType, 'argument')
            while self._tryNextToken(','):
                self._acceptNextToken(',')
                paramType = self._acceptNextToken(TYPE)
                self.symbolTable.define(self._acceptNextToken('identifier'), paramType, 'argument')

    def compileVarDec(self):
        """'var' type varName (',' varName)* ';'

        Defines every declared name as a 'local' and returns how many names
        were declared.
        """
        argc = 1
        self._acceptNextToken('var')
        varType = self._acceptNextToken(TYPE)
        self.symbolTable.define(self._acceptNextToken('identifier'), varType, 'local')

        while self._tryNextToken(','):
            self._acceptNextToken(',')
            argc += 1
            self.symbolTable.define(self._acceptNextToken('identifier'), varType, 'local')
        self._acceptNextToken(';')
        return argc

    def compileStatements(self):
        """statement*

        letStatement | ifStatement | whileStatement | doStatement | returnStatement

        Raises SyntaxError when the upcoming token matches STATEMENT but none
        of the five statement keywords, instead of looping forever on it.
        """
        while self._tryNextToken(STATEMENT):
            if self._tryNextToken('let'):
                self.compileLet()
            elif self._tryNextToken('if'):
                self.compileIf()
            elif self._tryNextToken('while'):
                self.compileWhile()
            elif self._tryNextToken('do'):
                self.compileDo()
            elif self._tryNextToken('return'):
                self.compileReturn()
            else:
                # nothing above consumed a token, so looping again cannot make progress
                raise SyntaxError('Parse Error: unsupported statement keyword')

    def compileDo(self):
        """'do' subroutineCall ';'

        subroutineName '(' expressionList ')' |
        (className | varName) '.' subroutineName '(' expressionList ')'

        The call's return value is discarded into temp 0.
        """
        self._acceptNextToken('do')
        funcname = self._acceptNextToken('identifier')

        argc = 0
        if self._tryNextToken('.'):
            self._acceptNextToken('.')
            varType = self.symbolTable.typeOf(funcname)
            if varType is not None:
                # game.run(): known variable, pushed as the first argument
                argc += 1
                self.writer.writePush(self.symbolTable.kindOf(funcname), self.symbolTable.indexOf(funcname))
                funcname = varType + '.' + self._acceptNextToken('identifier')
            else:
                # Game.run(): name is not in the symbol table, used as a class name
                funcname = funcname + '.' + self._acceptNextToken('identifier')
        else:
            # run(): call on the current object, pushed as the first argument
            argc += 1
            funcname = self.classname + '.' + funcname
            self.writer.writePush('pointer', 0)

        self._acceptNextToken('(')
        argc += self.compileExpressionList()
        self._acceptNextToken(')')
        self._acceptNextToken(';')

        self.writer.writeCall(funcname, argc)
        self.writer.writePop('temp', 0)

    def compileLet(self):
        """'let' varName ('[' expression ']')? '=' expression ';'"""
        self._acceptNextToken('let')
        varName = self._acceptNextToken('identifier')
        if self._tryNextToken('['):
            self.writer.writePush(self.symbolTable.kindOf(varName), self.symbolTable.indexOf(varName))
            self._acceptNextToken('[')
            self.compileExpression()
            self._acceptNextToken(']')
            self.writer.writeArithmetic('add')
            self._acceptNextToken('=')
            self.compileExpression()
            self._acceptNextToken(';')
            # the value is parked in temp 0 so the target address, which sits
            # below it on the stack, can be popped into pointer 1 first
            self.writer.writePop('temp', 0)
            self.writer.writePop('pointer', 1)
            self.writer.writePush('temp', 0)
            self.writer.writePop('that', 0)
        else:
            self._acceptNextToken('=')
            self.compileExpression()
            self._acceptNextToken(';')
            self.writer.writePop(self.symbolTable.kindOf(varName), self.symbolTable.indexOf(varName))

    def compileWhile(self):
        """'while' '(' expression ')' '{' statements '}'"""
        index = str(self.labelIndex)
        self.labelIndex += 1

        self.writer.writeLabel('WHILE' + index)
        self._acceptNextToken('while')
        self._acceptNextToken('(')
        self.compileExpression()
        self._acceptNextToken(')')
        self.writer.writeArithmetic('not')

        # the condition was negated, so this jump leaves the loop when it is false
        self.writer.writeIf('WHILE_END' + index)
        self._acceptNextToken('{')
        self.compileStatements()
        self._acceptNextToken('}')
        self.writer.writeGoto('WHILE' + index)
        self.writer.writeLabel('WHILE_END' + index)

    def compileReturn(self):
        """'return' expression? ';'

        A return without an expression pushes constant 0 before returning.
        """
        self._acceptNextToken('return')

        if self._tryNextToken(';'):
            self._acceptNextToken(';')
            self.writer.writePush('constant', 0)
        else:
            self.compileExpression()
            self._acceptNextToken(';')
        self.writer.writeReturn()

    def compileIf(self):
        """'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?

        NOTE: the condition is negated before the jump, so the label named
        IF_TRUE marks the start of the else part and IF_FALSE marks the end of
        the whole statement.
        """
        index = str(self.labelIndex)
        self.labelIndex += 1

        self._acceptNextToken('if')
        self._acceptNextToken('(')
        self.compileExpression()
        self._acceptNextToken(')')
        self.writer.writeArithmetic('not')
        self.writer.writeIf('IF_TRUE' + index)

        self._acceptNextToken('{')
        self.compileStatements()
        self._acceptNextToken('}')
        self.writer.writeGoto('IF_FALSE' + index)
        self.writer.writeLabel('IF_TRUE' + index)

        if self._tryNextToken('else'):
            self._acceptNextToken('else')
            self._acceptNextToken('{')
            self.compileStatements()
            self._acceptNextToken('}')
        self.writer.writeLabel('IF_FALSE' + index)

    def compileExpression(self):
        """term (op term)*

        Each operator is emitted after its right-hand term.
        """
        self.compileTerm()
        while self._tryNextToken(OP):
            op = self._acceptNextToken(OP)
            self.compileTerm()
            if op == '*':
                self.writer.writeCall('Math.multiply', 2)
            elif op == '/':
                self.writer.writeCall('Math.divide', 2)
            else:
                self.writer.writeArithmetic(OP_COMMAND[op])

    def compileTerm(self):
        """integerConstant | stringConstant | keywordConstant | varName |
        varName '[' expression ']' | subroutineCall | '(' expression ')' |
        unaryOp term
        """
        if self._tryNextToken('('):  # '(' expression ')'
            self._acceptNextToken('(')
            self.compileExpression()
            self._acceptNextToken(')')
        elif self._tryNextToken(['-', '~']):  # unaryOp term
            unaryOp = self._acceptNextToken(['-', '~'])
            self.compileTerm()
            if unaryOp == '-':
                self.writer.writeArithmetic('neg')
            else:
                self.writer.writeArithmetic('not')
        else:
            first_s = self._acceptNextToken(TERM)
            if self._tryNextToken('['):  # varName '[' expression ']'
                self.writer.writePush(self.symbolTable.kindOf(first_s), self.symbolTable.indexOf(first_s))
                self._acceptNextToken('[')
                self.compileExpression()
                self._acceptNextToken(']')
                self.writer.writeArithmetic('add')
                self.writer.writePop('pointer', 1)
                self.writer.writePush('that', 0)
            elif self._tryNextToken('('):  # subroutineCall run()
                # call on the current object, pushed as the first argument
                self.writer.writePush('pointer', 0)
                self._acceptNextToken('(')
                argc = self.compileExpressionList() + 1
                self._acceptNextToken(')')
                self.writer.writeCall(self.classname + '.' + first_s, argc)
            elif self._tryNextToken('.'):  # subroutineCall game.run()
                self._acceptNextToken('.')
                subroutineName = self._acceptNextToken('identifier')
                varType = self.symbolTable.typeOf(first_s)
                argc = 0
                callname = first_s
                if varType is not None:
                    # known variable: call on its type, variable is the first argument
                    argc += 1
                    callname = varType
                    self.writer.writePush(self.symbolTable.kindOf(first_s), self.symbolTable.indexOf(first_s))
                self._acceptNextToken('(')
                argc += self.compileExpressionList()
                self._acceptNextToken(')')
                self.writer.writeCall(callname + '.' + subroutineName, argc)
            else:
                # only peeks happened since first_s was consumed, so the
                # tokenizer's current token is still first_s
                tokenType = self.tokenizer.tokenType()
                if tokenType == 'integerConstant':
                    self.writer.writePush('constant', int(first_s))
                elif tokenType == 'stringConstant':
                    self.writer.writePush('constant', len(first_s))
                    self.writer.writeCall('String.new', 1)
                    for c in first_s:
                        self.writer.writePush('constant', ord(c))
                        self.writer.writeCall('String.appendChar', 2)
                elif tokenType == 'identifier':
                    self.writer.writePush(self.symbolTable.kindOf(first_s), self.symbolTable.indexOf(first_s))
                else:
                    if first_s == 'null' or first_s == 'false':
                        self.writer.writePush('constant', 0)
                    elif first_s == 'true':
                        # true is emitted as the negation of constant 1
                        self.writer.writePush('constant', 1)
                        self.writer.writeArithmetic('neg')
                    elif first_s == 'this':
                        self.writer.writePush('pointer', 0)

    def compileExpressionList(self):
        """(expression (',' expression)*)?

        Returns the number of expressions compiled.
        """
        argc = 0
        if self._tryNextToken(TERM):
            self.compileExpression()
            argc += 1
            while self._tryNextToken(','):
                self._acceptNextToken(',')
                self.compileExpression()
                argc += 1
        return argc
コード例 #4
0
class CompilationEngine():
    vardeclist = ['static', 'field']
    subroutinelist = ['constructor', 'function', 'method', 'void']
    statementList = ['let', 'while', 'if', 'return', 'do', 'turn']
    op_dict = {
        '+': 'add',
        '-': 'sub',
        '&': 'and',
        '|': 'or',
        '<': 'lt',
        '>': 'gt',
        '=': 'eq',
        '~': 'not',
        '&amp;': 'and',
        '&lt;': 'lt',
        '&gt;': 'gt'
    }
    running_index = 0
    label_index = 0
    while_label_index = 0
    if_label_index = 0

    def __init__(self, inputfile, writefile):
        """Set up the compiler state for one file and compile it immediately.

        inputfile is handed to JackTokenizer and, with '.jack' replaced by
        '.vm', to VMWriter. writefile is opened in 'w+' mode and kept on
        self.outfile (closed by close()). Construction ends by calling
        compileClass().
        """
        # Names of the class / subroutine currently being compiled.
        self.infile = inputfile
        self.ClassName = ''
        self.keyword = ''
        self.cur_subroutineName = ''
        self.sub_return_type = ''
        self.array_name = ''

        # Parser bookkeeping flags and the argument counter used for calls.
        self.op_flag1 = False
        self.op_flag2 = False
        self.is_Array = False
        self.running_index = 0

        # Collaborators are created in the same order as before.
        self.Tokenizer = JackTokenizer(inputfile)
        self.SymbolTable = SymbolTable()
        self.VMWriter = VMWriter(inputfile.replace('.jack', '.vm'))
        self.outfile = open(writefile, 'w+')

        self.compileClass()

    def compileClass(self):  #draft finished
        print('Compiling started of ' + self.infile)
        self.Tokenizer.advance()  #starts it, token = 'class'
        if 'class' in self.Tokenizer.getToken():

            self.Tokenizer.advance()  #token = classname
            self.ClassName = self.Tokenizer.getStrToken()
            self.Tokenizer.advance()  #next token = {

            #cleaned up from project 10 put while loop in each respective method
            self.compileClassVarDec()
            self.compileSubroutine()

            self.Tokenizer.advance()  #next token
        else:
            print('error: NO CLASS TO COMPILE')

    def compileClassVarDec(
            self):  #finished untested ######WHILE LOOP NOT TURNING ON
        self.Tokenizer.advance()  #adv0ance off { to first token in var dec

        while 'static' in self.Tokenizer.getStrToken(
        ) or 'field' in self.Tokenizer.getStrToken():

            kind = self.Tokenizer.getStrToken()  #static or
            self.Tokenizer.advance()  #next token advance to type
            type = self.Tokenizer.getStrToken()
            self.Tokenizer.advance()  #next token advance to name
            name = self.Tokenizer.getStrToken()
            self.Tokenizer.advance()  #advances to either , or ;

            self.SymbolTable.define(name, type, kind)

            while ';' not in self.Tokenizer.getToken(
            ):  #while you havent reached the end of the line
                self.Tokenizer.advance(
                )  #advances to next token which is hopefully a name
                name = self.Tokenizer.getStrToken()
                self.SymbolTable.define(name, type, kind)

                self.Tokenizer.advance()  #advances to either , or ;

            #have reached ;
            self.Tokenizer.advance()  #next token

    def compileSubroutine(self):  #draft finished
        #contains subroutine DEc and subroutine Body
        #something f****d up here
        #return_is_void=False
        while 'constructor' in self.Tokenizer.getStrToken(
        ) or 'method' in self.Tokenizer.getStrToken(
        ) or 'function' in self.Tokenizer.getStrToken():
            constructor_flag = False
            method_flag = False
            self.SymbolTable.startSubroutine()
            if 'constructor' in self.Tokenizer.getStrToken():
                constructor_flag = True
            if 'method' in self.Tokenizer.getStrToken():
                method_flag = True
            #current token right now should be either constructor, method, or function
            self.Tokenizer.advance()  #advances to type of return eg int
            self.sub_return_type = self.Tokenizer.getStrToken()
            self.Tokenizer.advance(
            )  #next token advances to subroutine name (should be new if constructor)
            self.cur_subroutineName = self.ClassName + '.' + self.Tokenizer.getStrToken(
            )
            if 'method' in self.Tokenizer.getStrToken():
                self.SymbolTable.define('this', self.sub_return_type,
                                        'argument')
            self.Tokenizer.advance()  #next token advances to (
            self.compileParameterList()
            #after parameter list finishes token is )
            self.Tokenizer.advance()  #next token advances to {

            #starts subroutine body
            self.Tokenizer.advance(
            )  #advances to start of subroutine body First token there
            #moved while loop below into compileVardec
            #while 'var' in self.Tokenizer.getToken():
            self.compileVarDec()

            self.VMWriter.writeFunction(
                self.cur_subroutineName, self.SymbolTable.varCount('var')
            )  #calls function related to parent class, if its a function all good. if its method or constructor more has to happen
            self.if_label_index = 0
            if constructor_flag == True:
                self.VMWriter.writePush(
                    'constant', self.SymbolTable.varCount('field')
                )  #pushes the constructors fields onto stack for however many fields there are
                self.VMWriter.writeCall('Memory.alloc', 1)
                self.VMWriter.writePop('pointer', 0)
            elif method_flag == True:
                self.VMWriter.writePush('argument', 0)
                self.VMWriter.writePop('pointer', 0)  #init this

            #compile the rest of subroutine
            self.compileStatements()
            #current token after should be }
            self.Tokenizer.advance(
            )  #next statement constructor and method function if not breaks out of while loop

    def compileParameterList(self):  #draft finished

        while ')' not in self.Tokenizer.getToken():
            self.Tokenizer.advance(
            )  #advance to type of parameter or ) in case of no param
            if ')' not in self.Tokenizer.getStrToken(
            ):  #parameter here to parameter stuff
                if ',' not in self.Tokenizer.getStrToken():
                    type = self.Tokenizer.getStrToken()
                    self.Tokenizer.advance()  #advances to name
                    name = self.Tokenizer.getStrToken()
                    self.Tokenizer.advance(
                    )  #advances to either comma or end of list start loop over

                    self.SymbolTable.define(name, type, 'argument')
            else:
                return  # ')' is token, function is done

    def compileVarDec(self):  #draft

        while 'var' in self.Tokenizer.getToken():

            if 'var' in self.Tokenizer.getToken():
                self.Tokenizer.advance()  #advances to type
                type = self.Tokenizer.getStrToken()
                self.Tokenizer.advance()  #advances to name
                name = self.Tokenizer.getStrToken()
                self.SymbolTable.define(name, type,
                                        'var')  #appends to symbol table
                self.Tokenizer.advance()  #advances to either ,  or ;

                while ';' not in self.Tokenizer.getToken():
                    self.Tokenizer.advance()  #advances to name
                    name = self.Tokenizer.getStrToken()
                    self.SymbolTable.define(name, type, 'var')
                    self.Tokenizer.advance(
                    )  #next token advances to either , or ;

                self.Tokenizer.advance(
                )  #next token var if more vars or done if not

    def compileStatements(self):  #finished

        while 'if' or 'let' or 'while' or 'do' or 'return' in self.Tokenizer.getStrToken(
        ):

            if 'let' in self.Tokenizer.getToken():
                self.compileLet()

            elif 'while' in self.Tokenizer.getToken():
                self.compileWhile()

            elif 'if' in self.Tokenizer.getToken():
                self.compileIf()

            elif 'do' in self.Tokenizer.getToken():
                self.compileDo()

            elif 'return' in self.Tokenizer.getToken():

                self.compileReturn()
            else:

                break

    def compileCall(self):  #finished untested
        """Emit VM code for a subroutine call starting at the current token.

        The current token is taken as the call's first name. Three shapes are
        handled, chosen by the token that follows it:

        * ``name.sub(...)`` where ``name`` is in SymbolTable.SubSymbolTable:
          calls ``<typeOf(name)>.sub`` and pushes the variable.
        * ``name.sub(...)`` otherwise: calls ``name.sub`` (or
          ``<typeOf(name)>.sub`` when ``name`` is in ClassSymbolTable).
        * ``name(...)``: pushes ``pointer 0`` and calls ``<ClassName>.name``.

        The argument count is read from self.running_index, which
        compileExpressionList() starts at 1 and increments once per comma;
        it is reset to 0 here after use.
        """
        #do without pop temp 0
        #next token to function/method name
        doCallName = self.Tokenizer.getStrToken()

        self.Tokenizer.advance()  #advances to '.' or '('
        if '.' in self.Tokenizer.getToken():
            if doCallName in self.SymbolTable.SubSymbolTable:  #ie method like square.move()
                self.Tokenizer.advance()
                subName = self.Tokenizer.getStrToken()
                self.Tokenizer.advance()  #next token '('
                self.Tokenizer.advance()  #first expression
                self.compileExpressionList()

                nArgs = self.running_index  #running index added whenever expression called in expression list +1 for this
                self.running_index = 0  #resets index
                # NOTE(review): the receiver is pushed after the argument
                # expressions were compiled, so it ends up above them on the
                # stack rather than below as argument 0 -- confirm intended.
                self.VMWriter.writePush(
                    self.SymbolTable.kindOf(doCallName),
                    self.SymbolTable.indexOf(
                        doCallName))  #pushes 'this' of object onto stack

                self.VMWriter.writeCall(
                    self.SymbolTable.typeOf(doCallName) + '.' + subName, nArgs)
                return

            else:  #ie function like Keyboard.keyPressed() Sys.wait(5) basically same thing except you dont push this on stack

                self.Tokenizer.advance()
                subName = self.Tokenizer.getStrToken()
                self.Tokenizer.advance()  #next token '('
                self.Tokenizer.advance()
                self.compileExpressionList()
                nArgs = self.running_index  #running index added whenever expression called in expression list
                #resets index
                self.running_index = 0
                if doCallName in self.SymbolTable.ClassSymbolTable:
                    # Receiver is a class-level variable.
                    # NOTE(review): this pushes `this <nArgs - 1>` rather than
                    # the variable's own index -- looks unintended, confirm.
                    self.VMWriter.writePush('this', nArgs - 1)
                    self.VMWriter.writeCall(
                        self.SymbolTable.typeOf(doCallName) + '.' + subName,
                        nArgs)
                else:
                    if doCallName == 'Keyboard':
                        # running_index starts at 1, so an argument-less call
                        # reports 1; Keyboard calls are special-cased to
                        # compensate.
                        self.VMWriter.writeCall(doCallName + '.' + subName,
                                                nArgs - 1)
                    else:
                        self.VMWriter.writeCall(doCallName + '.' + subName,
                                                nArgs)

        # NOTE(review): the non-SubSymbolTable '.' branch above does not
        # return, so control also reaches this check; whether it can match
        # depends on where compileExpressionList leaves the tokenizer.
        if '(' in self.Tokenizer.getToken():  #method call like do clear()
            self.Tokenizer.advance()  #token is first token in expression list
            self.compileExpressionList()
            nArgs = self.running_index
            self.running_index = 0
            # NOTE(review): as above, `this` is pushed after the arguments.
            self.VMWriter.writePush('pointer', 0)  #pushes this

            self.VMWriter.writeCall(self.ClassName + '.' + doCallName, nArgs)

    def compileDo(self):
        self.Tokenizer.advance()
        self.compileCall()
        self.Tokenizer.advance()
        self.Tokenizer.advance()
        self.VMWriter.writePop('temp', 0)

    #dont think this works for arrays like a[b[c[5]] yet NOT CONFIDENT ON THIS METHOD

    def compileLet(self):
        #current token is let
        #doesnt work for a[i]
        self.Tokenizer.advance()  #now token = varname
        varName = self.Tokenizer.getStrToken()
        self.array_name = self.Tokenizer.getStrToken()
        self.Tokenizer.advance()  # = or [

        if '[' in self.Tokenizer.getStrToken(
        ):  #array, see section 11.1.1 Array Handling for help or unit 5.8 video
            self.is_array = True
            self.compileArrayExp()  #compile arrary term

            self.compileExpression()
            self.VMWriter.writePop('temp', 0)
            self.VMWriter.writePop('pointer', 1)
            self.VMWriter.writePush('temp', 0)
            self.VMWriter.writePop('that', 0)

        else:
            self.Tokenizer.advance()  #expression

            self.compileExpression()  #eg(5*(3+4))

            self.VMWriter.writePop(
                self.SymbolTable.kindOf(varName),
                self.SymbolTable.indexOf(varName))  #pop expression t
        self.Tokenizer.advance()
        if ';' in self.Tokenizer.getStrToken():
            self.Tokenizer.advance()

    def compileWhile(
            self):  #draft I might have the labeling wrong or something
        #at start token = while
        whileLabel = 'WHILE_EXP' + str(self.while_label_index)
        self.VMWriter.writeLabel(whileLabel)
        self.Tokenizer.advance()  #advances to (
        self.compileExpression()  #writes expression
        self.VMWriter.writeArithmetic('not')
        self.VMWriter.writeIf('WHILE_END' + str(self.while_label_index))
        while_end_index = self.while_label_index
        self.while_label_index += 1
        if '{' in self.Tokenizer.getStrToken():
            self.Tokenizer.advance()

        self.compileStatements()  #writes statements

        self.VMWriter.writeGoto(whileLabel)
        self.VMWriter.writeLabel('WHILE_END' + str(while_end_index))

        self.Tokenizer.advance()  #advance to after } ie done with this while

    def compileReturn(self):  #draft

        #starts at token = return
        self.Tokenizer.advance()
        if ';' not in self.Tokenizer.getToken():
            self.compileExpression()
            self.Tokenizer.advance()  #advance past ;
        else:  #placeholder return
            self.VMWriter.writePush('constant', 0)
            self.Tokenizer.advance()
        self.VMWriter.writeReturn()
        self.op_flag2 = False
        if ';' in self.Tokenizer.getStrToken():
            self.Tokenizer.advance()

    def compileIf(self):  #finished sort of the same as while
        """Compile an ``if`` statement with an optional ``else`` clause.

        Labels used (n = self.if_label_index on entry): IF_TRUE<n> for the
        then-branch, IF_FALSE<n> for the else-branch / fall-through, and
        IF_END<n> after an else-branch. The index is incremented before the
        branches are compiled.
        """
        #start token = if

        ifLabel = 'IF_TRUE' + str(self.if_label_index)

        # NOTE(review): called without first advancing off 'if'; presumably
        # compileTerm's fallback branch steps over `if (` -- TODO confirm.
        self.compileExpression()

        # Condition true -> IF_TRUE<n>; otherwise jump to IF_FALSE<n>.
        self.VMWriter.writeIf(ifLabel)
        self.VMWriter.writeGoto('IF_FALSE' + str(self.if_label_index))
        self.VMWriter.writeLabel(ifLabel)
        # Remember this statement's number; nested ifs bump the counter.
        else_index = self.if_label_index
        self.if_label_index += 1
        if '{' in self.Tokenizer.getStrToken():
            self.Tokenizer.advance()

        self.compileStatements()
        self.Tokenizer.advance()

        if 'else' in self.Tokenizer.getToken():
            # Then-branch skips the else-branch.
            self.VMWriter.writeGoto('IF_END' + str(else_index))
            self.VMWriter.writeLabel('IF_FALSE' + str(else_index))
            self.Tokenizer.advance()  #advances to {
            self.Tokenizer.advance()  #advances to statements
            self.compileStatements()
            self.Tokenizer.advance()
            self.VMWriter.writeLabel('IF_END' + str(else_index))
            if '}' in self.Tokenizer.peek():
                self.Tokenizer.advance()
        else:
            # No else: IF_FALSE<n> simply marks the end of the statement.
            self.VMWriter.writeLabel('IF_FALSE' + str(else_index))

        #self.if_label_index

    def compileExpression(self):  #draft
        """Compile a term, optionally followed by an operator and another term.

        After the first term the tokenizer is advanced once; if the new
        token is an operator (Tokenizer.isOp()) the matching branch below
        compiles the right-hand term and emits the operation. op_flag1
        records whether an operator was seen; op_flag2 is cleared after the
        first term and set again only by the '+' branch.

        NOTE(review): the operator checks are independent ``if`` statements,
        not ``elif``: after one branch has consumed its operand, the later
        checks run against whatever token is current by then. Only the '/'
        branch advances again after its operand. Confirm both are intended.
        """

        self.compileTerm()
        self.op_flag2 = False

        self.Tokenizer.advance()

        if self.Tokenizer.isOp():
            # '*' and '/' are compiled to OS calls.
            if '*' in self.Tokenizer.getStrToken():

                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeCall('Math.multiply', 2)
            if '/' in self.Tokenizer.getStrToken():
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeCall('Math.divide', 2)
                self.Tokenizer.advance()

            if '+' in self.Tokenizer.getStrToken():
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeArithmetic('add')
                self.op_flag2 = True

            # Comparison / logical operators are matched in their
            # XML-escaped spelling (&gt, &amp, &lt).
            if '&gt' in self.Tokenizer.getStrToken():
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeArithmetic('gt')

            if '&amp' in self.Tokenizer.getStrToken():
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeArithmetic('and')

            if '|' in self.Tokenizer.getStrToken():
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeArithmetic('or')
            if '&lt' in self.Tokenizer.getStrToken():
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeArithmetic('lt')
            if '=' in self.Tokenizer.getStrToken():
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeArithmetic('eq')
            if '-' in self.Tokenizer.getStrToken():
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeArithmetic('sub')

            self.op_flag1 = True

        else:

            self.op_flag1 = False
        ##ie two ops in a row so write this

    def compileTerm(
            self):  #unfinished
        """Emit VM code for the term at the current token.

        Branches, in order:

        * next token contains '[': array read ``name[...]``; returns early.
        * integer / string constants, ``this``, ``true``, ``false``.
        * unary ``-`` and ``~`` (operand compiled recursively).
        * parenthesised expression.
        * a name found in SubSymbolTable or ClassSymbolTable: push it.

        Afterwards a second, independent check handles a current token that
        contains '[', and otherwise looks ahead for '.' (subroutine call via
        compileCall) or '(' (nested expression).

        NOTE(review): there is no branch for ``null`` in this method.
        """

        if '[' in self.Tokenizer.peek():
            # Array read: remember the array name, then step onto the index.
            self.array_name = self.Tokenizer.getStrToken()
            self.Tokenizer.advance()  #advances to [
            self.Tokenizer.advance()  #advances to inner term
            # The token after '[' is pushed via a symbol-table lookup, i.e.
            # it is treated as a variable name.
            self.VMWriter.writePush(
                self.SymbolTable.kindOf(self.Tokenizer.getStrToken()),
                self.SymbolTable.indexOf(self.Tokenizer.getStrToken()))
            # Array base address.
            self.VMWriter.writePush(self.SymbolTable.kindOf(self.array_name),
                                    self.SymbolTable.indexOf(self.array_name))
            self.Tokenizer.advance()  #advances to another term or ]

            if ']' not in self.Tokenizer.getStrToken():
                # Something follows the index token before ']'.
                self.array_name = self.Tokenizer.getStrToken()
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeArithmetic('add')
                self.VMWriter.writePush('temp', 0)
                self.VMWriter.writePop('pointer', 1)
                self.VMWriter.writePush('temp', 0)
                self.VMWriter.writePop('that', 0)
            if ']' in self.Tokenizer.getStrToken():

                # base + index -> THAT pointer, then read the element.
                self.VMWriter.writeArithmetic('add')
                self.VMWriter.writePop('pointer', 1)

                self.VMWriter.writePush('that', 0)
                self.Tokenizer.advance()
            return

        if 'integerConstant' in self.Tokenizer.getToken():
            # The push is skipped while op_flag2 is set.
            if self.op_flag2 == False:
                self.VMWriter.writePush('constant',
                                        self.Tokenizer.getStrToken())

        elif 'stringConstant' in self.Tokenizer.getToken(
        ):  #forums explanation here: http://nand2tetris-questions-and-answers-forum.32033.n3.nabble.com/Project-11-gt-Strings-calling-string-constructor-td4030992.html#a4030993
            # Strip the surrounding tags to recover the literal text.
            string = self.Tokenizer.getToken()
            string = string.replace('<stringConstant> ', '')
            string = string.replace(' </stringConstant>', '')

            # Build the string at run time: String.new(len), then one
            # appendChar call per character.
            self.VMWriter.writePush('constant', len(string))
            self.VMWriter.writeCall('String.new', 1)
            for char in string:

                self.VMWriter.writePush('constant', ord(char))
                self.VMWriter.writeCall('String.appendChar', 2)

        elif 'this' in self.Tokenizer.getStrToken():
            self.VMWriter.writePush('pointer', 0)

        elif 'true' in self.Tokenizer.getStrToken():
            # true is emitted as `not 0`.
            self.VMWriter.writePush('constant', 0)
            self.VMWriter.writeArithmetic('not')

        elif 'false' in self.Tokenizer.getStrToken():
            self.VMWriter.writePush('constant', 0)

        elif '-' in self.Tokenizer.getToken():
            # Unary minus: compile the operand, then negate it.
            self.Tokenizer.advance()
            self.compileTerm()
            self.VMWriter.writeArithmetic('neg')

        elif '~' in self.Tokenizer.getToken():
            # Unary not.
            self.Tokenizer.advance()
            self.compileTerm()
            self.VMWriter.writeArithmetic('not')
        elif '(' in self.Tokenizer.getToken():
            self.Tokenizer.advance()  #advances to expression of off (
            self.compileExpression()
            self.Tokenizer.advance()  #advances off of )

        elif self.Tokenizer.getStrToken() in self.SymbolTable.SubSymbolTable:

            # Subroutine-level variable: push from its own segment.
            self.VMWriter.writePush(
                self.SymbolTable.kindOf(self.Tokenizer.getStrToken()),
                self.SymbolTable.indexOf(self.Tokenizer.getStrToken()))
        elif self.Tokenizer.getStrToken() in self.SymbolTable.ClassSymbolTable:
            # Class-level variable: always pushed from `this`, and skipped
            # while op_flag2 is set.
            if self.op_flag2 == False:
                self.VMWriter.writePush(
                    'this',
                    self.SymbolTable.indexOf(self.Tokenizer.getStrToken()))
        # This is a separate `if`, so it runs after whichever branch above
        # was taken.
        if '[' in self.Tokenizer.getStrToken(
        ):  #recursion here for multiple arrays of arrays

            self.Tokenizer.advance()  #advance off of [ and on to val
            self.VMWriter.writePush(
                self.SymbolTable.kindOf(self.Tokenizer.getStrToken()),
                self.SymbolTable.indexOf(self.Tokenizer.getStrToken()))
            self.VMWriter.writePush(self.SymbolTable.kindOf(self.array_name),
                                    self.SymbolTable.indexOf(self.array_name))
            self.Tokenizer.advance()  #advances to another term or ]

            if ']' not in self.Tokenizer.getStrToken():
                self.array_name = self.Tokenizer.getStrToken()
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeArithmetic('add')
                self.VMWriter.writePush('temp', 0)
                self.VMWriter.writePop('pointer', 1)
                self.VMWriter.writePush('temp', 0)
                self.VMWriter.writePop('that', 0)
            if ']' in self.Tokenizer.getStrToken():
                self.VMWriter.writeArithmetic('add')
                self.Tokenizer.advance()

            # NOTE(review): debugging output left in the compile path.
            print('1: ' + self.Tokenizer.getStrToken() + self.Tokenizer.peek())
        else:  #var dec makes it harder come back to later
            #very possible something could be wrong here
            if '.' in self.Tokenizer.peek():

                # name '.' ... : a qualified subroutine call.
                self.compileCall()

            if '(' in self.Tokenizer.peek():
                # NOTE(review): getStrToken is passed without being called
                # (the bound method itself is looked up); the branch body is
                # only `pass`, so the lookup result has no effect.
                if self.SymbolTable.kindOf(
                        self.Tokenizer.getStrToken) != 'NONE':
                    #self.VMWriter.writePush(self.SymbolTable.kindOf(self.Tokenizer.getStrToken),self.SymbolTable.indexOf(self.Tokenizer.getStrToken))
                    pass
                # Step over the current token and '(' and compile what is
                # inside the parentheses.
                self.Tokenizer.advance()
                self.Tokenizer.advance()
                self.compileExpression()
                self.Tokenizer.advance()

    def compileArrayExp(self):
        """Compile the array-element part of a ``let`` target.

        Delegates to compileTerm() and then advances the tokenizer by one
        token; called by compileLet() when the target is followed by '['.
        """
        self.compileTerm()
        self.Tokenizer.advance()

    def compileExpressionList(
            self):  #calls compile expression until ; then say im done
        """Compile the comma-separated argument expressions of a call.

        The count is reported through self.running_index instead of a
        return value: it is set to 1 on entry and incremented for every ','
        encountered, so an argument-less call still leaves it at 1.
        Clears op_flag2 when the loop ends normally.

        NOTE(review): the loop ends only when the *next* token contains ';'
        (or via the empty-argument early return below), so it presumably
        relies on the call being directly followed by ';' -- TODO confirm
        the behaviour for calls nested inside larger expressions.
        """

        self.running_index = 1
        while ';' not in self.Tokenizer.peek(
        ):  #and ';' not in self.Tokenizer.getStrToken() :
            # '(' immediately followed by ')': no arguments at all. This
            # return skips the op_flag2 reset at the bottom.
            if '(' in self.Tokenizer.getStrToken(
            ) and ')' in self.Tokenizer.peek():
                return
            if ',' in self.Tokenizer.getToken():
                # Separator: move onto the next argument and count it.
                self.Tokenizer.advance()
                self.running_index += 1

            else:
                self.compileExpression()

        self.op_flag2 = False

        #token at end is )

    def close(self):
        """Close self.outfile, the file opened from ``writefile`` in __init__."""
        self.outfile.close()
コード例 #5
0
class CompilationEngine:
    """Recursive top-down parser"""

    def __init__(self, inFile, outFile):
        """Create a compilation engine reading inFile and writing outFile.

        One token is read during construction (via getNext()). The next
        routine called must be compileClass().
        """
        # Per-class state; filled in by compileClass().
        self.className = ''
        self.classTable = None

        # Counters that make generated while / if labels unique.
        self.labelWhile = 1
        self.labelIf = 1

        self.tokenizer = JackTokenizer(inFile)
        # NOTE(review): outFile is opened here and also handed to VMWriter
        # below; presumably VMWriter opens the same path itself -- confirm
        # that this second handle is needed.
        self.targetFile = open(outFile, 'w')
        self.getNext()
        self.writer = VMWriter(outFile)

    def getNext(self):
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()

    def compileClass(self):
        """Compile a complete class.

        Grammar: 'class' className '{' classVarDec* subroutineDec* '}'
        """
        self.classTable = SymbolTable()

        self.getNext()  # step past 'class'
        self.className = self.tokenizer.getToken()
        self.getNext()  # step past className
        self.getNext()  # step past '{'

        # classVarDec*
        while self.tokenizer.getToken() in ("static", "field"):
            self.compileDec()

        # subroutineDec*
        while self.tokenizer.getToken() in ("constructor", "function", "method"):
            self.compileSubroutine()

        self.getNext()  # step past '}'


    def compileSubroutine(self):
        """Compile a complete method, function, or constructor.

        Grammar: ('constructor' | 'function' | 'method') ('void' | type)
        subroutineName '(' parameterList ')' '{' varDec* statements '}'
        """
        self.classTable.startSubroutine()

        kind = self.tokenizer.getToken()
        self.getNext()  # step past the subroutine kind
        self.getNext()  # step past the return type ('void' | type)

        name = self.tokenizer.getToken()
        self.getNext()  # step past subroutineName
        self.getNext()  # step past '('
        self.compileParameterList(kind == 'method')
        self.getNext()  # step past ')'
        self.getNext()  # step past '{'

        # varDec*: the local count is known only after all of them.
        while self.tokenizer.getToken() == 'var':
            self.compileDec()
        localCount = self.classTable.varCount(Toolbox.VAR)

        # Every kind starts with the same function header.
        self.writer.writeFunction(self.className + "." + name, localCount)

        if kind == 'constructor':
            # push constant <number of fields>; call Memory.alloc 1;
            # pop pointer 0
            fieldCount = self.classTable.varCount(Toolbox.FIELD)
            self.writer.writePush(Toolbox.CONST, fieldCount)
            self.writer.writeCall('Memory.alloc', 1)
            self.writer.writePop(Toolbox.POINTER, 0)
        elif kind != 'function':
            # method: push argument 0; pop pointer 0
            self.writer.writePush(Toolbox.SEG_ARG, 0)
            self.writer.writePop(Toolbox.POINTER, 0)

        self.compileStatements()
        self.getNext()  # step past '}'

    def compileParameterList(self, method=False):
        """Compiles a (possibly empty) parameter list,
        not including the enclosing "()"."""
        tokenType, name = '', ''

        if method:  # Add this to method's var list.
            self.classTable.define(None, None, Toolbox.ARG)

        if self.tokenizer.tokenType() != self.tokenizer.SYMBOL:  # param list not empty
            while True:
                tokenType = self.tokenizer.getToken()
                self.getNext()

                name = self.tokenizer.getToken()
                self.classTable.define(name, tokenType, Toolbox.ARG)
                self.getNext()

                if self.tokenizer.getToken() == ')':
                    break

                self.getNext()  # ','

    def compileStatements(self):  # (letStatement | ifStatement | whileStatement | doStatement | returnStatement)*
        """Compiles a sequence of statements,
        not including the enclosing "{}"."""
        token = self.tokenizer.getToken()
        while token in ["let", "if", "while", "do", "return"]:
            if token == 'let':
                self.compileLet()
            elif token == 'if':
                self.compileIf()
            elif token == 'while':
                self.compileWhile()
            elif token == 'do':
                self.compileDo()
            elif token == 'return':
                self.compileReturn()
            token = self.tokenizer.getToken()

    def compileSubroutineCall(self, name, printIdentifier=True):
     # subroutineName '(' expressionList ') ' | ( className | varName) '.' subroutineName '(' expressionList ') '

        var = None
        nArgs = 0
        if printIdentifier:
            # subroutineName | ( className | varName)
            self.getNext()

        var = self.classTable.searchScope(name)

        if self.tokenizer.getToken() == '.':
            if var:
                # push <this>
                self.writer.writePush(var[0], var[1])
                nArgs += 1
                className = var[2]  # Use the type instead of the variable name
            else:
                className = name
            self.getNext()
            subroutineName = self.tokenizer.getToken()
            self.getNext()
        else:
            # push <this>
            self.writer.writePush(Toolbox.POINTER, 0)
            nArgs += 1
            className = self.className
            subroutineName = name

        name = className + '.' + subroutineName
        # '('
        self.getNext()
        nArgs += self.compileExpressionList()

        self.writer.writeCall(name, nArgs)
        # ')'
        self.getNext()

    def compileDo(self):  # 'do' subroutineCall ';'
        """Compile a do statement: the call, then discard its return value."""
        self.getNext()  # step past 'do'

        callee = self.tokenizer.getToken()
        self.compileSubroutineCall(callee)
        # The callee's return value is unused by a do statement.
        self.writer.writePop(Toolbox.TEMP, 0)

        if self.tokenizer.getToken() != ';':
            return
        self.getNext()  # step past ';'

    def compileLet(self):  # 'let' varName ('[' expression ']')? '=' expression ';'
        """Compiles a let statement"""
        # let
        # self.targetFile.write(T_LET)
        self.getNext()
        # var name
        name = self.tokenizer.getToken()
        # search scope
        segment, index, type = self.classTable.searchScope(name)

        self.getNext()
        # [
        array = False
        if self.tokenizer.getToken() == '[':
            array = True
            self.writer.writePush(segment, index)
            self.getNext()
            # expression
            self.compileExpression()
            # ]
            self.getNext()
            self.writer.writeArithmetic('add')
        # =
        self.getNext()
        # expression
        self.compileExpression()

        if array:
            self.writer.writePop(Toolbox.TEMP, 0)
            self.writer.writePop(Toolbox.TEMP, 1)
            self.writer.writePush(Toolbox.TEMP, 0)
            self.writer.writePush(Toolbox.TEMP, 1)

            self.writer.writePop(Toolbox.POINTER, 1)
            self.writer.writePop(Toolbox.THAT, 0)
        else:
            self.writer.writePop(segment, index)

        # ;
        token = self.tokenizer.getToken()
        if token == ';':
            self.getNext()

    def compileWhile(self):  # while' '(' expression ')' '{' statements '}'
        """Compiles a while statement"""
        # while
        label = str(self.labelWhile)
        self.labelWhile += 1
        self.writer.writeLabel('while' + label)
        self.getNext()
        # (
        self.getNext()
        # expression
        self.compileExpression()
        # )
        self.getNext()
        self.writer.writeArithmetic('not')
        self.writer.writeIf('endwhile' + label)
        # {
        self.getNext()
        # statements
        self.compileStatements()
        # }
        self.getNext()
        self.writer.writeGoto('while' + label)
        self.writer.writeLabel('endwhile' + label)

    def compileReturn(self):  # 'return' expression? ';'
        """Compile a return statement.

        When the token after ``return`` is ``;`` there is no value, so a
        constant 0 is pushed instead; otherwise the expression is compiled.
        ``writeReturn`` is then called in both cases.
        """
        self.getNext()  # step over 'return'

        if self.tokenizer.getToken() == ";":
            self.writer.writePush(Toolbox.CONST, 0)
        else:
            self.compileExpression()

        self.writer.writeReturn()
        self.getNext()  # step over ';'

    def compileIf(self):  # 'if' '(' expression ')' '{' statements '}' ( 'else' '{' statements '}' )?
        """Compile an if statement with an optional else clause.

        Labels are built from ``self.labelIf`` (``elseifN`` / ``endifN``); the
        counter is bumped up front so nested ifs get their own number. The
        negated condition jumps to the else label, and the true branch ends
        with a goto to the end label. Both labels are written even when there
        is no else clause.
        """
        tag = 'if' + str(self.labelIf)
        self.labelIf += 1
        else_label = 'else' + tag
        end_label = 'end' + tag

        self.getNext()  # step over 'if'
        self.getNext()  # step over '('
        self.compileExpression()
        self.getNext()  # step over ')'

        self.writer.writeArithmetic('not')
        self.writer.writeIf(else_label)

        self.getNext()  # step over '{'
        self.compileStatements()
        self.getNext()  # step over '}'

        self.writer.writeGoto(end_label)
        self.writer.writeLabel(else_label)

        if self.tokenizer.getToken() == 'else':
            self.getNext()  # step over 'else'
            self.getNext()  # step over '{'
            self.compileStatements()
            self.getNext()  # step over '}'

        self.writer.writeLabel(end_label)

    def compileExpression(self):
        """Compile ``term (op term)*``.

        Each operator is handed to ``writer.writeArithmetic`` only after its
        right-hand term has been compiled. Operators are applied strictly left
        to right; no precedence is implemented here.
        """
        binary_ops = ('+', '/', '-', '*', '&', '|', '>', '<', '=')

        self.compileTerm()
        while True:
            op = self.tokenizer.getToken()
            if op not in binary_ops:
                break
            self.getNext()  # step over the operator
            self.compileTerm()
            self.writer.writeArithmetic(op)


    def compileTerm(self):
        """Compile a single term.

        Grammar: integerConstant | stringConstant | keywordConstant | varName |
        varName '[' expression ']' | subroutineCall | '(' expression ')' |
        unaryOp term.

        Tokens matching none of these forms are left untouched and nothing is
        written.
        """
        current = self.tokenizer.getToken()
        kind = self.tokenizer.tokenType()

        if kind == self.tokenizer.INT_CONST:
            self.writer.writePush(Toolbox.CONST, current)
            self.getNext()
            return

        if kind == self.tokenizer.STRING_CONST:
            # Create the string object, then append one character per call.
            self.writer.writePush(Toolbox.CONST, len(current))
            self.writer.writeCall('String.new', 1)
            for char in current:
                self.writer.writePush(Toolbox.CONST, ord(char))
                self.writer.writeCall('String.appendChar', 2)
            self.getNext()
            return

        if kind == self.tokenizer.KEYWORD:  # true | false | null | this
            self.compileKeywordConstant(current)
            return

        if kind == self.tokenizer.IDENTIFIER:
            # One token of lookahead decides between array access, call and
            # plain variable.
            self.getNext()
            following = self.tokenizer.getToken()
            if following == '[':
                self.compileVarName(current)  # push the array base
                self.getNext()  # step over '['
                self.compileExpression()
                self.getNext()  # step over ']'
                self.writer.writeArithmetic('add')
                self.writer.writePop(Toolbox.POINTER, 1)
                self.writer.writePush(Toolbox.THAT, 0)
            elif following in ('(', '.'):
                self.compileSubroutineCall(current, False)
            else:
                self.compileVarName(current)
            return

        if current == '(':
            self.getNext()  # step over '('
            self.compileExpression()
            self.getNext()  # step over ')'
        elif current in ('-', '~'):
            self.compileUnary(current)


    def compileExpressionList(self):
        """Compile a comma-separated list of expressions, which may be empty.

        The list is taken to be empty when the current token is ``)``.

        :return: number of expressions compiled
        """
        if self.tokenizer.getToken() == ')':
            return 0

        self.compileExpression()
        count = 1
        while self.tokenizer.getToken() == ',':
            self.getNext()  # step over ','
            self.compileExpression()
            count += 1
        return count

    def compileDec(self):  # ('var' | 'field' | 'static') type varName (',' varName)* ';'
        """Compile a variable declaration and register each name.

        The leading keyword picks the kind (``Toolbox.VAR``, ``Toolbox.FIELD``
        or ``Toolbox.STATIC``; ``None`` for any other token). Every declared
        name is passed to ``self.classTable.define`` with the same type and
        kind.
        """
        keyword = self.tokenizer.getToken()
        if keyword == 'var':
            kind = Toolbox.VAR
        elif keyword == 'field':
            kind = Toolbox.FIELD
        elif keyword == 'static':
            kind = Toolbox.STATIC
        else:
            kind = None
        self.getNext()

        # The type token is stored as-is; it may be a keyword or a class name.
        declared_type = self.tokenizer.getToken()
        self.getNext()

        while True:
            var_name = self.tokenizer.getToken()
            self.classTable.define(var_name, declared_type, kind)
            self.getNext()
            more_names = (self.tokenizer.tokenType() == self.tokenizer.SYMBOL
                          and self.tokenizer.getToken() == ',')
            if not more_names:
                break
            self.getNext()  # step over ','

        self.getNext()  # step over ';'

    def compileVarName(self, name):
        """Push the variable ``name`` using the segment and index that
        ``self.classTable.searchScope`` reports for it.

        :param name: identifier to look up
        """
        # searchScope yields (segment, index, type); the type is not needed.
        segment, index, _var_type = self.classTable.searchScope(name)
        self.writer.writePush(segment, index)

    def compileKeywordConstant(self, keyword):
        """Compile a keyword used as a constant, then step over it.

        ``false`` and ``null`` push constant 0, ``true`` pushes constant 0
        followed by ``not``, and ``this`` pushes pointer 0. Any other keyword
        writes nothing but is still stepped over.

        :param keyword: the keyword token
        """
        if keyword in ('false', 'null'):
            self.writer.writePush(Toolbox.CONST, 0)
        elif keyword == 'true':
            self.writer.writePush(Toolbox.CONST, 0)
            self.writer.writeArithmetic('not')
        elif keyword == 'this':
            self.writer.writePush(Toolbox.POINTER, 0)
        self.getNext()

    def compileUnary(self, token):
        """
        Compile a unary operator applied to a term.

        The operand is compiled first and the operation is written after it.
        :param token: the unary operator; '-' writes 'neg', anything else
                      (expected to be '~') writes 'not'
        """
        self.getNext()  # step over the operator
        self.compileTerm()  # operand

        operation = 'neg' if token == '-' else 'not'
        self.writer.writeArithmetic(operation)
コード例 #6
0
class CompilationEngine:

    def __init__(self, inputFile, outputFile):
        """Set up the tokenizer, writer and symbol table, then compile.

        Compilation happens inside the constructor, through ``CompileClass``.
        The two label counters are only assigned after that call returns;
        ``CompileSubroutine`` sets both to 0 itself before compiling a body.

        :param inputFile: passed to ``JackTokenizer``
        :param outputFile: passed to ``VMWriter``
        """
        self.tokenizer = JackTokenizer(inputFile)
        self.vmWriter = VMWriter(outputFile)
        self.symbolTable = SymbolTable()
        self.classname = ""

        self.CompileClass()

        self.whilecounter = self.ifcounter = 0

    def CompileClass(self):
        """Compile a whole class: name, class variables, then subroutines.

        The first ``advance`` is expected to land on the class name, which is
        stored in ``self.classname``.
        """
        tok = self.tokenizer

        tok.advance()
        self.classname = tok.identifier()
        tok.advance()
        tok.advance()  # skip '{'

        while tok.keyWord() in ("static", "field"):
            self.CompileClassVarDec()

        while tok.keyWord() in ("constructor", "function", "method"):
            self.CompileSubroutine()

        tok.advance()  # skip '}'



    def CompileClassVarDec(self):
        """Register the variables of one ``static`` / ``field`` declaration.

        The current keyword is used as the kind; every name in the
        comma-separated list is defined with the same type and kind. No VM
        code is written.
        """
        tok = self.tokenizer

        kind = tok.keyWord()
        tok.advance()
        var_type = self.compileType()

        while True:
            self.symbolTable.define(tok.identifier(), var_type, kind)
            tok.advance()
            if tok.symbol() != ",":
                break
            tok.advance()  # skip ','

        tok.advance()  # skip ';'

    def CompileSubroutine(self):
        """Compile one constructor, function or method.

        The subroutine scope and both label counters are reset first. For a
        method, ``this`` is defined as the first ``arg`` before the parameters.
        ``writeFunction`` is only called after all ``var`` declarations have
        been read, because it needs the local count.
        """
        tok = self.tokenizer
        vm = self.vmWriter

        self.symbolTable.startSubroutine()
        self.ifcounter = 0
        self.whilecounter = 0

        functype = tok.keyWord()  # constructor | function | method
        tok.advance()

        if functype == "method":
            self.symbolTable.define("this", self.classname, "arg")

        tok.advance()  # skip the return type

        full_name = self.classname + "." + tok.identifier()
        tok.advance()

        tok.advance()  # skip '('
        self.compileParameterList()
        tok.advance()  # skip ')'

        tok.advance()  # skip '{'
        while tok.keyWord() == "var":
            self.compileVarDec()

        vm.writeFunction(full_name, self.symbolTable.varCount("var"))

        if functype == "constructor":
            # Allocate one word per field and point 'this' at the new block.
            vm.writePush("constant", self.symbolTable.varCount("field"))
            vm.writeCall("Memory.alloc", "1")
            vm.writePop("pointer", "0")
        elif functype == "method":
            # Argument 0 is the object the method was called on.
            vm.writePush("argument", "0")
            vm.writePop("pointer", "0")

        self.compileStatements()

        tok.advance()  # skip '}'

    def compileParameterList(self):
        """Define every parameter of the current subroutine as an ``arg``.

        The list is taken to be empty when the current token has type 1
        (the type the other methods test before calling ``symbol()``).
        """
        tok = self.tokenizer
        if tok.tokenType() == 1:
            return

        while True:
            # type varName
            argtype = self.compileType()
            self.symbolTable.define(tok.identifier(), argtype, "arg")
            tok.advance()
            if tok.symbol() != ",":
                break
            tok.advance()  # skip ','

    def compileVarDec(self):
        """Register the names of one ``var`` declaration as kind ``var``.

        All names in the comma-separated list share the declared type.
        """
        tok = self.tokenizer

        tok.advance()  # skip 'var'
        var_type = self.compileType()

        while True:
            self.symbolTable.define(tok.identifier(), var_type, "var")
            tok.advance()
            if tok.symbol() != ",":
                break
            tok.advance()  # skip ','

        tok.advance()  # skip ';'


    def compileStatements(self):
        """Compile consecutive statements until a non-statement token.

        The loop runs while the current token has type 0 and its keyword is
        one of let / if / while / do / return. A type-0 token with any other
        keyword now ends the loop: no handler advances past such a token, so
        continuing would never terminate.
        """
        handlers = {
            "let": self.compileLet,
            "if": self.compileIf,
            "while": self.compileWhile,
            "do": self.compileDo,
            "return": self.compileReturn,
        }

        while self.tokenizer.tokenType() == 0:
            handler = handlers.get(self.tokenizer.keyWord())
            if handler is None:
                # Not a statement keyword: stop and let the caller deal with it.
                break
            handler()


    def compileDo(self):
        """Compile a ``do`` statement: a subroutine call whose result is
        popped into temp 0 and never read.
        """
        tok = self.tokenizer

        tok.advance()  # skip 'do'
        self.compileSubRoutineCall()
        self.vmWriter.writePop("temp", "0")
        tok.advance()  # skip ';'

    def compileLet(self):
        """Compile ``let varName ('[' expression ']')? = expression ;``.

        Plain assignment pops the value into the variable's segment. Array
        assignment computes index + base first, then parks the right-hand
        value in temp 0 while pointer 1 is loaded with the target address.

        A variable whose kind is not field / var / arg / static produces no
        push or pop for the variable itself.
        """
        tok = self.tokenizer
        vm = self.vmWriter
        segments = {"field": "this", "var": "local",
                    "arg": "argument", "static": "static"}

        tok.advance()  # skip 'let'
        varname = tok.identifier()
        segment = segments.get(self.symbolTable.kindOf(varname))
        tok.advance()

        is_array = tok.symbol() == "["
        if is_array:
            tok.advance()  # skip '['
            self.CompileExpression()
            if segment is not None:
                vm.writePush(segment, self.symbolTable.indexOf(varname))
            vm.writeArithmetic("add")
            tok.advance()  # skip ']'

        tok.advance()  # skip '='
        self.CompileExpression()

        if is_array:
            vm.writePop("temp", "0")
            vm.writePop("pointer", "1")
            vm.writePush("temp", "0")
            vm.writePop("that", "0")
        elif segment is not None:
            vm.writePop(segment, self.symbolTable.indexOf(varname))

        tok.advance()  # skip ';'


    def compileWhile(self):
        """Compile ``while ( expression ) { statements }``.

        Labels are ``WHILE_EXP<n>`` / ``WHILE_END<n>``, with n read from
        ``self.whilecounter`` before the condition is compiled. The negated
        condition jumps to the end label; the body ends with a goto back to
        the expression label.
        """
        tok = self.tokenizer
        vm = self.vmWriter

        tok.advance()  # skip 'while'
        tok.advance()  # skip '('

        suffix = str(self.whilecounter)
        self.whilecounter += 1
        start_label = "WHILE_EXP" + suffix
        end_label = "WHILE_END" + suffix

        vm.writeLabel(start_label)
        self.CompileExpression()
        vm.writeArithmetic("not")
        vm.writeIf(end_label)
        tok.advance()  # skip ')'

        tok.advance()  # skip '{'
        self.compileStatements()
        tok.advance()  # skip '}'

        vm.writeGoto(start_label)
        vm.writeLabel(end_label)

    def compileReturn(self):
        """Compile ``return expression? ;``.

        When ``isTerm()`` says no expression follows, constant 0 is pushed so
        that a value is always on the stack when ``writeReturn`` is called.
        """
        tok = self.tokenizer

        tok.advance()  # skip 'return'

        if self.isTerm():
            self.CompileExpression()
        else:
            self.vmWriter.writePush("constant", "0")
        self.vmWriter.writeReturn()

        tok.advance()  # skip ';'


    def compileIf(self):
        """Compile ``if ( expression ) { statements }`` with optional else.

        The label number is read from ``self.ifcounter`` after the condition
        has been compiled. ``IF_END<n>`` is only used when an else clause is
        present; otherwise the ``IF_FALSE<n>`` label closes the statement.
        """
        tok = self.tokenizer
        vm = self.vmWriter

        tok.advance()  # skip 'if'
        tok.advance()  # skip '('
        self.CompileExpression()

        suffix = str(self.ifcounter)
        self.ifcounter += 1
        true_label = "IF_TRUE" + suffix
        false_label = "IF_FALSE" + suffix
        end_label = "IF_END" + suffix

        vm.writeIf(true_label)
        vm.writeGoto(false_label)
        vm.writeLabel(true_label)
        tok.advance()  # skip ')'

        tok.advance()  # skip '{'
        self.compileStatements()
        tok.advance()  # skip '}'

        has_else = tok.tokenType() == 0 and tok.keyWord() == "else"
        if not has_else:
            vm.writeLabel(false_label)
            return

        # The true branch must jump over the else body.
        vm.writeGoto(end_label)
        vm.writeLabel(false_label)

        tok.advance()  # skip 'else'
        tok.advance()  # skip '{'
        self.compileStatements()
        tok.advance()  # skip '}'

        vm.writeLabel(end_label)


    def CompileExpression(self):
        """Compile ``term (op term)*``, applying operators left to right.

        An operator is only recognised when the token has type 1 and its
        symbol is in the module-level ``operators`` collection. ``*`` and
        ``/`` become calls to ``Math.multiply`` / ``Math.divide``; the rest
        map to a single arithmetic command. The ``&amp;`` / ``&lt;`` /
        ``&gt;`` keys match symbols in their XML-escaped form.
        """
        tok = self.tokenizer
        vm = self.vmWriter
        arithmetic = {
            "=": "eq",
            "+": "add",
            "-": "sub",
            "&amp;": "and",
            "|": "or",
            "&lt;": "lt",
            "&gt;": "gt",
        }
        library = {"*": "Math.multiply", "/": "Math.divide"}

        self.CompileTerm()
        op = tok.symbol()
        while tok.tokenType() == 1 and op in operators:
            tok.advance()  # skip the operator
            self.CompileTerm()
            if op in arithmetic:
                vm.writeArithmetic(arithmetic[op])
            elif op in library:
                vm.writeCall(library[op], "2")
            op = tok.symbol()

    def CompileTerm(self):
        """Compile one term, dispatching on the current token type.

        Type codes as used in this method: 3 integer constant, 4 string
        constant, 0 keyword, 2 identifier, 1 symbol. An identifier is
        classified by peeking at the next entry of ``tokenizer.tokens``:
        ``[`` means array access, ``(`` or ``.`` a subroutine call, anything
        else a plain variable. Whatever matches no earlier case is handled as
        a unary operator applied to a term.
        """
        tok = self.tokenizer
        vm = self.vmWriter
        segments = {"field": "this", "var": "local",
                    "arg": "argument", "static": "static"}
        kind = tok.tokenType()

        if kind == 3:
            vm.writePush("constant", tok.intVal())
            tok.advance()

        elif kind == 4:
            # Create the string object, then append one character per call.
            text = tok.stringVal()
            vm.writePush("constant", str(len(text)))
            vm.writeCall("String.new", "1")
            for char in text:
                vm.writePush("constant", str(ord(char)))
                vm.writeCall("String.appendChar", "2")
            tok.advance()

        elif kind == 0:
            keywordconst = tok.keyWord()
            if keywordconst == "true":
                vm.writePush("constant", "0")
                vm.writeArithmetic("not")
            elif keywordconst in ("false", "null"):
                vm.writePush("constant", "0")
            elif keywordconst == "this":
                vm.writePush("pointer", "0")
            tok.advance()

        elif kind == 2:
            lookahead = tok.tokens[tok.currentToken + 1]
            if lookahead == '[':
                # varName [ expression ]
                varname = tok.identifier()
                segment = segments.get(self.symbolTable.kindOf(varname))
                tok.advance()
                tok.advance()  # skip '['
                self.CompileExpression()
                if segment is not None:
                    vm.writePush(segment, self.symbolTable.indexOf(varname))
                vm.writeArithmetic("add")
                vm.writePop("pointer", "1")
                vm.writePush("that", "0")
                tok.advance()  # skip ']'
            elif lookahead in ('(', '.'):
                self.compileSubRoutineCall()
            else:
                varname = tok.identifier()
                segment = segments.get(self.symbolTable.kindOf(varname))
                if segment is not None:
                    vm.writePush(segment, self.symbolTable.indexOf(varname))
                tok.advance()

        elif kind == 1 and tok.symbol() == '(':
            tok.advance()  # skip '('
            self.CompileExpression()
            tok.advance()  # skip ')'

        else:
            # Unary operator: compile the operand first, then apply it.
            op = tok.symbol()
            tok.advance()
            self.CompileTerm()
            if op == "-":
                vm.writeArithmetic("neg")
            elif op == "~":
                vm.writeArithmetic("not")

    def compileSubRoutineCall(self):
        """Compile a subroutine call in one of its three forms.

        * ``name(...)``: a method of the current class; pointer 0 is pushed
          as the hidden first argument.
        * ``var.name(...)``: ``var`` is found in the symbol table; the
          variable is pushed as the hidden first argument and the call
          targets the variable's type.
        * ``Class.name(...)``: anything not in the symbol table; called with
          the explicit arguments only.
        """
        tok = self.tokenizer
        vm = self.vmWriter
        table = self.symbolTable

        identifier = tok.identifier()
        tok.advance()

        if tok.symbol() == '(':
            tok.advance()  # skip '('
            vm.writePush("pointer", "0")
            argnum = self.CompileExpressionList()
            vm.writeCall(self.classname + "." + identifier, str(argnum + 1))
            tok.advance()  # skip ')'
            return

        tok.advance()  # skip '.'
        subname = tok.identifier()
        tok.advance()
        tok.advance()  # skip '('

        in_class_scope = identifier in table.classtable
        in_subroutine_scope = identifier in table.subroutinetable

        if in_class_scope or in_subroutine_scope:
            # The subroutine scope decides the segment when both have the name.
            kind = table.kindOf(identifier)
            if in_subroutine_scope:
                segment = "local" if kind == "var" else "argument"
            else:
                segment = "static" if kind == "static" else "this"
            vm.writePush(segment, table.indexOf(identifier))

            argnum = self.CompileExpressionList()
            vm.writeCall(table.typeOf(identifier) + "." + subname, str(argnum + 1))
        else:
            argnum = self.CompileExpressionList()
            vm.writeCall(identifier + "." + subname, str(argnum))

        tok.advance()  # skip ')'

    def CompileExpressionList(self):
        """Compile a comma-separated list of expressions, which may be empty.

        The list is taken to be empty when ``isTerm()`` is false for the
        current token.

        :return: number of expressions compiled
        """
        if not self.isTerm():
            return 0

        tok = self.tokenizer
        self.CompileExpression()
        count = 1
        while tok.symbol() == ',':
            tok.advance()  # skip ','
            self.CompileExpression()
            count += 1
        return count

    def isTerm(self):
        """Return True when the current token can begin a term.

        That is: an integer or string constant (types 3 and 4), an identifier
        (type 2), a keyword found in the module-level ``keyword_const``, or
        one of the symbols ``(``, ``-``, ``~``.
        """
        tok = self.tokenizer
        kind = tok.tokenType()

        if kind in (3, 4, 2):
            return True
        if kind == 0:
            return tok.keyWord() in keyword_const
        if kind == 1:
            return tok.symbol() in ('(', '-', '~')
        return False

    def compileType(self):
        """Read the current token as a type name, step past it and return it.

        A type-0 token is read through ``keyWord()``; anything else is read
        through ``identifier()``.
        """
        tok = self.tokenizer
        typen = tok.keyWord() if tok.tokenType() == 0 else tok.identifier()
        tok.advance()
        return typen
コード例 #7
0
class CompilerEngine (JackTokenizer):
    """Jack compilation engine layered directly on top of the tokenizer.

    While parsing it accumulates an XML rendering of the parse in ``self.xml``
    and writes VM commands through ``self.vm``.
    """
    # Symbols compileExpression accepts as binary operators between two terms.
    B_OPERATOR = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
    # Symbols compileTerm treats as prefix operators.
    U_OPERATOR = ['-', '~']
    # Keywords compileTerm accepts as constant terms.
    KEYWORD_CONSTANT = ['true', 'false', 'null', 'this']
    # Keywords that start a statement; compileStatements loops while it sees one.
    STATEMENTS = ['let', 'if', 'while', 'do', 'return']
    # Built-in type keywords accepted by compileType.
    KEYWORD_TYPE = ['int', 'char', 'boolean']
    # NOTE(review): not referenced in the visible part of the class;
    # presumably the keywords that open a subroutine declaration. Confirm.
    KEYWORD_SUBROUTINE = ['constructor', 'function', 'method']
    # NOTE(review): not referenced in the visible part of the class;
    # presumably the keywords that open a class-level variable declaration. Confirm.
    KEYWORD_CLASS_VAR_TYPE = ['static', 'field']
    # Token types compileExpressionList takes as the start of an expression.
    TERM_TYPE = [Token.TK_INT, Token.TK_STRING, Token.TK_IDENTIFIER]
    # XML escapes xmlContent applies to tokens that are not string constants.
    REPLACEMENTS = {'<': '&lt;', '>': '&gt;', '"': '&quot;', '&': '&amp;'}
    
    def __init__ (self, path):
        """Initialise the tokenizer base class and the compilation state.

        The VM output path is ``self.path`` with its last five characters
        replaced by ``.vm``.

        :param path: passed unchanged to the tokenizer base class
        """
        super().__init__(path)
        self.xml = ''
        self.advance()  # load the first token before any compile* method runs
        self.st = SymbolTable()
        self.vm = VMWriter(self.path[:-5]+'.vm')
        self.className = ''
        self.labelCounter = 0
        self.currentFunctionName = ''
        self.currentSubroutineType = ''

    def xmlContent (self):
        """Return the current token as text that is safe inside an XML element.

        String constants lose their first and last character (the surrounding
        quotes), and every remaining character listed in ``REPLACEMENTS`` is
        escaped. Previously the string body was returned raw, so a constant
        containing ``&`` or ``<`` produced malformed XML. Any other token is
        looked up in ``REPLACEMENTS`` as a whole and returned unchanged when
        it has no entry.
        """
        tokenClass = self.tokenType()
        currentToken = self.getToken()
        if tokenClass != Token.TK_STRING:
            return self.REPLACEMENTS.get(currentToken, currentToken)
        body = currentToken[1:-1]
        return ''.join(self.REPLACEMENTS.get(char, char) for char in body)

    def generateXML (self):
        """Write the accumulated XML to ``<self.path>.xml``.

        The suffix is appended to the full input path, so the original
        extension is kept in the file name.
        """
        target = self.path+'.xml'
        with open(target, 'w+') as out:
            out.write(self.xml)

    def error (self, expected):
        """Report a syntax error on stderr and abort compilation.

        :param expected: what the parser wanted; a tuple or list is rendered
            as alternatives joined by ``' or '``, anything else is printed
            as given
        :raises CompilerException: always
        """
        print("\n# On file '%s', got following error:" % self.path, file=sys.stderr)
        if isinstance(expected, (tuple, list)):
            fancyExpected = ' or '.join(map(repr, expected))
        else:
            fancyExpected = expected
        details = (self.getLine()+1, fancyExpected, self.getToken())
        print("# Line %s: Expected %s, got '%s'." % details, file=sys.stderr)
        raise CompilerException("Get out.")

    def eat (self, *types):
        """Consume the current token, requiring its type to be one of ``types``.

        A mismatch is reported through ``self.error``. The token is then
        appended to ``self.xml`` as ``<type>content</type>`` and the
        tokenizer is advanced.

        :param types: accepted token types
        """
        kind = self.tokenType()
        if kind not in types:
            self.error(types)
        self.xml += '<%s>%s</%s>\n' % (kind, self.xmlContent(), kind)
        self.advance()

    def cEat (self, token): # Conditional Eat
        """Consume the current token only if its text equals ``token``.

        On a match the token is eaten under its own type; otherwise
        ``self.error`` is called with the expected text.

        The comparison uses ``==`` rather than a direct ``__eq__`` call: the
        dunder returns ``NotImplemented`` for operands of different types,
        and that value is not false, so a mismatch would be treated as a
        match.

        :param token: the exact token text required
        """
        if self.getToken() == token:
            self.eat(self.tokenType())
        else:
            self.error(token)

    def compileTerm (self):
        """Compile one term and wrap its tokens in ``<term>`` in the XML.

        Forms, tested in this order: identifier (array access, subroutine
        call or plain variable, decided by peeking at ``self._tokens[1]``),
        parenthesised expression, unary operator, keyword constant, integer
        constant, string constant. Anything else is rejected by the final
        ``eat`` before any VM code is written for it.

        Changes from the previous version:

        * String constants are compiled without their surrounding quotes.
          ``xmlContent`` strips the first and last character of a string
          token, which shows the token text includes them; they were being
          counted and appended as characters.
        * The lookahead no longer uses a bare ``except:``, which also caught
          ``KeyboardInterrupt`` and ``SystemExit``.
        * ``==`` replaces direct ``__eq__`` calls.
        """
        self.xml += '<term>\n'
        current = self.getToken()
        kind = self.tokenType()

        if kind == Token.TK_IDENTIFIER:
            try:
                nextToken = self._tokens[1].token()
            except Exception:
                # No usable lookahead: treat the identifier as a plain variable.
                nextToken = ''
            if nextToken == '[':
                self.vm.writePush(self.st.kindOf(current), self.st.indexOf(current))
                self.eat(Token.TK_IDENTIFIER)
                self.cEat('[')
                self.compileExpression()
                self.cEat(']')
                # base + index, then read through pointer 1 / that 0
                self.vm.writeArithmetic('+')
                self.vm.writePop('pointer', 1)
                self.vm.writePush('that', 0)
            elif nextToken in ('(', '.'):
                self.compileSubroutineCall()
            else:
                self.vm.writePush(self.st.kindOf(current), self.st.indexOf(current))
                self.eat(Token.TK_IDENTIFIER)
        elif current == '(':
            self.cEat('(')
            self.compileExpression()
            self.cEat(')')
        elif current in self.U_OPERATOR:
            self.eat(Token.TK_SYMBOL)
            self.compileTerm()
            # Unary minus is passed to the writer as '!'; '~' goes through as-is.
            self.vm.writeArithmetic('!' if current == '-' else current)
        elif current in self.KEYWORD_CONSTANT:
            if current == 'this':
                self.vm.writePush('pointer', 0)
            elif current == 'true':
                self.vm.writePush('constant', 1)
                self.vm.writeArithmetic('!')
            else:  # false | null
                self.vm.writePush('constant', 0)
            self.eat(Token.TK_KEYWORD)
        elif kind == Token.TK_INT:
            self.vm.writePush('constant', current)
            self.eat(Token.TK_INT, Token.TK_STRING)
        elif kind == Token.TK_STRING:
            text = current[1:-1]  # drop the surrounding quotes
            self.vm.writePush('constant', len(text))
            self.vm.writeCall('String.new', 1)
            for char in text:
                self.vm.writePush('constant', ord(char))
                self.vm.writeCall('String.appendChar', 2)
            self.eat(Token.TK_INT, Token.TK_STRING)
        else:
            # Not a term at all: eat() reports the error.
            self.eat(Token.TK_INT, Token.TK_STRING)
        self.xml += '</term>\n'

    def compileExpression (self):
        """Compile ``term (op term)*`` inside an ``<expression>`` XML element.

        Operators are applied left to right, each one after its right-hand
        term. Operators listed in ``vm.ARITHMETIC`` go to ``writeArithmetic``;
        ``*`` and ``/`` become calls to ``Math.multiply`` / ``Math.divide``.

        ``==`` replaces the direct ``__eq__`` calls used before.
        """
        self.xml += '<expression>\n'
        self.compileTerm()
        while self.getToken() in self.B_OPERATOR:
            operator = self.getToken()
            self.eat(Token.TK_SYMBOL)
            self.compileTerm()
            if operator in vm.ARITHMETIC:
                self.vm.writeArithmetic(operator)
            elif operator == '*':
                self.vm.writeCall('Math.multiply', 2)
            elif operator == '/':
                self.vm.writeCall('Math.divide', 2)
        self.xml += '</expression>\n'

    def compileExpressionList (self):
        """Compile a comma-separated list of expressions, which may be empty.

        The list is non-empty when the current token can start a term: a
        token type in ``TERM_TYPE``, a keyword constant, ``(``, or a unary
        operator. The unary case was missing before, so an argument list
        starting with ``-`` or ``~`` (e.g. ``f(-1)``) was treated as empty
        and the call then failed at the closing parenthesis.

        :return: number of expressions compiled
        """
        self.xml += '<expressionList>\n'
        nArgs = 0
        current = self.getToken()
        startsTerm = (self.tokenType() in self.TERM_TYPE
                      or current in self.KEYWORD_CONSTANT
                      or current in self.U_OPERATOR
                      or current == '(')
        if startsTerm:
            nArgs += 1
            self.compileExpression()
            while self.getToken() == ',':
                nArgs += 1
                self.eat(Token.TK_SYMBOL)
                self.compileExpression()
        self.xml += '</expressionList>\n'
        return nArgs

    def compileSubroutineCall (self):
        """Compile ``name(args)`` or ``owner.name(args)``.

        * ``owner.name(...)`` where ``owner`` is in the symbol table: the
          variable is pushed as the hidden first argument and the call
          targets the variable's type.
        * ``owner.name(...)`` otherwise: ``owner`` is used as a class name
          and only the explicit arguments are passed.
        * ``name(...)``: a method of the current class; pointer 0 is pushed
          as the hidden first argument.

        An identifier followed by neither ``.`` nor ``(`` is now reported as
        an error. Before, nothing was written for it, so ``do foo;`` compiled
        into a lone pop. ``==`` also replaces direct ``__eq__`` calls.
        """
        owner = self.getToken()
        self.eat(Token.TK_IDENTIFIER)
        if self.getToken() == '.':
            self.eat(Token.TK_SYMBOL)
            method = self.getToken()
            try:
                ownerIndex = self.st.indexOf(owner)
            except CompilerException:
                ownerIndex = -1
            isObject = ownerIndex != -1
            if isObject:
                self.vm.writePush(self.st.kindOf(owner), ownerIndex)
            self.eat(Token.TK_IDENTIFIER)
            self.cEat('(')
            nArgs = self.compileExpressionList() + (1 if isObject else 0)
            self.cEat(')')
            target = self.st.typeOf(owner) if isObject else owner
            self.vm.writeCall('%s.%s' % (target, method), nArgs)
        elif self.getToken() == '(':
            self.cEat('(')
            self.vm.writePush('pointer', 0)
            nArgs = self.compileExpressionList()
            self.cEat(')')
            self.vm.writeCall('%s.%s' % (self.className, owner), nArgs+1)
        else:
            self.error(('.', '('))

    def compileStatements (self):
        """Compile consecutive statements inside a ``<statements>`` element.

        The loop runs while the current token is one of ``STATEMENTS`` and
        hands each statement to its compile method. A dispatch table replaces
        the former chain of direct ``__eq__`` calls.
        """
        self.xml += '<statements>\n'
        handlers = {
            'let': self.compileLet,
            'if': self.compileIf,
            'while': self.compileWhile,
            'do': self.compileDo,
            'return': self.compileReturn,
        }
        while self.getToken() in self.STATEMENTS:
            handler = handlers.get(self.getToken())
            if handler is None:
                # STATEMENTS lists a keyword with no compile method here.
                break
            handler()
        self.xml += '</statements>\n'

    def compileLet (self):
        """Compile ``let varName ('[' expression ']')? = expression ;``.

        For an array target the address (base + index) is computed first.
        What happens next depends on whether a ``[`` occurs in
        ``self._tokens`` before the next ``;``:

        * no ``[``: the address is popped into pointer 1 straight away and
          the value is later popped into that 0;
        * a ``[`` is found: the address stays on the stack while the right
          side is compiled, and the value is parked in temp 0 while
          pointer 1 is loaded.

        The logic is unchanged; ``==`` replaces direct ``__eq__`` calls.
        """
        self.xml += '<letStatement>\n'
        self.cEat('let')
        variable = self.getToken()
        self.eat(Token.TK_IDENTIFIER)
        hasLeftArray = self.getToken() == '['
        hasRightArray = False
        if hasLeftArray:
            self.eat(Token.TK_SYMBOL)
            self.vm.writePush(self.st.kindOf(variable), self.st.indexOf(variable))
            self.compileExpression()
            self.vm.writeArithmetic('+')
            self.cEat(']')
            # Look ahead, up to the end of this statement, for an array access.
            for token in self._tokens:
                text = token.token()
                if text == '[':
                    hasRightArray = True
                    break
                if text == ';':
                    break
            if not hasRightArray:
                self.vm.writePop('pointer', 1)

        self.cEat('=')
        self.compileExpression()
        if not hasLeftArray:
            self.vm.writePop(self.st.kindOf(variable), self.st.indexOf(variable))
        elif hasRightArray:
            self.vm.writePop('temp', 0)
            self.vm.writePop('pointer', 1)
            self.vm.writePush('temp', 0)
            self.vm.writePop('that', 0)
        else:
            self.vm.writePop('that', 0)
        self.cEat(';')
        self.xml += '</letStatement>\n'

    def compileIf (self):
        """Compile an if statement with an optional else clause.

        Two labels are reserved by adding 2 to ``self.labelCounter``: the odd
        one marks the else part, the even one the end. The condition is
        followed by ``writeArithmetic('~')`` and a ``writeIf`` to the else
        label. An ``EOFException`` raised while looking for or compiling the
        else clause is swallowed, so an ``if`` at the very end of the input
        still gets its end label.

        ``==`` replaces the direct ``__eq__`` call used before.
        """
        self.xml += '<ifStatement>\n'
        self.cEat('if')
        self.cEat('(')
        self.compileExpression()
        self.vm.writeArithmetic('~')
        self.labelCounter += 2
        thisLabel = self.labelCounter
        elseLabel = '%s.L%d' % (self.className, thisLabel-1)
        endLabel = '%s.L%d' % (self.className, thisLabel)
        self.vm.writeIf(elseLabel)
        self.cEat(')')
        self.cEat('{')
        self.compileStatements()
        self.vm.writeGoto(endLabel)
        self.vm.writeLabel(elseLabel)
        self.cEat('}')
        try:
            if self.getToken() == 'else':
                self.eat(Token.TK_KEYWORD)
                self.cEat('{')
                self.compileStatements()
                self.cEat('}')
        except EOFException:
            pass
        self.vm.writeLabel(endLabel)
        self.xml += '</ifStatement>\n'

    def compileWhile(self):
        """Compile a ``while`` loop.

        Reserves two labels by advancing ``self.labelCounter`` by 2: the odd
        one marks the top of the loop (condition re-evaluation), the even one
        marks the exit.  The condition is negated so ``if-goto`` leaves the
        loop when it no longer holds.
        """
        self.xml += '<whileStatement>\n'
        self.cEat('while')
        self.cEat('(')

        self.labelCounter += 2
        # Both names are fixed here; the loop body may advance the counter.
        exitIndex = self.labelCounter
        topLabel = '%s.L%d' % (self.className, exitIndex - 1)
        exitLabel = '%s.L%d' % (self.className, exitIndex)

        self.vm.writeLabel(topLabel)
        self.compileExpression()
        self.vm.writeArithmetic('~')
        self.vm.writeIf(exitLabel)

        self.cEat(')')
        self.cEat('{')
        self.compileStatements()
        self.vm.writeGoto(topLabel)
        self.vm.writeLabel(exitLabel)
        self.cEat('}')
        self.xml += '</whileStatement>\n'

    def compileDo(self):
        """Compile a ``do`` statement.

        The called subroutine's return value is discarded by popping it into
        ``temp 0``.
        """
        tag = 'doStatement'
        self.xml += '<%s>\n' % tag
        self.cEat('do')
        self.compileSubroutineCall()
        # A do-statement ignores the result, but the call still leaves one.
        self.vm.writePop('temp', 0)
        self.cEat(';')
        self.xml += '</%s>\n' % tag

    def compileReturn(self):
        """Compile a ``return`` statement.

        A bare ``return;`` pushes ``constant 0`` before returning, so a value
        is always on the stack when the VM ``return`` executes.
        """
        self.xml += '<returnStatement>\n'
        self.cEat('return')
        if self.getToken() == ';':
            self.vm.writePush('constant', 0)
            self.vm.writeReturn()
            self.eat(Token.TK_SYMBOL)
        else:
            self.compileExpression()
            self.vm.writeReturn()
            self.cEat(';')
        self.xml += '</returnStatement>\n'

    def compileType(self):
        """Consume one type token: a keyword from ``KEYWORD_TYPE`` or an identifier.

        Any other token is reported through ``self.error`` together with the
        list of accepted alternatives.
        """
        if self.getToken() in self.KEYWORD_TYPE:
            self.eat(Token.TK_KEYWORD)
        elif self.tokenType() == Token.TK_IDENTIFIER:
            self.eat(Token.TK_IDENTIFIER)
        else:
            self.error(self.KEYWORD_TYPE + [Token.TK_IDENTIFIER])

    def compileVarDec(self):
        """Compile a ``var`` declaration and register each name as ``'local'``.

        The type and the first variable name are read by peeking at the next
        two pending tokens before they are consumed; every further name in
        the comma-separated list reuses that type.
        """
        self.xml += '<varDec>\n'
        self.cEat('var')

        # Peek at "<type> <name>" before consuming them.
        # NOTE(review): with fewer than two tokens left the names below stay
        # unbound; presumably compileType()/eat() fail first - confirm.
        if len(self._tokens) >= 2:
            tokenType, tokenIdent = [tok.token() for tok in self._tokens[0:2]]
        self.compileType()
        self.eat(Token.TK_IDENTIFIER)
        self.st.define(tokenIdent, tokenType, 'local')

        while self.getToken() == ',':
            self.eat(Token.TK_SYMBOL)

            tokenIdent = self.getToken()
            self.st.define(tokenIdent, tokenType, 'local')
            self.eat(Token.TK_IDENTIFIER)

        self.cEat(';')
        self.xml += '</varDec>\n'

    def compileSubroutineBody(self):
        """Compile a subroutine body: local declarations, prologue, statements.

        The VM ``function`` command is written only after all ``var``
        declarations have been compiled, because it needs the local count.
        Methods then bind ``this`` from ``argument 0``; constructors allocate
        one word per field through ``Memory.alloc`` and bind ``this`` to the
        returned address.
        """
        self.xml += '<subroutineBody>\n'
        self.cEat('{')
        while self.getToken() == 'var':
            self.compileVarDec()
        self.vm.writeFunction(
            '%s.%s' % (self.className, self.currentFunctionName),
            self.st.varCount('local'))
        if self.currentSubroutineType == 'method':
            self.vm.writePush('argument', 0)
            self.vm.writePop('pointer', 0)
        elif self.currentSubroutineType == 'constructor':
            self.vm.writePush('constant', self.st.varCount(st.FIELD))
            self.vm.writeCall('Memory.alloc', 1)
            self.vm.writePop('pointer', 0)
        self.compileStatements()
        self.cEat('}')
        self.xml += '</subroutineBody>\n'

    def compileParameterList(self):
        """Compile a (possibly empty) parameter list.

        Each ``<type> <name>`` pair is registered in the symbol table with
        kind ``'argument'``.  The enclosing parentheses are handled by the
        caller.
        """
        self.xml += '<parameterList>\n'
        if self.getToken() in self.KEYWORD_TYPE or self.tokenType() == Token.TK_IDENTIFIER:
            while True:
                # Peek at "<type> <name>" before consuming them.
                # NOTE(review): with fewer than two tokens left the previous
                # values (if any) are reused; presumably compileType()/eat()
                # fail first - confirm.
                if len(self._tokens) >= 2:
                    tokenType, tokenIdent = [tok.token() for tok in self._tokens[0:2]]
                self.compileType()
                self.eat(Token.TK_IDENTIFIER)
                self.st.define(tokenIdent, tokenType, 'argument')

                if self.getToken() != ',':
                    break
                self.eat(Token.TK_SYMBOL)
        self.xml += '</parameterList>\n'

    def compileSubroutineDec(self):
        """Compile one subroutine declaration (constructor, function or method).

        Records the subroutine kind and name on the instance for use while
        compiling the body.  For a method, ``this`` is defined as the first
        ``'argument'`` entry before the declared parameters are processed.
        """
        self.xml += '<subroutineDec>\n'
        subroutineKind = self.getToken()
        if subroutineKind in self.KEYWORD_SUBROUTINE:
            if subroutineKind == 'method':
                self.st.define('this', self.className, 'argument')
            self.currentSubroutineType = subroutineKind
            self.eat(Token.TK_KEYWORD)
        else:
            self.error(self.KEYWORD_SUBROUTINE)

        # Return type: a primitive type keyword, 'void', or a class name.
        returnTypes = self.KEYWORD_TYPE + ['void']
        if self.getToken() in returnTypes:
            self.eat(Token.TK_KEYWORD)
        elif self.tokenType() == Token.TK_IDENTIFIER:
            self.eat(Token.TK_IDENTIFIER)
        else:
            # The identifier kind must be wrapped in a list: concatenating
            # the bare constant to a list raises TypeError instead of
            # reporting the syntax error.
            self.error(returnTypes + [Token.TK_IDENTIFIER])

        self.currentFunctionName = self.getToken()
        self.eat(Token.TK_IDENTIFIER)
        self.cEat('(')
        self.compileParameterList()
        self.cEat(')')
        self.compileSubroutineBody()
        self.xml += '</subroutineDec>\n'
    
    def compileClassVarDec(self):
        """Compile a ``static``/``field`` declaration and register its names.

        The type and first name are read by peeking at the two pending tokens
        that follow the ``static``/``field`` keyword; every further name in
        the comma-separated list reuses that type and kind.
        """
        self.xml += '<classVarDec>\n'

        scopeKeyword = self.getToken()
        if scopeKeyword == 'static':
            tokenKind = st.STATIC
        elif scopeKeyword == 'field':
            tokenKind = st.FIELD
        else:
            self.error(self.KEYWORD_CLASS_VAR_TYPE)
        self.eat(Token.TK_KEYWORD)

        # Peek at "<type> <name>" before consuming them.
        # NOTE(review): with fewer than two tokens left the names below stay
        # unbound; presumably compileType()/eat() fail first - confirm.
        if len(self._tokens) >= 2:
            tokenType, tokenIdent = [tok.token() for tok in self._tokens[0:2]]
        self.compileType()
        self.eat(Token.TK_IDENTIFIER)
        self.st.define(tokenIdent, tokenType, tokenKind)

        while self.getToken() == ',':
            self.eat(Token.TK_SYMBOL)

            tokenIdent = self.getToken()
            self.st.define(tokenIdent, tokenType, tokenKind)
            self.eat(Token.TK_IDENTIFIER)

        self.cEat(';')
        self.xml += '</classVarDec>\n'

    def compileClass(self):
        """Compile a whole class: header, class variables, then subroutines.

        The class name is stored in ``self.className``.  After each
        subroutine has been compiled the symbol table's subroutine scope is
        reset via ``startSubroutine()``.
        """
        tag = 'class'
        self.xml += '<%s>\n' % tag
        self.cEat('class')

        name = self.getToken()
        self.className = name
        self.eat(Token.TK_IDENTIFIER)
        self.cEat('{')

        while self.getToken() in self.KEYWORD_CLASS_VAR_TYPE:
            self.compileClassVarDec()

        while self.getToken() in self.KEYWORD_SUBROUTINE:
            self.compileSubroutineDec()
            # Clear subroutine-level symbols before the next declaration.
            self.st.startSubroutine()

        self.cEat('}')
        self.xml += '</%s>\n' % tag

    def compile(self):
        """Compile the class and, on success, write the XML and close the VM output.

        A ``CompilerException`` aborts the run: a failure notice is printed
        to stderr and neither the XML nor the success message is produced.
        """
        try:
            self.compileClass()
        except CompilerException:
            print("# Compilation failed.\n", file=sys.stderr)
        else:
            self.generateXML()
            self.vm.close()
            print('Successful compiling of "%s"' % self.path)
コード例 #8
0
class JackCompilar:
    """Generate VM code for one Jack source file from its parse tree.

    The file is tokenized by ``JackTokenizer`` and parsed by
    ``CompilationEngine``; the resulting tree (``ce.root``) is walked and VM
    commands are emitted through a ``VMWriter``.  All work happens in the
    constructor.
    """

    def __init__(self, filepath):
        """Parse *filepath* and immediately generate code for its class."""
        jt = JackTokenizer(filepath)
        ce = CompilationEngine(filepath, jt.tokens)
        self.vmwriter = VMWriter(filepath)
        self.symbol_table = SymbolTable()
        self.class_name = None
        self.generateClass(ce.root)

    def _segmentOf(self, var_name):
        """Return the VM segment holding *var_name*.

        Static, field and argument kinds map to their segments; any other
        kind reported by the symbol table falls back to the local segment.
        """
        scopekind = self.symbol_table.kindOf(var_name)
        if scopekind == ScopeKind.STATIC:
            return SegmentType.STATIC
        if scopekind == ScopeKind.FIELD:
            return SegmentType.THIS
        if scopekind == ScopeKind.ARG:
            return SegmentType.ARG
        return SegmentType.LOCAL

    def generateClass(self, node):
        """Record the class name and process every declaration in the class."""
        self.class_name = childList(node)[1].text
        for ch in node:
            if ch.tag == "classVarDec":
                self.generateClassVarDec(ch)
            elif ch.tag == "subroutineDec":
                self.generateSubroutineDec(ch)

    def generateClassVarDec(self, node):
        """Define each name of a ``static``/``field`` declaration."""
        chs = childList(node)
        if chs[0].text == "static":
            scopekind = ScopeKind.STATIC
        else:
            scopekind = ScopeKind.FIELD

        typename = chs[1].text
        # Children are: kind, type, name, ',', name, ..., ';'
        for name_node in chs[2::2]:
            self.symbol_table.define(name_node.text, typename, scopekind)

    def generateSubroutineDec(self, node):
        """Emit the VM function for one constructor, function or method."""
        self.symbol_table.startSubroutine()
        chs = childList(node)
        subroutine_kind = chs[0].text
        subroutine_name = chs[2].text
        parameter_list_node = chs[4]
        subroutine_body_node = chs[6]

        if subroutine_kind == "method":
            # Reserve argument 0 (the receiver) with a placeholder entry so
            # declared parameters start at index 1.
            self.symbol_table.define("", "", ScopeKind.ARG)

        # Parameter children come in groups of three: type, name, ','.
        params = childList(parameter_list_node)
        for i in range(0, len(params), 3):
            type_node = params[i]
            var_name_node = params[i + 1]
            self.symbol_table.define(var_name_node.text, type_node.text,
                                     ScopeKind.ARG)

        # Body children: '{', varDec*, statements, '}'.
        body = childList(subroutine_body_node)
        statements_node = body[-2]
        for var_dec in body[1:-2]:
            self.generateVarDec(var_dec)

        nlocals = self.symbol_table.varCount(ScopeKind.VAR)
        self.vmwriter.writeFunction(f"{self.class_name}.{subroutine_name}",
                                    nlocals)

        if subroutine_kind == "method":
            # Bind 'this' to the receiver passed as argument 0.
            self.vmwriter.writePush(SegmentType.ARG, 0)
            self.vmwriter.writePop(SegmentType.POINTER, 0)
        elif subroutine_kind == "constructor":
            # Allocate one word per field and bind 'this' to the new block.
            nfields = self.symbol_table.varCount(ScopeKind.FIELD)
            self.vmwriter.writePush(SegmentType.CONST, nfields)
            self.vmwriter.writeCall("Memory.alloc", 1)
            self.vmwriter.writePop(SegmentType.POINTER, 0)

        self.generateStatements(statements_node)

    def generateStatements(self, node):
        """Generate code for every statement under *node*, in order."""
        for ch in node:
            self.generateStatement(ch)

    def generateStatement(self, node):
        """Dispatch on the statement tag; unknown tags are treated as return."""
        handlers = {
            "letStatement": self.generateLetStatement,
            "ifStatement": self.generateIfStatement,
            "whileStatement": self.generateWhileStatement,
            "doStatement": self.generateDoStatement,
        }
        handlers.get(node.tag, self.generateReturnStatement)(node)

    def generateLetStatement(self, node):
        """Generate a ``let`` assignment to a variable or an array element."""
        chs = childList(node)
        # The right-hand side is evaluated first, so its value is on the
        # stack before any array addressing touches pointer 1.
        self.generateExpression(chs[-2])

        varname = chs[1].text
        segment = self._segmentOf(varname)
        index = self.symbol_table.indexOf(varname)

        if chs[2].text != "[":
            self.vmwriter.writePop(segment, index)
        else:
            # address = base + index; anchor THAT there and store the value.
            self.vmwriter.writePush(segment, index)
            self.generateExpression(chs[3])
            self.vmwriter.writeArithmetic(ArithmeticCommandType.ADD)
            self.vmwriter.writePop(SegmentType.POINTER, 1)
            self.vmwriter.writePop(SegmentType.THAT, 0)

    def generateIfStatement(self, node):
        """Generate an ``if`` with an optional ``else`` (children beyond index 9)."""
        chs = childList(node)

        L1 = generate_random_label()
        L2 = generate_random_label()

        self.generateExpression(chs[2])
        self.vmwriter.writeArithmetic(ArithmeticCommandType.NOT)
        self.vmwriter.writeIF(L1)
        self.generateStatements(chs[5])
        self.vmwriter.writeGoto(L2)
        self.vmwriter.writeLabel(L1)
        if len(chs) > 9:
            self.generateStatements(chs[9])
        self.vmwriter.writeLabel(L2)

    def generateWhileStatement(self, node):
        """Generate a ``while`` loop: test at the top, jump out when false."""
        chs = childList(node)
        L1 = generate_random_label()
        L2 = generate_random_label()

        self.vmwriter.writeLabel(L1)
        self.generateExpression(chs[2])
        self.vmwriter.writeArithmetic(ArithmeticCommandType.NOT)
        self.vmwriter.writeIF(L2)
        self.generateStatements(chs[5])
        self.vmwriter.writeGoto(L1)
        self.vmwriter.writeLabel(L2)

    def generateDoStatement(self, node):
        """Generate a ``do`` call and discard its return value."""
        sc = childList(node)[1:-1]
        self.generateSubroutineCall(sc)

        # An if-goto to the very next label consumes the returned value
        # from the stack without affecting control flow.
        L = generate_random_label()
        self.vmwriter.writeIF(L)
        self.vmwriter.writeLabel(L)

    def generateReturnStatement(self, node):
        """Generate ``return``; a bare ``return;`` pushes constant 0 first."""
        chs = childList(node)
        if len(chs) == 2:
            self.vmwriter.writePush(SegmentType.CONST, 0)
        else:
            self.generateExpression(chs[1])

        self.vmwriter.writeReturn()

    def generateTerm(self, node):
        """Generate code that leaves the value of one term on the stack."""
        chs = childList(node)
        first = chs[0]

        if first.tag == "integerConstant":
            self.vmwriter.writePush(SegmentType.CONST, int(first.text))
            return

        if first.tag == "stringConstant":
            # Build the string at run time, one appendChar per character.
            self.vmwriter.writePush(SegmentType.CONST, len(first.text))
            self.vmwriter.writeCall("String.new", 1)
            for c in first.text:
                self.vmwriter.writePush(SegmentType.CONST, ord(c))
                self.vmwriter.writeCall("String.appendChar", 2)
            return

        if first.tag == "keyword":
            if first.text == "true":
                # true is the bitwise negation of 0.
                self.vmwriter.writePush(SegmentType.CONST, 0)
                self.vmwriter.writeArithmetic(ArithmeticCommandType.NOT)
            elif first.text in {"false", "null"}:
                self.vmwriter.writePush(SegmentType.CONST, 0)
            else:
                # Remaining keyword constant: the current object.
                self.vmwriter.writePush(SegmentType.POINTER, 0)
            return

        if first.text == "(":
            self.generateExpression(chs[1])
            return

        if first.text in {"-", "~"}:
            self.generateTerm(chs[1])
            if first.text == "-":
                self.vmwriter.writeArithmetic(ArithmeticCommandType.NEG)
            else:
                self.vmwriter.writeArithmetic(ArithmeticCommandType.NOT)
            return

        is_array_access = chs[-1].text == "]"
        if len(chs) == 1 or is_array_access:
            var_name = first.text
            self.vmwriter.writePush(self._segmentOf(var_name),
                                    self.symbol_table.indexOf(var_name))
            if is_array_access:
                self.generateExpression(chs[2])
                self.vmwriter.writeArithmetic(ArithmeticCommandType.ADD)
                self.vmwriter.writePop(SegmentType.POINTER, 1)
                self.vmwriter.writePush(SegmentType.THAT, 0)
            return

        self.generateSubroutineCall(chs)

    def generateSubroutineCall(self, sc):
        """Generate a call from the node list ``[prefix '.'] name '(' args ')'``.

        A call without prefix is a method call on the current object; a
        prefix known to the symbol table is a method call on that variable;
        any other prefix is taken as a class name.
        """
        subroutine_name = sc[-4].text
        # Expression-list children alternate expression, ',', expression...
        nargs = (len(childList(sc[-2])) + 1) // 2
        if len(sc) == 4:
            self.vmwriter.writePush(SegmentType.POINTER, 0)
            nargs += 1
            class_name = self.class_name
        elif self.symbol_table.isDefined(sc[0].text):
            var_name = sc[0].text
            class_name = self.symbol_table.typeOf(var_name)
            # The object itself is the hidden first argument.
            self.vmwriter.writePush(self._segmentOf(var_name),
                                    self.symbol_table.indexOf(var_name))
            nargs += 1
        else:
            class_name = sc[0].text

        self.generateExpressionList(sc[-2])
        self.vmwriter.writeCall(f"{class_name}.{subroutine_name}", nargs)

    def generateExpression(self, node):
        """Generate ``term (op term)*`` strictly left to right."""
        chs = childList(node)
        arithmetic = {
            "+": ArithmeticCommandType.ADD,
            "-": ArithmeticCommandType.SUB,
            "&amp;": ArithmeticCommandType.AND,
            "|": ArithmeticCommandType.OR,
            "&lt;": ArithmeticCommandType.LT,
            "&gt;": ArithmeticCommandType.GT,
            "=": ArithmeticCommandType.EQ,
        }
        library_calls = {"*": "Math.multiply", "/": "Math.divide"}

        self.generateTerm(chs[0])
        for i in range(1, len(chs), 2):
            self.generateTerm(chs[i + 1])
            op = chs[i].text
            if op in arithmetic:
                self.vmwriter.writeArithmetic(arithmetic[op])
            elif op in library_calls:
                self.vmwriter.writeCall(library_calls[op], 2)

    def generateExpressionList(self, node):
        """Generate every expression of a comma-separated argument list."""
        for expression in childList(node)[::2]:
            self.generateExpression(expression)

    def generateVarDec(self, node):
        """Define each name of a ``var`` declaration as a subroutine variable."""
        chs = childList(node)
        typename = chs[1].text
        # Children are: 'var', type, name, ',', name, ..., ';'
        for name_node in chs[2::2]:
            self.symbol_table.define(name_node.text, typename, ScopeKind.VAR)
コード例 #9
0
class CompilationEngine:
    """Single-pass recursive-descent compiler for one Jack class.

    While parsing, the engine writes the parse tree as XML to
    ``<name>.xml`` and emits VM code through a ``VMWriter``.  Syntax errors
    terminate the process via ``errorExpected``.
    """

    def __init__(self, filename):
        """Set up tokenizer, symbol table and VM writer for *filename*."""
        baseName = splitext(filename)[0]

        self.tokenizer = JackTokenizer(filename)
        self.types = ['int', 'char', 'boolean']
        self.operators = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
        self.keywordsConstant = ['true', 'false', 'null', 'this']
        self.fileName = baseName
        self.symbolTable = SymbolTable()
        self.vm = VMWriter(baseName)
        # Label counters are restarted for every subroutine.
        self.whileLabelNum = 0
        self.ifLabelNum = 0

    def compile(self):
        """Compile the class; both outputs are closed even if compilation aborts."""
        self.file = open(self.fileName + ".xml", "w")
        try:
            self.compileClass()
        finally:
            self.file.close()
            self.vm.close()

    # ------------------------------------------------------------------
    # Declarations
    # ------------------------------------------------------------------

    def _isTypeToken(self):
        """Return True when the current token can start a variable type."""
        return (self.tokenizer.getToken() in self.types
                or self.tokenizer.tokenType() == 'identifier')

    def _expectTypeName(self):
        """Consume the current token as a type and return its text.

        Reports a syntax error when the token is neither a primitive type
        nor an identifier.
        """
        if not self._isTypeToken():
            self.errorExpected(self.tokenizer.getToken(),
                               '|'.join(self.types + ['identifier']))
        typeName = self.tokenizer.getToken()
        self.printToken()
        self.tokenizer.advance()
        return typeName

    def compileClass(self):
        """Compile ``class Name { classVarDec* subroutineDec* }``."""
        self.writeToXml("<class>")

        self.expect("class")
        self.className = self.tokenizer.getToken()
        self.expectType('identifier')
        self.expect("{")

        while self.tokenizer.getToken() in ['static', 'field']:
            self.compileClassVarDec()

        while self.tokenizer.getToken() in ['constructor', 'function', 'method']:
            self.compileSubroutine()

        self.expect("}")
        self.writeToXml("</class>")

    def compileClassVarDec(self):
        """Compile a ``static``/``field`` declaration and define its names.

        The keyword text itself is used as the symbol kind.
        """
        self.writeToXml('<classVarDec>')

        kind = self.tokenizer.getToken()
        self.expect(['field', 'static'])

        varType = self._expectTypeName()

        name = self.tokenizer.getToken()
        self.expectType('identifier')
        self.symbolTable.define(name, varType, kind)

        while self.tokenizer.getToken() == ",":
            self.expect(",")
            name = self.tokenizer.getToken()
            self.expectType('identifier')
            self.symbolTable.define(name, varType, kind)

        self.expect(';')

        self.writeToXml('</classVarDec>')

    def compileSubroutine(self):
        """Compile one constructor, function or method including its body."""
        self.writeToXml("<subroutineDec>")
        self.symbolTable.startSubroutine()
        self.whileLabelNum = 0
        self.ifLabelNum = 0

        subroutineType = self.tokenizer.getToken()

        if subroutineType == 'method':
            # The receiver occupies the first argument slot.
            self.symbolTable.define("this", self.className, SymbolTable.ARG)

        self.expect(['constructor', 'function', 'method'])

        # Return type; a token that is not a type is left for the
        # identifier check below to reject.
        if (self.tokenizer.getToken() in self.types + ['void']
                or self.tokenizer.tokenType() == 'identifier'):
            self.printToken()
            self.tokenizer.advance()

        functionName = self.className + '.' + self.tokenizer.getToken()

        self.expectType('identifier')
        self.expect("(")
        self.compileParameterList()
        self.expect(")")

        self.writeToXml("<subroutineBody>")
        self.expect("{")

        while self.tokenizer.getToken() == 'var':
            self.compileVarDec()

        # The function command needs the local count, so it is written
        # only after all var declarations.
        self.vm.writeFunction(functionName,
                              self.symbolTable.varCount(SymbolTable.VAR))
        if subroutineType == 'constructor':
            self.vm.writePush(VMWriter.CONST,
                              self.symbolTable.varCount(SymbolTable.FIELD))
            self.vm.writeCall("Memory.alloc", 1)
            self.vm.writePop(VMWriter.POINTER, 0)
        elif subroutineType == 'method':
            self.vm.writePush(VMWriter.ARG, 0)
            self.vm.writePop(VMWriter.POINTER, 0)

        self.compileStatements()
        self.expect("}")
        self.writeToXml("</subroutineBody>")

        self.writeToXml("</subroutineDec>")

    def _compileParameter(self):
        """Compile one ``type name`` parameter and define it as an argument."""
        varType = self._expectTypeName()
        name = self.tokenizer.getToken()
        self.symbolTable.define(name, varType, SymbolTable.ARG)
        self.expectType('identifier')

    def compileParameterList(self):
        """Compile a possibly empty parameter list (parentheses excluded)."""
        self.writeToXml("<parameterList>")

        if self._isTypeToken():
            self._compileParameter()

        while self.tokenizer.getToken() == ',':
            self.expect(",")
            self._compileParameter()

        self.writeToXml("</parameterList>")

    def compileVarDec(self):
        """Compile a ``var`` declaration and define its names as locals.

        A missing type is reported as a syntax error.
        """
        self.writeToXml("<varDec>")

        self.expect('var')
        varType = self._expectTypeName()

        self.symbolTable.define(self.tokenizer.getToken(), varType,
                                self.symbolTable.VAR)
        self.expectType('identifier')

        while self.tokenizer.getToken() == ",":
            self.expect(",")
            self.symbolTable.define(self.tokenizer.getToken(), varType,
                                    self.symbolTable.VAR)
            self.expectType('identifier')

        self.expect(';')

        self.writeToXml("</varDec>")

    # ------------------------------------------------------------------
    # Statements
    # ------------------------------------------------------------------

    def compileStatements(self):
        """Compile statements until a token that does not start one."""
        self.writeToXml("<statements>")

        handlers = {
            'while': self.compileWhile,
            'if': self.compileIf,
            'let': self.compileLet,
            'do': self.compileDo,
            'return': self.compileReturn,
        }
        while self.tokenizer.getToken() in handlers:
            handlers[self.tokenizer.getToken()]()

        self.writeToXml("</statements>")

    def compileDo(self):
        """Compile ``do call;`` and discard the return value into temp 0."""
        self.writeToXml('<doStatement>')

        self.expect("do")
        identifier = self.tokenizer.getToken()
        self.expectType("identifier")

        self.compileSubroutineCall(identifier)

        self.expect(";")
        self.vm.writePop(VMWriter.TEMP, 0)

        self.writeToXml('</doStatement>')

    def compileExpressionList(self):
        """Compile a comma-separated argument list.

        Returns:
            The number of expressions compiled.
        """
        self.writeToXml('<expressionList>')

        count = 0
        if self.tokenizer.getToken() != ")":
            self.compileExpression()
            count += 1
            while self.tokenizer.getToken() == ",":
                self.expect(",")
                self.compileExpression()
                count += 1

        self.writeToXml('</expressionList>')

        return count

    def compileIf(self):
        """Compile ``if (...) {...} [else {...}]`` with per-subroutine labels."""
        self.writeToXml('<ifStatement>')

        labelTrue = "IF_TRUE{}".format(self.ifLabelNum)
        labelFalse = "IF_FALSE{}".format(self.ifLabelNum)
        labelEnd = "IF_END{}".format(self.ifLabelNum)
        self.ifLabelNum += 1

        self.expect("if")
        self.expect("(")
        self.compileExpression()
        self.vm.writeIf(labelTrue)
        self.vm.writeGoto(labelFalse)
        self.vm.writeLabel(labelTrue)
        self.expect(")")
        self.expect("{")

        self.compileStatements()
        self.vm.writeGoto(labelEnd)

        self.expect("}")

        self.vm.writeLabel(labelFalse)

        if self.tokenizer.getToken() == "else":
            self.expect('else')
            self.expect("{")
            self.compileStatements()
            self.expect("}")

        self.vm.writeLabel(labelEnd)

        self.writeToXml('</ifStatement>')

    def compileWhile(self):
        """Compile ``while (...) {...}``; the negated condition exits the loop."""
        self.writeToXml('<whileStatement>')

        labelExp = "WHILE_EXP{}".format(self.whileLabelNum)
        labelEnd = "WHILE_END{}".format(self.whileLabelNum)
        self.whileLabelNum += 1

        self.vm.writeLabel(labelExp)
        self.expect("while")
        self.expect("(")

        self.compileExpression()
        self.vm.writeArithmetic(VMWriter.NOT)
        self.vm.writeIf(labelEnd)

        self.expect(")")
        self.expect("{")
        self.compileStatements()
        self.vm.writeGoto(labelExp)
        self.expect("}")

        self.vm.writeLabel(labelEnd)

        self.writeToXml('</whileStatement>')

    def compileReturn(self):
        """Compile ``return [expr];``; a bare return pushes constant 0."""
        self.writeToXml('<returnStatement>')

        self.expect("return")
        if self.tokenizer.getToken() != ";":
            self.compileExpression()
        else:
            self.vm.writePush(VMWriter.CONST, 0)
        self.vm.writeReturn()

        self.expect(";")

        self.writeToXml("</returnStatement>")

    def compileLet(self):
        """Compile ``let name[ [index] ] = expr;``."""
        self.writeToXml('<letStatement>')

        self.expect("let")

        ident = self.tokenizer.getToken()

        self.expectType('identifier')

        isArray = False
        if self.tokenizer.getToken() == "[":
            self.expect("[")
            self.compileExpression()

            # Element address = index + base; it stays on the stack while
            # the right-hand side is evaluated.
            self.vm.writePush(self.resolveSegment(ident),
                              self.symbolTable.indexOf(ident))
            self.vm.writeArithmetic(VMWriter.ADD)

            self.expect("]")
            isArray = True

        self.expect("=")
        self.compileExpression()

        if isArray:
            # Stack holds [address, value]: park the value, anchor THAT
            # at the address, then store the value.
            self.vm.writePop(VMWriter.TEMP, 0)
            self.vm.writePop(VMWriter.POINTER, 1)
            self.vm.writePush(VMWriter.TEMP, 0)
            self.vm.writePop(VMWriter.THAT, 0)
        else:
            self.vm.writePop(self.resolveSegment(ident),
                             self.symbolTable.indexOf(ident))

        self.expect(";")

        self.writeToXml('</letStatement>')

    # ------------------------------------------------------------------
    # Expressions
    # ------------------------------------------------------------------

    def compileExpression(self):
        """Compile ``term (op term)*`` left to right, without precedence."""
        self.writeToXml('<expression>')

        self.compileTerm()

        while self.tokenizer.getToken() in self.operators:
            operator = self.tokenizer.getToken()

            self.expect(self.operators)
            self.compileTerm()

            self.compileOperator(operator)

        self.writeToXml('</expression>')

    def compileOperator(self, operator):
        """Emit the VM command for a binary *operator*.

        ``*`` and ``/`` become calls into ``Math``; the remaining operators
        map to arithmetic commands looked up on the VM writer.  Unknown
        operators emit nothing.
        """
        libraryCalls = {'*': "Math.multiply", '/': "Math.divide"}
        # Attribute names are resolved lazily so only the command that is
        # actually needed is read from the writer.
        arithmeticNames = {
            '+': 'ADD', '-': 'SUB', '&': 'AND', '|': 'OR', '~': 'NOT',
            '>': 'GT', '<': 'LT', '=': 'EQ',
        }
        if operator in libraryCalls:
            self.vm.writeCall(libraryCalls[operator], 2)
        elif operator in arithmeticNames:
            self.vm.writeArithmetic(getattr(self.vm, arithmeticNames[operator]))

    def compileTerm(self):
        """Compile one term and leave its value on the stack."""
        self.writeToXml("<term>")

        token = self.tokenizer.getToken()
        tokenType = self.tokenizer.tokenType()

        if tokenType == 'keyword':
            if token == 'true':
                # true is the bitwise negation of 0.
                self.vm.writePush(VMWriter.CONST, 0)
                self.vm.writeArithmetic(VMWriter.NOT)
            elif token in ['false', 'null']:
                self.vm.writePush(VMWriter.CONST, 0)
            elif token == 'this':
                self.vm.writePush(VMWriter.POINTER, 0)

            self.expect(self.keywordsConstant)

        elif tokenType == 'identifier':
            identifier = token
            self.expectType('identifier')

            if self.tokenizer.getToken() == '[':
                self.expect('[')
                self.compileExpression()

                self.vm.writePush(self.resolveSegment(identifier),
                                  self.symbolTable.indexOf(identifier))
                self.vm.writeArithmetic(VMWriter.ADD)

                self.expect(']')
                self.vm.writePop(VMWriter.POINTER, 1)
                self.vm.writePush(VMWriter.THAT, 0)

            elif self.tokenizer.getToken() in ['.', '(']:
                self.compileSubroutineCall(identifier)
            else:
                self.vm.writePush(self.resolveSegment(identifier),
                                  self.symbolTable.indexOf(identifier))

        elif tokenType == 'intConst':
            self.vm.writePush(self.vm.CONST, token)
            self.expectType('intConst')

        elif tokenType == 'stringConst':
            # Drop the first and last character of the token text
            # (presumably the surrounding quotes).
            string = token[1:-1]
            self.expectType('stringConst')

            self.vm.writePush(VMWriter.CONST, len(string))
            self.vm.writeCall('String.new', 1)

            for char in string:
                self.vm.writePush(VMWriter.CONST, ord(char))
                self.vm.writeCall('String.appendChar', 2)

        elif token == '~':
            self.expect('~')
            self.compileTerm()
            self.vm.writeArithmetic(VMWriter.NOT)

        elif token == '-':
            self.expect('-')
            self.compileTerm()
            self.vm.writeArithmetic(VMWriter.NEG)

        elif token == '(':
            self.expect('(')
            self.compileExpression()
            self.expect(')')

        else:
            # Nothing that can start a term: report it instead of silently
            # producing an empty <term>.
            self.errorExpected(token, 'term')

        self.writeToXml("</term>")

    def compileSubroutineCall(self, identifier):
        """Compile the rest of a call whose leading *identifier* was consumed.

        ``name(...)`` is a method call on the current object.
        ``prefix.name(...)`` is a method call when *prefix* is a known
        variable (the object is pushed as hidden first argument), otherwise
        a call into the class named *prefix*.
        """
        if self.tokenizer.getToken() == '(':
            self.vm.writePush(VMWriter.POINTER, 0)

            self.expect('(')
            nArgs = self.compileExpressionList() + 1
            self.expect(')')

            self.vm.writeCall(self.className + '.' + identifier, nArgs)
            return

        # Anything other than '(' must be '.'; reject it here rather than
        # failing later on an unassigned function name.
        self.expect(".")

        isObject = self.symbolTable.hasOf(identifier)
        if isObject:
            self.vm.writePush(self.resolveSegment(identifier),
                              self.symbolTable.indexOf(identifier))
            owner = self.symbolTable.typeOf(identifier)
        else:
            owner = identifier
        nameFunction = owner + '.' + self.tokenizer.getToken()

        self.expectType("identifier")
        self.expect("(")
        nArgs = self.compileExpressionList()
        if isObject:
            nArgs += 1

        self.expect(")")
        self.vm.writeCall(nameFunction, nArgs)

    def resolveSegment(self, ident):
        """Return the VM segment for *ident*: its kind, with 'field' mapped to THIS."""
        segment = self.symbolTable.kindOf(ident)
        if segment == 'field':
            segment = VMWriter.THIS

        return segment

    # ------------------------------------------------------------------
    # Token helpers
    # ------------------------------------------------------------------

    def expect(self, expected):
        """Require the current token text to be *expected* (a string or a
        list of alternatives), write it to the XML and advance."""
        current = self.tokenizer.getToken()
        if isinstance(expected, list):
            if current not in expected:
                self.errorExpected(current, "|".join(expected))
        elif current != expected:
            self.errorExpected(current, expected)

        self.printToken()
        self.tokenizer.advance()

    def expectType(self, expected):
        """Require the current token type to be *expected* (a string or a
        list of alternatives), write the token to the XML and advance."""
        currentType = self.tokenizer.tokenType()
        if isinstance(expected, list):
            if currentType not in expected:
                self.errorExpected(self.tokenizer.getToken(), "|".join(expected))
        elif currentType != expected:
            self.errorExpected(self.tokenizer.getToken(), expected)

        self.printToken()
        self.tokenizer.advance()

    def printToken(self):
        """Write the current token as ``<type>text</type>`` with escaped text."""
        tokenType = self.tokenizer.tokenType()

        self.writeToXml("<" + tokenType + ">" + escape(self.tokenizer.getToken()) + "</" + tokenType + ">")

    def errorExpected(self, atual, expected):
        """Abort with a message naming the expected and the actual token.

        Raises:
            SystemExit: always.
        """
        import sys  # local import: the file's import block is not visible here

        sys.exit("Expected " + expected + ", " + atual + " given")

    def writeToXml(self, el):
        """Append *el* verbatim to the XML output file."""
        self.file.write(el)
コード例 #10
0
class CompilationEngine():
    """
        Compiles a tokenized Jack class into VM commands.

        The input is a token file holding one XML-style token per line
        (``<kind> value </kind>``); the first line is skipped and reading
        stops at the line containing ``</tokens>``. VM commands are emitted
        through ``self.code_writer``.

        NOTE(review): token values are used verbatim. If the token file
        XML-escapes ``<``, ``>`` and ``&``, those operators will not match
        OPERATORS - confirm against the tokenizer that produces the file.
    """
    OPERATORS = ['+', '-', '*', '/', '&', '|', '<', '>', '=']

    def __init__(self, token_file, output_file):
        """
            Creates a new compilation engine with
            the given input and output and immediately
            compiles the whole token file.

            An existing `output_file` is removed first. The two file
            handles opened here are closed when compilation finishes
            or fails.
        """
        if os.path.exists(output_file):
            os.remove(output_file)

        self.input = open(token_file, 'r')
        try:
            self.output = open(output_file, 'a+')
            try:
                self.current_line = self.input.readline()
                self.symbol_table = None
                self.code_writer = VMWriter(output_file)
                self.label_counter = 0

                self._compile()
            finally:
                self.output.close()
        finally:
            self.input.close()

    def _compile(self):
        """
            Compiles the whole Jack program.
        """
        # Skip the first line, which identifies the token file
        self.current_line = self.input.readline()
        # Walk the file until the closing tag; also stop at end of file so a
        # truncated token file cannot loop forever
        while self.current_line and "</tokens>" not in self.current_line:
            self.compileClass()

    def _identify_key(self, line):
        """
            Returns the tag name of a token line,
            e.g. "symbol" for "<symbol> { </symbol>".
        """
        tag_end = line.find('>')
        return line[1:tag_end]

    def _identify_value(self, line):
        """
            Returns the text between the opening and
            closing tags of a token line.
        """
        first_tag_end = line.find('> ')
        # Search from the right so a value that itself contains " </"
        # (possible inside a string constant) is not cut short
        last_tag_start = line.rfind(' </')
        return line[first_tag_end + 2:last_tag_start]

    def _skipLine(self):
        """
            Advances to the next token line.
        """
        self.current_line = self.input.readline()

    def _generateLabel(self):
        """
            Returns a fresh label name ("L0", "L1", ...).
        """
        label = "L{}".format(self.label_counter)
        self.label_counter += 1
        return label

    def compileClass(self):
        """
            Compiles a complete class.
        """
        # Every new class gets a new symbol table
        self.symbol_table = SymbolTable()

        # Skip <keyword> class </keyword>
        self._skipLine()
        # Record and skip the class name <identifier> name </identifier>
        name = self._identify_value(self.current_line)
        self._skipLine()
        # Skip the class opening symbol <symbol> { </symbol>
        self._skipLine()

        self.compileClassVarDec()
        self.compileSubroutineDec(name)

        # Skip the class closing symbol <symbol> } </symbol>
        self._skipLine()

    def compileClassVarDec(self):
        """
            Compiles a static variable declaration,
            or a field declaration.
        """
        # Handle several consecutive variable declarations
        while self._identify_value(
                self.current_line) in ["var", "static", "field"]:
            # Record and skip the declaration kind
            kind = self._identify_value(self.current_line)
            self._skipLine()
            # Record and skip the data type
            var_type = self._identify_value(self.current_line)
            self._skipLine()

            # Consume the declaration up to its terminating character
            while (self.current_line
                   and self._identify_value(self.current_line) != ';'):
                if self._identify_key(self.current_line) != "symbol":
                    # Not a comma, so this is a new variable name
                    name = self._identify_value(self.current_line)
                    self._skipLine()
                    # Add the variable to the symbol table
                    self.symbol_table.define(name, var_type, kind)
                else:
                    # A comma: just skip it
                    self._skipLine()

            # Skip the terminating ;
            self._skipLine()

    def compileSubroutineDec(self, class_name):
        """
            Compiles a complete method, function,
            or constructor.

            `class_name` is used to build the qualified
            "Class.subroutine" name of every emitted function.
        """
        # Handle several consecutive subroutines
        while self._identify_value(
                self.current_line) in ["method", "function", "constructor"]:
            # Start a new symbol table scope for the subroutine
            self.symbol_table.startSubroutine()

            # Skip the declaration <keyword> function </keyword>
            self._skipLine()
            # Skip the return type <keyword> void </keyword>
            self._skipLine()
            # Record and skip the name <identifier> name </identifier>
            name = self._identify_value(self.current_line)
            self._skipLine()
            # Skip the start of the parameters <symbol> ( </symbol>
            self._skipLine()
            # Register the parameters in the symbol table
            self.compileParameterList()
            # Skip the end of the parameters <symbol> ) </symbol>
            self._skipLine()

            # The VM "function" command needs the number of local
            # variables, which is only known once the var declarations of
            # the body are parsed, so the body emits the declaration
            self.compileSubroutineBody("{}.{}".format(class_name, name))

    def compileParameterList(self):
        """
            Compiles a (possibly empty) parameter
            list. Does not handle the enclosing "()".

            Returns the number of parameters found.
        """
        parameters_count = 0

        # Consume lines until the end-of-parameters character
        while (self.current_line
               and self._identify_value(self.current_line) != ')'):
            if self._identify_key(self.current_line) != "symbol":
                # Record and skip the type <keyword> int </keyword>
                arg_type = self._identify_value(self.current_line)
                self._skipLine()
                # Record and skip the name <identifier> name </identifier>
                name = self._identify_value(self.current_line)
                self._skipLine()
                # Add the argument to the subroutine's symbol table
                self.symbol_table.define(name, arg_type, "argument")
                parameters_count += 1
            else:
                # Skip the comma
                self._skipLine()

        return parameters_count

    def compileSubroutineBody(self, function_name=None):
        """
            Compiles a subroutine's body.

            When `function_name` is given, the VM function
            declaration is written right after the var declarations,
            using the number of local variables they declared.
        """
        # Skip the block opening <symbol> { </symbol>
        self._skipLine()

        n_locals = self.compileVarDec()
        if function_name is not None:
            self.code_writer.writeFunction(function_name, n_locals)
        self.compileStatements()

        # Skip the block end <symbol> } </symbol>
        self._skipLine()

    def compileVarDec(self):
        """
            Compiles a var declaration.

            Returns the number of variables declared.
        """
        declared = 0

        # Handle several consecutive var declarations
        while self._identify_value(self.current_line) == "var":
            # Record and skip the declaration <keyword> var </keyword>
            kind = self._identify_value(self.current_line)
            self._skipLine()
            # Record and skip the type <keyword> int </keyword>
            var_type = self._identify_value(self.current_line)
            self._skipLine()

            # Consume the declaration up to its terminating character
            while (self.current_line
                   and self._identify_value(self.current_line) != ';'):
                if self._identify_key(self.current_line) != "symbol":
                    # Not a comma, so this is a new variable name
                    name = self._identify_value(self.current_line)
                    self._skipLine()
                    # Add the variable to the symbol table
                    self.symbol_table.define(name, var_type, kind)
                    declared += 1
                else:
                    # Skip the comma
                    self._skipLine()

            # Skip the terminating ;
            self._skipLine()

        return declared

    def compileStatements(self):
        """
            Compiles a sequence of statements.
            Does not handle the enclosing "{}".
        """
        keyword = self._identify_value(self.current_line)

        # Handle several consecutive statements
        while keyword in ["let", "if", "while", "do", "return"]:
            if keyword == "let":
                self.compileLet()
            elif keyword == "if":
                self.compileIf()
            elif keyword == "while":
                self.compileWhile()
            elif keyword == "do":
                self.compileDo()
            elif keyword == "return":
                self.compileReturn()

            keyword = self._identify_value(self.current_line)

    def compileLet(self):
        """
            Compiles a let statement.
        """
        # Skip <keyword> let </keyword>
        self._skipLine()
        # Record and skip the variable name <identifier> name </identifier>
        name = self._identify_value(self.current_line)
        self._skipLine()

        # A [ means an array element is being assigned
        is_array = self._identify_value(self.current_line) == '['
        if is_array:
            # Skip the opening bracket [
            self._skipLine()
            # Compile the index expression
            self.compileExpression()
            # Skip the closing bracket ]
            self._skipLine()

            # Leave (base address + index) on the stack
            kind = self.symbol_table.kindOf(name)
            index = self.symbol_table.indexOf(name)
            self.code_writer.writePush(kind, index)
            self.code_writer.writeArithmetic('+')

        # Skip the assignment <symbol> = </symbol>
        self._skipLine()
        # Compile the right-hand side
        self.compileExpression()
        # Skip the end of the statement <symbol> ; </symbol>
        self._skipLine()

        if is_array:
            # Park the value in temp 0 so the target address can be moved
            # into pointer 1, then store the value through "that"
            self.code_writer.writePop("temp", 0)
            self.code_writer.writePop('pointer', 1)
            self.code_writer.writePush("temp", 0)
            self.code_writer.writePop('that', 0)
        else:
            # Store the expression result in the variable
            kind = self.symbol_table.kindOf(name)
            index = self.symbol_table.indexOf(name)
            self.code_writer.writePop(kind, index)

    def compileIf(self):
        """
            Compiles an if statement,
            possibly with a trailing else clause.
        """
        else_label = self._generateLabel()
        end_label = self._generateLabel()

        # Skip <keyword> if </keyword>
        self._skipLine()
        # Skip the start of the condition <symbol> ( </symbol>
        self._skipLine()
        # Compile the condition
        self.compileExpression()
        # Skip the end of the condition <symbol> ) </symbol>
        self._skipLine()

        # Negate the condition and jump to the else part when it fails
        self.code_writer.writeArithmetic("~")
        self.code_writer.writeIf(else_label)

        # Skip the start of the if block <symbol> { </symbol>
        self._skipLine()
        # compileStatements already consumes every statement of the block;
        # calling it once avoids spinning forever on an unexpected token
        self.compileStatements()
        # Skip the end of the block <symbol> } </symbol>
        self._skipLine()

        # Jump over the else part, then mark where it starts
        self.code_writer.writeGoto(end_label)
        self.code_writer.writeLabel(else_label)

        # Check for an else block
        if self._identify_value(self.current_line) == "else":
            # Skip <keyword> else </keyword>
            self._skipLine()
            # Skip the start of the block <symbol> { </symbol>
            self._skipLine()
            # Compile the block contents
            self.compileStatements()
            # Skip the end of the block <symbol> } </symbol>
            self._skipLine()

        # Mark the end of the whole statement
        self.code_writer.writeLabel(end_label)

    def compileWhile(self):
        """
            Compiles a while statement.
        """
        # The two labels the loop needs
        start_label = self._generateLabel()
        end_label = self._generateLabel()

        # Mark the start of the loop
        self.code_writer.writeLabel(start_label)

        # Skip <keyword> while </keyword>
        self._skipLine()
        # Skip the start of the condition <symbol> ( </symbol>
        self._skipLine()
        # Compile the condition
        self.compileExpression()

        # Negate the condition and leave the loop when it fails
        self.code_writer.writeArithmetic("~")
        self.code_writer.writeIf(end_label)

        # Skip the end of the condition <symbol> ) </symbol>
        self._skipLine()
        # Skip the start of the block <symbol> { </symbol>
        self._skipLine()
        # compileStatements already consumes every statement of the block;
        # calling it once avoids spinning forever on an unexpected token
        self.compileStatements()
        # Skip the end of the block <symbol> } </symbol>
        self._skipLine()

        # Jump back to the start, then mark the loop exit
        self.code_writer.writeGoto(start_label)
        self.code_writer.writeLabel(end_label)

    def compileDo(self):
        """
            Compiles a do statement.
        """
        # Skip <keyword> do </keyword>
        self._skipLine()
        # Collect the called name: every token up to the opening (
        function = ""
        while (self.current_line
               and self._identify_value(self.current_line) != '('):
            function += self._identify_value(self.current_line)
            self._skipLine()

        # Skip the start of the argument list <symbol> ( </symbol>
        self._skipLine()
        # Compile the argument list
        n_args = self.compileExpressionList()
        # Skip the end of the list <symbol> ) </symbol>
        self._skipLine()
        # Skip the end of the statement <symbol> ; </symbol>
        self._skipLine()

        # Emit the call
        self.code_writer.writeCall(function, n_args)

        # A do statement discards the returned value, so pop it
        # into the temp segment
        self.code_writer.writePop("temp", 0)

    def compileReturn(self):
        """
            Compiles a return statement.
        """
        # Skip <keyword> return </keyword>
        self._skipLine()
        is_void = (self._identify_key(self.current_line) == "symbol"
                   and self._identify_value(self.current_line) == ';')
        if not is_void:
            # Anything other than the terminator starts the returned
            # expression, including the symbols (, - and ~
            self.compileExpression()
        else:
            # Nothing is returned, but the caller still expects a value,
            # so push 0
            self.code_writer.writePush("constant", 0)
        # Skip the end of the statement <symbol> ; </symbol>
        self._skipLine()

        # Emit the return command
        self.code_writer.writeReturn()

    def compileExpression(self):
        """
            Compiles an expression: a term followed by
            any number of (operator term) pairs, applied
            left to right.
        """
        # An expression always starts with a term
        self.compileTerm()

        # Keep consuming (operator term) pairs while an operator follows
        operator = self._identify_value(self.current_line)
        while operator in self.OPERATORS:
            # Skip the operator
            self._skipLine()
            # Compile the right-hand term
            self.compileTerm()
            # Emit the operation
            self.code_writer.writeArithmetic(operator)
            operator = self._identify_value(self.current_line)

    def compileTerm(self):
        """
            Compiles a term. If the current token
            is an identifier, the routine must
            distinguish between a variable, an
            array entry, or a subroutine call. A
            single look-ahead token, which may be one of
            "[", "(", or ".", suffices to distinguish
            between the possibilities. Any other token is
            not part of this term and should not be advanced
            over.
        """
        key = self._identify_key(self.current_line)
        value = self._identify_value(self.current_line)

        if key == "identifier":
            # Either a variable name or the start of a call:
            # var, var[expression], target.call()
            # Record and skip the identifier, then look at what follows
            name = value
            self._skipLine()

            if self._identify_value(self.current_line) == '.':
                # A . means a subroutine call
                # Record and skip the dot
                name += "."
                self._skipLine()
                # Record and skip the subroutine name
                name += self._identify_value(self.current_line)
                self._skipLine()
                # Skip the start of the call (
                self._skipLine()
                # Compile the (possibly empty) argument list
                n_args = self.compileExpressionList()
                # Skip the end of the call )
                self._skipLine()
                # Emit the call
                self.code_writer.writeCall(name, n_args)
            elif self._identify_value(self.current_line) == '[':
                # A [ means an array access
                # Skip the opening bracket [
                self._skipLine()
                # Compile the index expression
                self.compileExpression()
                # Skip the closing bracket ]
                self._skipLine()

                kind = self.symbol_table.kindOf(name)
                index = self.symbol_table.indexOf(name)
                # Push the array base address
                self.code_writer.writePush(kind, index)

                # base + index -> pointer 1, then read through "that"
                self.code_writer.writeArithmetic('+')
                self.code_writer.writePop('pointer', 1)
                self.code_writer.writePush('that', 0)
            else:
                # Plain variable: push its value
                kind = self.symbol_table.kindOf(name)
                index = self.symbol_table.indexOf(name)
                self.code_writer.writePush(kind, index)
        elif key == "symbol" and value == '(':
            # Skip the opening (
            self._skipLine()
            # Compile the inner expression
            self.compileExpression()
            # Skip the closing )
            self._skipLine()
        elif key == "keyword":
            # Keyword constants
            if value == "true":
                # true is represented as ~0
                self.code_writer.writePush("constant", 0)
                self.code_writer.writeArithmetic('~')
            elif value in ("false", "null"):
                self.code_writer.writePush("constant", 0)
            elif value == "this":
                self.code_writer.writePush("pointer", 0)
            self._skipLine()
        elif key == "stringConstant":
            # Allocate a string of the right length ...
            self.code_writer.writePush("constant", len(value))
            self.code_writer.writeCall("String.new", 1)

            # ... and append each character code to it
            for char in value:
                self.code_writer.writePush("constant", ord(char))
                self.code_writer.writeCall("String.appendChar", 2)
            # Skip the string constant itself
            self._skipLine()
        elif key == "integerConstant":
            # Push the constant
            self.code_writer.writePush("constant", value)
            # Skip the constant
            self._skipLine()
        elif value in ['-', '~']:
            # A unary operator followed by the rest of the term:
            # compile the operand first, then apply the operator
            op = value if value == '~' else 'neg'
            self._skipLine()
            self.compileTerm()
            self.code_writer.writeArithmetic(op)

    def compileExpressionList(self):
        """
            Compiles a (possibly empty) comma-separated
            list of expressions.

            Returns the number of expressions compiled.
        """
        arguments_count = 0

        while (self.current_line
               and self._identify_value(self.current_line) != ')'):
            if self._identify_value(self.current_line) == ',':
                # Skip the comma
                self._skipLine()
            else:
                # Compile the expression
                self.compileExpression()
                arguments_count += 1

        return arguments_count
コード例 #11
0
class CompilationEngine:
    """
    Single-pass Jack compiler: reads tokens from a JackTokenizer and emits VM
    commands through a VMWriter.

    Look-ahead is implemented with `_preserveCurrentToken`: while it is True
    the next `_eatObligatory` / `_eatExpected` call re-examines
    `_currentToken` instead of advancing the tokenizer.
    """

    def __init__(self, filepath):
        """Set up the tokenizer, writer and symbol tables for `filepath`."""
        self._tokenizer = JackTokenizer(filepath)
        self._writer = VMWriter(filepath)
        self._classVariables = SymbolTable()
        self._subroutineVariables = SymbolTable()
        self._currentToken = None
        self._preserveCurrentToken = False
        self._className = ''
        self._currentCompilingFunction = {'kind': '', 'name': ''}
        # Shared by if and while statements to keep labels unique inside one
        # subroutine
        self._numberConditionalsStatementsCurrentFunction = 0

    def run(self):
        """Compile the class and close the writer."""
        self._compileClass()
        self._writer.close()
        return

    #compile functions
    def _compileClass(self):
        """Compile `class Name { classVarDec* subroutineDec* }`."""
        self._eatObligatory([T_KEYWORD], [K_CLASS])
        self._eatObligatory([T_IDENTIFIER])
        self._className = self._currentToken['value']

        self._eatObligatory([T_SYMBOL], ['{'])
        self._compileClassVarDeclarations()
        self._compileSubroutineDeclarations()
        self._eatObligatory([T_SYMBOL], ['}'])
        return

    def _compileClassVarDeclarations(self):
        """Register every static/field declaration in the class table."""
        self._classVariables.startSubroutine()

        while self._eatExpected([T_KEYWORD], [K_STATIC, K_FIELD]):
            kind = VAR_STATIC if self._currentToken['value'] == K_STATIC else VAR_FIELD
            varType, name = self._compileTypedVarDeclaration()
            self._classVariables.insert(name, varType, kind)

            # Further names sharing the same type and kind
            while self._eatExpected([T_SYMBOL], [',']):
                self._eatObligatory([T_IDENTIFIER])
                name = self._currentToken['value']
                self._classVariables.insert(name, varType, kind)

            self._eatObligatory([T_SYMBOL], [';'])
        return

    def _compileSubroutineDeclarations(self):
        """Compile every constructor, function and method of the class."""
        while self._eatExpected([T_KEYWORD], [K_CONSTRUCTOR, K_FUNCTION, K_METHOD]):
            subroutineKind = self._currentToken['value']
            self._currentCompilingFunction['kind'] = subroutineKind
            self._subroutineVariables.startSubroutine()
            if subroutineKind == K_METHOD:
                # A method receives the object as argument 0, so reserve that
                # slot before the declared parameters are numbered
                self._subroutineVariables.insert('this', self._className, VAR_ARG)

            self._eatObligatory([T_KEYWORD, T_IDENTIFIER], [K_INT, K_CHAR, K_BOOLEAN, K_VOID])
            self._eatObligatory([T_IDENTIFIER])
            self._currentCompilingFunction['name'] = self._currentToken['value']

            self._eatObligatory([T_SYMBOL], ['('])
            self._compileParameterList()
            self._eatObligatory([T_SYMBOL], [')'])
            self._compileSubroutineBody()
        return

    def _compileParameterList(self):
        """Register the (possibly empty) parameter list as arguments."""
        if self._eatExpected([T_KEYWORD, T_IDENTIFIER], [K_INT, K_CHAR, K_BOOLEAN]):
            varType = self._currentToken['value']
            self._eatObligatory([T_IDENTIFIER])
            name = self._currentToken['value']
            self._subroutineVariables.insert(name, varType, VAR_ARG)

            while self._eatExpected([T_SYMBOL], [',']):
                varType, name = self._compileTypedVarDeclaration()
                self._subroutineVariables.insert(name, varType, VAR_ARG)
        return

    def _compileSubroutineBody(self):
        """Compile `{ varDec* statements }` and emit the function header."""
        self._eatObligatory([T_SYMBOL], ['{'])
        self._compileVarDeclaration()

        # The header needs the local count, so it is written only after the
        # var declarations were parsed
        funcName = self._className + '.' + self._currentCompilingFunction['name']
        nLocalVars = self._subroutineVariables.getVarCountByKind(VAR_LOCAL)
        self._writer.writeFunction(funcName, nLocalVars)

        self._numberConditionalsStatementsCurrentFunction = 0
        subroutineKind = self._currentCompilingFunction['kind']
        if subroutineKind == K_CONSTRUCTOR:
            self._compileConstructorCode()
        elif subroutineKind == K_METHOD:
            self._compileMethodCode()
        self._compileStatements()
        self._eatObligatory([T_SYMBOL], ['}'])
        return

    def _compileVarDeclaration(self):
        """Register every `var` declaration as a local variable."""
        while self._eatExpected([T_KEYWORD], [K_VAR]):
            varType, name = self._compileTypedVarDeclaration()
            self._subroutineVariables.insert(name, varType, VAR_LOCAL)
            while self._eatExpected([T_SYMBOL], [',']):
                self._eatObligatory([T_IDENTIFIER])
                name = self._currentToken['value']
                self._subroutineVariables.insert(name, varType, VAR_LOCAL)
            self._eatObligatory([T_SYMBOL], [';'])
        return


    def _compileStatements(self):
        """Compile statements until a token that starts none is found."""
        while self._eatExpected([T_KEYWORD], [K_LET, K_IF, K_WHILE, K_DO, K_RETURN]):
            self._compileStatementByKeyword()
        return

    def _compileLetStatement(self):
        """Compile `let name = expr;` or `let name[expr] = expr;`."""
        self._eatObligatory([T_IDENTIFIER])
        name = self._currentToken['value']
        segment, index = self._requireVariable(name)

        isArrayAssignment = False
        if self._eatExpected([T_SYMBOL], ['[']):
            # Leaves the target address on the stack
            self._compileArrayPosition(name)
            isArrayAssignment = True

        self._eatObligatory([T_SYMBOL], ['='])
        self._compileExpression()
        if isArrayAssignment:
            # Park the value so the address below it can go to pointer 1
            self._writer.writePop(SEGMENT_TEMP, 0)
            self._writer.writePop(SEGMENT_POINTER, 1)
            self._writer.writePush(SEGMENT_TEMP, 0)
            self._writer.writePop(SEGMENT_THAT, 0)
        else:
            self._writer.writePop(segment, index)
        self._eatObligatory([T_SYMBOL], [';'])
        return

    def _compileIfStatement(self):
        """Compile an if statement with an optional else block."""
        funcName = self._className + '.' + self._currentCompilingFunction['name']
        notIfLabel = f'{funcName}_NOT_IF_{self._numberConditionalsStatementsCurrentFunction}'
        endComparisonLabel = f'{funcName}_END_COMPARISON_BLOCK_{self._numberConditionalsStatementsCurrentFunction}'
        self._numberConditionalsStatementsCurrentFunction += 1

        self._eatObligatory([T_SYMBOL], ['('])
        self._compileExpression()
        self._writer.writeArithmetic('not')
        self._eatObligatory([T_SYMBOL], [')'])
        self._writer.writeIf(notIfLabel)

        self._eatObligatory([T_SYMBOL], ['{'])
        self._compileStatements()
        self._eatObligatory([T_SYMBOL], ['}'])
        self._writer.writeGoto(endComparisonLabel)

        self._writer.writeLabel(notIfLabel)
        if self._eatExpected([T_KEYWORD], [K_ELSE]):
            self._eatObligatory([T_SYMBOL], ['{'])
            self._compileStatements()
            self._eatObligatory([T_SYMBOL], ['}'])
        self._writer.writeLabel(endComparisonLabel)
        return

    def _compileWhileStatement(self):
        """Compile a while loop."""
        funcName = self._className + '.' + self._currentCompilingFunction['name']
        loopLabel = f'{funcName}_LOOP_{self._numberConditionalsStatementsCurrentFunction}'
        endLoopLabel = f'{funcName}_END_LOOP_{self._numberConditionalsStatementsCurrentFunction}'
        self._numberConditionalsStatementsCurrentFunction += 1

        self._writer.writeLabel(loopLabel)
        self._eatObligatory([T_SYMBOL], ['('])
        self._compileExpression()
        self._writer.writeArithmetic('not')
        self._eatObligatory([T_SYMBOL], [')'])
        self._writer.writeIf(endLoopLabel)

        self._eatObligatory([T_SYMBOL], ['{'])
        self._compileStatements()
        self._eatObligatory([T_SYMBOL], ['}'])
        self._writer.writeGoto(loopLabel)
        self._writer.writeLabel(endLoopLabel)
        return

    def _compileDoStatement(self):
        """Compile `do call;`, discarding the returned value."""
        self._compileSubroutineCall()
        self._writer.writePop(SEGMENT_TEMP, 0)
        self._eatObligatory([T_SYMBOL], [';'])
        return

    def _compileReturnStatement(self):
        """Compile `return;` (pushes 0) or `return expr;`."""
        if self._eatExpected([T_SYMBOL], [';']):
            self._writer.writePush(SEGMENT_CONST, 0)
        else:
            self._compileExpression()
            self._eatObligatory([T_SYMBOL], [';'])
        self._writer.writeReturn()
        return

    def _compileExpression(self):
        """Compile `term (op term)*`, applying operators left to right."""
        self._compileTerm()
        # Loop so that chains such as `a + b - c` are fully consumed; when the
        # look-ahead is not an operator it stays preserved for the caller
        while self._eatExpected([T_SYMBOL], ['+', '-', '*', '/', '&', '|', '<', '>', '=']):
            operator = self._currentToken['value']
            self._compileTerm()
            self._writer.writeArithmetic(VM_COMMAND_BY_JACK_OPERATOR[operator])
        return

    def _compileTerm(self):
        """Compile one term: constant, variable, array entry, call,
        parenthesised expression or unary operation."""
        requiredTypes = [T_INTEGER_CONSTANT, T_STRING_CONSTANT, T_KEYWORD, T_IDENTIFIER, T_SYMBOL]
        requiredValues = [K_TRUE, K_FALSE, K_NULL, K_THIS, '(', '-', '~']
        self._eatObligatory(requiredTypes, requiredValues)
        tokenType = self._currentToken['type']

        if tokenType == T_INTEGER_CONSTANT:
            integer = self._currentToken['value']
            self._writer.writePush(SEGMENT_CONST, integer)

        elif tokenType == T_STRING_CONSTANT:
            stringConst = self._currentToken['value'].replace('"', '')
            self._writer.writePush(SEGMENT_CONST, len(stringConst))
            self._writer.writeCall('String.new', 1)
            for char in stringConst:
                self._writer.writePush(SEGMENT_CONST, ord(char))
                self._writer.writeCall('String.appendChar', 2)

        elif tokenType == T_KEYWORD:
            constant = self._currentToken['value']
            if constant == K_FALSE or constant == K_NULL:
                self._writer.writePush(SEGMENT_CONST, 0)
            elif constant == K_TRUE:
                # true is -1
                self._writer.writePush(SEGMENT_CONST, 1)
                self._writer.writeArithmetic('neg')
            else:
                # this
                self._writer.writePush(SEGMENT_POINTER, 0)

        elif tokenType == T_SYMBOL:
            symbol = self._currentToken['value']
            if symbol == '(':
                self._compileExpression()
                self._eatObligatory([T_SYMBOL], [')'])
            else:
                unaryOperation = 'neg' if symbol == '-' else 'not'
                self._compileTerm()
                self._writer.writeArithmetic(unaryOperation)

        elif tokenType == T_IDENTIFIER:
            name = self._currentToken['value']
            if self._eatExpected([T_SYMBOL], ['[', '.', '(']):
                symbol = self._currentToken['value']

                if symbol == '[':
                    self._compileArrayPosition(name)
                    self._writer.writePop(SEGMENT_POINTER, 1)
                    self._writer.writePush(SEGMENT_THAT, 0)
                else:
                    # Hand the '.' or '(' back so the call routine sees it
                    self._preserveCurrentToken = True
                    self._compileSubroutineCall(name)
            else:
                segment, index = self._requireVariable(name)
                self._writer.writePush(segment, index)
        return

    def _compileExpressionList(self):
        """Compile a (possibly empty) comma-separated expression list and
        return how many expressions it held. The closing ')' is left for the
        caller."""
        nArgs = 0
        if not self._eatExpected([T_SYMBOL], [')']):
            self._compileExpression()
            nArgs += 1
            while self._eatExpected([T_SYMBOL], [',']):
                self._compileExpression()
                nArgs += 1
        # Whatever was read last (normally ')') must be seen by the caller
        self._preserveCurrentToken = True
        return nArgs

    #aux compile functions
    def _compileTypedVarDeclaration(self):
        """Consume `type name` and return the pair (type, name)."""
        self._eatObligatory([T_KEYWORD, T_IDENTIFIER], [K_INT, K_CHAR, K_BOOLEAN])
        varType = self._currentToken['value']
        self._eatObligatory([T_IDENTIFIER])
        name = self._currentToken['value']
        return varType, name

    def _compileStatementByKeyword(self):
        """Dispatch to the compile routine of the current statement keyword."""
        COMPILE_FUNCTION_BY_KEYWORD = {
            K_LET : self._compileLetStatement,
            K_IF : self._compileIfStatement,
            K_WHILE : self._compileWhileStatement,
            K_DO: self._compileDoStatement,
            K_RETURN : self._compileReturnStatement
        }

        keyword = self._currentToken['value']
        COMPILE_FUNCTION_BY_KEYWORD[keyword]()
        return

    def _compileSubroutineCall(self, name = None):
        """
        Compile `name(args)` or `prefix.name(args)`.

        `name` is the first identifier of the call when the caller already
        consumed it; otherwise it is read from the tokenizer.
        """
        if name is None:
            self._eatObligatory([T_IDENTIFIER])
            name = self._currentToken['value']

        nArgs = 0
        if self._eatExpected([T_SYMBOL], ['.']):
            self._eatObligatory([T_IDENTIFIER])
            funcName = self._currentToken["value"]
            varInfo = self._searchVariableByName(name)
            if varInfo is not None:
                # Method call on a variable: the object is the first argument
                # NOTE(review): funcName is left unqualified here; it
                # presumably needs the variable's declared type as prefix, but
                # the symbol-table entry fields for that are not visible from
                # this class - confirm against SymbolTable.
                segment, index = varInfo
                self._writer.writePush(segment, index)
                nArgs += 1
            else:
                # Prefix is not a variable, so it is treated as a class name
                funcName = f'{name}.{funcName}'
        else:
            # An unqualified call is a method call on the current object:
            # pass `this` as the first argument and qualify the name
            self._writer.writePush(SEGMENT_POINTER, 0)
            nArgs += 1
            funcName = f'{self._className}.{name}'

        self._eatObligatory([T_SYMBOL], ['('])
        nArgs += self._compileExpressionList()
        self._eatObligatory([T_SYMBOL], [')'])
        self._writer.writeCall(funcName, nArgs)
        return

    def _compileConstructorCode(self):
        """Emit the constructor prologue: allocate the object and anchor
        `this` to it."""
        # The object needs one word per field of the class (not per
        # constructor argument)
        nFields = self._classVariables.getVarCountByKind(VAR_FIELD)
        self._writer.writePush(SEGMENT_CONST, nFields)
        self._writer.writeCall('Memory.alloc', 1)
        self._writer.writePop(SEGMENT_POINTER, 0)
        return

    def _compileMethodCode(self):
        """Emit the method prologue: anchor `this` to argument 0."""
        self._writer.writePush(SEGMENT_ARG, 0)
        self._writer.writePop(SEGMENT_POINTER, 0)
        return

    def _compileArrayPosition(self, arrName):
        """Compile `expr ]` after `arrName [` and leave the address of the
        selected entry on the stack."""
        segment, index = self._requireVariable(arrName)

        self._writer.writePush(segment, index)
        self._compileExpression()
        self._writer.writeArithmetic('add')

        self._eatObligatory([T_SYMBOL], [']'])
        return

    #aux functions
    def _eatObligatory(self, requiredTokenTypes, requiredTokenValues = None):
        """
        Consume the next token, which must match.

        The token type must be in `requiredTokenTypes`; for token types listed
        in TOKEN_TYPES_WITH_EXPECTABLE_VALUES the value must also be in
        `requiredTokenValues` when that list is non-empty. On a mismatch, or
        when the tokenizer is exhausted, a compilation error is written and
        the process exits with status 1.
        """
        if requiredTokenValues is None:
            requiredTokenValues = []

        if not self._preserveCurrentToken and not self._tokenizer.hasMoreTokens():
            self._writer.writeCompilationError('MORE TOKENS EXPECTED!')
            raise SystemExit(1)

        if self._preserveCurrentToken:
            # Re-use the look-ahead token instead of advancing
            self._preserveCurrentToken = False
        else:
            self._currentToken = self._tokenizer.advance()

        token = self._currentToken
        wrongType = token['type'] not in requiredTokenTypes
        wrongValue = (token['type'] in TOKEN_TYPES_WITH_EXPECTABLE_VALUES and
                      len(requiredTokenValues) > 0 and
                      token['value'] not in requiredTokenValues)
        if wrongType or wrongValue:
            self._writer.writeCompilationError(f'SYNTAX ERROR!')
            self._writer.writeCompilationError(f'TOKEN GIVEN: {self._currentToken}')
            self._writer.writeCompilationError(f'EXPECTED: {requiredTokenValues} in {requiredTokenTypes}')
            raise SystemExit(1)
        return

    def _eatExpected(self, expectedTokenTypes, expectedTokenValues = None):
        """
        Try to consume the next token and return whether it matched.

        Matching follows the same rules as `_eatObligatory`. When the token
        does not match it is kept as look-ahead for the next `_eat*` call.
        """
        if expectedTokenValues is None:
            expectedTokenValues = []

        if not self._preserveCurrentToken:
            self._currentToken = self._tokenizer.advance()

        token = self._currentToken
        ateExpected = (token['type'] in expectedTokenTypes and
                (token['type'] not in TOKEN_TYPES_WITH_EXPECTABLE_VALUES or
                len(expectedTokenValues) == 0 or token['value'] in expectedTokenValues))
        self._preserveCurrentToken = not ateExpected
        return ateExpected

    def _searchVariableByName(self, name):
        """Return (segment, index) of `name`, looking in the subroutine scope
        first and the class scope second, or None when it is unknown."""
        subroutineVar = self._subroutineVariables.getByName(name)
        if subroutineVar is not None:
            return subroutineVar['segment'], subroutineVar['index']
        classVar = self._classVariables.getByName(name)
        if classVar is not None:
            return classVar['segment'], classVar['index']
        return None

    def _requireVariable(self, name):
        """Return (segment, index) of `name`; report a compilation error and
        exit with status 1 when the variable was never declared."""
        varInfo = self._searchVariableByName(name)
        if varInfo is None:
            self._writer.writeCompilationError(f'UNDECLARED VARIABLE: {name}')
            raise SystemExit(1)
        return varInfo
コード例 #12
0
class CompilationEngine:
    """
    Effects the actual compilation output. Gets its input from a JackTokenizer
    and emits its parsed structure into an output file/stream.

    Parsing is recursive descent: each compileXxx method consumes the tokens
    of one grammar rule, writes VM commands through the VMWriter, and also
    writes an XML-like trace of the parse as comments through the same writer.
    """

    INDENT = "  "

    def __init__(self, jackFile, vmFile, DEBUG=False):
        """
        Creates a new compilation engine with the given input and output. The
        next routine called must be compileClass().

        jackFile is handed to JackTokenizer and vmFile to VMWriter. When DEBUG
        is true, the XML trace is also printed to stdout.
        """
        self.tokenizer = JackTokenizer(jackFile)  # , DEBUG=DEBUG)
        self.DEBUG = DEBUG

        # Indentation level of the XML trace
        self.indentLevel = 0

        # Counters for while loops and if statements (used to build labels)
        self.whileCounter = self.ifCounter = 0

        # NOTE(review): DEBUG is hard-coded to True for the symbol table and
        # the writer rather than following the DEBUG argument. Left unchanged
        # because it may control what ends up in the output -- confirm intent.
        # Initialize the symbol table
        self.symtab = SymbolTable(DEBUG=True)

        # Initialize the VM writer
        self.writer = VMWriter(vmFile, DEBUG=True)

    def compileClass(self):
        """
        Compiles a complete class.

        Sets self.thisClass (the class name) and self.nFields (number of
        field variables), then closes the writer. Raises SyntaxError if any
        token follows the closing '}' of the class.
        """
        self.emit(xml="<class>")

        # Alias self.tokenizer to make code more compact
        t = self.tokenizer

        # Verify that there is a token to read and advance to it
        if t.hasMoreTokens():
            # Advance to the next token
            t.advance()
        else:
            # If not, we're done. Still release the output before leaving.
            self.writer.close()
            return

        self.eatAndEmit("keyword", ["class"])
        (_, self.thisClass) = self.eatAndEmit(
            "identifier", category="CLASS", state="DEFINE"
        )
        self.eatAndEmit("symbol", ["{"])

        # Expect zero or more classVarDecs. Count the fields defined.
        self.nFields = 0
        while t.tokenType() == "keyword" and t.keyWord() in ["static", "field"]:
            kw = t.keyWord()
            count = self.compileClassVarDec()

            # Count the fields to determine the size of the object
            if kw == "field":
                self.nFields += count

        # Expect zero or more subroutineDecs
        while t.tokenType() == "keyword" and t.keyWord() in [
            "constructor",
            "function",
            "method",
        ]:
            self.compileSubroutine()

        self.eatAndEmit("symbol", ["}"])
        self.emit(xml="</class>")

        # Should not be any more input
        if self.tokenizer.hasMoreTokens():
            raise SyntaxError(
                "Token after end of class: {}".format(self.tokenizer.currentToken)
            )

        # Close the VMWriter
        self.writer.close()

    def compileClassVarDec(self):
        """
        Compiles a static declaration or a field declaration.
        Should only be called if keyword static or keyword field is the current
        token.

        Returns the number of variables declared.
        """
        self.emit(xml="<classVarDec>")

        # Need to save the variable kind for the symbol table
        token = self.eat("keyword", ["static", "field"])
        (_, varKind) = token
        varKind = varKind.upper()
        self.emit(token=token)

        # Expect a type: one of the keywords 'int', 'char', or 'boolean', or a
        # className (identifier).
        t = self.tokenizer
        tType = t.tokenType()
        if tType == "keyword":
            (_, varType) = self.eatAndEmit("keyword", ["int", "char", "boolean"])
        else:
            (_, varType) = self.eatAndEmit("identifier", category="CLASS", state="USE")

        self.eatAndEmit("identifier", category=varKind, varType=varType, state="DEFINE")
        count = 1

        # Expect an optional list of identifiers. Every name in the list
        # shares the declared type, so it must be passed along each time;
        # otherwise later names would be recorded without a type and calls
        # on them would be mistaken for calls on a class.
        while t.tokenType() == "symbol" and t.symbol() == ",":
            self.eatAndEmit("symbol", [","])
            self.eatAndEmit(
                "identifier", category=varKind, varType=varType, state="DEFINE"
            )
            count += 1

        self.eatAndEmit("symbol", [";"])
        self.emit(xml="</classVarDec>")

        return count

    def compileSubroutine(self):
        """
        Compiles a complete method, function, or constructor.
        Should only be called if the current token is one of 'constructor',
        'function', or 'method'.
        """
        self.emit(xml="<subroutineDec>")
        (_, kw) = self.eatAndEmit("keyword", ["constructor", "function", "method"])

        # Reset the subroutine symbol table
        self.symtab.startSubroutine()

        # If this is a method, seed the symbol table with "this" as argument 0
        if kw == "method":
            self.symtab.define("this", self.thisClass, "ARG")

        # Expect 'void' or a type: one of the keywords 'int', 'char', or
        # 'boolean', or a className (identifier).
        t = self.tokenizer
        tType = t.tokenType()
        if tType == "keyword":
            self.eatAndEmit("keyword", ["void", "int", "char", "boolean"])
        else:
            self.eatAndEmit("identifier", category="CLASS", state="USE")

        (_, functionName) = self.eatAndEmit(
            "identifier", category="SUBROUTINE", state="DEFINE"
        )

        self.eatAndEmit("symbol", ["("])
        self.compileParameterList()
        self.eatAndEmit("symbol", [")"])
        self.emit(xml="<subroutineBody>")
        self.eatAndEmit("symbol", ["{"])

        # Expect varDec*. Count the number of local variables.
        nLocals = 0
        while t.tokenType() == "keyword" and t.keyWord() == "var":
            nLocals += self.compileVarDec()

        # Generate the VM code to start the function. This has to wait until
        # the locals are counted, since the count is part of the declaration.
        self.writer.writeFunction("{}.{}".format(self.thisClass, functionName), nLocals)

        # If this subroutine is a constructor, allocate memory for the new
        # object and set the base of the this segment
        if kw == "constructor":
            self.writer.writePush("CONST", self.nFields)
            self.writer.writeCall("Memory.alloc", 1)
            self.writer.writePop("POINTER", 0)

        # If this subroutine is a method, set the base of the this segment
        if kw == "method":
            self.writer.writePush("ARG", 0)
            self.writer.writePop("POINTER", 0)

        # Compile the code of the function
        self.compileStatements()
        self.eatAndEmit("symbol", ["}"])
        self.emit(xml="</subroutineBody>")
        self.emit(xml="</subroutineDec>")

    def compileParameterList(self):
        """
        Compiles a (possibly empty) parameter list, not including the
        enclosing '( )'. Each parameter is defined as an ARG in the symbol
        table.
        """
        self.emit(xml="<parameterList>")

        # Alias for tokenizer
        t = self.tokenizer

        # Get the current token type
        tType = t.tokenType()

        # Expect a type: one of the keywords 'int', 'char', or 'boolean', or a
        # className (identifier).
        finished = False
        while not finished and tType in ["keyword", "identifier"]:
            if tType == "keyword":
                (_, varType) = self.eatAndEmit("keyword", ["int", "char", "boolean"])
            else:
                (_, varType) = self.eatAndEmit(
                    "identifier", category="CLASS", state="USE"
                )

            self.eatAndEmit(
                "identifier", category="ARG", state="DEFINE", varType=varType
            )

            # Look for a ',' symbol
            if t.tokenType() == "symbol" and t.symbol() == ",":
                # If found, eat it
                self.eatAndEmit("symbol", [","])

                # Get the next token type
                tType = t.tokenType()
            else:
                finished = True

        self.emit(xml="</parameterList>")

    def compileVarDec(self):
        """
        Compiles a var declaration.

        Returns the number of local variables declared.
        """
        self.emit(xml="<varDec>")
        self.eatAndEmit("keyword", ["var"])

        # Expect a type for the variable: one of the keywords 'int', 'char',
        # or 'boolean', or a className (identifier). Save the variable type.
        t = self.tokenizer
        tType = t.tokenType()
        if tType == "keyword":
            (_, varType) = self.eatAndEmit("keyword", ["int", "char", "boolean"])
        else:
            (_, varType) = self.eatAndEmit("identifier", category="CLASS", state="USE")

        self.eatAndEmit("identifier", category="VAR", state="DEFINE", varType=varType)
        nVars = 1

        # Expect an optional list of identifiers.
        while t.tokenType() == "symbol" and t.symbol() == ",":
            self.eatAndEmit("symbol", [","])
            self.eatAndEmit(
                "identifier", category="VAR", state="DEFINE", varType=varType
            )
            nVars += 1

        self.eatAndEmit("symbol", [";"])
        self.emit(xml="</varDec>")

        return nVars

    def compileStatements(self):
        """
        Compiles a sequence of statements, not including the enclosing
        '{ }'. Raises SyntaxError on a keyword that does not start a
        statement.
        """
        self.emit(xml="<statements>")

        t = self.tokenizer
        while t.tokenType() == "keyword":
            keyword = t.keyWord()
            if keyword == "do":
                self.compileDo()
            elif keyword == "let":
                self.compileLet()
            elif keyword == "while":
                self.compileWhile()
            elif keyword == "return":
                self.compileReturn()
            elif keyword == "if":
                self.compileIf()
            else:
                raise SyntaxError(
                    "Expected statement. Found {}.".format(t.currentToken)
                )

        self.emit(xml="</statements>")

    def compileDo(self):
        """
        Compiles a do statement. The value returned by the called subroutine
        is discarded by popping it into TEMP 0.
        """
        self.emit(xml="<doStatement>")
        self.eatAndEmit("keyword", ["do"])

        # Eat the identifier. Can't emit until we know if this is a class,
        # an object, or a subroutine.
        token = self.eat("identifier")
        (_, ident) = token

        # Check for a '.', which indicates a method call
        t = self.tokenizer
        if t.tokenType() == "symbol" and t.symbol() == ".":
            # Previous token was an object or a class. Check symbol table.
            # The identifier is emitted before the '.' is eaten so the trace
            # lists the tokens in source order.
            objType = self.symtab.typeOf(ident)
            if objType:
                # ident is an object, so method is objType.method, and the
                # object must be loaded into this as argument 0
                kind = self.symtab.kindOf(ident)
                self.emit(token=token, category=kind, state="USE")

                # subroutine starts with the class type
                subroutine = objType

                # Add an argument to the stack for "this"
                nArgs = 1
                self.writer.writePush(kind, self.symtab.indexOf(ident))
            else:
                # ident is a class, so method is ident.method and there is no this
                self.emit(token=token, category="CLASS", state="USE")
                subroutine = ident
                nArgs = 0

            self.eatAndEmit("symbol", ["."])
            methodToken = self.eat("identifier")
            (_, method) = methodToken
            self.emit(token=methodToken, category="METHOD", state="USE")
            subroutine += "." + method
        else:
            # Bare subroutine calls are assumed to be methods of the current class
            self.emit(token=token, category="SUBROUTINE", state="USE")
            subroutine = self.thisClass + "." + ident

            # Add "this" to the stack
            nArgs = 1
            self.writer.writePush("POINTER", 0)

        self.eatAndEmit("symbol", ["("])
        nArgs += self.compileExpressionList()
        self.eatAndEmit("symbol", [")"])
        self.eatAndEmit("symbol", [";"])

        # Call the desired subroutine and consume the returned value
        self.writer.writeCall(subroutine, nArgs)
        self.writer.writePop("TEMP", 0)

        self.emit(xml="</doStatement>")

    def compileLet(self):
        """
        Compiles a let statement, either a plain variable assignment or an
        assignment to an array element.
        """
        self.emit(xml="<letStatement>")
        self.eatAndEmit("keyword", ["let"])
        (_, varName) = self.eatAndEmit("identifier", category="LET", state="USE")

        # Look up the variable in the symbol table
        varKind = self.symtab.kindOf(varName)
        varIndex = self.symtab.indexOf(varName)

        # Check for array qualifier
        t = self.tokenizer
        arrayRef = False
        if t.tokenType() == "symbol" and t.symbol() == "[":
            # Compute the offset
            self.eatAndEmit("symbol", ["["])
            self.compileExpression()
            self.eatAndEmit("symbol", ["]"])

            # Add the offset to the base. Leave the result on the stack.
            self.writer.writePush(varKind, varIndex)
            self.writer.writeArithmetic("+")
            arrayRef = True

        self.eatAndEmit("symbol", ["="])
        self.compileExpression()
        self.eatAndEmit("symbol", [";"])

        # Value to save is at the top of the stack.
        if not arrayRef:
            # Direct POP
            self.writer.writePop(varKind, varIndex)
        else:
            # Array reference. Save value temporarily while setting THAT;
            # the right-hand side may itself have used THAT, so the target
            # address is only installed after it has been evaluated.
            self.writer.writePop("TEMP", 0)
            self.writer.writePop("POINTER", 1)
            self.writer.writePush("TEMP", 0)
            self.writer.writePop("THAT", 0)

        self.emit(xml="</letStatement>")

    def compileWhile(self):
        """
        Compiles a while statement. Labels are made unique by the class name
        and a running counter.
        """
        self.emit(xml="<whileStatement>")
        self.eatAndEmit("keyword", ["while"])

        whileInstance = self.whileCounter
        self.whileCounter += 1
        self.writer.writeLabel("WHILE.{}.{}.EXP".format(self.thisClass, whileInstance))

        self.eatAndEmit("symbol", ["("])
        self.compileExpression()
        self.eatAndEmit("symbol", [")"])

        # Negate the condition so that a false condition jumps to the exit
        self.writer.writeArithmetic("U~")
        self.writer.writeIf("WHILE.{}.{}.EXIT".format(self.thisClass, whileInstance))

        self.eatAndEmit("symbol", ["{"])
        self.compileStatements()
        self.eatAndEmit("symbol", ["}"])

        self.writer.writeGoto("WHILE.{}.{}.EXP".format(self.thisClass, whileInstance))
        self.writer.writeLabel("WHILE.{}.{}.EXIT".format(self.thisClass, whileInstance))

        self.emit(xml="</whileStatement>")

    def compileReturn(self):
        """
        Compiles a return statement. A bare 'return;' pushes constant 0 so
        that there is always a value to return.
        """
        self.emit(xml="<returnStatement>")
        self.eatAndEmit("keyword", ["return"])

        # If not a ';', expect an expression
        t = self.tokenizer
        if not (t.tokenType() == "symbol" and t.symbol() == ";"):
            # Expect an expression
            self.compileExpression()
        else:
            # void function, so force a 0 onto the stack to return
            self.writer.writePush("CONST", 0)

        self.writer.writeReturn()
        self.eatAndEmit("symbol", [";"])
        self.emit(xml="</returnStatement>")

    def compileIf(self):
        """
        Compiles an if statement, possibly with a trailing else
        clause. Labels are made unique by the class name and a running
        counter.
        """
        self.emit(xml="<ifStatement>")
        self.eatAndEmit("keyword", ["if"])
        self.eatAndEmit("symbol", ["("])
        self.compileExpression()
        self.eatAndEmit("symbol", [")"])

        # Negate the condition so that a false condition jumps to the else part
        self.writer.writeArithmetic("U~")
        ifInstance = self.ifCounter
        self.ifCounter += 1
        self.writer.writeIf("IF.{}.{}.ELSE".format(self.thisClass, ifInstance))

        self.eatAndEmit("symbol", ["{"])
        self.compileStatements()
        self.eatAndEmit("symbol", ["}"])

        t = self.tokenizer
        if t.tokenType() == "keyword" and t.keyWord() == "else":
            self.writer.writeGoto("IF.{}.{}.EXIT".format(self.thisClass, ifInstance))
            self.writer.writeLabel("IF.{}.{}.ELSE".format(self.thisClass, ifInstance))

            self.eatAndEmit("keyword", ["else"])
            self.eatAndEmit("symbol", ["{"])
            self.compileStatements()
            self.eatAndEmit("symbol", ["}"])
            self.writer.writeLabel("IF.{}.{}.EXIT".format(self.thisClass, ifInstance))
        else:
            # No else clause: the ELSE label simply marks the end of the if
            self.writer.writeLabel("IF.{}.{}.ELSE".format(self.thisClass, ifInstance))

        self.emit(xml="</ifStatement>")

    def compileExpression(self):
        """
        Compiles an expression: a term followed by any number of
        operator-term pairs, evaluated left to right with no precedence.
        """
        self.emit(xml="<expression>")
        self.compileTerm()

        # Look for operator-term pairs
        t = self.tokenizer
        ops = ["+", "-", "*", "/", "&", "|", "<", ">", "="]
        while t.tokenType() == "symbol" and t.symbol() in ops:
            (_, op) = self.eatAndEmit("symbol", ops)
            self.compileTerm()
            # The operator is written after both operands (postfix order)
            self.writer.writeArithmetic(op)

        self.emit(xml="</expression>")

    def compileTerm(self):
        """
        Compiles a term. This routine is faced with a slight difficulty when
        trying to decide between some of the alternative parsing rules.
        Specifically, if the current token is an identifier, the routine must
        distinguish between a variable, an array entry, and a subroutine call.
        A single lookahead token, which may be one of '[', '(', or '.',
        suffices to distinguish between the three possibilities. Any other
        token is not part of this term and should not be advanced over.

        Raises SyntaxError if the current token cannot start a term.
        """
        self.emit(xml="<term>")

        # Get the current token type
        t = self.tokenizer
        tType = t.tokenType()

        # Integer constant
        if tType == "integerConstant":
            (_, value) = self.eatAndEmit("integerConstant")
            self.writer.writePush("CONST", value)
        # String constant
        elif tType == "stringConstant":
            (_, value) = self.eatAndEmit("stringConstant")
            # Declare space for the string
            self.writer.writePush("CONST", len(value))
            self.writer.writeCall("String.new", 1)
            # Save the contents of the string
            for c in value:
                self.writer.writePush("CONST", ord(c))
                self.writer.writeCall("String.appendChar", 2)
        # Keyword constant
        elif tType == "keyword" and t.keyWord() in ["true", "false", "null", "this"]:
            (_, kw) = self.eatAndEmit("keyword", ["true", "false", "null", "this"])
            if kw in ["null", "false"]:
                # Map to 0
                self.writer.writePush("CONST", 0)
            elif kw == "true":
                # Map to -1
                self.writer.writePush("CONST", 1)
                self.writer.writeArithmetic("U-")  # NEG
            else:
                # this
                self.writer.writePush("POINTER", 0)
        # Identifier (varName, or array name, or subroutine call)
        elif tType == "identifier":
            (_, ident) = self.eatAndEmit("identifier", category="TERM", state="USE")

            # One token of lookahead decides what kind of term this is.
            lookahead = t.symbol() if t.tokenType() == "symbol" else None
            if lookahead == "[":
                # Array reference
                # ident is the array name
                # Compute the offset
                self.eatAndEmit("symbol", ["["])
                self.compileExpression()
                self.eatAndEmit("symbol", ["]"])
                # Add base to offset
                self.writer.writePush(self.symtab.kindOf(ident), self.symtab.indexOf(ident))
                self.writer.writeArithmetic("+")
                # Update THAT and retrieve
                self.writer.writePop("POINTER", 1)
                self.writer.writePush("THAT", 0)
            elif lookahead == "(":
                # Bare subroutine call. As in compileDo, it is assumed to be
                # a method of the current class, so "this" is passed as
                # argument 0 and the name is qualified with the class.
                self.writer.writePush("POINTER", 0)
                self.eatAndEmit("symbol", ["("])
                nArgs = 1 + self.compileExpressionList()
                self.eatAndEmit("symbol", [")"])
                self.writer.writeCall("{}.{}".format(self.thisClass, ident), nArgs)
            elif lookahead == ".":
                # Method call.
                # ident is the class name (static method) or the object which
                # will be argument 0 (this).

                # Look up the object's type in the symbol table. If not found,
                # then it is a class name and there is no object to be "this".
                objType = self.symtab.typeOf(ident)
                nArgs = 0
                if objType is not None:
                    # Push this onto stack as argument 0
                    self.writer.writePush(self.symtab.kindOf(ident), self.symtab.indexOf(ident))
                    nArgs = 1
                else:
                    # ident is the class name, so use it
                    objType = ident

                self.eatAndEmit("symbol", ["."])
                (_, method) = self.eatAndEmit(
                    "identifier", category="SUBROUTINE", state="USE"
                )
                self.eatAndEmit("symbol", ["("])
                nArgs += self.compileExpressionList()
                self.eatAndEmit("symbol", [")"])
                self.writer.writeCall(objType + "." + method, nArgs)
            else:
                # Anything else ends the term, so ident is a simple variable.
                varKind = self.symtab.kindOf(ident)
                varIndex = self.symtab.indexOf(ident)
                self.writer.writePush(varKind, varIndex)
        # Sub-expression
        elif tType == "symbol" and t.symbol() == "(":
            self.eatAndEmit("symbol", ["("])
            self.compileExpression()
            self.eatAndEmit("symbol", [")"])
        # Unary op and term
        elif tType == "symbol" and t.symbol() in ["-", "~"]:
            (_, op) = self.eatAndEmit("symbol", ["-", "~"])
            self.compileTerm()
            # Mark as unary to get right version of '-'
            self.writer.writeArithmetic("U" + op)
        else:
            # Not a term
            raise SyntaxError("Expected term, found {}.".format(t.currentToken))

        self.emit(xml="</term>")

    def compileExpressionList(self):
        """
        Compiles a (possibly empty) comma-separated list of expressions.
        Returns the number of expressions compiled.
        """
        self.emit(xml="<expressionList>")

        # Get the initial token type
        t = self.tokenizer
        tType = t.tokenType()

        # Count the expressions in the list
        nExpressions = 0

        # Closing parenthesis ends the list
        while not (tType == "symbol" and t.symbol() == ")"):
            nExpressions += 1
            self.compileExpression()

            # Expect an optional ','
            if t.tokenType() == "symbol" and t.symbol() == ",":
                self.eatAndEmit("symbol", [","])

            # Update the tType
            tType = t.tokenType()

        self.emit(xml="</expressionList>")

        return nExpressions

    def eat(self, tokenType, tokenVals=None):
        """
        Consume the current token if it matches the expected type and value.

        tokenVals, when given and non-empty, is the collection of acceptable
        values. Returns the (type, value) of the token that was consumed and
        raises SyntaxError on a mismatch. The tokenizer is only advanced if
        it reports more tokens.
        """
        # Get the type and value of the current token
        t = self.tokenizer
        tType = t.tokenType()
        if tType == "keyword":
            tVal = t.keyWord()
        elif tType == "symbol":
            tVal = t.symbol()
        elif tType == "identifier":
            tVal = t.identifier()
        elif tType == "integerConstant":
            tVal = t.intVal()
        else:  # tType == 'stringConstant'
            tVal = t.stringVal()

        # Verify that the type matches and the value is one of the values
        # expected.
        if not (tType == tokenType and (not tokenVals or tVal in tokenVals)):
            raise SyntaxError(
                "Expected {} {}. Found {}.".format(
                    tokenType, " or ".join(tokenVals or []), t.currentToken
                )
            )

        if t.hasMoreTokens():
            t.advance()

        # Return the actual token type and value
        return (tType, tVal)

    def emit(self, token=None, category=None, state=None, varType=None, xml=None):
        """
        Emit the provided XML, or the given token rendered as XML, as a
        comment through the VM writer (and to stdout when DEBUG is set).
        Will indent based on the current indentLevel.

        As a side effect, a token emitted with state "DEFINE" and a variable
        category (STATIC, FIELD, ARG, VAR) is defined in the symbol table,
        and a token emitted with state "USE" and category LET or TERM is
        looked up there to annotate the output.
        """
        # If XML code not provided, create it from the token type and value
        if not xml:
            (tokenType, tokenVal) = token

            # Handle symbol table additions/lookups
            index = None
            if state == "DEFINE" and category in ["STATIC", "FIELD", "ARG", "VAR"]:
                index = self.symtab.define(tokenVal, varType, category)

            if state == "USE" and category in ["LET", "TERM"]:
                category = self.symtab.kindOf(tokenVal)
                if category:
                    varType = self.symtab.typeOf(tokenVal)
                    index = self.symtab.indexOf(tokenVal)
                else:
                    category = "CLASS OR SUBROUTINE"

            # Define additional output fields
            fields = ""
            if category is not None:
                fields += " category={}".format(category)
            if state is not None:
                fields += " state={}".format(state)
            if varType is not None:
                fields += " varType={}".format(varType)
            if index is not None:
                fields += " index={}".format(index)

            xml = "<{0}{2}>{1}</{0}>".format(
                tokenType, self.xmlProtect(tokenVal), fields
            )

        else:
            # If the XML starts with '</', reduce the indent level
            if xml[:2] == "</":
                self.indentLevel = self.indentLevel - 1

        # Output the XML, indented to the current level
        output = "{}{}\n".format(self.INDENT * self.indentLevel, xml)
        self.writer.writeComment(output)
        if self.DEBUG:
            print(output, end="")

        # If the XML does not contain '</', increase the indent level
        if "</" not in xml:
            self.indentLevel = self.indentLevel + 1

    def eatAndEmit(
        self, tokenType, tokenVals=None, category=None, state=None, varType=None
    ):
        """
        Shorthand for common pattern of eat and emit. Returns the token eaten.
        """
        token = self.eat(tokenType, tokenVals)
        self.emit(token=token, category=category, state=state, varType=varType)

        # Return the token in case the caller wants it
        return token

    def xmlProtect(self, token):
        """
        Return the XML entity for a '<', '>' or '&' token; any other token is
        returned unchanged.
        """
        # Protect <, >, and & tokens from XML
        if token == "<":
            return "&lt;"
        elif token == ">":
            return "&gt;"
        elif token == "&":
            return "&amp;"
        else:
            return token
コード例 #13
0
class CompilationEngine():

    op = ['+', '-', '*', '/', '&', '|', '<', '>', '=']

    def __init__(self, input, output):
        """Compile the Jack source ``input`` into VM code written to ``output``.

        Construction does all the work: it builds the tokenizer, symbol table
        and VM writer, loads the first token and runs ``compileClass()``.
        """
        print('Opened ' + input + ' for compiling.')

        # Source path; close() reports it once compilation has finished
        self.input = input

        # Counter behind getUniqueNo(); starts at -1 so the first label number is 0
        self.uniqueNo = -1

        # Collaborating modules
        self.tokenizer = JackTokenizer(input)
        self.symbolTable = SymbolTable()
        self.vmWriter = VMWriter(output)

        # Load the first token, then compile the whole class
        self.tokenizer.advance()
        self.compileClass()

    def subTag(self, _tag):
        """Reject a call to the unsupported sub-tag emitter.

        Prints a diagnostic and always raises ``NameError``; ``_tag`` is
        ignored. The statement that used to follow the ``raise`` could never
        run and has been removed.
        """
        print('Subtag encountered - fix this')
        raise NameError('subTag is not supported by this compilation engine')

    def subTagIdentifier(self, name, category, new, kind, index):
        """Reject a call to the unsupported identifier sub-tag emitter.

        Prints a diagnostic and always raises ``NameError``; all arguments are
        ignored. The statement that used to follow the ``raise`` could never
        run and has been removed.
        """
        print('Subtag encountered - fix this')
        raise NameError(
            'subTagIdentifier is not supported by this compilation engine')

    def getUniqueNo(self):
        """Advance the label counter and return its new value as a string."""
        nextNo = self.uniqueNo + 1
        self.uniqueNo = nextNo
        return str(nextNo)

    def compileClass(self):
        """Compile a whole class: ``class`` className ``{`` members ``}``.

        Expects the current token to be the CLASS keyword. Stores the class
        name in ``self.className``, compiles every class variable declaration
        and subroutine, then calls ``close()``.

        Raises:
            TokenTypeError: if a keyword in the class body starts neither a
                class variable declaration nor a subroutine. Previously such
                a token made the member loop spin forever.
        """
        # Keyword: class
        self.tokenizer.advance()

        # Identifier: class name (classes are not entered into the symbol table)
        self.className = self.tokenizer.identifier()
        self.tokenizer.advance()

        # Symbol: {
        self.tokenizer.advance()

        # classVarDec or subroutine.
        # The raw token is read directly to circumvent the tokenizer's type checking.
        while self.tokenizer.rawToken() != '}':
            memberKeyword = self.tokenizer.keyWord()
            if memberKeyword in ('STATIC', 'FIELD'):
                self.compileClassVarDec()
            elif memberKeyword in ('CONSTRUCTOR', 'FUNCTION', 'METHOD'):
                self.compileSubroutine()
            else:
                # Neither handler would consume this token, so stop here
                raise TokenTypeError('Class member keyword',
                                     self.tokenizer.tokenType(),
                                     self.tokenizer.rawToken(),
                                     self.tokenizer.lineNo)

        # Symbol: } - do not advance, we are done
        self.close()

    def close(self):
        """Close the VM writer, then the tokenizer, and report completion."""
        for resource in (self.vmWriter, self.tokenizer):
            resource.close()

        print('Finished compiling ' + self.input + '.')

    def compileClassVarDec(self):
        """Compile ``(static | field) type varName (, varName)* ;``.

        Expects the current token to be the STATIC or FIELD keyword. Every
        declared name is defined in the symbol table; no VM code is written.

        Raises:
            NotImplementedError: for STATIC declarations.
        """
        # Keyword: STATIC or FIELD
        declKeyword = self.tokenizer.keyWord()
        if declKeyword == 'STATIC':
            raise NotImplementedError
        if declKeyword == 'FIELD':
            varKind = 'FIELD'
        self.tokenizer.advance()

        # Keyword: type | identifier (if class)
        try:
            varType = self.tokenizer.keyWord()
        except TokenTypeError:
            varType = self.tokenizer.identifier()
        self.tokenizer.advance()

        # One or more comma separated varNames, all of the same type and kind
        while True:
            # Identifier: varName - declare in symboltable
            self.symbolTable.define(self.tokenizer.identifier(), varType,
                                    varKind)
            self.tokenizer.advance()

            if self.tokenizer.symbol() != ',':
                break

            # Symbol: ,
            self.tokenizer.advance()

        # Symbol: ;
        self.tokenizer.advance()

    def compileSubroutine(self):
        """Compile one constructor, function or method declaration.

        Expects the current token to be the CONSTRUCTOR / FUNCTION / METHOD
        keyword and consumes everything up to and including the ``}`` that
        closes the subroutine body.

        For methods the implicit receiver is now registered as the first
        ARGUMENT before the parameter list is compiled. The generated code
        reads the receiver from ``argument 0``, so without this entry the
        declared parameters would be numbered from 0 and collide with it.
        """
        # Create new subroutine scoped symbol table
        self.symbolTable.startSubroutine()

        # Keyword: constructor | function | method
        subroutineKind = self.tokenizer.keyWord()
        self.tokenizer.advance()

        if subroutineKind == 'METHOD':
            # Reserve argument 0 for the object the method is called on
            self.symbolTable.define('this', self.className, 'ARGUMENT')

        # Keyword: void | type | identifier (if class)
        self.tokenizer.advance()

        # Identifier: subroutineName
        subroutineName = self.tokenizer.identifier()
        self.tokenizer.advance()

        # Symbol: (
        self.tokenizer.advance()

        # Program structure: ParameterList
        self.compileParameterList()

        # Symbol: )
        self.tokenizer.advance()

        ### START SUBROUTINE BODY ###

        # Symbol: {
        self.tokenizer.advance()

        # subroutineBody: varDecs
        while self.tokenizer.keyWord() == 'VAR':
            self.compileVarDec()

        # Write vm code function declaration.
        # Done 'late' so that nLocals is known (compileVarDec writes no vm code).
        self.vmWriter.writeFunction(self.className + '.' + subroutineName,
                                    self.symbolTable.varCount('LOCAL'))

        if subroutineKind == 'CONSTRUCTOR':
            # Alloc() required space (as determined by number of class fields)
            self.vmWriter.writePush('constant',
                                    self.symbolTable.varCount('FIELD'))
            self.vmWriter.writeCall('Memory.alloc', 1)

            # Point THIS at the start of the freshly allocated object memory
            self.vmWriter.writePop('pointer', 0)

        elif subroutineKind == 'METHOD':
            # Set 'this' pointer by pushing first argument and popping to pointer 0
            self.vmWriter.writePush('argument', 0)
            self.vmWriter.writePop('pointer', 0)

        # subroutineBody: Statements
        self.compileStatements()

        # Symbol: }
        self.tokenizer.advance()

        ### END SUBROUTINE BODY ###

    def compileParameterList(self):
        """Compile ``((type varName) (, type varName)*)?``.

        Expects the current token to be the type of the first parameter, or
        the closing ``)`` when the list is empty. Each parameter is defined in
        the symbol table as an ARGUMENT; the ``)`` itself is left for the
        caller to consume. No VM code is written.
        """
        # Compare by value: identity tests against string literals only work
        # by accident of interpreter string caching.
        if self.tokenizer.rawToken() == ')':
            return

        while True:
            # Keyword: primitive type | identifier: class name.
            # The same fallback the var declarations use for class types.
            try:
                _type = self.tokenizer.keyWord()
            except TokenTypeError:
                _type = self.tokenizer.identifier()
            self.tokenizer.advance()

            # Identifier: varName - declare in symboltable
            self.symbolTable.define(self.tokenizer.identifier(), _type,
                                    'ARGUMENT')
            self.tokenizer.advance()

            if self.tokenizer.rawToken() != ',':
                break

            # Symbol: ,
            self.tokenizer.advance()

    def compileVarDec(self):
        """Compile ``var type varName (, varName)* ;``.

        Expects the current token to be the VAR keyword. Every declared name
        is defined in the symbol table as a LOCAL; no VM code is written.
        """
        # Keyword: var
        self.tokenizer.advance()

        # Keyword: type | identifier (if class)
        try:
            localType = self.tokenizer.keyWord()
        except TokenTypeError:
            localType = self.tokenizer.identifier()
        finally:
            self.tokenizer.advance()

        # One or more comma separated varNames sharing the same type
        while True:
            # Identifier: varName - define in symboltable
            self.symbolTable.define(self.tokenizer.identifier(), localType,
                                    'LOCAL')
            self.tokenizer.advance()

            if self.tokenizer.symbol() != ',':
                break

            # Symbol: ,
            self.tokenizer.advance()

        # Symbol: ;
        self.tokenizer.advance()

    def compileStatements(self):
        """Compile a run of statements up to, but not including, ``}``.

        Expects the current token to be a statement keyword
        (let | if | while | do | return) or the closing ``}``. Each statement
        handler leaves the tokenizer on the token that follows its statement,
        so this method never advances the tokenizer itself.

        Raises:
            TokenTypeError: if a keyword that does not start a statement is
                found.
        """
        handlers = {
            'LET': self.compileLet,
            'IF': self.compileIf,
            'WHILE': self.compileWhile,
            'DO': self.compileDo,
            'RETURN': self.compileReturn,
        }

        # Compare by value: identity tests against string literals only work
        # by accident of interpreter string caching.
        while self.tokenizer.rawToken() != '}':
            handler = handlers.get(self.tokenizer.keyWord())
            if handler is None:
                raise TokenTypeError('Statement keyword',
                                     self.tokenizer.tokenType(),
                                     self.tokenizer.rawToken(),
                                     self.tokenizer.lineNo)
            handler()

    def compileSubroutineCall(self):
        """Compile ``name(args)``, ``Class.name(args)`` or ``var.name(args)``.

        Expects the current token to be the first identifier of the call and
        consumes everything up to and including the closing ``)``. Writes the
        receiver push (when there is one), the argument expressions and the
        ``call`` instruction.
        """
        # Identifier: subroutineName or (className | varName)
        firstName = self.tokenizer.identifier()

        # A name known to the symbol table is assumed to be an object variable,
        # in which case the call is qualified by the variable's declared type.
        declaredType = self.symbolTable.typeOf(firstName)
        if declaredType:
            targetObject = firstName
            subroutineName = declaredType
        else:
            # Not declared, assume we are calling it on the class directly
            targetObject = None
            subroutineName = firstName
        self.tokenizer.advance()

        # Number of implicit receiver arguments (0 or 1)
        thisArg = 0

        # Symbol: . (className.subroutineName) or ( (bare subroutineName)
        separator = self.tokenizer.symbol()
        if separator == ".":
            self.tokenizer.advance()

            # Identifier: subroutineName
            subroutineName = (subroutineName + separator +
                              self.tokenizer.identifier())

            # Push the object pointer (if there is one) as the first argument
            if targetObject is not None:
                targetKind = self.symbolTable.kindOf(targetObject)
                if targetKind:
                    segment = 'this' if targetKind == 'field' else targetKind
                    self.vmWriter.writePush(
                        segment, self.symbolTable.indexOf(targetObject))
                    thisArg = 1
            self.tokenizer.advance()

        elif separator == '(':
            # Call on the current object: pass our own 'this' as first argument
            self.vmWriter.writePush('pointer', 0)
            thisArg = 1

            # Prefix the class name so that we have a complete vm function name
            subroutineName = self.className + '.' + subroutineName

        # Symbol: (
        self.tokenizer.advance()

        nArgs = self.compileExpressionList()

        # Symbol: )
        self.tokenizer.advance()

        # Write function call
        self.vmWriter.writeCall(subroutineName, nArgs + thisArg)

    def compileDo(self):
        """Compile ``do subroutineCall ;``.

        Expects the current token to be the DO keyword. A ``do`` statement
        ignores the value its call returns, so that value is popped into
        ``temp 0``; otherwise every ``do`` would leave one stale entry on the
        stack.
        """
        # Keyword: Do
        self.tokenizer.advance()

        self.compileSubroutineCall()

        # Discard the unused return value of the call
        self.vmWriter.writePop('temp', 0)

        # Symbol: ;
        self.tokenizer.advance()

    def compileLet(self):
        """Compile ``let varName ([expression])? = expression ;``.

        Expects the current token to be the LET keyword.

        Plain assignments pop the value into the variable's segment. Array
        assignments now compute the entry address (base + index), park the
        value in ``temp 0`` while ``pointer 1`` is aimed at that address, and
        store through ``that 0``. Previously the index was compiled but never
        used, and the value overwrote the array variable itself.
        """
        # Keyword: let
        self.tokenizer.advance()

        # identifier: varName
        varName = self.tokenizer.identifier()
        self.tokenizer.advance()

        # index if applicable
        isArrayEntry = self.tokenizer.symbol() == '['
        if isArrayEntry:
            # Push the array base address
            kind = self.symbolTable.kindOf(varName)
            segment = 'this' if kind == 'field' else kind
            self.vmWriter.writePush(segment, self.symbolTable.indexOf(varName))

            # Symbol: [
            self.tokenizer.advance()

            # Expression (the index)
            self.compileExpression()

            # Symbol: ]
            self.tokenizer.advance()

            # base + index = address of the target entry
            self.vmWriter.writeArithmetic('ADD')

        # Symbol: =
        self.tokenizer.advance()

        # Expression
        self.compileExpression()

        # Symbol: ;
        self.tokenizer.advance()

        if isArrayEntry:
            # The right-hand side may itself have used pointer 1, so pointer 1
            # is only set once the value is safely parked in temp 0.
            self.vmWriter.writePop('temp', 0)
            self.vmWriter.writePop('pointer', 1)
            self.vmWriter.writePush('temp', 0)
            self.vmWriter.writePop('that', 0)
        else:
            # Pop from top of stack to the variable
            kind = self.symbolTable.kindOf(varName)
            segment = 'this' if kind == 'field' else kind
            self.vmWriter.writePop(segment, self.symbolTable.indexOf(varName))

    def compileWhile(self):
        """Compile ``while ( expression ) { statements }``.

        Expects the current token to be the WHILE keyword. The loop is framed
        by the labels ``startWhile<n>`` and ``endWhile<n>``, where ``<n>``
        comes from ``getUniqueNo()``.
        """
        labelNo = self.getUniqueNo()
        startLabel = 'startWhile' + labelNo
        endLabel = 'endWhile' + labelNo

        # Keyword: while, then symbol: (
        for _ in range(2):
            self.tokenizer.advance()

        # Loop entry point, re-evaluated on every iteration
        self.vmWriter.writeLabel(startLabel)

        # Expression
        self.compileExpression()

        # Leave the loop when the expression is FALSE.
        # Adding 1 turns true (-1) into 0 and false (0) into 1.
        self.vmWriter.writePush('constant', 1)
        self.vmWriter.writeArithmetic('ADD')
        self.vmWriter.writeIf(endLabel)

        # Symbol: ), then symbol: {
        for _ in range(2):
            self.tokenizer.advance()

        # Statements
        self.compileStatements()

        # Back to the condition, and the exit point after it
        self.vmWriter.writeGoto(startLabel)
        self.vmWriter.writeLabel(endLabel)

        # Symbol: }
        self.tokenizer.advance()

    def compileReturn(self):
        """Compile ``return expression? ;``.

        Expects the current token to be the RETURN keyword. A bare ``return;``
        pushes constant 0 so that every subroutine returns a value.
        """
        # Keyword: return
        self.tokenizer.advance()

        # Symbol: ; or expression then ;
        # Compare by value: identity tests against string literals only work
        # by accident of interpreter string caching.
        if self.tokenizer.rawToken() != ';':
            self.compileExpression()
        else:
            # No return value - push constant 0
            self.vmWriter.writePush('constant', 0)

        # Symbol: ;
        self.tokenizer.advance()

        # Write return
        self.vmWriter.writeReturn()

    def compileIf(self):
        """Compile ``if ( expression ) { statements } (else { statements })?``.

        Expects the current token to be the IF keyword. The branches are
        framed by the labels ``startElse<n>`` and ``endIf<n>``, where ``<n>``
        comes from ``getUniqueNo()``.

        Only the look-ahead for ``else`` is guarded against
        ``TokenTypeError``. The guard used to wrap the whole else body, which
        silently discarded any ``TokenTypeError`` raised while compiling the
        statements inside it.
        """
        # Get new unique no
        uniqueNo = self.getUniqueNo()

        # Keyword: if
        self.tokenizer.advance()

        # Symbol: (
        self.tokenizer.advance()

        # Expression
        self.compileExpression()

        # Jump to the else part when the expression is FALSE.
        # Adding 1 turns true (-1) into 0 and false (0) into 1.
        self.vmWriter.writePush('constant', 1)
        self.vmWriter.writeArithmetic('ADD')
        self.vmWriter.writeIf('startElse' + uniqueNo)

        # Symbol: )
        self.tokenizer.advance()

        # Symbol: {
        self.tokenizer.advance()

        # Statements
        self.compileStatements()

        # Symbol: }
        self.tokenizer.advance()

        self.vmWriter.writeGoto('endIf' + uniqueNo)

        self.vmWriter.writeLabel('startElse' + uniqueNo)

        try:
            hasElse = self.tokenizer.keyWord() == 'ELSE'
        except TokenTypeError:
            # The next token is not a keyword at all, so there is no else part
            hasElse = False

        if hasElse:
            # keyword: else
            self.tokenizer.advance()

            # symbol: {
            self.tokenizer.advance()

            # Compile statements
            self.compileStatements()

            # symbol: }
            self.tokenizer.advance()

        self.vmWriter.writeLabel('endIf' + uniqueNo)

    def compileExpression(self):
        # Term
        self.compileTerm()

        while self.tokenizer.symbol() in CompilationEngine.op:

            # Symbol: op
            # Save for writing later
            op = self.tokenizer.symbol()
            self.tokenizer.advance()

            # Term
            self.compileTerm()

            # Write op
            if op == '+':
                self.vmWriter.writeArithmetic('ADD')
            elif op == '-':
                self.vmWriter.writeArithmetic('SUB')
            elif op == '=':
                self.vmWriter.writeArithmetic('EQ')
            elif op == '>':
                self.vmWriter.writeArithmetic('GT')
            elif op == '<':
                self.vmWriter.writeArithmetic('LT')
            elif op == '&':
                self.vmWriter.writeArithmetic('AND')
            elif op == '|':
                self.vmWriter.writeArithmetic('OR')
            elif op == '~':
                self.vmWriter.writeArithmetic('NOT')
            elif op == '*':
                self.vmWriter.writeCall('Math.multiply', 2)

    def compileTerm(self):
        """Compile a single term and leave its value on top of the stack.

        Handles integer and string constants, the keyword constants
        (true | false | null | this), variables, array entries, subroutine
        calls, parenthesised expressions and unary ``-`` / ``~``.

        Changes from the previous implementation:

        * ``varName[expression]`` used to call the sub-tag helpers, which
          always raise. It now pushes base + index, aims ``pointer 1`` at the
          entry and pushes ``that 0``.
        * String constants used to append all but the last character and to
          reload the string from ``pointer 1`` before every append.
          ``String.appendChar`` returns the string, so each append left an
          extra copy on the stack. Every character is now appended, chaining
          on that return value, so exactly one value is left.
        """
        tokenType = self.tokenizer.tokenType()

        if tokenType == 'INT_CONST':

            # Integer constant
            self.vmWriter.writePush('constant', self.tokenizer.intVal())
            self.tokenizer.advance()

        elif tokenType == 'STRING_CONST':

            # String constant
            # NOTE(review): assumes stringVal() returns the text without the
            # surrounding quotes - confirm against the tokenizer.
            string = self.tokenizer.stringVal()

            # Create an empty string object of the required length
            self.vmWriter.writePush('constant', len(string))
            self.vmWriter.writeCall('String.new', 1)

            # appendChar returns the string, which becomes the receiver of
            # the next append and finally the value of the term
            for char in string:
                self.vmWriter.writePush('constant', ord(char))
                self.vmWriter.writeCall('String.appendChar', 2)

            # Next token
            self.tokenizer.advance()

        elif tokenType == 'KEYWORD':

            # Keyword constant (true | false | null | this)
            # NOTE (original author): `let loop = true;` was observed not to
            # push -1 to the stack - unresolved.
            keyword = self.tokenizer.keyWord()
            if keyword == 'TRUE':
                self.vmWriter.writePush('constant', 1)
                self.vmWriter.writeArithmetic('NEG')

            elif keyword in ('FALSE', 'NULL'):
                self.vmWriter.writePush('constant', 0)

            elif keyword == 'THIS':
                self.vmWriter.writePush('pointer', 0)

            self.tokenizer.advance()

        elif tokenType == 'IDENTIFIER':
            # varName | varName[expression] | subroutineCall

            # Symbol: [ | ( | .
            following = self.tokenizer.lookAhead()
            if following == '[':
                # varName[expression]

                # Identifier: varName - push the array base address
                varName = self.tokenizer.identifier()
                kind = self.symbolTable.kindOf(varName)
                segment = 'this' if kind == 'field' else kind
                self.vmWriter.writePush(segment,
                                        self.symbolTable.indexOf(varName))
                self.tokenizer.advance()

                # Symbol: [
                self.tokenizer.advance()

                # Expression (the index)
                self.compileExpression()

                # Symbol: ]
                self.tokenizer.advance()

                # base + index -> pointer 1, then read the entry via that 0
                self.vmWriter.writeArithmetic('ADD')
                self.vmWriter.writePop('pointer', 1)
                self.vmWriter.writePush('that', 0)

            elif following in ('(', '.'):
                # subroutine call
                self.compileSubroutineCall()

            else:
                # Identifier: varName
                # Retrieve segment and index from symboltable and push to stack
                varName = self.tokenizer.identifier()
                kind = self.symbolTable.kindOf(varName)
                segment = 'this' if kind == 'field' else kind
                self.vmWriter.writePush(segment,
                                        self.symbolTable.indexOf(varName))
                self.tokenizer.advance()

        elif self.tokenizer.symbol() == '(':

            # ( Expression )

            # Symbol: (
            self.tokenizer.advance()

            # Expression
            self.compileExpression()

            # Symbol: )
            self.tokenizer.advance()

        elif self.tokenizer.symbol() in ['-', '~']:

            # Symbol: unaryop
            op = self.tokenizer.symbol()
            self.tokenizer.advance()

            # Term
            self.compileTerm()

            # Write op
            if op == '-':
                self.vmWriter.writeArithmetic('NEG')
            elif op == '~':
                self.vmWriter.writeArithmetic('NOT')

    def compileExpressionList(self):
        """Compile ``(expression (, expression)*)?`` and return the count.

        Expects the current token to be the first token of the first
        expression, or the closing ``)`` when the list is empty. The ``)``
        itself is left for the caller to consume.

        Returns:
            The number of expressions compiled (0 for an empty list).
        """
        # Compare by value: identity tests against string literals only work
        # by accident of interpreter string caching.
        if self.tokenizer.rawToken() == ')':
            return 0

        # Expression
        self.compileExpression()
        nArgs = 1

        # Further comma delimited expressions
        while self.tokenizer.rawToken() == ',':
            # Symbol: ,
            self.tokenizer.advance()

            # Expression
            self.compileExpression()
            nArgs += 1

        return nArgs
コード例 #14
0
class CompilationEngine:
    def __init__(self, tokenizer, output_vm_file, output_xml_file=None):
        """Set up a compilation engine over ``tokenizer``.

        Args:
            tokenizer: token source the ``compile*`` methods read from.
            output_vm_file: passed to ``VMWriter`` for the generated VM code.
            output_xml_file: writable object that receives the annotated XML
                parse tree. When omitted, the XML is written to an in-memory
                buffer. The methods of this class write XML unconditionally,
                so the former ``None`` default failed on the first tag.
        """
        if output_xml_file is None:
            import io
            output_xml_file = io.StringIO()

        self.tokenizer = tokenizer
        self.vmr = VMWriter(output_vm_file)
        self.output_xml_file = output_xml_file
        # Current XML indentation prefix (two spaces per nesting level)
        self.indent = ""
        self.symbol_table = SymbolTable()
        # Name of the class being compiled; set by compileClass()
        self.current_class = None
        # Names of subroutines declared with return type void
        self.void_subr = set()
        self.label_counter = 0

    def _addIndent(self):
        self.indent = self.indent + "  "

    def _subIndent(self):
        self.indent = self.indent[2:]

    def _startTag(self, tagName, top=False):
        self.output_xml_file.write(self.indent + "<" + tagName + ">")
        if not top:
            self.output_xml_file.write(" ")
        else:
            self.output_xml_file.write("\n")

    def _endTag(self, tagName, top=False):
        if not top:
            self.output_xml_file.write(" ")
            self.output_xml_file.write("</" + tagName + ">")
        else:
            self.output_xml_file.write(self.indent + "</" + tagName + ">")
        self.output_xml_file.write("\n")

    def _compileSymbol(self, symbol):
        # just to make the code shorter..
        self._startTag("symbol")
        self.output_xml_file.write(symbol)
        self._endTag("symbol")
        self.tokenizer.advance()
        if symbol == "&lt;":
            return "<"
        elif symbol == "&gt;":
            return ">"
        elif symbol == "&quot;":
            return "\""
        elif symbol == "&amp;":
            return "&"
        else:
            return symbol

    def _compileIdentifier(self, name, status, type="", kind=""):
        is_cs = kind in {"CLASS", "SUBROUTINE"}
        if status == "def":
            assert kind != "" and type != ""
            self.symbol_table.define(name=name, type=type, kind=kind)
        self._startTag("identifier")
        if is_cs:
            self.output_xml_file.write(status + ": " + kind + " " + name)
        else:
            self.output_xml_file.write(status + ": " + ", ".join([
                name, "type=" + self.symbol_table.typeOf(name), "kind=" +
                self.symbol_table.kindOf(name), "index=" +
                str(self.symbol_table.indexOf(name))
            ]))
        self._endTag("identifier")
        self.tokenizer.advance()
        return name

    def _compileType(self):
        if self.tokenizer.current_token in {"int", "char", "boolean"}:
            this_type = self.tokenizer.current_token
            self._startTag("keyword")
            self.output_xml_file.write(self.tokenizer.current_token)
            self._endTag("keyword")
            self.tokenizer.advance()
            return this_type
        else:  # className
            this_class = self._compileIdentifier(
                name=self.tokenizer.identifier(), kind="CLASS", status="use")
            return this_class

    def compileClass(self):
        # 'class'
        self._startTag("class", top=True)
        self._addIndent()
        self._startTag("keyword")
        self.output_xml_file.write("class")
        self._endTag("keyword")
        self.tokenizer.advance()
        # className
        self._startTag("identifier")
        self.output_xml_file.write("onset of class: " +
                                   self.tokenizer.current_token)
        self.current_class = self.tokenizer.current_token
        self._endTag("identifier")
        self.tokenizer.advance()
        # '{'
        self._compileSymbol(self.tokenizer.symbol())
        # classVarDec* subroutineDec*
        while not self.tokenizer.current_token == "}":
            if self.tokenizer.current_token in {"static", "field"}:
                self.compileClassVarDec()
            elif self.tokenizer.current_token in {
                    "constructor", "function", "method"
            }:
                self.compileSubroutine()
        # '}'
        self._compileSymbol(self.tokenizer.symbol())
        # ending
        self._subIndent()
        self._endTag("class", top=True)

    def compileClassVarDec(self):
        this_class_var = self.tokenizer.current_token
        # {'static'|'field'}
        self._startTag("classVarDec", top=True)
        self._addIndent()
        self._startTag("keyword")
        self.output_xml_file.write(this_class_var)
        self._endTag("keyword")
        self.tokenizer.advance()
        # type
        this_type = self._compileType()
        # varName
        self._compileIdentifier(name=self.tokenizer.identifier(),
                                type=this_type,
                                kind=this_class_var.upper(),
                                status="def")
        # ("," varName)*
        while self.tokenizer.current_token == ",":
            self._compileSymbol(self.tokenizer.symbol())
            self._compileIdentifier(name=self.tokenizer.identifier(),
                                    type=this_type,
                                    kind=this_class_var.upper(),
                                    status="def")
        # ";"
        self._compileSymbol(self.tokenizer.symbol())
        # ending
        self._subIndent()
        self._endTag("classVarDec", top=True)

    def compileSubroutine(self):
        self.symbol_table.startSubroutine()
        this_subr = self.tokenizer.current_token
        # ('constructor'|'function'|'method')
        self._startTag("subroutineDec", top=True)
        self._addIndent()
        self._startTag("keyword")
        self.output_xml_file.write(this_subr)
        self._endTag("keyword")
        self.tokenizer.advance()
        # ('void'|type)
        this_return_type = self.tokenizer.current_token
        if self.tokenizer.current_token == "void":
            self._startTag("keyword")
            self.output_xml_file.write(self.tokenizer.current_token)
            self._endTag("keyword")
            self.tokenizer.advance()
        else:
            self._compileType()
        # subroutineName
        self._startTag("identifier")
        this_subr_name = self.tokenizer.current_token
        self.output_xml_file.write("onset of subroutine: " +
                                   self.tokenizer.current_token)
        self._endTag("identifier")
        self.tokenizer.advance()
        if this_return_type == "void":
            self.void_subr.add(this_subr_name)
        # '('
        self._compileSymbol(self.tokenizer.symbol())
        # parameterList
        if this_subr == "method":
            self.symbol_table.define(name="this",
                                     type=self.current_class,
                                     kind="ARG")
        param_cnt = self.compileParameterList() + (this_subr == "method")
        # ')'
        self._compileSymbol(self.tokenizer.symbol())
        # subroutineBody
        self._startTag("subroutineBody", top=True)
        self._addIndent()
        ## '{'
        self._compileSymbol(self.tokenizer.symbol())
        ## varDec*
        lcl_cnt = 0
        while self.tokenizer.current_token == "var":
            lcl_cnt += self.compileVarDec()
        self.vmr.writeFunction(self.current_class + "." + this_subr_name,
                               lcl_cnt)
        if this_subr == "method":
            self.vmr.writePush("ARG", 0)
            self.vmr.writePop("POINTER", 0)
        elif this_subr == "constructor":
            self.vmr.writePush("CONST", len(self.symbol_table.tbl_c))
            self.vmr.writeCall("Memory.alloc", 1)
            self.vmr.writePop("POINTER", 0)
        ## statements
        self.compileStatements(is_void=(this_return_type == "void"))
        ## '}'
        self._compileSymbol(self.tokenizer.symbol())
        ## subroutineBody ending
        self._subIndent()
        self._endTag("subroutineBody", top=True)
        # ending
        self._subIndent()
        self._endTag("subroutineDec", top=True)

    def compileParameterList(self):
        self._startTag("parameterList", top=True)
        self._addIndent()
        param_cnt = 0
        # possibly no parameters
        if self.tokenizer.current_token != ")":
            param_cnt += 1
            # type
            this_type = self._compileType()
            # varName
            self._compileIdentifier(name=self.tokenizer.identifier(),
                                    type=this_type,
                                    kind="ARG",
                                    status="def")
            # ("," type varName)*
            while self.tokenizer.current_token == ",":
                param_cnt += 1
                self._compileSymbol(self.tokenizer.symbol())  # ,
                # type
                this_type = self._compileType()
                # varName
                self._compileIdentifier(name=self.tokenizer.identifier(),
                                        type=this_type,
                                        kind="ARG",
                                        status="def")
        # ending
        self._subIndent()
        self._endTag("parameterList", top=True)
        return param_cnt

    def compileVarDec(self):
        # 'var'
        self._startTag("varDec", top=True)
        self._addIndent()
        self._startTag("keyword")
        self.output_xml_file.write("var")
        self._endTag("keyword")
        self.tokenizer.advance()
        # type
        this_type = self._compileType()
        var_cnt = 1
        # varName
        self._compileIdentifier(name=self.tokenizer.identifier(),
                                type=this_type,
                                kind="VAR",
                                status="def")
        # ("," varName)*
        while self.tokenizer.current_token == ",":
            var_cnt += 1
            self._compileSymbol(self.tokenizer.symbol())
            self._compileIdentifier(name=self.tokenizer.identifier(),
                                    type=this_type,
                                    kind="VAR",
                                    status="def")
        # ";"
        self._compileSymbol(self.tokenizer.symbol())
        # ending
        self._subIndent()
        self._endTag("varDec", top=True)
        return var_cnt

    def _compileStatement(self, is_void=False):
        if self.tokenizer.current_token == "let":
            self.compileLet()
        elif self.tokenizer.current_token == "while":
            self.compileWhile()
        elif self.tokenizer.current_token == "if":
            self.compileIf()
        elif self.tokenizer.current_token == "do":
            self.compileDo()
        elif self.tokenizer.current_token == "return":
            self.compileReturn(is_void=is_void)

    def compileStatements(self, is_void=False):
        # statements*
        self._startTag("statements", top=True)
        self._addIndent()
        while self.tokenizer.current_token in {
                "let", "while", "if", "do", "return"
        }:
            self._compileStatement(is_void=is_void)
        # ending
        self._subIndent()
        self._endTag("statements", top=True)

    def compileLet(self):
        """Compile ``let varName ('[' expression ']')? = expression ;``.

        For a plain variable the right-hand value is popped straight into the
        variable's segment. For an array element the base address is pushed,
        the index expression is added to it, and after the right-hand side is
        evaluated the value is parked in TEMP 0 while POINTER 1 is set to the
        target address, then stored through THAT 0.

        A name whose kind is not VAR/ARG/FIELD/STATIC produces no push/pop.
        """
        segment_of = {
            "VAR": "LOCAL",
            "ARG": "ARG",
            "FIELD": "THIS",
            "STATIC": "STATIC",
        }

        # 'let'
        self._startTag("letStatement", top=True)
        self._addIndent()
        self._startTag("keyword")
        self.output_xml_file.write("let")
        self._endTag("keyword")
        self.tokenizer.advance()
        # varName
        target = self._compileIdentifier(name=self.tokenizer.identifier(),
                                         status="use")
        # optional '[' expression ']'
        indexed = self.tokenizer.current_token == "["
        if indexed:
            base_segment = segment_of.get(self.symbol_table.kindOf(target))
            if base_segment is not None:
                self.vmr.writePush(base_segment,
                                   self.symbol_table.indexOf(target))
            self._compileSymbol(self.tokenizer.symbol())  # '['
            self.compileExpression()
            self._compileSymbol(self.tokenizer.symbol())  # ']'
            self.vmr.writeArithmetic("ADD")  # base + index
        # '='
        self._compileSymbol(self.tokenizer.symbol())
        # right-hand side
        self.compileExpression()
        if indexed:
            # The value is on top of the target address: stash it, aim THAT
            # at the address, then store the value.
            self.vmr.writePop("TEMP", 0)
            self.vmr.writePop("POINTER", 1)
            self.vmr.writePush("TEMP", 0)
            self.vmr.writePop("THAT", 0)
        else:
            dest_segment = segment_of.get(self.symbol_table.kindOf(target))
            if dest_segment is not None:
                self.vmr.writePop(dest_segment,
                                  self.symbol_table.indexOf(target))
        # ';'
        self._compileSymbol(self.tokenizer.symbol())
        # close the element
        self._subIndent()
        self._endTag("letStatement", top=True)

    def compileIf(self):
        """Compile ``if ( expression ) { statements } (else { statements })?``.

        Emitted VM shape: the condition is negated and an if-goto jumps to
        ``IF_LABEL_n`` (start of the else part) when it is false; the true
        branch ends with a goto to ``LEAVE_IF_LABEL_m``. Both labels are always
        written, whether or not an ``else`` clause is present. Each label takes
        the current value of ``self.label_counter`` and then increments it.
        """
        def consume_keyword(word):
            # Write the keyword element and step past the token.
            self._startTag("keyword")
            self.output_xml_file.write(word)
            self._endTag("keyword")
            self.tokenizer.advance()

        def consume_symbol():
            self._compileSymbol(self.tokenizer.symbol())

        self._startTag("ifStatement", top=True)
        self._addIndent()
        consume_keyword("if")
        consume_symbol()  # '('
        self.compileExpression()
        self.vmr.writeArithmetic("NOT")
        else_label = "IF_LABEL_{}".format(self.label_counter)
        self.label_counter += 1
        self.vmr.writeIf(else_label)  # condition false: skip the true branch
        consume_symbol()  # ')'
        consume_symbol()  # '{'
        self.compileStatements()
        # Allocated after the body, so nested statements take numbers first.
        leave_label = "LEAVE_IF_LABEL_{}".format(self.label_counter)
        self.label_counter += 1
        self.vmr.writeGoto(leave_label)  # true branch done: skip the else part
        consume_symbol()  # '}'
        self.vmr.writeLabel(else_label)
        if self.tokenizer.current_token == "else":
            consume_keyword("else")
            consume_symbol()  # '{'
            self.compileStatements()
            consume_symbol()  # '}'
        self.vmr.writeLabel(leave_label)
        # close the element
        self._subIndent()
        self._endTag("ifStatement", top=True)

    def compileWhile(self):
        """Compile ``while ( expression ) { statements }``.

        Emitted VM shape: a label at the loop top, the negated condition with
        an if-goto to the exit label, the body, a goto back to the top, and
        the exit label. Both labels are ``WHILE_LABEL_<n>`` with consecutive
        counter values; the counter is bumped once more after the loop.
        """
        def consume_symbol():
            self._compileSymbol(self.tokenizer.symbol())

        self._startTag("whileStatement", top=True)
        self._addIndent()
        # 'while'
        self._startTag("keyword")
        self.output_xml_file.write("while")
        self._endTag("keyword")
        self.tokenizer.advance()

        top_label = "WHILE_LABEL_" + str(self.label_counter)
        self.label_counter += 1
        self.vmr.writeLabel(top_label)

        consume_symbol()  # '('
        self.compileExpression()
        self.vmr.writeArithmetic("NOT")
        exit_label = "WHILE_LABEL_" + str(self.label_counter)
        self.label_counter += 1
        self.vmr.writeIf(exit_label)  # condition false: leave the loop

        consume_symbol()  # ')'
        consume_symbol()  # '{'
        self.compileStatements()
        consume_symbol()  # '}'

        # close the element, then close the loop in the VM output
        self._subIndent()
        self._endTag("whileStatement", top=True)
        self.vmr.writeGoto(top_label)
        self.vmr.writeLabel(exit_label)
        # Extra bump kept on purpose: it leaves one number unused but does
        # determine the numbering of every label emitted afterwards.
        self.label_counter += 1

    def compileDo(self):
        """Compile ``do subroutineCall ;`` inside a ``doStatement`` element.

        The call itself, including all VM output, is delegated to
        ``_compileSubroutineCall``.
        """
        element = "doStatement"
        self._startTag(element, top=True)
        self._addIndent()

        # 'do' keyword
        self._startTag("keyword")
        self.output_xml_file.write("do")
        self._endTag("keyword")
        self.tokenizer.advance()

        self._compileSubroutineCall()
        # terminating ';'
        terminator = self.tokenizer.symbol()
        self._compileSymbol(terminator)

        self._subIndent()
        self._endTag(element, top=True)

    def compileReturn(self, is_void=False):
        """Compile ``return expression? ;``.

        The optional expression is compiled when the token after ``return``
        is not ``;``. When ``is_void`` is true a constant 0 is pushed before
        the VM ``return`` is written.
        """
        self._startTag("returnStatement", top=True)
        self._addIndent()

        # 'return' keyword
        self._startTag("keyword")
        self.output_xml_file.write("return")
        self._endTag("keyword")
        self.tokenizer.advance()

        has_value = self.tokenizer.current_token != ";"
        if has_value:
            self.compileExpression()
        # ';'
        self._compileSymbol(self.tokenizer.symbol())

        self._subIndent()
        self._endTag("returnStatement", top=True)

        if is_void:
            self.vmr.writePush("CONST", 0)
        self.vmr.writeReturn()

    def _compileSubroutineCall(self):
        """Compile a subroutine call used as a ``do`` statement.

        Three forms are distinguished by looking one token ahead and at the
        symbol table:

        * ``name(...)``: method of the current class; ``POINTER 0`` is pushed
          as the receiver and the callee is ``<current_class>.name``.
        * ``var.name(...)`` where ``var`` has a type in the symbol table:
          method call; the variable is pushed as the receiver and the callee
          is ``<type of var>.name``.
        * ``Head.name(...)`` where ``Head`` has no type in the symbol table:
          plain function call, no receiver.

        The argument count passed to ``writeCall`` is the number of listed
        expressions plus one when a receiver was pushed. The call result is
        then popped into ``TEMP 0``.
        """
        lookahead = self.tokenizer.peek()
        receiver_args = 0
        if lookahead != ".":
            # bare name: method of the current class
            method_name = self._compileIdentifier(
                name=self.tokenizer.identifier(),
                kind="SUBROUTINE",
                status="use")
            callee = self.current_class + "." + method_name
            self.vmr.writePush("POINTER", 0)
            receiver_args = 1
        else:
            head = self._compileIdentifier(
                name=self.tokenizer.identifier(), kind="CLASS", status="use")
            head_type = self.symbol_table.typeOf(head)
            head_kind = self.symbol_table.kindOf(head)
            # FIELD and VAR are renamed; every other kind is passed as-is.
            head_segment = {"FIELD": "THIS", "VAR": "LOCAL"}.get(head_kind,
                                                                  head_kind)
            if head_type is None:
                # function: the callee is spelled exactly as in the source
                qualified = head + self._compileSymbol(self.tokenizer.symbol())
                callee = qualified + self._compileIdentifier(
                    name=self.tokenizer.identifier(),
                    kind="SUBROUTINE",
                    status="use")
            else:
                # method on a variable
                self._compileSymbol(self.tokenizer.symbol())  # "."
                method_name = self._compileIdentifier(
                    name=self.tokenizer.identifier(),
                    kind="SUBROUTINE",
                    status="use")
                callee = head_type + "." + method_name
                self.vmr.writePush(head_segment,
                                   self.symbol_table.indexOf(head))
                receiver_args = 1 if head_type else 0
        # '('
        self._compileSymbol(self.tokenizer.symbol())
        # expressionList
        listed_args = self.compileExpressionList()
        # ')'
        self._compileSymbol(self.tokenizer.symbol())
        self.vmr.writeCall(callee, listed_args + receiver_args)
        self.vmr.writePop("TEMP", 0)

    def compileTerm(self):
        """Compile one ``term`` and emit the VM code that leaves its value on the stack.

        Handled forms, chosen from the current token and one token of
        lookahead:

        * unary ``-`` / ``~`` followed by a term (emits NEG / NOT afterwards);
        * ``( expression )``;
        * identifier forms: ``name[expr]`` (array read through THAT 0),
          ``name(...)`` (method of the current class), ``head.name(...)``
          (method when ``head`` has a type in the symbol table, otherwise a
          function), or a plain variable;
        * string constants (built with ``String.new`` / ``String.appendChar``);
        * integer constants;
        * keyword constants: ``true`` is pushed as 1 then negated, ``false``
          and ``null`` as 0, ``this`` as POINTER 0, ``that`` as POINTER 1.

        Any other token prints ``????`` and is not consumed.

        Variables whose kind is not VAR/ARG/FIELD/STATIC produce no push.
        """
        segment_of = {
            "VAR": "LOCAL",
            "ARG": "ARG",
            "FIELD": "THIS",
            "STATIC": "STATIC",
        }

        def push_variable(name):
            # Push the named variable from its segment; unknown names push nothing.
            segment = segment_of.get(self.symbol_table.kindOf(name))
            if segment is not None:
                self.vmr.writePush(segment, self.symbol_table.indexOf(name))

        # 'term'
        self._startTag("term", top=True)
        self._addIndent()
        tk = self.tokenizer.current_token
        if tk in {"-", "~"}:
            self._compileSymbol(self.tokenizer.current_token)
            self.compileTerm()
            # the operator applies to the operand just compiled
            if tk == "-":
                self.vmr.writeArithmetic("NEG")
            elif tk == "~":
                self.vmr.writeArithmetic("NOT")
        elif tk == "(":
            self._compileSymbol(self.tokenizer.current_token)  # (
            self.compileExpression()
            self._compileSymbol(self.tokenizer.current_token)  # )
        elif self.tokenizer.tokenType() == "IDENTIFIER":
            pk = self.tokenizer.peek()
            self._compileIdentifier(name=self.tokenizer.current_token,
                                    kind="CLASS",
                                    status="use")
            if pk == "[":  # array element
                push_variable(tk)  # base address
                self._compileSymbol(self.tokenizer.symbol())  # [
                self.compileExpression()
                self._compileSymbol(self.tokenizer.symbol())  # ]
                self.vmr.writeArithmetic("ADD")
                self.vmr.writePop("POINTER", 1)
                self.vmr.writePush("THAT", 0)
            elif pk == "(":  # method of the current class
                # tk is the subroutine name, not a variable: the receiver is
                # the current object, so push `this` to match the extra
                # argument counted in the call below.
                self.vmr.writePush("POINTER", 0)
                self._compileSymbol(self.tokenizer.symbol())  # (
                exp_cnt = self.compileExpressionList()
                self._compileSymbol(self.tokenizer.symbol())  # )
                self.vmr.writeCall(self.current_class + "." + tk, exp_cnt + 1)
            elif pk == ".":  # method, or function
                self._compileSymbol(self.tokenizer.symbol())  # .
                receiver_type = self.symbol_table.typeOf(tk)
                if receiver_type:  # method on a variable
                    this_subr = self._compileIdentifier(
                        name=receiver_type + "." +
                        self.tokenizer.current_token,
                        kind="SUBROUTINE",
                        status="use")
                    push_variable(tk)  # receiver object
                else:  # function: tk is taken as the class name
                    this_subr = self._compileIdentifier(
                        name=tk + "." + self.tokenizer.current_token,
                        kind="SUBROUTINE",
                        status="use")
                self._compileSymbol(self.tokenizer.symbol())  # (
                exp_cnt = self.compileExpressionList()
                self._compileSymbol(self.tokenizer.symbol())  # )
                if receiver_type:
                    self.vmr.writeCall(this_subr, exp_cnt + 1)
                else:
                    self.vmr.writeCall(this_subr, exp_cnt)
            else:  # plain variable
                push_variable(tk)
        elif self.tokenizer.tokenType() == "STRING_CONST":
            self._startTag("stringConstant")
            text = self.tokenizer.stringVal()
            self.output_xml_file.write(text)
            self.vmr.writePush("CONST", len(text))
            self.vmr.writeCall("String.new", 1)
            for ch in text:
                self.vmr.writePush("CONST", ord(ch))
                self.vmr.writeCall("String.appendChar", 2)
            self._endTag("stringConstant")
            self.tokenizer.advance()
        elif self.tokenizer.tokenType() == "INT_CONST":
            self._startTag("integerConstant")
            self.output_xml_file.write(self.tokenizer.current_token)
            self.vmr.writePush("CONST", self.tokenizer.current_token)
            self._endTag("integerConstant")
            self.tokenizer.advance()
        elif self.tokenizer.tokenType(
        ) == "KEYWORD":  # true / false / this / null
            self._startTag("keyword")
            self.output_xml_file.write(self.tokenizer.current_token)
            if self.tokenizer.current_token == "true":
                self.vmr.writePush("CONST", 1)
                self.vmr.writeArithmetic("NEG")
            elif self.tokenizer.current_token in ("false", "null"):
                self.vmr.writePush("CONST", 0)
            elif self.tokenizer.current_token == "this":
                self.vmr.writePush("POINTER", 0)
            elif self.tokenizer.current_token == "that":
                self.vmr.writePush("POINTER", 1)
            self._endTag("keyword")
            self.tokenizer.advance()
        else:
            print("????")
        # ending
        self._subIndent()
        self._endTag("term", top=True)

    def compileExpression(self):
        """Compile ``term (op term)*`` inside an ``expression`` element.

        Operators are applied strictly left to right, each one emitted right
        after its right-hand term. ``*`` and ``/`` become calls to
        ``Math.multiply`` / ``Math.divide``; the rest map to VM arithmetic
        commands.
        """
        arithmetic_ops = {
            "+": "ADD",
            "-": "SUB",
            "&": "AND",
            "|": "OR",
            "<": "LT",
            ">": "GT",
            "=": "EQ",
        }
        library_ops = {"*": "Math.multiply", "/": "Math.divide"}

        self._startTag("expression", top=True)
        self._addIndent()
        # leading term
        self.compileTerm()
        # (op term)*
        while True:
            upcoming = self.tokenizer.current_token
            if upcoming not in arithmetic_ops and upcoming not in library_ops:
                break
            operator = self._compileSymbol(self.tokenizer.symbol())
            self.compileTerm()
            if operator in library_ops:
                self.vmr.writeCall(library_ops[operator], 2)
            else:
                self.vmr.writeArithmetic(arithmetic_ops[operator])

        self._subIndent()
        self._endTag("expression", top=True)

    def compileExpressionList(self):
        """Compile ``(expression (',' expression)*)?`` and return the count.

        The list is taken to be empty when the current token is ``)``.
        Further expressions are compiled only while the current token is a
        comma, so a malformed list stops at the first unexpected token
        instead of consuming it as if it were a comma.

        Returns:
            The number of expressions compiled (0 for an empty list).
        """
        # 'expressionList'
        self._startTag("expressionList", top=True)
        self._addIndent()
        exp_cnt = 0
        # possibly no...
        if self.tokenizer.current_token != ")":
            exp_cnt += 1
            # expression
            self.compileExpression()
            # (',' expression)*
            while self.tokenizer.current_token == ",":
                exp_cnt += 1
                self._compileSymbol(self.tokenizer.symbol())
                self.compileExpression()
        # ending
        self._subIndent()
        self._endTag("expressionList", top=True)
        return exp_cnt
コード例 #15
0
class CompilationEngine:
    """Jack-to-VM compiler that walks a token cursor and writes VM commands.

    The engine reads tokens through ``tokenizer.getToken()`` /
    ``nextToken()`` / ``advance()``, records variables in a ``SymbolTable``
    and emits output through a ``VMWriter``. Every ``compile*`` method
    assumes the cursor sits on the first token of its construct.
    """

    def __init__(self, tokenizer, classDict):
        """Bind the tokenizer and open a VM writer named after its file.

        Args:
            tokenizer: token source; its ``getFileName()`` value is used both
                as the class name and as the argument to ``VMWriter``.
            classDict: accepted but not used by this class.
        """
        self.tokenizer = tokenizer
        self.className = tokenizer.getFileName()
        self.resetLabelCounters()
        self.writer = VMWriter(tokenizer.getFileName())

    def resetLabelCounters(self):
        """Restart the ``if`` and ``while`` label numbering at zero."""
        self.ifLabelCounter = 0
        self.whileLabelCounter = 0

    def compileClass(self):
        """Compile every class-level declaration, then close the writer.

        A fresh symbol table is created. Tokens are scanned until the
        tokenizer has no more: ``static``/``field`` start a class variable
        declaration, a ``const.SUBS`` token starts a subroutine, and anything
        else is skipped.
        """
        self.symbolTable = SymbolTable()
        while self.tokenizer.hasMoreTokens():
            if self.tokenizer.getToken(
            ) == 'static' or self.tokenizer.getToken() == 'field':
                self.compileClassVarDec()
            elif self.tokenizer.getToken() in const.SUBS:
                self.compileSubroutine()
            # Do not step past a subroutine keyword: it has to be seen by
            # the next loop iteration.
            if not self.tokenizer.getToken() in const.SUBS:
                self.tokenizer.advance()
        self.writer.close()

    def compileClassVarDec(self):
        """Record one ``static``/``field`` declaration in the symbol table.

        Tokens up to (not including) ``;`` are collected with commas dropped:
        element 0 is the kind, element 1 the type, the rest are names.
        """
        variable = []
        while self.tokenizer.getToken() != ';':
            if self.tokenizer.getToken() != ',':
                variable.append(self.tokenizer.getToken())
            self.tokenizer.advance()
        self.symbolTable.define(variable[2:], variable[1], variable[0])

    def compileSubroutine(self):
        """Compile one subroutine: header, local declarations and body.

        The header is scanned up to ``{``: the token after the
        ``const.SUBS`` keyword is taken as the return type and the one after
        that as the subroutine name. ``function <class>.<name> <nLocals>`` is
        written once all ``var`` declarations have been counted, and the
        label counters are reset before the statements are compiled.
        """
        self.symbolTable.startSubroutine()
        functionName = None
        getType = False
        getName = False
        while self.tokenizer.getToken() != '{':
            if self.tokenizer.getToken() == '(':
                self.compileParameterList()
            elif self.tokenizer.getToken() in const.SUBS:
                getType = True
            elif getType:
                # current token is the return type; the name comes next
                getName = True
                getType = False
            elif getName:
                functionName = self.tokenizer.getToken()
                getName = False
            self.tokenizer.advance()
        self.tokenizer.advance()  # {
        numLocals = 0
        while self.tokenizer.getToken() == 'var':
            numLocals += self.compileVarDec()
            self.tokenizer.advance()
        self.writer.writeFunction(self.className + '.' + functionName,
                                  numLocals)
        self.resetLabelCounters()
        self.compileStatements()
        if self.tokenizer.getToken() == '}':
            self.tokenizer.advance()  # }

    def compileParameterList(self):
        """Define every parameter between ``(`` and ``)`` with kind ``'arg'``.

        The collected tokens alternate type, name, type, name...; each
        odd-indexed token is a name whose type is the token before it. On
        return the cursor is on ``)``.
        """
        self.tokenizer.advance()
        variables = []
        while self.tokenizer.getToken() != ')':
            if self.tokenizer.getToken() != ',':
                variables.append(self.tokenizer.getToken())
            self.tokenizer.advance()
        idx = 0
        for variable in variables:
            if idx % 2 != 0:
                self.symbolTable.define([variable], variables[idx - 1], 'arg')
            idx += 1

    def compileVarDec(self):
        """Record one ``var`` declaration and return how many names it declares.

        Tokens up to (not including) ``;`` are collected with commas dropped:
        element 0 is the kind, element 1 the type, the rest are names.
        """
        variable = []
        while self.tokenizer.getToken() != ';':
            if self.tokenizer.getToken() != ',':
                variable.append(self.tokenizer.getToken())
            self.tokenizer.advance()
        self.symbolTable.define(variable[2:], variable[1], variable[0])
        return len(variable[2:])

    def compileStatements(self):
        """Compile statements for as long as the current token starts one.

        The checks are independent ``if``s rather than an ``elif`` chain, so
        one loop pass may compile several consecutive statements.
        """
        while self.tokenizer.getToken() in [
                'let', 'if', 'while', 'do', 'return'
        ]:
            if self.tokenizer.getToken() == 'let':
                self.compileLet()
            if self.tokenizer.getToken() == 'if':
                self.compileIf()
            if self.tokenizer.getToken() == 'while':
                self.compileWhile()
            if self.tokenizer.getToken() == 'do':
                self.compileDo()
            if self.tokenizer.getToken() == 'return':
                self.compileReturn()

    def compileLet(self):
        """Compile ``let varName ('[' expression ']')? = expression``.

        The right-hand value is popped into the assignee's own kind/index.
        NOTE: an index expression is compiled, but no array-element store is
        emitted here; the pop always targets the variable itself.

        Raises:
            Exception: if the assignee has no kind in the symbol table.
        """
        self.tokenizer.advance()  # let
        assignee = self.tokenizer.getToken()
        self.tokenizer.advance()  # varName
        if self.tokenizer.getToken() == '[':
            self.tokenizer.advance()  # [
            self.compileExpression()  # expression
            self.tokenizer.advance()  # ]
        self.tokenizer.advance()  # =
        self.compileExpression()  # expression
        if not self.symbolTable.kindOf(assignee):
            raise Exception('Undeclared identifier assignment ' + assignee)
        else:
            self.writer.writePop(self.symbolTable.kindOf(assignee),
                                 self.symbolTable.indexOf(assignee))

    def getLabel(self, labelName, increment=False):
        """Return ``labelName`` suffixed with the matching counter value.

        Names containing ``'WHILE'`` use the while counter and names
        containing ``'IF'`` use the if counter. With ``increment`` true the
        counter that was used is advanced after being read.
        """
        label = labelName
        if 'WHILE' in labelName:
            label += str(self.whileLabelCounter)
            if increment:
                self.whileLabelCounter += 1
        if 'IF' in labelName:
            label += str(self.ifLabelCounter)
            if increment:
                self.ifLabelCounter += 1
        return label

    def compileIf(self):
        """Compile ``if ( expression ) { statements } (else { statements })?``.

        All three labels share one counter value, taken before the body is
        compiled. The end label is written only when an ``else`` clause is
        present.
        """
        firstLabel = self.getLabel('IF_TRUE')
        secondLabel = self.getLabel('IF_FALSE')
        endLabel = self.getLabel('IF_END', True)
        self.tokenizer.advance()  # if
        self.tokenizer.advance()  # (
        self.compileExpression()  # expression
        self.tokenizer.advance()  # )
        self.writer.writeIf(firstLabel)
        self.writer.writeGoto(secondLabel)
        self.writer.writeLabel(firstLabel)
        self.tokenizer.advance()  # {
        self.compileStatements()  # statements
        self.tokenizer.advance()  # }
        if self.tokenizer.getToken() == 'else':
            self.writer.writeGoto(endLabel)
            self.writer.writeLabel(secondLabel)
            self.tokenizer.advance()  # else
            self.tokenizer.advance()  # {
            self.compileStatements()  # statements
            self.tokenizer.advance()  # }
            self.writer.writeLabel(endLabel)
        else:
            self.writer.writeLabel(secondLabel)

    def compileWhile(self):
        """Compile ``while ( expression ) { statements }``.

        Emits the loop-top label, the condition, a unary ``~`` and an if-goto
        to the end label, the body, a goto back to the top and the end label.
        """
        firstLabel = self.getLabel('WHILE_EXP')
        secondLabel = self.getLabel('WHILE_END', True)
        self.writer.writeLabel(firstLabel)
        self.tokenizer.advance()  # while
        self.tokenizer.advance()  # (
        self.compileExpression()  # expression
        self.tokenizer.advance()  # )
        self.writer.writeArithmetic('~', True)  # negating the expression
        self.writer.writeIf(secondLabel)
        self.tokenizer.advance()  # {
        self.compileStatements()  # statements
        self.tokenizer.advance()  # }
        self.writer.writeGoto(firstLabel)
        self.writer.writeLabel(secondLabel)

    def compileDo(self):
        """Compile ``do subroutineCall ;``.

        For ``head.name(...)`` the callee prefix is the symbol-table type of
        ``head`` when it has one, otherwise ``head`` itself. For a bare
        ``name(...)`` the callee is just ``name``. After the call the result
        is popped into ``temp 0``.
        """
        self.tokenizer.advance()  # do
        functionName = ''
        if self.tokenizer.nextToken() == '.':
            if not self.symbolTable.typeOf(self.tokenizer.getToken()):
                functionName = self.tokenizer.getToken()
            else:
                functionName = self.symbolTable.typeOf(
                    self.tokenizer.getToken())
            self.tokenizer.advance()  # (className | varName)
            functionName += '.'
            self.tokenizer.advance()  # .
        functionName += self.tokenizer.getToken()
        self.tokenizer.advance()  # subroutineName
        self.tokenizer.advance()  # (
        numArgs = self.compileExpressionList()  # expressionList
        self.tokenizer.advance()  # )
        self.tokenizer.advance()  # ;
        self.writer.writeCall(functionName, numArgs)
        self.writer.writePop('temp', 0)

    def compileReturn(self):
        """Compile ``return expression?``.

        A bare ``return;`` pushes constant 0 first. A return expression, if
        present, is compiled (and consumes its trailing ``;``) before the VM
        ``return`` is written.
        """
        if self.tokenizer.nextToken() == ';':
            self.writer.writePush('constant', 0)
        self.tokenizer.advance()  # return
        if self.tokenizer.getToken() != ';':
            self.compileExpression()  # expression?
        self.writer.writeReturn()

    def compileExpression(self):
        """Compile ``term (op term)*`` up to ``;``, ``)``, ``]`` or ``,``.

        Each binary operator is written right after the term that follows
        it, so ``a + b + c`` yields ``a b + c +`` (left to right, with no
        precedence). A leading ``const.UOP`` token is handed to
        ``compileTerm`` as a unary operator. A terminating ``;`` is consumed;
        the other terminators are left for the caller.
        """
        pendingOp = None
        expLen = 0
        while self.tokenizer.getToken() not in [';', ')', ']', ',']:
            token = self.tokenizer.getToken()
            if token in const.UOP and expLen == 0:
                self.compileTerm()
            elif token in const.OP:
                pendingOp = token
                self.tokenizer.advance()
            else:
                self.compileTerm()
                if pendingOp:
                    # both operands are on the stack now
                    self.writer.writeArithmetic(pendingOp)
                    pendingOp = None
            expLen += 1
        if pendingOp:
            # an operator with no right-hand term is still written, as before
            self.writer.writeArithmetic(pendingOp)
        if self.tokenizer.getToken() == ';':
            self.tokenizer.advance()  # ;

    def compileTerm(self):
        """Compile a single term.

        Handles ``( expression )``, a unary operator followed by a term, a
        plain variable or constant (pushed from its symbol-table kind, or as
        ``constant`` when the name is unknown), and the ``name[...]``,
        ``name.sub(...)`` and ``name(...)`` forms. For the call forms the
        callee prefix is the symbol-table type of ``name`` when it has one,
        otherwise ``name`` itself.
        """
        if self.tokenizer.getToken() == '(':
            self.tokenizer.advance()  # (
            self.compileExpression()  # expression
            self.tokenizer.advance()  # )
            return
        uop = None
        if self.tokenizer.getToken() in const.UOP:
            uop = self.tokenizer.getToken()
            self.tokenizer.advance()  # UOP
            self.compileTerm()
            if uop:
                self.writer.writeArithmetic(uop, True)
        if self.tokenizer.getToken() in [';', ')', ']', ',']:
            return
        if self.tokenizer.getToken(
        ) not in const.OP and self.tokenizer.nextToken() not in [
                '[', '.', '('
        ]:
            if self.symbolTable.kindOf(self.tokenizer.getToken()):
                self.writer.writePush(
                    self.symbolTable.kindOf(self.tokenizer.getToken()),
                    self.symbolTable.indexOf(self.tokenizer.getToken()))
            else:
                self.writer.writePush('constant', self.tokenizer.getToken())
            self.tokenizer.advance()  # varName, etc.
        elif self.tokenizer.nextToken() in ['[', '.', '(']:
            functionName = ''
            if not self.symbolTable.typeOf(self.tokenizer.getToken()):
                functionName += self.tokenizer.getToken()
            else:
                functionName += self.symbolTable.typeOf(
                    self.tokenizer.getToken())
            self.tokenizer.advance()  # varName, etc.
            if self.tokenizer.getToken() == '[':
                self.tokenizer.advance()  # [
                self.compileExpression()  # expression
                self.tokenizer.advance()  # ]
            if self.tokenizer.getToken() == '.':
                self.tokenizer.advance()  # .
                functionName += '.' + self.tokenizer.getToken()
                self.tokenizer.advance()  # identifier
            numArgs = 0
            if self.tokenizer.getToken() == '(':
                self.tokenizer.advance()  # (
                numArgs = self.compileExpressionList()  # expressionList
                self.tokenizer.advance()  # )
                self.writer.writeCall(functionName, numArgs)

    def compileExpressionList(self):
        """Compile comma-separated expressions up to ``)`` and return the count.

        On return the cursor is on ``)``.
        """
        count = 0
        while self.tokenizer.getToken() != ')':
            if self.tokenizer.getToken() == ',':
                self.tokenizer.advance()  # ,?
            else:
                self.compileExpression()
                count += 1
        return count
コード例 #16
0
class CompilationEngine():
    """Recursive-descent compiler from a Jack token list to VM commands.

    Tokens are objects exposing ``tag`` (token category) and ``text``
    (lexeme).  Every ``compileXxx`` method consumes the tokens of one grammar
    rule and emits the corresponding VM commands through ``self.VM``.
    """

    def __init__(self, tokens, fp_out):
        """Prepare the token cursor, symbol table and VM writer.

        tokens: sequence of token objects with ``tag`` and ``text``.
        fp_out: handed unchanged to ``VMWriter``.
        """
        self.tokens = tokens
        self.num = 0  # index of the next token to consume
        self.total = len(tokens)
        # An empty token list no longer raises IndexError here.
        self.crnt_elem = self.tokens[0] if self.total else None
        self.symbols = SymbolTable()  # class-level and subroutine-level symbols
        # possibly should call compileClass from outside
        self.VM = VMWriter(fp_out)
        self.labels = {}  # per-key counters used to create unique labels
        self.className = None  # set by compileClass
        self.subType = None  # set by compileSubroutineDec
        self.name = None  # "Class.subroutine", set by compileSubroutineDec

    def compileClass(self):
        """Class Grammar:
        class className { classVarDec* subroutineDec* }

        Closes the VM writer once the class has been compiled."""
        self.check_next(KEYWORD, "class")
        class_name = self.get()  # className token
        self.className = class_name.text
        self.check_next(SYMBOL, "{")
        while self.check_texts(KEYWORD, [STATIC, FIELD]):
            self.compileClassVarDec()
        while self.check_texts(KEYWORD, ["constructor", "function", "method"]):
            self.compileSubroutineDec()
        self.check_next(SYMBOL, "}")
        self.VM.close()

    def compileClassVarDec(self):
        """ClassVarDec Grammar:
        (static|field) type VarName ("," VarName)* ";" """
        kind_t, type_t, name_t = self.get_mult(3)
        if kind_t.text == FIELD:
            # Fields are stored under the THIS kind; note that this rewrites
            # the token's text in place.
            kind_t.text = THIS
        self.symbols.define(name_t.text, type_t.text, kind_t.text)
        while self.check_texts(SYMBOL, ",", True):  # another VarName
            name_t = self.get()
            self.symbols.define(name_t.text, type_t.text, kind_t.text)
        self.check_next(SYMBOL, ";")  # end of the declaration

    def compileSubroutineDec(self):
        """SubroutineDec Grammar:
        (constructor|function|method) ("void"| type)
        subroutineName "(" ParameterList ")" SubroutineBody """
        self.symbols.startSubroutine()  # wipe previous subroutine symbols
        # The fourth token is the opening "(" and is consumed unchecked.
        sub_type, ret_type, sub_name, _ = self.get_mult(4)
        self.subType = sub_type.text  # (constructor|function|method)
        self.name = "{}.{}".format(self.className, sub_name.text)
        if self.subType == "method":
            # Methods receive the object as their first argument.
            self.symbols.define(THIS, self.className, ARG)
        self.compileParameterList()
        self.check_next(SYMBOL, ")")
        self.compileSubroutineBody()
        self.symbols.startSubroutine()  # wipe this subroutine's symbols

    def compileParameterList(self):
        """ParameterList Grammar:
        ((type varName) ("," type varName)*)?

        Every parameter is defined in the symbol table with kind ARG."""
        if not self.check_texts(SYMBOL, ")"):  # at least one parameter
            type_t, name_t = self.get_mult(2)
            self.symbols.define(name_t.text, type_t.text, ARG)
        while self.check_texts(SYMBOL, ",", True):  # another parameter
            type_t, name_t = self.get_mult(2)
            self.symbols.define(name_t.text, type_t.text, ARG)

    def compileSubroutineBody(self):
        """subroutineBody Grammar:
        "{" varDec* statements "}"

        The ``function`` command is written only after all varDecs have been
        read, because it needs the number of locals."""
        self.check_next(SYMBOL, "{")
        while self.check_texts(KEYWORD, "var", True):  # consumes "var"
            self.compileVarDec()
        n_vars = self.symbols.varCount(LCL)
        self.VM.writeFunction(self.name, n_vars)
        self.updatePointer()
        self.compileStatements()
        self.check_next(SYMBOL, "}")

    def compileVarDec(self):
        """ Grammar:
        "var" type varName ("," varName)* ";"

        The leading "var" keyword has already been consumed by the caller."""
        type_t, name_t = self.get_mult(2)
        self.symbols.define(name_t.text, type_t.text, LCL)
        while self.check_texts(SYMBOL, ",", True):  # another varName
            name_t = self.get()
            self.symbols.define(name_t.text, type_t.text, LCL)
        self.check_next(SYMBOL, ";")

    def updatePointer(self):
        """Emit the subroutine prologue that sets up ``pointer 0``.

        Methods copy argument 0 into pointer 0; constructors allocate one
        word per THIS-kind variable via Memory.alloc.  Functions emit nothing.
        """
        if self.subType == "method":
            self.VM.writePush(ARG, 0)
            self.VM.writePop(POINTER, 0)  # store self in this
        elif self.subType == "constructor":
            n_fields = self.symbols.varCount(THIS)
            self.VM.writePush(CONSTANT, n_fields)
            self.VM.writeCall("Memory.alloc", 1)
            self.VM.writePop(POINTER, 0)  # store new object in this

    def compileStatements(self):
        """ Grammar:
        (let|if|while|do|return)*

        The statement keyword is consumed here; each handler starts right
        after it."""
        while self.check_texts(KEYWORD, statementTypes):
            statement = self.get().text
            if statement == "let":
                self.compileLet()
            elif statement == "if":
                self.compileIf()
            elif statement == "while":
                self.compileWhile()
            elif statement == "do":
                self.compileDo()
            elif statement == "return":
                self.compileReturn()
            else:
                self.fault()

    def compileLet(self):
        """ Grammar: e.g: let x = 4
        "let" varName ("[" expression "]")?
        "=" expression ";" """
        varName = self.get().text
        _, kind, index = self.symbols.get(varName)
        isArray = False
        if self.check_texts(SYMBOL, "[", True):  # array element target
            self.compileExpression()
            self.VM.writePush(kind, index)
            self.VM.writeArithmetic("add")  # target address stays on the stack
            isArray = True
            self.check_next(SYMBOL, "]")
        self.check_next(SYMBOL, "=")
        self.compileExpression()
        if isArray:
            # Park the value in temp 0 so the target address can be popped
            # into pointer 1 before the value is stored through that 0.
            self.VM.writePop(TEMP, 0)
            self.VM.writePop(POINTER, 1)
            self.VM.writePush(TEMP, 0)
            self.VM.writePop(THAT, 0)
        else:
            self.VM.writePop(kind, index)
        self.check_next(SYMBOL, ";")

    def compileIf(self):
        """ Grammar:
        "if" "(" expression ")" "{" statements"}"
        ("else"  "{" statements"}" )? """
        label_1 = "IF_FALSE.{}".format(self.get_label(1))
        label_2 = "IF_END.{}".format(self.get_label(2))
        self.check_next(SYMBOL, "(")
        self.compileExpression()
        self.VM.writeArithmetic("not")
        self.VM.writeIf(label_1)  # condition false -> skip the "then" part
        self.get_mult(2)  # ")" "{"
        self.compileStatements()
        self.get()  # "}"
        self.VM.writeGoto(label_2)
        self.VM.writeLabel(label_1)
        if self.check_texts(KEYWORD, "else", True):
            self.check_next(SYMBOL, "{")
            self.compileStatements()
            self.check_next(SYMBOL, "}")
        self.VM.writeLabel(label_2)

    def compileWhile(self):
        """ Grammar:
        "while" "(" expression ")" "{" statements"}" """
        label_3 = "WHILE_END.{}".format(self.get_label(3))
        label_4 = "WHILE_LOOP.{}".format(self.get_label(4))
        self.check_next(SYMBOL, "(")
        self.VM.writeLabel(label_4)
        self.compileExpression()
        self.VM.writeArithmetic("not")
        self.VM.writeIf(label_3)  # condition false -> leave the loop
        self.get_mult(2)  # ")" "{"
        self.compileStatements()
        self.VM.writeGoto(label_4)
        self.VM.writeLabel(label_3)
        self.check_next(SYMBOL, "}")

    def compileDo(self):
        """ Grammar:
        "do" subroutineCall ";" """
        self.compileTerm()
        self.VM.writePop(TEMP, 0)  # do statements discard the return value
        self.check_next(SYMBOL, ";")

    def compileReturn(self):
        """ Grammar:
        "return" expression? ";"

        A bare ``return;`` pushes constant 0 as the return value."""
        if not self.check_texts(SYMBOL, ";"):
            self.compileExpression()
        else:  # no expression to return
            self.VM.writePush(CONSTANT, 0)
        self.VM.writeReturn()
        self.check_next(SYMBOL, ";")

    def compileExpression(self):
        """ Grammar:
        term (op term)*

        Operators are applied strictly left to right, one VM command (or
        Math call) after each right-hand term."""
        self.compileTerm()
        # Loop, not a single `if`: the grammar allows any number of
        # `op term` pairs, e.g. `a + b + c`.
        while self.check_texts(SYMBOL, operators):
            operator = self.get().text
            self.compileTerm()
            op_vm = operators[operator]
            if op_vm:
                self.VM.writeArithmetic(op_vm)
            elif operator == "*":
                self.VM.writeCall("Math.multiply", 2)
            elif operator == "/":
                self.VM.writeCall("Math.divide", 2)

    def compileTerm(self):
        """Grammar:
        integerConstant | stringConstant | keywordConstant |
        varName | varname "[" expression "]" |
        subroutineCall | "(" expression ")" |
        unaryOp term

        subroutineCall Grammar:
        subroutineName "(" expressionList ")" |
        (className|varName) "." subroutineName "(" expressionList ")"
        """
        tkn = self.get(False)  # peek, don't consume
        if tkn is None:
            # Ran out of tokens in the middle of an expression.
            self.fault()
            return
        tag = tkn.tag
        if tag == INT_CONST:  # integerConstant
            value = self.get().text
            self.VM.writePush("constant", value)
        elif tag == STRING_CONST:  # stringConstant
            string = self.get().text
            self.VM.writePush("constant", len(string))
            self.VM.writeCall("String.new", 1)
            for char in string:
                self.VM.writePush("constant", ord(char))
                self.VM.writeCall("String.appendChar", 2)
        elif self.check_texts(KEYWORD, keywordConstants):  # keywordConstant
            keyword = self.get().text
            if keyword == "false" or keyword == "null":
                self.VM.writePush("constant", 0)
            elif keyword == "true":
                # true is represented as -1: push 1, then negate
                self.VM.writePush("constant", 1)
                self.VM.writeArithmetic("neg")
            elif keyword == "this":
                self.VM.writePush("pointer", 0)
        elif self.check_texts(SYMBOL, unaryOperators):  # unaryOp term
            un_op = self.get().text
            un_op_vm = unaryOperators[un_op]
            self.compileTerm()
            self.VM.writeArithmetic(un_op_vm)
        elif self.check_texts(SYMBOL, "(", True):  # "(" expression ")"
            self.compileExpression()
            self.check_next(SYMBOL, ")")
        elif self.check_texts(IDENTIFIER):
            name = self.get().text
            if self.check_texts(SYMBOL, "[", True):
                # ARRAY: varname "[" expression "]"
                _, kind, index = self.symbols.get(name)
                self.compileExpression()
                self.VM.writePush(kind, index)
                self.VM.writeArithmetic("add")  # address of the element
                self.VM.writePop(POINTER, 1)
                self.VM.writePush(THAT, 0)
                self.check_next(SYMBOL, "]")
            elif self.check_texts(SYMBOL, "(", True):
                # Unqualified call: subroutineName "(" expressionList ")"
                function_name = "{}.{}".format(self.className, name)
                # The current object must be argument 0, so it is pushed
                # BEFORE the explicit arguments (updatePointer reads it
                # back from argument 0 in the callee).
                self.VM.writePush(POINTER, 0)
                nArgs = self.compileExpressionList()
                self.VM.writeCall(function_name, nArgs + 1)
                self.check_next(SYMBOL, ")")
            elif self.check_texts(SYMBOL, ".", True):
                # (className|varName) "." subroutineName "(" expressionList ")"
                nArgs = 0
                sub_name = self.get().text  # subroutineName
                var_type, kind, index = self.symbols.get(name)
                if var_type:  # a variable: method call on that object
                    self.VM.writePush(kind, index)
                    function_name = "{}.{}".format(var_type, sub_name)
                    nArgs += 1
                else:  # not in the symbol table: treat name as a class
                    function_name = "{}.{}".format(name, sub_name)
                self.check_next(SYMBOL, "(")
                nArgs += self.compileExpressionList()
                self.check_next(SYMBOL, ")")
                self.VM.writeCall(function_name, nArgs)
            else:  # plain variable
                _, kind, index = self.symbols.get(name)
                self.VM.writePush(kind, index)
        else:
            self.fault()

    def compileExpressionList(self):
        """Grammar:
        (expression ("," expression)* )?

        Returns number of expressions
        """
        count = 0
        if not self.check_texts(SYMBOL, ")"):
            self.compileExpression()
            count += 1
        while self.check_texts(SYMBOL, ",", True):
            self.compileExpression()
            count += 1
        return count

    @staticmethod
    def _text_matches(text, texts):
        """Return True if ``text`` satisfies the ``texts`` filter.

        A falsy ``texts`` matches anything; a string must be equal; a
        list/tuple/set/dict must contain ``text``.
        """
        if not texts:
            return True
        if isinstance(texts, str):
            return text == texts
        if isinstance(texts, (list, tuple, set, frozenset, dict)):
            return text in texts
        return False

    def check_texts(self, tag, texts=None, increment=False):
        """Peek at the next token and report whether it matches.

        ONLY INCREMENTS IF TRUE: the token is consumed only when it matches
        and ``increment`` is set.  Returns False at end of input.
        """
        tkn = self.get(False)
        if tkn is None or tkn.tag != tag:
            return False
        if not self._text_matches(tkn.text, texts):
            return False
        if increment:
            self.num += 1
        return True

    def check_next(self, tag, texts=None, increment=True):
        """Fetch the next token and require it to have the given tag and text.

        Set increment=False to check the next token without consuming it.
        Returns the token on success.  On any mismatch (wrong tag, wrong
        text, or no tokens left) a diagnostic is printed and the program
        exits through ``quit()``.
        """
        tkn = self.get(increment)
        if (tkn is not None and tkn.tag == tag
                and self._text_matches(tkn.text, texts)):
            return tkn
        print("Invalid program (or end of program)")
        if tkn is None:
            print("problem with:", self.num, None, None)
        else:
            print("problem with:", self.num, tkn.tag, tkn.text)
        self.quit()

    def get(self, increment = True):
        """Return the next token, or None when the tokens are exhausted.

        The cursor advances only when ``increment`` is true.  The returned
        token is also remembered in ``self.tkn``.
        """
        if self.num >= self.total:
            return None
        tkn = self.tokens[self.num]
        self.tkn = tkn
        if increment:
            self.num += 1
        return tkn

    def get_mult(self, n):
        """Consume ``n`` tokens and return them as a list.

        Entries are None for positions past the end of the token list."""
        return [self.get() for _ in range(n)]

    def fault(self):
        """Called if an incorrect program is provided; exits with status 1."""
        print("Invalid program. Quitting...")
        sys.exit(1)

    def quit(self):
        """Print a notice and exit with status 1."""
        print("quiting...")
        sys.exit(1)

    def get_label(self, key):
        """Return the current counter for ``key`` and post-increment it.

        The first request for a key returns 0."""
        val = self.labels.get(key, 0)
        self.labels[key] = val + 1
        return val
コード例 #17
0
class CompilationEngine:
    """Compiles a list of Jack tokens into VM code.

    Each token is an indexable pair: ``token[0]`` is the token text and
    ``token[1]`` its type (the code relies on the type ``"stringConstant"``).
    ``Compile()`` walks the token list once and emits VM commands through
    ``self.VMWriter``.
    """

    _CLASS_VAR_KINDS = ('static', 'field')
    _SUBROUTINE_KINDS = ('constructor', 'function', 'method')
    _KEYWORD_CONSTANTS = ("true", "false", "null", "this")
    _UNARY_OPERATOR_TO_COMMAND = {'-': "neg", '~': "not"}
    # Binary operators as they appear in the token list; '&', '<' and '>'
    # are matched in their XML-escaped spelling.  '*' and '/' are compiled
    # to Math calls, so their mapped values are never written.
    _OPERATOR_TO_COMMAND = {
        '+': "add",
        '-': "sub",
        '*': "mult",
        '/': "div",
        "&amp;": "and",
        '|': "or",
        "&lt;": "lt",
        "&gt;": "gt",
        '=': "eq"
    }

    def __init__(self, filename: str, token_list: list):
        """Create the engine.

        filename: path of the .jack source; the VM output path is derived by
            replacing ".jack" with ".vm".
        token_list: tokens of the whole file, in source order.
        """
        self.token_index = 0  # index of the current (not yet consumed) token
        self.identation_level = 0
        self.tokens = token_list
        self.output_file_name = filename
        self.symbol_table = SymbolTable()
        self.VMWriter = VMWriter(filename.replace(".jack", ".vm"))
        self.is_identifier_used = False
        self.current_if_labels = 0  # counter for unique if labels
        self.current_while_labels = 0  # counter for unique while labels

    def GetNextToken(self, offset=0):
        """Return the token ``offset`` positions ahead without consuming it."""
        return self.tokens[self.token_index + offset]

    def AdvanceToNextToken(self):
        """Consume the current token."""
        self.token_index += 1

    def Compile(self):
        """Compile the whole token list (a single class)."""
        self.CompileClass()

    def CompileClass(self):
        """class className '{' classVarDec* subroutineDec* '}'"""
        self.AdvanceToNextToken()  # class
        class_name = self.GetNextToken()[0]
        self.symbol_table.class_name = class_name
        self.AdvanceToNextToken()  # className
        self.AdvanceToNextToken()  # {

        while self.GetNextToken()[0] in self._CLASS_VAR_KINDS:
            self.CompileClassVarDec()
        while self.GetNextToken()[0] in self._SUBROUTINE_KINDS:
            self.CompileSubroutine()

        self.AdvanceToNextToken()  # }

    def CompileClassVarDec(self):
        """(static|field) type varName (',' varName)* ';'

        Defines every declared name in the symbol table."""
        self.is_identifier_used = False

        token_kind = self.GetNextToken(0)[0]
        token_type = self.GetNextToken(1)[0]
        token_name = self.GetNextToken(2)[0]
        self.symbol_table.define(token_name, token_type, token_kind)

        self.AdvanceToNextToken()  # static | field
        self.AdvanceToNextToken()  # type
        self.AdvanceToNextToken()  # identifier

        if self.GetNextToken()[0] == ',':
            while self.GetNextToken()[0] != ';':
                self.AdvanceToNextToken()  # ,
                token_name = self.GetNextToken(0)[0]
                self.symbol_table.define(token_name, token_type, token_kind)
                self.AdvanceToNextToken()  # identifier

        self.AdvanceToNextToken()  # ;

        self.is_identifier_used = True

    def CompileSubroutine(self):
        """Reset the subroutine scope and dispatch on the subroutine kind."""
        self.symbol_table.startSubroutine()
        subroutine_type = self.GetNextToken()[0]

        if subroutine_type == "method":
            self.CompileMethod()
        elif subroutine_type == "constructor":
            self.CompileConstructor()
        elif subroutine_type == "function":
            self.CompileFunction()

    def _CompileSubroutineHeader(self):
        """Consume kind, return type, name and '(' parameterList ')', then
        write the function command for ``ClassName.subroutineName``."""
        class_name = self.symbol_table.class_name

        self.AdvanceToNextToken()  # subroutine type
        self.AdvanceToNextToken()  # return type

        function_name = self.GetNextToken()[0]

        self.AdvanceToNextToken()  # subroutine name
        self.AdvanceToNextToken()  # (

        self.CompileParameterList()
        self.AdvanceToNextToken()  # )
        self.VMWriter.writeFunction(f"{class_name}.{function_name}")

    def _CompileBody(self, write_prologue=None):
        """Compile '{' varDec* statements '}'.

        ``write_prologue`` (optional callable) is invoked after the local
        count has been written and before the statements are compiled."""
        self.AdvanceToNextToken()  # {
        n_locals = 0
        while self.GetNextToken()[0] == "var":
            n_locals += self.CompileVarDec()

        self.VMWriter.writeFunctionLocals(n_locals)
        if write_prologue is not None:
            write_prologue()
        self.CompileStatements()

        self.AdvanceToNextToken()  # }

    def _WriteMethodPrologue(self):
        """Point ``pointer 0`` at the object passed as argument 0."""
        self.VMWriter.writePush("argument", 0)
        self.VMWriter.writePop("pointer", 0)

    def _WriteConstructorPrologue(self):
        """Allocate one word per field and store the address in pointer 0."""
        number_of_fields = self.symbol_table.varCount("field")
        self.VMWriter.writePush("constant", number_of_fields)
        self.VMWriter.writeCall("Memory.alloc", 1)
        self.VMWriter.writePop("pointer", 0)

    def CompileMethod(self):
        """Compile a method; ``this`` is registered as the first argument."""
        class_name = self.symbol_table.class_name
        self.symbol_table.define("this", class_name, "argument")
        self._CompileSubroutineHeader()
        self.CompileMethodBody()

    def CompileConstructor(self):
        """Compile a constructor declaration and body."""
        self._CompileSubroutineHeader()
        self.CompileConstructorBody()

    def CompileFunction(self):
        """Compile a function declaration and body."""
        self._CompileSubroutineHeader()
        self.CompileFunctionBody()

    def CompileParameterList(self):
        """((type varName) (',' type varName)*)?

        Stops in front of the closing ')' and defines every parameter with
        kind "argument"."""
        if self.GetNextToken()[0] == ')':
            return

        self.is_identifier_used = False

        token_kind = "argument"
        token_type = self.GetNextToken(0)[0]
        token_name = self.GetNextToken(1)[0]
        self.symbol_table.define(token_name, token_type, token_kind)

        self.AdvanceToNextToken()  # type
        self.AdvanceToNextToken()  # varName

        while self.GetNextToken()[0] != ')':
            self.AdvanceToNextToken()  # ,
            token_type = self.GetNextToken(0)[0]
            token_name = self.GetNextToken(1)[0]
            self.symbol_table.define(token_name, token_type, token_kind)

            self.AdvanceToNextToken()  # type
            self.AdvanceToNextToken()  # varName

        self.is_identifier_used = True

    def CompileFunctionBody(self):
        """Compile a function body (no prologue)."""
        self._CompileBody()

    def CompileMethodBody(self):
        """Compile a method body, binding ``this`` from argument 0 first."""
        self._CompileBody(self._WriteMethodPrologue)

    def CompileConstructorBody(self):
        """Compile a constructor body, allocating the object first."""
        self._CompileBody(self._WriteConstructorPrologue)

    def CompileVarDec(self):
        """'var' type varName (',' varName)* ';'

        Returns the number of local variables declared by this statement."""
        self.is_identifier_used = False
        token_kind = "local"
        token_type = self.GetNextToken(1)[0]

        self.AdvanceToNextToken()  # var
        self.AdvanceToNextToken()  # type
        inline_locals = 0

        # several names may share one declaration
        while True:
            token_name = self.GetNextToken(0)[0]
            self.symbol_table.define(token_name, token_type, token_kind)
            self.AdvanceToNextToken()  # varName
            inline_locals += 1
            if self.GetNextToken()[0] != ',':
                break
            self.AdvanceToNextToken()  # ,
        self.AdvanceToNextToken()  # ;

        self.is_identifier_used = True
        return inline_locals

    def CompileStatements(self):
        """Compile consecutive let/if/while/do/return statements."""
        handlers = {
            "let": self.CompileLet,
            "if": self.CompileIf,
            "while": self.CompileWhile,
            "do": self.CompileDo,
            "return": self.CompileReturn,
        }
        while self.GetNextToken()[0] in handlers:
            handlers[self.GetNextToken()[0]]()

    def CompileDo(self):
        """'do' subroutineCall ';' -- the returned value is discarded."""
        self.AdvanceToNextToken()  # do
        self.CompileSubroutineCall()
        self.AdvanceToNextToken()  # ;
        self.VMWriter.writePop("temp", 0)

    def CompileLet(self):
        """'let' varName ('[' expression ']')? '=' expression ';'

        Array targets are handled entirely by CompileArrayAccess, which also
        consumes the '=' expression ';' part."""
        self.AdvanceToNextToken()  # let

        if self.IsVarArray():
            self.CompileArrayAccess()
            return

        var_name = self.GetNextToken()[0]
        var_index = self.symbol_table.indexOf(var_name)
        var_segment = self.symbol_table.kindOf(var_name)
        if var_segment == "field":
            var_segment = "this"

        self.AdvanceToNextToken()  # varName

        self.AdvanceToNextToken()  # =
        self.CompileExpression()
        self.AdvanceToNextToken()  # ;
        self.VMWriter.writePop(var_segment, var_index)

    def CompileWhile(self):
        """'while' '(' expression ')' '{' statements '}'"""
        L1, L2 = self.GetCurrentWhileLabels()

        self.VMWriter.writeLabel(L1)

        self.AdvanceToNextToken()  # while
        self.AdvanceToNextToken()  # (
        self.CompileExpression()
        self.AdvanceToNextToken()  # )
        self.VMWriter.writeArithmetic("not")
        self.VMWriter.writeIf(L2)  # condition false -> leave the loop
        self.AdvanceToNextToken()  # {
        self.CompileStatements()
        self.AdvanceToNextToken()  # }
        self.VMWriter.writeGoto(L1)
        self.VMWriter.writeLabel(L2)

    def GetCurrentWhileLabels(self):
        """Return a fresh (loop start, loop end) label pair."""
        L1 = f"WHILE_EXP{self.current_while_labels}"
        L2 = f"WHILE_END{self.current_while_labels}"
        self.current_while_labels += 1
        return (L1, L2)

    def CompileReturn(self):
        """'return' expression? ';' -- a bare return pushes constant 0."""
        is_void = True
        self.AdvanceToNextToken()  # return
        if self.GetNextToken()[0] != ';':
            is_void = False
            self.CompileExpression()

        self.AdvanceToNextToken()  # ;
        if is_void:
            self.VMWriter.writePush("constant", 0)

        self.VMWriter.writeReturn()

    def CompileIf(self):
        """'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?"""
        L1, L2 = self.GetCurrentIfLabels()

        self.AdvanceToNextToken()  # if
        self.AdvanceToNextToken()  # (
        self.CompileExpression()
        self.AdvanceToNextToken()  # )
        self.VMWriter.writeArithmetic("not")
        self.VMWriter.writeIf(L1)  # condition false -> jump past "then"
        self.AdvanceToNextToken()  # {
        self.CompileStatements()
        self.AdvanceToNextToken()  # }
        self.VMWriter.writeGoto(L2)
        self.VMWriter.writeLabel(L1)
        if self.GetNextToken()[0] == "else":
            self.AdvanceToNextToken()  # else
            self.AdvanceToNextToken()  # {
            self.CompileStatements()
            self.AdvanceToNextToken()  # }
        self.VMWriter.writeLabel(L2)

    def GetCurrentIfLabels(self):
        """Return a fresh label pair for one if statement.

        As used by CompileIf, the first label marks the start of the else
        part and the second the end of the whole statement."""
        L1 = f"IF_TRUE{self.current_if_labels}"
        L2 = f"IF_FALSE{self.current_if_labels}"
        self.current_if_labels += 1
        return (L1, L2)

    def CompileExpression(self):
        """term (op term)* -- operators are applied left to right."""
        self.CompileTerm()
        while self.GetNextToken()[0] in self._OPERATOR_TO_COMMAND:
            operator = self.GetNextToken()[0]
            self.AdvanceToNextToken()  # op
            self.CompileTerm()

            if operator == '*':
                self.VMWriter.writeCall("Math.multiply", 2)
            elif operator == '/':
                self.VMWriter.writeCall("Math.divide", 2)
            else:
                self.VMWriter.writeArithmetic(
                    self._OPERATOR_TO_COMMAND[operator])

    def CompileTerm(self):
        """Compile one term of an expression.

        String constants are recognised first, by token type.  All other
        checks look at the token text only, so a string literal whose text
        is e.g. "12", "-", "(" or "true" would otherwise be compiled as an
        integer, unary operator, parenthesis or keyword.
        """
        if self.IsStringConstant():
            self.CompileStringConstant()

        elif self.IsUnaryOperator():
            u_operator = self.GetNextToken()[0]
            self.AdvanceToNextToken()  # unary op
            self.CompileTerm()
            self.VMWriter.writeArithmetic(
                self._UNARY_OPERATOR_TO_COMMAND[u_operator])

        elif self.IsExpressionParentheses():
            self.AdvanceToNextToken()  # (
            self.CompileExpression()
            self.AdvanceToNextToken()  # )

        elif self.IsSubroutineCall():
            self.CompileSubroutineCall()

        elif self.IsVarArray():
            self.CompileArrayAccess()

        elif self.IsIntegerConstant():
            self.CompileIntegerConstant()

        elif self.IsKeywordConstant():
            self.CompileKeywordConstant()

        else:
            self.CompileVarName()

    def IsIntegerConstant(self):
        """True if the current token text consists of digits only."""
        return self.GetNextToken()[0].isdigit()

    def IsStringConstant(self):
        """True if the current token's type is "stringConstant"."""
        token_type = self.GetNextToken()[1]
        return token_type == "stringConstant"

    def IsKeywordConstant(self):
        """True if the current token text is true, false, null or this."""
        return self.GetNextToken()[0] in self._KEYWORD_CONSTANTS

    def IsUnaryOperator(self):
        """True if the current token text is '-' or '~'."""
        return self.GetNextToken()[0] in self._UNARY_OPERATOR_TO_COMMAND

    def IsExpressionParentheses(self):
        """True if the current token text is '('."""
        return self.GetNextToken()[0] == '('

    def IsSubroutineCall(self):
        """True if the upcoming tokens look like a subroutine call."""
        # subroutineName(expressionList)
        subroutine_cond1 = self.GetNextToken(1)[0] == '('
        # className|varName.subroutineName(expressionList)
        subroutine_cond2 = (self.GetNextToken(1)[0] == '.'
                            and self.GetNextToken(3)[0] == '(')
        return subroutine_cond1 or subroutine_cond2

    def IsVarArray(self):
        """True if the token after the current one is '['."""
        return self.GetNextToken(1)[0] == '['

    def CompileIntegerConstant(self):
        """Push the integer constant and consume its token."""
        self.VMWriter.writePush("constant", self.GetNextToken()[0])
        self.AdvanceToNextToken()

    def CompileStringConstant(self):
        """Build the string at run time: String.new(length) followed by one
        String.appendChar call per character."""
        string_constant = self.GetNextToken()[0]
        self.AdvanceToNextToken()
        self.VMWriter.writePush("constant", len(string_constant))
        self.VMWriter.writeCall("String.new", 1)
        for c in string_constant:
            self.VMWriter.writePush("constant", ord(c))
            self.VMWriter.writeCall("String.appendChar", 2)

    def CompileKeywordConstant(self):
        """Push the value of true/false/null/this and consume the token."""
        keyword = self.GetNextToken()[0]
        if keyword == "false" or keyword == "null":
            self.VMWriter.writePush("constant", 0)
        elif keyword == "true":
            # true is -1: push 1, then negate
            self.VMWriter.writePush("constant", 1)
            self.VMWriter.writeArithmetic("neg")
        elif keyword == "this":
            self.VMWriter.writePush("pointer", 0)
        self.AdvanceToNextToken()

    def CompileVarName(self):
        """Push the variable named by the current token and consume it.

        Variables of kind "field" are read from the "this" segment."""
        segment = self.symbol_table.kindOf(self.GetNextToken()[0])
        index = self.symbol_table.indexOf(self.GetNextToken()[0])
        if segment == "field":
            segment = "this"
        self.VMWriter.writePush(segment, index)
        self.AdvanceToNextToken()

    def CompileExpressionList(self):
        """(expression (',' expression)*)?

        Stops in front of the closing ')'.  Returns the number of
        expressions compiled."""
        n_args = 0

        while self.GetNextToken()[0] != ')':
            self.CompileExpression()
            n_args += 1
            while self.GetNextToken()[0] == ',':
                self.AdvanceToNextToken()  # ,
                self.CompileExpression()
                n_args += 1

        return n_args

    def CompileSubroutineCall(self):
        """Compile ``name(args)`` or ``prefix.name(args)`` and write the call.

        If the first identifier is a known variable, the variable is pushed
        as the receiver and the call is made on the variable's type.  An
        unqualified ``name(args)`` is compiled as a method call on the
        current object (pointer 0 is pushed first).
        """
        var_name = self.GetNextToken()[0]
        function_name = var_name
        n_args = 0

        if self.symbol_table.contains(var_name):
            self.CompileVarName()  # pushes the receiver and consumes varName
            n_args += 1
            function_name = self.symbol_table.typeOf(var_name)
        else:
            self.AdvanceToNextToken()  # subroutineName | className

        if self.GetNextToken()[0] == '(':
            self.AdvanceToNextToken()  # (
            # method of the current class: receiver is `this`
            self.VMWriter.writePush("pointer", 0)
            n_args += 1
            function_name = self.symbol_table.class_name + '.' + function_name
            n_args += self.CompileExpressionList()
            self.AdvanceToNextToken()  # )
        elif self.GetNextToken()[0] == '.':
            self.AdvanceToNextToken()  # .
            function_name += "." + self.GetNextToken()[0]
            self.AdvanceToNextToken()  # subroutineName
            self.AdvanceToNextToken()  # (
            n_args += self.CompileExpressionList()
            self.AdvanceToNextToken()  # )

        self.VMWriter.writeCall(function_name, n_args)

    def CompileArrayAccess(self):
        """Compile ``varName[expression]`` as a read or as a let target.

        If an '=' follows the closing ']', the rest of the let statement
        ('=' expression ';') is compiled here and the value is stored into
        the element; otherwise the element's value is pushed.
        """
        # push the array base address
        self.CompileVarName()  # varName
        self.AdvanceToNextToken()  # [
        self.CompileExpression()
        self.AdvanceToNextToken()  # ]
        self.VMWriter.writeArithmetic("add")

        if self.GetNextToken()[0] == '=':
            self.AdvanceToNextToken()  # =
            self.CompileExpression()
            self.AdvanceToNextToken()  # ;
            # Park the value in temp 0 so the element address can be popped
            # into pointer 1 before storing through that 0.
            self.VMWriter.writePop("temp", 0)
            self.VMWriter.writePop("pointer", 1)
            self.VMWriter.writePush("temp", 0)
            self.VMWriter.writePop("that", 0)
        else:
            self.VMWriter.writePop("pointer", 1)
            self.VMWriter.writePush("that", 0)
コード例 #18
0
class CompilationEngine:

    keywordConsts = ["null", "true", "false", "this"] 
    def __init__(self, tokenizer, outputFile, vmFile):
        """Store the collaborators and create a fresh symbol table and VM writer.

        tokenizer  -- object the compile* methods read tokens from
        outputFile -- object whose write() receives the XML lines (see writeFormatted)
        vmFile     -- handed unchanged to the VMWriter constructor
        """
        from SymbolTable import SymbolTable
        from VMWriter import VMWriter
        self.labelNum = 0  # running number used to build label names
        self.outputFile = outputFile
        self.tokenizer = tokenizer
        self.symbolTable = SymbolTable()
        self.vmWriter = VMWriter(vmFile)
        print(outputFile)
    
    def compileClass(self):
        from JackTokenizer import JackTokenizer
        self.indentLevel = 0
        NUM_OPENING_STATEMENTS = 3
        classVarOpenings = ['static', 'field']
        subOpenings = ['constructor', 'function', 'method']

        if self.tokenizer.currentToken != "class":
            raise Exception("Keyword 'class' expected")
        self.writeFormatted("<class>")
        self.indentLevel += 1
        self.printToken() #Should print 'class'
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
            self.printToken() #Should print class name
            self.className = self.tokenizer.identifier()
            self.writeClassOrSubInfo("class", False)

        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
            self.printToken() #Should print '{'
        
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        
        self.fieldCount = 0
        while self.tokenizer.hasMoreTokens() and self.tokenizer.keyWord() in classVarOpenings:
            if self.tokenizer.keyWord() == "field":
                self.fieldCount += 1
            self.compileClassVarDec()
        while(self.tokenizer.hasMoreTokens() and self.tokenizer.tokenType == JackTokenizer.KEYWORD 
                and self.tokenizer.keyWord() in subOpenings):
            self.compileSubroutine()
        self.printToken()
        self.writeFormatted("</class>")
        self.indentLevel -= 1
    
    def compileClassVarDec(self):
        """Compile one class-level declaration: ('static' | 'field') type name (',' name)* ';'.

        Every declared name is defined in the symbol table with the declared
        type and kind. For a field declaration, self.fieldCount is incremented
        once per additional name after the first.

        Raises:
            Exception: if the opening token is not the keyword 'static' or
                'field', if the names are not comma separated, or if the
                tokens run out in the middle of the name list.
        """
        from JackTokenizer import JackTokenizer
        from SymbolTable import SymbolTable
        tok = self.tokenizer

        self.writeFormatted("<classVarDec>")
        self.indentLevel += 1
        self.printToken()  # 'static' or 'field'
        if tok.tokenType != JackTokenizer.KEYWORD:
            raise Exception("Keyword expected")
        if tok.keyWord() == "static":
            declKind = SymbolTable.STATIC
        elif tok.keyWord() == "field":
            declKind = SymbolTable.FIELD
        else:
            raise Exception("Invalid kind of class variable " + tok.keyWord())

        if tok.hasMoreTokens():
            tok.advance()
            self.printToken()  # declared type
            declType = tok.currentToken
            typeIsKeyword = tok.tokenType == JackTokenizer.KEYWORD

        # A type that is not a keyword is a class name and gets identifier info.
        if not typeIsKeyword:
            self.writeClassOrSubInfo("class", True)

        declared = []
        if tok.hasMoreTokens():
            tok.advance()
            self.printToken()  # first variable name
            declared.append(tok.currentToken)
            if tok.hasMoreTokens():
                tok.advance()

        while tok.symbol() != ";" and tok.hasMoreTokens():
            if tok.symbol() != ",":
                raise Exception("Invalid variable list")
            self.printToken()  # ','
            tok.advance()
            self.printToken()  # next variable name
            declared.append(tok.currentToken)
            if declKind == SymbolTable.FIELD:
                self.fieldCount += 1
            if not tok.hasMoreTokens():
                raise Exception("More tokens expected")
            tok.advance()
        self.printToken()  # ';'

        for varName in declared:
            self.symbolTable.define(varName, declType, declKind)
            self.writeVarInfo(varName, False)

        if tok.hasMoreTokens():
            tok.advance()
        self.indentLevel -= 1
        self.writeFormatted("</classVarDec>")

    def compileSubroutine(self):
        """Compile one subroutine declaration, from its kind keyword through its body.

        Resets the subroutine scope of the symbol table, records whether the
        subroutine is a constructor or a method (self.isConstructor /
        self.isMethod), stores its name in self.subName, and for a method
        defines ``this`` as an argument of the current class type. The
        parameter list and the body are delegated to compileParameterList and
        compileSubroutineBody.
        """
        from JackTokenizer import JackTokenizer  # not referenced below; import kept as in the original
        from SymbolTable import SymbolTable
        tok = self.tokenizer

        self.writeFormatted("<subroutineDec>")
        self.symbolTable.startSubroutine()
        self.indentLevel += 1

        self.printToken()  # 'constructor', 'function' or 'method'
        flavour = tok.keyWord()
        self.isConstructor = flavour == "constructor"
        self.isMethod = flavour == "method"
        if self.isMethod:
            self.symbolTable.define("this", self.className, SymbolTable.ARG)

        if tok.hasMoreTokens():
            tok.advance()
            self.printToken()  # return type or 'void'

        if tok.hasMoreTokens():
            tok.advance()
            self.printToken()  # subroutine name
            self.subName = tok.identifier()

        if tok.hasMoreTokens():
            tok.advance()
            self.printToken()  # '(' opening the parameter list

        if tok.hasMoreTokens():
            tok.advance()
        self.compileParameterList()
        self.printToken()  # ')' closing the parameter list
        if tok.hasMoreTokens():
            tok.advance()

        # compileVarDec counts the locals while the body is being compiled.
        self.numLocalVariables = 0
        self.compileSubroutineBody()
        self.indentLevel -= 1
        self.writeFormatted("</subroutineDec>")
    
    def compileSubroutineBody(self):
        """Compile ``{ varDec* statements }`` and emit the VM function header.

        The ``function`` command is written after all ``var`` declarations so
        that self.numLocalVariables is final. A constructor then allocates
        self.fieldCount words with Memory.alloc and stores the result in
        pointer 0; a method pops argument 0 into pointer THIS_POINTER.
        """
        from JackTokenizer import JackTokenizer
        from SymbolTable import SymbolTable
        from VMWriter import VMWriter
        tok = self.tokenizer
        out = self.vmWriter

        self.writeFormatted("<subroutineBody>")
        self.indentLevel += 1
        self.printToken()  # '{'
        if tok.hasMoreTokens():
            tok.advance()

        while (tok.hasMoreTokens()
               and tok.tokenType == JackTokenizer.KEYWORD
               and tok.keyWord() == "var"):
            self.compileVarDec()

        out.writeFunction(self.className + "." + self.subName, self.numLocalVariables)
        if self.isConstructor:
            out.writePush("constant", self.fieldCount)
            out.writeCall("Memory.alloc", 1)  # allocate space for this object
            out.writePop("pointer", 0)  # assign object to 'this'
        elif self.isMethod:
            out.writePush(SymbolTable.ARG, 0)
            out.writePop("pointer", VMWriter.THIS_POINTER)

        self.compileStatements()
        self.printToken()  # closing '}'
        if tok.hasMoreTokens():
            tok.advance()
        self.indentLevel -= 1
        self.writeFormatted("</subroutineBody>")

    def compileParameterList(self):
        """Compile a possibly empty ``type name (',' type name)*`` parameter list.

        Expects the current token to be the first parameter type or the
        closing ``)``, and stops with ``)`` as the current token. Each
        parameter is defined in the symbol table as an argument.

        Raises:
            Exception: if two parameters are not separated by a comma.
        """
        from JackTokenizer import JackTokenizer
        from SymbolTable import SymbolTable
        tok = self.tokenizer

        self.writeFormatted("<parameterList>")
        self.indentLevel += 1

        if tok.currentToken != ")":
            self.printToken()  # type of the first parameter
            paramType = tok.currentToken
            tok.advance()
            self.printToken()  # name of the first parameter
            paramName = tok.currentToken
            self.symbolTable.define(paramName, paramType, SymbolTable.ARG)
            self.writeVarInfo(paramName, False)
            tok.advance()

        while not (tok.tokenType == JackTokenizer.SYMBOL and tok.symbol() == ")"):
            self.printToken()  # should be ','
            if tok.currentToken != ",":
                raise Exception("Comma expected")
            if tok.hasMoreTokens():
                tok.advance()
                self.printToken()  # parameter type
                paramType = tok.currentToken
            if tok.hasMoreTokens():
                tok.advance()
                self.printToken()  # parameter name
                paramName = tok.currentToken
                self.symbolTable.define(paramName, paramType, SymbolTable.ARG)
                self.writeVarInfo(paramName, False)
            if tok.hasMoreTokens():
                tok.advance()

        self.indentLevel -= 1
        self.writeFormatted("</parameterList>")

    def compileVarDec(self):
        """Compile one local declaration: 'var' type name (',' name)* ';'.

        Increments self.numLocalVariables once per declared name and defines
        every name in the symbol table as a local variable of the declared
        type.

        Raises:
            Exception: if the current token is not 'var'.
        """
        from JackTokenizer import JackTokenizer
        from SymbolTable import SymbolTable
        tok = self.tokenizer

        self.numLocalVariables += 1  # the first name; further names are counted in the loop
        self.writeFormatted("<varDec>")
        self.indentLevel += 1

        declared = []
        self.printToken()  # 'var'
        if tok.currentToken != "var":
            raise Exception("'var' keyword expected")
        if tok.hasMoreTokens():
            tok.advance()
            self.printToken()  # declared type
            declType = tok.currentToken
            typeIsKeyword = tok.tokenType == JackTokenizer.KEYWORD

        if tok.hasMoreTokens():
            tok.advance()
            self.printToken()  # first variable name
            declared.append(tok.currentToken)

        if tok.hasMoreTokens():
            tok.advance()

        while tok.hasMoreTokens() and not (
                tok.tokenType == JackTokenizer.SYMBOL and tok.symbol() == ";"):
            self.printToken()  # ','
            tok.advance()
            self.printToken()  # next variable name
            declared.append(tok.currentToken)
            tok.advance()
            self.numLocalVariables += 1

        # A type that is not a keyword (e.g. int) is a class name, so its
        # identifier info is written. writeClassOrSubInfo renders the flag
        # with str(), so True produces the same text as the string "True".
        if not typeIsKeyword:
            self.writeClassOrSubInfo("class", True)

        for varName in declared:
            self.symbolTable.define(varName, declType, SymbolTable.VAR)
            self.writeVarInfo(varName, False)

        self.printToken()  # ';'
        tok.advance()
        self.indentLevel -= 1
        self.writeFormatted("</varDec>")

    def compileStatements(self):
        """Compile consecutive statements until the current token no longer starts one.

        Dispatches on the statement keyword (do, while, let, if, return) to
        the matching compile* method and wraps the result in a <statements>
        XML element.
        """
        from JackTokenizer import JackTokenizer
        tok = self.tokenizer
        handlers = {
            "do": self.compileDo,
            "while": self.compileWhile,
            "let": self.compileLet,
            "if": self.compileIf,
            "return": self.compileReturn,
        }

        self.writeFormatted("<statements>")
        self.indentLevel += 1
        while (tok.hasMoreTokens()
               and tok.tokenType == JackTokenizer.KEYWORD
               and tok.keyWord() in handlers):
            handlers[tok.keyWord()]()
        self.indentLevel -= 1
        self.writeFormatted("</statements>")

    def compileDo(self):
        from JackTokenizer import JackTokenizer
        self.writeFormatted("<doStatement>")
        self.indentLevel += 1
        if self.tokenizer.keyWord() != "do":
            raise Exception("'do' keyword expected")
        self.printToken()

        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
            self.compileSubroutineCall()
            self.vmWriter.writePop("temp", 0) #This pops and ignores the returned value 

        self.printToken() #Print ';'
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        self.indentLevel -= 1
        self.writeFormatted("</doStatement>")

    def compileLet(self):
        """Compile ``let name = expr ;`` or ``let name[index] = expr ;``.

        For a plain variable the value is popped straight into the variable's
        segment and index. For an array element the element address is
        computed first (base + index); after the right-hand side has been
        compiled the value is parked in ``temp 0``, the address goes to
        pointer THAT_POINTER, and the value is stored through ``that 0``.

        Raises:
            Exception: if the current token is not 'let', or if an indexed
                variable is not defined in the symbol table.
        """
        from SymbolTable import SymbolTable  # not referenced below; import kept as in the original
        from VMWriter import VMWriter
        tok = self.tokenizer

        self.writeFormatted("<letStatement>")
        self.indentLevel += 1
        targetIsArray = False
        if tok.keyWord() != "let":
            raise Exception("Let keyword expected")
        self.printToken()  # 'let'
        if tok.hasMoreTokens():
            tok.advance()
            self.printToken()  # variable name
            target = tok.identifier()
            self.writeVarInfo(tok.identifier(), True)
        if tok.hasMoreTokens():
            tok.advance()
            self.printToken()  # '[' or '='
            print("compileLet - [ or = " + tok.currentToken)

        if tok.currentToken != "[":
            # Plain variable: the destination is the variable itself.
            destSegment = self.symbolTable.kindOf(target)
            destIndex = self.symbolTable.indexOf(target)
        else:
            targetIsArray = True
            tok.advance()
            if not self.symbolTable.isDefined(target):
                raise Exception("Symbol " + target + " is not defined")
            self.vmWriter.writePush(self.symbolTable.kindOf(target), self.symbolTable.indexOf(target))
            self.compileExpression()
            self.printToken()  # ']'
            self.vmWriter.writeArithmetic("add")
            destSegment, destIndex = "that", 0
            if tok.hasMoreTokens():
                tok.advance()
                self.printToken()  # '='

        if tok.hasMoreTokens():
            tok.advance()

        print("compileLet - after equals " + tok.currentToken)
        self.compileExpression()
        if targetIsArray:
            # Save the value, aim THAT at the element, then restore the value.
            self.vmWriter.writePop("temp", 0)
            self.vmWriter.writePop("pointer", VMWriter.THAT_POINTER)
            self.vmWriter.writePush("temp", 0)

        self.vmWriter.writePop(destSegment, destIndex)
        self.printToken()  # ';'

        if tok.hasMoreTokens():
            tok.advance()
        self.indentLevel -= 1
        self.writeFormatted("</letStatement>")

    def compileWhile(self):
        """Compile ``while (expression) { statements }``.

        Two labels are taken from self.labelNum: the first marks the top of
        the loop, the second the exit. The negated condition jumps to the
        exit label and the end of the body jumps back to the top.

        Raises:
            Exception: if the current token is not the keyword 'while'.
        """
        from JackTokenizer import JackTokenizer
        tok = self.tokenizer

        self.writeFormatted("<whileStatement>")
        self.indentLevel += 1

        # Reserve two consecutive label numbers before anything can fail.
        self.labelNum += 2
        loopLabel = "W" + str(self.labelNum - 1)
        exitLabel = "W" + str(self.labelNum)

        if tok.tokenType != JackTokenizer.KEYWORD or tok.keyWord() != "while":
            raise Exception("'while' keyword was expected")
        self.printToken()  # 'while'
        if tok.hasMoreTokens():
            tok.advance()
            self.printToken()  # '('
            self.vmWriter.writeLabel(loopLabel)
        if tok.hasMoreTokens():
            tok.advance()
            self.compileExpression()
            self.vmWriter.writeArithmetic("not")
            self.vmWriter.writeIf(exitLabel)
            self.printToken()  # ')'
        if tok.hasMoreTokens():
            tok.advance()
            self.printToken()  # '{'
        if tok.hasMoreTokens():
            tok.advance()
            self.compileStatements()
            self.vmWriter.writeGoto(loopLabel)
            self.printToken()  # '}'
            self.vmWriter.writeLabel(exitLabel)
        if tok.hasMoreTokens():
            tok.advance()
        self.indentLevel -= 1
        self.writeFormatted("</whileStatement>")

    def compileReturn(self):
        """Compile ``return expression? ;``.

        When no expression is given, constant 0 is pushed so that a value is
        always on the stack when ``return`` is emitted.

        Raises:
            Exception: if the current token is not 'return'.
        """
        from JackTokenizer import JackTokenizer
        tok = self.tokenizer

        self.writeFormatted("<returnStatement>")
        self.indentLevel += 1
        if tok.keyWord() != "return":
            raise Exception("'return' keyword was expected")
        self.printToken()  # 'return'
        if tok.hasMoreTokens():
            tok.advance()
        if tok.tokenType == JackTokenizer.SYMBOL and tok.symbol() == ";":
            # Bare return: the function's return type is void, return 0.
            self.vmWriter.writePush("constant", 0)
        else:
            self.compileExpression()
        self.printToken()  # ';'
        self.vmWriter.writeReturn()
        if tok.hasMoreTokens():
            tok.advance()
        self.indentLevel -= 1
        self.writeFormatted("</returnStatement>")

    def compileIf(self):
        from JackTokenizer import JackTokenizer
        self.writeFormatted("<ifStatement>")
        self.indentLevel += 1
        trueLabel = "IF-TRUE" + str(self.labelNum)
        falseLabel = "IF-FALSE" + str(self.labelNum)
        endLabel = "END-IF" + str(self.labelNum)
        self.labelNum += 1
        if self.tokenizer.keyWord() != "if":
            raise Exception("'if' keyword was expected")
        self.printToken() #print 'if'
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
            self.printToken() #print '('
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
            self.compileExpression()
            self.printToken() #print ')'
            self.vmWriter.writeIf(trueLabel)
            self.vmWriter.writeGoto(falseLabel)
            self.vmWriter.writeLabel(trueLabel)
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
            self.printToken() #print '{'
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
            self.compileStatements()
            self.printToken() #print '}'
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        if self.tokenizer.tokenType == JackTokenizer.KEYWORD and self.tokenizer.keyWord() == "else":
            self.vmWriter.writeGoto(endLabel)
            self.vmWriter.writeLabel(falseLabel)
            self.printToken() #print 'else'
            if self.tokenizer.hasMoreTokens():
                self.tokenizer.advance()
                self.printToken() #print '{'
            if self.tokenizer.hasMoreTokens():
                self.tokenizer.advance()
                self.compileStatements()
            self.printToken() #print '}'
            if self.tokenizer.hasMoreTokens():
                self.tokenizer.advance()
            self.vmWriter.writeLabel(endLabel)
        else:
            #In this case the if statement doesn't have an else so we 
            #don't need the end label
            self.vmWriter.writeLabel(falseLabel)
        self.indentLevel -= 1
        self.writeFormatted("</ifStatement>")

    def compileExpression(self):
        """Compile ``term (op term)*`` and emit the operators in postfix order.

        Operators are applied strictly left to right as they are read: each
        one is emitted right after the term that follows it. '*' and '/' are
        emitted as calls to Math.multiply / Math.divide with two arguments;
        the others map to a single VM arithmetic command.
        """
        from JackTokenizer import JackTokenizer
        tok = self.tokenizer

        # Operator symbol -> VM arithmetic command. Some symbols appear in
        # XML-escaped form, presumably because tokenizer.symbol() returns
        # them that way -- confirm against the tokenizer.
        arithmeticFor = {
            '+': "add", '-': "sub", '&amp;': "and", '|': "or",
            '&lt;': "lt", '&gt;': "gt", '=': "eq",
        }
        # Operator symbol -> OS function that implements it.
        functionFor = {'*': "Math.multiply", '/': "Math.divide"}

        self.writeFormatted("<expression>")
        self.indentLevel += 1
        print("About to call compile term current token is " + tok.currentToken)
        self.compileTerm()
        while (tok.tokenType == JackTokenizer.SYMBOL
               and (tok.symbol() in arithmeticFor or tok.symbol() in functionFor)):
            self.printToken()
            operator = tok.symbol()
            print("Current operator " + tok.currentToken)
            if tok.hasMoreTokens():
                tok.advance()
                self.compileTerm()

            if operator in arithmeticFor:
                self.vmWriter.writeArithmetic(arithmeticFor[operator])
            else:
                # Both multiply and divide take two arguments.
                self.vmWriter.writeCall(functionFor[operator], 2)

        self.indentLevel -= 1
        self.writeFormatted("</expression>")

    def compileTerm(self):
        from JackTokenizer import JackTokenizer
        from VMWriter import VMWriter
        print("Opening token is " + self.tokenizer.currentToken)
        unaryOps = ['-', '~']
        unaryCommands = ["neg", "not"]
        self.writeFormatted("<term>")
        self.indentLevel += 1
        self.printToken()
        if self.tokenizer.tokenType == JackTokenizer.IDENTIFIER:
            name = self.tokenizer.identifier()
            self.tokenizer.advance()
            print("second token in IDENTIFIER " + self.tokenizer.currentToken)
            if self.tokenizer.tokenType == JackTokenizer.SYMBOL:
                if self.tokenizer.symbol() == ".":
                    if self.symbolTable.isDefined(name):
                        self.writeVarInfo(name, True)
                    else:
                        self.writeClassOrSubInfo("class", True)

                    self.printToken() #Should print '.'
                    self.tokenizer.advance()
                    self.printToken() #Should print subroutine name
                    subName = self.tokenizer.identifier()

                    self.tokenizer.advance() 
                    self.printToken() #Should print '('

                    #If the subroutine is a method call we must first push the object before 
                    #pushing the rest of the arguments
                    if self.symbolTable.isDefined(name):
                        self.vmWriter.writePush(self.symbolTable.kindOf(name), self.symbolTable.indexOf(name))

                    self.numExpressions = 0
                    self.tokenizer.advance()
                    self.compileExpressionList()
                    self.printToken() #Should print ')'
                    
                    if self.symbolTable.isDefined(name):
                        #Must add 1 to the number of arguments since we pushed the object that the method is operating on
                        self.vmWriter.writeCall(self.symbolTable.typeOf(name) + "." + subName, self.numExpressions + 1)
                    else:
                        self.vmWriter.writeCall(name + "." + subName, self.numExpressions)

                    self.tokenizer.advance()
                elif self.tokenizer.symbol() == "(":
                    self.printToken()
                    self.writeClassOrSubInfo("subroutine", True)
                    if self.tokenizer.hasMoreTokens():
                        self.tokenizer.advance()

                        self.vmWriter.writePush("pointer", VMWriter.THIS_POINTER)
                        self.compileExpressionList()
                        self.numExpressions += 1

                        self.printToken() #Print ')'
                        self.vmWriter.writeCall(self.className +  "." + name, self.numExpressions)
                        if self.tokenizer.hasMoreTokens():
                            self.tokenizer.advance()
                elif self.tokenizer.symbol() == "[":
                    self.writeVarInfo(name, True)
                    self.printToken()

                    self.vmWriter.writePush(self.symbolTable.kindOf(name), self.symbolTable.indexOf(name))
                    if self.tokenizer.hasMoreTokens():
                        self.tokenizer.advance()
                        self.compileExpression()
                        self.printToken() #Should print ']'

                        self.vmWriter.writeArithmetic("add")
                        self.vmWriter.writePop("pointer", VMWriter.THAT_POINTER)
                        self.vmWriter.writePush("that", 0)
                        if self.tokenizer.hasMoreTokens():
                            self.tokenizer.advance()
                else:
                    self.vmWriter.writePush(self.symbolTable.kindOf(name), self.symbolTable.indexOf(name))
                    self.writeVarInfo(name, True)
        elif self.tokenizer.tokenType == JackTokenizer.SYMBOL and self.tokenizer.symbol() == "(":
            self.tokenizer.advance()
            print("second token in (expression)" + self.tokenizer.currentToken)
            self.compileExpression()
            self.printToken() #print ')'
            if self.tokenizer.hasMoreTokens():
                self.tokenizer.advance()
        elif self.tokenizer.tokenType == JackTokenizer.SYMBOL and self.tokenizer.symbol() in unaryOps:
            op = self.tokenizer.symbol()
            self.tokenizer.advance()
            print("second token in unary " + self.tokenizer.currentToken)
            self.compileTerm()
            self.vmWriter.writeArithmetic(unaryCommands[unaryOps.index(op)])
        elif self.tokenizer.tokenType == JackTokenizer.INT_CONST:
            self.vmWriter.writePush("constant", self.tokenizer.intVal())
            if self.tokenizer.hasMoreTokens():
                self.tokenizer.advance()
        elif self.tokenizer.currentToken in CompilationEngine.keywordConsts:
            if self.tokenizer.keyWord() == "null" or self.tokenizer.keyWord() == "false":
                self.vmWriter.writePush("constant", 0)
            elif self.tokenizer.keyWord() == "true":
                self.vmWriter.writePush("constant", 1)
                self.vmWriter.writeArithmetic("neg") #Value of true is -1
            elif self.tokenizer.keyWord() == "this":
                self.vmWriter.writePush("pointer", VMWriter.THIS_POINTER) 
            else:
                raise Exception("Invalid keyword constant " + self.tokenizer.keyWord())

            if self.tokenizer.hasMoreTokens():
                self.tokenizer.advance()
        elif self.tokenizer.tokenType == JackTokenizer.STRING_CONST:
            self.vmWriter.writePush("constant", len(self.tokenizer.stringVal()))
            self.vmWriter.writeCall("String.new", 1)
            for char in self.tokenizer.stringVal():
                self.vmWriter.writePush("constant", ord(char))
                self.vmWriter.writeCall("String.appendChar", 2)

            if self.tokenizer.hasMoreTokens():
               self.tokenizer.advance() 
        else:
            raise Exception("Invalid term provided")
        print("The current token is " + self.tokenizer.currentToken)
        self.indentLevel -= 1
        self.writeFormatted("</term>")

    def compileExpressionList(self):
        from JackTokenizer import JackTokenizer
        self.writeFormatted("<expressionList>")
        self.indentLevel += 1
        self.numExpressions = 0 #The number of expressions in this list

        #I sort of feel guilty for doing this since this relies on knowing that
        #the expression list is surrounded by parenthesis and according to the spec
        #it should not know that (it would require modifying this message if I wanted to use an expression list anywhere else).
        #However, also according to the spec I should create a <subroutineCall> XML element or I shouldn't depending
        #on which part of the spec you trust.
        while not(self.tokenizer.tokenType == JackTokenizer.SYMBOL and self.tokenizer.symbol() == ")"):
           self.compileExpression() 
           self.numExpressions += 1
           if self.tokenizer.tokenType == JackTokenizer.SYMBOL and self.tokenizer.symbol() == ",":
               self.printToken() #print ','
               if self.tokenizer.hasMoreTokens():
                   self.tokenizer.advance()
        self.indentLevel -= 1
        self.writeFormatted("</expressionList>")

    def compileSubroutineCall(self):
        """Compile ``name(args)`` or ``prefix.name(args)`` and emit the VM call.

        * ``name(args)``: call on the current object; pointer THIS_POINTER is
          pushed first and counted as an argument, and the call goes to
          ``<className>.name``.
        * ``prefix.name(args)`` where ``prefix`` is a defined variable: the
          variable is pushed first and counted as an argument, and the call
          goes to ``<type of prefix>.name``.
        * ``prefix.name(args)`` otherwise: ``prefix`` is taken as a class
          name and the call goes to ``prefix.name`` with the listed arguments
          only.

        The identifier info for the XML output is written after the call.
        """
        from JackTokenizer import JackTokenizer
        from VMWriter import VMWriter
        tok = self.tokenizer
        table = self.symbolTable

        self.printToken()  # subroutine name, or the class/object it belongs to
        leading = tok.currentToken
        member = ""
        sawDot = False
        if tok.hasMoreTokens():
            tok.advance()
            self.printToken()  # '.' or '('
        if tok.tokenType == JackTokenizer.SYMBOL and tok.symbol() == ".":
            sawDot = True
            if tok.hasMoreTokens():
                tok.advance()
                self.printToken()  # subroutine name
                member = tok.currentToken
            if tok.hasMoreTokens():
                tok.advance()
                self.printToken()  # opening '('
        if tok.hasMoreTokens():
            tok.advance()

            # The receiver has to be on the stack before the other arguments.
            if member != "" and table.isDefined(leading):
                self.vmWriter.writePush(table.kindOf(leading), table.indexOf(leading))

            if member == "":
                self.vmWriter.writePush("pointer", VMWriter.THIS_POINTER)

            self.compileExpressionList()

            self.printToken()  # ')'
        if tok.hasMoreTokens():
            tok.advance()

        if member == "":
            # Unqualified call: the current object is the extra argument.
            self.numExpressions += 1
            callee = self.className + "." + leading
        elif table.isDefined(leading):
            # Call through a variable: the object is the extra argument.
            callee = table.typeOf(leading) + "." + member
            self.numExpressions += 1
        else:
            callee = leading + "." + member

        self.vmWriter.writeCall(callee, self.numExpressions)

        if sawDot and table.isDefined(leading):
            self.writeVarInfo(leading, True)  # information about an object
        elif sawDot:
            self.writeClassOrSubInfo("class", True)  # information about a class
        self.writeClassOrSubInfo("subroutine", True)

    def printToken(self):
        """Write the current token as one XML element to the output file.

        The element name depends on the token type (keyword, symbol,
        identifier, integerConstant, stringConstant). A token of any other
        type writes nothing.
        """
        from JackTokenizer import JackTokenizer
        tok = self.tokenizer
        # (token type, XML tag, accessor that returns the token text)
        renderers = (
            (JackTokenizer.KEYWORD, "keyword", tok.keyWord),
            (JackTokenizer.SYMBOL, "symbol", tok.symbol),
            (JackTokenizer.IDENTIFIER, "identifier", tok.identifier),
            (JackTokenizer.INT_CONST, "integerConstant", tok.intVal),
            (JackTokenizer.STRING_CONST, "stringConstant", tok.stringVal),
        )
        for kind, tag, accessor in renderers:
            if tok.tokenType == kind:
                self.writeFormatted("<" + tag + ">" + accessor() + "</" + tag + ">")
                break

    def writeFormatted(self, string):
        self.outputFile.write("  " * self.indentLevel + string + "\n")
    
    def writeVarInfo(self, varName, inUse):
        from SymbolTable import SymbolTable
        self.writeFormatted("<IdentifierInfo>")
        self.indentLevel += 1
        self.writeFormatted("<type>" + self.symbolTable.typeOf(varName) + "</type>")
        self.writeFormatted("<kind>" + self.symbolTable.stringKindOf(varName) + "</kind>")
        self.writeFormatted("<index>" + str(self.symbolTable.indexOf(varName)) + "</index>")
        self.writeFormatted("<inUse>" + str(inUse) + "</inUse>")
        self.indentLevel -= 1
        self.writeFormatted("</IdentifierInfo>")

    def writeClassOrSubInfo(self, kind, inUse):
       self.writeFormatted("<IdentifierInfo>")
       self.indentLevel += 1
       self.writeFormatted("<kind>" + kind + "</kind>")
       self.writeFormatted("<inUse>" + str(inUse) + "</inUse>")
       self.indentLevel -= 1
       self.writeFormatted("</IdentifierInfo>")
コード例 #19
0
class CompilationEngine(object):
    """This class recursively compiles a .jack file into vm code."""

    def __init__(self, inFile):
        """Set up the tokenizer, output file, VM writer and symbol table.

        Args:
            inFile: the source file object.  It is handed to JackTokenizer and
                its `name` attribute is used to derive the output path.
        """
        import os  # local import: only needed to derive the output path

        super(CompilationEngine, self).__init__()
        # create an internal tokenizer to iterate through
        self.tokenizer = JackTokenizer(inFile)

        # Swap only the final extension for ".vm".  A plain
        # str.replace(".jack", ".vm") would also rewrite ".jack" inside
        # directory names, and would leave the path unchanged (so the source
        # itself would be truncated below) when the name has no ".jack" in it.
        self.outputPath = os.path.splitext(inFile.name)[0] + ".vm"

        # truncate any previous output, then reopen in append mode
        open(self.outputPath, 'w').close()
        self.outputFile = open(self.outputPath, 'a')

        # create a VMWriter with the output file
        self.vmWriter = VMWriter(self.outputFile)

        # create a symbol table
        self.symbolTable = SymbolTable()

        # state tracked while filling the symbol table
        self.className = ""
        self.currentName = ""
        self.currentKind = ""
        self.currentType = ""
        # running counters that make the generated if/while labels distinct
        self.ifCounter = 0
        self.whileCounter = 0

    def start(self):
        """Starts the compilation by creating the token XML file
        and then calling __compileClass()"""

        # start the tokenizer
        self.tokenizer.advance()

        # start the compilation
        self.__compileClass()

    def __checkIdentifier(self):
        """Makes sure that the current token is an identifier and saves that
        identifier as the current name for the symbol table"""
        if self.tokenizer.tokenType() == "IDENTIFIER":
            self.currentName = self.tokenizer.identifier()
            return True
        return False

    def __checkType(self):
        """Checks for a valid type and saves that type for the symbol table"""
        if self.tokenizer.tokenType() == "KEYWORD" and \
                self.tokenizer.keyWord() in ["int", "char", "boolean", "void"]:
            self.currentType = self.tokenizer.keyWord()
            return True
        elif self.tokenizer.tokenType() == "IDENTIFIER":
            self.currentType = self.tokenizer.identifier()
            return True
        else:
            return False

    def __compileType(self):
        """Compiles a complete jack type grammar. Returns false if there is an error"""
        # check for valid keyword
        if self.tokenizer.tokenType() == "KEYWORD":
            k = self.tokenizer.keyWord()
            if k not in ["int", "char", "boolean"]:
                print("Error: type keyword must be int, char, or boolean")
                return False

            # self.__writeFullTag("keyword", k)

            self.tokenizer.advance()
            return True
        # check for className
        else:
            res = self.__compileClassName()
            # if __compileClassName() errors, this is not a valid type
            if not res:
                print("Error: type not a valid className")
            return res

    def __compileClassName(self):
        """Compiles a complete jack className grammar. Returns false if there is
        an error"""
        if self.tokenizer.tokenType() != "IDENTIFIER":
            return False

        # self.__writeFullTag("identifier", self.tokenizer.identifier())

        self.tokenizer.advance()
        return True

    def __compileSubroutineName(self):
        """Compiles a complete jack subroutineName. Returns false if there is an
        error"""
        if self.tokenizer.tokenType() != "IDENTIFIER":
            return False

        # self.__writeFullTag("identifier", self.tokenizer.identifier())

        self.tokenizer.advance()
        return True

    def __compileVarName(self):
        """Compiles a complete jack varName. Returns false if there is an
        error"""
        if self.tokenizer.tokenType() != "IDENTIFIER":
            return False

        # self.__writeFullTag("identifier", self.tokenizer.identifier())

        self.tokenizer.advance()
        return True

    def __compileClass(self):
        """Compiles a complete jack class grammar"""
        # find the class keyword
        if self.tokenizer.tokenType() != "KEYWORD" or \
                self.tokenizer.keyWord() != "class":
            print("Error: no class declaration found")
            sys.exit(1)

        self.tokenizer.advance()

        # find the className
        if not self.__checkIdentifier():
            print("Error: no class name found in class declaration")
            sys.exit(1)
        # save the class name
        self.className = self.tokenizer.identifier()
        self.tokenizer.advance()

        # find the open curly brace
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "{":
            print("Error: no opening brace found after class")
            sys.exit(0)
        self.tokenizer.advance()

        # compile the classVarDecs
        while(self.tokenizer.tokenType() == "KEYWORD" and
              (self.tokenizer.keyWord() == "static" or
               self.tokenizer.keyWord() == "field")):
            self.__compileClassVarDec()

        # compile the subroutines
        while(self.tokenizer.tokenType() == "KEYWORD" and
              (self.tokenizer.keyWord() == "constructor" or
               self.tokenizer.keyWord() == "function" or
               self.tokenizer.keyWord() == "method")):
            self.__compileSubroutineDec()

        # find last curly brace
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "}":
            print("Error: no closing brace found after class definition")
            sys.exit(1)
        self.tokenizer.advance()

    def __compileClassVarDec(self):
        """Compiles a complete jack class variable declaration. This advances the
        tokenizer completely through the variable declaration"""
        # we already checked to make sure that the keyword is valid
        self.currentKind = self.tokenizer.keyWord()
        self.tokenizer.advance()

        # look for a valid type
        if not self.__checkType():
            print("Error: invalid type in classVarDec")
            sys.exit(1)
        self.tokenizer.advance()

        # check for varName
        if self.__checkIdentifier():
            self.symbolTable.define(
                self.currentName, self.currentType, self.currentKind)
            self.tokenizer.advance()
        else:
            print("Error: missing varName identifier in classVarDec")
            sys.exit(1)

        # check for comma then more varNames (possible not existing)
        while self.tokenizer.tokenType() == "SYMBOL" and \
                self.tokenizer.symbol() == ",":
            self.tokenizer.advance()

            # check for varName again
            if self.__checkIdentifier():
                self.symbolTable.define(
                    self.currentName, self.currentType, self.currentKind)
                self.tokenizer.advance()
            else:
                print("Error: missing varName identifier in classVarDec")
                sys.exit(1)

        # check for closing semicolon
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ";":
            print("Error: missing semicolon after classVarDec")
            sys.exit(1)
        self.tokenizer.advance()

    def __compileSubroutineDec(self):
        """Compiles a complete jack subroutine description. This advances the
        tokenizer completely through the subroutine declaration"""
        # clear the subroutine symbol table
        self.symbolTable.startSubroutine()

        # since we already checked for the subroutine kind, grab it
        subroutineKind = self.tokenizer.keyWord()
        self.tokenizer.advance()

        # look for return type
        if not self.__checkType():
            print("Error: illegal return type for subroutine")
            sys.exit(1)
        self.tokenizer.advance()

        # check for subroutineName and save it with the specified format
        if self.__checkIdentifier():
            currentSubroutineName = self.className + "." + self.currentName
            self.tokenizer.advance()
        else:
            print("Error: missing subroutineName in subroutineDec")
            sys.exit(1)

        # if the subroutine is a method, the first arg needs to be this
        if subroutineKind == "method":
            self.symbolTable.define("this", self.className, "arg")

        # check for open parentheses
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "(":
            print("Error: missing ( for parameter list")
            sys.exit(1)
        self.tokenizer.advance()

        # do parameter list (this could add nothing)
        self.__compileParameterList()

        # check for closing parentheses
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ")":
            print("Error: missing ) for parameter list")
            sys.exit(1)
        self.tokenizer.advance()

        # compile subroutine body
        self.__compileSubroutineBody(subroutineKind, currentSubroutineName)

    def __compileParameterList(self):
        """Compiles a complete jack parameter list grammar"""
        # we know all parameter lists are arguments, so set the current kind
        self.currentKind = "arg"

        # if the next symbol is a ), then there is no parameter list, so just return
        # the rest of compileSubroutine will handle writing that
        if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == ")":
            return
        # look for a valid type
        else:
            if not self.__checkType():
                print("Error: invalid type in parameter list")
                sys.exit(1)
            self.tokenizer.advance()

            # check for varName
            if self.__checkIdentifier():
                self.symbolTable.define(
                    self.currentName, self.currentType, self.currentKind)
                self.tokenizer.advance()
            else:
                print("Error: missing varName identifier in parameterList")
                sys.exit(1)

            # check for comma separated list of type and varName
            while self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == ",":
                # write the comma
                self.tokenizer.advance()

                # look for a valid type
                if not self.__checkType():
                    print("Error: invalid type in parameter list")
                    sys.exit(1)
                self.tokenizer.advance()

                # check for varName
                if self.__checkIdentifier():
                    self.symbolTable.define(
                        self.currentName, self.currentType, self.currentKind)
                    self.tokenizer.advance()
                else:
                    print("Error: missing varName identifier in parameterList")
                    sys.exit(1)

    def __compileSubroutineBody(self, currentSubKind, currentSubName):
        """Compile a complete jack subroutine body grammar"""
        # check for {
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "{":
            print("Error: missing { for subroutine body")
            sys.exit(1)
        self.tokenizer.advance()

        # check to see if we need to compile varDec
        while self.tokenizer.tokenType() == "KEYWORD" and \
                self.tokenizer.keyWord() == "var":
            self.__compileVarDec()

        # write the function
        self.vmWriter.writeFunction(
            currentSubName, self.symbolTable.varCount("var"))

        # write stuff for constructor
        if currentSubKind == "constructor":
            # get number of class fields to allocate space for them
            numFields = self.symbolTable.varCount("field")
            if numFields > 0:
                self.vmWriter.writePush("constant", numFields)
            self.vmWriter.writeCall("Memory.alloc", 1)
            self.vmWriter.writePop("pointer", 0)
        # write stuff for method
        elif currentSubKind == "method":
            # get the this pointer
            self.vmWriter.writePush("argument", 0)
            self.vmWriter.writePop("pointer", 0)

        # compile statements
        self.__compileStatements()

        # check for closing }
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "}":
            print("Error: missing closing } for subroutine body")
            sys.exit(1)
        self.tokenizer.advance()

        return

    def __compileVarDec(self):
        """Compiles a complete jack varDec grammar"""
        # all var decs are of type var, so set it
        self.currentKind = "var"

        self.tokenizer.advance()

        # check for type
        if not self.__checkType():
            print("Error: invalid type in var dec")
            sys.exit(1)
        self.tokenizer.advance()

        # check for varName
        if self.__checkIdentifier():
            self.symbolTable.define(
                self.currentName, self.currentType, self.currentKind)
            self.tokenizer.advance()
        else:
            print("Error: missing varName identifier in varDec")
            sys.exit(1)

        # check for comma separated list of type and varName
        while self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == ",":
            # write the comma
            self.tokenizer.advance()

            # check for varName
            if self.__checkIdentifier():
                self.symbolTable.define(
                    self.currentName, self.currentType, self.currentKind)
                self.tokenizer.advance()
            else:
                print("Error: missing varName identifier in varDec")
                sys.exit(1)

        # check for semicolon
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ";":
            print("Error: missing ; after varDec")
            sys.exit(1)
        self.tokenizer.advance()

        return

    def __compileStatements(self):
        """Compiles a complete jack statements grammar"""
        # check for the keywords for all the statements
        while self.tokenizer.tokenType() == "KEYWORD":
            k = self.tokenizer.keyWord()

            if k == "let":
                self.__compileLetStatement()
            elif k == "if":
                self.__compileIfStatement()
            elif k == "while":
                self.__compileWhileStatement()
            elif k == "do":
                self.__compileDoStatement()
            elif k == "return":
                self.__compileReturnStatement()
            else:
                print("Error: invalid statment " + k)
                sys.exit(1)

    def __compileLetStatement(self):
        """Compiles a complete jack let statment grammar"""
        self.tokenizer.advance()

        # look for varName
        if not self.__checkIdentifier():
            print("Error: missing varName for let statement")
        self.tokenizer.advance()

        # get values from symbol table
        varName = self.currentName
        kind = self.symbolTable.kindOf(varName)
        varType = self.symbolTable.typeOf(varName)
        index = self.symbolTable.indexOf(varName)
        isArray = False

        # check for [
        if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == "[":
            isArray = True
            self.tokenizer.advance()

            # compile expression
            self.__compileExpression()

            # get the index from the top of the stack from compileExpression
            self.vmWriter.writePush(kind, index)
            self.vmWriter.writeArithmetic("add")
            self.vmWriter.writePop("temp", 2)

            # write the closing bracket
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "]":
                print("Error: missing closing ] in let statement")
                sys.exit(1)
            self.tokenizer.advance()

        # check for =
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "=":
            print("Error: missing = in let expression")
            sys.exit(1)
        self.tokenizer.advance()

        # compile expression
        self.__compileExpression()

        # write code to pop since expression puts result on top of stack
        if isArray:
            self.vmWriter.writePush("temp", 2)
            self.vmWriter.writePop("pointer", 1)
            self.vmWriter.writePop("that", 0)
        else:
            self.vmWriter.writePop(kind, index)

        # look for ;
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ";":
            print("Error: missing ; after let statement")
            sys.exit(1)
        self.tokenizer.advance()

    def __compileIfStatement(self):
        """Compiles a complete jack if statement grammar"""
        # setup local counter
        localIfCounter = self.ifCounter
        self.ifCounter += 1

        self.tokenizer.advance()

        # check for (
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "(":
            print("Error: missing ( in if statement")
            sys.exit(1)
        self.tokenizer.advance()

        # compile expression
        self.__compileExpression()

        # get the ~ if part from the stack
        self.vmWriter.writeArithmetic("not")

        # check for )
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ")":
            print("Error: missing ) in if statement")
            sys.exit(1)
        self.tokenizer.advance()

        # check for {
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "{":
            print("Error: missing { for if statement")
            sys.exit(1)
        self.tokenizer.advance()

        # write the if for L1
        self.vmWriter.writeIf("if-false", localIfCounter)

        # compile more statements
        self.__compileStatements()

        # write the goto for L2
        self.vmWriter.writeGoto("if-true", localIfCounter)

        # check for }
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "}":
            print("Error: missing } after if statement")
            sys.exit(1)
        self.tokenizer.advance()

        # write label for L1
        self.vmWriter.writeLabel("if-false", localIfCounter)

        # check for else
        if self.tokenizer.tokenType() == "KEYWORD" and self.tokenizer.keyWord() == "else":
            self.tokenizer.advance()

            # check for {
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "{":
                print("Error: missing { for if statement")
                sys.exit(1)
            self.tokenizer.advance()

            # compile more statements
            self.__compileStatements()

            # check for }
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "}":
                print("Error: missing } after if statement")
                sys.exit(1)
            self.tokenizer.advance()

        # write label for L2
        self.vmWriter.writeLabel("if-true", localIfCounter)

    def __compileWhileStatement(self):
        """Compiles a complete jack while statement grammar"""
        # get counter and write label for L1
        localWhileCounter = self.whileCounter
        self.whileCounter += 1
        self.vmWriter.writeLabel("whileStart", localWhileCounter)

        self.tokenizer.advance()

        # check for (
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "(":
            print("Error: missing ( in if statement")
            sys.exit(1)
        self.tokenizer.advance()

        # compile expression
        self.__compileExpression()

        # get ~cond from stack
        self.vmWriter.writeArithmetic("not")

        # check for )
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ")":
            print("Error: missing ) in if statement")
            sys.exit(1)
        self.tokenizer.advance()

        # check for {
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "{":
            print("Error: missing { for if statement")
            sys.exit(1)
        self.tokenizer.advance()

        # write the if for L2
        self.vmWriter.writeIf("whileEnd", localWhileCounter)

        # compile more statements
        self.__compileStatements()

        # write the goto for L1
        self.vmWriter.writeGoto("whileStart", localWhileCounter)

        # check for }
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "}":
            print("Error: missing } after if statement")
            sys.exit(1)
        self.tokenizer.advance()

        # write the label for L2
        self.vmWriter.writeLabel("whileEnd", localWhileCounter)

    def __compileDoStatement(self):
        """Compiles a complete jack do statement grammar"""
        self.tokenizer.advance()

        # compile subroutine call
        if self.__checkIdentifier():
            firstHalf = self.currentName
            self.tokenizer.advance()
            if self.tokenizer.tokenType() == "SYMBOL" and (self.tokenizer.symbol() == "."
                                                           or self.tokenizer.symbol() == "("):
                self.__compileSubroutineCall(firstHalf)

        # check for semicolon
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ";":
            print("Error: missing ; after do statement")
            sys.exit(1)
        self.tokenizer.advance()

        # pop the 0 from the return
        self.vmWriter.writePop("temp", 0)

    def __compileReturnStatement(self):
        """Compiles a complete jack return statement grammar"""
        self.tokenizer.advance()

        # if the next symbol isn't a symbol, it must be an expression
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ";":
            self.__compileExpression()

            # write ;, checking again to make sure after calling compile expression
            # that the next symbol is still a valid ;
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ";":
                print("Error: missing ; after return statement")
                sys.exit(1)
        else:
            # write the return of 0
            self.vmWriter.writePush("constant", 0)

        self.tokenizer.advance()
        # write the return
        self.vmWriter.writeReturn()

    def __convertOp(self, op):
        """Converts the operators that interfere with xml tags to their properly
        escaped versions"""
        op = op.replace("&", "&amp;")
        op = op.replace("<", "&lt;")
        op = op.replace(">", "&gt;")
        op = op.replace("\"", "&quot;")

        return op

    def __compileExpression(self):
        """Compiles a complete jack expression grammar"""
        # compile term
        self.__compileTerm()

        # check for op
        while self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() in op:
            s = self.tokenizer.symbol()

            self.tokenizer.advance()

            # compile another term
            self.__compileTerm()

            # write op vm code
            if s == "+":
                self.vmWriter.writeArithmetic("add")
            elif s == "-":
                self.vmWriter.writeArithmetic("sub")
            elif s == "*":
                self.vmWriter.writeCall("Math.multiply", 2)
            elif s == "/":
                self.vmWriter.writeCall("Math.divide", 2)
            elif s == "&":
                self.vmWriter.writeArithmetic("and")
            elif s == "|":
                self.vmWriter.writeArithmetic("or")
            elif s == "<":
                self.vmWriter.writeArithmetic("lt")
            elif s == ">":
                self.vmWriter.writeArithmetic("gt")
            elif s == "=":
                self.vmWriter.writeArithmetic("eq")

    def __compileTerm(self):
        """Compiles a complete jack term grammar"""
        # term logic
        # check for integerConstant
        if self.tokenizer.tokenType() == "INT_CONST":
            self.vmWriter.writePush("constant", self.tokenizer.intVal())
            self.tokenizer.advance()
        # check for string constant
        elif self.tokenizer.tokenType() == "STRING_CONST":
            # need to make a string constant
            string = self.tokenizer.stringVal()

            # push the length of the string
            self.vmWriter.writePush("constant", len(string))

            # call String.new 1
            self.vmWriter.writeCall("String.new", 1)

            # append to create the string
            for letter in string:
                self.vmWriter.writePush("constant", ord(letter))
                self.vmWriter.writeCall("String.appendChar", 2)

            self.tokenizer.advance()
        # check for keyword for KeywordConstant
        elif self.tokenizer.tokenType() == "KEYWORD":
            k = self.tokenizer.keyWord()

            if k not in KeyWordConstant:
                print("Error: invalid KeyWordConstant" + k + " in term")
                sys.exit(1)

            # write the outputs for the keyword constants
            if k == "null" or k == "false":
                self.vmWriter.writePush("constant", 0)
            elif k == "true":
                self.vmWriter.writePush("constant", 1)
                self.vmWriter.writeArithmetic("neg")
            elif k == "this":
                self.vmWriter.writePush("pointer", 0)

            self.tokenizer.advance()
        # check for symbol for either ( expression ) or unary op
        elif self.tokenizer.tokenType() == "SYMBOL":
            s = self.tokenizer.symbol()

            # ( expression )
            if s == "(":
                self.tokenizer.advance()

                # compile expression
                self.__compileExpression()

                # check for )
                if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ")":
                    print("Error: missing ) after expression in term")
                    sys.exit(1)
                self.tokenizer.advance()
            # unaryOp term
            elif s in unaryOp:
                self.tokenizer.advance()

                # compile term
                self.__compileTerm()

                # write the unary output
                if s == "-":
                    self.vmWriter.writeArithmetic("neg")
                else:
                    self.vmWriter.writeArithmetic("not")
            else:
                print("Error: invalid symbol " + s + " in term")
                sys.exit(1)
        # check for varName | varName [ expression ] | subroutineCall
        elif self.__checkIdentifier():
            # advance the tokenizer one more step to check for [, (, or other
            self.tokenizer.advance()
            firstHalf = self.currentName

            if self.tokenizer.tokenType() == "SYMBOL":
                s = self.tokenizer.symbol()

                # varName[expression]
                if s == "[":
                    # push the array address
                    self.vmWriter.writePush(self.symbolTable.kindOf(firstHalf),
                                            self.symbolTable.indexOf(firstHalf))

                    # write [
                    self.tokenizer.advance()

                    # compile expression
                    self.__compileExpression()

                    # write vm code for array expression
                    self.vmWriter.writeArithmetic("add")
                    self.vmWriter.writePop("pointer", 1)
                    self.vmWriter.writePush("that", 0)

                    # write ]
                    if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "]":
                        print("Error: missing ] after varName[expression]")
                        sys.exit(1)
                    self.tokenizer.advance()
                # subroutineCall
                elif s == "(" or s == ".":
                    # compile subroutineCall
                    self.__compileSubroutineCall(firstHalf)
                else:
                    self.vmWriter.writePush(self.symbolTable.kindOf(self.currentName),
                                            self.symbolTable.indexOf(self.currentName))
            else:
                self.vmWriter.writePush(self.symbolTable.kindOf(self.currentName),
                                        self.symbolTable.indexOf(self.currentName))
        else:
            print("Error: invalid term")
            sys.exit(1)

    def __compileSubroutineCall(self, firstHalf):
        """Compiles a complete jack subroutine call grammar"""
        # look ahead one token to see if it is a ( or a .
        isClass = firstHalf[0].isupper()
        fullSubroutineName = ""
        nArgs = 0

        # subroutineName
        if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == "(":
            fullSubroutineName = self.className + "." + firstHalf
            # since this a self method, we need to push pointer
            self.vmWriter.writePush("pointer", 0)
            self.tokenizer.advance()

            # compile expression list
            nArgs = self.__compileExpressionList(isClass)

            # check for )
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ")":
                print("Error: missing ) after expressionList in subroutineCall")
                sys.exit(1)
            self.tokenizer.advance()
        # className | varName
        elif self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == ".":
            self.tokenizer.advance()
            if self.__checkIdentifier():
                if isClass:
                    fullSubroutineName = firstHalf + "." + self.currentName
                else:
                    fullSubroutineName = self.symbolTable.typeOf(
                        firstHalf) + "." + self.currentName
                    # push the address of firstHalf
                    self.vmWriter.writePush(self.symbolTable.kindOf(
                        firstHalf), self.symbolTable.indexOf(firstHalf))
            else:
                print("Error: missing varName|className in subroutineCall")

            # check for (
            self.tokenizer.advance()
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != "(":
                print("Error: missing ( in subroutineCall before expressionList")
                sys.exit(1)
            self.tokenizer.advance()

            # compile expression list
            nArgs = self.__compileExpressionList(isClass)

            # check for )
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol() != ")":
                print("Error: missing ) after expressionList in subroutineCall")
                sys.exit(1)
            self.tokenizer.advance()
        else:
            print("Error: invalid subroutineCall")
            sys.exit(1)

        if fullSubroutineName != "":
            self.vmWriter.writeCall(fullSubroutineName, nArgs)

    def __compileExpressionList(self, isClass):
        """Compiles a complete jack expression list grammar"""
        # if the symbol is ), there is no expression list
        if isClass:
            argCounter = 0
        else:
            argCounter = 1

        if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == ")":
            return argCounter
        else:
            # compile expression
            self.__compileExpression()

            argCounter += 1

            # loop until you dont see a comma
            while self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol() == ",":
                self.tokenizer.advance()

                # compile expression
                self.__compileExpression()

                argCounter += 1

            return argCounter
コード例 #20
0
class CompilationEngine:
    """Recursive-descent compiler for a single Jack class.

    Each ``compileXxx(tk, ind)`` method handles one grammar rule: it reads
    tokens from the tokenizer ``tk``, echoes the parse tree as XML to
    ``self.fout`` (indented by ``ind`` tabs), records identifiers in
    ``self.st`` and emits VM commands through ``self.vmw``.
    """

    __segDict = {'static': 'STATIC', 'field': 'FIELD', 'var': 'VAR'}

    # Binary operators that compile to a single VM arithmetic command.
    __arithmeticOps = {'+': 'add', '-': 'sub', '&': 'and', '|': 'or',
                       '<': 'lt', '>': 'gt', '=': 'eq'}
    # Binary operators that compile to a two-argument OS routine call.
    __osCallOps = {'*': 'Math.multiply', '/': 'Math.divide'}

    def __init__(self, outfile):
        """Open the ``.xml`` trace next to ``outfile`` and create the helpers.

        ``outfile`` is handed unchanged to ``VMWriter``; the XML trace uses
        the same path with its extension replaced by ``.xml``.
        """
        self.fout = open(os.path.splitext(outfile)[0] + '.xml', 'w')
        self.st = SymbolTable()
        self.vmw = VMWriter(outfile)
        # Number of expressions in the most recently compiled expression list.
        self.argCount = 0

    def writeToken(self, tk, ind, tkSpec=None, tkType=None):
        """Write the current token as one XML line, validating it first.

        ``tkSpec`` restricts the accepted token text and ``tkType`` the
        accepted type tag; each may be ``None`` (no restriction), a single
        string, or a collection of strings.  A token that fails either
        restriction triggers ``raiseSyntaxError``.
        """
        typestr = tk.typestr[tk.tokenType()]
        token = tk.getToken()
        # A bare string means "exactly this value".  Without wrapping it,
        # ``in`` would do a substring test and e.g. accept the token ``a``
        # where ``class`` is required.
        if isinstance(tkSpec, str):
            tkSpec = (tkSpec,)
        if isinstance(tkType, str):
            tkType = (tkType,)

        if ((tkSpec is None or token in tkSpec)
                and (tkType is None or typestr in tkType)):
            # NOTE(review): the token text is written verbatim; symbols such
            # as ``<`` or ``&`` are not XML-escaped here -- confirm whether
            # consumers of the trace need escaping.
            self.fout.write(ind * '\t' + '<' + typestr + '> ' + token +
                            ' </' + typestr + '>\n')
        else:
            self.raiseSyntaxError()

    def raiseSyntaxError(self, *trash, err=UserWarning):
        """Raise ``err``; positional arguments are accepted and ignored.

        The ignored positionals let this method stand in for a statement
        compiler in ``compileStatements``' dispatch table.
        """
        raise err

    # PROGRAM STRUCTURE

    def compileClass(self, tk, ind):
        """Compile a whole class.

        Unlike the other rules this one advances ``tk`` before reading, so
        the tokenizer must be positioned just before the ``class`` keyword.
        """
        # 'class' className '{' classVarDec* subroutineDec* '}'
        self.fout.write(ind * '\t' + '<class>\n')

        tk.advance()
        self.writeToken(tk, ind + 1, tkSpec='class')
        tk.advance()
        self.writeToken(tk, ind + 1, tkType='identifier')  # className
        self.className = tk.getToken()
        tk.advance()
        self.writeToken(tk, ind + 1, tkSpec='{')
        tk.advance()

        while tk.getToken() in ('static', 'field'):
            self.compileClassVarDec(tk, ind + 1)
        while tk.getToken() != '}':
            self.compileSubroutineDec(tk, ind + 1)
        # Indent the closing brace like every other child of <class>.
        self.writeToken(tk, ind + 1, tkSpec='}')

        self.fout.write(ind * '\t' + '</class>\n')

    def compileClassVarDec(self, tk, ind):
        """Compile one static/field declaration and define its variables."""
        # ('static' | 'field') type varName (',' varName)* ';'
        self.fout.write(ind * '\t' + '<classVarDec>\n')

        self.writeToken(tk, ind + 1, tkSpec=('static', 'field'))
        kind = tk.getToken()
        tk.advance()
        self.writeToken(tk, ind + 1, tkType=('keyword', 'identifier'))  # type
        vtype = tk.getToken()
        tk.advance()
        self.writeToken(tk, ind + 1, tkType='identifier')  # varName
        name = tk.getToken()
        tk.advance()

        self.st.define(name, vtype, self.__segDict[kind])

        while tk.getToken() == ',':
            self.writeToken(tk, ind + 1, tkSpec=',')
            tk.advance()
            self.writeToken(tk, ind + 1, tkType='identifier')  # varName
            name = tk.getToken()
            tk.advance()

            self.st.define(name, vtype, self.__segDict[kind])

        self.writeToken(tk, ind + 1, tkSpec=';')
        tk.advance()

        self.fout.write(ind * '\t' + '</classVarDec>\n')

    def compileSubroutineDec(self, tk, ind):
        """Compile one constructor, function or method declaration.

        Resets the subroutine scope of the symbol table and the per-subroutine
        label counters; for a method, ``this`` is defined as the first ``ARG``.
        """
        # ('constructor' | 'function' | 'method') ('void' | type) subroutineName
        # '(' parameterList ')' subroutineBody
        self.fout.write(ind * '\t' + '<subroutineDec>\n')

        self.st.startSubroutine()
        if tk.getToken() == 'method':
            self.st.define('this', self.className, 'ARG')
        self.ifCount = 0
        self.whileCount = 0

        self.writeToken(tk,
                        ind + 1,
                        tkSpec=('constructor', 'function', 'method'))
        tk.advance()
        self.writeToken(tk, ind + 1,
                        tkType=('keyword', 'identifier'))  # ('void' | type)
        tk.advance()
        self.writeToken(tk, ind + 1, tkType='identifier')  # subroutineName
        self.subroutineName = tk.getToken()
        tk.advance()

        self.writeToken(tk, ind + 1, tkSpec='(')
        tk.advance()
        self.compileParameterList(tk, ind + 1)
        self.writeToken(tk, ind + 1, tkSpec=')')
        tk.advance()
        self.compileSubroutineBody(tk, ind + 1)

        self.fout.write(ind * '\t' + '</subroutineDec>\n')

    def compileParameterList(self, tk, ind):
        """Compile a possibly empty parameter list, defining each as ``ARG``.

        Leaves ``tk`` on the closing ``)``, which the caller consumes.
        """
        # ((type varName) (',' type varName)*)?
        self.fout.write(ind * '\t' + '<parameterList>\n')

        if tk.getToken() != ')':
            self.writeToken(tk, ind + 1, tkType=('keyword', 'identifier'))
            vtype = tk.getToken()
            tk.advance()
            self.writeToken(tk, ind + 1, tkType='identifier')
            name = tk.getToken()
            tk.advance()

            self.st.define(name, vtype, 'ARG')

            while tk.getToken() == ',':
                self.writeToken(tk, ind + 1, tkSpec=',')
                tk.advance()
                self.writeToken(tk, ind + 1, tkType=('keyword', 'identifier'))
                vtype = tk.getToken()
                tk.advance()
                self.writeToken(tk, ind + 1, tkType='identifier')
                name = tk.getToken()
                tk.advance()

                self.st.define(name, vtype, 'ARG')

        self.fout.write(ind * '\t' + '</parameterList>\n')

    def compileSubroutineBody(self, tk, ind):
        """Compile a subroutine body and emit its VM ``function`` header.

        The header is written after all ``var`` declarations so that the
        local-variable count is known.
        """
        # '{' varDec* statements '}'
        self.fout.write(ind * '\t' + '<subroutineBody>\n')

        self.writeToken(tk, ind + 1, tkSpec='{')
        tk.advance()
        while tk.getToken() == 'var':
            self.compileVarDec(tk, ind + 1)

        self.st.printSymbolTable()
        self.vmw.writeFunction(self.className + '.' + self.subroutineName,
                               self.st.varCount('VAR'))

        self.compileStatements(tk, ind + 1)
        self.writeToken(tk, ind + 1, tkSpec='}')
        tk.advance()

        self.fout.write(ind * '\t' + '</subroutineBody>\n')

    def compileVarDec(self, tk, ind):
        """Compile one ``var`` declaration and define its local variables."""
        # 'var' type varName (',' varName)* ';'
        self.fout.write(ind * '\t' + '<varDec>\n')

        self.writeToken(tk, ind + 1, tkSpec='var')
        kind = tk.getToken()
        tk.advance()
        self.writeToken(tk, ind + 1, tkType=('keyword', 'identifier'))  # type
        vtype = tk.getToken()
        tk.advance()
        self.writeToken(tk, ind + 1, tkType='identifier')  # varName
        name = tk.getToken()
        tk.advance()

        self.st.define(name, vtype, self.__segDict[kind])

        while tk.getToken() == ',':
            self.writeToken(tk, ind + 1, tkSpec=',')
            tk.advance()
            self.writeToken(tk, ind + 1, tkType='identifier')  # varName
            name = tk.getToken()
            tk.advance()

            self.st.define(name, vtype, self.__segDict[kind])

        self.writeToken(tk, ind + 1, tkSpec=';')
        tk.advance()

        self.fout.write(ind * '\t' + '</varDec>\n')

    # STATEMENTS

    def compileStatements(self, tk, ind):
        """Compile statements until the enclosing ``}`` is reached.

        A token that does not start a statement is routed to
        ``raiseSyntaxError``.
        """
        # statement*
        self.fout.write(ind * '\t' + '<statements>\n')
        handlers = {'let': self.compileLet, 'if': self.compileIf,
                    'while': self.compileWhile, 'do': self.compileDo,
                    'return': self.compileReturn}
        while tk.getToken() != '}':
            handlers.get(tk.getToken(), self.raiseSyntaxError)(tk, ind + 1)
        self.fout.write(ind * '\t' + '</statements>\n')

    def compileLet(self, tk, ind):
        """Compile an assignment to a variable or to an array element."""
        # 'let' varName ('[' expression ']')? '=' expression ';'
        self.fout.write(ind * '\t' + '<letStatement>\n')

        self.writeToken(tk, ind + 1, tkSpec='let')
        tk.advance()
        self.writeToken(tk, ind + 1, tkType='identifier')  # varName
        name = tk.getToken()
        tk.advance()

        array = False
        if tk.getToken() == '[':
            array = True
            self.writeToken(tk, ind + 1, tkSpec='[')
            tk.advance()
            self.compileExpression(tk, ind + 1)
            self.writeToken(tk, ind + 1, tkSpec=']')
            tk.advance()

            # index + base address stays on the stack while the right-hand
            # side is evaluated
            self.vmw.writePush(self.st.kindOf(name), self.st.indexOf(name))
            self.vmw.writeArithmetic('add')

        self.writeToken(tk, ind + 1, tkSpec='=')
        tk.advance()
        self.compileExpression(tk, ind + 1)  # expression
        self.writeToken(tk, ind + 1, tkSpec=';')
        tk.advance()

        if array:
            # Park the value in TEMP 0 so the target address can be popped
            # into POINTER 1 before the value is stored through THAT 0.
            self.vmw.writePop('TEMP', 0)
            self.vmw.writePop('POINTER', 1)
            self.vmw.writePush('TEMP', 0)
            self.vmw.writePop('THAT', 0)
        else:
            self.vmw.writePop(self.st.kindOf(name), self.st.indexOf(name))

        self.fout.write(ind * '\t' + '</letStatement>\n')

    def compileIf(self, tk, ind):
        """Compile an if statement with an optional else branch.

        Labels are numbered with ``self.ifCount``, which is reserved before
        the body is compiled so nested ifs receive distinct numbers.
        """
        # 'if' '(' expression ')' '{' statements '}' ('else' '{' statements '}')?
        self.fout.write(ind * '\t' + '<ifStatement>\n')

        thisif = self.ifCount
        self.ifCount = self.ifCount + 1

        self.writeToken(tk, ind + 1, tkSpec='if')
        tk.advance()
        self.writeToken(tk, ind + 1, tkSpec='(')
        tk.advance()
        self.compileExpression(tk, ind + 1)  # expression
        self.writeToken(tk, ind + 1, tkSpec=')')
        tk.advance()

        self.vmw.writeIf('IF_' + str(thisif))
        self.vmw.writeGoto('ELSE_' + str(thisif))
        self.vmw.writeLabel('IF_' + str(thisif))

        self.writeToken(tk, ind + 1, tkSpec='{')
        tk.advance()
        self.compileStatements(tk, ind + 1)  # statements
        self.writeToken(tk, ind + 1, tkSpec='}')
        tk.advance()

        self.vmw.writeGoto('IFEND_' + str(thisif))
        self.vmw.writeLabel('ELSE_' + str(thisif))
        if tk.getToken() == 'else':
            self.writeToken(tk, ind + 1, tkSpec='else')
            tk.advance()
            self.writeToken(tk, ind + 1, tkSpec='{')
            tk.advance()
            self.compileStatements(tk, ind + 1)  # statements
            self.writeToken(tk, ind + 1, tkSpec='}')
            tk.advance()
        self.vmw.writeLabel('IFEND_' + str(thisif))

        self.fout.write(ind * '\t' + '</ifStatement>\n')

    def compileWhile(self, tk, ind):
        """Compile a while loop, numbering its labels with ``self.whileCount``."""
        # 'while' '(' expression ')' '{' statements '}'
        self.fout.write(ind * '\t' + '<whileStatement>\n')

        thiswhile = self.whileCount
        self.whileCount = self.whileCount + 1

        self.vmw.writeLabel('WHILE_' + str(thiswhile))
        self.writeToken(tk, ind + 1, tkSpec='while')
        tk.advance()
        self.writeToken(tk, ind + 1, tkSpec='(')
        tk.advance()
        self.compileExpression(tk, ind + 1)  # expression
        self.writeToken(tk, ind + 1, tkSpec=')')
        tk.advance()

        self.vmw.writeIf('WHILEBODY_' + str(thiswhile))
        self.vmw.writeGoto('WHILEEND_' + str(thiswhile))

        self.vmw.writeLabel('WHILEBODY_' + str(thiswhile))
        self.writeToken(tk, ind + 1, tkSpec='{')
        tk.advance()
        self.compileStatements(tk, ind + 1)  # statements
        self.writeToken(tk, ind + 1, tkSpec='}')
        tk.advance()
        self.vmw.writeGoto('WHILE_' + str(thiswhile))

        self.vmw.writeLabel('WHILEEND_' + str(thiswhile))

        self.fout.write(ind * '\t' + '</whileStatement>\n')

    def compileDo(self, tk, ind):
        """Compile a do statement; the call's return value is discarded.

        The identifier before the optional ``.`` is pushed via the symbol
        table.  When ``writePush`` reports success (truthy return --
        presumably only for a known variable; confirm against ``VMWriter``)
        that pushed value is counted as one extra argument of the call.
        """
        # 'do' subroutineCall ';'
        self.fout.write(ind * '\t' + '<doStatement>\n')

        self.writeToken(tk, ind + 1, tkSpec='do')
        tk.advance()
        self.writeToken(tk, ind + 1, tkType='identifier')
        receiver = tk.getToken()
        idtf = receiver
        tk.advance()
        if tk.getToken() == '.':
            self.writeToken(tk, ind + 1, tkSpec='.')
            tk.advance()
            self.writeToken(tk, ind + 1, tkType='identifier')  # subroutineName
            idtf = idtf + '.' + tk.getToken()
            tk.advance()

        pushed = self.vmw.writePush(self.st.kindOf(receiver),
                                    self.st.indexOf(receiver))

        self.writeToken(tk, ind + 1, tkSpec='(')
        tk.advance()
        nArgs = self.compileExpressionList(tk, ind + 1)  # expressionList
        # Add the receiver only after the list is compiled: the list
        # compiler restarts its own count from zero.
        if pushed:
            nArgs = nArgs + 1
        self.writeToken(tk, ind + 1, tkSpec=')')
        tk.advance()
        self.writeToken(tk, ind + 1, tkSpec=';')
        tk.advance()

        self.vmw.writeCall(idtf, nArgs)
        self.vmw.writePop('TEMP', 0)

        self.fout.write(ind * '\t' + '</doStatement>\n')

    def compileReturn(self, tk, ind):
        """Compile a return statement; a bare ``return;`` pushes constant 0."""
        # 'return' expression? ';'
        self.fout.write(ind * '\t' + '<returnStatement>\n')

        self.writeToken(tk, ind + 1)
        tk.advance()
        if tk.getToken() == ';':
            self.writeToken(tk, ind + 1, tkSpec=';')
            self.vmw.writePush('CONST', 0)
        else:
            self.compileExpression(tk, ind + 1)
            self.writeToken(tk, ind + 1, tkSpec=';')
        tk.advance()
        self.vmw.writeReturn()

        self.fout.write(ind * '\t' + '</returnStatement>\n')

    # EXPRESSIONS

    def compileExpression(self, tk, ind):
        """Compile ``term (op term)*`` strictly left to right.

        Each operator's VM command is emitted right after its right-hand
        term, so no operator precedence is applied.
        """
        self.fout.write(ind * '\t' + '<expression>\n')

        self.compileTerm(tk, ind + 1)
        while tk.getToken() in ('+', '-', '*', '/', '&', '|', '<', '>', '='):
            self.writeToken(tk, ind + 1, tkType='symbol')
            op = tk.getToken()
            tk.advance()
            self.compileTerm(tk, ind + 1)

            if op in self.__osCallOps:
                self.vmw.writeCall(self.__osCallOps[op], 2)
            else:
                self.vmw.writeArithmetic(self.__arithmeticOps[op])

        self.fout.write(ind * '\t' + '</expression>\n')

    def compileTerm(self, tk, ind):
        """Compile one term.

        Handles variables, array elements, subroutine calls, unary
        operators, parenthesised expressions and constants.
        """
        self.fout.write(ind * '\t' + '<term>\n')

        if tk.tokenType() == 'IDENTIFIER':
            self.writeToken(tk, ind + 1)
            idtf = tk.getToken()
            tk.advance()

            if tk.getToken() == '.':
                receiver = idtf
                self.writeToken(tk, ind + 1, tkSpec='.')
                tk.advance()
                self.writeToken(tk, ind + 1,
                                tkType='identifier')  # subroutineName
                idtf = idtf + '.' + tk.getToken()
                tk.advance()

                pushed = self.vmw.writePush(self.st.kindOf(receiver),
                                            self.st.indexOf(receiver))

                self.writeToken(tk, ind + 1, tkSpec='(')
                tk.advance()
                nArgs = self.compileExpressionList(tk, ind + 1)
                # Count the pushed receiver after the list is compiled; the
                # list compiler restarts its own count from zero.
                if pushed:
                    nArgs = nArgs + 1
                self.writeToken(tk, ind + 1, tkSpec=')')
                tk.advance()

                self.vmw.writeCall(idtf, nArgs)

            elif tk.getToken() == '(':
                self.writeToken(tk, ind + 1, tkSpec='(')
                tk.advance()
                nArgs = self.compileExpressionList(tk, ind + 1)
                self.writeToken(tk, ind + 1, tkSpec=')')
                tk.advance()
                self.vmw.writeCall(idtf, nArgs)

            elif tk.getToken() == '[':
                self.writeToken(tk, ind + 1, tkSpec='[')
                tk.advance()
                self.compileExpression(tk, ind + 1)  # expression
                self.writeToken(tk, ind + 1, tkSpec=']')
                tk.advance()
                # base + index -> POINTER 1, then read the element via THAT 0
                self.vmw.writePush(self.st.kindOf(idtf), self.st.indexOf(idtf))
                self.vmw.writeArithmetic('add')
                self.vmw.writePop('POINTER', 1)
                self.vmw.writePush('THAT', 0)

            else:
                self.vmw.writePush(self.st.kindOf(idtf), self.st.indexOf(idtf))

        elif tk.getToken() in ('-', '~'):
            self.writeToken(tk, ind + 1)
            op = tk.getToken()
            tk.advance()
            self.compileTerm(tk, ind + 1)
            if op == '-':
                self.vmw.writeArithmetic('neg')
            elif op == '~':
                self.vmw.writeArithmetic('not')

        elif tk.getToken() == '(':
            self.writeToken(tk, ind + 1, tkSpec='(')
            tk.advance()
            self.compileExpression(tk, ind + 1)
            self.writeToken(tk, ind + 1, tkSpec=')')
            tk.advance()

        else:
            self.writeToken(tk, ind + 1)
            val = tk.getToken()
            # Check for a string constant first: its text may look like a
            # number or a keyword ("12", "true") and must still be built as
            # a String object.
            if tk.tokenType() == 'STRING_CONST':
                self.vmw.writePush('CONST', len(val))
                self.vmw.writeCall('String.new', 1)
                for char in val:
                    self.vmw.writePush('CONST', ord(char))
                    self.vmw.writeCall('String.appendChar', 2)
            elif val.isnumeric():
                self.vmw.writePush('CONST', int(val))
            elif val == 'true':
                # true is compiled as not(0)
                self.vmw.writePush('CONST', 0)
                self.vmw.writeArithmetic('not')
            elif val in ('false', 'null'):
                self.vmw.writePush('CONST', 0)
            elif val == 'this':
                self.vmw.writePush('POINTER', 0)
            tk.advance()

        self.fout.write(ind * '\t' + '</term>\n')

    def compileExpressionList(self, tk, ind):
        """Compile a possibly empty, comma-separated list of expressions.

        Returns the number of expressions and also stores it in
        ``self.argCount``.  The count is kept in a local while compiling so
        that calls nested inside an argument cannot disturb it.
        """
        # (expression (',' expression)*)?
        self.fout.write(ind * '\t' + '<expressionList>\n')

        count = 0
        if tk.getToken() != ')':
            self.compileExpression(tk, ind + 1)
            count = count + 1
            while tk.getToken() == ',':
                self.writeToken(tk, ind + 1, tkSpec=',')
                tk.advance()
                self.compileExpression(tk, ind + 1)  # expression
                count = count + 1
        self.argCount = count

        self.fout.write(ind * '\t' + '</expressionList>\n')
        return count
コード例 #21
0
class Parser:
  def __init__(self, parserInput, parserOutput,daSymbolTable):
    """Load the token file, compile it, and close the XML output.

    parserInput   -- path of the token XML file written by the tokenizer;
                     its first and last lines are dropped
    parserOutput  -- output path without extension; '.xml' is appended for
                     the parse tree and the bare path is given to VMWriter
    daSymbolTable -- symbol table object used for the whole compilation

    Compilation runs inside the constructor.
    """
    self.tokenCounter = 0
    self.indent = 0

    # open the input.xml file made by the tokenizer (full of tokens ready to parse)
    with open(parserInput) as f:
      self.tTpcopy = list(f) # raw lines, kept as read
    self.tTp = self.tTpcopy[1:-1] # tokens to parse, minus the first and last line

    # output to write to
    self.output1 = open(parserOutput+'.xml', 'w')
    try:
      self.theVMWriter = VMWriter(parserOutput)

      self.currentTokenArr = self.tTp[self.tokenCounter].split(' ')
      self.currentToken = self.tTp[self.tokenCounter] # first token
      self.className = ''
      self.subroutineVoid = False
      self.iflabel = 0
      self.whilelabel = 0
      self.constructor = False
      self.functionType = ''
      self.subroutineName = ''
      self.array = False

      # get symbol table and everything started
      self.theSymbolTable = daSymbolTable
      self.theSymbolTable.classStart()
      self.compileClass() # start compiling
    finally:
      # close the file even when compilation fails part-way
      self.output1.close()
    
# compile the class
  def compileClass(self):
    self.output1.write('<class>'+'\n')
    self.increaseIndent()
    self.writeAdv() # 'class'
    # save the class name for symbol table
    self.className = str(self.currentToken.split()[1]) 
    self.writeAdv() #  className
    self.writeAdv() # '{'


    self.compileClassVarDec()
  


    self.compileSubroutine()
    self.outIndent()
    self.output1.write(self.currentToken)
    self.output1.write('</class>'+'\n')

    # recursion to compile multiple classes in one file
    #print(self.tokenCounter)
    #print(len(self.tTp))
    daSize = len(self.tTp)
    if self.tokenCounter+1 != daSize:
      #print('hello')
      lookAhead = str(self.tTp[self.tokenCounter+1]).split()[1]
      #print(lookAhead)
      if lookAhead == 'class':
        #self.tokenCounter+= 2
        self.writeAdv()
        #print(self.currentToken)
        self.compileClass() # start compiling again using recursion!!!!!


    #self.theSymbolTable.viewTableCST()
    return 

  def compileClassVarDec(self):
    

    if str(self.currentToken) == '<keyword> field </keyword>\n' or str(self.currentToken) == '<keyword> static </keyword>\n':
      self.outIndent()
      self.output1.write('<classVarDec>\n')
      self.increaseIndent()
      # while loop takes care of (varName)*
      classVarkind = self.currentToken.split()[1]
      classVartype = self.tTp[self.tokenCounter+1].split()[1]
      everytwo = 0
      start = 1
      while str(self.currentToken) != '<symbol> ; </symbol>\n':

        if everytwo % 2 == 0 and start > 2:
          daname = self.currentToken.split()[1]
          self.theSymbolTable.define(daname,classVartype,classVarkind)
          
        self.writeAdv() 
        start += 1
        everytwo += 1
        
        
      self.writeAdv() # ')'


      self.decreaseIndent()
      self.outIndent()
      self.output1.write('</classVarDec>\n')
    # recursion: check if more, then call itself again
    if str(self.currentToken) == '<keyword> field </keyword>\n' or str(self.currentToken) == '<keyword> static </keyword>\n':
      self.compileClassVarDec()
    return

  def compileSubroutine(self):
    self.outIndent()
    self.output1.write('<subroutineDec>\n')
    self.increaseIndent()
    
    
    self.functionType = str(self.currentToken.split()[1]) # save if it is constructor,function, or method
    if str(self.currentToken.split()[1]) == 'constructor':
      self.constructor = True
    else:
      self.constructor = False

    self.writeAdv() # constructor|function|method
    if str(self.currentToken.split()[1]) == 'void':
      self.subroutineVoid = True
    else:
      self.subroutineVoid = False

    self.writeAdv() # void|type
    self.subroutineName = str(self.currentToken.split()[1]) 
    self.writeAdv() # subroutineName
    self.writeAdv() # '('
    
    self.theSymbolTable.startSubroutine()
  

    self.compileParameterList()
    
    self.writeAdv() # ')' for end of parameterlist

    self.outIndent()
    self.output1.write('<subroutineBody>\n')
    self.increaseIndent()
    self.writeAdv() # print the {

    # compile all of the varDecs first
    if str(self.currentToken) == '<keyword> var </keyword>\n':
      self.compileVarDec()
    
    # VM code write function down
    the999 = self.theSymbolTable.varCount('var')
    #print(subroutineName)
    self.theVMWriter.writeFunction(self.className,self.subroutineName,the999)
    
    # code to set the 'this' to point to passed object
    if self.constructor == True:
      self.theVMWriter.writePush('constant',self.theSymbolTable.varCount('field'))
      self.theVMWriter.writeCall('Memory.alloc',1)
      self.theVMWriter.writePop('pointer',0)
    elif self.functionType == 'method':
      self.theVMWriter.writePush('argument',0)
      self.theVMWriter.writePop('pointer',0)
      daname = 'this'
      datype = self.className
      dakind = 'argument'
      self.theSymbolTable.defineMethod(daname,datype,dakind)
    # enter statements and statements calls itself recursively
    self.compileStatements()  
     
    self.writeAdv() # } ending the subroutineBody
    self.decreaseIndent()
    self.outIndent()
    self.output1.write('</subroutineBody>\n')
    self.decreaseIndent()
    self.outIndent()
    self.output1.write('</subroutineDec>\n')
    #self.theSymbolTable.viewTableMST()
    
    # keep track of methods in symbol table
    
    self.theSymbolTable.defineSubroutineTracker(self.subroutineName,'method' ,self.className,self.subroutineVoid)
    # recusion part
    # check if more subroutines
    # if constructor|function|method then we compileSubroutine
    if str(self.currentToken) == '<keyword> constructor </keyword>\n':
      self.compileSubroutine()
    elif str(self.currentToken) == '<keyword> function </keyword>\n':
      self.compileSubroutine()
    elif str(self.currentToken) == '<keyword> method </keyword>\n':
      self.compileSubroutine()

    return

  def compileParameterList(self):
    self.outIndent()
    self.output1.write('<parameterList>\n')
    self.increaseIndent()
    everythree = 0
    while str(self.currentToken) != '<symbol> ) </symbol>\n':
      if everythree % 3 == 0:
        daname = self.tTp[self.tokenCounter+1].split()[1]
        datype = self.tTp[self.tokenCounter].split()[1]
        dakind = 'argument'
        self.theSymbolTable.defineMethod(daname,datype,dakind)

      self.writeAdv()
      everythree += 1
      
    
    
    self.decreaseIndent()
    self.outIndent()
    self.output1.write('</parameterList>\n')
    return

  def compileVarDec(self):
    
    self.outIndent()
    self.output1.write('<varDec>\n')
    self.increaseIndent()

    
    self.writeAdv() # 'var'
    everytwo = 0
    datype = self.tTp[self.tokenCounter].split()[1]
    self.writeAdv() #  type
    while str(self.currentToken) != '<symbol> ; </symbol>\n':
      if everytwo % 2 == 0:
        daname = self.tTp[self.tokenCounter].split()[1]
        dakind = 'var'
        self.theSymbolTable.defineMethod(daname,datype,dakind)

      self.writeAdv()
      everytwo += 1
      
    self.writeAdv()
    self.decreaseIndent()
    self.outIndent()
    self.output1.write('</varDec>\n')

    if str(self.currentToken) == '<keyword> var </keyword>\n':
      self.compileVarDec()
    return

  def compileStatements(self):
    self.outIndent()
    self.output1.write('<statements>\n')
    self.increaseIndent()
    
    
    # change to while to check if next guy is statement
    while self.checkStatement():
      if str(self.currentToken) == '<keyword> if </keyword>\n':
        self.compileIf()
      elif str(self.currentToken) == '<keyword> let </keyword>\n':
        self.compileLet()
      elif str(self.currentToken) == '<keyword> while </keyword>\n':
        self.compileWhile()
      elif str(self.currentToken) == '<keyword> do </keyword>\n':
        self.compileDo()
      elif str(self.currentToken) == '<keyword> return </keyword>\n':
        if self.constructor == True:
          2+2
          #self.theVMWriter.writePush('pointer',0)
        self.compileReturn()

    
    self.decreaseIndent()
    self.outIndent()
    self.output1.write('</statements>\n')
    return

  def compileDo(self):
    self.outIndent()
    self.output1.write('<doStatement>\n')
    self.increaseIndent()
    DoSubroutineName = ''
    self.writeAdv() # 'do'
    # subroutineCall which is a term, inside of an expression
    
    # LL(2) grammar part. 
    # look ahead one token to see if ( or . for two types of subroutine calls
    lookAhead = self.tTp[self.tokenCounter+1]
    if lookAhead == '<symbol> ( </symbol>\n':
      callF = str(self.currentToken.split()[1])
      DoSubroutineName = callF
      #print(callF)
      self.theVMWriter.writePush('pointer','0')
      self.writeAdv() # subroutineName wrapped in identifier tags
      self.writeAdv() # '('
      self.compileExpressionList()
      self.writeAdv() # ')'
      self.writeAdv() # ';'
      numOfarg = self.theSymbolTable.getID(callF)
      self.theVMWriter.writeCall(self.className+'.'+callF,numOfarg)
      if self.theSymbolTable.getVoid(callF):
        self.theVMWriter.writePop('temp','0')
    else:
      # subroutineName
      otherClassname = str(self.currentToken.split()[1])
      self.writeAdv() # className|varName
      self.writeAdv() # '.'
      callF = str(self.currentToken.split()[1]) # subroutineName
      DoSubroutineName = callF
      self.writeAdv() # subroutineName
      self.writeAdv() # '('
      self.compileExpressionList()
      self.writeAdv() # ')'
      self.writeAdv() # ';'
      numOfarg = self.theSymbolTable.getID(callF)
      #print(otherClassname)
      if self.theSymbolTable.getKind(otherClassname) == None:
        self.theVMWriter.writeCall(otherClassname+'.'+callF,numOfarg)
        if self.theSymbolTable.getVoid(callF):
          self.theVMWriter.writePop('temp','0')
      else:
        theSegment = self.theSymbolTable.getKind(otherClassname)
        theIndex = self.theSymbolTable.getID(otherClassname)
        self.theVMWriter.writePush(theSegment,theIndex)
        self.theVMWriter.writeCall(otherClassname+'.'+callF,numOfarg)
        if self.theSymbolTable.getVoid(callF):
          self.theVMWriter.writePop('temp','0')


    self.decreaseIndent()
    self.outIndent()
    self.output1.write('</doStatement>\n')
    #if self.theSymbolTable.getVoid(DoSubroutineName):
      #print(DoSubroutineName)
      # if the method call was void
      # self.theSymbolTable.getVoid(callF)
      #print(DoSubroutineName)
      #self.theVMWriter.writePop('temp','0')

    return

  def compileLet(self):
    self.outIndent()
    self.output1.write('<letStatement>\n')
    self.increaseIndent()

    self.writeAdv() # 'let'
    letVarname = str(self.currentToken.split()[1]) 
    self.writeAdv() # varName
    #check if [] brackets are there or not 
    if str(self.currentToken) == '<symbol> [ </symbol>\n':
      self.array = True
      # push a from a[i]
      theSegment = self.theSymbolTable.getKind(letVarname)
      theIndex = self.theSymbolTable.getID(letVarname)
      self.theVMWriter.writePush(theSegment,theIndex)
      self.writeAdv() # [
      self.compileExpression()
      self.writeAdv() # ]
      # add
      self.theVMWriter.writeArithmetic('+')
      
    self.writeAdv() # '=' 
    self.compileExpression() # 19 expression in example
    self.writeAdv() # ';'
   
    if self.array:
      #self.theVMWriter.writePop('pointer','1')
      self.theVMWriter.writePop('temp',0)
      self.theVMWriter.writePop('pointer',1)
      self.theVMWriter.writePush('temp',0)
      self.theVMWriter.writePop('that',0)
      self.array = False
    else:
      # now VM code for pop stack result to varName spot
      
      theSegment = self.theSymbolTable.getKind(letVarname)
      theIndex = self.theSymbolTable.getID(letVarname)
      self.theVMWriter.writePop(theSegment,theIndex)

    self.decreaseIndent()
    self.outIndent()
    self.output1.write('</letStatement>\n')
    return

  def compileWhile(self):
    self.outIndent()
    self.output1.write('<whileStatement>\n')
    self.increaseIndent()

    whileLabel1 = str('whileL'+str(self.whilelabel))
    self.whilelabel += 1
    whileLabel2 = str('whileL'+str(self.whilelabel))
    self.whilelabel += 1
    self.theVMWriter.writeLabel(whileLabel1) # label L1

    self.writeAdv() # 'while'
    self.writeAdv() # (
    self.compileExpression()
    
    # taken care of by expression-compileTerm
    self.theVMWriter.writeArithmetic('NOT') # push !(cond)
    self.theVMWriter.writeIf(whileLabel2)

    self.writeAdv() # )
    self.writeAdv() # {
    self.compileStatements() # VM code for {}
    self.writeAdv() # }
    self.theVMWriter.writeGoto(whileLabel1) # go to L1
    self.theVMWriter.writeLabel(whileLabel2) # label 2
    self.decreaseIndent()
    self.outIndent()
    self.output1.write('</whileStatement>\n')
    return

  def compileReturn(self):
    self.outIndent()
    self.output1.write('<returnStatement>\n')
    self.increaseIndent()

    self.writeAdv() # print return 
    # if expression compile that as well
    if self.checkMoreTerms():
      self.compileExpression()
    self.writeAdv() # ;

    # if void push 0 ,if not return top of stack
    if self.subroutineVoid:
      self.theVMWriter.writePush('constant',0)
    # VM code return
    self.theVMWriter.writeReturn()
    self.decreaseIndent()
    self.outIndent()
    self.output1.write('</returnStatement>\n')
    return

  def compileIf(self):
    """Compile an `if` statement with an optional `else` clause.

    Writes the <ifStatement> element to output1.  The VM code negates the
    condition and jumps to the first label when it fails; the `if` body ends
    with a jump to the second label, which marks the end of the statement.
    Both labels are numbered from self.iflabel.
    """
    vmOut = self.theVMWriter
    elseKeyword = '<keyword> else </keyword>\n'

    self.outIndent()
    self.output1.write('<ifStatement>\n')
    self.increaseIndent()

    self.writeAdv()  # 'if'
    self.writeAdv()  # '('
    self.compileExpression()
    self.writeAdv()  # ')'

    # Skip the body when the condition does not hold.
    vmOut.writeArithmetic('NOT')
    skipLabel = 'ifL' + str(self.iflabel)
    self.iflabel += 1
    vmOut.writeIf(skipLabel)

    self.writeAdv()  # '{'
    self.compileStatements()

    # The end label is numbered only after the body has been compiled.
    endLabel = 'ifL' + str(self.iflabel)
    self.iflabel += 1
    vmOut.writeGoto(endLabel)

    vmOut.writeLabel(skipLabel)
    self.writeAdv()  # '}'
    if str(self.currentToken) == elseKeyword:
      self.writeAdv()  # 'else'
      self.writeAdv()  # '{'
      self.compileStatements()
      self.writeAdv()  # '}'

    vmOut.writeLabel(endLabel)
    self.decreaseIndent()
    self.outIndent()
    self.output1.write('</ifStatement>\n')
    return

  def compileExpression(self):
    """Compile an expression and wrap it in an <expression> element.

    All of the work is delegated to compileTerm, which handles the trailing
    `op term` pairs itself through recursion.
    """
    xmlOut = self.output1

    # Open the element one level deeper in the parse tree.
    self.outIndent()
    xmlOut.write('<expression>\n')
    self.increaseIndent()

    self.compileTerm()

    # No more terms: close the element at the outer indentation.
    self.decreaseIndent()
    self.outIndent()
    xmlOut.write('</expression>\n')
    return

  def compileTerm(self):
    """Compile one term and, recursively, any `op term` that follows it.

    Writes a <term> element to output1 and emits the matching VM commands
    through theVMWriter.  The kind of term is chosen from the tag of the
    current token and, for identifiers, from the token right after it.
    Once the element is closed, a following binary operator makes the method
    call itself for the right-hand operand and then emit the operator.

    NOTE(review): because the recursive call compiles everything to the right
    before the operator is written, a chain such as `a - b - c` is emitted as
    `a b c - -`, i.e. grouped from the right.  The same applies to the unary
    branches: in `-a + b` the NEG is written after the `+`.  Confirm whether
    this grouping is intended.
    """
    self.outIndent()
    self.output1.write('<term>\n')
    self.increaseIndent()

    

    # Tokens have the form '<tag> value </tag>\n': field 0 is the opening
    # tag and field 1 is the first word of the value.
    daToken = str(self.currentToken.split()[0])
    VMToken = str(self.currentToken.split()[1])
    
    # Decide which kind of term to compile.
    if daToken == '<identifier>':
      # The token after the identifier tells array access, calls and plain
      # variables apart.
      lookAhead = self.tTp[self.tokenCounter+1]
      if lookAhead == '<symbol> [ </symbol>\n': # array element: varName[expression]
        # Push the array variable first, then compile the index expression.
        theSegment = self.theSymbolTable.getKind(VMToken)
        theIndex = self.theSymbolTable.getID(VMToken)
        self.theVMWriter.writePush(theSegment,theIndex)
        self.writeAdv() # write the identifier varName
        self.writeAdv() # the [
        self.compileExpression()
        self.writeAdv() # the ]
        # Add the index to the array base, store the sum in pointer 1 and
        # push that 0, i.e. the addressed element.
        self.theVMWriter.writeArithmetic('+')
        self.theVMWriter.writePop('pointer','1')
        self.theVMWriter.writePush('that','0')
        
      elif lookAhead == '<symbol> ( </symbol>\n': # subroutine call without a prefix
        # The call target is className.subroutineName; argument 0 is pushed
        # ahead of the explicit arguments.
        callF = str(self.currentToken.split()[1])
        self.theVMWriter.writePush('argument','0')
        self.writeAdv() # subroutineName
        self.writeAdv() # '('
        self.compileExpressionList()
        self.writeAdv() # ')'
        # NOTE(review): the argument count is looked up in the symbol table
        # under the subroutine name instead of being counted from the
        # expression list - confirm the table stores that.
        numOfarg = self.theSymbolTable.getID(callF)
        self.theVMWriter.writeCall(self.className+'.'+callF,numOfarg)
        # Discard the return value when the table marks the subroutine void.
        if self.theSymbolTable.getVoid(callF):
          self.theVMWriter.writePop('temp','0')
      elif lookAhead == '<symbol> . </symbol>\n': # prefixed call: name.subroutineName(...)
        # The prefix is used verbatim as the first half of the call target.
        otherClassname = str(self.currentToken.split()[1])
        almostNew = self.tokenCounter  # not read again in this method
        self.writeAdv() # className|varName
        self.writeAdv() # '.'
        callF = str(self.currentToken.split()[1]) # subroutineName
        self.writeAdv() # subroutineName
        self.writeAdv() # '('
        self.compileExpressionList()
        self.writeAdv() # ')'
        # NOTE(review): same symbol-table lookup for the argument count as in
        # the branch above; numOfargClass is never used.
        numOfarg = self.theSymbolTable.getID(callF)
        numOfargClass = self.theSymbolTable.getID(callF)
        self.theVMWriter.writeCall(otherClassname+'.'+callF,numOfarg)
        # No value is popped here, and nothing is pushed for the prefix
        # itself before the arguments.
      else: # plain variable: push its segment and index
        theSegment = self.theSymbolTable.getKind(VMToken)
        theIndex = self.theSymbolTable.getID(VMToken)
        self.theVMWriter.writePush(theSegment,theIndex)
        self.writeAdv() # just a varName
    elif daToken == '<integerConstant>':
      self.theVMWriter.writePush('constant',VMToken)
      self.writeAdv()
    elif daToken == '<stringConstant>':
      # Rebuild the string from the fields between the two tags.
      # NOTE(review): split() followed by ' '.join() collapses runs of
      # whitespace and drops leading/trailing spaces of the constant.
      arrayVMToken = self.currentToken.split()
      eachletter = list(' '.join(arrayVMToken[1:-1]))
      # Create a String of the needed length, then append one character
      # code at a time.
      self.theVMWriter.writePush('constant',len(eachletter))
      self.theVMWriter.writeCall('String.new',1)
      for i in range(len(eachletter)):
        self.theVMWriter.writePush('constant',ord(eachletter[i]))
        self.theVMWriter.writeCall('String.appendChar',2)
        
      self.writeAdv()
    elif str(self.currentToken) == '<keyword> true </keyword>\n':
      # true is emitted as constant 1 followed by NEG
      self.theVMWriter.writePush('constant',1)
      self.theVMWriter.writeArithmetic('NEG')
      self.writeAdv()
    elif str(self.currentToken) == '<keyword> false </keyword>\n':
      self.theVMWriter.writePush('constant',0)
      self.writeAdv()
    elif str(self.currentToken) == '<keyword> null </keyword>\n':
      self.theVMWriter.writePush('constant',0)
      self.writeAdv()
    elif str(self.currentToken) == '<keyword> this </keyword>\n':
      self.theVMWriter.writePush('pointer',0)
      self.writeAdv()
    elif str(self.currentToken) == '<symbol> ( </symbol>\n':
      # parenthesised sub-expression
      self.writeAdv() # for (
      self.compileExpression()
      self.writeAdv() # for )
    elif str(self.currentToken) == '<symbol> - </symbol>\n':
      # unary minus: compile the operand, then negate it
      self.writeAdv()
      self.compileTerm()
      self.theVMWriter.writeArithmetic('NEG') # now write op
    elif str(self.currentToken) == '<symbol> ~ </symbol>\n':
      # unary not: compile the operand, then pass '~' to the writer
      self.writeAdv()
      self.compileTerm()
      self.theVMWriter.writeArithmetic('~') # now write op
    
    
    # End of the term: close the element at the outer indentation.
    self.decreaseIndent()
    self.outIndent()
    self.output1.write('</term>\n')

    # A binary operator here means another term follows; it is compiled by
    # recursion and the operator is emitted afterwards.
    if self.checkOp():
      opTerm = str(self.currentToken.split()[1]) # save op to write after
      self.writeAdv() # write the op term
      self.compileTerm()
      self.theVMWriter.writeArithmetic(opTerm) # now write op
    return

  def compileExpressionList(self):
    """Compile a possibly empty, comma-separated list of expressions.

    Writes the <expressionList> element to output1 and keeps compiling
    expressions for as long as checkMoreTerms() reports that one starts at
    the current token, consuming the comma after each when present.
    """
    commaToken = '<symbol> , </symbol>\n'
    xmlOut = self.output1

    self.outIndent()
    xmlOut.write('<expressionList>\n')
    self.increaseIndent()

    while True:
      if not self.checkMoreTerms():
        break
      self.compileExpression()
      if str(self.currentToken) == commaToken:
        self.writeAdv()  # the ',' separator

    self.decreaseIndent()
    self.outIndent()
    xmlOut.write('</expressionList>\n')
    return

  def outIndent(self):
    """Write the current indentation (two spaces per level) to output1."""
    depth = self.indent
    if depth > 0:
      # Two spaces per level; tabs were used in an earlier version.
      self.output1.write('  ' * depth)
    return

  def advance(self):
    """Move to the next entry of the token list and make it the current token."""
    nextPosition = self.tokenCounter + 1
    self.tokenCounter = nextPosition
    self.currentToken = self.tTp[nextPosition]

  def writeAdv(self):
    """Write the current token, indented, to output1 and step to the next one."""
    tokenLine = self.currentToken
    self.outIndent()
    self.output1.write(tokenLine)
    self.advance()
    return

  def increaseIndent(self):
    """Go one indentation level deeper in the XML output."""
    self.indent = self.indent + 1
    return

  def decreaseIndent(self):
    """Go one indentation level back out in the XML output."""
    self.indent = self.indent - 1
    return

  def checkStatement(self):
    """Return True when the current token is a statement keyword.

    The recognised keywords are if, let, while, do and return.
    """
    statementTokens = (
      '<keyword> if </keyword>\n',
      '<keyword> let </keyword>\n',
      '<keyword> while </keyword>\n',
      '<keyword> do </keyword>\n',
      '<keyword> return </keyword>\n',
    )
    return str(self.currentToken) in statementTokens

  def checkOp(self):
    """Return True when the current token is one of the binary operator symbols.

    The symbols are matched in their XML-escaped form (&amp;, &lt;, &gt;).
    """
    operatorSymbols = ('+', '-', '*', '/', '&amp;', '|', '&lt;', '&gt;', '=', '^')
    operatorTokens = ['<symbol> ' + symbol + ' </symbol>\n'
                      for symbol in operatorSymbols]
    return str(self.currentToken) in operatorTokens

  def checkMoreTerms(self):
    """Return True when the current token can start a term.

    A term starts with an identifier, an integer or string constant, one of
    the keyword constants (true, false, null, this), an opening parenthesis
    or a unary operator (- or ~).

    The earlier version peeked at the token after an identifier although
    every outcome of that peek returned True; the peek raised IndexError
    when the identifier was the last token, so it has been dropped.
    """
    fields = self.currentToken.split()
    # A blank token line cannot start a term.
    openingTag = str(fields[0]) if fields else ''

    if openingTag in ('<identifier>', '<integerConstant>', '<stringConstant>'):
      return True

    termStarters = (
      '<keyword> true </keyword>\n',
      '<keyword> false </keyword>\n',
      '<keyword> null </keyword>\n',
      '<keyword> this </keyword>\n',
      '<symbol> ( </symbol>\n',
      # unary operators
      '<symbol> - </symbol>\n',
      '<symbol> ~ </symbol>\n',
    )
    return str(self.currentToken) in termStarters
コード例 #22
0
class CompilationEngine():
    """Recursive-descent Jack compiler that accumulates VM code as text.

    Tokens are read from a JackTokenizer.  Every VMWriter call returns a
    string, which is appended to an internal buffer; compileClass() returns
    that buffer.

    NOTE(review): string constants and keyword constants are consumed without
    emitting VM code, array indices in `let` are compiled but not used for the
    store, and no receiver object is pushed for method calls.  These gaps are
    left as they were because the VMWriter/SymbolTable API needed to close
    them is not visible here.
    """

    def __init__(self, input_file):
        """Create the helper objects for *input_file* and load the first token."""
        self.st = SymbolTable()
        self.vmW = VMWriter()
        self.tknz = JackTokenizer(input_file)
        self._vm_string = ''
        self.tknz.advance()
        self.Op = []        # operators waiting for their right-hand operand
        self.Function = []  # name stack; entry 0 is the class being compiled
        self.ifCount = 0
        self.whileCount = 0
        self.expCount = 0

    @staticmethod
    def _asChoices(vetor):
        """Return *vetor* as a list so that a bare string is matched as a whole.

        `token in 'class'` is a substring test, which would accept tokens
        such as 'la' or 'ss'; wrapping the string in a list avoids that.
        """
        if isinstance(vetor, str):
            return [vetor]
        return vetor

    def eat(self, vetor):
        """Consume the current token if it equals *vetor* or is listed in it.

        Raises:
            Exception: when the current token is not one of the expected ones.
        """
        token = self.tknz.getToken()
        if token in self._asChoices(vetor):
            self.tknz.advance()
        else:
            raise Exception ("Esperado '"+str(vetor)+"' encontrado '"+token+"'")

    def eatType(self, vetor):
        """Consume the current token if its type equals *vetor* or is listed in it.

        Raises:
            Exception: when the current token has a different type.
        """
        tokenType = self.tknz.tokenType()
        if tokenType in self._asChoices(vetor):
            self.tknz.advance()
        else:
            raise Exception ("Esperado '"+str(vetor)+"' encontrado '"+tokenType+"'")

    def compileClass(self):
        """Compile a whole class and return the generated VM text."""
        self.eat('class')
        self.compileClassName()
        self.eat('{')
        self.compileClassVarDec()
        self.compileSubroutineDec()
        self.eat('}')
        return self._vm_string

    def compileClassVarDec(self):
        """Compile every consecutive static/field declaration into the symbol table."""
        while self.tknz.getToken() in ['static', 'field']:
            kind = self.tknz.getToken()
            self.eat(['static', 'field'])
            tokenType = self.tknz.getToken()
            self.compileType()
            name = self.tknz.getToken()
            self.compileVarName()
            self.st.define(name, tokenType, kind)
            while self.tknz.getToken() == ',':
                self.eat(',')
                name = self.tknz.getToken()
                self.compileVarName()
                self.st.define(name, tokenType, kind)
            self.eat(';')

    def compileSubroutineDec(self):
        """Compile every consecutive constructor/function/method declaration."""
        while self.tknz.getToken() in ['constructor', 'function', 'method']:
            self.st.startSubroutine()
            if self.tknz.getToken() == 'method':
                # A method receives its object as the first argument.
                self.st.define('this', self.className, 'arg')
            self.eat(['constructor', 'function', 'method'])
            if self.tknz.getToken() == 'void':
                self.eat('void')
            else:
                self.compileType()
            self.compileSubroutineName()
            self.eat('(')
            self.compileParameterList()
            self.eat(')')
            self.compileSubroutineBody()

    def compileParameterList(self):
        """Define every parameter up to the closing ')' as an 'arg' symbol."""
        while self.tknz.getToken() != ')':
            tokenType = self.tknz.getToken()
            self.compileType()
            name = self.tknz.getToken()
            self.compileVarName()
            self.st.define(name, tokenType, 'arg')
            if (self.tknz.getToken()==','):
                self.eat(',')

    def compileSubroutineBody(self):
        """Compile local declarations, emit the function header, then the statements."""
        # Label numbering restarts for every subroutine.
        self.ifCount = 0
        self.whileCount = 0
        self.eat('{')
        while self.tknz.getToken()=='var':
            self.compileVarDec()
        # The subroutine name was pushed by compileSubroutineName; the class
        # name stays at the bottom of the stack.
        subroutine = self.Function.pop(-1)
        function = self.Function[0]
        self._vm_string += self.vmW.writeFunction(function+'.'+subroutine, self.st.varCount('var'))
        self.compileStatements()
        self.eat('}')

    def compileVarDec(self):
        """Compile one `var` declaration, defining each name as a 'var' symbol."""
        self.eat('var')
        tokenType = self.tknz.getToken()
        self.compileType()
        name = self.tknz.getToken()
        self.compileVarName()
        kind = 'var'
        self.st.define(name, tokenType, kind)
        while self.tknz.getToken() == ',':
            self.eat(',')
            name = self.tknz.getToken()
            self.compileVarName()
            self.st.define(name, tokenType, kind)
        self.eat(';')

    def compileStatements(self):
        """Compile statements until the closing '}' of the enclosing block."""
        while self.tknz.getToken()!='}':
            self.compileStatement()

    def compileStatement(self):
        """Dispatch on the statement keyword.

        Raises:
            Exception: when the current token does not start a statement.
        """
        token = self.tknz.getToken()
        if token == 'let':
            self.compileLet()
        elif token == 'if':
            self.compileIf()
        elif token == 'while':
            self.compileWhile()
        elif token == 'do':
            self.compileDo()
        elif token == 'return':
            self.compileReturn()
        else:
            raise Exception ("Esperado 'let | if | while | do | return' encontrado '"+token+"'")

    def compileLet(self):
        """Compile `let name = expression;` and pop the result into the variable.

        NOTE(review): for `let name[index] = ...` the index expression is
        compiled, but the value is still popped into the variable itself.
        """
        self.eat('let')
        name = self.tknz.getToken()
        kind = self.st.kindOf(name)
        self.compileVarName()
        if (self.tknz.getToken()=='['):
            self.eat('[')
            self.compileExpression()
            self.eat(']')
        self.eat('=')
        self.compileExpression()
        self.eat(';')
        self._vm_string += self.vmW.writePop(kind, self.st.indexOf(name))

    def compileIf(self):
        """Compile an if/else statement.

        The label number is claimed before the body is compiled, so an `if`
        nested inside the body gets its own number instead of reusing this
        statement's labels.
        """
        suffix = str(self.ifCount)
        self.ifCount += 1
        self.eat('if')
        self.eat('(')
        self.compileExpression()
        self.eat(')')
        self.eat('{')
        self._vm_string += self.vmW.writeIf('IF_TRUE' + suffix)
        self._vm_string += self.vmW.writeGoto('IF_FALSE' + suffix)
        self._vm_string += self.vmW.writeLabel('IF_TRUE' + suffix)
        self.compileStatements()
        self.eat('}')
        if (self.tknz.getToken()=='else'):
            self._vm_string += self.vmW.writeGoto('IF_END' + suffix)
            self.eat('else')
            self.eat('{')
            self._vm_string += self.vmW.writeLabel('IF_FALSE' + suffix)
            self.compileStatements()
            self.eat('}')
            self._vm_string += self.vmW.writeLabel('IF_END' + suffix)
        else:
            self._vm_string += self.vmW.writeLabel('IF_FALSE' + suffix)

    def compileWhile(self):
        """Compile a while loop.

        As in compileIf, the label number is claimed up front so that loops
        nested in the body do not share labels with this one.
        """
        suffix = str(self.whileCount)
        self.whileCount += 1
        self.eat('while')
        self.eat('(')
        self._vm_string += self.vmW.writeLabel('WHILE_EXP' + suffix)
        self.compileExpression()
        self._vm_string += 'not\n'
        self._vm_string += self.vmW.writeIf('WHILE_END' + suffix)
        self.eat(')')
        self.eat('{')
        self.compileStatements()
        self.eat('}')
        self._vm_string += self.vmW.writeGoto('WHILE_EXP' + suffix)
        self._vm_string += self.vmW.writeLabel('WHILE_END' + suffix)

    def compileDo(self):
        """Compile `do call;` and discard the returned value into temp 0."""
        self.eat('do')
        self.compileSubroutineCall()
        self.eat(';')
        self._vm_string += self.vmW.writePop('temp', 0)

    def compileReturn(self):
        """Compile a return statement; a bare `return;` pushes constant 0 first."""
        self.eat('return')
        if (self.tknz.getToken()!=';'):
            self.compileExpression()
        else:
            self._vm_string += self.vmW.writePush('constant', 0)
        self.eat(';')
        self._vm_string += self.vmW.writeReturn()

    def compileExpression(self):
        """Compile `term (op term)*`, emitting each operator after its right operand."""
        self.compileTerm()
        while self.tknz.getToken() in ['+', '-', '*', '/', '&', '|', '<', '>', '=']:
            self.compileOp()
            self.compileTerm()
            if (self.Op[-1] in ['+', '-', '<', '>', '=', '&', '|']):
                op = self.Op.pop(-1)
                self._vm_string += self.vmW.writeArithmetic(op)
            elif (self.Op[-1] in ['*', '/']):
                # For * and / the writer's result is used as the call target.
                op = self.Op.pop(-1)
                self._vm_string += self.vmW.writeCall(self.vmW.writeArithmetic(op), 2)

    def compileTerm(self):
        """Compile a single term.

        NOTE(review): string constants and keyword constants are consumed
        without emitting any VM code.
        """
        if (self.tknz.tokenType() in ['intConst', 'stringConst', 'keyword']):
            if (self.tknz.tokenType() == 'intConst'):
                self._vm_string += self.vmW.writePush('constant', self.tknz.getToken())
            self.tknz.advance()
        elif (self.tknz.getToken()=='('):
            self.eat('(')
            self.compileExpression()
            self.eat(')')
        elif (self.tknz.getToken()=='-' or self.tknz.getToken()=='~'):
            self.compileUnaryOp()
            self.compileTerm()
            if (self.Op[-1] in ['~', '-']):
                op = self.Op.pop(-1)
                self._vm_string += self.vmW.writeArithmetic('unary'+op)
        else:
            following = self.tknz.nextToken()
            if (following=='['):
                self.compileVarName()
                self.eat('[')
                self.compileExpression()
                self.eat(']')
            elif (following=='.' or following=='('):
                # Both `owner.name(...)` and a bare `name(...)` are calls.
                self.compileSubroutineCall()
            else:
                name = self.tknz.getToken()
                kind = self.st.kindOf(name)
                self._vm_string += self.vmW.writePush(kind, self.st.indexOf(name))
                self.compileVarName()

    def compileExpressionList(self):
        """Compile the expressions up to ')' and return how many there were.

        The count is kept in a local while compiling, because a call nested
        in one of the expressions runs this method again; it is also stored
        in self.expCount for callers that read the attribute.
        """
        count = 0
        while self.tknz.getToken()!=')':
            count += 1
            self.compileExpression()
            if (self.tknz.getToken()==','):
                self.eat(',')
        self.expCount = count
        return count

    def compileType(self):
        """Consume a type: int, char, boolean, String or any identifier.

        Raises:
            Exception: when the current token is not a type.
        """
        vetor = ['int','char','boolean', 'String']
        if (self.tknz.getToken() in vetor or self.tknz.tokenType() == 'identifier'):
            self.tknz.advance()
        else:
            raise Exception ("Esperado 'int' | 'char' | 'boolean' | className encontrado '"+self.tknz.getToken()+"'")

    def compileClassName(self):
        """Record the current token as the class name and consume it."""
        self.className = self.tknz.getToken()
        self.Function.append(self.className)
        self.eatType('identifier')

    def compileSubroutineName(self):
        """Record the current token as the subroutine name and consume it."""
        self.subroutineName = self.tknz.getToken()
        self.Function.append(self.subroutineName)
        self.eatType('identifier')

    def compileVarName(self):
        """Consume an identifier token."""
        self.eatType('identifier')

    def compileSubroutineCall(self):
        """Compile `owner.name(args)` or `name(args)` and emit the call.

        The names are kept in locals: the earlier version went through
        compileClassName, which overwrote self.className with the callee's
        prefix, and for a bare `name(args)` call it popped the class name
        off self.Function, breaking the next subroutine declaration.
        """
        first = self.tknz.getToken()
        self.eatType('identifier')
        if (self.tknz.getToken()=='.'):
            self.eat('.')
            owner = first
            subroutine = self.tknz.getToken()
            self.eatType('identifier')
        else:
            # A call without a prefix targets the class being compiled.
            owner = self.className
            subroutine = first
        self.eat('(')
        argCount = self.compileExpressionList()
        self.eat(')')
        self._vm_string += self.vmW.writeCall(owner+'.'+subroutine, argCount)

    def compileOp(self):
        """Push the current binary operator on the operator stack and consume it.

        Raises:
            Exception: when the current token is not a binary operator.
        """
        vetor = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
        if (self.tknz.getToken() in vetor ):
            self.Op.append(self.tknz.getToken())
            self.tknz.advance()
        else:
            raise Exception ("Esperado '+' | '-' | '* | '/' | '&' | '|' | '<' | '>' | '=' encontrado '"+self.tknz.getToken()+"'")

    def compileUnaryOp(self):
        """Push the current unary operator on the operator stack and consume it.

        Raises:
            Exception: when the current token is neither '-' nor '~'.
        """
        vetor = ['-', '~']
        if (self.tknz.getToken() in vetor ):
            self.Op.append(self.tknz.getToken())
            self.tknz.advance()
        else:
            raise Exception ("Esperado '-' | '~' encontrado '"+self.tknz.getToken()+"'")

    def compileKeywordConstant(self):
        """Consume one of true, false, null or this.

        Raises:
            Exception: when the current token is not a keyword constant.
        """
        vetor = ['true', 'false', 'null', 'this']
        if (self.tknz.getToken() in vetor ):
            self.tknz.advance()
        else:
            raise Exception ("Esperado 'true' | 'false' | 'null' | 'this' encontrado '"+self.tknz.getToken()+"'")
コード例 #23
0
class Parser:
    def __init__(self, directory, filename):
        """Create the symbol table and VM writer and set up the lookup tables.

        *directory* and *filename* are passed straight to VMWriter.
        """
        self.symbolTable = SymbolTable()
        self.VMWriter = VMWriter(directory, filename)
        self.className = ''

        # Token stream, read cursor and running label number.
        self.tokens, self.tokenIndex, self.labelCounter = [], 0, 0

        # Keywords that open a statement and token types that can open an
        # expression.
        self.statementOptions = ['let', 'while', 'do', 'if', 'return']
        self.expressionOptions = ['integerConstant', 'stringConstant', 'identifier']

        # Jack symbol -> text handed to VMWriter.writeArithmetic.
        self.op = dict([
            ('+', 'add'),
            ('-', 'sub'),
            ('*', 'call Math.multiply 2'),
            ('/', 'call Math.divide 2'),
            ('=', 'eq'),
            ('<', 'lt'),
            ('>', 'gt'),
            ('&', 'and'),
            ('|', 'or'),
        ])
        self.unaryOp = dict([('-', 'neg'), ('~', 'not')])
        # Keyword constant -> the text pushed for it.
        self.keywordConstant = dict([
            ('true', 'constant 1\n neg'),
            ('false', 'constant 0'),
            ('this', 'pointer 0'),
            ('null', 'constant 0'),
        ])

        self.classVariableKinds = ['field', 'static']
        self.subroutineVariableKinds = ['local', 'argument']
        self.subroutineTypes = ['method', 'constructor', 'function']
        self.variableTypes = ['int', 'char', 'boolean']

    def parse(self, tokens):
        self.tokens = tokens
        self.compileClass()
        self.VMWriter.close()
        return

    def expect(self, *args):
        expectedString = self.currentToken()

        if expectedString['value'] not in args and expectedString[
                'type'] != 'identifier':
            print(self.currentToken())
            raise Exception("Expecting either of '{0}'; saw '{1}'".format(
                ','.join([x for x in args]), expectedString['value']))
        else:
            # ateprint(self.currentToken())
            tokenValue = self.currentToken()['value']
            self.advanceIndex(1)
            return tokenValue

    def advanceIndex(self, step):
        self.tokenIndex += step

    def currentToken(self):
        return self.tokens[self.tokenIndex]

    # starts program compilation at class-level
    def compileClass(self):
        self.expect('class')
        self.className = self.compileClassName()
        self.expect('{')

        while self.currentToken()['value'] in self.classVariableKinds:
            self.compileClassVarDec()

        currentToken = self.currentToken()['value']
        while currentToken in self.subroutineTypes:
            self.compileSubroutineDec()
            currentToken = self.currentToken()['value']

        self.expect('}')

    def compileClassVarDec(self):
        variable = dict()
        variable['kind'] = self.expect('static', 'field')
        variable['type'] = self.compileType()
        variable['name'] = self.compileVarName()
        self.symbolTable.addVariable(variable)

        while self.currentToken()['value'] == ',':
            self.expect(',')
            variable['name'] = self.compileVarName()
            self.symbolTable.addVariable(variable)

        self.expect(';')

    def compileClassName(self):
        return self.expect('')

    def compileSubroutineDec(self):
        # reset the subroutine symbol table for new subroutine
        self.symbolTable.startNewSubroutine()

        self.subroutineType = self.expect('method', 'constructor', 'function')
        if self.subroutineType == 'method':
            # print(self.subroutineType)
            variable = dict()
            variable['kind'] = 'argument'
            variable['type'] = self.className
            variable['name'] = 'this'
            self.symbolTable.addVariable(variable)

        self.returnType = self.expect('void', 'int', 'char', 'boolean')
        self.compileSubroutineName()

        # subroutine name in VM => className.subroutineName
        subroutineName = self.className + '.' + self.subroutineName
        self.expect('(')
        numberOfParameters = self.compileParameterList()
        self.expect(')')
        self.compileSubroutineBody(numberOfParameters, subroutineName)

    def compileSubroutineName(self):
        self.subroutineName = self.expect('')

    def compileParameterList(self):
        numberOfParameters = 0

        if self.currentToken()['type'] == 'identifier' or self.currentToken(
        )['value'] in self.variableTypes:
            numberOfParameters = self.variableList('argument')

        return numberOfParameters

    def compileSubroutineBody(self, numberOfParameters, subroutineName):
        self.expect('{')

        while self.currentToken()['value'] == 'var':
            self.compileVarDec()

        numberOfLocalVars = self.symbolTable.getNumberOfLocalVars()
        self.VMWriter.writeFunction(subroutineName, numberOfLocalVars)

        if self.subroutineType == 'constructor':
            # allocate memory for the instance variables of the  object to be constructed
            memorySpaceNeeded = self.symbolTable.getNumberOfFieldVars()
            self.VMWriter.writePush('constant', memorySpaceNeeded)
            self.VMWriter.writeCall('Memory.alloc', 1)
            # set the base of the THIS segment to the value of the address returned by the Memory.alloc function
            self.VMWriter.writePop('pointer', 0)
        # add the reference to the calling object as argument 0 of the subroutine
        if self.subroutineType == 'method':
            self.VMWriter.writePush('argument', 0)
            self.VMWriter.writePop('pointer', 0)

        #print(subroutineName, self.symbolTable.subroutineSymbolTable)
        self.compileStatements()
        self.expect('}')

    def compileVarDec(self):
        self.expect('var')
        self.variableList('local')
        self.expect(';')

    def variableList(self, kind):
        numberOfParameters = 0

        # populate the respective (calling function) symbol table with the variables found
        variable = dict()
        variable['kind'] = kind
        variable['type'], variable['name'] = self.VarDec()
        self.symbolTable.addVariable(variable)
        numberOfParameters += 1

        # compile multiple variable declarations
        while self.currentToken()['value'] == ',':
            self.expect(',')
            if self.currentToken()['type'] == 'keyword':
                variable['type'] = self.compileType()
            variable['name'] = self.compileVarName()
            self.symbolTable.addVariable(variable)
            numberOfParameters += 1

        return numberOfParameters

    def VarDec(self):
        return self.compileType(), self.compileVarName()

    def compileVarName(self):
        return self.expect('')

    def compileType(self):
        return self.expect('int', 'char', 'boolean')

    def compileStatements(self):
        while self.currentToken()['value'] in self.statementOptions:
            self.compileStatement()

    def compileStatement(self):
        if self.currentToken()['value'] == 'if':
            self.compileIfStatement()
        elif self.currentToken()['value'] == 'while':
            self.compileWhileStatement()
        elif self.currentToken()['value'] == 'let':
            self.compileLetStatement()
        elif self.currentToken()['value'] == 'do':
            self.compileDoStatement()
        elif self.currentToken()['value'] == 'return':
            self.compileReturnStatement()

    def compileIfStatement(self):
        self.expect('if')
        self.expect('(')
        self.compileExpression()
        self.expect(')')
        self.VMWriter.writeArithmetic('not')
        label1 = self.advanceLabelCounter()
        self.VMWriter.writeIFGOTO('L{0}'.format(label1))
        self.expect('{')
        self.compileStatements()
        self.expect('}')
        label2 = self.advanceLabelCounter()
        self.VMWriter.writeGOTO('L{0}'.format(label2))
        self.VMWriter.writeLabel('L{0}'.format(label1))
        if self.currentToken()['value'] == 'else':
            self.expect('else')
            self.expect('{')
            self.compileStatements()
            self.expect('}')
        self.VMWriter.writeLabel('L{0}'.format(label2))

    def advanceLabelCounter(self):
        self.labelCounter += 1
        return self.labelCounter

    def compileWhileStatement(self):
        self.expect('while')
        self.expect('(')
        label1 = self.advanceLabelCounter()
        self.VMWriter.writeLabel('L{0}'.format(label1))
        self.compileExpression()
        self.VMWriter.writeArithmetic('not')
        label2 = self.advanceLabelCounter()
        self.VMWriter.writeIFGOTO('L{0}'.format(label2))
        self.expect(')')
        self.expect('{')
        self.compileStatements()
        self.VMWriter.writeGOTO('L{0}'.format(label1))
        self.expect('}')
        self.VMWriter.writeLabel('L{0}'.format(label2))

    def compileDoStatement(self):
        self.expect('do')
        self.advanceIndex(1)
        self.compileSubroutineCall()
        self.expect(';')
        self.VMWriter.writePop('temp', 0)

    def compileLetStatement(self):
        self.expect('let')
        variable = self.compileVarName()
        segment = self.symbolTable.getVariableKind(variable)
        index = self.symbolTable.getVariablePosition(variable)

        currentToken = self.currentToken()['value']

        if currentToken == '[':
            self.VMWriter.writePush(segment, index)
            self.expect('[')
            self.compileExpression()
            self.expect(']')
            self.VMWriter.writeArithmetic('add')

        self.expect('=')
        self.compileExpression()
        self.expect(';')

        if currentToken == '[':
            self.VMWriter.writePop('temp', 0)
            self.VMWriter.writePop('pointer', 1)
            self.VMWriter.writePush('temp', 0)
            self.VMWriter.writePop('that', 0)
        else:
            self.VMWriter.writePop(segment, index)

    def compileReturnStatement(self):
        self.expect('return')
        currentToken = self.currentToken()
        if currentToken['type'] in self.expressionOptions or currentToken['value'] in self.keywordConstant\
                or self.currentToken()['value'] in self.unaryOp:
            self.compileExpression()
        self.expect(';')
        if self.returnType == 'void':
            self.VMWriter.writePush('constant', 0)
        self.VMWriter.writeReturn()

    def compileSubroutineCall(self):
        """Compile a subroutine call and emit the matching VM ``call``.

        Entered with the current token being the ``'.'`` or ``'('`` that
        follows an identifier.  The token index is first moved back by one
        (presumably onto that identifier) so the name can be consumed.

        * ``prefix.name(args)``: if the symbol table reports a type for
          ``prefix`` it is treated as a variable: its value is pushed, its
          type replaces it in the call name, and the argument count grows
          by one.  Otherwise ``prefix`` is used verbatim.
        * ``name(args)``: the bare name is used as the call target.

        NOTE(review): in the unqualified form nothing is pushed before the
        arguments and the emitted name carries no class prefix; confirm
        this is what the callers and the VM writer expect.
        """
        isQualified = self.currentToken()['value'] == '.'
        # Both forms begin by stepping back one token.
        self.advanceIndex(-1)

        implicitArgs = 0
        if isQualified:
            qualifier = self.compileVarName()
            declaredType = self.symbolTable.getVariableType(qualifier)
            segment = self.symbolTable.getVariableKind(qualifier)
            index = self.symbolTable.getVariablePosition(qualifier)
            if declaredType is not None:
                # Known variable: pass it as the first argument and call
                # through its declared type.
                self.VMWriter.writePush(segment, index)
                qualifier = declaredType
                implicitArgs = 1
            qualifier += self.expect('.')
            memberName, listedArgs = self.subroutineCall()
            callTarget = qualifier + memberName
        else:
            memberName, listedArgs = self.subroutineCall()
            callTarget = memberName

        self.VMWriter.writeCall(callTarget, implicitArgs + listedArgs)

    def subroutineCall(self):
        """Consume ``name ( expressionList )`` and describe the call.

        Returns:
            A ``(name, argumentCount)`` tuple: the value returned by
            ``expect('')`` for the name token and the count returned by
            ``compileExpressionList``.
        """
        calleeName = self.expect('')
        self.expect('(')
        argumentCount = self.compileExpressionList()
        self.expect(')')
        return (calleeName, argumentCount)

    def compileExpression(self):
        """Compile ``term (op term)*``.

        Both operands are compiled before their operator is written, and
        operators are applied strictly in the order they appear (no
        precedence handling is done here).
        """
        self.compileTerm()
        while True:
            symbol = self.currentToken()['value']
            if symbol not in self.op.keys():
                break
            operator = self.expect(symbol)
            self.compileTerm()
            # The VM command for the operator comes from the self.op table.
            self.VMWriter.writeArithmetic(self.op[operator])

    def compileExpressionList(self):
        """Compile a possibly empty, comma-separated list of expressions.

        The list is treated as empty unless the current token can start an
        expression: its type is in ``self.expressionOptions``, or its value
        is a keyword constant, ``'('`` or a unary operator.

        Returns:
            The number of expressions compiled.
        """
        first = self.currentToken()
        canStart = (
            first['type'] in self.expressionOptions
            or first['value'] in self.keywordConstant
            or first['value'] == '('
            or self.currentToken()['value'] in self.unaryOp
        )
        if not canStart:
            return 0

        self.compileExpression()
        count = 1
        # Each further expression is introduced by a comma.
        while self.currentToken()['value'] == ',':
            self.expect(',')
            self.compileExpression()
            count += 1
        return count

    def compileTerm(self):
        """Compile a single term and emit the VM code that evaluates it.

        Dispatches on the current token: a unary operator followed by a
        term, an identifier (subroutine call, indexed access or plain
        variable), an integer constant, a string constant, a keyword
        constant, or a parenthesized expression.
        """
        if self.currentToken()['value'] in self.unaryOp.keys():
            # Unary operator: compile the operand first, then emit the
            # command that self.unaryOp maps the operator to.
            operator = self.expect(self.currentToken()['value'])
            self.compileTerm()
            self.VMWriter.writeArithmetic(self.unaryOp[operator])

        elif self.currentToken()['type'] == 'identifier':
            # Remember the identifier, then look one token ahead to tell
            # a call, an indexed access and a plain variable apart.
            variable = self.currentToken()['value']
            self.advanceIndex(1)
            if self.currentToken()['value'] == '(' or self.currentToken(
            )['value'] == '.':
                # compileSubroutineCall steps the index back itself.
                self.compileSubroutineCall()

            elif self.currentToken()['value'] == '[':
                # Indexed access: push the variable, add the index
                # expression, then read through pointer 1 / that 0.
                segment = self.symbolTable.getVariableKind(variable)
                index = self.symbolTable.getVariablePosition(variable)
                self.VMWriter.writePush(segment, index)
                self.expect('[')
                self.compileExpression()
                self.expect(']')
                self.VMWriter.writeArithmetic('add')
                self.VMWriter.writePop('pointer', 1)
                self.VMWriter.writePush('that', 0)

            else:
                # Plain variable: undo the look-ahead, consume the
                # identifier and push its value.
                self.advanceIndex(-1)
                variable = self.expect('')
                segment = self.symbolTable.getVariableKind(variable)
                index = self.symbolTable.getVariablePosition(variable)
                self.VMWriter.writePush(segment, index)

        elif self.currentToken()['type'] == 'integerConstant':
            # The token value is passed through as the constant's index.
            integer = self.expect(self.currentToken()['value'])
            self.VMWriter.writePush('constant', integer)

        elif self.currentToken()['type'] == 'stringConstant':
            # Build the string at run time: String.new(length), then one
            # String.appendChar call per character code.
            string = self.expect(self.currentToken()['value'])
            self.VMWriter.writePush('constant', len(string))
            self.VMWriter.writeCall('String.new', 1)
            for character in string:
                self.VMWriter.writePush('constant', ord(character))
                self.VMWriter.writeCall('String.appendChar', 2)

        elif self.currentToken()['value'] in self.keywordConstant:
            # NOTE(review): the value mapped in self.keywordConstant is
            # passed as the segment with an empty index -- presumably the
            # mapping already holds the full operand text; confirm against
            # VMWriter.writePush.
            constant = self.expect(self.currentToken()['value'])
            self.VMWriter.writePush(self.keywordConstant[constant], '')

        elif self.currentToken()['value'] == '(':
            # Parenthesized sub-expression.
            self.expect('(')
            self.compileExpression()
            self.expect(')')