コード例 #1
0
def make_file(filename):
    """Compile one Jack source file into a sibling ``.vm`` file.

    The output path is ``filename`` with its last five characters removed
    and ``.vm`` appended (presumably stripping a ``.jack`` suffix -- confirm
    against callers).

    Args:
        filename: Path of the source file to tokenize and compile.
    """
    tokenizer = JackTokenizer()
    tokenizer.read_file(filename)
    tokenizer.divide_into_tokens()

    file_to_write = filename[:-5] + "." + "vm"
    writer = VMWriter(file_to_write)
    try:
        parser = CompilationEngine(tokenizer, writer)
        parser.void_subroutines()
        parser.methods_()
        parser.compile_class()
    finally:
        # Close the writer even when compilation raises, so the output
        # handle is never leaked.
        writer.close()
コード例 #2
0
ファイル: JackCompiler.py プロジェクト: abir0/nand2tetris
    def compileFiles(self):
        """Compile each file in ``self.filenames`` and generate code.

        The verbosity flag is obtained once from
        ``self.parse_arg(sys.argv)`` and handed to every engine.
        """
        verbose = self.parse_arg(sys.argv)

        for filename in self.filenames:
            vm_writer = VMWriter(filename)
            try:
                tokenizer = Tokenizer(filename)
                symbol_table = SymbolTable()
                tokenizer.tokenize()
                engine = CompilationEngine(tokens=tokenizer,
                                           vm_writer=vm_writer,
                                           symbol_table=symbol_table,
                                           verbose=verbose)
                engine.compileClass()
            finally:
                # The writer is created first, so it must be closed even if
                # tokenizing or compiling this file fails.
                vm_writer.close()
コード例 #3
0
class CompilationEngine(object):
    """docstring for CompilationEngine"""
    def __init__(self, inputFilePath):
        """Collect the files to compile and reset all per-file state.

        Args:
            inputFilePath: Path to a single file or to a directory. For a
                directory, every directory in the tree is handed to
                ``getFilePathsToCompile`` together with
                ``self.jackFilesToCompile``; a non-directory path is queued
                as is.
        """
        super(CompilationEngine, self).__init__()
        normalized = os.path.normpath(inputFilePath)

        self.compileDir = os.path.isdir(normalized)
        self.jackFilesToCompile = []
        self.inputFilePath = normalized

        if self.compileDir:
            # os.path.walk() no longer exists in Python 3; os.walk() is
            # available on both major versions. The visitor is still called
            # with the legacy (arg, dirname, names) arguments.
            for dirname, subdirs, files in os.walk(normalized):
                getFilePathsToCompile(self.jackFilesToCompile, dirname, subdirs + files)
        else:
            self.jackFilesToCompile.append(normalized)

        # Per-file state; tokenizer, treeRoot and writer are filled in by loadFile().
        self.tokenizer = None
        self.symbolTable = SymbolTable()
        self.treeRoot = None
        self.currentClassName = None
        self.currentSubroutineContext = None
        self.writer = None

    """ Load File """
    def loadFile(self, filepath):
        """Prepare the engine for compiling ``filepath``.

        Creates a ``JackTokenizer`` for the file, builds the parse tree,
        records the class-level details from that tree, and opens a
        ``VMWriter`` on ``filepath`` with its last five characters replaced
        by ``.vm``.
        """
        self.tokenizer = JackTokenizer(filepath)
        self._generateTree()
        self._initializeClassDetails(self.treeRoot)

        stem = filepath[:-5]
        vmPath = stem + ".vm"
        self.writer = VMWriter(vmPath)

    def setSubroutineContext(self, ctx):
        """Store ``ctx`` as the context of the subroutine currently being compiled."""
        self.currentSubroutineContext = ctx

    def compile(self):
        for filepath in self.jackFilesToCompile:
            self.loadFile(filepath)

            self._compileSubroutines()

            self.writer.close()

    """ Tree Generation """

    def _generateTree(self):
        # get first token, and decide which rule to start with (in 99.9% of cases start with 'class' rule)
        nextToken = self.tokenizer.peekNextToken()
        if nextToken == None:
            raise ValueError("No tokens to parse.")
        elif nextToken.getToken() != "class":
            raise ValueError("Program does not begin with a class declaration.")

        root = Tree.Node("class")
        self.compileClass(root)
        self.treeRoot = root # root of tree after it is all built out

    def _initializeClassDetails(self, treeRoot):
        """ void: Sets self.className and defines all the class level variables in the symbol table. """
        self.symbolTable.clearClassSymbols()

        className = treeRoot.children[1].elementVal
        self.currentClassName = className;

        classVarDecs = [c for c in treeRoot.children if c.elementName == "classVarDec"]
        for cvd in classVarDecs:
            var_info = [c.elementVal for c in cvd.children if c.elementName != "symbol"]
            kind = var_info[0].upper()
            type_ = var_info[1]
            
            for varname in var_info[2:]:
                self.symbolTable.define(varname, type_, kind)
        

    def _compileSubroutines(self):
        """ Initialize the subroutine symbol table  """
        subroutines = [c for c in self.treeRoot.children if c.elementName == "subroutineDec"]
        for node in subroutines:
            self._initializeSubroutineVars(node) # set subroutineCtx & init subroutineSymbolTable
            self._handleSubroutine(node)

    def _initializeSubroutineVars(self, node):
        """ void: sets the subroutineContext and initializes the symbolTable for argument vars """

        self.symbolTable.clearSubroutineSymbols()

        subroutineCtx = {
            "subroutineKind": node.children[0].elementVal, # method,function,constructor
            "voidReturn": node.children[1].elementVal == "void",
            "returnType": node.children[1].elementVal,
            "subroutineName": node.children[2].elementVal
        }

        self.setSubroutineContext(subroutineCtx)

        plistNode = node.children[4]
        arguments = [c for c in plistNode.children if c.elementName != "symbol"]
        if len(arguments) % 2 != 0:
            raise ValueError("REMOVE LATER: Odd number of (type,varname) combos.")

        # if dealing with method, add THIS as arg 0
        if subroutineCtx["subroutineKind"] == "method":
            self.symbolTable.define("this", self.currentClassName, "ARG")

        i = 0
        while i < len(arguments):
            type_ = arguments[i].elementVal
            varname = arguments[i+1].elementVal
            self.symbolTable.define(varname, type_, "ARG")
            i += 2    


    def _handleSubroutine(self, node):
        """ void: 
            1. Initialize Local Vars
            2. Write function vm code
            3. Kick off the compilation of subroutine body
        """
        curSubroutineKind = self.currentSubroutineContext["subroutineKind"]

        subBodyNode = None
        statementsNode = None
        for child in node.children:
            if child.elementName == "subroutineBody":
                subBodyNode = child
                break

        # initialize local vars in symbolTable
        varDecs = [c for c in subBodyNode.children if c.elementName == "varDec"]
        for vd in varDecs:
            locals_ = [c for c in vd.children if c.elementName != "symbol"]
            [varNode,typeNode] = locals_[:2] # var, type
            locals_ = locals_[2:] # varnames only

            type_ = typeNode.elementVal
            for varNode in locals_:
                varname = varNode.elementVal
                self.symbolTable.define(varname, type_, "VAR")
            
        nLocals = self.symbolTable.varCount("VAR")
        self.writer.writeFunction(self.currentClassName, self.currentSubroutineContext["subroutineName"], nLocals)

        if curSubroutineKind == "constructor":
            numFieldVars = self.symbolTable.varCount(k_FIELD)
            self.writer.writePush("constant", numFieldVars)
            self.writer.writeCall("Memory", "alloc", 1)
            self.writer.writePop("pointer", 0) # sets the base address of this
        elif curSubroutineKind == "method":
            self.writer.writePush("argument", 0) # first argument in a method is always the base addr of THIS
            self.writer.writePop("pointer", 0) # anchoring this before method body executes.


        # get statements & handle them
        statementsNode = None
        for child in subBodyNode.children:
            if child.elementName == "statements":
                statementsNode = child
                break

        self._handleSubroutineStatements(statementsNode)

    def _handleSubroutineStatements(self, statementsNode):
        for statement in statementsNode.children:
            if statement.elementName == "letStatement":
                self._handleLetStatement(statement)
            elif statement.elementName == "returnStatement":
                self._handleReturnStatement(statement)
            elif statement.elementName == "doStatement":
                self._handleDoStatement(statement)
            elif statement.elementName == "ifStatement":
                self._handleIfStatement(statement)
            elif statement.elementName == "whileStatement":
                self._handleWhileStatement(statement)

    def _handleLetStatement(self, letStatement):
        """Emit VM code for a let statement.

        The third child decides the form: ``=`` means ``varName = expr``,
        ``[`` means ``varName[expr1] = expr2``. Any other value emits nothing.
        """
        parts = letStatement.children
        marker = parts[2].elementVal

        if marker == "=":
            # varName = expr: evaluate the expression, then pop it into the variable
            self._handleExpression(parts[3])

            name = parts[1].elementVal
            segment = kindToSegmentMap[self.symbolTable.kindOf(name)]
            slot = self.symbolTable.indexOf(name)
            self.writer.writePop(segment, slot)
        elif marker == "[":
            # arr[expr1] = expr2
            name = parts[1].elementVal
            segment = kindToSegmentMap[self.symbolTable.kindOf(name)]
            slot = self.symbolTable.indexOf(name)

            # target address = expr1 + value of the variable
            self._handleExpression(parts[3])
            self.writer.writePush(segment, slot)
            self.writer.writeOp("+")  # produces add

            # evaluate expr2, park it in temp 0 so the address below it can
            # be popped into pointer 1, then store it through that[0]
            self._handleExpression(parts[6])
            self.writer.writePop("temp", 0)
            self.writer.writePop("pointer", 1)
            self.writer.writePush("temp", 0)
            self.writer.writePop("that", 0)


    def _handleWhileStatement(self, whileStatement):
        """Emit VM code for a while statement.

        Layout: expression label, condition, ``not``, if-goto end label,
        body statements, goto expression label, end label.
        """
        condition = whileStatement.children[2]
        body = whileStatement.children[5]

        # both labels use the counter value read before it is incremented
        suffix = str(self.symbolTable.whileLabelInx)
        expLabel = whileExpLabel + suffix
        endLabel = whileEndLabel + suffix
        self.symbolTable.incrementwhileLabelInx()

        emit = self.writer
        emit.writeLabel(expLabel)
        self._handleExpression(condition)

        emit.writeUnaryOp("~")  # produces not
        emit.writeIfGoto(endLabel)
        self._handleSubroutineStatements(body)
        emit.writeGoto(expLabel)
        emit.writeLabel(endLabel)

    def _handleIfStatement(self, ifStatement):
        """Emit VM code for an if statement, with or without an else clause.

        A statement node with more than seven children is treated as
        if/else; its else statements are read from child 9.
        """
        parts = ifStatement.children
        hasElse = len(parts) > 7

        # all labels of one statement share the counter value read here
        suffix = str(self.symbolTable.ifLabelInx)
        trueLabel = ifTrueLabel + suffix
        falseLabel = ifFalseLabel + suffix
        if hasElse:
            endLabel = ifEndLabel + suffix

        self.symbolTable.incrementifLabelInx()

        condition = parts[2]
        thenStatements = parts[5]
        if hasElse:
            elseStatements = parts[9]

        self._handleExpression(condition)  # condition value on top of stack

        emit = self.writer
        emit.writeIfGoto(trueLabel)
        emit.writeGoto(falseLabel)
        emit.writeLabel(trueLabel)

        self._handleSubroutineStatements(thenStatements)

        if hasElse:
            # skip over the else statements once the true branch is done
            emit.writeGoto(endLabel)
            emit.writeLabel(falseLabel)
            self._handleSubroutineStatements(elseStatements)
            emit.writeLabel(endLabel)
        else:
            emit.writeLabel(falseLabel)

    def _handleDoStatement(self, doStatement):
        symbol = doStatement.children[2].elementVal
        if symbol == "(":
            className = self.currentClassName
            subroutineName = doStatement.children[1].elementVal
            # assume that we are compiling a Class method which requires the first argument 
            # to be THIS base address.
            self.writer.writePush("pointer", 0) # push this base address as first arg
            expList = doStatement.children[3]
            expressions = [c for c in expList.children if c.elementName == "expression"]
            for e in expressions:
                self._handleExpression(e)

            self.writer.writeCall(className, subroutineName, len(expressions) + 1) # pointer 0 is the default arg0
            self.writer.writePop("temp", 0)
        elif symbol == ".":
            name = doStatement.children[1].elementVal
            isVarName = self.symbolTable.kindOf(name) != None;

            subroutineName = doStatement.children[3].elementVal
            expList = doStatement.children[5]
            expressions = [c for c in expList.children if c.elementName == "expression"]
            
            if isVarName:
                # handle method invocation
                # put the obj base address on the stack
                varKind = self.symbolTable.kindOf(name)
                className = self.symbolTable.typeOf(name)
                segment = kindToSegmentMap[varKind]
                index = self.symbolTable.indexOf(name)
                self.writer.writePush(segment, index)
                # handle remaining expressions
                for e in expressions:
                    self._handleExpression(e)

                nArgs = len(expressions) + 1 # including the object on which the method is being invoked
                self.writer.writeCall(className, subroutineName, nArgs)
            else:
                # handle static function invocation                
                for e in expressions:
                    self._handleExpression(e)

                nArgs = len(expressions)
                self.writer.writeCall(name, subroutineName, nArgs)

            self.writer.writePop("temp", 0)

    def _handleReturnStatement(self, returnStatement):
        if returnStatement.children[1].elementName == "expression":
            # returnStatement.walkAndPrint()
            self._handleExpression(returnStatement.children[1])
            self.writer.writeReturn()
        else:
            self.writer.writePush("constant", 0)
            self.writer.writeReturn()

    def _handleExpression(self, exprTree):
        if len(exprTree.children) == 1:
            termTree = exprTree.children[0]
            self._handleTerm(termTree)
        elif len(exprTree.children) == 0:
            exprTree.prin
        else:
            t1 = exprTree.children[0]
            opTermCombos = exprTree.children[1:]
            
            self._handleTerm(t1)
            i = 0
            while i < len(opTermCombos):
                operation = opTermCombos[i].elementVal
                nextTerm = opTermCombos[i+1]
                self._handleTerm(nextTerm)
                self.writer.writeOp(operation)
                i += 2

    def _handleTerm(self, termTree):
        firstToken = termTree.children[0]
        if len(termTree.children) > 1:
            # handle a[expr] | subroutineCall | (expression) | unaryOp term      
            secondToken = termTree.children[1]
            if firstToken.elementName == "identifier":
                if secondToken.elementVal == "[":
                    # handle a[expr]
                    varname = firstToken.elementVal
                    varKind = self.symbolTable.kindOf(varname)
                    segment = kindToSegmentMap[varKind]
                    index = self.symbolTable.indexOf(varname)

                    self._handleExpression(termTree.children[2])

                    self.writer.writePush(segment, index) # push varname_base_addr
                    
                    self.writer.writeOp("+")
                    self.writer.writePop("pointer", 1)
                    self.writer.writePush("that", 0)

                elif secondToken.elementVal in ["(", "."]:
                    if secondToken.elementVal == "(":
                        # assume that we are compiling a Class method which requires the first argument 
                        # to be THIS base address.
                        self.writer.writePush("pointer", 0) # push this base address as first arg
                        expListNode = termTree.children[2]
                        expressions = [c for c in expListNode.children if c.elementName == "expression"]
                        for e in expressions:
                            self._handleExpression(e)

                        self.writer.writeCall(self.currentClassName, firstToken.elementVal, len(expressions) + 1)
                    elif secondToken.elementVal == ".":
                        name = firstToken.elementVal
                        isVarName = self.symbolTable.kindOf(name) != None;

                        functionName = termTree.children[2].elementVal
                        expListNode = termTree.children[4]
                        expressions = [c for c in expListNode.children if c.elementName == "expression"]
                        
                        if isVarName:
                            varKind = self.symbolTable.kindOf(name)
                            className = self.symbolTable.typeOf(name)
                            segment = kindToSegmentMap[varKind]
                            index = self.symbolTable.indexOf(name)
                            self.writer.writePush(segment, index)

                            for e in expressions:
                                self._handleExpression(e)

                            nArgs = len(expressions) + 1
                            self.writer.writeCall(className, functionName, nArgs)
                        else:
                            for e in expressions:
                                self._handleExpression(e)

                            nArgs = len(expressions)
                            self.writer.writeCall(name, functionName, nArgs)

            elif firstToken.elementName == "symbol":
                if firstToken.elementVal in unaryOps:
                  self._handleTerm(secondToken)
                  self.writer.writeUnaryOp(firstToken.elementVal) # unaryOp term
                else:
                  self._handleExpression(secondToken) # (expression)
        else:
            if firstToken.elementName == t_integerConstant:
                self.writer.writePush("constant", firstToken.elementVal)
            elif firstToken.elementName == t_stringConstant:
                sLength = len(firstToken.elementVal)
                self.writer.writePush("constant", sLength)
                self.writer.writeCall("String", "new", 1)
                for char in firstToken.elementVal:
                    code = ord(char)
                    self.writer.writePush("constant", code)
                    self.writer.writeCall("String", "appendChar", 2)
            elif firstToken.elementName == t_keyword:
                if firstToken.elementVal == "null":
                    self.writer.writePush("constant", 0)
                elif firstToken.elementVal == "false":
                    self.writer.writePush("constant", 0)
                elif firstToken.elementVal == "true":
                    self.writer.writePush("constant", 0)
                    self.writer.writeUnaryOp("~") # produces "not"
                elif firstToken.elementVal == "this":
                    self.writer.writePush("pointer", 0)
            elif firstToken.elementName == t_identifier:
                varKind = self.symbolTable.kindOf(firstToken.elementVal)
                segment = kindToSegmentMap[varKind]
                index = self.symbolTable.indexOf(firstToken.elementVal)
                self.writer.writePush(segment, index)


    """ JACK Program Structure """

    def compileClass(self, subTreeNode):
        # handle class keyword
        token = self.tokenizer.getNextToken()
        if token.getToken() != 'class':
            raise ValueError("Program does not begin with a class declaration.")
        subTreeNode.addChild(token)

        # handle className
        token = self.tokenizer.getNextToken()
        self.validateClassName(token)
        subTreeNode.addChild(token)

        # handle '{'
        token = self.tokenizer.getNextToken()
        if token.getToken() != "{":
            raise ValueError("Invalid symbol.")
        subTreeNode.addChild(token)

        nextToken = self.tokenizer.peekNextToken()
        while nextToken.getToken() in ["static", "field"]:
            classVarDecSubTree = Tree.Node("classVarDec", subTreeNode.depth + 1)
            self.compileClassVarDec(classVarDecSubTree)
            subTreeNode.addChildTree(classVarDecSubTree)

            nextToken = self.tokenizer.peekNextToken()

        while nextToken.getToken() in ["constructor", "function", "method"]:
            subroutineDecSubTree = Tree.Node("subroutineDec", subTreeNode.depth + 1)
            self.compileSubroutineDec(subroutineDecSubTree)
            subTreeNode.addChildTree(subroutineDecSubTree)

            nextToken = self.tokenizer.peekNextToken()

        # get next token, verify '}' and write symbol (use indent)
        token = self.tokenizer.getNextToken()
        if token.getToken() != "}":
            raise ValueError("Invalid symbol. " + token.getToken())

        subTreeNode.addChild(token)

        return subTreeNode

    def compileClassVarDec(self, subTreeNode):
        # handle 'static' | 'field'
        token = self.tokenizer.getNextToken()
        if token.getToken() not in ['static', 'field']:
            raise ValueError("Class var declaration does not begin with \'static\' or \'field\'.")
        subTreeNode.addChild(token)

        # handle type
        token = self.tokenizer.getNextToken()
        self.validateType(token) #raises error true
        subTreeNode.addChild(token)

        # handle varname
        token = self.tokenizer.getNextToken()
        self.validateVarName(token)
        subTreeNode.addChild(token)

        # handle 0 or more comma separated varnames
        token = self.tokenizer.getNextToken()
        while token.getToken() == ",":
            subTreeNode.addChild(token)

            token = self.tokenizer.getNextToken()
            self.validateVarName(token)
            subTreeNode.addChild(token)

            token = self.tokenizer.getNextToken()

        # handle ';'
        if token.getToken() != ";":
            raise ValueError("Class var declaration does not end with a \';\' .")
        subTreeNode.addChild(token)

        return subTreeNode

    def compileSubroutineDec(self, subTreeNode):
        # handle constuctor, function, method
        token = self.tokenizer.getNextToken()
        if token.getToken() not in ['constructor', 'function', 'method']:
            raise ValueError("Subroutine declaration must begin with \'constructor\', \'function\', \'method\'.")
        subTreeNode.addChild(token)

        # handle 'void' | type
        token = self.tokenizer.getNextToken()
        if token.getToken() == "void":
            subTreeNode.addChild(token)
        elif self.validateType(token): # if not validateType, then we necessarily throw error
            subTreeNode.addChild(token)

        # handle subRoutineName
        token = self.tokenizer.getNextToken()
        self.validateSubroutineName(token)
        subTreeNode.addChild(token)

        # handle '('
        token = self.tokenizer.getNextToken()
        if token.getToken() != '(':
            raise ValueError("Expected \'(\' before parameter list.")
        subTreeNode.addChild(token)

        # handle parameterList
        pListSubTreeNode = Tree.Node("parameterList", subTreeNode.depth + 1)
        self.compileParameterList(pListSubTreeNode) # needs to printed like <parameterList></paremeterList> (even if no children)
        subTreeNode.addChildTree(pListSubTreeNode)

        # handle ')'
        token = self.tokenizer.getNextToken()
        if token.getToken() != ')':
            raise ValueError("Expected \')\' after parameter list, but got " + token.getToken() + " instead.")
        subTreeNode.addChild(token)

        # handle subroutineBody
        nextToken = self.tokenizer.peekNextToken()
        if nextToken.getToken() != '{':
            raise ValueError("Expeced \'{\' at start of subroutine body, but received: " + nextToken.getToken())

        subroutineBodySubTreeNode = Tree.Node("subroutineBody", subTreeNode.depth + 1)
        self.compileSubroutineBody(subroutineBodySubTreeNode)
        subTreeNode.addChildTree(subroutineBodySubTreeNode)

        return subTreeNode;

    def compileParameterList(self, subTreeNode):
        nextToken = self.tokenizer.peekNextToken()
        if not self.isType(nextToken):
            return subTreeNode

        token = self.tokenizer.getNextToken()
        # self.validateType(token)
        subTreeNode.addChild(token)

        token = self.tokenizer.getNextToken()
        self.validateVarName(token)
        subTreeNode.addChild(token)

        # handle 0 or more comma separated (type varname)
        nextToken = self.tokenizer.peekNextToken()
        while nextToken.getToken() == ",":
            token = self.tokenizer.getNextToken() # handle ','
            subTreeNode.addChild(token)

            token = self.tokenizer.getNextToken() # type
            self.validateType(token)
            subTreeNode.addChild(token)

            token = self.tokenizer.getNextToken() # varname
            self.validateVarName(token)
            subTreeNode.addChild(token)

            nextToken = self.tokenizer.peekNextToken()

        return subTreeNode

    def compileSubroutineBody(self, subTreeNode):
        token = self.tokenizer.getNextToken()
        if token.getToken() != "{":
            raise ValueError("Expeced \'{\' at start of subroutine body, but received: " + token.getToken())
        subTreeNode.addChild(token)

        # handle 0 or more varDecs
        nextToken = self.tokenizer.peekNextToken()
        while nextToken.getToken() == 'var':
            varDecSubTreeNode = Tree.Node("varDec", subTreeNode.depth + 1)
            self.compileVarDec(varDecSubTreeNode)
            subTreeNode.addChildTree(varDecSubTreeNode)

            nextToken = self.tokenizer.peekNextToken()

        # handle 0 or more statements
        statementsSubTreeNode = Tree.Node("statements", subTreeNode.depth + 1)
        self.compileStatements(statementsSubTreeNode)
        subTreeNode.addChildTree(statementsSubTreeNode)

        token = self.tokenizer.getNextToken()
        if token.getToken() != '}':
            raise ValueError("Expeced \'}\' at end of subroutine body, but got " + token.getToken() + " instead.")
        subTreeNode.addChild(token)

        return subTreeNode

    def compileVarDec(self, subTreeNode):
        # handle 'var'
        token = self.tokenizer.getNextToken()
        if token.getToken() != 'var':
            raise ValueError("Variable declaration must begin with \'var\'.")
        subTreeNode.addChild(token)

        # handle type
        token = self.tokenizer.getNextToken()
        self.validateType(token)
        subTreeNode.addChild(token)

        # handle var name
        token = self.tokenizer.getNextToken()
        self.validateVarName(token)
        subTreeNode.addChild(token)

        # handle 0 or more comma separated varnames
        token = self.tokenizer.getNextToken()
        while token.getToken() == ",":
            subTreeNode.addChild(token)

            token = self.tokenizer.getNextToken()
            self.validateVarName(token)
            subTreeNode.addChild(token)

            token = self.tokenizer.getNextToken()

        # handle ';'
        if token.getToken() != ';':
            raise ValueError("Variable declaration must end with \';\'.")
        subTreeNode.addChild(token)

        return subTreeNode;

    def isType(self, token):
        """Return whether ``token`` can name a type.

        True when the token text is listed in ``primitive_types`` or the
        token is an identifier.
        """
        return token.getToken() in primitive_types or self._isIdentifier(token)

    def isClassName(self, token):
        """Return whether ``token`` is an identifier (delegates to ``_isIdentifier``)."""
        return self._isIdentifier(token)

    def isSubroutineName(self, token):
        """Return whether ``token`` is an identifier (delegates to ``_isIdentifier``)."""
        return self._isIdentifier(token)

    def isVarName(self, token):
        """Return whether ``token`` is an identifier (delegates to ``_isIdentifier``)."""
        return self._isIdentifier(token)

    def validateType(self, token):
        """Validate that ``token`` names a type.

        A token whose text is listed in ``primitive_types`` is accepted
        directly; anything else is checked by ``validateClassName``.

        Returns:
            True for a primitive type, otherwise the result of
            ``validateClassName``.

        Raises:
            ValueError: Propagated from ``validateClassName`` when the token
                is not an identifier.
        """
        if token.getToken() in primitive_types:
            return True

        return self.validateClassName(token)

    def validateClassName(self, token):
        if not self._isIdentifier(token):
            raise ValueError("class name \'" + tokenVal + "\' is not a valid identifier.")

        return True;    

    def validateSubroutineName(self, token):
        if not self._isIdentifier(token):
            raise ValueError("subroutine name \'" + tokenVal + "\' is not a valid identifier.")

        return True;

    def validateVarName(self, token):
        if not self._isIdentifier(token):
            raise ValueError("var name \'" + tokenVal + "\' is not a valid identifier.")

        return True;            

    def _isIdentifier(self, token):
        """Return whether the token's type equals ``t_identifier``."""
        return token.getTokenType() == t_identifier


    """ JACK Statements """

    def compileStatements(self, subTreeNode):
        nextToken = self.tokenizer.peekNextToken()
        while nextToken.getToken() in ['let', 'if', 'while', 'do', 'return']:
            statementTreeNode = Tree.Node("CHANGEME", subTreeNode.depth + 1)
            if nextToken.getToken() == "let":
                statementTreeNode.setElementName("letStatement")
                self.compileLetStatement(statementTreeNode)
            elif nextToken.getToken() == "if":
                statementTreeNode.setElementName("ifStatement")
                self.compileIfStatement(statementTreeNode)
            elif nextToken.getToken() == "while":
                statementTreeNode.setElementName("whileStatement")
                self.compileWhileStatement(statementTreeNode)
            elif nextToken.getToken() == "do":
                statementTreeNode.setElementName("doStatement")
                self.compileDoStatement(statementTreeNode)
            elif nextToken.getToken() == "return":
                statementTreeNode.setElementName("returnStatement")
                self.compileReturnStatement(statementTreeNode)

            subTreeNode.addChildTree(statementTreeNode)
            nextToken = self.tokenizer.peekNextToken()

        return subTreeNode

    def compileLetStatement(self, subTreeNode):
        """Parse ``let varName ([ expression ])? = expression ;`` into subTreeNode.

        Every consumed token is added as a child of subTreeNode, and each
        expression is parsed into an "expression" child tree one level deeper.

        Returns:
            subTreeNode, for chaining.

        Raises:
            ValueError: if ``let``, ``]``, ``=`` or ``;`` is not where the
                grammar requires it.
        """
        def consume(expected, message):
            # Take the next token, insist on its text, then record it.
            tok = self.tokenizer.getNextToken()
            if tok.getToken() != expected:
                raise ValueError(message)
            subTreeNode.addChild(tok)

        def attachExpression():
            # Parse one expression into a fresh child tree.
            exprNode = Tree.Node("expression", subTreeNode.depth + 1)
            self.compileExpression(exprNode)
            subTreeNode.addChildTree(exprNode)

        consume("let", "let statement must begin with let.")

        target = self.tokenizer.getNextToken()
        self.validateVarName(target)
        subTreeNode.addChild(target)

        pending = self.tokenizer.getNextToken()
        if pending.getToken() == "[":
            # Array element target: varName [ expression ]
            subTreeNode.addChild(pending)
            attachExpression()
            consume("]", "Expected ].")
            pending = self.tokenizer.getNextToken()

        if pending.getToken() != "=":
            raise ValueError("Expected =.")
        subTreeNode.addChild(pending)

        attachExpression()
        consume(";", "Expected ;")
        return subTreeNode

    def compileIfStatement(self, subTreeNode):
        """Parse ``if ( expression ) { statements }`` plus an optional else block.

        Tokens are appended to subTreeNode as they are consumed; the condition
        and each statement block become child trees one level deeper.

        Returns:
            subTreeNode, for chaining.

        Raises:
            ValueError: if ``if``, a parenthesis or a brace is missing.
        """
        def consume(expected, message):
            tok = self.tokenizer.getNextToken()
            if tok.getToken() != expected:
                raise ValueError(message)
            subTreeNode.addChild(tok)

        def attach(elementName, compileInto):
            # Build a child tree with the given parser and hang it on the node.
            child = Tree.Node(elementName, subTreeNode.depth + 1)
            compileInto(child)
            subTreeNode.addChildTree(child)

        def bracedStatements():
            consume("{", "Expected \'{\'.")
            attach("statements", self.compileStatements)
            consume("}", "Expected \'}\'.")

        consume("if", "Expected if.")
        consume("(", "Expected \'(\'.")
        attach("expression", self.compileExpression)
        consume(")", "Expected \')\'.")
        bracedStatements()

        # The else branch is optional, so only peek before committing to it.
        if self.tokenizer.peekNextToken().getToken() == "else":
            subTreeNode.addChild(self.tokenizer.getNextToken())
            bracedStatements()

        return subTreeNode

    def compileWhileStatement(self, subTreeNode):
        """Parse ``while ( expression ) { statements }`` into subTreeNode.

        Returns:
            subTreeNode, for chaining.

        Raises:
            ValueError: if ``while``, a parenthesis or a brace is missing.
        """
        def consume(expected, message):
            tok = self.tokenizer.getNextToken()
            if tok.getToken() != expected:
                raise ValueError(message)
            subTreeNode.addChild(tok)

        def attach(elementName, compileInto):
            # Build a child tree with the given parser and hang it on the node.
            child = Tree.Node(elementName, subTreeNode.depth + 1)
            compileInto(child)
            subTreeNode.addChildTree(child)

        consume("while", "Expected \'while\'.")
        consume("(", "Expected \'(\'.")
        attach("expression", self.compileExpression)
        consume(")", "Expected \')\'.")
        consume("{", "Expected \'{\'.")
        attach("statements", self.compileStatements)
        consume("}", "Expected \'}\'.")

        return subTreeNode

    def compileDoStatement(self, subTreeNode):
        """Parse ``do subroutineCall ;`` into subTreeNode.

        The call itself is delegated to compileSubroutineCall, which adds its
        tokens directly to subTreeNode.

        Returns:
            subTreeNode, for chaining.

        Raises:
            ValueError: if ``do`` or the terminating ``;`` is missing.
        """
        def consume(expected, message):
            tok = self.tokenizer.getNextToken()
            if tok.getToken() != expected:
                raise ValueError(message)
            subTreeNode.addChild(tok)

        consume("do", "Expected \'do\'.")
        self.compileSubroutineCall(subTreeNode)
        consume(";", "Expected \';\'.")
        return subTreeNode

    def compileReturnStatement(self, subTreeNode):
        """Parse ``return expression? ;`` into subTreeNode.

        Returns:
            subTreeNode, for chaining.

        Raises:
            ValueError: if ``return`` or the terminating ``;`` is missing.
        """
        def consume(expected, message):
            tok = self.tokenizer.getNextToken()
            if tok.getToken() != expected:
                raise ValueError(message)
            subTreeNode.addChild(tok)

        consume("return", "Expected \'return\'.")

        # Anything other than ';' right after 'return' starts a return value.
        if self.tokenizer.peekNextToken().getToken() != ";":
            valueNode = Tree.Node("expression", subTreeNode.depth + 1)
            self.compileExpression(valueNode)
            subTreeNode.addChildTree(valueNode)

        consume(";", "Expected \';\'.")
        return subTreeNode

    """ JACK Expressions """

    def compileExpression(self, subTreeNode):
        """Parse ``term (op term)*`` into subTreeNode.

        Each term becomes a "term" child tree; each operator token (one whose
        text is in the module-level ``ops``) is added as a plain child.

        Returns:
            subTreeNode, for chaining.
        """
        def attachTerm():
            termNode = Tree.Node("term", subTreeNode.depth + 1)
            self.compileTerm(termNode)
            subTreeNode.addChildTree(termNode)

        attachTerm()

        # Keep going for as long as an operator follows the last term.
        while self.tokenizer.peekNextToken().getToken() in ops:
            subTreeNode.addChild(self.tokenizer.getNextToken())
            attachTerm()

        return subTreeNode

    def compileTerm(self, subTreeNode):
        """Parse a single term into subTreeNode using two tokens of lookahead.

        Handled forms: ``varName``, ``varName [ expression ]``, a subroutine
        call, integer/string/keyword constants, ``( expression )`` and
        ``unaryOp term``.

        Returns:
            subTreeNode, for chaining.

        Raises:
            ValueError: on a missing ``]`` or ``)``, or when the next token
                cannot start a term.
        """
        [first, second] = self.tokenizer.peekNextNTokens(2)

        def take():
            # Consume the next token and record it on the term node.
            tok = self.tokenizer.getNextToken()
            subTreeNode.addChild(tok)

        def attach(elementName, compileInto):
            child = Tree.Node(elementName, subTreeNode.depth + 1)
            compileInto(child)
            subTreeNode.addChildTree(child)

        # An identifier may be a variable, an array access or a call target;
        # the second lookahead token tells them apart.
        if self.isVarName(first):
            if second.getToken() == "[":
                take()  # varName
                take()  # [
                attach("expression", self.compileExpression)

                closing = self.tokenizer.getNextToken()
                if closing.getToken() != "]":
                    raise ValueError("Expected \']\', but received " + closing.getToken())
                subTreeNode.addChild(closing)
            elif second.getToken() in [".", "("]:
                self.compileSubroutineCall(subTreeNode)
            else:
                take()  # plain varName

        elif (first.getTokenType() == t_integerConstant
              or first.getTokenType() == t_stringConstant
              or first.getToken() in keywordConstants):
            # All three constant kinds are a single token.
            take()

        elif first.getToken() == "(":
            take()
            attach("expression", self.compileExpression)

            closing = self.tokenizer.getNextToken()
            if closing.getToken() != ")":
                raise ValueError("Expected \')\'")
            subTreeNode.addChild(closing)

        elif first.getToken() in unaryOps:
            take()
            attach("term", self.compileTerm)

        else:
            raise ValueError("Invalid Term: " + first.getToken() + ", " + second.getToken())

        return subTreeNode

    def compileSubroutineCall(self, subTreeNode):
        """Parse a subroutine call and add its tokens directly to subTreeNode.

        Two forms are accepted::

            subroutineName ( expressionList )
            (className | varName) . subroutineName ( expressionList )

        Returns:
            subTreeNode, for chaining.

        Raises:
            ValueError: if the token after the leading identifier is neither
                ``(`` nor ``.``, or if either parenthesis around the argument
                list is missing. validateSubroutineName is also called on the
                name tokens.
        """
        [first, second] = self.tokenizer.peekNextNTokens(2)

        # className, varName and subroutineName are all identifiers, so one
        # check covers the leading token of both call forms.
        self.validateSubroutineName(first)

        separator = second.getToken()
        if separator not in ("(", "."):
            raise ValueError("Expected \'<subroutineName>(\' or \'<className|varName>.\'.")

        def take():
            tok = self.tokenizer.getNextToken()
            subTreeNode.addChild(tok)

        if separator == ".":
            take()  # className | varName
            take()  # .
            name = self.tokenizer.getNextToken()
            self.validateSubroutineName(name)
            subTreeNode.addChild(name)
        else:
            take()  # subroutineName

        opening = self.tokenizer.getNextToken()
        if opening.getToken() != "(":
            raise ValueError("Expected \'(\'.")
        subTreeNode.addChild(opening)

        exprListTreeNode = Tree.Node("expressionList", subTreeNode.depth + 1)
        self.compileExpressionList(exprListTreeNode)
        subTreeNode.addChildTree(exprListTreeNode)

        # Both call forms must end with ')'; report the token that is missing.
        closing = self.tokenizer.getNextToken()
        if closing.getToken() != ")":
            raise ValueError("Expected \')\'.")
        subTreeNode.addChild(closing)

        return subTreeNode

    def compileExpressionList(self, subTreeNode):
        """Parse ``(expression (, expression)*)?`` into subTreeNode.

        An immediately following ``)`` means the list is empty and
        subTreeNode is returned untouched. The closing parenthesis itself is
        never consumed here.

        Returns:
            subTreeNode, for chaining.
        """
        if self.tokenizer.peekNextToken().getToken() == ")":
            return subTreeNode

        def attachExpression():
            exprNode = Tree.Node("expression", subTreeNode.depth + 1)
            self.compileExpression(exprNode)
            subTreeNode.addChildTree(exprNode)

        attachExpression()
        while self.tokenizer.peekNextToken().getToken() == ",":
            subTreeNode.addChild(self.tokenizer.getNextToken())
            attachExpression()

        return subTreeNode

    """ Writing XML """
    
    def writeXmlOutput(self, treeNode=None):
        """Write treeNode (default: self.treeRoot) to self.fileObject as XML.

        A leaf is written on one line as ``<name> value </name>`` with the
        value passed through _encodeXmlToken. Any other node is written as an
        opening tag, its children (recursively), and a closing tag. Each line
        is indented two spaces per level of ``treeNode.depth``.
        """
        if treeNode is None:
            treeNode = self.treeRoot

        pad = "  " * treeNode.depth
        name = treeNode.elementName
        write = self.fileObject.write

        if treeNode.isLeaf():
            write(pad + "<" + name + "> " + _encodeXmlToken(treeNode.elementVal) + " </" + name + ">\n")
            return

        write(pad + "<" + name + ">\n")
        for child in treeNode.children:
            self.writeXmlOutput(child)
        write(pad + "</" + name + ">\n")


    """ Managing Resources """

    def close(self):
        """Close the output file held in self.fileObject."""
        output = self.fileObject
        output.close()
コード例 #4
0
class CompilationEngine(object):
    def __init__(self, inputfile, outputfile):
        """Set up parser state, then open the buffers, tokenizer and VM writer.

        Args:
            inputfile: Jack source, passed to create_buffer for reading.
            outputfile: XML destination, passed to create_buffer for writing;
                _init also derives the ``.vm`` file name from it.
        """
        self._inputfile, self._outputfile = inputfile, outputfile

        # Parser state.
        self._tokenizer: JackTokenizer = None
        self._cur_root = []   # stack of XML elements currently being built
        self._n_args = []     # stack of argument counts for nested calls
        self._root = None
        self.class_name = None
        self.return_type = None
        self._label_cnt = 0   # source of unique label names
        self.vm_writer = None  # type:VMWriter

        # Opens the buffers and creates the tokenizer and VM writer.
        self._init()
        self.symbol = SymbolTable()
        self.vm_writer.set_engine(self)
        self.method_type = None

    def line_num(self):
        return self._tokenizer.line

    def _init(self):
        """Open the input/output buffers and create the VM writer and tokenizer.

        The VM file name is the output file name with its last four
        characters replaced by ``.vm``.
        NOTE(review): the slice assumes ``_outputfile`` is a string with a
        four-character extension such as ".xml" - confirm with callers.
        """
        source, target = self._inputfile, self._outputfile
        self._inputbuf = self.create_buffer(source)
        self._outputbuf = self.create_buffer(target, mode="w+")
        vm_path = target[:-4] + ".vm"
        self.vm_writer = VMWriter(vm_path)
        self._tokenizer = JackTokenizer(self._inputbuf)

    def create_buffer(self, fn, mode='r'):

        if isinstance(fn, str) or isinstance(fn, unicode):
            return open(fn, mode)
        elif isinstance(fn, file) or isinstance(fn, IOBase):
            return fn
        else:
            raise ValueError("file object show file or readable")

    def compile_class(self):
        """Compile one ``class Name { classVarDec* subroutineDec* }`` unit.

        Whether or not compilation succeeds, the XML tree built so far is
        written to the output buffer, and the VM writer and the output buffer
        are closed, so no file handle is left open when an error propagates.

        Raises:
            ValueError: propagated from _pop_required when a required token
                is missing.
        """
        parent = self._set_parent("class")
        self._root = parent
        try:
            self._advance()
            self._pop_required(parent, TokenType.keyword, KeywordType.CLASS)
            self.class_name = self._token()[1]
            self._pop_required(parent, TokenType.identifier)
            self._pop_required(parent, TokenType.symbol, "{")

            while self._is_class_var():
                self.compile_class_var_desc()

            while self._is_subroutine():
                self.compile_subroutine()
            self._pop_required(parent, TokenType.symbol, "}")
            print(self.symbol)
        finally:
            # Nested try/finally so that a failure in one cleanup step cannot
            # prevent the later ones from running.
            try:
                xml = et.tostring(self._root, pretty_print=True,
                                  method="c14n2").decode("utf-8")
                self._outputbuf.write(xml)
            finally:
                try:
                    self.vm_writer.close()
                finally:
                    self._outputbuf.close()

    def _required_type(self, token_type, val=None):
        """Check the current token against an expected type (and value).

        The value is compared only when the current token is a keyword or a
        symbol; for other token types matching the type is enough.

        Returns:
            The ``(type, value)`` pair of the current token.

        Raises:
            ValueError: if the type, or the keyword/symbol value, differs.
        """
        actual_type, actual_val = self._token()

        def mismatch():
            return ValueError("token must be %s,%s" % (token_type, val))

        if token_type != actual_type:
            raise mismatch()
        value_matters = (actual_type == TokenType.keyword
                         or actual_type == TokenType.symbol)
        if value_matters and val != actual_val:
            raise mismatch()
        return actual_type, actual_val

    def compile_class_var_desc(self):
        """Compile one class-level variable declaration up to and including ';'.

        Every token is appended to a "classVarDec" XML element. Each token in
        the name list that is not a comma is defined in the symbol table with
        the declaration's type and kind.
        """
        node = self._set_parent("classVarDec")

        def emit():
            node.append(self._build_element())

        # Could be refined with variable-type checks and identifier validation.
        emit()
        kind = self.get_kind()
        self._advance()
        var_type = self.get_type()
        emit()
        self._advance()

        while not self.is_token(TokenType.symbol, ";"):
            emit()
            is_comma = self._token()[1] == ","
            if not is_comma and self._token()[1] != ";":
                self.symbol.define(self._token()[1], var_type, kind)
            self._advance()

        emit()  # the terminating ';'
        self._advance()
        self._remove_parent()

    def get_kind(self):
        """Return the current token's value as a symbol kind.

        A KeywordType value is converted to its lower-cased member name; any
        other value is returned unchanged.
        """
        value = self._token()[1]
        return value.name.lower() if isinstance(value, KeywordType) else value

    def get_type(self):
        """Return the current token's value as a type name.

        A KeywordType value is converted to its lower-cased member name; any
        other value (such as a class-name identifier) is returned unchanged.
        """
        value = self._token()[1]
        if not isinstance(value, KeywordType):
            return value
        return value.name.lower()

    def compile_subroutine(self):
        """Compile one subroutine declaration: header, parameter list and body.

        Resets the subroutine scope of the symbol table, records the
        subroutine kind and return type on the engine, and writes VM comments
        marking the end of the function.
        """
        print(self.symbol)
        self.symbol.start_subroutine()
        node = self._set_parent("subroutineDec")

        # Header: <kind> <return type> <name>
        subroutine_kind = self.method_type = self._token()[1]
        self._advance()
        self.return_type = self._token()[1]
        self._advance()
        name = self._token()[1]
        self._advance()

        self._pop_required(node, TokenType.symbol, "(")
        self.compile_parameter_list()
        self._pop_required(node, TokenType.symbol, ")")

        qualified = "{}.{}".format(self.class_name, name)
        self._compile_body(qualified, subroutine_kind)
        self._remove_parent()

        self.vm_writer.write_comment("end function %s" % name)
        self.vm_writer.write_comment("")

    def _compile_body(self, full_name, method_type):
        """Compile ``{ varDec* statements }`` and emit the function prologue.

        Args:
            full_name: name written in the VM ``function`` command.
            method_type: subroutine kind keyword; constructors and methods
                get extra prologue code.
        """
        node = self._set_parent("subroutineBody")
        self._pop_required(node, TokenType.symbol, "{")
        while self._is_var_desc():
            self.compile_var_desc()

        n_locals = self.symbol.var_count("var")
        n_fields = self.symbol.var_count("field")
        self.vm_writer.write_function(full_name, n_locals)

        is_constructor = method_type == KeywordType.CONSTRUCTOR
        if is_constructor:
            # Constructor: allocate one word per field and point `this` at it.
            self.vm_writer.write_push(SEG_CONSTANT, n_fields)
            self.vm_writer.write_call("Memory.alloc", "1")
            self.vm_writer.write_pop(SEG_POINTER, "0")
        elif method_type == KeywordType.METHOD:
            # Method: set this = argument 0.
            self.vm_writer.write_push(SEG_ARG, "0")
            self.vm_writer.write_pop(SEG_POINTER, "0")

        self.compile_statements()
        self._pop_required(node, TokenType.symbol, "}")
        self._remove_parent()

    def _remove_parent(self):
        self._cur_root.pop()

    def compile_parameter_list(self):
        """Define every ``type name`` parameter as an "arg" symbol.

        Consumes tokens up to, but not including, the closing ``)``.
        """
        while not self.is_token(TokenType.symbol, ")"):
            param_type = self.get_type()
            self._advance()
            param_name = self._token()[1]
            self.symbol.define(param_name, param_type, "arg")
            self._advance()
            # Skip the separator between two parameters.
            if self.is_token(TokenType.symbol, ","):
                self._advance()

    def compile_var_desc(self):
        """Compile ``var type name (, name)* ;`` and define each name as "var"."""
        node = self._set_parent("varDec")
        self._pop_required(node, TokenType.keyword, KeywordType.VAR)

        var_type = self.get_type()
        node.append(self._build_element())
        self._advance()

        while not self.is_token(TokenType.symbol, ";"):
            # Everything in the name list except separators is a variable name.
            is_separator = (self.is_token(TokenType.symbol, ",")
                            or self.is_token(TokenType.symbol, ";"))
            if not is_separator:
                self.symbol.define(self._token()[1], var_type, "var")
            self._advance()

        self._pop_required(node, TokenType.symbol, ";")
        self._remove_parent()

    def compile_statements(self):
        """Compile statements until the current token no longer starts one."""
        self._set_parent("statements")

        # (predicate, compiler, restart): checked in this order on every pass.
        # After an `if` or `while` the pass restarts at once; after the other
        # kinds the remaining predicates are still tried against the new
        # current token.
        dispatch = (
            (self.is_let_statement, self.compile_let, False),
            (self.is_do_statement, self.compile_do, False),
            (self.is_return_statement, self.compile_return, False),
            (self.is_if_statement, self.compile_if, True),
            (self.is_while_statement, self.compile_while, True),
        )

        while self._is_statement():
            for matches, compile_it, restart in dispatch:
                if matches():
                    compile_it()
                    if restart:
                        break

        self._remove_parent()

    def compile_do(self):
        """Compile ``do subroutineCall ;`` and discard the call's return value."""
        node = self._set_parent("doStatement")
        self._pop_required(node, TokenType.keyword, KeywordType.DO)

        target = self._pop_required(node, TokenType.identifier)
        ident_type, ident = target
        self.compile_call(ident_type, ident)

        # The value left on the stack by the call is popped into temp 0.
        self.vm_writer.write_pop(SEG_TEMP, 0)
        self._pop_required(node, TokenType.symbol, ";")
        self._remove_parent()

    def compile_call(self, typ1, id1):
        """Compile the rest of a subroutine call whose first identifier is id1.

        Three forms are handled:

        * ``var.method(...)``: id1 is a known symbol; its value is pushed as
          the receiver and the call targets ``<type of var>.method``.
        * ``Class.function(...)``: id1 is not in the symbol table; no
          receiver is pushed.
        * ``method(...)``: the current object (``pointer 0``) is pushed and
          the call targets ``<current class>.method``.

        Args:
            typ1: token type of id1 (unused).
            id1: the identifier already consumed by the caller.
        """
        parent = None
        symbol_kind = self.symbol.kind_of(id1)
        n_args = 0
        typ2, id2 = self._token()
        if id2 == ".":
            if symbol_kind:
                # Call on a variable: the target class is the variable's type.
                function_type = self.symbol.type_of(id1)
                # Push the receiver. get_var_seg_idx is used so that an `arg`
                # receiver inside a method gets the same +1 index shift
                # (argument 0 is `this`) as every other variable access.
                seg, idx = self.get_var_seg_idx(id1)
                self.vm_writer.write_push(seg, idx)
                n_args += 1
            else:
                # Not a variable: id1 is taken to be a class name.
                function_type = id1
            self._advance()
            _, method_name = self._pop_required(parent, TokenType.identifier)
            full_name = "%s.%s" % (function_type, method_name)
        else:
            # Unqualified call: a method on the current object.
            n_args += 1
            self.vm_writer.write_push("pointer", 0)
            function_type = self.class_name
            full_name = "%s.%s" % (function_type, id1)

        # compile_expression_list adds the explicit arguments to the count on
        # top of this stack, which keeps nested calls separate.
        self._n_args.append(n_args)
        self._pop_required(parent, TokenType.symbol, "(")
        self.compile_expression_list()
        self._pop_required(parent, TokenType.symbol, ")")
        n_args = self._n_args.pop(-1)
        self.vm_writer.write_call(full_name, n_args=n_args)

    def compile_let(self):
        """Compile ``let name ([ expr ])? = expr ;`` into VM code.

        For an array target the element address (index + base) is computed
        before the right-hand side; the value is then stored through
        ``that 0`` after parking it in ``temp 0``.
        """
        node = self._set_parent("letStatement")
        self._pop_required(node, TokenType.keyword, KeywordType.LET)
        _, name = self._pop_required(node, TokenType.identifier)
        seg, idx = self.get_var_seg_idx(name)

        # The target may be an array element.
        indexed = False
        if self.is_token(TokenType.symbol, "["):
            indexed = True
            self._advance()
            self.compile_expression()
            self.vm_writer.write_push(seg, idx)
            self.vm_writer.write_arithmetic("+")
            self._pop_required(node, TokenType.symbol, "]")

        self._pop_required(node, TokenType.symbol, "=")
        self.compile_expression()

        if not indexed:
            self.vm_writer.write_pop(seg, idx)
        else:
            # Stack holds: element address, value.
            self.vm_writer.write_pop(SEG_TEMP, "0")
            self.vm_writer.write_pop(SEG_POINTER, "1")
            self.vm_writer.write_push(SEG_TEMP, "0")
            self.vm_writer.write_pop(SEG_THAT, "0")

        self._pop_required(node, TokenType.symbol, ";")
        self._remove_parent()

    def compile_while(self):
        """Compile ``while ( expr ) { statements }`` into a labelled VM loop.

        Layout: loop label, negated condition, if-goto exit label, body,
        goto loop label, exit label.
        """
        self.vm_writer.write_comment("start while")
        node = self._set_parent("whileStatement")
        self._pop_required(node, TokenType.keyword, KeywordType.WHILE)

        loop_label = self._get_label()
        exit_label = self._get_label()
        self.vm_writer.write_label(loop_label)

        self._pop_required(node, TokenType.symbol, "(")
        self.compile_expression()
        # Negate so that the jump is taken when the condition is false.
        self.vm_writer.write_arithmetic("~")
        self._pop_required(node, TokenType.symbol, ")")
        self.vm_writer.write_if(exit_label)

        self._pop_required(node, TokenType.symbol, "{")
        self.compile_statements()
        self._pop_required(node, TokenType.symbol, "}")

        self.vm_writer.write_goto(loop_label)
        self.vm_writer.write_label(exit_label)
        self._remove_parent()

        self.vm_writer.write_comment("end while")

    def compile_return(self):
        """Compile ``return expr? ;``.

        When the enclosing subroutine's return type is ``void``, constant 0
        is pushed before the VM ``return`` command.
        """
        node = self._set_parent("returnStatement")
        self._pop_required(node, TokenType.keyword, KeywordType.RETURN)

        has_value = not self.is_token(TokenType.symbol, ";")
        if has_value:
            self.compile_expression()
        self._pop_required(node, TokenType.symbol, ";")

        returns_void = self.return_type == KeywordType.VOID
        if returns_void:
            self.vm_writer.write_push(SEG_CONSTANT, 0)
        self.vm_writer.write_return()
        self._remove_parent()

    def compile_if(self):
        """Compile ``if ( expr ) { ... }`` with an optional ``else { ... }``.

        Layout: negated condition, if-goto else label, then-block, goto end
        label, else label, optional else-block, end label.
        """
        node = self._set_parent("ifStatement")
        self.vm_writer.write_comment("compile if")
        self._pop_required(node, TokenType.keyword, KeywordType.IF)
        self._pop_required(node, TokenType.symbol, "(")

        else_label = self._get_label()
        end_label = self._get_label()

        self.compile_expression()
        # Negate so that the jump skips the then-block on a false condition.
        self.vm_writer.write_arithmetic("~")
        self.vm_writer.write_if(else_label)
        self._pop_required(node, TokenType.symbol, ")")

        self._pop_required(node, TokenType.symbol, "{")
        self.compile_statements()
        self._pop_required(node, TokenType.symbol, "}")
        self.vm_writer.write_goto(end_label)
        self.vm_writer.write_label(else_label)

        has_else = self.is_token(TokenType.keyword, KeywordType.ELSE)
        if has_else:
            self._pop_required(node, TokenType.keyword, KeywordType.ELSE)
            self._pop_required(node, TokenType.symbol, "{")
            self.compile_statements()
            self._pop_required(node, TokenType.symbol, "}")

        self.vm_writer.write_label(end_label)
        self._remove_parent()

        self.vm_writer.write_comment(" if end")

    def compile_expression(self):
        """Compile ``term (op term)*`` with strict left-to-right evaluation.

        Operators are queued as they are read. Once a second term has been
        compiled, the oldest queued operator is emitted after every term, so
        no operator precedence is applied.
        """
        self._set_parent("expression")
        pending_ops = []
        terms_done = 0

        while not self._is_end():
            self.compile_term()
            if self._is_op(False):
                pending_ops.append(self._token()[1])
                self._advance()
            terms_done += 1
            if terms_done >= 2:
                print(pending_ops)
                self.vm_writer.write_arithmetic(pending_ops.pop(0))

        self._remove_parent()

    def compile_term(self):
        """Compile one term and emit the VM code that pushes its value.

        Handles parenthesised expressions, unary operators, identifiers
        (plain variable, array element or subroutine call) and integer,
        string and keyword constants. Stops at an operator or at one of the
        terminators recognised by _is_end.
        """
        node = self._set_parent("term")
        at_start = True
        while not (self._is_op(at_start) or self._is_end()):
            at_start = False
            if self.is_token(TokenType.symbol, "("):
                # Parenthesised sub-expression.
                self._advance()
                self.compile_expression()
                self._pop_required(node, TokenType.symbol, ")")

            elif self._is_unary_op():
                _, unary = self._token()
                self._advance()
                # Unary minus becomes "neg"; "~" is passed through unchanged.
                unary = "neg" if unary == "-" else unary
                self.compile_term()
                self.vm_writer.write_arithmetic(unary)

            elif self.is_token(TokenType.identifier):
                ident_type, name = self._pop_required(node, TokenType.identifier)
                if self.is_token(TokenType.symbol, "(") or self.is_token(
                        TokenType.symbol, "."):
                    self.compile_call(ident_type, name)
                elif self.is_token(TokenType.symbol, "["):
                    self._advance()
                    self.compile_expression()
                    seg, idx = self.get_var_seg_idx(name)
                    self.vm_writer.write_push(seg, idx)
                    # Array: compute the element address directly and read
                    # the value through that[0].
                    # FIXME: constant-index accesses such as a[0].
                    self.vm_writer.write_arithmetic("+")
                    self.vm_writer.write_pop(SEG_POINTER, "1")
                    self.vm_writer.write_push(SEG_THAT, "0")
                    self._pop_required(node, TokenType.symbol, "]")
                else:
                    # Plain variable.
                    seg, idx = self.get_var_seg_idx(name)
                    self.vm_writer.write_push(seg, idx)

            else:
                _, literal = self._token()
                if self.is_token(TokenType.integerConstant):
                    self.vm_writer.write_push(SEG_CONSTANT, literal)
                elif self.is_token(TokenType.keyword, KeywordType.TRUE):
                    # true is emitted as ~0.
                    self.vm_writer.write_push(SEG_CONSTANT, "0")
                    self.vm_writer.write_arithmetic("~")
                elif self.is_token(TokenType.keyword, KeywordType.FALSE):
                    self.vm_writer.write_push(SEG_CONSTANT, "0")
                elif self.is_token(TokenType.keyword, KeywordType.NULL):
                    self.vm_writer.write_push(SEG_CONSTANT, "0")
                elif self.is_token(TokenType.keyword, KeywordType.THIS):
                    self.vm_writer.write_push(SEG_POINTER, "0")
                elif self.is_token(TokenType.stringConstant):
                    # Build the string with String.new and one appendChar
                    # call per character.
                    self.vm_writer.write_push(SEG_CONSTANT, str(len(literal)))
                    self.vm_writer.write_call("String.new", "1")
                    for ch in literal:
                        self.vm_writer.write_push(SEG_CONSTANT, str(ord(ch)))
                        self.vm_writer.write_call("String.appendChar", '2')

                self._advance()
        self._remove_parent()

    def _pop_required(self, parent, tk, val=None):
        tk, val = self.required(tk, val)
        self._advance()
        return tk, val

    def _is_op(self, first):
        """Return whether the current token is a binary operator.

        Args:
            first: True when the token is the first one of a term; a leading
                ``-`` is then a unary minus, not a binary operator.

        Only symbol tokens can be operators. The original expression
        (``a and b or c``) let any token whose value equalled "-" count as an
        operator regardless of its type.
        """
        tk, val = self._token()
        if tk != TokenType.symbol:
            return False
        if val == '-':
            return not first
        # Tuple membership rather than a substring test on a string.
        return val in ('+', '*', '/', '&', '|', '<', '>', '=')

    def _is_unary_op(self):
        """Return whether the current token is the symbol ``-`` or ``~``."""
        kind, text = self._token()
        is_symbol = kind == TokenType.symbol
        return is_symbol and text in '-~'

    def compile_expression_list(self):
        """Compile a comma-separated argument list, stopping before ``)``.

        The number of expressions compiled is added to the count on top of
        the ``_n_args`` stack, which compile_call has already pushed.
        """
        node = self._set_parent("expressionList")
        count = self._n_args[-1]
        while not self.is_token(TokenType.symbol, ")"):
            count += 1
            self.compile_expression()
            if self.is_token(TokenType.symbol, ","):
                self._pop_required(node, TokenType.symbol, ",")
        self._n_args[-1] = count
        self._remove_parent()

    def build_identifier(self):
        """Return an ``identifier`` XML element holding the current identifier."""
        element = et.Element("identifier")
        element.text = self._tokenizer.identifier()
        return element

    def build_keyword(self):
        """Return a ``keyword`` XML element with the lower-cased keyword name."""
        element = et.Element("keyword")
        keyword = self._tokenizer.keyword()
        element.text = keyword.name.lower()
        return element

    def build_symbol(self):
        """Return a ``symbol`` XML element holding the current symbol."""
        element = et.Element("symbol")
        element.text = self._tokenizer.symbol()
        return element

    def _token(self):
        """Return ``(token_type, value)`` for the tokenizer's current token.

        The value comes from the tokenizer accessor matching the token type.
        ``(None, None)`` is returned for an unrecognised type.

        The token type is queried once and reused; this method is called for
        every token test, and the original asked the tokenizer again for each
        comparison.
        """
        tokenizer = self._tokenizer
        token_type = tokenizer.token_type()
        if token_type == TokenType.keyword:
            value = tokenizer.keyword()
        elif token_type == TokenType.symbol:
            value = tokenizer.symbol()
        elif token_type == TokenType.identifier:
            value = tokenizer.identifier()
        elif token_type == TokenType.integerConstant:
            value = tokenizer.intVal()
        elif token_type == TokenType.stringConstant:
            value = tokenizer.stringVal()
        else:
            token_type, value = None, None
        # Debug trace kept from the original implementation.
        print(token_type, value, tokenizer.line)
        return token_type, value

    def _advance(self):
        if self._tokenizer.has_more_tokens():
            self._tokenizer.advance()

    def required(self, token, val=None):
        return self._required_type(token, val)

    def _build_element(self):
        """Return an XML element for the current token.

        The tag is the token type's name. The text is the token value, with a
        KeywordType value rendered as its lower-cased member name.
        """
        kind, value = self._token()
        element = et.Element(kind.name)
        element.text = value.name.lower() if isinstance(value, KeywordType) else value
        return element

    def _is_class_var(self):
        """Return whether the current token is the keyword ``field`` or ``static``."""
        starters = (KeywordType.FIELD, KeywordType.STATIC)
        return any(self.is_token(TokenType.keyword, kw) for kw in starters)

    def is_token(self, token, val=None):
        t, v = self._token()
        if val is not None:
            return t == token and v == val
        else:
            return t == token

    def _get_parent(self):
        if len(self._cur_root) > 0:
            return self._cur_root[-1]
        else:
            return None

    def _set_parent(self, name):
        """Open a new XML element called ``name`` and make it the current parent.

        The element is appended to the currently open element, if there is
        one, and pushed on the stack of open elements.

        Returns:
            The newly created element.
        """
        enclosing = self._get_parent()
        element = et.Element(name)
        if enclosing is not None:
            enclosing.append(element)
        self._cur_root.append(element)
        return element

    def _is_subroutine(self):
        """Return whether the current token starts a subroutine declaration."""
        starters = (KeywordType.FUNCTION, KeywordType.CONSTRUCTOR,
                    KeywordType.METHOD)
        return any(self.is_token(TokenType.keyword, kw) for kw in starters)

    def _is_statement(self):
        if self.is_let_statement():
            return True
        if self.is_do_statement():
            return True
        if self.is_return_statement():
            return True
        if self.is_if_statement():
            return True
        if self.is_while_statement():
            return True

    def is_while_statement(self):
        """Return whether the current token is the ``while`` keyword."""
        expected = KeywordType.WHILE
        return self.is_token(TokenType.keyword, expected)

    def is_let_statement(self):
        """Return whether the current token is the ``let`` keyword."""
        expected = KeywordType.LET
        return self.is_token(TokenType.keyword, expected)

    def is_do_statement(self):
        """Return whether the current token is the ``do`` keyword."""
        expected = KeywordType.DO
        return self.is_token(TokenType.keyword, expected)

    def is_return_statement(self):
        """Return whether the current token is the ``return`` keyword."""
        expected = KeywordType.RETURN
        return self.is_token(TokenType.keyword, expected)

    def is_if_statement(self):
        """Return whether the current token is the ``if`` keyword."""
        expected = KeywordType.IF
        return self.is_token(TokenType.keyword, expected)

    def _is_var_desc(self):
        """Return whether the current token is the ``var`` keyword."""
        expected = KeywordType.VAR
        return self.is_token(TokenType.keyword, expected)

    def _is_end(self):
        """Return whether the current token ends an expression or term.

        The terminators are the symbols ``;``, ``)``, ``,`` and ``]``. The
        original tested ``;`` twice; the duplicate test has been removed.
        """
        terminators = (";", ")", ",", "]")
        return any(self.is_token(TokenType.symbol, mark) for mark in terminators)

    def get_var_seg_idx(self, val):
        kind = self.symbol.kind_of(val)
        idx = self.symbol.index_of(val)
        if kind == "static":
            return SEG_STATIC, idx
        elif kind == "var":
            return SEG_LOCAL, idx
        elif kind == "field":
            return SEG_THIS, idx
        elif kind == "arg":
            if self.method_type == KeywordType.METHOD:
                idx += 1
            return SEG_ARG, idx

    def _get_label(self):
        label = "label_%s" % self._label_cnt
        self._label_cnt += 1
        return label
コード例 #5
0
class CompilationEngine(object):
    def __init__(self, inFile):
        """Create the tokenizer, symbol table and VM writer for one source file.

        Args:
            inFile: Path of the Jack source file. The VM output path is the
                same path with a trailing ``.jack`` replaced by ``.vm``; a
                path without that suffix simply gets ``.vm`` appended.
        """
        self.t = Tokenizer(inFile)
        self.symTable = SymbolTable()
        # str.rstrip('.jack') removes any trailing run of the characters
        # '.', 'j', 'a', 'c', 'k' (so 'Stack.jack' became 'St'); strip the
        # literal suffix instead.
        suffix = '.jack'
        stem = inFile[:-len(suffix)] if inFile.endswith(suffix) else inFile
        self.vmName = stem + '.vm'
        self.vm = VMWriter(self.vmName)
        self.className = ''
        self.types = ['int', 'char', 'boolean', 'void']
        self.stmnt = ['do', 'let', 'if', 'while', 'return']
        self.subroutType = ''
        # Label counters; compile_subroutine resets them after each body.
        self.whileIndex = 0
        self.ifIndex = 0
        self.fieldNum = 0

    def compile_class(self):

        self.t.advance()
        self.validator('class')
        self.className = self.t.current_token()
        self.t.advance()
        self.validator('{')
        self.fieldNum = self.compile_class_var_dec()
        while self.t.symbol() != '}':  # subroutines
            self.compile_subroutine()
        self.validator('}')
        self.vm.close()

        return

    def compile_class_var_dec(self):
        varKeyWords = ['field', 'static']
        name = ''
        kind = ''
        varType = ''
        counter = 0
        while self.t.keyword() in varKeyWords:
            kind = self.t.current_token()
            self.validator(varKeyWords)
            # variable type
            varType = self.t.current_token()
            self.validator(['int', 'char', 'boolean', 'IDENTIFIER'])
            name = self.t.current_token()
            self.symTable.define(name, varType, kind)
            self.t.advance()
            if kind == 'field':
                counter += 1

            while self.t.symbol() != ';':  # checks multiple vars
                self.validator(',')
                name = self.t.current_token()
                self.symTable.define(name, varType, kind)
                self.t.advance()
                if kind == 'field':
                    counter += 1
            self.validator(';')

        return counter

    def compile_subroutine(self):
        current_subrout_scope = self.symTable.subDict
        self.symTable.start_subroutine()

        subroutKword = self.t.current_token()
        self.validator(['constructor', 'function', 'method'])

        self.subroutType = self.t.current_token()
        self.validator(['int', 'char', 'boolean', 'void', 'IDENTIFIER'])

        name = self.t.current_token()
        subroutName = self.className + '.' + name
        self.t.advance()
        self.validator('(')

        if subroutKword == 'method':
            self.compile_parameter_list(method=True)
        else:
            self.compile_parameter_list()

        self.validator(')')
        self.validator('{')

        if self.t.symbol() == '}':
            self.t.advance()

            return

        self.validator(['var', 'let', 'do', 'if', 'while', 'return'],
                       advance=False)
        numLocals = 0
        if self.t.keyword() == 'var':
            numLocals = self.compile_var_dec()

        self.vm.write_function(subroutName, numLocals)

        if subroutKword == 'constructor':
            self.vm.write_push('constant', self.fieldNum)
            self.vm.write_call('Memory.alloc', 1)
            self.vm.write_pop('pointer', 0)
        elif subroutKword == 'method':
            self.vm.write_push('argument', 0)
            self.vm.write_pop('pointer', 0)

        if self.t.keyword() in self.stmnt:
            self.compile_statements()

        self.validator('}')
        self.symTable.subDict = current_subrout_scope
        self.whileIndex = 0
        self.ifIndex = 0

        return

    def compile_parameter_list(self, method=False):
        name = ''
        varType = ''
        kind = ''
        counter = 0

        if self.t.symbol() == ')':
            return counter
        varType = self.t.current_token()
        self.validator(['int', 'char', 'boolean', 'void', 'IDENTIFIER'])
        kind = 'arg'
        name = self.t.current_token()
        if method:
            self.symTable.define(name, varType, kind, method=True)
        else:
            self.symTable.define(name, varType, kind)

        self.t.advance()
        counter += 1
        while self.t.symbol() == ',':
            self.validator(',')
            self.validator(['int', 'char', 'boolean', 'void', 'IDENTIFIER'])
            kind = 'arg'
            name = self.t.current_token()
            self.symTable.define(name, varType, kind)
            self.t.advance()
            counter += 1

        return counter

    def compile_var_dec(self):
        name = ''
        kind = ''
        varType = ''
        counter = 0

        while self.t.keyword() == 'var':  # check multiple lines of var
            kind = 'var'
            self.t.advance()
            varType = self.t.current_token()
            self.validator(['int', 'char', 'boolean', 'void', 'IDENTIFIER'])
            name = self.t.current_token()
            self.symTable.define(name, varType, kind)
            self.t.advance()
            counter += 1

            while self.t.symbol() == ',':  # multiple varNames
                self.t.advance()
                name = self.t.current_token()
                self.symTable.define(name, varType, kind)
                self.t.advance()
                counter += 1
            self.validator(';')

        return counter

    def compile_statements(self):

        while self.t.keyword() in self.stmnt:
            if self.t.keyword() == 'let':
                self.compile_let()
            elif self.t.keyword() == 'do':
                self.compile_do()
            elif self.t.keyword() == 'if':
                self.compile_if()
            elif self.t.keyword() == 'while':
                self.compile_while()
            elif self.t.keyword() == 'return':
                self.compile_return()
            else:
                raise Exception(self.t.current_token() + ' is not valid')

        return

    def compile_do(self):
        lookAhead = ''
        self.t.advance()  # do
        lookAhead = self.t.tokens[self.t.tokenIndex + 1]

        if lookAhead == '(':  # subroutineName(exprlist)
            subroutName = self.className + '.' + self.t.current_token()
            self.t.advance()
            self.validator('(')

            self.vm.write_push('pointer', 0)
            numArgs = self.compile_expression_list()
            self.vm.write_call(subroutName, numArgs + 1)  # add 1 for 'this'

            self.validator(')')
            self.validator(';')
            self.vm.write_pop('temp', 0)  # throws away returned value

            return
        else:
            className = self.t.current_token()
            self.t.advance()
            self.validator('.')  # name.subroutine(exprList)
            subroutName = self.t.current_token()
            self.t.advance()
            self.validator('(')

            if self.symTable.kind_of(className) in [
                    'this', 'static', 'local', 'argument'
            ]:
                # used 'this' for 'field'
                typeName = self.symTable.type_of(className)
                subroutName = typeName + '.' + subroutName
                segment = self.symTable.kind_of(className)
                index = self.symTable.index_of(className)
                self.vm.write_push(segment, index)
                numArgs = self.compile_expression_list()
                self.vm.write_call(subroutName, numArgs + 1)
            else:
                subroutName = className + '.' + subroutName
                numArgs = self.compile_expression_list()
                self.vm.write_call(subroutName, numArgs)

            self.validator(')')
            self.validator(';')
            self.vm.write_pop('temp', 0)

            return

    def compile_let(self):
        name = ''
        kind = ''
        array = False
        self.t.advance()  # let
        while self.t.symbol() != ';':
            name = self.t.identifier()
            kind = self.symTable.kind_of(name)
            index = self.symTable.index_of(name)
            if name in self.symTable.classDict:
                self.t.advance()
            elif name in self.symTable.subDict:
                self.t.advance()
            else:
                raise Exception(self.t.identifier() + ' is not defined')
            if self.t.symbol() == '[':  # array index
                array = True
                self.vm.write_push(kind, index)
                self.validator('[')
                self.compile_expression()
                self.validator(']')
                self.vm.write_arithmetic('+')

            self.validator('=')
            self.compile_expression()
            if array:
                self.vm.write_pop('temp', 0)
                self.vm.write_pop('pointer', 1)
                self.vm.write_push('temp', 0)
                self.vm.write_pop('that', 0)
            else:
                self.vm.write_pop(kind, index)
        self.validator(';')

        return

    def compile_while(self):
        currentWhile = 'WHILE' + str(self.whileIndex)
        self.vm.write_label(currentWhile)
        self.whileIndex += 1
        self.t.advance()  # while
        self.validator('(')

        self.compile_expression()
        self.vm.write_arithmetic('~')
        self.vm.write_if('END' + currentWhile)

        self.validator(')')
        self.validator('{')

        self.compile_statements()
        self.vm.write_goto(currentWhile)

        self.validator('}')
        self.vm.write_label('END' + currentWhile)

        return

    def compile_return(self):
        self.t.advance()  # return
        if self.t.symbol() == ';':
            self.vm.write_push('constant', '0')
            self.vm.write_return()
            self.t.advance()
        else:
            self.compile_expression()
            self.validator(';')
            self.vm.write_return()

        return

    def compile_if(self):
        endIf = 'END_IF' + str(self.ifIndex)
        currentElse = 'IF_ELSE' + str(self.ifIndex)
        self.ifIndex += 1
        self.t.advance()  # if
        self.validator('(')
        self.compile_expression()
        self.vm.write_arithmetic('~')
        self.vm.write_if(currentElse)

        self.validator(')')
        self.validator('{')

        self.compile_statements()
        self.vm.write_goto(endIf)
        self.validator('}')
        self.vm.write_label(currentElse)

        if self.t.keyword() == 'else':
            self.t.advance()  # else
            self.validator('{')

            self.compile_statements()

            self.validator('}')
        self.vm.write_label(endIf)

        return

    def compile_expression(self):
        op = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
        self.compile_term()
        while self.t.symbol() in op:
            opToken = self.t.current_token()
            self.t.advance()
            self.compile_term()
            self.vm.write_arithmetic(opToken)

        return

    def compile_term(self):

        keyConst = ['true', 'false', 'null', 'this']
        unOps = ['-', '~']
        lookAhead = ''
        name = ''
        current_subrout_scope = ''

        if self.t.token_type() == 'INT_CONST':
            self.vm.write_push('constant', self.t.int_val())
            self.t.advance()
        elif self.t.token_type() == 'STRING_CONST':
            string = self.t.string_val()
            length = len(string)
            self.vm.write_push('constant', length)
            self.vm.write_call('String.new', 1)
            for char in string:
                char = ord(char)  # gives the ASCII number
                self.vm.write_push('constant', char)
                self.vm.write_call('String.appendChar', 2)
            self.t.advance()

        elif self.t.token_type() == 'KEYWORD':
            self.validator(keyConst, advance=False)
            if self.t.current_token() in ['false', 'null']:
                self.t.advance()
                self.vm.write_push('constant', '0')
            elif self.t.current_token() == 'true':
                self.vm.write_push('constant', '1')
                self.vm.write_arithmetic('-', neg=True)
                self.t.advance()
            else:
                self.vm.write_push('pointer', '0')
                self.t.advance()

        elif self.t.token_type() == 'SYMBOL':
            if self.t.symbol() in unOps:  # unary operator
                unOpToken = self.t.current_token()
                self.t.advance()
                self.compile_term()
                self.vm.write_arithmetic(unOpToken, neg=True)
            elif self.t.symbol() == '(':  # (expression))
                self.t.advance()
                self.compile_expression()
                self.t.advance()
            else:
                raise Exception(self.t.current_token() + ' is not valid')
        elif self.t.token_type() == 'IDENTIFIER':  # varName, array, or subcall
            lookAhead = self.t.tokens[self.t.tokenIndex + 1]
            if lookAhead == '[':  # array item
                name = self.t.identifier()
                kind = self.symTable.kind_of(name)
                index = self.symTable.index_of(name)
                if name in self.symTable.classDict:
                    self.t.advance()
                elif name in self.symTable.subDict:
                    self.t.advance()
                else:
                    raise Exception(self.t.identifier() + ' is not defined')
                self.vm.write_push(kind, index)
                self.validator('[')
                self.compile_expression()

                self.vm.write_arithmetic('+')
                self.vm.write_pop('pointer', 1)
                self.vm.write_push('that', 0)

                self.validator(']')

            elif lookAhead == '(':  # subcall
                current_subrout_scope = self.symTable.subDict
                name = self.className + '.' + self.t.current_token()
                self.t.advance()
                self.validator('(')
                numArgs = self.compile_expression_list()
                self.vm.write_call(name, numArgs + 1)
                self.validator(')')
                self.symTable.subDict = current_subrout_scope

            elif lookAhead == '.':  # name.subroutName(expressList)
                current_subrout_scope = self.symTable.subDict
                className = self.t.current_token()
                self.t.advance()
                self.validator('.')
                subroutName = self.t.current_token()
                self.validator('IDENTIFIER')
                name = className + '.' + subroutName
                self.validator('(')
                if self.symTable.kind_of(className) in [
                        'this', 'static', 'local', 'argument'
                ]:
                    # used 'this' for 'field'
                    classType = self.symTable.type_of(className)
                    name = classType + '.' + subroutName
                    kind = self.symTable.kind_of(className)
                    index = self.symTable.index_of(className)
                    self.vm.write_push(kind, index)
                    numArgs = self.compile_expression_list()
                    self.vm.write_call(name, numArgs + 1)
                else:
                    numArgs = self.compile_expression_list()
                    self.vm.write_call(name, numArgs)
                self.validator(')')
                self.symTable.subDict = current_subrout_scope
            else:
                name = self.t.identifier()  # varName
                kind = self.symTable.kind_of(name)
                index = self.symTable.index_of(name)
                self.vm.write_push(kind, index)
                self.t.advance()
        else:
            raise Exception(self.t.current_token() + ' is not valid')

        return

    def compile_expression_list(self):  # only in subroutineCall
        counter = 0
        if self.t.symbol() == ')':
            return counter
        else:
            self.compile_expression()
            counter += 1
            while self.t.symbol() == ',':
                self.t.advance()
                self.compile_expression()
                counter += 1

        return counter

    def validator(self, syntax, advance=True):
        tokenType = self.t.token_type()
        token = self.t.current_token()
        if advance:
            self.t.advance()
        if type(syntax) != list:
            syntax = [syntax]
        for item in syntax:
            if item in [tokenType, token]:
                return True
        raise Exception(self.t.current_token() + ' is not valid')
コード例 #6
0
class CompilationEngine(object):
	def __init__(self, src, output):
		"""Create the tokenizer for ``src``, the VM writer for ``output`` and an empty symbol table."""
		self.tokenizer = JackTokenizer(src)
		self.writer = VMWriter(output)
		self.symbolTable = SymbolTable()
		# Restarted at 0 by compileSubroutine; numbers the if/while labels.
		self.labelIndex = 0
		# Filled in by compileClass; defined here so the attribute always exists.
		self.classname = None

	def _acceptNextToken(self, token):
		if self.tokenizer.hasMoreToken():
			self.tokenizer.advance()
			typ = self.tokenizer.tokenType()
			tok = self.tokenizer.tokenValue()
			if type(token) != list:
				token = [token]
			if typ in token or tok in token:
				return tok
		raise SyntaxError('Parse Error')

	def _tryNextToken(self, token):
		if self.tokenizer.hasMoreToken():
			typ, tok = self.tokenizer.next()
			if type(token) != list:
				token = [token]
			if typ in token or tok in token:
				return True
		return False

	def compileClass(self):
		#'class' className '{' classVarDec* subroutineDec* '}'
		self._acceptNextToken('class')
		self.classname = self._acceptNextToken('identifier')
		self._acceptNextToken('{')

		while self._tryNextToken(['static', 'field']):
			self.compileClassVarDec()
		while self._tryNextToken(['constructor', 'function', 'method']):
			self.compileSubroutine()
		self._acceptNextToken('}')

		self.writer.close()

	def compileClassVarDec(self):
		#('static'|'field') type varName (','varName)* ';'
		kind = self._acceptNextToken(['static', 'field'])
		type = self._acceptNextToken(['int', 'char', 'boolean', 'identifier'])
		self.symbolTable.define(self._acceptNextToken('identifier'), type, kind)

		while self._tryNextToken(','):
			self._acceptNextToken(',')
			self.symbolTable.define(self._acceptNextToken('identifier'), type, kind)
		self._acceptNextToken(';')

	def compileSubroutine(self):
		"""Compile one subroutine declaration and its body.

		Grammar: ``('constructor'|'function'|'method') ('void'|type)
		subroutineName '(' parameterList ')' subroutineBody``.
		The label counter is restarted for every subroutine.
		"""
		self.labelIndex = 0
		self.symbolTable.startSubroutine()

		routine_kind = self._acceptNextToken(['constructor', 'function', 'method'])
		self._acceptNextToken(['void', 'int', 'char', 'boolean', 'identifier'])
		routine_name = self._acceptNextToken('identifier')
		is_method = routine_kind == 'method'

		if is_method:
			# The receiver is registered as the first 'argument' entry.
			self.symbolTable.define('this', self.classname, 'argument')

		self._acceptNextToken('(')
		self.compileParameterList()
		self._acceptNextToken(')')
		self._acceptNextToken('{')

		local_total = 0
		while self._tryNextToken('var'):
			local_total += self.compileVarDec()
		self.writer.writeFunction(self.classname + '.' + routine_name, local_total)

		if is_method:
			# Anchor ``this`` to the receiver passed as argument 0.
			self.writer.writePush('argument', 0)
			self.writer.writePop('pointer', 0)
		elif routine_kind == 'constructor':
			# Allocate one word per field and anchor ``this`` to the block.
			self.writer.writePush('constant', self.symbolTable.varCount('field'))
			self.writer.writeCall('Memory.alloc', 1)
			self.writer.writePop('pointer', 0)

		while self._tryNextToken(STATEMENT):
			self.compileStatements()
		self._acceptNextToken('}')

	def compileParameterList(self):
		"""Compile ``((type varName) (',' type varName)*)?``, defining each name as an 'argument'."""
		if not self._tryNextToken(TYPE):
			return

		while True:
			param_type = self._acceptNextToken(TYPE)
			param_name = self._acceptNextToken('identifier')
			self.symbolTable.define(param_name, param_type, 'argument')
			if not self._tryNextToken(','):
				break
			self._acceptNextToken(',')

	def compileVarDec(self):
		"""Compile ``'var' type varName (',' varName)* ';'``.

		Returns the number of names declared; each is defined as 'local'.
		"""
		self._acceptNextToken('var')
		var_type = self._acceptNextToken(TYPE)
		first_name = self._acceptNextToken('identifier')
		self.symbolTable.define(first_name, var_type, 'local')
		declared = 1

		while self._tryNextToken(','):
			self._acceptNextToken(',')
			declared += 1
			extra_name = self._acceptNextToken('identifier')
			self.symbolTable.define(extra_name, var_type, 'local')

		self._acceptNextToken(';')
		return declared

	def compileStatements(self):
		"""Compile ``statement*`` (let, if, while, do and return statements)."""
		# Probed in this order; the first keyword that matches is compiled.
		dispatch = (
			('let', self.compileLet),
			('if', self.compileIf),
			('while', self.compileWhile),
			('do', self.compileDo),
			('return', self.compileReturn),
		)
		while self._tryNextToken(STATEMENT):
			for keyword, compile_one in dispatch:
				if self._tryNextToken(keyword):
					compile_one()
					break

	def compileDo(self):
		"""Compile ``'do' subroutineCall ';'`` and discard the returned value.

		subroutineCall is ``subroutineName '(' expressionList ')'`` or
		``(className | varName) '.' subroutineName '(' expressionList ')'``.
		A receiver is pushed (and counted as an argument) for a bare
		``name(...)`` call and for a call through a name that has a type in
		the symbol table.
		"""
		self._acceptNextToken('do')
		head = self._acceptNextToken('identifier')

		if not self._tryNextToken('.'):
			# run(): call on the current object.
			argc = 1
			target = self.classname + '.' + head
			self.writer.writePush('pointer', 0)
		else:
			self._acceptNextToken('.')
			owner_type = self.symbolTable.typeOf(head)
			if owner_type is None:
				# Game.run(): head is taken as a class name.
				argc = 0
				target = head + '.' + self._acceptNextToken('identifier')
			else:
				# game.run(): push the variable as the receiver.
				argc = 1
				self.writer.writePush(self.symbolTable.kindOf(head), self.symbolTable.indexOf(head))
				target = owner_type + '.' + self._acceptNextToken('identifier')

		self._acceptNextToken('(')
		argc += self.compileExpressionList()
		self._acceptNextToken(')')
		self._acceptNextToken(';')

		self.writer.writeCall(target, argc)
		self.writer.writePop('temp', 0)

	def compileLet(self):
		"""Compile ``'let' varName ('[' expression ']')? '=' expression ';'``."""
		table = self.symbolTable
		emit = self.writer

		self._acceptNextToken('let')
		target = self._acceptNextToken('identifier')

		indexed = self._tryNextToken('[')
		if indexed:
			# Leave base + index on the stack for the store below.
			emit.writePush(table.kindOf(target), table.indexOf(target))
			self._acceptNextToken('[')
			self.compileExpression()
			self._acceptNextToken(']')
			emit.writeArithmetic('add')

		self._acceptNextToken('=')
		self.compileExpression()
		self._acceptNextToken(';')

		if indexed:
			# Park the value, aim ``that`` at the element, then store it.
			emit.writePop('temp', 0)
			emit.writePop('pointer', 1)
			emit.writePush('temp', 0)
			emit.writePop('that', 0)
		else:
			emit.writePop(table.kindOf(target), table.indexOf(target))

	def compileWhile(self):
		"""Compile ``'while' '(' expression ')' '{' statements '}'``.

		Labels are ``WHILE<n>`` and ``WHILE_END<n>`` with ``<n>`` taken from
		``self.labelIndex``, which is then incremented.
		"""
		suffix = str(self.labelIndex)
		self.labelIndex += 1
		loop_label = 'WHILE' + suffix
		exit_label = 'WHILE_END' + suffix
		emit = self.writer

		emit.writeLabel(loop_label)
		self._acceptNextToken('while')
		self._acceptNextToken('(')
		self.compileExpression()
		self._acceptNextToken(')')
		# Leave the loop when the negated condition holds.
		emit.writeArithmetic('not')
		emit.writeIf(exit_label)

		self._acceptNextToken('{')
		self.compileStatements()
		self._acceptNextToken('}')
		emit.writeGoto(loop_label)
		emit.writeLabel(exit_label)

	def compileReturn(self):
		#'return' expression? ';'
		self._acceptNextToken('return')

		if self._tryNextToken(';'):
			self._acceptNextToken(';')
			self.writer.writePush('constant', 0)
		else:
			self.compileExpression()
			self._acceptNextToken(';')
		self.writer.writeReturn()

	def compileIf(self):
		"""Compile ``'if' '(' expression ')' '{' statements '}' ('else' '{' statements '}')?``.

		``IF_TRUE<n>`` marks where execution continues when the condition is
		false (the else part); ``IF_FALSE<n>`` marks the end of the statement.
		"""
		suffix = str(self.labelIndex)
		self.labelIndex += 1
		else_label = 'IF_TRUE' + suffix
		end_label = 'IF_FALSE' + suffix
		emit = self.writer

		self._acceptNextToken('if')
		self._acceptNextToken('(')
		self.compileExpression()
		self._acceptNextToken(')')
		# Skip the then-block when the negated condition holds.
		emit.writeArithmetic('not')
		emit.writeIf(else_label)

		self._acceptNextToken('{')
		self.compileStatements()
		self._acceptNextToken('}')
		emit.writeGoto(end_label)
		emit.writeLabel(else_label)

		has_else = self._tryNextToken('else')
		if has_else:
			self._acceptNextToken('else')
			self._acceptNextToken('{')
			self.compileStatements()
			self._acceptNextToken('}')
		emit.writeLabel(end_label)

	def compileExpression(self):
		"""Compile ``term (op term)*``.

		``*`` and ``/`` become calls to ``Math.multiply`` / ``Math.divide``;
		every other operator is emitted through ``OP_COMMAND``.
		"""
		library_routines = {'*': 'Math.multiply', '/': 'Math.divide'}

		self.compileTerm()
		while self._tryNextToken(OP):
			operator = self._acceptNextToken(OP)
			self.compileTerm()
			routine = library_routines.get(operator)
			if routine is None:
				self.writer.writeArithmetic(OP_COMMAND[operator])
			else:
				self.writer.writeCall(routine, 2)

	def compileTerm(self):
		"""Compile one term.

		Covers parenthesised expressions, unary ``-``/``~``, array reads,
		subroutine calls, integer/string constants, variables and the keyword
		constants ``null``/``false``/``true``/``this``.
		"""
		emit = self.writer
		table = self.symbolTable

		if self._tryNextToken('('):  # '(' expression ')'
			self._acceptNextToken('(')
			self.compileExpression()
			self._acceptNextToken(')')
			return

		if self._tryNextToken(['-', '~']):  # unaryOp term
			unary = self._acceptNextToken(['-', '~'])
			self.compileTerm()
			emit.writeArithmetic('neg' if unary == '-' else 'not')
			return

		head = self._acceptNextToken(TERM)

		if self._tryNextToken('['):  # varName '[' expression ']'
			emit.writePush(table.kindOf(head), table.indexOf(head))
			self._acceptNextToken('[')
			self.compileExpression()
			self._acceptNextToken(']')
			# base + index -> pointer 1, then read through ``that``.
			emit.writeArithmetic('add')
			emit.writePop('pointer', 1)
			emit.writePush('that', 0)
			return

		if self._tryNextToken('('):  # subroutineCall run()
			emit.writePush('pointer', 0)
			self._acceptNextToken('(')
			argc = self.compileExpressionList() + 1
			self._acceptNextToken(')')
			emit.writeCall(self.classname + '.' + head, argc)
			return

		if self._tryNextToken('.'):  # subroutineCall game.run()
			self._acceptNextToken('.')
			member = self._acceptNextToken('identifier')
			owner_type = table.typeOf(head)
			argc = 0
			owner = head
			if owner_type is not None:
				# head is a known variable: call on its type, pass it first.
				argc += 1
				owner = owner_type
				emit.writePush(table.kindOf(head), table.indexOf(head))
			self._acceptNextToken('(')
			argc += self.compileExpressionList()
			self._acceptNextToken(')')
			emit.writeCall(owner + '.' + member, argc)
			return

		# A term that stands alone: constant, variable or keyword constant.
		kind = self.tokenizer.tokenType()
		if kind == 'integerConstant':
			emit.writePush('constant', int(head))
		elif kind == 'stringConstant':
			emit.writePush('constant', len(head))
			emit.writeCall('String.new', 1)
			for character in head:
				emit.writePush('constant', ord(character))
				emit.writeCall('String.appendChar', 2)
		elif kind == 'identifier':
			emit.writePush(table.kindOf(head), table.indexOf(head))
		elif head in ('null', 'false'):
			emit.writePush('constant', 0)
		elif head == 'true':
			emit.writePush('constant', 1)
			emit.writeArithmetic('neg')
		elif head == 'this':
			emit.writePush('pointer', 0)

	def compileExpressionList(self):
		"""Compile ``(expression (',' expression)*)?`` and return how many expressions were compiled."""
		if not self._tryNextToken(TERM):
			return 0

		self.compileExpression()
		total = 1
		while self._tryNextToken(','):
			self._acceptNextToken(',')
			self.compileExpression()
			total += 1
		return total
コード例 #7
0
class CompilationEngine:
    """
    Recursive-descent compiler driven by a Tokenizer.

    Output goes through a VMWriter; declarations are recorded in a
    SymbolTable. Several statement/expression routines still emit XML
    lines through write_xml rather than VM commands.
    """
    # Template for one XML element on its own line: tag name, then text.
    XML_LINE = "<{0}> {1} </{0}>\n"
    # Token values that write_xml replaces with XML entities.
    COMPARE_SYM_REPLACER = {
        '<': "&lt;",
        '>': "&gt;",
        '"': "&quot;",
        '&': "&amp;"
    }
    # Keywords that compile_term re-tags as "keywordConstant".
    KEYWORD_CONSTANT = ("true", "false", "null", "this")

    def __init__(self, input_stream, output_stream):
        """
        Build the engine and immediately compile the class.

        :param input_stream: source handed to the Tokenizer
        :param output_stream: destination handed to the VMWriter
        """
        self.__tokenizer = Tokenizer(input_stream)
        self.__output = VMWriter(output_stream)
        self.__symbol = SymbolTable()
        self.__class_name = ""
        # Statement keyword -> bound compile_<keyword> routine.
        self.__statements = {
            keyword: getattr(self, "compile_" + keyword)
            for keyword in ("let", "if", "while", "do", "return")
        }
        # compile_class closes the output itself once it is done.
        self.compile_class()

    def write_xml(self):
        """
        writing xml line
        """
        if self.__tokenizer.token_type() == "stringConstant":
            self.__output.write(
                self.XML_LINE.format(self.__tokenizer.token_type(),
                                     self.__tokenizer.string_val()))
        elif self.__tokenizer.get_value() in self.COMPARE_SYM_REPLACER:
            xml_val = self.COMPARE_SYM_REPLACER[self.__tokenizer.get_value()]
            self.__output.write(
                self.XML_LINE.format(self.__tokenizer.token_type(), xml_val))
        else:
            self.__output.write(
                self.XML_LINE.format(self.__tokenizer.token_type(),
                                     self.__tokenizer.get_value()))

    def compile_class(self):
        """
        compiling the program from the class definition
        """
        # self.__output.write("<class>\n")
        # self.write_xml()
        self.__tokenizer.advance()  # skip "class"
        self.__class_name = self.__tokenizer.get_value()
        # self.write_xml()
        self.__tokenizer.advance()  # skip class name
        # self.write_xml()
        self.__tokenizer.advance()  # skip {
        current_token = self.__tokenizer.get_value()
        while current_token == "static" or current_token == "field":
            self.compile_class_var_dec()
            current_token = self.__tokenizer.get_value()
        while current_token == "constructor" or current_token == "function" or current_token == "method":
            self.compile_subroutine_dec()
            current_token = self.__tokenizer.get_value()
        # self.write_xml()
        # self.__output.write("</class>\n")
        self.__output.close()

    def compile_class_var_dec(self):
        """
        Compile consecutive static/field declarations.

        For every declared name the next free index of that kind is read
        from the symbol table, a push of (kind, index) is written, and the
        name is defined with the declaration's type and kind. Returns with
        the tokenizer on the first token after the last declaration.
        """
        current_token = self.__tokenizer.get_value()
        while current_token == "static" or current_token == "field":
            # Index the new variable will receive (count before defining it).
            index = self.__symbol.var_count(current_token)
            self.__tokenizer.advance()  # get token type
            token_type = self.__tokenizer.get_value()
            # NOTE(review): emitting a push for a declaration looks
            # unintended (no value is being read here) - confirm.
            self.__output.write_push(current_token, index)
            self.__tokenizer.advance()  # get token name
            token_name = self.__tokenizer.get_value()
            self.__symbol.define(token_name, token_type, current_token)
            self.__tokenizer.advance()
            # Further names in the same declaration share type and kind.
            while self.__tokenizer.get_value() == ",":
                self.__tokenizer.advance()  # get token name
                token_name = self.__tokenizer.get_value()
                index = self.__symbol.var_count(current_token)  # get new index
                self.__output.write_push(current_token, index)
                self.__symbol.define(token_name, token_type, current_token)
                self.__tokenizer.advance()
            # Step past the terminating ";" and look at what follows.
            self.__tokenizer.advance()
            current_token = self.__tokenizer.get_value()

    def compile_subroutine_body(self):
        """
        compiling the program's subroutine body
        """
        # self.__output.write("<subroutineBody>\n")
        # self.write_xml()  # write {
        self.__tokenizer.advance()  # skip {
        while self.__tokenizer.get_value() == "var":
            self.compile_var_dec()
        self.compile_statements()
        # self.write_xml()  # write }
        self.__tokenizer.advance()  # skip }
        # self.__output.write("</subroutineBody>\n")

    def compile_subroutine_dec(self):
        """
        Compile one subroutine declaration, starting on its
        constructor/function/method keyword.

        Reads the return type and name, compiles the parameter list, writes
        the function header and compiles the body.
        """
        self.__tokenizer.advance()  # skip constructor/function/method
        return_value = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        func_name = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        func_args = self.compile_parameter_list()
        # NOTE(review): the name is written without the class prefix and
        # the count passed is the number of parameters - confirm this is
        # what VMWriter.write_function expects.
        self.__output.write_function(func_name, func_args)
        self.compile_subroutine_body()
        if return_value == "void":
            # NOTE(review): this pop is written after the body of the void
            # subroutine itself, and the index is the string "0" - confirm.
            self.__output.write_pop("temp", "0")

    def compile_parameter_list(self):
        """
        compiling a parameter list
        """
        # todo returns the number og args !
        # self.write_xml()  # write (
        counter = 0
        self.__tokenizer.advance()  # skip (
        # self.__output.write("<parameterList>\n")
        if self.__tokenizer.get_value() != ")":
            # self.write_xml()  # write type
            self.__tokenizer.advance()  # skip type
            # self.write_xml()  # write varName
            self.__tokenizer.advance()  # skip var name
            counter += 1
            while self.__tokenizer.get_value() == ",":
                # self.write_xml()  # write ,
                self.__tokenizer.advance()  # skip ,
                # self.write_xml()  # type
                self.__tokenizer.advance()  # skip type
                # self.write_xml()  # varName
                self.__tokenizer.advance()  # skip varName
                counter += 1
        # self.__output.write("</parameterList>\n")
        # self.write_xml()  # write )
        self.__tokenizer.advance()
        return counter

    def compile_var_dec(self):
        """
        Compile one local variable declaration, starting on "var".

        Every declared name is defined in the symbol table with the
        declaration's type and the kind taken from the leading keyword; a
        push of (kind, index) is written for each. Returns with the
        tokenizer just past the terminating ";".
        """
        token_kind = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        token_type = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        token_name = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        # Index the new variable will receive (count before defining it).
        index = self.__symbol.var_count(token_kind)
        # NOTE(review): emitting a push for a declaration looks unintended,
        # and the segment passed is the raw keyword - confirm.
        self.__output.write_push(token_kind, index)
        self.__symbol.define(token_name, token_type, token_kind)
        # Further names in the same declaration share type and kind.
        while self.__tokenizer.get_value() == ",":
            self.__tokenizer.advance()  # skip ,
            token_name = self.__tokenizer.get_value()
            index = self.__symbol.var_count(token_kind)
            self.__output.write_push(token_kind, index)
            self.__symbol.define(token_name, token_type, token_kind)
            self.__tokenizer.advance()
        self.__tokenizer.advance()  # skip ;

    def compile_statements(self):
        """
        compiling statements
        """
        key = self.__tokenizer.get_value()
        # self.__output.write("<statements>\n")
        if key != "}":
            while key in self.__statements:
                self.__statements[self.__tokenizer.get_value()]()
                key = self.__tokenizer.get_value()
        # self.__output.write("</statements>\n")

    def compile_do(self):
        """
        compiling do call
        """
        # self.__output.write("<doStatement>\n")
        # self.write_xml()  # write do
        self.__tokenizer.advance()  # skip do
        self.subroutine_call()
        # self.write_xml()  # write ;
        self.__tokenizer.advance()  # skip ;
        # self.__output.write("</doStatement>\n")

    def compile_let(self):
        """
        compiling let call
        """
        # self.__output.write("<letStatement>\n")
        # self.write_xml()  # write let
        self.__tokenizer.advance()  # skip let
        # self.write_xml()  # write varName
        var_name = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        # if self.__tokenizer.get_value() == "[":  # todo handle array
        #     self.write_xml()  # write [
        #     self.__tokenizer.advance()
        #     self.compile_expression()
        #     self.write_xml()  # write ]
        #     self.__tokenizer.advance()
        # self.write_xml()  # write =
        self.__tokenizer.advance()  # skip =
        self.compile_expression()  # todo push the value to the stack
        # self.write_xml()  # write ;
        self.__tokenizer.advance()  # skip ;
        # self.__output.write("</letStatement>\n")
        var_kind = self.__symbol.kind_of(var_name)
        var_index = self.__symbol.index_of(var_name)
        self.__output.write_pop(var_kind, var_index)

    def compile_while(self):
        """
        compiling while loop call
        """
        self.__output.write("<whileStatement>\n")
        self.write_xml()  # write while
        self.__tokenizer.advance()
        self.write_xml()  # write (
        self.__tokenizer.advance()
        self.compile_expression()
        self.write_xml()  # write )
        self.__tokenizer.advance()
        self.write_xml()  # write {
        self.__tokenizer.advance()
        self.compile_statements()
        self.write_xml()  # write }
        self.__tokenizer.advance()
        self.__output.write("</whileStatement>\n")

    def compile_return(self):
        """
        compiling return statement
        """
        self.__output.write("<returnStatement>\n")
        self.write_xml()  # write return
        self.__tokenizer.advance()
        if self.__tokenizer.get_value() != ";":
            self.compile_expression()
        self.write_xml()  # write ;
        self.__tokenizer.advance()
        self.__output.write("</returnStatement>\n")

    def compile_if(self):
        """
        compiling if condition
        """
        self.__output.write("<ifStatement>\n")
        self.write_xml()  # write if
        self.__tokenizer.advance()
        self.write_xml()  # write (
        self.__tokenizer.advance()
        self.compile_expression()
        self.write_xml()  # write )
        self.__tokenizer.advance()
        self.write_xml()  # write {
        self.__tokenizer.advance()
        self.compile_statements()
        self.write_xml()  # write }
        self.__tokenizer.advance()
        if self.__tokenizer.get_value() == "else":
            self.write_xml()  # write else
            self.__tokenizer.advance()
            self.write_xml()  # write {
            self.__tokenizer.advance()
            self.compile_statements()
            self.write_xml()  # write }
            self.__tokenizer.advance()
        self.__output.write("</ifStatement>\n")

    def compile_expression(self):
        """
        compiling expressions
        """
        self.__output.write("<expression>\n")
        self.compile_term()
        while self.__tokenizer.is_operator():
            self.write_xml()  # write the operator
            self.__tokenizer.advance()
            self.compile_term()
        self.__output.write("</expression>\n")

    def compile_term(self):
        """
        compiling any kind of terms
        """
        # dealing with unknown token
        self.__output.write("<term>\n")
        curr_type = self.__tokenizer.token_type()
        # handle consts
        if curr_type == "integerConstant" or curr_type == "stringConstant":
            self.write_xml()  # write the int \ string
            self.__tokenizer.advance()

        # handle const keyword
        elif curr_type == "keyword" and self.__tokenizer.get_value(
        ) in self.KEYWORD_CONSTANT:
            self.__tokenizer.set_type("keywordConstant")
            self.write_xml()  # write key word
            self.__tokenizer.advance()

        elif curr_type == "identifier":
            # handle var names
            if self.__tokenizer.get_next_token(
            ) != "(" and self.__tokenizer.get_next_token() != ".":
                self.write_xml()  # write the var name
                self.__tokenizer.advance()
                if self.__tokenizer.get_value() == "[":
                    self.write_xml()  # write [
                    self.__tokenizer.advance()
                    self.compile_expression()
                    self.write_xml()  # write ]
                    self.__tokenizer.advance()
            # handle function calls
            else:
                self.subroutine_call()
        # handle expression
        elif curr_type == "symbol" and self.__tokenizer.get_value() == "(":
            self.write_xml()  # write (
            self.__tokenizer.advance()
            self.compile_expression()
            self.write_xml()  # write )
            self.__tokenizer.advance()

        # handle - \ ~
        elif self.__tokenizer.get_value() == "-" or self.__tokenizer.get_value(
        ) == "~":
            self.write_xml()  # write -\~
            self.__tokenizer.advance()
            self.compile_term()
        self.__output.write("</term>\n")

    def subroutine_call(self):
        """
        compiling the program's subroutine call
        """
        if self.__tokenizer.get_next_token() == ".":
            self.write_xml()  # write name
            self.__tokenizer.advance()
            self.write_xml()  # write .
            self.__tokenizer.advance()
        self.write_xml()  # write name
        self.__tokenizer.advance()
        self.write_xml()  # write (
        self.__tokenizer.advance()
        self.compile_expression_list()
        self.write_xml()  # write )
        self.__tokenizer.advance()

    def compile_expression_list(self):
        """
        compiling expression list
        """
        self.__output.write("<expressionList>\n")
        if self.__tokenizer.get_value() != ")":
            self.compile_expression()
            while self.__tokenizer.get_value() == ",":
                self.write_xml()  # write ,
                self.__tokenizer.advance()
                self.compile_expression()
        self.__output.write("</expressionList>\n")
コード例 #8
0
class CompilationEngine:
    def __init__(self, inpath, outpath):
        """Compile the class found in ``inpath`` and write VM code to ``outpath``.

        Compilation runs inside the constructor. The VM writer is closed
        even when compilation raises, so the output is not left open on a
        compile error; the completion message is printed only on success.
        """
        self.tokenizer = Tokenizer(inpath)
        self.symboltable = SymbolTable()
        self.vmwriter = VMWriter(outpath)
        self._class_name = None
        try:
            if self.tokenizer.has_more_tokens():
                self.compile_class()
        finally:
            # Always release the writer, also on a compile error.
            self.vmwriter.close()
        print("{0} completed.".format(outpath))

    def _subroutine_init(self):
        """Reset the per-subroutine state (kind, name, return type) to None."""
        self._sub_kind = self._sub_name = self._ret_type = None

    def _advance(self):
        """Step the tokenizer to the next token; _check_EOF reports an
        error first when no tokens are left."""
        self._check_EOF()
        self.tokenizer.advance()

    @property
    def _current_token(self):
        """Value of the current token, read from the tokenizer attribute
        that matches the current token type (string value as fallback)."""
        tokenizer = self.tokenizer
        t_type = tokenizer.token_type
        if t_type == T_KEYWORD:
            return tokenizer.keyword
        if t_type == T_SYMBOL:
            return tokenizer.symbol
        if t_type == T_ID:
            return tokenizer.identifier
        if t_type == T_INTEGER:
            return tokenizer.intval
        return tokenizer.stringval

    @property
    def _current_tok_type(self):
        """Type of the current token, as reported by the tokenizer."""
        return self.tokenizer.token_type

    @property
    def _current_tok_tag(self):
        """Entry of the module-level ``token_tags`` mapping for the
        current token type."""
        return token_tags[self._current_tok_type]

    @property
    def _next_token(self):
        """Return the tokenizer's raw look-ahead token converted with
        ``str`` (so a missing token compares as the text of its repr)."""
        return str(self.tokenizer.next_token)

    def _require_token(self, tok_type, token=None):
        """Advance one token and verify it.

        When ``token`` is given (truthy), the new current token must equal
        it. In every case the new current token's type must equal
        ``tok_type``. A mismatch is reported through ``_error``.
        """
        self._advance()
        if token and self._current_token != token:
            return self._error(expect_toks=(token, ))
        if self._current_tok_type != tok_type:
            return self._error(expect_types=(tok_type, ))
        return None

    def _require_id(self):
        """Advance and require the new current token to be an identifier."""
        self._require_token(T_ID)

    def _require_kw(self, token):
        """Advance and require the new current token to be the keyword ``token``."""
        return self._require_token(T_KEYWORD, token=token)

    def _require_sym(self, token):
        """Advance and require the new current token to be the symbol ``token``."""
        return self._require_token(T_SYMBOL, token=token)

    def _require_brackets(self, brackets, procedure):
        """Require the opening symbol, run ``procedure``, then require the
        closing symbol. ``brackets`` is a two-item sequence such as '()'."""
        opening, closing = brackets
        self._require_sym(opening)
        procedure()
        self._require_sym(closing)

    def _fol_by_class_vardec(self):
        """Return True when the look-ahead token is 'static' or 'field'."""
        upcoming = self._next_token
        return upcoming == KW_STATIC or upcoming == KW_FIELD

    def _fol_by_subroutine(self):
        """Return True when the look-ahead token opens a subroutine
        declaration (constructor, function or method keyword)."""
        upcoming = self._next_token
        return (upcoming == KW_CONSTRUCTOR or upcoming == KW_FUNCTION
                or upcoming == KW_METHOD)

    def _fol_by_vardec(self):
        """Return True when the look-ahead token is the 'var' keyword."""
        return self._next_token == KW_VAR

    #########################
    # structure compilation #
    #########################

    def compile_class_name(self):
        """Consume an identifier and remember it as the current class name."""
        self._require_id()
        self._class_name = self._current_token

    def compile_subroutine_name(self):
        """Consume an identifier and remember it as the current subroutine
        name. Note this is also called while compiling a call
        (compile_call_name), so ``_sub_name`` is overwritten there too."""
        self._require_id()
        self._sub_name = self._current_token

    def compile_var_name(self, kind=None, type=None, declare=False):
        """Consume an identifier used as a variable name.

        With ``declare=True`` the name is defined in the symbol table with
        the given ``type`` and ``kind`` (both expected to be set);
        otherwise the name is checked against the table via
        ``check_var_name``.
        """
        self._require_id()
        name = self._current_token
        if declare is not True:
            self.check_var_name(name, type)
            return
        self.symboltable.define(name, type, kind)

    def check_var_name(self, name, type=None):
        """Report an error when ``name`` is undeclared or, if ``type`` is
        given, when its recorded type differs from ``type``."""
        table = self.symboltable
        if table.kindof(name) is None:
            self._traceback('name used before declared: {0}'.format(name))
        elif type is not None:
            recorded_type = table.typeof(name)
            if recorded_type == type:
                return
            # NOTE(review): ``type`` is a variable type here, while _error
            # looks expect_types up in token_tags - confirm that works.
            self._error(expect_types=(type, ),
                        get='{0} "{1}"'.format(recorded_type, name))

    def compile_type(self, advanced=False, expect='type'):
        """Check that the current token is a type: one of the symbol
        table's built-in types or an identifier (a class name).

        Advances first unless ``advanced`` is something other than False.
        ``expect`` is the wording used in the error message.
        """
        if advanced is False:
            self._advance()
        is_builtin = self._current_token in SymbolTable.builtIn_types
        if is_builtin or self._current_tok_type == T_ID:
            return None
        return self._error(expect=expect)

    def compile_return_type(self):
        """Consume the return type ('void' or a type) and store it.

        For a constructor the return type must be the current class name.
        """
        self._advance()
        is_void = self._current_token == KW_VOID
        if not is_void:
            self.compile_type(True, '"void" or type')
        self._ret_type = self._current_token
        if self._sub_kind != KW_CONSTRUCTOR:
            return
        if self._ret_type != self._class_name:
            self._traceback('constructor expect current class as return type')

    @record_non_terminal('class')
    def compile_class(self):
        """Compile `'class' className '{' classVarDec* subroutineDec* '}'`.

        Class variable declarations must all precede the subroutines; any
        token other than '}' after them is reported. Tokens following the
        closing brace are reported as well.
        """
        self._require_kw(KW_CLASS)
        self.compile_class_name()
        self._require_sym('{')
        while self._fol_by_class_vardec():
            self.compile_class_vardec()
        while self._fol_by_subroutine():
            self.compile_subroutine()
        self._advance()
        if self._current_token != '}':
            # Message previously read "Except ..."; corrected wording.
            self._traceback("Expect classVarDec first, subroutineDec second.")
        if self.tokenizer.has_more_tokens():
            if self._next_token == KW_CLASS:
                self._traceback('Only expect one classDec.')
            self._traceback('Unexpected extra tokens.')

    def compile_declare(self):
        """Compile `kind type varName (',' varName)* ';'` where kind is
        'static', 'field' or 'var', defining every name in the symbol table."""
        self._advance()
        kind = self._current_token
        self.compile_type()
        var_type = self._current_token
        while True:
            self.compile_var_name(kind, var_type, declare=True)
            self._advance()  # lands on ',' or ';'
            if self._current_token != ',':
                break
        if self._current_token != ';':
            return self._error((',', ';'))

    @record_non_terminal('classVarDec')
    def compile_class_vardec(self):
        """Compile one class variable declaration via compile_declare."""
        # ('static|field') type varName (',' varName)* ';'
        self.compile_declare()

    @record_non_terminal('subroutineDec')
    def compile_subroutine(self):
        """Compile one subroutine declaration, from its kind keyword
        through the end of its body."""
        # ('constructor'|'function'|'method')
        # ('void'|type) subroutineName '(' parameterList ')' subroutineBody
        # Fresh per-subroutine state and a fresh subroutine scope.
        self._subroutine_init()
        self.symboltable.start_subroutine()
        self._advance()
        self._sub_kind = self._current_token
        if self._sub_kind == KW_METHOD:
            # Define 'this' as an argument before the declared parameters.
            self.symboltable.define('this', self._class_name, 'argument')
        self.compile_return_type()
        self.compile_subroutine_name()
        self._require_brackets('()', self.compile_parameter_list)
        self.compile_subroutine_body()

    @record_non_terminal('parameterList')
    def compile_parameter_list(self):
        """Compile `((type varName) (',' type varName)*)?`, defining each
        parameter as an 'argument' in the symbol table."""
        first = True
        while self._next_token != ')':
            if not first:
                self._require_sym(',')
            self.compile_type()
            self.compile_var_name('argument', self._current_token, True)
            first = False

    @record_non_terminal('subroutineBody')
    def compile_subroutine_body(self):
        """Compile `'{' varDec* statements '}'`."""
        # '{' varDec* statements '}'
        self._require_sym('{')
        while self._fol_by_vardec():
            self.compile_vardec()
        # The function header is written only after all locals have been
        # declared, because compile_function reads their count.
        self.compile_function()
        self.compile_statements()
        self._require_sym('}')

    def compile_function(self):
        """Write the VM function header and, for constructors and
        methods, the code that sets up `pointer 0`."""
        writer, table = self.vmwriter, self.symboltable
        qualified = '.'.join((self._class_name, self._sub_name))
        writer.write_function(qualified, table.varcount(KW_VAR))
        if self._sub_kind == KW_METHOD:
            # pointer 0 = argument 0
            writer.write_push('argument', 0)
            writer.write_pop('pointer', 0)
        elif self._sub_kind == KW_CONSTRUCTOR:
            # pointer 0 = Memory.alloc(number of fields)
            writer.write_push('constant', table.varcount(KW_FIELD))
            writer.write_call('Memory.alloc', 1)
            writer.write_pop('pointer', 0)

    @record_non_terminal('varDec')
    def compile_vardec(self):
        """Compile one local variable declaration via compile_declare."""
        # 'var' type varName (',' varName)* ';'
        self.compile_declare()

    #########################
    # statement compilation #
    #########################

    @record_non_terminal('statements')
    def compile_statements(self):
        """Compile statements until the look-ahead token is '}'.

        Each statement keyword is consumed here and the matching compile
        routine handles the rest; any other token is reported as an error.
        """
        handlers = {
            'do': self.compile_do,
            'let': self.compile_let,
            'while': self.compile_while,
            'return': self.compile_return,
            'if': self.compile_if,
        }
        while self._next_token != '}':
            self._advance()
            handler = handlers.get(self._current_token)
            if handler is None:
                return self._error(expect='statement expression')
            handler()

    @record_non_terminal('doStatement')
    def compile_do(self):
        """Compile `'do' subroutineCall ';'` (the 'do' keyword has already
        been consumed by compile_statements)."""
        # 'do' subroutineCall ';'
        # Move onto the first name of the call; compile_call_name reads it
        # as the current token.
        self._advance()
        self.compile_subroutine_call()
        self.vmwriter.write_pop('temp', 0)  # temp[0] store useless value
        self._require_sym(';')

    @record_non_terminal('letStatement')
    def compile_let(self):
        """Compile `'let' varName ('[' expression ']')? '=' expression ';'`.

        For an array target the element address is pushed before the
        right-hand side is compiled; the value is then parked in temp 1
        while `pointer 1` is set to that address.
        """
        self.compile_var_name()
        target = self._current_token
        is_array = self._next_token == '['
        if is_array:
            self.compile_array_subscript(target)  # pushes base + subscript
        self._require_sym('=')
        self.compile_expression()  # pushes the value
        self._require_sym(';')
        if not is_array:
            self.assign_variable(target)
            return
        writer = self.vmwriter
        writer.write_pop('temp', 1)     # value -> temp 1
        writer.write_pop('pointer', 1)  # that = base + subscript
        writer.write_push('temp', 1)
        writer.write_pop('that', 0)

    # Symbol-table kind -> VM segment name used by assign_variable and
    # load_variable.
    kind_segment = {
        'static': 'static',
        'field': 'this',
        'argument': 'argument',
        'var': 'local'
    }

    def assign_variable(self, name):
        """Write a pop into the segment/index that ``name`` maps to."""
        table = self.symboltable
        kind, index = table.kindof(name), table.indexof(name)
        self.vmwriter.write_pop(self.kind_segment[kind], index)

    def load_variable(self, name):
        """Write a push from the segment/index that ``name`` maps to."""
        table = self.symboltable
        kind, index = table.kindof(name), table.indexof(name)
        self.vmwriter.write_push(self.kind_segment[kind], index)

    # Class-level starting value for label numbering; compile_while and
    # compile_if do `self.label_num += 1`, which rebinds it per instance.
    label_num = 0

    @record_non_terminal('whileStatement')
    def compile_while(self):
        """Compile `'while' '(' expression ')' '{' statements '}'` using a
        fresh pair of WHILE_START_n / WHILE_END_n labels."""
        suffix = str(self.label_num)
        loop_label = 'WHILE_START_' + suffix
        exit_label = 'WHILE_END_' + suffix
        self.label_num += 1
        self.vmwriter.write_label(loop_label)
        # After the body, jump back to the start; exit when the test fails.
        self.compile_cond_expression(loop_label, exit_label)

    @record_non_terminal('ifStatement')
    def compile_if(self):
        """Compile `'if' '(' expression ')' '{' statements '}'` with its
        optional `'else' '{' statements '}'`, using IF_ELSE_n / IF_END_n."""
        suffix = str(self.label_num)
        else_label = 'IF_ELSE_' + suffix
        end_label = 'IF_END_' + suffix
        self.label_num += 1
        # After the 'then' block jump to the end; a failed test lands on
        # the else label, which is written right after that jump.
        self.compile_cond_expression(end_label, else_label)
        has_else = self._next_token == KW_ELSE
        if has_else:
            self._require_kw(KW_ELSE)
            self._require_brackets('{}', self.compile_statements)
        self.vmwriter.write_label(end_label)

    def compile_cond_expression(self, goto_label, end_label):
        """Compile `'(' expression ')' '{' statements '}'` as a guarded block.

        Writes: the condition, `not`, a conditional jump to ``end_label``,
        the block's statements, an unconditional jump to ``goto_label``,
        and finally the ``end_label`` label itself.
        """
        self._require_brackets('()', self.compile_expression)
        # Negate so the conditional jump is taken when the test is false.
        self.vmwriter.write_arithmetic('not')
        self.vmwriter.write_if(end_label)
        self._require_brackets('{}', self.compile_statements)
        self.vmwriter.write_goto(goto_label)  # meet
        self.vmwriter.write_label(end_label)

    @record_non_terminal('returnStatement')
    def compile_return(self):
        """Compile `'return' expression? ';'`.

        A constructor must return `this` (pushed as pointer 0). A bare
        `return;` is only accepted when the return type is void, in which
        case constant 0 is pushed as the value.
        """
        writer = self.vmwriter
        if self._sub_kind == KW_CONSTRUCTOR:
            self._require_kw(KW_THIS)  # constructor must return 'this'
            writer.write_push('pointer', 0)
        elif self._next_token == ';':
            if self._ret_type != KW_VOID:
                self._traceback('expect return ' + self._ret_type)
            writer.write_push('constant', 0)
        else:
            self.compile_expression()
        self._require_sym(';')
        writer.write_return()

    ##########################
    # expression compilation #
    ##########################

    # Unary operator symbol -> VM arithmetic command.
    unary_ops = {'-': 'neg', '~': 'not'}
    # Binary operator symbol -> VM arithmetic command. '*' and '/' map to
    # None because compile_binaryop emits calls to Math.multiply and
    # Math.divide for them instead.
    binary_ops = {
        '+': 'add',
        '-': 'sub',
        '*': None,
        '/': None,
        '&': 'and',
        '|': 'or',
        '<': 'lt',
        '>': 'gt',
        '=': 'eq'
    }

    @record_non_terminal('expression')
    def compile_expression(self):
        """Compile `term (op term)*`, emitting each operator right after
        its right-hand term (operators are applied left to right)."""
        # term (op term)*
        self.compile_term()
        while self._next_token in self.binary_ops:
            self._advance()
            # The look-ahead is compared as text, so make sure the token
            # really is a symbol before treating it as an operator.
            if self._current_tok_type != T_SYMBOL:
                self._error(expect_types=(T_SYMBOL, ))
            op = self._current_token
            self.compile_term()
            self.compile_binaryop(op)

    def compile_binaryop(self, op):
        """Write the VM code for binary operator ``op``: a call to the
        Math routine for '*' and '/', an arithmetic command otherwise."""
        math_calls = {'*': 'Math.multiply', '/': 'Math.divide'}
        if op in math_calls:
            self.vmwriter.write_call(math_calls[op], 2)
        else:
            self.vmwriter.write_arithmetic(self.binary_ops[op])

    # Keywords that compile_term accepts as constant terms.
    kw_consts = (KW_TRUE, KW_FALSE, KW_NULL, KW_THIS)

    @record_non_terminal('term')
    def compile_term(self):
        """Compile one term.

        Grammar: integerConstant | stringConstant | keywordConstant |
        varName | varName '[' expression ']' | subroutineCall |
        '(' expression ')' | unaryOp term
        """
        if self._next_token == '(':
            self._require_brackets('()', self.compile_expression)
            return
        self._advance()
        tok = self._current_token
        tok_type = self._current_tok_type
        if tok_type == T_KEYWORD and tok in self.kw_consts:
            self.compile_kw_consts(tok)
        elif tok_type == T_INTEGER:
            self.vmwriter.write_push('constant', tok)
        elif tok_type == T_STRING:
            self.compile_string(tok)
        elif tok_type == T_ID:
            # Tuple membership: the look-ahead must be exactly '(' or '.'.
            # (A substring test against '(.' would also accept '' and '(.'.)
            if self._next_token in ('(', '.'):
                self.compile_subroutine_call()
            elif self._next_token == '[':
                self.check_var_name(tok)
                self.compile_array_subscript(tok)  # pushes base + subscript
                self.vmwriter.write_pop('pointer', 1)
                self.vmwriter.write_push('that', 0)
            else:
                self.check_var_name(tok)
                self.load_variable(tok)
        elif tok_type == T_SYMBOL and tok in self.unary_ops:
            # Operand first, then the unary command.
            self.compile_term()
            self.vmwriter.write_arithmetic(self.unary_ops[tok])
        else:
            self._error(expect='term')

    # keywordConstant: 'true' | 'false' | 'null' | 'this'
    def compile_kw_consts(self, kw):
        """Push the value of a keyword constant: pointer 0 for 'this',
        constant 1 negated for 'true', constant 0 for anything else."""
        writer = self.vmwriter
        if kw == KW_THIS:
            writer.write_push('pointer', 0)
            return
        if kw != KW_TRUE:
            writer.write_push('constant', 0)
            return
        writer.write_push('constant', 1)
        writer.write_arithmetic('neg')

    def compile_string(self, string):
        """Write the code that builds ``string``: String.new with its
        length, then one String.appendChar call per character."""
        writer = self.vmwriter
        writer.write_push('constant', len(string))
        writer.write_call('String.new', 1)
        for code in map(ord, string):
            writer.write_push('constant', code)
            writer.write_call('String.appendChar', 2)

    def compile_subroutine_call(self):
        """Compile a subroutine call whose first name is already the
        current token.

        Grammar: subroutineName '(' expressionList ')' |
        (className | varName) '.' subroutineName '(' expressionList ')'
        """
        target, implicit_args = self.compile_call_name()
        self._require_sym('(')
        total_args = self.compile_expressionlist(implicit_args)
        self._require_sym(')')
        self.vmwriter.write_call(target, total_args)

    def compile_call_name(self):
        """Resolve the name part of a subroutine call.

        The current token is the first name of the call. Followed by '.',
        it is a class name or a variable holding an object; followed by
        '(', it is a subroutine of the current class.

        Returns a ``(qualified_name, num_args)`` pair where ``num_args`` is
        1 when an object reference was pushed as the first argument and 0
        otherwise. Reports a compile error when neither '.' nor '('
        follows, instead of falling through and returning None.
        """
        if self._current_tok_type != T_ID:
            self._error(expect_types=(T_ID, ))
        name = self._current_token
        if self._next_token == '.':
            self._require_sym('.')
            self.compile_subroutine_name()
            sub_name = self._current_token
            if (name in self.symboltable.all_class_types()
                    or name in SymbolTable.builtIn_class
                    or name == self._class_name):
                return '.'.join((name, sub_name)), 0  # className
            self.check_var_name(name)  # varName with class type
            var_type = self.symboltable.typeof(name)
            if var_type in SymbolTable.builtIn_types:
                return self._error(expect='class instance or class',
                                   get=var_type)
            self.load_variable(name)  # the object becomes the 1st argument
            return '.'.join((var_type, sub_name)), 1
        if self._next_token == '(':
            self.vmwriter.write_push('pointer', 0)  # push this to be 1st arg
            return '.'.join((self._class_name, name)), 1  # subroutineName
        # Neither form of call follows the name.
        return self._error(expect_toks=('.', '('), get=self._next_token)

    @record_non_terminal('expressionList')
    def compile_expressionlist(self, num_args):
        """Compile `(expression (',' expression)*)?` up to the closing ')'.

        Returns ``num_args`` plus the number of expressions compiled.
        """
        total = num_args
        first = True
        while self._next_token != ')':
            if not first:
                self._require_sym(',')
            self.compile_expression()
            total += 1
            first = False
        return total

    def compile_array_subscript(self, var_name):
        """Compile `'[' expression ']'` following ``var_name`` and leave
        base + subscript on the stack.

        ``var_name`` must be declared with type 'Array'.
        """
        # varName '[' expression ']'
        self.check_var_name(var_name, 'Array')
        self._require_brackets(
            '[]', self.compile_expression)  # push expression value
        self.load_variable(var_name)
        self.vmwriter.write_arithmetic('add')  # base + subscript

    def _check_EOF(self):
        """Report "Unexpected EOF." when the tokenizer has no tokens left."""
        if self.tokenizer.has_more_tokens():
            return
        self._traceback("Unexpected EOF.")

    def _error(self, expect_toks=(), expect_types=(), expect=None, get=None):
        """Build an "Expect ... but get ..." message and raise it through
        ``_traceback``.

        ``expect`` is used verbatim when given; otherwise the expectation
        is assembled from ``expect_toks`` (quoted tokens) and
        ``expect_types`` (looked up in ``token_tags``), joined with
        " or ". ``get`` defaults to the current token.
        """
        if expect is None:
            options = ['"{0}"'.format(t) for t in expect_toks]
            options.extend('type {0}'.format(token_tags[t])
                           for t in expect_types)
            # One flat join covers tokens only, types only, and both
            # (str.join takes a single iterable, not several arguments).
            expect = ' or '.join(options)
        if get is None:
            get = self._current_token
        me = 'Expect {0} but get "{1}"'.format(expect, get)
        return self._traceback(me)

    def _traceback(self, message):
        """Raise CompileError for ``message``, prefixed with the
        tokenizer's file name and line count. With DEBUG set, the symbol
        table and the known class types are printed first."""
        if DEBUG:
            rule = '--------------------------------------------'
            print(rule)
            print(self.symboltable)
            print(self.symboltable.all_class_types())
            print(rule)
        location = 'file: "{0}", line {1}'.format(self.tokenizer.filename,
                                                  self.tokenizer.line_count)
        raise CompileError("{0}: {1}".format(location, message))
コード例 #9
0
class CompilationEngine:
    """Jack compilation engine that writes VM code as it parses.

    Constructing an instance does all the work: the tokenizer is primed,
    ``compile_class`` is invoked on a fresh root element and the VM writer
    is closed.  Every ``compile_*`` method takes a ``caller`` node; child
    nodes are attached to it with ``SubElement`` while the VM output goes
    through ``self.writer``.
    """

    def __init__(self, source):
        """Set up tokenizer, symbol table and writer, then compile ``source``.

        :param source: passed unchanged to both ``Tokenizer`` and ``VMWriter``.
        """
        self.if_counter = 0
        self.while_counter = 0
        self.tokenizer = Tokenizer(source)
        # Prime the tokenizer so a current token exists before compiling.
        self.tokenizer.has_more_tokens()
        self.tokenizer.advance()
        self.symbols = SymbolTable()
        self.writer = VMWriter(source)
        self.arithmetic_op = {}
        self.init_op()
        self.root = Element(CLASS)
        self.class_name = ""
        self.compile_class(self.root)
        self.writer.close()

    def init_op(self):
        """Fill ``self.arithmetic_op`` with the binary-operator to VM-command map."""
        self.arithmetic_op = {
            '+': "add",
            '-': "sub",
            '*': "call Math.multiply 2",
            '/': "call Math.divide 2",
            '&': "and",
            '|': "or",
            '<': "lt",
            '>': "gt",
            '=': "eq",
        }

    def next(self):
        """
        Proceed to the next token; does nothing when no tokens remain.
        :return:
        """
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()

    def compile_expression(self, caller):
        """
        Compiles an expression: a term followed by any number of
        ``operator term`` pairs.
        :param caller: node that receives one TERM child per term.
        :return:
        """
        # NOTE(review): operators are collected and emitted only after all
        # terms, in reverse order, so "a - b - c" is compiled as
        # a - (b - c).  Confirm that right-to-left evaluation is intended.
        op_stack = []
        self.compile_term(SubElement(caller, TERM))
        while (self.tokenizer.token_type() is JTok.SYMBOL
               and self.tokenizer.symbol() in OPERATORS):
            op_stack.append(self.tokenizer.symbol())
            self.next()
            self.compile_term(SubElement(caller, TERM))

        while op_stack:
            self.writer.write_arithmetic(self.arithmetic_op[op_stack.pop()])

    def compile_expressionList(self, caller):
        """
        Compiles a possibly empty, comma-separated list of expressions.
        :param caller: node that receives one EXPRESSION child per expression.
        :return: the number of expressions compiled.
        """
        num_of_args = 0
        #  if expression list is empty
        if (self.tokenizer.token_type() is JTok.SYMBOL
                and self.tokenizer.symbol() == ")"):
            caller.text = " "
            return num_of_args

        num_of_args += 1
        self.compile_expression(SubElement(caller, EXPRESSION))
        while (self.tokenizer.token_type() is JTok.SYMBOL
               and self.tokenizer.symbol() == ","):
            num_of_args += 1
            self.next()
            self.compile_expression(SubElement(caller, EXPRESSION))
        return num_of_args

    def compile_subroutineCall(self, caller, first_token):
        """
        Compiles a subroutine call whose leading identifier was already read.

        ``name.sub(...)``: when ``name`` is a known symbol it is pushed as
        the hidden first argument and the call targets ``<type>.sub``;
        otherwise ``name`` is used as-is.  A bare ``sub(...)`` is called on
        the current object (``pointer 0`` is pushed).
        :param caller: node that receives the EXPRESSION_LIST child.
        :param first_token: the identifier that started the call.
        :return:
        """
        func_name = first_token

        is_method = 0
        if self.tokenizer.symbol() == '.':
            self.next()
            if self.symbols.kind_of(func_name):
                segment = self.symbols.kind_of(func_name)
                segment = Kind.get_segment(segment)
                index = self.symbols.index_of(func_name)
                self.writer.write_push(segment, index)
                func_name = self.symbols.type_of(func_name)
                is_method = 1

            func_name = func_name + "." + self.tokenizer.identifier()
            self.next()
        else:
            func_name = self.class_name + "." + func_name
            self.writer.write_push(POINTER, 0)
            is_method = 1

        self.next()  # move past the token expected to be '('
        num_of_args = self.compile_expressionList(
            SubElement(caller, EXPRESSION_LIST)) + is_method

        self.writer.write_call(func_name, num_of_args)

        self.next()  # move past the token expected to be ')'

    def compile_term(self, caller):
        """
        Compiles a single term: constant, keyword constant, variable, array
        entry, subroutine call, parenthesised expression or unary operation.
        :param caller: node that receives children for nested constructs.
        :return:
        """
        token_type = self.tokenizer.token_type()
        if token_type is JTok.INT_CONST:
            self.writer.write_push(CONSTANT, self.tokenizer.intVal())
            self.next()

        elif token_type is JTok.STRING_CONST:
            # Build the string at run time, one appendChar call per character.
            string_val = self.tokenizer.string_val()
            self.writer.write_push(CONSTANT, len(string_val))
            self.writer.write_call("String.new", 1)
            for c in string_val:
                self.writer.write_push(CONSTANT, ord(c))
                self.writer.write_call("String.appendChar", 2)
            self.next()

        elif token_type is JTok.KEYWORD:
            if self.tokenizer.key_word() in {"null", "false"}:
                self.writer.write_push(CONSTANT, 0)
            elif self.tokenizer.key_word() == "true":
                # true is written as neg(1)
                self.writer.write_push(CONSTANT, 1)
                self.writer.write_arithmetic("neg")
            elif self.tokenizer.key_word() == "this":
                self.writer.write_push(POINTER, 0)
            else:
                print("unexpected")

            self.next()

        elif token_type is JTok.IDENTIFIER:
            name = self.tokenizer.identifier()

            # Look one token ahead to tell calls, array entries and plain
            # variables apart.
            self.next()
            token_type = self.tokenizer.token_type()

            if token_type is JTok.SYMBOL and self.tokenizer.symbol() in {".", "("}:
                self.compile_subroutineCall(caller, name)

            elif token_type is JTok.SYMBOL and self.tokenizer.symbol() == '[':
                self.next()

                self.compile_expression(SubElement(caller, EXPRESSION))
                kind = self.symbols.kind_of(name)
                index = self.symbols.index_of(name)
                if kind is not None:
                    self.writer.write_push(kind.get_segment(), index)
                else:
                    print("unexpected")
                # index + base, then read the entry through THAT
                self.writer.write_arithmetic("add")
                self.writer.write_pop(POINTER, 1)
                self.writer.write_push("that", 0)
                self.next()  # move past the token expected to be ']'

            else:
                kind = self.symbols.kind_of(name)
                index = self.symbols.index_of(name)
                if kind is not None:
                    self.writer.write_push(kind.get_segment(), index)
                else:
                    print("unexpected")

        elif token_type is JTok.SYMBOL:
            if self.tokenizer.symbol() == '(':
                self.next()

                self.compile_expression(SubElement(caller, EXPRESSION))
                self.next()  # move past the token expected to be ')'

            elif self.tokenizer.symbol() in {'-', '~'}:
                unary_op = self.tokenizer.symbol()
                self.next()
                self.compile_term(SubElement(caller, TERM))
                if unary_op == "-":
                    self.writer.write_arithmetic("neg")
                elif unary_op == "~":
                    self.writer.write_arithmetic("not")
                else:
                    # Was a bare string statement (a no-op); report it the
                    # same way the other unexpected branches do.
                    print("unexpected")

    def compile_do(self, caller):
        """
        Compiles a do statement and discards the call's return value.
        :param caller: node handed on to ``compile_subroutineCall``.
        :return:
        """
        self.next()

        name = self.tokenizer.identifier()
        self.next()

        self.compile_subroutineCall(caller, name)
        # The returned value is not used by a do statement.
        self.writer.write_pop(TEMP, 0)
        self.next()

    def compile_let(self, caller):
        """
        Compiles a let statement, for a plain variable or an array entry.
        :param caller: node that receives EXPRESSION children.
        :return:
        """
        self.next()

        varName = self.tokenizer.identifier()
        self.next()

        # NOTE(review): kind_of() is not checked for None here, unlike in
        # compile_term; an undeclared variable would fail on get_segment().
        kind = self.symbols.kind_of(varName)
        kind = kind.get_segment()
        index = self.symbols.index_of(varName)

        if self.tokenizer.symbol() == '[':
            self.next()

            self.compile_expression(SubElement(caller, EXPRESSION))
            self.writer.write_push(kind, index)
            self.writer.write_arithmetic("add")
            self.next()  # expected ']'
            self.next()  # expected '='
            self.compile_expression(SubElement(caller, EXPRESSION))
            # Park the value in temp 0 while THAT is pointed at the target.
            self.writer.write_pop(TEMP, 0)
            self.writer.write_pop(POINTER, 1)
            self.writer.write_push(TEMP, 0)
            self.writer.write_pop("that", 0)

        else:
            self.next()

            self.compile_expression(SubElement(caller, EXPRESSION))
            self.writer.write_pop(kind, index)

        self.next()

    def compile_return(self, caller):
        """
        Compiles a return statement; a bare ``return;`` pushes constant 0.
        :param caller: node that receives the EXPRESSION child, if any.
        :return:
        """
        self.next()

        if (self.tokenizer.token_type() is JTok.SYMBOL
                and self.tokenizer.symbol() == ";"):
            self.writer.write_push(CONSTANT, 0)
            self.writer.write_return()
            self.next()
            return

        self.compile_expression(SubElement(caller, EXPRESSION))
        self.writer.write_return()
        self.next()

    def compile_while(self, caller):
        """
        Compiles a while statement using WHILE_EXP<n> / WHILE_END<n> labels.
        :param caller: node that receives EXPRESSION and STATEMENTS children.
        :return:
        """
        while_index = self.while_counter
        self.while_counter += 1
        self.writer.write_label("WHILE_EXP" + str(while_index))
        self.next()

        self.next()

        self.compile_expression(SubElement(caller, EXPRESSION))
        # Leave the loop when the condition is false.
        self.writer.write_arithmetic("not")
        self.writer.write_if("WHILE_END" + str(while_index))

        self.next()

        self.next()

        self.compile_statements(SubElement(caller, STATEMENTS))

        self.writer.write_goto("WHILE_EXP" + str(while_index))
        self.writer.write_label("WHILE_END" + str(while_index))
        self.next()

    def compile_statements(self, caller):
        """
        Compiles consecutive statements until a non-statement token is met.
        :param caller: node that receives one child per statement.
        :return:
        """
        statement_keywords = {'do', 'while', 'let', 'return', 'if'}
        caller.text = " "
        while (self.tokenizer.token_type() is JTok.KEYWORD
               and self.tokenizer.key_word() in statement_keywords):
            if self.tokenizer.key_word() == 'do':
                self.compile_do(SubElement(caller, 'doStatement'))
            elif self.tokenizer.key_word() == 'while':
                self.compile_while(SubElement(caller, 'whileStatement'))
            elif self.tokenizer.key_word() == 'let':
                self.compile_let(SubElement(caller, 'letStatement'))
            elif self.tokenizer.key_word() == 'return':
                self.compile_return(SubElement(caller, 'returnStatement'))
            elif self.tokenizer.key_word() == 'if':
                self.compile_if(SubElement(caller, 'ifStatement'))

    def compile_if(self, caller):
        """
        Compiles an if statement with an optional else clause, using
        IF_TRUE<n> / IF_FALSE<n> / IF_END<n> labels.
        :param caller: node passed straight to the nested compile calls.
        :return:
        """
        self.next()  # (
        # The condition, parentheses included, is compiled as one
        # expression whose first term is the parenthesised group.
        self.compile_expression(caller)
        self.next()  # {

        if_index = self.if_counter
        self.if_counter += 1
        self.writer.write_if("IF_TRUE" + str(if_index))

        self.writer.write_goto("IF_FALSE" + str(if_index))
        self.writer.write_label("IF_TRUE" + str(if_index))

        self.compile_statements(caller)

        self.next()

        if self.tokenizer.key_word() == 'else':
            self.writer.write_goto("IF_END" + str(if_index))
            self.writer.write_label("IF_FALSE" + str(if_index))

            self.next()
            self.next()
            self.compile_statements(caller)
            self.next()
            self.writer.write_label("IF_END" + str(if_index))
        else:
            self.writer.write_label("IF_FALSE" + str(if_index))

        return
コード例 #10
0
class CompilationEngine:
    """Recursive-descent Jack compiler that writes VM code through a VMWriter.

    The engine keeps the current token text in ``curr_token`` and its type
    in ``curr_token_type``; every ``compile_*`` method expects the current
    token to be the first token of its construct and leaves the token that
    follows the construct as current.
    """

    # Set to True to print an indented trace of the compile_* calls.
    DEBUG = False

    # Maps Jack operators and declaration kinds to VM commands, segment
    # names or OS routine names.
    translate_dict = {
        '+': 'add',
        '-': 'sub',
        '=': 'eq',
        '>': 'gt',
        '<': 'lt',
        '&': 'and',
        '|': 'or',
        'unary-': 'neg',
        'unary~': 'not',
        'argument': 'argument',
        'static': 'static',
        'var': 'local',
        'field': 'this',
        '*': 'Math.multiply',
        '/': 'Math.divide'
    }

    def __init__(self, input: "JackTokenizer", output_file_path):
        """Store the tokenizer and create the writer and symbol table.

        :param input: tokenizer supplying ``has_more_tokens``, ``advance``,
            ``token_type`` and ``keyword``.
        :param output_file_path: passed unchanged to ``VMWriter``.
        """
        self.tokenizer = input
        self.vmwriter = VMWriter(output_file_path)
        self.symbol_table = SymbolTable()

        self.label_index = 0
        self.curr_token = ''
        self.curr_token_type = ''
        self.depth = 0
        # Filled in by compile_class; initialised so the attribute exists.
        self.class_name = ''

    def compile_class(self):
        """Compile a whole class and close the VM writer afterwards."""
        self.print_open('compile_class')
        self.__next_token()  # class
        self.__next_token()  # className
        self.class_name = self.curr_token
        self.__next_token()  # {

        self.__next_token()
        while self.curr_token == 'static' or self.curr_token == 'field':
            self.compile_class_var_dec()

        while (self.curr_token == 'constructor'
               or self.curr_token == 'function'
               or self.curr_token == 'method'):
            self.compile_subroutine_dec()

        self.__next_token()  # after }
        self.vmwriter.close()
        self.print_close('compile_class_end')

    def compile_class_var_dec(self):
        """Define every variable of one ``static``/``field`` declaration."""
        self.print_open('compile_class_var_dec')
        kind = self.curr_token  # (static|field)

        self.__next_token()
        var_type = self.curr_token  # type

        self.__next_token()
        var_name = self.curr_token  # varName

        self.symbol_table.define(var_name, var_type, self.translate_dict[kind])

        self.__next_token()  # , or ;
        while (self.curr_token != ';'):
            self.__next_token()
            var_name = self.curr_token  # varName
            self.symbol_table.define(var_name, var_type,
                                     self.translate_dict[kind])
            self.__next_token()  # , or ;

        self.__next_token()  # after ;
        self.print_close('compile_class_var_dec_end')

    def compile_subroutine_dec(self):
        """Compile one constructor, function or method declaration."""
        self.print_open('compile_subroutine_dec')
        self.symbol_table.start_subroutine()

        kind = self.curr_token  # (constructor|function|method)

        self.__next_token()
        var_type = self.curr_token  # (void|type)

        self.__next_token()
        subroutine_name = self.curr_token  # subroutineName

        self.__next_token()  # '('

        if kind == 'method':
            # Methods receive the object as their first argument.
            self.symbol_table.define('this', self.class_name, 'argument')

        self.__next_token()
        self.compile_parameter_list()

        self.__next_token()  # after ')'

        self.compile_subroutine_body(kind, var_type, subroutine_name)
        self.print_close('compile_subroutine_dec_end')

    def compile_parameter_list(self):
        """Define each parameter as an ``argument`` until ')' is current."""
        self.print_open('compile_parameter_list')
        while self.curr_token != ')':
            if self.curr_token == ',':
                self.__next_token()

            var_type = self.curr_token  # type

            self.__next_token()
            var_name = self.curr_token  # varName

            self.symbol_table.define(var_name, var_type, 'argument')
            self.__next_token()
        self.print_close('compile_parameter_list_end')

    def compile_subroutine_body(self, kind, var_type, subroutine_name):
        """Compile local declarations, the function header and the statements.

        :param kind: 'constructor', 'function' or 'method'.
        :param var_type: declared return type (not used by this method).
        :param subroutine_name: name written after ``<class_name>.``.
        """
        self.print_open('compile_subroutine_body')
        self.__next_token()  # after '{'
        while self.curr_token == 'var':
            self.compile_var_dec()

        # Written after the var declarations so the local count is known.
        self.vmwriter.write_function(self.class_name + '.' + subroutine_name,
                                     self.symbol_table.var_count('local'))

        if kind == 'method':
            # Anchor THIS at the object passed as argument 0.
            self.vmwriter.write_push('argument', 0)
            self.vmwriter.write_pop('pointer', 0)
        elif kind == 'constructor':
            # Allocate one word per field and anchor THIS at the new block.
            self.vmwriter.write_push('constant',
                                     self.symbol_table.var_count('this'))
            self.vmwriter.write_call('Memory.alloc', 1)
            self.vmwriter.write_pop('pointer', 0)

        self.compile_statements()
        self.__next_token()  # after '}'

        self.print_close('compile_subroutine_body_end')

    def compile_var_dec(self):
        """Define every variable of one ``var`` declaration as ``local``."""
        self.print_open('compile_var_dec')
        # curr token is var

        self.__next_token()
        var_type = self.curr_token  # type

        self.__next_token()
        var_name = self.curr_token  # varName

        self.symbol_table.define(var_name, var_type, 'local')

        self.__next_token()  # , or ;
        while self.curr_token != ';':
            self.__next_token()
            # Fixed: the builtin ``type`` used to be passed here instead of
            # the declared type of the variable.
            self.symbol_table.define(self.curr_token, var_type, 'local')
            self.__next_token()

        self.__next_token()  # after ;

        self.print_close('compile_var_dec_end')

    def compile_statements(self):
        """Compile statements until the current token starts none of them."""
        self.print_open('compile_statements')
        while True:
            if self.curr_token == 'let':
                self.compile_let()
            elif self.curr_token == 'if':
                self.compile_if()
            elif self.curr_token == 'while':
                self.compile_while()
            elif self.curr_token == 'do':
                # Fixed: this compared against 'do ' (trailing space), so
                # do statements ended the statement loop instead.
                self.compile_do()
            elif self.curr_token == 'return':
                self.compile_return()
            else:
                break
        self.print_close('compile_statements_end')

    def compile_let(self):
        """Compile a let statement for a plain variable or an array entry."""
        self.print_open('compile_let')
        # curr_token is let
        self.__next_token()
        var_name = self.curr_token  # varName
        kind = self.symbol_table.kind_of(var_name)
        index = self.symbol_table.index_of(var_name)

        self.__next_token()
        if self.curr_token == '[':
            # push arr
            self.vmwriter.write_push(kind, index)

            # VM code for computing and pushing the value of expression1
            self.__next_token()
            self.compile_expression()
            self.__next_token()

            # add
            self.vmwriter.write_arithmetic('add')

            # VM code for computing and pushing the value of expression2
            self.__next_token()  # after =
            self.compile_expression()
            self.__next_token()  # after ;

            # pop temp 0
            self.vmwriter.write_pop('temp', 0)

            # pop pointer 1
            self.vmwriter.write_pop('pointer', 1)

            # push temp 0
            self.vmwriter.write_push('temp', 0)

            # pop that 0
            self.vmwriter.write_pop('that', 0)
        else:
            self.__next_token()  # after =
            self.compile_expression()
            self.__next_token()  # after ;

            self.vmwriter.write_pop(kind, index)
        self.print_close('compile_let_end')

    def compile_if(self):
        """Compile an if statement with an optional else clause.

        Uses labels ``L1<n>`` (start of the else part) and ``L2<n>`` (end),
        where ``<n>`` comes from ``__next_label_index``.
        """
        self.print_open('compile_if')
        # curr_token is if

        index_l = self.__next_label_index()

        self.__next_token()  # (
        self.__next_token()  # after (
        self.compile_expression()
        self.vmwriter.write_arithmetic('not')
        self.__next_token()  # ) --> {

        self.__next_token()  # { --> ?
        self.vmwriter.write_if('L1' + str(index_l))
        self.compile_statements()
        self.vmwriter.write_go_to('L2' + str(index_l))
        self.__next_token()  # } --> ?

        self.vmwriter.write_label('L1' + str(index_l))

        if self.curr_token == 'else':
            self.__next_token()  # else --> {

            self.__next_token()  # { --> ?
            self.compile_statements()
            self.__next_token()  # } --> ?

        self.vmwriter.write_label('L2' + str(index_l))
        self.print_close('compile_if_end')

    def compile_while(self):
        """Compile a while loop using labels ``L1<n>`` (top) and ``L2<n>`` (exit)."""
        self.print_open('compile_while')
        # curr_token is while
        index = self.__next_label_index()

        self.vmwriter.write_label('L1' + str(index))
        self.__next_token()  # while --> (
        self.__next_token()  # ( --> ?
        self.compile_expression()
        self.__next_token()  # ) --> {

        self.vmwriter.write_arithmetic('not')
        self.vmwriter.write_if('L2' + str(index))

        self.__next_token()  # { --> ?
        self.compile_statements()
        self.__next_token()  # } --> ?
        self.vmwriter.write_go_to('L1' + str(index))

        self.vmwriter.write_label('L2' + str(index))
        self.print_close('compile_while_end')

    def compile_do(self):
        """Compile a do statement and discard the call's return value."""
        self.print_open('compile do')
        # curr_token is do
        self.__next_token()  # do --> (subroutineName | className | varName)
        self.subroutine_call()
        self.vmwriter.write_pop('temp', 0)  # because of void call
        self.__next_token()  # ; --> ?
        self.print_close('compile do_end')

    def subroutine_call(self, skipped=False, arg_name=''):
        """Compile a subroutine call and write the ``call`` command.

        :param skipped: True when the caller has already consumed the
            leading identifier, so the current token is '(' or '.'.
        :param arg_name: that leading identifier; only read when
            ``skipped`` is True.
        """
        self.print_open('subroutine_call')
        name = ''
        if skipped:
            name = arg_name
        else:
            name = self.curr_token  # (subroutineName | className | varName)
            self.__next_token()

        function = name
        args = 0
        if self.curr_token == '(':
            # Bare name: call on the current object, passed as argument 0.
            function = self.class_name + '.' + name
            self.vmwriter.write_push('pointer', 0)
            args = 1
        elif self.curr_token == '.':
            self.__next_token()  # . --> subroutine_name
            subroutine_name = self.curr_token

            kind = self.symbol_table.kind_of(name)
            if kind is None:
                # Not a known variable: ``name`` is used as a class name.
                function = name + '.' + subroutine_name
            else:
                # Known variable: call on its type and pass it as argument 0.
                var_type = self.symbol_table.type_of(name)
                function = var_type + '.' + subroutine_name
                self.vmwriter.write_push(kind,
                                         self.symbol_table.index_of(name))
                args = 1
            self.__next_token()  # subroutine_name --> (

        self.__next_token()  # ( --> ?
        expression_list_len = self.compile_expression_list()
        self.__next_token()  # ) --> ;

        self.vmwriter.write_call(function, args + expression_list_len)
        self.print_close('subroutine_call_end')

    def compile_return(self):
        """Compile a return statement; a bare ``return;`` pushes constant 0."""
        self.print_open('compile_return')
        # curr_token is return

        self.__next_token()  # return --> ?

        if self.curr_token != ';':
            self.compile_expression()
        else:
            self.vmwriter.write_push('constant', 0)

        self.__next_token()  # ; --> ?
        self.vmwriter.write_return()
        self.print_close('compile_return_end')

    def compile_expression(self):
        """Compile ``term (op term)*``, applying each operator as soon as
        its right-hand term has been compiled."""
        self.print_open('compile_expression')
        self.compile_term()

        while self.curr_token in {'+', '-', '*', '/', '&', '|', '<', '>', '='}:
            op = self.curr_token
            self.__next_token()
            self.compile_term()

            if op in ['*', '/']:
                # Multiplication and division are OS calls, not VM commands.
                self.vmwriter.write_call(self.translate_dict[op], 2)
            else:
                if op in self.translate_dict:
                    self.vmwriter.write_arithmetic(self.translate_dict[op])

        self.print_close('compile_expression_end')

    def compile_term(self):
        """Compile one term: parenthesised expression, unary operation,
        constant, keyword constant, array entry, subroutine call or variable."""
        self.print_open('compile_term')
        if self.curr_token == '(':
            self.__next_token()  # ( --> ?
            self.compile_expression()
            self.__next_token()  # ) --> ?
        elif self.curr_token in {'-', '~'}:
            op = self.curr_token  # (-|~)
            self.__next_token()  # (-|~) --> ?
            self.compile_term()
            self.vmwriter.write_arithmetic(self.translate_dict['unary' + op])
        else:
            if self.curr_token_type == 'stringConstant':
                # Build the string at run time, one appendChar per character.
                self.vmwriter.write_push('constant', len(self.curr_token))
                self.vmwriter.write_call('String.new', 1)

                for ch in self.curr_token:
                    self.vmwriter.write_push('constant', ord(ch))
                    self.vmwriter.write_call('String.appendChar', 2)

                self.__next_token()

            elif self.curr_token_type == 'integerConstant':
                self.vmwriter.write_push('constant', self.curr_token)
                self.__next_token()
            elif self.curr_token_type == 'keyword':
                if self.curr_token == 'this':
                    self.vmwriter.write_push('pointer', 0)
                else:
                    # Every other keyword pushes 0; 'true' is not(0).
                    self.vmwriter.write_push('constant', 0)

                    if self.curr_token == 'true':
                        self.vmwriter.write_arithmetic('not')

                self.__next_token()
            else:
                # Identifier: look one token ahead to classify the term.
                temp = self.curr_token
                self.__next_token()
                if self.curr_token == '[':
                    self.vmwriter.write_push(self.symbol_table.kind_of(temp),
                                             self.symbol_table.index_of(temp))

                    self.__next_token()  # [ --> ?
                    self.compile_expression()
                    self.__next_token()  # ] --> ?

                    # add
                    self.vmwriter.write_arithmetic('add')

                    # pop pointer 1
                    self.vmwriter.write_pop('pointer', 1)

                    # push that 0
                    self.vmwriter.write_push('that', 0)

                elif self.curr_token in ['(', '.']:
                    self.subroutine_call(True, temp)
                else:
                    # var_name
                    self.vmwriter.write_push(self.symbol_table.kind_of(temp),
                                             self.symbol_table.index_of(temp))

        self.print_close('compile_term_end')

    def compile_expression_list(self):
        """Compile comma-separated expressions until ')' is current.

        :return: the number of expressions compiled.
        """
        self.print_open('compile_expression_list')
        count = 0

        while self.curr_token != ')':
            if self.curr_token == ',':
                self.__next_token()
            self.compile_expression()
            count += 1

        self.print_close('compile_expression_list_end')
        return count

    #-----------private methods----------------
    def __next_token(self):
        """Advance the tokenizer (if possible) and refresh the cached token.

        The token text is read through ``tokenizer.keyword()`` whatever the
        token type is.
        """
        if self.DEBUG:
            print('  ' * self.depth + 'curr_token: ' + self.curr_token)

        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()
        self.curr_token_type = self.tokenizer.token_type()
        self.curr_token = self.tokenizer.keyword()

    def __next_label_index(self):
        """Return the current label index and bump the counter."""
        index = self.label_index
        self.label_index += 1
        return index

    def print_open(self, string):
        """Print ``string`` at the current depth and indent further (DEBUG only)."""
        if self.DEBUG:
            print('  ' * self.depth + string)
            self.depth += 1

    def print_close(self, string):
        """Dedent one level and print ``string`` (DEBUG only)."""
        if self.DEBUG:
            self.depth -= 1
            print('  ' * self.depth + string)
コード例 #11
0
class CompilationEngine:
    def __init__(self, filename):
        """Prepare the tokenizer, symbol table and VM writer for ``filename``.

        :param filename: path handed to ``JackTokenizer``; the same path
            without its extension is stored as ``fileName`` and passed to
            ``VMWriter``.
        """
        self.tokenizer = JackTokenizer(filename)

        # Fixed vocabularies kept on the instance.
        self.types = ['int', 'char', 'boolean']
        self.operators = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
        self.keywordsConstant = ['true', 'false', 'null', 'this']

        self.fileName = splitext(filename)[0]
        self.symbolTable = SymbolTable()
        self.vm = VMWriter(splitext(filename)[0])

        # Label counters for while and if statements.
        self.whileLabelNum = 0
        self.ifLabelNum = 0
    
    def compile(self):
        """Compile the class, writing ``<fileName>.xml`` and the VM output.

        The XML file is kept on ``self.file`` while ``compileClass`` runs.
        Both the XML file and the VM writer are closed even when
        compilation raises, so a failed compile does not leak open handles.
        """
        self.file = open(self.fileName + ".xml", "w")
        try:
            self.compileClass()
        finally:
            try:
                self.file.close()
            finally:
                self.vm.close()

    def compileClass(self):
        """Compile ``class className { classVarDec* subroutineDec* }``.

        Stores the class name on ``self.className`` and wraps the output
        in ``<class>`` ... ``</class>``.
        """
        tokenizer = self.tokenizer
        self.writeToXml("<class>")

        self.expect("class")
        self.className = tokenizer.getToken()
        self.expectType('identifier')
        self.expect("{")

        # Class variable declarations come first, then the subroutines.
        sections = (
            (['static', 'field'], self.compileClassVarDec),
            (['constructor', 'function', 'method'], self.compileSubroutine),
        )
        for starters, compile_one in sections:
            while tokenizer.getToken() in starters:
                compile_one()

        self.expect("}")
        self.writeToXml("</class>")

    def compileClassVarDec(self):
        """Compile ``(static|field) type varName (',' varName)* ';'``.

        Every declared name is defined in the symbol table with the
        declaration's type and kind.
        """
        tokenizer = self.tokenizer
        self.writeToXml('<classVarDec>')

        kind = tokenizer.getToken()
        self.expect(['field', 'static'])

        # The type is either a primitive type or a class name.
        candidate = tokenizer.getToken()
        if candidate in self.types or tokenizer.tokenType() == 'identifier':
            var_type = candidate
            self.printToken()
            tokenizer.advance()

        while True:
            name = tokenizer.getToken()
            self.expectType('identifier')
            self.symbolTable.define(name, var_type, kind)
            if tokenizer.getToken() != ",":
                break
            self.expect(",")

        self.expect(';')

        self.writeToXml('</classVarDec>')

    def compileSubroutine(self):
        """Compile one constructor, function or method declaration.

        Resets the subroutine scope and both label counters, writes the
        ``function`` header once the locals are known, then emits the
        prologue that anchors ``this`` for constructors and methods before
        compiling the body.
        """
        tokenizer = self.tokenizer
        symbols = self.symbolTable

        self.writeToXml("<subroutineDec>")
        symbols.startSubroutine()
        self.whileLabelNum = 0
        self.ifLabelNum = 0

        routine_kind = tokenizer.getToken()
        is_method = routine_kind == 'method'

        if is_method:
            # Methods take the object as their first argument.
            symbols.define("this", self.className, SymbolTable.ARG)

        self.expect(['constructor', 'function', 'method'])

        # Return type: a primitive type, 'void' or a class name.
        return_token = tokenizer.getToken()
        if return_token in self.types + ['void'] or tokenizer.tokenType() == 'identifier':
            self.printToken()
            tokenizer.advance()

        functionName = self.className + '.' + tokenizer.getToken()

        self.expectType('identifier')
        self.expect("(")
        self.compileParameterList()
        self.expect(")")

        self.writeToXml("<subroutineBody>")
        self.expect("{")

        while tokenizer.getToken() == 'var':
            self.compileVarDec()

        self.vm.writeFunction(functionName, symbols.varCount(SymbolTable.VAR))
        if is_method:
            self.vm.writePush(VMWriter.ARG, 0)
            self.vm.writePop(VMWriter.POINTER, 0)
        elif routine_kind == 'constructor':
            # Allocate one word per field and point `this` at the block.
            self.vm.writePush(VMWriter.CONST, symbols.varCount(SymbolTable.FIELD))
            self.vm.writeCall("Memory.alloc", 1)
            self.vm.writePop(VMWriter.POINTER, 0)

        self.compileStatements()
        self.expect("}")
        self.writeToXml("</subroutineBody>")

        self.writeToXml("</subroutineDec>")

    def compileParameterList(self):
        """Compile ``((type varName) (',' type varName)*)?``.

        Each parameter is defined in the symbol table as an argument.  A
        missing type after a comma is reported through ``errorExpected``.
        """
        tokenizer = self.tokenizer

        def at_type():
            # True when the current token can start a parameter.
            return (tokenizer.getToken() in self.types
                    or tokenizer.tokenType() == 'identifier')

        def declare_parameter():
            # Consume `type varName` and record the argument.
            param_type = tokenizer.getToken()

            self.printToken()
            tokenizer.advance()

            param_name = tokenizer.getToken()
            self.symbolTable.define(param_name, param_type, SymbolTable.ARG)

            self.expectType('identifier')

        self.writeToXml("<parameterList>")

        if at_type():
            declare_parameter()

        while tokenizer.getToken() == ',':
            self.expect(",")
            if not at_type():
                self.errorExpected(tokenizer.getToken(), '|'.join(self.types + ['identifier']))
            declare_parameter()

        self.writeToXml("</parameterList>")

    def compileVarDec(self):
        """Compile ``'var' type varName (',' varName)* ';'``.

        Every declared name is defined in the symbol table as a local
        variable of the declared type.  A token that cannot be a type is
        reported through ``errorExpected`` (as ``compileParameterList``
        does) instead of failing later with ``UnboundLocalError`` because
        the type was never assigned.
        """
        self.writeToXml("<varDec>")

        self.expect('var')

        if self.tokenizer.getToken() not in self.types and self.tokenizer.tokenType() != 'identifier':
            self.errorExpected(self.tokenizer.getToken(), '|'.join(self.types + ['identifier']))

        var_type = self.tokenizer.getToken()

        self.printToken()
        self.tokenizer.advance()

        self.symbolTable.define(self.tokenizer.getToken(), var_type, self.symbolTable.VAR)
        self.expectType('identifier')

        while self.tokenizer.getToken() == ",":
            self.expect(",")
            self.symbolTable.define(self.tokenizer.getToken(), var_type, self.symbolTable.VAR)
            self.expectType('identifier')

        self.expect(';')

        self.writeToXml("</varDec>")

    def compileStatements(self):
        """Compile statements until the current token no longer starts one.

        The output is wrapped in ``<statements>`` ... ``</statements>``.
        """
        self.writeToXml("<statements>")

        # Statement keyword -> method that compiles that statement.
        handlers = {
            'while': self.compileWhile,
            'if': self.compileIf,
            'let': self.compileLet,
            'return': self.compileReturn,
            'do': self.compileDo,
        }

        while self.tokenizer.getToken() in handlers:
            keyword = self.tokenizer.getToken()
            handlers[keyword]()

        self.writeToXml("</statements>")

    def compileDo(self):
        """Compile ``'do' subroutineCall ';'`` and drop the returned value."""
        self.writeToXml('<doStatement>')
        self.expect("do")

        # The leading identifier is consumed here and handed on.
        callee = self.tokenizer.getToken()
        self.expectType("identifier")
        self.compileSubroutineCall(callee)
        self.expect(";")

        # The call's result is popped into temp 0 and never read.
        self.vm.writePop(VMWriter.TEMP, 0)
        self.writeToXml('</doStatement>')

    def compileExpressionList(self):
        """Compile ``(expression (',' expression)*)?`` before a ')'.

        :return: the number of expressions compiled.
        """
        self.writeToXml('<expressionList>')

        total = 0
        if self.tokenizer.getToken() != ")":
            while True:
                self.compileExpression()
                total += 1
                if self.tokenizer.getToken() != ",":
                    break
                self.expect(",")

        self.writeToXml('</expressionList>')

        return total
        
    def compileIf(self):
        """Compile ``if (expression) { statements } (else { statements })?``.

        Uses the labels IF_TRUE<n>, IF_FALSE<n> and IF_END<n>, where <n> is
        the value of ``ifLabelNum`` on entry; the counter is then bumped.
        """
        self.writeToXml('<ifStatement>')

        label_id = self.ifLabelNum
        self.ifLabelNum = label_id + 1
        labelTrue = "IF_TRUE{}".format(label_id)
        labelFalse = "IF_FALSE{}".format(label_id)
        labelEnd = "IF_END{}".format(label_id)

        self.expect("if")
        self.expect("(")
        self.compileExpression()

        # Jump into the true branch, otherwise skip over it.
        vm = self.vm
        vm.writeIf(labelTrue)
        vm.writeGoto(labelFalse)
        vm.writeLabel(labelTrue)

        self.expect(")")
        self.expect("{")
        self.compileStatements()
        vm.writeGoto(labelEnd)
        self.expect("}")

        vm.writeLabel(labelFalse)

        has_else = self.tokenizer.getToken() == "else"
        if has_else:
            self.expect('else')
            self.expect("{")
            self.compileStatements()
            self.expect("}")

        vm.writeLabel(labelEnd)

        self.writeToXml('</ifStatement>')

    def compileWhile(self):
        """Compile a while loop.

        Emitted control flow: WHILE_EXPn marks the condition; the negated
        condition jumps to WHILE_ENDn; the body ends with a jump back to
        WHILE_EXPn.
        """
        self.writeToXml('<whileStatement>')

        # Reserve this loop's label number before compiling nested code.
        suffix = self.whileLabelNum
        self.whileLabelNum += 1
        labelExp = "WHILE_EXP{}".format(suffix)
        labelEnd = "WHILE_END{}".format(suffix)

        # Condition: leave the loop when it evaluates to false.
        self.vm.writeLabel(labelExp)
        self.expect("while")
        self.expect("(")
        self.compileExpression()
        self.vm.writeArithmetic(VMWriter.NOT)
        self.vm.writeIf(labelEnd)
        self.expect(")")

        # Body, followed by the jump back to the condition.
        self.expect("{")
        self.compileStatements()
        self.vm.writeGoto(labelExp)
        self.expect("}")

        self.vm.writeLabel(labelEnd)
        self.writeToXml('</whileStatement>')

    def compileReturn(self):
        """Compile `return ;` or `return <expression> ;`.

        A bare return pushes constant 0 so that a value is always on the
        stack when the VM return command is written.
        """
        self.writeToXml('<returnStatement>')
        self.expect("return")

        if self.tokenizer.getToken() == ";":
            # No expression: return a dummy 0.
            self.vm.writePush(VMWriter.CONST, 0)
        else:
            self.compileExpression()
        self.vm.writeReturn()

        self.expect(";")
        self.writeToXml("</returnStatement>")

    def compileLet(self):
        """Compile `let name = expr ;` or `let name[index] = expr ;`.

        For an array target the element address (index + base) is computed
        and left on the stack before the right-hand side is compiled; the
        value is then parked in temp 0 while `that` is pointed at the
        element.
        """
        self.writeToXml('<letStatement>')
        self.expect("let")

        target = self.tokenizer.getToken()
        self.expectType('identifier')

        isArray = self.tokenizer.getToken() == "["
        if isArray:
            # Element address = index expression + array base.
            self.expect("[")
            self.compileExpression()
            self.vm.writePush(self.resolveSegment(target), self.symbolTable.indexOf(target))
            self.vm.writeArithmetic(VMWriter.ADD)
            self.expect("]")

        self.expect("=")
        self.compileExpression()

        if not isArray:
            self.vm.writePop(self.resolveSegment(target), self.symbolTable.indexOf(target))
        else:
            # Stack: [address, value]. Save the value, aim `that` at the
            # address, then store the value through that 0.
            self.vm.writePop(VMWriter.TEMP, 0)
            self.vm.writePop(VMWriter.POINTER, 1)
            self.vm.writePush(VMWriter.TEMP, 0)
            self.vm.writePop(VMWriter.THAT, 0)

        self.expect(";")
        self.writeToXml('</letStatement>')

    def compileExpression(self):
        """Compile `term (op term)*`.

        Each operator is emitted after its right-hand term, so operators are
        applied strictly left to right with no precedence.
        """
        self.writeToXml('<expression>')
        self.compileTerm()

        while True:
            operator = self.tokenizer.getToken()
            if operator not in self.operators:
                break
            self.expect(self.operators)
            self.compileTerm()
            # Both operands are now on the stack.
            self.compileOperator(operator)

        self.writeToXml('</expression>')

    def compileOperator(self, operator):
        """Emit the VM code for one Jack operator symbol.

        `*` and `/` become calls to Math.multiply / Math.divide with two
        arguments; the other symbols map to a single arithmetic command.
        An unrecognised symbol emits nothing.

        Args:
            operator: the operator symbol as a one-character string.
        """
        # Compare by value: `is` against a string literal tests identity,
        # which only works by accident of interning and warns on Python 3.8+.
        osCalls = {
            '*': "Math.multiply",
            '/': "Math.divide",
        }
        # Symbol -> name of the command constant on the VM writer.
        commands = {
            '+': 'ADD',
            '-': 'SUB',
            '&': 'AND',
            '|': 'OR',
            '~': 'NOT',
            '>': 'GT',
            '<': 'LT',
            '=': 'EQ',
        }

        if operator in osCalls:
            self.vm.writeCall(osCalls[operator], 2)
        elif operator in commands:
            self.vm.writeArithmetic(getattr(self.vm, commands[operator]))



    def compileTerm(self):
        """Compile a single term and emit the VM code that pushes its value.

        Handled forms: keyword constants (true/false/null/this), variables,
        array elements, subroutine calls, integer and string constants,
        unary `~` and `-`, and parenthesised expressions. A token matching
        none of these produces an empty <term> element.
        """
        self.writeToXml("<term>")

        if self.tokenizer.tokenType() == 'keyword':
            if self.tokenizer.getToken() == 'true':
                # true is all ones: push 0 and bitwise-negate it.
                self.vm.writePush(VMWriter.CONST, 0)
                self.vm.writeArithmetic(VMWriter.NOT)

            elif self.tokenizer.getToken() in ['false', 'null']:
                self.vm.writePush(VMWriter.CONST, 0)

            elif self.tokenizer.getToken() == 'this':
                self.vm.writePush(VMWriter.POINTER, 0)

            # Any other keyword is rejected here.
            self.expect(self.keywordsConstant)

        elif self.tokenizer.tokenType() == 'identifier':

            identifier = self.tokenizer.getToken()
            self.expectType('identifier')

            if self.tokenizer.getToken() == '[':
                # Array element: address = index expression + array base,
                # then read it through that 0.
                self.expect('[')
                self.compileExpression()

                self.vm.writePush(self.resolveSegment(identifier), self.symbolTable.indexOf(identifier))
                self.vm.writeArithmetic(VMWriter.ADD)

                self.expect(']')
                self.vm.writePop(VMWriter.POINTER, 1)
                self.vm.writePush(VMWriter.THAT, 0)

            elif self.tokenizer.getToken() in ['.', '(']:
                self.compileSubroutineCall(identifier)
            else:
                # Plain variable. Resolve the segment through the same
                # helper every other push/pop uses, instead of repeating
                # the field -> this mapping with a string literal.
                self.vm.writePush(self.resolveSegment(identifier), self.symbolTable.indexOf(identifier))

        elif self.tokenizer.tokenType() == 'intConst':
            self.vm.writePush(self.vm.CONST, self.tokenizer.getToken())
            self.expectType('intConst')

        elif self.tokenizer.tokenType() == 'stringConst':
            # Drop the first and last character of the token text
            # (presumably the surrounding quotes -- confirm with the tokenizer).
            string = self.tokenizer.getToken()
            string = string[1:-1]
            self.expectType('stringConst')

            # Build the string at run time: String.new(length), then one
            # String.appendChar call per character.
            self.vm.writePush(VMWriter.CONST, len(string))
            self.vm.writeCall('String.new', 1)

            for char in string:
                self.vm.writePush(VMWriter.CONST, ord(char))
                self.vm.writeCall('String.appendChar', 2)

        elif self.tokenizer.getToken() == '~':
            self.expect('~')
            self.compileTerm()
            self.vm.writeArithmetic(VMWriter.NOT)

        elif self.tokenizer.getToken() == '-':
            self.expect('-')
            self.compileTerm()
            self.vm.writeArithmetic(VMWriter.NEG)
        elif self.tokenizer.getToken() == '(':
            self.expect('(')
            self.compileExpression()
            self.expect(')')

        self.writeToXml("</term>")

    def compileSubroutineCall(self, identifier):
        """Compile the rest of a subroutine call whose first identifier was already consumed.

        Three forms are handled:
          * `name(args)`       -- method on the current object: `this`
                                  (pointer 0) is pushed as an extra argument
                                  and the callee is `<className>.name`.
          * `var.name(args)`   -- `var` is in the symbol table: its value is
                                  pushed as an extra argument and the callee
                                  is `<type of var>.name`.
          * `Class.name(args)` -- otherwise the callee is `Class.name` with
                                  no extra argument.

        Args:
            identifier: the identifier that preceded the current token.
        """
        if self.tokenizer.getToken() == '(':
            # Implicit receiver.
            self.vm.writePush(VMWriter.POINTER, 0)
            functionName = self.className + '.' + identifier
            nArgs = 1
        else:
            # Anything other than '(' must be '.'; requiring it here gives
            # an accurate error message and guarantees functionName/nArgs
            # are always bound before they are used.
            self.expect(".")

            if self.symbolTable.hasOf(identifier):
                # Method call on an object held in a variable.
                self.vm.writePush(self.resolveSegment(identifier), self.symbolTable.indexOf(identifier))
                functionName = self.symbolTable.typeOf(identifier) + '.' + self.tokenizer.getToken()
                nArgs = 1
            else:
                functionName = identifier + '.' + self.tokenizer.getToken()
                nArgs = 0

            self.expectType("identifier")

        self.expect("(")
        nArgs += self.compileExpressionList()
        self.expect(")")

        self.vm.writeCall(functionName, nArgs)

    def resolveSegment(self, ident):
        """Return the VM segment for `ident`.

        The symbol table's kind is used as the segment, except that kind
        'field' is translated to VMWriter.THIS.
        """
        kind = self.symbolTable.kindOf(ident)
        return VMWriter.THIS if kind == 'field' else kind

    def expect(self, expected):
        """Validate the current token's text, echo it to the XML output and advance.

        Args:
            expected: one token string, or a list/tuple/set of acceptable
                token strings.

        A mismatch is reported through self.errorExpected before the token
        is printed and consumed.
        """
        token = self.tokenizer.getToken()

        # isinstance (rather than type(...) == list) so tuples, sets and
        # list subclasses are treated as "any of these" as well.
        if isinstance(expected, (list, tuple, set, frozenset)):
            if token not in expected:
                self.errorExpected(token, "|".join(expected))
        elif token != expected:
            self.errorExpected(token, expected)

        self.printToken()
        self.tokenizer.advance()

    def expectType(self, expected):
        """Validate the current token's type, echo the token to the XML output and advance.

        Args:
            expected: one token-type string, or a list/tuple/set of
                acceptable token-type strings.

        A mismatch is reported through self.errorExpected (with the token
        text as the offending value) before the token is printed and
        consumed.
        """
        tokenType = self.tokenizer.tokenType()

        # isinstance (rather than type(...) == list) so tuples, sets and
        # list subclasses are treated as "any of these" as well.
        if isinstance(expected, (list, tuple, set, frozenset)):
            if tokenType not in expected:
                self.errorExpected(self.tokenizer.getToken(), "|".join(expected))
        elif tokenType != expected:
            self.errorExpected(self.tokenizer.getToken(), expected)

        self.printToken()
        self.tokenizer.advance()
    
    def printToken(self):
        """Write the current token as `<type>escaped-text</type>` to the XML output."""
        tag = self.tokenizer.tokenType()
        text = escape(self.tokenizer.getToken())
        pieces = ("<", tag, ">", text, "</", tag, ">")
        self.writeToXml("".join(pieces))

    def errorExpected(self, atual, expected):
        """Abort compilation with an "Expected X, Y given" message.

        Args:
            atual: the token text that was actually found.
            expected: description of what was expected instead.
        """
        message = "".join(("Expected ", expected, ", ", atual, " given"))
        exit(message)
    
    def writeToXml(self, el):
        """Append the text `el` to the XML output file, with no separator added."""
        sink = self.file
        sink.write(el)
コード例 #12
0
class CompilationEngine:
    """Recursive-descent compiler that turns one Jack class into VM code.

    Tokens come from a ``JackTokenizer`` and VM commands are emitted through
    a ``VMWriter``; both are built from the same ``filepath``. Class-level
    and subroutine-level variables are kept in two ``SymbolTable`` instances.

    Token handling uses one token of lookahead: ``_eatExpected`` reads a
    token and, when it does not match, sets ``_preserveCurrentToken`` so the
    next ``_eat*`` call re-examines that same token instead of advancing.
    """

    def __init__(self, filepath):
        """Create the tokenizer, VM writer and symbol tables for ``filepath``."""
        self._tokenizer = JackTokenizer(filepath)
        self._writer = VMWriter(filepath)
        self._classVariables = SymbolTable()
        self._subroutineVariables = SymbolTable()
        # Most recently read token; accessed through its 'type' and 'value' keys.
        self._currentToken = None
        # True when the next _eat* call must reuse _currentToken.
        self._preserveCurrentToken = False
        self._className = ''
        self._currentCompilingFunction = {'kind': '', 'name': ''}
        # Per-subroutine counter that keeps if/while labels unique.
        self._numberConditionalsStatementsCurrentFunction = 0

    def run(self):
        """Compile the whole class, then close the VM writer."""
        self._compileClass()
        self._writer.close()
        return

    # ---- compile functions ----

    def _compileClass(self):
        """Compile `class Name { classVarDec* subroutineDec* }`."""
        self._eatObligatory([T_KEYWORD], [K_CLASS])
        self._eatObligatory([T_IDENTIFIER])
        self._className = self._currentToken['value']

        self._eatObligatory([T_SYMBOL], ['{'])
        self._compileClassVarDeclarations()
        self._compileSubroutineDeclarations()
        self._eatObligatory([T_SYMBOL], ['}'])
        return

    def _compileClassVarDeclarations(self):
        """Record every `static`/`field` declaration in the class symbol table."""
        self._classVariables.startSubroutine()

        while self._eatExpected([T_KEYWORD], [K_STATIC, K_FIELD]):
            kind = VAR_STATIC if self._currentToken['value'] == K_STATIC else VAR_FIELD
            varType, name = self._compileTypedVarDeclaration()
            self._classVariables.insert(name, varType, kind)

            # Further names in the same declaration share type and kind.
            while self._eatExpected([T_SYMBOL], [',']):
                self._eatObligatory([T_IDENTIFIER])
                name = self._currentToken['value']
                self._classVariables.insert(name, varType, kind)

            self._eatObligatory([T_SYMBOL], [';'])
        return

    def _compileSubroutineDeclarations(self):
        """Compile every constructor, function and method of the class."""
        while self._eatExpected([T_KEYWORD], [K_CONSTRUCTOR, K_FUNCTION, K_METHOD]):
            self._currentCompilingFunction['kind'] = self._currentToken['value']
            self._subroutineVariables.startSubroutine()

            # A method receives the object as argument 0 (see
            # _compileMethodCode, which reads it from there), so reserve that
            # slot before the declared parameters are numbered.
            if self._currentCompilingFunction['kind'] == K_METHOD:
                self._subroutineVariables.insert('this', self._className, VAR_ARG)

            # Return type, then subroutine name.
            self._eatObligatory([T_KEYWORD, T_IDENTIFIER], [K_INT, K_CHAR, K_BOOLEAN, K_VOID])
            self._eatObligatory([T_IDENTIFIER])
            self._currentCompilingFunction['name'] = self._currentToken['value']

            self._eatObligatory([T_SYMBOL], ['('])
            self._compileParameterList()
            self._eatObligatory([T_SYMBOL], [')'])
            self._compileSubroutineBody()
        return

    def _compileParameterList(self):
        """Record the (possibly empty) parameter list as VAR_ARG entries."""
        if self._eatExpected([T_KEYWORD, T_IDENTIFIER], [K_INT, K_CHAR, K_BOOLEAN]):
            varType = self._currentToken['value']
            self._eatObligatory([T_IDENTIFIER])
            name = self._currentToken['value']
            self._subroutineVariables.insert(name, varType, VAR_ARG)

            while self._eatExpected([T_SYMBOL], [',']):
                varType, name = self._compileTypedVarDeclaration()
                self._subroutineVariables.insert(name, varType, VAR_ARG)
        return

    def _compileSubroutineBody(self):
        """Compile `{ varDec* statements }` and emit the function prologue."""
        self._eatObligatory([T_SYMBOL], ['{'])
        self._compileVarDeclaration()

        # The function command needs the local count, so it is written only
        # after all var declarations have been read.
        funcName = self._className + '.' + self._currentCompilingFunction['name']
        nLocalVars = self._subroutineVariables.getVarCountByKind(VAR_LOCAL)
        self._writer.writeFunction(funcName, nLocalVars)

        self._numberConditionalsStatementsCurrentFunction = 0
        kind = self._currentCompilingFunction['kind']
        if kind == K_CONSTRUCTOR:
            self._compileConstructorCode()
        elif kind == K_METHOD:
            self._compileMethodCode()
        self._compileStatements()
        self._eatObligatory([T_SYMBOL], ['}'])
        return

    def _compileVarDeclaration(self):
        """Record every `var` declaration as VAR_LOCAL entries."""
        while self._eatExpected([T_KEYWORD], [K_VAR]):
            varType, name = self._compileTypedVarDeclaration()
            self._subroutineVariables.insert(name, varType, VAR_LOCAL)
            while self._eatExpected([T_SYMBOL], [',']):
                self._eatObligatory([T_IDENTIFIER])
                name = self._currentToken['value']
                self._subroutineVariables.insert(name, varType, VAR_LOCAL)
            self._eatObligatory([T_SYMBOL], [';'])
        return

    def _compileStatements(self):
        """Compile statements until a token that does not start one is met."""
        while self._eatExpected([T_KEYWORD], [K_LET, K_IF, K_WHILE, K_DO, K_RETURN]):
            self._compileStatementByKeyword()
        return

    def _compileLetStatement(self):
        """Compile the remainder of a let statement (`let` already consumed)."""
        self._eatObligatory([T_IDENTIFIER])
        name = self._currentToken['value']
        segment, index = self._resolveVariable(name)

        isArrayAssignment = False
        if self._eatExpected([T_SYMBOL], ['[']):
            self._compileArrayPosition(name)
            isArrayAssignment = True

        self._eatObligatory([T_SYMBOL], ['='])
        self._compileExpression()
        if isArrayAssignment:
            # Stack: [element address, value]. Park the value in temp 0,
            # point `that` at the element, then store the value.
            self._writer.writePop(SEGMENT_TEMP, 0)
            self._writer.writePop(SEGMENT_POINTER, 1)
            self._writer.writePush(SEGMENT_TEMP, 0)
            self._writer.writePop(SEGMENT_THAT, 0)
        else:
            self._writer.writePop(segment, index)
        self._eatObligatory([T_SYMBOL], [';'])
        return

    def _compileIfStatement(self):
        """Compile the remainder of an if statement (`if` already consumed)."""
        funcName = self._className + '.' + self._currentCompilingFunction['name']
        notIfLabel = f'{funcName}_NOT_IF_{self._numberConditionalsStatementsCurrentFunction}'
        endComparisonLabel = f'{funcName}_END_COMPARISON_BLOCK_{self._numberConditionalsStatementsCurrentFunction}'
        self._numberConditionalsStatementsCurrentFunction += 1

        # Skip the true branch when the negated condition holds.
        self._eatObligatory([T_SYMBOL], ['('])
        self._compileExpression()
        self._writer.writeArithmetic('not')
        self._eatObligatory([T_SYMBOL], [')'])
        self._writer.writeIf(notIfLabel)

        self._eatObligatory([T_SYMBOL], ['{'])
        self._compileStatements()
        self._eatObligatory([T_SYMBOL], ['}'])
        self._writer.writeGoto(endComparisonLabel)

        self._writer.writeLabel(notIfLabel)
        if self._eatExpected([T_KEYWORD], [K_ELSE]):
            self._eatObligatory([T_SYMBOL], ['{'])
            self._compileStatements()
            self._eatObligatory([T_SYMBOL], ['}'])
        self._writer.writeLabel(endComparisonLabel)
        return

    def _compileWhileStatement(self):
        """Compile the remainder of a while statement (`while` already consumed)."""
        funcName = self._className + '.' + self._currentCompilingFunction['name']
        loopLabel = f'{funcName}_LOOP_{self._numberConditionalsStatementsCurrentFunction}'
        endLoopLabel = f'{funcName}_END_LOOP_{self._numberConditionalsStatementsCurrentFunction}'
        self._numberConditionalsStatementsCurrentFunction += 1

        self._writer.writeLabel(loopLabel)
        self._eatObligatory([T_SYMBOL], ['('])
        self._compileExpression()
        self._writer.writeArithmetic('not')
        self._eatObligatory([T_SYMBOL], [')'])
        self._writer.writeIf(endLoopLabel)

        self._eatObligatory([T_SYMBOL], ['{'])
        self._compileStatements()
        self._eatObligatory([T_SYMBOL], ['}'])
        self._writer.writeGoto(loopLabel)
        self._writer.writeLabel(endLoopLabel)
        return

    def _compileDoStatement(self):
        """Compile the remainder of a do statement and discard the call's result."""
        self._compileSubroutineCall()
        self._writer.writePop(SEGMENT_TEMP, 0)
        self._eatObligatory([T_SYMBOL], [';'])
        return

    def _compileReturnStatement(self):
        """Compile the remainder of a return statement; a bare return pushes 0."""
        if self._eatExpected([T_SYMBOL], [';']):
            self._writer.writePush(SEGMENT_CONST, 0)
        else:
            self._compileExpression()
            self._eatObligatory([T_SYMBOL], [';'])
        self._writer.writeReturn()
        return

    def _compileExpression(self):
        """Compile `term (op term)*`, applying operators left to right."""
        self._compileTerm()
        # `while`, not `if`: an expression may chain any number of binary
        # operators (a + b + c); stopping after one left the rest unparsed.
        while self._eatExpected([T_SYMBOL], ['+', '-', '*', '/', '&', '|', '<', '>', '=']):
            operator = self._currentToken['value']
            self._compileTerm()
            self._writer.writeArithmetic(VM_COMMAND_BY_JACK_OPERATOR[operator])
        return

    def _compileTerm(self):
        """Compile one term and emit the code that pushes its value."""
        requiredTypes = [T_INTEGER_CONSTANT, T_STRING_CONSTANT, T_KEYWORD, T_IDENTIFIER, T_SYMBOL]
        requiredValues = [K_TRUE, K_FALSE, K_NULL, K_THIS, '(', '-', '~']
        self._eatObligatory(requiredTypes, requiredValues)
        tokenType = self._currentToken['type']

        if tokenType == T_INTEGER_CONSTANT:
            integer = self._currentToken['value']
            self._writer.writePush(SEGMENT_CONST, integer)

        elif tokenType == T_STRING_CONSTANT:
            # Build the string at run time, one appendChar per character.
            stringConst = self._currentToken['value'].replace('"', '')
            self._writer.writePush(SEGMENT_CONST, len(stringConst))
            self._writer.writeCall('String.new', 1)
            for char in stringConst:
                self._writer.writePush(SEGMENT_CONST, ord(char))
                self._writer.writeCall('String.appendChar', 2)

        elif tokenType == T_KEYWORD:
            constant = self._currentToken['value']
            if constant == K_FALSE or constant == K_NULL:
                self._writer.writePush(SEGMENT_CONST, 0)
            elif constant == K_TRUE:
                # true is -1: push 1 and negate.
                self._writer.writePush(SEGMENT_CONST, 1)
                self._writer.writeArithmetic('neg')
            else:
                # Remaining keyword constant: this.
                self._writer.writePush(SEGMENT_POINTER, 0)

        elif tokenType == T_SYMBOL:
            symbol = self._currentToken['value']
            if symbol == '(':
                self._compileExpression()
                self._eatObligatory([T_SYMBOL], [')'])
            else:
                unaryOperation = 'neg' if symbol == '-' else 'not'
                self._compileTerm()
                self._writer.writeArithmetic(unaryOperation)

        elif tokenType == T_IDENTIFIER:
            name = self._currentToken['value']
            if self._eatExpected([T_SYMBOL], ['[', '.', '(']):
                symbol = self._currentToken['value']

                if symbol == '[':
                    self._compileArrayPosition(name)
                    self._writer.writePop(SEGMENT_POINTER, 1)
                    self._writer.writePush(SEGMENT_THAT, 0)
                else:
                    # Hand the '.' or '(' back so the call compiler sees it.
                    self._preserveCurrentToken = True
                    self._compileSubroutineCall(name)
            else:
                segment, index = self._resolveVariable(name)
                self._writer.writePush(segment, index)
        return

    def _compileExpressionList(self):
        """Compile a comma-separated argument list and return its length.

        The token that ends the list is always left preserved so the caller
        can consume the closing ')'.
        """
        nArgs = 0
        if not self._eatExpected([T_SYMBOL], [')']):
            self._compileExpression()
            nArgs += 1
            while self._eatExpected([T_SYMBOL], [',']):
                self._compileExpression()
                nArgs += 1
        self._preserveCurrentToken = True
        return nArgs

    # ---- auxiliary compile functions ----

    def _compileTypedVarDeclaration(self):
        """Read `type name` and return the pair ``(varType, name)``."""
        self._eatObligatory([T_KEYWORD, T_IDENTIFIER], [K_INT, K_CHAR, K_BOOLEAN])
        varType = self._currentToken['value']
        self._eatObligatory([T_IDENTIFIER])
        name = self._currentToken['value']
        return varType, name

    def _compileStatementByKeyword(self):
        """Dispatch on the statement keyword held in the current token."""
        COMPILE_FUNCTION_BY_KEYWORD = {
            K_LET: self._compileLetStatement,
            K_IF: self._compileIfStatement,
            K_WHILE: self._compileWhileStatement,
            K_DO: self._compileDoStatement,
            K_RETURN: self._compileReturnStatement,
        }

        keyword = self._currentToken['value']
        COMPILE_FUNCTION_BY_KEYWORD[keyword]()
        return

    def _compileSubroutineCall(self, name=None):
        """Compile a subroutine call.

        Args:
            name: the leading identifier when the caller has already
                consumed it; when None it is read from the token stream.

        Forms handled:
          * `foo(args)`       -- method on the current object: pointer 0 is
                                 pushed as the receiver and the callee is
                                 `<ClassName>.foo`.
          * `var.foo(args)`   -- `var` is a known variable: its value is
                                 pushed as the receiver.
          * `Class.foo(args)` -- callee is `Class.foo`, no receiver.
        """
        if name is None:
            self._eatObligatory([T_IDENTIFIER])
            name = self._currentToken['value']

        nArgs = 0
        if self._eatExpected([T_SYMBOL], ['.']):
            self._eatObligatory([T_IDENTIFIER])
            funcName = self._currentToken["value"]
            varInfo = self._searchVariableByName(name)
            if varInfo is not None:
                segment, index = varInfo
                self._writer.writePush(segment, index)
                nArgs += 1
                # NOTE(review): the callee stays the bare method name here;
                # it presumably needs the variable's class type as a prefix
                # ("<Type>.<method>"), but the symbol-table entry's type key
                # is not visible from this class -- confirm against
                # SymbolTable.getByName.
            else:
                funcName = f'{name}.{funcName}'
        else:
            # Unqualified call: a method of the class being compiled, invoked
            # on the current object. Push the receiver and qualify the name.
            self._writer.writePush(SEGMENT_POINTER, 0)
            funcName = f'{self._className}.{name}'
            nArgs += 1

        self._eatObligatory([T_SYMBOL], ['('])
        nArgs += self._compileExpressionList()
        self._eatObligatory([T_SYMBOL], [')'])
        self._writer.writeCall(funcName, nArgs)
        return

    def _compileConstructorCode(self):
        """Emit the constructor prologue: allocate the object and set `this`."""
        # The object needs one word per field of the class, not one per
        # constructor argument.
        nFields = self._classVariables.getVarCountByKind(VAR_FIELD)
        self._writer.writePush(SEGMENT_CONST, nFields)
        self._writer.writeCall('Memory.alloc', 1)
        self._writer.writePop(SEGMENT_POINTER, 0)
        return

    def _compileMethodCode(self):
        """Emit the method prologue: point `this` at argument 0."""
        self._writer.writePush(SEGMENT_ARG, 0)
        self._writer.writePop(SEGMENT_POINTER, 0)
        return

    def _compileArrayPosition(self, arrName):
        """Compile `index ]` and leave the element address on the stack."""
        segment, index = self._resolveVariable(arrName)

        self._writer.writePush(segment, index)
        self._compileExpression()
        self._writer.writeArithmetic('add')

        self._eatObligatory([T_SYMBOL], [']'])
        return

    # ---- auxiliary functions ----

    def _eatObligatory(self, requiredTokenTypes, requiredTokenValues=None):
        """Consume a token that must match, or report an error and exit.

        Args:
            requiredTokenTypes: acceptable token types.
            requiredTokenValues: acceptable values; only checked for token
                types listed in TOKEN_TYPES_WITH_EXPECTABLE_VALUES, and only
                when non-empty.
        """
        if requiredTokenValues is None:
            requiredTokenValues = []

        if not self._preserveCurrentToken and not self._tokenizer.hasMoreTokens():
            self._writer.writeCompilationError('MORE TOKENS EXPECTED!')
            exit(1)

        if self._preserveCurrentToken:
            self._preserveCurrentToken = False
        else:
            self._currentToken = self._tokenizer.advance()

        if (self._currentToken['type'] not in requiredTokenTypes or
            (self._currentToken['type'] in TOKEN_TYPES_WITH_EXPECTABLE_VALUES and
            len(requiredTokenValues) > 0 and self._currentToken['value'] not in requiredTokenValues)):
                self._writer.writeCompilationError(f'SYNTAX ERROR!')
                self._writer.writeCompilationError(f'TOKEN GIVEN: {self._currentToken}')
                self._writer.writeCompilationError(f'EXPECTED: {requiredTokenValues} in {requiredTokenTypes}')
                exit(1)
        return

    def _eatExpected(self, expectedTokenTypes, expectedTokenValues=None):
        """Try to consume an optional token.

        Returns True when the token matched. Otherwise returns False and
        marks the token as preserved so the next `_eat*` call sees it again.
        """
        if expectedTokenValues is None:
            expectedTokenValues = []

        self._currentToken = self._currentToken if self._preserveCurrentToken else self._tokenizer.advance()
        ateExpected = (self._currentToken['type'] in expectedTokenTypes and
                (self._currentToken['type'] not in TOKEN_TYPES_WITH_EXPECTABLE_VALUES or
                len(expectedTokenValues) == 0 or self._currentToken['value'] in expectedTokenValues))
        self._preserveCurrentToken = not ateExpected
        return ateExpected

    def _searchVariableByName(self, name):
        """Return ``(segment, index)`` for ``name``, or None when undefined.

        Subroutine-level variables take precedence over class-level ones.
        """
        subroutineVar = self._subroutineVariables.getByName(name)
        if subroutineVar is not None:
            return subroutineVar['segment'], subroutineVar['index']
        classVar = self._classVariables.getByName(name)
        if classVar is not None:
            return classVar['segment'], classVar['index']
        return None

    def _resolveVariable(self, name):
        """Return ``(segment, index)`` for ``name``; report and exit if undefined.

        Replaces the TypeError that unpacking a None lookup result would
        raise with an explicit compilation error.
        """
        varInfo = self._searchVariableByName(name)
        if varInfo is None:
            self._writer.writeCompilationError(f'UNDEFINED VARIABLE: {name}')
            exit(1)
        return varInfo
コード例 #13
0
class CompilationEngine:
    """
    Effects the actual compilation output. Gets its input from a JackTokenizer
    and emits its parsed structure into an output file/stream.
    """

    INDENT = "  "

    def __init__(self, jackFile, vmFile, DEBUG=False):
        """
        Set up a compilation engine that reads from jackFile and writes VM
        code to vmFile. compileClass() must be the next routine called.
        """
        self.DEBUG = DEBUG

        # Token source for the Jack input.
        self.tokenizer = JackTokenizer(jackFile)  # , DEBUG=DEBUG)

        # Current indentation level.
        self.indentLevel = 0

        # Counters for while loops and if statements.
        self.whileCounter = 0
        self.ifCounter = 0

        # NOTE(review): the symbol table and the VM writer are always built
        # with DEBUG=True, regardless of the DEBUG argument -- confirm this
        # is intended.
        self.symtab = SymbolTable(DEBUG=True)
        self.writer = VMWriter(vmFile, DEBUG=True)

    def compileClass(self):
        """
        Compiles a complete class.

        The VM writer is closed when this method finishes, including when
        the input is empty or a SyntaxError propagates.

        Raises:
            SyntaxError: if tokens remain after the class's closing brace.
        """
        self.emit(xml="<class>")

        # Alias self.tokenizer to make code more compact
        t = self.tokenizer

        try:
            # Verify that there is a token to read; if not, we're done.
            if not t.hasMoreTokens():
                return

            # Advance to the first token
            t.advance()

            self.eatAndEmit("keyword", ["class"])
            (_, self.thisClass) = self.eatAndEmit(
                "identifier", category="CLASS", state="DEFINE"
            )
            self.eatAndEmit("symbol", ["{"])

            # Expect zero or more classVarDecs. Count the fields defined.
            self.nFields = 0
            while t.tokenType() == "keyword" and t.keyWord() in ["static", "field"]:
                kw = t.keyWord()
                count = self.compileClassVarDec()

                # Count the fields to determine the size of the object
                if kw == "field":
                    self.nFields += count

            # Expect zero or more subroutineDecs
            while t.tokenType() == "keyword" and t.keyWord() in [
                "constructor",
                "function",
                "method",
            ]:
                self.compileSubroutine()

            self.eatAndEmit("symbol", ["}"])
            self.emit(xml="</class>")

            # Should not be any more input
            if t.hasMoreTokens():
                # Use format() rather than "+": the current token need not
                # be a str, and concatenation would then raise TypeError
                # instead of the intended SyntaxError.
                raise SyntaxError(
                    "Token after end of class: {}".format(t.currentToken)
                )
        finally:
            # Close the VMWriter on every exit path so the output file is
            # not left open.
            self.writer.close()

    def compileClassVarDec(self):
        """
        Compiles a static declaration or a field declaration.
        Should only be called if keyword static or keyword field is the current
        token.

        Returns:
            The number of variables declared by this declaration.
        """
        self.emit(xml="<classVarDec>")

        # Need to save the variable kind for the symbol table
        token = self.eat("keyword", ["static", "field"])
        (_, varKind) = token
        varKind = varKind.upper()
        self.emit(token=token)

        # Expect a type: one of the keywords 'int', 'char', or 'boolean', or a
        # className (identifier).
        t = self.tokenizer
        tType = t.tokenType()
        if tType == "keyword":
            (_, varType) = self.eatAndEmit("keyword", ["int", "char", "boolean"])
        else:
            (_, varType) = self.eatAndEmit("identifier", category="CLASS", state="USE")

        self.eatAndEmit("identifier", category=varKind, varType=varType, state="DEFINE")
        count = 1

        # Expect an optional list of identifiers.
        while t.tokenType() == "symbol" and t.symbol() == ",":
            self.eatAndEmit("symbol", [","])
            # Every name in the declaration shares the declared type, so
            # pass varType here as well (as is done for the first name).
            self.eatAndEmit(
                "identifier", category=varKind, varType=varType, state="DEFINE"
            )
            count += 1

        self.eatAndEmit("symbol", [";"])
        self.emit(xml="</classVarDec>")

        return count

    def compileSubroutine(self):
        """
        Compiles one complete constructor, function, or method.
        Must only be called when the current token is 'constructor',
        'function', or 'method'.
        """
        t = self.tokenizer

        self.emit(xml="<subroutineDec>")
        (_, kw) = self.eatAndEmit("keyword", ["constructor", "function", "method"])
        isMethod = kw == "method"

        # Start a fresh subroutine scope in the symbol table.
        self.symtab.startSubroutine()

        # A method receives the object as argument 0, so register "this"
        # before any declared parameter.
        if isMethod:
            self.symtab.define("this", self.thisClass, "ARG")

        # Return type: 'void', a primitive type keyword, or a class name.
        if t.tokenType() == "keyword":
            self.eatAndEmit("keyword", ["void", "int", "char", "boolean"])
        else:
            self.eatAndEmit("identifier", category="CLASS", state="USE")

        (_, functionName) = self.eatAndEmit(
            "identifier", category="SUBROUTINE", state="DEFINE"
        )

        # Parameter list.
        self.eatAndEmit("symbol", ["("])
        self.compileParameterList()
        self.eatAndEmit("symbol", [")"])

        # Body: varDec* followed by statements.
        self.emit(xml="<subroutineBody>")
        self.eatAndEmit("symbol", ["{"])

        # Total the local variables; the function command needs the count.
        nLocals = 0
        while t.tokenType() == "keyword" and t.keyWord() == "var":
            nLocals += self.compileVarDec()

        qualifiedName = "{0}.{1}".format(self.thisClass, functionName)
        self.writer.writeFunction(qualifiedName, nLocals)

        if kw == "constructor":
            # Allocate one word per field and point `this` at the new block.
            self.writer.writePush("CONST", self.nFields)
            self.writer.writeCall("Memory.alloc", 1)
            self.writer.writePop("POINTER", 0)
        elif isMethod:
            # Point `this` at the object passed as argument 0.
            self.writer.writePush("ARG", 0)
            self.writer.writePop("POINTER", 0)

        self.compileStatements()
        self.eatAndEmit("symbol", ["}"])
        self.emit(xml="</subroutineBody>")
        self.emit(xml="</subroutineDec>")

    def compileParameterList(self):
        """
        Compiles a parameter list that may be empty. The surrounding
        '( )' are not consumed here.
        """
        self.emit(xml="<parameterList>")

        t = self.tokenizer
        tType = t.tokenType()

        # Each parameter is `type name`, where the type is a keyword
        # ('int', 'char', 'boolean') or a class name (identifier).
        while tType in ["keyword", "identifier"]:
            if tType == "keyword":
                (_, varType) = self.eatAndEmit("keyword", ["int", "char", "boolean"])
            else:
                (_, varType) = self.eatAndEmit(
                    "identifier", category="CLASS", state="USE"
                )

            self.eatAndEmit(
                "identifier", category="ARG", state="DEFINE", varType=varType
            )

            # Without a following ',' the list is finished.
            if not (t.tokenType() == "symbol" and t.symbol() == ","):
                break

            self.eatAndEmit("symbol", [","])
            tType = t.tokenType()

        self.emit(xml="</parameterList>")

    def compileVarDec(self):
        """
        Compiles one var declaration and returns how many variables it
        declares.
        """
        t = self.tokenizer

        self.emit(xml="<varDec>")
        self.eatAndEmit("keyword", ["var"])

        # The declared type is a keyword ('int', 'char', 'boolean') or a
        # class name (identifier); keep it for every name that follows.
        if t.tokenType() == "keyword":
            typeToken = self.eatAndEmit("keyword", ["int", "char", "boolean"])
        else:
            typeToken = self.eatAndEmit("identifier", category="CLASS", state="USE")
        (_, varType) = typeToken

        # First name, then any number of ", name" continuations.
        self.eatAndEmit("identifier", category="VAR", state="DEFINE", varType=varType)
        nVars = 1
        while t.tokenType() == "symbol" and t.symbol() == ",":
            self.eatAndEmit("symbol", [","])
            self.eatAndEmit(
                "identifier", category="VAR", state="DEFINE", varType=varType
            )
            nVars += 1

        self.eatAndEmit("symbol", [";"])
        self.emit(xml="</varDec>")

        return nVars

    def compileStatements(self):
        """
        Compiles a sequence of statements, not including the enclosing
        '{ }'.

        Keeps dispatching on the current keyword for as long as the current
        token is a keyword. Raises SyntaxError if that keyword does not start
        a statement.
        """
        self.emit(xml="<statements>")

        # Statement keyword -> name of the method that compiles it
        handlerNames = {
            "do": "compileDo",
            "let": "compileLet",
            "while": "compileWhile",
            "return": "compileReturn",
            "if": "compileIf",
        }

        tokens = self.tokenizer
        while tokens.tokenType() == "keyword":
            handlerName = handlerNames.get(tokens.keyWord())
            if handlerName is None:
                raise SyntaxError(
                    "Expected statement. Found {}.".format(tokens.currentToken)
                )
            getattr(self, handlerName)()

        self.emit(xml="</statements>")

    def compileDo(self):
        """
        Compiles a do statement.

        Three call shapes are handled:
          * 'name.sub(...)' where name has a type in the symbol table: the
            variable is pushed as argument 0 and the call targets
            '<type of name>.sub'.
          * 'Name.sub(...)' where Name has no type in the symbol table: Name
            is treated as a class and no extra argument is pushed.
          * 'sub(...)': treated as a method of the current class, with
            POINTER 0 pushed as argument 0.
        The value returned by the call is popped into TEMP 0.
        """
        self.emit(xml="<doStatement>")
        self.eatAndEmit("keyword", ["do"])

        # The first identifier is emitted later, once its role is known.
        firstToken = self.eat("identifier")
        (_, firstName) = firstToken

        tokens = self.tokenizer
        isQualified = tokens.tokenType() == "symbol" and tokens.symbol() == "."

        if not isQualified:
            # Bare call: assumed to be a method of the current class
            self.emit(token=firstToken, category="SUBROUTINE", state="USE")
            target = self.thisClass + "." + firstName
            argCount = 1
            self.writer.writePush("POINTER", 0)
        else:
            self.eatAndEmit("symbol", ["."])
            declaredType = self.symtab.typeOf(firstName)
            if declaredType:
                # firstName is a variable: it becomes the callee's argument 0
                kind = self.symtab.kindOf(firstName)
                self.emit(token=firstToken, category=kind, state="USE")
                self.writer.writePush(kind, self.symtab.indexOf(firstName))
                owner = declaredType
                argCount = 1
            else:
                # firstName is a class name: nothing extra to push
                self.emit(token=firstToken, category="CLASS", state="USE")
                owner = firstName
                argCount = 0

            nameToken = self.eat("identifier")
            self.emit(token=nameToken, category="METHOD", state="USE")
            target = owner + "." + nameToken[1]

        self.eatAndEmit("symbol", ["("])
        argCount += self.compileExpressionList()
        self.eatAndEmit("symbol", [")"])
        self.eatAndEmit("symbol", [";"])

        # Make the call, then discard the value it returns
        self.writer.writeCall(target, argCount)
        self.writer.writePop("TEMP", 0)

        self.emit(xml="</doStatement>")

    def compileLet(self):
        """
        Compiles a let statement.

        Handles both 'let name = expr;' and 'let name[index] = expr;'.
        For the array form, base + index is computed and left on the stack
        before the right-hand side is compiled; the value is then stored
        through THAT 0.

        Raises SyntaxError if the target name is not in the symbol table.
        """
        self.emit(xml="<letStatement>")
        self.eatAndEmit("keyword", ["let"])
        (_, varName) = self.eatAndEmit("identifier", category="LET", state="USE")

        # Look up the variable in the symbol table
        varKind = self.symtab.kindOf(varName)
        varIndex = self.symtab.indexOf(varName)
        if not varKind:
            # Without this check the writer would be handed an empty segment
            # and index and produce an unusable VM instruction.
            raise SyntaxError(
                "Undeclared variable {} in let statement.".format(varName)
            )

        # Check for array qualifier
        t = self.tokenizer
        arrayRef = False
        if t.tokenType() == "symbol" and t.symbol() == "[":
            # Compute the offset. The expected value is passed as a list,
            # like every other eatAndEmit call; a bare string only matched
            # through substring membership.
            self.eatAndEmit("symbol", ["["])
            self.compileExpression()
            self.eatAndEmit("symbol", ["]"])

            # Add the offset to the base. Leave the result on the stack.
            self.writer.writePush(varKind, varIndex)
            self.writer.writeArithmetic("+")
            arrayRef = True

        self.eatAndEmit("symbol", ["="])
        self.compileExpression()
        self.eatAndEmit("symbol", [";"])

        # Value to save is at the top of the stack.
        if not arrayRef:
            # Direct POP
            self.writer.writePop(varKind, varIndex)
        else:
            # Array reference. The target address sits under the value, so
            # park the value in TEMP 0 while the address is popped into
            # POINTER 1, then store through THAT 0.
            self.writer.writePop("TEMP", 0)
            self.writer.writePop("POINTER", 1)
            self.writer.writePush("TEMP", 0)
            self.writer.writePop("THAT", 0)

        self.emit(xml="</letStatement>")

    def compileWhile(self):
        """
        Compiles a while statement.

        Emits a loop-head label, the condition, a negation and a conditional
        jump to the exit label, the body, a jump back to the loop head, and
        finally the exit label. Labels are built from the current class name
        and a per-engine counter that is advanced before the body is
        compiled.
        """
        self.emit(xml="<whileStatement>")
        self.eatAndEmit("keyword", ["while"])

        # Reserve this loop's number before anything nested can take it.
        loopNo = self.whileCounter
        self.whileCounter = loopNo + 1
        headLabel = "WHILE.{}.{}.EXP".format(self.thisClass, loopNo)
        exitLabel = "WHILE.{}.{}.EXIT".format(self.thisClass, loopNo)

        self.writer.writeLabel(headLabel)

        self.eatAndEmit("symbol", ["("])
        self.compileExpression()
        self.eatAndEmit("symbol", [")"])

        # Leave the loop when the negated condition is true
        self.writer.writeArithmetic("U~")
        self.writer.writeIf(exitLabel)

        self.eatAndEmit("symbol", ["{"])
        self.compileStatements()
        self.eatAndEmit("symbol", ["}"])

        self.writer.writeGoto(headLabel)
        self.writer.writeLabel(exitLabel)

        self.emit(xml="</whileStatement>")

    def compileReturn(self):
        """
        Compiles a return statement.

        'return;' pushes constant 0 so that there is a value to return;
        'return expr;' compiles the expression instead. The return
        instruction is written before the closing ';' is consumed.
        """
        self.emit(xml="<returnStatement>")
        self.eatAndEmit("keyword", ["return"])

        tokens = self.tokenizer
        if tokens.tokenType() == "symbol" and tokens.symbol() == ";":
            # Nothing to return explicitly: force a 0 onto the stack
            self.writer.writePush("CONST", 0)
        else:
            # Anything other than ';' must start an expression
            self.compileExpression()

        self.writer.writeReturn()
        self.eatAndEmit("symbol", [";"])
        self.emit(xml="</returnStatement>")

    def compileIf(self):
        """
        Compiles an if statement, possibly with a trailing else
        clause.

        The condition is negated and used to jump to the ELSE label. When an
        else clause is present, the true branch ends with a jump to the EXIT
        label; otherwise only the ELSE label is written. Labels are built
        from the current class name and a per-engine counter.
        """
        self.emit(xml="<ifStatement>")
        self.eatAndEmit("keyword", ["if"])
        self.eatAndEmit("symbol", ["("])
        self.compileExpression()
        self.eatAndEmit("symbol", [")"])

        self.writer.writeArithmetic("U~")

        # Reserve this statement's number before compiling nested code.
        branchNo = self.ifCounter
        self.ifCounter = branchNo + 1
        elseLabel = "IF.{}.{}.ELSE".format(self.thisClass, branchNo)
        exitLabel = "IF.{}.{}.EXIT".format(self.thisClass, branchNo)

        self.writer.writeIf(elseLabel)

        self.eatAndEmit("symbol", ["{"])
        self.compileStatements()
        self.eatAndEmit("symbol", ["}"])

        tokens = self.tokenizer
        hasElse = tokens.tokenType() == "keyword" and tokens.keyWord() == "else"

        if not hasElse:
            # No else clause: the ELSE label simply marks the end
            self.writer.writeLabel(elseLabel)
        else:
            # Skip over the else clause once the true branch has run
            self.writer.writeGoto(exitLabel)
            self.writer.writeLabel(elseLabel)

            self.eatAndEmit("keyword", ["else"])
            self.eatAndEmit("symbol", ["{"])
            self.compileStatements()
            self.eatAndEmit("symbol", ["}"])
            self.writer.writeLabel(exitLabel)

        self.emit(xml="</ifStatement>")

    def compileExpression(self):
        """
        Compiles an expression.

        An expression is a term followed by any number of (operator, term)
        pairs. Each operator is written after its right-hand term, so
        operators are applied strictly left to right with no precedence.
        """
        self.emit(xml="<expression>")
        self.compileTerm()

        binaryOps = ["+", "-", "*", "/", "&", "|", "<", ">", "="]
        tokens = self.tokenizer

        while True:
            # Stop at the first token that is not a binary operator
            if tokens.tokenType() != "symbol" or tokens.symbol() not in binaryOps:
                break

            (_, op) = self.eatAndEmit("symbol", binaryOps)
            self.compileTerm()
            self.writer.writeArithmetic(op)

        self.emit(xml="</expression>")

    def compileTerm(self):
        """
        Compiles a term. This routine is faced with a slight difficulty when
        trying to decide between some of the alternative parsing rules.
        Specifically, if the current token is an identifier, the routine must
        distinguish between a variable, an array entry, and a subroutine call.
        A single lookahead token, which may be one of '[', '(', or '.',
        suffices to distinguish between the three possibilities. Any other
        token is not part of this term and should not be advanced over.

        A bare 'name(...)' call is compiled the same way compileDo compiles
        it: as a method of the current class, with POINTER 0 pushed as
        argument 0.

        Raises SyntaxError if the current token cannot start a term.
        """
        self.emit(xml="<term>")

        # Get the current token type
        t = self.tokenizer
        tType = t.tokenType()

        # Integer constant
        if tType == "integerConstant":
            (_, value) = self.eatAndEmit("integerConstant")
            self.writer.writePush("CONST", value)
        # String constant
        elif tType == "stringConstant":
            (_, value) = self.eatAndEmit("stringConstant")
            # Declare space for the string
            self.writer.writePush("CONST", len(value))
            self.writer.writeCall("String.new", 1)
            # Save the contents of the string, one character per call
            for c in value:
                self.writer.writePush("CONST", ord(c))
                self.writer.writeCall("String.appendChar", 2)
        # Keyword constant
        elif tType == "keyword" and t.keyWord() in ["true", "false", "null", "this"]:
            (_, kw) = self.eatAndEmit("keyword", ["true", "false", "null", "this"])
            if kw in ["null", "false"]:
                # Map to 0
                self.writer.writePush("CONST", 0)
            elif kw == "true":
                # Map to -1
                self.writer.writePush("CONST", 1)
                self.writer.writeArithmetic("U-")  # NEG
            else:
                # this
                self.writer.writePush("POINTER", 0)
        # Identifier (varName, or array name, or subroutine call)
        elif tType == "identifier":
            (_, ident) = self.eatAndEmit("identifier", category="TERM", state="USE")

            # One token of lookahead. Anything other than '[', '(' or '.',
            # including a non-symbol token, means ident is a plain variable.
            lookahead = t.symbol() if t.tokenType() == "symbol" else None

            if lookahead == "[":
                # Array reference: ident is the array name.
                # Compute the offset
                self.eatAndEmit("symbol", ["["])
                self.compileExpression()
                self.eatAndEmit("symbol", ["]"])
                # Add base to offset
                self.writer.writePush(
                    self.symtab.kindOf(ident), self.symtab.indexOf(ident)
                )
                self.writer.writeArithmetic("+")
                # Update THAT and retrieve
                self.writer.writePop("POINTER", 1)
                self.writer.writePush("THAT", 0)
            elif lookahead == "(":
                # Bare subroutine call: a method of the current class, so the
                # current object goes first and the name is class-qualified.
                self.writer.writePush("POINTER", 0)
                self.eatAndEmit("symbol", ["("])
                nArgs = 1 + self.compileExpressionList()
                self.eatAndEmit("symbol", [")"])
                self.writer.writeCall(self.thisClass + "." + ident, nArgs)
            elif lookahead == ".":
                # Qualified call. ident is either a class name or a variable
                # that becomes argument 0 of the call.

                # A variable has a type in the symbol table; a class name
                # does not.
                objType = self.symtab.typeOf(ident)
                nArgs = 0
                if objType is not None:
                    # Push the object onto the stack as argument 0
                    self.writer.writePush(
                        self.symtab.kindOf(ident), self.symtab.indexOf(ident)
                    )
                    nArgs = 1
                else:
                    # ident is the class name, so use it
                    objType = ident

                self.eatAndEmit("symbol", ["."])
                (_, method) = self.eatAndEmit(
                    "identifier", category="SUBROUTINE", state="USE"
                )
                self.eatAndEmit("symbol", ["("])
                nArgs += self.compileExpressionList()
                self.eatAndEmit("symbol", [")"])
                self.writer.writeCall(objType + "." + method, nArgs)
            else:
                # Simple variable reference
                varKind = self.symtab.kindOf(ident)
                varIndex = self.symtab.indexOf(ident)
                self.writer.writePush(varKind, varIndex)
        # Sub-expression
        elif tType == "symbol" and t.symbol() == "(":
            self.eatAndEmit("symbol", ["("])
            self.compileExpression()
            self.eatAndEmit("symbol", [")"])
        # Unary op and term
        elif tType == "symbol" and t.symbol() in ["-", "~"]:
            (_, op) = self.eatAndEmit("symbol", ["-", "~"])
            self.compileTerm()
            # Mark as unary to get right version of '-'
            self.writer.writeArithmetic("U" + op)
        else:
            # Not a term
            raise SyntaxError("Expected term, found {}.".format(t.currentToken))

        self.emit(xml="</term>")

    def compileExpressionList(self):
        """
        Compiles a (possibly empty) comma-separated list of expressions.
        Returns the number of expressions compiled.

        The list ends at the first ')' seen where an expression could start;
        the ')' itself is not consumed. A ',' after an expression is consumed
        when present but is not required.
        """
        self.emit(xml="<expressionList>")

        tokens = self.tokenizer
        count = 0

        # Keep going until the closing parenthesis is the current token
        while tokens.tokenType() != "symbol" or tokens.symbol() != ")":
            count += 1
            self.compileExpression()

            # Consume the separator, if there is one
            if tokens.tokenType() == "symbol" and tokens.symbol() == ",":
                self.eatAndEmit("symbol", [","])

        self.emit(xml="</expressionList>")

        return count

    def eat(self, tokenType, tokenVals=None):
        """
        Consume the current token if it matches the expected type and value.

        tokenType is the required token type. tokenVals, when given and
        non-empty, is the collection of acceptable values; otherwise any
        value of the right type is accepted.

        Returns the (type, value) pair of the token that was consumed.
        Raises SyntaxError when the current token does not match. The
        tokenizer is only advanced if it reports more tokens.
        """
        tokens = self.tokenizer
        actualType = tokens.tokenType()

        # Each token type has its own value accessor on the tokenizer; any
        # type not listed here is read as a string constant.
        accessorNames = {
            "keyword": "keyWord",
            "symbol": "symbol",
            "identifier": "identifier",
            "integerConstant": "intVal",
        }
        actualVal = getattr(tokens, accessorNames.get(actualType, "stringVal"))()

        # Reject a wrong type, or a value outside the expected set.
        if actualType != tokenType or (tokenVals and actualVal not in tokenVals):
            raise SyntaxError(
                "Expected {} {}. Found {}.".format(
                    tokenType, " or ".join(tokenVals or []), tokens.currentToken
                )
            )

        if tokens.hasMoreTokens():
            tokens.advance()

        return (actualType, actualVal)

    def emit(self, token=None, category=None, state=None, varType=None, xml=None):
        """
        Emit the provided XML, or the token rendered as XML, through the
        writer's writeComment (and to stdout when DEBUG is set).

        When xml is empty or missing, token must be a (type, value) pair.
        A DEFINE of a STATIC/FIELD/ARG/VAR name is added to the symbol
        table; a USE with category LET or TERM is looked up there, and
        becomes "CLASS OR SUBROUTINE" when the lookup finds nothing. The
        resulting category, state, varType and index are appended to the
        opening tag as 'name=value' fields.

        Output is indented by the current indentLevel: a closing tag lowers
        the level before it is written, and any line that contains no
        closing tag raises it afterwards.
        """
        if xml:
            # Closing tags are written one level further out
            if xml[:2] == "</":
                self.indentLevel -= 1
        else:
            tokenType, tokenVal = token

            # Handle symbol table additions/lookups
            index = None
            if state == "DEFINE" and category in ("STATIC", "FIELD", "ARG", "VAR"):
                index = self.symtab.define(tokenVal, varType, category)

            if state == "USE" and category in ("LET", "TERM"):
                category = self.symtab.kindOf(tokenVal)
                if category:
                    varType = self.symtab.typeOf(tokenVal)
                    index = self.symtab.indexOf(tokenVal)
                else:
                    category = "CLASS OR SUBROUTINE"

            # Only the fields that have a value are written, in this order
            candidates = (
                (" category={}", category),
                (" state={}", state),
                (" varType={}", varType),
                (" index={}", index),
            )
            fields = "".join(
                template.format(value)
                for template, value in candidates
                if value is not None
            )

            xml = "<{0}{2}>{1}</{0}>".format(
                tokenType, self.xmlProtect(tokenVal), fields
            )

        # Output the XML, indented to the current level
        line = "{}{}\n".format(self.INDENT * self.indentLevel, xml)
        self.writer.writeComment(line)
        if self.DEBUG:
            print(line, end="")

        # An opening tag on its own line starts a new nesting level
        if "</" not in xml:
            self.indentLevel += 1

    def eatAndEmit(
        self, tokenType, tokenVals=None, category=None, state=None, varType=None
    ):
        """
        Consume the current token with eat() and immediately emit() it.

        tokenType and tokenVals are passed to eat(); category, state and
        varType are passed to emit() along with the consumed token.
        Returns the (type, value) pair that eat() returned.
        """
        consumed = self.eat(tokenType, tokenVals)
        self.emit(
            token=consumed,
            category=category,
            state=state,
            varType=varType,
        )
        return consumed

    def xmlProtect(self, token):
        """
        Return the XML entity for a token that is exactly '<', '>' or '&';
        return any other token unchanged.

        Only whole-token matches are replaced; these characters are not
        escaped when they occur inside a longer token.
        """
        replacements = (("<", "&lt;"), (">", "&gt;"), ("&", "&amp;"))
        for raw, escaped in replacements:
            if token == raw:
                return escaped
        return token
コード例 #14
0
class CompilationEngine():

    op = ['+', '-', '*', '/', '&', '|', '<', '>', '=']

    def __init__(self, input, output):
        """
        Build the engine for one input file and compile it immediately.

        input is passed to JackTokenizer and output to VMWriter. After the
        helper objects are created the tokenizer is advanced once to load
        the first token, and compileClass() is called, so construction
        performs the whole compilation.
        """
        print(''.join(('Opened ', input, ' for compiling.')))

        self.input = input

        # Collaborators: token source, symbol table and VM code sink
        self.tokenizer = JackTokenizer(input)
        self.symbolTable = SymbolTable()
        self.vmWriter = VMWriter(output)

        # Counter behind getUniqueNo(); starts at -1 so the first number is 0
        self.uniqueNo = -1

        # Load the first token, then compile the class
        self.tokenizer.advance()
        self.compileClass()

    def subTag(self, _tag):
        """
        Placeholder that must not be called: prints a notice and raises
        NameError.

        The notice is also carried as the exception message. The
        sys.exit() that used to follow the raise could never run and has
        been removed.
        """
        message = 'Subtag encountered - fix this'
        print(message)
        raise NameError(message)

    def subTagIdentifier(self, name, category, new, kind, index):
        """
        Placeholder that must not be called: prints a notice and raises
        NameError.

        The notice is also carried as the exception message. The
        sys.exit() that used to follow the raise could never run and has
        been removed.
        """
        message = 'Subtag encountered - fix this'
        print(message)
        raise NameError(message)

    def getUniqueNo(self):
        """
        Advance the engine's counter by one and return the new value as a
        string. Callers in this class append it to label names.
        """
        nextNo = self.uniqueNo + 1
        self.uniqueNo = nextNo
        return str(nextNo)

    def compileClass(self):
        """
        Compile a whole class and then close the engine.

        Assumes the current token is the 'class' keyword. Stores the class
        name in self.className, then compiles class variable declarations
        and subroutines until the closing '}' is the current token.

        Raises TokenTypeError when a keyword inside the class body starts
        neither a class variable declaration nor a subroutine. Previously
        such a keyword was never consumed and the loop never terminated.
        """
        tokenizer = self.tokenizer

        # Keyword: class
        tokenizer.advance()

        # Identifier: class name
        # Classes are not entered into symboltable
        self.className = tokenizer.identifier()
        tokenizer.advance()

        # Symbol: {
        tokenizer.advance()

        # classVarDec or Subroutine
        # The raw token is compared so that a non-symbol token does not trip
        # the tokenizer's own type checking.
        while tokenizer.rawToken() != '}':
            keyword = tokenizer.keyWord()
            if keyword in ('STATIC', 'FIELD'):
                self.compileClassVarDec()
            elif keyword in ('CONSTRUCTOR', 'FUNCTION', 'METHOD'):
                self.compileSubroutine()
            else:
                # Nothing would consume this token, so fail loudly
                raise TokenTypeError('Class member keyword',
                                     tokenizer.tokenType(),
                                     tokenizer.rawToken(),
                                     tokenizer.lineNo)

        # Symbol: }
        # Do not advance, we are done

        self.close()

    def close(self):
        """
        Close the VM writer, then the tokenizer, and print a completion
        message naming the input.
        """
        for resource in (self.vmWriter, self.tokenizer):
            resource.close()

        print('Finished compiling ' + self.input + '.')

    def compileClassVarDec(self):
        """
        Compile one class-level variable declaration.

        Assumes the current token is the STATIC or FIELD keyword. Every
        declared name is defined in the symbol table with the declared type
        and kind 'FIELD'; no VM code is written. STATIC declarations are not
        supported and raise NotImplementedError before any token is
        consumed.
        """
        tokenizer = self.tokenizer

        # Keyword: STATIC or FIELD
        leading = tokenizer.keyWord()
        if leading == 'FIELD':
            _kind = 'FIELD'
        elif leading == 'STATIC':
            raise NotImplementedError
        tokenizer.advance()

        # Keyword: type | identifier (if class)
        try:
            _type = tokenizer.keyWord()
        except TokenTypeError:
            _type = tokenizer.identifier()
        tokenizer.advance()

        # One or more comma-separated names sharing the same type and kind
        while True:
            # Identifier: varName -> declare in symboltable
            self.symbolTable.define(tokenizer.identifier(), _type, _kind)
            tokenizer.advance()

            if tokenizer.symbol() != ',':
                break

            # Symbol: ,
            tokenizer.advance()

        # Symbol: ;
        tokenizer.advance()

    def compileSubroutine(self):
        """
        Compile one constructor, function or method.

        Assumes the current token is the CONSTRUCTOR, FUNCTION or METHOD
        keyword. Starts a new subroutine scope in the symbol table, skips
        the return type, compiles the parameter list and the var
        declarations, writes the VM function header, and then compiles the
        body statements.

        A constructor first allocates as many words as there are FIELD
        variables and points 'pointer 0' at them; a method first copies
        'argument 0' into 'pointer 0'.
        """
        tokenizer = self.tokenizer
        writer = self.vmWriter
        table = self.symbolTable

        # Create new subroutine scoped symbol table
        table.startSubroutine()

        # Keyword: constructor | function | method
        subroutineKind = tokenizer.keyWord()
        tokenizer.advance()

        # Keyword: void | type | identifier (if class) -- skipped, not used
        tokenizer.advance()

        # Identifier: subroutineName
        subroutineName = tokenizer.identifier()
        tokenizer.advance()

        # Symbol: (
        tokenizer.advance()

        # NOTE(review): nothing here reserves argument 0 for the object of a
        # METHOD before the parameters are defined; confirm whether
        # SymbolTable accounts for that.
        self.compileParameterList()

        # Symbol: )
        tokenizer.advance()

        # Symbol: {  (start of the subroutine body)
        tokenizer.advance()

        # subroutineBody: varDecs
        while tokenizer.keyWord() == 'VAR':
            self.compileVarDec()

        # The function header is written only now because it needs the
        # number of locals, which is known once all varDecs are processed.
        fullName = self.className + '.' + subroutineName
        writer.writeFunction(fullName, table.varCount('LOCAL'))

        if subroutineKind == 'METHOD':
            # Set 'this' by copying the first argument into pointer 0
            writer.writePush('argument', 0)
            writer.writePop('pointer', 0)
        elif subroutineKind == 'CONSTRUCTOR':
            # Allocate one word per field and point 'this' at the block
            writer.writePush('constant', table.varCount('FIELD'))
            writer.writeCall('Memory.alloc', 1)
            writer.writePop('pointer', 0)

        # subroutineBody: Statements
        self.compileStatements()

        # Symbol: }  (end of the subroutine body)
        tokenizer.advance()

    def compileParameterList(self):
        """
        Compile a (possibly empty) parameter list.

        Assumes the current token is the type of the first parameter, or
        ')' when there are no parameters. Each parameter is defined in the
        symbol table with kind 'ARGUMENT'; no VM code is written. The
        closing ')' is left as the current token.

        The parameter type may be a keyword or, as in compileVarDec, a class
        name identifier.
        """
        tokenizer = self.tokenizer

        # Compare by value: identity ('is') against a string literal only
        # works when the interpreter happens to share the object.
        if tokenizer.rawToken() == ')':
            return

        while True:
            # Keyword: type | identifier (if class)
            try:
                _type = tokenizer.keyWord()
            except TokenTypeError:
                _type = tokenizer.identifier()
            tokenizer.advance()

            # Identifier: varName
            # Declare in symboltable
            self.symbolTable.define(tokenizer.identifier(), _type, 'ARGUMENT')
            tokenizer.advance()

            # Another parameter follows only after a ','
            if tokenizer.rawToken() != ',':
                break

            # Symbol: ,
            tokenizer.advance()

    def compileVarDec(self):
        """
        Compile one 'var' declaration.

        Assumes the current token is the VAR keyword. Every declared name is
        defined in the symbol table with the declared type and kind 'LOCAL';
        no VM code is written. The token after the closing ';' becomes the
        current token.
        """
        tokenizer = self.tokenizer

        # Keyword: var
        tokenizer.advance()

        # Keyword: type | identifier (if class)
        # The tokenizer moves past the type token whichever accessor
        # succeeded, and also when reading it failed.
        try:
            _type = tokenizer.keyWord()
        except TokenTypeError:
            _type = tokenizer.identifier()
        finally:
            tokenizer.advance()

        # One or more comma-separated names sharing the same type
        while True:
            # Identifier: varName -> define in symboltable
            self.symbolTable.define(tokenizer.identifier(), _type, 'LOCAL')
            tokenizer.advance()

            if tokenizer.symbol() != ',':
                break

            # Symbol: ,
            tokenizer.advance()

        # Symbol: ;
        tokenizer.advance()

    def compileStatements(self):
        """
        Compile statements until the closing '}' is the current token.

        Assumes the current token is one of the keywords LET, IF, WHILE, DO
        or RETURN (or '}' for an empty block). Each statement compiler
        consumes its own tokens, so nothing is advanced here.

        Raises TokenTypeError for a keyword that does not start a
        statement.
        """
        # Statement keyword -> name of the method that compiles it
        handlerNames = {
            'LET': 'compileLet',
            'IF': 'compileIf',
            'WHILE': 'compileWhile',
            'DO': 'compileDo',
            'RETURN': 'compileReturn',
        }

        tokenizer = self.tokenizer

        # Compare by value: identity ('is') against a string literal only
        # works when the interpreter happens to share the object.
        while tokenizer.rawToken() != '}':
            handlerName = handlerNames.get(tokenizer.keyWord())
            if handlerName is None:
                raise TokenTypeError('Statement keyword',
                                     tokenizer.tokenType(),
                                     tokenizer.rawToken(),
                                     tokenizer.lineNo)
            getattr(self, handlerName)()

    def compileSubroutineCall(self):
        """
        Compile a subroutine call and write the VM 'call'.

        Assumes the current token is the first identifier of the call.
          * 'name.sub(...)': if name has a type in the symbol table, the
            call targets '<type>.sub' and the variable is pushed first as an
            extra argument; otherwise name is used as the class name.
          * 'sub(...)': 'pointer 0' is pushed as an extra argument and the
            name is prefixed with the current class name.
        The token after the closing ')' becomes the current token.
        """
        tokenizer = self.tokenizer
        table = self.symbolTable
        writer = self.vmWriter

        # Identifier: subroutineName or (className | varName)
        firstName = tokenizer.identifier()
        declaredType = table.typeOf(firstName)
        if declaredType:
            # A declared variable: call through its type
            targetObject, subroutineName = firstName, declaredType
        else:
            # Not declared: assume the call is made on the class directly
            targetObject, subroutineName = None, firstName
        tokenizer.advance()

        thisArg = 0

        # Symbol: '.' (qualified call) or '(' (call within this class)
        lookahead = tokenizer.symbol()
        if lookahead == '.':
            tokenizer.advance()

            # Identifier: subroutineName
            subroutineName = subroutineName + '.' + tokenizer.identifier()

            # Push the object (if there is one) so the callee receives it
            kind = table.kindOf(targetObject) if targetObject is not None else None
            if kind:
                # NOTE(review): the kind is compared with lower-case 'field'
                # while define() is called with upper-case kinds elsewhere
                # in this class; confirm what kindOf() returns.
                segment = 'this' if kind == 'field' else kind
                writer.writePush(segment, table.indexOf(targetObject))
                thisArg = 1
            tokenizer.advance()

        elif lookahead == '(':
            # Call within the current class: pass the current object along
            writer.writePush('pointer', 0)
            thisArg = 1

            # Qualify the name so it is a complete VM function name
            subroutineName = self.className + '.' + subroutineName

        # Symbol: (
        tokenizer.advance()

        nArgs = self.compileExpressionList()

        # Symbol: )
        tokenizer.advance()

        writer.writeCall(subroutineName, nArgs + thisArg)

    def compileDo(self):
        """
        Compile a do statement.

        Assumes the current token is the DO keyword. The call itself is
        compiled by compileSubroutineCall(). A do statement has no use for
        the value the call leaves on the stack, so it is popped into
        'temp 0'; previously it was left behind on every do statement.
        """
        # Keyword: Do
        self.tokenizer.advance()

        self.compileSubroutineCall()

        # Symbol: ;
        self.tokenizer.advance()

        # Discard the value returned by the call
        self.vmWriter.writePop('temp', 0)

    def compileLet(self):
        """
        Compile a let statement.

        Assumes the current token is the LET keyword. Handles both
        'let name = expr;' and 'let name[index] = expr;'.

        For the array form the target address (index + base) is computed and
        left on the stack before the right-hand side is compiled, and the
        value is then stored through 'that 0'. Previously the index
        expression was compiled but ignored, so the value overwrote the
        array variable itself and the index was left on the stack.
        """
        tokenizer = self.tokenizer
        writer = self.vmWriter

        # Keyword: let
        tokenizer.advance()

        # identifier: varName
        varName = tokenizer.identifier()
        tokenizer.advance()

        # Resolve where the variable lives. A kind of 'field' is addressed
        # through the 'this' segment; any other kind is used as the segment
        # name directly.
        kind = self.symbolTable.kindOf(varName)
        segment = 'this' if kind == 'field' else kind
        index = self.symbolTable.indexOf(varName)

        # index if applicable
        isArrayEntry = False
        if tokenizer.symbol() == '[':
            isArrayEntry = True

            # Symbol: [
            tokenizer.advance()

            # Expression (leaves the index on the stack)
            self.compileExpression()

            # Symbol: ]
            tokenizer.advance()

            # Target address = index + base; stays on the stack for later
            writer.writePush(segment, index)
            writer.writeArithmetic('ADD')

        # Symbol: =
        tokenizer.advance()

        # Expression
        self.compileExpression()

        # Symbol: ;
        tokenizer.advance()

        if isArrayEntry:
            # The target address sits under the value: park the value in
            # temp 0, point 'that' at the address, then store the value.
            writer.writePop('temp', 0)
            writer.writePop('pointer', 1)
            writer.writePush('temp', 0)
            writer.writePop('that', 0)
        else:
            # Pop from top of stack straight into the variable
            writer.writePop(segment, index)

    def compileWhile(self):
        """
        Compile a while statement.

        Assumes the current token is the WHILE keyword. Writes a start
        label, the condition, a conditional jump to the end label, the body,
        a jump back to the start label and the end label. Both labels carry
        a number obtained from getUniqueNo().
        """
        tokenizer = self.tokenizer
        writer = self.vmWriter

        # Label names for this loop
        uniqueNo = self.getUniqueNo()
        startLabel = 'startWhile' + uniqueNo
        endLabel = 'endWhile' + uniqueNo

        # Keyword: while
        tokenizer.advance()

        # Symbol: (
        tokenizer.advance()

        writer.writeLabel(startLabel)

        # Expression
        self.compileExpression()

        # Leave the loop unless the condition value is -1: adding 1 turns
        # -1 into 0, the only value for which the jump is not taken.
        writer.writePush('constant', 1)
        writer.writeArithmetic('ADD')
        writer.writeIf(endLabel)

        # Symbol: )
        tokenizer.advance()

        # Symbol: {
        tokenizer.advance()

        # Statements
        self.compileStatements()

        # Back to the condition, then the exit point
        writer.writeGoto(startLabel)
        writer.writeLabel(endLabel)

        # Symbol: }
        tokenizer.advance()

    def compileReturn(self):
        """
        Compile a return statement.

        Assumes the current token is the RETURN keyword. 'return;' pushes
        constant 0 so that there is a value to return; 'return expr;'
        compiles the expression instead. The token after the closing ';'
        becomes the current token.
        """
        # Keyword: return
        self.tokenizer.advance()

        # Symbol: ; or expression then ;
        # Compare by value: identity ('is') against a string literal only
        # works when the interpreter happens to share the object.
        if self.tokenizer.rawToken() != ';':
            self.compileExpression()
        else:
            # No return value - push constant 0
            self.vmWriter.writePush('constant', 0)

        # Symbol: ;
        self.tokenizer.advance()

        # Write return
        self.vmWriter.writeReturn()

    def compileIf(self):
        """
        Compile an if statement with an optional else clause.

        Assumes the current token is the IF keyword. Writes the condition, a
        conditional jump to the else label, the true branch, a jump to the
        end label, the else label, the else branch (if any) and the end
        label. Both labels carry a number obtained from getUniqueNo().

        Only the check for a following ELSE keyword tolerates a
        TokenTypeError (the next token may not be a keyword at all).
        Previously the whole else clause was inside the try block, so a
        TokenTypeError raised while compiling it was silently discarded.
        """
        tokenizer = self.tokenizer
        writer = self.vmWriter

        # Get new unique no
        uniqueNo = self.getUniqueNo()

        # Keyword: if
        tokenizer.advance()

        # Symbol: (
        tokenizer.advance()

        # Expression
        self.compileExpression()

        # Skip the true branch unless the condition value is -1: adding 1
        # turns -1 into 0, the only value for which the jump is not taken.
        writer.writePush('constant', 1)
        writer.writeArithmetic('ADD')
        writer.writeIf('startElse' + uniqueNo)

        # Symbol: )
        tokenizer.advance()

        # Symbol: {
        tokenizer.advance()

        # Statements
        self.compileStatements()

        # Symbol: }
        tokenizer.advance()

        writer.writeGoto('endIf' + uniqueNo)

        writer.writeLabel('startElse' + uniqueNo)

        # Is there an else clause? Any non-keyword token means there is not.
        try:
            hasElse = tokenizer.keyWord() == 'ELSE'
        except TokenTypeError:
            hasElse = False

        if hasElse:
            # keyword: else
            tokenizer.advance()

            # symbol: {
            tokenizer.advance()

            # Compile statements
            self.compileStatements()

            # symbol: }
            tokenizer.advance()

        writer.writeLabel('endIf' + uniqueNo)

    def compileExpression(self):
        """Compile ``term (op term)*``.

        Each operator is written after its right-hand term, so operators are
        applied strictly left to right with no precedence. ``*`` and ``/``
        are compiled as calls to ``Math.multiply`` and ``Math.divide``; the
        other operators map to a single arithmetic command.
        """
        arithmetic = {
            '+': 'ADD',
            '-': 'SUB',
            '=': 'EQ',
            '>': 'GT',
            '<': 'LT',
            '&': 'AND',
            '|': 'OR',
            '~': 'NOT',
        }
        # Operators with no VM primitive are compiled as two-argument calls.
        # '/' was previously missing and therefore silently dropped.
        os_calls = {
            '*': 'Math.multiply',
            '/': 'Math.divide',
        }

        # Term
        self.compileTerm()

        while self.tokenizer.symbol() in CompilationEngine.op:

            # Symbol: op (saved so it can be written after the next term)
            op = self.tokenizer.symbol()
            self.tokenizer.advance()

            # Term
            self.compileTerm()

            # Write op
            if op in arithmetic:
                self.vmWriter.writeArithmetic(arithmetic[op])
            elif op in os_calls:
                self.vmWriter.writeCall(os_calls[op], 2)

    def compileTerm(self):
        """Compile a single term.

        Handles integer constants, string constants, the keyword constants
        (true, false, null, this), identifiers (plain variable, array
        subscript, subroutine call), parenthesised expressions and the unary
        operators ``-`` and ``~``.
        """
        tokenType = self.tokenizer.tokenType()

        if tokenType == 'INT_CONST':

            # Integer constant
            self.vmWriter.writePush('constant', self.tokenizer.intVal())
            self.tokenizer.advance()

        elif tokenType == 'STRING_CONST':

            # String constant: allocate the string, then append every
            # character. String.appendChar returns the string, so exactly one
            # reference (the term's value) remains on the stack afterwards.
            string = self.tokenizer.stringVal()
            self.vmWriter.writePush('constant', len(string))
            self.vmWriter.writeCall('String.new', 1)

            for char in string:
                self.vmWriter.writePush('constant', ord(char))
                self.vmWriter.writeCall('String.appendChar', 2)

            # Next token
            self.tokenizer.advance()

        elif tokenType == 'KEYWORD':

            # Keyword constant (true | false | null | this)
            # NOTE (original author): `let loop = true;` was reported as not
            # pushing -1 to the stack - unverified.
            keyword = self.tokenizer.keyWord()
            if keyword == 'TRUE':
                # true is -1: push 1 and negate it.
                self.vmWriter.writePush('constant', 1)
                self.vmWriter.writeArithmetic('NEG')

            elif keyword == 'FALSE' or keyword == 'NULL':
                self.vmWriter.writePush('constant', 0)

            elif keyword == 'THIS':
                self.vmWriter.writePush('pointer', 0)

            self.tokenizer.advance()

        elif tokenType == 'IDENTIFIER':
            # varName | varName[expression] | subroutineCall

            # Symbol: [ | ( | .
            if self.tokenizer.lookAhead() == '[':
                # varName[expression]
                # NOTE(review): this branch only calls the subTag* helpers and
                # compiles the index expression; no push/pop for the array
                # element is written here - confirm whether array reads are
                # handled elsewhere.

                # Identifier: varName
                self.subTagIdentifier(
                    self.tokenizer.identifier(), 'VAR', 'FALSE',
                    self.symbolTable.kindOf(self.tokenizer.identifier()),
                    self.symbolTable.indexOf(self.tokenizer.identifier()))
                self.tokenizer.advance()

                # Symbol: [
                self.subTag('symbol')
                self.tokenizer.advance()

                # Expression
                self.compileExpression()

                # Symbol: ]
                self.subTag('symbol')
                self.tokenizer.advance()

            elif self.tokenizer.lookAhead() == '(' or self.tokenizer.lookAhead(
            ) == '.':
                # subroutine call
                self.compileSubroutineCall()

            else:
                # Identifier: varName
                # Look up segment and index in the symbol table and push the
                # variable; fields live in the 'this' segment.
                varName = self.tokenizer.identifier()
                if self.symbolTable.kindOf(varName) == 'field':
                    self.vmWriter.writePush('this',
                                            self.symbolTable.indexOf(varName))
                else:
                    self.vmWriter.writePush(self.symbolTable.kindOf(varName),
                                            self.symbolTable.indexOf(varName))
                self.tokenizer.advance()

        elif self.tokenizer.symbol() == '(':

            # ( Expression )

            # Symbol: (
            self.tokenizer.advance()

            # Expression
            self.compileExpression()

            # Symbol: )
            self.tokenizer.advance()

        elif self.tokenizer.symbol() in ['-', '~']:

            # Symbol: unaryop
            op = self.tokenizer.symbol()
            self.tokenizer.advance()

            # Term
            self.compileTerm()

            # Write op after its operand
            if op == '-':
                self.vmWriter.writeArithmetic('NEG')
            elif op == '~':
                self.vmWriter.writeArithmetic('NOT')

    def compileExpressionList(self):
        """Compile ``(expression (',' expression)*)?``.

        Returns:
            The number of expressions compiled (0 for an empty list).
        """
        nArgs = 0

        # The list may be empty. Compare by value: identity (`is not`) on a
        # string literal only works by accident of interning.
        if self.tokenizer.rawToken() != ')':

            # Expression
            self.compileExpression()
            nArgs += 1

            # Further comma delimited expressions
            while self.tokenizer.rawToken() == ',':
                # Symbol: ,
                self.tokenizer.advance()

                # Expression
                self.compileExpression()
                nArgs += 1

        return nArgs
コード例 #15
0
class CompilationEngine:
    """Compile a list of Jack tokens into VM code through a ``VMWriter``.

    Every token is indexable: position 0 holds the token type and position 1
    the token text. Constructing the engine compiles the whole class and
    closes the writer.
    """

    # Binary operator text (as it appears in the token list) -> VM command.
    # "*" and "/" are listed so they are recognised as operators; they are
    # compiled as calls to Math.multiply / Math.divide, not as commands.
    all_operators = {
        "+": "add",
        "-": "sub",
        "/": "div",
        "*": "mul",
        "&amp;": "and",
        "|": "or",
        "&gt;": "gt",
        "&lt;": "lt",
        "=": "eq"
    }

    def __init__(self, tokens, out_file):
        """
        initializing a new compile engine object and compiling the class
        :param tokens: the list of tokens created by the tokenizer
        :param out_file: the output file.
        """
        self.__tokens = tokens
        self.__file = out_file
        self.__i = 0  # index of the next token to be consumed
        self.__class_symbol = SymbolTable()
        self.__subroutine_symbol = SymbolTable()
        self.__cur_token = ()
        self.__class_name = ""
        self.__writer = VMWriter(out_file)
        self.__label_count = 0
        self.compile_class()
        self.__writer.close()

    def __next_label(self):
        """
        creating a fresh label name ("L<n>") and advancing the counter
        :return: the label
        """
        label = "L%s" % self.__label_count
        self.__label_count += 1
        return label

    def eat(self):
        """
        consuming a single token and moving to the next one
        """
        self.__cur_token = self.__tokens[self.__i]
        self.__i += 1

    def get_token(self):
        """
        :return: the text of the most recently consumed token
        """
        return self.__cur_token[1]

    def peek(self):
        """
        checking the next token without consuming it
        :return: the token text
        """
        return self.__tokens[self.__i][1]

    def peek_type(self):
        """
        checking the next token type without consuming it
        :return: the token type
        """
        return self.__tokens[self.__i][0]

    def peek_ll2(self):
        """
        checking the token after the next one without consuming anything
        :return: the token text
        """
        return self.__tokens[self.__i + 1][1]

    def compile_while_stat(self):  # next token is "while"
        """
        compiling while statement
        """
        self.eat()  # while
        self.eat()  # (
        label_loop = self.__next_label()
        label_exit = self.__next_label()
        self.__writer.write_label(label_loop)
        self.compile_expression()
        # leave the loop when the condition is false
        self.__writer.write_arithmetic("not")
        self.__writer.write_if(label_exit)
        self.eat()  # )
        self.eat()  # {
        self.compile_statements()
        self.__writer.write_go_to(label_loop)
        self.eat()  # }
        self.__writer.write_label(label_exit)

    def compile_return_stat(self):  # next token is "return"
        """
        compiling return statement; a bare "return;" pushes constant 0
        """
        self.eat()  # return
        if self.peek() != ";":
            self.compile_expression()
        else:
            self.__writer.write_push("constant", 0)
        self.__writer.write_return()
        self.eat()  # ;

    def compile_do_stat(self):
        """
        compiling do statement; the call's return value is discarded
        """
        self.eat()  # do
        self.compile_subroutine_call()
        self.__writer.write_pop("temp", 0)
        self.eat()  # ;

    def compile_if_stat(self):
        """
        compiling if statement with an optional else clause
        """
        self.eat()  # if
        self.eat()  # (
        self.compile_expression()
        self.__writer.write_arithmetic("not")
        label_false = self.__next_label()
        label_continue = self.__next_label()
        self.__writer.write_if(label_false)
        self.eat()  # )
        self.eat()  # {
        self.compile_statements()
        self.__writer.write_go_to(label_continue)
        self.eat()  # }
        self.__writer.write_label(label_false)
        if self.peek() == "else":
            self.eat()  # else
            self.eat()  # {
            self.compile_statements()
            self.eat()  # }
        self.__writer.write_label(label_continue)

    def compile_class_var_dec(self):
        """
        compiling class variable declaration
        """
        self.eat()  # static | field
        kind = self.get_token()
        if kind == "var":
            kind = SymbolTable.VAR
        self.var_dec_helper(kind, self.__class_symbol)

    def compile_var_dec(self):
        """
        compiling variable declaration
        """
        self.eat()  # var
        self.var_dec_helper(SymbolTable.VAR, self.__subroutine_symbol)

    def var_dec_helper(self, kind, symbol_table):
        """
        registering "type name (, name)* ;" in the given symbol table
        :param kind: the kind recorded for every declared name
        :param symbol_table: the table the names are added to
        """
        self.eat()
        var_type = self.get_token()
        self.eat()
        symbol_table.add(self.get_token(), var_type, kind)
        while self.peek() != ";":
            self.eat()  # ,
            self.eat()  # name
            symbol_table.add(self.get_token(), var_type, kind)
        self.eat()  # ;

    def compile_subroutine_body(self, func_name, func_type):
        """
        compiling subroutine body
        :param func_name: the full "Class.subroutine" name
        :param func_type: "constructor", "function" or "method"
        """
        self.eat()  # {
        while self.peek() == "var":
            self.compile_var_dec()
        # the function command needs the local count, so it is written only
        # after all var declarations have been registered
        self.__writer.write_function(
            func_name, self.__subroutine_symbol.var_count(SymbolTable.VAR))
        self.__subroutine_symbol.add("this", self.__class_name, "pointer")
        if func_type == "method":
            # anchor "this" to the object passed as argument 0
            self.__writer.write_push(SymbolTable.ARG, 0)
            self.__writer.write_pop("pointer", 0)

        elif func_type == "constructor":
            # allocate one word per field and anchor "this" to the new block
            self.__writer.write_push(
                "constant", self.__class_symbol.var_count(SymbolTable.FIELD))
            self.__writer.write_call("Memory.alloc", 1)
            self.__writer.write_pop("pointer", 0)
        self.compile_statements()
        self.eat()  # }

    def compile_parameter_list(self):
        """
        compiling parameters list; every parameter is registered as an
        argument in the subroutine symbol table
        """
        cur_stat = self.peek()
        if cur_stat != ")":
            self.eat()
            param_type = self.get_token()
            self.eat()
            self.__subroutine_symbol.add(
                self.get_token(), param_type, SymbolTable.ARG)
            cur_stat = self.peek()

        while cur_stat == ",":
            self.eat()  # ,
            self.eat()
            param_type = self.get_token()
            self.eat()
            self.__subroutine_symbol.add(
                self.get_token(), param_type, SymbolTable.ARG)
            cur_stat = self.peek()

    def compile_class(self):
        """
        compiling class
        """
        self.eat()  # class
        self.eat()  # class name
        self.__class_name = self.get_token()
        self.eat()  # {

        while self.peek() in ("static", "field"):
            self.compile_class_var_dec()

        while self.peek() != "}":
            self.compile_subroutine_dec()
        self.eat()  # }

    def compile_expression(self):
        """
        compiling expression: term (op term)*, applied left to right
        """
        self.compile_term()
        cur_stat = self.peek()
        while cur_stat in CompilationEngine.all_operators:
            self.eat()  # op
            self.compile_term()
            self.compile_operation(cur_stat)
            cur_stat = self.peek()

    def compile_operation(self, op):
        """
        compiling operation
        :param op: current op
        """
        if op == "*":
            self.__writer.write_call("Math.multiply", 2)

        elif op == "/":
            self.__writer.write_call("Math.divide", 2)

        else:
            self.__writer.write_arithmetic(CompilationEngine.all_operators[op])

    def compile_statements(self):
        """
        compiling statements until a token that starts no statement is met
        """
        while self.compile_statement():
            continue

    def compile_subroutine_call(self):
        """
        compiling subroutine call
        """
        self.eat()
        name = self.get_token()
        if self.peek() == "(":
            # name(...): a method of the current object
            self.eat()  # (
            self.__writer.write_push("pointer", 0)
            args = self.compile_expression_list()
            self.eat()  # )
            self.__writer.write_call(self.__class_name + "." + name, args + 1)
        else:
            # name.sub(...): name is either a variable or a class name
            self.eat()  # .
            val = self.find(name)
            self.eat()  # subroutine name
            var_name = self.get_token()
            self.eat()  # (
            if not val:
                # unknown identifier: treated as a class name
                args = 0
            else:
                # variable: push it as argument 0 and call on its type
                self.__writer.push_val(val)
                name = val[0]
                args = 1

            args += self.compile_expression_list()
            self.__writer.write_call(name + "." + var_name, args)
            self.eat()  # )

    def compile_expression_list(self):
        """
        compiling expression list
        :return: the number of expressions compiled
        """
        args = 0
        cur_stat = self.peek()
        if cur_stat != ")":
            self.compile_expression()
            args += 1
            cur_stat = self.peek()

        while cur_stat == ",":
            self.eat()  # ,
            args += 1
            self.compile_expression()
            cur_stat = self.peek()

        return args

    def compile_statement(self):
        """
        compiling statement
        :return: 1 when a statement was compiled, 0 when the next token does
                 not start a statement
        """
        cur_stat = self.peek()
        if cur_stat == "if":
            self.compile_if_stat()
        elif cur_stat == "while":
            self.compile_while_stat()
        elif cur_stat == "do":
            self.compile_do_stat()
        elif cur_stat == "return":
            self.compile_return_stat()
        elif cur_stat == "let":
            self.compile_let_stat()
        else:
            return 0  # when there is no more statements to compile
        return 1

    def compile_let_stat(self):
        """
        compiling let statement
        """
        self.eat()  # let
        self.eat()  # variable name
        data = self.find(self.get_token())
        kind = data[1]
        ind = data[2]

        # fields are stored in the "this" segment
        if kind == "field":
            kind = "this"

        if self.peek() == "[":
            self.compile_array(kind, ind)
        else:
            self.eat()  # =
            self.compile_expression()
            self.__writer.write_pop(kind, ind)
        self.eat()  # eat ;

    def compile_subroutine_dec(self):
        """
        compiling subroutine declaration
        """
        self.eat()
        func_type = self.get_token()
        self.eat()  # return type
        self.eat()  # subroutine name
        func_name = self.__class_name + "." + self.get_token()
        self.eat()  # (
        if func_type == "method":
            # registered first, so "this" is the first argument entry
            self.__subroutine_symbol.add("this", self.__class_name,
                                         SymbolTable.ARG)
        self.compile_parameter_list()
        self.eat()  # )
        self.compile_subroutine_body(func_name, func_type)
        # start the next subroutine with an empty table
        self.__subroutine_symbol = SymbolTable()

    def compile_term(self):
        """
        compiling term
        """
        token_type = self.peek_type()
        if token_type == JackTokenizer.INT_CONST:
            self.__writer.write_push("constant", self.peek())
            self.eat()
            return

        if token_type == JackTokenizer.KEYWORD:
            keyword = self.peek()
            if keyword == "null" or keyword == "false":
                self.__writer.write_push("constant", 0)

            elif keyword == "true":
                # true is "not 0"
                self.__writer.write_push("constant", 0)
                self.__writer.write_arithmetic("not")

            elif keyword == "this":
                self.__writer.write_push("pointer", 0)

            self.eat()
            return

        if token_type == JackTokenizer.STR_CONST:
            # control characters are spelled out as backslash + letter
            string = (self.peek()
                      .replace('\t', "\\t")
                      .replace('\n', "\\n")
                      .replace('\r', "\\r")
                      .replace('\b', "\\b"))
            self.__writer.write_push("constant", len(string))
            self.__writer.write_call("String.new", 1)
            for ch in string:
                self.__writer.write_push("constant", ord(ch))
                self.__writer.write_call("String.appendChar", 2)
            self.eat()
            return

        cur_stat = self.peek()
        if cur_stat == "(":
            self.eat()  # (
            self.compile_expression()
            self.eat()  # )
            return

        if cur_stat == "-":
            self.eat()
            self.compile_term()
            self.__writer.write_arithmetic("neg")
            return

        if cur_stat == "~":
            self.eat()
            self.compile_term()
            self.__writer.write_arithmetic("not")
            return

        # an identifier: the token after it tells which form this is
        cur_stat = self.peek_ll2()
        if cur_stat == "[":
            self.eat()  # array name
            self.__writer.push_val(self.find(self.get_token()))
            self.eat()  # [
            self.compile_expression()
            # base + index, then read through "that"
            self.__writer.write_arithmetic("add")
            self.__writer.write_pop("pointer", 1)
            self.__writer.write_push("that", 0)
            self.eat()  # ]
            return

        if cur_stat == "." or cur_stat == "(":
            self.compile_subroutine_call()
            return

        self.eat()  # varName
        self.__writer.push_val(self.find(self.get_token()))
        return

    def find(self, name):
        """
        finding a variable name in symbol tables; the subroutine table is
        searched before the class table
        :param name: the variable name
        :return: the entry stored for the name, or False when neither table
                 has one
        """
        val = self.__subroutine_symbol.get_data(name)
        if not val:
            val = self.__class_symbol.get_data(name)
        return val if val else False

    def compile_array(self, kind, index):
        """
        compiling array assignment
        :param kind: var kind
        :param index: var index
        """
        self.eat()  # [
        self.compile_expression()
        self.eat()  # ]
        self.__writer.write_push(kind, index)
        self.__writer.write_arithmetic("add")
        self.eat()  # =
        self.compile_expression()
        # park the value in temp 0 while "that" is pointed at the target
        # cell, then store it
        self.__writer.write_pop("temp", 0)
        self.__writer.write_pop("pointer", 1)
        self.__writer.write_push("temp", 0)
        self.__writer.write_pop("that", 0)
コード例 #16
0
ファイル: JackCompiler.py プロジェクト: dav23r/nand2tetris
#!/usr/bin/env python3

import os
import sys

from JackTokenizer import JackTokenizer
from CompilationEngine import CompilationEngine
from VMWriter import VMWriter

if __name__ == '__main__':
    # Usage: JackCompiler.py <folder>
    # Translates every .jack file in the folder to a .vm file next to it.
    folderName = sys.argv[1]
    print("Got folder: {0}".format(folderName))

    for fileName in os.listdir(folderName):
        if not fileName.endswith('.jack'):
            continue

        baseName = fileName[:-5]  # strip the '.jack' extension
        print('Translating to vm code: ' + fileName)

        sourcePath = os.path.join(folderName, fileName)
        targetPath = os.path.join(folderName, baseName + '.vm')

        # Keep the source open for the whole compilation (the tokenizer may
        # read it lazily) and make sure it is closed afterwards.
        with open(sourcePath, 'r') as sourceFile:
            tokenizer = JackTokenizer(sourceFile)
            vmWriter = VMWriter(open(targetPath, 'w'))
            try:
                engine = CompilationEngine(tokenizer, vmWriter)
                engine.CompileClass()
            finally:
                # Close the writer even when compilation fails.
                vmWriter.close()
コード例 #17
0
class CompilationEngine():
    def __init__(self, tokens, fp_out):
        """Set up the engine for one token sequence.

        tokens -- indexable sequence of tokens; must not be empty because the
                  first token is read immediately
        fp_out -- handed unchanged to ``VMWriter``

        ``compileClass`` is not called here; the caller starts compilation.
        """
        self.tokens = tokens
        # Index of the current token.
        self.num = 0
        self.total = len(tokens)
        self.crnt_elem = tokens[0]
        # Symbol table(s) for the class being compiled.
        self.symbols = SymbolTable()
        self.VM = VMWriter(fp_out)
        # Bookkeeping used to create unique labels.
        self.labels = {}

    def compileClass(self):
        """Class Grammar:
        class className { classVarDec* subroutineDec* }

        Stores the class name in ``self.className`` and closes the VM writer
        once the class has been compiled."""

        self.check_next(KEYWORD, "class")
        self.className = self.get().text  # className token
        self.check_next(SYMBOL, "{")

        # Variable declarations first, then subroutines; each section lasts
        # for as long as the current token is one of its opening keywords.
        sections = (
            ([STATIC, FIELD], self.compileClassVarDec),
            (["constructor", "function", "method"], self.compileSubroutineDec),
        )
        for opening_keywords, compile_section in sections:
            while self.check_texts(KEYWORD, opening_keywords):
                compile_section()

        self.check_next(SYMBOL, "}")
        self.VM.close()

    def compileClassVarDec(self):
        """ClassVarDec Grammar:
        (static|field) type VarName ("," VarName)* ";"

        Every declared name is defined in the symbol table; a FIELD kind is
        rewritten to THIS on the token itself before it is recorded."""
        kind_tok, type_tok, name_tok = self.get_mult(3)
        if kind_tok.text == FIELD:
            kind_tok.text = THIS
        while True:
            self.symbols.define(name_tok.text, type_tok.text, kind_tok.text)
            if not self.check_texts(SYMBOL, ",", True):
                break  # no further VarName
            name_tok = self.get()  # next VarName
        self.check_next(SYMBOL, ";")  # end of the declaration


    def compileSubroutineDec(self):
        """SubroutineDec Grammar:
        (constructor|function|method) ("void"| type)
        subroutineName "(" ParameterList ")" SubroutineBody

        Records the subroutine kind in ``self.subType`` and its full
        "Class.name" in ``self.name`` for use while compiling the body."""
        # Start from a clean subroutine scope.
        self.symbols.startSubroutine()
        kind_tok, _return_tok, name_tok, _paren_tok = self.get_mult(4)
        self.subType = kind_tok.text  # constructor | function | method
        self.name = "{}.{}".format(self.className, name_tok.text)
        if self.subType == "method":
            # A method's first argument entry is the object itself.
            self.symbols.define(THIS, self.className, ARG)
        self.compileParameterList()
        self.check_next(SYMBOL, ")")
        self.compileSubroutineBody()
        # Drop this subroutine's variables again.
        self.symbols.startSubroutine()

    def compileParameterList(self):
        """ParameterList Grammar:
        ((type varName) ("," type varName)*)?

        Each parameter is defined in the symbol table with kind ARG.
        """
        def define_parameter():
            # Read "type varName" and record it as an argument.
            type_tok, name_tok = self.get_mult(2)
            self.symbols.define(name_tok.text, type_tok.text, ARG)

        if not self.check_texts(SYMBOL, ")"):  # at least one parameter
            define_parameter()
        while self.check_texts(SYMBOL, ",", True):  # further parameters
            define_parameter()

    def compileSubroutineBody(self):
        """subroutineBody Grammar:
        "{" varDec* statements "}"

        The VM function command is written after all varDecs, because it
        needs the number of LCL variables."""
        self.check_next(SYMBOL, "{")
        while self.check_texts(KEYWORD, "var", True):  # another varDec
            self.compileVarDec()
        local_count = self.symbols.varCount(LCL)
        self.VM.writeFunction(self.name, local_count)
        self.updatePointer()
        self.compileStatements()
        self.check_next(SYMBOL, "}")

    def compileVarDec(self):
        """ Grammar:
        "var" type varName ("," varName)* ";"

        Starts at the type token (the caller's check for "var" is asked to
        advance on a match). Every name is defined with kind LCL."""
        type_tok, name_tok = self.get_mult(2)
        while True:
            self.symbols.define(name_tok.text, type_tok.text, LCL)
            if not self.check_texts(SYMBOL, ",", True):
                break  # no further varName
            name_tok = self.get()
        self.check_next(SYMBOL, ";")

    def updatePointer(self):
        """Write the code that sets pointer 0 for the current subroutine.

        Methods copy argument 0 into pointer 0; constructors allocate one
        word per THIS-kind variable via Memory.alloc. Any other subroutine
        kind writes nothing.
        """
        kind = self.subType
        if kind == "method":
            # store self in this
            self.VM.writePush(ARG, 0)
            self.VM.writePop(POINTER, 0)
            return
        if kind == "constructor":
            # store the newly allocated object in this
            field_count = self.symbols.varCount(THIS)
            self.VM.writePush(CONSTANT, field_count)
            self.VM.writeCall("Memory.alloc", 1)
            self.VM.writePop(POINTER, 0)

    def compileStatements(self):
        """ Grammar:
        (let|if|while|do|return)*

        Consumes the statement keyword and hands over to the matching
        compile method; an unexpected keyword goes to ``self.fault()``.
        """
        handlers = {
            "let": self.compileLet,
            "if": self.compileIf,
            "while": self.compileWhile,
            "do": self.compileDo,
            "return": self.compileReturn,
        }
        while self.check_texts(KEYWORD, statementTypes):
            keyword = self.get().text
            handlers.get(keyword, self.fault)()

    def compileLet(self):
        """ Grammar: e.g: let x = 4
        "let" varName ("[" expression "]")?
        "=" expression ";"

        The "let" keyword has already been consumed by the caller."""
        target = self.get().text
        _var_type, kind, index = self.symbols.get(target)

        targets_array = bool(self.check_texts(SYMBOL, "[", True))
        if targets_array:
            # Leave (index expression + array variable) on the stack.
            self.compileExpression()
            self.VM.writePush(kind, index)
            self.VM.writeArithmetic("add")
            self.check_next(SYMBOL, "]")

        self.check_next(SYMBOL, "=")
        self.compileExpression()

        if not targets_array:
            self.VM.writePop(kind, index)
        else:
            # Park the value in temp 0, load the computed address into
            # pointer 1, then store the value through that 0.
            self.VM.writePop(TEMP, 0)
            self.VM.writePop(POINTER, 1)
            self.VM.writePush(TEMP, 0)
            self.VM.writePop(THAT, 0)
        self.check_next(SYMBOL, ";")

    def compileIf(self):
        """ Grammar:
        "if" "(" expression ")" "{" statements"}"
        ("else"  "{" statements"}" )?

        The "if" keyword has already been consumed by the caller."""
        false_label = "IF_FALSE.{}".format(self.get_label(1))
        end_label = "IF_END.{}".format(self.get_label(2))
        vm = self.VM

        self.check_next(SYMBOL, "(")
        self.compileExpression()
        # Skip the "then" block when the condition is false.
        vm.writeArithmetic("not")
        vm.writeIf(false_label)
        self.get_mult(2)  # ")" "{"
        self.compileStatements()
        self.get()  # "}"
        vm.writeGoto(end_label)
        vm.writeLabel(false_label)
        if self.check_texts(KEYWORD, "else", True):  # optional else block
            self.check_next(SYMBOL, "{")
            self.compileStatements()
            self.check_next(SYMBOL, "}")
        vm.writeLabel(end_label)

    def compileWhile(self):
        """ Grammar:
        "while" "(" expression ")" "{" statements"}"

        The "while" keyword has already been consumed by the caller."""
        end_label = "WHILE_END.{}".format(self.get_label(3))
        loop_label = "WHILE_LOOP.{}".format(self.get_label(4))
        vm = self.VM

        self.check_next(SYMBOL, "(")
        vm.writeLabel(loop_label)
        self.compileExpression()
        # Leave the loop when the condition is false.
        vm.writeArithmetic("not")
        vm.writeIf(end_label)
        self.get_mult(2)  # ")" "{"
        self.compileStatements()
        vm.writeGoto(loop_label)
        vm.writeLabel(end_label)
        self.check_next(SYMBOL, "}")


    def compileDo(self):
        """ Grammar:
        "do" subroutineCall ";"

        The call is compiled as a term and the value it leaves on the stack
        is discarded into temp 0."""
        self.compileTerm()
        # A do statement is executed only for its side effects.
        self.VM.writePop(TEMP, 0)
        self.check_next(SYMBOL, ";")

    def compileReturn(self):
        """ Grammar:
        "return" expression? ";"

        A bare "return;" pushes constant 0 so a value is always returned."""
        returns_nothing = self.check_texts(SYMBOL, ";")
        if returns_nothing:
            self.VM.writePush(CONSTANT, 0)
        else:
            self.compileExpression()
        self.VM.writeReturn()
        self.check_next(SYMBOL, ";")


    def compileExpression(self):
        """ Grammar:
        term (op term)*

        Operators are applied left to right with no precedence. An operator
        whose entry in ``operators`` is falsy has no VM command and is
        compiled as a call to Math.multiply ("*") or Math.divide ("/")."""
        self.compileTerm()
        # Loop (not a single `if`) so that every "op term" pair of a chained
        # expression such as a + b - c is compiled.
        while self.check_texts(SYMBOL, operators):  # op present
            operator = self.get().text
            self.compileTerm()
            op_vm = operators[operator]
            if op_vm:
                self.VM.writeArithmetic(op_vm)
            elif operator == "*":
                self.VM.writeCall("Math.multiply", 2)
            elif operator == "/":
                self.VM.writeCall("Math.divide", 2)

    def compileTerm(self):
        """Grammar:
        integerConstant | stringConstant | keywordConstant |
        varName | varname "[" expression "]" |
        subroutineCall | "(" expression ")" |
        unaryOp term

        subroutineCall Grammar:
        subroutineName "(" expressionList ")" |
        (className|varName) "." subroutineName "(" expressionList ")"

        Anything that matches none of these goes to ``self.fault()``.
        """
        tkn = self.get(False)  # look at the current token, don't increment
        tag = tkn.tag
        if tag == INT_CONST:  # integerConstant
            value = self.get().text
            self.VM.writePush("constant", value)
        elif tag == STRING_CONST:  # stringConstant
            string = self.get().text
            # Allocate the string, then append it one character at a time.
            self.VM.writePush("constant", len(string))
            self.VM.writeCall("String.new", 1)
            for char in string:
                self.VM.writePush("constant", ord(char))
                self.VM.writeCall("String.appendChar", 2)
        elif self.check_texts(KEYWORD, keywordConstants):  # keywordConstant
            keyword = self.get().text
            if keyword == "false" or keyword == "null":
                self.VM.writePush("constant", 0)
            elif keyword == "true":
                # true is -1
                self.VM.writePush("constant", 1)
                self.VM.writeArithmetic("neg")
            elif keyword == "this":
                self.VM.writePush("pointer", 0)
        elif self.check_texts(SYMBOL, unaryOperators):  # unaryOp term
            un_op = self.get().text
            un_op_vm = unaryOperators[un_op]
            self.compileTerm()
            # The operator is written after its operand.
            self.VM.writeArithmetic(un_op_vm)
        elif self.check_texts(SYMBOL, "(", True):  # "(" expression ")"
            self.compileExpression()
            self.check_next(SYMBOL, ")")
        elif self.check_texts(IDENTIFIER):
            name = self.get().text
            if self.check_texts(SYMBOL, "[", True):
                # ARRAY: varname "[" expression "]"
                var_type, kind, index = self.symbols.get(name)
                self.compileExpression()
                self.VM.writePush(kind, index)
                self.VM.writeArithmetic("add")  # address of the element
                self.VM.writePop(POINTER, 1)
                self.VM.writePush(THAT, 0)
                self.check_next(SYMBOL, "]")
            elif self.check_texts(SYMBOL, "(", True):
                # Call on the current object:
                # subroutineName "(" expressionList ")"
                function_name = "{}.{}".format(self.className, name)
                # The object must be argument 0, so it is pushed BEFORE the
                # explicit arguments are compiled.
                self.VM.writePush(POINTER, 0)
                nArgs = self.compileExpressionList()
                self.VM.writeCall(function_name, nArgs + 1)
                self.check_next(SYMBOL, ")")
            elif self.check_texts(SYMBOL, ".", True):
                # (className|varName) "." subroutineName "(" expressionList ")"
                nArgs = 0
                sub_name = self.get().text  # subroutineName
                var_type, kind, index = self.symbols.get(name)
                if var_type:
                    # name is a variable: push it as argument 0 and call the
                    # subroutine of its type
                    self.VM.writePush(kind, index)
                    function_name = "{}.{}".format(var_type, sub_name)
                    nArgs += 1
                else:
                    # name is a class
                    function_name = "{}.{}".format(name, sub_name)
                self.check_next(SYMBOL, "(")
                nArgs += self.compileExpressionList()
                self.check_next(SYMBOL, ")")
                self.VM.writeCall(function_name, nArgs)
            else:
                # plain variable
                var_type, kind, index = self.symbols.get(name)
                self.VM.writePush(kind, index)

        else:
            self.fault()

    def compileExpressionList(self):
        """Compile a possibly empty, comma-separated list of expressions.

        Grammar:
        (expression ("," expression)* )?

        Returns the number of expressions that were compiled.
        """
        n_exprs = 0
        # Anything other than ")" as the next token starts a first expression.
        starts_with_expression = not self.check_texts(SYMBOL, ")")
        if starts_with_expression:
            self.compileExpression()
            n_exprs = 1
        # check_texts consumes each "," it finds; one more expression follows.
        while self.check_texts(SYMBOL, ",", True):
            self.compileExpression()
            n_exprs += 1
        return n_exprs

    def check_texts(self, tag, texts=None, increment=False):
        """Peek at the current token and report whether it matches.

        The token matches when its tag equals ``tag`` and, if ``texts`` is
        given and non-empty, its text equals ``texts`` (a string) or is a
        member of ``texts`` (a list, tuple, set, frozenset or dict).

        ONLY INCREMENTS IF TRUE: the cursor ``self.num`` is advanced only
        when the token matches and ``increment`` is set.

        Returns True on a match, False otherwise (including when there is
        no token left).
        """
        tkn = self.get(False)  # peek only; never advances here
        if tkn is None or tkn.tag != tag:
            return False
        if texts:
            if isinstance(texts, str):
                matched = tkn.text == texts
            elif isinstance(texts, (list, tuple, set, frozenset, dict)):
                matched = tkn.text in texts
            else:
                # Unsupported container type: treated as "no match".
                matched = False
            if not matched:
                return False
        if increment:
            self.num += 1
        return True

    def check_next(self, tag, texts=None, increment=True):
        """Fetch the next token and verify its tag and text.

        ``texts`` may be omitted (any text is accepted), a single string, or
        a collection of accepted strings.

        Set increment=False to look at the next token without moving the
        cursor; in that mode a text mismatch (with a matching tag) simply
        returns None so the caller can try another alternative.

        Returns the token when it matches. When a consumed token has the
        wrong tag or text, or when no token is left, an error is printed and
        ``self.quit()`` is called.
        """
        tkn = self.get(increment)
        if tkn is None:
            # Ran off the end of the token list: report instead of crashing
            # on tkn.text below.
            print("Invalid program (or end of program)")
            print("problem with:", self.num, "no more tokens")
            self.quit()
            return None

        if tkn.tag == tag:
            if not texts:
                return tkn
            if isinstance(texts, str):
                text_ok = tkn.text == texts
            elif isinstance(texts, (list, tuple, set, frozenset, dict)):
                text_ok = tkn.text in texts
            else:
                text_ok = False
            if text_ok:
                return tkn
            if not increment:
                # Pure look-ahead: a non-matching text is not an error.
                return None

        # Wrong tag, or a consumed token whose text is not what the grammar
        # requires at this point.
        print("Invalid program (or end of program)")
        print("problem with:", self.num, tkn.tag, tkn.text)
        self.quit()


    def get(self, increment = True):
        """Return the token at the cursor, or None when none is left.

        The returned token is also remembered in ``self.tkn``. The cursor
        ``self.num`` moves forward by one only when ``increment`` is true.
        """
        if self.num >= self.total:
            return None
        current = self.tokens[self.num]
        self.tkn = current
        if increment:
            self.num += 1
        return current

    def get_mult(self, n):
        """Consume the next ``n`` tokens via ``self.get()`` and return them as a list."""
        return [self.get() for _ in range(n)]

    def fault(self):
        """Abort because the input program is invalid.

        Prints a short notice and terminates the process with exit status 1.
        TODO: make the message more expressive and close any open files
        before exiting.
        """
        print("Invalid program. Quitting...")
        raise SystemExit(1)

    def quit(self):
        """Print a notice and terminate the process with exit status 1."""
        print("quiting...")
        raise SystemExit(1)

    def get_label(self, key):
        """Return the current counter stored for ``key`` and then bump it.

        A key that has not been seen yet starts at 0, so successive calls
        with the same key yield 0, 1, 2, ...
        """
        current = self.labels.get(key, 0)
        self.labels[key] = current + 1
        return current
コード例 #18
0
class CompilationEngine:
    def __init__(self, source):
        """
        Set up the tokenizer, symbol table and VM writer for ``source`` and
        compile the class immediately.

        The writer is closed even when compilation raises, so the output
        handle is never leaked.
        :param source: passed unchanged to both Tokenizer and VMWriter
        """
        self.if_counter = 0
        self.while_counter = 0
        self.tokenizer = Tokenizer(source)
        # Load the first token so the compile_* methods have a current token.
        self.tokenizer.has_more_tokens()
        self.tokenizer.advance()
        self.symbols = SymbolTable()
        self.writer = VMWriter(source)
        try:
            self.arithmetic_op = {}
            self.init_op()
            self.root = Element(CLASS)
            self.class_name = ""
            self.compile_class(self.root)
        finally:
            self.writer.close()

    def init_op(self):
        """
        Fill ``self.arithmetic_op`` with the VM text emitted for each binary
        operator symbol. '*' and '/' map to calls instead of single commands.
        :return:
        """
        operator_table = (
            ('+', "add"),
            ('-', "sub"),
            ('*', "call Math.multiply 2"),
            ('/', "call Math.divide 2"),
            ('&', "and"),
            ('|', "or"),
            ('<', "lt"),
            ('>', "gt"),
            ('=', "eq"),
        )
        self.arithmetic_op = dict(operator_table)

    def next(self):
        """
        Move the tokenizer to the following token, if there is one.
        Does nothing once the tokenizer reports no more tokens.
        :return:
        """
        tokenizer = self.tokenizer
        if not tokenizer.has_more_tokens():
            return
        tokenizer.advance()

    def compile_expression(self, caller):
        """
        Compiles an expression: term (op term)*.

        Operators are applied strictly left to right: the VM command for an
        operator is emitted right after its right-hand term, so ``a - b - c``
        evaluates as ``(a - b) - c``. (Collecting the operators and emitting
        them at the end would evaluate the chain right to left.)
        :param caller: element under which a TERM child is created per term
        :return:
        """
        self.compile_term(SubElement(caller, TERM))
        while (self.tokenizer.token_type() is JTok.SYMBOL
               and self.tokenizer.symbol() in OPERATORS):
            operator = self.tokenizer.symbol()
            self.next()
            self.compile_term(SubElement(caller, TERM))
            # Both operands are on the stack now; apply the operator.
            self.writer.write_arithmetic(self.arithmetic_op[operator])

    def compile_expressionList(self, caller):
        """
        Compiles a comma-separated (possibly empty) list of expressions.
        :param caller: element that receives one EXPRESSION child per entry
        :return: number of expressions in the list; used by function calls
        """
        tokenizer = self.tokenizer

        # A ")" right away means the list is empty.
        if tokenizer.token_type() is JTok.SYMBOL and tokenizer.symbol() == ")":
            caller.text = " "
            return 0

        self.compile_expression(SubElement(caller, EXPRESSION))
        count = 1
        while tokenizer.token_type() is JTok.SYMBOL and tokenizer.symbol() == ",":
            self.next()  # step over ','
            self.compile_expression(SubElement(caller, EXPRESSION))
            count += 1
        return count

    def compile_subroutineCall(self, caller, first_token):
        """
        Compiles a subroutine call and emits the matching VM call.

        The first identifier has already been consumed and is passed in as
        ``first_token``; the current token is expected to be the one that
        follows it ('.' or the opening parenthesis).
        :param caller: element that receives the EXPRESSION_LIST child
        :param first_token: the identifier that started the call
        :return:
        """
        implicit_args = 0
        if self.tokenizer.symbol() != '.':
            # Unqualified call: a subroutine of the current class, invoked
            # with the current object (pointer 0) as an extra argument.
            target = self.class_name + "." + first_token
            self.writer.write_push(POINTER, 0)
            implicit_args = 1
        else:
            self.next()
            owner = first_token
            kind = self.symbols.kind_of(first_token)
            if kind:  # first token is a known variable: push it as receiver
                self.writer.write_push(Kind.get_segment(kind),
                                       self.symbols.index_of(first_token))
                owner = self.symbols.type_of(first_token)
                implicit_args = 1
            target = owner + "." + self.tokenizer.identifier()
            self.next()

        self.next()
        argument_list = SubElement(caller, EXPRESSION_LIST)
        total_args = self.compile_expressionList(argument_list) + implicit_args

        self.writer.write_call(target, total_args)
        self.next()

    def compile_term(self, caller):
        """
        Compiles one term and emits the VM code that leaves its value on
        the stack. Handles integer and string constants, keyword constants,
        variables, array entries, subroutine calls, parenthesised
        expressions and unary operators.
        :param caller: element under which nested expression/term children are created
        :return:
        """
        tokenizer = self.tokenizer
        out = self.writer
        token_kind = tokenizer.token_type()

        if token_kind is JTok.INT_CONST:
            out.write_push(CONSTANT, tokenizer.intVal())
            self.next()
            return

        if token_kind is JTok.STRING_CONST:
            # Build the string at run time: allocate, then append each char.
            literal = tokenizer.string_val()
            out.write_push(CONSTANT, len(literal))
            out.write_call("String.new", 1)
            for ch in literal:
                out.write_push(CONSTANT, ord(ch))
                out.write_call("String.appendChar", 2)
            self.next()
            return

        if token_kind is JTok.KEYWORD:
            word = tokenizer.key_word()
            if word in ("null", "false"):
                out.write_push(CONSTANT, 0)
            elif word == "true":
                # true is emitted as the negation of constant 1
                out.write_push(CONSTANT, 1)
                out.write_arithmetic("neg")
            elif word == "this":
                out.write_push(POINTER, 0)
            else:
                print("unexpected")
            self.next()
            return

        if token_kind is JTok.IDENTIFIER:
            name = tokenizer.identifier()
            self.next()
            symbol_follows = tokenizer.token_type() is JTok.SYMBOL

            if symbol_follows and tokenizer.symbol() in (".", "("):
                self.compile_subroutineCall(caller, name)
                return

            indexed = symbol_follows and tokenizer.symbol() == '['
            if indexed:
                self.next()  # step over '['
                self.compile_expression(SubElement(caller, EXPRESSION))

            # Push the variable itself (the array base when indexed).
            kind = self.symbols.kind_of(name)
            index = self.symbols.index_of(name)
            if kind is None:
                print("unexpected")
            else:
                out.write_push(kind.get_segment(), index)

            if indexed:
                # base + offset -> pointer 1, then read through "that 0"
                out.write_arithmetic("add")
                out.write_pop(POINTER, 1)
                out.write_push("that", 0)
                self.next()  # step over ']'
            return

        if token_kind is JTok.SYMBOL:
            unary_commands = {'-': "neg", '~': "not"}
            symbol = tokenizer.symbol()
            if symbol == '(':
                self.next()
                self.compile_expression(SubElement(caller, EXPRESSION))
                self.next()
            elif symbol in unary_commands:
                self.next()
                self.compile_term(SubElement(caller, TERM))
                out.write_arithmetic(unary_commands[symbol])

    def compile_do(self, caller):
        """
        Compiles a do statement.
        format : 'do' subroutineCall ';'

        The value left on the stack by the call is discarded into TEMP 0.
        :param caller:
        :return:
        """
        self.next()  # step over 'do'

        callee = self.tokenizer.identifier()
        self.next()
        self.compile_subroutineCall(caller, callee)

        self.writer.write_pop(TEMP, 0)
        self.next()  # step over ';'

    def compile_let(self, caller):
        """
        Compiles a let statement, for a plain variable or an array entry.
        format : 'let' varName ( '[' expression ']' )? '=' expression ';'
        :param caller: element that receives the EXPRESSION children
        :return:
        """
        self.next()  # step over 'let'

        target = self.tokenizer.identifier()
        self.next()

        segment = self.symbols.kind_of(target).get_segment()
        slot = self.symbols.index_of(target)
        out = self.writer

        if self.tokenizer.symbol() != '[':
            self.next()  # step over '='
            self.compile_expression(SubElement(caller, EXPRESSION))
            out.write_pop(segment, slot)
        else:
            self.next()  # step over '['
            self.compile_expression(SubElement(caller, EXPRESSION))
            out.write_push(segment, slot)
            out.write_arithmetic("add")  # address of the array entry
            self.next()  # step over ']'
            self.next()  # step over '='
            self.compile_expression(SubElement(caller, EXPRESSION))
            # Park the value in TEMP 0 while pointer 1 is set to the
            # entry's address, then store it through "that 0".
            out.write_pop(TEMP, 0)
            out.write_pop(POINTER, 1)
            out.write_push(TEMP, 0)
            out.write_pop("that", 0)

        self.next()  # step over ';'

    def compile_return(self, caller):
        """
        Compiles a return statement.
        format : 'return' expression? ';'

        When no expression is given, constant 0 is pushed as the return value.
        :param caller:
        :return:
        """
        self.next()  # step over 'return'

        tokenizer = self.tokenizer
        if tokenizer.token_type() is JTok.SYMBOL and tokenizer.symbol() == ";":
            self.writer.write_push(CONSTANT, 0)
        else:
            self.compile_expression(SubElement(caller, EXPRESSION))

        self.writer.write_return()
        self.next()  # step over ';'

    def compile_while(self, caller):
        """
        Compiles a while statement.
        format : 'while' '(' expression ')' '{' statements '}'

        Each loop gets its own pair of labels numbered by ``while_counter``.
        :param caller:
        :return:
        """
        loop_id = str(self.while_counter)
        self.while_counter += 1
        top_label = "WHILE_EXP" + loop_id
        end_label = "WHILE_END" + loop_id
        out = self.writer

        out.write_label(top_label)
        self.next()  # step over 'while'
        self.next()  # step over '('

        self.compile_expression(SubElement(caller, EXPRESSION))
        # Leave the loop when the negated condition holds.
        out.write_arithmetic("not")
        out.write_if(end_label)

        self.next()  # step over ')'
        self.next()  # step over '{'

        self.compile_statements(SubElement(caller, STATEMENTS))

        out.write_goto(top_label)
        out.write_label(end_label)
        self.next()  # step over '}'

    def compile_statements(self, caller):
        """
        Compiles consecutive statements until the current token is no longer
        one of the statement keywords (do, while, let, return, if).
        :param caller: element that receives one child per statement
        :return:
        """
        # statement keyword -> (compile method, tag of the child element)
        dispatch = {
            'do': (self.compile_do, 'doStatement'),
            'while': (self.compile_while, 'whileStatement'),
            'let': (self.compile_let, 'letStatement'),
            'return': (self.compile_return, 'returnStatement'),
            'if': (self.compile_if, 'ifStatement'),
        }
        caller.text = " "
        while self.tokenizer.token_type() is JTok.KEYWORD:
            entry = dispatch.get(self.tokenizer.key_word())
            if entry is None:
                break
            handler, tag = entry
            handler(SubElement(caller, tag))

    def compile_if(self, caller):
        """
        Compiles an if statement with an optional else branch.
        format : 'if' '(' expression ')' '{' statements '}'
        ( 'else' '{' statements '}' )?

        Labels are numbered by ``if_counter`` so that every if statement
        gets its own set.
        :param caller:
        :return:
        """
        self.next()  # step over 'if'; current token is now '('
        # No separate skipping of '(' / ')' happens here: the condition is
        # handed to compile_expression starting at the '(' token.
        self.compile_expression(caller)
        self.next()  # step over '{'

        suffix = str(self.if_counter)
        self.if_counter += 1
        true_label = "IF_TRUE" + suffix
        false_label = "IF_FALSE" + suffix
        end_label = "IF_END" + suffix
        out = self.writer

        out.write_if(true_label)
        out.write_goto(false_label)
        out.write_label(true_label)

        self.compile_statements(caller)
        self.next()  # step over '}'

        if self.tokenizer.key_word() != 'else':
            out.write_label(false_label)
            return

        # The true branch must jump over the else body.
        out.write_goto(end_label)
        out.write_label(false_label)

        self.next()  # step over 'else'
        self.next()  # step over '{'
        self.compile_statements(caller)
        self.next()  # step over '}'

        out.write_label(end_label)

    def compile_var_dec(self, caller):
        """
        Compiles a local variable declaration.
        format: 'var' type varName ( ',' varName)* ';'
        :param caller:
        :return: number of variables declared (from compile_list_of_vars)
        """
        declaration_keyword = self.tokenizer.key_word()
        self.next()  # step over the declaration keyword

        # The keyword doubles as the name of the Kind member to register under.
        var_kind = Kind[declaration_keyword]
        return self.compile_list_of_vars(caller, "var", var_kind)

    def compile_class(self, caller):
        """
        Compiles a whole class: the header tokens, then every static/field
        declaration, then every subroutine, then the closing symbol.
        Records the class name in ``self.class_name``.
        :param caller: root element that receives the class children
        :return:
        """
        tokenizer = self.tokenizer

        SubElement(caller, KEYWORD).text = tokenizer.key_word()
        self.next()

        declared_name = tokenizer.identifier()
        SubElement(caller, IDENTIFIER).text = declared_name
        self.class_name = declared_name
        self.next()

        SubElement(caller, SYMBOL).text = tokenizer.symbol()  # {
        self.next()

        while (tokenizer.token_type() is JTok.KEYWORD
               and tokenizer.key_word() in {'static', 'field'}):
            self.compile_classVarDec(SubElement(caller, "classVarDec"))

        # Subroutines follow until a symbol token is reached.
        while tokenizer.token_type() is not JTok.SYMBOL:
            self.compile_subroutine(SubElement(caller, "subroutineDec"))

        SubElement(caller, SYMBOL).text = tokenizer.symbol()  # }
        self.next()

    def compile_list_of_vars(self, caller, category, kind):
        """
        Helper that compiles a variable list of the form
        type varName (',' varName)* ';'
        and registers every name in the symbol table.
        :param caller:
        :param category: not used by this method
        :param kind: kind under which each variable is defined
        :return: number of variables declared
        """
        var_type = self.compile_type(caller)
        declared = 0
        while True:
            self.symbols.define(self.tokenizer.identifier(), var_type, kind)
            declared += 1
            self.next()
            if self.tokenizer.symbol() == ';':
                break
            self.next()  # step over ','

        self.next()  # step over ';'
        return declared

    def compile_classVarDec(self, caller):
        """
        Compiles one class-level variable declaration; the leading keyword
        selects the Kind under which the variables are registered.
        :param caller:
        :return:
        """
        declaration_keyword = self.tokenizer.key_word()
        self.next()  # step over the declaration keyword

        var_kind = Kind[declaration_keyword]
        self.compile_list_of_vars(caller, declaration_keyword, var_kind)

    def compile_type(self, caller):
        """
        Records the current type token under ``caller`` (tagged as keyword
        or identifier, depending on the token) and advances past it.
        :param caller:
        :return: the text of the type token
        """
        tokenizer = self.tokenizer
        if tokenizer.token_type() is JTok.KEYWORD:
            tag, text = KEYWORD, tokenizer.key_word()
        else:
            tag, text = IDENTIFIER, tokenizer.identifier()
        SubElement(caller, tag).text = text
        self.next()
        return text

    def compile_subroutine(self, caller):
        """
        Compiles one subroutine declaration: header, parameter list, local
        variable declarations and body, emitting the VM function header and
        the set-up code that constructors and methods need.
        :param caller:
        :return:
        """
        tokenizer = self.tokenizer
        out = self.writer

        subroutine_kind = tokenizer.key_word()
        self.next()

        # Return type: either 'void' or a regular type; only skipped over.
        if tokenizer.token_type() is JTok.KEYWORD and tokenizer.key_word() == "void":
            SubElement(caller, KEYWORD).text = tokenizer.key_word()
            self.next()
        else:
            self.compile_type(caller)

        full_name = self.class_name + "." + tokenizer.identifier()
        self.symbols.start_subroutine()
        self.next()

        self.next()  # step over '('
        is_method = subroutine_kind == "method"
        if is_method:
            # The receiving object occupies the first argument slot.
            self.symbols.define("this", "", Kind.arg)
        self.compile_parameterList(SubElement(caller, "parameterList"))

        self.next()  # step over ')'
        self.next()  # step over '{'

        local_count = 0
        while tokenizer.token_type() is JTok.KEYWORD and tokenizer.key_word() == "var":
            local_count += self.compile_var_dec(SubElement(caller, "varDec"))

        out.write_function(full_name, local_count)

        if subroutine_kind == "constructor":
            # Allocate one word per field and make it the current object.
            out.write_push(CONSTANT, self.symbols.var_count(Kind.field))
            out.write_call("Memory.alloc", 1)
            out.write_pop(POINTER, 0)
        elif is_method:
            # Point pointer 0 at the object passed as argument 0.
            out.write_push(ARGUMENT, 0)
            out.write_pop(POINTER, 0)

        self.compile_statements(SubElement(caller, "statements"))

        self.next()  # step over '}'

    def compile_parameterList(self, caller):
        """
        Compiles a (possibly empty) parameter list and defines every
        parameter in the symbol table with Kind.arg.
        :param caller:
        :return:
        """
        tokenizer = self.tokenizer

        # A ")" right away means there are no parameters.
        if tokenizer.token_type() is JTok.SYMBOL and tokenizer.symbol() == ")":
            caller.text = " "
            return

        while True:
            param_type = self.compile_type(caller)
            self.symbols.define(tokenizer.identifier(), param_type, Kind.arg)
            self.next()
            more = tokenizer.token_type() is JTok.SYMBOL and tokenizer.symbol() == ","
            if not more:
                break
            self.next()  # step over ','
コード例 #19
0
class CompilationEngine:
    """CompilationEngine: Effects the actual compilation output. Gets its input from a JackTokenizer and emits its parsed structure into an output file/stream."""
    def __init__(self, tokens_with_tokenType, out_vm_file):
        """Store the token list and prepare the symbol table, VM writer and lookup tables.

        The class name used for generated function names is taken from the
        stem of ``out_vm_file``.
        """
        # Counters for while / if-else labels.
        self.while_label_index = self.if_else_label_index = 0

        self.tokens_with_tokenType = tokens_with_tokenType
        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(out_vm_file)
        self.class_name = out_vm_file.stem

        # Operator and segment translation tables.
        self.construct_op_dict()
        self.construct_segment_dict()

    def construct_op_dict(self):
        """Build ``self.op_dict``, mapping operator symbols to VM command names."""
        symbols = ('+', '-', '&', '|', '<', '>', '=')
        commands = ('add', 'sub', 'and', 'or', 'lt', 'gt', 'eq')
        self.op_dict = dict(zip(symbols, commands))

    def construct_segment_dict(self):
        """Build ``self.segment_dict``, translating a variable kind to its memory segment name."""
        self.segment_dict = dict(
            STATIC='static',
            FIELD='this',
            ARG='argument',
            VAR='local',
        )

    def compile(self):
        """Compile all tokens and close the VM writer.

        The writer is closed even when compilation raises, so the output
        file is never left open. The element tree returned by
        ``compile_tokens()`` is not used here; print it with
        ``etree.tounicode(tree, pretty_print=True)`` when debugging.
        """
        try:
            self.compile_tokens()
        finally:
            self.vm_writer.close()

    def compile_tokens(self):
        """Compile the class into a fresh 'class' root element and return it wrapped in an ElementTree."""
        root = etree.Element('class')
        self.compiled_output_root = root
        self.compile_class()
        return etree.ElementTree(self.compiled_output_root)

    def compile_new_token_ensure_token_type(self, correct_token_type, parent):
        """Consume the next token, add it under ``parent`` and require its type.

        Raises AssertionError when the token type differs from
        ``correct_token_type``. The check is an explicit raise rather than an
        ``assert`` statement so it still runs under ``python -O``.
        """
        token, token_type = self.compile_new_token(parent)
        if token_type != correct_token_type:
            raise AssertionError(
                '{} with token_type {} not expected'.format(token, token_type))

    def compile_new_token_ensure_token(self, correct_token, parent):
        """Consume the next token, add it under ``parent`` and require its text.

        Raises AssertionError when the token differs from ``correct_token``.
        The check is an explicit raise rather than an ``assert`` statement so
        it still runs under ``python -O``.
        """
        token, token_type = self.compile_new_token(parent)
        if token != correct_token:
            raise AssertionError(
                '{} with token_type {} not expected'.format(token, token_type))

    def compile_new_token(self, parent):
        """Consume the next token, record it as a child of ``parent`` and return ``(token, token_type)``."""
        consumed = self.next_token_and_type()
        token, token_type = consumed
        self.add_sub_element(parent, token_type, token)
        return token, token_type

    def add_sub_element(self, parent, element_tag, element_text):
        """Append a child tagged ``element_tag`` to ``parent`` whose text is ``element_text`` padded with one space on each side."""
        padded_text = ' ' + element_text + ' '
        child = etree.SubElement(parent, element_tag)
        child.text = padded_text

    def next_token_and_type(self):
        """Remove the first pending ``(token, token_type)`` entry and return it."""
        pending = self.tokens_with_tokenType
        head = pending[0]
        del pending[0]
        return head

    def show_next_token(self):
        """Return the text of the next pending token without consuming it."""
        upcoming, _ = self.tokens_with_tokenType[0]
        return upcoming

    def show_next_token_and_type(self):
        """Return the next pending ``(token, token_type)`` entry without consuming it."""
        pending = self.tokens_with_tokenType
        return pending[0]

    def compile_class(self):
        """
        Compiles a complete class into ``self.compiled_output_root``.
        class: 'class' className '{' classVarDec* subroutineDec* '}'
        """
        root = self.compiled_output_root

        # Header: 'class' className '{'
        self.compile_new_token_ensure_token('class', root)
        self.compile_new_token_ensure_token_type('identifier', root)
        self.compile_new_token_ensure_token('{', root)

        # Members
        self.compile_classVarDec()
        self.compile_subroutineDec()

        self.compile_new_token_ensure_token('}', root)

    def compile_classVarDec(self):
        """
        Compiles every consecutive static or field declaration.
        classVarDec: ('static' | 'field') type varName (',' varName)* ';'
        """
        while True:
            token = self.show_next_token()
            if token not in {'static', 'field'}:
                return
            declaration = etree.SubElement(self.compiled_output_root,
                                           'classVarDec')
            # The upper-cased keyword is used as the symbol kind.
            symbol_kind = token.upper()
            self.compile_new_token(declaration)  # 'static' or 'field'
            symbol_type = self.compile_type(declaration)
            self.compile_one_or_more_varName(declaration, symbol_type,
                                             symbol_kind)
            self.compile_new_token_ensure_token(';', declaration)

    def compile_one_or_more_varName(self, parent, symbol_type, symbol_kind):
        """Compile ``varName (',' varName)*``, defining each name with the given type and kind."""
        # First name: register it while it is still the upcoming token,
        # then consume it as an identifier.
        self.add_new_symbol(symbol_type, symbol_kind)
        self.compile_new_token_ensure_token_type('identifier', parent)
        # Any further comma-separated names.
        self.compile_more_varName_if_exist(parent, symbol_type, symbol_kind)

    def add_new_symbol(self, symbol_type, symbol_kind):
        """Define the upcoming (not yet consumed) token as a symbol name with the given type and kind in ``self.symbol_table``."""
        upcoming_name = self.show_next_token()
        table = self.symbol_table
        table.define(upcoming_name, symbol_type, symbol_kind)

    def compile_more_varName_if_exist(self, parent, symbol_type, symbol_kind):
        """Compile every further ``',' varName`` pair, defining each name with the given type and kind."""
        while self.show_next_token() == ',':
            self.compile_new_token(parent)  # the ','
            self.add_new_symbol(symbol_type, symbol_kind)
            self.compile_new_token_ensure_token_type('identifier', parent)

    def compile_type(self, parent):
        """
        Compiles type for var and add token element to parent.
        type: 'int' | 'char' | 'boolean' | className

        Returns the type token. Raises AssertionError when the consumed
        token is neither a primitive type nor an identifier; the check is an
        explicit raise so it still runs under ``python -O``.
        """
        token, token_type = self.compile_new_token(parent)
        if token not in {'int', 'char', 'boolean'} and token_type != 'identifier':
            raise AssertionError(
                '{} with token_type {} is not a valid type'.format(
                    token, token_type))
        return token

    def compile_void_or_type(self, parent):
        """
        Compiles type or 'void' for var and add token element to parent.

        Raises AssertionError when the consumed token is neither 'void', a
        primitive type nor an identifier; the check is an explicit raise so
        it still runs under ``python -O``.
        """
        token, token_type = self.compile_new_token(parent)
        if (token not in {'void', 'int', 'char', 'boolean'}
                and token_type != 'identifier'):
            raise AssertionError(
                '{} with token_type {} is not a valid return type'.format(
                    token, token_type))

    def compile_subroutineDec(self):
        """
        Compiles every consecutive method, function, or constructor.
        subroutineDec: ('constructor' | 'function' | 'method') ('void' | type) subroutineName '(' parameterList ')' subroutineBody
        """
        while True:
            token = self.show_next_token()
            if token not in {'constructor', 'function', 'method'}:
                return

            # Each subroutine starts with an empty subroutine-level table.
            self.symbol_table.start_subroutine()
            function_kind = token
            declaration = etree.SubElement(self.compiled_output_root,
                                           'subroutineDec')
            self.compile_new_token(declaration)  # constructor/function/method
            self.compile_void_or_type(declaration)

            # subroutineName, qualified with the class name
            function_name = self.class_name + '.' + self.show_next_token()
            self.compile_new_token_ensure_token_type('identifier', declaration)
            self.compile_new_token_ensure_token('(', declaration)

            if function_kind == 'method':
                # Dummy ARG entry so a method's own parameters start at
                # index 1; argument 0 always refers to the this object.
                self.symbol_table.define('this', 'int', 'ARG')
            self.compile_parameterList(declaration)
            self.compile_new_token_ensure_token(')', declaration)

            self.compile_subroutineBody(declaration, function_name,
                                        function_kind)

    def compile_parameterList(self, parent):
        """
        Compiles a (possibly empty) parameter list under a new
        'parameterList' element and defines each parameter as an ARG symbol.
        ((type varName) (',' type varName)*)?

        Raises AssertionError when the first token is neither ')' nor a
        valid type; the check is an explicit raise so it still runs under
        ``python -O``.
        """
        compiled_output_parameterList = etree.SubElement(
            parent, 'parameterList')
        token, token_type = self.show_next_token_and_type()
        if token == ')':  # No parameter need to add
            # Placeholder text changes how the empty element is printed.
            compiled_output_parameterList.text = '\n\t'
            return

        # There is at least one parameter: type first
        if token not in {'int', 'char', 'boolean'} and token_type != 'identifier':
            raise AssertionError(
                '{} with token_type {} is not a valid parameter type'.format(
                    token, token_type))
        symbol_kind = 'ARG'
        symbol_type = token
        self.compile_new_token(compiled_output_parameterList)  # Add type
        self.add_new_symbol(symbol_type, symbol_kind)
        # varName
        self.compile_new_token_ensure_token_type(
            'identifier', compiled_output_parameterList)
        # more parameters
        self.compile_more_parameter(compiled_output_parameterList)

    def compile_subroutineBody(self, parent, function_name, function_kind):
        """
        Compile a subroutine body and emit its VM function header.

        Grammar: '{' varDec* statements '}'

        function_name is written verbatim into the VM 'function' command;
        function_kind selects the prologue ('constructor' allocates the
        object, 'method' binds pointer 0 to argument 0).
        """
        body_node = etree.SubElement(parent, 'subroutineBody')
        self.compile_new_token_ensure_token('{', body_node)
        self.compile_varDec(body_node)
        # The local count is read only after the varDecs have been compiled.
        n_locals = self.symbol_table.count_symbol_by_kind('VAR')
        self.vm_writer.write_function(function_name, n_locals)
        if function_kind == 'constructor':
            # this = Memory.alloc(<number of fields>)
            n_fields = self.symbol_table.count_symbol_by_kind('FIELD')
            self.vm_writer.write_push('constant', n_fields)
            self.vm_writer.write_call('Memory.alloc', 1)
            self.vm_writer.write_pop('pointer', 0)
        elif function_kind == 'method':
            # The object address arrives as argument 0; load it into
            # pointer 0 so the 'this' segment refers to the current object.
            self.vm_writer.write_push('argument', 0)
            self.vm_writer.write_pop('pointer', 0)
        statements_node = etree.SubElement(body_node, 'statements')
        self.compile_statements(statements_node)
        self.compile_new_token_ensure_token('}', body_node)

    def compile_more_parameter(self, parent):
        """Compile every remaining (',' type varName) group of a parameter list."""
        while self.show_next_token() == ',':
            self.compile_new_token(parent)  # ','
            param_type = self.compile_type(parent)
            self.add_new_symbol(param_type, 'ARG')
            self.compile_new_token_ensure_token_type('identifier', parent)

    def compile_varDec(self, parent):
        """
        Compile all consecutive local variable declarations.

        Grammar (per declaration): 'var' type varName (',' varName)* ';'
        Each declaration gets its own 'varDec' element under parent.
        """
        while True:
            keyword = self.show_next_token()
            if keyword != 'var':
                return
            var_dec_node = etree.SubElement(parent, 'varDec')
            kind = keyword.upper()  # symbol kind derived from the keyword
            self.compile_new_token(var_dec_node)  # 'var'
            var_type = self.compile_type(var_dec_node)
            self.add_new_symbol(var_type, kind)
            self.compile_new_token_ensure_token_type('identifier',
                                                     var_dec_node)
            self.compile_more_varName_if_exist(var_dec_node, var_type, kind)
            self.compile_new_token_ensure_token(';', var_dec_node)

    def compile_statements(self, parent):
        """
        Compile a sequence of zero or more statements.

        statement: letStatement | ifStatement | whileStatement | doStatement | returnStatement

        Statements are compiled in a loop rather than by self-recursion, so
        the Python call depth no longer grows with the number of statements
        in a block (a long statement list could otherwise exceed the
        interpreter's recursion limit).
        """
        handlers = {
            'let': self.compile_statement_let,
            'if': self.compile_statement_if,
            'while': self.compile_statement_while,
            'do': self.compile_statement_do,
            'return': self.compile_statement_return,
        }
        while True:
            token = self.show_next_token()
            handler = handlers.get(token)
            if handler is None:
                # Next token does not start a statement: the sequence is over.
                return
            handler(parent)

    def compile_statement_let(self, parent):
        """
        Compile a let statement.

        letStatement: 'let' varName ('[' expression ']')? '=' expression ';'
        vm: the value of the right-hand expression is popped into varName,
        or into the addressed array cell.
        """
        let_node = etree.SubElement(parent, 'letStatement')
        self.compile_new_token_ensure_token('let', let_node)
        # varName
        target = self.show_next_token()
        self.compile_new_token_ensure_token_type('identifier', let_node)
        is_array_target = self.show_next_token() == '['
        if is_array_target:
            # code:
            #     arr[expression1] = expression2
            # vm:
            #     push arr
            #     push expression1
            #     add
            #     push expression2
            #     pop temp 0
            #     pop pointer 1
            #     push temp 0
            #     pop that 0
            # "pop pointer 1" is delayed until after expression2 because
            # expression2 may itself read an array (a[i] = b[j]) and would
            # overwrite pointer 1; its value is parked in temp 0 meanwhile.
            self.write_push_variable(target)
            self.compile_new_token(let_node)  # '['
            self.compile_expression(let_node)
            self.vm_writer.write_arithmetic('add')
            self.compile_new_token_ensure_token(']', let_node)
        self.compile_new_token_ensure_token('=', let_node)
        self.compile_expression(let_node)
        if not is_array_target:
            # Plain variable
            self.write_pop_variable(target)
        else:
            # Align 'that' with the target address, then store through it.
            self.vm_writer.write_pop('temp', 0)
            self.vm_writer.write_pop('pointer', 1)
            self.vm_writer.write_push('temp', 0)
            self.vm_writer.write_pop('that', 0)
        self.compile_new_token_ensure_token(';', let_node)

    def compile_statement_if(self, parent):
        """
        Compile an if statement with an optional else clause.

        ifStatement: 'if' '(' expression ')' '{' statements '}' ('else' '{' statements '}')?
        code:
            if (cond)
                s1
            else
                s2
        vm:
            VM code for computing ~(cond)
            if-goto L1
            VM code for executing s1
            goto L2
            label L1
            VM code for executing s2
            label L2
        """
        if_node = etree.SubElement(parent, 'ifStatement')
        self.compile_new_token_ensure_token('if', if_node)
        # Fix this statement's label pair now: nested if statements compiled
        # below increment the counter again.
        self.if_else_label_index += 1
        label_index = self.if_else_label_index
        class_tag = self.class_name.upper()
        else_label = 'ELSE_START_{}_{}'.format(class_tag, label_index)
        end_label = 'IF_ELSE_END_{}_{}'.format(class_tag, label_index)
        # Condition
        self.compile_new_token_ensure_token('(', if_node)
        self.compile_expression(if_node)
        self.vm_writer.write_arithmetic('not')
        self.vm_writer.write_if_goto(else_label)
        self.compile_new_token_ensure_token(')', if_node)
        # "then" branch
        self.compile_new_token_ensure_token('{', if_node)
        then_node = etree.SubElement(if_node, 'statements')
        self.compile_statements(then_node)
        self.vm_writer.write_goto(end_label)
        self.compile_new_token_ensure_token('}', if_node)
        self.vm_writer.write_label(else_label)
        # Optional "else" branch
        if self.show_next_token() == 'else':
            self.compile_new_token_ensure_token('else', if_node)
            self.compile_new_token_ensure_token('{', if_node)
            else_node = etree.SubElement(if_node, 'statements')
            self.compile_statements(else_node)
            self.compile_new_token_ensure_token('}', if_node)
        self.vm_writer.write_label(end_label)

    def compile_statement_while(self, parent):
        """
        Compile a while statement.

        whileStatement: 'while' '(' expression ')' '{' statements '}'
        code:
            while (cond)
                s1
        vm:
            label L1
            VM code for computing ~(cond)
            if-goto L2
            VM code for executing s1
            goto L1
            label L2
        """
        while_node = etree.SubElement(parent, 'whileStatement')
        self.compile_new_token_ensure_token('while', while_node)
        # Fix this loop's label pair before the body (which may contain
        # nested loops) increments the counter again.
        self.while_label_index += 1
        label_index = self.while_label_index
        class_tag = self.class_name.upper()
        start_label = 'WHILE_START_{}_{}'.format(class_tag, label_index)
        end_label = 'WHILE_END_{}_{}'.format(class_tag, label_index)
        self.vm_writer.write_label(start_label)
        # Condition: leave the loop when it is false
        self.compile_new_token_ensure_token('(', while_node)
        self.compile_expression(while_node)
        self.vm_writer.write_arithmetic('not')
        self.vm_writer.write_if_goto(end_label)
        self.compile_new_token_ensure_token(')', while_node)
        # Body
        self.compile_new_token_ensure_token('{', while_node)
        body_node = etree.SubElement(while_node, 'statements')
        self.compile_statements(body_node)
        self.vm_writer.write_goto(start_label)
        self.vm_writer.write_label(end_label)
        self.compile_new_token_ensure_token('}', while_node)

    def compile_statement_do(self, parent):
        """
        Compile a do statement.

        doStatement: 'do' subroutineCall ';'
        """
        do_node = etree.SubElement(parent, 'doStatement')
        self.compile_new_token_ensure_token('do', do_node)
        self.compile_subroutineCall(do_node)
        # A do statement ignores the callee's return value, so the caller
        # pops it off the stack and discards it.
        self.vm_writer.write_pop('temp', 0)
        self.compile_new_token_ensure_token(';', do_node)

    def compile_subroutineCall(self, parent):
        """
        Compile a subroutine call.

        subroutineCall: subroutineName '(' expressionList ')' | (className | varName) '.' subroutineName '(' expressionList ')'

        The VM 'call' itself is written by compile_expressionList, which
        receives the resolved function name and whether an object address
        was pushed as an extra first argument.
        """
        callee = self.show_next_token()
        self.compile_new_token_ensure_token_type(
            'identifier', parent)  # subroutineName or className or varName
        if self.show_next_token() != '.':
            # Bare subroutineName(...): a call on the current object, so
            # push this (pointer 0) and qualify with the current class.
            self.vm_writer.write_push('pointer', 0)
            function_name = self.class_name + '.' + callee
            pushes_object = True
        else:
            self.compile_new_token_ensure_token('.', parent)
            receiver_type = self.symbol_table.get_symbol_type(callee)
            if receiver_type:
                # callee is a variable: push the object it holds and
                # dispatch on the variable's declared class.
                pushes_object = True
                self.write_push_variable(callee)
                function_name = receiver_type + '.' + self.show_next_token()
            else:
                # Not in the symbol table, so callee is a class name and is
                # used unchanged.
                function_name = callee + '.' + self.show_next_token()
                pushes_object = False
            self.compile_new_token_ensure_token_type('identifier',
                                                     parent)  # subroutineName

        self.compile_new_token_ensure_token('(', parent)
        self.compile_expressionList(parent, function_name, pushes_object)
        self.compile_new_token_ensure_token(')', parent)

    def compile_statement_return(self, parent):
        """
        Compile a return statement.

        returnStatement: 'return' expression? ';'
        """
        return_node = etree.SubElement(parent, 'returnStatement')
        self.compile_new_token_ensure_token('return', return_node)
        if self.show_next_token() == ';':
            # No expression: void subroutines return the constant 0.
            self.vm_writer.write_push('constant', 0)
        else:
            self.compile_expression(return_node)
        self.vm_writer.write_return()
        self.compile_new_token_ensure_token(';', return_node)

    def compile_expression(self, parent):
        """
        Compile an expression under a new 'expression' element.

        expression: term (op term)*
        """
        expression_node = etree.SubElement(parent, 'expression')
        # Leading term, then any number of "op term" pairs.
        self.compile_term(expression_node)
        self.compile_zero_or_more_op_and_term(expression_node)

    def compile_term(self, parent):
        """
        Compile a single term and emit the VM code that pushes its value.

        term: integerConstant | stringConstant | keywordConstant | varName | varName '[' expression ']' | subroutineCall | '(' expression ')' | unaryOp term

        A 'term' element is appended to parent.

        Raises:
            ValueError: if the next token cannot start a term. (The previous
                code raised a plain string, which is itself a TypeError in
                Python 3 and hid the real problem.)
        """
        compiled_output_term = etree.SubElement(parent, 'term')
        next_token, token_type = self.show_next_token_and_type()
        if token_type == 'integerConstant':
            self.vm_writer.write_push('constant', next_token)
            self.compile_new_token(compiled_output_term)
        elif next_token in {'true', 'false', 'null', 'this'}:
            # keywordConstant
            if next_token == 'true':
                # true = -1, i.e. all 16 bits set
                self.vm_writer.write_push('constant', 1)
                self.vm_writer.write_arithmetic('neg')
            elif next_token == 'this':
                # this is always the content of pointer 0
                self.vm_writer.write_push('pointer', 0)
            else:  # 'false' or 'null'
                self.vm_writer.write_push('constant', 0)
            self.compile_new_token(compiled_output_term)
        elif token_type == 'stringConstant':
            token, token_type = self.next_token_and_type()
            # Drop the surrounding double quotes
            string = token[1:-1]
            # Build the string with the OS: String.new(length), then one
            # String.appendChar(nextChar) per character
            self.vm_writer.write_push('constant', len(string))
            self.vm_writer.write_call('String.new', 1)
            for char in string:
                self.vm_writer.write_push('constant', ord(char))
                self.vm_writer.write_call('String.appendChar', 2)
            self.add_sub_element(compiled_output_term, token_type, string)
        elif token_type == 'identifier':
            # One token of extra look-ahead decides between array access,
            # subroutine call and plain variable.
            next_next_token, token_type = self.tokens_with_tokenType[1]
            if next_next_token == '[':  # Array
                # code:
                #     a[i]
                # vm:
                #     push a
                #     push i
                #     add
                #     pop pointer 1
                #     push that 0
                symbol_name = next_token
                self.write_push_variable(symbol_name)
                self.compile_new_token_ensure_token_type(
                    'identifier', compiled_output_term)
                self.compile_new_token_ensure_token('[', compiled_output_term)
                self.compile_expression(compiled_output_term)
                self.vm_writer.write_arithmetic('add')
                self.vm_writer.write_pop('pointer', 1)
                # Read the array item through the 'that' segment
                self.vm_writer.write_push('that', 0)
                self.compile_new_token_ensure_token(']', compiled_output_term)
            elif next_next_token == '(' or next_next_token == '.':
                self.compile_subroutineCall(compiled_output_term)
            else:  # A single varName
                symbol_name = next_token
                self.write_push_variable(symbol_name)
                self.compile_new_token_ensure_token_type(
                    'identifier', compiled_output_term)
        elif next_token == '(':
            self.compile_new_token(compiled_output_term)
            self.compile_expression(compiled_output_term)
            self.compile_new_token_ensure_token(')', compiled_output_term)
        elif next_token in {'-', '~'}:  # unaryOp
            self.compile_new_token(compiled_output_term)
            self.compile_term(compiled_output_term)
            # The operator is applied after its operand has been pushed.
            if next_token == '-':
                self.vm_writer.write_arithmetic('neg')
            else:
                self.vm_writer.write_arithmetic('not')
        else:
            raise ValueError(
                'Not a valid expression: unexpected token {!r}'.format(
                    next_token))

    def compile_zero_or_more_op_and_term(self, parent):
        """
        Compile the (op term)* tail of an expression.

        op: '+' | '-' | '*' | '/' | '&' | '|' | '<' | '>' | '='

        Each operator's VM code is written after its right-hand term, so
        operators are applied strictly left to right.
        """
        binary_ops = {'+', '-', '*', '/', '&', '|', '<', '>', '='}
        # '*' and '/' are compiled as calls to Math routines; every other
        # operator is looked up in self.op_dict.
        math_calls = {'*': 'Math.multiply', '/': 'Math.divide'}
        while True:
            op = self.show_next_token()
            if op not in binary_ops:
                return
            self.compile_new_token(parent)  # op
            self.compile_term(parent)
            if op in math_calls:
                self.vm_writer.write_call(math_calls[op], 2)
            else:
                self.vm_writer.write_arithmetic(self.op_dict[op])

    def compile_expressionList(self, parent, function_name,
                               args_num_should_add_1):
        """
        Compile a call's argument list and write the VM 'call' command.

        expressionList: (expression (',' expression)* )?

        Args:
            parent: element that receives the 'expressionList' element.
            function_name: fully qualified name written into the call.
            args_num_should_add_1: truthy when the caller already pushed an
                object address that counts as an extra first argument.

        The argument count is kept in a local variable. It used to be
        accumulated in self.args_num, which a nested subroutine call inside
        an argument (e.g. f(g(a, b), c)) reset and overwrote, so the outer
        call was written with the wrong number of arguments. self.args_num
        is still set to the final count for any code that reads it.
        """
        expression_list_node = etree.SubElement(parent, 'expressionList')
        args_num = 1 if args_num_should_add_1 else 0
        if self.show_next_token() == ')':
            # No expression; the whitespace text only affects printing.
            expression_list_node.text = '\n\t'
        else:
            self.compile_expression(expression_list_node)
            args_num += 1
            while self.show_next_token() == ',':
                self.compile_new_token_ensure_token(',', expression_list_node)
                self.compile_expression(expression_list_node)
                args_num += 1
        self.args_num = args_num
        self.vm_writer.write_call(function_name, args_num)

    def compile_comma_and_expression(self, parent):
        """
        Compile every remaining (',' expression) group of an argument list.

        self.args_num is incremented once per expression. Compiling an
        expression may compile a nested subroutine call, which overwrites
        self.args_num with its own count, so the running count is saved
        before and restored after each expression.
        """
        while self.show_next_token() == ',':
            self.compile_new_token_ensure_token(',', parent)
            self.args_num += 1
            count_so_far = self.args_num
            self.compile_expression(parent)
            self.args_num = count_so_far

    def write_push_variable(self, symbol_name):
        """Emit a VM push of the named variable, using its segment and index."""
        table = self.symbol_table
        slot = table.get_symbol_index(symbol_name)
        kind = table.get_symbol_kind(symbol_name)
        # segment_dict maps a symbol kind to its VM memory segment.
        self.vm_writer.write_push(self.segment_dict[kind], slot)

    def write_pop_variable(self, symbol_name):
        """Emit a VM pop of the stack top into the named variable."""
        table = self.symbol_table
        slot = table.get_symbol_index(symbol_name)
        kind = table.get_symbol_kind(symbol_name)
        # segment_dict maps a symbol kind to its VM memory segment.
        self.vm_writer.write_pop(self.segment_dict[kind], slot)
コード例 #20
0
class CompilerEngine (JackTokenizer):
    """Jack compilation engine layered directly on the tokenizer.

    The compile* methods consume tokens through eat/cEat, append an XML
    rendering of the parse to self.xml, and emit VM commands through
    self.vm.
    """
    # Binary operators accepted between two terms of an expression.
    B_OPERATOR = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
    # Unary operators that may prefix a term.
    U_OPERATOR = ['-', '~']
    # Keywords usable as constant terms.
    KEYWORD_CONSTANT = ['true', 'false', 'null', 'this']
    # Keywords that start a statement.
    STATEMENTS = ['let', 'if', 'while', 'do', 'return']
    # Built-in type keywords. Kept as a list: compileType concatenates it
    # with another list when reporting an error.
    KEYWORD_TYPE = ['int', 'char', 'boolean']
    # Keywords that start a subroutine declaration.
    KEYWORD_SUBROUTINE = ['constructor', 'function', 'method']
    # Keywords that start a class-level variable declaration.
    KEYWORD_CLASS_VAR_TYPE = ['static', 'field']
    # Token types that compileExpressionList treats as the start of an expression.
    TERM_TYPE = [Token.TK_INT, Token.TK_STRING, Token.TK_IDENTIFIER]
    # XML escapes applied by xmlContent to non-string tokens.
    REPLACEMENTS = {'<': '&lt;', '>': '&gt;', '"': '&quot;', '&': '&amp;'}
    
    def __init__(self, path):
        """Initialise the tokenizer on path and set up the compiler state.

        The VM output path is self.path with its last five characters
        replaced by '.vm'.
        """
        super().__init__(path)
        self.xml = ''
        # Advance the tokenizer once before any compile method runs.
        self.advance()
        self.st = SymbolTable()
        vm_path = self.path[:-5] + '.vm'
        self.vm = VMWriter(vm_path)
        self.className = ''
        self.labelCounter = 0
        self.currentFunctionName = ''
        self.currentSubroutineType = ''

    def xmlContent(self):
        """Return the current token's text as it is written inside an XML tag.

        String tokens lose their first and last character; any other token
        is replaced by its REPLACEMENTS entry when it has one.
        """
        kind = self.tokenType()
        text = self.getToken()
        if kind == Token.TK_STRING:
            return text[1:-1]
        return self.REPLACEMENTS.get(text, text)

    def generateXML(self):
        """Write the XML accumulated in self.xml to '<self.path>.xml'."""
        xml_path = self.path + '.xml'
        with open(xml_path, 'w+') as out:
            out.write(self.xml)

    def error(self, expected):
        """Report a syntax error on stderr and abort compilation.

        expected is either a description or a tuple/list of alternatives,
        which are joined with ' or '.

        Raises:
            CompilerException: always.
        """
        print("\n# On file '%s', got following error:" % self.path, file=sys.stderr)
        if isinstance(expected, (tuple, list)):
            fancyExpected = ' or '.join([repr(alternative) for alternative in expected])
        else:
            fancyExpected = expected
        details = (self.getLine()+1, fancyExpected, self.getToken())
        print("# Line %s: Expected %s, got '%s'." % details, file=sys.stderr)
        raise CompilerException("Get out.")

    def eat(self, *types):
        """Consume the current token, which must be of one of the given types.

        The token is appended to self.xml as '<type>content</type>' and the
        tokenizer is advanced. A token of any other type is reported through
        self.error.
        """
        if self.tokenType() not in types:
            self.error(types)
        fields = (self.tokenType(), self.xmlContent(), self.tokenType())
        self.xml = self.xml + '<%s>%s</%s>\n' % fields
        self.advance()

    def cEat(self, token):  # Conditional Eat
        """Consume the current token only if its text equals token.

        Any other token is reported through self.error. The comparison uses
        ==; calling __eq__ directly returns a truthy NotImplemented when the
        operand types differ.
        """
        if self.getToken() == token:
            self.eat(self.tokenType())
        else:
            self.error(token)

    def compileTerm(self):
        """Compile one term: XML goes to self.xml, VM code to self.vm.

        Handles variables, array accesses, subroutine calls, parenthesised
        expressions, unary operators, keyword constants, and integer and
        string constants. Any other token is reported through
        eat()/error().

        Changes from the previous version: the look-ahead catches only
        IndexError instead of everything; a string constant is compiled
        without its delimiting quotes (xmlContent strips the same two
        characters); and no String code is emitted for a token that is
        neither an integer nor a string before the error is reported.
        """
        self.xml += '<term>\n'
        if self.tokenType() == Token.TK_IDENTIFIER:
            # Peek one token past the identifier to classify the term.
            try:
                nextToken = self._tokens[1].token()
            except IndexError:
                nextToken = ''
            if nextToken == '[':
                # Array access: base + index, then read through 'that'.
                name = self.getToken()
                self.vm.writePush(self.st.kindOf(name), self.st.indexOf(name))
                self.eat(Token.TK_IDENTIFIER)
                self.cEat('[')
                self.compileExpression()
                self.cEat(']')
                self.vm.writeArithmetic('+')
                self.vm.writePop('pointer', 1)
                self.vm.writePush('that', 0)
            elif nextToken in ['(', '.']:
                self.compileSubroutineCall()
            else:
                # Plain variable
                name = self.getToken()
                self.vm.writePush(self.st.kindOf(name), self.st.indexOf(name))
                self.eat(Token.TK_IDENTIFIER)
        elif self.getToken() == '(':
            self.cEat('(')
            self.compileExpression()
            self.cEat(')')
        elif self.getToken() in self.U_OPERATOR:
            operator = self.getToken()
            self.eat(Token.TK_SYMBOL)
            self.compileTerm()
            if operator == '-':
                # NOTE(review): '!' appears to be the writer's key for
                # arithmetic negation (it also turns 1 into true below);
                # confirm against VMWriter.
                self.vm.writeArithmetic('!')
            else:
                self.vm.writeArithmetic(operator)
        elif self.getToken() in self.KEYWORD_CONSTANT:
            keyword = self.getToken()
            if keyword == 'this':
                self.vm.writePush('pointer', 0)
            elif keyword == 'true':
                self.vm.writePush('constant', 1)
                self.vm.writeArithmetic('!')
            else:  # 'false' or 'null'
                self.vm.writePush('constant', 0)
            self.eat(Token.TK_KEYWORD)
        else:
            if self.tokenType() == Token.TK_INT:
                self.vm.writePush('constant', self.getToken())
            elif self.tokenType() == Token.TK_STRING:
                # The token text still carries its delimiters; drop them.
                text = self.getToken()[1:-1]
                self.vm.writePush('constant', len(text))
                self.vm.writeCall('String.new', 1)
                for char in text:
                    self.vm.writePush('constant', ord(char))
                    self.vm.writeCall('String.appendChar', 2)
            # Reports an error for anything that is neither int nor string.
            self.eat(Token.TK_INT, Token.TK_STRING)
        self.xml += '</term>\n'

    def compileExpression(self):
        """Compile 'term (op term)*'.

        Each operator's VM code is written after its right-hand term, so
        evaluation is strictly left to right. Comparisons use == instead of
        direct __eq__ calls.
        """
        self.xml += '<expression>\n'
        self.compileTerm()
        while self.getToken() in self.B_OPERATOR:
            operator = self.getToken()
            self.eat(Token.TK_SYMBOL)
            self.compileTerm()
            # NOTE(review): `vm` here is a module-level name, not self.vm;
            # confirm it is imported where this class is defined.
            if operator in vm.ARITHMETIC:
                self.vm.writeArithmetic(operator)
            elif operator == '*':
                self.vm.writeCall('Math.multiply', 2)
            elif operator == '/':
                self.vm.writeCall('Math.divide', 2)
        self.xml += '</expression>\n'

    def compileExpressionList(self):
        """Compile a comma-separated, possibly empty, list of expressions.

        Returns:
            The number of expressions compiled.

        An expression can also start with a unary operator (e.g. f(-1) or
        f(~done)); such lists were previously mistaken for empty ones, which
        made the caller fail on the closing parenthesis.
        """
        self.xml += '<expressionList>\n'
        nArgs = 0
        startsExpression = (
            self.tokenType() in self.TERM_TYPE
            or self.getToken() in self.KEYWORD_CONSTANT
            or self.getToken() in self.U_OPERATOR
            or self.getToken() == '('
        )
        if startsExpression:
            nArgs += 1
            self.compileExpression()
            while self.getToken() == ',':
                nArgs += 1
                self.eat(Token.TK_SYMBOL)
                self.compileExpression()
        self.xml += '</expressionList>\n'
        return nArgs

    def compileSubroutineCall(self):
        """Compile a subroutine call and emit the VM 'call'.

        Two forms are accepted:
          owner.method(args): when owner is a known variable, its value is
              pushed as an extra first argument and the call targets the
              variable's type; otherwise owner is used as the class name.
          name(args): called on the current object; pointer 0 is pushed as
              an extra first argument.

        An identifier followed by neither '.' nor '(' is now reported as an
        error; it was previously accepted silently without emitting a call.
        """
        owner = self.getToken()
        self.eat(Token.TK_IDENTIFIER)
        if self.getToken() == '.':
            self.eat(Token.TK_SYMBOL)
            method = self.getToken()
            # A failed symbol-table lookup means owner is a class name.
            try:
                ownerIndex = self.st.indexOf(owner)
            except CompilerException:
                ownerIndex = -1
            isVariable = ownerIndex != -1
            if isVariable:
                nArgs = 1
                self.vm.writePush(self.st.kindOf(owner), ownerIndex)
            else:
                nArgs = 0
            self.eat(Token.TK_IDENTIFIER)
            self.cEat('(')
            nArgs += self.compileExpressionList()
            self.cEat(')')
            if isVariable:
                self.vm.writeCall('%s.%s' % (self.st.typeOf(owner), method), nArgs)
            else:
                self.vm.writeCall('%s.%s' % (owner, method), nArgs)
        elif self.getToken() == '(':
            self.cEat('(')
            self.vm.writePush('pointer', 0)
            nArgs = self.compileExpressionList()
            self.cEat(')')
            self.vm.writeCall('%s.%s' % (self.className, owner), nArgs+1)
        else:
            self.error(['.', '('])

    def compileStatements(self):
        """Compile statements until the current token no longer starts one.

        The statement keyword selects the compile method through a dispatch
        table instead of a chain of direct __eq__ calls.
        """
        self.xml += '<statements>\n'
        handlers = {
            'let': self.compileLet,
            'if': self.compileIf,
            'while': self.compileWhile,
            'do': self.compileDo,
            'return': self.compileReturn,
        }
        while self.getToken() in self.STATEMENTS:
            handlers[self.getToken()]()
        self.xml += '</statements>\n'

    def compileLet(self):
        """Compile 'let' varName ('[' expression ']')? '=' expression ';'.

        For an array target, the target address is computed first. If the
        rest of the statement contains another '[', the address stays on the
        stack and the value is routed through temp 0 so that evaluating the
        right-hand side cannot disturb pointer 1; otherwise pointer 1 is set
        immediately. Comparisons use == instead of direct __eq__ calls.
        """
        self.xml += '<letStatement>\n'
        self.cEat('let')
        variable = self.getToken()
        self.eat(Token.TK_IDENTIFIER)
        hasLeftArray = False
        hasRightArray = False
        if self.getToken() == '[':
            hasLeftArray = True
            self.eat(Token.TK_SYMBOL)
            self.vm.writePush(self.st.kindOf(variable), self.st.indexOf(variable))
            self.compileExpression()
            self.vm.writeArithmetic('+')
            self.cEat(']')
            # Scan the pending tokens up to the ';' that ends this
            # statement for an array access on the right-hand side.
            for pending in self._tokens:
                text = pending.token()
                if text == '[':
                    hasRightArray = True
                    break
                if text == ';':
                    break
            if not hasRightArray:
                self.vm.writePop('pointer', 1)

        self.cEat('=')
        self.compileExpression()
        if hasRightArray and hasLeftArray:
            # Park the value, point 'that' at the target, then store.
            self.vm.writePop('temp', 0)
            self.vm.writePop('pointer', 1)
            self.vm.writePush('temp', 0)
            self.vm.writePop('that', 0)
        elif hasLeftArray:
            self.vm.writePop('that', 0)
        else:
            self.vm.writePop(self.st.kindOf(variable), self.st.indexOf(variable))
        self.cEat(';')
        self.xml += '</letStatement>\n'

    def compileIf(self):
        """Compile an if statement with an optional else clause.

        Two labels are reserved per statement: '<class>.L<n-1>' marks the
        start of the else part and '<class>.L<n>' the end of the statement,
        where n is labelCounter after it has been advanced by 2. Comparisons
        use == instead of direct __eq__ calls.
        """
        self.xml += '<ifStatement>\n'
        self.cEat('if')
        self.cEat('(')
        self.compileExpression()
        # Jump to the else label when the (negated) condition holds.
        self.vm.writeArithmetic('~')
        self.labelCounter += 2
        thisLabel = self.labelCounter
        elseLabel = '%s.L%d' % (self.className, thisLabel-1)
        endLabel = '%s.L%d' % (self.className, thisLabel)
        self.vm.writeIf(elseLabel)
        self.cEat(')')
        self.cEat('{')
        self.compileStatements()
        self.vm.writeGoto(endLabel)
        self.vm.writeLabel(elseLabel)
        self.cEat('}')
        # The closing brace may have been the last token of the input, in
        # which case looking for 'else' raises EOFException.
        try:
            if self.getToken() == 'else':
                self.eat(Token.TK_KEYWORD)
                self.cEat('{')
                self.compileStatements()
                self.cEat('}')
        except EOFException:
            pass
        self.vm.writeLabel(endLabel)
        self.xml += '</ifStatement>\n'

    def compileWhile(self):
        """Compile a while statement.

        Two labels are reserved per loop: '<class>.L<n-1>' at the top of the
        loop and '<class>.L<n>' just after it, where n is labelCounter after
        it has been advanced by 2.
        """
        self.xml = self.xml + '<whileStatement>\n'
        self.cEat('while')
        self.cEat('(')
        self.labelCounter += 2
        exitIndex = self.labelCounter
        topIndex = exitIndex - 1
        self.vm.writeLabel('%s.L%d' % (self.className, topIndex))
        # Condition: leave the loop when its negation holds.
        self.compileExpression()
        self.vm.writeArithmetic('~')
        self.vm.writeIf('%s.L%d' % (self.className, exitIndex))
        self.cEat(')')
        # Body, then jump back to re-evaluate the condition.
        self.cEat('{')
        self.compileStatements()
        self.vm.writeGoto('%s.L%d' % (self.className, topIndex))
        self.vm.writeLabel('%s.L%d' % (self.className, exitIndex))
        self.cEat('}')
        self.xml = self.xml + '</whileStatement>\n'

    def compileDo(self):
        """Compile 'do' subroutineCall ';'."""
        self.xml = self.xml + '<doStatement>\n'
        self.cEat('do')
        self.compileSubroutineCall()
        # The call's return value is not used: pop it off the stack.
        self.vm.writePop('temp', 0)
        self.cEat(';')
        self.xml = self.xml + '</doStatement>\n'

    def compileReturn(self):
        """Compile 'return' expression? ';'.

        A return without an expression pushes constant 0 as the value. The
        two previously duplicated code paths are merged, and the comparison
        uses == instead of a direct __eq__ call.
        """
        self.xml += '<returnStatement>\n'
        self.cEat('return')
        if self.getToken() == ';':
            self.vm.writePush('constant', 0)
        else:
            self.compileExpression()
        self.vm.writeReturn()
        self.cEat(';')
        self.xml += '</returnStatement>\n'

    def compileType(self):
        """Consume a type: one of KEYWORD_TYPE or an identifier.

        Anything else is reported through self.error. The comparison uses
        == instead of a direct __eq__ call.
        """
        if self.getToken() in self.KEYWORD_TYPE:
            self.eat(Token.TK_KEYWORD)
        elif self.tokenType() == Token.TK_IDENTIFIER:
            self.eat(Token.TK_IDENTIFIER)
        else:
            self.error(self.KEYWORD_TYPE+[Token.TK_IDENTIFIER])

    def compileVarDec(self):
        """Compile 'var' type varName (',' varName)* ';'.

        Every declared name is defined in the symbol table with kind
        'local'.

        The type and name are read from the current token just before it is
        consumed. They used to come from a look-ahead that was skipped when
        fewer than two tokens remained, leaving both locals unbound. Each
        name is also validated as an identifier before it is defined.
        """
        self.xml += '<varDec>\n'
        self.cEat('var')

        tokenType = self.getToken()
        self.compileType()
        tokenIdent = self.getToken()
        self.eat(Token.TK_IDENTIFIER)
        self.st.define(tokenIdent, tokenType, 'local')

        while self.getToken() == ',':
            self.eat(Token.TK_SYMBOL)

            tokenIdent = self.getToken()
            self.eat(Token.TK_IDENTIFIER)
            self.st.define(tokenIdent, tokenType, 'local')

        self.cEat(';')
        self.xml += '</varDec>\n'

    def compileSubroutineBody(self):
        """Compile ``{ varDec* statements }`` and emit the function prologue.

        The VM ``function`` command is written after all ``var``
        declarations, using the symbol table's count of ``'local'`` entries.
        A method then binds ``this`` from argument 0, and a constructor
        allocates one word per field through ``Memory.alloc``.
        """
        self.xml += '<subroutineBody>\n'
        self.cEat('{')
        # ``==`` instead of direct ``__eq__`` calls throughout: the dunder can
        # return the truthy ``NotImplemented`` for mismatched operand types.
        while self.getToken() == 'var':
            self.compileVarDec()
        self.vm.writeFunction('%s.%s' % (self.className, self.currentFunctionName), self.st.varCount('local'))
        if self.currentSubroutineType == 'method':
            self.vm.writePush('argument', 0)
            self.vm.writePop('pointer', 0)
        elif self.currentSubroutineType == 'constructor':
            self.vm.writePush('constant', self.st.varCount(st.FIELD))
            self.vm.writeCall('Memory.alloc', 1)
            self.vm.writePop('pointer', 0)
        self.compileStatements()
        self.cEat('}')
        self.xml += '</subroutineBody>\n'

    def compileParameterList(self):
        """Compile a possibly empty ``type name (, type name)*`` list.

        Each parameter is added to the symbol table with kind
        ``'argument'``. The enclosing parentheses are not consumed here. The
        list is wrapped in ``<parameterList>`` tags in the XML buffer.
        """
        self.xml += '<parameterList>\n'
        # ``==`` / ``!=`` instead of direct ``__eq__`` calls, which may return
        # the truthy ``NotImplemented`` for mismatched operand types.
        if self.getToken() in self.KEYWORD_TYPE or self.tokenType() == Token.TK_IDENTIFIER:
            while True:
                # Peek at the type and name before they are consumed.
                if len(self._tokens) >= 2:
                    tokenType, tokenIdent = [x.token() for x in self._tokens[0:2]]
                self.compileType()
                self.eat(Token.TK_IDENTIFIER)
                self.st.define(tokenIdent, tokenType, 'argument')

                if self.getToken() != ',':
                    break
                self.eat(Token.TK_SYMBOL)
        self.xml += '</parameterList>\n'

    def compileSubroutineDec(self):
        """Compile one subroutine declaration (header, parameters and body).

        Records the subroutine keyword in ``self.currentSubroutineType`` and
        the name in ``self.currentFunctionName``. For a method, ``this`` is
        defined as an ``'argument'`` of the class type before the parameters.
        Calls ``self.error`` when the subroutine keyword or the return type
        is not acceptable.
        """
        self.xml += '<subroutineDec>\n'
        if self.getToken() in self.KEYWORD_SUBROUTINE:
            # ``==`` instead of a direct ``__eq__`` call, which may return the
            # truthy ``NotImplemented`` for mismatched operand types.
            if self.getToken() == 'method':
                self.st.define('this', self.className, 'argument')
            self.currentSubroutineType = self.getToken()
            self.eat(Token.TK_KEYWORD)
        else:
            self.error(self.KEYWORD_SUBROUTINE)
        if self.getToken() in self.KEYWORD_TYPE+['void']:
            self.eat(Token.TK_KEYWORD)
        elif self.tokenType() == Token.TK_IDENTIFIER:
            self.eat(Token.TK_IDENTIFIER)
        else:
            # The identifier kind must be a list element: concatenating it
            # to the list directly raised TypeError instead of reporting.
            self.error(self.KEYWORD_TYPE+['void', Token.TK_IDENTIFIER])
        self.currentFunctionName = self.getToken()
        self.eat(Token.TK_IDENTIFIER)
        self.cEat('(')
        self.compileParameterList()
        self.cEat(')')
        self.compileSubroutineBody()
        self.xml += '</subroutineDec>\n'
    
    def compileClassVarDec(self):
        """Compile ``(static | field) type name (, name)* ;``.

        Every declared name is added to the symbol table with the kind
        ``st.STATIC`` or ``st.FIELD`` and the type of the declaration. Calls
        ``self.error`` when the leading keyword is neither ``static`` nor
        ``field``. The declaration is wrapped in ``<classVarDec>`` tags.
        """
        self.xml += '<classVarDec>\n'

        # ``==`` instead of direct ``__eq__`` calls, which may return the
        # truthy ``NotImplemented`` for mismatched operand types.
        leading = self.getToken()
        if leading == 'static':
            tokenKind = st.STATIC
        elif leading == 'field':
            tokenKind = st.FIELD
        else:
            self.error(self.KEYWORD_CLASS_VAR_TYPE)
        self.eat(Token.TK_KEYWORD)
        # Peek at the type and the first name before they are consumed.
        if len(self._tokens) >= 2:
            tokenType, tokenIdent = [x.token() for x in self._tokens[0:2]]
        self.compileType()
        self.eat(Token.TK_IDENTIFIER)
        self.st.define(tokenIdent, tokenType, tokenKind)

        while self.getToken() == ',':
            self.eat(Token.TK_SYMBOL)

            tokenIdent = self.getToken()
            self.st.define(tokenIdent, tokenType, tokenKind)
            self.eat(Token.TK_IDENTIFIER)

        self.cEat(';')
        self.xml += '</classVarDec>\n'

    def compileClass (self):
        """Compile a whole class: ``class Name { classVarDec* subroutineDec* }``.

        Stores the class name in ``self.className`` and wraps everything the
        nested compile calls add to the XML buffer in ``<class>`` tags.
        """
        self.xml += '<class>\n'
        self.cEat('class')
        # Remember the class name before the identifier token is consumed.
        self.className = self.getToken()
        self.eat(Token.TK_IDENTIFIER)
        self.cEat('{')
        while self.getToken() in self.KEYWORD_CLASS_VAR_TYPE:
            self.compileClassVarDec()
        while self.getToken() in self.KEYWORD_SUBROUTINE:
            self.compileSubroutineDec()
            # Runs after each subroutine; presumably clears the
            # subroutine-level symbols -- confirm in the symbol table.
            self.st.startSubroutine()
            
        self.cEat('}')
        self.xml += '</class>\n'

    def compile(self):
        """Compile the class, then write the XML and close the VM output.

        A ``CompilerException`` is reported on stderr and ends the run
        without generating XML. The VM writer is closed on every path so the
        output file is not left open after a failure.
        """
        try:
            try:
                self.compileClass()
            except CompilerException:
                print("# Compilation failed.\n", file=sys.stderr)
                return
            self.generateXML()
        finally:
            self.vm.close()
        print('Successful compiling of "%s"' % self.path)
コード例 #21
0
class CompilationEngine:
    def __init__(self, tokenizer, classDict):
        """Set up an engine for one tokenized Jack file.

        :param tokenizer: token source; its ``getFileName()`` result is used
            both as the class name and as the argument to ``VMWriter``.
        :param classDict: accepted but not used by this constructor.
        """
        self.tokenizer = tokenizer
        self.className = tokenizer.getFileName()
        self.resetLabelCounters()
        self.writer = VMWriter(tokenizer.getFileName())

    def resetLabelCounters(self):
        self.ifLabelCounter = 0
        self.whileLabelCounter = 0

    def compileClass(self):
        """Compile the whole token stream of one class and close the writer.

        Creates a fresh ``SymbolTable``, then scans until the tokenizer
        reports no more tokens, handing ``static``/``field`` declarations and
        tokens listed in ``const.SUBS`` to their compile routines. Any other
        token is skipped.
        """
        self.symbolTable = SymbolTable()
        while self.tokenizer.hasMoreTokens():
            if self.tokenizer.getToken(
            ) == 'static' or self.tokenizer.getToken() == 'field':
                self.compileClassVarDec()
            elif self.tokenizer.getToken() in const.SUBS:
                self.compileSubroutine()
            # Do not advance when the current token already starts the next
            # subroutine; the next iteration has to see it.
            if not self.tokenizer.getToken() in const.SUBS:
                self.tokenizer.advance()
        self.writer.close()

    def compileClassVarDec(self):
        variable = []
        while self.tokenizer.getToken() != ';':
            if self.tokenizer.getToken() != ',':
                variable.append(self.tokenizer.getToken())
            self.tokenizer.advance()
        self.symbolTable.define(variable[2:], variable[1], variable[0])

    def compileSubroutine(self):
        """Compile one subroutine: header, ``var`` declarations and body.

        Starts a new subroutine scope in the symbol table, scans the header
        up to ``{`` to find the subroutine name and parameters, counts the
        locals, writes the VM ``function`` command and compiles the
        statements.
        """
        self.symbolTable.startSubroutine()
        functionName = None
        # Two flags step through the header: the token after the subroutine
        # keyword is taken as the return type, the one after that as the name.
        getType = False
        getName = False
        while self.tokenizer.getToken() != '{':
            if self.tokenizer.getToken() == '(':
                self.compileParameterList()
            elif self.tokenizer.getToken() in const.SUBS:
                getType = True
            elif getType:
                getName = True
                getType = False
            elif getName:
                functionName = self.tokenizer.getToken()
                getName = False
            self.tokenizer.advance()
        self.tokenizer.advance()  # {
        numLocals = 0
        while self.tokenizer.getToken() == 'var':
            # compileVarDec stops on ';', which is skipped here.
            numLocals += self.compileVarDec()
            self.tokenizer.advance()
        # NOTE(review): functionName is still None if the header scan never
        # reached the name; the concatenation below would then raise.
        self.writer.writeFunction(self.className + '.' + functionName,
                                  numLocals)
        self.resetLabelCounters()
        self.compileStatements()
        if self.tokenizer.getToken() == '}':
            self.tokenizer.advance()  # }

    def compileParameterList(self):
        self.tokenizer.advance()
        variables = []
        while self.tokenizer.getToken() != ')':
            if self.tokenizer.getToken() != ',':
                variables.append(self.tokenizer.getToken())
            self.tokenizer.advance()
        idx = 0
        for variable in variables:
            if idx % 2 != 0:
                self.symbolTable.define([variable], variables[idx - 1], 'arg')
            idx += 1

    def compileVarDec(self):
        variable = []
        while self.tokenizer.getToken() != ';':
            if self.tokenizer.getToken() != ',':
                variable.append(self.tokenizer.getToken())
            self.tokenizer.advance()
        self.symbolTable.define(variable[2:], variable[1], variable[0])
        return len(variable[2:])

    def compileStatements(self):
        while self.tokenizer.getToken() in [
                'let', 'if', 'while', 'do', 'return'
        ]:
            if self.tokenizer.getToken() == 'let':
                self.compileLet()
            if self.tokenizer.getToken() == 'if':
                self.compileIf()
            if self.tokenizer.getToken() == 'while':
                self.compileWhile()
            if self.tokenizer.getToken() == 'do':
                self.compileDo()
            if self.tokenizer.getToken() == 'return':
                self.compileReturn()

    def compileLet(self):
        self.tokenizer.advance()  # let
        assignee = self.tokenizer.getToken()
        self.tokenizer.advance()  # varName
        if self.tokenizer.getToken() == '[':
            self.tokenizer.advance()  # [
            self.compileExpression()  # expression
            self.tokenizer.advance()  # ]
        self.tokenizer.advance()  # =
        self.compileExpression()  # expression
        if not self.symbolTable.kindOf(assignee):
            raise Exception('Undeclared identifier assignment ' + assignee)
        else:
            self.writer.writePop(self.symbolTable.kindOf(assignee),
                                 self.symbolTable.indexOf(assignee))

    def getLabel(self, labelName, increment=False):
        label = labelName
        if 'WHILE' in labelName:
            label += str(self.whileLabelCounter)
            if increment:
                self.whileLabelCounter += 1
        if 'IF' in labelName:
            label += str(self.ifLabelCounter)
            if increment:
                self.ifLabelCounter += 1
        return label

    def compileIf(self):
        firstLabel = self.getLabel('IF_TRUE')
        secondLabel = self.getLabel('IF_FALSE')
        endLabel = self.getLabel('IF_END', True)
        self.tokenizer.advance()  # if
        self.tokenizer.advance()  # (
        self.compileExpression()  # expression
        self.tokenizer.advance()  # )
        self.writer.writeIf(firstLabel)
        self.writer.writeGoto(secondLabel)
        self.writer.writeLabel(firstLabel)
        self.tokenizer.advance()  # {
        self.compileStatements()  # statements
        self.tokenizer.advance()  # }
        if self.tokenizer.getToken() == 'else':
            self.writer.writeGoto(endLabel)
            self.writer.writeLabel(secondLabel)
            self.tokenizer.advance()  # else
            self.tokenizer.advance()  # {
            self.compileStatements()  # statements
            self.tokenizer.advance()  # }
            self.writer.writeLabel(endLabel)
        else:
            self.writer.writeLabel(secondLabel)

    def compileWhile(self):
        firstLabel = self.getLabel('WHILE_EXP')
        secondLabel = self.getLabel('WHILE_END', True)
        self.writer.writeLabel(firstLabel)
        self.tokenizer.advance()  # while
        self.tokenizer.advance()  # (
        self.compileExpression()  # expression
        self.tokenizer.advance()  # )
        self.writer.writeArithmetic('~', True)  # negating the expression
        self.writer.writeIf(secondLabel)
        self.tokenizer.advance()  # {
        self.compileStatements()  # statements
        self.tokenizer.advance()  # }
        self.writer.writeGoto(firstLabel)
        self.writer.writeLabel(secondLabel)

    def compileDo(self):
        """Compile ``do subroutineCall ;`` and drop the call's result.

        For a qualified call (``x.name(...)``) the prefix is replaced by the
        symbol-table type of ``x`` when ``x`` is a known variable; otherwise
        the prefix is used as written.
        """
        self.tokenizer.advance()  # do
        functionName = ''
        if self.tokenizer.nextToken() == '.':
            if not self.symbolTable.typeOf(self.tokenizer.getToken()):
                functionName = self.tokenizer.getToken()
            else:
                functionName = self.symbolTable.typeOf(
                    self.tokenizer.getToken())
            self.tokenizer.advance()  # (className | varName)
            functionName += '.'
            self.tokenizer.advance()  # .
        # NOTE(review): an unqualified call keeps the bare subroutine name
        # (no class prefix) and no receiver object is pushed for method
        # calls -- confirm this is intended.
        functionName += self.tokenizer.getToken()
        self.tokenizer.advance()  # subroutineName
        self.tokenizer.advance()  # (
        numArgs = self.compileExpressionList()  # expressionList
        self.tokenizer.advance()  # )
        self.tokenizer.advance()  # ;
        self.writer.writeCall(functionName, numArgs)
        # The call's result is not used by a 'do' statement.
        self.writer.writePop('temp', 0)

    def compileReturn(self):
        """Compile ``return expression? ;``.

        When the token after ``return`` is ``;`` a constant 0 is pushed as
        the return value; otherwise the expression is compiled.
        """
        # Look ahead while still positioned on 'return'.
        if self.tokenizer.nextToken() == ';':
            self.writer.writePush('constant', 0)
        self.tokenizer.advance()  # return
        if self.tokenizer.getToken() != ';':
            self.compileExpression()  # expression?
        self.writer.writeReturn()

    def compileExpression(self):
        op = None
        expLen = 0
        while self.tokenizer.getToken() not in [';', ')', ']', ',']:
            if self.tokenizer.getToken() in const.UOP and expLen == 0:
                self.compileTerm()
            elif self.tokenizer.getToken() in const.OP:
                op = self.tokenizer.getToken()
                self.tokenizer.advance()
            else:
                self.compileTerm()
            expLen += 1
        if not not op:
            self.writer.writeArithmetic(op)
        if self.tokenizer.getToken() == ';':
            self.tokenizer.advance()  # ;

    def compileTerm(self):
        """Compile one term of an expression.

        Handles, in this order: a parenthesised expression, a unary operator
        followed by a term, a plain variable or constant (pushed), and an
        identifier followed by ``[``, ``.`` or ``(``.
        """
        if self.tokenizer.getToken() == '(':
            self.tokenizer.advance()  # (
            self.compileExpression()  # expression
            self.tokenizer.advance()  # )
            return
        uop = None
        if self.tokenizer.getToken() in const.UOP:
            uop = self.tokenizer.getToken()
            self.tokenizer.advance()  # UOP
            self.compileTerm()
            if not not uop:
                self.writer.writeArithmetic(uop, True)
        # Nothing more to do when the term ended at an expression delimiter.
        if self.tokenizer.getToken() in [';', ')', ']', ',']:
            return
        if self.tokenizer.getToken(
        ) not in const.OP and self.tokenizer.nextToken() not in [
                '[', '.', '('
        ]:
            # Plain token: push it as a variable when the symbol table knows
            # it, otherwise as a constant.
            if not not self.symbolTable.kindOf(self.tokenizer.getToken()):
                self.writer.writePush(
                    self.symbolTable.kindOf(self.tokenizer.getToken()),
                    self.symbolTable.indexOf(self.tokenizer.getToken()))
            else:
                self.writer.writePush('constant', self.tokenizer.getToken())
            self.tokenizer.advance()  # varName, etc.
        elif self.tokenizer.nextToken() in ['[', '.', '(']:
            # Array entry or subroutine call; a known variable contributes
            # its declared type as the call prefix.
            functionName = ''
            if not self.symbolTable.typeOf(self.tokenizer.getToken()):
                functionName += self.tokenizer.getToken()
            else:
                functionName += self.symbolTable.typeOf(
                    self.tokenizer.getToken())
            self.tokenizer.advance()  # varName, etc.
            if self.tokenizer.getToken() == '[':
                # NOTE(review): the index expression is compiled but no
                # array access is emitted in this branch -- confirm.
                self.tokenizer.advance()  # [
                self.compileExpression()  # expression
                self.tokenizer.advance()  # ]
            if self.tokenizer.getToken() == '.':
                self.tokenizer.advance()  # .
                functionName += '.' + self.tokenizer.getToken()
                self.tokenizer.advance()  # identifier
            numArgs = 0
            if self.tokenizer.getToken() == '(':
                self.tokenizer.advance()  # (
                numArgs = self.compileExpressionList()  # expressionList
                self.tokenizer.advance()  # )
                self.writer.writeCall(functionName, numArgs)

    def compileExpressionList(self):
        count = 0
        while self.tokenizer.getToken() != ')':
            if self.tokenizer.getToken() == ',':
                self.tokenizer.advance()  # ,?
            else:
                self.compileExpression()
                count += 1
        return count
コード例 #22
0
class CompilationEngine:
	"""
	generates the compilers output
	"""

	def __init__(self, input_file, output_file):
		"""
		Tokenize ``input_file`` and compile it into ``output_file``.

		Compilation runs as part of construction: the class is compiled and
		the VM writer is closed before the constructor returns. The writer is
		closed even when compilation raises, so the output file is not left
		open.
		:param input_file: the jack file that the user want to compile
		:param output_file: the output path handed to ``VMWriter``
		"""
		self.label_count = 0
		self.file_reader = JackFileReader(input_file)
		self.jack_tokens = JackTokenizer(self.file_reader.get_one_liner())
		self.curr_token = self.jack_tokens.advance()
		self.to_output_file = []
		self.symbol_table = SymbolTable()
		self.vm_writer = VMWriter(output_file)
		self.class_name = None
		try:
			self.compile_class()
		finally:
			self.vm_writer.close()

	def compile_class(self):
		"""
		Compiles a complete class: ``class Name { classVarDec* subroutineDec* }``.

		The class name is stored in ``self.class_name``. Tokens are strings of
		the form ``<tag> value </tag>``, so ``split()[1]`` is the token value.
		"""
		# advancing beyond 'class'
		self.next_token()
		# assign class name
		self.class_name = self.next_token()
		# advancing beyond '{'
		self.next_token()
		# zero or more class variable declarations
		while self.curr_token.split()[1] in VAR_DECS:
			self.compile_class_var_dec()
		# zero or more subroutine declarations
		while self.curr_token.split()[1] in SUB_ROUTINES:
			self.compile_subroutine_dec()
		# advancing beyond '}'
		self.next_token()
		return

	def compile_class_var_dec(self):
		"""
		Compiles a static declaration or a field declaration.

		Grammar: ``('static' | 'field') type varName (',' varName)* ';'``.
		Every name is defined in the symbol table with the declaration's type
		and kind.
		:return: None
		"""
		var_kind = self.next_token()
		var_type = self.next_token()
		var_name = self.next_token()
		self.symbol_table.define(var_name, var_type, var_kind)
		while self.curr_token.split()[1] == COMMA:
			# advancing the COMMA
			self.next_token()
			var_name = self.next_token()
			self.symbol_table.define(var_name, var_type, var_kind)
		# advance beyond ;
		self.next_token()
		return

	def compile_subroutine_dec(self):
		"""
		Compiles a complete method, function, or constructor.

		Starts a new subroutine scope, registers the parameters (plus ``this``
		for a method), writes the VM ``function`` command and the
		constructor/method prologue, then compiles the body.
		:return: None
		"""
		self.symbol_table.start_subroutine()
		# constructor \ function \ method
		subroutine_type = self.next_token()
		# advance the return type
		self.next_token()
		# subroutine name
		subroutine_name = self.class_name + "." + self.next_token()
		# advance the left brackets
		self.next_token()
		if subroutine_type == "method":
			# the object itself is defined first, ahead of the declared parameters
			self.symbol_table.define("this", self.class_name, ARG)
		self.compile_parameters_list()
		# the number of locals is found by looking ahead, since the body has
		# not been compiled yet
		self.vm_writer.write_function(subroutine_name, self.count_var_dec())

		if subroutine_type == "constructor":
			# allocate one word per field and make the new block 'this'
			field_vars_num = self.get_num_of_field_vars()
			self.vm_writer.write_push("constant", field_vars_num)
			self.vm_writer.write_call("Memory.alloc", 1)
			self.vm_writer.write_pop("pointer", 0)

		if subroutine_type == "method":
			# bind 'this' to the object passed as argument 0
			self.vm_writer.write_push("argument", 0)
			self.vm_writer.write_pop("pointer", 0)

		# advance the right brackets
		self.next_token()
		self.compile_subroutine_body()

	def get_num_of_field_vars(self):
		field_vars_num = 0
		for var in self.symbol_table.class_symbol_table.values():
			if var[1] == "field":
				field_vars_num += 1
		return field_vars_num

	def compile_parameters_list(self):
		"""
		Compiles a (possibly empty) parameter list, not including the enclosing ().

		Every ``type name`` pair is defined in the symbol table as an argument.
		:return: the number of parameters
		"""
		if self.curr_token.split()[1] == RIGHT_BRACKETS:
			# empty list: nothing to define
			return 0
		param_count = 0
		while True:
			param_count += 1
			param_type = self.next_token()
			param_name = self.next_token()
			self.symbol_table.define(param_name, param_type, ARG)
			if self.curr_token.split()[1] != COMMA:
				return param_count
			# advance past the comma
			self.next_token()

	def count_var_dec(self):
		"""
		Counts the local variables declared at the start of the subroutine body.

		Looks ahead in the tokenizer's token list without consuming anything.
		:return: the number of declared local variables
		"""
		var_count = 0
		# start one position past the tokenizer's own index
		# (assumes that position is the first token of the body -- TODO confirm)
		temp_pointer = self.jack_tokens.curr_token + 1
		# 'var' type varName (',' varName)* ';'
		while self.jack_tokens.list_of_tokens[temp_pointer].split()[1] == "var":
			var_count += 1
			# skip var type varName
			temp_pointer = temp_pointer + 3
			while self.jack_tokens.list_of_tokens[temp_pointer].split()[1] == COMMA:
				var_count += 1
				# skip the comma and the following varName
				temp_pointer += 2
			# advance passed ;
			temp_pointer += 1
		return var_count

	def compile_subroutine_body(self):
		"""
		Compiles the subroutine body: ``'{' varDec* statements '}'``.
		:return: None
		"""
		# pass the left curly brackets
		self.next_token()
		# register every local variable declaration
		while self.curr_token.split()[1] == "var":
			self.compile_var_dec()
		self.compile_statements()
		# pass the right curly brackets
		self.next_token()

	def compile_var_dec(self):
		"""
		Compiles a var declaration: ``'var' type varName (',' varName)* ';'``.

		Every name is defined in the symbol table with the declaration's type
		and the ``LCL`` kind.
		:return: None
		"""
		# advance passed "var"
		self.next_token()
		var_type = self.next_token()
		var_name = self.next_token()
		self.symbol_table.define(var_name, var_type, LCL)
		while self.curr_token.split()[1] == COMMA:
			# advance passed COMMA
			self.next_token()
			var_name = self.next_token()
			self.symbol_table.define(var_name, var_type, LCL)
		# advance passed ;
		self.next_token()
		return

	def compile_statements(self):
		statements = True
		while statements:
			statement_type = self.curr_token.split()[1]
			if statement_type == "let":
				self.compile_let()
			elif statement_type == "if":
				self.compile_if()
			elif statement_type == "while":
				self.compile_while()
			elif statement_type == "do":
				self.compile_do()
			elif statement_type == "return":
				self.compile_return()
			else:
				statements = False

	def compile_let(self):
		"""
		Compiles a let statement: ``'let' varName ('[' expression ']')? '=' expression ';'``.

		A plain assignment pops the value into the variable's segment. An
		array assignment computes ``base + index`` first and stores the value
		through ``pointer 1`` / ``that 0``.
		:return: None
		"""
		# advances passed let
		self.next_token()
		# var name
		var_name = self.next_token()
		var_kind = self.symbol_table.kind_of(var_name)
		# fields are addressed through the 'this' segment
		if var_kind == "field":
			var_kind = "this"
		var_index = self.symbol_table.index_of(var_name)
		# for varName[] case
		list_flag = False
		if self.curr_token.split()[1] == LEFT_SQUARE_BRACKETS:
			list_flag = True
			# advance brackets
			self.next_token()
			self.compile_expression()
			# target address = index + array base
			self.vm_writer.write_push(var_kind, var_index)
			self.vm_writer.write_arithmetic("+")
			# advance brackets
			self.next_token()

		# advance equal sign
		self.next_token()
		self.compile_expression()
		if list_flag:
			# park the value in temp 0 so the target address, which is below
			# it on the stack, can be popped into pointer 1
			self.vm_writer.write_pop("temp", 0)
			self.vm_writer.write_pop("pointer", 1)
			self.vm_writer.write_push("temp", 0)
			self.vm_writer.write_pop("that", 0)
		else:
			self.vm_writer.write_pop(var_kind, var_index)

		# advance semi colon
		self.next_token()

	def compile_if(self):
		"""
		Compiles an if statement, with an optional else branch.

		Emits ``if-goto label_1`` / ``goto label_2`` / ``label label_1`` around
		the true branch; an else branch adds ``label_3`` as the common exit.
		:return: None
		"""
		# advance the if
		self.next_token()
		# advance the left brackets
		self.next_token()
		self.compile_expression()
		# self.vm_writer.write_arithmetic(NOT)
		label_1 = self.next_label()
		self.vm_writer.write_if(label_1)
		label_2 = self.next_label()
		self.vm_writer.write_goto(label_2)
		# label if true
		self.vm_writer.write_label(label_1)
		# advance the right brackets
		self.next_token()

		# advance the left curly brackets
		self.next_token()

		self.compile_statements()

		# advance the right curly brackets
		self.next_token()

		if self.curr_token.split()[1] == "else":
			# skip over the else branch once the true branch has run
			label_3 = self.next_label()
			self.vm_writer.write_goto(label_3)
			# label if false
			self.vm_writer.write_label(label_2)
			# advance the else
			self.next_token()
			# advance the left curly brackets
			self.next_token()
			self.compile_statements()
			# advance the right curly brackets
			self.next_token()
			self.vm_writer.write_label(label_3)

		else:
			# label if false
			self.vm_writer.write_label(label_2)

	def compile_while(self):
		"""
		Compiles a while statement.

		``label_1`` marks the condition; the negated condition jumps to
		``label_2`` and the body ends with a jump back to ``label_1``.
		:return: None
		"""
		# advance the while
		self.next_token()
		# advance the left brackets
		self.next_token()
		label_1 = self.next_label()
		self.vm_writer.write_label(label_1)
		self.compile_expression()
		# leave the loop when the condition is false
		self.vm_writer.write_arithmetic(NOT)
		label_2 = self.next_label()
		self.vm_writer.write_if(label_2)
		# advance the right brackets
		self.next_token()
		# advance the left curly brackets
		self.next_token()
		self.compile_statements()
		self.vm_writer.write_goto(label_1)
		self.vm_writer.write_label(label_2)
		# advance the right curly brackets
		self.next_token()

	def compile_subroutine_call(self):

		subroutine_name = self.next_token()
		kind = self.symbol_table.kind_of(subroutine_name)
		if kind == "field":
			kind = "this"
		index = self.symbol_table.index_of(subroutine_name)

		from_class = False
		if self.curr_token.split()[1] == ".":
			# advance the dot
			self.next_token()
			type_ = self.symbol_table.type_of(subroutine_name)
			if type_:
				subroutine_name = type_ + "." + self.next_token()
			else:
				subroutine_name = subroutine_name + "." + self.next_token()
		else:
			from_class = True
			subroutine_name = self.class_name + "." + subroutine_name
		# advance the brackets
		self.next_token()
		num_of_arguments = 0
		if from_class:
			self.vm_writer.write_push("pointer", 0)
			num_of_arguments = 1
		if kind is not None and index is not None:
			self.vm_writer.write_push(kind, index)
			num_of_arguments = 1
		num_of_arguments += self.compile_expression_list()
		# advance the brackets
		self.next_token()

		self.vm_writer.write_call(subroutine_name, num_of_arguments)

	def compile_do(self):
		"""
		Compiles a do statement: ``'do' subroutineCall ';'``.
		:return: None
		"""
		# advance the do
		self.next_token()

		# subroutine call:
		self.compile_subroutine_call()
		# a do statement does not use the call's result, so it is popped
		self.vm_writer.write_pop("temp", 0)
		# advance the semi colon
		self.next_token()

	def compile_return(self):
		"""
		Compiles a return statement.
		:return:
		"""
		# advance the return
		self.next_token()
		if self.curr_token.split()[1] != SEMI_COLON:
			if self.curr_token.split()[1] == "this":
				self.vm_writer.write_push("pointer", 0)
				self.next_token()
			else:
				self.compile_expression()
		else:
			# default
			self.vm_writer.write_push("constant", 0)
		self.vm_writer.write_return()
		# advance the semi colon
		self.next_token()

	def compile_expression(self):
		"""
		Compiles an expression: ``term (op term)*``.

		Each operator is emitted after its right-hand term, so the expression
		is evaluated left to right.
		:return: None
		"""
		self.compile_term()
		while self.curr_token.split()[1] in Syntax.operators:
			op = self.curr_token.split()[1]
			# advance passed the operator
			self.next_token()
			self.compile_term()
			self.compile_op(op)
		return

	def compile_op(self, op):
		if op == "*":
			self.vm_writer.write_call("Math.multiply", 2)
		elif op == "/":
			self.vm_writer.write_call("Math.divide", 2)
		else:
			self.vm_writer.write_arithmetic(op)

	def compile_term(self):
		"""
		Compiles a term. This routine is faced with a slight difficulty when trying to decide between
		some of the alternative parsing rules. Specifically, if the current token is an
		identifier, the routine must distinguish between a variable, an array entry, and a
		subroutine call. A single look-ahead token, which may be one of [, (, or .  suffices to distinguish
		between the three possibilities. Any other token is not part of this term and should not be advanced over.
		:return:
		"""
		all_ = self.curr_token.split()
		header = all_[0]
		val = all_[1]
		# handle case of stringConstant, integerConstant, keyword
		if header == "<integerConstant>":
			self.vm_writer.write_push("constant", val)
			self.next_token()
		# handle in case of (expression)
		elif val == LEFT_BRACKETS:
			# advance passed "("
			self.next_token()
			self.compile_expression()
			# advance passed ")"
			self.next_token()
		# case of  onary Op
		elif val in ONARY_OP:
			self.next_token()
			self.compile_term()
			if val == "-":
				self.vm_writer.write_arithmetic(NEG)
			else:
				self.vm_writer.write_arithmetic(NOT)
		elif header == IDENTIFIER:
			next_token = self.jack_tokens.peek().split()[1]
			if next_token == LEFT_SQUARE_BRACKETS:
				# skip name and "["
				self.next_token()
				self.next_token()
				self.compile_expression()
				kind = self.symbol_table.kind_of(val)
				if kind == "field":
					kind = "this"
				self.vm_writer.write_push(kind, self.symbol_table.index_of(val))
				self.vm_writer.write_arithmetic("+")
				# skip over "]"
				self.next_token()
				self.vm_writer.write_pop("pointer", 1)
				self.vm_writer.write_push("that", 0)

			# subroutine call: subroutineName(expressionList)
			elif next_token == LEFT_BRACKETS or next_token == ".":
				self.compile_subroutine_call()
			else:
				kind = self.symbol_table.kind_of(val)
				if kind == "field":
					kind = "this"
				self.vm_writer.write_push(kind, self.symbol_table.index_of(val))
				self.next_token()

		elif header == "<keyword>":
			if val == "this":
				self.vm_writer.write_push("pointer", 0)
			else:
				self.vm_writer.write_push("constant", 0)
				if val == "true":
					self.vm_writer.write_arithmetic(NOT)
			self.next_token()

		elif header == "<stringConstant>":
			the_string = self.curr_token[17:-18]
			self.vm_writer.write_push("constant", len(the_string))
			self.vm_writer.write_call("String.new", 1)
			for char in the_string:
				self.vm_writer.write_push("constant", ord(char))
				self.vm_writer.write_call("String.appendChar", 2)
			self.next_token()

		return

	def compile_expression_list(self):
		"""
		Compiles a (possibly empty) comma separated list of expressions.
		:return: the number of expressions compiled
		"""
		if self.curr_token.split()[1] == RIGHT_BRACKETS:
			# empty list: nothing to compile
			return 0
		expression_count = 0
		while True:
			expression_count += 1
			self.compile_expression()
			if self.curr_token.split()[1] != COMMA:
				return expression_count
			# advance comma
			self.next_token()

	def next_token(self):
		to_return = self.curr_token.split()[1]
		self.curr_token = self.jack_tokens.advance()
		return to_return

	def next_label(self):
		count = self.label_count
		self.label_count += 1
		return "LABEL" + str(count)
コード例 #23
0
class CompilationEngine():
    """Table-driven parser with backtracking over a token stream.

    Grammar rules are looked up in the module-level ``SYNTAX`` table and
    matched with ``_and`` (sequence) / ``_or`` (alternatives).  When a rule
    fails, the tokenizer index and the symbol table are rolled back to the
    state saved before the attempt.  The parsed result is a nested list of
    single-key dicts which is handed to the VM writer and, optionally,
    serialized as XML.
    """

    def __init__(self, tokenizer, xml_output, vm_output):
        """Store the tokenizer and XML path; create the VM writer and symbol table."""
        self.tokenizer = tokenizer
        self.xml_output = xml_output
        self.vmw = VMWriter(vm_output)
        self.symbolTable = SymbolTable()

    def compile(self):
        """Parse the token stream as a class, optionally dump XML, then emit VM code.

        The XML file is only written when the module-level ``TOKENXML`` flag
        is truthy.
        """
        codes = self._compile('class')  # a Jack program always starts with a class
        if TOKENXML:
            self.fp = open(self.xml_output, 'w')
            # local name ``xml``; it only shadows any module-level ``xml`` inside this method
            xml = self._convert_to_xml(codes)
            self.fp.write(xml)
            self.fp.close()
        self.vmw.convertClass(codes)
        self.vmw.close()

    def _convert_to_xml(self, parsed_list):
        """Return ``parsed_list`` as pretty-printed XML text.

        The output of ``toprettyxml`` is post-processed line by line; see the
        comments below for the three adjustments that are made.
        """
        sfied = list2xml(parsed_list).encode('utf-8')
        # print(sfied)
        dom = xml.dom.minidom.parseString(sfied)
        pretty_xml_str = dom.toprettyxml()
        cleaned = []
        # Minor adjustments for TextComparer.sh:
        # - drop the xml version declaration on the first line
        # - drop blank lines
        # - put a line break inside expressionList / parameterList elements
        for line in pretty_xml_str.split("\n"):
            if '?xml' in line:
                continue
            if line.strip() == "":
                continue
            if ("expressionList" in line
                    or "parameterList" in line) and ">  <" in line:
                # split "<tag>  </tag>" into an opening and a closing line,
                # both keeping the original indentation
                tag = "expressionList" if "expressionList" in line else "parameterList"
                idx = line.find("<")
                cleaned.append(line[:idx] + "<{}>".format(tag))
                cleaned.append(line[:idx] + "</{}>".format(tag))
                continue
            cleaned.append(line)
        return "\n".join(cleaned)

    def _load_token(self):
        """Advance the tokenizer and return its ``token`` attribute."""
        self.tokenizer.advance()
        return self.tokenizer.token

    def _set_checkpoint(self):
        """Return the tokenizer's current index (0 when it is ``None``) and print it."""
        checkpoint = self.tokenizer.curr_index if self.tokenizer.curr_index is not None else 0
        print("new checkpoint is {}".format(checkpoint))
        return checkpoint

    def _copy_symbol_table(self):
        """Return a deep copy of the current symbol table."""
        return copy.deepcopy(self.symbolTable)

    def _restore_symbol_table(self, target):
        """Replace the current symbol table with ``target``."""
        self.symbolTable = target

    def _compile_reserved(self, component):
        """Match the next token against the reserved word/symbol ``component``.

        Returns ``[{tokentype: revealToken(token, tokentype)}]`` on a match.
        Otherwise rewinds the tokenizer to where it was on entry and raises
        ``TokenUnmatchError``.
        """
        checkpoint = self.tokenizer.curr_index if self.tokenizer.curr_index is not None else 0
        token = self._load_token()
        for tokentype, resv_tokens in RESERVED_TOKENS.items():
            if component in resv_tokens and component == token:
                print("### compiled reserved word!!!")
                print([{tokentype: token}])
                # input()
                return [{tokentype: revealToken(token, tokentype)}]
        print("!!!reserved word compile failed!!!")
        print(component)
        print("token: " + token)
        self.tokenizer.setIndex(checkpoint)
        raise TokenUnmatchError()

    def _compile_user_token(self, component):
        """Match the next token against the user-token type ``component``.

        A token matches when ``component`` is a key of ``USER_TOKENS`` and the
        token matches that key's regex.  Identifiers are returned as
        ``[{"identifier": {"name": ...}}]``; other types as
        ``[{tokentype: ...}]``.  On failure the tokenizer is rewound and
        ``TokenUnmatchError`` is raised.
        """
        checkpoint = self.tokenizer.curr_index if self.tokenizer.curr_index is not None else 0
        token = self._load_token()
        for tokentype, regex in USER_TOKENS.items():
            if component == tokentype and re.match(regex, token):
                print("### compiled user token!!!")
                print([{tokentype: token}])
                # input()
                if tokentype == "identifier":
                    return [{
                        tokentype: {
                            "name": revealToken(token, tokentype)
                        }
                    }]
                return [{tokentype: revealToken(token, tokentype)}]
        print("!!!user token compile failed!!!")
        print(component)
        print("token: " + token)
        self.tokenizer.setIndex(checkpoint)
        raise TokenUnmatchError()

    def _compile(self, component):
        """Parse one grammar rule and return its parsed bodies.

        ``component`` is either a rule name (a key of ``SYNTAX``) or a rule
        dict.  Rule dicts are read for the keys ``logic`` ("and"/"or",
        default "and"), ``components``, ``multiple``, ``binary`` and
        ``wrapped``.

        For ``multiple`` and ``binary`` rules a ``NoTokenError`` /
        ``TokenUnmatchError`` is absorbed after rolling back; for any other
        rule it is re-raised after rolling back.  When the rule was given by
        name and ``wrapped`` is not false, the result is ``[{name: bodies}]``;
        otherwise the bare ``bodies`` list is returned.
        """
        print("### start compiling ###")
        print(component)

        def _exec_compile(component):
            """Dispatch to ``_and`` / ``_or`` according to the rule's ``logic``."""
            logic = component["logic"] if "logic" in component else "and"
            if logic == "and":
                return self._and(component["components"])
            elif logic == "or":
                return self._or(component["components"])
            else:
                raise Exception("Invalid logic: {}".format(logic))

        # ``key`` stays None for anonymous (inline dict) rules
        key = None
        if isinstance(component, str):
            key = component
            component = SYNTAX[component]

        # run every symbol-table preprocess hook registered for this rule name
        if key is not None and key in SYMBOL["preprocess_targets"]:
            for proc in SYMBOL["preprocess"]:
                if key != proc["target"]:
                    continue
                try:
                    getattr(self.symbolTable, proc["func"])(key)
                except Exception as e:
                    print("##################################")
                    print("######## preprocess error ########")
                    print("##################################")
                    print("key: {}".format(key))
                    raise e

        bodies = []
        isMulti = "multiple" in component and component["multiple"]
        isBinary = "binary" in component and component["binary"]
        try:
            if isMulti:
                # repeat the rule until it fails; the failure is what ends the loop,
                # and the checkpoint taken before the failing attempt is restored below
                while (True):
                    checkpoint = self._set_checkpoint()
                    savedSymbolTable = self._copy_symbol_table()
                    body = _exec_compile(component)
                    bodies.extend(body)
            elif isBinary:
                checkpoint = self._set_checkpoint()
                savedSymbolTable = self._copy_symbol_table()
                body = _exec_compile(
                    component
                )  # if this raises, bodies keeps its initial value []
                bodies = body
            else:
                checkpoint = self._set_checkpoint()
                savedSymbolTable = self._copy_symbol_table()
                bodies = _exec_compile(component)
        except (NoTokenError, TokenUnmatchError) as e:
            print("############################")
            print("SET INDEX")
            # NOTE(review): this debug print indexes tokens[checkpoint]; it would
            # raise if checkpoint is not a valid index -- confirm against the tokenizer
            print("from {} to {}".format(self.tokenizer.curr_token,
                                         self.tokenizer.tokens[checkpoint]))
            self.tokenizer.setIndex(checkpoint)
            self._restore_symbol_table(savedSymbolTable)
            if not (isMulti or isBinary):
                raise e

        # Reaching this point means the contents of bodies were parsed
        # according to component, so the symbol table may be updated.
        print("### finished compiling {} ###".format(str(component)))
        print(bodies)

        # The guard below is disabled, so every postprocess entry is considered.
        #if key is not None and key in SYMBOL["postprocess_targets"]:
        for proc in SYMBOL["postprocess"]:
            # a target is either a predicate over bodies or a rule name;
            # targets of any other type are applied unconditionally
            if isfunction(proc["target"]):
                if not proc["target"](bodies): continue
            elif isinstance(proc["target"], str):
                if key != proc["target"]: continue
            try:
                bodies = getattr(self.symbolTable,
                                 proc["func"])(bodies=bodies,
                                               key=proc["target"])
            except Exception as e:
                print("##################################")
                print("####### postprocess error ########")
                print("##################################")
                print("key: {}".format(key))
                print("bodies: {}".format(json.dumps(bodies, indent=2)))
                raise e

        wrap = component["wrapped"] if "wrapped" in component else True
        return [{key: bodies}] if (key is not None) and wrap else bodies

    def _compile_token(self, component):
        """Route ``component`` to the matching compile routine and return its result.

        Checked in order: reserved token, grammar rule (inline dict or
        ``SYNTAX`` key), user token.  Anything else raises ``Exception``.
        """
        body = []
        if component in RESERVED_TOKEN_LIST:
            body = self._compile_reserved(component)
        elif isinstance(component, dict) or component in SYNTAX.keys():
            body = self._compile(component)
        elif component in USER_TOKENS.keys():
            body = self._compile_user_token(component)
        else:
            raise Exception("Invalid component: {} ".format(component))
        return body

    def _and(self, components):
        """Compile every component in order and return the concatenated bodies."""
        bodies = []
        # Tokens are expected to be consumable in the order of components,
        # and every component is expected to compile successfully;
        # the first failure propagates as an exception.
        for component in components:
            body = self._compile_token(component)
            bodies.extend(body)
        return bodies

    def _or(self, components):
        """Return the first non-empty result among the alternative components.

        An alternative that raises ``TokenUnmatchError`` or yields an empty
        body is skipped.  Raises ``TokenUnmatchError`` when none is accepted.
        """
        # One of the components is expected to match the upcoming tokens.
        # When one fails to compile, catch the error and try the next candidate;
        # raise if every component fails.
        for component in components:
            try:
                body = self._compile_token(component)
                if body:
                    return body
            except TokenUnmatchError as e:
                pass
        raise TokenUnmatchError()
コード例 #24
0
class Parser:
    def __init__(self, directory, filename):
        """Create the symbol table, the VM writer and the lookup tables used while parsing.

        ``directory`` and ``filename`` are forwarded unchanged to ``VMWriter``.
        """
        # Collaborators, constructed in this order.
        self.symbolTable = SymbolTable()
        self.VMWriter = VMWriter(directory, filename)
        self.className = ''

        # Token list, cursor into it, and the counter behind generated labels.
        self.tokens, self.tokenIndex, self.labelCounter = [], 0, 0

        # Keywords that open a statement / token types that open an expression.
        self.statementOptions = ['let', 'while', 'do', 'if', 'return']
        self.expressionOptions = ['integerConstant', 'stringConstant', 'identifier']

        # Binary operator -> VM command text.
        self.op = dict([
            ('+', 'add'),
            ('-', 'sub'),
            ('*', 'call Math.multiply 2'),
            ('/', 'call Math.divide 2'),
            ('=', 'eq'),
            ('<', 'lt'),
            ('>', 'gt'),
            ('&', 'and'),
            ('|', 'or'),
        ])
        # Unary operator -> VM command text.
        self.unaryOp = dict([('-', 'neg'), ('~', 'not')])
        # Keyword constant -> text used as the push operand.
        self.keywordConstant = dict([
            ('true', 'constant 1\n neg'),
            ('false', 'constant 0'),
            ('this', 'pointer 0'),
            ('null', 'constant 0'),
        ])

        # Keyword groups used to recognise declarations.
        self.classVariableKinds = ['field', 'static']
        self.subroutineVariableKinds = ['local', 'argument']
        self.subroutineTypes = ['method', 'constructor', 'function']
        self.variableTypes = ['int', 'char', 'boolean']

    def parse(self, tokens):
        self.tokens = tokens
        self.compileClass()
        self.VMWriter.close()
        return

    def expect(self, *args):
        expectedString = self.currentToken()

        if expectedString['value'] not in args and expectedString[
                'type'] != 'identifier':
            print(self.currentToken())
            raise Exception("Expecting either of '{0}'; saw '{1}'".format(
                ','.join([x for x in args]), expectedString['value']))
        else:
            # ateprint(self.currentToken())
            tokenValue = self.currentToken()['value']
            self.advanceIndex(1)
            return tokenValue

    def advanceIndex(self, step):
        self.tokenIndex += step

    def currentToken(self):
        return self.tokens[self.tokenIndex]

    # starts program compilation at class-level
    def compileClass(self):
        self.expect('class')
        self.className = self.compileClassName()
        self.expect('{')

        while self.currentToken()['value'] in self.classVariableKinds:
            self.compileClassVarDec()

        currentToken = self.currentToken()['value']
        while currentToken in self.subroutineTypes:
            self.compileSubroutineDec()
            currentToken = self.currentToken()['value']

        self.expect('}')

    def compileClassVarDec(self):
        variable = dict()
        variable['kind'] = self.expect('static', 'field')
        variable['type'] = self.compileType()
        variable['name'] = self.compileVarName()
        self.symbolTable.addVariable(variable)

        while self.currentToken()['value'] == ',':
            self.expect(',')
            variable['name'] = self.compileVarName()
            self.symbolTable.addVariable(variable)

        self.expect(';')

    def compileClassName(self):
        return self.expect('')

    def compileSubroutineDec(self):
        # reset the subroutine symbol table for new subroutine
        self.symbolTable.startNewSubroutine()

        self.subroutineType = self.expect('method', 'constructor', 'function')
        if self.subroutineType == 'method':
            # print(self.subroutineType)
            variable = dict()
            variable['kind'] = 'argument'
            variable['type'] = self.className
            variable['name'] = 'this'
            self.symbolTable.addVariable(variable)

        self.returnType = self.expect('void', 'int', 'char', 'boolean')
        self.compileSubroutineName()

        # subroutine name in VM => className.subroutineName
        subroutineName = self.className + '.' + self.subroutineName
        self.expect('(')
        numberOfParameters = self.compileParameterList()
        self.expect(')')
        self.compileSubroutineBody(numberOfParameters, subroutineName)

    def compileSubroutineName(self):
        self.subroutineName = self.expect('')

    def compileParameterList(self):
        numberOfParameters = 0

        if self.currentToken()['type'] == 'identifier' or self.currentToken(
        )['value'] in self.variableTypes:
            numberOfParameters = self.variableList('argument')

        return numberOfParameters

    def compileSubroutineBody(self, numberOfParameters, subroutineName):
        self.expect('{')

        while self.currentToken()['value'] == 'var':
            self.compileVarDec()

        numberOfLocalVars = self.symbolTable.getNumberOfLocalVars()
        self.VMWriter.writeFunction(subroutineName, numberOfLocalVars)

        if self.subroutineType == 'constructor':
            # allocate memory for the instance variables of the  object to be constructed
            memorySpaceNeeded = self.symbolTable.getNumberOfFieldVars()
            self.VMWriter.writePush('constant', memorySpaceNeeded)
            self.VMWriter.writeCall('Memory.alloc', 1)
            # set the base of the THIS segment to the value of the address returned by the Memory.alloc function
            self.VMWriter.writePop('pointer', 0)
        # add the reference to the calling object as argument 0 of the subroutine
        if self.subroutineType == 'method':
            self.VMWriter.writePush('argument', 0)
            self.VMWriter.writePop('pointer', 0)

        #print(subroutineName, self.symbolTable.subroutineSymbolTable)
        self.compileStatements()
        self.expect('}')

    def compileVarDec(self):
        self.expect('var')
        self.variableList('local')
        self.expect(';')

    def variableList(self, kind):
        numberOfParameters = 0

        # populate the respective (calling function) symbol table with the variables found
        variable = dict()
        variable['kind'] = kind
        variable['type'], variable['name'] = self.VarDec()
        self.symbolTable.addVariable(variable)
        numberOfParameters += 1

        # compile multiple variable declarations
        while self.currentToken()['value'] == ',':
            self.expect(',')
            if self.currentToken()['type'] == 'keyword':
                variable['type'] = self.compileType()
            variable['name'] = self.compileVarName()
            self.symbolTable.addVariable(variable)
            numberOfParameters += 1

        return numberOfParameters

    def VarDec(self):
        return self.compileType(), self.compileVarName()

    def compileVarName(self):
        return self.expect('')

    def compileType(self):
        return self.expect('int', 'char', 'boolean')

    def compileStatements(self):
        while self.currentToken()['value'] in self.statementOptions:
            self.compileStatement()

    def compileStatement(self):
        if self.currentToken()['value'] == 'if':
            self.compileIfStatement()
        elif self.currentToken()['value'] == 'while':
            self.compileWhileStatement()
        elif self.currentToken()['value'] == 'let':
            self.compileLetStatement()
        elif self.currentToken()['value'] == 'do':
            self.compileDoStatement()
        elif self.currentToken()['value'] == 'return':
            self.compileReturnStatement()

    def compileIfStatement(self):
        self.expect('if')
        self.expect('(')
        self.compileExpression()
        self.expect(')')
        self.VMWriter.writeArithmetic('not')
        label1 = self.advanceLabelCounter()
        self.VMWriter.writeIFGOTO('L{0}'.format(label1))
        self.expect('{')
        self.compileStatements()
        self.expect('}')
        label2 = self.advanceLabelCounter()
        self.VMWriter.writeGOTO('L{0}'.format(label2))
        self.VMWriter.writeLabel('L{0}'.format(label1))
        if self.currentToken()['value'] == 'else':
            self.expect('else')
            self.expect('{')
            self.compileStatements()
            self.expect('}')
        self.VMWriter.writeLabel('L{0}'.format(label2))

    def advanceLabelCounter(self):
        self.labelCounter += 1
        return self.labelCounter

    def compileWhileStatement(self):
        self.expect('while')
        self.expect('(')
        label1 = self.advanceLabelCounter()
        self.VMWriter.writeLabel('L{0}'.format(label1))
        self.compileExpression()
        self.VMWriter.writeArithmetic('not')
        label2 = self.advanceLabelCounter()
        self.VMWriter.writeIFGOTO('L{0}'.format(label2))
        self.expect(')')
        self.expect('{')
        self.compileStatements()
        self.VMWriter.writeGOTO('L{0}'.format(label1))
        self.expect('}')
        self.VMWriter.writeLabel('L{0}'.format(label2))

    def compileDoStatement(self):
        self.expect('do')
        self.advanceIndex(1)
        self.compileSubroutineCall()
        self.expect(';')
        self.VMWriter.writePop('temp', 0)

    def compileLetStatement(self):
        self.expect('let')
        variable = self.compileVarName()
        segment = self.symbolTable.getVariableKind(variable)
        index = self.symbolTable.getVariablePosition(variable)

        currentToken = self.currentToken()['value']

        if currentToken == '[':
            self.VMWriter.writePush(segment, index)
            self.expect('[')
            self.compileExpression()
            self.expect(']')
            self.VMWriter.writeArithmetic('add')

        self.expect('=')
        self.compileExpression()
        self.expect(';')

        if currentToken == '[':
            self.VMWriter.writePop('temp', 0)
            self.VMWriter.writePop('pointer', 1)
            self.VMWriter.writePush('temp', 0)
            self.VMWriter.writePop('that', 0)
        else:
            self.VMWriter.writePop(segment, index)

    def compileReturnStatement(self):
        self.expect('return')
        currentToken = self.currentToken()
        if currentToken['type'] in self.expressionOptions or currentToken['value'] in self.keywordConstant\
                or self.currentToken()['value'] in self.unaryOp:
            self.compileExpression()
        self.expect(';')
        if self.returnType == 'void':
            self.VMWriter.writePush('constant', 0)
        self.VMWriter.writeReturn()

    def compileSubroutineCall(self):
        subroutineFullName = ''
        numberOfArgs = 0
        if self.currentToken()['value'] == '.':
            self.advanceIndex(-1)
            subroutineFullName = self.compileVarName()
            varType = self.symbolTable.getVariableType(subroutineFullName)
            segment = self.symbolTable.getVariableKind(subroutineFullName)
            index = self.symbolTable.getVariablePosition(subroutineFullName)
            if varType is not None:
                self.VMWriter.writePush(segment, index)
                subroutineFullName = varType
                numberOfArgs += 1
            subroutineFullName += self.expect('.')
            subroutineName, nArgs = self.subroutineCall()
            subroutineFullName += subroutineName
        else:
            self.advanceIndex(-1)
            subroutineName, nArgs = self.subroutineCall()
            subroutineFullName += subroutineName
        numberOfArgs += nArgs
        self.VMWriter.writeCall(subroutineFullName, numberOfArgs)

    def subroutineCall(self):
        subroutineName = self.expect('')
        self.expect('(')
        numberOfArgs = self.compileExpressionList()
        self.expect(')')
        return subroutineName, numberOfArgs

    def compileExpression(self):
        self.compileTerm()
        while self.currentToken()['value'] in self.op.keys():
            operator = self.expect(self.currentToken()['value'])
            self.compileTerm()
            self.VMWriter.writeArithmetic(self.op[operator])

    def compileExpressionList(self):
        numberOfArgs = 0
        currentToken = self.currentToken()
        if currentToken['type'] in self.expressionOptions or currentToken['value'] in self.keywordConstant\
                or currentToken['value'] == '(' or self.currentToken()['value'] in self.unaryOp:
            self.compileExpression()
            numberOfArgs += 1
            while self.currentToken()['value'] == ',':
                self.expect(',')
                self.compileExpression()
                numberOfArgs += 1
        return numberOfArgs

    def compileTerm(self):
        if self.currentToken()['value'] in self.unaryOp.keys():
            operator = self.expect(self.currentToken()['value'])
            self.compileTerm()
            self.VMWriter.writeArithmetic(self.unaryOp[operator])

        elif self.currentToken()['type'] == 'identifier':
            variable = self.currentToken()['value']
            self.advanceIndex(1)
            if self.currentToken()['value'] == '(' or self.currentToken(
            )['value'] == '.':
                self.compileSubroutineCall()

            elif self.currentToken()['value'] == '[':
                segment = self.symbolTable.getVariableKind(variable)
                index = self.symbolTable.getVariablePosition(variable)
                self.VMWriter.writePush(segment, index)
                self.expect('[')
                self.compileExpression()
                self.expect(']')
                self.VMWriter.writeArithmetic('add')
                self.VMWriter.writePop('pointer', 1)
                self.VMWriter.writePush('that', 0)

            else:
                self.advanceIndex(-1)
                variable = self.expect('')
                segment = self.symbolTable.getVariableKind(variable)
                index = self.symbolTable.getVariablePosition(variable)
                self.VMWriter.writePush(segment, index)

        elif self.currentToken()['type'] == 'integerConstant':
            integer = self.expect(self.currentToken()['value'])
            self.VMWriter.writePush('constant', integer)

        elif self.currentToken()['type'] == 'stringConstant':
            string = self.expect(self.currentToken()['value'])
            self.VMWriter.writePush('constant', len(string))
            self.VMWriter.writeCall('String.new', 1)
            for character in string:
                self.VMWriter.writePush('constant', ord(character))
                self.VMWriter.writeCall('String.appendChar', 2)

        elif self.currentToken()['value'] in self.keywordConstant:
            constant = self.expect(self.currentToken()['value'])
            self.VMWriter.writePush(self.keywordConstant[constant], '')

        elif self.currentToken()['value'] == '(':
            self.expect('(')
            self.compileExpression()
            self.expect(')')