Exemple #1
0
class CompilationEngine:
    def __init__(self, filename):
        self.tokenizer = JackTokenizer(filename)

    def compile(self, filename):
        input_stream = initialize(filename)
        compileClass()

    def xml_print_el(self):
        xmlprint(self.tokenizer.token_type, self.tokenizer.token)

    def advanceSymbol(self, symbol):
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('Symbol expected:' + symbol +
                              ', found end of stream')
        if self.tokenizer.symbol() != symbol:
            raise SyntaxError('Symbol expected:' + symbol)

    def advanceKeyword(self, keyword):
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('Keyword expected:' + keyword +
                              ', found end of stream')
        if self.tokenizer.keyword() != keyword:
            raise SyntaxError('Keyword expected:' + keyword)

    def advanceTokenType(self, tokenType):
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('Identifier expected, found end of stream')
        if self.tokenizer.token_type != 'identifier':
            raise SyntaxError('Identifier expected')

    def advanceKeywords(self, *args):
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('Keywords expected:' + args +
                              ', found end of stream')
        if self.tokenizer.keyword() != keyword:
            raise SyntaxError('Keywords expected:' + args)

    def advanceAndGetType(self):
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('type expected, found end of stream')
        if self.is_type():
            return self.tokenizer.token
        else:
            raise SyntaxError('type expected')

    def is_type(self):
        return self.tokenizer.keyword() in [
            'int', 'char', 'boolean'
        ] or self.tokenizer.token_type == 'identifier'

    def advanceAndGetReturnType(self):
        self.advance()
        if self.is_type() or self.tokenizer.keyword() == 'void':
            return self.tokenizer.token
        else:
            raise SyntaxError('type expected')

    def advanceToClassName(self):
        self.advanceTokenType('identifier')
        return self.tokenizer.identifier()

    def advanceToVarName(self):
        self.advanceTokenType('identifier')
        return self.tokenizer.identifier()

    def advanceToSubroutineName(self):
        self.advanceTokenType('identifier')
        return self.tokenizer.identifier()

    def hasClassVarDec(self):
        pass

    def advance(self):
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('found end of stream!')

    def compileClass(self):
        # 'class' className '{' classVarDec* subroutineDec* '}'
        print('<class>')
        self.advanceKeyword('class')
        self.xml_print_el()
        # classname
        self.advanceToClassName()
        className = self.tokenizer.identifier()
        self.xml_print_el()
        # {
        self.advanceSymbol('{')
        self.xml_print_el()

        self.advance()
        # classVarDec*
        while (self.tokenizer.keyword() in ['static', 'field']):
            self.compileClassVarDec()

        # subroutineDec*
        while (self.tokenizer.keyword()
               in ['constructor', 'function', 'method']):
            self.compileSubroutine()

        # }
        self.advanceSymbol('}')
        self.xml_print_el()
        print('</class>')

    def compileClassVarDec(self):
        # ('static'|'field') type varName (',' varName)* ';'
        print('<classVarDec>')
        # ('static'|'field')
        self.xml_print_el()
        # type
        type = self.advanceAndGetType()
        self.xml_print_el()
        # varName
        varName = self.advanceToVarName()
        self.xml_print_el()
        # ;
        self.advanceSymbol(';')
        self.xml_print_el()
        print('</classVarDec>')
        self.advance()

    def compileSubroutine(self):
        print('<subroutineDec>')
        kind = self.tokenizer.keyword()
        self.xml_print_el()

        # ( 'void' | type )

        return_type = self.advanceAndGetReturnType()
        self.xml_print_el()

        # subroutineName
        name = self.advanceToSubroutineName()
        self.xml_print_el()

        # (
        self.advanceSymbol('(')
        self.xml_print_el()

        # TODO parameterList
        self.compileParameterList()

        # (
        self.advanceSymbol(')')
        self.xml_print_el()

        # subroutineBody
        self.compileSubroutineBody()

        print('</subroutineDec>')
        self.advance()
        pass

    def compileSubroutineBody(self):
        print('<subroutineBody>')
        # {
        self.advanceSymbol('{')
        self.xml_print_el()

        # varDec*
        #TODO a structure to represent the *
        self.varDec()

        # statementes
        self.compileStatements()

        # }
        self.advanceSymbol('}')
        self.xml_print_el()
        print('</subroutineBody>')

    def compileParameterList(self):
        print('<parameterList>')

        print('</parameterList>')

    def compileVarDec(self):
        pass

    def compileStatements(self):
        pass

    def compileDo():
        pass

    def compileLet():
        pass

    def compileWhile():
        pass

    def compileReturn():
        pass

    def compileIf():
        pass

    def compileExpression():
        pass

    # if identifier: variable, array entry, subroutine call
    def compileTerm():
        # single lookahead token - can be [ ( or .
        pass

    # comma separated list of expressions
    def compileExpressionList():
        pass
Exemple #2
0
import JackTokenizer as jt
import CompilationEngine as ce

debug = 0
if debug == 1:
    jack_file_name = 'ArrayTest/Main.jack'
else:
    jack_file_name = 'ArrayTest/Main.jack'

token_xml_file_name = jack_file_name.replace('.jack', 'T_gen.xml')

# part 1, tokenize
jt.Constructor(jack_file_name)
token_xml = '<tokens>\n'

while jt.hasMoreTokens():
    jt.advance()
    token_type = jt.tokenType()
    token = jt.func_list[token_type]()
    token_xml = token_xml + token

token_xml = token_xml + '</tokens>\n'
elements = token_xml.split('\n')

if debug:
    # print(token_xml)
    pass
else:
    of = open(token_xml_file_name, 'w+')
    of.write(token_xml)
    of.close()
Exemple #3
0
class CompilationEngine:
    op_list = ["+", "-", "*", "/", "&", "|", "<", ">", "="]
    un_op_list = ["-", "~"]

    un_op_dic = {"-": "NEG", "~": "NOT"}
    kind_dic = {
        "VAR": "LOCAL",
        "ARG": "ARG",
        "STATIC": "STATIC",
        "FIELD": "THIS"
    }
    op_dic = {
        "+": "ADD",
        "-": "SUB",
        "=": "EQ",
        ">": "GT",
        "<": "LT",
        "|": "OR",
        "&": "AND",
        "*": "MUL",
        "/": "DIV"
    }
    special_chars = dict()
    special_chars["<"] = "&lt;"
    special_chars[">"] = "&gt;"
    # special_chars['\"'] = "&quot;"
    special_chars["&"] = "&amp;"
    INDENT = "  "

    def __init__(self, inputFile, outputFile):
        # self.outFile = outputFile
        self.tokenizer = JackTokenizer(inputFile)
        self.vmwriter = VMWriter(outputFile)
        self.outFile = outputFile
        self.currToken = ""
        self.tabber = ""

        self.argumentsCounter = 0

    def compileClass(self):
        self.symbolTable = SymbolTable()
        self.currToken = self.tokenizer.getToken()
        # assuming first token is keyword class
        self.__advanceToken()
        # assuming next token is the class name
        self.thisType = self.currToken
        self.__advanceToken()
        # assuming next token is '{'
        if match("^[^}]+", self.tokenizer.revealNext()):
            self.compileClassVarDec()
            self.compileSubroutine()
        # assuming next token is '}'
        self.__advanceToken()
        self.vmwriter.close()

    def compileClassVarDec(self):

        while match("^(static|field)", self.tokenizer.revealNext()):
            # we know next token is static or field
            self.__advanceToken()
            kind = self.currToken.upper()
            # we assume this will be type of the var
            self.__advanceToken()
            typeVar = self.currToken
            while match("[^;]+", self.tokenizer.revealNext()):
                if match("[,]", self.tokenizer.revealNext()):
                    self.__advanceToken()
                # we assume this will be the var name
                self.__advanceToken()
                name = self.currToken
                self.symbolTable.define(name, typeVar, kind)
            self.__advanceToken()

    def compileSubroutine(self):
        while match("(constructor|function|method)",
                    self.tokenizer.revealNext()):
            self.ifCounter = 0
            self.whileCounter = 0
            self.symbolTable.startSubroutine()
            self.__advanceToken()
            self.subroutineKind = self.currToken
            if match("method", self.currToken):
                self.symbolTable.define("this", self.thisType, "ARG")

            self.__advanceToken()
            self.subroutineType = self.currToken
            # assuming this will be subroutine name
            self.__advanceToken()
            self.subroutineName = self.currToken
            # assuming this will be '('
            self.__advanceToken()
            self.compileParameterList()
            # assuming this will be ')'
            self.__advanceToken()
            self.compileSubroutineBody()

    def compileParameterList(self):

        while match("[^)]+", self.tokenizer.revealNext()):
            self.__advanceToken()
            typeArg = self.currToken
            # assuming this will be var name
            self.__advanceToken()
            nameArg = self.currToken
            self.symbolTable.define(nameArg, typeArg, "ARG")
            if match("[,]", self.tokenizer.revealNext()):
                self.__advanceToken()

    def compileSubroutineBody(self):
        if match("[{]", self.tokenizer.revealNext()):
            self.__advanceToken()
        if match("[^}]+", self.tokenizer.revealNext()):
            self.compileVarDec()
            nLocal = self.symbolTable.varCount("VAR")
            self.vmwriter.writeFunction(
                self.thisType + '.' + self.subroutineName, nLocal)
            if match("method", self.subroutineKind):
                self.vmwriter.writePush("ARG", 0)
                self.vmwriter.writePop("POINTER", 0)
            elif match("constructor", self.subroutineKind):
                fieldCounter = self.symbolTable.varCount("FIELD")
                self.vmwriter.writePush("CONST", fieldCounter)
                self.vmwriter.writeCall("Memory.alloc", 1)
                self.vmwriter.writePop("POINTER", 0)
            self.compileStatements()
        # assuming this will be '}'
        self.__advanceToken()

    def compileVarDec(self):

        while match("var", self.tokenizer.revealNext()):

            self.__advanceToken()
            kind = self.currToken.upper()
            # assuming this will be type
            self.__advanceToken()
            typeVar = self.currToken
            while match("[^;]+", self.tokenizer.revealNext()):
                if match("[,]", self.tokenizer.revealNext()):
                    self.__advanceToken()
                # assuming this will be the var name
                self.__advanceToken()
                nameVar = self.currToken
                self.symbolTable.define(nameVar, typeVar, kind)
            # assuming this will be ;
            self.__advanceToken()

    def compileStatements(self):
        while match("(let|do|if|while|return)", self.tokenizer.revealNext()):
            if match("(let)", self.tokenizer.revealNext()):
                self.compileLet()
            elif match("(do)", self.tokenizer.revealNext()):
                self.compileDo()
            elif match("(if)", self.tokenizer.revealNext()):
                self.compileIf()
            elif match("(while)", self.tokenizer.revealNext()):
                self.compileWhile()
            elif match("(return)", self.tokenizer.revealNext()):
                self.compileReturn()

    def subRoutineCall(self):
        numArg = 0
        # identifier was advanced already
        name = self.currToken
        remember = 0
        if match("[.]", self.tokenizer.revealNext()):
            if self.symbolTable.kindOf(name):
                kind = self.kind_dic[self.symbolTable.kindOf(name)]
                index = self.symbolTable.indexOf(name)
                self.vmwriter.writePush(kind, index)
                remember += 1
                name = self.symbolTable.typeOf(name)
            # assuming this will be '.'
            self.__advanceToken()
            name += self.currToken
            # assuming this will be subroutineName
            self.__advanceToken()
            name += self.currToken
        else:
            self.vmwriter.writePush("POINTER", 0)
            remember += 1
            name = self.thisType + '.' + name
        #assuming this will be '('
        self.__advanceToken()
        numArg = self.compileExpressionList()
        self.vmwriter.writeCall(name, numArg + remember)
        #assuming this will be ')'
        self.__advanceToken()

    def compileDo(self):  # 'do' subroutineCall ';'
        self.__advanceToken()
        #assuming this will be varName/className/subroutineName
        self.__advanceToken()
        self.subRoutineCall()
        # assuming this will be ;
        self.vmwriter.writePop("TEMP", 0)
        self.__advanceToken()

    def compileLet(self):
        self.__advanceToken()
        # assuming this will be var name
        self.__advanceToken()
        name = self.currToken
        kind = self.kind_dic[self.symbolTable.kindOf(name)]
        index = self.symbolTable.indexOf(name)
        arrayExp = False
        if match("[[]", self.tokenizer.revealNext()):
            arrayExp = True
            self.__advanceToken()
            self.compileExpression()
            self.vmwriter.writePush(kind, index)
            self.vmwriter.writeArithmetic("ADD")
            # assuming this will be ']'
            self.__advanceToken()
        # assuming this will be '='
        self.__advanceToken()
        self.compileExpression()
        if arrayExp:
            self.vmwriter.writePop("TEMP", 0)
            self.vmwriter.writePop("POINTER", 1)
            self.vmwriter.writePush("TEMP", 0)
            self.vmwriter.writePop("THAT", 0)
        else:
            self.vmwriter.writePop(kind, index)
        # assuming this will be ;
        self.__advanceToken()

    def compileWhile(self):  # 'while' '(' 'expression' ')' '{' 'statments' '}'
        currWhile = self.whileCounter
        self.whileCounter += 1
        self.__advanceToken()
        self.vmwriter.writeLabel("WHILE_EXP" + str(currWhile))
        #assuming this will be '('
        self.__advanceToken()
        self.compileExpression()
        #assuming this will be ')'
        self.__advanceToken()
        self.vmwriter.writeArithmetic("NOT")
        self.vmwriter.writeIf("WHILE_END" + str(currWhile))
        #assuming this will be '{'
        self.__advanceToken()
        self.compileStatements()
        #assuming this will be '}'
        self.__advanceToken()
        self.vmwriter.writeGoTo("WHILE_EXP" + str(currWhile))
        self.vmwriter.writeLabel("WHILE_END" + str(currWhile))

    def compileReturn(self):  # 'return' expression? ';'
        self.__advanceToken()
        if match("[^;]+", self.tokenizer.revealNext()):
            self.compileExpression()
        # assuming this will be ;
        if match("void", self.subroutineType):
            self.vmwriter.writePush("CONST", 0)
        self.vmwriter.writeReturn()
        self.__advanceToken()

    def compileIf(
            self):  # if ( exprssion ) { statments } (else { statments }) ?
        curr_if = self.ifCounter
        self.ifCounter += 1
        self.__advanceToken()
        # assuming this will be '('
        self.__advanceToken()
        self.compileExpression()
        # assuming this will be ')'
        self.__advanceToken()
        self.vmwriter.writeIf("IF_TRUE" + str(curr_if))
        self.vmwriter.writeGoTo("IF_FALSE" + str(curr_if))
        self.vmwriter.writeLabel("IF_TRUE" + str(curr_if))
        # assuming this will be '{'
        self.__advanceToken()
        self.compileStatements()
        # assuming this will be '}'
        self.__advanceToken()

        if match("(else)", self.tokenizer.revealNext()):
            self.vmwriter.writeGoTo("IF_END" + str(curr_if))
            self.vmwriter.writeLabel("IF_FALSE" + str(curr_if))
            self.__advanceToken()
            # assuming this will be '{'
            self.__advanceToken()
            self.compileStatements()
            #assuming this will be '}'
            self.__advanceToken()
            self.vmwriter.writeLabel("IF_END" + str(curr_if))
        else:
            self.vmwriter.writeLabel("IF_FALSE" + str(curr_if))

    def compileExpression(self):
        relevantOp = None
        counter = 0
        while match("[^,)}\];]+", self.tokenizer.revealNext()):
            self.compileTerm()
            if counter != 0:
                self.vmwriter.writeArithmetic(self.op_dic[relevantOp])
            if self.tokenizer.revealNext() in self.op_list:
                self.__advanceToken()
                relevantOp = self.currToken
            counter += 1

    def compileTerm(self):
        self.__advanceToken()
        if match("KEYWORD", self.tokenizer.tokenType()):
            if match("(false|null)", self.currToken):
                self.vmwriter.writePush("CONST", 0)
            elif match("true", self.currToken):
                self.vmwriter.writePush("CONST", 0)
                self.vmwriter.writeArithmetic("NOT")
            else:
                self.vmwriter.writePush("POINTER", 0)
        elif match("STRING_CONST", self.tokenizer.tokenType()):
            constString = self.currToken.replace('\"', "")
            constString = constString.replace('\t', "\\t")
            constString = constString.replace('\b', "\\b")
            constString = constString.replace('\r', "\\r")
            constString = constString.replace('\n', "\\n")
            self.vmwriter.writeString(constString)
        elif match("INT_CONST", self.tokenizer.tokenType()):
            self.vmwriter.writePush("CONST", int(self.currToken))
        elif match("IDENTIFIER", self.tokenizer.tokenType()):
            if match("[\[]", self.tokenizer.revealNext()):
                name = self.currToken
                kind = self.kind_dic[self.symbolTable.kindOf(name)]
                index = self.symbolTable.indexOf(name)
                self.__advanceToken()
                self.compileExpression()
                self.vmwriter.writePush(kind, index)
                self.vmwriter.writeArithmetic("ADD")
                self.vmwriter.writePop("POINTER", 1)
                self.vmwriter.writePush("THAT", 0)
                #assuming this will be ']'
                self.__advanceToken()
            elif match("([.]|[(])", self.tokenizer.revealNext()):
                self.subRoutineCall()
            else:
                seg = self.kind_dic[self.symbolTable.kindOf(self.currToken)]
                index = self.symbolTable.indexOf(self.currToken)
                self.vmwriter.writePush(seg, index)

        elif match("SYMBOL", self.tokenizer.tokenType()):
            if match("[(]", self.currToken):
                self.compileExpression()
                #assuming this will be ')'
                self.__advanceToken()
            else:
                un_op = self.currToken
                self.compileTerm()
                self.vmwriter.writeArithmetic(self.un_op_dic[un_op])

    def compileExpressionList(self):
        argsCount = 0
        while match("[^)]+", self.tokenizer.revealNext()):
            if match("[,]", self.tokenizer.revealNext()):
                self.__advanceToken()
            self.compileExpression()
            argsCount += 1
        return argsCount

    def __advanceToken(self):
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
            self.currToken = self.tokenizer.getToken()