コード例 #1
0
ファイル: JackAnalyzer.py プロジェクト: mormo17/N2T
def compile():
    """Tokenize the .jack file at module-level ``filePath``, writing the token
    stream as XML to ``tokenizerDestFilePath``, then feed the collected token
    lines to a CompilationEngine that writes its output to ``finalPath``.

    NOTE(review): shadows the builtin ``compile`` and relies on module-level
    globals (``filePath``, ``tokenizerDestFilePath``, ``finalPath``).

    Bug fix: both output files were leaked if tokenizing/compiling raised;
    they are now managed by ``with`` blocks.
    """
    tokens = JackTokenizer(filePath)

    lines = []
    with open(tokenizerDestFilePath, "w") as fileDest:
        fileDest.write("<tokens>\n")
        while tokens.hasMoreTokens():
            tokens.advance()
            curType = tokens.tokenType()
            # String constants and symbols need their dedicated accessors;
            # everything else uses the raw current token.
            if curType == "stringConstant":
                curToken = tokens.stringVal()
            elif curType == "symbol":
                curToken = tokens.symbol()
            else:
                curToken = tokens.getCurrentToken()

            toWrite = "<{0}> {1} </{0}>\n".format(curType, curToken)
            lines.append(toWrite)
            fileDest.write(toWrite)
        fileDest.write("</tokens>")

    with open(finalPath, "w") as finalDestFile:
        engine = CompilationEngine(lines, finalDestFile)
        engine.CompileClass()
コード例 #2
0
ファイル: Main.py プロジェクト: kmanzana/nand2tetris
  def create_token_file(jack_file_name):
    """Tokenize *jack_file_name* into a sibling ``...T.xml`` token file and
    return that file's name.

    Bug fixes: token types were compared with ``is`` (identity -- only works
    by accident of CPython string interning) and both file handles leaked;
    comparisons now use ``==`` and files are closed via ``with``.
    """
    token_file_name = jack_file_name.replace('.jack', 'T.xml')
    # NOTE(review): 'rU' mode was removed in Python 3.11 -- confirm the
    # target interpreter before switching to plain 'r'.
    with open(token_file_name, 'w') as token_file, \
         open(jack_file_name, 'rU') as jack_file:
      tokenizer = JackTokenizer(jack_file)

      token_file.write('<tokens>\n')

      while tokenizer.hasMoreTokens():
        tokenizer.advance()
        token_type = tokenizer.tokenType()  # hoisted: call once per token

        if token_type == 'KEYWORD':
          token_file.write('<keyword> {} </keyword>\n'.format(tokenizer.keyWord().lower()))
        elif token_type == 'SYMBOL':
          symbol = tokenizer.symbol()

          # Escape the characters that are special in XML.
          if symbol in ['<', '>', '&']:
            symbol = Main.XML_CONVSERSIONS[symbol]  # (sic) attribute name kept as declared

          token_file.write('<symbol> {} </symbol>\n'.format(symbol))
        elif token_type == 'IDENTIFIER':
          token_file.write('<identifier> {} </identifier>\n'.format(tokenizer.identifier()))
        elif token_type == 'INT_CONST':
          token_file.write('<integerConstant> {} </integerConstant>\n'.format(tokenizer.intVal()))
        elif token_type == 'STRING_CONST':
          token_file.write('<stringConstant> {} </stringConstant>\n'.format(tokenizer.stringVal()))

      token_file.write('</tokens>\n')

    return token_file_name
コード例 #3
0
def main():
    """Command-line entry point: tokenize each ``.jack`` input into a
    ``...Tokens.xml`` file, then run the Compiler over each token file.

    Expects exactly one argument: a ``.jack`` file or a directory of them.
    """
    if len(sys.argv) != 2:
        print("Usage: python3 JackAnalyzer.py path/file.jack\nor...\
        \nUsage: python3 JackAnalyzer.py path/dir")
        sys.exit(1)

    # check path is valid
    # and return list of in_path(s)
    in_f_paths = check_path_type()

    # Generate Tokens...
    token_xml_files = []
    for fp in in_f_paths:
        # setup outpath:
        token_fp = change_fp_name(fp, ".jack", "Tokens.xml")
        token_xml_files.append(token_fp)  # use this later
        with open(token_fp, 'w') as f:
            f.write("<tokens>\n")
            # create Tokenizer
            tokenizer = JackTokenizer(fp)
            while tokenizer.hasMoreTokens():
                # renamed from `type`, which shadowed the builtin
                crnt_tkn, token_type = tokenizer.advance()
                # advance() may yield an empty token (e.g. for comments);
                # only real tokens are written out
                if crnt_tkn:
                    out_string = "<{}> {} </{}>\n".format(token_type, crnt_tkn, token_type)
                    f.write(out_string)
            f.write("</tokens>\n")

    # Create compiler:
    for fp in token_xml_files:
        out_fp = change_fp_name(fp, "Tokens.xml", "new.xml")
        Compiler(fp, out_fp)
 def writeTokenizerFile(self, inputFile, inputDirName):
     """Write the token stream of *inputFile* as XML to
     ``<inputDirName>/output/<basename>.xml``, creating the output directory
     if needed.

     Bug fix: the output file was never closed (handle leak, potentially
     unflushed data); it is now managed by a ``with`` block.
     """
     from JackTokenizer import JackTokenizer
     import os
     outputFileName = os.path.join(inputDirName, "output",
         os.path.splitext(os.path.basename(inputFile.name))[0] + ".xml")
     if not os.path.exists(os.path.dirname(outputFileName)):
         os.makedirs(os.path.dirname(outputFileName))
     # XML entity escapes for the symbols that are special in XML.
     escapes = {"&": "&amp;", "<": "&lt;", ">": "&gt;", "\"": "&quot;"}
     with open(outputFileName, 'w') as outputFile:
         outputFile.write("<tokens>\n")
         tokenizer = JackTokenizer(inputFile)
         # NOTE(review): tokenType/currentToken are read as attributes (not
         # called) -- presumably matches this project's JackTokenizer API;
         # confirm against its definition.
         while tokenizer.hasMoreTokens():
             tokenizer.advance()
             if tokenizer.tokenType == JackTokenizer.KEYWORD:
                 outputFile.write("\t<keyword>" + tokenizer.currentToken + "</keyword>\n")
             elif tokenizer.tokenType == JackTokenizer.SYMBOL:
                 # escape if special, otherwise emit the symbol verbatim
                 symbol = escapes.get(tokenizer.currentToken, tokenizer.currentToken)
                 outputFile.write("\t<symbol>" + symbol + "</symbol>\n")
             elif tokenizer.tokenType == JackTokenizer.IDENTIFIER:
                 outputFile.write("\t<identifier>" + tokenizer.currentToken + "</identifier>\n")
             elif tokenizer.tokenType == JackTokenizer.INT_CONST:
                 outputFile.write("\t<integerConstant>" + tokenizer.currentToken + "</integerConstant>\n")
             elif tokenizer.tokenType == JackTokenizer.STRING_CONST:
                 outputFile.write("\t<stringConstant>" + tokenizer.currentToken + "</stringConstant>\n")

         outputFile.write("</tokens>\n")
コード例 #5
0
ファイル: main.py プロジェクト: YC-Vertex/nand2tetris-proj
def tokenizerTest():
    """Dump every token of the module-level ``infile`` as one XML element per
    line into ``outfile`` (checks the tokenizer in isolation).

    Bug fix: the output file is now closed even if tokenizing raises.
    """
    tk = JackTokenizer(infile)
    with open(outfile, 'w') as fout:
        while tk.hasMoreTokens():
            tk.advance()
            # map the token-type code to its XML tag name
            typestr = tk.typestr[tk.tokenType()]
            fout.write('<' + typestr + '> ')
            fout.write(tk.getToken())
            fout.write(' </' + typestr + '>\n')
コード例 #6
0
ファイル: JackAnalyzer.py プロジェクト: jauzzz/nand2teris
    def create_token(self, path):
        """Tokenize the .jack file at *path* into ``<path>.token.xml`` and
        return the token file's name.

        Bug fix: the token file is now closed via ``with`` even if
        tokenizing raises mid-way.
        """
        token_file_name = path.replace('.jack', '.token.xml')
        tokenizer = JackTokenizer(path)

        with open(token_file_name, 'w') as token_file:
            token_file.write('<tokens>\n')
            while tokenizer.hasMoreTokens():
                tokenizer.advance()
                # xml_token formats one token as its XML line
                token_file.write(self.xml_token(tokenizer.current_token))
            token_file.write('</tokens>\n')

        return token_file_name
コード例 #7
0
 def compileOneFile(self):
     """Pop the next file from ``self.file_list``, compile it, and write
     ``<name>.xml`` into ``self.output_dir``.

     Returns False when there is nothing left to compile (and None after a
     successful compile, matching the original contract).

     Bug fix: the output file is now closed even if compilation raises.
     """
     if len(self.file_list) == 0:
         print("No more file to be compiled!")
         return False
     input_file_name = self.file_list.pop()
     # base name without directory or extension: "dir/Foo.jack" -> "Foo"
     base_name = input_file_name.split("/")[-1].split(".")[0]
     tokenizer = JackTokenizer(input_file_name)
     with open(self.output_dir + base_name + ".xml", "w") as output_file:
         compeng = CompilationEngine(tokenizer, output_file)
         # NOTE(review): calling compileClass once per remaining token looks
         # suspicious but is preserved -- confirm against CompilationEngine.
         while tokenizer.hasMoreTokens():
             tokenizer.advance()
             compeng.compileClass()
     print("done:  " + input_file_name)
コード例 #8
0
ファイル: JackCompiler.py プロジェクト: mormo17/N2T
def getTokens():
    """Tokenize the file at module-level ``filePath``, appending one
    XML-formatted line per token to the module-level ``tokenList``."""
    tokens = JackTokenizer(filePath)
    while tokens.hasMoreTokens():
        tokens.advance()
        kind = tokens.tokenType()
        # string constants and symbols have dedicated accessors; anything
        # else is taken verbatim from the current token
        if kind == "stringConstant":
            text = tokens.stringVal()
        elif kind == "symbol":
            text = tokens.symbol()
        else:
            text = tokens.getCurrentToken()
        tokenList.append("<{0}> {1} </{0}>\n".format(kind, text))
コード例 #9
0
    def writeTokenizerFile(self, inputFile, inputDirName):
        """Write the token stream of *inputFile* as XML to
        ``<inputDirName>/output/<basename>.xml``, creating the output
        directory if needed.

        Bug fix: the output file was never closed (handle leak, potentially
        unflushed data); it is now managed by a ``with`` block.
        """
        from JackTokenizer import JackTokenizer
        import os
        outputFileName = os.path.join(
            inputDirName, "output",
            os.path.splitext(os.path.basename(inputFile.name))[0] + ".xml")
        if not os.path.exists(os.path.dirname(outputFileName)):
            os.makedirs(os.path.dirname(outputFileName))
        # XML entity escapes for the symbols that are special in XML.
        escapes = {"&": "&amp;", "<": "&lt;", ">": "&gt;", "\"": "&quot;"}
        with open(outputFileName, 'w') as outputFile:
            outputFile.write("<tokens>\n")
            tokenizer = JackTokenizer(inputFile)
            # NOTE(review): tokenType/currentToken are read as attributes
            # (not called) -- presumably matches this project's JackTokenizer
            # API; confirm against its definition.
            while tokenizer.hasMoreTokens():
                tokenizer.advance()
                if tokenizer.tokenType == JackTokenizer.KEYWORD:
                    outputFile.write("\t<keyword>" + tokenizer.currentToken +
                                     "</keyword>\n")
                elif tokenizer.tokenType == JackTokenizer.SYMBOL:
                    # escape if special, otherwise emit the symbol verbatim
                    symbol = escapes.get(tokenizer.currentToken,
                                         tokenizer.currentToken)
                    outputFile.write("\t<symbol>" + symbol + "</symbol>\n")
                elif tokenizer.tokenType == JackTokenizer.IDENTIFIER:
                    outputFile.write("\t<identifier>" + tokenizer.currentToken +
                                     "</identifier>\n")
                elif tokenizer.tokenType == JackTokenizer.INT_CONST:
                    outputFile.write("\t<integerConstant>" +
                                     tokenizer.currentToken +
                                     "</integerConstant>\n")
                elif tokenizer.tokenType == JackTokenizer.STRING_CONST:
                    outputFile.write("\t<stringConstant>" +
                                     tokenizer.currentToken +
                                     "</stringConstant>\n")

            outputFile.write("</tokens>\n")
コード例 #10
0
ファイル: Main.py プロジェクト: jay-aye-see-kay/nand2tetris
def tokenize(path):
    """Tokenize every ``.jack`` file directly inside *path*, writing each
    token stream to ``My<name>T.xml`` in that directory.

    Bug fixes: the input file handle from ``open(filename).read()`` was
    never closed (now a ``with`` block), and the extension check uses
    ``endswith`` instead of slicing.
    """
    # NOTE(review): chdir is a process-wide side effect; kept for
    # compatibility with the original behavior.
    os.chdir(path)
    for filename in os.listdir(path):
        if filename.endswith('.jack'):
            # read the whole source as one string for the tokenizer
            with open(filename) as src:
                inStr = src.read()
            jackFile = JackTokenizer(inStr)

            # create a xml file for output
            with open('My' + filename[:-5] + 'T.xml', 'w') as out_txt:
                out_txt.write('<tokens>\n')

                # run through the string token by token, writing each one
                while jackFile.hasMoreTokens():
                    jackFile.advance()
                    out_txt.write(jackFile.writeXML())

                out_txt.write('</tokens>')
コード例 #11
0
 def compileOneFile(self):
     """Pop the next file from ``self.file_list`` and compile it to a ``.vm``
     file in ``self.output_dir``; the intermediate ``.xml`` is written and
     then deleted (kept for parity with the original behavior).

     Returns False when there is nothing left to compile.

     Bug fixes: the pointless ``if True:`` guard is gone, the output base
     path is computed once instead of three times, and both output files are
     closed even if compilation raises.
     """
     if len(self.file_list) == 0:
         print("No more file to be compiled!")
         return False
     input_file_name = self.file_list.pop()
     # output base path without extension: "dir/Foo.jack" -> "<out>/Foo"
     base_path = self.output_dir + input_file_name.split("/")[-1].split(".")[0]
     tokenizer = JackTokenizer(input_file_name)
     with open(base_path + ".xml", "w") as output_xml_file, \
             open(base_path + ".vm", "w") as output_vm_file:
         compeng = CompilationEngine(tokenizer, output_vm_file, output_xml_file)
         # NOTE(review): calling compileClass once per remaining token looks
         # suspicious but is preserved -- confirm against CompilationEngine.
         while tokenizer.hasMoreTokens():
             tokenizer.advance()
             compeng.compileClass()
     # the xml file is only a debugging artifact; drop it
     os.remove(base_path + ".xml")
     print("done:  " + input_file_name)
コード例 #12
0
def main():
    """Entry point: tokenize each input .jack file and compile it to .vm.

    Expects exactly one argument: a ``.jack`` file or a directory of them.
    """
    if len(sys.argv) != 2:
        print("Usage: python3 JackAnalyzer.py path/file.jack\nor...\
        \nUsage: python3 JackAnalyzer.py path/dir")
        sys.exit(1)

    # validate the path argument and expand it to the list of input files
    input_paths = check_path_type()

    token_xml_files = []  # retained from the original; not used below

    for src_path in input_paths:
        tokenizer = JackTokenizer(src_path)
        token_list = []
        # collect every (text, tag) pair the tokenizer yields; advance()
        # may return an empty tag (e.g. comments), which is skipped
        while tokenizer.hasMoreTokens():
            tok_text, tok_tag = tokenizer.advance()
            if tok_tag:
                token_list.append(Token(tok_text, tok_tag))
        # setup outpath and compile
        out_path = change_fp_name(src_path, ".jack", ".vm")
        Compiler(token_list, out_path).compileClass()
コード例 #13
0
    def create_token_file(jack_file_name):
        """Tokenize *jack_file_name* into a sibling ``...T.xml`` token file
        and return that file's name.

        Bug fixes: token types were compared with ``is`` (identity -- only
        works by accident of CPython string interning) and both file handles
        leaked; comparisons now use ``==`` and files are closed via ``with``.
        """
        token_file_name = jack_file_name.replace('.jack', 'T.xml')
        # NOTE(review): 'rU' mode was removed in Python 3.11 -- confirm the
        # target interpreter before switching to plain 'r'.
        with open(token_file_name, 'w') as token_file, \
                open(jack_file_name, 'rU') as jack_file:
            tokenizer = JackTokenizer(jack_file)

            token_file.write('<tokens>\n')

            while tokenizer.hasMoreTokens():
                tokenizer.advance()
                token_type = tokenizer.tokenType()  # hoisted: call once

                if token_type == 'KEYWORD':
                    token_file.write('<keyword> {} </keyword>\n'.format(
                        tokenizer.keyWord().lower()))
                elif token_type == 'SYMBOL':
                    symbol = tokenizer.symbol()

                    # escape the characters that are special in XML
                    if symbol in ['<', '>', '&']:
                        symbol = Main.XML_CONVSERSIONS[symbol]

                    token_file.write('<symbol> {} </symbol>\n'.format(symbol))
                elif token_type == 'IDENTIFIER':
                    token_file.write('<identifier> {} </identifier>\n'.format(
                        tokenizer.identifier()))
                elif token_type == 'INT_CONST':
                    token_file.write(
                        '<integerConstant> {} </integerConstant>\n'.format(
                            tokenizer.intVal()))
                elif token_type == 'STRING_CONST':
                    token_file.write(
                        '<stringConstant> {} </stringConstant>\n'.format(
                            tokenizer.stringVal()))

            token_file.write('</tokens>\n')

        return token_file_name
コード例 #14
0
class CompilationEngine:
    """Recursive-descent parser for the Jack language.

    Reads tokens from a JackTokenizer and writes an indented XML parse tree
    to the output file.  Most compileXxx methods expect the tokenizer to
    already be positioned on the first token of their construct, and leave it
    positioned on the token *after* the construct.

    Bug fix: ``_write_symbol`` emitted the XML entities as ``&lt``/``&gt``/
    ``&amp`` without the terminating semicolon, producing malformed XML.
    """
    def __init__(self, input_file_path, output_path):
        """Open *output_path* for writing (closed at the end of
        compileClass) and build a tokenizer over *input_file_path*."""
        self._indentation = 0  # current nesting depth; 2 spaces per level
        self._tokenizer = JackTokenizer(input_file_path)
        self._output = open(output_path, "w+")

    def compileClass(self):
        """Compile a whole class: 'class' className '{' classVarDec*
        subroutineDec* '}'.  Also closes the output file when done."""
        if self._tokenizer.hasMoreTokens():
            self._tokenizer.advance()
            self._output.write("<class>\n")
            self._indentation += 1

            self._write_keyword()       # 'class'

            self._tokenizer.advance()
            self._write_identifier()    # className

            self._tokenizer.advance()
            self._write_symbol()        # '{'

            self._tokenizer.advance()
            # zero or more class-level variable declarations
            while self._tokenizer.keyWord() == "static" or \
                    self._tokenizer.keyWord() == "field":
                self.compileClassVarDec()
            # zero or more subroutine declarations
            while self._tokenizer.keyWord() == "constructor" or \
                    self._tokenizer.keyWord() == "function" \
                    or self._tokenizer.keyWord() == "method":
                self.compileSubroutine()

            self._write_symbol()        # '}'

            self._indentation -= 1
            self._output.write("</class>\n")
            self._output.close()

    def compileClassVarDec(self):
        """Compile one ('static'|'field') type varName (',' varName)* ';'.

        Should only be called when the current token is 'static'/'field';
        leaves the tokenizer on the token after the ';'.
        """
        self._output.write("  " * self._indentation + "<classVarDec>\n")
        self._indentation += 1
        self._write_keyword()           # 'static' | 'field'

        self._tokenizer.advance()
        self._compile_type_and_varName()

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</classVarDec>\n")

    def compileSubroutine(self):
        """Compile a complete subroutineDec, including its subroutineBody."""
        self._output.write("  " * self._indentation + "<subroutineDec>\n")
        self._indentation += 1
        self._write_keyword()           # 'constructor'|'function'|'method'

        self._tokenizer.advance()
        # return type: keyword ('void', 'int', ...) or a class identifier
        if self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
            self._write_keyword()
        elif self._tokenizer.tokenType() == self._tokenizer.IDENTIFIER:
            self._write_identifier()

        self._tokenizer.advance()
        self._write_identifier()        # subroutineName

        self._tokenizer.advance()
        self._write_symbol()            # '('

        self._tokenizer.advance()
        self.compileParameterList()

        self._write_symbol()            # ')'

        self._tokenizer.advance()
        # compile subroutineBody:
        self._output.write("  " * self._indentation + "<subroutineBody>\n")
        self._indentation += 1
        self._write_symbol()            # '{'

        self._tokenizer.advance()
        while self._tokenizer.keyWord() == "var":
            self.compileVarDec()

        self.compileStatements()

        self._write_symbol()            # '}'
        self._indentation -= 1
        self._output.write("  " * self._indentation + "</subroutineBody>\n")
        self._indentation -= 1
        self._output.write("  " * self._indentation + "</subroutineDec>\n")
        self._tokenizer.advance()

    def compileParameterList(self):
        """Compile a (possibly empty) parameter list, not including the
        enclosing parentheses; stops at the closing ')' symbol."""
        self._output.write("  " * self._indentation + "<parameterList>\n")
        self._indentation += 1
        while self._tokenizer.tokenType() != self._tokenizer.SYMBOL:
            # parameter type: keyword or class identifier
            if self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
                self._write_keyword()
            elif self._tokenizer.tokenType() == self._tokenizer.IDENTIFIER:
                self._write_identifier()
            self._tokenizer.advance()
            self._write_identifier()    # varName
            self._tokenizer.advance()
            if self._tokenizer.symbol() == ",":
                self._write_symbol()
                self._tokenizer.advance()

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</parameterList>\n")

    def compileVarDec(self):
        """Compile one 'var' type varName (',' varName)* ';' declaration."""
        self._output.write("  " * self._indentation + "<varDec>\n")
        self._indentation += 1

        self._write_keyword()           # 'var'
        self._tokenizer.advance()
        self._compile_type_and_varName()

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</varDec>\n")

    def compileStatements(self):
        """Compile a sequence of statements (let/if/while/do/return) until
        the current token is no longer a keyword."""
        self._output.write("  " * self._indentation + "<statements>\n")
        self._indentation += 1
        while self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
            if self._tokenizer.keyWord() == "let":
                self.compileLet()
            elif self._tokenizer.keyWord() == "if":
                self.compileIf()
            elif self._tokenizer.keyWord() == "while":
                self.compileWhile()
            elif self._tokenizer.keyWord() == "do":
                self.compileDo()
            elif self._tokenizer.keyWord() == "return":
                self.compileReturn()
        self._indentation -= 1
        self._output.write("  " * self._indentation + "</statements>\n")

    def compileDo(self):
        """Compile 'do' subroutineCall ';'."""
        self._output.write("  " * self._indentation + "<doStatement>\n")
        self._indentation += 1
        self._write_keyword()           # 'do'

        self._tokenizer.advance()
        # subroutineCall: name [ '.' name ] '(' expressionList ')'
        self._write_identifier()
        self._tokenizer.advance()
        if self._tokenizer.symbol() == ".":
            self._write_symbol()
            self._tokenizer.advance()
            self._write_identifier()
            self._tokenizer.advance()

        self._write_symbol()            # '('

        self._tokenizer.advance()
        self.compileExpressionList()

        self._write_symbol()            # ')'

        self._tokenizer.advance()
        self._write_symbol()            # ';'

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</doStatement>\n")
        self._tokenizer.advance()

    def compileLet(self):
        """Compile 'let' varName ('[' expression ']')? '=' expression ';'."""
        self._output.write("  " * self._indentation + "<letStatement>\n")
        self._indentation += 1
        self._write_keyword()           # 'let'

        self._tokenizer.advance()
        self._write_identifier()        # varName

        self._tokenizer.advance()
        # optional array index
        if self._tokenizer.symbol() == "[":
            self._write_symbol()
            self._tokenizer.advance()
            self.compileExpression()
            self._write_symbol()        # ']'
            self._tokenizer.advance()

        self._write_symbol()            # '='

        self._tokenizer.advance()
        self.compileExpression()
        self._write_symbol()            # ';'

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</letStatement>\n")
        self._tokenizer.advance()

    def compileWhile(self):
        """Compile 'while' '(' expression ')' '{' statements '}'."""
        self._output.write("  " * self._indentation + "<whileStatement>\n")
        self._indentation += 1
        self._write_keyword()           # 'while'

        self._tokenizer.advance()
        self._write_symbol()            # '('

        self._tokenizer.advance()
        self.compileExpression()

        self._write_symbol()            # ')'

        self._tokenizer.advance()
        self._write_symbol()            # '{'

        self._tokenizer.advance()
        self.compileStatements()

        self._write_symbol()            # '}'

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</whileStatement>\n")
        self._tokenizer.advance()

    def compileReturn(self):
        """Compile 'return' expression? ';'."""
        self._output.write("  " * self._indentation + "<returnStatement>\n")
        self._indentation += 1
        self._write_keyword()           # 'return'

        self._tokenizer.advance()
        # a non-symbol token means there is a return expression
        if self._tokenizer.tokenType() != self._tokenizer.SYMBOL and \
                self._tokenizer.symbol() != ";":
            self.compileExpression()

        self._write_symbol()            # ';'

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</returnStatement>\n")
        self._tokenizer.advance()

    def compileIf(self):
        """Compile 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?."""
        self._output.write("  " * self._indentation + "<ifStatement>\n")
        self._indentation += 1
        self._write_keyword()           # 'if'

        self._tokenizer.advance()
        self._write_symbol()            # '('

        self._tokenizer.advance()
        self.compileExpression()

        self._write_symbol()            # ')'

        self._tokenizer.advance()
        self._write_symbol()            # '{'

        self._tokenizer.advance()
        self.compileStatements()

        self._write_symbol()            # '}'

        self._tokenizer.advance()
        # optional else clause
        if self._tokenizer.tokenType() == self._tokenizer.KEYWORD and \
                self._tokenizer.keyWord() == "else":
            self._write_keyword()

            self._tokenizer.advance()
            self._write_symbol()        # '{'

            self._tokenizer.advance()
            self.compileStatements()

            self._write_symbol()        # '}'
            self._tokenizer.advance()

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</ifStatement>\n")

    def compileExpression(self):
        """Compile term (op term)*.

        Note that tokenizer must be advanced before this is called!!!
        """
        self._output.write("  " * self._indentation + "<expression>\n")
        self._indentation += 1

        self.compileTerm()
        # OP_LIST is the module-level set of binary operators
        while self._tokenizer.tokenType() == self._tokenizer.SYMBOL and \
                self._tokenizer.symbol() in OP_LIST:
            self._write_symbol()
            self._tokenizer.advance()
            self.compileTerm()

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</expression>\n")

    def compileTerm(self):
        """Compile a single term: constant, keyword constant, varName
        (with optional indexing / subroutine call), parenthesized
        expression, or unary-op term.

        ``sanity_check`` tracks whether this branch still needs a trailing
        advance() so every path leaves the tokenizer one past the term.
        """
        sanity_check = True
        self._output.write("  " * self._indentation + "<term>\n")
        self._indentation += 1
        if self._tokenizer.tokenType() == self._tokenizer.INT_CONST:
            self._write_int_const()
        elif self._tokenizer.tokenType() == self._tokenizer.STRING_CONST:
            self._write_str_const()
        elif self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
            self._write_keyword()
        elif self._tokenizer.tokenType() == self._tokenizer.IDENTIFIER:
            self._write_identifier()

            self._tokenizer.advance()
            sanity_check = False
            if self._tokenizer.symbol() == "[":     # array entry
                sanity_check = True
                self._write_symbol()
                self._tokenizer.advance()
                self.compileExpression()
                self._write_symbol()
            elif self._tokenizer.symbol() == ".":   # subroutine case
                sanity_check = True
                self._write_symbol()
                self._tokenizer.advance()
                self._write_identifier()
                self._tokenizer.advance()
                self._write_symbol()
                self._tokenizer.advance()
                self.compileExpressionList()
                self._write_symbol()
            elif self._tokenizer.symbol() == "(":   # local subroutine call
                sanity_check = True
                self._write_symbol()
                self._tokenizer.advance()
                self.compileExpressionList()
                self._write_symbol()

        elif self._tokenizer.symbol() == "(":       # '(' expression ')'
            self._write_symbol()
            self._tokenizer.advance()
            self.compileExpression()
            self._write_symbol()
        elif self._tokenizer.symbol() == "~" or self._tokenizer.symbol() == \
                "-":                                # unary op
            self._write_symbol()
            self._tokenizer.advance()
            self.compileTerm()
            sanity_check = False

        if sanity_check:
            self._tokenizer.advance()

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</term>\n")

    def compileExpressionList(self):
        """Compile a (possibly empty) comma-separated expression list."""
        self._output.write("  " * self._indentation + "<expressionList>\n")
        self._indentation += 1

        if self._tokenizer.tokenType() != self._tokenizer.SYMBOL and \
                self._tokenizer.symbol() != ")":
            self.compileExpression()
            while self._tokenizer.tokenType() == self._tokenizer.SYMBOL and \
                    self._tokenizer.symbol() == ",":
                self._write_symbol()
                self._tokenizer.advance()
                self.compileExpression()
        # NOTE(review): this second branch duplicates the loop for a list
        # that starts with '(' (a parenthesized first expression); it looks
        # redundant but is preserved -- confirm before simplifying.
        if self._tokenizer.symbol() == "(":
            self.compileExpression()
            while self._tokenizer.tokenType() == self._tokenizer.SYMBOL and \
                    self._tokenizer.symbol() == ",":
                self._write_symbol()
                self._tokenizer.advance()
                self.compileExpression()

        self._indentation -= 1
        self._output.write("  " * self._indentation + "</expressionList>\n")

    def _compile_type_and_varName(self):
        """Shared tail of var declarations:
        type varName (',' varName)* ';'."""
        if self._tokenizer.tokenType() == self._tokenizer.KEYWORD:
            self._write_keyword()
        elif self._tokenizer.tokenType() == self._tokenizer.IDENTIFIER:
            self._write_identifier()
        self._tokenizer.advance()
        self._write_identifier()
        self._tokenizer.advance()
        while self._tokenizer.symbol() == ",":
            self._write_symbol()
            self._tokenizer.advance()
            self._write_identifier()
            self._tokenizer.advance()
        self._write_symbol()            # ';'
        self._tokenizer.advance()

    def _write_identifier(self):
        """Emit the current token as an <identifier> element."""
        self._output.write("  " * self._indentation + "<identifier> " +
                           self._tokenizer.identifier() + " </identifier>\n")

    def _write_keyword(self):
        """Emit the current token as a <keyword> element."""
        self._output.write("  " * self._indentation + "<keyword> " +
                           self._tokenizer.keyWord() + " </keyword>\n")

    def _write_symbol(self):
        """Emit the current token as a <symbol> element, escaping the
        XML-special characters.

        Bug fix: the entities were previously written without the
        terminating ';' ("&lt" etc.), which is malformed XML.
        """
        string_to_write = self._tokenizer.symbol()
        if self._tokenizer.symbol() == "<":
            string_to_write = "&lt;"
        elif self._tokenizer.symbol() == ">":
            string_to_write = "&gt;"
        elif self._tokenizer.symbol() == "&":
            string_to_write = "&amp;"
        self._output.write("  " * self._indentation + "<symbol> " +
                           string_to_write + " </symbol>\n")

    def _write_int_const(self):
        """Emit the current token as an <integerConstant> element.

        NOTE(review): uses identifier() for the constant's text --
        presumably it returns the raw current token; confirm that intVal()
        isn't required by this project's tokenizer.
        """
        self._output.write("  " * self._indentation + "<integerConstant> " +
                           self._tokenizer.identifier() +
                           " </integerConstant>\n")

    def _write_str_const(self):
        """Emit the current token as a <stringConstant> element.

        NOTE(review): uses identifier() rather than stringVal() -- confirm
        against this project's tokenizer API.
        """
        self._output.write("  " * self._indentation + "<stringConstant> " +
                           self._tokenizer.identifier() +
                           " </stringConstant>\n")
コード例 #15
0
class CompilationEngine(object):
    """Recursive-descent Jack analyzer.

    Consumes tokens from a JackTokenizer and accumulates an XML parse tree
    in ``self.xml``. Call compileClass() after construction; it returns the
    finished XML string.
    """

    def __init__(self, inStr):
        """Tokenize the source text and advance onto the first token."""
        self.xml = ''
        self.jackFile = JackTokenizer(inStr)
        self.jackFile.advance()

    def compileClass(self):
        """Compile 'class' className '{' classVarDec* subroutineDec* '}'.

        Returns the accumulated XML string, or '' if the input does not
        begin with the 'class' keyword.
        """
        # check the first token, return if wrong
        if self.jackFile.token != 'class':
            print("first token needs to be 'class'")
            return ''

        self.xml += '<class>'       # open class tag
        self.writeAdv()             # write class keyword
        self.writeAdv()             # write class name tag
        self.writeAdv()             # write '{'

        # look for variable declarations
        while (self.jackFile.token == 'static' or self.jackFile.token == 'field') and \
                self.jackFile.tokenType == 'keyword':
            self.compileClassVarDec()

        # look for subroutine declarations
        while (self.jackFile.token == 'method' or self.jackFile.token == 'function' or
               self.jackFile.token == 'constructor') and self.jackFile.tokenType == 'keyword':
            self.compileSubroutine()

        # look for '}'
        while not (self.jackFile.token == '}' and self.jackFile.tokenType == 'symbol'):
            self.writeAdv("Expected '}'")
        self.writeAdv()

        # '}' has been hit, end of file. close class tag and return xml string
        if self.jackFile.hasMoreTokens():
            print("There is uncompiled code after the class")
        self.xml += '\n</class>'
        return self.xml

    def compileClassVarDec(self):
        """Compile ('static'|'field') type varName (',' varName)* ';'."""
        self.xml += '\n<classVarDec>'     # open classVarDec tag
        # loop through until ';'
        while not (self.jackFile.token == ';' and self.jackFile.tokenType == 'symbol'):
            self.writeAdv()
        self.writeAdv()                   # write ';'
        self.xml += '\n</classVarDec>'    # close classVarDec tag

    def compileSubroutine(self):
        """Compile a full subroutineDec: header, parameter list and body."""
        self.xml += '\n<subroutineDec>'   # open subroutineDec tag
        self.writeAdv()                   # write sub type
        self.writeAdv()                   # write sub return type
        self.writeAdv()                   # write sub name

        self.writeAdv()                   # write '('
        self.xml += '\n<parameterList>'   # open parameterList tag
        self.compileParameterList()       # writes the potentially empty parameter list
        self.xml += '\n</parameterList>'  # close parameterList tag
        self.writeAdv()                   # write ')'

        self.xml += '\n<subroutineBody>'  # open subroutineBody tag
        self.writeAdv()                   # write '{'

        # look for variable declarations
        while (self.jackFile.token == 'var') and self.jackFile.tokenType == 'keyword':
            self.compileVarDec()

        # write the sub statements
        self.xml += '\n<statements>'      # open statements tag
        self.compileStatements()          # compile all statements
        self.xml += '\n</statements>'     # close statements tag

        self.writeAdv()                   # write '}' (closing the subroutine body)
        self.xml += '\n</subroutineBody>' # close subroutineBody tag
        self.xml += '\n</subroutineDec>'  # close subroutineDec tag

    def compileParameterList(self):
        """Write the parameter tokens up to, but not including, ')'."""
        while not (self.jackFile.token == ')' and self.jackFile.tokenType == 'symbol'):
            self.writeAdv()

    def compileVarDec(self):
        """Compile 'var' type varName (',' varName)* ';'."""
        self.xml += '\n<varDec>'      # open varDec tag
        # loop through until ';'
        while not (self.jackFile.token == ';' and self.jackFile.tokenType == 'symbol'):
            self.writeAdv()
        self.writeAdv()               # write ';'
        self.xml += '\n</varDec>'     # close varDec tag

    def compileStatements(self):
        """Dispatch let/if/while/do/return statements until '}' is seen."""
        while not (self.jackFile.token == '}' and self.jackFile.tokenType == 'symbol'):
            if self.jackFile.token == 'let': self.compileLet()
            elif self.jackFile.token == 'if': self.compileIf()
            elif self.jackFile.token == 'while': self.compileWhile()
            elif self.jackFile.token == 'do': self.compileDo()
            elif self.jackFile.token == 'return': self.compileReturn()
            else:
                self.writeAdv('expected statement')
                return

    def compileDo(self):
        """Compile 'do' subroutineCall ';'."""
        self.xml += '\n<doStatement>'     # open doStatement tag
        # loop through until ';'
        while not (self.jackFile.token == ';' and self.jackFile.tokenType == 'symbol'):
            if self.jackFile.token == '(' and self.jackFile.tokenType == 'symbol':
                self.writeAdv()
                self.compileExpressionList()
            else:
                self.writeAdv()
        self.writeAdv()                   # write ';'
        self.xml += '\n</doStatement>'    # close doStatement tag

    def compileLet(self):
        """Compile 'let' varName ('[' expression ']')? '=' expression ';'."""
        self.xml += '\n<letStatement>'    # open letStatement tag

        self.writeAdv()     # write 'let'
        self.writeAdv()     # write varName
        if self.jackFile.token == '[':
            self.writeAdv()     # write '['
            self.compileExpression()
            self.writeAdv()     # write ']'
        self.writeAdv()     # write '='
        self.compileExpression()
        self.writeAdv()     # write ';'

        self.xml += '\n</letStatement>'   # close letStatement tag

    def compileWhile(self):
        """Compile 'while' '(' expression ')' '{' statements '}'."""
        self.xml += '\n<whileStatement>'
        self.writeAdv()     # write 'while'
        self.writeAdv()     # write '('
        self.compileExpression()
        self.writeAdv()     # write ')'
        self.writeAdv()     # write '{'
        self.xml += '\n<statements>'       # open statements tag
        self.compileStatements()
        self.xml += '\n</statements>'      # close statements tag
        self.writeAdv()     # write '}'
        self.xml += '\n</whileStatement>'

    def compileReturn(self):
        """Compile 'return' expression? ';'."""
        self.xml += '\n<returnStatement>'
        self.writeAdv()                 # write 'return'
        # BUG FIX: this used to be a 'while' loop; on malformed input where
        # compileExpression() consumes nothing it would spin forever.  The
        # grammar allows at most one expression here, so 'if' is equivalent
        # on valid input.
        if not (self.jackFile.token == ';' and self.jackFile.tokenType == 'symbol'):
            self.compileExpression()
        self.writeAdv()                 # write ';'
        self.xml += '\n</returnStatement>'

    def compileIf(self):
        """Compile 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?."""
        self.xml += '\n<ifStatement>'
        self.writeAdv()     # write 'if'
        self.writeAdv()     # write '('
        self.compileExpression()
        self.writeAdv()     # write ')'
        self.writeAdv()     # write '{'
        self.xml += '\n<statements>'       # open statements tag
        self.compileStatements()
        self.xml += '\n</statements>'      # close statements tag
        self.writeAdv()     # write '}'
        if self.jackFile.token == 'else':
            self.writeAdv()     # write 'else'
            self.writeAdv()     # write '{'
            self.xml += '\n<statements>'       # open statements tag
            self.compileStatements()
            self.xml += '\n</statements>'      # close statements tag
            self.writeAdv()     # write '}'
        self.xml += '\n</ifStatement>'

    def compileExpressionList(self):
        """Compile a (possibly empty) comma-separated expression list,
        stopping at ')' without writing it."""
        self.xml += '\n<expressionList>'
        cont = True
        while cont:
            if self.jackFile.token == ')':
                cont = False
            elif self.jackFile.token == ',':
                self.writeAdv()
            else:
                self.compileExpression()
        self.xml += '\n</expressionList>'

    def compileExpression(self):
        """Compile term (op term)*, including unary-op and '(' terms."""
        self.xml += '\n<expression>'

        cont = True
        while cont:
            if self.isTerm() or self.isUnaryOp() or self.jackFile.token == '(':
                self.compileTerm()
                if self.isOp(): self.writeAdv()
            else:
                cont = False

        self.xml += '\n</expression>'

    def compileTerm(self):
        """Compile a single term; peeks ahead to distinguish the forms.

        integerConstant | stringConstant | keywordConstant |
        varName | varName '[' expression ']' | subroutineCall |
        '(' expression ')' | unaryOp term
        """
        self.xml += '\n<term>'

        if self.isUnaryOp():            # account for unary operators
            self.writeAdv()
            self.compileTerm()
        elif self.isTerm() and self.jackFile.peek() == '(':
            self.writeAdv()     # write 'term'
            self.writeAdv()     # write '('
            self.compileExpressionList()
            self.writeAdv()     # write ')'
        elif self.isTerm() and self.jackFile.peek() == '.' and self.jackFile.peek(3) == '(':
            self.writeAdv()     # write 'term'
            self.writeAdv()     # write '.'
            self.writeAdv()     # write 'term'
            self.writeAdv()     # write '('
            self.compileExpressionList()
            self.writeAdv()     # write ')'
        elif self.isTerm() and self.jackFile.peek() == '[':
            self.writeAdv()     # write 'term'
            self.writeAdv()     # write '['
            self.compileExpression()
            self.writeAdv()     # write ']'
        elif self.jackFile.token == '(':
            self.writeAdv()     # write '('
            self.compileExpression()
            self.writeAdv()     # write ')'
        else:
            self.writeAdv()     # write 'term'

        self.xml += '\n</term>'

    def writeAdv(self, *err):
        """Write the current token as an XML element and advance.

        When called with an argument, an <error> element containing err[0]
        is written instead of the token.
        """
        if err:
            print(err[0])
            self.xml += '\n<error>' + err[0] + '</error>'
            self.jackFile.advance()
        else:
            # escape XML-reserved symbols before writing
            if self.jackFile.token == '<': self.jackFile.token = '&lt;'
            if self.jackFile.token == '>': self.jackFile.token = '&gt;'
            if self.jackFile.token == '&': self.jackFile.token = '&amp;'
            self.xml += '\n<' + self.jackFile.tokenType + '> ' + self.jackFile.token + ' </' + self.jackFile.tokenType + '>'
            self.jackFile.advance()

    def isOp(self):
        """Return True if the current token is a binary operator."""
        operators = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
        return self.jackFile.token in operators

    def isUnaryOp(self):
        """Return True if the current token is a unary operator."""
        unaryOperators = ['-', '~']
        return self.jackFile.token in unaryOperators

    def isSymbol(self):
        """Return True if the current token is any Jack symbol."""
        symbols = ['{', '}', '(', ')', '[', ']', '.', ',', ';', '+', '-', '*', '/', '&', '|', '<', '>', '=', '~']
        return self.jackFile.token in symbols

    def isTermEnd(self):
        """Return True if the current token can terminate a term.

        BUG FIX: the operator list previously read «'&' '|'» with no comma,
        which Python's implicit string concatenation turned into the single
        element '&|' — so neither '&' nor '|' was recognised here.
        """
        operators = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
        termFinshers = [' ', ')', ']', ';'] + operators
        return self.jackFile.token in termFinshers

    def isTerm(self):
        """Return True if the current token type can start a term."""
        terms = ['keyword', 'identifier', 'integerConstant', 'stringConstant']
        return self.jackFile.tokenType in terms
Code example #16
0
class CompilationEngine:
    """Recursive top-down parser"""

    def __init__(self, inFile, outFile):
        """Creates a new compilation engine with the given input and output.
        The next routine called must be compileClass()"""
        self.tokenizer = JackTokenizer(inFile)
        # NOTE(review): outFile is opened here and the same path is also
        # handed to VMWriter below, so it is presumably opened for writing
        # twice; self.targetFile is never written to afterwards (only a
        # commented-out line references it) — confirm and consider removing.
        self.targetFile = open(outFile, 'w')
        self.getNext()
        self.classTable = None  # SymbolTable; created in compileClass()
        self.className = ''     # name of the class currently being compiled
        self.writer = VMWriter(outFile)
        self.labelWhile = 1     # running counter for unique while-loop labels
        self.labelIf = 1        # running counter for unique if-statement labels

    def getNext(self):
        """Advance to the next token if one exists; otherwise stay put."""
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()

    def compileClass(self):
        """Compiles a complete class"""
        self.classTable = SymbolTable()
        # 'class' className '{' classVarDec* subroutineDec* '}'
        # class
        self.getNext()
        # className
        self.className = self.tokenizer.getToken()
        self.getNext()
        # {
        self.getNext()

        # classVarDec*: 'static' / 'field' declarations
        token = self.tokenizer.getToken()
        while token in ["static", "field"]:
            self.compileDec()
            token = self.tokenizer.getToken()

        # subroutineDec*
        token = self.tokenizer.getToken()
        while token in ["constructor", "function", "method"]:
            self.compileSubroutine()
            token = self.tokenizer.getToken()
        # }
        self.getNext()


    def compileSubroutine(self):
        """Compiles a complete method, function, or constructor."""
        # subroutine dec
        self.classTable.startSubroutine()
        # ('constructor' | 'function' | 'method') ('void' | type) subroutineName '(' parameterList ')' subroutineBody
        # ('constructor' | 'function' | 'method')
        subroutineType = self.tokenizer.getToken()
        self.getNext()
        # ('void' | type) — return type is not needed for code generation
        self.getNext()

        # subroutineName
        name = self.tokenizer.getToken()
        self.getNext()
        # (
        self.getNext()
        # parameterList (a method implicitly receives 'this' as argument 0)
        self.compileParameterList(subroutineType == 'method')
        # )
        self.getNext()

        # subroutine body
        # '{' varDec* statements '}'
        # {
        self.getNext()

        # varDec* — must be compiled first so the local count is known
        # before the VM 'function' line is written
        while self.tokenizer.getToken() == 'var':
            self.compileDec()
        numOfVars = self.classTable.varCount(Toolbox.VAR)

        if subroutineType == 'function':
            self.writer.writeFunction(self.className + "." + name, numOfVars)
        elif subroutineType == 'constructor':
            self.writer.writeFunction(self.className + "." + name, numOfVars)
            # push constant (num of fields)
            # call Memory.alloc 1
            # pop pointer 0
            fields = self.classTable.varCount(Toolbox.FIELD)
            self.writer.writePush(Toolbox.CONST, fields)
            self.writer.writeCall('Memory.alloc', 1)
            self.writer.writePop(Toolbox.POINTER, 0)
        else:  # method
            self.writer.writeFunction(self.className + "." + name, numOfVars)
            # push argument 0
            # pop pointer 0
            self.writer.writePush(Toolbox.SEG_ARG, 0)
            self.writer.writePop(Toolbox.POINTER, 0)

        # statements
        self.compileStatements()
        # }
        self.getNext()

    def compileParameterList(self, method=False):
        """Compiles a (possibly empty) parameter list,
        not including the enclosing "()".

        When method is True, a placeholder argument is defined first so
        that real parameters start at argument index 1 ('this' is arg 0).
        """
        tokenType, name = '', ''

        if method:  # Add this to method's var list.
            self.classTable.define(None, None, Toolbox.ARG)

        if self.tokenizer.tokenType() != self.tokenizer.SYMBOL:  # param list not empty
            while True:
                # parameter type
                tokenType = self.tokenizer.getToken()
                self.getNext()

                # parameter name
                name = self.tokenizer.getToken()
                self.classTable.define(name, tokenType, Toolbox.ARG)
                self.getNext()

                if self.tokenizer.getToken() == ')':
                    break

                self.getNext()  # ','

    def compileStatements(self):  # (letStatement | ifStatement | whileStatement | doStatement | returnStatement)*
        """Compiles a sequence of statements,
        not including the enclosing "{}"."""
        token = self.tokenizer.getToken()
        while token in ["let", "if", "while", "do", "return"]:
            if token == 'let':
                self.compileLet()
            elif token == 'if':
                self.compileIf()
            elif token == 'while':
                self.compileWhile()
            elif token == 'do':
                self.compileDo()
            elif token == 'return':
                self.compileReturn()
            token = self.tokenizer.getToken()

    def compileSubroutineCall(self, name, printIdentifier=True):
        """Compiles a subroutine call and emits the VM 'call'.

        name is the identifier that starts the call; when printIdentifier
        is True the identifier is still the current token and is skipped
        here, otherwise the caller has already advanced past it.
        """
     # subroutineName '(' expressionList ') ' | ( className | varName) '.' subroutineName '(' expressionList ') '

        var = None
        nArgs = 0
        if printIdentifier:
            # subroutineName | ( className | varName)
            self.getNext()

        var = self.classTable.searchScope(name)

        if self.tokenizer.getToken() == '.':
            if var:
                # name is a variable: method call on that object.
                # push <this>
                self.writer.writePush(var[0], var[1])
                nArgs += 1
                className = var[2]  # Use the type instead of the variable name
            else:
                # name is a class: static function/constructor call
                className = name
            self.getNext()
            subroutineName = self.tokenizer.getToken()
            self.getNext()
        else:
            # bare call: a method on the current object
            # push <this>
            self.writer.writePush(Toolbox.POINTER, 0)
            nArgs += 1
            className = self.className
            subroutineName = name

        name = className + '.' + subroutineName
        # '('
        self.getNext()
        nArgs += self.compileExpressionList()

        self.writer.writeCall(name, nArgs)
        # ')'
        self.getNext()

    def compileDo(self):  # 'do' subroutineCall ';'
        """Compiles a do statement"""
        # do
        self.getNext()
        # subroutineCall
        self.compileSubroutineCall(self.tokenizer.getToken())
        # discard the ignored return value
        self.writer.writePop(Toolbox.TEMP, 0)
        # ;
        if self.tokenizer.getToken() == ';':
            self.getNext()

    def compileLet(self):  # 'let' varName ('[' expression ']')? '=' expression ';'
        """Compiles a let statement"""
        # let
        # self.targetFile.write(T_LET)
        self.getNext()
        # var name
        name = self.tokenizer.getToken()
        # search scope
        # NOTE: 'type' shadows the builtin here; the value is unused below.
        segment, index, type = self.classTable.searchScope(name)

        self.getNext()
        # [
        array = False
        if self.tokenizer.getToken() == '[':
            array = True
            self.writer.writePush(segment, index)
            self.getNext()
            # expression
            self.compileExpression()
            # ]
            self.getNext()
            # base address + offset
            self.writer.writeArithmetic('add')
        # =
        self.getNext()
        # expression
        self.compileExpression()

        if array:
            # Stack holds [..., address, value]; swap them via temp so the
            # address can be popped into pointer 1 and the value into that 0.
            self.writer.writePop(Toolbox.TEMP, 0)
            self.writer.writePop(Toolbox.TEMP, 1)
            self.writer.writePush(Toolbox.TEMP, 0)
            self.writer.writePush(Toolbox.TEMP, 1)

            self.writer.writePop(Toolbox.POINTER, 1)
            self.writer.writePop(Toolbox.THAT, 0)
        else:
            self.writer.writePop(segment, index)

        # ;
        token = self.tokenizer.getToken()
        if token == ';':
            self.getNext()

    def compileWhile(self):  # while' '(' expression ')' '{' statements '}'
        """Compiles a while statement"""
        # while
        label = str(self.labelWhile)
        self.labelWhile += 1
        self.writer.writeLabel('while' + label)
        self.getNext()
        # (
        self.getNext()
        # expression
        self.compileExpression()
        # )
        self.getNext()
        # jump out when the condition is false
        self.writer.writeArithmetic('not')
        self.writer.writeIf('endwhile' + label)
        # {
        self.getNext()
        # statements
        self.compileStatements()
        # }
        self.getNext()
        self.writer.writeGoto('while' + label)
        self.writer.writeLabel('endwhile' + label)

    def compileReturn(self):  # 'return' expression? ';'
        """Compiles a return statement"""
        # return
        self.getNext()
        # expression; a void return still pushes constant 0 per the VM
        # calling convention
        if not (self.tokenizer.getToken() == ";"):
            self.compileExpression()
        else:
            self.writer.writePush(Toolbox.CONST, 0)
        self.writer.writeReturn()
        # ;
        self.getNext()

    def compileIf(self):  # 'if' '(' expression ')' '{' statements '}' ( 'else' '{' statements '}' )?
        """Compiles an if statement, possibly with a trailing else clause"""
        # if
        label = 'if' + str(self.labelIf)
        self.labelIf += 1

        self.getNext()
        # (
        self.getNext()
        # expression
        self.compileExpression()
        # )
        self.getNext()

        # jump to the else branch when the condition is false
        self.writer.writeArithmetic('not')
        self.writer.writeIf('else' + label)

        # {
        self.getNext()
        # statements
        self.compileStatements()
        # }
        self.getNext()

        self.writer.writeGoto('end' + label)
        self.writer.writeLabel('else' + label)

        # else
        if self.tokenizer.getToken() == 'else':
            self.getNext()
            # {
            self.getNext()
            # expression
            self.compileStatements()
            # }
            self.getNext()
        self.writer.writeLabel('end' + label)

    def compileExpression(self):
        """Compiles an expression"""
        # term (op term)*

        self.compileTerm()
        token = self.tokenizer.getToken()
        while token in ['+', '/', '-', '*', '&', '|', '>', '<', '=']:
            self.getNext()
            self.compileTerm()
            # operands are on the stack; the raw operator symbol is passed
            # through — presumably VMWriter maps it to the VM command
            self.writer.writeArithmetic(token)

            token = self.tokenizer.getToken()


    def compileTerm(self):  #integerConstant | stringConstant | keywordConstant | varName | varName '[' expression']' |
                            # subroutineCall | '(' expression ')' | unaryOp term
        """Compiles a term"""

        token = self.tokenizer.getToken()
        tokenType = self.tokenizer.tokenType()

        if tokenType == self.tokenizer.INT_CONST:
            self.writer.writePush(Toolbox.CONST, token)
            self.getNext()
        elif tokenType == self.tokenizer.STRING_CONST:
            # build the string at runtime: String.new then appendChar per char
            self.writer.writePush(Toolbox.CONST, len(token))
            self.writer.writeCall('String.new', 1)

            for c in token:
                self.writer.writePush(Toolbox.CONST, ord(c))
                self.writer.writeCall('String.appendChar', 2)

            self.getNext()
        elif tokenType == self.tokenizer.KEYWORD:  # true | false | null | this
            self.compileKeywordConstant(token)
        elif tokenType == self.tokenizer.IDENTIFIER:
            # need one token of lookahead to tell array / call / plain var apart
            name = token
            self.getNext()
            token = self.tokenizer.getToken()
            if token == '[':
                self.compileVarName(name)
                self.getNext()
                self.compileExpression()
                self.getNext()
                self.writer.writeArithmetic('add')
                self.writer.writePop(Toolbox.POINTER, 1)
                self.writer.writePush(Toolbox.THAT, 0)
            elif token in ['(', '.']:
                self.compileSubroutineCall(name, False)
            else:
                self.compileVarName(name)

        elif token == '(':
            self.getNext()
            self.compileExpression()
            self.getNext()
        elif token in ['-', '~']:
            self.compileUnary(token)


    def compileExpressionList(self):
        """Compiles a (possibly empty) comma separated list of expressions"""
        nArgs = 0

        if self.tokenizer.getToken() != ')':
            self.compileExpression()
            nArgs += 1

            while self.tokenizer.getToken() == ',':
                self.getNext()
                self.compileExpression()
                nArgs += 1

        return nArgs

    def compileDec(self):  # 'var' type varName (',' varName)* ';'
        """Compiles a var declaration"""
        # keyword 'var' (also handles class-level 'field' / 'static')
        token = self.tokenizer.getToken()
        kind = None
        if token == 'var':
            kind = Toolbox.VAR
        elif token == 'field':
            kind = Toolbox.FIELD
        elif token == 'static':
            kind = Toolbox.STATIC
        self.getNext()
        tokenType = self.tokenizer.getToken()

        # type can be an identifier or a keyword
        self.getNext()

        # var name
        name = self.tokenizer.getToken()
        self.classTable.define(name, tokenType, kind)
        self.getNext()
        while self.tokenizer.tokenType() == self.tokenizer.SYMBOL and self.tokenizer.getToken() == ',':
            # ,
            self.getNext()
            name = self.tokenizer.getToken()
            self.classTable.define(name, tokenType, kind)
            # var name
            self.getNext()
        # ;
        self.getNext()

    def compileVarName(self, name):
        """Pushes the value of the named variable onto the stack."""
        # NOTE: 'type' shadows the builtin; the value is unused here.
        segment, index, type = self.classTable.searchScope(name)
        self.writer.writePush(segment, index)

    def compileKeywordConstant(self, keyword):
        """Pushes a keyword constant: false/null -> 0, true -> ~0,
        this -> pointer 0."""
        if keyword == 'false' or keyword == 'null':
            self.writer.writePush(Toolbox.CONST, 0)
        if keyword == 'true':
            self.writer.writePush(Toolbox.CONST, 0)
            self.writer.writeArithmetic('not')
        if keyword == 'this':
            self.writer.writePush(Toolbox.POINTER, 0)
        self.getNext()

    def compileUnary(self, token):
        """
        Compiles an unary operator with its operand (term)
        :param token: unary token
        """
        self.getNext()  # '~' or '-'
        self.compileTerm()  # operand

        if token == '-':
            self.writer.writeArithmetic('neg')
        else:  # token is '~'
            self.writer.writeArithmetic('not')
Code example #17
0
class CompilationEngine:
    """
    Effects the actual compilation output. Gets its input from a JackTokenizer
    and emits its parsed structure into an output file/stream.
    """

    INDENT = "  "

    def __init__(self, jackFile, vmFile, DEBUG=False):
        """
        Creates a new compilation engine with the given input and output. The
        next routine called must be compileClass().
        """
        self.tokenizer = JackTokenizer(jackFile)  # , DEBUG=DEBUG)
        self.DEBUG = DEBUG

        # Indentation level
        self.indentLevel = 0

        # Counters for while loops and if statements
        self.whileCounter = self.ifCounter = 0

        # Initialize the symbol table.
        # BUG FIX: DEBUG was hard-coded to True here and for the VM writer
        # below, so the caller's DEBUG flag was silently ignored.
        self.symtab = SymbolTable(DEBUG=DEBUG)

        # Initialize the VM writer
        self.writer = VMWriter(vmFile, DEBUG=DEBUG)

    def compileClass(self):
        """
        Compiles a complete class.
        """
        self.emit(xml="<class>")

        tok = self.tokenizer

        # Nothing to do on an empty token stream.
        if not tok.hasMoreTokens():
            return
        tok.advance()

        self.eatAndEmit("keyword", ["class"])
        (_, self.thisClass) = self.eatAndEmit(
            "identifier", category="CLASS", state="DEFINE"
        )
        self.eatAndEmit("symbol", ["{"])

        # Zero or more classVarDecs; count the fields so constructors know
        # how much memory to allocate for an instance.
        self.nFields = 0
        while tok.tokenType() == "keyword" and tok.keyWord() in ("static", "field"):
            isField = tok.keyWord() == "field"
            declared = self.compileClassVarDec()
            if isField:
                self.nFields += declared

        # Zero or more subroutineDecs.
        while tok.tokenType() == "keyword" and tok.keyWord() in (
            "constructor",
            "function",
            "method",
        ):
            self.compileSubroutine()

        self.eatAndEmit("symbol", ["}"])
        self.emit(xml="</class>")

        # Anything after the closing '}' is a syntax error.
        if tok.hasMoreTokens():
            raise SyntaxError(
                "Token after end of class: " + tok.currentToken
            )

        # Close the VMWriter
        self.writer.close()

    def compileClassVarDec(self):
        """
        Compiles a static declaration or a field declaration.
        Should only be called if keyword static or keyword field is the current
        token.

        Returns the number of variables declared.
        """
        self.emit(xml="<classVarDec>")

        # Need to save the variable kind for the symbol table
        token = self.eat("keyword", ["static", "field"])
        (_, varKind) = token
        varKind = varKind.upper()
        self.emit(token=token)

        # Expect a type: one of the keywords 'int', 'char', or 'boolean', or a
        # className (identifier).
        t = self.tokenizer
        tType = t.tokenType()
        if tType == "keyword":
            (_, varType) = self.eatAndEmit("keyword", ["int", "char", "boolean"])
        else:
            (_, varType) = self.eatAndEmit("identifier", category="CLASS", state="USE")

        self.eatAndEmit("identifier", category=varKind, varType=varType, state="DEFINE")
        count = 1

        # Expect an optional list of identifiers.
        while t.tokenType() == "symbol" and t.symbol() == ",":
            self.eatAndEmit("symbol", [","])
            # BUG FIX: the extra names in e.g. "static int x, y;" were
            # previously defined without varType, unlike the first name above
            # (and unlike compileVarDec) — every name shares the declared type.
            self.eatAndEmit(
                "identifier", category=varKind, varType=varType, state="DEFINE"
            )
            count += 1

        self.eatAndEmit("symbol", [";"])
        self.emit(xml="</classVarDec>")

        return count

    def compileSubroutine(self):
        """
        Compiles a complete method, function, or constructor.
        Should only be called if the current token is one of 'constructor',
        'function', or 'method'.
        """
        self.emit(xml="<subroutineDec>")
        (_, kw) = self.eatAndEmit("keyword", ["constructor", "function", "method"])

        # Fresh subroutine scope in the symbol table.
        self.symtab.startSubroutine()

        # A method receives the object it operates on as argument 0.
        if kw == "method":
            self.symtab.define("this", self.thisClass, "ARG")

        # Return type: 'void', a primitive keyword type, or a class name.
        tok = self.tokenizer
        if tok.tokenType() == "keyword":
            self.eatAndEmit("keyword", ["void", "int", "char", "boolean"])
        else:
            self.eatAndEmit("identifier", category="CLASS", state="USE")

        (_, functionName) = self.eatAndEmit(
            "identifier", category="SUBROUTINE", state="DEFINE"
        )

        self.eatAndEmit("symbol", ["("])
        self.compileParameterList()
        self.eatAndEmit("symbol", [")"])
        self.emit(xml="<subroutineBody>")
        self.eatAndEmit("symbol", ["{"])

        # varDec*: tally locals for the VM 'function' directive.
        nLocals = 0
        while tok.tokenType() == "keyword" and tok.keyWord() == "var":
            nLocals += self.compileVarDec()

        # Emit the VM function header.
        self.writer.writeFunction("{}.{}".format(self.thisClass, functionName), nLocals)

        if kw == "constructor":
            # Allocate the new object and anchor the 'this' segment to it.
            self.writer.writePush("CONST", self.nFields)
            self.writer.writeCall("Memory.alloc", 1)
            self.writer.writePop("POINTER", 0)
        elif kw == "method":
            # Anchor the 'this' segment to the object passed as argument 0.
            self.writer.writePush("ARG", 0)
            self.writer.writePop("POINTER", 0)

        # Compile the body statements.
        self.compileStatements()
        self.eatAndEmit("symbol", ["}"])
        self.emit(xml="</subroutineBody>")
        self.emit(xml="</subroutineDec>")

    def compileParameterList(self):
        """
        Compiles a (possibly empty) parameter list, not including the
        enclosing '( )'.
        """
        self.emit(xml="<parameterList>")

        tok = self.tokenizer

        # Each parameter is "<type> <name>", separated by ','.  An empty
        # list starts with the ')' symbol, failing the check immediately.
        while tok.tokenType() in ("keyword", "identifier"):
            if tok.tokenType() == "keyword":
                (_, varType) = self.eatAndEmit("keyword", ["int", "char", "boolean"])
            else:
                (_, varType) = self.eatAndEmit(
                    "identifier", category="CLASS", state="USE"
                )

            self.eatAndEmit(
                "identifier", category="ARG", state="DEFINE", varType=varType
            )

            # A ',' means another parameter follows; anything else ends it.
            if tok.tokenType() == "symbol" and tok.symbol() == ",":
                self.eatAndEmit("symbol", [","])
            else:
                break

        self.emit(xml="</parameterList>")

    def compileVarDec(self):
        """
        Compiles a single 'var' declaration and returns the number of
        variables it declares.
        """
        self.emit(xml="<varDec>")
        self.eatAndEmit("keyword", ["var"])

        tknzr = self.tokenizer

        # The type is either a primitive keyword or a class name identifier.
        if tknzr.tokenType() == "keyword":
            (_, declType) = self.eatAndEmit("keyword", ["int", "char", "boolean"])
        else:
            (_, declType) = self.eatAndEmit("identifier", category="CLASS", state="USE")

        # First declared name, then any further comma-separated names,
        # all sharing the same type.
        self.eatAndEmit("identifier", category="VAR", state="DEFINE", varType=declType)
        count = 1
        while tknzr.tokenType() == "symbol" and tknzr.symbol() == ",":
            self.eatAndEmit("symbol", [","])
            self.eatAndEmit(
                "identifier", category="VAR", state="DEFINE", varType=declType
            )
            count += 1

        self.eatAndEmit("symbol", [";"])
        self.emit(xml="</varDec>")

        return count

    def compileStatements(self):
        """
        Compiles a sequence of statements, excluding the enclosing braces.
        Raises SyntaxError on a keyword that is not a statement keyword.
        """
        self.emit(xml="<statements>")

        # Map each statement keyword to its compile routine.
        handlers = {
            "do": self.compileDo,
            "let": self.compileLet,
            "while": self.compileWhile,
            "return": self.compileReturn,
            "if": self.compileIf,
        }

        tknzr = self.tokenizer
        while tknzr.tokenType() == "keyword":
            handler = handlers.get(tknzr.keyWord())
            if handler is None:
                raise SyntaxError(
                    "Expected statement. Found {}.".format(tknzr.currentToken)
                )
            handler()

        self.emit(xml="</statements>")

    def compileDo(self):
        """
        Compiles a do statement: 'do' subroutineCall ';'.

        Generates the VM call for the subroutine and pops the returned
        value into TEMP 0 to discard it (a do statement ignores results).
        """
        self.emit(xml="<doStatement>")
        self.eatAndEmit("keyword", ["do"])

        # Eat the identifier. Can't emit until we know if this is a class or a subroutine.
        token = self.eat("identifier")
        (_, ident) = token

        # Check for a '.', which indicates a method call
        t = self.tokenizer
        if t.tokenType() == "symbol" and t.symbol() == ".":
            self.eatAndEmit("symbol", ["."])
            # Previous token was an object or a class. Check symbol table.
            objType = self.symtab.typeOf(ident)
            if objType:
                # ident is an object, so method is objType.method, and the object must be loaded into this as argument 0
                self.emit(token=token, category=self.symtab.kindOf(ident), state="USE")

                # subroutine starts with the class type
                subroutine = objType

                # Add an argument to the stack for "this"
                nArgs = 1
                kind = self.symtab.kindOf(ident)
                index = self.symtab.indexOf(ident)
                self.writer.writePush(kind, index)
            else:
                # ident is a class, so method is ident.method and there is no this
                self.emit(token=token, category="CLASS", state="USE")
                subroutine = ident
                nArgs = 0

            methodToken = self.eat("identifier")
            (_, method) = methodToken
            self.emit(token=methodToken, category="METHOD", state="USE")
            subroutine += "." + method
        else:
            # Bare subroutine calls are assumed to be methods of the current class
            self.emit(token=token, category="SUBROUTINE", state="USE")
            subroutine = self.thisClass + "." + ident

            # Add "this" to the stack
            nArgs = 1
            self.writer.writePush("POINTER", 0)

        self.eatAndEmit("symbol", ["("])
        nArgs += self.compileExpressionList()
        self.eatAndEmit("symbol", [")"])
        self.eatAndEmit("symbol", [";"])

        # Call the desired subroutine and consume the returned value
        self.writer.writeCall(subroutine, nArgs)
        self.writer.writePop("TEMP", 0)

        self.emit(xml="</doStatement>")

    def compileLet(self):
        """
        Compiles a let statement: 'let' varName ('[' expression ']')? '='
        expression ';'. Generates VM code that stores the value of the
        right-hand expression into the variable or the array entry.
        """
        self.emit(xml="<letStatement>")
        self.eatAndEmit("keyword", ["let"])
        (_, varName) = self.eatAndEmit("identifier", category="LET", state="USE")

        # Look up the target variable in the symbol table.
        varKind = self.symtab.kindOf(varName)
        varIndex = self.symtab.indexOf(varName)

        # Check for array qualifier
        t = self.tokenizer
        arrayRef = False
        if t.tokenType() == "symbol" and t.symbol() == "[":
            # Compute the offset. (tokenVals is a list, consistent with
            # every other eatAndEmit call; a bare string relied on
            # substring membership.)
            self.eatAndEmit("symbol", ["["])
            self.compileExpression()
            self.eatAndEmit("symbol", ["]"])

            # Add the offset to the base. Leave the result on the stack.
            self.writer.writePush(varKind, varIndex)
            self.writer.writeArithmetic("+")
            arrayRef = True

        self.eatAndEmit("symbol", ["="])
        self.compileExpression()
        self.eatAndEmit("symbol", [";"])

        # Value to save is at the top of the stack.
        if not arrayRef:
            # Direct POP into the variable's segment.
            self.writer.writePop(varKind, varIndex)
        else:
            # Array reference. Stash the value in TEMP while pointing THAT
            # at the computed element address, then store through THAT.
            self.writer.writePop("TEMP", 0)
            self.writer.writePop("POINTER", 1)
            self.writer.writePush("TEMP", 0)
            self.writer.writePop("THAT", 0)

        self.emit(xml="</letStatement>")

    def compileWhile(self):
        """
        Compiles a while statement, emitting the loop-test label, the
        negated-condition exit branch, the body, and the back-edge.
        """
        self.emit(xml="<whileStatement>")
        self.eatAndEmit("keyword", ["while"])

        # Allocate unique labels for this loop instance.
        instance = self.whileCounter
        self.whileCounter += 1
        testLabel = "WHILE.{}.{}.EXP".format(self.thisClass, instance)
        exitLabel = "WHILE.{}.{}.EXIT".format(self.thisClass, instance)

        # The test label precedes the condition so the loop can jump back.
        self.writer.writeLabel(testLabel)

        self.eatAndEmit("symbol", ["("])
        self.compileExpression()
        self.eatAndEmit("symbol", [")"])

        # Leave the loop when the negated condition is true.
        self.writer.writeArithmetic("U~")
        self.writer.writeIf(exitLabel)

        self.eatAndEmit("symbol", ["{"])
        self.compileStatements()
        self.eatAndEmit("symbol", ["}"])

        self.writer.writeGoto(testLabel)
        self.writer.writeLabel(exitLabel)

        self.emit(xml="</whileStatement>")

    def compileReturn(self):
        """
        Compiles a return statement. A bare 'return;' (void subroutine)
        pushes a dummy 0 since every VM function returns a value.
        """
        self.emit(xml="<returnStatement>")
        self.eatAndEmit("keyword", ["return"])

        t = self.tokenizer
        atSemicolon = t.tokenType() == "symbol" and t.symbol() == ";"
        if atSemicolon:
            # No expression: force a 0 onto the stack to return.
            self.writer.writePush("CONST", 0)
        else:
            self.compileExpression()

        self.writer.writeReturn()
        self.eatAndEmit("symbol", [";"])
        self.emit(xml="</returnStatement>")

    def compileIf(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        Branches to ELSE on the negated condition; with no else clause
        the ELSE label doubles as the exit point.
        """
        self.emit(xml="<ifStatement>")
        self.eatAndEmit("keyword", ["if"])
        self.eatAndEmit("symbol", ["("])
        self.compileExpression()
        self.eatAndEmit("symbol", [")"])

        # Allocate unique labels for this if instance.
        instance = self.ifCounter
        self.ifCounter += 1
        elseLabel = "IF.{}.{}.ELSE".format(self.thisClass, instance)
        exitLabel = "IF.{}.{}.EXIT".format(self.thisClass, instance)

        self.writer.writeArithmetic("U~")
        self.writer.writeIf(elseLabel)

        self.eatAndEmit("symbol", ["{"])
        self.compileStatements()
        self.eatAndEmit("symbol", ["}"])

        t = self.tokenizer
        hasElse = t.tokenType() == "keyword" and t.keyWord() == "else"
        if hasElse:
            # Skip the else body after the then-branch has executed.
            self.writer.writeGoto(exitLabel)
            self.writer.writeLabel(elseLabel)

            self.eatAndEmit("keyword", ["else"])
            self.eatAndEmit("symbol", ["{"])
            self.compileStatements()
            self.eatAndEmit("symbol", ["}"])
            self.writer.writeLabel(exitLabel)
        else:
            # No else clause: ELSE is where execution resumes.
            self.writer.writeLabel(elseLabel)

        self.emit(xml="</ifStatement>")

    def compileExpression(self):
        """
        Compiles an expression: a term followed by zero or more
        (operator, term) pairs, with operators emitted in postfix order.
        """
        self.emit(xml="<expression>")
        self.compileTerm()

        binaryOps = ("+", "-", "*", "/", "&", "|", "<", ">", "=")
        t = self.tokenizer
        while t.tokenType() == "symbol" and t.symbol() in binaryOps:
            (_, operator) = self.eatAndEmit("symbol", list(binaryOps))
            self.compileTerm()
            # Both operands are on the stack; apply the operator now.
            self.writer.writeArithmetic(operator)

        self.emit(xml="</expression>")

    def compileTerm(self):
        """
        Compiles a term and emits VM code that leaves its value on the
        stack.

        When the current token is an identifier, a single lookahead token
        distinguishes a plain variable, an array entry ('['), a direct
        subroutine call ('('), and a qualified call ('.'). The lookahead
        token is not consumed unless it belongs to this term.
        """
        self.emit(xml="<term>")

        # Get the current token type
        t = self.tokenizer
        tType = t.tokenType()

        # Integer constant: push the literal value.
        if tType == "integerConstant":
            (_, value) = self.eatAndEmit("integerConstant")
            self.writer.writePush("CONST", value)
        # String constant: allocate a String and append each character.
        elif tType == "stringConstant":
            (_, value) = self.eatAndEmit("stringConstant")
            # Declare space for the string
            self.writer.writePush("CONST", len(value))
            self.writer.writeCall("String.new", 1)
            # Save the contents of the string
            for c in value:
                self.writer.writePush("CONST", ord(c))
                self.writer.writeCall("String.appendChar", 2)
        # Keyword constant
        elif tType == "keyword" and t.keyWord() in ["true", "false", "null", "this"]:
            (_, kw) = self.eatAndEmit("keyword", ["true", "false", "null", "this"])
            if kw in ["null", "false"]:
                # Map to 0
                self.writer.writePush("CONST", 0)
            elif kw == "true":
                # Map to -1
                self.writer.writePush("CONST", 1)
                self.writer.writeArithmetic("U-")  # NEG
            else:
                # this
                self.writer.writePush("POINTER", 0)
        # Identifier (varName, or array name, or subroutine call)
        elif tType == "identifier":
            (_, ident) = self.eatAndEmit("identifier", category="TERM", state="USE")
            lookahead = t.symbol() if t.tokenType() == "symbol" else None
            if lookahead == "[":
                # Array reference; ident is the array name. Compute the
                # offset, add the base, then read through THAT.
                self.eatAndEmit("symbol", ["["])
                self.compileExpression()
                self.eatAndEmit("symbol", ["]"])
                self.writer.writePush(self.symtab.kindOf(ident), self.symtab.indexOf(ident))
                self.writer.writeArithmetic("+")
                self.writer.writePop("POINTER", 1)
                self.writer.writePush("THAT", 0)
            elif lookahead == "(":
                # Direct subroutine call; ident is the subroutine name.
                self.eatAndEmit("symbol", ["("])
                nArgs = self.compileExpressionList()
                self.eatAndEmit("symbol", [")"])
                self.writer.writeCall(ident, nArgs)
            elif lookahead == ".":
                # Qualified call: ident is a class name (static call) or an
                # object whose value becomes argument 0 ('this').
                objType = self.symtab.typeOf(ident)
                nArgs = 0
                if objType is not None:
                    # Push the object onto the stack as argument 0.
                    self.writer.writePush(self.symtab.kindOf(ident), self.symtab.indexOf(ident))
                    nArgs = 1
                else:
                    # ident is a class name, so use it directly.
                    objType = ident

                self.eatAndEmit("symbol", ["."])
                (_, method) = self.eatAndEmit(
                    "identifier", category="SUBROUTINE", state="USE"
                )
                self.eatAndEmit("symbol", ["("])
                nArgs += self.compileExpressionList()
                self.eatAndEmit("symbol", [")"])
                self.writer.writeCall(objType + "." + method, nArgs)
            else:
                # Any other lookahead -- including a non-symbol token --
                # means ident is a simple variable: push its value.
                # (Previously the push was skipped entirely when the
                # lookahead was not a symbol, silently dropping the term.)
                self.writer.writePush(self.symtab.kindOf(ident), self.symtab.indexOf(ident))
        # Sub-expression
        elif tType == "symbol" and t.symbol() == "(":
            self.eatAndEmit("symbol", ["("])
            self.compileExpression()
            self.eatAndEmit("symbol", [")"])
        # Unary op and term
        elif tType == "symbol" and t.symbol() in ["-", "~"]:
            (_, op) = self.eatAndEmit("symbol", ["-", "~"])
            self.compileTerm()
            # Mark as unary to get right version of '-'
            self.writer.writeArithmetic("U" + op)
        else:
            # Not a term
            raise SyntaxError("Expected term, found {}.".format(t.currentToken))

        self.emit(xml="</term>")

    def compileExpressionList(self):
        """
        Compiles a (possibly empty) comma-separated list of expressions
        and returns how many were compiled.
        """
        self.emit(xml="<expressionList>")

        t = self.tokenizer
        count = 0

        # The list ends at the closing parenthesis.
        while not (t.tokenType() == "symbol" and t.symbol() == ")"):
            self.compileExpression()
            count += 1

            # Consume the separating comma, if present.
            if t.tokenType() == "symbol" and t.symbol() == ",":
                self.eatAndEmit("symbol", [","])

        self.emit(xml="</expressionList>")

        return count

    def eat(self, tokenType, tokenVals=None):
        """
        Consume the current token, verifying that its type is tokenType
        and, when tokenVals is given, that its value is one of tokenVals.
        Returns the (type, value) pair of the consumed token; raises
        SyntaxError on a mismatch.
        """
        t = self.tokenizer

        # Fetch the current value through the accessor matching its type;
        # anything unrecognized falls back to stringVal.
        accessors = {
            "keyword": t.keyWord,
            "symbol": t.symbol,
            "identifier": t.identifier,
            "integerConstant": t.intVal,
        }
        tType = t.tokenType()
        tVal = accessors.get(tType, t.stringVal)()

        # Reject a wrong type, or a wrong value when values were supplied.
        typeOk = tType == tokenType
        valueOk = not tokenVals or tVal in tokenVals
        if not (typeOk and valueOk):
            raise SyntaxError(
                "Expected {} {}. Found {}.".format(
                    tokenType, " or ".join(tokenVals or []), t.currentToken
                )
            )

        if t.hasMoreTokens():
            t.advance()

        # Hand back the actual token type and value.
        return (tType, tVal)

    def emit(self, token=None, category=None, state=None, varType=None, xml=None):
        """
        Emit the provided XML or token as XML to the xmlFile.
        Will indent based on the current indentLevel.

        Exactly one of xml or token is expected. For a token, category,
        state, and varType drive symbol-table definition (state DEFINE)
        or lookup (state USE) and become attributes on the emitted tag.
        The output is routed through writer.writeComment, so the XML tree
        appears as comments in the generated VM file.
        """
        # If XML code not provided, create it from the token type and value
        if not xml:
            (tokenType, tokenVal) = token

            # Handle symbol table additions/lookups
            index = None
            if state == "DEFINE" and category in ["STATIC", "FIELD", "ARG", "VAR"]:
                index = self.symtab.define(tokenVal, varType, category)

            # A used LET/TERM identifier is resolved to its real kind;
            # anything absent from the table is a class or subroutine name.
            if state == "USE" and category in ["LET", "TERM"]:
                category = self.symtab.kindOf(tokenVal)
                if category:
                    varType = self.symtab.typeOf(tokenVal)
                    index = self.symtab.indexOf(tokenVal)
                else:
                    category = "CLASS OR SUBROUTINE"

            # Define additional output fields
            fields = ""
            if category is not None:
                fields += " category={}".format(category)
            if state is not None:
                fields += " state={}".format(state)
            if varType is not None:
                fields += " varType={}".format(varType)
            if index is not None:
                fields += " index={}".format(index)

            xml = "<{0}{2}>{1}</{0}>".format(
                tokenType, self.xmlProtect(tokenVal), fields
            )

        else:
            # If the XML starts with '</', reduce the indent level
            if xml[:2] == "</":
                self.indentLevel = self.indentLevel - 1

        # Output the XML, indented to the current level
        output = "{}{}\n".format(self.INDENT * self.indentLevel, xml)
        self.writer.writeComment(output)
        if self.DEBUG:
            print(output, end="")

        # If the XML does not contain '</', increase the indent level
        # (i.e. an opening non-terminal tag nests what follows).
        if "</" not in xml:
            self.indentLevel = self.indentLevel + 1

    def eatAndEmit(
        self, tokenType, tokenVals=None, category=None, state=None, varType=None
    ):
        """
        Eat the current token, then emit it as XML with the supplied
        identifier metadata. Returns the (type, value) token pair so the
        caller can inspect what was consumed.
        """
        eaten = self.eat(tokenType, tokenVals)
        self.emit(token=eaten, category=category, state=state, varType=varType)
        return eaten

    def xmlProtect(self, token):
        """Return token with XML-reserved characters escaped."""
        # The three characters XML treats specially in element content.
        escapes = {"<": "&lt;", ">": "&gt;", "&": "&amp;"}
        return escapes.get(token, token)
Code example #18 (score: 0)
class CompilationEngine:
    """
    Recursive-descent parser for the Jack language.

    Pulls tokens from a JackTokenizer, one lookahead token at a time, and
    writes an XML parse tree for the compiled class to '<source>.xml'.
    """

    def __init__(self, filepath):
        # The output file shares the source name, with an .xml extension.
        file = filepath.replace('.jack', '.xml')
        self._outputFile = open(file, 'w')
        self._tokenizer = JackTokenizer(filepath)
        # Names of the non-terminal XML elements currently open.
        self._openedNonTerminalElements = []
        # One-token lookahead buffer; None means "fetch the next token".
        self._currentToken = None

    def constructParseTree(self):
        """Compile the whole class and close the output file."""
        self._compileClass()
        self._outputFile.close()

    #compile functions
    def _compileClass(self):
        """Compile: 'class' className '{' classVarDec* subroutineDec* '}'."""
        self._eatObligatory([T_KEYWORD], [K_CLASS])
        self._openNonTerminalElement(K_CLASS, eraseToken=False)
        self._writeTerminalElement()

        self._eatObligatory([T_IDENTIFIER])
        self._writeTerminalElement()

        self._eatObligatory([T_SYMBOL], ['{'])
        self._writeTerminalElement()

        self._compileClassVarDeclarations()

        self._compileSubroutineDeclarations()

        self._eatObligatory([T_SYMBOL], ['}'])
        self._writeTerminalElement()

        self._closeNonTerminalElement(K_CLASS)

    def _compileClassVarDeclarations(self):
        """Compile zero or more
        ('static'|'field') type varName (',' varName)* ';' declarations."""
        while self._eatExpected([T_KEYWORD], [K_STATIC, K_FIELD]):
            self._openNonTerminalElement(NON_TERMINAL_CLASS_VAR_DEC,
                                         eraseToken=False)
            self._writeTerminalElement()

            self._compileTypedVarDeclaration()

            # Additional comma-separated names in the same declaration.
            while self._eatExpected([T_SYMBOL], [',']):
                self._writeTerminalElement()
                self._eatObligatory([T_IDENTIFIER])
                self._writeTerminalElement()

            self._eatObligatory([T_SYMBOL], [';'])
            self._writeTerminalElement()

            self._closeNonTerminalElement(NON_TERMINAL_CLASS_VAR_DEC)
        return

    def _compileSubroutineDeclarations(self):
        """Compile zero or more constructor/function/method declarations."""
        while self._eatExpected([T_KEYWORD],
                                [K_CONSTRUCTOR, K_FUNCTION, K_METHOD]):
            self._openNonTerminalElement(NON_TERMINAL_SUB_DEC,
                                         eraseToken=False)
            self._writeTerminalElement()

            # Return type: primitive keyword, 'void', or a class name.
            self._eatObligatory([T_KEYWORD, T_IDENTIFIER],
                                [K_INT, K_CHAR, K_BOOLEAN, K_VOID])
            self._writeTerminalElement()

            self._eatObligatory([T_IDENTIFIER])
            self._writeTerminalElement()

            self._eatObligatory([T_SYMBOL], ['('])
            self._writeTerminalElement()

            self._compileParameterList()

            self._eatObligatory([T_SYMBOL], [')'])
            self._writeTerminalElement()

            self._compileSubroutineBody()

            self._closeNonTerminalElement(NON_TERMINAL_SUB_DEC)

    def _compileParameterList(self):
        """Compile a (possibly empty) parameter list, without the
        enclosing parentheses."""
        self._openNonTerminalElement(NON_TERMINAL_PARAM_LIST)
        if self._eatExpected([T_KEYWORD, T_IDENTIFIER],
                             [K_INT, K_CHAR, K_BOOLEAN]):
            self._writeTerminalElement()

            self._eatObligatory([T_IDENTIFIER])
            self._writeTerminalElement()

            while self._eatExpected([T_SYMBOL], [',']):
                self._writeTerminalElement()
                self._compileTypedVarDeclaration()
        self._closeNonTerminalElement(NON_TERMINAL_PARAM_LIST)
        return

    def _compileSubroutineBody(self):
        """Compile: '{' varDec* statements '}'."""
        self._openNonTerminalElement(NON_TERMINAL_SUB_BODY)
        self._eatObligatory([T_SYMBOL], ['{'])
        self._writeTerminalElement()

        self._compileVarDeclaration()

        self._compileStatements()

        self._eatObligatory([T_SYMBOL], ['}'])
        self._writeTerminalElement()
        self._closeNonTerminalElement(NON_TERMINAL_SUB_BODY)

    def _compileVarDeclaration(self):
        """Compile zero or more 'var' type varName (',' varName)* ';'."""
        while self._eatExpected([T_KEYWORD], [K_VAR]):
            self._openNonTerminalElement(NON_TERMINAL_VAR_DEC,
                                         eraseToken=False)
            self._writeTerminalElement()

            self._compileTypedVarDeclaration()

            while self._eatExpected([T_SYMBOL], [',']):
                self._writeTerminalElement()
                self._eatObligatory([T_IDENTIFIER])
                self._writeTerminalElement()

            self._eatObligatory([T_SYMBOL], [';'])
            self._writeTerminalElement()

            self._closeNonTerminalElement(NON_TERMINAL_VAR_DEC)

    def _compileStatements(self):
        """Compile a sequence of let/if/while/do/return statements."""
        self._openNonTerminalElement(NON_TERMINAL_STATEMENTS, eraseToken=False)
        while self._eatExpected([T_KEYWORD],
                                [K_LET, K_IF, K_WHILE, K_DO, K_RETURN]):
            self._compileStatementByKeyword()

        self._closeNonTerminalElement(NON_TERMINAL_STATEMENTS)
        return

    def _compileLetStatement(self):
        """Compile: varName ('[' expression ']')? '=' expression ';'.
        The leading 'let' is written by _compileStatementByKeyword."""
        self._eatObligatory([T_IDENTIFIER])
        self._writeTerminalElement()

        # Optional array subscript.
        if self._eatExpected([T_SYMBOL], ['[']):
            self._writeTerminalElement()

            self._compileExpression()

            self._eatObligatory([T_SYMBOL], [']'])
            self._writeTerminalElement()

        self._eatObligatory([T_SYMBOL], ['='])
        self._writeTerminalElement()

        self._compileExpression()

        self._eatObligatory([T_SYMBOL], [';'])
        self._writeTerminalElement()

    def _compileIfStatement(self):
        """Compile the if body plus an optional 'else' '{' statements '}'."""
        self._compileConditionalStatementBody()

        if self._eatExpected([T_KEYWORD], [K_ELSE]):
            self._writeTerminalElement()

            self._eatObligatory([T_SYMBOL], ['{'])
            self._writeTerminalElement()

            self._compileStatements()

            self._eatObligatory([T_SYMBOL], ['}'])
            self._writeTerminalElement()

    def _compileWhileStatement(self):
        """Compile: '(' expression ')' '{' statements '}'."""
        self._compileConditionalStatementBody()

    def _compileDoStatement(self):
        """Compile: subroutineCall ';'."""
        self._compileSubroutineCall(calledFromDoStatement=True)

        self._eatObligatory([T_SYMBOL], [';'])
        self._writeTerminalElement()

    def _compileReturnStatement(self):
        """Compile: expression? ';'."""
        if self._eatExpected([T_SYMBOL], [';']):
            self._writeTerminalElement()
        else:
            self._compileExpression()

            self._eatObligatory([T_SYMBOL], [';'])
            self._writeTerminalElement()

    def _compileExpression(self):
        """Compile: term (op term)*."""
        self._openNonTerminalElement(NON_TERMINAL_EXPRESSION, eraseToken=False)

        self._compileTerm()

        # BUGFIX: the grammar allows any number of (op term) pairs, so this
        # must loop. The previous 'if' consumed only one operator and left
        # the tail of expressions like 'a + b + c' unparsed.
        while self._eatExpected([T_SYMBOL],
                                ['+', '-', '*', '/', '&', '|', '<', '>', '=']):
            self._writeTerminalElement()

            self._compileTerm()

        self._closeNonTerminalElement(NON_TERMINAL_EXPRESSION)

    def _compileTerm(self):
        """Compile a term: constant, keyword constant, variable, array
        entry, subroutine call, parenthesized expression, or unary op."""
        self._openNonTerminalElement(NON_TERMINAL_TERM, eraseToken=False)

        requiredTypes = [
            T_INTEGER_CONSTANT, T_STRING_CONSTANT, T_KEYWORD, T_IDENTIFIER,
            T_SYMBOL
        ]
        requiredValues = [K_TRUE, K_FALSE, K_NULL, K_THIS, '(', '-', '~']
        self._eatObligatory(requiredTypes, requiredValues)

        if self._currentToken['type'] in [
                T_INTEGER_CONSTANT, T_STRING_CONSTANT, T_KEYWORD
        ]:
            # Simple constant or keyword constant.
            self._writeTerminalElement()

        elif self._currentToken['type'] == T_SYMBOL:
            symbol = self._currentToken['value']
            self._writeTerminalElement()

            if symbol == '(':
                # Parenthesized sub-expression.
                self._compileExpression()

                self._eatObligatory([T_SYMBOL], [')'])
                self._writeTerminalElement()
            else:
                # Unary '-' or '~' applied to the following term.
                self._compileTerm()

        elif self._currentToken['type'] == T_IDENTIFIER:
            self._writeTerminalElement()

            # One lookahead token distinguishes an array entry, a
            # subroutine call, and a plain variable (no extra token).
            if self._eatExpected([T_SYMBOL], ['[', '.', '(']):
                symbol = self._currentToken['value']

                if symbol == '[':
                    self._writeTerminalElement()

                    self._compileExpression()

                    self._eatObligatory([T_SYMBOL], [']'])
                    self._writeTerminalElement()
                else:
                    self._compileSubroutineCall()

        self._closeNonTerminalElement(NON_TERMINAL_TERM)

    def _compileExpressionList(self):
        """Compile a (possibly empty) comma-separated expression list."""
        self._openNonTerminalElement(NON_TERMINAL_EXPRESSION_LIST)

        if not self._eatExpected([T_SYMBOL], [')']):
            self._compileExpression()

            while self._eatExpected([T_SYMBOL], [',']):
                self._writeTerminalElement()

                self._compileExpression()

        self._closeNonTerminalElement(NON_TERMINAL_EXPRESSION_LIST)

    #aux compile functions
    def _compileTypedVarDeclaration(self):
        """Compile a type (keyword or class name) followed by one name."""
        self._eatObligatory([T_KEYWORD, T_IDENTIFIER],
                            [K_INT, K_CHAR, K_BOOLEAN])
        self._writeTerminalElement()

        self._eatObligatory([T_IDENTIFIER])
        self._writeTerminalElement()

    def _compileStatementByKeyword(self):
        """Open the statement's XML element, dispatch on its keyword,
        then close the element."""
        COMPILE_FUNCTION_BY_KEYWORD = {
            K_LET: self._compileLetStatement,
            K_IF: self._compileIfStatement,
            K_WHILE: self._compileWhileStatement,
            K_DO: self._compileDoStatement,
            K_RETURN: self._compileReturnStatement
        }

        keyword = self._currentToken['value']
        self._openNonTerminalElement(keyword + NON_TERMINAL_STATEMENT,
                                     eraseToken=False)
        self._writeTerminalElement()

        COMPILE_FUNCTION_BY_KEYWORD[keyword]()

        self._closeNonTerminalElement(keyword + NON_TERMINAL_STATEMENT)

    def _compileConditionalStatementBody(self):
        """Compile '(' expression ')' '{' statements '}' shared by
        if and while."""
        self._eatObligatory([T_SYMBOL], ['('])
        self._writeTerminalElement()

        self._compileExpression()

        self._eatObligatory([T_SYMBOL], [')'])
        self._writeTerminalElement()

        self._eatObligatory([T_SYMBOL], ['{'])
        self._writeTerminalElement()

        self._compileStatements()

        self._eatObligatory([T_SYMBOL], ['}'])
        self._writeTerminalElement()

    def _compileSubroutineCall(self, calledFromDoStatement=False):
        """Compile (className|varName '.')? subroutineName '(' expressionList ')'.
        From a do statement the leading identifier has not been consumed
        yet, so it is eaten here first."""
        if calledFromDoStatement:
            self._eatObligatory([T_IDENTIFIER])
            self._writeTerminalElement()

        if self._eatExpected([T_SYMBOL], ['.']):
            self._writeTerminalElement()

            self._eatObligatory([T_IDENTIFIER])
            self._writeTerminalElement()

        self._eatObligatory([T_SYMBOL], ['('])
        self._writeTerminalElement()

        self._compileExpressionList()

        self._eatObligatory([T_SYMBOL], [')'])
        self._writeTerminalElement()

    #aux functions
    def _eatObligatory(self, requiredTokenTypes, requiredTokenValues=()):
        """Advance to the next token (unless one is buffered) and abort
        compilation, leaving an error marker in the output, if the token
        does not match the required types/values.
        (Defaults are immutable tuples rather than shared mutable lists.)"""
        if self._currentToken is None and not self._tokenizer.hasMoreTokens():
            self._outputFile.write(
                '-- COMPILATION ERROR -> MORE TOKENS EXPECTED!! --')
            self._outputFile.close()
            exit(1)

        self._currentToken = self._currentToken or self._tokenizer.advance()
        if (self._currentToken['type'] not in requiredTokenTypes or
            (self._currentToken['type'] in TOKEN_TYPES_WITH_EXPECTABLE_VALUES
             and len(requiredTokenValues) > 0
             and self._currentToken['value'] not in requiredTokenValues)):
            self._outputFile.write('-- COMPILATION ERROR -> WRONG SYNTAX!! --')
            self._outputFile.close()
            exit(1)

    def _eatExpected(self, expectedTokenTypes, expectedTokenValues=()):
        """Peek at the next token (buffering it) and report whether it
        matches the expected types/values, without consuming it."""
        self._currentToken = self._currentToken or self._tokenizer.advance()

        return (self._currentToken['type'] in expectedTokenTypes
                and (self._currentToken['type']
                     not in TOKEN_TYPES_WITH_EXPECTABLE_VALUES
                     or len(expectedTokenValues) == 0
                     or self._currentToken['value'] in expectedTokenValues))

    def _openNonTerminalElement(self,
                                element,
                                isNonTerminalElementUnique=False,
                                eraseToken=True):
        """Write an opening tag and record it as open. eraseToken=True
        discards the buffered lookahead token."""
        if not isNonTerminalElementUnique or element not in self._openedNonTerminalElements:
            self._outputFile.write(f'<{element}>\n')
            self._openedNonTerminalElements.append(element)
            self._currentToken = None if eraseToken else self._currentToken

    def _closeNonTerminalElement(self, element):
        """Write the closing tag if the element is currently open."""
        if element in self._openedNonTerminalElements:
            self._outputFile.write(f'</{element}>\n')
            self._openedNonTerminalElements.remove(element)

    def _writeTerminalElement(self):
        """Write the buffered token as '<type> value </type>', escaping
        XML-special characters (quotes are stripped from string
        constants), then consume the token."""
        XML_TRANSLATOR = {
            '<': '&lt;',
            '>': '&gt;',
            '&': '&amp;',
            '"': '&quot;'
        }

        tokenType, tokenValue = self._currentToken.values()
        tokenValue = XML_TRANSLATOR[
            tokenValue] if tokenValue in XML_TRANSLATOR else tokenValue.replace(
                '"', '')
        self._outputFile.write(
            f'<{TERMINAL_ELEMENT_BY_TOKEN_TYPE[tokenType]}>')
        self._outputFile.write(f' {tokenValue} ')
        self._outputFile.write(
            f'</{TERMINAL_ELEMENT_BY_TOKEN_TYPE[tokenType]}>\n')
        self._currentToken = None
Code example #19 (score: 0)
File: TesteTokenizer.py — Project: wolterpericles/Jack
# JackTokenizer smoke test: print every token of Main.jack as tagged XML.

from JackTokenizer import JackTokenizer

tokenizer = JackTokenizer('Main.jack')
tokenizer.advance()
print("<tokens>")
while tokenizer.hasMoreTokens():
    # Print the current token wrapped in its tag, then move on.
    print(tokenizer.tagToken())
    tokenizer.advance()
print("</tokens>")
コード例 #20
0
class CompilationEngine:
    """Single-pass compiler from a Jack token stream straight to VM code.

    Collaborators (project classes): JackTokenizer (token input), VMWriter
    (VM output), SymbolTable (one instance for class scope, one for
    subroutine scope).
    """

    def __init__(self, filepath):
        self._tokenizer = JackTokenizer(filepath)
        self._writer = VMWriter(filepath)
        self._classVariables = SymbolTable()
        self._subroutineVariables = SymbolTable()
        self._currentToken = None
        self._preserveCurrentToken = False
        self._className = ''
        self._currentCompilingFunction = {'kind': '', 'name': ''}
        # counts if/while statements in the current function so generated
        # labels are unique within it
        self._numberConditionalsStatementsCurrentFunction = 0

    def run(self):
        """Compiles the whole class and closes the VM output file."""
        self._compileClass()
        self._writer.close()
        return

    #compile functions
    def _compileClass(self):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        self._eatObligatory([T_KEYWORD], [K_CLASS])
        self._eatObligatory([T_IDENTIFIER])
        self._className = self._currentToken['value']

        self._eatObligatory([T_SYMBOL], ['{'])
        self._compileClassVarDeclarations()
        self._compileSubroutineDeclarations()
        self._eatObligatory([T_SYMBOL], ['}'])
        return

    def _compileClassVarDeclarations(self):
        """classVarDec*: ('static'|'field') type varName (',' varName)* ';'
        Registers every declared name in the class-scope symbol table."""
        self._classVariables.startSubroutine()

        while self._eatExpected([T_KEYWORD], [K_STATIC, K_FIELD]):
            kind = VAR_STATIC if self._currentToken['value'] == K_STATIC else VAR_FIELD
            varType, name = self._compileTypedVarDeclaration()
            self._classVariables.insert(name, varType, kind)

            while self._eatExpected([T_SYMBOL], [',']):
                self._eatObligatory([T_IDENTIFIER])
                name = self._currentToken['value']
                self._classVariables.insert(name, varType, kind)

            self._eatObligatory([T_SYMBOL], [';'])
        return

    def _compileSubroutineDeclarations(self):
        """subroutineDec*: ('constructor'|'function'|'method')
        ('void'|type) subroutineName '(' parameterList ')' subroutineBody"""
        while self._eatExpected([T_KEYWORD], [K_CONSTRUCTOR, K_FUNCTION, K_METHOD]):
            self._currentCompilingFunction['kind'] = self._currentToken['value']
            self._subroutineVariables.startSubroutine()
            # FIX: a method receives the target object as an implicit first
            # argument, so reserve argument 0 for 'this'; the declared
            # parameters then map to argument 1..n, matching the calling
            # convention set up by _compileMethodCode.
            if self._currentCompilingFunction['kind'] == K_METHOD:
                self._subroutineVariables.insert('this', self._className, VAR_ARG)

            self._eatObligatory([T_KEYWORD, T_IDENTIFIER], [K_INT, K_CHAR, K_BOOLEAN, K_VOID])
            self._eatObligatory([T_IDENTIFIER])
            self._currentCompilingFunction['name'] = self._currentToken['value']

            self._eatObligatory([T_SYMBOL], ['('])
            self._compileParameterList()
            self._eatObligatory([T_SYMBOL], [')'])
            self._compileSubroutineBody()
        return

    def _compileParameterList(self):
        """parameterList: ((type varName) (',' type varName)*)?
        Registers every parameter as an 'argument' variable."""
        if self._eatExpected([T_KEYWORD, T_IDENTIFIER], [K_INT, K_CHAR, K_BOOLEAN]):
            varType = self._currentToken['value']
            self._eatObligatory([T_IDENTIFIER])
            name = self._currentToken['value']
            self._subroutineVariables.insert(name, varType, VAR_ARG)

            while self._eatExpected([T_SYMBOL], [',']):
                varType, name = self._compileTypedVarDeclaration()
                self._subroutineVariables.insert(name, varType, VAR_ARG)
        return

    def _compileSubroutineBody(self):
        """subroutineBody: '{' varDec* statements '}'
        Emits the VM `function` header plus the constructor/method
        prologue before compiling the statements."""
        self._eatObligatory([T_SYMBOL], ['{'])
        self._compileVarDeclaration()

        funcName = self._className + '.' + self._currentCompilingFunction['name']
        nLocalVars = self._subroutineVariables.getVarCountByKind(VAR_LOCAL)
        self._writer.writeFunction(funcName, nLocalVars)

        self._numberConditionalsStatementsCurrentFunction = 0
        if self._currentCompilingFunction['kind'] == K_CONSTRUCTOR: self._compileConstructorCode()
        elif self._currentCompilingFunction['kind'] == K_METHOD: self._compileMethodCode()
        self._compileStatements()
        self._eatObligatory([T_SYMBOL], ['}'])
        return

    def _compileVarDeclaration(self):
        """varDec*: 'var' type varName (',' varName)* ';'
        Registers every declared name as a 'local' variable."""
        while self._eatExpected([T_KEYWORD], [K_VAR]):
            varType, name = self._compileTypedVarDeclaration()
            self._subroutineVariables.insert(name, varType, VAR_LOCAL)
            while self._eatExpected([T_SYMBOL], [',']):
                self._eatObligatory([T_IDENTIFIER])
                name = self._currentToken['value']
                self._subroutineVariables.insert(name, varType, VAR_LOCAL)
            self._eatObligatory([T_SYMBOL], [';'])
        return

    def _compileStatements(self):
        """statements: (let|if|while|do|return statement)*"""
        while self._eatExpected([T_KEYWORD], [K_LET, K_IF, K_WHILE, K_DO, K_RETURN]):
            self._compileStatementByKeyword()
        return

    def _compileLetStatement(self):
        """letStatement: 'let' varName ('[' expression ']')? '=' expression ';'
        For array targets the address is saved while the right-hand side is
        evaluated, then the value is stored through the THAT segment."""
        self._eatObligatory([T_IDENTIFIER])
        name = self._currentToken['value']
        segment, index = self._searchVariableByName(name)

        isArrayAssignment = False
        if self._eatExpected([T_SYMBOL], ['[']):
            self._compileArrayPosition(name)
            isArrayAssignment = True

        self._eatObligatory([T_SYMBOL], ['='])
        self._compileExpression()
        if isArrayAssignment:
            # stack holds [target address, value]: park the value in temp 0,
            # anchor THAT at the address, then store the value
            self._writer.writePop(SEGMENT_TEMP, 0)
            self._writer.writePop(SEGMENT_POINTER, 1)
            self._writer.writePush(SEGMENT_TEMP, 0)
            self._writer.writePop(SEGMENT_THAT, 0)
        else:
            self._writer.writePop(segment, index)
        self._eatObligatory([T_SYMBOL], [';'])
        return

    def _compileIfStatement(self):
        """ifStatement: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?"""
        funcName = self._className + '.' + self._currentCompilingFunction['name']
        notIfLabel = f'{funcName}_NOT_IF_{self._numberConditionalsStatementsCurrentFunction}'
        endComparisonLabel = f'{funcName}_END_COMPARISON_BLOCK_{self._numberConditionalsStatementsCurrentFunction}'
        self._numberConditionalsStatementsCurrentFunction += 1

        self._eatObligatory([T_SYMBOL], ['('])
        self._compileExpression()
        # jump to the else/end branch when the condition is false
        self._writer.writeArithmetic('not')
        self._eatObligatory([T_SYMBOL], [')'])
        self._writer.writeIf(notIfLabel)

        self._eatObligatory([T_SYMBOL], ['{'])
        self._compileStatements()
        self._eatObligatory([T_SYMBOL], ['}'])
        self._writer.writeGoto(endComparisonLabel)

        self._writer.writeLabel(notIfLabel)
        if self._eatExpected([T_KEYWORD], [K_ELSE]):
            self._eatObligatory([T_SYMBOL], ['{'])
            self._compileStatements()
            self._eatObligatory([T_SYMBOL], ['}'])
        self._writer.writeLabel(endComparisonLabel)
        return

    def _compileWhileStatement(self):
        """whileStatement: 'while' '(' expression ')' '{' statements '}'"""
        funcName = self._className + '.' + self._currentCompilingFunction['name']
        loopLabel = f'{funcName}_LOOP_{self._numberConditionalsStatementsCurrentFunction}'
        endLoopLabel = f'{funcName}_END_LOOP_{self._numberConditionalsStatementsCurrentFunction}'
        self._numberConditionalsStatementsCurrentFunction += 1

        self._writer.writeLabel(loopLabel)
        self._eatObligatory([T_SYMBOL], ['('])
        self._compileExpression()
        # exit the loop when the condition is false
        self._writer.writeArithmetic('not')
        self._eatObligatory([T_SYMBOL], [')'])
        self._writer.writeIf(endLoopLabel)

        self._eatObligatory([T_SYMBOL], ['{'])
        self._compileStatements()
        self._eatObligatory([T_SYMBOL], ['}'])
        self._writer.writeGoto(loopLabel)
        self._writer.writeLabel(endLoopLabel)
        return

    def _compileDoStatement(self):
        """doStatement: 'do' subroutineCall ';'
        The callee's (void) return value is discarded into temp 0."""
        self._compileSubroutineCall()
        self._writer.writePop(SEGMENT_TEMP, 0)
        self._eatObligatory([T_SYMBOL], [';'])
        return

    def _compileReturnStatement(self):
        """returnStatement: 'return' expression? ';'
        A void return still pushes constant 0, per the VM convention."""
        if self._eatExpected([T_SYMBOL], [';']):
            self._writer.writePush(SEGMENT_CONST, 0)
        else:
            self._compileExpression()
            self._eatObligatory([T_SYMBOL], [';'])
        self._writer.writeReturn()
        return

    def _compileExpression(self):
        """expression: term (op term)*
        FIX: was an `if`, which only supported a single binary operator and
        broke chained expressions such as `a + b + c`. Operators are applied
        left to right (Jack defines no precedence)."""
        self._compileTerm()
        while self._eatExpected([T_SYMBOL], ['+', '-', '*', '/', '&', '|', '<', '>', '=']):
            operator = self._currentToken['value']
            self._compileTerm()
            self._writer.writeArithmetic(VM_COMMAND_BY_JACK_OPERATOR[operator])
        return

    def _compileTerm(self):
        """term: integerConstant | stringConstant | keywordConstant |
        varName ('[' expression ']')? | subroutineCall | '(' expression ')' |
        ('-'|'~') term"""
        requiredTypes = [T_INTEGER_CONSTANT, T_STRING_CONSTANT, T_KEYWORD, T_IDENTIFIER, T_SYMBOL]
        requiredValues = [K_TRUE, K_FALSE, K_NULL, K_THIS, '(', '-', '~']
        self._eatObligatory(requiredTypes, requiredValues)
        tokenType = self._currentToken['type']

        if tokenType == T_INTEGER_CONSTANT:
            integer = self._currentToken['value']
            self._writer.writePush(SEGMENT_CONST, integer)

        elif tokenType == T_STRING_CONSTANT:
            # build the string at runtime: String.new then appendChar per char
            stringConst = self._currentToken['value'].replace('"', '')
            self._writer.writePush(SEGMENT_CONST, len(stringConst))
            self._writer.writeCall('String.new', 1)
            for char in stringConst:
                self._writer.writePush(SEGMENT_CONST, ord(char))
                self._writer.writeCall('String.appendChar', 2)

        elif tokenType == T_KEYWORD:
            constant = self._currentToken['value']
            if constant == K_FALSE or constant == K_NULL:
                self._writer.writePush(SEGMENT_CONST, 0)
            elif constant == K_TRUE:
                # true is -1 (all bits set)
                self._writer.writePush(SEGMENT_CONST, 1)
                self._writer.writeArithmetic('neg')
            else:
                # 'this'
                self._writer.writePush(SEGMENT_POINTER, 0)

        elif tokenType == T_SYMBOL:
            symbol = self._currentToken['value']
            if symbol == '(':
                self._compileExpression()
                self._eatObligatory([T_SYMBOL], [')'])
            else:
                unaryOperation = 'neg' if symbol == '-' else 'not'
                self._compileTerm()
                self._writer.writeArithmetic(unaryOperation)

        elif tokenType == T_IDENTIFIER:
            name = self._currentToken['value']
            if self._eatExpected([T_SYMBOL], ['[', '.', '(']):
                symbol = self._currentToken['value']

                if symbol == '[':
                    # array read: compute address, anchor THAT, push value
                    self._compileArrayPosition(name)
                    self._writer.writePop(SEGMENT_POINTER, 1)
                    self._writer.writePush(SEGMENT_THAT, 0)
                else:
                    self._preserveCurrentToken = True
                    self._compileSubroutineCall(name)
            else:
                segment, index = self._searchVariableByName(name)
                self._writer.writePush(segment, index)
        return

    def _compileExpressionList(self):
        """expressionList: (expression (',' expression)*)?
        Returns the number of expressions compiled (call-argument count)."""
        nArgs = 0
        if not self._eatExpected([T_SYMBOL], [')']):
            self._compileExpression()
            nArgs += 1
            while self._eatExpected([T_SYMBOL], [',']):
                self._compileExpression()
                nArgs += 1
        # the closing ')' is re-checked by the caller's _eatObligatory
        self._preserveCurrentToken = True
        return nArgs

    #aux compile functions
    def _compileTypedVarDeclaration(self):
        """Consumes `type varName` and returns (type, name)."""
        self._eatObligatory([T_KEYWORD, T_IDENTIFIER], [K_INT, K_CHAR, K_BOOLEAN])
        varType = self._currentToken['value']
        self._eatObligatory([T_IDENTIFIER])
        name = self._currentToken['value']
        return varType, name

    def _compileStatementByKeyword(self):
        """Dispatches to the statement compiler for the buffered keyword."""
        COMPILE_FUNCTION_BY_KEYWORD = {
            K_LET : self._compileLetStatement,
            K_IF : self._compileIfStatement,
            K_WHILE : self._compileWhileStatement,
            K_DO: self._compileDoStatement,
            K_RETURN : self._compileReturnStatement
        }

        keyword = self._currentToken['value']
        COMPILE_FUNCTION_BY_KEYWORD[keyword]()
        return

    def _compileSubroutineCall(self, name = None):
        """subroutineCall: subroutineName '(' expressionList ')' |
        (className|varName) '.' subroutineName '(' expressionList ')'"""
        if name is None:
            self._eatObligatory([T_IDENTIFIER])
            name = self._currentToken['value']

        nArgs = 0
        if self._eatExpected([T_SYMBOL], ['.']):
            self._eatObligatory([T_IDENTIFIER])
            funcName = self._currentToken["value"]
            varInfo = self._searchVariableByName(name)
            if varInfo is not None:
                # method call on an object variable: the object itself is
                # pushed as the implicit first argument
                segment, index = varInfo
                self._writer.writePush(segment, index)
                nArgs += 1
                # NOTE(review): the emitted call name should be qualified
                # with the variable's declared class (e.g. Point.distance);
                # _searchVariableByName only exposes segment/index, so the
                # name stays unqualified here — confirm against SymbolTable
                # and extend it to expose the type.
            else:
                # ClassName.function(...) style call
                funcName = f'{name}.{funcName}'
        else:
            # FIX: a bare call (`do draw();`) is always a method call on the
            # current object — push 'this' as the implicit argument and
            # qualify the name with the current class.
            self._writer.writePush(SEGMENT_POINTER, 0)
            nArgs += 1
            funcName = f'{self._className}.{name}'

        self._eatObligatory([T_SYMBOL], ['('])
        nArgs += self._compileExpressionList()
        self._eatObligatory([T_SYMBOL], [')'])
        self._writer.writeCall(funcName, nArgs)
        return

    def _compileConstructorCode(self):
        """Constructor prologue: allocate one word per field and anchor
        'this' at the returned base address.
        FIX: the allocation size is the class's FIELD count, not the
        constructor's argument count."""
        nFields = self._classVariables.getVarCountByKind(VAR_FIELD)
        self._writer.writePush(SEGMENT_CONST, nFields)
        self._writer.writeCall('Memory.alloc', 1)
        self._writer.writePop(SEGMENT_POINTER, 0)
        return

    def _compileMethodCode(self):
        """Method prologue: anchor 'this' at the object passed as
        argument 0."""
        self._writer.writePush(SEGMENT_ARG, 0)
        self._writer.writePop(SEGMENT_POINTER, 0)
        return

    def _compileArrayPosition(self, arrName):
        """Leaves `base + index` of `arrName[expression]` on the stack and
        consumes the closing ']'."""
        arrayBaseAddr = self._searchVariableByName(arrName)
        segment, index = arrayBaseAddr

        self._writer.writePush(segment, index)
        self._compileExpression()
        self._writer.writeArithmetic('add')

        self._eatObligatory([T_SYMBOL], [']'])
        return

    #aux functions
    def _eatObligatory(self, requiredTokenTypes, requiredTokenValues = []):
        """Consumes the next token (or the preserved one); aborts the
        compilation with an error report when it does not match."""
        if not self._preserveCurrentToken and not self._tokenizer.hasMoreTokens():
            self._writer.writeCompilationError('MORE TOKENS EXPECTED!')
            exit(1)

        if self._preserveCurrentToken:
            self._preserveCurrentToken = False
        else:
            self._currentToken = self._tokenizer.advance()

        if (self._currentToken['type'] not in requiredTokenTypes or
            (self._currentToken['type'] in TOKEN_TYPES_WITH_EXPECTABLE_VALUES and
            len(requiredTokenValues) > 0 and self._currentToken['value'] not in requiredTokenValues)):
                self._writer.writeCompilationError(f'SYNTAX ERROR!')
                self._writer.writeCompilationError(f'TOKEN GIVEN: {self._currentToken}')
                self._writer.writeCompilationError(f'EXPECTED: {requiredTokenValues} in {requiredTokenTypes}')
                exit(1)
        return

    def _eatExpected(self, expectedTokenTypes, expectedTokenValues = []):
        """Non-fatal lookahead: consumes the next token when it matches,
        otherwise preserves it for the next eat and returns False."""
        self._currentToken = self._currentToken if self._preserveCurrentToken else self._tokenizer.advance()
        ateExpected = (self._currentToken['type'] in expectedTokenTypes and
                (self._currentToken['type'] not in TOKEN_TYPES_WITH_EXPECTABLE_VALUES or
                len(expectedTokenValues) == 0 or self._currentToken['value'] in expectedTokenValues))
        self._preserveCurrentToken = not ateExpected
        return ateExpected

    def _searchVariableByName(self, name):
        """Resolves *name* in subroutine scope first, then class scope.
        Returns (segment, index) or None when the name is unknown."""
        subroutineVar = self._subroutineVariables.getByName(name)
        if subroutineVar is not None:
            return subroutineVar['segment'], subroutineVar['index']
        classVar = self._classVariables.getByName(name)
        if classVar is not None:
            return classVar['segment'], classVar['index']
        return None
コード例 #21
0
class CompilationEngine(object):
    """This class recursively compiles a .jack file into (eventually) vm code.
    For now, this just outputs a grammar xml file"""
    def __init__(self, inFile):
        """Builds a tokenizer over the open .jack file *inFile* and opens
        the corresponding .xml file for writing."""
        super(CompilationEngine, self).__init__()
        # create an internal tokenizer to iterate through
        self.tokenizer = JackTokenizer(inFile)

        # spacing so I can make nicely formatted xml; this grows by
        # 2 spaces per nesting level (the old comment claimed 4)
        self.spacing = ""

        # setup the output file: mode 'w' already truncates, so the former
        # open/close/reopen-in-append dance was redundant
        self.outputPath = inFile.name.replace(".jack", ".xml")
        self.outputFile = open(self.outputPath, 'w')

    def __increaseSpacing(self):
        """Adds 2 spaces to self.spacing"""
        self.spacing += "  "

    def __decreaseSpacing(self):
        """Removes 2 spaces from self.spacing"""
        self.spacing = self.spacing[:-2]

    def __writeFullTag(self, tag, value):
        """Writes the spacing, then <tag> value </tag> to the output file"""
        self.outputFile.write(self.spacing + "<" + tag + "> " + value + " </" +
                              tag + ">\n")

    def __writeOpenTag(self, tag):
        """Writes spacing, then <tag>, then increases the spacing"""
        self.outputFile.write(self.spacing + "<" + tag + ">\n")
        self.__increaseSpacing()

    def __writeCloseTag(self, tag):
        """Decreases spacing, then writes spacing, then </tag>"""
        self.__decreaseSpacing()
        self.outputFile.write(self.spacing + "</" + tag + ">\n")

    def start(self):
        """Starts the compilation: first emits the token XML file, then
        re-reads the token stream and runs the grammar pass
        (__compileClass)."""

        # prime the tokenizer with the first token
        self.tokenizer.advance()

        # make token xml file (consumes the whole token stream)
        self.__createTokenXML()

        # reset tokenizer and compile; reset() rewinds the stream so the
        # grammar pass re-reads the file from the first token
        self.tokenizer.reset()
        self.tokenizer.advance()
        self.__compileClass()

    def __createTokenXML(self):
        """Creates the token XML file (<name>T.xml) for a .jack file.

        FIX: the original opened the file twice ('w' then 'a') and never
        closed it, leaking the handle and risking unflushed output; a
        `with` block in mode 'w' does the truncate-and-write in one step
        and guarantees the file is closed.
        """
        outputPath = self.outputPath.replace(".xml", "T.xml")
        with open(outputPath, 'w') as f:
            f.write("<tokens>\n")
            while self.tokenizer.hasMoreTokens():
                # output each token to xml to check
                tokenType = self.tokenizer.tokenType()
                if tokenType == "KEYWORD":
                    f.write("<keyword>" + self.tokenizer.keyWord() +
                            "</keyword>\n")
                elif tokenType == "SYMBOL":
                    # escape characters that are markup in XML
                    symbol = self.tokenizer.symbol()
                    symbol = symbol.replace("&", "&amp;")
                    symbol = symbol.replace("<", "&lt;")
                    symbol = symbol.replace(">", "&gt;")
                    symbol = symbol.replace("\"", "&quot;")
                    f.write("<symbol>" + symbol + "</symbol>\n")
                elif tokenType == "IDENTIFIER":
                    f.write("<identifier>" + self.tokenizer.identifier() +
                            "</identifier>\n")
                elif tokenType == "INT_CONST":
                    # NOTE(review): assumes intVal() returns a str — confirm
                    f.write("<integerConstant>" + self.tokenizer.intVal() +
                            "</integerConstant>\n")
                elif tokenType == "STRING_CONST":
                    f.write("<stringConstant>" + self.tokenizer.stringVal() +
                            "</stringConstant>\n")

                self.tokenizer.advance()

            # close the xml tag
            f.write("</tokens>")

    def __compileType(self):
        """Compiles a complete jack type grammar. Returns false if there is an error"""
        # check for valid keyword
        if self.tokenizer.tokenType() == "KEYWORD":
            k = self.tokenizer.keyWord()
            if k not in ["int", "char", "boolean"]:
                print("Error: type keyword must be int, char, or boolean")
                return False
            self.__writeFullTag("keyword", k)
            self.tokenizer.advance()
            return True
        # check for className
        else:
            res = self.__compileClassName()
            # if __compileClassName() errors, this is not a valid type
            if not res:
                print("Error: type not a valid className")
            return res

    def __compileClassName(self):
        """Compiles a complete jack className grammar. Returns false if there is
        an error"""
        if self.tokenizer.tokenType() != "IDENTIFIER":
            return False
        self.__writeFullTag("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()
        return True

    def __compileSubroutineName(self):
        """Compiles a complete jack subroutineName. Returns false if there is an
        error"""
        if self.tokenizer.tokenType() != "IDENTIFIER":
            return False
        self.__writeFullTag("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()
        return True

    def __compileVarName(self):
        """Compiles a complete jack varName. Returns false if there is an
        error"""
        if self.tokenizer.tokenType() != "IDENTIFIER":
            return False
        self.__writeFullTag("identifier", self.tokenizer.identifier())
        self.tokenizer.advance()
        return True

    def __compileClass(self):
        """Compiles a complete jack class grammar:
        'class' className '{' classVarDec* subroutineDec* '}'.
        Exits the process with status 1 on any syntax error."""
        # find the class keyword
        if self.tokenizer.tokenType() != "KEYWORD" or \
                self.tokenizer.keyWord() != "class":
            print("Error: no class declaration found")
            sys.exit(1)
        # write both the class tag and the keyword tag for class
        self.__writeOpenTag("class")
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # find the className
        if not self.__compileClassName():
            print("Error: no class name found in class declaration")
            sys.exit(1)

        # find the open curly brace
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "{":
            print("Error: no opening brace found after class")
            # FIX: was sys.exit(0) — an error must not report success
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # compile the classVarDecs
        while (self.tokenizer.tokenType() == "KEYWORD"
               and (self.tokenizer.keyWord() == "static"
                    or self.tokenizer.keyWord() == "field")):
            self.__compileClassVarDec()

        # compile the subroutines
        while (self.tokenizer.tokenType() == "KEYWORD"
               and (self.tokenizer.keyWord() == "constructor"
                    or self.tokenizer.keyWord() == "function"
                    or self.tokenizer.keyWord() == "method")):
            self.__compileSubroutineDec()

        # find last curly brace
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "}":
            print("Error: no closing brace found after class definition")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # close class tag
        self.__writeCloseTag("class")
        # NOTE(review): this second advance() past the final '}' looks
        # redundant — confirm the tokenizer tolerates advancing at EOF
        self.tokenizer.advance()

    def __compileClassVarDec(self):
        """Compiles one jack classVarDec:
        ('static'|'field') type varName (',' varName)* ';'.
        The caller has already verified the leading static/field keyword;
        this advances the tokenizer completely through the declaration."""
        # since we already checked to make sure this is valid, we can write
        # the tag here and either static or field
        self.__writeOpenTag("classVarDec")
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # look for a valid type (it reports its own error message)
        if not self.__compileType():
            sys.exit(1)

        # check for varName
        if not self.__compileVarName():
            print("Error: missing varName identifier in classVarDec")
            sys.exit(1)

        # check for comma then more varNames (possibly not existing)
        while self.tokenizer.tokenType() == "SYMBOL" and \
                self.tokenizer.symbol() == ",":
            # write the comma
            self.__writeFullTag("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()

            # check for varName again
            if not self.__compileVarName():
                print("Error: missing varName identifier in classVarDec")
                sys.exit(1)

        # check for closing semicolon
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != ";":
            print("Error: missing semicolon after classVarDec")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # close classVarDec tag
        self.__writeCloseTag("classVarDec")

    def __compileSubroutineDec(self):
        """Compiles one jack subroutineDec:
        ('constructor'|'function'|'method') ('void'|type) subroutineName
        '(' parameterList ')' subroutineBody.
        The caller has already verified the leading keyword; this advances
        the tokenizer completely through the declaration."""
        # write the opening tag
        self.__writeOpenTag("subroutineDec")
        # since we already checked for constructor/function/method, write it
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # look for void or type
        if self.tokenizer.tokenType() == "KEYWORD" and \
                self.tokenizer.keyWord() == "void":
            # if void, write it
            self.__writeFullTag("keyword", self.tokenizer.keyWord())
            self.tokenizer.advance()
        elif not self.__compileType():
            print("Error: subroutine return type not void or valid type")
            sys.exit(1)

        # check for subroutineName
        if not self.__compileSubroutineName():
            print("Error: missing subroutineName in subroutineDec")
            sys.exit(1)

        # check for open parentheses
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "(":
            print("Error: missing ( for parameter list")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # do parameter list (this could add nothing)
        self.__compileParameterList()

        # check for closing parentheses
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != ")":
            print("Error: missing ) for parameter list")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # compile subroutine body
        self.__compileSubroutineBody()

        # close subroutineDec tag
        self.__writeCloseTag("subroutineDec")

    def __compileParameterList(self):
        """Compiles a jack parameterList:
        ((type varName) (',' type varName)*)?.
        Always emits the <parameterList> element, even when empty; the
        surrounding parentheses are handled by the caller."""
        # write opening tag
        self.__writeOpenTag("parameterList")

        # if the next symbol is a ), then there is no parameter list, so just return
        # the rest of compileSubroutine will handle writing that
        if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol(
        ) == ")":
            # close tag
            self.__writeCloseTag("parameterList")
            return
        # look for a valid type
        else:
            res = self.__compileType()
            if not res:
                sys.exit(1)

            # check for varName
            if not self.__compileVarName():
                print("Error: missing varName identifier in parameterList")
                sys.exit(1)

            # check for comma separated list of type and varName
            while self.tokenizer.tokenType(
            ) == "SYMBOL" and self.tokenizer.symbol() == ",":
                # write the comma
                self.__writeFullTag("symbol", self.tokenizer.symbol())
                self.tokenizer.advance()

                # look for a valid type
                if not self.__compileType():
                    sys.exit(1)

                # check for varName
                if not self.__compileVarName():
                    print("Error: missing varName identifier in parameterList")
                    sys.exit(1)

            # write closing tag
            self.__writeCloseTag("parameterList")

    def __compileSubroutineBody(self):
        """Compiles a jack subroutineBody: '{' varDec* statements '}'."""
        # write opening tag
        self.__writeOpenTag("subroutineBody")

        # check for {
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "{":
            print("Error: missing { for subroutine body")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # compile each leading 'var' declaration (zero or more)
        while self.tokenizer.tokenType() == "KEYWORD" and \
                self.tokenizer.keyWord() == "var":
            self.__compileVarDec()

        # compile statements
        self.__compileStatements()

        # check for closing }
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "}":
            print("Error: missing closing } for subroutine body")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # close tag
        self.__writeCloseTag("subroutineBody")
        return

    def __compileVarDec(self):
        """Compiles one jack varDec: 'var' type varName (',' varName)* ';'.
        The caller has already verified the leading 'var' keyword."""
        # write open tag
        self.__writeOpenTag("varDec")
        # since we already checked to make sure there is a var, write it
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # check for type (it reports its own error message)
        if not self.__compileType():
            sys.exit(1)

        # check for varName
        if not self.__compileVarName():
            print("Error: missing varName identifier in varDec")
            sys.exit(1)

        # check for comma separated list of additional varNames
        while self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol(
        ) == ",":
            # write the comma
            self.__writeFullTag("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()

            # check for varName
            if not self.__compileVarName():
                print("Error: missing varName identifier in varDec")
                sys.exit(1)

        # check for semicolon
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != ";":
            print("Error: missing ; after varDec")
            sys.exit(1)
        # write ;
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # close tag
        self.__writeCloseTag("varDec")

        return

    def __compileStatements(self):
        """Compiles a complete jack statements grammar"""
        # write statements tag
        self.__writeOpenTag("statements")

        # check for the keywords for all the statements
        while self.tokenizer.tokenType() == "KEYWORD":
            k = self.tokenizer.keyWord()

            if k == "let":
                self.__compileLetStatement()
            elif k == "if":
                self.__compileIfStatement()
            elif k == "while":
                self.__compileWhileStatement()
            elif k == "do":
                self.__compileDoStatement()
            elif k == "return":
                self.__compileReturnStatement()
            else:
                print("Error: invalid statment " + k)
                sys.exit(1)

        # close statements tag
        self.__writeCloseTag("statements")

    def __compileLetStatement(self):
        """Compiles a jack letStatement:
        'let' varName ('[' expression ']')? '=' expression ';'.
        The caller has already verified the 'let' keyword. Exits with
        status 1 on any syntax error."""
        # write opening tag
        self.__writeOpenTag("letStatement")
        # since we already checked for the keyword let, write it
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # look for varName
        if not self.__compileVarName():
            print("Error: missing varName for let statement")
            # FIX: every other error path exits; without this the compiler
            # kept parsing a broken statement
            sys.exit(1)

        # check for optional array indexing '[' expression ']'
        if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol(
        ) == "[":
            # write the bracket
            self.__writeFullTag("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()

            # compile expression
            self.__compileExpression()

            # write the closing bracket
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
            ) != "]":
                print("Error: missing closing ] in let statement")
                sys.exit(1)
            self.__writeFullTag("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()

        # check for =
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "=":
            print("Error: missing = in let expression")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # compile right-hand-side expression
        self.__compileExpression()

        # look for ;
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != ";":
            print("Error: missing ; after let statement")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # write closing tag
        self.__writeCloseTag("letStatement")

    def __compileIfStatement(self):
        """Compiles a complete jack if statement grammar:
        'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?

        Assumes the caller has already verified the 'if' keyword.
        Exits the process with status 1 on any syntax error.
        """
        # write opening tag
        self.__writeOpenTag("ifStatement")
        # since we already checked for if, write it
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # check for (
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "(":
            print("Error: missing ( in if statement")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # compile the condition expression
        self.__compileExpression()

        # check for )
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != ")":
            print("Error: missing ) in if statement")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # check for {
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "{":
            print("Error: missing { for if statement")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # compile the statements of the if-body
        self.__compileStatements()

        # check for }
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "}":
            print("Error: missing } after if statement")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # optional else clause
        if self.tokenizer.tokenType() == "KEYWORD" and self.tokenizer.keyWord(
        ) == "else":
            # write else
            self.__writeFullTag("keyword", self.tokenizer.keyWord())
            self.tokenizer.advance()

            # check for {
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
            ) != "{":
                print("Error: missing { for if statement")
                sys.exit(1)
            self.__writeFullTag("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()

            # compile the statements of the else-body
            self.__compileStatements()

            # check for }
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
            ) != "}":
                print("Error: missing } after if statement")
                sys.exit(1)
            self.__writeFullTag("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()

        # close tag
        self.__writeCloseTag("ifStatement")

    def __compileWhileStatement(self):
        """Compiles a complete jack while statement grammar:
        'while' '(' expression ')' '{' statements '}'

        Assumes the caller has already verified the 'while' keyword.
        Exits the process with status 1 on any syntax error.
        """
        # write opening tag
        self.__writeOpenTag("whileStatement")
        # since we checked for while already, write it
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # check for (
        # bug fix: the error messages below previously said "if statement",
        # copy-pasted from __compileIfStatement
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "(":
            print("Error: missing ( in while statement")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # compile the loop-condition expression
        self.__compileExpression()

        # check for )
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != ")":
            print("Error: missing ) in while statement")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # check for {
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "{":
            print("Error: missing { for while statement")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # compile the statements of the loop body
        self.__compileStatements()

        # check for }
        if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
        ) != "}":
            print("Error: missing } after while statement")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        # write closing tag
        self.__writeCloseTag("whileStatement")

    def __compileDoStatement(self):
        """Compiles a complete jack do statement grammar:
        'do' subroutineCall ';'

        Assumes the caller has already verified the 'do' keyword.
        Exits the process with status 1 on a missing semicolon.
        """
        self.__writeOpenTag("doStatement")

        # the 'do' keyword was already checked by the caller; emit it
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # do <subroutineCall>
        self.__compileSubroutineCall()

        # the statement must be terminated by a semicolon
        terminated = (self.tokenizer.tokenType() == "SYMBOL"
                      and self.tokenizer.symbol() == ";")
        if not terminated:
            print("Error: missing ; after do statement")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        self.__writeCloseTag("doStatement")

    def __compileReturnStatement(self):
        """Compiles a complete jack return statement grammar:
        'return' expression? ';'

        Assumes the caller has already verified the 'return' keyword.
        Exits the process with status 1 on a missing semicolon.
        """
        self.__writeOpenTag("returnStatement")

        # the 'return' keyword was already checked by the caller; emit it
        self.__writeFullTag("keyword", self.tokenizer.keyWord())
        self.tokenizer.advance()

        # anything other than an immediate ';' must be a return expression
        # (tokenType is tested first so symbol() is only called on symbols)
        at_semicolon = (self.tokenizer.tokenType() == "SYMBOL"
                        and self.tokenizer.symbol() == ";")
        if not at_semicolon:
            self.__compileExpression()

        # re-check after the optional expression: the next token must be ';'
        at_semicolon = (self.tokenizer.tokenType() == "SYMBOL"
                        and self.tokenizer.symbol() == ";")
        if not at_semicolon:
            print("Error: missing ; after return statement")
            sys.exit(1)
        self.__writeFullTag("symbol", self.tokenizer.symbol())
        self.tokenizer.advance()

        self.__writeCloseTag("returnStatement")

    def __convertOp(self, op):
        """Escapes the xml-reserved characters in an operator string so it
        can be embedded safely inside a tag body."""
        # single-pass translation table; equivalent to replacing '&' first
        # and then '<', '>', '"', since translate never re-scans the
        # entities it produces
        table = str.maketrans({
            "&": "&amp;",
            "<": "&lt;",
            ">": "&gt;",
            "\"": "&quot;",
        })
        return op.translate(table)

    def __compileExpression(self):
        """Compiles a complete jack expression grammar: term (op term)*"""
        self.__writeOpenTag("expression")

        # an expression always starts with at least one term
        self.__compileTerm()

        # greedily consume "op term" pairs while a binary operator follows
        # (op is the module-level set of operator symbols)
        while (self.tokenizer.tokenType() == "SYMBOL"
               and self.tokenizer.symbol() in op):
            sym = self.tokenizer.symbol()

            # emit the operator in xml-escaped form
            self.__writeFullTag("symbol", self.__convertOp(sym))
            self.tokenizer.advance()

            # the operator's right-hand term
            self.__compileTerm()

        self.__writeCloseTag("expression")

    def __compileTerm(self):
        """Compiles a complete jack term grammar:
        integerConstant | stringConstant | keywordConstant | varName |
        varName '[' expression ']' | subroutineCall |
        '(' expression ')' | unaryOp term

        When the term starts with an identifier, the tokenizer is advanced
        one token to peek at what follows ('[', '(' or '.') and then
        retreated so the matching sub-rule can re-read the identifier.
        Exits the process with status 1 on any syntax error.
        """
        # write opening tag
        self.__writeOpenTag("term")

        # term logic
        # check for integerConstant
        if self.tokenizer.tokenType() == "INT_CONST":
            self.__writeFullTag("integerConstant", self.tokenizer.intVal())
            self.tokenizer.advance()
        # check for string constant
        elif self.tokenizer.tokenType() == "STRING_CONST":
            self.__writeFullTag("stringConstant", self.tokenizer.stringVal())
            self.tokenizer.advance()
        # check for keyword for KeywordConstant (true/false/null/this)
        elif self.tokenizer.tokenType() == "KEYWORD":
            k = self.tokenizer.keyWord()

            if k not in KeyWordConstant:
                print("Error: invalid KeyWordConstant" + k + " in term")
                sys.exit(1)

            # write the keywordconstant
            self.__writeFullTag("keyword", k)
            self.tokenizer.advance()
        # check for symbol for either ( expression ) or unary op
        elif self.tokenizer.tokenType() == "SYMBOL":
            s = self.tokenizer.symbol()

            # ( expression )
            if s == "(":
                self.__writeFullTag("symbol", s)
                self.tokenizer.advance()

                # compile the parenthesized expression
                self.__compileExpression()

                # check for )
                if self.tokenizer.tokenType(
                ) != "SYMBOL" or self.tokenizer.symbol() != ")":
                    print("Error: missing ) after expression in term")
                    sys.exit(1)
                self.__writeFullTag("symbol", self.tokenizer.symbol())
                self.tokenizer.advance()
            # unaryOp term
            elif s in unaryOp:
                self.__writeFullTag("symbol", s)
                self.tokenizer.advance()

                # compile the operand term (recursion handles e.g. --x)
                self.__compileTerm()
            else:
                print("Error: invalid symbol " + s + " in term")
                sys.exit(1)
        # check for varName | varName [ expression ] | subroutineCall
        elif self.tokenizer.tokenType() == "IDENTIFIER":
            # advance the tokenizer one more step to check for [, (, or other
            self.tokenizer.advance()

            if self.tokenizer.tokenType() == "SYMBOL":
                s = self.tokenizer.symbol()

                # varName[expression]
                if s == "[":
                    # go back to varName so it can be re-read
                    self.tokenizer.retreat()

                    # compile varName
                    if not self.__compileVarName():
                        print("Error: invalid varName in term")
                        sys.exit(1)

                    # write [
                    self.__writeFullTag("symbol", self.tokenizer.symbol())
                    self.tokenizer.advance()

                    # compile the subscript expression
                    self.__compileExpression()

                    # write ]
                    if self.tokenizer.tokenType(
                    ) != "SYMBOL" or self.tokenizer.symbol() != "]":
                        print("Error: missing ] after varName[expression]")
                        sys.exit(1)
                    self.__writeFullTag("symbol", self.tokenizer.symbol())
                    self.tokenizer.advance()
                # subroutineCall: identifier followed by '(' or '.'
                elif s == "(" or s == ".":
                    # go back to subroutineName
                    self.tokenizer.retreat()

                    # compile subroutineCall
                    self.__compileSubroutineCall()
                else:
                    # plain varName: go back and re-read it
                    self.tokenizer.retreat()

                    # compile varName
                    if not self.__compileVarName():
                        print("Error: invalid varName in term")
                        sys.exit(1)
            else:
                # next token is not a symbol at all: plain varName
                self.tokenizer.retreat()

                # compile varName
                if not self.__compileVarName():
                    print("Error: invalid varName in term")
                    sys.exit(1)
        else:
            print("Error: invalid term")
            sys.exit(1)

        # close tag
        self.__writeCloseTag("term")

    def __compileSubroutineCall(self):
        """Compiles a complete jack subroutine call grammar:
        subroutineName '(' expressionList ')' |
        (className | varName) '.' subroutineName '(' expressionList ')'

        Looks one token past the leading identifier to decide which form it
        is ('(' vs '.'), then retreats and parses the chosen form.
        Exits the process with status 1 on any syntax error.
        """
        # look ahead one token to see if it is a ( or a .
        self.tokenizer.advance()

        # subroutineName ( expressionList )
        if self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol(
        ) == "(":
            # go back to subroutine name
            self.tokenizer.retreat()

            # compile subroutine name
            if not self.__compileSubroutineName():
                print("Error: invalid subroutineName in subroutineCall")
                sys.exit(1)

            # check for (
            # bug fix: this check previously used 'and', so a missing '('
            # was only reported when the token was also not a symbol
            if self.tokenizer.tokenType(
            ) != "SYMBOL" or self.tokenizer.symbol() != "(":
                print(
                    "Error: missing ( in subroutineCall before expressionList")
                sys.exit(1)
            # write (
            self.__writeFullTag("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()

            # compile the argument expressions
            self.__compileExpressionList()

            # check for )
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
            ) != ")":
                print(
                    "Error: missing ) after expressionList in subroutineCall")
                sys.exit(1)
            self.__writeFullTag("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
        # (className | varName) . subroutineName ( expressionList )
        elif self.tokenizer.tokenType() == "SYMBOL" and self.tokenizer.symbol(
        ) == ".":
            # go back to varName/className
            self.tokenizer.retreat()

            if self.tokenizer.tokenType() != "IDENTIFIER":
                print("Error: missing varName|className in subroutineCall")
            # Hacky, but className and varName both correspond to just an
            # identitifer, so I just call compileVarName to handle both
            if not self.__compileVarName():
                print("Error: invalid className or varName in subroutineCall")
                sys.exit(1)

            # check for .
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
            ) != ".":
                print("Error: missing . in subroutineCall")
                sys.exit(1)
            self.__writeFullTag("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()

            # compile subroutineName
            if not self.__compileSubroutineName():
                print(
                    "Error: missing subroutineName after . in subroutineCall")
                sys.exit(1)

            # check for (
            # bug fix: same 'and' -> 'or' correction as above
            if self.tokenizer.tokenType(
            ) != "SYMBOL" or self.tokenizer.symbol() != "(":
                print(
                    "Error: missing ( in subroutineCall before expressionList")
                sys.exit(1)
            # write (
            self.__writeFullTag("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()

            # compile the argument expressions
            self.__compileExpressionList()

            # check for )
            if self.tokenizer.tokenType() != "SYMBOL" or self.tokenizer.symbol(
            ) != ")":
                print(
                    "Error: missing ) after expressionList in subroutineCall")
                sys.exit(1)
            self.__writeFullTag("symbol", self.tokenizer.symbol())
            self.tokenizer.advance()
        else:
            print("Error: invalid subroutineCall")
            sys.exit(1)

    def __compileExpressionList(self):
        """Compiles a complete jack expression list grammar:
        (expression (',' expression)*)?
        """
        self.__writeOpenTag("expressionList")

        # an immediate ')' means the argument list is empty
        is_empty = (self.tokenizer.tokenType() == "SYMBOL"
                    and self.tokenizer.symbol() == ")")
        if not is_empty:
            # first expression
            self.__compileExpression()

            # remaining comma-separated expressions
            while (self.tokenizer.tokenType() == "SYMBOL"
                   and self.tokenizer.symbol() == ","):
                # emit the separator
                self.__writeFullTag("symbol", self.tokenizer.symbol())
                self.tokenizer.advance()

                # next expression
                self.__compileExpression()

        self.__writeCloseTag("expressionList")
コード例 #22
0
from JackTokenizer import JackTokenizer

# Minimal driver: build a tokenizer and walk the whole token stream.
# NOTE(review): 'foo' looks like a placeholder input path -- confirm
tokenizer = JackTokenizer('foo')
while tokenizer.hasMoreTokens():
    tokenizer.advance()
コード例 #23
0
class CompilationEngine:
    """Recursive-descent parser for the Jack language.

    Pulls tokens from a JackTokenizer and records the parse as a flat list
    of XML tag strings in self._tags; exportXML() writes the list to disk.
    Each compile_* method returns either a list of tag strings for the
    construct it parsed, or False when the construct is absent/malformed --
    callers use that return value both as output and as a lookahead test.
    """

    def __init__(self, path, filename):
        # tokenizer over the source .jack file
        self._jt = JackTokenizer(path, filename)
        # destination file for the generated XML
        self._opfilename = filename
        # accumulated XML output lines
        self._tags = []
        # parse eagerly; call exportXML() afterwards to persist the result
        self.compileClass()
        #print(*self._tags)

    def exportXML(self):
        """Write the accumulated XML tag lines to the output file."""
        with open(self._opfilename, 'w') as f:
            f.writelines(self._tags)

    def compileClass(self):
        """Compiles: 'class' className '{' classVarDec* subroutineDec* '}'.

        Unlike the other compile_* methods this appends its tags directly
        to self._tags instead of returning them.
        """
        self.start_non_terminal_tag('class', addToTags=True)
        self.move_to_next_token()
        self.eat_token('class', addToTags=True)
        self.move_to_next_token()
        self.eat_token_type('IDENTIFIER', addToTags=True)
        self.move_to_next_token()
        self.eat_token('{', addToTags=True)

        # consume class-level variable declarations until none match
        class_var_dec = self.compile_class_var_dec()
        while class_var_dec:
            self._tags += class_var_dec
            class_var_dec = self.compile_class_var_dec()
        else:
            # NOTE: this while has no break, so the else always runs; it
            # simply sequences the subroutine declarations afterwards
            subroutine_dec = self.compile_subroutine_dec()
            while subroutine_dec:
                self._tags += subroutine_dec
                subroutine_dec = self.compile_subroutine_dec()

        self.eat_token('}', addToTags=True)
        self.end_non_terminal_tag('class', addToTags=True)

    def compile_class_var_dec(self):
        """Compiles: ('static'|'field') type varName (',' varName)* ';'.

        Returns the tag list, or False when the next token is neither
        'static' nor 'field' (i.e. no class var dec is present).
        """
        tags = []
        tags.append(self.start_non_terminal_tag('classVarDec'))
        # 'static' or 'field' doubles as the lookahead test for this rule
        tags.append(self.eat_token('static') or self.eat_token('field'))
        if False in tags:
            return False
        tags.append(self.compile_type())
        tags.append(self.eat_token_type(self.get_token_type('varName')))
        # additional ', varName' pairs
        collect = self.eat_token(',')
        while collect:
            tags.append(collect)
            self.move_to_next_token()
            tags.append(self.eat_token_type(self.get_token_type('varName')))
            self.move_to_next_token()
            collect = self.eat_token(',')
        else:
            # no break above, so this always runs: consume the ';'
            tags.append(self.eat_token(';'))

        tags.append(self.end_non_terminal_tag('classVarDec'))

        return self.validate_tags(tags)

    def compile_type(self):
        """Compiles: 'int' | 'char' | 'boolean' | className.

        Returns the matching tag string, or False if no type is present.
        """
        return self.eat_token('int') or self.eat_token(
            'char') or self.eat_token('boolean') or self.eat_token_type(
                self.get_token_type('className'))

    def compile_subroutine_dec(self):
        """Compiles: ('constructor'|'function'|'method') ('void'|type)
        subroutineName '(' parameterList ')' subroutineBody.

        Returns the tag list, or False when no subroutine dec is present.
        """
        tags = []
        tags.append(self.start_non_terminal_tag('subroutineDec'))

        self.move_to_next_token()
        tags.append(
            self.eat_token('constructor') or self.eat_token('function')
            or self.eat_token('method'))

        # the keyword doubles as the lookahead test for this rule
        if False in tags:
            return False

        self.move_to_next_token()
        tags.append(self.eat_token('void') or self.compile_type())

        self.move_to_next_token()
        tags.append(self.eat_token_type(self.get_token_type('subroutineName')))

        self.move_to_next_token()
        tags.append(self.eat_token('('))

        parameter_list_tags = self.compileParameterList()
        if parameter_list_tags:
            tags += parameter_list_tags

        self.move_to_next_token()
        tags.append(self.eat_token(')'))

        subroutine_body_tags = self.compile_subroutine_body()
        if subroutine_body_tags:
            tags += subroutine_body_tags

        tags.append(self.end_non_terminal_tag('subroutineDec'))

        return self.validate_tags(tags)

    def compileParameterList(self):
        """Compiles: ((type varName) (',' type varName)*)?.

        An empty parameter list is still valid and yields just the
        open/close parameterList tags.
        """
        self.move_to_next_token()
        type = self.compile_type()

        if not type:
            # no leading type -> empty parameter list
            tags = [
                self.start_non_terminal_tag('parameterList'),
                self.end_non_terminal_tag('parameterList')
            ]
            return self.validate_tags(tags)
        else:
            tags = [self.start_non_terminal_tag('parameterList'), type]

            self.move_to_next_token()
            tags.append(self.eat_token_type(self.get_token_type('varName')))

            self.move_to_next_token()
            is_comma = self.eat_token(',')

            # additional ', type varName' groups
            while is_comma:
                tags.append(is_comma)

                self.move_to_next_token()
                tags.append(self.compile_type())

                self.move_to_next_token()
                tags.append(self.eat_token_type(
                    self.get_token_type('varName')))

                self.move_to_next_token()
                is_comma = self.eat_token(',')

            tags.append(self.end_non_terminal_tag('parameterList'))
            return self.validate_tags(tags)

    def compile_subroutine_body(self):
        """Compiles: '{' varDec* statements '}'."""
        tags = [self.start_non_terminal_tag('subroutineBody')]

        self.move_to_next_token()
        tags.append(self.eat_token('{'))

        # local variable declarations come first
        var_dec_tags = self.compile_var_dec()
        while var_dec_tags:
            tags += var_dec_tags
            var_dec_tags = self.compile_var_dec()

        statements_tags = self.compile_statements()
        if statements_tags:
            tags += statements_tags

        self.move_to_next_token()
        tags.append(self.eat_token('}'))

        tags.append(self.end_non_terminal_tag('subroutineBody'))
        return self.validate_tags(tags)

    def compile_var_dec(self):
        """Compiles: 'var' type varName (',' varName)* ';'.

        Returns the tag list, or False (via validate_tags) when the next
        token is not 'var'.
        """
        tags = [self.start_non_terminal_tag('varDec')]

        self.move_to_next_token()
        tags.append(self.eat_token('var'))

        self.move_to_next_token()
        tags.append(self.compile_type())

        self.move_to_next_token()
        tags.append(self.eat_token_type(self.get_token_type('varName')))

        self.move_to_next_token()
        is_comma = self.eat_token(',')
        while is_comma:
            tags.append(is_comma)

            self.move_to_next_token()
            tags.append(self.eat_token_type(self.get_token_type('varName')))

            self.move_to_next_token()
            is_comma = self.eat_token(',')
        else:
            # no break above, so this always runs: consume the ';'
            tags.append(self.eat_token(';'))

        tags.append(self.end_non_terminal_tag('varDec'))
        return self.validate_tags(tags)

    def compile_statements(self):
        """Compiles: statement* (let | if | while | do | return)."""
        tags = [self.start_non_terminal_tag('statements')]

        statement = self.compile_statement()
        while statement:
            tags += statement
            statement = self.compile_statement()

        tags.append(self.end_non_terminal_tag('statements'))

        return self.validate_tags(tags)

    def compile_statement(self):
        """Tries each statement kind in turn; returns the first match's
        tags, or False when no statement is present."""
        return (self.compile_let_statement() or self.compile_if_statement()
                or self.compile_while_statement()
                or self.compile_do_statement()
                or self.compile_return_statement())

    def compile_let_statement(self):
        """Compiles: 'let' varName ('[' expression ']')? '=' expression ';'."""
        tags = [self.start_non_terminal_tag('letStatement')]
        self.move_to_next_token()
        tags.append(self.eat_token('let'))
        # 'let' doubles as the lookahead test for this rule
        if False in tags:
            return False
        self.move_to_next_token()
        tags.append(self.eat_token_type(self.get_token_type('varName')))

        # optional array subscript
        self.move_to_next_token()
        bracket = self.eat_token('[')
        if bracket:
            tags.append(bracket)
            tags += self.compile_expression()
            tags.append(self.eat_token(']'))

        self.move_to_next_token()
        tags.append(self.eat_token('='))

        tags += self.compile_expression()

        self.move_to_next_token()
        tags.append(self.eat_token(';'))

        tags.append(self.end_non_terminal_tag('letStatement'))
        return self.validate_tags(tags)

    def compile_if_statement(self):
        """Compiles: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?."""
        tags = [self.start_non_terminal_tag('ifStatement')]

        self.move_to_next_token()
        tags.append(self.eat_token('if'))
        # 'if' doubles as the lookahead test for this rule
        if False in tags:
            return False

        self.move_to_next_token()
        tags.append(self.eat_token('('))
        tags += self.compile_expression()

        self.move_to_next_token()
        tags.append(self.eat_token(')'))

        self.move_to_next_token()
        tags.append(self.eat_token('{'))

        tags += self.compile_statements()

        self.move_to_next_token()
        tags.append(self.eat_token('}'))

        # optional else clause
        else_tag = self.eat_token('else')
        if else_tag:
            tags.append(else_tag)
            self.move_to_next_token()
            tags.append(self.eat_token('{'))
            tags += self.compile_statements()
            self.move_to_next_token()
            tags.append(self.eat_token('}'))

        tags.append(self.end_non_terminal_tag('ifStatement'))
        return self.validate_tags(tags)

    def compile_while_statement(self):
        """Compiles: 'while' '(' expression ')' '{' statements '}'."""
        # 'while' doubles as the lookahead test for this rule
        tags = [self.eat_token('while'), self.eat_token('(')]
        if False in tags:
            return False
        tags += self.compile_expression()
        tags.append(self.eat_token(')'))
        tags.append(self.eat_token('{'))
        tags += self.compile_statements()
        tags.append(self.eat_token('}'))
        tags = self.add_non_terminal_tags('whileStatement', tags)
        return self.validate_tags(tags)

    def compile_do_statement(self):
        """Compiles: 'do' subroutineCall ';'."""
        tags = [
            self.eat_token('do'),
            self.eat_token_type(self.get_token_type('subroutineName'))
        ]
        # 'do' + identifier double as the lookahead test for this rule
        if False in tags:
            return False
        # a following '(' means an in-class call; otherwise expect '.'
        paren_tag = self.eat_token('(')
        if paren_tag:
            tags.append(paren_tag)
            tags += self.compile_expression_list()
            tags.append(self.eat_token(')'))
        else:
            tags.append(self.eat_token('.'))
            tags.append(
                self.eat_token_type(self.get_token_type('subroutineName')))
            tags.append(self.eat_token('('))
            tags += self.compile_expression_list()
            tags.append(self.eat_token(')'))

        tags.append(self.eat_token(';'))
        tags = self.add_non_terminal_tags('doStatement', tags)
        return self.validate_tags(tags)

    def compile_return_statement(self):
        """Compiles: 'return' expression? ';'."""
        # 'return' doubles as the lookahead test for this rule
        tags = [self.eat_token('return')]
        if False in tags:
            return False
        expression_tags = self.compile_expression()
        if expression_tags:
            tags += expression_tags
        tags.append(self.eat_token(';'))
        tags = self.add_non_terminal_tags('returnStatement', tags)
        return self.validate_tags(tags)

    def compile_expression(self):
        """Compiles: term (op term)*.

        Returns False when no leading term is present. The operator symbols
        are matched in their xml-escaped form (&amp;, &lt;, &gt;), which
        assumes the tokenizer already escapes them -- confirm in
        JackTokenizer.
        """
        tags = [self.start_non_terminal_tag('expression')]

        term_tags = self.compile_term()
        if term_tags:
            tags += term_tags
        else:
            return False

        self.move_to_next_token()
        while self._jt.token in ('+', '-', '*', '/', '&amp;', '|', '&lt;',
                                 '&gt;', '='):
            tags.append(self.eat_token_type('SYMBOL'))
            tags += self.compile_term()
            self.move_to_next_token()

        tags.append(self.end_non_terminal_tag('expression'))

        return self.validate_tags(tags)

    def compile_term(self):
        """Compiles a jack term: constants, keyword constants, varName,
        array access, subroutine calls, parenthesized expressions and
        unary-op terms. Returns False when no term is present.
        """
        tags = []

        self.move_to_next_token()
        # simplest cases first: integer and string constants
        constant = (self.eat_token_type(self.get_token_type('integerConstant'))
                    or self.eat_token_type(
                        self.get_token_type('stringConstant')))

        # keyword constants: this / null / true / false
        if not constant and self._jt.token in ('this', 'null', 'true',
                                               'false'):
            constant = self.eat_token_type('KEYWORD')

        if not constant:
            identifier = self.eat_token_type('IDENTIFIER')
            if identifier:
                self.move_to_next_token()  # checking the next token
                if self._jt.token == '[':  # is it an array invocation?
                    tags.append(identifier)
                    tags.append(self.eat_token('['))
                    tags += self.compile_expression()
                    self.move_to_next_token()
                    tags.append(self.eat_token(']'))
                elif self._jt.token in ('.', '('):  # is it a subroutine call?
                    tags.append(identifier)
                    if self._jt.token == '(':  # its a subroutine call within the Jack class
                        tags.append(self.eat_token('('))
                        tags += self.compile_expression_list()
                        self.move_to_next_token()
                        # bug fix: the ')' tag used to be discarded
                        # (eat_token's return value was dropped), so the
                        # closing paren never appeared in the XML and a
                        # missing ')' went undetected
                        tags.append(self.eat_token(')'))
                    else:  #its a subroutine call outside this Jack class
                        tags.append(self.eat_token('.'))
                        self.move_to_next_token()
                        tags.append(
                            self.eat_token_type(
                                self.get_token_type('subroutineName')))
                        self.move_to_next_token()
                        tags.append(self.eat_token('('))
                        tags += self.compile_expression_list()
                        self.move_to_next_token()
                        tags.append(self.eat_token(')'))
                else:
                    tags.append(identifier)  # just varName is given
            elif self._jt.tokenType == 'SYMBOL':
                if self._jt.token == '(':
                    # parenthesized sub-expression
                    tags.append(self.eat_token('('))
                    tags += self.compile_expression()
                    tags.append(self.eat_token(')'))
                elif self._jt.token in ('~', '-'):
                    # unary operator followed by another term
                    tags.append(self.eat_token('~') or self.eat_token('-'))
                    tags += self.compile_term()
        else:
            tags.append(constant)

        if len(tags) > 0:
            tags = self.add_non_terminal_tags('term', tags)
            return self.validate_tags(tags)
        else:
            return False

    def compile_expression_list(self):
        """Compiles: (expression (',' expression)*)?."""
        tags = [self.start_non_terminal_tag('expressionList')]

        expression_tags = self.compile_expression()
        if expression_tags:
            tags += expression_tags

        self.move_to_next_token()
        is_comma = self.eat_token(',')
        while is_comma:
            tags.append(is_comma)
            self.move_to_next_token()
            expression = self.compile_expression()
            if expression:
                tags += expression
            is_comma = self.eat_token(',')

        tags.append(self.end_non_terminal_tag('expressionList'))
        return self.validate_tags(tags)

    # maps grammar meta-symbols to the tokenizer's token-type names.
    # bug fix: the one-element keys were previously plain parenthesized
    # strings (('x') == 'x'), which silently turned get_token_type's
    # membership test into a substring check; real tuples restore exact
    # matching with identical results for all current callers
    GRAMMAR_KEYWORD = {
        ('varName', 'className', 'subroutineName'): 'IDENTIFIER',
        ('integerConstant',): 'INT_CONST',
        ('stringConstant',): 'STRING_CONST'
    }

    def get_token_type(self, token):
        """Map a grammar meta-symbol (e.g. 'varName') to its token-type
        name; raises if the symbol is not in GRAMMAR_KEYWORD."""
        for t in self.GRAMMAR_KEYWORD:
            if token in t:
                return self.GRAMMAR_KEYWORD[t]
        else:
            # for/else: runs only when the loop finds no match
            raise Exception(token + ' not found in grammar')

    def eat_token(self, token, addToTags=False):
        """Consume the current token if it equals *token*.

        Returns the token's xml tag string on success, False otherwise.
        tokenConsumed is only set on success, so a failed eat acts as pure
        lookahead (presumably advance() re-serves an unconsumed token --
        confirm in JackTokenizer).
        """
        self.move_to_next_token()
        if self._jt.token == token:
            self._jt.tokenConsumed = True

            xmltag = self.xmltag()
            if addToTags:
                self._tags.append(xmltag)
            return xmltag
        else:
            return False

    def eat_token_type(self, tokenType, addToTags=False):
        """Consume the current token if its type equals *tokenType*.

        Returns the token's xml tag string on success, False otherwise.
        """
        self.move_to_next_token()
        if self._jt.tokenType == tokenType:
            self._jt.tokenConsumed = True
            xmltag = self.xmltag()
            if addToTags:
                self._tags.append(xmltag)
            return xmltag
        else:
            return False

    def move_to_next_token(self):
        """Advance the tokenizer if input remains; no-op at end of input."""
        if self._jt.hasMoreTokens():
            self._jt.advance()

    def xmltag(self):
        """Render the current token as '<type> token </type>\\n'."""
        type_ = self._jt.tokenTypeXmlTags[self._jt.tokenType]
        return '<' + type_ + '> ' + self._jt.token + ' </' + type_ + '>\n'

    def add_non_terminal_tags(self, name, tags):
        """Wrap *tags* in the open/close tags of non-terminal *name*."""
        return [self.start_non_terminal_tag(name)
                ] + tags + [self.end_non_terminal_tag(name)]

    def start_non_terminal_tag(self, tag, addToTags=False):
        """Return (and optionally record) the opening tag '<tag>\\n'."""
        tag_n_ = '<' + tag + '>\n'
        if addToTags:
            self._tags.append(tag_n_)
        return tag_n_

    def end_non_terminal_tag(self, tag, addToTags=False):
        """Return (and optionally record) the closing tag '</tag>\\n'."""
        tag_n_ = '</' + tag + '>\n'
        if addToTags:
            self._tags.append(tag_n_)
        return tag_n_

    def validate_tags(self, tags):
        """Return *tags* unchanged unless any sub-rule failed (False is in
        the list), in which case the whole construct is rejected."""
        if False not in tags:
            return tags
        else:
            return False