def main():
    """Manual smoke check: dump every token of 'Mytestfor10.jack', then
    compile an if-statement into a fresh 'do' element tree."""
    tokenizer = Tokenizer('Mytestfor10.jack')
    while tokenizer.has_more_tokens():
        tokenizer.advance()
        print(tokenizer.token_type(), tokenizer.identifier())
    engine = CompilationEngine('Mytestfor10.jack')
    parent = Element('do')
    engine.compile_if(parent)
예제 #2
0
def test_issues__semicolon_missing_from_outfile():
    """Regression test: every ';' must come back as its own token."""
    fake_file = BytesIO(b"""
      var char key;  // the key currently pressed by the user
      var boolean exit;
      let exit = false;
    """)

    t = Tokenizer(fake_file)
    expected = [
        'var', 'char', 'key', ';',
        'var', 'boolean', 'exit', ';',
        'let', 'exit', '=', 'false', ';',
    ]
    for want in expected:
        assert (t.advance().token == want)

    # a statement glued to its parentheses must still split cleanly
    fake_file = BytesIO(b"        return();")
    t = Tokenizer(fake_file)
    for want in ['return', '(', ')', ';']:
        assert (t.advance().token == want)
예제 #3
0
def test_base():
    """The tokenizer classifies keywords, identifiers and symbols."""
    fake_file = BytesIO(b"method void dispose();")
    t = Tokenizer(fake_file)

    expected = [
        ('method', 'keyword'),
        ('void', 'keyword'),
        ('dispose', 'identifier'),
        ('(', 'symbol'),
        (')', 'symbol'),
        (';', 'symbol'),
    ]
    for want_token, want_type in expected:
        token = t.advance()
        assert (token.token == want_token)
        assert (token.type == want_type)
def test_compile_subroutine_desc():
    """compile_subroutine_body builds a subroutineBody node and leaves the
    tokenizer positioned on the first token after the closing brace."""
    fake_file = BytesIO(b"""
        {
            return this;
        }
    NEXTTOKEN
    """)
    tokenizer = Tokenizer(fake_file)
    engine = CompilationEngine(tokenizer)

    body = engine.compile_subroutine_body()
    assert body.name == 'subroutineBody'

    opener, statements_node, closer = body.value[0], body.value[1], body.value[2]
    assert opener.value == '{'
    statements = statements_node.value
    assert closer.value == '}'

    ret = statements[0]
    assert ret.name == 'returnStatement'
    assert ret.value[0].value == 'return'
    assert ret.value[1].name == 'expression'
    assert ret.value[2].value == ';'

    # the '}' was consumed, so the next token is outside the body
    assert tokenizer.advance().token == 'NEXTTOKEN'
예제 #5
0
class CompilationEngine:
    """Compiles a tokenized Jack program.

    NOTE(review): this class looks mid-refactor from an XML emitter to a
    VM-code emitter.  The commented-out tag/``write_xml`` lines are the old
    XML output, yet ``compile_while``/``compile_return``/``compile_if`` and
    the expression methods still write XML tags through ``self.__output``,
    which is constructed as a VMWriter here -- confirm which backend is
    actually intended before relying on this class.
    """

    # template for a single XML line: <tag> value </tag>
    XML_LINE = "<{0}> {1} </{0}>\n"
    # symbols that must be escaped when written as XML text
    COMPARE_SYM_REPLACER = {
        '<': "&lt;",
        '>': "&gt;",
        '"': "&quot;",
        '&': "&amp;"
    }
    # keyword constants of the Jack expression grammar
    KEYWORD_CONSTANT = ("true", "false", "null", "this")

    def __init__(self, input_stream, output_stream):
        """
        constructor of the Compilation Engine object
        :param input_stream: the input stream
        :param output_stream: the output stream
        """
        self.__tokenizer = Tokenizer(input_stream)  # Tokenizer object
        self.__output = VMWriter(output_stream)
        self.__symbol = SymbolTable()
        self.__class_name = ""
        # dispatch table: statement keyword -> compile method
        self.__statements = {
            "let": self.compile_let,
            "if": self.compile_if,
            "while": self.compile_while,
            "do": self.compile_do,
            "return": self.compile_return
        }
        # compilation is driven entirely from the constructor
        self.compile_class()
        # self.__output.close()

    def write_xml(self):
        """
        writing xml line

        Writes the current token as one ``<type> value </type>`` line,
        escaping XML-reserved symbols; string constants bypass the escape
        table.  Only used by the XML-emitting code paths.
        """
        if self.__tokenizer.token_type() == "stringConstant":
            self.__output.write(
                self.XML_LINE.format(self.__tokenizer.token_type(),
                                     self.__tokenizer.string_val()))
        elif self.__tokenizer.get_value() in self.COMPARE_SYM_REPLACER:
            xml_val = self.COMPARE_SYM_REPLACER[self.__tokenizer.get_value()]
            self.__output.write(
                self.XML_LINE.format(self.__tokenizer.token_type(), xml_val))
        else:
            self.__output.write(
                self.XML_LINE.format(self.__tokenizer.token_type(),
                                     self.__tokenizer.get_value()))

    def compile_class(self):
        """
        compiling the program from the class definition
        """
        # self.__output.write("<class>\n")
        # self.write_xml()
        self.__tokenizer.advance()  # skip "class"
        self.__class_name = self.__tokenizer.get_value()
        # self.write_xml()
        self.__tokenizer.advance()  # skip class name
        # self.write_xml()
        self.__tokenizer.advance()  # skip {
        current_token = self.__tokenizer.get_value()
        # class-level variable declarations come first ...
        while current_token == "static" or current_token == "field":
            self.compile_class_var_dec()
            current_token = self.__tokenizer.get_value()
        # ... followed by the subroutine declarations
        while current_token == "constructor" or current_token == "function" or current_token == "method":
            self.compile_subroutine_dec()
            current_token = self.__tokenizer.get_value()
        # self.write_xml()
        # self.__output.write("</class>\n")
        self.__output.close()

    def compile_class_var_dec(self):
        """
        compiling the program from the class's declaration on vars
        """
        current_token = self.__tokenizer.get_value()
        while current_token == "static" or current_token == "field":
            # self.__output.write("<classVarDec>\n")
            # self.write_xml()
            index = self.__symbol.var_count(current_token)
            self.__tokenizer.advance()  # get token type
            token_type = self.__tokenizer.get_value()
            # NOTE(review): pushing a segment while merely *declaring* a
            # variable looks wrong -- declarations normally only update the
            # symbol table.  Verify against the VMWriter API.
            self.__output.write_push(current_token, index)
            self.__tokenizer.advance()  # get token name
            token_name = self.__tokenizer.get_value()
            self.__symbol.define(token_name, token_type, current_token)
            self.__tokenizer.advance()
            # self.write_xml()
            # self.__tokenizer.advance()
            # self.write_xml()
            # self.__tokenizer.advance()
            while self.__tokenizer.get_value() == ",":
                # self.write_xml()  # write ,
                self.__tokenizer.advance()  # get token name
                token_name = self.__tokenizer.get_value()
                index = self.__symbol.var_count(current_token)  # get new index
                self.__output.write_push(current_token, index)
                self.__symbol.define(token_name, token_type, current_token)
                self.__tokenizer.advance()
                # self.write_xml()  # write value
                # self.__tokenizer.advance()
            # self.write_xml()
            self.__tokenizer.advance()
            current_token = self.__tokenizer.get_value()
            # self.__output.write("</classVarDec>\n")

    def compile_subroutine_body(self):
        """
        compiling the program's subroutine body
        """
        # self.__output.write("<subroutineBody>\n")
        # self.write_xml()  # write {
        self.__tokenizer.advance()  # skip {
        while self.__tokenizer.get_value() == "var":
            self.compile_var_dec()
        self.compile_statements()
        # self.write_xml()  # write }
        self.__tokenizer.advance()  # skip }
        # self.__output.write("</subroutineBody>\n")

    def compile_subroutine_dec(self):
        """
        compiling the program's subroutine declaration
        """
        # self.__output.write("<subroutineDec>\n")
        # self.write_xml()  # write constructor/function/method

        self.__tokenizer.advance()  # skip constructor/function/method
        return_value = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        func_name = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        func_args = self.compile_parameter_list()
        self.__output.write_function(func_name, func_args)
        self.compile_subroutine_body()
        if return_value == "void":
            # NOTE(review): popping temp 0 here, inside the callee after its
            # body, differs from the usual convention of discarding a void
            # call's return value at the call site -- confirm.
            self.__output.write_pop("temp", "0")
        # self.__output.write("</subroutineDec>\n")

    def compile_parameter_list(self):
        """
        compiling a parameter list
        :return: the number of parameters compiled
        """
        # todo returns the number of args !
        # self.write_xml()  # write (
        counter = 0
        self.__tokenizer.advance()  # skip (
        # self.__output.write("<parameterList>\n")
        if self.__tokenizer.get_value() != ")":
            # self.write_xml()  # write type
            self.__tokenizer.advance()  # skip type
            # self.write_xml()  # write varName
            self.__tokenizer.advance()  # skip var name
            counter += 1
            while self.__tokenizer.get_value() == ",":
                # self.write_xml()  # write ,
                self.__tokenizer.advance()  # skip ,
                # self.write_xml()  # type
                self.__tokenizer.advance()  # skip type
                # self.write_xml()  # varName
                self.__tokenizer.advance()  # skip varName
                counter += 1
        # self.__output.write("</parameterList>\n")
        # self.write_xml()  # write )
        self.__tokenizer.advance()
        return counter

    def compile_var_dec(self):
        """
        compiling function's var declaration
        """
        # self.__output.write("<varDec>\n")
        # self.write_xml()  # write var
        token_kind = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        # self.write_xml()  # write type
        token_type = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        # self.write_xml()  # write varName
        token_name = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        index = self.__symbol.var_count(token_kind)
        # NOTE(review): as in compile_class_var_dec, pushing while declaring
        # a local looks wrong -- verify.
        self.__output.write_push(token_kind, index)
        self.__symbol.define(token_name, token_type, token_kind)
        while self.__tokenizer.get_value() == ",":
            # self.write_xml()  # write ,
            self.__tokenizer.advance()  # skip ,
            # self.write_xml()
            token_name = self.__tokenizer.get_value()
            index = self.__symbol.var_count(token_kind)
            self.__output.write_push(token_kind, index)
            self.__symbol.define(token_name, token_type, token_kind)
            self.__tokenizer.advance()
        # self.write_xml()  # write ;
        self.__tokenizer.advance()  # skip ;
        # self.__output.write("</varDec>\n")

    def compile_statements(self):
        """
        compiling statements
        """
        key = self.__tokenizer.get_value()
        # self.__output.write("<statements>\n")
        if key != "}":
            # each handler consumes its whole statement, so the token under
            # the cursor changes on every iteration
            while key in self.__statements:
                self.__statements[self.__tokenizer.get_value()]()
                key = self.__tokenizer.get_value()
        # self.__output.write("</statements>\n")

    def compile_do(self):
        """
        compiling do call
        """
        # self.__output.write("<doStatement>\n")
        # self.write_xml()  # write do
        self.__tokenizer.advance()  # skip do
        self.subroutine_call()
        # self.write_xml()  # write ;
        self.__tokenizer.advance()  # skip ;
        # self.__output.write("</doStatement>\n")

    def compile_let(self):
        """
        compiling let call
        """
        # self.__output.write("<letStatement>\n")
        # self.write_xml()  # write let
        self.__tokenizer.advance()  # skip let
        # self.write_xml()  # write varName
        var_name = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        # if self.__tokenizer.get_value() == "[":  # todo handle array
        #     self.write_xml()  # write [
        #     self.__tokenizer.advance()
        #     self.compile_expression()
        #     self.write_xml()  # write ]
        #     self.__tokenizer.advance()
        # self.write_xml()  # write =
        self.__tokenizer.advance()  # skip =
        self.compile_expression()  # todo push the value to the stack
        # self.write_xml()  # write ;
        self.__tokenizer.advance()  # skip ;
        # self.__output.write("</letStatement>\n")
        # store the expression result into the target variable's segment
        var_kind = self.__symbol.kind_of(var_name)
        var_index = self.__symbol.index_of(var_name)
        self.__output.write_pop(var_kind, var_index)

    def compile_while(self):
        """
        compiling while loop call

        NOTE(review): from here down the methods still emit XML tags through
        ``self.__output`` (a VMWriter here) -- leftovers of the XML version.
        """
        self.__output.write("<whileStatement>\n")
        self.write_xml()  # write while
        self.__tokenizer.advance()
        self.write_xml()  # write (
        self.__tokenizer.advance()
        self.compile_expression()
        self.write_xml()  # write )
        self.__tokenizer.advance()
        self.write_xml()  # write {
        self.__tokenizer.advance()
        self.compile_statements()
        self.write_xml()  # write }
        self.__tokenizer.advance()
        self.__output.write("</whileStatement>\n")

    def compile_return(self):
        """
        compiling return statement
        """
        self.__output.write("<returnStatement>\n")
        self.write_xml()  # write return
        self.__tokenizer.advance()
        # a non-void return carries an expression before the ';'
        if self.__tokenizer.get_value() != ";":
            self.compile_expression()
        self.write_xml()  # write ;
        self.__tokenizer.advance()
        self.__output.write("</returnStatement>\n")

    def compile_if(self):
        """
        compiling if condition
        """
        self.__output.write("<ifStatement>\n")
        self.write_xml()  # write if
        self.__tokenizer.advance()
        self.write_xml()  # write (
        self.__tokenizer.advance()
        self.compile_expression()
        self.write_xml()  # write )
        self.__tokenizer.advance()
        self.write_xml()  # write {
        self.__tokenizer.advance()
        self.compile_statements()
        self.write_xml()  # write }
        self.__tokenizer.advance()
        # optional else clause
        if self.__tokenizer.get_value() == "else":
            self.write_xml()  # write else
            self.__tokenizer.advance()
            self.write_xml()  # write {
            self.__tokenizer.advance()
            self.compile_statements()
            self.write_xml()  # write }
            self.__tokenizer.advance()
        self.__output.write("</ifStatement>\n")

    def compile_expression(self):
        """
        compiling expressions

        Grammar: term (op term)*
        """
        self.__output.write("<expression>\n")
        self.compile_term()
        while self.__tokenizer.is_operator():
            self.write_xml()  # write the operator
            self.__tokenizer.advance()
            self.compile_term()
        self.__output.write("</expression>\n")

    def compile_term(self):
        """
        compiling any kind of terms
        """
        # dealing with unknown token
        self.__output.write("<term>\n")
        curr_type = self.__tokenizer.token_type()
        # handle consts
        if curr_type == "integerConstant" or curr_type == "stringConstant":
            self.write_xml()  # write the int \ string
            self.__tokenizer.advance()

        # handle const keyword
        elif curr_type == "keyword" and self.__tokenizer.get_value(
        ) in self.KEYWORD_CONSTANT:
            self.__tokenizer.set_type("keywordConstant")
            self.write_xml()  # write key word
            self.__tokenizer.advance()

        elif curr_type == "identifier":
            # handle var names (one-token lookahead rules out a call)
            if self.__tokenizer.get_next_token(
            ) != "(" and self.__tokenizer.get_next_token() != ".":
                self.write_xml()  # write the var name
                self.__tokenizer.advance()
                if self.__tokenizer.get_value() == "[":
                    self.write_xml()  # write [
                    self.__tokenizer.advance()
                    self.compile_expression()
                    self.write_xml()  # write ]
                    self.__tokenizer.advance()
            # handle function calls
            else:
                self.subroutine_call()
        # handle expression
        elif curr_type == "symbol" and self.__tokenizer.get_value() == "(":
            self.write_xml()  # write (
            self.__tokenizer.advance()
            self.compile_expression()
            self.write_xml()  # write )
            self.__tokenizer.advance()

        # handle - \ ~
        elif self.__tokenizer.get_value() == "-" or self.__tokenizer.get_value(
        ) == "~":
            self.write_xml()  # write -\~
            self.__tokenizer.advance()
            self.compile_term()
        self.__output.write("</term>\n")

    def subroutine_call(self):
        """
        compiling the program's subroutine call
        """
        # qualified call: name '.' name '(' ... ')'
        if self.__tokenizer.get_next_token() == ".":
            self.write_xml()  # write name
            self.__tokenizer.advance()
            self.write_xml()  # write .
            self.__tokenizer.advance()
        self.write_xml()  # write name
        self.__tokenizer.advance()
        self.write_xml()  # write (
        self.__tokenizer.advance()
        self.compile_expression_list()
        self.write_xml()  # write )
        self.__tokenizer.advance()

    def compile_expression_list(self):
        """
        compiling expression list
        """
        self.__output.write("<expressionList>\n")
        # an immediate ')' means the list is empty
        if self.__tokenizer.get_value() != ")":
            self.compile_expression()
            while self.__tokenizer.get_value() == ",":
                self.write_xml()  # write ,
                self.__tokenizer.advance()
                self.compile_expression()
        self.__output.write("</expressionList>\n")
class CompilationEngine:

    def __init__(self, source):
        self.tokenizer = Tokenizer(source)
        self.tokenizer.has_more_tokens()
        self.tokenizer.advance()
        self.root = Element(CLASS)
        self.compile_class(self.root)

    def next(self):
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()

    def get_xml(self):
        """
        Returns a textual XML representation of the program structure.
        :return:
        """
        return prettify(self.root)[23:]

    def compile_expression(self,caller):
        """
        Compiles an expression.
        :param caller:
        :return:
        """
        self.compile_term(SubElement(caller,TERM))
        while self.tokenizer.token_type() is JTok.SYMBOL and self.tokenizer.symbol() in OPERATORS:
            SubElement(caller,SYMBOL).text = self.tokenizer.symbol()
            self.next()
            self.compile_term(SubElement(caller,TERM))

    def compile_expressionList(self,caller):
        """

        :param caller:
        :return:
        """
        #  if expression list is empty
        if self.tokenizer.token_type() is JTok.SYMBOL and self.tokenizer.symbol() == ")":
            caller.text = "\n"
            return

        self.compile_expression(SubElement(caller,EXPRESSION))
        while self.tokenizer.token_type() is JTok.SYMBOL and self.tokenizer.symbol() == ",":
            SubElement(caller,SYMBOL).text = self.tokenizer.symbol()
            self.next()
            self.compile_expression(SubElement(caller,EXPRESSION))

    def compile_subroutineCall(self,caller,first_token):
        """
        First token, the first identifier must be sent manually, so the method
        expects the current token to be the second in the specification.
        :param caller:
        :param first_token:
        :return:
        """
        SubElement(caller, IDENTIFIER).text = first_token
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()

        if self.tokenizer.symbol() == '.':
            self.next()

            SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()
            self.next()

            SubElement(caller,SYMBOL).text = self.tokenizer.symbol()

        self.next()
        self.compile_expressionList(SubElement(caller, EXPRESSION_LIST))

        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
        self.next()


    def compile_term(self,caller):
        """

        :param caller:
        :return:
        """
        type = self.tokenizer.token_type()
        if type is JTok.INT_CONST:
            SubElement(caller, INTEGER_CONSTANT).text = str(self.tokenizer.intVal())
            self.next()

        elif type is JTok.STRING_CONST:
            SubElement(caller, STRING_CONSTANT).text = self.tokenizer.string_val()
            self.next()

        elif type is JTok.KEYWORD:
            SubElement(caller, KEYWORD).text = self.tokenizer.key_word()
            self.next()

        elif type is JTok.IDENTIFIER:
            name = self.tokenizer.identifier()

            self.next()
            type = self.tokenizer.token_type()

            if type is JTok.SYMBOL and self.tokenizer.symbol() in {".", "("}:
                    self.compile_subroutineCall(caller,name)

            elif type is JTok.SYMBOL and self.tokenizer.symbol() == '[':
                SubElement(caller, IDENTIFIER).text = name
                SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
                self.next()

                self.compile_expression(SubElement(caller, EXPRESSION))

                SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
                self.next()

            else:
                SubElement(caller, IDENTIFIER).text = name

        elif type is JTok.SYMBOL:
            if self.tokenizer.symbol() == '(':
                SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
                self.next()

                self.compile_expression(SubElement(caller, EXPRESSION))
                SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
                self.next()

            elif self.tokenizer.symbol() in {'-','~'}:
                SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
                self.next()
                self.compile_term(SubElement(caller,TERM))



    def compile_do(self, caller):
        """
        format : 'do' subroutineCall ';'
        :param caller:
        :return:
        """

        SubElement(caller, KEYWORD).text = self.tokenizer.key_word()
        self.next()

        name = self.tokenizer.identifier()
        self.next()

        self.compile_subroutineCall(caller,name)

        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set ';'
        self.next()

    def compile_let(self, caller):
        """
        format : 'let' varName ( '[' expression ']' )? '=' expression ';'
        :param caller:
        :return:
        """
        SubElement(caller, KEYWORD).text = self.tokenizer.key_word()  # set 'let' as text
        self.next()

        SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()  # varName
        self.next()

        if self.tokenizer.symbol() == '[':
            SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set '['
            self.next()

            self.compile_expression(SubElement(caller, EXPRESSION))

            SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set ']'
            self.next()

        # If there is no expression to compile:
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set '='
        self.next()

        self.compile_expression(SubElement(caller, EXPRESSION))

        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set ';'
        self.next()


    def compile_return(self, caller):
        """
        format : 'return' expression? ';'
        :param caller:
        :return:
        """
        SubElement(caller,KEYWORD).text = self.tokenizer.identifier()
        self.next()

        if self.tokenizer.token_type() is JTok.SYMBOL and self.tokenizer.symbol() == ";":
            SubElement(caller,SYMBOL).text = self.tokenizer.symbol()
            self.next()
            return

        self.compile_expression(SubElement(caller,EXPRESSION))
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
        self.next()

    def compile_while(self, caller):
        """
        format : 'while' '(' expression ')' '{' statements '}'
        :param caller:
        :return:
        """
        SubElement(caller, KEYWORD).text = self.tokenizer.key_word()  # set 'while' as text
        self.next()

        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set '('
        self.next()

        self.compile_expression(SubElement(caller, EXPRESSION))

        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set ')'
        self.next()

        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set '{'
        self.next()

        self.compile_statements(SubElement(caller, STATEMENTS))

        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set '}'
        self.next()


    def compile_statements(self, caller):
        """

        :param caller:
        :return:
        """
        STATEMENTS = {'do','while','let','return','if'}
        run_once = False
        while self.tokenizer.token_type() is JTok.KEYWORD and self.tokenizer.key_word() in STATEMENTS:
            run_once = True
            if self.tokenizer.key_word() == 'do':
                self.compile_do(SubElement(caller, 'doStatement'))
            elif self.tokenizer.key_word() == 'while':
                self.compile_while(SubElement(caller, 'whileStatement'))
            elif self.tokenizer.key_word() == 'let':
                self.compile_let(SubElement(caller, 'letStatement'))
            elif self.tokenizer.key_word() == 'return':
                self.compile_return(SubElement(caller, 'returnStatement'))
            elif self.tokenizer.key_word() == 'if':
                self.compile_if(SubElement(caller, 'ifStatement'))
        if not run_once:
            caller.text = "\n"

    def compile_if(self, caller):
        """
        format : 'if' '(' expression ')' '{' statements '}'
        ( 'else' '{' statements '}' )?
        :param caller:
        :return:
        """
        SubElement(caller,
                   KEYWORD).text = self.tokenizer.key_word()  # set 'if' as text
        self.next()

        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set '('
        self.next()

        self.compile_expression(SubElement(caller, EXPRESSION))

        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set ')'
        self.next()

        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set '{'
        self.next()

        self.compile_statements(SubElement(caller, STATEMENTS))

        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set '}'
        self.next()

        if self.tokenizer.token_type() is JTok.KEYWORD and self.tokenizer.key_word() == 'else':
            SubElement(caller,
                       KEYWORD).text = self.tokenizer.key_word()  # set 'else' as text
            self.next()

            SubElement(caller,
                       SYMBOL).text = self.tokenizer.symbol()  # set '{'
            self.next()

            self.compile_statements(SubElement(caller, STATEMENTS))

            SubElement(caller,
                       SYMBOL).text = self.tokenizer.symbol()  # set '}'
            self.next()

    def compile_var_dec(self, caller):
        """
        format: 'var' type varName ( ',' varName)* ';'
        :param caller:
        :return:
        """

        SubElement(caller, KEYWORD).text = self.tokenizer.key_word()  # set var as keyword
        self.next()

        self.compile_list_of_vars(caller)

    def compile_class(self,caller):
        """

        :param caller:
        :return:
        """
        SubElement(caller,KEYWORD).text = self.tokenizer.key_word()
        self.next()

        SubElement(caller,IDENTIFIER).text = self.tokenizer.identifier()
        self.next()

        SubElement(caller,SYMBOL).text = self.tokenizer.symbol() #{
        self.next()

        while self.tokenizer.token_type() is JTok.KEYWORD and self.tokenizer.key_word() in {'static','field'}:
            self.compile_classVarDec(SubElement(caller,"classVarDec"))

        while not self.tokenizer.token_type() is JTok.SYMBOL:
            self.compile_subroutine(SubElement(caller,"subroutineDec"))

        SubElement(caller,SYMBOL).text = self.tokenizer.symbol() #}
        self.next()


    def compile_list_of_vars(self,caller):
        """
        Helper method to compile lists of variables according to
        type varName (',' varName)*
        :param caller:
        :return:
        """
        self.compile_type(caller)

        SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()  # set var name  as identifier
        self.next()

        while self.tokenizer.symbol() != ';':
            SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set ','
            self.next()

            SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()  # set var name
            self.next()

        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set ';'
        self.next()


    def compile_classVarDec(self,caller):
        """

        :param caller:
        :return:
        """
        SubElement(caller,KEYWORD).text = self.tokenizer.key_word()
        self.next()

        self.compile_list_of_vars(caller)



    def compile_type(self,caller):
        """
        Compiles a tag according to type, for variables
        :param caller:
        :return:
        """
        tag = KEYWORD if self.tokenizer.token_type() is JTok.KEYWORD else IDENTIFIER
        text = self.tokenizer.key_word() if tag is KEYWORD else self.tokenizer.identifier()
        SubElement(caller, tag).text = text
        self.next()

    def compile_subroutine(self,caller):
        """

        :param caller:
        :return:
        """
        SubElement(caller,KEYWORD).text = self.tokenizer.key_word()
        self.next()

        if self.tokenizer.token_type() is JTok.KEYWORD and self.tokenizer.key_word() == "void":
            SubElement(caller,KEYWORD).text = self.tokenizer.key_word()
            self.next()
        else:
            self.compile_type(caller)

        SubElement(caller,IDENTIFIER).text = self.tokenizer.identifier()
        self.next()

        SubElement(caller,SYMBOL).text = self.tokenizer.symbol()
        self.next()

        self.compile_parameterList(SubElement(caller,"parameterList"))

        SubElement(caller,SYMBOL).text = self.tokenizer.symbol()
        self.next()

        self.compile_subroutineBody(SubElement(caller,"subroutineBody"))



    def compile_subroutineBody(self,caller):
        """
        Compiles a subroutine body
        :param caller:
        :return:
        """
        SubElement(caller,SYMBOL).text = self.tokenizer.symbol() #{
        self.next()

        while self.tokenizer.token_type() is JTok.KEYWORD and self.tokenizer.key_word() == "var":
            self.compile_var_dec(SubElement(caller,"varDec"))

        self.compile_statements(SubElement(caller,"statements"))

        SubElement(caller,SYMBOL).text = self.tokenizer.symbol() #}
        self.next()

    def compile_parameterList(self,caller):
        """

        :param caller:
        :return:
        """
        if self.tokenizer.token_type() is JTok.SYMBOL and self.tokenizer.symbol() == ")":
            caller.text = "\n"
            return

        self.compile_type(caller)

        SubElement(caller,IDENTIFIER).text = self.tokenizer.identifier()
        self.next()
        while self.tokenizer.token_type() is JTok.SYMBOL and self.tokenizer.symbol() == ",":
            SubElement(caller,SYMBOL).text = self.tokenizer.symbol()
            self.next()

            self.compile_type(caller)

            SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()
            self.next()
예제 #7
0
class JackAnalyzer:
    """ A jack parser class.

    Recursive-descent parser that reads Jack tokens from a Tokenizer and
    writes an indented XML parse tree to a sibling .xml file.  Indentation
    is tracked manually in self.taps (two spaces per nesting level).

    NOTE(review): the attribute name 'tekonizer' looks like a typo for
    'tokenizer'; it is kept as-is because it is used consistently below.
    """

    # Binary operators as they appear in the token stream.
    # NOTE(review): '&gt', '&amp' and '&lt' lack the trailing ';' of proper
    # XML entities -- presumably they match the tokenizer's escaping; verify
    # against the Tokenizer implementation.
    Operators = ['+', '-', '*', '/', '|', '=', '&gt', '&amp', '&lt']
    # Keywords that can begin a statement.
    statements_keyword = ["let", "do", "if", "while", "return"]
    # All symbols of the Jack language.
    Symbols = [
        '(', ')', '{', '}', '[', ']', ',', ';', '.', '+', '-', '*', '/', '&',
        '|', '>', '<', '=', '~'
    ]

    def __init__(self, file):
        """ A constructor which initializes the members of the class """
        self.tekonizer = Tokenizer(file)
        self.output_file = self.openfile(file)
        self.taps = ''  # current indentation prefix
        self.two_taps = '  '  # one indentation step (two spaces)

    def openfile(self, file):
        """ This function opens a file to write """
        # Derive the output path by replacing everything from the first '.'
        # with '.xml'.
        point = file.find('.')
        new_path = file[:point] + '.xml'
        output_file = open(new_path, 'w')
        return output_file

    def compileClass(self):
        """ This function compiles a class state """
        if (self.tekonizer.current_token == "class"):
            self.output_file.write("<class>\n")
            self.taps += self.two_taps
            # 'class' keyword, class name, opening '{'
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.compileClassVarDec()
            self.compileSubroutineDec()
            # closing '}' of the class
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.taps = self.taps[:-2]
            # NOTE(review): implicit string concatenation -- "class" '>\n'
            # has no '+' between the literals; result is the same string.
            self.output_file.write(self.taps + '</' + "class" '>\n')

    def compileVarName(self):
        """ This function compile the names of the var declaration of a class """
        self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n')
        self.tekonizer.advance()
        if (self.tekonizer.current_token == ','):
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            # recurse for the next name in the comma-separated list
            self.compileVarName()
        elif (self.tekonizer.current_token == ';'):
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            # ';' terminates the declaration, so this method also closes the
            # <classVarDec> element opened by compileClassVarDec.
            self.taps = self.taps[:-2]
            self.output_file.write(self.taps + '</' + "classVarDec" + '>' +
                                   '\n')
            self.tekonizer.advance()

    def compileClassVarDec(self):
        """ This function compile the names of the var declaration of a class """
        if (self.tekonizer.current_token == "static"
                or self.tekonizer.current_token == "field"):
            self.output_file.write(self.taps + '<' + "classVarDec" + '>\n')
            self.taps += self.two_taps
            # 'static'/'field' keyword then the type
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            # compileVarName closes the </classVarDec> tag
            self.compileVarName()
            # handle any further static/field declarations
            self.compileClassVarDec()

    def compileSubroutineDec(self):
        """ This function compile the method \ function \ constructor case """
        if (self.tekonizer.current_token == "method"
                or self.tekonizer.current_token == "constructor"
                or self.tekonizer.current_token == "function"):
            self.output_file.write(self.taps + '<subroutineDec>' + '\n')
            self.taps += self.two_taps
            # subroutine kind, return type, name, and opening '('
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.compileParameterList()
            self.compileSubrotineBody()
            self.taps = self.taps[:-2]
            self.output_file.write(self.taps + '</' + 'subroutineDec' + '>\n')
        # keep consuming subroutine declarations until the class's '}'
        if (self.tekonizer.current_token != '}'):
            self.compileSubroutineDec()

    def compileParameterList(self):
        """ This function compiles the parameterlist of the method\function\constructor """
        self.output_file.write(self.taps + '<' + "parameterList" + '>\n')
        self.taps += self.two_taps
        self.tekonizer.advance()
        self.compileParametrs()

    def compileParametrs(self):
        """ This function compiles the arguments of the method\function\constructor """
        if (self.tekonizer.current_token == ')'):
            # end of the list: close <parameterList>, then emit the ')'
            self.taps = self.taps[:-2]
            self.output_file.write(self.taps + '</' + "parameterList" + '>\n')
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
        else:
            # one parameter: type then name
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            if (self.tekonizer.current_token == ','):
                self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                       '\n')
                self.tekonizer.advance()
                self.compileParametrs()
            else:
                self.compileParametrs()

    def compileSubrotineBody(self):
        """ This function compiles the subroutinebody of the method\function\constructor """
        # NOTE(review): method name misspells 'Subroutine'; kept because the
        # caller (compileSubroutineDec) uses this exact spelling.
        self.output_file.write(self.taps + '<' + "subroutineBody" + '>\n')
        self.taps += self.two_taps
        self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n')
        self.tekonizer.advance()
        self.compileVarDec()
        self.compileStatements()
        self.tekonizer.advance()
        if (self.tekonizer.current_token == '}'):
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.taps = self.taps[:-2]
            self.output_file.write(self.taps + '</' + "subroutineBody" + '>\n')

    def compileVarDec(self):
        """ This function compile the var declaration for a method """
        if (self.tekonizer.current_token == "var"):
            self.output_file.write(self.taps + '<' + "varDec" + '>\n')
            self.taps += self.two_taps
            # 'var' keyword then the type
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            # compileVarDecName closes the </varDec> tag
            self.compileVarDecName()
            self.compileVarDec()

    def compileVarDecName(self):
        """ This function compile the name of the var declaration for a method """
        self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n')
        self.tekonizer.advance()
        if (self.tekonizer.current_token == ','):
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.compileVarDecName()
        # plain 'if' (not elif): after the recursion above returns, the
        # current token should be ';' and this branch closes the element.
        if (self.tekonizer.current_token == ';'):
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.taps = self.taps[:-2]
            self.output_file.write(self.taps + '</' + "varDec" + '>\n')
            self.tekonizer.advance()

    def compileStatements(self):
        """ This function compiles the statements declaration """
        self.output_file.write(self.taps + '<' + "statements" + '>\n')
        self.taps += self.two_taps
        self.compile_Statements()
        self.taps = self.taps[:-2]
        self.output_file.write(self.taps + '</' + "statements" + '>\n')

    def compile_Statements(self):
        """ This function checks the current statements and calls the appropriate function """
        # Dispatch on the statement keyword and recurse until no statement
        # keyword is current (e.g. '}' is reached).
        if (self.tekonizer.current_token == "if"):
            self.if_statement()
            self.compile_Statements()
        elif (self.tekonizer.current_token == "while"):
            self.while_statement()
            self.compile_Statements()
        elif (self.tekonizer.current_token == "do"):
            self.do_statement()
            self.compile_Statements()
        elif (self.tekonizer.current_token == "let"):
            self.let_statement()
            self.compile_Statements()
        elif (self.tekonizer.current_token == "return"):
            self.return_statement()
            self.compile_Statements()

    def if_statement(self):
        """ This function compile the if statement """
        self.output_file.write(self.taps + '<' + "ifStatement" + '>\n')
        self.taps += self.two_taps
        # 'if' then '('
        self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n')
        self.tekonizer.advance()
        self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n')
        self.tekonizer.advance()
        self.compile_expression()
        # ')' then '{'
        self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n')
        self.tekonizer.advance()
        self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n')
        self.tekonizer.advance()
        self.compileStatements()
        # '}'
        self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n')
        self.tekonizer.advance()
        if (self.tekonizer.current_token == 'else'):
            # 'else' '{' statements '}'
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.compileStatements()
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
        self.taps = self.taps[:-2]
        self.output_file.write(self.taps + '</' + "ifStatement" + '>\n')

    def while_statement(self):
        """ This function compile the while statement """
        self.output_file.write(self.taps + '<' + "whileStatement" + '>\n')
        self.taps += self.two_taps
        # 'while' then '('
        self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n')
        self.tekonizer.advance()
        self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n')
        self.tekonizer.advance()
        self.compile_expression()
        # ')' then '{'
        self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n')
        self.tekonizer.advance()
        self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n')
        self.tekonizer.advance()
        self.compileStatements()
        # '}'
        self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n')
        self.taps = self.taps[:-2]
        self.output_file.write(self.taps + '</' + "whileStatement" + '>\n')
        self.tekonizer.advance()

    def do_statement(self):
        """ This function compile the do statement """
        self.output_file.write(self.taps + '<' + "doStatement" + '>\n')
        self.taps += self.two_taps
        # 'do' keyword
        self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n')
        self.tekonizer.advance()
        self.subroutineCall()
        # trailing ';'
        self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n')
        self.tekonizer.advance()
        self.taps = self.taps[:-2]
        self.output_file.write(self.taps + '</' + "doStatement" + '>\n')

    def let_statement(self):
        """ This function compile the let statement """
        self.output_file.write(self.taps + '<' + "letStatement" + '>\n')
        self.taps += self.two_taps
        # 'let' keyword
        self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n')
        self.tekonizer.advance()
        self.compileLetVarName()
        if (self.tekonizer.current_token == '='):
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.compile_expression()
        elif (self.tekonizer.current_token == '['):
            # array access on the left side: '[' expr ']' '=' expr
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.compile_expression()
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.compile_expression()
        if (self.tekonizer.current_token == ';'):
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
        self.taps = self.taps[:-2]
        self.output_file.write(self.taps + '</' + "letStatement" + '>\n')

    def return_statement(self):
        """ This function compiles the return statement """
        self.output_file.write(self.taps + '<' + "returnStatement" + '>\n')
        self.taps += self.two_taps
        # 'return' keyword, then an optional expression before ';'
        self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n')
        self.tekonizer.advance()
        if (self.tekonizer.current_token != ';'):
            self.compile_expression()
        self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n')
        self.taps = self.taps[:-2]
        self.output_file.write(self.taps + '</' + "returnStatement" + '>\n')

    def compileLetVarName(self):
        """ This function compiles the let var name """
        self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n')
        self.tekonizer.advance()
        if (self.tekonizer.current_token == ','):
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            # NOTE(review): delegates to compileVarName, which closes a
            # </classVarDec> element on ';' -- looks suspicious inside a
            # let statement; confirm this path is ever taken.
            self.compileVarName()

    def compile_expression(self):
        """ This function compiles the expression case's """
        self.output_file.write(self.taps + '<' + "expression" + '>\n')
        self.taps += self.two_taps
        self.compile_term()
        if (self.tekonizer.current_token in self.Operators):
            # binary operation: op followed by the right-hand term
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.compile_term()
        self.taps = self.taps[:-2]
        self.output_file.write(self.taps + '</' + "expression" + '>\n')
        if (self.tekonizer.current_token == ','):
            # ',' means another expression follows (expression list context)
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.compile_expression()

    def compile_term(self):
        """ This function compiles the term case's """
        self.output_file.write(self.taps + '<' + "term" + '>\n')
        self.taps += self.two_taps
        # term_flag stays True for the simple cases; it is cleared only in
        # the plain-identifier fallback, which allows a trailing "op term"
        # continuation after </term> below.
        term_flag = True
        if (self.tekonizer.current_token.isdigit()
                or self.tekonizer.current_token.startswith('"')
                or self.tekonizer.current_token == 'this'
                or self.tekonizer.current_token == 'null'
                or self.tekonizer.current_token == 'true'
                or self.tekonizer.current_token == 'false'):
            # integer / string / keyword constant
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
        elif (self.tekonizer.current_token == '-'
              or self.tekonizer.current_token == '~'):
            # unary operator followed by a term
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.compile_term()
        elif (self.tekonizer.current_token == '('):
            # parenthesized sub-expression
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.compile_expression()
            if (self.tekonizer.current_token == ')'):
                self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                       '\n')
                self.tekonizer.advance()
        else:
            # identifier: peek one token ahead to classify it
            if (self.tekonizer.all_tokens[self.tekonizer.counter + 1] == '.'
                    or self.tekonizer.all_tokens[self.tekonizer.counter + 1]
                    == '('):  # subroutine call (qualified or direct)
                self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                       '\n')
                self.tekonizer.advance()
                self.subroutineCall()
            elif (self.tekonizer.all_tokens[self.tekonizer.counter +
                                            1] == '['):
                # array access: name '[' expression ']'
                self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                       '\n')
                self.tekonizer.advance()
                self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                       '\n')
                self.tekonizer.advance()
                self.compile_expression()
                self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                       '\n')  # closing ']' after the expression
                self.tekonizer.advance()
            elif (self.tekonizer.all_tokens[self.tekonizer.counter +
                                            1] == ')'):
                # plain variable directly before ')'
                self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                       '\n')
                self.tekonizer.advance()
            else:
                # plain variable; may be followed by a binary operator
                self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                       '\n')
                self.tekonizer.advance()
                term_flag = False
        self.taps = self.taps[:-2]
        self.output_file.write(self.taps + '</' + "term" + '>\n')
        if (self.tekonizer.current_token in self.Operators and not term_flag):
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.compile_term()

    def compile_exclist(self):
        """ This function compiles the expression list case """
        self.output_file.write(self.taps + '<' + "expressionList" + '>' + '\n')
        self.taps += self.two_taps
        if (self.tekonizer.current_token != ')'):
            self.compile_expression()  # if item != )
        self.taps = self.taps[:-2]
        self.output_file.write(self.taps + '</' + "expressionList" + '>\n')

    def subroutineCall(self):
        """ This function compiles the call of a function """
        if (self.tekonizer.return_typetoken() == "identifier"):
            # class/var name or the subroutine name itself
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
        if (self.tekonizer.current_token == '.'):
            # qualified call: '.' then the subroutine name
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
        elif (self.tekonizer.current_token == '('):
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            self.compile_exclist()
            if (self.tekonizer.current_token == ')'):
                self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                       '\n')
                self.tekonizer.advance()
        # a second '(' check: handles the argument list of a qualified call
        # (the elif above only fires for the unqualified form)
        if (self.tekonizer.current_token == '('):
            self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                   '\n')
            self.tekonizer.advance()
            if (self.tekonizer.current_token == ')'):
                # empty argument list
                self.output_file.write(self.taps + '<' + "expressionList" +
                                       '>\n')
                self.output_file.write(self.taps + '</' + "expressionList" +
                                       '>\n')
                self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                       '\n')
                self.tekonizer.advance()
            else:
                self.compile_exclist()
                self.output_file.write(self.taps + self.tekonizer.tokenType() +
                                       '\n')
                self.tekonizer.advance()
예제 #8
0
class CompilationEngine:
    """Recursive-descent parser that turns a Jack token stream into an
    XML parse tree written line by line to the output file.

    Compilation is driven entirely from the constructor: creating the
    engine parses the whole input and closes the output.
    """

    # Template for one XML line: <type> value </type>
    XML_LINE = "<{0}> {1} </{0}>\n"
    # Symbols that must be XML-escaped before being written.
    COMPARE_SYM_REPLACER = {
        '<': "&lt;",
        '>': "&gt;",
        '"': "&quot;",
        '&': "&amp;"
    }
    # Keyword tokens that act as constants inside a term.
    KEYWORD_CONSTANT = ("true", "false", "null", "this")

    def __init__(self, input_stream, output_stream):
        """
        constructor of the Compilation Engine object
        :param input_stream: the input stream
        :param output_stream: the output stream
        """
        self.__tokenizer = Tokenizer(input_stream)  # Tokenizer object
        self.__output = open(output_stream, "w")
        # dispatch table: statement keyword -> compiling method
        self.__statements = {
            "let": self.compile_let,
            "if": self.compile_if,
            "while": self.compile_while,
            "do": self.compile_do,
            "return": self.compile_return
        }
        self.compile_class()
        self.__output.close()

    def write_xml(self):
        """
        writing xml line for the current token: string constants use
        string_val(), escapable symbols are replaced, everything else is
        written verbatim.
        """
        if self.__tokenizer.token_type() == "stringConstant":
            self.__output.write(
                self.XML_LINE.format(self.__tokenizer.token_type(),
                                     self.__tokenizer.string_val()))
        elif self.__tokenizer.get_value() in self.COMPARE_SYM_REPLACER:
            xml_val = self.COMPARE_SYM_REPLACER[self.__tokenizer.get_value()]
            self.__output.write(
                self.XML_LINE.format(self.__tokenizer.token_type(), xml_val))
        else:
            self.__output.write(
                self.XML_LINE.format(self.__tokenizer.token_type(),
                                     self.__tokenizer.get_value()))

    def compile_class(self):
        """
        compiling the program from the class definition
        """
        self.__output.write("<class>\n")
        # 'class' keyword, class name, opening '{'
        self.write_xml()
        self.__tokenizer.advance()
        self.write_xml()
        self.__tokenizer.advance()
        self.write_xml()
        self.__tokenizer.advance()
        current_token = self.__tokenizer.get_value()
        while current_token == "static" or current_token == "field":
            self.compile_class_var_dec()
            current_token = self.__tokenizer.get_value()
        while current_token == "constructor" or current_token == "function" or current_token == "method":
            self.compile_subroutine_dec()
            current_token = self.__tokenizer.get_value()
        # closing '}'
        self.write_xml()
        self.__output.write("</class>\n")

    def compile_class_var_dec(self):
        """
        compiling the program from the class's declaration on vars
        """
        current_token = self.__tokenizer.get_value()
        while current_token == "static" or current_token == "field":
            self.__output.write("<classVarDec>\n")
            # static/field, type, first varName
            self.write_xml()
            self.__tokenizer.advance()
            self.write_xml()
            self.__tokenizer.advance()
            self.write_xml()
            self.__tokenizer.advance()
            while self.__tokenizer.get_value() == ",":
                self.write_xml()  # write ,
                self.__tokenizer.advance()
                self.write_xml()  # write value
                self.__tokenizer.advance()
            self.write_xml()  # write ;
            self.__tokenizer.advance()
            current_token = self.__tokenizer.get_value()
            self.__output.write("</classVarDec>\n")

    def compile_subroutine_body(self):
        """
        compiling the program's subroutine body: '{' varDec* statements '}'
        """
        self.__output.write("<subroutineBody>\n")
        self.write_xml()  # write {
        self.__tokenizer.advance()
        while self.__tokenizer.get_value() == "var":
            self.compile_var_dec()
        self.compile_statements()
        self.write_xml()  # write }
        self.__tokenizer.advance()
        self.__output.write("</subroutineBody>\n")

    def compile_subroutine_dec(self):
        """
        compiling the program's subroutine declaration
        """
        self.__output.write("<subroutineDec>\n")
        self.write_xml()  # write constructor/function/method
        self.__tokenizer.advance()
        self.write_xml()  # write return type
        self.__tokenizer.advance()
        self.write_xml()  # write identifier name
        self.__tokenizer.advance()
        self.compile_parameter_list()
        self.compile_subroutine_body()
        self.__output.write("</subroutineDec>\n")

    def compile_parameter_list(self):
        """
        compiling a parameter list; also consumes the surrounding '(' and ')'
        """
        self.write_xml()  # write (
        self.__tokenizer.advance()
        self.__output.write("<parameterList>\n")
        if self.__tokenizer.get_value() != ")":
            self.write_xml()  # write type
            self.__tokenizer.advance()
            self.write_xml()  # write varName
            self.__tokenizer.advance()
            while self.__tokenizer.get_value() == ",":
                self.write_xml()  # write ,
                self.__tokenizer.advance()
                self.write_xml()  # type
                self.__tokenizer.advance()
                self.write_xml()  # varName
                self.__tokenizer.advance()
        self.__output.write("</parameterList>\n")
        self.write_xml()  # write )
        self.__tokenizer.advance()

    def compile_var_dec(self):
        """
        compiling function's var declaration
        """
        self.__output.write("<varDec>\n")
        self.write_xml()  # write var
        self.__tokenizer.advance()
        self.write_xml()  # write type
        self.__tokenizer.advance()
        self.write_xml()  # write varName
        self.__tokenizer.advance()
        while self.__tokenizer.get_value() == ",":
            self.write_xml()  # write ,
            self.__tokenizer.advance()
            self.write_xml()
            self.__tokenizer.advance()
        self.write_xml()  # write ;
        self.__tokenizer.advance()
        self.__output.write("</varDec>\n")

    def compile_statements(self):
        """
        compiling statements via the keyword dispatch table until a token
        that is not a statement keyword is reached
        """
        key = self.__tokenizer.get_value()
        self.__output.write("<statements>\n")
        if key != "}":
            while key in self.__statements:
                self.__statements[self.__tokenizer.get_value()]()
                key = self.__tokenizer.get_value()
        self.__output.write("</statements>\n")

    def compile_do(self):
        """
        compiling do call
        """
        self.__output.write("<doStatement>\n")
        self.write_xml()  # write do
        self.__tokenizer.advance()
        self.subroutine_call()
        self.write_xml()  # write ;
        self.__tokenizer.advance()
        self.__output.write("</doStatement>\n")

    def compile_let(self):
        """
        compiling let call: let varName ('[' expr ']')? '=' expr ';'
        """
        self.__output.write("<letStatement>\n")
        self.write_xml()  # write let
        self.__tokenizer.advance()
        self.write_xml()  # write varName
        self.__tokenizer.advance()
        if self.__tokenizer.get_value() == "[":
            self.write_xml()  # write [
            self.__tokenizer.advance()
            self.compile_expression()
            self.write_xml()  # write ]
            self.__tokenizer.advance()
        self.write_xml()  # write =
        self.__tokenizer.advance()
        self.compile_expression()
        self.write_xml()  # write ;
        self.__tokenizer.advance()
        self.__output.write("</letStatement>\n")

    def compile_while(self):
        """
        compiling while loop call
        """
        self.__output.write("<whileStatement>\n")
        self.write_xml()  # write while
        self.__tokenizer.advance()
        self.write_xml()  # write (
        self.__tokenizer.advance()
        self.compile_expression()
        self.write_xml()  # write )
        self.__tokenizer.advance()
        self.write_xml()  # write {
        self.__tokenizer.advance()
        self.compile_statements()
        self.write_xml()  # write }
        self.__tokenizer.advance()
        self.__output.write("</whileStatement>\n")

    def compile_return(self):
        """
        compiling return statement with an optional return expression
        """
        self.__output.write("<returnStatement>\n")
        self.write_xml()  # write return
        self.__tokenizer.advance()
        if self.__tokenizer.get_value() != ";":
            self.compile_expression()
        self.write_xml()  # write ;
        self.__tokenizer.advance()
        self.__output.write("</returnStatement>\n")

    def compile_if(self):
        """
        compiling if condition with an optional else clause
        """
        self.__output.write("<ifStatement>\n")
        self.write_xml()  # write if
        self.__tokenizer.advance()
        self.write_xml()  # write (
        self.__tokenizer.advance()
        self.compile_expression()
        self.write_xml()  # write )
        self.__tokenizer.advance()
        self.write_xml()  # write {
        self.__tokenizer.advance()
        self.compile_statements()
        self.write_xml()  # write }
        self.__tokenizer.advance()
        if self.__tokenizer.get_value() == "else":
            self.write_xml()  # write else
            self.__tokenizer.advance()
            self.write_xml()  # write {
            self.__tokenizer.advance()
            self.compile_statements()
            self.write_xml()  # write }
            self.__tokenizer.advance()
        self.__output.write("</ifStatement>\n")

    def compile_expression(self):
        """
        compiling expressions: term (op term)*
        """
        self.__output.write("<expression>\n")
        self.compile_term()
        while self.__tokenizer.is_operator():
            self.write_xml()  # write the operator
            self.__tokenizer.advance()
            self.compile_term()
        self.__output.write("</expression>\n")

    def compile_term(self):
        """
        compiling any kind of terms: constants, keyword constants,
        variables (with optional indexing), subroutine calls,
        parenthesized expressions, and unary -/~ terms
        """
        # dealing with unknown token
        self.__output.write("<term>\n")
        curr_type = self.__tokenizer.token_type()
        # handle consts
        if curr_type == "integerConstant" or curr_type == "stringConstant":
            self.write_xml()  # write the int \ string
            self.__tokenizer.advance()

        # handle const keyword
        elif curr_type == "keyword" and self.__tokenizer.get_value(
        ) in self.KEYWORD_CONSTANT:
            # retag the token so it is emitted as a keywordConstant element
            self.__tokenizer.set_type("keywordConstant")
            self.write_xml()  # write key word
            self.__tokenizer.advance()

        elif curr_type == "identifier":
            # handle var names (one-token lookahead rules out a call)
            if self.__tokenizer.get_next_token(
            ) != "(" and self.__tokenizer.get_next_token() != ".":
                self.write_xml()  # write the var name
                self.__tokenizer.advance()
                if self.__tokenizer.get_value() == "[":
                    self.write_xml()  # write [
                    self.__tokenizer.advance()
                    self.compile_expression()
                    self.write_xml()  # write ]
                    self.__tokenizer.advance()
            # handle function calls
            else:
                self.subroutine_call()
        # handle expression
        elif curr_type == "symbol" and self.__tokenizer.get_value() == "(":
            self.write_xml()  # write (
            self.__tokenizer.advance()
            self.compile_expression()
            self.write_xml()  # write )
            self.__tokenizer.advance()

        # handle - \ ~
        elif self.__tokenizer.get_value() == "-" or self.__tokenizer.get_value(
        ) == "~":
            self.write_xml()  # write -\~
            self.__tokenizer.advance()
            self.compile_term()
        self.__output.write("</term>\n")

    def subroutine_call(self):
        """
        compiling the program's subroutine call:
        (className|varName '.')? subroutineName '(' expressionList ')'
        """
        if self.__tokenizer.get_next_token() == ".":
            self.write_xml()  # write name
            self.__tokenizer.advance()
            self.write_xml()  # write .
            self.__tokenizer.advance()
        self.write_xml()  # write name
        self.__tokenizer.advance()
        self.write_xml()  # write (
        self.__tokenizer.advance()
        self.compile_expression_list()
        self.write_xml()  # write )
        self.__tokenizer.advance()

    def compile_expression_list(self):
        """
        compiling expression list: (expression (',' expression)*)?
        """
        self.__output.write("<expressionList>\n")
        if self.__tokenizer.get_value() != ")":
            self.compile_expression()
            while self.__tokenizer.get_value() == ",":
                self.write_xml()  # write ,
                self.__tokenizer.advance()
                self.compile_expression()
        self.__output.write("</expressionList>\n")
예제 #9
0
class CompilationEngine:
    """
    Compiles a tokenized Jack class into VM code, while mirroring the
    parse structure into an XML tree built with Element/SubElement.
    Each compile_* method expects the current token to be positioned at
    the start of its construct and advances the tokenizer past it.
    """

    def __init__(self, source):
        """
        Runs the whole compilation for one source: tokenizes, compiles the
        class and emits VM code through the VMWriter, then closes it.
        :param source: input handed to both Tokenizer and VMWriter
        """
        self.if_counter = 0
        self.while_counter = 0
        self.tokenizer = Tokenizer(source)
        self.tokenizer.has_more_tokens()
        self.tokenizer.advance()
        self.symbols = SymbolTable()
        self.writer = VMWriter(source)
        self.arithmetic_op = {}
        self.init_op()
        self.root = Element(CLASS)
        self.class_name = ""
        self.compile_class(self.root)
        self.writer.close()

    def init_op(self):
        """
        Maps each Jack binary operator to its VM command.
        :return:
        """
        self.arithmetic_op = {
            '+': "add",
            '-': "sub",
            '*': "call Math.multiply 2",
            '/': "call Math.divide 2",
            '&': "and",
            '|': "or",
            '<': "lt",
            '>': "gt",
            '=': "eq"
        }

    def next(self):
        """
        Proceed to the next token.
        :return:
        """
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()

    def compile_expression(self, caller):
        """
        Compiles an expression: term (op term)*.
        :param caller:
        :return:
        """
        op_stack = []
        self.compile_term(SubElement(caller, TERM))
        while self.tokenizer.token_type(
        ) is JTok.SYMBOL and self.tokenizer.symbol() in OPERATORS:
            op_stack.append(self.tokenizer.symbol())
            self.next()
            self.compile_term(SubElement(caller, TERM))

        # NOTE(review): all terms are pushed first, then the operators are
        # emitted in reverse (LIFO) order -- confirm this matches the
        # intended evaluation for non-commutative chains such as a - b - c.
        while op_stack:
            self.writer.write_arithmetic(self.arithmetic_op[op_stack.pop()])

    def compile_expressionList(self, caller):
        """
            compiles a list of expressions
        :param caller:
        :return: num_of_args - number of expressions in expressions list.
        used by function call
        """
        num_of_args = 0
        #  if expression list is empty
        if self.tokenizer.token_type(
        ) is JTok.SYMBOL and self.tokenizer.symbol() == ")":
            caller.text = " "
            return num_of_args

        num_of_args += 1
        self.compile_expression(SubElement(caller, EXPRESSION))
        while self.tokenizer.token_type(
        ) is JTok.SYMBOL and self.tokenizer.symbol() == ",":
            #SubElement(caller,SYMBOL).text = self.tokenizer.symbol()
            num_of_args += 1
            self.next()
            self.compile_expression(SubElement(caller, EXPRESSION))
        return num_of_args

    def compile_subroutineCall(self, caller, first_token):
        """
        First token, the first identifier must be sent manually, so the method
        expects the current token to be the second in the specification.
        :param caller:
        :param first_token: identifier already consumed by the caller
        :return:
        """
        #SubElement(caller, IDENTIFIER).text = first_token
        func_name = first_token
        #SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
        # is_method is added to the argument count when an implicit
        # receiver ('this' or a variable) was pushed as argument 0
        is_method = 0
        if self.tokenizer.symbol() == '.':
            self.next()
            if self.symbols.kind_of(func_name):  # If first token is var name
                segment = self.symbols.kind_of(func_name)
                segment = Kind.get_segment(segment)
                index = self.symbols.index_of(func_name)
                self.writer.write_push(segment, index)
                func_name = self.symbols.type_of(func_name)
                is_method = 1

            func_name = func_name + "." + self.tokenizer.identifier()
            #SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()
            self.next()

            #SubElement(caller,SYMBOL).text = self.tokenizer.symbol()
        else:
            # bare call: method of the current class, push 'this' first
            func_name = self.class_name + "." + func_name
            self.writer.write_push(POINTER, 0)
            is_method = 1

        self.next()
        num_of_args = self.compile_expressionList(
            SubElement(caller, EXPRESSION_LIST)) + is_method

        self.writer.write_call(func_name, num_of_args)
        #SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
        self.next()

    def compile_term(self, caller):
        """
        Compiles a single term: integer/string/keyword constant, variable,
        array entry, subroutine call, parenthesized expression or unary op.
        :param caller:
        :return:
        """
        type = self.tokenizer.token_type()
        if type is JTok.INT_CONST:
            #SubElement(caller, INTEGER_CONSTANT).text = str(self.tokenizer.intVal())
            self.writer.write_push(CONSTANT, self.tokenizer.intVal())
            self.next()

        elif type is JTok.STRING_CONST:

            # build the string at runtime, one appendChar per character
            string_val = self.tokenizer.string_val()
            self.writer.write_push(CONSTANT, len(string_val))
            self.writer.write_call("String.new", 1)
            for c in string_val:
                self.writer.write_push(CONSTANT, ord(c))
                self.writer.write_call("String.appendChar", 2)
            self.next()

        elif type is JTok.KEYWORD:
            #SubElement(caller, KEYWORD).text = self.tokenizer.key_word()
            if self.tokenizer.key_word() in {"null", "false"}:
                self.writer.write_push(CONSTANT, 0)
            elif self.tokenizer.key_word(
            ) == "true":  # Assuming valid input, it must be true
                self.writer.write_push(CONSTANT, 1)
                self.writer.write_arithmetic("neg")
            elif self.tokenizer.key_word() == "this":
                self.writer.write_push(POINTER, 0)
            else:
                print("unexpected")

            self.next()

        elif type is JTok.IDENTIFIER:
            # need one token of lookahead to distinguish a plain variable
            # from a subroutine call or an array access
            name = self.tokenizer.identifier()

            self.next()
            type = self.tokenizer.token_type()

            if type is JTok.SYMBOL and self.tokenizer.symbol() in {".", "("}:
                self.compile_subroutineCall(caller, name)

            elif type is JTok.SYMBOL and self.tokenizer.symbol(
            ) == '[':  #TODO: Arrays, later
                # SubElement(caller, IDENTIFIER).text = name
                # SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
                self.next()

                self.compile_expression(SubElement(caller, EXPRESSION))
                kind = self.symbols.kind_of(name)
                index = self.symbols.index_of(name)
                if kind is not None:
                    self.writer.write_push(kind.get_segment(), index)
                else:
                    print("unexpected")
                # *(base + index) read through the THAT segment
                self.writer.write_arithmetic("add")
                self.writer.write_pop(POINTER, 1)
                self.writer.write_push("that", 0)

                #SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
                self.next()

            else:
                #SubElement(caller, IDENTIFIER).text = name
                kind = self.symbols.kind_of(name)
                index = self.symbols.index_of(name)
                if kind is not None:
                    self.writer.write_push(kind.get_segment(), index)
                else:
                    print("unexpected")

        elif type is JTok.SYMBOL:
            if self.tokenizer.symbol() == '(':
                #SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
                self.next()

                self.compile_expression(SubElement(caller, EXPRESSION))
                #SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
                self.next()

            elif self.tokenizer.symbol() in {'-', '~'}:
                #SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
                unary_op = self.tokenizer.symbol()
                self.next()
                self.compile_term(SubElement(caller, TERM))
                if unary_op == "-":
                    self.writer.write_arithmetic("neg")
                elif unary_op == "~":
                    self.writer.write_arithmetic("not")
                else:
                    "unexpected"  # NOTE(review): bare string, has no effect

    def compile_do(self, caller):
        """
        format : 'do' subroutineCall ';'
        :param caller:
        :return:
        """

        #SubElement(caller, KEYWORD).text = self.tokenizer.key_word()
        self.next()

        name = self.tokenizer.identifier()
        self.next()

        self.compile_subroutineCall(caller, name)
        # a do statement discards the callee's return value
        self.writer.write_pop(TEMP, 0)
        #SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set ';'
        self.next()

    def compile_let(self, caller):
        """
        format : 'let' varName ( '[' expression ']' )? '=' expression ';'
        :param caller:
        :return:
        """
        self.next()  # skip 'let'

        varName = self.tokenizer.identifier()
        self.next()

        kind = self.symbols.kind_of(varName)
        kind = kind.get_segment()
        index = self.symbols.index_of(varName)

        if self.tokenizer.symbol() == '[':  # if array
            self.next()  # skip [

            self.compile_expression(SubElement(caller, EXPRESSION))
            self.writer.write_push(kind, index)
            self.writer.write_arithmetic("add")
            self.next()  # skip ]
            self.next()  # skip =
            self.compile_expression(SubElement(caller, EXPRESSION))
            # save rhs in temp 0 while THAT is re-pointed at base+index
            self.writer.write_pop(TEMP, 0)
            self.writer.write_pop(POINTER, 1)
            self.writer.write_push(TEMP, 0)
            self.writer.write_pop("that", 0)

        else:
            self.next()  # skip =

            self.compile_expression(SubElement(caller, EXPRESSION))
            self.writer.write_pop(kind, index)

        self.next()  # skip ;

    def compile_return(self, caller):
        """
        format : 'return' expression? ';'
        :param caller:
        :return:
        """
        #SubElement(caller,KEYWORD).text = self.tokenizer.identifier()
        self.next()

        if self.tokenizer.token_type(
        ) is JTok.SYMBOL and self.tokenizer.symbol() == ";":
            #SubElement(caller,SYMBOL).text = self.tokenizer.symbol()
            # void subroutines still return a dummy 0
            self.writer.write_push(CONSTANT, 0)
            self.writer.write_return()
            self.next()
            return

        self.compile_expression(SubElement(caller, EXPRESSION))
        self.writer.write_return()
        #SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
        self.next()

    def compile_while(self, caller):
        """
        format : 'while' '(' expression ')' '{' statements '}'
        :param caller:
        :return:
        """
        # per-while label pair so nested/sequential loops stay distinct
        while_index = self.while_counter
        self.while_counter += 1
        self.writer.write_label("WHILE_EXP" + str(while_index))
        self.next()  # skip while

        self.next()  # skip (

        self.compile_expression(SubElement(caller, EXPRESSION))
        self.writer.write_arithmetic("not")
        self.writer.write_if("WHILE_END" + str(while_index))

        self.next()  # skip )

        self.next()  # skip {

        self.compile_statements(SubElement(caller, STATEMENTS))

        self.writer.write_goto("WHILE_EXP" + str(while_index))
        self.writer.write_label("WHILE_END" + str(while_index))
        self.next()  # skip }

    def compile_statements(self, caller):
        """
        Compiles a sequence of do/while/let/return/if statements until a
        non-statement token is reached.
        :param caller:
        :return:
        """
        # NOTE(review): this local set shadows the module-level STATEMENTS
        # tag constant used elsewhere (e.g. in compile_while) -- confirm
        # the shadowing is intentional.
        STATEMENTS = {'do', 'while', 'let', 'return', 'if'}
        caller.text = " "
        while self.tokenizer.token_type(
        ) is JTok.KEYWORD and self.tokenizer.key_word() in STATEMENTS:
            if self.tokenizer.key_word() == 'do':
                self.compile_do(SubElement(caller, 'doStatement'))
            elif self.tokenizer.key_word() == 'while':
                self.compile_while(SubElement(caller, 'whileStatement'))
            elif self.tokenizer.key_word() == 'let':
                self.compile_let(SubElement(caller, 'letStatement'))
            elif self.tokenizer.key_word() == 'return':
                self.compile_return(SubElement(caller, 'returnStatement'))
            elif self.tokenizer.key_word() == 'if':
                self.compile_if(SubElement(caller, 'ifStatement'))

    def compile_if(self, caller):
        """
        format : 'if' '(' expression ')' '{' statements '}'
        ( 'else' '{' statements '}' )?
        :param caller:
        :return:
        """

        self.next()  # (
        self.compile_expression(caller)
        self.next()  # {

        if_index = self.if_counter
        self.if_counter += 1
        self.writer.write_if("IF_TRUE" + str(if_index))

        self.writer.write_goto("IF_FALSE" + str(if_index))
        self.writer.write_label("IF_TRUE" + str(if_index))

        self.compile_statements(caller)

        self.next()

        if self.tokenizer.key_word() == 'else':
            self.writer.write_goto("IF_END" + str(if_index))
            self.writer.write_label("IF_FALSE" + str(if_index))

            self.next()  # else
            self.next()  # {
            self.compile_statements(caller)
            self.next()  # }

            self.writer.write_label("IF_END" + str(if_index))
        else:
            self.writer.write_label("IF_FALSE" + str(if_index))

        return

    def compile_var_dec(self, caller):
        """
        format: 'var' type varName ( ',' varName)* ';'
        :param caller:
        :return: number of variables declared
        """

        kind = self.tokenizer.key_word()
        #SubElement(caller, KEYWORD).text = kind  # set var as keyword
        self.next()

        return self.compile_list_of_vars(caller, "var", Kind[kind])

    def compile_class(self, caller):
        """
        format: 'class' className '{' classVarDec* subroutineDec* '}'
        :param caller:
        :return:
        """
        SubElement(caller, KEYWORD).text = self.tokenizer.key_word()
        self.next()

        SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()
        self.class_name = self.tokenizer.identifier()
        self.next()

        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  #{
        self.next()

        while self.tokenizer.token_type(
        ) is JTok.KEYWORD and self.tokenizer.key_word() in {'static', 'field'}:
            self.compile_classVarDec(SubElement(caller, "classVarDec"))

        while not self.tokenizer.token_type() is JTok.SYMBOL:
            self.compile_subroutine(SubElement(caller, "subroutineDec"))

        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  #}
        self.next()

    def compile_list_of_vars(self, caller, category, kind):
        """
        Helper method to compile lists of variables according to
        type varName (',' varName)*
        :param caller:
        :param category: textual category (e.g. "var") used for XML output
        :param kind: Kind enum value for the symbol table
        :return: number of variables defined
        """
        num_of_vars = 0
        type = self.compile_type(caller)
        self.symbols.define(self.tokenizer.identifier(), type, kind)
        num_of_vars += 1
        #text = category+", defined, "+type+", "+kind.name+", "+str(self.symbols.index_of(self.tokenizer.identifier()))
        #SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()+", "+text  # set var name  as identifier
        self.next()

        while self.tokenizer.symbol() != ';':
            #SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set ','
            self.next()

            self.symbols.define(self.tokenizer.identifier(), type, kind)
            num_of_vars += 1
            #text = category + ", defined, " + type + ", " + kind.name + ", " + str(
            #    self.symbols.index_of(self.tokenizer.identifier()))
            #SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()+", "+text  # set var name
            self.next()

        #SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set ';'
        self.next()
        return num_of_vars

    def compile_classVarDec(self, caller):
        """
        format: ('static'|'field') type varName (',' varName)* ';'
        :param caller:
        :return:
        """
        kind = self.tokenizer.key_word()
        #SubElement(caller,KEYWORD).text = kind
        self.next()

        self.compile_list_of_vars(caller, kind, Kind[kind])

    def compile_type(self, caller):
        """
        Compiles a tag according to type, for variables
        :param caller:
        :return: the type's text (keyword or class identifier)
        """
        tag = KEYWORD if self.tokenizer.token_type(
        ) is JTok.KEYWORD else IDENTIFIER
        text = self.tokenizer.key_word(
        ) if tag is KEYWORD else self.tokenizer.identifier()
        SubElement(caller, tag).text = text
        self.next()
        return text

    def compile_subroutine(self, caller):
        """
        format: ('constructor'|'function'|'method') ('void'|type)
        subroutineName '(' parameterList ')' '{' varDec* statements '}'
        :param caller:
        :return:
        """

        subroutine_type = self.tokenizer.key_word()
        self.next()

        # Just to skip void or type
        if self.tokenizer.token_type(
        ) is JTok.KEYWORD and self.tokenizer.key_word() == "void":
            SubElement(caller, KEYWORD).text = self.tokenizer.key_word()
            self.next()
        else:
            self.compile_type(caller)

        name = self.class_name + "." + self.tokenizer.identifier()
        self.symbols.start_subroutine()
        self.next()

        self.next()  # Skips (
        if subroutine_type == "method":
            # reserve argument 0 for the implicit 'this'
            self.symbols.define("this", "", Kind.arg)
        self.compile_parameterList(SubElement(caller, "parameterList"))

        self.next()  # Skips )

        self.next()  # Skips {

        num_of_locals = 0
        while self.tokenizer.token_type(
        ) is JTok.KEYWORD and self.tokenizer.key_word() == "var":
            num_of_locals += self.compile_var_dec(SubElement(caller, "varDec"))

        self.writer.write_function(name, num_of_locals)

        if subroutine_type == "constructor":
            # allocate this object's fields and anchor THIS at the result
            self.writer.write_push(CONSTANT,
                                   self.symbols.var_count(Kind.field))
            self.writer.write_call("Memory.alloc", 1)
            self.writer.write_pop(POINTER, 0)

        elif subroutine_type == "method":
            # methods receive 'this' as argument 0
            self.writer.write_push(ARGUMENT, 0)
            self.writer.write_pop(POINTER, 0)

        self.compile_statements(SubElement(caller, "statements"))

        self.next()  # Skips }

    def compile_parameterList(self, caller):
        """
        format: ( (type varName) (',' type varName)* )?
        :param caller:
        :return:
        """
        if self.tokenizer.token_type(
        ) is JTok.SYMBOL and self.tokenizer.symbol() == ")":
            caller.text = " "
            return

        type = self.compile_type(caller)
        name = self.tokenizer.identifier()

        # SubElement(caller,IDENTIFIER).text = self.tokenizer.identifier()
        self.symbols.define(name, type, Kind.arg)
        self.next()

        while self.tokenizer.token_type(
        ) is JTok.SYMBOL and self.tokenizer.symbol() == ",":
            # SubElement(caller,SYMBOL).text = self.tokenizer.symbol()
            self.next()
            type = self.compile_type(caller)
            name = self.tokenizer.identifier()
            self.symbols.define(name, type, Kind.arg)
            #SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()
            self.next()
# --- Example 10 (예제 #10) ---
class CompilationEngine:
    """
    Compiles a tokenized Jack class into VM code, mirroring the parse
    structure into an XML tree via Element/SubElement.

    Bug fixes versus the original: two IndentationErrors that made the
    module unparseable -- the keyword branch of compile_term ('if' was
    indented 11 spaces under its 'elif', mismatching the following
    'elif') and the first statement of compile_if (7-space indent).
    All logic, emitted VM code and strings are unchanged.
    """

    def __init__(self, source):
        """
        Runs the full compilation for one source: tokenize, compile the
        class and emit VM code through the VMWriter.
        :param source: input handed to both Tokenizer and VMWriter
        """
        self.if_counter = 0
        self.while_counter = 0
        self.tokenizer = Tokenizer(source)
        self.tokenizer.has_more_tokens()
        self.tokenizer.advance()
        self.symbols = SymbolTable()
        self.writer = VMWriter(source)
        self.arithmetic_op = {}
        self.init_op()
        self.root = Element(CLASS)
        self.class_name = ""
        # NOTE(review): compile_class is not defined in this (truncated)
        # copy of the class -- confirm against the complete version.
        self.compile_class(self.root)
        self.writer.close()

    def init_op(self):
        """
        Maps each Jack binary operator to its VM command.
        :return:
        """
        self.arithmetic_op = {
            '+': "add",
            '-': "sub",
            '*': "call Math.multiply 2",
            '/': "call Math.divide 2",
            '&': "and",
            '|': "or",
            '<': "lt",
            '>': "gt",
            '=': "eq"
        }

    def next(self):
        """
        Proceed to the next token.
        :return:
        """
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()

    def compile_expression(self, caller):
        """
        Compiles an expression: term (op term)*.
        :param caller:
        :return:
        """
        op_stack = []
        self.compile_term(SubElement(caller, TERM))
        while self.tokenizer.token_type(
        ) is JTok.SYMBOL and self.tokenizer.symbol() in OPERATORS:
            op_stack.append(self.tokenizer.symbol())
            self.next()
            self.compile_term(SubElement(caller, TERM))

        # operators are emitted after all terms, in reverse (LIFO) order
        while op_stack:
            self.writer.write_arithmetic(self.arithmetic_op[op_stack.pop()])

    def compile_expressionList(self, caller):
        """
        Compiles a (possibly empty) comma-separated list of expressions.
        :param caller:
        :return: number of expressions compiled (used as call arg count)
        """
        num_of_args = 0
        #  if expression list is empty
        if self.tokenizer.token_type(
        ) is JTok.SYMBOL and self.tokenizer.symbol() == ")":
            caller.text = " "
            return num_of_args

        num_of_args += 1
        self.compile_expression(SubElement(caller, EXPRESSION))
        while self.tokenizer.token_type(
        ) is JTok.SYMBOL and self.tokenizer.symbol() == ",":
            num_of_args += 1
            self.next()
            self.compile_expression(SubElement(caller, EXPRESSION))
        return num_of_args

    def compile_subroutineCall(self, caller, first_token):
        """
        Compiles a subroutine call. The first identifier has already been
        consumed by the caller and is passed in as first_token.
        :param caller:
        :param first_token: identifier already consumed by the caller
        :return:
        """
        func_name = first_token

        # added to the argument count when an implicit receiver is pushed
        is_method = 0
        if self.tokenizer.symbol() == '.':
            self.next()
            if self.symbols.kind_of(func_name):  # first token is a var name
                segment = self.symbols.kind_of(func_name)
                segment = Kind.get_segment(segment)
                index = self.symbols.index_of(func_name)
                self.writer.write_push(segment, index)
                func_name = self.symbols.type_of(func_name)
                is_method = 1

            func_name = func_name + "." + self.tokenizer.identifier()
            self.next()
        else:
            # bare call: method of the current class, push 'this' first
            func_name = self.class_name + "." + func_name
            self.writer.write_push(POINTER, 0)
            is_method = 1

        self.next()
        num_of_args = self.compile_expressionList(
            SubElement(caller, EXPRESSION_LIST)) + is_method

        self.writer.write_call(func_name, num_of_args)

        self.next()

    def compile_term(self, caller):
        """
        Compiles a single term: integer/string/keyword constant, variable,
        array entry, subroutine call, parenthesized expression or unary op.
        :param caller:
        :return:
        """
        type = self.tokenizer.token_type()
        if type is JTok.INT_CONST:
            self.writer.write_push(CONSTANT, self.tokenizer.intVal())
            self.next()

        elif type is JTok.STRING_CONST:
            # build the string at runtime, one appendChar per character
            string_val = self.tokenizer.string_val()
            self.writer.write_push(CONSTANT, len(string_val))
            self.writer.write_call("String.new", 1)
            for c in string_val:
                self.writer.write_push(CONSTANT, ord(c))
                self.writer.write_call("String.appendChar", 2)
            self.next()

        elif type is JTok.KEYWORD:
            # fixed: this branch was mis-indented (IndentationError)
            if self.tokenizer.key_word() in {"null", "false"}:
                self.writer.write_push(CONSTANT, 0)
            elif self.tokenizer.key_word() == "true":
                self.writer.write_push(CONSTANT, 1)
                self.writer.write_arithmetic("neg")
            elif self.tokenizer.key_word() == "this":
                self.writer.write_push(POINTER, 0)
            else:
                print("unexpected")

            self.next()

        elif type is JTok.IDENTIFIER:
            # one token of lookahead distinguishes a plain variable from a
            # subroutine call or an array access
            name = self.tokenizer.identifier()

            self.next()
            type = self.tokenizer.token_type()

            if type is JTok.SYMBOL and self.tokenizer.symbol() in {".", "("}:
                self.compile_subroutineCall(caller, name)

            elif type is JTok.SYMBOL and self.tokenizer.symbol() == '[':
                self.next()

                self.compile_expression(SubElement(caller, EXPRESSION))
                kind = self.symbols.kind_of(name)
                index = self.symbols.index_of(name)
                if kind is not None:
                    self.writer.write_push(kind.get_segment(), index)
                else:
                    print("unexpected")
                # *(base + index) read through the THAT segment
                self.writer.write_arithmetic("add")
                self.writer.write_pop(POINTER, 1)
                self.writer.write_push("that", 0)
                self.next()

            else:
                kind = self.symbols.kind_of(name)
                index = self.symbols.index_of(name)
                if kind is not None:
                    self.writer.write_push(kind.get_segment(), index)
                else:
                    print("unexpected")

        elif type is JTok.SYMBOL:
            if self.tokenizer.symbol() == '(':
                self.next()

                self.compile_expression(SubElement(caller, EXPRESSION))
                self.next()

            elif self.tokenizer.symbol() in {'-', '~'}:
                unary_op = self.tokenizer.symbol()
                self.next()
                self.compile_term(SubElement(caller, TERM))
                if unary_op == "-":
                    self.writer.write_arithmetic("neg")
                elif unary_op == "~":
                    self.writer.write_arithmetic("not")
                else:
                    "unexpected"  # NOTE(review): bare string, has no effect

    def compile_do(self, caller):
        """
        format : 'do' subroutineCall ';'
        :param caller:
        :return:
        """
        self.next()

        name = self.tokenizer.identifier()
        self.next()

        self.compile_subroutineCall(caller, name)
        # a do statement discards the callee's return value
        self.writer.write_pop(TEMP, 0)
        self.next()

    def compile_let(self, caller):
        """
        format : 'let' varName ( '[' expression ']' )? '=' expression ';'
        :param caller:
        :return:
        """
        self.next()  # skip 'let'

        varName = self.tokenizer.identifier()
        self.next()

        kind = self.symbols.kind_of(varName)
        kind = kind.get_segment()
        index = self.symbols.index_of(varName)

        if self.tokenizer.symbol() == '[':  # array assignment
            self.next()  # skip [

            self.compile_expression(SubElement(caller, EXPRESSION))
            self.writer.write_push(kind, index)
            self.writer.write_arithmetic("add")
            self.next()  # skip ]
            self.next()  # skip =
            self.compile_expression(SubElement(caller, EXPRESSION))
            # save rhs in temp 0 while THAT is re-pointed at base+index
            self.writer.write_pop(TEMP, 0)
            self.writer.write_pop(POINTER, 1)
            self.writer.write_push(TEMP, 0)
            self.writer.write_pop("that", 0)

        else:
            self.next()  # skip =

            self.compile_expression(SubElement(caller, EXPRESSION))
            self.writer.write_pop(kind, index)

        self.next()  # skip ;

    def compile_return(self, caller):
        """
        format : 'return' expression? ';'
        :param caller:
        :return:
        """
        self.next()

        if self.tokenizer.token_type(
        ) is JTok.SYMBOL and self.tokenizer.symbol() == ";":
            # void subroutines still return a dummy 0
            self.writer.write_push(CONSTANT, 0)
            self.writer.write_return()
            self.next()
            return

        self.compile_expression(SubElement(caller, EXPRESSION))
        self.writer.write_return()
        self.next()

    def compile_while(self, caller):
        """
        format : 'while' '(' expression ')' '{' statements '}'
        :param caller:
        :return:
        """
        # per-while label pair so nested/sequential loops stay distinct
        while_index = self.while_counter
        self.while_counter += 1
        self.writer.write_label("WHILE_EXP" + str(while_index))
        self.next()  # skip while

        self.next()  # skip (

        self.compile_expression(SubElement(caller, EXPRESSION))
        self.writer.write_arithmetic("not")
        self.writer.write_if("WHILE_END" + str(while_index))

        self.next()  # skip )

        self.next()  # skip {

        self.compile_statements(SubElement(caller, STATEMENTS))

        self.writer.write_goto("WHILE_EXP" + str(while_index))
        self.writer.write_label("WHILE_END" + str(while_index))
        self.next()  # skip }

    def compile_statements(self, caller):
        """
        Compiles a sequence of do/while/let/return/if statements until a
        non-statement token is reached.
        :param caller:
        :return:
        """
        # NOTE(review): shadows the module-level STATEMENTS tag constant
        STATEMENTS = {'do', 'while', 'let', 'return', 'if'}
        caller.text = " "
        while self.tokenizer.token_type(
        ) is JTok.KEYWORD and self.tokenizer.key_word() in STATEMENTS:
            if self.tokenizer.key_word() == 'do':
                self.compile_do(SubElement(caller, 'doStatement'))
            elif self.tokenizer.key_word() == 'while':
                self.compile_while(SubElement(caller, 'whileStatement'))
            elif self.tokenizer.key_word() == 'let':
                self.compile_let(SubElement(caller, 'letStatement'))
            elif self.tokenizer.key_word() == 'return':
                self.compile_return(SubElement(caller, 'returnStatement'))
            elif self.tokenizer.key_word() == 'if':
                self.compile_if(SubElement(caller, 'ifStatement'))

    def compile_if(self, caller):
        """
        format : 'if' '(' expression ')' '{' statements '}'
        ( 'else' '{' statements '}' )?
        :param caller:
        :return:
        """
        # fixed: first statement was mis-indented (IndentationError)
        self.next()  # (
        self.compile_expression(caller)
        self.next()  # {

        if_index = self.if_counter
        self.if_counter += 1
        self.writer.write_if("IF_TRUE" + str(if_index))

        self.writer.write_goto("IF_FALSE" + str(if_index))
        self.writer.write_label("IF_TRUE" + str(if_index))

        self.compile_statements(caller)

        self.next()

        if self.tokenizer.key_word() == 'else':
            self.writer.write_goto("IF_END" + str(if_index))
            self.writer.write_label("IF_FALSE" + str(if_index))

            self.next()  # else
            self.next()  # {
            self.compile_statements(caller)
            self.next()  # }
            self.writer.write_label("IF_END" + str(if_index))
        else:
            self.writer.write_label("IF_FALSE" + str(if_index))

        return
# --- Example 11 (예제 #11) ---
    base_name = source[:-len(".jack")]
    in_file = source
    tokenizer_outfile = "{}T.xml".format(base_name)
    compilation_engine_outfile = "{}.xml".format(base_name)

    with open(tokenizer_outfile, 'w') as tokenizer_file_out:
        tokenizer_xml_writer = XMLWriter(tokenizer_file_out)

        tokenizer_xml_writer.open_tag('tokens')

        with open(in_file, 'rb') as f_in:
            tokenizer = Tokenizer(f_in)

            while True:
                try:
                    tokenizer_xml_writer.write_token(tokenizer.advance())
                except TokenizerReachedEndOfFileException:
                    print('Reached end')
                    break

        tokenizer_xml_writer.close_tag('tokens')

    with open(compilation_engine_outfile, 'w') as ce_file_out:
        ce_xml_writer = XMLWriter(ce_file_out)

        with open(in_file, 'rb') as f_in:
            tokenizer = Tokenizer(f_in)
            ce = CompilationEngine(tokenizer)

            ce_xml_writer.write_node(ce.compile())