예제 #1
0
 def get_token(self):
     """Advance ``self.token`` to the next entry of ``self.token_list``.

     When the list is empty, ``self.token`` becomes a ``"NoneToken"``
     placeholder instead.

     Raises:
         ErrorMessage: if the token just taken has type ``"Error"``.
     """
     if not self.token_list:
         # Nothing left to consume: install the placeholder token.
         self.token = Token("NoneToken", None, None)
         return

     self.token = self.token_list.pop(0)
     if self.token.type == "Error":
         raise ErrorMessage(2, self.token.value, self.line)
예제 #2
0
 def next_token(self):
     """Fetch the next token from ``self.lexer`` into ``self.current_token``.

     A ``None`` result from the lexer is replaced by an empty
     ``Token(None, None, None)``. A ``LexExeption`` raised by the lexer is
     printed and ``self.current_token`` is left untouched.
     """
     try:
         fetched = self.lexer.token()
     except LexExeption as e:
         print(e)
         return

     self.current_token = fetched
     if fetched is None:
         self.current_token = Token(None, None, None)
    def __init__(self):
        """Open the input, output and error-log files and reset parser state.

        The whole input file is read into a ``Lexical_Analyzer`` and the
        lookahead is seeded with a placeholder ``SCAN_ERROR`` token.
        """
        # Parenthesized form prints the same on Python 2 and is valid on 3.
        print("Syntactical_Parser: in __init__")

        self.outfile = "Outputs/Output_" + testFile
        self.error_log = "Outputs/ErrorLog_" + testFile
        # NOTE(review): nothing in this method closes these three handles.
        self.f = open(infile)
        self.o = open(self.outfile, 'w+')
        self.err = open(self.error_log, 'w+')

        self.interpreter = Lexical_Analyzer(self.f.read())

        self.lookahead = Token(SCAN_ERROR, '', 0, 0)
        self.EPSILON = 'EPSILON'

        self.err_token = ''
        self.output = ''
예제 #4
0
 def nextToken(self):
     """Step the cursor ``self.i`` forward and return the token found there.

     Once the cursor has moved past the end of ``self.tokens`` an
     ``'end'`` token with an empty value is produced instead.
     """
     self.i += 1
     in_range = self.i < len(self.tokens)
     self.token = self.tokens[self.i] if in_range else Token(type='end', value='')
     return self.token
예제 #5
0
def parse():
    """Scan the input file token by token and log every token.

    Each scanned token is printed. Tokens of type ``SCAN_ERROR`` are
    written to the error log, every other token to the output file.
    Scanning stops once a token whose type equals ``EOF`` has been handled.
    """
    # The analyzer receives the file contents as a string, so the input
    # file can be closed as soon as it has been read.
    with open(infile) as f:
        interpreter = Lexical_Analyzer(f.read())
    lookahead = Token(SCAN_ERROR, 0, 0, 0)

    outfile = "Outputs/Output_" + testFile
    error_log = "Outputs/ErrorLog_" + testFile

    with open(outfile, 'w+') as o, open(error_log, 'w+') as err:
        # Compare by value: an identity test (`is not`) only works when the
        # scanner happens to hand back the very same object as EOF.
        while lookahead.type != EOF:
            lookahead = interpreter.scanner()
            print(lookahead)

            sink = err if lookahead.type == SCAN_ERROR else o
            sink.write(str(lookahead) + "\n")
예제 #6
0
 def _parse_dest(self, token, lexeme):
     """Sets the dest part of the c-command, if there is one.

     Peeks at the following token: when it is ``HackToken.OP_ASSIGN``,
     ``lexeme`` is recorded as the dest, the '=' is consumed and the token
     after it is returned. Otherwise ``self._dest`` is cleared and the
     incoming token is handed back as a ``Token``.
     """
     t, _ = self._peek_next_token()
     if t == HackToken.OP_ASSIGN:
         # This token is the dest; consume the '=' and return the next token.
         # The calls were previously missing their parentheses, so nothing
         # was consumed and the bound method itself was returned.
         self._next_token()
         self._dest = lexeme
         return self._next_token()

     # This is not the dest; return it back to the caller
     self._dest = None
     return Token(token, lexeme)
    def __init__(self):
        """Set up logs, grammar, parsing table and scanner for the parser.

        Opens the input, output and debug files, builds the flat terminal
        list and a productions-by-terminals table filled with -1, creates
        the lexical analyzer over the input text and finally calls
        ``initialize_parsing_table``.
        """
        # Parenthesized form prints the same on Python 2 and is valid on 3.
        print("Syntactical_Parser: in __init__")

        # Initialize logs and log messages
        # NOTE(review): nothing in this method closes these handles.
        self.f = open(infile)
        self.o = open(outfile, 'w+')
        self.debug_flush = open('Outputs/_FLUSH.txt', 'w+')
        self.output = 'OUTPUT OF ' + testFile + ": \n\n"
        self.errs = '\n\nERRORS OF ' + testFile + ":\n  -- ! Error locations are accurate to the original input file ! --\n\n"
        self.scanner_warnings = ''

        # just keeps measure of tabbing for nice output
        self.tabbed_scope = ''

        # generate grammar object from the specs file
        self.g = Grammar()

        # accumulate a flat list of all possible terminals
        self.terminal_list = [
            terminal
            for terminal_set in all_registered_terminals
            for terminal in terminal_set
        ]

        # initialize the whole table to -1 (one row per production,
        # one column per terminal)
        column_count = len(self.terminal_list)
        self.table = [[-1] * column_count
                      for _ in range(len(self.g.productions))]
        # stack to be used for the table predictive parsing method
        self.parsing_stack = []

        # initialize the Lexical analyser for token scanning
        self.interpreter = Lexical_Analyzer(self.f.read())
        self.lookahead = Token(EOF, EOF, '$', 0, 0)

        # Handles Semantic actions popped from parsing stack
        self.semantic_processor = SemanticProcessor()

        # print self.g
        self.initialize_parsing_table()
class Syntactic_Parser(object):
    def __init__(self):
        """Open the input, output and error-log files and reset parser state.

        The whole input file is read into a ``Lexical_Analyzer`` and the
        lookahead is seeded with a placeholder ``SCAN_ERROR`` token.
        """
        # Parenthesized form prints the same on Python 2 and is valid on 3.
        print("Syntactical_Parser: in __init__")

        self.outfile = "Outputs/Output_" + testFile
        self.error_log = "Outputs/ErrorLog_" + testFile
        # NOTE(review): self.f is not closed anywhere in the visible code.
        self.f = open(infile)
        self.o = open(self.outfile, 'w+')
        self.err = open(self.error_log, 'w+')

        self.interpreter = Lexical_Analyzer(self.f.read())

        self.lookahead = Token(SCAN_ERROR, '', 0, 0)
        self.EPSILON = 'EPSILON'

        self.err_token = ''
        self.output = ''

    def parse(self):
        """Run the parser over the scanner's token stream.

        Until the lookahead's type equals ``EOF``: scan a token, print it,
        call ``prog`` and record its outcome in the error log. Afterwards
        ``prettify_output`` is called and the output and error files are
        closed.
        """
        print("Syntactical_Parser: in parse")

        try:
            # Compare by value: an identity test (`is not`) only works when
            # the scanner hands back the very same object as EOF.
            while self.lookahead.type != EOF:
                self.lookahead = self.interpreter.scanner()
                print(self.lookahead)

                if self.prog():
                    self.err.write("\nmain parse() returning True")
                else:
                    self.err.write("\nmain parse() returning False")

            self.prettify_output()
        finally:
            # Close the logs even when parsing raised part-way through.
            self.o.close()
            self.err.close()

    def match(self, token):
        """Consume the lookahead if its value equals ``token``.

        On a match the value is appended to ``self.output`` and the next
        token is scanned into ``self.lookahead``.

        Returns:
            True if the lookahead was consumed, False otherwise (the
            lookahead is then left in place).
        """
        current = self.lookahead.value
        if current is not None:
            # Parenthesized form prints the same on Python 2 and 3.
            print("Syntactical_Parser: match(" + current + ", " + token + ")")

        if current == token:
            self.output += current
            self.lookahead = self.interpreter.scanner()
            return True
        return False

    def match_type(self, type):
        """Consume the lookahead if its token type equals ``type``.

        On a match the lookahead's value is appended to ``self.output`` and
        the next token is scanned into ``self.lookahead``.

        Returns:
            True if the lookahead was consumed, False otherwise (the
            lookahead is then left in place).
        """
        current = self.lookahead
        if current.value is not None:
            # Parenthesized form prints the same on Python 2 and 3.
            print("Syntactical_Parser: match_type(" + current.type + ", " + type + ")")

        if current.type == type:
            self.output += current.value
            self.lookahead = self.interpreter.scanner()
            return True
        return False

    def is_type_Id(self):
        """Tell whether the current lookahead token has type ``'Id'``."""
        return self.lookahead.type == 'Id'

    def prog(self):  # LHS-RHS1 | RHS2 |
        """Try to match ``prog -> classDecl progBody``.

        While an attempt fails, the current token is written to the error
        log and skipped, and the production is retried until a token of
        type '$' is reached.

        Returns:
            True if the production matched. False if the lookahead value is
            not in ``ff_sets.prog_FIRST1`` or the retry loop reached '$'.
        """
        print("Syntactical_Parser: in prog")
        if self.lookahead.value not in ff_sets.prog_FIRST1:
            return False

        while self.lookahead.type != '$':
            if self.classDecl() and self.progBody():
                print("prog -> classDecl progBody")
                return True

            # Recovery: log the offending token, skip it, and retry.
            self.err_token = self.lookahead.value
            self.err.write('\n~err in: prog(): ' + str(self.lookahead) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Explicit failure instead of falling off the end with None.
        return False

    def classDecl(self):
        """Try ``classDecl -> class Id { classBody } ; classDecl | EPSILON``.

        While an attempt at the non-empty alternative fails, the current
        token is logged to the error file and skipped, and the production
        is retried until a token of type '$' is reached.

        Returns:
            True if an alternative matched (EPSILON when the lookahead value
            is in ``ff_sets.classDecl_FOLLOW``); False otherwise, including
            when the retry loop reached '$'.
        """
        print("Syntactical_Parser: in classDecl")
        if self.lookahead.value in ff_sets.classDecl_FIRST1:  # LHS-RHS1
            while self.lookahead.type != '$':
                if (self.match('class') and self.match_type('Id')
                        and self.match('{') and self.classBody()
                        and self.match('}') and self.match(';')
                        and self.classDecl()):
                    print("classDecl -> class Id { classBody } ; classDecl ")
                    return True

                # Recovery: log the offending token, skip it, and retry.
                self.err_token = self.lookahead.value
                self.err.write('\n~err in: classDecl(): ' +
                               str(self.lookahead) + '\n')
                self.lookahead = self.interpreter.scanner()

            # Explicit failure instead of falling off the end with None.
            return False

        if self.lookahead.value in ff_sets.classDecl_FOLLOW:
            print("classDecl -> EPSILON")
            return True
        return False

    def classBody(self):
        """Try ``classBody -> type Id varOrFunc | EPSILON``.

        The non-empty alternative is chosen when the lookahead value is in
        ``ff_sets.classBody_FIRST1`` or the lookahead is an ``Id`` token.
        While an attempt fails, the current token is logged and skipped and
        the production is retried until a token of type '$' is reached.

        Returns:
            True if an alternative matched (EPSILON when the lookahead value
            is in ``ff_sets.classBody_FOLLOW``); False otherwise, including
            when the retry loop reached '$'.
        """
        print("Syntactical_Parser: in classBody")
        if (self.lookahead.value in ff_sets.classBody_FIRST1
                or self.is_type_Id()):  # LHS-RHS1
            while self.lookahead.type != '$':
                if self.type() and self.match_type('Id') and self.varOrFunc():
                    print("classBody -> type Id varOrFunc")
                    return True

                # Recovery: log the offending token, skip it, and retry.
                self.err_token = self.lookahead.value
                self.err.write('\n~err in: classBody(): ' +
                               str(self.lookahead) + '\n')
                self.lookahead = self.interpreter.scanner()

            # Explicit failure instead of falling off the end with None.
            return False

        if self.lookahead.value in ff_sets.classBody_FOLLOW:
            print("classBody -> EPSILON")
            return True
        return False

    def varOrFunc(self):
        """Try ``varOrFunc -> indice ; classBody | ( fParams ) funcBody ; classBody``.

        The alternative is picked from the lookahead value
        (``ff_sets.varOrFunc_FIRST1`` / ``varOrFunc_FIRST2``). While an
        attempt fails, the current token is logged and skipped and the same
        alternative is retried until a token of type '$' is reached.

        Returns:
            True if the chosen alternative matched; False if no alternative
            applies or the retry loop reached '$'.
        """
        print("Syntactical_Parser: in varOrFunc")

        def variable_member():
            return self.indice() and self.match(';') and self.classBody()

        def function_member():
            return (self.match('(') and self.fParams() and self.match(')')
                    and self.funcBody() and self.match(';')
                    and self.classBody())

        value = self.lookahead.value
        if value in ff_sets.varOrFunc_FIRST1:  # LHS-RHS1
            attempt = variable_member
            label = "varOrFunc -> indice ; classBody"
        elif value in ff_sets.varOrFunc_FIRST2:
            attempt = function_member
            label = "varOrFunc -> ( fParams ) funcBody classBody"
        else:
            return False

        while self.lookahead.type != '$':
            if attempt():
                print(label)
                return True

            # Recovery: log the offending token, skip it, and retry.
            self.err_token = self.lookahead.value
            self.err.write('\n~err in: varOrFunc(): ' +
                           str(self.lookahead) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Explicit failure instead of falling off the end with None.
        return False

    def progBody(self):
        """Try to match ``progBody -> program funcBody ; funcDef``.

        While an attempt fails, the current token is logged to the error
        file and skipped, and the production is retried until a token of
        type '$' is reached.

        Returns:
            True if the production matched. False if the lookahead value is
            not in ``ff_sets.progBody_FIRST1`` or the retry loop reached '$'.
        """
        print("Syntactical_Parser: in progBody")
        if self.lookahead.value not in ff_sets.progBody_FIRST1:
            return False

        while self.lookahead.type != '$':
            if (self.match('program') and self.funcBody()
                    and self.match(';') and self.funcDef()):
                print("progBody -> program funcBody ; funcDef")
                return True

            # Recovery: log the offending token, skip it, and retry.
            self.err_token = self.lookahead.value
            self.err.write('\n~err in: progBody(): ' +
                           str(self.lookahead) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Explicit failure instead of falling off the end with None.
        return False

    def funcHead(self):
        """Try to match ``funcHead -> type Id ( fParams )``.

        Attempted when the lookahead value is in ``ff_sets.funcHead_FIRST1``
        or the lookahead is an ``Id`` token. While an attempt fails, the
        current token is logged and skipped and the production is retried
        until a token of type '$' is reached.

        Returns:
            True if the production matched; False if it does not apply or
            the retry loop reached '$'.
        """
        print("Syntactical_Parser: in funcHead")
        applies = (self.lookahead.value in ff_sets.funcHead_FIRST1
                   or self.is_type_Id())  # LHS-RHS1
        if not applies:
            return False

        while self.lookahead.type != '$':
            if (self.type() and self.match_type('Id') and self.match('(')
                    and self.fParams() and self.match(')')):
                print("funcHead -> type Id ( fParams )")
                return True

            # Recovery: log the offending token, skip it, and retry.
            self.err_token = self.lookahead.value
            self.err.write('\n~err in: funcHead(): ' +
                           str(self.lookahead) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Explicit failure instead of falling off the end with None.
        return False

    def funcDef(self):
        """Try ``funcDef -> funcHead funcBody ; funcDef | EPSILON``.

        The non-empty alternative is chosen when the lookahead value is in
        ``ff_sets.funcDef_FIRST1`` or the lookahead is an ``Id`` token.
        While an attempt fails, the current token is logged and skipped and
        the production is retried until a token of type '$' is reached.

        Returns:
            True if an alternative matched (EPSILON when the lookahead
            *type* is in ``ff_sets.funcDef_FOLLOW``); False otherwise,
            including when the retry loop reached '$'.
        """
        print("Syntactical_Parser: in funcDef")
        if (self.lookahead.value in ff_sets.funcDef_FIRST1
                or self.is_type_Id()):  # LHS-RHS1
            while self.lookahead.type != '$':
                if (self.funcHead() and self.funcBody()
                        and self.match(';') and self.funcDef()):
                    print("funcDef -> funcHead funcBody ; funcDef")
                    return True

                # Recovery: log the offending token, skip it, and retry.
                self.err_token = self.lookahead.value
                self.err.write('\n~err in: funcDef(): ' +
                               str(self.lookahead) + '\n')
                self.lookahead = self.interpreter.scanner()

            # Explicit failure instead of falling off the end with None.
            return False

        # Unlike the sibling rules, this FOLLOW test uses the token type.
        if self.lookahead.type in ff_sets.funcDef_FOLLOW:
            print("funcDef -> EPSILON")
            return True
        return False

    def funcBody(self):
        """Try to match ``funcBody -> { gen_statements }``.

        While an attempt fails, the current token is logged to the error
        file and skipped, and the production is retried until a token of
        type '$' is reached.

        Returns:
            True if the production matched. False if the lookahead value is
            not in ``ff_sets.funcBody_FIRST1`` or the retry loop reached '$'.
        """
        print("Syntactical_Parser: in funcBody")
        if self.lookahead.value not in ff_sets.funcBody_FIRST1:
            return False

        while self.lookahead.type != '$':
            if (self.match('{') and self.gen_statements()
                    and self.match('}')):
                print("funcBody -> { gen_statements }")
                return True

            # Recovery: log the offending token, skip it, and retry.
            self.err_token = self.lookahead.value
            self.err.write('\n~err in: funcBody(): ' +
                           str(self.lookahead) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Explicit failure instead of falling off the end with None.
        return False

    def varDecl(self):
        """Try ``varDecl -> UD_Type createOrAssign varDeclTail | P_Type Id varDeclTail``.

        The first alternative is chosen when the lookahead type is in
        ``ff_sets.varDecl_FIRST1`` or the lookahead is an ``Id`` token; the
        second when the lookahead value is in ``ff_sets.varDecl_FIRST2``.
        While an attempt fails, the current token is logged and skipped and
        the same alternative is retried until a token of type '$' is reached.

        Returns:
            True if the chosen alternative matched; False if no alternative
            applies or the retry loop reached '$'.
        """
        print("Syntactical_Parser: in varDecl")

        def user_defined():
            return (self.UD_Type() and self.createOrAssign()
                    and self.varDeclTail())

        def primitive():
            return (self.P_Type() and self.match_type('Id')
                    and self.varDeclTail())

        if (self.lookahead.type in ff_sets.varDecl_FIRST1
                or self.is_type_Id()):  # LHS-RHS1
            attempt = user_defined
            label = "varDecl -> UD_Type createOrAssign varDeclTail"
        elif self.lookahead.value in ff_sets.varDecl_FIRST2:
            attempt = primitive
            label = "varDecl -> P_Type Id varDeclTail"
        else:
            return False

        while self.lookahead.type != '$':
            if attempt():
                print(label)
                return True

            # Recovery: log the offending token, skip it, and retry.
            self.err_token = self.lookahead.value
            self.err.write('\n~err in: varDecl(): ' +
                           str(self.lookahead) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Explicit failure instead of falling off the end with None.
        return False

    def createOrAssign(self):
        """Try ``createOrAssign -> Id | EPSILON``.

        The ``Id`` alternative is chosen when the lookahead type is in
        ``ff_sets.createOrAssign_FIRST1``. While an attempt fails, the
        current token is logged and skipped and the match is retried until
        a token of type '$' is reached.

        Returns:
            True if an alternative matched (EPSILON when the lookahead value
            is in ``ff_sets.createOrAssign_FOLLOW``); False otherwise,
            including when the retry loop reached '$'.
        """
        print("Syntactical_Parser: in createOrAssign")
        if self.lookahead.type in ff_sets.createOrAssign_FIRST1:  # LHS-RHS1
            while self.lookahead.type != '$':
                if self.match_type('Id'):
                    print("createOrAssign -> Id")
                    return True

                # Recovery: log the offending token, skip it, and retry.
                self.err_token = self.lookahead.value
                self.err.write('\n~err in: createOrAssign(): ' +
                               str(self.lookahead) + '\n')
                self.lookahead = self.interpreter.scanner()

            # Explicit failure instead of falling off the end with None.
            return False

        if self.lookahead.value in ff_sets.createOrAssign_FOLLOW:
            print("createOrAssign -> EPSILON")
            return True
        return False

    def varDeclTail(self):
        """Try to match ``varDeclTail -> indice is_Assign ;``.

        While an attempt fails, the current token is logged to the error
        file and skipped, and the production is retried until a token of
        type '$' is reached.

        Returns:
            True if the production matched. False if the lookahead value is
            not in ``ff_sets.varDeclTail_FIRST1`` or the retry loop reached
            '$'.
        """
        print("Syntactical_Parser: in varDeclTail")
        if self.lookahead.value not in ff_sets.varDeclTail_FIRST1:
            return False

        while self.lookahead.type != '$':
            if self.indice() and self.is_Assign() and self.match(';'):
                print("varDeclTail -> indice is_Assign ;")
                return True

            # Recovery: log the offending token, skip it, and retry.
            self.err_token = self.lookahead.value
            self.err.write('\n~err in: varDeclTail(): ' +
                           str(self.lookahead) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Explicit failure instead of falling off the end with None.
        return False

    def is_Assign(self):
        """Try ``is_Assign -> assignOp expr | EPSILON``.

        While an attempt at the non-empty alternative fails, the current
        token is logged to the error file and skipped, and the production
        is retried until a token of type '$' is reached.

        Returns:
            True if an alternative matched (EPSILON when the lookahead value
            is in ``ff_sets.is_Assign_FOLLOW``); False otherwise, including
            when the retry loop reached '$'.
        """
        print("Syntactical_Parser: in is_Assign")
        if self.lookahead.value in ff_sets.is_Assign_FIRST1:  # LHS-RHS1
            while self.lookahead.type != '$':
                if self.assignOp() and self.expr():
                    print("is_Assign -> assignOp expr")
                    return True

                # Recovery: log the offending token, skip it, and retry.
                self.err_token = self.lookahead.value
                self.err.write('\n~err in: is_Assign(): ' +
                               str(self.lookahead) + '\n')
                self.lookahead = self.interpreter.scanner()

            # Explicit failure instead of falling off the end with None.
            return False

        if self.lookahead.value in ff_sets.is_Assign_FOLLOW:
            print("is_Assign -> EPSILON")
            return True
        return False

    def statement(self):
        """Try to match one ``statement`` alternative.

        The alternative (for / if / get / put / return / varDecl) is picked
        once from the lookahead, using ``ff_sets.statement_FIRST1`` through
        ``statement_FIRST6`` (the varDecl alternative also applies when the
        lookahead is an ``Id`` token). While an attempt fails, the current
        token is logged and skipped and the same alternative is retried
        until a token of type '$' is reached.

        Returns:
            True if the chosen alternative matched; False if no alternative
            applies or the retry loop reached '$'.
        """
        print("Syntactical_Parser: in statement")

        def for_stat():
            return (self.match('for') and self.match('(') and self.type()
                    and self.match_type('Id') and self.assignOp()
                    and self.expr() and self.match(';') and self.relExpr()
                    and self.match(';') and self.assignStat()
                    and self.match(')') and self.statBlock()
                    and self.match(';'))

        def if_stat():
            return (self.match('if') and self.match('(') and self.expr()
                    and self.match(')') and self.match('then')
                    and self.statBlock() and self.match('else')
                    and self.statBlock() and self.match(';'))

        def get_stat():
            return (self.match('get') and self.match('(')
                    and self.variable() and self.match(')')
                    and self.match(';'))

        def put_stat():
            return (self.match('put') and self.match('(') and self.expr()
                    and self.match(')') and self.match(';'))

        def return_stat():
            return (self.match('return') and self.match('(')
                    and self.expr() and self.match(')')
                    and self.match(';'))

        # Pick the alternative once; the retry loop below keeps using it.
        value = self.lookahead.value
        if value in ff_sets.statement_FIRST1:
            attempt = for_stat
            label = "statement -> for ( type Id assignOp expr ; relExpr ; assignStat ) statBlock ;"
        elif value in ff_sets.statement_FIRST2:
            attempt = if_stat
            label = "statement -> if ( expr ) then statBlock else statBlock ;"
        elif value in ff_sets.statement_FIRST3:
            attempt = get_stat
            label = "statement -> get ( variable ) ;"
        elif value in ff_sets.statement_FIRST4:
            attempt = put_stat
            label = "statement -> put ( expr ) ;"
        elif value in ff_sets.statement_FIRST5:
            attempt = return_stat
            label = "statement -> return ( expr ) ;"
        elif value in ff_sets.statement_FIRST6 or self.is_type_Id():
            attempt = self.varDecl
            label = "statement -> varDecl"
        else:
            return False

        while self.lookahead.type != '$':
            if attempt():
                print(label)
                return True

            # Recovery: log the offending token, skip it, and retry.
            self.err_token = self.lookahead.value
            self.err.write('\n~err in: statement(): ' +
                           str(self.lookahead) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Explicit failure instead of falling off the end with None.
        return False

    def gen_statements(self):
        """Try ``gen_statements -> statement gen_statements | EPSILON``.

        The non-empty alternative is chosen when the lookahead value is in
        ``ff_sets.gen_statements_FIRST1`` or the lookahead is an ``Id``
        token. While an attempt fails, the current token is logged and
        skipped and the production is retried until a token of type '$' is
        reached.

        Returns:
            True if an alternative matched (EPSILON when the lookahead value
            is in ``ff_sets.gen_statements_FOLLOW``); False otherwise,
            including when the retry loop reached '$'.
        """
        print("Syntactical_Parser: in gen_statements")
        if (self.lookahead.value in ff_sets.gen_statements_FIRST1
                or self.is_type_Id()):  # LHS-RHS1
            while self.lookahead.type != '$':
                if self.statement() and self.gen_statements():
                    print("gen_statements -> statement gen_statements")
                    return True

                # Recovery: log the offending token, skip it, and retry.
                self.err_token = self.lookahead.value
                self.err.write('\n~err in: gen_statements(): ' +
                               str(self.lookahead) + '\n')
                self.lookahead = self.interpreter.scanner()

            # Explicit failure instead of falling off the end with None.
            return False

        if self.lookahead.value in ff_sets.gen_statements_FOLLOW:
            print("gen_statements -> EPSILON")
            return True
        return False

    def assignStat(self):
        """Try to match ``assignStat -> variable assignOp expr``.

        While an attempt fails, the current token is logged to the error
        file and skipped, and the production is retried until a token of
        type '$' is reached.

        Returns:
            True if the production matched. False if the lookahead type is
            not in ``ff_sets.assignStat_FIRST1`` or the retry loop reached
            '$'.
        """
        print("Syntactical_Parser: in assignStat")
        if self.lookahead.type not in ff_sets.assignStat_FIRST1:
            return False

        while self.lookahead.type != '$':
            if self.variable() and self.assignOp() and self.expr():
                print("assignStat -> variable assignOp expr")
                return True

            # Recovery: log the offending token, skip it, and retry.
            self.err_token = self.lookahead.value
            self.err.write('\n~err in: assignStat(): ' +
                           str(self.lookahead) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Explicit failure instead of falling off the end with None.
        return False

    def statBlock(self):
        """Try ``statBlock -> { gen_statements } | statement | EPSILON``.

        The braced alternative is chosen when the lookahead value is in
        ``ff_sets.statBlock_FIRST1``; the single-statement alternative when
        it is in ``ff_sets.statBlock_FIRST2`` or the lookahead is an ``Id``
        token. While an attempt fails, the current token is logged and
        skipped and the same alternative is retried until a token of type
        '$' is reached.

        Returns:
            True if an alternative matched (EPSILON when the lookahead value
            is in ``ff_sets.statBlock_FOLLOW``); False otherwise, including
            when the retry loop reached '$'.
        """
        print("Syntactical_Parser: in statBlock")

        def braced_block():
            return (self.match('{') and self.gen_statements()
                    and self.match('}'))

        value = self.lookahead.value
        if value in ff_sets.statBlock_FIRST1:  # LHS-RHS1
            attempt = braced_block
            label = "statBlock -> { gen_statements }"
        elif value in ff_sets.statBlock_FIRST2 or self.is_type_Id():  # LHS-RHS2
            attempt = self.statement
            label = "statBlock -> statement"
        elif value in ff_sets.statBlock_FOLLOW:
            print("statBlock -> EPSILON")
            return True
        else:
            return False

        while self.lookahead.type != '$':
            if attempt():
                print(label)
                return True

            # Recovery: log the offending token, skip it, and retry.
            self.err_token = self.lookahead.value
            self.err.write('\n~err in: statBlock(): ' +
                           str(self.lookahead) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Explicit failure instead of falling off the end with None.
        return False

    def expr(self):
        """Try to match ``expr -> arithExpr gen_relArithExpr``.

        Attempted when either the lookahead value or its type is in
        ``ff_sets.expr_FIRST1``. While an attempt fails, the current token
        is logged and skipped and the production is retried until a token
        of type '$' is reached.

        Returns:
            True if the production matched; False if it does not apply or
            the retry loop reached '$'.
        """
        print("Syntactical_Parser: in expr")
        first = ff_sets.expr_FIRST1
        if not (self.lookahead.value in first or self.lookahead.type in first):
            return False

        while self.lookahead.type != '$':
            if self.arithExpr() and self.gen_relArithExpr():
                print("expr -> arithExpr gen_relArithExpr")
                return True

            # Recovery: log the offending token, skip it, and retry.
            self.err_token = self.lookahead.value
            self.err.write('\n~err in: expr(): ' + str(self.lookahead) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Explicit failure instead of falling off the end with None.
        return False

    def relExpr(self):
        """Try to match ``relExpr -> arithExpr relOp arithExpr``.

        Attempted when the lookahead value is in ``ff_sets.relExpr_FIRST1``
        or the lookahead is an ``Id`` token. While an attempt fails, the
        current token is logged and skipped and the production is retried
        until a token of type '$' is reached.

        Returns:
            True if the production matched; False if it does not apply or
            the retry loop reached '$'.
        """
        print("Syntactical_Parser: in relExpr")
        applies = (self.lookahead.value in ff_sets.relExpr_FIRST1
                   or self.is_type_Id())  # LHS-RHS1
        if not applies:
            return False

        while self.lookahead.type != '$':
            if self.arithExpr() and self.relOp() and self.arithExpr():
                print("relExpr -> arithExpr relOp arithExpr")
                return True

            # Recovery: log the offending token, skip it, and retry.
            self.err_token = self.lookahead.value
            self.err.write('\n~err in: relExpr(): ' +
                           str(self.lookahead) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Explicit failure instead of falling off the end with None.
        return False

    def gen_relArithExpr(self):
        """Try ``gen_relArithExpr -> relOp arithExpr | EPSILON``.

        While an attempt at the non-empty alternative fails, the current
        token is logged to the error file and skipped, and the production
        is retried until a token of type '$' is reached.

        Returns:
            True if an alternative matched (EPSILON when the lookahead value
            is in ``ff_sets.gen_relArithExpr_FOLLOW``); False otherwise,
            including when the retry loop reached '$'.
        """
        print("Syntactical_Parser: in gen_relArithExpr")
        if self.lookahead.value in ff_sets.gen_relArithExpr_FIRST1:  # LHS-RHS1
            while self.lookahead.type != '$':
                if self.relOp() and self.arithExpr():
                    print("gen_relArithExpr -> relOp arithExpr")
                    return True

                # Recovery: log the offending token, skip it, and retry.
                self.err_token = self.lookahead.value
                self.err.write('\n~err in: gen_relArithExpr(): ' +
                               str(self.lookahead) + '\n')
                self.lookahead = self.interpreter.scanner()

            # Explicit failure instead of falling off the end with None.
            return False

        if self.lookahead.value in ff_sets.gen_relArithExpr_FOLLOW:
            print("gen_relArithExpr -> EPSILON")
            return True
        return False

    def arithExpr(self):
        """Try to match ``arithExpr -> term gen_addArithExpr``.

        Attempted when either the lookahead value or its type is in
        ``ff_sets.arithExpr_FIRST1``. While an attempt fails, the current
        token is logged and skipped and the production is retried until a
        token of type '$' is reached.

        Returns:
            True if the production matched; False if it does not apply or
            the retry loop reached '$'.
        """
        print("Syntactical_Parser: in arithExpr")
        first = ff_sets.arithExpr_FIRST1
        if not (self.lookahead.value in first or self.lookahead.type in first):
            return False

        while self.lookahead.type != '$':
            if self.term() and self.gen_addArithExpr():
                print("arithExpr -> term gen_addArithExpr")
                return True

            # Recovery: log the offending token, skip it, and retry.
            self.err_token = self.lookahead.value
            self.err.write('\n~err in: arithExpr(): ' +
                           str(self.lookahead) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Explicit failure instead of falling off the end with None.
        return False

    def gen_addArithExpr(self):
        """Try ``gen_addArithExpr -> addOp arithExpr | EPSILON``.

        While an attempt at the non-empty alternative fails, the current
        token is logged to the error file and skipped, and the production
        is retried until a token of type '$' is reached.

        Returns:
            True if an alternative matched (EPSILON when the lookahead value
            is in ``ff_sets.gen_addArithExpr_FOLLOW``); False otherwise,
            including when the retry loop reached '$'.
        """
        print("Syntactical_Parser: in gen_addArithExpr")
        if self.lookahead.value in ff_sets.gen_addArithExpr_FIRST1:  # LHS-RHS1
            while self.lookahead.type != '$':
                if self.addOp() and self.arithExpr():
                    print("gen_addArithExpr -> addOp arithExpr")
                    return True

                # Recovery: log the offending token, skip it, and retry.
                self.err_token = self.lookahead.value
                self.err.write('\n~err in: gen_addArithExpr(): ' +
                               str(self.lookahead) + '\n')
                self.lookahead = self.interpreter.scanner()

            # Explicit failure instead of falling off the end with None.
            return False

        if self.lookahead.value in ff_sets.gen_addArithExpr_FOLLOW:
            print("gen_addArithExpr -> EPSILON")
            return True
        return False

    def sign(self):
        """Try to match ``sign -> + | -``.

        While neither symbol matches, the current token is logged to the
        error file and skipped, and both are retried until a token of type
        '$' is reached.

        Returns:
            True if '+' or '-' was consumed. False if the lookahead value is
            not in ``ff_sets.sign_FIRST1`` or the retry loop reached '$'.
        """
        print("Syntactical_Parser: in sign")
        if self.lookahead.value not in ff_sets.sign_FIRST1:
            return False

        while self.lookahead.type != '$':
            # '+' is tried before '-', as in the grammar listing.
            for symbol in ('+', '-'):
                if self.match(symbol):
                    print("sign -> " + symbol)
                    return True

            # Recovery: log the offending token, skip it, and retry.
            self.err_token = self.lookahead.value
            self.err.write('\n~err in: sign(): ' + str(self.lookahead) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Explicit failure instead of falling off the end with None.
        return False

    def term(self):
        """Try to match ``term -> factor gen_Term``.

        Attempted when either the lookahead value or its type is in
        ``ff_sets.term_FIRST1``. While an attempt fails, the current token
        is logged and skipped and the production is retried until a token
        of type '$' is reached.

        Returns:
            True if the production matched; False if it does not apply or
            the retry loop reached '$'.
        """
        print("Syntactical_Parser: in term")
        first = ff_sets.term_FIRST1
        if not (self.lookahead.value in first or self.lookahead.type in first):
            return False

        while self.lookahead.type != '$':
            if self.factor() and self.gen_Term():
                print("term -> factor gen_Term")
                return True

            # Recovery: log the offending token, skip it, and retry.
            self.err_token = self.lookahead.value
            self.err.write('\n~err in: term(): ' + str(self.lookahead) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Explicit failure instead of falling off the end with None.
        return False

    def gen_Term(self):
        """Try ``gen_Term -> multOp term | EPSILON``.

        While an attempt at the non-empty alternative fails, the current
        token is logged to the error file and skipped, and the production
        is retried until a token of type '$' is reached.

        Returns:
            True if an alternative matched (EPSILON when the lookahead value
            is in ``ff_sets.gen_Term_FOLLOW``); False otherwise, including
            when the retry loop reached '$'.
        """
        print("Syntactical_Parser: in gen_Term")
        if self.lookahead.value in ff_sets.gen_Term_FIRST1:  # LHS-RHS1
            while self.lookahead.type != '$':
                if self.multOp() and self.term():
                    print("gen_Term -> multOp term")
                    return True

                # Recovery: log the offending token, skip it, and retry.
                self.err_token = self.lookahead.value
                self.err.write('\n~err in: gen_Term(): ' +
                               str(self.lookahead) + '\n')
                self.lookahead = self.interpreter.scanner()

            # Explicit failure instead of falling off the end with None.
            return False

        if self.lookahead.value in ff_sets.gen_Term_FOLLOW:
            print("gen_Term -> EPSILON")
            return True
        return False

    def factor(self):
        """Try to match one ``factor`` alternative.

        Alternatives, in the order they are tested against the lookahead:
        ``Id paramsOrIndice`` (type in ``factor_FIRST1``),
        ``( arithExpr )`` (value in ``factor_FIRST2``),
        ``not factor`` (value in ``factor_FIRST3``),
        ``int`` (type in ``factor_FIRST4``),
        ``float`` (type in ``factor_FIRST5``) and
        ``sign factor`` (value in ``factor_FIRST6``).
        While an attempt fails, the current token is logged and skipped and
        the same alternative is retried until a token of type '$' is reached.

        Returns:
            True if the chosen alternative matched; False if no alternative
            applies or the retry loop reached '$'.
        """
        print("Syntactical_Parser: in factor")

        def id_factor():
            return self.match_type('Id') and self.paramsOrIndice()

        def parenthesized():
            return (self.match('(') and self.arithExpr()
                    and self.match(')'))

        def negated():
            return self.match('not') and self.factor()

        def int_literal():
            return self.match_type('int')

        def float_literal():
            return self.match_type('float')

        def signed():
            return self.sign() and self.factor()

        # Pick the alternative once; the retry loop below keeps using it.
        kind = self.lookahead.type
        value = self.lookahead.value
        if kind in ff_sets.factor_FIRST1:
            attempt = id_factor
            label = "factor -> Id paramsOrIndice"
        elif value in ff_sets.factor_FIRST2:
            attempt = parenthesized
            label = "factor -> ( arithExpr )"
        elif value in ff_sets.factor_FIRST3:
            attempt = negated
            label = "factor -> not factor"
        elif kind in ff_sets.factor_FIRST4:
            attempt = int_literal
            label = "factor -> int"
        elif kind in ff_sets.factor_FIRST5:
            attempt = float_literal
            label = "factor -> float"
        elif value in ff_sets.factor_FIRST6:
            attempt = signed
            label = "factor -> sign factor"
        else:
            return False

        while self.lookahead.type != '$':
            if attempt():
                print(label)
                return True

            # Recovery: log the offending token, skip it, and retry.
            self.err_token = self.lookahead.value
            self.err.write('\n~err in: factor(): ' +
                           str(self.lookahead) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Explicit failure instead of falling off the end with None.
        return False

    def paramsOrIndice(self):
        """Parse ``paramsOrIndice -> ( aParams ) | indice``.

        When the lookahead value is in ``ff_sets.paramsOrIndice_FIRST1`` the
        parenthesised form is attempted.  Each failed attempt is logged to
        ``self.err`` and the next token is scanned, until the form matches
        or a token of type ``'$'`` is reached.  If the parenthesised form
        did not return, the ``indice`` alternative decides the result.

        Returns:
            True when one of the alternatives matched, False otherwise.
        """
        print("Syntactical_Parser: in paramsOrIndice")

        if self.lookahead.value in ff_sets.paramsOrIndice_FIRST1:  # LHS-RHS1
            while self.lookahead.type != '$':
                call_form_ok = (self.match('(') and self.aParams()
                                and self.match(')'))
                if call_form_ok:
                    print("paramsOrIndice -> ( aParams )")
                    return True

                # Recovery: remember and log the offending token, then
                # move on to the next one and retry.
                offending = self.lookahead
                self.err_token = offending.value
                self.err.write('\n~err in: paramsOrIndice(): ' +
                               str(offending) + '\n')
                self.lookahead = self.interpreter.scanner()

        # Reached when FIRST1 did not apply, or recovery hit '$'.
        if not self.indice():
            return False

        print("paramsOrIndice -> indice")
        return True

    def variable(self):
        """Parse ``variable -> Id indice``.

        Returns:
            True when an ``Id`` token followed by ``indice`` was matched,
            False otherwise.  ``indice`` is only attempted after the ``Id``
            matched.
        """
        print("Syntactical_Parser: in variable")

        matched = self.match_type('Id') and self.indice()
        if not matched:
            return False

        print("variable -> Id indice")
        return True

    def indice(self):
        """Parse ``indice -> [ arithExpr ] indice | . Id paramsOrIndice | EPSILON``.

        The alternative is selected from the lookahead value.  Inside the
        two non-empty alternatives every failed attempt is logged to
        ``self.err`` and the next token is scanned, until the alternative
        matches or a token of type ``'$'`` is reached.

        Returns:
            True on a match (including the EPSILON case, taken when the
            lookahead value is in ``ff_sets.indice_FOLLOW``), False when no
            alternative applies, and None when recovery ran out of input.
        """
        print("Syntactical_Parser: in indice")
        first_value = self.lookahead.value

        if first_value in ff_sets.indice_FIRST1:  # LHS-RHS1
            while self.lookahead.type != '$':
                if (self.match('[') and self.arithExpr()
                        and self.match(']') and self.indice()):
                    print("indice -> [ arithExpr ] indice")
                    return True

                # Recovery: log the offending token and skip it.
                offending = self.lookahead
                self.err_token = offending.value
                self.err.write('\n~err in: indice(): ' +
                               str(offending) + '\n')
                self.lookahead = self.interpreter.scanner()
            # Input exhausted while recovering: the result is None (falsy).
            return None

        if first_value == '.':  # LHS-RHS2
            while self.lookahead.type != '$':
                if (self.match('.') and self.match_type('Id')
                        and self.paramsOrIndice()):
                    print("indice -> . Id paramsOrIndice")
                    return True

                offending = self.lookahead
                self.err_token = offending.value
                self.err.write('\n~err in: indice(): ' +
                               str(offending) + '\n')
                self.lookahead = self.interpreter.scanner()
            return None

        if first_value in ff_sets.indice_FOLLOW:
            print("indice -> EPSILON")
            return True

        return False

    def arraySize(self):
        """Parse ``arraySize -> [ int ] | EPSILON``.

        In the bracketed alternative every failed attempt is logged to
        ``self.err`` and the next token is scanned, until the alternative
        matches or a token of type ``'$'`` is reached.

        Returns:
            True on a match (EPSILON is taken when the lookahead value is
            in ``ff_sets.arraySize_FOLLOW``), False when neither alternative
            applies, and None when recovery ran out of input.
        """
        print("Syntactical_Parser: in arraySize")

        if self.lookahead.value in ff_sets.arraySize_FIRST1:  # LHS-RHS1
            while self.lookahead.type != '$':
                bracketed = (self.match('[') and self.match_type('int')
                             and self.match(']'))
                if bracketed:
                    print("arraySize -> [ int ]")
                    return True

                # Recovery: log the offending token and skip it.
                bad_token = self.lookahead
                self.err_token = bad_token.value
                self.err.write('\n~err in: arraySize(): ' +
                               str(bad_token) + '\n')
                self.lookahead = self.interpreter.scanner()
            # Input exhausted while recovering: the result is None (falsy).
            return None

        if self.lookahead.value in ff_sets.arraySize_FOLLOW:
            print("arraySize -> EPSILON")
            return True

        return False

    def type(self):
        """Parse ``type -> Id | int | float``.

        The rule is only attempted when the lookahead value is in
        ``ff_sets.fParams_FIRST1`` or ``self.is_type_Id()`` is true.

        Returns:
            True when one of the three alternatives matched, False when the
            rule does not apply, and None when it applies but none of the
            alternatives matched.
        """
        print("Syntactical_Parser: in type")

        applies = (self.lookahead.value in ff_sets.fParams_FIRST1
                   or self.is_type_Id())  # LHS-RHS1
        if not applies:
            return False

        # Alternatives are tried in order; the first that matches wins.
        alternatives = (
            (self.match_type, 'Id', "type -> Id"),
            (self.match, 'int', "type -> int"),
            (self.match, 'float', "type -> float"),
        )
        for matcher, symbol, derivation in alternatives:
            if matcher(symbol):
                print(derivation)
                return True

        # Nothing matched although the rule applied: result is None.
        return None

    def P_Type(self):
        """Match one of the keyword types ``int`` or ``float``.

        The rule is only attempted when the lookahead value is in
        ``ff_sets.P_Type_FIRST1``.

        Returns:
            True when ``int`` or ``float`` matched, False when the rule does
            not apply, and None when it applies but neither keyword matched.
        """
        print("Syntactical_Parser: in P_Type")

        if self.lookahead.value not in ff_sets.P_Type_FIRST1:  # LHS-RHS1
            return False

        for keyword, derivation in (('int', "type -> int"),
                                    ('float', "type -> float")):
            if self.match(keyword):
                print(derivation)
                return True

        # Neither keyword matched although the rule applied: result is None.
        return None

    def UD_Type(self):
        """Match a type given by an ``Id`` token.

        The rule is only attempted when the lookahead *type* is in
        ``ff_sets.UD_Type_FIRST1``.

        Returns:
            True when an ``Id`` token matched, False when the rule does not
            apply, and None when it applies but the ``Id`` did not match.
        """
        print("Syntactical_Parser: in UD_Type")

        if self.lookahead.type not in ff_sets.UD_Type_FIRST1:  # LHS-RHS1
            return False

        if not self.match_type('Id'):
            # Rule applied but the token was not consumed: result is None.
            return None

        print("type -> Id")
        return True

    def fParams(self):
        """Parse ``fParams -> type Id arraySize fParamsTail | EPSILON``.

        The non-empty alternative is chosen when the lookahead value is in
        ``ff_sets.fParams_FIRST1`` or ``self.is_type_Id()`` is true.  Every
        failed attempt is logged to ``self.err`` and the next token is
        scanned, until the alternative matches or a token of type ``'$'``
        is reached.

        Returns:
            True on a match (EPSILON is taken when the lookahead value is
            in ``ff_sets.fParams_FOLLOW``), False when neither alternative
            applies, and None when recovery ran out of input.
        """
        print("Syntactical_Parser: in fParams")

        if (self.lookahead.value in ff_sets.fParams_FIRST1
                or self.is_type_Id()):  # LHS-RHS1
            while self.lookahead.type != '$':
                parsed = (self.type() and self.match_type('Id')
                          and self.arraySize() and self.fParamsTail())
                if parsed:
                    print("fParams -> type Id arraySize fParamsTail")
                    return True

                # Recovery: log the offending token and skip it.
                bad_token = self.lookahead
                self.err_token = bad_token.value
                self.err.write('\n~err in: fParams(): ' +
                               str(bad_token) + '\n')
                self.lookahead = self.interpreter.scanner()
            # Input exhausted while recovering: the result is None (falsy).
            return None

        if self.lookahead.value in ff_sets.fParams_FOLLOW:
            print("fParams -> EPSILON")
            return True

        return False

    def aParams(self):
        """Parse ``aParams -> expr aParamsTail | EPSILON``.

        The non-empty alternative is chosen when the lookahead value is in
        ``ff_sets.aParams_FIRST1`` or ``self.is_type_Id()`` is true.  Every
        failed attempt is logged to ``self.err`` and the next token is
        scanned, until the alternative matches or a token of type ``'$'``
        is reached.  The EPSILON alternative is taken when the lookahead
        value is ``')'``.

        Returns:
            True on a match, False when neither alternative applies, and
            None when recovery ran out of input.
        """
        print("Syntactical_Parser: in aParams")

        if (self.lookahead.value in ff_sets.aParams_FIRST1
                or self.is_type_Id()):  # LHS-RHS1
            while self.lookahead.type != '$':
                if self.expr() and self.aParamsTail():
                    print("aParams -> expr aParamsTail")
                    return True

                # Recovery: log the offending token and skip it.
                self.err_token = self.lookahead.value
                self.err.write('\n~err in: aParams(): ' +
                               str(self.lookahead) + '\n')
                self.lookahead = self.interpreter.scanner()
            # Input exhausted while recovering: the result is None (falsy),
            # matching the sibling rule methods.
            return None

        if self.lookahead.value == ')':
            # The derivation trace previously named fParamsTail here
            # (copy-paste slip); this is the aParams rule.
            print("aParams -> EPSILON")
            return True

        return False

    def fParamsTail(self):
        """Parse ``fParamsTail -> , type Id arraySize fParamsTail | EPSILON``.

        In the non-empty alternative every failed attempt is logged to
        ``self.err`` and the next token is scanned, until the alternative
        matches or a token of type ``'$'`` is reached.

        Returns:
            True on a match (EPSILON is taken when the lookahead value is
            in ``ff_sets.fParamsTail_FOLLOW``), False when neither
            alternative applies, and None when recovery ran out of input.
        """
        print("Syntactical_Parser: in fParamsTail")

        if self.lookahead.value in ff_sets.fParamsTail_FIRST1:  # LHS-RHS1
            while self.lookahead.type != '$':
                parsed = (self.match(',') and self.type()
                          and self.match_type('Id') and self.arraySize()
                          and self.fParamsTail())
                if parsed:
                    print("fParamsTail -> , type Id arraySize fParamsTail")
                    return True

                # Recovery: log the offending token and skip it.
                bad_token = self.lookahead
                self.err_token = bad_token.value
                self.err.write('\n~err in: fParamsTail(): ' +
                               str(bad_token) + '\n')
                self.lookahead = self.interpreter.scanner()
            # Input exhausted while recovering: the result is None (falsy).
            return None

        if self.lookahead.value in ff_sets.fParamsTail_FOLLOW:
            print("fParamsTail -> EPSILON")
            return True

        return False

    def aParamsTail(self):
        """Parse ``aParamsTail -> , expr aParamsTail | EPSILON``.

        In the non-empty alternative every failed attempt is logged to
        ``self.err`` and the next token is scanned, until the alternative
        matches or a token of type ``'$'`` is reached.

        Returns:
            True on a match (EPSILON is taken when the lookahead value is
            in ``ff_sets.aParamsTail_FOLLOW``), False when neither
            alternative applies, and None when recovery ran out of input.
        """
        print("Syntactical_Parser: in aParamsTail")

        if self.lookahead.value in ff_sets.aParamsTail_FIRST1:  # LHS-RHS1
            while self.lookahead.type != '$':
                parsed = (self.match(',') and self.expr()
                          and self.aParamsTail())
                if parsed:
                    print("aParamsTail -> , expr aParamsTail")
                    return True

                # Recovery: log the offending token and skip it.
                bad_token = self.lookahead
                self.err_token = bad_token.value
                self.err.write('\n~err in: aParamsTail(): ' +
                               str(bad_token) + '\n')
                self.lookahead = self.interpreter.scanner()
            # Input exhausted while recovering: the result is None (falsy).
            return None

        if self.lookahead.value in ff_sets.aParamsTail_FOLLOW:
            print("aParamsTail -> EPSILON")
            return True

        return False

    def assignOp(self):
        """Parse ``assignOp -> =``.

        The rule is attempted when the lookahead value is in
        ``ff_sets.assignOp_FIRST1``.  Every failed attempt is logged to
        ``self.err`` and the next token is scanned, until ``=`` matches or
        a token of type ``'$'`` is reached.

        Returns:
            True when ``=`` matched, False when the rule does not apply,
            and None when recovery ran out of input.
        """
        print("Syntactical_Parser: in assignOp")

        if self.lookahead.value not in ff_sets.assignOp_FIRST1:  # LHS-RHS1
            return False

        while self.lookahead.type != '$':
            if self.match('='):
                print("assignOp -> =")
                return True

            # Recovery: log the offending token and skip it.
            bad_token = self.lookahead
            self.err_token = bad_token.value
            self.err.write('\n~err in: assignOp(): ' +
                           str(bad_token) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Input exhausted while recovering: the result is None (falsy).
        return None

    def relOp(self):
        """Parse ``relOp -> < | <= | <> | == | > | >=``.

        The rule is attempted when the lookahead value is in
        ``ff_sets.relOp_FIRST1``.  Every failed attempt is logged to
        ``self.err`` and the next token is scanned, until one of the
        operators matches or a token of type ``'$'`` is reached.

        Returns:
            True when an operator matched, False when the rule does not
            apply, and None when recovery ran out of input.
        """
        print("Syntactical_Parser: in relOp")

        if self.lookahead.value not in ff_sets.relOp_FIRST1:  # LHS-RHS1
            return False

        # Tried left to right; matching stops at the first success.
        operators = ('<', '<=', '<>', '==', '>', '>=')

        while self.lookahead.type != '$':
            if any(self.match(op) for op in operators):
                print("relOp -> < | <= | <> | == | > | >=")
                return True

            # Recovery: log the offending token and skip it.
            bad_token = self.lookahead
            self.err_token = bad_token.value
            self.err.write('\n~err in: relOp(): ' +
                           str(bad_token) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Input exhausted while recovering: the result is None (falsy).
        return None

    def addOp(self):
        """Parse ``addOp -> + | - | or``.

        The rule is attempted when the lookahead value is in
        ``ff_sets.addOp_FIRST1``.  Every failed attempt is logged to
        ``self.err`` and the next token is scanned, until one of the
        operators matches or a token of type ``'$'`` is reached.

        Returns:
            True when an operator matched, False when the rule does not
            apply, and None when recovery ran out of input.
        """
        print("Syntactical_Parser: in addOp")

        if self.lookahead.value not in ff_sets.addOp_FIRST1:  # LHS-RHS1
            return False

        while self.lookahead.type != '$':
            # Tried left to right; matching stops at the first success.
            if any(self.match(op) for op in ('+', '-', 'or')):
                print("addOp -> + | - | or")
                return True

            # Recovery: log the offending token and skip it.
            bad_token = self.lookahead
            self.err_token = bad_token.value
            self.err.write('\n~err in: addOp(): ' +
                           str(bad_token) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Input exhausted while recovering: the result is None (falsy).
        return None

    def multOp(self):
        """Parse ``multOp -> * | / | and``.

        The rule is attempted when the lookahead value is in
        ``ff_sets.multOp_FIRST1``.  Every failed attempt is logged to
        ``self.err`` and the next token is scanned, until one of the
        operators matches or a token of type ``'$'`` is reached.

        Returns:
            True when an operator matched, False when the rule does not
            apply, and None when recovery ran out of input.
        """
        print("Syntactical_Parser: in multOp")

        if self.lookahead.value not in ff_sets.multOp_FIRST1:  # LHS-RHS1
            return False

        while self.lookahead.type != '$':
            # Tried left to right; matching stops at the first success.
            if any(self.match(op) for op in ('*', '/', 'and')):
                print("multOp -> * | / | and")
                return True

            # Recovery: log the offending token and skip it.
            bad_token = self.lookahead
            self.err_token = bad_token.value
            self.err.write('\n~err in: multOp(): ' +
                           str(bad_token) + '\n')
            self.lookahead = self.interpreter.scanner()

        # Input exhausted while recovering: the result is None (falsy).
        return None

    def prettify_output(self):
        """Write ``self.output`` to ``self.o`` with line breaks and indentation.

        The text is processed one character at a time:

        * ``{`` is written as ``{`` plus a newline and deepens the
          indentation by four spaces;
        * ``;`` is written as ``;`` plus a newline;
        * ``}`` reduces the indentation by four spaces before it is written.

        The first character written on a fresh line is preceded by the
        current indentation.  Previously the indentation string was
        maintained but never written, so the output was never indented.
        """
        indent = ''
        at_line_start = False

        for c in self.output:
            if c == '}':
                # Dedent first so the closing brace lines up with its block.
                indent = indent[:-4]

            # Bare newlines are not indented, to avoid trailing whitespace.
            if at_line_start and c != '\n':
                self.o.write(indent)

            if c == '{':
                # The brace itself uses the outer indent written above.
                indent += '    '
                c = '{\n'
            elif c == ';':
                c = ';\n'

            self.o.write(c)
            at_line_start = c.endswith('\n')
예제 #9
0
 def visit_Input(self, node):
     """Handle an input node by assigning a freshly read number to ``node.var``.

     The value returned by ``inputer.getNumber()`` is converted to ``float``
     and wrapped in a ``Parser.AST_Num``; an ``AST_BinOp`` built around an
     ``ASSIGN`` token with ``node.var`` on the left is then passed to
     ``self.visit``.  Nothing is returned.
     """
     raw_value = inputer.getNumber()
     assignment = Parser.AST_BinOp(
         Token(ASSIGN, '='),
         node.var,
         Parser.AST_Num(float(raw_value)),
     )
     self.visit(assignment)
 def CREATE_GLOBAL_TABLE(self):
     """Append a new ``Symbol_Table`` named 'Global_Table' to the table stack.

     The table is created for the current ``self.level`` and is described
     by a ``Token('Symbol_Table', 'Global_Table', 'global', 0, 0)``.
     """
     table_token = Token('Symbol_Table', 'Global_Table', 'global', 0, 0)
     global_table = Symbol_Table(self.level, table_token)
     self.SymbolTable_stack.append(global_table)
예제 #11
0
class Syntactic_Parser(object):
    """Table-driven predictive parser that also dispatches semantic actions.

    The parser keeps an explicit stack of symbols.  Each symbol on top of
    the stack is handled according to what it is: a semantic action (a key
    of ``self.semantic_processor.dispatcher``), a terminal (a member of
    ``self.terminal_list``), a grammar production (a key of
    ``self.g.productions``) or ``EPSILON``.  Productions are expanded via
    ``self.table``, which is indexed by production id (row) and by the
    position of the lookahead's ``termtype`` in ``self.terminal_list``
    (column).

    The input/output paths (``infile``, ``outfile``, ``testFile``) and the
    collaborating types (``Grammar``, ``Lexical_Analyzer``, ``Token``,
    ``SemanticProcessor``, ``Production``) are names defined outside this
    class.
    """

    # Value stored in parsing-table cells that hold no production.  Cells
    # are compared against this very object, so the check neither depends
    # on identity of int literals nor calls a stored object's __eq__.
    _EMPTY_CELL = -1

    def __init__(self):
        """Open the log files, build the grammar and table, create the scanner."""
        print("Syntactical_Parser: in __init__")

        # Initialize logs and log messages
        self.f = open(infile)
        self.o = open(outfile, 'w+')
        self.debug_flush = open('Outputs/_FLUSH.txt', 'w+')
        self.output = 'OUTPUT OF ' + testFile + ": \n\n"
        self.errs = '\n\nERRORS OF ' + testFile + ":\n  -- ! Error locations are accurate to the original input file ! --\n\n"
        self.scanner_warnings = ''

        # just keeps measure of tabbing for nice output
        self.tabbed_scope = ''

        # generate grammar object from the specs file
        self.g = Grammar()

        # culminate list of all possible terminals
        self.terminal_list = [
            terminal
            for terminal_set in all_registered_terminals
            for terminal in terminal_set
        ]

        # one row per production, one column per terminal, all cells empty
        self.table = [[self._EMPTY_CELL for _ in self.terminal_list]
                      for _ in range(len(self.g.productions))]
        # stack to be used for the table predictive parsing method
        self.parsing_stack = []

        # initialize the Lexical analyser for token scanning; the input
        # file is only needed for this one read, so it is closed right away
        try:
            source_text = self.f.read()
        finally:
            self.f.close()
        self.interpreter = Lexical_Analyzer(source_text)
        self.lookahead = Token(EOF, EOF, '$', 0, 0)

        # Handles Semantic actions popped from parsing stack
        self.semantic_processor = SemanticProcessor()

        self.initialize_parsing_table()

    def parse(self):
        """Parse the whole input and write the error report and output.

        The stack is seeded with ``EOF`` and the start symbol ``'prog'``.
        After the parsing loop the accumulated ``self.errs`` and
        ``self.output`` are written to ``self.o``.  Both ``self.o`` and
        ``self.debug_flush`` are closed even if parsing raises.
        """
        print("Syntactical_Parser: in parse")

        try:
            self.parsing_stack.append(EOF)
            self.parsing_stack.append('prog')
            self.lookahead = self.interpreter.scanner()

            error = self._run_parsing_loop()
            self._report_results(error)

            self.o.write(self.errs)
            self.o.write(self.output)
        finally:
            self.debug_flush.close()
            self.o.close()

    def _run_parsing_loop(self):
        """Process stack symbols until the stack top or the lookahead is EOF.

        Returns:
            True when a scanner error, a wrong token, an empty table cell
            or an unknown stack symbol was encountered, False otherwise.
        """
        error = False
        while (self.parsing_stack[-1] != EOF
               and self.lookahead.termtype != EOF):
            top = self.parsing_stack[-1]
            print("top       = " + top)
            print("lookahead = " + self.lookahead.value)

            # if top symbol is a semantic action
            if top in self.semantic_processor.dispatcher:
                # Once a semantic error has been recorded, remaining
                # actions are popped without being executed.
                if self.semantic_processor.error == "":
                    print(top)
                    self.semantic_processor.dispatcher[top]()
                self.parsing_stack.pop()

            # if lookahead is a comment, disregard for now.
            elif self.lookahead.type == COMMENT:
                self.lookahead = self.interpreter.scanner()

            elif self.lookahead.termtype == SCAN_ERROR:
                token_text = str(self.lookahead)
                if self.lookahead.type == EOF:
                    print('Error (Scanner): ' + token_text)
                    self.errs += 'Error (Scanner): ' + token_text
                    error = True
                    self.handleError()
                    break
                # any other scan error is only a warning; skip the token
                print("\nWarning (scanner): Token scanning issue: " +
                      token_text)
                self.errs += ("\nWarning (scanner): Token scanning issue: " +
                              token_text)
                self.lookahead = self.interpreter.scanner()

            # if top symbol is a terminal
            elif top in self.terminal_list and top != EPSILON:
                if top == self.lookahead.termtype:
                    self.format_output()

                    self.parsing_stack.pop()
                    self.semantic_processor.prevToken_buffer = self.lookahead
                    self.lookahead = self.interpreter.scanner()
                else:
                    print("error, wrong token")
                    self.errs += ("error, wrong token. Expected: " + top +
                                  " found " + str(self.lookahead) + "\n")
                    error = True
                    self.handleError()

            # if top symbol is a grammar rule
            elif (top in self.g.productions
                  and isinstance(self.g.productions[top], Production)):
                production = self.g.productions[top]
                column = self.terminal_list.index(self.lookahead.termtype)
                chosen_rhs = self.table[production.p_id][column]

                if chosen_rhs is not self._EMPTY_CELL:
                    # replace the production by its selected right-hand side
                    self.parsing_stack.pop()
                    self.parsing_stack.extend(
                        chosen_rhs.inverse_RHS_multiple_push())
                else:
                    print("error, table position is -1")
                    self.errs += ("table error, Expected {" +
                                  production.str_production + "} found " +
                                  str(self.lookahead) + '\n')
                    error = True
                    self.handleError()

            elif top == EPSILON:
                self.parsing_stack.pop()

            else:
                print("error, top symbol was not a SemanticSymbol / Production / token / EPSILON")
                print('top symbol is: ' + top)
                error = True
                break

            # flush what format_output produced during this iteration
            self.debug_flush.write(self.output)
            self.debug_flush.flush()
            self.output = ''

        return error

    @staticmethod
    def _grammar_completed(stack):
        """Return True when ``stack`` holds only the ``'$'`` marker or is empty."""
        return stack in (['$'], [])

    def _report_results(self, error):
        """Print the final parse report and extend ``self.errs``/``self.output``.

        Args:
            error: True when the parsing loop encountered an error.  In that
                case no semantic summary is produced.
        """
        # final parse report
        if self.lookahead.type == EOF:
            print("reached EOF")
        else:
            print("not EOF")

        if self._grammar_completed(self.parsing_stack):
            print("parsing stack is empty, program parsed correctly.")
        else:
            print("Grammar did not finish, heres whats left on the stack: \n" +
                  str(self.parsing_stack))

        if error:
            print("error is True")
            return

        print("error is False")
        self.errs += '\nSource Program contained no parsing errors.\n'

        global_table = self.semantic_processor.SymbolTable_stack[0]
        global_table.delete('Utility')
        print('SymbolTable_stack length is: ' +
              str(len(self.semantic_processor.SymbolTable_stack)))
        print(str(global_table))
        print(self.semantic_processor.warnings)

        if self.semantic_processor.error == "":
            # no semantic errors occured
            self.errs += 'Source Program contained no semantic errors.\n'
            self.errs += self.scanner_warnings
            self.errs += self.semantic_processor.warnings
            self.output = "\nSYMBOL TABLE STRUCTURE:\n" + str(global_table)
        else:
            # semantic errors occured
            print(self.semantic_processor.error)
            self.errs += self.semantic_processor.error

    def newHandleError(self):
        """Recover from an error by simply scanning the next token."""
        print('NEW handling error...')
        self.lookahead = self.interpreter.scanner()

    # This error recovery technique syncronizes the stack and/or the lookahead to the next ;
    def handleError(self):
        """Resynchronise the stack and the lookahead on a sync token.

        The stack is unwound until its top is one of ``EOF``, ``;``, ``{``
        or ``}``.  Productions whose ``str_production`` does not contain
        ``EPSILON`` are replaced by their first right-hand side on the way;
        every other symbol is dropped.  Afterwards tokens are scanned until
        the lookahead value equals the sync token found on the stack or the
        lookahead type is ``EOF``.
        """
        print('handling error... syncronizing to next expected sync_token')

        sync_tokens = [EOF, ';', '{', '}']

        # syncronizing the parsing stack to the next sync token
        while self.parsing_stack[-1] not in sync_tokens:
            top = self.parsing_stack.pop()

            is_production = (top in self.g.productions and
                             isinstance(self.g.productions[top], Production))
            if not is_production:
                continue

            production = self.g.productions[top]
            if EPSILON not in production.str_production:
                # expand it and push it reversed on the stack, as usual..
                self.parsing_stack.extend(
                    production.RHSs[0].inverse_RHS_multiple_push())

        sync_token = self.parsing_stack[-1]

        print('syncing next lookahead')
        # syncronizing the lookahead scanner to the sync token
        while (self.lookahead.value != sync_token
               and self.lookahead.type != EOF):
            self.lookahead = self.interpreter.scanner()
            print('scanning for a ' + sync_token + '... ' +
                  str(self.lookahead.value))

    def format_output(self):
        """Append the lookahead's value to ``self.output`` with simple layout.

        ``;`` ends the line, ``{`` ends the line and indents by four spaces,
        ``}`` dedents and is placed on its own new line; any other value is
        followed by a single space.
        """
        value = self.lookahead.value

        if value == ';':
            self.output += value + "\n" + self.tabbed_scope

        # handle curly braces
        elif value == '{':
            self.tabbed_scope += '    '
            self.output += value + "\n" + self.tabbed_scope
        elif value == '}':
            self.tabbed_scope = self.tabbed_scope[:-4]
            self.output += "\n" + self.tabbed_scope + value

        else:
            self.output += value + " "

    # create predictive parsing table
    def initialize_parsing_table(self):
        """Fill ``self.table`` from the first/follow sets of every right-hand side.

        For each right-hand side of each production, the cell addressed by
        the production's ``p_id`` and the index of each terminal of the
        right-hand side's ``first`` and then ``follow`` set is set to that
        right-hand side.  Later assignments overwrite earlier ones.  The
        finished table is passed to ``print_table``.
        """
        # for each production rule in the grammar
        for prod_idx in self.g.productions:
            production = self.g.productions[prod_idx]
            row = self.table[production.p_id]

            for prod_RHS in production.RHSs:
                # the RHS to expand when one of these terminals is the
                # lookahead while this production is on top of the stack
                for terminal_set in (prod_RHS.first, prod_RHS.follow):
                    for terminal in terminal_set:
                        row[self.terminal_list.index(terminal)] = prod_RHS

        print_table(self.table)
예제 #12
0
 def test_parse_id(self):
     """Check that ``parse_id`` is truthy for a token built as ``Token("ID", ...)``."""
     # The token is installed directly as the parser's current token.
     self.parser.current_token = Token("ID", "test_val", 0, 0)
     self.assertTrue(self.parser.parse_id())