def parse():
    """Scan the whole input file and dump every token to the output files.

    Reads ``infile``, tokenizes it with ``Lexical_Analyzer`` and writes one
    token per line: tokens whose type is ``SCAN_ERROR`` go to
    ``Outputs/ErrorLog_<testFile>``, every other token (including the final
    EOF token) goes to ``Outputs/Output_<testFile>``. Each token is also
    echoed to stdout.
    """
    # The scanner only needs the text, so the input file is closed right away.
    with open(infile) as source:
        interpreter = Lexical_Analyzer(source.read())

    outfile = "Outputs/Output_" + testFile
    error_log = "Outputs/ErrorLog_" + testFile

    # Placeholder token so the loop condition can be evaluated before the
    # first real token has been scanned.
    lookahead = Token(SCAN_ERROR, 0, 0, 0)

    with open(outfile, 'w+') as o, open(error_log, 'w+') as err:
        # Compare by value: an identity test (`is not`) only works when the
        # scanner happens to hand back the very same object as EOF.
        while lookahead.type != EOF:
            lookahead = interpreter.scanner()
            print(lookahead)
            sink = err if lookahead.type == SCAN_ERROR else o
            sink.write(str(lookahead) + "\n")
class Syntactic_Parser(object):
    """Recursive-descent parser driven by the FIRST/FOLLOW sets in ``ff_sets``.

    Every grammar rule is a method that returns True when the rule could be
    derived from the token stream and False otherwise. A rule picks its
    alternative by testing the lookahead against the matching ``ff_sets``
    entry. While an alternative cannot be derived, the offending lookahead is
    written to the error log and skipped, and the alternative is retried
    until a token of type ``'$'`` is reached.

    Matched token values are accumulated in ``self.output`` and written to
    the output file by ``prettify_output``.
    """

    def __init__(self):
        """Open the input, output and error-log files and create the scanner."""
        print("Syntactical_Parser: in __init__")
        self.outfile = "Outputs/Output_" + testFile
        self.error_log = "Outputs/ErrorLog_" + testFile
        self.f = open(infile)
        self.o = open(self.outfile, 'w+')
        self.err = open(self.error_log, 'w+')
        self.interpreter = Lexical_Analyzer(self.f.read())
        # Placeholder so `lookahead` is usable before the first real scan.
        self.lookahead = Token(SCAN_ERROR, '', 0, 0)
        self.EPSILON = 'EPSILON'
        self.err_token = ''
        self.output = ''

    def parse(self):
        """Run the parser over the input and write the collected output.

        The three files opened by ``__init__`` are always closed, even when
        parsing raises.
        """
        print("Syntactical_Parser: in parse")
        try:
            # Value comparison: `is not EOF` depended on object identity.
            while self.lookahead.type != EOF:
                self.lookahead = self.interpreter.scanner()
                print(self.lookahead)
                # NOTE(review): prog() is invoked after every top-level scan
                # until EOF is seen - confirm this is the intended flow.
                if self.prog():
                    self.err.write("\nmain parse() returning True")
                else:
                    self.err.write("\nmain parse() returning False")
            self.prettify_output()
        finally:
            self.f.close()
            self.o.close()
            self.err.close()

    # ------------------------------------------------------------------
    # Token-level helpers
    # ------------------------------------------------------------------
    def match(self, token):
        """Consume the lookahead if its value equals *token*.

        On a match the value is appended to ``self.output``, the next token
        is scanned and True is returned; otherwise nothing is consumed and
        False is returned.
        """
        current = self.lookahead.value
        if current is not None:
            print("Syntactical_Parser: match(" + current + ", " + token + ")")
        if current != token:
            return False
        self.output += current
        self.lookahead = self.interpreter.scanner()
        return True

    def match_type(self, type):
        """Consume the lookahead if its token type equals *type*.

        Behaves like ``match`` but compares ``lookahead.type``.
        """
        if self.lookahead.value is not None:
            print("Syntactical_Parser: match_type(" + self.lookahead.type +
                  ", " + type + ")")
        if self.lookahead.type != type:
            return False
        self.output += self.lookahead.value
        self.lookahead = self.interpreter.scanner()
        return True

    def is_type_Id(self):
        """Return True when the lookahead token has type ``'Id'``."""
        return self.lookahead.type == 'Id'

    def _recover(self, rule):
        """Log the lookahead as an error inside *rule* and skip past it."""
        self.err_token = self.lookahead.value
        self.err.write('\n~err in: ' + rule + '(): ' + str(self.lookahead) +
                       '\n')
        self.lookahead = self.interpreter.scanner()

    def _derive(self, rule, trace, attempt):
        """Retry *attempt* until it succeeds or a ``'$'`` token is reached.

        *attempt* is a zero-argument callable that tries to derive one
        alternative of *rule*. On success *trace* is printed and True is
        returned. After each failure the current lookahead is logged and
        skipped. Returns False once the lookahead type is ``'$'``.
        """
        while self.lookahead.type != '$':
            if attempt():
                print(trace)
                return True
            self._recover(rule)
        return False

    # ------------------------------------------------------------------
    # Program structure
    # ------------------------------------------------------------------
    def prog(self):
        """prog -> classDecl progBody"""
        print("Syntactical_Parser: in prog")
        if self.lookahead.value not in ff_sets.prog_FIRST1:
            return False
        return self._derive(
            'prog', "prog -> classDecl progBody",
            lambda: self.classDecl() and self.progBody())

    def classDecl(self):
        """classDecl -> class Id { classBody } ; classDecl | EPSILON"""
        print("Syntactical_Parser: in classDecl")
        if self.lookahead.value in ff_sets.classDecl_FIRST1:
            return self._derive(
                'classDecl',
                "classDecl -> class Id { classBody } ; classDecl ",
                lambda: (self.match('class') and self.match_type('Id')
                         and self.match('{') and self.classBody()
                         and self.match('}') and self.match(';')
                         and self.classDecl()))
        if self.lookahead.value in ff_sets.classDecl_FOLLOW:
            print("classDecl -> EPSILON")
            return True
        return False

    def classBody(self):
        """classBody -> type Id varOrFunc | EPSILON"""
        print("Syntactical_Parser: in classBody")
        if (self.lookahead.value in ff_sets.classBody_FIRST1
                or self.is_type_Id()):
            return self._derive(
                'classBody', "classBody -> type Id varOrFunc",
                lambda: (self.type() and self.match_type('Id')
                         and self.varOrFunc()))
        if self.lookahead.value in ff_sets.classBody_FOLLOW:
            print("classBody -> EPSILON")
            return True
        return False

    def varOrFunc(self):
        """varOrFunc -> indice ; classBody | ( fParams ) funcBody ; classBody"""
        print("Syntactical_Parser: in varOrFunc")
        if self.lookahead.value in ff_sets.varOrFunc_FIRST1:
            return self._derive(
                'varOrFunc', "varOrFunc -> indice ; classBody",
                lambda: (self.indice() and self.match(';')
                         and self.classBody()))
        if self.lookahead.value in ff_sets.varOrFunc_FIRST2:
            return self._derive(
                'varOrFunc', "varOrFunc -> ( fParams ) funcBody classBody",
                lambda: (self.match('(') and self.fParams()
                         and self.match(')') and self.funcBody()
                         and self.match(';') and self.classBody()))
        return False

    def progBody(self):
        """progBody -> program funcBody ; funcDef"""
        print("Syntactical_Parser: in progBody")
        if self.lookahead.value not in ff_sets.progBody_FIRST1:
            return False
        return self._derive(
            'progBody', "progBody -> program funcBody ; funcDef",
            lambda: (self.match('program') and self.funcBody()
                     and self.match(';') and self.funcDef()))

    def funcHead(self):
        """funcHead -> type Id ( fParams )"""
        print("Syntactical_Parser: in funcHead")
        if not (self.lookahead.value in ff_sets.funcHead_FIRST1
                or self.is_type_Id()):
            return False
        return self._derive(
            'funcHead', "funcHead -> type Id ( fParams )",
            lambda: (self.type() and self.match_type('Id')
                     and self.match('(') and self.fParams()
                     and self.match(')')))

    def funcDef(self):
        """funcDef -> funcHead funcBody ; funcDef | EPSILON"""
        print("Syntactical_Parser: in funcDef")
        if (self.lookahead.value in ff_sets.funcDef_FIRST1
                or self.is_type_Id()):
            return self._derive(
                'funcDef', "funcDef -> funcHead funcBody ; funcDef",
                lambda: (self.funcHead() and self.funcBody()
                         and self.match(';') and self.funcDef()))
        # The FOLLOW test for this rule is made on the token *type*.
        if self.lookahead.type in ff_sets.funcDef_FOLLOW:
            print("funcDef -> EPSILON")
            return True
        return False

    def funcBody(self):
        """funcBody -> { gen_statements }"""
        print("Syntactical_Parser: in funcBody")
        if self.lookahead.value not in ff_sets.funcBody_FIRST1:
            return False
        return self._derive(
            'funcBody', "funcBody -> { gen_statements }",
            lambda: (self.match('{') and self.gen_statements()
                     and self.match('}')))

    # ------------------------------------------------------------------
    # Declarations and statements
    # ------------------------------------------------------------------
    def varDecl(self):
        """varDecl -> UD_Type createOrAssign varDeclTail | P_Type Id varDeclTail"""
        print("Syntactical_Parser: in varDecl")
        if (self.lookahead.type in ff_sets.varDecl_FIRST1
                or self.is_type_Id()):
            return self._derive(
                'varDecl', "varDecl -> UD_Type createOrAssign varDeclTail",
                lambda: (self.UD_Type() and self.createOrAssign()
                         and self.varDeclTail()))
        if self.lookahead.value in ff_sets.varDecl_FIRST2:
            return self._derive(
                'varDecl', "varDecl -> P_Type Id varDeclTail",
                lambda: (self.P_Type() and self.match_type('Id')
                         and self.varDeclTail()))
        return False

    def createOrAssign(self):
        """createOrAssign -> Id | EPSILON"""
        print("Syntactical_Parser: in createOrAssign")
        if self.lookahead.type in ff_sets.createOrAssign_FIRST1:
            return self._derive(
                'createOrAssign', "createOrAssign -> Id",
                lambda: self.match_type('Id'))
        if self.lookahead.value in ff_sets.createOrAssign_FOLLOW:
            print("createOrAssign -> EPSILON")
            return True
        return False

    def varDeclTail(self):
        """varDeclTail -> indice is_Assign ;"""
        print("Syntactical_Parser: in varDeclTail")
        if self.lookahead.value not in ff_sets.varDeclTail_FIRST1:
            return False
        return self._derive(
            'varDeclTail', "varDeclTail -> indice is_Assign ;",
            lambda: (self.indice() and self.is_Assign()
                     and self.match(';')))

    def is_Assign(self):
        """is_Assign -> assignOp expr | EPSILON"""
        print("Syntactical_Parser: in is_Assign")
        if self.lookahead.value in ff_sets.is_Assign_FIRST1:
            return self._derive(
                'is_Assign', "is_Assign -> assignOp expr",
                lambda: self.assignOp() and self.expr())
        if self.lookahead.value in ff_sets.is_Assign_FOLLOW:
            print("is_Assign -> EPSILON")
            return True
        return False

    def statement(self):
        """statement -> for ... | if ... | get ... | put ... | return ... | varDecl"""
        print("Syntactical_Parser: in statement")
        value = self.lookahead.value
        if value in ff_sets.statement_FIRST1:
            # A successful `for` statement now returns True like every other
            # alternative; previously the return was missing, so the loop
            # retried and reported a spurious error.
            return self._derive(
                'statement',
                "statement -> for ( type Id assignOp expr ; relExpr ; "
                "assignStat ) statBlock ;",
                lambda: (self.match('for') and self.match('(')
                         and self.type() and self.match_type('Id')
                         and self.assignOp() and self.expr()
                         and self.match(';') and self.relExpr()
                         and self.match(';') and self.assignStat()
                         and self.match(')') and self.statBlock()
                         and self.match(';')))
        if value in ff_sets.statement_FIRST2:
            return self._derive(
                'statement',
                "statement -> if ( expr ) then statBlock else statBlock ;",
                lambda: (self.match('if') and self.match('(')
                         and self.expr() and self.match(')')
                         and self.match('then') and self.statBlock()
                         and self.match('else') and self.statBlock()
                         and self.match(';')))
        if value in ff_sets.statement_FIRST3:
            return self._derive(
                'statement', "statement -> get ( variable ) ;",
                lambda: (self.match('get') and self.match('(')
                         and self.variable() and self.match(')')
                         and self.match(';')))
        if value in ff_sets.statement_FIRST4:
            return self._derive(
                'statement', "statement -> put ( expr ) ;",
                lambda: (self.match('put') and self.match('(')
                         and self.expr() and self.match(')')
                         and self.match(';')))
        if value in ff_sets.statement_FIRST5:
            return self._derive(
                'statement', "statement -> return ( expr ) ;",
                lambda: (self.match('return') and self.match('(')
                         and self.expr() and self.match(')')
                         and self.match(';')))
        if value in ff_sets.statement_FIRST6 or self.is_type_Id():
            return self._derive(
                'statement', "statement -> varDecl",
                lambda: self.varDecl())
        return False

    def gen_statements(self):
        """gen_statements -> statement gen_statements | EPSILON"""
        print("Syntactical_Parser: in gen_statements")
        if (self.lookahead.value in ff_sets.gen_statements_FIRST1
                or self.is_type_Id()):
            return self._derive(
                'gen_statements',
                "gen_statements -> statement gen_statements",
                lambda: self.statement() and self.gen_statements())
        if self.lookahead.value in ff_sets.gen_statements_FOLLOW:
            print("gen_statements -> EPSILON")
            return True
        return False

    def assignStat(self):
        """assignStat -> variable assignOp expr"""
        print("Syntactical_Parser: in assignStat")
        if self.lookahead.type not in ff_sets.assignStat_FIRST1:
            return False
        return self._derive(
            'assignStat', "assignStat -> variable assignOp expr",
            lambda: (self.variable() and self.assignOp()
                     and self.expr()))

    def statBlock(self):
        """statBlock -> { gen_statements } | statement | EPSILON"""
        print("Syntactical_Parser: in statBlock")
        if self.lookahead.value in ff_sets.statBlock_FIRST1:
            return self._derive(
                'statBlock', "statBlock -> { gen_statements }",
                lambda: (self.match('{') and self.gen_statements()
                         and self.match('}')))
        if (self.lookahead.value in ff_sets.statBlock_FIRST2
                or self.is_type_Id()):
            return self._derive(
                'statBlock', "statBlock -> statement",
                lambda: self.statement())
        if self.lookahead.value in ff_sets.statBlock_FOLLOW:
            print("statBlock -> EPSILON")
            return True
        return False

    # ------------------------------------------------------------------
    # Expressions
    # ------------------------------------------------------------------
    def expr(self):
        """expr -> arithExpr gen_relArithExpr"""
        print("Syntactical_Parser: in expr")
        if not (self.lookahead.value in ff_sets.expr_FIRST1
                or self.lookahead.type in ff_sets.expr_FIRST1):
            return False
        return self._derive(
            'expr', "expr -> arithExpr gen_relArithExpr",
            lambda: self.arithExpr() and self.gen_relArithExpr())

    def relExpr(self):
        """relExpr -> arithExpr relOp arithExpr"""
        print("Syntactical_Parser: in relExpr")
        if not (self.lookahead.value in ff_sets.relExpr_FIRST1
                or self.is_type_Id()):
            return False
        return self._derive(
            'relExpr', "relExpr -> arithExpr relOp arithExpr",
            lambda: (self.arithExpr() and self.relOp()
                     and self.arithExpr()))

    def gen_relArithExpr(self):
        """gen_relArithExpr -> relOp arithExpr | EPSILON"""
        print("Syntactical_Parser: in gen_relArithExpr")
        if self.lookahead.value in ff_sets.gen_relArithExpr_FIRST1:
            return self._derive(
                'gen_relArithExpr', "gen_relArithExpr -> relOp arithExpr",
                lambda: self.relOp() and self.arithExpr())
        if self.lookahead.value in ff_sets.gen_relArithExpr_FOLLOW:
            print("gen_relArithExpr -> EPSILON")
            return True
        return False

    def arithExpr(self):
        """arithExpr -> term gen_addArithExpr"""
        print("Syntactical_Parser: in arithExpr")
        if not (self.lookahead.value in ff_sets.arithExpr_FIRST1
                or self.lookahead.type in ff_sets.arithExpr_FIRST1):
            return False
        return self._derive(
            'arithExpr', "arithExpr -> term gen_addArithExpr",
            lambda: self.term() and self.gen_addArithExpr())

    def gen_addArithExpr(self):
        """gen_addArithExpr -> addOp arithExpr | EPSILON"""
        print("Syntactical_Parser: in gen_addArithExpr")
        if self.lookahead.value in ff_sets.gen_addArithExpr_FIRST1:
            return self._derive(
                'gen_addArithExpr', "gen_addArithExpr -> addOp arithExpr",
                lambda: self.addOp() and self.arithExpr())
        if self.lookahead.value in ff_sets.gen_addArithExpr_FOLLOW:
            print("gen_addArithExpr -> EPSILON")
            return True
        return False

    def sign(self):
        """sign -> + | -"""
        print("Syntactical_Parser: in sign")
        if self.lookahead.value not in ff_sets.sign_FIRST1:
            return False
        while self.lookahead.type != '$':
            # Each alternative has its own trace line, so this rule does not
            # go through _derive.
            if self.match('+'):
                print("sign -> +")
                return True
            if self.match('-'):
                print("sign -> -")
                return True
            self._recover('sign')
        return False

    def term(self):
        """term -> factor gen_Term"""
        print("Syntactical_Parser: in term")
        if not (self.lookahead.value in ff_sets.term_FIRST1
                or self.lookahead.type in ff_sets.term_FIRST1):
            return False
        return self._derive(
            'term', "term -> factor gen_Term",
            lambda: self.factor() and self.gen_Term())

    def gen_Term(self):
        """gen_Term -> multOp term | EPSILON"""
        print("Syntactical_Parser: in gen_Term")
        if self.lookahead.value in ff_sets.gen_Term_FIRST1:
            return self._derive(
                'gen_Term', "gen_Term -> multOp term",
                lambda: self.multOp() and self.term())
        if self.lookahead.value in ff_sets.gen_Term_FOLLOW:
            print("gen_Term -> EPSILON")
            return True
        return False

    def factor(self):
        """factor -> Id paramsOrIndice | ( arithExpr ) | not factor | int | float | sign factor"""
        print("Syntactical_Parser: in factor")
        if self.lookahead.type in ff_sets.factor_FIRST1:
            return self._derive(
                'factor', "factor -> Id paramsOrIndice",
                lambda: self.match_type('Id') and self.paramsOrIndice())
        if self.lookahead.value in ff_sets.factor_FIRST2:
            return self._derive(
                'factor', "factor -> ( arithExpr )",
                lambda: (self.match('(') and self.arithExpr()
                         and self.match(')')))
        if self.lookahead.value in ff_sets.factor_FIRST3:
            return self._derive(
                'factor', "factor -> not factor",
                lambda: self.match('not') and self.factor())
        if self.lookahead.type in ff_sets.factor_FIRST4:
            return self._derive(
                'factor', "factor -> int",
                lambda: self.match_type('int'))
        if self.lookahead.type in ff_sets.factor_FIRST5:
            return self._derive(
                'factor', "factor -> float",
                lambda: self.match_type('float'))
        if self.lookahead.value in ff_sets.factor_FIRST6:
            return self._derive(
                'factor', "factor -> sign factor",
                lambda: self.sign() and self.factor())
        return False

    def paramsOrIndice(self):
        """paramsOrIndice -> ( aParams ) | indice"""
        print("Syntactical_Parser: in paramsOrIndice")
        if self.lookahead.value in ff_sets.paramsOrIndice_FIRST1:
            if self._derive(
                    'paramsOrIndice', "paramsOrIndice -> ( aParams )",
                    lambda: (self.match('(') and self.aParams()
                             and self.match(')'))):
                return True
            # Falls through to the `indice` alternative when the call form
            # could not be derived before '$'.
        if self.indice():
            print("paramsOrIndice -> indice")
            return True
        return False

    def variable(self):
        """variable -> Id indice"""
        print("Syntactical_Parser: in variable")
        if self.match_type('Id') and self.indice():
            print("variable -> Id indice")
            return True
        return False

    def indice(self):
        """indice -> [ arithExpr ] indice | . Id paramsOrIndice | EPSILON"""
        print("Syntactical_Parser: in indice")
        if self.lookahead.value in ff_sets.indice_FIRST1:
            return self._derive(
                'indice', "indice -> [ arithExpr ] indice",
                lambda: (self.match('[') and self.arithExpr()
                         and self.match(']') and self.indice()))
        if self.lookahead.value == '.':
            return self._derive(
                'indice', "indice -> . Id paramsOrIndice",
                lambda: (self.match('.') and self.match_type('Id')
                         and self.paramsOrIndice()))
        if self.lookahead.value in ff_sets.indice_FOLLOW:
            print("indice -> EPSILON")
            return True
        return False

    def arraySize(self):
        """arraySize -> [ int ] | EPSILON"""
        print("Syntactical_Parser: in arraySize")
        if self.lookahead.value in ff_sets.arraySize_FIRST1:
            return self._derive(
                'arraySize', "arraySize -> [ int ]",
                lambda: (self.match('[') and self.match_type('int')
                         and self.match(']')))
        if self.lookahead.value in ff_sets.arraySize_FOLLOW:
            print("arraySize -> EPSILON")
            return True
        return False

    # ------------------------------------------------------------------
    # Types and parameters
    # ------------------------------------------------------------------
    def type(self):
        """type -> Id | int | float (no error recovery)."""
        print("Syntactical_Parser: in type")
        # This rule is gated on fParams_FIRST1 rather than a set of its own.
        if not (self.lookahead.value in ff_sets.fParams_FIRST1
                or self.is_type_Id()):
            return False
        if self.match_type('Id'):
            print("type -> Id")
            return True
        if self.match('int'):
            print("type -> int")
            return True
        if self.match('float'):
            print("type -> float")
            return True
        return False

    def P_Type(self):
        """P_Type -> int | float (no error recovery)."""
        print("Syntactical_Parser: in P_Type")
        if self.lookahead.value not in ff_sets.P_Type_FIRST1:
            return False
        if self.match('int'):
            print("type -> int")
            return True
        if self.match('float'):
            print("type -> float")
            return True
        return False

    def UD_Type(self):
        """UD_Type -> Id (no error recovery)."""
        print("Syntactical_Parser: in UD_Type")
        if self.lookahead.type not in ff_sets.UD_Type_FIRST1:
            return False
        if self.match_type('Id'):
            print("type -> Id")
            return True
        return False

    def fParams(self):
        """fParams -> type Id arraySize fParamsTail | EPSILON"""
        print("Syntactical_Parser: in fParams")
        if (self.lookahead.value in ff_sets.fParams_FIRST1
                or self.is_type_Id()):
            return self._derive(
                'fParams', "fParams -> type Id arraySize fParamsTail",
                lambda: (self.type() and self.match_type('Id')
                         and self.arraySize() and self.fParamsTail()))
        if self.lookahead.value in ff_sets.fParams_FOLLOW:
            print("fParams -> EPSILON")
            return True
        return False

    def aParams(self):
        """aParams -> expr aParamsTail | EPSILON (when the lookahead is ')')."""
        print("Syntactical_Parser: in aParams")
        if (self.lookahead.value in ff_sets.aParams_FIRST1
                or self.is_type_Id()):
            return self._derive(
                'aParams', "aParams -> expr aParamsTail",
                lambda: self.expr() and self.aParamsTail())
        if self.lookahead.value == ')':
            # Trace corrected: it used to name fParamsTail (copy/paste slip).
            print("aParams -> EPSILON")
            return True
        return False

    def fParamsTail(self):
        """fParamsTail -> , type Id arraySize fParamsTail | EPSILON"""
        print("Syntactical_Parser: in fParamsTail")
        if self.lookahead.value in ff_sets.fParamsTail_FIRST1:
            return self._derive(
                'fParamsTail',
                "fParamsTail -> , type Id arraySize fParamsTail",
                lambda: (self.match(',') and self.type()
                         and self.match_type('Id') and self.arraySize()
                         and self.fParamsTail()))
        if self.lookahead.value in ff_sets.fParamsTail_FOLLOW:
            print("fParamsTail -> EPSILON")
            return True
        return False

    def aParamsTail(self):
        """aParamsTail -> , expr aParamsTail | EPSILON"""
        print("Syntactical_Parser: in aParamsTail")
        if self.lookahead.value in ff_sets.aParamsTail_FIRST1:
            return self._derive(
                'aParamsTail', "aParamsTail -> , expr aParamsTail",
                lambda: (self.match(',') and self.expr()
                         and self.aParamsTail()))
        if self.lookahead.value in ff_sets.aParamsTail_FOLLOW:
            print("aParamsTail -> EPSILON")
            return True
        return False

    # ------------------------------------------------------------------
    # Operators
    # ------------------------------------------------------------------
    def assignOp(self):
        """assignOp -> ="""
        print("Syntactical_Parser: in assignOp")
        if self.lookahead.value not in ff_sets.assignOp_FIRST1:
            return False
        return self._derive(
            'assignOp', "assignOp -> =", lambda: self.match('='))

    def relOp(self):
        """relOp -> < | <= | <> | == | > | >="""
        print("Syntactical_Parser: in relOp")
        if self.lookahead.value not in ff_sets.relOp_FIRST1:
            return False
        return self._derive(
            'relOp', "relOp -> < | <= | <> | == | > | >=",
            lambda: (self.match('<') or self.match('<=')
                     or self.match('<>') or self.match('==')
                     or self.match('>') or self.match('>=')))

    def addOp(self):
        """addOp -> + | - | or"""
        print("Syntactical_Parser: in addOp")
        if self.lookahead.value not in ff_sets.addOp_FIRST1:
            return False
        return self._derive(
            'addOp', "addOp -> + | - | or",
            lambda: (self.match('+') or self.match('-')
                     or self.match('or')))

    def multOp(self):
        """multOp -> * | / | and"""
        print("Syntactical_Parser: in multOp")
        if self.lookahead.value not in ff_sets.multOp_FIRST1:
            return False
        return self._derive(
            'multOp', "multOp -> * | / | and",
            lambda: (self.match('*') or self.match('/')
                     or self.match('and')))

    # ------------------------------------------------------------------
    # Output
    # ------------------------------------------------------------------
    def prettify_output(self):
        """Write ``self.output`` to the output file with line breaks added.

        A newline is inserted after every ``{`` and ``;``; all other
        characters are written unchanged. No indentation is emitted.
        """
        line_breaks = {'{': '{\n', ';': ';\n'}
        self.o.write(''.join(line_breaks.get(ch, ch) for ch in self.output))
class Syntactic_Parser(object):
    """Table-driven predictive parser with embedded semantic actions.

    A parsing table indexed by production id and terminal index is built
    from the FIRST and FOLLOW sets of each right-hand side in the grammar.
    ``parse`` then runs a stack machine over the token stream: semantic
    action symbols are dispatched to ``SemanticProcessor``, terminals are
    matched against the lookahead, and productions are expanded through the
    table. Errors are recovered by synchronizing on ``;``, ``{``, ``}`` or
    EOF (see ``handleError``).
    """

    # One indentation level in the echoed source; '}' removes exactly this
    # many characters again, so the two must stay in step.
    _INDENT = '    '

    def __init__(self):
        """Open the log files, load the grammar and build the parsing table."""
        print("Syntactical_Parser: in __init__")
        # Initialize logs and log messages
        self.f = open(infile)
        self.o = open(outfile, 'w+')
        self.debug_flush = open('Outputs/_FLUSH.txt', 'w+')
        self.output = 'OUTPUT OF ' + testFile + ": \n\n"
        self.errs = ('\n\nERRORS OF ' + testFile +
                     ":\n -- ! Error locations are accurate to the original"
                     " input file ! --\n\n")
        self.scanner_warnings = ''
        # current indentation prefix used by format_output
        self.tabbed_scope = ''
        # grammar object generated from the specs file
        self.g = Grammar()
        # flat list of every registered terminal; its positions are the
        # column indices of the parsing table
        self.terminal_list = [terminal
                              for terminal_set in all_registered_terminals
                              for terminal in terminal_set]
        # table[production id][terminal index]; -1 marks "no entry"
        self.table = [[-1] * len(self.terminal_list)
                      for _ in range(len(self.g.productions))]
        # stack used by the table-driven predictive parsing method
        self.parsing_stack = []
        # lexical analyser used for token scanning
        self.interpreter = Lexical_Analyzer(self.f.read())
        self.lookahead = Token(EOF, EOF, '$', 0, 0)
        # handles semantic actions popped from the parsing stack
        self.semantic_processor = SemanticProcessor()
        self.initialize_parsing_table()

    def parse(self):
        """Parse the input, then write the error report and symbol table.

        The files opened by ``__init__`` are closed when parsing finishes,
        whether it completed or raised.
        """
        print("Syntactical_Parser: in parse")
        try:
            error = self._run_parse_loop()
            self._report(error)
        finally:
            self.debug_flush.close()
            self.o.close()
            self.f.close()

    def _run_parse_loop(self):
        """Drive the stack machine; return True if a parse error occurred."""
        self.parsing_stack.append(EOF)
        self.parsing_stack.append('prog')
        self.lookahead = self.interpreter.scanner()
        error = False

        # Value comparisons: the original identity tests (`is not EOF`)
        # only worked when the very same object was handed back.
        while (self.parsing_stack[-1] != EOF
               and self.lookahead.termtype != EOF):
            top = self.parsing_stack[-1]
            print("top = " + top)
            print("lookahead = " + self.lookahead.value)

            # top symbol is a semantic action
            if top in self.semantic_processor.dispatcher:
                # Actions are skipped (but still popped) once a semantic
                # error has been recorded.
                if self.semantic_processor.error == "":
                    print(top)
                    self.semantic_processor.dispatcher[top]()
                self.parsing_stack.pop()

            # comments are disregarded for now
            elif self.lookahead.type == COMMENT:
                self.lookahead = self.interpreter.scanner()

            elif self.lookahead.termtype == SCAN_ERROR:
                if self.lookahead.type == EOF:
                    message = 'Error (Scanner): ' + str(self.lookahead)
                    print(message)
                    self.errs += message
                    error = True
                    self.handleError()
                    break
                message = ("\nWarning (scanner): Token scanning issue: " +
                           str(self.lookahead))
                print(message)
                self.errs += message
                self.lookahead = self.interpreter.scanner()

            # top symbol is a terminal
            elif top in self.terminal_list and top != EPSILON:
                if top == self.lookahead.termtype:
                    self.format_output()
                    self.parsing_stack.pop()
                    self.semantic_processor.prevToken_buffer = self.lookahead
                    self.lookahead = self.interpreter.scanner()
                else:
                    print("error, wrong token")
                    self.errs += ("error, wrong token. Expected: " + top +
                                  " found " + str(self.lookahead) + "\n")
                    error = True
                    self.handleError()

            # top symbol is a grammar rule
            elif (top in self.g.productions
                  and type(self.g.productions[top]) is Production):
                production = self.g.productions[top]
                column = self.terminal_list.index(self.lookahead.termtype)
                entry = self.table[production.p_id][column]
                # `is not -1` compared identity with an int literal.
                if entry != -1:
                    # replace the rule by its selected right-hand side
                    self.parsing_stack.pop()
                    self.parsing_stack.extend(
                        entry.inverse_RHS_multiple_push())
                else:
                    print("error, table position is -1")
                    self.errs += ("table error, Expected {" +
                                  production.str_production + "} found " +
                                  str(self.lookahead) + '\n')
                    error = True
                    self.handleError()

            elif top == EPSILON:
                self.parsing_stack.pop()

            else:
                print("error, top symbol was not a SemanticSymbol / "
                      "Production / token / EPSILON")
                print('top symbol is: ' + top)
                error = True
                break

            # Stream the echoed source to the debug file after every step so
            # it survives a crash, then start a fresh buffer.
            self.debug_flush.write(self.output)
            self.debug_flush.flush()
            self.output = ''
        return error

    def _report(self, error):
        """Print the final parse summary and write errors/symbol table to ``self.o``."""
        # ---- Parse / semantic evaluation ----
        if self.lookahead.type == EOF:
            print("reached EOF")
        else:
            print("not EOF")

        # Finished means nothing (or only the end marker) is left; the old
        # `!= ['$'] or []` test treated an empty stack as unfinished.
        if self.parsing_stack not in ([], ['$']):
            print("Grammar did not finish, heres whats left on the stack: \n"
                  + str(self.parsing_stack))
        else:
            print("parsing stack is empty, program parsed correctly.")

        if error:
            print("error is True")
        else:
            print("error is False")
            self.errs += '\nSource Program contained no parsing errors.\n'

        global_table = self.semantic_processor.SymbolTable_stack[0]
        global_table.delete('Utility')
        print('SymbolTable_stack length is: ' +
              str(len(self.semantic_processor.SymbolTable_stack)))
        print(str(global_table))
        print(self.semantic_processor.warnings)

        if self.semantic_processor.error == "":
            # no semantic errors occurred
            self.errs += 'Source Program contained no semantic errors.\n'
            self.errs += self.scanner_warnings
            self.errs += self.semantic_processor.warnings
            self.output = "\nSYMBOL TABLE STRUCTURE:\n" + str(global_table)
        else:
            # semantic errors occurred
            print(self.semantic_processor.error)
            self.errs += self.semantic_processor.error

        self.o.write(self.errs)
        self.o.write(self.output)

    def newHandleError(self):
        """Minimal recovery: skip the current lookahead token."""
        print('NEW handling error...')
        self.lookahead = self.interpreter.scanner()

    def handleError(self):
        """Synchronize the stack and the lookahead on the next sync token.

        The stack is unwound until its top is EOF, ``;``, ``{`` or ``}``.
        A production without EPSILON in ``str_production`` is replaced by its
        first right-hand side so sync tokens inside it can surface; anything
        else is dropped. Tokens are then skipped until the lookahead value
        equals that sync token or EOF is reached.
        """
        print('handling error... syncronizing to next expected sync_token')
        sync_tokens = [EOF, ';', '{', '}']

        # synchronize the parsing stack
        while self.parsing_stack[-1] not in sync_tokens:
            top = self.parsing_stack.pop()
            if (top in self.g.productions
                    and type(self.g.productions[top]) is Production):
                production = self.g.productions[top]
                if EPSILON not in production.str_production:
                    # expand it and push it reversed on the stack, as usual
                    self.parsing_stack.extend(
                        production.RHSs[0].inverse_RHS_multiple_push())

        sync_token = self.parsing_stack[-1]
        print('syncing next lookahead')

        # synchronize the scanner
        while (self.lookahead.value != sync_token
               and self.lookahead.type != EOF):
            self.lookahead = self.interpreter.scanner()
            print('scanning for a ' + sync_token + '... ' +
                  str(self.lookahead.value))

    def format_output(self):
        """Append the lookahead value to ``self.output`` with basic layout.

        ``;`` and ``{`` are followed by a newline plus the current indent,
        ``{`` deepens the indent, ``}`` shallows it and is placed on its own
        line; every other token is followed by a single space.
        """
        value = self.lookahead.value
        if value == ';':
            self.output += value + "\n" + self.tabbed_scope
        elif value == '{':
            self.tabbed_scope += self._INDENT
            self.output += value + "\n" + self.tabbed_scope
        elif value == '}':
            self.tabbed_scope = self.tabbed_scope[:-len(self._INDENT)]
            self.output += "\n" + self.tabbed_scope + value
        else:
            self.output += value + " "

    def initialize_parsing_table(self):
        """Fill the predictive parsing table from the grammar.

        For every right-hand side of every production, the cell at
        (production id, terminal index) is set to that right-hand side for
        each terminal in its FIRST set and then for each terminal in its
        FOLLOW set; later writes overwrite earlier ones.
        """
        for prod_key in self.g.productions:
            production = self.g.productions[prod_key]
            row = self.table[production.p_id]
            for prod_RHS in production.RHSs:
                # FIRST entries are written before FOLLOW entries, exactly
                # as two consecutive loops would.
                for terminals in (prod_RHS.first, prod_RHS.follow):
                    for terminal in terminals:
                        row[self.terminal_list.index(terminal)] = prod_RHS
        print_table(self.table)