def CompFirst(self):
    """Compute the First graph for the rule set and store it in self.First.

    The result is a kjSet directed graph.  An arc ``nonterminal -> token``
    records that ``token`` can begin a derivation of ``nonterminal``; an
    arc ``nonterminal -> NULLTOKEN`` records that the nonterminal can
    derive the empty string.  The rules are rescanned until one complete
    pass adds no new arc.

    Only the non-null First entries of a body nonterminal are copied to
    the rule head.  NULLTOKEN is recorded for the head solely when the
    scan runs past the end of the body, i.e. when every body symbol is a
    nonterminal already known to derive the empty string.  (Copying
    NULLTOKEN along with the other entries would mark ``A -> B c`` as
    nullable whenever ``B`` is, although ``c`` is always required.)

    Raises:
        TokenError: a body token carries a type flag other than KEYFLAG,
            TERMFLAG or NONTERMFLAG.
    """
    First = kjSet.NewDG([])
    done = 0
    while not done:
        # assume this pass is the last one until an arc is actually added
        done = 1
        for R in self.Rules:
            GoalNonterm = R.Nonterm
            Body = R.Body
            Bodylength = len(Body)
            # walk the body left to right, stopping at the first symbol
            # that is not (yet) known to derive the empty string
            Bodyindex = 0
            Processindex = 1
            while Processindex:
                # stop after this symbol unless it turns out to be nullable
                Processindex = 0
                if Bodyindex >= Bodylength:
                    # every body symbol was nullable (or the body is
                    # empty): the head itself derives the empty string
                    if not kjSet.HasArc(First, GoalNonterm, NULLTOKEN):
                        kjSet.AddArc(First, GoalNonterm, NULLTOKEN)
                        done = 0
                else:
                    Token = Body[Bodyindex]
                    # tokens are (type flag, name) pairs; only the flag
                    # is needed here
                    (Ttype, Tname) = Token
                    if Ttype in (KEYFLAG, TERMFLAG):
                        # a terminal or keyword starts the derivation itself
                        if not kjSet.HasArc(First, GoalNonterm, Token):
                            kjSet.AddArc(First, GoalNonterm, Token)
                            done = 0
                    elif Ttype == NONTERMFLAG:
                        # whatever can start this nonterminal can start
                        # the head -- except the null marker, which says
                        # nothing about the rest of the body
                        for FToken in kjSet.Neighbors(First, Token):
                            if FToken != NULLTOKEN:
                                if not kjSet.HasArc(First, GoalNonterm, FToken):
                                    kjSet.AddArc(First, GoalNonterm, FToken)
                                    done = 0
                        # a nullable nonterminal lets the next body
                        # symbol contribute as well
                        if kjSet.HasArc(First, Token, NULLTOKEN):
                            Processindex = 1
                    else:
                        raise TokenError("unknown token type in rule body")
                Bodyindex = Bodyindex + 1
    self.First = First
def compFollow(self):
    ''' Build the Follow graph for the ruleset and store it in self.Follow.

        The graph starts with a single arc from the start nonterminal to
        the end-of-file token; self.compFollowRules is then called on it
        repeatedly until it returns a true value.

        Believed correct, but slower than it needs to be for
        epsilon cases.
    '''
    follow_graph = kjSet.NewDG([])
    # the end marker may always follow the start nonterminal
    kjSet.AddArc(follow_graph, self.StartNonterm, kjParser.ENDOFFILETOKEN)
    # keep applying the rules until a pass reports it is finished
    # (presumably: no arc was added -- confirm against compFollowRules)
    settled = 0
    while not settled:
        settled = self.compFollowRules(follow_graph)
    self.Follow = follow_graph
def Eclosure(self, Epsilon, DoNullMaps=0):
    ''' Return the epsilon closure of this FSM as a new CFSMachine.

        Epsilon is the token whose transitions are treated as "free"
        moves.  Each state of the result is a set of states of this
        machine (the union of epsilon-reachable sets), registered with
        TRANSFLAG.

        DoNullMaps, if set, also records a transition for tokens that
        lead to the empty state set (usually creating a really big fsm).

        Raises TypeError when a consulted StateTokenMap entry does not
        begin with a MOVETOFLAG pair.
    '''
    closure_fsm = CFSMachine(self.root_nonTerminal)
    state_ids = range(0, self.maxState + 1)

    # Graph of epsilon moves: each state reaches itself plus every
    # destination listed for (state, Epsilon).
    eps_graph = kjSet.NewDG([])
    for state in state_ids:
        kjSet.AddArc(eps_graph, state, state)
        eps_key = (state, Epsilon)
        if not self.StateTokenMap.has_key(eps_key):
            continue
        eps_moves = self.StateTokenMap[eps_key]
        # only the first entry's flag is inspected
        if eps_moves[0][0] != MOVETOFLAG:
            raise TypeError("unexpected map type in StateTokenMap")
        for (move_flag, target) in eps_moves:
            kjSet.AddArc(eps_graph, state, target)
    kjSet.TransClose(eps_graph)

    # state -> everything reachable from it on epsilon moves alone
    reachable = {}
    for state in state_ids:
        reachable[state] = kjSet.Neighbors(eps_graph, state)

    # Seed the closure machine: its initial state stands for the epsilon
    # closure of this machine's initial state.  Closure states are
    # assumed transient here -- reset elsewhere.
    initial_members = kjSet.NewSet(reachable[self.initial_state])
    closure_fsm.States[closure_fsm.initial_state] = [TRANSFLAG, initial_members]

    # every token that labels some transition, Epsilon excluded
    token_set = kjSet.NewSet([])
    for (from_state, token) in self.StateTokenMap.keys():
        if token != Epsilon:
            kjSet.addMember(token, token_set)
    tokens = kjSet.get_elts(token_set)

    # Work through the closure states in numeric order, starting at 1;
    # NewSetState may raise closure_fsm.maxState while we go, so newly
    # created states get processed too (relies on states being
    # allocated sequentially).
    current = 1
    while current <= closure_fsm.maxState:
        members = kjSet.get_elts(closure_fsm.States[current][1])
        for token in tokens:
            # union of the epsilon closures of every destination any
            # member state reaches on this token
            destinations = kjSet.NewSet([])
            for member in members:
                move_key = (member, token)
                if not self.StateTokenMap.has_key(move_key):
                    continue
                moves = self.StateTokenMap[move_key]
                if moves[0][0] != MOVETOFLAG:
                    raise TypeError("unknown map type")
                for (move_flag, target) in moves:
                    for closed_target in reachable[target]:
                        kjSet.addMember(closed_target, destinations)
            # the set is registered even when it is empty; only the
            # transition to it is optional
            destination_state = closure_fsm.NewSetState(TRANSFLAG, destinations)
            if DoNullMaps or not kjSet.Empty(destinations):
                closure_fsm.SetMap(current, token, destination_state)
        current = current + 1
    return closure_fsm
def compFollowRule(self, Follow, R):
    ''' Add to Follow every arc that rule R implies.

        The rule body is scanned from its last token to its first so
        that "does the rest of the body derive the empty string?" can be
        carried along instead of being recomputed.  self.First must
        already be available.

        Returns 1 when no arc was added, 0 when Follow changed.

        Raises TokenError for a body token whose type flag is not
        KEYFLAG, TERMFLAG or NONTERMFLAG.
    '''
    body = R.Body
    last = len(body) - 1
    unchanged = 1
    # true while everything to the right of the current token can
    # derive the empty string (trivially so at the end of the body)
    nullable_tail = 1
    position = last
    while position >= 0:
        symbol = body[position]
        (kind, symbol_name) = symbol
        if kind not in (KEYFLAG, TERMFLAG, NONTERMFLAG):
            raise TokenError("unknown token type in rule body")

        if kind in (KEYFLAG, TERMFLAG):
            # terminals get no Follow entries; they only end the
            # nullable tail
            nullable_tail = 0
        else:
            if nullable_tail:
                # nothing is required after this nonterminal, so
                # whatever follows the rule head follows it too
                for inherited in kjSet.Neighbors(Follow, R.Nonterm):
                    if not kjSet.HasArc(Follow, symbol, inherited):
                        kjSet.AddArc(Follow, symbol, inherited)
                        unchanged = 0

            if position != last:
                successor = body[position + 1]
                (successor_kind, successor_name) = successor
                if successor_kind in (KEYFLAG, TERMFLAG):
                    # a terminal right after us follows us directly
                    if not kjSet.HasArc(Follow, symbol, successor):
                        kjSet.AddArc(Follow, symbol, successor)
                        unchanged = 0
                elif successor_kind == NONTERMFLAG:
                    for starter in kjSet.Neighbors(self.First, successor):
                        if starter != NULLTOKEN:
                            if not kjSet.HasArc(Follow, symbol, starter):
                                kjSet.AddArc(Follow, symbol, starter)
                                unchanged = 0
                        else:
                            # the successor may vanish: its own Follow
                            # entries can then come right after us
                            for beyond in kjSet.Neighbors(Follow, successor):
                                if not kjSet.HasArc(Follow, symbol, beyond):
                                    kjSet.AddArc(Follow, symbol, beyond)
                                    unchanged = 0
                else:
                    raise TokenError("unknown token type in rule body")

            # the tail stays nullable only if this nonterminal is
            if not kjSet.HasArc(self.First, symbol, NULLTOKEN):
                nullable_tail = 0
        position = position - 1
    return unchanged
def CompFollow(self):
    """Compute the Follow graph for the rule set and store it in self.Follow.

    The result is a kjSet directed graph with an arc
    ``nonterminal -> token`` for every token recorded as able to come
    directly after that nonterminal.  The graph is seeded with the
    end-of-file token after the start nonterminal, then all rules are
    rescanned until one complete pass adds no arc.  self.First must
    already be set; it is consulted for the tokens that start a
    nonterminal and for whether a nonterminal derives the empty string
    (an arc to NULLTOKEN).

    Raises:
        TokenError: a body token carries a type flag other than KEYFLAG,
            TERMFLAG or NONTERMFLAG.
    """
    Follow = kjSet.NewDG([])
    # put end marker on follow of start nonterminal
    kjSet.AddArc(Follow, self.StartNonterm, kjParser.ENDOFFILETOKEN)
    done = 0
    while not done:
        # assume done unless Follow changes during this pass
        done = 1
        for R in self.Rules:
            Body = R.Body
            LastIndex = len(Body) - 1
            # true while everything to the right of the current token
            # can derive the empty string
            EpsilonTail = 1
            # work backwards through the body so the epsilon-tail test
            # is carried along rather than recomputed
            for BodyIndex in range(LastIndex, -1, -1):
                Token = Body[BodyIndex]
                # tokens are (type flag, name) pairs; only the flag is
                # needed here
                (Ttype, Tname) = Token
                if Ttype in (KEYFLAG, TERMFLAG):
                    # keywords etc cancel epsilon tail, otherwise ignore
                    EpsilonTail = 0
                elif Ttype == NONTERMFLAG:
                    if EpsilonTail:
                        # nothing is required after this nonterminal:
                        # whatever follows the rule head follows it too
                        for FToken in kjSet.Neighbors(Follow, R.Nonterm):
                            if not kjSet.HasArc(Follow, Token, FToken):
                                kjSet.AddArc(Follow, Token, FToken)
                                done = 0
                    # if we are not at the end use the next token
                    if BodyIndex != LastIndex:
                        NextToken = Body[BodyIndex + 1]
                        (NTtype, NTname) = NextToken
                        if NTtype in (KEYFLAG, TERMFLAG):
                            if not kjSet.HasArc(Follow, Token, NextToken):
                                kjSet.AddArc(Follow, Token, NextToken)
                                done = 0
                        elif NTtype == NONTERMFLAG:
                            for FToken in kjSet.Neighbors(self.First, NextToken):
                                if FToken != NULLTOKEN:
                                    if not kjSet.HasArc(Follow, Token, FToken):
                                        kjSet.AddArc(Follow, Token, FToken)
                                        done = 0
                                else:
                                    # next token expands to epsilon:
                                    # its own follow entries apply too
                                    for NToken in kjSet.Neighbors(Follow, NextToken):
                                        if not kjSet.HasArc(Follow, Token, NToken):
                                            kjSet.AddArc(Follow, Token, NToken)
                                            done = 0
                        else:
                            raise TokenError("unknown token type in rule body")
                    # the tail stays nullable only if this nonterminal is
                    if not kjSet.HasArc(self.First, Token, NULLTOKEN):
                        EpsilonTail = 0
                else:
                    raise TokenError("unknown token type in rule body")
    self.Follow = Follow