def addDFAState(self, configs:ATNConfigSet) -> DFAState:
    """Return the DFA state for ``configs`` in the current mode's DFA.

    A candidate state is built from ``configs``.  If the DFA already holds
    an equal state, that stored state is returned; otherwise the candidate
    is numbered, its config set is made read-only, and it is stored.
    """
    candidate = DFAState(configs=configs)

    # The first config sitting in a rule stop state (if any) makes this an
    # accept state and supplies its action executor and token type.
    acceptConfig = next(
        (cfg for cfg in configs if isinstance(cfg.state, RuleStopState)),
        None,
    )
    if acceptConfig is not None:
        candidate.isAcceptState = True
        candidate.lexerActionExecutor = acceptConfig.lexerActionExecutor
        candidate.prediction = self.atn.ruleToTokenType[acceptConfig.state.ruleIndex]

    modeDfa = self.decisionToDFA[self.mode]
    known = modeDfa.states.get(candidate)
    if known is not None:
        return known

    # Not seen before: number it by current table size and register it.
    candidate.stateNumber = len(modeDfa.states)
    configs.setReadonly(True)
    candidate.configs = configs
    modeDfa.states[candidate] = candidate
    return candidate
def hasSLLConflictTerminatingPrediction(cls, mode, configs):
    """Decide whether SLL prediction should stop for ``configs``.

    Returns ``True`` immediately when every configuration is in a rule stop
    state (nothing can match further input).  Otherwise returns whether the
    conflicting-alternative subsets contain a conflict while no ATN state is
    associated with exactly one alternative.

    :param mode: the active prediction mode; only ``PredictionMode.SLL``
        triggers the predicate-stripping step below.
    :param configs: the configuration set to analyse; it is not modified
        (a stripped copy is analysed instead when predicates are dropped).
    """
    # Configs in rule stop states indicate reaching the end of the decision
    # rule (local context) or end of start rule (full context).
    if cls.allConfigsInRuleStopStates(configs):
        return True

    # Pure SLL mode parsing: don't bother combining configs from different
    # semantic contexts if we can fail over to full LL; it costs more time
    # since we'll often fail over anyway.
    if mode == PredictionMode.SLL and configs.hasSemanticContext:
        # Duplicate the configs, tossing out semantic predicates.
        dup = ATNConfigSet()
        for c in configs:
            # Pass by keyword, as the other copy-style constructor calls in
            # this code base do; positional arguments would presumably bind
            # to the leading state/alt parameters -- verify against
            # ATNConfig.__init__.
            dup.add(ATNConfig(config=c, semantic=SemanticContext.NONE))
        configs = dup
        # now we have combined contexts for configs with dissimilar preds

    # Pure SLL or combined SLL+LL mode parsing.
    altsets = cls.getConflictingAltSubsets(configs)
    return cls.hasConflictingAltSet(altsets) and not cls.hasStateAssociatedWithOneAlt(configs)
def hasSLLConflictTerminatingPrediction(cls, mode, configs):
    """Tell whether SLL prediction can terminate on ``configs``.

    ``True`` is returned at once if all configurations are in rule stop
    states.  Otherwise the answer is: the conflicting-alternative subsets
    contain a conflict, and no state is associated with a single alternative.
    """
    # All configs at a rule stop state: none can match more input, so stop.
    if cls.allConfigsInRuleStopStates(configs):
        return True

    # In pure SLL mode, predicates are dropped before the conflict analysis
    # rather than combined, because failing over to full LL is common anyway.
    if mode == PredictionMode.SLL and configs.hasSemanticContext:
        stripped = ATNConfigSet()
        for original in configs:
            stripped.add(ATNConfig(config=original, semantic=SemanticContext.NONE))
        configs = stripped

    # Applies to both pure SLL and combined SLL+LL parsing.
    subsets = cls.getConflictingAltSubsets(configs)
    conflicting = cls.hasConflictingAltSet(subsets)
    return conflicting and not cls.hasStateAssociatedWithOneAlt(configs)
def addDFAState(self, configs: ATNConfigSet) -> DFAState:
    """Look up or create the DFA state that represents ``configs``.

    The state is searched for in the DFA of the current lexer mode.  When an
    equal state is already stored it is returned unchanged; otherwise the new
    state receives the next state number, ``configs`` is made read-only, and
    the state is added to the DFA.
    """
    # Predicates are evaluated on the fly by the lexer, so no unevaluated
    # predicate should remain in the set at this point.
    assert not configs.hasSemanticContext

    candidate = DFAState(configs=configs)

    # Use the first config in a rule stop state, if there is one, to mark
    # the candidate as accepting.
    stopConfig = next(
        (cfg for cfg in configs if isinstance(cfg.state, RuleStopState)),
        None,
    )
    if stopConfig is not None:
        candidate.isAcceptState = True
        candidate.lexerActionExecutor = stopConfig.lexerActionExecutor
        candidate.prediction = self.atn.ruleToTokenType[stopConfig.state.ruleIndex]

    modeDfa = self.decisionToDFA[self.mode]
    stored = modeDfa.states.get(candidate)
    if stored is not None:
        return stored

    candidate.stateNumber = len(modeDfa.states)
    configs.setReadonly(True)
    candidate.configs = configs
    modeDfa.states[candidate] = candidate
    return candidate
def addDFAState(self, configs: ATNConfigSet) -> DFAState:
    """Return the stored DFA state equal to one built from ``configs``.

    If the current mode's DFA has no such state yet, the freshly built one is
    assigned the next state number, ``configs`` is frozen via
    ``setReadonly(True)``, and the state is inserted before being returned.
    """
    # The lexer evaluates predicates as it goes; a set that still carries
    # the semantic-context marker should never reach this method.
    assert not configs.hasSemanticContext

    proposal = DFAState(configs=configs)

    # Find the first configuration whose ATN state is a rule stop state.
    firstStop = None
    for candidateConfig in configs:
        if isinstance(candidateConfig.state, RuleStopState):
            firstStop = candidateConfig
            break

    if firstStop is not None:
        tokenType = self.atn.ruleToTokenType[firstStop.state.ruleIndex]
        proposal.isAcceptState = True
        proposal.lexerActionExecutor = firstStop.lexerActionExecutor
        proposal.prediction = tokenType

    table = self.decisionToDFA[self.mode]
    previous = table.states.get(proposal)
    if previous is not None:
        return previous

    proposal.stateNumber = len(table.states)
    configs.setReadonly(True)
    proposal.configs = configs
    table.states[proposal] = proposal
    return proposal
def __init__(self, stateNumber: int = -1, configs: ATNConfigSet = None):
    """Initialise a DFA state.

    :param stateNumber: number of this state; defaults to ``-1``.
    :param configs: the ATN configuration set this state stands for.  When
        omitted (or ``None``) a fresh, private ``ATNConfigSet`` is created,
        so that states never share one default instance.
    """
    # A default of ``ATNConfigSet()`` in the signature would be built once,
    # at definition time, and then shared by every state created without an
    # explicit set.
    if configs is None:
        configs = ATNConfigSet()
    self.stateNumber = stateNumber
    self.configs = configs
    # edges[symbol] points to the target of symbol.  Shifted up by 1 so that
    # (-1) Token.EOF maps to edges[0].
    self.edges = None
    self.isAcceptState = False
    # If this is an accept state: the token type matched or the alternative
    # predicted.  Set to ATN.INVALID_ALT_NUMBER when predicates is not None
    # or requiresFullContext is set.
    self.prediction = 0
    self.lexerActionExecutor = None
    # Marks a state created during SLL prediction that discovered a conflict
    # between its configurations.  Later ParserATNSimulator.execATN calls
    # jump straight to full-context prediction when this is true.
    self.requiresFullContext = False
    # During SLL parsing: the predicates associated with the ATN
    # configurations of this state.  With predicates present,
    # requiresFullContext is False, since full-context prediction evaluates
    # predicates on the fly; prediction is then ATN.INVALID_ALT_NUMBER.
    #
    # Only used for conflicting states that do not require full context,
    # i.e. the context tells us it is an ambiguity, not a conflict.
    #
    # Computed by ParserATNSimulator.predicateDFAState.
    self.predicates = None
class ATNSimulator(object):
    """Base simulator holding an ATN and an optional shared context cache."""

    # Sentinel state: lets callers tell "edge not computed yet" apart from
    # "edge known to lead nowhere".
    ERROR = DFAState(0x7FFFFFFF, ATNConfigSet())

    # About the shared context cache
    # ------------------------------
    # It maps every PredictionContext that compares equal to one cached
    # copy, and is shared by all contexts of all ATNConfigs in all DFA
    # states.  Each ATNConfigSet is rebuilt in addDFAState() so that it uses
    # cached nodes/graphs only.  The cache is deliberately not filled during
    # closure(): many contexts show up there and are never used again, and
    # filling it there slows closure() down a lot.
    #
    # The cache matters a great deal for memory and a little for speed.  For
    # the Java grammar run on java.*, end-of-run memory went from 25M to 16M.
    # Full-context graphs are never stored in the DFA (it is limited to local
    # context), yet there is still much repetition; config contexts are
    # therefore rebuilt from cached subgraphs before a config set is stored
    # in a DFA state.
    #
    # A per-adaptivePredict() cache for closure operations was tried; it
    # seemed to cost a little time and did not reduce the overall footprint,
    # so it was judged not worth the complexity.

    def __init__(self, atn, sharedContextCache):
        """Remember the ATN and the (possibly ``None``) shared context cache."""
        self.sharedContextCache = sharedContextCache
        self.atn = atn

    def getCachedContext(self, context):
        """Return the cached equivalent of ``context``.

        Without a shared cache the context is handed back unchanged.
        """
        cache = self.sharedContextCache
        if cache is None:
            return context
        # A fresh "visited" map is used for every lookup.
        return getCachedPredictionContext(context, cache, dict())
def setPrecedenceDfa(self, precedenceDfa):
    """Switch this DFA into or out of precedence mode.

    Nothing happens when the flag already has the requested value.  On a
    change the state table is emptied and ``s0`` is replaced: by a new
    non-accepting start state with an empty edge list when turning precedence
    mode on, or by ``None`` when turning it off.
    """
    if self.precedenceDfa != precedenceDfa:
        # Any previously stored state belongs to the other kind of DFA.
        self._states = {}
        startState = None
        if precedenceDfa:
            startState = DFAState(configs=ATNConfigSet())
            startState.edges = []
            startState.isAcceptState = False
            startState.requiresFullContext = False
        self.s0 = startState
        self.precedenceDfa = precedenceDfa
def closure(self, input: InputStream, config: LexerATNConfig,
            configs: ATNConfigSet, currentAltReachedAcceptState: bool,
            speculative: bool, treatEofAsEpsilon: bool):
    """Add ``config`` and everything reachable from it without consuming input to ``configs``.

    :param input: character stream, forwarded to ``getEpsilonTarget``.
    :param config: the configuration to expand.
    :param configs: the set that collects the resulting configurations.
    :param currentAltReachedAcceptState: whether the current alternative has
        already reached an accept state.
    :param speculative: forwarded to ``getEpsilonTarget``.
    :param treatEofAsEpsilon: forwarded to ``getEpsilonTarget``.
    :return: the updated ``currentAltReachedAcceptState`` flag.
    """
    if self.debug:
        print("closure(" + config.toString(self.recog, True) + ")")

    if isinstance(config.state, RuleStopState):
        if self.debug:
            # print() does not interpolate printf-style placeholders, so the
            # message is formatted explicitly before printing.
            if self.recog is not None:
                print("closure at %s rule stop %s\n"
                      % (self.recog.getRuleNames()[config.state.ruleIndex], config))
            else:
                print("closure at rule stop %s\n" % (config,))

        if config.context is None or config.context.hasEmptyPath():
            if config.context is None or config.context.isEmpty():
                configs.add(config)
                return True
            else:
                configs.add(LexerATNConfig(state=config.state, config=config,
                                           context=PredictionContext.EMPTY))
                currentAltReachedAcceptState = True

        if config.context is not None and not config.context.isEmpty():
            for i in range(0, len(config.context)):
                if config.context.getReturnState(i) != PredictionContext.EMPTY_RETURN_STATE:
                    newContext = config.context.getParent(i)  # "pop" return state
                    returnState = self.atn.states[config.context.getReturnState(i)]
                    c = LexerATNConfig(state=returnState, config=config, context=newContext)
                    currentAltReachedAcceptState = self.closure(
                        input, c, configs, currentAltReachedAcceptState,
                        speculative, treatEofAsEpsilon)

        return currentAltReachedAcceptState

    # optimization
    if not config.state.epsilonOnlyTransitions:
        if not currentAltReachedAcceptState or not config.passedThroughNonGreedyDecision:
            configs.add(config)

    for t in config.state.transitions:
        c = self.getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon)
        if c is not None:
            currentAltReachedAcceptState = self.closure(
                input, c, configs, currentAltReachedAcceptState,
                speculative, treatEofAsEpsilon)

    return currentAltReachedAcceptState
def closure(
    self,
    input: InputStream,
    config: LexerATNConfig,
    configs: ATNConfigSet,
    currentAltReachedAcceptState: bool,
    speculative: bool,
    treatEofAsEpsilon: bool,
):
    """Expand ``config`` into ``configs`` by following non-consuming transitions.

    At a rule stop state the configuration is either recorded as-is (no
    context or an empty one), recorded with the empty context (context has an
    empty path), and/or continued at each return state of its context.  At
    any other state the configuration is recorded when appropriate and every
    transition is tried through ``getEpsilonTarget``.

    :return: the updated ``currentAltReachedAcceptState`` flag.
    """
    if self.debug:
        print("closure(" + config.toString(self.recog, True) + ")")

    if isinstance(config.state, RuleStopState):
        if self.debug:
            # The placeholders must be filled in with ``%``; handing the
            # values to print() as extra arguments leaves "%s" in the output.
            if self.recog is not None:
                ruleName = self.recog.getRuleNames()[config.state.ruleIndex]
                print("closure at %s rule stop %s\n" % (ruleName, config))
            else:
                print("closure at rule stop %s\n" % (config,))

        if config.context is None or config.context.hasEmptyPath():
            if config.context is None or config.context.isEmpty():
                configs.add(config)
                return True
            else:
                configs.add(LexerATNConfig(state=config.state, config=config, context=PredictionContext.EMPTY))
                currentAltReachedAcceptState = True

        if config.context is not None and not config.context.isEmpty():
            for i in range(0, len(config.context)):
                if config.context.getReturnState(i) != PredictionContext.EMPTY_RETURN_STATE:
                    newContext = config.context.getParent(i)  # "pop" return state
                    returnState = self.atn.states[config.context.getReturnState(i)]
                    c = LexerATNConfig(state=returnState, config=config, context=newContext)
                    currentAltReachedAcceptState = self.closure(
                        input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon
                    )

        return currentAltReachedAcceptState

    # optimization
    if not config.state.epsilonOnlyTransitions:
        if not currentAltReachedAcceptState or not config.passedThroughNonGreedyDecision:
            configs.add(config)

    for t in config.state.transitions:
        c = self.getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon)
        if c is not None:
            currentAltReachedAcceptState = self.closure(
                input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon
            )

    return currentAltReachedAcceptState
def addDFAEdge(self, from_: DFAState, tk: int, to: DFAState = None, cfgs: ATNConfigSet = None) -> DFAState:
    """Record the edge ``from_ --tk--> to`` and return the target state.

    When ``to`` is not given but ``cfgs`` is, the target is obtained from
    ``addDFAState(cfgs)``.  No edge is stored if ``cfgs`` carried the
    semantic-context marker or if ``tk`` lies outside
    ``MIN_DFA_EDGE..MAX_DFA_EDGE``; the target is still returned.
    """
    if to is None and cfgs is not None:
        # hasSemanticContext on the config set is a marker: a predicate was
        # evaluated on the way here, so the edge depends on the concrete
        # input and must not become a static DFA edge.  The target state is
        # created anyway, because execATN can resynchronise with the DFA
        # state cache after the predicate evaluation step.
        #
        # TJP notes: next time through the DFA the predicate is seen and
        # evaluated again; if that leads to a previously created (but
        # dangling) DFA state, pure DFA mode can continue from there.
        predicateSeen = cfgs.hasSemanticContext
        cfgs.hasSemanticContext = False
        to = self.addDFAState(cfgs)
        if predicateSeen:
            return to

    # Only symbols inside the DFA bounds get an edge.
    if not (self.MIN_DFA_EDGE <= tk <= self.MAX_DFA_EDGE):
        return to

    if self.debug:
        print("EDGE " + str(from_) + " -> " + str(to) + " upon " + chr(tk))

    if from_.edges is None:
        # make room for tokens 1..n and -1 masquerading as index 0
        slotCount = self.MAX_DFA_EDGE - self.MIN_DFA_EDGE + 1
        from_.edges = [None] * slotCount

    slot = tk - self.MIN_DFA_EDGE
    from_.edges[slot] = to  # connect
    return to
def __init__(self, atnStartState, decision=0):
    """Create an empty DFA for the decision that starts at ``atnStartState``.

    If the start state is a ``StarLoopEntryState`` flagged as a precedence
    decision, the DFA starts out as a precedence DFA with a dedicated,
    non-accepting ``s0`` that has an empty edge list.
    """
    # The ATN state this DFA was created from, and its decision number.
    self.atnStartState = atnStartState
    self.decision = decision
    # All DFA states, kept in a dict (state -> state) so that the stored
    # instance can be retrieved, not merely tested for membership.
    self._states = {}
    self.s0 = None
    # True when this DFA serves a precedence decision.
    self.precedenceDfa = False

    if isinstance(atnStartState, StarLoopEntryState) and atnStartState.isPrecedenceDecision:
        self.precedenceDfa = True
        startState = DFAState(configs=ATNConfigSet())
        startState.edges = []
        startState.isAcceptState = False
        startState.requiresFullContext = False
        self.s0 = startState
def addDFAEdge(self, from_:DFAState, tk:int, to:DFAState=None, cfgs:ATNConfigSet=None) -> DFAState:
    """Connect ``from_`` to its target on symbol ``tk`` and return that target.

    If only ``cfgs`` is supplied, the target comes from ``addDFAState(cfgs)``.
    The edge itself is skipped when ``cfgs`` was marked as having passed a
    predicate, or when ``tk`` is outside the range of tracked symbols.
    """
    needTarget = to is None and cfgs is not None
    if needTarget:
        # ATNConfigSet.hasSemanticContext doubles as a marker here: dynamic
        # predicate evaluation made this edge depend on the specific input,
        # so no static DFA edge may be stored.  The target DFAState is still
        # created, since execATN can resynchronise with the DFA state cache
        # after the predicate evaluation step.
        #
        # TJP notes: on the next pass the predicate is evaluated again; if
        # that reaches a previously created (but dangling) DFA state, pure
        # DFA mode can resume from there.
        suppress = cfgs.hasSemanticContext
        cfgs.hasSemanticContext = False
        to = self.addDFAState(cfgs)
        if suppress:
            return to

    # Edges are tracked only within the DFA bounds.
    outOfBounds = tk < self.MIN_DFA_EDGE or tk > self.MAX_DFA_EDGE
    if outOfBounds:
        return to

    if LexerATNSimulator.debug:
        print("EDGE " + str(from_) + " -> " + str(to) + " upon "+ chr(tk))

    if from_.edges is None:
        # make room for tokens 1..n and -1 masquerading as index 0
        width = self.MAX_DFA_EDGE - self.MIN_DFA_EDGE + 1
        from_.edges = [None] * width

    from_.edges[tk - self.MIN_DFA_EDGE] = to  # connect
    return to
def getEpsilonTarget(self, input: InputStream, config: LexerATNConfig, t: Transition, configs: ATNConfigSet, speculative: bool, treatEofAsEpsilon: bool):
    """Return the configuration reached from ``config`` over ``t`` without consuming input.

    ``None`` is returned when the transition cannot be taken that way (a
    failed predicate, a matching transition when EOF is not treated as
    epsilon or does not match, or an unhandled transition type).  Precedence
    transitions raise ``UnsupportedOperationException``.
    """
    kind = t.serializationType

    if kind == Transition.RULE:
        # Entering a rule: push the follow state onto the context.
        pushed = SingletonPredictionContext.create(config.context, t.followState.stateNumber)
        return LexerATNConfig(state=t.target, config=config, context=pushed)

    if kind == Transition.PRECEDENCE:
        raise UnsupportedOperationException(
            "Precedence predicates are not supported in lexers.")

    if kind == Transition.PREDICATE:
        # Traversing a semantic predicate is tracked, because no DFA state
        # may be added for this "reach" computation: the DFA would not test
        # the predicate again later.  Instead of collecting predicates and
        # testing them at prediction time (as v3 did), v4 tests them on the
        # fly using the ATN.  That is slower, but predicates are not used
        # that often.  A key element is not adding DFA states that see
        # predicates immediately afterwards in the ATN.  For example,
        #     a : ID {p1}? | ID {p2}? ;
        # should create the start state for rule 'a', but not the target of
        # ID: the ATN states following ID include states reached through
        # predicates, and since they are tested here the DFA state target of
        # ID cannot be cached.
        if self.debug:
            print("EVAL rule " + str(t.ruleIndex) + ":" + str(t.predIndex))
        configs.hasSemanticContext = True
        if not self.evaluatePredicate(input, t.ruleIndex, t.predIndex, speculative):
            return None
        return LexerATNConfig(state=t.target, config=config)

    if kind == Transition.ACTION:
        if config.context is None or config.context.hasEmptyPath():
            # Actions anywhere in the start rule of a token are executed.
            #
            # TODO: if the entry rule is invoked recursively, some actions
            # may be executed during the recursive call.  The problem can
            # appear when hasEmptyPath() is true but isEmpty() is false.
            # The config would then have to be split into two contexts, one
            # with just the empty path and one with everything else.  The
            # current algorithm does not let getEpsilonTarget return two
            # configurations, so further changes are needed before the
            # split can be supported.
            executor = LexerActionExecutor.append(
                config.lexerActionExecutor, self.atn.lexerActions[t.actionIndex])
            return LexerATNConfig(state=t.target, config=config, lexerActionExecutor=executor)
        # Actions in referenced rules are ignored.
        return LexerATNConfig(state=t.target, config=config)

    if kind == Transition.EPSILON:
        return LexerATNConfig(state=t.target, config=config)

    if kind in (Transition.ATOM, Transition.RANGE, Transition.SET):
        # A matching transition is only followed here for EOF, on request.
        if treatEofAsEpsilon and t.matches(Token.EOF, 0, 0xFFFF):
            return LexerATNConfig(state=t.target, config=config)

    return None
newState.configs = configs dfa.states[newState] = newState return newState def getDFA(self, mode: int): return self.decisionToDFA[mode] # Get the text matched so far for the current token. def getText(self, input: InputStream): # index is first lookahead char, don't include. return input.getText(self.startIndex, input.index - 1) def consume(self, input: InputStream): curChar = input.LA(1) if curChar == ord('\n'): self.line += 1 self.column = 0 else: self.column += 1 input.consume() def getTokenName(self, t: int): if t == -1: return "EOF" else: return "'" + chr(t) + "'" LexerATNSimulator.ERROR = DFAState(0x7FFFFFFF, ATNConfigSet()) del Lexer
def getEpsilonTarget(self, input:InputStream, config:LexerATNConfig, t:Transition, configs:ATNConfigSet, speculative:bool, treatEofAsEpsilon:bool):
    """Compute the configuration that follows ``config`` over transition ``t``.

    The result is ``None`` when ``t`` cannot be followed without consuming a
    character: a predicate that evaluates false, a matching transition when
    ``treatEofAsEpsilon`` is false or EOF does not match, or an unhandled
    transition type.  Precedence transitions raise
    ``UnsupportedOperationException``.
    """
    transitionType = t.serializationType

    if transitionType == Transition.RULE:
        # Rule invocation: the follow state becomes the new return state.
        callContext = SingletonPredictionContext.create(config.context, t.followState.stateNumber)
        return LexerATNConfig(state=t.target, config=config, context=callContext)

    if transitionType == Transition.PRECEDENCE:
        raise UnsupportedOperationException("Precedence predicates are not supported in lexers.")

    if transitionType == Transition.PREDICATE:
        # Passing through a semantic predicate is recorded on the config
        # set.  If one is traversed, no DFA state may be added for this
        # "reach" computation, because the DFA would not re-test the
        # predicate in the future.  v4 tests predicates on the fly with the
        # ATN instead of collecting them and testing at prediction time as
        # v3 did; this is slower but predicates are not used that often.
        # The mechanism relies on not adding DFA states that see predicates
        # immediately afterwards in the ATN.  For example,
        #     a : ID {p1}? | ID {p2}? ;
        # should create the start state for rule 'a' but not the target of
        # ID, since the states following ID include ones reached through
        # predicates; as they are tested here, that DFA state target cannot
        # be cached.
        if LexerATNSimulator.debug:
            print("EVAL rule "+ str(t.ruleIndex) + ":" + str(t.predIndex))
        configs.hasSemanticContext = True
        passed = self.evaluatePredicate(input, t.ruleIndex, t.predIndex, speculative)
        return LexerATNConfig(state=t.target, config=config) if passed else None

    if transitionType == Transition.ACTION:
        inStartRule = config.context is None or config.context.hasEmptyPath()
        if not inStartRule:
            # ignore actions in referenced rules
            return LexerATNConfig(state=t.target, config=config)
        # Execute actions anywhere in the start rule for a token.
        #
        # TODO: if the entry rule is invoked recursively, some actions may
        # be executed during the recursive call.  This can happen when
        # hasEmptyPath() is true but isEmpty() is false; the config would
        # need to be split into one context with just the empty path and
        # another with everything but the empty path.  getEpsilonTarget
        # cannot currently return two configurations, so more changes are
        # needed before such a split can be supported.
        appended = LexerActionExecutor.append(config.lexerActionExecutor, self.atn.lexerActions[t.actionIndex])
        return LexerATNConfig(state=t.target, config=config, lexerActionExecutor=appended)

    if transitionType == Transition.EPSILON:
        return LexerATNConfig(state=t.target, config=config)

    if transitionType in (Transition.ATOM, Transition.RANGE, Transition.SET):
        if treatEofAsEpsilon and t.matches(Token.EOF, 0, 0xFFFF):
            return LexerATNConfig(state=t.target, config=config)

    return None