Example #1
    def addDFAState(self, configs:ATNConfigSet) -> DFAState:

        proposed = DFAState(configs=configs)
        firstConfigWithRuleStopState = None
        for c in configs:
            if isinstance(c.state, RuleStopState):
                firstConfigWithRuleStopState = c
                break

        if firstConfigWithRuleStopState is not None:
            proposed.isAcceptState = True
            proposed.lexerActionExecutor = firstConfigWithRuleStopState.lexerActionExecutor
            proposed.prediction = self.atn.ruleToTokenType[firstConfigWithRuleStopState.state.ruleIndex]

        dfa = self.decisionToDFA[self.mode]
        existing = dfa.states.get(proposed, None)
        if existing is not None:
            return existing

        newState = proposed

        newState.stateNumber = len(dfa.states)
        configs.setReadonly(True)
        newState.configs = configs
        dfa.states[newState] = newState
        return newState
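The lookup-then-store step at the end of addDFAState relies on DFAState hashing and comparing by its configuration set, so an ordinary dict can act as a set that also hands back the previously stored instance. A minimal sketch of that pattern, with a hypothetical Interned class standing in for DFAState (names are illustrative, not runtime classes):

    # Minimal sketch of the "dict as a set that returns the old element" idea
    # used above; Interned and its fields are illustrative, not runtime classes.
    class Interned:
        def __init__(self, key):
            self.key = key          # stands in for the ATNConfigSet
            self.stateNumber = -1   # assigned only to the first (canonical) copy

        def __hash__(self):
            return hash(self.key)

        def __eq__(self, other):
            return isinstance(other, Interned) and self.key == other.key

    states = {}

    def add_state(proposed):
        existing = states.get(proposed)
        if existing is not None:        # an equal state was stored earlier:
            return existing             # reuse it, keeping its stateNumber
        proposed.stateNumber = len(states)
        states[proposed] = proposed     # key and value are the same object
        return proposed

    s1 = add_state(Interned("a"))
    s2 = add_state(Interned("a"))
    assert s1 is s2 and s1.stateNumber == 0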
Example #2
    def hasSLLConflictTerminatingPrediction(cls, mode, configs):
        # Configs in rule stop states indicate reaching the end of the decision
        # rule (local context) or end of start rule (full context). If all
        # configs meet this condition, then none of the configurations is able
        # to match additional input so we terminate prediction.
        #
        if cls.allConfigsInRuleStopStates(configs):
            return True

        # pure SLL mode parsing
        if mode == PredictionMode.SLL:
            # Don't bother with combining configs from different semantic
            # contexts if we can fail over to full LL; costs more time
            # since we'll often fail over anyway.
            if configs.hasSemanticContext:
                # dup configs, tossing out semantic predicates
                dup = ATNConfigSet()
                for c in configs:
                    c = ATNConfig(config=c, semantic=SemanticContext.NONE)
                    dup.add(c)
                configs = dup
            # now we have combined contexts for configs with dissimilar preds

        # pure SLL or combined SLL+LL mode parsing
        altsets = cls.getConflictingAltSubsets(configs)
        return cls.hasConflictingAltSet(altsets) and not cls.hasStateAssociatedWithOneAlt(configs)
Example #3
    def hasSLLConflictTerminatingPrediction(cls, mode, configs):
        # Configs in rule stop states indicate reaching the end of the decision
        # rule (local context) or end of start rule (full context). If all
        # configs meet this condition, then none of the configurations is able
        # to match additional input so we terminate prediction.
        #
        if cls.allConfigsInRuleStopStates(configs):
            return True

        # pure SLL mode parsing
        if mode == PredictionMode.SLL:
            # Don't bother with combining configs from different semantic
            # contexts if we can fail over to full LL; costs more time
            # since we'll often fail over anyway.
            if configs.hasSemanticContext:
                # dup configs, tossing out semantic predicates
                dup = ATNConfigSet()
                for c in configs:
                    c = ATNConfig(config=c, semantic=SemanticContext.NONE)
                    dup.add(c)
                configs = dup
            # now we have combined contexts for configs with dissimilar preds

        # pure SLL or combined SLL+LL mode parsing
        altsets = cls.getConflictingAltSubsets(configs)
        return cls.hasConflictingAltSet(
            altsets) and not cls.hasStateAssociatedWithOneAlt(configs)
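The last two lines above combine two set computations: roughly, getConflictingAltSubsets groups the alternatives predicted by configurations that share the same (state, context) pair, and prediction only terminates when some group predicts more than one alternative and no state predicts exactly one. A rough sketch of that idea, with plain (state, alt, context) tuples standing in for ATNConfig (all helper names here are illustrative):

    # Rough sketch of the conflict test, with (state, alt, context) tuples
    # standing in for ATNConfig; helper names are illustrative only.
    from collections import defaultdict

    def conflicting_alt_subsets(configs):
        # group alternative numbers by (state, context), ignoring predicates
        subsets = defaultdict(set)
        for state, alt, context in configs:
            subsets[(state, context)].add(alt)
        return list(subsets.values())

    def has_conflicting_alt_set(altsets):
        return any(len(alts) > 1 for alts in altsets)

    def has_state_associated_with_one_alt(configs):
        alts_per_state = defaultdict(set)
        for state, alt, _context in configs:
            alts_per_state[state].add(alt)
        return any(len(alts) == 1 for alts in alts_per_state.values())

    configs = [(1, 1, "ctx"), (1, 2, "ctx"), (2, 1, "ctx")]
    altsets = conflicting_alt_subsets(configs)
    print(has_conflicting_alt_set(altsets))            # True: state 1 predicts alts {1, 2}
    print(has_state_associated_with_one_alt(configs))  # True: state 2 predicts only alt 1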
Example #4
    def addDFAState(self, configs: ATNConfigSet) -> DFAState:
        # the lexer evaluates predicates on-the-fly; by this point configs
        # should not contain any configurations with unevaluated predicates.
        assert not configs.hasSemanticContext

        proposed = DFAState(configs=configs)
        firstConfigWithRuleStopState = None
        for c in configs:
            if isinstance(c.state, RuleStopState):
                firstConfigWithRuleStopState = c
                break

        if firstConfigWithRuleStopState is not None:
            proposed.isAcceptState = True
            proposed.lexerActionExecutor = firstConfigWithRuleStopState.lexerActionExecutor
            proposed.prediction = self.atn.ruleToTokenType[firstConfigWithRuleStopState.state.ruleIndex]

        dfa = self.decisionToDFA[self.mode]
        existing = dfa.states.get(proposed, None)
        if existing is not None:
            return existing

        newState = proposed

        newState.stateNumber = len(dfa.states)
        configs.setReadonly(True)
        newState.configs = configs
        dfa.states[newState] = newState
        return newState
Example #5
    def addDFAState(self, configs: ATNConfigSet) -> DFAState:
        # the lexer evaluates predicates on-the-fly; by this point configs
        # should not contain any configurations with unevaluated predicates.
        assert not configs.hasSemanticContext

        proposed = DFAState(configs=configs)
        firstConfigWithRuleStopState = None
        for c in configs:
            if isinstance(c.state, RuleStopState):
                firstConfigWithRuleStopState = c
                break

        if firstConfigWithRuleStopState is not None:
            proposed.isAcceptState = True
            proposed.lexerActionExecutor = firstConfigWithRuleStopState.lexerActionExecutor
            proposed.prediction = self.atn.ruleToTokenType[
                firstConfigWithRuleStopState.state.ruleIndex]

        dfa = self.decisionToDFA[self.mode]
        existing = dfa.states.get(proposed, None)
        if existing is not None:
            return existing

        newState = proposed

        newState.stateNumber = len(dfa.states)
        configs.setReadonly(True)
        newState.configs = configs
        dfa.states[newState] = newState
        return newState
Example #7
File: DFAState.py  Project: clinzy/butools
 def __init__(self,
              stateNumber: int = -1,
              configs: ATNConfigSet = ATNConfigSet()):
     self.stateNumber = stateNumber
     self.configs = configs
     # {@code edges[symbol]} points to target of symbol. Shift up by 1 so (-1)
     #  {@link Token#EOF} maps to {@code edges[0]}.
     self.edges = None
     self.isAcceptState = False
     # if accept state, what ttype do we match or alt do we predict?
     #  This is set to {@link ATN#INVALID_ALT_NUMBER} when {@link #predicates}{@code !=null} or
     #  {@link #requiresFullContext}.
     self.prediction = 0
     self.lexerActionExecutor = None
     # Indicates that this state was created during SLL prediction that
     # discovered a conflict between the configurations in the state. Future
     # {@link ParserATNSimulator#execATN} invocations immediately jump to
     # full context prediction if this field is true.
     self.requiresFullContext = False
     # During SLL parsing, this is a list of predicates associated with the
     #  ATN configurations of the DFA state. When we have predicates,
     #  {@link #requiresFullContext} is {@code false} since full context prediction evaluates predicates
     #  on-the-fly. If this is not null, then {@link #prediction} is
     #  {@link ATN#INVALID_ALT_NUMBER}.
     #
     #  <p>We only use these for non-{@link #requiresFullContext} but conflicting states. That
     #  means we know from the context (it's $ or we don't dip into outer
     #  context) that it's an ambiguity not a conflict.</p>
     #
     #  <p>This list is computed by {@link ParserATNSimulator#predicateDFAState}.</p>
     self.predicates = None
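The edges comment above describes a shift-by-1 indexing scheme so that Token.EOF, which is -1, can share the array with ordinary token types. A tiny illustration of that indexing (the array size and target names are made up for the demo):

    # Tiny illustration of the shift-by-1 edge indexing described above:
    # token type -1 (EOF) lands at index 0, token type 0 at index 1, and so on.
    EOF = -1
    edges = [None] * 4                 # room for token types -1..2 in this toy table

    def set_edge(edges, symbol, target):
        edges[symbol + 1] = target     # shift up by 1

    def get_edge(edges, symbol):
        return edges[symbol + 1]

    set_edge(edges, EOF, "s_eof")
    set_edge(edges, 0, "s0")
    assert get_edge(edges, EOF) == "s_eof"   # EOF maps to edges[0]
    assert get_edge(edges, 0) == "s0"        # token type 0 maps to edges[1]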
Example #8
class ATNSimulator(object):

    # Must distinguish between missing edge and edge we know leads nowhere#/
    ERROR = DFAState(0x7FFFFFFF, ATNConfigSet())

    # The context cache maps all PredictionContext objects that are ==
    #  to a single cached copy. This cache is shared across all contexts
    #  in all ATNConfigs in all DFA states.  We rebuild each ATNConfigSet
    #  to use only cached nodes/graphs in addDFAState(). We don't want to
    #  fill this during closure() since there are lots of contexts that
    #  pop up but are not used ever again. It also greatly slows down closure().
    #
    #  <p>This cache makes a huge difference in memory and a little bit in speed.
    #  For the Java grammar on java.*, it dropped the memory requirements
    #  at the end from 25M to 16M. We don't store any of the full context
    #  graphs in the DFA because they are limited to local context only,
    #  but apparently there's a lot of repetition there as well. We optimize
    #  the config contexts before storing the config set in the DFA states
    #  by literally rebuilding them with cached subgraphs only.</p>
    #
    #  <p>I tried a cache for use during closure operations, that was
    #  whacked after each adaptivePredict(). It cost a little bit
    #  more time I think and doesn't save on the overall footprint
    #  so it's not worth the complexity.</p>
    #/
    def __init__(self, atn, sharedContextCache):
        self.atn = atn
        self.sharedContextCache = sharedContextCache

    def getCachedContext(self, context):
        if self.sharedContextCache is None:
            return context
        visited = dict()
        return getCachedPredictionContext(context, self.sharedContextCache,
                                          visited)
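getCachedContext defers to getCachedPredictionContext, which (per the comment above) maps every PredictionContext that compares equal to one shared copy. The underlying pattern is plain interning via a dict; a hedged sketch of just that pattern, not the runtime's actual graph-rewriting logic:

    # Hedged sketch of the interning pattern behind the shared context cache:
    # equal values are mapped to a single canonical copy that later lookups reuse.
    def get_cached(obj, cache):
        existing = cache.get(obj)
        if existing is not None:
            return existing      # reuse the canonical copy
        cache[obj] = obj         # first occurrence becomes the canonical copy
        return obj

    cache = {}
    a = (1, 2, 3)
    b = tuple([1, 2, 3])         # equal to a, but a distinct object
    assert get_cached(a, cache) is a
    assert get_cached(b, cache) is a   # b is replaced by the cached copy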
Example #9
 def setPrecedenceDfa(self, precedenceDfa):
     if self.precedenceDfa != precedenceDfa:
         self._states = dict()
         if precedenceDfa:
             precedenceState = DFAState(configs=ATNConfigSet())
             precedenceState.edges = []
             precedenceState.isAcceptState = False
             precedenceState.requiresFullContext = False
             self.s0 = precedenceState
         else:
             self.s0 = None
         self.precedenceDfa = precedenceDfa
Example #10
    def closure(self, input: InputStream, config: LexerATNConfig,
                configs: ATNConfigSet, currentAltReachedAcceptState: bool,
                speculative: bool, treatEofAsEpsilon: bool):
        if self.debug:
            print("closure(" + config.toString(self.recog, True) + ")")

        if isinstance(config.state, RuleStopState):
            if self.debug:
                if self.recog is not None:
                    print("closure at %s rule stop %s\n",
                          self.recog.getRuleNames()[config.state.ruleIndex],
                          config)
                else:
                    print("closure at rule stop %s\n", config)

            if config.context is None or config.context.hasEmptyPath():
                if config.context is None or config.context.isEmpty():
                    configs.add(config)
                    return True
                else:
                    configs.add(
                        LexerATNConfig(state=config.state,
                                       config=config,
                                       context=PredictionContext.EMPTY))
                    currentAltReachedAcceptState = True

            if config.context is not None and not config.context.isEmpty():
                for i in range(0, len(config.context)):
                    if config.context.getReturnState(
                            i) != PredictionContext.EMPTY_RETURN_STATE:
                        newContext = config.context.getParent(
                            i)  # "pop" return state
                        returnState = self.atn.states[
                            config.context.getReturnState(i)]
                        c = LexerATNConfig(state=returnState,
                                           config=config,
                                           context=newContext)
                        currentAltReachedAcceptState = self.closure(
                            input, c, configs, currentAltReachedAcceptState,
                            speculative, treatEofAsEpsilon)

            return currentAltReachedAcceptState

        # optimization
        if not config.state.epsilonOnlyTransitions:
            if not currentAltReachedAcceptState or not config.passedThroughNonGreedyDecision:
                configs.add(config)

        for t in config.state.transitions:
            c = self.getEpsilonTarget(input, config, t, configs, speculative,
                                      treatEofAsEpsilon)
            if c is not None:
                currentAltReachedAcceptState = self.closure(
                    input, c, configs, currentAltReachedAcceptState,
                    speculative, treatEofAsEpsilon)

        return currentAltReachedAcceptState
Example #11
    def closure(
        self,
        input: InputStream,
        config: LexerATNConfig,
        configs: ATNConfigSet,
        currentAltReachedAcceptState: bool,
        speculative: bool,
        treatEofAsEpsilon: bool,
    ):
        if self.debug:
            print("closure(" + config.toString(self.recog, True) + ")")

        if isinstance(config.state, RuleStopState):
            if self.debug:
                if self.recog is not None:
                    print("closure at %s rule stop %s\n", self.recog.getRuleNames()[config.state.ruleIndex], config)
                else:
                    print("closure at rule stop %s\n", config)

            if config.context is None or config.context.hasEmptyPath():
                if config.context is None or config.context.isEmpty():
                    configs.add(config)
                    return True
                else:
                    configs.add(LexerATNConfig(state=config.state, config=config, context=PredictionContext.EMPTY))
                    currentAltReachedAcceptState = True

            if config.context is not None and not config.context.isEmpty():
                for i in range(0, len(config.context)):
                    if config.context.getReturnState(i) != PredictionContext.EMPTY_RETURN_STATE:
                        newContext = config.context.getParent(i)  # "pop" return state
                        returnState = self.atn.states[config.context.getReturnState(i)]
                        c = LexerATNConfig(state=returnState, config=config, context=newContext)
                        currentAltReachedAcceptState = self.closure(
                            input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon
                        )

            return currentAltReachedAcceptState

        # optimization
        if not config.state.epsilonOnlyTransitions:
            if not currentAltReachedAcceptState or not config.passedThroughNonGreedyDecision:
                configs.add(config)

        for t in config.state.transitions:
            c = self.getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon)
            if c is not None:
                currentAltReachedAcceptState = self.closure(
                    input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon
                )

        return currentAltReachedAcceptState
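The RuleStopState branch above treats the PredictionContext like a call stack: each recorded return state is popped and closure continues from the corresponding ATN state with the parent context. A very small sketch of what the recursion effectively does along a single-parent chain, with nested tuples standing in for PredictionContext (illustrative only):

    # Very small sketch of the "pop return state" step in closure() above,
    # with a nested-tuple chain (return_state, parent) standing in for
    # PredictionContext; EMPTY marks the end of the chain.
    EMPTY = None

    def pop_returns(context):
        # yield (return_state, parent_context) pairs until the chain is empty
        while context is not EMPTY:
            return_state, parent = context
            yield return_state, parent
            context = parent

    # context for a token matched via rule calls 5 -> 9 (innermost first)
    ctx = (9, (5, EMPTY))
    for return_state, parent in pop_returns(ctx):
        print(return_state, parent)
    # 9 (5, None)
    # 5 None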
Example #12
    def addDFAEdge(self,
                   from_: DFAState,
                   tk: int,
                   to: DFAState = None,
                   cfgs: ATNConfigSet = None) -> DFAState:

        if to is None and cfgs is not None:
            # leading to this call, ATNConfigSet.hasSemanticContext is used as a
            # marker indicating dynamic predicate evaluation makes this edge
            # dependent on the specific input sequence, so the static edge in the
            # DFA should be omitted. The target DFAState is still created since
            # execATN has the ability to resynchronize with the DFA state cache
            # following the predicate evaluation step.
            #
            # TJP notes: next time through the DFA, we see a pred again and eval.
            # If that gets us to a previously created (but dangling) DFA
            # state, we can continue in pure DFA mode from there.
            #/
            suppressEdge = cfgs.hasSemanticContext
            cfgs.hasSemanticContext = False

            to = self.addDFAState(cfgs)

            if suppressEdge:
                return to

        # add the edge
        if tk < self.MIN_DFA_EDGE or tk > self.MAX_DFA_EDGE:
            # Only track edges within the DFA bounds
            return to

        if self.debug:
            print("EDGE " + str(from_) + " -> " + str(to) + " upon " + chr(tk))

        if from_.edges is None:
            #  make room for tokens 1..n and -1 masquerading as index 0
            from_.edges = [None] * (self.MAX_DFA_EDGE - self.MIN_DFA_EDGE + 1)

        from_.edges[tk - self.MIN_DFA_EDGE] = to  # connect

        return to
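addDFAEdge only records edges for token values inside [MIN_DFA_EDGE, MAX_DFA_EDGE]; anything outside the window always falls back to ATN simulation. A small sketch of that bounded table, using 0..127 as the bounds (the usual ASCII-only window in the lexer simulator, assumed here for the demo):

    # Small sketch of the bounded edge table used by addDFAEdge above;
    # the 0..127 bounds are assumed for the demo (ASCII-only window).
    MIN_DFA_EDGE = 0
    MAX_DFA_EDGE = 127

    def add_edge(edges, tk, target):
        if tk < MIN_DFA_EDGE or tk > MAX_DFA_EDGE:
            return edges                       # out-of-range input is never cached
        if edges is None:
            edges = [None] * (MAX_DFA_EDGE - MIN_DFA_EDGE + 1)
        edges[tk - MIN_DFA_EDGE] = target      # connect
        return edges

    edges = add_edge(None, ord('a'), "s1")
    print(edges[ord('a') - MIN_DFA_EDGE])           # s1
    print(add_edge(edges, 0x1F600, "s2") is edges)  # True: code point out of range, table unchanged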
Example #13
    def __init__(self, atnStartState, decision=0):
        # From which ATN state did we create this DFA?
        self.atnStartState = atnStartState
        self.decision = decision
        # A set of all DFA states. Use {@link Map} so we can get old state back
        #  ({@link Set} only allows you to see if it's there).
        self._states = dict()
        self.s0 = None
        # {@code true} if this DFA is for a precedence decision; otherwise,
        # {@code false}. This is the backing field for {@link #isPrecedenceDfa},
        # {@link #setPrecedenceDfa}.
        self.precedenceDfa = False

        if isinstance(atnStartState, StarLoopEntryState):
            if atnStartState.isPrecedenceDecision:
                self.precedenceDfa = True
                precedenceState = DFAState(configs=ATNConfigSet())
                precedenceState.edges = []
                precedenceState.isAcceptState = False
                precedenceState.requiresFullContext = False
                self.s0 = precedenceState
Example #14
    def addDFAEdge(self, from_:DFAState, tk:int, to:DFAState=None, cfgs:ATNConfigSet=None) -> DFAState:

        if to is None and cfgs is not None:
            # leading to this call, ATNConfigSet.hasSemanticContext is used as a
            # marker indicating dynamic predicate evaluation makes this edge
            # dependent on the specific input sequence, so the static edge in the
            # DFA should be omitted. The target DFAState is still created since
            # execATN has the ability to resynchronize with the DFA state cache
            # following the predicate evaluation step.
            #
            # TJP notes: next time through the DFA, we see a pred again and eval.
            # If that gets us to a previously created (but dangling) DFA
            # state, we can continue in pure DFA mode from there.
            #/
            suppressEdge = cfgs.hasSemanticContext
            cfgs.hasSemanticContext = False

            to = self.addDFAState(cfgs)

            if suppressEdge:
                return to

        # add the edge
        if tk < self.MIN_DFA_EDGE or tk > self.MAX_DFA_EDGE:
            # Only track edges within the DFA bounds
            return to

        if LexerATNSimulator.debug:
            print("EDGE " + str(from_) + " -> " + str(to) + " upon "+ chr(tk))

        if from_.edges is None:
            #  make room for tokens 1..n and -1 masquerading as index 0
            from_.edges = [ None ] * (self.MAX_DFA_EDGE - self.MIN_DFA_EDGE + 1)

        from_.edges[tk - self.MIN_DFA_EDGE] = to # connect

        return to
Example #15
    def getEpsilonTarget(self, input: InputStream, config: LexerATNConfig,
                         t: Transition, configs: ATNConfigSet,
                         speculative: bool, treatEofAsEpsilon: bool):
        c = None
        if t.serializationType == Transition.RULE:
            newContext = SingletonPredictionContext.create(
                config.context, t.followState.stateNumber)
            c = LexerATNConfig(state=t.target,
                               config=config,
                               context=newContext)

        elif t.serializationType == Transition.PRECEDENCE:
            raise UnsupportedOperationException(
                "Precedence predicates are not supported in lexers.")

        elif t.serializationType == Transition.PREDICATE:
            #  Track traversing semantic predicates. If we traverse,
            # we cannot add a DFA state for this "reach" computation
            # because the DFA would not test the predicate again in the
            # future. Rather than creating collections of semantic predicates
            # like v3 and testing them on prediction, v4 will test them on the
            # fly all the time using the ATN not the DFA. This is slower but
            # semantically it's not used that often. One of the key elements to
            # this predicate mechanism is not adding DFA states that see
            # predicates immediately afterwards in the ATN. For example,

            # a : ID {p1}? | ID {p2}? ;

            # should create the start state for rule 'a' (to save start state
            # computation), but should not create target of ID state. The
            # collection of ATN states the following ID references includes
            # states reached by traversing predicates. Since this is when we
            # test them, we cannot cache the DFA state target of ID.

            if self.debug:
                print("EVAL rule " + str(t.ruleIndex) + ":" + str(t.predIndex))
            configs.hasSemanticContext = True
            if self.evaluatePredicate(input, t.ruleIndex, t.predIndex,
                                      speculative):
                c = LexerATNConfig(state=t.target, config=config)

        elif t.serializationType == Transition.ACTION:
            if config.context is None or config.context.hasEmptyPath():
                # execute actions anywhere in the start rule for a token.
                #
                # TODO: if the entry rule is invoked recursively, some
                # actions may be executed during the recursive call. The
                # problem can appear when hasEmptyPath() is true but
                # isEmpty() is false. In this case, the config needs to be
                # split into two contexts - one with just the empty path
                # and another with everything but the empty path.
                # Unfortunately, the current algorithm does not allow
                # getEpsilonTarget to return two configurations, so
                # additional modifications are needed before we can support
                # the split operation.
                lexerActionExecutor = LexerActionExecutor.append(
                    config.lexerActionExecutor,
                    self.atn.lexerActions[t.actionIndex])
                c = LexerATNConfig(state=t.target,
                                   config=config,
                                   lexerActionExecutor=lexerActionExecutor)

            else:
                # ignore actions in referenced rules
                c = LexerATNConfig(state=t.target, config=config)

        elif t.serializationType == Transition.EPSILON:
            c = LexerATNConfig(state=t.target, config=config)

        elif t.serializationType in [
                Transition.ATOM, Transition.RANGE, Transition.SET
        ]:
            if treatEofAsEpsilon:
                if t.matches(Token.EOF, 0, 0xFFFF):
                    c = LexerATNConfig(state=t.target, config=config)

        return c
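The long comment in the PREDICATE branch explains why a state computed after traversing a predicate must not become a cached DFA edge: configs.hasSemanticContext is set here and later read by addDFAEdge (Examples #12/#14), which still builds the target state but suppresses the static edge. A toy sketch of that hand-off (class and function names are illustrative):

    # Toy sketch of the hand-off between getEpsilonTarget and addDFAEdge:
    # traversing a predicate sets a flag, and the edge to the resulting state
    # is then suppressed even though the state itself is still built.
    class ConfigSet:
        def __init__(self):
            self.hasSemanticContext = False

    def add_dfa_edge(cfgs, build_state):
        suppress_edge = cfgs.hasSemanticContext
        cfgs.hasSemanticContext = False
        to = build_state(cfgs)           # the DFA state is created either way
        if suppress_edge:
            return to, None              # ...but no static edge is recorded
        return to, ("edge", to)

    cfgs = ConfigSet()
    cfgs.hasSemanticContext = True       # a predicate transition was taken
    state, edge = add_dfa_edge(cfgs, lambda c: "s1")
    print(state, edge)                   # s1 None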
Example #16
        newState.configs = configs
        dfa.states[newState] = newState
        return newState

    def getDFA(self, mode: int):
        return self.decisionToDFA[mode]

    # Get the text matched so far for the current token.
    def getText(self, input: InputStream):
        # index is first lookahead char, don't include.
        return input.getText(self.startIndex, input.index - 1)

    def consume(self, input: InputStream):
        curChar = input.LA(1)
        if curChar == ord('\n'):
            self.line += 1
            self.column = 0
        else:
            self.column += 1
        input.consume()

    def getTokenName(self, t: int):
        if t == -1:
            return "EOF"
        else:
            return "'" + chr(t) + "'"


LexerATNSimulator.ERROR = DFAState(0x7FFFFFFF, ATNConfigSet())

del Lexer
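consume() above advances the simulator's line/column bookkeeping one character at a time, resetting the column after a newline. The same bookkeeping applied to a plain string, as a minimal sketch:

    # Minimal sketch of the newline-aware position tracking in consume(),
    # applied to a plain string instead of an InputStream.
    def track_positions(text):
        line, column = 1, 0
        positions = []
        for ch in text:
            positions.append((ch, line, column))
            if ch == '\n':
                line += 1
                column = 0
            else:
                column += 1
        return positions

    for ch, line, col in track_positions("ab\ncd"):
        print(repr(ch), line, col)
    # 'a' 1 0, 'b' 1 1, '\n' 1 2, 'c' 2 0, 'd' 2 1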
Example #17
    def getEpsilonTarget(self, input:InputStream, config:LexerATNConfig, t:Transition, configs:ATNConfigSet,
                                           speculative:bool, treatEofAsEpsilon:bool):
        c = None
        if t.serializationType==Transition.RULE:
                newContext = SingletonPredictionContext.create(config.context, t.followState.stateNumber)
                c = LexerATNConfig(state=t.target, config=config, context=newContext)

        elif t.serializationType==Transition.PRECEDENCE:
                raise UnsupportedOperationException("Precedence predicates are not supported in lexers.")

        elif t.serializationType==Transition.PREDICATE:
                #  Track traversing semantic predicates. If we traverse,
                # we cannot add a DFA state for this "reach" computation
                # because the DFA would not test the predicate again in the
                # future. Rather than creating collections of semantic predicates
                # like v3 and testing them on prediction, v4 will test them on the
                # fly all the time using the ATN not the DFA. This is slower but
                # semantically it's not used that often. One of the key elements to
                # this predicate mechanism is not adding DFA states that see
                # predicates immediately afterwards in the ATN. For example,

                # a : ID {p1}? | ID {p2}? ;

                # should create the start state for rule 'a' (to save start state
                # computation), but should not create target of ID state. The
                # collection of ATN states the following ID references includes
                # states reached by traversing predicates. Since this is when we
                # test them, we cannot cache the DFA state target of ID.

                if LexerATNSimulator.debug:
                    print("EVAL rule "+ str(t.ruleIndex) + ":" + str(t.predIndex))
                configs.hasSemanticContext = True
                if self.evaluatePredicate(input, t.ruleIndex, t.predIndex, speculative):
                    c = LexerATNConfig(state=t.target, config=config)

        elif t.serializationType==Transition.ACTION:
                if config.context is None or config.context.hasEmptyPath():
                    # execute actions anywhere in the start rule for a token.
                    #
                    # TODO: if the entry rule is invoked recursively, some
                    # actions may be executed during the recursive call. The
                    # problem can appear when hasEmptyPath() is true but
                    # isEmpty() is false. In this case, the config needs to be
                    # split into two contexts - one with just the empty path
                    # and another with everything but the empty path.
                    # Unfortunately, the current algorithm does not allow
                    # getEpsilonTarget to return two configurations, so
                    # additional modifications are needed before we can support
                    # the split operation.
                    lexerActionExecutor = LexerActionExecutor.append(config.lexerActionExecutor,
                                    self.atn.lexerActions[t.actionIndex])
                    c = LexerATNConfig(state=t.target, config=config, lexerActionExecutor=lexerActionExecutor)

                else:
                    # ignore actions in referenced rules
                    c = LexerATNConfig(state=t.target, config=config)

        elif t.serializationType==Transition.EPSILON:
            c = LexerATNConfig(state=t.target, config=config)

        elif t.serializationType in [ Transition.ATOM, Transition.RANGE, Transition.SET ]:
            if treatEofAsEpsilon:
                if t.matches(Token.EOF, 0, 0xFFFF):
                    c = LexerATNConfig(state=t.target, config=config)

        return c