Esempio n. 1
0
    def __init__(self, SM_A, SM_B, StartingSM=None):
        """Set up the walker over state index pairs of 'SM_A' and 'SM_B'.

        SM_A       -- stored as 'self.original'.
        SM_B       -- stored as 'self.admissible'.
        StartingSM -- state machine to be used as 'self.result'. If it is
                      None, a new 'StateMachine' is created. Its init state
                      is built by 'get_state_core()' for the pair of both
                      init state indices.
        """
        self.original = SM_A
        self.admissible = SM_B

        if StartingSM is not None:
            self.result = StartingSM
        else:
            a_init_index = SM_A.init_state_index
            b_init_index = SM_B.init_state_index
            # The index is determined before the state is built.
            init_state_index = index.map_state_combination_to_index(
                (a_init_index, b_init_index))
            init_state = self.get_state_core(a_init_index, b_init_index)
            self.result = StateMachine(InitStateIndex=init_state_index,
                                       InitState=init_state)

        # TODO: Think if 'state_db' cannot be replaced by 'result'
        self.state_db = {}

        self.path = []

        # Use 'operation_index' to get a unique index that allows to indicate
        # that 'SM_B' is no longer involved. Also, it ensures that the
        # generated state indices from (a_state_index, operation_index) are
        # unique.
        self.operation_index = index.get()

        TreeWalker.__init__(self)
Esempio n. 2
0
 def get_state(self, a_state_index, b_state_index):
     """Provide the state for the pair (a_state_index, b_state_index).

     The pair is mapped to a state index by
     'index.map_state_combination_to_index()'. If 'self.result.states' holds
     no state under that index, one is built by
     'self.get_state_core(a_state_index)' and entered there.

     RETURNS: (state index, state)
     """
     key   = index.map_state_combination_to_index((a_state_index, b_state_index))
     found = self.result.states.get(key)
     if found is not None:
         return key, found

     created = self.get_state_core(a_state_index)
     self.result.states[key] = created
     return key, created
Esempio n. 3
0
 def get_state(self, Args):
     """Return the state that belongs to the state index pair 'Args'.

     Args -- pair (a_state_index, b_state_index).

     The pair is mapped to a state index by
     'index.map_state_combination_to_index()'. A state that has not been
     requested before is built by 'self.get_state_core()' and entered into
     'self.result.states' under that index.

     RETURNS: The state for 'Args'.
     """
     state_index = index.map_state_combination_to_index(Args)
     state       = self.state_db.get(state_index)
     if state is None:
         a_state_index, b_state_index = Args
         state = self.get_state_core(a_state_index, b_state_index)
         # Remember the state in 'state_db', the container that is consulted
         # above. Without this entry the lookup never succeeds, and every
         # repeated request builds a new state which replaces the one in
         # 'result.states'.
         self.state_db[state_index] = state
         self.result.states[state_index] = state
     return state
Esempio n. 4
0
 def get_state(self, a_state_index, b_state_index):
     """Look up, or create, the state for (a_state_index, b_state_index).

     The state index of the pair comes from
     'index.map_state_combination_to_index()'. A state missing in
     'self.result.states' is built by 'self.get_state_core(a_state_index)'
     and stored under that index.

     RETURNS: (state index, state)
     """
     combination = (a_state_index, b_state_index)
     state_index = index.map_state_combination_to_index(combination)
     state = self.result.states.get(state_index)
     if state is None:
         # Build first, then bind the new state locally and in the result.
         state = self.result.states[state_index] = \
             self.get_state_core(a_state_index)
     return state_index, state
Esempio n. 5
0
    def on_enter(self, Args):
        """Enter the node of the state index pair 'Args' and set up the
        transitions of its state.

        Args -- pair (a_state_index, b_state_index); 'b_state_index' must
                differ from 'self.operation_index'.

        Each trigger set of the state in 'original' is split up:
          -- The part shared with a transition of the state in 'admissible'
             to a non-acceptance state triggers to the state of the pair
             (a_ti, b_ti). That pair becomes a sub node.
          -- The part which triggers to an acceptance state in 'admissible'
             is dropped.
          -- What remains triggers to the state of the pair
             (a_ti, operation_index); 'mount_cloned_subsequent_states()' is
             called on the result for 'a_ti'.

        RETURNS: None, if 'Args' is already in 'self.path'.
                 List of (a_ti, b_ti) pairs, else.
        """
        if Args in self.path:
            return None

        a_state_index, b_state_index = Args
        self.path.append((a_state_index, b_state_index))

        combined_state = self.get_state(Args)

        a_target_map = self.original.states[a_state_index].target_map.get_map()
        assert b_state_index != self.operation_index
        b_target_map = self.admissible.states[b_state_index].target_map.get_map()

        next_node_list = []
        for a_ti, a_trigger_set in a_target_map.iteritems():
            uncovered = a_trigger_set.clone()
            for b_ti, b_trigger_set in b_target_map.iteritems():
                if self.admissible.states[b_ti].is_acceptance():
                    # If an acceptance state in 'B' is reached, then the
                    # lexeme starts with something in 'LB'. Thus, the rest of
                    # the paths is inadmissible.
                    uncovered.subtract(b_trigger_set)
                else:
                    common = a_trigger_set.intersection(b_trigger_set)
                    if not common.is_empty():
                        pair = (a_ti, b_ti)
                        combined_state.add_transition(
                            common,
                            index.map_state_combination_to_index(pair))
                        next_node_list.append(pair)
                        uncovered.subtract(common)

            if uncovered.is_empty():
                continue

            # 'admissible' has nothing to say about the remaining triggers.
            combined_state.add_transition(
                uncovered,
                index.map_state_combination_to_index(
                    (a_ti, self.operation_index)))
            self.result.mount_cloned_subsequent_states(
                self.original, a_ti, self.operation_index)

        return next_node_list
Esempio n. 6
0
    def mount_cloned(self, OtherSM, OperationIndex, OtherStartIndex,
                     OtherEndIndex):
        """Clone all states in 'OtherSM' which lie on the path from 'OtherStartIndex'
        to 'OtherEndIndex'. If 'OtherEndIndex' is None, then it ends when there's no further
        path to go.

        State indices of the cloned states are generated by pairs of (other_i, OperationIndex).
        This makes it possible to refer to those states, even before they are generated.

        OtherSM         -- state machine whose states are cloned into 'self.states'.
        OperationIndex  -- second element of the pair from which the index of
                           a cloned state is derived.
        OtherStartIndex -- index in 'OtherSM' where cloning starts; not None.
        OtherEndIndex   -- index in 'OtherSM', or None (see above).

        A clone that is not yet present in 'self.states' is created with the
        acceptance flag of its original. The transitions of the original are
        added with their targets mapped to the indices of the clones.

        RETURNS: None
        """
        assert OtherStartIndex is not None

        work_set = set([OtherStartIndex])

        # NOTE(review): 'done_set' is seeded with an index of 'OtherSM', but
        # below it is filled with and tested against mapped indices. It looks
        # as if 'OtherEndIndex' can only stop the walk when it coincides with
        # a mapped index -- confirm the intent.
        if OtherEndIndex is None: done_set = set()
        else: done_set = set([OtherEndIndex])

        while work_set:
            other_i = work_set.pop()
            other_state = OtherSM.states[other_i]

            state_i = state_machine_index.map_state_combination_to_index(
                (other_i, OperationIndex))
            done_set.add(state_i)

            state = self.states.get(state_i)
            if state is None:
                state = State(AcceptanceF=other_state.is_acceptance())
                self.states[state_i] = state

            for other_ti, other_trigger_set in other_state.target_map.get_map(
            ).iteritems():
                target_i = state_machine_index.map_state_combination_to_index(
                    (other_ti, OperationIndex))
                # The state 'target_i' either:
                #   -- exists, because it is in the done_set, or
                #   -- will be created because its correspondence 'other_ti'
                #      is added to the work set.
                state.add_transition(other_trigger_set, target_i)
                if target_i not in done_set:
                    # The target is looked up in 'OtherSM.states' as soon as
                    # it is taken from the work set. So, it must exist there.
                    assert other_ti in OtherSM.states
                    work_set.add(other_ti)

        return
Esempio n. 7
0
    def on_enter(self, Args):
        """Determine the transitions of the state for the pair 'Args' =
        (a_state_index, b_state_index).

        For each transition of the state in 'original':
          -- Triggers which lead to an acceptance state in 'admissible' are
             taken out.
          -- Triggers shared with a transition in 'admissible' to a
             non-acceptance state lead to the state of (a_ti, b_ti).
          -- All other triggers lead to the state of
             (a_ti, operation_index). For those,
             'mount_cloned_subsequent_states()' is called on the result.

        'b_state_index' must not be equal to 'self.operation_index'.

        RETURNS: None, if 'Args' is found in 'self.path'.
                 List of the pairs (a_ti, b_ti) that have been connected, else.
        """
        if Args in self.path:
            return None

        a_state_index, b_state_index = Args
        self.path.append((a_state_index, b_state_index))

        state = self.get_state(Args)

        def connect(TriggerSet, Combination):
            # Transition on 'TriggerSet' to the state of 'Combination'.
            state.add_transition(
                TriggerSet, index.map_state_combination_to_index(Combination))

        a_tm = self.original.states[a_state_index].target_map.get_map()
        assert b_state_index != self.operation_index
        b_tm = self.admissible.states[b_state_index].target_map.get_map()

        sub_node_list = []
        for a_ti, a_trigger_set in a_tm.iteritems():
            rest = a_trigger_set.clone()
            for b_ti, b_trigger_set in b_tm.iteritems():
                # If an acceptance state in 'B' is reached, then the lexeme
                # starts with something in 'LB'. Thus, the rest of the paths
                # is inadmissible.
                if self.admissible.states[b_ti].is_acceptance():
                    rest.subtract(b_trigger_set)
                    continue

                shared = a_trigger_set.intersection(b_trigger_set)
                if shared.is_empty():
                    continue

                connect(shared, (a_ti, b_ti))
                sub_node_list.append((a_ti, b_ti))
                rest.subtract(shared)

            if not rest.is_empty():
                connect(rest, (a_ti, self.operation_index))
                self.result.mount_cloned_subsequent_states(
                    self.original, a_ti, self.operation_index)

        return sub_node_list
Esempio n. 8
0
    def mount_cloned(self, OtherSM, OperationIndex, OtherStartIndex, OtherEndIndex):
        """Clone all states in 'OtherSM' which lie on the path from 'OtherStartIndex'
        to 'OtherEndIndex'. If 'OtherEndIndex' is None, then it ends when there's no further
        path to go. 

        State indices of the cloned states are generated by pairs of (other_i, OperationIndex).
        This makes it possible to refer to those states, even before they are generated.

        OtherSM         -- source of the states to be cloned.
        OperationIndex  -- combined with each index of 'OtherSM' to get the
                           index of the clone.
        OtherStartIndex -- where to start in 'OtherSM'; must not be None.
        OtherEndIndex   -- index in 'OtherSM', or None (see above).

        Clones are entered into 'self.states'. A clone which does not exist
        yet gets the acceptance flag of the state it is cloned from.

        RETURNS: None
        """
        assert OtherStartIndex is not None

        work_set = set([OtherStartIndex])

        # NOTE(review): The initial content of 'done_set' is an index of
        # 'OtherSM'. Everything else that is added or tested is a mapped
        # index. Presumably, 'OtherEndIndex' has no effect unless both index
        # spaces overlap -- to be verified.
        if OtherEndIndex is None:   done_set = set()
        else:                       done_set = set([OtherEndIndex])

        while work_set:
            other_i     = work_set.pop()
            other_state = OtherSM.states[other_i]

            state_i = state_machine_index.map_state_combination_to_index((other_i, OperationIndex))
            done_set.add(state_i)

            state = self.states.get(state_i)
            if state is None:
                state = State(AcceptanceF=other_state.is_acceptance())
                self.states[state_i] = state

            for other_ti, other_trigger_set in other_state.target_map.get_map().iteritems():
                target_i = state_machine_index.map_state_combination_to_index((other_ti, OperationIndex))
                # The state 'target_i' either:
                #   -- exists, because it is in the done_set, or
                #   -- will be created because its correspondence 'other_ti' is 
                #      added to the work set.
                state.add_transition(other_trigger_set, target_i)
                if target_i not in done_set:
                    # Check the index that is queued; 'other_i' has already
                    # been used to access 'OtherSM.states' above.
                    assert other_ti in OtherSM.states
                    work_set.add(other_ti)

        return
Esempio n. 9
0
    def __init__(self, SM_A, SM_B, result=None):
        """Prepare the walk along the states of 'SM_A' and 'SM_B'.

        SM_A   -- stored as 'self.original'.
        SM_B   -- stored as 'self.admissible'.
        result -- state machine to be used as 'self.result'. If it is None,
                  a new 'StateMachine' is created. Its init state index
                  relates to the pair of both init state indices; the init
                  state is built by 'get_state_core()' from the init state
                  index of 'SM_A'.
        """
        self.original   = SM_A
        self.admissible = SM_B

        if result is not None:
            self.result = result
        else:
            init_combination = (SM_A.init_state_index, SM_B.init_state_index)
            new_init_index   = index.map_state_combination_to_index(init_combination)
            new_init_state   = self.get_state_core(SM_A.init_state_index)
            self.result      = StateMachine(InitStateIndex = new_init_index,
                                            InitState      = new_init_state)

        self.path = []

        # Use 'operation_index' to get a unique index that allows to indicate
        # that 'SM_B' is no longer involved. Also, it ensures that the
        # generated state indices from (a_state_index, operation_index) are
        # unique.
        self.operation_index = index.get()

        TreeWalker.__init__(self)
Esempio n. 10
0
    def __init__(self, SM_A, SM_B, result=None):
        """Initialize the walker for 'SM_A' (original) and 'SM_B' (admissible).

        result -- state machine which receives the generated states. Without
                  it, a 'StateMachine' is created whose init state index is
                  derived from both init state indices and whose init state
                  comes from 'get_state_core()' for the init state index of
                  'SM_A'.
        """
        self.original = SM_A
        self.admissible = SM_B

        if result is None:
            combination = (SM_A.init_state_index, SM_B.init_state_index)
            # Keyword arguments are evaluated in order: index first, then
            # the state.
            result = StateMachine(
                InitStateIndex=index.map_state_combination_to_index(
                    combination),
                InitState=self.get_state_core(SM_A.init_state_index))
        self.result = result

        self.path = []

        # Use 'operation_index' to get a unique index that allows to indicate
        # that 'SM_B' is no longer involved. Also, it ensures that the
        # generated state indices from (a_state_index, operation_index) are
        # unique.
        self.operation_index = index.get()

        TreeWalker.__init__(self)
Esempio n. 11
0
    def __init__(self, SM_A, SM_B, StartingSM=None):
        """Initialize the walker for 'SM_A' (original) and 'SM_B' (admissible).

        StartingSM -- state machine which is used as 'self.result'. If it is
                      not given, a 'StateMachine' is created. Its init state
                      index and its init state are derived from the pair of
                      init state indices of 'SM_A' and 'SM_B'.
        """
        self.original   = SM_A
        self.admissible = SM_B

        if StartingSM is None:
            init_pair   = (SM_A.init_state_index, SM_B.init_state_index)
            self.result = StateMachine(
                InitStateIndex = index.map_state_combination_to_index(init_pair),
                InitState      = self.get_state_core(*init_pair))
        else:
            self.result = StartingSM

        # TODO: Think if 'state_db' cannot be replaced by 'result'
        self.state_db = {}

        self.path     = []

        # Use 'operation_index' to get a unique index that allows to indicate
        # that 'SM_B' is no longer involved. Also, it ensures that the
        # generated state indices from (a_state_index, operation_index) are
        # unique.
        self.operation_index = index.get()

        TreeWalker.__init__(self)
Esempio n. 12
0
def do(SM, Class_StateMachine=StateMachine, Class_State=State):
    """Creates a deterministic finite automaton (DFA) from a state machine 
    - which may be a NFA (non-deterministic finite automaton). 
    
    This is a generalized version of the 'subset construction' algorithm. Where
    subset construction focuses on letters of an alphabet for the
    investigation of transitions, this algorithm focuses on elementary trigger
    sets. A very good description of the subset construction algorithm can be
    found in 'Engineering a Compiler' by Keith Cooper.

    SM                 -- state machine to be transformed.
    Class_StateMachine -- class of the result; it is constructed with the
                          keyword argument 'InitState'.
    Class_State        -- class of the states of the result; its
                          'from_state_iterable()' builds a state from the
                          states of a state combination.

    RETURNS: Object of 'Class_StateMachine'.
    """
    # (*) create the result state machine
    initial_state_epsilon_closure = SM.get_epsilon_closure(SM.init_state_index) 

    # (*) initial state of resulting DFA = epsilon closure of initial state of NFA
    #     -- add the command list of all states in the epsilon closure
    InitState = Class_State.from_state_iterable(
                           SM.states[i] for i in initial_state_epsilon_closure)

    # NOTE: 
    # State machines with an initial acceptance state are conceivable!  In a
    # 'define' section building bricks of patterns may be defined that 'accept
    # nothing'. Those 'building bricks' may use nfa_to_dfa here, too.  
    #
    # (A pattern state machine for pattern matching, of course, has to disallow 
    #  'accept nothing'.)
    result = Class_StateMachine(InitState=InitState)
                          
    # (*) prepare the initial worklist
    worklist = [ ( result.init_state_index, initial_state_epsilon_closure) ]

    epsilon_closure_db = SM.get_epsilon_closure_db()

    while worklist:
        # 'start_state_index' is the index of an **existing** state in the state machine.
        # It was either created above, in StateMachine's constructor, or as a target
        # state index.
        start_state_index, start_state_combination = worklist.pop()
 
        # (*) compute the elementary trigger sets together with the 
        #     epsilon closure of target state combinations that they trigger to.
        #     In other words: find the ranges of characters where the state triggers to
        #     a unique state combination. E.g:
        #                Range        Target State Combination 
        #                [0:23]   --> [ State1, State2, State10 ]
        #                [24:60]  --> [ State1 ]
        #                [61:123] --> [ State2, State10 ]
        #
        elementary_trigger_set_infos = SM.get_elementary_trigger_sets(start_state_combination,
                                                                      epsilon_closure_db)
        ## DEBUG_print(start_state_combination, elementary_trigger_set_infos)

        # (*) loop over all elementary trigger sets
        for epsilon_closure_of_target_state_combination, trigger_set in elementary_trigger_set_infos.iteritems():
            #  -- if there is no trigger to the given target state combination, then drop it
            if trigger_set.is_empty(): continue

            # -- add a new target state representing the state combination
            #    (if this did not happen yet)
            target_state_index = \
                 map_state_combination_to_index(epsilon_closure_of_target_state_combination)

            # -- if target state combination was not considered yet, then create 
            #    a new state in the state machine
            #    ('in' replaces the deprecated 'has_key()')
            if target_state_index not in result.states:
                # create the new target state in the state machine
                result.states[target_state_index] = \
                    Class_State.from_state_iterable(
                        SM.states[i] 
                        for i in epsilon_closure_of_target_state_combination)

                worklist.append((target_state_index, 
                                 epsilon_closure_of_target_state_combination))  

            # -- add the transition 'start state to target state'
            result.add_transition(start_state_index, trigger_set, target_state_index)

    return result 
Esempio n. 13
0
def do(SM, Class_StateMachine=StateMachine, Class_State=State):
    """Creates a deterministic finite automaton (DFA) from a state machine
    - which may be a NFA (non-deterministic finite automaton).

    This is a generalized version of the 'subset construction' algorithm. Where
    subset construction focuses on letters of an alphabet for the
    investigation of transitions, this algorithm focuses on elementary trigger
    sets. A very good description of the subset construction algorithm can be
    found in 'Engineering a Compiler' by Keith Cooper.

    SM                 -- state machine to be transformed.
    Class_StateMachine -- class of the result; constructed by
                          'Class_StateMachine(InitState=...)'.
    Class_State        -- class of the result's states; a state is built
                          from the states of a state combination by
                          'Class_State.from_state_iterable()'.

    RETURNS: Object of 'Class_StateMachine'.
    """
    # (*) create the result state machine
    initial_state_epsilon_closure = SM.get_epsilon_closure(SM.init_state_index)

    # (*) initial state of resulting DFA = epsilon closure of initial state of NFA
    #     -- add the command list of all states in the epsilon closure
    InitState = Class_State.from_state_iterable(
        SM.states[i] for i in initial_state_epsilon_closure)

    # NOTE:
    # State machines with an initial acceptance state are conceivable!  In a
    # 'define' section building bricks of patterns may be defined that 'accept
    # nothing'. Those 'building bricks' may use nfa_to_dfa here, too.
    #
    # (A pattern state machine for pattern matching, of course, has to disallow
    #  'accept nothing'.)
    result = Class_StateMachine(InitState=InitState)

    # (*) prepare the initial worklist
    worklist = [(result.init_state_index, initial_state_epsilon_closure)]

    epsilon_closure_db = SM.get_epsilon_closure_db()

    while worklist:
        # 'start_state_index' is the index of an **existing** state in the state machine.
        # It was either created above, in StateMachine's constructor, or as a target
        # state index.
        start_state_index, start_state_combination = worklist.pop()

        # (*) compute the elementary trigger sets together with the
        #     epsilon closure of target state combinations that they trigger to.
        #     In other words: find the ranges of characters where the state triggers to
        #     a unique state combination. E.g:
        #                Range        Target State Combination
        #                [0:23]   --> [ State1, State2, State10 ]
        #                [24:60]  --> [ State1 ]
        #                [61:123] --> [ State2, State10 ]
        #
        elementary_trigger_set_infos = SM.get_elementary_trigger_sets(
            start_state_combination, epsilon_closure_db)
        ## DEBUG_print(start_state_combination, elementary_trigger_set_infos)

        # (*) loop over all elementary trigger sets
        for epsilon_closure_of_target_state_combination, trigger_set in elementary_trigger_set_infos.iteritems(
        ):
            #  -- if there is no trigger to the given target state combination, then drop it
            if trigger_set.is_empty(): continue

            # -- add a new target state representing the state combination
            #    (if this did not happen yet)
            target_state_index = \
                 map_state_combination_to_index(epsilon_closure_of_target_state_combination)

            # -- if target state combination was not considered yet, then create
            #    a new state in the state machine
            #    (membership test by 'in'; 'has_key()' is deprecated)
            if target_state_index not in result.states:
                # create the new target state in the state machine
                result.states[target_state_index] = \
                    Class_State.from_state_iterable(
                        SM.states[i]
                        for i in epsilon_closure_of_target_state_combination)

                worklist.append((target_state_index,
                                 epsilon_closure_of_target_state_combination))

            # -- add the transition 'start state to target state'
            result.add_transition(start_state_index, trigger_set,
                                  target_state_index)

    return result
Esempio n. 14
0
def do(SM_List):
    """Intersection: 

       Only match on patterns which are matched by all state machines
       in 'SM_List'.

       (C) 2013 Frank-Rene Schaefer
       ________________________________________________________________________

       A lexeme which matches all patterns must reach an acceptance state in
       each given state machine. That is, 
       
          For each state machine, there is a path from the init state to 
          an acceptance state which is triggered by the characters of the 
          lexeme.

       We cannot go forward, since we cannot omit a path upon non-fit.

       Now, consider the super-state consisting of all acceptance states
       of all state machines. There must be a way backward from the 
       super-acceptance-state to the init states. As soon as a path is 
       interrupted, it can be thrown away. This can be achieved by reversed 
       state machines which are combined into a single one.
       
       Reverse all state machines; the epsilon closure of the init state
       corresponds to the super acceptance state. The transitions in the
       super-state machine correspond to the way backwards in the state
       machine. For each feasible state in the super-state machine create
       a new state. 

       The acceptance states of the reversed state machines correspond to 
       the init states of the original state machines. If the super state
       contains an acceptance state of each reversed state machine, it can 
       become an acceptance state of the intersection, because we now found 
       a path. The found state machine must be reversed at the end.

       RETURNS: 'special.get_none()', if 'special.is_none()' holds for one 
                of the state machines in 'SM_List'.
                A default-constructed 'StateMachine()', if the constructed 
                state machine has no acceptance states.
                'beautifier.do(reverse.do(result))', else.
    """
    for sm in SM_List:
        if special.is_none(sm):         # If one state machine is '\None'
            return special.get_none()   # then, the intersection is '\None'

    reverse_sm_list          = [ reverse.do(sm)                            for sm in SM_List ]
    state_id_set_list        = [ set(sm.states.iterkeys())                 for sm in reverse_sm_list ]
    acceptance_state_id_list = [ set(sm.get_acceptance_state_index_list()) for sm in reverse_sm_list ]

    def has_one_from_each(StateIDSet_List, StateIDSet):
        """StateIDSet_List[i] is the set of state indices from state 
        machine 'i' in 'reverse_sm_list'. 

        RETURNS: True -- If the StateIDSet has at least one state 
                         from every state machine.
                 False -- If there is at least one state machine 
                          that has no state in 'StateIDSet'.
        """
        for state_id_set in StateIDSet_List:
            if state_id_set.isdisjoint(StateIDSet): 
                return False
        return True

    def get_merged_state(AcceptanceStateIndexList, EpsilonClosure):
        """Create the new target state in the state machine
           Accept only if all accept.
        """
        acceptance_f = has_one_from_each(AcceptanceStateIndexList, 
                                         EpsilonClosure)
        return State(AcceptanceF=acceptance_f)

    # Plain merge of all states of all state machines with an 
    # epsilon transition from the init state to all init states
    # of the reverse_sm
    sm = StateMachine()
    for rsm in reverse_sm_list:
        sm.states.update(rsm.states)
        sm.add_epsilon_transition(sm.init_state_index, rsm.init_state_index) 

    initial_state_epsilon_closure = sm.get_epsilon_closure(sm.init_state_index) 

    InitState = get_merged_state(acceptance_state_id_list, 
                                 initial_state_epsilon_closure)

    result    = StateMachine(InitStateIndex=index.get(), InitState=InitState)

    # (*) prepare the initial worklist
    worklist = [ ( result.init_state_index, initial_state_epsilon_closure) ]

    epsilon_closure_db = sm.get_epsilon_closure_db()

    while worklist:
        # 'start_state_index' is the index of an **existing** state in the state machine.
        # It was either created above, in StateMachine's constructor, or as a target
        # state index.
        start_state_index, start_state_combination = worklist.pop()
 
        # (*) compute the elementary trigger sets together with the 
        #     epsilon closure of target state combinations that they trigger to.
        #     In other words: find the ranges of characters where the state triggers to
        #     a unique state combination. E.g:
        #                Range        Target State Combination 
        #                [0:23]   --> [ State1, State2, State10 ]
        #                [24:60]  --> [ State1 ]
        #                [61:123] --> [ State2, State10 ]
        #
        elementary_trigger_set_infos = sm.get_elementary_trigger_sets(start_state_combination,
                                                                      epsilon_closure_db)
        ## DEBUG_print(start_state_combination, elementary_trigger_set_infos)

        # (*) loop over all elementary trigger sets
        for epsilon_closure_of_target_state_combination, trigger_set in elementary_trigger_set_infos.iteritems():
            #  -- if there is no trigger to the given target state combination, then drop it
            if trigger_set.is_empty(): 
                continue
            #  -- a combination which lacks a state of one of the reversed 
            #     state machines cannot belong to the intersection
            elif not has_one_from_each(state_id_set_list, epsilon_closure_of_target_state_combination):
                continue

            # -- add a new target state representing the state combination
            #    (if this did not happen yet)
            target_state_index = \
                 map_state_combination_to_index(epsilon_closure_of_target_state_combination)

            # -- if target state combination was not considered yet, then create 
            #    a new state in the state machine
            #    ('in' replaces the deprecated 'has_key()')
            if target_state_index not in result.states:
                result.states[target_state_index] = get_merged_state(acceptance_state_id_list, 
                                                                     epsilon_closure_of_target_state_combination)

                worklist.append((target_state_index, epsilon_closure_of_target_state_combination))  

            # -- add the transition 'start state to target state'
            result.add_transition(start_state_index, trigger_set, target_state_index)

    if not result.has_acceptance_states():
        return StateMachine()
    else:
        return beautifier.do(reverse.do(result))