Ejemplo n.º 1
0
def do(SM):
    """Return a minimized, DFA-compliant state machine equivalent to SM.

    SM is first passed through 'nfa_to_dfa.do()'; the resulting machine
    is then handed to 'hopcroft.do()' for Hopcroft minimization.

    RETURNS: The machine produced by 'nfa_to_dfa.do()'. It is asserted to
             be DFA compliant before it is returned.
    """
    dfa = nfa_to_dfa.do(SM)

    # The return value of the minimization is deliberately ignored; with
    # 'CreateNewStateMachineF=False' the machine passed in is the one that
    # is returned to the caller below.
    hopcroft.do(dfa, CreateNewStateMachineF=False)

    assert dfa.is_DFA_compliant()
    return dfa
Ejemplo n.º 2
0
def do(SM):
    """Build the 'beautified' counterpart of SM.

    Steps:

       (1) 'nfa_to_dfa.do()' derives a new machine from SM.
       (2) 'hopcroft.do()' is applied to that machine for minimization.

    RETURNS: The machine from step (1), after step (2) has been applied
             to it. DFA compliance is checked by an assertion.
    """
    converted = nfa_to_dfa.do(SM)

    # Result of 'hopcroft.do()' is not used; 'converted' itself is returned.
    hopcroft.do(converted, CreateNewStateMachineF=False)

    assert converted.is_DFA_compliant()
    return converted
Ejemplo n.º 3
0
def do(SM_List):
    """Compute the union of all state machines in SM_List.

    The union is delegated to the 'parallelize' module, which already
    combines multiple state machines; no dedicated 'union' implementation
    is needed. The combined machine is Hopcroft-minimized afterwards.

    RETURNS: Whatever 'hopcroft_minimization.do()' returns for the
             combined machine.
    """
    combined = parallelize.do(SM_List)
    minimized = hopcroft_minimization.do(combined,
                                         CreateNewStateMachineF=False)
    return minimized
Ejemplo n.º 4
0
def test(sm):
    backup_sm = deepcopy(sm)
    optimal_sm = hopcroft.do(sm, CreateNewStateMachineF=CreateNewStateMachineF)
    print optimal_sm
    orphan_state_index_list = optimal_sm.get_orphaned_state_index_list()
    if len(orphan_state_index_list) != 0:
        print "ERROR: orphan states found = ", orphan_state_index_list
    if identity_checker.do(backup_sm, optimal_sm) == False:
        print "ERROR: state machines not equivalent"
Ejemplo n.º 5
0
def test(sm, post_sm):
    print "EXPRESSION = ", sm
    print "POST CONDITION = ", post_sm
    return_sm = setup_post_context.do(sm, post_sm, False, SourceRef_VOID)
    print "APPENDED = ", sm
    sm = nfa_to_dfa.do(sm)
    print "DFA = ", sm
    sm = hopcroft.do(sm)
    print "HOPCROFT = ", sm
Ejemplo n.º 6
0
def test(sm, post_sm):    
    print "EXPRESSION = ", sm
    print "POST CONDITION = ", post_sm
    return_sm = setup_post_context.do(sm, post_sm, False, SourceRef_VOID)
    print "APPENDED = ", sm
    sm = nfa_to_dfa.do(sm)
    print "DFA = ", sm
    sm = hopcroft.do(sm)
    print "HOPCROFT = ", sm
def test(sm, txt):
    global test_i
    backup_sm = deepcopy(sm)
    print "_______________________________________________________________________________"
    print ("(%i)" % test_i),
    print txt
    optimal_sm = hopcroft.do(sm, CreateNewStateMachineF=CreateNewStateMachineF)
    print optimal_sm
    test_i += 1
    orphan_state_index_list = optimal_sm.get_orphaned_state_index_list()
    if len(orphan_state_index_list) != 0:
        print "ERROR: orphan states found = ", orphan_state_index_list
    if identity_checker.do(backup_sm, optimal_sm) == False:
        print "ERROR: state machines not equivalent"
Ejemplo n.º 8
0
def test(sm, txt):
    global test_i
    backup_sm = deepcopy(sm)
    print "_______________________________________________________________________________"
    print("(%i)" % test_i),
    print txt
    optimal_sm = hopcroft.do(sm, CreateNewStateMachineF=CreateNewStateMachineF)
    print optimal_sm
    test_i += 1
    orphan_state_index_list = optimal_sm.get_orphaned_state_index_list()
    if len(orphan_state_index_list) != 0:
        print "ERROR: orphan states found = ", orphan_state_index_list
    if identity_checker.do(backup_sm, optimal_sm) == False:
        print "ERROR: state machines not equivalent"
Ejemplo n.º 9
0
 def get_pattern_object(SM):
     """Wrap SM into a 'Pattern' after making it a minimized DFA.

     SM is only sent through 'nfa_to_dfa.do()' if it is not yet DFA
     compliant. The machine is then Hopcroft-minimized and passed to
     'Pattern' with 'AllowStateMachineTrafoF=True'.
     """
     dfa = SM if SM.is_DFA_compliant() else nfa_to_dfa.do(SM)
     minimized = hopcroft.do(dfa, CreateNewStateMachineF=False)
     return Pattern(minimized, AllowStateMachineTrafoF=True)
Ejemplo n.º 10
0
 def get_pattern_object(SM):
     """Create a 'Pattern' from the state machine SM.

     A machine that is already DFA compliant is used directly; any other
     machine is converted by 'nfa_to_dfa.do()' first. In both cases the
     machine is Hopcroft-minimized before the 'Pattern' is constructed.
     """
     if SM.is_DFA_compliant():
         candidate = SM
     else:
         candidate = nfa_to_dfa.do(SM)

     return Pattern(hopcroft.do(candidate, CreateNewStateMachineF=False),
                    AllowStateMachineTrafoF=True)
Ejemplo n.º 11
0
def do(SM):
    """Convert SM with 'nfa_to_dfa.do()' and Hopcroft-minimize the result.

    RETURNS: The machine obtained from 'nfa_to_dfa.do()'; the return value
             of 'hopcroft.do()' itself is not used.
    """
    dfa = nfa_to_dfa.do(SM)
    hopcroft.do(dfa, CreateNewStateMachineF=False)
    return dfa
Ejemplo n.º 12
0
    def do_state_machine(self, sm):
        """Transforms a given state machine from 'Unicode Driven' to another
        character encoding type.

        Every transition of 'sm' is handed to 'self.do_transition()'. If
        'Setup.bad_lexatom_detection_f' is set, an additional accepting state
        marked with 'E_IncidenceIDs.BAD_LEXATOM' is added to 'sm' beforehand.
        The states of 'sm' are modified in place.

        ARGUMENTS:
           sm -- state machine to be transformed, or None. It must be DFA
                 compliant (asserted).

        RETURNS:
           [0] Transformation complete (True->yes, False->not all transformed)
           [1] Transformed state machine. It may be the same as it was
               before if there was no transformation actually. It is None,
               if 'sm' was None.

        It is ensured that the result of this function is a DFA compliant
        state machine: if it is not after the transformation, it is passed
        through 'nfa_to_dfa.do()'. The result is always Hopcroft-minimized.
        """
        assert Setup.lexatom.type_range is not None

        # Nothing to transform => report 'complete'.
        if sm is None: return True, None
        assert sm.is_DFA_compliant()

        all_complete_f = True
        if Setup.bad_lexatom_detection_f:
            bad_lexatom_si = state_machine_index.get()
            # Generate the 'bad lexatom acceptor'.
            bad_lexatom_state = DFA_State(AcceptanceF=True)
            bad_lexatom_state.mark_acceptance_id(E_IncidenceIDs.BAD_LEXATOM)
            sm.states[bad_lexatom_si] = bad_lexatom_state

        else:
            # 'None' signals further below that there is no such state.
            bad_lexatom_si = None

        # NOTE: Not 'iteritems()', for some encodings intermediate states are
        #       generated. Those shall not be subject to transformation.
        #       'items()' delivers a snapshot list (Python 2), so 'sm.states'
        #       may safely grow inside the loop.
        for from_si, state in sm.states.items():
            # The 'bad lexatom acceptor' itself is never transformed.
            if from_si == bad_lexatom_si: continue
            target_map = state.target_map.get_map()

            # Snapshot again: 'do_transition()' is assumed to adapt
            # 'target_map' (see below).
            for to_si, trigger_set in target_map.items():
                if to_si == bad_lexatom_si: continue

                complete_f,  \
                new_state_db = self.do_transition(target_map, from_si, to_si,
                                                  bad_lexatom_si)
                # Assume that the 'target_map' has been adapted if changes were
                # necessary.
                if new_state_db is not None:
                    # Take over the states that were generated for this
                    # transition.
                    sm.states.update(new_state_db)

                # A single incomplete transition makes the whole result
                # 'incomplete'.
                all_complete_f &= complete_f

            # Transition to 'bad lexatom acceptor' on first code unit is best
            # to happen here, after all transitions have been adapted.
            # NOTE(review): this is called even if 'bad_lexatom_si' is None;
            # presumably the callee handles that case -- confirm.
            self._add_transition_to_bad_lexatom_detector(
                target_map, bad_lexatom_si, 0)

            # If there were intermediate states being generated, the
            # error detection must have been implemented right then.

        sm.delete_transitions_beyond_interval(Setup.lexatom.type_range)

        sm.delete_orphaned_states()

        # AFTER: Whatever happened, the transitions in the state machine MUST
        #        lie in the drain_set.
        if not sm.is_DFA_compliant():
            sm = nfa_to_dfa.do(sm)
        sm = hopcroft_minimization.do(sm, CreateNewStateMachineF=False)
        return all_complete_f, sm
Ejemplo n.º 13
0
def do(the_state_machine, post_context_sm, EndOfLinePostContextF, fh=-1):
    """Appends a post context to the given state machine and changes
       state infos as required.

       ARGUMENTS:

           the_state_machine     -- core pattern; must not be empty and must
                                    not have origins (asserted).
           post_context_sm       -- state machine of the post context, or None.
           EndOfLinePostContextF -- if True, a 'newline' is mounted to the post
                                    context (or becomes the post context, if
                                    'post_context_sm' is None).
           fh                    -- passed on to 'error_msg()' for warnings.

       RETURNS: A pair

           [0] state machine of the core pattern with the post context mounted.
               It is 'the_state_machine' unchanged, if there is no post context
               at all or if the post context's initial state accepts.
           [1] None, except for (pseudo-)ambiguous post contexts. Then it is
               the beautified state machine returned by
               'ambiguous_post_context.mount()'.

       NOTE:

           In case that:    post_context_sm is not None
                         or EndOfLinePostContextF

           The function appends something to the state machine and
           it is therefore required to pass 'NFA to DFA'--better
           also Hopcroft Minimization. Both are done before returning.

       ________________________________________________________________________
       This process is very similar to sequentialization.
       There is a major difference, though:

       Given a state machine (e.g. a pattern) X with a post context Y,
       a match is only valid if X is followed by Y. Let Xn be an acceptance
       state of X and Ym an acceptance state of Y:

              ---(Xn-1)---->(Xn)---->(Y0)----> ... ---->((Ym))
                            store                       acceptance
                            input
                            position

       That is, it holds:

          -- The input position is stored at Xn, even though
             it is 'officially' not an acceptance state.

          -- Ym will be an acceptance state, but it will not store
             the input position! Instead, it restores the stored one.

       The analysis of the next pattern will start at the position where
       X stopped, even though Ym is required to state acceptance.

    """
    # State machines with no states are senseless here. 
    assert not the_state_machine.is_empty(), \
           "empty state machine can have no post context."
    assert post_context_sm is None or not post_context_sm.is_empty(), \
           "empty state machine cannot be a post-context."

    # State machines involved with post condition building are part of a pattern, 
    # but not configured out of multiple patterns. Thus there should be no origins.
    assert the_state_machine.has_origins() == False
    assert post_context_sm is None or not post_context_sm.has_origins()
    for state in the_state_machine.get_acceptance_state_list():
        for origin in state.origins(): 
            assert origin.pre_context_id() == E_PreContextIDs.NONE, \
                   "Post Contexts MUST be mounted BEFORE pre-contexts."

    if post_context_sm is None:
        if not EndOfLinePostContextF:
            # Neither post context nor end-of-line condition: nothing to do.
            return the_state_machine, None
        # Generate a new post context that just contains the 'newline'
        post_context_sm = StateMachine(AcceptanceF=True)
        post_context_sm.mount_newline_to_acceptance_states(Setup.dos_carriage_return_newline_f)

    elif EndOfLinePostContextF: 
        # Mount 'newline' to existing post context
        # (operates on the 'post_context_sm' object passed by the caller).
        post_context_sm.mount_newline_to_acceptance_states(Setup.dos_carriage_return_newline_f)

    # A post context with an initial state that is acceptance is not really a
    # 'context' since it accepts anything. The state machine remains un-post context.
    if post_context_sm.get_init_state().is_acceptance():
        error_msg("Post context accepts anything---replaced by no post context.", fh, 
                  DontExitF=True)
        return the_state_machine, None
    
    # (*) Two ways of handling post-contexts:
    #
    #     -- Seldom Exception: 
    #        Pseudo-Ambiguous Post Conditions (x+/x) -- detecting the end of the 
    #        core pattern after the end of the post context
    #        has been reached.
    #
    if ambiguous_post_context.detect_forward(the_state_machine, post_context_sm):
        if ambiguous_post_context.detect_backward(the_state_machine, post_context_sm):
            # -- for post contexts that are forward and backward ambiguous
            #    a philosophical cut is necessary.
            error_msg("Post context requires philosophical cut--handle with care!\n"
                      "Proposal: Isolate pattern and ensure results are as expected!", fh, 
                      DontExitF=True)
            post_context_sm = ambiguous_post_context.philosophical_cut(the_state_machine, post_context_sm)
        
        # NOTE: May be, the_state_machine does contain now an epsilon transition. See
        #       the NOTE in the docstring of this function. Therefore, both
        #       machines are passed through the 'beautifier'.
        ipsb_sm = ambiguous_post_context.mount(the_state_machine, post_context_sm)
        the_state_machine = beautifier.do(the_state_machine)
        ipsb_sm           = beautifier.do(ipsb_sm)
        return the_state_machine, ipsb_sm 

    # -- The 'normal' way: storing the input position at the end of the core
    #    pattern.
    #
    # (*) Need to clone the state machines, i.e. provide their internal
    #     states with new ids, but the 'behavior' remains. This allows
    #     state machines to appear twice, or being used in 'larger'
    #     conglomerates.
    post_clone = post_context_sm.clone() 

    # -- Once an acceptance state is reached no further analysis is necessary.
    ## NO: acceptance_pruning.do(post_clone)
    ## BECAUSE: it may have to compete with a pseudo-ambiguous post context

    # (*) collect all transitions from both state machines into a single one
    #
    #     NOTE: The start index is unique. Therefore, one can assume that each
    #           clone_list '.states' dictionary has different keys. One can simply
    #           take over all transitions of a start index into the result without
    #           considering interferences (see below)
    #
    # Remember the acceptance states of the core pattern BEFORE mounting; the
    # set of acceptance states is queried again after mounting (see below).
    orig_acceptance_state_id_list = the_state_machine.get_acceptance_state_index_list()

    # -- mount on every acceptance state the initial state of the following state
    #    machine via epsilon transition
    the_state_machine.mount_to_acceptance_states(post_clone.init_state_index, 
                                                 CancelStartAcceptanceStateF=True)
    # -- take over the states of the cloned post context
    for start_state_index, state in post_clone.states.iteritems():        
        the_state_machine.states[start_state_index] = state # states are already cloned

    # -- raise at each old acceptance state the 'store input position flag'
    for state_idx in orig_acceptance_state_id_list:
        state = the_state_machine.states[state_idx]
        state.set_input_position_store_f(True)
    
    # -- no acceptance state shall store the input position
    # -- each acceptance state restores the input position instead
    for state in the_state_machine.get_acceptance_state_list():
        state.set_input_position_store_f(False)
        state.set_input_position_restore_f(True)

    # No input position backward search required
    # => second element of the returned pair is 'None'.
    the_state_machine = nfa_to_dfa.do(the_state_machine)
    hopcroft.do(the_state_machine, CreateNewStateMachineF=False)
    return the_state_machine, None
#                    /           /  '\
#    (0)-- 'a' -->((1))        'c'   'b'
#                    \           \,  /
#                     '-- 'c' -->((3))<-------.
#                                   \         |
#                                    '-- 'c'--'
#
#    ((1)), ((2)), and ((3))  are the acceptance states.
#
# Build the state machine that is sketched in the diagram above. The order
# of the 'add_transition()' calls is kept as is, since each call that does
# not name a target delivers the state index used in the subsequent calls.
sm = DFA()
n0 = sm.init_state_index
# New acceptance states: n0 --'a'--> n1, n1 --'b'--> n2, n1 --'c'--> n3
n1 = sm.add_transition(n0, ord('a'), AcceptanceF=True)
n2 = sm.add_transition(n1, ord('b'), AcceptanceF=True)
n3 = sm.add_transition(n1, ord('c'), AcceptanceF=True)
# Transitions between existing states: loops on n2 ('b') and n3 ('c'), and
# the cross links n2 --'c'--> n3 and n3 --'b'--> n2.
sm.add_transition(n2, ord('b'), n2)
sm.add_transition(n3, ord('c'), n3)
sm.add_transition(n2, ord('c'), n3)
sm.add_transition(n3, ord('b'), n2)
print sm

# Attach commands to the acceptance states via 'set_cmd_list()' (defined
# elsewhere; the meaning of the tuple fields is not visible here). The second
# field differs per state: 66, 77, 88. The initial state is left without
# commands (call commented out).
#set_cmd_list(sm, n0, (0, 0,  False), (1, 0,  False), (2, 0,  False))
set_cmd_list(sm, n1, (0, 66, True), (1, 66, True), (2, 66, True))
# NOTE(review): '(0, 77, True)' appears twice and the order differs from the
#               other states -- presumably intentional for this test; confirm.
set_cmd_list(sm, n2, (0, 77, True), (0, 77, True), (2, 77, True),
             (1, 77, True))
set_cmd_list(sm, n3, (0, 88, True), (1, 88, True), (2, 88, True))

print sm
# (*) minimize the number of states using hopcroft optimization
optimal_sm = hopcroft.do(sm)
print optimal_sm
Ejemplo n.º 15
0
def do(StateMachine_List, FilterDominatedOriginsF=True,
       MarkNotSet=set(), AlllowInitStateAcceptF=False):
    """Creates a DFA state machine that incorporates the parallel
           process of all patterns passed as state machines in
           the StateMachine_List. The origins of each state machine
           are kept in the final state, if they are not dominated.

           Performs: -- parallelization
                     -- filtering of dominated origins (optional)
                     -- Frank Schaefers Adapted Hopcroft optimization.

           NOTE(review): No explicit 'NFA to DFA' conversion is called in
                  this function; presumably 'parallelize.do()' already
                  delivers a DFA -- confirm.

           Again: The state machine ids of the original state machines
                  are traced through the whole process.

           ARGUMENTS:

           StateMachine_List -- state machines to be combined. Those whose
                  id is not in 'MarkNotSet' get their state origins marked,
                  i.e. the passed objects themselves are modified; clones
                  are used for the combination.

           FilterDominatedOriginsF, if set to False, can disable the filtering
                  of dominated origins. This is important for pre-contexts, because,
                  all successful patterns need to be reported!

           MarkNotSet -- ids of state machines for which origins shall not
                  be marked. The default is a shared set object; it is only
                  read here, never modified.

           AlllowInitStateAcceptF -- if True, the check for an accepting
                  initial state is skipped after each step.

           RETURNS: The combined state machine; None, if StateMachine_List
                  is empty.
    """   
    if len(StateMachine_List) == 0:
        return None

    # Consistency checks after each processing step. Problems are reported
    # via 'error.log()'.
    def __insight_check(Place, sm, AlllowInitStateAcceptF):
        __check_on_orphan_states(Place, sm)
        if not AlllowInitStateAcceptF:
            __check_on_init_state_not_acceptance(Place, sm)
        error.insight("%s done." % Place)

    # Report orphaned states, if there are any.
    def __check_on_orphan_states(Place, sm):
        orphan_state_list = sm.get_orphaned_state_index_list()
        if len(orphan_state_list) == 0: return
        error.log("After '%s'" % Place + "\n" + \
                  "Orphaned state(s) detected in regular expression (optimization lack).\n" + \
                  "Please, log a defect at the projects website quex.sourceforge.net.\n"    + \
                  "Orphan state(s) = " + repr(orphan_state_list)) 

    # Report an initial state that is an acceptance state.
    def __check_on_init_state_not_acceptance(Place, sm):
        if sm.get_init_state().is_acceptance():
            error.log("After '%s'" % Place + "\n" + \
                      "Initial state 'accepts'. This should never happen.\n" + \
                      "Please, log a defect at the projects web site quex.sourceforge.net.\n")

    # Report the number of patterns and the total number of transitions
    # ('ttn') over all state machines before the combination starts.
    def __insight_begin(SM_List):
        ttn = 0
        for sm in SM_List:
            ttn += sum(state.target_map.get_transition_n() 
                       for state in sm.states.itervalues())
        error.insight("Combine Patterns: %i patterns; %i total transition number;" \
                      % (len(SM_List), ttn))

    __insight_begin(StateMachine_List)

    # (1) mark at each state machine the machine and states as 'original'.
    #      
    # This is necessary to trace in the combined state machine the pattern that
    # actually matched. Note, that a state machine in the StateMachine_List
    # represents one possible pattern that can match the current input.   
    #
    # State machines listed in 'MarkNotSet' are skipped entirely, i.e. the
    # DFA compliance assertion does not apply to them either.
    for sm in StateMachine_List:
        if sm.get_id() in MarkNotSet: continue
        sm.mark_state_origins()
        assert sm.is_DFA_compliant(), sm.get_string(Option="hex")

    # (2) setup all patterns in parallel (on clones of the state machines)
    sm = parallelize.do([sm.clone() for sm in StateMachine_List], 
                        CommonTerminalStateF=False) 
    __insight_check("Combine patterns", sm, AlllowInitStateAcceptF)


    # (3) determine for each state in the DFA what is the dominating original 
    #     state
    if FilterDominatedOriginsF: sm.filter_dominated_origins()
    __insight_check("Clean-up state entry operations", sm, AlllowInitStateAcceptF)

    # (4) minimize the number of states (Hopcroft Minimization)
    sm = hopcroft_minimization.do(sm, CreateNewStateMachineF=False)
    __insight_check("Hopcroft Minimization", sm, AlllowInitStateAcceptF)
    
    return sm
#                    /           /  '\ 
#    (0)-- 'a' -->((1))        'c'   'b'
#                    \           \,  /
#                     '-- 'c' -->((3))<-------.
#                                   \         |
#                                    '-- 'c'--'
#
#    ((1)), ((2)), and ((3))  are the acceptance states.
#
# Build the state machine that is sketched in the diagram above. The order
# of the 'add_transition()' calls is kept as is, since each call that does
# not name a target delivers the state index used in the subsequent calls.
sm = StateMachine()
n0 = sm.init_state_index
# New acceptance states: n0 --'a'--> n1, n1 --'b'--> n2, n1 --'c'--> n3
n1 = sm.add_transition(n0, ord('a'), AcceptanceF=True)
n2 = sm.add_transition(n1, ord('b'), AcceptanceF=True)
n3 = sm.add_transition(n1, ord('c'), AcceptanceF=True)
# Transitions between existing states: loops on n2 ('b') and n3 ('c'), and
# the cross links n2 --'c'--> n3 and n3 --'b'--> n2.
sm.add_transition(n2, ord('b'), n2)
sm.add_transition(n3, ord('c'), n3)
sm.add_transition(n2, ord('c'), n3)
sm.add_transition(n3, ord('b'), n2)
print sm

# Attach commands to the acceptance states via 'set_cmd_list()' (defined
# elsewhere; the meaning of the tuple fields is not visible here). The second
# field differs per state: 66, 77, 88. The initial state is left without
# commands (call commented out).
#set_cmd_list(sm, n0, (0, 0,  False), (1, 0,  False), (2, 0,  False))
set_cmd_list(sm, n1, (0, 66, True), (1, 66, True), (2, 66, True))
# NOTE(review): '(0, 77, True)' appears twice and the order differs from the
#               other states -- presumably intentional for this test; confirm.
set_cmd_list(sm, n2, (0, 77, True), (0, 77, True), (2, 77, True), (1, 77, True))
set_cmd_list(sm, n3, (0, 88, True), (1, 88, True), (2, 88, True))

print sm
# (*) minimize the number of states using hopcroft optimization
optimal_sm = hopcroft.do(sm)
print optimal_sm