Example #1
0
def __clone_until_acceptance(Dfa, StartSi):
    """Clone the part of 'Dfa' that lies between 'StartSi' and the first
    acceptance states reached from it.

    Walks from 'StartSi' along all paths. A path ends as soon as it hits an
    acceptance state; such a state is represented in the result by a fresh
    accepting state without outgoing transitions.

    Dfa     -- state machine to be walked; provides 'states' and
               'init_state_index'.
    StartSi -- index of the state in 'Dfa' where the walk begins.

    RETURNS: DFA containing the graph. Its states carry new state indices.
    """
    # Every state of 'Dfa' gets a new index; 'clone()' receives this map.
    correspondance_db = {si: state_index.get() for si in Dfa.states}
    result = DFA(InitStateIndex=correspondance_db[StartSi],
                 AcceptanceF=Dfa.states[StartSi].is_acceptance())

    work_set = set([StartSi])
    done_set = set([StartSi])
    while work_set:
        si = work_set.pop()
        state = Dfa.states[si]

        if si == Dfa.init_state_index:
            # NOTE(review): this compares against the init state of 'Dfa',
            # not against 'StartSi' -- confirm that this is intended.
            result_state = result.get_init_state()
            target_si_iterable = state.target_map.get_target_state_index_list()
        elif not state.is_acceptance():
            result_state = state.clone(correspondance_db)
            target_si_iterable = state.target_map.get_target_state_index_list()
        else:
            # Acceptance state: the walk along this path ends here.
            result_state = DFA_State()
            result_state.set_acceptance()
            target_si_iterable = []

        pending = [target_si for target_si in target_si_iterable
                   if target_si not in done_set]
        # Mark targets as seen at the moment they are scheduled. Without this
        # a loop in the graph puts the same states back into 'work_set' over
        # and over again, and the walk never terminates.
        done_set.update(pending)
        work_set.update(pending)
        result.states[correspondance_db[si]] = result_state

    return result
Example #2
0
    def add_transition(self,
                       StartStateIdx,
                       TriggerSet,
                       TargetStateIdx=None,
                       AcceptanceF=False):
        """Enter a transition 'StartStateIdx' --(TriggerSet)--> 'TargetStateIdx'.

           States which are not yet present in 'self.states' are created on
           the fly. If 'TargetStateIdx' is None, a new state index is taken
           from 'state_machine_index' for the target. If 'AcceptanceF' is
           set, the target state is marked as acceptance state.

           'TriggerSet' is passed on as it is to the start state's
           'add_transition()' (see comment on 'DFA_State::add_transition').

           RETURNS: The target state index.
        """
        # NOTE: 'TriggerSet' is deliberately not type-checked here; it is
        #       forwarded unchecked to the state's 'add_transition()'.
        assert (type(TargetStateIdx) == long
                or TargetStateIdx is None
                or TargetStateIdx in E_StateIndices)

        # An undefined target (None) means: allocate a new state index.
        target_si = TargetStateIdx
        if target_si is None:
            target_si = state_machine_index.get()

        # Make sure both ends exist -- start state first, then target state.
        for si in (StartStateIdx, target_si):
            if si not in self.states:
                self.states[si] = DFA_State()

        if AcceptanceF:
            self.states[target_si].set_acceptance(True)

        self.states[StartStateIdx].add_transition(TriggerSet, target_si)
        return target_si
Example #3
0
    def add_epsilon_transition(self,
                               StartStateIdx,
                               TargetStateIdx=None,
                               RaiseAcceptanceF=False):
        """Enter an epsilon transition from 'StartStateIdx' to 'TargetStateIdx'.

        States which are not yet present in 'self.states' are created. If
        'TargetStateIdx' is None, the target state is made by
        'create_new_state()'. If 'RaiseAcceptanceF' is set, the target state
        is marked as acceptance state.

        RETURNS: The target state index.
        """
        assert TargetStateIdx is None or type(TargetStateIdx) == long

        # The start state is created on demand.
        if StartStateIdx not in self.states:
            self.states[StartStateIdx] = DFA_State()

        # The target state is either newly allocated, or created on demand
        # under the index that has been given.
        if TargetStateIdx is None:
            target_si = self.create_new_state(AcceptanceF=RaiseAcceptanceF)
        else:
            target_si = TargetStateIdx
            if target_si not in self.states:
                self.states[target_si] = DFA_State()

        start_state = self.states[StartStateIdx]
        start_state.target_map.add_epsilon_target_state(target_si)

        # Optionally raise the target state to 'acceptance'.
        if RaiseAcceptanceF:
            self.states[target_si].set_acceptance(True)

        return target_si
Example #4
0
def __nfa_to_dfa_required(SmList):
    """Tell whether an NFA to DFA transformation is required for 'SmList'.

    In general, the transformation is only required if

         -- there are epsilon transitions, or
         -- more than one target is reached by the same trigger.

    Under the assumption that all state machines in 'SmList' are DFAs, the
    result can only be an NFA if the init states have intersecting
    transitions, or if there are transitions back to an init state so that
    the state machines have to be considered separately.

    RETURNS: True  -- if one of the two conditions holds.
             False -- else.
    """
    init_state_list = [sm.get_init_state() for sm in SmList]
    if DFA_State.interference(init_state_list):
        return True

    for sm in SmList:
        if sm.has_transition_to(sm.init_state_index):
            return True
    return False
Example #5
0
    def __init__(self,
                 InitStateIndex=None,
                 AcceptanceF=False,
                 InitState=None,
                 DoNothingF=False,
                 DfaId=None):
        """Set up the state machine with its id and its init state.

        InitStateIndex -- index of the init state; None => a new index is
                          taken from 'state_machine_index'.
        AcceptanceF    -- passed to the 'DFA_State' that is created as init
                          state; unused if 'InitState' is given.
        InitState      -- state object to be used as init state; None => a
                          new 'DFA_State' is created.
        DoNothingF     -- True => only the id and 'sr' are set; neither
                          'init_state_index' nor 'states' is assigned.
        DfaId          -- id of the state machine; None => a new id is taken
                          from 'state_machine_index'.
        """
        dfa_id = DfaId
        if dfa_id is None:
            dfa_id = state_machine_index.get_state_machine_id()
        self.set_id(dfa_id)

        self.sr = SourceRef_VOID

        if DoNothingF:
            return

        if InitStateIndex is None:
            self.init_state_index = state_machine_index.get()
        else:
            self.init_state_index = InitStateIndex

        if InitState is None:
            init_state = DFA_State(AcceptanceF=AcceptanceF)
        else:
            init_state = InitState

        # State index => DFA_State (which holds the information about what
        # triggers a transition to what target state).
        self.states = {self.init_state_index: init_state}
Example #6
0
    def create_new_state(self,
                         AcceptanceF=False,
                         StateIdx=None,
                         RestoreInputPositionF=False,
                         MarkAcceptanceId=None):
        """Create a state and enter it into 'self.states'.

        AcceptanceF           -- passed to the 'DFA_State' constructor.
        StateIdx              -- index for the new state; None => a new index
                                 is taken from 'state_machine_index'.
        RestoreInputPositionF -- only considered if 'MarkAcceptanceId' is
                                 given; then 'set_read_position_restore_f()'
                                 is called on the new state.
        MarkAcceptanceId      -- if not None, the state is constructed as
                                 accepting and the id is marked on it.

        RETURNS: DFA_State index of the new state.
        """
        new_si = StateIdx if StateIdx is not None else state_machine_index.get()

        with_acceptance_id_f = MarkAcceptanceId is not None
        new_state = DFA_State(AcceptanceF or with_acceptance_id_f)
        if with_acceptance_id_f:
            new_state.mark_acceptance_id(MarkAcceptanceId)
            if RestoreInputPositionF:
                new_state.set_read_position_restore_f()

        self.states[new_si] = new_state
        return new_si
Example #7
0
def test_plug_sequence(ByteSequenceDB):
    L = len(ByteSequenceDB[0])

    for seq in ByteSequenceDB:
        assert len(seq) == L
        for x in seq:
            assert isinstance(x, Interval)

    first_different_byte_index = -1
    for i in range(L):
        x0 = ByteSequenceDB[0][i]
        for seq in ByteSequenceDB[1:]:
            if not seq[i].is_equal(x0):
                first_different_byte_index = i
                break
        if first_different_byte_index != -1:
            break
    if first_different_byte_index == -1:
        first_different_byte_index = 0

    print "# Best To be Displayed by:"
    print "#"
    print "#  > " + sys.argv[0] + " " + sys.argv[1] + " | dot -Tsvg -o tmp.svg"
    print "#"
    print "# -------------------------"
    print "# Byte Sequences:     "
    i = -1
    for seq in ByteSequenceDB:
        i += 1
        print "# (%i) " % i,
        for x in seq:
            print "    " + x.get_string(Option="hex"),
        print
    print "#    L    = %i" % L
    print "#    DIdx = %i" % first_different_byte_index

    sm = DFA()
    end_index = state_machine.index.get()
    sm.states[end_index] = DFA_State()

    Setup.buffer_setup("", 1, "utf8")

    if Setup.bad_lexatom_detection_f: bad_lexatom_si = index.get()
    else: bad_lexatom_si = None

    trafo = Setup.buffer_encoding

    new_first_tm,    \
    new_state_db = trafo.plug_interval_sequences(sm.init_state_index, end_index,
                                                 ByteSequenceDB,
                                                 BadLexatomSi=bad_lexatom_si)

    if bad_lexatom_si is not None:
        new_first_tm[bad_lexatom_si] = trafo._error_range_by_code_unit_db[0]

    # Generate the 'bad lexatom accepter'.
    bad_lexatom_state = DFA_State(AcceptanceF=True)
    bad_lexatom_state.mark_acceptance_id(E_IncidenceIDs.BAD_LEXATOM)
    sm.states[bad_lexatom_si] = bad_lexatom_state

    first_tm = sm.get_init_state().target_map.get_map()
    if end_index in first_tm: del first_tm[end_index]
    first_tm.update(new_first_tm)

    sm.states.update(new_state_db)

    sm = beautifier.do(sm)
    if len(sm.get_orphaned_state_index_list()) != 0:
        print "Error: Orphaned States Detected!"

    # Double check, that there are no 'circles'
    predecessor_db = sm.get_predecessor_db()
    assert not any(si in predecessor_db[si] for si in sm.states)

    show_graphviz(sm)
Example #8
0
def setup_state_operation(sm, CmdList, StateIndex):
    """Enter a new 'DFA_State' into 'sm' at 'StateIndex'.

    Each element of 'CmdList' is added to the new state's 'single_entry', in
    the order in which it appears. A state which is already registered under
    'StateIndex' is replaced.
    """
    new_state = DFA_State()
    for command in CmdList:
        new_state.single_entry.add(command)

    sm.states[StateIndex] = new_state
Example #9
0
    def do_state_machine(self, sm):
        """Transforms the given state machine from 'Unicode Driven' to
        another character encoding type.

        If 'Setup.bad_lexatom_detection_f' is set, a 'bad lexatom acceptor'
        state is added to 'sm', and transitions to it are entered by means
        of '_add_transition_to_bad_lexatom_detector()'.

        RETURNS:
           [0] Transformation complete (True->yes, False->not all transformed)
           [1] Transformed state machine. It may be the same as it was
               before if there was no transformation actually.
               For 'sm' being None the result is '(True, None)'.

        It is ensured that the result of this function is a DFA compliant
        state machine.
        """
        assert Setup.lexatom.type_range is not None

        if sm is None:
            return True, None
        assert sm.is_DFA_compliant()

        bad_lexatom_si = None
        if Setup.bad_lexatom_detection_f:
            # Generate the 'bad lexatom acceptor'.
            bad_lexatom_si = state_machine_index.get()
            acceptor = DFA_State(AcceptanceF=True)
            acceptor.mark_acceptance_id(E_IncidenceIDs.BAD_LEXATOM)
            sm.states[bad_lexatom_si] = acceptor

        all_complete_f = True

        # NOTE: Iterate over '.items()', not over '.iteritems()'! For some
        #       encodings intermediate states are generated while iterating.
        #       Those states shall not be subject to transformation.
        for from_si, state in sm.states.items():
            if from_si == bad_lexatom_si:
                continue
            target_map = state.target_map.get_map()

            for to_si, _trigger_set in target_map.items():
                if to_si == bad_lexatom_si:
                    continue

                # 'do_transition()' is assumed to have adapted 'target_map'
                # itself, if changes were necessary.
                complete_f, new_state_db = self.do_transition(
                    target_map, from_si, to_si, bad_lexatom_si)
                if new_state_db is not None:
                    sm.states.update(new_state_db)

                all_complete_f &= complete_f

            # The transition to the 'bad lexatom acceptor' on the first code
            # unit is best entered here, after all transitions have been
            # adapted. For intermediate states, the error detection must
            # have been implemented when they were generated.
            self._add_transition_to_bad_lexatom_detector(
                target_map, bad_lexatom_si, 0)

        sm.delete_transitions_beyond_interval(Setup.lexatom.type_range)
        sm.delete_orphaned_states()

        # AFTER: Whatever happened, the transitions in the state machine
        #        MUST lie in the drain set.
        if not sm.is_DFA_compliant():
            sm = nfa_to_dfa.do(sm)
        minimized_sm = hopcroft_minimization.do(sm,
                                                CreateNewStateMachineF=False)
        return all_complete_f, minimized_sm
# NOTE(review): Fragment of a larger setup; its surrounding context is not
# visible here. The DFA objects below are constructed but not bound to any
# name -- presumably the original code kept or registered them; TODO confirm.
DFA(InitStateIndex=5L, AcceptanceF=True)
DFA(InitStateIndex=6L, AcceptanceF=True)
DFA(InitStateIndex=100L, AcceptanceF=False)
DFA(InitStateIndex=101L, AcceptanceF=False)
DFA(InitStateIndex=102L, AcceptanceF=False)
DFA(InitStateIndex=103L, AcceptanceF=False)
DFA(InitStateIndex=104L, AcceptanceF=False)
DFA(InitStateIndex=105L, AcceptanceF=False)
DFA(InitStateIndex=106L, AcceptanceF=False)
    
# (*) add privileges
# add_priority(4L, 0L)   
# add_priority(6L, 3L)   

# (1) only acceptance and non-acceptance states    
si = DFA_State()    
# NOTE(review): 'add_origin' is called as a free function and 'si' is not
# passed to it; presumably these were method calls on a state object in the
# original code -- TODO confirm. The second argument is True for the first
# seven calls and False for the remaining ones.
add_origin(1, True)    
add_origin(4, True)    
add_origin(0, True)    
add_origin(5, True)    
add_origin(6, True)    
add_origin(3, True)    
add_origin(2, True)    
add_origin(7, False)    
add_origin(8, False)    
add_origin(9, False)    
add_origin(10, False)    
add_origin(11, False)    
add_origin(12, False)    
add_origin(13, False)    
Example #11
0
    def plug_interval_sequences(self, FromSi, ToSi, IntervalSequenceList,
                                BadLexatomSi):
        """Transform the list of interval sequences into intermediate state
        transitions. 
        
        'BadLexatomSi' is None => no bad lexatom detection.
                       else, transitions to 'bad lexatom state' are added
                       on invalid code units.
        
        RETURN: [0] Target map update for the first state.
                [1] State Db update for intermediate states.

        """
        def simplify(tm_db, tm_end_inv, ToSi):
            """Those states which trigger on the same intervals to 'ToSi' are
            equivalent, i.e. can replaced by one state.
            """
            # Find the states that trigger on the same interval list to the
            # terminal 'ToSi'.
            equivalence_db = {}
            replacement_db = {}
            for from_si, interval_list in tm_end_inv.iteritems():
                key = tuple(sorted(interval_list))
                equivalent_si = equivalence_db.get(key)
                if equivalent_si is None: equivalence_db[key] = from_si
                else: replacement_db[from_si] = equivalent_si

            # Replace target states which are equivalent
            result = {}
            for from_si, tm in tm_db.iteritems():
                new_tm = defaultdict(NumberSet)
                for target_si, interval in tm.iteritems():
                    replacement_si = replacement_db.get(target_si)
                    if replacement_si is not None: target_si = replacement_si
                    new_tm[target_si].quick_append_interval(interval)

                if any(number_set.is_empty()
                       for si, number_set in new_tm.items()):
                    for si, number_set in new_tm.iteritems():
                        print "#sim", si, number_set

                if from_si in tm_end_inv:
                    for interval in tm_end_inv[from_si]:
                        new_tm[ToSi].quick_append_interval(interval)

                result[from_si] = new_tm

            return result

        tm_db,      \
        tm_end_inv, \
        position_db = _get_intermediate_transition_maps(FromSi, ToSi,
                                                        IntervalSequenceList)

        result_tm_db = simplify(tm_db, tm_end_inv, ToSi)

        if BadLexatomSi is not None:
            for si, position in position_db.iteritems():
                # The 'positon 0' is done by 'do_state_machine'. It is concerned
                # with the first state's transition.
                assert position != 0
                self._add_transition_to_bad_lexatom_detector(
                    result_tm_db[si], BadLexatomSi, position)

        for tm in result_tm_db.itervalues():
            assert not any(number_set.is_empty()
                           for number_set in tm.itervalues())

        # Generate the target map to be inserted into state 'FromSi'.
        # Generate list of intermediate states that implement the sequence
        # of intervals.
        first_tm = result_tm_db.pop(FromSi)
        new_state_db = dict((si, DFA_State.from_TargetMap(tm))
                            for si, tm in result_tm_db.iteritems())
        return first_tm, new_state_db