def __clone_until_acceptance(Dfa, StartSi):
    """Make a new DFA from the graph between the given 'StartSi' to the until
    an acceptance state is reached. Walks from a given 'StartSi' along all
    paths until an acceptance state is reached.

    RETURNS: DFA containing the graph.
    """
    # Every state of the original gets a fresh index in the clone.
    correspondance_db = {si: state_index.get() for si in Dfa.states}
    result = DFA(InitStateIndex=correspondance_db[StartSi],
                 AcceptanceF=Dfa.states[StartSi].is_acceptance())
    work_set = set([StartSi])
    done_set = set([StartSi])
    while work_set:
        si    = work_set.pop()
        state = Dfa.states[si]
        if si == Dfa.init_state_index:
            result_state       = result.get_init_state()
            target_si_iterable = state.target_map.get_target_state_index_list()
        elif not state.is_acceptance():
            result_state       = state.clone(correspondance_db)
            target_si_iterable = state.target_map.get_target_state_index_list()
        else:
            # Acceptance states terminate the walk; they carry no transitions.
            result_state = DFA_State()
            result_state.set_acceptance()
            target_si_iterable = []

        # BUG FIX: newly scheduled states must enter 'done_set' immediately.
        # Previously only 'StartSi' was ever in 'done_set', so a cycle among
        # non-acceptance states (e.g. B -> C -> B) re-added states to
        # 'work_set' indefinitely => infinite loop.
        new_si_set = set(target_si for target_si in target_si_iterable
                         if target_si not in done_set)
        work_set.update(new_si_set)
        done_set.update(new_si_set)

        result.states[correspondance_db[si]] = result_state

    return result
def add_transition(self, StartStateIdx, TriggerSet, TargetStateIdx=None, AcceptanceF=False):
    """Adds a transition from Start to Target based on a given Trigger.

    TriggerSet can be of different types: ... see add_transition()
    (see comment on 'DFA_State::add_transition)

    RETURNS: The target state index.
    """
    # NOTE: The Transition Constructor is very tolerant, so no tests on TriggerSet()
    # assert TriggerSet.__class__.__name__ == "NumberSet"
    assert type(TargetStateIdx) == long \
           or TargetStateIdx is None \
           or TargetStateIdx in E_StateIndices

    # If target state is undefined (None) then a new one has to be created
    if TargetStateIdx is None:
        TargetStateIdx = state_machine_index.get()

    # Idiom: 'not in' instead of the deprecated 'dict.has_key(...) == False'.
    if StartStateIdx not in self.states:
        self.states[StartStateIdx] = DFA_State()
    if TargetStateIdx not in self.states:
        self.states[TargetStateIdx] = DFA_State()

    if AcceptanceF:
        self.states[TargetStateIdx].set_acceptance(True)

    self.states[StartStateIdx].add_transition(TriggerSet, TargetStateIdx)

    return TargetStateIdx
def add_epsilon_transition(self, StartStateIdx, TargetStateIdx=None, RaiseAcceptanceF=False):
    """Adds an epsilon transition from 'StartStateIdx' to 'TargetStateIdx'.
    If 'TargetStateIdx' is None, a new target state is created. If
    'RaiseAcceptanceF' is set, the target state becomes an acceptance state.

    RETURNS: The target state index.
    """
    assert TargetStateIdx is None or type(TargetStateIdx) == long

    # create new state if index does not exist
    # (idiom: 'not in' instead of the deprecated 'dict.has_key()')
    if StartStateIdx not in self.states:
        self.states[StartStateIdx] = DFA_State()
    if TargetStateIdx is None:
        TargetStateIdx = self.create_new_state(AcceptanceF=RaiseAcceptanceF)
    elif TargetStateIdx not in self.states:
        self.states[TargetStateIdx] = DFA_State()

    # add the epsilon target state
    self.states[StartStateIdx].target_map.add_epsilon_target_state(TargetStateIdx)

    # optionally raise the state of the target to 'acceptance'
    if RaiseAcceptanceF:
        self.states[TargetStateIdx].set_acceptance(True)

    return TargetStateIdx
def __nfa_to_dfa_required(SmList):
    """NFA to DFA transformation is only required if:

       -- there are epsilon transitions
       -- more than one target is reached by the same trigger

    Assumed that the input are DFAs, the result is only possibly an NFA, if
    the init state has intersecting transitions, or if there are transitions
    to the init state so that the state machines have to be considered
    seperatedly.
    """
    init_state_list = [sm.get_init_state() for sm in SmList]
    if DFA_State.interference(init_state_list):
        return True
    # Any transition back into an init state also forces the NFA treatment.
    for sm in SmList:
        if sm.has_transition_to(sm.init_state_index):
            return True
    return False
def __init__(self, InitStateIndex=None, AcceptanceF=False, InitState=None, DoNothingF=False, DfaId=None):
    """Construct a DFA with a single init state.

    InitStateIndex/InitState: optional explicit init state (index); fresh
                              ones are generated when omitted.
    AcceptanceF:              acceptance flag for a generated init state.
    DoNothingF:               if set, only the id and source reference are
                              initialized (no states).
    DfaId:                    optional explicit id; otherwise a new one is
                              drawn from 'state_machine_index'.
    """
    self.set_id(state_machine_index.get_state_machine_id() if DfaId is None else DfaId)

    self.sr = SourceRef_VOID

    if DoNothingF:
        return

    self.init_state_index = (state_machine_index.get() if InitStateIndex is None
                             else InitStateIndex)

    # DFA_State Index => DFA_State (information about what triggers
    # transition to what target state).
    initial_state = DFA_State(AcceptanceF=AcceptanceF) if InitState is None else InitState
    self.states = {self.init_state_index: initial_state}
def create_new_state(self, AcceptanceF=False, StateIdx=None, RestoreInputPositionF=False, MarkAcceptanceId=None):
    """Create a new state and register it in 'self.states'.

    RETURNS: DFA_State index of the new state.
    """
    fresh_si = state_machine_index.get() if StateIdx is None else StateIdx

    # A state that carries an acceptance id is implicitly accepting.
    fresh_state = DFA_State(AcceptanceF or MarkAcceptanceId is not None)
    if MarkAcceptanceId is not None:
        fresh_state.mark_acceptance_id(MarkAcceptanceId)
    if RestoreInputPositionF:
        fresh_state.set_read_position_restore_f()

    self.states[fresh_si] = fresh_state
    return fresh_si
def test_plug_sequence(ByteSequenceDB):
    """Test driver: plug a list of byte (interval) sequences between a fresh
    DFA's init state and a new end state, then print the result as comments
    suitable for 'dot' graph rendering.

    ByteSequenceDB: list of interval sequences; all must have equal length
                    and consist of Interval objects.
    """
    L = len(ByteSequenceDB[0])
    # Sanity: all sequences share the same length and element type.
    for seq in ByteSequenceDB:
        assert len(seq) == L
        for x in seq:
            assert isinstance(x, Interval)

    # Find the first byte position where the sequences differ.
    first_different_byte_index = -1
    for i in range(L):
        x0 = ByteSequenceDB[0][i]
        for seq in ByteSequenceDB[1:]:
            if not seq[i].is_equal(x0):
                first_different_byte_index = i
                break
        if first_different_byte_index != -1:
            break
    # All sequences identical at every position => report position 0.
    if first_different_byte_index == -1:
        first_different_byte_index = 0

    print "# Best To be Displayed by:"
    print "#"
    print "# > " + sys.argv[0] + " " + sys.argv[1] + " | dot -Tsvg -o tmp.svg"
    print "#"
    print "# -------------------------"
    print "# Byte Sequences: "
    i = -1
    for seq in ByteSequenceDB:
        i += 1
        print "# (%i) " % i,
        for x in seq:
            print " " + x.get_string(Option="hex"),
        print
    print "# L = %i" % L
    print "# DIdx = %i" % first_different_byte_index

    # Fresh DFA with a dedicated end state to plug the sequences into.
    sm = DFA()
    end_index = state_machine.index.get()
    sm.states[end_index] = DFA_State()
    Setup.buffer_setup("", 1, "utf8")

    if Setup.bad_lexatom_detection_f:
        bad_lexatom_si = index.get()
    else:
        bad_lexatom_si = None

    trafo = Setup.buffer_encoding

    new_first_tm, \
    new_state_db = trafo.plug_interval_sequences(sm.init_state_index, end_index,
                                                 ByteSequenceDB,
                                                 BadLexatomSi=bad_lexatom_si)
    if bad_lexatom_si is not None:
        new_first_tm[bad_lexatom_si] = trafo._error_range_by_code_unit_db[0]

        # Generate the 'bad lexatom accepter'.
        bad_lexatom_state = DFA_State(AcceptanceF=True)
        bad_lexatom_state.mark_acceptance_id(E_IncidenceIDs.BAD_LEXATOM)
        sm.states[bad_lexatom_si] = bad_lexatom_state

    # Splice the new transitions into the init state's target map.
    first_tm = sm.get_init_state().target_map.get_map()
    if end_index in first_tm:
        del first_tm[end_index]
    first_tm.update(new_first_tm)

    sm.states.update(new_state_db)

    sm = beautifier.do(sm)
    if len(sm.get_orphaned_state_index_list()) != 0:
        print "Error: Orphaned States Detected!"

    # Double check, that there are no 'circles'
    predecessor_db = sm.get_predecessor_db()
    assert not any(si in predecessor_db[si] for si in sm.states)

    show_graphviz(sm)
def setup_state_operation(sm, CmdList, StateIndex):
    """Install a fresh state at 'StateIndex' whose 'single_entry' carries
    every command from 'CmdList'.
    """
    fresh_state = DFA_State()
    for command in CmdList:
        fresh_state.single_entry.add(command)
    sm.states[StateIndex] = fresh_state
def do_state_machine(self, sm):
    """Transforms a given state machine from 'Unicode Driven' to another
    character encoding type.

    RETURNS: [0] Transformation complete (True->yes, False->not all transformed)
             [1] Transformed state machine. It may be the same as it was
                 before if there was no transformation actually.

    It is ensured that the result of this function is a DFA compliant state
    machine.
    """
    assert Setup.lexatom.type_range is not None
    if sm is None:
        return True, None
    assert sm.is_DFA_compliant()
    all_complete_f = True
    if Setup.bad_lexatom_detection_f:
        bad_lexatom_si = state_machine_index.get()
        # Generate the 'bad lexatom accepter'.
        bad_lexatom_state = DFA_State(AcceptanceF=True)
        bad_lexatom_state.mark_acceptance_id(E_IncidenceIDs.BAD_LEXATOM)
        sm.states[bad_lexatom_si] = bad_lexatom_state
    else:
        bad_lexatom_si = None
    # NOTE: Not 'iteritems()', for some encodings intermediate states are
    # generated. Those shall not be subject to transformation.
    # ('.items()' snapshots the dict so that 'sm.states.update(...)' below
    #  does not disturb the iteration.)
    for from_si, state in sm.states.items():
        if from_si == bad_lexatom_si:
            continue
        target_map = state.target_map.get_map()
        for to_si, trigger_set in target_map.items():
            if to_si == bad_lexatom_si:
                continue
            complete_f, \
            new_state_db = self.do_transition(target_map, from_si, to_si,
                                              bad_lexatom_si)
            # Assume that the 'target_map' has been adapted if changes were
            # necessary.
            if new_state_db is not None:
                sm.states.update(new_state_db)
            all_complete_f &= complete_f
        # Transition to 'bad lexatom acceptor' on first code unit is best
        # to happen here, after all transitions have been adapted.
        self._add_transition_to_bad_lexatom_detector(target_map,
                                                     bad_lexatom_si, 0)
        # If there were intermediate states being generated, the error
        # error detection must have been implemented right then.
    sm.delete_transitions_beyond_interval(Setup.lexatom.type_range)
    sm.delete_orphaned_states()
    # AFTER: Whatever happend, the transitions in the state machine MUST
    # lie in the drain_set.
    if not sm.is_DFA_compliant():
        sm = nfa_to_dfa.do(sm)
    sm = hopcroft_minimization.do(sm, CreateNewStateMachineF=False)
    return all_complete_f, sm
# Construct DFAs with explicitly chosen init state indices and acceptance
# flags (test fixture; return values are intentionally discarded —
# construction registers fresh state-machine ids as a side effect).
DFA(InitStateIndex=5L, AcceptanceF=True)
DFA(InitStateIndex=6L, AcceptanceF=True)
DFA(InitStateIndex=100L, AcceptanceF=False)
DFA(InitStateIndex=101L, AcceptanceF=False)
DFA(InitStateIndex=102L, AcceptanceF=False)
DFA(InitStateIndex=103L, AcceptanceF=False)
DFA(InitStateIndex=104L, AcceptanceF=False)
DFA(InitStateIndex=105L, AcceptanceF=False)
DFA(InitStateIndex=106L, AcceptanceF=False)
# (*) add priviledges
# add_priority(4L, 0L)
# add_priority(6L, 3L)
# (1) only acceptance and non-acceptance states
si = DFA_State()
# Register origins: first argument is the origin id, second the
# acceptance flag (True => acceptance origin).
add_origin(1, True)
add_origin(4, True)
add_origin(0, True)
add_origin(5, True)
add_origin(6, True)
add_origin(3, True)
add_origin(2, True)
add_origin(7, False)
add_origin(8, False)
add_origin(9, False)
add_origin(10, False)
add_origin(11, False)
add_origin(12, False)
add_origin(13, False)
def plug_interval_sequences(self, FromSi, ToSi, IntervalSequenceList, BadLexatomSi):
    """Transform the list of interval sequences into intermediate state
    transitions.

    'BadLexatomSi' is None => no bad lexatom detection.
                    else, transitions to 'bad lexatom state' are added
                    on invalid code units.

    RETURN: [0] Target map update for the first state.
            [1] State Db update for intermediate states.
    """
    def simplify(tm_db, tm_end_inv, ToSi):
        """Those states which trigger on the same intervals to 'ToSi' are
        equivalent, i.e. can replaced by one state.
        """
        # Find the states that trigger on the same interval list to the
        # terminal 'ToSi'.
        equivalence_db = {}
        replacement_db = {}
        for from_si, interval_list in tm_end_inv.iteritems():
            # Sorted tuple => canonical, hashable key for the interval list.
            key = tuple(sorted(interval_list))
            equivalent_si = equivalence_db.get(key)
            if equivalent_si is None:
                # First state seen with this interval list: the representative.
                equivalence_db[key] = from_si
            else:
                # Later states with the same list are replaced by it.
                replacement_db[from_si] = equivalent_si

        # Replace target states which are equivalent
        result = {}
        for from_si, tm in tm_db.iteritems():
            new_tm = defaultdict(NumberSet)
            for target_si, interval in tm.iteritems():
                replacement_si = replacement_db.get(target_si)
                if replacement_si is not None:
                    target_si = replacement_si
                new_tm[target_si].quick_append_interval(interval)
            # Debug aid: dump the map when an empty number set shows up.
            if any(number_set.is_empty() for si, number_set in new_tm.items()):
                for si, number_set in new_tm.iteritems():
                    print "#sim", si, number_set
            if from_si in tm_end_inv:
                for interval in tm_end_inv[from_si]:
                    new_tm[ToSi].quick_append_interval(interval)
            result[from_si] = new_tm
        return result

    tm_db, \
    tm_end_inv, \
    position_db = _get_intermediate_transition_maps(FromSi, ToSi,
                                                    IntervalSequenceList)

    result_tm_db = simplify(tm_db, tm_end_inv, ToSi)

    if BadLexatomSi is not None:
        for si, position in position_db.iteritems():
            # The 'positon 0' is done by 'do_state_machine'. It is concerned
            # with the first state's transition.
            assert position != 0
            self._add_transition_to_bad_lexatom_detector(result_tm_db[si],
                                                         BadLexatomSi,
                                                         position)

    # Invariant: no transition may trigger on an empty number set.
    for tm in result_tm_db.itervalues():
        assert not any(number_set.is_empty() for number_set in tm.itervalues())

    # Generate the target map to be inserted into state 'FromSi'.
    # Generate list of intermediate states that implement the sequence
    # of intervals.
    first_tm = result_tm_db.pop(FromSi)
    new_state_db = dict((si, DFA_State.from_TargetMap(tm))
                        for si, tm in result_tm_db.iteritems())
    return first_tm, new_state_db