def __core(Original, Cutter):
    """Apply 'complement_end' to two regular expressions and print checks.

    Original -- regular expression string; converted by 'regex.do()'.
    Cutter   -- regular expression string; converted by 'regex.do()'.

    The 'clean()'-ed outcome of 'complement_end.do()' is compared against the
    inputs by means of 'superset', 'intersection', 'union', and 'identity'.
    All outcomes are printed to standard output; nothing is returned.
    """
    def visible(Text):
        # Display newline and tabulator as escape sequences.
        return Text.replace("\n", "\\n").replace("\t", "\\t")

    print(visible("Original = " + Original))
    print(visible("Cutter = " + Cutter))

    original_sm = regex.do(Original, {}).sm
    cutter_sm = regex.do(Cutter, {}).sm
    ## print(original_sm.get_string(NormalizeF=False))
    ## print(cutter_sm.get_string(NormalizeF=False))

    result = clean(complement_end.do(original_sm, cutter_sm))
    print("")

    # The superset and intersection checks are only made for a result
    # that is not 'none'.
    if not special.is_none(result):
        is_superset = superset.do(original_sm, result)
        print("superset(Original, result): %s" % is_superset)

    if not special.is_none(result):
        common = clean(intersection.do([cutter_sm, result]))
        print("intersection(Cutter, result) is None: %s" % special.is_none(common))

    # The union check is made in any case.
    joined = clean(union.do([original_sm, result]))
    print("union(Original, result) == Original: %s" % identity.do(joined, original_sm))

    print("")
    # Two items of a print statement are separated by a single space.
    print("%s %s" % ("result = ", result.get_string(NormalizeF=True)))
def __core(Original, Cutter):
    """Apply 'complement_end' to two regular expressions and print checks.

    Original -- regular expression string; converted by 'regex.do()'.
    Cutter   -- regular expression string; converted by 'regex.do()'.

    The 'clean()'-ed outcome of 'complement_end.do()' is compared against the
    inputs by means of 'superset', 'intersection', 'union', and 'identity'.
    All outcomes are printed to standard output; nothing is returned.
    """
    def visible(Text):
        # Display newline and tabulator as escape sequences.
        return Text.replace("\n", "\\n").replace("\t", "\\t")

    print(visible("Original = " + Original))
    print(visible("Cutter = " + Cutter))

    original_sm = regex.do(Original, {}).sm
    cutter_sm = regex.do(Cutter, {}).sm
    ## print(original_sm.get_string(NormalizeF=False))
    ## print(cutter_sm.get_string(NormalizeF=False))

    result = clean(complement_end.do(original_sm, cutter_sm))
    print("")

    # The superset and intersection checks are only made for a result
    # that is not 'none'.
    if not special.is_none(result):
        is_superset = superset.do(original_sm, result)
        print("superset(Original, result): %s" % is_superset)

    if not special.is_none(result):
        common = clean(intersection.do([cutter_sm, result]))
        print("intersection(Cutter, result) is None: %s" % special.is_none(common))

    # The union check is made in any case.
    joined = clean(union.do([original_sm, result]))
    print("union(Original, result) == Original: %s" % identity.do(joined, original_sm))

    print("")
    # Two items of a print statement are separated by a single space.
    print("%s %s" % ("result = ", result.get_string(NormalizeF=True)))
def test(A_str):
    """Print the complement of 'A' together with three consistency checks.

    A_str -- either a string, which is converted by 'regex.do()', or any
             other object, which is used directly as the state machine.

    Printed are 'complement(A)' and the outcomes of:

      union(A, complement(A))         tested by 'is_all()'
      intersection(A, complement(A))  tested by 'is_none()'
      complement(complement(A))       compared with A by 'identity.do()'

    Nothing is returned.
    """
    print("_____________________________________________________________________")

    if not isinstance(A_str, (str, unicode)):
        # Not a string: the argument itself serves as state machine.
        sm = A_str
        print("%s %s" % ("A = ", sm))
    else:
        print(("A = " + A_str).replace("\n", "\\n").replace("\t", "\\t"))
        sm = regex.do(A_str, {}).sm

    complement_of_a = complement.do(sm)
    print("%s %s" % ("complement(A):", complement_of_a))
    double_complement = complement.do(complement_of_a)

    print("")
    united = union.do([sm, complement_of_a])
    print("%s %s" % ("union(A, complement(A)): All =", is_all(united)))
    common = intersection.do([sm, complement_of_a])
    print("%s %s" % ("intersection(A, complement(A)): None =", is_none(common)))
    same_f = identity.do(sm, double_complement)
    print("%s %s" % ("identity(A, complement(complement(A)):", same_f))
def do(StateMachineList, CommonTerminalStateF=True, CloneF=True):
    """Connect state machines in parallel.

    StateMachineList     Non-empty list of 'StateMachine' objects.

    CommonTerminalStateF tells whether the state machines shall trigger
                         to a common terminal. This may help nfa-to-dfa
                         or hopcroft minimization for ISOLATED patterns.

                         A state machine that consists of the COMBINATION
                         of patterns MUST set this flag to 'False'.

    CloneF               Controls if state machine list is cloned or not.
                         If the single state machines are no longer
                         required after construction, the CloneF can be
                         set to False.

                         If Cloning is disabled the state machines
                         themselves will be altered--which brings some
                         advantage in speed.

    RETURNS: The value of '__consider_empty_state_machines()' called with
             the combined state machine and the list of those given state
             machines that are empty or 'none'.
    """
    assert isinstance(StateMachineList, list)
    assert len(StateMachineList) != 0
    for x in StateMachineList:
        assert isinstance(x, StateMachine), x.__class__.__name__

    # Filter out empty state machines from the consideration. Each state
    # machine is classified exactly once.
    state_machine_list = []
    empty_state_machine_list = []
    for sm in StateMachineList:
        if sm.is_empty() or special.is_none(sm):
            empty_state_machine_list.append(sm)
        else:
            state_machine_list.append(sm)

    # Fewer than two remaining state machines: nothing to connect.
    if len(state_machine_list) < 2:
        if not state_machine_list:
            result = StateMachine()
        elif CloneF:
            result = state_machine_list[0].clone()
        else:
            result = state_machine_list[0]
        return __consider_empty_state_machines(result, empty_state_machine_list)

    # (*) need to clone the state machines, i.e. provide their internal
    #     states with new ids, but the 'behavior' remains. This allows
    #     state machines to appear twice, or being used in 'larger'
    #     conglomerates.
    #     'clone_list' is iterated several times below; it must be a list.
    if CloneF:
        clone_list = [sm.clone() for sm in state_machine_list]
    else:
        clone_list = state_machine_list

    # (*) collect all transitions from both state machines into a single one
    #     (clone to ensure unique identifiers of states)
    new_init_state = State.new_merged_core_state(
        (clone.get_init_state() for clone in clone_list), ClearF=True)
    result = StateMachine(InitState=new_init_state)

    for clone in clone_list:
        result.states.update(clone.states)

    # (*) add additional **init** and **end** state
    #     NOTE: when the result state machine was created, it already contains
    #           a new initial state index. thus at this point only the new
    #           terminal state has to be created.
    #     NOTE: it is essential that the acceptance flag stays False, at this
    #           point in time, so that the mounting operations only happen on
    #           the old acceptance states. Later the acceptance state is raised
    #           to 'accepted' (see below)
    if CommonTerminalStateF:
        new_terminal_state_index = index.get()
        result.states[new_terminal_state_index] = State.new_merged_core_state(
            result.get_acceptance_state_list(), ClearF=True)

    # (*) Connect from the new initial state to the initial states of the
    #     clones via epsilon transition.
    #     Connect from each success state of the clones to the new end state
    #     via epsilon transition.
    for clone in clone_list:
        result.mount_to_initial_state(clone.init_state_index)

    if CommonTerminalStateF:
        result.mount_to_acceptance_states(new_terminal_state_index,
                                          CancelStartAcceptanceStateF=False)

    return __consider_empty_state_machines(result, empty_state_machine_list)
def do(StateMachineList, CommonTerminalStateF=True, CloneF=True):
    """Connect state machines in parallel.

    StateMachineList     Non-empty list of 'StateMachine' objects.

    CommonTerminalStateF tells whether the state machines shall trigger
                         to a common terminal. This may help nfa-to-dfa
                         or hopcroft minimization for ISOLATED patterns.

                         A state machine that consists of the COMBINATION
                         of patterns MUST set this flag to 'False'.

    CloneF               Controls if state machine list is cloned or not.
                         If the single state machines are no longer
                         required after construction, the CloneF can be
                         set to False.

                         If Cloning is disabled the state machines
                         themselves will be altered--which brings some
                         advantage in speed.

    RETURNS: The value of '__consider_empty_state_machines()' called with
             the combined state machine and the list of those given state
             machines that are empty or 'none'.
    """
    assert isinstance(StateMachineList, list)
    assert len(StateMachineList) != 0
    for x in StateMachineList:
        assert isinstance(x, StateMachine), x.__class__.__name__

    # Filter out empty state machines from the consideration. Each state
    # machine is classified exactly once.
    state_machine_list = []
    empty_state_machine_list = []
    for sm in StateMachineList:
        if sm.is_empty() or special.is_none(sm):
            empty_state_machine_list.append(sm)
        else:
            state_machine_list.append(sm)

    # Fewer than two remaining state machines: nothing to connect.
    if len(state_machine_list) < 2:
        if not state_machine_list:
            result = StateMachine()
        elif CloneF:
            result = state_machine_list[0].clone()
        else:
            result = state_machine_list[0]
        return __consider_empty_state_machines(result, empty_state_machine_list)

    # (*) need to clone the state machines, i.e. provide their internal
    #     states with new ids, but the 'behavior' remains. This allows
    #     state machines to appear twice, or being used in 'larger'
    #     conglomerates.
    #     'clone_list' is iterated several times below; it must be a list.
    if CloneF:
        clone_list = [sm.clone() for sm in state_machine_list]
    else:
        clone_list = state_machine_list

    # (*) collect all transitions from both state machines into a single one
    #     (clone to ensure unique identifiers of states)
    new_init_state = State.new_merged_core_state(
        (clone.get_init_state() for clone in clone_list), ClearF=True)
    result = StateMachine(InitState=new_init_state)

    for clone in clone_list:
        result.states.update(clone.states)

    # (*) add additional **init** and **end** state
    #     NOTE: when the result state machine was created, it already contains
    #           a new initial state index. thus at this point only the new
    #           terminal state has to be created.
    #     NOTE: it is essential that the acceptance flag stays False, at this
    #           point in time, so that the mounting operations only happen on
    #           the old acceptance states. Later the acceptance state is raised
    #           to 'accepted' (see below)
    if CommonTerminalStateF:
        new_terminal_state_index = index.get()
        result.states[new_terminal_state_index] = State.new_merged_core_state(
            result.get_acceptance_state_list(), ClearF=True)

    # (*) Connect from the new initial state to the initial states of the
    #     clones via epsilon transition.
    #     Connect from each success state of the clones to the new end state
    #     via epsilon transition.
    for clone in clone_list:
        result.mount_to_initial_state(clone.init_state_index)

    if CommonTerminalStateF:
        result.mount_to_acceptance_states(new_terminal_state_index,
                                          CancelStartAcceptanceStateF=False)

    return __consider_empty_state_machines(result, empty_state_machine_list)
def do(SM_List):
    """Intersection: Only match on patterns which are matched by all state
    machines in 'SM_List'.

    (C) 2013 Frank-Rene Schaefer
    ________________________________________________________________________

    A lexeme which matches all patterns must reach an acceptance in each
    given state machine. That is,

       For each state machine, there is a path from the init state to an
       acceptance state triggered along by the characters of the lexeme.

    We cannot go forward, since we cannot omit a path upon non-fit.

    Now, consider the super-state consisting of all acceptance states of
    all state machines. There must be a way backward from the
    super-acceptance-state to the init states. As soon as a path is
    interrupted, it can be thrown away. This can be achieved by reversed
    state machines which are combined into a single one.

    Reverse all state machines; the epsilon closure of the init state
    corresponds to the super acceptance state. The transitions in the
    super-state machine correspond to the way backwards in the state
    machine. For each feasible state in the super-state machine create a
    new state.

    The acceptance states of the reversed state machines correspond to the
    init states of the original state machines. If the super state contains
    an acceptance state of the original state, it can become an acceptance
    state of the intersection, because we now found a path. The found state
    machine must be reversed at the end.

    RETURNS: 'special.get_none()'  -- if 'special.is_none()' holds for any
                                      state machine in 'SM_List'.
             'StateMachine()'      -- if the constructed state machine has
                                      no acceptance states.
             Otherwise, the constructed state machine after 'reverse.do()'
             and 'beautifier.do()'.
    """
    # If one state machine is '\None', then the intersection is '\None'.
    for sm in SM_List:
        if special.is_none(sm):
            return special.get_none()

    reverse_sm_list = [reverse.do(sm) for sm in SM_List]
    state_id_set_list = [set(rsm.states) for rsm in reverse_sm_list]
    acceptance_state_id_list = [
        set(rsm.get_acceptance_state_index_list()) for rsm in reverse_sm_list
    ]

    def has_one_from_each(StateIDSet_List, StateIDSet):
        """StateIDSet_List[i] is the set of state indices from state machine
        'i' in 'reverse_sm_list'.

        RETURNS: True  -- If the StateIDSet has at least one state from every
                          state machine.
                 False -- If there is at least one state machine that has no
                          state in 'StateIDSet'.
        """
        return all(not state_id_set.isdisjoint(StateIDSet)
                   for state_id_set in StateIDSet_List)

    def get_merged_state(AcceptanceStateIndexList, EpsilonClosure):
        """Create the new target state in the state machine.
        Accept only if all accept.
        """
        acceptance_f = has_one_from_each(AcceptanceStateIndexList,
                                         EpsilonClosure)
        return State(AcceptanceF=acceptance_f)

    # Plain merge of all states of all state machines with an
    # epsilon transition from the init state to all init states
    # of the reverse_sm
    merged_sm = StateMachine()
    for rsm in reverse_sm_list:
        merged_sm.states.update(rsm.states)
        merged_sm.add_epsilon_transition(merged_sm.init_state_index,
                                         rsm.init_state_index)

    initial_state_epsilon_closure = \
        merged_sm.get_epsilon_closure(merged_sm.init_state_index)

    init_state = get_merged_state(acceptance_state_id_list,
                                  initial_state_epsilon_closure)
    result = StateMachine(InitStateIndex=index.get(), InitState=init_state)

    # (*) prepare the initial worklist
    worklist = [(result.init_state_index, initial_state_epsilon_closure)]

    epsilon_closure_db = merged_sm.get_epsilon_closure_db()

    while worklist:
        # 'start_state_index' is the index of an **existing** state in the
        # state machine. It was either created above, in StateMachine's
        # constructor, or as a target state index.
        start_state_index, start_state_combination = worklist.pop()

        # (*) compute the elementary trigger sets together with the
        #     epsilon closure of target state combinations that they trigger
        #     to. In other words: find the ranges of characters where the
        #     state triggers to a unique state combination. E.g:
        #
        #         Range        Target State Combination
        #         [0:23]   --> [ State1, State2, State10 ]
        #         [24:60]  --> [ State1 ]
        #         [61:123] --> [ State2, State10 ]
        #
        elementary_trigger_set_infos = merged_sm.get_elementary_trigger_sets(
            start_state_combination, epsilon_closure_db)
        ## DEBUG_print(start_state_combination, elementary_trigger_set_infos)

        # (*) loop over all elementary trigger sets
        for target_combination, trigger_set in elementary_trigger_set_infos.items():
            # -- if there is no trigger to the given target state
            #    combination, then drop it. Also drop it, if it lacks a state
            #    from at least one of the reversed state machines.
            if trigger_set.is_empty() \
               or not has_one_from_each(state_id_set_list, target_combination):
                continue

            # -- add a new target state representing the state combination
            #    (if this did not happen yet)
            target_state_index = map_state_combination_to_index(target_combination)

            # -- if target state combination was not considered yet, then
            #    create a new state in the state machine
            if target_state_index not in result.states:
                result.states[target_state_index] = \
                    get_merged_state(acceptance_state_id_list, target_combination)
                worklist.append((target_state_index, target_combination))

            # -- add the transition 'start state to target state'
            result.add_transition(start_state_index, trigger_set,
                                  target_state_index)

    if not result.has_acceptance_states():
        return StateMachine()
    return beautifier.do(reverse.do(result))