def do(SM_A, SM_B):
    """Find a state machine that stops right before anything that 'SM_B'
    matches.

    If a lexeme 'l' in L(SM_A) is  l = [x0 ... xj, xk ... xN]  where the
    tail '[xk ... xN]' is a lexeme of L(SM_B), then the result shall match
    only '[x0 ... xj]'.  This is achieved by working on the reversed
    machines:

        reverse(SM_A) delivers  s = [xN ... xk, xj ... x0]
        reverse(SM_B) delivers  t = [xN ... xk]

    so 'cut(reverse(SM_A), reverse(SM_B))' leaves  u = [xj ... x0],
    and reversing once more gives the desired  [x0 ... xj].

    (C) Frank-Rene Schaefer
    """
    backward_a = beautifier.do(reverse.do(SM_A))
    backward_b = beautifier.do(reverse.do(SM_B))
    remainder  = complement_begin.do(backward_a, backward_b)
    return reverse.do(remainder)
def test(TestString): print "-------------------------------------------------------------------" print "expression = \"" + TestString + "\"" pattern = core.do(TestString, {}).finalize(None) # During 'finalize()': pattern.transform(Setup.buffer_encoding) # During 'finalize()': pattern.mount_post_context_sm() # During 'finalize()': pattern.mount_pre_context_sm() print "pattern\n" assert pattern.sm.is_DFA_compliant() ok_f, sm = Setup.buffer_encoding.do_state_machine(pattern.sm) sm = beautifier.do(pattern.sm) print sm.get_string(NormalizeF=True, Option="hex") if pattern.sm_pre_context_to_be_reversed: assert pattern.sm_pre_context_to_be_reversed.is_DFA_compliant() ok_f, sm = Setup.buffer_encoding.do_state_machine( pattern.sm_pre_context_to_be_reversed) reversed_sm = reverse.do(sm) print "pre-context = ", reversed_sm.get_string(NormalizeF=True, Option="hex") if pattern.sm_bipd_to_be_reversed: assert pattern.sm_bipd_to_be_reversed.is_DFA_compliant() ok_f, sm = Setup.buffer_encoding.do_state_machine( pattern.sm_bipd_to_be_reversed) sm = reverse.do(sm) print "post-context backward input position detector = ", sm.get_string( NormalizeF=True, Option="hex")
def do(the_state_machine, pre_context_sm, BeginOfLinePreContextF):
    """Equip 'the_state_machine' with a pre-context condition.

    The pre-context machine is NOT woven into the main machine.  Since it
    is to be walked through backwards at run-time, it is reversed, pruned
    and returned as a machine of its own.  The acceptance states of the
    main machine are marked with the id of that reversed machine.  A lone
    'begin-of-line' condition is flagged trivially on the acceptance
    states instead of building a machine.

    RETURNS: Reversed pre-context state machine, or None if there is no
             non-trivial pre-context.
    """
    # Consistency: state machines without states are senseless here.
    assert not the_state_machine.is_empty()
    assert pre_context_sm is None or not pre_context_sm.is_empty()

    if pre_context_sm is None:      # NOT: 'and ...' !
        if BeginOfLinePreContextF:
            # Only the trivial 'begin-of-line' pre-context applies.
            for acceptance_state in the_state_machine.get_acceptance_state_list():
                acceptance_state.set_pre_context_id(E_PreContextIDs.BEGIN_OF_LINE)
        return None

    # (1) The pre-context is analyzed backwards => reverse it.
    backward_sm = reverse.do(pre_context_sm)

    if BeginOfLinePreContextF:
        # A preceding 'begin-of-line' becomes a trailing newline in the
        # reversed machine.
        backward_newline = reverse.do(StateMachine_Newline())
        backward_sm = sequentialize.do([backward_sm, backward_newline])

    # (2) Once an acceptance state is reached, no further backward
    #     analysis is necessary.
    acceptance_pruning.do(backward_sm)

    # (3) Clean up what reversal (and the optional newline mounting) did
    #     -- must happen AFTER the acceptance pruning!
    backward_sm = beautifier.do(backward_sm)

    # (4) Let the acceptance states of the main machine refer to the
    #     reversed pre-context machine by id.
    #     [Is this necessary? Is it not enough that the acceptance
    #      origins point to it? <fschaef>]
    backward_sm_id = backward_sm.get_id()
    for acceptance_state in the_state_machine.get_acceptance_state_list():
        acceptance_state.set_pre_context_id(backward_sm_id)

    return backward_sm
def test(TestString): print "-------------------------------------------------------------------" print "expression = \"" + TestString + "\"" pattern = core.do(TestString, {}).finalize(None) print "pattern\n" print pattern.sm print "pre-context = ", reverse.do(pattern.sm_pre_context_to_be_reversed)
def do(the_state_machine, pre_context_sm, BeginOfLinePreContextF):
    """Attach a pre-context condition to 'the_state_machine'.

    The pre-context state machine is not webbed into the main machine.
    Instead, it is inverted (it is to be walked through backwards at
    run-time), pruned of anything beyond the first acceptance, and
    returned separately.  Acceptance states of the main machine are
    tagged with the inverted machine's id; a pure 'begin-of-line'
    condition is tagged trivially without building any machine.

    RETURNS: The inverted pre-context state machine, or None if no
             non-trivial pre-context exists.
    """
    # Consistency checks: empty machines are senseless here.
    assert not the_state_machine.is_empty()
    assert pre_context_sm is None or not pre_context_sm.is_empty()

    if pre_context_sm is None:
        if BeginOfLinePreContextF:
            # Mark acceptance states with the trivial 'BeginOfLine' pre-context.
            for state in the_state_machine.get_acceptance_state_list():
                state.set_pre_context_id(E_PreContextIDs.BEGIN_OF_LINE)
        return None

    # The pre-context is walked backwards => reverse it.
    inverted_sm = reverse.do(pre_context_sm)

    if BeginOfLinePreContextF:
        # Extend the existing pre-context with a preceding 'begin-of-line'.
        inverted_sm.mount_newline_to_acceptance_states(
            Setup.dos_carriage_return_newline_f, InverseF=True)

    # Once an acceptance state is reached no further analysis is necessary.
    acceptance_pruning.do(inverted_sm)

    # Clean up inversion artefacts (and the optional 'BeginOfLinePreContextF'
    # mounting) -- AFTER acceptance_pruning (!)
    inverted_sm = beautifier.do(inverted_sm)

    # Let the main machine refer to the inverted pre-context machine.
    # [Is this necessary? Is it not enough that the acceptance origins
    #  point to it? <fschaef>]
    inverted_sm_id = inverted_sm.get_id()

    # Tag every acceptance state (do not delete existing origin data,
    # otherwise information gets lost).
    for state in the_state_machine.states.itervalues():
        if state.is_acceptance():
            state.set_pre_context_id(inverted_sm_id)

    return inverted_sm
def do(the_state_machine, pre_context_sm, BeginOfLinePreContextF):
    """Set up a pre-condition for 'the_state_machine'.

    This is entirely different from sequentializing or parallelizing
    state machines: the pre-condition machine is **not** webbed into the
    original machine.  It is inverted (to be walked through backwards),
    pruned, and returned on its own, while the acceptance states of the
    original machine are marked with the inverted machine's id.  A lone
    'begin-of-line' pre-condition is marked trivially.

    RETURNS: Inverted pre-context machine, or None for a trivial or
             absent pre-context.
    """
    #__________________________________________________________________
    # (*) Consistency checking: machines with no states are senseless.
    assert not the_state_machine.is_empty()
    assert pre_context_sm is None or not pre_context_sm.is_empty()
    #__________________________________________________________________

    if pre_context_sm is None:
        if BeginOfLinePreContextF:
            # Trivial pre-context: flag acceptance states with 'BeginOfLine'.
            for state in the_state_machine.get_acceptance_state_list():
                state.set_pre_context_id(E_PreContextIDs.BEGIN_OF_LINE)
        return None

    # (*) Reverse the state machine of the pre-condition.
    backward_machine = reverse.do(pre_context_sm)

    if BeginOfLinePreContextF:
        # Extend the existing pre-context with a preceding 'begin-of-line'.
        backward_machine.mount_newline_to_acceptance_states(Setup.dos_carriage_return_newline_f,
                                                            InverseF=True)

    # (*) Once an acceptance state is reached no further analysis is necessary.
    acceptance_pruning.do(backward_machine)

    # (*) Clean up what inversion (and optionally 'BeginOfLinePreContextF')
    #     has done -- AFTER acceptance_pruning (!)
    backward_machine = beautifier.do(backward_machine)

    # (*) Let the state machine refer to it.
    #     [Is this necessary? Is it not enough that the acceptance origins
    #      point to it? <fschaef>]
    backward_machine_id = backward_machine.get_id()

    # (*) Associate the pre-context id with each acceptance state
    #     (do not delete existing origin data, otherwise it gets lost).
    for state in the_state_machine.states.itervalues():
        if not state.is_acceptance(): continue
        state.set_pre_context_id(backward_machine_id)

    return backward_machine
def cut_end(A, B):
    """Cut End: any lexeme that matches 'A' and ends with a lexeme
    matching 'B' is pruned by what matches 'B'.

    SCHEME: 'CutEnd yyyyyyyyyy'

            lexemes before              lexemes after
            aaaaaaaaaaaxxxxxxxxxx --->  aaaaaaaaaaaxxxxxxxxxx
            bbbbbbbbbbbyyyyyyyyyy       bbbbbbbbbbb

    Implemented as 'cut begin' on the reversed machines.
    """
    rev_a = reverse.do(A)
    rev_b = reverse.do(B)
    clipped, modified_f = __cut_begin_core(rev_a, rev_b)
    if modified_f:
        return reverse.do(clipped)
    # Nothing was cut => the original machine stands.
    return A
def test(sm, pre_sm): print "EXPRESSION = ", sm print "PRE-CONDITION = ", pre_sm pre_context_sm = setup_pre_context.do(sm, pre_sm, False, False) inverse_pre_context_sm = reverse.do(pre_context_sm) inverse_pre_context_sm.set_id(pre_context_sm.get_id()) # print "with pre-context = ", sm print "inverse pre-context = ", inverse_pre_context_sm
def do(self, Option="utf8"):
    """Write the mode's state machines in the 'dot' language that
    graphviz uses: the main machine, the (un-reversed) pre-contexts and
    the backward input position detectors."""
    assert Option in ["utf8", "hex"]

    # Main machine: the union of all core pattern machines.
    self.__do(combination.do(self.mode.core_sm_list), self.file_name_main, Option)

    # Pre-contexts are stored reversed; un-reverse them for display.
    if self.mode.pre_context_sm_to_be_reversed_list:
        forward_list = [
            reverse.do(machine)
            for machine in self.mode.pre_context_sm_to_be_reversed_list
        ]
        combined = combination.do(forward_list, FilterDominatedOriginsF=False)
        self.__do(combined, self.file_name_pre_context, Option)

    # Backward input position detectors, one file per machine.
    if len(self.mode.bipd_sm_to_be_reversed_db) != 0:
        for machine in self.mode.bipd_sm_to_be_reversed_db.itervalues():
            target_file = self.file_name_bipd_db[machine.get_id()]
            self.__do(reverse.do(machine), target_file, Option)
def detect_backward(CoreStateMachine, PostConditionStateMachine):
    """A 'backward ambiguity' denotes the case where it cannot clearly
    be determined how far to go back from the end of a post-condition.

    NOTE: This does not mean that the post-condition itself is
          ambiguous.  Many backward-ambiguous cases are handled by
          quex's normal post-condition handling.

    Examples:  x/x+   is backward ambiguous because in a stream of 'x'
               one cannot determine with a pure state machine where to
               stop.  This case, though, can be handled by the normal
               post-condition implementation.

               x+/x+  is backward ambiguous and cannot be handled by
               the normal implementation.  In fact, this specification
               allows no conclusion about the user's intent where to
               reset the input after a match.

    Implemented as a forward detection on the reversed machines.
    """
    __assert_state_machines(CoreStateMachine, PostConditionStateMachine)

    # NOTE(review): this clone is overwritten below before use; kept for
    # exact behavioral equivalence with the original code.
    backward_post_sm = PostConditionStateMachine.clone()

    backward_core_sm = beautifier.do(reverse.do(CoreStateMachine))

    post_sm_copy = deepcopy(PostConditionStateMachine)  # no deepcopy needed here, I guess <fschaef 11y11m01d>
    backward_post_sm = beautifier.do(reverse.do(post_sm_copy))

    return detect_forward(backward_post_sm, backward_core_sm)
def detect_backward(CoreStateMachine, PostConditionStateMachine):
    """Detect 'backward ambiguity': the case where it cannot clearly be
    determined how far to go back from the end of a post-condition.

    NOTE: This does not mean the post-condition is ambiguous.  Many
          backward-ambiguous cases are handled by quex's normal
          post-condition handling.

    Examples:  x/x+   backward ambiguous, but handled by the normal
                      post-condition implementation.
               x+/x+  backward ambiguous and NOT handled by the normal
                      implementation -- the specification allows no
                      conclusion about where to reset the input.

    Delegates to 'detect_forward()' on the reversed machines.
    """
    __assert_state_machines(CoreStateMachine, PostConditionStateMachine)

    # NOTE(review): overwritten below before use; kept so the execution
    # trace matches the original exactly.
    reversed_post_sm = PostConditionStateMachine.clone()

    reversed_core_sm = reverse.do(CoreStateMachine)

    post_copy = deepcopy(PostConditionStateMachine)  # no deepcopy needed here, I guess <fschaef 11y11m01d>
    reversed_post_sm = reverse.do(post_copy)

    return detect_forward(reversed_post_sm, reversed_core_sm)
def treat(sm, ReverseF):
    """Transform 'sm' into the buffer's encoding and optionally reverse
    it, keeping the machine's original id throughout.

    RETURNS: The treated state machine.
    """
    saved_id = sm.get_id()

    complete_f, sm = Setup.buffer_encoding.do_state_machine(sm)
    if not complete_f:
        # Best effort: warn, but keep the (partial) machine.
        error.warning("Pattern contains elements not found in engine codec '%s'.\n" % Setup.buffer_encoding.name \
                      + "(Buffer element size is %s [byte])" % Setup.lexatom.size_in_byte,
                      sm.sr)

    if ReverseF:
        sm = reverse.do(sm, EnsureDFA_f=True)

    # The id must survive transformation and reversal.
    sm.set_id(saved_id)
    return sm
def test_core(TestString): print "___________________________________________________________________________" print "expression = \"" + TestString + "\"" pattern = core.do(TestString, {}, AllowNothingIsNecessaryF=True).finalize(None) if pattern is None: print "pattern syntax error" else: print "pattern\n", print pattern.sm if pattern.sm_pre_context_to_be_reversed: reversed_sm = reverse.do(pattern.sm_pre_context_to_be_reversed) print "pre-context = ", reversed_sm print "begin of line = ", pattern.sm.has_acceptance_condition(E_AcceptanceCondition.BEGIN_OF_LINE)
def do(A, T):
    """Checks: (1) whether there are sequences that match 'T' and the
                   end of 'A'.
               (2) whether there are sequences that do NOT match 'T'
                   and still match the end of 'A'.

    RETURNS: [0] True, if ALL ending sequences of 'A' match 'T'.
                 False, else.
             [1] True, if SOME ending sequences of 'A' match 'T'.
                 False, else.
    """
    rev_a = reverse.do(A)
    rev_t = reverse.do(T)

    # Does 'T' match AT LEAST ONE tail of 'A'?
    # == Are there paths in 'rev_t' to its acceptance states that are
    #    also present in 'rev_a'?
    some_tail_f = len(outrun.commonality(rev_t, rev_a)) != 0

    # Are there tails in 'A' which are not covered by 'T'?
    # == Are there paths in 'rev_a' not covered by paths in 'rev_t'?
    uncovered_tail_f = diversion(rev_a, rev_t)

    all_tails_f = some_tail_f and not uncovered_tail_f
    return all_tails_f, some_tail_f
def mount_post_context_sm(self):
    """Mount the post-context onto the pattern's state machine.

    'setup_post_context.do()' welds the post-context onto 'self.__sm'
    and may deliver a 'backward input position detection' (BIPD)
    machine.  If one is delivered, it is made DFA-compliant and its
    reversed, beautified form is stored in 'self.__bipd_sm'.

    Fix: removed the redundant 'is not None' re-check -- the early
    return directly above already guarantees the machine exists.
    """
    self.__sm,                     \
    self.__bipd_sm_to_be_inverted = setup_post_context.do(self.__sm,
                                                          self.__post_context_sm,
                                                          self.__post_context_end_of_line_f,
                                                          self.__sr)
    if self.__bipd_sm_to_be_inverted is None:
        return

    # Guaranteed non-None here; only ensure DFA compliance.
    if not self.__bipd_sm_to_be_inverted.is_DFA_compliant():
        self.__bipd_sm_to_be_inverted = beautifier.do(self.__bipd_sm_to_be_inverted)

    self.__bipd_sm = beautifier.do(reverse.do(self.__bipd_sm_to_be_inverted))
def test_core(TestString): print "___________________________________________________________________________" print "expression = \"" + TestString + "\"" Setup.dos_carriage_return_newline_f = True pattern_raw = core.do(TestString, {}) pattern = pattern_raw.finalize(None) if pattern is None: print "pattern syntax error" else: print "pattern\n", print pattern.sm if pattern.sm_pre_context_to_be_reversed: reversed_sm = reverse.do(pattern.sm_pre_context_to_be_reversed) print "pre-context =", reversed_sm print "begin of line = ", pattern.has_acceptance_condition( E_AcceptanceCondition.BEGIN_OF_LINE)
def test(Idx, sm_pre, sm, sm_post, BOL_F, EOL_F):
    # Exercise pre-/post-context mounting on a clone of 'sm' and print
    # the resulting machines plus the trivial begin-of-line flag.
    ## if Idx != 5: return
    result = sm.clone()

    print "##-- %i -----------------------------------------------------------------------" % Idx
    if sm_pre is not None: print " -- pre-condition = True"
    else: print " -- pre-condition = False"
    if sm_post is not None: print " -- post-condition = True"
    else: print " -- post-condition = False"
    print " -- begin of line = ", BOL_F
    print " -- end of line = ", EOL_F

    # Mount post-context first, then the pre-context.
    ipsb_sm = setup_post_context.do(result, sm_post, EOL_F, False, SourceRef_VOID)
    pre_context_sm = setup_pre_context.do(result, sm_pre, BOL_F, False)

    if pre_context_sm is None: inverse_pre_context_sm = None
    else:
        # Keep the id of the non-reversed machine on the reversed one.
        inverse_pre_context_sm = reverse.do(pre_context_sm)
        inverse_pre_context_sm.set_id(pre_context_sm.get_id())
    #
    # print "EXPRESSION = ", result
    # print "POST CONDITION = ", post_sm
    # print "APPENDED = ", result
    result = beautifier.do(result)
    #
    # print
    print "result sm.id = ", result.get_id()
    if inverse_pre_context_sm is not None:
        print "result pre sm.id = ", inverse_pre_context_sm.get_id()

    # All acceptance states must agree on the 'begin-of-line' flag.
    begin_of_line_f = None
    for state in result.get_acceptance_state_list():
        BOF = state.single_entry.has_pre_context_begin_of_line()
        if begin_of_line_f is None: begin_of_line_f = BOF
        else:                       assert begin_of_line_f == BOF

    print "result = ", result
    if inverse_pre_context_sm is not None:
        print "inverse_pre_context_sm = ", inverse_pre_context_sm
    print "trivially pre-conditioned = ", begin_of_line_f
def do(SM_A, SM_B):
    """Apply 'cut begin' on the reversed machines: reverse 'SM_A' and
    'SM_B', cut what 'reverse(SM_B)' matches from the beginning of
    'reverse(SM_A)', then reverse the result back again.
    """
    backward_a = beautifier.do(reverse.do(SM_A))
    backward_b = beautifier.do(reverse.do(SM_B))
    remainder  = cut_begin.do(backward_a, backward_b)
    return reverse.do(remainder)
print "Core Pattern:\n" + repr(x) return try: post_context_sm = regex.do(string_stream_PostCondition, {}).extract_sm() except RegularExpressionException, x: print "Post Condition Pattern:\n" + repr(x) return print "---------------------------------------------------------" print "core pattern =", RE_Core print "post condition pattern =", RE_PostCondition backward_search_sm = ambiguous_post_context.mount(core_sm, post_context_sm) backward_search_sm = reverse.do(backward_search_sm) # .mount() does not transformation from NFA to DFA core_sm = beautifier.do(core_sm) print "ambigous post condition =", core_sm print "backward detector =", backward_search_sm test('"xy"+', '((ab)+|xy)') test('"xz"+', '[a-z]{2}') test('"xyz"+', '"xyz"') test("(a)+", "ab") test("(.)+a", "(.)+") # test('"xz"+', '"xz"+')
except RegularExpressionException, x: print "Core Pattern:\n" + repr(x) return try: post_context_sm = regex.do(string_stream_PostCondition, {}).sm except RegularExpressionException, x: print "Post Condition Pattern:\n" + repr(x) return print "---------------------------------------------------------" print "core pattern =", RE_Core print "post condition pattern =", RE_PostCondition backward_search_sm = ambiguous_post_context.mount(core_sm, post_context_sm) backward_search_sm = beautifier.do(reverse.do(backward_search_sm)) # .mount() does not transformation from NFA to DFA core_sm = beautifier.do(core_sm) print "ambigous post condition =", core_sm print "backward detector =", backward_search_sm test('"xy"+', '((ab)+|xy)') test('"xz"+', '[a-z]{2}') test('"xyz"+', '"xyz"') test("(a)+", "ab") test("(.)+a", "(.)+") # test('"xz"+', '"xz"+')
def invert_this(sm): print "-------------------------------------------------------------------------------" print "sm = ", sm tmp = reverse.do(sm) print "inverse = ", tmp return tmp
def rev(A):
    """Shorthand: the reversed version of state machine 'A'."""
    return reverse.do(A)

def uni(*A):
    """Shorthand: the union of all given state machines."""
    machine_list = list(A)
    return union.do(machine_list)
def test(sm): print "-------------------------------------------------------------------------------" print "sm = ", sm sm = reverse.do(sm) print "inverse = ", sm
def snap_reverse(stream, PatternDict):
    """Parse a '\\R{...}' expression from 'stream' and return the
    reversed state machine of its content."""
    inner = snap_curly_bracketed_expression(stream, PatternDict,
                                            "reverse operator", "R")
    return reverse.do(inner[0])
def do(SM_List):
    """Intersection: Only match on patterns which are matched by all state
    machines in 'SM_List'.

    (C) 2013 Frank-Rene Schaefer
    ________________________________________________________________________

    A lexeme which matches all patterns must reach an acceptance in each
    given state machine.  That is, for each state machine there is a path
    from the init state to an acceptance state triggered along by the
    characters of the lexeme.

    We cannot go forward, since we cannot omit a path upon non-fit.  Now,
    consider the super-state consisting of all acceptance states of all
    state machines.  There must be a way backward from that
    super-acceptance-state to the init states.  As soon as a path is
    interrupted, it can be thrown away.  This can be achieved by reversed
    state machines which are combined into a single one:

    Reverse all state machines; the epsilon closure of the init state
    corresponds to the super acceptance state.  The transitions in the
    super-state machine correspond to the way backwards in the state
    machine.  For each feasible state in the super-state machine create a
    new state.  The acceptance states of the reversed machines correspond
    to the init states of the original machines.  If the super state
    contains an acceptance state of the original state, it can become an
    acceptance state of the intersection, because we now found a path.
    The found state machine must be reversed at the end.
    """
    # '\None' absorbs everything: the intersection is '\None'.
    for sm in SM_List:
        if special.is_none(sm):         # If one state machine is '\None'
            return special.get_none()   # then, the intersection is '\None'

    reverse_sm_list          = [ reverse.do(sm) for sm in SM_List ]
    # Per reversed machine: all of its state indices ...
    state_id_set_list        = [ set(sm.states.iterkeys()) for sm in reverse_sm_list ]
    # ... and its acceptance state indices.
    acceptance_state_id_list = [ set(sm.get_acceptance_state_index_list()) for sm in reverse_sm_list ]

    def has_one_from_each(StateIDSet_List, StateIDSet):
        """StateIDSet_List[i] is the set of state indices from state
        machine 'i' in 'reverse_sm_list'.

        RETURNS: True  -- If the StateIDSet has at least one state
                          from every state machine.
                 False -- If there is at least one state machine
                          that has no state in 'StateIDSet'.
        """
        for state_id_set in StateIDSet_List:
            if state_id_set.isdisjoint(StateIDSet):
                return False
        return True

    def get_merged_state(AcceptanceStateIndexList, EpsilonClosure):
        """Create the new target state in the state machine.
        Accept only if all accept.
        """
        acceptance_f = has_one_from_each(AcceptanceStateIndexList, EpsilonClosure)
        return State(AcceptanceF=acceptance_f)

    # Plain merge of all states of all state machines with an epsilon
    # transition from the init state to all init states of the reversed
    # machines.
    sm = StateMachine()
    for rsm in reverse_sm_list:
        sm.states.update(rsm.states)
        sm.add_epsilon_transition(sm.init_state_index, rsm.init_state_index)

    initial_state_epsilon_closure = sm.get_epsilon_closure(sm.init_state_index)

    InitState = get_merged_state(acceptance_state_id_list, initial_state_epsilon_closure)
    result = StateMachine(InitStateIndex=index.get(), InitState=InitState)

    # (*) Prepare the initial worklist.
    worklist = [ ( result.init_state_index, initial_state_epsilon_closure) ]

    epsilon_closure_db = sm.get_epsilon_closure_db()

    while len(worklist) != 0:
        # 'start_state_index' is the index of an **existing** state in the
        # state machine.  It was either created above, in StateMachine's
        # constructor, or as a target state index.
        start_state_index, start_state_combination = worklist.pop()

        # (*) Compute the elementary trigger sets together with the epsilon
        #     closure of target state combinations that they trigger to.
        #     In other words: find the ranges of characters where the state
        #     triggers to a unique state combination.  E.g:
        #                Range        Target State Combination
        #                [0:23]   --> [ State1, State2, State10 ]
        #                [24:60]  --> [ State1 ]
        #                [61:123] --> [ State2, State10 ]
        elementary_trigger_set_infos = sm.get_elementary_trigger_sets(start_state_combination,
                                                                      epsilon_closure_db)
        ## DEBUG_print(start_state_combination, elementary_trigger_set_infos)

        # (*) Loop over all elementary trigger sets.
        for epsilon_closure_of_target_state_combination, trigger_set in elementary_trigger_set_infos.iteritems():
            # -- If there is no trigger to the given target state
            #    combination, then drop it.
            if trigger_set.is_empty():
                continue
            # -- Intersection requires at least one state from EVERY machine.
            elif not has_one_from_each(state_id_set_list, epsilon_closure_of_target_state_combination):
                continue

            # -- Add a new target state representing the state combination
            #    (if this did not happen yet).
            target_state_index = \
                map_state_combination_to_index(epsilon_closure_of_target_state_combination)

            # -- If the target state combination was not considered yet,
            #    create a new state in the state machine.
            if not result.states.has_key(target_state_index):
                result.states[target_state_index] = get_merged_state(acceptance_state_id_list,
                                                                     epsilon_closure_of_target_state_combination)
                worklist.append((target_state_index, epsilon_closure_of_target_state_combination))

            # -- Add the transition 'start state --> target state'.
            result.add_transition(start_state_index, trigger_set, target_state_index)

    if not result.has_acceptance_states():
        # No common path => empty intersection.
        return StateMachine()
    else:
        # Undo the reversal performed at the beginning.
        return beautifier.do(reverse.do(result))
def do(A, B):
    """RETURNS: True  - if A == SUPERSET of B
                False - if not
    """
    if isinstance(A, StateMachine):
        # Plain state-machine vs. state-machine comparison.
        assert isinstance(B, StateMachine)
        return Checker(A, B).do()

    # From here on: 'A' and 'B' are patterns, not bare state machines.
    assert not isinstance(B, StateMachine)

    # (*) Core Pattern ________________________________________________________
    #
    #     (including the mounted post context, if there is one).
    #
    #     NOTE: Post-conditions do not change anything, since they match only
    #           when the whole lexeme has matched (from begin to end of post
    #           condition).  Post-conditions only tell something about the
    #           place where the analyzer returns after the match.
    superset_f = Checker(A.sm, B.sm).do()

    if not superset_f: return False

    # NOW: For the core state machines it holds:
    #
    #                 'core(A)' matches a super set of 'core(B)'.
    #
    # (*) Pre-Condition _______________________________________________________
    #
    if not A.has_pre_context():
        # core(A) is a superset of core(B).
        # A is not restricted.  B may be (who cares).
        # => A can match more than B.
        return True

    # NOW: Acceptance of A is restricted by a pre-context.
    #
    if not B.has_pre_context():
        # A is restricted by pre-context, B is not.
        # => B can match things that A cannot.
        return False

    # NOW: A is restricted by pre-context.
    #      B is restricted by pre-context.
    #
    #      For A to be a superset of B, A must be less or equally restricted
    #      than B:
    #
    #                 pre(B) is a superset of pre(A)
    #
    if B.pre_context_trivial_begin_of_line_f:
        if not A.pre_context_trivial_begin_of_line_f:
            # pre(A) can never be a subset of pre(B)
            return False
        else:
            # pre(A) = pre(B) which fulfills the condition
            return True

    # IMPORTANT: The pre-contexts must be mounted at this point!
    #            Call to '.mount_pre_context_sm()' must preceed this function.
    assert A.pre_context_sm is not None
    assert B.pre_context_sm is not None

    # NOW: B is a 'real' pre-context, not only a 'begin-of-line'.
    #
    # Decision about "pre(A) is subset of pre(B)" done by Checker.
    if not A.pre_context_trivial_begin_of_line_f:
        A_pre_sm = A.pre_context_sm
    else:
        # A contains only 'begin-of-line'.  Note, however, that the
        # -- newline definition may include '\r\n', so inversion is
        #    required.
        A_pre_sm = reverse.do(StateMachine.from_sequence("\n"))
        ## NOT: -- at this point in time we are dealing with transformed
        ##         machines.  So this has also to be transformed.
        ## complete_f, A_pre_sm = transformation.do_state_machine(A_pre_sm)

    return Checker(B.pre_context_sm, A_pre_sm).do()
def rev(A):
    """Shorthand: the reversed version of state machine 'A'."""
    return reverse.do(A)
def do(A, B):
    """RETURNS: True  - if A == SUPERSET of B
                False - if not
    """
    if isinstance(A, StateMachine):
        # Plain state-machine vs. state-machine comparison.
        assert isinstance(B, StateMachine)
        return Checker(A, B).do()

    # From here on: 'A' and 'B' are patterns, not bare state machines.
    assert not isinstance(B, StateMachine)

    # (*) Core Pattern ________________________________________________________
    #
    #     (including the mounted post context, if there is one).
    #
    #     NOTE: Post-conditions do not change anything, since they match only
    #           when the whole lexeme has matched (from begin to end of post
    #           condition).  Post-conditions only tell something about the
    #           place where the analyzer returns after the match.
    superset_f = Checker(A.sm, B.sm).do()

    if not superset_f: return False

    # NOW: For the core state machines it holds:
    #
    #                 'core(A)' matches a super set of 'core(B)'.
    #
    # (*) Pre-Condition _______________________________________________________
    #
    if not A.has_pre_context():
        # core(A) is a superset of core(B).
        # A is not restricted.  B may be (who cares).
        # => A can match more than B.
        return True

    # NOW: Acceptance of A is restricted by a pre-context.
    #
    if not B.has_pre_context():
        # A is restricted by pre-context, B is not.
        # => B can match things that A cannot.
        return False

    # NOW: A is restricted by pre-context.
    #      B is restricted by pre-context.
    #
    #      For A to be a superset of B, A must be less or equally restricted
    #      than B:
    #
    #                 pre(B) is a superset of pre(A)
    #
    if B.pre_context_trivial_begin_of_line_f:
        if not A.pre_context_trivial_begin_of_line_f:
            # pre(A) can never be a subset of pre(B)
            return False
        else:
            # pre(A) = pre(B) which fulfills the condition
            return True

    # IMPORTANT: The pre-contexts must be mounted at this point!
    #            Call to '.mount_pre_context_sm()' must preceed this function.
    assert A.pre_context_sm is not None
    assert B.pre_context_sm is not None

    # NOW: B is a 'real' pre-context, not only a 'begin-of-line'.
    #
    # Decision about "pre(A) is subset of pre(B)" done by Checker.
    if not A.pre_context_trivial_begin_of_line_f:
        A_pre_sm = A.pre_context_sm
    else:
        # A contains only 'begin-of-line'.  Note, however, that the
        # -- newline definition may include '\r\n', so inversion is
        #    required.
        A_pre_sm = reverse.do(StateMachine.from_sequence("\n"))
        ## NOT: -- at this point in time we are dealing with transformed
        ##         machines.  So this has also to be transformed.
        ## complete_f, A_pre_sm = Setup.buffer_codec.do_state_machine(A_pre_sm)

    return Checker(B.pre_context_sm, A_pre_sm).do()