def get_all():
    r"""RETURNS: A state machine that 'eats' absolutely everything, i.e.

                              .--- \Any ---.
                              |            |
            (0)--- \Any --->(( 0 ))<-------'

    '\Any' is expressed here as NumberSet(Interval(-sys.maxint, sys.maxint)).
    A separate NumberSet object is created for each of the two transitions.
    """
    def any_trigger_set():
        # A fresh object per call, so the two transitions never share a set.
        return NumberSet(Interval(-sys.maxint, sys.maxint))

    sm = StateMachine()
    accept_index = index.get()

    # The acceptance state loops on itself for any input ...
    accept_state = State(AcceptanceF=True)
    accept_state.add_transition(any_trigger_set(), accept_index)
    sm.states[accept_index] = accept_state

    # ... and is entered from the init state on any input.
    sm.get_init_state().add_transition(any_trigger_set(), accept_index)
    return sm
def get_all():
    r"""RETURNS: A state machine that 'eats' absolutely everything, i.e.

                              .--- \Any ---.
                              |            |
            (0)--- \Any --->(( 0 ))<-------'

    '\Any' is expressed as 'NumberSet_All()'. Each of the two transitions
    receives its own result of a 'NumberSet_All()' call.
    """
    sm = StateMachine()
    accept_index = index.get()

    # The acceptance state loops on itself for any input ...
    accept_state = State(AcceptanceF=True)
    accept_state.add_transition(NumberSet_All(), accept_index)
    sm.states[accept_index] = accept_state

    # ... and is entered from the init state on any input.
    init_state = sm.get_init_state()
    init_state.add_transition(NumberSet_All(), accept_index)
    return sm
def _do(the_state_machine, post_context_sm, EndOfLinePostContextF, SourceReference): """Appends a post context to the given state machine and changes state infos as required. NOTE: In case that: post_context_sm is not None or EndOfLinePostContextF The function appends something to the state machine and it is therefore required to pass 'NFA to DFA'--better also Hopcroft Minimization. ________________________________________________________________________ This process is very similar to sequentialization. There is a major difference, though: Given a state machine (e.g. a pattern) X with a post context Y, a match is only valid if X is followed by Y. Let Xn be an acceptance state of X and Ym an acceptance state of Y: ---(Xn-1)---->(Xn)---->(Y0)----> ... ---->((Ym)) store acceptance input position That is, it holds: -- The next input position is stored the position of Xn, even though it is 'officially' not an acceptance state. -- Ym will be an acceptance state, but it will not store the input position! The analysis of the next pattern will start at the position where X stopped, even though Ym is required to state acceptance. """ if post_context_sm is None and EndOfLinePostContextF == False: return the_state_machine, None # State machines with no states are senseless here. assert not the_state_machine.is_empty(), \ "empty state machine can have no post context." assert post_context_sm is None or not post_context_sm.is_empty(), \ "empty state machine cannot be a post-context." # State machines involved with post condition building are part of a pattern, # but not configured out of multiple patterns. Thus there should be no origins. assert the_state_machine.has_origins() == False assert post_context_sm is None or not post_context_sm.has_origins() for state in the_state_machine.get_acceptance_state_list(): for origin in state.origins(): assert origin.pre_context_id() == E_PreContextIDs.NONE, \ "Post Contexts MUST be mounted BEFORE pre-contexts." 
if post_context_sm is None: assert EndOfLinePostContextF # Generate a new post context that just contains the 'newline' post_context_sm = StateMachine(AcceptanceF=True) post_context_sm.mount_newline_to_acceptance_states(Setup.dos_carriage_return_newline_f) elif EndOfLinePostContextF: # Mount 'newline' to existing post context post_context_sm.mount_newline_to_acceptance_states(Setup.dos_carriage_return_newline_f) # A post context with an initial state that is acceptance is not really a # 'context' since it accepts anything. The state machine remains un-post context. if post_context_sm.get_init_state().is_acceptance(): error_msg("Post context accepts anything--replaced by no post context.", SourceReference, DontExitF=True) return the_state_machine, None # (*) Two ways of handling post-contexts: # # -- Seldom Exception: # Pseudo-Ambiguous Post Conditions (x+/x) -- detecting the end of the # core pattern after the end of the post context # has been reached. # if ambiguous_post_context.detect_forward(the_state_machine, post_context_sm): if ambiguous_post_context.detect_backward(the_state_machine, post_context_sm): # -- for post contexts that are forward and backward ambiguous # a philosophical cut is necessary. error_msg("Post context requires philosophical cut--handle with care!\n" "Proposal: Isolate pattern and ensure results are as expected!", SourceReference, DontExitF=True) post_context_sm = ambiguous_post_context.philosophical_cut(the_state_machine, post_context_sm) # NOTE: May be, the_state_machine does contain now an epsilon transition. See # comment at entry of this function. bipd_sm_to_be_inverted = ambiguous_post_context.mount(the_state_machine, post_context_sm) the_state_machine = beautifier.do(the_state_machine) return the_state_machine, bipd_sm_to_be_inverted # -- The 'normal' way: storing the input position at the end of the core # pattern. # # (*) Need to clone the state machines, i.e. 
provide their internal # states with new ids, but the 'behavior' remains. This allows # state machines to appear twice, or being used in 'larger' # conglomerates. post_clone = post_context_sm.clone() # -- Once an acceptance state is reached no further analysis is necessary. ## NO: acceptance_pruning.do(post_clone) ## BECAUSE: it may have to compete with a pseudo-ambiguous post context # (*) collect all transitions from both state machines into a single one # # NOTE: The start index is unique. Therefore, one can assume that each # clone_list '.states' dictionary has different keys. One can simply # take over all transitions of a start index into the result without # considering interferences (see below) # orig_acceptance_state_id_list = the_state_machine.get_acceptance_state_index_list() # -- mount on every acceptance state the initial state of the following state # machine via epsilon transition the_state_machine.mount_to_acceptance_states(post_clone.init_state_index, CancelStartAcceptanceStateF=True) for start_state_index, state in post_clone.states.iteritems(): the_state_machine.states[start_state_index] = state # states are already cloned # -- raise at each old acceptance state the 'store input position flag' # -- set the post context flag for all acceptance states for state_idx in orig_acceptance_state_id_list: state = the_state_machine.states[state_idx] state.set_input_position_store_f(True) # -- no acceptance state shall store the input position # -- set the post context flag for all acceptance states for state in the_state_machine.get_acceptance_state_list(): state.set_input_position_store_f(False) state.set_input_position_restore_f(True) # No input position backward search required return beautifier.do(the_state_machine), None
class WalkAlong(TreeWalker):
    """Walks along pairs (state of SM_A, state of SM_B) and collects paths
    of SM_A in 'self.result'.

    MEMBERS:
      original        -- SM_A, the state machine whose paths are walked.
      admissible      -- SM_B, the state machine that is walked in parallel.
                         Path elements up to an acceptance state of SM_B are
                         cut when a path is integrated.
      result          -- state machine into which the paths are integrated.
      path            -- list of (a_state_index, b_state_index, trigger_set)
                         from the start of the walk to the current node.
      operation_index -- unique index that takes the place of a state index
                         of SM_B, when SM_B is no longer involved.
    """
    def __init__(self, SM_A, SM_B, result=None):
        self.original   = SM_A
        self.admissible = SM_B

        if result is not None:
            self.result = result
        else:
            combined_init_index = index.map_state_combination_to_index(
                                      (SM_A.init_state_index, SM_B.init_state_index))
            self.result = StateMachine(InitStateIndex = combined_init_index,
                                       InitState      = self.get_state_core(SM_A.init_state_index))
        self.path = []

        # 'operation_index' is a unique index. It indicates that 'SM_B' is
        # no longer involved. Also, it ensures that the state indices
        # generated from (a_state_index, operation_index) are unique.
        self.operation_index = index.get()

        TreeWalker.__init__(self)

    def on_enter(self, Args):
        """Enters the node 'Args' = (a_state_index, b_state_index, trigger_set).

        RETURNS: None, if the state pair is already on the path, or if the
                 state of SM_A has no transitions. Else, a list of nodes
                 (a_target, b_target, intersection of trigger sets).
        """
        a_index, b_index, trigger = Args
        assert b_index != self.operation_index

        if self.is_on_path(Args):
            return None

        self.path.append((a_index, b_index, trigger))

        a_state = self.original.states[a_index]
        a_map   = a_state.target_map.get_map()

        # SM_A has reached a terminal. If SM_B accepts here too, then SM_B
        # cuts the path until the terminal, and nothing is integrated.
        if a_state.is_acceptance() and not self.admissible.states[b_index].is_acceptance():
            self.integrate_path_in_result()

        if len(a_map) == 0:
            return None  # No further path to walk along

        b_map = self.admissible.states[b_index].target_map.get_map()

        follow_up_list = []
        for a_target, a_trigger_set in a_map.iteritems():
            # 'uncovered' = what is triggered in SM_A, but not in SM_B.
            uncovered = a_trigger_set.clone()
            for b_target, b_trigger_set in b_map.iteritems():
                common = a_trigger_set.intersection(b_trigger_set)
                if not common.is_empty():
                    follow_up_list.append((a_target, b_target, common))
                    uncovered.subtract(common)

            if uncovered.is_empty():
                continue

            # SM_B is not involved --> 'operation_index' replaces the state
            # index of SM_B. The path element exists only temporarily.
            self.path.append((a_target, self.operation_index, uncovered))
            self.integrate_path_in_result()
            self.path.pop()
            self.result.mount_cloned_subsequent_states(self.original, a_target,
                                                       self.operation_index)

        return follow_up_list

    def on_finished(self, Node):
        """Leaves a node: its element is removed from the path."""
        self.path.pop()

    def is_on_path(self, Args):
        """RETURNS: True,  if the state pair of 'Args' appears on the path
                           (trigger sets are not compared).
                    False, else.
        """
        a_index, b_index, dummy = Args
        return any(ai == a_index and bi == b_index
                   for ai, bi, dummy_trigger in self.path)

    def integrate_path_in_result(self):
        """Enters the current path into 'self.result'. The path elements up
        to the last one where SM_B is in an acceptance state are cut.
        """
        # Default '1' applies, if no acceptance state of SM_B is on the path.
        start_k = 1
        for k, (dummy_ai, bi, dummy_trigger) in r_enumerate(self.path):
            if bi == self.operation_index:
                continue
            if self.admissible.states[bi].is_acceptance():
                start_k = k + 1  # (ai, bi) is cut; next state is good
                break

        if start_k == len(self.path):
            # The last element of the path is an acceptance in SM_B,
            # thus it is cut too. Nothing left.
            return

        ai, bi, trigger_set = self.path[start_k]
        state_index, state  = self.get_state(ai, bi)
        if state_index != self.result.init_state_index:
            # Connect the init state with the first state that remains.
            self.result.get_init_state().target_map.add_transition(trigger_set, state_index)

        # Chain the remaining elements of the path, one after the other.
        for ai, bi, trigger_set in self.path[start_k + 1:]:
            target_index, target_state = self.get_state(ai, bi)
            state.add_transition(trigger_set, target_index)
            state = target_state

        return

    def get_state_core(self, AStateIndex):
        """RETURNS: A new state which is an acceptance state exactly if the
                    state 'AStateIndex' in SM_A is one.
        """
        return State(AcceptanceF=self.original.states[AStateIndex].is_acceptance())

    def get_state(self, a_state_index, b_state_index):
        """RETURNS: (state index, state) in 'self.result' for the given
                    state combination. If there is none yet, it is created.
        """
        combined_index = index.map_state_combination_to_index((a_state_index, b_state_index))
        known_state    = self.result.states.get(combined_index)
        if known_state is not None:
            return combined_index, known_state

        new_state = self.get_state_core(a_state_index)
        self.result.states[combined_index] = new_state
        return combined_index, new_state
def do(the_state_machine, post_context_sm, EndOfLinePostContextF, fh=-1):
    """Mounts a post context on 'the_state_machine' and adapts the state
    infos accordingly.

    ARGUMENTS:
      the_state_machine     -- core pattern; it is modified in place.
      post_context_sm       -- state machine of the post context, or None.
                               If 'EndOfLinePostContextF' is set, a newline
                               is mounted on it in place.
      EndOfLinePostContextF -- request to mount 'newline' on the acceptance
                               states of the post context. If there is no
                               post context, one is created for the newline.
      fh                    -- passed to 'error_msg()' for warnings.

    RETURNS: a tuple of two elements

      (the_state_machine, None) -- untouched, if there is nothing to mount,
                                   or if the init state of the post context
                                   is an acceptance state.
      (sm, ipsb_sm)             -- pseudo-ambiguous post context; 'ipsb_sm'
                                   is the result of
                                   'ambiguous_post_context.mount()'. Both
                                   passed 'beautifier.do()'.
      (sm, None)                -- the 'normal' way; 'sm' passed
                                   'nfa_to_dfa.do()' and 'hopcroft.do()'.

    NOTE: Whenever something is appended, the state machine may contain
          epsilon transitions. It must pass 'NFA to DFA'--better also
          Hopcroft Minimization.
    ________________________________________________________________________

    The process resembles sequentialization, with one major difference:
    For a pattern X with post context Y a match is only valid if X is
    followed by Y. With Xn an acceptance state of X and Ym one of Y:

         ---(Xn-1)---->(Xn)---->(Y0)----> ... ---->((Ym))
                       store                       acceptance
                       input
                       position

    That is:

      -- The input position is stored at Xn, even though Xn is
         'officially' no acceptance state anymore.

      -- Ym is an acceptance state, but it does not store the input
         position. The analysis of the next pattern starts where X
         stopped, even though Ym is required to state acceptance.
    """
    # Empty state machines make no sense in this context.
    assert not the_state_machine.is_empty(), \
           "empty state machine can have no post context."
    assert post_context_sm is None or not post_context_sm.is_empty(), \
           "empty state machine cannot be a post-context."

    # Both state machines belong to a single pattern; they are not a
    # combination of multiple patterns. Hence, no origins are expected.
    assert the_state_machine.has_origins() == False
    assert post_context_sm is None or not post_context_sm.has_origins()

    for acceptance_state in the_state_machine.get_acceptance_state_list():
        for origin in acceptance_state.origins():
            assert origin.pre_context_id() == E_PreContextIDs.NONE, \
                   "Post Contexts MUST be mounted BEFORE pre-contexts."

    if post_context_sm is None:
        if not EndOfLinePostContextF:
            return the_state_machine, None  # Nothing to be mounted.
        # A new post context which, below, receives the 'newline'.
        post_context_sm = StateMachine(AcceptanceF=True)

    if EndOfLinePostContextF:
        # Here, the post context is either the new one from above, or the
        # one passed by the caller.
        post_context_sm.mount_newline_to_acceptance_states(Setup.dos_carriage_return_newline_f)

    # If the init state of the post context accepts, then the post context
    # accepts anything and is no real 'context'. The state machine remains
    # without post context.
    if post_context_sm.get_init_state().is_acceptance():
        error_msg("Post context accepts anything---replaced by no post context.",
                  fh, DontExitF=True)
        return the_state_machine, None

    # (*) Two ways of handling post-contexts:
    #
    #     -- Seldom Exception:
    #        Pseudo-Ambiguous Post Conditions (x+/x) -- the end of the core
    #        pattern is detected after the end of the post context has been
    #        reached.
    if ambiguous_post_context.detect_forward(the_state_machine, post_context_sm):
        if ambiguous_post_context.detect_backward(the_state_machine, post_context_sm):
            # Ambiguous in forward AND backward direction
            # => philosophical cut.
            error_msg("Post context requires philosophical cut--handle with care!\n"
                      "Proposal: Isolate pattern and ensure results are as expected!",
                      fh, DontExitF=True)
            post_context_sm = ambiguous_post_context.philosophical_cut(the_state_machine,
                                                                       post_context_sm)

        # NOTE: 'the_state_machine' may now contain epsilon transitions, see
        #       the NOTE in the function's documentation.
        backward_sm = ambiguous_post_context.mount(the_state_machine, post_context_sm)
        return beautifier.do(the_state_machine), beautifier.do(backward_sm)

    #     -- The 'normal' way: the input position is stored at the end of
    #        the core pattern.
    #
    # (*) The post context is cloned, i.e. its states receive new ids while
    #     the 'behavior' remains. So, a state machine may appear twice or
    #     be used in 'larger' conglomerates.
    cloned_post_sm = post_context_sm.clone()

    # -- Once an acceptance state is reached no further analysis is necessary.
    ## NO: acceptance_pruning.do(cloned_post_sm)
    ## BECAUSE: it may have to compete with a pseudo-ambiguous post context

    # (*) Collect the states of both state machines in a single one.
    #
    #     The state indices of the clone are taken to be unique. So its
    #     states can be entered into '.states' of the core pattern without
    #     considering interferences.
    #
    # The acceptance states must be noted BEFORE mounting, because mounting
    # is called with 'CancelStartAcceptanceStateF=True'.
    core_acceptance_index_list = the_state_machine.get_acceptance_state_index_list()

    # -- epsilon transition from each acceptance state to the init state of
    #    the post context.
    the_state_machine.mount_to_acceptance_states(cloned_post_sm.init_state_index,
                                                 CancelStartAcceptanceStateF=True)
    for cloned_index, cloned_state in cloned_post_sm.states.iteritems():
        the_state_machine.states[cloned_index] = cloned_state  # already cloned

    # -- the former acceptance states store the input position.
    for core_index in core_acceptance_index_list:
        the_state_machine.states[core_index].set_input_position_store_f(True)

    # -- the acceptance states (now) do not store the input position;
    #    they restore it.
    for acceptance_state in the_state_machine.get_acceptance_state_list():
        acceptance_state.set_input_position_store_f(False)
        acceptance_state.set_input_position_restore_f(True)

    # 'hopcroft.do()' is called with 'CreateNewStateMachineF=False'; its
    # return value is not used.
    dfa = nfa_to_dfa.do(the_state_machine)
    hopcroft.do(dfa, CreateNewStateMachineF=False)

    # Second element 'None': no input position backward search required.
    return dfa, None
class WalkAlong(TreeWalker):
    """Tree walker over pairs (state of SM_A, state of SM_B). Paths of SM_A
    are collected in 'self.result'.

    MEMBERS:
      original        -- SM_A, the state machine whose paths are walked.
      admissible      -- SM_B, the state machine that is walked in parallel.
                         Path elements up to an acceptance state of SM_B are
                         cut when a path is integrated.
      result          -- state machine into which the paths are integrated.
      path            -- list of (a_state_index, b_state_index, trigger_set)
                         from the start of the walk to the current node.
      operation_index -- unique index that takes the place of a state index
                         of SM_B, when SM_B is no longer involved.
    """

    def __init__(self, SM_A, SM_B, result=None):
        self.original = SM_A
        self.admissible = SM_B

        if result is not None:
            self.result = result
        else:
            start_pair = (SM_A.init_state_index, SM_B.init_state_index)
            start_index = index.map_state_combination_to_index(start_pair)
            start_state = self.get_state_core(SM_A.init_state_index)
            self.result = StateMachine(InitStateIndex=start_index,
                                       InitState=start_state)
        self.path = []

        # 'operation_index' is a unique index. It indicates that 'SM_B' is
        # no longer involved. Also, it ensures that the state indices
        # generated from (a_state_index, operation_index) are unique.
        self.operation_index = index.get()

        TreeWalker.__init__(self)

    def on_enter(self, Args):
        """Enters the node 'Args' = (a_state_index, b_state_index, trigger_set).

        RETURNS: None, if the state pair is already on the path, or if the
                 state of SM_A has no transitions. Else, a list of nodes
                 (a_target, b_target, intersection of trigger sets).
        """
        a_si, b_si, trigger_set = Args
        assert b_si != self.operation_index

        if self.is_on_path(Args):
            return None

        self.path.append((a_si, b_si, trigger_set))

        a_state = self.original.states[a_si]
        a_tm = a_state.target_map.get_map()

        # SM_A has reached a terminal. If SM_B accepts here too, then SM_B
        # cuts the path until the terminal, and nothing is integrated.
        if a_state.is_acceptance() \
           and not self.admissible.states[b_si].is_acceptance():
            self.integrate_path_in_result()

        if len(a_tm) == 0:
            return None  # No further path to walk along

        b_tm = self.admissible.states[b_si].target_map.get_map()

        sub_node_list = []
        for a_ti, a_set in a_tm.iteritems():
            # 'rest' = what is triggered in SM_A, but not in SM_B.
            rest = a_set.clone()
            for b_ti, b_set in b_tm.iteritems():
                shared = a_set.intersection(b_set)
                if shared.is_empty():
                    continue
                sub_node_list.append((a_ti, b_ti, shared))
                rest.subtract(shared)

            if rest.is_empty():
                continue

            # SM_B is not involved --> 'operation_index' replaces the state
            # index of SM_B. The path element exists only temporarily.
            self.path.append((a_ti, self.operation_index, rest))
            self.integrate_path_in_result()
            self.path.pop()
            self.result.mount_cloned_subsequent_states(
                self.original, a_ti, self.operation_index)

        return sub_node_list

    def on_finished(self, Node):
        """Leaves a node: its element is removed from the path."""
        self.path.pop()

    def is_on_path(self, Args):
        """RETURNS: True,  if the state pair of 'Args' appears on the path
                           (trigger sets are not compared).
                    False, else.
        """
        a_si, b_si, dummy = Args
        return any(ai == a_si and bi == b_si
                   for ai, bi, dummy_set in self.path)

    def integrate_path_in_result(self):
        """Enters the current path into 'self.result'. The path elements up
        to the last one where SM_B is in an acceptance state are cut.
        """
        # Default '1' applies, if no acceptance state of SM_B is on the path.
        first_k = 1
        for k, (dummy_ai, bi, dummy_set) in r_enumerate(self.path):
            b_involved_f = (bi != self.operation_index)
            if b_involved_f and self.admissible.states[bi].is_acceptance():
                first_k = k + 1  # (ai, bi) is cut; next state is good
                break

        if first_k == len(self.path):
            # The last element of the path is an acceptance in SM_B,
            # thus it is cut too. Nothing left.
            return

        ai, bi, trigger_set = self.path[first_k]
        state_index, state = self.get_state(ai, bi)
        if state_index != self.result.init_state_index:
            # Connect the init state with the first state that remains.
            init_state = self.result.get_init_state()
            init_state.target_map.add_transition(trigger_set, state_index)

        # Chain the remaining elements of the path, one after the other.
        for ai, bi, trigger_set in islice(self.path, first_k + 1, None):
            target_index, target_state = self.get_state(ai, bi)
            state.add_transition(trigger_set, target_index)
            state = target_state

        return

    def get_state_core(self, AStateIndex):
        """RETURNS: A new state which is an acceptance state exactly if the
                    state 'AStateIndex' in SM_A is one.
        """
        a_state = self.original.states[AStateIndex]
        return State(AcceptanceF=a_state.is_acceptance())

    def get_state(self, a_state_index, b_state_index):
        """RETURNS: (state index, state) in 'self.result' for the given
                    state combination. If there is none yet, it is created.
        """
        pair = (a_state_index, b_state_index)
        state_index = index.map_state_combination_to_index(pair)
        state = self.result.states.get(state_index)
        if state is not None:
            return state_index, state

        state = self.get_state_core(a_state_index)
        self.result.states[state_index] = state
        return state_index, state