def do(SM_A, SM_B):
    """Compute a state machine that stops right before anything matched
    by 'SM_B'.

    For a lexeme 'l' in L(SM_A)

              l = [x0, x1, ... xj, xk, ... xN]

    where '[xk ... xN]' is a lexeme from L(SM_B), the result shall match
    only '[x0, x1, ... xj]'.  All such lexemes translate in reverse(SM_A)
    to

              s = [xN, ... xk, xj, ... x1, x0]

    and lexemes of SM_B translate in reverse(SM_B) to

              t = [xN, ... xk]

    so that cut(reverse(SM_A), reverse(SM_B)) delivers

              u = [xj, ... x1, x0]

    and the final reversal delivers '[x0, x1, ... xj]' as desired for all
    lexemes in SM_A that end with something matching SM_B.

    (C) Frank-Rene Schaefer
    """
    reversed_a = beautifier.do(reverse.do(SM_A))
    reversed_b = beautifier.do(reverse.do(SM_B))
    return reverse.do(complement_begin.do(reversed_a, reversed_b))
def do(SM_A, SM_B):
    """Compute a state machine that stops right before anything matched
    by 'SM_B'.

    For a lexeme 'l' in L(SM_A)

              l = [x0, x1, ... xj, xk, ... xN]

    where '[xk ... xN]' is a lexeme from L(SM_B), the result shall match
    only '[x0, x1, ... xj]'.  All such lexemes translate in reverse(SM_A)
    to

              s = [xN, ... xk, xj, ... x1, x0]

    and lexemes of SM_B translate in reverse(SM_B) to

              t = [xN, ... xk]

    so that cut(reverse(SM_A), reverse(SM_B)) delivers

              u = [xj, ... x1, x0]

    and the final reversal delivers '[x0, x1, ... xj]' as desired for all
    lexemes in SM_A that end with something matching SM_B.

    (C) Frank-Rene Schaefer
    """
    reversed_a = beautifier.do(reverse.do(SM_A))
    reversed_b = beautifier.do(reverse.do(SM_B))
    return reverse.do(complement_begin.do(reversed_a, reversed_b))
def __init__(self, CoreSM, PreContextSM=None, PostContextSM=None,
             BeginOfLineF=False, EndOfLineF=False, Sr=SourceRef_VOID,
             PatternString="",
             AllowNothingIsNecessaryF=False):
    """Construct a pattern from a core state machine plus optional
    pre- and post-contexts.

    CoreSM                   -- state machine of the core pattern (required).
    PreContextSM             -- state machine of the pre-context, or None.
    PostContextSM            -- state machine of the post-context, or None.
    BeginOfLineF             -- True => trivial 'begin-of-line' pre-context.
    EndOfLineF               -- True => trivial 'end-of-line' post-context.
    Sr                       -- source reference (for error reporting).
    PatternString            -- textual representation of the pattern.
    AllowNothingIsNecessaryF -- forwarded to the initial consistency check.
    """
    assert PreContextSM is None or isinstance(PreContextSM, StateMachine)
    Pattern.check_initial(CoreSM, BeginOfLineF, PreContextSM, EndOfLineF,
                          PostContextSM, Sr, AllowNothingIsNecessaryF)
    self.__pattern_string = PatternString
    self.__sr = Sr
    # (*) Setup the whole pattern
    self.__sm = CoreSM
    self.__post_context_sm = PostContextSM
    self.__post_context_end_of_line_f = EndOfLineF
    assert self.__sm is not None
    # -- [optional] post contexts
    self.__post_context_f = (PostContextSM is not None)
    # Backward input position detection requires an inversion of the
    # state machine. This can only be done after the (optional) codec
    # transformation. Thus, a non-inverted version of the state machine
    # is maintained until the transformation is done.
    self.__bipd_sm_to_be_inverted = None
    self.__bipd_sm = None
    # -- [optional] pre contexts
    #
    # Same as for backward input position detection holds for pre-contexts.
    self.__pre_context_sm_to_be_inverted = PreContextSM
    self.__pre_context_sm = None
    # All state machines must be DFAs
    if not self.__sm.is_DFA_compliant():
        self.__sm = beautifier.do(self.__sm)
    if self.__pre_context_sm_to_be_inverted is not None \
       and not self.__pre_context_sm_to_be_inverted.is_DFA_compliant():
        self.__pre_context_sm_to_be_inverted = beautifier.do(
            self.__pre_context_sm_to_be_inverted)
    # Detect the trivial pre-context
    self.__pre_context_begin_of_line_f = BeginOfLineF
    # The line/column count information can only be determined when the
    # line/column count database is present. Thus, it is delayed.
    self.__count_info = None
    # Ensure, that the pattern is never transformed twice
    self.__alarm_transformed_f = False
    self.__validate(Sr)
def __init__(self, CoreSM, PreContextSM=None, PostContextSM=None,
             BeginOfLineF=False, EndOfLineF=False, Sr=SourceRef_VOID,
             PatternString="",
             AllowNothingIsNecessaryF=False):
    """Construct a pattern from a core state machine plus optional
    pre- and post-contexts.

    CoreSM                   -- state machine of the core pattern (required).
    PreContextSM             -- state machine of the pre-context, or None.
    PostContextSM            -- state machine of the post-context, or None.
    BeginOfLineF             -- True => trivial 'begin-of-line' pre-context.
    EndOfLineF               -- True => trivial 'end-of-line' post-context.
    Sr                       -- source reference (for error reporting).
    PatternString            -- textual representation of the pattern.
    AllowNothingIsNecessaryF -- forwarded to the initial consistency check.
    """
    assert PreContextSM is None or isinstance(PreContextSM, StateMachine)
    Pattern.check_initial(CoreSM, BeginOfLineF, PreContextSM, EndOfLineF,
                          PostContextSM, Sr, AllowNothingIsNecessaryF)
    self.__pattern_string = PatternString
    self.__sr = Sr
    # (*) Setup the whole pattern
    self.__sm = CoreSM
    self.__post_context_sm = PostContextSM
    self.__post_context_end_of_line_f = EndOfLineF
    assert self.__sm is not None
    # -- [optional] post contexts
    self.__post_context_f = (PostContextSM is not None)
    # Backward input position detection requires an inversion of the
    # state machine. This can only be done after the (optional) codec
    # transformation. Thus, a non-inverted version of the state machine
    # is maintained until the transformation is done.
    self.__bipd_sm_to_be_inverted = None
    self.__bipd_sm = None
    # -- [optional] pre contexts
    #
    # Same as for backward input position detection holds for pre-contexts.
    self.__pre_context_sm_to_be_inverted = PreContextSM
    self.__pre_context_sm = None
    # All state machines must be DFAs
    if not self.__sm.is_DFA_compliant():
        self.__sm = beautifier.do(self.__sm)
    if self.__pre_context_sm_to_be_inverted is not None \
       and not self.__pre_context_sm_to_be_inverted.is_DFA_compliant():
        self.__pre_context_sm_to_be_inverted = beautifier.do(
            self.__pre_context_sm_to_be_inverted)
    # Detect the trivial pre-context
    self.__pre_context_begin_of_line_f = BeginOfLineF
    # The line/column count information can only be determined when the
    # line/column count database is present. Thus, it is delayed.
    self.__count_info = None
    # Ensure, that the pattern is never transformed twice
    self.__alarm_transformed_f = False
    self.__validate(Sr)
def more_DFAs(A, B):
    """Derive three DFAs from the state machines 'A' and 'B'.

    RETURNS: [0] DFA for 'B+'
             [1] DFA for 'B*'
             [2] DFA for 'B*A'
    """
    one_or_more_b = repeat.do(B)
    any_number_of_b = repeat.do(B, min_repetition_n=0)
    b_star_then_a = beautifier.do(sequentialize.do([any_number_of_b, A]))
    return (beautifier.do(one_or_more_b),
            beautifier.do(any_number_of_b),
            b_star_then_a)
def mount_post_context_sm(self):
    """Mount the post-context onto the pattern's core state machine.

    Delegates to 'setup_post_context.do()' which returns the combined
    core machine and (optionally) the machine for backward input position
    detection (BIPD). If a BIPD machine exists, it is made DFA compliant
    and its reversed, beautified form is stored in 'self.__bipd_sm'.
    """
    self.__sm, \
    self.__bipd_sm_to_be_inverted = setup_post_context.do(self.__sm,
                                                          self.__post_context_sm,
                                                          self.__post_context_end_of_line_f,
                                                          self.__sr)
    if self.__bipd_sm_to_be_inverted is None:
        return
    # (The early return above guarantees 'self.__bipd_sm_to_be_inverted'
    #  is not None here; the former redundant 'is not None' check was
    #  removed.)
    if not self.__bipd_sm_to_be_inverted.is_DFA_compliant():
        self.__bipd_sm_to_be_inverted = beautifier.do(self.__bipd_sm_to_be_inverted)
    self.__bipd_sm = beautifier.do(reverse.do(self.__bipd_sm_to_be_inverted))
def mount_post_context_sm(self):
    """Mount the post-context onto the pattern's core state machine.

    Delegates to 'setup_post_context.do()' which returns the combined
    core machine and (optionally) the machine for backward input position
    detection (BIPD). If a BIPD machine exists, it is made DFA compliant
    and its reversed, beautified form is stored in 'self.__bipd_sm'.
    """
    self.__sm, \
    self.__bipd_sm_to_be_inverted = setup_post_context.do(self.__sm,
                                                          self.__post_context_sm,
                                                          self.__post_context_end_of_line_f,
                                                          self.__sr)
    if self.__bipd_sm_to_be_inverted is None:
        return
    # (The early return above guarantees 'self.__bipd_sm_to_be_inverted'
    #  is not None here; the former redundant 'is not None' check was
    #  removed.)
    if not self.__bipd_sm_to_be_inverted.is_DFA_compliant():
        self.__bipd_sm_to_be_inverted = beautifier.do(self.__bipd_sm_to_be_inverted)
    self.__bipd_sm = beautifier.do(reverse.do(self.__bipd_sm_to_be_inverted))
def do_state_machine(SmIn):
    """Transform a state machine from 'Unicode Driven' to another
    character encoding type.

    RETURNS: [0] True if the transformation was complete; False if not
                 everything could be transformed.
             [1] The transformed state machine. It may be identical to
                 the input if no transformation actually took place.
                 The result is guaranteed to be DFA compliant.
    """
    if SmIn is None:
        return True, None
    assert SmIn.is_DFA_compliant()
    # BEFORE: Forgive characters not in source range. What comes out is
    # important. It is checked in 'transform()' of the Pattern.
    complete_f, sm_out = Setup.buffer_codec.transform(SmIn)
    # AFTER: Whatever happened, the transitions in the state machine MUST
    # lie in the drain_set.
    sm_out.assert_range(Setup.buffer_codec.drain_set)
    if not sm_out.is_DFA_compliant():
        sm_out = beautifier.do(sm_out)
    return complete_f, sm_out
def __get_inverse_state_machine_that_finds_end_of_core_expression(PostConditionSM):
    """Build the inverse state machine of 'PostConditionSM' so that, in
    case of a pseudo-ambiguous post condition, one can walk backwards to
    search for the end of the core condition.

    NOTE: This is a special case: it is already known that the state
    machine reaches an acceptance state at some point (that is where it
    actually started). So in non-acceptance states the 'drop out'
    triggers CANNOT occur, which would in principle permit a speed-up
    when going backwards.

    NOTE: When tracing backwards the match is guaranteed, but there may
    still be some 'trail' from iterations that are not directly iterated
    to the ambiguous post condition. Thus a drop out may happen, and it
    must be clear where to put the input pointer in that case; acceptance
    states therefore need to store the input position.
    """
    return beautifier.do(PostConditionSM.get_inverse())
def do(sm):
    """Split transitions whose intervals reach beyond 0x10000 by routing
    them through intermediate states (via 'create_intermediate_states').
    Transitions that lie entirely below 0x10000 are restored unchanged.

    RETURNS: The beautified (NFA->DFA + minimized) resulting state machine.
    """
    # Snapshot the states explicitly: 'create_intermediate_states()' adds
    # new states to 'sm.states' while we iterate.
    state_list = list(sm.states.items())
    for state_index, state in state_list:
        # Pairs (TargetStateIndex, NumberSet): which target state is
        # reached via which number set.
        transition_list = state.transitions().get_map().items()
        # Clear the state's transitions, so it can absorb new transitions
        # to intermediate states.
        state.transitions().clear()
        for target_state_index, number_set in transition_list:
            # 'PromiseToTreatWellF=True': the intervals would be lost after
            # the state split anyway, so the same memory is reused to avoid
            # a time-consuming copy and constructor calls.
            interval_list = number_set.get_intervals(PromiseToTreatWellF=True)
            # Modification is only required if some interval starts at or
            # beyond 0x10000.
            if all(interval.begin < 0x10000 for interval in interval_list):
                state.add_transition(number_set, target_state_index)
                continue
            # Intermediate states may now be added.
            for interval in interval_list:
                create_intermediate_states(sm, state_index, target_state_index,
                                           interval)
    return beautifier.do(sm)
def philosophical_cut(core_sm, post_context_sm):
    """The 'philosophical cut' is a technique introduced by Frank-Rene
    Schaefer to produce a pair of a core- and a post-condition that would
    otherwise be forward and backward ambiguous.

    Its philosophical ground is 'greed': a core pattern should eat as
    many characters as it can; this idea is followed during the whole
    construction of the lexical analyzer. For total ambiguity 'x+/x+'
    this translates into leaving the iteration in the core condition and
    cutting it in the post condition: 'x+/x+' becomes 'x+/x', which the
    technique for forward ambiguous post conditions can solve.

    __dive -- indicator of recursion! replace by TreeWalker
    """
    pcsm_init_state = post_context_sm.get_init_state()
    for csm_state in core_sm.get_acceptance_state_list():
        __dive_to_cut_iteration(core_sm, csm_state,
                                post_context_sm, pcsm_init_state,
                                SM1_Path=[post_context_sm.init_state_index])
    # Cutting may leave 'bold' states, i.e. states with only an epsilon
    # transition. Hence an NFA->DFA transformation plus hopcroft
    # optimization is required.
    return beautifier.do(post_context_sm)
def __specify_comment(self, Sm, sr):
    """Register the state machine 'Sm' as the 'comment to newline'
    skipper. Raises an error if a comment skipper was defined before.
    """
    _error_if_defined_before(self.result.sm_comment, sr)
    begin_set = Sm.get_beginning_character_set()
    self.specifier_count_op_map.add(begin_set, "begin(comment to newline)",
                                    None, sr)
    if not Sm.is_DFA_compliant():
        Sm = beautifier.do(Sm)
    self.result.sm_comment.set(Sm, sr)
def specify_suppressor(self, Sm, sr):
    """Register the state machine 'Sm' as the newline suppressor.
    Raises an error if a suppressor was defined before.
    """
    _error_if_defined_before(self.sm_newline_suppressor, sr)
    begin_set = Sm.get_beginning_character_set()
    self.count_command_map.add(begin_set, "begin(newline suppressor)",
                               None, sr)
    if not Sm.is_DFA_compliant():
        Sm = beautifier.do(Sm)
    self.sm_newline_suppressor.set(Sm, sr)
def philosophical_cut(core_sm, post_context_sm):
    """The 'philosophical cut' is a technique introduced by Frank-Rene
    Schaefer to produce a pair of a core- and a post-condition that would
    otherwise be forward and backward ambiguous.

    Its philosophical ground is 'greed': a core pattern should eat as
    many characters as it can; this idea is followed during the whole
    construction of the lexical analyzer. For total ambiguity 'x+/x+'
    this translates into leaving the iteration in the core condition and
    cutting it in the post condition: 'x+/x+' becomes 'x+/x', which the
    technique for forward ambiguous post conditions can solve.

    __dive -- indicator of recursion! replace by TreeWalker
    """
    pcsm_init_state = post_context_sm.get_init_state()
    for csm_state in core_sm.get_acceptance_state_list():
        __dive_to_cut_iteration(core_sm, csm_state,
                                post_context_sm, pcsm_init_state,
                                SM1_Path=[post_context_sm.init_state_index])
    # Cutting may leave 'bold' states, i.e. states with only an epsilon
    # transition. Hence an NFA->DFA transformation plus hopcroft
    # optimization is required.
    return beautifier.do(post_context_sm)
def __get_inverse_state_machine_that_finds_end_of_core_expression(PostConditionSM):
    """Build the inverse state machine of 'PostConditionSM' so that, in
    case of a pseudo-ambiguous post condition, one can walk backwards to
    search for the end of the core condition.

    NOTE: This is a special case: it is already known that the state
    machine reaches an acceptance state at some point (that is where it
    actually started). So in non-acceptance states the 'drop out'
    triggers CANNOT occur, which would in principle permit a speed-up
    when going backwards.

    NOTE: When tracing backwards the match is guaranteed, but there may
    still be some 'trail' from iterations that are not directly iterated
    to the ambiguous post condition. Thus a drop out may happen, and it
    must be clear where to put the input pointer in that case; acceptance
    states therefore need to store the input position.
    """
    return beautifier.do(PostConditionSM.get_inverse())
def is_all(SM):
    """RETURNS: True if 'SM' has exactly two states: an init state which
    is not accepting, and an accepting state which transits to itself
    forever -- i.e. the pattern matches anything of any length.
    False, otherwise.
    """
    sm = beautifier.do(SM)
    if len(sm.states) != 2:
        return False
    # Init state:
    # -- not an acceptance state
    # -- only one transition on 'all' into the (other) acceptance state.
    init_state = sm.get_init_state()
    if init_state.is_acceptance():
        return False
    tm = init_state.target_map.get_map()
    if len(tm) != 1:
        return False
    # ('next(iter(...))' instead of Py2-only '.iteritems().next()'.)
    target_index, trigger_set = next(iter(tm.items()))
    if not trigger_set.is_all():
        return False
    if target_index == sm.init_state_index:
        return False
    # The acceptance state:
    # -- only one transition on 'all' to itself.
    target_state = sm.states[target_index]
    if not target_state.is_acceptance():
        return False
    tm = target_state.target_map.get_map()
    if len(tm) != 1:
        return False
    target_index_2, trigger_set = next(iter(tm.items()))
    if not trigger_set.is_all():
        return False
    return target_index_2 == target_index
def is_all(SM):
    """RETURNS: True if 'SM' has exactly two states: an init state which
    is not accepting, and an accepting state which transits to itself
    forever -- i.e. the pattern matches anything of any length.
    False, otherwise.
    """
    sm = beautifier.do(SM)
    if len(sm.states) != 2:
        return False
    # Init state:
    # -- not an acceptance state
    # -- only one transition on 'all' into the (other) acceptance state.
    init_state = sm.get_init_state()
    if init_state.is_acceptance():
        return False
    tm = init_state.target_map.get_map()
    if len(tm) != 1:
        return False
    # ('next(iter(...))' instead of Py2-only '.iteritems().next()'.)
    target_index, trigger_set = next(iter(tm.items()))
    if not trigger_set.is_all():
        return False
    if target_index == sm.init_state_index:
        return False
    # The acceptance state:
    # -- only one transition on 'all' to itself.
    target_state = sm.states[target_index]
    if not target_state.is_acceptance():
        return False
    tm = target_state.target_map.get_map()
    if len(tm) != 1:
        return False
    target_index_2, trigger_set = next(iter(tm.items()))
    if not trigger_set.is_all():
        return False
    return target_index_2 == target_index
def snap_expression(stream, PatternDict):
    """Parse an expression according to the grammar:

           expression:  term
                        term | expression
    """
    __debug_entry("expression", stream)
    # A leading term is mandatory.
    term_sm = snap_term(stream, PatternDict)
    if term_sm is None:
        return __debug_exit(None, stream)
    # Without a following '|' the term alone is the expression.
    if not check(stream, '|'):
        return __debug_exit(term_sm, stream)
    fallback_position = stream.tell()
    __debug_print("'|' (in expression)")
    # Parse the alternative behind the '|'.
    alternative_sm = snap_expression(stream, PatternDict)
    __debug_print("expression(in expression):", alternative_sm)
    if alternative_sm is None:
        stream.seek(fallback_position)
        return __debug_exit(term_sm, stream)
    combined = parallelize.do([term_sm, alternative_sm],
                              CloneF=True)   # CloneF = False (shold be!)
    return __debug_exit(beautifier.do(combined), stream)
def snap_term(stream, PatternDict):
    """Parse a term according to the grammar:

           term:  primary
                  primary term
    """
    __debug_entry("term", stream)
    # A leading primary is mandatory.
    head_sm = snap_primary(stream, PatternDict)
    __debug_print("##primary(in term):", head_sm)
    if head_sm is None:
        return __debug_exit(None, stream)
    fallback_position = stream.tell()
    # An optional trailing term may follow.
    tail_sm = snap_term(stream, PatternDict)
    __debug_print("##term(in term):", tail_sm)
    if tail_sm is None:
        stream.seek(fallback_position)
        return __debug_exit(head_sm, stream)
    combined = sequentialize.do([head_sm, tail_sm],
                                MountToFirstStateMachineF=True,
                                CloneRemainingStateMachinesF=False)
    return __debug_exit(beautifier.do(combined), stream)
def snap_expression(stream, PatternDict):
    """Parse an expression according to the grammar:

           expression:  term
                        term | expression
    """
    __debug_entry("expression", stream)
    # A leading term is mandatory.
    term_sm = snap_term(stream, PatternDict)
    if term_sm is None:
        return __debug_exit(None, stream)
    # Without a following '|' the term alone is the expression.
    if not check(stream, '|'):
        return __debug_exit(term_sm, stream)
    fallback_position = stream.tell()
    __debug_print("'|' (in expression)")
    # Parse the alternative behind the '|'.
    alternative_sm = snap_expression(stream, PatternDict)
    __debug_print("expression(in expression):", alternative_sm)
    if alternative_sm is None:
        stream.seek(fallback_position)
        return __debug_exit(term_sm, stream)
    combined = parallelize.do([term_sm, alternative_sm],
                              CloneF=True)   # CloneF = False (shold be!)
    return __debug_exit(beautifier.do(combined), stream)
def test(TestString):
    """Parse 'TestString' as a pattern, then print the resulting core
    state machine, pre-context and backward-input-position detector in
    normalized hexadecimal form. (Python 2 test helper.)
    """
    print "-------------------------------------------------------------------"
    print "expression = \"" + TestString + "\""
    pattern = core.do(TestString, {}).finalize(None)
    # During 'finalize()': pattern.transform(Setup.buffer_encoding)
    # During 'finalize()': pattern.mount_post_context_sm()
    # During 'finalize()': pattern.mount_pre_context_sm()
    print "pattern\n"
    assert pattern.sm.is_DFA_compliant()
    ok_f, sm = Setup.buffer_encoding.do_state_machine(pattern.sm)
    # NOTE(review): the transformed 'sm' from the line above is
    # immediately overwritten with 'beautifier.do(pattern.sm)' -- verify
    # whether the transformation result was actually meant to be printed.
    sm = beautifier.do(pattern.sm)
    print sm.get_string(NormalizeF=True, Option="hex")
    if pattern.sm_pre_context_to_be_reversed:
        assert pattern.sm_pre_context_to_be_reversed.is_DFA_compliant()
        ok_f, sm = Setup.buffer_encoding.do_state_machine(
            pattern.sm_pre_context_to_be_reversed)
        reversed_sm = reverse.do(sm)
        print "pre-context = ", reversed_sm.get_string(NormalizeF=True, Option="hex")
    if pattern.sm_bipd_to_be_reversed:
        assert pattern.sm_bipd_to_be_reversed.is_DFA_compliant()
        ok_f, sm = Setup.buffer_encoding.do_state_machine(
            pattern.sm_bipd_to_be_reversed)
        sm = reverse.do(sm)
        print "post-context backward input position detector = ", sm.get_string(
            NormalizeF=True, Option="hex")
def do(the_state_machine, pre_context_sm, BeginOfLinePreContextF):
    """Set up a pre-condition for 'the_state_machine'.

    This is entirely different from sequentializing or parallelizing
    state machines: the pre-condition machine is **not** webbed into the
    original machine. Instead:

    -- the pre-condition state machine is reversed, because it is to be
       walked through backwards;
    -- the reversed machine is marked with the state machine id of
       'the_state_machine';
    -- the original machine refers to the reversed pre-condition machine;
    -- the acceptance states are marked as 'pre-conditioned' with the id
       of that reversed machine.

    RETURNS: The reversed pre-condition machine, or None if there is only
             a trivial (begin-of-line) pre-context or none at all.
    """
    # Consistency: machines with no states are senseless here.
    assert not the_state_machine.is_empty()
    assert pre_context_sm is None or not pre_context_sm.is_empty()
    # (trivial pre-conditions should be added last, for simplicity)

    if pre_context_sm is None:   # NOT: 'and ...' !
        if BeginOfLinePreContextF:
            # Mark all acceptance states with the trivial pre-context
            # 'begin-of-line'.
            for state in the_state_machine.get_acceptance_state_list():
                state.set_pre_context_id(E_PreContextIDs.BEGIN_OF_LINE)
        return None

    # (*) Reverse the state machine of the pre-condition.
    reverse_pre_context = reverse.do(pre_context_sm)
    if BeginOfLinePreContextF:
        # Extend the existing pre-context with a preceeding 'begin-of-line'.
        reverse_newline_sm = reverse.do(StateMachine_Newline())
        reverse_pre_context = sequentialize.do([reverse_pre_context,
                                                reverse_newline_sm])

    # (*) Once an acceptance state is reached no further analysis is necessary.
    acceptance_pruning.do(reverse_pre_context)
    # (*) Clean up what inversion (and optionally 'BeginOfLinePreContextF')
    #     produced -- AFTER acceptance_pruning (!)
    reverse_pre_context = beautifier.do(reverse_pre_context)

    # (*) Let the state machine refer to it.
    # [Is this necessary? Is it not enough that the acceptance origins
    #  point to it? <fschaef>]
    pre_context_sm_id = reverse_pre_context.get_id()
    # (*) Associate acceptance with pre-context id.
    for state in the_state_machine.get_acceptance_state_list():
        state.set_pre_context_id(pre_context_sm_id)
    return reverse_pre_context
def do(the_state_machine, pre_context_sm, BeginOfLinePreContextF):
    """Set up a pre-condition for 'the_state_machine'.

    This is entirely different from sequentializing or parallelizing
    state machines: the pre-condition machine is **not** webbed into the
    original machine. Instead:

    -- the pre-condition state machine is inverted, because it is to be
       walked through backwards;
    -- the inverted machine is marked with the state machine id of
       'the_state_machine';
    -- the original machine refers to the inverted pre-condition machine;
    -- the acceptance states are marked as 'pre-conditioned' with the id
       of that inverted machine.

    RETURNS: The inverted pre-condition machine, or None if there is only
             a trivial (begin-of-line) pre-context or none at all.
    """
    # Consistency: machines with no states are senseless here.
    assert not the_state_machine.is_empty()
    assert pre_context_sm is None or not pre_context_sm.is_empty()
    # (trivial pre-conditions should be added last, for simplicity)

    if pre_context_sm is None:
        if BeginOfLinePreContextF:
            # Mark all acceptance states with the trivial pre-context
            # 'begin-of-line'.
            for state in the_state_machine.get_acceptance_state_list():
                state.set_pre_context_id(E_PreContextIDs.BEGIN_OF_LINE)
        return None

    # (*) Reverse the state machine of the pre-condition.
    inverse_pre_context = reverse.do(pre_context_sm)
    if BeginOfLinePreContextF:
        # Extend the existing pre-context with a preceeding 'begin-of-line'.
        inverse_pre_context.mount_newline_to_acceptance_states(
            Setup.dos_carriage_return_newline_f, InverseF=True)

    # (*) Once an acceptance state is reached no further analysis is necessary.
    acceptance_pruning.do(inverse_pre_context)
    # (*) Clean up what inversion (and optionally 'BeginOfLinePreContextF')
    #     produced -- AFTER acceptance_pruning (!)
    inverse_pre_context = beautifier.do(inverse_pre_context)

    # (*) Let the state machine refer to it.
    # [Is this necessary? Is it not enough that the acceptance origins
    #  point to it? <fschaef>]
    pre_context_sm_id = inverse_pre_context.get_id()
    # (*) Create origin data; where there is none yet, create new one
    #     (do not delete, otherwise existing information gets lost).
    for state in the_state_machine.states.itervalues():
        if state.is_acceptance():
            state.set_pre_context_id(pre_context_sm_id)
    return inverse_pre_context
def do(SM_List):
    """Union of multiple state machines. The 'parallelize' module already
    combines state machines of different origins carefully; there is no
    reason to implement a separate 'union' operation for this case.
    """
    return beautifier.do(parallelize.do(SM_List))
def is_none(SM):
    """RETURNS: True if the given state machine matches absolutely
    nothing; False otherwise.
    """
    sm = beautifier.do(SM)
    # A single, non-accepting init state accepts no lexeme at all.
    return len(sm.states) == 1 and not sm.get_init_state().is_acceptance()
def is_none(SM):
    """RETURNS: True if the given state machine matches absolutely
    nothing; False otherwise.
    """
    sm = beautifier.do(SM)
    # A single, non-accepting init state accepts no lexeme at all.
    return len(sm.states) == 1 and not sm.get_init_state().is_acceptance()
def do(SM_List):
    """Union of multiple state machines. The 'parallelize' module already
    combines state machines of different origins carefully; there is no
    reason to implement a separate 'union' operation for this case.
    """
    return beautifier.do(parallelize.do(SM_List))
def __DFA(SM):
    """Return a DFA-compliant version of 'SM'. 'None' passes through;
    an already compliant machine is returned unchanged.
    """
    if SM is None:
        return None
    if SM.is_DFA_compliant():
        return SM
    return beautifier.do(SM)
def equal(X_str, Y_str):
    """Evaluate the two set-expression strings, beautify both resulting
    state machines, and check them for identity. The verdict is appended
    to the global 'protocol' list; on mismatch a diagnostic dump of both
    machines is printed. (Python 2 test helper.)
    """
    global X
    global Y
    global report
    # NOTE: 'exec' on a constructed string -- acceptable only because the
    # inputs are test-internal literals, never untrusted data. 'All' and
    # 'None' in the expression are mapped to the prepared machines
    # 'All_sm' / 'None_sm'.
    exec("sm0 = " + X_str.replace("All", "All_sm").replace("None", "None_sm"))
    exec("sm1 = " + Y_str.replace("All", "All_sm").replace("None", "None_sm"))
    sm0 = beautifier.do(sm0)
    sm1 = beautifier.do(sm1)
    result = identity.do(sm0, sm1)
    if result is False:
        print "X:", X
        # print "Y:", Y
        print "Error"
        print "%s: -->\n%s" % (X_str, sm0)
        print "%s: -->\n%s" % (Y_str, sm1)
        print "#---------------------------------------------------------"
    # Every comparison is recorded, regardless of the verdict.
    protocol.append((X_str, "==", Y_str, result))
def equal(X_str, Y_str):
    """Evaluate the two set-expression strings, beautify both resulting
    state machines, and check them for identity. The verdict is appended
    to the global 'protocol' list; on mismatch a diagnostic dump of both
    machines is printed. (Python 2 test helper.)
    """
    global X
    global Y
    global report
    # NOTE: 'exec' on a constructed string -- acceptable only because the
    # inputs are test-internal literals, never untrusted data. 'All' and
    # 'None' in the expression are mapped to the prepared machines
    # 'All_sm' / 'None_sm'.
    exec("sm0 = " + X_str.replace("All", "All_sm").replace("None", "None_sm"))
    exec("sm1 = " + Y_str.replace("All", "All_sm").replace("None", "None_sm"))
    sm0 = beautifier.do(sm0)
    sm1 = beautifier.do(sm1)
    result = identity.do(sm0, sm1)
    if result is False:
        print "X:", X
        # print "Y:", Y
        print "Error"
        print "%s: -->\n%s" % (X_str, sm0)
        print "%s: -->\n%s" % (Y_str, sm1)
        print "#---------------------------------------------------------"
    # Every comparison is recorded, regardless of the verdict.
    protocol.append((X_str, "==", Y_str, result))
def _prepare_indentation_counter(ModeName, OptionsDb, CounterDb, IncidenceDb, MHI):
    """Prepare the indentation counter. It is implemented by:

    'newline' pattern    --> triggers as soon as an UNSUPPRESSED newline
                             occurs; entry to the INDENTATION COUNTER.
    'suppressed newline' --> INDENTATION COUNTER is NOT triggered.

    The suppressed-newline pattern is longer than (and thus takes
    precedence over) the newline pattern. With it, lines can overstep the
    newline (compare the backslash in Python, for example).

    RETURNS: List of: [0] newline PPT and [1] optionally the PPT of the
             newline suppressor.

    The primary pattern-action-pair list is to be the head of all pattern
    action pairs. MHI = mode hierarchy index defining the priority of the
    current mode.
    """
    ISetup = OptionsDb.value("indentation")
    if ISetup is None:
        return [], []

    check_indentation_setup(ISetup)

    # A suppressed newline is the suppressor directly followed by newline.
    suppressor_sm = ISetup.sm_newline_suppressor.get()
    if suppressor_sm is not None:
        sm_suppressed_newline = beautifier.do(
            sequentialize.do([suppressor_sm, ISetup.sm_newline.get()]))
    else:
        sm_suppressed_newline = None

    data = {
        "counter_db":                    CounterDb,
        "indentation_setup":             ISetup,
        "incidence_db":                  IncidenceDb,
        "default_indentation_handler_f": IncidenceDb.default_indentation_handler_f(),
        "mode_name":                     ModeName,
        "sm_suppressed_newline":         sm_suppressed_newline,
    }

    # 'newline' triggers --> indentation counter.
    ppt_list = [PPT_indentation_handler_newline(MHI, data, ISetup, CounterDb)]
    if sm_suppressed_newline is not None:
        # 'newline-suppressor' followed by 'newline' is ignored (skipped).
        ppt_list.append(
            PPT_indentation_handler_suppressed_newline(MHI, sm_suppressed_newline))
    return [], ppt_list
def _prepare_indentation_counter(ModeName, OptionsDb, CounterDb, IncidenceDb, MHI):
    """Prepare the indentation counter. It is implemented by:

    'newline' pattern    --> triggers as soon as an UNSUPPRESSED newline
                             occurs; entry to the INDENTATION COUNTER.
    'suppressed newline' --> INDENTATION COUNTER is NOT triggered.

    The suppressed-newline pattern is longer than (and thus takes
    precedence over) the newline pattern. With it, lines can overstep the
    newline (compare the backslash in Python, for example).

    RETURNS: List of: [0] newline PPT and [1] optionally the PPT of the
             newline suppressor.

    The primary pattern-action-pair list is to be the head of all pattern
    action pairs. MHI = mode hierarchy index defining the priority of the
    current mode.
    """
    ISetup = OptionsDb.value("indentation")
    if ISetup is None:
        return [], []

    check_indentation_setup(ISetup)

    # A suppressed newline is the suppressor directly followed by newline.
    suppressor_sm = ISetup.sm_newline_suppressor.get()
    if suppressor_sm is not None:
        sm_suppressed_newline = beautifier.do(
            sequentialize.do([suppressor_sm, ISetup.sm_newline.get()]))
    else:
        sm_suppressed_newline = None

    data = {
        "counter_db":                    CounterDb,
        "indentation_setup":             ISetup,
        "incidence_db":                  IncidenceDb,
        "default_indentation_handler_f": IncidenceDb.default_indentation_handler_f(),
        "mode_name":                     ModeName,
        "sm_suppressed_newline":         sm_suppressed_newline,
    }

    # 'newline' triggers --> indentation counter.
    ppt_list = [PPT_indentation_handler_newline(MHI, data, ISetup, CounterDb)]
    if sm_suppressed_newline is not None:
        # 'newline-suppressor' followed by 'newline' is ignored (skipped).
        ppt_list.append(
            PPT_indentation_handler_suppressed_newline(MHI, sm_suppressed_newline))
    return [], ppt_list
def __core(SuperPattern, SubPattern): print("super = " + SuperPattern).replace("\n", "\\n").replace("\t", "\\t") print("sub = " + SubPattern).replace("\n", "\\n").replace("\t", "\\t") super_p = regex.do(SuperPattern, {}).extract_sm() sub_p = regex.do(SubPattern, {}).extract_sm() result = difference.do(super_p, sub_p) print "result = ", beautifier.do(difference.do( super_p, sub_p)) # .get_string(NormalizeF=False)
def detect_backward(CoreStateMachine, PostConditionStateMachine):
    """A 'backward ambiguity' denotes the case where it cannot be clearly
    be determined how far to go back from the end of a post-condition.
    NOTE: This does not mean that the post-condition is ambiguous. Many
    cases that are backward ambiguous can be handled by quex's normal
    post-condition handling. Examples: x/x+ is backward ambiguous
    because in a stream of 'x' one cannot determine with a pure state
    machine where to stop. This case, though can be handled by the
    normal post-condition implementation. x+/x+ is backward ambiguous
    and cannot be handled by the normal implementation. In fact, this
    specification does not allow any conclusions about the users intend
    where to reset the input after match.
    """
    __assert_state_machines(CoreStateMachine, PostConditionStateMachine)
    # NOTE(review): this clone appears to be dead -- 'my_post_context_sm'
    # is unconditionally overwritten below; confirm before removing.
    my_post_context_sm = PostConditionStateMachine.clone()
    # (*) Create a modified version of the post condition, where the
    #     initial state is an acceptance state, and no other. This
    #     allows the detector to trigger on 'iteration'.
    #
    # -- delete all acceptance states in the post condition
    # for state in my_post_context_sm.states.values():
    #    state.set_acceptance(False)
    # -- set the initial state as acceptance state
    # my_post_context_sm.get_init_state().set_acceptance(True)
    my_core_sm = beautifier.do(reverse.do(CoreStateMachine))
    tmp = deepcopy(PostConditionStateMachine)  # no deeepcopy needed here, I guess <fschaef 11y11m01d>
    my_post_context_sm = beautifier.do(reverse.do(tmp))
    # Backward detection == forward detection on the reversed machines.
    return detect_forward(my_post_context_sm, my_core_sm)
def detect_backward(CoreStateMachine, PostConditionStateMachine):
    """A 'backward ambiguity' denotes the case where it cannot be clearly
    be determined how far to go back from the end of a post-condition.

    NOTE: This does not mean that the post-condition is ambiguous. Many
    cases that are backward ambiguous can be handled by quex's normal
    post-condition handling.

    Examples:  x/x+   is backward ambiguous because in a stream
               of 'x' one cannot determine with a pure state
               machine where to stop. This case, though can
               be handled by the normal post-condition
               implementation.

               x+/x+  is backward ambiguous and cannot be handled
               by the normal implementation. In fact, this
               specification does not allow any conclusions
               about the users intend where to reset the input
               after match.
    """
    __assert_state_machines(CoreStateMachine, PostConditionStateMachine)

    # NOTE(review): this clone is overwritten below before ever being read;
    # it only feeds the commented-out modification block -- confirm whether
    # both can be removed together.
    my_post_context_sm = PostConditionStateMachine.clone()

    # (*) Create a modified version of the post condition, where the
    #     initial state is an acceptance state, and no other. This
    #     allows the detector to trigger on 'iteration'.
    #
    # -- delete all acceptance states in the post condition
    # for state in my_post_context_sm.states.values():
    #     state.set_acceptance(False)
    # -- set the initial state as acceptance state
    # my_post_context_sm.get_init_state().set_acceptance(True)

    # Reverse both machines: the backward detection problem becomes a
    # forward detection problem on the reversed automata.
    my_core_sm = beautifier.do(reverse.do(CoreStateMachine))

    tmp = deepcopy(PostConditionStateMachine)  # no deeepcopy needed here, I guess <fschaef 11y11m01d>
    my_post_context_sm = beautifier.do(reverse.do(tmp))

    # Forward detection on the reversed machines == backward detection.
    return detect_forward(my_post_context_sm, my_core_sm)
def do(SM_A, SM_B):
    """Cut Begin: Let SM_A match the lexeme set L(SM_A) and SM_B match
    L(SM_B). Then

                         SM_C = CutBegin(SM_A, SM_B)

    matches every lexeme La of L(SM_A); however, if La starts with some
    Lb of L(SM_B), that prefix is cut away. That is, for

         La = [x0, x1, ... xi, xj, ... xN]  with  [x0, ... xi] in L(SM_B)

    the result machine matches the remainder [xj, ... xN] only.

    EXAMPLE 1: CutBegin([0-9]+, [0-9]) = [0-9]{2,}
               (one digit is cut, so at least two are required)

    EXAMPLE 2: CutBegin(1(2?), 12) = 1
               ("12" loses its matched prefix; "1" does not start with
               "12" and remains)

    EXAMPLE 3: CutBegin([a-z]+, print) = all identifiers except 'print'

    (C) 2013 Frank-Rene Schaefer
    """
    walker = WalkAlong(SM_A, SM_B)
    walker.do((SM_A.init_state_index, SM_B.init_state_index, None))

    # Remove orphaned and hopeless states from the result.
    walker.result.clean_up()

    # Deliver a DFA with proper state indices.
    return beautifier.do(walker.result)
def do(SM_A, SM_B):
    """Cut Begin: Let SM_A match the lexeme set L(SM_A) and SM_B match
    L(SM_B). Then

                         SM_C = CutBegin(SM_A, SM_B)

    matches every lexeme La of L(SM_A); however, if La starts with some
    Lb of L(SM_B), that prefix is cut away. That is, for

         La = [x0, x1, ... xi, xj, ... xN]  with  [x0, ... xi] in L(SM_B)

    the result machine matches the remainder [xj, ... xN] only.

    EXAMPLE 1: CutBegin([0-9]+, [0-9]) = [0-9]{2,}
               (one digit is cut, so at least two are required)

    EXAMPLE 2: CutBegin(1(2?), 12) = 1
               ("12" loses its matched prefix; "1" does not start with
               "12" and remains)

    EXAMPLE 3: CutBegin([a-z]+, print) = all identifiers except 'print'

    (C) 2013 Frank-Rene Schaefer
    """
    walker = WalkAlong(SM_A, SM_B)
    walker.do((SM_A.init_state_index, SM_B.init_state_index, None))

    # Remove orphaned and hopeless states from the result.
    walker.result.clean_up()

    # Deliver a DFA with proper state indices.
    return beautifier.do(walker.result)
def unary_checks(Q, operation):
    """Verify algebraic identities of a cut-like 'operation' applied to a
    single state machine 'Q' (against itself, Nothing, and Universal).

    RETURNS: (Q_star, Q_plus) -- the 'Q*' and 'Q+' repetitions of Q, so the
             caller can reuse them for further checks.
    """
    # Q+ = at least one repetition; Q* = zero or more repetitions.
    Q_plus = beautifier.do(repeat.do(Q))
    Q_star = beautifier.do(repeat.do(Q, min_repetition_n=0))

    # Some identities only hold if Q itself differs from Q* / Q+.
    Q_is_Q_star = identity.do(Q, Q_star)
    Q_is_Q_plus = identity.do(Q, Q_plus)

    # \Cut{Q Q} = \Nothing
    y = operation(Q, Q)
    assert y.is_Nothing()

    # if Q != Q+: \CutBegin{Q+ Q} = Q*
    if not Q_is_Q_plus:
        y = operation(Q_plus, Q)
        assert identity.do(y, Q_star)

    # if Q != Q*: \CutBegin{Q* Q} = Q*
    if not Q_is_Q_star:
        y = operation(Q_star, Q)
        assert identity.do(y, Q_star)

    # \Cut{Q \Nothing} = Q
    y = operation(Q, DFA.Nothing())
    assert identity.do(y, Q)

    # \Cut{\Nothing Q} = \Nothing
    y = operation(DFA.Nothing(), Q)
    assert y.is_Nothing()

    # \Cut{Q \Universal} = \Nothing
    y = operation(Q, DFA.Universal())
    assert y.is_Nothing()

    # NOT: \Cut{\Universal Q} = \Universal
    if not Q_is_Q_star and not Q_is_Q_plus:
        # NOTE(review): the comment above refers to '\Cut{\Universal Q}' but
        # the call repeats 'operation(Q, DFA.Universal())' from the previous
        # check -- possibly the arguments should be swapped to
        # 'operation(DFA.Universal(), Q)'. Confirm intent before changing.
        y = operation(Q, DFA.Universal())
        assert y.is_Nothing()

    return Q_star, Q_plus
def __specify_newline(self, Sm, sr):
    """Register the state machine 'Sm' as the newline pattern.

    The characters that may begin a newline are entered as 'begin(newline)'
    and those that may end it as 'end(newline)' into the count-operation
    map. Raises an error if a newline was already defined before.
    """
    _error_if_defined_before(self.result.sm_newline, sr)

    begin_set = Sm.get_beginning_character_set()
    end_set   = Sm.get_ending_character_set()

    self.specifier_count_op_map.add(begin_set, "begin(newline)", None, sr)

    # Do not consider a character from newline twice: whatever may begin
    # a newline is not also registered as ending it.
    end_set.subtract(begin_set)
    if not end_set.is_empty():
        self.specifier_count_op_map.add(end_set, "end(newline)", None, sr)

    if not Sm.is_DFA_compliant():
        Sm = beautifier.do(Sm)
    self.result.sm_newline.set(Sm, sr)
def _finalize_mount_post_context_sm(Sm, SmPostContext, PostEOL_f, PostEOS_f, Sr):
    """Mount the post-context onto 'Sm'.

    For a 'trailing post context' a second machine may be produced which
    detects the input position after match in backward direction
    (BIPD = backward input position detection).

    RETURNS: (mounted state machine, BIPD machine to be reversed or None)
    """
    sm, bipd_sm_to_be_reversed = setup_post_context.do(Sm, SmPostContext,
                                                       PostEOL_f, PostEOS_f, Sr)

    if bipd_sm_to_be_reversed is None:
        return sm, None

    # The BIPD machine must be a DFA before it can be used further.
    if not bipd_sm_to_be_reversed.is_DFA_compliant():
        bipd_sm_to_be_reversed = beautifier.do(bipd_sm_to_be_reversed)
    return sm, bipd_sm_to_be_reversed
def do(SM_A, SM_B):
    """\NotIn{P Q} = \NotBegin{P \Any*(Q+)}

    Reduce 'not in' to 'not begin': a lexeme of SM_A contains a lexeme of
    SM_B somewhere iff it begins with 'anything, then one or more SM_B'.
    """
    any_star   = repeat.do(special.get_any(), min_repetition_n=0)
    b_repeated = repeat.do(SM_B, min_repetition_n=1)

    combined = sequentialize.do([any_star, b_repeated],
                                MountToFirstStateMachineF=True,
                                CloneRemainingStateMachinesF=True)
    combined = beautifier.do(combined)

    # There might be many paths which have no hope to reach acceptance.
    combined.clean_up()

    return complement_begin.do(SM_A, combined)
def do(SM_A, SM_B):
    """\NotIn{P Q} = \NotBegin{P \Any*(Q+)}

    Reduce 'not in' to 'not begin': a lexeme of SM_A contains a lexeme of
    SM_B somewhere iff it begins with 'anything, then one or more SM_B'.
    """
    any_star   = repeat.do(special.get_any(), min_repetition_n=0)
    b_repeated = repeat.do(SM_B, min_repetition_n=1)

    combined = sequentialize.do([any_star, b_repeated],
                                MountToFirstStateMachineF=True,
                                CloneRemainingStateMachinesF=True)
    combined = beautifier.do(combined)

    # There might be many paths which have no hope to reach acceptance.
    combined.clean_up()

    return complement_begin.do(SM_A, combined)
def __implement_epsilon_transitions(result, A, epsilon_transition_set):
    """Add the collected epsilon transitions to 'result'.

    RETURNS: [0] The resulting state machine, if a 'cut' has happened.
                 The original state machine 'A' if no 'cut' has happened.
             [1] True, if a cut has happened, False else.
    """
    if not epsilon_transition_set:
        return A, False

    for from_si, to_si, acceptance_f in epsilon_transition_set:
        # Transitions out of the init state never raise acceptance.
        if from_si == result.init_state_index:
            result.add_epsilon_transition(from_si, to_si)
        else:
            result.add_epsilon_transition(from_si, to_si,
                                          RaiseAcceptanceF=acceptance_f)
    result.delete_hopeless_states()
    return beautifier.do(result), True
def test(Idx, sm_pre, sm, sm_post, BOL_F, EOL_F):
    """Print a report of mounting pre-/post-contexts onto state machine 'sm'.

    Idx     -- test case index (appears in the report header).
    sm_pre  -- pre-context state machine, or None.
    sm_post -- post-context state machine, or None.
    BOL_F   -- begin-of-line flag; EOL_F -- end-of-line flag.
    """
    ## if Idx != 5: return
    result = sm.clone()

    print "##-- %i -----------------------------------------------------------------------" % Idx
    if sm_pre is not None: print " -- pre-condition = True"
    else: print " -- pre-condition = False"
    if sm_post is not None: print " -- post-condition = True"
    else: print " -- post-condition = False"
    print " -- begin of line = ", BOL_F
    print " -- end of line = ", EOL_F

    # Mount post- and pre-context onto 'result' (in-place modification).
    ipsb_sm        = setup_post_context.do(result, sm_post, EOL_F, False, SourceRef_VOID)
    pre_context_sm = setup_pre_context.do(result, sm_pre, BOL_F, False)

    # The pre-context machine is applied in backward direction => reverse it,
    # but keep the original machine's id.
    if pre_context_sm is None:
        inverse_pre_context_sm = None
    else:
        inverse_pre_context_sm = reverse.do(pre_context_sm)
        inverse_pre_context_sm.set_id(pre_context_sm.get_id())

    #
    # print "EXPRESSION = ", result
    # print "POST CONDITION = ", post_sm
    # print "APPENDED = ", result
    result = beautifier.do(result)
    #
    #
    print
    print "result sm.id = ", result.get_id()
    if inverse_pre_context_sm is not None:
        print "result pre sm.id = ", inverse_pre_context_sm.get_id()

    # All acceptance states must agree on the trivial ('begin-of-line')
    # pre-context flag.
    begin_of_line_f = None
    for state in result.get_acceptance_state_list():
        BOF = state.single_entry.has_pre_context_begin_of_line()
        if begin_of_line_f is None:
            begin_of_line_f = BOF
        else:
            assert begin_of_line_f == BOF

    print "result = ", result
    if inverse_pre_context_sm is not None:
        print "inverse_pre_context_sm = ", inverse_pre_context_sm
    print "trivially pre-conditioned = ", begin_of_line_f
def do_state_machine(X):
    """Transforms a given state machine from 'Unicode Driven' to another
    character encoding type.

    RETURNS:
       [0] Transformation complete (True->yes, False->not all transformed)
       [1] Transformed state machine. It may be the same as it was before
           if there was no transformation actually.

    It is ensured that the result of this function is a DFA compliant
    state machine.
    """
    if X is None:
        return True, None
    assert X.is_DFA_compliant()

    complete_f, sm = Setup.buffer_codec.transform(X)
    # The transformation may have broken DFA compliance; restore it.
    if not sm.is_DFA_compliant():
        sm = beautifier.do(sm)
    return complete_f, sm
def do_state_machine(X):
    """Transforms a given state machine from 'Unicode Driven' to another
    character encoding type.

    RETURNS:
       [0] Transformation complete (True->yes, False->not all transformed)
       [1] Transformed state machine. It may be the same as it was before
           if there was no transformation actually.

    It is ensured that the result of this function is a DFA compliant
    state machine.
    """
    if X is None:
        return True, None
    assert X.is_DFA_compliant()

    complete_f, sm = Setup.buffer_codec.transform(X)
    # The transformation may have broken DFA compliance; restore it.
    if not sm.is_DFA_compliant():
        sm = beautifier.do(sm)
    return complete_f, sm
def do(SM_A, SM_B):
    """Complement Begin: Let SM_A match the lexeme set L(SM_A) and SM_B
    match L(SM_B). Then

                        SM_C = NotBegin(SM_A, SM_B)

    matches all lexemes of L(SM_A) except for those that start with a
    lexeme from L(SM_B).

    EXAMPLE 1: NotBegin([0-9]+, [0-9]) = \None
    EXAMPLE 2: NotBegin(1(2?), 12)     = 1
               ("12" is excluded; "1" does not start with "12" and remains)
    EXAMPLE 3: NotBegin([a-z]+, print) = all identifiers except 'print'

    (C) 2013 Frank-Rene Schaefer
    """
    walker = WalkAlong(SM_A, SM_B)

    # If SM_B accepts the empty lexeme, then EVERY lexeme of SM_A starts
    # with a lexeme of SM_B -- nothing remains.
    if SM_B.get_init_state().is_acceptance():
        return special.get_none()

    walker.do((SM_A.init_state_index, SM_B.init_state_index))

    # Remove orphaned and hopeless states from the result.
    walker.result.clean_up()

    # Deliver a DFA with proper state indices.
    return beautifier.do(walker.result)
def do(sm):
    """The UTF8 encoding causes a single unicode character code being
    translated into a sequence of bytes. A state machine triggering on
    unicode characters can be converted into a state machine triggering
    on UTF8 bytes.

    For this a simple transition on a character 'X':

            [ 1 ]---( X )--->[ 2 ]

    needs to be translated into a sequence of state transitions

            [ 1 ]---(x0)--->[ S0 ]---(x1)--->[ S1 ]---(x2)--->[ 2 ]

    where x0, x1, x2 are the UTF8 bytes that represent unicode 'X'.
    States S0 and S1 are intermediate states created only so that
    x1, x2, and x3 can trigger. Note, that the UTF8 sequence ends at
    the same state '2' as the previous single trigger 'X'.
    """
    # Snapshot the states: new intermediate states are added to 'sm'
    # while we iterate.
    for state_index, state in list(sm.states.items()):
        # Pairs (TargetStateIndex, NumberSet): what is reached via what.
        original_transitions = list(state.target_map.get_map().items())

        # Reset the state's transitions so it can absorb the new
        # transitions towards intermediate states.
        state.target_map.clear()

        for target_state_index, number_set in original_transitions:
            # 'PromiseToTreatWellF' even though the intervals are changed:
            # they would be lost after the state split anyway, so reusing
            # their memory avoids copies and constructor calls.
            for interval in number_set.get_intervals(PromiseToTreatWellF=True):
                create_intermediate_states(sm, state_index,
                                           target_state_index, interval)

    return beautifier.do(sm)
def do(sm):
    """Split transitions on characters beyond 0x10000 into sequences over
    intermediate states; transitions entirely below remain untouched.
    Characters in the 'ForbiddenRange' (surrogates) and outside
    [0, 0x110000) are discarded.
    """
    global ForbiddenRange

    # Snapshot the states: new intermediate states are added to 'sm'
    # while we iterate.
    for state_index, state in list(sm.states.items()):
        # Pairs (TargetStateIndex, NumberSet): what is reached via what.
        original_transitions = list(state.target_map.get_map().items())

        # Reset the state's transitions so it can absorb the new
        # transitions towards intermediate states.
        state.target_map.clear()

        for target_state_index, number_set in original_transitions:
            # -- 1st check whether a modification is necessary
            if number_set.supremum() <= 0x10000:
                sm.states[state_index].add_transition(number_set,
                                                      target_state_index)
                continue

            # -- We help: General regular expressions may not bother with
            #    the 'ForbiddenRange'. Let us be so kind and cut it here.
            number_set.subtract(ForbiddenRange)
            number_set.cut_lesser(0)
            number_set.cut_greater_or_equal(0x110000)

            # -- Add intermediate states. 'PromiseToTreatWellF' even though
            #    the intervals are changed: they would be lost after the
            #    state split anyway, so reuse the memory.
            for interval in number_set.get_intervals(PromiseToTreatWellF=True):
                create_intermediate_states(sm, state_index,
                                           target_state_index, interval)

    return beautifier.do(sm)
def StateMachine_Newline():
    """Creates a state machine matching newline according to what has been
    specified in the setup (Setup.dos_carriage_return_newline_f).

    That is, if is DOS newline then the state machine represents '\r\n' and
    if it is unix only, then it represents '\n'. If both is required they
    are implemented in parallel.

    RETURNS: StateMachine
    """
    newline         = ord('\n')   # (pure) newline, i.e. line feed
    carriage_return = ord('\r')   # carriage return

    sm = StateMachine()
    # Unix newline '\n' is always matched.
    sm.add_transition(sm.init_state_index, newline, AcceptanceF=True)
    if Setup.dos_carriage_return_newline_f:
        # DOS newline '\r\n' in parallel.
        mid = sm.add_transition(sm.init_state_index, carriage_return,
                                AcceptanceF=False)
        sm.add_transition(mid, newline, AcceptanceF=True)
    return beautifier.do(sm)
def test(Idx, sm_pre, sm, sm_post, BOF_F, EOF_F):
    """Print a report of mounting pre-/post-contexts onto state machine 'sm'.

    Idx     -- test case index (appears in the report header).
    sm_pre  -- pre-context state machine, or None.
    sm_post -- post-context state machine, or None.
    BOF_F   -- begin-of-line flag; EOF_F -- end-of-line flag.
    """
    result = sm.clone()

    print "##-- %i -----------------------------------------------------------------------" % Idx
    if sm_pre is not None: print " -- pre-condition = True"
    else: print " -- pre-condition = False"
    if sm_post is not None: print " -- post-condition = True"
    else: print " -- post-condition = False"
    print " -- begin of line = ", BOF_F
    print " -- end of line = ", EOF_F

    # Mount post- and pre-context onto 'result' (in-place modification).
    ipsb_sm                = setup_post_context.do(result, sm_post, EOF_F, SourceRef_VOID)
    inverse_pre_context_sm = setup_pre_context.do(result, sm_pre, BOF_F)

    #
    # print "EXPRESSION = ", result
    # print "POST CONDITION = ", post_sm
    # print "APPENDED = ", result
    result = beautifier.do(result)
    #
    #
    print
    print "result sm.id = ", result.get_id()
    if inverse_pre_context_sm is not None:
        print "result pre sm.id = ", inverse_pre_context_sm.get_id()

    # All acceptance states must agree on the trivial ('begin-of-line')
    # pre-context flag.
    begin_of_line_f = None
    for state in result.get_acceptance_state_list():
        BOF = state.single_entry.has_begin_of_line_pre_context()
        if begin_of_line_f is None:
            begin_of_line_f = BOF
        else:
            assert begin_of_line_f == BOF

    print "result = ", result
    if inverse_pre_context_sm is not None:
        print "inverse_pre_context_sm = ", inverse_pre_context_sm
    print "trivially pre-conditioned = ", begin_of_line_f
def __parse_option(fh, new_mode):
    """Parse one mode option '<name: ...>' from file handle 'fh' and enter
    the result into 'new_mode'.

    Handles the special options 'skip', 'skip_range', 'skip_nested_range'
    and 'indentation' by webbing generated patterns/actions into the mode;
    any other option identifier is read as a plain value and checked
    against its admissible domain.

    RETURNS: True  -- an option was parsed.
             False -- no option start was found.

    FIX: the 'empty trigger set' error used a format string without a
    conversion specifier ("... skipper." % identifier) which raises
    TypeError instead of reporting the error; the '%s' is now present.
    """
    def get_pattern_object(SM):
        # Normalize a raw state machine into a DFA-compliant Pattern.
        if not SM.is_DFA_compliant():
            result = nfa_to_dfa.do(SM)
        else:
            result = SM
        result = hopcroft.do(result, CreateNewStateMachineF=False)
        return Pattern(result, AllowStateMachineTrafoF=True)

    identifier = read_option_start(fh)
    if identifier is None:
        return False

    verify_word_in_list(identifier, mode_option_info_db.keys(),
                        "mode option", fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A useful application is most probably
        # the whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very effective way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(fh, PatternStringF=True)
        skip_whitespace(fh)

        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier, fh)

        if trigger_set.is_empty():
            error_msg("Empty trigger set for skipper '%s'." % identifier, fh)

        # TriggerSet skipping is implemented the following way: As soon as one element of the
        # trigger set appears, the state machine enters the 'trigger set skipper section'.
        # Enter the skipper as if the opener pattern was a normal pattern and the 'skipper' is the action.
        # NOTE: The correspondent CodeFragment for skipping is created in 'implement_skippers(...)'
        pattern_sm = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index, trigger_set, AcceptanceF=True)

        # Skipper code is to be generated later
        action = GeneratedCode(skip_character_set.do,
                               FileName = fh.name,
                               LineN    = get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        new_mode.add_match(pattern_str, action, get_pattern_object(pattern_sm),
                           Comment=E_SpecialPatterns.SKIP)
        return True

    elif identifier in ["skip_range", "skip_nested_range"]:
        # A non-nesting skipper can contain a full fledged regular expression as opener,
        # since it only effects the trigger. Not so the nested range skipper--see below.

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings' not state machines
            opener_str, opener_sequence = __parse_string(fh, "Opener pattern for 'skip_nested_range'")
            opener_sm = StateMachine.from_sequence(opener_sequence)
        else:
            opener_str, opener_pattern = regular_expression.parse(fh)
            opener_sm = opener_pattern.sm
            # For 'range skipping' the opener sequence is not needed, only the opener state
            # machine is webbed into the pattern matching state machine.
            opener_sequence = None

        skip_whitespace(fh)

        # -- closer
        closer_str, closer_sequence = __parse_string(fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier, fh)

        # Skipper code is to be generated later
        generator_function, comment = {
            "skip_range":        (skip_range.do,        E_SpecialPatterns.SKIP_RANGE),
            "skip_nested_range": (skip_nested_range.do, E_SpecialPatterns.SKIP_NESTED_RANGE),
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName = fh.name,
                               LineN    = get_current_line_info_number(fh))

        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"]       = new_mode.name

        new_mode.add_match(opener_str, action, get_pattern_object(opener_sm), Comment=comment)
        return True

    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.
        # -- Suppressed Newline = Suppressor followed by Newline,
        #    then newline does not trigger indentation counting.
        suppressed_newline_pattern_str = ""
        if value.newline_suppressor_state_machine.get() is not None:
            suppressed_newline_pattern_str = \
                  "(" + value.newline_suppressor_state_machine.pattern_string() + ")" \
                + "(" + value.newline_state_machine.pattern_string() + ")"

            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])

            FileName = value.newline_suppressor_state_machine.file_name
            LineN    = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code = UserCodeFragment("goto %s;" % get_label("$start", U=True), FileName, LineN)

            new_mode.add_match(suppressed_newline_pattern_str, code,
                               get_pattern_object(suppressed_newline_sm),
                               Comment=E_SpecialPatterns.SUPPRESSED_INDENTATION_NEWLINE)

        # When there is an empty line, then there shall be no indentation count on it.
        # Here comes the trick:
        #
        #      Let  newline
        #      be defined as:  newline ([space]* newline])*
        #
        # This way empty lines are eating away before the indentation count is activated.

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index, value.indentation_count_character_set(),
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- nfa to dfa; hopcroft optimization
        sm = beautifier.do(x4)

        FileName = value.newline_state_machine.file_name
        LineN    = value.newline_state_machine.line_n
        action   = GeneratedCode(indentation_counter.do, FileName, LineN)

        action.data["indentation_setup"] = value

        new_mode.add_match(value.newline_state_machine.pattern_string(), action,
                           get_pattern_object(sm),
                           Comment=E_SpecialPatterns.INDENTATION_NEWLINE)

        # Announce the mode to which the setup belongs
        value.set_containing_mode_name(new_mode.name)

    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds
    assert mode_option_info_db.has_key(identifier)

    # Is the option of the appropriate value?
    option_info = mode_option_info_db[identifier]
    if option_info.domain is not None and value not in option_info.domain:
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "Though, possible for this option are only: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option
    new_mode.add_option(identifier, value)

    return True