Example No. 1
def do(SM_A, SM_B):
    """Find a state machine that stops right before the state machine 'SM_B'.
    If there is a lexeme 'l' (lowercase L) in SM_A:

                       l = [x0, x1, ... xj, xk, ... xN ]

    and '[xk ... xN]' is a lexeme from L(SM_B), then 'rcut(SM_A, SM_B)' shall
    only match
                       '[x0, x1, ... xj]'.
                       
    All lexemes 'l' translate into lexemes 's' in reverse(SM_A):

                       s = [xN, ... xk, xj, ... x1, x0 ]

    Lexemes in SM_B translate into lexemes 't' in reverse(SM_B):

                       t = [xN, ... xk]

    The 'cut' operation cut(reverse(SM_A), reverse(SM_B)) delivers

                       u = [ xj, ... x1, x0 ]

    Then, the 'reverse(cut(reverse(SM_A), reverse(SM_B)))' delivers

                       reverse(u) = [ x0, x1, ... xj ]

    as desired for all lexemes in SM_A that end with something that 
    matches SM_B.
                       
    (C) Frank-Rene Schaefer
    """
    Ar        = beautifier.do(reverse.do(SM_A))
    Br        = beautifier.do(reverse.do(SM_B))
    cut_Ar_Br = complement_begin.do(Ar, Br)
    return reverse.do(cut_Ar_Br)
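
A minimal sketch of the reverse/cut/reverse identity above, using plain strings as stand-in lexemes (toy code, not quex's state-machine API): cutting a matching prefix from the reversed lexeme is the same as cutting a matching suffix from the original.

def toy_cut_prefix(s, prefix):
    # stand-in for cut(A, B): drop 'prefix' if 's' starts with it
    if s.startswith(prefix): return s[len(prefix):]
    return s

def toy_rcut(lexeme, suffix):
    # reverse(cut(reverse(A), reverse(B)))
    return toy_cut_prefix(lexeme[::-1], suffix[::-1])[::-1]

assert toy_rcut("abcde", "de") == "abc"   # only '[x0 ... xj]' remains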
Example No. 2
def do(SM_A, SM_B):
    """Find a state machine that stops right before the state machine 'SM_B'.
    If there is a lexeme 'l' (lowercase L) in SM_A:

                       l = [x0, x1, ... xj, xk, ... xN ]

    and '[xk ... xN]' is a lexeme from L(SM_B), then 'rcut(SM_A, SM_B)' shall
    only match
                       '[x0, x1, ... xj]'.
                       
    All lexemes 'l' translate into lexemes 's' in reverse(SM_A):

                       s = [xN, ... xk, xj, ... x1, x0 ]

    Lexemes in SM_B translate into lexemes 't' in reverse(SM_B):

                       t = [xN, ... xk]

    The 'cut' operation cut(reverse(SM_A), reverse(SM_B)) delivers

                       u = [ xj, ... x1, x0 ]

    Then, the 'reverse(cut(reverse(SM_A), reverse(SM_B)))' delivers

                       reverse(u) = [ x0, x1, ... xj ]

    as desired for all lexemes in SM_A that end with something that 
    matches SM_B.
                       
    (C) Frank-Rene Schaefer
    """
    Ar = beautifier.do(reverse.do(SM_A))
    Br = beautifier.do(reverse.do(SM_B))
    cut_Ar_Br = complement_begin.do(Ar, Br)
    return reverse.do(cut_Ar_Br)
Example No. 3
    def __init__(self,
                 CoreSM,
                 PreContextSM=None,
                 PostContextSM=None,
                 BeginOfLineF=False,
                 EndOfLineF=False,
                 Sr=SourceRef_VOID,
                 PatternString="",
                 AllowNothingIsNecessaryF=False):
        assert PreContextSM is None or isinstance(PreContextSM, StateMachine)
        Pattern.check_initial(CoreSM, BeginOfLineF, PreContextSM, EndOfLineF,
                              PostContextSM, Sr, AllowNothingIsNecessaryF)

        self.__pattern_string = PatternString
        self.__sr = Sr

        # (*) Setup the whole pattern
        self.__sm = CoreSM
        self.__post_context_sm = PostContextSM
        self.__post_context_end_of_line_f = EndOfLineF
        assert self.__sm is not None

        # -- [optional] post contexts
        self.__post_context_f = (PostContextSM is not None)

        #    Backward input position detection requires an inversion of the
        #    state machine. This can only be done after the (optional) codec
        #    transformation. Thus, a non-inverted version of the state machine
        #    is maintained until the transformation is done.
        self.__bipd_sm_to_be_inverted = None
        self.__bipd_sm = None

        # -- [optional] pre contexts
        #
        #    Same as for backward input position detection holds for pre-contexts.
        self.__pre_context_sm_to_be_inverted = PreContextSM
        self.__pre_context_sm = None

        # All state machines must be DFAs
        if not self.__sm.is_DFA_compliant():
            self.__sm = beautifier.do(self.__sm)

        if         self.__pre_context_sm_to_be_inverted is not None \
           and not self.__pre_context_sm_to_be_inverted.is_DFA_compliant():
            self.__pre_context_sm_to_be_inverted = beautifier.do(
                self.__pre_context_sm_to_be_inverted)

        # Detect the trivial pre-context
        self.__pre_context_begin_of_line_f = BeginOfLineF

        # The line/column count information can only be determined when the
        # line/column count database is present. Thus, it is delayed.
        self.__count_info = None

        # Ensure that the pattern is never transformed twice
        self.__alarm_transformed_f = False

        self.__validate(Sr)
Example No. 4
    def __init__(self, CoreSM, PreContextSM=None, PostContextSM=None, 
                 BeginOfLineF=False, EndOfLineF=False, Sr=SourceRef_VOID, 
                 PatternString="",
                 AllowNothingIsNecessaryF=False):
        assert PreContextSM is None or isinstance(PreContextSM, StateMachine)
        Pattern.check_initial(CoreSM, 
                              BeginOfLineF, PreContextSM, 
                              EndOfLineF, PostContextSM, 
                              Sr,
                              AllowNothingIsNecessaryF)

        self.__pattern_string = PatternString
        self.__sr             = Sr

        # (*) Setup the whole pattern
        self.__sm                         = CoreSM
        self.__post_context_sm            = PostContextSM
        self.__post_context_end_of_line_f = EndOfLineF
        assert self.__sm is not None

        # -- [optional] post contexts
        self.__post_context_f = (PostContextSM is not None)

        #    Backward input position detection requires an inversion of the 
        #    state machine. This can only be done after the (optional) codec
        #    transformation. Thus, a non-inverted version of the state machine
        #    is maintained until the transformation is done.
        self.__bipd_sm_to_be_inverted = None
        self.__bipd_sm                = None

        # -- [optional] pre contexts
        #
        #    Same as for backward input position detection holds for pre-contexts.
        self.__pre_context_sm_to_be_inverted = PreContextSM
        self.__pre_context_sm                = None

        # All state machines must be DFAs
        if not self.__sm.is_DFA_compliant(): 
            self.__sm  = beautifier.do(self.__sm)

        if         self.__pre_context_sm_to_be_inverted is not None \
           and not self.__pre_context_sm_to_be_inverted.is_DFA_compliant(): 
            self.__pre_context_sm_to_be_inverted = beautifier.do(self.__pre_context_sm_to_be_inverted)

        # Detect the trivial pre-context
        self.__pre_context_begin_of_line_f = BeginOfLineF
        
        # The line/column count information can only be determined when the 
        # line/column count database is present. Thus, it is delayed.
        self.__count_info = None

        # Ensure that the pattern is never transformed twice
        self.__alarm_transformed_f = False

        self.__validate(Sr)
Example No. 5
def more_DFAs(A, B):
    """RETURNS: [0] B+
                [1] B*
                [2] B*A
    """
    B_plus = repeat.do(B)
    B_star = repeat.do(B, min_repetition_n=0)
    B_star_A = beautifier.do(sequentialize.do([B_star, A]))
    return beautifier.do(B_plus), \
           beautifier.do(B_star), \
           B_star_A
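
As a rough regex-level analogy (a sketch, not quex's API), the three results of more_DFAs correspond to the operators '+', '*', and concatenation. With B = 'b' and A = 'a':

import re

B_plus   = re.compile(r"b+$")    # B+
B_star   = re.compile(r"b*$")    # B*  (min_repetition_n=0)
B_star_A = re.compile(r"b*a$")   # B*A
assert B_plus.match("bbb") and not B_plus.match("")
assert B_star.match("")                        # zero repetitions allowed
assert B_star_A.match("a") and B_star_A.match("bba")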
Example No. 6
    def mount_post_context_sm(self):
        self.__sm,     \
        self.__bipd_sm_to_be_inverted = setup_post_context.do(self.__sm, 
                                                              self.__post_context_sm, 
                                                              self.__post_context_end_of_line_f, 
                                                              self.__sr)

        if self.__bipd_sm_to_be_inverted is None: 
            return

        if not self.__bipd_sm_to_be_inverted.is_DFA_compliant(): 
            self.__bipd_sm_to_be_inverted = beautifier.do(self.__bipd_sm_to_be_inverted)

        self.__bipd_sm = beautifier.do(reverse.do(self.__bipd_sm_to_be_inverted))
Example No. 7
    def mount_post_context_sm(self):
        self.__sm,     \
        self.__bipd_sm_to_be_inverted = setup_post_context.do(self.__sm, 
                                                              self.__post_context_sm, 
                                                              self.__post_context_end_of_line_f, 
                                                              self.__sr)

        if self.__bipd_sm_to_be_inverted is None: 
            return

        if not self.__bipd_sm_to_be_inverted.is_DFA_compliant(): 
            self.__bipd_sm_to_be_inverted = beautifier.do(self.__bipd_sm_to_be_inverted)

        self.__bipd_sm = beautifier.do(reverse.do(self.__bipd_sm_to_be_inverted))
Example No. 8
def do_state_machine(SmIn):
    """Transforms a given state machine from 'Unicode Driven' to another
       character encoding type.
    
       RETURNS: 
       [0] Transformation complete (True->yes, False->not all transformed)
       [1] Transformed state machine. It may be the same as it was
           before, if no transformation actually took place.

       It is ensured that the result of this function is a DFA compliant
       state machine.
    """
    if SmIn is None: return True, None
    assert SmIn.is_DFA_compliant()

    # BEFORE: Forgive characters not in source range. What comes out is 
    #         important. It is checked in 'transform()' of the Pattern.
    complete_f, sm_out = Setup.buffer_codec.transform(SmIn)

    # AFTER: Whatever happened, the transitions in the state machine MUST
    #        lie in the drain_set.
    sm_out.assert_range(Setup.buffer_codec.drain_set)

    if sm_out.is_DFA_compliant(): return complete_f, sm_out
    else:                         return complete_f, beautifier.do(sm_out)
Example No. 9
def __get_inverse_state_machine_that_finds_end_of_core_expression(
        PostConditionSM):
    """In case of a pseudo-ambiguous post condition one needs to go backwards
       in order to search for the end of the core condition. This function 
       creates the inverse state machine that is able to go backwards.

       NOTE: This is a special case, because one already knows that the state
       machine reaches the acceptance state sometime (this is where it actually
       started). That means that in states other than acceptance states one
       can take out the 'drop out' triggers since they CANNOT occur. This
       enables some speed-up when going backwards.
    """
    result = beautifier.do(PostConditionSM.get_inverse())

    # -- delete 'drop-out' transitions in non-acceptance states
    #    NOTE: When going backwards one already knows that the acceptance
    #          state (the init state of the post condition) is reached, see above.
    # for state in result.states.values():
    #    # -- acceptance states can have 'drop-out' (actually, they need to have)
    #    if state.is_acceptance(): continue
    #
    #    state.transitions().replace_drop_out_target_states_with_adjacent_targets()
    #
    # result = nfa_to_dfa.do(result)
    # result = hopcroft.do(result)

    # Acceptance States need to be marked: Store input position.
    # NOTE: When tracing backwards the match is guaranteed, but there might
    #       still be some 'trail' in case of iterations that are not directly
    #       iterated to the ambiguous post condition. Thus drop out may
    #       happen and it must be clear where to put the input pointer in this case.

    return result
Example No. 10
def do(sm):
    state_list = sm.states.items()
    for state_index, state in state_list:
        # Get the 'transition_list', i.e. a list of pairs (TargetState, NumberSet)
        # which indicates what target state is reached via what number set.
        transition_list = state.transitions().get_map().items()
        # Clear the state's transitions, now. This way it can absorb new
        # transitions to intermediate states.
        state.transitions().clear()
        # Loop over all transitions
        for target_state_index, number_set in transition_list:
            # We take the intervals with 'PromiseToTreatWellF' even though they
            # are changed. This is because the intervals would be lost anyway
            # after the state split, so we use the same memory and do not 
            # cause a time consuming memory copy and constructor calls.
            interval_list = number_set.get_intervals(PromiseToTreatWellF=True)

            # 1st check whether a modification is necessary
            modification_required_f = False
            for interval in interval_list:
                if interval.begin >= 0x10000: modification_required_f = True; break

            if modification_required_f == False:
                sm.states[state_index].add_transition(number_set, target_state_index)
                continue

            # Now, intermediate states may be added
            for interval in interval_list:
                create_intermediate_states(sm, state_index, target_state_index, interval)
    
    result = beautifier.do(sm)
    return result
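
The cut at 0x10000 marks where single 16-bit code units run out; assuming (as the boundary suggests) a UTF-16 style target encoding, a sketch of the standard surrogate-pair arithmetic (plain Python, independent of quex) shows what the intermediate states have to represent for such an interval:

def utf16_surrogate_pair(code_point):
    # standard UTF-16 decomposition for code points >= 0x10000
    assert code_point >= 0x10000
    v = code_point - 0x10000
    return 0xD800 | (v >> 10), 0xDC00 | (v & 0x3FF)

assert utf16_surrogate_pair(0x10400) == (0xD801, 0xDC00)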
Example No. 11
def philosophical_cut(core_sm, post_context_sm):
    """The 'philosophical cut' is a technique introduced by Frank-Rene Schaefer
       to produce a pair of a core- and a post-condition that otherwise would 
       be forward and backward ambiguous. The philosophical ground for this
       cut is 'greed', i.e. a core pattern should eat as many characters as
       it can. This idea is followed during the whole construction of the lexical
       analyzer. 
       
       For the case of total ambiguity 'x+/x+', this idea translates into leaving
       the iteration in the core condition and cutting the iteration in the post
       condition. Thus 'x+/x+' is transformed into 'x+/x' and can be solved by
       the technique for forward ambiguous post conditions.

       __dive -- indicator of recursion! replace by TreeWalker
    """
    core_acceptance_state_list = core_sm.get_acceptance_state_list()

    pcsm_init_state = post_context_sm.get_init_state()
    for csm_state in core_acceptance_state_list:
        __dive_to_cut_iteration(core_sm, csm_state, post_context_sm, pcsm_init_state,
                                SM1_Path=[post_context_sm.init_state_index])

    # By means of cutting, some states might have become bold. That is, they have
    # only an epsilon transition. Thus, it is required to do a transformation NFA->DFA
    # and add a hopcroft optimization.
    new_post_sm = beautifier.do(post_context_sm)
    return new_post_sm
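
A regex-level sketch of the greedy reading that the cut enforces: with 'x+/x' (core 'x+', post-context 'x'), the core eats as much as it can while still leaving the post-context its match.

import re

m = re.match(r"(x+)(?=x)", "xxxx")   # core 'x+' with post-context 'x'
assert m.group(1) == "xxx"           # greedy core; one 'x' left for the post-context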
Example No. 12
    def __specify_comment(self, Sm, sr):
        _error_if_defined_before(self.result.sm_comment, sr)

        self.specifier_count_op_map.add(Sm.get_beginning_character_set(), 
                                   "begin(comment to newline)", None, sr)
        if not Sm.is_DFA_compliant(): Sm = beautifier.do(Sm)
        self.result.sm_comment.set(Sm, sr)
Example No. 13
    def specify_suppressor(self, Sm, sr):
        _error_if_defined_before(self.sm_newline_suppressor, sr)

        self.count_command_map.add(Sm.get_beginning_character_set(), 
                                   "begin(newline suppressor)", None, sr)
        if not Sm.is_DFA_compliant(): Sm = beautifier.do(Sm)
        self.sm_newline_suppressor.set(Sm, sr)
Example No. 14
def philosophical_cut(core_sm, post_context_sm):
    """The 'philosophical cut' is a technique introduced by Frank-Rene Schaefer
       to produce a pair of a core- and a post-condition that otherwise would 
       be forward and backward ambiguous. The philosophical ground for this
       cut is 'greed', i.e. a core pattern should eat as many characters as
       it can. This idea is followed during the whole construction of the lexical
       analyzer. 
       
       For the case of total ambiguity 'x+/x+', this idea translates into leaving
       the iteration in the core condition and cutting the iteration in the post
       condition. Thus 'x+/x+' is transformed into 'x+/x' and can be solved by
       the technique for forward ambiguous post conditions.

       __dive -- indicator of recursion! replace by TreeWalker
    """
    core_acceptance_state_list = core_sm.get_acceptance_state_list()

    pcsm_init_state = post_context_sm.get_init_state()
    for csm_state in core_acceptance_state_list:
        __dive_to_cut_iteration(core_sm,
                                csm_state,
                                post_context_sm,
                                pcsm_init_state,
                                SM1_Path=[post_context_sm.init_state_index])

    # By means of cutting, some states might have become bold. That is, they have
    # only an epsilon transition. Thus, it is required to do a transformation NFA->DFA
    # and add a hopcroft optimization.
    new_post_sm = beautifier.do(post_context_sm)
    return new_post_sm
Example No. 15
def __get_inverse_state_machine_that_finds_end_of_core_expression(PostConditionSM):
    """In case of a pseudo-ambiguous post condition one needs to go backwards
       in order to search for the end of the core condition. This function 
       creates the inverse state machine that is able to go backwards.

       NOTE: This is a special case, because one already knows that the state
       machine reaches the acceptance state sometime (this is where it actually
       started). That means that in states other than acceptance states one
       can take out the 'drop out' triggers since they CANNOT occur. This
       enables some speed-up when going backwards.
    """
    result = beautifier.do(PostConditionSM.get_inverse())

    # -- delete 'drop-out' transitions in non-acceptance states
    #    NOTE: When going backwards one already knows that the acceptance
    #          state (the init state of the post condition) is reached, see above.
    # for state in result.states.values():
    #    # -- acceptance states can have 'drop-out' (actually, they need to have)
    #    if state.is_acceptance(): continue
    #
    #    state.transitions().replace_drop_out_target_states_with_adjacent_targets()
    #
    # result = nfa_to_dfa.do(result)
    # result = hopcroft.do(result)

    # Acceptance States need to be marked: Store input position.
    # NOTE: When tracing backwards the match is guaranteed, but there might
    #       still be some 'trail' in case of iterations that are not directly
    #       iterated to the ambiguous post condition. Thus drop out may
    #       happen and it must be clear where to put the input pointer in this case.

    return result
Example No. 16
def is_all(SM):
    """Pattern has only two states: the init state which is not 
    accepting, and the accepting state which transits to itself
    forever.
    """
    sm = beautifier.do(SM)
    # Init State:
    #   -- not an acceptance state
    #   -- has only one transition on 'all' to an acceptance state
    #
    if   len(sm.states) != 2:                 return False
    init_state = sm.get_init_state()
    if   init_state.is_acceptance():          return False
    tm = init_state.target_map.get_map()
    if   len(tm) != 1:                        return False
    target_index, trigger_set = tm.iteritems().next()
    if trigger_set.is_all() == False:         return False
    if target_index == sm.init_state_index:   return False

    # The Acceptance State:
    #   -- has only one transition on 'all' to itself.
    #
    target_state = sm.states[target_index]
    if not target_state.is_acceptance():      return False
    tm = target_state.target_map.get_map()
    if len(tm) != 1:                          return False
    
    target_index_2, trigger_set = tm.iteritems().next()
    if trigger_set.is_all() == False:         return False
    if target_index_2 != target_index:        return False
    return True
Example No. 17
def is_all(SM):
    """Pattern has only two states: the init state which is not 
    accepting, and the accepting state which transits to itself
    forever.
    """
    sm = beautifier.do(SM)
    # Init State:
    #   -- not an acceptance state
    #   -- has only one transition on 'all' to an acceptance state
    #
    if len(sm.states) != 2: return False
    init_state = sm.get_init_state()
    if init_state.is_acceptance(): return False
    tm = init_state.target_map.get_map()
    if len(tm) != 1: return False
    target_index, trigger_set = tm.iteritems().next()
    if trigger_set.is_all() == False: return False
    if target_index == sm.init_state_index: return False

    # The Acceptance State:
    #   -- has only one transition on 'all' to itself.
    #
    target_state = sm.states[target_index]
    if not target_state.is_acceptance(): return False
    tm = target_state.target_map.get_map()
    if len(tm) != 1: return False

    target_index_2, trigger_set = tm.iteritems().next()
    if trigger_set.is_all() == False: return False
    if target_index_2 != target_index: return False
    return True
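
A sketch of the same two-state test on a toy DFA encoded as plain dictionaries (a hypothetical representation, not quex's): state -> (acceptance flag, transition map), with 'ALL' standing for a trigger set covering every character.

def toy_is_all(dfa, init):
    if len(dfa) != 2:                          return False
    acceptance_f, tm = dfa[init]
    if acceptance_f or list(tm) != ["ALL"]:    return False
    target = tm["ALL"]
    if target == init:                         return False
    acceptance_f, tm = dfa[target]
    return acceptance_f and tm == {"ALL": target}

assert toy_is_all({0: (False, {"ALL": 1}), 1: (True, {"ALL": 1})}, 0)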
Example No. 18
def snap_expression(stream, PatternDict):
    """expression:  term
                    term | expression
    """              
    __debug_entry("expression", stream)    
    # -- term
    result = snap_term(stream, PatternDict) 
    if result is None: 
        return __debug_exit(None, stream)

    # -- optional '|'
    if not check(stream, '|'): 
        return __debug_exit(result, stream)
    
    position_1 = stream.tell()
    __debug_print("'|' (in expression)")

    # -- expression
    result_2 = snap_expression(stream, PatternDict) 
    __debug_print("expression(in expression):",  result_2)
    if result_2 is None:
        stream.seek(position_1) 
        return __debug_exit(result, stream)

    result = parallelize.do([result, result_2], CloneF=True)   # CloneF = False (should be!)
    return __debug_exit(beautifier.do(result), stream)
Example No. 19
def snap_term(stream, PatternDict):
    """term:  primary
              primary term 
    """
    __debug_entry("term", stream)    

    # -- primary
    result = snap_primary(stream, PatternDict) 
    __debug_print("##primary(in term):", result)
    if result is None: return __debug_exit(None, stream)
    position_1 = stream.tell()

    # -- optional 'term' 
    result_2 = snap_term(stream, PatternDict) 
    __debug_print("##term(in term):",  result_2)
    if result_2 is None: 
        stream.seek(position_1)
        return __debug_exit(result, stream)
    
    ## print "##1:", result.get_string(NormalizeF=False)
    ## print "##2:", result_2.get_string(NormalizeF=False)
    result = sequentialize.do([result, result_2], 
                              MountToFirstStateMachineF=True, 
                              CloneRemainingStateMachinesF=False)    

    return __debug_exit(beautifier.do(result), stream)
Example No. 20
def snap_expression(stream, PatternDict):
    """expression:  term
                    term | expression
    """
    __debug_entry("expression", stream)
    # -- term
    result = snap_term(stream, PatternDict)
    if result is None:
        return __debug_exit(None, stream)

    # -- optional '|'
    if not check(stream, '|'):
        return __debug_exit(result, stream)

    position_1 = stream.tell()
    __debug_print("'|' (in expression)")

    # -- expression
    result_2 = snap_expression(stream, PatternDict)
    __debug_print("expression(in expression):", result_2)
    if result_2 is None:
        stream.seek(position_1)
        return __debug_exit(result, stream)

    result = parallelize.do([result, result_2],
                            CloneF=True)  # CloneF = False (should be!)
    return __debug_exit(beautifier.do(result), stream)
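
The two functions above form a classic recursive-descent pair; a self-contained sketch of the same grammar over plain strings (hypothetical 'alt'/'seq'/'chr' tuples instead of state machines, node construction in place of parallelize/sequentialize):

def parse_expression(s, i=0):
    # expression: term | term '|' expression
    node, i = parse_term(s, i)
    if node is None: return None, i
    if i < len(s) and s[i] == '|':
        rhs, j = parse_expression(s, i + 1)
        if rhs is not None:
            return ("alt", node, rhs), j       # ~ parallelize.do
    return node, i

def parse_term(s, i):
    # term: primary | primary term
    node, i = parse_primary(s, i)
    if node is None: return None, i
    rhs, j = parse_term(s, i)
    if rhs is not None:
        return ("seq", node, rhs), j           # ~ sequentialize.do
    return node, i

def parse_primary(s, i):
    # toy stand-in for snap_primary: one literal character
    if i < len(s) and s[i] != '|': return ("chr", s[i]), i + 1
    return None, i

assert parse_expression("ab|c")[0] == \
       ("alt", ("seq", ("chr", "a"), ("chr", "b")), ("chr", "c"))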
Example No. 21
def test(TestString):
    print "-------------------------------------------------------------------"
    print "expression    = \"" + TestString + "\""
    pattern = core.do(TestString, {}).finalize(None)

    # During 'finalize()': pattern.transform(Setup.buffer_encoding)
    # During 'finalize()': pattern.mount_post_context_sm()
    # During 'finalize()': pattern.mount_pre_context_sm()
    print "pattern\n"
    assert pattern.sm.is_DFA_compliant()
    ok_f, sm = Setup.buffer_encoding.do_state_machine(pattern.sm)
    sm = beautifier.do(pattern.sm)
    print sm.get_string(NormalizeF=True, Option="hex")
    if pattern.sm_pre_context_to_be_reversed:
        assert pattern.sm_pre_context_to_be_reversed.is_DFA_compliant()
        ok_f, sm = Setup.buffer_encoding.do_state_machine(
            pattern.sm_pre_context_to_be_reversed)
        reversed_sm = reverse.do(sm)
        print "pre-context = ", reversed_sm.get_string(NormalizeF=True,
                                                       Option="hex")
    if pattern.sm_bipd_to_be_reversed:
        assert pattern.sm_bipd_to_be_reversed.is_DFA_compliant()
        ok_f, sm = Setup.buffer_encoding.do_state_machine(
            pattern.sm_bipd_to_be_reversed)
        sm = reverse.do(sm)
        print "post-context backward input position detector = ", sm.get_string(
            NormalizeF=True, Option="hex")
Example No. 22
def do(the_state_machine, pre_context_sm, BeginOfLinePreContextF):
    """Sets up a pre-condition to the given state machine. This process
       is entirely different from any sequentializing or parallelization
       of state machines. Here, the state machine representing the pre-
       condition is **not** webbed into the original state machine!

       Instead, the following happens:

          -- the pre-condition state machine is inverted, because
             it is to be walked through backwards.
          -- the inverted state machine is marked with the state machine id
             of the_state_machine.        
          -- the original state machine will refer to the inverse
             state machine of the pre-condition.
          -- the initial state origins and the origins of the acceptance
             states are marked as 'pre-conditioned' indicating the id
             of the inverted state machine of the pre-condition.             
    """
    #___________________________________________________________________________________________
    # (*) do some consistency checking   
    # -- state machines with no states are senseless here. 
    assert not the_state_machine.is_empty() 
    assert pre_context_sm is None or not pre_context_sm.is_empty()
    # -- trivial pre-conditions should be added last, for simplicity

    #___________________________________________________________________________________________
    if pre_context_sm is None:
        # NOT: 'and ...' !
        if BeginOfLinePreContextF:
            # Mark all acceptance states with the 'trivial pre-context BeginOfLine' flag
            for state in the_state_machine.get_acceptance_state_list():
                state.set_pre_context_id(E_PreContextIDs.BEGIN_OF_LINE)
        return None

    # (*) Reverse the state machine of the pre-condition 
    reverse_pre_context = reverse.do(pre_context_sm)
        
    if BeginOfLinePreContextF:
        # Extend the existing pre-context with a preceding 'begin-of-line'.
        reverse_newline_sm  = reverse.do(StateMachine_Newline())
        reverse_pre_context = sequentialize.do([reverse_pre_context, 
                                                reverse_newline_sm])

    # (*) Once an acceptance state is reached no further analysis is necessary.
    acceptance_pruning.do(reverse_pre_context)

    # (*) Clean up what has been done by inversion (and optionally 'BeginOfLinePreContextF')
    #     AFTER acceptance_pruning (!)
    reverse_pre_context = beautifier.do(reverse_pre_context)

    # (*) let the state machine refer to it 
    #     [Is this necessary? Is it not enough that the acceptance origins point to it? <fschaef>]
    pre_context_sm_id = reverse_pre_context.get_id()

    # (*) Associate acceptance with pre-context id. 
    for state in the_state_machine.get_acceptance_state_list():
        state.set_pre_context_id(pre_context_sm_id)
    
    return reverse_pre_context
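
A toy illustration (plain strings, not state machines) of why the pre-context is reversed: walking backwards from the match position through the input is the same as matching the reversed pre-context forwards over the reversed history.

text, match_begin = "xxab", 4          # pre-context 'ab' must precede position 4
pre_context = "ab"
history_backwards = text[:match_begin][::-1]             # "baxx"
assert history_backwards.startswith(pre_context[::-1])   # reversed 'ab' == 'ba'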
Example No. 23
def do(the_state_machine, pre_context_sm, BeginOfLinePreContextF):
    """Sets up a pre-condition to the given state machine. This process
       is entirely different from any sequentializing or parallelization
       of state machines. Here, the state machine representing the pre-
       condition is **not** webbed into the original state machine!

       Instead, the following happens:

          -- the pre-condition state machine is inverted, because
             it is to be walked through backwards.
          -- the inverted state machine is marked with the state machine id
             of the_state_machine.        
          -- the original state machine will refer to the inverse
             state machine of the pre-condition.
          -- the initial state origins and the origins of the acceptance
             states are marked as 'pre-conditioned' indicating the id
             of the inverted state machine of the pre-condition.             
    """
    #___________________________________________________________________________________________
    # (*) do some consistency checking   
    # -- state machines with no states are senseless here. 
    assert not the_state_machine.is_empty() 
    assert pre_context_sm is None or not pre_context_sm.is_empty()
    # -- trivial pre-conditions should be added last, for simplicity

    #___________________________________________________________________________________________
    if pre_context_sm is None:
        if BeginOfLinePreContextF:
            # Mark all acceptance states with the 'trivial pre-context BeginOfLine' flag
            for state in the_state_machine.get_acceptance_state_list():
                state.set_pre_context_id(E_PreContextIDs.BEGIN_OF_LINE)
        return None

    # (*) Reverse the state machine of the pre-condition 
    inverse_pre_context = reverse.do(pre_context_sm)
        
    if BeginOfLinePreContextF:
        # Extend the existing pre-context with a preceding 'begin-of-line'.
        inverse_pre_context.mount_newline_to_acceptance_states(Setup.dos_carriage_return_newline_f, 
                                                               InverseF=True)

    # (*) Once an acceptance state is reached no further analysis is necessary.
    acceptance_pruning.do(inverse_pre_context)

    # (*) Clean up what has been done by inversion (and optionally 'BeginOfLinePreContextF')
    #     AFTER acceptance_pruning (!)
    inverse_pre_context = beautifier.do(inverse_pre_context)

    # (*) let the state machine refer to it 
    #     [Is this necessary? Is it not enough that the acceptance origins point to it? <fschaef>]
    pre_context_sm_id = inverse_pre_context.get_id()

    # (*) create origin data, in case where there is none yet create new one.
    #     (do not delete, otherwise existing information gets lost)
    for state in the_state_machine.states.itervalues():
        if not state.is_acceptance(): continue
        state.set_pre_context_id(pre_context_sm_id)
    
    return inverse_pre_context
Example No. 24
def do(SM_List):
    """The 'parallelize' module does a union of multiple state machines,
    even if they have different origins and need to be combined carefully.
    There is no reason why another 'union' operation should be implemented
    in this case.
    """
    result = parallelize.do(SM_List)
    return beautifier.do(result)
Example No. 25
def is_none(SM):
    """Does the given state machine represent a pattern which 
    matches absolutely nothing?
    """
    sm = beautifier.do(SM)
    if   len(sm.states) != 1:                 return False
    elif sm.get_init_state().is_acceptance(): return False
    else:                                     return True
Example No. 26
def is_none(SM):
    """Does the given state machine represent a pattern which 
    matches absolutely nothing?
    """
    sm = beautifier.do(SM)
    if len(sm.states) != 1: return False
    elif sm.get_init_state().is_acceptance(): return False
    else: return True
Example No. 27
def do(SM_List):
    """The 'parallelize' module does a union of multiple state machines,
    even if they have different origins and need to be combined carefully.
    There is no reason why another 'union' operation should be implemented
    in this case.
    """
    result = parallelize.do(SM_List)
    return beautifier.do(result)
Example No. 28
def __DFA(SM):
    if SM is None:
        return None
    elif SM.is_DFA_compliant():
        return SM

    result = beautifier.do(SM)
    return result
Example No. 29
def equal(X_str, Y_str):
    global X
    global Y
    global report
    exec("sm0 = " + X_str.replace("All", "All_sm").replace("None", "None_sm"))
    exec("sm1 = " + Y_str.replace("All", "All_sm").replace("None", "None_sm"))
    sm0 = beautifier.do(sm0)
    sm1 = beautifier.do(sm1)
    result = identity.do(sm0, sm1)
    if result is False:
        print "X:", X
        # print "Y:", Y
        print "Error"
        print "%s: -->\n%s" % (X_str, sm0)
        print "%s: -->\n%s" % (Y_str, sm1)
        print "#---------------------------------------------------------"
    protocol.append((X_str, "==", Y_str, result))
Example No. 30
def equal(X_str, Y_str):
    global X
    global Y
    global report
    exec("sm0 = " + X_str.replace("All", "All_sm").replace("None", "None_sm"))
    exec("sm1 = " + Y_str.replace("All", "All_sm").replace("None", "None_sm"))
    sm0 = beautifier.do(sm0)
    sm1 = beautifier.do(sm1)
    result = identity.do(sm0, sm1)
    if result is False:
        print "X:", X
        # print "Y:", Y
        print "Error"
        print "%s: -->\n%s" % (X_str, sm0)
        print "%s: -->\n%s" % (Y_str, sm1)
        print "#---------------------------------------------------------"
    protocol.append((X_str, "==", Y_str, result))
Example No. 31
def _prepare_indentation_counter(ModeName, OptionsDb, CounterDb, IncidenceDb, MHI):
    """Prepare indentation counter. An indentation counter is implemented by 
    the following:

    'newline' pattern --> triggers as soon as an UNSUPPRESSED newline occurs. 
                      --> entry to the INDENTATION COUNTER.

    'suppressed newline' --> INDENTATION COUNTER is NOT triggered.
     
    The suppressed-newline pattern is longer than (and takes precedence over)
    the newline pattern. With the suppressed newline it is possible to write
    lines which continue beyond the line end (see the backslash in Python,
    for example).

    RETURNS: List of:
             [0] newline PPT and
             [1] optionally the PPT of the newline suppressor.

    The primary pattern action pair list is to be the head of all pattern
    action pairs.

    MHI = Mode hierarchy index defining the priority of the current mode.
    """
    ISetup = OptionsDb.value("indentation")
    if ISetup is None: return [], []

    check_indentation_setup(ISetup)

    if ISetup.sm_newline_suppressor.get() is not None:
        sm_suppressed_newline = sequentialize.do([ISetup.sm_newline_suppressor.get(),
                                                  ISetup.sm_newline.get()])
        sm_suppressed_newline = beautifier.do(sm_suppressed_newline)
    else:
        sm_suppressed_newline = None

    data = { 
        "counter_db":                    CounterDb,
        "indentation_setup":             ISetup,
        "incidence_db":                  IncidenceDb,
        "default_indentation_handler_f": IncidenceDb.default_indentation_handler_f(),
        "mode_name":                     ModeName,
        "sm_suppressed_newline":         sm_suppressed_newline,
    }

    ppt_list = [
        # 'newline' triggers --> indentation counter
        PPT_indentation_handler_newline(MHI, data, ISetup, CounterDb)
    ]

    if sm_suppressed_newline is not None:
        ppt_list.append(
            # 'newline-suppressor' followed by 'newline' is ignored (skipped)
            PPT_indentation_handler_suppressed_newline(MHI, 
                                                       sm_suppressed_newline)
        )

    return [], ppt_list
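
A regex-level sketch of the precedence argument: with a suppressor such as backslash (as in Python line continuation), the sequentialized suppressor-plus-newline pattern is the longer match and therefore wins over the bare newline pattern.

import re

sm_newline            = re.compile(r"\n")
sm_suppressed_newline = re.compile(r"\\\n")   # sequentialize(suppressor, newline)
assert sm_suppressed_newline.match("\\\n")    # suppressed: no indentation counting
assert sm_newline.match("\n") and not sm_suppressed_newline.match("\n")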
Example No. 32
def _prepare_indentation_counter(ModeName, OptionsDb, CounterDb, IncidenceDb, MHI):
    """Prepare indentation counter. An indentation counter is implemented by 
    the following:

    'newline' pattern --> triggers as soon as an UNSUPPRESSED newline occurs. 
                      --> entry to the INDENTATION COUNTER.

    'suppressed newline' --> INDENTATION COUNTER is NOT triggered.
     
    The suppressed-newline pattern is longer than (and takes precedence over)
    the newline pattern. With the suppressed newline it is possible to write
    lines which continue beyond the line end (see the backslash in Python,
    for example).

    RETURNS: List of:
             [0] newline PPT and
             [1] optionally the PPT of the newline suppressor.

    The primary pattern action pair list is to be the head of all pattern
    action pairs.

    MHI = Mode hierarchy index defining the priority of the current mode.
    """
    ISetup = OptionsDb.value("indentation")
    if ISetup is None: return [], []

    check_indentation_setup(ISetup)

    if ISetup.sm_newline_suppressor.get() is not None:
        sm_suppressed_newline = sequentialize.do([ISetup.sm_newline_suppressor.get(),
                                                  ISetup.sm_newline.get()])
        sm_suppressed_newline = beautifier.do(sm_suppressed_newline)
    else:
        sm_suppressed_newline = None

    data = { 
        "counter_db":                    CounterDb,
        "indentation_setup":             ISetup,
        "incidence_db":                  IncidenceDb,
        "default_indentation_handler_f": IncidenceDb.default_indentation_handler_f(),
        "mode_name":                     ModeName,
        "sm_suppressed_newline":         sm_suppressed_newline,
    }

    ppt_list = [
        # 'newline' triggers --> indentation counter
        PPT_indentation_handler_newline(MHI, data, ISetup, CounterDb)
    ]

    if sm_suppressed_newline is not None:
        ppt_list.append(
            # 'newline-suppressor' followed by 'newline' is ignored (skipped)
            PPT_indentation_handler_suppressed_newline(MHI, 
                                                       sm_suppressed_newline)
        )

    return [], ppt_list
Example No. 33
def __core(SuperPattern, SubPattern):
    print ("super = " + SuperPattern).replace("\n", "\\n").replace("\t", "\\t")
    print ("sub   = " + SubPattern).replace("\n", "\\n").replace("\t", "\\t")
    super_p = regex.do(SuperPattern, {}).extract_sm()
    sub_p   = regex.do(SubPattern, {}).extract_sm()
    result  = difference.do(super_p, sub_p)
    print "result = ", beautifier.do(result)  # .get_string(NormalizeF=False)
Example No. 34
def detect_backward(CoreStateMachine, PostConditionStateMachine):

    """A 'backward ambiguity' denotes the case where it cannot be clearly be
       determined how far to go back from the end of a post-condition. 
       
       NOTE: This does not mean that the post-condition is ambiguous. Many
       cases that are backward ambiguous can be handled by quex's normal
       post-condition handling.

       Examples:  x/x+   is backward ambiguous because in a stream
                         of 'x' one cannot determine with a pure
                         state machine where to stop. This case,
                         though can be handled by the normal post-
                         condition implementation.

                  x+/x+  is backward ambiguous and cannot be handled
                         by the normal implementation. In fact, this
                         specification does not allow any conclusions
                         about the user's intent of where to reset the
                         input after match.
    """

    __assert_state_machines(CoreStateMachine, PostConditionStateMachine)

    my_post_context_sm = PostConditionStateMachine.clone()

    # (*) Create a modified version of the post condition, where the
    #     initial state is an acceptance state, and no other. This 
    #     allows the detector to trigger on 'iteration'.
    #
    # -- delete all acceptance states in the post condition
    # for state in my_post_context_sm.states.values():
    #   state.set_acceptance(False)
    # -- set the initial state as acceptance state
    # my_post_context_sm.get_init_state().set_acceptance(True)
    my_core_sm = beautifier.do(reverse.do(CoreStateMachine))

    tmp = deepcopy(PostConditionStateMachine) # no deepcopy needed here, I guess <fschaef 11y11m01d>
    my_post_context_sm = beautifier.do(reverse.do(tmp))

    return detect_forward(my_post_context_sm, my_core_sm)
Example No. 35
def detect_backward(CoreStateMachine, PostConditionStateMachine):

    """A 'backward ambiguity' denotes the case where it cannot be clearly be
       determined how far to go back from the end of a post-condition. 
       
       NOTE: This does not mean that the post-condition is ambiguous. Many
       cases that are backward ambiguous can be handled by quex's normal
       post-condition handling.

       Examples:  x/x+   is backward ambiguous because in a stream
                         of 'x' one cannot determine with a pure
                         state machine where to stop. This case,
                         though can be handled by the normal post-
                         condition implementation.

                  x+/x+  is backward ambiguous and cannot be handled
                         by the normal implementation. In fact, this
                         specification does not allow any conclusions
                         about the user's intent of where to reset the
                         input after match.
    """

    __assert_state_machines(CoreStateMachine, PostConditionStateMachine)

    my_post_context_sm = PostConditionStateMachine.clone()

    # (*) Create a modified version of the post condition, where the
    #     initial state is an acceptance state, and no other. This 
    #     allows the detector to trigger on 'iteration'.
    #
    # -- delete all acceptance states in the post condition
    # for state in my_post_context_sm.states.values():
    #   state.set_acceptance(False)
    # -- set the initial state as acceptance state
    # my_post_context_sm.get_init_state().set_acceptance(True)
    my_core_sm = beautifier.do(reverse.do(CoreStateMachine))

    tmp = deepcopy(PostConditionStateMachine) # no deepcopy needed here, I guess <fschaef 11y11m01d>
    my_post_context_sm = beautifier.do(reverse.do(tmp))

    return detect_forward(my_post_context_sm, my_core_sm)
Example No. 36
def do(SM_A, SM_B):
    """Cut Begin:

    Let SM_A match the set of lexemes LA and SM_B match the set of lexemes LB.
    Then, the cut begin operation 'CutBegin'

                           SM_C = CutBegin(SM_A, SM_B)

    results in a state machine SM_C. The set of lexemes which it matches is
    given by 
                             .-
                             |   c(La) for all La in L(SM_A) where La
                             |         starts with one of L(SM_B).
                L(SM_C)  =  <          
                             |   La    for all other La from L(SM_A)
                             '-

    The cut operation 'c(La)' takes the elements Lb out of La that match SM_B.
    That is if La = [x0, x1, ... xi, xj, ... xN] and there is a Lb in L(SM_B)
    with Lb = [x0, x1, ... xi], then

                    c(La) = [xj, ... xN]
                           
    EXAMPLE 1: 

          NotBegin([0-9]+, [0-9]) = [0-9]{2,}

    That is where '[0-9]+' required at least one character in [0-9], the 
    cut version does not allow lexemes with one [0-9]. The result is a
    repetition of at least two characters in [0-9].

    EXAMPLE 2: 

          NotBegin(1(2?), 12) = 1

    Because the lexeme "12" is not to be matched by the result. The lexeme
    "1", though, does not start with "12". Thus, it remains.

    EXAMPLE 3: 

          NotBegin([a-z]+, print) = all identifiers except 'print'

    (C) 2013 Frank-Rene Schaefer
    """
    cutter = WalkAlong(SM_A, SM_B)
    cutter.do((SM_A.init_state_index, SM_B.init_state_index, None))

    # Delete orphaned and hopeless states in result
    cutter.result.clean_up()

    # Get proper state indices for result
    return beautifier.do(cutter.result)
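
A toy model of the definition above on finite lexeme sets (strings for lexemes, a set of strings for L(SM_B)); one plausible reading, consistent with the c(La) definition, is that fully-consumed lexemes disappear:

def toy_cut_begin(L_A, L_B):
    out = set()
    for la in L_A:
        hits = [lb for lb in L_B if la.startswith(lb)]
        if not hits:
            out.add(la)                          # other La remain
        else:
            rest = la[len(max(hits, key=len)):]  # c(La)
            if rest: out.add(rest)               # fully-consumed lexemes vanish
    return out

# cf. EXAMPLE 2 above: (1(2?), 12) --> 1
assert toy_cut_begin({"1", "12"}, {"12"}) == {"1"}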
Example No. 37
def do(SM_A, SM_B):
    """Cut Begin:

    Let SM_A match the set of lexemes LA and SM_B match the set of lexemes LB.
    Then, the cut begin operation 'CutBegin'

                           SM_C = CutBegin(SM_A, SM_B)

    results in a state machine SM_C. The set of lexemes which it matches is
    given by 
                             .-
                             |   c(La) for all La in L(SM_A) where La
                             |         starts with one of L(SM_B).
                L(SM_C)  =  <          
                             |   La    for all other La from L(SM_A)
                             '-

    The cut operation 'c(La)' takes the elements Lb out of La that match SM_B.
    That is if La = [x0, x1, ... xi, xj, ... xN] and there is a Lb in L(SM_B)
    with Lb = [x0, x1, ... xi], then

                    c(La) = [xj, ... xN]
                           
    EXAMPLE 1: 

          NotBegin([0-9]+, [0-9]) = [0-9]{2,}

    That is where '[0-9]+' required at least one character in [0-9], the 
    cut version does not allow lexemes with one [0-9]. The result is a
    repetition of at least two characters in [0-9].

    EXAMPLE 2: 

          NotBegin(1(2?), 12) = 1

    Because the lexeme "12" is not to be matched by the result. The lexeme
    "1", though, does not start with "12". Thus, it remains.

    EXAMPLE 3: 

          NotBegin([a-z]+, print) = all identifiers except 'print'

    (C) 2013 Frank-Rene Schaefer
    """
    cutter = WalkAlong(SM_A, SM_B)
    cutter.do((SM_A.init_state_index, SM_B.init_state_index, None))

    # Delete orphaned and hopeless states in result
    cutter.result.clean_up()

    # Get proper state indices for result
    return beautifier.do(cutter.result)
Example No. 38
def unary_checks(Q, operation):
    Q_plus = beautifier.do(repeat.do(Q))
    Q_star = beautifier.do(repeat.do(Q, min_repetition_n=0))

    Q_is_Q_star = identity.do(Q, Q_star)
    Q_is_Q_plus = identity.do(Q, Q_plus)

    # \Cut{Q Q} = \Nothing
    y = operation(Q, Q)
    assert y.is_Nothing()

    # if Q != Q+: \CutBegin{Q+ Q} = Q*
    if not Q_is_Q_plus:
        y = operation(Q_plus, Q)
        assert identity.do(y, Q_star)

    # if Q != Q*: \CutBegin{Q* Q} = Q*
    if not Q_is_Q_star:
        y = operation(Q_star, Q)
        assert identity.do(y, Q_star)

    # \Cut{Q \Nothing} = Q
    y = operation(Q, DFA.Nothing())
    assert identity.do(y, Q)

    # \Cut{\Nothing Q} = \Nothing
    y = operation(DFA.Nothing(), Q)
    assert y.is_Nothing()

    # \Cut{Q \Universal} = \Nothing
    y = operation(Q, DFA.Universal())
    assert y.is_Nothing()

    # NOT: \Cut{\Universal Q} = \Universal
    if not Q_is_Q_star and not Q_is_Q_plus:
        y = operation(Q, DFA.Universal())
        assert y.is_Nothing()

    return Q_star, Q_plus
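
The checks lean on the relation between repeat.do with and without min_repetition_n=0; a regex-level sketch of that relation (Q stands for any pattern, here 'a'):

import re

Q_plus = re.compile(r"a+$")   # repeat.do(Q)
Q_star = re.compile(r"a*$")   # repeat.do(Q, min_repetition_n=0)
assert Q_star.match("") and not Q_plus.match("")
assert all(Q_plus.match("a" * n) for n in range(1, 5))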
Example No. 39
    def __specify_newline(self, Sm, sr):
        _error_if_defined_before(self.result.sm_newline, sr)

        beginning_char_set = Sm.get_beginning_character_set()
        ending_char_set    = Sm.get_ending_character_set()

        self.specifier_count_op_map.add(beginning_char_set, "begin(newline)", None, sr)

        # Do not consider a character from newline twice
        ending_char_set.subtract(beginning_char_set)
        if not ending_char_set.is_empty():
            self.specifier_count_op_map.add(ending_char_set, "end(newline)", None, sr)

        if not Sm.is_DFA_compliant(): Sm = beautifier.do(Sm)
        self.result.sm_newline.set(Sm, sr)
Example No. 40
def _finalize_mount_post_context_sm(Sm, SmPostContext, PostEOL_f, PostEOS_f, Sr):
    # In case of a 'trailing post context' a 'bipd_sm' may be provided
    # to detect the input position after match in backward direction.
    # BIPD = backward input position detection.
    sm,                    \
    bipd_sm_to_be_reversed = setup_post_context.do(Sm, SmPostContext, 
                                                   PostEOL_f, PostEOS_f, Sr)

    if bipd_sm_to_be_reversed is None: 
        return sm, None

    elif not bipd_sm_to_be_reversed.is_DFA_compliant(): 
        bipd_sm_to_be_reversed = beautifier.do(bipd_sm_to_be_reversed)

    return sm, bipd_sm_to_be_reversed
Example No. 41
def do(SM_A, SM_B):
    """\NotIn{P Q} = \NotBegin{P \Any*(Q+)}
    """
    all_star = repeat.do(special.get_any(), min_repetition_n=0)
    sm_b_repeated = repeat.do(SM_B, min_repetition_n=1)

    tmp = sequentialize.do([all_star, sm_b_repeated],
                           MountToFirstStateMachineF=True,
                           CloneRemainingStateMachinesF=True)

    tmp = beautifier.do(tmp)

    # There might be many paths which have no hope to reach acceptance
    tmp.clean_up()

    return complement_begin.do(SM_A, tmp)
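
A regex-level sketch of the identity in the docstring: a lexeme 'contains' Q exactly if it begins with \Any*(Q+), so removing those lexemes from P implements \NotIn. Toy check with Q = 'ab':

import re

contains_Q = re.compile(r".*(?:ab)+")                 # \Any*(Q+) as prefix
lexemes    = ["xaby", "ab", "xy"]
not_in     = [w for w in lexemes if not contains_Q.match(w)]
assert not_in == ["xy"]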
Example No. 42
def do(SM_A, SM_B):
    """\NotIn{P Q} = \NotBegin{P \Any*(Q+)}
    """
    all_star      = repeat.do(special.get_any(), min_repetition_n=0)
    sm_b_repeated = repeat.do(SM_B, min_repetition_n=1)

    tmp = sequentialize.do([all_star, sm_b_repeated], 
                           MountToFirstStateMachineF=True, 
                           CloneRemainingStateMachinesF=True)

    tmp = beautifier.do(tmp)

    # There might be many paths which have no hope to reach acceptance
    tmp.clean_up()

    return complement_begin.do(SM_A, tmp)
Example No. 43
def __implement_epsilon_transitions(result, A, epsilon_transition_set):
    """RETURNS: [0] The resulting state machine, if a 'cut' has happened.
                    The original state machine if no 'cut' has happened.
                [1] True, if a cut has happened, False else.
    """
    if not epsilon_transition_set:
        return A, False
    else:
        for from_si, to_si, acceptance_f in epsilon_transition_set:
            if from_si == result.init_state_index:
                result.add_epsilon_transition(from_si, to_si)
            else:
                result.add_epsilon_transition(from_si,
                                              to_si,
                                              RaiseAcceptanceF=acceptance_f)
        result.delete_hopeless_states()
        return beautifier.do(result), True
Example No. 44
def test(Idx, sm_pre, sm, sm_post, BOL_F, EOL_F):
    ## if Idx != 5: return
    result = sm.clone()
    print "##-- %i -----------------------------------------------------------------------" % Idx

    if sm_pre is not None: print " -- pre-condition  = True"
    else: print " -- pre-condition  = False"

    if sm_post is not None: print " -- post-condition = True"
    else: print " -- post-condition = False"
    print " -- begin of line  = ", BOL_F
    print " -- end of line    = ", EOL_F

    ipsb_sm = setup_post_context.do(result, sm_post, EOL_F, False,
                                    SourceRef_VOID)
    pre_context_sm = setup_pre_context.do(result, sm_pre, BOL_F, False)
    if pre_context_sm is None:
        inverse_pre_context_sm = None
    else:
        inverse_pre_context_sm = reverse.do(pre_context_sm)
        inverse_pre_context_sm.set_id(pre_context_sm.get_id())
    #
    # print "EXPRESSION = ", result
    # print "POST CONDITION = ", post_sm
    # print "APPENDED = ", result
    result = beautifier.do(result)
    #
    #
    print
    print "result sm.id     = ", result.get_id()
    if inverse_pre_context_sm is not None:
        print "result pre sm.id = ", inverse_pre_context_sm.get_id()

    begin_of_line_f = None
    for state in result.get_acceptance_state_list():
        BOF = state.single_entry.has_pre_context_begin_of_line()
        if begin_of_line_f is None: begin_of_line_f = BOF
        else: assert begin_of_line_f == BOF

    print "result = ", result
    if inverse_pre_context_sm is not None:
        print "inverse_pre_context_sm = ", inverse_pre_context_sm
    print "trivially pre-conditioned = ", begin_of_line_f
Example No. 45
def do_state_machine(X):
    """Transforms a given state machine from 'Unicode Driven' to another
       character encoding type.
    
       RETURNS: 
       [0] Transformation complete (True->yes, False->not all transformed)
       [1] Transformed state machine. It may be the same as it was
           before, if no transformation actually took place.

       It is ensured that the result of this function is a DFA compliant
       state machine.
    """
    if X is None: return True, None
    assert X.is_DFA_compliant()

    complete_f, sm = Setup.buffer_codec.transform(X)

    if sm.is_DFA_compliant(): return complete_f, sm
    else: return complete_f, beautifier.do(sm)
Example No. 46
def do_state_machine(X):
    """Transforms a given state machine from 'Unicode Driven' to another
       character encoding type.
    
       RETURNS: 
       [0] Transformation complete (True->yes, False->not all transformed)
       [1] Transformed state machine. It may be the same as it was
           before, if no transformation actually took place.

       It is ensured that the result of this function is a DFA compliant
       state machine.
    """
    if X is None: return True, None
    assert X.is_DFA_compliant()

    complete_f, sm = Setup.buffer_codec.transform(X)

    if sm.is_DFA_compliant(): return complete_f, sm
    else:                     return complete_f, beautifier.do(sm)
Example No. 47
def do(SM_A, SM_B):
    """Complement Begin:

    Let SM_A match the set of lexemes LA and SM_B match the set of lexemes LB.
    Then, the complement begin operation 'NotBegin'

                           SM_C = NotBegin(SM_A, SM_B)

    results in a state machine SM_C, matches all lexemes of LA except for those
    that start with a lexeme from LB.

    EXAMPLE 1: 

          NotBegin([0-9]+, [0-9]) = \None

    EXAMPLE 2: 

          NotBegin(1(2?), 12) = 1

    Because the lexeme "12" is not to be matched by the result. The lexeme
    "1", though, does not start with "12". Thus, it remains.

    EXAMPLE 3: 

          NotBegin([a-z]+, print) = all identifiers except 'print'

    (C) 2013 Frank-Rene Schaefer
    """
    cutter = WalkAlong(SM_A, SM_B)
    if SM_B.get_init_state().is_acceptance():
        return special.get_none()

    cutter.do((SM_A.init_state_index, SM_B.init_state_index))

    # Delete orphaned and hopeless states in result
    cutter.result.clean_up()

    # Get proper state indices for result
    return beautifier.do(cutter.result)
Example No. 48
def do(sm):
    """The UTF8 encoding causes a single unicode character code being translated
       into a sequence of bytes. A state machine triggering on unicode characters
       can be converted into a state machine triggering on UTF8 bytes.

       For this a simple transition on a character 'X':

            [ 1 ]---( X )--->[ 2 ]

       needs to be translated into a sequence of state transitions

            [ 1 ]---(x0)--->[ S0 ]---(x1)--->[ S1 ]---(x2)--->[ 2 ]

       where x0, x1, x2 are the UTF8 bytes that represent unicode 'X'.
       States S0 and S1 are intermediate states created only so that
       x1 and x2 can trigger. Note that the UTF8 sequence ends
       at the same state '2' as the previous single trigger 'X'.
    """
    state_list = sm.states.items()
    for state_index, state in state_list:
        # Get the 'transition_list', i.e. a list of pairs (TargetState, NumberSet)
        # which indicates what target state is reached via what number set.
        transition_list = state.target_map.get_map().items()
        # Clear the state's transitions, now. This way it can absorb new
        # transitions to intermediate states.
        state.target_map.clear()
        # Loop over all transitions
        for target_state_index, number_set in transition_list:
            # We take the intervals with 'PromiseToTreatWellF' even though they
            # are changed. This is because the intervals would be lost anyway
            # after the state split, so we use the same memory and do not 
            # cause a time consuming memory copy and constructor calls.
            for interval in number_set.get_intervals(PromiseToTreatWellF=True):
                create_intermediate_states(sm, state_index, target_state_index, interval)

    return beautifier.do(sm)
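
What the byte split looks like for a single character can be verified with
Python's own codecs (standard library behavior, independent of quex):

# Unicode U+20AC (EURO SIGN) becomes the three UTF8 bytes x0, x1, x2.
x0, x1, x2 = [ord(b) for b in u'\u20ac'.encode('utf-8')]
assert (x0, x1, x2) == (0xE2, 0x82, 0xAC)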
Example No. 49
def do(sm):
    """Presumably the UTF16 counterpart of the UTF8 conversion above: split
       transitions on characters >= 0x10000 into sequences over intermediate
       states (surrogate pairs), and remove the 'ForbiddenRange' (the UTF16
       surrogate gap) from all character sets.
    """
    global ForbiddenRange

    state_list = sm.states.items()
    for state_index, state in state_list:
        # Get the 'transition_list', i.e. a list of pairs (TargetState, NumberSet)
        # which indicates what target state is reached via what number set.
        transition_list = state.target_map.get_map().items()
        # Clear the state's transitions, now. This way it can absorb new
        # transitions to intermediate states.
        state.target_map.clear()
        # Loop over all transitions
        for target_state_index, number_set in transition_list:
            # -- 1st check whether a modification is necessary
            if number_set.supremum() <= 0x10000:
                sm.states[state_index].add_transition(number_set, target_state_index)
                continue

            # -- We help: General regular expressions may not bother with 
            #    the 'ForbiddenRange'. Let us be so kind and cut it here.
            number_set.subtract(ForbiddenRange)
            number_set.cut_lesser(0)
            number_set.cut_greater_or_equal(0x110000)

            # -- Add intermediate States
            #    We take the intervals with 'PromiseToTreatWellF' even though they
            #    are changed. This is because the intervals would be lost anyway
            #    after the state split, so we use the same memory and do not 
            #    cause a time consuming memory copy and constructor calls.
            interval_list = number_set.get_intervals(PromiseToTreatWellF=True)

            for interval in interval_list:
                create_intermediate_states(sm, state_index, target_state_index, interval)
    
    result = beautifier.do(sm)
    return result
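
For code points at or above 0x10000 the intermediate states implement the
standard UTF16 surrogate pair arithmetic, sketched below; this is plain
Unicode math, not the actual quex code:

def utf16_surrogate_pair(cp):
    # Standard UTF16 decomposition for code points in [0x10000, 0x110000).
    assert 0x10000 <= cp < 0x110000
    cp   -= 0x10000
    lead  = 0xD800 | (cp >> 10)      # high surrogate
    trail = 0xDC00 | (cp & 0x3FF)    # low surrogate
    return lead, trail

assert utf16_surrogate_pair(0x10437) == (0xD801, 0xDC37)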
Example No. 50
def StateMachine_Newline():
    """Creates a state machine matching newline according to what has been 
    specified in the setup (Setup.dos_carriage_return_newline_f). 

    That is, if DOS newline is specified, the state machine represents '\r\n';
    if it is unix only, it represents '\n'. If both are required, they are
    implemented in parallel.

    RETURNS: StateMachine
    """
    UnixF = True
    DosF  = Setup.dos_carriage_return_newline_f

    NL = ord('\n')  # (pure) newline, i.e. line feed
    CR = ord('\r')  # carriage return

    sm = StateMachine()
    if UnixF:
        sm.add_transition(sm.init_state_index, NL, AcceptanceF=True)
    if DosF:
        idx = sm.add_transition(sm.init_state_index, CR, AcceptanceF=False)
        sm.add_transition(idx, NL, AcceptanceF=True)

    return beautifier.do(sm)
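
The resulting state machine corresponds to the regular expression '\n|\r\n'
when both flags are set; a quick sanity check with Python's re module
(illustrative only):

import re
newline = re.compile(r'\n|\r\n')
assert newline.match('\n')   is not None   # unix newline, always matched
assert newline.match('\r\n') is not None   # dos newline, if enabled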
Example No. 51
def test(Idx, sm_pre, sm, sm_post, BOF_F, EOF_F):    
    result = sm.clone()
    print "##-- %i -----------------------------------------------------------------------" % Idx

    if sm_pre is not None: print " -- pre-condition  = True"
    else:                  print " -- pre-condition  = False"
        
    if sm_post is not None: print " -- post-condition = True"
    else:                   print " -- post-condition = False"
    print " -- begin of line  = ", BOF_F
    print " -- end of line    = ", EOF_F

    ipsb_sm                = setup_post_context.do(result, sm_post, EOF_F, SourceRef_VOID)
    inverse_pre_context_sm = setup_pre_context.do(result, sm_pre, BOF_F)
    #
    # print "EXPRESSION = ", result
    # print "POST CONDITION = ", post_sm
    # print "APPENDED = ", result
    result = beautifier.do(result)
    #
    #
    print
    print "result sm.id     = ", result.get_id()
    if inverse_pre_context_sm is not None:
        print "result pre sm.id = ", inverse_pre_context_sm.get_id()

    begin_of_line_f = None
    for state in result.get_acceptance_state_list():
        BOF = state.single_entry.has_begin_of_line_pre_context()
        if begin_of_line_f is None: begin_of_line_f = BOF
        else:                       assert begin_of_line_f == BOF

    print "result = ", result
    if inverse_pre_context_sm is not None:
        print "inverse_pre_context_sm = ", inverse_pre_context_sm
    print "trivially pre-conditioned = ", begin_of_line_f
Example No. 52
def __parse_option(fh, new_mode):
    def get_pattern_object(SM):
        if not SM.is_DFA_compliant(): result = nfa_to_dfa.do(SM)
        else:                         result = SM
        result = hopcroft.do(result, CreateNewStateMachineF=False)
        return Pattern(result, AllowStateMachineTrafoF=True)

    identifier = read_option_start(fh)
    if identifier is None: return False

    verify_word_in_list(identifier, mode_option_info_db.keys(),
                        "mode option", fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified character set. The most common application is a
        # whitespace skipper such as '[ \t\n]'. The skipper definition allows quex
        # to implement skipping of these regions very efficiently.
        pattern_str, trigger_set = regular_expression.parse_character_set(fh, PatternStringF=True)
        skip_whitespace(fh)

        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier, fh)

        if trigger_set.is_empty():
            error_msg("Empty trigger set for skipper." % identifier, fh)

        # TriggerSet skipping is implemented the following way: as soon as one element of the 
        # trigger set appears, the state machine enters the 'trigger set skipper section'.
        # The skipper is entered as if the opener pattern was a normal pattern and the
        # 'skipper' were the action.
        # NOTE: The corresponding CodeFragment for skipping is created in 'implement_skippers(...)'.
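        # A rough sketch of what the generated skipper boils down to; the names
        # 'lexatom' and 'input_next' are illustrative, not the actual generated
        # code:
        #
        #        while lexatom in trigger_set:
        #            lexatom = input_next()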
        pattern_sm  = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index, trigger_set, AcceptanceF=True)

        # Skipper code is to be generated later
        action = GeneratedCode(skip_character_set.do, 
                               FileName = fh.name, 
                               LineN    = get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        new_mode.add_match(pattern_str, action, get_pattern_object(pattern_sm), 
                           Comment=E_SpecialPatterns.SKIP)

        return True

    elif identifier in ["skip_range", "skip_nested_range"]:
        # A non-nesting skipper can contain a full fledged regular expression as opener,
        # since it only affects the trigger. Not so the nested range skipper (see below).

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings' not state machines
            opener_str, opener_sequence = __parse_string(fh, "Opener pattern for 'skip_nested_range'")
            opener_sm = StateMachine.from_sequence(opener_sequence)
        else:
            opener_str, opener_pattern = regular_expression.parse(fh)
            opener_sm = opener_pattern.sm
            # For 'range skipping' the opener sequence is not needed, only the opener state
            # machine is webbed into the pattern matching state machine.
            opener_sequence       = None

        skip_whitespace(fh)

        # -- closer
        closer_str, closer_sequence = __parse_string(fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier, fh)

        # Skipper code is to be generated later
        generator_function, comment = { 
                "skip_range":        (skip_range.do,        E_SpecialPatterns.SKIP_RANGE),
                "skip_nested_range": (skip_nested_range.do, E_SpecialPatterns.SKIP_NESTED_RANGE),
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName = fh.name, 
                               LineN    = get_current_line_info_number(fh))

        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"]       = new_mode.name

        new_mode.add_match(opener_str, action, get_pattern_object(opener_sm), Comment=comment)

        return True
        
    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.
        # -- Suppressed Newline = Suppressor followed by Newline,
        #    then newline does not trigger indentation counting.
        suppressed_newline_pattern_str = ""
        if value.newline_suppressor_state_machine.get() is not None:
            suppressed_newline_pattern_str = \
                  "(" + value.newline_suppressor_state_machine.pattern_string() + ")" \
                + "(" + value.newline_state_machine.pattern_string() + ")"
                                           
            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])
                 
            FileName = value.newline_suppressor_state_machine.file_name
            LineN    = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code = UserCodeFragment("goto %s;" % get_label("$start", U=True), FileName, LineN)

            new_mode.add_match(suppressed_newline_pattern_str, code, 
                               get_pattern_object(suppressed_newline_sm),
                               Comment=E_SpecialPatterns.SUPPRESSED_INDENTATION_NEWLINE)

        # When a line is empty, there shall be no indentation count on it.
        # Here comes the trick: 
        #
        #      Let               newline         
        #      be defined as:    newline ([space]* newline)*
        # 
        # This way empty lines are eaten away before the indentation count is
        # activated (see the sketch after this function).

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index, value.indentation_count_character_set(), 
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- nfa to dfa; hopcroft optimization
        sm = beautifier.do(x4)

        FileName = value.newline_state_machine.file_name
        LineN    = value.newline_state_machine.line_n
        action   = GeneratedCode(indentation_counter.do, FileName, LineN)

        action.data["indentation_setup"] = value

        new_mode.add_match(value.newline_state_machine.pattern_string(), action, 
                           get_pattern_object(sm), 
                           Comment=E_SpecialPatterns.INDENTATION_NEWLINE)

        # Announce the mode to which the setup belongs
        value.set_containing_mode_name(new_mode.name)
    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds
    assert mode_option_info_db.has_key(identifier)

    # Is the option of the appropriate value?
    option_info = mode_option_info_db[identifier]
    if option_info.domain is not None and value not in option_info.domain:
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "Though, possible for this option are only: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option
    new_mode.add_option(identifier, value)

    return True
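
The effect of the 'newline ([space]* newline)*' definition from the
indentation branch can be observed with an equivalent regular expression;
re is used only for illustration, with '[ ]' standing in for the
indentation character set:

import re
newline = re.compile(r'\n([ ]*\n)*')
# The whole run of empty lines is consumed by a single 'newline' match, so
# indentation counting starts only after the last blank line.
assert newline.match('\n  \n\n').group() == '\n  \n\n'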