Esempio n. 1
0
def do(SM_A, SM_B):
    """Treat the tail of 'SM_A' lexemes with respect to 'SM_B' by working on
    the reversed state machines.

    PROCEDURE:

       (1) 'SM_A' and 'SM_B' are reversed and beautified.
       (2) 'complement_begin.do()' is applied on the two reversed machines.
       (3) The result of (2) is reversed again and returned.

    IDEA: Let 'l' be a lexeme of SM_A

                       l = [x0, x1, ... xj, xk, ... xN ]

    where '[xk ... xN]' is a lexeme of SM_B. In the reversed state machines
    the tail becomes a head, i.e.

                       reverse(SM_A) matches  s = [xN, ... xk, xj, ... x1, x0 ]
                       reverse(SM_B) matches  t = [xN, ... xk]

    Thus, an operation that acts on the begin of lexemes can be used on the
    reversed machines. Reversing the result delivers something in terms of

                       u = [ x0, x1, ... xj ]

    NOTE(review): The original description speaks of a 'cut' operation, while
    the operation applied here is 'complement_begin' -- confirm which
    behavior is intended.

    (C) Frank-Rene Schaefer
    """
    # Step (1): reverse and clean up both operands; 'SM_A' first, then 'SM_B'.
    reversed_a = reverse.do(SM_A)
    reversed_a = beautifier.do(reversed_a)
    reversed_b = reverse.do(SM_B)
    reversed_b = beautifier.do(reversed_b)

    # Steps (2) and (3): operate on the 'begin' in the reversed domain and
    # translate the result back.
    return reverse.do(complement_begin.do(reversed_a, reversed_b))
Esempio n. 2
0
def do(SM_A, SM_B):
    """Apply 'complement_begin' on the reversed versions of 'SM_A' and 'SM_B'
    and return the reversed result.

    BACKGROUND: A lexeme 'l' (lowercase L) of SM_A

                       l = [x0, x1, ... xj, xk, ... xN ]

    whose tail '[xk ... xN]' is a lexeme of SM_B appears in reverse(SM_A) as

                       s = [xN, ... xk, xj, ... x1, x0 ]

    while the lexeme of SM_B appears in reverse(SM_B) as

                       t = [xN, ... xk]

    That is, what has been an 'end' in the original state machines is a
    'begin' in the reversed ones. An operation on the 'begin' is applied there
    and the result is reversed, so that it relates again to

                       u = [ x0, x1, ... xj ]

    NOTE(review): The original text describes the operation as 'cut'; the code
    calls 'complement_begin.do()' -- to be confirmed.

    (C) Frank-Rene Schaefer
    """
    operand_list = []
    for original_sm in (SM_A, SM_B):
        # Reverse first, then beautify; 'SM_A' is treated before 'SM_B'.
        operand_list.append(beautifier.do(reverse.do(original_sm)))

    result_reversed = complement_begin.do(operand_list[0], operand_list[1])
    return reverse.do(result_reversed)
Esempio n. 3
0
def test(TestString):
    print "-------------------------------------------------------------------"
    print "expression    = \"" + TestString + "\""
    pattern = core.do(TestString, {}).finalize(None)

    # During 'finalize()': pattern.transform(Setup.buffer_encoding)
    # During 'finalize()': pattern.mount_post_context_sm()
    # During 'finalize()': pattern.mount_pre_context_sm()
    print "pattern\n"
    assert pattern.sm.is_DFA_compliant()
    ok_f, sm = Setup.buffer_encoding.do_state_machine(pattern.sm)
    sm = beautifier.do(pattern.sm)
    print sm.get_string(NormalizeF=True, Option="hex")
    if pattern.sm_pre_context_to_be_reversed:
        assert pattern.sm_pre_context_to_be_reversed.is_DFA_compliant()
        ok_f, sm = Setup.buffer_encoding.do_state_machine(
            pattern.sm_pre_context_to_be_reversed)
        reversed_sm = reverse.do(sm)
        print "pre-context = ", reversed_sm.get_string(NormalizeF=True,
                                                       Option="hex")
    if pattern.sm_bipd_to_be_reversed:
        assert pattern.sm_bipd_to_be_reversed.is_DFA_compliant()
        ok_f, sm = Setup.buffer_encoding.do_state_machine(
            pattern.sm_bipd_to_be_reversed)
        sm = reverse.do(sm)
        print "post-context backward input position detector = ", sm.get_string(
            NormalizeF=True, Option="hex")
Esempio n. 4
0
def do(the_state_machine, pre_context_sm, BeginOfLinePreContextF):
    """Attach a pre-context to the acceptance states of 'the_state_machine'.

    The pre-context state machine is **not** webbed into the original state
    machine. Instead, a separate state machine is built which is to be walked
    backwards, and the acceptance states of 'the_state_machine' refer to it
    by its state machine id.

    ARGUMENTS:

       the_state_machine      -- state machine to be pre-conditioned; must not
                                 be empty. Its acceptance states are modified.
       pre_context_sm         -- pre-context state machine; None, if there is
                                 none. If given, it must not be empty.
       BeginOfLinePreContextF -- True, if 'begin-of-line' is a pre-context.

    RETURNS:

       None, if 'pre_context_sm' is None. If 'BeginOfLinePreContextF' is set,
             then all acceptance states have been tagged with the pre-context
             id 'E_PreContextIDs.BEGIN_OF_LINE'.

       Else, the reversed pre-context state machine; pruned after acceptance
             and beautified. With 'BeginOfLinePreContextF', the reversed
             newline state machine has been sequentialized behind it. All
             acceptance states of 'the_state_machine' carry its id as
             pre-context id.
    """
    # (*) Consistency: state machines without states are senseless here.
    assert not the_state_machine.is_empty()
    assert pre_context_sm is None or not pre_context_sm.is_empty()

    # (*) Without a pre-context state machine only the trivial pre-context
    #     'begin-of-line' may be noted. Nothing is to be reversed.
    if pre_context_sm is None:
        if not BeginOfLinePreContextF:
            return None
        for state in the_state_machine.get_acceptance_state_list():
            state.set_pre_context_id(E_PreContextIDs.BEGIN_OF_LINE)
        return None

    # (*) The pre-context is walked backwards => reverse it.
    backward_sm = reverse.do(pre_context_sm)

    if BeginOfLinePreContextF:
        # In backward direction the newline comes after the pre-context.
        backward_newline_sm = reverse.do(StateMachine_Newline())
        backward_sm         = sequentialize.do([backward_sm, backward_newline_sm])

    # (*) Nothing behind an acceptance state needs to be analyzed.
    acceptance_pruning.do(backward_sm)

    # (*) Clean-up must happen AFTER the acceptance pruning (!)
    backward_sm = beautifier.do(backward_sm)

    # (*) Let the acceptance states refer to the backward state machine.
    #     [Is this necessary? Is it not enough that the acceptance origins point to it? <fschaef>]
    backward_sm_id = backward_sm.get_id()
    for state in the_state_machine.get_acceptance_state_list():
        state.set_pre_context_id(backward_sm_id)

    return backward_sm
Esempio n. 5
0
def test(TestString):
    print "-------------------------------------------------------------------"
    print "expression    = \"" + TestString + "\""
    pattern = core.do(TestString, {}).finalize(None)
    print "pattern\n"
    print pattern.sm
    print "pre-context = ", reverse.do(pattern.sm_pre_context_to_be_reversed)
Esempio n. 6
0
def do(the_state_machine, pre_context_sm, BeginOfLinePreContextF):
    """Attach a pre-context to the acceptance states of 'the_state_machine'.

    The pre-context state machine is **not** webbed into the original state
    machine. A reversed state machine is built instead (it is to be walked
    backwards), and the acceptance states of 'the_state_machine' refer to it
    by its state machine id.

    ARGUMENTS:

       the_state_machine      -- state machine to be pre-conditioned; must not
                                 be empty. Its acceptance states are modified.
       pre_context_sm         -- pre-context state machine; None, if there is
                                 none. If given, it must not be empty.
       BeginOfLinePreContextF -- True, if 'begin-of-line' is a pre-context.

    RETURNS:

       None, if 'pre_context_sm' is None. If 'BeginOfLinePreContextF' is set,
             then all acceptance states have been tagged with the pre-context
             id 'E_PreContextIDs.BEGIN_OF_LINE'.

       Else, the reversed pre-context state machine; pruned after acceptance
             and beautified. With 'BeginOfLinePreContextF', a newline has been
             mounted to its acceptance states (with 'InverseF=True'). All
             acceptance states of 'the_state_machine' carry its id as
             pre-context id.
    """
    # (*) Consistency: state machines without states are senseless here.
    assert not the_state_machine.is_empty()
    assert pre_context_sm is None or not pre_context_sm.is_empty()

    # (*) No pre-context state machine: only the trivial 'begin-of-line'
    #     pre-context may be noted.
    if pre_context_sm is None:
        if not BeginOfLinePreContextF:
            return None
        for state in the_state_machine.get_acceptance_state_list():
            state.set_pre_context_id(E_PreContextIDs.BEGIN_OF_LINE)
        return None

    # (*) The pre-context is walked backwards => reverse it.
    backward_sm = reverse.do(pre_context_sm)

    if BeginOfLinePreContextF:
        # Extend the pre-context by a preceding 'begin-of-line'.
        backward_sm.mount_newline_to_acceptance_states(Setup.dos_carriage_return_newline_f,
                                                       InverseF=True)

    # (*) Nothing behind an acceptance state needs to be analyzed.
    acceptance_pruning.do(backward_sm)

    # (*) Clean-up must happen AFTER the acceptance pruning (!)
    backward_sm = beautifier.do(backward_sm)

    # (*) Let the acceptance states refer to the backward state machine.
    #     [Is this necessary? Is it not enough that the acceptance origins point to it? <fschaef>]
    backward_sm_id = backward_sm.get_id()
    for state in the_state_machine.states.itervalues():
        if state.is_acceptance():
            state.set_pre_context_id(backward_sm_id)

    return backward_sm
Esempio n. 7
0
def do(the_state_machine, pre_context_sm, BeginOfLinePreContextF):
    """Set up a pre-context for 'the_state_machine'.

    This is different from sequentializing or parallelizing state machines:
    the pre-context is **not** webbed into 'the_state_machine'. What happens
    is the following:

       -- The pre-context state machine is reversed, because it is to be
          walked through backwards.
       -- If 'BeginOfLinePreContextF' is set, a newline is mounted to the
          acceptance states of the reversed state machine ('InverseF=True').
       -- The reversed state machine is pruned behind acceptance states and
          then beautified.
       -- Each acceptance state of 'the_state_machine' receives the id of the
          resulting state machine as pre-context id.

    If 'pre_context_sm' is None, then no state machine is built. In that case
    the acceptance states are tagged with 'E_PreContextIDs.BEGIN_OF_LINE',
    provided that 'BeginOfLinePreContextF' is set.

    RETURNS: None, if 'pre_context_sm' is None.
             The reversed pre-context state machine, else.
    """
    # (*) Consistency: state machines without states are senseless here.
    assert not the_state_machine.is_empty()
    assert pre_context_sm is None or not pre_context_sm.is_empty()

    has_pre_context_sm_f = pre_context_sm is not None
    if not has_pre_context_sm_f:
        if BeginOfLinePreContextF:
            # Trivial pre-context: 'begin-of-line' only.
            for acceptance_state in the_state_machine.get_acceptance_state_list():
                acceptance_state.set_pre_context_id(E_PreContextIDs.BEGIN_OF_LINE)
        return None

    # (*) Reversal for backward walk.
    result = reverse.do(pre_context_sm)

    if BeginOfLinePreContextF:
        # Extend the pre-context by a preceding 'begin-of-line'.
        result.mount_newline_to_acceptance_states(
            Setup.dos_carriage_return_newline_f, InverseF=True
        )

    # (*) Once acceptance is reached, no further analysis is necessary.
    acceptance_pruning.do(result)

    # (*) Beautification only AFTER acceptance pruning (!)
    result = beautifier.do(result)

    # (*) Acceptance states refer to the reversed pre-context by its id.
    #     [Is this necessary? Is it not enough that the acceptance origins point to it? <fschaef>]
    result_id = result.get_id()
    for state in the_state_machine.states.itervalues():
        if state.is_acceptance():
            state.set_pre_context_id(result_id)

    return result
Esempio n. 8
0
def cut_end(A, B):
    """Cut End:

    A lexeme that matches 'A' and ends with something that matches 'B' is
    pruned by the part that matches 'B'.

    SCHEME:   'CutEnd yyyyyyyyyy'

              lexemes before               lexemes after
              aaaaaaaaaaaxxxxxxxxxx  --->  aaaaaaaaaaaxxxxxxxxxx
              bbbbbbbbbbbyyyyyyyyyy        bbbbbbbbbbb

    The operation is done by '__cut_begin_core()' on the reversed state
    machines.

    RETURNS: 'A' itself, if '__cut_begin_core()' reports that nothing was cut.
             The reversed result of '__cut_begin_core()', else.
    """
    reversed_a = reverse.do(A)
    reversed_b = reverse.do(B)
    reversed_result, something_cut_f = __cut_begin_core(reversed_a, reversed_b)

    if something_cut_f:
        return reverse.do(reversed_result)
    return A
Esempio n. 9
0
def test(sm, pre_sm):
    print "EXPRESSION = ", sm
    print "PRE-CONDITION = ", pre_sm
    pre_context_sm = setup_pre_context.do(sm, pre_sm, False, False)
    inverse_pre_context_sm = reverse.do(pre_context_sm)
    inverse_pre_context_sm.set_id(pre_context_sm.get_id())
    #
    print "with pre-context = ", sm
    print "inverse pre-context = ", inverse_pre_context_sm
Esempio n. 10
0
    def do(self, Option="utf8"):
        """Prepare output in the 'dot' language, that graphviz uses.

        Option -- "utf8" or "hex"; passed on to the writing function.

        Output is generated for

           -- the combination of the mode's core state machines,
           -- the combination of the reversed pre-context state machines,
              if there are any,
           -- each reversed backward input position detector; the file
              name is looked up by the id of the non-reversed state machine.
        """
        assert Option in ["utf8", "hex"]

        # (*) Core state machines
        core_sm = combination.do(self.mode.core_sm_list)
        self.__do(core_sm, self.file_name_main, Option)

        # (*) Pre-context state machines
        if self.mode.pre_context_sm_to_be_reversed_list:
            reversed_list = []
            for pre_context_sm in self.mode.pre_context_sm_to_be_reversed_list:
                reversed_list.append(reverse.do(pre_context_sm))
            combined_sm = combination.do(reversed_list, FilterDominatedOriginsF=False)
            self.__do(combined_sm, self.file_name_pre_context, Option)

        # (*) Backward input position detectors
        if len(self.mode.bipd_sm_to_be_reversed_db) == 0:
            return
        for bipd_sm in self.mode.bipd_sm_to_be_reversed_db.itervalues():
            file_name = self.file_name_bipd_db[bipd_sm.get_id()]
            self.__do(reverse.do(bipd_sm), file_name, Option)
Esempio n. 11
0
def detect_backward(CoreStateMachine, PostConditionStateMachine):
    """Detect a 'backward ambiguity', i.e. the case where it cannot clearly be
       determined how far to go back from the end of a post-condition.

       NOTE: This does not mean that the post-condition is ambiguous. Many
       cases that are backward ambiguous can be handled by quex's normal
       post-condition handling.

       Examples:  x/x+   is backward ambiguous because in a stream
                         of 'x' one cannot determine with a pure
                         state machine where to stop. This case,
                         though, can be handled by the normal post-
                         condition implementation.

                  x+/x+  is backward ambiguous and cannot be handled
                         by the normal implementation. In fact, this
                         specification does not allow any conclusion
                         about the user's intent where to reset the
                         input after match.

       PROCEDURE: Both state machines are reversed and beautified. Then the
       detection is delegated to 'detect_forward()', where the reversed
       post-condition takes the first and the reversed core state machine
       the second place.

       RETURNS: Whatever 'detect_forward()' returns.
    """
    __assert_state_machines(CoreStateMachine, PostConditionStateMachine)

    # The clone is not used below. The call is maintained, so that the
    # sequence of operations remains as it was.
    unused_clone = PostConditionStateMachine.clone()

    # An earlier approach is disabled: It modified the post condition, so that
    # the initial state was the only acceptance state, which allows the
    # detector to trigger on 'iteration'.
    reversed_core_sm = reverse.do(CoreStateMachine)
    reversed_core_sm = beautifier.do(reversed_core_sm)

    # no deepcopy needed here, I guess <fschaef 11y11m01d>
    post_context_copy        = deepcopy(PostConditionStateMachine)
    reversed_post_context_sm = reverse.do(post_context_copy)
    reversed_post_context_sm = beautifier.do(reversed_post_context_sm)

    return detect_forward(reversed_post_context_sm, reversed_core_sm)
Esempio n. 12
0
def detect_backward(CoreStateMachine, PostConditionStateMachine):
    """Detect a 'backward ambiguity', i.e. the case where it cannot clearly be
       determined how far to go back from the end of a post-condition.

       NOTE: This does not mean that the post-condition is ambiguous. Many
       cases that are backward ambiguous can be handled by quex's normal
       post-condition handling.

       Examples:  x/x+   is backward ambiguous because in a stream
                         of 'x' one cannot determine with a pure
                         state machine where to stop. This case,
                         though, can be handled by the normal post-
                         condition implementation.

                  x+/x+  is backward ambiguous and cannot be handled
                         by the normal implementation. In fact, this
                         specification does not allow any conclusion
                         about the user's intent where to reset the
                         input after match.

       PROCEDURE: Both state machines are reversed (without beautification).
       Then the detection is delegated to 'detect_forward()', where the
       reversed post-condition takes the first and the reversed core state
       machine the second place.

       RETURNS: Whatever 'detect_forward()' returns.
    """
    __assert_state_machines(CoreStateMachine, PostConditionStateMachine)

    # The clone is not used below. The call is maintained, so that the
    # sequence of operations remains as it was.
    unused_clone = PostConditionStateMachine.clone()

    # An earlier approach is disabled: It modified the post condition, so that
    # the initial state was the only acceptance state, which allows the
    # detector to trigger on 'iteration'.
    backward_core_sm = reverse.do(CoreStateMachine)

    # no deepcopy needed here, I guess <fschaef 11y11m01d>
    post_context_copy        = deepcopy(PostConditionStateMachine)
    backward_post_context_sm = reverse.do(post_context_copy)

    return detect_forward(backward_post_context_sm, backward_core_sm)
Esempio n. 13
0
    def treat(sm, ReverseF):
        """Transform 'sm' by 'Setup.buffer_encoding'; reverse it if 'ReverseF'.

        A warning is issued, if the transformation reports failure. The
        returned state machine carries the id of the state machine that
        was passed in.
        """
        original_id = sm.get_id()
        ok_f, sm    = Setup.buffer_encoding.do_state_machine(sm)
        if not ok_f:
            message  = "Pattern contains elements not found in engine codec '%s'.\n" % Setup.buffer_encoding.name
            message += "(Buffer element size is %s [byte])" % Setup.lexatom.size_in_byte
            error.warning(message, sm.sr)

        if ReverseF:
            sm = reverse.do(sm, EnsureDFA_f=True)

        # Transformation and reversal may deliver new objects => restore the id.
        sm.set_id(original_id)
        return sm
Esempio n. 14
0
def test_core(TestString):
    print "___________________________________________________________________________"
    print "expression    = \"" + TestString + "\""
    pattern = core.do(TestString, {}, AllowNothingIsNecessaryF=True).finalize(None)
    if pattern is None: 
        print "pattern syntax error"
    else:
        print "pattern\n", 
        print pattern.sm
        if pattern.sm_pre_context_to_be_reversed:
            reversed_sm = reverse.do(pattern.sm_pre_context_to_be_reversed)
            print "pre-context = ", reversed_sm
        print "begin of line = ", pattern.sm.has_acceptance_condition(E_AcceptanceCondition.BEGIN_OF_LINE)
Esempio n. 15
0
def do(A, T):
    """Relate the tails of 'A' to the lexemes of 'T'. It is checked

         (1) whether there are sequences that match 'T' and the end of 'A'.
         (2) whether there are sequences that do not match 'T' and still
             match the end of 'A'.

    Both checks are done on the reversed state machines, where tails of
    'A' appear as heads.

    RETURNS: [0] True, if ALL ending sequences of 'A' match 'T'.
                 False, else.
             [1] True, if there are some ending sequences of 'A' that match 'T'.
                 False, else.
    """
    reversed_a = reverse.do(A)
    reversed_t = reverse.do(T)

    # (1) Does 'T' match AT LEAST ONE tail of 'A'?
    #     == Are there paths to acceptance in reversed 'T' that are also
    #        present in reversed 'A'?
    commonality      = outrun.commonality(reversed_t, reversed_a)
    some_tail_fits_f = len(commonality) != 0

    # (2) Are there tails of 'A' which are not covered by 'T'?
    #     == Are there paths in reversed 'A' that are not covered by paths
    #        in reversed 'T'?
    some_tail_misfits_f = diversion(reversed_a, reversed_t)

    all_tails_fit_f = some_tail_fits_f and not some_tail_misfits_f
    return all_tails_fit_f, some_tail_fits_f
Esempio n. 16
0
    def mount_post_context_sm(self):
        """Mount the post context to the pattern's state machine.

        'setup_post_context.do()' delivers the new main state machine and,
        possibly, a state machine for backward input position detection that
        is still to be reversed. If the latter is present, then it is made DFA
        compliant (if it is not yet), reversed, beautified and stored as the
        backward input position detector.
        """
        mounted = setup_post_context.do(self.__sm,
                                        self.__post_context_sm,
                                        self.__post_context_end_of_line_f,
                                        self.__sr)
        self.__sm, self.__bipd_sm_to_be_inverted = mounted

        # No backward input position detection required => done.
        if self.__bipd_sm_to_be_inverted is None:
            return

        if not self.__bipd_sm_to_be_inverted.is_DFA_compliant():
            self.__bipd_sm_to_be_inverted = beautifier.do(self.__bipd_sm_to_be_inverted)

        reversed_bipd_sm = reverse.do(self.__bipd_sm_to_be_inverted)
        self.__bipd_sm   = beautifier.do(reversed_bipd_sm)
Esempio n. 17
0
    def mount_post_context_sm(self):
        """Mount the post context and prepare the backward input position
        detector (bipd), if one is required.

        The result of 'setup_post_context.do()' replaces the main state
        machine. If it delivers a bipd state machine, the latter is beautified
        in case that it is not DFA compliant. Its reversed and beautified
        version is stored as the bipd state machine.
        """
        self.__sm, self.__bipd_sm_to_be_inverted = setup_post_context.do(
            self.__sm, self.__post_context_sm,
            self.__post_context_end_of_line_f, self.__sr
        )

        if self.__bipd_sm_to_be_inverted is not None:
            # The presence of the state machine is already ensured here.
            if not self.__bipd_sm_to_be_inverted.is_DFA_compliant():
                self.__bipd_sm_to_be_inverted = beautifier.do(self.__bipd_sm_to_be_inverted)

            self.__bipd_sm = beautifier.do(reverse.do(self.__bipd_sm_to_be_inverted))
Esempio n. 18
0
def test_core(TestString):
    print "___________________________________________________________________________"
    print "expression    = \"" + TestString + "\""

    Setup.dos_carriage_return_newline_f = True

    pattern_raw = core.do(TestString, {})
    pattern = pattern_raw.finalize(None)
    if pattern is None:
        print "pattern syntax error"
    else:
        print "pattern\n",
        print pattern.sm
        if pattern.sm_pre_context_to_be_reversed:
            reversed_sm = reverse.do(pattern.sm_pre_context_to_be_reversed)
            print "pre-context =", reversed_sm
            print "begin of line = ", pattern.has_acceptance_condition(
                E_AcceptanceCondition.BEGIN_OF_LINE)
Esempio n. 19
0
def test(Idx, sm_pre, sm, sm_post, BOL_F, EOL_F):
    ## if Idx != 5: return
    result = sm.clone()
    print "##-- %i -----------------------------------------------------------------------" % Idx

    if sm_pre is not None: print " -- pre-condition  = True"
    else: print " -- pre-condition  = False"

    if sm_post is not None: print " -- post-condition = True"
    else: print " -- post-condition = False"
    print " -- begin of line  = ", BOL_F
    print " -- end of line    = ", EOL_F

    ipsb_sm = setup_post_context.do(result, sm_post, EOL_F, False,
                                    SourceRef_VOID)
    pre_context_sm = setup_pre_context.do(result, sm_pre, BOL_F, False)
    if pre_context_sm is None:
        inverse_pre_context_sm = None
    else:
        inverse_pre_context_sm = reverse.do(pre_context_sm)
        inverse_pre_context_sm.set_id(pre_context_sm.get_id())
    #
    # print "EXPRESSION = ", result
    # print "POST CONDITION = ", post_sm
    # print "APPENDED = ", result
    result = beautifier.do(result)
    #
    #
    print
    print "result sm.id     = ", result.get_id()
    if inverse_pre_context_sm is not None:
        print "result pre sm.id = ", inverse_pre_context_sm.get_id()

    begin_of_line_f = None
    for state in result.get_acceptance_state_list():
        BOF = state.single_entry.has_pre_context_begin_of_line()
        if begin_of_line_f is None: begin_of_line_f = BOF
        else: assert begin_of_line_f == BOF

    print "result = ", result
    if inverse_pre_context_sm is not None:
        print "inverse_pre_context_sm = ", inverse_pre_context_sm
    print "trivially pre-conditioned = ", begin_of_line_f
Esempio n. 20
0
def do(SM_A, SM_B):
    """Apply 'cut_begin' on the reversed 'SM_A' and 'SM_B'; return the
    reversed result.

    Both state machines are reversed and beautified before 'cut_begin.do()'
    is applied. In the reversed state machines, the end of a lexeme appears
    as its begin.
    """
    reversed_a = reverse.do(SM_A)
    reversed_a = beautifier.do(reversed_a)
    reversed_b = reverse.do(SM_B)
    reversed_b = beautifier.do(reversed_b)
    return reverse.do(cut_begin.do(reversed_a, reversed_b))
        print "Core Pattern:\n" + repr(x)
        return

    try:
        post_context_sm = regex.do(string_stream_PostCondition,
                                   {}).extract_sm()
    except RegularExpressionException, x:
        print "Post Condition Pattern:\n" + repr(x)
        return

    print "---------------------------------------------------------"
    print "core pattern            =", RE_Core
    print "post condition pattern  =", RE_PostCondition

    backward_search_sm = ambiguous_post_context.mount(core_sm, post_context_sm)
    backward_search_sm = reverse.do(backward_search_sm)
    # .mount() does not transformation from NFA to DFA
    core_sm = beautifier.do(core_sm)

    print "ambigous post condition =", core_sm

    print "backward detector =", backward_search_sm


# Test cases; presumably each call passes a core pattern followed by a
# post-condition pattern -- confirm against the definition of 'test()'.
test('"xy"+', '((ab)+|xy)')
test('"xz"+', '[a-z]{2}')
test('"xyz"+', '"xyz"')
test("(a)+", "ab")
test("(.)+a", "(.)+")

# test('"xz"+', '"xz"+')
Esempio n. 22
0
def do(SM_A, SM_B):
    """Apply 'cut_begin' on the reversed 'SM_A' and 'SM_B'; return the
    reversed result.

    Both operands are reversed and beautified first ('SM_A' before 'SM_B').
    What is an 'end' in the original state machines, is a 'begin' in the
    reversed ones.
    """
    operand_list = []
    for original_sm in (SM_A, SM_B):
        operand_list.append(beautifier.do(reverse.do(original_sm)))

    reversed_result = cut_begin.do(operand_list[0], operand_list[1])
    return reverse.do(reversed_result)
Esempio n. 23
0
    except RegularExpressionException, x:
        print "Core Pattern:\n" + repr(x)
        return

    try:
        post_context_sm = regex.do(string_stream_PostCondition, {}).sm
    except RegularExpressionException, x:
        print "Post Condition Pattern:\n" + repr(x)
        return

    print "---------------------------------------------------------"
    print "core pattern            =", RE_Core
    print "post condition pattern  =", RE_PostCondition

    backward_search_sm = ambiguous_post_context.mount(core_sm, post_context_sm)
    backward_search_sm = beautifier.do(reverse.do(backward_search_sm))
    # .mount() does not transformation from NFA to DFA
    core_sm = beautifier.do(core_sm)

    print "ambigous post condition =", core_sm

    print "backward detector =", backward_search_sm


# Test cases; presumably each call passes a core pattern followed by a
# post-condition pattern -- confirm against the definition of 'test()'.
test('"xy"+', '((ab)+|xy)')
test('"xz"+', '[a-z]{2}')
test('"xyz"+', '"xyz"')
test("(a)+",   "ab")
test("(.)+a",   "(.)+")

# test('"xz"+', '"xz"+')
Esempio n. 24
0
def do(the_state_machine, pre_context_sm, BeginOfLinePreContextF):
    """Set up a pre-context for 'the_state_machine'.

    This is different from sequentializing or parallelizing state machines:
    the pre-context is **not** webbed into 'the_state_machine'. What happens
    is the following:

       -- The pre-context state machine is reversed, because it is to be
          walked through backwards.
       -- If 'BeginOfLinePreContextF' is set, the reversed newline state
          machine is sequentialized behind the reversed pre-context.
       -- The reversed state machine is pruned behind acceptance states and
          then beautified.
       -- Each acceptance state of 'the_state_machine' receives the id of the
          resulting state machine as pre-context id.

    If 'pre_context_sm' is None, then no state machine is built. In that case
    the acceptance states are tagged with 'E_PreContextIDs.BEGIN_OF_LINE',
    provided that 'BeginOfLinePreContextF' is set.

    RETURNS: None, if 'pre_context_sm' is None.
             The reversed pre-context state machine, else.
    """
    # (*) Consistency: state machines without states are senseless here.
    assert not the_state_machine.is_empty()
    assert pre_context_sm is None or not pre_context_sm.is_empty()

    has_pre_context_sm_f = pre_context_sm is not None
    if not has_pre_context_sm_f:
        # The two conditions must not be merged: Without a pre-context state
        # machine the function returns in any case.
        if BeginOfLinePreContextF:
            for acceptance_state in the_state_machine.get_acceptance_state_list():
                acceptance_state.set_pre_context_id(E_PreContextIDs.BEGIN_OF_LINE)
        return None

    # (*) Reversal for backward walk.
    result = reverse.do(pre_context_sm)

    if BeginOfLinePreContextF:
        # Walking backwards, the newline is met after the pre-context.
        newline_reversed = reverse.do(StateMachine_Newline())
        result = sequentialize.do([result, newline_reversed])

    # (*) Once acceptance is reached, no further analysis is necessary.
    acceptance_pruning.do(result)

    # (*) Beautification only AFTER acceptance pruning (!)
    result = beautifier.do(result)

    # (*) Acceptance states refer to the reversed pre-context by its id.
    #     [Is this necessary? Is it not enough that the acceptance origins point to it? <fschaef>]
    result_id = result.get_id()
    for acceptance_state in the_state_machine.get_acceptance_state_list():
        acceptance_state.set_pre_context_id(result_id)

    return result

            
def invert_this(sm):
    print "-------------------------------------------------------------------------------"
    print "sm       = ", sm
    tmp = reverse.do(sm)
    print "inverse  = ", tmp
    return tmp
Esempio n. 26
0
def rev(A):
    """Shorthand for 'reverse.do(A)'."""
    reversed_sm = reverse.do(A)
    return reversed_sm
def uni(*A):
    """Shorthand for 'union.do()' on a list made of all positional arguments."""
    sm_list = list(A)
    return union.do(sm_list)
Esempio n. 27
0
def test(sm):
    print "-------------------------------------------------------------------------------"
    print "sm       = ",      sm
    sm = reverse.do(sm)
    print "inverse  = ", sm 
Esempio n. 28
0
def snap_reverse(stream, PatternDict):
    """Snap the curly bracketed expression of the reverse operator 'R' from
    'stream' and return the reversed version of its first element.
    """
    snapped = snap_curly_bracketed_expression(stream, PatternDict,
                                              "reverse operator", "R")
    operand = snapped[0]
    return reverse.do(operand)
Esempio n. 29
0
def test(sm):
    print "-------------------------------------------------------------------------------"
    print "sm       = ", sm
    sm = reverse.do(sm)
    print "inverse  = ", sm
def invert_this(sm):
    print "-------------------------------------------------------------------------------"
    print "sm       = ", sm
    tmp = reverse.do(sm)
    print "inverse  = ", tmp 
    return tmp
Esempio n. 31
0
def do(SM_List):
    """Intersection: 

       Only match on patterns which are matched by all state machines
       in 'SM_List'.

       (C) 2013 Frank-Rene Schaefer
       ________________________________________________________________________

       A lexeme which matches all patterns must reach an acceptance state 
       in each given state machine. That is, 

          For each state machine, there is a path from the init state 
          to an acceptance state which is triggered by the characters 
          of the lexeme.

       Walking forward does not work, since a path cannot be omitted 
       upon non-fit.

       Instead, consider the super-state consisting of all acceptance 
       states of all state machines. There must be a way backward from 
       this super-acceptance-state to the init states. As soon as a path
       is interrupted, it can be thrown away. This is achieved by reversed
       state machines which are combined into a single one.

       Procedure: Reverse all state machines. The epsilon closure of the 
       common init state corresponds to the super acceptance state. The
       transitions in the super-state machine correspond to the way 
       backward in the original state machines. For each feasible state 
       combination a new state is created. A combination which lacks a 
       state from any of the reversed state machines is dropped.

       The acceptance states of the reversed state machines correspond to
       the init states of the original state machines. A combination that
       contains an acceptance state of every reversed state machine becomes
       an acceptance state of the result, because a complete path has been
       found. The constructed state machine is reversed at the end.

       RETURNS: 'special.get_none()', if any operand is a 'None' pattern.
                An empty 'StateMachine()', if the construction contains no
                acceptance state.
                Else, the beautified reverse of the constructed machine.
    """
    if any(special.is_none(operand) for operand in SM_List):
        # A single '\None' operand turns the whole intersection into '\None'.
        return special.get_none()

    reversed_machines    = [ reverse.do(operand) for operand in SM_List ]
    # For each reversed machine: the set of all its state indices ...
    member_id_sets       = [ set(machine.states.iterkeys()) 
                             for machine in reversed_machines ]
    # ... and the set of its acceptance state indices.
    acceptance_id_sets   = [ set(machine.get_acceptance_state_index_list()) 
                             for machine in reversed_machines ]

    def touches_every_set(IdSetList, Combination):
        """RETURNS: True  -- if 'Combination' shares at least one state 
                             index with each set in 'IdSetList'.
                    False -- if at least one set in 'IdSetList' is 
                             disjoint from 'Combination'.
        """
        return all(not id_set.isdisjoint(Combination) for id_set in IdSetList)

    def new_combined_state(Combination):
        """Create the state which represents 'Combination'. It accepts 
        only if every reversed state machine contributes an acceptance 
        state to the combination.
        """
        accept_f = touches_every_set(acceptance_id_sets, Combination)
        return State(AcceptanceF=accept_f)

    # Plain merge: all states of all reversed state machines are put into
    # one container; a fresh init state reaches each reversed machine's 
    # init state via an epsilon transition.
    merged = StateMachine()
    for machine in reversed_machines:
        merged.states.update(machine.states)
        merged.add_epsilon_transition(merged.init_state_index, 
                                      machine.init_state_index) 

    init_combination = merged.get_epsilon_closure(merged.init_state_index) 
    init_state       = new_combined_state(init_combination)
    result           = StateMachine(InitStateIndex=index.get(), 
                                    InitState=init_state)

    # (*) Work list of pairs: (index of a state which already EXISTS in 
    #     'result', state combination represented by that state).
    pending = [ (result.init_state_index, init_combination) ]

    closure_db = merged.get_epsilon_closure_db()

    while pending:
        source_index, source_combination = pending.pop()

        # (*) Elementary trigger sets: maps the epsilon closure of a target
        #     state combination to the trigger set that leads to it, e.g.
        #
        #                Range        Target State Combination 
        #                [0:23]   --> [ State1, State2, State10 ]
        #                [24:60]  --> [ State1 ]
        #                [61:123] --> [ State2, State10 ]
        #
        trigger_db = merged.get_elementary_trigger_sets(source_combination, 
                                                        closure_db)

        for target_combination, trigger_set in trigger_db.iteritems():
            # -- Drop targets which are not triggered at all, and targets 
            #    where some reversed state machine has no state left (its 
            #    path is interrupted).
            if    trigger_set.is_empty() \
               or not touches_every_set(member_id_sets, target_combination):
                continue

            # -- Index of the state that represents the target combination.
            target_index = map_state_combination_to_index(target_combination)

            # -- First encounter of the combination: create its state and 
            #    schedule it for further investigation.
            if target_index not in result.states:
                result.states[target_index] = new_combined_state(target_combination)
                pending.append((target_index, target_combination))  

            # -- Transition 'source state --> target state'.
            result.add_transition(source_index, trigger_set, target_index)

    if result.has_acceptance_states():
        return beautifier.do(reverse.do(result))
    return StateMachine()
Esempio n. 32
0
def do(A, B):
    """Superset check between two state machines or two patterns.

    If 'A' is a StateMachine, then 'B' must be one too and the decision
    is left to 'Checker(A, B)'. Otherwise, both are treated as objects 
    that provide '.sm', '.has_pre_context()', '.pre_context_sm' and 
    '.pre_context_trivial_begin_of_line_f'.

    IMPORTANT: The pre-contexts must be mounted at this point, i.e. a 
               call to '.mount_pre_context_sm()' must precede this 
               function.

    RETURNS: True  - if A == SUPERSET of B
             False - if not
    """
    if isinstance(A, StateMachine):
        assert isinstance(B, StateMachine)
        return Checker(A, B).do()
    assert not isinstance(B, StateMachine)

    # (*) Core Pattern ________________________________________________________
    #
    #     (including the mounted post context, if there is one).
    #
    # NOTE: Post-conditions do not change anything, since they match only when
    #       the whole lexeme has matched (from begin to end of post condition).
    #       They only tell where the analyzer returns after the match.
    if not Checker(A.sm, B.sm).do():
        return False

    # NOW: 'core(A)' matches a super set of 'core(B)'.

    # (*) Pre-Condition _______________________________________________________
    #
    if not A.has_pre_context():
        # A is not restricted. Whether B is restricted does not matter.
        # => A can match more than B.
        return True

    if not B.has_pre_context():
        # A is restricted by a pre-context, B is not.
        # => B can match things that A cannot.
        return False

    # NOW: Both, A and B, are restricted by a pre-context.
    #
    #      For A to be a superset of B, A must be less or equally restricted 
    #      than B. The check below is: pre(B) is a superset of pre(A).
    #
    if B.pre_context_trivial_begin_of_line_f:
        # B only requires 'begin-of-line'. The condition holds only if A 
        # requires exactly the same, i.e. pre(A) = pre(B).
        return bool(A.pre_context_trivial_begin_of_line_f)

    assert A.pre_context_sm is not None
    assert B.pre_context_sm is not None

    # NOW: B has a 'real' pre-context, not only 'begin-of-line'.
    #
    if A.pre_context_trivial_begin_of_line_f:
        # A contains only 'begin-of-line'. The newline definition may 
        # include '\r\n', so the sequence is passed through 'reverse.do()'.
        # No codec transformation is applied to this state machine here.
        restriction_of_A = reverse.do(StateMachine.from_sequence("\n"))
    else:
        restriction_of_A = A.pre_context_sm

    # The Checker decides about "pre(A) is subset of pre(B)".
    return Checker(B.pre_context_sm, restriction_of_A).do()
Esempio n. 33
0
def rev(A):
    """Shorthand: RETURNS the result of 'reverse.do(A)'."""
    reversed_A = reverse.do(A)
    return reversed_A
Esempio n. 34
0
def do(A, B):
    """Determine whether 'A' matches a superset of what 'B' matches.

    Two kinds of operands are handled:

      -- Both are StateMachine objects: 'Checker(A, B)' decides directly.
      -- Neither is a StateMachine: the operands are expected to provide
         '.sm', '.has_pre_context()', '.pre_context_sm' and 
         '.pre_context_trivial_begin_of_line_f'. Then the core state 
         machines and the pre-contexts are compared.

    IMPORTANT: The pre-contexts must be mounted at this point, i.e. a 
               call to '.mount_pre_context_sm()' must precede this 
               function.

    RETURNS: True  - if A == SUPERSET of B
             False - if not
    """
    if isinstance(A, StateMachine):
        assert isinstance(B, StateMachine)
        return Checker(A, B).do()
    assert not isinstance(B, StateMachine)

    # (*) Core Pattern ________________________________________________________
    #
    #     (including the mounted post context, if there is one).
    #
    # NOTE: Post-conditions do not change anything, since they match only when
    #       the whole lexeme has matched (from begin to end of post condition).
    #       They only tell where the analyzer returns after the match.
    core_superset_f = Checker(A.sm, B.sm).do()
    if not core_superset_f: 
        return False

    # NOW: 'core(A)' matches a super set of 'core(B)'.

    # (*) Pre-Condition _______________________________________________________
    #
    if not A.has_pre_context(): 
        # A is unrestricted; a possible restriction of B is irrelevant.
        # => A can match more than B.
        return True

    if not B.has_pre_context(): 
        # Only A is restricted by a pre-context.
        # => B can match things that A cannot. 
        return False

    # NOW: A and B are both restricted by a pre-context. 
    #
    #      For A to be a superset of B, A must be less or equally restricted 
    #      than B. The check below is: pre(B) is a superset of pre(A).
    # 
    if B.pre_context_trivial_begin_of_line_f:
        # With B being plain 'begin-of-line', the condition is fulfilled 
        # only if A is plain 'begin-of-line', too: pre(A) = pre(B).
        return bool(A.pre_context_trivial_begin_of_line_f)

    assert A.pre_context_sm is not None
    assert B.pre_context_sm is not None

    # NOW: B has a 'real' pre-context, not only 'begin-of-line'.
    #
    # If A contains only 'begin-of-line', a newline state machine stands in
    # for its pre-context. The newline definition may include '\r\n', so 
    # the sequence is passed through 'reverse.do()'. No buffer codec 
    # transformation is applied to this state machine here.
    trivial_f = A.pre_context_trivial_begin_of_line_f
    pre_sm_of_A = A.pre_context_sm if not trivial_f \
                  else reverse.do(StateMachine.from_sequence("\n"))

    # The Checker decides about "pre(A) is subset of pre(B)".
    return Checker(B.pre_context_sm, pre_sm_of_A).do()