Exemple #1
0
def get_sm_list(FSM0, FSM1, FSM2):
    """Build three small DFAs over the given trigger sets.

    SPECIALITIES: -- sm0 and sm1 have an intersection between their second
                     transition.
                  -- sm1 transits further upon acceptance.
                  -- sm2 has only one transition.

    RETURNS: list of three DFAs [sm0, sm1, sm2].
    """
    # -- sm0: two chained transitions; the second one accepts.
    dfa_0 = Dfa_0 = DFA()
    s = dfa_0.add_transition(dfa_0.init_state_index, FSM0)
    s = dfa_0.add_transition(s, NS_A, AcceptanceF=True)
    dfa_0.states[s].mark_acceptance_id(dial.new_incidence_id())

    # -- sm1: accepts after NS_A, then loops back to the first state
    #    upon NS_B while keeping the same acceptance id.
    dfa_1 = DFA()
    first_si = dfa_1.add_transition(dfa_1.init_state_index, FSM1)
    s = dfa_1.add_transition(first_si, NS_A, AcceptanceF=True)
    acceptance_id = dial.new_incidence_id()
    dfa_1.states[s].mark_acceptance_id(acceptance_id)
    s = dfa_1.add_transition(s, NS_B, first_si)
    dfa_1.states[s].mark_acceptance_id(acceptance_id)

    # -- sm2: a single accepting transition.
    dfa_2 = DFA()
    s = dfa_2.add_transition(dfa_2.init_state_index, FSM2, AcceptanceF=True)
    dfa_2.states[s].mark_acceptance_id(dial.new_incidence_id())

    return [dfa_0, dfa_1, dfa_2]
Exemple #2
0
def do(StateMachineList, CommonTerminalStateF=True):
    """Connect state machines in parallel (union of patterns).

       CommonTerminalStateF tells whether the state machines shall trigger 
                            to a common terminal. This may help nfa-to-dfa
                            or hopcroft minimization for ISOLATED patterns.

                            A state machine that consists of the COMBINATION
                            of patterns MUST set this flag to 'False'.

       RETURNS: DFA that matches what any of the given state machines matches.
    """
    assert len(StateMachineList) != 0
              
    def consider(sm):
        # Only machines that are non-empty AND whose init state triggers
        # on something can contribute transitions to the result.
        return not sm.is_Empty() and sm.get_init_state().has_transitions()

    # filter out empty state machines from the consideration          
    sm_list       = [ sm for sm in StateMachineList if consider(sm) ]
    empty_sm_list = [ sm for sm in StateMachineList if not consider(sm) ]

    if len(sm_list) < 2:
        # Zero or one contributing machine => nothing to parallelize.
        if len(sm_list) < 1: result = DFA()
        else:                result = sm_list[0]

        return __consider_empty_state_machines(result, empty_sm_list)

    # (*) collect all transitions from both state machines into a single one
    result     = DFA()
    init_state = result.get_init_state()

    # Connect from the new initial state to the initial states of the
    # sms via epsilon transition. 
    # Connect from each success state of the sms to the new terminal
    # state via epsilon transition.
    if __nfa_to_dfa_required(sm_list):
        for sm in sm_list:
            result.states.update(sm.states)
            init_state.target_map.add_epsilon_target_state(sm.init_state_index)
        result = nfa_to_dfa.do(result)
    else:
        # Set the 'single_entry' operations.
        init_state.set_single_entry(sm_list[0].get_init_state().single_entry.clone())
        # Add transitions to the states.
        for sm in sm_list:
            init_state.target_map.update(sm.get_init_state().target_map)
            # not __nfa_to_dfa_required(...) 
            # => No transition to an an init state.
            # => Original init states can be taken out.
            result.states.update(
                (si, state) for si, state in sm.states.iteritems()
                            if si != sm.init_state_index
            )
        result.assert_consistency()


    #if CommonTerminalStateF:
    #    __combine_transitionless_acceptance_states(result)

    return __consider_empty_state_machines(result, empty_sm_list)
Exemple #3
0
def do(sh):
    """Convert a character stream into a DFA that matches its letters
    in sequence. Each state in the sequence corresponds to the successful
    triggering of one letter; only the last state is an acceptance state.
    Any bailing out before is 'not accepted'.

    Example: "hey" becomes

        (0)-- 'h' -->(1)-- 'e' -->(2)-- 'y' --> ACCEPTANCE
         |            |            |
        FAIL         FAIL         FAIL

    Note: State indices are globally unique, i.e. not necessarily 0, 1, 2, ...
    """
    # Accept in-memory streams and plain files only.
    assert sh.__class__.__name__ == "StringIO" \
           or sh.__class__.__name__ == "file"

    dfa = DFA()
    tail_si = dfa.init_state_index

    # Only \" is a special character '"'; any other backslashed character
    # remains as the sequence 'backslash' + character.
    for code in get_character_code_sequence(sh):
        tail_si = dfa.add_transition(tail_si, code)

    # The state after the last trigger is the 'acceptance' state.
    dfa.states[tail_si].set_acceptance()
    return dfa
Exemple #4
0
def __clone_until_acceptance(Dfa, StartSi):
    """Make a new DFA from the graph between the given 'StartSi' until an
    acceptance state is reached. Walks from 'StartSi' along all paths until
    an acceptance state is reached.

    RETURNS: DFA containing the graph.
    """
    correspondance_db = {si: state_index.get() for si in Dfa.states}
    result = DFA(InitStateIndex=correspondance_db[StartSi],
                 AcceptanceF=Dfa.states[StartSi].is_acceptance())

    work_set = set([StartSi])
    done_set = set([StartSi])
    while work_set:
        si = work_set.pop()
        state = Dfa.states[si]

        if si == Dfa.init_state_index:
            # The init state's clone already exists in 'result'.
            result_state = result.get_init_state()
            target_si_iterable = state.target_map.get_target_state_index_list()
        elif not state.is_acceptance():
            # Plain state: clone it and follow all of its targets.
            result_state = state.clone(correspondance_db)
            target_si_iterable = state.target_map.get_target_state_index_list()
        else:
            # Acceptance state: terminate the walk here; the clone keeps
            # no outgoing transitions.
            result_state = DFA_State()
            result_state.set_acceptance()
            target_si_iterable = []

        # BUG FIX: states must be registered in 'done_set' as soon as they
        # are scheduled. Previously 'done_set' was never extended, so states
        # reachable on multiple paths were processed repeatedly and any
        # cycle in the graph caused an infinite loop.
        new_si_set = set(target_si for target_si in target_si_iterable
                         if target_si not in done_set)
        done_set.update(new_si_set)
        work_set.update(new_si_set)
        result.states[correspondance_db[si]] = result_state

    return result
Exemple #5
0
def test_on_UCS_range(Trafo, Source, Drain, CharacterBackwardTrafo):
    """Verify encoding transformation 'Trafo' over a UCS range.

    For every code unit 'y' in 'Drain', the transformed DFA must reach a
    state whose acceptance id equals the one registered for the
    backward-transformed source character 'x = CharacterBackwardTrafo(y)'.
    """
    sm = DFA()
    acc_db = {}
    # One accepting transition per source character; remember its acceptance id.
    for x in range(Source.begin, Source.end):
        ti = sm.add_transition(sm.init_state_index, x, AcceptanceF=True)
        acc_id = len(acc_db)
        sm.states[ti].mark_acceptance_id(acc_id)
        acc_db[x] = acc_id

    # 'None' represents code units without a backward transformation.
    if Setup.bad_lexatom_detection_f:
        acc_db[None] = E_IncidenceIDs.BAD_LEXATOM
    else:
        acc_db[None] = None

    state_n_before, result = transform(Trafo, sm)
    # assert state_n_before == len(result.states)

    init_state = result.get_init_state()
    count = 0
    for y in range(Drain.begin, Drain.end):
        # Translate character into
        x = CharacterBackwardTrafo(y)
        # Transit on the translated charater
        ti = init_state.target_map.get_resulting_target_state_index(y)
        # Compare resulting state with the expected state's acceptance
        assert_only_acceptance_id(sm.states, ti, acc_db, x, y)

        count += 1

    print "<terminated: %i transitions ok>" % count
Exemple #6
0
def __cut_begin_core(A, B, SearchBeginList=None):
    """RETURN: [0] Resulting DFA
               [1] True, if cut has been performed; False else.

    If no cut has been performed, then 'A' is returned as is.
    """
    A.assert_consistency()
    B.assert_consistency()

    # Cutting '\Empty' from the beginning changes nothing.
    if B.is_Empty(): return A, False

    work_list = WorkList()
    result = DFA(InitStateIndex=work_list.get_result_si(
        A.init_state_index, None, None),
                 AcceptanceF=A.states[A.init_state_index].is_acceptance())

    # Walk 'A' and 'B' in parallel; collect the epsilon transitions that
    # implement the cut.
    epsilon_transition_set = __together_walk(work_list, A, B, result)
    # No cut => return original DFA
    if epsilon_transition_set is None: return A, False

    # Walk the remainder of 'A' that lies beyond the cut.
    __tail_walk(work_list, A, result)

    result.delete_hopeless_states()

    return __implement_epsilon_transitions(result, A, epsilon_transition_set)
Exemple #7
0
 def __init__(self, Name):
     """Represent the character set of unicode script 'Name' together
     with a DFA that accepts exactly one character out of that set.
     """
     sh = StringIO("[:\\P{Script=%s}:]" % Name)
     self.name = Name
     # Character set derived from the '\P{Script=...}' property expression.
     self.charset = regex.snap_set_expression(sh, {})
     # DFA with a single accepting transition on 'charset'.
     self.sm = DFA()
     self.sm.add_transition(self.sm.init_state_index,
                            self.charset,
                            AcceptanceF=True)
     self.id = self.sm.get_id()
Exemple #8
0
def test(TestString):
    """Parse 'TestString' as a character set expression and print the
    resulting state machine; on a parse error print the exception.
    """
    print "expression    = \"" + TestString + "\""
    sm = DFA()
    try:
        # The closing ']' is appended; 'character_set.do()' expects it.
        trigger_set = character_set.do(StringIO(TestString + "]"))
        sm.add_transition(sm.init_state_index, trigger_set, AcceptanceF=True)
        print "state machine\n", sm
    except RegularExpressionException, x:
        print repr(x)
Exemple #9
0
def snap_non_control_character(stream, PatternDict):
    """Snap a single non-control character from 'stream'.

    RETURNS: DFA accepting exactly that one character.
    """
    __debug_entry("non-control characters", stream)

    # (*) read exactly one UTF8-coded character from the stream
    char_code = utf8.__read_one_utf8_code_from_stream(stream)
    if char_code is None:
        error.log(
            "Character could not be interpreted as UTF8 code or End of File reached prematurely.",
            stream)

    # (*) a single accepting transition on that character
    sm = DFA()
    sm.add_transition(sm.init_state_index, char_code, AcceptanceF=True)
    return __debug_exit(sm, stream)
Exemple #10
0
def generate_sm_for_boarders(Boarders, Trafo):
    """Generate a DFA that accepts each character in 'Boarders' and
    transform it according to the encoding transformation 'Trafo'.

    RETURNS: the transformed DFA.
    """
    sm = DFA()
    for ucs_char in Boarders:
        target_idx = index.get()
        # Draw a 'line' from the init state over 'ucs_char' to 'target_idx'.
        sms.line(sm, sm.init_state_index, (ucs_char, target_idx),
                 (ucs_char, target_idx))
        sm.states[target_idx].set_acceptance()

    Trafo.adapt_ranges_to_lexatom_type_range(Setup.lexatom.type_range)
    verdict_f, result = Trafo.do_state_machine(sm)
    # Transformation must succeed for the given borders.
    assert verdict_f
    return result
Exemple #11
0
def __do(SM):
    """Creates a state machine that matches the reverse of what 'SM' matches.

    RETURNS: new DFA (cloned, i.e. with unique state ids).
    """
    result = DFA(InitStateIndex=SM.init_state_index)
    original_acceptance_state_index_list = SM.get_acceptance_state_index_list()

    if len(original_acceptance_state_index_list) == 0:
        # If there is no acceptance state in a state machine, the state machine
        # cannot match any pattern, it is equivalent to '\Empty'. The reverse
        # of \Empty is \Empty.
        return DFA.Empty()

    # Ensure that each target state index has a state inside the state machine
    for state_index in SM.states.keys():
        result.create_new_state(StateIdx=state_index)

    # Reverse every transition: 'a --trigger--> b' becomes 'b --trigger--> a'.
    for state_index, state in SM.states.items():
        for target_state_index, trigger_set in state.target_map.get_map(
        ).items():
            result.states[target_state_index].add_transition(
                trigger_set.clone(), state_index)

        for target_state_index in state.target_map.get_epsilon_target_state_index_list(
        ):
            result.states[
                target_state_index].target_map.add_epsilon_target_state(
                    state_index)

    # -- copy all origins of the original state machine
    # -- We need to cancel any acceptance, because the inverted engine now starts
    #    from a combination of the acceptance states and ends at the initial state.
    for state_index, state in SM.states.items():
        result.states[state_index].single_entry.set(
            cmd.clone() for cmd in state.single_entry
            if cmd.__class__ != SeAccept)  # deepcopy implicit

    # -- only the ORIGINAL initial state becomes an acceptance state (end of inverse)
    result.states[SM.init_state_index].set_acceptance(True)

    # -- setup an epsilon transition from an new init state to all previous
    #    acceptance states.
    new_init_state_index = result.create_new_init_state()
    for state_index in original_acceptance_state_index_list:
        result.add_epsilon_transition(new_init_state_index, state_index)

    # -- for uniqueness of state ids, clone the result
    return result.clone()
Exemple #12
0
def create_ALL_BUT_NEWLINE_state_machine(stream):
    """Implement the regular expression '.': a DFA that accepts any
    admissible character except newline.

    NOTE: Buffer control characters are supposed to be filtered out by the
          code generator.
    """
    global Setup
    all_but_newline_set = NumberSet(Interval(ord("\n"))).get_complement(
        Setup.buffer_encoding.source_set)
    if all_but_newline_set.is_empty():
        error.log(
            "The set of admissible characters contains only newline.\n"
            "The '.' for 'all but newline' is an empty set.",
            SourceRef.from_FileHandle(stream))

    dfa = DFA()
    dfa.add_transition(dfa.init_state_index,
                       all_but_newline_set,
                       AcceptanceF=True)
    return dfa
Exemple #13
0
def snap_character_set_expression(stream, PatternDict):
    """Snap a character set expression from 'stream' and wrap it into a
    DFA that accepts exactly one character out of the resulting set.
    """
    # GRAMMAR:
    #
    # set_expression:
    #                 [: set_term :]
    #                 traditional character set
    #                 \P '{' propperty string '}'
    #                 '{' identifier '}'
    #
    # set_term:
    #                 "alnum"
    #                 "alpha"
    #                 "blank"
    #                 "cntrl"
    #                 "digit"
    #                 "graph"
    #                 "lower"
    #                 "print"
    #                 "punct"
    #                 "space"
    #                 "upper"
    #                 "xdigit"
    #                 "union"        '(' set_term [ ',' set_term ]+ ')'
    #                 "intersection" '(' set_term [ ',' set_term ]+ ')'
    #                 "difference"   '(' set_term [ ',' set_term ]+ ')'
    #                 "complement"   '(' set_term ')'
    #                 set_expression
    #
    trigger_set = snap_set_expression(stream, PatternDict)

    if trigger_set is None:
        error.log("Regular Expression: snap_character_set_expression called for something\n" + \
                  "that does not start with '[:', '[' or '\\P'", stream)
    elif trigger_set.is_empty():
        # Empty set is legal, but almost certainly not what the user meant.
        error.warning(
            "Regular Expression: Character set expression results in empty set.",
            stream)

    # Create state machine that triggers with the trigger set to SUCCESS
    # NOTE: The default for the ELSE transition is FAIL.
    sm = DFA()
    sm.add_transition(sm.init_state_index, trigger_set, AcceptanceF=True)

    return __debug_exit(sm, stream)
Exemple #14
0
def do(SM_List):
    """Intersection: create a DFA that matches exactly what ALL DFAs in
    'SM_List' match.

    RETURNS: DFA
    """
    for sm in SM_List:
        sm.assert_consistency() 

    if any(sm.is_Empty() for sm in SM_List): # If one state machine is '\Empty',
        return DFA.Empty()                   # then the intersection is '\Empty'.

    init_state_setup = tuple(sm.init_state_index for sm in SM_List)
    result           = DFA(AcceptanceF=intersect_acceptance(init_state_setup, SM_List))

    # Result state setup: A result state is setup out of a state from each DFA.
    #                     state_setup[i] is the state from DFA 'SM_List[i]'.
    worklist       = [ (result.init_state_index, init_state_setup) ]
    state_setup_db = {}
    N              = len(SM_List)
    while worklist:
        state_index, state_setup = worklist.pop()

        # Generate Map that shows what lexatoms trigger to what state combination.
        #
        #       NumberSet    Target DFA_State Combination 
        #       [0:23]   --> [ State1, State24, State56 ]
        #       [0:23]   --> [ State5, State21, State55 ]
        #       [24:60]  --> [ State1, State23, State51 ]
        #
        # 'get_intersection_line_up()' only delivers those transitions where there
        # is a transition for each state machine's state.
        line_up = get_intersection_line_up([SM_List[i].states[si].target_map
                                            for i, si in enumerate(state_setup)])
        for target_state_setup, trigger_set in line_up.iteritems():
            assert len(target_state_setup) == N
            target_index, new_f = state_index_for_combination(state_setup_db,
                                                              target_state_setup)

            # A target combination accepts <=> every participating DFA accepts.
            acceptance_f = intersect_acceptance(target_state_setup, SM_List)
            result.add_transition(state_index, trigger_set, target_index,
                                  AcceptanceF = acceptance_f)

            if new_f:
                # Newly seen state combination => schedule for exploration.
                worklist.append((target_index, target_state_setup))

    result.delete_hopeless_states()
    return result
Exemple #15
0
def get_sm_shape_by_name(Name):
    sm = DFA(InitStateIndex=0L)
    if   Name == "linear":      sm, state_n, pic = get_linear(sm)
    elif Name == "butterfly":   sm, state_n, pic = get_butterfly(sm)
    elif Name == "long_loop":   sm, state_n, pic = get_long_loop(sm)
    elif Name == "nested_loop": sm, state_n, pic = get_nested_loop(sm)
    elif Name == "mini_loop":   sm, state_n, pic = get_mini_loop(sm)
    elif Name == "fork":        sm, state_n, pic = get_fork(sm)
    elif Name == "fork2":       sm, state_n, pic = get_fork2(sm)
    elif Name == "fork3":       sm, state_n, pic = get_fork3(sm)
    elif Name == "fork4":       sm, state_n, pic = get_fork4(sm)
    elif Name == "mini_bubble": sm, state_n, pic = get_mini_bubble(sm)
    elif Name == "bubble":      sm, state_n, pic = get_bubble(sm)
    elif Name == "bubble2":     sm, state_n, pic = get_bubble2(sm)
    elif Name == "bubble2b":    sm, state_n, pic = get_bubble2b(sm)
    elif Name == "bubble3":     sm, state_n, pic = get_bubble3(sm)
    elif Name == "bubble4":     sm, state_n, pic = get_bubble4(sm)
    elif Name == "mini_join":   sm, state_n, pic = get_mini_join(sm)
    elif Name == "DEBUG":       sm, state_n, pic = get_DEBUG(sm)
    else:                       sm, state_n, pic = get_tree(sm)
    return sm, state_n, pic
Exemple #16
0
def DFA_Newline():
    """Creates a state machine matching newline according to what has been 
    specified in the setup (Setup.dos_carriage_return_newline_f). 

    That is, if is DOS newline then the state machine represents '\r\n' and
    if it is unix only, then it represents '\n'. If both is required they 
    are implemented in parallel.

    RETURNS: DFA
    """
    unix_f = True
    dos_f  = Setup.dos_carriage_return_newline_f

    line_feed       = ord('\n')  # (pure) newline, i.e. line feed
    carriage_return = ord('\r')

    dfa = DFA()
    if unix_f:
        # '\n' alone is accepted.
        dfa.add_transition(dfa.init_state_index, line_feed, AcceptanceF=True)
    if dos_f:
        # '\r' followed by '\n'; the intermediate state does not accept.
        mid_si = dfa.add_transition(dfa.init_state_index, carriage_return,
                                    AcceptanceF=False)
        dfa.add_transition(mid_si, line_feed, AcceptanceF=True)

    return beautifier.do(dfa)
Exemple #17
0
def setup(EntryN, StateOperation):
    """Set up an Examiner over a one-state DFA plus a predecessor recipe
    for the acceptance-recipe tests.

    NOTE(review): the visible part of this function uses neither 'EntryN'
    nor 'predecessor0_recipe' and returns nothing -- it appears truncated
    by the snippet extraction; confirm against the original source.
    """
    sm = DFA()
    examiner = Examiner(sm, RecipeAcceptance)

    si = 1111L
    setup_state_operation(sm, StateOperation, si)
    operation = sm.states[si].single_entry

    # The init state is treated as a 'linear' state (single entry).
    examiner.linear_db[sm.init_state_index] = LinearStateInfo()

    predecessor0_recipe = RecipeAcceptance(
        [SeAccept(0)],
        {
            E_IncidenceIDs.CONTEXT_FREE_MATCH: 0,
            10L: -1,  # same for both / no restore
            11L: -2,  # unequal for both
            12L: E_Values.RESTORE,  # same for both / restore same
            13L: E_Values.RESTORE,  # same for both / restore differs
            21L: 0,  # no present in other                 
        },
        {
            (E_R.PositionRegister, 12L): 0,
            (E_R.PositionRegister, 13L): 0
        })
Exemple #18
0
# Acceptance schemes of increasing complexity for the interference test:
# scheme 0 restores a stored acceptance, schemes 1-3 accept directly with
# one or several acceptance ids.
acceptance_scheme_0 = [ 
    RecipeAcceptance.RestoreAcceptance 
]
acceptance_scheme_1 = [ 
    SeAccept(1111L, None, False) 
]
acceptance_scheme_2 = [ 
    SeAccept(2222L, None, True) 
]
acceptance_scheme_3 = [ 
    SeAccept(3333L, 33L, True), 
    SeAccept(4444L, 44L, True), 
    SeAccept(5555L, None, True) 
]


examiner = Examiner(DFA(), RecipeAcceptance)

# For the test, only 'examiner.mouth_db' and 'examiner.recipe_type'
# are important.
examiner.mouth_db[1L] = get_MouthStateInfoSnapshotMap(entry_n, acceptance_scheme_0, ip_offset_scheme_0)
examiner.mouth_db[2L] = get_MouthStateInfoSnapshotMap(entry_n, acceptance_scheme_1, ip_offset_scheme_1)
examiner.mouth_db[3L] = get_MouthStateInfoSnapshotMap(entry_n, acceptance_scheme_2, ip_offset_scheme_2)
examiner.mouth_db[4L] = get_MouthStateInfoSnapshotMap(entry_n, acceptance_scheme_3, ip_offset_scheme_3)

# Run the interference step on the four mouth states and print the result.
examiner._interference(set([1L, 2L, 3L, 4L]))

print_interference_result(examiner.mouth_db)

Exemple #19
0
def do(the_state_machine_list,
       LeaveIntermediateAcceptanceStatesF=False,
       MountToFirstStateMachineF=False,
       CloneRemainingStateMachinesF=True):
    """Creates a state machine connecting all state machines in the array
    'state_machine_list' in sequence. When the flag
    'LeaveIntermediateAcceptanceStatesF' is given as True, the connection
    points between the state machines will remain acceptance states. In any
    other case (e.g. the normal sequentialization) the connection points
    lose their acceptance status and only the last state machine in the
    list keeps its acceptance states.

    If MountToFirstStateMachineF is set, then the first state machine will
    contain the result of the concatination.

    RETURNS: DFA implementing the concatenation.
    """
    assert len(the_state_machine_list) != 0

    for sm in the_state_machine_list:  # DEBUG
        sm.assert_consistency()  # DEBUG

    # state machines with no states can be deleted from the list. they do not do anything
    # and do not introduce triggers.
    state_machine_list = [
        sm for sm in the_state_machine_list
        if not sm.is_Empty() and not sm.is_Nothing()
    ]

    if len(state_machine_list) < 2:
        if len(state_machine_list) < 1: return DFA()
        else: return state_machine_list[0]

    # (*) collect all transitions from both state machines into a single one
    #     (clone to ensure unique identifiers of states)
    result = state_machine_list[0]
    if not MountToFirstStateMachineF: result = result.clone()

    # (*) need to clone the state machines, i.e. provide their internal
    #     states with new ids, but the 'behavior' remains. This allows
    #     state machines to appear twice, or being used in 'larger'
    #     conglomerates.
    appended_sm_list = state_machine_list[1:]
    if CloneRemainingStateMachinesF:
        # List comprehension instead of 'map(lambda ...)': clearer, and
        # yields a list under both Python 2 and Python 3.
        appended_sm_list = [sm.clone() for sm in appended_sm_list]

    # (*) all but last state machine enter the subsequent one, in case of SUCCESS
    #     NOTE: The start index is unique. Therefore, one can assume that each
    #           appended_sm_list's '.states' dictionary has different keys. One can simply
    #           take over all transitions of a start index into the result without
    #           considering interferences (see below)
    for appendix in appended_sm_list:
        appendix.assert_consistency()  # DEBUG
        # Mount on every acceptance state the initial state of the following state
        # machine via epsilon transition.
        result.mount_to_acceptance_states(
            appendix.init_state_index,
            CancelStartAcceptanceStateF=not LeaveIntermediateAcceptanceStatesF)
        for state_index, state in appendix.states.items():
            # state is already cloned (if desired), so no deepcopy here
            result.states[state_index] = state

    # (*) double check for consistency (each target state is contained in state machine)
    result.assert_consistency()  # DEBUG
    return result
Exemple #20
0
def snap_primary(stream, PatternDict):
    """primary:  " non_double_quote *  "              = character string
                 [ non_rect_bracket_close ]           = set of characters
                 { identifier }                       = pattern replacement
                 ( expression )
                 non_control_character+               = lonely characters
                 primary repetition_cmd

       RETURNS: DFA for the snapped primary, or None if 'stream' does not
                start with a valid primary (stream position restored).
    """
    global SPECIAL_TERMINATOR

    __debug_entry("primary", stream)
    x = stream.read(1)
    if x == "": return __debug_exit(None, stream)

    # -- 'primary' primary
    if x == "\"": result = snap_character_string.do(stream)
    elif x == "[":
        # seek(-1, 1): push the read character back onto the stream.
        stream.seek(-1, 1)
        result = snap_character_set_expression(stream, PatternDict)
    elif x == "{":
        result = snap_replacement(stream, PatternDict)
    elif x == ".":
        result = create_ALL_BUT_NEWLINE_state_machine(stream)
    elif x == "(":
        result = snap_bracketed_expression(stream, PatternDict)

    elif x.isspace():
        # a lonestanding space ends the regular expression
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    elif x in ["*", "+", "?"]:
        raise RegularExpressionException(
            "lonely operator '%s' without expression proceeding." % x)

    elif x == "\\":
        # Try, in order: command, property set, backslashed character.
        result = snap_command(stream, PatternDict)
        if result is None:
            stream.seek(-1, 1)
            trigger_set = snap_property_set(stream)
            if trigger_set is None:
                # snap the '\'
                stream.read(1)
                char_code = snap_backslashed_character.do(stream)
                if char_code is None:
                    raise RegularExpressionException(
                        "Backslash followed by unrecognized character code.")
                trigger_set = char_code
            result = DFA()
            result.add_transition(result.init_state_index,
                                  trigger_set,
                                  AcceptanceF=True)

    elif x not in CONTROL_CHARACTERS and x != SPECIAL_TERMINATOR:
        # NOTE: The '\' is not inside the control characters---for a reason.
        #       It is used to define for example character codes using '\x' etc.
        stream.seek(-1, 1)
        result = snap_non_control_character(stream, PatternDict)

    else:
        # NOTE: This includes the '$' sign which means 'end of line'
        #       because the '$' sign is in CONTROL_CHARACTERS, but is not checked
        #       against. Thus, it it good to leave here on '$' because the
        #       '$' sign is handled on the very top level.
        # this is not a valid primary
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    # -- optional repetition command?
    result_repeated = __snap_repetition_range(result, stream)
    if result_repeated is not None: result = result_repeated

    return __debug_exit(result, stream)
    print "(*) " + Title    
    print

    for cmd in si.single_entry:
        acceptance_mark = " "
        if cmd.is_acceptance(): acceptance_mark = "*"
        print acceptance_mark + repr(cmd)

    print "---------------------------------------------------------------------"

#----------------------------------------------------------------------------------------    
# (*) setup the state machine origins    
#
# -- the function 'filter dominated origins searches for the original acceptance
#    in the state machine => need to create to dummy state machines
# Dummy state machines for 'filter dominated origins': ids 0..6 are
# accepting, ids 100..106 are non-accepting.
# NOTE(review): the constructed DFAs are discarded -- presumably DFA()
# registers the state indices in a global registry as a side effect;
# verify against the DFA constructor.
DFA(InitStateIndex=0L, AcceptanceF=True)
DFA(InitStateIndex=1L, AcceptanceF=True)
DFA(InitStateIndex=2L, AcceptanceF=True)
DFA(InitStateIndex=3L, AcceptanceF=True)
DFA(InitStateIndex=4L, AcceptanceF=True)
DFA(InitStateIndex=5L, AcceptanceF=True)
DFA(InitStateIndex=6L, AcceptanceF=True)
DFA(InitStateIndex=100L, AcceptanceF=False)
DFA(InitStateIndex=101L, AcceptanceF=False)
DFA(InitStateIndex=102L, AcceptanceF=False)
DFA(InitStateIndex=103L, AcceptanceF=False)
DFA(InitStateIndex=104L, AcceptanceF=False)
DFA(InitStateIndex=105L, AcceptanceF=False)
DFA(InitStateIndex=106L, AcceptanceF=False)
    
# (*) add priviledges
Exemple #22
0
def test(A_txt, B_txt):
    """Performs: \CutBegin{P Q} and prints the result!

    Parses the two regular expression strings, applies 'core(A, B)' and
    prints the normalized string representation of the resulting DFA.
    """
    print "---------------------------"

    A, B = parse_REs(A_txt, B_txt)
    result = core(A, B)

    print
    print "result = ", result.get_string(NormalizeF=True)


if False:  # Selected Test
    dfa = DFA()
    dfa.add_transition(dfa.init_state_index, 1, AcceptanceF=True)

    set_unique_transition_f()

    # for name in get_sm_shape_names_list():
    if True:
        name = "DEBUG"
        Q = get_sm_shape_by_name_with_acceptance(name)
        print "# %s" % name, Q
        cut_Q_Q = __operation(Q, Q)
        # \CutBegin{Q          Q}          = \Empty
        print "#cut_Q_Q:", cut_Q_Q
        print "#verdict:", cut_Q_Q.is_Nothing()
        try:
            assert cut_Q_Q.is_Nothing()
Exemple #23
0
    ca_list = get_ca_list(0x10, 0x60)
    sm_list = get_sm_list(NumberSet.from_range(0x10, 0x40),
                          NumberSet.from_range(0x20, 0x50),
                          NumberSet.from_range(0x30, 0x60))

    # Test for each 'sm' in 'sm_list' is superfluous.
    # It is done in 'AppendixNoI'.
    test(ca_list, sm_list)

elif "Split" in sys.argv:
    # A first transition of a state machine is separated into two, because
    # it is covered by more than one different count action.
    NS1 = NumberSet.from_range(0x10, 0x20)
    NS2 = NumberSet.from_range(0x20, 0x30)
    NS3 = NumberSet.from_range(0x30, 0x40)
    NS4 = NumberSet.from_range(0x40, 0x50)
    ca_list = [
        (NS1, CountAction(E_CharacterCountType.COLUMN, 1)),
        (NS2, CountAction(E_CharacterCountType.COLUMN, 2)),
        (NS3, CountAction(E_CharacterCountType.COLUMN, 3)),
        (NS4, CountAction(E_CharacterCountType.COLUMN, 4)),
    ]

    sm = DFA()
    si = sm.init_state_index
    iid = dial.new_incidence_id()
    ti0 = sm.add_transition(si, NumberSet.from_range(0x1A, 0x4B))
    ac0 = sm.add_transition(ti0, NS_A, AcceptanceF=True)

    test(ca_list, [sm])
Exemple #24
0
def test_plug_sequence(ByteSequenceDB):
    """Plug the given interval sequences into a two-state DFA via the
    configured encoding transformation, beautify the result, verify it is
    acyclic and orphan-free, and print it for graphviz display.
    """
    L = len(ByteSequenceDB[0])

    # All sequences must have equal length and consist of Intervals.
    for seq in ByteSequenceDB:
        assert len(seq) == L
        for x in seq:
            assert isinstance(x, Interval)

    # Find the first byte position where the sequences differ (0 if none).
    first_different_byte_index = -1
    for i in range(L):
        x0 = ByteSequenceDB[0][i]
        for seq in ByteSequenceDB[1:]:
            if not seq[i].is_equal(x0):
                first_different_byte_index = i
                break
        if first_different_byte_index != -1:
            break
    if first_different_byte_index == -1:
        first_different_byte_index = 0

    print "# Best To be Displayed by:"
    print "#"
    print "#  > " + sys.argv[0] + " " + sys.argv[1] + " | dot -Tsvg -o tmp.svg"
    print "#"
    print "# -------------------------"
    print "# Byte Sequences:     "
    i = -1
    for seq in ByteSequenceDB:
        i += 1
        print "# (%i) " % i,
        for x in seq:
            print "    " + x.get_string(Option="hex"),
        print
    print "#    L    = %i" % L
    print "#    DIdx = %i" % first_different_byte_index

    sm = DFA()
    end_index = state_machine.index.get()
    sm.states[end_index] = DFA_State()

    Setup.buffer_setup("", 1, "utf8")

    if Setup.bad_lexatom_detection_f: bad_lexatom_si = index.get()
    else: bad_lexatom_si = None

    trafo = Setup.buffer_encoding

    new_first_tm,    \
    new_state_db = trafo.plug_interval_sequences(sm.init_state_index, end_index,
                                                 ByteSequenceDB,
                                                 BadLexatomSi=bad_lexatom_si)

    if bad_lexatom_si is not None:
        new_first_tm[bad_lexatom_si] = trafo._error_range_by_code_unit_db[0]

    # Generate the 'bad lexatom accepter'.
    # NOTE(review): if bad lexatom detection is off, 'bad_lexatom_si' is None
    # and a state keyed 'None' is inserted here -- confirm this is intended.
    bad_lexatom_state = DFA_State(AcceptanceF=True)
    bad_lexatom_state.mark_acceptance_id(E_IncidenceIDs.BAD_LEXATOM)
    sm.states[bad_lexatom_si] = bad_lexatom_state

    # Replace the init state's transition to 'end_index' by the new map.
    first_tm = sm.get_init_state().target_map.get_map()
    if end_index in first_tm: del first_tm[end_index]
    first_tm.update(new_first_tm)

    sm.states.update(new_state_db)

    sm = beautifier.do(sm)
    if len(sm.get_orphaned_state_index_list()) != 0:
        print "Error: Orphaned States Detected!"

    # Double check, that there are no 'circles'
    predecessor_db = sm.get_predecessor_db()
    assert not any(si in predecessor_db[si] for si in sm.states)

    show_graphviz(sm)
    print "Loop: combine_intersecting_character_sets;"
    sys.exit()


# Define heavily-self intersecting character sets.
## The intersection of a character set with another shall
## still have intersections with the character set
def test(TL):
    """Combine the intersecting character sets of 'TL' and print, for each
    resulting chunk, its number set and the ids of the related DFAs.
    """
    print "#======================================"
    result = combine_intersecting_character_sets(TL)

    for x in result:
        # x[0]: NumberSet of the chunk; x[2]: involved state machines
        # (presumably -- matches the (NumberSet, CountAction, DFA) input
        # triples below; verify against 'combine_intersecting_character_sets').
        print x[0], [sm.get_id() for sm in x[2]]


SM0 = DFA()
SM1 = DFA()
SM2 = DFA()
CA0 = CountAction(E_CharacterCountType.COLUMN, 0)

# Nested intervals: each character set intersects both of the others.
test_list = [
    (NumberSet(Interval(0, 100)), CA0, SM0),
    (NumberSet(Interval(10, 90)), CA0, SM1),
    (NumberSet(Interval(20, 80)), CA0, SM2),
]
test(test_list)

test_list = [
    (NumberSet(Interval(20, 80)), CA0, SM2),
    (NumberSet(Interval(10, 90)), CA0, SM1),
    (NumberSet(Interval(0, 100)), CA0, SM0),